+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/Micro612_pre-course_hw/Micro612_w18_pre-course_hw.pdf b/Micro612_pre-course_hw/Micro612_w18_pre-course_hw.pdf
new file mode 100644
index 0000000..039980b
Binary files /dev/null and b/Micro612_pre-course_hw/Micro612_w18_pre-course_hw.pdf differ
diff --git a/README.md b/README.md
index eed26a3..3e09a9f 100644
--- a/README.md
+++ b/README.md
@@ -1,23 +1,36 @@
-# Bacterial Comparative Genomics Workshop
+Microbial Comparative Genomics Workshop
+=======================================
-#### A 3 day microbial bioinformatics workshop conducted by [Dr. Evan Snitkin](http://thesnitkinlab.com/index.php) at [University of Michigan](https://www.umich.edu/). This module covers the basics of microbial genomic analysis using publicly available tools that are commonly referenced in genomics literature. Students will learn the steps and associated tools that are required to process, annotate and compare microbial genomes.
-
-#### Date: 15 - 17 March
+***A 3 day microbial bioinformatics workshop conducted by [Dr. Evan Snitkin](http://thesnitkinlab.com/index.php) at [University of Michigan](https://www.umich.edu/). This module covers the basics of microbial genomic analysis using publicly available tools that are commonly referenced in genomics literature. Students will learn the steps and associated tools that are required to process, annotate and compare microbial genomes.***
+***Date: Feb 28 - 2 March 2018***
+***
+
+
+Prerequisites
+-------------
+- Prior participation in a [Software Carpentry Workshop](https://umswc.github.io/2018-02-26-UMich/)
***
-#### Prerequisites:
-- Prior participation in a [Software Carpentry Workshop](https://umswc.github.io/2017-01-17-UMich/)
-- [Micro612 pre-course hw](https://github.com/alipirani88/Comparative_Genomics/blob/master/Micro612_pre-course_hw/Micro612_w17_pre-course_hw.pdf): A pre-course homework will help setting up Micro612 flux directories and bash profile, familiarize with basic unix/shell scripting and R commands.
+
+Link
+----
+
+GOTO: http://comparative-genomics.readthedocs.io/en/latest/index.html#
***
-#### Workshop:
+Workshop
+--------
[Day 1 Morning](https://github.com/alipirani88/Comparative_Genomics/blob/master/day1_morning/README.md)
***
+- [Installing and setting up Cyberduck for file transfer](https://github.com/alipirani88/Comparative_Genomics/blob/master/day1_morning/README.md#installing-and-setting-up-cyberduck-for-file-transfer)
- [Getting your data onto Flux and setting up Environment variable](https://github.com/alipirani88/Comparative_Genomics/blob/master/day1_morning/README.md#getting-your-data-onto-glux-and-setting-up-environment-variable)
- [Unix is your friend](https://github.com/alipirani88/Comparative_Genomics/blob/master/day1_morning/README.md#unix-is-your-friend)
- [Quality Control using FastQC](https://github.com/alipirani88/Comparative_Genomics/blob/master/day1_morning/README.md#quality-control-using-fastqc)
@@ -28,6 +41,7 @@ http://pad.software-carpentry.org/micro612_bacterial_genomics_workshop
- [Read Mapping](https://github.com/alipirani88/Comparative_Genomics/blob/master/day1_afternoon/README.md#read-mapping)
- [Variant Calling](https://github.com/alipirani88/Comparative_Genomics/blob/master/day1_afternoon/README.md#variant-calling-and-filteration)
- [Visualize BAM/VCF files in Artemis](https://github.com/alipirani88/Comparative_Genomics/blob/master/day1_afternoon/README.md#visualize-bam-and-vcf-files-in-artemis)
+- [VRE variant calling analysis](https://github.com/alipirani88/Comparative_Genomics/blob/master/day1_afternoon/README.md#vre-variant-calling-analysis)
[Day 2 Morning](https://github.com/alipirani88/Comparative_Genomics/blob/master/day2_morning/README.md#day-2-morning)
***
@@ -40,9 +54,8 @@ http://pad.software-carpentry.org/micro612_bacterial_genomics_workshop
[Day 2 Afternoon](https://github.com/alipirani88/Comparative_Genomics/blob/master/day2_afternoon/README.md#day-2-afternoon)
***
- [Determine which genomes contain beta-lactamase genes](https://github.com/alipirani88/Comparative_Genomics/blob/master/day2_afternoon/README.md#determine-which-genomes-contain-beta-lactamase-genes)
-- [Identification of antibiotic resistance genes with LS-BSR and the ARDB database](https://github.com/alipirani88/Comparative_Genomics/blob/master/day2_afternoon/README.md#identification-of-antibiotic-resistance-genes-with-ls-bsr-and-the-ardb-database)
-- [Perform pan-genome analysis with LS-BSR](https://github.com/alipirani88/Comparative_Genomics/blob/master/day2_afternoon/README.md#perform-pan-genome-analysis-with-ls-bsr)
-- [Perform genome comparisons with ACT](https://github.com/alipirani88/Comparative_Genomics/blob/master/day2_afternoon/README.md#perform-genome-comparisons-with-act)
+- [Identification of antibiotic resistance genes with ARIBA directly from paired-end reads](https://github.com/alipirani88/Comparative_Genomics/blob/master/day2_afternoon/README.md#identification-of-antibiotic-resistance-genes-with-ariba-directly-from-paired-end-reads)
+- [Perform pan-genome analysis with Roary](https://github.com/alipirani88/Comparative_Genomics/blob/master/day2_afternoon/README.md#perform-pan-genome-analysis-with-roary)
[Day 3 Morning](https://github.com/alipirani88/Comparative_Genomics/blob/master/day3_morning/README.md#day-3-morning)
***
@@ -60,7 +73,7 @@ http://pad.software-carpentry.org/micro612_bacterial_genomics_workshop
- [Phylogenetic tree annotation and visualization](https://github.com/alipirani88/Comparative_Genomics/blob/master/day3_afternoon/README.md#phylogenetic-tree-annotation-and-visualization)
- [Assessment of genomic deletions](https://github.com/alipirani88/Comparative_Genomics/blob/master/day3_afternoon/README.md#assessment-of-genomic-deletions)
-- [Helpful resources for microbial genomics](https://github.com/alipirani88/Comparative_Genomics/blob/master/online_resources/README.md#helpful-resources-for-microbial-genomics)
-***
+
+[Helpful resources for microbial genomics](https://github.com/alipirani88/Comparative_Genomics/blob/master/online_resources/README.md#helpful-resources-for-microbial-genomics)
***
diff --git a/_img/day1_after/1.png b/_img/day1_after/1_1.png
similarity index 100%
rename from _img/day1_after/1.png
rename to _img/day1_after/1_1.png
diff --git a/_img/day1_after/HET_variant.png b/_img/day1_after/HET_variant.png
new file mode 100644
index 0000000..284955f
Binary files /dev/null and b/_img/day1_after/HET_variant.png differ
diff --git a/_img/day1_after/HET_variant_gene_selected.png b/_img/day1_after/HET_variant_gene_selected.png
new file mode 100644
index 0000000..87eccc3
Binary files /dev/null and b/_img/day1_after/HET_variant_gene_selected.png differ
diff --git a/_img/day1_after/graphs.png b/_img/day1_after/graphs.png
new file mode 100644
index 0000000..39df74f
Binary files /dev/null and b/_img/day1_after/graphs.png differ
diff --git a/_img/day1_after/read_details.png b/_img/day1_after/read_details.png
new file mode 100644
index 0000000..15c99c8
Binary files /dev/null and b/_img/day1_after/read_details.png differ
diff --git a/_img/day1_after/select_graph.png b/_img/day1_after/select_graph.png
new file mode 100644
index 0000000..5d13302
Binary files /dev/null and b/_img/day1_after/select_graph.png differ
diff --git a/_img/day1_after/spike_true.png b/_img/day1_after/spike_true.png
new file mode 100644
index 0000000..3782999
Binary files /dev/null and b/_img/day1_after/spike_true.png differ
diff --git a/_img/day1_morning/plot_1.png b/_img/day1_morning/plot_1.png
new file mode 100644
index 0000000..fa9c0e0
Binary files /dev/null and b/_img/day1_morning/plot_1.png differ
diff --git a/_img/day1_morning/plot_2.png b/_img/day1_morning/plot_2.png
new file mode 100644
index 0000000..facddfe
Binary files /dev/null and b/_img/day1_morning/plot_2.png differ
diff --git a/_img/spandx.jpg b/_img/spandx.jpg
new file mode 100644
index 0000000..43e124c
Binary files /dev/null and b/_img/spandx.jpg differ
diff --git a/backup/README.md b/backup/README.md
new file mode 100644
index 0000000..0b9854f
--- /dev/null
+++ b/backup/README.md
@@ -0,0 +1,70 @@
+# Bacterial Comparative Genomics Workshop
+
+#### A 3 day microbial bioinformatics workshop conducted by [Dr. Evan Snitkin](http://thesnitkinlab.com/index.php) at [University of Michigan](https://www.umich.edu/). This module covers the basics of microbial genomic analysis using publicly available tools that are commonly referenced in genomics literature. Students will learn the steps and associated tools that are required to process, annotate and compare microbial genomes.
+
+#### Date: Feb 28 - 2 March 2018
+
+
+
+***
+#### Prerequisites:
+- Prior participation in a [Software Carpentry Workshop](https://umswc.github.io/2018-02-26-UMich/)
+
+
+***
+
+#### Workshop:
+
+[Day 1 Morning](https://github.com/alipirani88/Comparative_Genomics/blob/master/day1_morning/README.md)
+***
+- [Getting your data onto Flux and setting up Environment variable](https://github.com/alipirani88/Comparative_Genomics/blob/master/day1_morning/README.md#getting-your-data-onto-glux-and-setting-up-environment-variable)
+- [Unix is your friend](https://github.com/alipirani88/Comparative_Genomics/blob/master/day1_morning/README.md#unix-is-your-friend)
+- [Quality Control using FastQC](https://github.com/alipirani88/Comparative_Genomics/blob/master/day1_morning/README.md#quality-control-using-fastqc)
+- [Quality Trimming using Trimmomatic](https://github.com/alipirani88/Comparative_Genomics/blob/master/day1_morning/README.md#quality-trimming-using-trimmomatic)
+
+[Day 1 Afternoon](https://github.com/alipirani88/Comparative_Genomics/blob/master/day1_afternoon/README.md#day-1-afternoon)
+***
+- [Read Mapping](https://github.com/alipirani88/Comparative_Genomics/blob/master/day1_afternoon/README.md#read-mapping)
+- [Variant Calling](https://github.com/alipirani88/Comparative_Genomics/blob/master/day1_afternoon/README.md#variant-calling-and-filteration)
+- [Visualize BAM/VCF files in Artemis](https://github.com/alipirani88/Comparative_Genomics/blob/master/day1_afternoon/README.md#visualize-bam-and-vcf-files-in-artemis)
+
+[Day 2 Morning](https://github.com/alipirani88/Comparative_Genomics/blob/master/day2_morning/README.md#day-2-morning)
+***
+- [Genome Assembly](https://github.com/alipirani88/Comparative_Genomics/blob/master/day2_morning/README.md#genome-assembly)
+- [Assembly evaluation](https://github.com/alipirani88/Comparative_Genomics/blob/master/day2_morning/README.md#assembly-evaluation-using-quast)
+- [Compare assembly to reference genome and Post-assembly genome improvement](https://github.com/alipirani88/Comparative_Genomics/blob/master/day2_morning/README.md#compare-assembly-to-reference-genome-and-post-assembly-genome-improvement)
+- [Map reads to the final ordered assembly](https://github.com/alipirani88/Comparative_Genomics/blob/master/day2_morning/README.md#map-reads-to-the-final-ordered-assembly)
+- [Genome Annotation](https://github.com/alipirani88/Comparative_Genomics/blob/master/day2_morning/README.md#genome-annotation)
+
+[Day 2 Afternoon](https://github.com/alipirani88/Comparative_Genomics/blob/master/day2_afternoon/README.md#day-2-afternoon)
+***
+- [Determine which genomes contain beta-lactamase genes](https://github.com/alipirani88/Comparative_Genomics/blob/master/day2_afternoon/README.md#determine-which-genomes-contain-beta-lactamase-genes)
+- [Identification of antibiotic resistance genes with LS-BSR and the ARDB database](https://github.com/alipirani88/Comparative_Genomics/blob/master/day2_afternoon/README.md#identification-of-antibiotic-resistance-genes-with-ls-bsr-and-the-ardb-database)
+- [Perform pan-genome analysis with LS-BSR](https://github.com/alipirani88/Comparative_Genomics/blob/master/day2_afternoon/README.md#perform-pan-genome-analysis-with-ls-bsr)
+- [Perform genome comparisons with ACT](https://github.com/alipirani88/Comparative_Genomics/blob/master/day2_afternoon/README.md#perform-genome-comparisons-with-act)
+
+[Day 3 Morning](https://github.com/alipirani88/Comparative_Genomics/blob/master/day3_morning/README.md#day-3-morning)
+***
+- [Perform whole genome alignment with Mauve](https://github.com/alipirani88/Comparative_Genomics/blob/master/day3_morning/README.md#perform-whole-genome-alignment-with-Mauve)
+- [Perform DNA sequence comparisons and phylogenetic analysis in ape](https://github.com/alipirani88/Comparative_Genomics/blob/master/day3_morning/README.md#perform-some-dna-sequence-comparisons-and-phylogenetic-analysis-in-ape)
+- [Perform SNP density analysis to discern evidence of recombination](https://github.com/alipirani88/Comparative_Genomics/blob/master/day3_morning/README.md#perform-snp-density-analysis-to-discern-evidence-of-recombination)
+- [Perform recombination filtering with gubbins](https://github.com/alipirani88/Comparative_Genomics/blob/master/day3_morning/README.md#perform-recombination-filtering-with-gubbins)
+- [Create annotated publication quality trees with iTOL](https://github.com/alipirani88/Comparative_Genomics/blob/master/day3_morning/README.md#create-annotated-publication-quality-trees-with-itol)
+
+[Day 3 Afternoon](https://github.com/alipirani88/Comparative_Genomics/blob/master/day3_afternoon/README.md#day-3-afternoon)
+***
+- [Perform QC on fastq files](https://github.com/alipirani88/Comparative_Genomics/blob/master/day3_afternoon/README.md#perform-qc-on-fastq-files)
+- [Examine results of SPANDx pipeline](https://github.com/alipirani88/Comparative_Genomics/blob/master/day3_afternoon/README.md#examine-results-of-spandx-pipeline)
+- [Recombination detection and tree generation](https://github.com/alipirani88/Comparative_Genomics/blob/master/day3_afternoon/README.md#recombination-detection-and-tree-generation)
+- [Phylogenetic tree annotation and visualization](https://github.com/alipirani88/Comparative_Genomics/blob/master/day3_afternoon/README.md#phylogenetic-tree-annotation-and-visualization)
+- [Assessment of genomic deletions](https://github.com/alipirani88/Comparative_Genomics/blob/master/day3_afternoon/README.md#assessment-of-genomic-deletions)
+
+
+
+[Helpful resources for microbial genomics](https://github.com/alipirani88/Comparative_Genomics/blob/master/online_resources/README.md#helpful-resources-for-microbial-genomics)
+***
diff --git a/backup/sed.sh b/backup/sed.sh
new file mode 100644
index 0000000..f921d2b
--- /dev/null
+++ b/backup/sed.sh
@@ -0,0 +1 @@
+sed 's/\!\[alt tag\](https:\/\/github.com\/alipirani88\/Comparative_Genomics\/blob\/master\/_img\/day1_morning\//![alt tag](/g' day1_morning.md
diff --git a/day1_afternoon/README.md b/day1_afternoon/README.md
index ef64c28..1275502 100644
--- a/day1_afternoon/README.md
+++ b/day1_afternoon/README.md
@@ -1,4 +1,5 @@
-# Day 1 Afternoon
+Day 1 Afternoon
+===============
[[HOME]](https://github.com/alipirani88/Comparative_Genomics/blob/master/README.md)
Earlier this morning, We performed some quality control steps on our sequencing data to make it clean and usable for various downstream analysis. Now we will perform our first sequence analysis, specifically variant calling, and map these reads to a reference genome and try to find out the differences between them.
@@ -13,18 +14,19 @@ These alignment has a vast number of uses, including:
In this session, we will be covering the important steps that are part of any Read mapping/Variant calling bioinformatics pipleine.
-## Read Mapping
+Read Mapping
+------------
[[back to top]](https://github.com/alipirani88/Comparative_Genomics/blob/master/day1_afternoon/README.md)
[[HOME]](https://github.com/alipirani88/Comparative_Genomics/blob/master/README.md)
-![alt tag](https://github.com/alipirani88/Comparative_Genomics/blob/master/_img/day1_after/1.png)
+![alt tag](https://github.com/alipirani88/Comparative_Genomics/blob/master/_img/day1_after/1_1.png)
**1. Navigate to your workshop home directory and copy day1_after directory from shared data directory.**
```
wd
-cp -r /scratch/micro612w17_fluxod/shared/data/day1_after ./
+cp -r /scratch/micro612w18_fluxod/shared/data/day1_after ./
```
We will be using trimmed clean reads that were obtained after running Trimmomatic on raw reads.
@@ -44,7 +46,7 @@ Read Mapping is a time-consuming step that involves searching the reference and
Note: each read mapper has its own unique way of indexing a reference genome and therefore the reference index created by BWA cannot be used for Bowtie. (Most Bioinformatics tools nowadays require some kind of indexing or reference database creation)
->i. To create BWA index of Reference, you need to run following command.
+> ***i. To create BWA index of Reference, you need to run following command.***
Start a flux interactive session
@@ -58,9 +60,9 @@ Navigate to day1_after folder that you recently copied and create a new folder R
```
d1a
-# or
+#or
-cd /scratch/micro612w17_fluxod/username/day1_after/
+cd /scratch/micro612w18_fluxod/username/day1_after/
mkdir Rush_KPC_266_varcall_result
@@ -78,7 +80,7 @@ Also go ahead and create fai index file using samtools required by GATK in later
samtools faidx KPNIH1.fasta
```
->ii. Align reads to reference and redirect the output into SAM file
+> ***ii. Align reads to reference and redirect the output into SAM file***
Quoting BWA:
"BWA consists of three algorithms: BWA-backtrack, BWA-SW and BWA-MEM. The first algorithm is designed for Illumina sequence reads up to 100bp, while the rest two for longer sequences ranged from 70bp to 1Mbp. BWA-MEM and BWA-SW share similar features such as long-read support and split alignment, but BWA-MEM, which is the latest, is generally recommended for high-quality queries as it is faster and more accurate. BWA-MEM also has better performance than BWA-backtrack for 70-100bp Illumina reads."
@@ -101,7 +103,7 @@ You can extract this information from fastq read header. (@M02127:96:000000000-A
**3. SAM/BAM manipulation and variant calling using [Samtools](http://www.htslib.org/doc/samtools.html "Samtools Manual")**
->i. Change directory to results folder and look for BWA output:
+> ***i. Change directory to results folder and look for BWA output:***
```
cd Rush_KPC_266_varcall_result
@@ -146,7 +148,7 @@ MD tag tells you what positions in the read alignment are different from referen
AS is an alignment score and XS:i:0 is an suboptimal alignment score.
->ii. Convert SAM to BAM using SAMTOOLS:
+> ***ii. Convert SAM to BAM using SAMTOOLS:***
BAM is the compressed binary equivalent of SAM but are usually quite smaller in size than SAM format. Since, parsing through a SAM format is slow, Most of the downstream tools require SAM file to be converted to BAM so that it can be easily sorted and indexed.
@@ -156,7 +158,7 @@ The below command will ask samtools to convert SAM format(-S) to BAM format(-b)
samtools view -Sb Rush_KPC_266__aln.sam > Rush_KPC_266__aln.bam
```
->iii. Sort BAM file using SAMTOOLS:
+> ***iii. Sort BAM file using SAMTOOLS:***
Most of the downstream tools such as GATK requires your BAM file to be indexed and sorted by reference genome positions.
@@ -176,21 +178,21 @@ Picard identifies duplicates by searching reads that have same start position on
![alt tag](https://github.com/alipirani88/Comparative_Genomics/blob/master/_img/day1_after/picard.png)
->i. Create a dictionary for reference fasta file required by PICARD
+> ***i. Create a dictionary for reference fasta file required by PICARD***
Make sure you are in Rush_KPC_266_varcall_result directory and are giving proper reference genome path (day1_after directory).
```
-java -jar /scratch/micro612w17_fluxod/shared/bin/picard-tools-1.130/picard.jar CreateSequenceDictionary REFERENCE=../KPNIH1.fasta OUTPUT=../KPNIH1.dict
+java -jar /scratch/micro612w18_fluxod/shared/bin/picard-tools-1.130/picard.jar CreateSequenceDictionary REFERENCE=../KPNIH1.fasta OUTPUT=../KPNIH1.dict
```
->ii. Run PICARD for removing duplicates.
+> ***ii. Run PICARD for removing duplicates.***
```
-java -jar /scratch/micro612w17_fluxod/shared/bin/picard-tools-1.130/picard.jar MarkDuplicates REMOVE_DUPLICATES=true INPUT=Rush_KPC_266__aln_sort.bam OUTPUT=Rush_KPC_266__aln_marked.bam METRICS_FILE=Rush_KPC_266__markduplicates_metrics CREATE_INDEX=true VALIDATION_STRINGENCY=LENIENT
+java -jar /scratch/micro612w18_fluxod/shared/bin/picard-tools-1.130/picard.jar MarkDuplicates REMOVE_DUPLICATES=true INPUT=Rush_KPC_266__aln_sort.bam OUTPUT=Rush_KPC_266__aln_marked.bam METRICS_FILE=Rush_KPC_266__markduplicates_metrics CREATE_INDEX=true VALIDATION_STRINGENCY=LENIENT
```
@@ -198,118 +200,162 @@ The output of Picard remove duplicate step is a new bam file "Rush_KPC_266__aln_
You will need to index this new marked.bam file for further processing.
->iii. Index these marked bam file again using SAMTOOLS(For input in Artemis later)
+> ***iii. Index these marked bam file again using SAMTOOLS(For input in Artemis later)***
```
samtools index Rush_KPC_266__aln_marked.bam
```
Open the markduplicates metrics file and glance through the number and percentage of PCR duplicates removed.
-For more details about each metrics in a metrics file, please refer [this](https://broadinstitute.github.io/picard/picard-metric-definitions.html#DuplicationMetrics)
+For more details about each metrics in a metrics file, please refer to [this](https://broadinstitute.github.io/picard/picard-metric-definitions.html#DuplicationMetrics)
```
nano Rush_KPC_266__markduplicates_metrics
-# or
+#or
less Rush_KPC_266__markduplicates_metrics
```
-## Generate Alignment Statistics
+Generate Alignment Statistics
+-----------------------------
-Often, While analyzing sequencing data, we are required to make sure that our analysis steps are correct. Some statistics about our analysis will help us in making that decision. So Lets try to get some statistics about various outputs that were created using the above steps and check if everything makes sense.
+Often, while analyzing sequencing data, we are required to make sure that our analysis steps are correct. Some statistics about our analysis will help us in making that decision. So let's try to get some statistics about various outputs that were created using the above steps and check if everything makes sense.
->i. Collect Alignment statistics using Picard
+> ***i. Collect Alignment statistics using Picard***
Run the below command on your marked.bam file
```
-java -jar /scratch/micro612w17_fluxod/shared/bin/picard-tools-1.130/picard.jar CollectAlignmentSummaryMetrics R=../KPNIH1.fasta I=Rush_KPC_266__aln_marked.bam O=AlignmentSummaryMetrics.txt
+java -jar /scratch/micro612w18_fluxod/shared/bin/picard-tools-1.130/picard.jar CollectAlignmentSummaryMetrics R=../KPNIH1.fasta I=Rush_KPC_266__aln_marked.bam O=AlignmentSummaryMetrics.txt
```
-Open the file AlignmentSummaryMetrics.txt and explore various statistics. It will generate various statistics and the definition for each statistic s can be found [here](http://broadinstitute.github.io/picard/picard-metric-definitions.html#AlignmentSummaryMetrics)
+Open the file AlignmentSummaryMetrics.txt and explore various statistics. It will generate various statistics and the definition for each can be found [here](http://broadinstitute.github.io/picard/picard-metric-definitions.html#AlignmentSummaryMetrics)
-> Question: Extract alignment percentage from AlignmentSummaryMetrics file. (% of reads aligned to reference genome)
+The file AlignmentSummaryMetrics.txt contains many columns and at times it becomes difficult to extract information from a particular column if we don't know the exact column number. Run the Unix command below to print each column name along with its number.
```
+grep 'CATEGORY' AlignmentSummaryMetrics.txt | tr '\t' '\n' | cat --number
+```
+
+- Question: Extract alignment percentage from AlignmentSummaryMetrics file. (% of reads aligned to reference genome)
+
+
+
+```
+grep -v '#' AlignmentSummaryMetrics.txt | cut -f7
```
->ii. Estimate read coverage/read depth using Picard
+Try to explore other statistics and their definitions from Picard AlignmentSummaryMetrics [link](http://broadinstitute.github.io/picard/picard-metric-definitions.html#AlignmentSummaryMetrics)
+
+> ***ii. Estimate read coverage/read depth using Picard***
-Read coverage/depth describes the average number of reads that align to, or "cover," known reference bases.
+Read coverage/depth describes the average number of reads that align to, or "cover," known reference bases. The sequencing depth is one of the most crucial issues in the design of next-generation sequencing experiments. This [paper](https://www.nature.com/articles/nrg3642) reviews current guidelines and precedents on the issue of coverage, as well as their underlying considerations, for four major study designs, which include de novo genome sequencing, genome resequencing, transcriptome sequencing and genomic location analyses.
+
+After read mapping, it is important to make sure that the reference bases are represented by enough read depth before making any inferences such as variant calling.
```
-java -jar /scratch/micro612w17_fluxod/shared/bin/picard-tools-1.130/picard.jar CollectWgsMetrics R=../KPNIH1.fasta I=Rush_KPC_266__aln_marked.bam O=WgsMetrics.txt
+java -jar /scratch/micro612w18_fluxod/shared/bin/picard-tools-1.130/picard.jar CollectWgsMetrics R=../KPNIH1.fasta I=Rush_KPC_266__aln_marked.bam O=WgsMetrics.txt
```
-Open the file WgsMetrics.txt and explore various statistics. It will generate various statistics and the definition for each statistic s can be found [here](https://broadinstitute.github.io/picard/picard-metric-definitions.html#CollectWgsMetrics.WgsMetrics)
+Open the file "WgsMetrics.txt" and explore various statistics. It will generate various statistics and the definition for each can be found [here](https://broadinstitute.github.io/picard/picard-metric-definitions.html#CollectWgsMetrics.WgsMetrics).
-> Question: Extract mean coverage information from WgsMetrics.txt
+Print column names
+```
+grep 'GENOME_TERRITORY' WgsMetrics.txt | tr '\t' '\n' | cat --number
```
-sed -n 7,8p WgsMetrics.txt | awk -F'\t' '{print $2}'
+Since "WgsMetrics.txt" also contains histogram information, we will run commands on only the first few lines to extract information.
-```
-
-qualimap bamqc -bam Rush_KPC_266__aln_sort.bam -outdir ./ -outfile Rush_KPC_266__report.pdf -outformat pdf
+```
+grep -v '#' WgsMetrics.txt | cut -f2 | head -n3
```
-Lets get this pdf report onto our local system and check the chromosome stats table, mapping quality and coverage across the entire reference genome.
+> Question: Percentage of bases that attained at least 5X sequence coverage.
+```
+grep -v '#' WgsMetrics.txt | cut -f13 | head -n3
```
-scp username@flux-xfer.arc-ts.umich.edu:/scratch/micro612w17_fluxod/username/day1_after/Rush_KPC_266_varcall_result/Rush_KPC_266__report.pdf /path-to-local-directory/
+> Question: Percentage of bases that had significantly high coverage. Regions with unusually high depth sometimes indicate either repetitive regions or PCR amplification bias.
+```
+grep -v '#' WgsMetrics.txt | cut -f25 | head -n3
+```
+
+
-## Variant Calling and Filteration
+
+Variant Calling and Filteration
+-------------------------------
[[back to top]](https://github.com/alipirani88/Comparative_Genomics/blob/master/day1_afternoon/README.md)
[[HOME]](https://github.com/alipirani88/Comparative_Genomics/blob/master/README.md)
-One of the downstream uses of read mapping is finding differences between our sequence data against a reference. This step is achieved by carrying out variants calling using any of the variant callers(samtools, gatk, freebayes etc). Each variant caller uses a different statistical framework to discover SNPs and other types of mutations. For those of you who are interested in finding out more about the statistics involved, please refer to [this]() samtools paper, one of most commonly used variant callers.
+One of the downstream uses of read mapping is finding differences between our sequence data against a reference. This step is achieved by carrying out variant calling using any of the variant callers (samtools, gatk, freebayes etc). Each variant caller uses a different statistical framework to discover SNPs and other types of mutations. For those of you who are interested in finding out more about the statistics involved, please refer to [this]() samtools paper, one of the most commonly used variant callers.
-This GATK best practices [guide](https://www.broadinstitute.org/gatk/guide/best-practices.php) will provide more details about various steps that you can incorporate in your analysis.
+The [GATK best practices guide](https://www.broadinstitute.org/gatk/guide/best-practices.php) will provide more details about various steps that you can incorporate in your analysis.
-There are many published articles that compares different variant callers but this is a very interesting [blog](https://bcbio.wordpress.com/2013/10/21/updated-comparison-of-variant-detection-methods-ensemble-freebayes-and-minimal-bam-preparation-pipelines/) that compares the performance and accuracy of different variant callers.
+There are many published articles that compare different variant callers but this is a very interesting [blog post](https://bcbio.wordpress.com/2013/10/21/updated-comparison-of-variant-detection-methods-ensemble-freebayes-and-minimal-bam-preparation-pipelines/) that compares the performance and accuracy of different variant callers.
-Here we will use samtools mpileup to perform this operation on our BAM file and generate VCF file.
+Here we will use samtools mpileup to perform this operation on our BAM file and generate a VCF (variant call format) file.
**1. Call variants using [samtools](http://www.htslib.org/doc/samtools.html "samtools manual") mpileup and [bcftools](https://samtools.github.io/bcftools/bcftools.html "bcftools")**
```
-/scratch/micro612w17_fluxod/shared/bin/samtools-1.2/samtools mpileup -ug -f ../KPNIH1.fasta Rush_KPC_266__aln_marked.bam | /scratch/micro612w17_fluxod/shared/bin/bcftools-1.2/bcftools call -O v -v -c -o Rush_KPC_266__aln_mpileup_raw.vcf
+samtools mpileup -ug -f ../KPNIH1.fasta Rush_KPC_266__aln_marked.bam | bcftools call -O v -v -c -o Rush_KPC_266__aln_mpileup_raw.vcf
-# In the above command, we are using samtools mpileup to generate a pileup formatted file from BAM alignments and genotype likelihoods(-g flag) in BCF format(binary version of vcf). This bcf output is then piped to bcftools, which calls variants and outputs them in vcf format(-c flag for using consensus calling algorithm and -v for outputting variants positions only)
+#In the above command, we are using samtools mpileup to generate a pileup formatted file from BAM alignments and genotype likelihoods (-g flag) in BCF format (binary version of vcf). This bcf output is then piped to bcftools, which calls variants and outputs them in vcf format (-c flag for using consensus calling algorithm and -v for outputting variants positions only)
```
-Lets go through an the vcf file and try to understand a few important vcf specifications and criteria that we can use for filtering low confidence snps.
+Let's go through the VCF file and try to understand a few important VCF specifications and criteria that we can use for filtering low confidence SNPs.
```
less Rush_KPC_266__aln_mpileup_raw.vcf
```
-Press 'q' from keyboard to exit.
+1. CHROM, POS: 1st and 2nd columns represent the reference genome name and reference base position where a variant was called
+2. REF, ALT: 4th and 5th columns represent the reference allele at the position and alternate/variant allele called from the reads
+3. QUAL: Phred-scaled quality score for the assertion made in ALT
+4. INFO: Additional information that provides technical scores and observations for each variant. Important parameters to look for: Depth (DP), mapping quality (MQ), FQ (consensus score), allele frequency for each ALT allele (AF)
+
+VCF format stores a large variety of information and you can find more details in [this pdf](https://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&ved=0ahUKEwit35bvktzLAhVHkoMKHe3hAhYQFggdMAA&url=https%3A%2F%2Fsamtools.github.io%2Fhts-specs%2FVCFv4.2.pdf&usg=AFQjCNGFka33WgRmvOfOfp4nSaCzkV95HA&sig2=tPLD6jW5ALombN3ALRiCZg&cad=rja).
+
+Let's count the number of raw unfiltered variants found:
-VCF format stores a large variety of information and you can find more details about each nomenclature in this [pdf](https://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&ved=0ahUKEwit35bvktzLAhVHkoMKHe3hAhYQFggdMAA&url=https%3A%2F%2Fsamtools.github.io%2Fhts-specs%2FVCFv4.2.pdf&usg=AFQjCNGFka33WgRmvOfOfp4nSaCzkV95HA&sig2=tPLD6jW5ALombN3ALRiCZg&cad=rja)
+```
+grep -v '#' Rush_KPC_266__aln_mpileup_raw.vcf | wc -l
+grep -v '#' Rush_KPC_266__aln_mpileup_raw.vcf | grep 'INDEL' | wc -l
+```
**2. Variant filtering and processed file generation using GATK and vcftools**
->i. Variant filtering using [GATK](https://www.broadinstitute.org/gatk/guide/tooldocs/org_broadinstitute_gatk_tools_walkers_filters_VariantFiltration.php "GATK Variant Filteration"):
+> ***i. Variant filtering using [GATK](https://www.broadinstitute.org/gatk/guide/tooldocs/org_broadinstitute_gatk_tools_walkers_filters_VariantFiltration.php "GATK Variant Filteration"):***
There are various tools that can you can try for variant filteration such as vcftools, GATK, vcfutils etc. Here we will use GATK VariantFiltration utility to filter out low confidence variants.
@@ -317,7 +363,7 @@ Run this command on raw vcf file Rush_KPC_266__aln_mpileup_raw.vcf.
```
-java -jar /scratch/micro612w17_fluxod/shared/bin/GenomeAnalysisTK-3.3-0/GenomeAnalysisTK.jar -T VariantFiltration -R ../KPNIH1.fasta -o Rush_KPC_266__filter_gatk.vcf --variant Rush_KPC_266__aln_mpileup_raw.vcf --filterExpression "FQ < 0.025 && MQ > 50 && QUAL > 100 && DP > 15" --filterName pass_filter
+java -jar /scratch/micro612w18_fluxod/shared/bin/GenomeAnalysisTK-3.3-0/GenomeAnalysisTK.jar -T VariantFiltration -R ../KPNIH1.fasta -o Rush_KPC_266__filter_gatk.vcf --variant Rush_KPC_266__aln_mpileup_raw.vcf --filterExpression "FQ < 0.025 && MQ > 50 && QUAL > 100 && DP > 15" --filterName pass_filter
```
@@ -328,19 +374,20 @@ This command will add a 'pass_filter' text in the 7th FILTER column for those va
3. QUAL stands for phred-scaled quality score for the assertion made in ALT. High QUAL scores indicate high confidence calls.
4. FQ stands for consensus quality. A positive value indicates heterozygote and a negative value indicates homozygous. In bacterial analysis, this plays an important role in defining if a gene was duplicated in a particular sample. We will learn more about this later while visualizing our BAM files in Artemis.
-Check if the pass_filter was added properly.
+Check if the pass_filter was added properly and count the number of variants that passed the filter.
```
grep 'pass_filter' Rush_KPC_266__filter_gatk.vcf | head
+
```
-caveat: These filter criteria should be applied carefully after giving some thought to the type of library, coverage, average mapping quality, type of analysis and other such requirements.
+***Caveat: These filter criteria should be applied carefully after giving some thought to the type of library, coverage, average mapping quality, type of analysis and other such requirements.***
->ii. Remove indels and keep only SNPS that passed our filter criteria using [vcftools](http://vcftools.sourceforge.net/man_latest.html vcftools manual):
+> ***ii. Remove indels and keep only SNPs that passed our filter criteria using [vcftools](http://vcftools.sourceforge.net/man_latest.html "vcftools manual"):***
-vcftools is a program package that is especially written to work with vcf file formats. It thus saves your precious time by making available all the common operations that you would like to perform on vcf file using a single command. One such operation is removing INDEL infromation from a vcf file.
+vcftools is a program package that is especially written to work with vcf file formats. It thus saves your precious time by making available all the common operations that you would like to perform on the vcf file using a single command. One such operation is removing INDEL information from a vcf file.
-Now, Lets remove indels from our final vcf file and keep only variants that passed our filter criteria(positions with pass_filter in their FILTER column).
+Now, let's remove indels from our final vcf file and keep only variants that passed our filter criteria (positions with pass_filter in their FILTER column).
```
@@ -351,70 +398,66 @@ vcftools --vcf Rush_KPC_266__filter_gatk.vcf --keep-filtered pass_filter --remov
-**3. Variant Annotation using snpEff**
+***3. Variant Annotation using snpEff***
-Variant annotation is one of the crucial steps in any variant calling pipeline. Most of the variant annotation tools creates their own database or use an external one to assign function and predict the effect of variants on genes. We will try to touch base on some basic steps of annotating variants in our vcf file using snpEff.
+Variant annotation is one of the crucial steps in any variant calling pipeline. Most of the variant annotation tools create their own database or use an external one to assign function and predict the effect of variants on genes. We will try to touch base on some basic steps of annotating variants in our vcf file using snpEff.
You can annotate these variants before performing any filtering steps that we did earlier or you can decide to annotate just the final filtered variants.
-snpEff contains database of about 20000 reference genome built from trusted and public sources. Lets check if snpEff contains a database of our reference genome.
+snpEff contains a database of about 20,000 reference genomes built from trusted and public sources. Lets check if snpEff contains a database of our reference genome.
->i. Check snpEff internal database for your reference genome:
+> ***i. Check snpEff internal database for your reference genome:***
```
-java -jar /scratch/micro612w17_fluxod/shared/bin/snpEff/snpEff.jar databases | grep 'kpnih1'
+java -jar /scratch/micro612w18_fluxod/shared/bin/snpEff/snpEff.jar databases | grep 'kpnih1'
```
Note down the genome id for your reference genome KPNIH1. In this case: GCA_000281535.2.29
->ii. Change the chromosome name in vcf file to ‘Chromosome’ for snpEff reference database compatibility.
+> ***ii. Change the chromosome name in the vcf file to ‘Chromosome’ for snpEff reference database compatibility.***
```
sed -i 's/gi.*|/Chromosome/g' Rush_KPC_266__filter_gatk.vcf
```
->iii. Run snpEff for variant annotation.
+> ***iii. Run snpEff for variant annotation.***
```
-java -jar /scratch/micro612w17_fluxod/shared/bin/snpEff/snpEff.jar -onlyProtein -no-upstream -no-downstream -no-intergenic -v GCA_000281535.2.29 Rush_KPC_266__filter_gatk.vcf > Rush_KPC_266__filter_gatk_ann.vcf -csvStats Rush_KPC_266__filter_gatk_stats
+java -jar /scratch/micro612w18_fluxod/shared/bin/snpEff/snpEff.jar -onlyProtein -no-upstream -no-downstream -no-intergenic -v GCA_000281535.2.29 Rush_KPC_266__filter_gatk.vcf > Rush_KPC_266__filter_gatk_ann.vcf -csvStats Rush_KPC_266__filter_gatk_stats
```
-The STDOUT will print out some useful details such as genome name and version being used, no. of genes, protein-coding genes and transcripts, chromosome and plasmid names etc
+The STDOUT will print out some useful details such as genome name and version being used, no. of genes, protein-coding genes and transcripts, chromosome and plasmid names etc.
-Lets go through the ANN field added after annotation step.
+snpEff will add an extra field named 'ANN' at the end of the INFO field. Let's go through the ANN field added after the annotation step.
```
grep 'ANN=' Rush_KPC_266__filter_gatk_ann.vcf | head -n1
+
+or to print on separate lines
+
+grep -o 'ANN=.*GT:PL' Rush_KPC_266__filter_gatk_ann.vcf | head -n1 | tr '|' '\n' | cat --number
```
-ANN field will provide information such as the impact of variants (HIGH/LOW/MODERATE/MODIFIER) on genes and transcripts along with other useful annotations.
+The ANN field will provide information such as the impact of variants (HIGH/LOW/MODERATE/MODIFIER) on genes and transcripts along with other useful annotations.
-Detailed information of ANN field and sequence ontology terms that it uses can be found [here](http://snpeff.sourceforge.net/SnpEff_manual.html#input)
+Detailed information of the ANN field and sequence ontology terms that it uses can be found [here](http://snpeff.sourceforge.net/SnpEff_manual.html#input).
-Lets see how many SNPs and Indels passed the filter using grep and wc
+Let's see how many SNPs and Indels passed the filter using grep and wc.
```
@@ -433,21 +476,26 @@ grep '^Chromosome.*pass_filter' Rush_KPC_266__filter_gatk_ann.vcf | grep 'INDEL'
```
-## Visualize BAM and VCF files in [Artemis](http://www.sanger.ac.uk/science/tools/artemis)
+Visualize BAM and VCF files in [Artemis](http://www.sanger.ac.uk/science/tools/artemis)
+----------------------------------------
[[back to top]](https://github.com/alipirani88/Comparative_Genomics/blob/master/day1_afternoon/README.md)
[[HOME]](https://github.com/alipirani88/Comparative_Genomics/blob/master/README.md)
While these various statistical/text analyses are helpful, visualization of all of these various output files can help in making some significant decisions and inferences about your entire analysis. There are a wide variety of visualization tools out there that you can choose from for this purpose.
-We will be using [Artemis](http://www.sanger.ac.uk/science/tools/artemis) here, developed by Sanger Institute for viewing BAM and vcf files for manual inspection of some of the variants.
+We will be using [Artemis](http://www.sanger.ac.uk/science/tools/artemis) here, developed by the Sanger Institute for viewing BAM and vcf files for manual inspection of some of the variants.
+
+- ***Required Input files:***
-> Required Input files:
-KPNIH1 reference fasta and genbank file,
-Rush_KPC_266__aln_marked.bam and Rush_KPC_266__aln_marked.bam.bai,
-Rush_KPC_266__filter_gatk_ann.vcf.gz and Rush_KPC_266__filter_gatk_ann.vcf.gz.tbi
+> KPNIH1 reference fasta
+> KPNIH1 genbank file
+> Rush_KPC_266__aln_marked.bam
+> Rush_KPC_266__aln_marked.bam.bai
+> Rush_KPC_266__filter_gatk_ann.vcf.gz
+> Rush_KPC_266__filter_gatk_ann.vcf.gz.tbi
-Lets make a seperate folder(make sure you are in Rush_KPC_266_varcall_result folder) for the files that we need for visualization and copy it to that folder
+Let's make a separate folder (make sure you are in the Rush_KPC_266_varcall_result folder) for the files that we need for visualization and copy them to that folder.
```
@@ -470,65 +518,85 @@ bgzip Rush_KPC_266__filter_gatk_ann.vcf
tabix Rush_KPC_266__filter_gatk_ann.vcf.gz
```
-Open a new terminal and run scp/sftp commands to get these files to your local system.
+Open a new terminal and run the scp command or cyberduck to get these files to your local system.
```
-scp -r username@flux-xfer.arc-ts.umich.edu:/scratch/micro612w17_fluxod/username/day1_after/Rush_KPC_266_varcall_result/Artemis_files/ /path-to-local-directory/
+scp -r username@flux-xfer.arc-ts.umich.edu:/scratch/micro612w18_fluxod/username/day1_after/Rush_KPC_266_varcall_result/Artemis_files/ /path-to-local-directory/
-# You can use ~/Desktop/ as your local directory path
+#You can use ~/Desktop/ as your local directory path
```
-start Artemis.
+Start Artemis.
-Set your working directory to Artemis_files(The Artemis_files folder that you copied to your local system) by clicking at browse button and click OK.
+Set your working directory to Artemis_files (the Artemis_files folder that you copied to your local system) by clicking the browse button and click OK.
Now go to the top left File options and select Open File Manager. You should see the folder Artemis_files. Expand it and select KPNIH.gb file. A new window should open displaying your features stored in a genbank file.
-Now open BAM file by selecting File(Top left corner) -> Read BAM/VCF file -> Select -> Rush_KPC_266__aln_marked.bam -> OK
+Now open the BAM file by selecting File (Top left corner) -> Read BAM/VCF file -> Select -> Rush_KPC_266__aln_marked.bam -> OK
-Reads aligned to your reference are displayed as stacked at the top panel of Artemis. The reads are colour coded so that paired reads are blue and those with an inversion are red. Reads that do not have a mapped mate are black and are optionally shown in the inferred insert size view. In the stack view, duplicated reads that span the same region are collapsed into one green line.
+Reads aligned to your reference are displayed as stacked at the top panel of Artemis. The reads are color-coded so that paired reads are blue and those with an inversion are red. Reads that do not have a mapped mate are black and are optionally shown in the inferred insert size view. In the stack view, duplicated reads that span the same region are collapsed into one green line.
-Now right click on any of the stacked reads and Go to Graph and select Coverage(screenshot below).
+Now right click on any of the stacked reads and Go to Graph and select Coverage (screenshot below).
-Now right click on any of the stacked reads and Go to Show and select SNP marks to show SNP's in red marks.
+Now right click on any of the stacked reads and Go to Show and select SNP marks to show SNPs in red marks.
-![alt tag](https://github.com/alipirani88/Comparative_Genomics/blob/master/_img/day1_after/artemis/select_graph.png)
+![alt tag](https://github.com/alipirani88/Comparative_Genomics/blob/master/_img/day1_after/select_graph.png)
Follow the same procedure and select SNP graph. Adjust the gene features panel height to show all the graph in a window.
-![alt tag](https://github.com/alipirani88/Comparative_Genomics/blob/master/_img/day1_after/artemis/graphs.png)
+![alt tag](https://github.com/alipirani88/Comparative_Genomics/blob/master/_img/day1_after/graphs.png)
Play around by moving the genbank panel cursor to look at coverage and SNP density across the genome. This will let you look at any regions where the coverage or SNP density is unusually high or low.
If you click a read, its mate pair will also be selected. If the cursor hovers over a read for long enough details of that read will appear in a small box. For more details of the read, right-click and select 'Show details of: READ NAME' (last option in list) from the
-menu.(screenshot below) This will open up a new window giving you some useful details such as mapping quality, coordinates etc.
+menu (screenshot below). This will open up a new window giving you some useful details such as mapping quality, coordinates etc.
-![alt tag](https://github.com/alipirani88/Comparative_Genomics/blob/master/_img/day1_after/artemis/read_details.png)
+![alt tag](https://github.com/alipirani88/Comparative_Genomics/blob/master/_img/day1_after/read_details.png)
-The snps are denoted by red marks as observed inside the reads. Go to one of the SNPs in VCF file(Position: 50195) by directly navigating to the position. For this, select Goto at the top -> select Navigator -> Type the position in Goto Base box
+The snps are denoted by red marks as observed inside the reads. Go to one of the SNPs in the VCF file (Position: 50195) by directly navigating to the position. For this, select Goto at the top -> select Navigator -> Type the position in Goto Base box
-You will Notice a spike in the middle of the SNP graph window. This is one of the SNPs that passed all our filter criteria. (Screenshot)
+You will notice a spike in the middle of the SNP graph window. This is one of the SNPs that passed all our filter criteria. (Screenshot)
-![alt tag](https://github.com/alipirani88/Comparative_Genomics/blob/master/_img/day1_after/artemis/spike_true.png)
+![alt tag](https://github.com/alipirani88/Comparative_Genomics/blob/master/_img/day1_after/spike_true.png)
-Lets try to see an example of HET variant. Variant positions where more than one allele(variants) with suffficiently high read depth are observed are considered as HET type variant.
+Let's look at an example of a HET variant. Variant positions where more than one allele (variant) is observed with sufficiently high read depth are considered HET type variants.
-For this, click on Goto option at the top and select navigator. Type 321818 in Goto Base box and click Goto.
+For this, click on the Goto option at the top and select Navigator. Type 321818 in the Goto Base box and click Goto.
-You will see a thick spike in the SNP graph as well as thick red vertical line in BAM panel. Also notice the sudden spike in the coverage for this particular region compared to its flanking region(Region before and after a selected region). The coverage here is more than 300 which is unusually high compared to the entire genome coverage. This means that more than one allele with high quality and depth were observed at these positions so we cannot decide which one of these is a true variant. We removed these types of variants during our Variant Filteration step using the criteria FQ. (If the FQ is unusually high, it is suggestive of HET variant and negative FQ value is a suggestive of true variant as observed in the mapped reads)
+You will see a thick spike in the SNP graph as well as a thick red vertical line in the BAM panel. Also notice the sudden spike in the coverage for this particular region compared to its flanking regions (the regions before and after the selected region). The coverage here is more than 300, which is unusually high compared to the entire genome coverage. This means that more than one allele with high quality and depth was observed at this position, so we cannot decide which one of these is a true variant. We removed these types of variants during our Variant Filtration step using the FQ criterion. (If the FQ is unusually high, it is suggestive of a HET variant, and a negative FQ value is suggestive of a true variant as observed in the mapped reads.)
-![alt tag](https://github.com/alipirani88/Comparative_Genomics/blob/master/_img/day1_after/artemis/HET_variant.png)
+![alt tag](https://github.com/alipirani88/Comparative_Genomics/blob/master/_img/day1_after/HET_variant.png)
-Now select the gene right below this spiked region. Right click on this gene(KPNIH1_RS01560) and select Zoom to Selection.
+Now select the gene right below this spiked region. Right click on this gene (KPNIH1_RS01560) and select Zoom to Selection.
-![alt tag](https://github.com/alipirani88/Comparative_Genomics/blob/master/_img/day1_after/artemis/HET_variant_gene_selected.png)
+![alt tag](https://github.com/alipirani88/Comparative_Genomics/blob/master/_img/day1_after/HET_variant_gene_selected.png)
Check the details about gene by selecting View -> Selected Features
-You can inspect these type of HET variants later for any gene duplication or copy number analysis (by extracting variant positions with high FQ values). Addition of these details will give a better resolution while inferring Phylogenetic trees.
+You can inspect these types of HET variants later for any gene duplication or copy number analysis (by extracting variant positions with high FQ values). Addition of these details will give a better resolution while inferring phylogenetic trees.
-Play around with Artemis to look at what other kind of information you can find from these BAM and vcf files. Also refer to the manual at Artemis [Homepage](http://www.sanger.ac.uk/science/tools/artemis) for full information about its usage.
+Play around with Artemis to look at what other kind of information you can find from these BAM and vcf files. Also refer to the manual at the [Artemis Homepage](http://www.sanger.ac.uk/science/tools/artemis) for full information about its usage.
[[back to top]](https://github.com/alipirani88/Comparative_Genomics/blob/master/day1_afternoon/README.md)
[[HOME]](https://github.com/alipirani88/Comparative_Genomics/blob/master/README.md)
+
+
+VRE variant calling analysis
+----------------------------
+
+Today, we learned how to assess the quality, perform quality trimming and variant calling to find variants between the sample and reference genome. This exercise requires you to apply these tools and commands on a new data set. These samples both come from a patient infected with VRE before and after treatment with daptomycin. The first sample was the patient's initial sample and is susceptible to daptomycin, and the second was taken after daptomycin resistance emerged during treatment. Your goal is to map reads from the resistant genome to the susceptible reference and search for variants that may be associated with resistance. To accomplish this you will run the programs from this session to generate filtered variant files (VCF), and then explore these variants in Artemis to see what genes they are in. To help with your interpretation, see if any of the genes hit were reported in this [paper](http://www.nejm.org/doi/full/10.1056/nejmoa1011138), which was the first to identify putative daptomycin resistance loci.
+
+- Use VRE_daptoS_ref_strain.fa as your reference genome and VRE_daptoS_gene_annot.gff annotation file for Artemis.
+
+- This is how the command and tools workflow should look:
+
+>1. FastQC to check the quality of reads(you can skip here for time)
+>2. Trimmomatic to remove bad quality data(you can skip here for time)
+>3. Prepare reference genome index for BWA and align reads to reference genome
+>4. SAM/BAM manipulation using samtools
+>5. Remove duplicates using picard (don't forget to create a dictionary for the reference fasta file required by PICARD)
+>6. Index marked bam file generated by picard using SAMTOOLS(For input in Artemis later)
+>7. Variant calling using samtools
+>8. Variant Filteration using GATK
+>9. Visualize BAM and VCF files in Artemis
diff --git a/day1_morning/README.md b/day1_morning/README.md
index d593324..f538067 100644
--- a/day1_morning/README.md
+++ b/day1_morning/README.md
@@ -1,12 +1,29 @@
-# Day 1 Morning
+Day 1 Morning
+=============
[[HOME]](https://github.com/alipirani88/Comparative_Genomics/blob/master/README.md)
-## If you were not able to follow the video, here is the [link](https://www.youtube.com/watch?v=womKfikWlxM) to illumina Sequencing
+Installing and setting up Cyberduck for file transfer
+-----------------------------------------------------
-## Getting your data onto Flux and setting up environment variable
+During the workshop, we will transfer different output files from flux to your local system. Cyberduck makes it easier to drag and drop any remote file onto your local system and vice versa. Of course, you can use "scp" to transfer files but Cyberduck provides a graphical interface to manage file transfer and helps avoid typing long file paths and commands.
+
+> ***1. Go to [this](https://cyberduck.io/) cyberduck website and download the executable for your respective operating system.***
+
+> ***2. Double-click on the downloaded zip file to unzip it and double click cyberduck icon.***
+
+> ***3. Type sftp://flux-xfer.arc-ts.umich.edu in quickconnect bar, press enter and enter your flux username and password.***
+
+> ***4. This will take you to your flux home directory /home/username. Select "Go" from tool bar at the top then select "Go to folder" and enter workshop home directory path: /scratch/micro612w18_fluxod/***
+
+To transfer or upload a file, you can drag and drop it into the location you want.
+
+
+Getting your data onto Flux and setting up environment variable
+---------------------------------------------------------------
**Log in to Flux**
+
```
ssh username@flux-login.arc-ts.umich.edu
```
@@ -15,7 +32,12 @@ ssh username@flux-login.arc-ts.umich.edu
**Setting up environment variables in .bashrc file so your environment is all set for genomic analysis!**
-Environment variables are the variables/values that describe the environment in which programs run in. All the programs and scripts on your unix system use these variables for extracting information such as: What is my current working directory?, Where are temporary files stored?, Where are perl/python libraries?, Where is Blast installed? etc.
+Environment variables are the variables/values that describe the environment in which programs run in. All the programs and scripts on your unix system use these variables for extracting information such as:
+
+- What is my current working directory?,
+- Where are temporary files stored?,
+- Where are perl/python libraries?,
+- Where is Blast installed? etc.
In addition to environment variables that are set up by system administators, each user can set their own environment variables to customize their experience. This may sound like something super advanced that isn't relevant to beginners, but that's not true!
@@ -29,9 +51,9 @@ Some examples of ways that we will use environment variables in the class are:
One way to set your environment variables would be to manually set up these variables everytime you log in, but this would be extremely tedious and inefficient. So, Unix has setup a way around this, which is to put your environment variable assignments in special files called .bashrc or .bash_profile. Every user has one or both of these files in their home directory, and what's special about them is that the commands in them are executed every time you login. So, if you simply set your environmental variable assignments in one of these files, your environment will be setup just the way you want it each time you login!
-All the softwares/tools that we need in this workshop are installed in a directory "/scratch/micro612w17_fluxod/shared/bin/" and we want the shell to look for these installed tools in this directory. For this, We will save the full path to these tools in an environment variable PATH.
+All the softwares/tools that we need in this workshop are installed in a directory "/scratch/micro612w18_fluxod/shared/bin/" and we want the shell to look for these installed tools in this directory. For this, We will save the full path to these tools in an environment variable PATH.
->i. Make a backup copy of bashrc file in case something goes wrong.
+> ***i. Make a backup copy of bashrc file in case something goes wrong.***
```
@@ -41,69 +63,76 @@ cp ~/.bashrc ~/bashrc_backup
```
->ii. Open ~/.bashrc file using any text editor and add the following lines to your .bashrc file.
-
-Note: Replace "username" under alias shortcuts with your own umich "uniqname". You can also customize the alias name such as wd, d1m etc. catering to your own need and convenience.
+> ***ii. Open ~/.bashrc file using any text editor and add the following lines to your .bashrc file.***
- Click to expand entries
+ Click here to expand entries
```
-## Micro612 Workshop ENV
+##Micro612 Workshop ENV
#Aliases
-alias iflux='qsub -I -V -l nodes=1:ppn=4,pmem=4000mb,walltime=1:00:00:00 -q fluxod -l qos=flux -A micro612w17_fluxod'
-alias wd='cd /scratch/micro612w17_fluxod/username/'
-alias d1m='cd /scratch/micro612w17_fluxod/username/day1_morn'
-alias d1a='cd /scratch/micro612w17_fluxod/username/day1_after'
-alias d2m='cd /scratch/micro612w17_fluxod/username/day2_morn'
-alias d2a='cd /scratch/micro612w17_fluxod/username/day2_after'
-alias d3m='cd /scratch/micro612w17_fluxod/username/day3_morn'
-alias d3a='cd /scratch/micro612w17_fluxod/username/day3_after'
-
-
-# Flux Modules
-module load python-anaconda2/latest
+alias iflux='qsub -I -V -l nodes=1:ppn=4,pmem=4000mb,walltime=1:00:00:00 -q fluxod -l qos=flux -A micro612w18_fluxod'
+alias wd='cd /scratch/micro612w18_fluxod/username/'
+alias d1m='cd /scratch/micro612w18_fluxod/username/day1_morn'
+alias d1a='cd /scratch/micro612w18_fluxod/username/day1_after'
+alias d2m='cd /scratch/micro612w18_fluxod/username/day2_morn'
+alias d2a='cd /scratch/micro612w18_fluxod/username/day2_after'
+alias d3m='cd /scratch/micro612w18_fluxod/username/day3_morn'
+alias d3a='cd /scratch/micro612w18_fluxod/username/day3_after'
+
+
+#Flux Modules
module load perl-modules
-# Perl Libraries
-export PERL5LIB=/scratch/micro612w17_fluxod/shared/bin/PAGIT/lib:/scratch/micro612w17_fluxod/shared/bin/vcftools_0.1.12b/perl:$PERL5LIB
-export PERL5LIB=/scratch/micro612w17_fluxod/shared/perl_libs:$PERL5LIB
-
-# Bioinformatics Tools
-export PATH=$PATH:/scratch/micro612w17_fluxod/shared/bin/mauve_snapshot_2015-02-13/linux-x64/
-export PATH=$PATH:/scratch/micro612w17_fluxod/shared/bin/blast/bin/
-export PATH=$PATH:/scratch/micro612w17_fluxod/shared/bin/vcftools_0.1.12b/perl/
-export PATH=$PATH:/scratch/micro612w17_fluxod/shared/bin/tabix-0.2.6/
-export PATH=$PATH:/scratch/micro612w17_fluxod/shared/bin/bwa-0.7.12/
-export PATH=$PATH:/scratch/micro612w17_fluxod/shared/bin/Trimmomatic/
-export PATH=$PATH:/scratch/micro612w17_fluxod/shared/bin/bcftools-1.2/
-export PATH=$PATH:/scratch/micro612w17_fluxod/shared/bin/samtools-1.2/
-export PATH=$PATH:/scratch/micro612w17_fluxod/shared/bin/sratoolkit/bin/
-export PATH=$PATH:/scratch/micro612w17_fluxod/shared/bin/Spades/bin/
-export PATH=$PATH:/scratch/micro612w17_fluxod/shared/bin/FastQC/
-export PATH=$PATH:/scratch/micro612w17_fluxod/shared/bin/GenomeAnalysisTK-3.3-0/
-export PATH=$PATH:/scratch/micro612w17_fluxod/shared/bin/picard-tools-1.130/
-export PATH=$PATH:/scratch/micro612w17_fluxod/shared/bin/qualimap_v2.1/
-export PATH=$PATH:/scratch/micro612w17_fluxod/shared/bin/vcftools_0.1.12b/bin/
-export PATH=$PATH:/scratch/micro612w17_fluxod/shared/bin/snpEff/
-export PATH=$PATH:/scratch/micro612w17_fluxod/shared/bin/PAGIT/ABACAS/
-export PATH=$PATH:/scratch/micro612w17_fluxod/shared/bin/blast-2.2.26/bin/
-export PATH=$PATH:/scratch/micro612w17_fluxod/shared/bin/quast/
-export PATH=$PATH:/scratch/micro612w17_fluxod/shared/bin/MUMmer3.23/
-export PATH=$PATH:/scratch/micro612w17_fluxod/shared/bin/fastq_screen_v0.5.2/
-export PATH=$PATH:/scratch/micro612w17_fluxod/shared/bin/prokka-1.11/bin/
-export PATH=$PATH:/scratch/micro612w17_fluxod/shared/bin/LS-BSR-master/
-export PATH=$PATH:/scratch/micro612w17_fluxod/shared/bin/bowtie2-2.2.6/
+#Perl Libraries
+export PERL5LIB=/scratch/micro612w18_fluxod/shared/bin/PAGIT/lib:/scratch/micro612w18_fluxod/shared/bin/vcftools_0.1.12b/perl:$PERL5LIB
+export PERL5LIB=/scratch/micro612w18_fluxod/shared/perl_libs:$PERL5LIB
+
+#Bioinformatics Tools
+export PATH=$PATH:/scratch/micro612w18_fluxod/shared/bin/ncbi-blast-2.7.1+/bin/
+export PATH=$PATH:/scratch/micro612w18_fluxod/shared/bin/MultiQC/build/scripts-2.7/
+export PATH=$PATH:/scratch/micro612w18_fluxod/shared/bin/mauve_snapshot_2015-02-13/linux-x64/
+export PATH=$PATH:/scratch/micro612w18_fluxod/shared/bin/vcftools_0.1.12b/perl/
+export PATH=$PATH:/scratch/micro612w18_fluxod/shared/bin/tabix-0.2.6/
+export PATH=$PATH:/scratch/micro612w18_fluxod/shared/bin/bwa-0.7.12/
+export PATH=$PATH:/scratch/micro612w18_fluxod/shared/bin/Trimmomatic/
+export PATH=$PATH:/scratch/micro612w18_fluxod/shared/bin/bcftools-1.2/
+export PATH=$PATH:/scratch/micro612w18_fluxod/shared/bin/samtools-1.2/
+export PATH=$PATH:/scratch/micro612w18_fluxod/shared/bin/sratoolkit/bin/
+export PATH=$PATH:/scratch/micro612w18_fluxod/shared/bin/Spades/bin/
+export PATH=$PATH:/scratch/micro612w18_fluxod/shared/bin/FastQC/
+export PATH=$PATH:/scratch/micro612w18_fluxod/shared/bin/GenomeAnalysisTK-3.3-0/
+export PATH=$PATH:/scratch/micro612w18_fluxod/shared/bin/picard-tools-1.130/
+export PATH=$PATH:/scratch/micro612w18_fluxod/shared/bin/qualimap_v2.1/
+export PATH=$PATH:/scratch/micro612w18_fluxod/shared/bin/vcftools_0.1.12b/bin/
+export PATH=$PATH:/scratch/micro612w18_fluxod/shared/bin/snpEff/
+export PATH=$PATH:/scratch/micro612w18_fluxod/shared/bin/PAGIT/ABACAS/
+export PATH=$PATH:/scratch/micro612w18_fluxod/shared/bin/blast-2.2.26/bin/
+export PATH=$PATH:/scratch/micro612w18_fluxod/shared/bin/quast/
+export PATH=$PATH:/scratch/micro612w18_fluxod/shared/bin/MUMmer3.23/
+export PATH=$PATH:/scratch/micro612w18_fluxod/shared/bin/fastq_screen_v0.5.2/
+export PATH=$PATH:/scratch/micro612w18_fluxod/shared/bin/prokka-1.11/bin/
+export PATH=$PATH:/scratch/micro612w18_fluxod/shared/bin/LS-BSR-master/
+export PATH=$PATH:/scratch/micro612w18_fluxod/shared/bin/bowtie2-2.2.6/
+export PATH=$PATH:/scratch/micro612w18_fluxod/shared/bin/mcl-14-137/src/alien/oxygen/src/
```
+Note: Replace "username" under alias shortcuts with your own umich "uniqname". In the text editor, nano, you can do this by
+
+- typing Ctrl + \ and you will then be prompted to type in your search string (here, username).
+- Press return. Then you will be prompted to enter what you want to replace "username" with (here, your uniqname).
+- Press return. Then press a to replace all instances or y to accept each instance one by one.
+
+You can also customize the alias name such as wd, d1m etc. catering to your own need and convenience.
+
The above environment settings will set various shortcuts such as "iflux" for entering interactive flux session, "wd" to navigate to your workshop directory, call necessary flux modules and perl libraries required by certain tools and finally sets the path for bioinformatics programs that we will run during the workshop.
->iii. Save the file and Source .bashrc file to make these changes permanent.
+> ***iii. Save the file and Source .bashrc file to make these changes permanent.***
```
@@ -111,45 +140,46 @@ source ~/.bashrc
```
->iv. Check if the $PATH environment variable is updated
+> ***iv. Check if the $PATH environment variable is updated***
```
echo $PATH
-# You will see a long list of paths that has been added to your $PATH variable
+#You will see a long list of paths that has been added to your $PATH variable
wd
```
-You should be in your workshop working directory that is /scratch/micro612w17_fluxod/username
+You should be in your workshop working directory that is /scratch/micro612w18_fluxod/username
-
+
-## Unix is your friend
+Unix is your friend
+-------------------
+Up until now you’ve probably accessed sequence data from NCBI by going to the website, laboriously clicking around and finally finding and downloading the data you want.
-In software carpentry, you learned working with shell and automating simple tasks using basic unix commands. Lets see how some of these commands can be employed in genomics analysis while exploring various file formats that we use in day to day analysis. For this session, we will try to explore three different types of bioinformatics file formats:
+There are a lot of reasons that is not ideal:
-fasta: used for representing either nucleotide or peptide sequences
+- It’s frustrating and slow to deal with the web interface
+- It can be hard to keep track of where the data came from and exactly which version of a sequence you downloaded
+- It’s not conducive to downloading lots of sequence data
-gff: used for describing genes and other features of DNA, RNA and protein sequences
+To download sequence data in Unix you can use a variety of commands (e.g. sftp, wget, curl). Here, we will use the curl command to download some genome assemblies from NCBI ftp location:
-fastq: used for storing biological sequence / sequencing reads (usually nucleotide sequence) and its corresponding quality scores
+- Go to your class home directory (use your wd shortcut!)
-> Execute the following commands to copy files for this morning’s exercises to your home directory:
+- Execute the following commands to copy files for this morning’s exercises to your home directory:
```
-
-cp -r /scratch/micro612w17_fluxod/shared/data/day1_morn/ ./
+cp -r /scratch/micro612w18_fluxod/shared/data/day1_morn/ ./
cd day1_morn/
-# or
+#or
d1m
@@ -157,27 +187,143 @@ ls
```
-> Question: In the homework assignment, you downloaded genome assembly fasta files and ran a shell script to count contigs. Now, lets say you want to find out the combined length of genome in each of these files. This can be achieved by running a short unix command piping together three extremely powerful unix programs: grep, sed and awk. The key to crafting the command is understanding the required features of fasta files, including: 1) each sequence is preceded by a fasta header that starts with ">", 2) the types of bases that a nucleotide sequence represents (A,T,G,C,N) and 3) that each line is seperated by a new line character ("\n"). To determine the total length of our genome assemblies, we will use grep to match only those lines that doesn't start with ">" (remember grep -v option to ignore lines), use sed to remove characters that match "N" or "n" which represents unknown bases and finally use awk to count the remaining characters. We can use unix pipe "|" to pass the output of one command to another for further processing. Lets start by counting the number of bases in Acinetobacter_baumannii.fna file
+- Now get three genome sequences with the following commands:
+
+```
+curl ftp://ftp.ncbi.nlm.nih.gov/genomes/refseq/bacteria/Acinetobacter_baumannii/latest_assembly_versions/GCF_000018445.1_ASM1844v1/GCF_000018445.1_ASM1844v1_genomic.fna.gz > Acinetobacter_baumannii.fna.gz
+
+curl ftp://ftp.ncbi.nlm.nih.gov/genomes/refseq/bacteria/Klebsiella_pneumoniae/latest_assembly_versions/GCF_000220485.1_ASM22048v1/GCF_000220485.1_ASM22048v1_genomic.fna.gz > Klen_pneu.fna.gz
+
+curl ftp://ftp.ncbi.nlm.nih.gov/genomes/refseq/bacteria/Escherichia_coli/all_assembly_versions/GCF_000194495.1_ASM19449v2/GCF_000194495.1_ASM19449v2_genomic.fna.gz > E_coli.fna.gz
+
+```
+
+- Decompress the compressed fasta file using gzip
+
+```
+gzip -d Acinetobacter_baumannii.fna.gz
+gzip -d Klen_pneu.fna.gz
+gzip -d E_coli.fna.gz
+```
+
+These files are genome assemblies in fasta format. Fasta files are a common sequence data format that is composed of alternating sequence headers (sequence names and comments) and their corresponding sequences. Of great importance, the sequence header lines must start with “>”. These genome assemblies have one header line for each contig in the assembly, and our goal will be to count the number of contigs/sequences. To do this we will string together two Unix commands: “grep” and “wc”. “grep” (stands for global regular expression print), is an extremely powerful pattern matching command, which we will use to identify all the lines that start with a “>”. “wc” (stands for word count) is a command for counting words, characters and lines in a file. To count the number of contigs in one of your fasta files enter:
+
+
+```
+grep ">" E_coli.fna | wc -l
+```
+
+Try this command on other assemblies to see how many contigs they have
+
+Your first sequence analysis program!!!
+---------------------------------------
+
+OK, so now that we have a useful command, wouldn’t it be great to turn it into a program that you can easily apply to a large number of genome assemblies? Of course it would! So, now we are going to take our cool contig counting command, and put it in a shell script that applies it to all files in the desired directory.
+
+
+
+- Open “fasta_counter.sh” in pico or your favourite text editor and follow instructions for making edits so it will do what we want it to do
+
+- Run this script in day1_morn directory and verify that you get the correct results
+
+```
+bash fasta_counter.sh .
+```
+
+Plotting genomic coverage in R
+------------------------------
+
+Data visualization plays an important role in organizing, analyzing and interpreting large amounts of omics data. R is one of the most basic and powerful tools for manipulating and visualizing these types of data. The following task will brush up some basic R plotting commands and help you visualize some complex omics data for interpretation.
+One of the most common types of genomic analysis involves comparing the newly sequenced read data of an organism to your choice of reference organism genome. Mapping millions of reads generated in a sequencing experiment to the reference genome fasta file and interpreting various parameters can achieve this analysis.
+One such parameter is validating how well your sequencing experiment performed and assessing the “uniformity” of coverage from whole-genome sequencing. Visualizing sequencing coverage across the reference genome helps us answer this question. Sequencing coverage describes the average number of reads that align to, or "cover," known reference bases.
+
+The input for this task is a comma-separated file, which contains average sequencing coverage information i.e average number of reads mapped to each 1000 base pairs in reference genome. You can find this input file in your day1_morn directory by the name, Ecoli_coverage_average_bed.csv
+
+
+
+Drag and drop this Ecoli_coverage_average_bed.csv to your local system using cyberduck.
+
+Now, Fire up R console or studio and import the file (Ecoli_coverage_average_bed.csv) using any type of data import functions in R (read.table, read.csv etc.)
+
+Hint: The file is comma-separated and contains header line (“bin,Average_coverage”) so use appropriate parameters while importing the file
+
+Once the data in file is imported into R object, you can plot the column Average_coverage as a time series plot to assess the coverage of your mapped reads across genome.
+
+Note: A time series plot is a graph that you can use to evaluate patterns and behavior in data over time. Here, we can employ the same plot to see the pattern i.e read depth/coverage at each 1000 bases (represented by bins columns where each bin represents Average number of reads mapped to each 1000 bases in reference genome) using the simplest R function for time series such as [plot.ts]( http://stat.ethz.ch/R-manual/R-devel/library/stats/html/plot.ts.html )
+
+An example plot.ts plot for Ecoli_coverage_average_bed.csv is shown below for your reference.
+
+![alt tag](https://github.com/alipirani88/Comparative_Genomics/blob/master/_img/day1_morning/plot_1.png)
+
+For more advanced and more beautiful visualization, ggplot2 can be employed to display the same plot. An example ggplot2 plot for Ecoli_coverage_average_bed.csv is shown below for your reference.
+
+![alt tag](https://github.com/alipirani88/Comparative_Genomics/blob/master/_img/day1_morning/plot_2.png)
Solution
-
+
```
+x <- read.table("Ecoli_coverage_average_bed.csv", sep=",", header=TRUE)
+plot.ts(x$Average_coverage, xlab="Genome Position(1000bp bins)", ylab="Average Read Depth", main="Ecoli Bed Coverage", col="blue")
+
+```
+
+
+
+Power of Unix commands
+----------------------
+
+In software carpentry, you learned working with shell and automating simple tasks using basic unix commands. Lets see how some of these commands can be employed in genomics analysis while exploring various file formats that we use in day to day analysis. For this session, we will try to explore three different types of bioinformatics file formats:
+
+fasta: used for representing either nucleotide or peptide sequences
+
+gff: used for describing genes and other features of DNA, RNA and protein sequences
+
+fastq: used for storing biological sequence / sequencing reads (usually nucleotide sequence) and its corresponding quality scores
+
+
+- Question: Previously, you downloaded genome assembly fasta files and ran a shell script to count contigs. Now, lets say you want to find out the combined length of genome in each of these files. This can be achieved by running a short unix command piping together two unix programs: grep and wc. The key to crafting the command is understanding the features of fasta files,
+
+> ***1) each sequence in fasta file is preceded by a fasta header that starts with ">",***
+
+> ***2) the types of bases that a nucleotide sequence represents (A,T,G,C,N)***
+
+
+To determine the total length of our genome assemblies, we will use grep to match only those lines that don't start with ">" (remember grep -v option is used to ignore lines) and don't contain the character "N". Then use the wc command (stands for word count) to count the characters. We can use unix pipe "|" to pass the output of one command to another for further processing. Note that grep -v drops every line that contains an "N" in its entirety and wc -m also counts the newline character at the end of each line, so the number you get is an approximation of the genome length. Let's start by counting the number of bases in the Acinetobacter_baumannii.fna file
+
+
+ Solution
+
+
+
+```
+
+grep -v '^>' Acinetobacter_baumannii.fna | grep -v "N" | grep -v "n" | wc -m
#Note:
#- The sign "^" inside the grep pattern represents any pattern that starts with ">" and -v asks grep to ignore those lines.
-#- Use "|" to pass these lines to sed. sed stands for stream editor and can be used to parse, transform and replace text. Here, we are removing the characters "N" or "n" and keeping only "A,T,G,C" bases
-#- awk consists of three blocks: The first block (-F '\n') tells awk how each line is seperated from each other using a field seperator, the second block will keep counting characters in a line (using awk's default option "length") and save it in a variable "sum" and when it runs through all the lines in a stream, the third block will print the value of sum which represents total bases in a fasta file.
+#- Use "|" to pass the output of one command to another.
+#- -m parameter will show the character counts. Check wc help menu by typing "wc --help" on terminal to explore other parameters
```
+
Now run the same command on other fasta files in day1_morn directory. Try using a for loop.
@@ -186,13 +332,13 @@ Now run the same command on other fasta files in day1_morn directory. Try using
```
-for i in *.fna; do grep -v '^>' $i | sed 's/[N,n]//g' | awk -F '\n' '{sum += length} END {print sum}'; done
+for i in *.fna; do grep -v '^>' $i | grep -v "N" | grep -v "n" | wc -m; done
```
---
-> Exploring GFF files
+
+- Exploring GFF files
The GFF (General Feature Format) format is a tab-seperated file and consists of one line per feature, each containing 9 columns of data.
@@ -214,7 +360,7 @@ column 8: frame - One of '0', '1' or '2'. '0' indicates that the first base of t
column 9: attribute - A semicolon-separated list of tag-value pairs, providing additional information about each feature such as gene name, product name etc.
-> Use less to explore first few lines of a gff file sample.gff
+- Use less to explore first few lines of a gff file sample.gff
```
@@ -227,7 +373,7 @@ You will notice that the GFF format follows version 3 specifications("##gff-vers
You can press space bar on keyboard to read more lines and "q" key to exit less command.
-> Question: Suppose, you want to find out the number of annotated features in a gff file. how will you achieve this using grep and wc?
+- Question: Suppose, you want to find out the number of annotated features in a gff file. how will you achieve this using grep and wc?
Solution
@@ -237,42 +383,26 @@ grep -v '^#' sample.gff | wc -l
```
-> Question: How about counting the number of rRNA features in a gff file using grep, awk and wc? Note: Awk is a very powerful utility for working with columns in a file.
+- Question: How about counting the number of rRNA features (third column) in a gff file using grep, cut and wc? You can check the usage for cut by typing "cut --help"
Solution
```
-grep -v '^#' sample.gff | awk -F '\t' '{print $3}' | grep 'rRNA' | wc -l
-
-# Or number of CDS or tRNA features?
-
-grep -v '^#' sample.gff | awk -F '\t' '{print $3}' | grep 'CDS' | wc -l
-grep -v '^#' sample.gff | awk -F '\t' '{print $3}' | grep 'tRNA' | wc -l
-
-# Note: In the above command, we are trying to search lines that doesn't starts with "#" and extracting feature information from third column.
-
-```
-
-
-If for some reason you find awk daunting or too long, you can use "cut" command directly to extract specific columns.
-
-
- Solution
-
-```
cut -f 3 sample.gff | grep 'rRNA' | wc -l
-# Or number of CDS or tRNA features?
+#Or number of CDS or tRNA features?
cut -f 3 sample.gff | grep 'CDS' | wc -l
cut -f 3 sample.gff | grep 'tRNA' | wc -l
+#Note: In the above command, we are trying to extract feature information from third column.
+
```
-> Question: Try counting the number of features on a "+" or "-" strand.
+- Question: Try counting the number of features on a "+" or "-" strand (column 7).
Some more useful one-line unix commands for GFF files: [here](https://github.com/stephenturner/oneliners#gff3-annotations)
@@ -285,12 +415,15 @@ Run the following command to print total number of reads in each file, total num
```
for i in Rush_KPC_266_*.gz; do zcat $i | awk 'BEGIN{OFS="\t"};((NR-2)%4==0){read=$1;total++;count[read]++;len+=length(read)}END{for(read in count){if(!max||count[read]>max) {max=count[read];maxRead=read};if(count[read]==1){unique++}};print total,unique,unique*100/total,maxRead,count[maxRead],count[maxRead]*100/total,len/total}'; done
-# The above awk command reads every fourth record and calculates some basic fastq statistics.
+#The above awk command reads every fourth record and calculates some basic fastq statistics.
```
+Now try running the above command using fastq_screen.fastq.gz as input.
+
You can find more of such super useful bash one-liners at Stephen Turner's github [page](https://github.com/stephenturner/oneliners). You can also use some pre-written unix utilities and tools such as [seqtk](https://github.com/lh3/seqtk), [bioawk](https://github.com/lh3/bioawk) and [fastx](http://hannonlab.cshl.edu/fastx_toolkit/) which comes in handy while extracting complex information from fasta/fastq/sam/bam files and are optimized to be insanely fast.
-## Contamination Screening using [FastQ Screen](http://www.bioinformatics.babraham.ac.uk/projects/fastq_screen/)
+Contamination Screening using [FastQ Screen](http://www.bioinformatics.babraham.ac.uk/projects/fastq_screen/)
+--------------------------------------------
When running a sequencing pipeline, it is very important to make sure that your data matches appropriate quality threshold and are free from any contaminants. This step will help you make correct interpretations in downstream analysis and will also let you know if you are required to redo the experiment/library preparation or resequencing or remove contaminant sequences.
@@ -304,13 +437,13 @@ We have already created the human, mouse and ecoli reference databases inside fa
```
-ls /scratch/micro612w17_fluxod/shared/bin/fastq_screen_v0.5.2/data/
+ls /scratch/micro612w18_fluxod/shared/bin/fastq_screen_v0.5.2/data/
```
Note: You will learn creating reference databases in our afternoon session.
->i. Get an interactive cluster node to start running programs. Use the shortcut that we created in .bashrc file for getting into interactive flux session.
+> ***i. Get an interactive cluster node to start running programs. Use the shortcut that we created in .bashrc file for getting into interactive flux session.***
How do you know if you are in interactive session?: you should see "username@nyx" in your command prompt
@@ -325,56 +458,51 @@ d1m
#or
-cd /scratch/micro612w17_fluxod/username/day1_morn/
+cd /scratch/micro612w18_fluxod/username/day1_morn/
```
->ii. Lets run fastq_screen on fastq_screen.fastq.gz
+> ***ii. Lets run fastq_screen on fastq_screen.fastq.gz***
```
fastq_screen --subset 1000 --force --outdir ./ --aligner bowtie2 fastq_screen.fastq.gz
#Note: We will screen only a subset of fastq reads against reference databases. To screen all the reads, change this argument to --subset 0 but will take long time to finish. (searching sequences against human or mouse genome is a time consuming step)
-# Also Dont worry about "Broken pipe" warning.
+#Also don't worry about the "Broken pipe" warning.
```
The above run will generate two types of output file: a screen report in text format "fastq_screen_screen.txt" and a graphical output "fastq_screen_screen.png" showing percentage of reads mapped to each reference genomes.
->iii. Download the fastq_screen graphical report to your home computer for inspection. Use scp if you find sftp annoying :)
+> ***iii. Download the fastq_screen graphical report to your home computer for inspection.***
-```
-# Open a new terminal
-
-sftp username@flux-login.arc-ts.umich.edu
-cd /scratch/micro612w17_fluxod/username/day1_morn/
-get fastq_screen_screen.png
+Use the scp command as shown below or use cyberduck. If you don't see the file in the cyberduck window, try refreshing it using the refresh button at the top.
-# or Use scp if you find sftp annoying :)
-
-scp username@flux-xfer.arc-ts.umich.edu:/scratch/micro612w17_fluxod/username/day1_morn/fastq_screen_screen.png /path-to-local-directory/
+```
+scp username@flux-xfer.arc-ts.umich.edu:/scratch/micro612w18_fluxod/username/day1_morn/fastq_screen_screen.png /path-to-local-directory/
-# You can use ~/Desktop/ as your local directory path
+#You can use ~/Desktop/ as your local directory path
```
Open fastq_screen_screen.png on your system. You will notice that the sample contain a significant amount of human reads; we should always remove these contaminants from our sample before proceeding to any type of microbial analysis.
-## Quality Control using [FastQC](http://www.bioinformatics.babraham.ac.uk/projects/fastqc/ "FastQC homepage")
+Quality Control using [FastQC](http://www.bioinformatics.babraham.ac.uk/projects/fastqc/ "FastQC homepage")
+------------------------------
[[back to top]](https://github.com/alipirani88/Comparative_Genomics/blob/master/day1_morning/README.md)
[[HOME]](https://github.com/alipirani88/Comparative_Genomics/blob/master/README.md)
Now we will run FastQC on some sample raw data to assess its quality. FastQC is a quality control tool that reads in sequence data in a variety of formats(fastq, bam, sam) and can either provide an interactive application to review the results or create an HTML based report which can be integrated into any pipeline. It is generally the first step that you take upon receiving the sequence data from sequencing facility to get a quick sense of its quality and whether it exhibits any unusual properties (e.g. contamination or unexpected biological features)
->ii. In your day1_morn directory, create a new directory for saving FastQC results.
+> ***i. In your day1_morn directory, create a new directory for saving FastQC results.***
```
mkdir Rush_KPC_266_FastQC_results
mkdir Rush_KPC_266_FastQC_results/before_trimmomatic
```
->iii. Verify that FastQC is in your path by invoking it from command line.
+> ***ii. Verify that FastQC is in your path by invoking it from command line.***
```
fastqc -h
@@ -382,7 +510,7 @@ fastqc -h
FastQC can be run in two modes: "command line" or as a GUI (graphical user interface). We will be using command line version of it.
->iv. Run FastQC to generate quality report of sequence reads.
+> ***iii. Run FastQC to generate quality report of sequence reads.***
```
fastqc -o Rush_KPC_266_FastQC_results/before_trimmomatic/ Rush_KPC_266_1_combine.fastq.gz Rush_KPC_266_2_combine.fastq.gz --extract
@@ -394,19 +522,12 @@ The summary.txt file in these directories indicates if the data passed different
You can visualize and assess the quality of data by opening html report in a local browser.
->v. Exit your cluster node so you don’t waste cluster resources and $$$!
+> ***iv. Exit your cluster node so you don’t waste cluster resources and $$$!***
->vi. Download the FastQC report to your home computer to examine
+> ***v. Download the FastQC html report to your home computer to examine using scp or cyberduck***
```
-sftp username@flux-login.arc-ts.umich.edu
-cd /scratch/micro612w17_fluxod/username/day1_morn/Rush_KPC_266_FastQC_results/before_trimmomatic/
-get Rush_KPC_266_1_combine_fastqc.html
-get Rush_KPC_266_2_combine_fastqc.html
-
-or use scp.
-
-scp username@flux-xfer.arc-ts.umich.edu:/scratch/micro612w17_fluxod/username/day1_morn/Rush_KPC_266_FastQC_results/before_trimmomatic/*.html /path-to-local-directory/
+scp username@flux-xfer.arc-ts.umich.edu:/scratch/micro612w18_fluxod/username/day1_morn/Rush_KPC_266_FastQC_results/before_trimmomatic/*.html /path-to-local-directory/
```
The analysis in FastQC is broken down into a series of analysis modules. The left hand side of the main interactive display or the top of the HTML report show a summary of the modules which were run, and a quick evaluation of whether the results of the module seem entirely normal (green tick), slightly abnormal (orange triangle) or very unusual (red cross).
@@ -419,11 +540,12 @@ Next, lets check the overrepresented sequences graph and the kind of adapters th
![alt tag](https://github.com/alipirani88/Comparative_Genomics/blob/master/_img/day1_morning/2.png)
-Check out [this](https://sequencing.qcfail.com/articles/loss-of-base-call-accuracy-with-increasing-sequencing-cycles/) for more detailed explaination as to why quality drops with increasing sequencing cycles.
+- Check out [this](https://sequencing.qcfail.com/articles/loss-of-base-call-accuracy-with-increasing-sequencing-cycles/) for more detailed explaination as to why quality drops with increasing sequencing cycles.
-> [A video FastQC walkthrough created by FastQC developers](https://www.youtube.com/watch?v=bz93ReOv87Y "FastQC video")
+- [A video FastQC walkthrough created by FastQC developers](https://www.youtube.com/watch?v=bz93ReOv87Y "FastQC video")
-## Quality Trimming using [Trimmomatic](http://www.usadellab.org/cms/?page=trimmomatic "Trimmomatic Homepage")
+Quality Trimming using [Trimmomatic](http://www.usadellab.org/cms/?page=trimmomatic "Trimmomatic Homepage")
+------------------------------------
[[back to top]](https://github.com/alipirani88/Comparative_Genomics/blob/master/day1_morning/README.md)
[[HOME]](https://github.com/alipirani88/Comparative_Genomics/blob/master/README.md)
@@ -435,42 +557,42 @@ For more information on how Trimmomatic tries to achieve this, Please refer [thi
Now we will run Trimmomatic on these raw data to remove low quality reads as well as adapters.
->i. If the interactive session timed out, get an interactive cluster node again to start running programs and navigate to day1_morn directory.
+> ***i. If the interactive session timed out, get an interactive cluster node again to start running programs and navigate to day1_morn directory.***
How to know if you are in interactive session: you should see "username@nyx" in your command prompt
```
iflux
-cd /scratch/micro612w17_fluxod/username/day1_morn/
+cd /scratch/micro612w18_fluxod/username/day1_morn/
-# or
+#or
d1m
```
->ii. Create these output directories in your day1_morn folder to save trimmomatic results
+> ***ii. Create these output directories in your day1_morn folder to save trimmomatic results***
```
mkdir Rush_KPC_266_trimmomatic_results
```
->iii. Try to invoke trimmomatic from command line.
+> ***iii. Try to invoke trimmomatic from command line.***
```
-java -jar /scratch/micro612w17_fluxod/shared/bin/Trimmomatic/trimmomatic-0.33.jar –h
+java -jar /scratch/micro612w18_fluxod/shared/bin/Trimmomatic/trimmomatic-0.33.jar -h
```
->iv. Run the below trimmomatic commands on raw reads.
+> ***iv. Run the below trimmomatic commands on raw reads.***
```
-java -jar /scratch/micro612w17_fluxod/shared/bin/Trimmomatic/trimmomatic-0.33.jar PE Rush_KPC_266_1_combine.fastq.gz Rush_KPC_266_2_combine.fastq.gz Rush_KPC_266_trimmomatic_results/forward_paired.fq.gz Rush_KPC_266_trimmomatic_results/forward_unpaired.fq.gz Rush_KPC_266_trimmomatic_results/reverse_paired.fq.gz Rush_KPC_266_trimmomatic_results/reverse_unpaired.fq.gz ILLUMINACLIP:/scratch/micro612w17_fluxod/shared/bin/Trimmomatic/adapters/TruSeq3-PE.fa:2:30:10:8:true SLIDINGWINDOW:4:15 MINLEN:40 HEADCROP:0
+java -jar /scratch/micro612w18_fluxod/shared/bin/Trimmomatic/trimmomatic-0.33.jar PE Rush_KPC_266_1_combine.fastq.gz Rush_KPC_266_2_combine.fastq.gz Rush_KPC_266_trimmomatic_results/forward_paired.fq.gz Rush_KPC_266_trimmomatic_results/forward_unpaired.fq.gz Rush_KPC_266_trimmomatic_results/reverse_paired.fq.gz Rush_KPC_266_trimmomatic_results/reverse_unpaired.fq.gz ILLUMINACLIP:/scratch/micro612w18_fluxod/shared/bin/Trimmomatic/adapters/TruSeq3-PE.fa:2:30:10:8:true SLIDINGWINDOW:4:15 MINLEN:40 HEADCROP:0
```
![alt tag](https://github.com/alipirani88/Comparative_Genomics/blob/master/_img/day1_morning/trimm_parameters.png)
-First, Trimmomatic searches for any matches between the reads and adapter sequences. Adapter sequences are stored in this directory of Trimmomatic tool: /scratch/micro612w17_fluxod/shared/bin/Trimmomatic/adapters/. Trimmomatic comes with a list of standard adapter fasta sequences such TruSeq, Nextera etc. You should use appropriate adapter fasta sequence file based on the illumina kit that was used for sequencing. You can get this information from your sequencing centre or can find it in FastQC html report (Section: Overrepresented sequences).
+First, Trimmomatic searches for any matches between the reads and adapter sequences. Adapter sequences are stored in this directory of Trimmomatic tool: /scratch/micro612w18_fluxod/shared/bin/Trimmomatic/adapters/. Trimmomatic comes with a list of standard adapter fasta sequences such TruSeq, Nextera etc. You should use appropriate adapter fasta sequence file based on the illumina kit that was used for sequencing. You can get this information from your sequencing centre or can find it in FastQC html report (Section: Overrepresented sequences).
Short sections (2 bp as determined by seed misMatch parameter) of each adapter sequences (contained in TruSeq3-PE.fa) are tested in each possible position within the reads. If it finds a perfect match, It starts searching the entire adapter sequence and scores the alignment. The advantage here is that the full alignment is calculated only when there is a perfect seed match which results in considerable efficiency gains. So, When it finds a match, it moves forward with full alignment and when the match reaches 10 bp determined by simpleClipThreshold, it finally trims off the adapter from reads.
@@ -478,7 +600,7 @@ Quoting Trimmomatic:
"'Palindrome' trimming is specifically designed for the case of 'reading through' a short fragment into the adapter sequence on the other end. In this approach, the appropriate adapter sequences are 'in silico ligated' onto the start of the reads, and the combined adapter+read sequences, forward and reverse are aligned. If they align in a manner which indicates 'read- through' i.e atleast 30 bp match, the forward read is clipped and the reverse read dropped (since it contains no new data)."
->v. Now create new directories in day1_morn folder and Run FastQC on these trimmomatic results.
+> ***v. Now create new directories in day1_morn folder and Run FastQC on these trimmomatic results.***
```
mkdir Rush_KPC_266_FastQC_results/after_trimmomatic
@@ -486,17 +608,10 @@ mkdir Rush_KPC_266_FastQC_results/after_trimmomatic
fastqc -o Rush_KPC_266_FastQC_results/after_trimmomatic/ Rush_KPC_266_trimmomatic_results/forward_paired.fq.gz Rush_KPC_266_trimmomatic_results/reverse_paired.fq.gz --extract
```
-Get these html reports to local system.
+Get these html reports to your local system.
```
-sftp username@flux-login.arc-ts.umich.edu
-cd /scratch/micro612w17_fluxod/username/day1_morn/Rush_KPC_266_FastQC_results/after_trimmomatic/
-get forward_paired.fq_fastqc.html
-get reverse_paired.fq_fastqc.html
-
-or use scp
-
-scp username@flux-xfer.arc-ts.umich.edu:/scratch/micro612w17_fluxod/username/day1_morn/Rush_KPC_266_FastQC_results/after_trimmomatic/*.html /path-to-local-directory/
+scp username@flux-xfer.arc-ts.umich.edu:/scratch/micro612w18_fluxod/username/day1_morn/Rush_KPC_266_FastQC_results/after_trimmomatic/*.html /path-to-local-directory/
```
![alt tag](https://github.com/alipirani88/Comparative_Genomics/blob/master/_img/day1_morning/3.png)
@@ -510,17 +625,17 @@ Quoting FastQC:
This doesn't look very bad but you can remove the red cross sign by trimming these imbalanced head bases using HEADCROP:9 flag in the above command.
->vi. Lets Run trimmomatic again with headcrop 9 and save it in a different directory called Rush_KPC_266_trimmomatic_results_with_headcrop/
+> ***vi. Lets Run trimmomatic again with headcrop 9 and save it in a different directory called Rush_KPC_266_trimmomatic_results_with_headcrop/***
```
mkdir Rush_KPC_266_trimmomatic_results_with_headcrop/
-time java -jar /scratch/micro612w17_fluxod/shared/bin/Trimmomatic/trimmomatic-0.33.jar PE Rush_KPC_266_1_combine.fastq.gz Rush_KPC_266_2_combine.fastq.gz Rush_KPC_266_trimmomatic_results_with_headcrop/forward_paired.fq.gz Rush_KPC_266_trimmomatic_results_with_headcrop/forward_unpaired.fq.gz Rush_KPC_266_trimmomatic_results_with_headcrop/reverse_paired.fq.gz Rush_KPC_266_trimmomatic_results_with_headcrop/reverse_unpaired.fq.gz ILLUMINACLIP:/scratch/micro612w17_fluxod/shared/bin/Trimmomatic/adapters/TruSeq3-PE.fa:2:30:10:8:true SLIDINGWINDOW:4:20 MINLEN:40 HEADCROP:9
+time java -jar /scratch/micro612w18_fluxod/shared/bin/Trimmomatic/trimmomatic-0.33.jar PE Rush_KPC_266_1_combine.fastq.gz Rush_KPC_266_2_combine.fastq.gz Rush_KPC_266_trimmomatic_results_with_headcrop/forward_paired.fq.gz Rush_KPC_266_trimmomatic_results_with_headcrop/forward_unpaired.fq.gz Rush_KPC_266_trimmomatic_results_with_headcrop/reverse_paired.fq.gz Rush_KPC_266_trimmomatic_results_with_headcrop/reverse_unpaired.fq.gz ILLUMINACLIP:/scratch/micro612w18_fluxod/shared/bin/Trimmomatic/adapters/TruSeq3-PE.fa:2:30:10:8:true SLIDINGWINDOW:4:20 MINLEN:40 HEADCROP:9
```
Unix gem: time in above command shows how long a command takes to run?
->vii. Run FastQC 'one last time' on updated trimmomatic results with headcrop and check report on your local computer
+> ***vii. Run FastQC 'one last time' on updated trimmomatic results with headcrop and check report on your local computer***
```
mkdir Rush_KPC_266_FastQC_results/after_trimmomatic_headcrop/
@@ -528,14 +643,7 @@ fastqc -o Rush_KPC_266_FastQC_results/after_trimmomatic_headcrop/ --extract -f f
```
Download the reports again and see the difference.
```
-sftp username@flux-login.arc-ts.umich.edu
-cd /scratch/micro612w17_fluxod/username/day1_morn/Rush_KPC_266_FastQC_results/after_trimmomatic_headcrop/
-get forward_paired.fq_fastqc.html
-get reverse_paired.fq_fastqc.html
-
-or use scp
-
-scp username@flux-xfer.arc-ts.umich.edu:/scratch/micro612w17_fluxod/username/day1_morn/Rush_KPC_266_FastQC_results/after_trimmomatic_headcrop/*.html /path-to-local-directory/
+scp username@flux-xfer.arc-ts.umich.edu:/scratch/micro612w18_fluxod/username/day1_morn/Rush_KPC_266_FastQC_results/after_trimmomatic_headcrop/*.html /path-to-local-directory/
```
The red cross sign disappeared!
diff --git a/day2_afternoon/README.md b/day2_afternoon/README.md
index 031b1f9..7a417bb 100644
--- a/day2_afternoon/README.md
+++ b/day2_afternoon/README.md
@@ -1,7 +1,9 @@
-# Day 2 Afternoon
+Day 2 Afternoon
+===============
[[HOME]](https://github.com/alipirani88/Comparative_Genomics/blob/master/README.md)
-## High-throughput BLAST and pan-genome analysis
+High-throughput BLAST and pan-genome analysis
+---------------------------------------------
This morning we learned how to perform basic genome annotation and comparison using Prokka and ACT. Now we will up the ante and do some more sophisticated comparative genomics analyses!
First, we will create custom BLAST databases to identify specific antibiotic resistance genes of interest in a set of genomes.
@@ -15,76 +17,61 @@ Execute the following command to copy files for this afternoon’s exercises to
```
-cd /scratch/micro612w17_fluxod/username
-cp -r /scratch/micro612w17_fluxod/shared/data/day2_after/ ./
+cd /scratch/micro612w18_fluxod/username
+
+or
+
+wd
+
+cp -r /scratch/micro612w18_fluxod/shared/data/day2_after/ ./
```
-## Determine which genomes contain beta-lactamase genes
+Determine which genomes contain beta-lactamase genes
+----------------------------------------------------
[[back to top]](https://github.com/alipirani88/Comparative_Genomics/blob/master/day2_afternoon/README.md)
[[HOME]](https://github.com/alipirani88/Comparative_Genomics/blob/master/README.md)
-Before comparing full genomic content, lets start by looking for the presence of particular genes of interest. A. baumannii harbors an arsenal of resistance genes, and it would be interesting to know how particular resistance families vary among our 4 genomes. To accomplish this we will use the antibiotic resistance database ([ARDB](http://ardb.cbcb.umd.edu/)). In particular, we are going to extract a set of genes from ARDB that we are interested in probing our genomes for, and create a custom BLAST database to compare against.
-
-**Get beta-lactamase genes from [ARDB](http://ardb.cbcb.umd.edu/) database**
+Before comparing full genomic content, lets start by looking for the presence of particular genes of interest. A. baumannii harbors an arsenal of resistance genes, and it would be interesting to know how particular resistance families vary among our 4 genomes. To accomplish this we will use the antibiotic resistance database ([ARDB](http://ardb.cbcb.umd.edu/)) and particularly beta-lactamase genes extracted from ARDB. These extracted genes can be found in file ardb_beta_lactam_genes.pfasta, which we will use to generate a Blast database.
->i. Run the custom perl script filter_fasta_file.pl to extract genes annotated as beta-lactamases from the full ARDB fasta file.
+> ***i. Run makeblastdb on the file of beta-lactamases to create a BLAST database.***
-The script takes as input:
+makeblastdb takes as input:
-1) the ARDB database (resisGenes.pfasta),
+1) an input fasta file of protein or nucleotide sequences (ardb_beta_lactam_genes.pfasta) and
-2) a file containing terms to search the database for (fasta_file_keys) and
-
-3) an output file to contain the subset of sequences that match the text your searching for (ardb_beta_lactam_genes.pfasta).
+2) a flag indicating whether to construct a protein or nucleotide database (in this case protein/ -dbtype prot).
```
+#change directory to day2_after
+d2a
-module load bioperl
-cd scratch/micro612w17_fluxod/username/day2_after
-perl filter_fasta_file.pl resisGenes.pfasta fasta_file_keys ardb_beta_lactam_genes.pfasta
-
-```
->ii. Build BLAST database from fasta file
-
-Run formatdb on the file of beta-lactamases to create a BLAST database.
-formatdb takes as input:
-
-1) a fasta file of protein or nucleotide sequences (ardb_beta_lactam_genes.pfasta) and
-
-2) a flag indicating whether to construct a protein or nucleotide database (in this case protein/ -p T).
+makeblastdb -in ardb_beta_lactam_genes.pfasta -dbtype prot
-```
-formatdb -i ardb_beta_lactam_genes.pfasta -p T
```
->iii. BLAST A. baumannii proteins against our custom beta-lactamase database
+> ***ii. BLAST A. baumannii protein sequences against our custom beta-lactamase database.***
Run BLAST!
The input parameters are:
-1) the type of blast to use (-p blastp),
-
-2) query sequences (-i Abau_all.pfasta),
+1) query sequences (-query Abau_all.pfasta),
-3) the database to search against (-d ardb_beta_lactam_genes.pfasta),
+2) the database to search against (-db ardb_beta_lactam_genes.pfasta),
-4) the name of a file to store your results (-o bl_blastp_results),
+3) the name of a file to store your results (-out bl_blastp_results),
-5) output format (-m 8),
+4) output format (-outfmt 6),
-6) e-value cutoff (-e 1e-20),
+5) e-value cutoff (-evalue 1e-20),
-7) number of database sequences to return (-v 1) and
+6) number of database sequences to return (-max_target_seqs 1)
-8) number of database sequences to show alignment for (-b 1).
```
-
-blastall -p blastp -i Abau_all.pfasta -d ardb_beta_lactam_genes.pfasta -o bl_blastp_results -m 8 -e 1e-20 -v 1 -b 1
-
+blastp -query Abau_all.pfasta -db ardb_beta_lactam_genes.pfasta -out bl_blastp_results -outfmt 6 -evalue 1e-20 -max_target_seqs 1
```
Use less to look at bl_blastp_results.
@@ -93,362 +80,370 @@ Use less to look at bl_blastp_results.
less bl_blastp_results
```
-> Question: Experiment with the –m parameter, which controls different output formats that BLAST can produce.
+- Question: Experiment with the -outfmt parameter, which controls the different output formats that BLAST can produce.
+- Question: Determine which Enterococcus genomes contain vancomycin resistance genes. To do this you will need to: i) create a protein BLAST database for ardb_van.pfasta, ii) concatenate the genome sequences in the .fasta files and iii) use blastx to BLAST nucleotide genomes against a protein database.
->iv. Repeat steps i-iii for a different resistance gene class
+Identification of antibiotic resistance genes with [ARIBA](https://github.com/sanger-pathogens/ariba) directly from paired end reads
+----------------------------------------------------------
+[[back to top]](https://github.com/alipirani88/Comparative_Genomics/blob/master/day2_afternoon/README.md)
+[[HOME]](https://github.com/alipirani88/Comparative_Genomics/blob/master/README.md)
-Use nano to change fasta_file_keys to contain phrase you’d like to search for (e.g. acetyltransferase, carbapenemase)
+ARIBA, Antimicrobial Resistance Identification By Assembly is a tool that identifies antibiotic resistance genes by running local assemblies. The input is a FASTA file of reference sequences (can be a mix of genes and noncoding sequences) and paired sequencing reads. ARIBA reports which of the reference sequences were found, plus detailed information on the quality of the assemblies and any variants between the sequencing reads and the reference sequences.
-Run filter_fasta_file.pl to extract genes annotated with your resistance of interest (ROI) from the full ARDB fasta file
+ARIBA is compatible with various databases and also contains a utility to download different databases such as: argannot, card, megares, plasmidfinder, resfinder, srst2_argannot, vfdb_core. Today, we will be working with the [card](https://card.mcmaster.ca/) database, which has been downloaded and placed in /scratch/micro612w18_fluxod/shared/out.card.prepareref/ directory.
+
-**BLAST!**
-
+> ***i. Run ARIBA on input paired-end fastq reads for resistance gene identification.***
-```
-blastall -p blastp -i Abau_all.pfasta -d ardb_ROI_genes.pfasta -o bl_blastp_results -m 8 -e 1e-20 -v 1 -b 1
-```
+The fastq reads are placed in Abau_genomes_fastq directory. Enter interactive flux session, change directory to day2_after workshop directory and run the below four commands to start ARIBA jobs in background.
-## Identification of antibiotic resistance genes with [LS-BSR](https://github.com/jasonsahl/LS-BSR) and the [ARDB](http://ardb.cbcb.umd.edu/) database
-[[back to top]](https://github.com/alipirani88/Comparative_Genomics/blob/master/day2_afternoon/README.md)
-[[HOME]](https://github.com/alipirani88/Comparative_Genomics/blob/master/README.md)
+
-Next, instead of looking at resistance classes one at a time, lets look at them all in one shot! To do this we will use [LS-BSR](https://peerj.com/articles/332/), which essentially is just a wrapper for doing the same sort of BLASTing we just did in the previous step. BSR stands for BLAST Score Ratio, which refers to what the output is. In particular, for each query gene LS-BSR returns the ratio between: 1) the BLAST score of best hit in target genome and 2) BLAST score of query gene against itself. So, the output is a query by target genome matrix, where the values are between 0 and 1, and indicate the strength of a given queries BLAST hit in the target genome.
+```
+iflux
+cd /scratch/micro612w18_fluxod/username/day2_after
->i. Create a non-redundant list of resistance genes
+or
-There is a lot of redundancy in the ARDB (e.g. lots of closely related genes), which would make the output difficult to sort through. Here, we use usearch to select representatives from the database and create a non-redundant gene set!
+d2a
-We are running usearch with the following parameters:
-1) the clustering algorithm (-cluster_fast),
-2) the files of sequences you want to cluster (resisGenes.pep),
-3) the minimum sequence identity to be included in an existing cluster (-id 0.8),
-4) an output fasta file with reperesentatives (centroids) of each sequence cluster (-centroids resisGenes_nr.pep) and
-5) an output file describing the results of the clustering (-uc resisGenes.uc).
+#Load dependency
-
+module load cd-hit
-```
+#ARIBA commands
-> Make sure you are in day2_after directory
+/nfs/esnitkin/bin_group/anaconda3/bin/ariba run --force /scratch/micro612w18_fluxod/shared/out.card.prepareref/ Abau_genomes_fastq/AbauA_genome.1.fastq.gz Abau_genomes_fastq/AbauA_genome.2.fastq.gz AbauA_genome &
-cd scratch/micro612w17_fluxod/username/day2_after
+/nfs/esnitkin/bin_group/anaconda3/bin/ariba run --force /scratch/micro612w18_fluxod/shared/out.card.prepareref/ Abau_genomes_fastq/AbauB_genome.1.fastq.gz Abau_genomes_fastq/AbauB_genome.2.fastq.gz AbauB_genome &
-> Load relevant Modules
+/nfs/esnitkin/bin_group/anaconda3/bin/ariba run --force /scratch/micro612w18_fluxod/shared/out.card.prepareref/ Abau_genomes_fastq/AbauC_genome.1.fastq.gz Abau_genomes_fastq/AbauC_genome.2.fastq.gz AbauC_genome &
-module load usearch
-module load prodigal
+/nfs/esnitkin/bin_group/anaconda3/bin/ariba run --force /scratch/micro612w18_fluxod/shared/out.card.prepareref/ Abau_genomes_fastq/ACICU_genome.1.fastq.gz Abau_genomes_fastq/ACICU_genome.2.fastq.gz ACICU_genome &
-> Run usearch to select representatives from the database and create a non-redundant gene set!
+```
-usearch -cluster_fast resisGenes.pep -id 0.8 -centroids resisGenes_nr.pep -uc resisGenes.uc
+The "&" in the above commands(at the end) is a little unix trick to run commands in background. You can run multiple commands in background and make full use of parallel processing. You can check the status of these background jobs by typing:
+```
+jobs
```
->ii. Run LS-BSR
+> ***ii. Run ARIBA summary function to generate a summary report.***
-Change your directory to day2_after:
+ARIBA has a summary function that summarises the results from one or more sample runs of ARIBA and generates an output report with various levels of information determined by the --preset parameter. The parameter "--preset minimal" will generate a minimal report showing only the presence/absence of resistance genes whereas "--preset all" will output all the extra information related to each database hit such as reads and reference sequence coverage, variants and their associated annotations (if the variant confers resistance to an antibiotic) etc.
```
-> Make sure you are in day2_after directory
+/nfs/esnitkin/bin_group/anaconda3/bin/ariba summary --preset minimal Abau_genomes_ariba_minimal_results *_genome/report.tsv
-cd /scratch/micro612w17_fluxod/username/day2_after/
+/nfs/esnitkin/bin_group/anaconda3/bin/ariba summary --preset all Abau_genomes_ariba_all_results *_genome/report.tsv
```
-Run LS-BSR (it will take a few minutes)!
-
-The input parameters are: a directory with your genomes (-d Abau_genomes) and a fasta file of query genes (-g resisGenes_nr.pep)
+ARIBA summary generates three output files:
-```
+1. Abau_genomes_ariba*.csv file that can be viewed in your favourite spreadsheet program.
+2. Abau_genomes_ariba*.phandango.{csv,tre} that allow you to view the results in [Phandango](http://jameshadfield.github.io/phandango/#/). They can be drag-and-dropped straight into Phandango.
-python /scratch/micro612w17_fluxod/shared/bin/LS-BSR-master/ls_bsr.py -d Abau_genomes/ -g resisGenes_nr.pep
+Let's copy the phandango files Abau_genomes_ariba_minimal_results.phandango.csv and Abau_genomes_ariba_minimal_results.phandango.tre to your local system using cyberduck or scp.
+```
+scp username\@flux-xfer.arc-ts.umich.edu:/scratch/micro612w18_fluxod/username/day2_after/*minimal_results.phandango* ~/Desktop/
```
->iii. Download LS-BSR output matrix to your own computer for analysis in R
+Drag and drop these two files on [Phandango](http://jameshadfield.github.io/phandango/#/) website. What types of resistance genes do you see in these Acinetobacter genomes? This [review](http://aac.asm.org/content/55/3/947.full) may help interpret.
-Use scp to get LS-BSR output onto your laptop
+> ***iii. Explore full ARIBA matrix in R***
-```
+- Now, Fire up R console or studio and read ariba full report "Abau_genomes_ariba_all_results.csv"
-> Dont forget to change username in the below command
+```
+ariba_full = read.csv(file = 'Abau_genomes_ariba_all_results.csv', row.names = 1)
+```
-scp username@flux-xfer.arc-ts.umich.edu:/scratch/micro612w17_fluxod/username/day2_after/bsr_matrix_values.txt ~/Desktop
+- Subset to get description for each gene
+```
+ariba_full_asm = ariba_full[, grep('assembled',colnames(ariba_full))]
```
-Fire up RStudio and read the matrix:
+- Make binary for plotting purposes
+```
+ariba_full_asm[,] = as.numeric(ariba_full_asm != 'no')
```
-> Make sure you have copied bsr_matrix_values.txt file to your desktop. If not then give the path where bsr_matrix_values.txt is located.
-
-bsr_mat = read.table('~/Desktop/bsr_matrix_values.txt', sep = "\t", row.names = 1, header = TRUE, quote = "")
+- Make a heatmap!
```
+heatmap(as.matrix(ariba_full_asm), scale = "none", col= c('black', 'red'), margins = c(10,5), cexRow = 0.75)
+```
+
+Perform pan-genome analysis with [Roary](https://sanger-pathogens.github.io/Roary/)
+----------------------------------------
-Use head, str, dim, etc. to explore the matrix you read in
+Roary is a pan genome pipeline, which takes annotated assemblies in GFF3 format and calculates the pan genome. The pan-genome is just a fancy term for the full complement of genes in a set of genomes.
-iv. Make a heatmap of all the LS-BSR results
+The way Roary does this is by:
+1) Roary gets all the coding sequences from GFF files, converts them into protein sequences, and creates pre-clusters of all the genes,
+2) Then, using BLASTP and MCL, Roary creates gene clusters and checks for paralogs, and
+3) Finally, Roary will take every isolate and order them by presence/absence of genes.
-Install and load the R library "heatmap3"
+> ***i. Generate pan-genome matrix using Roary and GFF files***
-Make a heatmap of the complete LS-BSR matrix. Check out the help file to see what the input parameters do, and behold the plethora of other options to customize your heatmaps!
+Make sure you are on an interactive node, as this will be even more computationally intensive!
+```
+iflux
```
-heatmap3(bsr_mat, , scale = "none", distfun = function(x){dist(x, method = "manhattan")}, margin = c(10,10), cexCol = 0.85, cexRow = 0.5)
+Change your directory to day2_after
```
+> Make sure to change username with your uniqname
->v. Subset LS-BSR data to only include genes present in at least one genome
+cd /scratch/micro612w18_fluxod/username/day2_after/
-From the previous step you should have discerned that full LS-BSR matrix is too large to get a useful visualization, so we need to subset it.
-Lets first subset the matrix to focus only on genes present in at least one of our genomes.
-Values in the LS-BSR matrix are between 0 and 1, and represent the sequence identity to the query gene.
-We will arbitrarily say that if a protein have a BLAST score ratio of less then 0.5, then its absent.
+or
-```
-
-bsr_mat_subset = bsr_mat[rowSums(bsr_mat > 0.5) > 0,]
+d2a
```
-Make a heatmap of your subset (much better!)
+Load all the required dependencies and run roary on GFF files placed in Abau_genomes_gff folder.
```
+module load samtools
+module load bedtools2
+module load cd-hit
+module load ncbi-blast
+module load mcl
+module load parallel
+module load mafft
+module load fasttree
+module load perl-modules
+module load R
+module load roary
-heatmap3(bsr_mat_subset, , scale = "none", distfun = function(x){dist(x, method = "manhattan")}, margin = c(10,10), cexCol = 0.85, cexRow = 0.5)
-
+#Run roary
+roary -p 4 -f Abau_genomes_roary_output -r -n -v Abau_genomes_gff/*.gff
```
->vi. Determine the total number of resistance genes present in each genome
+The above roary command will run the pan-genome pipeline on the gff files placed in Abau_genomes_gff using 4 threads (-p), save the results in an output directory Abau_genomes_roary_output (-f), generate R plots using the .Rtab output files (-r), create a core gene alignment using MAFFT (-n) and print verbose progress output to the screen (-v).
-We use colSums to count the number of genes with greater than 50% identity to the query
+Change directory to Abau_genomes_roary_output to explore the results.
```
-colSums(bsr_mat > 0.5)
-```
+cd Abau_genomes_roary_output
-How does the total number of genes vary by altering the percent identity threshold?
-
->vii. Determine the total number of bla genes in each genome
+ls
+```
-Next, we will use grepl to pull out genes of interest
+Output files:
-```
-bla_bsr_mat = bsr_mat[grepl('beta-lactamase', row.names(bsr_mat)) ,]
-```
+1. summary_statistics.txt: This file is an overview of your pan genome analysis showing the number of core genes(present in all isolates) and accessory genes(genes absent from one or more isolates or unique to a given isolate).
-Print out to screen and make a heatmap to explore
+2. gene_presence_absence.csv: This file contains detailed information about each gene including their annotations, and can be opened in any spreadsheet software to manually explore the results. It contains a plethora of information such as gene names and their functional annotations, whether a gene is present in a genome or not, minimum/maximum/average sequence length etc.
->viii. Subset the full matrix to look at genes that are present in only one genome
+3. gene_presence_absence.Rtab: This file is similar to the gene_presence_absence.csv file, however it just contains a simple tab delimited binary matrix with the presence and absence of each gene in each sample. It can be easily loaded into R using the read.table function for further analysis and plotting. The first row is the header containing the name of each sample, and the first column contains the gene name. A 1 indicates the gene is present in the sample, a 0 indicates it is absent.
-Get genes present in only one genome
+4. core_gene_alignment.aln: a multi-FASTA alignment of all of the core genes that can be used to generate a phylogenetic tree.
+
-Print out to screen and make a heatmap to explore
-
-## Perform pan-genome analysis with LS-BSR
-[[back to top]](https://github.com/alipirani88/Comparative_Genomics/blob/master/day2_afternoon/README.md)
-[[HOME]](https://github.com/alipirani88/Comparative_Genomics/blob/master/README.md)
+> ***ii. Explore pan-genome matrix gene_presence_absence.csv and gene_presence_absence.Rtab using R***
-As a final BLASTing exercise we will use LS-BSR to explore the pan-genome of our A. baumannii. The pan-genome is just a fancy term for the full complement of genes in a set of genomes.
-The way LS-BSR does this is by:
-1) applying prodigal to identify protein coding genes in input genomes,
-2) applying usearch to create non-redundant set of genes and
-3) BLASTing the set of non-redundant genes against the genomes.
+
->i. Get pan-genome matrix and transfer annotation
+**Modify gene_presence_absence.Rtab file to include annotations**
-Make sure you are on an interactive node, as this will be even more computationally intensive!
+- Get column names from gene_presence_absence.csv file
```
-iflux
+head -n1 gene_presence_absence.csv | tr ',' '\n' | cat --number
```
-
-Change your directory to day2_after
+- Pull columns of interest
```
-
-> Make sure to change username with your uniqname
-
-cd /scratch/micro612w17_fluxod/username/day2_after/
-
+cut -d "," -f 3 gene_presence_absence.csv | tr '"' '_' > gene_presence_absence_annot.csv
```
-
-Run LS-BSR! The –u parameter is just a path to where usearch lives on flux.
-If you started a new interactive job since you ran LS-BSR, you will need to re-load the required modules for LS-BSR listed above.
+- Paste it into pan-genome matrix
```
-
-cd scratch/micro612w17_fluxod/username/day2_after
-
-python /scratch/micro612w17_fluxod/shared/bin/LS-BSR-master/ls_bsr.py -d Abau_genomes/ -u /sw/med/centos7/usearch/8.1/usearch
-
+paste -d "" gene_presence_absence_annot.csv gene_presence_absence.Rtab > gene_presence_absence_wannot.Rtab
```
-Run the custom perl script transfer_annotations.pl to add annotations to your BSR matrix. The output of this script will be bsr_matrix_values_annot.txt
+- Check gene_presence_absence_wannot.Rtab file
```
-perl transfer_annotations.pl Abau_ECII_PC.fasta Abau_ECII_PC.NR.annot bsr_matrix_values.txt consensus.fasta
+less gene_presence_absence_wannot.Rtab
```
->ii. Read matrix into R and create heatmap
-
-Use scp to get LS-BSR output onto your laptop
+**Read matrix into R, generate exploratory plots and query pan-genome**
-```
+Use scp or cyberduck to get gene_presence_absence_wannot.Rtab onto your laptop.
-> Make sure to change username with your uniqname
+> ***i. Prepare and clean data***
-scp username@flux-xfer.arc-ts.umich.edu:/scratch/micro612w17_fluxod/username/day2_after/bsr_matrix_values_annot.txt ~/Desktop
+- Fire up RStudio and read gene_presence_absence_wannot.Rtab into matrix.
```
-
-Fire up RStudio and read the matrix in
-
+pg_matrix = read.table('gene_presence_absence_wannot.Rtab', sep = "\t", quote = "", row.names = 1, skip = 1)
```
-bsr_mat_PG = read.table('~/Desktop/bsr_matrix_values_annot.txt', sep = "\t", row.names = 1, header = TRUE, quote = "")
+- Add column names back
+```
+colnames(pg_matrix) = c('ACICU', 'AbauA', 'AbauB', 'AbauC')
```
-Use head, str, dim, etc. to explore the matrix you read in
-Make a heatmap for the full matrix
+- Use head, str, dim, etc. to explore the matrix.
-```
+> ***ii. Generate exploratory heatmaps.***
-heatmap3(as.matrix(bsr_mat_PG), , scale = "none", distfun = function(x){dist(x, method = "manhattan")}, margin = c(10,10), cexCol = 0.85, cexRow = 0.5)
+- Make a heatmap for the full matrix
+```
+heatmap(as.matrix(pg_matrix), , scale = "none", distfun = function(x){dist(x, method = "manhattan")}, margin = c(10,10), cexCol = 0.85, cexRow = 0.5, col= c('black', 'red'))
```
-Make a heatmap for variable genes (present in at least one, but not all of the genomes
+- Make a heatmap for variable genes (present in at least one, but not all of the genomes)
```
-bsr_mat_PG_subset = bsr_mat_PG[rowSums(bsr_mat_PG > 0.4) > 0 & rowSums(bsr_mat_PG > 0.4) < 4 ,]
-heatmap3(as.matrix(bsr_mat_PG_subset), , scale = "none", distfun = function(x){dist(x, method = "manhattan")}, margin = c(10,10), cexCol = 0.85, cexRow = 0.5)
+pg_matrix_subset = pg_matrix[rowSums(pg_matrix > 0) > 0 & rowSums(pg_matrix > 0) < 4 ,]
+heatmap(as.matrix(pg_matrix_subset), , scale = "none", distfun = function(x){dist(x, method = "manhattan")}, margin = c(10,10), cexCol = 0.85, cexRow = 0.5, col= c('black', 'red'))
```
->iii. Which genomes are most closely related based upon shared gene content?
+> ***iii. Query pan-genome***
+
+- Which genomes are most closely related based upon shared gene content?
We will use the outer function to determine the number of genes shared by each pair of genomes.
+
+
+
Look at the help page for outer to gain additional insight into how this is working.
```
-outer(1:4,1:4, FUN = Vectorize(function(x,y){sum(bsr_mat_PG_subset[,x] > 0.4 & bsr_mat_PG_subset[,y] > 0.4)}))
+help(outer)
+```
+
+```
+outer(1:4,1:4, FUN = Vectorize(function(x,y){sum(pg_matrix_subset[,x] > 0 & pg_matrix_subset[,y] > 0)}))
```
->iv. What is the size of the core genome?
+- What is the size of the core genome?
Lets first get an overview of how many genes are present in different numbers of genomes (0, 1, 2, 3 or 4) by plotting a histogram. Here, we combine hist with rowSums to accomplish this.
```
-hist(rowSums(bsr_mat_PG > 0.4))
+hist(rowSums(pg_matrix > 0), col="red")
```
Next, lets figure out how big the core genome is (e.g. how many genes are common to all of our genomes)?
```
-sum(rowSums(bsr_mat_PG > 0.4) == 4)
+sum(rowSums(pg_matrix > 0) == 4)
```
->v. What is the size of the accessory genome?
+- What is the size of the accessory genome?
Lets use a similar approach to determine the size of the accessory genome (e.g. those genes present in only a subset of our genomes).
```
-sum(rowSums(bsr_mat_PG > 0.4) < 4 & rowSums(bsr_mat_PG > 0.4) > 0)
+sum(rowSums(pg_matrix > 0) < 4 & rowSums(pg_matrix > 0) > 0)
```
->vi. What types of genes are unique to a given genome?
+- What types of genes are unique to a given genome?
-So far we have quantified the core and accessory genome, now lets see if we can get an idea of what types of genes are core vs. accessory. Lets start by looking at those genes present in only a single genome. What do you notice about these genes?
+So far we have quantified the core and accessory genome, now lets see if we can get an idea of what types of genes are core vs. accessory. Lets start by looking at those genes present in only a single genome.
```
-row.names(bsr_mat_PG[rowSums(bsr_mat_PG > 0.4) == 1,])
+row.names(pg_matrix[rowSums(pg_matrix > 0) == 1,])
```
-vii. What is the number of hypothetical genes in core vs. accessory genome?
-
-Looking at unqiue genes we see that many are annotated as “hypothetical”, indicating that the sequence looks like a gene, but has no detectable homology with a functionally characterized gene. Determine the fraction of “hypothetical” genes in unique vs. core. Why does this make sense?
+What do you notice about these genes?
-```
+- What is the number of hypothetical genes in core vs. accessory genome?
-sum(grepl("hypothetical" , row.names(bsr_mat_PG[rowSums(bsr_mat_PG > 0.4) == 1,]))) / sum(rowSums(bsr_mat_PG > 0.4) == 1)
+Looking at unique genes we see that many are annotated as “hypothetical”, indicating that the sequence looks like a gene, but has no detectable homology with a functionally characterized gene.
-sum(grepl("hypothetical" , row.names(bsr_mat_PG[rowSums(bsr_mat_PG > 0.4) == 4,]))) / sum(rowSums(bsr_mat_PG > 0.4) == 4)
+Determine the fraction of “hypothetical” genes in unique vs. core.
```
+sum(grepl("hypothetical" , row.names(pg_matrix[rowSums(pg_matrix > 0) == 1,]))) / sum(rowSums(pg_matrix > 0) == 1)
+sum(grepl("hypothetical" , row.names(pg_matrix[rowSums(pg_matrix > 0) == 4,]))) / sum(rowSums(pg_matrix > 0) == 4)
+```
+
+Why does this make sense?
-## Perform genome comparisons with [ACT](http://www.sanger.ac.uk/science/tools/artemis-comparison-tool-act)
+Perform genome comparisons with [ACT](http://www.sanger.ac.uk/science/tools/artemis-comparison-tool-act)
+-------------------------------------
[[back to top]](https://github.com/alipirani88/Comparative_Genomics/blob/master/day2_afternoon/README.md)
[[HOME]](https://github.com/alipirani88/Comparative_Genomics/blob/master/README.md)
In the previous exercises we were focusing on gene content, but losing the context of the structural variation underlying gene content variation (e.g. large insertions and deletions).
Here we will use ACT to compare two of our genomes (note that you can use ACT to compare more than two genomes if desired).
-i. Create ACT alignment file with BLAST
+> ***i. Create ACT alignment file with BLAST***
As we saw this morning, to compare genomes in ACT we need to use BLAST to create the alignments. We will do this on flux.
```
-cd scratch/micro612w17_fluxod/username/day2_after
+cd /scratch/micro612w18_fluxod/username/day2_after
blastall -p blastn -i ./Abau_genomes/AbauA_genome.fasta -d ./Abau_BLAST_DB/ACICU_genome.fasta -m 8 -e 1e-20 -o AbauA_vs_ACICU.blast
```
->ii. Read in genomes, alignments and annotation files
+> ***ii. Read in genomes, alignments and annotation files***
-Use sftp to get ACT files onto your laptop
+Use scp or cyberduck to transfer Abau_ACT_files folder onto your laptop
-```
-cd ~/Desktop (or wherever your desktop is)
-mkdir Abau_ACT
-cd Abau_ACT
-sftp username@flux-login.arc-ts.umich.edu
-cd /scratch/micro612w17_fluxod/username/day2_after
-get Abau_genomes/AbauA_genome.fasta
-get Abau_genomes/ACICU_genome.fasta
-get AbauA_vs_ACICU.blast
-get Abau_ACT_files/AbauA_genome_gene.gff
-get Abau_ACT_files/ACICU_genome_gene.gff
+1. Abau_genomes/AbauA_genome.fasta
+2. Abau_genomes/ACICU_genome.fasta
+3. AbauA_vs_ACICU.blast
+4. Abau_ACT_files/AbauA_genome_gene.gff
+5. Abau_ACT_files/ACICU_genome_gene.gff
-```
->iii. Explore genome comparison and features of ACT
+> ***iii. Explore genome comparison and features of ACT***
Read in genomes and alignment into ACT
@@ -472,7 +467,8 @@ Go to File -> AbauA_genome.fasta -> Read an entry file = AbauA_genome_gene.gff
```
Play around in ACT to gain some insight into the sorts of genes present in large insertion/deletion regions.
-See if you can find:
+See if you can find:
+
1) differences in phage content,
2) membrane biosynthetic gene cluster variation and
3) antibiotic resistance island variation.
diff --git a/day2_morning/README.md b/day2_morning/README.md
index b7f39cf..6e2c678 100644
--- a/day2_morning/README.md
+++ b/day2_morning/README.md
@@ -1,4 +1,5 @@
-# Day 2 Morning
+Day 2 Morning
+=============
[[HOME]](https://github.com/alipirani88/Comparative_Genomics/blob/master/README.md)
On day 1 we worked through a pipeline to map short-read data to a pre-existing assembly and identify single-nucleotide variants (SNVs) and small insertions/deletions. However, what this sort of analysis misses is the existence of sequence that is not present in your reference. Today we will tackle this issue by assembling our short reads into larger sequences, which we will then analyze to characterize the functions unique to our sequenced genome.
@@ -10,20 +11,21 @@ Execute the following command to copy files for this morning’s exercises to yo
wd
-# or
+#or
-cd /scratch/micro612w17_fluxod/username
+cd /scratch/micro612w18_fluxod/username
-> Note: Check if you are in your home directory(/scratch/micro612w17_fluxod/username) by executing 'pwd' in terminal. 'pwd' stands for present working directory and it will display the directory you are in.
+> Note: Check if you are in your home directory(/scratch/micro612w18_fluxod/username) by executing 'pwd' in terminal. 'pwd' stands for present working directory and it will display the directory you are in.
pwd
> Note: Copy files for this morning's exercise in your home directory.
-cp -r /scratch/micro612w17_fluxod/shared/data/day2_morn ./
+cp -r /scratch/micro612w18_fluxod/shared/data/day2_morn ./
```
-## Genome Assembly using [Spades](http://bioinf.spbau.ru/spades) Pipeline
+Genome Assembly using [Spades](http://bioinf.spbau.ru/spades) Pipeline
+------------------------------
[[back to top]](https://github.com/alipirani88/Comparative_Genomics/blob/master/day2_morning/README.md)
[[HOME]](https://github.com/alipirani88/Comparative_Genomics/blob/master/README.md)
@@ -33,7 +35,7 @@ There are a wide range of tools available for assembly of microbial genomes. The
Here we will use the Spades assembler with default parameters. Because genome assembly is a computationally intensive process, we will submit our assembly jobs to the cluster, and move ahead with some pre-assembled genomes, while your assemblies are running.
->i. Create directory to hold your assembly output.
+> ***i. Create directory to hold your assembly output.***
Create a new directory for the spades output in your day2_morn folder
@@ -42,9 +44,9 @@ Create a new directory for the spades output in your day2_morn folder
d2m
-# or
+#or
-cd /scratch/micro612w17_fluxod/username/day2_morn
+cd /scratch/micro612w18_fluxod/username/day2_morn
> We will create a new directory in day2_morn to save genome assembly results:
@@ -54,7 +56,7 @@ mkdir Rush_KPC_266_assembly_result
Now, we will use a genome assembly tool called Spades for assembling the reads.
->ii. Test out Spades to make sure its in your path
+> ***ii. Test out Spades to make sure it's in your path***
To make sure that your paths are set up correctly, try running Spades with the -h (help) flag, which should produce usage instructions.
@@ -65,14 +67,14 @@ spades.py -h
```
->iii. Submit a cluster job to assemble
+> ***iii. Submit a cluster job to assemble***
-Since it takes huge amount of memory and time to assemble genomes using spades, we will run a pbs script on cluster for this step.
+Since it takes a huge amount of memory and time to assemble genomes using spades, we will run a pbs script on the cluster for this step.
-Now, Open the spades.pbs file residing in day2_morning folder with nano and add the following spades command to the bottom of the file.
+Now, open the spades.pbs file residing in the day2_morn folder with nano and add the following spades command to the bottom of the file. Replace EMAIL_ADDRESS in the spades.pbs file with your actual email address. This will make sure that whenever the job starts, aborts or ends, you will get an email notification.
```
-> Open spades.pbs file using nano:
+> Open the spades.pbs file using nano:
nano spades.pbs
@@ -84,27 +86,28 @@ spades.py --pe1-1 forward_paired.fq.gz --pe1-2 reverse_paired.fq.gz --pe1-s forw
```
->iv. Submit your job to the cluster with qsub
+> ***iv. Submit your job to the cluster with qsub***
```
qsub -V spades.pbs
```
->v. Verify that your job is in the queue with the qstat command
+> ***v. Verify that your job is in the queue with the qstat command***
```
qstat -u username
```
-## Assembly evaluation using [QUAST](http://bioinf.spbau.ru/quast)
+Assembly evaluation using [QUAST](http://bioinf.spbau.ru/quast)
+---------------------------------
[[back to top]](https://github.com/alipirani88/Comparative_Genomics/blob/master/day2_morning/README.md)
[[HOME]](https://github.com/alipirani88/Comparative_Genomics/blob/master/README.md)
-The output of an assembler is a set of contigs (contiguous sequences), that are composed of the short reads that we fed in. Once we have an assembly we want to evaluate how good it is. This is somewhat qualitative, but there are some standard metrics that people use to quantify the quality of their assembly. Useful metrics include: i) number of contigs (the fewer the better), ii) N50 (the minimum contig size that at least 50% of your assembly belongs, the bigger the better). In general you want your assembly to be less than 200 contigs and have an N50 greater than 50 Kb, although these numbers of highly dependent on the properties of the assembled genome.
+The output of an assembler is a set of contigs (contiguous sequences) that are composed of the short reads that we fed in. Once we have an assembly we want to evaluate how good it is. This is somewhat qualitative, but there are some standard metrics that people use to quantify the quality of their assembly. Useful metrics include: i) number of contigs (the fewer the better), ii) N50 (the contig length such that contigs of at least this length contain at least 50% of your assembly; the bigger the better). In general you want your assembly to have fewer than 200 contigs and an N50 greater than 50 Kb, although these numbers are highly dependent on the properties of the assembled genome.
To evaluate some example assemblies we will use the tool quast. Quast produces a series of metrics describing the quality of your genome assemblies.
->i. Run quast on a set of previously generated assemblies
+> ***i. Run quast on a set of previously generated assemblies***
Now to check the example assemblies residing in your day2_morn folder, run the below quast command. Make sure you are in day2_morn folder in your home directory using 'pwd'
@@ -112,9 +115,9 @@ Now to check the example assemblies residing in your day2_morn folder, run the b
quast.py -o quast sample_264_contigs.fasta sample_266_contigs.fasta
```
-The command above will generate a report file in /scratch/micro612w17_fluxod/username/day2_morn/quast
+The command above will generate a report file in /scratch/micro612w18_fluxod/username/day2_morn/quast
->ii. Explore quast output
+> ***ii. Explore quast output***
QUAST creates output in different formats such as html, pdf and text. Now let's check the report.txt file residing in the quast folder for assembly statistics. Open report.txt using less.
@@ -122,34 +125,37 @@ QUAST creates output in different formats such as html, pdf and text. Now lets c
less quast/report.txt
```
-Check the difference between each assembly statistics. Also check different types of report it generated.
+Check the difference between the different assembly statistics. Also check the different types of report it generated.
-## Generating multiple sample reports using [multiqc](http://multiqc.info/)
+Generating multiple sample reports using [multiqc](http://multiqc.info/)
+--------------------------------------------------
![alt tag](https://github.com/alipirani88/Comparative_Genomics/blob/master/_img/day2_morning/multiqc.jpeg)
-Lets imagine a real life scenario where you are working on a project which requires you to analyze and process hundreds of samples. Having a few samples with extremely bad quality is a very commonplace. including these bad samples into your analysis without adjusting their quality threshold can have a profound effect on downstream analysis and interpretations.
+Let's imagine a real-life scenario where you are working on a project which requires you to analyze and process hundreds of samples. Having a few samples with extremely bad quality is very commonplace. Including these bad samples into your analysis without adjusting their quality threshold can have a profound effect on downstream analysis and interpretations.
-> Question How will you find those bad apples?
+- Question: How will you find those bad apples?
-Yesterday, we learned how to assess and control the quality of samples as well as screen for contaminants. But the problem with such tools or any other tools is, they work on per-sample basis and produce only single report/logs per sample. Therefore, it becomes cumbersome to dig through each sample reports and make appropriate quality control calls.
+Yesterday, we learned how to assess and control the quality of samples as well as screen for contaminants. But the problem with such tools, or any other tools, is that they work on a per-sample basis and produce only a single report/log per sample. Therefore, it becomes cumbersome to dig through each sample's reports and make appropriate quality control calls.
-Thankfully, there is a tool called multiqc which parses the results directory containing output from various tools, reads the log report created by those tools (ex: FastQC, FastqScreen, Quast), aggregates them and create a single report summarizing all of these results so that you have everything in one place. This helps greatly in identifying the outliers and removing or reanalysizing it individually.
+Thankfully, there is a tool called multiqc which parses the results directory containing output from various tools, reads the log reports created by those tools (ex: FastQC, FastqScreen, Quast), aggregates them and creates a single report summarizing all of these results so that you have everything in one place. This helps greatly in identifying the outliers and removing or reanalyzing them individually.
-Lets take a look at one such mutiqc report that was generated using FastQC results on C. difficile samples.
+Let's take a look at one such multiqc report that was generated using FastQC results on *C. difficile* samples.
-Download the html report Cdiff_multiqc_report.html from your day2_morn folder
+Download the html report Cdiff_multiqc_report.html from your day2_morn folder.
```
-# Note: Make sure you change 'username' in the below command with your 'uniqname'.
+#Note: Make sure you change 'username' in the below command to your 'uniqname'.
-scp username@flux-xfer.arc-ts.umich.edu:/scratch/micro612w17_fluxod/username/day2_morn/Cdiff_multiqc_report.html /path-to-local-directory/
+scp username@flux-xfer.arc-ts.umich.edu:/scratch/micro612w18_fluxod/username/day2_morn/Cdiff_multiqc_report.html /path-to-local-directory/
```
-> Question: Open this report in a browser and try to find the outlier sample/s
-> Question: What is the most important parameter to look for while identifying contamination or bad samples?
-> Question: What is the overall quality of data?
+- Question: Open this report in a browser and try to find the outlier sample/s
+
+- Question: What is the most important parameter to look for while identifying contamination or bad samples?
+
+- Question: What is the overall quality of data?
Lets run multiqc on one such directory where we ran and stored FastQC, FastQ Screen and Quast reports.
@@ -158,74 +164,67 @@ if you are not in day2_morn folder, navigate to it and change directory to multi
```
d2m
-# or
+#or
-cd /scratch/micro612w17_fluxod/username/day2_morn/
+cd /scratch/micro612w18_fluxod/username/day2_morn/
cd multiqc_analysis
-# Try invoking multiqc
+#Load python and Try invoking multiqc
+
+module load python-anaconda2/latest
multiqc -h
-# Run multiqc on sample reports
+#Run multiqc on sample reports
multiqc ./ --force --filename workshop_multiqc
-# Check if workshop_multiqc.html report was generated
+#Check if workshop_multiqc.html report was generated
ls
-# Copy this report to your local system and open it in a browser for visual inspection
+#transfer this report to your local system and open it in a browser for visual inspection
-scp username@flux-xfer.arc-ts.umich.edu:/scratch/micro612w17_fluxod/username/day2_morn/workshop_multiqc.html /path-to-local-directory/
+scp username@flux-xfer.arc-ts.umich.edu:/scratch/micro612w18_fluxod/username/day2_morn/multiqc_analysis/workshop_multiqc.html /path-to-local-directory/
```
-The report contains Assembly, Fastq Screen and FastQC report for a mixture of 51 organism sequence data. Sample names for Assembly statistics ends with "l500_contigs".
+The report contains the Assembly, Fastq Screen and FastQC reports for a mixture of 51 organisms' sequence data. Sample names for Assembly statistics end with "l500_contigs".
-> Question: Play around with General statistics table by sorting different columns. (click on a column header). To view just the assembly statistics, click on N50 column header. Which sample has the worst N50 value? what do you think must be the reason?
+- Question: Play around with the General statistics table by sorting different columns. (click on a column header). To view just the assembly statistics, click on the N50 column header. Which sample has the worst N50 value? What do you think must be the reason?
-> Question? Which two sample's genome length i.e column Length(Mbp) stand out from all the other genome lengths? What is their GC %? What about their FastQ Screen result?
+- Question: Which two samples' genome lengths (i.e. column Length (Mbp)) stand out from all the other genome lengths? What is their GC %? What about their FastQ Screen results?
-> Question? What about Number of Contigs section? Are you getting reasonable number of contigs or is there any bad assembly?
+- Question: What about the Number of Contigs section? Are you getting a reasonable number of contigs or are there any bad assemblies?
-> Question? Any sample's quality stand from the rest of the bunch?
+- Question: Does any sample's quality stand out from the rest of the bunch?
-## Compare assembly to reference genome and post-assembly genome improvement
+Compare assembly to reference genome and post-assembly genome improvement
+-------------------------------------------------------------------------
[[back to top]](https://github.com/alipirani88/Comparative_Genomics/blob/master/day2_morning/README.md)
[[HOME]](https://github.com/alipirani88/Comparative_Genomics/blob/master/README.md)
-Now that we feel confident in our assembly, lets compare it to our reference to see if we can identify any large insertions/deletions using a graphical user interface called Artemis Comparison Tool (ACT) for visualization.
+Now that we feel confident in our assembly, let's compare it to our reference to see if we can identify any large insertions/deletions using a graphical user interface called Artemis Comparison Tool (ACT) for visualization.
+
+
+In order to simplify the comparison between assembly and reference, we first need to orient the order of the contigs to reference.
-iv. Run abacas to orient contigs to reference
+> ***i. Run abacas to orient contigs to the reference***
To orient our contigs relative to the reference we will use a tool called abacas. [ABACAS](http://www.sanger.ac.uk/science/tools/pagit) aligns contigs to a reference genome and then stitches them together to form a “pseudo-chromosome”.
@@ -286,13 +270,11 @@ Go back to flux and into the directory where the assembly is located.
```
d2m
-# or
+#or
-cd /scratch/micro612w17_fluxod/username/day2_morn/
+cd /scratch/micro612w18_fluxod/username/day2_morn/
```
-
-
Now, we will run abacas using these input parameters:
1) your reference sequence (-r KPNIH1.fasta),
@@ -312,9 +294,7 @@ Now, we will run abacas using these input parameters:
Check if abacas can be properly invoked:
```
-
abacas.1.3.1.pl -h
-
```
Run abacas on assembly:
@@ -323,18 +303,18 @@ Run abacas on assembly:
abacas.1.3.1.pl -r KPNIH1.fasta -q sample_266_contigs.fasta -p nucmer -b -d -a -o sample_266_contigs_ordered
```
-v. Use ACT to view contig alignment to reference genome
+> ***ii. Use ACT to view contig alignment to reference genome***
-> Use scp to get ordered fasta sequence and .cruch file onto your laptop
+- Use scp to get the ordered fasta sequence and .crunch file onto your laptop
```
> Dont forget to change username and /path-to-local-ACT_contig_comparison-directory/ in the below command
-scp username@flux-xfer.arc-ts.umich.edu:/scratch/micro612w17_fluxod/username/day2_morn/sample_266_contigs_ordered* /path-to-previously-created-local-ACT_contig_comparison-directory/
+scp username@flux-xfer.arc-ts.umich.edu:/scratch/micro612w18_fluxod/username/day2_morn/sample_266_contigs_ordered* /path-to-previously-created-local-ACT_contig_comparison-directory/
```
-> Read files into ACT
+- Read files into ACT
```
Go to File on top left corner of ACT window -> open
@@ -344,36 +324,37 @@ Sequence file 2 = sample_266_contigs_ordered.fasta
Click Apply button
-> Dont close the ACT window
+Don't close the ACT window
```
-> Notice that the alignment is totally beautiful now!!! Scan through the alignment and play with ACT features to look at genes present in reference but not in assembly. Keep the ACT window open for further visualizations.
+- Notice that the alignment is totally beautiful now!!! Scan through the alignment and play with ACT features to look at genes present in reference but not in assembly. Keep the ACT window open for further visualizations.
![alt tag](https://github.com/alipirani88/Comparative_Genomics/blob/master/_img/day2_morning/beautiful.png)
-## Map reads to the final ordered assembly
+Map reads to the final ordered assembly
+---------------------------------------
[[back to top]](https://github.com/alipirani88/Comparative_Genomics/blob/master/day2_morning/README.md)
[[HOME]](https://github.com/alipirani88/Comparative_Genomics/blob/master/README.md)
You already know the drill/steps involved in reads mapping. Here, we will map the reads to the final ordered assembly genome instead of KPNIH1.fasta.
-First create bwa index of ordered fasta file.
+- First create a bwa index of the ordered fasta file.
```
> Only proceed further if everything worked up until now. Make sure you are in the day2_morn directory.
d2m
-# or
+#or
-cd /scratch/micro612w17_fluxod/username/day2_morn/
+cd /scratch/micro612w18_fluxod/username/day2_morn/
bwa index sample_266_contigs_ordered.fasta
samtools faidx sample_266_contigs_ordered.fasta
```
-Align the trimmed reads which we used for genome assembly to this ordered assembly using BWA mem. Convert SAM to BAM. Sort and index it.
+- Align the trimmed reads which we used for genome assembly to this ordered assembly using BWA mem. Convert SAM to BAM. Sort and index it.
```
@@ -387,13 +368,14 @@ samtools index sample_266_contigs_ordered_sort.bam
```
-Lets visualize the alignments against our ordered assembly.
+- Let's visualize the alignments against our ordered assembly.
+
Copy this sorted and indexed BAM files to local ACT_contig_comparison directory.
```
> Dont forget to change username and /path-to-local-ACT_contig_comparison-directory/ in the below command
-scp username@flux-xfer.arc-ts.umich.edu:/scratch/micro612w17_fluxod/username/day2_morn/sample_266_contigs_ordered_sort* /path-to-previously-created-local-ACT_contig_comparison-directory/
+scp username@flux-xfer.arc-ts.umich.edu:/scratch/micro612w18_fluxod/username/day2_morn/sample_266_contigs_ordered_sort* /path-to-previously-created-local-ACT_contig_comparison-directory/
```
@@ -405,16 +387,21 @@ Select File -> sample_266_contigs_ordered.fasta -> Read BAM/VCF > select sorted
![alt tag](https://github.com/alipirani88/Comparative_Genomics/blob/master/_img/day2_morning/aligned_reads_deletion.png)
+Using abacas and ACT to compare VRE/VSE genome
+----------------------------------------------
-## Genome Annotation
+Now that we have learned how ACT can be used to explore and compare genome organization and differences, try comparing VSE_ERR374928_contigs.fasta, a Vancomycin-susceptible Enterococcus assembly, against the Vancomycin-resistant Enterococcus reference genome Efaecium_Aus0085.fasta. Both files are placed in the VRE_vanB_comparison folder under the day2_morn directory. The relevant reference genbank file that can be used in ACT is Efaecium_Aus0085.gbf.
+
+Genome Annotation
+-----------------
[[back to top]](https://github.com/alipirani88/Comparative_Genomics/blob/master/day2_morning/README.md)
[[HOME]](https://github.com/alipirani88/Comparative_Genomics/blob/master/README.md)
**Identify protein-coding genes with [Prokka](http://www.vicbioinformatics.com/software.prokka.shtml)**
-From our ACT comparison of our assembly and the reference we can clearly see that there is unique sequence in our assembly. However, we still don’t know what that sequence encodes! To try to get some insight into the sorts of genes unique to our assembly we will run a genome annotation pipeline called Prokka. Prokka works by first running denovo gene prediction algorithms to identify protein coding genes and tRNA genes. Next, for protein coding genes Prokka runs a series of comparisons against databases of annotated genes to generate putative annotations for your genome.
+From our ACT comparison of our assembly and the reference we can clearly see that there is unique sequence in our assembly. However, we still don’t know what that sequence encodes! To try to get some insight into the sorts of genes unique to our assembly we will run a genome annotation pipeline called Prokka. Prokka works by first running *de novo* gene prediction algorithms to identify protein coding genes and tRNA genes. Next, for protein coding genes Prokka runs a series of comparisons against databases of annotated genes to generate putative annotations for your genome.
->i. Run Prokka on assembly
+> ***i. Run Prokka on assembly***
```
prokka --setupdb
@@ -427,23 +414,21 @@ Execute Prokka on your ordered assembly
d2m
-# or
+#or
-cd /scratch/micro612w17_fluxod/username/day2_morn/
+cd /scratch/micro612w18_fluxod/username/day2_morn/
mkdir sample_266_prokka
-> Dont forget to change username in the below command
-
prokka -kingdom Bacteria -outdir sample_266_prokka -force -prefix sample_266 sample_266_contigs_ordered.fasta
-> Use scp to get Prokka annotated genome on your laptop.
+> Use scp or cyberduck to get the Prokka annotated genome onto your laptop. Don't forget to change username in the below command
-scp -r username@flux-xfer.arc-ts.umich.edu:/scratch/micro612w17_fluxod/username/day2_morn/sample_266_prokka/ /path-to-local-ACT_contig_comparison-directory/
+scp -r username@flux-xfer.arc-ts.umich.edu:/scratch/micro612w18_fluxod/username/day2_morn/sample_266_prokka/ /path-to-local-ACT_contig_comparison-directory/
```
->ii. Reload comparison into ACT now that we’ve annotated the un-annotated!
+> ***ii. Reload comparison into ACT now that we’ve annotated the un-annotated!***
Read files into ACT
@@ -454,4 +439,4 @@ Comparison file 1 = sample_266_contigs_ordered.crunch
Sequence file 2 = sample_266_contigs_ordered.gbf
```
->Play around with ACT to see what types of genes are unique to sample 266!!!
+- Play around with ACT to see what types of genes are unique to sample 266!!!
diff --git a/day3_afternoon/README.md b/day3_afternoon/README.md
index fe15114..e9f3b0c 100644
--- a/day3_afternoon/README.md
+++ b/day3_afternoon/README.md
@@ -1,7 +1,9 @@
-# Day 3 Afternoon
+Day 3 Afternoon
+===============
[[HOME]](https://github.com/alipirani88/Comparative_Genomics/blob/master/README.md)
-## Klebsiella pneumoniae comparative genomic analysis
+Klebsiella pneumoniae comparative genomic analysis
+--------------------------------------------------
To finish up the workshop we are going to go through the process of working up a complete dataset, from start to finish. This set of genomes originated from a regional outbreak of bla-KPC carrying Klebsiella pneumoniae – one of the most concerning healthcare associated pathogens.
The goal is to follow up on a previously [published](http://cid.oxfordjournals.org/content/53/6/532.abstract) epidemiologic analysis, and see if genomics supports prior epidemiologic conclusions and can provide additional insights.
@@ -25,20 +27,25 @@ Execute the following command to copy files for this afternoon’s exercises to
```
-cd /scratch/micro612w17_fluxod/username
+cd /scratch/micro612w18_fluxod/username
-cp -r /scratch/micro612w17_fluxod/shared/data/day3_after ./
+or
+
+wd
+
+cp -r /scratch/micro612w18_fluxod/shared/data/day3_after ./
```
-## Perform QC on fastq files
+Perform QC on fastq files
+-------------------------
[[back to top]](https://github.com/alipirani88/Comparative_Genomics/blob/master/day3_afternoon/README.md)
[[HOME]](https://github.com/alipirani88/Comparative_Genomics/blob/master/README.md)
On the first morning you ran FastQC to evaluate the quality of a single genome. However, a typical project will include many genomes and you will want to check the quality of all of your samples. From the bash workshop, I hope you can appreciate that you do not want to process 100 genomes by typing 100 commands – rather you want to write a short shell script to do the work for you!
->i. Edit the shell script fastqc.sh located in /scratch/micro612w17_fluxod/your username/day3_after to run FastQC on all fastq files.
+> ***i. Edit the shell script fastqc.sh located in /scratch/micro612w18_fluxod/your username/day3_after to run FastQC on all fastq files.***
**Important info about this shell script**
- The shell script includes a for loop that loops over all of the genomes in the target directory
@@ -53,7 +60,7 @@ On the first morning you ran FastQC to evaluate the quality of a single genome.
The fastq files are located in:
```
-/scratch/micro612w17_fluxod/shared/data/day3_after_fastq/
+/scratch/micro612w18_fluxod/shared/data/day3_after_fastq/
```
Rather than copying these to your directory, analyze the files directly in that directory, so everyone doesn’t have to copy 25G to their home directories.
@@ -62,14 +69,15 @@ Copy and paste commands to run fastqc.sh as PBS script, into a PBS script and su
Your PBS script will contain the following command after the PBS preamble stuff (make sure your $PBS_O_WORKDIR is set inside the pbs script):
-```bash fastqc.sh /scratch/micro612w17_fluxod/shared/data/day3_after_fastq/ ```
+```bash fastqc.sh /scratch/micro612w18_fluxod/shared/data/day3_after_fastq/ ```
->ii. Examine output of FastQC to verify that all samples are OK
+> ***ii. Examine output of FastQC to verify that all samples are OK***
Check the multiqc report of your fastq files.
-## Examine results of [SPANDx](http://www.ncbi.nlm.nih.gov/pubmed/25201145) pipeline
+Examine results of [SPANDx](http://www.ncbi.nlm.nih.gov/pubmed/25201145) pipeline
+---------------------------
[[back to top]](https://github.com/alipirani88/Comparative_Genomics/blob/master/day3_afternoon/README.md)
[[HOME]](https://github.com/alipirani88/Comparative_Genomics/blob/master/README.md)
@@ -77,9 +85,13 @@ On the afternoon of day 1 we saw how many steps are involved in calling variants
More information on SPANDx pipeline can be obtained from [this](https://sourceforge.net/projects/spandx/files/SPANDx%20Manual_v3.1.pdf/download) manual.
+A snapshot of the pipeline is shown below:
+
+![alt tag](https://github.com/alipirani88/Comparative_Genomics/blob/master/_img/spandx.jpg)
+
Because it takes a while to run, we have pre-run it for you. Your task will be to sort through the outputs of SPANDx. The detailed information about how to interpret the output is in SPANDx manual(section INTERPRETING THE OUTPUTS).
->i. Look at overall statistics for variant calling in excel
+> ***i. Look at overall statistics for variant calling in excel***
SPANDx produces an overall summary file of its run that includes:
@@ -96,25 +108,26 @@ Use less to look at this file and then apply unix commands to extract and sort i
**HINTS**
The following unix commands can be used to get sorted lists of coverage and numbers of SNPs/indels: tail, cut, sort
->ii. Look at filtered variants produced by SPANDx in excel
+> ***ii. Look at filtered variants produced by SPANDx in excel***
SPANDx also produces a summary file of the variants/indels it identified in the core genome.
This summary file is:
-```/scratch/micro612w17_fluxod/username/day3_after/SPANDx_output/Outputs/All_SNPs_annotated.txt ```
+```/scratch/micro612w18_fluxod/username/day3_after/SPANDx_output/Outputs/All_SNPs_annotated.txt ```
-Use sftp to download this file and view in excel
+Use cyberduck/scp to download this file and view in excel
- View SPANDx manual for interpretation of different columns which can be found [here](https://sourceforge.net/projects/spandx/files/SPANDx%20Manual_v3.1.pdf/download)
- Back on Flux, use grep to pull SNPs that have HIGH impact
- What types of mutations are predicted to have “HIGH” impact?
- How many genomes do these HIGH impact mutations tend to be present in? How do you interpret this?
-## Recombination detection and tree generation
+Recombination detection and tree generation
+-------------------------------------------
[[back to top]](https://github.com/alipirani88/Comparative_Genomics/blob/master/day3_afternoon/README.md)
[[HOME]](https://github.com/alipirani88/Comparative_Genomics/blob/master/README.md)
->i. Plot the distribution of variants across the genome in R
+> ***i. Plot the distribution of variants across the genome in R***
The positions of variants are embedded in the first column of Outputs/Comparative/All_SNPs_annotated.txt, but you have to do some work to isolate them!
@@ -129,11 +142,11 @@ The positions of variants are embedded in the first column of Outputs/Comparativ
- Finally, download this file, read it into R using ‘read.table’ and use ‘hist’ to plot a histogram of the positions
- Do you observe clustering of variants that would be indicative of recombination?
->ii. Create fasta file of variants from nexus file
+> ***ii. Create fasta file of variants from nexus file***
SPANDx creates a file of core SNPs in a slightly odd format (transposed nexus).
This file is called:
-```/scratch/micro612w17_fluxod/username/day3_after/SPANDx_output/Outputs/Comparative/Ortho_SNP_matrix.nex ```
+```/scratch/micro612w18_fluxod/username/day3_after/SPANDx_output/Outputs/Comparative/Ortho_SNP_matrix.nex ```
For convenience, apply the custom perl script located in the same directory to convert it to fasta format
@@ -143,7 +156,7 @@ perl transpose_nex_to_fasta.pl Ortho_SNP_matrix.nex
This file Outputs/Comparative/Ortho_SNP_matrix.fasta should now exist
->iii. Create maximum likelihood tree in Seaview
+> ***iii. Create maximum likelihood tree in Seaview***
```
@@ -153,11 +166,12 @@ Save tree for later analysis
```
-## Phylogenetic tree annotation and visualization
+Phylogenetic tree annotation and visualization
+----------------------------------------------
[[back to top]](https://github.com/alipirani88/Comparative_Genomics/blob/master/day3_afternoon/README.md)
[[HOME]](https://github.com/alipirani88/Comparative_Genomics/blob/master/README.md)
->i. Load the maximum likelihood tree into iTOL
+> ***i. Load the maximum likelihood tree into iTOL***
Note that because the out-group is so distantly related it is difficult to make out the structure of the rest of the tree.
@@ -166,7 +180,7 @@ Note that because the out-group is so distantly related it is difficult to make
- Click on the KPNIH1 leaf, go to the “tree structure” menu and “delete leaf”
- Click on the extended branch leading to where KPNIH1 was, go to the “tree structure” menu and click “collapse branch”
->ii. Load the annotation file ‘Rush_KPC_facility_codes_iTOL.txt’ to view the facility of isolation, play with tree visualization properties to understand how isolates group by facility, Circular vs. normal tree layout, Bootstrap values, Ignoring branch lengths
+> ***ii. Load the annotation file ‘Rush_KPC_facility_codes_iTOL.txt’ to view the facility of isolation, play with tree visualization properties to understand how isolates group by facility, Circular vs. normal tree layout, Bootstrap values, Ignoring branch lengths***
```
@@ -175,11 +189,12 @@ Which patient’s infections might have originated from the blue facility?
```
-## Assessment of genomic deletions
+Assessment of genomic deletions
+-------------------------------
[[back to top]](https://github.com/alipirani88/Comparative_Genomics/blob/master/day3_afternoon/README.md)
[[HOME]](https://github.com/alipirani88/Comparative_Genomics/blob/master/README.md)
->i. Download genome coverage bed file and load into R
+> ***i. Download genome coverage bed file and load into R***
This file is located in: Outputs/Comparative/Bedcov_merge.txt
This file contains information regarding locations in the reference genome that each sequenced genome does and does not map to.
@@ -201,14 +216,14 @@ After you download this file, read it into R
**HINTS**
- Use the read.table function with the relevant parameters being: header and sep
->ii. Plot heatmap of genome coverage bed file
+> ***ii. Plot heatmap of genome coverage bed file***
**HINTS**
- The first 3 columns of the bed file specify the name of the chromosome and the genome coordinates – therefore you want to subset your matrix to not include these columns
- Use the heatmap3 function to make your heatmap with the following parameters: scale = “none” (keeps original values), Rowv = NA (suppress clustering by rows – why might we not want to cluster by rows for this analysis?)
-> Note a large genomic deletion among a subset of isolates. Does this deletion fit with the phylogeny from above?
+- Note a large genomic deletion among a subset of isolates. Does this deletion fit with the phylogeny from above?
iii. Explore genomic deletion in more detail with ACT
diff --git a/day3_morning/README.md b/day3_morning/README.md
index 485cb07..0daf316 100644
--- a/day3_morning/README.md
+++ b/day3_morning/README.md
@@ -1,4 +1,5 @@
-# Day 3 Morning
+Day 3 Morning
+=============
[[HOME]](https://github.com/alipirani88/Comparative_Genomics/blob/master/README.md)
On day 1, we ran through a pipeline to map reads against a reference genome and call variants, but didn’t do much with the variants we identified. Among the most common analyses to perform on a set of variants is to construct phylogenetic trees. Here we will explore different tools for generating and visualizing phylogenetic trees, and also see how recombination can distort phylogenetic signal.
@@ -20,23 +21,24 @@ Execute the following command to copy files for this afternoon’s exercises to
```
wd
-# or
+#or
-cd /scratch/micro612w17_fluxod/username
+cd /scratch/micro612w18_fluxod/username
-cp -r /scratch/micro612w17_fluxod/shared/data/day3_morn ./
+cp -r /scratch/micro612w18_fluxod/shared/data/day3_morn ./
```
-## Perform whole genome alignment with [Mauve](http://darlinglab.org/mauve/mauve.html) and convert alignment to other useful formats
+Perform whole genome alignment with [Mauve](http://darlinglab.org/mauve/mauve.html) and convert alignment to other useful formats
+-------------------------------------------
[[back to top]](https://github.com/alipirani88/Comparative_Genomics/blob/master/day3_morning/README.md)
[[HOME]](https://github.com/alipirani88/Comparative_Genomics/blob/master/README.md)
An alternative approach for identification of variants among genomes is to perform whole genome alignments of assemblies. If the original short read data is unavailable, this might be the only approach available to you. Typically, these programs don’t scale well to large numbers of genomes (e.g. > 100), but they are worth being familiar with. We will use the tool mauve for constructing whole genome alignments of our five A. baumannii genomes.
->i. Perform mauve alignment and transfer xmfa back to flux
+> ***i. Perform mauve alignment and transfer xmfa back to flux***
-Use sftp to get genomes onto your laptop
+Use Cyberduck or scp to copy the Abau_genomes folder onto your laptop
```
Run these commands on your local system/terminal:
@@ -47,15 +49,9 @@ mkdir Abau_mauve
cd Abau_mauve
-> Now copy Abau_genomes folder residing in your day3_morn folder using scp or sftp:
+- Now copy Abau_genomes folder residing in your day3_morn folder using scp or cyberduck:
-scp -r username@flux-xfer.arc-ts.umich.edu:/scratch/micro612w17_fluxod/username/day3_morn/Abau_genomes ./
-
-OR
-
-sftp –r username@flux-login.arc-ts.umich.edu
-cd /scratch/micro612w17_fluxod/username/day3_morn
-get Abau_genomes
+scp -r username@flux-xfer.arc-ts.umich.edu:/scratch/micro612w18_fluxod/username/day3_morn/Abau_genomes ./
```
@@ -72,28 +68,20 @@ vi. Wait for Mauve to finish and explore the graphical interface
```
-Use sftp or scp to transfer your alignment back to flux for some processing
+Use cyberduck or scp to transfer your alignment back to flux for some processing
```
-cd ~/Desktop/Abau_mauve
-sftp –r username@flux-login.arc-ts.umich.edu
-cd /scratch/micro612w17_fluxod/username/day3_morn
-put mauve_ECII_outgroup
-
-OR
-
-scp ~/Desktop/Abau_mauve/mauve_ECII_outgroup username@flux-xfer.arc-ts.umich.edu:/scratch/micro612w17_fluxod/username/day3_morn
+scp ~/Desktop/Abau_mauve/mauve_ECII_outgroup username@flux-xfer.arc-ts.umich.edu:/scratch/micro612w18_fluxod/username/day3_morn
```
->ii. Convert alignment to fasta format
+> ***ii. Convert alignment to fasta format***
Mauve produces alignments in .xmfa format (use less to see what this looks like), which is not compatible with other programs we want to use. We will use a custom script convert_msa_format.pl to change the alignment format to fasta format
-
-```
+```
Now run these command in day3_morn folder on flux:
module load bioperl
@@ -102,7 +90,8 @@ perl convert_msa_format.pl -i mauve_ECII_outgroup -o mauve_ECII_outgroup.fasta -
```
-## Perform some DNA sequence comparisons and phylogenetic analysis in [APE](http://ape-package.ird.fr/), an R package
+Perform some DNA sequence comparisons and phylogenetic analysis in [APE](http://ape-package.ird.fr/), an R package
+------------------------------------------------------------------------
[[back to top]](https://github.com/alipirani88/Comparative_Genomics/blob/master/day3_morning/README.md)
[[HOME]](https://github.com/alipirani88/Comparative_Genomics/blob/master/README.md)
@@ -110,36 +99,32 @@ There are lots of options for phylogenetic analysis. Here, we will use the ape p
Note that ape has a ton of useful functions for more sophisticated phylogenetic analyses!
->i. Get fasta alignment you just converted to your own computer using sftp or scp
+> ***i. Get fasta alignment you just converted to your own computer using cyberduck or scp***
```
cd ~/Desktop/Abau_mauve
-sftp –r username@flux-login.arc-ts.umich.edu
-cd /scratch/micro612w17_fluxod/username/day3_morn
-get mauve_ECII_outgroup.fasta
-
-OR
-scp username@flux-xfer.arc-ts.umich.edu:/scratch/micro612w17_fluxod/username/day3_morn/mauve_ECII_outgroup.fasta ./
+scp username@flux-xfer.arc-ts.umich.edu:/scratch/micro612w18_fluxod/username/day3_morn/mauve_ECII_outgroup.fasta ./
```
-ii. Read alignment into R
+> ***ii. Read alignment into R***
-Fire up RStudio and install/load ape
+Fire up RStudio, set your working directory to ~/Desktop/Abau_mauve/ (or wherever you downloaded the mauve_ECII_outgroup.fasta file), and install/load ape
Use the read.dna function in ape to read in you multiple alignments.
Print out the variable to get a summary.
```
+setwd("~/Desktop/Abau_mauve/")
install.packages("ape")
library(ape)
abau_msa = read.dna('mauve_ECII_outgroup.fasta', format = "fasta")
```
->iii. Get variable positions
+> ***iii. Get variable positions***
The DNA object created by read.dna can also be addressed as a matrix, where the columns are positions in the alignment and rows are your sequences. We will next treat our alignment as a matrix, and use apply and colSums to get positions in the alignment that vary among our sequences. Examine these commands in detail to understand how they are working together to give you a logical vector indicating which positions vary in your alignment.
@@ -150,7 +135,7 @@ abau_msa_bin = apply(abau_msa, 2, FUN = function(x){x == x[1]})
abau_var_pos = colSums(abau_msa_bin) < 5
```
->iv. Get non-gap positions
+> ***iv. Get non-gap positions***
For our phylogenetic analysis we want to focus on the core genome, so we will next identify positions in the alignment where all our genomes have sequence.
@@ -158,7 +143,7 @@ For our phylogenetic analysis we want to focus on the core genome, so we will ne
non_gap_pos = colSums(as.character(abau_msa) == '-') == 0
```
->v. Count number of variants between sequences
+> ***v. Count number of variants between sequences***
Now that we know which positions in the alignment are core and variable, we can extract these positions and count how many variants there are among our genomes. Do count pairwise variants we will use the dist.dna function in ape. The model parameter indicates that we want to compare sequences by counting differences. Print out the resulting matrix to see how different our genomes are.
@@ -169,7 +154,7 @@ var_count_matrix = dist.dna(abau_msa_var, model = "N")
```
->vi. Construct phylogenetic tree
+> ***vi. Construct phylogenetic tree***
Now we are ready to construct our first phylogenetic tree!
@@ -193,17 +178,18 @@ Finally, plot your tree to see how the genomes group.
plot(abau_nj_tree)
```
-## Perform SNP density analysis to discern evidence of recombination
+Perform SNP density analysis to discern evidence of recombination
+-----------------------------------------------------------------
[[back to top]](https://github.com/alipirani88/Comparative_Genomics/blob/master/day3_morning/README.md)
[[HOME]](https://github.com/alipirani88/Comparative_Genomics/blob/master/README.md)
An often-overlooked aspect of a proper phylogenetic analysis is to exclude recombinant sequences. Homologous recombination in bacterial genomes is a mode of horizontal transfer, wherein genomic DNA is taken up and swapped in for a homologous sequence. The reason it is critical to account for these recombinant regions is that these horizontally acquired sequences do not represent the phylogenetic history of the strain of interest, but rather in contains information regarding the strain in which the sequence was acquired from. One simple approach for detecting the presence of recombination is to look at the density of variants across a genome. The existence of unusually high or low densities of variants is suggestive that these regions of aberrant density were horizontally acquired. Here we will look at our closely related A. baumannii genomes to see if there is evidence of aberrant variant densities.
->i. Subset sequences to exclude the out-group
+> ***i. Subset sequences to exclude the out-group***
For this analysis we want to exclude the out-group, because we are interested in determining whether recombination would hamper our ability to reconstruct the phylogenetic relationship among our closely related set of genomes.
->Note that the names of the sequences might be different for you, so check that if the command doesn’t work.
+- Note that the names of the sequences might be different for you, so check that if the command doesn’t work.
```
@@ -211,7 +197,7 @@ abau_msa_no_outgroup = abau_msa[c('ACICU_genome','AbauA_genome','AbauC_genome','
```
->ii. Get variable positions
+> ***ii. Get variable positions***
Next, we will get the variable positions, as before
@@ -223,7 +209,7 @@ abau_no_outgroup_var_pos = colSums(abau_msa_no_outgroup_bin) < 4
```
->iii. Get non-gap positions
+> ***iii. Get non-gap positions***
Next, we will get the core positions, as before
@@ -233,7 +219,7 @@ abau_no_outgroup_non_gap_pos = colSums(as.character(abau_msa_no_outgroup) == '-'
```
->iv. Create overall histogram of SNP density
+> ***iv. Create overall histogram of SNP density***
Finally, create a histogram of SNP density across the genome. Does the density look even, or do you think there might be just a touch of recombination?
@@ -241,25 +227,25 @@ Finally, create a histogram of SNP density across the genome. Does the density l
hist(which(abau_no_outgroup_var_pos & abau_no_outgroup_non_gap_pos), 10000)
```
-## Perform recombination filtering with [Gubbins](https://www.google.com/search?q=gubbins+sanger&ie=utf-8&oe=utf-8)
+Perform recombination filtering with [Gubbins](https://www.google.com/search?q=gubbins+sanger&ie=utf-8&oe=utf-8)
+----------------------------------------------
[[back to top]](https://github.com/alipirani88/Comparative_Genomics/blob/master/day3_morning/README.md)
[[HOME]](https://github.com/alipirani88/Comparative_Genomics/blob/master/README.md)
Now that we know there is recombination, we know that we need to filter out the recombinant regions to discern the true phylogenetic relationship among our strains. In fact, this is such an extreme case (~99% of variants of recombinant), that we could be totally misled without filtering recombinant regions. To accomplish this we will use the tool gubbins, which essentially relies on elevated regions of variant density to perform recombination filtering.
->i. Run gubbins on your fasta alignment
+> ***i. Run gubbins on your fasta alignment***
Go back on flux and load modules required by gubbins
-
-
-```
-Check if gubbins run after loading newer version flux modules
+
+
+```
-Newer version:
-module load python-anaconda2/201607 biopython dendropy reportlab fasttree RAxML fastml/gub gubbins
+module load bioperl python-anaconda2/201607 biopython dendropy reportlab fasttree RAxML fastml/gub gubbins
```
@@ -268,15 +254,15 @@ Run gubbins on your fasta formatted alignment
```
d3m
-# or
+#or
-cd /scratch/micro612w17_fluxod/username/day3_morn
+cd /scratch/micro612w18_fluxod/username/day3_morn
run_gubbins.py -v -f 50 -o Abau_AB0057_genome mauve_ECII_outgroup.fasta
```
->ii. Create gubbins output figure
+> ***ii. Create gubbins output figure***
Gubbins produces a series of output files, some of which can be run through another program to produce a visual display of filtered recombinant regions. Run the gubbins_drawer.py script to create a pdf visualization of recombinant regions.
@@ -293,23 +279,16 @@ The inputs are:
gubbins_drawer.py -t mauve_ECII_outgroup.final_tree.tre -o mauve_ECII_outgroup.recombination.pdf mauve_ECII_outgroup.recombination_predictions.embl
```
->iii. Download and view gubbins figure and filtered tree
+> ***iii. Download and view gubbins figure and filtered tree***
-Use sftp or scp to get gubbins output files into Abau_mauve on your local system
+Use cyberduck or scp to get gubbins output files into Abau_mauve on your local system
```
cd ~/Desktop/Abau_mauve
-sftp –r username@flux-login.arc-ts.umich.edu
-cd /scratch/micro612w17_fluxod/username/day3_morn
-get mauve_ECII_outgroup.recombination.pdf
-get mauve_ECII_outgroup.final_tree.tre
-
-OR
-
-scp username@flux-xfer.arc-ts.umich.edu:/scratch/micro612w17_fluxod/username/day3_morn/mauve_ECII_outgroup.recombination.pdf ./
-scp username@flux-xfer.arc-ts.umich.edu:/scratch/micro612w17_fluxod/username/day3_morn/mauve_ECII_outgroup.final_tree.tre ./
+scp username@flux-xfer.arc-ts.umich.edu:/scratch/micro612w18_fluxod/username/day3_morn/mauve_ECII_outgroup.recombination.pdf ./
+scp username@flux-xfer.arc-ts.umich.edu:/scratch/micro612w18_fluxod/username/day3_morn/mauve_ECII_outgroup.final_tree.tre ./
```
@@ -337,17 +316,18 @@ To view sub-tree of interest click on “sub-tree” and select the sub-tree exc
How does the structure look different than the unfiltered tree?
-> Note that turning back to the backstory of these isolates, Abau_B and Abau_C were both isolated first from the same patient. So this analysis supports that patient having imported both strains, which likely diverged at a prior hospital at which they resided.
+- Note that turning back to the backstory of these isolates, Abau_B and Abau_C were both isolated first from the same patient. So this analysis supports that patient having imported both strains, which likely diverged at a prior hospital at which they resided.
-## Create annotated publication quality trees with [iTOL](http://itol.embl.de/)
+Create annotated publication quality trees with [iTOL](http://itol.embl.de/)
+------------------------------------------------------
[[back to top]](https://github.com/alipirani88/Comparative_Genomics/blob/master/day3_morning/README.md)
[[HOME]](https://github.com/alipirani88/Comparative_Genomics/blob/master/README.md)
For the final exercise we will use a different dataset, composed of USA300 methicillin-resistant Staphylococcus aureus genomes. USA300 is a strain of growing concern, as it has been observed to cause infections in both hospitals and in otherwise healthy individuals in the community. An open question is whether there are sub-clades of USA300 in the hospital and the community, or if they are all the same. Here you will create an annotated phylogenetic tree of strains from the community and the hospital, to discern if these form distinct clusters.
->i. Download MRSA genome alignment from flux
+> ***i. Download MRSA genome alignment from flux***
-Use sftp or scp to get genomes onto your laptop
+Use cyberduck or scp to get genomes onto your laptop
```
@@ -355,20 +335,13 @@ cd ~/Desktop (or wherever your desktop is)
mkdir MRSA_genomes
cd MRSA_genomes
-sftp –r username@flux-login.arc-ts.umich.edu
-cd /scratch/micro612w17_fluxod/username/day3_morn
-get 2016-3-9_KP_BSI_USA300.fa
-get 2016-3-9_KP_BSI_USA300_iTOL_HA_vs_CA.txt
-
-OR
-
-scp username@flux-xfer.arc-ts.umich.edu:/scratch/micro612w17_fluxod/username/day3_morn/2016-3-9_KP_BSI_USA300.fa ./
-scp username@flux-xfer.arc-ts.umich.edu:/scratch/micro612w17_fluxod/username/day3_morn/2016-3-9_KP_BSI_USA300_iTOL_HA_vs_CA.txt ./
+scp username@flux-xfer.arc-ts.umich.edu:/scratch/micro612w18_fluxod/username/day3_morn/2016-3-9_KP_BSI_USA300.fa ./
+scp username@flux-xfer.arc-ts.umich.edu:/scratch/micro612w18_fluxod/username/day3_morn/2016-3-9_KP_BSI_USA300_iTOL_HA_vs_CA.txt ./
```
->ii. Look at SNP density for MRSA alignment in R
+> ***ii. Look at SNP density for MRSA alignment in R***
Before we embark on our phylogenetic analysis, lets look at the SNP density to verify that there is no recombination
@@ -383,7 +356,7 @@ hist(which(mrsa_var_pos), 10000)
Does it look like there is evidence of recombination?
->iii. Create fasta alignment with only variable positions
+> ***iii. Create fasta alignment with only variable positions***
Next, lets create a new fasta alignment file containing only the variant positions, as this will be easier to deal with in Seaview
@@ -393,7 +366,7 @@ write.dna(mrsa_msa[, mrsa_var_pos], file = '2016-3-9_KP_BSI_USA300_var_pos.fa',
```
->iv. Read alignment into Seaview and construct Neighbor Joining tree
+> ***iv. Read alignment into Seaview and construct Neighbor Joining tree***
In the previous exercise, we used Seaview to look at a pre-existing tree, here we will use Seaview to create a tree from a
multiple sequence alignment
@@ -418,7 +391,7 @@ File -> Save rooted tree
Note that in your research it is not a good idea to use these phylogenetic tools completely blind and I strongly encourage embarking on deeper learning yourself, or consulting with an expert before doing an analysis for a publication
-v. Read tree into iTOL
+> ***v. Read tree into iTOL***
```
@@ -434,9 +407,9 @@ Explore different visualization options for your tree (e.g. make it circular, sh
Note that you can always reset your tree if you are unhappy with the changes you’ve made
->vi. Add annotations to tree
+> ***vi. Add annotations to tree***
One of the most powerful features of iTOL is its ability to overlay diverse types of descriptive meta-data on your tree (http://itol.embl.de/help.cgi#datasets). Here, we will overlay our data on whether an isolate was from a community or hospital infection. To do this simply drag-and-drop the annotation file (2016-3-9_KP_BSI_USA300_iTOL_HA_vs_CA.txt) on your tree and voila!
-> Do community and hospital isolates cluster together, or are they inter-mixed?
+- Do community and hospital isolates cluster together, or are they inter-mixed?
diff --git a/docs/Makefile b/docs/Makefile
new file mode 100644
index 0000000..7c0845a
--- /dev/null
+++ b/docs/Makefile
@@ -0,0 +1,192 @@
+# Makefile for Sphinx documentation
+#
+
+# You can set these variables from the command line; SPHINXBUILD may also be set in the environment.
+SPHINXOPTS =
+SPHINXBUILD ?= /nfs/esnitkin/bin_group/anaconda2/bin/sphinx-build
+PAPER =
+BUILDDIR = build
+
+# User-friendly check for sphinx-build
+ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1)
+$(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/)
+endif
+
+# Internal variables.
+PAPEROPT_a4 = -D latex_paper_size=a4
+PAPEROPT_letter = -D latex_paper_size=letter
+ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source
+# the i18n builder cannot share the environment and doctrees with the others
+I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source
+
+.PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp applehelp devhelp epub latex latexpdf latexpdfja text man texinfo info gettext changes linkcheck doctest coverage xml pseudoxml
+
+help:
+	@echo "Please use \`make <target>' where <target> is one of"
+ @echo " html to make standalone HTML files"
+ @echo " dirhtml to make HTML files named index.html in directories"
+ @echo " singlehtml to make a single large HTML file"
+ @echo " pickle to make pickle files"
+ @echo " json to make JSON files"
+ @echo " htmlhelp to make HTML files and a HTML help project"
+ @echo " qthelp to make HTML files and a qthelp project"
+ @echo " applehelp to make an Apple Help Book"
+ @echo " devhelp to make HTML files and a Devhelp project"
+ @echo " epub to make an epub"
+ @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter"
+ @echo " latexpdf to make LaTeX files and run them through pdflatex"
+ @echo " latexpdfja to make LaTeX files and run them through platex/dvipdfmx"
+ @echo " text to make text files"
+ @echo " man to make manual pages"
+ @echo " texinfo to make Texinfo files"
+ @echo " info to make Texinfo files and run them through makeinfo"
+ @echo " gettext to make PO message catalogs"
+ @echo " changes to make an overview of all changed/added/deprecated items"
+ @echo " xml to make Docutils-native XML files"
+ @echo " pseudoxml to make pseudoxml-XML files for display purposes"
+ @echo " linkcheck to check all external links for integrity"
+ @echo " doctest to run all doctests embedded in the documentation (if enabled)"
+ @echo " coverage to run coverage check of the documentation (if enabled)"
+
+clean:
+ rm -rf $(BUILDDIR)/*
+
+html:
+ $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html
+ @echo
+ @echo "Build finished. The HTML pages are in $(BUILDDIR)/html."
+
+dirhtml:
+ $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml
+ @echo
+ @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml."
+
+singlehtml:
+ $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml
+ @echo
+ @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml."
+
+pickle:
+ $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle
+ @echo
+ @echo "Build finished; now you can process the pickle files."
+
+json:
+ $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json
+ @echo
+ @echo "Build finished; now you can process the JSON files."
+
+htmlhelp:
+ $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp
+ @echo
+ @echo "Build finished; now you can run HTML Help Workshop with the" \
+ ".hhp project file in $(BUILDDIR)/htmlhelp."
+
+qthelp:
+ $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp
+ @echo
+ @echo "Build finished; now you can run "qcollectiongenerator" with the" \
+ ".qhcp project file in $(BUILDDIR)/qthelp, like this:"
+ @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/Micro612genomicsworkshop.qhcp"
+ @echo "To view the help file:"
+ @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/Micro612genomicsworkshop.qhc"
+
+applehelp:
+ $(SPHINXBUILD) -b applehelp $(ALLSPHINXOPTS) $(BUILDDIR)/applehelp
+ @echo
+ @echo "Build finished. The help book is in $(BUILDDIR)/applehelp."
+ @echo "N.B. You won't be able to view it unless you put it in" \
+ "~/Library/Documentation/Help or install it in your application" \
+ "bundle."
+
+devhelp:
+ $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp
+ @echo
+ @echo "Build finished."
+ @echo "To view the help file:"
+ @echo "# mkdir -p $$HOME/.local/share/devhelp/Micro612genomicsworkshop"
+ @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/Micro612genomicsworkshop"
+ @echo "# devhelp"
+
+epub:
+ $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub
+ @echo
+ @echo "Build finished. The epub file is in $(BUILDDIR)/epub."
+
+latex:
+ $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
+ @echo
+ @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex."
+ @echo "Run \`make' in that directory to run these through (pdf)latex" \
+ "(use \`make latexpdf' here to do that automatically)."
+
+latexpdf:
+ $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
+ @echo "Running LaTeX files through pdflatex..."
+ $(MAKE) -C $(BUILDDIR)/latex all-pdf
+ @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."
+
+latexpdfja:
+ $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
+ @echo "Running LaTeX files through platex and dvipdfmx..."
+ $(MAKE) -C $(BUILDDIR)/latex all-pdf-ja
+ @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."
+
+text:
+ $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text
+ @echo
+ @echo "Build finished. The text files are in $(BUILDDIR)/text."
+
+man:
+ $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man
+ @echo
+ @echo "Build finished. The manual pages are in $(BUILDDIR)/man."
+
+texinfo:
+ $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
+ @echo
+ @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo."
+ @echo "Run \`make' in that directory to run these through makeinfo" \
+ "(use \`make info' here to do that automatically)."
+
+info:
+	$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
+	@echo "Running Texinfo files through makeinfo..."
+	$(MAKE) -C $(BUILDDIR)/texinfo info
+	@echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo."
+
+gettext:
+ $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale
+ @echo
+ @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale."
+
+changes:
+ $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes
+ @echo
+ @echo "The overview file is in $(BUILDDIR)/changes."
+
+linkcheck:
+ $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck
+ @echo
+ @echo "Link check complete; look for any errors in the above output " \
+ "or in $(BUILDDIR)/linkcheck/output.txt."
+
+doctest:
+ $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest
+ @echo "Testing of doctests in the sources finished, look at the " \
+ "results in $(BUILDDIR)/doctest/output.txt."
+
+coverage:
+ $(SPHINXBUILD) -b coverage $(ALLSPHINXOPTS) $(BUILDDIR)/coverage
+ @echo "Testing of coverage in the sources finished, look at the " \
+ "results in $(BUILDDIR)/coverage/python.txt."
+
+xml:
+ $(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml
+ @echo
+ @echo "Build finished. The XML files are in $(BUILDDIR)/xml."
+
+pseudoxml:
+ $(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml
+ @echo
+ @echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml."
diff --git a/docs/build/doctrees/day1_afternoon.doctree b/docs/build/doctrees/day1_afternoon.doctree
new file mode 100644
index 0000000..c246cd7
Binary files /dev/null and b/docs/build/doctrees/day1_afternoon.doctree differ
diff --git a/docs/build/doctrees/day1_morning.doctree b/docs/build/doctrees/day1_morning.doctree
new file mode 100644
index 0000000..b893756
Binary files /dev/null and b/docs/build/doctrees/day1_morning.doctree differ
diff --git a/docs/build/doctrees/day2_afternoon.doctree b/docs/build/doctrees/day2_afternoon.doctree
new file mode 100644
index 0000000..72f39ce
Binary files /dev/null and b/docs/build/doctrees/day2_afternoon.doctree differ
diff --git a/docs/build/doctrees/day2_morning.doctree b/docs/build/doctrees/day2_morning.doctree
new file mode 100644
index 0000000..0680164
Binary files /dev/null and b/docs/build/doctrees/day2_morning.doctree differ
diff --git a/docs/build/doctrees/day3_afternoon.doctree b/docs/build/doctrees/day3_afternoon.doctree
new file mode 100644
index 0000000..a03e0db
Binary files /dev/null and b/docs/build/doctrees/day3_afternoon.doctree differ
diff --git a/docs/build/doctrees/day3_morning.doctree b/docs/build/doctrees/day3_morning.doctree
new file mode 100644
index 0000000..e8018aa
Binary files /dev/null and b/docs/build/doctrees/day3_morning.doctree differ
diff --git a/docs/build/doctrees/environment.pickle b/docs/build/doctrees/environment.pickle
new file mode 100644
index 0000000..e820ccc
Binary files /dev/null and b/docs/build/doctrees/environment.pickle differ
diff --git a/docs/build/doctrees/index.doctree b/docs/build/doctrees/index.doctree
new file mode 100644
index 0000000..c3990c3
Binary files /dev/null and b/docs/build/doctrees/index.doctree differ
diff --git a/docs/build/doctrees/index_backup.doctree b/docs/build/doctrees/index_backup.doctree
new file mode 100644
index 0000000..e8c3c22
Binary files /dev/null and b/docs/build/doctrees/index_backup.doctree differ
diff --git a/docs/build/doctrees/index_temp.doctree b/docs/build/doctrees/index_temp.doctree
new file mode 100644
index 0000000..3de65ca
Binary files /dev/null and b/docs/build/doctrees/index_temp.doctree differ
diff --git a/docs/build/doctrees/online_resources.doctree b/docs/build/doctrees/online_resources.doctree
new file mode 100644
index 0000000..ac954b9
Binary files /dev/null and b/docs/build/doctrees/online_resources.doctree differ
diff --git a/docs/build/doctrees/test.doctree b/docs/build/doctrees/test.doctree
new file mode 100644
index 0000000..fee68b1
Binary files /dev/null and b/docs/build/doctrees/test.doctree differ
diff --git a/docs/build/html/.buildinfo b/docs/build/html/.buildinfo
new file mode 100644
index 0000000..3c2e1f1
--- /dev/null
+++ b/docs/build/html/.buildinfo
@@ -0,0 +1,4 @@
+# Sphinx build info version 1
+# This file hashes the configuration used when building these files. When it is not found, a full rebuild will be done.
+config: 613a824ca420526f4ed8d641142d3e6e
+tags: 645f666f9bcd5a90fca523b33c5a78b7
diff --git a/docs/build/html/_images/1.png b/docs/build/html/_images/1.png
new file mode 100644
index 0000000..2c684ac
Binary files /dev/null and b/docs/build/html/_images/1.png differ
diff --git a/docs/build/html/_images/1_1.png b/docs/build/html/_images/1_1.png
new file mode 100644
index 0000000..6745464
Binary files /dev/null and b/docs/build/html/_images/1_1.png differ
diff --git a/docs/build/html/_images/2.png b/docs/build/html/_images/2.png
new file mode 100644
index 0000000..eb2cfef
Binary files /dev/null and b/docs/build/html/_images/2.png differ
diff --git a/docs/build/html/_images/3.png b/docs/build/html/_images/3.png
new file mode 100644
index 0000000..1664b14
Binary files /dev/null and b/docs/build/html/_images/3.png differ
diff --git a/docs/build/html/_images/HET_variant.png b/docs/build/html/_images/HET_variant.png
new file mode 100644
index 0000000..284955f
Binary files /dev/null and b/docs/build/html/_images/HET_variant.png differ
diff --git a/docs/build/html/_images/HET_variant_gene_selected.png b/docs/build/html/_images/HET_variant_gene_selected.png
new file mode 100644
index 0000000..87eccc3
Binary files /dev/null and b/docs/build/html/_images/HET_variant_gene_selected.png differ
diff --git a/docs/build/html/_images/aligned_reads_deletion.png b/docs/build/html/_images/aligned_reads_deletion.png
new file mode 100644
index 0000000..60a924d
Binary files /dev/null and b/docs/build/html/_images/aligned_reads_deletion.png differ
diff --git a/docs/build/html/_images/beautiful.png b/docs/build/html/_images/beautiful.png
new file mode 100644
index 0000000..fa7f432
Binary files /dev/null and b/docs/build/html/_images/beautiful.png differ
diff --git a/docs/build/html/_images/graphs.png b/docs/build/html/_images/graphs.png
new file mode 100644
index 0000000..39df74f
Binary files /dev/null and b/docs/build/html/_images/graphs.png differ
diff --git a/docs/build/html/_images/intro.png b/docs/build/html/_images/intro.png
new file mode 100644
index 0000000..2357284
Binary files /dev/null and b/docs/build/html/_images/intro.png differ
diff --git a/docs/build/html/_images/multiqc.jpeg b/docs/build/html/_images/multiqc.jpeg
new file mode 100644
index 0000000..8ad7bfe
Binary files /dev/null and b/docs/build/html/_images/multiqc.jpeg differ
diff --git a/docs/build/html/_images/picard.png b/docs/build/html/_images/picard.png
new file mode 100644
index 0000000..701b828
Binary files /dev/null and b/docs/build/html/_images/picard.png differ
diff --git a/docs/build/html/_images/plot_1.png b/docs/build/html/_images/plot_1.png
new file mode 100644
index 0000000..fa9c0e0
Binary files /dev/null and b/docs/build/html/_images/plot_1.png differ
diff --git a/docs/build/html/_images/plot_2.png b/docs/build/html/_images/plot_2.png
new file mode 100644
index 0000000..facddfe
Binary files /dev/null and b/docs/build/html/_images/plot_2.png differ
diff --git a/docs/build/html/_images/read_details.png b/docs/build/html/_images/read_details.png
new file mode 100644
index 0000000..15c99c8
Binary files /dev/null and b/docs/build/html/_images/read_details.png differ
diff --git a/docs/build/html/_images/select_graph.png b/docs/build/html/_images/select_graph.png
new file mode 100644
index 0000000..5d13302
Binary files /dev/null and b/docs/build/html/_images/select_graph.png differ
diff --git a/docs/build/html/_images/spandx.jpg b/docs/build/html/_images/spandx.jpg
new file mode 100644
index 0000000..43e124c
Binary files /dev/null and b/docs/build/html/_images/spandx.jpg differ
diff --git a/docs/build/html/_images/spike_true.png b/docs/build/html/_images/spike_true.png
new file mode 100644
index 0000000..3782999
Binary files /dev/null and b/docs/build/html/_images/spike_true.png differ
diff --git a/docs/build/html/_images/trimm_parameters.png b/docs/build/html/_images/trimm_parameters.png
new file mode 100644
index 0000000..406f7e4
Binary files /dev/null and b/docs/build/html/_images/trimm_parameters.png differ
diff --git a/docs/build/html/_sources/day1_afternoon.txt b/docs/build/html/_sources/day1_afternoon.txt
new file mode 100644
index 0000000..6618c2b
--- /dev/null
+++ b/docs/build/html/_sources/day1_afternoon.txt
@@ -0,0 +1,602 @@
+Day 1 Afternoon
+===============
+[[HOME]](index.html)
+
+Earlier this morning, we performed some quality control steps on our sequencing data to make it clean and usable for various downstream analyses. Now we will perform our first sequence analysis, specifically variant calling: we will map these reads to a reference genome and try to find out the differences between them.
+
+Read Mapping is one of the most common Bioinformatics operations that needs to be carried out on NGS data. The main goal behind read mapping/aligning is to find the best possible reference genome position to which reads could be aligned. Reads are generally mapped to a reference genome sequence that is sufficiently closely related to the sequenced genome to accurately align reads. There are a number of tools that can map reads to a reference genome and they differ from each other in algorithm, speed and accuracy. Most of these tools work by first building an index of the reference sequence, which works like a dictionary for fast search/lookup, and then applying an alignment algorithm that uses this index to align short read sequences against the reference.
+
+These alignments have a vast number of uses, including:
+
+1) variant/SNP calling: Finding differences between your sequenced organism genome and the reference genome
+2) coverage estimation: If you have sufficient reads to cover each position of reference genome.
+3) gene expression analysis: determining the level of expression of each gene in a genome.
+
+In this session, we will be covering the important steps that are part of any Read mapping/Variant calling bioinformatics pipeline.
+
+Read Mapping
+------------
+[[back to top]](day1_afternoon.html)
+[[HOME]](index.html)
+
+![alt tag](1_1.png)
+
+**1. Navigate to your workshop home directory and copy day1_after directory from shared data directory.**
+
+```
+wd
+
+cp -r /scratch/micro612w18_fluxod/shared/data/day1_after ./
+```
+
+We will be using trimmed clean reads that were obtained after running Trimmomatic on raw reads.
+
+**2. Map your reads against a finished reference genome using [BWA](http://bio-bwa.sourceforge.net/bwa.shtml "BWA manual")**
+
+Choosing the right read mapper is crucial and should be based on the type of analysis and data you are working with. Each aligner is best suited to specific types of data, for example:
+
+For whole genome or whole exome sequencing data: Use BWA for long reads (> 50/100 bp), use Bowtie2 for short reads (< 50/100bp)
+For transcriptomic data (RNA-Seq): use Splice-aware Mapper such as Tophat. (Not applicable for microbial data)
+
+Here, we will be using BWA aligner to map the reads against a reference genome, KPNIH1.
+
+BWA is one of the several read mappers that are based on Burrows-Wheeler transform algorithm. If you feel like challenging yourselves, you can read BWA paper [here](http://bioinformatics.oxfordjournals.org/content/25/14/1754.short)
+
+Read Mapping is a time-consuming step that involves searching the reference and finding the optimal location for the alignment for millions of reads. Creating an index file of a reference sequence for quick lookup/search operations significantly decreases the time required for read alignment. Imagine indexing a genome sequence like the index at the end of a book. If you want to know on which page a word appears or a chapter begins, it is much more efficient to look it up in a pre-built index than going through every page of the book. Similarly, an index of a large DNA sequence allows aligners to rapidly find shorter sequences embedded within it.
+
+Note: each read mapper has its own unique way of indexing a reference genome and therefore the reference index created by BWA cannot be used for Bowtie. (Most Bioinformatics tools nowadays require some kind of indexing or reference database creation)
+
+> ***i. To create BWA index of Reference, you need to run following command.***
+
+Start a flux interactive session
+
+```
+iflux
+```
+
+
+Navigate to day1_after folder that you recently copied and create a new folder Rush_KPC_266_varcall_result for saving this exercise's output.
+
+```
+d1a
+
+#or
+
+cd /scratch/micro612w18_fluxod/username/day1_after/
+
+mkdir Rush_KPC_266_varcall_result
+
+```
+
+Create bwa index for the reference genome.
+
+```
+bwa index KPNIH1.fasta
+```
+
+Also go ahead and create fai index file using samtools required by GATK in later downstream steps.
+
+```
+samtools faidx KPNIH1.fasta
+```
+
+> ***ii. Align reads to reference and redirect the output into SAM file***
+
+Quoting BWA:
+"BWA consists of three algorithms: BWA-backtrack, BWA-SW and BWA-MEM. The first algorithm is designed for Illumina sequence reads up to 100bp, while the rest two for longer sequences ranged from 70bp to 1Mbp. BWA-MEM and BWA-SW share similar features such as long-read support and split alignment, but BWA-MEM, which is the latest, is generally recommended for high-quality queries as it is faster and more accurate. BWA-MEM also has better performance than BWA-backtrack for 70-100bp Illumina reads."
+
+For other algorithms employed by BWA, you can refer to BWA [manual](http://bio-bwa.sourceforge.net/bwa.shtml "BWA manual")
+
+Now lets align both left and right end reads to our reference using BWA alignment algorithm 'mem'.
+
+```
+
+bwa mem -M -R "@RG\tID:96\tSM:Rush_KPC_266_1_combine.fastq.gz\tLB:1\tPL:Illumina" -t 8 KPNIH1.fasta forward_paired.fq.gz reverse_paired.fq.gz > Rush_KPC_266_varcall_result/Rush_KPC_266__aln.sam
+
+```
+
+Read group tells aligners/other tools that certain reads were sequenced together on a specific lane. If you have multiplexed samples in a single lane, you will get multiple samples in a single read group. If you sequenced the same sample in several lanes, you will have multiple read groups for the same sample.
+
+This string with -R flag says that all reads belong to ID:96 and library LB:1, with sample name SM:Rush_KPC_266_1_combine.fastq.gz, and were sequenced on the Illumina platform (PL:Illumina).
+
+You can extract this information from fastq read header. (@M02127:96:000000000-AG04W:1:1101:13648:1481 1:N:0:44)
+
+**3. SAM/BAM manipulation and variant calling using [Samtools](http://www.htslib.org/doc/samtools.html "Samtools Manual")**
+
+> ***i. Change directory to results folder and look for BWA output:***
+
+```
+cd Rush_KPC_266_varcall_result
+
+ls
+```
+
+The output of BWA and most of the short-read aligners is a SAM file. SAM format is considered the standard output for most read aligners and stands for Sequence Alignment/Map format. It is a TAB-delimited format that describes how each read was aligned to the reference sequence.
+
+Lets explore first few lines of .sam file.
+
+```
+
+head -n4 Rush_KPC_266__aln.sam
+
+```
+
+example:
+
+```
+
+@SQ SN:gi|661922017|gb|CP008827.1| LN:5394056 <=== Reference Genome name and its length
+@RG ID:96 SM:Rush_KPC_266_1_combine.fastq.gz LB:1 PL:Illumina <=== sample read group info
+@PG ID:bwa PN:bwa VN:0.7.12-r1039 CL:bwa mem -M -R @RG\tID:96\tSM:Rush_KPC_266_1_combine.fastq.gz\tLB:1\tPL:Illumina -t 8 KPNIH1.fasta forward_paired.fq.gz reverse_paired.fq.gz <== aligner command
+M02127:96:000000000-AG04W:1:1101:23094:1725 99 gi|661922017|gb|CP008827.1| 4724728 60 250M = 4724852 295 GCTGCCTGCAGCATCTCAGCGGCTTTATCGGCTCGCAGCAGGTGCGGCTGGTGACCCTCTCCGGCGGCGTCGGCCCGTATATGACCGGTATCGGCCAGCTTGATGCCGCCTGCAGCGTCAGCATTATCCCGGCGCCGCTGCGGGTCTCTTCGGCGGAGGTCTCCGAGATCCTGCGCCGCGAGTCGAGCGTGCGCGACGTGATCCTCGCGGCGACGGCGGCGGACGCGGCGGTAGTCGGCCTTGGCGCCAT CCCCCGGGGGGGGGGEGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGG@FGFGFFGGGGGGGGGGGGCFGBEGGGCFGGGFGDE>*CGEFCCFCEECCCCGGGDGE5E>5EEFEEGD=C=EDCE=EEECCC?C9CCECEDC@EF??>>@)7<6?6354,4 NM:i:2 MD:Z:161G77A10 AS:i:240 XS:i:0 RG:Z:96
+
+```
+
+The lines starting with "@" form the header section, which contains information about the reference genome, the sample read group and the aligner command that was used for aligning the samples. The header section is followed by an alignment section with information for each read. It contains 11 columns and an optional TAG field.
+
+Detailed information about these 11 columns can be obtained from this [pdf](https://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&ved=0ahUKEwizkvfAk9rLAhXrm4MKHVXxC9kQFggdMAA&url=https%3A%2F%2Fsamtools.github.io%2Fhts-specs%2FSAMv1.pdf&usg=AFQjCNHFmjxTXKnxYqN0WpIFjZNylwPm0Q) document.
+
+The second column consists of coded bitwise flags where each code flag carries important information about the alignment. Open [this](https://broadinstitute.github.io/picard/explain-flags.html) site and enter the flag "99" to find out what it stands for.
+
+The last section "NM:i:2 MD:Z:161G77A10 AS:i:240 XS:i:0 RG:Z:96" is an optional tag section and varies for different aligners(specifications based on aligners).
+
+Here,
+
+NM tag tells number of changes necessary to make it equal to the reference(2 changes)
+
+MD tag tells you what positions in the read alignment are different from reference base and is used by variant callers to call SNP's. For example, The tag "MD:Z:161G77A10" implies that position 162 in the read carries a different base whereas the reference genome carries base "G"
+
+AS is an alignment score and XS:i:0 is a suboptimal alignment score.
+
+> ***ii. Convert SAM to BAM using SAMTOOLS:***
+
+BAM is the compressed binary equivalent of SAM and is usually much smaller in size than the SAM format. Since parsing through a SAM file is slow, most of the downstream tools require the SAM file to be converted to BAM so that it can be easily sorted and indexed.
+
+The below command will ask samtools to convert SAM format(-S) to BAM format(-b)
+
+```
+samtools view -Sb Rush_KPC_266__aln.sam > Rush_KPC_266__aln.bam
+```
+
+> ***iii. Sort BAM file using SAMTOOLS:***
+
+Most of the downstream tools such as GATK require your BAM file to be indexed and sorted by reference genome positions.
+
+Now before indexing this BAM file, we will sort the data by positions(default) using samtools. Some RNA Seq/Gene expression tools require it to be sorted by read name which is achieved by passing -n flag.
+
+```
+samtools sort Rush_KPC_266__aln.bam Rush_KPC_266__aln_sort
+```
+
+**4. Mark duplicates(PCR optical duplicates) and remove them using [PICARD](http://broadinstitute.github.io/picard/command-line-overview.html#MarkDuplicates "Picard MarkDuplicates")**
+
+Illumina sequencing involves PCR amplification of adapter ligated DNA fragments so that we have enough starting material for sequencing. Therefore, some amount of duplication is inevitable. Ideally, you amplify up to ~65 fold(4% reads) but higher rates of PCR duplicates e.g. 30% arise when people have too little starting material such that greater amplification of the library is needed or some smaller fragments which are easier to PCR amplify, end up over-represented.
+
+For an in-depth explanation about how PCR duplicates arise in sequencing, please refer to this interesting [blog](http://www.cureffi.org/2012/12/11/how-pcr-duplicates-arise-in-next-generation-sequencing/)
+
+Picard identifies duplicates by searching reads that have same start position on reference or in PE reads same start for both ends. It will choose a representative from each group of duplicate reads based on best base quality scores and other criteria and retain it while removing other duplicates. This step plays a significant role in removing false positive variant calls(such as sequencing error) during variant calling that are represented by PCR duplicate reads.
+
+![alt tag](picard.png)
+
+> ***i. Create a dictionary for reference fasta file required by PICARD***
+
+Make sure you are in Rush_KPC_266_varcall_result directory and are giving proper reference genome path (day1_after directory).
+
+```
+
+java -jar /scratch/micro612w18_fluxod/shared/bin/picard-tools-1.130/picard.jar CreateSequenceDictionary REFERENCE=../KPNIH1.fasta OUTPUT=../KPNIH1.dict
+
+```
+
+> ***ii. Run PICARD for removing duplicates.***
+
+```
+
+java -jar /scratch/micro612w18_fluxod/shared/bin/picard-tools-1.130/picard.jar MarkDuplicates REMOVE_DUPLICATES=true INPUT=Rush_KPC_266__aln_sort.bam OUTPUT=Rush_KPC_266__aln_marked.bam METRICS_FILE=Rush_KPC_266__markduplicates_metrics CREATE_INDEX=true VALIDATION_STRINGENCY=LENIENT
+
+```
+
+The output of Picard remove duplicate step is a new bam file "Rush_KPC_266__aln_marked.bam" without PCR duplicates.
+
+You will need to index this new marked.bam file for further processing.
+
+> ***iii. Index this marked bam file again using SAMTOOLS(For input in Artemis later)***
+
+```
+samtools index Rush_KPC_266__aln_marked.bam
+```
+
+Open the markduplicates metrics file and glance through the number and percentage of PCR duplicates removed.
+For more details about each metrics in a metrics file, please refer to [this](https://broadinstitute.github.io/picard/picard-metric-definitions.html#DuplicationMetrics)
+
+```
+nano Rush_KPC_266__markduplicates_metrics
+
+#or
+
+less Rush_KPC_266__markduplicates_metrics
+```
+
+Generate Alignment Statistics
+-----------------------------
+
+Often, while analyzing sequencing data, we are required to make sure that our analysis steps are correct. Some statistics about our analysis will help us in making that decision. So let's try to get some statistics about various outputs that were created using the above steps and check if everything makes sense.
+
+> ***i. Collect Alignment statistics using Picard***
+
+Run the below command on your marked.bam file
+
+```
+
+java -jar /scratch/micro612w18_fluxod/shared/bin/picard-tools-1.130/picard.jar CollectAlignmentSummaryMetrics R=../KPNIH1.fasta I=Rush_KPC_266__aln_marked.bam O=AlignmentSummaryMetrics.txt
+
+```
+Open the file AlignmentSummaryMetrics.txt and explore various statistics. It will generate various statistics and the definition for each can be found [here](http://broadinstitute.github.io/picard/picard-metric-definitions.html#AlignmentSummaryMetrics)
+
+The file AlignmentSummaryMetrics.txt contains many columns and at times it becomes difficult to extract information from a particular column if we dont know the exact column number. Run the below unix gem to print column name with its number.
+
+```
+grep 'CATEGORY' AlignmentSummaryMetrics.txt | tr '\t' '\n' | cat --number
+```
+
+- Question: Extract alignment percentage from AlignmentSummaryMetrics file. (% of reads aligned to reference genome)
+
+
+
+```
+grep -v '#' AlignmentSummaryMetrics.txt | cut -f7
+```
+
+Try to explore other statistics and their definitions from Picard AlignmentSummaryMetrics [link](http://broadinstitute.github.io/picard/picard-metric-definitions.html#AlignmentSummaryMetrics)
+
+> ***ii. Estimate read coverage/read depth using Picard***
+
+Read coverage/depth describes the average number of reads that align to, or "cover," known reference bases. The sequencing depth is one of the most crucial issues in the design of next-generation sequencing experiments. This [paper](https://www.nature.com/articles/nrg3642) reviews current guidelines and precedents on the issue of coverage, as well as their underlying considerations, for four major study designs, which include de novo genome sequencing, genome resequencing, transcriptome sequencing and genomic location analyses.
+
+After read mapping, it is important to make sure that the reference bases are represented by enough read depth before making any inferences such as variant calling.
+
+```
+java -jar /scratch/micro612w18_fluxod/shared/bin/picard-tools-1.130/picard.jar CollectWgsMetrics R=../KPNIH1.fasta I=Rush_KPC_266__aln_marked.bam O=WgsMetrics.txt
+
+```
+
+Open the file "WgsMetrics.txt" and explore various statistics. It will generate various statistics and the definition for each can be found [here](https://broadinstitute.github.io/picard/picard-metric-definitions.html#CollectWgsMetrics.WgsMetrics).
+
+Print column names
+
+```
+grep 'GENOME_TERRITORY' WgsMetrics.txt | tr '\t' '\n' | cat --number
+```
+
+Since "WgsMetrics.txt" also contains histogram information, we will run commands on only the first few lines to extract information.
+
+
+- Question: Extract mean coverage information from "WgsMetrics.txt"
+
+
+
+```
+grep -v '#' WgsMetrics.txt | cut -f2 | head -n3
+
+```
+
+> Question: Percentage of bases that attained at least 5X sequence coverage.
+
+```
+grep -v '#' WgsMetrics.txt | cut -f13 | head -n3
+```
+
+> Question: Percentage of bases that had significantly high coverage. Regions with unusually high depth sometimes indicate either repetitive regions or PCR amplification bias.
+
+```
+grep -v '#' WgsMetrics.txt | cut -f25 | head -n3
+```
+
+
+
+Variant Calling and Filteration
+-------------------------------
+[[back to top]](day1_afternoon.html)
+[[HOME]](index.html)
+
+One of the downstream uses of read mapping is finding differences between our sequence data against a reference. This step is achieved by carrying out variant calling using any of the variant callers (samtools, gatk, freebayes etc). Each variant caller uses a different statistical framework to discover SNPs and other types of mutations. For those of you who are interested in finding out more about the statistics involved, please refer to [this](http://www.ncbi.nlm.nih.gov/pubmed/21903627) samtools paper, one of most commonly used variant callers.
+
+The [GATK best practices guide](https://www.broadinstitute.org/gatk/guide/best-practices.php) will provide more details about various steps that you can incorporate in your analysis.
+
+There are many published articles that compare different variant callers but this is a very interesting [blog post](https://bcbio.wordpress.com/2013/10/21/updated-comparison-of-variant-detection-methods-ensemble-freebayes-and-minimal-bam-preparation-pipelines/) that compares the performance and accuracy of different variant callers.
+
+Here we will use samtools mpileup to perform this operation on our BAM file and generate a VCF (variant call format) file.
+
+**1. Call variants using [samtools](http://www.htslib.org/doc/samtools.html "samtools manual") mpileup and [bcftools](https://samtools.github.io/bcftools/bcftools.html "bcftools")**
+
+```
+
+samtools mpileup -ug -f ../KPNIH1.fasta Rush_KPC_266__aln_marked.bam | bcftools call -O v -v -c -o Rush_KPC_266__aln_mpileup_raw.vcf
+
+
+#In the above command, we are using samtools mpileup to generate a pileup formatted file from BAM alignments and genotype likelihoods (-g flag) in BCF format (binary version of vcf). This bcf output is then piped to bcftools, which calls variants and outputs them in vcf format (-c flag for using consensus calling algorithm and -v for outputting variants positions only)
+
+
+```
+
+Let's go through the VCF file and try to understand a few important VCF specifications and criteria that we can use for filtering low confidence SNPs.
+
+```
+less Rush_KPC_266__aln_mpileup_raw.vcf
+```
+
+1. CHROM, POS: 1st and 2nd column represent the reference genome name and reference base position where a variant was called
+2. REF, ALT: 4th and 5th columns represent the reference allele at the position and alternate/variant allele called from the reads
+3. QUAL: Phred-scaled quality score for the assertion made in ALT
+4. INFO: Additional information that provides technical scores and observations for each variant. Important parameters to look for: Depth (DP), mapping quality (MQ), FQ (consensus score), allele frequency for each ALT allele (AF)
+
+VCF format stores a large variety of information and you can find more details in [this pdf](https://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&ved=0ahUKEwit35bvktzLAhVHkoMKHe3hAhYQFggdMAA&url=https%3A%2F%2Fsamtools.github.io%2Fhts-specs%2FVCFv4.2.pdf&usg=AFQjCNGFka33WgRmvOfOfp4nSaCzkV95HA&sig2=tPLD6jW5ALombN3ALRiCZg&cad=rja).
+
+Lets count the number of raw unfiltered variants found:
+
+```
+grep -v '#' Rush_KPC_266__aln_mpileup_raw.vcf | wc -l
+
+grep -v '#' Rush_KPC_266__aln_mpileup_raw.vcf | grep 'INDEL' | wc -l
+```
+**2. Variant filtering and processed file generation using GATK and vcftools**
+
+> ***i. Variant filtering using [GATK](https://www.broadinstitute.org/gatk/guide/tooldocs/org_broadinstitute_gatk_tools_walkers_filters_VariantFiltration.php "GATK Variant Filteration"):***
+
+There are various tools that you can try for variant filteration such as vcftools, GATK, vcfutils etc. Here we will use GATK VariantFiltration utility to filter out low confidence variants.
+
+Run this command on raw vcf file Rush_KPC_266__aln_mpileup_raw.vcf.
+
+```
+
+java -jar /scratch/micro612w18_fluxod/shared/bin/GenomeAnalysisTK-3.3-0/GenomeAnalysisTK.jar -T VariantFiltration -R ../KPNIH1.fasta -o Rush_KPC_266__filter_gatk.vcf --variant Rush_KPC_266__aln_mpileup_raw.vcf --filterExpression "FQ < 0.025 && MQ > 50 && QUAL > 100 && DP > 15" --filterName pass_filter
+
+```
+
+This command will add a 'pass_filter' text in the 7th FILTER column for those variant positions that passed our filter criteria:
+
+1. DP: Depth of reads. More than 15 reads supporting a variant call at this position.
+2. MQ: Root Mean Square Mapping Quality. This provides an estimation of the overall mapping quality of reads supporting a variant call. The root mean square is equivalent to the mean of the mapping qualities plus the standard deviation of the mapping qualities.
+3. QUAL stands for phred-scaled quality score for the assertion made in ALT. High QUAL scores indicate high confidence calls.
+4. FQ stands for consensus quality. A positive value indicates heterozygote and a negative value indicates homozygous. In bacterial analysis, this plays an important role in defining if a gene was duplicated in a particular sample. We will learn more about this later while visualizing our BAM files in Artemis.
+
+Check if the pass_filter was added properly and count the number of variants that passed the filter.
+
+```
+grep 'pass_filter' Rush_KPC_266__filter_gatk.vcf | head
+
+```
+
+***Caveat: This filter criteria should be applied carefully after giving some thought to the type of library, coverage, average mapping quality, type of analysis and other such requirements.***
+
+> ***ii. Remove indels and keep only SNPS that passed our filter criteria using [the vcftools manual](http://vcftools.sourceforge.net/man_latest.html):***
+
+vcftools is a program package that is especially written to work with vcf file formats. It thus saves your precious time by making available all the common operations that you would like to perform on the vcf file using a single command. One such operation is removing INDEL information from a vcf file.
+
+Now, let's remove indels from our final vcf file and keep only variants that passed our filter criteria (positions with pass_filter in their FILTER column).
+
+```
+
+vcftools --vcf Rush_KPC_266__filter_gatk.vcf --keep-filtered pass_filter --remove-indels --recode --recode-INFO-all --out Rush_KPC_266__filter_onlysnp
+
+```
+
+
+
+***3. Variant Annotation using snpEff***
+
+Variant annotation is one of the crucial steps in any variant calling pipeline. Most of the variant annotation tools create their own database or use an external one to assign function and predict the effect of variants on genes. We will try to touch base on some basic steps of annotating variants in our vcf file using snpEff.
+
+You can annotate these variants before performing any filtering steps that we did earlier or you can decide to annotate just the final filtered variants.
+
+snpEff contains a database of about 20,000 reference genomes built from trusted and public sources. Lets check if snpEff contains a database of our reference genome.
+
+> ***i. Check snpEff internal database for your reference genome:***
+
+```
+java -jar /scratch/micro612w18_fluxod/shared/bin/snpEff/snpEff.jar databases | grep 'kpnih1'
+```
+Note down the genome id for your reference genome KPNIH1. In this case: GCA_000281535.2.29
+
+> ***ii. Change the chromosome name in the vcf file to ‘Chromosome’ for snpEff reference database compatibility.***
+
+```
+sed -i 's/gi.*|/Chromosome/g' Rush_KPC_266__filter_gatk.vcf
+```
+> ***iii. Run snpEff for variant annotation.***
+
+```
+
+java -jar /scratch/micro612w18_fluxod/shared/bin/snpEff/snpEff.jar -onlyProtein -no-upstream -no-downstream -no-intergenic -v GCA_000281535.2.29 Rush_KPC_266__filter_gatk.vcf > Rush_KPC_266__filter_gatk_ann.vcf -csvStats Rush_KPC_266__filter_gatk_stats
+
+```
+
+The STDOUT will print out some useful details such as genome name and version being used, no. of genes, protein-coding genes and transcripts, chromosome and plasmid names etc.
+
+snpEff will add an extra field named 'ANN' at the end of INFO field. Lets go through the ANN field added after annotation step.
+
+```
+grep 'ANN=' Rush_KPC_266__filter_gatk_ann.vcf | head -n1
+
+or to print on separate lines
+
+grep -o 'ANN=.*GT:PL' Rush_KPC_266__filter_gatk_ann.vcf | head -n1 | tr '|' '\n' | cat --number
+```
+
+The ANN field will provide information such as the impact of variants (HIGH/LOW/MODERATE/MODIFIER) on genes and transcripts along with other useful annotations.
+
+Detailed information of the ANN field and sequence ontology terms that it uses can be found [here](http://snpeff.sourceforge.net/SnpEff_manual.html#input).
+
+Let's see how many SNPs and Indels passed the filter using grep and wc.
+
+```
+
+No. of Variants:
+grep '^Chromosome' Rush_KPC_266__filter_gatk_ann.vcf | wc -l
+
+No. of Variants that passed the filter:
+grep '^Chromosome.*pass_filter' Rush_KPC_266__filter_gatk_ann.vcf | wc -l
+
+No. of SNPs that passed the filter:
+grep '^Chromosome.*pass_filter' Rush_KPC_266__filter_gatk_ann.vcf | grep -v 'INDEL' | wc -l
+
+No. of Indels that passed the filter:
+grep '^Chromosome.*pass_filter' Rush_KPC_266__filter_gatk_ann.vcf | grep 'INDEL' | wc -l
+
+
+```
+
+Visualize BAM and VCF files in [Artemis](http://www.sanger.ac.uk/science/tools/artemis)
+----------------------------------------
+[[back to top]](day1_afternoon.html)
+[[HOME]](index.html)
+
+While these various statistical/text analyses are helpful, visualization of all of these various output files can help in making some significant decisions and inferences about your entire analysis. There are a wide variety of visualization tools out there that you can choose from for this purpose.
+
+We will be using [Artemis](http://www.sanger.ac.uk/science/tools/artemis) here, developed by the Sanger Institute for viewing BAM and vcf files for manual inspection of some of the variants.
+
+
+- ***Required Input files:***
+
+> KPNIH1 reference fasta
+> KPNIH1 genbank file
+> Rush_KPC_266__aln_marked.bam
+> Rush_KPC_266__aln_marked.bam.bai
+> Rush_KPC_266__filter_gatk_ann.vcf.gz
+> Rush_KPC_266__filter_gatk_ann.vcf.gz.tbi
+
+Let's make a separate folder (make sure you are in the Rush_KPC_266_varcall_result folder) for the files that we need for visualization and copy them to that folder
+
+```
+
+mkdir Artemis_files
+
+cp ../KPNIH1.fasta ../KPNIH.gb Rush_KPC_266__aln_marked.bam Rush_KPC_266__aln_marked.bam.bai Rush_KPC_266__filter_gatk_ann.vcf Artemis_files/
+
+```
+
+We need to replace the genome name that we changed earlier for snpEff. (Make sure you are in Artemis_files folder)
+
+```
+
+cd Artemis_files
+
+sed -i 's/Chromosome/gi|661922017|gb|CP008827.1|/g' Rush_KPC_266__filter_gatk_ann.vcf
+
+bgzip Rush_KPC_266__filter_gatk_ann.vcf
+
+tabix Rush_KPC_266__filter_gatk_ann.vcf.gz
+```
+
+Open a new terminal and run the scp command or cyberduck to get these files to your local system.
+
+```
+
+scp -r username@flux-xfer.arc-ts.umich.edu:/scratch/micro612w18_fluxod/username/day1_after/Rush_KPC_266_varcall_result/Artemis_files/ /path-to-local-directory/
+
+#You can use ~/Desktop/ as your local directory path
+```
+
+Start Artemis.
+
+Set your working directory to Artemis_files (the Artemis_files folder that you copied to your local system) by clicking the browse button and click OK.
+
+Now go to the top left File options and select Open File Manager. You should see the folder Artemis_files. Expand it and select KPNIH.gb file. A new window should open displaying your features stored in a genbank file.
+
+Now open the BAM file by selecting File (Top left corner) -> Read BAM/VCF file -> Select -> Rush_KPC_266__aln_marked.bam -> OK
+
+Reads aligned to your reference are displayed as stacked at the top panel of Artemis. The reads are color-coded so that paired reads are blue and those with an inversion are red. Reads that do not have a mapped mate are black and are optionally shown in the inferred insert size view. In the stack view, duplicated reads that span the same region are collapsed into one green line.
+
+Now right click on any of the stacked reads and Go to Graph and select Coverage (screenshot below).
+
+Now right click on any of the stacked reads and Go to Show and select SNP marks to show SNPs in red marks.
+
+![alt tag](select_graph.png)
+
+Follow the same procedure and select SNP graph. Adjust the gene features panel height to show all the graph in a window.
+
+![alt tag](graphs.png)
+
+Play around by moving the genbank panel cursor to look at coverage and SNP density across the genome. This will let you look at any regions where the coverage or SNP density is unusually high or low.
+
+If you click a read, its mate pair will also be selected. If the cursor hovers over a read for long enough details of that read will appear in a small box. For more details of the read, right-click and select 'Show details of: READ NAME' (last option in list) from the
+menu (screenshot below). This will open up a new window giving you some useful details such as mapping quality, coordinates etc.
+
+![alt tag](read_details.png)
+
+The snps are denoted by red marks as observed inside the reads. Go to one of the SNPs in the VCF file (Position: 50195) by directly navigating to the position. For this, select Goto at the top -> select Navigator -> Type the position in Goto Base box
+
+You will notice a spike in the middle of the SNP graph window. This is one of the SNPs that passed all our filter criteria. (Screenshot)
+
+![alt tag](spike_true.png)
+
+Lets try to see an example of HET variant. Variant positions where more than one allele (variant) with sufficiently high read depth are observed are considered HET type variants.
+
+For this, click on the Goto option at the top and select navigator. Type 321818 in Goto Base box and click Goto.
+
+You will see a thick spike in the SNP graph as well as a thick red vertical line in the BAM panel. Also notice the sudden spike in the coverage for this particular region compared to its flanking region (the region before and after a selected region). The coverage here is more than 300 which is unusually high compared to the entire genome coverage. This means that more than one allele with high quality and depth were observed at these positions so we cannot decide which one of these is a true variant. We removed these types of variants during our Variant Filteration step using the criteria FQ. (If the FQ is unusually high, it is suggestive of a HET variant, and a negative FQ value is suggestive of a true variant as observed in the mapped reads)
+
+![alt tag](HET_variant.png)
+
+Now select the gene right below this spiked region. Right click on this gene (KPNIH1_RS01560) and select Zoom to Selection.
+
+![alt tag](HET_variant_gene_selected.png)
+
+Check the details about gene by selecting View -> Selected Features
+
+You can inspect these types of HET variants later for any gene duplication or copy number analysis (by extracting variant positions with high FQ values). Addition of these details will give a better resolution while inferring phylogenetic trees.
+
+Play around with Artemis to look at what other kind of information you can find from these BAM and vcf files. Also refer to the manual at the [Artemis Homepage](http://www.sanger.ac.uk/science/tools/artemis) for full information about its usage.
+
+[[back to top]](day1_afternoon.html)
+[[HOME]](index.html)
+
+
+VRE variant calling analysis
+----------------------------
+
+Today, we learned how to assess the quality, perform quality trimming and variant calling to find variants between the sample and reference genome. This exercise requires you to apply these tools and commands on a new data set. These samples both come from a patient infected with VRE before and after treatment with daptomycin. The first sample was the patient's initial sample and is susceptible to daptomycin, and the second was after daptomycin resistance emerged during treatment. Your goal is to map reads from the resistant genome to the susceptible reference and search for variants that may be associated with resistance. To accomplish this you will run the programs from this session to generate filtered variant files (VCF), and then explore these variants in Artemis to see what genes they are in. To help with your interpretation, see if you see any genes hit that were reported in this [paper](http://www.nejm.org/doi/full/10.1056/nejmoa1011138), which was the first to identify putative daptomycin resistance loci.
+
+- Use VRE_daptoS_ref_strain.fa as your reference genome and VRE_daptoS_gene_annot.gff annotation file for Artemis.
+
+- This is how the command and tools workflow should look:
+
+>1. FastQC to check the quality of reads(you can skip here for time)
+>2. Trimmomatic to remove bad quality data(you can skip here for time)
+>3. Prepare reference genome index for BWA and align reads to reference genome
+>4. SAM/BAM manipulation using samtools
+>5. Remove duplicates using picard(don't forget to create a dictionary for reference fasta file required by PICARD)
+>6. Index marked bam file generated by picard using SAMTOOLS(For input in Artemis later)
+>7. Variant calling using samtools
+>8. Variant Filteration using GATK
+>9. Visualize BAM and VCF files in Artemis
diff --git a/docs/build/html/_sources/day1_morning.txt b/docs/build/html/_sources/day1_morning.txt
new file mode 100644
index 0000000..0704e1e
--- /dev/null
+++ b/docs/build/html/_sources/day1_morning.txt
@@ -0,0 +1,654 @@
+Day 1 Morning
+=============
+[[HOME]](index.html)
+
+Installing and setting up Cyberduck for file transfer
+-----------------------------------------------------
+
+During workshop, we will transfer different output files from flux to your local system. Cyberduck makes it easier to drag and drop any remote file onto your local system and vice versa. Of course, you can use "scp" to transfer files but Cyberduck provides a graphical interface to manage file transfer and helps avoid typing long file paths and commands.
+
+> ***1. Go to [this](https://cyberduck.io/) cyberduck website and download the executable for your respective operating system.***
+
+> ***2. Double-click on the downloaded zip file to unzip it and double click cyberduck icon.***
+
+> ***3. Type sftp://flux-xfer.arc-ts.umich.edu in quickconnect bar, press enter and enter your flux username and password.***
+
+> ***4. This will take you to your flux home directory /home/username. Select "Go" from tool bar at the top then select "Go to folder" and enter workshop home directory path: /scratch/micro612w18_fluxod/***
+
+To transfer or upload a file, you can drag and drop it into the location you want.
+
+
+Getting your data onto Flux and setting up environment variable
+---------------------------------------------------------------
+
+**Log in to Flux**
+
+
+```
+ssh username@flux-login.arc-ts.umich.edu
+```
+
+
+
+**Setting up environment variables in .bashrc file so your environment is all set for genomic analysis!**
+
+Environment variables are the variables/values that describe the environment in which programs run. All the programs and scripts on your unix system use these variables for extracting information such as:
+
+- What is my current working directory?,
+- Where are temporary files stored?,
+- Where are perl/python libraries?,
+- Where is Blast installed? etc.
+
+In addition to environment variables that are set up by system administrators, each user can set their own environment variables to customize their experience. This may sound like something super advanced that isn't relevant to beginners, but that's not true!
+
+Some examples of ways that we will use environment variables in the class are:
+
+1) create shortcuts for directories that you frequently go to,
+
+2) tell unix where frequently used programs live, so you don't have to put the full path name each time you use it and
+
+3) setup a shortcut for getting on a cluster node, so that you don't have to write out the full command each time.
+
+One way to set your environment variables would be to manually set up these variables every time you log in, but this would be extremely tedious and inefficient. So, Unix has set up a way around this, which is to put your environment variable assignments in special files called .bashrc or .bash_profile. Every user has one or both of these files in their home directory, and what's special about them is that the commands in them are executed every time you log in. So, if you simply set your environment variable assignments in one of these files, your environment will be set up just the way you want it each time you log in!
+
+All the software/tools that we need in this workshop are installed in the directory "/scratch/micro612w18_fluxod/shared/bin/", and we want the shell to look for these installed tools in this directory. To do this, we will add the full paths to these tools to the PATH environment variable.
+
+> ***i. Make a backup copy of bashrc file in case something goes wrong.***
+
+```
+
+cp ~/.bashrc ~/bashrc_backup
+
+#Note: "~/" represents your home directory. On flux, this means /home/username
+
+```
+
+> ***ii. Open ~/.bashrc file using any text editor and add the following lines to your .bashrc file.***
+
+
+
+ Click here to expand entries
+
+```
+##Micro612 Workshop ENV
+
+#Aliases
+alias iflux='qsub -I -V -l nodes=1:ppn=4,pmem=4000mb,walltime=1:00:00:00 -q fluxod -l qos=flux -A micro612w18_fluxod'
+alias wd='cd /scratch/micro612w18_fluxod/username/'
+alias d1m='cd /scratch/micro612w18_fluxod/username/day1_morn'
+alias d1a='cd /scratch/micro612w18_fluxod/username/day1_after'
+alias d2m='cd /scratch/micro612w18_fluxod/username/day2_morn'
+alias d2a='cd /scratch/micro612w18_fluxod/username/day2_after'
+alias d3m='cd /scratch/micro612w18_fluxod/username/day3_morn'
+alias d3a='cd /scratch/micro612w18_fluxod/username/day3_after'
+
+
+#Flux Modules
+module load perl-modules
+
+#Perl Libraries
+export PERL5LIB=/scratch/micro612w18_fluxod/shared/bin/PAGIT/lib:/scratch/micro612w18_fluxod/shared/bin/vcftools_0.1.12b/perl:$PERL5LIB
+export PERL5LIB=/scratch/micro612w18_fluxod/shared/perl_libs:$PERL5LIB
+
+#Bioinformatics Tools
+export PATH=$PATH:/scratch/micro612w18_fluxod/shared/bin/ncbi-blast-2.7.1+/bin/
+export PATH=$PATH:/scratch/micro612w18_fluxod/shared/bin/MultiQC/build/scripts-2.7/
+export PATH=$PATH:/scratch/micro612w18_fluxod/shared/bin/mauve_snapshot_2015-02-13/linux-x64/
+export PATH=$PATH:/scratch/micro612w18_fluxod/shared/bin/vcftools_0.1.12b/perl/
+export PATH=$PATH:/scratch/micro612w18_fluxod/shared/bin/tabix-0.2.6/
+export PATH=$PATH:/scratch/micro612w18_fluxod/shared/bin/bwa-0.7.12/
+export PATH=$PATH:/scratch/micro612w18_fluxod/shared/bin/Trimmomatic/
+export PATH=$PATH:/scratch/micro612w18_fluxod/shared/bin/bcftools-1.2/
+export PATH=$PATH:/scratch/micro612w18_fluxod/shared/bin/samtools-1.2/
+export PATH=$PATH:/scratch/micro612w18_fluxod/shared/bin/sratoolkit/bin/
+export PATH=$PATH:/scratch/micro612w18_fluxod/shared/bin/Spades/bin/
+export PATH=$PATH:/scratch/micro612w18_fluxod/shared/bin/FastQC/
+export PATH=$PATH:/scratch/micro612w18_fluxod/shared/bin/GenomeAnalysisTK-3.3-0/
+export PATH=$PATH:/scratch/micro612w18_fluxod/shared/bin/picard-tools-1.130/
+export PATH=$PATH:/scratch/micro612w18_fluxod/shared/bin/qualimap_v2.1/
+export PATH=$PATH:/scratch/micro612w18_fluxod/shared/bin/vcftools_0.1.12b/bin/
+export PATH=$PATH:/scratch/micro612w18_fluxod/shared/bin/snpEff/
+export PATH=$PATH:/scratch/micro612w18_fluxod/shared/bin/PAGIT/ABACAS/
+export PATH=$PATH:/scratch/micro612w18_fluxod/shared/bin/blast-2.2.26/bin/
+export PATH=$PATH:/scratch/micro612w18_fluxod/shared/bin/quast/
+export PATH=$PATH:/scratch/micro612w18_fluxod/shared/bin/MUMmer3.23/
+export PATH=$PATH:/scratch/micro612w18_fluxod/shared/bin/fastq_screen_v0.5.2/
+export PATH=$PATH:/scratch/micro612w18_fluxod/shared/bin/prokka-1.11/bin/
+export PATH=$PATH:/scratch/micro612w18_fluxod/shared/bin/LS-BSR-master/
+export PATH=$PATH:/scratch/micro612w18_fluxod/shared/bin/bowtie2-2.2.6/
+export PATH=$PATH:/scratch/micro612w18_fluxod/shared/bin/mcl-14-137/src/alien/oxygen/src/
+
+```
+
+
+
+Note: Replace "username" under alias shortcuts with your own umich "uniqname". In the text editor, nano, you can do this by
+
+- Type Ctrl + \ and you will be prompted to type in your search string (here, username).
+- Press return. Then you will be prompted to enter what you want to replace "username" with (here, your uniqname).
+- Press return. Then press a to replace all occurrences or y to accept each occurrence one by one.
+
+You can also customize the alias name such as wd, d1m etc. catering to your own need and convenience.
+
+The above environment settings will set various shortcuts such as "iflux" for entering interactive flux session, "wd" to navigate to your workshop directory, call necessary flux modules and perl libraries required by certain tools and finally sets the path for bioinformatics programs that we will run during the workshop.
+
+> ***iii. Save the file and Source .bashrc file to make these changes permanent.***
+
+```
+
+source ~/.bashrc
+
+```
+
+> ***iv. Check if the $PATH environment variable is updated***
+
+```
+
+echo $PATH
+
+#You will see a long list of paths that have been added to your $PATH variable
+
+wd
+
+```
+
+You should be in your workshop working directory that is /scratch/micro612w18_fluxod/username
+
+
+
+
+Unix is your friend
+-------------------
+
+Up until now you’ve probably accessed sequence data from NCBI by going to the website, laboriously clicking around and finally finding and downloading the data you want.
+
+There are a lot of reasons that is not ideal:
+
+- It’s frustrating and slow to deal with the web interface
+- It can be hard to keep track of where the data came from and exactly which version of a sequence you downloaded
+- It’s not conducive to downloading lots of sequence data
+
+To download sequence data in Unix you can use a variety of commands (e.g. sftp, wget, curl). Here, we will use the curl command to download some genome assemblies from NCBI ftp location:
+
+- Go to your class home directory (use your wd shortcut!)
+
+- Execute the following commands to copy files for this morning’s exercises to your class home directory:
+
+```
+cp -r /scratch/micro612w18_fluxod/shared/data/day1_morn/ ./
+
+cd day1_morn/
+
+#or
+
+d1m
+
+ls
+
+```
+
+- Now get three genome sequences with the following commands:
+
+```
+curl ftp://ftp.ncbi.nlm.nih.gov/genomes/refseq/bacteria/Acinetobacter_baumannii/latest_assembly_versions/GCF_000018445.1_ASM1844v1/GCF_000018445.1_ASM1844v1_genomic.fna.gz > Acinetobacter_baumannii.fna.gz
+
+curl ftp://ftp.ncbi.nlm.nih.gov/genomes/refseq/bacteria/Klebsiella_pneumoniae/latest_assembly_versions/GCF_000220485.1_ASM22048v1/GCF_000220485.1_ASM22048v1_genomic.fna.gz > Klen_pneu.fna.gz
+
+curl ftp://ftp.ncbi.nlm.nih.gov/genomes/refseq/bacteria/Escherichia_coli/all_assembly_versions/GCF_000194495.1_ASM19449v2/GCF_000194495.1_ASM19449v2_genomic.fna.gz > E_coli.fna.gz
+
+```
+
+- Decompress the compressed fasta file using gzip
+
+```
+gzip -d Acinetobacter_baumannii.fna.gz
+gzip -d Klen_pneu.fna.gz
+gzip -d E_coli.fna.gz
+```
+
+These files are genome assemblies in fasta format. Fasta files are a common sequence data format that is composed of alternating sequence headers (sequence names and comments) and their corresponding sequences. Of great importance, the sequence header lines must start with “>”. These genome assemblies have one header line for each contig in the assembly, and our goal will be to count the number of contigs/sequences. To do this we will string together two Unix commands: “grep” and “wc”. “grep” (stands for global regular expression print) is an extremely powerful pattern matching command, which we will use to identify all the lines that start with a “>”. “wc” (stands for word count) is a command for counting words, characters and lines in a file. To count the number of contigs in one of your fasta files enter:
+
+
+```
+grep ">" E_coli.fna | wc -l
+```
+
+Try this command on other assemblies to see how many contigs they have
+
+Your first sequence analysis program!!!
+---------------------------------------
+
+OK, so now that we have a useful command, wouldn’t it be great to turn it into a program that you can easily apply to a large number of genome assemblies? Of course it would! So, now we are going to take our cool contig counting command, and put it in a shell script that applies it to all files in the desired directory.
+
+
+
+- Open “fasta_counter.sh” in pico or your favourite text editor and follow instructions for making edits so it will do what we want it to do
+
+- Run this script in day1_morn directory and verify that you get the correct results
+
+```
+bash fasta_counter.sh .
+```
+
+Plotting genomic coverage in R
+------------------------------
+
+Data visualization plays an important role in organizing, analyzing and interpreting large amounts of omics data. R is one of the most basic and powerful tools for manipulating and visualizing these types of data. The following task will brush up on some basic R plotting commands and help you visualize some complex omics data for interpretation.
+
+One of the most common types of genomic analysis involves comparing the newly sequenced read data of an organism to your choice of reference organism genome. Mapping millions of reads generated in a sequencing experiment to the reference genome fasta file and interpreting various parameters can achieve this analysis.
+One such parameter is validating how well your sequencing experiment performed and assessing the “uniformity” of coverage from whole-genome sequencing. Visualizing sequencing coverage across the reference genome helps us answer this question. Sequencing coverage describes the average number of reads that align to, or "cover," known reference bases.
+
+The input for this task is a comma-separated file, which contains average sequencing coverage information i.e average number of reads mapped to each 1000 base pairs in reference genome. You can find this input file in your day1_morn directory by the name, Ecoli_coverage_average_bed.csv
+
+
+
+Drag and drop this Ecoli_coverage_average_bed.csv to your local system using cyberduck.
+
+Now, Fire up R console or studio and import the file (Ecoli_coverage_average_bed.csv) using any type of data import functions in R (read.table, read.csv etc.)
+
+Hint: The file is comma-separated and contains header line (“bin,Average_coverage”) so use appropriate parameters while importing the file
+
+Once the data in file is imported into R object, you can plot the column Average_coverage as a time series plot to assess the coverage of your mapped reads across genome.
+
+Note: A time series plot is a graph that you can use to evaluate patterns and behavior in data over time. Here, we can employ the same plot to see the pattern i.e read depth/coverage at each 1000 bases (represented by bins columns where each bin represents Average number of reads mapped to each 1000 bases in reference genome) using the simplest R function for time series such as [plot.ts]( http://stat.ethz.ch/R-manual/R-devel/library/stats/html/plot.ts.html )
+
+An example plot.ts plot for Ecoli_coverage_average_bed.csv is shown below for your reference.
+
+![alt tag](plot_1.png)
+
+For more advanced and polished visualization, ggplot2 can be employed to display the same plot. An example ggplot2 plot for Ecoli_coverage_average_bed.csv is shown below for your reference.
+
+![alt tag](plot_2.png)
+
+
+ Solution
+
+```
+
+x <- read.table("Ecoli_coverage_average_bed.csv", sep=",", header=TRUE)
+plot.ts(x$Average_coverage, xlab="Genome Position(1000bp bins)", ylab="Average Read Depth", main="Ecoli Bed Coverage", col="blue")
+
+```
+
+
+
+Power of Unix commands
+----------------------
+
+In software carpentry, you learned to work with the shell and automate simple tasks using basic unix commands. Let's see how some of these commands can be employed in genomics analysis while exploring various file formats that we use in day to day analysis. For this session, we will try to explore three different types of bioinformatics file formats:
+
+fasta: used for representing either nucleotide or peptide sequences
+
+gff: used for describing genes and other features of DNA, RNA and protein sequences
+
+fastq: used for storing biological sequence / sequencing reads (usually nucleotide sequence) and its corresponding quality scores
+
+
+- Question: Previously, you downloaded genome assembly fasta files and ran a shell script to count contigs. Now, lets say you want to find out the combined length of genome in each of these files. This can be achieved by running a short unix command piping together two unix programs: grep and wc. The key to crafting the command is understanding the features of fasta files,
+
+> ***1) each sequence in fasta file is preceded by a fasta header that starts with ">",***
+
+> ***2) the types of bases that a nucleotide sequence represents (A,T,G,C,N)***
+
+
+To determine the total length of our genome assemblies, we will use grep to match only those lines that don't start with ">" (remember grep -v option is used to ignore lines) and don't contain the character "N". Then use the wc command (stands for word count) to count the characters. We can use the unix pipe "|" to pass the output of one command to another for further processing. Let's start by counting the number of bases in the Acinetobacter_baumannii.fna file.
+
+
+ Solution
+
+
+
+
+```
+
+grep -v '^>' Acinetobacter_baumannii.fna | grep -v "N" | grep -v "n" | wc -m
+
+#Note:
+
+#- The "^" inside the grep pattern anchors the match to the start of a line, so '^>' matches lines that start with ">"; -v asks grep to ignore those lines.
+#- Use "|" to pass the output of one command to another.
+#- -m parameter will show the character counts. Check wc help menu by typing "wc --help" on terminal to explore other parameters
+
+```
+
+
+
+
+Now run the same command on other fasta files in day1_morn directory. Try using a for loop.
+
+
+
+ Solution
+
+```
+
+for i in *.fna; do grep -v '^>' $i | grep -v "N" | grep -v "n" | wc -m; done
+
+```
+
+
+
+- Exploring GFF files
+
+The GFF (General Feature Format) format is a tab-separated file and consists of one line per feature, each containing 9 columns of data.
+
+column 1: seqname - name of the genome or contig or scaffold
+
+column 2: source - name of the program that generated this feature, or the data source (database or project name)
+
+column 3: feature - feature type name, e.g. Gene, exon, CDS, rRNA, tRNA, CRISPR, etc.
+
+column 4: start - Start position of the feature, with sequence numbering starting at 1.
+
+column 5: end - End position of the feature, with sequence numbering starting at 1.
+
+column 6: score - A floating point value.
+
+column 7: strand - defined as + (forward) or - (reverse).
+
+column 8: frame - One of '0', '1' or '2'. '0' indicates that the first base of the feature is the first base of a codon, '1' that the second base is the first base of a codon, and so on..
+
+column 9: attribute - A semicolon-separated list of tag-value pairs, providing additional information about each feature such as gene name, product name etc.
+
+- Use less to explore first few lines of a gff file sample.gff
+
+```
+
+less sample.gff
+
+```
+Note: lines starting with pound sign "#" represent comments and are used to document extra information about the features.
+
+You will notice that the GFF format follows version 3 specifications("##gff-version 3"), followed by genome name("#Genome: 1087440.3|Klebsiella pneumoniae subsp. pneumoniae KPNIH1"), date("#Date:02/09/2017") when it was generated, contig name("##sequence-region") and finally tab-separated lines describing features.
+
+You can press space bar on keyboard to read more lines and "q" key to exit less command.
+
+- Question: Suppose, you want to find out the number of annotated features in a gff file. how will you achieve this using grep and wc?
+
+
+ Solution
+
+```
+grep -v '^#' sample.gff | wc -l
+```
+
+
+- Question: How about counting the number of rRNA features in a gff(third column) file using grep, cut and wc? You can check the usage for cut by typing "cut --help"
+
+
+ Solution
+
+```
+
+cut -f 3 sample.gff | grep 'rRNA' | wc -l
+
+#Or number of CDS or tRNA features?
+
+cut -f 3 sample.gff | grep 'CDS' | wc -l
+cut -f 3 sample.gff | grep 'tRNA' | wc -l
+
+#Note: In the above command, we are trying to extract feature information from third column.
+
+```
+
+
+- Question: Try counting the number of features on a "+" or "-" strand (column 7).
+
+Some more useful one-line unix commands for GFF files: [here](https://github.com/stephenturner/oneliners#gff3-annotations)
+
+**Unix one-liners**
+
+As soon as you receive your sample data from the sequencing centre, the first thing you do is check its quality using a quality control tool such as FastQC and make sure that it contains sequences from the organism that you are working on (i.e. it is free from contamination). But before carrying out extensive QC, you can run a bash "one-liner" to get some basic statistics about the raw reads. These one-liners are great examples of how a set of (relatively) simple Unix commands can be piped together to do really useful things.
+
+Run the following command to print total number of reads in each file, total number of unique reads, percentage of unique reads, most abundant sequence(useful to find adapter sequences or contamination), its frequency, and frequency of that sequence as a proportion of the total reads, average read length.
+
+```
+for i in Rush_KPC_266_*.gz; do zcat $i | awk 'BEGIN{OFS="\t"};((NR-2)%4==0){read=$1;total++;count[read]++;len+=length(read)}END{for(read in count){if(!max||count[read]>max) {max=count[read];maxRead=read};if(count[read]==1){unique++}};print total,unique,unique*100/total,maxRead,count[maxRead],count[maxRead]*100/total,len/total}'; done
+
+#The above awk command reads the sequence line (the second line) of every four-line fastq record and calculates some basic fastq statistics.
+```
+
+Now try running the above command using fastq_screen.fastq.gz as input.
+
+You can find more of such super useful bash one-liners at Stephen Turner's github [page](https://github.com/stephenturner/oneliners). You can also use some pre-written unix utilities and tools such as [seqtk](https://github.com/lh3/seqtk), [bioawk](https://github.com/lh3/bioawk) and [fastx](http://hannonlab.cshl.edu/fastx_toolkit/) which comes in handy while extracting complex information from fasta/fastq/sam/bam files and are optimized to be insanely fast.
+
+Contamination Screening using [FastQ Screen](http://www.bioinformatics.babraham.ac.uk/projects/fastq_screen/)
+--------------------------------------------
+
+When running a sequencing pipeline, it is very important to make sure that your data meet appropriate quality thresholds and are free from any contaminants. This step will help you make correct interpretations in downstream analysis and will also let you know if you are required to redo the experiment/library preparation, resequence, or remove contaminant sequences.
+
+For this purpose, we will employ fastq screen to screen one of our sample against a range of reference genome databases.
+
+In the previous section, did you notice the sample fastq_screen.fastq.gz had only 28 % unique reads? What sequences does it contain?
+
+To answer this, We will screen it against Human, Mouse and Ecoli genome and try to determine what percentage of reads are contaminant such as host DNA, i.e Human and mouse.
+
+We have already created the human, mouse and ecoli reference databases inside fastq_screen tool directory which you can take a look by running:
+
+```
+
+ls /scratch/micro612w18_fluxod/shared/bin/fastq_screen_v0.5.2/data/
+
+```
+
+Note: You will learn how to create reference databases in our afternoon session.
+
+> ***i. Get an interactive cluster node to start running programs. Use the shortcut that we created in .bashrc file for getting into interactive flux session.***
+
+How do you know if you are in interactive session?: you should see "username@nyx" in your command prompt
+
+```
+iflux
+```
+
+Whenever you start an interactive job, the path resets to your home directory. So, navigate to day1_morn directory again.
+
+```
+d1m
+
+#or
+
+cd /scratch/micro612w18_fluxod/username/day1_morn/
+
+```
+
+> ***ii. Lets run fastq_screen on fastq_screen.fastq.gz***
+
+```
+
+fastq_screen --subset 1000 --force --outdir ./ --aligner bowtie2 fastq_screen.fastq.gz
+
+#Note: We will screen only a subset of fastq reads against reference databases. To screen all the reads, change this argument to --subset 0 but will take long time to finish. (searching sequences against human or mouse genome is a time consuming step)
+#Also, don't worry about the "Broken pipe" warning.
+
+```
+
+The above run will generate two types of output file: a screen report in text format "fastq_screen_screen.txt" and a graphical output "fastq_screen_screen.png" showing percentage of reads mapped to each reference genomes.
+
+> ***iii. Download the fastq_screen graphical report to your home computer for inspection.***
+
+Use the scp command as shown below or use cyberduck. If you don't see the file in the cyberduck window, try refreshing it using the refresh button at the top.
+
+```
+scp username@flux-xfer.arc-ts.umich.edu:/scratch/micro612w18_fluxod/username/day1_morn/fastq_screen_screen.png /path-to-local-directory/
+
+#You can use ~/Desktop/ as your local directory path
+
+```
+
+Open fastq_screen_screen.png on your system. You will notice that the sample contains a significant amount of human reads; we should always remove these contaminants from our sample before proceeding to any type of microbial analysis.
+
+Quality Control using [FastQC](http://www.bioinformatics.babraham.ac.uk/projects/fastqc/ "FastQC homepage")
+------------------------------
+[[back to top]](day1_morning.html)
+[[HOME]](index.html)
+
+Now we will run FastQC on some sample raw data to assess its quality. FastQC is a quality control tool that reads in sequence data in a variety of formats(fastq, bam, sam) and can either provide an interactive application to review the results or create an HTML based report which can be integrated into any pipeline. It is generally the first step that you take upon receiving the sequence data from sequencing facility to get a quick sense of its quality and whether it exhibits any unusual properties (e.g. contamination or unexpected biological features)
+
+> ***i. In your day1_morn directory, create a new directory for saving FastQC results.***
+
+```
+mkdir Rush_KPC_266_FastQC_results
+mkdir Rush_KPC_266_FastQC_results/before_trimmomatic
+```
+
+> ***ii. Verify that FastQC is in your path by invoking it from command line.***
+
+```
+fastqc -h
+```
+
+FastQC can be run in two modes: "command line" or as a GUI (graphical user interface). We will be using command line version of it.
+
+> ***iii. Run FastQC to generate quality report of sequence reads.***
+
+```
+fastqc -o Rush_KPC_266_FastQC_results/before_trimmomatic/ Rush_KPC_266_1_combine.fastq.gz Rush_KPC_266_2_combine.fastq.gz --extract
+```
+
+This will generate two results directories, Rush_KPC_266_1_combine_fastqc and Rush_KPC_266_2_combine_fastqc, in the output folder provided with the -o flag.
+
+The summary.txt file in these directories indicates if the data passed different quality control tests in text format.
+
+You can visualize and assess the quality of data by opening html report in a local browser.
+
+> ***iv. Exit your cluster node so you don’t waste cluster resources and $$$!***
+
+> ***v. Download the FastQC html report to your home computer to examine using scp or cyberduck***
+
+```
+scp username@flux-xfer.arc-ts.umich.edu:/scratch/micro612w18_fluxod/username/day1_morn/Rush_KPC_266_FastQC_results/before_trimmomatic/*.html /path-to-local-directory/
+```
+
+The analysis in FastQC is broken down into a series of analysis modules. The left hand side of the main interactive display or the top of the HTML report show a summary of the modules which were run, and a quick evaluation of whether the results of the module seem entirely normal (green tick), slightly abnormal (orange triangle) or very unusual (red cross).
+
+![alt tag](1.png)
+
+Let's first look at the quality drop at the end of the "Per Base Sequence Quality" graph. This degradation of quality towards the end of reads is commonly observed in illumina samples. The reason for this drop is that as the number of sequencing cycles performed increases, the average quality of the base calls, as reported by the Phred Scores produced by the sequencer, falls.
+
+Next, lets check the overrepresented sequences graph and the kind of adapters that were used for sequencing these samples (Truseq or Nextera) which comes in handy while indicating the adapter database path during downstream filtering step (Trimmomatic).
+
+![alt tag](2.png)
+
+- Check out [this](https://sequencing.qcfail.com/articles/loss-of-base-call-accuracy-with-increasing-sequencing-cycles/) for a more detailed explanation as to why quality drops with increasing sequencing cycles.
+
+- [A video FastQC walkthrough created by FastQC developers](https://www.youtube.com/watch?v=bz93ReOv87Y "FastQC video")
+
+Quality Trimming using [Trimmomatic](http://www.usadellab.org/cms/?page=trimmomatic "Trimmomatic Homepage")
+------------------------------------
+[[back to top]](day1_morning.html)
+[[HOME]](index.html)
+
+Filtering out problematic sequences within a dataset is inherently a trade off between sensitivity (ensuring all contaminant sequences are removed) and specificity (leaving all non-contaminant sequence data intact). Adapter and other technical contaminants can potentially occur in any location within the reads.(start, end, read-through (between the reads), partial adapter sequences)
+
+Trimmomatic is a tool that tries to search these potential contaminant/adapter sequence within the read at all the possible locations. It takes advantage of the added evidence available in paired-end dataset. In paired-end data, read-through/adapters can occur on both the forward and reverse reads of a particular fragment in the same position. Since the fragment was entirely sequenced from both ends, the non-adapter portion of the forward and reverse reads will be reverse-complements of each other. This strategy of searching for contaminant in both the reads is called 'palindrome' mode.
+
+For more information on how Trimmomatic tries to achieve this, please refer to [this](http://www.usadellab.org/cms/uploads/supplementary/Trimmomatic/TrimmomaticManual_V0.32.pdf) manual.
+
+Now we will run Trimmomatic on these raw data to remove low quality reads as well as adapters.
+
+> ***i. If the interactive session timed out, get an interactive cluster node again to start running programs and navigate to day1_morn directory.***
+
+How to know if you are in interactive session: you should see "username@nyx" in your command prompt
+
+```
+iflux
+
+cd /scratch/micro612w18_fluxod/username/day1_morn/
+
+#or
+
+d1m
+```
+
+> ***ii. Create these output directories in your day1_morn folder to save trimmomatic results***
+
+```
+mkdir Rush_KPC_266_trimmomatic_results
+```
+
+> ***iii. Try to invoke trimmomatic from command line.***
+
+```
+java -jar /scratch/micro612w18_fluxod/shared/bin/Trimmomatic/trimmomatic-0.33.jar -h
+```
+
+> ***iv. Run the below trimmomatic commands on raw reads.***
+
+```
+java -jar /scratch/micro612w18_fluxod/shared/bin/Trimmomatic/trimmomatic-0.33.jar PE Rush_KPC_266_1_combine.fastq.gz Rush_KPC_266_2_combine.fastq.gz Rush_KPC_266_trimmomatic_results/forward_paired.fq.gz Rush_KPC_266_trimmomatic_results/forward_unpaired.fq.gz Rush_KPC_266_trimmomatic_results/reverse_paired.fq.gz Rush_KPC_266_trimmomatic_results/reverse_unpaired.fq.gz ILLUMINACLIP:/scratch/micro612w18_fluxod/shared/bin/Trimmomatic/adapters/TruSeq3-PE.fa:2:30:10:8:true SLIDINGWINDOW:4:15 MINLEN:40 HEADCROP:0
+```
+
+
+![alt tag](trimm_parameters.png)
+
+First, Trimmomatic searches for any matches between the reads and adapter sequences. Adapter sequences are stored in this directory of the Trimmomatic tool: /scratch/micro612w18_fluxod/shared/bin/Trimmomatic/adapters/. Trimmomatic comes with a list of standard adapter fasta sequences such as TruSeq, Nextera etc. You should use the appropriate adapter fasta sequence file based on the illumina kit that was used for sequencing. You can get this information from your sequencing centre or can find it in the FastQC html report (Section: Overrepresented sequences).
+
+Short seed sections (16 bp) of each adapter sequence (contained in TruSeq3-PE.fa) are tested in each possible position within the reads, allowing at most 2 mismatches as determined by the seedMismatches parameter. If it finds a seed match, it starts aligning the entire adapter sequence and scores the alignment. The advantage here is that the full alignment is calculated only when there is a seed match, which results in considerable efficiency gains. So, when it finds a match, it moves forward with the full alignment, and when the alignment score reaches 10 (about 17 matching bases), as determined by simpleClipThreshold, it finally trims off the adapter from the reads.
+
+Quoting Trimmomatic:
+
+"'Palindrome' trimming is specifically designed for the case of 'reading through' a short fragment into the adapter sequence on the other end. In this approach, the appropriate adapter sequences are 'in silico ligated' onto the start of the reads, and the combined adapter+read sequences, forward and reverse are aligned. If they align in a manner which indicates 'read- through' i.e atleast 30 bp match, the forward read is clipped and the reverse read dropped (since it contains no new data)."
+
+> ***v. Now create new directories in day1_morn folder and Run FastQC on these trimmomatic results.***
+
+```
+mkdir Rush_KPC_266_FastQC_results/after_trimmomatic
+
+fastqc -o Rush_KPC_266_FastQC_results/after_trimmomatic/ Rush_KPC_266_trimmomatic_results/forward_paired.fq.gz Rush_KPC_266_trimmomatic_results/reverse_paired.fq.gz --extract
+```
+
+Get these html reports to your local system.
+
+```
+scp username@flux-xfer.arc-ts.umich.edu:/scratch/micro612w18_fluxod/username/day1_morn/Rush_KPC_266_FastQC_results/after_trimmomatic/*.html /path-to-local-directory/
+```
+
+![alt tag](3.png)
+
+After running Trimmomatic, you should notice that the sequence quality improved (Per base sequence quality) and now it doesn't contain any contaminants/adapters (Overrepresented sequences).
+
+Next, take a look at the per base sequence content graph, and notice that the head bases(~9 bp) are slightly imbalanced. In a perfect scenario, each nucleotide content should run parallel to each other, and should be reflective of the overall A/C/T/G content of your input sequence.
+
+Quoting FastQC:
+ "It's worth noting that some types of library will always produce biased sequence composition, normally at the start of the read. Libraries produced by priming using random hexamers (including nearly all RNA-Seq libraries) and those which were fragmented using transposases inherit an intrinsic bias in the positions at which reads start. This bias does not concern an absolute sequence, but instead provides enrichment of a number of different K-mers at the 5' end of the reads. Whilst this is a true technical bias, it isn't something which can be corrected by trimming and in most cases doesn't seem to adversely affect the downstream analysis. It will however produce a warning or error in this module."
+
+This doesn't look very bad but you can remove the red cross sign by trimming these imbalanced head bases using HEADCROP:9 flag in the above command.
+
+> ***vi. Let's run Trimmomatic again with HEADCROP:9 and save the results in a different directory called Rush_KPC_266_trimmomatic_results_with_headcrop/***
+
+```
+mkdir Rush_KPC_266_trimmomatic_results_with_headcrop/
+
+time java -jar /scratch/micro612w18_fluxod/shared/bin/Trimmomatic/trimmomatic-0.33.jar PE Rush_KPC_266_1_combine.fastq.gz Rush_KPC_266_2_combine.fastq.gz Rush_KPC_266_trimmomatic_results_with_headcrop/forward_paired.fq.gz Rush_KPC_266_trimmomatic_results_with_headcrop/forward_unpaired.fq.gz Rush_KPC_266_trimmomatic_results_with_headcrop/reverse_paired.fq.gz Rush_KPC_266_trimmomatic_results_with_headcrop/reverse_unpaired.fq.gz ILLUMINACLIP:/scratch/micro612w18_fluxod/shared/bin/Trimmomatic/adapters/TruSeq3-PE.fa:2:30:10:8:true SLIDINGWINDOW:4:20 MINLEN:40 HEADCROP:9
+```
+
+Unix gem: the `time` prefix in the above command reports how long the command takes to run.
+
+> ***vii. Run FastQC 'one last time' on updated trimmomatic results with headcrop and check report on your local computer***
+
+```
+mkdir Rush_KPC_266_FastQC_results/after_trimmomatic_headcrop/
+fastqc -o Rush_KPC_266_FastQC_results/after_trimmomatic_headcrop/ --extract -f fastq Rush_KPC_266_trimmomatic_results_with_headcrop/forward_paired.fq.gz Rush_KPC_266_trimmomatic_results_with_headcrop/reverse_paired.fq.gz
+```
+Download the reports again and see the difference.
+```
+scp username@flux-xfer.arc-ts.umich.edu:/scratch/micro612w18_fluxod/username/day1_morn/Rush_KPC_266_FastQC_results/after_trimmomatic_headcrop/*.html /path-to-local-directory/
+```
+
+The red cross sign disappeared!
+
+Let's have a look at an example of bad Illumina data [here](http://www.bioinformatics.babraham.ac.uk/projects/fastqc/bad_sequence_fastqc.html)
+
+[[back to top]](day1_morning.html)
+[[HOME]](index.html)
diff --git a/docs/build/html/_sources/day2_afternoon.txt b/docs/build/html/_sources/day2_afternoon.txt
new file mode 100644
index 0000000..518e379
--- /dev/null
+++ b/docs/build/html/_sources/day2_afternoon.txt
@@ -0,0 +1,475 @@
+Day 2 Afternoon
+===============
+[[HOME]](index.html)
+
+High-throughput BLAST and pan-genome analysis
+---------------------------------------------
+
+This morning we learned how to perform basic genome annotation and comparison using Prokka and ACT. Now we will up the ante and do some more sophisticated comparative genomics analyses!
+First, we will create custom BLAST databases to identify specific antibiotic resistance genes of interest in a set of genomes.
+Second, we will use the large-scale BLAST-based tool LS-BSR to identify the complete antibiotic resistome in our genomes.
+Third, we will move beyond antibiotic resistance, and look at the complete set of protein coding genes in our input genomes.
+Finally, we will go back to ACT to understand the sorts of genomic rearrangements underlying observed variation in gene content.
+
+For these exercises we will be looking at four closely related Acinetobacter baumannii strains. However, despite being closely related, these genomes have major differences in gene content, as A. baumannii has a notoriously flexible genome! In fact, in large part due to its genomic flexibility, A. baumannii has transitioned from a harmless environmental contaminant to a pan-resistant super-bug in a matter of a few decades. If you are interested in learning more, check out this Nature [review](http://www.nature.com/nrmicro/journal/v5/n12/abs/nrmicro1789.html) or [this](http://www.pnas.org/content/108/33/13758.abstract) paper that I published a few years back, analyzing the very same genomes you are working with.
+
+Execute the following command to copy files for this afternoon’s exercises to your scratch directory:
+
+```
+
+cd /scratch/micro612w18_fluxod/username
+
+or
+
+wd
+
+cp -r /scratch/micro612w18_fluxod/shared/data/day2_after/ ./
+
+```
+
+Determine which genomes contain beta-lactamase genes
+----------------------------------------------------
+[[back to top]](day2_afternoon.html)
+[[HOME]](index.html)
+
+Before comparing full genomic content, lets start by looking for the presence of particular genes of interest. A. baumannii harbors an arsenal of resistance genes, and it would be interesting to know how particular resistance families vary among our 4 genomes. To accomplish this we will use the antibiotic resistance database ([ARDB](http://ardb.cbcb.umd.edu/)) and particularly beta-lactamase genes extracted from ARDB. These extracted genes can be found in file ardb_beta_lactam_genes.pfasta, which we will use to generate a Blast database.
+
+> ***i. Run makeblastdb on the file of beta-lactamases to create a BLAST database.***
+
+makeblastdb takes as input:
+
+1) an input fasta file of protein or nucleotide sequences (ardb_beta_lactam_genes.pfasta) and
+
+2) a flag indicating whether to construct a protein or nucleotide database (in this case protein/ -dbtype prot).
+
+```
+#change directory to day2_after
+d2a
+
+
+makeblastdb -in ardb_beta_lactam_genes.pfasta -dbtype prot
+
+```
+
+> ***ii. BLAST A. baumannii protein sequences against our custom beta-lactamase database.***
+
+Run BLAST!
+
+The input parameters are:
+
+1) query sequences (-query Abau_all.pfasta),
+
+2) the database to search against (-db ardb_beta_lactam_genes.pfasta),
+
+3) the name of a file to store your results (-out bl_blastp_results),
+
+4) output format (-outfmt 6),
+
+5) e-value cutoff (-evalue 1e-20),
+
+6) number of database sequences to return (-max_target_seqs 1)
+
+
+```
+blastp -query Abau_all.pfasta -db ardb_beta_lactam_genes.pfasta -out bl_blastp_results -outfmt 6 -evalue 1e-20 -max_target_seqs 1
+```
+
+Use less to look at bl_blastp_results.
+
+```
+less bl_blastp_results
+```
+
+- Question: Experiment with the -outfmt parameter, which controls the different output formats that BLAST can produce.
+
+- Question: Determine which Enterococcus genomes contain vancomycin resistance genes. To do this you will need to: i) create a protein BLAST database for ardb_van.pfasta, ii) concatenate the genome sequences in the .fasta files and iii) use blastx to BLAST nucleotide genomes against a protein database
+
+Identification of antibiotic resistance genes with [ARIBA](https://github.com/sanger-pathogens/ariba) directly from paired end reads
+----------------------------------------------------------
+[[back to top]](day2_afternoon.html)
+[[HOME]](index.html)
+
+ARIBA, Antimicrobial Resistance Identification By Assembly is a tool that identifies antibiotic resistance genes by running local assemblies. The input is a FASTA file of reference sequences (can be a mix of genes and noncoding sequences) and paired sequencing reads. ARIBA reports which of the reference sequences were found, plus detailed information on the quality of the assemblies and any variants between the sequencing reads and the reference sequences.
+
+ARIBA is compatible with various databases and also contains an utility to download different databases such as: argannot, card, megares, plasmidfinder, resfinder, srst2_argannot, vfdb_core. Today, we will be working with the [card](https://card.mcmaster.ca/) database, which has been downloaded and placed in /scratch/micro612w18_fluxod/shared/out.card.prepareref/ directory.
+
+
+
+> ***i. Run ARIBA on input paired-end fastq reads for resistance gene identification.***
+
+The fastq reads are placed in Abau_genomes_fastq directory. Enter interactive flux session, change directory to day2_after workshop directory and run the below four commands to start ARIBA jobs in background.
+
+
+
+```
+iflux
+
+cd /scratch/micro612w18_fluxod/username/day2_after
+
+or
+
+d2a
+
+#Load dependency
+
+module load cd-hit
+
+#ARIBA commands
+
+/nfs/esnitkin/bin_group/anaconda3/bin/ariba run --force /scratch/micro612w18_fluxod/shared/out.card.prepareref/ Abau_genomes_fastq/AbauA_genome.1.fastq.gz Abau_genomes_fastq/AbauA_genome.2.fastq.gz AbauA_genome &
+
+/nfs/esnitkin/bin_group/anaconda3/bin/ariba run --force /scratch/micro612w18_fluxod/shared/out.card.prepareref/ Abau_genomes_fastq/AbauB_genome.1.fastq.gz Abau_genomes_fastq/AbauB_genome.2.fastq.gz AbauB_genome &
+
+/nfs/esnitkin/bin_group/anaconda3/bin/ariba run --force /scratch/micro612w18_fluxod/shared/out.card.prepareref/ Abau_genomes_fastq/AbauC_genome.1.fastq.gz Abau_genomes_fastq/AbauC_genome.2.fastq.gz AbauC_genome &
+
+/nfs/esnitkin/bin_group/anaconda3/bin/ariba run --force /scratch/micro612w18_fluxod/shared/out.card.prepareref/ Abau_genomes_fastq/ACICU_genome.1.fastq.gz Abau_genomes_fastq/ACICU_genome.2.fastq.gz ACICU_genome &
+
+```
+
+The "&" in the above commands(at the end) is a little unix trick to run commands in background. You can run multiple commands in background and make full use of parallel processing. You can check the status of these background jobs by typing:
+
+```
+jobs
+```
+
+> ***ii. Run ARIBA summary function to generate a summary report.***
+
+ARIBA has a summary function that summarises the results from one or more sample runs of ARIBA and generates an output report with various levels of information determined by the --preset parameter. The parameter "--preset minimal" will generate a minimal report showing only the presence/absence of resistance genes, whereas "--preset all" will output all the extra information related to each database hit such as reads and reference sequence coverage, variants and their associated annotations (if the variant confers resistance to an antibiotic) etc.
+
+```
+
+/nfs/esnitkin/bin_group/anaconda3/bin/ariba summary --preset minimal Abau_genomes_ariba_minimal_results *_genome/report.tsv
+
+/nfs/esnitkin/bin_group/anaconda3/bin/ariba summary --preset all Abau_genomes_ariba_all_results *_genome/report.tsv
+
+```
+
+ARIBA summary generates three output files:
+
+1. Abau_genomes_ariba*.csv file that can be viewed in your favourite spreadsheet program.
+2. Abau_genomes_ariba*.phandango.{csv,tre} that allow you to view the results in [Phandango](http://jameshadfield.github.io/phandango/#/). They can be drag-and-dropped straight into Phandango.
+
+Let's copy the Phandango files Abau_genomes_ariba_minimal_results.phandango.csv and Abau_genomes_ariba_minimal_results.phandango.tre to the local system using cyberduck or scp
+
+```
+scp username\@flux-xfer.arc-ts.umich.edu:/scratch/micro612w18_fluxod/username/day2_after/*minimal_results.phandango* ~/Desktop/
+```
+
+Drag and drop these two files on [Phandango](http://jameshadfield.github.io/phandango/#/) website. What types of resistance genes do you see in these Acinetobacter genomes? This [review](http://aac.asm.org/content/55/3/947.full) may help interpret.
+
+> ***iii. Explore full ARIBA matrix in R***
+
+- Now, Fire up R console or studio and read ariba full report "Abau_genomes_ariba_all_results.csv"
+
+```
+ariba_full = read.csv(file = 'Abau_genomes_ariba_all_results.csv', row.names = 1)
+```
+
+- Subset to get description for each gene
+
+```
+ariba_full_asm = ariba_full[, grep('assembled',colnames(ariba_full))]
+```
+
+- Make binary for plotting purposes
+
+```
+ariba_full_asm[,] = as.numeric(ariba_full_asm != 'no')
+```
+
+- Make a heatmap!
+
+```
+heatmap(as.matrix(ariba_full_asm), scale = "none", col= c('black', 'red'), margins = c(10,5), cexRow = 0.75)
+```
+
+Perform pan-genome analysis with [Roary](https://sanger-pathogens.github.io/Roary/)
+----------------------------------------
+
+Roary is a pan genome pipeline, which takes annotated assemblies in GFF3 format and calculates the pan genome. The pan-genome is just a fancy term for the full complement of genes in a set of genomes.
+
+The way Roary does this is by:
+1) Roary gets all the coding sequences from the GFF files, converts them into protein sequences, and creates pre-clusters of all the genes,
+2) Then, using BLASTP and MCL, Roary creates gene clusters and checks for paralogs, and
+3) Finally, Roary takes every isolate and orders them by presence/absence of genes.
+
+> ***i. Generate pan-genome matrix using Roary and GFF files***
+
+Make sure you are on an interactive node, as this will be even more computationally intensive!
+
+```
+iflux
+```
+
+Change your directory to day2_after
+
+```
+
+> Make sure to change username with your uniqname
+
+cd /scratch/micro612w18_fluxod/username/day2_after/
+
+or
+
+d2a
+
+```
+
+Load all the required dependencies and run roary on GFF files placed in Abau_genomes_gff folder.
+
+```
+module load samtools
+module load bedtools2
+module load cd-hit
+module load ncbi-blast
+module load mcl
+module load parallel
+module load mafft
+module load fasttree
+module load perl-modules
+module load R
+module load roary
+
+#Run roary
+roary -p 4 -f Abau_genomes_roary_output -r -n -v Abau_genomes_gff/*.gff
+```
+
+The above roary command will run the pan-genome pipeline on the gff files placed in Abau_genomes_gff using 4 threads (-p), save the results in an output directory Abau_genomes_roary_output (-f), generate R plots using .Rtab output files (-r), align core genes (-n) and print verbose output (-v).
+
+Change directory to Abau_genomes_roary_output to explore the results.
+
+```
+cd Abau_genomes_roary_output
+
+ls
+```
+
+Output files:
+
+1. summary_statistics.txt: This file is an overview of your pan genome analysis showing the number of core genes(present in all isolates) and accessory genes(genes absent from one or more isolates or unique to a given isolate).
+
+2. gene_presence_absence.csv: This file contains detailed information about each gene including their annotations, and can be opened in any spreadsheet software to manually explore the results. It contains a plethora of information such as gene name and functional annotation, whether a gene is present in a genome or not, minimum/maximum/average sequence length etc.
+
+3. gene_presence_absence.Rtab: This file is similar to the gene_presence_absence.csv file, however it just contains a simple tab delimited binary matrix with the presence and absence of each gene in each sample. It can be easily loaded into R using the read.table function for further analysis and plotting. The first row is the header containing the name of each sample, and the first column contains the gene name. A 1 indicates the gene is present in the sample, a 0 indicates it is absent.
+
+4. core_gene_alignment.aln: a multi-FASTA alignment of all of the core genes that can be used to generate a phylogenetic tree.
+
+
+
+> ***ii. Explore pan-genome matrix gene_presence_absence.csv and gene_presence_absence.Rtab using R***
+
+
+
+**Modify gene_presence_absence.Rtab file to include annotations**
+
+- Get column names from gene_presence_absence.csv file
+
+```
+head -n1 gene_presence_absence.csv | tr ',' '\n' | cat --number
+```
+- Pull columns of interest
+
+```
+cut -d "," -f 3 gene_presence_absence.csv | tr '"' '_' > gene_presence_absence_annot.csv
+```
+
+- Paste it into pan-genome matrix
+
+```
+paste -d "" gene_presence_absence_annot.csv gene_presence_absence.Rtab > gene_presence_absence_wannot.Rtab
+```
+
+- Check gene_presence_absence_wannot.Rtab file
+
+```
+less gene_presence_absence_wannot.Rtab
+```
+
+**Read matrix into R, generate exploratory plots and query pan-genome**
+
+Use scp or cyberduck to get gene_presence_absence_wannot.Rtab onto your laptop.
+
+> ***i. Prepare and clean data***
+
+- Fire up RStudio and read gene_presence_absence_wannot.Rtab into matrix.
+
+```
+pg_matrix = read.table('gene_presence_absence_wannot.Rtab', sep = "\t", quote = "", row.names = 1, skip = 1)
+```
+
+- Add column names back
+
+```
+colnames(pg_matrix) = c('ACICU', 'AbauA', 'AbauB', 'AbauC')
+```
+
+- Use head, str, dim, etc. to explore the matrix.
+
+> ***ii. Generate exploratory heatmaps.***
+
+- Make a heatmap for the full matrix
+
+```
+heatmap(as.matrix(pg_matrix), , scale = "none", distfun = function(x){dist(x, method = "manhattan")}, margin = c(10,10), cexCol = 0.85, cexRow = 0.5, col= c('black', 'red'))
+```
+
+- Make a heatmap for variable genes (present in at least one, but not all of the genomes)
+
+```
+
+pg_matrix_subset = pg_matrix[rowSums(pg_matrix > 0) > 0 & rowSums(pg_matrix > 0) < 4 ,]
+heatmap(as.matrix(pg_matrix_subset), , scale = "none", distfun = function(x){dist(x, method = "manhattan")}, margin = c(10,10), cexCol = 0.85, cexRow = 0.5, col= c('black', 'red'))
+
+```
+
+> ***iii. Query pan-genome***
+
+- Which genomes are most closely related based upon shared gene content?
+
+We will use the outer function to determine the number of genes shared by each pair of genomes.
+
+
+
+Look at the help page for outer to gain additional insight into how this is working.
+
+```
+help(outer)
+```
+
+```
+outer(1:4,1:4, FUN = Vectorize(function(x,y){sum(pg_matrix_subset[,x] > 0 & pg_matrix_subset[,y] > 0)}))
+```
+
+- What is the size of the core genome?
+
+Lets first get an overview of how many genes are present in different numbers of genomes (0, 1, 2, 3 or 4) by plotting a histogram. Here, we combine hist with rowSums to accomplish this.
+
+```
+hist(rowSums(pg_matrix > 0), col="red")
+```
+
+Next, lets figure out how big the core genome is (e.g. how many genes are common to all of our genomes)?
+
+```
+sum(rowSums(pg_matrix > 0) == 4)
+```
+
+- What is the size of the accessory genome?
+
+Lets use a similar approach to determine the size of the accessory genome (e.g. those genes present in only a subset of our genomes).
+
+```
+sum(rowSums(pg_matrix > 0) < 4 & rowSums(pg_matrix > 0) > 0)
+```
+
+- What types of genes are unique to a given genome?
+
+So far we have quantified the core and accessory genome, now lets see if we can get an idea of what types of genes are core vs. accessory. Lets start by looking at those genes present in only a single genome.
+
+```
+row.names(pg_matrix[rowSums(pg_matrix > 0) == 1,])
+```
+
+What do you notice about these genes?
+
+- What is the number of hypothetical genes in core vs. accessory genome?
+
+Looking at unique genes we see that many are annotated as “hypothetical”, indicating that the sequence looks like a gene, but has no detectable homology with a functionally characterized gene.
+
+Determine the fraction of “hypothetical” genes in unique vs. core.
+
+```
+sum(grepl("hypothetical" , row.names(pg_matrix[rowSums(pg_matrix > 0) == 1,]))) / sum(rowSums(pg_matrix > 0) == 1)
+sum(grepl("hypothetical" , row.names(pg_matrix[rowSums(pg_matrix > 0) == 4,]))) / sum(rowSums(pg_matrix > 0) == 4)
+```
+
+Why does this make sense?
+
+Perform genome comparisons with [ACT](http://www.sanger.ac.uk/science/tools/artemis-comparison-tool-act)
+-------------------------------------
+[[back to top]](day2_afternoon.html)
+[[HOME]](index.html)
+
+In the previous exercises we were focusing on gene content, but losing the context of the structural variation underlying gene content variation (e.g. large insertions and deletions).
+Here we will use ACT to compare two of our genomes (note that you can use ACT to compare more than two genomes if desired).
+
+> ***i. Create ACT alignment file with BLAST***
+
+As we saw this morning, to compare genomes in ACT we need to use BLAST to create the alignments. We will do this on flux.
+
+```
+
+cd /scratch/micro612w18_fluxod/username/day2_after
+blastall -p blastn -i ./Abau_genomes/AbauA_genome.fasta -d ./Abau_BLAST_DB/ACICU_genome.fasta -m 8 -e 1e-20 -o AbauA_vs_ACICU.blast
+
+```
+
+> ***ii. Read in genomes, alignments and annotation files***
+
+Use scp or cyberduck to transfer Abau_ACT_files folder onto your laptop
+
+
+1. Abau_genomes/AbauA_genome.fasta
+2. Abau_genomes/ACICU_genome.fasta
+3. AbauA_vs_ACICU.blast
+4. Abau_ACT_files/AbauA_genome_gene.gff
+5. Abau_ACT_files/ACICU_genome_gene.gff
+
+
+> ***iii. Explore genome comparison and features of ACT***
+
+Read in genomes and alignment into ACT
+
+```
+
+Go to File -> open
+Sequence file 1 = ACICU_genome.fasta
+Comparison file 1 = AbauA_vs_ACICU.blast
+Sequence file 2 = AbauA_genome.fasta
+
+```
+
+Previously we used the annotation present in GenBank files. Here we will use ACT-specific annotation files so we get a prettier display (resistance genes = red, transposable elements = bright green)
+
+```
+
+Go to File -> ACICU_genome.fasta -> Read an entry file = ACICU_genome_gene.gff
+
+Go to File -> AbauA_genome.fasta -> Read an entry file = AbauA_genome_gene.gff
+
+```
+
+Play around in ACT to gain some insight into the sorts of genes present in large insertion/deletion regions.
+See if you can find:
+
+1) differences in phage content,
+2) membrane biosynthetic gene cluster variation and
+3) antibiotic resistance island variation.
+
diff --git a/docs/build/html/_sources/day2_morning.txt b/docs/build/html/_sources/day2_morning.txt
new file mode 100644
index 0000000..b04b558
--- /dev/null
+++ b/docs/build/html/_sources/day2_morning.txt
@@ -0,0 +1,442 @@
+Day 2 Morning
+=============
+[[HOME]](index.html)
+
+On day 1 we worked through a pipeline to map short-read data to a pre-existing assembly and identify single-nucleotide variants (SNVs) and small insertions/deletions. However, what this sort of analysis misses is the existence of sequence that is not present in your reference. Today we will tackle this issue by assembling our short reads into larger sequences, which we will then analyze to characterize the functions unique to our sequenced genome.
+
+Execute the following command to copy files for this morning’s exercises to your workshop home directory:
+
+```
+> Note: Make sure you change 'username' in the commands below to your 'uniqname'.
+
+wd
+
+#or
+
+cd /scratch/micro612w18_fluxod/username
+
+> Note: Check if you are in your home directory(/scratch/micro612w18_fluxod/username) by executing 'pwd' in terminal. 'pwd' stands for present working directory and it will display the directory you are in.
+
+pwd
+
+> Note: Copy files for this morning's exercise in your home directory.
+
+cp -r /scratch/micro612w18_fluxod/shared/data/day2_morn ./
+```
+
+Genome Assembly using [Spades](http://bioinf.spbau.ru/spades) Pipeline
+------------------------------
+[[back to top]](day2_morning.html)
+[[HOME]](index.html)
+
+![alt tag](intro.png)
+
+There are a wide range of tools available for assembly of microbial genomes. These assemblers fall in to two general algorithmic categories, which you can learn more about [here](?). In the end, most assemblers will perform well on microbial genomes, unless there is unusually high GC-content or an over-abundance of repetitive sequences, both of which make accurate assembly difficult.
+
+Here we will use the Spades assembler with default parameters. Because genome assembly is a computationally intensive process, we will submit our assembly jobs to the cluster, and move ahead with some pre-assembled genomes, while your assemblies are running.
+
+> ***i. Create directory to hold your assembly output.***
+
+Create a new directory for the spades output in your day2_morn folder
+
+```
+> Note: Make sure you change 'username' in the below command with your 'uniqname'.
+
+d2m
+
+#or
+
+cd /scratch/micro612w18_fluxod/username/day2_morn
+
+> We will create a new directory in day2_morn to save genome assembly results:
+
+mkdir Rush_KPC_266_assembly_result
+
+```
+
+Now, we will use a genome assembly tool called Spades for assembling the reads.
+
+> ***ii. Test out Spades to make sure it's in your path***
+
+To make sure that your paths are set up correctly, try running Spades with the -h (help) flag, which should produce usage instructions.
+
+```
+> check if spades is working.
+
+spades.py -h
+
+```
+
+> ***iii. Submit a cluster job to assemble***
+
+Since it takes a huge amount of memory and time to assemble genomes using spades, we will run a pbs script on the cluster for this step.
+
+Now, open the spades.pbs file residing in the day2_morn folder with nano and add the following spades command to the bottom of the file. Replace the EMAIL_ADDRESS in spades.pbs file with your actual email-address. This will make sure that whenever the job starts, aborts or ends, you will get an email notification.
+
+```
+> Open the spades.pbs file using nano:
+
+nano spades.pbs
+
+> Now replace the EMAIL_ADDRESS in spades.pbs file with your actual email-address. This will make sure that whenever the job starts, aborts or ends, you will get an email notification.
+
+> Copy and paste the below command to the bottom of spades.pbs file.
+
+spades.py --pe1-1 forward_paired.fq.gz --pe1-2 reverse_paired.fq.gz --pe1-s forward_unpaired.fq.gz --pe1-s reverse_unpaired.fq.gz -o Rush_KPC_266_assembly_result/ --careful
+
+```
+
+> ***iv. Submit your job to the cluster with qsub***
+
+```
+qsub -V spades.pbs
+```
+
+> ***v. Verify that your job is in the queue with the qstat command***
+
+```
+qstat -u username
+```
+
+Assembly evaluation using [QUAST](http://bioinf.spbau.ru/quast)
+---------------------------------
+[[back to top]](day2_morning.html)
+[[HOME]](index.html)
+
+The output of an assembler is a set of contigs (contiguous sequences), that are composed of the short reads that we fed in. Once we have an assembly we want to evaluate how good it is. This is somewhat qualitative, but there are some standard metrics that people use to quantify the quality of their assembly. Useful metrics include: i) number of contigs (the fewer the better), ii) N50 (the contig length such that contigs of that length or longer contain at least 50% of your assembly; the bigger the better). In general you want your assembly to be less than 200 contigs and have an N50 greater than 50 Kb, although these numbers are highly dependent on the properties of the assembled genome.
+
+To evaluate some example assemblies we will use the tool quast. Quast produces a series of metrics describing the quality of your genome assemblies.
+
+> ***i. Run quast on a set of previously generated assemblies***
+
+Now to check the example assemblies residing in your day2_morn folder, run the below quast command. Make sure you are in day2_morn folder in your home directory using 'pwd'
+
+```
+quast.py -o quast sample_264_contigs.fasta sample_266_contigs.fasta
+```
+
+The command above will generate a report file in /scratch/micro612w18_fluxod/username/day2_morn/quast
+
+> ***ii. Explore quast output***
+
+QUAST creates output in different formats such as html, pdf and text. Now let's check the report.txt file residing in the quast folder for assembly statistics. Open report.txt using less.
+
+```
+less quast/report.txt
+```
+
+Check the difference between the different assembly statistics. Also check the different types of report it generated.
+
+Generating multiple sample reports using [multiqc](http://multiqc.info/)
+--------------------------------------------------
+
+![alt tag](multiqc.jpeg)
+
+Let's imagine a real-life scenario where you are working on a project which requires you to analyze and process hundreds of samples. Having a few samples with extremely bad quality is very commonplace. Including these bad samples into your analysis without adjusting their quality threshold can have a profound effect on downstream analysis and interpretations.
+
+- Question: How will you find those bad apples?
+
+Yesterday, we learned how to assess and control the quality of samples as well as screen for contaminants. But the problem with such tools or any other tools is, they work on per-sample basis and produce only single report/logs per sample. Therefore, it becomes cumbersome to dig through each sample's reports and make appropriate quality control calls.
+
+Thankfully, there is a tool called multiqc which parses the results directory containing output from various tools, reads the log report created by those tools (ex: FastQC, FastqScreen, Quast), aggregates them and creates a single report summarizing all of these results so that you have everything in one place. This helps greatly in identifying the outliers and removing or reanalyzing them individually.
+
+Let's take a look at one such multiqc report that was generated using FastQC results on *C. difficile* samples.
+
+Download the html report Cdiff_multiqc_report.html from your day2_morn folder.
+
+```
+#Note: Make sure you change 'username' in the below command to your 'uniqname'.
+
+scp username@flux-xfer.arc-ts.umich.edu:/scratch/micro612w18_fluxod/username/day2_morn/Cdiff_multiqc_report.html /path-to-local-directory/
+
+```
+
+- Question: Open this report in a browser and try to find the outlier sample/s
+
+- Question: What is the most important parameter to look for while identifying contamination or bad samples?
+
+- Question: What is the overall quality of data?
+
+Lets run multiqc on one such directory where we ran and stored FastQC, FastQ Screen and Quast reports.
+
+If you are not in the day2_morn folder, navigate to it and change directory to multiqc_analysis
+
+```
+d2m
+
+#or
+
+cd /scratch/micro612w18_fluxod/username/day2_morn/
+
+cd multiqc_analysis
+
+#Load python and Try invoking multiqc
+
+module load python-anaconda2/latest
+
+multiqc -h
+
+#Run multiqc on sample reports
+
+multiqc ./ --force --filename workshop_multiqc
+
+#Check if workshop_multiqc.html report was generated
+
+ls
+
+#transfer this report to your local system and open it in a browser for visual inspection
+
+scp username@flux-xfer.arc-ts.umich.edu:/scratch/micro612w18_fluxod/username/day2_morn/multiqc_analysis/workshop_multiqc.html /path-to-local-directory/
+
+```
+
+The report contains the Assembly, Fastq Screen and FastQC reports for a mixture of 51 organisms' sequence data. Sample names for Assembly statistics end with "l500_contigs".
+
+- Question: Play around with the General statistics table by sorting different columns. (click on a column header). To view just the assembly statistics, click on the N50 column header. Which sample has the worst N50 value? What do you think must be the reason?
+
+- Question: Which two samples' genome lengths, i.e. column Length (Mbp), stand out from all the other genome lengths? What is their GC %? What about their FastQ Screen result?
+
+- Question: What about Number of Contigs section? Are you getting reasonable number of contigs or is there any bad assembly?
+
+- Question: Any sample's quality stand out from the rest of the bunch?
+
+
+Compare assembly to reference genome and post-assembly genome improvement
+-------------------------------------------------------------------------
+[[back to top]](day2_morning.html)
+[[HOME]](index.html)
+
+Now that we feel confident in our assembly, let's compare it to our reference to see if we can identify any large insertions/deletions using a graphical user interface called Artemis Comparison Tool (ACT) for visualization.
+
+
+
+In order to simplify the comparison between assembly and reference, we first need to orient the order of the contigs to reference.
+
+> ***i. Run abacas to orient contigs to the reference***
+
+To orient our contigs relative to the reference we will use a tool called abacas. [ABACAS](http://www.sanger.ac.uk/science/tools/pagit) aligns contigs to a reference genome and then stitches them together to form a “pseudo-chromosome”.
+
+Go back to flux and into the directory where the assembly is located.
+
+```
+d2m
+
+#or
+
+cd /scratch/micro612w18_fluxod/username/day2_morn/
+```
+
+Now, we will run abacas using these input parameters:
+
+1) your reference sequence (-r KPNIH1.fasta),
+
+2) your contig file (-q sample_266_contigs.fasta),
+
+3) the program to use to align contigs to reference (-p nucmer),
+
+4) append unmapped contigs to end of file (-b),
+
+5) use default nucmer parameters (-d),
+
+6) append contigs into pseudo-chromosome (-a),
+
+7) the prefix for your output files (-o sample_266_contigs_ordered)
+
+Check if abacas can be properly invoked:
+
+```
+abacas.1.3.1.pl -h
+```
+
+Run abacas on assembly:
+
+```
+abacas.1.3.1.pl -r KPNIH1.fasta -q sample_266_contigs.fasta -p nucmer -b -d -a -o sample_266_contigs_ordered
+```
+
+> ***ii. Use ACT to view contig alignment to reference genome***
+
+- Use scp to get the ordered fasta sequence and .crunch file onto your laptop
+
+```
+> Dont forget to change username and /path-to-local-ACT_contig_comparison-directory/ in the below command
+
+scp username@flux-xfer.arc-ts.umich.edu:/scratch/micro612w18_fluxod/username/day2_morn/sample_266_contigs_ordered* /path-to-previously-created-local-ACT_contig_comparison-directory/
+
+```
+
+- Read files into ACT
+
+```
+Go to File on top left corner of ACT window -> open
+Sequence file 1 = KPNIH.gb
+Comparison file 1 = sample_266_contigs_ordered.crunch
+Sequence file 2 = sample_266_contigs_ordered.fasta
+
+Click Apply button
+
+Dont close the ACT window
+```
+
+- Notice that the alignment is totally beautiful now!!! Scan through the alignment and play with ACT features to look at genes present in reference but not in assembly. Keep the ACT window open for further visualizations.
+
+![alt tag](beautiful.png)
+
+Map reads to the final ordered assembly
+---------------------------------------
+[[back to top]](day2_morning.html)
+[[HOME]](index.html)
+
+You already know the drill/steps involved in reads mapping. Here, we will map the reads to the final ordered assembly genome instead of KPNIH1.fasta.
+
+- First create a bwa index of the ordered fasta file.
+
+```
+> Only proceed further if everything has worked up until now. Make sure you are in the day2_morn directory.
+
+d2m
+
+#or
+
+cd /scratch/micro612w18_fluxod/username/day2_morn/
+
+bwa index sample_266_contigs_ordered.fasta
+samtools faidx sample_266_contigs_ordered.fasta
+
+```
+
+- Align the trimmed reads which we used for genome assembly to this ordered assembly using BWA mem. Convert SAM to BAM. Sort and index it.
+
+```
+
+bwa mem -M -R "@RG\tID:96\tSM:Rush_KPC_266_1_combine.fastq.gz\tLB:1\tPL:Illumina" -t 8 sample_266_contigs_ordered.fasta forward_paired.fq.gz reverse_paired.fq.gz > sample_266_contigs_ordered.sam
+
+samtools view -Sb sample_266_contigs_ordered.sam > sample_266_contigs_ordered.bam
+
+samtools sort sample_266_contigs_ordered.bam sample_266_contigs_ordered_sort
+
+samtools index sample_266_contigs_ordered_sort.bam
+
+```
+
+- Lets visualize the alignments against our ordered assembly.
+
+Copy these sorted and indexed BAM files to your local ACT_contig_comparison directory.
+
+```
+> Dont forget to change username and /path-to-local-ACT_contig_comparison-directory/ in the below command
+
+scp username@flux-xfer.arc-ts.umich.edu:/scratch/micro612w18_fluxod/username/day2_morn/sample_266_contigs_ordered_sort* /path-to-previously-created-local-ACT_contig_comparison-directory/
+
+```
+
+```
+Go back to ACT where your ordered contigs are still open in the window.
+
+Select File -> sample_266_contigs_ordered.fasta -> Read BAM/VCF -> select the sorted bam file (sample_266_contigs_ordered_sort.bam) you just copied from flux.
+```
+
+![alt tag](aligned_reads_deletion.png)
+
+Using abacas and ACT to compare VRE/VSE genome
+----------------------------------------------
+
+Now that we learned how ACT can be used to explore and compare genome organization and differences, try comparing VSE_ERR374928_contigs.fasta, a Vancomycin-susceptible Enterococcus against a Vancomycin-resistant Enterococcus reference genome Efaecium_Aus0085.fasta that are placed in VRE_vanB_comparison folder under day2_morn directory. The relevant reference genbank file that can be used in ACT is Efaecium_Aus0085.gbf.
+
+Genome Annotation
+-----------------
+[[back to top]](day2_morning.html)
+[[HOME]](index.html)
+
+**Identify protein-coding genes with [Prokka](http://www.vicbioinformatics.com/software.prokka.shtml)**
+
+From our ACT comparison of our assembly and the reference we can clearly see that there is unique sequence in our assembly. However, we still don’t know what that sequence encodes! To try to get some insight into the sorts of genes unique to our assembly we will run a genome annotation pipeline called Prokka. Prokka works by first running *de novo* gene prediction algorithms to identify protein coding genes and tRNA genes. Next, for protein coding genes Prokka runs a series of comparisons against databases of annotated genes to generate putative annotations for your genome.
+
+> ***i. Run Prokka on assembly***
+
+```
+prokka --setupdb
+```
+
+Execute Prokka on your ordered assembly
+
+```
+> Make sure you are in day2_morn directory.
+
+d2m
+
+#or
+
+cd /scratch/micro612w18_fluxod/username/day2_morn/
+
+mkdir sample_266_prokka
+
+prokka -kingdom Bacteria -outdir sample_266_prokka -force -prefix sample_266 sample_266_contigs_ordered.fasta
+
+> Use scp or cyberduck to get Prokka annotated genome on your laptop. Dont forget to change username in the below command
+
+scp -r username@flux-xfer.arc-ts.umich.edu:/scratch/micro612w18_fluxod/username/day2_morn/sample_266_prokka/ /path-to-local-ACT_contig_comparison-directory/
+
+```
+
+> ***ii. Reload comparison into ACT now that we’ve annotated the un-annotated!***
+
+Read files into ACT
+
+```
+Go to File on top left corner of ACT window -> open
+Sequence file 1 = KPNIH.gb
+Comparison file 1 = sample_266_contigs_ordered.crunch
+Sequence file 2 = sample_266.gbf (located inside the sample_266_prokka folder you just downloaded)
+```
+
+- Play around with ACT to see what types of genes are unique to sample 266!!!
diff --git a/docs/build/html/_sources/day3_afternoon.txt b/docs/build/html/_sources/day3_afternoon.txt
new file mode 100644
index 0000000..f47affe
--- /dev/null
+++ b/docs/build/html/_sources/day3_afternoon.txt
@@ -0,0 +1,237 @@
+Day 3 Afternoon
+===============
+[[HOME]](index.html)
+
+Klebsiella pneumoniae comparative genomic analysis
+--------------------------------------------------
+
+To finish up the workshop we are going to go through the process of working up a complete dataset, from start to finish. This set of genomes originated from a regional outbreak of bla-KPC carrying Klebsiella pneumoniae – one of the most concerning healthcare associated pathogens.
+The goal is to follow up on a previously [published](http://cid.oxfordjournals.org/content/53/6/532.abstract) epidemiologic analysis, and see if genomics supports prior epidemiologic conclusions and can provide additional insights.
+We have our genomes, and we know in which regional facility each isolate originated.
+
+The goal of this exercise is to:
+
+1) process our genomes (QC, variant calling),
+
+2) perform a phylogenetic analysis and
+
+3) overlay our meta-data.
+
+To make this more difficult, the instructions will be much more vague than in previous sessions, and you will be challenged to use what you have learned, both in the past three days and in the prior workshop, to complete this analysis.
+
+Hopefully we’ve prepared you to take on the challenge, but remember this is an open book test!
+
+Feel free to lean on materials from the workshops, manuals of tools and Google (and of course instructors and neighbors).
+
+Execute the following command to copy files for this afternoon’s exercises to your scratch directory:
+
+```
+
+cd /scratch/micro612w18_fluxod/username
+
+#or
+
+wd
+
+cp -r /scratch/micro612w18_fluxod/shared/data/day3_after ./
+
+```
+
+Perform QC on fastq files
+-------------------------
+[[back to top]](day3_afternoon.html)
+[[HOME]](index.html)
+
+On the first morning you ran FastQC to evaluate the quality of a single genome. However, a typical project will include many genomes and you will want to check the quality of all of your samples. From the bash workshop, I hope you can appreciate that you do not want to process 100 genomes by typing 100 commands – rather you want to write a short shell script to do the work for you!
+
+
+> ***i. Edit the shell script fastqc.sh located in /scratch/micro612w18_fluxod/your username/day3_after to run FastQC on all fastq files.***
+
+**Important info about this shell script**
+- The shell script includes a for loop that loops over all of the genomes in the target directory
+- The tricky part of this shell script is that each fastq command contains two files (forward and reverse reads). So, you need to take advantage of the fact that the forward and reverse read files both have the same prefix, and you can loop over these prefixes.
+- You should be able to get prefixes by piping the following unix commands: ls, cut, sort, uniq
+- The prefix should be a part of both forward and reverse reads. For example, the file_prefix for samples Rush_KPC_264_1_sequence.fastq.gz and Rush_KPC_264_2_sequence.fastq.gz should be Rush_KPC_264
+- When you are testing your shell script, comment out (using #) the lines below echo so you can check whether the script is 'echo'-ing the correct commands.
+- Try running multiqc inside the script by adding the multiqc command with appropriate out directory
+- Don't run multiqc inside the for loop; it should be run only once, after the for loop ends.
+
+
+The fastq files are located in:
+
+```
+/scratch/micro612w18_fluxod/shared/data/day3_after_fastq/
+```
+
+Rather than copying these to your directory, analyze the files directly in that directory, so everyone doesn’t have to copy 25G to their home directories.
+
+Copy and paste the command to run fastqc.sh into a PBS script and submit this PBS script as a job to flux.
+
+Your PBS script will contain the following command after the PBS preamble (make sure your $PBS_O_WORKDIR is set inside the PBS script):
+
+```bash fastqc.sh /scratch/micro612w18_fluxod/shared/data/day3_after_fastq/ ```
+
+
+> ***ii. Examine output of FastQC to verify that all samples are OK***
+
+Check the multiqc report of your fastq files.
+
+Examine results of [SPANDx](http://www.ncbi.nlm.nih.gov/pubmed/25201145) pipeline
+---------------------------
+[[back to top]](day3_afternoon.html)
+[[HOME]](index.html)
+
+On the afternoon of day 1 we saw how many steps are involved in calling variants relative to a reference genome. However, the same steps are applied to every sample, which makes this very pipeline friendly! So, you could write your own shell script to string together these commands, or take advantage of one of several published pipelines. Here, we will use the output of the SPANDx pipeline, which takes as input a directory of fastq files and produces core variant and indel calls.
+
+More information on SPANDx pipeline can be obtained from [this](https://sourceforge.net/projects/spandx/files/SPANDx%20Manual_v3.1.pdf/download) manual.
+
+A snapshot of the pipeline is shown below:
+
+![alt tag](spandx.jpg)
+
+Because it takes a while to run, we have pre-run it for you. Your task will be to sort through the outputs of SPANDx. The detailed information about how to interpret the output is in SPANDx manual(section INTERPRETING THE OUTPUTS).
+
+> ***i. Look at overall statistics for variant calling in excel***
+
+SPANDx produces an overall summary file of its run that includes:
+
+1) numbers of SNPs/indels,
+
+2) numbers of filtered SNPs/indels and
+
+3) average coverage across the reference genome.
+
+This summary file is in: Outputs/Single_sample_summary.txt
+
+Use less to look at this file and then apply unix commands to extract and sort individual columns
+
+**HINTS**
+The following unix commands can be used to get sorted lists of coverage and numbers of SNPs/indels: tail, cut, sort
+
+> ***ii. Look at filtered variants produced by SPANDx in excel***
+
+SPANDx also produces a summary file of the variants/indels it identified in the core genome.
+
+This summary file is:
+```/scratch/micro612w18_fluxod/username/day3_after/SPANDx_output/Outputs/All_SNPs_annotated.txt ```
+
+Use cyberduck/scp to download this file and view in excel
+
+- View SPANDx manual for interpretation of different columns which can be found [here](https://sourceforge.net/projects/spandx/files/SPANDx%20Manual_v3.1.pdf/download)
+- Back on Flux, use grep to pull SNPs that have HIGH impact
+- What types of mutations are predicted to have “HIGH” impact?
+- How many genomes do these HIGH impact mutations tend to be present in? How do you interpret this?
+
+Recombination detection and tree generation
+-------------------------------------------
+[[back to top]](day3_afternoon.html)
+[[HOME]](index.html)
+
+> ***i. Plot the distribution of variants across the genome in R***
+
+The positions of variants are embedded in the first column of Outputs/Comparative/All_SNPs_annotated.txt, but you have to do some work to isolate them!
+
+**HINTS**
+
+- You will need to pipe together two “cut” commands: the first command will use tab as a delimiter and the second will use _.
+- Note that for cut you can specify tab as the delimiter as follows: cut -d$'\t' and _ as: cut -d '_'
+- You should redirect the output of your cut commands (a list of SNP positions) to a file called ‘snp_positions.txt’. For example, the first line of your snp_positions.txt should be:
+```
+12695
+```
+- Finally, download this file, read it into R using ‘read.table’ and use ‘hist’ to plot a histogram of the positions
+- Do you observe clustering of variants that would be indicative of recombination?
+
+> ***ii. Create fasta file of variants from nexus file***
+
+SPANDx creates a file of core SNPs in a slightly odd format (transposed nexus).
+This file is called:
+```/scratch/micro612w18_fluxod/username/day3_after/SPANDx_output/Outputs/Comparative/Ortho_SNP_matrix.nex ```
+
+For convenience, apply the custom perl script located in the same directory to convert it to fasta format
+
+```
+perl transpose_nex_to_fasta.pl Ortho_SNP_matrix.nex
+```
+
+This file Outputs/Comparative/Ortho_SNP_matrix.fasta should now exist
+
+> ***iii. Create maximum likelihood tree in Seaview***
+
+```
+
+Download Ortho_SNP_matrix.fasta to your home computer
+Import the file into Seaview and construct a tree using PhyML (100 bootstraps)
+Save tree for later analysis
+
+```
+
+Phylogenetic tree annotation and visualization
+----------------------------------------------
+[[back to top]](day3_afternoon.html)
+[[HOME]](index.html)
+
+> ***i. Load the maximum likelihood tree into iTOL***
+
+Note that because the out-group is so distantly related it is difficult to make out the structure of the rest of the tree.
+
+**To remedy this:**
+
+- Click on the KPNIH1 leaf, go to the “tree structure” menu and “delete leaf”
+- Click on the extended branch leading to where KPNIH1 was, go to the “tree structure” menu and click “collapse branch”
+
+> ***ii. Load the annotation file ‘Rush_KPC_facility_codes_iTOL.txt’ to view the facility of isolation, play with tree visualization properties to understand how isolates group by facility, Circular vs. normal tree layout, Bootstrap values, Ignoring branch lengths***
+
+```
+
+Which facilities appear to have a lot of intra-facility transmission based on grouping of isolates from the same facility?
+Which patient’s infections might have originated from the blue facility?
+
+```
+
+Assessment of genomic deletions
+-------------------------------
+[[back to top]](day3_afternoon.html)
+[[HOME]](index.html)
+
+> ***i. Download genome coverage bed file and load into R***
+
+This file is located in: Outputs/Comparative/Bedcov_merge.txt
+This file contains information regarding locations in the reference genome that each sequenced genome does and does not map to.
+
+The first 3 columns of the file are:
+
+1) the name of the reference,
+
+2) the start coordinate of the window and
+
+3) the end coordinate of the window
+
+The remaining columns are your analyzed genomes, with the values indicating the fraction of the window covered by reads in that genome.
+
+In essence, this file contains information on parts of the reference genome that might have been deleted in one of our sequenced genomes.
+
+After you download this file, read it into R
+
+**HINTS**
+- Use the read.table function with the relevant parameters being: header and sep
+
+> ***ii. Plot heatmap of genome coverage bed file***
+
+**HINTS**
+
+- The first 3 columns of the bed file specify the name of the chromosome and the genome coordinates – therefore you want to subset your matrix to not include these columns
+- Use the heatmap3 function to make your heatmap with the following parameters: scale = “none” (keeps original values), Rowv = NA (suppress clustering by rows – why might we not want to cluster by rows for this analysis?)
+
+- Note a large genomic deletion among a subset of isolates. Does this deletion fit with the phylogeny from above?
+
+> ***iii. Explore genomic deletion in more detail with ACT***
+
+- Use abacas to orient contigs from Rush_KPC_298 to KPNIH
+- Load KPNIH.gb, Rush_KPC_298_ordered and the .crunch alignment into ACT
+
+```
+
+What genes appear to have been lost?
+
+```
diff --git a/docs/build/html/_sources/day3_morning.txt b/docs/build/html/_sources/day3_morning.txt
new file mode 100644
index 0000000..a8794fc
--- /dev/null
+++ b/docs/build/html/_sources/day3_morning.txt
@@ -0,0 +1,415 @@
+Day 3 Morning
+=============
+[[HOME]](index.html)
+
+On day 1, we ran through a pipeline to map reads against a reference genome and call variants, but didn’t do much with the variants we identified. Among the most common analyses to perform on a set of variants is to construct phylogenetic trees. Here we will explore different tools for generating and visualizing phylogenetic trees, and also see how recombination can distort phylogenetic signal.
+
+For the first several exercises, we will use the A. baumannii genomes that we worked with yesterday afternoon.
+The backstory on these genomes is that Abau_A, Abau_B and Abau_C are representatives of three clones (as defined by pulsed-field gel electrophoresis - a low-resolution typing method) that were circulating in our hospital.
+
+One of the goals of our published study was to understand the relationship among these clones to discern whether:
+
+1) the three clones represent three independent introductions into the hospital or
+
+2) the three clones originated from a single introduction into the hospital, with subsequent genomic rearrangement leading to the appearance of unique clones.
+
+The types of phylogenetic analyses you will be performing here are the same types that we used to decipher this mystery.
+The other two genomes you will be using are ACICU and AB0057. ACICU is an isolate from a hospital in France, and its close relationship to our isolates makes it a good reference for comparison. AB0057 is a more distantly related isolate that we will utilize as an out-group in our phylogenetic analysis. The utility of an out-group is to help us root our phylogenetic tree, and gain a more nuanced understanding of the relationship among strains.
+
+Execute the following command to copy files for this morning’s exercises to your scratch directory:
+
+```
+wd
+
+#or
+
+cd /scratch/micro612w18_fluxod/username
+
+cp -r /scratch/micro612w18_fluxod/shared/data/day3_morn ./
+
+```
+
+Perform whole genome alignment with [Mauve](http://darlinglab.org/mauve/mauve.html) and convert alignment to other useful formats
+-------------------------------------------
+[[back to top]](day3_morning.html)
+[[HOME]](index.html)
+
+An alternative approach for identification of variants among genomes is to perform whole genome alignments of assemblies. If the original short read data is unavailable, this might be the only approach available to you. Typically, these programs don’t scale well to large numbers of genomes (e.g. > 100), but they are worth being familiar with. We will use the tool mauve for constructing whole genome alignments of our five A. baumannii genomes.
+
+> ***i. Perform mauve alignment and transfer xmfa back to flux***
+
+Use cyberduck/scp to get genomes folder Abau_genomes onto your laptop
+
+```
+Run these commands on your local system/terminal:
+
+cd ~/Desktop (or wherever your desktop is)
+
+mkdir Abau_mauve
+
+cd Abau_mauve
+
+- Now copy Abau_genomes folder residing in your day3_morn folder using scp or cyberduck:
+
+scp -r username@flux-xfer.arc-ts.umich.edu:/scratch/micro612w18_fluxod/username/day3_morn/Abau_genomes ./
+
+```
+
+Run mauve to create multiple alignment
+
+```
+
+i. Open mauve
+ii. File -> align with progressiveMauve
+iii. Click on “Add Sequence” and add each of the 5 genomes you just downloaded
+iv. Name the output file “mauve_ECII_outgroup” and make sure it is in the directory you created for this exercise
+v. Click Align!
+vi. Wait for Mauve to finish and explore the graphical interface
+
+```
+
+Use cyberduck or scp to transfer your alignment back to flux for some processing
+
+```
+
+scp ~/Desktop/Abau_mauve/mauve_ECII_outgroup username@flux-xfer.arc-ts.umich.edu:/scratch/micro612w18_fluxod/username/day3_morn
+
+```
+
+> ***ii. Convert alignment to fasta format***
+
+Mauve produces alignments in .xmfa format (use less to see what this looks like), which is not compatible with other programs we want to use. We will use a custom script convert_msa_format.pl to change the alignment format to fasta format
+
+
+```
+Now run these commands in the day3_morn folder on flux:
+
+module load bioperl
+
+perl convert_msa_format.pl -i mauve_ECII_outgroup -o mauve_ECII_outgroup.fasta -f fasta -c
+
+```
+
+Perform some DNA sequence comparisons and phylogenetic analysis in [APE](http://ape-package.ird.fr/), an R package
+------------------------------------------------------------------------
+[[back to top]](day3_morning.html)
+[[HOME]](index.html)
+
+There are lots of options for phylogenetic analysis. Here, we will use the ape package in R to look at our multiple alignments and construct a tree using the Neighbor Joining method.
+
+Note that ape has a ton of useful functions for more sophisticated phylogenetic analyses!
+
+> ***i. Get fasta alignment you just converted to your own computer using cyberduck or scp***
+
+```
+
+cd ~/Desktop/Abau_mauve
+
+
+scp username@flux-xfer.arc-ts.umich.edu:/scratch/micro612w18_fluxod/username/day3_morn/mauve_ECII_outgroup.fasta ./
+
+```
+
+> ***ii. Read alignment into R***
+
+Fire up RStudio, set your working directory to ~/Desktop/Abau_mauve/ or wherever you have downloaded mauve_ECII_outgroup.fasta file and install/load ape
+
+Use the read.dna function in ape to read in your multiple alignment.
+Print out the variable to get a summary.
+
+```
+setwd("~/Desktop/Abau_mauve/")
+install.packages("ape")
+library(ape)
+abau_msa = read.dna('mauve_ECII_outgroup.fasta', format = "fasta")
+```
+
+> ***iii. Get variable positions***
+
+The DNA object created by read.dna can also be addressed as a matrix, where the columns are positions in the alignment and rows are your sequences. We will next treat our alignment as a matrix, and use apply and colSums to get positions in the alignment that vary among our sequences. Examine these commands in detail to understand how they are working together to give you a logical vector indicating which positions vary in your alignment.
+
+```
+
+abau_msa_bin = apply(abau_msa, 2, FUN = function(x){x == x[1]})
+
+abau_var_pos = colSums(abau_msa_bin) < 5
+```
+
+> ***iv. Get non-gap positions***
+
+For our phylogenetic analysis we want to focus on the core genome, so we will next identify positions in the alignment where all our genomes have sequence.
+
+```
+non_gap_pos = colSums(as.character(abau_msa) == '-') == 0
+```
+
+> ***v. Count number of variants between sequences***
+
+Now that we know which positions in the alignment are core and variable, we can extract these positions and count how many variants there are among our genomes. To count pairwise variants we will use the dist.dna function in ape. The model parameter indicates that we want to compare sequences by counting differences. Print out the resulting matrix to see how different our genomes are.
+
+```
+
+abau_msa_var = abau_msa[,abau_var_pos & non_gap_pos ]
+var_count_matrix = dist.dna(abau_msa_var, model = "N")
+
+```
+
+> ***vi. Construct phylogenetic tree***
+
+Now we are ready to construct our first phylogenetic tree!
+
+We are going to use the Neighbor Joining algorithm, which takes a matrix of pairwise distances among the input sequences and produces the tree with the minimal total distance. In essence, you can think of this as a distance-based maximum parsimony algorithm, with the advantage being that it runs way faster than if you were to apply a standard maximum parsimony phylogenetic reconstruction.
+
+As a first step we are going to build a more accurate distance matrix, where instead of counting variants, we will measure nucleotide distance using the Jukes-Cantor model of sequence evolution. This is the simplest model of sequence evolution, with a single mutation rate assumed for all types of nucleotide changes.
+
+```
+dna_dist_JC = dist.dna(abau_msa, model = "JC")
+```
+
+Next, we will use the ape function nj to build our tree from the distance matrix
+
+```
+abau_nj_tree = nj(dna_dist_JC)
+```
+
+Finally, plot your tree to see how the genomes group.
+
+```
+plot(abau_nj_tree)
+```
+
+Perform SNP density analysis to discern evidence of recombination
+-----------------------------------------------------------------
+[[back to top]](day3_morning.html)
+[[HOME]](index.html)
+
+An often-overlooked aspect of a proper phylogenetic analysis is to exclude recombinant sequences. Homologous recombination in bacterial genomes is a mode of horizontal transfer, wherein genomic DNA is taken up and swapped in for a homologous sequence. The reason it is critical to account for these recombinant regions is that these horizontally acquired sequences do not represent the phylogenetic history of the strain of interest, but rather contain information regarding the strain from which the sequence was acquired. One simple approach for detecting the presence of recombination is to look at the density of variants across a genome. The existence of unusually high or low densities of variants is suggestive that these regions of aberrant density were horizontally acquired. Here we will look at our closely related A. baumannii genomes to see if there is evidence of aberrant variant densities.
+
+> ***i. Subset sequences to exclude the out-group***
+
+For this analysis we want to exclude the out-group, because we are interested in determining whether recombination would hamper our ability to reconstruct the phylogenetic relationship among our closely related set of genomes.
+
+- Note that the names of the sequences might be different for you, so check that if the command doesn’t work.
+
+```
+
+abau_msa_no_outgroup = abau_msa[c('ACICU_genome','AbauA_genome','AbauC_genome','AbauB_genome'),]
+
+```
+
+> ***ii. Get variable positions***
+
+Next, we will get the variable positions, as before
+
+```
+
+abau_msa_no_outgroup_bin = apply(abau_msa_no_outgroup, 2, FUN = function(x){x == x[1]})
+
+abau_no_outgroup_var_pos = colSums(abau_msa_no_outgroup_bin) < 4
+
+```
+
+> ***iii. Get non-gap positions***
+
+Next, we will get the core positions, as before
+
+```
+
+abau_no_outgroup_non_gap_pos = colSums(as.character(abau_msa_no_outgroup) == '-') == 0
+
+```
+
+> ***iv. Create overall histogram of SNP density***
+
+Finally, create a histogram of SNP density across the genome. Does the density look even, or do you think there might be just a touch of recombination?
+
+```
+hist(which(abau_no_outgroup_var_pos & abau_no_outgroup_non_gap_pos), 10000)
+```
+
+Perform recombination filtering with [Gubbins](https://www.google.com/search?q=gubbins+sanger&ie=utf-8&oe=utf-8)
+----------------------------------------------
+[[back to top]](day3_morning.html)
+[[HOME]](index.html)
+
+Now that we know there is recombination, we know that we need to filter out the recombinant regions to discern the true phylogenetic relationship among our strains. In fact, this is such an extreme case (~99% of variants are recombinant) that we could be totally misled without filtering recombinant regions. To accomplish this we will use the tool gubbins, which essentially relies on elevated regions of variant density to perform recombination filtering.
+
+> ***i. Run gubbins on your fasta alignment***
+
+Go back on flux and load modules required by gubbins
+
+
+
+```
+
+module load bioperl python-anaconda2/201607 biopython dendropy reportlab fasttree RAxML fastml/gub gubbins
+
+```
+
+Run gubbins on your fasta formatted alignment
+
+```
+d3m
+
+#or
+
+cd /scratch/micro612w18_fluxod/username/day3_morn
+
+run_gubbins.py -v -f 50 -o Abau_AB0057_genome mauve_ECII_outgroup.fasta
+
+```
+
+> ***ii. Create gubbins output figure***
+
+Gubbins produces a series of output files, some of which can be run through another program to produce a visual display of filtered recombinant regions. Run the gubbins_drawer.py script to create a pdf visualization of recombinant regions.
+
+The inputs are:
+
+1) the recombination filtered tree created by gubbins (mauve_ECII_outgroup.final_tree.tre),
+
+2) the pdf file to create (mauve_ECII_outgroup.recombination.pdf) and
+
+3) a .embl representation of recombinant regions (mauve_ECII_outgroup.recombination_predictions.embl).
+
+```
+
+gubbins_drawer.py -t mauve_ECII_outgroup.final_tree.tre -o mauve_ECII_outgroup.recombination.pdf mauve_ECII_outgroup.recombination_predictions.embl
+
+```
+> ***iii. Download and view gubbins figure and filtered tree***
+
+Use cyberduck or scp to get gubbins output files into Abau_mauve on your local system
+
+```
+
+cd ~/Desktop/Abau_mauve
+
+scp username@flux-xfer.arc-ts.umich.edu:/scratch/micro612w18_fluxod/username/day3_morn/mauve_ECII_outgroup.recombination.pdf ./
+scp username@flux-xfer.arc-ts.umich.edu:/scratch/micro612w18_fluxod/username/day3_morn/mauve_ECII_outgroup.final_tree.tre ./
+
+```
+
+Open up the pdf and observe the recombinant regions filtered out by gubbins. Does it roughly match your expectations based upon your SNP density plots?
+
+Finally, lets look at the recombination-filtered tree to see if this alters our conclusions.
+
+To view the tree we will use [Seaview](http://doua.prabi.fr/software/seaview), which is a multi-purpose tool for:
+
+1) visualization/construction of multiple alignments and
+
+2) phylogenetic tree construction.
+
+Here, we will just use Seaview to view our gubbins tree.
+
+```
+
+In seaview:
+
+Go to Trees -> import tree (mauve_ECII_outgroup.final_tree.tre)
+To view sub-tree of interest click on “sub-tree” and select the sub-tree excluding the out-group
+
+```
+
+
+How does the structure look different than the unfiltered tree?
+
+- Note that turning back to the backstory of these isolates, Abau_B and Abau_C were both isolated first from the same patient. So this analysis supports that patient having imported both strains, which likely diverged at a prior hospital at which they resided.
+
+Create annotated publication quality trees with [iTOL](http://itol.embl.de/)
+------------------------------------------------------
+[[back to top]](day3_morning.html)
+[[HOME]](index.html)
+
+For the final exercise we will use a different dataset, composed of USA300 methicillin-resistant Staphylococcus aureus genomes. USA300 is a strain of growing concern, as it has been observed to cause infections in both hospitals and in otherwise healthy individuals in the community. An open question is whether there are sub-clades of USA300 in the hospital and the community, or if they are all the same. Here you will create an annotated phylogenetic tree of strains from the community and the hospital, to discern if these form distinct clusters.
+
+> ***i. Download MRSA genome alignment from flux***
+
+Use cyberduck or scp to get genomes onto your laptop
+
+```
+
+cd ~/Desktop (or wherever your desktop is)
+mkdir MRSA_genomes
+cd MRSA_genomes
+
+scp username@flux-xfer.arc-ts.umich.edu:/scratch/micro612w18_fluxod/username/day3_morn/2016-3-9_KP_BSI_USA300.fa ./
+scp username@flux-xfer.arc-ts.umich.edu:/scratch/micro612w18_fluxod/username/day3_morn/2016-3-9_KP_BSI_USA300_iTOL_HA_vs_CA.txt ./
+
+
+```
+
+> ***ii. Look at SNP density for MRSA alignment in R***
+
+Before we embark on our phylogenetic analysis, let's look at the SNP density to verify that there is no recombination.
+
+```
+
+mrsa_msa = read.dna('2016-3-9_KP_BSI_USA300.fa', format = 'fasta')
+mrsa_msa_bin = apply(mrsa_msa, 2, FUN = function(x){x == x[1]})
+mrsa_var_pos = colSums(mrsa_msa_bin) < nrow(mrsa_msa_bin)
+hist(which(mrsa_var_pos), 10000)
+
+```
+
+Does it look like there is evidence of recombination?
+
+> ***iii. Create fasta alignment with only variable positions***
+
+Next, let's create a new fasta alignment file containing only the variant positions, as this will be easier to deal with in Seaview.
+
+```
+
+write.dna(mrsa_msa[, mrsa_var_pos], file = '2016-3-9_KP_BSI_USA300_var_pos.fa', format = 'fasta')
+
+```
+
+> ***iv. Read alignment into Seaview and construct Neighbor Joining tree***
+
+In the previous exercise, we used Seaview to look at a pre-existing tree. Here, we will use Seaview to create a tree from a
+multiple sequence alignment.
+
+Read in multiple alignment of variable positions
+
+```
+Go to File -> open ('2016-3-9_KP_BSI_USA300_var_pos.fa')
+```
+
+Construct Neighbor Joining phylogenetic tree with default parameters (note, this will take a few minutes)
+
+```
+Go to Trees -> select Distance Methods -> BioNJ -> (Select Bootstrap with 20 replicates) -> Go
+```
+
+Save your tree
+
+```
+File -> Save rooted tree
+```
+
+Note that in your research it is not a good idea to use these phylogenetic tools completely blind. I strongly encourage embarking on deeper learning yourself, or consulting with an expert, before doing an analysis for a publication.
+
+> ***v. Read tree into iTOL***
+
+```
+
+To make a prettier tree and add annotations we will use iTOL (http://itol.embl.de/).
+
+Go to http://itol.embl.de/
+
+To load your tree, click on upload, and select the rooted tree you just created in Seaview
+
+```
+
+Explore different visualization options for your tree (e.g. make it circular, show bootstrap values, try collapsing nodes/branches)
+
+Note that you can always reset your tree if you are unhappy with the changes you’ve made
+
+> ***vi. Add annotations to tree***
+
+One of the most powerful features of iTOL is its ability to overlay diverse types of descriptive meta-data on your tree (http://itol.embl.de/help.cgi#datasets). Here, we will overlay our data on whether an isolate was from a community or hospital infection. To do this simply drag-and-drop the annotation file (2016-3-9_KP_BSI_USA300_iTOL_HA_vs_CA.txt) on your tree and voila!
+
+- Do community and hospital isolates cluster together, or are they inter-mixed?
+
diff --git a/docs/build/html/_sources/index.rst.txt b/docs/build/html/_sources/index.rst.txt
new file mode 100644
index 0000000..b2886b8
--- /dev/null
+++ b/docs/build/html/_sources/index.rst.txt
@@ -0,0 +1,22 @@
+.. Micro 612 genomics workshop documentation master file, created by
+ sphinx-quickstart on Wed Feb 21 14:56:51 2018.
+ You can adapt this file completely to your liking, but it should at least
+ contain the root `toctree` directive.
+
+Welcome to Micro 612 genomics workshop's documentation!
+=======================================================
+
+Contents:
+
+.. toctree::
+ :maxdepth: 2
+
+
+
+Indices and tables
+==================
+
+* :ref:`genindex`
+* :ref:`modindex`
+* :ref:`search`
+
diff --git a/docs/build/html/_sources/index.txt b/docs/build/html/_sources/index.txt
new file mode 100644
index 0000000..b06f04d
--- /dev/null
+++ b/docs/build/html/_sources/index.txt
@@ -0,0 +1,27 @@
+
+Microbial Comparative Genomics Workshop
+=======================================
+
+A 3 day microbial bioinformatics workshop conducted by `Dr. Evan Snitkin <http://thesnitkinlab.com/index.php>`_ at `University of Michigan <https://www.umich.edu/>`_. This module covers the basics of microbial genomic analysis using publicly available tools that are commonly referenced in genomics literature. Students will learn the steps and associated tools that are required to process, annotate and compare microbial genomes.
+
+Date: Feb 28 - 2 March 2018
+
+Prerequisites
+-------------
+
+Prior participation in a `Software Carpentry Workshop <https://umswc.github.io/2018-02-26-UMich/>`_
+
+Workshop
+--------
+
+.. toctree::
+ :maxdepth: 5
+
+ day1_morning
+ day1_afternoon
+ day2_morning
+ day2_afternoon
+ day3_morning
+ day3_afternoon
+ online_resources
+
diff --git a/docs/build/html/_sources/index_backup.txt b/docs/build/html/_sources/index_backup.txt
new file mode 100644
index 0000000..1927aa7
--- /dev/null
+++ b/docs/build/html/_sources/index_backup.txt
@@ -0,0 +1,78 @@
+Microbial Comparative Genomics Workshop
+=======================================
+
+***A 3 day microbial bioinformatics workshop conducted by [Dr. Evan Snitkin](http://thesnitkinlab.com/index.php) at [University of Michigan](https://www.umich.edu/). This module covers the basics of microbial genomic analysis using publicly available tools that are commonly referenced in genomics literature. Students will learn the steps and associated tools that are required to process, annotate and compare microbial genomes.***
+
+***Date: Feb 28 - 2 March 2018***
+***
+
+
+Prerequisites
+-------------
+
+- Prior participation in a [Software Carpentry Workshop](https://umswc.github.io/2018-02-26-UMich/)
+***
+
+
+Link
+----
+
+GOTO: http://comparative-genomics.readthedocs.io/en/latest/index.html#
+***
+
+Workshop
+--------
+
+[Day 1 Morning](day1_morning.html)
+***
+- [Installing and setting up Cyberduck for file transfer](day1_morning.html#installing-and-setting-up-cyberduck-for-file-transfer)
+- [Getting your data onto Flux and setting up Environment variable](day1_morning.html#getting-your-data-onto-flux-and-setting-up-environment-variable)
+- [Unix is your friend](day1_morning.html#unix-is-your-friend)
+- [Quality Control using FastQC](day1_morning.html#quality-control-using-fastqc)
+- [Quality Trimming using Trimmomatic](day1_morning.html#quality-trimming-using-trimmomatic)
+
+[Day 1 Afternoon](day1_afternoon.html#day-1-afternoon)
+***
+- [Read Mapping](day1_afternoon.html#read-mapping)
+- [Variant Calling](day1_afternoon.html#variant-calling-and-filteration)
+- [Visualize BAM/VCF files in Artemis](day1_afternoon.html#visualize-bam-and-vcf-files-in-artemis)
+
+[Day 2 Morning](day2_morning.html#day-2-morning)
+***
+- [Genome Assembly](day2_morning.html#genome-assembly)
+- [Assembly evaluation](day2_morning.html#assembly-evaluation-using-quast)
+- [Compare assembly to reference genome and Post-assembly genome improvement](day2_morning.html#compare-assembly-to-reference-genome-and-post-assembly-genome-improvement)
+- [Map reads to the final ordered assembly](day2_morning.html#map-reads-to-the-final-ordered-assembly)
+- [Genome Annotation](day2_morning.html#genome-annotation)
+
+[Day 2 Afternoon](day2_afternoon.html#day-2-afternoon)
+***
+- [Determine which genomes contain beta-lactamase genes](day2_afternoon.html#determine-which-genomes-contain-beta-lactamase-genes)
+- [Identification of antibiotic resistance genes with ARIBA directly from paired-end reads](day2_afternoon.html#identification-of-antibiotic-resistance-genes-with-ariba-directly-from-paired-end-reads)
+- [Perform pan-genome analysis with Roary](day2_afternoon.html#perform-pan-genome-analysis-with-roary)
+
+[Day 3 Morning](day3_morning.html#day-3-morning)
+***
+- [Perform whole genome alignment with Mauve](day3_morning.html#perform-whole-genome-alignment-with-Mauve)
+- [Perform DNA sequence comparisons and phylogenetic analysis in ape](day3_morning.html#perform-some-dna-sequence-comparisons-and-phylogenetic-analysis-in-ape)
+- [Perform SNP density analysis to discern evidence of recombination](day3_morning.html#perform-snp-density-analysis-to-discern-evidence-of-recombination)
+- [Perform recombination filtering with gubbins](day3_morning.html#perform-recombination-filtering-with-gubbins)
+- [Create annotated publication quality trees with iTOL](day3_morning.html#create-annotated-publication-quality-trees-with-itol)
+
+[Day 3 Afternoon](day3_afternoon.html#day-3-afternoon)
+***
+- [Perform QC on fastq files](day3_afternoon.html#perform-qc-on-fastq-files)
+- [Examine results of SPANDx pipeline](day3_afternoon.html#examine-results-of-spandx-pipeline)
+- [Recombination detection and tree generation](day3_afternoon.html#recombination-detection-and-tree-generation)
+- [Phylogenetic tree annotation and visualization](day3_afternoon.html#phylogenetic-tree-annotation-and-visualization)
+- [Assessment of genomic deletions](day3_afternoon.html#assessment-of-genomic-deletions)
+
+
+
+[Helpful resources for microbial genomics](online_resources.html#helpful-resources-for-microbial-genomics)
+***
diff --git a/docs/build/html/_sources/index_temp.txt b/docs/build/html/_sources/index_temp.txt
new file mode 100644
index 0000000..cd17270
--- /dev/null
+++ b/docs/build/html/_sources/index_temp.txt
@@ -0,0 +1,61 @@
+Bacterial Comparative Genomics Workshop
+=======================================
+
+A 3 day microbial bioinformatics workshop conducted by `Dr. Evan Snitkin`_ at `University of Michigan`_. This module covers the basics of microbial genomic analysis using publicly available tools that are commonly referenced in genomics literature. Students will learn the steps and associated tools that are required to process, annotate and compare microbial genomes.
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Date: Feb 28 - 2 March 2018
+^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+.. raw:: html
+
+
+
+--------------
+
+Prerequisites:
+^^^^^^^^^^^^^^
+
+- Prior participation in a `Software Carpentry Workshop`_
+
+.. raw:: html
+
+
+
+--------------
+
+Workshop:
+^^^^^^^^^
+
+`Day 1 Morning`_ \**\* - `Getting your data onto Flux and setting up
+Environment variable`_ - `Unix is your friend`_ - `Quality Control using
+FastQC`_ - `Quality Trimming using Trimmomatic`_
+
+`Day 1 Afternoon`_ \**\* - `Read Mapping`_ - `Variant Calling`_ -
+`Visualize BAM/VCF files in Artemis`_
+
+`Day 2 Morning`_ \**\* - `Genome Assembly`_ - `Assembly evaluation`_ -
+`Compare assembly to reference genome and Post-assembly genome
+improvement`_ - [Map reads to th
+
+.. _Dr. Evan Snitkin: http://thesnitkinlab.com/index.php
+.. _University of Michigan: https://www.umich.edu/
+.. _Software Carpentry Workshop: https://umswc.github.io/2018-02-26-UMich/
+.. _Day 1 Morning: https://github.com/alipirani88/Comparative_Genomics/blob/master/day1_morning/README.md
+.. _Getting your data onto Flux and setting up Environment variable: https://github.com/alipirani88/Comparative_Genomics/blob/master/day1_morning/README.md#getting-your-data-onto-glux-and-setting-up-environment-variable
+.. _Unix is your friend: https://github.com/alipirani88/Comparative_Genomics/blob/master/day1_morning/README.md#unix-is-your-friend
+.. _Quality Control using FastQC: https://github.com/alipirani88/Comparative_Genomics/blob/master/day1_morning/README.md#quality-control-using-fastqc
+.. _Quality Trimming using Trimmomatic: https://github.com/alipirani88/Comparative_Genomics/blob/master/day1_morning/README.md#quality-trimming-using-trimmomatic
+.. _Day 1 Afternoon: https://github.com/alipirani88/Comparative_Genomics/blob/master/day1_afternoon/README.md#day-1-afternoon
+.. _Read Mapping: https://github.com/alipirani88/Comparative_Genomics/blob/master/day1_afternoon/README.md#read-mapping
+.. _Variant Calling: https://github.com/alipirani88/Comparative_Genomics/blob/master/day1_afternoon/README.md#variant-calling-and-filteration
+.. _Visualize BAM/VCF files in Artemis: https://github.com/alipirani88/Comparative_Genomics/blob/master/day1_afternoon/README.md#visualize-bam-and-vcf-files-in-artemis
+.. _Day 2 Morning: https://github.com/alipirani88/Comparative_Genomics/blob/master/day2_morning/README.md#day-2-morning
+.. _Genome Assembly: https://github.com/alipirani88/Comparative_Genomics/blob/master/day2_morning/README.md#genome-assembly
+.. _Assembly evaluation: https://github.com/alipirani88/Comparative_Genomics/blob/master/day2_morning/README.md#assembly-evaluation-using-quast
+.. _Compare assembly to reference genome and Post-assembly genome improvement: https://github.com/alipirani88/Comparative_Genomics/blob/master/day2_morning/README.md#compare-assembly-to-reference-genome-and-post-assembly-genome-improvement
diff --git a/docs/build/html/_sources/online_resources.txt b/docs/build/html/_sources/online_resources.txt
new file mode 100644
index 0000000..bbd36b4
--- /dev/null
+++ b/docs/build/html/_sources/online_resources.txt
@@ -0,0 +1,142 @@
+# Helpful resources for microbial genomics
+
+***If you were not able to follow the video, here is the [link](https://www.youtube.com/watch?v=womKfikWlxM) to the Illumina sequencing video***
+
+[[HOME]](index.html)
+
+**General Bioinformatics resources**
+
+- [Omictools](http://omictools.com/)
+
+- [Bioinformatics One-liners by Stephen Turner](https://github.com/stephenturner/oneliners)
+
+- [QC Fail: Explaining your errors](https://sequencing.qcfail.com/)
+
+
+**Short read processing**
+
+- [FastQC](http://www.bioinformatics.babraham.ac.uk/projects/fastqc/)
+
+- Trimmomatic: [Home](http://www.usadellab.org/cms/?page=trimmomatic) [Manual](http://www.usadellab.org/cms/uploads/supplementary/Trimmomatic/TrimmomaticManual_V0.32.pdf)
+
+- [bwa](http://bio-bwa.sourceforge.net/)
+
+- [bowtie](http://bowtie-bio.sourceforge.net/index.shtml)
+
+- [samtools](http://samtools.sourceforge.net/)
+
+- [vcftools](http://vcftools.sourceforge.net/)
+
+- [bcftools](https://samtools.github.io/bcftools/bcftools.html)
+
+- [gatk](https://www.broadinstitute.org/gatk/)
+
+- [picard](http://broadinstitute.github.io/picard/)
+
+- [SPANDx](https://github.com/dsarov/SPANDx)
+
+- [Snippy](https://github.com/tseemann/snippy)
+
+**Genome assembly**
+
+- [Spades](http://bioinf.spbau.ru/spades)
+
+- [Velvet](https://www.ebi.ac.uk/~zerbino/velvet/)
+
+- [Mira](https://sourceforge.net/p/mira-assembler/wiki/Home/)
+
+- [A5](https://sourceforge.net/p/ngopt/wiki/browse_pages/)
+
+**Genome alignment**
+
+- [Mauve](http://darlinglab.org/mauve/download.html)
+
+- [MUMmer](http://mummer.sourceforge.net/)
+
+- [Mugsy](http://mugsy.sourceforge.net/)
+
+**Visualization of genomic data**
+
+- [Artemis](http://www.sanger.ac.uk/science/tools/artemis)
+
+- [Artemis Comparison Tool](http://www.sanger.ac.uk/science/tools/artemis-comparison-tool-act)
+
+- [IGV](https://www.broadinstitute.org/igv/)
+
+**Genome annotation**
+
+- [Prokka](http://www.vicbioinformatics.com/software.prokka.shtml)
+
+- [Blastall](https://blast.ncbi.nlm.nih.gov/Blast.cgi?PAGE_TYPE=BlastDocs&DOC_TYPE=Download)
+
+- [LS-BSR](https://github.com/jasonsahl/LS-BSR)
+
+**Phylogenetic tools and resources**
+
+- Visualization
+
+ - [Seaview](http://doua.prabi.fr/software/seaview)
+
+ - [iTOL](http://itol.embl.de/)
+
+ - [Figtree](http://tree.bio.ed.ac.uk/software/figtree/)
+
+- Phylogenetic software
+
+ - [PAUP](http://paup.csit.fsu.edu/)
+
+ - [RaxML](http://sco.h-its.org/exelixis/software.html)
+
+ - [PhyML](http://www.atgc-montpellier.fr/phyml/)
+
+ - [BEAST](http://beast.bio.ed.ac.uk/)
+
+ - [PHYLIP](http://evolution.genetics.washington.edu/phylip.html)
+
+ - [APE](http://ape-package.ird.fr/)
+
+ - [Microreact](http://microreact.org/showcase/)
+
+ - Recombination detection
+
+ - [Gubbins](http://www.sanger.ac.uk/science/tools/gubbins)
+
+ - [ClonalFrame](http://www.xavierdidelot.xtreemhost.com/clonalframe.htm?ckattempt=1)
+
+ - [List of Phylogeny Programs](http://evolution.genetics.washington.edu/phylip/software.html)
+
+**Databases**
+
+- [ARDB](http://ardb.cbcb.umd.edu/)
+
+- [PATRIC](https://www.patricbrc.org/portal/portal/patric/Home)
+
+
+**Video Resources you should watch and follow**
+
+
+- [FastQC](https://www.youtube.com/watch?v=bz93ReOv87Y)
+
+- [NHGRI](https://www.youtube.com/user/GenomeTV)
+
+- [Broad Institute](https://www.youtube.com/channel/UCv4IbnP9j9RC_aZAs8wqdeQ)
+
+- [Cold Spring Harbor Lab](https://www.youtube.com/channel/UCVqWctrxf5-oBIM1lqOIt-A)
+
+- [NCBI](https://www.youtube.com/user/NCBINLM/videos)
+
+- [Bioinformatics courses from MIT](https://www.youtube.com/channel/UCEBb1b_L6zDS3xTUrIALZOw)
+
+- YouTube [Channel](https://www.youtube.com/channel/UC1lb9cYp9wt8xjF3APM9bMw) of [Rafael Irizarry](http://rafalab.dfci.harvard.edu/) covering various topics on NGS analysis and the statistics involved.
+
+**[101 Questions: a series of interviews with notable bioinformaticians](http://www.acgt.me/blog/2014/3/25/101-questions-a-new-series-of-interviews-with-notable-bioinformaticians)**
+
+**[Bioinformatics is just like bench science and should be treated as such](http://cabbagesofdoom.blogspot.com/2015/08/bioinformatics-is-just-like-bench.html)**
+
+**Unix/Command line**
+
+- [command-line bootcamp](http://rik.smith-unna.com/command_line_bootcamp/?id=9xnbkx6eaof)
+
+- [Codecademy](https://www.codecademy.com/en/courses/learn-the-command-line)
+
+
diff --git a/docs/build/html/_sources/test.txt b/docs/build/html/_sources/test.txt
new file mode 100644
index 0000000..b06f04d
--- /dev/null
+++ b/docs/build/html/_sources/test.txt
@@ -0,0 +1,27 @@
+
+Microbial Comparative Genomics Workshop
+=======================================
+
+A 3 day microbial bioinformatics workshop conducted by `Dr. Evan Snitkin <http://thesnitkinlab.com/index.php>`_ at `University of Michigan <https://www.umich.edu/>`_. This module covers the basics of microbial genomic analysis using publicly available tools that are commonly referenced in genomics literature. Students will learn the steps and associated tools that are required to process, annotate and compare microbial genomes.
+
+Date: Feb 28 - 2 March 2018
+
+Prerequisites
+-------------
+
+Prior participation in a `Software Carpentry Workshop <https://umswc.github.io/2018-02-26-UMich/>`_
+
+Workshop
+--------
+
+.. toctree::
+ :maxdepth: 5
+
+ day1_morning
+ day1_afternoon
+ day2_morning
+ day2_afternoon
+ day3_morning
+ day3_afternoon
+ online_resources
+
diff --git a/docs/build/html/_static/ajax-loader.gif b/docs/build/html/_static/ajax-loader.gif
new file mode 100644
index 0000000..61faf8c
Binary files /dev/null and b/docs/build/html/_static/ajax-loader.gif differ
diff --git a/docs/build/html/_static/alabaster.css b/docs/build/html/_static/alabaster.css
new file mode 100644
index 0000000..be65b13
--- /dev/null
+++ b/docs/build/html/_static/alabaster.css
@@ -0,0 +1,693 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+@import url("basic.css");
+
+/* -- page layout ----------------------------------------------------------- */
+
+body {
+ font-family: 'goudy old style', 'minion pro', 'bell mt', Georgia, 'Hiragino Mincho Pro', serif;
+ font-size: 17px;
+ background-color: #fff;
+ color: #000;
+ margin: 0;
+ padding: 0;
+}
+
+
+div.document {
+ width: 940px;
+ margin: 30px auto 0 auto;
+}
+
+div.documentwrapper {
+ float: left;
+ width: 100%;
+}
+
+div.bodywrapper {
+ margin: 0 0 0 220px;
+}
+
+div.sphinxsidebar {
+ width: 220px;
+ font-size: 14px;
+ line-height: 1.5;
+}
+
+hr {
+ border: 1px solid #B1B4B6;
+}
+
+div.body {
+ background-color: #fff;
+ color: #3E4349;
+ padding: 0 30px 0 30px;
+}
+
+div.body > .section {
+ text-align: left;
+}
+
+div.footer {
+ width: 940px;
+ margin: 20px auto 30px auto;
+ font-size: 14px;
+ color: #888;
+ text-align: right;
+}
+
+div.footer a {
+ color: #888;
+}
+
+p.caption {
+ font-family: inherit;
+ font-size: inherit;
+}
+
+
+div.relations {
+ display: none;
+}
+
+
+div.sphinxsidebar a {
+ color: #444;
+ text-decoration: none;
+ border-bottom: 1px dotted #999;
+}
+
+div.sphinxsidebar a:hover {
+ border-bottom: 1px solid #999;
+}
+
+div.sphinxsidebarwrapper {
+ padding: 18px 10px;
+}
+
+div.sphinxsidebarwrapper p.logo {
+ padding: 0;
+ margin: -10px 0 0 0px;
+ text-align: center;
+}
+
+div.sphinxsidebarwrapper h1.logo {
+ margin-top: -10px;
+ text-align: center;
+ margin-bottom: 5px;
+ text-align: left;
+}
+
+div.sphinxsidebarwrapper h1.logo-name {
+ margin-top: 0px;
+}
+
+div.sphinxsidebarwrapper p.blurb {
+ margin-top: 0;
+ font-style: normal;
+}
+
+div.sphinxsidebar h3,
+div.sphinxsidebar h4 {
+ font-family: 'Garamond', 'Georgia', serif;
+ color: #444;
+ font-size: 24px;
+ font-weight: normal;
+ margin: 0 0 5px 0;
+ padding: 0;
+}
+
+div.sphinxsidebar h4 {
+ font-size: 20px;
+}
+
+div.sphinxsidebar h3 a {
+ color: #444;
+}
+
+div.sphinxsidebar p.logo a,
+div.sphinxsidebar h3 a,
+div.sphinxsidebar p.logo a:hover,
+div.sphinxsidebar h3 a:hover {
+ border: none;
+}
+
+div.sphinxsidebar p {
+ color: #555;
+ margin: 10px 0;
+}
+
+div.sphinxsidebar ul {
+ margin: 10px 0;
+ padding: 0;
+ color: #000;
+}
+
+div.sphinxsidebar ul li.toctree-l1 > a {
+ font-size: 120%;
+}
+
+div.sphinxsidebar ul li.toctree-l2 > a {
+ font-size: 110%;
+}
+
+div.sphinxsidebar input {
+ border: 1px solid #CCC;
+ font-family: 'goudy old style', 'minion pro', 'bell mt', Georgia, 'Hiragino Mincho Pro', serif;
+ font-size: 1em;
+}
+
+div.sphinxsidebar hr {
+ border: none;
+ height: 1px;
+ color: #AAA;
+ background: #AAA;
+
+ text-align: left;
+ margin-left: 0;
+ width: 50%;
+}
+
+/* -- body styles ----------------------------------------------------------- */
+
+a {
+ color: #004B6B;
+ text-decoration: underline;
+}
+
+a:hover {
+ color: #6D4100;
+ text-decoration: underline;
+}
+
+div.body h1,
+div.body h2,
+div.body h3,
+div.body h4,
+div.body h5,
+div.body h6 {
+ font-family: 'Garamond', 'Georgia', serif;
+ font-weight: normal;
+ margin: 30px 0px 10px 0px;
+ padding: 0;
+}
+
+div.body h1 { margin-top: 0; padding-top: 0; font-size: 240%; }
+div.body h2 { font-size: 180%; }
+div.body h3 { font-size: 150%; }
+div.body h4 { font-size: 130%; }
+div.body h5 { font-size: 100%; }
+div.body h6 { font-size: 100%; }
+
+a.headerlink {
+ color: #DDD;
+ padding: 0 4px;
+ text-decoration: none;
+}
+
+a.headerlink:hover {
+ color: #444;
+ background: #EAEAEA;
+}
+
+div.body p, div.body dd, div.body li {
+ line-height: 1.4em;
+}
+
+div.admonition {
+ margin: 20px 0px;
+ padding: 10px 30px;
+ background-color: #EEE;
+ border: 1px solid #CCC;
+}
+
+div.admonition tt.xref, div.admonition code.xref, div.admonition a tt {
+ background-color: #FBFBFB;
+ border-bottom: 1px solid #fafafa;
+}
+
+div.admonition p.admonition-title {
+ font-family: 'Garamond', 'Georgia', serif;
+ font-weight: normal;
+ font-size: 24px;
+ margin: 0 0 10px 0;
+ padding: 0;
+ line-height: 1;
+}
+
+div.admonition p.last {
+ margin-bottom: 0;
+}
+
+div.highlight {
+ background-color: #fff;
+}
+
+dt:target, .highlight {
+ background: #FAF3E8;
+}
+
+div.warning {
+ background-color: #FCC;
+ border: 1px solid #FAA;
+}
+
+div.danger {
+ background-color: #FCC;
+ border: 1px solid #FAA;
+ -moz-box-shadow: 2px 2px 4px #D52C2C;
+ -webkit-box-shadow: 2px 2px 4px #D52C2C;
+ box-shadow: 2px 2px 4px #D52C2C;
+}
+
+div.error {
+ background-color: #FCC;
+ border: 1px solid #FAA;
+ -moz-box-shadow: 2px 2px 4px #D52C2C;
+ -webkit-box-shadow: 2px 2px 4px #D52C2C;
+ box-shadow: 2px 2px 4px #D52C2C;
+}
+
+div.caution {
+ background-color: #FCC;
+ border: 1px solid #FAA;
+}
+
+div.attention {
+ background-color: #FCC;
+ border: 1px solid #FAA;
+}
+
+div.important {
+ background-color: #EEE;
+ border: 1px solid #CCC;
+}
+
+div.note {
+ background-color: #EEE;
+ border: 1px solid #CCC;
+}
+
+div.tip {
+ background-color: #EEE;
+ border: 1px solid #CCC;
+}
+
+div.hint {
+ background-color: #EEE;
+ border: 1px solid #CCC;
+}
+
+div.seealso {
+ background-color: #EEE;
+ border: 1px solid #CCC;
+}
+
+div.topic {
+ background-color: #EEE;
+}
+
+p.admonition-title {
+ display: inline;
+}
+
+p.admonition-title:after {
+ content: ":";
+}
+
+pre, tt, code {
+ font-family: 'Consolas', 'Menlo', 'Deja Vu Sans Mono', 'Bitstream Vera Sans Mono', monospace;
+ font-size: 0.9em;
+}
+
+.hll {
+ background-color: #FFC;
+ margin: 0 -12px;
+ padding: 0 12px;
+ display: block;
+}
+
+img.screenshot {
+}
+
+tt.descname, tt.descclassname, code.descname, code.descclassname {
+ font-size: 0.95em;
+}
+
+tt.descname, code.descname {
+ padding-right: 0.08em;
+}
+
+img.screenshot {
+ -moz-box-shadow: 2px 2px 4px #EEE;
+ -webkit-box-shadow: 2px 2px 4px #EEE;
+ box-shadow: 2px 2px 4px #EEE;
+}
+
+table.docutils {
+ border: 1px solid #888;
+ -moz-box-shadow: 2px 2px 4px #EEE;
+ -webkit-box-shadow: 2px 2px 4px #EEE;
+ box-shadow: 2px 2px 4px #EEE;
+}
+
+table.docutils td, table.docutils th {
+ border: 1px solid #888;
+ padding: 0.25em 0.7em;
+}
+
+table.field-list, table.footnote {
+ border: none;
+ -moz-box-shadow: none;
+ -webkit-box-shadow: none;
+ box-shadow: none;
+}
+
+table.footnote {
+ margin: 15px 0;
+ width: 100%;
+ border: 1px solid #EEE;
+ background: #FDFDFD;
+ font-size: 0.9em;
+}
+
+table.footnote + table.footnote {
+ margin-top: -15px;
+ border-top: none;
+}
+
+table.field-list th {
+ padding: 0 0.8em 0 0;
+}
+
+table.field-list td {
+ padding: 0;
+}
+
+table.field-list p {
+ margin-bottom: 0.8em;
+}
+
+/* Cloned from
+ * https://github.com/sphinx-doc/sphinx/commit/ef60dbfce09286b20b7385333d63a60321784e68
+ */
+.field-name {
+ -moz-hyphens: manual;
+ -ms-hyphens: manual;
+ -webkit-hyphens: manual;
+ hyphens: manual;
+}
+
+table.footnote td.label {
+ width: .1px;
+ padding: 0.3em 0 0.3em 0.5em;
+}
+
+table.footnote td {
+ padding: 0.3em 0.5em;
+}
+
+dl {
+ margin: 0;
+ padding: 0;
+}
+
+dl dd {
+ margin-left: 30px;
+}
+
+blockquote {
+ margin: 0 0 0 30px;
+ padding: 0;
+}
+
+ul, ol {
+ /* Matches the 30px from the narrow-screen "li > ul" selector below */
+ margin: 10px 0 10px 30px;
+ padding: 0;
+}
+
+pre {
+ background: #EEE;
+ padding: 7px 30px;
+ margin: 15px 0px;
+ line-height: 1.3em;
+}
+
+div.viewcode-block:target {
+ background: #ffd;
+}
+
+dl pre, blockquote pre, li pre {
+ margin-left: 0;
+ padding-left: 30px;
+}
+
+tt, code {
+ background-color: #ecf0f3;
+ color: #222;
+ /* padding: 1px 2px; */
+}
+
+tt.xref, code.xref, a tt {
+ background-color: #FBFBFB;
+ border-bottom: 1px solid #fff;
+}
+
+a.reference {
+ text-decoration: none;
+ border-bottom: 1px dotted #004B6B;
+}
+
+/* Don't put an underline on images */
+a.image-reference, a.image-reference:hover {
+ border-bottom: none;
+}
+
+a.reference:hover {
+ border-bottom: 1px solid #6D4100;
+}
+
+a.footnote-reference {
+ text-decoration: none;
+ font-size: 0.7em;
+ vertical-align: top;
+ border-bottom: 1px dotted #004B6B;
+}
+
+a.footnote-reference:hover {
+ border-bottom: 1px solid #6D4100;
+}
+
+a:hover tt, a:hover code {
+ background: #EEE;
+}
+
+
+@media screen and (max-width: 870px) {
+
+ div.sphinxsidebar {
+ display: none;
+ }
+
+ div.document {
+ width: 100%;
+
+ }
+
+ div.documentwrapper {
+ margin-left: 0;
+ margin-top: 0;
+ margin-right: 0;
+ margin-bottom: 0;
+ }
+
+ div.bodywrapper {
+ margin-top: 0;
+ margin-right: 0;
+ margin-bottom: 0;
+ margin-left: 0;
+ }
+
+ ul {
+ margin-left: 0;
+ }
+
+ li > ul {
+ /* Matches the 30px from the "ul, ol" selector above */
+ margin-left: 30px;
+ }
+
+ .document {
+ width: auto;
+ }
+
+ .footer {
+ width: auto;
+ }
+
+ .bodywrapper {
+ margin: 0;
+ }
+
+ .footer {
+ width: auto;
+ }
+
+ .github {
+ display: none;
+ }
+
+
+
+}
+
+
+
+@media screen and (max-width: 875px) {
+
+ body {
+ margin: 0;
+ padding: 20px 30px;
+ }
+
+ div.documentwrapper {
+ float: none;
+ background: #fff;
+ }
+
+ div.sphinxsidebar {
+ display: block;
+ float: none;
+ width: 102.5%;
+ margin: 50px -30px -20px -30px;
+ padding: 10px 20px;
+ background: #333;
+ color: #FFF;
+ }
+
+ div.sphinxsidebar h3, div.sphinxsidebar h4, div.sphinxsidebar p,
+ div.sphinxsidebar h3 a {
+ color: #fff;
+ }
+
+ div.sphinxsidebar a {
+ color: #AAA;
+ }
+
+ div.sphinxsidebar p.logo {
+ display: none;
+ }
+
+ div.document {
+ width: 100%;
+ margin: 0;
+ }
+
+ div.footer {
+ display: none;
+ }
+
+ div.bodywrapper {
+ margin: 0;
+ }
+
+ div.body {
+ min-height: 0;
+ padding: 0;
+ }
+
+ .rtd_doc_footer {
+ display: none;
+ }
+
+ .document {
+ width: auto;
+ }
+
+ .footer {
+ width: auto;
+ }
+
+ .footer {
+ width: auto;
+ }
+
+ .github {
+ display: none;
+ }
+}
+
+
+/* misc. */
+
+.revsys-inline {
+ display: none!important;
+}
+
+/* Make nested-list/multi-paragraph items look better in Releases changelog
+ * pages. Without this, docutils' magical list fuckery causes inconsistent
+ * formatting between different release sub-lists.
+ */
+div#changelog > div.section > ul > li > p:only-child {
+ margin-bottom: 0;
+}
+
+/* Hide fugly table cell borders in ..bibliography:: directive output */
+table.docutils.citation, table.docutils.citation td, table.docutils.citation th {
+ border: none;
+ /* Below needed in some edge cases; if not applied, bottom shadows appear */
+ -moz-box-shadow: none;
+ -webkit-box-shadow: none;
+ box-shadow: none;
+}
\ No newline at end of file
diff --git a/docs/build/html/_static/basic.css b/docs/build/html/_static/basic.css
new file mode 100644
index 0000000..9fa77d8
--- /dev/null
+++ b/docs/build/html/_static/basic.css
@@ -0,0 +1,599 @@
+/*
+ * basic.css
+ * ~~~~~~~~~
+ *
+ * Sphinx stylesheet -- basic theme.
+ *
+ * :copyright: Copyright 2007-2015 by the Sphinx team, see AUTHORS.
+ * :license: BSD, see LICENSE for details.
+ *
+ */
+
+/* -- main layout ----------------------------------------------------------- */
+
+div.clearer {
+ clear: both;
+}
+
+/* -- relbar ---------------------------------------------------------------- */
+
+div.related {
+ width: 100%;
+ font-size: 90%;
+}
+
+div.related h3 {
+ display: none;
+}
+
+div.related ul {
+ margin: 0;
+ padding: 0 0 0 10px;
+ list-style: none;
+}
+
+div.related li {
+ display: inline;
+}
+
+div.related li.right {
+ float: right;
+ margin-right: 5px;
+}
+
+/* -- sidebar --------------------------------------------------------------- */
+
+div.sphinxsidebarwrapper {
+ padding: 10px 5px 0 10px;
+}
+
+div.sphinxsidebar {
+ float: left;
+ width: 230px;
+ margin-left: -100%;
+ font-size: 90%;
+}
+
+div.sphinxsidebar ul {
+ list-style: none;
+}
+
+div.sphinxsidebar ul ul,
+div.sphinxsidebar ul.want-points {
+ margin-left: 20px;
+ list-style: square;
+}
+
+div.sphinxsidebar ul ul {
+ margin-top: 0;
+ margin-bottom: 0;
+}
+
+div.sphinxsidebar form {
+ margin-top: 10px;
+}
+
+div.sphinxsidebar input {
+ border: 1px solid #98dbcc;
+ font-family: sans-serif;
+ font-size: 1em;
+}
+
+div.sphinxsidebar #searchbox input[type="text"] {
+ width: 170px;
+}
+
+div.sphinxsidebar #searchbox input[type="submit"] {
+ width: 30px;
+}
+
+img {
+ border: 0;
+ max-width: 100%;
+}
+
+/* -- search page ----------------------------------------------------------- */
+
+ul.search {
+ margin: 10px 0 0 20px;
+ padding: 0;
+}
+
+ul.search li {
+ padding: 5px 0 5px 20px;
+ background-image: url(file.png);
+ background-repeat: no-repeat;
+ background-position: 0 7px;
+}
+
+ul.search li a {
+ font-weight: bold;
+}
+
+ul.search li div.context {
+ color: #888;
+ margin: 2px 0 0 30px;
+ text-align: left;
+}
+
+ul.keywordmatches li.goodmatch a {
+ font-weight: bold;
+}
+
+/* -- index page ------------------------------------------------------------ */
+
+table.contentstable {
+ width: 90%;
+}
+
+table.contentstable p.biglink {
+ line-height: 150%;
+}
+
+a.biglink {
+ font-size: 1.3em;
+}
+
+span.linkdescr {
+ font-style: italic;
+ padding-top: 5px;
+ font-size: 90%;
+}
+
+/* -- general index --------------------------------------------------------- */
+
+table.indextable {
+ width: 100%;
+}
+
+table.indextable td {
+ text-align: left;
+ vertical-align: top;
+}
+
+table.indextable dl, table.indextable dd {
+ margin-top: 0;
+ margin-bottom: 0;
+}
+
+table.indextable tr.pcap {
+ height: 10px;
+}
+
+table.indextable tr.cap {
+ margin-top: 10px;
+ background-color: #f2f2f2;
+}
+
+img.toggler {
+ margin-right: 3px;
+ margin-top: 3px;
+ cursor: pointer;
+}
+
+div.modindex-jumpbox {
+ border-top: 1px solid #ddd;
+ border-bottom: 1px solid #ddd;
+ margin: 1em 0 1em 0;
+ padding: 0.4em;
+}
+
+div.genindex-jumpbox {
+ border-top: 1px solid #ddd;
+ border-bottom: 1px solid #ddd;
+ margin: 1em 0 1em 0;
+ padding: 0.4em;
+}
+
+/* -- general body styles --------------------------------------------------- */
+
+a.headerlink {
+ visibility: hidden;
+}
+
+h1:hover > a.headerlink,
+h2:hover > a.headerlink,
+h3:hover > a.headerlink,
+h4:hover > a.headerlink,
+h5:hover > a.headerlink,
+h6:hover > a.headerlink,
+dt:hover > a.headerlink,
+caption:hover > a.headerlink,
+p.caption:hover > a.headerlink,
+div.code-block-caption:hover > a.headerlink {
+ visibility: visible;
+}
+
+div.body p.caption {
+ text-align: inherit;
+}
+
+div.body td {
+ text-align: left;
+}
+
+.field-list ul {
+ padding-left: 1em;
+}
+
+.first {
+ margin-top: 0 !important;
+}
+
+p.rubric {
+ margin-top: 30px;
+ font-weight: bold;
+}
+
+img.align-left, .figure.align-left, object.align-left {
+ clear: left;
+ float: left;
+ margin-right: 1em;
+}
+
+img.align-right, .figure.align-right, object.align-right {
+ clear: right;
+ float: right;
+ margin-left: 1em;
+}
+
+img.align-center, .figure.align-center, object.align-center {
+ display: block;
+ margin-left: auto;
+ margin-right: auto;
+}
+
+.align-left {
+ text-align: left;
+}
+
+.align-center {
+ text-align: center;
+}
+
+.align-right {
+ text-align: right;
+}
+
+/* -- sidebars -------------------------------------------------------------- */
+
+div.sidebar {
+ margin: 0 0 0.5em 1em;
+ border: 1px solid #ddb;
+ padding: 7px 7px 0 7px;
+ background-color: #ffe;
+ width: 40%;
+ float: right;
+}
+
+p.sidebar-title {
+ font-weight: bold;
+}
+
+/* -- topics ---------------------------------------------------------------- */
+
+div.topic {
+ border: 1px solid #ccc;
+ padding: 7px 7px 0 7px;
+ margin: 10px 0 10px 0;
+}
+
+p.topic-title {
+ font-size: 1.1em;
+ font-weight: bold;
+ margin-top: 10px;
+}
+
+/* -- admonitions ----------------------------------------------------------- */
+
+div.admonition {
+ margin-top: 10px;
+ margin-bottom: 10px;
+ padding: 7px;
+}
+
+div.admonition dt {
+ font-weight: bold;
+}
+
+div.admonition dl {
+ margin-bottom: 0;
+}
+
+p.admonition-title {
+ margin: 0px 10px 5px 0px;
+ font-weight: bold;
+}
+
+div.body p.centered {
+ text-align: center;
+ margin-top: 25px;
+}
+
+/* -- tables ---------------------------------------------------------------- */
+
+table.docutils {
+ border: 0;
+ border-collapse: collapse;
+}
+
+table caption span.caption-number {
+ font-style: italic;
+}
+
+table caption span.caption-text {
+}
+
+table.docutils td, table.docutils th {
+ padding: 1px 8px 1px 5px;
+ border-top: 0;
+ border-left: 0;
+ border-right: 0;
+ border-bottom: 1px solid #aaa;
+}
+
+table.field-list td, table.field-list th {
+ border: 0 !important;
+}
+
+table.footnote td, table.footnote th {
+ border: 0 !important;
+}
+
+th {
+ text-align: left;
+ padding-right: 5px;
+}
+
+table.citation {
+ border-left: solid 1px gray;
+ margin-left: 1px;
+}
+
+table.citation td {
+ border-bottom: none;
+}
+
+/* -- figures --------------------------------------------------------------- */
+
+div.figure {
+ margin: 0.5em;
+ padding: 0.5em;
+}
+
+div.figure p.caption {
+ padding: 0.3em;
+}
+
+div.figure p.caption span.caption-number {
+ font-style: italic;
+}
+
+div.figure p.caption span.caption-text {
+}
+
+
+/* -- other body styles ----------------------------------------------------- */
+
+ol.arabic {
+ list-style: decimal;
+}
+
+ol.loweralpha {
+ list-style: lower-alpha;
+}
+
+ol.upperalpha {
+ list-style: upper-alpha;
+}
+
+ol.lowerroman {
+ list-style: lower-roman;
+}
+
+ol.upperroman {
+ list-style: upper-roman;
+}
+
+dl {
+ margin-bottom: 15px;
+}
+
+dd p {
+ margin-top: 0px;
+}
+
+dd ul, dd table {
+ margin-bottom: 10px;
+}
+
+dd {
+ margin-top: 3px;
+ margin-bottom: 10px;
+ margin-left: 30px;
+}
+
+dt:target, .highlighted {
+ background-color: #fbe54e;
+}
+
+dl.glossary dt {
+ font-weight: bold;
+ font-size: 1.1em;
+}
+
+.field-list ul {
+ margin: 0;
+ padding-left: 1em;
+}
+
+.field-list p {
+ margin: 0;
+}
+
+.optional {
+ font-size: 1.3em;
+}
+
+.sig-paren {
+ font-size: larger;
+}
+
+.versionmodified {
+ font-style: italic;
+}
+
+.system-message {
+ background-color: #fda;
+ padding: 5px;
+ border: 3px solid red;
+}
+
+.footnote:target {
+ background-color: #ffa;
+}
+
+.line-block {
+ display: block;
+ margin-top: 1em;
+ margin-bottom: 1em;
+}
+
+.line-block .line-block {
+ margin-top: 0;
+ margin-bottom: 0;
+ margin-left: 1.5em;
+}
+
+.guilabel, .menuselection {
+ font-family: sans-serif;
+}
+
+.accelerator {
+ text-decoration: underline;
+}
+
+.classifier {
+ font-style: oblique;
+}
+
+abbr, acronym {
+ border-bottom: dotted 1px;
+ cursor: help;
+}
+
+/* -- code displays --------------------------------------------------------- */
+
+pre {
+ overflow: auto;
+ overflow-y: hidden; /* fixes display issues on Chrome browsers */
+}
+
+td.linenos pre {
+ padding: 5px 0px;
+ border: 0;
+ background-color: transparent;
+ color: #aaa;
+}
+
+table.highlighttable {
+ margin-left: 0.5em;
+}
+
+table.highlighttable td {
+ padding: 0 0.5em 0 0.5em;
+}
+
+div.code-block-caption {
+ padding: 2px 5px;
+ font-size: small;
+}
+
+div.code-block-caption code {
+ background-color: transparent;
+}
+
+div.code-block-caption + div > div.highlight > pre {
+ margin-top: 0;
+}
+
+div.code-block-caption span.caption-number {
+ padding: 0.1em 0.3em;
+ font-style: italic;
+}
+
+div.code-block-caption span.caption-text {
+}
+
+div.literal-block-wrapper {
+ padding: 1em 1em 0;
+}
+
+div.literal-block-wrapper div.highlight {
+ margin: 0;
+}
+
+code.descname {
+ background-color: transparent;
+ font-weight: bold;
+ font-size: 1.2em;
+}
+
+code.descclassname {
+ background-color: transparent;
+}
+
+code.xref, a code {
+ background-color: transparent;
+ font-weight: bold;
+}
+
+h1 code, h2 code, h3 code, h4 code, h5 code, h6 code {
+ background-color: transparent;
+}
+
+.viewcode-link {
+ float: right;
+}
+
+.viewcode-back {
+ float: right;
+ font-family: sans-serif;
+}
+
+div.viewcode-block:target {
+ margin: -1px -10px;
+ padding: 0 10px;
+}
+
+/* -- math display ---------------------------------------------------------- */
+
+img.math {
+ vertical-align: middle;
+}
+
+div.body div.math p {
+ text-align: center;
+}
+
+span.eqno {
+ float: right;
+}
+
+/* -- printout stylesheet --------------------------------------------------- */
+
+@media print {
+ div.document,
+ div.documentwrapper,
+ div.bodywrapper {
+ margin: 0 !important;
+ width: 100%;
+ }
+
+ div.sphinxsidebar,
+ div.related,
+ div.footer,
+ #top-link {
+ display: none;
+ }
+}
\ No newline at end of file
diff --git a/docs/build/html/_static/comment-bright.png b/docs/build/html/_static/comment-bright.png
new file mode 100644
index 0000000..551517b
Binary files /dev/null and b/docs/build/html/_static/comment-bright.png differ
diff --git a/docs/build/html/_static/comment-close.png b/docs/build/html/_static/comment-close.png
new file mode 100644
index 0000000..09b54be
Binary files /dev/null and b/docs/build/html/_static/comment-close.png differ
diff --git a/docs/build/html/_static/comment.png b/docs/build/html/_static/comment.png
new file mode 100644
index 0000000..92feb52
Binary files /dev/null and b/docs/build/html/_static/comment.png differ
diff --git a/docs/build/html/_static/css/badge_only.css b/docs/build/html/_static/css/badge_only.css
new file mode 100644
index 0000000..7e17fb1
--- /dev/null
+++ b/docs/build/html/_static/css/badge_only.css
@@ -0,0 +1,2 @@
+.fa:before{-webkit-font-smoothing:antialiased}.clearfix{*zoom:1}.clearfix:before,.clearfix:after{display:table;content:""}.clearfix:after{clear:both}@font-face{font-family:FontAwesome;font-weight:normal;font-style:normal;src:url("../font/fontawesome_webfont.eot");src:url("../font/fontawesome_webfont.eot?#iefix") format("embedded-opentype"),url("../font/fontawesome_webfont.woff") format("woff"),url("../font/fontawesome_webfont.ttf") format("truetype"),url("../font/fontawesome_webfont.svg#FontAwesome") format("svg")}.fa:before{display:inline-block;font-family:FontAwesome;font-style:normal;font-weight:normal;line-height:1;text-decoration:inherit}a .fa{display:inline-block;text-decoration:inherit}li .fa{display:inline-block}li .fa-large:before,li .fa-large:before{width:1.875em}ul.fas{list-style-type:none;margin-left:2em;text-indent:-0.8em}ul.fas li .fa{width:0.8em}ul.fas li .fa-large:before,ul.fas li .fa-large:before{vertical-align:baseline}.fa-book:before{content:""}.icon-book:before{content:""}.fa-caret-down:before{content:""}.icon-caret-down:before{content:""}.fa-caret-up:before{content:""}.icon-caret-up:before{content:""}.fa-caret-left:before{content:""}.icon-caret-left:before{content:""}.fa-caret-right:before{content:""}.icon-caret-right:before{content:""}.rst-versions{position:fixed;bottom:0;left:0;width:300px;color:#fcfcfc;background:#1f1d1d;border-top:solid 10px #343131;font-family:"Lato","proxima-nova","Helvetica Neue",Arial,sans-serif;z-index:400}.rst-versions a{color:#2980B9;text-decoration:none}.rst-versions .rst-badge-small{display:none}.rst-versions .rst-current-version{padding:12px;background-color:#272525;display:block;text-align:right;font-size:90%;cursor:pointer;color:#27AE60;*zoom:1}.rst-versions .rst-current-version:before,.rst-versions .rst-current-version:after{display:table;content:""}.rst-versions .rst-current-version:after{clear:both}.rst-versions .rst-current-version .fa{color:#fcfcfc}.rst-versions .rst-current-version 
.fa-book{float:left}.rst-versions .rst-current-version .icon-book{float:left}.rst-versions .rst-current-version.rst-out-of-date{background-color:#E74C3C;color:#fff}.rst-versions .rst-current-version.rst-active-old-version{background-color:#F1C40F;color:#000}.rst-versions.shift-up .rst-other-versions{display:block}.rst-versions .rst-other-versions{font-size:90%;padding:12px;color:gray;display:none}.rst-versions .rst-other-versions hr{display:block;height:1px;border:0;margin:20px 0;padding:0;border-top:solid 1px #413d3d}.rst-versions .rst-other-versions dd{display:inline-block;margin:0}.rst-versions .rst-other-versions dd a{display:inline-block;padding:6px;color:#fcfcfc}.rst-versions.rst-badge{width:auto;bottom:20px;right:20px;left:auto;border:none;max-width:300px}.rst-versions.rst-badge .icon-book{float:none}.rst-versions.rst-badge .fa-book{float:none}.rst-versions.rst-badge.shift-up .rst-current-version{text-align:right}.rst-versions.rst-badge.shift-up .rst-current-version .fa-book{float:left}.rst-versions.rst-badge.shift-up .rst-current-version .icon-book{float:left}.rst-versions.rst-badge .rst-current-version{width:auto;height:30px;line-height:30px;padding:0 6px;display:block;text-align:center}@media screen and (max-width: 768px){.rst-versions{width:85%;display:none}.rst-versions.shift{display:block}img{width:100%;height:auto}}
+/*# sourceMappingURL=badge_only.css.map */
diff --git a/docs/build/html/_static/css/theme.css b/docs/build/html/_static/css/theme.css
new file mode 100644
index 0000000..390d706
--- /dev/null
+++ b/docs/build/html/_static/css/theme.css
@@ -0,0 +1,5 @@
+*{-webkit-box-sizing:border-box;-moz-box-sizing:border-box;box-sizing:border-box}article,aside,details,figcaption,figure,footer,header,hgroup,nav,section{display:block}audio,canvas,video{display:inline-block;*display:inline;*zoom:1}audio:not([controls]){display:none}[hidden]{display:none}*{-webkit-box-sizing:border-box;-moz-box-sizing:border-box;box-sizing:border-box}html{font-size:100%;-webkit-text-size-adjust:100%;-ms-text-size-adjust:100%}body{margin:0}a:hover,a:active{outline:0}abbr[title]{border-bottom:1px dotted}b,strong{font-weight:bold}blockquote{margin:0}dfn{font-style:italic}ins{background:#ff9;color:#000;text-decoration:none}mark{background:#ff0;color:#000;font-style:italic;font-weight:bold}pre,code,.rst-content tt,kbd,samp{font-family:monospace,serif;_font-family:"courier new",monospace;font-size:1em}pre{white-space:pre}q{quotes:none}q:before,q:after{content:"";content:none}small{font-size:85%}sub,sup{font-size:75%;line-height:0;position:relative;vertical-align:baseline}sup{top:-0.5em}sub{bottom:-0.25em}ul,ol,dl{margin:0;padding:0;list-style:none;list-style-image:none}li{list-style:none}dd{margin:0}img{border:0;-ms-interpolation-mode:bicubic;vertical-align:middle;max-width:100%}svg:not(:root){overflow:hidden}figure{margin:0}form{margin:0}fieldset{border:0;margin:0;padding:0}label{cursor:pointer}legend{border:0;*margin-left:-7px;padding:0;white-space:normal}button,input,select,textarea{font-size:100%;margin:0;vertical-align:baseline;*vertical-align:middle}button,input{line-height:normal}button,input[type="button"],input[type="reset"],input[type="submit"]{cursor:pointer;-webkit-appearance:button;*overflow:visible}button[disabled],input[disabled]{cursor:default}input[type="checkbox"],input[type="radio"]{box-sizing:border-box;padding:0;*width:13px;*height:13px}input[type="search"]{-webkit-appearance:textfield;-moz-box-sizing:content-box;-webkit-box-sizing:content-box;box-sizing:content-box}input[type="search"]::-webkit-search-decoration,input[type="search"]
::-webkit-search-cancel-button{-webkit-appearance:none}button::-moz-focus-inner,input::-moz-focus-inner{border:0;padding:0}textarea{overflow:auto;vertical-align:top;resize:vertical}table{border-collapse:collapse;border-spacing:0}td{vertical-align:top}.chromeframe{margin:0.2em 0;background:#ccc;color:#000;padding:0.2em 0}.ir{display:block;border:0;text-indent:-999em;overflow:hidden;background-color:transparent;background-repeat:no-repeat;text-align:left;direction:ltr;*line-height:0}.ir br{display:none}.hidden{display:none !important;visibility:hidden}.visuallyhidden{border:0;clip:rect(0 0 0 0);height:1px;margin:-1px;overflow:hidden;padding:0;position:absolute;width:1px}.visuallyhidden.focusable:active,.visuallyhidden.focusable:focus{clip:auto;height:auto;margin:0;overflow:visible;position:static;width:auto}.invisible{visibility:hidden}.relative{position:relative}big,small{font-size:100%}@media print{html,body,section{background:none !important}*{box-shadow:none !important;text-shadow:none !important;filter:none !important;-ms-filter:none !important}a,a:visited{text-decoration:underline}.ir a:after,a[href^="javascript:"]:after,a[href^="#"]:after{content:""}pre,blockquote{page-break-inside:avoid}thead{display:table-header-group}tr,img{page-break-inside:avoid}img{max-width:100% !important}@page{margin:0.5cm}p,h2,h3{orphans:3;widows:3}h2,h3{page-break-after:avoid}}.fa:before,.rst-content .admonition-title:before,.rst-content h1 .headerlink:before,.rst-content h2 .headerlink:before,.rst-content h3 .headerlink:before,.rst-content h4 .headerlink:before,.rst-content h5 .headerlink:before,.rst-content h6 .headerlink:before,.rst-content dl dt .headerlink:before,.icon:before,.wy-dropdown .caret:before,.wy-inline-validate.wy-inline-validate-success .wy-input-context:before,.wy-inline-validate.wy-inline-validate-danger .wy-input-context:before,.wy-inline-validate.wy-inline-validate-warning .wy-input-context:before,.wy-inline-validate.wy-inline-validate-info 
.wy-input-context:before,.wy-alert,.rst-content .note,.rst-content .attention,.rst-content .caution,.rst-content .danger,.rst-content .error,.rst-content .hint,.rst-content .important,.rst-content .tip,.rst-content .warning,.rst-content .seealso,.rst-content .admonition-todo,.btn,input[type="text"],input[type="password"],input[type="email"],input[type="url"],input[type="date"],input[type="month"],input[type="time"],input[type="datetime"],input[type="datetime-local"],input[type="week"],input[type="number"],input[type="search"],input[type="tel"],input[type="color"],select,textarea,.wy-menu-vertical li.on a,.wy-menu-vertical li.current>a,.wy-side-nav-search>a,.wy-side-nav-search .wy-dropdown>a,.wy-nav-top a{-webkit-font-smoothing:antialiased}.clearfix{*zoom:1}.clearfix:before,.clearfix:after{display:table;content:""}.clearfix:after{clear:both}/*!
+ * Font Awesome 4.2.0 by @davegandy - http://fontawesome.io - @fontawesome
+ * License - http://fontawesome.io/license (Font: SIL OFL 1.1, CSS: MIT License)
+ */@font-face{font-family:'FontAwesome';src:url("../fonts/fontawesome-webfont.eot?v=4.2.0");src:url("../fonts/fontawesome-webfont.eot?#iefix&v=4.2.0") format("embedded-opentype"),url("../fonts/fontawesome-webfont.woff?v=4.2.0") format("woff"),url("../fonts/fontawesome-webfont.ttf?v=4.2.0") format("truetype"),url("../fonts/fontawesome-webfont.svg?v=4.2.0#fontawesomeregular") format("svg");font-weight:normal;font-style:normal}.fa,.rst-content .admonition-title,.rst-content h1 .headerlink,.rst-content h2 .headerlink,.rst-content h3 .headerlink,.rst-content h4 .headerlink,.rst-content h5 .headerlink,.rst-content h6 .headerlink,.rst-content dl dt .headerlink,.icon{display:inline-block;font:normal normal normal 14px/1 FontAwesome;font-size:inherit;text-rendering:auto;-webkit-font-smoothing:antialiased;-moz-osx-font-smoothing:grayscale}.fa-lg{font-size:1.33333em;line-height:0.75em;vertical-align:-15%}.fa-2x{font-size:2em}.fa-3x{font-size:3em}.fa-4x{font-size:4em}.fa-5x{font-size:5em}.fa-fw{width:1.28571em;text-align:center}.fa-ul{padding-left:0;margin-left:2.14286em;list-style-type:none}.fa-ul>li{position:relative}.fa-li{position:absolute;left:-2.14286em;width:2.14286em;top:0.14286em;text-align:center}.fa-li.fa-lg{left:-1.85714em}.fa-border{padding:.2em .25em .15em;border:solid 0.08em #eee;border-radius:.1em}.pull-right{float:right}.pull-left{float:left}.fa.pull-left,.rst-content .pull-left.admonition-title,.rst-content h1 .pull-left.headerlink,.rst-content h2 .pull-left.headerlink,.rst-content h3 .pull-left.headerlink,.rst-content h4 .pull-left.headerlink,.rst-content h5 .pull-left.headerlink,.rst-content h6 .pull-left.headerlink,.rst-content dl dt .pull-left.headerlink,.pull-left.icon{margin-right:.3em}.fa.pull-right,.rst-content .pull-right.admonition-title,.rst-content h1 .pull-right.headerlink,.rst-content h2 .pull-right.headerlink,.rst-content h3 .pull-right.headerlink,.rst-content h4 .pull-right.headerlink,.rst-content h5 .pull-right.headerlink,.rst-content h6 
.pull-right.headerlink,.rst-content dl dt .pull-right.headerlink,.pull-right.icon{margin-left:.3em}.fa-spin{-webkit-animation:fa-spin 2s infinite linear;animation:fa-spin 2s infinite linear}@-webkit-keyframes fa-spin{0%{-webkit-transform:rotate(0deg);transform:rotate(0deg)}100%{-webkit-transform:rotate(359deg);transform:rotate(359deg)}}@keyframes fa-spin{0%{-webkit-transform:rotate(0deg);transform:rotate(0deg)}100%{-webkit-transform:rotate(359deg);transform:rotate(359deg)}}.fa-rotate-90{filter:progid:DXImageTransform.Microsoft.BasicImage(rotation=1);-webkit-transform:rotate(90deg);-ms-transform:rotate(90deg);transform:rotate(90deg)}.fa-rotate-180{filter:progid:DXImageTransform.Microsoft.BasicImage(rotation=2);-webkit-transform:rotate(180deg);-ms-transform:rotate(180deg);transform:rotate(180deg)}.fa-rotate-270{filter:progid:DXImageTransform.Microsoft.BasicImage(rotation=3);-webkit-transform:rotate(270deg);-ms-transform:rotate(270deg);transform:rotate(270deg)}.fa-flip-horizontal{filter:progid:DXImageTransform.Microsoft.BasicImage(rotation=0);-webkit-transform:scale(-1, 1);-ms-transform:scale(-1, 1);transform:scale(-1, 1)}.fa-flip-vertical{filter:progid:DXImageTransform.Microsoft.BasicImage(rotation=2);-webkit-transform:scale(1, -1);-ms-transform:scale(1, -1);transform:scale(1, -1)}:root .fa-rotate-90,:root .fa-rotate-180,:root .fa-rotate-270,:root .fa-flip-horizontal,:root 
.fa-flip-vertical{filter:none}.fa-stack{position:relative;display:inline-block;width:2em;height:2em;line-height:2em;vertical-align:middle}.fa-stack-1x,.fa-stack-2x{position:absolute;left:0;width:100%;text-align:center}.fa-stack-1x{line-height:inherit}.fa-stack-2x{font-size:2em}.fa-inverse{color:#fff}.fa-glass:before{content:""}.fa-music:before{content:""}.fa-search:before,.icon-search:before{content:""}.fa-envelope-o:before{content:""}.fa-heart:before{content:""}.fa-star:before{content:""}.fa-star-o:before{content:""}.fa-user:before{content:""}.fa-film:before{content:""}.fa-th-large:before{content:""}.fa-th:before{content:""}.fa-th-list:before{content:""}.fa-check:before{content:""}.fa-remove:before,.fa-close:before,.fa-times:before{content:""}.fa-search-plus:before{content:""}.fa-search-minus:before{content:""}.fa-power-off:before{content:""}.fa-signal:before{content:""}.fa-gear:before,.fa-cog:before{content:""}.fa-trash-o:before{content:""}.fa-home:before,.icon-home:before{content:""}.fa-file-o:before{content:""}.fa-clock-o:before{content:""}.fa-road:before{content:""}.fa-download:before{content:""}.fa-arrow-circle-o-down:before{content:""}.fa-arrow-circle-o-up:before{content:""}.fa-inbox:before{content:""}.fa-play-circle-o:before{content:""}.fa-rotate-right:before,.fa-repeat:before{content:""}.fa-refresh:before{content:""}.fa-list-alt:before{content:""}.fa-lock:before{content:""}.fa-flag:before{content:""}.fa-headphones:before{content:""}.fa-volume-off:before{content:""}.fa-volume-down:before{content:""}.fa-volume-up:before{content:""}.fa-qrcode:before{content:""}.fa-barcode:before{content:""}.fa-tag:before{content:""}.fa-tags:before{content:""}.fa-book:before,.icon-book:before{content:""}.fa-bookmark:before{content:""}.fa-print:before{content:""}.fa-camera:before{content:""}.fa-font:before{content:""}.fa-bold:before{content:""}.fa-italic:before{content:""}.fa-text-height:before{content:""}.fa-text-width:before{content:""}.fa-align-left:before{content:""}.fa-alig
n-center:before{content:""}.fa-align-right:before{content:""}.fa-align-justify:before{content:""}.fa-list:before{content:""}.fa-dedent:before,.fa-outdent:before{content:""}.fa-indent:before{content:""}.fa-video-camera:before{content:""}.fa-photo:before,.fa-image:before,.fa-picture-o:before{content:""}.fa-pencil:before{content:""}.fa-map-marker:before{content:""}.fa-adjust:before{content:""}.fa-tint:before{content:""}.fa-edit:before,.fa-pencil-square-o:before{content:""}.fa-share-square-o:before{content:""}.fa-check-square-o:before{content:""}.fa-arrows:before{content:""}.fa-step-backward:before{content:""}.fa-fast-backward:before{content:""}.fa-backward:before{content:""}.fa-play:before{content:""}.fa-pause:before{content:""}.fa-stop:before{content:""}.fa-forward:before{content:""}.fa-fast-forward:before{content:""}.fa-step-forward:before{content:""}.fa-eject:before{content:""}.fa-chevron-left:before{content:""}.fa-chevron-right:before{content:""}.fa-plus-circle:before{content:""}.fa-minus-circle:before{content:""}.fa-times-circle:before,.wy-inline-validate.wy-inline-validate-danger .wy-input-context:before{content:""}.fa-check-circle:before,.wy-inline-validate.wy-inline-validate-success .wy-input-context:before{content:""}.fa-question-circle:before{content:""}.fa-info-circle:before{content:""}.fa-crosshairs:before{content:""}.fa-times-circle-o:before{content:""}.fa-check-circle-o:before{content:""}.fa-ban:before{content:""}.fa-arrow-left:before{content:""}.fa-arrow-right:before{content:""}.fa-arrow-up:before{content:""}.fa-arrow-down:before{content:""}.fa-mail-forward:before,.fa-share:before{content:""}.fa-expand:before{content:""}.fa-compress:before{content:""}.fa-plus:before{content:""}.fa-minus:before{content:""}.fa-asterisk:before{content:""}.fa-exclamation-circle:before,.wy-inline-validate.wy-inline-validate-warning .wy-input-context:before,.wy-inline-validate.wy-inline-validate-info .wy-input-context:before,.rst-content 
.admonition-title:before{content:""}.fa-gift:before{content:""}.fa-leaf:before{content:""}.fa-fire:before,.icon-fire:before{content:""}.fa-eye:before{content:""}.fa-eye-slash:before{content:""}.fa-warning:before,.fa-exclamation-triangle:before{content:""}.fa-plane:before{content:""}.fa-calendar:before{content:""}.fa-random:before{content:""}.fa-comment:before{content:""}.fa-magnet:before{content:""}.fa-chevron-up:before{content:""}.fa-chevron-down:before{content:""}.fa-retweet:before{content:""}.fa-shopping-cart:before{content:""}.fa-folder:before{content:""}.fa-folder-open:before{content:""}.fa-arrows-v:before{content:""}.fa-arrows-h:before{content:""}.fa-bar-chart-o:before,.fa-bar-chart:before{content:""}.fa-twitter-square:before{content:""}.fa-facebook-square:before{content:""}.fa-camera-retro:before{content:""}.fa-key:before{content:""}.fa-gears:before,.fa-cogs:before{content:""}.fa-comments:before{content:""}.fa-thumbs-o-up:before{content:""}.fa-thumbs-o-down:before{content:""}.fa-star-half:before{content:""}.fa-heart-o:before{content:""}.fa-sign-out:before{content:""}.fa-linkedin-square:before{content:""}.fa-thumb-tack:before{content:""}.fa-external-link:before{content:""}.fa-sign-in:before{content:""}.fa-trophy:before{content:""}.fa-github-square:before{content:""}.fa-upload:before{content:""}.fa-lemon-o:before{content:""}.fa-phone:before{content:""}.fa-square-o:before{content:""}.fa-bookmark-o:before{content:""}.fa-phone-square:before{content:""}.fa-twitter:before{content:""}.fa-facebook:before{content:""}.fa-github:before,.icon-github:before{content:""}.fa-unlock:before{content:""}.fa-credit-card:before{content:""}.fa-rss:before{content:""}.fa-hdd-o:before{content:""}.fa-bullhorn:before{content:""}.fa-bell:before{content:""}.fa-certificate:before{content:""}.fa-hand-o-right:before{content:""}.fa-hand-o-left:before{content:""}.fa-hand-o-up:before{content:""}.fa-hand-o-down:before{content:""}.fa-arrow-circle-left:before,.icon-circle-arrow-left:before{content:
""}.fa-arrow-circle-right:before,.icon-circle-arrow-right:before{content:""}.fa-arrow-circle-up:before{content:""}.fa-arrow-circle-down:before{content:""}.fa-globe:before{content:""}.fa-wrench:before{content:""}.fa-tasks:before{content:""}.fa-filter:before{content:""}.fa-briefcase:before{content:""}.fa-arrows-alt:before{content:""}.fa-group:before,.fa-users:before{content:""}.fa-chain:before,.fa-link:before,.icon-link:before{content:""}.fa-cloud:before{content:""}.fa-flask:before{content:""}.fa-cut:before,.fa-scissors:before{content:""}.fa-copy:before,.fa-files-o:before{content:""}.fa-paperclip:before{content:""}.fa-save:before,.fa-floppy-o:before{content:""}.fa-square:before{content:""}.fa-navicon:before,.fa-reorder:before,.fa-bars:before{content:""}.fa-list-ul:before{content:""}.fa-list-ol:before{content:""}.fa-strikethrough:before{content:""}.fa-underline:before{content:""}.fa-table:before{content:""}.fa-magic:before{content:""}.fa-truck:before{content:""}.fa-pinterest:before{content:""}.fa-pinterest-square:before{content:""}.fa-google-plus-square:before{content:""}.fa-google-plus:before{content:""}.fa-money:before{content:""}.fa-caret-down:before,.wy-dropdown 
.caret:before,.icon-caret-down:before{content:""}.fa-caret-up:before{content:""}.fa-caret-left:before{content:""}.fa-caret-right:before{content:""}.fa-columns:before{content:""}.fa-unsorted:before,.fa-sort:before{content:""}.fa-sort-down:before,.fa-sort-desc:before{content:""}.fa-sort-up:before,.fa-sort-asc:before{content:""}.fa-envelope:before{content:""}.fa-linkedin:before{content:""}.fa-rotate-left:before,.fa-undo:before{content:""}.fa-legal:before,.fa-gavel:before{content:""}.fa-dashboard:before,.fa-tachometer:before{content:""}.fa-comment-o:before{content:""}.fa-comments-o:before{content:""}.fa-flash:before,.fa-bolt:before{content:""}.fa-sitemap:before{content:""}.fa-umbrella:before{content:""}.fa-paste:before,.fa-clipboard:before{content:""}.fa-lightbulb-o:before{content:""}.fa-exchange:before{content:""}.fa-cloud-download:before{content:""}.fa-cloud-upload:before{content:""}.fa-user-md:before{content:""}.fa-stethoscope:before{content:""}.fa-suitcase:before{content:""}.fa-bell-o:before{content:""}.fa-coffee:before{content:""}.fa-cutlery:before{content:""}.fa-file-text-o:before{content:""}.fa-building-o:before{content:""}.fa-hospital-o:before{content:""}.fa-ambulance:before{content:""}.fa-medkit:before{content:""}.fa-fighter-jet:before{content:""}.fa-beer:before{content:""}.fa-h-square:before{content:""}.fa-plus-square:before{content:""}.fa-angle-double-left:before{content:""}.fa-angle-double-right:before{content:""}.fa-angle-double-up:before{content:""}.fa-angle-double-down:before{content:""}.fa-angle-left:before{content:""}.fa-angle-right:before{content:""}.fa-angle-up:before{content:""}.fa-angle-down:before{content:""}.fa-desktop:before{content:""}.fa-laptop:before{content:""}.fa-tablet:before{content:""}.fa-mobile-phone:before,.fa-mobile:before{content:""}.fa-circle-o:before{content:""}.fa-quote-left:before{content:""}.fa-quote-right:before{content:""}.fa-spinner:before{content:""}.fa-circle:before{content:""}.fa-mail-reply:before,.fa-reply:before{content:"
"}.fa-github-alt:before{content:""}.fa-folder-o:before{content:""}.fa-folder-open-o:before{content:""}.fa-smile-o:before{content:""}.fa-frown-o:before{content:""}.fa-meh-o:before{content:""}.fa-gamepad:before{content:""}.fa-keyboard-o:before{content:""}.fa-flag-o:before{content:""}.fa-flag-checkered:before{content:""}.fa-terminal:before{content:""}.fa-code:before{content:""}.fa-mail-reply-all:before,.fa-reply-all:before{content:""}.fa-star-half-empty:before,.fa-star-half-full:before,.fa-star-half-o:before{content:""}.fa-location-arrow:before{content:""}.fa-crop:before{content:""}.fa-code-fork:before{content:""}.fa-unlink:before,.fa-chain-broken:before{content:""}.fa-question:before{content:""}.fa-info:before{content:""}.fa-exclamation:before{content:""}.fa-superscript:before{content:""}.fa-subscript:before{content:""}.fa-eraser:before{content:""}.fa-puzzle-piece:before{content:""}.fa-microphone:before{content:""}.fa-microphone-slash:before{content:""}.fa-shield:before{content:""}.fa-calendar-o:before{content:""}.fa-fire-extinguisher:before{content:""}.fa-rocket:before{content:""}.fa-maxcdn:before{content:""}.fa-chevron-circle-left:before{content:""}.fa-chevron-circle-right:before{content:""}.fa-chevron-circle-up:before{content:""}.fa-chevron-circle-down:before{content:""}.fa-html5:before{content:""}.fa-css3:before{content:""}.fa-anchor:before{content:""}.fa-unlock-alt:before{content:""}.fa-bullseye:before{content:""}.fa-ellipsis-h:before{content:""}.fa-ellipsis-v:before{content:""}.fa-rss-square:before{content:""}.fa-play-circle:before{content:""}.fa-ticket:before{content:""}.fa-minus-square:before{content:""}.fa-minus-square-o:before{content:""}.fa-level-up:before{content:""}.fa-level-down:before{content:""}.fa-check-square:before{content:""}.fa-pencil-square:before{content:""}.fa-external-link-square:before{content:""}.fa-share-square:before{content:""}.fa-compass:before{content:""}.fa-toggle-down:before,.fa-caret-square-o-down:before{content:""}.fa-toggle-up:befo
re,.fa-caret-square-o-up:before{content:""}.fa-toggle-right:before,.fa-caret-square-o-right:before{content:""}.fa-euro:before,.fa-eur:before{content:""}.fa-gbp:before{content:""}.fa-dollar:before,.fa-usd:before{content:""}.fa-rupee:before,.fa-inr:before{content:""}.fa-cny:before,.fa-rmb:before,.fa-yen:before,.fa-jpy:before{content:""}.fa-ruble:before,.fa-rouble:before,.fa-rub:before{content:""}.fa-won:before,.fa-krw:before{content:""}.fa-bitcoin:before,.fa-btc:before{content:""}.fa-file:before{content:""}.fa-file-text:before{content:""}.fa-sort-alpha-asc:before{content:""}.fa-sort-alpha-desc:before{content:""}.fa-sort-amount-asc:before{content:""}.fa-sort-amount-desc:before{content:""}.fa-sort-numeric-asc:before{content:""}.fa-sort-numeric-desc:before{content:""}.fa-thumbs-up:before{content:""}.fa-thumbs-down:before{content:""}.fa-youtube-square:before{content:""}.fa-youtube:before{content:""}.fa-xing:before{content:""}.fa-xing-square:before{content:""}.fa-youtube-play:before{content:""}.fa-dropbox:before{content:""}.fa-stack-overflow:before{content:""}.fa-instagram:before{content:""}.fa-flickr:before{content:""}.fa-adn:before{content:""}.fa-bitbucket:before,.icon-bitbucket:before{content:""}.fa-bitbucket-square:before{content:""}.fa-tumblr:before{content:""}.fa-tumblr-square:before{content:""}.fa-long-arrow-down:before{content:""}.fa-long-arrow-up:before{content:""}.fa-long-arrow-left:before{content:""}.fa-long-arrow-right:before{content:""}.fa-apple:before{content:""}.fa-windows:before{content:""}.fa-android:before{content:""}.fa-linux:before{content:""}.fa-dribbble:before{content:""}.fa-skype:before{content:""}.fa-foursquare:before{content:""}.fa-trello:before{content:""}.fa-female:before{content:""}.fa-male:before{content:""}.fa-gittip:before{content:""}.fa-sun-o:before{content:""}.fa-moon-o:before{content:""}.fa-archive:before{content:""}.fa-bug:before{content:""}.fa-vk:before{content:""}.fa-weibo:before{content:""}.fa-renren:before{content:""}.fa-pagelines:bef
ore{content:""}.fa-stack-exchange:before{content:""}.fa-arrow-circle-o-right:before{content:""}.fa-arrow-circle-o-left:before{content:""}.fa-toggle-left:before,.fa-caret-square-o-left:before{content:""}.fa-dot-circle-o:before{content:""}.fa-wheelchair:before{content:""}.fa-vimeo-square:before{content:""}.fa-turkish-lira:before,.fa-try:before{content:""}.fa-plus-square-o:before{content:""}.fa-space-shuttle:before{content:""}.fa-slack:before{content:""}.fa-envelope-square:before{content:""}.fa-wordpress:before{content:""}.fa-openid:before{content:""}.fa-institution:before,.fa-bank:before,.fa-university:before{content:""}.fa-mortar-board:before,.fa-graduation-cap:before{content:""}.fa-yahoo:before{content:""}.fa-google:before{content:""}.fa-reddit:before{content:""}.fa-reddit-square:before{content:""}.fa-stumbleupon-circle:before{content:""}.fa-stumbleupon:before{content:""}.fa-delicious:before{content:""}.fa-digg:before{content:""}.fa-pied-piper:before{content:""}.fa-pied-piper-alt:before{content:""}.fa-drupal:before{content:""}.fa-joomla:before{content:""}.fa-language:before{content:""}.fa-fax:before{content:""}.fa-building:before{content:""}.fa-child:before{content:""}.fa-paw:before{content:""}.fa-spoon:before{content:""}.fa-cube:before{content:""}.fa-cubes:before{content:""}.fa-behance:before{content:""}.fa-behance-square:before{content:""}.fa-steam:before{content:""}.fa-steam-square:before{content:""}.fa-recycle:before{content:""}.fa-automobile:before,.fa-car:before{content:""}.fa-cab:before,.fa-taxi:before{content:""}.fa-tree:before{content:""}.fa-spotify:before{content:""}.fa-deviantart:before{content:""}.fa-soundcloud:before{content:""}.fa-database:before{content:""}.fa-file-pdf-o:before{content:""}.fa-file-word-o:before{content:""}.fa-file-excel-o:before{content:""}.fa-file-powerpoint-o:before{content:""}.fa-file-photo-o:before,.fa-file-picture-o:before,.fa-file-image-o:before{content:""}.fa-file-zip-o:before,.fa-file-archive-o:before{content:""}.fa-file-sound
-o:before,.fa-file-audio-o:before{content:""}.fa-file-movie-o:before,.fa-file-video-o:before{content:""}.fa-file-code-o:before{content:""}.fa-vine:before{content:""}.fa-codepen:before{content:""}.fa-jsfiddle:before{content:""}.fa-life-bouy:before,.fa-life-buoy:before,.fa-life-saver:before,.fa-support:before,.fa-life-ring:before{content:""}.fa-circle-o-notch:before{content:""}.fa-ra:before,.fa-rebel:before{content:""}.fa-ge:before,.fa-empire:before{content:""}.fa-git-square:before{content:""}.fa-git:before{content:""}.fa-hacker-news:before{content:""}.fa-tencent-weibo:before{content:""}.fa-qq:before{content:""}.fa-wechat:before,.fa-weixin:before{content:""}.fa-send:before,.fa-paper-plane:before{content:""}.fa-send-o:before,.fa-paper-plane-o:before{content:""}.fa-history:before{content:""}.fa-circle-thin:before{content:""}.fa-header:before{content:""}.fa-paragraph:before{content:""}.fa-sliders:before{content:""}.fa-share-alt:before{content:""}.fa-share-alt-square:before{content:""}.fa-bomb:before{content:""}.fa-soccer-ball-o:before,.fa-futbol-o:before{content:""}.fa-tty:before{content:""}.fa-binoculars:before{content:""}.fa-plug:before{content:""}.fa-slideshare:before{content:""}.fa-twitch:before{content:""}.fa-yelp:before{content:""}.fa-newspaper-o:before{content:""}.fa-wifi:before{content:""}.fa-calculator:before{content:""}.fa-paypal:before{content:""}.fa-google-wallet:before{content:""}.fa-cc-visa:before{content:""}.fa-cc-mastercard:before{content:""}.fa-cc-discover:before{content:""}.fa-cc-amex:before{content:""}.fa-cc-paypal:before{content:""}.fa-cc-stripe:before{content:""}.fa-bell-slash:before{content:""}.fa-bell-slash-o:before{content:""}.fa-trash:before{content:""}.fa-copyright:before{content:""}.fa-at:before{content:""}.fa-eyedropper:before{content:""}.fa-paint-brush:before{content:""}.fa-birthday-cake:before{content:""}.fa-area-chart:before{content:""}.fa-pie-chart:before{content:""}.fa-line-chart:before{content:""}.fa-lastfm:before{content:""}.fa-lastfm-s
quare:before{content:""}.fa-toggle-off:before{content:""}.fa-toggle-on:before{content:""}.fa-bicycle:before{content:""}.fa-bus:before{content:""}.fa-ioxhost:before{content:""}.fa-angellist:before{content:""}.fa-cc:before{content:""}.fa-shekel:before,.fa-sheqel:before,.fa-ils:before{content:""}.fa-meanpath:before{content:""}.fa,.rst-content .admonition-title,.rst-content h1 .headerlink,.rst-content h2 .headerlink,.rst-content h3 .headerlink,.rst-content h4 .headerlink,.rst-content h5 .headerlink,.rst-content h6 .headerlink,.rst-content dl dt .headerlink,.icon,.wy-dropdown .caret,.wy-inline-validate.wy-inline-validate-success .wy-input-context,.wy-inline-validate.wy-inline-validate-danger .wy-input-context,.wy-inline-validate.wy-inline-validate-warning .wy-input-context,.wy-inline-validate.wy-inline-validate-info .wy-input-context{font-family:inherit}.fa:before,.rst-content .admonition-title:before,.rst-content h1 .headerlink:before,.rst-content h2 .headerlink:before,.rst-content h3 .headerlink:before,.rst-content h4 .headerlink:before,.rst-content h5 .headerlink:before,.rst-content h6 .headerlink:before,.rst-content dl dt .headerlink:before,.icon:before,.wy-dropdown .caret:before,.wy-inline-validate.wy-inline-validate-success .wy-input-context:before,.wy-inline-validate.wy-inline-validate-danger .wy-input-context:before,.wy-inline-validate.wy-inline-validate-warning .wy-input-context:before,.wy-inline-validate.wy-inline-validate-info .wy-input-context:before{font-family:"FontAwesome";display:inline-block;font-style:normal;font-weight:normal;line-height:1;text-decoration:inherit}a .fa,a .rst-content .admonition-title,.rst-content a .admonition-title,a .rst-content h1 .headerlink,.rst-content h1 a .headerlink,a .rst-content h2 .headerlink,.rst-content h2 a .headerlink,a .rst-content h3 .headerlink,.rst-content h3 a .headerlink,a .rst-content h4 .headerlink,.rst-content h4 a .headerlink,a .rst-content h5 .headerlink,.rst-content h5 a .headerlink,a .rst-content h6 
.headerlink,.rst-content h6 a .headerlink,a .rst-content dl dt .headerlink,.rst-content dl dt a .headerlink,a .icon{display:inline-block;text-decoration:inherit}.btn .fa,.btn .rst-content .admonition-title,.rst-content .btn .admonition-title,.btn .rst-content h1 .headerlink,.rst-content h1 .btn .headerlink,.btn .rst-content h2 .headerlink,.rst-content h2 .btn .headerlink,.btn .rst-content h3 .headerlink,.rst-content h3 .btn .headerlink,.btn .rst-content h4 .headerlink,.rst-content h4 .btn .headerlink,.btn .rst-content h5 .headerlink,.rst-content h5 .btn .headerlink,.btn .rst-content h6 .headerlink,.rst-content h6 .btn .headerlink,.btn .rst-content dl dt .headerlink,.rst-content dl dt .btn .headerlink,.btn .icon,.nav .fa,.nav .rst-content .admonition-title,.rst-content .nav .admonition-title,.nav .rst-content h1 .headerlink,.rst-content h1 .nav .headerlink,.nav .rst-content h2 .headerlink,.rst-content h2 .nav .headerlink,.nav .rst-content h3 .headerlink,.rst-content h3 .nav .headerlink,.nav .rst-content h4 .headerlink,.rst-content h4 .nav .headerlink,.nav .rst-content h5 .headerlink,.rst-content h5 .nav .headerlink,.nav .rst-content h6 .headerlink,.rst-content h6 .nav .headerlink,.nav .rst-content dl dt .headerlink,.rst-content dl dt .nav .headerlink,.nav .icon{display:inline}.btn .fa.fa-large,.btn .rst-content .fa-large.admonition-title,.rst-content .btn .fa-large.admonition-title,.btn .rst-content h1 .fa-large.headerlink,.rst-content h1 .btn .fa-large.headerlink,.btn .rst-content h2 .fa-large.headerlink,.rst-content h2 .btn .fa-large.headerlink,.btn .rst-content h3 .fa-large.headerlink,.rst-content h3 .btn .fa-large.headerlink,.btn .rst-content h4 .fa-large.headerlink,.rst-content h4 .btn .fa-large.headerlink,.btn .rst-content h5 .fa-large.headerlink,.rst-content h5 .btn .fa-large.headerlink,.btn .rst-content h6 .fa-large.headerlink,.rst-content h6 .btn .fa-large.headerlink,.btn .rst-content dl dt .fa-large.headerlink,.rst-content dl dt .btn 
.fa-large.headerlink,.btn .fa-large.icon,.nav .fa.fa-large,.nav .rst-content .fa-large.admonition-title,.rst-content .nav .fa-large.admonition-title,.nav .rst-content h1 .fa-large.headerlink,.rst-content h1 .nav .fa-large.headerlink,.nav .rst-content h2 .fa-large.headerlink,.rst-content h2 .nav .fa-large.headerlink,.nav .rst-content h3 .fa-large.headerlink,.rst-content h3 .nav .fa-large.headerlink,.nav .rst-content h4 .fa-large.headerlink,.rst-content h4 .nav .fa-large.headerlink,.nav .rst-content h5 .fa-large.headerlink,.rst-content h5 .nav .fa-large.headerlink,.nav .rst-content h6 .fa-large.headerlink,.rst-content h6 .nav .fa-large.headerlink,.nav .rst-content dl dt .fa-large.headerlink,.rst-content dl dt .nav .fa-large.headerlink,.nav .fa-large.icon{line-height:0.9em}.btn .fa.fa-spin,.btn .rst-content .fa-spin.admonition-title,.rst-content .btn .fa-spin.admonition-title,.btn .rst-content h1 .fa-spin.headerlink,.rst-content h1 .btn .fa-spin.headerlink,.btn .rst-content h2 .fa-spin.headerlink,.rst-content h2 .btn .fa-spin.headerlink,.btn .rst-content h3 .fa-spin.headerlink,.rst-content h3 .btn .fa-spin.headerlink,.btn .rst-content h4 .fa-spin.headerlink,.rst-content h4 .btn .fa-spin.headerlink,.btn .rst-content h5 .fa-spin.headerlink,.rst-content h5 .btn .fa-spin.headerlink,.btn .rst-content h6 .fa-spin.headerlink,.rst-content h6 .btn .fa-spin.headerlink,.btn .rst-content dl dt .fa-spin.headerlink,.rst-content dl dt .btn .fa-spin.headerlink,.btn .fa-spin.icon,.nav .fa.fa-spin,.nav .rst-content .fa-spin.admonition-title,.rst-content .nav .fa-spin.admonition-title,.nav .rst-content h1 .fa-spin.headerlink,.rst-content h1 .nav .fa-spin.headerlink,.nav .rst-content h2 .fa-spin.headerlink,.rst-content h2 .nav .fa-spin.headerlink,.nav .rst-content h3 .fa-spin.headerlink,.rst-content h3 .nav .fa-spin.headerlink,.nav .rst-content h4 .fa-spin.headerlink,.rst-content h4 .nav .fa-spin.headerlink,.nav .rst-content h5 .fa-spin.headerlink,.rst-content h5 .nav 
.fa-spin.headerlink,.nav .rst-content h6 .fa-spin.headerlink,.rst-content h6 .nav .fa-spin.headerlink,.nav .rst-content dl dt .fa-spin.headerlink,.rst-content dl dt .nav .fa-spin.headerlink,.nav .fa-spin.icon{display:inline-block}.btn.fa:before,.rst-content .btn.admonition-title:before,.rst-content h1 .btn.headerlink:before,.rst-content h2 .btn.headerlink:before,.rst-content h3 .btn.headerlink:before,.rst-content h4 .btn.headerlink:before,.rst-content h5 .btn.headerlink:before,.rst-content h6 .btn.headerlink:before,.rst-content dl dt .btn.headerlink:before,.btn.icon:before{opacity:0.5;-webkit-transition:opacity 0.05s ease-in;-moz-transition:opacity 0.05s ease-in;transition:opacity 0.05s ease-in}.btn.fa:hover:before,.rst-content .btn.admonition-title:hover:before,.rst-content h1 .btn.headerlink:hover:before,.rst-content h2 .btn.headerlink:hover:before,.rst-content h3 .btn.headerlink:hover:before,.rst-content h4 .btn.headerlink:hover:before,.rst-content h5 .btn.headerlink:hover:before,.rst-content h6 .btn.headerlink:hover:before,.rst-content dl dt .btn.headerlink:hover:before,.btn.icon:hover:before{opacity:1}.btn-mini .fa:before,.btn-mini .rst-content .admonition-title:before,.rst-content .btn-mini .admonition-title:before,.btn-mini .rst-content h1 .headerlink:before,.rst-content h1 .btn-mini .headerlink:before,.btn-mini .rst-content h2 .headerlink:before,.rst-content h2 .btn-mini .headerlink:before,.btn-mini .rst-content h3 .headerlink:before,.rst-content h3 .btn-mini .headerlink:before,.btn-mini .rst-content h4 .headerlink:before,.rst-content h4 .btn-mini .headerlink:before,.btn-mini .rst-content h5 .headerlink:before,.rst-content h5 .btn-mini .headerlink:before,.btn-mini .rst-content h6 .headerlink:before,.rst-content h6 .btn-mini .headerlink:before,.btn-mini .rst-content dl dt .headerlink:before,.rst-content dl dt .btn-mini .headerlink:before,.btn-mini .icon:before{font-size:14px;vertical-align:-15%}.wy-alert,.rst-content .note,.rst-content 
.attention,.rst-content .caution,.rst-content .danger,.rst-content .error,.rst-content .hint,.rst-content .important,.rst-content .tip,.rst-content .warning,.rst-content .seealso,.rst-content .admonition-todo{padding:12px;line-height:24px;margin-bottom:24px;background:#e7f2fa}.wy-alert-title,.rst-content .admonition-title{color:#fff;font-weight:bold;display:block;color:#fff;background:#6ab0de;margin:-12px;padding:6px 12px;margin-bottom:12px}.wy-alert.wy-alert-danger,.rst-content .wy-alert-danger.note,.rst-content .wy-alert-danger.attention,.rst-content .wy-alert-danger.caution,.rst-content .danger,.rst-content .error,.rst-content .wy-alert-danger.hint,.rst-content .wy-alert-danger.important,.rst-content .wy-alert-danger.tip,.rst-content .wy-alert-danger.warning,.rst-content .wy-alert-danger.seealso,.rst-content .wy-alert-danger.admonition-todo{background:#fdf3f2}.wy-alert.wy-alert-danger .wy-alert-title,.rst-content .wy-alert-danger.note .wy-alert-title,.rst-content .wy-alert-danger.attention .wy-alert-title,.rst-content .wy-alert-danger.caution .wy-alert-title,.rst-content .danger .wy-alert-title,.rst-content .error .wy-alert-title,.rst-content .wy-alert-danger.hint .wy-alert-title,.rst-content .wy-alert-danger.important .wy-alert-title,.rst-content .wy-alert-danger.tip .wy-alert-title,.rst-content .wy-alert-danger.warning .wy-alert-title,.rst-content .wy-alert-danger.seealso .wy-alert-title,.rst-content .wy-alert-danger.admonition-todo .wy-alert-title,.wy-alert.wy-alert-danger .rst-content .admonition-title,.rst-content .wy-alert.wy-alert-danger .admonition-title,.rst-content .wy-alert-danger.note .admonition-title,.rst-content .wy-alert-danger.attention .admonition-title,.rst-content .wy-alert-danger.caution .admonition-title,.rst-content .danger .admonition-title,.rst-content .error .admonition-title,.rst-content .wy-alert-danger.hint .admonition-title,.rst-content .wy-alert-danger.important .admonition-title,.rst-content .wy-alert-danger.tip 
.admonition-title,.rst-content .wy-alert-danger.warning .admonition-title,.rst-content .wy-alert-danger.seealso .admonition-title,.rst-content .wy-alert-danger.admonition-todo .admonition-title{background:#f29f97}.wy-alert.wy-alert-warning,.rst-content .wy-alert-warning.note,.rst-content .attention,.rst-content .caution,.rst-content .wy-alert-warning.danger,.rst-content .wy-alert-warning.error,.rst-content .wy-alert-warning.hint,.rst-content .wy-alert-warning.important,.rst-content .wy-alert-warning.tip,.rst-content .warning,.rst-content .wy-alert-warning.seealso,.rst-content .admonition-todo{background:#ffedcc}.wy-alert.wy-alert-warning .wy-alert-title,.rst-content .wy-alert-warning.note .wy-alert-title,.rst-content .attention .wy-alert-title,.rst-content .caution .wy-alert-title,.rst-content .wy-alert-warning.danger .wy-alert-title,.rst-content .wy-alert-warning.error .wy-alert-title,.rst-content .wy-alert-warning.hint .wy-alert-title,.rst-content .wy-alert-warning.important .wy-alert-title,.rst-content .wy-alert-warning.tip .wy-alert-title,.rst-content .warning .wy-alert-title,.rst-content .wy-alert-warning.seealso .wy-alert-title,.rst-content .admonition-todo .wy-alert-title,.wy-alert.wy-alert-warning .rst-content .admonition-title,.rst-content .wy-alert.wy-alert-warning .admonition-title,.rst-content .wy-alert-warning.note .admonition-title,.rst-content .attention .admonition-title,.rst-content .caution .admonition-title,.rst-content .wy-alert-warning.danger .admonition-title,.rst-content .wy-alert-warning.error .admonition-title,.rst-content .wy-alert-warning.hint .admonition-title,.rst-content .wy-alert-warning.important .admonition-title,.rst-content .wy-alert-warning.tip .admonition-title,.rst-content .warning .admonition-title,.rst-content .wy-alert-warning.seealso .admonition-title,.rst-content .admonition-todo .admonition-title{background:#f0b37e}.wy-alert.wy-alert-info,.rst-content .note,.rst-content .wy-alert-info.attention,.rst-content 
.wy-alert-info.caution,.rst-content .wy-alert-info.danger,.rst-content .wy-alert-info.error,.rst-content .wy-alert-info.hint,.rst-content .wy-alert-info.important,.rst-content .wy-alert-info.tip,.rst-content .wy-alert-info.warning,.rst-content .seealso,.rst-content .wy-alert-info.admonition-todo{background:#e7f2fa}.wy-alert.wy-alert-info .wy-alert-title,.rst-content .note .wy-alert-title,.rst-content .wy-alert-info.attention .wy-alert-title,.rst-content .wy-alert-info.caution .wy-alert-title,.rst-content .wy-alert-info.danger .wy-alert-title,.rst-content .wy-alert-info.error .wy-alert-title,.rst-content .wy-alert-info.hint .wy-alert-title,.rst-content .wy-alert-info.important .wy-alert-title,.rst-content .wy-alert-info.tip .wy-alert-title,.rst-content .wy-alert-info.warning .wy-alert-title,.rst-content .seealso .wy-alert-title,.rst-content .wy-alert-info.admonition-todo .wy-alert-title,.wy-alert.wy-alert-info .rst-content .admonition-title,.rst-content .wy-alert.wy-alert-info .admonition-title,.rst-content .note .admonition-title,.rst-content .wy-alert-info.attention .admonition-title,.rst-content .wy-alert-info.caution .admonition-title,.rst-content .wy-alert-info.danger .admonition-title,.rst-content .wy-alert-info.error .admonition-title,.rst-content .wy-alert-info.hint .admonition-title,.rst-content .wy-alert-info.important .admonition-title,.rst-content .wy-alert-info.tip .admonition-title,.rst-content .wy-alert-info.warning .admonition-title,.rst-content .seealso .admonition-title,.rst-content .wy-alert-info.admonition-todo .admonition-title{background:#6ab0de}.wy-alert.wy-alert-success,.rst-content .wy-alert-success.note,.rst-content .wy-alert-success.attention,.rst-content .wy-alert-success.caution,.rst-content .wy-alert-success.danger,.rst-content .wy-alert-success.error,.rst-content .hint,.rst-content .important,.rst-content .tip,.rst-content .wy-alert-success.warning,.rst-content .wy-alert-success.seealso,.rst-content 
.wy-alert-success.admonition-todo{background:#dbfaf4}.wy-alert.wy-alert-success .wy-alert-title,.rst-content .wy-alert-success.note .wy-alert-title,.rst-content .wy-alert-success.attention .wy-alert-title,.rst-content .wy-alert-success.caution .wy-alert-title,.rst-content .wy-alert-success.danger .wy-alert-title,.rst-content .wy-alert-success.error .wy-alert-title,.rst-content .hint .wy-alert-title,.rst-content .important .wy-alert-title,.rst-content .tip .wy-alert-title,.rst-content .wy-alert-success.warning .wy-alert-title,.rst-content .wy-alert-success.seealso .wy-alert-title,.rst-content .wy-alert-success.admonition-todo .wy-alert-title,.wy-alert.wy-alert-success .rst-content .admonition-title,.rst-content .wy-alert.wy-alert-success .admonition-title,.rst-content .wy-alert-success.note .admonition-title,.rst-content .wy-alert-success.attention .admonition-title,.rst-content .wy-alert-success.caution .admonition-title,.rst-content .wy-alert-success.danger .admonition-title,.rst-content .wy-alert-success.error .admonition-title,.rst-content .hint .admonition-title,.rst-content .important .admonition-title,.rst-content .tip .admonition-title,.rst-content .wy-alert-success.warning .admonition-title,.rst-content .wy-alert-success.seealso .admonition-title,.rst-content .wy-alert-success.admonition-todo .admonition-title{background:#1abc9c}.wy-alert.wy-alert-neutral,.rst-content .wy-alert-neutral.note,.rst-content .wy-alert-neutral.attention,.rst-content .wy-alert-neutral.caution,.rst-content .wy-alert-neutral.danger,.rst-content .wy-alert-neutral.error,.rst-content .wy-alert-neutral.hint,.rst-content .wy-alert-neutral.important,.rst-content .wy-alert-neutral.tip,.rst-content .wy-alert-neutral.warning,.rst-content .wy-alert-neutral.seealso,.rst-content .wy-alert-neutral.admonition-todo{background:#f3f6f6}.wy-alert.wy-alert-neutral .wy-alert-title,.rst-content .wy-alert-neutral.note .wy-alert-title,.rst-content .wy-alert-neutral.attention .wy-alert-title,.rst-content 
.wy-alert-neutral.caution .wy-alert-title,.rst-content .wy-alert-neutral.danger .wy-alert-title,.rst-content .wy-alert-neutral.error .wy-alert-title,.rst-content .wy-alert-neutral.hint .wy-alert-title,.rst-content .wy-alert-neutral.important .wy-alert-title,.rst-content .wy-alert-neutral.tip .wy-alert-title,.rst-content .wy-alert-neutral.warning .wy-alert-title,.rst-content .wy-alert-neutral.seealso .wy-alert-title,.rst-content .wy-alert-neutral.admonition-todo .wy-alert-title,.wy-alert.wy-alert-neutral .rst-content .admonition-title,.rst-content .wy-alert.wy-alert-neutral .admonition-title,.rst-content .wy-alert-neutral.note .admonition-title,.rst-content .wy-alert-neutral.attention .admonition-title,.rst-content .wy-alert-neutral.caution .admonition-title,.rst-content .wy-alert-neutral.danger .admonition-title,.rst-content .wy-alert-neutral.error .admonition-title,.rst-content .wy-alert-neutral.hint .admonition-title,.rst-content .wy-alert-neutral.important .admonition-title,.rst-content .wy-alert-neutral.tip .admonition-title,.rst-content .wy-alert-neutral.warning .admonition-title,.rst-content .wy-alert-neutral.seealso .admonition-title,.rst-content .wy-alert-neutral.admonition-todo .admonition-title{color:#404040;background:#e1e4e5}.wy-alert.wy-alert-neutral a,.rst-content .wy-alert-neutral.note a,.rst-content .wy-alert-neutral.attention a,.rst-content .wy-alert-neutral.caution a,.rst-content .wy-alert-neutral.danger a,.rst-content .wy-alert-neutral.error a,.rst-content .wy-alert-neutral.hint a,.rst-content .wy-alert-neutral.important a,.rst-content .wy-alert-neutral.tip a,.rst-content .wy-alert-neutral.warning a,.rst-content .wy-alert-neutral.seealso a,.rst-content .wy-alert-neutral.admonition-todo a{color:#2980B9}.wy-alert p:last-child,.rst-content .note p:last-child,.rst-content .attention p:last-child,.rst-content .caution p:last-child,.rst-content .danger p:last-child,.rst-content .error p:last-child,.rst-content .hint p:last-child,.rst-content .important 
p:last-child,.rst-content .tip p:last-child,.rst-content .warning p:last-child,.rst-content .seealso p:last-child,.rst-content .admonition-todo p:last-child{margin-bottom:0}.wy-tray-container{position:fixed;bottom:0px;left:0;z-index:600}.wy-tray-container li{display:block;width:300px;background:transparent;color:#fff;text-align:center;box-shadow:0 5px 5px 0 rgba(0,0,0,0.1);padding:0 24px;min-width:20%;opacity:0;height:0;line-height:56px;overflow:hidden;-webkit-transition:all 0.3s ease-in;-moz-transition:all 0.3s ease-in;transition:all 0.3s ease-in}.wy-tray-container li.wy-tray-item-success{background:#27AE60}.wy-tray-container li.wy-tray-item-info{background:#2980B9}.wy-tray-container li.wy-tray-item-warning{background:#E67E22}.wy-tray-container li.wy-tray-item-danger{background:#E74C3C}.wy-tray-container li.on{opacity:1;height:56px}@media screen and (max-width: 768px){.wy-tray-container{bottom:auto;top:0;width:100%}.wy-tray-container li{width:100%}}button{font-size:100%;margin:0;vertical-align:baseline;*vertical-align:middle;cursor:pointer;line-height:normal;-webkit-appearance:button;*overflow:visible}button::-moz-focus-inner,input::-moz-focus-inner{border:0;padding:0}button[disabled]{cursor:default}.btn{display:inline-block;border-radius:2px;line-height:normal;white-space:nowrap;text-align:center;cursor:pointer;font-size:100%;padding:6px 12px 8px 12px;color:#fff;border:1px solid rgba(0,0,0,0.1);background-color:#27AE60;text-decoration:none;font-weight:normal;font-family:"Lato","proxima-nova","Helvetica Neue",Arial,sans-serif;box-shadow:0px 1px 2px -1px rgba(255,255,255,0.5) inset,0px -2px 0px 0px rgba(0,0,0,0.1) inset;outline-none:false;vertical-align:middle;*display:inline;zoom:1;-webkit-user-drag:none;-webkit-user-select:none;-moz-user-select:none;-ms-user-select:none;user-select:none;-webkit-transition:all 0.1s linear;-moz-transition:all 0.1s linear;transition:all 0.1s 
linear}.btn-hover{background:#2e8ece;color:#fff}.btn:hover{background:#2cc36b;color:#fff}.btn:focus{background:#2cc36b;outline:0}.btn:active{box-shadow:0px -1px 0px 0px rgba(0,0,0,0.05) inset,0px 2px 0px 0px rgba(0,0,0,0.1) inset;padding:8px 12px 6px 12px}.btn:visited{color:#fff}.btn:disabled{background-image:none;filter:progid:DXImageTransform.Microsoft.gradient(enabled = false);filter:alpha(opacity=40);opacity:0.4;cursor:not-allowed;box-shadow:none}.btn-disabled{background-image:none;filter:progid:DXImageTransform.Microsoft.gradient(enabled = false);filter:alpha(opacity=40);opacity:0.4;cursor:not-allowed;box-shadow:none}.btn-disabled:hover,.btn-disabled:focus,.btn-disabled:active{background-image:none;filter:progid:DXImageTransform.Microsoft.gradient(enabled = false);filter:alpha(opacity=40);opacity:0.4;cursor:not-allowed;box-shadow:none}.btn::-moz-focus-inner{padding:0;border:0}.btn-small{font-size:80%}.btn-info{background-color:#2980B9 !important}.btn-info:hover{background-color:#2e8ece !important}.btn-neutral{background-color:#f3f6f6 !important;color:#404040 !important}.btn-neutral:hover{background-color:#e5ebeb !important;color:#404040}.btn-neutral:visited{color:#404040 !important}.btn-success{background-color:#27AE60 !important}.btn-success:hover{background-color:#295 !important}.btn-danger{background-color:#E74C3C !important}.btn-danger:hover{background-color:#ea6153 !important}.btn-warning{background-color:#E67E22 !important}.btn-warning:hover{background-color:#e98b39 !important}.btn-invert{background-color:#222}.btn-invert:hover{background-color:#2f2f2f !important}.btn-link{background-color:transparent !important;color:#2980B9;box-shadow:none;border-color:transparent !important}.btn-link:hover{background-color:transparent !important;color:#409ad5 !important;box-shadow:none}.btn-link:active{background-color:transparent !important;color:#409ad5 !important;box-shadow:none}.btn-link:visited{color:#9B59B6}.wy-btn-group .btn,.wy-control 
.btn{vertical-align:middle}.wy-btn-group{margin-bottom:24px;*zoom:1}.wy-btn-group:before,.wy-btn-group:after{display:table;content:""}.wy-btn-group:after{clear:both}.wy-dropdown{position:relative;display:inline-block}.wy-dropdown-active .wy-dropdown-menu{display:block}.wy-dropdown-menu{position:absolute;left:0;display:none;float:left;top:100%;min-width:100%;background:#fcfcfc;z-index:100;border:solid 1px #cfd7dd;box-shadow:0 2px 2px 0 rgba(0,0,0,0.1);padding:12px}.wy-dropdown-menu>dd>a{display:block;clear:both;color:#404040;white-space:nowrap;font-size:90%;padding:0 12px;cursor:pointer}.wy-dropdown-menu>dd>a:hover{background:#2980B9;color:#fff}.wy-dropdown-menu>dd.divider{border-top:solid 1px #cfd7dd;margin:6px 0}.wy-dropdown-menu>dd.search{padding-bottom:12px}.wy-dropdown-menu>dd.search input[type="search"]{width:100%}.wy-dropdown-menu>dd.call-to-action{background:#e3e3e3;text-transform:uppercase;font-weight:500;font-size:80%}.wy-dropdown-menu>dd.call-to-action:hover{background:#e3e3e3}.wy-dropdown-menu>dd.call-to-action .btn{color:#fff}.wy-dropdown.wy-dropdown-up .wy-dropdown-menu{bottom:100%;top:auto;left:auto;right:0}.wy-dropdown.wy-dropdown-bubble .wy-dropdown-menu{background:#fcfcfc;margin-top:2px}.wy-dropdown.wy-dropdown-bubble .wy-dropdown-menu a{padding:6px 12px}.wy-dropdown.wy-dropdown-bubble .wy-dropdown-menu a:hover{background:#2980B9;color:#fff}.wy-dropdown.wy-dropdown-left .wy-dropdown-menu{right:0;left:auto;text-align:right}.wy-dropdown-arrow:before{content:" ";border-bottom:5px solid #f5f5f5;border-left:5px solid transparent;border-right:5px solid transparent;position:absolute;display:block;top:-4px;left:50%;margin-left:-3px}.wy-dropdown-arrow.wy-dropdown-arrow-left:before{left:11px}.wy-form-stacked select{display:block}.wy-form-aligned input,.wy-form-aligned textarea,.wy-form-aligned select,.wy-form-aligned .wy-help-inline,.wy-form-aligned label{display:inline-block;*display:inline;*zoom:1;vertical-align:middle}.wy-form-aligned 
.wy-control-group>label{display:inline-block;vertical-align:middle;width:10em;margin:6px 12px 0 0;float:left}.wy-form-aligned .wy-control{float:left}.wy-form-aligned .wy-control label{display:block}.wy-form-aligned .wy-control select{margin-top:6px}fieldset{border:0;margin:0;padding:0}legend{display:block;width:100%;border:0;padding:0;white-space:normal;margin-bottom:24px;font-size:150%;*margin-left:-7px}label{display:block;margin:0 0 0.3125em 0;color:#333;font-size:90%}input,select,textarea{font-size:100%;margin:0;vertical-align:baseline;*vertical-align:middle}.wy-control-group{margin-bottom:24px;*zoom:1;max-width:68em;margin-left:auto;margin-right:auto;*zoom:1}.wy-control-group:before,.wy-control-group:after{display:table;content:""}.wy-control-group:after{clear:both}.wy-control-group:before,.wy-control-group:after{display:table;content:""}.wy-control-group:after{clear:both}.wy-control-group.wy-control-group-required>label:after{content:" *";color:#E74C3C}.wy-control-group .wy-form-full,.wy-control-group .wy-form-halves,.wy-control-group .wy-form-thirds{padding-bottom:12px}.wy-control-group .wy-form-full select,.wy-control-group .wy-form-halves select,.wy-control-group .wy-form-thirds select{width:100%}.wy-control-group .wy-form-full input[type="text"],.wy-control-group .wy-form-full input[type="password"],.wy-control-group .wy-form-full input[type="email"],.wy-control-group .wy-form-full input[type="url"],.wy-control-group .wy-form-full input[type="date"],.wy-control-group .wy-form-full input[type="month"],.wy-control-group .wy-form-full input[type="time"],.wy-control-group .wy-form-full input[type="datetime"],.wy-control-group .wy-form-full input[type="datetime-local"],.wy-control-group .wy-form-full input[type="week"],.wy-control-group .wy-form-full input[type="number"],.wy-control-group .wy-form-full input[type="search"],.wy-control-group .wy-form-full input[type="tel"],.wy-control-group .wy-form-full input[type="color"],.wy-control-group .wy-form-halves 
input[type="text"],.wy-control-group .wy-form-halves input[type="password"],.wy-control-group .wy-form-halves input[type="email"],.wy-control-group .wy-form-halves input[type="url"],.wy-control-group .wy-form-halves input[type="date"],.wy-control-group .wy-form-halves input[type="month"],.wy-control-group .wy-form-halves input[type="time"],.wy-control-group .wy-form-halves input[type="datetime"],.wy-control-group .wy-form-halves input[type="datetime-local"],.wy-control-group .wy-form-halves input[type="week"],.wy-control-group .wy-form-halves input[type="number"],.wy-control-group .wy-form-halves input[type="search"],.wy-control-group .wy-form-halves input[type="tel"],.wy-control-group .wy-form-halves input[type="color"],.wy-control-group .wy-form-thirds input[type="text"],.wy-control-group .wy-form-thirds input[type="password"],.wy-control-group .wy-form-thirds input[type="email"],.wy-control-group .wy-form-thirds input[type="url"],.wy-control-group .wy-form-thirds input[type="date"],.wy-control-group .wy-form-thirds input[type="month"],.wy-control-group .wy-form-thirds input[type="time"],.wy-control-group .wy-form-thirds input[type="datetime"],.wy-control-group .wy-form-thirds input[type="datetime-local"],.wy-control-group .wy-form-thirds input[type="week"],.wy-control-group .wy-form-thirds input[type="number"],.wy-control-group .wy-form-thirds input[type="search"],.wy-control-group .wy-form-thirds input[type="tel"],.wy-control-group .wy-form-thirds input[type="color"]{width:100%}.wy-control-group .wy-form-full{float:left;display:block;margin-right:2.35765%;width:100%;margin-right:0}.wy-control-group .wy-form-full:last-child{margin-right:0}.wy-control-group .wy-form-halves{float:left;display:block;margin-right:2.35765%;width:48.82117%}.wy-control-group .wy-form-halves:last-child{margin-right:0}.wy-control-group .wy-form-halves:nth-of-type(2n){margin-right:0}.wy-control-group .wy-form-halves:nth-of-type(2n+1){clear:left}.wy-control-group 
.wy-form-thirds{float:left;display:block;margin-right:2.35765%;width:31.76157%}.wy-control-group .wy-form-thirds:last-child{margin-right:0}.wy-control-group .wy-form-thirds:nth-of-type(3n){margin-right:0}.wy-control-group .wy-form-thirds:nth-of-type(3n+1){clear:left}.wy-control-group.wy-control-group-no-input .wy-control{margin:6px 0 0 0;font-size:90%}.wy-control-no-input{display:inline-block;margin:6px 0 0 0;font-size:90%}.wy-control-group.fluid-input input[type="text"],.wy-control-group.fluid-input input[type="password"],.wy-control-group.fluid-input input[type="email"],.wy-control-group.fluid-input input[type="url"],.wy-control-group.fluid-input input[type="date"],.wy-control-group.fluid-input input[type="month"],.wy-control-group.fluid-input input[type="time"],.wy-control-group.fluid-input input[type="datetime"],.wy-control-group.fluid-input input[type="datetime-local"],.wy-control-group.fluid-input input[type="week"],.wy-control-group.fluid-input input[type="number"],.wy-control-group.fluid-input input[type="search"],.wy-control-group.fluid-input input[type="tel"],.wy-control-group.fluid-input input[type="color"]{width:100%}.wy-form-message-inline{display:inline-block;padding-left:0.3em;color:#666;vertical-align:middle;font-size:90%}.wy-form-message{display:block;color:#999;font-size:70%;margin-top:0.3125em;font-style:italic}.wy-form-message p{font-size:inherit;font-style:italic;margin-bottom:6px}.wy-form-message p:last-child{margin-bottom:0}input{line-height:normal}input[type="button"],input[type="reset"],input[type="submit"]{-webkit-appearance:button;cursor:pointer;font-family:"Lato","proxima-nova","Helvetica 
Neue",Arial,sans-serif;*overflow:visible}input[type="text"],input[type="password"],input[type="email"],input[type="url"],input[type="date"],input[type="month"],input[type="time"],input[type="datetime"],input[type="datetime-local"],input[type="week"],input[type="number"],input[type="search"],input[type="tel"],input[type="color"]{-webkit-appearance:none;padding:6px;display:inline-block;border:1px solid #ccc;font-size:80%;font-family:"Lato","proxima-nova","Helvetica Neue",Arial,sans-serif;box-shadow:inset 0 1px 3px #ddd;border-radius:0;-webkit-transition:border 0.3s linear;-moz-transition:border 0.3s linear;transition:border 0.3s linear}input[type="datetime-local"]{padding:0.34375em 0.625em}input[disabled]{cursor:default}input[type="checkbox"],input[type="radio"]{-webkit-box-sizing:border-box;-moz-box-sizing:border-box;box-sizing:border-box;padding:0;margin-right:0.3125em;*height:13px;*width:13px}input[type="search"]{-webkit-box-sizing:border-box;-moz-box-sizing:border-box;box-sizing:border-box}input[type="search"]::-webkit-search-cancel-button,input[type="search"]::-webkit-search-decoration{-webkit-appearance:none}input[type="text"]:focus,input[type="password"]:focus,input[type="email"]:focus,input[type="url"]:focus,input[type="date"]:focus,input[type="month"]:focus,input[type="time"]:focus,input[type="datetime"]:focus,input[type="datetime-local"]:focus,input[type="week"]:focus,input[type="number"]:focus,input[type="search"]:focus,input[type="tel"]:focus,input[type="color"]:focus{outline:0;outline:thin dotted \9;border-color:#333}input.no-focus:focus{border-color:#ccc !important}input[type="file"]:focus,input[type="radio"]:focus,input[type="checkbox"]:focus{outline:thin dotted #333;outline:1px auto 
#129FEA}input[type="text"][disabled],input[type="password"][disabled],input[type="email"][disabled],input[type="url"][disabled],input[type="date"][disabled],input[type="month"][disabled],input[type="time"][disabled],input[type="datetime"][disabled],input[type="datetime-local"][disabled],input[type="week"][disabled],input[type="number"][disabled],input[type="search"][disabled],input[type="tel"][disabled],input[type="color"][disabled]{cursor:not-allowed;background-color:#fafafa}input:focus:invalid,textarea:focus:invalid,select:focus:invalid{color:#E74C3C;border:1px solid #E74C3C}input:focus:invalid:focus,textarea:focus:invalid:focus,select:focus:invalid:focus{border-color:#E74C3C}input[type="file"]:focus:invalid:focus,input[type="radio"]:focus:invalid:focus,input[type="checkbox"]:focus:invalid:focus{outline-color:#E74C3C}input.wy-input-large{padding:12px;font-size:100%}textarea{overflow:auto;vertical-align:top;width:100%;font-family:"Lato","proxima-nova","Helvetica Neue",Arial,sans-serif}select,textarea{padding:0.5em 0.625em;display:inline-block;border:1px solid #ccc;font-size:80%;box-shadow:inset 0 1px 3px #ddd;-webkit-transition:border 0.3s linear;-moz-transition:border 0.3s linear;transition:border 0.3s linear}select{border:1px solid #ccc;background-color:#fff}select[multiple]{height:auto}select:focus,textarea:focus{outline:0}select[disabled],textarea[disabled],input[readonly],select[readonly],textarea[readonly]{cursor:not-allowed;background-color:#fafafa}input[type="radio"][disabled],input[type="checkbox"][disabled]{cursor:not-allowed}.wy-checkbox,.wy-radio{margin:6px 0;color:#404040;display:block}.wy-checkbox input,.wy-radio input{vertical-align:baseline}.wy-form-message-inline{display:inline-block;*display:inline;*zoom:1;vertical-align:middle}.wy-input-prefix,.wy-input-suffix{white-space:nowrap;padding:6px}.wy-input-prefix .wy-input-context,.wy-input-suffix .wy-input-context{line-height:27px;padding:0 
8px;display:inline-block;font-size:80%;background-color:#f3f6f6;border:solid 1px #ccc;color:#999}.wy-input-suffix .wy-input-context{border-left:0}.wy-input-prefix .wy-input-context{border-right:0}.wy-switch{width:36px;height:12px;margin:12px 0;position:relative;border-radius:4px;background:#ccc;cursor:pointer;-webkit-transition:all 0.2s ease-in-out;-moz-transition:all 0.2s ease-in-out;transition:all 0.2s ease-in-out}.wy-switch:before{position:absolute;content:"";display:block;width:18px;height:18px;border-radius:4px;background:#999;left:-3px;top:-3px;-webkit-transition:all 0.2s ease-in-out;-moz-transition:all 0.2s ease-in-out;transition:all 0.2s ease-in-out}.wy-switch:after{content:"false";position:absolute;left:48px;display:block;font-size:12px;color:#ccc}.wy-switch.active{background:#1e8449}.wy-switch.active:before{left:24px;background:#27AE60}.wy-switch.active:after{content:"true"}.wy-switch.disabled,.wy-switch.active.disabled{cursor:not-allowed}.wy-control-group.wy-control-group-error .wy-form-message,.wy-control-group.wy-control-group-error>label{color:#E74C3C}.wy-control-group.wy-control-group-error input[type="text"],.wy-control-group.wy-control-group-error input[type="password"],.wy-control-group.wy-control-group-error input[type="email"],.wy-control-group.wy-control-group-error input[type="url"],.wy-control-group.wy-control-group-error input[type="date"],.wy-control-group.wy-control-group-error input[type="month"],.wy-control-group.wy-control-group-error input[type="time"],.wy-control-group.wy-control-group-error input[type="datetime"],.wy-control-group.wy-control-group-error input[type="datetime-local"],.wy-control-group.wy-control-group-error input[type="week"],.wy-control-group.wy-control-group-error input[type="number"],.wy-control-group.wy-control-group-error input[type="search"],.wy-control-group.wy-control-group-error input[type="tel"],.wy-control-group.wy-control-group-error input[type="color"]{border:solid 1px 
#E74C3C}.wy-control-group.wy-control-group-error textarea{border:solid 1px #E74C3C}.wy-inline-validate{white-space:nowrap}.wy-inline-validate .wy-input-context{padding:0.5em 0.625em;display:inline-block;font-size:80%}.wy-inline-validate.wy-inline-validate-success .wy-input-context{color:#27AE60}.wy-inline-validate.wy-inline-validate-danger .wy-input-context{color:#E74C3C}.wy-inline-validate.wy-inline-validate-warning .wy-input-context{color:#E67E22}.wy-inline-validate.wy-inline-validate-info .wy-input-context{color:#2980B9}.rotate-90{-webkit-transform:rotate(90deg);-moz-transform:rotate(90deg);-ms-transform:rotate(90deg);-o-transform:rotate(90deg);transform:rotate(90deg)}.rotate-180{-webkit-transform:rotate(180deg);-moz-transform:rotate(180deg);-ms-transform:rotate(180deg);-o-transform:rotate(180deg);transform:rotate(180deg)}.rotate-270{-webkit-transform:rotate(270deg);-moz-transform:rotate(270deg);-ms-transform:rotate(270deg);-o-transform:rotate(270deg);transform:rotate(270deg)}.mirror{-webkit-transform:scaleX(-1);-moz-transform:scaleX(-1);-ms-transform:scaleX(-1);-o-transform:scaleX(-1);transform:scaleX(-1)}.mirror.rotate-90{-webkit-transform:scaleX(-1) rotate(90deg);-moz-transform:scaleX(-1) rotate(90deg);-ms-transform:scaleX(-1) rotate(90deg);-o-transform:scaleX(-1) rotate(90deg);transform:scaleX(-1) rotate(90deg)}.mirror.rotate-180{-webkit-transform:scaleX(-1) rotate(180deg);-moz-transform:scaleX(-1) rotate(180deg);-ms-transform:scaleX(-1) rotate(180deg);-o-transform:scaleX(-1) rotate(180deg);transform:scaleX(-1) rotate(180deg)}.mirror.rotate-270{-webkit-transform:scaleX(-1) rotate(270deg);-moz-transform:scaleX(-1) rotate(270deg);-ms-transform:scaleX(-1) rotate(270deg);-o-transform:scaleX(-1) rotate(270deg);transform:scaleX(-1) rotate(270deg)}@media only screen and (max-width: 480px){.wy-form button[type="submit"]{margin:0.7em 0 0}.wy-form input[type="text"],.wy-form input[type="password"],.wy-form input[type="email"],.wy-form input[type="url"],.wy-form 
input[type="date"],.wy-form input[type="month"],.wy-form input[type="time"],.wy-form input[type="datetime"],.wy-form input[type="datetime-local"],.wy-form input[type="week"],.wy-form input[type="number"],.wy-form input[type="search"],.wy-form input[type="tel"],.wy-form input[type="color"]{margin-bottom:0.3em;display:block}.wy-form label{margin-bottom:0.3em;display:block}.wy-form input[type="password"],.wy-form input[type="email"],.wy-form input[type="url"],.wy-form input[type="date"],.wy-form input[type="month"],.wy-form input[type="time"],.wy-form input[type="datetime"],.wy-form input[type="datetime-local"],.wy-form input[type="week"],.wy-form input[type="number"],.wy-form input[type="search"],.wy-form input[type="tel"],.wy-form input[type="color"]{margin-bottom:0}.wy-form-aligned .wy-control-group label{margin-bottom:0.3em;text-align:left;display:block;width:100%}.wy-form-aligned .wy-control{margin:1.5em 0 0 0}.wy-form .wy-help-inline,.wy-form-message-inline,.wy-form-message{display:block;font-size:80%;padding:6px 0}}@media screen and (max-width: 768px){.tablet-hide{display:none}}@media screen and (max-width: 480px){.mobile-hide{display:none}}.float-left{float:left}.float-right{float:right}.full-width{width:100%}.wy-table,.rst-content table.docutils,.rst-content table.field-list{border-collapse:collapse;border-spacing:0;empty-cells:show;margin-bottom:24px}.wy-table caption,.rst-content table.docutils caption,.rst-content table.field-list caption{color:#000;font:italic 85%/1 arial,sans-serif;padding:1em 0;text-align:center}.wy-table td,.rst-content table.docutils td,.rst-content table.field-list td,.wy-table th,.rst-content table.docutils th,.rst-content table.field-list th{font-size:90%;margin:0;overflow:visible;padding:8px 16px}.wy-table td:first-child,.rst-content table.docutils td:first-child,.rst-content table.field-list td:first-child,.wy-table th:first-child,.rst-content table.docutils th:first-child,.rst-content table.field-list 
th:first-child{border-left-width:0}.wy-table thead,.rst-content table.docutils thead,.rst-content table.field-list thead{color:#000;text-align:left;vertical-align:bottom;white-space:nowrap}.wy-table thead th,.rst-content table.docutils thead th,.rst-content table.field-list thead th{font-weight:bold;border-bottom:solid 2px #e1e4e5}.wy-table td,.rst-content table.docutils td,.rst-content table.field-list td{background-color:transparent;vertical-align:middle}.wy-table td p,.rst-content table.docutils td p,.rst-content table.field-list td p{line-height:18px}.wy-table td p:last-child,.rst-content table.docutils td p:last-child,.rst-content table.field-list td p:last-child{margin-bottom:0}.wy-table .wy-table-cell-min,.rst-content table.docutils .wy-table-cell-min,.rst-content table.field-list .wy-table-cell-min{width:1%;padding-right:0}.wy-table .wy-table-cell-min input[type=checkbox],.rst-content table.docutils .wy-table-cell-min input[type=checkbox],.rst-content table.field-list .wy-table-cell-min input[type=checkbox],.wy-table .wy-table-cell-min input[type=checkbox],.rst-content table.docutils .wy-table-cell-min input[type=checkbox],.rst-content table.field-list .wy-table-cell-min input[type=checkbox]{margin:0}.wy-table-secondary{color:gray;font-size:90%}.wy-table-tertiary{color:gray;font-size:80%}.wy-table-odd td,.wy-table-striped tr:nth-child(2n-1) td,.rst-content table.docutils:not(.field-list) tr:nth-child(2n-1) td{background-color:#f3f6f6}.wy-table-backed{background-color:#f3f6f6}.wy-table-bordered-all,.rst-content table.docutils{border:1px solid #e1e4e5}.wy-table-bordered-all td,.rst-content table.docutils td{border-bottom:1px solid #e1e4e5;border-left:1px solid #e1e4e5}.wy-table-bordered-all tbody>tr:last-child td,.rst-content table.docutils tbody>tr:last-child td{border-bottom-width:0}.wy-table-bordered{border:1px solid #e1e4e5}.wy-table-bordered-rows td{border-bottom:1px solid #e1e4e5}.wy-table-bordered-rows tbody>tr:last-child 
td{border-bottom-width:0}.wy-table-horizontal tbody>tr:last-child td{border-bottom-width:0}.wy-table-horizontal td,.wy-table-horizontal th{border-width:0 0 1px 0;border-bottom:1px solid #e1e4e5}.wy-table-horizontal tbody>tr:last-child td{border-bottom-width:0}.wy-table-responsive{margin-bottom:24px;max-width:100%;overflow:auto}.wy-table-responsive table{margin-bottom:0 !important}.wy-table-responsive table td,.wy-table-responsive table th{white-space:nowrap}a{color:#2980B9;text-decoration:none;cursor:pointer}a:hover{color:#3091d1}a:visited{color:#9B59B6}html{height:100%;overflow-x:hidden}body{font-family:"Lato","proxima-nova","Helvetica Neue",Arial,sans-serif;font-weight:normal;color:#404040;min-height:100%;overflow-x:hidden;background:#edf0f2}.wy-text-left{text-align:left}.wy-text-center{text-align:center}.wy-text-right{text-align:right}.wy-text-large{font-size:120%}.wy-text-normal{font-size:100%}.wy-text-small,small{font-size:80%}.wy-text-strike{text-decoration:line-through}.wy-text-warning{color:#E67E22 !important}a.wy-text-warning:hover{color:#eb9950 !important}.wy-text-info{color:#2980B9 !important}a.wy-text-info:hover{color:#409ad5 !important}.wy-text-success{color:#27AE60 !important}a.wy-text-success:hover{color:#36d278 !important}.wy-text-danger{color:#E74C3C !important}a.wy-text-danger:hover{color:#ed7669 !important}.wy-text-neutral{color:#404040 !important}a.wy-text-neutral:hover{color:#595959 !important}h1,h2,h3,h4,h5,h6,legend{margin-top:0;font-weight:700;font-family:"Roboto Slab","ff-tisa-web-pro","Georgia",Arial,sans-serif}p{line-height:24px;margin:0;font-size:16px;margin-bottom:24px}h1{font-size:175%}h2{font-size:150%}h3{font-size:125%}h4{font-size:115%}h5{font-size:110%}h6{font-size:100%}hr{display:block;height:1px;border:0;border-top:1px solid #e1e4e5;margin:24px 0;padding:0}code,.rst-content tt{white-space:nowrap;max-width:100%;background:#fff;border:solid 1px #e1e4e5;font-size:75%;padding:0 5px;font-family:Consolas,"Andale Mono WT","Andale 
Mono","Lucida Console","Lucida Sans Typewriter","DejaVu Sans Mono","Bitstream Vera Sans Mono","Liberation Mono","Nimbus Mono L",Monaco,"Courier New",Courier,monospace;color:#E74C3C;overflow-x:auto}code.code-large,.rst-content tt.code-large{font-size:90%}.wy-plain-list-disc,.rst-content .section ul,.rst-content .toctree-wrapper ul,article ul{list-style:disc;line-height:24px;margin-bottom:24px}.wy-plain-list-disc li,.rst-content .section ul li,.rst-content .toctree-wrapper ul li,article ul li{list-style:disc;margin-left:24px}.wy-plain-list-disc li p:last-child,.rst-content .section ul li p:last-child,.rst-content .toctree-wrapper ul li p:last-child,article ul li p:last-child{margin-bottom:0}.wy-plain-list-disc li ul,.rst-content .section ul li ul,.rst-content .toctree-wrapper ul li ul,article ul li ul{margin-bottom:0}.wy-plain-list-disc li li,.rst-content .section ul li li,.rst-content .toctree-wrapper ul li li,article ul li li{list-style:circle}.wy-plain-list-disc li li li,.rst-content .section ul li li li,.rst-content .toctree-wrapper ul li li li,article ul li li li{list-style:square}.wy-plain-list-disc li ol li,.rst-content .section ul li ol li,.rst-content .toctree-wrapper ul li ol li,article ul li ol li{list-style:decimal}.wy-plain-list-decimal,.rst-content .section ol,.rst-content ol.arabic,article ol{list-style:decimal;line-height:24px;margin-bottom:24px}.wy-plain-list-decimal li,.rst-content .section ol li,.rst-content ol.arabic li,article ol li{list-style:decimal;margin-left:24px}.wy-plain-list-decimal li p:last-child,.rst-content .section ol li p:last-child,.rst-content ol.arabic li p:last-child,article ol li p:last-child{margin-bottom:0}.wy-plain-list-decimal li ul,.rst-content .section ol li ul,.rst-content ol.arabic li ul,article ol li ul{margin-bottom:0}.wy-plain-list-decimal li ul li,.rst-content .section ol li ul li,.rst-content ol.arabic li ul li,article ol li ul li{list-style:disc}.codeblock-example{border:1px solid 
#e1e4e5;border-bottom:none;padding:24px;padding-top:48px;font-weight:500;background:#fff;position:relative}.codeblock-example:after{content:"Example";position:absolute;top:0px;left:0px;background:#9B59B6;color:#fff;padding:6px 12px}.codeblock-example.prettyprint-example-only{border:1px solid #e1e4e5;margin-bottom:24px}.codeblock,pre.literal-block,.rst-content .literal-block,.rst-content pre.literal-block,div[class^='highlight']{border:1px solid #e1e4e5;padding:0px;overflow-x:auto;background:#fff;margin:1px 0 24px 0}.codeblock div[class^='highlight'],pre.literal-block div[class^='highlight'],.rst-content .literal-block div[class^='highlight'],div[class^='highlight'] div[class^='highlight']{border:none;background:none;margin:0}div[class^='highlight'] td.code{width:100%}.linenodiv pre{border-right:solid 1px #e6e9ea;margin:0;padding:12px 12px;font-family:Consolas,"Andale Mono WT","Andale Mono","Lucida Console","Lucida Sans Typewriter","DejaVu Sans Mono","Bitstream Vera Sans Mono","Liberation Mono","Nimbus Mono L",Monaco,"Courier New",Courier,monospace;font-size:12px;line-height:1.5;color:#d9d9d9}div[class^='highlight'] pre{white-space:pre;margin:0;padding:12px 12px;font-family:Consolas,"Andale Mono WT","Andale Mono","Lucida Console","Lucida Sans Typewriter","DejaVu Sans Mono","Bitstream Vera Sans Mono","Liberation Mono","Nimbus Mono L",Monaco,"Courier New",Courier,monospace;font-size:12px;line-height:1.5;display:block;overflow:auto;color:#404040}@media print{.codeblock,pre.literal-block,.rst-content .literal-block,.rst-content pre.literal-block,div[class^='highlight'],div[class^='highlight'] pre{white-space:pre-wrap}}.hll{background-color:#ffc;margin:0 -12px;padding:0 
12px;display:block}.c{color:#998;font-style:italic}.err{color:#a61717;background-color:#e3d2d2}.k{font-weight:bold}.o{font-weight:bold}.cm{color:#998;font-style:italic}.cp{color:#999;font-weight:bold}.c1{color:#998;font-style:italic}.cs{color:#999;font-weight:bold;font-style:italic}.gd{color:#000;background-color:#fdd}.gd .x{color:#000;background-color:#faa}.ge{font-style:italic}.gr{color:#a00}.gh{color:#999}.gi{color:#000;background-color:#dfd}.gi .x{color:#000;background-color:#afa}.go{color:#888}.gp{color:#555}.gs{font-weight:bold}.gu{color:purple;font-weight:bold}.gt{color:#a00}.kc{font-weight:bold}.kd{font-weight:bold}.kn{font-weight:bold}.kp{font-weight:bold}.kr{font-weight:bold}.kt{color:#458;font-weight:bold}.m{color:#099}.s{color:#d14}.n{color:#333}.na{color:teal}.nb{color:#0086b3}.nc{color:#458;font-weight:bold}.no{color:teal}.ni{color:purple}.ne{color:#900;font-weight:bold}.nf{color:#900;font-weight:bold}.nn{color:#555}.nt{color:navy}.nv{color:teal}.ow{font-weight:bold}.w{color:#bbb}.mf{color:#099}.mh{color:#099}.mi{color:#099}.mo{color:#099}.sb{color:#d14}.sc{color:#d14}.sd{color:#d14}.s2{color:#d14}.se{color:#d14}.sh{color:#d14}.si{color:#d14}.sx{color:#d14}.sr{color:#009926}.s1{color:#d14}.ss{color:#990073}.bp{color:#999}.vc{color:teal}.vg{color:teal}.vi{color:teal}.il{color:#099}.gc{color:#999;background-color:#EAF2F5}.wy-breadcrumbs li{display:inline-block}.wy-breadcrumbs li.wy-breadcrumbs-aside{float:right}.wy-breadcrumbs li a{display:inline-block;padding:5px}.wy-breadcrumbs li a:first-child{padding-left:0}.wy-breadcrumbs-extra{margin-bottom:0;color:#b3b3b3;font-size:80%;display:inline-block}@media screen and (max-width: 480px){.wy-breadcrumbs-extra{display:none}.wy-breadcrumbs li.wy-breadcrumbs-aside{display:none}}@media print{.wy-breadcrumbs li.wy-breadcrumbs-aside{display:none}}.wy-affix{position:fixed;top:1.618em}.wy-menu 
a:hover{text-decoration:none}.wy-menu-horiz{*zoom:1}.wy-menu-horiz:before,.wy-menu-horiz:after{display:table;content:""}.wy-menu-horiz:after{clear:both}.wy-menu-horiz ul,.wy-menu-horiz li{display:inline-block}.wy-menu-horiz li:hover{background:rgba(255,255,255,0.1)}.wy-menu-horiz li.divide-left{border-left:solid 1px #404040}.wy-menu-horiz li.divide-right{border-right:solid 1px #404040}.wy-menu-horiz a{height:32px;display:inline-block;line-height:32px;padding:0 16px}.wy-menu-vertical header{height:32px;display:inline-block;line-height:32px;padding:0 1.618em;display:block;font-weight:bold;text-transform:uppercase;font-size:80%;color:#2980B9;white-space:nowrap}.wy-menu-vertical ul{margin-bottom:0}.wy-menu-vertical li.divide-top{border-top:solid 1px #404040}.wy-menu-vertical li.divide-bottom{border-bottom:solid 1px #404040}.wy-menu-vertical li.current{background:#e3e3e3}.wy-menu-vertical li.current a{color:gray;border-right:solid 1px #c9c9c9;padding:0.4045em 2.427em}.wy-menu-vertical li.current a:hover{background:#d6d6d6}.wy-menu-vertical li.on a,.wy-menu-vertical li.current>a{color:#404040;padding:0.4045em 1.618em;font-weight:bold;position:relative;background:#fcfcfc;border:none;border-bottom:solid 1px #c9c9c9;border-top:solid 1px #c9c9c9;padding-left:1.618em -4px}.wy-menu-vertical li.on a:hover,.wy-menu-vertical li.current>a:hover{background:#fcfcfc}.wy-menu-vertical li.toctree-l2.current>a{background:#c9c9c9;padding:0.4045em 2.427em}.wy-menu-vertical li.current ul{display:block}.wy-menu-vertical li ul{margin-bottom:0;display:none}.wy-menu-vertical .local-toc li ul{display:block}.wy-menu-vertical li ul li a{margin-bottom:0;color:#b3b3b3;font-weight:normal}.wy-menu-vertical a{display:inline-block;line-height:18px;padding:0.4045em 1.618em;display:block;position:relative;font-size:90%;color:#b3b3b3}.wy-menu-vertical a:hover{background-color:#4e4a4a;cursor:pointer}.wy-menu-vertical 
a:active{background-color:#2980B9;cursor:pointer;color:#fff}.wy-side-nav-search{z-index:200;background-color:#2980B9;text-align:center;padding:0.809em;display:block;color:#fcfcfc;margin-bottom:0.809em}.wy-side-nav-search input[type=text]{width:100%;border-radius:50px;padding:6px 12px;border-color:#2472a4}.wy-side-nav-search img{display:block;margin:auto auto 0.809em auto;height:45px;width:45px;background-color:#2980B9;padding:5px;border-radius:100%}.wy-side-nav-search>a,.wy-side-nav-search .wy-dropdown>a{color:#fcfcfc;font-size:100%;font-weight:bold;display:inline-block;padding:4px 6px;margin-bottom:0.809em}.wy-side-nav-search>a:hover,.wy-side-nav-search .wy-dropdown>a:hover{background:rgba(255,255,255,0.1)}.wy-nav .wy-menu-vertical header{color:#2980B9}.wy-nav .wy-menu-vertical a{color:#b3b3b3}.wy-nav .wy-menu-vertical a:hover{background-color:#2980B9;color:#fff}[data-menu-wrap]{-webkit-transition:all 0.2s ease-in;-moz-transition:all 0.2s ease-in;transition:all 0.2s ease-in;position:absolute;opacity:1;width:100%;opacity:0}[data-menu-wrap].move-center{left:0;right:auto;opacity:1}[data-menu-wrap].move-left{right:auto;left:-100%;opacity:0}[data-menu-wrap].move-right{right:-100%;left:auto;opacity:0}.wy-body-for-nav{background:left repeat-y 
#fcfcfc;background-image:url(data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAIAAACQd1PeAAAAGXRFWHRTb2Z0d2FyZQBBZG9iZSBJbWFnZVJlYWR5ccllPAAAAyRpVFh0WE1MOmNvbS5hZG9iZS54bXAAAAAAADw/eHBhY2tldCBiZWdpbj0i77u/IiBpZD0iVzVNME1wQ2VoaUh6cmVTek5UY3prYzlkIj8+IDx4OnhtcG1ldGEgeG1sbnM6eD0iYWRvYmU6bnM6bWV0YS8iIHg6eG1wdGs9IkFkb2JlIFhNUCBDb3JlIDUuMy1jMDExIDY2LjE0NTY2MSwgMjAxMi8wMi8wNi0xNDo1NjoyNyAgICAgICAgIj4gPHJkZjpSREYgeG1sbnM6cmRmPSJodHRwOi8vd3d3LnczLm9yZy8xOTk5LzAyLzIyLXJkZi1zeW50YXgtbnMjIj4gPHJkZjpEZXNjcmlwdGlvbiByZGY6YWJvdXQ9IiIgeG1sbnM6eG1wPSJodHRwOi8vbnMuYWRvYmUuY29tL3hhcC8xLjAvIiB4bWxuczp4bXBNTT0iaHR0cDovL25zLmFkb2JlLmNvbS94YXAvMS4wL21tLyIgeG1sbnM6c3RSZWY9Imh0dHA6Ly9ucy5hZG9iZS5jb20veGFwLzEuMC9zVHlwZS9SZXNvdXJjZVJlZiMiIHhtcDpDcmVhdG9yVG9vbD0iQWRvYmUgUGhvdG9zaG9wIENTNiAoTWFjaW50b3NoKSIgeG1wTU06SW5zdGFuY2VJRD0ieG1wLmlpZDoxOERBMTRGRDBFMUUxMUUzODUwMkJCOThDMEVFNURFMCIgeG1wTU06RG9jdW1lbnRJRD0ieG1wLmRpZDoxOERBMTRGRTBFMUUxMUUzODUwMkJCOThDMEVFNURFMCI+IDx4bXBNTTpEZXJpdmVkRnJvbSBzdFJlZjppbnN0YW5jZUlEPSJ4bXAuaWlkOjE4REExNEZCMEUxRTExRTM4NTAyQkI5OEMwRUU1REUwIiBzdFJlZjpkb2N1bWVudElEPSJ4bXAuZGlkOjE4REExNEZDMEUxRTExRTM4NTAyQkI5OEMwRUU1REUwIi8+IDwvcmRmOkRlc2NyaXB0aW9uPiA8L3JkZjpSREY+IDwveDp4bXBtZXRhPiA8P3hwYWNrZXQgZW5kPSJyIj8+EwrlwAAAAA5JREFUeNpiMDU0BAgwAAE2AJgB9BnaAAAAAElFTkSuQmCC);background-size:300px 1px}.wy-grid-for-nav{position:absolute;width:100%;height:100%}.wy-nav-side{position:absolute;top:0;left:0;width:300px;overflow:hidden;min-height:100%;background:#343131;z-index:200}.wy-nav-top{display:none;background:#2980B9;color:#fff;padding:0.4045em 0.809em;position:relative;line-height:50px;text-align:center;font-size:100%;*zoom:1}.wy-nav-top:before,.wy-nav-top:after{display:table;content:""}.wy-nav-top:after{clear:both}.wy-nav-top a{color:#fff;font-weight:bold}.wy-nav-top img{margin-right:12px;height:45px;width:45px;background-color:#2980B9;padding:5px;border-radius:100%}.wy-nav-top 
i{font-size:30px;float:left;cursor:pointer}.wy-nav-content-wrap{margin-left:300px;background:#fcfcfc;min-height:100%}.wy-nav-content{padding:1.618em 3.236em;height:100%;max-width:800px;margin:auto}.wy-body-mask{position:fixed;width:100%;height:100%;background:rgba(0,0,0,0.2);display:none;z-index:499}.wy-body-mask.on{display:block}footer{color:#999}footer p{margin-bottom:12px}.rst-footer-buttons{*zoom:1}.rst-footer-buttons:before,.rst-footer-buttons:after{display:table;content:""}.rst-footer-buttons:after{clear:both}#search-results .search li{margin-bottom:24px;border-bottom:solid 1px #e1e4e5;padding-bottom:24px}#search-results .search li:first-child{border-top:solid 1px #e1e4e5;padding-top:24px}#search-results .search li a{font-size:120%;margin-bottom:12px;display:inline-block}#search-results .context{color:gray;font-size:90%}@media screen and (max-width: 768px){.wy-body-for-nav{background:#fcfcfc}.wy-nav-top{display:block}.wy-nav-side{left:-300px}.wy-nav-side.shift{width:85%;left:0}.wy-nav-content-wrap{margin-left:0}.wy-nav-content-wrap .wy-nav-content{padding:1.618em}.wy-nav-content-wrap.shift{position:fixed;min-width:100%;left:85%;top:0;height:100%;overflow:hidden}}@media screen and (min-width: 1400px){.wy-nav-content-wrap{background:rgba(0,0,0,0.05)}.wy-nav-content{margin:0;background:#fcfcfc}}@media print{.rst-versions,footer,.wy-nav-side{display:none}.wy-nav-content-wrap{margin-left:0}}nav.stickynav{position:fixed;top:0}.rst-versions{position:fixed;bottom:0;left:0;width:300px;color:#fcfcfc;background:#1f1d1d;border-top:solid 10px #343131;font-family:"Lato","proxima-nova","Helvetica Neue",Arial,sans-serif;z-index:400}.rst-versions a{color:#2980B9;text-decoration:none}.rst-versions .rst-badge-small{display:none}.rst-versions .rst-current-version{padding:12px;background-color:#272525;display:block;text-align:right;font-size:90%;cursor:pointer;color:#27AE60;*zoom:1}.rst-versions .rst-current-version:before,.rst-versions 
.rst-current-version:after{display:table;content:""}.rst-versions .rst-current-version:after{clear:both}.rst-versions .rst-current-version .fa,.rst-versions .rst-current-version .rst-content .admonition-title,.rst-content .rst-versions .rst-current-version .admonition-title,.rst-versions .rst-current-version .rst-content h1 .headerlink,.rst-content h1 .rst-versions .rst-current-version .headerlink,.rst-versions .rst-current-version .rst-content h2 .headerlink,.rst-content h2 .rst-versions .rst-current-version .headerlink,.rst-versions .rst-current-version .rst-content h3 .headerlink,.rst-content h3 .rst-versions .rst-current-version .headerlink,.rst-versions .rst-current-version .rst-content h4 .headerlink,.rst-content h4 .rst-versions .rst-current-version .headerlink,.rst-versions .rst-current-version .rst-content h5 .headerlink,.rst-content h5 .rst-versions .rst-current-version .headerlink,.rst-versions .rst-current-version .rst-content h6 .headerlink,.rst-content h6 .rst-versions .rst-current-version .headerlink,.rst-versions .rst-current-version .rst-content dl dt .headerlink,.rst-content dl dt .rst-versions .rst-current-version .headerlink,.rst-versions .rst-current-version .icon{color:#fcfcfc}.rst-versions .rst-current-version .fa-book,.rst-versions .rst-current-version .icon-book{float:left}.rst-versions .rst-current-version .icon-book{float:left}.rst-versions .rst-current-version.rst-out-of-date{background-color:#E74C3C;color:#fff}.rst-versions .rst-current-version.rst-active-old-version{background-color:#F1C40F;color:#000}.rst-versions.shift-up .rst-other-versions{display:block}.rst-versions .rst-other-versions{font-size:90%;padding:12px;color:gray;display:none}.rst-versions .rst-other-versions hr{display:block;height:1px;border:0;margin:20px 0;padding:0;border-top:solid 1px #413d3d}.rst-versions .rst-other-versions dd{display:inline-block;margin:0}.rst-versions .rst-other-versions dd 
a{display:inline-block;padding:6px;color:#fcfcfc}.rst-versions.rst-badge{width:auto;bottom:20px;right:20px;left:auto;border:none;max-width:300px}.rst-versions.rst-badge .icon-book{float:none}.rst-versions.rst-badge .fa-book,.rst-versions.rst-badge .icon-book{float:none}.rst-versions.rst-badge.shift-up .rst-current-version{text-align:right}.rst-versions.rst-badge.shift-up .rst-current-version .fa-book,.rst-versions.rst-badge.shift-up .rst-current-version .icon-book{float:left}.rst-versions.rst-badge.shift-up .rst-current-version .icon-book{float:left}.rst-versions.rst-badge .rst-current-version{width:auto;height:30px;line-height:30px;padding:0 6px;display:block;text-align:center}@media screen and (max-width: 768px){.rst-versions{width:85%;display:none}.rst-versions.shift{display:block}img{width:100%;height:auto}}.rst-content img{max-width:100%;height:auto !important}.rst-content div.figure{margin-bottom:24px}.rst-content div.figure.align-center{text-align:center}.rst-content .section>img,.rst-content .section>a>img{margin-bottom:24px}.rst-content blockquote{margin-left:24px;line-height:24px;margin-bottom:24px}.rst-content .note .last,.rst-content .attention .last,.rst-content .caution .last,.rst-content .danger .last,.rst-content .error .last,.rst-content .hint .last,.rst-content .important .last,.rst-content .tip .last,.rst-content .warning .last,.rst-content .seealso .last,.rst-content .admonition-todo .last{margin-bottom:0}.rst-content .admonition-title:before{margin-right:4px}.rst-content .admonition table{border-color:rgba(0,0,0,0.1)}.rst-content .admonition table td,.rst-content .admonition table th{background:transparent !important;border-color:rgba(0,0,0,0.1) !important}.rst-content .section ol.loweralpha,.rst-content .section ol.loweralpha li{list-style:lower-alpha}.rst-content .section ol.upperalpha,.rst-content .section ol.upperalpha li{list-style:upper-alpha}.rst-content .section ol p,.rst-content .section ul p{margin-bottom:12px}.rst-content 
.line-block{margin-left:24px}.rst-content .topic-title{font-weight:bold;margin-bottom:12px}.rst-content .toc-backref{color:#404040}.rst-content .align-right{float:right;margin:0px 0px 24px 24px}.rst-content .align-left{float:left;margin:0px 24px 24px 0px}.rst-content .align-center{margin:auto;display:block}.rst-content h1 .headerlink,.rst-content h2 .headerlink,.rst-content h3 .headerlink,.rst-content h4 .headerlink,.rst-content h5 .headerlink,.rst-content h6 .headerlink,.rst-content dl dt .headerlink{display:none;visibility:hidden;font-size:14px}.rst-content h1 .headerlink:after,.rst-content h2 .headerlink:after,.rst-content h3 .headerlink:after,.rst-content h4 .headerlink:after,.rst-content h5 .headerlink:after,.rst-content h6 .headerlink:after,.rst-content dl dt .headerlink:after{visibility:visible;content:"";font-family:FontAwesome;display:inline-block}.rst-content h1:hover .headerlink,.rst-content h2:hover .headerlink,.rst-content h3:hover .headerlink,.rst-content h4:hover .headerlink,.rst-content h5:hover .headerlink,.rst-content h6:hover .headerlink,.rst-content dl dt:hover .headerlink{display:inline-block}.rst-content .sidebar{float:right;width:40%;display:block;margin:0 0 24px 24px;padding:24px;background:#f3f6f6;border:solid 1px #e1e4e5}.rst-content .sidebar p,.rst-content .sidebar ul,.rst-content .sidebar dl{font-size:90%}.rst-content .sidebar .last{margin-bottom:0}.rst-content .sidebar .sidebar-title{display:block;font-family:"Roboto Slab","ff-tisa-web-pro","Georgia",Arial,sans-serif;font-weight:bold;background:#e1e4e5;padding:6px 12px;margin:-24px;margin-bottom:24px;font-size:100%}.rst-content .highlighted{background:#F1C40F;display:inline-block;font-weight:bold;padding:0 6px}.rst-content .footnote-reference,.rst-content .citation-reference{vertical-align:super;font-size:90%}.rst-content table.docutils.citation,.rst-content table.docutils.footnote{background:none;border:none;color:#999}.rst-content table.docutils.citation td,.rst-content 
table.docutils.citation tr,.rst-content table.docutils.footnote td,.rst-content table.docutils.footnote tr{border:none;background-color:transparent !important;white-space:normal}.rst-content table.docutils.citation td.label,.rst-content table.docutils.footnote td.label{padding-left:0;padding-right:0;vertical-align:top}.rst-content table.field-list{border:none}.rst-content table.field-list td{border:none;padding-top:5px}.rst-content table.field-list td>strong{display:inline-block;margin-top:3px}.rst-content table.field-list .field-name{padding-right:10px;text-align:left;white-space:nowrap}.rst-content table.field-list .field-body{text-align:left;padding-left:0}.rst-content tt{color:#000}.rst-content tt big,.rst-content tt em{font-size:100% !important;line-height:normal}.rst-content tt .xref,a .rst-content tt{font-weight:bold}.rst-content a tt{color:#2980B9}.rst-content dl{margin-bottom:24px}.rst-content dl dt{font-weight:bold}.rst-content dl p,.rst-content dl table,.rst-content dl ul,.rst-content dl ol{margin-bottom:12px !important}.rst-content dl dd{margin:0 0 12px 24px}.rst-content dl:not(.docutils){margin-bottom:24px}.rst-content dl:not(.docutils) dt{display:inline-block;margin:6px 0;font-size:90%;line-height:normal;background:#e7f2fa;color:#2980B9;border-top:solid 3px #6ab0de;padding:6px;position:relative}.rst-content dl:not(.docutils) dt:before{color:#6ab0de}.rst-content dl:not(.docutils) dt .headerlink{color:#404040;font-size:100% !important}.rst-content dl:not(.docutils) dl dt{margin-bottom:6px;border:none;border-left:solid 3px #ccc;background:#f0f0f0;color:gray}.rst-content dl:not(.docutils) dl dt .headerlink{color:#404040;font-size:100% !important}.rst-content dl:not(.docutils) dt:first-child{margin-top:0}.rst-content dl:not(.docutils) tt{font-weight:bold}.rst-content dl:not(.docutils) tt.descname,.rst-content dl:not(.docutils) tt.descclassname{background-color:transparent;border:none;padding:0;font-size:100% !important}.rst-content dl:not(.docutils) 
tt.descname{font-weight:bold}.rst-content dl:not(.docutils) .optional{display:inline-block;padding:0 4px;color:#000;font-weight:bold}.rst-content dl:not(.docutils) .property{display:inline-block;padding-right:8px}.rst-content .viewcode-link,.rst-content .viewcode-back{display:inline-block;color:#27AE60;font-size:80%;padding-left:24px}.rst-content .viewcode-back{display:block;float:right}.rst-content p.rubric{margin-bottom:12px;font-weight:bold}@media screen and (max-width: 480px){.rst-content .sidebar{width:100%}}span[id*='MathJax-Span']{color:#404040}.math{text-align:center}
+/*# sourceMappingURL=theme.css.map */
diff --git a/docs/build/html/_static/custom.css b/docs/build/html/_static/custom.css
new file mode 100644
index 0000000..2a924f1
--- /dev/null
+++ b/docs/build/html/_static/custom.css
@@ -0,0 +1 @@
+/* This file intentionally left blank. */
diff --git a/docs/build/html/_static/doctools.js b/docs/build/html/_static/doctools.js
new file mode 100644
index 0000000..c7bfe76
--- /dev/null
+++ b/docs/build/html/_static/doctools.js
@@ -0,0 +1,263 @@
+/*
+ * doctools.js
+ * ~~~~~~~~~~~
+ *
+ * Sphinx JavaScript utilities for all documentation.
+ *
+ * :copyright: Copyright 2007-2015 by the Sphinx team, see AUTHORS.
+ * :license: BSD, see LICENSE for details.
+ *
+ */
+
+/**
+ * select a different prefix for underscore: $u (assigned without var) holds what _.noConflict() returns
+ */
+$u = _.noConflict();
+
+/**
+ * make the code below compatible with browsers without
+ * an installed firebug like debugger (disabled: the shim below is inside this comment)
+if (!window.console || !console.firebug) {
+ var names = ["log", "debug", "info", "warn", "error", "assert", "dir",
+ "dirxml", "group", "groupEnd", "time", "timeEnd", "count", "trace",
+ "profile", "profileEnd"];
+ window.console = {};
+ for (var i = 0; i < names.length; ++i)
+ window.console[names[i]] = function() {};
+}
+ */
+
+/**
+ * small helper function to urldecode strings
+ */
+jQuery.urldecode = function(x) {
+ return decodeURIComponent(x).replace(/\+/g, ' ');
+};
+
+/**
+ * small helper function to urlencode strings (an alias of encodeURIComponent)
+ */
+jQuery.urlencode = encodeURIComponent;
+
+/**
+ * This function returns the parsed url parameters of the
+ * current request. Multiple values per key are supported,
+ * it will always return arrays of strings for the value parts.
+ */
+jQuery.getQueryParameters = function(s) {
+ if (typeof s == 'undefined')
+ s = document.location.search;
+ var parts = s.substr(s.indexOf('?') + 1).split('&');
+ var result = {};
+ for (var i = 0; i < parts.length; i++) {
+ var tmp = parts[i].split('=', 2);
+ var key = jQuery.urldecode(tmp[0]);
+ var value = jQuery.urldecode(tmp[1]);
+ if (key in result)
+ result[key].push(value);
+ else
+ result[key] = [value];
+ }
+ return result;
+};
+
+/**
+ * highlight a given string on a jquery object by wrapping it in span elements with
+ * the given class name; text is compared, as passed, against the lowercased node text.
+ */
+jQuery.fn.highlightText = function(text, className) {
+ function highlight(node) {
+ if (node.nodeType == 3) {  // 3 = text node
+ var val = node.nodeValue;
+ var pos = val.toLowerCase().indexOf(text);
+ if (pos >= 0 && !jQuery(node.parentNode).hasClass(className)) {  // skip text whose parent already has className
+ var span = document.createElement("span");
+ span.className = className;
+ span.appendChild(document.createTextNode(val.substr(pos, text.length)));  // the matched text, original casing
+ node.parentNode.insertBefore(span, node.parentNode.insertBefore(  // inner call inserts the tail, outer puts span before it
+ document.createTextNode(val.substr(pos + text.length)),
+ node.nextSibling));
+ node.nodeValue = val.substr(0, pos);  // the original node keeps only the text before the match
+ }
+ }
+ else if (!jQuery(node).is("button, select, textarea")) {  // do not descend into these form controls
+ jQuery.each(node.childNodes, function() {
+ highlight(this);
+ });
+ }
+ }
+ return this.each(function() {
+ highlight(this);
+ });
+};
+
+/*
+ * backward compatibility for jQuery.browser
+ * This will be supported until firefox bug is fixed.
+ */
+if (!jQuery.browser) {
+ jQuery.uaMatch = function(ua) {
+ ua = ua.toLowerCase();
+
+ var match = /(chrome)[ \/]([\w.]+)/.exec(ua) ||
+ /(webkit)[ \/]([\w.]+)/.exec(ua) ||
+ /(opera)(?:.*version|)[ \/]([\w.]+)/.exec(ua) ||
+ /(msie) ([\w.]+)/.exec(ua) ||
+ ua.indexOf("compatible") < 0 && /(mozilla)(?:.*? rv:([\w.]+)|)/.exec(ua) ||
+ [];
+
+ return {
+ browser: match[ 1 ] || "",
+ version: match[ 2 ] || "0"
+ };
+ };
+ jQuery.browser = {};
+ jQuery.browser[jQuery.uaMatch(navigator.userAgent).browser] = true;
+}
+
+/**
+ * Small JavaScript module for the documentation.
+ */
+var Documentation = {
+
+ init : function() {  // entry point: runs the three setup methods below, in this order
+ this.fixFirefoxAnchorBug();
+ this.highlightSearchWords();
+ this.initIndexTable();
+ },
+
+ /**
+ * i18n support: initial values, replaced or extended by addTranslations(catalog)
+ */
+ TRANSLATIONS : {},  // message -> translation; gettext accepts a string or takes element [0], ngettext indexes it
+ PLURAL_EXPR : function(n) { return n == 1 ? 0 : 1; },  // initial rule: index 0 when n == 1, otherwise index 1
+ LOCALE : 'unknown',
+
+ // gettext and ngettext don't access this so that the functions
+ // can safely bound to a different name (_ = Documentation.gettext)
+ gettext : function(string) {
+ var translated = Documentation.TRANSLATIONS[string];
+ if (typeof translated == 'undefined')
+ return string;
+ return (typeof translated == 'string') ? translated : translated[0];
+ },
+
+ ngettext : function(singular, plural, n) {
+ var translated = Documentation.TRANSLATIONS[singular];
+ if (typeof translated == 'undefined')
+ return (n == 1) ? singular : plural;
+ return translated[Documentation.PLURALEXPR(n)];
+ },
+
+ addTranslations : function(catalog) {  // merges catalog.messages, then replaces PLURAL_EXPR and LOCALE
+ for (var key in catalog.messages)
+ this.TRANSLATIONS[key] = catalog.messages[key];
+ this.PLURAL_EXPR = new Function('n', 'return +(' + catalog.plural_expr + ')');  // NOTE(review): plural_expr is compiled as code - catalogs must be trusted
+ this.LOCALE = catalog.locale;
+ },
+
+ /**
+ * add context elements like header anchor links
+ */
+ addContextElements : function() {
+ $('div[id] > :header:first').each(function() {
+ $('\u00B6').
+ attr('href', '#' + this.id).
+ attr('title', _('Permalink to this headline')).
+ appendTo(this);
+ });
+ $('dt[id]').each(function() {
+ $('\u00B6').
+ attr('href', '#' + this.id).
+ attr('title', _('Permalink to this definition')).
+ appendTo(this);
+ });
+ },
+
+ /**
+ * workaround a firefox stupidity
+ * see: https://bugzilla.mozilla.org/show_bug.cgi?id=645075
+ */
+ fixFirefoxAnchorBug : function() {
+ if (document.location.hash)
+ window.setTimeout(function() {
+ document.location.href += '';
+ }, 10);
+ },
+
+ /**
+ * highlight the search words provided in the url in the text
+ */
+ highlightSearchWords : function() {
+ var params = $.getQueryParameters();
+ var terms = (params.highlight) ? params.highlight[0].split(/\s+/) : [];
+ if (terms.length) {
+ var body = $('div.body');
+ if (!body.length) {
+ body = $('body');
+ }
+ window.setTimeout(function() {
+ $.each(terms, function() {
+ body.highlightText(this.toLowerCase(), 'highlighted');
+ });
+ }, 10);
+ $('