diff --git a/CITATIONS.md b/CITATIONS.md index aa9274ad..f1f168ce 100644 --- a/CITATIONS.md +++ b/CITATIONS.md @@ -82,6 +82,8 @@ > Kovaka S, Zimin AV, Pertea GM, Razaghi R, Salzberg SL, Pertea M. Transcriptome assembly from long-read RNA-seq alignments with StringTie2 Genome Biol. 2019 Dec 16;20(1):278. doi: 10.1186/s13059-019-1910-1. PubMed PMID: 31842956; PubMed Central PMCID: PMC6912988. +- [ToulligQC](https://github.com/GenomiqueENS/toulligQC) + - [UCSC tools](https://www.ncbi.nlm.nih.gov/pubmed/20639541/) > Kent WJ, Zweig AS, Barber G, Hinrichs AS, Karolchik D. BigWig and BigBed: enabling browsing of large distributed datasets. Bioinformatics. 2010 Sep 1;26(17):2204-7. doi: 10.1093/bioinformatics/btq351. Epub 2010 Jul 17. PubMed PMID: 20639541; PubMed Central PMCID: PMC2922891. diff --git a/README.md b/README.md index b8188fbc..69ea62e3 100644 --- a/README.md +++ b/README.md @@ -25,7 +25,7 @@ On release, automated continuous integration tests run the pipeline on a [full-s 1. Demultiplexing ([`qcat`](https://github.com/nanoporetech/qcat); _optional_) 2. Raw read cleaning ([NanoLyse](https://github.com/wdecoster/nanolyse); _optional_) -3. Raw read QC ([`NanoPlot`](https://github.com/wdecoster/NanoPlot), [`FastQC`](http://www.bioinformatics.babraham.ac.uk/projects/fastqc/)) +3. Raw read QC ([`NanoPlot`](https://github.com/wdecoster/NanoPlot), [`ToulligQC`](https://github.com/GenomiqueENS/toulligQC), [`FastQC`](http://www.bioinformatics.babraham.ac.uk/projects/fastqc/)) 4. Alignment ([`GraphMap2`](https://github.com/lbcb-sci/graphmap2) or [`minimap2`](https://github.com/lh3/minimap2)) - Both aligners are capable of performing unspliced and spliced alignment. 
Sensible defaults will be applied automatically based on a combination of the input data and user-specified parameters - Each sample can be mapped to its own reference genome if multiplexed in this way diff --git a/conf/modules.config b/conf/modules.config index 85894cf1..3bf5a031 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -94,6 +94,18 @@ if (!params.skip_qc) { } } } + if (!params.skip_toulligqc) { + process { + withName: TOULLIGQC { + publishDir = [ + path: { "${params.outdir}/toulligqc" }, + mode: 'copy', + enabled: true, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + } + } if (!params.skip_fastqc) { process { withName: FASTQC { diff --git a/docs/images/toulligqc_report_barcodes.png b/docs/images/toulligqc_report_barcodes.png new file mode 100644 index 00000000..d83e0bb0 Binary files /dev/null and b/docs/images/toulligqc_report_barcodes.png differ diff --git a/docs/output.md b/docs/output.md index 42a1a00c..fcb5ad04 100644 --- a/docs/output.md +++ b/docs/output.md @@ -54,11 +54,14 @@ If you would like to run NanoLyse on the raw FASTQ files you can provide `--run_ - `nanoplot/fastq//`: directory with various `*.html` files containing QC metrics and plots. - `fastqc/_fastqc.html`: _FastQC_ `*.html` file for each sample. - `fastqc/_fastqc.zip`: _FastQC_ `*.zip` file for each sample. +- `toulligqc/_ToulligQC-report-/report.html`: _ToulligQC_ `*.html` browser-viewable report that contains all the figures in a single location for each sample. +- `toulligqc/_ToulligQC-report-/report.data`: A log file containing information about ToulligQC execution, environment variables and full statistics. +- `toulligqc/_ToulligQC-report-/images/*`: This is a folder containing all the individual images produced by ToulligQC. 
_Documentation_: -[NanoPlot](https://github.com/wdecoster/NanoPlot), [FastQC](http://www.bioinformatics.babraham.ac.uk/projects/fastqc/Help/) +[NanoPlot](https://github.com/wdecoster/NanoPlot), [FastQC](http://www.bioinformatics.babraham.ac.uk/projects/fastqc/Help/), [ToulligQC](https://github.com/GenomiqueENS/toulligQC) _Description_: _NanoPlot_ can be used to produce general quality metrics from the per barcode FASTQ files generated by a basecaller e.g. quality score distribution, read lengths, and other general stats. @@ -67,6 +70,10 @@ _NanoPlot_ can be used to produce general quality metrics from the per barcode F _FastQC_ can give general quality metrics about your reads. It can provide information about the quality score distribution across your reads, and the per-base sequence content (%A/C/G/T). You can also generate information about adapter contamination and other over-represented sequences. +_ToulligQC_ is dedicated to the QC analyses of Oxford Nanopore runs. It can be used to produce general quality metrics from the per barcode FASTQ files generated by a basecaller e.g. quality score distribution, read lengths, and other general stats. You can also generate quality metrics per barcode. + +![ToulligQC - Barcoding Report](images/toulligqc_report_barcodes.png) + ## Alignment
diff --git a/modules.json b/modules.json index feec77e1..c9334ba1 100644 --- a/modules.json +++ b/modules.json @@ -7,168 +7,129 @@ "nf-core": { "bcftools/sort": { "branch": "master", - "git_sha": "d6d112a1af2ee8c97fc1932df008183341e7d8fe", - "installed_by": ["modules"] - }, - "bedtools/bamtobed": { - "branch": "master", - "git_sha": "9e51255c4f8ec69fb6ccf68593392835f14fecb8", - "installed_by": ["modules"] - }, - "bedtools/genomecov": { - "branch": "master", - "git_sha": "9e51255c4f8ec69fb6ccf68593392835f14fecb8", - "installed_by": ["modules"] + "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", + "installed_by": [ + "modules" + ] }, "custom/dumpsoftwareversions": { "branch": "master", - "git_sha": "05c280924b6c768d484c7c443dad5e605c4ff4b4", - "installed_by": ["modules"] - }, - "custom/getchromsizes": { - "branch": "master", - "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": ["modules"] - }, - "cutesv": { - "branch": "master", - "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": ["modules"] - }, - "deepvariant": { - "branch": "master", - "git_sha": "ed67f2fadd6d2a155b296f728e6b1f8c92ddc1a6", - "installed_by": ["modules"] + "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", + "installed_by": [ + "modules" + ] }, "fastqc": { "branch": "master", - "git_sha": "a33464f205fa15305bfe268546f6607b6f4d4753", - "installed_by": ["modules"] - }, - "graphmap2/align": { - "branch": "master", - "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": ["modules"] - }, - "graphmap2/index": { - "branch": "master", - "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": ["modules"] - }, - "minimap2/align": { - "branch": "master", - "git_sha": "603ecbd9f45300c9788f197d2a15a005685b4220", - "installed_by": ["modules"] - }, - "minimap2/index": { - "branch": "master", - "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": ["modules"] + "git_sha": 
"c8e35eb2055c099720a75538d1b8adb3fb5a464c", + "installed_by": [ + "modules" + ] }, "multiqc": { "branch": "master", - "git_sha": "a6e11ac655e744f7ebc724be669dd568ffdc0e80", - "installed_by": ["modules"] + "git_sha": "ee80d14721e76e2e079103b8dcd5d57129e584ba", + "installed_by": [ + "modules" + ] }, "nanolyse": { "branch": "master", - "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": ["modules"] + "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", + "installed_by": [ + "modules" + ] }, "nanoplot": { "branch": "master", - "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": ["modules"] - }, - "qcat": { - "branch": "master", - "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": ["modules"] + "git_sha": "3822e04e49b6d89b7092feb3480d744cb5d9986b", + "installed_by": [ + "modules" + ] }, "samtools/faidx": { "branch": "master", - "git_sha": "fd742419940e01ba1c5ecb172c3e32ec840662fe", - "installed_by": ["modules"] + "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", + "installed_by": [ + "modules" + ] }, "samtools/flagstat": { "branch": "master", - "git_sha": "570ec5bcfe19c49e16c9ca35a7a116563af6cc1c", - "installed_by": ["modules"] + "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", + "installed_by": [ + "modules" + ] }, "samtools/idxstats": { "branch": "master", - "git_sha": "e662ab16e0c11f1e62983e21de9871f59371a639", - "installed_by": ["modules"] + "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", + "installed_by": [ + "modules" + ] }, "samtools/index": { "branch": "master", - "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": ["modules"] + "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", + "installed_by": [ + "modules" + ] }, "samtools/sort": { "branch": "master", - "git_sha": "a0f7be95788366c1923171e358da7d049eb440f9", - "installed_by": ["modules"] + "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", + "installed_by": [ + "modules" + ] }, "samtools/stats": { 
"branch": "master", - "git_sha": "735e1e04e7e01751d2d6e97055bbdb6f70683cc1", - "installed_by": ["modules"] - }, - "samtools/view": { - "branch": "master", - "git_sha": "3ffae3598260a99e8db3207dead9f73f87f90d1f", - "installed_by": ["modules"] - }, - "sniffles": { - "branch": "master", - "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": ["modules"] + "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", + "installed_by": [ + "modules" + ] }, "stringtie/merge": { "branch": "master", - "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": ["modules"] - }, - "stringtie/stringtie": { - "branch": "master", - "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": ["modules"] - }, - "subread/featurecounts": { - "branch": "master", - "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": ["modules"] + "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", + "installed_by": [ + "modules" + ] }, "tabix/bgzip": { "branch": "master", - "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": ["modules"] + "git_sha": "90294980a903ecebd99ac31d8b6c66af48fa8259", + "installed_by": [ + "modules" + ] }, "tabix/bgziptabix": { "branch": "master", - "git_sha": "591b71642820933dcb3c954c934b397bd00d8e5e", - "installed_by": ["modules"] + "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", + "installed_by": [ + "modules" + ] }, "tabix/tabix": { "branch": "master", - "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": ["modules"] - }, - "ucsc/bedgraphtobigwig": { - "branch": "master", - "git_sha": "66290981ab6038ea86177ade40b9449bc790b0ce", - "installed_by": ["modules"] + "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", + "installed_by": [ + "modules" + ] }, - "ucsc/bedtobigbed": { + "toulligqc": { "branch": "master", - "git_sha": "66290981ab6038ea86177ade40b9449bc790b0ce", - "installed_by": ["modules"] + "git_sha": "061a322293b3487e53f044304710e54cbf657717", + 
"installed_by": [ + "modules" + ] }, "untar": { "branch": "master", - "git_sha": "d0b4fc03af52a1cc8c6fb4493b921b57352b1dd8", - "installed_by": ["modules"] + "git_sha": "cc1f997fab6d8fde5dc0e6e2a310814df5b53ce7", + "installed_by": [ + "modules" + ] } } }, diff --git a/modules/nf-core/toulligqc/environment.yml b/modules/nf-core/toulligqc/environment.yml new file mode 100644 index 00000000..cbc1b570 --- /dev/null +++ b/modules/nf-core/toulligqc/environment.yml @@ -0,0 +1,9 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +name: "toulligqc" +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - "bioconda::toulligqc=2.5.4" diff --git a/modules/nf-core/toulligqc/main.nf b/modules/nf-core/toulligqc/main.nf new file mode 100644 index 00000000..6b2328d9 --- /dev/null +++ b/modules/nf-core/toulligqc/main.nf @@ -0,0 +1,62 @@ +process TOULLIGQC { + label 'process_low' + tag "$meta.id" + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/toulligqc:2.5.6--pyhdfd78af_0': + 'quay.io/biocontainers/toulligqc:2.5.6--pyhdfd78af_0' }" + + input: + tuple val(meta), path(ontfile) + + output: + tuple val(meta), path("*/*.data") , emit: report_data + path "*/*.html" , emit: report_html, optional: true + path "*/images/*.html" , emit: plots_html + path "*/images/plotly.min.js" , emit: plotly_js + + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + + def input_file = ("$ontfile".endsWith(".fastq") || "$ontfile".endsWith(".fastq.gz") || "$ontfile".endsWith(".fq") || "$ontfile".endsWith(".fq.gz")) ? "--fastq ${ontfile}" : + ("$ontfile".endsWith(".txt")) ? 
"--sequencing-summary-source ${ontfile}" : + ("$ontfile".endsWith(".bam")) ? "--bam ${ontfile}" : '' + + """ + toulligqc \\ + ${input_file} \\ + --output-directory ${prefix} \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + toulligqc: \$(toulligqc --version) + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + mkdir ${prefix} + mkdir ${prefix}/images + touch ${prefix}/report.data + touch ${prefix}/images/Correlation_between_read_length_and_PHRED_score.html + touch ${prefix}/images/Distribution_of_read_lengths.html + touch ${prefix}/images/PHRED_score_density_distribution.html + touch ${prefix}/images/Read_count_histogram.html + touch ${prefix}/images/plotly.min.js + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + toulligqc: \$(toulligqc --version) + END_VERSIONS + """ +} diff --git a/modules/nf-core/toulligqc/meta.yml b/modules/nf-core/toulligqc/meta.yml new file mode 100644 index 00000000..682a8ef3 --- /dev/null +++ b/modules/nf-core/toulligqc/meta.yml @@ -0,0 +1,66 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: "toulligqc" +description: "A post sequencing QC tool for Oxford Nanopore sequencers" +keywords: + - nanopore sequencing + - quality control + - genomics +tools: + - "toulligqc": + description: "A post sequencing QC tool for Oxford Nanopore sequencers" + homepage: https://github.com/GenomiqueENS/toulligQC + documentation: https://github.com/GenomiqueENS/toulligQC + tool_dev_url: https://github.com/GenomiqueENS/toulligQC + licence: ["CECILL-2.1"] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - seq_summary: + type: file + description: Basecaller sequencing summary source + pattern: "*.txt" + - fastq: + type: file + description: FASTQ file (necessary if no sequencing summary file) + pattern: "*.{fq,fastq,fq.gz,fastq.gz}" + - bam: + type: file + description: BAM file (necessary if no sequencing summary file) + pattern: "*.bam" + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - report_data: + type: file + description: Report data emitted from toulligqc + pattern: "*.data" + - report_html: + type: file + description: Report data in html format + pattern: "*.html" + - plots_html: + type: file + description: Plots emitted in html format + pattern: "*.html" + - plotly_js: + type: file + description: Plots emitted from toulligqc + pattern: "plotly.min.js" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + +authors: + - "@Salome-Brunon" +maintainers: + - "@Salome-Brunon" diff --git a/nextflow.config b/nextflow.config index 8b5f1c0d..2a02da4a 100644 --- a/nextflow.config +++ b/nextflow.config @@ -71,6 +71,7 @@ params { // Options: QC skip_qc = false skip_nanoplot = false + skip_toulligqc = false skip_fastqc = false skip_multiqc = false diff --git a/nextflow_schema.json b/nextflow_schema.json index aa9e1bb2..f0174111 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -292,6 +292,11 @@ "fa_icon": "fas fa-fast-forward", "description": "Skip NanoPlot." }, + "skip_toulligqc": { + "type": "boolean", + "fa_icon": "fas fa-fast-forward", + "description": "Skip ToulligQC." 
+ }, "skip_fastqc": { "type": "boolean", "fa_icon": "fas fa-fast-forward", diff --git a/subworkflows/local/qcfastq_nanoplot_fastqc.nf b/subworkflows/local/qcfastq_nanoplot_fastqc.nf index b2c0f54d..ee20d107 100644 --- a/subworkflows/local/qcfastq_nanoplot_fastqc.nf +++ b/subworkflows/local/qcfastq_nanoplot_fastqc.nf @@ -3,12 +3,14 @@ */ include { NANOPLOT } from '../../modules/nf-core/nanoplot/main' +include { TOULLIGQC } from '../../modules/nf-core/toulligqc/main' include { FASTQC } from '../../modules/nf-core/fastqc/main' workflow QCFASTQ_NANOPLOT_FASTQC { take: ch_fastq skip_nanoplot + skip_toulligqc skip_fastqc main: @@ -32,7 +34,22 @@ workflow QCFASTQ_NANOPLOT_FASTQC { nanoplot_log = NANOPLOT.out.log nanoplot_version = NANOPLOT.out.versions } - + /* + * FastQ QC using ToulligQC + */ + toulligqc_report_data = Channel.empty() + toulligqc_report_html = Channel.empty() + toulligqc_plots_html = Channel.empty() + toulligqc_plotly_js = Channel.empty() + toulligqc_version = Channel.empty() + if (!skip_toulligqc){ + TOULLIGQC ( ch_fastq ) + toulligqc_report_data = TOULLIGQC.out.report_data + toulligqc_report_html = TOULLIGQC.out.report_html + toulligqc_plots_html = TOULLIGQC.out.plots_html + toulligqc_plotly_js = TOULLIGQC.out.plotly_js + toulligqc_version = TOULLIGQC.out.versions + } /* * FastQ QC using FASTQC */ @@ -61,6 +78,12 @@ workflow QCFASTQ_NANOPLOT_FASTQC { nanoplot_log nanoplot_version + toulligqc_report_data + toulligqc_report_html + toulligqc_plots_html + toulligqc_plotly_js + toulligqc_version + fastqc_zip fastqc_html fastqc_version diff --git a/workflows/nanoseq.nf b/workflows/nanoseq.nf index 6303f6f7..8ab1cc6b 100644 --- a/workflows/nanoseq.nf +++ b/workflows/nanoseq.nf @@ -264,7 +264,7 @@ workflow NANOSEQ{ /* * SUBWORKFLOW: Fastq QC with Nanoplot and fastqc */ - QCFASTQ_NANOPLOT_FASTQC ( ch_fastq_to_align, params.skip_nanoplot, params.skip_fastqc) + QCFASTQ_NANOPLOT_FASTQC ( ch_fastq, params.skip_nanoplot, params.skip_toulligqc, params.skip_fastqc) 
ch_software_versions = ch_software_versions.mix(QCFASTQ_NANOPLOT_FASTQC.out.fastqc_version.first().ifEmpty(null)) ch_fastqc_multiqc = QCFASTQ_NANOPLOT_FASTQC.out.fastqc_multiqc.ifEmpty([]) }