Merge pull request #61 from nf-core/tests
Allow tarballed inputs instead of only directories
fasterius authored Dec 20, 2023
2 parents 9c7ce2b + 4a06e97 commit 04e4568
Showing 26 changed files with 288 additions and 162 deletions.
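
The central change in this commit is that the `fastq_dir` and `spaceranger_dir` samplesheet columns now also accept `.tar.gz` archives instead of only directories, following the usual nf-core pattern of routing archive inputs through the `UNTAR` module (see the CHANGELOG entry below). The following is a minimal sketch of that pattern only; the workflow name, channel names and include path are assumptions for illustration, not the pipeline's actual implementation.

```nextflow
// Illustrative sketch: send samplesheet paths ending in ".tar.gz" through the
// nf-core UNTAR module and pass plain directories straight through.
// Workflow name, channel names and include path are assumptions.

include { UNTAR } from './modules/nf-core/untar/main'

workflow STAGE_INPUT {
    take:
    ch_input                 // channel: [ val(meta), path(dir_or_tarball) ]

    main:
    ch_input
        .branch { meta, path ->
            tarball:   path.name.endsWith('.tar.gz')
            directory: true
        }
        .set { ch_branched }

    // Extract tarballed inputs with the nf-core UNTAR module
    UNTAR ( ch_branched.tarball )

    // Merge extracted directories with inputs that were already directories
    ch_staged = UNTAR.out.untar.mix( ch_branched.directory )

    emit:
    staged = ch_staged       // channel: [ val(meta), path(directory) ]
}
```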
3 changes: 3 additions & 0 deletions .github/CONTRIBUTING.md
@@ -27,6 +27,9 @@ If you're not used to this workflow with git, you can start with some [docs from

## Tests

You can optionally test your changes by running the pipeline locally. Then it is recommended to use the `debug` profile to
receive warnings about process selectors and other debug info. Example: `nextflow run . -profile debug,test,docker --outdir <OUTDIR>`.

When you create a pull request with changes, [GitHub Actions](https://github.com/features/actions) will run automatic tests.
Typically, pull-requests are only fully reviewed when these tests are passing, though of course we can help out before then.

1 change: 1 addition & 0 deletions .github/PULL_REQUEST_TEMPLATE.md
@@ -19,6 +19,7 @@ Learn more about contributing: [CONTRIBUTING.md](https://github.com/nf-core/spat
- [ ] If necessary, also make a PR on the nf-core/spatialtranscriptomics _branch_ on the [nf-core/test-datasets](https://github.com/nf-core/test-datasets) repository.
- [ ] Make sure your code lints (`nf-core lint`).
- [ ] Ensure the test suite passes (`nextflow run . -profile test,docker --outdir <OUTDIR>`).
- [ ] Check for unexpected warnings in debug mode (`nextflow run . -profile debug,test,docker --outdir <OUTDIR>`).
- [ ] Usage Documentation in `docs/usage.md` is updated.
- [ ] Output Documentation in `docs/output.md` is updated.
- [ ] `CHANGELOG.md` is updated.
10 changes: 1 addition & 9 deletions .github/workflows/ci.yml
@@ -49,15 +49,7 @@ jobs:
- tests/pipeline/test_downstream.nf.test
steps:
- name: Check out pipeline code
uses: actions/checkout@v3

- name: Checkout test data
uses: actions/checkout@v3
with:
repository: nf-core/test-datasets
ref: spatialtranscriptomics
fetch-depth: 1
path: test-datasets
uses: actions/checkout@v4

# Install Nextflow
- name: Install Nextflow
4 changes: 2 additions & 2 deletions .github/workflows/fix-linting.yml
@@ -13,7 +13,7 @@ jobs:
runs-on: ubuntu-latest
steps:
# Use the @nf-core-bot token to check out so we can push later
- uses: actions/checkout@v3
- uses: actions/checkout@v4
with:
token: ${{ secrets.nf_core_bot_auth_token }}

@@ -24,7 +24,7 @@
env:
GITHUB_TOKEN: ${{ secrets.nf_core_bot_auth_token }}

- uses: actions/setup-node@v3
- uses: actions/setup-node@v4

- name: Install Prettier
run: npm install -g prettier @prettier/plugin-php
12 changes: 6 additions & 6 deletions .github/workflows/linting.yml
@@ -14,9 +14,9 @@ jobs:
EditorConfig:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4

- uses: actions/setup-node@v3
- uses: actions/setup-node@v4

- name: Install editorconfig-checker
run: npm install -g editorconfig-checker
@@ -27,9 +27,9 @@
Prettier:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4

- uses: actions/setup-node@v3
- uses: actions/setup-node@v4

- name: Install Prettier
run: npm install -g prettier
@@ -40,7 +40,7 @@
PythonBlack:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4

- name: Check code lints with Black
uses: psf/black@stable
@@ -71,7 +71,7 @@
runs-on: ubuntu-latest
steps:
- name: Check out pipeline code
uses: actions/checkout@v3
uses: actions/checkout@v4

- name: Install Nextflow
uses: nf-core/setup-nextflow@v1
1 change: 1 addition & 0 deletions .gitignore
@@ -10,4 +10,5 @@ log
reports
.nf-test/
nf-test
.nf-test*
test-datasets
4 changes: 3 additions & 1 deletion .gitpod.yml
@@ -4,7 +4,9 @@ tasks:
command: |
pre-commit install --install-hooks
nextflow self-update
- name: unset JAVA_TOOL_OPTIONS
command: |
unset JAVA_TOOL_OPTIONS
vscode:
extensions: # based on nf-core.nf-core-extensionpack
- codezombiech.gitignore # Language support for .gitignore files
26 changes: 13 additions & 13 deletions CHANGELOG.md
@@ -5,24 +5,24 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## [Unreleased]

Initial release of nf-core/spatialtranscriptomics, created with the [nf-core](https://nf-co.re/) template.
This marks the point at which the pipeline development was moved to nf-core and
NBIS. The pipeline has undergone several iterations regarding its functionality
and content; there are a significant number of changes, of which not all are
listed here. In summary, the pipeline contains best-practice processing and
analyses of pre- and post-Space Ranger-processed data, including quality
controls, normalisation, dimensionality reduction, clustering, differential
expression testing as well as output files compatible with further downstream
analyses and/or exploration in _e.g._ [TissUUmaps](https://tissuumaps.github.io/)
or bespoke user code.
Initial release of nf-core/spatialtranscriptomics, created with the
[nf-core](https://nf-co.re/) template. This marks the point at which the
pipeline development was moved to nf-core and NBIS. The pipeline has undergone
several iterations regarding its functionality and content; there are a
significant number of changes, of which not all are listed here. In summary, the
pipeline contains best-practice processing and analyses of pre- and post-Space
Ranger-processed data, including quality controls, normalisation, dimensionality
reduction, clustering, differential expression testing as well as output files
compatible with further downstream analyses and/or exploration in _e.g._
[TissUUmaps](https://tissuumaps.github.io/) or bespoke user code.

### `Added`

- Allow input directories `fastq_dir` and `spaceranger_dir` to be specified as tar archives (`.tar.gz`)
- Add a check to make sure that there are spots left after filtering [[#46](https://github.com/nf-core/spatialtranscriptomics/issues/46)]
- Implement tests with nf-test [[#42](https://github.com/nf-core/spatialtranscriptomics/pull/42)]
- Replace custom code to download reference with `untar` module [[#44](https://github.com/nf-core/spatialtranscriptomics/pull/44)]
- Replace custom code to download reference with `untar` module [[#44](https://github.com/nf-core/spatialtranscriptomics/pull/44)]
- Embed resources in quarto reports [[#43](https://github.com/nf-core/spatialtranscriptomics/pull/43)]
- Implement tests with nf-test [[#42](https://github.com/nf-core/spatialtranscriptomics/pull/42)]
- Use a samplesheet for input specification [[#30](https://github.com/nf-core/spatialtranscriptomics/pull/30), [#31](https://github.com/nf-core/spatialtranscriptomics/pull/31) and [#45](https://github.com/nf-core/spatialtranscriptomics/pull/45)]
- Add Space Ranger pre-processing as an optional pipeline step using the `spaceranger` nf-core module [[#17](https://github.com/nf-core/spatialtranscriptomics/pull/17) and [#45](https://github.com/nf-core/spatialtranscriptomics/pull/45)]
- Add `env/` directory with pipeline-specific container and Conda environment specifications [[#17](https://github.com/nf-core/spatialtranscriptomics/pull/17) and [#28](https://github.com/nf-core/spatialtranscriptomics/pull/28)]
@@ -50,7 +50,7 @@ versions of the same tool.
| ----------- | ------- |
| `SpatialDE` | 1.1.3 |
| `leidenalg` | 0.9.1 |
| `python` | 3.11.0 |
| `python` | 3.12.0 |
| `quarto` | 1.3.302 |
| `scanpy` | 1.9.3 |

17 changes: 6 additions & 11 deletions README.md
@@ -44,11 +44,8 @@ the full-sized test can be viewed on the [nf-core website](https://nf-co.re/spat

## Usage

:::note
If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how
to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline)
with `-profile test` before running the workflow on actual data.
:::
> [!NOTE]
> If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data.
<!-- TODO nf-core: Describe the minimum required steps to execute the pipeline, e.g. how to prepare samplesheets.
Explain what rows and columns represent. For instance (please edit as appropriate):
@@ -62,11 +59,9 @@ nextflow run nf-core/spatialtranscriptomics \
--outdir <OUTDIR>
```
:::warning
Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. Custom config files including those
provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_;
see [docs](https://nf-co.re/usage/configuration#custom-configuration-files).
:::
> [!WARNING]
> Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_;
> see [docs](https://nf-co.re/usage/configuration#custom-configuration-files).
For more details and further functionality, please refer to the [usage documentation](https://nf-co.re/spatialtranscriptomics/usage) and the [parameter documentation](https://nf-co.re/spatialtranscriptomics/parameters).
Expand Down Expand Up @@ -104,7 +99,7 @@ For further information or help, don't hesitate to get in touch on the [Slack `#
## Citations
<!-- TODO nf-core: Add citation for pipeline after first release. Uncomment lines below and update Zenodo doi and badge at the top of this file. -->
<!-- If you use nf-core/spatialtranscriptomics for your analysis, please cite it using the following doi: [10.5281/zenodo.XXXXXX](https://doi.org/10.5281/zenodo.XXXXXX) -->
<!-- If you use nf-core/spatialtranscriptomics for your analysis, please cite it using the following doi: [10.5281/zenodo.XXXXXX](https://doi.org/10.5281/zenodo.XXXXXX) -->

<!-- TODO nf-core: Add bibliography of tools and data used in your pipeline -->

2 changes: 1 addition & 1 deletion assets/multiqc_config.yml
@@ -1,5 +1,5 @@
report_comment: >
This report has been generated by the <a href="https://github.com/nf-core/spatialtranscriptomics/releases/tag/dev" target="_blank">nf-core/spatialtranscriptomics</a>
This report has been generated by the <a href="https://github.com/nf-core/spatialtranscriptomics/tree/dev" target="_blank">nf-core/spatialtranscriptomics</a>
analysis pipeline. For information about how to interpret these results, please see the
<a href="https://nf-co.re/spatialtranscriptomics/dev/docs/output" target="_blank">documentation</a>.
report_section_order:
2 changes: 1 addition & 1 deletion assets/slackreport.json
@@ -3,7 +3,7 @@
{
"fallback": "Plain-text summary of the attachment.",
"color": "<% if (success) { %>good<% } else { %>danger<%} %>",
"author_name": "nf-core/spatialtranscriptomics v${version} - ${runName}",
"author_name": "nf-core/spatialtranscriptomics ${version} - ${runName}",
"author_icon": "https://www.nextflow.io/docs/latest/_static/favicon.ico",
"text": "<% if (success) { %>Pipeline completed successfully!<% } else { %>Pipeline completed with errors<% } %>",
"fields": [
8 changes: 4 additions & 4 deletions conf/test.config
@@ -12,17 +12,17 @@

params {
config_profile_name = 'Test profile'
config_profile_description = 'Test pipeline incl. spaceranger with cytassist ffpe sample'
config_profile_description = 'Test pipeline for post-Space Ranger functionality'

// Limit resources so that this can run on GitHub Actions
max_cpus = 2
max_memory = '3.GB'
max_time = '2.h'

// Input and output
input = './test-datasets/testdata/human-brain-cancer-11-mm-capture-area-ffpe-2-standard_v2_ffpe_cytassist/samplesheet_spaceranger.csv'
spaceranger_probeset = "./test-datasets/testdata/human-brain-cancer-11-mm-capture-area-ffpe-2-standard_v2_ffpe_cytassist/CytAssist_11mm_FFPE_Human_Glioblastoma_probe_set.csv"
spaceranger_reference = "./test-datasets/testdata/homo_sapiens_chr22_reference.tar.gz"
input = 'https://raw.githubusercontent.com/nf-core/test-datasets/spatialtranscriptomics/testdata/human-brain-cancer-11-mm-capture-area-ffpe-2-standard_v2_ffpe_cytassist/samplesheet_downstream.csv'
spaceranger_probeset = "https://raw.githubusercontent.com/nf-core/test-datasets/spatialtranscriptomics/testdata/human-brain-cancer-11-mm-capture-area-ffpe-2-standard_v2_ffpe_cytassist/outs/probe_set.csv"
spaceranger_reference = "https://raw.githubusercontent.com/nf-core/test-datasets/spatialtranscriptomics/testdata/homo_sapiens_chr22_reference.tar.gz"
st_preprocess_min_counts = 5
st_preprocess_min_genes = 3
outdir = 'results'
32 changes: 26 additions & 6 deletions docs/usage.md
@@ -29,6 +29,15 @@ SAMPLE_1,fastqs_1/,hires_1.png,V11J26,B1
SAMPLE_2,fastqs_2/,hires_2.png,V11J26,B1
```

You may also supply a compressed tarball containing the FASTQ files in lieu of a
directory path:

```no-highlight
sample,fastq_dir,image,slide,area
SAMPLE_1,fastqs_1.tar.gz,hires_1.png,V11J26,B1
SAMPLE_2,fastqs_2.tar.gz,hires_2.png,V11J26,B1
```

For Cytassist samples, the `image` column gets replaced with the `cytaimage` column:

```no-highlight
@@ -45,7 +54,7 @@ Please refer to the following table for an overview of all supported columns:
| Column | Description |
| ------------------ | ------------------------------------------------------------------------------------------------------------------- |
| `sample` | Unique sample identifier. MUST match the prefix of the fastq files |
| `fastq_dir` | Path to directory where the sample FASTQ files are stored. |
| `fastq_dir` | Path to directory where the sample FASTQ files are stored. May be a `.tar.gz` file instead of a directory. |
| `image` | Brightfield microscopy image |
| `cytaimage` | Brightfield tissue image captured with Cytassist device |
| `colorizedimage` | A color composite of one or more fluorescence image channels saved as a single-page, single-file color TIFF or JPEG |
@@ -80,10 +89,21 @@ SAMPLE_1,results/SAMPLE_1/outs
SAMPLE_2,results/SAMPLE_2/outs
```

| Column | Description |
| ----------------- | --------------------------------------------------------------------------------------------------------------------------------------------- |
| `sample` | Unique sample identifier. |
| `spaceranger_dir` | Output directory generated by spaceranger. This is typically called `outs` and contains both gene expression matrices and spatial information |
You may alternatively supply a compressed tarball containing the Space Ranger output:

```no-highlight
sample,spaceranger_dir
SAMPLE_1,outs.tar.gz
SAMPLE_2,outs.tar.gz
```

| Column | Description |
| ----------------- | ----------------------------------------------------------------------------------------- |
| `sample` | Unique sample identifier. |
| `spaceranger_dir` | Output directory generated by spaceranger. May be a `.tar.gz` file instead of a directory |

The Space Ranger output directory is typically called `outs` and contains both
gene expression matrices as well as spatial information.
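
For orientation, a Space Ranger `outs` directory typically contains files along the following lines; the exact contents vary with the Space Ranger version and the options used, so this listing is indicative rather than exhaustive:

```no-highlight
outs/
├── web_summary.html
├── raw_feature_bc_matrix.h5
├── filtered_feature_bc_matrix.h5
└── spatial/
    ├── tissue_hires_image.png
    ├── tissue_lowres_image.png
    ├── scalefactors_json.json
    └── tissue_positions.csv
```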

## Space Ranger

@@ -134,7 +154,7 @@ The typical command for running the pipeline is as follows:

```bash
# Run the pipeline with raw data yet to be processed by Space Ranger
nextflow run nf-core/spatialtranscriptomics --input samplesheet.csv --outdir <OUTDIR> -profile docker --run_spaceranger
nextflow run nf-core/spatialtranscriptomics --input samplesheet.csv --outdir <OUTDIR> -profile docker

# Run pipeline with data already processed by Space Ranger
nextflow run nf-core/spatialtranscriptomics --input samplesheet.csv --outdir <OUTDIR> -profile docker
32 changes: 18 additions & 14 deletions lib/NfcoreTemplate.groovy
@@ -4,6 +4,7 @@

import org.yaml.snakeyaml.Yaml
import groovy.json.JsonOutput
import nextflow.extension.FilesEx

class NfcoreTemplate {

@@ -141,12 +142,14 @@ class NfcoreTemplate {
try {
if (params.plaintext_email) { throw GroovyException('Send plaintext e-mail, not HTML') }
// Try to send HTML e-mail using sendmail
def sendmail_tf = new File(workflow.launchDir.toString(), ".sendmail_tmp.html")
sendmail_tf.withWriter { w -> w << sendmail_html }
[ 'sendmail', '-t' ].execute() << sendmail_html
log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Sent summary e-mail to $email_address (sendmail)-"
} catch (all) {
// Catch failures and try with plaintext
def mail_cmd = [ 'mail', '-s', subject, '--content-type=text/html', email_address ]
if ( mqc_report.size() <= max_multiqc_email_size.toBytes() ) {
if ( mqc_report != null && mqc_report.size() <= max_multiqc_email_size.toBytes() ) {
mail_cmd += [ '-A', mqc_report ]
}
mail_cmd.execute() << email_html
Expand All @@ -155,14 +158,16 @@ class NfcoreTemplate {
}

// Write summary e-mail HTML to a file
def output_d = new File("${params.outdir}/pipeline_info/")
if (!output_d.exists()) {
output_d.mkdirs()
}
def output_hf = new File(output_d, "pipeline_report.html")
def output_hf = new File(workflow.launchDir.toString(), ".pipeline_report.html")
output_hf.withWriter { w -> w << email_html }
def output_tf = new File(output_d, "pipeline_report.txt")
FilesEx.copyTo(output_hf.toPath(), "${params.outdir}/pipeline_info/pipeline_report.html");
output_hf.delete()

// Write summary e-mail TXT to a file
def output_tf = new File(workflow.launchDir.toString(), ".pipeline_report.txt")
output_tf.withWriter { w -> w << email_txt }
FilesEx.copyTo(output_tf.toPath(), "${params.outdir}/pipeline_info/pipeline_report.txt");
output_tf.delete()
}

//
@@ -227,15 +232,14 @@ class NfcoreTemplate {
// Dump pipeline parameters in a json file
//
public static void dump_parameters(workflow, params) {
def output_d = new File("${params.outdir}/pipeline_info/")
if (!output_d.exists()) {
output_d.mkdirs()
}

def timestamp = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss')
def output_pf = new File(output_d, "params_${timestamp}.json")
def filename = "params_${timestamp}.json"
def temp_pf = new File(workflow.launchDir.toString(), ".${filename}")
def jsonStr = JsonOutput.toJson(params)
output_pf.text = JsonOutput.prettyPrint(jsonStr)
temp_pf.text = JsonOutput.prettyPrint(jsonStr)

FilesEx.copyTo(temp_pf.toPath(), "${params.outdir}/pipeline_info/params_${timestamp}.json")
temp_pf.delete()
}

//
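
The Groovy changes above all apply the same pattern: write the e-mail report, text summary or parameter dump to a hidden temporary file in the launch directory, publish it with `nextflow.extension.FilesEx.copyTo`, and delete the local copy. This presumably allows `--outdir` to point at remote storage, which plain `java.io.File` writes cannot reach. A minimal sketch of that pattern, with illustrative names only:

```groovy
import nextflow.extension.FilesEx

// Illustrative sketch of the write-locally-then-publish pattern used above.
// Function and variable names are examples, not the pipeline's actual code.
def publishText(String text, String launchDir, String destination) {
    // Write the content to a hidden temporary file in the launch directory
    def temp = new File(launchDir, '.publish_tmp')
    temp.text = text

    // FilesEx.copyTo resolves both local and remote destinations (e.g. s3://)
    FilesEx.copyTo(temp.toPath(), destination)

    // Remove the local temporary copy once it has been published
    temp.delete()
}
```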
4 changes: 2 additions & 2 deletions modules.json
@@ -12,12 +12,12 @@
},
"fastqc": {
"branch": "master",
"git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5",
"git_sha": "65ad3e0b9a4099592e1102e92e10455dc661cf53",
"installed_by": ["modules"]
},
"multiqc": {
"branch": "master",
"git_sha": "1537442a7be4a78efa3d1ff700a923c627bbda5d",
"git_sha": "4ab13872435962dadc239979554d13709e20bf29",
"installed_by": ["modules"]
},
"spaceranger/count": {
Expand Down