diff --git a/.editorconfig b/.editorconfig new file mode 100644 index 00000000..7a3ba6aa --- /dev/null +++ b/.editorconfig @@ -0,0 +1,24 @@ +root = true + +[*] +charset = utf-8 +end_of_line = lf +insert_final_newline = true +trim_trailing_whitespace = true +indent_size = 4 +indent_style = space + +[*.{yml,yaml}] +indent_size = 2 + +# These files are edited upstream in nf-core/modules +[/modules/nf-core/**] +charset = unset +end_of_line = unset +insert_final_newline = unset +trim_trailing_whitespace = unset +indent_style = unset +indent_size = unset + +[/assets/email*] +indent_size = unset diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 80142766..02f3581a 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -16,8 +16,8 @@ Learn more about contributing: [CONTRIBUTING.md](https://github.com/nf-core/vira - [ ] This comment contains a description of changes (with reason). - [ ] If you've fixed a bug or added code that should be tested, add tests! - - [ ] If you've added a new tool - have you followed the pipeline conventions in the [contribution docs](https://github.com/nf-core/viralrecon/tree/master/.github/CONTRIBUTING.md) - - [ ] If necessary, also make a PR on the nf-core/viralrecon _branch_ on the [nf-core/test-datasets](https://github.com/nf-core/test-datasets) repository. + - [ ] If you've added a new tool - have you followed the pipeline conventions in the [contribution docs](https://github.com/nf-core/viralrecon/tree/master/.github/CONTRIBUTING.md) + - [ ] If necessary, also make a PR on the nf-core/viralrecon _branch_ on the [nf-core/test-datasets](https://github.com/nf-core/test-datasets) repository. - [ ] Make sure your code lints (`nf-core lint .`). - [ ] Ensure the test suite passes (`nextflow run . -profile test,docker`). - [ ] Usage Documentation in `docs/usage.md` is updated. 
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 6e9c0d18..6065fd0f 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -1,138 +1,154 @@ -name: nf-core CI -# This workflow runs the pipeline with the minimal test dataset to check that it completes without any syntax errors -on: - push: - branches: - - dev - pull_request: - release: - types: [published] - -jobs: - test: - name: Run workflow tests - # Only run on push if this is the nf-core dev branch (merged PRs) - if: ${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'nf-core/viralrecon') }} - runs-on: ubuntu-latest - env: - NXF_VER: ${{ matrix.nxf_ver }} - NXF_ANSI_LOG: false - strategy: - matrix: - # Nextflow versions: check pipeline minimum and current latest - nxf_ver: [21.04.0] - steps: - - name: Check out pipeline code - uses: actions/checkout@v2 - - - name: Install Nextflow - env: - CAPSULE_LOG: none - run: | - wget -qO- get.nextflow.io | bash - sudo mv nextflow /usr/local/bin/ - - - name: Run pipeline with test data - run: | - nextflow run ${GITHUB_WORKSPACE} -profile test,docker - - parameters: - name: Test workflow parameters - if: ${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'nf-core/viralrecon') }} - runs-on: ubuntu-latest - env: - NXF_VER: '21.04.0' - NXF_ANSI_LOG: false - strategy: - matrix: - parameters: [--skip_fastp, --skip_variants, --skip_cutadapt, --skip_kraken2, --skip_assembly, '--spades_mode corona', '--spades_mode metaviral'] - steps: - - name: Check out pipeline code - uses: actions/checkout@v2 - - - name: Install Nextflow - env: - CAPSULE_LOG: none - run: | - wget -qO- get.nextflow.io | bash - sudo mv nextflow /usr/local/bin/ - - - name: Run pipeline with various parameters - run: | - nextflow run ${GITHUB_WORKSPACE} -profile test,docker ${{ matrix.parameters }} - - test_sra: - name: Test SRA workflow - if: ${{ github.event_name != 'push' || (github.event_name == 'push' 
&& github.repository == 'nf-core/viralrecon') }} - runs-on: ubuntu-latest - env: - NXF_VER: '21.04.0' - NXF_ANSI_LOG: false - strategy: - matrix: - parameters: [--skip_sra_fastq_download, ''] - - steps: - - name: Check out pipeline code - uses: actions/checkout@v2 - - - name: Install Nextflow - env: - CAPSULE_LOG: none - run: | - wget -qO- get.nextflow.io | bash - sudo mv nextflow /usr/local/bin/ - - - name: Run pipeline to download SRA ids and various options - run: | - nextflow run ${GITHUB_WORKSPACE} -profile test_sra,docker ${{ matrix.parameters }} - - test_sispa: - name: Test SISPA workflow - if: ${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'nf-core/viralrecon') }} - runs-on: ubuntu-latest - env: - NXF_VER: '21.04.0' - NXF_ANSI_LOG: false - strategy: - matrix: - parameters: [--gff false, "--genome 'NC_045512.2'"] - steps: - - name: Check out pipeline code - uses: actions/checkout@v2 - - - name: Install Nextflow - env: - CAPSULE_LOG: none - run: | - wget -qO- get.nextflow.io | bash - sudo mv nextflow /usr/local/bin/ - - - name: Run pipeline with minimal SISPA data and various options - run: | - nextflow run ${GITHUB_WORKSPACE} -profile test_sispa,docker ${{ matrix.parameters }} - - test_nanopore: - name: Test Nanopore workflow - if: ${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'nf-core/viralrecon') }} - runs-on: ubuntu-latest - env: - NXF_VER: '21.04.0' - NXF_ANSI_LOG: false - strategy: - matrix: - parameters: [--gff false, --input false, --min_barcode_reads 10000, --min_guppyplex_reads 10000, "--artic_minion_caller medaka"] - steps: - - name: Check out pipeline code - uses: actions/checkout@v2 - - - name: Install Nextflow - env: - CAPSULE_LOG: none - run: | - wget -qO- get.nextflow.io | bash - sudo mv nextflow /usr/local/bin/ - - - name: Run pipeline with minimal Nanopore data and various options - run: | - nextflow run ${GITHUB_WORKSPACE} -profile test_nanopore,docker ${{ 
matrix.parameters }} +name: nf-core CI +# This workflow runs the pipeline with the minimal test dataset to check that it completes without any syntax errors +on: + push: + branches: + - dev + pull_request: + release: + types: [published] + +jobs: + test: + name: Run workflow tests + # Only run on push if this is the nf-core dev branch (merged PRs) + if: ${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'nf-core/viralrecon') }} + runs-on: ubuntu-latest + env: + NXF_VER: ${{ matrix.nxf_ver }} + NXF_ANSI_LOG: false + strategy: + matrix: + # Nextflow versions: check pipeline minimum and current latest + nxf_ver: [21.04.0, ""] + steps: + - name: Check out pipeline code + uses: actions/checkout@v2 + + - name: Install Nextflow + env: + CAPSULE_LOG: none + run: | + wget -qO- get.nextflow.io | bash + sudo mv nextflow /usr/local/bin/ + + - name: Run pipeline with test data + run: | + nextflow run ${GITHUB_WORKSPACE} -profile test,docker + + parameters: + name: Test workflow parameters + if: ${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'nf-core/viralrecon') }} + runs-on: ubuntu-latest + env: + NXF_VER: ${{ matrix.nxf_ver }} + NXF_ANSI_LOG: false + strategy: + matrix: + parameters: + [ + --skip_fastp, + --skip_variants, + --skip_cutadapt, + --skip_kraken2, + --skip_assembly, + "--spades_mode corona", + "--spades_mode metaviral", + ] + steps: + - name: Check out pipeline code + uses: actions/checkout@v2 + + - name: Install Nextflow + env: + CAPSULE_LOG: none + run: | + wget -qO- get.nextflow.io | bash + sudo mv nextflow /usr/local/bin/ + + - name: Run pipeline with various parameters + run: | + nextflow run ${GITHUB_WORKSPACE} -profile test,docker ${{ matrix.parameters }} + + test_sra: + name: Test SRA workflow + if: ${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'nf-core/viralrecon') }} + runs-on: ubuntu-latest + env: + NXF_VER: ${{ matrix.nxf_ver }} + 
NXF_ANSI_LOG: false + strategy: + matrix: + parameters: [--skip_sra_fastq_download, ""] + + steps: + - name: Check out pipeline code + uses: actions/checkout@v2 + + - name: Install Nextflow + env: + CAPSULE_LOG: none + run: | + wget -qO- get.nextflow.io | bash + sudo mv nextflow /usr/local/bin/ + + - name: Run pipeline to download SRA ids and various options + run: | + nextflow run ${GITHUB_WORKSPACE} -profile test_sra,docker ${{ matrix.parameters }} + + test_sispa: + name: Test SISPA workflow + if: ${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'nf-core/viralrecon') }} + runs-on: ubuntu-latest + env: + NXF_VER: ${{ matrix.nxf_ver }} + NXF_ANSI_LOG: false + strategy: + matrix: + parameters: [--gff false, "--genome 'NC_045512.2'"] + steps: + - name: Check out pipeline code + uses: actions/checkout@v2 + + - name: Install Nextflow + env: + CAPSULE_LOG: none + run: | + wget -qO- get.nextflow.io | bash + sudo mv nextflow /usr/local/bin/ + + - name: Run pipeline with minimal SISPA data and various options + run: | + nextflow run ${GITHUB_WORKSPACE} -profile test_sispa,docker ${{ matrix.parameters }} + + test_nanopore: + name: Test Nanopore workflow + if: ${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'nf-core/viralrecon') }} + runs-on: ubuntu-latest + env: + NXF_VER: ${{ matrix.nxf_ver }} + NXF_ANSI_LOG: false + strategy: + matrix: + parameters: + [ + --gff false, + --input false, + --min_barcode_reads 10000, + --min_guppyplex_reads 10000, + "--artic_minion_caller medaka", + ] + steps: + - name: Check out pipeline code + uses: actions/checkout@v2 + + - name: Install Nextflow + env: + CAPSULE_LOG: none + run: | + wget -qO- get.nextflow.io | bash + sudo mv nextflow /usr/local/bin/ + + - name: Run pipeline with minimal Nanopore data and various options + run: | + nextflow run ${GITHUB_WORKSPACE} -profile test_nanopore,docker ${{ matrix.parameters }} diff --git a/.github/workflows/linting.yml 
b/.github/workflows/linting.yml index 0ff7a870..46f42cb8 100644 --- a/.github/workflows/linting.yml +++ b/.github/workflows/linting.yml @@ -14,11 +14,11 @@ jobs: - uses: actions/checkout@v2 - uses: actions/setup-node@v1 with: - node-version: '10' + node-version: "10" - name: Install markdownlint run: npm install -g markdownlint-cli - name: Run Markdownlint - run: markdownlint ${GITHUB_WORKSPACE} -c ${GITHUB_WORKSPACE}/.github/markdownlint.yml + run: markdownlint . # If the above check failed, post a comment on the PR explaining the failure - name: Post PR comment @@ -36,7 +36,7 @@ jobs: * Everything else: [Install `npm`](https://www.npmjs.com/get-npm) then [install `markdownlint-cli`](https://www.npmjs.com/package/markdownlint-cli) (`npm install -g markdownlint-cli`) * Fix the markdown errors * Automatically: `markdownlint . --config .github/markdownlint.yml --fix` - * Manually resolve anything left from `markdownlint . --config .github/markdownlint.yml` + * Manually resolve anything left from `markdownlint .` Once you push these changes the test should pass, and you can hide this comment :+1: @@ -46,6 +46,20 @@ jobs: repo-token: ${{ secrets.GITHUB_TOKEN }} allow-repeats: false + EditorConfig: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + + - uses: actions/setup-node@v1 + with: + node-version: "10" + + - name: Install editorconfig-checker + run: npm install -g editorconfig-checker + + - name: Run ECLint check + run: editorconfig-checker -exclude README.md $(git ls-files | grep -v test) YAML: runs-on: ubuntu-latest @@ -53,7 +67,7 @@ jobs: - uses: actions/checkout@v1 - uses: actions/setup-node@v1 with: - node-version: '10' + node-version: "10" - name: Install yaml-lint run: npm install -g yaml-lint - name: Run yaml-lint @@ -84,13 +98,9 @@ jobs: repo-token: ${{ secrets.GITHUB_TOKEN }} allow-repeats: false - nf-core: runs-on: ubuntu-latest - env: - NXF_VER: 21.03.0-edge steps: - - name: Check out pipeline code uses: actions/checkout@v2 @@ -103,8 
+113,8 @@ jobs: - uses: actions/setup-python@v1 with: - python-version: '3.6' - architecture: 'x64' + python-version: "3.6" + architecture: "x64" - name: Install dependencies run: | @@ -131,4 +141,3 @@ jobs: lint_log.txt lint_results.md PR_number.txt - diff --git a/.github/markdownlint.yml b/.markdownlint.yml similarity index 91% rename from .github/markdownlint.yml rename to .markdownlint.yml index c8a3bcac..e7fc97a7 100644 --- a/.github/markdownlint.yml +++ b/.markdownlint.yml @@ -1,6 +1,8 @@ # Markdownlint configuration file default: true line-length: false +ul-indent: + indent: 4 no-duplicate-header: siblings_only: true no-inline-html: diff --git a/.nf-core-lint.yaml b/.nf-core-lint.yaml index 56dc4f8d..5752d519 100644 --- a/.nf-core-lint.yaml +++ b/.nf-core-lint.yaml @@ -4,7 +4,10 @@ files_unchanged: - .github/workflows/linting.yml - assets/email_template.html - assets/email_template.txt + - assets/sendmail_template.txt - lib/NfcoreSchema.groovy + - docs/README.md files_exist: - bin/markdown_to_html.py + - .github/markdownlint.yml actions_awsfulltest: False diff --git a/CHANGELOG.md b/CHANGELOG.md index c0da5002..a51ae64d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,22 +14,23 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 * Variant graph processes to call variants relative to the reference genome directly from _de novo_ assemblies have been deprecated and removed * Variant calling with Varscan 2 has been deprecated and removed due to [licensing restrictions](https://github.com/dkoboldt/varscan/issues/12) * New tools: - * [Pangolin](https://github.com/cov-lineages/pangolin) for lineage analysis - * [Nextclade](https://github.com/nextstrain/nextclade) for clade assignment, mutation calling and consensus sequence quality checks - * [ASCIIGenome](https://asciigenome.readthedocs.io/en/latest/) for individual variant screenshots with annotation tracks + * [Pangolin](https://github.com/cov-lineages/pangolin) for lineage 
analysis + * [Nextclade](https://github.com/nextstrain/nextclade) for clade assignment, mutation calling and consensus sequence quality checks + * [ASCIIGenome](https://asciigenome.readthedocs.io/en/latest/) for individual variant screenshots with annotation tracks ### Other enhancements & fixes * Illumina and Nanopore runs containing the same 48 samples sequenced on both platforms have been uploaded to the nf-core AWS account for full-sized tests on release * Default human `--kraken2_db` link has been changed from Zenodo to an AWS S3 bucket for more reliable downloads -* Updated pipeline template to nf-core/tools `1.13.3` +* Updated pipeline template to nf-core/tools `1.14` * Optimise MultiQC configuration and input files for faster run-time on huge sample numbers -* [#122](https://github.com/nf-core/viralrecon/issues/122) - Single SPAdes command to rule them all -* [#138](https://github.com/nf-core/viralrecon/issues/138) - Problem masking the consensus sequence -* [#142](https://github.com/nf-core/viralrecon/issues/142) - Unknown method invocation `toBytes` on String type -* [#169](https://github.com/nf-core/viralrecon/issues/169) - ggplot2 error when generating mosdepth amplicon plot with Swift v2 primers -* [#170](https://github.com/nf-core/viralrecon/issues/170) - ivar trimming of Swift libraries new offset feature -* [#175](https://github.com/nf-core/viralrecon/issues/175) - MultiQC report does not include all the metrics +* [[#122](https://github.com/nf-core/viralrecon/issues/122)] - Single SPAdes command to rule them all +* [[#138](https://github.com/nf-core/viralrecon/issues/138)] - Problem masking the consensus sequence +* [[#142](https://github.com/nf-core/viralrecon/issues/142)] - Unknown method invocation `toBytes` on String type +* [[#169](https://github.com/nf-core/viralrecon/issues/169)] - ggplot2 error when generating mosdepth amplicon plot with Swift v2 primers +* [[#170](https://github.com/nf-core/viralrecon/issues/170)] - ivar trimming of Swift 
libraries new offset feature +* [[#175](https://github.com/nf-core/viralrecon/issues/175)] - MultiQC report does not include all the metrics +* [[#188](https://github.com/nf-core/viralrecon/pull/188)] - Add and fix EditorConfig linting in entire pipeline ### Parameters @@ -99,8 +100,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 | `--unqualified_percent_limit` | | | `--varscan2_strand_filter` | | -> **NB:** Parameter has been __updated__ if both old and new parameter information is present. -> **NB:** Parameter has been __added__ if just the new parameter information is present. +> **NB:** Parameter has been __updated__ if both old and new parameter information is present. +> **NB:** Parameter has been __added__ if just the new parameter information is present. > **NB:** Parameter has been __removed__ if new parameter information isn't present. ### Software dependencies @@ -150,9 +151,9 @@ Note, since the pipeline is now using Nextflow DSL2, each process will be run wi | `varscan` | 2.4.4 | | | `vg` | 1.24.0 | | -> **NB:** Dependency has been __updated__ if both old and new version information is present. -> **NB:** Dependency has been __added__ if just the new version information is present. -> **NB:** Dependency has been __removed__ if new version information isn't present. +> **NB:** Dependency has been __updated__ if both old and new version information is present. +> **NB:** Dependency has been __added__ if just the new version information is present. +> **NB:** Dependency has been __removed__ if new version information isn't present. 
## [[1.1.0](https://github.com/nf-core/rnaseq/releases/tag/1.1.0)] - 2020-06-23 @@ -162,18 +163,18 @@ Note, since the pipeline is now using Nextflow DSL2, each process will be run wi * [#124](https://github.com/nf-core/viralrecon/issues/124) - Intersect variants across callers * [nf-core/tools#616](https://github.com/nf-core/tools/pull/616) - Updated GitHub Actions to build Docker image and push to Docker Hub * Parameters: - * `--min_mapped_reads` to circumvent failures for samples with low number of mapped reads - * `--varscan2_strand_filter` to toggle the default Varscan 2 strand filter - * `--skip_mosdepth` - skip genome-wide and amplicon coverage plot generation from mosdepth output - * `--amplicon_left_suffix` - to provide left primer suffix used in name field of `--amplicon_bed` - * `--amplicon_right_suffix` - to provide right primer suffix used in name field of `--amplicon_bed` - * Unify parameter specification with COG-UK pipeline: - * `--min_allele_freq` - minimum allele frequency threshold for calling variants - * `--mpileup_depth` - SAMTools mpileup max per-file depth - * `--ivar_exclude_reads` renamed to `--ivar_trim_noprimer` - * `--ivar_trim_min_len` - minimum length of read to retain after primer trimming - * `--ivar_trim_min_qual` - minimum quality threshold for sliding window to pass - * `--ivar_trim_window_width` - width of sliding window + * `--min_mapped_reads` to circumvent failures for samples with low number of mapped reads + * `--varscan2_strand_filter` to toggle the default Varscan 2 strand filter + * `--skip_mosdepth` - skip genome-wide and amplicon coverage plot generation from mosdepth output + * `--amplicon_left_suffix` - to provide left primer suffix used in name field of `--amplicon_bed` + * `--amplicon_right_suffix` - to provide right primer suffix used in name field of `--amplicon_bed` + * Unify parameter specification with COG-UK pipeline: + * `--min_allele_freq` - minimum allele frequency threshold for calling variants + * 
`--mpileup_depth` - SAMTools mpileup max per-file depth + * `--ivar_exclude_reads` renamed to `--ivar_trim_noprimer` + * `--ivar_trim_min_len` - minimum length of read to retain after primer trimming + * `--ivar_trim_min_qual` - minimum quality threshold for sliding window to pass + * `--ivar_trim_window_width` - width of sliding window * [#118] Updated GitHub Actions AWS workflow for small and full size tests. ### Removed diff --git a/CITATIONS.md b/CITATIONS.md index f323a64d..6a3f41ef 100644 --- a/CITATIONS.md +++ b/CITATIONS.md @@ -1,109 +1,108 @@ -# nf-core/viralrecon: Citations - -## [nf-core](https://pubmed.ncbi.nlm.nih.gov/32055031/) - -> Ewels PA, Peltzer A, Fillinger S, Patel H, Alneberg J, Wilm A, Garcia MU, Di Tommaso P, Nahnsen S. The nf-core framework for community-curated bioinformatics pipelines. Nat Biotechnol. 2020 Mar;38(3):276-278. doi: 10.1038/s41587-020-0439-x. PubMed PMID: 32055031. - -## [Nextflow](https://pubmed.ncbi.nlm.nih.gov/28398311/) - -> Di Tommaso P, Chatzou M, Floden EW, Barja PP, Palumbo E, Notredame C. Nextflow enables reproducible computational workflows. Nat Biotechnol. 2017 Apr 11;35(4):316-319. doi: 10.1038/nbt.3820. PubMed PMID: 28398311. - -## Pipeline tools - -* [ABACAS](https://www.ncbi.nlm.nih.gov/pubmed/19497936/) - > Assefa S, Keane TM, Otto TD, Newbold C, Berriman M. ABACAS: algorithm-based automatic contiguation of assembled sequences. Bioinformatics. 2009 Aug 1;25(15):1968-9. doi: 10.1093/bioinformatics/btp347. Epub 2009 Jun 3. PubMed PMID: 19497936; PubMed Central PMCID: PMC2712343. - -* [ASCIIGenome](https://www.ncbi.nlm.nih.gov/pubmed/28119307/) - > Beraldi D. ASCIIGenome: a command line genome browser for console terminals. Bioinformatics. 2017 May 15;33(10):1568-1569. doi: 10.1093/bioinformatics/btx007. PubMed PMID: 28119307; PubMed Central PMCID: PMC5423454. - -* [ARTIC network](https://github.com/artic-network) - -* [Bandage](https://www.ncbi.nlm.nih.gov/pubmed/26099265) - > Wick R.R., Schultz M.B., Zobel J. 
& Holt K.E. Bandage: interactive visualisation of de novo genome assemblies. Bioinformatics, 31(20), 3350-3352. doi: 10.1093/bioinformatics/btv383. PubMed PMID: 26099265; PubMed Central PCMID: PMC4595904. - -* [BCFtools](https://www.ncbi.nlm.nih.gov/pubmed/21903627/) - > Li H. A statistical framework for SNP calling, mutation discovery, association mapping and population genetical parameter estimation from sequencing data. Bioinformatics. 2011 Nov 1;27(21):2987-93. doi: 10.1093/bioinformatics/btr509. Epub 2011 Sep 8. PubMed PMID: 21903627; PubMed Central PMCID: PMC3198575. - -* [BEDTools](https://www.ncbi.nlm.nih.gov/pubmed/20110278/) - > Quinlan AR, Hall IM. BEDTools: a flexible suite of utilities for comparing genomic features. Bioinformatics. 2010 Mar 15;26(6):841-2. doi: 10.1093/bioinformatics/btq033. Epub 2010 Jan 28. PubMed PMID: 20110278; PubMed Central PMCID: PMC2832824. - -* [BLAST](https://www.ncbi.nlm.nih.gov/pubmed/20003500/) - > Camacho C, Coulouris G, Avagyan V, Ma N, Papadopoulos J, Bealer K, Madden TL. BLAST+: architecture and applications. BMC Bioinformatics. 2009 Dec 15;10:421. doi: 10.1186/1471-2105-10-421. PubMed PMID: 20003500; PubMed Central PMCID: PMC2803857. - -* [Bowtie 2](https://www.ncbi.nlm.nih.gov/pubmed/22388286/) - > Langmead B, Salzberg SL. Fast gapped-read alignment with Bowtie 2. Nat Methods. 2012 Mar 4;9(4):357-9. doi: 10.1038/nmeth.1923. PubMed PMID: 22388286; PubMed Central PMCID: PMC3322381. - -* [Cutadapt](http://dx.doi.org/10.14806/ej.17.1.200) - > Marcel, M. Cutadapt removes adapter sequences from high-throughput sequencing reads. EMBnet.journal, [S.l.], v. 17, n. 1, p. pp. 10-12, may 2011. ISSN 2226-6089. doi: 10.14806/ej.17.1.200. - -* [fastp](https://www.ncbi.nlm.nih.gov/pubmed/30423086/) - > Chen S, Zhou Y, Chen Y, Gu J. fastp: an ultra-fast all-in-one FASTQ preprocessor. Bioinformatics. 2018 Sep 1;34(17):i884-i890. doi: 10.1093/bioinformatics/bty560. PubMed PMID: 30423086; PubMed Central PMCID: PMC6129281. 
- -* [FastQC](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/) - -* [iVar](https://www.ncbi.nlm.nih.gov/pubmed/30621750/) - > Grubaugh ND, Gangavarapu K, Quick J, Matteson NL, De Jesus JG, Main BJ, Tan AL, Paul LM, Brackney DE, Grewal S, Gurfield N, Van Rompay KKA, Isern S, Michael SF, Coffey LL, Loman NJ, Andersen KG. An amplicon-based sequencing framework for accurately measuring intrahost virus diversity using PrimalSeq and iVar. Genome Biol. 2019 Jan 8;20(1):8. doi: 10.1186/s13059-018-1618-7. PubMed PMID: 30621750; PubMed Central PMCID: PMC6325816. - -* [Kraken 2](https://www.ncbi.nlm.nih.gov/pubmed/31779668/) - > Wood DE, Lu J, Langmead B. Improved metagenomic analysis with Kraken 2. Genome Biol. 2019 Nov 28;20(1):257. doi: 10.1186/s13059-019-1891-0. PubMed PMID: 31779668; PubMed Central PMCID: PMC6883579. - -* [minia](https://www.ncbi.nlm.nih.gov/pubmed/24040893/) - > Chikhi R, Rizk G. Space-efficient and exact de Bruijn graph representation based on a Bloom filter. Algorithms Mol Biol. 2013 Sep 16;8(1):22. doi: 10.1186/1748-7188-8-22. PubMed PMID: 24040893; PubMed Central PMCID: PMC3848682. - -* [mosdepth](https://www.ncbi.nlm.nih.gov/pubmed/29096012) - > Pedersen BS, Quinlan AR. Mosdepth: Quick Coverage Calculation for Genomes and Exomes. Bioinformatics. 2018 Mar 1;34(5):867-868. doi: 10.1093/bioinformatics/btx699. PMID: 29096012 PMCID: PMC6030888. - -* [MultiQC](https://www.ncbi.nlm.nih.gov/pubmed/27312411/) - > Ewels P, Magnusson M, Lundin S, Käller M. MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics. 2016 Oct 1;32(19):3047-8. doi: 10.1093/bioinformatics/btw354. Epub 2016 Jun 16. PubMed PMID: 27312411; PubMed Central PMCID: PMC5039924. - -* [NanoPlot](https://pubmed.ncbi.nlm.nih.gov/29547981/) - > De Coster W, D'Hert S, Schultz DT, Cruts M, Van Broeckhoven C. NanoPack: visualizing and processing long-read sequencing data. Bioinformatics. 2018 Aug 1;34(15):2666-2669. 
doi: 10.1093/bioinformatics/bty149. PubMed PMID: 29547981; PubMed Central PMCID: PMC6061794. - -* [Nextstrain](https://pubmed.ncbi.nlm.nih.gov/29790939/) - > Hadfield J, Megill C, Bell SM, Huddleston J, Potter B, Callender C, Sagulenko P, Bedford T, Neher RA. Nextstrain: real-time tracking of pathogen evolution. Bioinformatics. 2018 Dec 1;34(23):4121-4123. doi: 10.1093/bioinformatics/bty407. PubMed PMID: 29790939; PubMed Central PMCID: PMC6247931. - -* [pangolin](https://github.com/cov-lineages/pangolin) - > Áine O'Toole, Emily Scher, Anthony Underwood, Ben Jackson, Verity Hill, JT McCrone, Chris Ruis, Khali Abu-Dahab, Ben Taylor, Corin Yeats, Louis du Plessis, David Aanensen, Eddie Holmes, Oliver Pybus, Andrew Rambaut. pangolin: lineage assignment in an emerging pandemic as an epidemiological tool. Publication in preparation. - -* [picard-tools](http://broadinstitute.github.io/picard) - -* [pycoQC](https://doi.org/10.21105/joss.01236) - > Leger A, Leonardi T, (2019). pycoQC, interactive quality control for Oxford Nanopore Sequencing. Journal of Open Source Software, 4(34), 1236. - -* [QUAST](https://www.ncbi.nlm.nih.gov/pubmed/23422339/) - > Gurevich A, Saveliev V, Vyahhi N, Tesler G. QUAST: quality assessment tool for genome assemblies. Bioinformatics. 2013 Apr 15;29(8):1072-5. doi: 10.1093/bioinformatics/btt086. Epub 2013 Feb 19. PubMed PMID: 23422339; PubMed Central PMCID: PMC3624806. - -* [R](https://www.R-project.org/) - > R Core Team (2017). R: A language and environment for statistical computing. R Foundation for Statistical Computing, Vienna, Austria. - -* [SAMtools](https://www.ncbi.nlm.nih.gov/pubmed/19505943/) - > Li H, Handsaker B, Wysoker A, Fennell T, Ruan J, Homer N, Marth G, Abecasis G, Durbin R; 1000 Genome Project Data Processing Subgroup. The Sequence Alignment/Map format and SAMtools. Bioinformatics. 2009 Aug 15;25(16):2078-9. doi: 10.1093/bioinformatics/btp352. Epub 2009 Jun 8. PubMed PMID: 19505943; PubMed Central PMCID: PMC2723002. 
- -* [SnpEff](https://www.ncbi.nlm.nih.gov/pubmed/22728672/) - > Cingolani P, Platts A, Wang le L, Coon M, Nguyen T, Wang L, Land SJ, Lu X, Ruden DM. A program for annotating and predicting the effects of single nucleotide polymorphisms, SnpEff: SNPs in the genome of Drosophila melanogaster strain w1118; iso-2; iso-3. Fly (Austin). 2012 Apr-Jun;6(2):80-92. doi: 10.4161/fly.19695. PubMed PMID: 22728672; PubMed Central PMCID: PMC3679285. - -* [SnpSift](https://www.ncbi.nlm.nih.gov/pubmed/22435069/) - > Cingolani P, Patel VM, Coon M, Nguyen T, Land SJ, Ruden DM, Lu X. Using Drosophila melanogaster as a Model for Genotoxic Chemical Mutational Studies with a New Program, SnpSift. Front Genet. 2012 Mar 15;3:35. doi: 10.3389/fgene.2012.00035. eCollection 2012. PubMed PMID: 22435069; PubMed Central PMCID: PMC3304048. - -* [SPAdes](https://www.ncbi.nlm.nih.gov/pubmed/24093227/) - > Nurk S, Bankevich A, Antipov D, Gurevich AA, Korobeynikov A, Lapidus A, Prjibelski AD, Pyshkin A, Sirotkin A, Sirotkin Y, Stepanauskas R, Clingenpeel SR, Woyke T, McLean JS, Lasken R, Tesler G, Alekseyev MA, Pevzner PA. Assembling single-cell genomes and mini-metagenomes from chimeric MDA products. J Comput Biol. 2013 Oct;20(10):714-37. doi: 10.1089/cmb.2013.0084. PubMed PMID: 24093227; PubMed Central PMCID: PMC3791033. - -* [Unicycler](https://www.ncbi.nlm.nih.gov/pubmed/28594827/) - > Wick RR, Judd LM, Gorrie CL, Holt KE. Unicycler: Resolving bacterial genome assemblies from short and long sequencing reads. PLoS Comput Biol. 2017 Jun 8;13(6):e1005595. doi: 10.1371/journal.pcbi.1005595. eCollection 2017 Jun. PubMed PMID: 28594827; PubMed Central PMCID: PMC5481147. - -## Software packaging/containerisation tools - -* [Anaconda](https://anaconda.com) - > Anaconda Software Distribution. Computer software. Vers. 2-2.4.0. Anaconda, Nov. 2016. Web. 
- -* [Bioconda](https://pubmed.ncbi.nlm.nih.gov/29967506/) - > Grüning B, Dale R, Sjödin A, Chapman BA, Rowe J, Tomkins-Tinch CH, Valieris R, Köster J; Bioconda Team. Bioconda: sustainable and comprehensive software distribution for the life sciences. Nat Methods. 2018 Jul;15(7):475-476. doi: 10.1038/s41592-018-0046-7. PubMed PMID: 29967506. - -* [BioContainers](https://pubmed.ncbi.nlm.nih.gov/28379341/) - > da Veiga Leprevost F, Grüning B, Aflitos SA, Röst HL, Uszkoreit J, Barsnes H, Vaudel M, Moreno P, Gatto L, Weber J, Bai M, Jimenez RC, Sachsenberg T, Pfeuffer J, Alvarez RV, Griss J, Nesvizhskii AI, Perez-Riverol Y. BioContainers: an open-source and community-driven framework for software standardization. Bioinformatics. 2017 Aug 15;33(16):2580-2582. doi: 10.1093/bioinformatics/btx192. PubMed PMID: 28379341; PubMed Central PMCID: PMC5870671. - -* [Docker](https://dl.acm.org/doi/10.5555/2600239.2600241) - -* [Singularity](https://pubmed.ncbi.nlm.nih.gov/28494014/) - > Kurtzer GM, Sochat V, Bauer MW. Singularity: Scientific containers for mobility of compute. PLoS One. 2017 May 11;12(5):e0177459. doi: 10.1371/journal.pone.0177459. eCollection 2017. PubMed PMID: 28494014; PubMed Central PMCID: PMC5426675. - \ No newline at end of file +# nf-core/viralrecon: Citations + +## [nf-core](https://pubmed.ncbi.nlm.nih.gov/32055031/) + +> Ewels PA, Peltzer A, Fillinger S, Patel H, Alneberg J, Wilm A, Garcia MU, Di Tommaso P, Nahnsen S. The nf-core framework for community-curated bioinformatics pipelines. Nat Biotechnol. 2020 Mar;38(3):276-278. doi: 10.1038/s41587-020-0439-x. PubMed PMID: 32055031. + +## [Nextflow](https://pubmed.ncbi.nlm.nih.gov/28398311/) + +> Di Tommaso P, Chatzou M, Floden EW, Barja PP, Palumbo E, Notredame C. Nextflow enables reproducible computational workflows. Nat Biotechnol. 2017 Apr 11;35(4):316-319. doi: 10.1038/nbt.3820. PubMed PMID: 28398311. 
+ +## Pipeline tools + +* [ABACAS](https://www.ncbi.nlm.nih.gov/pubmed/19497936/) + > Assefa S, Keane TM, Otto TD, Newbold C, Berriman M. ABACAS: algorithm-based automatic contiguation of assembled sequences. Bioinformatics. 2009 Aug 1;25(15):1968-9. doi: 10.1093/bioinformatics/btp347. Epub 2009 Jun 3. PubMed PMID: 19497936; PubMed Central PMCID: PMC2712343. + +* [ASCIIGenome](https://www.ncbi.nlm.nih.gov/pubmed/28119307/) + > Beraldi D. ASCIIGenome: a command line genome browser for console terminals. Bioinformatics. 2017 May 15;33(10):1568-1569. doi: 10.1093/bioinformatics/btx007. PubMed PMID: 28119307; PubMed Central PMCID: PMC5423454. + +* [ARTIC network](https://github.com/artic-network) + +* [Bandage](https://www.ncbi.nlm.nih.gov/pubmed/26099265) + > Wick R.R., Schultz M.B., Zobel J. & Holt K.E. Bandage: interactive visualisation of de novo genome assemblies. Bioinformatics, 31(20), 3350-3352. doi: 10.1093/bioinformatics/btv383. PubMed PMID: 26099265; PubMed Central PMCID: PMC4595904. + +* [BCFtools](https://www.ncbi.nlm.nih.gov/pubmed/21903627/) + > Li H. A statistical framework for SNP calling, mutation discovery, association mapping and population genetical parameter estimation from sequencing data. Bioinformatics. 2011 Nov 1;27(21):2987-93. doi: 10.1093/bioinformatics/btr509. Epub 2011 Sep 8. PubMed PMID: 21903627; PubMed Central PMCID: PMC3198575. + +* [BEDTools](https://www.ncbi.nlm.nih.gov/pubmed/20110278/) + > Quinlan AR, Hall IM. BEDTools: a flexible suite of utilities for comparing genomic features. Bioinformatics. 2010 Mar 15;26(6):841-2. doi: 10.1093/bioinformatics/btq033. Epub 2010 Jan 28. PubMed PMID: 20110278; PubMed Central PMCID: PMC2832824. + +* [BLAST](https://www.ncbi.nlm.nih.gov/pubmed/20003500/) + > Camacho C, Coulouris G, Avagyan V, Ma N, Papadopoulos J, Bealer K, Madden TL. BLAST+: architecture and applications. BMC Bioinformatics. 2009 Dec 15;10:421. doi: 10.1186/1471-2105-10-421.
PubMed PMID: 20003500; PubMed Central PMCID: PMC2803857. + +* [Bowtie 2](https://www.ncbi.nlm.nih.gov/pubmed/22388286/) + > Langmead B, Salzberg SL. Fast gapped-read alignment with Bowtie 2. Nat Methods. 2012 Mar 4;9(4):357-9. doi: 10.1038/nmeth.1923. PubMed PMID: 22388286; PubMed Central PMCID: PMC3322381. + +* [Cutadapt](http://dx.doi.org/10.14806/ej.17.1.200) + > Martin, M. Cutadapt removes adapter sequences from high-throughput sequencing reads. EMBnet.journal, [S.l.], v. 17, n. 1, pp. 10-12, May 2011. ISSN 2226-6089. doi: 10.14806/ej.17.1.200. + +* [fastp](https://www.ncbi.nlm.nih.gov/pubmed/30423086/) + > Chen S, Zhou Y, Chen Y, Gu J. fastp: an ultra-fast all-in-one FASTQ preprocessor. Bioinformatics. 2018 Sep 1;34(17):i884-i890. doi: 10.1093/bioinformatics/bty560. PubMed PMID: 30423086; PubMed Central PMCID: PMC6129281. + +* [FastQC](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/) + +* [iVar](https://www.ncbi.nlm.nih.gov/pubmed/30621750/) + > Grubaugh ND, Gangavarapu K, Quick J, Matteson NL, De Jesus JG, Main BJ, Tan AL, Paul LM, Brackney DE, Grewal S, Gurfield N, Van Rompay KKA, Isern S, Michael SF, Coffey LL, Loman NJ, Andersen KG. An amplicon-based sequencing framework for accurately measuring intrahost virus diversity using PrimalSeq and iVar. Genome Biol. 2019 Jan 8;20(1):8. doi: 10.1186/s13059-018-1618-7. PubMed PMID: 30621750; PubMed Central PMCID: PMC6325816. + +* [Kraken 2](https://www.ncbi.nlm.nih.gov/pubmed/31779668/) + > Wood DE, Lu J, Langmead B. Improved metagenomic analysis with Kraken 2. Genome Biol. 2019 Nov 28;20(1):257. doi: 10.1186/s13059-019-1891-0. PubMed PMID: 31779668; PubMed Central PMCID: PMC6883579. + +* [minia](https://www.ncbi.nlm.nih.gov/pubmed/24040893/) + > Chikhi R, Rizk G. Space-efficient and exact de Bruijn graph representation based on a Bloom filter. Algorithms Mol Biol. 2013 Sep 16;8(1):22. doi: 10.1186/1748-7188-8-22. PubMed PMID: 24040893; PubMed Central PMCID: PMC3848682.
+ +* [mosdepth](https://www.ncbi.nlm.nih.gov/pubmed/29096012) + > Pedersen BS, Quinlan AR. Mosdepth: Quick Coverage Calculation for Genomes and Exomes. Bioinformatics. 2018 Mar 1;34(5):867-868. doi: 10.1093/bioinformatics/btx699. PubMed PMID: 29096012; PubMed Central PMCID: PMC6030888. + +* [MultiQC](https://www.ncbi.nlm.nih.gov/pubmed/27312411/) + > Ewels P, Magnusson M, Lundin S, Käller M. MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics. 2016 Oct 1;32(19):3047-8. doi: 10.1093/bioinformatics/btw354. Epub 2016 Jun 16. PubMed PMID: 27312411; PubMed Central PMCID: PMC5039924. + +* [NanoPlot](https://pubmed.ncbi.nlm.nih.gov/29547981/) + > De Coster W, D'Hert S, Schultz DT, Cruts M, Van Broeckhoven C. NanoPack: visualizing and processing long-read sequencing data. Bioinformatics. 2018 Aug 1;34(15):2666-2669. doi: 10.1093/bioinformatics/bty149. PubMed PMID: 29547981; PubMed Central PMCID: PMC6061794. + +* [Nextstrain](https://pubmed.ncbi.nlm.nih.gov/29790939/) + > Hadfield J, Megill C, Bell SM, Huddleston J, Potter B, Callender C, Sagulenko P, Bedford T, Neher RA. Nextstrain: real-time tracking of pathogen evolution. Bioinformatics. 2018 Dec 1;34(23):4121-4123. doi: 10.1093/bioinformatics/bty407. PubMed PMID: 29790939; PubMed Central PMCID: PMC6247931. + +* [pangolin](https://github.com/cov-lineages/pangolin) + > Áine O'Toole, Emily Scher, Anthony Underwood, Ben Jackson, Verity Hill, JT McCrone, Chris Ruis, Khali Abu-Dahab, Ben Taylor, Corin Yeats, Louis du Plessis, David Aanensen, Eddie Holmes, Oliver Pybus, Andrew Rambaut. pangolin: lineage assignment in an emerging pandemic as an epidemiological tool. Publication in preparation. + +* [picard-tools](http://broadinstitute.github.io/picard) + +* [pycoQC](https://doi.org/10.21105/joss.01236) + > Leger A, Leonardi T (2019). pycoQC, interactive quality control for Oxford Nanopore Sequencing. Journal of Open Source Software, 4(34), 1236.
+ +* [QUAST](https://www.ncbi.nlm.nih.gov/pubmed/23422339/) + > Gurevich A, Saveliev V, Vyahhi N, Tesler G. QUAST: quality assessment tool for genome assemblies. Bioinformatics. 2013 Apr 15;29(8):1072-5. doi: 10.1093/bioinformatics/btt086. Epub 2013 Feb 19. PubMed PMID: 23422339; PubMed Central PMCID: PMC3624806. + +* [R](https://www.R-project.org/) + > R Core Team (2017). R: A language and environment for statistical computing. R Foundation for Statistical Computing, Vienna, Austria. + +* [SAMtools](https://www.ncbi.nlm.nih.gov/pubmed/19505943/) + > Li H, Handsaker B, Wysoker A, Fennell T, Ruan J, Homer N, Marth G, Abecasis G, Durbin R; 1000 Genome Project Data Processing Subgroup. The Sequence Alignment/Map format and SAMtools. Bioinformatics. 2009 Aug 15;25(16):2078-9. doi: 10.1093/bioinformatics/btp352. Epub 2009 Jun 8. PubMed PMID: 19505943; PubMed Central PMCID: PMC2723002. + +* [SnpEff](https://www.ncbi.nlm.nih.gov/pubmed/22728672/) + > Cingolani P, Platts A, Wang le L, Coon M, Nguyen T, Wang L, Land SJ, Lu X, Ruden DM. A program for annotating and predicting the effects of single nucleotide polymorphisms, SnpEff: SNPs in the genome of Drosophila melanogaster strain w1118; iso-2; iso-3. Fly (Austin). 2012 Apr-Jun;6(2):80-92. doi: 10.4161/fly.19695. PubMed PMID: 22728672; PubMed Central PMCID: PMC3679285. + +* [SnpSift](https://www.ncbi.nlm.nih.gov/pubmed/22435069/) + > Cingolani P, Patel VM, Coon M, Nguyen T, Land SJ, Ruden DM, Lu X. Using Drosophila melanogaster as a Model for Genotoxic Chemical Mutational Studies with a New Program, SnpSift. Front Genet. 2012 Mar 15;3:35. doi: 10.3389/fgene.2012.00035. eCollection 2012. PubMed PMID: 22435069; PubMed Central PMCID: PMC3304048. + +* [SPAdes](https://www.ncbi.nlm.nih.gov/pubmed/24093227/) + > Nurk S, Bankevich A, Antipov D, Gurevich AA, Korobeynikov A, Lapidus A, Prjibelski AD, Pyshkin A, Sirotkin A, Sirotkin Y, Stepanauskas R, Clingenpeel SR, Woyke T, McLean JS, Lasken R, Tesler G, Alekseyev MA, Pevzner PA. 
Assembling single-cell genomes and mini-metagenomes from chimeric MDA products. J Comput Biol. 2013 Oct;20(10):714-37. doi: 10.1089/cmb.2013.0084. PubMed PMID: 24093227; PubMed Central PMCID: PMC3791033. + +* [Unicycler](https://www.ncbi.nlm.nih.gov/pubmed/28594827/) + > Wick RR, Judd LM, Gorrie CL, Holt KE. Unicycler: Resolving bacterial genome assemblies from short and long sequencing reads. PLoS Comput Biol. 2017 Jun 8;13(6):e1005595. doi: 10.1371/journal.pcbi.1005595. eCollection 2017 Jun. PubMed PMID: 28594827; PubMed Central PMCID: PMC5481147. + +## Software packaging/containerisation tools + +* [Anaconda](https://anaconda.com) + > Anaconda Software Distribution. Computer software. Vers. 2-2.4.0. Anaconda, Nov. 2016. Web. + +* [Bioconda](https://pubmed.ncbi.nlm.nih.gov/29967506/) + > Grüning B, Dale R, Sjödin A, Chapman BA, Rowe J, Tomkins-Tinch CH, Valieris R, Köster J; Bioconda Team. Bioconda: sustainable and comprehensive software distribution for the life sciences. Nat Methods. 2018 Jul;15(7):475-476. doi: 10.1038/s41592-018-0046-7. PubMed PMID: 29967506. + +* [BioContainers](https://pubmed.ncbi.nlm.nih.gov/28379341/) + > da Veiga Leprevost F, Grüning B, Aflitos SA, Röst HL, Uszkoreit J, Barsnes H, Vaudel M, Moreno P, Gatto L, Weber J, Bai M, Jimenez RC, Sachsenberg T, Pfeuffer J, Alvarez RV, Griss J, Nesvizhskii AI, Perez-Riverol Y. BioContainers: an open-source and community-driven framework for software standardization. Bioinformatics. 2017 Aug 15;33(16):2580-2582. doi: 10.1093/bioinformatics/btx192. PubMed PMID: 28379341; PubMed Central PMCID: PMC5870671. + +* [Docker](https://dl.acm.org/doi/10.5555/2600239.2600241) + +* [Singularity](https://pubmed.ncbi.nlm.nih.gov/28494014/) + > Kurtzer GM, Sochat V, Bauer MW. Singularity: Scientific containers for mobility of compute. PLoS One. 2017 May 11;12(5):e0177459. doi: 10.1371/journal.pone.0177459. eCollection 2017. PubMed PMID: 28494014; PubMed Central PMCID: PMC5426675. 
diff --git a/assets/multiqc_config_illumina.yaml b/assets/multiqc_config_illumina.yaml index 3f133b74..a8d57bf9 100644 --- a/assets/multiqc_config_illumina.yaml +++ b/assets/multiqc_config_illumina.yaml @@ -138,7 +138,7 @@ custom_data: section_name: 'Variant calling metrics' description: 'generated by the nf-core/viralrecon pipeline' plot_type: 'table' - headers: + headers: '# Input reads': description: 'Total number of reads in raw fastq file' format: '{:,.0f}' @@ -279,4 +279,4 @@ sp: mosdepth/global_dist: fn: '*.global.dist.txt' cutadapt: - fn: '*.cutadapt.log' \ No newline at end of file + fn: '*.cutadapt.log' diff --git a/assets/multiqc_config_nanopore.yaml b/assets/multiqc_config_nanopore.yaml index 05368a96..3412dcdd 100644 --- a/assets/multiqc_config_nanopore.yaml +++ b/assets/multiqc_config_nanopore.yaml @@ -85,7 +85,7 @@ custom_data: section_name: 'Variant calling metrics' description: 'generated by the nf-core/viralrecon pipeline' plot_type: 'table' - headers: + headers: '# Mapped reads': description: 'Total number of mapped reads relative to the viral genome' format: '{:,.0f}' diff --git a/assets/sendmail_template.txt b/assets/sendmail_template.txt index c903d626..15f1ccd6 100644 --- a/assets/sendmail_template.txt +++ b/assets/sendmail_template.txt @@ -15,15 +15,15 @@ Content-ID: Content-Disposition: inline; filename="nf-core-viralrecon_logo.png" <% out << new File("$projectDir/assets/nf-core-viralrecon_logo.png"). - bytes. - encodeBase64(). - toString(). - tokenize( '\n' )*. - toList()*. - collate( 76 )*. - collect { it.join() }. - flatten(). - join( '\n' ) %> + bytes. + encodeBase64(). + toString(). + tokenize( '\n' )*. + toList()*. + collate( 76 )*. + collect { it.join() }. + flatten(). + join( '\n' ) %> <% if (mqcFile){ @@ -37,15 +37,15 @@ Content-ID: Content-Disposition: attachment; filename=\"${mqcFileObj.getName()}\" ${mqcFileObj. - bytes. - encodeBase64(). - toString(). - tokenize( '\n' )*. - toList()*. - collate( 76 )*. - collect { it.join() }. 
- flatten(). - join( '\n' )} + bytes. + encodeBase64(). + toString(). + tokenize( '\n' )*. + toList()*. + collate( 76 )*. + collect { it.join() }. + flatten(). + join( '\n' )} """ }} %> diff --git a/bin/check_samplesheet.py b/bin/check_samplesheet.py index dea9eeb5..9d433f29 100755 --- a/bin/check_samplesheet.py +++ b/bin/check_samplesheet.py @@ -188,7 +188,7 @@ def check_nanopore_samplesheet(file_in, file_out): print_error("Barcode entry is not an integer!", 'Line', line) else: barcode = 'barcode%s' % (barcode.zfill(2)) - + ## Create sample mapping dictionary = { sample: barcode } if barcode in sample_mapping_dict.values(): print_error("Samplesheet contains duplicate entries in the 'barcode' column!", "Line", line) diff --git a/bin/fastq_dir_to_samplesheet.py b/bin/fastq_dir_to_samplesheet.py index 93a32edc..74485715 100755 --- a/bin/fastq_dir_to_samplesheet.py +++ b/bin/fastq_dir_to_samplesheet.py @@ -30,7 +30,7 @@ def fastq_dir_to_samplesheet(fastq_dir, samplesheet_file, read1_extension='_R1_0 if sanitise_name: sample = sanitise_name_delimiter.join(os.path.basename(read1_file).split(sanitise_name_delimiter)[:sanitise_name_index]) read_dict[sample] = [read1_file] - + ## Get read 2 files read2_files = glob.glob(os.path.join(fastq_dir, f'*{read2_extension}'), recursive=False) if not single_end and len(read2_files) != 0: @@ -54,6 +54,14 @@ def fastq_dir_to_samplesheet(fastq_dir, samplesheet_file, read1_extension='_R1_0 if len(reads) == 1: sample_info += ',' fout.write(f'{sample_info}\n') + else: + error_str = "\nWARNING: No FastQ files found so samplesheet has not been created!\n\n" + error_str += "Please check the values provided for the:\n" + error_str += " - Path to the directory containing the FastQ files\n" + error_str += " - '--read1_extension' parameter\n" + error_str += " - '--read2_extension' parameter\n" + print(error_str) + sys.exit(1) def main(args=None): args = parse_args(args) @@ -64,8 +72,8 @@ def main(args=None): read1_extension = 
args.READ1_EXTENSION, read2_extension = args.READ2_EXTENSION, single_end = args.SINGLE_END, - sanitise_name = args.SANITISE_NAME, - sanitise_name_delimiter = args.SANITISE_NAME_DELIMITER, + sanitise_name = args.SANITISE_NAME, + sanitise_name_delimiter = args.SANITISE_NAME_DELIMITER, sanitise_name_index = args.SANITISE_NAME_INDEX ) diff --git a/bin/multiqc_to_custom_csv.py b/bin/multiqc_to_custom_csv.py index 346fd32d..f6f5bfb0 100755 --- a/bin/multiqc_to_custom_csv.py +++ b/bin/multiqc_to_custom_csv.py @@ -208,7 +208,7 @@ def main(args=None): if os.path.exists(yaml_file): metrics_dict = yaml_fields_to_dict(yaml_file=yaml_file, append_dict={}, field_mapping_list=[('# Mapped reads', ['mapped_passed'])], valid_sample_list=[]) sample_list = metrics_dict.keys() - + metrics_dict_to_file( file_field_list = nanopore_variant_files, multiqc_data_dir = args.MULTIQC_DATA_DIR, diff --git a/bin/sra_ids_to_runinfo.py b/bin/sra_ids_to_runinfo.py index 9e993589..002a859a 100755 --- a/bin/sra_ids_to_runinfo.py +++ b/bin/sra_ids_to_runinfo.py @@ -110,7 +110,7 @@ def fetch_sra_runinfo(file_in,file_out,platform_list=[],library_layout_list=[]): prefix = match.group() if prefix in PREFIX_LIST: if not db_id in seen_ids: - + ids = [db_id] ## Resolve/expand these ids against GEO URL if prefix in ['GSE']: diff --git a/bin/sra_runinfo_to_ftp.py b/bin/sra_runinfo_to_ftp.py index 6fd09aeb..8056015f 100755 --- a/bin/sra_runinfo_to_ftp.py +++ b/bin/sra_runinfo_to_ftp.py @@ -48,10 +48,10 @@ def parse_sra_runinfo(file_in): sample_dict['fastq_1'] = fastq_files sample_dict['md5_1'] = fastq_md5 else: - ## In some instances FTP links don't exist for FastQ files + ## In some instances FTP links don't exist for FastQ files ## These have to be downloaded via fastq-dump / fasterq-dump / parallel-fastq-dump via the run id db_id = run_id - + elif library == 'PAIRED': sample_dict = collections.OrderedDict([('fastq_1',''), ('fastq_2',''), ('md5_1',''), ('md5_2',''), ('single_end','false')]) if fastq_files: @@ 
-62,14 +62,14 @@ def parse_sra_runinfo(file_in): sample_dict['fastq_1'] = fq_files[0] sample_dict['fastq_2'] = fq_files[1] sample_dict['md5_1'] = fq_md5[0] - sample_dict['md5_2'] = fq_md5[1] + sample_dict['md5_2'] = fq_md5[1] else: print("Invalid FastQ files found for database id:'{}'!.".format(run_id)) else: print("Invalid number of FastQ files ({}) found for paired-end database id:'{}'!.".format(len(fq_files), run_id)) else: db_id = run_id - + if sample_dict: sample_dict.update(line_dict) if db_id not in runinfo_dict: @@ -79,20 +79,19 @@ def parse_sra_runinfo(file_in): print("Input run info file contains duplicate rows!\nLine: '{}'".format(line)) else: runinfo_dict[db_id].append(sample_dict) - return runinfo_dict def sra_runinfo_to_ftp(files_in,file_out): samplesheet_dict = {} for file_in in files_in: - runinfo_dict = parse_sra_runinfo(file_in) + runinfo_dict = parse_sra_runinfo(file_in) for db_id in runinfo_dict.keys(): if db_id not in samplesheet_dict: samplesheet_dict[db_id] = runinfo_dict[db_id] else: print("Duplicate sample identifier found!\nID: '{}'".format(db_id)) - + ## Write samplesheet with paths to FastQ files and md5 sums if samplesheet_dict: out_dir = os.path.dirname(file_out) @@ -108,7 +107,7 @@ def sra_runinfo_to_ftp(files_in,file_out): def main(args=None): args = parse_args(args) sra_runinfo_to_ftp([x.strip() for x in args.FILES_IN.split(',')], args.FILE_OUT) - + if __name__ == '__main__': sys.exit(main()) diff --git a/conf/base.config b/conf/base.config index c612725c..eed1ffab 100644 --- a/conf/base.config +++ b/conf/base.config @@ -1,48 +1,47 @@ /* - * ------------------------------------------------- - * nf-core/viralrecon Nextflow base config file - * ------------------------------------------------- - * A 'blank slate' config file, appropriate for general - * use on most high performace compute environments. - * Assumes that all software is installed and available - * on the PATH. 
Runs in `local` mode - all jobs will be - * run on the logged in environment. - */ +======================================================================================== + nf-core/viralrecon Nextflow base config file +======================================================================================== + A 'blank slate' config file, appropriate for general use on most high performance + compute environments. Assumes that all software is installed and available on + the PATH. Runs in `local` mode - all jobs will be run on the logged in environment. +---------------------------------------------------------------------------------------- +*/ process { - cpus = { check_max( 1 * task.attempt, 'cpus' ) } - memory = { check_max( 6.GB * task.attempt, 'memory' ) } - time = { check_max( 4.h * task.attempt, 'time' ) } + cpus = { check_max( 1 * task.attempt, 'cpus' ) } + memory = { check_max( 6.GB * task.attempt, 'memory' ) } + time = { check_max( 4.h * task.attempt, 'time' ) } - errorStrategy = { task.exitStatus in [143,137,104,134,139] ? 'retry' : 'finish' } - maxRetries = 1 - maxErrors = '-1' + errorStrategy = { task.exitStatus in [143,137,104,134,139] ? 
'retry' : 'finish' } + maxRetries = 1 + maxErrors = '-1' - // Process-specific resource requirements - withLabel:process_low { - cpus = { check_max( 2 * task.attempt, 'cpus' ) } - memory = { check_max( 12.GB * task.attempt, 'memory' ) } - time = { check_max( 4.h * task.attempt, 'time' ) } - } - withLabel:process_medium { - cpus = { check_max( 6 * task.attempt, 'cpus' ) } - memory = { check_max( 36.GB * task.attempt, 'memory' ) } - time = { check_max( 8.h * task.attempt, 'time' ) } - } - withLabel:process_high { - cpus = { check_max( 12 * task.attempt, 'cpus' ) } - memory = { check_max( 72.GB * task.attempt, 'memory' ) } - time = { check_max( 16.h * task.attempt, 'time' ) } - } - withLabel:process_long { - time = { check_max( 20.h * task.attempt, 'time' ) } - } - withLabel:error_ignore { - errorStrategy = 'ignore' - } - withLabel:error_retry { - errorStrategy = 'retry' - maxRetries = 2 - } + // Process-specific resource requirements + withLabel:process_low { + cpus = { check_max( 2 * task.attempt, 'cpus' ) } + memory = { check_max( 12.GB * task.attempt, 'memory' ) } + time = { check_max( 4.h * task.attempt, 'time' ) } + } + withLabel:process_medium { + cpus = { check_max( 6 * task.attempt, 'cpus' ) } + memory = { check_max( 36.GB * task.attempt, 'memory' ) } + time = { check_max( 8.h * task.attempt, 'time' ) } + } + withLabel:process_high { + cpus = { check_max( 12 * task.attempt, 'cpus' ) } + memory = { check_max( 72.GB * task.attempt, 'memory' ) } + time = { check_max( 16.h * task.attempt, 'time' ) } + } + withLabel:process_long { + time = { check_max( 20.h * task.attempt, 'time' ) } + } + withLabel:error_ignore { + errorStrategy = 'ignore' + } + withLabel:error_retry { + errorStrategy = 'retry' + maxRetries = 2 + } } diff --git a/conf/modules.config b/conf/modules.config old mode 100755 new mode 100644 index 57915e5f..609199ef --- a/conf/modules.config +++ b/conf/modules.config @@ -1,25 +1,24 @@ /* - * -------------------------------------------------- - * Config 
file for defining DSL2 per module options - * -------------------------------------------------- - * - * Available keys to override module options: - * args = Additional arguments appended to command in module. - * args2 = Second set of arguments appended to command in module (multi-tool modules). - * args3 = Third set of arguments appended to command in module (multi-tool modules). - * publish_dir = Directory to publish results. - * publish_by_meta = Groovy list of keys available in meta map to append as directories to "publish_dir" path - * If publish_by_meta = true - Value of ${meta['id']} is appended as a directory to "publish_dir" path - * If publish_by_meta = ['id', 'custompath'] - If "id" is in meta map and "custompath" isn't then "${meta['id']}/custompath/" - * is appended as a directory to "publish_dir" path - * If publish_by_meta = false / null - No directories are appended to "publish_dir" path - * publish_files = Groovy map where key = "file_ext" and value = "directory" to publish results for that file extension - * The value of "directory" is appended to the standard "publish_dir" path as defined above. - * If publish_files = null (unspecified) - All files are published. - * If publish_files = false - No files are published. - * suffix = File name suffix for output files. - * - */ +======================================================================================== + Config file for defining DSL2 per module options +======================================================================================== + Available keys to override module options: + args = Additional arguments appended to command in module. + args2 = Second set of arguments appended to command in module (multi-tool modules). + args3 = Third set of arguments appended to command in module (multi-tool modules). + publish_dir = Directory to publish results. 
+ publish_by_meta = Groovy list of keys available in meta map to append as directories to "publish_dir" path + If publish_by_meta = true - Value of ${meta['id']} is appended as a directory to "publish_dir" path + If publish_by_meta = ['id', 'custompath'] - If "id" is in meta map and "custompath" isn't then "${meta['id']}/custompath/" + is appended as a directory to "publish_dir" path + If publish_by_meta = false / null - No directories are appended to "publish_dir" path + publish_files = Groovy map where key = "file_ext" and value = "directory" to publish results for that file extension + The value of "directory" is appended to the standard "publish_dir" path as defined above. + If publish_files = null (unspecified) - All files are published. + If publish_files = false - No files are published. + suffix = File name suffix for output files. +---------------------------------------------------------------------------------------- +*/ params { modules { @@ -123,7 +122,7 @@ params { } 'nanopore_snpeff_tabix' { args = '-p vcf -f' - suffix = '.snpeff' + suffix = '.snpeff' publish_dir = "${params.artic_minion_caller}/snpeff" } 'nanopore_snpeff_stats' { @@ -216,6 +215,7 @@ params { publish_dir = 'variants/bowtie2' } 'illumina_picard_collectmultiplemetrics' { + args = 'VALIDATION_STRINGENCY=LENIENT TMP_DIR=tmp' publish_files = ['metrics':'picard_metrics', 'pdf': 'picard_metrics/pdf'] publish_dir = 'variants/bowtie2' } @@ -260,7 +260,7 @@ params { publish_dir = 'variants/ivar' } 'illumina_ivar_consensus' { - args = '-t 0.75 -q 20 -m 10 -n N' + args = '-t 0.75 -q 20 -m 10 -n N' args2 = '--count-orphans --no-BAQ --max-depth 0 --min-BQ 0 -aa' suffix = '.consensus' publish_dir = 'variants/ivar/consensus' @@ -279,7 +279,7 @@ params { } 'illumina_ivar_snpeff_tabix' { args = '-p vcf -f' - suffix = '.snpeff' + suffix = '.snpeff' publish_dir = 'variants/ivar/snpeff' } 'illumina_ivar_snpeff_stats' { @@ -329,7 +329,7 @@ params { } 'illumina_bcftools_consensus_maskfasta' { suffix = 
'.masked' - publish_files = false + publish_files = false publish_dir = 'variants/bcftools' } 'illumina_bcftools_consensus_bcftools' { @@ -345,12 +345,12 @@ params { publish_dir = 'variants/bcftools/snpeff' } 'illumina_bcftools_snpeff_bgzip' { - suffix = '.snpeff' + suffix = '.snpeff' publish_dir = 'variants/bcftools/snpeff' } 'illumina_bcftools_snpeff_tabix' { args = '-p vcf -f' - suffix = '.snpeff' + suffix = '.snpeff' publish_dir = 'variants/bcftools/snpeff' } 'illumina_bcftools_snpeff_stats' { diff --git a/conf/test.config b/conf/test.config index 592e9c5e..28450b79 100644 --- a/conf/test.config +++ b/conf/test.config @@ -1,33 +1,36 @@ /* - * ------------------------------------------------- - * Nextflow config file for running tests - * ------------------------------------------------- - * Defines bundled input files and everything required - * to run a fast and simple test. Use as follows: - * nextflow run nf-core/viralrecon -profile test, - */ +======================================================================================== + Nextflow config file for running minimal tests +======================================================================================== + Defines input files and everything required to run a fast and simple pipeline test. 
+ + Use as follows: + nextflow run nf-core/viralrecon -profile test, + +---------------------------------------------------------------------------------------- +*/ params { - config_profile_name = 'Test profile' - config_profile_description = 'Minimal test dataset to check pipeline function' + config_profile_name = 'Test profile' + config_profile_description = 'Minimal test dataset to check pipeline function' - // Limit resources so that this can run on GitHub Actions - max_cpus = 2 - max_memory = 6.GB - max_time = 6.h + // Limit resources so that this can run on GitHub Actions + max_cpus = 2 + max_memory = 6.GB + max_time = 6.h - // Input data to test amplicon analysis - input = 'https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/samplesheet/samplesheet_test_illumina_amplicon.csv' - platform = 'illumina' - protocol = 'amplicon' - primer_set = 'artic' - primer_set_version = 1 + // Input data to test amplicon analysis + input = 'https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/samplesheet/samplesheet_test_illumina_amplicon.csv' + platform = 'illumina' + protocol = 'amplicon' + primer_set = 'artic' + primer_set_version = 1 - // Genome references - genome = 'MN908947.3' - kraken2_db = 'https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/genome/kraken2/kraken2_hs22.tar.gz' + // Genome references + genome = 'MN908947.3' + kraken2_db = 'https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/genome/kraken2/kraken2_hs22.tar.gz' - // Other pipeline options - callers = 'ivar,bcftools' - assemblers = 'spades,unicycler,minia' + // Other pipeline options + callers = 'ivar,bcftools' + assemblers = 'spades,unicycler,minia' } diff --git a/conf/test_full.config b/conf/test_full.config index 781948b9..c44d4b0a 100644 --- a/conf/test_full.config +++ b/conf/test_full.config @@ -1,27 +1,30 @@ /* - * ------------------------------------------------- - * Nextflow config file for running full-size tests - * 
------------------------------------------------- - * Defines bundled input files and everything required - * to run a full size pipeline test. Use as follows: - * nextflow run nf-core/viralrecon -profile test_full, - */ +======================================================================================== + Nextflow config file for running full-size tests +======================================================================================== + Defines input files and everything required to run a full size pipeline test. + + Use as follows: + nextflow run nf-core/viralrecon -profile test_full, + +---------------------------------------------------------------------------------------- +*/ params { - config_profile_name = 'Full test profile' - config_profile_description = 'Full test dataset to check pipeline function' + config_profile_name = 'Full test profile' + config_profile_description = 'Full test dataset to check pipeline function' - // Input data for full test of amplicon analysis - input = 's3://nf-core-awsmegatests/viralrecon/input_data/210212_K00102_0557_AHKN3LBBXY/samplesheet.csv' - platform = 'illumina' - protocol = 'amplicon' - primer_set = 'artic' - primer_set_version = 3 + // Input data for full test of amplicon analysis + input = 's3://nf-core-awsmegatests/viralrecon/input_data/210212_K00102_0557_AHKN3LBBXY/samplesheet.csv' + platform = 'illumina' + protocol = 'amplicon' + primer_set = 'artic' + primer_set_version = 3 - // Genome references - genome = 'MN908947.3' + // Genome references + genome = 'MN908947.3' - // Other pipeline options - callers = 'ivar,bcftools' - assemblers = 'spades,unicycler,minia' + // Other pipeline options + callers = 'ivar,bcftools' + assemblers = 'spades,unicycler,minia' } diff --git a/conf/test_full_nanopore.config b/conf/test_full_nanopore.config index 14f22d31..31e1c124 100644 --- a/conf/test_full_nanopore.config +++ b/conf/test_full_nanopore.config @@ -1,28 +1,30 @@ /* - * 
------------------------------------------------- - * Nextflow config file for running full-size tests - * ------------------------------------------------- - * Defines bundled input files and everything required - * to run a full size pipeline test. Use as follows: - * nextflow run nf-core/viralrecon -profile test_full_nanopore, - */ +======================================================================================== + Nextflow config file for running full-size tests +======================================================================================== + Defines input files and everything required to run a full size pipeline test. + + Use as follows: + nextflow run nf-core/viralrecon -profile test_full_nanopore, -params { - config_profile_name = 'Full test profile for nanopore data' - config_profile_description = 'Full test dataset to check pipeline function' +---------------------------------------------------------------------------------------- +*/ - // Input data for full test of amplicon analysis - platform = 'nanopore' - input = 's3://nf-core-awsmegatests/viralrecon/input_data/20210205_1526_X4_FAP51364_21fa8135/samplesheet.csv' - fastq_dir = 's3://nf-core-awsmegatests/viralrecon/input_data/20210205_1526_X4_FAP51364_21fa8135/fastq_pass/' - fast5_dir = 's3://nf-core-awsmegatests/viralrecon/input_data/20210205_1526_X4_FAP51364_21fa8135/fast5_pass/' - sequencing_summary = 's3://nf-core-awsmegatests/viralrecon/input_data/20210205_1526_X4_FAP51364_21fa8135/sequencing_summary.txt' - - // Genome references - genome = 'MN908947.3' - primer_set_version = 3 +params { + config_profile_name = 'Full test profile for nanopore data' + config_profile_description = 'Full test dataset to check pipeline function' - // Other parameters - artic_minion_medaka_model = 's3://nf-core-awsmegatests/viralrecon/input_data/20210205_1526_X4_FAP51364_21fa8135/r941_min_high_g360_model.hdf5' + // Input data for full test of amplicon analysis + platform = 'nanopore' + input = 
's3://nf-core-awsmegatests/viralrecon/input_data/20210205_1526_X4_FAP51364_21fa8135/samplesheet.csv' + fastq_dir = 's3://nf-core-awsmegatests/viralrecon/input_data/20210205_1526_X4_FAP51364_21fa8135/fastq_pass/' + fast5_dir = 's3://nf-core-awsmegatests/viralrecon/input_data/20210205_1526_X4_FAP51364_21fa8135/fast5_pass/' + sequencing_summary = 's3://nf-core-awsmegatests/viralrecon/input_data/20210205_1526_X4_FAP51364_21fa8135/sequencing_summary.txt' + + // Genome references + genome = 'MN908947.3' + primer_set_version = 3 + // Other parameters + artic_minion_medaka_model = 's3://nf-core-awsmegatests/viralrecon/input_data/20210205_1526_X4_FAP51364_21fa8135/r941_min_high_g360_model.hdf5' } \ No newline at end of file diff --git a/conf/test_full_sispa.config b/conf/test_full_sispa.config index fecc750b..49b295fc 100644 --- a/conf/test_full_sispa.config +++ b/conf/test_full_sispa.config @@ -1,25 +1,28 @@ /* - * ------------------------------------------------- - * Nextflow config file for running tests - * ------------------------------------------------- - * Defines bundled input files and everything required - * to run a full pipeline test. Use as follows: - * nextflow run nf-core/viralrecon -profile test_full_sispa, - */ +======================================================================================== + Nextflow config file for running full-size tests +======================================================================================== + Defines input files and everything required to run a full size pipeline test. 
+ + Use as follows: + nextflow run nf-core/viralrecon -profile test_full_sispa, + +---------------------------------------------------------------------------------------- +*/ params { - config_profile_name = 'Full test profile' - config_profile_description = 'Full test dataset to check pipeline function' + config_profile_name = 'Full test profile' + config_profile_description = 'Full test dataset to check pipeline function' - // Input data for full test of SISPA/metagenomics analysis - input = 'https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/samplesheet/samplesheet_full_illumina_sispa.csv' - platform = 'illumina' - protocol = 'metagenomic' + // Input data for full test of SISPA/metagenomics analysis + input = 'https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/samplesheet/samplesheet_full_illumina_sispa.csv' + platform = 'illumina' + protocol = 'metagenomic' - // Genome references - genome = 'MN908947.3' + // Genome references + genome = 'MN908947.3' - // Other pipeline options - callers = 'ivar,bcftools' - assemblers = 'spades,unicycler,minia' + // Other pipeline options + callers = 'ivar,bcftools' + assemblers = 'spades,unicycler,minia' } diff --git a/conf/test_nanopore.config b/conf/test_nanopore.config index b01a9f56..d7c6b945 100644 --- a/conf/test_nanopore.config +++ b/conf/test_nanopore.config @@ -1,33 +1,35 @@ /* - * ------------------------------------------------- - * Nextflow config file for running tests - * ------------------------------------------------- - * Defines bundled input files and everything required - * to run a fast and simple test. 
Use as follows: - * nextflow run nf-core/viralrecon -profile test_nanopore, - */ +======================================================================================== + Nextflow config file for running minimal tests +======================================================================================== + Defines input files and everything required to run a fast and simple pipeline test. + + Use as follows: + nextflow run nf-core/viralrecon -profile test_nanopore, + +---------------------------------------------------------------------------------------- +*/ params { - config_profile_name = 'Test profile' - config_profile_description = 'Minimal test dataset to check pipeline function for Nanopore data' + config_profile_name = 'Test profile' + config_profile_description = 'Minimal test dataset to check pipeline function for Nanopore data' - // Limit resources so that this can run on GitHub Actions - max_cpus = 2 - max_memory = 6.GB - max_time = 6.h + // Limit resources so that this can run on GitHub Actions + max_cpus = 2 + max_memory = 6.GB + max_time = 6.h - // Input data to test nanopore analysis - platform = 'nanopore' - input = 'https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/samplesheet/samplesheet_test_nanopore.csv' - fastq_dir = 's3://nf-core-awsmegatests/viralrecon/input_data/minion_test/fastq_pass/' - fast5_dir = 's3://nf-core-awsmegatests/viralrecon/input_data/minion_test/fast5_pass/' - sequencing_summary = 's3://nf-core-awsmegatests/viralrecon/input_data/minion_test/sequencing_summary.txt' - - // Genome references - genome = 'MN908947.3' - primer_set_version = 3 + // Input data to test nanopore analysis + platform = 'nanopore' + input = 'https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/samplesheet/samplesheet_test_nanopore.csv' + fastq_dir = 's3://nf-core-awsmegatests/viralrecon/input_data/minion_test/fastq_pass/' + fast5_dir = 's3://nf-core-awsmegatests/viralrecon/input_data/minion_test/fast5_pass/' + 
sequencing_summary = 's3://nf-core-awsmegatests/viralrecon/input_data/minion_test/sequencing_summary.txt' - // Other parameters - artic_minion_medaka_model = 's3://nf-core-awsmegatests/viralrecon/input_data/minion_test/r941_min_high_g360_model.hdf5' + // Genome references + genome = 'MN908947.3' + primer_set_version = 3 + // Other parameters + artic_minion_medaka_model = 's3://nf-core-awsmegatests/viralrecon/input_data/minion_test/r941_min_high_g360_model.hdf5' } diff --git a/conf/test_sispa.config b/conf/test_sispa.config index 3ee52739..b631f985 100644 --- a/conf/test_sispa.config +++ b/conf/test_sispa.config @@ -1,31 +1,34 @@ /* - * ------------------------------------------------- - * Nextflow config file for running tests - * ------------------------------------------------- - * Defines bundled input files and everything required - * to run a fast and simple test. Use as follows: - * nextflow run nf-core/viralrecon -profile test_sispa, - */ +======================================================================================== + Nextflow config file for running minimal tests +======================================================================================== + Defines input files and everything required to run a fast and simple pipeline test. 
+ + Use as follows: + nextflow run nf-core/viralrecon -profile test_sispa, + +---------------------------------------------------------------------------------------- +*/ params { - config_profile_name = 'Test profile' - config_profile_description = 'Minimal test dataset to check pipeline function' + config_profile_name = 'Test profile' + config_profile_description = 'Minimal test dataset to check pipeline function' - // Limit resources so that this can run on GitHub Actions - max_cpus = 2 - max_memory = 6.GB - max_time = 6.h + // Limit resources so that this can run on GitHub Actions + max_cpus = 2 + max_memory = 6.GB + max_time = 6.h - // Input data to test SISPA/metagenomics analysis - input = 'https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/samplesheet/samplesheet_test_illumina_sispa.csv' - platform = 'illumina' - protocol = 'metagenomic' + // Input data to test SISPA/metagenomics analysis + input = 'https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/samplesheet/samplesheet_test_illumina_sispa.csv' + platform = 'illumina' + protocol = 'metagenomic' - // Genome references - genome = 'MN908947.3' - kraken2_db = 'https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/genome/kraken2/kraken2_hs22.tar.gz' + // Genome references + genome = 'MN908947.3' + kraken2_db = 'https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/genome/kraken2/kraken2_hs22.tar.gz' - // Other pipeline options - callers = 'ivar,bcftools' - assemblers = 'spades,unicycler,minia' + // Other pipeline options + callers = 'ivar,bcftools' + assemblers = 'spades,unicycler,minia' } diff --git a/conf/test_sra.config b/conf/test_sra.config index 1a80ac41..05ca823d 100644 --- a/conf/test_sra.config +++ b/conf/test_sra.config @@ -1,21 +1,24 @@ /* - * ------------------------------------------------- - * Nextflow config file for running tests - * ------------------------------------------------- - * Defines bundled input files and everything 
required - * to run a fast and simple test. Use as follows: - * nextflow run nf-core/viralrecon -profile test_sra, - */ +======================================================================================== + Nextflow config file for running minimal tests +======================================================================================== + Defines input files and everything required to run a fast and simple pipeline test. + + Use as follows: + nextflow run nf-core/viralrecon -profile test_sra, + +---------------------------------------------------------------------------------------- +*/ params { - config_profile_name = 'Test profile' - config_profile_description = 'Minimal test dataset to check pipeline function' + config_profile_name = 'Test profile' + config_profile_description = 'Minimal test dataset to check pipeline function' - // Limit resources so that this can run on GitHub Actions - max_cpus = 2 - max_memory = 6.GB - max_time = 6.h + // Limit resources so that this can run on GitHub Actions + max_cpus = 2 + max_memory = 6.GB + max_time = 6.h - // Input data to test SRA download functionality using SISPA/metagenomics data - public_data_ids = 'https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/samplesheet/samplesheet_test_sra.csv' + // Input data to test SRA download functionality using SISPA/metagenomics data + public_data_ids = 'https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/samplesheet/samplesheet_test_sra.csv' } diff --git a/docs/README.md b/docs/README.md index 276d9690..0e457111 100644 --- a/docs/README.md +++ b/docs/README.md @@ -3,8 +3,8 @@ The nf-core/viralrecon documentation is split into the following pages: * [Usage](usage.md) - * An overview of how the pipeline works, how to run it and a description of all of the different command-line flags. + * An overview of how the pipeline works, how to run it and a description of all of the different command-line flags. 
* [Output](output.md) - * An overview of the different results produced by the pipeline and how to interpret them. + * An overview of the different results produced by the pipeline and how to interpret them. You can find a lot more documentation about installing, configuring and running nf-core pipelines on the website: [https://nf-co.re](https://nf-co.re) diff --git a/docs/output.md b/docs/output.md index 51503207..34a2ea7b 100644 --- a/docs/output.md +++ b/docs/output.md @@ -1,875 +1,875 @@ - -# Introduction - -This document describes the output produced by the pipeline. Most of the plots are taken from the MultiQC report, which summarises results at the end of the pipeline. - -The directories listed below will be created in the results directory after the pipeline has finished. All paths are relative to the top-level results directory. - -# Nanopore: Pipeline overview - -* [Preprocessing](#nanopore-preprocessing) - * [pycoQC](#nanopore-pycoqc) - Sequencing QC - * [artic guppyplex](#nanopore-artic-guppyplex) - Aggregate pre-demultiplexed reads from MinKNOW/Guppy - * [NanoPlot](#nanopore-nanoplot) - Read QC -* [Variant calling](#nanopore-variant-calling) - * [artic minion](#nanopore-artic-minion) - Align reads, call variants and generate consensus sequence -* [Downstream analysis](#nanopore-downstream-analysis) - * [SAMtools](#nanopore-samtools) - Remove unmapped reads and obtain alignment metrics - * [mosdepth](#nanopore-mosdepth) - Genome-wide and amplicon coverage QC plots - * [BCFTools](#nanopore-bcftools) - Variant count metrics - * [SnpEff and SnpSift](#nanopore-snpeff-and-snpsift) - Genetic variant annotation and functional effect prediction - * [QUAST](#nanopore-quast) - Consensus assessment report - * [Pangolin](#nanopore-pangolin) - Lineage analysis - * [Nextclade](#nanopore-nextclade) - Clade assignment, mutation calling and sequence quality checks - * [ASCIIGenome](#nanopore-asciigenome) - Individual variant screenshots with annotation tracks -* 
[Workflow reporting](#nanopore-workflow-reporting) - * [MultiQC](#nanopore-multiqc) - Present QC, visualisation and custom reporting for sequencing, raw reads, alignment and variant calling results - -## Nanopore: Preprocessing - -A file called `summary_variants_metrics_mqc.csv` containing a selection of read alignment and variant calling metrics will be saved in the `multiqc/<CALLER>/` output directory which is determined by the `--artic_minion_caller` parameter (Default: `nanopolish/`). The same metrics will also be added to the top of the MultiQC report. - -### Nanopore: pycoQC - -
-Output files - -* `pycoqc/` - * `*.html` and `.json` file that includes a run summary and graphical representation of various QC metrics including distribution of read length, distribution of read quality scores, mean read quality per sequence length, output per channel over experiment time and percentage of reads per barcode. - -
- -[PycoQC](https://github.com/a-slide/pycoQC) computes metrics and generates QC plots using the sequencing summary information generated by basecalling/demultiplexing tools such as Guppy e.g. distribution of read length, read length over time, number of reads per barcode and other general stats. - -

- PycoQC - Number of reads per barcode -

- -### Nanopore: artic guppyplex - -
-Output files - -* `guppyplex/` - * `*.fastq.gz` files generated by aggregate pre-demultiplexed reads from MinKNOW/Guppy. These files are not saved by default but can be via a custom config file such as the one below. - -```nextflow -params { - modules { - 'nanopore_artic_guppyplex' { - publish_files = ['fastq.gz':''] - } - } -} -``` - -
- -The [artic guppyplex](https://artic.readthedocs.io/en/latest/commands/) tool from the [ARTIC field bioinformatics pipeline](https://github.com/artic-network/fieldbioinformatics) is used to perform length filtering of the demultiplexed Nanopore reads obtained per barcode. This essentially filters out chimeric reads that may be generated by the ARTIC protocol. The pipeline uses a default minimum and maximum read length of 400 and 700, respectively as tailored for the [nCoV-2019 primer set](https://artic.network/ncov-2019/ncov2019-bioinformatics-sop.html). However, you may need to adjust these for different primer schemes e.g. by using the minimum length of the amplicons (`--min-length`) as well as the maximum length plus 200 (`--max-length`). - -### Nanopore: NanoPlot - -
-Output files - -* `nanoplot/<SAMPLE>/` - * Per-sample `*.html` files for QC metrics and individual `*.png` image files for plots. - -
- -[NanoPlot](https://github.com/wdecoster/NanoPlot) is a tool that can be used to produce general quality metrics from various Nanopore-based input files including fastq files e.g. quality score distribution, read lengths and other general stats. - -

- Nanoplot - Read quality vs read length -

- -## Nanopore: Variant calling - -### Nanopore: artic minion - -
-Output files - -* `<CALLER>/` - * `*.consensus.fasta`: Consensus fasta file generated by artic minion. - * `*.pass.vcf.gz`: VCF file containing variants passing quality filters. - * `*.pass.vcf.gz.tbi`: VCF index file containing variants passing quality filters. - * `*.primers.vcf`: VCF file containing variants found in primer-binding regions. - * `*.merged.vcf`: VCF file containing all detected variants. - * `*.fail.vcf`: VCF file containing variants failing quality filters. - * `*.sorted.bam`: BAM file generated by initial alignment. - * `*.sorted.bam.bai`: BAM index file generated by initial alignment. - * `*.trimmed.rg.sorted.bam`: BAM file without primer-binding site trimming. - * `*.trimmed.rg.sorted.bam.bai`: BAM index file without primer-binding site trimming. - * `*.primertrimmed.rg.sorted.bam`: BAM file generated after primer-binding site trimming. - * `*.primertrimmed.rg.sorted.bam.bai`: BAM index file generated after primer-binding site trimming. - -**NB:** The value of `<CALLER>` in the output directory name above is determined by the `--artic_minion_caller` parameter (Default: 'nanopolish'). - -
- -The [artic minion](https://artic.readthedocs.io/en/latest/commands/) tool from the [ARTIC field bioinformatics pipeline](https://github.com/artic-network/fieldbioinformatics) is used to align reads, call variants and to generate the consensus sequence. By default, artic minion uses [Minimap2](https://github.com/lh3/minimap2) to align the reads to the viral genome, however you can use [BWA](https://github.com/lh3/bwa) instead using the `--artic_minion_aligner bwa` parameter. Similarly, the default variant caller used by artic minion is [Nanopolish](https://github.com/jts/nanopolish), however, you can use [Medaka](https://github.com/nanoporetech/medaka) instead via the `--artic_minion_caller medaka` parameter. Medaka is faster than Nanopolish, performs mostly the same and can be run directly from `fastq` input files as opposed to requiring the `fastq`, `fast5` and `sequencing_summary.txt` files required to run Nanopolish. You must provide the appropriate [Medaka model](https://github.com/nanoporetech/medaka#models) via the `--artic_minion_medaka_model` parameter if using `--artic_minion_caller medaka`. - -## Nanopore: Downstream analysis - -### Nanopore: SAMtools - -
-Output files - -* `<CALLER>/` - * `*.mapped.sorted.bam`: Coordinate sorted BAM file containing read alignment information. - * `*.mapped.sorted.bam.bai`: Index file for coordinate sorted BAM file. -* `<CALLER>/samtools_stats/` - * SAMtools `*.mapped.sorted.bam.flagstat`, `*.mapped.sorted.bam.idxstats` and `*.mapped.sorted.bam.stats` files generated from the alignment files. - -**NB:** The value of `<CALLER>` in the output directory name above is determined by the `--artic_minion_caller` parameter (Default: 'nanopolish'). - -
- -BAM files containing the original alignments from either Minimap2 or BWA are further processed with [SAMtools](http://samtools.sourceforge.net/) to remove unmapped reads as well as to generate read mapping statistics. - -![MultiQC - SAMtools alignment scores plot](images/mqc_samtools_stats_plot.png) - -### Nanopore: mosdepth - -
-Output files - -* `<CALLER>/mosdepth/genome/` - * `all_samples.mosdepth.coverage.tsv`: File aggregating genome-wide coverage values across all samples used for plotting. - * `*.mosdepth.coverage.pdf`: Whole-genome coverage plot. - * `*.mosdepth.coverage.tsv`: File containing coverage values for the above plot. - * `*.mosdepth.summary.txt`: Summary metrics including mean, min and max coverage values. -* `<CALLER>/mosdepth/amplicon/` - * `all_samples.mosdepth.coverage.tsv`: File aggregating per-amplicon coverage values across all samples used for plotting. - * `all_samples.mosdepth.heatmap.pdf`: Heatmap showing per-amplicon coverage across all samples. - * `*.mosdepth.coverage.pdf`: Bar plot showing per-amplicon coverage for an individual sample. - * `*.mosdepth.coverage.tsv`: File containing per-amplicon coverage values for the above plot. - * `*.mosdepth.summary.txt`: Summary metrics including mean, min and max coverage values. - -**NB:** The value of `<CALLER>` in the output directory name above is determined by the `--artic_minion_caller` parameter (Default: 'nanopolish'). - -
- -[mosdepth](https://github.com/brentp/mosdepth) is a fast BAM/CRAM depth calculation tool for WGS, exome, or targeted sequencing. mosdepth is used in this pipeline to obtain genome-wide coverage values in 200bp windows and to obtain amplicon/region-specific coverage metrics. The results are then either rendered in MultiQC (genome-wide coverage) or are plotted using custom `R` scripts. - -![R - Samples amplicon coverage heatmap ](images/r_amplicon_heatmap.png) - -![R - Sample genome-wide coverage plot](images/r_genome_coverage.png) - -

- R - Sample per-amplicon coverage plot -

- -### Nanopore: BCFTools - -
-Output files - -* `<CALLER>/bcftools_stats/` - * `*.bcftools_stats.txt`: Statistics and counts obtained from VCF file. - -**NB:** The value of `<CALLER>` in the output directory name above is determined by the `--artic_minion_caller` parameter (Default: 'nanopolish'). - -
- -[BCFtools](http://samtools.github.io/bcftools/bcftools.html) is a set of utilities that manipulate variant calls in [VCF](https://vcftools.github.io/specs.html) and its binary counterpart BCF format. It can also be used to generate statistics and counts obtained from VCF files as used here. - -![MultiQC - BCFTools variant counts](images/mqc_bcftools_stats_plot.png) - -### Nanopore: SnpEff and SnpSift - -
-Output files - -* `<CALLER>/snpeff/` - * `*.snpeff.csv`: Variant annotation csv file. - * `*.snpeff.genes.txt`: Gene table for annotated variants. - * `*.snpeff.summary.html`: Summary html file for variants. - * `*.snpeff.vcf.gz`: VCF file with variant annotations. - * `*.snpeff.vcf.gz.tbi`: Index for VCF file with variant annotations. - * `*.snpsift.txt`: SnpSift summary table. -* `<CALLER>/snpeff/bcftools_stats/` - * `*.snpeff.bcftools_stats.txt`: Statistics and counts obtained from SnpEff VCF file. - -**NB:** The value of `<CALLER>` in the output directory name above is determined by the `--artic_minion_caller` parameter (Default: 'nanopolish'). - -
- -[SnpEff](http://snpeff.sourceforge.net/SnpEff.html) is a genetic variant annotation and functional effect prediction toolbox. It annotates and predicts the effects of genetic variants on genes and proteins (such as amino acid changes). - -[SnpSift](http://snpeff.sourceforge.net/SnpSift.html) annotates genomic variants using databases, filters, and manipulates genomic annotated variants. After annotation with SnpEff, you can use SnpSift to help filter large genomic datasets in order to find the most significant variants. - -![MultiQC - SnpEff annotation counts](images/mqc_snpeff_plot.png) - -### Nanopore: QUAST - -
-Output files - -* `<CALLER>/quast/` - * `report.html`: Results report in HTML format. Also available in various other file formats i.e. `report.pdf`, `report.tex`, `report.tsv` and `report.txt`. - -**NB:** The value of `<CALLER>` in the output directory name above is determined by the `--artic_minion_caller` parameter (Default: 'nanopolish'). - -
- -[QUAST](http://bioinf.spbau.ru/quast) is used to generate a single report with which to evaluate the quality of the consensus sequence across all of the samples provided to the pipeline. The HTML results can be opened within any browser (we recommend using Google Chrome). Please see the [QUAST output docs](http://quast.sourceforge.net/docs/manual.html#sec3) for more detailed information regarding the output files. - -### Nanopore: Pangolin - -
-Output files - -* `<CALLER>/pangolin/` - * `*.pangolin.csv`: Lineage analysis results from Pangolin. - -**NB:** The value of `<CALLER>` in the output directory name above is determined by the `--artic_minion_caller` parameter (Default: 'nanopolish'). - -
- -Phylogenetic Assignment of Named Global Outbreak LINeages ([Pangolin](https://github.com/cov-lineages/pangolin)) has been used extensively during the COVID-19 pandemic to assign lineages to SARS-CoV-2 genome sequenced samples. A [web application](https://pangolin.cog-uk.io/) also exists that allows users to upload genome sequences via a web browser to assign lineages to genome sequences of SARS-CoV-2, view descriptive characteristics of the assigned lineage(s), view the placement of the lineage in a phylogeny of global samples, and view the temporal and geographic distribution of the assigned lineage(s). - -### Nanopore: Nextclade - -
-Output files - -* `<CALLER>/nextclade/` - * `*.csv`: Analysis results from Nextclade containing genome clade assignment, mutation calling and sequence quality checks. - -**NB:** The value of `<CALLER>` in the output directory name above is determined by the `--artic_minion_caller` parameter (Default: 'nanopolish'). - -
- -[Nextclade](https://github.com/nextstrain/nextclade) performs viral genome clade assignment, mutation calling and sequence quality checks for the consensus sequences generated in this pipeline. Similar to Pangolin, it has been used extensively during the COVID-19 pandemic. A [web application](https://clades.nextstrain.org/) also exists that allows users to upload genome sequences via a web browser. - -### Nanopore: ASCIIGenome - -
-Output files - -* `<CALLER>/asciigenome/<SAMPLE>/` - * `*.pdf`: Individual variant screenshots with annotation tracks in PDF format. - -**NB:** The value of `<CALLER>` in the output directory name above is determined by the `--artic_minion_caller` parameter (Default: 'nanopolish'). - -
- -As described in the documentation, [ASCIIGenome](https://asciigenome.readthedocs.io/en/latest/) is a command-line genome browser that can be run from a terminal window and is solely based on ASCII characters. The closest program to ASCIIGenome is probably [samtools tview](http://www.htslib.org/doc/samtools-tview.html) but ASCIIGenome offers much more flexibility, similar to popular GUI viewers like the [IGV](https://software.broadinstitute.org/software/igv/) browser. We are using the batch processing mode of ASCIIGenome in this pipeline to generate individual screenshots for all of the variant sites reported for each sample in the VCF files. This is incredibly useful to be able to quickly QC the variants called by the pipeline without having to tediously load all of the relevant tracks into a conventional genome browser. Where possible, the BAM read alignments, VCF variant file, primer BED file and GFF annotation track will be represented in the screenshot for contextual purposes. The screenshot below shows a SNP called relative to the MN908947.3 SARS-CoV-2 reference genome that overlaps the ORF7a protein and the nCoV-2019_91_LEFT primer from the ARTIC v3 protocol. - -

- ASCIIGenome screenshot -

- -## Nanopore: Workflow reporting - -### Nanopore: MultiQC - -
-Output files - -* `multiqc/<CALLER>/` - * `multiqc_report.html`: a standalone HTML file that can be viewed in your web browser. - * `multiqc_data/`: directory containing parsed statistics from the different tools used in the pipeline. - * `summary_variants_metrics_mqc.csv`: file containing a selection of read alignment and variant calling metrics. The same metrics will also be added to the top of the MultiQC report. - -
- -[MultiQC](http://multiqc.info) is a visualization tool that generates a single HTML report summarizing all samples in your project. Most of the pipeline QC results are visualised in the report and further statistics are available in the report data directory. - -Results generated by MultiQC collate pipeline QC from pycoQC, samtools, mosdepth, BCFTools, SnpEff and QUAST. - -The default [`multiqc config file`](https://github.com/nf-core/viralrecon/blob/master/assets/multiqc_config_nanopore.yaml) has been written in a way in which to structure these QC metrics to make them more interpretable in the final report. - -The pipeline has special steps which also allow the software versions to be reported in the MultiQC output for future traceability. For more information about how to use MultiQC reports, see <http://multiqc.info>. - -An example MultiQC report generated from a full-sized dataset can be viewed on the [nf-core website](https://nf-co.re/viralrecon/results). - -# Illumina: Pipeline overview - -* [Preprocessing](#illumina-preprocessing) - * [cat](#cat) - Merge re-sequenced FastQ files - * [FastQC](#fastqc) - Raw read QC - * [fastp](#fastp) - Adapter and quality trimming - * [Kraken 2](#kraken-2) - Removal/QC for host reads -* [Variant calling](#illumina-variant-calling) - * [Bowtie 2](#bowtie-2) - Read alignment relative to reference genome - * [SAMtools](#samtools) - Sort, index and generate metrics for alignments - * [iVar trim](#ivar-trim) - Primer sequence removal for amplicon data - * [picard MarkDuplicates](#picard-markduplicates) - Duplicate read marking and removal - * [picard CollectMultipleMetrics](#picard-collectmultiplemetrics) - Alignment metrics - * [mosdepth](#mosdepth) - Whole-genome and amplicon coverage metrics - * [iVar variants and iVar consensus](#ivar-variants-and-ivar-consensus) *||* [BCFTools and BEDTools](#bcftools-and-bedtools) - Variant calling and consensus sequence generation - * [SnpEff and SnpSift](#snpeff-and-snpsift) - Genetic variant annotation 
and functional effect prediction - * [QUAST](#quast) - Consensus assessment report - * [Pangolin](#pangolin) - Lineage analysis - * [Nextclade](#nextclade) - Clade assignment, mutation calling and sequence quality checks - * [ASCIIGenome](#asciigenome) - Individual variant screenshots with annotation tracks - * [BCFTools isec](#bcftools-isec) - Intersect variants across all callers -* [De novo assembly](#illumina-de-novo-assembly) - * [Cutadapt](#cutadapt) - Primer trimming for amplicon data - * [SPAdes](#spades) *||* [Unicycler](#unicycler) *||* [minia](#minia) - Viral genome assembly - * [BLAST](#blast) - Blast to reference assembly - * [ABACAS](#abacas) - Order contigs according to reference genome - * [PlasmidID](#plasmidid) - Assembly report and visualisation - * [Assembly QUAST](#assembly-quast) - Assembly quality assessment -* [Workflow reporting and genomes](#illumina-workflow-reporting-and-genomes) - * [MultiQC](#multiqc) - Present QC for raw reads, alignment, assembly and variant calling - * [Reference genome files](#reference-genome-files) - Save reference genome indices/files - -## Illumina: Preprocessing - -### cat - -
-Output files - -* `fastq/` - * `*.merged.fastq.gz`: These files are not saved by default but can be via a custom config file such as the one below. - -```nextflow -params { - modules { - 'illumina_cat_fastq' { - publish_files = null - } - } -} -``` - -
- -If multiple libraries/runs have been provided for the same sample in the input samplesheet (e.g. to increase sequencing depth) then these will be merged at the very beginning of the pipeline in order to have consistent sample naming throughout the pipeline. Please refer to the [usage documentation](https://nf-co.re/viralrecon/usage#illumina-samplesheet-format) to see how to specify these samples in the input samplesheet. - -### FastQC - -
-Output files - -* `fastqc/raw/` - * `*_fastqc.html`: FastQC report containing quality metrics. - * `*_fastqc.zip`: Zip archive containing the FastQC report, tab-delimited data file and plot images. - -**NB:** The FastQC plots in this directory are generated relative to the raw, input reads. They may contain adapter sequence and regions of low quality. To see how your reads look after trimming please refer to the FastQC reports in the `fastqc/trim/` directory. - -
- -[FastQC](http://www.bioinformatics.babraham.ac.uk/projects/fastqc/) gives general quality metrics about your sequenced reads. It provides information about the quality score distribution across your reads, per base sequence content (%A/T/G/C), adapter contamination and overrepresented sequences. For further reading and documentation see the [FastQC help pages](http://www.bioinformatics.babraham.ac.uk/projects/fastqc/Help/). - -![MultiQC - FastQC per base sequence plot](images/mqc_fastqc_plot.png) - -### fastp - -
-Output files - -* `fastp/` - * `*.fastp.html`: Trimming report in html format. - * `*.fastp.json`: Trimming report in json format. -* `fastp/log/` - * `*.fastp.log`: Trimming log file. -* `fastqc/trim/` - * `*_fastqc.html`: FastQC report of the trimmed reads. - * `*_fastqc.zip`: Zip archive containing the FastQC report, tab-delimited data file and plot images. - -
- -[fastp](https://github.com/OpenGene/fastp) is a tool designed to provide fast, all-in-one preprocessing for FastQ files. It has been developed in C++ with multithreading support to achieve higher performance. fastp is used in this pipeline for standard adapter trimming and quality filtering. - -![MultiQC - fastp filtered reads plot](images/mqc_fastp_plot.png) - -### Kraken 2 - -
-Output files - -* `kraken2/` - * `*.kraken2.report.txt`: Kraken 2 taxonomic report. See [here](https://ccb.jhu.edu/software/kraken2/index.shtml?t=manual#sample-report-output-format) for a detailed description of the format. - -
- -[Kraken 2](https://ccb.jhu.edu/software/kraken2/index.shtml?t=manual) is a sequence classifier that assigns taxonomic labels to DNA sequences. Kraken 2 examines the k-mers within a query sequence and uses the information within those k-mers to query a database. That database maps k-mers to the lowest common ancestor (LCA) of all genomes known to contain a given k-mer. - -We use a Kraken 2 database in this workflow to filter out reads specific to the host genome before performing the *de novo* assembly steps in the pipeline. This filtering is not performed in the variant calling arm of the pipeline by default but Kraken 2 is still run to obtain an estimate of host reads, however, the filtering can be amended via the `--kraken2_variants_host_filter` parameter. - -![MultiQC - Kraken 2 classification plot](images/mqc_kraken2_plot.png) - -## Illumina: Variant calling - -A file called `summary_variants_metrics_mqc.csv` containing a selection of read alignment and variant calling metrics will be saved in the `multiqc/` results directory. The same metrics will also be added to the top of the MultiQC report. - -### Bowtie 2 - -
-Output files - -* `variants/bowtie2/log/` - * `*.bowtie2.log`: Bowtie 2 mapping log file. - -
- -[Bowtie 2](http://bowtie-bio.sourceforge.net/bowtie2/index.shtml) is an ultrafast and memory-efficient tool for aligning sequencing reads to long reference sequences. Bowtie 2 supports gapped, local, and paired-end alignment modes. - -![MultiQC - Bowtie2 alignment score plot](images/mqc_bowtie2_plot.png) - -### SAMtools - -
-Output files - -* `variants/bowtie2/` - * `<SAMPLE>.sorted.bam`: Coordinate sorted BAM file containing read alignment information. - * `<SAMPLE>.sorted.bam.bai`: Index file for coordinate sorted BAM file. -* `variants/bowtie2/samtools_stats/` - * SAMtools `<SAMPLE>.sorted.bam.flagstat`, `<SAMPLE>.sorted.bam.idxstats` and `<SAMPLE>.sorted.bam.stats` files generated from the alignment files. - -
- -Bowtie 2 BAM files are further processed with [SAMtools](http://samtools.sourceforge.net/) to sort them by coordinate, for indexing, as well as to generate read mapping statistics. - -![MultiQC - SAMtools alignment scores plot](images/mqc_samtools_stats_plot.png) - -### iVar trim - -
-Output files - -* `variants/bowtie2/` - * `*.ivar_trim.sorted.bam`: Coordinate sorted BAM file after primer trimming. - * `*.ivar_trim.sorted.bam.bai`: Index file for coordinate sorted BAM file after primer trimming. -* `variants/bowtie2/samtools_stats/` - * SAMtools `*.ivar_trim.sorted.bam.flagstat`, `*.ivar_trim.sorted.bam.idxstats` and `*.ivar_trim.sorted.bam.stats` files generated from the primer trimmed alignment files. -* `variants/bowtie2/log/` - * `*.ivar_trim.ivar.log`: iVar trim log file obtained from stdout. - -
- -If the `--protocol amplicon` parameter is provided then [iVar](http://gensoft.pasteur.fr/docs/ivar/1.0/manualpage.html) is used to trim amplicon primer sequences from the aligned reads. iVar uses the primer positions supplied in `--primer_bed` to soft clip primer sequences from a coordinate sorted BAM file. - -### picard MarkDuplicates - -
-Output files - -* `variants/bowtie2/` - * `*.markduplicates.sorted.bam`: Coordinate sorted BAM file after duplicate marking. - * `*.markduplicates.sorted.bam.bai`: Index file for coordinate sorted BAM file after duplicate marking. -* `variants/bowtie2/samtools_stats/` - * SAMtools `*.markduplicates.sorted.bam.flagstat`, `*.markduplicates.sorted.bam.idxstats` and `*.markduplicates.sorted.bam.stats` files generated from the duplicate marked alignment files. -* `variants/bowtie2/picard_metrics/` - * `*.markduplicates.sorted.MarkDuplicates.metrics.txt`: Metrics file from MarkDuplicates. - -
- -Unless you are using [UMIs](https://emea.illumina.com/science/sequencing-method-explorer/kits-and-arrays/umi.html) it is not possible to establish whether the fragments you have sequenced from your sample were derived via true biological duplication (i.e. sequencing independent template fragments) or as a result of PCR biases introduced during the library preparation. [picard MarkDuplicates](https://gatk.broadinstitute.org/hc/en-us/articles/360037052812-MarkDuplicates-Picard-) isn't run by default because you anticipate high levels of duplication with viral data due to the size of the genome, however, you can activate it by adding `--skip_markduplicates false` to the command you use to run the pipeline. This will only *mark* the duplicate reads identified amongst the alignments to allow you to guage the overall level of duplication in your samples. You can also choose to remove any reads identified as duplicates via the `--filter_duplicates` parameter. - -![MultiQC - Picard MarkDuplicates metrics plot](images/mqc_picard_duplicates_plot.png) - -### picard CollectMultipleMetrics - -
-Output files - -* `variants/bowtie2/picard_metrics/` - * `*.CollectMultipleMetrics.*`: Alignment QC files from picard CollectMultipleMetrics in `*_metrics` textual format. -* `variants/bowtie2/picard_metrics/pdf/` - * `*.pdf` plots for metrics obtained from CollectMultipleMetrics. - -
- -[picard-tools](https://broadinstitute.github.io/picard/command-line-overview.html) is a set of command-line tools for manipulating high-throughput sequencing data. We use picard-tools in this pipeline to obtain mapping and coverage metrics. - -![MultiQC - Picard insert size plot](images/mqc_picard_insert_size_plot.png) - -### mosdepth - -
-Output files - -* `variants/bowtie2/mosdepth/genome/` - * `all_samples.mosdepth.coverage.tsv`: File aggregating genome-wide coverage values across all samples used for plotting. - * `*.mosdepth.coverage.pdf`: Whole-genome coverage plot. - * `*.mosdepth.coverage.tsv`: File containing coverage values for the above plot. - * `*.mosdepth.summary.txt`: Summary metrics including mean, min and max coverage values. -* `variants/bowtie2/mosdepth/amplicon/` - * `all_samples.mosdepth.coverage.tsv`: File aggregating per-amplicon coverage values across all samples used for plotting. - * `all_samples.mosdepth.heatmap.pdf`: Heatmap showing per-amplicon coverage across all samples. - * `*.mosdepth.coverage.pdf`: Bar plot showing per-amplicon coverage for an individual sample. - * `*.mosdepth.coverage.tsv`: File containing per-amplicon coverage values for the above plot. - * `*.mosdepth.summary.txt`: Summary metrics including mean, min and max coverage values. - -
- -[mosdepth](mosdepth) is a fast BAM/CRAM depth calculation for WGS, exome, or targeted sequencing. mosdepth is used in this pipeline to obtain genome-wide coverage values in 200bp windows and for `--protocol amplicon` to obtain amplicon/region-specific coverage metrics. The results are then either rendered in MultiQC (genome-wide coverage) or are plotted using custom `R` scripts. - -![R - Samples amplicon coverage heatmap ](images/r_amplicon_heatmap.png) - -![R - Sample genome-wide coverage plot](images/r_genome_coverage.png) - -

- R - Sample per-amplicon coverage plot -

- -### iVar variants and iVar consensus - -
-Output files - -* `variants/ivar/` - * `*.tsv`: Original iVar variants in TSV format. - * `*.vcf.gz`: iVar variants in VCF format. - * `*.vcf.gz.tbi`: iVar variants in VCF index file. -* `variants/ivar/consensus/` - * `*.consensus.fa`: Consensus Fasta file generated by iVar. - * `*.consensus.qual.txt`: File with the average quality of each base in the consensus sequence. -* `variants/ivar/consensus/base_qc/` - * `*.ACTG_density.pdf`: Plot showing density of ACGT bases within the consensus sequence. - * `*.base_counts.pdf`: Plot showing frequency and percentages of all bases in consensus sequence. - * `*.base_counts.tsv`: File containing frequency and percentages of all bases in consensus sequence. - * `*.N_density.pdf`: Plot showing density of N bases within the consensus sequence. - * `*.N_run.tsv`: File containing start positions and width of N bases in consensus sequence. -* `variants/ivar/log/` - * `*.variant_counts.log`: Counts for type of variants called by iVar. -* `variants/ivar/bcftools_stats/` - * `*.bcftools_stats.txt`: Statistics and counts obtained from iVar variants VCF file. - -
- -[iVar](https://github.com/andersen-lab/ivar/blob/master/docs/MANUAL.md) is a computational package that contains functions broadly useful for viral amplicon-based sequencing. We use iVar in this pipeline to [trim primer sequences](#ivar-trim) for amplicon input data as well as to call variants and for consensus sequence generation. - -![MultiQC - iVar variants called plot](images/mqc_ivar_variants_plot.png) - -### BCFTools and BEDTools - -
-Output files - -* `variants/bcftools/` - * `*.vcf.gz`: Variants VCF file. - * `*.vcf.gz.tbi`: Variants VCF index file. -* `variants/bcftools/consensus/` - * `*.consensus.fa`: Consensus Fasta file generated by integrating the variants called by BCFTools into the reference genome. -* `variants/bcftools/consensus/base_qc/` - * `*.ACTG_density.pdf`: Plot showing density of ACGT bases within the consensus sequence. - * `*.base_counts.pdf`: Plot showing frequency and percentages of all bases in consensus sequence. - * `*.base_counts.tsv`: File containing frequency and percentages of all bases in consensus sequence. - * `*.N_density.pdf`: Plot showing density of N bases within the consensus sequence. - * `*.N_run.tsv`: File containing start positions and width of N bases in consensus sequence. -* `variants/bcftools/bcftools_stats/` - * `*.bcftools_stats.txt`: Statistics and counts obtained from VCF file. - -
- -[BCFtools](http://samtools.github.io/bcftools/bcftools.html) can be used to call variants directly from BAM alignment files. The functionality to call variants with BCFTools in this pipeline was inspired by work carried out by [Conor Walker](https://github.com/conorwalker/covid19/blob/3cb26ec399417bedb7e60487415c78a405f517d6/scripts/call_variants.sh). - -[BCFtools](http://samtools.github.io/bcftools/bcftools.html) is a set of utilities that manipulate variant calls in [VCF](https://vcftools.github.io/specs.html) and its binary counterpart BCF format. BCFTools is used in the variant calling and *de novo* assembly steps of this pipeline to obtain basic statistics from the VCF output. It can also used be used to generate a consensus sequence by integrating variant calls into the reference genome. - -[BEDTools](https://bedtools.readthedocs.io/en/latest/) is a swiss-army knife of tools for a wide-range of genomics analysis tasks. In this pipeline we use `bedtools genomecov` to compute the per-base mapped read coverage in bedGraph format, and `bedtools maskfasta` to mask sequences in a Fasta file based on intervals defined in a feature file. This may be useful for creating your own masked genome file based on custom annotations or for masking all but your target regions when aligning sequence data from a targeted capture experiment. - -![MultiQC - BCFTools variant counts](images/mqc_bcftools_stats_plot.png) - -### SnpEff and SnpSift - -
-Output files - -* `variants//snpeff/` - * `*.snpeff.csv`: Variant annotation csv file. - * `*.snpeff.genes.txt`: Gene table for annotated variants. - * `*.snpeff.summary.html`: Summary html file for variants. - * `*.snpeff.vcf.gz`: VCF file with variant annotations. - * `*.snpeff.vcf.gz.tbi`: Index for VCF file with variant annotations. - * `*.snpsift.txt`: SnpSift summary table. -* `variants//snpeff/bcftools_stats/` - * `*.bcftools_stats.txt`: Statistics and counts obtained from VCF file. - -**NB:** The value of `` in the output directory name above is determined by the `--callers` parameter (Default: 'ivar' for '--protocol amplicon' and 'bcftools' for '--protocol metagenomic'). - -
- -[SnpEff](http://snpeff.sourceforge.net/SnpEff.html) is a genetic variant annotation and functional effect prediction toolbox. It annotates and predicts the effects of genetic variants on genes and proteins (such as amino acid changes). - -[SnpSift](http://snpeff.sourceforge.net/SnpSift.html) annotates genomic variants using databases, filters, and manipulates genomic annotated variants. After annotation with SnpEff, you can use SnpSift to help filter large genomic datasets in order to find the most significant variants. - -![MultiQC - SnpEff annotation counts](images/mqc_snpeff_plot.png) - -### QUAST - -
-Output files - -* `variants//quast/` - * `report.html`: Results report in HTML format. Also available in various other file formats i.e. `report.pdf`, `report.tex`, `report.tsv` and `report.txt`. - -**NB:** The value of `` in the output directory name above is determined by the `--callers` parameter (Default: 'ivar' for '--protocol amplicon' and 'bcftools' for '--protocol metagenomic'). - -
- -[QUAST](http://bioinf.spbau.ru/quast) is used to generate a single report with which to evaluate the quality of the consensus sequence across all of the samples provided to the pipeline. The HTML results can be opened within any browser (we recommend using Google Chrome). Please see the [QUAST output docs](http://quast.sourceforge.net/docs/manual.html#sec3) for more detailed information regarding the output files. - -### Pangolin - -
-Output files - -* `variants//pangolin/` - * `*.pangolin.csv`: Lineage analysis results from Pangolin. - -**NB:** The value of `` in the output directory name above is determined by the `--callers` parameter (Default: 'ivar' for '--protocol amplicon' and 'bcftools' for '--protocol metagenomic'). - -
- -Phylogenetic Assignment of Named Global Outbreak LINeages ([Pangolin](https://github.com/cov-lineages/pangolin)) has been used extensively during the COVID-19 pandemic in order to to assign lineages to SARS-CoV-2 genome sequenced samples. A [web application](https://pangolin.cog-uk.io/) also exists that allows users to upload genome sequences via a web browser to assign lineages to genome sequences of SARS-CoV-2, view descriptive characteristics of the assigned lineage(s), view the placement of the lineage in a phylogeny of global samples, and view the temporal and geographic distribution of the assigned lineage(s). - -### Nextclade - -
-Output files - -* `variants//nextclade/` - * `*.csv`: Analysis results from Nextlade containing genome clade assignment, mutation calling and sequence quality checks. - -**NB:** The value of `` in the output directory name above is determined by the `--callers` parameter (Default: 'ivar' for '--protocol amplicon' and 'bcftools' for '--protocol metagenomic'). - -
- -[Nextclade](https://github.com/nextstrain/nextclade) performs viral genome clade assignment, mutation calling and sequence quality checks for the consensus sequences generated in this pipeline. Similar to Pangolin, it has been used extensively during the COVID-19 pandemic. A [web application](https://clades.nextstrain.org/) also exists that allows users to upload genome sequences via a web browser. - -### ASCIIGenome - -
-Output files - -* `variants//asciigenome//` - * `*.pdf`: Individual variant screenshots with annotation tracks in PDF format. - -**NB:** The value of `` in the output directory name above is determined by the `--callers` parameter (Default: 'ivar' for '--protocol amplicon' and 'bcftools' for '--protocol metagenomic'). - -
- -As described in the documentation, [ASCIIGenome](https://asciigenome.readthedocs.io/en/latest/) is a command-line genome browser that can be run from a terminal window and is solely based on ASCII characters. The closest program to ASCIIGenome is probably [samtools tview](http://www.htslib.org/doc/samtools-tview.html) but ASCIIGenome offers much more flexibility, similar to popular GUI viewers like the [IGV](https://software.broadinstitute.org/software/igv/) browser. We are using the batch processing mode of ASCIIGenome in this pipeline to generate individual screenshots for all of the variant sites reported for each sample in the VCF files. This is incredibly useful to be able to quickly QC the variants called by the pipeline without having to tediously load all of the relevant tracks into a conventional genome browser. Where possible, the BAM read alignments, VCF variant file, primer BED file and GFF annotation track will be represented in the screenshot for contextual purposes. The screenshot below shows a SNP called relative to the MN908947.3 SARS-CoV-2 reference genome that overlaps the ORF7a protein and the nCoV-2019_91_LEFT primer from the ARIC v3 protocol. - -

- ASCIIGenome screenshot -

- -### BCFTools isec - -
-Output files - -* `variants/intersect//` - * `*.vcf.gz`: VCF file containing variants common to both variant callers. There will be one file for each caller - see `README.txt` for details. - * `*.vcf.gz.tbi`: Index for VCF file. - * `README.txt`: File containing command used and file name mappings. - * `sites.txt`: List of variants common to both variant callers in textual format. The last column indicates presence (1) or absence (0) amongst the 2 different callers. - -**NB:** This process will only be executed when both variant callers are specified to be run i.e. `--callers ivar,bcftools`. - -
- -[BCFTools isec](http://samtools.github.io/bcftools/bcftools.html#isec) can be used to intersect the variant calls generated by the 2 different callers used in the pipeline. This permits a quick assessment of how consistently a particular variant is being called using different algorithms and to prioritise the investigation of the variants. - -## Illumina: De novo assembly - -A file called `summary_assembly_metrics_mqc.csv` containing a selection of read alignment and *de novo* assembly related metrics will be saved in the `multiqc/` results directory. The same metrics will also be added to the top of the MultiQC report. - -### Cutadapt - -
-Output files - -* `assembly/cutadapt/log/` - * `*.cutadapt.log`: Cutadapt log file generated from stdout. -* `assembly/cutadapt/fastqc/` - * `*_fastqc.html`: FastQC report of the trimmed reads. - * `*_fastqc.zip`: Zip archive containing the FastQC report. - -
- -In the variant calling branch of the pipeline we are using [iVar trim](#ivar-trim) to remove primer sequences from the aligned BAM files for amplicon data. Since in the *de novo* assembly branch we don't align the reads, we use [Cutadapt](https://cutadapt.readthedocs.io/en/stable/guide.html) as an alternative option to remove and clean the primer sequences directly from FastQ files. - -![MultiQC - Cutadapt filtered reads plot](images/mqc_cutadapt_plot.png) - -### SPAdes - -
-Output files - -* `assembly/spades//` - * `*.scaffolds.fa`: SPAdes scaffold assembly. - * `*.contigs.fa`: SPAdes assembly contigs. - * `*.assembly.gfa`: SPAdes assembly graph in [GFA](https://github.com/GFA-spec/GFA-spec/blob/master/GFA1.md) format. -* `assembly/spades//bandage/` - * `*.png`: Bandage visualisation for SPAdes assembly graph in PNG format. - * `*.svg`: Bandage visualisation for SPAdes assembly graph in SVG format. - -**NB:** The value of `` in the output directory name above is determined by the `--spades_mode` parameter (Default: 'rnaviral'). - -
- -[SPAdes](http://cab.spbu.ru/software/spades/) is an assembly toolkit containing various assembly pipelines. Generically speaking, SPAdes is one of the most popular de Bruijn graph-based assembly algorithms used for bacterial/viral genome reconstruction. - -[Bandage](https://rrwick.github.io/Bandage/) is a program for visualising *de novo* assembly graphs. By displaying connections which are not present in the contigs file, Bandage opens up new possibilities for analysing *de novo* assemblies. - -### Unicycler - -
-Output files - -* `assembly/unicycler/` - * `*.scaffolds.fa`: Unicycler scaffold assembly. - * `*.assembly.gfa`: Unicycler assembly graph in GFA format. -* `assembly/unicycler/bandage/` - * `*.png`: Bandage visualisation for Unicycler assembly graph in PNG format. - * `*.svg`: Bandage visualisation for Unicycler assembly graph in SVG format. - -
- -[Unicycler](https://github.com/rrwick/Unicycler) is an assembly pipeline for bacterial genomes. It can assemble Illumina-only read sets where it functions as a SPAdes-optimiser. - -### minia - -
-Output files - -* `assembly/minia/` - * `*.contigs.fa`: Minia scaffold assembly. - * `*.unitigs.fa`: Minia unitigs fasta file. - * `*.h5`: Minia h5 output file. - -
- -[Minia](https://github.com/GATB/minia) is a short-read assembler based on a de Bruijn graph, capable of assembling a human genome on a desktop computer in a day. The output of Minia is a set of contigs. Minia produces results of similar contiguity and accuracy to other de Bruijn assemblers. - -### BLAST - -
-Output files - -* `assembly//blastn/` - * `*.blastn.txt`: BLAST results against the target virus. - * `*.filter.blastn.txt`: Filtered BLAST results. - -**NB:** The value of `` in the output directory name above is determined by the `--assemblers` parameter (Default: 'spades'). - -
- -[blastn](https://blast.ncbi.nlm.nih.gov/Blast.cgi?PAGE_TYPE=BlastSearch) is used to align the assembled contigs against the virus reference genome. - -### ABACAS - -
-Output files - -* `assembly//abacas/` - * `*.abacas.bin`: Bin file that contains contigs that are not used in ordering. - * `*.abacas.crunch`: Comparison file. - * `*.abacas.fasta`: Ordered and orientated sequence file. - * `*.abacas.gaps`: Gap information. - * `*.abacas.gaps.tab`: Gap information in tab-delimited format. - * `*.abacas.MULTIFASTA.fa`: A list of ordered and orientated contigs in a multi-fasta format. - * `*.abacas.tab`: Feature file - * `*.unused_contigs.out`: Information on contigs that have a mapping information but could not be used in the ordering. -* `assembly//abacas/nucmer/`: Folder containing the files generated by the NUCmer algorithm used by ABACAS. - -**NB:** The value of `` in the output directory name above is determined by the `--assemblers` parameter (Default: 'spades'). - -
- -[ABACAS](https://www.sanger.ac.uk/science/tools/pagit) was developed to rapidly contiguate (align, order, orientate), visualize and design primers to close gaps on shotgun assembled contigs based on a reference sequence. - -### PlasmidID - -
-Output files - -* `assembly//plasmidid//` - * `*_final_results.html`: Summary file with reference coverage stats and contigs for visualization. - * `*_final_results.tab`: Summary file with reference coverage stats and contigs. - * `images/_.png`: PNG file with the visualization of the alignment between the viral assembly and the reference viral genome. - * `logs/`: Log files. - -**NB:** The value of `` in the output directory name above is determined by the `--assemblers` parameter (Default: 'spades'). - -
- -[PlasmidID](https://github.com/BU-ISCIII/plasmidID) was used to graphically represent the alignment of the reference genome relative to a given assembly. This helps to visualize the coverage of the reference genome in the assembly. To find more information about the output files refer to the [documentation](https://github.com/BU-ISCIII/plasmidID/wiki/Understanding-the-image:-track-by-track). - -### Assembly QUAST - -
-Output files - -* `assembly//quast/` - * `report.html`: Results report in HTML format. Also available in various other file formats i.e. `report.pdf`, `report.tex`, `report.tsv` and `report.txt`. - -**NB:** The value of `` in the output directory name above is determined by the `--assemblers` parameter (Default: 'spades'). - -
- -[QUAST](http://bioinf.spbau.ru/quast) is used to generate a single report with which to evaluate the quality of the *de novo* assemblies across all of the samples provided to the pipeline. The HTML results can be opened within any browser (we recommend using Google Chrome). Please see the [QUAST output docs](http://quast.sourceforge.net/docs/manual.html#sec3) for more detailed information regarding the output files. - -![MultiQC - QUAST contig counts](images/mqc_quast_plot.png) - -## Illumina: Workflow reporting and genomes - -### MultiQC - -
-Output files - -* `multiqc/` - * `multiqc_report.html`: a standalone HTML file that can be viewed in your web browser. - * `multiqc_data/`: directory containing parsed statistics from the different tools used in the pipeline. - * `summary_variants_metrics_mqc.csv`: file containing a selection of read alignment and variant calling metrics. The same metrics will also be added to the top of the MultiQC report. - * `summary_assembly_metrics_mqc.csv`: file containing a selection of read alignment and *de novo* assembly related metrics. The same metrics will also be added to the top of the MultiQC report. - -
- -[MultiQC](http://multiqc.info) is a visualization tool that generates a single HTML report summarizing all samples in your project. Most of the pipeline QC results are visualised in the report and further statistics are available in the report data directory. - -Results generated by MultiQC collate pipeline QC from FastQC, fastp, Cutadapt, Bowtie 2, Kraken 2, samtools, picard CollectMultipleMetrics, BCFTools, SnpEff and QUAST. - -The default [`multiqc config file`](https://github.com/nf-core/viralrecon/blob/master/assets/multiqc_config_illumina.yaml) has been written in a way in which to structure these QC metrics to make them more interpretable in the final report. - -The pipeline has special steps which also allow the software versions to be reported in the MultiQC output for future traceability. For more information about how to use MultiQC reports, see . - -An example MultiQC report generated from a full-sized dataset can be viewed on the [nf-core website](https://nf-co.re/viralrecon/results). - -### Reference genome files - -
-Output files - -* `genome/` - * Unzipped genome fasta file for viral genome - * Unzipped genome annotation GFF file for viral genome -* `genome/index/` - * `bowtie2/`: Bowtie 2 index for viral genome. -* `genome/db/` - * `blast_db/`: BLAST database for viral genome. - * `kraken2_db/`: Kraken 2 database for host genome. - * `snpeff_db/`: SnpEff database for viral genome. - * `snpeff.config`: SnpEff config file for viral genome. - -
- -A number of genome-specific files are generated by the pipeline because they are required for the downstream processing of the results. If the `--save_reference` parameter is provided then the Bowtie 2 alignment indices, BLAST and Kraken 2 databases downloaded/generated by the pipeline will be saved in the `genome/` directory. It is recommended to use the `--save_reference` parameter if you are using the pipeline to build a Kraken 2 database for the host genome. This can be quite a time-consuming process and it permits their reuse for future runs of the pipeline or for other purposes. - -# Pipeline information - -
-Output files - -* `pipeline_info/` - * Reports generated by Nextflow: `execution_report.html`, `execution_timeline.html`, `execution_trace.txt` and `pipeline_dag.dot`/`pipeline_dag.svg`. - * Reports generated by the pipeline: `pipeline_report.html`, `pipeline_report.txt` and `software_versions.csv`. - * Reformatted samplesheet files used as input to the pipeline: `samplesheet.valid.csv`. - * Documentation for interpretation of results in HTML format: `results_description.html`. - -
- -[Nextflow](https://www.nextflow.io/docs/latest/tracing.html) provides excellent functionality for generating various reports relevant to the running and execution of the pipeline. This will allow you to troubleshoot errors with the running of the pipeline, and also provide you with other information such as launch commands, run times and resource usage. + +# Introduction + +This document describes the output produced by the pipeline. Most of the plots are taken from the MultiQC report, which summarises results at the end of the pipeline. + +The directories listed below will be created in the results directory after the pipeline has finished. All paths are relative to the top-level results directory. + +# Nanopore: Pipeline overview + +* [Preprocessing](#nanopore-preprocessing) + * [pycoQC](#nanopore-pycoqc) - Sequencing QC + * [artic guppyplex](#nanopore-artic-guppyplex) - Aggregate pre-demultiplexed reads from MinKNOW/Guppy + * [NanoPlot](#nanopore-nanoplot) - Read QC +* [Variant calling](#nanopore-variant-calling) + * [artic minion](#nanopore-artic-minion) - Align reads, call variants and generate consensus sequence +* [Downstream analysis](#nanopore-downstream-analysis) + * [SAMtools](#nanopore-samtools) - Remove unmapped reads and obtain alignment metrics + * [mosdepth](#nanopore-mosdepth) - Genome-wide and amplicon coverage QC plots + * [BCFTools](#nanopore-bcftools) - Variant count metrics + * [SnpEff and SnpSift](#nanopore-snpeff-and-snpsift) - Genetic variant annotation and functional effect prediction + * [QUAST](#nanopore-quast) - Consensus assessment report + * [Pangolin](#nanopore-pangolin) - Lineage analysis + * [Nextclade](#nanopore-nextclade) - Clade assignment, mutation calling and sequence quality checks + * [ASCIIGenome](#nanopore-asciigenome) - Individual variant screenshots with annotation tracks +* [Workflow reporting](#nanopore-workflow-reporting) + * [MultiQC](#nanopore-multiqc) - Present QC, visualisation and custom reporting for 
sequencing, raw reads, alignment and variant calling results + +## Nanopore: Preprocessing + +A file called `summary_variants_metrics_mqc.csv` containing a selection of read alignment and variant calling metrics will be saved in the `multiqc/<CALLER>/` output directory, where `<CALLER>` is determined by the `--artic_minion_caller` parameter (Default: `nanopolish/`). The same metrics will also be added to the top of the MultiQC report. + +### Nanopore: pycoQC + +
+Output files + +* `pycoqc/` + * `*.html` and `*.json` files that include a run summary and graphical representation of various QC metrics including distribution of read length, distribution of read quality scores, mean read quality per sequence length, output per channel over experiment time and percentage of reads per barcode. + +
+ +[PycoQC](https://github.com/a-slide/pycoQC) computes metrics and generates QC plots using the sequencing summary information generated by basecalling/demultiplexing tools such as Guppy e.g. distribution of read length, read length over time, number of reads per barcode and other general stats. + +

+ PycoQC - Number of reads per barcode +

+ +### Nanopore: artic guppyplex + +
+Output files + +* `guppyplex/` + * `*.fastq.gz` files generated by aggregating pre-demultiplexed reads from MinKNOW/Guppy. These files are not saved by default but can be saved via a custom config file such as the one below. + +```nextflow +params { + modules { + 'nanopore_artic_guppyplex' { + publish_files = ['fastq.gz':''] + } + } +} +``` + +
+ +The [artic guppyplex](https://artic.readthedocs.io/en/latest/commands/) tool from the [ARTIC field bioinformatics pipeline](https://github.com/artic-network/fieldbioinformatics) is used to perform length filtering of the demultiplexed Nanopore reads obtained per barcode. This essentially filters out chimeric reads that may be generated by the ARTIC protocol. The pipeline uses a default minimum and maximum read length of 400 and 700, respectively, as tailored for the [nCoV-2019 primer set](https://artic.network/ncov-2019/ncov2019-bioinformatics-sop.html). However, you may need to adjust these for different primer schemes e.g. by using the minimum length of the amplicons (`--min-length`) as well as the maximum length plus 200 (`--max-length`). + +### Nanopore: NanoPlot + +
+Output files + +* `nanoplot/<SAMPLE>/` + * Per-sample `*.html` files for QC metrics and individual `*.png` image files for plots. + +
+ +[NanoPlot](https://github.com/wdecoster/NanoPlot) is a tool that can be used to produce general quality metrics from various Nanopore-based input files including fastq files e.g. quality score distribution, read lengths and other general stats. + +

+ Nanoplot - Read quality vs read length +

+ +## Nanopore: Variant calling + +### Nanopore: artic minion + +
+Output files + +* `<CALLER>/` + * `*.consensus.fasta`: Consensus fasta file generated by artic minion. + * `*.pass.vcf.gz`: VCF file containing variants passing quality filters. + * `*.pass.vcf.gz.tbi`: VCF index file containing variants passing quality filters. + * `*.primers.vcf`: VCF file containing variants found in primer-binding regions. + * `*.merged.vcf`: VCF file containing all detected variants. + * `*.fail.vcf`: VCF file containing variants failing quality filters. + * `*.sorted.bam`: BAM file generated by initial alignment. + * `*.sorted.bam.bai`: BAM index file generated by initial alignment. + * `*.trimmed.rg.sorted.bam`: BAM file without primer-binding site trimming. + * `*.trimmed.rg.sorted.bam.bai`: BAM index file without primer-binding site trimming. + * `*.primertrimmed.rg.sorted.bam`: BAM file generated after primer-binding site trimming. + * `*.primertrimmed.rg.sorted.bam.bai`: BAM index file generated after primer-binding site trimming. + +**NB:** The value of `<CALLER>` in the output directory name above is determined by the `--artic_minion_caller` parameter (Default: 'nanopolish'). + +
+ +The [artic minion](https://artic.readthedocs.io/en/latest/commands/) tool from the [ARTIC field bioinformatics pipeline](https://github.com/artic-network/fieldbioinformatics) is used to align reads, call variants and to generate the consensus sequence. By default, artic minion uses [Minimap2](https://github.com/lh3/minimap2) to align the reads to the viral genome, however, you can use [BWA](https://github.com/lh3/bwa) instead via the `--artic_minion_aligner bwa` parameter. Similarly, the default variant caller used by artic minion is [Nanopolish](https://github.com/jts/nanopolish), however, you can use [Medaka](https://github.com/nanoporetech/medaka) instead via the `--artic_minion_caller medaka` parameter. Medaka is faster than Nanopolish, performs mostly the same and can be run directly from `fastq` input files, as opposed to the `fastq`, `fast5` and `sequencing_summary.txt` files required to run Nanopolish. You must provide the appropriate [Medaka model](https://github.com/nanoporetech/medaka#models) via the `--artic_minion_medaka_model` parameter if using `--artic_minion_caller medaka`. + +## Nanopore: Downstream analysis + +### Nanopore: SAMtools + +
+Output files + +* `<CALLER>/` + * `*.mapped.sorted.bam`: Coordinate sorted BAM file containing read alignment information. + * `*.mapped.sorted.bam.bai`: Index file for coordinate sorted BAM file. +* `<CALLER>/samtools_stats/` + * SAMtools `*.mapped.sorted.bam.flagstat`, `*.mapped.sorted.bam.idxstats` and `*.mapped.sorted.bam.stats` files generated from the alignment files. + +**NB:** The value of `<CALLER>` in the output directory name above is determined by the `--artic_minion_caller` parameter (Default: 'nanopolish'). + +
+ +BAM files containing the original alignments from either Minimap2 or BWA are further processed with [SAMtools](http://samtools.sourceforge.net/) to remove unmapped reads as well as to generate read mapping statistics. + +![MultiQC - SAMtools alignment scores plot](images/mqc_samtools_stats_plot.png) + +### Nanopore: mosdepth + +
+Output files + +* `<CALLER>/mosdepth/genome/` + * `all_samples.mosdepth.coverage.tsv`: File aggregating genome-wide coverage values across all samples used for plotting. + * `*.mosdepth.coverage.pdf`: Whole-genome coverage plot. + * `*.mosdepth.coverage.tsv`: File containing coverage values for the above plot. + * `*.mosdepth.summary.txt`: Summary metrics including mean, min and max coverage values. +* `<CALLER>/mosdepth/amplicon/` + * `all_samples.mosdepth.coverage.tsv`: File aggregating per-amplicon coverage values across all samples used for plotting. + * `all_samples.mosdepth.heatmap.pdf`: Heatmap showing per-amplicon coverage across all samples. + * `*.mosdepth.coverage.pdf`: Bar plot showing per-amplicon coverage for an individual sample. + * `*.mosdepth.coverage.tsv`: File containing per-amplicon coverage values for the above plot. + * `*.mosdepth.summary.txt`: Summary metrics including mean, min and max coverage values. + +**NB:** The value of `<CALLER>` in the output directory name above is determined by the `--artic_minion_caller` parameter (Default: 'nanopolish'). + +
+ +[mosdepth](https://github.com/brentp/mosdepth) is a fast BAM/CRAM depth calculation tool for WGS, exome, or targeted sequencing. mosdepth is used in this pipeline to obtain genome-wide coverage values in 200bp windows and to obtain amplicon/region-specific coverage metrics. The results are then either rendered in MultiQC (genome-wide coverage) or are plotted using custom `R` scripts. + +![R - Samples amplicon coverage heatmap ](images/r_amplicon_heatmap.png) + +![R - Sample genome-wide coverage plot](images/r_genome_coverage.png) + +

+ R - Sample per-amplicon coverage plot +

+ +### Nanopore: BCFTools + +
+Output files + +* `/bcftools_stats/` + * `*.bcftools_stats.txt`: Statistics and counts obtained from VCF file. + +**NB:** The value of `` in the output directory name above is determined by the `--artic_minion_caller` parameter (Default: 'nanopolish'). + +
+ +[BCFtools](http://samtools.github.io/bcftools/bcftools.html) is a set of utilities that manipulate variant calls in [VCF](https://vcftools.github.io/specs.html) and its binary counterpart BCF format. It can also be used to generate statistics and counts obtained from VCF files as used here. + +![MultiQC - BCFTools variant counts](images/mqc_bcftools_stats_plot.png) + +### Nanopore: SnpEff and SnpSift + +
+Output files + +* `/snpeff/` + * `*.snpeff.csv`: Variant annotation csv file. + * `*.snpeff.genes.txt`: Gene table for annotated variants. + * `*.snpeff.summary.html`: Summary html file for variants. + * `*.snpeff.vcf.gz`: VCF file with variant annotations. + * `*.snpeff.vcf.gz.tbi`: Index for VCF file with variant annotations. + * `*.snpsift.txt`: SnpSift summary table. +* `/snpeff/bcftools_stats/` + * `*.snpeff.bcftools_stats.txt`: Statistics and counts obtained from SnpEff VCF file. + +**NB:** The value of `` in the output directory name above is determined by the `--artic_minion_caller` parameter (Default: 'nanopolish'). + +
+ +[SnpEff](http://snpeff.sourceforge.net/SnpEff.html) is a genetic variant annotation and functional effect prediction toolbox. It annotates and predicts the effects of genetic variants on genes and proteins (such as amino acid changes). + +[SnpSift](http://snpeff.sourceforge.net/SnpSift.html) annotates genomic variants using databases, filters, and manipulates genomic annotated variants. After annotation with SnpEff, you can use SnpSift to help filter large genomic datasets in order to find the most significant variants. + +![MultiQC - SnpEff annotation counts](images/mqc_snpeff_plot.png) + +### Nanopore: QUAST + +
+Output files + +* `/quast/` + * `report.html`: Results report in HTML format. Also available in various other file formats i.e. `report.pdf`, `report.tex`, `report.tsv` and `report.txt`. + +**NB:** The value of `` in the output directory name above is determined by the `--artic_minion_caller` parameter (Default: 'nanopolish'). + +
+ +[QUAST](http://bioinf.spbau.ru/quast) is used to generate a single report with which to evaluate the quality of the consensus sequence across all of the samples provided to the pipeline. The HTML results can be opened within any browser (we recommend using Google Chrome). Please see the [QUAST output docs](http://quast.sourceforge.net/docs/manual.html#sec3) for more detailed information regarding the output files. + +### Nanopore: Pangolin + +
+Output files + +* `/pangolin/` + * `*.pangolin.csv`: Lineage analysis results from Pangolin. + +**NB:** The value of `` in the output directory name above is determined by the `--artic_minion_caller` parameter (Default: 'nanopolish'). + +
+ +Phylogenetic Assignment of Named Global Outbreak LINeages ([Pangolin](https://github.com/cov-lineages/pangolin)) has been used extensively during the COVID-19 pandemic to assign lineages to SARS-CoV-2 genome sequenced samples. A [web application](https://pangolin.cog-uk.io/) also exists that allows users to upload genome sequences via a web browser to assign lineages to genome sequences of SARS-CoV-2, view descriptive characteristics of the assigned lineage(s), view the placement of the lineage in a phylogeny of global samples, and view the temporal and geographic distribution of the assigned lineage(s). + +### Nanopore: Nextclade + +
+Output files + +* `/nextclade/` + * `*.csv`: Analysis results from Nextclade containing genome clade assignment, mutation calling and sequence quality checks. + +**NB:** The value of `` in the output directory name above is determined by the `--artic_minion_caller` parameter (Default: 'nanopolish'). + +
+ +[Nextclade](https://github.com/nextstrain/nextclade) performs viral genome clade assignment, mutation calling and sequence quality checks for the consensus sequences generated in this pipeline. Similar to Pangolin, it has been used extensively during the COVID-19 pandemic. A [web application](https://clades.nextstrain.org/) also exists that allows users to upload genome sequences via a web browser. + +### Nanopore: ASCIIGenome + +
+Output files + +* `/asciigenome//` + * `*.pdf`: Individual variant screenshots with annotation tracks in PDF format. + +**NB:** The value of `` in the output directory name above is determined by the `--artic_minion_caller` parameter (Default: 'nanopolish'). + +
+ +As described in the documentation, [ASCIIGenome](https://asciigenome.readthedocs.io/en/latest/) is a command-line genome browser that can be run from a terminal window and is solely based on ASCII characters. The closest program to ASCIIGenome is probably [samtools tview](http://www.htslib.org/doc/samtools-tview.html) but ASCIIGenome offers much more flexibility, similar to popular GUI viewers like the [IGV](https://software.broadinstitute.org/software/igv/) browser. We are using the batch processing mode of ASCIIGenome in this pipeline to generate individual screenshots for all of the variant sites reported for each sample in the VCF files. This is incredibly useful to be able to quickly QC the variants called by the pipeline without having to tediously load all of the relevant tracks into a conventional genome browser. Where possible, the BAM read alignments, VCF variant file, primer BED file and GFF annotation track will be represented in the screenshot for contextual purposes. The screenshot below shows a SNP called relative to the MN908947.3 SARS-CoV-2 reference genome that overlaps the ORF7a protein and the nCoV-2019_91_LEFT primer from the ARTIC v3 protocol. + +

+ ASCIIGenome screenshot +

+ +## Nanopore: Workflow reporting + +### Nanopore: MultiQC + +
+Output files + +* `multiqc//` + * `multiqc_report.html`: a standalone HTML file that can be viewed in your web browser. + * `multiqc_data/`: directory containing parsed statistics from the different tools used in the pipeline. + * `summary_variants_metrics_mqc.csv`: file containing a selection of read alignment and variant calling metrics. The same metrics will also be added to the top of the MultiQC report. + +
+ +[MultiQC](http://multiqc.info) is a visualization tool that generates a single HTML report summarizing all samples in your project. Most of the pipeline QC results are visualised in the report and further statistics are available in the report data directory. + +Results generated by MultiQC collate pipeline QC from pycoQC, samtools, mosdepth, BCFTools, SnpEff and QUAST. + +The default [`multiqc config file`](https://github.com/nf-core/viralrecon/blob/master/assets/multiqc_config_nanopore.yaml) has been written in a way in which to structure these QC metrics to make them more interpretable in the final report. + +The pipeline has special steps which also allow the software versions to be reported in the MultiQC output for future traceability. For more information about how to use MultiQC reports, see . + +An example MultiQC report generated from a full-sized dataset can be viewed on the [nf-core website](https://nf-co.re/viralrecon/results). + +# Illumina: Pipeline overview + +* [Preprocessing](#illumina-preprocessing) + * [cat](#cat) - Merge re-sequenced FastQ files + * [FastQC](#fastqc) - Raw read QC + * [fastp](#fastp) - Adapter and quality trimming + * [Kraken 2](#kraken-2) - Removal/QC for host reads +* [Variant calling](#illumina-variant-calling) + * [Bowtie 2](#bowtie-2) - Read alignment relative to reference genome + * [SAMtools](#samtools) - Sort, index and generate metrics for alignments + * [iVar trim](#ivar-trim) - Primer sequence removal for amplicon data + * [picard MarkDuplicates](#picard-markduplicates) - Duplicate read marking and removal + * [picard CollectMultipleMetrics](#picard-collectmultiplemetrics) - Alignment metrics + * [mosdepth](#mosdepth) - Whole-genome and amplicon coverage metrics + * [iVar variants and iVar consensus](#ivar-variants-and-ivar-consensus) *||* [BCFTools and BEDTools](#bcftools-and-bedtools) - Variant calling and consensus sequence generation + * [SnpEff and SnpSift](#snpeff-and-snpsift) - Genetic variant annotation 
and functional effect prediction + * [QUAST](#quast) - Consensus assessment report + * [Pangolin](#pangolin) - Lineage analysis + * [Nextclade](#nextclade) - Clade assignment, mutation calling and sequence quality checks + * [ASCIIGenome](#asciigenome) - Individual variant screenshots with annotation tracks + * [BCFTools isec](#bcftools-isec) - Intersect variants across all callers +* [De novo assembly](#illumina-de-novo-assembly) + * [Cutadapt](#cutadapt) - Primer trimming for amplicon data + * [SPAdes](#spades) *||* [Unicycler](#unicycler) *||* [minia](#minia) - Viral genome assembly + * [BLAST](#blast) - Blast to reference assembly + * [ABACAS](#abacas) - Order contigs according to reference genome + * [PlasmidID](#plasmidid) - Assembly report and visualisation + * [Assembly QUAST](#assembly-quast) - Assembly quality assessment +* [Workflow reporting and genomes](#illumina-workflow-reporting-and-genomes) + * [MultiQC](#multiqc) - Present QC for raw reads, alignment, assembly and variant calling + * [Reference genome files](#reference-genome-files) - Save reference genome indices/files + +## Illumina: Preprocessing + +### cat + +
+Output files + +* `fastq/` + * `*.merged.fastq.gz`: These files are not saved by default but can be via a custom config file such as the one below. + +```nextflow +params { + modules { + 'illumina_cat_fastq' { + publish_files = null + } + } +} +``` + +
+ +If multiple libraries/runs have been provided for the same sample in the input samplesheet (e.g. to increase sequencing depth) then these will be merged at the very beginning of the pipeline in order to have consistent sample naming throughout the pipeline. Please refer to the [usage documentation](https://nf-co.re/viralrecon/usage#illumina-samplesheet-format) to see how to specify these samples in the input samplesheet. + +### FastQC + +
+Output files + +* `fastqc/raw/` + * `*_fastqc.html`: FastQC report containing quality metrics. + * `*_fastqc.zip`: Zip archive containing the FastQC report, tab-delimited data file and plot images. + +**NB:** The FastQC plots in this directory are generated relative to the raw, input reads. They may contain adapter sequence and regions of low quality. To see how your reads look after trimming please refer to the FastQC reports in the `fastqc/trim/` directory. + +
+ +[FastQC](http://www.bioinformatics.babraham.ac.uk/projects/fastqc/) gives general quality metrics about your sequenced reads. It provides information about the quality score distribution across your reads, per base sequence content (%A/T/G/C), adapter contamination and overrepresented sequences. For further reading and documentation see the [FastQC help pages](http://www.bioinformatics.babraham.ac.uk/projects/fastqc/Help/). + +![MultiQC - FastQC per base sequence plot](images/mqc_fastqc_plot.png) + +### fastp + +
+Output files + +* `fastp/` + * `*.fastp.html`: Trimming report in html format. + * `*.fastp.json`: Trimming report in json format. +* `fastp/log/` + * `*.fastp.log`: Trimming log file. +* `fastqc/trim/` + * `*_fastqc.html`: FastQC report of the trimmed reads. + * `*_fastqc.zip`: Zip archive containing the FastQC report, tab-delimited data file and plot images. + +
+ +[fastp](https://github.com/OpenGene/fastp) is a tool designed to provide fast, all-in-one preprocessing for FastQ files. It has been developed in C++ with multithreading support to achieve higher performance. fastp is used in this pipeline for standard adapter trimming and quality filtering. + +![MultiQC - fastp filtered reads plot](images/mqc_fastp_plot.png) + +### Kraken 2 + +
+Output files + +* `kraken2/` + * `*.kraken2.report.txt`: Kraken 2 taxonomic report. See [here](https://ccb.jhu.edu/software/kraken2/index.shtml?t=manual#sample-report-output-format) for a detailed description of the format. + +
+ +[Kraken 2](https://ccb.jhu.edu/software/kraken2/index.shtml?t=manual) is a sequence classifier that assigns taxonomic labels to DNA sequences. Kraken 2 examines the k-mers within a query sequence and uses the information within those k-mers to query a database. That database maps k-mers to the lowest common ancestor (LCA) of all genomes known to contain a given k-mer. + +We use a Kraken 2 database in this workflow to filter out reads specific to the host genome before performing the *de novo* assembly steps in the pipeline. This filtering is not performed in the variant calling arm of the pipeline by default but Kraken 2 is still run to obtain an estimate of host reads, however, the filtering can be amended via the `--kraken2_variants_host_filter` parameter. + +![MultiQC - Kraken 2 classification plot](images/mqc_kraken2_plot.png) + +## Illumina: Variant calling + +A file called `summary_variants_metrics_mqc.csv` containing a selection of read alignment and variant calling metrics will be saved in the `multiqc/` results directory. The same metrics will also be added to the top of the MultiQC report. + +### Bowtie 2 + +
+Output files + +* `variants/bowtie2/log/` + * `*.bowtie2.log`: Bowtie 2 mapping log file. + +
+ +[Bowtie 2](http://bowtie-bio.sourceforge.net/bowtie2/index.shtml) is an ultrafast and memory-efficient tool for aligning sequencing reads to long reference sequences. Bowtie 2 supports gapped, local, and paired-end alignment modes. + +![MultiQC - Bowtie2 alignment score plot](images/mqc_bowtie2_plot.png) + +### SAMtools + +
+Output files + +* `variants/bowtie2/` + * `.sorted.bam`: Coordinate sorted BAM file containing read alignment information. + * `.sorted.bam.bai`: Index file for coordinate sorted BAM file. +* `variants/bowtie2/samtools_stats/` + * SAMtools `.sorted.bam.flagstat`, `.sorted.bam.idxstats` and `.sorted.bam.stats` files generated from the alignment files. + +
+ +Bowtie 2 BAM files are further processed with [SAMtools](http://samtools.sourceforge.net/) to sort them by coordinate, for indexing, as well as to generate read mapping statistics. + +![MultiQC - SAMtools alignment scores plot](images/mqc_samtools_stats_plot.png) + +### iVar trim + +
+Output files + +* `variants/bowtie2/` + * `*.ivar_trim.sorted.bam`: Coordinate sorted BAM file after primer trimming. + * `*.ivar_trim.sorted.bam.bai`: Index file for coordinate sorted BAM file after primer trimming. +* `variants/bowtie2/samtools_stats/` + * SAMtools `*.ivar_trim.sorted.bam.flagstat`, `*.ivar_trim.sorted.bam.idxstats` and `*.ivar_trim.sorted.bam.stats` files generated from the primer trimmed alignment files. +* `variants/bowtie2/log/` + * `*.ivar_trim.ivar.log`: iVar trim log file obtained from stdout. + +
+ +If the `--protocol amplicon` parameter is provided then [iVar](http://gensoft.pasteur.fr/docs/ivar/1.0/manualpage.html) is used to trim amplicon primer sequences from the aligned reads. iVar uses the primer positions supplied in `--primer_bed` to soft clip primer sequences from a coordinate sorted BAM file. + +### picard MarkDuplicates + +
+Output files + +* `variants/bowtie2/` + * `*.markduplicates.sorted.bam`: Coordinate sorted BAM file after duplicate marking. + * `*.markduplicates.sorted.bam.bai`: Index file for coordinate sorted BAM file after duplicate marking. +* `variants/bowtie2/samtools_stats/` + * SAMtools `*.markduplicates.sorted.bam.flagstat`, `*.markduplicates.sorted.bam.idxstats` and `*.markduplicates.sorted.bam.stats` files generated from the duplicate marked alignment files. +* `variants/bowtie2/picard_metrics/` + * `*.markduplicates.sorted.MarkDuplicates.metrics.txt`: Metrics file from MarkDuplicates. + +
+ +Unless you are using [UMIs](https://emea.illumina.com/science/sequencing-method-explorer/kits-and-arrays/umi.html) it is not possible to establish whether the fragments you have sequenced from your sample were derived via true biological duplication (i.e. sequencing independent template fragments) or as a result of PCR biases introduced during the library preparation. [picard MarkDuplicates](https://gatk.broadinstitute.org/hc/en-us/articles/360037052812-MarkDuplicates-Picard-) isn't run by default because you anticipate high levels of duplication with viral data due to the size of the genome, however, you can activate it by adding `--skip_markduplicates false` to the command you use to run the pipeline. This will only *mark* the duplicate reads identified amongst the alignments to allow you to gauge the overall level of duplication in your samples. You can also choose to remove any reads identified as duplicates via the `--filter_duplicates` parameter. + +![MultiQC - Picard MarkDuplicates metrics plot](images/mqc_picard_duplicates_plot.png) + +### picard CollectMultipleMetrics + +
+Output files + +* `variants/bowtie2/picard_metrics/` + * `*.CollectMultipleMetrics.*`: Alignment QC files from picard CollectMultipleMetrics in `*_metrics` textual format. +* `variants/bowtie2/picard_metrics/pdf/` + * `*.pdf` plots for metrics obtained from CollectMultipleMetrics. + +
+ +[picard-tools](https://broadinstitute.github.io/picard/command-line-overview.html) is a set of command-line tools for manipulating high-throughput sequencing data. We use picard-tools in this pipeline to obtain mapping and coverage metrics. + +![MultiQC - Picard insert size plot](images/mqc_picard_insert_size_plot.png) + +### mosdepth + +
+Output files + +* `variants/bowtie2/mosdepth/genome/` + * `all_samples.mosdepth.coverage.tsv`: File aggregating genome-wide coverage values across all samples used for plotting. + * `*.mosdepth.coverage.pdf`: Whole-genome coverage plot. + * `*.mosdepth.coverage.tsv`: File containing coverage values for the above plot. + * `*.mosdepth.summary.txt`: Summary metrics including mean, min and max coverage values. +* `variants/bowtie2/mosdepth/amplicon/` + * `all_samples.mosdepth.coverage.tsv`: File aggregating per-amplicon coverage values across all samples used for plotting. + * `all_samples.mosdepth.heatmap.pdf`: Heatmap showing per-amplicon coverage across all samples. + * `*.mosdepth.coverage.pdf`: Bar plot showing per-amplicon coverage for an individual sample. + * `*.mosdepth.coverage.tsv`: File containing per-amplicon coverage values for the above plot. + * `*.mosdepth.summary.txt`: Summary metrics including mean, min and max coverage values. + +
+ +[mosdepth](https://github.com/brentp/mosdepth) is a fast BAM/CRAM depth calculation tool for WGS, exome, or targeted sequencing. mosdepth is used in this pipeline to obtain genome-wide coverage values in 200bp windows and for `--protocol amplicon` to obtain amplicon/region-specific coverage metrics. The results are then either rendered in MultiQC (genome-wide coverage) or are plotted using custom `R` scripts. + +![R - Samples amplicon coverage heatmap ](images/r_amplicon_heatmap.png) + +![R - Sample genome-wide coverage plot](images/r_genome_coverage.png) + +

+ R - Sample per-amplicon coverage plot +

+ +### iVar variants and iVar consensus + +
+Output files + +* `variants/ivar/` + * `*.tsv`: Original iVar variants in TSV format. + * `*.vcf.gz`: iVar variants in VCF format. + * `*.vcf.gz.tbi`: iVar variants in VCF index file. +* `variants/ivar/consensus/` + * `*.consensus.fa`: Consensus Fasta file generated by iVar. + * `*.consensus.qual.txt`: File with the average quality of each base in the consensus sequence. +* `variants/ivar/consensus/base_qc/` + * `*.ACTG_density.pdf`: Plot showing density of ACGT bases within the consensus sequence. + * `*.base_counts.pdf`: Plot showing frequency and percentages of all bases in consensus sequence. + * `*.base_counts.tsv`: File containing frequency and percentages of all bases in consensus sequence. + * `*.N_density.pdf`: Plot showing density of N bases within the consensus sequence. + * `*.N_run.tsv`: File containing start positions and width of N bases in consensus sequence. +* `variants/ivar/log/` + * `*.variant_counts.log`: Counts for type of variants called by iVar. +* `variants/ivar/bcftools_stats/` + * `*.bcftools_stats.txt`: Statistics and counts obtained from iVar variants VCF file. + +
+ +[iVar](https://github.com/andersen-lab/ivar/blob/master/docs/MANUAL.md) is a computational package that contains functions broadly useful for viral amplicon-based sequencing. We use iVar in this pipeline to [trim primer sequences](#ivar-trim) for amplicon input data as well as to call variants and for consensus sequence generation. + +![MultiQC - iVar variants called plot](images/mqc_ivar_variants_plot.png) + +### BCFTools and BEDTools + +
+Output files + +* `variants/bcftools/` + * `*.vcf.gz`: Variants VCF file. + * `*.vcf.gz.tbi`: Variants VCF index file. +* `variants/bcftools/consensus/` + * `*.consensus.fa`: Consensus Fasta file generated by integrating the variants called by BCFTools into the reference genome. +* `variants/bcftools/consensus/base_qc/` + * `*.ACTG_density.pdf`: Plot showing density of ACGT bases within the consensus sequence. + * `*.base_counts.pdf`: Plot showing frequency and percentages of all bases in consensus sequence. + * `*.base_counts.tsv`: File containing frequency and percentages of all bases in consensus sequence. + * `*.N_density.pdf`: Plot showing density of N bases within the consensus sequence. + * `*.N_run.tsv`: File containing start positions and width of N bases in consensus sequence. +* `variants/bcftools/bcftools_stats/` + * `*.bcftools_stats.txt`: Statistics and counts obtained from VCF file. + +
+ +[BCFtools](http://samtools.github.io/bcftools/bcftools.html) can be used to call variants directly from BAM alignment files. The functionality to call variants with BCFTools in this pipeline was inspired by work carried out by [Conor Walker](https://github.com/conorwalker/covid19/blob/3cb26ec399417bedb7e60487415c78a405f517d6/scripts/call_variants.sh). + +[BCFtools](http://samtools.github.io/bcftools/bcftools.html) is a set of utilities that manipulate variant calls in [VCF](https://vcftools.github.io/specs.html) and its binary counterpart BCF format. BCFTools is used in the variant calling and *de novo* assembly steps of this pipeline to obtain basic statistics from the VCF output. It can also be used to generate a consensus sequence by integrating variant calls into the reference genome. + +[BEDTools](https://bedtools.readthedocs.io/en/latest/) is a swiss-army knife of tools for a wide-range of genomics analysis tasks. In this pipeline we use `bedtools genomecov` to compute the per-base mapped read coverage in bedGraph format, and `bedtools maskfasta` to mask sequences in a Fasta file based on intervals defined in a feature file. This may be useful for creating your own masked genome file based on custom annotations or for masking all but your target regions when aligning sequence data from a targeted capture experiment. + +![MultiQC - BCFTools variant counts](images/mqc_bcftools_stats_plot.png) + +### SnpEff and SnpSift + +
+Output files + +* `variants//snpeff/` + * `*.snpeff.csv`: Variant annotation csv file. + * `*.snpeff.genes.txt`: Gene table for annotated variants. + * `*.snpeff.summary.html`: Summary html file for variants. + * `*.snpeff.vcf.gz`: VCF file with variant annotations. + * `*.snpeff.vcf.gz.tbi`: Index for VCF file with variant annotations. + * `*.snpsift.txt`: SnpSift summary table. +* `variants//snpeff/bcftools_stats/` + * `*.bcftools_stats.txt`: Statistics and counts obtained from VCF file. + +**NB:** The value of `` in the output directory name above is determined by the `--callers` parameter (Default: 'ivar' for '--protocol amplicon' and 'bcftools' for '--protocol metagenomic'). + +
+ +[SnpEff](http://snpeff.sourceforge.net/SnpEff.html) is a genetic variant annotation and functional effect prediction toolbox. It annotates and predicts the effects of genetic variants on genes and proteins (such as amino acid changes). + +[SnpSift](http://snpeff.sourceforge.net/SnpSift.html) annotates genomic variants using databases, filters, and manipulates genomic annotated variants. After annotation with SnpEff, you can use SnpSift to help filter large genomic datasets in order to find the most significant variants. + +![MultiQC - SnpEff annotation counts](images/mqc_snpeff_plot.png) + +### QUAST + +
+Output files + +* `variants//quast/` + * `report.html`: Results report in HTML format. Also available in various other file formats i.e. `report.pdf`, `report.tex`, `report.tsv` and `report.txt`. + +**NB:** The value of `` in the output directory name above is determined by the `--callers` parameter (Default: 'ivar' for '--protocol amplicon' and 'bcftools' for '--protocol metagenomic'). + +
+ +[QUAST](http://bioinf.spbau.ru/quast) is used to generate a single report with which to evaluate the quality of the consensus sequence across all of the samples provided to the pipeline. The HTML results can be opened within any browser (we recommend using Google Chrome). Please see the [QUAST output docs](http://quast.sourceforge.net/docs/manual.html#sec3) for more detailed information regarding the output files. + +### Pangolin + +
+Output files + +* `variants//pangolin/` + * `*.pangolin.csv`: Lineage analysis results from Pangolin. + +**NB:** The value of `` in the output directory name above is determined by the `--callers` parameter (Default: 'ivar' for '--protocol amplicon' and 'bcftools' for '--protocol metagenomic'). + +
+ +Phylogenetic Assignment of Named Global Outbreak LINeages ([Pangolin](https://github.com/cov-lineages/pangolin)) has been used extensively during the COVID-19 pandemic to assign lineages to SARS-CoV-2 genome sequenced samples. A [web application](https://pangolin.cog-uk.io/) also exists that allows users to upload genome sequences via a web browser to assign lineages to genome sequences of SARS-CoV-2, view descriptive characteristics of the assigned lineage(s), view the placement of the lineage in a phylogeny of global samples, and view the temporal and geographic distribution of the assigned lineage(s). + +### Nextclade + +
+Output files + +* `variants//nextclade/` + * `*.csv`: Analysis results from Nextclade containing genome clade assignment, mutation calling and sequence quality checks. + +**NB:** The value of `` in the output directory name above is determined by the `--callers` parameter (Default: 'ivar' for '--protocol amplicon' and 'bcftools' for '--protocol metagenomic'). + +
+ +[Nextclade](https://github.com/nextstrain/nextclade) performs viral genome clade assignment, mutation calling and sequence quality checks for the consensus sequences generated in this pipeline. Similar to Pangolin, it has been used extensively during the COVID-19 pandemic. A [web application](https://clades.nextstrain.org/) also exists that allows users to upload genome sequences via a web browser. + +### ASCIIGenome + +
+Output files + +* `variants//asciigenome//` + * `*.pdf`: Individual variant screenshots with annotation tracks in PDF format. + +**NB:** The value of `` in the output directory name above is determined by the `--callers` parameter (Default: 'ivar' for '--protocol amplicon' and 'bcftools' for '--protocol metagenomic'). + +
+ +As described in the documentation, [ASCIIGenome](https://asciigenome.readthedocs.io/en/latest/) is a command-line genome browser that can be run from a terminal window and is solely based on ASCII characters. The closest program to ASCIIGenome is probably [samtools tview](http://www.htslib.org/doc/samtools-tview.html) but ASCIIGenome offers much more flexibility, similar to popular GUI viewers like the [IGV](https://software.broadinstitute.org/software/igv/) browser. We are using the batch processing mode of ASCIIGenome in this pipeline to generate individual screenshots for all of the variant sites reported for each sample in the VCF files. This is incredibly useful to be able to quickly QC the variants called by the pipeline without having to tediously load all of the relevant tracks into a conventional genome browser. Where possible, the BAM read alignments, VCF variant file, primer BED file and GFF annotation track will be represented in the screenshot for contextual purposes. The screenshot below shows a SNP called relative to the MN908947.3 SARS-CoV-2 reference genome that overlaps the ORF7a protein and the nCoV-2019_91_LEFT primer from the ARTIC v3 protocol. + +

+ ASCIIGenome screenshot +

+ +### BCFTools isec + +
+Output files + +* `variants/intersect//` + * `*.vcf.gz`: VCF file containing variants common to both variant callers. There will be one file for each caller - see `README.txt` for details. + * `*.vcf.gz.tbi`: Index for VCF file. + * `README.txt`: File containing command used and file name mappings. + * `sites.txt`: List of variants common to both variant callers in textual format. The last column indicates presence (1) or absence (0) amongst the 2 different callers. + +**NB:** This process will only be executed when both variant callers are specified to be run i.e. `--callers ivar,bcftools`. + +
+ +[BCFTools isec](http://samtools.github.io/bcftools/bcftools.html#isec) can be used to intersect the variant calls generated by the 2 different callers used in the pipeline. This permits a quick assessment of how consistently a particular variant is being called using different algorithms and to prioritise the investigation of the variants. + +## Illumina: De novo assembly + +A file called `summary_assembly_metrics_mqc.csv` containing a selection of read alignment and *de novo* assembly related metrics will be saved in the `multiqc/` results directory. The same metrics will also be added to the top of the MultiQC report. + +### Cutadapt + +
+Output files + +* `assembly/cutadapt/log/` + * `*.cutadapt.log`: Cutadapt log file generated from stdout. +* `assembly/cutadapt/fastqc/` + * `*_fastqc.html`: FastQC report of the trimmed reads. + * `*_fastqc.zip`: Zip archive containing the FastQC report. + +
+ +In the variant calling branch of the pipeline we are using [iVar trim](#ivar-trim) to remove primer sequences from the aligned BAM files for amplicon data. Since in the *de novo* assembly branch we don't align the reads, we use [Cutadapt](https://cutadapt.readthedocs.io/en/stable/guide.html) as an alternative option to remove and clean the primer sequences directly from FastQ files. + +![MultiQC - Cutadapt filtered reads plot](images/mqc_cutadapt_plot.png) + +### SPAdes + +
+Output files + +* `assembly/spades//` + * `*.scaffolds.fa`: SPAdes scaffold assembly. + * `*.contigs.fa`: SPAdes assembly contigs. + * `*.assembly.gfa`: SPAdes assembly graph in [GFA](https://github.com/GFA-spec/GFA-spec/blob/master/GFA1.md) format. +* `assembly/spades//bandage/` + * `*.png`: Bandage visualisation for SPAdes assembly graph in PNG format. + * `*.svg`: Bandage visualisation for SPAdes assembly graph in SVG format. + +**NB:** The value of `` in the output directory name above is determined by the `--spades_mode` parameter (Default: 'rnaviral'). + +
+ +[SPAdes](http://cab.spbu.ru/software/spades/) is an assembly toolkit containing various assembly pipelines. Generically speaking, SPAdes is one of the most popular de Bruijn graph-based assembly algorithms used for bacterial/viral genome reconstruction. + +[Bandage](https://rrwick.github.io/Bandage/) is a program for visualising *de novo* assembly graphs. By displaying connections which are not present in the contigs file, Bandage opens up new possibilities for analysing *de novo* assemblies. + +### Unicycler + +
+Output files + +* `assembly/unicycler/` + * `*.scaffolds.fa`: Unicycler scaffold assembly. + * `*.assembly.gfa`: Unicycler assembly graph in GFA format. +* `assembly/unicycler/bandage/` + * `*.png`: Bandage visualisation for Unicycler assembly graph in PNG format. + * `*.svg`: Bandage visualisation for Unicycler assembly graph in SVG format. + +
+ +[Unicycler](https://github.com/rrwick/Unicycler) is an assembly pipeline for bacterial genomes. It can assemble Illumina-only read sets where it functions as a SPAdes-optimiser. + +### minia + +
+Output files + +* `assembly/minia/` + * `*.contigs.fa`: Minia scaffold assembly. + * `*.unitigs.fa`: Minia unitigs fasta file. + * `*.h5`: Minia h5 output file. + +
+ +[Minia](https://github.com/GATB/minia) is a short-read assembler based on a de Bruijn graph, capable of assembling a human genome on a desktop computer in a day. The output of Minia is a set of contigs. Minia produces results of similar contiguity and accuracy to other de Bruijn assemblers. + +### BLAST + +
+Output files + +* `assembly/<ASSEMBLER>/blastn/` + * `*.blastn.txt`: BLAST results against the target virus. + * `*.filter.blastn.txt`: Filtered BLAST results. + +**NB:** The value of `<ASSEMBLER>` in the output directory name above is determined by the `--assemblers` parameter (Default: 'spades'). + +
+ +[blastn](https://blast.ncbi.nlm.nih.gov/Blast.cgi?PAGE_TYPE=BlastSearch) is used to align the assembled contigs against the virus reference genome. + +### ABACAS + +
+Output files + +* `assembly/<ASSEMBLER>/abacas/` + * `*.abacas.bin`: Bin file that contains contigs that are not used in ordering. + * `*.abacas.crunch`: Comparison file. + * `*.abacas.fasta`: Ordered and orientated sequence file. + * `*.abacas.gaps`: Gap information. + * `*.abacas.gaps.tab`: Gap information in tab-delimited format. + * `*.abacas.MULTIFASTA.fa`: A list of ordered and orientated contigs in a multi-fasta format. + * `*.abacas.tab`: Feature file. + * `*.unused_contigs.out`: Information on contigs that have mapping information but could not be used in the ordering. +* `assembly/<ASSEMBLER>/abacas/nucmer/`: Folder containing the files generated by the NUCmer algorithm used by ABACAS. + +**NB:** The value of `<ASSEMBLER>` in the output directory name above is determined by the `--assemblers` parameter (Default: 'spades'). + +
+ +[ABACAS](https://www.sanger.ac.uk/science/tools/pagit) was developed to rapidly contiguate (align, order, orientate), visualize and design primers to close gaps on shotgun assembled contigs based on a reference sequence. + +### PlasmidID + +
+Output files + +* `assembly/<ASSEMBLER>/plasmidid/<SAMPLE>/` + * `*_final_results.html`: Summary file with reference coverage stats and contigs for visualization. + * `*_final_results.tab`: Summary file with reference coverage stats and contigs. + * `images/<SAMPLE>_<REF_NAME>.png`: PNG file with the visualization of the alignment between the viral assembly and the reference viral genome. + * `logs/`: Log files. + +**NB:** The value of `<ASSEMBLER>` in the output directory name above is determined by the `--assemblers` parameter (Default: 'spades'). + +
+ +[PlasmidID](https://github.com/BU-ISCIII/plasmidID) is used to graphically represent the alignment of the reference genome relative to a given assembly. This helps to visualize the coverage of the reference genome in the assembly. To find more information about the output files, refer to the [documentation](https://github.com/BU-ISCIII/plasmidID/wiki/Understanding-the-image:-track-by-track). + +### Assembly QUAST + +
+Output files + +* `assembly/<ASSEMBLER>/quast/` + * `report.html`: Results report in HTML format. Also available in various other file formats i.e. `report.pdf`, `report.tex`, `report.tsv` and `report.txt`. + +**NB:** The value of `<ASSEMBLER>` in the output directory name above is determined by the `--assemblers` parameter (Default: 'spades'). + +
+ +[QUAST](http://bioinf.spbau.ru/quast) is used to generate a single report with which to evaluate the quality of the *de novo* assemblies across all of the samples provided to the pipeline. The HTML results can be opened within any browser (we recommend using Google Chrome). Please see the [QUAST output docs](http://quast.sourceforge.net/docs/manual.html#sec3) for more detailed information regarding the output files. + +![MultiQC - QUAST contig counts](images/mqc_quast_plot.png) + +## Illumina: Workflow reporting and genomes + +### MultiQC + +
+Output files + +* `multiqc/` + * `multiqc_report.html`: a standalone HTML file that can be viewed in your web browser. + * `multiqc_data/`: directory containing parsed statistics from the different tools used in the pipeline. + * `summary_variants_metrics_mqc.csv`: file containing a selection of read alignment and variant calling metrics. The same metrics will also be added to the top of the MultiQC report. + * `summary_assembly_metrics_mqc.csv`: file containing a selection of read alignment and *de novo* assembly related metrics. The same metrics will also be added to the top of the MultiQC report. + +
+ +[MultiQC](http://multiqc.info) is a visualization tool that generates a single HTML report summarizing all samples in your project. Most of the pipeline QC results are visualised in the report and further statistics are available in the report data directory. + +Results generated by MultiQC collate pipeline QC from FastQC, fastp, Cutadapt, Bowtie 2, Kraken 2, samtools, picard CollectMultipleMetrics, BCFTools, SnpEff and QUAST. + +The default [`multiqc config file`](https://github.com/nf-core/viralrecon/blob/master/assets/multiqc_config_illumina.yaml) has been written in a way in which to structure these QC metrics to make them more interpretable in the final report. + +The pipeline has special steps which also allow the software versions to be reported in the MultiQC output for future traceability. For more information about how to use MultiQC reports, see <http://multiqc.info>. + +An example MultiQC report generated from a full-sized dataset can be viewed on the [nf-core website](https://nf-co.re/viralrecon/results). + +### Reference genome files + +
+Output files + +* `genome/` + * Unzipped genome fasta file for viral genome + * Unzipped genome annotation GFF file for viral genome +* `genome/index/` + * `bowtie2/`: Bowtie 2 index for viral genome. +* `genome/db/` + * `blast_db/`: BLAST database for viral genome. + * `kraken2_db/`: Kraken 2 database for host genome. + * `snpeff_db/`: SnpEff database for viral genome. + * `snpeff.config`: SnpEff config file for viral genome. + +
+ +A number of genome-specific files are generated by the pipeline because they are required for the downstream processing of the results. If the `--save_reference` parameter is provided then the Bowtie 2 alignment indices, BLAST and Kraken 2 databases downloaded/generated by the pipeline will be saved in the `genome/` directory. It is recommended to use the `--save_reference` parameter if you are using the pipeline to build a Kraken 2 database for the host genome. Building the database can be quite a time-consuming process, and saving these files permits their reuse for future runs of the pipeline or for other purposes. + +# Pipeline information + +
+Output files + +* `pipeline_info/` + * Reports generated by Nextflow: `execution_report.html`, `execution_timeline.html`, `execution_trace.txt` and `pipeline_dag.dot`/`pipeline_dag.svg`. + * Reports generated by the pipeline: `pipeline_report.html`, `pipeline_report.txt` and `software_versions.csv`. + * Reformatted samplesheet files used as input to the pipeline: `samplesheet.valid.csv`. + * Documentation for interpretation of results in HTML format: `results_description.html`. + +
+ +[Nextflow](https://www.nextflow.io/docs/latest/tracing.html) provides excellent functionality for generating various reports relevant to the running and execution of the pipeline. This will allow you to troubleshoot errors with the running of the pipeline, and also provide you with other information such as launch commands, run times and resource usage. diff --git a/docs/usage.md b/docs/usage.md index d45a2384..7b1657c8 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -139,20 +139,20 @@ They are loaded in sequence, so later profiles can overwrite earlier profiles. If `-profile` is not specified, the pipeline will run locally and expect all software to be installed and available on the `PATH`. This is _not_ recommended. * `docker` - * A generic configuration profile to be used with [Docker](https://docker.com/) + * A generic configuration profile to be used with [Docker](https://docker.com/) * `singularity` - * A generic configuration profile to be used with [Singularity](https://sylabs.io/docs/) + * A generic configuration profile to be used with [Singularity](https://sylabs.io/docs/) * `podman` - * A generic configuration profile to be used with [Podman](https://podman.io/) + * A generic configuration profile to be used with [Podman](https://podman.io/) * `shifter` - * A generic configuration profile to be used with [Shifter](https://nersc.gitlab.io/development/shifter/how-to-use/) + * A generic configuration profile to be used with [Shifter](https://nersc.gitlab.io/development/shifter/how-to-use/) * `charliecloud` - * A generic configuration profile to be used with [Charliecloud](https://hpc.github.io/charliecloud/) + * A generic configuration profile to be used with [Charliecloud](https://hpc.github.io/charliecloud/) * `conda` - * A generic configuration profile to be used with [Conda](https://conda.io/docs/). Please only use Conda as a last resort i.e. when it's not possible to run the pipeline with Docker, Singularity, Podman, Shifter or Charliecloud. 
+ * A generic configuration profile to be used with [Conda](https://conda.io/docs/). Please only use Conda as a last resort i.e. when it's not possible to run the pipeline with Docker, Singularity, Podman, Shifter or Charliecloud. * `test` - * A profile with a complete configuration for automated testing - * Includes links to test data so needs no other parameters + * A profile with a complete configuration for automated testing + * Includes links to test data so needs no other parameters ### `-resume` @@ -177,26 +177,26 @@ For example, if the nf-core/rnaseq pipeline is failing after multiple re-submiss Error executing process > 'RNASEQ:ALIGN_STAR:STAR_ALIGN (WT_REP1)' Caused by: - Process `RNASEQ:ALIGN_STAR:STAR_ALIGN (WT_REP1)` terminated with an error exit status (137) + Process `RNASEQ:ALIGN_STAR:STAR_ALIGN (WT_REP1)` terminated with an error exit status (137) Command executed: - STAR \ - --genomeDir star \ - --readFilesIn WT_REP1_trimmed.fq.gz \ - --runThreadN 2 \ - --outFileNamePrefix WT_REP1. \ - + STAR \ + --genomeDir star \ + --readFilesIn WT_REP1_trimmed.fq.gz \ + --runThreadN 2 \ + --outFileNamePrefix WT_REP1. \ + Command exit status: - 137 + 137 Command output: - (empty) + (empty) Command error: - .command.sh: line 9: 30 Killed STAR --genomeDir star --readFilesIn WT_REP1_trimmed.fq.gz --runThreadN 2 --outFileNamePrefix WT_REP1. + .command.sh: line 9: 30 Killed STAR --genomeDir star --readFilesIn WT_REP1_trimmed.fq.gz --runThreadN 2 --outFileNamePrefix WT_REP1. 
Work dir: - /home/pipelinetest/work/9d/172ca5881234073e8d76f2a19c88fb + /home/pipelinetest/work/9d/172ca5881234073e8d76f2a19c88fb Tip: you can replicate the issue by changing to the process work dir and entering the command `bash .command.run` ``` @@ -205,9 +205,9 @@ To bypass this error you would need to find exactly which resources are set by t ```nextflow process { - withName: STAR_ALIGN { - memory = 100.GB - } + withName: STAR_ALIGN { + memory = 100.GB + } } ``` @@ -240,11 +240,11 @@ As you will see in the example below, we have: ```nextflow params { modules { - 'star_align' { - args = "--quantMode TranscriptomeSAM --twopassMode Basic --outSAMtype BAM Unsorted --readFilesCommand zcat --runRNGseed 0 --outFilterMultimapNmax 20 --alignSJDBoverhangMin 1 --outSAMattributes NH HI AS NM MD --quantTranscriptomeBan Singleend --outFilterMismatchNmax 16" - publish_dir = "my_star_directory" - publish_files = ['out':'log', 'tab':'log', 'bam':''] - } + 'star_align' { + args = "--quantMode TranscriptomeSAM --twopassMode Basic --outSAMtype BAM Unsorted --readFilesCommand zcat --runRNGseed 0 --outFilterMultimapNmax 20 --alignSJDBoverhangMin 1 --outSAMattributes NH HI AS NM MD --quantTranscriptomeBan Singleend --outFilterMismatchNmax 16" + publish_dir = "my_star_directory" + publish_files = ['out':'log', 'tab':'log', 'bam':''] + } } } ``` diff --git a/lib/NfcoreSchema.groovy b/lib/NfcoreSchema.groovy index fa92d4d9..852a2ad1 100755 --- a/lib/NfcoreSchema.groovy +++ b/lib/NfcoreSchema.groovy @@ -1,6 +1,6 @@ -/* - * This file holds several functions used to perform JSON parameter validation, help and summary rendering for the nf-core pipeline template. - */ +// +// This file holds several functions used to perform JSON parameter validation, help and summary rendering for the nf-core pipeline template. 
+// import org.everit.json.schema.Schema import org.everit.json.schema.loader.SchemaLoader @@ -12,18 +12,18 @@ import groovy.json.JsonSlurper import groovy.json.JsonBuilder class NfcoreSchema { - - /* - * Resolve Schema path relative to main workflow directory - */ + + // + // Resolve Schema path relative to main workflow directory + // public static String getSchemaPath(workflow, schema_filename='nextflow_schema.json') { return "${workflow.projectDir}/${schema_filename}" } - /* - * Function to loop over all parameters defined in schema and check - * whether the given parameters adhere to the specifications - */ + // + // Function to loop over all parameters defined in schema and check + // whether the given parameters adhere to the specifications + // /* groovylint-disable-next-line UnusedPrivateMethodParameter */ public static void validateParameters(workflow, params, log, schema_filename='nextflow_schema.json') { def has_error = false @@ -177,9 +177,9 @@ class NfcoreSchema { } } - /* - * Beautify parameters for --help - */ + // + // Beautify parameters for --help + // public static String paramsHelp(workflow, params, command, schema_filename='nextflow_schema.json') { Map colors = NfcoreTemplate.logColours(params.monochrome_logs) Integer num_hidden = 0 @@ -234,9 +234,9 @@ class NfcoreSchema { return output } - /* - * Groovy Map summarising parameters/workflow options used by the pipeline - */ + // + // Groovy Map summarising parameters/workflow options used by the pipeline + // public static LinkedHashMap paramsSummaryMap(workflow, params, schema_filename='nextflow_schema.json') { // Get a selection of core Nextflow workflow options def Map workflow_summary = [:] @@ -303,9 +303,9 @@ class NfcoreSchema { return [ 'Core Nextflow options' : workflow_summary ] << params_summary } - /* - * Beautify parameters for summary and return as string - */ + // + // Beautify parameters for summary and return as string + // public static String paramsSummaryLog(workflow, params) 
{ Map colors = NfcoreTemplate.logColours(params.monochrome_logs) String output = '' @@ -325,10 +325,10 @@ class NfcoreSchema { output += NfcoreTemplate.dashedLine(params.monochrome_logs) return output } - - /* - * Loop over nested exceptions and print the causingException - */ + + // + // Loop over nested exceptions and print the causingException + // private static void printExceptions(ex_json, params_json, log) { def causingExceptions = ex_json['causingExceptions'] if (causingExceptions.length() == 0) { @@ -353,9 +353,9 @@ class NfcoreSchema { } } - /* - * Remove an element from a JSONArray - */ + // + // Remove an element from a JSONArray + // private static JSONArray removeElement(json_array, element) { def list = [] int len = json_array.length() @@ -367,9 +367,9 @@ class NfcoreSchema { return jsArray } - /* - * Remove ignored parameters - */ + // + // Remove ignored parameters + // private static JSONObject removeIgnoredParams(raw_schema, params) { // Remove anything that's in params.schema_ignore_params params.schema_ignore_params.split(',').each{ ignore_param -> @@ -399,9 +399,9 @@ class NfcoreSchema { return raw_schema } - /* - * Clean and check parameters relative to Nextflow native classes - */ + // + // Clean and check parameters relative to Nextflow native classes + // private static Map cleanParameters(params) { def new_params = params.getClass().newInstance(params) for (p in params) { @@ -425,9 +425,9 @@ class NfcoreSchema { return new_params } - /* - * This function tries to read a JSON params file - */ + // + // This function tries to read a JSON params file + // private static LinkedHashMap paramsLoad(String json_schema) { def params_map = new LinkedHashMap() try { @@ -439,15 +439,14 @@ class NfcoreSchema { return params_map } - /* - Method to actually read in JSON file using Groovy. - Group (as Key), values are all parameters - - Parameter1 as Key, Description as Value - - Parameter2 as Key, Description as Value - .... 
- Group - - - */ + // + // Method to actually read in JSON file using Groovy. + // Group (as Key), values are all parameters + // - Parameter1 as Key, Description as Value + // - Parameter2 as Key, Description as Value + // .... + // Group + // - private static LinkedHashMap paramsRead(String json_schema) throws Exception { def json = new File(json_schema).text def Map schema_definitions = (Map) new JsonSlurper().parseText(json).get('definitions') @@ -499,9 +498,9 @@ class NfcoreSchema { return params_map } - /* - * Get maximum number of characters across all parameter names - */ + // + // Get maximum number of characters across all parameter names + // private static Integer paramsMaxChars(params_map) { Integer max_chars = 0 for (group in params_map.keySet()) { diff --git a/lib/NfcoreTemplate.groovy b/lib/NfcoreTemplate.groovy index 9157c556..dbf8e614 100755 --- a/lib/NfcoreTemplate.groovy +++ b/lib/NfcoreTemplate.groovy @@ -1,14 +1,14 @@ -/* - * This file holds several functions used within the nf-core pipeline template. - */ +// +// This file holds several functions used within the nf-core pipeline template. 
+// import org.yaml.snakeyaml.Yaml class NfcoreTemplate { - /* - * Check AWS Batch related parameters have been specified correctly - */ + // + // Check AWS Batch related parameters have been specified correctly + // public static void awsBatch(workflow, params) { if (workflow.profile.contains('awsbatch')) { // Check params.awsqueue and params.awsregion have been set if running on AWSBatch @@ -18,9 +18,9 @@ class NfcoreTemplate { } } - /* - * Check params.hostnames - */ + // + // Check params.hostnames + // public static void hostName(workflow, params, log) { Map colors = logColours(params.monochrome_logs) if (params.hostnames) { @@ -39,9 +39,9 @@ class NfcoreTemplate { } } - /* - * Construct and send completion email - */ + // + // Construct and send completion email + // public static void email(workflow, params, summary_params, projectDir, log, multiqc_report=[], fail_mapped_reads=[:]) { // Set up the e-mail variables @@ -57,7 +57,7 @@ class NfcoreTemplate { for (group in summary_params.keySet()) { summary << summary_params[group] } - + def misc_fields = [:] misc_fields['Date Started'] = workflow.start misc_fields['Date Completed'] = workflow.complete @@ -84,7 +84,7 @@ class NfcoreTemplate { email_fields['summary'] = summary << misc_fields email_fields['fail_mapped_reads'] = fail_mapped_reads.keySet() email_fields['min_mapped_reads'] = params.min_mapped_reads - + // On success try attach the multiqc report def mqc_report = null try { @@ -121,7 +121,7 @@ class NfcoreTemplate { def email_html = html_template.toString() // Render the sendmail template - def max_multiqc_email_size = params.max_multiqc_email_size as nextflow.util.MemoryUnit + def max_multiqc_email_size = params.max_multiqc_email_size as nextflow.util.MemoryUnit def smail_fields = [ email: email_address, subject: subject, email_txt: email_txt, email_html: email_html, projectDir: "$projectDir", mqcFile: mqc_report, mqcMaxSize: max_multiqc_email_size.toBytes()] def sf = new 
File("$projectDir/assets/sendmail_template.txt") def sendmail_template = engine.createTemplate(sf).make(smail_fields) @@ -157,9 +157,9 @@ class NfcoreTemplate { output_tf.withWriter { w -> w << email_txt } } - /* - * Print pipeline summary on completion - */ + // + // Print pipeline summary on completion + // public static void summary(workflow, params, log, fail_mapped_reads=[:], pass_mapped_reads=[:]) { Map colors = logColours(params.monochrome_logs) @@ -197,9 +197,9 @@ class NfcoreTemplate { } } - /* - * ANSII Colours used for terminal logging - */ + // + // ANSII Colours used for terminal logging + // public static Map logColours(Boolean monochrome_logs) { Map colorcodes = [:] @@ -265,17 +265,17 @@ class NfcoreTemplate { return colorcodes } - /* - * Does what is says on the tin - */ + // + // Does what is says on the tin + // public static String dashedLine(monochrome_logs) { Map colors = logColours(monochrome_logs) return "-${colors.dim}----------------------------------------------------${colors.reset}-" } - /* - * nf-core logo - */ + // + // nf-core logo + // public static String logo(workflow, monochrome_logs) { Map colors = logColours(monochrome_logs) String.format( diff --git a/lib/Utils.groovy b/lib/Utils.groovy index cdbafc31..18173e98 100755 --- a/lib/Utils.groovy +++ b/lib/Utils.groovy @@ -1,47 +1,47 @@ -/* - * This file holds several Groovy functions that could be useful for any Nextflow pipeline - */ - -import org.yaml.snakeyaml.Yaml - -class Utils { - - /* - * When running with -profile conda, warn if channels have not been set-up appropriately - */ - public static void checkCondaChannels(log) { - Yaml parser = new Yaml() - def channels = [] - try { - def config = parser.load("conda config --show channels".execute().text) - channels = config.channels - } catch(NullPointerException | IOException e) { - log.warn "Could not verify conda channel configuration." 
- return - } - - // Check that all channels are present - def required_channels = ['conda-forge', 'bioconda', 'defaults'] - def conda_check_failed = !required_channels.every { ch -> ch in channels } - - // Check that they are in the right order - conda_check_failed |= !(channels.indexOf('conda-forge') < channels.indexOf('bioconda')) - conda_check_failed |= !(channels.indexOf('bioconda') < channels.indexOf('defaults')) - - if (conda_check_failed) { - log.warn "=============================================================================\n" + - " There is a problem with your Conda configuration!\n\n" + - " You will need to set-up the conda-forge and bioconda channels correctly.\n" + - " Please refer to https://bioconda.github.io/user/install.html#set-up-channels\n" + - " NB: The order of the channels matters!\n" + - "===================================================================================" - } - } - - /* - * Join module args with appropriate spacing - */ - public static String joinModuleArgs(args_list) { - return ' ' + args_list.join(' ') - } -} +// +// This file holds several Groovy functions that could be useful for any Nextflow pipeline +// + +import org.yaml.snakeyaml.Yaml + +class Utils { + + // + // When running with -profile conda, warn if channels have not been set-up appropriately + // + public static void checkCondaChannels(log) { + Yaml parser = new Yaml() + def channels = [] + try { + def config = parser.load("conda config --show channels".execute().text) + channels = config.channels + } catch(NullPointerException | IOException e) { + log.warn "Could not verify conda channel configuration." 
+ return + } + + // Check that all channels are present + def required_channels = ['conda-forge', 'bioconda', 'defaults'] + def conda_check_failed = !required_channels.every { ch -> ch in channels } + + // Check that they are in the right order + conda_check_failed |= !(channels.indexOf('conda-forge') < channels.indexOf('bioconda')) + conda_check_failed |= !(channels.indexOf('bioconda') < channels.indexOf('defaults')) + + if (conda_check_failed) { + log.warn "=============================================================================\n" + + " There is a problem with your Conda configuration!\n\n" + + " You will need to set-up the conda-forge and bioconda channels correctly.\n" + + " Please refer to https://bioconda.github.io/user/install.html#set-up-channels\n" + + " NB: The order of the channels matters!\n" + + "===================================================================================" + } + } + + // + // Join module args with appropriate spacing + // + public static String joinModuleArgs(args_list) { + return ' ' + args_list.join(' ') + } +} diff --git a/lib/WorkflowCommons.groovy b/lib/WorkflowCommons.groovy index f2635a08..a9a94421 100755 --- a/lib/WorkflowCommons.groovy +++ b/lib/WorkflowCommons.groovy @@ -1,12 +1,12 @@ -/* - *This file holds several functions common to the multiple workflows in the nf-core/viralrecon pipeline - */ +// +// This file holds several functions common to the multiple workflows in the nf-core/viralrecon pipeline +// class WorkflowCommons { - /* - * Exit pipeline if incorrect --genome key provided - */ + // + // Exit pipeline if incorrect --genome key provided + // private static void genomeExistsError(params, log) { if (params.genomes && params.genome && !params.genomes.containsKey(params.genome)) { log.error "=============================================================================\n" + @@ -18,9 +18,9 @@ class WorkflowCommons { } } - /* - * Get workflow summary for MultiQC - */ + // + // Get workflow summary for 
MultiQC + // public static String paramsSummaryMultiqc(workflow, summary) { String summary_section = '' for (group in summary.keySet()) { @@ -45,9 +45,9 @@ class WorkflowCommons { return yaml_file_text } - /* - * Function to check whether primer BED file has the correct suffixes as provided to the pipeline - */ + // + // Function to check whether primer BED file has the correct suffixes as provided to the pipeline + // public static void checkPrimerSuffixes(primer_bed_file, primer_left_suffix, primer_right_suffix, log) { def total = 0 def left = 0 @@ -67,15 +67,15 @@ class WorkflowCommons { " All of the values in that column do not end with those supplied by:\n" + " --primer_left_suffix : $primer_left_suffix\n" + " --primer_right_suffix: $primer_right_suffix\n\n" + - " This information is required to collapse the primer intervals into amplicons\n" + + " This information is required to collapse the primer intervals into amplicons\n" + " for the coverage plots generated by the pipeline.\n" + "===================================================================================" } } - /* - * Function to get lineage from Pangolin output file - */ + // + // Function to get lineage from Pangolin output file + // public static String getPangolinLineage(pangolin_report) { def lineage = '' pangolin_report.eachLine { line -> @@ -84,9 +84,9 @@ class WorkflowCommons { return lineage } - /* - * Function to get number of variants reported in BCFTools stats file - */ + // + // Function to get number of variants reported in BCFTools stats file + // public static Integer getNumVariantsFromBCFToolsStats(bcftools_stats) { def num_vars = 0 bcftools_stats.eachLine { line -> diff --git a/lib/WorkflowIllumina.groovy b/lib/WorkflowIllumina.groovy index dad01609..fbcc540f 100755 --- a/lib/WorkflowIllumina.groovy +++ b/lib/WorkflowIllumina.groovy @@ -1,14 +1,14 @@ -/* - * This file holds several functions specific to the workflow/illumina.nf in the nf-core/viralrecon pipeline - */ +// +// 
This file holds several functions specific to the workflow/illumina.nf in the nf-core/viralrecon pipeline +// import groovy.json.JsonSlurper class WorkflowIllumina { - /* - * Check and validate parameters - */ + // + // Check and validate parameters + // public static void initialise(params, log, valid_params) { WorkflowCommons.genomeExistsError(params, log) @@ -18,18 +18,18 @@ class WorkflowIllumina { System.exit(1) } - if (!params.fasta) { + if (!params.fasta) { log.error "Genome fasta file not specified with e.g. '--fasta genome.fa' or via a detectable config file." System.exit(1) } if (!params.skip_kraken2 && !params.kraken2_db) { - if (!params.kraken2_db_name) { + if (!params.kraken2_db_name) { log.error "Please specify a valid name to build Kraken2 database for host e.g. '--kraken2_db_name human'." System.exit(1) } } - + // Variant calling parameter validation def callers = params.callers ? params.callers.split(',').collect{ it.trim().toLowerCase() } : [] if ((valid_params['callers'] + callers).unique().size() != valid_params['callers'].size()) { @@ -55,9 +55,9 @@ class WorkflowIllumina { } } - /* - * Print warning if genome fasta has more than one sequence - */ + // + // Print warning if genome fasta has more than one sequence + // public static void isMultiFasta(fasta_file, log) { def count = 0 def line = null @@ -67,7 +67,7 @@ class WorkflowIllumina { count++ if (count > 1) { log.warn "=============================================================================\n" + - " This pipeline does not officially support multi-fasta genome files!\n\n" + + " This pipeline does not officially support multi-fasta genome files!\n\n" + " The parameters and processes are tailored for viral genome analysis.\n" + " Please amend the '--fasta' parameter.\n" + "===================================================================================" @@ -78,9 +78,9 @@ class WorkflowIllumina { } } - /* - * Function that parses and returns the number of mapped reasds from flagstat 
files - */ + // + // Function that parses and returns the number of mapped reasds from flagstat files + // public static ArrayList getFlagstatMappedReads(flagstat_file, params) { def mapped_reads = 0 flagstat_file.eachLine { line -> @@ -88,7 +88,7 @@ class WorkflowIllumina { mapped_reads = line.tokenize().first().toInteger() } } - + def pass = false def logname = flagstat_file.getBaseName() - 'flagstat' if (mapped_reads > params.min_mapped_reads.toInteger()) { @@ -97,9 +97,9 @@ class WorkflowIllumina { return [ mapped_reads, pass ] } - /* - * Check if the primer BED file supplied to the pipeline is from the SWIFT/SNAP protocol - */ + // + // Check if the primer BED file supplied to the pipeline is from the SWIFT/SNAP protocol + // public static void checkIfSwiftProtocol(primer_bed_file, name_prefix, log) { def count = 0 def line = null @@ -110,8 +110,8 @@ class WorkflowIllumina { count++ if (count > 1) { log.warn "=============================================================================\n" + - " Found '${name_prefix}' in the name field of the primer BED file!\n" + - " This suggests that you have used the SWIFT/SNAP protocol to prep your samples.\n" + + " Found '${name_prefix}' in the name field of the primer BED file!\n" + + " This suggests that you have used the SWIFT/SNAP protocol to prep your samples.\n" + " If so, please set '--ivar_trim_offset 5' as suggested in the issue below:\n" + " https://github.com/nf-core/viralrecon/issues/170\n" + "===================================================================================" @@ -122,9 +122,9 @@ class WorkflowIllumina { } } - /* - * Function that parses fastp json output file to get total number of reads after trimming - */ + // + // Function that parses fastp json output file to get total number of reads after trimming + // public static Integer getFastpReadsAfterFiltering(json_file) { def Map json = (Map) new JsonSlurper().parseText(json_file.text).get('summary') return 
json['after_filtering']['total_reads'].toInteger() diff --git a/lib/WorkflowMain.groovy b/lib/WorkflowMain.groovy index bf3ab106..b12aacc9 100755 --- a/lib/WorkflowMain.groovy +++ b/lib/WorkflowMain.groovy @@ -1,15 +1,15 @@ -/* - *This file holds several functions specific to the main.nf workflow in the nf-core/viralrecon pipeline - */ +// +// This file holds several functions specific to the main.nf workflow in the nf-core/viralrecon pipeline +// class WorkflowMain { - /* - * Citation string for pipeline - */ + // + // Citation string for pipeline + // public static String citation(workflow) { return "If you use ${workflow.manifest.name} for your analysis please cite:\n\n" + - "* The pipeline\n" + + "* The pipeline\n" + " https://doi.org/10.5281/zenodo.3901628\n\n" + "* The nf-core framework\n" + " https://doi.org/10.1038/s41587-020-0439-x\n\n" + @@ -17,9 +17,9 @@ class WorkflowMain { " https://github.com/${workflow.manifest.name}/blob/master/CITATIONS.md" } - /* - * Print help to screen if required - */ + // + // Print help to screen if required + // public static String help(workflow, params, log) { def command = "nextflow run nf-core/viralrecon --input samplesheet.csv --genome 'MN908947.3' -profile docker" def help_string = '' @@ -30,9 +30,9 @@ class WorkflowMain { return help_string } - /* - * Print parameter summary log to screen - */ + // + // Print parameter summary log to screen + // public static String paramsSummaryLog(workflow, params, log) { def summary_log = '' summary_log += NfcoreTemplate.logo(workflow, params.monochrome_logs) @@ -42,9 +42,9 @@ class WorkflowMain { return summary_log } - /* - * Validate parameters and print summary to screen - */ + // + // Validate parameters and print summary to screen + // public static void initialise(workflow, params, log) { // Print help to screen if required if (params.help) { @@ -84,14 +84,14 @@ class WorkflowMain { } } - /* - * Get attribute from genome config file e.g. 
fasta - */ + // + // Get attribute from genome config file e.g. fasta + // public static String getGenomeAttribute(params, attribute, log, primer_set='', primer_set_version=0) { def val = '' def support_link = " The default genome config used by the pipeline can be found here:\n" + " - https://github.com/nf-core/configs/blob/master/conf/pipeline/viralrecon/genomes.config\n\n" + - " If you would still like to blame us please come and find us on nf-core Slack:\n" + + " If you would still like to blame us please come and find us on nf-core Slack:\n" + " - https://nf-co.re/viralrecon#contributions-and-support\n" + "=============================================================================" if (params.genomes && params.genome && params.genomes.containsKey(params.genome)) { diff --git a/lib/WorkflowNanopore.groovy b/lib/WorkflowNanopore.groovy index 1fd218f8..8b2eb74a 100755 --- a/lib/WorkflowNanopore.groovy +++ b/lib/WorkflowNanopore.groovy @@ -1,17 +1,17 @@ -/* - * This file holds several functions specific to the workflow/nanopore.nf in the nf-core/viralrecon pipeline - */ +// +// This file holds several functions specific to the workflow/nanopore.nf in the nf-core/viralrecon pipeline +// class WorkflowNanopore { - /* - * Check and validate parameters - */ + // + // Check and validate parameters + // public static void initialise(params, log, valid_params) { WorkflowCommons.genomeExistsError(params, log) // Generic parameter validation - if (!params.fasta) { + if (!params.fasta) { log.error "Genome fasta file not specified with e.g. '--fasta genome.fa' or via a detectable config file." 
System.exit(1) } diff --git a/lib/WorkflowSraDownload.groovy b/lib/WorkflowSraDownload.groovy index b80877e6..69e697da 100755 --- a/lib/WorkflowSraDownload.groovy +++ b/lib/WorkflowSraDownload.groovy @@ -1,12 +1,12 @@ -/* - * This file holds functions specific to the workflow/sra_download.nf in the nf-core/viralrecon pipeline - */ +// +// This file holds functions specific to the workflow/sra_download.nf in the nf-core/viralrecon pipeline +// class WorkflowSraDownload { - /* - * Print a warning after SRA download has completed - */ + // + // Print a warning after SRA download has completed + // public static void sraDownloadWarn(log) { log.warn "=============================================================================\n" + " Please double-check the samplesheet that has been auto-created using the\n" + diff --git a/main.nf b/main.nf index 8606daf7..32b1f140 100644 --- a/main.nf +++ b/main.nf @@ -13,7 +13,7 @@ nextflow.enable.dsl = 2 /* ======================================================================================== - GENOME PARAMETER VALUES + GENOME PARAMETER VALUES ======================================================================================== */ @@ -38,7 +38,7 @@ params.primer_bed = WorkflowMain.getGenomeAttribute(params, 'primer_bed', log VALIDATE & PRINT PARAMETER SUMMARY ======================================================================================== */ - + WorkflowMain.initialise(workflow, params, log) /* @@ -48,24 +48,24 @@ WorkflowMain.initialise(workflow, params, log) */ workflow NFCORE_VIRALRECON { - - /* - * WORKFLOW: Get SRA run information for public database ids, download and md5sum check FastQ files, auto-create samplesheet - */ + + // + // WORKFLOW: Get SRA run information for public database ids, download and md5sum check FastQ files, auto-create samplesheet + // if (params.public_data_ids) { include { SRA_DOWNLOAD } from './workflows/sra_download' SRA_DOWNLOAD () - - /* - * WORKFLOW: Variant and de novo assembly 
analysis for Illumina data - */ + + // + // WORKFLOW: Variant and de novo assembly analysis for Illumina data + // } else if (params.platform == 'illumina') { include { ILLUMINA } from './workflows/illumina' ILLUMINA () - /* - * WORKFLOW: Variant analysis for Nanopore data - */ + // + // WORKFLOW: Variant analysis for Nanopore data + // } else if (params.platform == 'nanopore') { include { NANOPORE } from './workflows/nanopore' NANOPORE () @@ -78,10 +78,10 @@ workflow NFCORE_VIRALRECON { ======================================================================================== */ -/* - * WORKFLOW: Execute a single named workflow for the pipeline - * See: https://github.com/nf-core/rnaseq/issues/619 - */ +// +// WORKFLOW: Execute a single named workflow for the pipeline +// See: https://github.com/nf-core/rnaseq/issues/619 +// workflow { NFCORE_VIRALRECON () } diff --git a/modules/local/asciigenome.nf b/modules/local/asciigenome.nf index bdd257b5..d9c6acb3 100644 --- a/modules/local/asciigenome.nf +++ b/modules/local/asciigenome.nf @@ -1,58 +1,58 @@ -// Import generic module functions -include { initOptions; saveFiles; getSoftwareName } from './functions' - -params.options = [:] -options = initOptions(params.options) - -process ASCIIGENOME { - tag "$meta.id" - label 'process_medium' - publishDir "${params.outdir}", - mode: params.publish_dir_mode, - saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), meta:meta, publish_by_meta:['id']) } - - conda (params.enable_conda ? 
"bioconda::asciigenome=1.16.0" : null) - if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { - container "https://depot.galaxyproject.org/singularity/asciigenome:1.16.0--0" - } else { - container "quay.io/biocontainers/asciigenome:1.16.0--0" - } - - input: - tuple val(meta), path(bam), path(vcf) - path fasta - path gff - path bed - val window - val track_height - - output: - tuple val(meta), path("*pdf"), emit: pdf - path "*.version.txt" , emit: version - - script: - def software = getSoftwareName(task.process) - def prefix = options.suffix ? "${meta.id}${options.suffix}" : "${meta.id}" - def gff_track = gff ? "$gff" : '' - def bed_track = bed ? "$bed" : '' - def paired_end = meta.single_end ? '' : '&& readsAsPairs -on' - """ - zcat $vcf \\ - | grep -v '#' \\ - | awk -v FS='\t' -v OFS='\t' '{print \$1, (\$2-$window-1), (\$2+$window)}' \\ - > variants.bed - - ASCIIGenome \\ - -ni \\ - -x "trackHeight 0 bam#1 && trackHeight $track_height bam@2 $paired_end && filterVariantReads && save ${prefix}.%r.pdf" \\ - --batchFile variants.bed \\ - --fasta $fasta \\ - $bam \\ - $vcf \\ - $bed_track \\ - $gff_track \\ - > /dev/null - - echo \$(ASCIIGenome -ni --version 2>&1) | sed -e "s/ASCIIGenome //g" > ${software}.version.txt - """ -} \ No newline at end of file +// Import generic module functions +include { initOptions; saveFiles; getSoftwareName } from './functions' + +params.options = [:] +options = initOptions(params.options) + +process ASCIIGENOME { + tag "$meta.id" + label 'process_medium' + publishDir "${params.outdir}", + mode: params.publish_dir_mode, + saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), meta:meta, publish_by_meta:['id']) } + + conda (params.enable_conda ? 
"bioconda::asciigenome=1.16.0" : null) + if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { + container "https://depot.galaxyproject.org/singularity/asciigenome:1.16.0--0" + } else { + container "quay.io/biocontainers/asciigenome:1.16.0--0" + } + + input: + tuple val(meta), path(bam), path(vcf) + path fasta + path gff + path bed + val window + val track_height + + output: + tuple val(meta), path("*pdf"), emit: pdf + path "*.version.txt" , emit: version + + script: + def software = getSoftwareName(task.process) + def prefix = options.suffix ? "${meta.id}${options.suffix}" : "${meta.id}" + def gff_track = gff ? "$gff" : '' + def bed_track = bed ? "$bed" : '' + def paired_end = meta.single_end ? '' : '&& readsAsPairs -on' + """ + zcat $vcf \\ + | grep -v '#' \\ + | awk -v FS='\t' -v OFS='\t' '{print \$1, (\$2-$window-1), (\$2+$window)}' \\ + > variants.bed + + ASCIIGenome \\ + -ni \\ + -x "trackHeight 0 bam#1 && trackHeight $track_height bam@2 $paired_end && filterVariantReads && save ${prefix}.%r.pdf" \\ + --batchFile variants.bed \\ + --fasta $fasta \\ + $bam \\ + $vcf \\ + $bed_track \\ + $gff_track \\ + > /dev/null + + echo \$(ASCIIGenome -ni --version 2>&1) | sed -e "s/ASCIIGenome //g" > ${software}.version.txt + """ +} diff --git a/modules/local/bcftools_isec.nf b/modules/local/bcftools_isec.nf index 429a6e8c..f9b6ffb6 100644 --- a/modules/local/bcftools_isec.nf +++ b/modules/local/bcftools_isec.nf @@ -9,7 +9,7 @@ process BCFTOOLS_ISEC { publishDir "${params.outdir}", mode: params.publish_dir_mode, saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), meta:meta, publish_by_meta:['id']) } - + conda (params.enable_conda ? 
"bioconda::bcftools=1.11" : null) if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { container "https://depot.galaxyproject.org/singularity/bcftools:1.11--h7c999a4_0" diff --git a/modules/local/collapse_primers.nf b/modules/local/collapse_primers.nf index c1161989..2b81410f 100644 --- a/modules/local/collapse_primers.nf +++ b/modules/local/collapse_primers.nf @@ -1,36 +1,36 @@ -// Import generic module functions -include { saveFiles } from './functions' - -params.options = [:] - -process COLLAPSE_PRIMERS { - tag "$bed" - label 'process_medium' - publishDir "${params.outdir}", - mode: params.publish_dir_mode, - saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:'primers', meta:[:], publish_by_meta:[]) } - - conda (params.enable_conda ? "conda-forge::python=3.8.3" : null) - if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { - container "https://depot.galaxyproject.org/singularity/python:3.8.3" - } else { - container "quay.io/biocontainers/python:3.8.3" - } - - input: - path bed - val left_suffix - val right_suffix - - output: - path '*.bed', emit: bed - - script: - """ - collapse_primer_bed.py \\ - --left_primer_suffix $left_suffix \\ - --right_primer_suffix $right_suffix \\ - $bed \\ - ${bed.baseName}.collapsed.bed - """ -} \ No newline at end of file +// Import generic module functions +include { saveFiles } from './functions' + +params.options = [:] + +process COLLAPSE_PRIMERS { + tag "$bed" + label 'process_medium' + publishDir "${params.outdir}", + mode: params.publish_dir_mode, + saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:'primers', meta:[:], publish_by_meta:[]) } + + conda (params.enable_conda ? 
"conda-forge::python=3.8.3" : null) + if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { + container "https://depot.galaxyproject.org/singularity/python:3.8.3" + } else { + container "quay.io/biocontainers/python:3.8.3" + } + + input: + path bed + val left_suffix + val right_suffix + + output: + path '*.bed', emit: bed + + script: + """ + collapse_primer_bed.py \\ + --left_primer_suffix $left_suffix \\ + --right_primer_suffix $right_suffix \\ + $bed \\ + ${bed.baseName}.collapsed.bed + """ +} diff --git a/modules/local/cutadapt.nf b/modules/local/cutadapt.nf index 567b5925..7d75385b 100644 --- a/modules/local/cutadapt.nf +++ b/modules/local/cutadapt.nf @@ -1,49 +1,48 @@ -// Import generic module functions -include { initOptions; saveFiles; getSoftwareName } from './functions' - -params.options = [:] -options = initOptions(params.options) - -process CUTADAPT { - tag "$meta.id" - label 'process_medium' - publishDir "${params.outdir}", - mode: params.publish_dir_mode, - saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), meta:meta, publish_by_meta:['id']) } - - conda (params.enable_conda ? 'bioconda::cutadapt=3.2' : null) - if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { - container 'https://depot.galaxyproject.org/singularity/cutadapt:3.2--py38h0213d0e_0' - } else { - container 'quay.io/biocontainers/cutadapt:3.2--py38h0213d0e_0' - } - - input: - tuple val(meta), path(reads) - path adapters - - output: - tuple val(meta), path('*.fastq.gz'), emit: reads - tuple val(meta), path('*.log') , emit: log - path '*.version.txt' , emit: version - - script: - def software = getSoftwareName(task.process) - def prefix = options.suffix ? "${meta.id}${options.suffix}" : "${meta.id}" - def paired = meta.single_end ? "-a file:adapters.sub.fa" : "-a file:adapters.sub.fa -A file:adapters.sub.fa" - def trimmed = meta.single_end ? 
"-o ${prefix}.fastq.gz" : "-o ${prefix}_1.fastq.gz -p ${prefix}_2.fastq.gz" - """ - sed -r '/^[ACTGactg]+\$/ s/\$/X/g' $adapters > adapters.sub.fa - - cutadapt \\ - --cores $task.cpus \\ - $options.args \\ - $paired \\ - $trimmed \\ - $reads \\ - > ${prefix}.cutadapt.log - - echo \$(cutadapt --version) > ${software}.version.txt - """ -} - +// Import generic module functions +include { initOptions; saveFiles; getSoftwareName } from './functions' + +params.options = [:] +options = initOptions(params.options) + +process CUTADAPT { + tag "$meta.id" + label 'process_medium' + publishDir "${params.outdir}", + mode: params.publish_dir_mode, + saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), meta:meta, publish_by_meta:['id']) } + + conda (params.enable_conda ? 'bioconda::cutadapt=3.2' : null) + if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { + container 'https://depot.galaxyproject.org/singularity/cutadapt:3.2--py38h0213d0e_0' + } else { + container 'quay.io/biocontainers/cutadapt:3.2--py38h0213d0e_0' + } + + input: + tuple val(meta), path(reads) + path adapters + + output: + tuple val(meta), path('*.fastq.gz'), emit: reads + tuple val(meta), path('*.log') , emit: log + path '*.version.txt' , emit: version + + script: + def software = getSoftwareName(task.process) + def prefix = options.suffix ? "${meta.id}${options.suffix}" : "${meta.id}" + def paired = meta.single_end ? "-a file:adapters.sub.fa" : "-a file:adapters.sub.fa -A file:adapters.sub.fa" + def trimmed = meta.single_end ? 
"-o ${prefix}.fastq.gz" : "-o ${prefix}_1.fastq.gz -p ${prefix}_2.fastq.gz" + """ + sed -r '/^[ACTGactg]+\$/ s/\$/X/g' $adapters > adapters.sub.fa + + cutadapt \\ + --cores $task.cpus \\ + $options.args \\ + $paired \\ + $trimmed \\ + $reads \\ + > ${prefix}.cutadapt.log + + echo \$(cutadapt --version) > ${software}.version.txt + """ +} diff --git a/modules/local/filter_blastn.nf b/modules/local/filter_blastn.nf index 8761314d..004314f0 100644 --- a/modules/local/filter_blastn.nf +++ b/modules/local/filter_blastn.nf @@ -17,14 +17,14 @@ process FILTER_BLASTN { } else { container "biocontainers/biocontainers:v1.2.0_cv1" } - + input: tuple val(meta), path(hits) path header - + output: tuple val(meta), path('*.txt'), emit: txt - + script: def prefix = options.suffix ? "${meta.id}${options.suffix}" : "${meta.id}" """ diff --git a/modules/local/functions.nf b/modules/local/functions.nf index 9d0137e3..2e2a7956 100644 --- a/modules/local/functions.nf +++ b/modules/local/functions.nf @@ -1,19 +1,17 @@ -/* - * ----------------------------------------------------- - * Utility functions used in nf-core DSL2 module files - * ----------------------------------------------------- - */ +// +// Utility functions used in nf-core DSL2 module files +// -/* - * Extract name of software tool from process name using $task.process - */ +// +// Extract name of software tool from process name using $task.process +// def getSoftwareName(task_process) { return task_process.tokenize(':')[-1].tokenize('_')[0].toLowerCase() } -/* - * Function to initialise default values and to generate a Groovy Map of available options for nf-core modules - */ +// +// Function to initialise default values and to generate a Groovy Map of available options for nf-core modules +// def initOptions(Map args) { def Map options = [:] options.args = args.args ?: '' @@ -26,18 +24,18 @@ def initOptions(Map args) { return options } -/* - * Tidy up and join elements of a list to return a path string - */ +// +// Tidy up 
and join elements of a list to return a path string +// def getPathFromList(path_list) { def paths = path_list.findAll { item -> !item?.trim().isEmpty() } // Remove empty entries paths = paths.collect { it.trim().replaceAll("^[/]+|[/]+\$", "") } // Trim whitespace and trailing slashes return paths.join('/') } -/* - * Function to save/publish module results - */ +// +// Function to save/publish module results +// def saveFiles(Map args) { if (!args.filename.endsWith('.version.txt')) { def ioptions = initOptions(args.options) diff --git a/modules/local/get_software_versions.nf b/modules/local/get_software_versions.nf index 9b0e3585..7078394a 100644 --- a/modules/local/get_software_versions.nf +++ b/modules/local/get_software_versions.nf @@ -3,14 +3,11 @@ include { saveFiles } from './functions' params.options = [:] -/* - * Parse software version numbers - */ process GET_SOFTWARE_VERSIONS { publishDir "${params.outdir}", mode: params.publish_dir_mode, saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:'pipeline_info', meta:[:], publish_by_meta:[]) } - + conda (params.enable_conda ? 
"conda-forge::python=3.8.3" : null) if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { container "https://depot.galaxyproject.org/singularity/python:3.8.3" @@ -22,7 +19,7 @@ process GET_SOFTWARE_VERSIONS { input: path versions - + output: path "software_versions.csv" , emit: csv path 'software_versions_mqc.yaml', emit: yaml diff --git a/modules/local/ivar_variants_to_vcf.nf b/modules/local/ivar_variants_to_vcf.nf index e4babe63..3ff46e0f 100644 --- a/modules/local/ivar_variants_to_vcf.nf +++ b/modules/local/ivar_variants_to_vcf.nf @@ -4,9 +4,6 @@ include { initOptions; saveFiles; getSoftwareName } from './functions' params.options = [:] options = initOptions(params.options) -/* - * Convert IVar tsv output to vcf - */ process IVAR_VARIANTS_TO_VCF { tag "$meta.id" publishDir "${params.outdir}", @@ -23,7 +20,7 @@ process IVAR_VARIANTS_TO_VCF { input: tuple val(meta), path(tsv) path header - + output: tuple val(meta), path("*.vcf"), emit: vcf tuple val(meta), path("*.log"), emit: log @@ -37,7 +34,7 @@ process IVAR_VARIANTS_TO_VCF { ${prefix}.vcf \\ $options.args \\ > ${prefix}.variant_counts.log - + cat $header ${prefix}.variant_counts.log > ${prefix}.variant_counts_mqc.tsv """ -} \ No newline at end of file +} diff --git a/modules/local/kraken2_build.nf b/modules/local/kraken2_build.nf index 229fab7b..ae7bc2f6 100644 --- a/modules/local/kraken2_build.nf +++ b/modules/local/kraken2_build.nf @@ -1,36 +1,36 @@ -// Import generic module functions -include { initOptions; saveFiles; getSoftwareName } from './functions' - -params.options = [:] -options = initOptions(params.options) - -process KRAKEN2_BUILD { - label 'process_high' - publishDir "${params.outdir}", - mode: params.publish_dir_mode, - saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), meta:[:], publish_by_meta:[]) } - - conda (params.enable_conda ? 
'bioconda::kraken2=2.1.1' : null) - if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { - container 'https://depot.galaxyproject.org/singularity/kraken2:2.1.1--pl526hc9558a2_0' - } else { - container 'quay.io/biocontainers/kraken2:2.1.1--pl526hc9558a2_0' - } - - input: - val library - - output: - path 'kraken2_db' , emit: db - path '*.version.txt', emit: version - - script: - def software = getSoftwareName(task.process) - """ - kraken2-build --db kraken2_db --threads $task.cpus $options.args --download-taxonomy - kraken2-build --db kraken2_db --threads $task.cpus $options.args2 --download-library $library - kraken2-build --db kraken2_db --threads $task.cpus $options.args3 --build - - echo \$(kraken2 --version 2>&1) | sed 's/^.*Kraken version //; s/ .*\$//' > ${software}.version.txt - """ -} +// Import generic module functions +include { initOptions; saveFiles; getSoftwareName } from './functions' + +params.options = [:] +options = initOptions(params.options) + +process KRAKEN2_BUILD { + label 'process_high' + publishDir "${params.outdir}", + mode: params.publish_dir_mode, + saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), meta:[:], publish_by_meta:[]) } + + conda (params.enable_conda ? 
'bioconda::kraken2=2.1.1' : null) + if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { + container 'https://depot.galaxyproject.org/singularity/kraken2:2.1.1--pl526hc9558a2_0' + } else { + container 'quay.io/biocontainers/kraken2:2.1.1--pl526hc9558a2_0' + } + + input: + val library + + output: + path 'kraken2_db' , emit: db + path '*.version.txt', emit: version + + script: + def software = getSoftwareName(task.process) + """ + kraken2-build --db kraken2_db --threads $task.cpus $options.args --download-taxonomy + kraken2-build --db kraken2_db --threads $task.cpus $options.args2 --download-library $library + kraken2-build --db kraken2_db --threads $task.cpus $options.args3 --build + + echo \$(kraken2 --version 2>&1) | sed 's/^.*Kraken version //; s/ .*\$//' > ${software}.version.txt + """ +} diff --git a/modules/local/make_bed_mask.nf b/modules/local/make_bed_mask.nf index a7b92788..aa084a99 100644 --- a/modules/local/make_bed_mask.nf +++ b/modules/local/make_bed_mask.nf @@ -4,15 +4,12 @@ include { initOptions; saveFiles } from './functions' params.options = [:] options = initOptions(params.options) -/* - * Make a consensus mask - */ process MAKE_BED_MASK { tag "$meta.id" publishDir "${params.outdir}", mode: params.publish_dir_mode, saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:'bed', meta:meta, publish_by_meta:['id']) } - + conda (params.enable_conda ? 
"conda-forge::python=3.8.3" : null) if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { container "https://depot.galaxyproject.org/singularity/python:3.8.3" @@ -22,7 +19,7 @@ process MAKE_BED_MASK { input: tuple val(meta), path(vcf), path(bed) - + output: tuple val(meta), path("*.bed"), emit: bed @@ -31,4 +28,4 @@ process MAKE_BED_MASK { """ make_bed_mask.py $vcf $bed ${prefix}.bed """ -} \ No newline at end of file +} diff --git a/modules/local/multiqc_custom_twocol_tsv.nf b/modules/local/multiqc_custom_twocol_tsv.nf index 196bc018..89a603a9 100644 --- a/modules/local/multiqc_custom_twocol_tsv.nf +++ b/modules/local/multiqc_custom_twocol_tsv.nf @@ -7,14 +7,14 @@ process MULTIQC_CUSTOM_TWOCOL_TSV { publishDir "${params.outdir}", mode: params.publish_dir_mode, saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), meta:[:], publish_by_meta:[]) } - + conda (params.enable_conda ? "conda-forge::sed=4.7" : null) if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { container "https://containers.biocontainers.pro/s3/SingImgsRepo/biocontainers/v1.2.0_cv1/biocontainers_v1.2.0_cv1.img" } else { container "biocontainers/biocontainers:v1.2.0_cv1" } - + input: val tsv_data val col1_name diff --git a/modules/local/multiqc_illumina.nf b/modules/local/multiqc_illumina.nf index 9994d196..306ffa1d 100644 --- a/modules/local/multiqc_illumina.nf +++ b/modules/local/multiqc_illumina.nf @@ -9,7 +9,7 @@ process MULTIQC { publishDir "${params.outdir}", mode: params.publish_dir_mode, saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), meta:[:], publish_by_meta:[]) } - + conda (params.enable_conda ? 
"bioconda::multiqc=1.10.1" : null) if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { container "https://depot.galaxyproject.org/singularity/multiqc:1.10.1--pyhdfd78af_1" @@ -44,7 +44,7 @@ process MULTIQC { path ('assembly_spades/*') path ('assembly_unicycler/*') path ('assembly_minia/*') - + output: path "*multiqc_report.html" , emit: report path "*_data" , emit: data diff --git a/modules/local/multiqc_nanopore.nf b/modules/local/multiqc_nanopore.nf index a6072bf3..c4421d27 100644 --- a/modules/local/multiqc_nanopore.nf +++ b/modules/local/multiqc_nanopore.nf @@ -50,4 +50,3 @@ process MULTIQC { multiqc -f $options.args -e general_stats --ignore *pangolin_lineage_mqc.tsv $custom_config . """ } - diff --git a/modules/local/plot_base_density.nf b/modules/local/plot_base_density.nf index ac7655e5..d5ddfce1 100644 --- a/modules/local/plot_base_density.nf +++ b/modules/local/plot_base_density.nf @@ -1,39 +1,36 @@ -// Import generic module functions -include { initOptions; saveFiles; getSoftwareName } from './functions' - -params.options = [:] -options = initOptions(params.options) - -process PLOT_BASE_DENSITY { - tag "$fasta" - label 'process_medium' - publishDir "${params.outdir}", - mode: params.publish_dir_mode, - saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:'plots', meta:meta, publish_by_meta:['id']) } - - conda (params.enable_conda ? 
"conda-forge::r-base=4.0.3 conda-forge::r-reshape2=1.4.4 conda-forge::r-optparse=1.6.6 conda-forge::r-ggplot2=3.3.3 conda-forge::r-scales=1.1.1 conda-forge::r-viridis=0.5.1 conda-forge::r-tidyverse=1.3.0 bioconda::bioconductor-biostrings=2.58.0 bioconda::bioconductor-complexheatmap=2.6.2" : null) - if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { - container "https://depot.galaxyproject.org/singularity/mulled-v2-ad9dd5f398966bf899ae05f8e7c54d0fb10cdfa7:05678da05b8e5a7a5130e90a9f9a6c585b965afa-0" - } else { - container "quay.io/biocontainers/mulled-v2-ad9dd5f398966bf899ae05f8e7c54d0fb10cdfa7:05678da05b8e5a7a5130e90a9f9a6c585b965afa-0" - } - - input: - tuple val(meta), path(fasta) - - output: - tuple val(meta), path('*.pdf'), emit: pdf - tuple val(meta), path('*.tsv'), emit: tsv - - script: - def prefix = options.suffix ? "${meta.id}${options.suffix}" : "${meta.id}" - """ - plot_base_density.r \\ - --fasta_files $fasta \\ - --prefixes $prefix \\ - --output_dir ./ - """ -} - - - +// Import generic module functions +include { initOptions; saveFiles; getSoftwareName } from './functions' + +params.options = [:] +options = initOptions(params.options) + +process PLOT_BASE_DENSITY { + tag "$fasta" + label 'process_medium' + publishDir "${params.outdir}", + mode: params.publish_dir_mode, + saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:'plots', meta:meta, publish_by_meta:['id']) } + + conda (params.enable_conda ? 
"conda-forge::r-base=4.0.3 conda-forge::r-reshape2=1.4.4 conda-forge::r-optparse=1.6.6 conda-forge::r-ggplot2=3.3.3 conda-forge::r-scales=1.1.1 conda-forge::r-viridis=0.5.1 conda-forge::r-tidyverse=1.3.0 bioconda::bioconductor-biostrings=2.58.0 bioconda::bioconductor-complexheatmap=2.6.2" : null) + if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { + container "https://depot.galaxyproject.org/singularity/mulled-v2-ad9dd5f398966bf899ae05f8e7c54d0fb10cdfa7:05678da05b8e5a7a5130e90a9f9a6c585b965afa-0" + } else { + container "quay.io/biocontainers/mulled-v2-ad9dd5f398966bf899ae05f8e7c54d0fb10cdfa7:05678da05b8e5a7a5130e90a9f9a6c585b965afa-0" + } + + input: + tuple val(meta), path(fasta) + + output: + tuple val(meta), path('*.pdf'), emit: pdf + tuple val(meta), path('*.tsv'), emit: tsv + + script: + def prefix = options.suffix ? "${meta.id}${options.suffix}" : "${meta.id}" + """ + plot_base_density.r \\ + --fasta_files $fasta \\ + --prefixes $prefix \\ + --output_dir ./ + """ +} diff --git a/modules/local/plot_mosdepth_regions.nf b/modules/local/plot_mosdepth_regions.nf index 3246ad40..2af85c52 100644 --- a/modules/local/plot_mosdepth_regions.nf +++ b/modules/local/plot_mosdepth_regions.nf @@ -1,36 +1,36 @@ -// Import generic module functions -include { initOptions; saveFiles } from './functions' - -params.options = [:] -options = initOptions(params.options) - -process PLOT_MOSDEPTH_REGIONS { - label 'process_medium' - publishDir "${params.outdir}", - mode: params.publish_dir_mode, - saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:'mosdepth', meta:[:], publish_by_meta:[]) } - - conda (params.enable_conda ? 
"conda-forge::r-base=4.0.3 conda-forge::r-reshape2=1.4.4 conda-forge::r-optparse=1.6.6 conda-forge::r-ggplot2=3.3.3 conda-forge::r-scales=1.1.1 conda-forge::r-viridis=0.5.1 conda-forge::r-tidyverse=1.3.0 bioconda::bioconductor-biostrings=2.58.0 bioconda::bioconductor-complexheatmap=2.6.2" : null) - if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { - container "https://depot.galaxyproject.org/singularity/mulled-v2-ad9dd5f398966bf899ae05f8e7c54d0fb10cdfa7:05678da05b8e5a7a5130e90a9f9a6c585b965afa-0" - } else { - container "quay.io/biocontainers/mulled-v2-ad9dd5f398966bf899ae05f8e7c54d0fb10cdfa7:05678da05b8e5a7a5130e90a9f9a6c585b965afa-0" - } - - input: - path beds - - output: - path '*.pdf', emit: pdf - path '*.tsv', emit: tsv - - script: - def prefix = options.suffix ?: "mosdepth" - """ - plot_mosdepth_regions.r \\ - --input_files ${beds.join(',')} \\ - --output_dir ./ \\ - --output_suffix $prefix \\ - $options.args - """ -} \ No newline at end of file +// Import generic module functions +include { initOptions; saveFiles } from './functions' + +params.options = [:] +options = initOptions(params.options) + +process PLOT_MOSDEPTH_REGIONS { + label 'process_medium' + publishDir "${params.outdir}", + mode: params.publish_dir_mode, + saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:'mosdepth', meta:[:], publish_by_meta:[]) } + + conda (params.enable_conda ? 
"conda-forge::r-base=4.0.3 conda-forge::r-reshape2=1.4.4 conda-forge::r-optparse=1.6.6 conda-forge::r-ggplot2=3.3.3 conda-forge::r-scales=1.1.1 conda-forge::r-viridis=0.5.1 conda-forge::r-tidyverse=1.3.0 bioconda::bioconductor-biostrings=2.58.0 bioconda::bioconductor-complexheatmap=2.6.2" : null) + if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { + container "https://depot.galaxyproject.org/singularity/mulled-v2-ad9dd5f398966bf899ae05f8e7c54d0fb10cdfa7:05678da05b8e5a7a5130e90a9f9a6c585b965afa-0" + } else { + container "quay.io/biocontainers/mulled-v2-ad9dd5f398966bf899ae05f8e7c54d0fb10cdfa7:05678da05b8e5a7a5130e90a9f9a6c585b965afa-0" + } + + input: + path beds + + output: + path '*.pdf', emit: pdf + path '*.tsv', emit: tsv + + script: + def prefix = options.suffix ?: "mosdepth" + """ + plot_mosdepth_regions.r \\ + --input_files ${beds.join(',')} \\ + --output_dir ./ \\ + --output_suffix $prefix \\ + $options.args + """ +} diff --git a/modules/local/samplesheet_check.nf b/modules/local/samplesheet_check.nf index 050f140d..3ee6c869 100644 --- a/modules/local/samplesheet_check.nf +++ b/modules/local/samplesheet_check.nf @@ -19,7 +19,7 @@ process SAMPLESHEET_CHECK { input: path samplesheet val platform - + output: path '*.csv' @@ -30,4 +30,4 @@ process SAMPLESHEET_CHECK { samplesheet.valid.csv \\ --platform $platform """ -} \ No newline at end of file +} diff --git a/modules/local/snpeff_ann.nf b/modules/local/snpeff_ann.nf index ae0fc474..19ee442a 100644 --- a/modules/local/snpeff_ann.nf +++ b/modules/local/snpeff_ann.nf @@ -1,50 +1,49 @@ -// Import generic module functions -include { initOptions; saveFiles; getSoftwareName } from './functions' - -params.options = [:] -options = initOptions(params.options) - -process SNPEFF_ANN { - tag "$meta.id" - label 'process_medium' - publishDir "${params.outdir}", - mode: params.publish_dir_mode, - saveAs: { filename -> saveFiles(filename:filename, options:params.options, 
publish_dir:getSoftwareName(task.process), meta:meta, publish_by_meta:['id']) } - - conda (params.enable_conda ? 'bioconda::snpeff=5.0' : null) - if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { - container 'https://depot.galaxyproject.org/singularity/snpeff:5.0--0' - } else { - container 'quay.io/biocontainers/snpeff:5.0--0' - } - - input: - tuple val(meta), path(vcf) - path db - path config - path fasta - - output: - tuple val(meta), path("*.vcf") , emit: vcf - tuple val(meta), path("*.csv") , emit: csv - tuple val(meta), path("*.genes.txt"), emit: txt - tuple val(meta), path("*.html") , emit: html - path '*.version.txt' , emit: version - - script: - def software = getSoftwareName(task.process) - def prefix = options.suffix ? "${meta.id}${options.suffix}" : "${meta.id}" - """ - snpEff ${fasta.baseName} \\ - -config $config \\ - -dataDir $db \\ - $options.args \\ - $vcf \\ - -csvStats ${prefix}.snpeff.csv \\ - > ${prefix}.snpeff.vcf - mv snpEff_summary.html ${prefix}.snpeff.summary.html - - echo \$(snpEff -version 2>&1) | sed 's/^.*SnpEff //; s/ .*\$//' > ${software}.version.txt - """ -} - +// Import generic module functions +include { initOptions; saveFiles; getSoftwareName } from './functions' + +params.options = [:] +options = initOptions(params.options) + +process SNPEFF_ANN { + tag "$meta.id" + label 'process_medium' + publishDir "${params.outdir}", + mode: params.publish_dir_mode, + saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), meta:meta, publish_by_meta:['id']) } + + conda (params.enable_conda ? 
'bioconda::snpeff=5.0' : null) + if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { + container 'https://depot.galaxyproject.org/singularity/snpeff:5.0--0' + } else { + container 'quay.io/biocontainers/snpeff:5.0--0' + } + + input: + tuple val(meta), path(vcf) + path db + path config + path fasta + + output: + tuple val(meta), path("*.vcf") , emit: vcf + tuple val(meta), path("*.csv") , emit: csv + tuple val(meta), path("*.genes.txt"), emit: txt + tuple val(meta), path("*.html") , emit: html + path '*.version.txt' , emit: version + + script: + def software = getSoftwareName(task.process) + def prefix = options.suffix ? "${meta.id}${options.suffix}" : "${meta.id}" + """ + snpEff ${fasta.baseName} \\ + -config $config \\ + -dataDir $db \\ + $options.args \\ + $vcf \\ + -csvStats ${prefix}.snpeff.csv \\ + > ${prefix}.snpeff.vcf + mv snpEff_summary.html ${prefix}.snpeff.summary.html + + echo \$(snpEff -version 2>&1) | sed 's/^.*SnpEff //; s/ .*\$//' > ${software}.version.txt + """ +} diff --git a/modules/local/snpeff_build.nf b/modules/local/snpeff_build.nf index e2f322fe..0bbddffe 100644 --- a/modules/local/snpeff_build.nf +++ b/modules/local/snpeff_build.nf @@ -1,50 +1,50 @@ -// Import generic module functions -include { initOptions; saveFiles; getSoftwareName } from './functions' - -params.options = [:] -options = initOptions(params.options) - -process SNPEFF_BUILD { - tag "$fasta" - label 'process_low' - publishDir "${params.outdir}", - mode: params.publish_dir_mode, - saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), meta:[:], publish_by_meta:[]) } - - conda (params.enable_conda ? 
'bioconda::snpeff=5.0' : null) - if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { - container 'https://depot.galaxyproject.org/singularity/snpeff:5.0--0' - } else { - container 'quay.io/biocontainers/snpeff:5.0--0' - } - - input: - path fasta - path gff - - output: - path 'snpeff_db' , emit: db - path '*.config' , emit: config - path '*.version.txt', emit: version - - script: - def software = getSoftwareName(task.process) - def basename = fasta.baseName - """ - mkdir -p snpeff_db/genomes/ - cd snpeff_db/genomes/ - ln -s ../../$fasta ${basename}.fa - - cd ../../ - mkdir -p snpeff_db/${basename}/ - cd snpeff_db/${basename}/ - ln -s ../../$gff genes.gff - - cd ../../ - echo "${basename}.genome : ${basename}" > snpeff.config - - snpEff build -config snpeff.config -dataDir ./snpeff_db -gff3 -v ${basename} - - echo \$(snpEff -version 2>&1) | sed 's/^.*SnpEff //; s/ .*\$//' > ${software}.version.txt - """ -} \ No newline at end of file +// Import generic module functions +include { initOptions; saveFiles; getSoftwareName } from './functions' + +params.options = [:] +options = initOptions(params.options) + +process SNPEFF_BUILD { + tag "$fasta" + label 'process_low' + publishDir "${params.outdir}", + mode: params.publish_dir_mode, + saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), meta:[:], publish_by_meta:[]) } + + conda (params.enable_conda ? 
'bioconda::snpeff=5.0' : null) + if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { + container 'https://depot.galaxyproject.org/singularity/snpeff:5.0--0' + } else { + container 'quay.io/biocontainers/snpeff:5.0--0' + } + + input: + path fasta + path gff + + output: + path 'snpeff_db' , emit: db + path '*.config' , emit: config + path '*.version.txt', emit: version + + script: + def software = getSoftwareName(task.process) + def basename = fasta.baseName + """ + mkdir -p snpeff_db/genomes/ + cd snpeff_db/genomes/ + ln -s ../../$fasta ${basename}.fa + + cd ../../ + mkdir -p snpeff_db/${basename}/ + cd snpeff_db/${basename}/ + ln -s ../../$gff genes.gff + + cd ../../ + echo "${basename}.genome : ${basename}" > snpeff.config + + snpEff build -config snpeff.config -dataDir ./snpeff_db -gff3 -v ${basename} + + echo \$(snpEff -version 2>&1) | sed 's/^.*SnpEff //; s/ .*\$//' > ${software}.version.txt + """ +} diff --git a/modules/local/snpsift_extractfields.nf b/modules/local/snpsift_extractfields.nf index 178e1f69..8e2cf30e 100644 --- a/modules/local/snpsift_extractfields.nf +++ b/modules/local/snpsift_extractfields.nf @@ -1,51 +1,51 @@ -// Import generic module functions -include { initOptions; saveFiles; getSoftwareName } from './functions' - -params.options = [:] -options = initOptions(params.options) - -process SNPSIFT_EXTRACTFIELDS { - tag "$meta.id" - label 'process_medium' - publishDir "${params.outdir}", - mode: params.publish_dir_mode, - saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), meta:meta, publish_by_meta:['id']) } - - conda (params.enable_conda ? 
'bioconda::snpsift=4.3.1t' : null) - if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { - container 'https://depot.galaxyproject.org/singularity/snpsift:4.3.1t--2' - } else { - container 'quay.io/biocontainers/snpsift:4.3.1t--2' - } - - input: - tuple val(meta), path(vcf) - - output: - tuple val(meta), path("*.snpsift.txt"), emit: txt - path '*.version.txt' , emit: version - - script: - def software = getSoftwareName(task.process) - def prefix = options.suffix ? "${meta.id}${options.suffix}" : "${meta.id}" - """ - SnpSift \\ - extractFields \\ - -s "," \\ - -e "." \\ - $options.args \\ - $vcf \\ - CHROM POS REF ALT \\ - "ANN[*].GENE" "ANN[*].GENEID" \\ - "ANN[*].IMPACT" "ANN[*].EFFECT" \\ - "ANN[*].FEATURE" "ANN[*].FEATUREID" \\ - "ANN[*].BIOTYPE" "ANN[*].RANK" "ANN[*].HGVS_C" \\ - "ANN[*].HGVS_P" "ANN[*].CDNA_POS" "ANN[*].CDNA_LEN" \\ - "ANN[*].CDS_POS" "ANN[*].CDS_LEN" "ANN[*].AA_POS" \\ - "ANN[*].AA_LEN" "ANN[*].DISTANCE" "EFF[*].EFFECT" \\ - "EFF[*].FUNCLASS" "EFF[*].CODON" "EFF[*].AA" "EFF[*].AA_LEN" \\ - > ${prefix}.snpsift.txt - - echo \$(SnpSift -h 2>&1) | sed 's/^.*SnpSift version //; s/ .*\$//' > ${software}.version.txt - """ -} +// Import generic module functions +include { initOptions; saveFiles; getSoftwareName } from './functions' + +params.options = [:] +options = initOptions(params.options) + +process SNPSIFT_EXTRACTFIELDS { + tag "$meta.id" + label 'process_medium' + publishDir "${params.outdir}", + mode: params.publish_dir_mode, + saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), meta:meta, publish_by_meta:['id']) } + + conda (params.enable_conda ? 
'bioconda::snpsift=4.3.1t' : null) + if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { + container 'https://depot.galaxyproject.org/singularity/snpsift:4.3.1t--2' + } else { + container 'quay.io/biocontainers/snpsift:4.3.1t--2' + } + + input: + tuple val(meta), path(vcf) + + output: + tuple val(meta), path("*.snpsift.txt"), emit: txt + path '*.version.txt' , emit: version + + script: + def software = getSoftwareName(task.process) + def prefix = options.suffix ? "${meta.id}${options.suffix}" : "${meta.id}" + """ + SnpSift \\ + extractFields \\ + -s "," \\ + -e "." \\ + $options.args \\ + $vcf \\ + CHROM POS REF ALT \\ + "ANN[*].GENE" "ANN[*].GENEID" \\ + "ANN[*].IMPACT" "ANN[*].EFFECT" \\ + "ANN[*].FEATURE" "ANN[*].FEATUREID" \\ + "ANN[*].BIOTYPE" "ANN[*].RANK" "ANN[*].HGVS_C" \\ + "ANN[*].HGVS_P" "ANN[*].CDNA_POS" "ANN[*].CDNA_LEN" \\ + "ANN[*].CDS_POS" "ANN[*].CDS_LEN" "ANN[*].AA_POS" \\ + "ANN[*].AA_LEN" "ANN[*].DISTANCE" "EFF[*].EFFECT" \\ + "EFF[*].FUNCLASS" "EFF[*].CODON" "EFF[*].AA" "EFF[*].AA_LEN" \\ + > ${prefix}.snpsift.txt + + echo \$(SnpSift -h 2>&1) | sed 's/^.*SnpSift version //; s/ .*\$//' > ${software}.version.txt + """ +} diff --git a/modules/local/sra_fastq_ftp.nf b/modules/local/sra_fastq_ftp.nf index d8c211df..4b800d19 100644 --- a/modules/local/sra_fastq_ftp.nf +++ b/modules/local/sra_fastq_ftp.nf @@ -1,50 +1,47 @@ -// Import generic module functions -include { initOptions; saveFiles; getSoftwareName } from './functions' - -params.options = [:] -options = initOptions(params.options) - -/* - * Download SRA data via FTP - */ -process SRA_FASTQ_FTP { - tag "$meta.id" - label 'process_medium' - label 'error_retry' - publishDir "${params.outdir}", - mode: params.publish_dir_mode, - saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), meta:meta, publish_by_meta:['id']) } - - conda (params.enable_conda ? 
"conda-forge::sed=4.7" : null) - if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { - container "https://containers.biocontainers.pro/s3/SingImgsRepo/biocontainers/v1.2.0_cv1/biocontainers_v1.2.0_cv1.img" - } else { - container "biocontainers/biocontainers:v1.2.0_cv1" - } - - input: - tuple val(meta), val(fastq) - - output: - tuple val(meta), path("*fastq.gz"), emit: fastq - tuple val(meta), path("*md5") , emit: md5 - - script: - if (meta.single_end) { - """ - bash -c 'until curl $options.args -L ${fastq[0]} -o ${meta.id}.fastq.gz; do sleep 1; done'; - echo "${meta.md5_1} ${meta.id}.fastq.gz" > ${meta.id}.fastq.gz.md5 - md5sum -c ${meta.id}.fastq.gz.md5 - """ - } else { - """ - bash -c 'until curl $options.args -L ${fastq[0]} -o ${meta.id}_1.fastq.gz; do sleep 1; done'; - echo "${meta.md5_1} ${meta.id}_1.fastq.gz" > ${meta.id}_1.fastq.gz.md5 - md5sum -c ${meta.id}_1.fastq.gz.md5 - - bash -c 'until curl $options.args -L ${fastq[1]} -o ${meta.id}_2.fastq.gz; do sleep 1; done'; - echo "${meta.md5_2} ${meta.id}_2.fastq.gz" > ${meta.id}_2.fastq.gz.md5 - md5sum -c ${meta.id}_2.fastq.gz.md5 - """ - } -} \ No newline at end of file +// Import generic module functions +include { initOptions; saveFiles; getSoftwareName } from './functions' + +params.options = [:] +options = initOptions(params.options) + +process SRA_FASTQ_FTP { + tag "$meta.id" + label 'process_medium' + label 'error_retry' + publishDir "${params.outdir}", + mode: params.publish_dir_mode, + saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), meta:meta, publish_by_meta:['id']) } + + conda (params.enable_conda ? 
"conda-forge::sed=4.7" : null) + if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { + container "https://containers.biocontainers.pro/s3/SingImgsRepo/biocontainers/v1.2.0_cv1/biocontainers_v1.2.0_cv1.img" + } else { + container "biocontainers/biocontainers:v1.2.0_cv1" + } + + input: + tuple val(meta), val(fastq) + + output: + tuple val(meta), path("*fastq.gz"), emit: fastq + tuple val(meta), path("*md5") , emit: md5 + + script: + if (meta.single_end) { + """ + bash -c 'until curl $options.args -L ${fastq[0]} -o ${meta.id}.fastq.gz; do sleep 1; done'; + echo "${meta.md5_1} ${meta.id}.fastq.gz" > ${meta.id}.fastq.gz.md5 + md5sum -c ${meta.id}.fastq.gz.md5 + """ + } else { + """ + bash -c 'until curl $options.args -L ${fastq[0]} -o ${meta.id}_1.fastq.gz; do sleep 1; done'; + echo "${meta.md5_1} ${meta.id}_1.fastq.gz" > ${meta.id}_1.fastq.gz.md5 + md5sum -c ${meta.id}_1.fastq.gz.md5 + + bash -c 'until curl $options.args -L ${fastq[1]} -o ${meta.id}_2.fastq.gz; do sleep 1; done'; + echo "${meta.md5_2} ${meta.id}_2.fastq.gz" > ${meta.id}_2.fastq.gz.md5 + md5sum -c ${meta.id}_2.fastq.gz.md5 + """ + } +} diff --git a/modules/local/sra_ids_to_runinfo.nf b/modules/local/sra_ids_to_runinfo.nf index 1a542a44..baee3eb9 100644 --- a/modules/local/sra_ids_to_runinfo.nf +++ b/modules/local/sra_ids_to_runinfo.nf @@ -1,34 +1,31 @@ -// Import generic module functions -include { saveFiles; getSoftwareName } from './functions' - -params.options = [:] - -/* - * Fetch SRA / ENA / GEO run information via the ENA API - */ -process SRA_IDS_TO_RUNINFO { - tag "$id" - label 'error_retry' - publishDir "${params.outdir}", - mode: params.publish_dir_mode, - saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), meta:[:], publish_by_meta:[]) } - - conda (params.enable_conda ? 
"conda-forge::requests=2.24.0" : null) - if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { - container "https://depot.galaxyproject.org/singularity/requests:2.24.0" - } else { - container "quay.io/biocontainers/requests:2.24.0" - } - - input: - val id - - output: - path "*.tsv", emit: tsv - - script: - """ - echo $id > id.txt - sra_ids_to_runinfo.py id.txt ${id}.runinfo.tsv - """ -} +// Import generic module functions +include { saveFiles; getSoftwareName } from './functions' + +params.options = [:] + +process SRA_IDS_TO_RUNINFO { + tag "$id" + label 'error_retry' + publishDir "${params.outdir}", + mode: params.publish_dir_mode, + saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), meta:[:], publish_by_meta:[]) } + + conda (params.enable_conda ? "conda-forge::requests=2.24.0" : null) + if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { + container "https://depot.galaxyproject.org/singularity/requests:2.24.0" + } else { + container "quay.io/biocontainers/requests:2.24.0" + } + + input: + val id + + output: + path "*.tsv", emit: tsv + + script: + """ + echo $id > id.txt + sra_ids_to_runinfo.py id.txt ${id}.runinfo.tsv + """ +} diff --git a/modules/local/sra_merge_samplesheet.nf b/modules/local/sra_merge_samplesheet.nf index 66d56e5e..423be634 100644 --- a/modules/local/sra_merge_samplesheet.nf +++ b/modules/local/sra_merge_samplesheet.nf @@ -1,34 +1,31 @@ -// Import generic module functions -include { saveFiles; getSoftwareName } from './functions' - -params.options = [:] - -/* - * Merge samplesheets across all samples - */ -process SRA_MERGE_SAMPLESHEET { - publishDir "${params.outdir}", - mode: params.publish_dir_mode, - saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), meta:[:], publish_by_meta:[]) } - - conda (params.enable_conda ? 
"conda-forge::sed=4.7" : null) - if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { - container "https://containers.biocontainers.pro/s3/SingImgsRepo/biocontainers/v1.2.0_cv1/biocontainers_v1.2.0_cv1.img" - } else { - container "biocontainers/biocontainers:v1.2.0_cv1" - } - - input: - path ('samplesheets/*') - - output: - path "*csv", emit: csv - - script: - """ - head -n 1 `ls ./samplesheets/* | head -n 1` > samplesheet.csv - for fileid in `ls ./samplesheets/*`; do - awk 'NR>1' \$fileid >> samplesheet.csv - done - """ -} \ No newline at end of file +// Import generic module functions +include { saveFiles; getSoftwareName } from './functions' + +params.options = [:] + +process SRA_MERGE_SAMPLESHEET { + publishDir "${params.outdir}", + mode: params.publish_dir_mode, + saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), meta:[:], publish_by_meta:[]) } + + conda (params.enable_conda ? 
"conda-forge::sed=4.7" : null) + if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { + container "https://containers.biocontainers.pro/s3/SingImgsRepo/biocontainers/v1.2.0_cv1/biocontainers_v1.2.0_cv1.img" + } else { + container "biocontainers/biocontainers:v1.2.0_cv1" + } + + input: + path ('samplesheets/*') + + output: + path "*csv", emit: csv + + script: + """ + head -n 1 `ls ./samplesheets/* | head -n 1` > samplesheet.csv + for fileid in `ls ./samplesheets/*`; do + awk 'NR>1' \$fileid >> samplesheet.csv + done + """ +} diff --git a/modules/local/sra_runinfo_to_ftp.nf b/modules/local/sra_runinfo_to_ftp.nf index 7d65bc66..de210b5e 100644 --- a/modules/local/sra_runinfo_to_ftp.nf +++ b/modules/local/sra_runinfo_to_ftp.nf @@ -1,31 +1,28 @@ -// Import generic module functions -include { saveFiles; getSoftwareName } from './functions' - -params.options = [:] - -/* - * Create samplesheet for pipeline from SRA run information fetched via the ENA API - */ -process SRA_RUNINFO_TO_FTP { - publishDir "${params.outdir}", - mode: params.publish_dir_mode, - saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), meta:[:], publish_by_meta:[]) } - - conda (params.enable_conda ? 
"conda-forge::python=3.8.3" : null) - if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { - container "https://depot.galaxyproject.org/singularity/python:3.8.3" - } else { - container "quay.io/biocontainers/python:3.8.3" - } - - input: - path runinfo - - output: - path "*.tsv", emit: tsv - - script: - """ - sra_runinfo_to_ftp.py ${runinfo.join(',')} ${runinfo.toString().tokenize(".")[0]}.runinfo_ftp.tsv - """ -} +// Import generic module functions +include { saveFiles; getSoftwareName } from './functions' + +params.options = [:] + +process SRA_RUNINFO_TO_FTP { + publishDir "${params.outdir}", + mode: params.publish_dir_mode, + saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), meta:[:], publish_by_meta:[]) } + + conda (params.enable_conda ? "conda-forge::python=3.8.3" : null) + if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { + container "https://depot.galaxyproject.org/singularity/python:3.8.3" + } else { + container "quay.io/biocontainers/python:3.8.3" + } + + input: + path runinfo + + output: + path "*.tsv", emit: tsv + + script: + """ + sra_runinfo_to_ftp.py ${runinfo.join(',')} ${runinfo.toString().tokenize(".")[0]}.runinfo_ftp.tsv + """ +} diff --git a/modules/local/sra_to_samplesheet.nf b/modules/local/sra_to_samplesheet.nf index 39821976..ef4487d9 100644 --- a/modules/local/sra_to_samplesheet.nf +++ b/modules/local/sra_to_samplesheet.nf @@ -1,46 +1,43 @@ -// Import generic module functions -include { saveFiles; getSoftwareName } from './functions' - -params.options = [:] -params.results_dir = '' - -/* - * Stage FastQ files downloaded by SRA and auto-create a samplesheet for the pipeline - */ -process SRA_TO_SAMPLESHEET { - tag "$meta.id" - publishDir "${params.outdir}", - mode: params.publish_dir_mode, - saveAs: { filename -> saveFiles(filename:filename, options:params.options, 
publish_dir:getSoftwareName(task.process), meta:meta, publish_by_meta:['id']) } - - memory 100.MB - - input: - tuple val(meta), path(fastq) - - output: - tuple val(meta), path("*csv"), emit: csv - - exec: - // Remove custom keys needed to download the data - def meta_map = meta.clone() - meta_map.remove("id") - meta_map.remove("fastq_1") - meta_map.remove("fastq_2") - meta_map.remove("md5_1") - meta_map.remove("md5_2") - meta_map.remove("single_end") - - // Add required fields for the pipeline to the beginning of the map - pipeline_map = [ - sample : "${meta.id.split('_')[0..-2].join('_')}", - fastq_1 : "${params.outdir}/${params.results_dir}/${fastq[0]}", - fastq_2 : meta.single_end ? '' : "${params.outdir}/${params.results_dir}/${fastq[1]}" - ] - pipeline_map << meta_map - - // Write to file - def file = task.workDir.resolve("${meta.id}.samplesheet.csv") - file.write pipeline_map.keySet().collect{ '"' + it + '"'}.join(",") + '\n' - file.append(pipeline_map.values().collect{ '"' + it + '"'}.join(",")) + '\n' -} \ No newline at end of file +// Import generic module functions +include { saveFiles; getSoftwareName } from './functions' + +params.options = [:] +params.results_dir = '' + +process SRA_TO_SAMPLESHEET { + tag "$meta.id" + publishDir "${params.outdir}", + mode: params.publish_dir_mode, + saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), meta:meta, publish_by_meta:['id']) } + + memory 100.MB + + input: + tuple val(meta), path(fastq) + + output: + tuple val(meta), path("*csv"), emit: csv + + exec: + // Remove custom keys needed to download the data + def meta_map = meta.clone() + meta_map.remove("id") + meta_map.remove("fastq_1") + meta_map.remove("fastq_2") + meta_map.remove("md5_1") + meta_map.remove("md5_2") + meta_map.remove("single_end") + + // Add required fields for the pipeline to the beginning of the map + pipeline_map = [ + sample : "${meta.id.split('_')[0..-2].join('_')}", + 
fastq_1 : "${params.outdir}/${params.results_dir}/${fastq[0]}", + fastq_2 : meta.single_end ? '' : "${params.outdir}/${params.results_dir}/${fastq[1]}" + ] + pipeline_map << meta_map + + // Write to file + def file = task.workDir.resolve("${meta.id}.samplesheet.csv") + file.write pipeline_map.keySet().collect{ '"' + it + '"'}.join(",") + '\n' + file.append(pipeline_map.values().collect{ '"' + it + '"'}.join(",")) + '\n' +} diff --git a/nextflow.config b/nextflow.config index 5dcfeb78..a05bbfbc 100644 --- a/nextflow.config +++ b/nextflow.config @@ -1,125 +1,124 @@ /* - * ------------------------------------------------- - * nf-core/viralrecon Nextflow config file - * ------------------------------------------------- - * Default config options for all environments. - */ - +======================================================================================== + nf-core/viralrecon Nextflow config file +======================================================================================== + Default config options for all compute environments +---------------------------------------------------------------------------------------- +*/ // Global default params, used in configs params { - // Input options - input = null - platform = null - protocol = null + // Input options + input = null + platform = null + protocol = null + + // SRA download options + public_data_ids = null + skip_sra_fastq_download = false + + // Reference genome options + genome = null + primer_set = null + primer_set_version = null + primer_fasta = null + primer_left_suffix = '_LEFT' + primer_right_suffix = '_RIGHT' + save_reference = false - // SRA download options - public_data_ids = null - skip_sra_fastq_download = false + // Nanopore options + fastq_dir = null + fast5_dir = null + sequencing_summary = null + min_barcode_reads = 100 + min_guppyplex_reads = 10 + artic_minion_caller = 'nanopolish' + artic_minion_aligner = 'minimap2' + artic_minion_medaka_model = null + skip_pycoqc = false + 
skip_nanoplot = false - // Reference genome options - genome = null - primer_set = null - primer_set_version = null - primer_fasta = null - primer_left_suffix = '_LEFT' - primer_right_suffix = '_RIGHT' - save_reference = false + // Nanopore/Illumina options + asciigenome_read_depth = 50 + asciigenome_window_size = 50 + multiqc_title = null + multiqc_config = null + max_multiqc_email_size = '25.MB' + skip_mosdepth = false + skip_pangolin = false + skip_nextclade = false + skip_asciigenome = false + skip_variants_quast = false + skip_multiqc = false - // Nanopore options - fastq_dir = null - fast5_dir = null - sequencing_summary = null - min_barcode_reads = 100 - min_guppyplex_reads = 10 - artic_minion_caller = 'nanopolish' - artic_minion_aligner = 'minimap2' - artic_minion_medaka_model = null - skip_pycoqc = false - skip_nanoplot = false + // Illumina QC, read trimming and filtering options + kraken2_db = 's3://nf-core-awsmegatests/viralrecon/input_data/kraken2_human.tar.gz' + kraken2_db_name = 'human' + kraken2_variants_host_filter = false + kraken2_assembly_host_filter = true + save_trimmed_fail = false + skip_fastqc = false + skip_kraken2 = false + skip_fastp = false + skip_cutadapt = false - // Nanopore/Illumina options - asciigenome_read_depth = 50 - asciigenome_window_size = 50 - multiqc_title = null - multiqc_config = null - max_multiqc_email_size = '25.MB' - skip_mosdepth = false - skip_pangolin = false - skip_nextclade = false - skip_asciigenome = false - skip_variants_quast = false - skip_multiqc = false - - // Illumina QC, read trimming and filtering options - kraken2_db = 's3://nf-core-awsmegatests/viralrecon/input_data/kraken2_human.tar.gz' - kraken2_db_name = 'human' - kraken2_variants_host_filter = false - kraken2_assembly_host_filter = true - save_trimmed_fail = false - skip_fastqc = false - skip_kraken2 = false - skip_fastp = false - skip_cutadapt = false - - // Illumina variant calling options - callers = null - min_mapped_reads = 1000 - 
ivar_trim_noprimer = false - ivar_trim_offset = null - filter_duplicates = false - save_unaligned = false - save_mpileup = false - skip_ivar_trim = false - skip_markduplicates = true - skip_picard_metrics = false - skip_snpeff = false - skip_consensus = false - skip_variants = false + // Illumina variant calling options + callers = null + min_mapped_reads = 1000 + ivar_trim_noprimer = false + ivar_trim_offset = null + filter_duplicates = false + save_unaligned = false + save_mpileup = false + skip_ivar_trim = false + skip_markduplicates = true + skip_picard_metrics = false + skip_snpeff = false + skip_consensus = false + skip_variants = false - // Illumina de novo assembly options - assemblers = 'spades' - spades_mode = 'rnaviral' - spades_hmm = null - blast_db = null - skip_bandage = false - skip_blast = false - skip_abacas = false - skip_plasmidid = false - skip_assembly_quast = false - skip_assembly = false + // Illumina de novo assembly options + assemblers = 'spades' + spades_mode = 'rnaviral' + spades_hmm = null + blast_db = null + skip_bandage = false + skip_blast = false + skip_abacas = false + skip_plasmidid = false + skip_assembly_quast = false + skip_assembly = false - // Boilerplate options - outdir = './results' - tracedir = "${params.outdir}/pipeline_info" - publish_dir_mode = 'copy' - email = null - email_on_fail = null - plaintext_email = false - monochrome_logs = false - help = false - enable_conda = false - singularity_pull_docker_container = false - validate_params = true - show_hidden_params = false - schema_ignore_params = 'genomes,modules,igenomes_base' - - // Config options - custom_config_version = 'master' - custom_config_base = "https://raw.githubusercontent.com/nf-core/configs/${params.custom_config_version}" - hostnames = [:] - config_profile_name = null - config_profile_description = null - config_profile_contact = null - config_profile_url = null + // Boilerplate options + outdir = './results' + tracedir = 
"${params.outdir}/pipeline_info" + publish_dir_mode = 'copy' + email = null + email_on_fail = null + plaintext_email = false + monochrome_logs = false + help = false + enable_conda = false + singularity_pull_docker_container = false + validate_params = true + show_hidden_params = false + schema_ignore_params = 'genomes,modules,igenomes_base' - // Max resource options - // Defaults only, expecting to be overwritten - max_memory = '128.GB' - max_cpus = 16 - max_time = '240.h' + // Config options + custom_config_version = 'master' + custom_config_base = "https://raw.githubusercontent.com/nf-core/configs/${params.custom_config_version}" + hostnames = [:] + config_profile_name = null + config_profile_description = null + config_profile_contact = null + config_profile_url = null + // Max resource options + // Defaults only, expecting to be overwritten + max_memory = '128.GB' + max_cpus = 16 + max_time = '240.h' } // Load base.config by default for all pipelines @@ -130,79 +129,79 @@ includeConfig 'conf/modules.config' // Load nf-core custom profiles from different Institutions try { - includeConfig "${params.custom_config_base}/nfcore_custom.config" + includeConfig "${params.custom_config_base}/nfcore_custom.config" } catch (Exception e) { - System.err.println("WARNING: Could not load nf-core/config profiles: ${params.custom_config_base}/nfcore_custom.config") + System.err.println("WARNING: Could not load nf-core/config profiles: ${params.custom_config_base}/nfcore_custom.config") } // Load nf-core/viralrecon custom config. 
// The default 'genomes.config' used by the pipeline can be found here and is auto-loaded via the pipeline config: // https://github.com/nf-core/configs/blob/master/conf/pipeline/viralrecon/genomes.config try { - includeConfig "${params.custom_config_base}/pipeline/viralrecon.config" + includeConfig "${params.custom_config_base}/pipeline/viralrecon.config" } catch (Exception e) { - System.err.println("WARNING: Could not load nf-core/config/viralrecon profiles: ${params.custom_config_base}/pipeline/viralrecon.config") + System.err.println("WARNING: Could not load nf-core/config/viralrecon profiles: ${params.custom_config_base}/pipeline/viralrecon.config") } profiles { - debug { process.beforeScript = 'echo $HOSTNAME' } - conda { - params.enable_conda = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - docker { - docker.enabled = true - // Avoid this error: - // WARNING: Your kernel does not support swap limit capabilities or the cgroup is not mounted. Memory limited without swap. - // Testing this in nf-core after discussion here https://github.com/nf-core/tools/pull/351 - // once this is established and works well, nextflow might implement this behavior as new default. 
- docker.runOptions = '-u \$(id -u):\$(id -g)' - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - singularity { - singularity.enabled = true - singularity.autoMounts = true - docker.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - podman { - podman.enabled = true - docker.enabled = false - singularity.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - shifter { - shifter.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - charliecloud.enabled = false - } - charliecloud { - charliecloud.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - } - test { includeConfig 'conf/test.config' } - test_sra { includeConfig 'conf/test_sra.config' } - test_sispa { includeConfig 'conf/test_sispa.config' } - test_nanopore { includeConfig 'conf/test_nanopore.config' } - test_full { includeConfig 'conf/test_full.config' } - test_full_illumina { includeConfig 'conf/test_full.config' } - test_full_nanopore { includeConfig 'conf/test_full_nanopore.config' } - test_full_sispa { includeConfig 'conf/test_full_sispa.config' } + debug { process.beforeScript = 'echo $HOSTNAME' } + conda { + params.enable_conda = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + docker { + docker.enabled = true + // Avoid this error: + // WARNING: Your kernel does not support swap limit capabilities or the cgroup is not mounted. Memory limited without swap. + // Testing this in nf-core after discussion here https://github.com/nf-core/tools/pull/351 + // once this is established and works well, nextflow might implement this behavior as new default. 
+ docker.runOptions = '-u \$(id -u):\$(id -g)' + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + singularity { + singularity.enabled = true + singularity.autoMounts = true + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + podman { + podman.enabled = true + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + shifter { + shifter.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + } + charliecloud { + charliecloud.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + } + test { includeConfig 'conf/test.config' } + test_sra { includeConfig 'conf/test_sra.config' } + test_sispa { includeConfig 'conf/test_sispa.config' } + test_nanopore { includeConfig 'conf/test_nanopore.config' } + test_full { includeConfig 'conf/test_full.config' } + test_full_illumina { includeConfig 'conf/test_full.config' } + test_full_nanopore { includeConfig 'conf/test_full_nanopore.config' } + test_full_sispa { includeConfig 'conf/test_full_sispa.config' } } // Increase time available to build Conda environment @@ -210,9 +209,9 @@ conda { createTimeout = "120 min" } // Export these variables to prevent local Python/R libraries from conflicting with those in the container env { - PYTHONNOUSERSITE = 1 - R_PROFILE_USER = "/.Rprofile" - R_ENVIRON_USER = "/.Renviron" + PYTHONNOUSERSITE = 1 + R_PROFILE_USER = "/.Rprofile" + R_ENVIRON_USER = "/.Renviron" } // Capture exit codes from upstream processes when piping @@ -220,61 +219,61 @@ process.shell = ['/bin/bash', '-euo', 'pipefail'] def trace_timestamp = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss') timeline { - enabled = true - file = "${params.tracedir}/execution_timeline_${trace_timestamp}.html" + enabled = 
true + file = "${params.tracedir}/execution_timeline_${trace_timestamp}.html" } report { - enabled = true - file = "${params.tracedir}/execution_report_${trace_timestamp}.html" + enabled = true + file = "${params.tracedir}/execution_report_${trace_timestamp}.html" } trace { - enabled = true - file = "${params.tracedir}/execution_trace_${trace_timestamp}.txt" + enabled = true + file = "${params.tracedir}/execution_trace_${trace_timestamp}.txt" } dag { - enabled = true - file = "${params.tracedir}/pipeline_dag_${trace_timestamp}.svg" + enabled = true + file = "${params.tracedir}/pipeline_dag_${trace_timestamp}.svg" } manifest { - name = 'nf-core/viralrecon' - author = 'Sarai Varona and Sara Monzon' - homePage = 'https://github.com/nf-core/viralrecon' - description = 'Assembly and intrahost/low-frequency variant calling for viral samples' - mainScript = 'main.nf' - nextflowVersion = '!>=21.04.0' - version = '2.0' + name = 'nf-core/viralrecon' + author = 'Sarai Varona and Sara Monzon' + homePage = 'https://github.com/nf-core/viralrecon' + description = 'Assembly and intrahost/low-frequency variant calling for viral samples' + mainScript = 'main.nf' + nextflowVersion = '!>=21.04.0' + version = '2.0' } // Function to ensure that resource requirements don't go beyond // a maximum limit def check_max(obj, type) { - if (type == 'memory') { - try { - if (obj.compareTo(params.max_memory as nextflow.util.MemoryUnit) == 1) - return params.max_memory as nextflow.util.MemoryUnit - else - return obj - } catch (all) { - println " ### ERROR ### Max memory '${params.max_memory}' is not valid! Using default value: $obj" - return obj - } - } else if (type == 'time') { - try { - if (obj.compareTo(params.max_time as nextflow.util.Duration) == 1) - return params.max_time as nextflow.util.Duration - else - return obj - } catch (all) { - println " ### ERROR ### Max time '${params.max_time}' is not valid! 
Using default value: $obj" - return obj - } - } else if (type == 'cpus') { - try { - return Math.min( obj, params.max_cpus as int ) - } catch (all) { - println " ### ERROR ### Max cpus '${params.max_cpus}' is not valid! Using default value: $obj" - return obj + if (type == 'memory') { + try { + if (obj.compareTo(params.max_memory as nextflow.util.MemoryUnit) == 1) + return params.max_memory as nextflow.util.MemoryUnit + else + return obj + } catch (all) { + println " ### ERROR ### Max memory '${params.max_memory}' is not valid! Using default value: $obj" + return obj + } + } else if (type == 'time') { + try { + if (obj.compareTo(params.max_time as nextflow.util.Duration) == 1) + return params.max_time as nextflow.util.Duration + else + return obj + } catch (all) { + println " ### ERROR ### Max time '${params.max_time}' is not valid! Using default value: $obj" + return obj + } + } else if (type == 'cpus') { + try { + return Math.min( obj, params.max_cpus as int ) + } catch (all) { + println " ### ERROR ### Max cpus '${params.max_cpus}' is not valid! 
Using default value: $obj" + return obj + } } - } } diff --git a/subworkflows/local/assembly_minia.nf b/subworkflows/local/assembly_minia.nf index 8d609b3b..0626f42c 100644 --- a/subworkflows/local/assembly_minia.nf +++ b/subworkflows/local/assembly_minia.nf @@ -1,76 +1,76 @@ -/* - * Assembly and downstream processing for minia scaffolds - */ - -params.minia_options = [:] -params.blastn_options = [:] -params.blastn_filter_options = [:] -params.abacas_options = [:] -params.plasmidid_options = [:] -params.quast_options = [:] - -include { MINIA } from '../../modules/nf-core/software/minia/main' addParams( options: params.minia_options ) -include { ASSEMBLY_QC } from './assembly_qc' addParams( blastn_options: params.blastn_options, blastn_filter_options: params.blastn_filter_options, abacas_options: params.abacas_options, plasmidid_options: params.plasmidid_options, quast_options: params.quast_options ) - -workflow ASSEMBLY_MINIA { - take: - reads // channel: [ val(meta), [ reads ] ] - fasta // channel: /path/to/genome.fasta - gff // channel: /path/to/genome.gff - blast_db // channel: /path/to/blast_db/ - blast_header // channel: /path/to/blast_header.txt - - main: - /* - * Assemble reads with minia - */ - MINIA ( reads ) - - /* - * Filter for empty contig files - */ - MINIA - .out - .contigs - .filter { meta, contig -> contig.size() > 0 } - .set { ch_contigs } - - /* - * Downstream assembly steps - */ - ASSEMBLY_QC ( - ch_contigs, - fasta, - gff, - blast_db, - blast_header - ) - - emit: - contigs = MINIA.out.contigs // channel: [ val(meta), [ contigs ] ] - unitigs = MINIA.out.unitigs // channel: [ val(meta), [ unitigs ] ] - h5 = MINIA.out.h5 // channel: [ val(meta), [ h5 ] ] - minia_version = MINIA.out.version // path: *.version.txt - - blast_txt = ASSEMBLY_QC.out.blast_txt // channel: [ val(meta), [ txt ] ] - blast_filter_txt = ASSEMBLY_QC.out.blast_filter_txt // channel: [ val(meta), [ txt ] ] - blast_version = ASSEMBLY_QC.out.blast_version // path: *.version.txt - 
- quast_results = ASSEMBLY_QC.out.quast_results // channel: [ val(meta), [ results ] ] - quast_tsv = ASSEMBLY_QC.out.quast_tsv // channel: [ val(meta), [ tsv ] ] - quast_version = ASSEMBLY_QC.out.quast_version // path: *.version.txt - - abacas_results = ASSEMBLY_QC.out.abacas_results // channel: [ val(meta), [ results ] ] - abacas_version = ASSEMBLY_QC.out.abacas_version // path: *.version.txt - - plasmidid_html = ASSEMBLY_QC.out.plasmidid_html // channel: [ val(meta), [ html ] ] - plasmidid_tab = ASSEMBLY_QC.out.plasmidid_tab // channel: [ val(meta), [ tab ] ] - plasmidid_images = ASSEMBLY_QC.out.plasmidid_images // channel: [ val(meta), [ images/ ] ] - plasmidid_logs = ASSEMBLY_QC.out.plasmidid_logs // channel: [ val(meta), [ logs/ ] ] - plasmidid_data = ASSEMBLY_QC.out.plasmidid_data // channel: [ val(meta), [ data/ ] ] - plasmidid_database = ASSEMBLY_QC.out.plasmidid_database // channel: [ val(meta), [ database/ ] ] - plasmidid_fasta = ASSEMBLY_QC.out.plasmidid_fasta // channel: [ val(meta), [ fasta_files/ ] ] - plasmidid_kmer = ASSEMBLY_QC.out.plasmidid_kmer // channel: [ val(meta), [ kmer/ ] ] - plasmidid_version = ASSEMBLY_QC.out.plasmidid_version // path: *.version.txt - -} \ No newline at end of file +// +// Assembly and downstream processing for minia scaffolds +// + +params.minia_options = [:] +params.blastn_options = [:] +params.blastn_filter_options = [:] +params.abacas_options = [:] +params.plasmidid_options = [:] +params.quast_options = [:] + +include { MINIA } from '../../modules/nf-core/software/minia/main' addParams( options: params.minia_options ) +include { ASSEMBLY_QC } from './assembly_qc' addParams( blastn_options: params.blastn_options, blastn_filter_options: params.blastn_filter_options, abacas_options: params.abacas_options, plasmidid_options: params.plasmidid_options, quast_options: params.quast_options ) + +workflow ASSEMBLY_MINIA { + take: + reads // channel: [ val(meta), [ reads ] ] + fasta // channel: /path/to/genome.fasta + gff // 
channel: /path/to/genome.gff + blast_db // channel: /path/to/blast_db/ + blast_header // channel: /path/to/blast_header.txt + + main: + + // + // Assemble reads with minia + // + MINIA ( reads ) + + // + // Filter for empty contig files + // + MINIA + .out + .contigs + .filter { meta, contig -> contig.size() > 0 } + .set { ch_contigs } + + // + // Downstream assembly steps + // + ASSEMBLY_QC ( + ch_contigs, + fasta, + gff, + blast_db, + blast_header + ) + + emit: + contigs = MINIA.out.contigs // channel: [ val(meta), [ contigs ] ] + unitigs = MINIA.out.unitigs // channel: [ val(meta), [ unitigs ] ] + h5 = MINIA.out.h5 // channel: [ val(meta), [ h5 ] ] + minia_version = MINIA.out.version // path: *.version.txt + + blast_txt = ASSEMBLY_QC.out.blast_txt // channel: [ val(meta), [ txt ] ] + blast_filter_txt = ASSEMBLY_QC.out.blast_filter_txt // channel: [ val(meta), [ txt ] ] + blast_version = ASSEMBLY_QC.out.blast_version // path: *.version.txt + + quast_results = ASSEMBLY_QC.out.quast_results // channel: [ val(meta), [ results ] ] + quast_tsv = ASSEMBLY_QC.out.quast_tsv // channel: [ val(meta), [ tsv ] ] + quast_version = ASSEMBLY_QC.out.quast_version // path: *.version.txt + + abacas_results = ASSEMBLY_QC.out.abacas_results // channel: [ val(meta), [ results ] ] + abacas_version = ASSEMBLY_QC.out.abacas_version // path: *.version.txt + + plasmidid_html = ASSEMBLY_QC.out.plasmidid_html // channel: [ val(meta), [ html ] ] + plasmidid_tab = ASSEMBLY_QC.out.plasmidid_tab // channel: [ val(meta), [ tab ] ] + plasmidid_images = ASSEMBLY_QC.out.plasmidid_images // channel: [ val(meta), [ images/ ] ] + plasmidid_logs = ASSEMBLY_QC.out.plasmidid_logs // channel: [ val(meta), [ logs/ ] ] + plasmidid_data = ASSEMBLY_QC.out.plasmidid_data // channel: [ val(meta), [ data/ ] ] + plasmidid_database = ASSEMBLY_QC.out.plasmidid_database // channel: [ val(meta), [ database/ ] ] + plasmidid_fasta = ASSEMBLY_QC.out.plasmidid_fasta // channel: [ val(meta), [ fasta_files/ ] ] + 
plasmidid_kmer = ASSEMBLY_QC.out.plasmidid_kmer // channel: [ val(meta), [ kmer/ ] ] + plasmidid_version = ASSEMBLY_QC.out.plasmidid_version // path: *.version.txt +} diff --git a/subworkflows/local/assembly_qc.nf b/subworkflows/local/assembly_qc.nf index 6085b307..723b59d0 100644 --- a/subworkflows/local/assembly_qc.nf +++ b/subworkflows/local/assembly_qc.nf @@ -1,113 +1,112 @@ -/* - * Downstream analysis for assembly scaffolds - */ - -params.blastn_options = [:] -params.blastn_filter_options = [:] -params.abacas_options = [:] -params.plasmidid_options = [:] -params.quast_options = [:] - -include { FILTER_BLASTN } from '../../modules/local/filter_blastn' addParams( options: params.blastn_filter_options ) -include { ABACAS } from '../../modules/nf-core/software/abacas/main' addParams( options: params.abacas_options ) -include { BLAST_BLASTN } from '../../modules/nf-core/software/blast/blastn/main' addParams( options: params.blastn_options ) -include { PLASMIDID } from '../../modules/nf-core/software/plasmidid/main' addParams( options: params.plasmidid_options ) -include { QUAST } from '../../modules/nf-core/software/quast/main' addParams( options: params.quast_options ) - -workflow ASSEMBLY_QC { - take: - scaffolds // channel: [ val(meta), [ scaffolds ] ] - fasta // channel: /path/to/genome.fasta - gff // channel: /path/to/genome.gff - blast_db // channel: /path/to/blast_db/ - blast_header // channel: /path/to/blast_header.txt - - main: - /* - * Run blastn on assembly scaffolds - */ - ch_blast_txt = Channel.empty() - ch_blast_filter_txt = Channel.empty() - ch_blast_version = Channel.empty() - if (!params.skip_blast) { - BLAST_BLASTN ( scaffolds, blast_db ) - ch_blast_txt = BLAST_BLASTN.out.txt - ch_blast_version = BLAST_BLASTN.out.version - - FILTER_BLASTN ( BLAST_BLASTN.out.txt, blast_header ) - ch_blast_filter_txt = FILTER_BLASTN.out.txt - } - - /* - * Assembly QC across all samples with QUAST - */ - ch_quast_results = Channel.empty() - ch_quast_tsv = 
Channel.empty() - ch_quast_version = Channel.empty() - if (!params.skip_assembly_quast) { - QUAST ( scaffolds.collect{ it[1] }, fasta, gff, true, params.gff ) - ch_quast_results = QUAST.out.results - ch_quast_tsv = QUAST.out.tsv - ch_quast_version = QUAST.out.version - } - - /* - * Contiguate assembly with ABACAS - */ - ch_abacas_results = Channel.empty() - ch_abacas_version = Channel.empty() - if (!params.skip_abacas) { - ABACAS ( scaffolds, fasta ) - ch_abacas_results = ABACAS.out.results - ch_abacas_version = ABACAS.out.version - } - - /* - * Assembly report with PlasmidID - */ - ch_plasmidid_html = Channel.empty() - ch_plasmidid_tab = Channel.empty() - ch_plasmidid_images = Channel.empty() - ch_plasmidid_logs = Channel.empty() - ch_plasmidid_data = Channel.empty() - ch_plasmidid_database = Channel.empty() - ch_plasmidid_fasta = Channel.empty() - ch_plasmidid_kmer = Channel.empty() - ch_plasmidid_version = Channel.empty() - if (!params.skip_plasmidid) { - PLASMIDID ( scaffolds, fasta ) - ch_plasmidid_html = PLASMIDID.out.html - ch_plasmidid_tab = PLASMIDID.out.tab - ch_plasmidid_images = PLASMIDID.out.images - ch_plasmidid_logs = PLASMIDID.out.logs - ch_plasmidid_data = PLASMIDID.out.data - ch_plasmidid_database = PLASMIDID.out.database - ch_plasmidid_fasta = PLASMIDID.out.fasta_files - ch_plasmidid_kmer = PLASMIDID.out.kmer - ch_plasmidid_version = PLASMIDID.out.version - } - - emit: - blast_txt = ch_blast_txt // channel: [ val(meta), [ txt ] ] - blast_filter_txt = ch_blast_filter_txt // channel: [ val(meta), [ txt ] ] - blast_version = ch_blast_version // path: *.version.txt - - quast_results = ch_quast_results // channel: [ val(meta), [ results ] ] - quast_tsv = ch_quast_tsv // channel: [ val(meta), [ tsv ] ] - quast_version = ch_quast_version // path: *.version.txt - - abacas_results = ch_abacas_results // channel: [ val(meta), [ results ] ] - abacas_version = ch_abacas_version // path: *.version.txt - - plasmidid_html = ch_plasmidid_html // channel: [ 
val(meta), [ html ] ] - plasmidid_tab = ch_plasmidid_tab // channel: [ val(meta), [ tab ] ] - plasmidid_images = ch_plasmidid_images // channel: [ val(meta), [ images/ ] ] - plasmidid_logs = ch_plasmidid_logs // channel: [ val(meta), [ logs/ ] ] - plasmidid_data = ch_plasmidid_data // channel: [ val(meta), [ data/ ] ] - plasmidid_database = ch_plasmidid_database // channel: [ val(meta), [ database/ ] ] - plasmidid_fasta = ch_plasmidid_fasta // channel: [ val(meta), [ fasta_files/ ] ] - plasmidid_kmer = ch_plasmidid_kmer // channel: [ val(meta), [ kmer/ ] ] - plasmidid_version = ch_plasmidid_version // path: *.version.txt - -} - +// +// Downstream analysis for assembly scaffolds +// + +params.blastn_options = [:] +params.blastn_filter_options = [:] +params.abacas_options = [:] +params.plasmidid_options = [:] +params.quast_options = [:] + +include { FILTER_BLASTN } from '../../modules/local/filter_blastn' addParams( options: params.blastn_filter_options ) +include { ABACAS } from '../../modules/nf-core/software/abacas/main' addParams( options: params.abacas_options ) +include { BLAST_BLASTN } from '../../modules/nf-core/software/blast/blastn/main' addParams( options: params.blastn_options ) +include { PLASMIDID } from '../../modules/nf-core/software/plasmidid/main' addParams( options: params.plasmidid_options ) +include { QUAST } from '../../modules/nf-core/software/quast/main' addParams( options: params.quast_options ) + +workflow ASSEMBLY_QC { + take: + scaffolds // channel: [ val(meta), [ scaffolds ] ] + fasta // channel: /path/to/genome.fasta + gff // channel: /path/to/genome.gff + blast_db // channel: /path/to/blast_db/ + blast_header // channel: /path/to/blast_header.txt + + main: + + // + // Run blastn on assembly scaffolds + // + ch_blast_txt = Channel.empty() + ch_blast_filter_txt = Channel.empty() + ch_blast_version = Channel.empty() + if (!params.skip_blast) { + BLAST_BLASTN ( scaffolds, blast_db ) + ch_blast_txt = BLAST_BLASTN.out.txt + ch_blast_version = 
BLAST_BLASTN.out.version + + FILTER_BLASTN ( BLAST_BLASTN.out.txt, blast_header ) + ch_blast_filter_txt = FILTER_BLASTN.out.txt + } + + // + // Assembly QC across all samples with QUAST + // + ch_quast_results = Channel.empty() + ch_quast_tsv = Channel.empty() + ch_quast_version = Channel.empty() + if (!params.skip_assembly_quast) { + QUAST ( scaffolds.collect{ it[1] }, fasta, gff, true, params.gff ) + ch_quast_results = QUAST.out.results + ch_quast_tsv = QUAST.out.tsv + ch_quast_version = QUAST.out.version + } + + // + // Contiguate assembly with ABACAS + // + ch_abacas_results = Channel.empty() + ch_abacas_version = Channel.empty() + if (!params.skip_abacas) { + ABACAS ( scaffolds, fasta ) + ch_abacas_results = ABACAS.out.results + ch_abacas_version = ABACAS.out.version + } + + // + // Assembly report with PlasmidID + // + ch_plasmidid_html = Channel.empty() + ch_plasmidid_tab = Channel.empty() + ch_plasmidid_images = Channel.empty() + ch_plasmidid_logs = Channel.empty() + ch_plasmidid_data = Channel.empty() + ch_plasmidid_database = Channel.empty() + ch_plasmidid_fasta = Channel.empty() + ch_plasmidid_kmer = Channel.empty() + ch_plasmidid_version = Channel.empty() + if (!params.skip_plasmidid) { + PLASMIDID ( scaffolds, fasta ) + ch_plasmidid_html = PLASMIDID.out.html + ch_plasmidid_tab = PLASMIDID.out.tab + ch_plasmidid_images = PLASMIDID.out.images + ch_plasmidid_logs = PLASMIDID.out.logs + ch_plasmidid_data = PLASMIDID.out.data + ch_plasmidid_database = PLASMIDID.out.database + ch_plasmidid_fasta = PLASMIDID.out.fasta_files + ch_plasmidid_kmer = PLASMIDID.out.kmer + ch_plasmidid_version = PLASMIDID.out.version + } + + emit: + blast_txt = ch_blast_txt // channel: [ val(meta), [ txt ] ] + blast_filter_txt = ch_blast_filter_txt // channel: [ val(meta), [ txt ] ] + blast_version = ch_blast_version // path: *.version.txt + + quast_results = ch_quast_results // channel: [ val(meta), [ results ] ] + quast_tsv = ch_quast_tsv // channel: [ val(meta), [ tsv ] ] + 
quast_version = ch_quast_version // path: *.version.txt + + abacas_results = ch_abacas_results // channel: [ val(meta), [ results ] ] + abacas_version = ch_abacas_version // path: *.version.txt + + plasmidid_html = ch_plasmidid_html // channel: [ val(meta), [ html ] ] + plasmidid_tab = ch_plasmidid_tab // channel: [ val(meta), [ tab ] ] + plasmidid_images = ch_plasmidid_images // channel: [ val(meta), [ images/ ] ] + plasmidid_logs = ch_plasmidid_logs // channel: [ val(meta), [ logs/ ] ] + plasmidid_data = ch_plasmidid_data // channel: [ val(meta), [ data/ ] ] + plasmidid_database = ch_plasmidid_database // channel: [ val(meta), [ database/ ] ] + plasmidid_fasta = ch_plasmidid_fasta // channel: [ val(meta), [ fasta_files/ ] ] + plasmidid_kmer = ch_plasmidid_kmer // channel: [ val(meta), [ kmer/ ] ] + plasmidid_version = ch_plasmidid_version // path: *.version.txt +} diff --git a/subworkflows/local/assembly_spades.nf b/subworkflows/local/assembly_spades.nf index 6c472527..b9bf6c52 100644 --- a/subworkflows/local/assembly_spades.nf +++ b/subworkflows/local/assembly_spades.nf @@ -1,114 +1,115 @@ -/* - * Assembly and downstream processing for SPAdes scaffolds - */ - -params.spades_options = [:] -params.bandage_options = [:] -params.blastn_options = [:] -params.blastn_filter_options = [:] -params.abacas_options = [:] -params.plasmidid_options = [:] -params.quast_options = [:] - -include { SPADES } from '../../modules/nf-core/software/spades/main' addParams( options: params.spades_options ) -include { BANDAGE_IMAGE } from '../../modules/nf-core/software/bandage/image/main' addParams( options: params.bandage_options ) -include { ASSEMBLY_QC } from './assembly_qc' addParams( blastn_options: params.blastn_options, blastn_filter_options: params.blastn_filter_options, abacas_options: params.abacas_options, plasmidid_options: params.plasmidid_options, quast_options: params.quast_options ) - -workflow ASSEMBLY_SPADES { - take: - reads // channel: [ val(meta), [ reads ] ] - hmm 
// channel: /path/to/spades.hmm - fasta // channel: /path/to/genome.fasta - gff // channel: /path/to/genome.gff - blast_db // channel: /path/to/blast_db/ - blast_header // channel: /path/to/blast_header.txt - - main: - /* - * Filter for paired-end samples if running metaSPAdes / metaviralSPAdes / metaplasmidSPAdes - */ - ch_reads = reads - if (params.spades_options.args.contains('--meta') || params.spades_options.args.contains('--bio')) { - reads - .filter { meta, fastq -> !meta.single_end } - .set { ch_reads } - } - - /* - * Assemble reads with SPAdes - */ - SPADES ( ch_reads, hmm ) - - /* - * Filter for empty scaffold files - */ - SPADES - .out - .scaffolds - .filter { meta, scaffold -> scaffold.size() > 0 } - .set { ch_scaffolds } - - SPADES - .out - .gfa - .filter { meta, gfa -> gfa.size() > 0 } - .set { ch_gfa } - - /* - * Generate assembly visualisation with Bandage - */ - ch_bandage_png = Channel.empty() - ch_bandage_svg = Channel.empty() - ch_bandage_version = Channel.empty() - if (!params.skip_bandage) { - BANDAGE_IMAGE ( ch_gfa ) - ch_bandage_version = BANDAGE_IMAGE.out.version - ch_bandage_png = BANDAGE_IMAGE.out.png - ch_bandage_svg = BANDAGE_IMAGE.out.svg - } - - /* - * Downstream assembly steps - */ - ASSEMBLY_QC ( - ch_scaffolds, - fasta, - gff, - blast_db, - blast_header - ) - - emit: - scaffolds = SPADES.out.scaffolds // channel: [ val(meta), [ scaffolds ] ] - contigs = SPADES.out.contigs // channel: [ val(meta), [ contigs ] ] - transcripts = SPADES.out.transcripts // channel: [ val(meta), [ transcripts ] ] - gene_clusters = SPADES.out.gene_clusters // channel: [ val(meta), [ gene_clusters ] ] - gfa = SPADES.out.gfa // channel: [ val(meta), [ gfa ] ] - log_out = SPADES.out.log // channel: [ val(meta), [ log ] ] - spades_version = SPADES.out.version // path: *.version.txt - - bandage_png = ch_bandage_png // channel: [ val(meta), [ png ] ] - bandage_svg = ch_bandage_svg // channel: [ val(meta), [ svg ] ] - bandage_version = ch_bandage_version // 
path: *.version.txt - - blast_txt = ASSEMBLY_QC.out.blast_txt // channel: [ val(meta), [ txt ] ] - blast_filter_txt = ASSEMBLY_QC.out.blast_filter_txt // channel: [ val(meta), [ txt ] ] - blast_version = ASSEMBLY_QC.out.blast_version // path: *.version.txt - - quast_results = ASSEMBLY_QC.out.quast_results // channel: [ val(meta), [ results ] ] - quast_tsv = ASSEMBLY_QC.out.quast_tsv // channel: [ val(meta), [ tsv ] ] - quast_version = ASSEMBLY_QC.out.quast_version // path: *.version.txt - - abacas_results = ASSEMBLY_QC.out.abacas_results // channel: [ val(meta), [ results ] ] - abacas_version = ASSEMBLY_QC.out.abacas_version // path: *.version.txt - - plasmidid_html = ASSEMBLY_QC.out.plasmidid_html // channel: [ val(meta), [ html ] ] - plasmidid_tab = ASSEMBLY_QC.out.plasmidid_tab // channel: [ val(meta), [ tab ] ] - plasmidid_images = ASSEMBLY_QC.out.plasmidid_images // channel: [ val(meta), [ images/ ] ] - plasmidid_logs = ASSEMBLY_QC.out.plasmidid_logs // channel: [ val(meta), [ logs/ ] ] - plasmidid_data = ASSEMBLY_QC.out.plasmidid_data // channel: [ val(meta), [ data/ ] ] - plasmidid_database = ASSEMBLY_QC.out.plasmidid_database // channel: [ val(meta), [ database/ ] ] - plasmidid_fasta = ASSEMBLY_QC.out.plasmidid_fasta // channel: [ val(meta), [ fasta_files/ ] ] - plasmidid_kmer = ASSEMBLY_QC.out.plasmidid_kmer // channel: [ val(meta), [ kmer/ ] ] - plasmidid_version = ASSEMBLY_QC.out.plasmidid_version // path: *.version.txt -} \ No newline at end of file +// +// Assembly and downstream processing for SPAdes scaffolds +// + +params.spades_options = [:] +params.bandage_options = [:] +params.blastn_options = [:] +params.blastn_filter_options = [:] +params.abacas_options = [:] +params.plasmidid_options = [:] +params.quast_options = [:] + +include { SPADES } from '../../modules/nf-core/software/spades/main' addParams( options: params.spades_options ) +include { BANDAGE_IMAGE } from '../../modules/nf-core/software/bandage/image/main' addParams( options: 
params.bandage_options ) +include { ASSEMBLY_QC } from './assembly_qc' addParams( blastn_options: params.blastn_options, blastn_filter_options: params.blastn_filter_options, abacas_options: params.abacas_options, plasmidid_options: params.plasmidid_options, quast_options: params.quast_options ) + +workflow ASSEMBLY_SPADES { + take: + reads // channel: [ val(meta), [ reads ] ] + hmm // channel: /path/to/spades.hmm + fasta // channel: /path/to/genome.fasta + gff // channel: /path/to/genome.gff + blast_db // channel: /path/to/blast_db/ + blast_header // channel: /path/to/blast_header.txt + + main: + + // + // Filter for paired-end samples if running metaSPAdes / metaviralSPAdes / metaplasmidSPAdes + // + ch_reads = reads + if (params.spades_options.args.contains('--meta') || params.spades_options.args.contains('--bio')) { + reads + .filter { meta, fastq -> !meta.single_end } + .set { ch_reads } + } + + // + // Assemble reads with SPAdes + // + SPADES ( ch_reads, hmm ) + + // + // Filter for empty scaffold files + // + SPADES + .out + .scaffolds + .filter { meta, scaffold -> scaffold.size() > 0 } + .set { ch_scaffolds } + + SPADES + .out + .gfa + .filter { meta, gfa -> gfa.size() > 0 } + .set { ch_gfa } + + // + // Generate assembly visualisation with Bandage + // + ch_bandage_png = Channel.empty() + ch_bandage_svg = Channel.empty() + ch_bandage_version = Channel.empty() + if (!params.skip_bandage) { + BANDAGE_IMAGE ( ch_gfa ) + ch_bandage_version = BANDAGE_IMAGE.out.version + ch_bandage_png = BANDAGE_IMAGE.out.png + ch_bandage_svg = BANDAGE_IMAGE.out.svg + } + + // + // Downstream assembly steps + // + ASSEMBLY_QC ( + ch_scaffolds, + fasta, + gff, + blast_db, + blast_header + ) + + emit: + scaffolds = SPADES.out.scaffolds // channel: [ val(meta), [ scaffolds ] ] + contigs = SPADES.out.contigs // channel: [ val(meta), [ contigs ] ] + transcripts = SPADES.out.transcripts // channel: [ val(meta), [ transcripts ] ] + gene_clusters = SPADES.out.gene_clusters // channel: [ 
val(meta), [ gene_clusters ] ] + gfa = SPADES.out.gfa // channel: [ val(meta), [ gfa ] ] + log_out = SPADES.out.log // channel: [ val(meta), [ log ] ] + spades_version = SPADES.out.version // path: *.version.txt + + bandage_png = ch_bandage_png // channel: [ val(meta), [ png ] ] + bandage_svg = ch_bandage_svg // channel: [ val(meta), [ svg ] ] + bandage_version = ch_bandage_version // path: *.version.txt + + blast_txt = ASSEMBLY_QC.out.blast_txt // channel: [ val(meta), [ txt ] ] + blast_filter_txt = ASSEMBLY_QC.out.blast_filter_txt // channel: [ val(meta), [ txt ] ] + blast_version = ASSEMBLY_QC.out.blast_version // path: *.version.txt + + quast_results = ASSEMBLY_QC.out.quast_results // channel: [ val(meta), [ results ] ] + quast_tsv = ASSEMBLY_QC.out.quast_tsv // channel: [ val(meta), [ tsv ] ] + quast_version = ASSEMBLY_QC.out.quast_version // path: *.version.txt + + abacas_results = ASSEMBLY_QC.out.abacas_results // channel: [ val(meta), [ results ] ] + abacas_version = ASSEMBLY_QC.out.abacas_version // path: *.version.txt + + plasmidid_html = ASSEMBLY_QC.out.plasmidid_html // channel: [ val(meta), [ html ] ] + plasmidid_tab = ASSEMBLY_QC.out.plasmidid_tab // channel: [ val(meta), [ tab ] ] + plasmidid_images = ASSEMBLY_QC.out.plasmidid_images // channel: [ val(meta), [ images/ ] ] + plasmidid_logs = ASSEMBLY_QC.out.plasmidid_logs // channel: [ val(meta), [ logs/ ] ] + plasmidid_data = ASSEMBLY_QC.out.plasmidid_data // channel: [ val(meta), [ data/ ] ] + plasmidid_database = ASSEMBLY_QC.out.plasmidid_database // channel: [ val(meta), [ database/ ] ] + plasmidid_fasta = ASSEMBLY_QC.out.plasmidid_fasta // channel: [ val(meta), [ fasta_files/ ] ] + plasmidid_kmer = ASSEMBLY_QC.out.plasmidid_kmer // channel: [ val(meta), [ kmer/ ] ] + plasmidid_version = ASSEMBLY_QC.out.plasmidid_version // path: *.version.txt +} diff --git a/subworkflows/local/assembly_unicycler.nf b/subworkflows/local/assembly_unicycler.nf index 48920b93..9bde9d4f 100644 --- 
a/subworkflows/local/assembly_unicycler.nf +++ b/subworkflows/local/assembly_unicycler.nf @@ -1,101 +1,101 @@ -/* - * Assembly and downstream processing for Unicycler scaffolds - */ - -params.unicycler_options = [:] -params.bandage_options = [:] -params.blastn_options = [:] -params.blastn_filter_options = [:] -params.abacas_options = [:] -params.plasmidid_options = [:] -params.quast_options = [:] - -include { UNICYCLER } from '../../modules/nf-core/software/unicycler/main' addParams( options: params.unicycler_options ) -include { BANDAGE_IMAGE } from '../../modules/nf-core/software/bandage/image/main' addParams( options: params.bandage_options ) -include { ASSEMBLY_QC } from './assembly_qc' addParams( blastn_options: params.blastn_options, blastn_filter_options: params.blastn_filter_options, abacas_options: params.abacas_options, plasmidid_options: params.plasmidid_options, quast_options: params.quast_options ) - -workflow ASSEMBLY_UNICYCLER { - take: - reads // channel: [ val(meta), [ reads ] ] - fasta // channel: /path/to/genome.fasta - gff // channel: /path/to/genome.gff - blast_db // channel: /path/to/blast_db/ - blast_header // channel: /path/to/blast_header.txt - - main: - /* - * Assemble reads with Unicycler - */ - UNICYCLER ( reads ) - - /* - * Filter for empty scaffold files - */ - UNICYCLER - .out - .scaffolds - .filter { meta, scaffold -> scaffold.size() > 0 } - .set { ch_scaffolds } - - UNICYCLER - .out - .gfa - .filter { meta, gfa -> gfa.size() > 0 } - .set { ch_gfa } - - /* - * Generate assembly visualisation with Bandage - */ - ch_bandage_png = Channel.empty() - ch_bandage_svg = Channel.empty() - ch_bandage_version = Channel.empty() - if (!params.skip_bandage) { - BANDAGE_IMAGE ( ch_gfa ) - ch_bandage_version = BANDAGE_IMAGE.out.version - ch_bandage_png = BANDAGE_IMAGE.out.png - ch_bandage_svg = BANDAGE_IMAGE.out.svg - } - - /* - * Downstream assembly steps - */ - ASSEMBLY_QC ( - ch_scaffolds, - fasta, - gff, - blast_db, - blast_header - ) - - emit: 
- scaffolds = UNICYCLER.out.scaffolds // channel: [ val(meta), [ scaffolds ] ] - gfa = UNICYCLER.out.gfa // channel: [ val(meta), [ gfa ] ] - log_out = UNICYCLER.out.log // channel: [ val(meta), [ log ] ] - unicycler_version = UNICYCLER.out.version // path: *.version.txt - - bandage_png = ch_bandage_png // channel: [ val(meta), [ png ] ] - bandage_svg = ch_bandage_svg // channel: [ val(meta), [ svg ] ] - bandage_version = ch_bandage_version // path: *.version.txt - - blast_txt = ASSEMBLY_QC.out.blast_txt // channel: [ val(meta), [ txt ] ] - blast_filter_txt = ASSEMBLY_QC.out.blast_filter_txt // channel: [ val(meta), [ txt ] ] - blast_version = ASSEMBLY_QC.out.blast_version // path: *.version.txt - - quast_results = ASSEMBLY_QC.out.quast_results // channel: [ val(meta), [ results ] ] - quast_tsv = ASSEMBLY_QC.out.quast_tsv // channel: [ val(meta), [ tsv ] ] - quast_version = ASSEMBLY_QC.out.quast_version // path: *.version.txt - - abacas_results = ASSEMBLY_QC.out.abacas_results // channel: [ val(meta), [ results ] ] - abacas_version = ASSEMBLY_QC.out.abacas_version // path: *.version.txt - - plasmidid_html = ASSEMBLY_QC.out.plasmidid_html // channel: [ val(meta), [ html ] ] - plasmidid_tab = ASSEMBLY_QC.out.plasmidid_tab // channel: [ val(meta), [ tab ] ] - plasmidid_images = ASSEMBLY_QC.out.plasmidid_images // channel: [ val(meta), [ images/ ] ] - plasmidid_logs = ASSEMBLY_QC.out.plasmidid_logs // channel: [ val(meta), [ logs/ ] ] - plasmidid_data = ASSEMBLY_QC.out.plasmidid_data // channel: [ val(meta), [ data/ ] ] - plasmidid_database = ASSEMBLY_QC.out.plasmidid_database // channel: [ val(meta), [ database/ ] ] - plasmidid_fasta = ASSEMBLY_QC.out.plasmidid_fasta // channel: [ val(meta), [ fasta_files/ ] ] - plasmidid_kmer = ASSEMBLY_QC.out.plasmidid_kmer // channel: [ val(meta), [ kmer/ ] ] - plasmidid_version = ASSEMBLY_QC.out.plasmidid_version // path: *.version.txt - -} \ No newline at end of file +// +// Assembly and downstream processing for Unicycler 
scaffolds +// + +params.unicycler_options = [:] +params.bandage_options = [:] +params.blastn_options = [:] +params.blastn_filter_options = [:] +params.abacas_options = [:] +params.plasmidid_options = [:] +params.quast_options = [:] + +include { UNICYCLER } from '../../modules/nf-core/software/unicycler/main' addParams( options: params.unicycler_options ) +include { BANDAGE_IMAGE } from '../../modules/nf-core/software/bandage/image/main' addParams( options: params.bandage_options ) +include { ASSEMBLY_QC } from './assembly_qc' addParams( blastn_options: params.blastn_options, blastn_filter_options: params.blastn_filter_options, abacas_options: params.abacas_options, plasmidid_options: params.plasmidid_options, quast_options: params.quast_options ) + +workflow ASSEMBLY_UNICYCLER { + take: + reads // channel: [ val(meta), [ reads ] ] + fasta // channel: /path/to/genome.fasta + gff // channel: /path/to/genome.gff + blast_db // channel: /path/to/blast_db/ + blast_header // channel: /path/to/blast_header.txt + + main: + + // + // Assemble reads with Unicycler + // + UNICYCLER ( reads ) + + // + // Filter for empty scaffold files + // + UNICYCLER + .out + .scaffolds + .filter { meta, scaffold -> scaffold.size() > 0 } + .set { ch_scaffolds } + + UNICYCLER + .out + .gfa + .filter { meta, gfa -> gfa.size() > 0 } + .set { ch_gfa } + + // + // Generate assembly visualisation with Bandage + // + ch_bandage_png = Channel.empty() + ch_bandage_svg = Channel.empty() + ch_bandage_version = Channel.empty() + if (!params.skip_bandage) { + BANDAGE_IMAGE ( ch_gfa ) + ch_bandage_version = BANDAGE_IMAGE.out.version + ch_bandage_png = BANDAGE_IMAGE.out.png + ch_bandage_svg = BANDAGE_IMAGE.out.svg + } + + // + // Downstream assembly steps + // + ASSEMBLY_QC ( + ch_scaffolds, + fasta, + gff, + blast_db, + blast_header + ) + + emit: + scaffolds = UNICYCLER.out.scaffolds // channel: [ val(meta), [ scaffolds ] ] + gfa = UNICYCLER.out.gfa // channel: [ val(meta), [ gfa ] ] + log_out = 
UNICYCLER.out.log // channel: [ val(meta), [ log ] ] + unicycler_version = UNICYCLER.out.version // path: *.version.txt + + bandage_png = ch_bandage_png // channel: [ val(meta), [ png ] ] + bandage_svg = ch_bandage_svg // channel: [ val(meta), [ svg ] ] + bandage_version = ch_bandage_version // path: *.version.txt + + blast_txt = ASSEMBLY_QC.out.blast_txt // channel: [ val(meta), [ txt ] ] + blast_filter_txt = ASSEMBLY_QC.out.blast_filter_txt // channel: [ val(meta), [ txt ] ] + blast_version = ASSEMBLY_QC.out.blast_version // path: *.version.txt + + quast_results = ASSEMBLY_QC.out.quast_results // channel: [ val(meta), [ results ] ] + quast_tsv = ASSEMBLY_QC.out.quast_tsv // channel: [ val(meta), [ tsv ] ] + quast_version = ASSEMBLY_QC.out.quast_version // path: *.version.txt + + abacas_results = ASSEMBLY_QC.out.abacas_results // channel: [ val(meta), [ results ] ] + abacas_version = ASSEMBLY_QC.out.abacas_version // path: *.version.txt + + plasmidid_html = ASSEMBLY_QC.out.plasmidid_html // channel: [ val(meta), [ html ] ] + plasmidid_tab = ASSEMBLY_QC.out.plasmidid_tab // channel: [ val(meta), [ tab ] ] + plasmidid_images = ASSEMBLY_QC.out.plasmidid_images // channel: [ val(meta), [ images/ ] ] + plasmidid_logs = ASSEMBLY_QC.out.plasmidid_logs // channel: [ val(meta), [ logs/ ] ] + plasmidid_data = ASSEMBLY_QC.out.plasmidid_data // channel: [ val(meta), [ data/ ] ] + plasmidid_database = ASSEMBLY_QC.out.plasmidid_database // channel: [ val(meta), [ database/ ] ] + plasmidid_fasta = ASSEMBLY_QC.out.plasmidid_fasta // channel: [ val(meta), [ fasta_files/ ] ] + plasmidid_kmer = ASSEMBLY_QC.out.plasmidid_kmer // channel: [ val(meta), [ kmer/ ] ] + plasmidid_version = ASSEMBLY_QC.out.plasmidid_version // path: *.version.txt +} diff --git a/subworkflows/local/input_check.nf b/subworkflows/local/input_check.nf index 6acd9c07..7cfb85d4 100644 --- a/subworkflows/local/input_check.nf +++ b/subworkflows/local/input_check.nf @@ -1,6 +1,6 @@ -/* - * Check input samplesheet 
and get read channels - */ +// +// Check input samplesheet and get read channels +// params.options = [:] @@ -10,7 +10,7 @@ workflow INPUT_CHECK { take: samplesheet // file : /path/to/samplesheet.csv platform // string: sequencing platform. Accepted values: 'illumina', 'nanopore' - + main: SAMPLESHEET_CHECK ( samplesheet, platform ) @@ -50,5 +50,5 @@ def create_fastq_channels(LinkedHashMap row) { } array = [ meta, [ file(row.fastq_1), file(row.fastq_2) ] ] } - return array -} \ No newline at end of file + return array +} diff --git a/subworkflows/local/make_consensus.nf b/subworkflows/local/make_consensus.nf index 8d20972b..342f2735 100644 --- a/subworkflows/local/make_consensus.nf +++ b/subworkflows/local/make_consensus.nf @@ -1,45 +1,43 @@ -/* - * Run various tools to generate a masked genome consensus sequence - */ - -params.genomecov_options = [:] -params.merge_options = [:] -params.mask_options = [:] -params.maskfasta_options = [:] -params.bcftools_options = [:] -params.plot_bases_options = [:] - -include { BEDTOOLS_GENOMECOV } from '../../modules/nf-core/software/bedtools/genomecov/main' addParams( options: params.genomecov_options ) -include { BEDTOOLS_MERGE } from '../../modules/nf-core/software/bedtools/merge/main' addParams( options: params.merge_options ) -include { BEDTOOLS_MASKFASTA } from '../../modules/nf-core/software/bedtools/maskfasta/main' addParams( options: params.maskfasta_options ) -include { BCFTOOLS_CONSENSUS } from '../../modules/nf-core/software/bcftools/consensus/main' addParams( options: params.bcftools_options ) -include { MAKE_BED_MASK } from '../../modules/local/make_bed_mask' addParams( options: params.mask_options ) -include { PLOT_BASE_DENSITY } from '../../modules/local/plot_base_density' addParams( options: params.plot_bases_options ) - -workflow MAKE_CONSENSUS { - take: - bam_vcf // channel: [ val(meta), [ bam ], [ vcf ], [ tbi ] ] - fasta - - main: - BEDTOOLS_GENOMECOV ( bam_vcf.map { meta, bam, vcf, tbi -> [ meta, bam ] } ) - 
- BEDTOOLS_MERGE ( BEDTOOLS_GENOMECOV.out.bed ) - - MAKE_BED_MASK ( bam_vcf.map { meta, bam, vcf, tbi -> [ meta, vcf ] }.join( BEDTOOLS_MERGE.out.bed, by: [0] ) ) - - BEDTOOLS_MASKFASTA ( MAKE_BED_MASK.out.bed, fasta ) - - BCFTOOLS_CONSENSUS ( bam_vcf.map { meta, bam, vcf, tbi -> [ meta, vcf, tbi ] }.join( BEDTOOLS_MASKFASTA.out.fasta, by: [0] ) ) - - PLOT_BASE_DENSITY ( BCFTOOLS_CONSENSUS.out.fasta ) - - emit: - fasta = BCFTOOLS_CONSENSUS.out.fasta // channel: [ val(meta), [ fasta ] ] - tsv = PLOT_BASE_DENSITY.out.tsv // channel: [ val(meta), [ tsv ] ] - pdf = PLOT_BASE_DENSITY.out.pdf // channel: [ val(meta), [ pdf ] ] - bedtools_version = BEDTOOLS_MERGE.out.version // path: *.version.txt - bcftools_version = BCFTOOLS_CONSENSUS.out.version // path: *.version.txt - -} - +// +// Run various tools to generate a masked genome consensus sequence +// + +params.genomecov_options = [:] +params.merge_options = [:] +params.mask_options = [:] +params.maskfasta_options = [:] +params.bcftools_options = [:] +params.plot_bases_options = [:] + +include { BEDTOOLS_GENOMECOV } from '../../modules/nf-core/software/bedtools/genomecov/main' addParams( options: params.genomecov_options ) +include { BEDTOOLS_MERGE } from '../../modules/nf-core/software/bedtools/merge/main' addParams( options: params.merge_options ) +include { BEDTOOLS_MASKFASTA } from '../../modules/nf-core/software/bedtools/maskfasta/main' addParams( options: params.maskfasta_options ) +include { BCFTOOLS_CONSENSUS } from '../../modules/nf-core/software/bcftools/consensus/main' addParams( options: params.bcftools_options ) +include { MAKE_BED_MASK } from '../../modules/local/make_bed_mask' addParams( options: params.mask_options ) +include { PLOT_BASE_DENSITY } from '../../modules/local/plot_base_density' addParams( options: params.plot_bases_options ) + +workflow MAKE_CONSENSUS { + take: + bam_vcf // channel: [ val(meta), [ bam ], [ vcf ], [ tbi ] ] + fasta + + main: + BEDTOOLS_GENOMECOV ( bam_vcf.map { meta, bam, 
vcf, tbi -> [ meta, bam ] } ) + + BEDTOOLS_MERGE ( BEDTOOLS_GENOMECOV.out.bed ) + + MAKE_BED_MASK ( bam_vcf.map { meta, bam, vcf, tbi -> [ meta, vcf ] }.join( BEDTOOLS_MERGE.out.bed, by: [0] ) ) + + BEDTOOLS_MASKFASTA ( MAKE_BED_MASK.out.bed, fasta ) + + BCFTOOLS_CONSENSUS ( bam_vcf.map { meta, bam, vcf, tbi -> [ meta, vcf, tbi ] }.join( BEDTOOLS_MASKFASTA.out.fasta, by: [0] ) ) + + PLOT_BASE_DENSITY ( BCFTOOLS_CONSENSUS.out.fasta ) + + emit: + fasta = BCFTOOLS_CONSENSUS.out.fasta // channel: [ val(meta), [ fasta ] ] + tsv = PLOT_BASE_DENSITY.out.tsv // channel: [ val(meta), [ tsv ] ] + pdf = PLOT_BASE_DENSITY.out.pdf // channel: [ val(meta), [ pdf ] ] + bedtools_version = BEDTOOLS_MERGE.out.version // path: *.version.txt + bcftools_version = BCFTOOLS_CONSENSUS.out.version // path: *.version.txt +} diff --git a/subworkflows/local/prepare_genome_illumina.nf b/subworkflows/local/prepare_genome_illumina.nf index ec6c39cc..35f61fd6 100644 --- a/subworkflows/local/prepare_genome_illumina.nf +++ b/subworkflows/local/prepare_genome_illumina.nf @@ -1,161 +1,162 @@ -/* - * Uncompress and prepare reference genome files -*/ - -params.genome_options = [:] -params.index_options = [:] -params.db_options = [:] -params.bowtie2_build_options = [:] -params.collapse_primers_options = [:] -params.bedtools_getfasta_options = [:] -params.snpeff_build_options = [:] -params.makeblastdb_options = [:] -params.kraken2_build_options = [:] - -include { - GUNZIP as GUNZIP_FASTA - GUNZIP as GUNZIP_GFF - GUNZIP as GUNZIP_PRIMER_BED - GUNZIP as GUNZIP_PRIMER_FASTA } from '../../modules/nf-core/software/gunzip/main' addParams( options: params.genome_options ) -include { UNTAR as UNTAR_BOWTIE2_INDEX } from '../../modules/nf-core/software/untar/main' addParams( options: params.index_options ) -include { UNTAR as UNTAR_KRAKEN2_DB } from '../../modules/nf-core/software/untar/main' addParams( options: params.db_options ) -include { UNTAR as UNTAR_BLAST_DB } from 
'../../modules/nf-core/software/untar/main' addParams( options: params.db_options ) -include { BOWTIE2_BUILD } from '../../modules/nf-core/software/bowtie2/build/main' addParams( options: params.bowtie2_build_options ) -include { BLAST_MAKEBLASTDB } from '../../modules/nf-core/software/blast/makeblastdb/main' addParams( options: params.makeblastdb_options ) -include { BEDTOOLS_GETFASTA } from '../../modules/nf-core/software/bedtools/getfasta/main' addParams( options: params.bedtools_getfasta_options ) -include { COLLAPSE_PRIMERS } from '../../modules/local/collapse_primers' addParams( options: params.collapse_primers_options ) -include { KRAKEN2_BUILD } from '../../modules/local/kraken2_build' addParams( options: params.kraken2_build_options ) -include { SNPEFF_BUILD } from '../../modules/local/snpeff_build' addParams( options: params.snpeff_build_options ) - -workflow PREPARE_GENOME { - take: - dummy_file - - main: - /* - * Uncompress genome fasta file if required - */ - if (params.fasta.endsWith('.gz')) { - ch_fasta = GUNZIP_FASTA ( params.fasta ).gunzip - } else { - ch_fasta = file(params.fasta) - } - - /* - * Uncompress GFF annotation file - */ - if (params.gff) { - if (params.gff.endsWith('.gz')) { - ch_gff = GUNZIP_GFF ( params.gff ).gunzip - } else { - ch_gff = file(params.gff) - } - } else { - ch_gff = dummy_file - } - - /* - * Prepare reference files required for variant calling - */ - ch_kraken2_db = Channel.empty() - if (!params.skip_kraken2) { - if (params.kraken2_db) { - if (params.kraken2_db.endsWith('.tar.gz')) { - ch_kraken2_db = UNTAR_KRAKEN2_DB ( params.kraken2_db ).untar - } else { - ch_kraken2_db = file(params.kraken2_db) - } - } else { - ch_kraken2_db = KRAKEN2_BUILD ( params.kraken2_db_name ).db - } - } - - /* - * Prepare files required for amplicon data - */ - ch_primer_bed = Channel.empty() - ch_primer_fasta = Channel.empty() - ch_primer_collapsed_bed = Channel.empty() - if (params.protocol == 'amplicon') { - if (params.primer_bed) { - if 
(params.primer_bed.endsWith('.gz')) { - ch_primer_bed = GUNZIP_PRIMER_BED ( params.primer_bed ).gunzip - } else { - ch_primer_bed = file(params.primer_bed) - } - } - - if (!params.skip_variants && !params.skip_mosdepth) { - ch_primer_collapsed_bed = COLLAPSE_PRIMERS ( ch_primer_bed, params.primer_left_suffix, params.primer_right_suffix ) - } - - if (!params.skip_assembly && !params.skip_cutadapt) { - if (params.primer_fasta) { - if (params.primer_fasta.endsWith('.gz')) { - ch_primer_fasta = GUNZIP_PRIMER_FASTA ( params.primer_fasta ).gunzip - } else { - ch_primer_fasta = file(params.primer_fasta) - } - } else { - ch_primer_fasta = BEDTOOLS_GETFASTA ( ch_primer_bed, ch_fasta ).fasta - } - } - } - - /* - * Prepare reference files required for variant calling - */ - ch_bowtie2_index = Channel.empty() - if (!params.skip_variants) { - if (params.bowtie2_index) { - if (params.bowtie2_index.endsWith('.tar.gz')) { - ch_bowtie2_index = UNTAR_BOWTIE2_INDEX ( params.bowtie2_index ).untar - } else { - ch_bowtie2_index = file(params.bowtie2_index) - } - } else { - ch_bowtie2_index = BOWTIE2_BUILD ( ch_fasta ).index - } - } - - /* - * Prepare reference files required for de novo assembly - */ - ch_blast_db = Channel.empty() - if (!params.skip_assembly) { - if (!params.skip_blast) { - if (params.blast_db) { - if (params.blast_db.endsWith('.tar.gz')) { - ch_blast_db = UNTAR_BLAST_DB ( params.blast_db ).untar - } else { - ch_blast_db = file(params.blast_db) - } - } else { - ch_blast_db = BLAST_MAKEBLASTDB ( ch_fasta ).db - } - } - } - - /* - * Make snpEff database - */ - ch_snpeff_db = Channel.empty() - ch_snpeff_config = Channel.empty() - if (!params.skip_variants && params.gff && !params.skip_snpeff) { - SNPEFF_BUILD ( ch_fasta, ch_gff ) - ch_snpeff_db = SNPEFF_BUILD.out.db - ch_snpeff_config = SNPEFF_BUILD.out.config - } - - emit: - fasta = ch_fasta // path: genome.fasta - gff = ch_gff // path: genome.gff - bowtie2_index = ch_bowtie2_index // path: bowtie2/index/ - primer_bed = 
ch_primer_bed // path: primer.bed - primer_collapsed_bed = ch_primer_collapsed_bed // path: primer.collapsed.bed - primer_fasta = ch_primer_fasta // path: primer.fasta - blast_db = ch_blast_db // path: blast_db/ - kraken2_db = ch_kraken2_db // path: kraken2_db/ - snpeff_db = ch_snpeff_db // path: snpeff_db - snpeff_config = ch_snpeff_config // path: snpeff.config -} +// +// Uncompress and prepare reference genome files +// + +params.genome_options = [:] +params.index_options = [:] +params.db_options = [:] +params.bowtie2_build_options = [:] +params.collapse_primers_options = [:] +params.bedtools_getfasta_options = [:] +params.snpeff_build_options = [:] +params.makeblastdb_options = [:] +params.kraken2_build_options = [:] + +include { + GUNZIP as GUNZIP_FASTA + GUNZIP as GUNZIP_GFF + GUNZIP as GUNZIP_PRIMER_BED + GUNZIP as GUNZIP_PRIMER_FASTA } from '../../modules/nf-core/software/gunzip/main' addParams( options: params.genome_options ) +include { UNTAR as UNTAR_BOWTIE2_INDEX } from '../../modules/nf-core/software/untar/main' addParams( options: params.index_options ) +include { UNTAR as UNTAR_KRAKEN2_DB } from '../../modules/nf-core/software/untar/main' addParams( options: params.db_options ) +include { UNTAR as UNTAR_BLAST_DB } from '../../modules/nf-core/software/untar/main' addParams( options: params.db_options ) +include { BOWTIE2_BUILD } from '../../modules/nf-core/software/bowtie2/build/main' addParams( options: params.bowtie2_build_options ) +include { BLAST_MAKEBLASTDB } from '../../modules/nf-core/software/blast/makeblastdb/main' addParams( options: params.makeblastdb_options ) +include { BEDTOOLS_GETFASTA } from '../../modules/nf-core/software/bedtools/getfasta/main' addParams( options: params.bedtools_getfasta_options ) +include { COLLAPSE_PRIMERS } from '../../modules/local/collapse_primers' addParams( options: params.collapse_primers_options ) +include { KRAKEN2_BUILD } from '../../modules/local/kraken2_build' addParams( options: 
params.kraken2_build_options ) +include { SNPEFF_BUILD } from '../../modules/local/snpeff_build' addParams( options: params.snpeff_build_options ) + +workflow PREPARE_GENOME { + take: + dummy_file + + main: + + // + // Uncompress genome fasta file if required + // + if (params.fasta.endsWith('.gz')) { + ch_fasta = GUNZIP_FASTA ( params.fasta ).gunzip + } else { + ch_fasta = file(params.fasta) + } + + // + // Uncompress GFF annotation file + // + if (params.gff) { + if (params.gff.endsWith('.gz')) { + ch_gff = GUNZIP_GFF ( params.gff ).gunzip + } else { + ch_gff = file(params.gff) + } + } else { + ch_gff = dummy_file + } + + // + // Prepare reference files required for variant calling + // + ch_kraken2_db = Channel.empty() + if (!params.skip_kraken2) { + if (params.kraken2_db) { + if (params.kraken2_db.endsWith('.tar.gz')) { + ch_kraken2_db = UNTAR_KRAKEN2_DB ( params.kraken2_db ).untar + } else { + ch_kraken2_db = file(params.kraken2_db) + } + } else { + ch_kraken2_db = KRAKEN2_BUILD ( params.kraken2_db_name ).db + } + } + + // + // Prepare files required for amplicon data + // + ch_primer_bed = Channel.empty() + ch_primer_fasta = Channel.empty() + ch_primer_collapsed_bed = Channel.empty() + if (params.protocol == 'amplicon') { + if (params.primer_bed) { + if (params.primer_bed.endsWith('.gz')) { + ch_primer_bed = GUNZIP_PRIMER_BED ( params.primer_bed ).gunzip + } else { + ch_primer_bed = file(params.primer_bed) + } + } + + if (!params.skip_variants && !params.skip_mosdepth) { + ch_primer_collapsed_bed = COLLAPSE_PRIMERS ( ch_primer_bed, params.primer_left_suffix, params.primer_right_suffix ) + } + + if (!params.skip_assembly && !params.skip_cutadapt) { + if (params.primer_fasta) { + if (params.primer_fasta.endsWith('.gz')) { + ch_primer_fasta = GUNZIP_PRIMER_FASTA ( params.primer_fasta ).gunzip + } else { + ch_primer_fasta = file(params.primer_fasta) + } + } else { + ch_primer_fasta = BEDTOOLS_GETFASTA ( ch_primer_bed, ch_fasta ).fasta + } + } + } + + // + // 
Prepare reference files required for variant calling + // + ch_bowtie2_index = Channel.empty() + if (!params.skip_variants) { + if (params.bowtie2_index) { + if (params.bowtie2_index.endsWith('.tar.gz')) { + ch_bowtie2_index = UNTAR_BOWTIE2_INDEX ( params.bowtie2_index ).untar + } else { + ch_bowtie2_index = file(params.bowtie2_index) + } + } else { + ch_bowtie2_index = BOWTIE2_BUILD ( ch_fasta ).index + } + } + + // + // Prepare reference files required for de novo assembly + // + ch_blast_db = Channel.empty() + if (!params.skip_assembly) { + if (!params.skip_blast) { + if (params.blast_db) { + if (params.blast_db.endsWith('.tar.gz')) { + ch_blast_db = UNTAR_BLAST_DB ( params.blast_db ).untar + } else { + ch_blast_db = file(params.blast_db) + } + } else { + ch_blast_db = BLAST_MAKEBLASTDB ( ch_fasta ).db + } + } + } + + // + // Make snpEff database + // + ch_snpeff_db = Channel.empty() + ch_snpeff_config = Channel.empty() + if (!params.skip_variants && params.gff && !params.skip_snpeff) { + SNPEFF_BUILD ( ch_fasta, ch_gff ) + ch_snpeff_db = SNPEFF_BUILD.out.db + ch_snpeff_config = SNPEFF_BUILD.out.config + } + + emit: + fasta = ch_fasta // path: genome.fasta + gff = ch_gff // path: genome.gff + bowtie2_index = ch_bowtie2_index // path: bowtie2/index/ + primer_bed = ch_primer_bed // path: primer.bed + primer_collapsed_bed = ch_primer_collapsed_bed // path: primer.collapsed.bed + primer_fasta = ch_primer_fasta // path: primer.fasta + blast_db = ch_blast_db // path: blast_db/ + kraken2_db = ch_kraken2_db // path: kraken2_db/ + snpeff_db = ch_snpeff_db // path: snpeff_db + snpeff_config = ch_snpeff_config // path: snpeff.config +} diff --git a/subworkflows/local/prepare_genome_nanopore.nf b/subworkflows/local/prepare_genome_nanopore.nf index a8d12724..dfe7442a 100644 --- a/subworkflows/local/prepare_genome_nanopore.nf +++ b/subworkflows/local/prepare_genome_nanopore.nf @@ -1,81 +1,82 @@ -/* - * Uncompress and prepare reference genome files -*/ - -params.genome_options 
= [:] -params.collapse_primers_options = [:] -params.snpeff_build_options = [:] - -include { - GUNZIP as GUNZIP_FASTA - GUNZIP as GUNZIP_GFF - GUNZIP as GUNZIP_PRIMER_BED } from '../../modules/nf-core/software/gunzip/main' addParams( options: params.genome_options ) -include { COLLAPSE_PRIMERS } from '../../modules/local/collapse_primers' addParams( options: params.collapse_primers_options ) -include { SNPEFF_BUILD } from '../../modules/local/snpeff_build' addParams( options: params.snpeff_build_options ) - -workflow PREPARE_GENOME { - take: - dummy_file - - main: - /* - * Uncompress genome fasta file if required - */ - if (params.fasta.endsWith('.gz')) { - ch_fasta = GUNZIP_FASTA ( params.fasta ).gunzip - } else { - ch_fasta = file(params.fasta) - } - - /* - * Uncompress GFF annotation file - */ - if (params.gff) { - if (params.gff.endsWith('.gz')) { - ch_gff = GUNZIP_GFF ( params.gff ).gunzip - } else { - ch_gff = file(params.gff) - } - } else { - ch_gff = dummy_file - } - - /* - * Uncompress primer BED file - */ - ch_primer_bed = Channel.empty() - if (params.primer_bed) { - if (params.primer_bed.endsWith('.gz')) { - ch_primer_bed = GUNZIP_PRIMER_BED ( params.primer_bed ).gunzip - } else { - ch_primer_bed = file(params.primer_bed) - } - } - - /* - * Generate collapsed BED file - */ - ch_primer_collapsed_bed = Channel.empty() - if (!params.skip_mosdepth) { - ch_primer_collapsed_bed = COLLAPSE_PRIMERS ( ch_primer_bed, params.primer_left_suffix, params.primer_right_suffix ) - } - - /* - * Make snpEff database - */ - ch_snpeff_db = Channel.empty() - ch_snpeff_config = Channel.empty() - if (params.gff && !params.skip_snpeff) { - SNPEFF_BUILD ( ch_fasta, ch_gff ) - ch_snpeff_db = SNPEFF_BUILD.out.db - ch_snpeff_config = SNPEFF_BUILD.out.config - } - - emit: - fasta = ch_fasta // path: genome.fasta - gff = ch_gff // path: genome.gff - primer_bed = ch_primer_bed // path: primer.bed - primer_collapsed_bed = ch_primer_collapsed_bed // path: primer.collapsed.bed - snpeff_db 
= ch_snpeff_db // path: snpeff_db - snpeff_config = ch_snpeff_config // path: snpeff.config -} +// +// Uncompress and prepare reference genome files +// + +params.genome_options = [:] +params.collapse_primers_options = [:] +params.snpeff_build_options = [:] + +include { + GUNZIP as GUNZIP_FASTA + GUNZIP as GUNZIP_GFF + GUNZIP as GUNZIP_PRIMER_BED } from '../../modules/nf-core/software/gunzip/main' addParams( options: params.genome_options ) +include { COLLAPSE_PRIMERS } from '../../modules/local/collapse_primers' addParams( options: params.collapse_primers_options ) +include { SNPEFF_BUILD } from '../../modules/local/snpeff_build' addParams( options: params.snpeff_build_options ) + +workflow PREPARE_GENOME { + take: + dummy_file + + main: + + // + // Uncompress genome fasta file if required + // + if (params.fasta.endsWith('.gz')) { + ch_fasta = GUNZIP_FASTA ( params.fasta ).gunzip + } else { + ch_fasta = file(params.fasta) + } + + // + // Uncompress GFF annotation file + // + if (params.gff) { + if (params.gff.endsWith('.gz')) { + ch_gff = GUNZIP_GFF ( params.gff ).gunzip + } else { + ch_gff = file(params.gff) + } + } else { + ch_gff = dummy_file + } + + // + // Uncompress primer BED file + // + ch_primer_bed = Channel.empty() + if (params.primer_bed) { + if (params.primer_bed.endsWith('.gz')) { + ch_primer_bed = GUNZIP_PRIMER_BED ( params.primer_bed ).gunzip + } else { + ch_primer_bed = file(params.primer_bed) + } + } + + // + // Generate collapsed BED file + // + ch_primer_collapsed_bed = Channel.empty() + if (!params.skip_mosdepth) { + ch_primer_collapsed_bed = COLLAPSE_PRIMERS ( ch_primer_bed, params.primer_left_suffix, params.primer_right_suffix ) + } + + // + // Make snpEff database + // + ch_snpeff_db = Channel.empty() + ch_snpeff_config = Channel.empty() + if (params.gff && !params.skip_snpeff) { + SNPEFF_BUILD ( ch_fasta, ch_gff ) + ch_snpeff_db = SNPEFF_BUILD.out.db + ch_snpeff_config = SNPEFF_BUILD.out.config + } + + emit: + fasta = ch_fasta // path: 
genome.fasta + gff = ch_gff // path: genome.gff + primer_bed = ch_primer_bed // path: primer.bed + primer_collapsed_bed = ch_primer_collapsed_bed // path: primer.collapsed.bed + snpeff_db = ch_snpeff_db // path: snpeff_db + snpeff_config = ch_snpeff_config // path: snpeff.config +} diff --git a/subworkflows/local/primer_trim_ivar.nf b/subworkflows/local/primer_trim_ivar.nf index 16fa5788..b1589457 100644 --- a/subworkflows/local/primer_trim_ivar.nf +++ b/subworkflows/local/primer_trim_ivar.nf @@ -1,39 +1,40 @@ -/* - * iVar trim, sort, index BAM file and run samtools stats, flagstat and idxstats - */ - -params.ivar_trim_options = [:] -params.samtools_options = [:] - -include { IVAR_TRIM } from '../../modules/nf-core/software/ivar/trim/main' addParams( options: params.ivar_trim_options ) -include { SAMTOOLS_INDEX } from '../../modules/nf-core/software/samtools/index/main' addParams( options: params.samtools_options ) -include { BAM_SORT_SAMTOOLS } from '../nf-core/bam_sort_samtools' addParams( options: params.samtools_options ) - -workflow PRIMER_TRIM_IVAR { - take: - bam // channel: [ val(meta), [ bam ], [bai] ] - bed // path : bed - - main: - /* - * iVar trim primers - */ - IVAR_TRIM ( bam, bed ) - - /* - * Sort, index BAM file and run samtools stats, flagstat and idxstats - */ - BAM_SORT_SAMTOOLS ( IVAR_TRIM.out.bam ) - - emit: - bam_orig = IVAR_TRIM.out.bam // channel: [ val(meta), bam ] - log_out = IVAR_TRIM.out.log // channel: [ val(meta), log ] - ivar_version = IVAR_TRIM.out.version // path: *.version.txt - - bam = BAM_SORT_SAMTOOLS.out.bam // channel: [ val(meta), [ bam ] ] - bai = BAM_SORT_SAMTOOLS.out.bai // channel: [ val(meta), [ bai ] ] - stats = BAM_SORT_SAMTOOLS.out.stats // channel: [ val(meta), [ stats ] ] - flagstat = BAM_SORT_SAMTOOLS.out.flagstat // channel: [ val(meta), [ flagstat ] ] - idxstats = BAM_SORT_SAMTOOLS.out.idxstats // channel: [ val(meta), [ idxstats ] ] - samtools_version = BAM_SORT_SAMTOOLS.out.version // path: *.version.txt -} +// 
+// iVar trim, sort, index BAM file and run samtools stats, flagstat and idxstats +// + +params.ivar_trim_options = [:] +params.samtools_options = [:] + +include { IVAR_TRIM } from '../../modules/nf-core/software/ivar/trim/main' addParams( options: params.ivar_trim_options ) +include { SAMTOOLS_INDEX } from '../../modules/nf-core/software/samtools/index/main' addParams( options: params.samtools_options ) +include { BAM_SORT_SAMTOOLS } from '../nf-core/bam_sort_samtools' addParams( options: params.samtools_options ) + +workflow PRIMER_TRIM_IVAR { + take: + bam // channel: [ val(meta), [ bam ], [bai] ] + bed // path : bed + + main: + + // + // iVar trim primers + // + IVAR_TRIM ( bam, bed ) + + // + // Sort, index BAM file and run samtools stats, flagstat and idxstats + // + BAM_SORT_SAMTOOLS ( IVAR_TRIM.out.bam ) + + emit: + bam_orig = IVAR_TRIM.out.bam // channel: [ val(meta), bam ] + log_out = IVAR_TRIM.out.log // channel: [ val(meta), log ] + ivar_version = IVAR_TRIM.out.version // path: *.version.txt + + bam = BAM_SORT_SAMTOOLS.out.bam // channel: [ val(meta), [ bam ] ] + bai = BAM_SORT_SAMTOOLS.out.bai // channel: [ val(meta), [ bai ] ] + stats = BAM_SORT_SAMTOOLS.out.stats // channel: [ val(meta), [ stats ] ] + flagstat = BAM_SORT_SAMTOOLS.out.flagstat // channel: [ val(meta), [ flagstat ] ] + idxstats = BAM_SORT_SAMTOOLS.out.idxstats // channel: [ val(meta), [ idxstats ] ] + samtools_version = BAM_SORT_SAMTOOLS.out.version // path: *.version.txt +} diff --git a/subworkflows/local/snpeff_snpsift.nf b/subworkflows/local/snpeff_snpsift.nf index df12a8a9..cee772e3 100644 --- a/subworkflows/local/snpeff_snpsift.nf +++ b/subworkflows/local/snpeff_snpsift.nf @@ -1,43 +1,44 @@ -/* - * Run snpEff, bgzip, tabix, stats and SnpSift commands - */ - -params.snpeff_options = [:] -params.bgzip_options = [:] -params.tabix_options = [:] -params.stats_options = [:] -params.snpsift_options = [:] - -include { SNPEFF_ANN } from '../../modules/local/snpeff_ann' addParams( options: 
params.snpeff_options ) -include { SNPSIFT_EXTRACTFIELDS } from '../../modules/local/snpsift_extractfields' addParams( options: params.snpsift_options ) -include { VCF_BGZIP_TABIX_STATS } from '../nf-core/vcf_bgzip_tabix_stats' addParams( bgzip_options: params.bgzip_options, tabix_options: params.tabix_options, stats_options: params.stats_options ) - -workflow SNPEFF_SNPSIFT { - take: - vcf // channel: [ val(meta), [ vcf ] ] - db // path : snpEff database - config // path : snpEff config - fasta // path : genome.fasta - - main: - SNPEFF_ANN ( vcf, db, config, fasta ) - - VCF_BGZIP_TABIX_STATS ( SNPEFF_ANN.out.vcf ) - - SNPSIFT_EXTRACTFIELDS ( VCF_BGZIP_TABIX_STATS.out.vcf ) - - emit: - csv = SNPEFF_ANN.out.csv // channel: [ val(meta), [ csv ] ] - txt = SNPEFF_ANN.out.txt // channel: [ val(meta), [ txt ] ] - html = SNPEFF_ANN.out.html // channel: [ val(meta), [ html ] ] - snpeff_version = SNPEFF_ANN.out.version // path: *.version.txt - - vcf = VCF_BGZIP_TABIX_STATS.out.vcf // channel: [ val(meta), [ vcf.gz ] ] - tbi = VCF_BGZIP_TABIX_STATS.out.tbi // channel: [ val(meta), [ tbi ] ] - stats = VCF_BGZIP_TABIX_STATS.out.stats // channel: [ val(meta), [ txt ] ] - tabix_version = VCF_BGZIP_TABIX_STATS.out.tabix_version // path: *.version.txt - bcftools_version = VCF_BGZIP_TABIX_STATS.out.bcftools_version // path: *.version.txt - - snpsift_txt = SNPSIFT_EXTRACTFIELDS.out.txt // channel: [ val(meta), [ txt ] ] - snpsift_version = SNPSIFT_EXTRACTFIELDS.out.version // path: *.version.txt -} +// +// Run snpEff, bgzip, tabix, stats and SnpSift commands +// + +params.snpeff_options = [:] +params.bgzip_options = [:] +params.tabix_options = [:] +params.stats_options = [:] +params.snpsift_options = [:] + +include { SNPEFF_ANN } from '../../modules/local/snpeff_ann' addParams( options: params.snpeff_options ) +include { SNPSIFT_EXTRACTFIELDS } from '../../modules/local/snpsift_extractfields' addParams( options: params.snpsift_options ) +include { VCF_BGZIP_TABIX_STATS } from 
'../nf-core/vcf_bgzip_tabix_stats' addParams( bgzip_options: params.bgzip_options, tabix_options: params.tabix_options, stats_options: params.stats_options ) + +workflow SNPEFF_SNPSIFT { + take: + vcf // channel: [ val(meta), [ vcf ] ] + db // path : snpEff database + config // path : snpEff config + fasta // path : genome.fasta + + main: + + SNPEFF_ANN ( vcf, db, config, fasta ) + + VCF_BGZIP_TABIX_STATS ( SNPEFF_ANN.out.vcf ) + + SNPSIFT_EXTRACTFIELDS ( VCF_BGZIP_TABIX_STATS.out.vcf ) + + emit: + csv = SNPEFF_ANN.out.csv // channel: [ val(meta), [ csv ] ] + txt = SNPEFF_ANN.out.txt // channel: [ val(meta), [ txt ] ] + html = SNPEFF_ANN.out.html // channel: [ val(meta), [ html ] ] + snpeff_version = SNPEFF_ANN.out.version // path: *.version.txt + + vcf = VCF_BGZIP_TABIX_STATS.out.vcf // channel: [ val(meta), [ vcf.gz ] ] + tbi = VCF_BGZIP_TABIX_STATS.out.tbi // channel: [ val(meta), [ tbi ] ] + stats = VCF_BGZIP_TABIX_STATS.out.stats // channel: [ val(meta), [ txt ] ] + tabix_version = VCF_BGZIP_TABIX_STATS.out.tabix_version // path: *.version.txt + bcftools_version = VCF_BGZIP_TABIX_STATS.out.bcftools_version // path: *.version.txt + + snpsift_txt = SNPSIFT_EXTRACTFIELDS.out.txt // channel: [ val(meta), [ txt ] ] + snpsift_version = SNPSIFT_EXTRACTFIELDS.out.version // path: *.version.txt +} diff --git a/subworkflows/local/variants_bcftools.nf b/subworkflows/local/variants_bcftools.nf index b6bd0515..22f84c07 100644 --- a/subworkflows/local/variants_bcftools.nf +++ b/subworkflows/local/variants_bcftools.nf @@ -1,172 +1,173 @@ -/* - * Variant calling and downstream processing for BCFTools - */ - -params.bcftools_mpileup_options = [:] -params.quast_options = [:] -params.consensus_genomecov_options = [:] -params.consensus_merge_options = [:] -params.consensus_mask_options = [:] -params.consensus_maskfasta_options = [:] -params.consensus_bcftools_options = [:] -params.consensus_plot_options = [:] -params.snpeff_options = [:] -params.snpsift_options = [:] 
-params.snpeff_bgzip_options = [:] -params.snpeff_tabix_options = [:] -params.snpeff_stats_options = [:] -params.pangolin_options = [:] -params.nextclade_options = [:] -params.asciigenome_options = [:] - -include { BCFTOOLS_MPILEUP } from '../../modules/nf-core/software/bcftools/mpileup/main' addParams( options: params.bcftools_mpileup_options ) -include { QUAST } from '../../modules/nf-core/software/quast/main' addParams( options: params.quast_options ) -include { PANGOLIN } from '../../modules/nf-core/software/pangolin/main' addParams( options: params.pangolin_options ) -include { NEXTCLADE } from '../../modules/nf-core/software/nextclade/main' addParams( options: params.nextclade_options ) -include { ASCIIGENOME } from '../../modules/local/asciigenome' addParams( options: params.asciigenome_options ) -include { MAKE_CONSENSUS } from './make_consensus' addParams( genomecov_options: params.consensus_genomecov_options, merge_options: params.consensus_merge_options, mask_options: params.consensus_mask_options, maskfasta_options: params.consensus_maskfasta_options, bcftools_options: params.consensus_bcftools_options, plot_bases_options: params.consensus_plot_options ) -include { SNPEFF_SNPSIFT } from './snpeff_snpsift' addParams( snpeff_options: params.snpeff_options, snpsift_options: params.snpsift_options, bgzip_options: params.snpeff_bgzip_options, tabix_options: params.snpeff_tabix_options, stats_options: params.snpeff_stats_options ) - -workflow VARIANTS_BCFTOOLS { - take: - bam // channel: [ val(meta), [ bam ] ] - fasta // channel: /path/to/genome.fasta - gff // channel: /path/to/genome.gff - bed // channel: /path/to/primers.bed - snpeff_db // channel: /path/to/snpeff_db/ - snpeff_config // channel: /path/to/snpeff.config - - main: - /* - * Call variants - */ - BCFTOOLS_MPILEUP ( bam, fasta ) - - /* - * Create genome consensus using variants in VCF, run QUAST and pangolin - */ - ch_consensus = Channel.empty() - ch_bases_tsv = Channel.empty() - ch_bases_pdf = 
Channel.empty() - ch_bedtools_version = Channel.empty() - ch_quast_results = Channel.empty() - ch_quast_tsv = Channel.empty() - ch_quast_version = Channel.empty() - ch_pangolin_report = Channel.empty() - ch_pangolin_version = Channel.empty() - ch_nextclade_report = Channel.empty() - ch_nextclade_version = Channel.empty() - if (!params.skip_consensus) { - MAKE_CONSENSUS ( bam.join(BCFTOOLS_MPILEUP.out.vcf, by: [0]).join(BCFTOOLS_MPILEUP.out.tbi, by: [0]), fasta ) - ch_consensus = MAKE_CONSENSUS.out.fasta - ch_bases_tsv = MAKE_CONSENSUS.out.tsv - ch_bases_pdf = MAKE_CONSENSUS.out.pdf - ch_bedtools_version = MAKE_CONSENSUS.out.bedtools_version - - if (!params.skip_variants_quast) { - QUAST ( ch_consensus.collect{ it[1] }, fasta, gff, true, params.gff ) - ch_quast_results = QUAST.out.results - ch_quast_tsv = QUAST.out.tsv - ch_quast_version = QUAST.out.version - } - - if (!params.skip_pangolin) { - PANGOLIN ( ch_consensus ) - ch_pangolin_report = PANGOLIN.out.report - ch_pangolin_version = PANGOLIN.out.version - } - - if (!params.skip_nextclade) { - NEXTCLADE ( ch_consensus, 'csv' ) - ch_nextclade_report = NEXTCLADE.out.csv - ch_nextclade_version = NEXTCLADE.out.version - } - } - - /* - * Annotate variants - */ - ch_snpeff_vcf = Channel.empty() - ch_snpeff_tbi = Channel.empty() - ch_snpeff_stats = Channel.empty() - ch_snpeff_csv = Channel.empty() - ch_snpeff_txt = Channel.empty() - ch_snpeff_html = Channel.empty() - ch_snpsift_txt = Channel.empty() - ch_snpeff_version = Channel.empty() - ch_snpsift_version = Channel.empty() - if (params.gff && !params.skip_snpeff) { - SNPEFF_SNPSIFT ( BCFTOOLS_MPILEUP.out.vcf, snpeff_db, snpeff_config, fasta ) - ch_snpeff_vcf = SNPEFF_SNPSIFT.out.vcf - ch_snpeff_tbi = SNPEFF_SNPSIFT.out.tbi - ch_snpeff_stats = SNPEFF_SNPSIFT.out.stats - ch_snpeff_csv = SNPEFF_SNPSIFT.out.csv - ch_snpeff_txt = SNPEFF_SNPSIFT.out.txt - ch_snpeff_html = SNPEFF_SNPSIFT.out.html - ch_snpsift_txt = SNPEFF_SNPSIFT.out.snpsift_txt - ch_snpeff_version = 
SNPEFF_SNPSIFT.out.snpeff_version - ch_snpsift_version = SNPEFF_SNPSIFT.out.snpsift_version - } - - /* - * MODULE: Variant screenshots with ASCIIGenome - */ - ch_asciigenome_pdf = Channel.empty() - ch_asciigenome_version = Channel.empty() - if (!params.skip_asciigenome) { - bam - .join(BCFTOOLS_MPILEUP.out.vcf, by: [0]) - .join(BCFTOOLS_MPILEUP.out.stats, by: [0]) - .map { meta, bam, vcf, stats -> - if (WorkflowCommons.getNumVariantsFromBCFToolsStats(stats) > 0) { - return [ meta, bam, vcf ] - } - } - .set { ch_asciigenome } - - ASCIIGENOME ( - ch_asciigenome, - fasta, - gff, - bed, - params.asciigenome_window_size, - params.asciigenome_read_depth - ) - ch_asciigenome_pdf = ASCIIGENOME.out.pdf - ch_asciigenome_version = ASCIIGENOME.out.version - } - - emit: - vcf = BCFTOOLS_MPILEUP.out.vcf // channel: [ val(meta), [ vcf ] ] - tbi = BCFTOOLS_MPILEUP.out.tbi // channel: [ val(meta), [ tbi ] ] - stats = BCFTOOLS_MPILEUP.out.stats // channel: [ val(meta), [ txt ] ] - bcftools_version = BCFTOOLS_MPILEUP.out.version // path: *.version.txt - - consensus = ch_consensus // channel: [ val(meta), [ fasta ] ] - bases_tsv = ch_bases_tsv // channel: [ val(meta), [ tsv ] ] - bases_pdf = ch_bases_pdf // channel: [ val(meta), [ pdf ] ] - bedtools_version = ch_bedtools_version // path: *.version.txt - - quast_results = ch_quast_results // channel: [ val(meta), [ results ] ] - quast_tsv = ch_quast_tsv // channel: [ val(meta), [ tsv ] ] - quast_version = ch_quast_version // path: *.version.txt - - snpeff_vcf = ch_snpeff_vcf // channel: [ val(meta), [ vcf.gz ] ] - snpeff_tbi = ch_snpeff_tbi // channel: [ val(meta), [ tbi ] ] - snpeff_stats = ch_snpeff_stats // channel: [ val(meta), [ txt ] ] - snpeff_csv = ch_snpeff_csv // channel: [ val(meta), [ csv ] ] - snpeff_txt = ch_snpeff_txt // channel: [ val(meta), [ txt ] ] - snpeff_html = ch_snpeff_html // channel: [ val(meta), [ html ] ] - snpsift_txt = ch_snpsift_txt // channel: [ val(meta), [ txt ] ] - snpeff_version = ch_snpeff_version 
// path: *.version.txt - snpsift_version = ch_snpsift_version // path: *.version.txt - - pangolin_report = ch_pangolin_report // channel: [ val(meta), [ csv ] ] - pangolin_version = ch_pangolin_version // path: *.version.txt - - nextclade_report = ch_nextclade_report // channel: [ val(meta), [ csv ] ] - nextclade_version = ch_nextclade_version // path: *.version.txt - - asciigenome_pdf = ch_asciigenome_pdf // channel: [ val(meta), [ pdf ] ] - asciigenome_version = ch_asciigenome_version // path: *.version.txt -} \ No newline at end of file +// +// Variant calling and downstream processing for BCFTools +// + +params.bcftools_mpileup_options = [:] +params.quast_options = [:] +params.consensus_genomecov_options = [:] +params.consensus_merge_options = [:] +params.consensus_mask_options = [:] +params.consensus_maskfasta_options = [:] +params.consensus_bcftools_options = [:] +params.consensus_plot_options = [:] +params.snpeff_options = [:] +params.snpsift_options = [:] +params.snpeff_bgzip_options = [:] +params.snpeff_tabix_options = [:] +params.snpeff_stats_options = [:] +params.pangolin_options = [:] +params.nextclade_options = [:] +params.asciigenome_options = [:] + +include { BCFTOOLS_MPILEUP } from '../../modules/nf-core/software/bcftools/mpileup/main' addParams( options: params.bcftools_mpileup_options ) +include { QUAST } from '../../modules/nf-core/software/quast/main' addParams( options: params.quast_options ) +include { PANGOLIN } from '../../modules/nf-core/software/pangolin/main' addParams( options: params.pangolin_options ) +include { NEXTCLADE } from '../../modules/nf-core/software/nextclade/main' addParams( options: params.nextclade_options ) +include { ASCIIGENOME } from '../../modules/local/asciigenome' addParams( options: params.asciigenome_options ) +include { MAKE_CONSENSUS } from './make_consensus' addParams( genomecov_options: params.consensus_genomecov_options, merge_options: params.consensus_merge_options, mask_options: 
params.consensus_mask_options, maskfasta_options: params.consensus_maskfasta_options, bcftools_options: params.consensus_bcftools_options, plot_bases_options: params.consensus_plot_options ) +include { SNPEFF_SNPSIFT } from './snpeff_snpsift' addParams( snpeff_options: params.snpeff_options, snpsift_options: params.snpsift_options, bgzip_options: params.snpeff_bgzip_options, tabix_options: params.snpeff_tabix_options, stats_options: params.snpeff_stats_options ) + +workflow VARIANTS_BCFTOOLS { + take: + bam // channel: [ val(meta), [ bam ] ] + fasta // channel: /path/to/genome.fasta + gff // channel: /path/to/genome.gff + bed // channel: /path/to/primers.bed + snpeff_db // channel: /path/to/snpeff_db/ + snpeff_config // channel: /path/to/snpeff.config + + main: + + // + // Call variants + // + BCFTOOLS_MPILEUP ( bam, fasta ) + + // + // Create genome consensus using variants in VCF, run QUAST and pangolin + // + ch_consensus = Channel.empty() + ch_bases_tsv = Channel.empty() + ch_bases_pdf = Channel.empty() + ch_bedtools_version = Channel.empty() + ch_quast_results = Channel.empty() + ch_quast_tsv = Channel.empty() + ch_quast_version = Channel.empty() + ch_pangolin_report = Channel.empty() + ch_pangolin_version = Channel.empty() + ch_nextclade_report = Channel.empty() + ch_nextclade_version = Channel.empty() + if (!params.skip_consensus) { + MAKE_CONSENSUS ( bam.join(BCFTOOLS_MPILEUP.out.vcf, by: [0]).join(BCFTOOLS_MPILEUP.out.tbi, by: [0]), fasta ) + ch_consensus = MAKE_CONSENSUS.out.fasta + ch_bases_tsv = MAKE_CONSENSUS.out.tsv + ch_bases_pdf = MAKE_CONSENSUS.out.pdf + ch_bedtools_version = MAKE_CONSENSUS.out.bedtools_version + + if (!params.skip_variants_quast) { + QUAST ( ch_consensus.collect{ it[1] }, fasta, gff, true, params.gff ) + ch_quast_results = QUAST.out.results + ch_quast_tsv = QUAST.out.tsv + ch_quast_version = QUAST.out.version + } + + if (!params.skip_pangolin) { + PANGOLIN ( ch_consensus ) + ch_pangolin_report = PANGOLIN.out.report + 
ch_pangolin_version = PANGOLIN.out.version + } + + if (!params.skip_nextclade) { + NEXTCLADE ( ch_consensus, 'csv' ) + ch_nextclade_report = NEXTCLADE.out.csv + ch_nextclade_version = NEXTCLADE.out.version + } + } + + // + // Annotate variants + // + ch_snpeff_vcf = Channel.empty() + ch_snpeff_tbi = Channel.empty() + ch_snpeff_stats = Channel.empty() + ch_snpeff_csv = Channel.empty() + ch_snpeff_txt = Channel.empty() + ch_snpeff_html = Channel.empty() + ch_snpsift_txt = Channel.empty() + ch_snpeff_version = Channel.empty() + ch_snpsift_version = Channel.empty() + if (params.gff && !params.skip_snpeff) { + SNPEFF_SNPSIFT ( BCFTOOLS_MPILEUP.out.vcf, snpeff_db, snpeff_config, fasta ) + ch_snpeff_vcf = SNPEFF_SNPSIFT.out.vcf + ch_snpeff_tbi = SNPEFF_SNPSIFT.out.tbi + ch_snpeff_stats = SNPEFF_SNPSIFT.out.stats + ch_snpeff_csv = SNPEFF_SNPSIFT.out.csv + ch_snpeff_txt = SNPEFF_SNPSIFT.out.txt + ch_snpeff_html = SNPEFF_SNPSIFT.out.html + ch_snpsift_txt = SNPEFF_SNPSIFT.out.snpsift_txt + ch_snpeff_version = SNPEFF_SNPSIFT.out.snpeff_version + ch_snpsift_version = SNPEFF_SNPSIFT.out.snpsift_version + } + + // + // Variant screenshots with ASCIIGenome + // + ch_asciigenome_pdf = Channel.empty() + ch_asciigenome_version = Channel.empty() + if (!params.skip_asciigenome) { + bam + .join(BCFTOOLS_MPILEUP.out.vcf, by: [0]) + .join(BCFTOOLS_MPILEUP.out.stats, by: [0]) + .map { meta, bam, vcf, stats -> + if (WorkflowCommons.getNumVariantsFromBCFToolsStats(stats) > 0) { + return [ meta, bam, vcf ] + } + } + .set { ch_asciigenome } + + ASCIIGENOME ( + ch_asciigenome, + fasta, + gff, + bed, + params.asciigenome_window_size, + params.asciigenome_read_depth + ) + ch_asciigenome_pdf = ASCIIGENOME.out.pdf + ch_asciigenome_version = ASCIIGENOME.out.version + } + + emit: + vcf = BCFTOOLS_MPILEUP.out.vcf // channel: [ val(meta), [ vcf ] ] + tbi = BCFTOOLS_MPILEUP.out.tbi // channel: [ val(meta), [ tbi ] ] + stats = BCFTOOLS_MPILEUP.out.stats // channel: [ val(meta), [ txt ] ] + 
bcftools_version = BCFTOOLS_MPILEUP.out.version // path: *.version.txt + + consensus = ch_consensus // channel: [ val(meta), [ fasta ] ] + bases_tsv = ch_bases_tsv // channel: [ val(meta), [ tsv ] ] + bases_pdf = ch_bases_pdf // channel: [ val(meta), [ pdf ] ] + bedtools_version = ch_bedtools_version // path: *.version.txt + + quast_results = ch_quast_results // channel: [ val(meta), [ results ] ] + quast_tsv = ch_quast_tsv // channel: [ val(meta), [ tsv ] ] + quast_version = ch_quast_version // path: *.version.txt + + snpeff_vcf = ch_snpeff_vcf // channel: [ val(meta), [ vcf.gz ] ] + snpeff_tbi = ch_snpeff_tbi // channel: [ val(meta), [ tbi ] ] + snpeff_stats = ch_snpeff_stats // channel: [ val(meta), [ txt ] ] + snpeff_csv = ch_snpeff_csv // channel: [ val(meta), [ csv ] ] + snpeff_txt = ch_snpeff_txt // channel: [ val(meta), [ txt ] ] + snpeff_html = ch_snpeff_html // channel: [ val(meta), [ html ] ] + snpsift_txt = ch_snpsift_txt // channel: [ val(meta), [ txt ] ] + snpeff_version = ch_snpeff_version // path: *.version.txt + snpsift_version = ch_snpsift_version // path: *.version.txt + + pangolin_report = ch_pangolin_report // channel: [ val(meta), [ csv ] ] + pangolin_version = ch_pangolin_version // path: *.version.txt + + nextclade_report = ch_nextclade_report // channel: [ val(meta), [ csv ] ] + nextclade_version = ch_nextclade_version // path: *.version.txt + + asciigenome_pdf = ch_asciigenome_pdf // channel: [ val(meta), [ pdf ] ] + asciigenome_version = ch_asciigenome_version // path: *.version.txt +} diff --git a/subworkflows/local/variants_ivar.nf b/subworkflows/local/variants_ivar.nf index c44ef534..66afa779 100644 --- a/subworkflows/local/variants_ivar.nf +++ b/subworkflows/local/variants_ivar.nf @@ -1,194 +1,194 @@ -/* - * Variant calling and downstream processing for IVar - */ - -params.ivar_variants_options = [:] -params.ivar_variants_to_vcf_options = [:] -params.tabix_bgzip_options = [:] -params.tabix_tabix_options = [:] 
-params.bcftools_stats_options = [:] -params.ivar_consensus_options = [:] -params.consensus_plot_options = [:] -params.quast_options = [:] -params.snpeff_options = [:] -params.snpsift_options = [:] -params.snpeff_bgzip_options = [:] -params.snpeff_tabix_options = [:] -params.snpeff_stats_options = [:] -params.pangolin_options = [:] -params.nextclade_options = [:] -params.asciigenome_options = [:] - -include { IVAR_VARIANTS_TO_VCF } from '../../modules/local/ivar_variants_to_vcf' addParams( options: params.ivar_variants_to_vcf_options ) -include { PLOT_BASE_DENSITY } from '../../modules/local/plot_base_density' addParams( options: params.consensus_plot_options ) -include { IVAR_VARIANTS } from '../../modules/nf-core/software/ivar/variants/main' addParams( options: params.ivar_variants_options ) -include { IVAR_CONSENSUS } from '../../modules/nf-core/software/ivar/consensus/main' addParams( options: params.ivar_consensus_options ) -include { QUAST } from '../../modules/nf-core/software/quast/main' addParams( options: params.quast_options ) -include { PANGOLIN } from '../../modules/nf-core/software/pangolin/main' addParams( options: params.pangolin_options ) -include { NEXTCLADE } from '../../modules/nf-core/software/nextclade/main' addParams( options: params.nextclade_options ) -include { ASCIIGENOME } from '../../modules/local/asciigenome' addParams( options: params.asciigenome_options ) -include { VCF_BGZIP_TABIX_STATS } from '../nf-core/vcf_bgzip_tabix_stats' addParams( bgzip_options: params.tabix_bgzip_options, tabix_options: params.tabix_tabix_options, stats_options: params.bcftools_stats_options ) -include { SNPEFF_SNPSIFT } from './snpeff_snpsift' addParams( snpeff_options: params.snpeff_options, snpsift_options: params.snpsift_options, bgzip_options: params.snpeff_bgzip_options, tabix_options: params.snpeff_tabix_options, stats_options: params.snpeff_stats_options ) - -workflow VARIANTS_IVAR { - take: - bam // channel: [ val(meta), [ bam ] ] - fasta // 
channel: /path/to/genome.fasta - gff // channel: /path/to/genome.gff - bed // channel: /path/to/primers.bed - snpeff_db // channel: /path/to/snpeff_db/ - snpeff_config // channel: /path/to/snpeff.config - ivar_multiqc_header // channel: /path/to/multiqc_header for ivar variants - - main: - /* - * Call variants - */ - IVAR_VARIANTS ( bam, fasta, gff ) - - /* - * Convert original iVar output to VCF, zip and index - */ - IVAR_VARIANTS_TO_VCF ( IVAR_VARIANTS.out.tsv, ivar_multiqc_header ) - - VCF_BGZIP_TABIX_STATS ( IVAR_VARIANTS_TO_VCF.out.vcf ) - - /* - * Create genome consensus - */ - ch_consensus = Channel.empty() - ch_consensus_qual = Channel.empty() - ch_bases_tsv = Channel.empty() - ch_bases_pdf = Channel.empty() - ch_quast_results = Channel.empty() - ch_quast_tsv = Channel.empty() - ch_quast_version = Channel.empty() - ch_pangolin_report = Channel.empty() - ch_pangolin_version = Channel.empty() - ch_nextclade_report = Channel.empty() - ch_nextclade_version = Channel.empty() - if (!params.skip_consensus) { - IVAR_CONSENSUS ( bam, fasta ) - ch_consensus = IVAR_CONSENSUS.out.fasta - ch_consensus_qual = IVAR_CONSENSUS.out.qual - - PLOT_BASE_DENSITY ( ch_consensus ) - ch_bases_tsv = PLOT_BASE_DENSITY.out.tsv - ch_bases_pdf = PLOT_BASE_DENSITY.out.pdf - - if (!params.skip_variants_quast) { - QUAST ( ch_consensus.collect{ it[1] }, fasta, gff, true, params.gff ) - ch_quast_results = QUAST.out.results - ch_quast_tsv = QUAST.out.tsv - ch_quast_version = QUAST.out.version - } - - if (!params.skip_pangolin) { - PANGOLIN ( ch_consensus ) - ch_pangolin_report = PANGOLIN.out.report - ch_pangolin_version = PANGOLIN.out.version - } - - if (!params.skip_nextclade) { - NEXTCLADE ( ch_consensus, 'csv' ) - ch_nextclade_report = NEXTCLADE.out.csv - ch_nextclade_version = NEXTCLADE.out.version - } - } - - /* - * Annotate variants - */ - ch_snpeff_vcf = Channel.empty() - ch_snpeff_tbi = Channel.empty() - ch_snpeff_stats = Channel.empty() - ch_snpeff_csv = Channel.empty() - 
ch_snpeff_txt = Channel.empty() - ch_snpeff_html = Channel.empty() - ch_snpsift_txt = Channel.empty() - ch_snpeff_version = Channel.empty() - ch_snpsift_version = Channel.empty() - if (params.gff && !params.skip_snpeff) { - SNPEFF_SNPSIFT ( VCF_BGZIP_TABIX_STATS.out.vcf, snpeff_db, snpeff_config, fasta ) - ch_snpeff_vcf = SNPEFF_SNPSIFT.out.vcf - ch_snpeff_tbi = SNPEFF_SNPSIFT.out.tbi - ch_snpeff_stats = SNPEFF_SNPSIFT.out.stats - ch_snpeff_csv = SNPEFF_SNPSIFT.out.csv - ch_snpeff_txt = SNPEFF_SNPSIFT.out.txt - ch_snpeff_html = SNPEFF_SNPSIFT.out.html - ch_snpsift_txt = SNPEFF_SNPSIFT.out.snpsift_txt - ch_snpeff_version = SNPEFF_SNPSIFT.out.snpeff_version - ch_snpsift_version = SNPEFF_SNPSIFT.out.snpsift_version - } - - /* - * MODULE: Variant screenshots with ASCIIGenome - */ - ch_asciigenome_pdf = Channel.empty() - ch_asciigenome_version = Channel.empty() - if (!params.skip_asciigenome) { - bam - .join(VCF_BGZIP_TABIX_STATS.out.vcf, by: [0]) - .join(VCF_BGZIP_TABIX_STATS.out.stats, by: [0]) - .map { meta, bam, vcf, stats -> - if (WorkflowCommons.getNumVariantsFromBCFToolsStats(stats) > 0) { - return [ meta, bam, vcf ] - } - } - .set { ch_asciigenome } - - ASCIIGENOME ( - ch_asciigenome, - fasta, - gff, - bed, - params.asciigenome_window_size, - params.asciigenome_read_depth - ) - ch_asciigenome_pdf = ASCIIGENOME.out.pdf - ch_asciigenome_version = ASCIIGENOME.out.version - } - - emit: - tsv = IVAR_VARIANTS.out.tsv // channel: [ val(meta), [ tsv ] ] - ivar_version = IVAR_VARIANTS.out.version // path: *.version.txt - - vcf_orig = IVAR_VARIANTS_TO_VCF.out.vcf // channel: [ val(meta), [ vcf ] ] - log_out = IVAR_VARIANTS_TO_VCF.out.log // channel: [ val(meta), [ log ] ] - multiqc_tsv = IVAR_VARIANTS_TO_VCF.out.tsv // channel: [ val(meta), [ tsv ] ] - - vcf = VCF_BGZIP_TABIX_STATS.out.vcf // channel: [ val(meta), [ vcf ] ] - tbi = VCF_BGZIP_TABIX_STATS.out.tbi // channel: [ val(meta), [ tbi ] ] - stats = VCF_BGZIP_TABIX_STATS.out.stats // channel: [ val(meta), [ txt ] ] 
- tabix_version = VCF_BGZIP_TABIX_STATS.out.tabix_version // path: *.version.txt - bcftools_version = VCF_BGZIP_TABIX_STATS.out.bcftools_version // path: *.version.txt - - consensus = ch_consensus // channel: [ val(meta), [ fasta ] ] - consensus_qual = ch_consensus_qual // channel: [ val(meta), [ fasta ] ] - bases_tsv = ch_bases_tsv // channel: [ val(meta), [ tsv ] ] - bases_pdf = ch_bases_pdf // channel: [ val(meta), [ pdf ] ] - - quast_results = ch_quast_results // channel: [ val(meta), [ results ] ] - quast_tsv = ch_quast_tsv // channel: [ val(meta), [ tsv ] ] - quast_version = ch_quast_version // path: *.version.txt - - snpeff_vcf = ch_snpeff_vcf // channel: [ val(meta), [ vcf.gz ] ] - snpeff_tbi = ch_snpeff_tbi // channel: [ val(meta), [ tbi ] ] - snpeff_stats = ch_snpeff_stats // channel: [ val(meta), [ txt ] ] - snpeff_csv = ch_snpeff_csv // channel: [ val(meta), [ csv ] ] - snpeff_txt = ch_snpeff_txt // channel: [ val(meta), [ txt ] ] - snpeff_html = ch_snpeff_html // channel: [ val(meta), [ html ] ] - snpsift_txt = ch_snpsift_txt // channel: [ val(meta), [ txt ] ] - snpeff_version = ch_snpeff_version // path: *.version.txt - snpsift_version = ch_snpsift_version // path: *.version.txt - - pangolin_report = ch_pangolin_report // channel: [ val(meta), [ csv ] ] - pangolin_version = ch_pangolin_version // path: *.version.txt - - nextclade_report = ch_nextclade_report // channel: [ val(meta), [ csv ] ] - nextclade_version = ch_nextclade_version // path: *.version.txt - - asciigenome_pdf = ch_asciigenome_pdf // channel: [ val(meta), [ pdf ] ] - asciigenome_version = ch_asciigenome_version // path: *.version.txt -} - +// +// Variant calling and downstream processing for IVar +// + +params.ivar_variants_options = [:] +params.ivar_variants_to_vcf_options = [:] +params.tabix_bgzip_options = [:] +params.tabix_tabix_options = [:] +params.bcftools_stats_options = [:] +params.ivar_consensus_options = [:] +params.consensus_plot_options = [:] +params.quast_options = [:] 
+params.snpeff_options = [:] +params.snpsift_options = [:] +params.snpeff_bgzip_options = [:] +params.snpeff_tabix_options = [:] +params.snpeff_stats_options = [:] +params.pangolin_options = [:] +params.nextclade_options = [:] +params.asciigenome_options = [:] + +include { IVAR_VARIANTS_TO_VCF } from '../../modules/local/ivar_variants_to_vcf' addParams( options: params.ivar_variants_to_vcf_options ) +include { PLOT_BASE_DENSITY } from '../../modules/local/plot_base_density' addParams( options: params.consensus_plot_options ) +include { IVAR_VARIANTS } from '../../modules/nf-core/software/ivar/variants/main' addParams( options: params.ivar_variants_options ) +include { IVAR_CONSENSUS } from '../../modules/nf-core/software/ivar/consensus/main' addParams( options: params.ivar_consensus_options ) +include { QUAST } from '../../modules/nf-core/software/quast/main' addParams( options: params.quast_options ) +include { PANGOLIN } from '../../modules/nf-core/software/pangolin/main' addParams( options: params.pangolin_options ) +include { NEXTCLADE } from '../../modules/nf-core/software/nextclade/main' addParams( options: params.nextclade_options ) +include { ASCIIGENOME } from '../../modules/local/asciigenome' addParams( options: params.asciigenome_options ) +include { VCF_BGZIP_TABIX_STATS } from '../nf-core/vcf_bgzip_tabix_stats' addParams( bgzip_options: params.tabix_bgzip_options, tabix_options: params.tabix_tabix_options, stats_options: params.bcftools_stats_options ) +include { SNPEFF_SNPSIFT } from './snpeff_snpsift' addParams( snpeff_options: params.snpeff_options, snpsift_options: params.snpsift_options, bgzip_options: params.snpeff_bgzip_options, tabix_options: params.snpeff_tabix_options, stats_options: params.snpeff_stats_options ) + +workflow VARIANTS_IVAR { + take: + bam // channel: [ val(meta), [ bam ] ] + fasta // channel: /path/to/genome.fasta + gff // channel: /path/to/genome.gff + bed // channel: /path/to/primers.bed + snpeff_db // channel: 
/path/to/snpeff_db/ + snpeff_config // channel: /path/to/snpeff.config + ivar_multiqc_header // channel: /path/to/multiqc_header for ivar variants + + main: + + // + // Call variants + // + IVAR_VARIANTS ( bam, fasta, gff ) + + // + // Convert original iVar output to VCF, zip and index + // + IVAR_VARIANTS_TO_VCF ( IVAR_VARIANTS.out.tsv, ivar_multiqc_header ) + + VCF_BGZIP_TABIX_STATS ( IVAR_VARIANTS_TO_VCF.out.vcf ) + + // + // Create genome consensus + // + ch_consensus = Channel.empty() + ch_consensus_qual = Channel.empty() + ch_bases_tsv = Channel.empty() + ch_bases_pdf = Channel.empty() + ch_quast_results = Channel.empty() + ch_quast_tsv = Channel.empty() + ch_quast_version = Channel.empty() + ch_pangolin_report = Channel.empty() + ch_pangolin_version = Channel.empty() + ch_nextclade_report = Channel.empty() + ch_nextclade_version = Channel.empty() + if (!params.skip_consensus) { + IVAR_CONSENSUS ( bam, fasta ) + ch_consensus = IVAR_CONSENSUS.out.fasta + ch_consensus_qual = IVAR_CONSENSUS.out.qual + + PLOT_BASE_DENSITY ( ch_consensus ) + ch_bases_tsv = PLOT_BASE_DENSITY.out.tsv + ch_bases_pdf = PLOT_BASE_DENSITY.out.pdf + + if (!params.skip_variants_quast) { + QUAST ( ch_consensus.collect{ it[1] }, fasta, gff, true, params.gff ) + ch_quast_results = QUAST.out.results + ch_quast_tsv = QUAST.out.tsv + ch_quast_version = QUAST.out.version + } + + if (!params.skip_pangolin) { + PANGOLIN ( ch_consensus ) + ch_pangolin_report = PANGOLIN.out.report + ch_pangolin_version = PANGOLIN.out.version + } + + if (!params.skip_nextclade) { + NEXTCLADE ( ch_consensus, 'csv' ) + ch_nextclade_report = NEXTCLADE.out.csv + ch_nextclade_version = NEXTCLADE.out.version + } + } + + // + // Annotate variants + // + ch_snpeff_vcf = Channel.empty() + ch_snpeff_tbi = Channel.empty() + ch_snpeff_stats = Channel.empty() + ch_snpeff_csv = Channel.empty() + ch_snpeff_txt = Channel.empty() + ch_snpeff_html = Channel.empty() + ch_snpsift_txt = Channel.empty() + ch_snpeff_version = 
Channel.empty() + ch_snpsift_version = Channel.empty() + if (params.gff && !params.skip_snpeff) { + SNPEFF_SNPSIFT ( VCF_BGZIP_TABIX_STATS.out.vcf, snpeff_db, snpeff_config, fasta ) + ch_snpeff_vcf = SNPEFF_SNPSIFT.out.vcf + ch_snpeff_tbi = SNPEFF_SNPSIFT.out.tbi + ch_snpeff_stats = SNPEFF_SNPSIFT.out.stats + ch_snpeff_csv = SNPEFF_SNPSIFT.out.csv + ch_snpeff_txt = SNPEFF_SNPSIFT.out.txt + ch_snpeff_html = SNPEFF_SNPSIFT.out.html + ch_snpsift_txt = SNPEFF_SNPSIFT.out.snpsift_txt + ch_snpeff_version = SNPEFF_SNPSIFT.out.snpeff_version + ch_snpsift_version = SNPEFF_SNPSIFT.out.snpsift_version + } + + // + // Variant screenshots with ASCIIGenome + // + ch_asciigenome_pdf = Channel.empty() + ch_asciigenome_version = Channel.empty() + if (!params.skip_asciigenome) { + bam + .join(VCF_BGZIP_TABIX_STATS.out.vcf, by: [0]) + .join(VCF_BGZIP_TABIX_STATS.out.stats, by: [0]) + .map { meta, bam, vcf, stats -> + if (WorkflowCommons.getNumVariantsFromBCFToolsStats(stats) > 0) { + return [ meta, bam, vcf ] + } + } + .set { ch_asciigenome } + + ASCIIGENOME ( + ch_asciigenome, + fasta, + gff, + bed, + params.asciigenome_window_size, + params.asciigenome_read_depth + ) + ch_asciigenome_pdf = ASCIIGENOME.out.pdf + ch_asciigenome_version = ASCIIGENOME.out.version + } + + emit: + tsv = IVAR_VARIANTS.out.tsv // channel: [ val(meta), [ tsv ] ] + ivar_version = IVAR_VARIANTS.out.version // path: *.version.txt + + vcf_orig = IVAR_VARIANTS_TO_VCF.out.vcf // channel: [ val(meta), [ vcf ] ] + log_out = IVAR_VARIANTS_TO_VCF.out.log // channel: [ val(meta), [ log ] ] + multiqc_tsv = IVAR_VARIANTS_TO_VCF.out.tsv // channel: [ val(meta), [ tsv ] ] + + vcf = VCF_BGZIP_TABIX_STATS.out.vcf // channel: [ val(meta), [ vcf ] ] + tbi = VCF_BGZIP_TABIX_STATS.out.tbi // channel: [ val(meta), [ tbi ] ] + stats = VCF_BGZIP_TABIX_STATS.out.stats // channel: [ val(meta), [ txt ] ] + tabix_version = VCF_BGZIP_TABIX_STATS.out.tabix_version // path: *.version.txt + bcftools_version = 
VCF_BGZIP_TABIX_STATS.out.bcftools_version // path: *.version.txt + + consensus = ch_consensus // channel: [ val(meta), [ fasta ] ] + consensus_qual = ch_consensus_qual // channel: [ val(meta), [ fasta ] ] + bases_tsv = ch_bases_tsv // channel: [ val(meta), [ tsv ] ] + bases_pdf = ch_bases_pdf // channel: [ val(meta), [ pdf ] ] + + quast_results = ch_quast_results // channel: [ val(meta), [ results ] ] + quast_tsv = ch_quast_tsv // channel: [ val(meta), [ tsv ] ] + quast_version = ch_quast_version // path: *.version.txt + + snpeff_vcf = ch_snpeff_vcf // channel: [ val(meta), [ vcf.gz ] ] + snpeff_tbi = ch_snpeff_tbi // channel: [ val(meta), [ tbi ] ] + snpeff_stats = ch_snpeff_stats // channel: [ val(meta), [ txt ] ] + snpeff_csv = ch_snpeff_csv // channel: [ val(meta), [ csv ] ] + snpeff_txt = ch_snpeff_txt // channel: [ val(meta), [ txt ] ] + snpeff_html = ch_snpeff_html // channel: [ val(meta), [ html ] ] + snpsift_txt = ch_snpsift_txt // channel: [ val(meta), [ txt ] ] + snpeff_version = ch_snpeff_version // path: *.version.txt + snpsift_version = ch_snpsift_version // path: *.version.txt + + pangolin_report = ch_pangolin_report // channel: [ val(meta), [ csv ] ] + pangolin_version = ch_pangolin_version // path: *.version.txt + + nextclade_report = ch_nextclade_report // channel: [ val(meta), [ csv ] ] + nextclade_version = ch_nextclade_version // path: *.version.txt + + asciigenome_pdf = ch_asciigenome_pdf // channel: [ val(meta), [ pdf ] ] + asciigenome_version = ch_asciigenome_version // path: *.version.txt +} diff --git a/subworkflows/nf-core/align_bowtie2.nf b/subworkflows/nf-core/align_bowtie2.nf index 8f1022b0..2219a10b 100644 --- a/subworkflows/nf-core/align_bowtie2.nf +++ b/subworkflows/nf-core/align_bowtie2.nf @@ -1,39 +1,40 @@ -/* - * Alignment with BOWTIE2 - */ - -params.align_options = [:] -params.samtools_options = [:] - -include { BOWTIE2_ALIGN } from '../../modules/nf-core/software/bowtie2/align/main' addParams( options: params.align_options ) 
-include { BAM_SORT_SAMTOOLS } from '../nf-core/bam_sort_samtools' addParams( options: params.samtools_options ) - -workflow ALIGN_BOWTIE2 { - take: - reads // channel: [ val(meta), [ reads ] ] - index // channel: /path/to/bowtie2/index/ - - main: - /* - * Map reads with BOWTIE2 - */ - BOWTIE2_ALIGN ( reads, index ) - - /* - * Sort, index BAM file and run samtools stats, flagstat and idxstats - */ - BAM_SORT_SAMTOOLS ( BOWTIE2_ALIGN.out.bam ) - - emit: - bam_orig = BOWTIE2_ALIGN.out.bam // channel: [ val(meta), bam ] - log_out = BOWTIE2_ALIGN.out.log // channel: [ val(meta), log ] - fastq = BOWTIE2_ALIGN.out.fastq // channel: [ val(meta), fastq ] - bowtie2_version = BOWTIE2_ALIGN.out.version // path: *.version.txt - - bam = BAM_SORT_SAMTOOLS.out.bam // channel: [ val(meta), [ bam ] ] - bai = BAM_SORT_SAMTOOLS.out.bai // channel: [ val(meta), [ bai ] ] - stats = BAM_SORT_SAMTOOLS.out.stats // channel: [ val(meta), [ stats ] ] - flagstat = BAM_SORT_SAMTOOLS.out.flagstat // channel: [ val(meta), [ flagstat ] ] - idxstats = BAM_SORT_SAMTOOLS.out.idxstats // channel: [ val(meta), [ idxstats ] ] - samtools_version = BAM_SORT_SAMTOOLS.out.version // path: *.version.txt -} +// +// Alignment with Bowtie2 +// + +params.align_options = [:] +params.samtools_options = [:] + +include { BOWTIE2_ALIGN } from '../../modules/nf-core/software/bowtie2/align/main' addParams( options: params.align_options ) +include { BAM_SORT_SAMTOOLS } from '../nf-core/bam_sort_samtools' addParams( options: params.samtools_options ) + +workflow ALIGN_BOWTIE2 { + take: + reads // channel: [ val(meta), [ reads ] ] + index // channel: /path/to/bowtie2/index/ + + main: + + // + // Map reads with Bowtie2 + // + BOWTIE2_ALIGN ( reads, index ) + + // + // Sort, index BAM file and run samtools stats, flagstat and idxstats + // + BAM_SORT_SAMTOOLS ( BOWTIE2_ALIGN.out.bam ) + + emit: + bam_orig = BOWTIE2_ALIGN.out.bam // channel: [ val(meta), bam ] + log_out = BOWTIE2_ALIGN.out.log // channel: [ val(meta), log 
] + fastq = BOWTIE2_ALIGN.out.fastq // channel: [ val(meta), fastq ] + bowtie2_version = BOWTIE2_ALIGN.out.version // path: *.version.txt + + bam = BAM_SORT_SAMTOOLS.out.bam // channel: [ val(meta), [ bam ] ] + bai = BAM_SORT_SAMTOOLS.out.bai // channel: [ val(meta), [ bai ] ] + stats = BAM_SORT_SAMTOOLS.out.stats // channel: [ val(meta), [ stats ] ] + flagstat = BAM_SORT_SAMTOOLS.out.flagstat // channel: [ val(meta), [ flagstat ] ] + idxstats = BAM_SORT_SAMTOOLS.out.idxstats // channel: [ val(meta), [ idxstats ] ] + samtools_version = BAM_SORT_SAMTOOLS.out.version // path: *.version.txt +} diff --git a/subworkflows/nf-core/bam_sort_samtools.nf b/subworkflows/nf-core/bam_sort_samtools.nf index d4da7471..e410a706 100644 --- a/subworkflows/nf-core/bam_sort_samtools.nf +++ b/subworkflows/nf-core/bam_sort_samtools.nf @@ -1,27 +1,27 @@ -/* - * Sort, index BAM file and run samtools stats, flagstat and idxstats - */ - -params.options = [:] - -include { SAMTOOLS_SORT } from '../../modules/nf-core/software/samtools/sort/main' addParams( options: params.options ) -include { SAMTOOLS_INDEX } from '../../modules/nf-core/software/samtools/index/main' addParams( options: params.options ) -include { BAM_STATS_SAMTOOLS } from './bam_stats_samtools' addParams( options: params.options ) - -workflow BAM_SORT_SAMTOOLS { - take: - bam // channel: [ val(meta), [ bam ] ] - - main: - SAMTOOLS_SORT ( bam ) - SAMTOOLS_INDEX ( SAMTOOLS_SORT.out.bam ) - BAM_STATS_SAMTOOLS ( SAMTOOLS_SORT.out.bam.join(SAMTOOLS_INDEX.out.bai, by: [0]) ) - - emit: - bam = SAMTOOLS_SORT.out.bam // channel: [ val(meta), [ bam ] ] - bai = SAMTOOLS_INDEX.out.bai // channel: [ val(meta), [ bai ] ] - stats = BAM_STATS_SAMTOOLS.out.stats // channel: [ val(meta), [ stats ] ] - flagstat = BAM_STATS_SAMTOOLS.out.flagstat // channel: [ val(meta), [ flagstat ] ] - idxstats = BAM_STATS_SAMTOOLS.out.idxstats // channel: [ val(meta), [ idxstats ] ] - version = SAMTOOLS_SORT.out.version // path: *.version.txt -} +// +// Sort, 
index BAM file and run samtools stats, flagstat and idxstats +// + +params.options = [:] + +include { SAMTOOLS_SORT } from '../../modules/nf-core/software/samtools/sort/main' addParams( options: params.options ) +include { SAMTOOLS_INDEX } from '../../modules/nf-core/software/samtools/index/main' addParams( options: params.options ) +include { BAM_STATS_SAMTOOLS } from './bam_stats_samtools' addParams( options: params.options ) + +workflow BAM_SORT_SAMTOOLS { + take: + bam // channel: [ val(meta), [ bam ] ] + + main: + SAMTOOLS_SORT ( bam ) + SAMTOOLS_INDEX ( SAMTOOLS_SORT.out.bam ) + BAM_STATS_SAMTOOLS ( SAMTOOLS_SORT.out.bam.join(SAMTOOLS_INDEX.out.bai, by: [0]) ) + + emit: + bam = SAMTOOLS_SORT.out.bam // channel: [ val(meta), [ bam ] ] + bai = SAMTOOLS_INDEX.out.bai // channel: [ val(meta), [ bai ] ] + stats = BAM_STATS_SAMTOOLS.out.stats // channel: [ val(meta), [ stats ] ] + flagstat = BAM_STATS_SAMTOOLS.out.flagstat // channel: [ val(meta), [ flagstat ] ] + idxstats = BAM_STATS_SAMTOOLS.out.idxstats // channel: [ val(meta), [ idxstats ] ] + version = SAMTOOLS_SORT.out.version // path: *.version.txt +} diff --git a/subworkflows/nf-core/bam_stats_samtools.nf b/subworkflows/nf-core/bam_stats_samtools.nf index fcf7645e..c262e003 100644 --- a/subworkflows/nf-core/bam_stats_samtools.nf +++ b/subworkflows/nf-core/bam_stats_samtools.nf @@ -1,25 +1,25 @@ -/* - * Run SAMtools stats, flagstat and idxstats - */ - -params.options = [:] - -include { SAMTOOLS_STATS } from '../../modules/nf-core/software/samtools/stats/main' addParams( options: params.options ) -include { SAMTOOLS_IDXSTATS } from '../../modules/nf-core/software/samtools/idxstats/main' addParams( options: params.options ) -include { SAMTOOLS_FLAGSTAT } from '../../modules/nf-core/software/samtools/flagstat/main' addParams( options: params.options ) - -workflow BAM_STATS_SAMTOOLS { - take: - bam_bai // channel: [ val(meta), [ bam ], [bai] ] - - main: - SAMTOOLS_STATS ( bam_bai ) - SAMTOOLS_FLAGSTAT ( bam_bai 
) - SAMTOOLS_IDXSTATS ( bam_bai ) - - emit: - stats = SAMTOOLS_STATS.out.stats // channel: [ val(meta), [ stats ] ] - flagstat = SAMTOOLS_FLAGSTAT.out.flagstat // channel: [ val(meta), [ flagstat ] ] - idxstats = SAMTOOLS_IDXSTATS.out.idxstats // channel: [ val(meta), [ idxstats ] ] - version = SAMTOOLS_STATS.out.version // path: *.version.txt -} +// +// Run SAMtools stats, flagstat and idxstats +// + +params.options = [:] + +include { SAMTOOLS_STATS } from '../../modules/nf-core/software/samtools/stats/main' addParams( options: params.options ) +include { SAMTOOLS_IDXSTATS } from '../../modules/nf-core/software/samtools/idxstats/main' addParams( options: params.options ) +include { SAMTOOLS_FLAGSTAT } from '../../modules/nf-core/software/samtools/flagstat/main' addParams( options: params.options ) + +workflow BAM_STATS_SAMTOOLS { + take: + bam_bai // channel: [ val(meta), [ bam ], [bai] ] + + main: + SAMTOOLS_STATS ( bam_bai ) + SAMTOOLS_FLAGSTAT ( bam_bai ) + SAMTOOLS_IDXSTATS ( bam_bai ) + + emit: + stats = SAMTOOLS_STATS.out.stats // channel: [ val(meta), [ stats ] ] + flagstat = SAMTOOLS_FLAGSTAT.out.flagstat // channel: [ val(meta), [ flagstat ] ] + idxstats = SAMTOOLS_IDXSTATS.out.idxstats // channel: [ val(meta), [ idxstats ] ] + version = SAMTOOLS_STATS.out.version // path: *.version.txt +} diff --git a/subworkflows/nf-core/fastqc_fastp.nf b/subworkflows/nf-core/fastqc_fastp.nf index 61df2c0f..2061d946 100644 --- a/subworkflows/nf-core/fastqc_fastp.nf +++ b/subworkflows/nf-core/fastqc_fastp.nf @@ -1,62 +1,62 @@ -/* - * Read QC and trimming - */ - -params.fastqc_raw_options = [:] -params.fastqc_trim_options = [:] -params.fastp_options = [:] - -include { FASTQC as FASTQC_RAW } from '../../modules/nf-core/software/fastqc/main' addParams( options: params.fastqc_raw_options ) -include { FASTQC as FASTQC_TRIM } from '../../modules/nf-core/software/fastqc/main' addParams( options: params.fastqc_trim_options ) -include { FASTP } from 
'../../modules/nf-core/software/fastp/main' addParams( options: params.fastp_options ) - -workflow FASTQC_FASTP { - take: - reads // channel: [ val(meta), [ reads ] ] - - main: - fastqc_raw_html = Channel.empty() - fastqc_raw_zip = Channel.empty() - fastqc_version = Channel.empty() - if (!params.skip_fastqc) { - FASTQC_RAW ( reads ).html.set { fastqc_raw_html } - fastqc_raw_zip = FASTQC_RAW.out.zip - fastqc_version = FASTQC_RAW.out.version - } - - trim_reads = reads - trim_json = Channel.empty() - trim_html = Channel.empty() - trim_log = Channel.empty() - trim_reads_fail = Channel.empty() - fastp_version = Channel.empty() - fastqc_trim_html = Channel.empty() - fastqc_trim_zip = Channel.empty() - if (!params.skip_fastp) { - FASTP ( reads ).reads.set { trim_reads } - trim_json = FASTP.out.json - trim_html = FASTP.out.html - trim_log = FASTP.out.log - trim_reads_fail = FASTP.out.reads_fail - fastp_version = FASTP.out.version - - if (!params.skip_fastqc) { - FASTQC_TRIM ( trim_reads ).html.set { fastqc_trim_html } - fastqc_trim_zip = FASTQC_TRIM.out.zip - } - } - - emit: - reads = trim_reads // channel: [ val(meta), [ reads ] ] - trim_json // channel: [ val(meta), [ json ] ] - trim_html // channel: [ val(meta), [ html ] ] - trim_log // channel: [ val(meta), [ log ] ] - trim_reads_fail // channel: [ val(meta), [ fastq.gz ] ] - fastp_version // path: *.version.txt - - fastqc_raw_html // channel: [ val(meta), [ html ] ] - fastqc_raw_zip // channel: [ val(meta), [ zip ] ] - fastqc_trim_html // channel: [ val(meta), [ html ] ] - fastqc_trim_zip // channel: [ val(meta), [ zip ] ] - fastqc_version // path: *.version.txt -} +// +// Read QC and trimming +// + +params.fastqc_raw_options = [:] +params.fastqc_trim_options = [:] +params.fastp_options = [:] + +include { FASTQC as FASTQC_RAW } from '../../modules/nf-core/software/fastqc/main' addParams( options: params.fastqc_raw_options ) +include { FASTQC as FASTQC_TRIM } from '../../modules/nf-core/software/fastqc/main' addParams( 
options: params.fastqc_trim_options ) +include { FASTP } from '../../modules/nf-core/software/fastp/main' addParams( options: params.fastp_options ) + +workflow FASTQC_FASTP { + take: + reads // channel: [ val(meta), [ reads ] ] + + main: + fastqc_raw_html = Channel.empty() + fastqc_raw_zip = Channel.empty() + fastqc_version = Channel.empty() + if (!params.skip_fastqc) { + FASTQC_RAW ( reads ).html.set { fastqc_raw_html } + fastqc_raw_zip = FASTQC_RAW.out.zip + fastqc_version = FASTQC_RAW.out.version + } + + trim_reads = reads + trim_json = Channel.empty() + trim_html = Channel.empty() + trim_log = Channel.empty() + trim_reads_fail = Channel.empty() + fastp_version = Channel.empty() + fastqc_trim_html = Channel.empty() + fastqc_trim_zip = Channel.empty() + if (!params.skip_fastp) { + FASTP ( reads ).reads.set { trim_reads } + trim_json = FASTP.out.json + trim_html = FASTP.out.html + trim_log = FASTP.out.log + trim_reads_fail = FASTP.out.reads_fail + fastp_version = FASTP.out.version + + if (!params.skip_fastqc) { + FASTQC_TRIM ( trim_reads ).html.set { fastqc_trim_html } + fastqc_trim_zip = FASTQC_TRIM.out.zip + } + } + + emit: + reads = trim_reads // channel: [ val(meta), [ reads ] ] + trim_json // channel: [ val(meta), [ json ] ] + trim_html // channel: [ val(meta), [ html ] ] + trim_log // channel: [ val(meta), [ log ] ] + trim_reads_fail // channel: [ val(meta), [ fastq.gz ] ] + fastp_version // path: *.version.txt + + fastqc_raw_html // channel: [ val(meta), [ html ] ] + fastqc_raw_zip // channel: [ val(meta), [ zip ] ] + fastqc_trim_html // channel: [ val(meta), [ html ] ] + fastqc_trim_zip // channel: [ val(meta), [ zip ] ] + fastqc_version // path: *.version.txt +} diff --git a/subworkflows/nf-core/filter_bam_samtools.nf b/subworkflows/nf-core/filter_bam_samtools.nf index 26b7b719..fdd317e0 100644 --- a/subworkflows/nf-core/filter_bam_samtools.nf +++ b/subworkflows/nf-core/filter_bam_samtools.nf @@ -1,35 +1,36 @@ -/* - * filter co-ordinate sorted BAM, index 
and run samtools stats, flagstat and idxstats - */ - -params.samtools_view_options = [:] -params.samtools_index_options = [:] - -include { SAMTOOLS_VIEW } from '../../modules/nf-core/software/samtools/view/main' addParams( options: params.samtools_view_options ) -include { SAMTOOLS_INDEX } from '../../modules/nf-core/software/samtools/index/main' addParams( options: params.samtools_index_options ) -include { BAM_STATS_SAMTOOLS } from '../nf-core/bam_stats_samtools' addParams( options: params.samtools_index_options ) - -workflow FILTER_BAM_SAMTOOLS { - take: - bam // channel: [ val(meta), [ bam ] ] - - main: - /* - * Filter BAM using Samtools view - */ - SAMTOOLS_VIEW ( bam ) - - /* - * Index BAM file and run samtools stats, flagstat and idxstats - */ - SAMTOOLS_INDEX ( SAMTOOLS_VIEW.out.bam ) - BAM_STATS_SAMTOOLS ( SAMTOOLS_VIEW.out.bam.join(SAMTOOLS_INDEX.out.bai, by: [0]) ) - - emit: - bam = SAMTOOLS_VIEW.out.bam // channel: [ val(meta), [ bam ] ] - bai = SAMTOOLS_INDEX.out.bai // channel: [ val(meta), [ bai ] ] - stats = BAM_STATS_SAMTOOLS.out.stats // channel: [ val(meta), [ stats ] ] - flagstat = BAM_STATS_SAMTOOLS.out.flagstat // channel: [ val(meta), [ flagstat ] ] - idxstats = BAM_STATS_SAMTOOLS.out.idxstats // channel: [ val(meta), [ idxstats ] ] - samtools_version = SAMTOOLS_INDEX.out.version // path: *.version.txt -} +// +// Filter co-ordinate sorted BAM, index and run samtools stats, flagstat and idxstats +// + +params.samtools_view_options = [:] +params.samtools_index_options = [:] + +include { SAMTOOLS_VIEW } from '../../modules/nf-core/software/samtools/view/main' addParams( options: params.samtools_view_options ) +include { SAMTOOLS_INDEX } from '../../modules/nf-core/software/samtools/index/main' addParams( options: params.samtools_index_options ) +include { BAM_STATS_SAMTOOLS } from '../nf-core/bam_stats_samtools' addParams( options: params.samtools_index_options ) + +workflow FILTER_BAM_SAMTOOLS { + take: + bam // channel: [ val(meta), [ bam ] ] 
+ + main: + + // + // Filter BAM using Samtools view + // + SAMTOOLS_VIEW ( bam ) + + // + // Index BAM file and run samtools stats, flagstat and idxstats + // + SAMTOOLS_INDEX ( SAMTOOLS_VIEW.out.bam ) + BAM_STATS_SAMTOOLS ( SAMTOOLS_VIEW.out.bam.join(SAMTOOLS_INDEX.out.bai, by: [0]) ) + + emit: + bam = SAMTOOLS_VIEW.out.bam // channel: [ val(meta), [ bam ] ] + bai = SAMTOOLS_INDEX.out.bai // channel: [ val(meta), [ bai ] ] + stats = BAM_STATS_SAMTOOLS.out.stats // channel: [ val(meta), [ stats ] ] + flagstat = BAM_STATS_SAMTOOLS.out.flagstat // channel: [ val(meta), [ flagstat ] ] + idxstats = BAM_STATS_SAMTOOLS.out.idxstats // channel: [ val(meta), [ idxstats ] ] + samtools_version = SAMTOOLS_INDEX.out.version // path: *.version.txt +} diff --git a/subworkflows/nf-core/mark_duplicates_picard.nf b/subworkflows/nf-core/mark_duplicates_picard.nf index 4c19592f..6c111de1 100644 --- a/subworkflows/nf-core/mark_duplicates_picard.nf +++ b/subworkflows/nf-core/mark_duplicates_picard.nf @@ -1,38 +1,39 @@ -/* - * Picard MarkDuplicates, sort, index BAM file and run samtools stats, flagstat and idxstats - */ - -params.markduplicates_options = [:] -params.samtools_options = [:] - -include { PICARD_MARKDUPLICATES } from '../../modules/nf-core/software/picard/markduplicates/main' addParams( options: params.markduplicates_options ) -include { SAMTOOLS_INDEX } from '../../modules/nf-core/software/samtools/index/main' addParams( options: params.samtools_options ) -include { BAM_STATS_SAMTOOLS } from './bam_stats_samtools' addParams( options: params.samtools_options ) - -workflow MARK_DUPLICATES_PICARD { - take: - bam // channel: [ val(meta), [ bam ] ] - - main: - /* - * Picard MarkDuplicates - */ - PICARD_MARKDUPLICATES ( bam ) - - /* - * Index BAM file and run samtools stats, flagstat and idxstats - */ - SAMTOOLS_INDEX ( PICARD_MARKDUPLICATES.out.bam ) - BAM_STATS_SAMTOOLS ( PICARD_MARKDUPLICATES.out.bam.join(SAMTOOLS_INDEX.out.bai, by: [0]) ) - - emit: - bam = 
PICARD_MARKDUPLICATES.out.bam // channel: [ val(meta), [ bam ] ] - metrics = PICARD_MARKDUPLICATES.out.metrics // channel: [ val(meta), [ metrics ] ] - picard_version = PICARD_MARKDUPLICATES.out.version // path: *.version.txt - - bai = SAMTOOLS_INDEX.out.bai // channel: [ val(meta), [ bai ] ] - stats = BAM_STATS_SAMTOOLS.out.stats // channel: [ val(meta), [ stats ] ] - flagstat = BAM_STATS_SAMTOOLS.out.flagstat // channel: [ val(meta), [ flagstat ] ] - idxstats = BAM_STATS_SAMTOOLS.out.idxstats // channel: [ val(meta), [ idxstats ] ] - samtools_version = SAMTOOLS_INDEX.out.version // path: *.version.txt -} +// +// Picard MarkDuplicates, sort, index BAM file and run samtools stats, flagstat and idxstats +// + +params.markduplicates_options = [:] +params.samtools_options = [:] + +include { PICARD_MARKDUPLICATES } from '../../modules/nf-core/software/picard/markduplicates/main' addParams( options: params.markduplicates_options ) +include { SAMTOOLS_INDEX } from '../../modules/nf-core/software/samtools/index/main' addParams( options: params.samtools_options ) +include { BAM_STATS_SAMTOOLS } from './bam_stats_samtools' addParams( options: params.samtools_options ) + +workflow MARK_DUPLICATES_PICARD { + take: + bam // channel: [ val(meta), [ bam ] ] + + main: + + // + // Picard MarkDuplicates + // + PICARD_MARKDUPLICATES ( bam ) + + // + // Index BAM file and run samtools stats, flagstat and idxstats + // + SAMTOOLS_INDEX ( PICARD_MARKDUPLICATES.out.bam ) + BAM_STATS_SAMTOOLS ( PICARD_MARKDUPLICATES.out.bam.join(SAMTOOLS_INDEX.out.bai, by: [0]) ) + + emit: + bam = PICARD_MARKDUPLICATES.out.bam // channel: [ val(meta), [ bam ] ] + metrics = PICARD_MARKDUPLICATES.out.metrics // channel: [ val(meta), [ metrics ] ] + picard_version = PICARD_MARKDUPLICATES.out.version // path: *.version.txt + + bai = SAMTOOLS_INDEX.out.bai // channel: [ val(meta), [ bai ] ] + stats = BAM_STATS_SAMTOOLS.out.stats // channel: [ val(meta), [ stats ] ] + flagstat = BAM_STATS_SAMTOOLS.out.flagstat 
// channel: [ val(meta), [ flagstat ] ] + idxstats = BAM_STATS_SAMTOOLS.out.idxstats // channel: [ val(meta), [ idxstats ] ] + samtools_version = SAMTOOLS_INDEX.out.version // path: *.version.txt +} diff --git a/subworkflows/nf-core/vcf_bgzip_tabix_stats.nf b/subworkflows/nf-core/vcf_bgzip_tabix_stats.nf index d5587a79..291b8b7d 100644 --- a/subworkflows/nf-core/vcf_bgzip_tabix_stats.nf +++ b/subworkflows/nf-core/vcf_bgzip_tabix_stats.nf @@ -1,27 +1,27 @@ -/* - * Run BCFTools bgzip, tabix and stats commands - */ - -params.bgzip_options = [:] -params.tabix_options = [:] -params.stats_options = [:] - -include { TABIX_BGZIP } from '../../modules/nf-core/software/tabix/bgzip/main' addParams( options: params.bgzip_options ) -include { VCF_TABIX_STATS } from './vcf_tabix_stats' addParams( tabix_options: params.tabix_options, stats_options: params.stats_options ) - -workflow VCF_BGZIP_TABIX_STATS { - take: - vcf // channel: [ val(meta), [ vcf ] ] - - main: - TABIX_BGZIP ( vcf ) - VCF_TABIX_STATS ( TABIX_BGZIP.out.gz ) - - emit: - vcf = TABIX_BGZIP.out.gz // channel: [ val(meta), [ vcf.gz ] ] - tabix_version = TABIX_BGZIP.out.version // path: *.version.txt - - tbi = VCF_TABIX_STATS.out.tbi // channel: [ val(meta), [ tbi ] ] - stats = VCF_TABIX_STATS.out.stats // channel: [ val(meta), [ txt ] ] - bcftools_version = VCF_TABIX_STATS.out.bcftools_version // path: *.version.txt -} +// +// Run BCFTools bgzip, tabix and stats commands +// + +params.bgzip_options = [:] +params.tabix_options = [:] +params.stats_options = [:] + +include { TABIX_BGZIP } from '../../modules/nf-core/software/tabix/bgzip/main' addParams( options: params.bgzip_options ) +include { VCF_TABIX_STATS } from './vcf_tabix_stats' addParams( tabix_options: params.tabix_options, stats_options: params.stats_options ) + +workflow VCF_BGZIP_TABIX_STATS { + take: + vcf // channel: [ val(meta), [ vcf ] ] + + main: + TABIX_BGZIP ( vcf ) + VCF_TABIX_STATS ( TABIX_BGZIP.out.gz ) + + emit: + vcf = TABIX_BGZIP.out.gz // 
channel: [ val(meta), [ vcf.gz ] ] + tabix_version = TABIX_BGZIP.out.version // path: *.version.txt + + tbi = VCF_TABIX_STATS.out.tbi // channel: [ val(meta), [ tbi ] ] + stats = VCF_TABIX_STATS.out.stats // channel: [ val(meta), [ txt ] ] + bcftools_version = VCF_TABIX_STATS.out.bcftools_version // path: *.version.txt +} diff --git a/subworkflows/nf-core/vcf_tabix_stats.nf b/subworkflows/nf-core/vcf_tabix_stats.nf index 9a684da6..aedb68a3 100644 --- a/subworkflows/nf-core/vcf_tabix_stats.nf +++ b/subworkflows/nf-core/vcf_tabix_stats.nf @@ -1,25 +1,25 @@ -/* - * Run BCFTools tabix and stats commands - */ - -params.tabix_options = [:] -params.stats_options = [:] - -include { TABIX_TABIX } from '../../modules/nf-core/software/tabix/tabix/main' addParams( options: params.tabix_options ) -include { BCFTOOLS_STATS } from '../../modules/nf-core/software/bcftools/stats/main' addParams( options: params.stats_options ) - -workflow VCF_TABIX_STATS { - take: - vcf // channel: [ val(meta), [ vcf ] ] - - main: - TABIX_TABIX ( vcf ) - BCFTOOLS_STATS ( vcf ) - - emit: - tbi = TABIX_TABIX.out.tbi // channel: [ val(meta), [ tbi ] ] - tabix_version = TABIX_TABIX.out.version // path: *.version.txt - - stats = BCFTOOLS_STATS.out.stats // channel: [ val(meta), [ txt ] ] - bcftools_version = BCFTOOLS_STATS.out.version // path: *.version.txt -} +// +// Run BCFTools tabix and stats commands +// + +params.tabix_options = [:] +params.stats_options = [:] + +include { TABIX_TABIX } from '../../modules/nf-core/software/tabix/tabix/main' addParams( options: params.tabix_options ) +include { BCFTOOLS_STATS } from '../../modules/nf-core/software/bcftools/stats/main' addParams( options: params.stats_options ) + +workflow VCF_TABIX_STATS { + take: + vcf // channel: [ val(meta), [ vcf ] ] + + main: + TABIX_TABIX ( vcf ) + BCFTOOLS_STATS ( vcf ) + + emit: + tbi = TABIX_TABIX.out.tbi // channel: [ val(meta), [ tbi ] ] + tabix_version = TABIX_TABIX.out.version // path: *.version.txt + + stats = 
BCFTOOLS_STATS.out.stats // channel: [ val(meta), [ txt ] ] + bcftools_version = BCFTOOLS_STATS.out.version // path: *.version.txt +} diff --git a/workflows/illumina.nf b/workflows/illumina.nf index 294885e2..d4be0c6f 100644 --- a/workflows/illumina.nf +++ b/workflows/illumina.nf @@ -36,7 +36,7 @@ if (!callers) { callers = params.protocol == 'amplicon' ? ['ivar'] : ['bcftools /* ======================================================================================== - CONFIG FILES + CONFIG FILES ======================================================================================== */ @@ -66,7 +66,7 @@ if (!params.skip_variants) { multiqc_options.publish_files.put('variants_metrics_mqc.csv','') } -include { BCFTOOLS_ISEC } from '../modules/local/bcftools_isec' addParams( options: modules['illumina_bcftools_isec'] ) +include { BCFTOOLS_ISEC } from '../modules/local/bcftools_isec' addParams( options: modules['illumina_bcftools_isec'] ) include { CUTADAPT } from '../modules/local/cutadapt' addParams( options: modules['illumina_cutadapt'] ) include { GET_SOFTWARE_VERSIONS } from '../modules/local/get_software_versions' addParams( options: [publish_files: ['csv':'']] ) include { MULTIQC } from '../modules/local/multiqc_illumina' addParams( options: multiqc_options ) @@ -76,9 +76,9 @@ include { MULTIQC_CUSTOM_TWOCOL_TSV as MULTIQC_CUSTOM_TWOCOL_TSV_FAIL_MAPPED include { MULTIQC_CUSTOM_TWOCOL_TSV as MULTIQC_CUSTOM_TWOCOL_TSV_IVAR_PANGOLIN } from '../modules/local/multiqc_custom_twocol_tsv' addParams( options: [publish_files: false] ) include { MULTIQC_CUSTOM_TWOCOL_TSV as MULTIQC_CUSTOM_TWOCOL_TSV_BCFTOOLS_PANGOLIN } from '../modules/local/multiqc_custom_twocol_tsv' addParams( options: [publish_files: false] ) -/* - * SUBWORKFLOW: Consisting of a mix of local and nf-core/modules - */ +// +// SUBWORKFLOW: Consisting of a mix of local and nf-core/modules +// def publish_genome_options = params.save_reference ? 
[publish_dir: 'genome'] : [publish_files: false] def publish_index_options = params.save_reference ? [publish_dir: 'genome/index'] : [publish_files: false] def publish_db_options = params.save_reference ? [publish_dir: 'genome/db'] : [publish_files: false] @@ -88,7 +88,7 @@ def snpeff_build_options = modules['illumina_snpeff_build'] def makeblastdb_options = modules['illumina_blast_makeblastdb'] def kraken2_build_options = modules['illumina_kraken2_build'] def collapse_primers_options = modules['illumina_collapse_primers_illumina'] -if (!params.save_reference) { +if (!params.save_reference) { bedtools_getfasta_options['publish_files'] = false bowtie2_build_options['publish_files'] = false snpeff_build_options['publish_files'] = false @@ -125,19 +125,19 @@ include { ASSEMBLY_MINIA } from '../subworkflows/local/assembly_minia' ======================================================================================== */ -/* - * MODULE: Installed directly from nf-core/modules - */ -include { CAT_FASTQ } from '../modules/nf-core/software/cat/fastq/main' addParams( options: modules['illumina_cat_fastq'] ) +// +// MODULE: Installed directly from nf-core/modules +// +include { CAT_FASTQ } from '../modules/nf-core/software/cat/fastq/main' addParams( options: modules['illumina_cat_fastq'] ) include { FASTQC } from '../modules/nf-core/software/fastqc/main' addParams( options: modules['illumina_cutadapt_fastqc'] ) -include { KRAKEN2_RUN } from '../modules/nf-core/software/kraken2/run/main' addParams( options: modules['illumina_kraken2_run'] ) +include { KRAKEN2_RUN } from '../modules/nf-core/software/kraken2/run/main' addParams( options: modules['illumina_kraken2_run'] ) include { PICARD_COLLECTMULTIPLEMETRICS } from '../modules/nf-core/software/picard/collectmultiplemetrics/main' addParams( options: modules['illumina_picard_collectmultiplemetrics'] ) include { MOSDEPTH as MOSDEPTH_GENOME } from '../modules/nf-core/software/mosdepth/main' addParams( options: 
modules['illumina_mosdepth_genome'] ) include { MOSDEPTH as MOSDEPTH_AMPLICON } from '../modules/nf-core/software/mosdepth/main' addParams( options: modules['illumina_mosdepth_amplicon'] ) -/* - * SUBWORKFLOW: Consisting entirely of nf-core/modules - */ +// +// SUBWORKFLOW: Consisting entirely of nf-core/modules +// def fastp_options = modules['illumina_fastp'] if (params.save_trimmed_fail) { fastp_options.publish_files.put('fail.fastq.gz','') } @@ -166,9 +166,9 @@ workflow ILLUMINA { ch_software_versions = Channel.empty() - /* - * SUBWORKFLOW: Uncompress and prepare reference genome files - */ + // + // SUBWORKFLOW: Uncompress and prepare reference genome files + // PREPARE_GENOME ( ch_dummy_file ) @@ -194,18 +194,18 @@ workflow ILLUMINA { .map { WorkflowIllumina.checkIfSwiftProtocol(it, 'covid19genome', log) } } } - - /* - * SUBWORKFLOW: Read in samplesheet, validate and stage input files - */ - INPUT_CHECK ( + + // + // SUBWORKFLOW: Read in samplesheet, validate and stage input files + // + INPUT_CHECK ( ch_input, params.platform ) .map { meta, fastq -> meta.id = meta.id.split('_')[0..-2].join('_') - [ meta, fastq ] + [ meta, fastq ] } .groupTuple(by: [0]) .branch { @@ -216,19 +216,19 @@ workflow ILLUMINA { return [ meta, fastq.flatten() ] } .set { ch_fastq } - - /* - * MODULE: Concatenate FastQ files from same sample if required - */ - CAT_FASTQ ( + + // + // MODULE: Concatenate FastQ files from same sample if required + // + CAT_FASTQ ( ch_fastq.multiple ) .mix(ch_fastq.single) .set { ch_cat_fastq } - - /* - * SUBWORKFLOW: Read QC and trim adapters - */ + + // + // SUBWORKFLOW: Read QC and trim adapters + // FASTQC_FASTP ( ch_cat_fastq ) @@ -236,26 +236,26 @@ workflow ILLUMINA { ch_software_versions = ch_software_versions.mix(FASTQC_FASTP.out.fastqc_version.first().ifEmpty(null)) ch_software_versions = ch_software_versions.mix(FASTQC_FASTP.out.fastp_version.first().ifEmpty(null)) - /* - * Filter empty FastQ files after adapter trimming - */ + // + // Filter 
empty FastQ files after adapter trimming + // if (!params.skip_fastp) { ch_variants_fastq .join(FASTQC_FASTP.out.trim_json) - .map { + .map { meta, reads, json -> if (WorkflowIllumina.getFastpReadsAfterFiltering(json) > 0) [ meta, reads ] } .set { ch_variants_fastq } } - - /* - * MODULE: Run Kraken2 for removal of host reads - */ + + // + // MODULE: Run Kraken2 for removal of host reads + // ch_assembly_fastq = ch_variants_fastq ch_kraken2_multiqc = Channel.empty() if (!params.skip_kraken2) { - KRAKEN2_RUN ( + KRAKEN2_RUN ( ch_variants_fastq, PREPARE_GENOME.out.kraken2_db ) @@ -268,12 +268,12 @@ workflow ILLUMINA { if (params.kraken2_assembly_host_filter) { ch_assembly_fastq = KRAKEN2_RUN.out.unclassified - } + } } - - /* - * SUBWORKFLOW: Alignment with Bowtie2 - */ + + // + // SUBWORKFLOW: Alignment with Bowtie2 + // ch_bam = Channel.empty() ch_bai = Channel.empty() ch_bowtie2_multiqc = Channel.empty() @@ -291,9 +291,9 @@ workflow ILLUMINA { ch_software_versions = ch_software_versions.mix(ALIGN_BOWTIE2.out.samtools_version.first().ifEmpty(null)) } - /* - * Filter channels to get samples that passed Bowtie2 minimum mapped reads threshold - */ + // + // Filter channels to get samples that passed Bowtie2 minimum mapped reads threshold + // ch_fail_mapping_multiqc = Channel.empty() if (!params.skip_variants) { ch_bowtie2_flagstat_multiqc @@ -321,7 +321,7 @@ workflow ILLUMINA { } .set { ch_pass_fail_mapped } - MULTIQC_CUSTOM_TWOCOL_TSV_FAIL_MAPPED ( + MULTIQC_CUSTOM_TWOCOL_TSV_FAIL_MAPPED ( ch_pass_fail_mapped.fail.collect(), 'Sample', 'Mapped reads', @@ -329,10 +329,10 @@ workflow ILLUMINA { ) .set { ch_fail_mapping_multiqc } } - - /* - * SUBWORKFLOW: Trim primer sequences from reads with iVar - */ + + // + // SUBWORKFLOW: Trim primer sequences from reads with iVar + // ch_ivar_trim_flagstat_multiqc = Channel.empty() if (!params.skip_variants && !params.skip_ivar_trim && params.protocol == 'amplicon') { PRIMER_TRIM_IVAR ( @@ -345,9 +345,9 @@ workflow ILLUMINA { 
ch_software_versions = ch_software_versions.mix(PRIMER_TRIM_IVAR.out.ivar_version.first().ifEmpty(null)) } - /* - * SUBWORKFLOW: Mark duplicate reads - */ + // + // SUBWORKFLOW: Mark duplicate reads + // ch_markduplicates_flagstat_multiqc = Channel.empty() if (!params.skip_variants && !params.skip_markduplicates) { MARK_DUPLICATES_PICARD ( @@ -359,9 +359,9 @@ workflow ILLUMINA { ch_software_versions = ch_software_versions.mix(MARK_DUPLICATES_PICARD.out.picard_version.first().ifEmpty(null)) } - /* - * MODULE: Picard metrics - */ + // + // MODULE: Picard metrics + // if (!params.skip_variants && !params.skip_picard_metrics) { PICARD_COLLECTMULTIPLEMETRICS ( ch_bam, @@ -370,9 +370,9 @@ workflow ILLUMINA { ch_software_versions = ch_software_versions.mix(PICARD_COLLECTMULTIPLEMETRICS.out.version.first().ifEmpty(null)) } - /* - * MODULE: Genome-wide and amplicon-specific coverage QC plots - */ + // + // MODULE: Genome-wide and amplicon-specific coverage QC plots + // ch_mosdepth_multiqc = Channel.empty() if (!params.skip_variants && !params.skip_mosdepth) { @@ -387,7 +387,7 @@ workflow ILLUMINA { PLOT_MOSDEPTH_REGIONS_GENOME ( MOSDEPTH_GENOME.out.regions_bed.collect { it[1] } ) - + if (params.protocol == 'amplicon') { MOSDEPTH_AMPLICON ( ch_bam.join(ch_bai, by: [0]), @@ -395,15 +395,15 @@ workflow ILLUMINA { 0 ) - PLOT_MOSDEPTH_REGIONS_AMPLICON ( + PLOT_MOSDEPTH_REGIONS_AMPLICON ( MOSDEPTH_AMPLICON.out.regions_bed.collect { it[1] } ) } } - /* - * SUBWORKFLOW: Call variants with IVar - */ + // + // SUBWORKFLOW: Call variants with IVar + // ch_ivar_vcf = Channel.empty() ch_ivar_tbi = Channel.empty() ch_ivar_counts_multiqc = Channel.empty() @@ -438,9 +438,9 @@ workflow ILLUMINA { ch_software_versions = ch_software_versions.mix(VARIANTS_IVAR.out.nextclade_version.first().ifEmpty(null)) ch_software_versions = ch_software_versions.mix(VARIANTS_IVAR.out.asciigenome_version.first().ifEmpty(null)) - /* - * MODULE: Get Pangolin lineage information for MultiQC report - */ + // + // 
MODULE: Get Pangolin lineage information for MultiQC report + // ch_ivar_pangolin_report .map { meta, report -> def lineage = WorkflowCommons.getPangolinLineage(report) @@ -448,7 +448,7 @@ workflow ILLUMINA { } .set { ch_ivar_pangolin_multiqc } - MULTIQC_CUSTOM_TWOCOL_TSV_IVAR_PANGOLIN ( + MULTIQC_CUSTOM_TWOCOL_TSV_IVAR_PANGOLIN ( ch_ivar_pangolin_multiqc.collect(), 'Sample', 'Lineage', @@ -456,10 +456,10 @@ workflow ILLUMINA { ) .set { ch_ivar_pangolin_multiqc } } - - /* - * SUBWORKFLOW: Call variants with BCFTools - */ + + // + // SUBWORKFLOW: Call variants with BCFTools + // ch_bcftools_vcf = Channel.empty() ch_bcftools_tbi = Channel.empty() ch_bcftools_stats_multiqc = Channel.empty() @@ -490,9 +490,9 @@ workflow ILLUMINA { ch_software_versions = ch_software_versions.mix(VARIANTS_BCFTOOLS.out.nextclade_version.first().ifEmpty(null)) ch_software_versions = ch_software_versions.mix(VARIANTS_BCFTOOLS.out.asciigenome_version.first().ifEmpty(null)) - /* - * MODULE: Get Pangolin lineage information for MultiQC report - */ + // + // MODULE: Get Pangolin lineage information for MultiQC report + // ch_bcftools_pangolin_report .map { meta, report -> def lineage = WorkflowCommons.getPangolinLineage(report) @@ -500,7 +500,7 @@ workflow ILLUMINA { } .set { ch_bcftools_pangolin_multiqc } - MULTIQC_CUSTOM_TWOCOL_TSV_BCFTOOLS_PANGOLIN ( + MULTIQC_CUSTOM_TWOCOL_TSV_BCFTOOLS_PANGOLIN ( ch_bcftools_pangolin_multiqc.collect(), 'Sample', 'Lineage', @@ -509,9 +509,9 @@ workflow ILLUMINA { .set { ch_bcftools_pangolin_multiqc } } - /* - * MODULE: Intersect variants across callers - */ + // + // MODULE: Intersect variants across callers + // if (!params.skip_variants && callers.size() > 1) { BCFTOOLS_ISEC ( ch_ivar_vcf @@ -521,9 +521,9 @@ workflow ILLUMINA { ) } - /* - * MODULE: Primer trimming with Cutadapt - */ + // + // MODULE: Primer trimming with Cutadapt + // ch_cutadapt_multiqc = Channel.empty() if (params.protocol == 'amplicon' && !params.skip_assembly && !params.skip_cutadapt) 
{ CUTADAPT ( @@ -535,15 +535,15 @@ workflow ILLUMINA { ch_software_versions = ch_software_versions.mix(CUTADAPT.out.version.first().ifEmpty(null)) if (!params.skip_fastqc) { - FASTQC ( - CUTADAPT.out.reads + FASTQC ( + CUTADAPT.out.reads ) } } - /* - * SUBWORKFLOW: Run SPAdes assembly and downstream analysis - */ + // + // SUBWORKFLOW: Run SPAdes assembly and downstream analysis + // ch_spades_quast_multiqc = Channel.empty() if (!params.skip_assembly && 'spades' in assemblers) { ASSEMBLY_SPADES ( @@ -563,9 +563,9 @@ workflow ILLUMINA { ch_software_versions = ch_software_versions.mix(ASSEMBLY_SPADES.out.plasmidid_version.first().ifEmpty(null)) } - /* - * SUBWORKFLOW: Run Unicycler assembly and downstream analysis - */ + // + // SUBWORKFLOW: Run Unicycler assembly and downstream analysis + // ch_unicycler_quast_multiqc = Channel.empty() if (!params.skip_assembly && 'unicycler' in assemblers) { ASSEMBLY_UNICYCLER ( @@ -584,9 +584,9 @@ workflow ILLUMINA { ch_software_versions = ch_software_versions.mix(ASSEMBLY_UNICYCLER.out.plasmidid_version.first().ifEmpty(null)) } - /* - * SUBWORKFLOW: Run minia assembly and downstream analysis - */ + // + // SUBWORKFLOW: Run minia assembly and downstream analysis + // ch_minia_quast_multiqc = Channel.empty() if (!params.skip_assembly && 'minia' in assemblers) { ASSEMBLY_MINIA ( @@ -604,10 +604,9 @@ workflow ILLUMINA { ch_software_versions = ch_software_versions.mix(ASSEMBLY_MINIA.out.plasmidid_version.first().ifEmpty(null)) } - /* - * MODULE: Pipeline reporting - */ - // Get unique list of files containing version information + // + // MODULE: Pipeline reporting + // ch_software_versions .map { it -> if (it) [ it.baseName, it ] } .groupTuple() @@ -616,13 +615,13 @@ workflow ILLUMINA { .collect() .set { ch_software_versions } - GET_SOFTWARE_VERSIONS ( + GET_SOFTWARE_VERSIONS ( ch_software_versions ) - /* - * MODULE: MultiQC - */ + // + // MODULE: MultiQC + // if (!params.skip_multiqc) { workflow_summary = 
WorkflowCommons.paramsSummaryMultiqc(workflow, summary_params) ch_workflow_summary = Channel.value(workflow_summary) diff --git a/workflows/nanopore.nf b/workflows/nanopore.nf index d1fafa91..23b378a4 100644 --- a/workflows/nanopore.nf +++ b/workflows/nanopore.nf @@ -15,7 +15,7 @@ def summary_params = NfcoreSchema.paramsSummaryMap(workflow, params) WorkflowNanopore.initialise(params, log, valid_params) def checkPathParamList = [ - params.input, params.fastq_dir, params.fast5_dir, + params.input, params.fastq_dir, params.fast5_dir, params.sequencing_summary, params.gff ] for (param in checkPathParamList) { if (param) { file(param, checkIfExists: true) } } @@ -37,7 +37,7 @@ if (params.artic_minion_caller == 'medaka') { /* ======================================================================================== - CONFIG FILES + CONFIG FILES ======================================================================================== */ @@ -68,9 +68,9 @@ include { MULTIQC_CUSTOM_TWOCOL_TSV as MULTIQC_CUSTOM_PANGOLIN } fro include { PLOT_MOSDEPTH_REGIONS as PLOT_MOSDEPTH_REGIONS_GENOME } from '../modules/local/plot_mosdepth_regions' addParams( options: modules['nanopore_plot_mosdepth_regions_genome'] ) include { PLOT_MOSDEPTH_REGIONS as PLOT_MOSDEPTH_REGIONS_AMPLICON } from '../modules/local/plot_mosdepth_regions' addParams( options: modules['nanopore_plot_mosdepth_regions_amplicon'] ) -/* - * SUBWORKFLOW: Consisting of a mix of local and nf-core/modules - */ +// +// SUBWORKFLOW: Consisting of a mix of local and nf-core/modules +// def publish_genome_options = params.save_reference ? 
[publish_dir: 'genome'] : [publish_files: false] def collapse_primers_options = modules['nanopore_collapse_primers'] def snpeff_build_options = modules['nanopore_snpeff_build'] @@ -89,9 +89,9 @@ include { SNPEFF_SNPSIFT } from '../subworkflows/local/snpeff_snpsift' ======================================================================================== */ -/* - * MODULE: Installed directly from nf-core/modules - */ +// +// MODULE: Installed directly from nf-core/modules +// def artic_minion_options = modules['nanopore_artic_minion'] artic_minion_options.args += params.artic_minion_caller == 'medaka' ? Utils.joinModuleArgs(['--medaka']) : '' @@ -108,9 +108,9 @@ include { NEXTCLADE } from '../modules/nf-core/software/next include { MOSDEPTH as MOSDEPTH_GENOME } from '../modules/nf-core/software/mosdepth/main' addParams( options: modules['nanopore_mosdepth_genome'] ) include { MOSDEPTH as MOSDEPTH_AMPLICON } from '../modules/nf-core/software/mosdepth/main' addParams( options: modules['nanopore_mosdepth_amplicon'] ) -/* - * SUBWORKFLOW: Consisting entirely of nf-core/modules - */ +// +// SUBWORKFLOW: Consisting entirely of nf-core/modules +// include { FILTER_BAM_SAMTOOLS } from '../subworkflows/nf-core/filter_bam_samtools' addParams( samtools_view_options: modules['nanopore_filter_bam'], samtools_index_options: modules['nanopore_filter_bam_stats'] ) /* @@ -128,9 +128,9 @@ workflow NANOPORE { ch_software_versions = Channel.empty() - /* - * MODULE: PycoQC on sequencing summary file - */ + // + // MODULE: PycoQC on sequencing summary file + // if (params.sequencing_summary && !params.skip_pycoqc) { PYCOQC ( ch_sequencing_summary @@ -138,9 +138,9 @@ workflow NANOPORE { } ch_software_versions = ch_software_versions.mix(PYCOQC.out.version.ifEmpty(null)) - /* - * SUBWORKFLOW: Uncompress and prepare reference genome files - */ + // + // SUBWORKFLOW: Uncompress and prepare reference genome files + // PREPARE_GENOME ( ch_dummy_file ) @@ -149,8 +149,8 @@ workflow NANOPORE { 
PREPARE_GENOME .out .primer_bed - .map { WorkflowCommons.checkPrimerSuffixes(it, params.primer_left_suffix, params.primer_right_suffix, log) } - + .map { WorkflowCommons.checkPrimerSuffixes(it, params.primer_left_suffix, params.primer_right_suffix, log) } + barcode_dirs = file("${params.fastq_dir}/barcode*", type: 'dir' , maxdepth: 1) single_barcode_dir = file("${params.fastq_dir}/*.fastq" , type: 'file', maxdepth: 1) ch_custom_no_sample_name_multiqc = Channel.empty() @@ -170,60 +170,60 @@ workflow NANOPORE { } .set { ch_fastq_dirs } - /* - * SUBWORKFLOW: Read in samplesheet containing sample to barcode mappings - */ + // + // SUBWORKFLOW: Read in samplesheet containing sample to barcode mappings + // if (params.input) { - INPUT_CHECK ( + INPUT_CHECK ( ch_input, params.platform ) .join(ch_fastq_dirs, remainder: true) .set { ch_fastq_dirs } - /* - * MODULE: Create custom content file for MultiQC to report barcodes were allocated reads >= params.min_barcode_reads but no sample name in samplesheet - */ + // + // MODULE: Create custom content file for MultiQC to report barcodes were allocated reads >= params.min_barcode_reads but no sample name in samplesheet + // ch_fastq_dirs .filter { it[1] == null } .filter { it[-1] >= params.min_barcode_reads } .map { it -> [ "${it[0]}\t${it[-1]}" ] } .set { ch_barcodes_no_sample } - MULTIQC_CUSTOM_FAIL_NO_SAMPLE_NAME ( + MULTIQC_CUSTOM_FAIL_NO_SAMPLE_NAME ( ch_barcodes_no_sample.collect(), 'Barcode', 'Read count', 'fail_barcodes_no_sample' ) ch_custom_no_sample_name_multiqc = MULTIQC_CUSTOM_FAIL_NO_SAMPLE_NAME.out - - /* - * MODULE: Create custom content file for MultiQC to report samples that were in samplesheet but have no barcodes - */ + + // + // MODULE: Create custom content file for MultiQC to report samples that were in samplesheet but have no barcodes + // ch_fastq_dirs .filter { it[-1] == null } .map { it -> [ "${it[1]}\t${it[0]}" ] } .set { ch_samples_no_barcode } - MULTIQC_CUSTOM_FAIL_NO_BARCODES ( + 
MULTIQC_CUSTOM_FAIL_NO_BARCODES ( ch_samples_no_barcode.collect(), 'Sample', 'Missing barcode', 'fail_no_barcode_samples' ) ch_custom_no_barcodes_multiqc = MULTIQC_CUSTOM_FAIL_NO_BARCODES.out - - ch_fastq_dirs + + ch_fastq_dirs .filter { (it[1] != null) } .filter { (it[-1] != null) } .set { ch_fastq_dirs } - + } else { ch_fastq_dirs .map { barcode, dir, count -> [ barcode, barcode, dir, count ] } .set { ch_fastq_dirs } - } + } } else if (single_barcode_dir) { Channel .fromPath("${params.fastq_dir}", type: 'dir', maxDepth: 1) @@ -233,10 +233,10 @@ workflow NANOPORE { log.error "Please specify a valid folder containing ONT basecalled, barcoded fastq files generated by guppy_barcoder or guppy_basecaller e.g. '--fastq_dir ./20191023_1522_MC-110615_0_FAO93606_12bf9b4f/fastq_pass/" System.exit(1) } - - /* - * MODULE: Create custom content file for MultiQC to report samples with reads < params.min_barcode_reads - */ + + // + // MODULE: Create custom content file for MultiQC to report samples with reads < params.min_barcode_reads + // ch_fastq_dirs .branch { barcode, sample, dir, count -> pass: count > params.min_barcode_reads @@ -248,7 +248,7 @@ workflow NANOPORE { } .set { ch_pass_fail_barcode_count } - MULTIQC_CUSTOM_FAIL_BARCODE_COUNT ( + MULTIQC_CUSTOM_FAIL_BARCODE_COUNT ( ch_pass_fail_barcode_count.fail.collect(), 'Sample', 'Barcode count', @@ -261,17 +261,17 @@ workflow NANOPORE { .map { barcode, sample, dir, count -> [ [ id: sample, barcode:barcode ], dir ] } .set { ch_fastq_dirs } - /* - * MODULE: Run Artic Guppyplex - */ + // + // MODULE: Run Artic Guppyplex + // ARTIC_GUPPYPLEX ( ch_fastq_dirs ) ch_software_versions = ch_software_versions.mix(ARTIC_GUPPYPLEX.out.version.first().ifEmpty(null)) - /* - * MODULE: Create custom content file for MultiQC to report samples with reads < params.min_guppyplex_reads - */ + // + // MODULE: Create custom content file for MultiQC to report samples with reads < params.min_guppyplex_reads + // ARTIC_GUPPYPLEX .out .fastq @@ 
-284,16 +284,16 @@ workflow NANOPORE { } .set { ch_pass_fail_guppyplex_count } - MULTIQC_CUSTOM_FAIL_GUPPYPLEX_COUNT ( + MULTIQC_CUSTOM_FAIL_GUPPYPLEX_COUNT ( ch_pass_fail_guppyplex_count.fail.collect(), 'Sample', 'Read count', 'fail_guppyplex_count_samples' ) - /* - * MODULE: Nanoplot QC for FastQ files - */ + // + // MODULE: Nanoplot QC for FastQ files + // if (!params.skip_nanoplot) { NANOPLOT ( ARTIC_GUPPYPLEX.out.fastq @@ -301,9 +301,9 @@ workflow NANOPORE { ch_software_versions = ch_software_versions.mix(NANOPLOT.out.version.first().ifEmpty(null)) } - /* - * MODULE: Run Artic minion - */ + // + // MODULE: Run Artic minion + // ARTIC_MINION ( ARTIC_GUPPYPLEX.out.fastq.filter { it[-1].countFastq() > params.min_guppyplex_reads }, ch_fast5_dir, @@ -314,26 +314,26 @@ workflow NANOPORE { params.artic_scheme, params.primer_set_version ) - - /* - * SUBWORKFLOW: Filter unmapped reads from BAM - */ - FILTER_BAM_SAMTOOLS ( - ARTIC_MINION.out.bam + + // + // SUBWORKFLOW: Filter unmapped reads from BAM + // + FILTER_BAM_SAMTOOLS ( + ARTIC_MINION.out.bam ) ch_software_versions = ch_software_versions.mix(FILTER_BAM_SAMTOOLS.out.samtools_version.first().ifEmpty(null)) - /* - * MODULE: VCF stats with bcftools stats - */ - BCFTOOLS_STATS ( - ARTIC_MINION.out.vcf + // + // MODULE: VCF stats with bcftools stats + // + BCFTOOLS_STATS ( + ARTIC_MINION.out.vcf ) ch_software_versions = ch_software_versions.mix(BCFTOOLS_STATS.out.version.ifEmpty(null)) - /* - * MODULE: Genome-wide and amplicon-specific coverage QC plots - */ + // + // MODULE: Genome-wide and amplicon-specific coverage QC plots + // ch_mosdepth_multiqc = Channel.empty() if (!params.skip_mosdepth) { @@ -348,31 +348,31 @@ workflow NANOPORE { PLOT_MOSDEPTH_REGIONS_GENOME ( MOSDEPTH_GENOME.out.regions_bed.collect { it[1] } ) - + MOSDEPTH_AMPLICON ( ARTIC_MINION.out.bam_primertrimmed.join(ARTIC_MINION.out.bai_primertrimmed, by: [0]), PREPARE_GENOME.out.primer_collapsed_bed, 0 ) - PLOT_MOSDEPTH_REGIONS_AMPLICON ( + 
PLOT_MOSDEPTH_REGIONS_AMPLICON ( MOSDEPTH_AMPLICON.out.regions_bed.collect { it[1] } ) } - /* - * MODULE: Lineage analysis with Pangolin - */ + // + // MODULE: Lineage analysis with Pangolin + // ch_pangolin_multiqc = Channel.empty() if (!params.skip_pangolin) { - PANGOLIN ( + PANGOLIN ( ARTIC_MINION.out.fasta ) ch_software_versions = ch_software_versions.mix(PANGOLIN.out.version.ifEmpty(null)) - /* - * MODULE: Get Pangolin lineage information for MultiQC report - */ + // + // MODULE: Get Pangolin lineage information for MultiQC report + // PANGOLIN .out .report @@ -382,7 +382,7 @@ workflow NANOPORE { } .set { ch_pangolin_multiqc } - MULTIQC_CUSTOM_PANGOLIN ( + MULTIQC_CUSTOM_PANGOLIN ( ch_pangolin_multiqc.collect(), 'Sample', 'Lineage', @@ -391,42 +391,42 @@ workflow NANOPORE { .set { ch_pangolin_multiqc } } - /* - * MODULE: Clade assignment, mutation calling, and sequence quality checks with Nextclade - */ + // + // MODULE: Clade assignment, mutation calling, and sequence quality checks with Nextclade + // if (!params.skip_nextclade) { - NEXTCLADE ( + NEXTCLADE ( ARTIC_MINION.out.fasta, 'csv' ) ch_software_versions = ch_software_versions.mix(NEXTCLADE.out.version.ifEmpty(null)) } - - /* - * MODULE: Consensus QC across all samples with QUAST - */ + + // + // MODULE: Consensus QC across all samples with QUAST + // ch_quast_multiqc = Channel.empty() if (!params.skip_variants_quast) { - QUAST ( + QUAST ( ARTIC_MINION.out.fasta.collect{ it[1] }, - PREPARE_GENOME.out.fasta, - PREPARE_GENOME.out.gff, - true, + PREPARE_GENOME.out.fasta, + PREPARE_GENOME.out.gff, + true, params.gff ) ch_quast_multiqc = QUAST.out.tsv ch_software_versions = ch_software_versions.mix(QUAST.out.version.ifEmpty(null)) } - /* - * SUBWORKFLOW: Annotate variants with snpEff - */ + // + // SUBWORKFLOW: Annotate variants with snpEff + // ch_snpeff_multiqc = Channel.empty() if (params.gff && !params.skip_snpeff) { - SNPEFF_SNPSIFT ( - ARTIC_MINION.out.vcf, - PREPARE_GENOME.out.snpeff_db, - 
PREPARE_GENOME.out.snpeff_config, + SNPEFF_SNPSIFT ( + ARTIC_MINION.out.vcf, + PREPARE_GENOME.out.snpeff_db, + PREPARE_GENOME.out.snpeff_config, PREPARE_GENOME.out.fasta ) ch_snpeff_multiqc = SNPEFF_SNPSIFT.out.csv @@ -434,9 +434,9 @@ workflow NANOPORE { ch_software_versions = ch_software_versions.mix(SNPEFF_SNPSIFT.out.snpsift_version.ifEmpty(null)) } - /* - * MODULE: Variant screenshots with ASCIIGenome - */ + // + // MODULE: Variant screenshots with ASCIIGenome + // if (!params.skip_asciigenome) { ARTIC_MINION .out @@ -461,10 +461,9 @@ workflow NANOPORE { ch_software_versions = ch_software_versions.mix(ASCIIGENOME.out.version.ifEmpty(null)) } - /* - * MODULE: Pipeline reporting - */ - // Get unique list of files containing version information + // + // MODULE: Pipeline reporting + // ch_software_versions .map { it -> if (it) [ it.baseName, it ] } .groupTuple() @@ -473,13 +472,13 @@ workflow NANOPORE { .collect() .set { ch_software_versions } - GET_SOFTWARE_VERSIONS ( + GET_SOFTWARE_VERSIONS ( ch_software_versions ) - /* - * MODULE: MultiQC - */ + // + // MODULE: MultiQC + // if (!params.skip_multiqc) { workflow_summary = WorkflowCommons.paramsSummaryMultiqc(workflow, summary_params) ch_workflow_summary = Channel.value(workflow_summary) diff --git a/workflows/sra_download.nf b/workflows/sra_download.nf index 28d8afd2..e3353cc1 100644 --- a/workflows/sra_download.nf +++ b/workflows/sra_download.nf @@ -4,15 +4,15 @@ ======================================================================================== */ -if (params.public_data_ids) { +if (params.public_data_ids) { Channel .from(file(params.public_data_ids, checkIfExists: true)) .splitCsv(header:false, sep:'', strip:true) .map { it[0] } .unique() .set { ch_public_data_ids } -} else { - exit 1, 'Input file with public database ids not specified!' +} else { + exit 1, 'Input file with public database ids not specified!' 
} /* @@ -38,16 +38,16 @@ include { SRA_MERGE_SAMPLESHEET } from '../modules/local/sra_merge_samplesheet' workflow SRA_DOWNLOAD { - /* - * MODULE: Get SRA run information for public database ids - */ + // + // MODULE: Get SRA run information for public database ids + // SRA_IDS_TO_RUNINFO ( ch_public_data_ids ) - /* - * MODULE: Parse SRA run information, create file containing FTP links and read into workflow as [ meta, [reads] ] - */ + // + // MODULE: Parse SRA run information, create file containing FTP links and read into workflow as [ meta, [reads] ] + // SRA_RUNINFO_TO_FTP ( SRA_IDS_TO_RUNINFO.out.tsv ) @@ -56,8 +56,8 @@ workflow SRA_DOWNLOAD { .out .tsv .splitCsv(header:true, sep:'\t') - .map { - meta -> + .map { + meta -> meta.single_end = meta.single_end.toBoolean() [ meta, [ meta.fastq_1, meta.fastq_2 ] ] } @@ -65,36 +65,35 @@ workflow SRA_DOWNLOAD { .set { ch_sra_reads } if (!params.skip_sra_fastq_download) { - /* - * MODULE: If FTP link is provided in run information then download FastQ directly via FTP and validate with md5sums - */ + // + // MODULE: If FTP link is provided in run information then download FastQ directly via FTP and validate with md5sums + // SRA_FASTQ_FTP ( ch_sra_reads.map { meta, reads -> if (meta.fastq_1) [ meta, reads ] } ) - /* - * MODULE: Stage FastQ files downloaded by SRA together and auto-create a samplesheet for the pipeline - */ + // + // MODULE: Stage FastQ files downloaded by SRA together and auto-create a samplesheet for the pipeline + // SRA_TO_SAMPLESHEET ( SRA_FASTQ_FTP.out.fastq ) - /* - * MODULE: Create a merged samplesheet across all samples for the pipeline - */ + // + // MODULE: Create a merged samplesheet across all samples for the pipeline + // SRA_MERGE_SAMPLESHEET ( SRA_TO_SAMPLESHEET.out.csv.collect{it[1]} ) - /* - * If ids don't have a direct FTP download link write them to file for download outside of the pipeline - */ + // + // If ids don't have a direct FTP download link write them to file for download 
outside of the pipeline + // def no_ids_file = ["${params.outdir}", "${modules['sra_fastq_ftp'].publish_dir}", "IDS_NOT_DOWNLOADED.txt" ].join(File.separator) ch_sra_reads .map { meta, reads -> if (!meta.fastq_1) "${meta.id.split('_')[0..-2].join('_')}" } .unique() .collectFile(name: no_ids_file, sort: true, newLine: true) - } }