From 263e99f8bf36375b6594134ca36b4e65de3bd2b8 Mon Sep 17 00:00:00 2001 From: Harshil Patel Date: Tue, 11 May 2021 10:33:03 +0100 Subject: [PATCH 01/17] Update NXF versions in CI files --- .github/workflows/ci.yml | 10 +++++----- .github/workflows/linting.yml | 3 --- 2 files changed, 5 insertions(+), 8 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 290b349a..ada24797 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -20,7 +20,7 @@ jobs: strategy: matrix: # Nextflow versions: check pipeline minimum and current latest - nxf_ver: [21.04.0] + nxf_ver: [21.04.0, ''] steps: - name: Check out pipeline code uses: actions/checkout@v2 @@ -42,7 +42,7 @@ jobs: if: ${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'nf-core/viralrecon') }} runs-on: ubuntu-latest env: - NXF_VER: '21.04.0' + NXF_VER: ${{ matrix.nxf_ver }} NXF_ANSI_LOG: false strategy: matrix: @@ -67,7 +67,7 @@ jobs: if: ${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'nf-core/viralrecon') }} runs-on: ubuntu-latest env: - NXF_VER: '21.04.0' + NXF_VER: ${{ matrix.nxf_ver }} NXF_ANSI_LOG: false strategy: matrix: @@ -93,7 +93,7 @@ jobs: if: ${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'nf-core/viralrecon') }} runs-on: ubuntu-latest env: - NXF_VER: '21.04.0' + NXF_VER: ${{ matrix.nxf_ver }} NXF_ANSI_LOG: false strategy: matrix: @@ -118,7 +118,7 @@ jobs: if: ${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'nf-core/viralrecon') }} runs-on: ubuntu-latest env: - NXF_VER: '21.04.0' + NXF_VER: ${{ matrix.nxf_ver }} NXF_ANSI_LOG: false strategy: matrix: diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml index 0ff7a870..5153b795 100644 --- a/.github/workflows/linting.yml +++ b/.github/workflows/linting.yml @@ -84,11 +84,8 @@ jobs: repo-token: ${{ secrets.GITHUB_TOKEN }} allow-repeats: 
false - nf-core: runs-on: ubuntu-latest - env: - NXF_VER: 21.03.0-edge steps: - name: Check out pipeline code From 12d26d0f3db3e92df90a1746c9ac9e226cf58a42 Mon Sep 17 00:00:00 2001 From: Harshil Patel Date: Tue, 11 May 2021 11:17:59 +0100 Subject: [PATCH 02/17] Add editor config CI --- .editorconfig | 24 +++++++++++++++++++ .github/workflows/linting.yml | 16 ++++++++++++- .github/markdownlint.yml => .markdownlint.yml | 2 ++ 3 files changed, 41 insertions(+), 1 deletion(-) create mode 100644 .editorconfig rename .github/markdownlint.yml => .markdownlint.yml (91%) diff --git a/.editorconfig b/.editorconfig new file mode 100644 index 00000000..2095f8e5 --- /dev/null +++ b/.editorconfig @@ -0,0 +1,24 @@ +root = true + +[*] +charset = utf-8 +end_of_line = lf +insert_final_newline = true +trim_trailing_whitespace = true +indent_size = 4 +indent_style = space + +[*.{yml,yaml}] +indent_size = 2 + +# These files are edited upstream in nf-core/modules +[/modules/**] +charset = unset +end_of_line = unset +insert_final_newline = unset +trim_trailing_whitespace = unset +indent_style = unset +indent_size = unset + +[/assets/email*] +indent_size = unset diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml index 5153b795..8aaf33ef 100644 --- a/.github/workflows/linting.yml +++ b/.github/workflows/linting.yml @@ -18,7 +18,7 @@ jobs: - name: Install markdownlint run: npm install -g markdownlint-cli - name: Run Markdownlint - run: markdownlint ${GITHUB_WORKSPACE} -c ${GITHUB_WORKSPACE}/.github/markdownlint.yml + run: markdownlint ${GITHUB_WORKSPACE} -c ${GITHUB_WORKSPACE}/.markdownlint.yml # If the above check failed, post a comment on the PR explaining the failure - name: Post PR comment @@ -46,6 +46,20 @@ jobs: repo-token: ${{ secrets.GITHUB_TOKEN }} allow-repeats: false + EditorConfig: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + + - uses: actions/setup-node@v1 + with: + node-version: "10" + + - name: Install editorconfig-checker + run: 
npm install -g editorconfig-checker + + - name: Run ECLint check + run: editorconfig-checker -exclude README.md $(git ls-files | grep -v test) YAML: runs-on: ubuntu-latest diff --git a/.github/markdownlint.yml b/.markdownlint.yml similarity index 91% rename from .github/markdownlint.yml rename to .markdownlint.yml index c8a3bcac..e7fc97a7 100644 --- a/.github/markdownlint.yml +++ b/.markdownlint.yml @@ -1,6 +1,8 @@ # Markdownlint configuration file default: true line-length: false +ul-indent: + indent: 4 no-duplicate-header: siblings_only: true no-inline-html: From 36f9cd603a9b37f2414fe10d70904874ee81ef71 Mon Sep 17 00:00:00 2001 From: Harshil Patel Date: Tue, 11 May 2021 11:29:07 +0100 Subject: [PATCH 03/17] Fix markdownlint --- CHANGELOG.md | 40 ++--- docs/README.md | 4 +- docs/output.md | 420 ++++++++++++++++++++++++------------------------- docs/usage.md | 16 +- 4 files changed, 240 insertions(+), 240 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c0da5002..72c5c157 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,9 +14,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 * Variant graph processes to call variants relative to the reference genome directly from _de novo_ assemblies have been deprecated and removed * Variant calling with Varscan 2 has been deprecated and removed due to [licensing restrictions](https://github.com/dkoboldt/varscan/issues/12) * New tools: - * [Pangolin](https://github.com/cov-lineages/pangolin) for lineage analysis - * [Nextclade](https://github.com/nextstrain/nextclade) for clade assignment, mutation calling and consensus sequence quality checks - * [ASCIIGenome](https://asciigenome.readthedocs.io/en/latest/) for individual variant screenshots with annotation tracks + * [Pangolin](https://github.com/cov-lineages/pangolin) for lineage analysis + * [Nextclade](https://github.com/nextstrain/nextclade) for clade assignment, mutation calling and consensus sequence quality checks + * 
[ASCIIGenome](https://asciigenome.readthedocs.io/en/latest/) for individual variant screenshots with annotation tracks ### Other enhancements & fixes @@ -99,8 +99,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 | `--unqualified_percent_limit` | | | `--varscan2_strand_filter` | | -> **NB:** Parameter has been __updated__ if both old and new parameter information is present. -> **NB:** Parameter has been __added__ if just the new parameter information is present. +> **NB:** Parameter has been __updated__ if both old and new parameter information is present. +> **NB:** Parameter has been __added__ if just the new parameter information is present. > **NB:** Parameter has been __removed__ if new parameter information isn't present. ### Software dependencies @@ -150,9 +150,9 @@ Note, since the pipeline is now using Nextflow DSL2, each process will be run wi | `varscan` | 2.4.4 | | | `vg` | 1.24.0 | | -> **NB:** Dependency has been __updated__ if both old and new version information is present. -> **NB:** Dependency has been __added__ if just the new version information is present. -> **NB:** Dependency has been __removed__ if new version information isn't present. +> **NB:** Dependency has been __updated__ if both old and new version information is present. +> **NB:** Dependency has been __added__ if just the new version information is present. +> **NB:** Dependency has been __removed__ if new version information isn't present. 
## [[1.1.0](https://github.com/nf-core/rnaseq/releases/tag/1.1.0)] - 2020-06-23 @@ -162,18 +162,18 @@ Note, since the pipeline is now using Nextflow DSL2, each process will be run wi * [#124](https://github.com/nf-core/viralrecon/issues/124) - Intersect variants across callers * [nf-core/tools#616](https://github.com/nf-core/tools/pull/616) - Updated GitHub Actions to build Docker image and push to Docker Hub * Parameters: - * `--min_mapped_reads` to circumvent failures for samples with low number of mapped reads - * `--varscan2_strand_filter` to toggle the default Varscan 2 strand filter - * `--skip_mosdepth` - skip genome-wide and amplicon coverage plot generation from mosdepth output - * `--amplicon_left_suffix` - to provide left primer suffix used in name field of `--amplicon_bed` - * `--amplicon_right_suffix` - to provide right primer suffix used in name field of `--amplicon_bed` - * Unify parameter specification with COG-UK pipeline: - * `--min_allele_freq` - minimum allele frequency threshold for calling variants - * `--mpileup_depth` - SAMTools mpileup max per-file depth - * `--ivar_exclude_reads` renamed to `--ivar_trim_noprimer` - * `--ivar_trim_min_len` - minimum length of read to retain after primer trimming - * `--ivar_trim_min_qual` - minimum quality threshold for sliding window to pass - * `--ivar_trim_window_width` - width of sliding window + * `--min_mapped_reads` to circumvent failures for samples with low number of mapped reads + * `--varscan2_strand_filter` to toggle the default Varscan 2 strand filter + * `--skip_mosdepth` - skip genome-wide and amplicon coverage plot generation from mosdepth output + * `--amplicon_left_suffix` - to provide left primer suffix used in name field of `--amplicon_bed` + * `--amplicon_right_suffix` - to provide right primer suffix used in name field of `--amplicon_bed` + * Unify parameter specification with COG-UK pipeline: + * `--min_allele_freq` - minimum allele frequency threshold for calling variants + * 
`--mpileup_depth` - SAMTools mpileup max per-file depth + * `--ivar_exclude_reads` renamed to `--ivar_trim_noprimer` + * `--ivar_trim_min_len` - minimum length of read to retain after primer trimming + * `--ivar_trim_min_qual` - minimum quality threshold for sliding window to pass + * `--ivar_trim_window_width` - width of sliding window * [#118] Updated GitHub Actions AWS workflow for small and full size tests. ### Removed diff --git a/docs/README.md b/docs/README.md index 276d9690..0e457111 100644 --- a/docs/README.md +++ b/docs/README.md @@ -3,8 +3,8 @@ The nf-core/viralrecon documentation is split into the following pages: * [Usage](usage.md) - * An overview of how the pipeline works, how to run it and a description of all of the different command-line flags. + * An overview of how the pipeline works, how to run it and a description of all of the different command-line flags. * [Output](output.md) - * An overview of the different results produced by the pipeline and how to interpret them. + * An overview of the different results produced by the pipeline and how to interpret them. 
You can find a lot more documentation about installing, configuring and running nf-core pipelines on the website: [https://nf-co.re](https://nf-co.re) diff --git a/docs/output.md b/docs/output.md index 51503207..dec6af9d 100644 --- a/docs/output.md +++ b/docs/output.md @@ -8,22 +8,22 @@ The directories listed below will be created in the results directory after the # Nanopore: Pipeline overview * [Preprocessing](#nanopore-preprocessing) - * [pycoQC](#nanopore-pycoqc) - Sequencing QC - * [artic guppyplex](#nanopore-artic-guppyplex) - Aggregate pre-demultiplexed reads from MinKNOW/Guppy - * [NanoPlot](#nanopore-nanoplot) - Read QC + * [pycoQC](#nanopore-pycoqc) - Sequencing QC + * [artic guppyplex](#nanopore-artic-guppyplex) - Aggregate pre-demultiplexed reads from MinKNOW/Guppy + * [NanoPlot](#nanopore-nanoplot) - Read QC * [Variant calling](#nanopore-variant-calling) - * [artic minion](#nanopore-artic-minion) - Align reads, call variants and generate consensus sequence + * [artic minion](#nanopore-artic-minion) - Align reads, call variants and generate consensus sequence * [Downstream analysis](#nanopore-downstream-analysis) - * [SAMtools](#nanopore-samtools) - Remove unmapped reads and obtain alignment metrics - * [mosdepth](#nanopore-mosdepth) - Genome-wide and amplicon coverage QC plots - * [BCFTools](#nanopore-bcftools) - Variant count metrics - * [SnpEff and SnpSift](#nanopore-snpeff-and-snpsift) - Genetic variant annotation and functional effect prediction - * [QUAST](#nanopore-quast) - Consensus assessment report - * [Pangolin](#nanopore-pangolin) - Lineage analysis - * [Nextclade](#nanopore-nextclade) - Clade assignment, mutation calling and sequence quality checks - * [ASCIIGenome](#nanopore-asciigenome) - Individual variant screenshots with annotation tracks + * [SAMtools](#nanopore-samtools) - Remove unmapped reads and obtain alignment metrics + * [mosdepth](#nanopore-mosdepth) - Genome-wide and amplicon coverage QC plots + * 
[BCFTools](#nanopore-bcftools) - Variant count metrics + * [SnpEff and SnpSift](#nanopore-snpeff-and-snpsift) - Genetic variant annotation and functional effect prediction + * [QUAST](#nanopore-quast) - Consensus assessment report + * [Pangolin](#nanopore-pangolin) - Lineage analysis + * [Nextclade](#nanopore-nextclade) - Clade assignment, mutation calling and sequence quality checks + * [ASCIIGenome](#nanopore-asciigenome) - Individual variant screenshots with annotation tracks * [Workflow reporting](#nanopore-workflow-reporting) - * [MultiQC](#nanopore-multiqc) - Present QC, visualisation and custom reporting for sequencing, raw reads, alignment and variant calling results + * [MultiQC](#nanopore-multiqc) - Present QC, visualisation and custom reporting for sequencing, raw reads, alignment and variant calling results ## Nanopore: Preprocessing @@ -34,15 +34,15 @@ A file called `summary_variants_metrics_mqc.csv` containing a selection of read
Output files -* `pycoqc/` - * `*.html` and `.json` file that includes a run summary and graphical representation of various QC metrics including distribution of read length, distribution of read quality scores, mean read quality per sequence length, output per channel over experiment time and percentage of reads per barcode. +* `pycoqc/` + * `*.html` and `.json` file that includes a run summary and graphical representation of various QC metrics including distribution of read length, distribution of read quality scores, mean read quality per sequence length, output per channel over experiment time and percentage of reads per barcode.
[PycoQC](https://github.com/a-slide/pycoQC) computes metrics and generates QC plots using the sequencing summary information generated by basecalling/demultiplexing tools such as Guppy e.g. distribution of read length, read length over time, number of reads per barcode and other general stats.

- PycoQC - Number of reads per barcode + PycoQC - Number of reads per barcode

### Nanopore: artic guppyplex @@ -50,8 +50,8 @@ A file called `summary_variants_metrics_mqc.csv` containing a selection of read
Output files -* `guppyplex/` - * `*.fastq.gz` files generated by aggregate pre-demultiplexed reads from MinKNOW/Guppy. These files are not saved by default but can be via a custom config file such as the one below. +* `guppyplex/` + * `*.fastq.gz` files generated by aggregate pre-demultiplexed reads from MinKNOW/Guppy. These files are not saved by default but can be via a custom config file such as the one below. ```nextflow params { @@ -72,15 +72,15 @@ The [artic guppyplex](https://artic.readthedocs.io/en/latest/commands/) tool fro
Output files -* `nanoplot//` - * Per-sample `*.html` files for QC metrics and individual `*.png` image files for plots. +* `nanoplot//` + * Per-sample `*.html` files for QC metrics and individual `*.png` image files for plots.
[NanoPlot](https://github.com/wdecoster/NanoPlot) is a tool that can be used to produce general quality metrics from various Nanopore-based input files including fastq files e.g. quality score distribution, read lengths and other general stats.

- Nanoplot - Read quality vs read length + Nanoplot - Read quality vs read length

## Nanopore: Variant calling @@ -91,18 +91,18 @@ The [artic guppyplex](https://artic.readthedocs.io/en/latest/commands/) tool fro Output files * `/` - * `*.consensus.fasta`: Consensus fasta file generated by artic minion. - * `*.pass.vcf.gz`: VCF file containing variants passing quality filters. - * `*.pass.vcf.gz.tbi`: VCF index file containing variants passing quality filters. - * `*.primers.vcf`: VCF file containing variants found in primer-binding regions. - * `*.merged.vcf`: VCF file containing all detected variants. - * `*.fail.vcf`: VCF file containing variants failing quality filters. - * `*.sorted.bam`: BAM file generated by initial alignment. - * `*.sorted.bam.bai`: BAM index file generated by initial alignment. - * `*.trimmed.rg.sorted.bam`: BAM file without primer-binding site trimming. - * `*.trimmed.rg.sorted.bam.bai`: BAM index file without primer-binding site trimming. - * `*.primertrimmed.rg.sorted.bam`: BAM file generated after primer-binding site trimming. - * `*.primertrimmed.rg.sorted.bam.bai`: BAM index file generated after primer-binding site trimming. + * `*.consensus.fasta`: Consensus fasta file generated by artic minion. + * `*.pass.vcf.gz`: VCF file containing variants passing quality filters. + * `*.pass.vcf.gz.tbi`: VCF index file containing variants passing quality filters. + * `*.primers.vcf`: VCF file containing variants found in primer-binding regions. + * `*.merged.vcf`: VCF file containing all detected variants. + * `*.fail.vcf`: VCF file containing variants failing quality filters. + * `*.sorted.bam`: BAM file generated by initial alignment. + * `*.sorted.bam.bai`: BAM index file generated by initial alignment. + * `*.trimmed.rg.sorted.bam`: BAM file without primer-binding site trimming. + * `*.trimmed.rg.sorted.bam.bai`: BAM index file without primer-binding site trimming. + * `*.primertrimmed.rg.sorted.bam`: BAM file generated after primer-binding site trimming. 
+ * `*.primertrimmed.rg.sorted.bam.bai`: BAM index file generated after primer-binding site trimming. **NB:** The value of `` in the output directory name above is determined by the `--artic_minion_caller` parameter (Default: 'nanopolish'). @@ -118,10 +118,10 @@ The [artic minion](https://artic.readthedocs.io/en/latest/commands/) tool from t Output files * `/` - * `*.mapped.sorted.bam`: Coordinate sorted BAM file containing read alignment information. - * `*.mapped.sorted.bam.bai`: Index file for coordinate sorted BAM file. -* `/samtools_stats/` - * SAMtools `*.mapped.sorted.bam.flagstat`, `*.mapped.sorted.bam.idxstats` and `*.mapped.sorted.bam.stats` files generated from the alignment files. + * `*.mapped.sorted.bam`: Coordinate sorted BAM file containing read alignment information. + * `*.mapped.sorted.bam.bai`: Index file for coordinate sorted BAM file. +* `/samtools_stats/` + * SAMtools `*.mapped.sorted.bam.flagstat`, `*.mapped.sorted.bam.idxstats` and `*.mapped.sorted.bam.stats` files generated from the alignment files. **NB:** The value of `` in the output directory name above is determined by the `--artic_minion_caller` parameter (Default: 'nanopolish'). @@ -137,16 +137,16 @@ BAM files containing the original alignments from either Minimap2 or BWA are fur Output files * `/mosdepth/genome/` - * `all_samples.mosdepth.coverage.tsv`: File aggregating genome-wide coverage values across all samples used for plotting. - * `*.mosdepth.coverage.pdf`: Whole-genome coverage plot. - * `*.mosdepth.coverage.tsv`: File containing coverage values for the above plot. - * `*.mosdepth.summary.txt`: Summary metrics including mean, min and max coverage values. + * `all_samples.mosdepth.coverage.tsv`: File aggregating genome-wide coverage values across all samples used for plotting. + * `*.mosdepth.coverage.pdf`: Whole-genome coverage plot. + * `*.mosdepth.coverage.tsv`: File containing coverage values for the above plot. 
+ * `*.mosdepth.summary.txt`: Summary metrics including mean, min and max coverage values. * `/mosdepth/amplicon/` - * `all_samples.mosdepth.coverage.tsv`: File aggregating per-amplicon coverage values across all samples used for plotting. - * `all_samples.mosdepth.heatmap.pdf`: Heatmap showing per-amplicon coverage across all samples. - * `*.mosdepth.coverage.pdf`: Bar plot showing per-amplicon coverage for an individual sample. - * `*.mosdepth.coverage.tsv`: File containing per-amplicon coverage values for the above plot. - * `*.mosdepth.summary.txt`: Summary metrics including mean, min and max coverage values. + * `all_samples.mosdepth.coverage.tsv`: File aggregating per-amplicon coverage values across all samples used for plotting. + * `all_samples.mosdepth.heatmap.pdf`: Heatmap showing per-amplicon coverage across all samples. + * `*.mosdepth.coverage.pdf`: Bar plot showing per-amplicon coverage for an individual sample. + * `*.mosdepth.coverage.tsv`: File containing per-amplicon coverage values for the above plot. + * `*.mosdepth.summary.txt`: Summary metrics including mean, min and max coverage values. **NB:** The value of `` in the output directory name above is determined by the `--artic_minion_caller` parameter (Default: 'nanopolish'). @@ -159,7 +159,7 @@ BAM files containing the original alignments from either Minimap2 or BWA are fur ![R - Sample genome-wide coverage plot](images/r_genome_coverage.png)

- R - Sample per-amplicon coverage plot + R - Sample per-amplicon coverage plot

### Nanopore: BCFTools @@ -168,7 +168,7 @@ BAM files containing the original alignments from either Minimap2 or BWA are fur Output files * `/bcftools_stats/` - * `*.bcftools_stats.txt`: Statistics and counts obtained from VCF file. + * `*.bcftools_stats.txt`: Statistics and counts obtained from VCF file. **NB:** The value of `` in the output directory name above is determined by the `--artic_minion_caller` parameter (Default: 'nanopolish'). @@ -184,14 +184,14 @@ BAM files containing the original alignments from either Minimap2 or BWA are fur Output files * `/snpeff/` - * `*.snpeff.csv`: Variant annotation csv file. - * `*.snpeff.genes.txt`: Gene table for annotated variants. - * `*.snpeff.summary.html`: Summary html file for variants. - * `*.snpeff.vcf.gz`: VCF file with variant annotations. - * `*.snpeff.vcf.gz.tbi`: Index for VCF file with variant annotations. - * `*.snpsift.txt`: SnpSift summary table. + * `*.snpeff.csv`: Variant annotation csv file. + * `*.snpeff.genes.txt`: Gene table for annotated variants. + * `*.snpeff.summary.html`: Summary html file for variants. + * `*.snpeff.vcf.gz`: VCF file with variant annotations. + * `*.snpeff.vcf.gz.tbi`: Index for VCF file with variant annotations. + * `*.snpsift.txt`: SnpSift summary table. * `/snpeff/bcftools_stats/` - * `*.snpeff.bcftools_stats.txt`: Statistics and counts obtained from SnpEff VCF file. + * `*.snpeff.bcftools_stats.txt`: Statistics and counts obtained from SnpEff VCF file. **NB:** The value of `` in the output directory name above is determined by the `--artic_minion_caller` parameter (Default: 'nanopolish'). @@ -209,7 +209,7 @@ BAM files containing the original alignments from either Minimap2 or BWA are fur Output files * `/quast/` - * `report.html`: Results report in HTML format. Also available in various other file formats i.e. `report.pdf`, `report.tex`, `report.tsv` and `report.txt`. + * `report.html`: Results report in HTML format. Also available in various other file formats i.e. 
`report.pdf`, `report.tex`, `report.tsv` and `report.txt`. **NB:** The value of `` in the output directory name above is determined by the `--artic_minion_caller` parameter (Default: 'nanopolish'). @@ -223,7 +223,7 @@ BAM files containing the original alignments from either Minimap2 or BWA are fur Output files * `/pangolin/` - * `*.pangolin.csv`: Lineage analysis results from Pangolin. + * `*.pangolin.csv`: Lineage analysis results from Pangolin. **NB:** The value of `` in the output directory name above is determined by the `--artic_minion_caller` parameter (Default: 'nanopolish'). @@ -237,7 +237,7 @@ Phylogenetic Assignment of Named Global Outbreak LINeages ([Pangolin](https://gi Output files * `/nextclade/` - * `*.csv`: Analysis results from Nextlade containing genome clade assignment, mutation calling and sequence quality checks. + * `*.csv`: Analysis results from Nextlade containing genome clade assignment, mutation calling and sequence quality checks. **NB:** The value of `` in the output directory name above is determined by the `--artic_minion_caller` parameter (Default: 'nanopolish'). @@ -251,7 +251,7 @@ Phylogenetic Assignment of Named Global Outbreak LINeages ([Pangolin](https://gi Output files * `/asciigenome//` - * `*.pdf`: Individual variant screenshots with annotation tracks in PDF format. + * `*.pdf`: Individual variant screenshots with annotation tracks in PDF format. **NB:** The value of `` in the output directory name above is determined by the `--artic_minion_caller` parameter (Default: 'nanopolish'). @@ -260,7 +260,7 @@ Phylogenetic Assignment of Named Global Outbreak LINeages ([Pangolin](https://gi As described in the documentation, [ASCIIGenome](https://asciigenome.readthedocs.io/en/latest/) is a command-line genome browser that can be run from a terminal window and is solely based on ASCII characters. 
The closest program to ASCIIGenome is probably [samtools tview](http://www.htslib.org/doc/samtools-tview.html) but ASCIIGenome offers much more flexibility, similar to popular GUI viewers like the [IGV](https://software.broadinstitute.org/software/igv/) browser. We are using the batch processing mode of ASCIIGenome in this pipeline to generate individual screenshots for all of the variant sites reported for each sample in the VCF files. This is incredibly useful to be able to quickly QC the variants called by the pipeline without having to tediously load all of the relevant tracks into a conventional genome browser. Where possible, the BAM read alignments, VCF variant file, primer BED file and GFF annotation track will be represented in the screenshot for contextual purposes. The screenshot below shows a SNP called relative to the MN908947.3 SARS-CoV-2 reference genome that overlaps the ORF7a protein and the nCoV-2019_91_LEFT primer from the ARTIC v3 protocol.

- ASCIIGenome screenshot + ASCIIGenome screenshot

## Nanopore: Workflow reporting @@ -270,10 +270,10 @@ As described in the documentation, [ASCIIGenome](https://asciigenome.readthedocs
Output files -* `multiqc//` - * `multiqc_report.html`: a standalone HTML file that can be viewed in your web browser. - * `multiqc_data/`: directory containing parsed statistics from the different tools used in the pipeline. - * `summary_variants_metrics_mqc.csv`: file containing a selection of read alignmnet and variant calling metrics. The same metrics will also be added to the top of the MultiQC report. +* `multiqc//` + * `multiqc_report.html`: a standalone HTML file that can be viewed in your web browser. + * `multiqc_data/`: directory containing parsed statistics from the different tools used in the pipeline. + * `summary_variants_metrics_mqc.csv`: file containing a selection of read alignmnet and variant calling metrics. The same metrics will also be added to the top of the MultiQC report.
@@ -290,34 +290,34 @@ An example MultiQC report generated from a full-sized dataset can be viewed on t # Illumina: Pipeline overview * [Preprocessing](#illumina-preprocessing) - * [cat](#cat) - Merge re-sequenced FastQ files - * [FastQC](#fastqc) - Raw read QC - * [fastp](#fastp) - Adapter and quality trimming - * [Kraken 2](#kraken-2) - Removal/QC for host reads + * [cat](#cat) - Merge re-sequenced FastQ files + * [FastQC](#fastqc) - Raw read QC + * [fastp](#fastp) - Adapter and quality trimming + * [Kraken 2](#kraken-2) - Removal/QC for host reads * [Variant calling](#illumina-variant-calling) - * [Bowtie 2](#bowtie-2) - Read alignment relative to reference genome - * [SAMtools](#samtools) - Sort, index and generate metrics for alignments - * [iVar trim](#ivar-trim) - Primer sequence removal for amplicon data - * [picard MarkDuplicates](#picard-markduplicates) - Duplicate read marking and removal - * [picard CollectMultipleMetrics](#picard-collectmultiplemetrics) - Alignment metrics - * [mosdepth](#mosdepth) - Whole-genome and amplicon coverage metrics - * [iVar variants and iVar consensus](#ivar-variants-and-ivar-consensus) *||* [BCFTools and BEDTools](#bcftools-and-bedtools) - Variant calling and consensus sequence generation - * [SnpEff and SnpSift](#snpeff-and-snpsift) - Genetic variant annotation and functional effect prediction - * [QUAST](#quast) - Consensus assessment report - * [Pangolin](#pangolin) - Lineage analysis - * [Nextclade](#nextclade) - Clade assignment, mutation calling and sequence quality checks - * [ASCIIGenome](#asciigenome) - Individual variant screenshots with annotation tracks - * [BCFTools isec](#bcftools-isec) - Intersect variants across all callers + * [Bowtie 2](#bowtie-2) - Read alignment relative to reference genome + * [SAMtools](#samtools) - Sort, index and generate metrics for alignments + * [iVar trim](#ivar-trim) - Primer sequence removal for amplicon data + * [picard MarkDuplicates](#picard-markduplicates) - Duplicate read 
marking and removal + * [picard CollectMultipleMetrics](#picard-collectmultiplemetrics) - Alignment metrics + * [mosdepth](#mosdepth) - Whole-genome and amplicon coverage metrics + * [iVar variants and iVar consensus](#ivar-variants-and-ivar-consensus) *||* [BCFTools and BEDTools](#bcftools-and-bedtools) - Variant calling and consensus sequence generation + * [SnpEff and SnpSift](#snpeff-and-snpsift) - Genetic variant annotation and functional effect prediction + * [QUAST](#quast) - Consensus assessment report + * [Pangolin](#pangolin) - Lineage analysis + * [Nextclade](#nextclade) - Clade assignment, mutation calling and sequence quality checks + * [ASCIIGenome](#asciigenome) - Individual variant screenshots with annotation tracks + * [BCFTools isec](#bcftools-isec) - Intersect variants across all callers * [De novo assembly](#illumina-de-novo-assembly) - * [Cutadapt](#cutadapt) - Primer trimming for amplicon data - * [SPAdes](#spades) *||* [Unicycler](#unicycler) *||* [minia](#minia) - Viral genome assembly - * [BLAST](#blast) - Blast to reference assembly - * [ABACAS](#abacas) - Order contigs according to reference genome - * [PlasmidID](#plasmidid) - Assembly report and visualisation - * [Assembly QUAST](#assembly-quast) - Assembly quality assessment + * [Cutadapt](#cutadapt) - Primer trimming for amplicon data + * [SPAdes](#spades) *||* [Unicycler](#unicycler) *||* [minia](#minia) - Viral genome assembly + * [BLAST](#blast) - Blast to reference assembly + * [ABACAS](#abacas) - Order contigs according to reference genome + * [PlasmidID](#plasmidid) - Assembly report and visualisation + * [Assembly QUAST](#assembly-quast) - Assembly quality assessment * [Workflow reporting and genomes](#illumina-workflow-reporting-and-genomes) - * [MultiQC](#multiqc) - Present QC for raw reads, alignment, assembly and variant calling - * [Reference genome files](#reference-genome-files) - Save reference genome indices/files + * [MultiQC](#multiqc) - Present QC for raw reads, 
alignment, assembly and variant calling + * [Reference genome files](#reference-genome-files) - Save reference genome indices/files ## Illumina: Preprocessing @@ -327,7 +327,7 @@ An example MultiQC report generated from a full-sized dataset can be viewed on t Output files * `fastq/` - * `*.merged.fastq.gz`: These files are not saved by default but can be via a custom config file such as the one below. + * `*.merged.fastq.gz`: These files are not saved by default but can be via a custom config file such as the one below. ```nextflow params { @@ -349,8 +349,8 @@ If multiple libraries/runs have been provided for the same sample in the input s Output files * `fastqc/raw/` - * `*_fastqc.html`: FastQC report containing quality metrics. - * `*_fastqc.zip`: Zip archive containing the FastQC report, tab-delimited data file and plot images. + * `*_fastqc.html`: FastQC report containing quality metrics. + * `*_fastqc.zip`: Zip archive containing the FastQC report, tab-delimited data file and plot images. **NB:** The FastQC plots in this directory are generated relative to the raw, input reads. They may contain adapter sequence and regions of low quality. To see how your reads look after trimming please refer to the FastQC reports in the `fastqc/trim/` directory. @@ -366,13 +366,13 @@ If multiple libraries/runs have been provided for the same sample in the input s Output files * `fastp/` - * `*.fastp.html`: Trimming report in html format. - * `*.fastp.json`: Trimming report in json format. + * `*.fastp.html`: Trimming report in html format. + * `*.fastp.json`: Trimming report in json format. * `fastp/log/` - * `*.fastp.log`: Trimming log file. + * `*.fastp.log`: Trimming log file. * `fastqc/trim/` - * `*_fastqc.html`: FastQC report of the trimmed reads. - * `*_fastqc.zip`: Zip archive containing the FastQC report, tab-delimited data file and plot images. + * `*_fastqc.html`: FastQC report of the trimmed reads. 
+ * `*_fastqc.zip`: Zip archive containing the FastQC report, tab-delimited data file and plot images.
@@ -386,7 +386,7 @@ If multiple libraries/runs have been provided for the same sample in the input s Output files * `kraken2/` - * `*.kraken2.report.txt`: Kraken 2 taxonomic report. See [here](https://ccb.jhu.edu/software/kraken2/index.shtml?t=manual#sample-report-output-format) for a detailed description of the format. + * `*.kraken2.report.txt`: Kraken 2 taxonomic report. See [here](https://ccb.jhu.edu/software/kraken2/index.shtml?t=manual#sample-report-output-format) for a detailed description of the format. @@ -406,7 +406,7 @@ A file called `summary_variants_metrics_mqc.csv` containing a selection of read Output files * `variants/bowtie2/log/` - * `*.bowtie2.log`: Bowtie 2 mapping log file. + * `*.bowtie2.log`: Bowtie 2 mapping log file. @@ -420,10 +420,10 @@ A file called `summary_variants_metrics_mqc.csv` containing a selection of read Output files * `variants/bowtie2/` - * `.sorted.bam`: Coordinate sorted BAM file containing read alignment information. - * `.sorted.bam.bai`: Index file for coordinate sorted BAM file. + * `.sorted.bam`: Coordinate sorted BAM file containing read alignment information. + * `.sorted.bam.bai`: Index file for coordinate sorted BAM file. * `variants/bowtie2/samtools_stats/` - * SAMtools `.sorted.bam.flagstat`, `.sorted.bam.idxstats` and `.sorted.bam.stats` files generated from the alignment files. + * SAMtools `.sorted.bam.flagstat`, `.sorted.bam.idxstats` and `.sorted.bam.stats` files generated from the alignment files. @@ -437,12 +437,12 @@ Bowtie 2 BAM files are further processed with [SAMtools](http://samtools.sourcef Output files * `variants/bowtie2/` - * `*.ivar_trim.sorted.bam`: Coordinate sorted BAM file after primer trimming. - * `*.ivar_trim.sorted.bam.bai`: Index file for coordinate sorted BAM file after primer trimming. + * `*.ivar_trim.sorted.bam`: Coordinate sorted BAM file after primer trimming. + * `*.ivar_trim.sorted.bam.bai`: Index file for coordinate sorted BAM file after primer trimming. 
* `variants/bowtie2/samtools_stats/` - * SAMtools `*.ivar_trim.sorted.bam.flagstat`, `*.ivar_trim.sorted.bam.idxstats` and `*.ivar_trim.sorted.bam.stats` files generated from the primer trimmed alignment files. + * SAMtools `*.ivar_trim.sorted.bam.flagstat`, `*.ivar_trim.sorted.bam.idxstats` and `*.ivar_trim.sorted.bam.stats` files generated from the primer trimmed alignment files. * `variants/bowtie2/log/` - * `*.ivar_trim.ivar.log`: iVar trim log file obtained from stdout. + * `*.ivar_trim.ivar.log`: iVar trim log file obtained from stdout. @@ -454,12 +454,12 @@ If the `--protocol amplicon` parameter is provided then [iVar](http://gensoft.pa Output files * `variants/bowtie2/` - * `*.markduplicates.sorted.bam`: Coordinate sorted BAM file after duplicate marking. - * `*.markduplicates.sorted.bam.bai`: Index file for coordinate sorted BAM file after duplicate marking. + * `*.markduplicates.sorted.bam`: Coordinate sorted BAM file after duplicate marking. + * `*.markduplicates.sorted.bam.bai`: Index file for coordinate sorted BAM file after duplicate marking. * `variants/bowtie2/samtools_stats/` - * SAMtools `*.markduplicates.sorted.bam.flagstat`, `*.markduplicates.sorted.bam.idxstats` and `*.markduplicates.sorted.bam.stats` files generated from the duplicate marked alignment files. + * SAMtools `*.markduplicates.sorted.bam.flagstat`, `*.markduplicates.sorted.bam.idxstats` and `*.markduplicates.sorted.bam.stats` files generated from the duplicate marked alignment files. * `variants/bowtie2/picard_metrics/` - * `*.markduplicates.sorted.MarkDuplicates.metrics.txt`: Metrics file from MarkDuplicates. + * `*.markduplicates.sorted.MarkDuplicates.metrics.txt`: Metrics file from MarkDuplicates. @@ -472,10 +472,10 @@ Unless you are using [UMIs](https://emea.illumina.com/science/sequencing-method-
Output files -* `variants/bowtie2/picard_metrics/` - * `*.CollectMultipleMetrics.*`: Alignment QC files from picard CollectMultipleMetrics in `*_metrics` textual format. -* `variants/bowtie2/picard_metrics/pdf/` - * `*.pdf` plots for metrics obtained from CollectMultipleMetrics. +* `variants/bowtie2/picard_metrics/` + * `*.CollectMultipleMetrics.*`: Alignment QC files from picard CollectMultipleMetrics in `*_metrics` textual format. +* `variants/bowtie2/picard_metrics/pdf/` + * `*.pdf` plots for metrics obtained from CollectMultipleMetrics.
@@ -489,16 +489,16 @@ Unless you are using [UMIs](https://emea.illumina.com/science/sequencing-method- Output files * `variants/bowtie2/mosdepth/genome/` - * `all_samples.mosdepth.coverage.tsv`: File aggregating genome-wide coverage values across all samples used for plotting. - * `*.mosdepth.coverage.pdf`: Whole-genome coverage plot. - * `*.mosdepth.coverage.tsv`: File containing coverage values for the above plot. - * `*.mosdepth.summary.txt`: Summary metrics including mean, min and max coverage values. + * `all_samples.mosdepth.coverage.tsv`: File aggregating genome-wide coverage values across all samples used for plotting. + * `*.mosdepth.coverage.pdf`: Whole-genome coverage plot. + * `*.mosdepth.coverage.tsv`: File containing coverage values for the above plot. + * `*.mosdepth.summary.txt`: Summary metrics including mean, min and max coverage values. * `variants/bowtie2/mosdepth/amplicon/` - * `all_samples.mosdepth.coverage.tsv`: File aggregating per-amplicon coverage values across all samples used for plotting. - * `all_samples.mosdepth.heatmap.pdf`: Heatmap showing per-amplicon coverage across all samples. - * `*.mosdepth.coverage.pdf`: Bar plot showing per-amplicon coverage for an individual sample. - * `*.mosdepth.coverage.tsv`: File containing per-amplicon coverage values for the above plot. - * `*.mosdepth.summary.txt`: Summary metrics including mean, min and max coverage values. + * `all_samples.mosdepth.coverage.tsv`: File aggregating per-amplicon coverage values across all samples used for plotting. + * `all_samples.mosdepth.heatmap.pdf`: Heatmap showing per-amplicon coverage across all samples. + * `*.mosdepth.coverage.pdf`: Bar plot showing per-amplicon coverage for an individual sample. + * `*.mosdepth.coverage.tsv`: File containing per-amplicon coverage values for the above plot. + * `*.mosdepth.summary.txt`: Summary metrics including mean, min and max coverage values. 
@@ -509,7 +509,7 @@ Unless you are using [UMIs](https://emea.illumina.com/science/sequencing-method- ![R - Sample genome-wide coverage plot](images/r_genome_coverage.png)

- R - Sample per-amplicon coverage plot + R - Sample per-amplicon coverage plot

### iVar variants and iVar consensus @@ -518,22 +518,22 @@ Unless you are using [UMIs](https://emea.illumina.com/science/sequencing-method- Output files * `variants/ivar/` - * `*.tsv`: Original iVar variants in TSV format. - * `*.vcf.gz`: iVar variants in VCF format. - * `*.vcf.gz.tbi`: iVar variants in VCF index file. + * `*.tsv`: Original iVar variants in TSV format. + * `*.vcf.gz`: iVar variants in VCF format. + * `*.vcf.gz.tbi`: iVar variants in VCF index file. * `variants/ivar/consensus/` - * `*.consensus.fa`: Consensus Fasta file generated by iVar. - * `*.consensus.qual.txt`: File with the average quality of each base in the consensus sequence. + * `*.consensus.fa`: Consensus Fasta file generated by iVar. + * `*.consensus.qual.txt`: File with the average quality of each base in the consensus sequence. * `variants/ivar/consensus/base_qc/` - * `*.ACTG_density.pdf`: Plot showing density of ACGT bases within the consensus sequence. - * `*.base_counts.pdf`: Plot showing frequency and percentages of all bases in consensus sequence. - * `*.base_counts.tsv`: File containing frequency and percentages of all bases in consensus sequence. - * `*.N_density.pdf`: Plot showing density of N bases within the consensus sequence. - * `*.N_run.tsv`: File containing start positions and width of N bases in consensus sequence. + * `*.ACTG_density.pdf`: Plot showing density of ACGT bases within the consensus sequence. + * `*.base_counts.pdf`: Plot showing frequency and percentages of all bases in consensus sequence. + * `*.base_counts.tsv`: File containing frequency and percentages of all bases in consensus sequence. + * `*.N_density.pdf`: Plot showing density of N bases within the consensus sequence. + * `*.N_run.tsv`: File containing start positions and width of N bases in consensus sequence. * `variants/ivar/log/` - * `*.variant_counts.log`: Counts for type of variants called by iVar. + * `*.variant_counts.log`: Counts for type of variants called by iVar. 
* `variants/ivar/bcftools_stats/` - * `*.bcftools_stats.txt`: Statistics and counts obtained from iVar variants VCF file. + * `*.bcftools_stats.txt`: Statistics and counts obtained from iVar variants VCF file. @@ -547,18 +547,18 @@ Unless you are using [UMIs](https://emea.illumina.com/science/sequencing-method- Output files * `variants/bcftools/` - * `*.vcf.gz`: Variants VCF file. - * `*.vcf.gz.tbi`: Variants VCF index file. + * `*.vcf.gz`: Variants VCF file. + * `*.vcf.gz.tbi`: Variants VCF index file. * `variants/bcftools/consensus/` - * `*.consensus.fa`: Consensus Fasta file generated by integrating the variants called by BCFTools into the reference genome. + * `*.consensus.fa`: Consensus Fasta file generated by integrating the variants called by BCFTools into the reference genome. * `variants/bcftools/consensus/base_qc/` - * `*.ACTG_density.pdf`: Plot showing density of ACGT bases within the consensus sequence. - * `*.base_counts.pdf`: Plot showing frequency and percentages of all bases in consensus sequence. - * `*.base_counts.tsv`: File containing frequency and percentages of all bases in consensus sequence. - * `*.N_density.pdf`: Plot showing density of N bases within the consensus sequence. - * `*.N_run.tsv`: File containing start positions and width of N bases in consensus sequence. + * `*.ACTG_density.pdf`: Plot showing density of ACGT bases within the consensus sequence. + * `*.base_counts.pdf`: Plot showing frequency and percentages of all bases in consensus sequence. + * `*.base_counts.tsv`: File containing frequency and percentages of all bases in consensus sequence. + * `*.N_density.pdf`: Plot showing density of N bases within the consensus sequence. + * `*.N_run.tsv`: File containing start positions and width of N bases in consensus sequence. * `variants/bcftools/bcftools_stats/` - * `*.bcftools_stats.txt`: Statistics and counts obtained from VCF file. + * `*.bcftools_stats.txt`: Statistics and counts obtained from VCF file. 
@@ -576,14 +576,14 @@ Unless you are using [UMIs](https://emea.illumina.com/science/sequencing-method- Output files * `variants/<CALLER>/snpeff/` - * `*.snpeff.csv`: Variant annotation csv file. - * `*.snpeff.genes.txt`: Gene table for annotated variants. - * `*.snpeff.summary.html`: Summary html file for variants. - * `*.snpeff.vcf.gz`: VCF file with variant annotations. - * `*.snpeff.vcf.gz.tbi`: Index for VCF file with variant annotations. - * `*.snpsift.txt`: SnpSift summary table. + * `*.snpeff.csv`: Variant annotation csv file. + * `*.snpeff.genes.txt`: Gene table for annotated variants. + * `*.snpeff.summary.html`: Summary html file for variants. + * `*.snpeff.vcf.gz`: VCF file with variant annotations. + * `*.snpeff.vcf.gz.tbi`: Index for VCF file with variant annotations. + * `*.snpsift.txt`: SnpSift summary table. * `variants/<CALLER>/snpeff/bcftools_stats/` - * `*.bcftools_stats.txt`: Statistics and counts obtained from VCF file. + * `*.bcftools_stats.txt`: Statistics and counts obtained from VCF file. **NB:** The value of `<CALLER>` in the output directory name above is determined by the `--callers` parameter (Default: 'ivar' for '--protocol amplicon' and 'bcftools' for '--protocol metagenomic'). @@ -601,7 +601,7 @@ Unless you are using [UMIs](https://emea.illumina.com/science/sequencing-method- Output files * `variants/<CALLER>/quast/` - * `report.html`: Results report in HTML format. Also available in various other file formats i.e. `report.pdf`, `report.tex`, `report.tsv` and `report.txt`. + * `report.html`: Results report in HTML format. Also available in various other file formats i.e. `report.pdf`, `report.tex`, `report.tsv` and `report.txt`. **NB:** The value of `<CALLER>` in the output directory name above is determined by the `--callers` parameter (Default: 'ivar' for '--protocol amplicon' and 'bcftools' for '--protocol metagenomic'). 
@@ -615,7 +615,7 @@ Unless you are using [UMIs](https://emea.illumina.com/science/sequencing-method- Output files * `variants/<CALLER>/pangolin/` - * `*.pangolin.csv`: Lineage analysis results from Pangolin. + * `*.pangolin.csv`: Lineage analysis results from Pangolin. **NB:** The value of `<CALLER>` in the output directory name above is determined by the `--callers` parameter (Default: 'ivar' for '--protocol amplicon' and 'bcftools' for '--protocol metagenomic'). @@ -629,7 +629,7 @@ Phylogenetic Assignment of Named Global Outbreak LINeages ([Pangolin](https://gi Output files * `variants/<CALLER>/nextclade/` - * `*.csv`: Analysis results from Nextclade containing genome clade assignment, mutation calling and sequence quality checks. + * `*.csv`: Analysis results from Nextclade containing genome clade assignment, mutation calling and sequence quality checks. **NB:** The value of `<CALLER>` in the output directory name above is determined by the `--callers` parameter (Default: 'ivar' for '--protocol amplicon' and 'bcftools' for '--protocol metagenomic'). @@ -643,7 +643,7 @@ Phylogenetic Assignment of Named Global Outbreak LINeages ([Pangolin](https://gi Output files * `variants/<CALLER>/asciigenome/<SAMPLE>/` - * `*.pdf`: Individual variant screenshots with annotation tracks in PDF format. + * `*.pdf`: Individual variant screenshots with annotation tracks in PDF format. **NB:** The value of `<CALLER>` in the output directory name above is determined by the `--callers` parameter (Default: 'ivar' for '--protocol amplicon' and 'bcftools' for '--protocol metagenomic'). @@ -652,7 +652,7 @@ Phylogenetic Assignment of Named Global Outbreak LINeages ([Pangolin](https://gi As described in the documentation, [ASCIIGenome](https://asciigenome.readthedocs.io/en/latest/) is a command-line genome browser that can be run from a terminal window and is solely based on ASCII characters. 
The closest program to ASCIIGenome is probably [samtools tview](http://www.htslib.org/doc/samtools-tview.html) but ASCIIGenome offers much more flexibility, similar to popular GUI viewers like the [IGV](https://software.broadinstitute.org/software/igv/) browser. We are using the batch processing mode of ASCIIGenome in this pipeline to generate individual screenshots for all of the variant sites reported for each sample in the VCF files. This is incredibly useful to be able to quickly QC the variants called by the pipeline without having to tediously load all of the relevant tracks into a conventional genome browser. Where possible, the BAM read alignments, VCF variant file, primer BED file and GFF annotation track will be represented in the screenshot for contextual purposes. The screenshot below shows a SNP called relative to the MN908947.3 SARS-CoV-2 reference genome that overlaps the ORF7a protein and the nCoV-2019_91_LEFT primer from the ARTIC v3 protocol.

- ASCIIGenome screenshot + ASCIIGenome screenshot

### BCFTools isec @@ -661,10 +661,10 @@ As described in the documentation, [ASCIIGenome](https://asciigenome.readthedocs Output files * `variants/intersect//` - * `*.vcf.gz`: VCF file containing variants common to both variant callers. There will be one file for each caller - see `README.txt` for details. - * `*.vcf.gz.tbi`: Index for VCF file. - * `README.txt`: File containing command used and file name mappings. - * `sites.txt`: List of variants common to both variant callers in textual format. The last column indicates presence (1) or absence (0) amongst the 2 different callers. + * `*.vcf.gz`: VCF file containing variants common to both variant callers. There will be one file for each caller - see `README.txt` for details. + * `*.vcf.gz.tbi`: Index for VCF file. + * `README.txt`: File containing command used and file name mappings. + * `sites.txt`: List of variants common to both variant callers in textual format. The last column indicates presence (1) or absence (0) amongst the 2 different callers. **NB:** This process will only be executed when both variant callers are specified to be run i.e. `--callers ivar,bcftools`. @@ -682,10 +682,10 @@ A file called `summary_assembly_metrics_mqc.csv` containing a selection of read Output files * `assembly/cutadapt/log/` - * `*.cutadapt.log`: Cutadapt log file generated from stdout. + * `*.cutadapt.log`: Cutadapt log file generated from stdout. * `assembly/cutadapt/fastqc/` - * `*_fastqc.html`: FastQC report of the trimmed reads. - * `*_fastqc.zip`: Zip archive containing the FastQC report. + * `*_fastqc.html`: FastQC report of the trimmed reads. + * `*_fastqc.zip`: Zip archive containing the FastQC report. @@ -699,12 +699,12 @@ In the variant calling branch of the pipeline we are using [iVar trim](#ivar-tri Output files * `assembly/spades//` - * `*.scaffolds.fa`: SPAdes scaffold assembly. - * `*.contigs.fa`: SPAdes assembly contigs. 
- * `*.assembly.gfa`: SPAdes assembly graph in [GFA](https://github.com/GFA-spec/GFA-spec/blob/master/GFA1.md) format. + * `*.scaffolds.fa`: SPAdes scaffold assembly. + * `*.contigs.fa`: SPAdes assembly contigs. + * `*.assembly.gfa`: SPAdes assembly graph in [GFA](https://github.com/GFA-spec/GFA-spec/blob/master/GFA1.md) format. * `assembly/spades//bandage/` - * `*.png`: Bandage visualisation for SPAdes assembly graph in PNG format. - * `*.svg`: Bandage visualisation for SPAdes assembly graph in SVG format. + * `*.png`: Bandage visualisation for SPAdes assembly graph in PNG format. + * `*.svg`: Bandage visualisation for SPAdes assembly graph in SVG format. **NB:** The value of `` in the output directory name above is determined by the `--spades_mode` parameter (Default: 'rnaviral'). @@ -720,11 +720,11 @@ In the variant calling branch of the pipeline we are using [iVar trim](#ivar-tri Output files * `assembly/unicycler/` - * `*.scaffolds.fa`: Unicycler scaffold assembly. - * `*.assembly.gfa`: Unicycler assembly graph in GFA format. + * `*.scaffolds.fa`: Unicycler scaffold assembly. + * `*.assembly.gfa`: Unicycler assembly graph in GFA format. * `assembly/unicycler/bandage/` - * `*.png`: Bandage visualisation for Unicycler assembly graph in PNG format. - * `*.svg`: Bandage visualisation for Unicycler assembly graph in SVG format. + * `*.png`: Bandage visualisation for Unicycler assembly graph in PNG format. + * `*.svg`: Bandage visualisation for Unicycler assembly graph in SVG format. @@ -736,9 +736,9 @@ In the variant calling branch of the pipeline we are using [iVar trim](#ivar-tri Output files * `assembly/minia/` - * `*.contigs.fa`: Minia scaffold assembly. - * `*.unitigs.fa`: Minia unitigs fasta file. - * `*.h5`: Minia h5 output file. + * `*.contigs.fa`: Minia scaffold assembly. + * `*.unitigs.fa`: Minia unitigs fasta file. + * `*.h5`: Minia h5 output file. 
@@ -750,8 +750,8 @@ In the variant calling branch of the pipeline we are using [iVar trim](#ivar-tri Output files * `assembly/<ASSEMBLER>/blastn/` - * `*.blastn.txt`: BLAST results against the target virus. - * `*.filter.blastn.txt`: Filtered BLAST results. + * `*.blastn.txt`: BLAST results against the target virus. + * `*.filter.blastn.txt`: Filtered BLAST results. **NB:** The value of `<ASSEMBLER>` in the output directory name above is determined by the `--assemblers` parameter (Default: 'spades'). @@ -765,14 +765,14 @@ In the variant calling branch of the pipeline we are using [iVar trim](#ivar-tri Output files * `assembly/<ASSEMBLER>/abacas/` - * `*.abacas.bin`: Bin file that contains contigs that are not used in ordering. - * `*.abacas.crunch`: Comparison file. - * `*.abacas.fasta`: Ordered and orientated sequence file. - * `*.abacas.gaps`: Gap information. - * `*.abacas.gaps.tab`: Gap information in tab-delimited format. - * `*.abacas.MULTIFASTA.fa`: A list of ordered and orientated contigs in a multi-fasta format. - * `*.abacas.tab`: Feature file. - * `*.unused_contigs.out`: Information on contigs that have mapping information but could not be used in the ordering. + * `*.abacas.bin`: Bin file that contains contigs that are not used in ordering. + * `*.abacas.crunch`: Comparison file. + * `*.abacas.fasta`: Ordered and orientated sequence file. + * `*.abacas.gaps`: Gap information. + * `*.abacas.gaps.tab`: Gap information in tab-delimited format. + * `*.abacas.MULTIFASTA.fa`: A list of ordered and orientated contigs in a multi-fasta format. + * `*.abacas.tab`: Feature file. + * `*.unused_contigs.out`: Information on contigs that have mapping information but could not be used in the ordering. * `assembly/<ASSEMBLER>/abacas/nucmer/`: Folder containing the files generated by the NUCmer algorithm used by ABACAS. **NB:** The value of `<ASSEMBLER>` in the output directory name above is determined by the `--assemblers` parameter (Default: 'spades'). 
@@ -787,10 +787,10 @@ In the variant calling branch of the pipeline we are using [iVar trim](#ivar-tri Output files * `assembly//plasmidid//` - * `*_final_results.html`: Summary file with reference coverage stats and contigs for visualization. - * `*_final_results.tab`: Summary file with reference coverage stats and contigs. - * `images/_.png`: PNG file with the visualization of the alignment between the viral assembly and the reference viral genome. - * `logs/`: Log files. + * `*_final_results.html`: Summary file with reference coverage stats and contigs for visualization. + * `*_final_results.tab`: Summary file with reference coverage stats and contigs. + * `images/_.png`: PNG file with the visualization of the alignment between the viral assembly and the reference viral genome. + * `logs/`: Log files. **NB:** The value of `` in the output directory name above is determined by the `--assemblers` parameter (Default: 'spades'). @@ -804,7 +804,7 @@ In the variant calling branch of the pipeline we are using [iVar trim](#ivar-tri Output files * `assembly//quast/` - * `report.html`: Results report in HTML format. Also available in various other file formats i.e. `report.pdf`, `report.tex`, `report.tsv` and `report.txt`. + * `report.html`: Results report in HTML format. Also available in various other file formats i.e. `report.pdf`, `report.tex`, `report.tsv` and `report.txt`. **NB:** The value of `` in the output directory name above is determined by the `--assemblers` parameter (Default: 'spades'). @@ -821,11 +821,11 @@ In the variant calling branch of the pipeline we are using [iVar trim](#ivar-tri
Output files -* `multiqc/` - * `multiqc_report.html`: a standalone HTML file that can be viewed in your web browser. - * `multiqc_data/`: directory containing parsed statistics from the different tools used in the pipeline. - * `summary_variants_metrics_mqc.csv`: file containing a selection of read alignment and variant calling metrics. The same metrics will also be added to the top of the MultiQC report. - * `summary_assembly_metrics_mqc.csv`: file containing a selection of read alignment and *de novo* assembly related metrics. The same metrics will also be added to the top of the MultiQC report. +* `multiqc/` + * `multiqc_report.html`: a standalone HTML file that can be viewed in your web browser. + * `multiqc_data/`: directory containing parsed statistics from the different tools used in the pipeline. + * `summary_variants_metrics_mqc.csv`: file containing a selection of read alignment and variant calling metrics. The same metrics will also be added to the top of the MultiQC report. + * `summary_assembly_metrics_mqc.csv`: file containing a selection of read alignment and *de novo* assembly related metrics. The same metrics will also be added to the top of the MultiQC report.
@@ -844,16 +844,16 @@ An example MultiQC report generated from a full-sized dataset can be viewed on t
Output files -* `genome/` - * Unzipped genome fasta file for viral genome - * Unzipped genome annotation GFF file for viral genome -* `genome/index/` - * `bowtie2/`: Bowtie 2 index for viral genome. -* `genome/db/` - * `blast_db/`: BLAST database for viral genome. - * `kraken2_db/`: Kraken 2 database for host genome. - * `snpeff_db/`: SnpEff database for viral genome. - * `snpeff.config`: SnpEff config file for viral genome. +* `genome/` + * Unzipped genome fasta file for viral genome + * Unzipped genome annotation GFF file for viral genome +* `genome/index/` + * `bowtie2/`: Bowtie 2 index for viral genome. +* `genome/db/` + * `blast_db/`: BLAST database for viral genome. + * `kraken2_db/`: Kraken 2 database for host genome. + * `snpeff_db/`: SnpEff database for viral genome. + * `snpeff.config`: SnpEff config file for viral genome.
@@ -865,10 +865,10 @@ A number of genome-specific files are generated by the pipeline because they are Output files * `pipeline_info/` - * Reports generated by Nextflow: `execution_report.html`, `execution_timeline.html`, `execution_trace.txt` and `pipeline_dag.dot`/`pipeline_dag.svg`. - * Reports generated by the pipeline: `pipeline_report.html`, `pipeline_report.txt` and `software_versions.csv`. - * Reformatted samplesheet files used as input to the pipeline: `samplesheet.valid.csv`. - * Documentation for interpretation of results in HTML format: `results_description.html`. + * Reports generated by Nextflow: `execution_report.html`, `execution_timeline.html`, `execution_trace.txt` and `pipeline_dag.dot`/`pipeline_dag.svg`. + * Reports generated by the pipeline: `pipeline_report.html`, `pipeline_report.txt` and `software_versions.csv`. + * Reformatted samplesheet files used as input to the pipeline: `samplesheet.valid.csv`. + * Documentation for interpretation of results in HTML format: `results_description.html`. diff --git a/docs/usage.md b/docs/usage.md index d45a2384..411fb267 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -139,20 +139,20 @@ They are loaded in sequence, so later profiles can overwrite earlier profiles. If `-profile` is not specified, the pipeline will run locally and expect all software to be installed and available on the `PATH`. This is _not_ recommended. 
* `docker` - * A generic configuration profile to be used with [Docker](https://docker.com/) + * A generic configuration profile to be used with [Docker](https://docker.com/) * `singularity` - * A generic configuration profile to be used with [Singularity](https://sylabs.io/docs/) + * A generic configuration profile to be used with [Singularity](https://sylabs.io/docs/) * `podman` - * A generic configuration profile to be used with [Podman](https://podman.io/) + * A generic configuration profile to be used with [Podman](https://podman.io/) * `shifter` - * A generic configuration profile to be used with [Shifter](https://nersc.gitlab.io/development/shifter/how-to-use/) + * A generic configuration profile to be used with [Shifter](https://nersc.gitlab.io/development/shifter/how-to-use/) * `charliecloud` - * A generic configuration profile to be used with [Charliecloud](https://hpc.github.io/charliecloud/) + * A generic configuration profile to be used with [Charliecloud](https://hpc.github.io/charliecloud/) * `conda` - * A generic configuration profile to be used with [Conda](https://conda.io/docs/). Please only use Conda as a last resort i.e. when it's not possible to run the pipeline with Docker, Singularity, Podman, Shifter or Charliecloud. + * A generic configuration profile to be used with [Conda](https://conda.io/docs/). Please only use Conda as a last resort i.e. when it's not possible to run the pipeline with Docker, Singularity, Podman, Shifter or Charliecloud. 
* `test` - * A profile with a complete configuration for automated testing - * Includes links to test data so needs no other parameters + * A profile with a complete configuration for automated testing + * Includes links to test data so needs no other parameters ### `-resume` From 74f9359c558d01a54cf7e38e786d1590d446004f Mon Sep 17 00:00:00 2001 From: Harshil Patel Date: Tue, 11 May 2021 12:47:16 +0100 Subject: [PATCH 04/17] First pass EC lint fix --- .editorconfig | 4 +- .github/PULL_REQUEST_TEMPLATE.md | 4 +- .github/workflows/ci.yml | 2 +- CITATIONS.md | 59 ++- assets/multiqc_config_illumina.yaml | 4 +- assets/multiqc_config_nanopore.yaml | 2 +- assets/sendmail_template.txt | 36 +- bin/check_samplesheet.py | 2 +- bin/fastq_dir_to_samplesheet.py | 6 +- bin/multiqc_to_custom_csv.py | 2 +- bin/sra_ids_to_runinfo.py | 2 +- bin/sra_runinfo_to_ftp.py | 15 +- conf/base.config | 64 +-- conf/modules.config | 14 +- docs/usage.md | 38 +- lib/NfcoreSchema.groovy | 4 +- lib/NfcoreTemplate.groovy | 8 +- lib/Utils.groovy | 2 +- lib/WorkflowCommons.groovy | 2 +- lib/WorkflowIllumina.groovy | 14 +- lib/WorkflowMain.groovy | 4 +- lib/WorkflowNanopore.groovy | 2 +- main.nf | 10 +- nextflow.config | 422 +++++++++--------- nextflow_schema.json | 2 +- subworkflows/local/assembly_minia.nf | 10 +- subworkflows/local/assembly_qc.nf | 7 +- subworkflows/local/assembly_spades.nf | 18 +- subworkflows/local/assembly_unicycler.nf | 14 +- subworkflows/local/input_check.nf | 6 +- subworkflows/local/make_consensus.nf | 9 +- subworkflows/local/prepare_genome_illumina.nf | 4 +- subworkflows/local/prepare_genome_nanopore.nf | 4 +- subworkflows/local/primer_trim_ivar.nf | 2 +- subworkflows/local/snpeff_snpsift.nf | 4 +- subworkflows/local/variants_bcftools.nf | 14 +- subworkflows/local/variants_ivar.nf | 5 +- subworkflows/nf-core/align_bowtie2.nf | 2 +- subworkflows/nf-core/bam_sort_samtools.nf | 2 +- subworkflows/nf-core/bam_stats_samtools.nf | 2 +- subworkflows/nf-core/fastqc_fastp.nf | 2 +- 
subworkflows/nf-core/filter_bam_samtools.nf | 2 +- subworkflows/nf-core/vcf_bgzip_tabix_stats.nf | 2 +- subworkflows/nf-core/vcf_tabix_stats.nf | 2 +- workflows/illumina.nf | 50 +-- workflows/nanopore.nf | 70 +-- workflows/sra_download.nf | 10 +- 47 files changed, 479 insertions(+), 486 deletions(-) diff --git a/.editorconfig b/.editorconfig index 2095f8e5..e210ef22 100644 --- a/.editorconfig +++ b/.editorconfig @@ -9,10 +9,10 @@ indent_size = 4 indent_style = space [*.{yml,yaml}] -indent_size = 2 +indent_size = 4 # These files are edited upstream in nf-core/modules -[/modules/**] +[/modules/nf-core/**] charset = unset end_of_line = unset insert_final_newline = unset diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index ba023bde..3541d6da 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -15,8 +15,8 @@ Learn more about contributing: [CONTRIBUTING.md](https://github.com/nf-core/vira - [ ] This comment contains a description of changes (with reason). - [ ] If you've fixed a bug or added code that should be tested, add tests! - - [ ] If you've added a new tool - have you followed the pipeline conventions in the [contribution docs](https://github.com/nf-core/viralrecon/tree/master/.github/CONTRIBUTING.md) - - [ ] If necessary, also make a PR on the nf-core/viralrecon _branch_ on the [nf-core/test-datasets](https://github.com/nf-core/test-datasets) repository. + - [ ] If you've added a new tool - have you followed the pipeline conventions in the [contribution docs](https://github.com/nf-core/viralrecon/tree/master/.github/CONTRIBUTING.md) + - [ ] If necessary, also make a PR on the nf-core/viralrecon _branch_ on the [nf-core/test-datasets](https://github.com/nf-core/test-datasets) repository. - [ ] Make sure your code lints (`nf-core lint .`). - [ ] Ensure the test suite passes (`nextflow run . -profile test,docker`). - [ ] Usage Documentation in `docs/usage.md` is updated. 
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index ada24797..b5e8cbef 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -136,4 +136,4 @@ jobs: - name: Run pipeline with minimal Nanopore data and various options run: | - nextflow run ${GITHUB_WORKSPACE} -profile test_nanopore,docker ${{ matrix.parameters }} \ No newline at end of file + nextflow run ${GITHUB_WORKSPACE} -profile test_nanopore,docker ${{ matrix.parameters }} diff --git a/CITATIONS.md b/CITATIONS.md index f323a64d..b6bcd3e0 100644 --- a/CITATIONS.md +++ b/CITATIONS.md @@ -11,99 +11,98 @@ ## Pipeline tools * [ABACAS](https://www.ncbi.nlm.nih.gov/pubmed/19497936/) - > Assefa S, Keane TM, Otto TD, Newbold C, Berriman M. ABACAS: algorithm-based automatic contiguation of assembled sequences. Bioinformatics. 2009 Aug 1;25(15):1968-9. doi: 10.1093/bioinformatics/btp347. Epub 2009 Jun 3. PubMed PMID: 19497936; PubMed Central PMCID: PMC2712343. + > Assefa S, Keane TM, Otto TD, Newbold C, Berriman M. ABACAS: algorithm-based automatic contiguation of assembled sequences. Bioinformatics. 2009 Aug 1;25(15):1968-9. doi: 10.1093/bioinformatics/btp347. Epub 2009 Jun 3. PubMed PMID: 19497936; PubMed Central PMCID: PMC2712343. * [ASCIIGenome](https://www.ncbi.nlm.nih.gov/pubmed/28119307/) - > Beraldi D. ASCIIGenome: a command line genome browser for console terminals. Bioinformatics. 2017 May 15;33(10):1568-1569. doi: 10.1093/bioinformatics/btx007. PubMed PMID: 28119307; PubMed Central PMCID: PMC5423454. + > Beraldi D. ASCIIGenome: a command line genome browser for console terminals. Bioinformatics. 2017 May 15;33(10):1568-1569. doi: 10.1093/bioinformatics/btx007. PubMed PMID: 28119307; PubMed Central PMCID: PMC5423454. * [ARTIC network](https://github.com/artic-network) * [Bandage](https://www.ncbi.nlm.nih.gov/pubmed/26099265) - > Wick R.R., Schultz M.B., Zobel J. & Holt K.E. Bandage: interactive visualisation of de novo genome assemblies. Bioinformatics, 31(20), 3350-3352. 
doi: 10.1093/bioinformatics/btv383. PubMed PMID: 26099265; PubMed Central PCMID: PMC4595904. + > Wick R.R., Schultz M.B., Zobel J. & Holt K.E. Bandage: interactive visualisation of de novo genome assemblies. Bioinformatics, 31(20), 3350-3352. doi: 10.1093/bioinformatics/btv383. PubMed PMID: 26099265; PubMed Central PCMID: PMC4595904. * [BCFtools](https://www.ncbi.nlm.nih.gov/pubmed/21903627/) - > Li H. A statistical framework for SNP calling, mutation discovery, association mapping and population genetical parameter estimation from sequencing data. Bioinformatics. 2011 Nov 1;27(21):2987-93. doi: 10.1093/bioinformatics/btr509. Epub 2011 Sep 8. PubMed PMID: 21903627; PubMed Central PMCID: PMC3198575. + > Li H. A statistical framework for SNP calling, mutation discovery, association mapping and population genetical parameter estimation from sequencing data. Bioinformatics. 2011 Nov 1;27(21):2987-93. doi: 10.1093/bioinformatics/btr509. Epub 2011 Sep 8. PubMed PMID: 21903627; PubMed Central PMCID: PMC3198575. * [BEDTools](https://www.ncbi.nlm.nih.gov/pubmed/20110278/) - > Quinlan AR, Hall IM. BEDTools: a flexible suite of utilities for comparing genomic features. Bioinformatics. 2010 Mar 15;26(6):841-2. doi: 10.1093/bioinformatics/btq033. Epub 2010 Jan 28. PubMed PMID: 20110278; PubMed Central PMCID: PMC2832824. + > Quinlan AR, Hall IM. BEDTools: a flexible suite of utilities for comparing genomic features. Bioinformatics. 2010 Mar 15;26(6):841-2. doi: 10.1093/bioinformatics/btq033. Epub 2010 Jan 28. PubMed PMID: 20110278; PubMed Central PMCID: PMC2832824. * [BLAST](https://www.ncbi.nlm.nih.gov/pubmed/20003500/) - > Camacho C, Coulouris G, Avagyan V, Ma N, Papadopoulos J, Bealer K, Madden TL. BLAST+: architecture and applications. BMC Bioinformatics. 2009 Dec 15;10:421. doi: 10.1186/1471-2105-10-421. PubMed PMID: 20003500; PubMed Central PMCID: PMC2803857. + > Camacho C, Coulouris G, Avagyan V, Ma N, Papadopoulos J, Bealer K, Madden TL. 
BLAST+: architecture and applications. BMC Bioinformatics. 2009 Dec 15;10:421. doi: 10.1186/1471-2105-10-421. PubMed PMID: 20003500; PubMed Central PMCID: PMC2803857. * [Bowtie 2](https://www.ncbi.nlm.nih.gov/pubmed/22388286/) - > Langmead B, Salzberg SL. Fast gapped-read alignment with Bowtie 2. Nat Methods. 2012 Mar 4;9(4):357-9. doi: 10.1038/nmeth.1923. PubMed PMID: 22388286; PubMed Central PMCID: PMC3322381. + > Langmead B, Salzberg SL. Fast gapped-read alignment with Bowtie 2. Nat Methods. 2012 Mar 4;9(4):357-9. doi: 10.1038/nmeth.1923. PubMed PMID: 22388286; PubMed Central PMCID: PMC3322381. * [Cutadapt](http://dx.doi.org/10.14806/ej.17.1.200) - > Marcel, M. Cutadapt removes adapter sequences from high-throughput sequencing reads. EMBnet.journal, [S.l.], v. 17, n. 1, p. pp. 10-12, may 2011. ISSN 2226-6089. doi: 10.14806/ej.17.1.200. + > Marcel, M. Cutadapt removes adapter sequences from high-throughput sequencing reads. EMBnet.journal, [S.l.], v. 17, n. 1, p. pp. 10-12, may 2011. ISSN 2226-6089. doi: 10.14806/ej.17.1.200. * [fastp](https://www.ncbi.nlm.nih.gov/pubmed/30423086/) - > Chen S, Zhou Y, Chen Y, Gu J. fastp: an ultra-fast all-in-one FASTQ preprocessor. Bioinformatics. 2018 Sep 1;34(17):i884-i890. doi: 10.1093/bioinformatics/bty560. PubMed PMID: 30423086; PubMed Central PMCID: PMC6129281. + > Chen S, Zhou Y, Chen Y, Gu J. fastp: an ultra-fast all-in-one FASTQ preprocessor. Bioinformatics. 2018 Sep 1;34(17):i884-i890. doi: 10.1093/bioinformatics/bty560. PubMed PMID: 30423086; PubMed Central PMCID: PMC6129281. * [FastQC](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/) * [iVar](https://www.ncbi.nlm.nih.gov/pubmed/30621750/) - > Grubaugh ND, Gangavarapu K, Quick J, Matteson NL, De Jesus JG, Main BJ, Tan AL, Paul LM, Brackney DE, Grewal S, Gurfield N, Van Rompay KKA, Isern S, Michael SF, Coffey LL, Loman NJ, Andersen KG. An amplicon-based sequencing framework for accurately measuring intrahost virus diversity using PrimalSeq and iVar. 
Genome Biol. 2019 Jan 8;20(1):8. doi: 10.1186/s13059-018-1618-7. PubMed PMID: 30621750; PubMed Central PMCID: PMC6325816. + > Grubaugh ND, Gangavarapu K, Quick J, Matteson NL, De Jesus JG, Main BJ, Tan AL, Paul LM, Brackney DE, Grewal S, Gurfield N, Van Rompay KKA, Isern S, Michael SF, Coffey LL, Loman NJ, Andersen KG. An amplicon-based sequencing framework for accurately measuring intrahost virus diversity using PrimalSeq and iVar. Genome Biol. 2019 Jan 8;20(1):8. doi: 10.1186/s13059-018-1618-7. PubMed PMID: 30621750; PubMed Central PMCID: PMC6325816. * [Kraken 2](https://www.ncbi.nlm.nih.gov/pubmed/31779668/) - > Wood DE, Lu J, Langmead B. Improved metagenomic analysis with Kraken 2. Genome Biol. 2019 Nov 28;20(1):257. doi: 10.1186/s13059-019-1891-0. PubMed PMID: 31779668; PubMed Central PMCID: PMC6883579. + > Wood DE, Lu J, Langmead B. Improved metagenomic analysis with Kraken 2. Genome Biol. 2019 Nov 28;20(1):257. doi: 10.1186/s13059-019-1891-0. PubMed PMID: 31779668; PubMed Central PMCID: PMC6883579. * [minia](https://www.ncbi.nlm.nih.gov/pubmed/24040893/) - > Chikhi R, Rizk G. Space-efficient and exact de Bruijn graph representation based on a Bloom filter. Algorithms Mol Biol. 2013 Sep 16;8(1):22. doi: 10.1186/1748-7188-8-22. PubMed PMID: 24040893; PubMed Central PMCID: PMC3848682. + > Chikhi R, Rizk G. Space-efficient and exact de Bruijn graph representation based on a Bloom filter. Algorithms Mol Biol. 2013 Sep 16;8(1):22. doi: 10.1186/1748-7188-8-22. PubMed PMID: 24040893; PubMed Central PMCID: PMC3848682. * [mosdepth](https://www.ncbi.nlm.nih.gov/pubmed/29096012) - > Pedersen BS, Quinlan AR. Mosdepth: Quick Coverage Calculation for Genomes and Exomes. Bioinformatics. 2018 Mar 1;34(5):867-868. doi: 10.1093/bioinformatics/btx699. PMID: 29096012 PMCID: PMC6030888. + > Pedersen BS, Quinlan AR. Mosdepth: Quick Coverage Calculation for Genomes and Exomes. Bioinformatics. 2018 Mar 1;34(5):867-868. doi: 10.1093/bioinformatics/btx699. 
PMID: 29096012 PMCID: PMC6030888. * [MultiQC](https://www.ncbi.nlm.nih.gov/pubmed/27312411/) - > Ewels P, Magnusson M, Lundin S, Käller M. MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics. 2016 Oct 1;32(19):3047-8. doi: 10.1093/bioinformatics/btw354. Epub 2016 Jun 16. PubMed PMID: 27312411; PubMed Central PMCID: PMC5039924. + > Ewels P, Magnusson M, Lundin S, Käller M. MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics. 2016 Oct 1;32(19):3047-8. doi: 10.1093/bioinformatics/btw354. Epub 2016 Jun 16. PubMed PMID: 27312411; PubMed Central PMCID: PMC5039924. * [NanoPlot](https://pubmed.ncbi.nlm.nih.gov/29547981/) - > De Coster W, D'Hert S, Schultz DT, Cruts M, Van Broeckhoven C. NanoPack: visualizing and processing long-read sequencing data. Bioinformatics. 2018 Aug 1;34(15):2666-2669. doi: 10.1093/bioinformatics/bty149. PubMed PMID: 29547981; PubMed Central PMCID: PMC6061794. + > De Coster W, D'Hert S, Schultz DT, Cruts M, Van Broeckhoven C. NanoPack: visualizing and processing long-read sequencing data. Bioinformatics. 2018 Aug 1;34(15):2666-2669. doi: 10.1093/bioinformatics/bty149. PubMed PMID: 29547981; PubMed Central PMCID: PMC6061794. * [Nextstrain](https://pubmed.ncbi.nlm.nih.gov/29790939/) - > Hadfield J, Megill C, Bell SM, Huddleston J, Potter B, Callender C, Sagulenko P, Bedford T, Neher RA. Nextstrain: real-time tracking of pathogen evolution. Bioinformatics. 2018 Dec 1;34(23):4121-4123. doi: 10.1093/bioinformatics/bty407. PubMed PMID: 29790939; PubMed Central PMCID: PMC6247931. + > Hadfield J, Megill C, Bell SM, Huddleston J, Potter B, Callender C, Sagulenko P, Bedford T, Neher RA. Nextstrain: real-time tracking of pathogen evolution. Bioinformatics. 2018 Dec 1;34(23):4121-4123. doi: 10.1093/bioinformatics/bty407. PubMed PMID: 29790939; PubMed Central PMCID: PMC6247931. 
* [pangolin](https://github.com/cov-lineages/pangolin) - > Áine O'Toole, Emily Scher, Anthony Underwood, Ben Jackson, Verity Hill, JT McCrone, Chris Ruis, Khali Abu-Dahab, Ben Taylor, Corin Yeats, Louis du Plessis, David Aanensen, Eddie Holmes, Oliver Pybus, Andrew Rambaut. pangolin: lineage assignment in an emerging pandemic as an epidemiological tool. Publication in preparation. + > Áine O'Toole, Emily Scher, Anthony Underwood, Ben Jackson, Verity Hill, JT McCrone, Chris Ruis, Khali Abu-Dahab, Ben Taylor, Corin Yeats, Louis du Plessis, David Aanensen, Eddie Holmes, Oliver Pybus, Andrew Rambaut. pangolin: lineage assignment in an emerging pandemic as an epidemiological tool. Publication in preparation. * [picard-tools](http://broadinstitute.github.io/picard) * [pycoQC](https://doi.org/10.21105/joss.01236) - > Leger A, Leonardi T, (2019). pycoQC, interactive quality control for Oxford Nanopore Sequencing. Journal of Open Source Software, 4(34), 1236. + > Leger A, Leonardi T, (2019). pycoQC, interactive quality control for Oxford Nanopore Sequencing. Journal of Open Source Software, 4(34), 1236. * [QUAST](https://www.ncbi.nlm.nih.gov/pubmed/23422339/) - > Gurevich A, Saveliev V, Vyahhi N, Tesler G. QUAST: quality assessment tool for genome assemblies. Bioinformatics. 2013 Apr 15;29(8):1072-5. doi: 10.1093/bioinformatics/btt086. Epub 2013 Feb 19. PubMed PMID: 23422339; PubMed Central PMCID: PMC3624806. + > Gurevich A, Saveliev V, Vyahhi N, Tesler G. QUAST: quality assessment tool for genome assemblies. Bioinformatics. 2013 Apr 15;29(8):1072-5. doi: 10.1093/bioinformatics/btt086. Epub 2013 Feb 19. PubMed PMID: 23422339; PubMed Central PMCID: PMC3624806. * [R](https://www.R-project.org/) - > R Core Team (2017). R: A language and environment for statistical computing. R Foundation for Statistical Computing, Vienna, Austria. + > R Core Team (2017). R: A language and environment for statistical computing. R Foundation for Statistical Computing, Vienna, Austria. 
* [SAMtools](https://www.ncbi.nlm.nih.gov/pubmed/19505943/) - > Li H, Handsaker B, Wysoker A, Fennell T, Ruan J, Homer N, Marth G, Abecasis G, Durbin R; 1000 Genome Project Data Processing Subgroup. The Sequence Alignment/Map format and SAMtools. Bioinformatics. 2009 Aug 15;25(16):2078-9. doi: 10.1093/bioinformatics/btp352. Epub 2009 Jun 8. PubMed PMID: 19505943; PubMed Central PMCID: PMC2723002. + > Li H, Handsaker B, Wysoker A, Fennell T, Ruan J, Homer N, Marth G, Abecasis G, Durbin R; 1000 Genome Project Data Processing Subgroup. The Sequence Alignment/Map format and SAMtools. Bioinformatics. 2009 Aug 15;25(16):2078-9. doi: 10.1093/bioinformatics/btp352. Epub 2009 Jun 8. PubMed PMID: 19505943; PubMed Central PMCID: PMC2723002. * [SnpEff](https://www.ncbi.nlm.nih.gov/pubmed/22728672/) - > Cingolani P, Platts A, Wang le L, Coon M, Nguyen T, Wang L, Land SJ, Lu X, Ruden DM. A program for annotating and predicting the effects of single nucleotide polymorphisms, SnpEff: SNPs in the genome of Drosophila melanogaster strain w1118; iso-2; iso-3. Fly (Austin). 2012 Apr-Jun;6(2):80-92. doi: 10.4161/fly.19695. PubMed PMID: 22728672; PubMed Central PMCID: PMC3679285. + > Cingolani P, Platts A, Wang le L, Coon M, Nguyen T, Wang L, Land SJ, Lu X, Ruden DM. A program for annotating and predicting the effects of single nucleotide polymorphisms, SnpEff: SNPs in the genome of Drosophila melanogaster strain w1118; iso-2; iso-3. Fly (Austin). 2012 Apr-Jun;6(2):80-92. doi: 10.4161/fly.19695. PubMed PMID: 22728672; PubMed Central PMCID: PMC3679285. * [SnpSift](https://www.ncbi.nlm.nih.gov/pubmed/22435069/) - > Cingolani P, Patel VM, Coon M, Nguyen T, Land SJ, Ruden DM, Lu X. Using Drosophila melanogaster as a Model for Genotoxic Chemical Mutational Studies with a New Program, SnpSift. Front Genet. 2012 Mar 15;3:35. doi: 10.3389/fgene.2012.00035. eCollection 2012. PubMed PMID: 22435069; PubMed Central PMCID: PMC3304048. 
+ > Cingolani P, Patel VM, Coon M, Nguyen T, Land SJ, Ruden DM, Lu X. Using Drosophila melanogaster as a Model for Genotoxic Chemical Mutational Studies with a New Program, SnpSift. Front Genet. 2012 Mar 15;3:35. doi: 10.3389/fgene.2012.00035. eCollection 2012. PubMed PMID: 22435069; PubMed Central PMCID: PMC3304048. * [SPAdes](https://www.ncbi.nlm.nih.gov/pubmed/24093227/) - > Nurk S, Bankevich A, Antipov D, Gurevich AA, Korobeynikov A, Lapidus A, Prjibelski AD, Pyshkin A, Sirotkin A, Sirotkin Y, Stepanauskas R, Clingenpeel SR, Woyke T, McLean JS, Lasken R, Tesler G, Alekseyev MA, Pevzner PA. Assembling single-cell genomes and mini-metagenomes from chimeric MDA products. J Comput Biol. 2013 Oct;20(10):714-37. doi: 10.1089/cmb.2013.0084. PubMed PMID: 24093227; PubMed Central PMCID: PMC3791033. + > Nurk S, Bankevich A, Antipov D, Gurevich AA, Korobeynikov A, Lapidus A, Prjibelski AD, Pyshkin A, Sirotkin A, Sirotkin Y, Stepanauskas R, Clingenpeel SR, Woyke T, McLean JS, Lasken R, Tesler G, Alekseyev MA, Pevzner PA. Assembling single-cell genomes and mini-metagenomes from chimeric MDA products. J Comput Biol. 2013 Oct;20(10):714-37. doi: 10.1089/cmb.2013.0084. PubMed PMID: 24093227; PubMed Central PMCID: PMC3791033. * [Unicycler](https://www.ncbi.nlm.nih.gov/pubmed/28594827/) - > Wick RR, Judd LM, Gorrie CL, Holt KE. Unicycler: Resolving bacterial genome assemblies from short and long sequencing reads. PLoS Comput Biol. 2017 Jun 8;13(6):e1005595. doi: 10.1371/journal.pcbi.1005595. eCollection 2017 Jun. PubMed PMID: 28594827; PubMed Central PMCID: PMC5481147. + > Wick RR, Judd LM, Gorrie CL, Holt KE. Unicycler: Resolving bacterial genome assemblies from short and long sequencing reads. PLoS Comput Biol. 2017 Jun 8;13(6):e1005595. doi: 10.1371/journal.pcbi.1005595. eCollection 2017 Jun. PubMed PMID: 28594827; PubMed Central PMCID: PMC5481147. ## Software packaging/containerisation tools * [Anaconda](https://anaconda.com) - > Anaconda Software Distribution. 
Computer software. Vers. 2-2.4.0. Anaconda, Nov. 2016. Web. + > Anaconda Software Distribution. Computer software. Vers. 2-2.4.0. Anaconda, Nov. 2016. Web. * [Bioconda](https://pubmed.ncbi.nlm.nih.gov/29967506/) - > Grüning B, Dale R, Sjödin A, Chapman BA, Rowe J, Tomkins-Tinch CH, Valieris R, Köster J; Bioconda Team. Bioconda: sustainable and comprehensive software distribution for the life sciences. Nat Methods. 2018 Jul;15(7):475-476. doi: 10.1038/s41592-018-0046-7. PubMed PMID: 29967506. + > Grüning B, Dale R, Sjödin A, Chapman BA, Rowe J, Tomkins-Tinch CH, Valieris R, Köster J; Bioconda Team. Bioconda: sustainable and comprehensive software distribution for the life sciences. Nat Methods. 2018 Jul;15(7):475-476. doi: 10.1038/s41592-018-0046-7. PubMed PMID: 29967506. * [BioContainers](https://pubmed.ncbi.nlm.nih.gov/28379341/) - > da Veiga Leprevost F, Grüning B, Aflitos SA, Röst HL, Uszkoreit J, Barsnes H, Vaudel M, Moreno P, Gatto L, Weber J, Bai M, Jimenez RC, Sachsenberg T, Pfeuffer J, Alvarez RV, Griss J, Nesvizhskii AI, Perez-Riverol Y. BioContainers: an open-source and community-driven framework for software standardization. Bioinformatics. 2017 Aug 15;33(16):2580-2582. doi: 10.1093/bioinformatics/btx192. PubMed PMID: 28379341; PubMed Central PMCID: PMC5870671. + > da Veiga Leprevost F, Grüning B, Aflitos SA, Röst HL, Uszkoreit J, Barsnes H, Vaudel M, Moreno P, Gatto L, Weber J, Bai M, Jimenez RC, Sachsenberg T, Pfeuffer J, Alvarez RV, Griss J, Nesvizhskii AI, Perez-Riverol Y. BioContainers: an open-source and community-driven framework for software standardization. Bioinformatics. 2017 Aug 15;33(16):2580-2582. doi: 10.1093/bioinformatics/btx192. PubMed PMID: 28379341; PubMed Central PMCID: PMC5870671. * [Docker](https://dl.acm.org/doi/10.5555/2600239.2600241) * [Singularity](https://pubmed.ncbi.nlm.nih.gov/28494014/) - > Kurtzer GM, Sochat V, Bauer MW. Singularity: Scientific containers for mobility of compute. PLoS One. 2017 May 11;12(5):e0177459. 
doi: 10.1371/journal.pone.0177459. eCollection 2017. PubMed PMID: 28494014; PubMed Central PMCID: PMC5426675. - \ No newline at end of file + > Kurtzer GM, Sochat V, Bauer MW. Singularity: Scientific containers for mobility of compute. PLoS One. 2017 May 11;12(5):e0177459. doi: 10.1371/journal.pone.0177459. eCollection 2017. PubMed PMID: 28494014; PubMed Central PMCID: PMC5426675. diff --git a/assets/multiqc_config_illumina.yaml b/assets/multiqc_config_illumina.yaml index 3f133b74..a8d57bf9 100644 --- a/assets/multiqc_config_illumina.yaml +++ b/assets/multiqc_config_illumina.yaml @@ -138,7 +138,7 @@ custom_data: section_name: 'Variant calling metrics' description: 'generated by the nf-core/viralrecon pipeline' plot_type: 'table' - headers: + headers: '# Input reads': description: 'Total number of reads in raw fastq file' format: '{:,.0f}' @@ -279,4 +279,4 @@ sp: mosdepth/global_dist: fn: '*.global.dist.txt' cutadapt: - fn: '*.cutadapt.log' \ No newline at end of file + fn: '*.cutadapt.log' diff --git a/assets/multiqc_config_nanopore.yaml b/assets/multiqc_config_nanopore.yaml index 05368a96..3412dcdd 100644 --- a/assets/multiqc_config_nanopore.yaml +++ b/assets/multiqc_config_nanopore.yaml @@ -85,7 +85,7 @@ custom_data: section_name: 'Variant calling metrics' description: 'generated by the nf-core/viralrecon pipeline' plot_type: 'table' - headers: + headers: '# Mapped reads': description: 'Total number of mapped reads relative to the viral genome' format: '{:,.0f}' diff --git a/assets/sendmail_template.txt b/assets/sendmail_template.txt index c903d626..15f1ccd6 100644 --- a/assets/sendmail_template.txt +++ b/assets/sendmail_template.txt @@ -15,15 +15,15 @@ Content-ID: Content-Disposition: inline; filename="nf-core-viralrecon_logo.png" <% out << new File("$projectDir/assets/nf-core-viralrecon_logo.png"). - bytes. - encodeBase64(). - toString(). - tokenize( '\n' )*. - toList()*. - collate( 76 )*. - collect { it.join() }. - flatten(). - join( '\n' ) %> + bytes. 
+ encodeBase64(). + toString(). + tokenize( '\n' )*. + toList()*. + collate( 76 )*. + collect { it.join() }. + flatten(). + join( '\n' ) %> <% if (mqcFile){ @@ -37,15 +37,15 @@ Content-ID: Content-Disposition: attachment; filename=\"${mqcFileObj.getName()}\" ${mqcFileObj. - bytes. - encodeBase64(). - toString(). - tokenize( '\n' )*. - toList()*. - collate( 76 )*. - collect { it.join() }. - flatten(). - join( '\n' )} + bytes. + encodeBase64(). + toString(). + tokenize( '\n' )*. + toList()*. + collate( 76 )*. + collect { it.join() }. + flatten(). + join( '\n' )} """ }} %> diff --git a/bin/check_samplesheet.py b/bin/check_samplesheet.py index dea9eeb5..9d433f29 100755 --- a/bin/check_samplesheet.py +++ b/bin/check_samplesheet.py @@ -188,7 +188,7 @@ def check_nanopore_samplesheet(file_in, file_out): print_error("Barcode entry is not an integer!", 'Line', line) else: barcode = 'barcode%s' % (barcode.zfill(2)) - + ## Create sample mapping dictionary = { sample: barcode } if barcode in sample_mapping_dict.values(): print_error("Samplesheet contains duplicate entries in the 'barcode' column!", "Line", line) diff --git a/bin/fastq_dir_to_samplesheet.py b/bin/fastq_dir_to_samplesheet.py index 93a32edc..200b4e0d 100755 --- a/bin/fastq_dir_to_samplesheet.py +++ b/bin/fastq_dir_to_samplesheet.py @@ -30,7 +30,7 @@ def fastq_dir_to_samplesheet(fastq_dir, samplesheet_file, read1_extension='_R1_0 if sanitise_name: sample = sanitise_name_delimiter.join(os.path.basename(read1_file).split(sanitise_name_delimiter)[:sanitise_name_index]) read_dict[sample] = [read1_file] - + ## Get read 2 files read2_files = glob.glob(os.path.join(fastq_dir, f'*{read2_extension}'), recursive=False) if not single_end and len(read2_files) != 0: @@ -64,8 +64,8 @@ def main(args=None): read1_extension = args.READ1_EXTENSION, read2_extension = args.READ2_EXTENSION, single_end = args.SINGLE_END, - sanitise_name = args.SANITISE_NAME, - sanitise_name_delimiter = args.SANITISE_NAME_DELIMITER, + sanitise_name = 
args.SANITISE_NAME, + sanitise_name_delimiter = args.SANITISE_NAME_DELIMITER, sanitise_name_index = args.SANITISE_NAME_INDEX ) diff --git a/bin/multiqc_to_custom_csv.py b/bin/multiqc_to_custom_csv.py index 346fd32d..f6f5bfb0 100755 --- a/bin/multiqc_to_custom_csv.py +++ b/bin/multiqc_to_custom_csv.py @@ -208,7 +208,7 @@ def main(args=None): if os.path.exists(yaml_file): metrics_dict = yaml_fields_to_dict(yaml_file=yaml_file, append_dict={}, field_mapping_list=[('# Mapped reads', ['mapped_passed'])], valid_sample_list=[]) sample_list = metrics_dict.keys() - + metrics_dict_to_file( file_field_list = nanopore_variant_files, multiqc_data_dir = args.MULTIQC_DATA_DIR, diff --git a/bin/sra_ids_to_runinfo.py b/bin/sra_ids_to_runinfo.py index 9e993589..002a859a 100755 --- a/bin/sra_ids_to_runinfo.py +++ b/bin/sra_ids_to_runinfo.py @@ -110,7 +110,7 @@ def fetch_sra_runinfo(file_in,file_out,platform_list=[],library_layout_list=[]): prefix = match.group() if prefix in PREFIX_LIST: if not db_id in seen_ids: - + ids = [db_id] ## Resolve/expand these ids against GEO URL if prefix in ['GSE']: diff --git a/bin/sra_runinfo_to_ftp.py b/bin/sra_runinfo_to_ftp.py index 6fd09aeb..8056015f 100755 --- a/bin/sra_runinfo_to_ftp.py +++ b/bin/sra_runinfo_to_ftp.py @@ -48,10 +48,10 @@ def parse_sra_runinfo(file_in): sample_dict['fastq_1'] = fastq_files sample_dict['md5_1'] = fastq_md5 else: - ## In some instances FTP links don't exist for FastQ files + ## In some instances FTP links don't exist for FastQ files ## These have to be downloaded via fastq-dump / fasterq-dump / parallel-fastq-dump via the run id db_id = run_id - + elif library == 'PAIRED': sample_dict = collections.OrderedDict([('fastq_1',''), ('fastq_2',''), ('md5_1',''), ('md5_2',''), ('single_end','false')]) if fastq_files: @@ -62,14 +62,14 @@ def parse_sra_runinfo(file_in): sample_dict['fastq_1'] = fq_files[0] sample_dict['fastq_2'] = fq_files[1] sample_dict['md5_1'] = fq_md5[0] - sample_dict['md5_2'] = fq_md5[1] + 
sample_dict['md5_2'] = fq_md5[1] else: print("Invalid FastQ files found for database id:'{}'!.".format(run_id)) else: print("Invalid number of FastQ files ({}) found for paired-end database id:'{}'!.".format(len(fq_files), run_id)) else: db_id = run_id - + if sample_dict: sample_dict.update(line_dict) if db_id not in runinfo_dict: @@ -79,20 +79,19 @@ def parse_sra_runinfo(file_in): print("Input run info file contains duplicate rows!\nLine: '{}'".format(line)) else: runinfo_dict[db_id].append(sample_dict) - return runinfo_dict def sra_runinfo_to_ftp(files_in,file_out): samplesheet_dict = {} for file_in in files_in: - runinfo_dict = parse_sra_runinfo(file_in) + runinfo_dict = parse_sra_runinfo(file_in) for db_id in runinfo_dict.keys(): if db_id not in samplesheet_dict: samplesheet_dict[db_id] = runinfo_dict[db_id] else: print("Duplicate sample identifier found!\nID: '{}'".format(db_id)) - + ## Write samplesheet with paths to FastQ files and md5 sums if samplesheet_dict: out_dir = os.path.dirname(file_out) @@ -108,7 +107,7 @@ def sra_runinfo_to_ftp(files_in,file_out): def main(args=None): args = parse_args(args) sra_runinfo_to_ftp([x.strip() for x in args.FILES_IN.split(',')], args.FILE_OUT) - + if __name__ == '__main__': sys.exit(main()) diff --git a/conf/base.config b/conf/base.config index c612725c..72431ecb 100644 --- a/conf/base.config +++ b/conf/base.config @@ -11,38 +11,38 @@ process { - cpus = { check_max( 1 * task.attempt, 'cpus' ) } - memory = { check_max( 6.GB * task.attempt, 'memory' ) } - time = { check_max( 4.h * task.attempt, 'time' ) } + cpus = { check_max( 1 * task.attempt, 'cpus' ) } + memory = { check_max( 6.GB * task.attempt, 'memory' ) } + time = { check_max( 4.h * task.attempt, 'time' ) } - errorStrategy = { task.exitStatus in [143,137,104,134,139] ? 'retry' : 'finish' } - maxRetries = 1 - maxErrors = '-1' + errorStrategy = { task.exitStatus in [143,137,104,134,139] ? 
'retry' : 'finish' } + maxRetries = 1 + maxErrors = '-1' - // Process-specific resource requirements - withLabel:process_low { - cpus = { check_max( 2 * task.attempt, 'cpus' ) } - memory = { check_max( 12.GB * task.attempt, 'memory' ) } - time = { check_max( 4.h * task.attempt, 'time' ) } - } - withLabel:process_medium { - cpus = { check_max( 6 * task.attempt, 'cpus' ) } - memory = { check_max( 36.GB * task.attempt, 'memory' ) } - time = { check_max( 8.h * task.attempt, 'time' ) } - } - withLabel:process_high { - cpus = { check_max( 12 * task.attempt, 'cpus' ) } - memory = { check_max( 72.GB * task.attempt, 'memory' ) } - time = { check_max( 16.h * task.attempt, 'time' ) } - } - withLabel:process_long { - time = { check_max( 20.h * task.attempt, 'time' ) } - } - withLabel:error_ignore { - errorStrategy = 'ignore' - } - withLabel:error_retry { - errorStrategy = 'retry' - maxRetries = 2 - } + // Process-specific resource requirements + withLabel:process_low { + cpus = { check_max( 2 * task.attempt, 'cpus' ) } + memory = { check_max( 12.GB * task.attempt, 'memory' ) } + time = { check_max( 4.h * task.attempt, 'time' ) } + } + withLabel:process_medium { + cpus = { check_max( 6 * task.attempt, 'cpus' ) } + memory = { check_max( 36.GB * task.attempt, 'memory' ) } + time = { check_max( 8.h * task.attempt, 'time' ) } + } + withLabel:process_high { + cpus = { check_max( 12 * task.attempt, 'cpus' ) } + memory = { check_max( 72.GB * task.attempt, 'memory' ) } + time = { check_max( 16.h * task.attempt, 'time' ) } + } + withLabel:process_long { + time = { check_max( 20.h * task.attempt, 'time' ) } + } + withLabel:error_ignore { + errorStrategy = 'ignore' + } + withLabel:error_retry { + errorStrategy = 'retry' + maxRetries = 2 + } } diff --git a/conf/modules.config b/conf/modules.config index 57915e5f..1d7da9dc 100755 --- a/conf/modules.config +++ b/conf/modules.config @@ -11,7 +11,7 @@ * publish_by_meta = Groovy list of keys available in meta map to append as directories to 
"publish_dir" path * If publish_by_meta = true - Value of ${meta['id']} is appended as a directory to "publish_dir" path * If publish_by_meta = ['id', 'custompath'] - If "id" is in meta map and "custompath" isn't then "${meta['id']}/custompath/" - * is appended as a directory to "publish_dir" path + * is appended as a directory to "publish_dir" path * If publish_by_meta = false / null - No directories are appended to "publish_dir" path * publish_files = Groovy map where key = "file_ext" and value = "directory" to publish results for that file extension * The value of "directory" is appended to the standard "publish_dir" path as defined above. @@ -123,7 +123,7 @@ params { } 'nanopore_snpeff_tabix' { args = '-p vcf -f' - suffix = '.snpeff' + suffix = '.snpeff' publish_dir = "${params.artic_minion_caller}/snpeff" } 'nanopore_snpeff_stats' { @@ -260,7 +260,7 @@ params { publish_dir = 'variants/ivar' } 'illumina_ivar_consensus' { - args = '-t 0.75 -q 20 -m 10 -n N' + args = '-t 0.75 -q 20 -m 10 -n N' args2 = '--count-orphans --no-BAQ --max-depth 0 --min-BQ 0 -aa' suffix = '.consensus' publish_dir = 'variants/ivar/consensus' @@ -279,7 +279,7 @@ params { } 'illumina_ivar_snpeff_tabix' { args = '-p vcf -f' - suffix = '.snpeff' + suffix = '.snpeff' publish_dir = 'variants/ivar/snpeff' } 'illumina_ivar_snpeff_stats' { @@ -329,7 +329,7 @@ params { } 'illumina_bcftools_consensus_maskfasta' { suffix = '.masked' - publish_files = false + publish_files = false publish_dir = 'variants/bcftools' } 'illumina_bcftools_consensus_bcftools' { @@ -345,12 +345,12 @@ params { publish_dir = 'variants/bcftools/snpeff' } 'illumina_bcftools_snpeff_bgzip' { - suffix = '.snpeff' + suffix = '.snpeff' publish_dir = 'variants/bcftools/snpeff' } 'illumina_bcftools_snpeff_tabix' { args = '-p vcf -f' - suffix = '.snpeff' + suffix = '.snpeff' publish_dir = 'variants/bcftools/snpeff' } 'illumina_bcftools_snpeff_stats' { diff --git a/docs/usage.md b/docs/usage.md index 411fb267..7b1657c8 100644 --- 
a/docs/usage.md +++ b/docs/usage.md @@ -177,26 +177,26 @@ For example, if the nf-core/rnaseq pipeline is failing after multiple re-submiss Error executing process > 'RNASEQ:ALIGN_STAR:STAR_ALIGN (WT_REP1)' Caused by: - Process `RNASEQ:ALIGN_STAR:STAR_ALIGN (WT_REP1)` terminated with an error exit status (137) + Process `RNASEQ:ALIGN_STAR:STAR_ALIGN (WT_REP1)` terminated with an error exit status (137) Command executed: - STAR \ - --genomeDir star \ - --readFilesIn WT_REP1_trimmed.fq.gz \ - --runThreadN 2 \ - --outFileNamePrefix WT_REP1. \ - + STAR \ + --genomeDir star \ + --readFilesIn WT_REP1_trimmed.fq.gz \ + --runThreadN 2 \ + --outFileNamePrefix WT_REP1. \ + Command exit status: - 137 + 137 Command output: - (empty) + (empty) Command error: - .command.sh: line 9: 30 Killed STAR --genomeDir star --readFilesIn WT_REP1_trimmed.fq.gz --runThreadN 2 --outFileNamePrefix WT_REP1. + .command.sh: line 9: 30 Killed STAR --genomeDir star --readFilesIn WT_REP1_trimmed.fq.gz --runThreadN 2 --outFileNamePrefix WT_REP1. 
Work dir: - /home/pipelinetest/work/9d/172ca5881234073e8d76f2a19c88fb + /home/pipelinetest/work/9d/172ca5881234073e8d76f2a19c88fb Tip: you can replicate the issue by changing to the process work dir and entering the command `bash .command.run` ``` @@ -205,9 +205,9 @@ To bypass this error you would need to find exactly which resources are set by t ```nextflow process { - withName: STAR_ALIGN { - memory = 100.GB - } + withName: STAR_ALIGN { + memory = 100.GB + } } ``` @@ -240,11 +240,11 @@ As you will see in the example below, we have: ```nextflow params { modules { - 'star_align' { - args = "--quantMode TranscriptomeSAM --twopassMode Basic --outSAMtype BAM Unsorted --readFilesCommand zcat --runRNGseed 0 --outFilterMultimapNmax 20 --alignSJDBoverhangMin 1 --outSAMattributes NH HI AS NM MD --quantTranscriptomeBan Singleend --outFilterMismatchNmax 16" - publish_dir = "my_star_directory" - publish_files = ['out':'log', 'tab':'log', 'bam':''] - } + 'star_align' { + args = "--quantMode TranscriptomeSAM --twopassMode Basic --outSAMtype BAM Unsorted --readFilesCommand zcat --runRNGseed 0 --outFilterMultimapNmax 20 --alignSJDBoverhangMin 1 --outSAMattributes NH HI AS NM MD --quantTranscriptomeBan Singleend --outFilterMismatchNmax 16" + publish_dir = "my_star_directory" + publish_files = ['out':'log', 'tab':'log', 'bam':''] + } } } ``` diff --git a/lib/NfcoreSchema.groovy b/lib/NfcoreSchema.groovy index 17e102a5..e6f3d824 100755 --- a/lib/NfcoreSchema.groovy +++ b/lib/NfcoreSchema.groovy @@ -12,7 +12,7 @@ import groovy.json.JsonSlurper import groovy.json.JsonBuilder class NfcoreSchema { - + /* * Resolve Schema path relative to main workflow directory */ @@ -319,7 +319,7 @@ class NfcoreSchema { output += NfcoreTemplate.dashedLine(params.monochrome_logs) return output } - + /* * Loop over nested exceptions and print the causingException */ diff --git a/lib/NfcoreTemplate.groovy b/lib/NfcoreTemplate.groovy index 9157c556..3ee255e1 100755 --- a/lib/NfcoreTemplate.groovy +++ 
b/lib/NfcoreTemplate.groovy @@ -57,7 +57,7 @@ class NfcoreTemplate { for (group in summary_params.keySet()) { summary << summary_params[group] } - + def misc_fields = [:] misc_fields['Date Started'] = workflow.start misc_fields['Date Completed'] = workflow.complete @@ -84,7 +84,7 @@ class NfcoreTemplate { email_fields['summary'] = summary << misc_fields email_fields['fail_mapped_reads'] = fail_mapped_reads.keySet() email_fields['min_mapped_reads'] = params.min_mapped_reads - + // On success try attach the multiqc report def mqc_report = null try { @@ -121,7 +121,7 @@ class NfcoreTemplate { def email_html = html_template.toString() // Render the sendmail template - def max_multiqc_email_size = params.max_multiqc_email_size as nextflow.util.MemoryUnit + def max_multiqc_email_size = params.max_multiqc_email_size as nextflow.util.MemoryUnit def smail_fields = [ email: email_address, subject: subject, email_txt: email_txt, email_html: email_html, projectDir: "$projectDir", mqcFile: mqc_report, mqcMaxSize: max_multiqc_email_size.toBytes()] def sf = new File("$projectDir/assets/sendmail_template.txt") def sendmail_template = engine.createTemplate(sf).make(smail_fields) @@ -275,7 +275,7 @@ class NfcoreTemplate { /* * nf-core logo - */ + */ public static String logo(workflow, monochrome_logs) { Map colors = logColours(monochrome_logs) String.format( diff --git a/lib/Utils.groovy b/lib/Utils.groovy index cdbafc31..119855f6 100755 --- a/lib/Utils.groovy +++ b/lib/Utils.groovy @@ -30,7 +30,7 @@ class Utils { if (conda_check_failed) { log.warn "=============================================================================\n" + - " There is a problem with your Conda configuration!\n\n" + + " There is a problem with your Conda configuration!\n\n" + " You will need to set-up the conda-forge and bioconda channels correctly.\n" + " Please refer to https://bioconda.github.io/user/install.html#set-up-channels\n" + " NB: The order of the channels matters!\n" + diff --git 
a/lib/WorkflowCommons.groovy b/lib/WorkflowCommons.groovy index f2635a08..4e7659b4 100755 --- a/lib/WorkflowCommons.groovy +++ b/lib/WorkflowCommons.groovy @@ -67,7 +67,7 @@ class WorkflowCommons { " All of the values in that column do not end with those supplied by:\n" + " --primer_left_suffix : $primer_left_suffix\n" + " --primer_right_suffix: $primer_right_suffix\n\n" + - " This information is required to collapse the primer intervals into amplicons\n" + + " This information is required to collapse the primer intervals into amplicons\n" + " for the coverage plots generated by the pipeline.\n" + "===================================================================================" } diff --git a/lib/WorkflowIllumina.groovy b/lib/WorkflowIllumina.groovy index dad01609..d05f1e0d 100755 --- a/lib/WorkflowIllumina.groovy +++ b/lib/WorkflowIllumina.groovy @@ -18,18 +18,18 @@ class WorkflowIllumina { System.exit(1) } - if (!params.fasta) { + if (!params.fasta) { log.error "Genome fasta file not specified with e.g. '--fasta genome.fa' or via a detectable config file." System.exit(1) } if (!params.skip_kraken2 && !params.kraken2_db) { - if (!params.kraken2_db_name) { + if (!params.kraken2_db_name) { log.error "Please specify a valid name to build Kraken2 database for host e.g. '--kraken2_db_name human'." System.exit(1) } } - + // Variant calling parameter validation def callers = params.callers ? 
params.callers.split(',').collect{ it.trim().toLowerCase() } : [] if ((valid_params['callers'] + callers).unique().size() != valid_params['callers'].size()) { @@ -67,7 +67,7 @@ class WorkflowIllumina { count++ if (count > 1) { log.warn "=============================================================================\n" + - " This pipeline does not officially support multi-fasta genome files!\n\n" + + " This pipeline does not officially support multi-fasta genome files!\n\n" + " The parameters and processes are tailored for viral genome analysis.\n" + " Please amend the '--fasta' parameter.\n" + "===================================================================================" @@ -88,7 +88,7 @@ class WorkflowIllumina { mapped_reads = line.tokenize().first().toInteger() } } - + def pass = false def logname = flagstat_file.getBaseName() - 'flagstat' if (mapped_reads > params.min_mapped_reads.toInteger()) { @@ -110,8 +110,8 @@ class WorkflowIllumina { count++ if (count > 1) { log.warn "=============================================================================\n" + - " Found '${name_prefix}' in the name field of the primer BED file!\n" + - " This suggests that you have used the SWIFT/SNAP protocol to prep your samples.\n" + + " Found '${name_prefix}' in the name field of the primer BED file!\n" + + " This suggests that you have used the SWIFT/SNAP protocol to prep your samples.\n" + " If so, please set '--ivar_trim_offset 5' as suggested in the issue below:\n" + " https://github.com/nf-core/viralrecon/issues/170\n" + "===================================================================================" diff --git a/lib/WorkflowMain.groovy b/lib/WorkflowMain.groovy index bf3ab106..16548934 100755 --- a/lib/WorkflowMain.groovy +++ b/lib/WorkflowMain.groovy @@ -9,7 +9,7 @@ class WorkflowMain { */ public static String citation(workflow) { return "If you use ${workflow.manifest.name} for your analysis please cite:\n\n" + - "* The pipeline\n" + + "* The pipeline\n" + " 
https://doi.org/10.5281/zenodo.3901628\n\n" + "* The nf-core framework\n" + " https://doi.org/10.1038/s41587-020-0439-x\n\n" + @@ -91,7 +91,7 @@ class WorkflowMain { def val = '' def support_link = " The default genome config used by the pipeline can be found here:\n" + " - https://github.com/nf-core/configs/blob/master/conf/pipeline/viralrecon/genomes.config\n\n" + - " If you would still like to blame us please come and find us on nf-core Slack:\n" + + " If you would still like to blame us please come and find us on nf-core Slack:\n" + " - https://nf-co.re/viralrecon#contributions-and-support\n" + "=============================================================================" if (params.genomes && params.genome && params.genomes.containsKey(params.genome)) { diff --git a/lib/WorkflowNanopore.groovy b/lib/WorkflowNanopore.groovy index 1fd218f8..dd91367b 100755 --- a/lib/WorkflowNanopore.groovy +++ b/lib/WorkflowNanopore.groovy @@ -11,7 +11,7 @@ class WorkflowNanopore { WorkflowCommons.genomeExistsError(params, log) // Generic parameter validation - if (!params.fasta) { + if (!params.fasta) { log.error "Genome fasta file not specified with e.g. '--fasta genome.fa' or via a detectable config file." 
System.exit(1) } diff --git a/main.nf b/main.nf index 8606daf7..959f7c5e 100644 --- a/main.nf +++ b/main.nf @@ -13,7 +13,7 @@ nextflow.enable.dsl = 2 /* ======================================================================================== - GENOME PARAMETER VALUES + GENOME PARAMETER VALUES ======================================================================================== */ @@ -38,7 +38,7 @@ params.primer_bed = WorkflowMain.getGenomeAttribute(params, 'primer_bed', log VALIDATE & PRINT PARAMETER SUMMARY ======================================================================================== */ - + WorkflowMain.initialise(workflow, params, log) /* @@ -48,14 +48,14 @@ WorkflowMain.initialise(workflow, params, log) */ workflow NFCORE_VIRALRECON { - + /* * WORKFLOW: Get SRA run information for public database ids, download and md5sum check FastQ files, auto-create samplesheet */ if (params.public_data_ids) { include { SRA_DOWNLOAD } from './workflows/sra_download' SRA_DOWNLOAD () - + /* * WORKFLOW: Variant and de novo assembly analysis for Illumina data */ @@ -65,7 +65,7 @@ workflow NFCORE_VIRALRECON { /* * WORKFLOW: Variant analysis for Nanopore data - */ + */ } else if (params.platform == 'nanopore') { include { NANOPORE } from './workflows/nanopore' NANOPORE () diff --git a/nextflow.config b/nextflow.config index be174ba7..4fff2931 100644 --- a/nextflow.config +++ b/nextflow.config @@ -5,120 +5,119 @@ * Default config options for all environments. 
*/ - // Global default params, used in configs params { - // Input options - input = null - platform = null - protocol = null + // Input options + input = null + platform = null + protocol = null + + // SRA download options + public_data_ids = null + skip_sra_fastq_download = false - // SRA download options - public_data_ids = null - skip_sra_fastq_download = false + // Reference genome options + genome = null + primer_set = null + primer_set_version = null + primer_fasta = null + primer_left_suffix = '_LEFT' + primer_right_suffix = '_RIGHT' + save_reference = false - // Reference genome options - genome = null - primer_set = null - primer_set_version = null - primer_fasta = null - primer_left_suffix = '_LEFT' - primer_right_suffix = '_RIGHT' - save_reference = false + // Nanopore options + fastq_dir = null + fast5_dir = null + sequencing_summary = null + min_barcode_reads = 100 + min_guppyplex_reads = 10 + artic_minion_caller = 'nanopolish' + artic_minion_aligner = 'minimap2' + artic_minion_medaka_model = null + skip_pycoqc = false + skip_nanoplot = false + + // Nanopore/Illumina options + asciigenome_read_depth = 50 + asciigenome_window_size = 50 + multiqc_title = null + multiqc_config = null + max_multiqc_email_size = '25.MB' + skip_mosdepth = false + skip_pangolin = false + skip_nextclade = false + skip_asciigenome = false + skip_variants_quast = false + skip_multiqc = false + + // Illumina QC, read trimming and filtering options + kraken2_db = 's3://nf-core-awsmegatests/viralrecon/input_data/kraken2_human.tar.gz' + kraken2_db_name = 'human' + kraken2_variants_host_filter = false + kraken2_assembly_host_filter = true + save_trimmed_fail = false + skip_fastqc = false + skip_kraken2 = false + skip_fastp = false + skip_cutadapt = false - // Nanopore options - fastq_dir = null - fast5_dir = null - sequencing_summary = null - min_barcode_reads = 100 - min_guppyplex_reads = 10 - artic_minion_caller = 'nanopolish' - artic_minion_aligner = 'minimap2' - 
artic_minion_medaka_model = null - skip_pycoqc = false - skip_nanoplot = false + // Illumina variant calling options + callers = null + min_mapped_reads = 1000 + ivar_trim_noprimer = false + ivar_trim_offset = null + filter_duplicates = false + save_unaligned = false + save_mpileup = false + skip_ivar_trim = false + skip_markduplicates = true + skip_picard_metrics = false + skip_snpeff = false + skip_consensus = false + skip_variants = false - // Nanopore/Illumina options - asciigenome_read_depth = 50 - asciigenome_window_size = 50 - multiqc_title = null - multiqc_config = null - max_multiqc_email_size = '25.MB' - skip_mosdepth = false - skip_pangolin = false - skip_nextclade = false - skip_asciigenome = false - skip_variants_quast = false - skip_multiqc = false - - // Illumina QC, read trimming and filtering options - kraken2_db = 's3://nf-core-awsmegatests/viralrecon/input_data/kraken2_human.tar.gz' - kraken2_db_name = 'human' - kraken2_variants_host_filter = false - kraken2_assembly_host_filter = true - save_trimmed_fail = false - skip_fastqc = false - skip_kraken2 = false - skip_fastp = false - skip_cutadapt = false - - // Illumina variant calling options - callers = null - min_mapped_reads = 1000 - ivar_trim_noprimer = false - ivar_trim_offset = null - filter_duplicates = false - save_unaligned = false - save_mpileup = false - skip_ivar_trim = false - skip_markduplicates = true - skip_picard_metrics = false - skip_snpeff = false - skip_consensus = false - skip_variants = false + // Illumina de novo assembly options + assemblers = 'spades' + spades_mode = 'rnaviral' + spades_hmm = null + blast_db = null + skip_bandage = false + skip_blast = false + skip_abacas = false + skip_plasmidid = false + skip_assembly_quast = false + skip_assembly = false - // Illumina de novo assembly options - assemblers = 'spades' - spades_mode = 'rnaviral' - spades_hmm = null - blast_db = null - skip_bandage = false - skip_blast = false - skip_abacas = false - skip_plasmidid = false 
- skip_assembly_quast = false - skip_assembly = false + // Boilerplate options + outdir = './results' + tracedir = "${params.outdir}/pipeline_info" + publish_dir_mode = 'copy' + email = null + email_on_fail = null + plaintext_email = false + monochrome_logs = false + help = false + enable_conda = false + singularity_pull_docker_container = false + validate_params = true + show_hidden_params = false + schema_ignore_params = 'genomes,modules,igenomes_base' - // Boilerplate options - outdir = './results' - tracedir = "${params.outdir}/pipeline_info" - publish_dir_mode = 'copy' - email = null - email_on_fail = null - plaintext_email = false - monochrome_logs = false - help = false - enable_conda = false - singularity_pull_docker_container = false - validate_params = true - show_hidden_params = false - schema_ignore_params = 'genomes,modules,igenomes_base' - - // Config options - custom_config_version = 'master' - custom_config_base = "https://raw.githubusercontent.com/nf-core/configs/${params.custom_config_version}" - hostnames = [:] - config_profile_name = null - config_profile_description = null - config_profile_contact = null - config_profile_url = null + // Config options + custom_config_version = 'master' + custom_config_base = "https://raw.githubusercontent.com/nf-core/configs/${params.custom_config_version}" + hostnames = [:] + config_profile_name = null + config_profile_description = null + config_profile_contact = null + config_profile_url = null - // Max resource options - // Defaults only, expecting to be overwritten - max_memory = '128.GB' - max_cpus = 16 - max_time = '240.h' + // Max resource options + // Defaults only, expecting to be overwritten + max_memory = '128.GB' + max_cpus = 16 + max_time = '240.h' } @@ -130,80 +129,79 @@ includeConfig 'conf/modules.config' // Load nf-core custom profiles from different Institutions try { - includeConfig "${params.custom_config_base}/nfcore_custom.config" + includeConfig 
"${params.custom_config_base}/nfcore_custom.config" } catch (Exception e) { - System.err.println("WARNING: Could not load nf-core/config profiles: ${params.custom_config_base}/nfcore_custom.config") + System.err.println("WARNING: Could not load nf-core/config profiles: ${params.custom_config_base}/nfcore_custom.config") } // Load nf-core/viralrecon custom config. // The default 'genomes.config' used by the pipeline can be found here and is auto-loaded via the pipeline config: // https://github.com/nf-core/configs/blob/master/conf/pipeline/viralrecon/genomes.config try { - includeConfig "${params.custom_config_base}/pipeline/viralrecon.config" + includeConfig "${params.custom_config_base}/pipeline/viralrecon.config" } catch (Exception e) { - System.err.println("WARNING: Could not load nf-core/config/viralrecon profiles: ${params.custom_config_base}/pipeline/viralrecon.config") + System.err.println("WARNING: Could not load nf-core/config/viralrecon profiles: ${params.custom_config_base}/pipeline/viralrecon.config") } profiles { - debug { process.beforeScript = 'echo $HOSTNAME' } - conda { - params.enable_conda = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - - } - docker { - docker.enabled = true - // Avoid this error: - // WARNING: Your kernel does not support swap limit capabilities or the cgroup is not mounted. Memory limited without swap. - // Testing this in nf-core after discussion here https://github.com/nf-core/tools/pull/351 - // once this is established and works well, nextflow might implement this behavior as new default. 
- docker.runOptions = '-u \$(id -u):\$(id -g)' - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - singularity { - singularity.enabled = true - singularity.autoMounts = true - docker.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - podman { - podman.enabled = true - docker.enabled = false - singularity.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - shifter { - shifter.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - charliecloud.enabled = false - } - charliecloud { - charliecloud.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - } - test { includeConfig 'conf/test.config' } - test_sra { includeConfig 'conf/test_sra.config' } - test_sispa { includeConfig 'conf/test_sispa.config' } - test_nanopore { includeConfig 'conf/test_nanopore.config' } - test_full { includeConfig 'conf/test_full.config' } - test_full_illumina { includeConfig 'conf/test_full.config' } - test_full_nanopore { includeConfig 'conf/test_full_nanopore.config' } - test_full_sispa { includeConfig 'conf/test_full_sispa.config' } + debug { process.beforeScript = 'echo $HOSTNAME' } + conda { + params.enable_conda = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + docker { + docker.enabled = true + // Avoid this error: + // WARNING: Your kernel does not support swap limit capabilities or the cgroup is not mounted. Memory limited without swap. + // Testing this in nf-core after discussion here https://github.com/nf-core/tools/pull/351 + // once this is established and works well, nextflow might implement this behavior as new default. 
+ docker.runOptions = '-u \$(id -u):\$(id -g)' + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + singularity { + singularity.enabled = true + singularity.autoMounts = true + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + podman { + podman.enabled = true + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + shifter { + shifter.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + } + charliecloud { + charliecloud.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + } + test { includeConfig 'conf/test.config' } + test_sra { includeConfig 'conf/test_sra.config' } + test_sispa { includeConfig 'conf/test_sispa.config' } + test_nanopore { includeConfig 'conf/test_nanopore.config' } + test_full { includeConfig 'conf/test_full.config' } + test_full_illumina { includeConfig 'conf/test_full.config' } + test_full_nanopore { includeConfig 'conf/test_full_nanopore.config' } + test_full_sispa { includeConfig 'conf/test_full_sispa.config' } } // Increase time available to build Conda environment @@ -211,9 +209,9 @@ conda { createTimeout = "120 min" } // Export these variables to prevent local Python/R libraries from conflicting with those in the container env { - PYTHONNOUSERSITE = 1 - R_PROFILE_USER = "/.Rprofile" - R_ENVIRON_USER = "/.Renviron" + PYTHONNOUSERSITE = 1 + R_PROFILE_USER = "/.Rprofile" + R_ENVIRON_USER = "/.Renviron" } // Capture exit codes from upstream processes when piping @@ -221,61 +219,61 @@ process.shell = ['/bin/bash', '-euo', 'pipefail'] def trace_timestamp = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss') timeline { - enabled = true - file = "${params.tracedir}/execution_timeline_${trace_timestamp}.html" + enabled = 
true + file = "${params.tracedir}/execution_timeline_${trace_timestamp}.html" } report { - enabled = true - file = "${params.tracedir}/execution_report_${trace_timestamp}.html" + enabled = true + file = "${params.tracedir}/execution_report_${trace_timestamp}.html" } trace { - enabled = true - file = "${params.tracedir}/execution_trace_${trace_timestamp}.txt" + enabled = true + file = "${params.tracedir}/execution_trace_${trace_timestamp}.txt" } dag { - enabled = true - file = "${params.tracedir}/pipeline_dag_${trace_timestamp}.svg" + enabled = true + file = "${params.tracedir}/pipeline_dag_${trace_timestamp}.svg" } manifest { - name = 'nf-core/viralrecon' - author = 'Sarai Varona and Sara Monzon' - homePage = 'https://github.com/nf-core/viralrecon' - description = 'Assembly and intrahost/low-frequency variant calling for viral samples' - mainScript = 'main.nf' - nextflowVersion = '!>=21.04.0' - version = '2.0' + name = 'nf-core/viralrecon' + author = 'Sarai Varona and Sara Monzon' + homePage = 'https://github.com/nf-core/viralrecon' + description = 'Assembly and intrahost/low-frequency variant calling for viral samples' + mainScript = 'main.nf' + nextflowVersion = '!>=21.04.0' + version = '2.0' } // Function to ensure that resource requirements don't go beyond // a maximum limit def check_max(obj, type) { - if (type == 'memory') { - try { - if (obj.compareTo(params.max_memory as nextflow.util.MemoryUnit) == 1) - return params.max_memory as nextflow.util.MemoryUnit - else - return obj - } catch (all) { - println " ### ERROR ### Max memory '${params.max_memory}' is not valid! Using default value: $obj" - return obj - } - } else if (type == 'time') { - try { - if (obj.compareTo(params.max_time as nextflow.util.Duration) == 1) - return params.max_time as nextflow.util.Duration - else - return obj - } catch (all) { - println " ### ERROR ### Max time '${params.max_time}' is not valid! 
Using default value: $obj" - return obj - } - } else if (type == 'cpus') { - try { - return Math.min( obj, params.max_cpus as int ) - } catch (all) { - println " ### ERROR ### Max cpus '${params.max_cpus}' is not valid! Using default value: $obj" - return obj + if (type == 'memory') { + try { + if (obj.compareTo(params.max_memory as nextflow.util.MemoryUnit) == 1) + return params.max_memory as nextflow.util.MemoryUnit + else + return obj + } catch (all) { + println " ### ERROR ### Max memory '${params.max_memory}' is not valid! Using default value: $obj" + return obj + } + } else if (type == 'time') { + try { + if (obj.compareTo(params.max_time as nextflow.util.Duration) == 1) + return params.max_time as nextflow.util.Duration + else + return obj + } catch (all) { + println " ### ERROR ### Max time '${params.max_time}' is not valid! Using default value: $obj" + return obj + } + } else if (type == 'cpus') { + try { + return Math.min( obj, params.max_cpus as int ) + } catch (all) { + println " ### ERROR ### Max cpus '${params.max_cpus}' is not valid! 
Using default value: $obj" + return obj + } } - } } diff --git a/nextflow_schema.json b/nextflow_schema.json index f991d1fb..8474ed57 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -685,4 +685,4 @@ "$ref": "#/definitions/institutional_config_options" } ] -} \ No newline at end of file +} diff --git a/subworkflows/local/assembly_minia.nf b/subworkflows/local/assembly_minia.nf index 8d609b3b..24aee3b1 100644 --- a/subworkflows/local/assembly_minia.nf +++ b/subworkflows/local/assembly_minia.nf @@ -9,7 +9,7 @@ params.abacas_options = [:] params.plasmidid_options = [:] params.quast_options = [:] -include { MINIA } from '../../modules/nf-core/software/minia/main' addParams( options: params.minia_options ) +include { MINIA } from '../../modules/nf-core/software/minia/main' addParams( options: params.minia_options ) include { ASSEMBLY_QC } from './assembly_qc' addParams( blastn_options: params.blastn_options, blastn_filter_options: params.blastn_filter_options, abacas_options: params.abacas_options, plasmidid_options: params.plasmidid_options, quast_options: params.quast_options ) workflow ASSEMBLY_MINIA { @@ -19,7 +19,7 @@ workflow ASSEMBLY_MINIA { gff // channel: /path/to/genome.gff blast_db // channel: /path/to/blast_db/ blast_header // channel: /path/to/blast_header.txt - + main: /* * Assemble reads with minia @@ -38,7 +38,7 @@ workflow ASSEMBLY_MINIA { /* * Downstream assembly steps */ - ASSEMBLY_QC ( + ASSEMBLY_QC ( ch_contigs, fasta, gff, @@ -59,7 +59,7 @@ workflow ASSEMBLY_MINIA { quast_results = ASSEMBLY_QC.out.quast_results // channel: [ val(meta), [ results ] ] quast_tsv = ASSEMBLY_QC.out.quast_tsv // channel: [ val(meta), [ tsv ] ] quast_version = ASSEMBLY_QC.out.quast_version // path: *.version.txt - + abacas_results = ASSEMBLY_QC.out.abacas_results // channel: [ val(meta), [ results ] ] abacas_version = ASSEMBLY_QC.out.abacas_version // path: *.version.txt @@ -73,4 +73,4 @@ workflow ASSEMBLY_MINIA { plasmidid_kmer = 
ASSEMBLY_QC.out.plasmidid_kmer // channel: [ val(meta), [ kmer/ ] ] plasmidid_version = ASSEMBLY_QC.out.plasmidid_version // path: *.version.txt -} \ No newline at end of file +} diff --git a/subworkflows/local/assembly_qc.nf b/subworkflows/local/assembly_qc.nf index 6085b307..edc5ff03 100644 --- a/subworkflows/local/assembly_qc.nf +++ b/subworkflows/local/assembly_qc.nf @@ -21,7 +21,7 @@ workflow ASSEMBLY_QC { gff // channel: /path/to/genome.gff blast_db // channel: /path/to/blast_db/ blast_header // channel: /path/to/blast_header.txt - + main: /* * Run blastn on assembly scaffolds @@ -64,7 +64,7 @@ workflow ASSEMBLY_QC { /* * Assembly report with PlasmidID - */ + */ ch_plasmidid_html = Channel.empty() ch_plasmidid_tab = Channel.empty() ch_plasmidid_images = Channel.empty() @@ -91,7 +91,7 @@ workflow ASSEMBLY_QC { blast_txt = ch_blast_txt // channel: [ val(meta), [ txt ] ] blast_filter_txt = ch_blast_filter_txt // channel: [ val(meta), [ txt ] ] blast_version = ch_blast_version // path: *.version.txt - + quast_results = ch_quast_results // channel: [ val(meta), [ results ] ] quast_tsv = ch_quast_tsv // channel: [ val(meta), [ tsv ] ] quast_version = ch_quast_version // path: *.version.txt @@ -110,4 +110,3 @@ workflow ASSEMBLY_QC { plasmidid_version = ch_plasmidid_version // path: *.version.txt } - diff --git a/subworkflows/local/assembly_spades.nf b/subworkflows/local/assembly_spades.nf index 6c472527..0560bf05 100644 --- a/subworkflows/local/assembly_spades.nf +++ b/subworkflows/local/assembly_spades.nf @@ -10,8 +10,8 @@ params.abacas_options = [:] params.plasmidid_options = [:] params.quast_options = [:] -include { SPADES } from '../../modules/nf-core/software/spades/main' addParams( options: params.spades_options ) -include { BANDAGE_IMAGE } from '../../modules/nf-core/software/bandage/image/main' addParams( options: params.bandage_options ) +include { SPADES } from '../../modules/nf-core/software/spades/main' addParams( options: params.spades_options ) 
+include { BANDAGE_IMAGE } from '../../modules/nf-core/software/bandage/image/main' addParams( options: params.bandage_options ) include { ASSEMBLY_QC } from './assembly_qc' addParams( blastn_options: params.blastn_options, blastn_filter_options: params.blastn_filter_options, abacas_options: params.abacas_options, plasmidid_options: params.plasmidid_options, quast_options: params.quast_options ) workflow ASSEMBLY_SPADES { @@ -22,7 +22,7 @@ workflow ASSEMBLY_SPADES { gff // channel: /path/to/genome.gff blast_db // channel: /path/to/blast_db/ blast_header // channel: /path/to/blast_header.txt - + main: /* * Filter for paired-end samples if running metaSPAdes / metaviralSPAdes / metaplasmidSPAdes @@ -33,7 +33,7 @@ workflow ASSEMBLY_SPADES { .filter { meta, fastq -> !meta.single_end } .set { ch_reads } } - + /* * Assemble reads with SPAdes */ @@ -47,13 +47,13 @@ workflow ASSEMBLY_SPADES { .scaffolds .filter { meta, scaffold -> scaffold.size() > 0 } .set { ch_scaffolds } - + SPADES .out .gfa .filter { meta, gfa -> gfa.size() > 0 } .set { ch_gfa } - + /* * Generate assembly visualisation with Bandage */ @@ -70,7 +70,7 @@ workflow ASSEMBLY_SPADES { /* * Downstream assembly steps */ - ASSEMBLY_QC ( + ASSEMBLY_QC ( ch_scaffolds, fasta, gff, @@ -98,7 +98,7 @@ workflow ASSEMBLY_SPADES { quast_results = ASSEMBLY_QC.out.quast_results // channel: [ val(meta), [ results ] ] quast_tsv = ASSEMBLY_QC.out.quast_tsv // channel: [ val(meta), [ tsv ] ] quast_version = ASSEMBLY_QC.out.quast_version // path: *.version.txt - + abacas_results = ASSEMBLY_QC.out.abacas_results // channel: [ val(meta), [ results ] ] abacas_version = ASSEMBLY_QC.out.abacas_version // path: *.version.txt @@ -111,4 +111,4 @@ workflow ASSEMBLY_SPADES { plasmidid_fasta = ASSEMBLY_QC.out.plasmidid_fasta // channel: [ val(meta), [ fasta_files/ ] ] plasmidid_kmer = ASSEMBLY_QC.out.plasmidid_kmer // channel: [ val(meta), [ kmer/ ] ] plasmidid_version = ASSEMBLY_QC.out.plasmidid_version // path: *.version.txt -} \ No 
newline at end of file +} diff --git a/subworkflows/local/assembly_unicycler.nf b/subworkflows/local/assembly_unicycler.nf index 48920b93..63c047e9 100644 --- a/subworkflows/local/assembly_unicycler.nf +++ b/subworkflows/local/assembly_unicycler.nf @@ -10,8 +10,8 @@ params.abacas_options = [:] params.plasmidid_options = [:] params.quast_options = [:] -include { UNICYCLER } from '../../modules/nf-core/software/unicycler/main' addParams( options: params.unicycler_options ) -include { BANDAGE_IMAGE } from '../../modules/nf-core/software/bandage/image/main' addParams( options: params.bandage_options ) +include { UNICYCLER } from '../../modules/nf-core/software/unicycler/main' addParams( options: params.unicycler_options ) +include { BANDAGE_IMAGE } from '../../modules/nf-core/software/bandage/image/main' addParams( options: params.bandage_options ) include { ASSEMBLY_QC } from './assembly_qc' addParams( blastn_options: params.blastn_options, blastn_filter_options: params.blastn_filter_options, abacas_options: params.abacas_options, plasmidid_options: params.plasmidid_options, quast_options: params.quast_options ) workflow ASSEMBLY_UNICYCLER { @@ -21,7 +21,7 @@ workflow ASSEMBLY_UNICYCLER { gff // channel: /path/to/genome.gff blast_db // channel: /path/to/blast_db/ blast_header // channel: /path/to/blast_header.txt - + main: /* * Assemble reads with Unicycler @@ -36,7 +36,7 @@ workflow ASSEMBLY_UNICYCLER { .scaffolds .filter { meta, scaffold -> scaffold.size() > 0 } .set { ch_scaffolds } - + UNICYCLER .out .gfa @@ -59,7 +59,7 @@ workflow ASSEMBLY_UNICYCLER { /* * Downstream assembly steps */ - ASSEMBLY_QC ( + ASSEMBLY_QC ( ch_scaffolds, fasta, gff, @@ -84,7 +84,7 @@ workflow ASSEMBLY_UNICYCLER { quast_results = ASSEMBLY_QC.out.quast_results // channel: [ val(meta), [ results ] ] quast_tsv = ASSEMBLY_QC.out.quast_tsv // channel: [ val(meta), [ tsv ] ] quast_version = ASSEMBLY_QC.out.quast_version // path: *.version.txt - + abacas_results = ASSEMBLY_QC.out.abacas_results 
// channel: [ val(meta), [ results ] ] abacas_version = ASSEMBLY_QC.out.abacas_version // path: *.version.txt @@ -98,4 +98,4 @@ workflow ASSEMBLY_UNICYCLER { plasmidid_kmer = ASSEMBLY_QC.out.plasmidid_kmer // channel: [ val(meta), [ kmer/ ] ] plasmidid_version = ASSEMBLY_QC.out.plasmidid_version // path: *.version.txt -} \ No newline at end of file +} diff --git a/subworkflows/local/input_check.nf b/subworkflows/local/input_check.nf index 6acd9c07..46fae8d6 100644 --- a/subworkflows/local/input_check.nf +++ b/subworkflows/local/input_check.nf @@ -10,7 +10,7 @@ workflow INPUT_CHECK { take: samplesheet // file : /path/to/samplesheet.csv platform // string: sequencing platform. Accepted values: 'illumina', 'nanopore' - + main: SAMPLESHEET_CHECK ( samplesheet, platform ) @@ -50,5 +50,5 @@ def create_fastq_channels(LinkedHashMap row) { } array = [ meta, [ file(row.fastq_1), file(row.fastq_2) ] ] } - return array -} \ No newline at end of file + return array +} diff --git a/subworkflows/local/make_consensus.nf b/subworkflows/local/make_consensus.nf index 8d20972b..ea295481 100644 --- a/subworkflows/local/make_consensus.nf +++ b/subworkflows/local/make_consensus.nf @@ -20,14 +20,14 @@ workflow MAKE_CONSENSUS { take: bam_vcf // channel: [ val(meta), [ bam ], [ vcf ], [ tbi ] ] fasta - + main: BEDTOOLS_GENOMECOV ( bam_vcf.map { meta, bam, vcf, tbi -> [ meta, bam ] } ) - + BEDTOOLS_MERGE ( BEDTOOLS_GENOMECOV.out.bed ) MAKE_BED_MASK ( bam_vcf.map { meta, bam, vcf, tbi -> [ meta, vcf ] }.join( BEDTOOLS_MERGE.out.bed, by: [0] ) ) - + BEDTOOLS_MASKFASTA ( MAKE_BED_MASK.out.bed, fasta ) BCFTOOLS_CONSENSUS ( bam_vcf.map { meta, bam, vcf, tbi -> [ meta, vcf, tbi ] }.join( BEDTOOLS_MASKFASTA.out.fasta, by: [0] ) ) @@ -38,8 +38,7 @@ workflow MAKE_CONSENSUS { fasta = BCFTOOLS_CONSENSUS.out.fasta // channel: [ val(meta), [ fasta ] ] tsv = PLOT_BASE_DENSITY.out.tsv // channel: [ val(meta), [ tsv ] ] pdf = PLOT_BASE_DENSITY.out.pdf // channel: [ val(meta), [ pdf ] ] - bedtools_version = 
BEDTOOLS_MERGE.out.version // path: *.version.txt + bedtools_version = BEDTOOLS_MERGE.out.version // path: *.version.txt bcftools_version = BCFTOOLS_CONSENSUS.out.version // path: *.version.txt } - diff --git a/subworkflows/local/prepare_genome_illumina.nf b/subworkflows/local/prepare_genome_illumina.nf index ec6c39cc..fd883197 100644 --- a/subworkflows/local/prepare_genome_illumina.nf +++ b/subworkflows/local/prepare_genome_illumina.nf @@ -14,7 +14,7 @@ params.kraken2_build_options = [:] include { GUNZIP as GUNZIP_FASTA - GUNZIP as GUNZIP_GFF + GUNZIP as GUNZIP_GFF GUNZIP as GUNZIP_PRIMER_BED GUNZIP as GUNZIP_PRIMER_FASTA } from '../../modules/nf-core/software/gunzip/main' addParams( options: params.genome_options ) include { UNTAR as UNTAR_BOWTIE2_INDEX } from '../../modules/nf-core/software/untar/main' addParams( options: params.index_options ) @@ -146,7 +146,7 @@ workflow PREPARE_GENOME { ch_snpeff_db = SNPEFF_BUILD.out.db ch_snpeff_config = SNPEFF_BUILD.out.config } - + emit: fasta = ch_fasta // path: genome.fasta gff = ch_gff // path: genome.gff diff --git a/subworkflows/local/prepare_genome_nanopore.nf b/subworkflows/local/prepare_genome_nanopore.nf index a8d12724..a0c040fe 100644 --- a/subworkflows/local/prepare_genome_nanopore.nf +++ b/subworkflows/local/prepare_genome_nanopore.nf @@ -8,7 +8,7 @@ params.snpeff_build_options = [:] include { GUNZIP as GUNZIP_FASTA - GUNZIP as GUNZIP_GFF + GUNZIP as GUNZIP_GFF GUNZIP as GUNZIP_PRIMER_BED } from '../../modules/nf-core/software/gunzip/main' addParams( options: params.genome_options ) include { COLLAPSE_PRIMERS } from '../../modules/local/collapse_primers' addParams( options: params.collapse_primers_options ) include { SNPEFF_BUILD } from '../../modules/local/snpeff_build' addParams( options: params.snpeff_build_options ) @@ -70,7 +70,7 @@ workflow PREPARE_GENOME { ch_snpeff_db = SNPEFF_BUILD.out.db ch_snpeff_config = SNPEFF_BUILD.out.config } - + emit: fasta = ch_fasta // path: genome.fasta gff = ch_gff // 
path: genome.gff diff --git a/subworkflows/local/primer_trim_ivar.nf b/subworkflows/local/primer_trim_ivar.nf index 16fa5788..fe94be30 100644 --- a/subworkflows/local/primer_trim_ivar.nf +++ b/subworkflows/local/primer_trim_ivar.nf @@ -24,7 +24,7 @@ workflow PRIMER_TRIM_IVAR { * Sort, index BAM file and run samtools stats, flagstat and idxstats */ BAM_SORT_SAMTOOLS ( IVAR_TRIM.out.bam ) - + emit: bam_orig = IVAR_TRIM.out.bam // channel: [ val(meta), bam ] log_out = IVAR_TRIM.out.log // channel: [ val(meta), log ] diff --git a/subworkflows/local/snpeff_snpsift.nf b/subworkflows/local/snpeff_snpsift.nf index df12a8a9..d6095a3c 100644 --- a/subworkflows/local/snpeff_snpsift.nf +++ b/subworkflows/local/snpeff_snpsift.nf @@ -18,14 +18,14 @@ workflow SNPEFF_SNPSIFT { db // path : snpEff database config // path : snpEff config fasta // path : genome.fasta - + main: SNPEFF_ANN ( vcf, db, config, fasta ) VCF_BGZIP_TABIX_STATS ( SNPEFF_ANN.out.vcf ) SNPSIFT_EXTRACTFIELDS ( VCF_BGZIP_TABIX_STATS.out.vcf ) - + emit: csv = SNPEFF_ANN.out.csv // channel: [ val(meta), [ csv ] ] txt = SNPEFF_ANN.out.txt // channel: [ val(meta), [ txt ] ] diff --git a/subworkflows/local/variants_bcftools.nf b/subworkflows/local/variants_bcftools.nf index b6bd0515..7512f184 100644 --- a/subworkflows/local/variants_bcftools.nf +++ b/subworkflows/local/variants_bcftools.nf @@ -19,7 +19,7 @@ params.pangolin_options = [:] params.nextclade_options = [:] params.asciigenome_options = [:] -include { BCFTOOLS_MPILEUP } from '../../modules/nf-core/software/bcftools/mpileup/main' addParams( options: params.bcftools_mpileup_options ) +include { BCFTOOLS_MPILEUP } from '../../modules/nf-core/software/bcftools/mpileup/main' addParams( options: params.bcftools_mpileup_options ) include { QUAST } from '../../modules/nf-core/software/quast/main' addParams( options: params.quast_options ) include { PANGOLIN } from '../../modules/nf-core/software/pangolin/main' addParams( options: params.pangolin_options ) include { 
NEXTCLADE } from '../../modules/nf-core/software/nextclade/main' addParams( options: params.nextclade_options ) @@ -35,7 +35,7 @@ workflow VARIANTS_BCFTOOLS { bed // channel: /path/to/primers.bed snpeff_db // channel: /path/to/snpeff_db/ snpeff_config // channel: /path/to/snpeff.config - + main: /* * Call variants @@ -62,7 +62,7 @@ workflow VARIANTS_BCFTOOLS { ch_bases_tsv = MAKE_CONSENSUS.out.tsv ch_bases_pdf = MAKE_CONSENSUS.out.pdf ch_bedtools_version = MAKE_CONSENSUS.out.bedtools_version - + if (!params.skip_variants_quast) { QUAST ( ch_consensus.collect{ it[1] }, fasta, gff, true, params.gff ) ch_quast_results = QUAST.out.results @@ -141,12 +141,12 @@ workflow VARIANTS_BCFTOOLS { tbi = BCFTOOLS_MPILEUP.out.tbi // channel: [ val(meta), [ tbi ] ] stats = BCFTOOLS_MPILEUP.out.stats // channel: [ val(meta), [ txt ] ] bcftools_version = BCFTOOLS_MPILEUP.out.version // path: *.version.txt - + consensus = ch_consensus // channel: [ val(meta), [ fasta ] ] bases_tsv = ch_bases_tsv // channel: [ val(meta), [ tsv ] ] bases_pdf = ch_bases_pdf // channel: [ val(meta), [ pdf ] ] - bedtools_version = ch_bedtools_version // path: *.version.txt - + bedtools_version = ch_bedtools_version // path: *.version.txt + quast_results = ch_quast_results // channel: [ val(meta), [ results ] ] quast_tsv = ch_quast_tsv // channel: [ val(meta), [ tsv ] ] quast_version = ch_quast_version // path: *.version.txt @@ -169,4 +169,4 @@ workflow VARIANTS_BCFTOOLS { asciigenome_pdf = ch_asciigenome_pdf // channel: [ val(meta), [ pdf ] ] asciigenome_version = ch_asciigenome_version // path: *.version.txt -} \ No newline at end of file +} diff --git a/subworkflows/local/variants_ivar.nf b/subworkflows/local/variants_ivar.nf index c44ef534..c7675a9d 100644 --- a/subworkflows/local/variants_ivar.nf +++ b/subworkflows/local/variants_ivar.nf @@ -39,7 +39,7 @@ workflow VARIANTS_IVAR { snpeff_db // channel: /path/to/snpeff_db/ snpeff_config // channel: /path/to/snpeff.config ivar_multiqc_header // channel: 
/path/to/multiqc_header for ivar variants - + main: /* * Call variants @@ -71,7 +71,7 @@ workflow VARIANTS_IVAR { IVAR_CONSENSUS ( bam, fasta ) ch_consensus = IVAR_CONSENSUS.out.fasta ch_consensus_qual = IVAR_CONSENSUS.out.qual - + PLOT_BASE_DENSITY ( ch_consensus ) ch_bases_tsv = PLOT_BASE_DENSITY.out.tsv ch_bases_pdf = PLOT_BASE_DENSITY.out.pdf @@ -191,4 +191,3 @@ workflow VARIANTS_IVAR { asciigenome_pdf = ch_asciigenome_pdf // channel: [ val(meta), [ pdf ] ] asciigenome_version = ch_asciigenome_version // path: *.version.txt } - diff --git a/subworkflows/nf-core/align_bowtie2.nf b/subworkflows/nf-core/align_bowtie2.nf index 8f1022b0..0cbb658c 100644 --- a/subworkflows/nf-core/align_bowtie2.nf +++ b/subworkflows/nf-core/align_bowtie2.nf @@ -12,7 +12,7 @@ workflow ALIGN_BOWTIE2 { take: reads // channel: [ val(meta), [ reads ] ] index // channel: /path/to/bowtie2/index/ - + main: /* * Map reads with BOWTIE2 diff --git a/subworkflows/nf-core/bam_sort_samtools.nf b/subworkflows/nf-core/bam_sort_samtools.nf index d4da7471..42430501 100644 --- a/subworkflows/nf-core/bam_sort_samtools.nf +++ b/subworkflows/nf-core/bam_sort_samtools.nf @@ -11,7 +11,7 @@ include { BAM_STATS_SAMTOOLS } from './bam_stats_samtools' workflow BAM_SORT_SAMTOOLS { take: bam // channel: [ val(meta), [ bam ] ] - + main: SAMTOOLS_SORT ( bam ) SAMTOOLS_INDEX ( SAMTOOLS_SORT.out.bam ) diff --git a/subworkflows/nf-core/bam_stats_samtools.nf b/subworkflows/nf-core/bam_stats_samtools.nf index fcf7645e..d948ed49 100644 --- a/subworkflows/nf-core/bam_stats_samtools.nf +++ b/subworkflows/nf-core/bam_stats_samtools.nf @@ -11,7 +11,7 @@ include { SAMTOOLS_FLAGSTAT } from '../../modules/nf-core/software/samtools/flag workflow BAM_STATS_SAMTOOLS { take: bam_bai // channel: [ val(meta), [ bam ], [bai] ] - + main: SAMTOOLS_STATS ( bam_bai ) SAMTOOLS_FLAGSTAT ( bam_bai ) diff --git a/subworkflows/nf-core/fastqc_fastp.nf b/subworkflows/nf-core/fastqc_fastp.nf index 61df2c0f..23c62ec6 100644 --- 
a/subworkflows/nf-core/fastqc_fastp.nf +++ b/subworkflows/nf-core/fastqc_fastp.nf @@ -13,7 +13,7 @@ include { FASTP } from '../../modules/nf-core/software/fastp/mai workflow FASTQC_FASTP { take: reads // channel: [ val(meta), [ reads ] ] - + main: fastqc_raw_html = Channel.empty() fastqc_raw_zip = Channel.empty() diff --git a/subworkflows/nf-core/filter_bam_samtools.nf b/subworkflows/nf-core/filter_bam_samtools.nf index 26b7b719..aff9495d 100644 --- a/subworkflows/nf-core/filter_bam_samtools.nf +++ b/subworkflows/nf-core/filter_bam_samtools.nf @@ -12,7 +12,7 @@ include { BAM_STATS_SAMTOOLS } from '../nf-core/bam_stats_samtools' workflow FILTER_BAM_SAMTOOLS { take: bam // channel: [ val(meta), [ bam ] ] - + main: /* * Filter BAM using Samtools view diff --git a/subworkflows/nf-core/vcf_bgzip_tabix_stats.nf b/subworkflows/nf-core/vcf_bgzip_tabix_stats.nf index d5587a79..fa6cd5e9 100644 --- a/subworkflows/nf-core/vcf_bgzip_tabix_stats.nf +++ b/subworkflows/nf-core/vcf_bgzip_tabix_stats.nf @@ -12,7 +12,7 @@ include { VCF_TABIX_STATS } from './vcf_tabix_stats' workflow VCF_BGZIP_TABIX_STATS { take: vcf // channel: [ val(meta), [ vcf ] ] - + main: TABIX_BGZIP ( vcf ) VCF_TABIX_STATS ( TABIX_BGZIP.out.gz ) diff --git a/subworkflows/nf-core/vcf_tabix_stats.nf b/subworkflows/nf-core/vcf_tabix_stats.nf index 9a684da6..4430e617 100644 --- a/subworkflows/nf-core/vcf_tabix_stats.nf +++ b/subworkflows/nf-core/vcf_tabix_stats.nf @@ -11,7 +11,7 @@ include { BCFTOOLS_STATS } from '../../modules/nf-core/software/bcftools/stats/m workflow VCF_TABIX_STATS { take: vcf // channel: [ val(meta), [ vcf ] ] - + main: TABIX_TABIX ( vcf ) BCFTOOLS_STATS ( vcf ) diff --git a/workflows/illumina.nf b/workflows/illumina.nf index 294885e2..69a93e09 100644 --- a/workflows/illumina.nf +++ b/workflows/illumina.nf @@ -36,7 +36,7 @@ if (!callers) { callers = params.protocol == 'amplicon' ? 
['ivar'] : ['bcftools /* ======================================================================================== - CONFIG FILES + CONFIG FILES ======================================================================================== */ @@ -66,7 +66,7 @@ if (!params.skip_variants) { multiqc_options.publish_files.put('variants_metrics_mqc.csv','') } -include { BCFTOOLS_ISEC } from '../modules/local/bcftools_isec' addParams( options: modules['illumina_bcftools_isec'] ) +include { BCFTOOLS_ISEC } from '../modules/local/bcftools_isec' addParams( options: modules['illumina_bcftools_isec'] ) include { CUTADAPT } from '../modules/local/cutadapt' addParams( options: modules['illumina_cutadapt'] ) include { GET_SOFTWARE_VERSIONS } from '../modules/local/get_software_versions' addParams( options: [publish_files: ['csv':'']] ) include { MULTIQC } from '../modules/local/multiqc_illumina' addParams( options: multiqc_options ) @@ -88,7 +88,7 @@ def snpeff_build_options = modules['illumina_snpeff_build'] def makeblastdb_options = modules['illumina_blast_makeblastdb'] def kraken2_build_options = modules['illumina_kraken2_build'] def collapse_primers_options = modules['illumina_collapse_primers_illumina'] -if (!params.save_reference) { +if (!params.save_reference) { bedtools_getfasta_options['publish_files'] = false bowtie2_build_options['publish_files'] = false snpeff_build_options['publish_files'] = false @@ -128,9 +128,9 @@ include { ASSEMBLY_MINIA } from '../subworkflows/local/assembly_minia' /* * MODULE: Installed directly from nf-core/modules */ -include { CAT_FASTQ } from '../modules/nf-core/software/cat/fastq/main' addParams( options: modules['illumina_cat_fastq'] ) +include { CAT_FASTQ } from '../modules/nf-core/software/cat/fastq/main' addParams( options: modules['illumina_cat_fastq'] ) include { FASTQC } from '../modules/nf-core/software/fastqc/main' addParams( options: modules['illumina_cutadapt_fastqc'] ) -include { KRAKEN2_RUN } from 
'../modules/nf-core/software/kraken2/run/main' addParams( options: modules['illumina_kraken2_run'] ) +include { KRAKEN2_RUN } from '../modules/nf-core/software/kraken2/run/main' addParams( options: modules['illumina_kraken2_run'] ) include { PICARD_COLLECTMULTIPLEMETRICS } from '../modules/nf-core/software/picard/collectmultiplemetrics/main' addParams( options: modules['illumina_picard_collectmultiplemetrics'] ) include { MOSDEPTH as MOSDEPTH_GENOME } from '../modules/nf-core/software/mosdepth/main' addParams( options: modules['illumina_mosdepth_genome'] ) include { MOSDEPTH as MOSDEPTH_AMPLICON } from '../modules/nf-core/software/mosdepth/main' addParams( options: modules['illumina_mosdepth_amplicon'] ) @@ -194,18 +194,18 @@ workflow ILLUMINA { .map { WorkflowIllumina.checkIfSwiftProtocol(it, 'covid19genome', log) } } } - + /* * SUBWORKFLOW: Read in samplesheet, validate and stage input files */ - INPUT_CHECK ( + INPUT_CHECK ( ch_input, params.platform ) .map { meta, fastq -> meta.id = meta.id.split('_')[0..-2].join('_') - [ meta, fastq ] + [ meta, fastq ] } .groupTuple(by: [0]) .branch { @@ -216,16 +216,16 @@ workflow ILLUMINA { return [ meta, fastq.flatten() ] } .set { ch_fastq } - + /* * MODULE: Concatenate FastQ files from same sample if required */ - CAT_FASTQ ( + CAT_FASTQ ( ch_fastq.multiple ) .mix(ch_fastq.single) .set { ch_cat_fastq } - + /* * SUBWORKFLOW: Read QC and trim adapters */ @@ -248,14 +248,14 @@ workflow ILLUMINA { } .set { ch_variants_fastq } } - + /* * MODULE: Run Kraken2 for removal of host reads */ ch_assembly_fastq = ch_variants_fastq ch_kraken2_multiqc = Channel.empty() if (!params.skip_kraken2) { - KRAKEN2_RUN ( + KRAKEN2_RUN ( ch_variants_fastq, PREPARE_GENOME.out.kraken2_db ) @@ -268,9 +268,9 @@ workflow ILLUMINA { if (params.kraken2_assembly_host_filter) { ch_assembly_fastq = KRAKEN2_RUN.out.unclassified - } + } } - + /* * SUBWORKFLOW: Alignment with Bowtie2 */ @@ -321,7 +321,7 @@ workflow ILLUMINA { } .set { ch_pass_fail_mapped } - 
MULTIQC_CUSTOM_TWOCOL_TSV_FAIL_MAPPED ( + MULTIQC_CUSTOM_TWOCOL_TSV_FAIL_MAPPED ( ch_pass_fail_mapped.fail.collect(), 'Sample', 'Mapped reads', @@ -329,7 +329,7 @@ workflow ILLUMINA { ) .set { ch_fail_mapping_multiqc } } - + /* * SUBWORKFLOW: Trim primer sequences from reads with iVar */ @@ -387,7 +387,7 @@ workflow ILLUMINA { PLOT_MOSDEPTH_REGIONS_GENOME ( MOSDEPTH_GENOME.out.regions_bed.collect { it[1] } ) - + if (params.protocol == 'amplicon') { MOSDEPTH_AMPLICON ( ch_bam.join(ch_bai, by: [0]), @@ -395,7 +395,7 @@ workflow ILLUMINA { 0 ) - PLOT_MOSDEPTH_REGIONS_AMPLICON ( + PLOT_MOSDEPTH_REGIONS_AMPLICON ( MOSDEPTH_AMPLICON.out.regions_bed.collect { it[1] } ) } @@ -448,7 +448,7 @@ workflow ILLUMINA { } .set { ch_ivar_pangolin_multiqc } - MULTIQC_CUSTOM_TWOCOL_TSV_IVAR_PANGOLIN ( + MULTIQC_CUSTOM_TWOCOL_TSV_IVAR_PANGOLIN ( ch_ivar_pangolin_multiqc.collect(), 'Sample', 'Lineage', @@ -456,7 +456,7 @@ workflow ILLUMINA { ) .set { ch_ivar_pangolin_multiqc } } - + /* * SUBWORKFLOW: Call variants with BCFTools */ @@ -500,7 +500,7 @@ workflow ILLUMINA { } .set { ch_bcftools_pangolin_multiqc } - MULTIQC_CUSTOM_TWOCOL_TSV_BCFTOOLS_PANGOLIN ( + MULTIQC_CUSTOM_TWOCOL_TSV_BCFTOOLS_PANGOLIN ( ch_bcftools_pangolin_multiqc.collect(), 'Sample', 'Lineage', @@ -535,8 +535,8 @@ workflow ILLUMINA { ch_software_versions = ch_software_versions.mix(CUTADAPT.out.version.first().ifEmpty(null)) if (!params.skip_fastqc) { - FASTQC ( - CUTADAPT.out.reads + FASTQC ( + CUTADAPT.out.reads ) } } @@ -616,7 +616,7 @@ workflow ILLUMINA { .collect() .set { ch_software_versions } - GET_SOFTWARE_VERSIONS ( + GET_SOFTWARE_VERSIONS ( ch_software_versions ) diff --git a/workflows/nanopore.nf b/workflows/nanopore.nf index d1fafa91..4ef26ed2 100644 --- a/workflows/nanopore.nf +++ b/workflows/nanopore.nf @@ -15,7 +15,7 @@ def summary_params = NfcoreSchema.paramsSummaryMap(workflow, params) WorkflowNanopore.initialise(params, log, valid_params) def checkPathParamList = [ - params.input, params.fastq_dir, 
params.fast5_dir, + params.input, params.fastq_dir, params.fast5_dir, params.sequencing_summary, params.gff ] for (param in checkPathParamList) { if (param) { file(param, checkIfExists: true) } } @@ -37,7 +37,7 @@ if (params.artic_minion_caller == 'medaka') { /* ======================================================================================== - CONFIG FILES + CONFIG FILES ======================================================================================== */ @@ -149,8 +149,8 @@ workflow NANOPORE { PREPARE_GENOME .out .primer_bed - .map { WorkflowCommons.checkPrimerSuffixes(it, params.primer_left_suffix, params.primer_right_suffix, log) } - + .map { WorkflowCommons.checkPrimerSuffixes(it, params.primer_left_suffix, params.primer_right_suffix, log) } + barcode_dirs = file("${params.fastq_dir}/barcode*", type: 'dir' , maxdepth: 1) single_barcode_dir = file("${params.fastq_dir}/*.fastq" , type: 'file', maxdepth: 1) ch_custom_no_sample_name_multiqc = Channel.empty() @@ -174,7 +174,7 @@ workflow NANOPORE { * SUBWORKFLOW: Read in samplesheet containing sample to barcode mappings */ if (params.input) { - INPUT_CHECK ( + INPUT_CHECK ( ch_input, params.platform ) @@ -190,14 +190,14 @@ workflow NANOPORE { .map { it -> [ "${it[0]}\t${it[-1]}" ] } .set { ch_barcodes_no_sample } - MULTIQC_CUSTOM_FAIL_NO_SAMPLE_NAME ( + MULTIQC_CUSTOM_FAIL_NO_SAMPLE_NAME ( ch_barcodes_no_sample.collect(), 'Barcode', 'Read count', 'fail_barcodes_no_sample' ) ch_custom_no_sample_name_multiqc = MULTIQC_CUSTOM_FAIL_NO_SAMPLE_NAME.out - + /* * MODULE: Create custom content file for MultiQC to report samples that were in samplesheet but have no barcodes */ @@ -206,24 +206,24 @@ workflow NANOPORE { .map { it -> [ "${it[1]}\t${it[0]}" ] } .set { ch_samples_no_barcode } - MULTIQC_CUSTOM_FAIL_NO_BARCODES ( + MULTIQC_CUSTOM_FAIL_NO_BARCODES ( ch_samples_no_barcode.collect(), 'Sample', 'Missing barcode', 'fail_no_barcode_samples' ) ch_custom_no_barcodes_multiqc = 
MULTIQC_CUSTOM_FAIL_NO_BARCODES.out - - ch_fastq_dirs + + ch_fastq_dirs .filter { (it[1] != null) } .filter { (it[-1] != null) } .set { ch_fastq_dirs } - + } else { ch_fastq_dirs .map { barcode, dir, count -> [ barcode, barcode, dir, count ] } .set { ch_fastq_dirs } - } + } } else if (single_barcode_dir) { Channel .fromPath("${params.fastq_dir}", type: 'dir', maxDepth: 1) @@ -233,7 +233,7 @@ workflow NANOPORE { log.error "Please specify a valid folder containing ONT basecalled, barcoded fastq files generated by guppy_barcoder or guppy_basecaller e.g. '--fastq_dir ./20191023_1522_MC-110615_0_FAO93606_12bf9b4f/fastq_pass/" System.exit(1) } - + /* * MODULE: Create custom content file for MultiQC to report samples with reads < params.min_barcode_reads */ @@ -248,7 +248,7 @@ workflow NANOPORE { } .set { ch_pass_fail_barcode_count } - MULTIQC_CUSTOM_FAIL_BARCODE_COUNT ( + MULTIQC_CUSTOM_FAIL_BARCODE_COUNT ( ch_pass_fail_barcode_count.fail.collect(), 'Sample', 'Barcode count', @@ -284,7 +284,7 @@ workflow NANOPORE { } .set { ch_pass_fail_guppyplex_count } - MULTIQC_CUSTOM_FAIL_GUPPYPLEX_COUNT ( + MULTIQC_CUSTOM_FAIL_GUPPYPLEX_COUNT ( ch_pass_fail_guppyplex_count.fail.collect(), 'Sample', 'Read count', @@ -314,20 +314,20 @@ workflow NANOPORE { params.artic_scheme, params.primer_set_version ) - + /* * SUBWORKFLOW: Filter unmapped reads from BAM */ - FILTER_BAM_SAMTOOLS ( - ARTIC_MINION.out.bam + FILTER_BAM_SAMTOOLS ( + ARTIC_MINION.out.bam ) ch_software_versions = ch_software_versions.mix(FILTER_BAM_SAMTOOLS.out.samtools_version.first().ifEmpty(null)) /* * MODULE: VCF stats with bcftools stats */ - BCFTOOLS_STATS ( - ARTIC_MINION.out.vcf + BCFTOOLS_STATS ( + ARTIC_MINION.out.vcf ) ch_software_versions = ch_software_versions.mix(BCFTOOLS_STATS.out.version.ifEmpty(null)) @@ -348,14 +348,14 @@ workflow NANOPORE { PLOT_MOSDEPTH_REGIONS_GENOME ( MOSDEPTH_GENOME.out.regions_bed.collect { it[1] } ) - + MOSDEPTH_AMPLICON ( 
ARTIC_MINION.out.bam_primertrimmed.join(ARTIC_MINION.out.bai_primertrimmed, by: [0]), PREPARE_GENOME.out.primer_collapsed_bed, 0 ) - PLOT_MOSDEPTH_REGIONS_AMPLICON ( + PLOT_MOSDEPTH_REGIONS_AMPLICON ( MOSDEPTH_AMPLICON.out.regions_bed.collect { it[1] } ) } @@ -365,7 +365,7 @@ workflow NANOPORE { */ ch_pangolin_multiqc = Channel.empty() if (!params.skip_pangolin) { - PANGOLIN ( + PANGOLIN ( ARTIC_MINION.out.fasta ) ch_software_versions = ch_software_versions.mix(PANGOLIN.out.version.ifEmpty(null)) @@ -382,7 +382,7 @@ workflow NANOPORE { } .set { ch_pangolin_multiqc } - MULTIQC_CUSTOM_PANGOLIN ( + MULTIQC_CUSTOM_PANGOLIN ( ch_pangolin_multiqc.collect(), 'Sample', 'Lineage', @@ -395,23 +395,23 @@ workflow NANOPORE { * MODULE: Clade assignment, mutation calling, and sequence quality checks with Nextclade */ if (!params.skip_nextclade) { - NEXTCLADE ( + NEXTCLADE ( ARTIC_MINION.out.fasta, 'csv' ) ch_software_versions = ch_software_versions.mix(NEXTCLADE.out.version.ifEmpty(null)) } - + /* * MODULE: Consensus QC across all samples with QUAST */ ch_quast_multiqc = Channel.empty() if (!params.skip_variants_quast) { - QUAST ( + QUAST ( ARTIC_MINION.out.fasta.collect{ it[1] }, - PREPARE_GENOME.out.fasta, - PREPARE_GENOME.out.gff, - true, + PREPARE_GENOME.out.fasta, + PREPARE_GENOME.out.gff, + true, params.gff ) ch_quast_multiqc = QUAST.out.tsv @@ -423,10 +423,10 @@ workflow NANOPORE { */ ch_snpeff_multiqc = Channel.empty() if (params.gff && !params.skip_snpeff) { - SNPEFF_SNPSIFT ( - ARTIC_MINION.out.vcf, - PREPARE_GENOME.out.snpeff_db, - PREPARE_GENOME.out.snpeff_config, + SNPEFF_SNPSIFT ( + ARTIC_MINION.out.vcf, + PREPARE_GENOME.out.snpeff_db, + PREPARE_GENOME.out.snpeff_config, PREPARE_GENOME.out.fasta ) ch_snpeff_multiqc = SNPEFF_SNPSIFT.out.csv @@ -473,7 +473,7 @@ workflow NANOPORE { .collect() .set { ch_software_versions } - GET_SOFTWARE_VERSIONS ( + GET_SOFTWARE_VERSIONS ( ch_software_versions ) diff --git a/workflows/sra_download.nf b/workflows/sra_download.nf index 
28d8afd2..9545239c 100644 --- a/workflows/sra_download.nf +++ b/workflows/sra_download.nf @@ -4,15 +4,15 @@ ======================================================================================== */ -if (params.public_data_ids) { +if (params.public_data_ids) { Channel .from(file(params.public_data_ids, checkIfExists: true)) .splitCsv(header:false, sep:'', strip:true) .map { it[0] } .unique() .set { ch_public_data_ids } -} else { - exit 1, 'Input file with public database ids not specified!' +} else { + exit 1, 'Input file with public database ids not specified!' } /* @@ -56,8 +56,8 @@ workflow SRA_DOWNLOAD { .out .tsv .splitCsv(header:true, sep:'\t') - .map { - meta -> + .map { + meta -> meta.single_end = meta.single_end.toBoolean() [ meta, [ meta.fastq_1, meta.fastq_2 ] ] } From 967a7b5725820342386776882903c2799ff33bef Mon Sep 17 00:00:00 2001 From: Harshil Patel Date: Tue, 11 May 2021 13:05:56 +0100 Subject: [PATCH 05/17] Fix trailing whitespaces --- .editorconfig | 2 +- modules/local/asciigenome.nf | 8 ++++---- modules/local/bcftools_isec.nf | 2 +- modules/local/collapse_primers.nf | 6 +++--- modules/local/cutadapt.nf | 7 +++---- modules/local/filter_blastn.nf | 6 +++--- modules/local/get_software_versions.nf | 4 ++-- modules/local/ivar_variants_to_vcf.nf | 6 +++--- modules/local/make_bed_mask.nf | 6 +++--- modules/local/multiqc_custom_twocol_tsv.nf | 4 ++-- modules/local/multiqc_illumina.nf | 4 ++-- modules/local/plot_base_density.nf | 7 ++----- modules/local/plot_mosdepth_regions.nf | 8 ++++---- modules/local/samplesheet_check.nf | 4 ++-- modules/local/snpeff_ann.nf | 7 +++---- modules/local/snpeff_build.nf | 2 +- modules/local/snpsift_extractfields.nf | 4 ++-- modules/local/sra_fastq_ftp.nf | 6 +++--- modules/local/sra_ids_to_runinfo.nf | 6 +++--- modules/local/sra_merge_samplesheet.nf | 8 ++++---- modules/local/sra_runinfo_to_ftp.nf | 6 +++--- modules/local/sra_to_samplesheet.nf | 4 ++-- nextflow.config | 2 +- subworkflows/local/variants_ivar.nf | 4 ++-- 
workflows/illumina.nf | 2 +- 25 files changed, 60 insertions(+), 65 deletions(-) diff --git a/.editorconfig b/.editorconfig index e210ef22..7a3ba6aa 100644 --- a/.editorconfig +++ b/.editorconfig @@ -9,7 +9,7 @@ indent_size = 4 indent_style = space [*.{yml,yaml}] -indent_size = 4 +indent_size = 2 # These files are edited upstream in nf-core/modules [/modules/nf-core/**] diff --git a/modules/local/asciigenome.nf b/modules/local/asciigenome.nf index bdd257b5..e8252ffd 100644 --- a/modules/local/asciigenome.nf +++ b/modules/local/asciigenome.nf @@ -10,7 +10,7 @@ process ASCIIGENOME { publishDir "${params.outdir}", mode: params.publish_dir_mode, saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), meta:meta, publish_by_meta:['id']) } - + conda (params.enable_conda ? "bioconda::asciigenome=1.16.0" : null) if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { container "https://depot.galaxyproject.org/singularity/asciigenome:1.16.0--0" @@ -35,7 +35,7 @@ process ASCIIGENOME { def prefix = options.suffix ? "${meta.id}${options.suffix}" : "${meta.id}" def gff_track = gff ? "$gff" : '' def bed_track = bed ? "$bed" : '' - def paired_end = meta.single_end ? '' : '&& readsAsPairs -on' + def paired_end = meta.single_end ? 
'' : '&& readsAsPairs -on' """ zcat $vcf \\ | grep -v '#' \\ @@ -52,7 +52,7 @@ process ASCIIGENOME { $bed_track \\ $gff_track \\ > /dev/null - + echo \$(ASCIIGenome -ni --version 2>&1) | sed -e "s/ASCIIGenome //g" > ${software}.version.txt """ -} \ No newline at end of file +} diff --git a/modules/local/bcftools_isec.nf b/modules/local/bcftools_isec.nf index 429a6e8c..f9b6ffb6 100644 --- a/modules/local/bcftools_isec.nf +++ b/modules/local/bcftools_isec.nf @@ -9,7 +9,7 @@ process BCFTOOLS_ISEC { publishDir "${params.outdir}", mode: params.publish_dir_mode, saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), meta:meta, publish_by_meta:['id']) } - + conda (params.enable_conda ? "bioconda::bcftools=1.11" : null) if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { container "https://depot.galaxyproject.org/singularity/bcftools:1.11--h7c999a4_0" diff --git a/modules/local/collapse_primers.nf b/modules/local/collapse_primers.nf index c1161989..8b5cbed7 100644 --- a/modules/local/collapse_primers.nf +++ b/modules/local/collapse_primers.nf @@ -9,7 +9,7 @@ process COLLAPSE_PRIMERS { publishDir "${params.outdir}", mode: params.publish_dir_mode, saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:'primers', meta:[:], publish_by_meta:[]) } - + conda (params.enable_conda ? 
"conda-forge::python=3.8.3" : null) if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { container "https://depot.galaxyproject.org/singularity/python:3.8.3" @@ -24,7 +24,7 @@ process COLLAPSE_PRIMERS { output: path '*.bed', emit: bed - + script: """ collapse_primer_bed.py \\ @@ -33,4 +33,4 @@ process COLLAPSE_PRIMERS { $bed \\ ${bed.baseName}.collapsed.bed """ -} \ No newline at end of file +} diff --git a/modules/local/cutadapt.nf b/modules/local/cutadapt.nf index 567b5925..f51e2dc9 100644 --- a/modules/local/cutadapt.nf +++ b/modules/local/cutadapt.nf @@ -10,7 +10,7 @@ process CUTADAPT { publishDir "${params.outdir}", mode: params.publish_dir_mode, saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), meta:meta, publish_by_meta:['id']) } - + conda (params.enable_conda ? 'bioconda::cutadapt=3.2' : null) if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { container 'https://depot.galaxyproject.org/singularity/cutadapt:3.2--py38h0213d0e_0' @@ -21,12 +21,12 @@ process CUTADAPT { input: tuple val(meta), path(reads) path adapters - + output: tuple val(meta), path('*.fastq.gz'), emit: reads tuple val(meta), path('*.log') , emit: log path '*.version.txt' , emit: version - + script: def software = getSoftwareName(task.process) def prefix = options.suffix ? "${meta.id}${options.suffix}" : "${meta.id}" @@ -46,4 +46,3 @@ process CUTADAPT { echo \$(cutadapt --version) > ${software}.version.txt """ } - diff --git a/modules/local/filter_blastn.nf b/modules/local/filter_blastn.nf index 8761314d..004314f0 100644 --- a/modules/local/filter_blastn.nf +++ b/modules/local/filter_blastn.nf @@ -17,14 +17,14 @@ process FILTER_BLASTN { } else { container "biocontainers/biocontainers:v1.2.0_cv1" } - + input: tuple val(meta), path(hits) path header - + output: tuple val(meta), path('*.txt'), emit: txt - + script: def prefix = options.suffix ? 
"${meta.id}${options.suffix}" : "${meta.id}" """ diff --git a/modules/local/get_software_versions.nf b/modules/local/get_software_versions.nf index 9b0e3585..f0aebb76 100644 --- a/modules/local/get_software_versions.nf +++ b/modules/local/get_software_versions.nf @@ -10,7 +10,7 @@ process GET_SOFTWARE_VERSIONS { publishDir "${params.outdir}", mode: params.publish_dir_mode, saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:'pipeline_info', meta:[:], publish_by_meta:[]) } - + conda (params.enable_conda ? "conda-forge::python=3.8.3" : null) if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { container "https://depot.galaxyproject.org/singularity/python:3.8.3" @@ -22,7 +22,7 @@ process GET_SOFTWARE_VERSIONS { input: path versions - + output: path "software_versions.csv" , emit: csv path 'software_versions_mqc.yaml', emit: yaml diff --git a/modules/local/ivar_variants_to_vcf.nf b/modules/local/ivar_variants_to_vcf.nf index e4babe63..4a1a3e6c 100644 --- a/modules/local/ivar_variants_to_vcf.nf +++ b/modules/local/ivar_variants_to_vcf.nf @@ -23,7 +23,7 @@ process IVAR_VARIANTS_TO_VCF { input: tuple val(meta), path(tsv) path header - + output: tuple val(meta), path("*.vcf"), emit: vcf tuple val(meta), path("*.log"), emit: log @@ -37,7 +37,7 @@ process IVAR_VARIANTS_TO_VCF { ${prefix}.vcf \\ $options.args \\ > ${prefix}.variant_counts.log - + cat $header ${prefix}.variant_counts.log > ${prefix}.variant_counts_mqc.tsv """ -} \ No newline at end of file +} diff --git a/modules/local/make_bed_mask.nf b/modules/local/make_bed_mask.nf index a7b92788..bcdf814a 100644 --- a/modules/local/make_bed_mask.nf +++ b/modules/local/make_bed_mask.nf @@ -12,7 +12,7 @@ process MAKE_BED_MASK { publishDir "${params.outdir}", mode: params.publish_dir_mode, saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:'bed', meta:meta, publish_by_meta:['id']) } - + conda (params.enable_conda ? 
"conda-forge::python=3.8.3" : null) if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { container "https://depot.galaxyproject.org/singularity/python:3.8.3" @@ -22,7 +22,7 @@ process MAKE_BED_MASK { input: tuple val(meta), path(vcf), path(bed) - + output: tuple val(meta), path("*.bed"), emit: bed @@ -31,4 +31,4 @@ process MAKE_BED_MASK { """ make_bed_mask.py $vcf $bed ${prefix}.bed """ -} \ No newline at end of file +} diff --git a/modules/local/multiqc_custom_twocol_tsv.nf b/modules/local/multiqc_custom_twocol_tsv.nf index 196bc018..89a603a9 100644 --- a/modules/local/multiqc_custom_twocol_tsv.nf +++ b/modules/local/multiqc_custom_twocol_tsv.nf @@ -7,14 +7,14 @@ process MULTIQC_CUSTOM_TWOCOL_TSV { publishDir "${params.outdir}", mode: params.publish_dir_mode, saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), meta:[:], publish_by_meta:[]) } - + conda (params.enable_conda ? "conda-forge::sed=4.7" : null) if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { container "https://containers.biocontainers.pro/s3/SingImgsRepo/biocontainers/v1.2.0_cv1/biocontainers_v1.2.0_cv1.img" } else { container "biocontainers/biocontainers:v1.2.0_cv1" } - + input: val tsv_data val col1_name diff --git a/modules/local/multiqc_illumina.nf b/modules/local/multiqc_illumina.nf index 9994d196..306ffa1d 100644 --- a/modules/local/multiqc_illumina.nf +++ b/modules/local/multiqc_illumina.nf @@ -9,7 +9,7 @@ process MULTIQC { publishDir "${params.outdir}", mode: params.publish_dir_mode, saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), meta:[:], publish_by_meta:[]) } - + conda (params.enable_conda ? 
"bioconda::multiqc=1.10.1" : null) if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { container "https://depot.galaxyproject.org/singularity/multiqc:1.10.1--pyhdfd78af_1" @@ -44,7 +44,7 @@ process MULTIQC { path ('assembly_spades/*') path ('assembly_unicycler/*') path ('assembly_minia/*') - + output: path "*multiqc_report.html" , emit: report path "*_data" , emit: data diff --git a/modules/local/plot_base_density.nf b/modules/local/plot_base_density.nf index ac7655e5..a7987000 100644 --- a/modules/local/plot_base_density.nf +++ b/modules/local/plot_base_density.nf @@ -17,10 +17,10 @@ process PLOT_BASE_DENSITY { } else { container "quay.io/biocontainers/mulled-v2-ad9dd5f398966bf899ae05f8e7c54d0fb10cdfa7:05678da05b8e5a7a5130e90a9f9a6c585b965afa-0" } - + input: tuple val(meta), path(fasta) - + output: tuple val(meta), path('*.pdf'), emit: pdf tuple val(meta), path('*.tsv'), emit: tsv @@ -34,6 +34,3 @@ process PLOT_BASE_DENSITY { --output_dir ./ """ } - - - diff --git a/modules/local/plot_mosdepth_regions.nf b/modules/local/plot_mosdepth_regions.nf index 3246ad40..f6daf3d3 100644 --- a/modules/local/plot_mosdepth_regions.nf +++ b/modules/local/plot_mosdepth_regions.nf @@ -16,14 +16,14 @@ process PLOT_MOSDEPTH_REGIONS { } else { container "quay.io/biocontainers/mulled-v2-ad9dd5f398966bf899ae05f8e7c54d0fb10cdfa7:05678da05b8e5a7a5130e90a9f9a6c585b965afa-0" } - + input: path beds - + output: path '*.pdf', emit: pdf path '*.tsv', emit: tsv - + script: def prefix = options.suffix ?: "mosdepth" """ @@ -33,4 +33,4 @@ process PLOT_MOSDEPTH_REGIONS { --output_suffix $prefix \\ $options.args """ -} \ No newline at end of file +} diff --git a/modules/local/samplesheet_check.nf b/modules/local/samplesheet_check.nf index 050f140d..3ee6c869 100644 --- a/modules/local/samplesheet_check.nf +++ b/modules/local/samplesheet_check.nf @@ -19,7 +19,7 @@ process SAMPLESHEET_CHECK { input: path samplesheet val platform - + output: path '*.csv' @@ 
-30,4 +30,4 @@ process SAMPLESHEET_CHECK { samplesheet.valid.csv \\ --platform $platform """ -} \ No newline at end of file +} diff --git a/modules/local/snpeff_ann.nf b/modules/local/snpeff_ann.nf index ae0fc474..275cfe79 100644 --- a/modules/local/snpeff_ann.nf +++ b/modules/local/snpeff_ann.nf @@ -10,7 +10,7 @@ process SNPEFF_ANN { publishDir "${params.outdir}", mode: params.publish_dir_mode, saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), meta:meta, publish_by_meta:['id']) } - + conda (params.enable_conda ? 'bioconda::snpeff=5.0' : null) if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { container 'https://depot.galaxyproject.org/singularity/snpeff:5.0--0' @@ -23,7 +23,7 @@ process SNPEFF_ANN { path db path config path fasta - + output: tuple val(meta), path("*.vcf") , emit: vcf tuple val(meta), path("*.csv") , emit: csv @@ -43,8 +43,7 @@ process SNPEFF_ANN { -csvStats ${prefix}.snpeff.csv \\ > ${prefix}.snpeff.vcf mv snpEff_summary.html ${prefix}.snpeff.summary.html - + echo \$(snpEff -version 2>&1) | sed 's/^.*SnpEff //; s/ .*\$//' > ${software}.version.txt """ } - diff --git a/modules/local/snpeff_build.nf b/modules/local/snpeff_build.nf index e2f322fe..d7aecc4f 100644 --- a/modules/local/snpeff_build.nf +++ b/modules/local/snpeff_build.nf @@ -47,4 +47,4 @@ process SNPEFF_BUILD { echo \$(snpEff -version 2>&1) | sed 's/^.*SnpEff //; s/ .*\$//' > ${software}.version.txt """ -} \ No newline at end of file +} diff --git a/modules/local/snpsift_extractfields.nf b/modules/local/snpsift_extractfields.nf index 178e1f69..3eb99c1a 100644 --- a/modules/local/snpsift_extractfields.nf +++ b/modules/local/snpsift_extractfields.nf @@ -10,7 +10,7 @@ process SNPSIFT_EXTRACTFIELDS { publishDir "${params.outdir}", mode: params.publish_dir_mode, saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), 
meta:meta, publish_by_meta:['id']) } - + conda (params.enable_conda ? 'bioconda::snpsift=4.3.1t' : null) if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { container 'https://depot.galaxyproject.org/singularity/snpsift:4.3.1t--2' @@ -20,7 +20,7 @@ process SNPSIFT_EXTRACTFIELDS { input: tuple val(meta), path(vcf) - + output: tuple val(meta), path("*.snpsift.txt"), emit: txt path '*.version.txt' , emit: version diff --git a/modules/local/sra_fastq_ftp.nf b/modules/local/sra_fastq_ftp.nf index d8c211df..3e5344dc 100644 --- a/modules/local/sra_fastq_ftp.nf +++ b/modules/local/sra_fastq_ftp.nf @@ -21,7 +21,7 @@ process SRA_FASTQ_FTP { } else { container "biocontainers/biocontainers:v1.2.0_cv1" } - + input: tuple val(meta), val(fastq) @@ -29,7 +29,7 @@ process SRA_FASTQ_FTP { tuple val(meta), path("*fastq.gz"), emit: fastq tuple val(meta), path("*md5") , emit: md5 - script: + script: if (meta.single_end) { """ bash -c 'until curl $options.args -L ${fastq[0]} -o ${meta.id}.fastq.gz; do sleep 1; done'; @@ -47,4 +47,4 @@ process SRA_FASTQ_FTP { md5sum -c ${meta.id}_2.fastq.gz.md5 """ } -} \ No newline at end of file +} diff --git a/modules/local/sra_ids_to_runinfo.nf b/modules/local/sra_ids_to_runinfo.nf index 1a542a44..bb9b3c30 100644 --- a/modules/local/sra_ids_to_runinfo.nf +++ b/modules/local/sra_ids_to_runinfo.nf @@ -19,13 +19,13 @@ process SRA_IDS_TO_RUNINFO { } else { container "quay.io/biocontainers/requests:2.24.0" } - + input: val id - + output: path "*.tsv", emit: tsv - + script: """ echo $id > id.txt diff --git a/modules/local/sra_merge_samplesheet.nf b/modules/local/sra_merge_samplesheet.nf index 66d56e5e..77aa3770 100644 --- a/modules/local/sra_merge_samplesheet.nf +++ b/modules/local/sra_merge_samplesheet.nf @@ -20,15 +20,15 @@ process SRA_MERGE_SAMPLESHEET { input: path ('samplesheets/*') - + output: path "*csv", emit: csv - + script: """ head -n 1 `ls ./samplesheets/* | head -n 1` > samplesheet.csv for fileid in 
`ls ./samplesheets/*`; do - awk 'NR>1' \$fileid >> samplesheet.csv + awk 'NR>1' \$fileid >> samplesheet.csv done """ -} \ No newline at end of file +} diff --git a/modules/local/sra_runinfo_to_ftp.nf b/modules/local/sra_runinfo_to_ftp.nf index 7d65bc66..431ded86 100644 --- a/modules/local/sra_runinfo_to_ftp.nf +++ b/modules/local/sra_runinfo_to_ftp.nf @@ -17,13 +17,13 @@ process SRA_RUNINFO_TO_FTP { } else { container "quay.io/biocontainers/python:3.8.3" } - + input: path runinfo - + output: path "*.tsv", emit: tsv - + script: """ sra_runinfo_to_ftp.py ${runinfo.join(',')} ${runinfo.toString().tokenize(".")[0]}.runinfo_ftp.tsv diff --git a/modules/local/sra_to_samplesheet.nf b/modules/local/sra_to_samplesheet.nf index 39821976..ba92babe 100644 --- a/modules/local/sra_to_samplesheet.nf +++ b/modules/local/sra_to_samplesheet.nf @@ -20,7 +20,7 @@ process SRA_TO_SAMPLESHEET { output: tuple val(meta), path("*csv"), emit: csv - + exec: // Remove custom keys needed to download the data def meta_map = meta.clone() @@ -43,4 +43,4 @@ process SRA_TO_SAMPLESHEET { def file = task.workDir.resolve("${meta.id}.samplesheet.csv") file.write pipeline_map.keySet().collect{ '"' + it + '"'}.join(",") + '\n' file.append(pipeline_map.values().collect{ '"' + it + '"'}.join(",")) + '\n' -} \ No newline at end of file +} diff --git a/nextflow.config b/nextflow.config index 4fff2931..9a80e0d1 100644 --- a/nextflow.config +++ b/nextflow.config @@ -50,7 +50,7 @@ params { skip_asciigenome = false skip_variants_quast = false skip_multiqc = false - + // Illumina QC, read trimming and filtering options kraken2_db = 's3://nf-core-awsmegatests/viralrecon/input_data/kraken2_human.tar.gz' kraken2_db_name = 'human' diff --git a/subworkflows/local/variants_ivar.nf b/subworkflows/local/variants_ivar.nf index c7675a9d..35b2364a 100644 --- a/subworkflows/local/variants_ivar.nf +++ b/subworkflows/local/variants_ivar.nf @@ -162,12 +162,12 @@ workflow VARIANTS_IVAR { stats = VCF_BGZIP_TABIX_STATS.out.stats // 
channel: [ val(meta), [ txt ] ] tabix_version = VCF_BGZIP_TABIX_STATS.out.tabix_version // path: *.version.txt bcftools_version = VCF_BGZIP_TABIX_STATS.out.bcftools_version // path: *.version.txt - + consensus = ch_consensus // channel: [ val(meta), [ fasta ] ] consensus_qual = ch_consensus_qual // channel: [ val(meta), [ fasta ] ] bases_tsv = ch_bases_tsv // channel: [ val(meta), [ tsv ] ] bases_pdf = ch_bases_pdf // channel: [ val(meta), [ pdf ] ] - + quast_results = ch_quast_results // channel: [ val(meta), [ results ] ] quast_tsv = ch_quast_tsv // channel: [ val(meta), [ tsv ] ] quast_version = ch_quast_version // path: *.version.txt diff --git a/workflows/illumina.nf b/workflows/illumina.nf index 69a93e09..4188e586 100644 --- a/workflows/illumina.nf +++ b/workflows/illumina.nf @@ -242,7 +242,7 @@ workflow ILLUMINA { if (!params.skip_fastp) { ch_variants_fastq .join(FASTQC_FASTP.out.trim_json) - .map { + .map { meta, reads, json -> if (WorkflowIllumina.getFastpReadsAfterFiltering(json) > 0) [ meta, reads ] } From 0e6609152deb27b312de524f9c05f90519b769ae Mon Sep 17 00:00:00 2001 From: Harshil Patel Date: Tue, 11 May 2021 13:46:24 +0100 Subject: [PATCH 06/17] Ignore files failing nf-core lint --- .nf-core-lint.yaml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.nf-core-lint.yaml b/.nf-core-lint.yaml index 56dc4f8d..016b5870 100644 --- a/.nf-core-lint.yaml +++ b/.nf-core-lint.yaml @@ -4,7 +4,10 @@ files_unchanged: - .github/workflows/linting.yml - assets/email_template.html - assets/email_template.txt + - assets/sendmail_template.txt - lib/NfcoreSchema.groovy + - docs/README.md files_exist: - bin/markdown_to_html.py -actions_awsfulltest: False + - .github/markdownlint.yml +actions_awsfulltest: False \ No newline at end of file From 4f7956b02afaf32edc074bc5ad4dfdf098f7b638 Mon Sep 17 00:00:00 2001 From: Harshil Patel Date: Tue, 11 May 2021 18:11:26 +0100 Subject: [PATCH 07/17] Ongoing fixes --- .github/workflows/ci.yml | 24 
++++++++++++++++++++---- .nf-core-lint.yaml | 2 +- 2 files changed, 21 insertions(+), 5 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index ac1053c2..91efdccf 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -20,7 +20,7 @@ jobs: strategy: matrix: # Nextflow versions: check pipeline minimum and current latest - nxf_ver: [21.04.0, ''] + nxf_ver: [21.04.0, ""] steps: - name: Check out pipeline code uses: actions/checkout@v2 @@ -45,7 +45,16 @@ jobs: NXF_ANSI_LOG: false strategy: matrix: - parameters: [--skip_fastp, --skip_variants, --skip_cutadapt, --skip_kraken2, --skip_assembly, '--spades_mode corona', '--spades_mode metaviral'] + parameters: + [ + --skip_fastp, + --skip_variants, + --skip_cutadapt, + --skip_kraken2, + --skip_assembly, + "--spades_mode corona", + "--spades_mode metaviral", + ] steps: - name: Check out pipeline code uses: actions/checkout@v2 @@ -70,7 +79,7 @@ jobs: NXF_ANSI_LOG: false strategy: matrix: - parameters: [--skip_sra_fastq_download, ''] + parameters: [--skip_sra_fastq_download, ""] steps: - name: Check out pipeline code @@ -121,7 +130,14 @@ jobs: NXF_ANSI_LOG: false strategy: matrix: - parameters: [--gff false, --input false, --min_barcode_reads 10000, --min_guppyplex_reads 10000, "--artic_minion_caller medaka"] + parameters: + [ + --gff false, + --input false, + --min_barcode_reads 10000, + --min_guppyplex_reads 10000, + "--artic_minion_caller medaka", + ] steps: - name: Check out pipeline code uses: actions/checkout@v2 diff --git a/.nf-core-lint.yaml b/.nf-core-lint.yaml index 016b5870..5752d519 100644 --- a/.nf-core-lint.yaml +++ b/.nf-core-lint.yaml @@ -10,4 +10,4 @@ files_unchanged: files_exist: - bin/markdown_to_html.py - .github/markdownlint.yml -actions_awsfulltest: False \ No newline at end of file +actions_awsfulltest: False From 6a8da1e49a4f285da194c91658ca21710fad615a Mon Sep 17 00:00:00 2001 From: Harshil Patel Date: Tue, 11 May 2021 18:25:31 +0100 Subject: [PATCH 08/17] 
Fix EClint --- .github/workflows/ci.yml | 308 +-- CITATIONS.md | 216 +- docs/output.md | 1750 ++++++++--------- lib/Utils.groovy | 94 +- modules/local/asciigenome.nf | 116 +- modules/local/collapse_primers.nf | 72 +- modules/local/cutadapt.nf | 96 +- modules/local/kraken2_build.nf | 72 +- modules/local/multiqc_nanopore.nf | 1 - modules/local/plot_base_density.nf | 72 +- modules/local/plot_mosdepth_regions.nf | 72 +- modules/local/snpeff_ann.nf | 98 +- modules/local/snpeff_build.nf | 100 +- modules/local/snpsift_extractfields.nf | 102 +- modules/local/sra_fastq_ftp.nf | 100 +- modules/local/sra_ids_to_runinfo.nf | 68 +- modules/local/sra_merge_samplesheet.nf | 68 +- modules/local/sra_runinfo_to_ftp.nf | 62 +- modules/local/sra_to_samplesheet.nf | 92 +- subworkflows/local/assembly_minia.nf | 152 +- subworkflows/local/assembly_qc.nf | 224 +-- subworkflows/local/assembly_spades.nf | 228 +-- subworkflows/local/assembly_unicycler.nf | 202 +- subworkflows/local/make_consensus.nf | 88 +- subworkflows/local/prepare_genome_illumina.nf | 322 +-- subworkflows/local/prepare_genome_nanopore.nf | 162 +- subworkflows/local/primer_trim_ivar.nf | 78 +- subworkflows/local/snpeff_snpsift.nf | 86 +- subworkflows/local/variants_bcftools.nf | 344 ++-- subworkflows/local/variants_ivar.nf | 386 ++-- subworkflows/nf-core/align_bowtie2.nf | 78 +- subworkflows/nf-core/bam_sort_samtools.nf | 54 +- subworkflows/nf-core/bam_stats_samtools.nf | 50 +- subworkflows/nf-core/fastqc_fastp.nf | 124 +- subworkflows/nf-core/filter_bam_samtools.nf | 70 +- .../nf-core/mark_duplicates_picard.nf | 76 +- subworkflows/nf-core/vcf_bgzip_tabix_stats.nf | 54 +- subworkflows/nf-core/vcf_tabix_stats.nf | 50 +- 38 files changed, 3193 insertions(+), 3194 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 91efdccf..6065fd0f 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -1,154 +1,154 @@ -name: nf-core CI -# This workflow runs the pipeline with the minimal test 
dataset to check that it completes without any syntax errors -on: - push: - branches: - - dev - pull_request: - release: - types: [published] - -jobs: - test: - name: Run workflow tests - # Only run on push if this is the nf-core dev branch (merged PRs) - if: ${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'nf-core/viralrecon') }} - runs-on: ubuntu-latest - env: - NXF_VER: ${{ matrix.nxf_ver }} - NXF_ANSI_LOG: false - strategy: - matrix: - # Nextflow versions: check pipeline minimum and current latest - nxf_ver: [21.04.0, ""] - steps: - - name: Check out pipeline code - uses: actions/checkout@v2 - - - name: Install Nextflow - env: - CAPSULE_LOG: none - run: | - wget -qO- get.nextflow.io | bash - sudo mv nextflow /usr/local/bin/ - - - name: Run pipeline with test data - run: | - nextflow run ${GITHUB_WORKSPACE} -profile test,docker - - parameters: - name: Test workflow parameters - if: ${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'nf-core/viralrecon') }} - runs-on: ubuntu-latest - env: - NXF_VER: ${{ matrix.nxf_ver }} - NXF_ANSI_LOG: false - strategy: - matrix: - parameters: - [ - --skip_fastp, - --skip_variants, - --skip_cutadapt, - --skip_kraken2, - --skip_assembly, - "--spades_mode corona", - "--spades_mode metaviral", - ] - steps: - - name: Check out pipeline code - uses: actions/checkout@v2 - - - name: Install Nextflow - env: - CAPSULE_LOG: none - run: | - wget -qO- get.nextflow.io | bash - sudo mv nextflow /usr/local/bin/ - - - name: Run pipeline with various parameters - run: | - nextflow run ${GITHUB_WORKSPACE} -profile test,docker ${{ matrix.parameters }} - - test_sra: - name: Test SRA workflow - if: ${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'nf-core/viralrecon') }} - runs-on: ubuntu-latest - env: - NXF_VER: ${{ matrix.nxf_ver }} - NXF_ANSI_LOG: false - strategy: - matrix: - parameters: [--skip_sra_fastq_download, ""] - - steps: - 
- name: Check out pipeline code - uses: actions/checkout@v2 - - - name: Install Nextflow - env: - CAPSULE_LOG: none - run: | - wget -qO- get.nextflow.io | bash - sudo mv nextflow /usr/local/bin/ - - - name: Run pipeline to download SRA ids and various options - run: | - nextflow run ${GITHUB_WORKSPACE} -profile test_sra,docker ${{ matrix.parameters }} - - test_sispa: - name: Test SISPA workflow - if: ${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'nf-core/viralrecon') }} - runs-on: ubuntu-latest - env: - NXF_VER: ${{ matrix.nxf_ver }} - NXF_ANSI_LOG: false - strategy: - matrix: - parameters: [--gff false, "--genome 'NC_045512.2'"] - steps: - - name: Check out pipeline code - uses: actions/checkout@v2 - - - name: Install Nextflow - env: - CAPSULE_LOG: none - run: | - wget -qO- get.nextflow.io | bash - sudo mv nextflow /usr/local/bin/ - - - name: Run pipeline with minimal SISPA data and various options - run: | - nextflow run ${GITHUB_WORKSPACE} -profile test_sispa,docker ${{ matrix.parameters }} - - test_nanopore: - name: Test Nanopore workflow - if: ${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'nf-core/viralrecon') }} - runs-on: ubuntu-latest - env: - NXF_VER: ${{ matrix.nxf_ver }} - NXF_ANSI_LOG: false - strategy: - matrix: - parameters: - [ - --gff false, - --input false, - --min_barcode_reads 10000, - --min_guppyplex_reads 10000, - "--artic_minion_caller medaka", - ] - steps: - - name: Check out pipeline code - uses: actions/checkout@v2 - - - name: Install Nextflow - env: - CAPSULE_LOG: none - run: | - wget -qO- get.nextflow.io | bash - sudo mv nextflow /usr/local/bin/ - - - name: Run pipeline with minimal Nanopore data and various options - run: | - nextflow run ${GITHUB_WORKSPACE} -profile test_nanopore,docker ${{ matrix.parameters }} +name: nf-core CI +# This workflow runs the pipeline with the minimal test dataset to check that it completes without any syntax errors +on: + 
push: + branches: + - dev + pull_request: + release: + types: [published] + +jobs: + test: + name: Run workflow tests + # Only run on push if this is the nf-core dev branch (merged PRs) + if: ${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'nf-core/viralrecon') }} + runs-on: ubuntu-latest + env: + NXF_VER: ${{ matrix.nxf_ver }} + NXF_ANSI_LOG: false + strategy: + matrix: + # Nextflow versions: check pipeline minimum and current latest + nxf_ver: [21.04.0, ""] + steps: + - name: Check out pipeline code + uses: actions/checkout@v2 + + - name: Install Nextflow + env: + CAPSULE_LOG: none + run: | + wget -qO- get.nextflow.io | bash + sudo mv nextflow /usr/local/bin/ + + - name: Run pipeline with test data + run: | + nextflow run ${GITHUB_WORKSPACE} -profile test,docker + + parameters: + name: Test workflow parameters + if: ${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'nf-core/viralrecon') }} + runs-on: ubuntu-latest + env: + NXF_VER: ${{ matrix.nxf_ver }} + NXF_ANSI_LOG: false + strategy: + matrix: + parameters: + [ + --skip_fastp, + --skip_variants, + --skip_cutadapt, + --skip_kraken2, + --skip_assembly, + "--spades_mode corona", + "--spades_mode metaviral", + ] + steps: + - name: Check out pipeline code + uses: actions/checkout@v2 + + - name: Install Nextflow + env: + CAPSULE_LOG: none + run: | + wget -qO- get.nextflow.io | bash + sudo mv nextflow /usr/local/bin/ + + - name: Run pipeline with various parameters + run: | + nextflow run ${GITHUB_WORKSPACE} -profile test,docker ${{ matrix.parameters }} + + test_sra: + name: Test SRA workflow + if: ${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'nf-core/viralrecon') }} + runs-on: ubuntu-latest + env: + NXF_VER: ${{ matrix.nxf_ver }} + NXF_ANSI_LOG: false + strategy: + matrix: + parameters: [--skip_sra_fastq_download, ""] + + steps: + - name: Check out pipeline code + uses: actions/checkout@v2 + + - 
name: Install Nextflow + env: + CAPSULE_LOG: none + run: | + wget -qO- get.nextflow.io | bash + sudo mv nextflow /usr/local/bin/ + + - name: Run pipeline to download SRA ids and various options + run: | + nextflow run ${GITHUB_WORKSPACE} -profile test_sra,docker ${{ matrix.parameters }} + + test_sispa: + name: Test SISPA workflow + if: ${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'nf-core/viralrecon') }} + runs-on: ubuntu-latest + env: + NXF_VER: ${{ matrix.nxf_ver }} + NXF_ANSI_LOG: false + strategy: + matrix: + parameters: [--gff false, "--genome 'NC_045512.2'"] + steps: + - name: Check out pipeline code + uses: actions/checkout@v2 + + - name: Install Nextflow + env: + CAPSULE_LOG: none + run: | + wget -qO- get.nextflow.io | bash + sudo mv nextflow /usr/local/bin/ + + - name: Run pipeline with minimal SISPA data and various options + run: | + nextflow run ${GITHUB_WORKSPACE} -profile test_sispa,docker ${{ matrix.parameters }} + + test_nanopore: + name: Test Nanopore workflow + if: ${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'nf-core/viralrecon') }} + runs-on: ubuntu-latest + env: + NXF_VER: ${{ matrix.nxf_ver }} + NXF_ANSI_LOG: false + strategy: + matrix: + parameters: + [ + --gff false, + --input false, + --min_barcode_reads 10000, + --min_guppyplex_reads 10000, + "--artic_minion_caller medaka", + ] + steps: + - name: Check out pipeline code + uses: actions/checkout@v2 + + - name: Install Nextflow + env: + CAPSULE_LOG: none + run: | + wget -qO- get.nextflow.io | bash + sudo mv nextflow /usr/local/bin/ + + - name: Run pipeline with minimal Nanopore data and various options + run: | + nextflow run ${GITHUB_WORKSPACE} -profile test_nanopore,docker ${{ matrix.parameters }} diff --git a/CITATIONS.md b/CITATIONS.md index b6bcd3e0..6a3f41ef 100644 --- a/CITATIONS.md +++ b/CITATIONS.md @@ -1,108 +1,108 @@ -# nf-core/viralrecon: Citations - -## 
[nf-core](https://pubmed.ncbi.nlm.nih.gov/32055031/) - -> Ewels PA, Peltzer A, Fillinger S, Patel H, Alneberg J, Wilm A, Garcia MU, Di Tommaso P, Nahnsen S. The nf-core framework for community-curated bioinformatics pipelines. Nat Biotechnol. 2020 Mar;38(3):276-278. doi: 10.1038/s41587-020-0439-x. PubMed PMID: 32055031. - -## [Nextflow](https://pubmed.ncbi.nlm.nih.gov/28398311/) - -> Di Tommaso P, Chatzou M, Floden EW, Barja PP, Palumbo E, Notredame C. Nextflow enables reproducible computational workflows. Nat Biotechnol. 2017 Apr 11;35(4):316-319. doi: 10.1038/nbt.3820. PubMed PMID: 28398311. - -## Pipeline tools - -* [ABACAS](https://www.ncbi.nlm.nih.gov/pubmed/19497936/) - > Assefa S, Keane TM, Otto TD, Newbold C, Berriman M. ABACAS: algorithm-based automatic contiguation of assembled sequences. Bioinformatics. 2009 Aug 1;25(15):1968-9. doi: 10.1093/bioinformatics/btp347. Epub 2009 Jun 3. PubMed PMID: 19497936; PubMed Central PMCID: PMC2712343. - -* [ASCIIGenome](https://www.ncbi.nlm.nih.gov/pubmed/28119307/) - > Beraldi D. ASCIIGenome: a command line genome browser for console terminals. Bioinformatics. 2017 May 15;33(10):1568-1569. doi: 10.1093/bioinformatics/btx007. PubMed PMID: 28119307; PubMed Central PMCID: PMC5423454. - -* [ARTIC network](https://github.com/artic-network) - -* [Bandage](https://www.ncbi.nlm.nih.gov/pubmed/26099265) - > Wick R.R., Schultz M.B., Zobel J. & Holt K.E. Bandage: interactive visualisation of de novo genome assemblies. Bioinformatics, 31(20), 3350-3352. doi: 10.1093/bioinformatics/btv383. PubMed PMID: 26099265; PubMed Central PCMID: PMC4595904. - -* [BCFtools](https://www.ncbi.nlm.nih.gov/pubmed/21903627/) - > Li H. A statistical framework for SNP calling, mutation discovery, association mapping and population genetical parameter estimation from sequencing data. Bioinformatics. 2011 Nov 1;27(21):2987-93. doi: 10.1093/bioinformatics/btr509. Epub 2011 Sep 8. PubMed PMID: 21903627; PubMed Central PMCID: PMC3198575. 
- -* [BEDTools](https://www.ncbi.nlm.nih.gov/pubmed/20110278/) - > Quinlan AR, Hall IM. BEDTools: a flexible suite of utilities for comparing genomic features. Bioinformatics. 2010 Mar 15;26(6):841-2. doi: 10.1093/bioinformatics/btq033. Epub 2010 Jan 28. PubMed PMID: 20110278; PubMed Central PMCID: PMC2832824. - -* [BLAST](https://www.ncbi.nlm.nih.gov/pubmed/20003500/) - > Camacho C, Coulouris G, Avagyan V, Ma N, Papadopoulos J, Bealer K, Madden TL. BLAST+: architecture and applications. BMC Bioinformatics. 2009 Dec 15;10:421. doi: 10.1186/1471-2105-10-421. PubMed PMID: 20003500; PubMed Central PMCID: PMC2803857. - -* [Bowtie 2](https://www.ncbi.nlm.nih.gov/pubmed/22388286/) - > Langmead B, Salzberg SL. Fast gapped-read alignment with Bowtie 2. Nat Methods. 2012 Mar 4;9(4):357-9. doi: 10.1038/nmeth.1923. PubMed PMID: 22388286; PubMed Central PMCID: PMC3322381. - -* [Cutadapt](http://dx.doi.org/10.14806/ej.17.1.200) - > Marcel, M. Cutadapt removes adapter sequences from high-throughput sequencing reads. EMBnet.journal, [S.l.], v. 17, n. 1, p. pp. 10-12, may 2011. ISSN 2226-6089. doi: 10.14806/ej.17.1.200. - -* [fastp](https://www.ncbi.nlm.nih.gov/pubmed/30423086/) - > Chen S, Zhou Y, Chen Y, Gu J. fastp: an ultra-fast all-in-one FASTQ preprocessor. Bioinformatics. 2018 Sep 1;34(17):i884-i890. doi: 10.1093/bioinformatics/bty560. PubMed PMID: 30423086; PubMed Central PMCID: PMC6129281. - -* [FastQC](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/) - -* [iVar](https://www.ncbi.nlm.nih.gov/pubmed/30621750/) - > Grubaugh ND, Gangavarapu K, Quick J, Matteson NL, De Jesus JG, Main BJ, Tan AL, Paul LM, Brackney DE, Grewal S, Gurfield N, Van Rompay KKA, Isern S, Michael SF, Coffey LL, Loman NJ, Andersen KG. An amplicon-based sequencing framework for accurately measuring intrahost virus diversity using PrimalSeq and iVar. Genome Biol. 2019 Jan 8;20(1):8. doi: 10.1186/s13059-018-1618-7. PubMed PMID: 30621750; PubMed Central PMCID: PMC6325816. 
- -* [Kraken 2](https://www.ncbi.nlm.nih.gov/pubmed/31779668/) - > Wood DE, Lu J, Langmead B. Improved metagenomic analysis with Kraken 2. Genome Biol. 2019 Nov 28;20(1):257. doi: 10.1186/s13059-019-1891-0. PubMed PMID: 31779668; PubMed Central PMCID: PMC6883579. - -* [minia](https://www.ncbi.nlm.nih.gov/pubmed/24040893/) - > Chikhi R, Rizk G. Space-efficient and exact de Bruijn graph representation based on a Bloom filter. Algorithms Mol Biol. 2013 Sep 16;8(1):22. doi: 10.1186/1748-7188-8-22. PubMed PMID: 24040893; PubMed Central PMCID: PMC3848682. - -* [mosdepth](https://www.ncbi.nlm.nih.gov/pubmed/29096012) - > Pedersen BS, Quinlan AR. Mosdepth: Quick Coverage Calculation for Genomes and Exomes. Bioinformatics. 2018 Mar 1;34(5):867-868. doi: 10.1093/bioinformatics/btx699. PMID: 29096012 PMCID: PMC6030888. - -* [MultiQC](https://www.ncbi.nlm.nih.gov/pubmed/27312411/) - > Ewels P, Magnusson M, Lundin S, Käller M. MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics. 2016 Oct 1;32(19):3047-8. doi: 10.1093/bioinformatics/btw354. Epub 2016 Jun 16. PubMed PMID: 27312411; PubMed Central PMCID: PMC5039924. - -* [NanoPlot](https://pubmed.ncbi.nlm.nih.gov/29547981/) - > De Coster W, D'Hert S, Schultz DT, Cruts M, Van Broeckhoven C. NanoPack: visualizing and processing long-read sequencing data. Bioinformatics. 2018 Aug 1;34(15):2666-2669. doi: 10.1093/bioinformatics/bty149. PubMed PMID: 29547981; PubMed Central PMCID: PMC6061794. - -* [Nextstrain](https://pubmed.ncbi.nlm.nih.gov/29790939/) - > Hadfield J, Megill C, Bell SM, Huddleston J, Potter B, Callender C, Sagulenko P, Bedford T, Neher RA. Nextstrain: real-time tracking of pathogen evolution. Bioinformatics. 2018 Dec 1;34(23):4121-4123. doi: 10.1093/bioinformatics/bty407. PubMed PMID: 29790939; PubMed Central PMCID: PMC6247931. 
- -* [pangolin](https://github.com/cov-lineages/pangolin) - > Áine O'Toole, Emily Scher, Anthony Underwood, Ben Jackson, Verity Hill, JT McCrone, Chris Ruis, Khali Abu-Dahab, Ben Taylor, Corin Yeats, Louis du Plessis, David Aanensen, Eddie Holmes, Oliver Pybus, Andrew Rambaut. pangolin: lineage assignment in an emerging pandemic as an epidemiological tool. Publication in preparation. - -* [picard-tools](http://broadinstitute.github.io/picard) - -* [pycoQC](https://doi.org/10.21105/joss.01236) - > Leger A, Leonardi T, (2019). pycoQC, interactive quality control for Oxford Nanopore Sequencing. Journal of Open Source Software, 4(34), 1236. - -* [QUAST](https://www.ncbi.nlm.nih.gov/pubmed/23422339/) - > Gurevich A, Saveliev V, Vyahhi N, Tesler G. QUAST: quality assessment tool for genome assemblies. Bioinformatics. 2013 Apr 15;29(8):1072-5. doi: 10.1093/bioinformatics/btt086. Epub 2013 Feb 19. PubMed PMID: 23422339; PubMed Central PMCID: PMC3624806. - -* [R](https://www.R-project.org/) - > R Core Team (2017). R: A language and environment for statistical computing. R Foundation for Statistical Computing, Vienna, Austria. - -* [SAMtools](https://www.ncbi.nlm.nih.gov/pubmed/19505943/) - > Li H, Handsaker B, Wysoker A, Fennell T, Ruan J, Homer N, Marth G, Abecasis G, Durbin R; 1000 Genome Project Data Processing Subgroup. The Sequence Alignment/Map format and SAMtools. Bioinformatics. 2009 Aug 15;25(16):2078-9. doi: 10.1093/bioinformatics/btp352. Epub 2009 Jun 8. PubMed PMID: 19505943; PubMed Central PMCID: PMC2723002. - -* [SnpEff](https://www.ncbi.nlm.nih.gov/pubmed/22728672/) - > Cingolani P, Platts A, Wang le L, Coon M, Nguyen T, Wang L, Land SJ, Lu X, Ruden DM. A program for annotating and predicting the effects of single nucleotide polymorphisms, SnpEff: SNPs in the genome of Drosophila melanogaster strain w1118; iso-2; iso-3. Fly (Austin). 2012 Apr-Jun;6(2):80-92. doi: 10.4161/fly.19695. PubMed PMID: 22728672; PubMed Central PMCID: PMC3679285. 
- -* [SnpSift](https://www.ncbi.nlm.nih.gov/pubmed/22435069/) - > Cingolani P, Patel VM, Coon M, Nguyen T, Land SJ, Ruden DM, Lu X. Using Drosophila melanogaster as a Model for Genotoxic Chemical Mutational Studies with a New Program, SnpSift. Front Genet. 2012 Mar 15;3:35. doi: 10.3389/fgene.2012.00035. eCollection 2012. PubMed PMID: 22435069; PubMed Central PMCID: PMC3304048. - -* [SPAdes](https://www.ncbi.nlm.nih.gov/pubmed/24093227/) - > Nurk S, Bankevich A, Antipov D, Gurevich AA, Korobeynikov A, Lapidus A, Prjibelski AD, Pyshkin A, Sirotkin A, Sirotkin Y, Stepanauskas R, Clingenpeel SR, Woyke T, McLean JS, Lasken R, Tesler G, Alekseyev MA, Pevzner PA. Assembling single-cell genomes and mini-metagenomes from chimeric MDA products. J Comput Biol. 2013 Oct;20(10):714-37. doi: 10.1089/cmb.2013.0084. PubMed PMID: 24093227; PubMed Central PMCID: PMC3791033. - -* [Unicycler](https://www.ncbi.nlm.nih.gov/pubmed/28594827/) - > Wick RR, Judd LM, Gorrie CL, Holt KE. Unicycler: Resolving bacterial genome assemblies from short and long sequencing reads. PLoS Comput Biol. 2017 Jun 8;13(6):e1005595. doi: 10.1371/journal.pcbi.1005595. eCollection 2017 Jun. PubMed PMID: 28594827; PubMed Central PMCID: PMC5481147. - -## Software packaging/containerisation tools - -* [Anaconda](https://anaconda.com) - > Anaconda Software Distribution. Computer software. Vers. 2-2.4.0. Anaconda, Nov. 2016. Web. - -* [Bioconda](https://pubmed.ncbi.nlm.nih.gov/29967506/) - > Grüning B, Dale R, Sjödin A, Chapman BA, Rowe J, Tomkins-Tinch CH, Valieris R, Köster J; Bioconda Team. Bioconda: sustainable and comprehensive software distribution for the life sciences. Nat Methods. 2018 Jul;15(7):475-476. doi: 10.1038/s41592-018-0046-7. PubMed PMID: 29967506. 
- -* [BioContainers](https://pubmed.ncbi.nlm.nih.gov/28379341/) - > da Veiga Leprevost F, Grüning B, Aflitos SA, Röst HL, Uszkoreit J, Barsnes H, Vaudel M, Moreno P, Gatto L, Weber J, Bai M, Jimenez RC, Sachsenberg T, Pfeuffer J, Alvarez RV, Griss J, Nesvizhskii AI, Perez-Riverol Y. BioContainers: an open-source and community-driven framework for software standardization. Bioinformatics. 2017 Aug 15;33(16):2580-2582. doi: 10.1093/bioinformatics/btx192. PubMed PMID: 28379341; PubMed Central PMCID: PMC5870671. - -* [Docker](https://dl.acm.org/doi/10.5555/2600239.2600241) - -* [Singularity](https://pubmed.ncbi.nlm.nih.gov/28494014/) - > Kurtzer GM, Sochat V, Bauer MW. Singularity: Scientific containers for mobility of compute. PLoS One. 2017 May 11;12(5):e0177459. doi: 10.1371/journal.pone.0177459. eCollection 2017. PubMed PMID: 28494014; PubMed Central PMCID: PMC5426675. +# nf-core/viralrecon: Citations + +## [nf-core](https://pubmed.ncbi.nlm.nih.gov/32055031/) + +> Ewels PA, Peltzer A, Fillinger S, Patel H, Alneberg J, Wilm A, Garcia MU, Di Tommaso P, Nahnsen S. The nf-core framework for community-curated bioinformatics pipelines. Nat Biotechnol. 2020 Mar;38(3):276-278. doi: 10.1038/s41587-020-0439-x. PubMed PMID: 32055031. + +## [Nextflow](https://pubmed.ncbi.nlm.nih.gov/28398311/) + +> Di Tommaso P, Chatzou M, Floden EW, Barja PP, Palumbo E, Notredame C. Nextflow enables reproducible computational workflows. Nat Biotechnol. 2017 Apr 11;35(4):316-319. doi: 10.1038/nbt.3820. PubMed PMID: 28398311. + +## Pipeline tools + +* [ABACAS](https://www.ncbi.nlm.nih.gov/pubmed/19497936/) + > Assefa S, Keane TM, Otto TD, Newbold C, Berriman M. ABACAS: algorithm-based automatic contiguation of assembled sequences. Bioinformatics. 2009 Aug 1;25(15):1968-9. doi: 10.1093/bioinformatics/btp347. Epub 2009 Jun 3. PubMed PMID: 19497936; PubMed Central PMCID: PMC2712343. + +* [ASCIIGenome](https://www.ncbi.nlm.nih.gov/pubmed/28119307/) + > Beraldi D. 
ASCIIGenome: a command line genome browser for console terminals. Bioinformatics. 2017 May 15;33(10):1568-1569. doi: 10.1093/bioinformatics/btx007. PubMed PMID: 28119307; PubMed Central PMCID: PMC5423454. + +* [ARTIC network](https://github.com/artic-network) + +* [Bandage](https://www.ncbi.nlm.nih.gov/pubmed/26099265) + > Wick R.R., Schultz M.B., Zobel J. & Holt K.E. Bandage: interactive visualisation of de novo genome assemblies. Bioinformatics, 31(20), 3350-3352. doi: 10.1093/bioinformatics/btv383. PubMed PMID: 26099265; PubMed Central PMCID: PMC4595904. + +* [BCFtools](https://www.ncbi.nlm.nih.gov/pubmed/21903627/) + > Li H. A statistical framework for SNP calling, mutation discovery, association mapping and population genetical parameter estimation from sequencing data. Bioinformatics. 2011 Nov 1;27(21):2987-93. doi: 10.1093/bioinformatics/btr509. Epub 2011 Sep 8. PubMed PMID: 21903627; PubMed Central PMCID: PMC3198575. + +* [BEDTools](https://www.ncbi.nlm.nih.gov/pubmed/20110278/) + > Quinlan AR, Hall IM. BEDTools: a flexible suite of utilities for comparing genomic features. Bioinformatics. 2010 Mar 15;26(6):841-2. doi: 10.1093/bioinformatics/btq033. Epub 2010 Jan 28. PubMed PMID: 20110278; PubMed Central PMCID: PMC2832824. + +* [BLAST](https://www.ncbi.nlm.nih.gov/pubmed/20003500/) + > Camacho C, Coulouris G, Avagyan V, Ma N, Papadopoulos J, Bealer K, Madden TL. BLAST+: architecture and applications. BMC Bioinformatics. 2009 Dec 15;10:421. doi: 10.1186/1471-2105-10-421. PubMed PMID: 20003500; PubMed Central PMCID: PMC2803857. + +* [Bowtie 2](https://www.ncbi.nlm.nih.gov/pubmed/22388286/) + > Langmead B, Salzberg SL. Fast gapped-read alignment with Bowtie 2. Nat Methods. 2012 Mar 4;9(4):357-9. doi: 10.1038/nmeth.1923. PubMed PMID: 22388286; PubMed Central PMCID: PMC3322381. + +* [Cutadapt](http://dx.doi.org/10.14806/ej.17.1.200) + > Martin, M. Cutadapt removes adapter sequences from high-throughput sequencing reads. EMBnet.journal, [S.l.], v. 17, n. 1, p.
pp. 10-12, may 2011. ISSN 2226-6089. doi: 10.14806/ej.17.1.200. + +* [fastp](https://www.ncbi.nlm.nih.gov/pubmed/30423086/) + > Chen S, Zhou Y, Chen Y, Gu J. fastp: an ultra-fast all-in-one FASTQ preprocessor. Bioinformatics. 2018 Sep 1;34(17):i884-i890. doi: 10.1093/bioinformatics/bty560. PubMed PMID: 30423086; PubMed Central PMCID: PMC6129281. + +* [FastQC](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/) + +* [iVar](https://www.ncbi.nlm.nih.gov/pubmed/30621750/) + > Grubaugh ND, Gangavarapu K, Quick J, Matteson NL, De Jesus JG, Main BJ, Tan AL, Paul LM, Brackney DE, Grewal S, Gurfield N, Van Rompay KKA, Isern S, Michael SF, Coffey LL, Loman NJ, Andersen KG. An amplicon-based sequencing framework for accurately measuring intrahost virus diversity using PrimalSeq and iVar. Genome Biol. 2019 Jan 8;20(1):8. doi: 10.1186/s13059-018-1618-7. PubMed PMID: 30621750; PubMed Central PMCID: PMC6325816. + +* [Kraken 2](https://www.ncbi.nlm.nih.gov/pubmed/31779668/) + > Wood DE, Lu J, Langmead B. Improved metagenomic analysis with Kraken 2. Genome Biol. 2019 Nov 28;20(1):257. doi: 10.1186/s13059-019-1891-0. PubMed PMID: 31779668; PubMed Central PMCID: PMC6883579. + +* [minia](https://www.ncbi.nlm.nih.gov/pubmed/24040893/) + > Chikhi R, Rizk G. Space-efficient and exact de Bruijn graph representation based on a Bloom filter. Algorithms Mol Biol. 2013 Sep 16;8(1):22. doi: 10.1186/1748-7188-8-22. PubMed PMID: 24040893; PubMed Central PMCID: PMC3848682. + +* [mosdepth](https://www.ncbi.nlm.nih.gov/pubmed/29096012) + > Pedersen BS, Quinlan AR. Mosdepth: Quick Coverage Calculation for Genomes and Exomes. Bioinformatics. 2018 Mar 1;34(5):867-868. doi: 10.1093/bioinformatics/btx699. PMID: 29096012 PMCID: PMC6030888. + +* [MultiQC](https://www.ncbi.nlm.nih.gov/pubmed/27312411/) + > Ewels P, Magnusson M, Lundin S, Käller M. MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics. 2016 Oct 1;32(19):3047-8. 
doi: 10.1093/bioinformatics/btw354. Epub 2016 Jun 16. PubMed PMID: 27312411; PubMed Central PMCID: PMC5039924. + +* [NanoPlot](https://pubmed.ncbi.nlm.nih.gov/29547981/) + > De Coster W, D'Hert S, Schultz DT, Cruts M, Van Broeckhoven C. NanoPack: visualizing and processing long-read sequencing data. Bioinformatics. 2018 Aug 1;34(15):2666-2669. doi: 10.1093/bioinformatics/bty149. PubMed PMID: 29547981; PubMed Central PMCID: PMC6061794. + +* [Nextstrain](https://pubmed.ncbi.nlm.nih.gov/29790939/) + > Hadfield J, Megill C, Bell SM, Huddleston J, Potter B, Callender C, Sagulenko P, Bedford T, Neher RA. Nextstrain: real-time tracking of pathogen evolution. Bioinformatics. 2018 Dec 1;34(23):4121-4123. doi: 10.1093/bioinformatics/bty407. PubMed PMID: 29790939; PubMed Central PMCID: PMC6247931. + +* [pangolin](https://github.com/cov-lineages/pangolin) + > Áine O'Toole, Emily Scher, Anthony Underwood, Ben Jackson, Verity Hill, JT McCrone, Chris Ruis, Khali Abu-Dahab, Ben Taylor, Corin Yeats, Louis du Plessis, David Aanensen, Eddie Holmes, Oliver Pybus, Andrew Rambaut. pangolin: lineage assignment in an emerging pandemic as an epidemiological tool. Publication in preparation. + +* [picard-tools](http://broadinstitute.github.io/picard) + +* [pycoQC](https://doi.org/10.21105/joss.01236) + > Leger A, Leonardi T, (2019). pycoQC, interactive quality control for Oxford Nanopore Sequencing. Journal of Open Source Software, 4(34), 1236. + +* [QUAST](https://www.ncbi.nlm.nih.gov/pubmed/23422339/) + > Gurevich A, Saveliev V, Vyahhi N, Tesler G. QUAST: quality assessment tool for genome assemblies. Bioinformatics. 2013 Apr 15;29(8):1072-5. doi: 10.1093/bioinformatics/btt086. Epub 2013 Feb 19. PubMed PMID: 23422339; PubMed Central PMCID: PMC3624806. + +* [R](https://www.R-project.org/) + > R Core Team (2017). R: A language and environment for statistical computing. R Foundation for Statistical Computing, Vienna, Austria. 
+ +* [SAMtools](https://www.ncbi.nlm.nih.gov/pubmed/19505943/) + > Li H, Handsaker B, Wysoker A, Fennell T, Ruan J, Homer N, Marth G, Abecasis G, Durbin R; 1000 Genome Project Data Processing Subgroup. The Sequence Alignment/Map format and SAMtools. Bioinformatics. 2009 Aug 15;25(16):2078-9. doi: 10.1093/bioinformatics/btp352. Epub 2009 Jun 8. PubMed PMID: 19505943; PubMed Central PMCID: PMC2723002. + +* [SnpEff](https://www.ncbi.nlm.nih.gov/pubmed/22728672/) + > Cingolani P, Platts A, Wang le L, Coon M, Nguyen T, Wang L, Land SJ, Lu X, Ruden DM. A program for annotating and predicting the effects of single nucleotide polymorphisms, SnpEff: SNPs in the genome of Drosophila melanogaster strain w1118; iso-2; iso-3. Fly (Austin). 2012 Apr-Jun;6(2):80-92. doi: 10.4161/fly.19695. PubMed PMID: 22728672; PubMed Central PMCID: PMC3679285. + +* [SnpSift](https://www.ncbi.nlm.nih.gov/pubmed/22435069/) + > Cingolani P, Patel VM, Coon M, Nguyen T, Land SJ, Ruden DM, Lu X. Using Drosophila melanogaster as a Model for Genotoxic Chemical Mutational Studies with a New Program, SnpSift. Front Genet. 2012 Mar 15;3:35. doi: 10.3389/fgene.2012.00035. eCollection 2012. PubMed PMID: 22435069; PubMed Central PMCID: PMC3304048. + +* [SPAdes](https://www.ncbi.nlm.nih.gov/pubmed/24093227/) + > Nurk S, Bankevich A, Antipov D, Gurevich AA, Korobeynikov A, Lapidus A, Prjibelski AD, Pyshkin A, Sirotkin A, Sirotkin Y, Stepanauskas R, Clingenpeel SR, Woyke T, McLean JS, Lasken R, Tesler G, Alekseyev MA, Pevzner PA. Assembling single-cell genomes and mini-metagenomes from chimeric MDA products. J Comput Biol. 2013 Oct;20(10):714-37. doi: 10.1089/cmb.2013.0084. PubMed PMID: 24093227; PubMed Central PMCID: PMC3791033. + +* [Unicycler](https://www.ncbi.nlm.nih.gov/pubmed/28594827/) + > Wick RR, Judd LM, Gorrie CL, Holt KE. Unicycler: Resolving bacterial genome assemblies from short and long sequencing reads. PLoS Comput Biol. 2017 Jun 8;13(6):e1005595. doi: 10.1371/journal.pcbi.1005595. 
eCollection 2017 Jun. PubMed PMID: 28594827; PubMed Central PMCID: PMC5481147. + +## Software packaging/containerisation tools + +* [Anaconda](https://anaconda.com) + > Anaconda Software Distribution. Computer software. Vers. 2-2.4.0. Anaconda, Nov. 2016. Web. + +* [Bioconda](https://pubmed.ncbi.nlm.nih.gov/29967506/) + > Grüning B, Dale R, Sjödin A, Chapman BA, Rowe J, Tomkins-Tinch CH, Valieris R, Köster J; Bioconda Team. Bioconda: sustainable and comprehensive software distribution for the life sciences. Nat Methods. 2018 Jul;15(7):475-476. doi: 10.1038/s41592-018-0046-7. PubMed PMID: 29967506. + +* [BioContainers](https://pubmed.ncbi.nlm.nih.gov/28379341/) + > da Veiga Leprevost F, Grüning B, Aflitos SA, Röst HL, Uszkoreit J, Barsnes H, Vaudel M, Moreno P, Gatto L, Weber J, Bai M, Jimenez RC, Sachsenberg T, Pfeuffer J, Alvarez RV, Griss J, Nesvizhskii AI, Perez-Riverol Y. BioContainers: an open-source and community-driven framework for software standardization. Bioinformatics. 2017 Aug 15;33(16):2580-2582. doi: 10.1093/bioinformatics/btx192. PubMed PMID: 28379341; PubMed Central PMCID: PMC5870671. + +* [Docker](https://dl.acm.org/doi/10.5555/2600239.2600241) + +* [Singularity](https://pubmed.ncbi.nlm.nih.gov/28494014/) + > Kurtzer GM, Sochat V, Bauer MW. Singularity: Scientific containers for mobility of compute. PLoS One. 2017 May 11;12(5):e0177459. doi: 10.1371/journal.pone.0177459. eCollection 2017. PubMed PMID: 28494014; PubMed Central PMCID: PMC5426675. diff --git a/docs/output.md b/docs/output.md index dec6af9d..34a2ea7b 100644 --- a/docs/output.md +++ b/docs/output.md @@ -1,875 +1,875 @@ - -# Introduction - -This document describes the output produced by the pipeline. Most of the plots are taken from the MultiQC report, which summarises results at the end of the pipeline. - -The directories listed below will be created in the results directory after the pipeline has finished. All paths are relative to the top-level results directory. 
- -# Nanopore: Pipeline overview - -* [Preprocessing](#nanopore-preprocessing) - * [pycoQC](#nanopore-pycoqc) - Sequencing QC - * [artic guppyplex](#nanopore-artic-guppyplex) - Aggregate pre-demultiplexed reads from MinKNOW/Guppy - * [NanoPlot](#nanopore-nanoplot) - Read QC -* [Variant calling](#nanopore-variant-calling) - * [artic minion](#nanopore-artic-minion) - Align reads, call variants and generate consensus sequence -* [Downstream analysis](#nanopore-downstream-analysis) - * [SAMtools](#nanopore-samtools) - Remove unmapped reads and obtain alignment metrics - * [mosdepth](#nanopore-mosdepth) - Genome-wide and amplicon coverage QC plots - * [BCFTools](#nanopore-bcftools) - Variant count metrics - * [SnpEff and SnpSift](#nanopore-snpeff-and-snpsift) - Genetic variant annotation and functional effect prediction - * [QUAST](#nanopore-quast) - Consensus assessment report - * [Pangolin](#nanopore-pangolin) - Lineage analysis - * [Nextclade](#nanopore-nextclade) - Clade assignment, mutation calling and sequence quality checks - * [ASCIIGenome](#nanopore-asciigenome) - Individual variant screenshots with annotation tracks -* [Workflow reporting](#nanopore-workflow-reporting) - * [MultiQC](#nanopore-multiqc) - Present QC, visualisation and custom reporting for sequencing, raw reads, alignment and variant calling results - -## Nanopore: Preprocessing - -A file called `summary_variants_metrics_mqc.csv` containing a selection of read alignment and variant calling metrics will be saved in the `multiqc/<CALLER>/` output directory which is determined by the `--artic_minion_caller` parameter (Default: `nanopolish/`). The same metrics will also be added to the top of the MultiQC report. - -### Nanopore: pycoQC - -
-Output files - -* `pycoqc/` - * `*.html` and `.json` file that includes a run summary and graphical representation of various QC metrics including distribution of read length, distribution of read quality scores, mean read quality per sequence length, output per channel over experiment time and percentage of reads per barcode. - -
- -[PycoQC](https://github.com/a-slide/pycoQC) computes metrics and generates QC plots using the sequencing summary information generated by basecalling/demultiplexing tools such as Guppy e.g. distribution of read length, read length over time, number of reads per barcode and other general stats. - -

- PycoQC - Number of reads per barcode -

- -### Nanopore: artic guppyplex - -
-Output files - -* `guppyplex/` - * `*.fastq.gz` files generated by aggregating pre-demultiplexed reads from MinKNOW/Guppy. These files are not saved by default but can be saved via a custom config file such as the one below. - -```nextflow -params { - modules { - 'nanopore_artic_guppyplex' { - publish_files = ['fastq.gz':''] - } - } -} -``` - -
- -The [artic guppyplex](https://artic.readthedocs.io/en/latest/commands/) tool from the [ARTIC field bioinformatics pipeline](https://github.com/artic-network/fieldbioinformatics) is used to perform length filtering of the demultiplexed Nanopore reads obtained per barcode. This essentially filters out chimeric reads that may be generated by the ARTIC protocol. The pipeline uses a default minimum and maximum read length of 400 and 700, respectively as tailored for the [nCoV-2019 primer set](https://artic.network/ncov-2019/ncov2019-bioinformatics-sop.html). However, you may need to adjust these for different primer schemes e.g. by using the minimum length of the amplicons (`--min-length`) as well as the maximum length plus 200 (`--max-length`). - -### Nanopore: NanoPlot - -
-Output files - -* `nanoplot/<SAMPLE>/` - * Per-sample `*.html` files for QC metrics and individual `*.png` image files for plots. - -
- -[NanoPlot](https://github.com/wdecoster/NanoPlot) is a tool that can be used to produce general quality metrics from various Nanopore-based input files including fastq files e.g. quality score distribution, read lengths and other general stats. - -

- Nanoplot - Read quality vs read length -

- -## Nanopore: Variant calling - -### Nanopore: artic minion - -
-Output files - -* `<CALLER>/` - * `*.consensus.fasta`: Consensus fasta file generated by artic minion. - * `*.pass.vcf.gz`: VCF file containing variants passing quality filters. - * `*.pass.vcf.gz.tbi`: VCF index file containing variants passing quality filters. - * `*.primers.vcf`: VCF file containing variants found in primer-binding regions. - * `*.merged.vcf`: VCF file containing all detected variants. - * `*.fail.vcf`: VCF file containing variants failing quality filters. - * `*.sorted.bam`: BAM file generated by initial alignment. - * `*.sorted.bam.bai`: BAM index file generated by initial alignment. - * `*.trimmed.rg.sorted.bam`: BAM file without primer-binding site trimming. - * `*.trimmed.rg.sorted.bam.bai`: BAM index file without primer-binding site trimming. - * `*.primertrimmed.rg.sorted.bam`: BAM file generated after primer-binding site trimming. - * `*.primertrimmed.rg.sorted.bam.bai`: BAM index file generated after primer-binding site trimming. - -**NB:** The value of `<CALLER>` in the output directory name above is determined by the `--artic_minion_caller` parameter (Default: 'nanopolish'). - -
- -The [artic minion](https://artic.readthedocs.io/en/latest/commands/) tool from the [ARTIC field bioinformatics pipeline](https://github.com/artic-network/fieldbioinformatics) is used to align reads, call variants and to generate the consensus sequence. By default, artic minion uses [Minimap2](https://github.com/lh3/minimap2) to align the reads to the viral genome, however you can use [BWA](https://github.com/lh3/bwa) instead using the `--artic_minion_aligner bwa` parameter. Similarly, the default variant caller used by artic minion is [Nanopolish](https://github.com/jts/nanopolish), however, you can use [Medaka](https://github.com/nanoporetech/medaka) instead via the `--artic_minion_caller medaka` parameter. Medaka is faster than Nanopolish, performs mostly the same and can be run directly from `fastq` input files as opposed to requiring the `fastq`, `fast5` and `sequencing_summary.txt` files required to run Nanopolish. You must provide the appropriate [Medaka model](https://github.com/nanoporetech/medaka#models) via the `--artic_minion_medaka_model` parameter if using `--artic_minion_caller medaka`. - -## Nanopore: Downstream analysis - -### Nanopore: SAMtools - -
-Output files - -* `<CALLER>/` - * `*.mapped.sorted.bam`: Coordinate sorted BAM file containing read alignment information. - * `*.mapped.sorted.bam.bai`: Index file for coordinate sorted BAM file. -* `<CALLER>/samtools_stats/` - * SAMtools `*.mapped.sorted.bam.flagstat`, `*.mapped.sorted.bam.idxstats` and `*.mapped.sorted.bam.stats` files generated from the alignment files. - -**NB:** The value of `<CALLER>` in the output directory name above is determined by the `--artic_minion_caller` parameter (Default: 'nanopolish'). - -
- -BAM files containing the original alignments from either Minimap2 or BWA are further processed with [SAMtools](http://samtools.sourceforge.net/) to remove unmapped reads as well as to generate read mapping statistics. - -![MultiQC - SAMtools alignment scores plot](images/mqc_samtools_stats_plot.png) - -### Nanopore: mosdepth - -
-Output files - -* `<CALLER>/mosdepth/genome/` - * `all_samples.mosdepth.coverage.tsv`: File aggregating genome-wide coverage values across all samples used for plotting. - * `*.mosdepth.coverage.pdf`: Whole-genome coverage plot. - * `*.mosdepth.coverage.tsv`: File containing coverage values for the above plot. - * `*.mosdepth.summary.txt`: Summary metrics including mean, min and max coverage values. -* `<CALLER>/mosdepth/amplicon/` - * `all_samples.mosdepth.coverage.tsv`: File aggregating per-amplicon coverage values across all samples used for plotting. - * `all_samples.mosdepth.heatmap.pdf`: Heatmap showing per-amplicon coverage across all samples. - * `*.mosdepth.coverage.pdf`: Bar plot showing per-amplicon coverage for an individual sample. - * `*.mosdepth.coverage.tsv`: File containing per-amplicon coverage values for the above plot. - * `*.mosdepth.summary.txt`: Summary metrics including mean, min and max coverage values. - -**NB:** The value of `<CALLER>` in the output directory name above is determined by the `--artic_minion_caller` parameter (Default: 'nanopolish'). - -
- -[mosdepth](https://github.com/brentp/mosdepth) is a fast BAM/CRAM depth calculation tool for WGS, exome, or targeted sequencing. mosdepth is used in this pipeline to obtain genome-wide coverage values in 200bp windows and to obtain amplicon/region-specific coverage metrics. The results are then either rendered in MultiQC (genome-wide coverage) or are plotted using custom `R` scripts. - -![R - Samples amplicon coverage heatmap](images/r_amplicon_heatmap.png) - -![R - Sample genome-wide coverage plot](images/r_genome_coverage.png) - -

- R - Sample per-amplicon coverage plot -

- -### Nanopore: BCFTools - -
-Output files - -* `<CALLER>/bcftools_stats/` - * `*.bcftools_stats.txt`: Statistics and counts obtained from VCF file. - -**NB:** The value of `<CALLER>` in the output directory name above is determined by the `--artic_minion_caller` parameter (Default: 'nanopolish'). - -
- -[BCFtools](http://samtools.github.io/bcftools/bcftools.html) is a set of utilities that manipulate variant calls in [VCF](https://vcftools.github.io/specs.html) and its binary counterpart BCF format. It can also be used to generate statistics and counts obtained from VCF files as used here. - -![MultiQC - BCFTools variant counts](images/mqc_bcftools_stats_plot.png) - -### Nanopore: SnpEff and SnpSift - -
-Output files - -* `<CALLER>/snpeff/` - * `*.snpeff.csv`: Variant annotation csv file. - * `*.snpeff.genes.txt`: Gene table for annotated variants. - * `*.snpeff.summary.html`: Summary html file for variants. - * `*.snpeff.vcf.gz`: VCF file with variant annotations. - * `*.snpeff.vcf.gz.tbi`: Index for VCF file with variant annotations. - * `*.snpsift.txt`: SnpSift summary table. -* `<CALLER>/snpeff/bcftools_stats/` - * `*.snpeff.bcftools_stats.txt`: Statistics and counts obtained from SnpEff VCF file. - -**NB:** The value of `<CALLER>` in the output directory name above is determined by the `--artic_minion_caller` parameter (Default: 'nanopolish'). - -
- -[SnpEff](http://snpeff.sourceforge.net/SnpEff.html) is a genetic variant annotation and functional effect prediction toolbox. It annotates and predicts the effects of genetic variants on genes and proteins (such as amino acid changes). - -[SnpSift](http://snpeff.sourceforge.net/SnpSift.html) annotates genomic variants using databases, filters, and manipulates genomic annotated variants. After annotation with SnpEff, you can use SnpSift to help filter large genomic datasets in order to find the most significant variants. - -![MultiQC - SnpEff annotation counts](images/mqc_snpeff_plot.png) - -### Nanopore: QUAST - -
-Output files - -* `<CALLER>/quast/` - * `report.html`: Results report in HTML format. Also available in various other file formats i.e. `report.pdf`, `report.tex`, `report.tsv` and `report.txt`. - -**NB:** The value of `<CALLER>` in the output directory name above is determined by the `--artic_minion_caller` parameter (Default: 'nanopolish'). - -
- -[QUAST](http://bioinf.spbau.ru/quast) is used to generate a single report with which to evaluate the quality of the consensus sequence across all of the samples provided to the pipeline. The HTML results can be opened within any browser (we recommend using Google Chrome). Please see the [QUAST output docs](http://quast.sourceforge.net/docs/manual.html#sec3) for more detailed information regarding the output files. - -### Nanopore: Pangolin - -
-Output files - -* `<CALLER>/pangolin/` - * `*.pangolin.csv`: Lineage analysis results from Pangolin. - -**NB:** The value of `<CALLER>` in the output directory name above is determined by the `--artic_minion_caller` parameter (Default: 'nanopolish'). - -
- -Phylogenetic Assignment of Named Global Outbreak LINeages ([Pangolin](https://github.com/cov-lineages/pangolin)) has been used extensively during the COVID-19 pandemic to assign lineages to SARS-CoV-2 genome sequenced samples. A [web application](https://pangolin.cog-uk.io/) also exists that allows users to upload genome sequences via a web browser to assign lineages to genome sequences of SARS-CoV-2, view descriptive characteristics of the assigned lineage(s), view the placement of the lineage in a phylogeny of global samples, and view the temporal and geographic distribution of the assigned lineage(s). - -### Nanopore: Nextclade - -
-Output files - -* `<CALLER>/nextclade/` - * `*.csv`: Analysis results from Nextclade containing genome clade assignment, mutation calling and sequence quality checks. - -**NB:** The value of `<CALLER>` in the output directory name above is determined by the `--artic_minion_caller` parameter (Default: 'nanopolish'). - -
- -[Nextclade](https://github.com/nextstrain/nextclade) performs viral genome clade assignment, mutation calling and sequence quality checks for the consensus sequences generated in this pipeline. Similar to Pangolin, it has been used extensively during the COVID-19 pandemic. A [web application](https://clades.nextstrain.org/) also exists that allows users to upload genome sequences via a web browser. - -### Nanopore: ASCIIGenome - -
-Output files - -* `<CALLER>/asciigenome/<SAMPLE>/` - * `*.pdf`: Individual variant screenshots with annotation tracks in PDF format. - -**NB:** The value of `<CALLER>` in the output directory name above is determined by the `--artic_minion_caller` parameter (Default: 'nanopolish'). - -
- -As described in the documentation, [ASCIIGenome](https://asciigenome.readthedocs.io/en/latest/) is a command-line genome browser that can be run from a terminal window and is solely based on ASCII characters. The closest program to ASCIIGenome is probably [samtools tview](http://www.htslib.org/doc/samtools-tview.html) but ASCIIGenome offers much more flexibility, similar to popular GUI viewers like the [IGV](https://software.broadinstitute.org/software/igv/) browser. We are using the batch processing mode of ASCIIGenome in this pipeline to generate individual screenshots for all of the variant sites reported for each sample in the VCF files. This is incredibly useful to be able to quickly QC the variants called by the pipeline without having to tediously load all of the relevant tracks into a conventional genome browser. Where possible, the BAM read alignments, VCF variant file, primer BED file and GFF annotation track will be represented in the screenshot for contextual purposes. The screenshot below shows a SNP called relative to the MN908947.3 SARS-CoV-2 reference genome that overlaps the ORF7a protein and the nCoV-2019_91_LEFT primer from the ARTIC v3 protocol. - -

- ASCIIGenome screenshot -

- -## Nanopore: Workflow reporting - -### Nanopore: MultiQC - -
-Output files - -* `multiqc/<CALLER>/` - * `multiqc_report.html`: a standalone HTML file that can be viewed in your web browser. - * `multiqc_data/`: directory containing parsed statistics from the different tools used in the pipeline. - * `summary_variants_metrics_mqc.csv`: file containing a selection of read alignment and variant calling metrics. The same metrics will also be added to the top of the MultiQC report. - -
- -[MultiQC](http://multiqc.info) is a visualization tool that generates a single HTML report summarizing all samples in your project. Most of the pipeline QC results are visualised in the report and further statistics are available in the report data directory. - -Results generated by MultiQC collate pipeline QC from pycoQC, samtools, mosdepth, BCFTools, SnpEff and QUAST. - -The default [`multiqc config file`](https://github.com/nf-core/viralrecon/blob/master/assets/multiqc_config_nanopore.yaml) has been written in a way in which to structure these QC metrics to make them more interpretable in the final report. - -The pipeline has special steps which also allow the software versions to be reported in the MultiQC output for future traceability. For more information about how to use MultiQC reports, see <http://multiqc.info>. - -An example MultiQC report generated from a full-sized dataset can be viewed on the [nf-core website](https://nf-co.re/viralrecon/results). - -# Illumina: Pipeline overview - -* [Preprocessing](#illumina-preprocessing) - * [cat](#cat) - Merge re-sequenced FastQ files - * [FastQC](#fastqc) - Raw read QC - * [fastp](#fastp) - Adapter and quality trimming - * [Kraken 2](#kraken-2) - Removal/QC for host reads -* [Variant calling](#illumina-variant-calling) - * [Bowtie 2](#bowtie-2) - Read alignment relative to reference genome - * [SAMtools](#samtools) - Sort, index and generate metrics for alignments - * [iVar trim](#ivar-trim) - Primer sequence removal for amplicon data - * [picard MarkDuplicates](#picard-markduplicates) - Duplicate read marking and removal - * [picard CollectMultipleMetrics](#picard-collectmultiplemetrics) - Alignment metrics - * [mosdepth](#mosdepth) - Whole-genome and amplicon coverage metrics - * [iVar variants and iVar consensus](#ivar-variants-and-ivar-consensus) *||* [BCFTools and BEDTools](#bcftools-and-bedtools) - Variant calling and consensus sequence generation - * [SnpEff and SnpSift](#snpeff-and-snpsift) - Genetic variant annotation
and functional effect prediction - * [QUAST](#quast) - Consensus assessment report - * [Pangolin](#pangolin) - Lineage analysis - * [Nextclade](#nextclade) - Clade assignment, mutation calling and sequence quality checks - * [ASCIIGenome](#asciigenome) - Individual variant screenshots with annotation tracks - * [BCFTools isec](#bcftools-isec) - Intersect variants across all callers -* [De novo assembly](#illumina-de-novo-assembly) - * [Cutadapt](#cutadapt) - Primer trimming for amplicon data - * [SPAdes](#spades) *||* [Unicycler](#unicycler) *||* [minia](#minia) - Viral genome assembly - * [BLAST](#blast) - Blast to reference assembly - * [ABACAS](#abacas) - Order contigs according to reference genome - * [PlasmidID](#plasmidid) - Assembly report and visualisation - * [Assembly QUAST](#assembly-quast) - Assembly quality assessment -* [Workflow reporting and genomes](#illumina-workflow-reporting-and-genomes) - * [MultiQC](#multiqc) - Present QC for raw reads, alignment, assembly and variant calling - * [Reference genome files](#reference-genome-files) - Save reference genome indices/files - -## Illumina: Preprocessing - -### cat - -
-Output files - -* `fastq/` - * `*.merged.fastq.gz`: These files are not saved by default but can be via a custom config file such as the one below. - -```nextflow -params { - modules { - 'illumina_cat_fastq' { - publish_files = null - } - } -} -``` - -
- -If multiple libraries/runs have been provided for the same sample in the input samplesheet (e.g. to increase sequencing depth) then these will be merged at the very beginning of the pipeline in order to have consistent sample naming throughout the pipeline. Please refer to the [usage documentation](https://nf-co.re/viralrecon/usage#illumina-samplesheet-format) to see how to specify these samples in the input samplesheet. - -### FastQC - -
-Output files - -* `fastqc/raw/` - * `*_fastqc.html`: FastQC report containing quality metrics. - * `*_fastqc.zip`: Zip archive containing the FastQC report, tab-delimited data file and plot images. - -**NB:** The FastQC plots in this directory are generated relative to the raw, input reads. They may contain adapter sequence and regions of low quality. To see how your reads look after trimming please refer to the FastQC reports in the `fastqc/trim/` directory. - -
- -[FastQC](http://www.bioinformatics.babraham.ac.uk/projects/fastqc/) gives general quality metrics about your sequenced reads. It provides information about the quality score distribution across your reads, per base sequence content (%A/T/G/C), adapter contamination and overrepresented sequences. For further reading and documentation see the [FastQC help pages](http://www.bioinformatics.babraham.ac.uk/projects/fastqc/Help/). - -![MultiQC - FastQC per base sequence plot](images/mqc_fastqc_plot.png) - -### fastp - -
-Output files - -* `fastp/` - * `*.fastp.html`: Trimming report in html format. - * `*.fastp.json`: Trimming report in json format. -* `fastp/log/` - * `*.fastp.log`: Trimming log file. -* `fastqc/trim/` - * `*_fastqc.html`: FastQC report of the trimmed reads. - * `*_fastqc.zip`: Zip archive containing the FastQC report, tab-delimited data file and plot images. - -
- -[fastp](https://github.com/OpenGene/fastp) is a tool designed to provide fast, all-in-one preprocessing for FastQ files. It has been developed in C++ with multithreading support to achieve higher performance. fastp is used in this pipeline for standard adapter trimming and quality filtering. - -![MultiQC - fastp filtered reads plot](images/mqc_fastp_plot.png) - -### Kraken 2 - -
-Output files - -* `kraken2/` - * `*.kraken2.report.txt`: Kraken 2 taxonomic report. See [here](https://ccb.jhu.edu/software/kraken2/index.shtml?t=manual#sample-report-output-format) for a detailed description of the format. - -
- -[Kraken 2](https://ccb.jhu.edu/software/kraken2/index.shtml?t=manual) is a sequence classifier that assigns taxonomic labels to DNA sequences. Kraken 2 examines the k-mers within a query sequence and uses the information within those k-mers to query a database. That database maps k-mers to the lowest common ancestor (LCA) of all genomes known to contain a given k-mer. - -We use a Kraken 2 database in this workflow to filter out reads specific to the host genome before performing the *de novo* assembly steps in the pipeline. This filtering is not performed in the variant calling arm of the pipeline by default but Kraken 2 is still run to obtain an estimate of host reads, however, the filtering can be amended via the `--kraken2_variants_host_filter` parameter. - -![MultiQC - Kraken 2 classification plot](images/mqc_kraken2_plot.png) - -## Illumina: Variant calling - -A file called `summary_variants_metrics_mqc.csv` containing a selection of read alignment and variant calling metrics will be saved in the `multiqc/` results directory. The same metrics will also be added to the top of the MultiQC report. - -### Bowtie 2 - -
-Output files - -* `variants/bowtie2/log/` - * `*.bowtie2.log`: Bowtie 2 mapping log file. - -
- -[Bowtie 2](http://bio-bwa.sourceforge.net/) is an ultrafast and memory-efficient tool for aligning sequencing reads to long reference sequences. Bowtie 2 supports gapped, local, and paired-end alignment modes. - -![MultiQC - Bowtie2 alignment score plot](images/mqc_bowtie2_plot.png) - -### SAMtools - -
-Output files - -* `variants/bowtie2/` - * `.sorted.bam`: Coordinate sorted BAM file containing read alignment information. - * `.sorted.bam.bai`: Index file for coordinate sorted BAM file. -* `variants/bowtie2/samtools_stats/` - * SAMtools `.sorted.bam.flagstat`, `.sorted.bam.idxstats` and `.sorted.bam.stats` files generated from the alignment files. - -
- -Bowtie 2 BAM files are further processed with [SAMtools](http://samtools.sourceforge.net/) to sort them by coordinate, for indexing, as well as to generate read mapping statistics. - -![MultiQC - SAMtools alignment scores plot](images/mqc_samtools_stats_plot.png) - -### iVar trim - -
-Output files - -* `variants/bowtie2/` - * `*.ivar_trim.sorted.bam`: Coordinate sorted BAM file after primer trimming. - * `*.ivar_trim.sorted.bam.bai`: Index file for coordinate sorted BAM file after primer trimming. -* `variants/bowtie2/samtools_stats/` - * SAMtools `*.ivar_trim.sorted.bam.flagstat`, `*.ivar_trim.sorted.bam.idxstats` and `*.ivar_trim.sorted.bam.stats` files generated from the primer trimmed alignment files. -* `variants/bowtie2/log/` - * `*.ivar_trim.ivar.log`: iVar trim log file obtained from stdout. - -
- -If the `--protocol amplicon` parameter is provided then [iVar](http://gensoft.pasteur.fr/docs/ivar/1.0/manualpage.html) is used to trim amplicon primer sequences from the aligned reads. iVar uses the primer positions supplied in `--primer_bed` to soft clip primer sequences from a coordinate sorted BAM file. - -### picard MarkDuplicates - -
-Output files - -* `variants/bowtie2/` - * `*.markduplicates.sorted.bam`: Coordinate sorted BAM file after duplicate marking. - * `*.markduplicates.sorted.bam.bai`: Index file for coordinate sorted BAM file after duplicate marking. -* `variants/bowtie2/samtools_stats/` - * SAMtools `*.markduplicates.sorted.bam.flagstat`, `*.markduplicates.sorted.bam.idxstats` and `*.markduplicates.sorted.bam.stats` files generated from the duplicate marked alignment files. -* `variants/bowtie2/picard_metrics/` - * `*.markduplicates.sorted.MarkDuplicates.metrics.txt`: Metrics file from MarkDuplicates. - -
- -Unless you are using [UMIs](https://emea.illumina.com/science/sequencing-method-explorer/kits-and-arrays/umi.html) it is not possible to establish whether the fragments you have sequenced from your sample were derived via true biological duplication (i.e. sequencing independent template fragments) or as a result of PCR biases introduced during the library preparation. [picard MarkDuplicates](https://gatk.broadinstitute.org/hc/en-us/articles/360037052812-MarkDuplicates-Picard-) isn't run by default because you anticipate high levels of duplication with viral data due to the size of the genome, however, you can activate it by adding `--skip_markduplicates false` to the command you use to run the pipeline. This will only *mark* the duplicate reads identified amongst the alignments to allow you to guage the overall level of duplication in your samples. You can also choose to remove any reads identified as duplicates via the `--filter_duplicates` parameter. - -![MultiQC - Picard MarkDuplicates metrics plot](images/mqc_picard_duplicates_plot.png) - -### picard CollectMultipleMetrics - -
-Output files - -* `variants/bowtie2/picard_metrics/` - * `*.CollectMultipleMetrics.*`: Alignment QC files from picard CollectMultipleMetrics in `*_metrics` textual format. -* `variants/bowtie2/picard_metrics/pdf/` - * `*.pdf` plots for metrics obtained from CollectMultipleMetrics. - -
- -[picard-tools](https://broadinstitute.github.io/picard/command-line-overview.html) is a set of command-line tools for manipulating high-throughput sequencing data. We use picard-tools in this pipeline to obtain mapping and coverage metrics. - -![MultiQC - Picard insert size plot](images/mqc_picard_insert_size_plot.png) - -### mosdepth - -
-Output files - -* `variants/bowtie2/mosdepth/genome/` - * `all_samples.mosdepth.coverage.tsv`: File aggregating genome-wide coverage values across all samples used for plotting. - * `*.mosdepth.coverage.pdf`: Whole-genome coverage plot. - * `*.mosdepth.coverage.tsv`: File containing coverage values for the above plot. - * `*.mosdepth.summary.txt`: Summary metrics including mean, min and max coverage values. -* `variants/bowtie2/mosdepth/amplicon/` - * `all_samples.mosdepth.coverage.tsv`: File aggregating per-amplicon coverage values across all samples used for plotting. - * `all_samples.mosdepth.heatmap.pdf`: Heatmap showing per-amplicon coverage across all samples. - * `*.mosdepth.coverage.pdf`: Bar plot showing per-amplicon coverage for an individual sample. - * `*.mosdepth.coverage.tsv`: File containing per-amplicon coverage values for the above plot. - * `*.mosdepth.summary.txt`: Summary metrics including mean, min and max coverage values. - -
- -[mosdepth](mosdepth) is a fast BAM/CRAM depth calculation for WGS, exome, or targeted sequencing. mosdepth is used in this pipeline to obtain genome-wide coverage values in 200bp windows and for `--protocol amplicon` to obtain amplicon/region-specific coverage metrics. The results are then either rendered in MultiQC (genome-wide coverage) or are plotted using custom `R` scripts. - -![R - Samples amplicon coverage heatmap ](images/r_amplicon_heatmap.png) - -![R - Sample genome-wide coverage plot](images/r_genome_coverage.png) - -

- R - Sample per-amplicon coverage plot -

- -### iVar variants and iVar consensus - -
-Output files - -* `variants/ivar/` - * `*.tsv`: Original iVar variants in TSV format. - * `*.vcf.gz`: iVar variants in VCF format. - * `*.vcf.gz.tbi`: iVar variants in VCF index file. -* `variants/ivar/consensus/` - * `*.consensus.fa`: Consensus Fasta file generated by iVar. - * `*.consensus.qual.txt`: File with the average quality of each base in the consensus sequence. -* `variants/ivar/consensus/base_qc/` - * `*.ACTG_density.pdf`: Plot showing density of ACGT bases within the consensus sequence. - * `*.base_counts.pdf`: Plot showing frequency and percentages of all bases in consensus sequence. - * `*.base_counts.tsv`: File containing frequency and percentages of all bases in consensus sequence. - * `*.N_density.pdf`: Plot showing density of N bases within the consensus sequence. - * `*.N_run.tsv`: File containing start positions and width of N bases in consensus sequence. -* `variants/ivar/log/` - * `*.variant_counts.log`: Counts for type of variants called by iVar. -* `variants/ivar/bcftools_stats/` - * `*.bcftools_stats.txt`: Statistics and counts obtained from iVar variants VCF file. - -
- -[iVar](https://github.com/andersen-lab/ivar/blob/master/docs/MANUAL.md) is a computational package that contains functions broadly useful for viral amplicon-based sequencing. We use iVar in this pipeline to [trim primer sequences](#ivar-trim) for amplicon input data as well as to call variants and for consensus sequence generation. - -![MultiQC - iVar variants called plot](images/mqc_ivar_variants_plot.png) - -### BCFTools and BEDTools - -
-Output files - -* `variants/bcftools/` - * `*.vcf.gz`: Variants VCF file. - * `*.vcf.gz.tbi`: Variants VCF index file. -* `variants/bcftools/consensus/` - * `*.consensus.fa`: Consensus Fasta file generated by integrating the variants called by BCFTools into the reference genome. -* `variants/bcftools/consensus/base_qc/` - * `*.ACTG_density.pdf`: Plot showing density of ACGT bases within the consensus sequence. - * `*.base_counts.pdf`: Plot showing frequency and percentages of all bases in consensus sequence. - * `*.base_counts.tsv`: File containing frequency and percentages of all bases in consensus sequence. - * `*.N_density.pdf`: Plot showing density of N bases within the consensus sequence. - * `*.N_run.tsv`: File containing start positions and width of N bases in consensus sequence. -* `variants/bcftools/bcftools_stats/` - * `*.bcftools_stats.txt`: Statistics and counts obtained from VCF file. - -
- -[BCFtools](http://samtools.github.io/bcftools/bcftools.html) can be used to call variants directly from BAM alignment files. The functionality to call variants with BCFTools in this pipeline was inspired by work carried out by [Conor Walker](https://github.com/conorwalker/covid19/blob/3cb26ec399417bedb7e60487415c78a405f517d6/scripts/call_variants.sh). - -[BCFtools](http://samtools.github.io/bcftools/bcftools.html) is a set of utilities that manipulate variant calls in [VCF](https://vcftools.github.io/specs.html) and its binary counterpart BCF format. BCFTools is used in the variant calling and *de novo* assembly steps of this pipeline to obtain basic statistics from the VCF output. It can also used be used to generate a consensus sequence by integrating variant calls into the reference genome. - -[BEDTools](https://bedtools.readthedocs.io/en/latest/) is a swiss-army knife of tools for a wide-range of genomics analysis tasks. In this pipeline we use `bedtools genomecov` to compute the per-base mapped read coverage in bedGraph format, and `bedtools maskfasta` to mask sequences in a Fasta file based on intervals defined in a feature file. This may be useful for creating your own masked genome file based on custom annotations or for masking all but your target regions when aligning sequence data from a targeted capture experiment. - -![MultiQC - BCFTools variant counts](images/mqc_bcftools_stats_plot.png) - -### SnpEff and SnpSift - -
-Output files - -* `variants//snpeff/` - * `*.snpeff.csv`: Variant annotation csv file. - * `*.snpeff.genes.txt`: Gene table for annotated variants. - * `*.snpeff.summary.html`: Summary html file for variants. - * `*.snpeff.vcf.gz`: VCF file with variant annotations. - * `*.snpeff.vcf.gz.tbi`: Index for VCF file with variant annotations. - * `*.snpsift.txt`: SnpSift summary table. -* `variants//snpeff/bcftools_stats/` - * `*.bcftools_stats.txt`: Statistics and counts obtained from VCF file. - -**NB:** The value of `` in the output directory name above is determined by the `--callers` parameter (Default: 'ivar' for '--protocol amplicon' and 'bcftools' for '--protocol metagenomic'). - -
- -[SnpEff](http://snpeff.sourceforge.net/SnpEff.html) is a genetic variant annotation and functional effect prediction toolbox. It annotates and predicts the effects of genetic variants on genes and proteins (such as amino acid changes). - -[SnpSift](http://snpeff.sourceforge.net/SnpSift.html) annotates genomic variants using databases, filters, and manipulates genomic annotated variants. After annotation with SnpEff, you can use SnpSift to help filter large genomic datasets in order to find the most significant variants. - -![MultiQC - SnpEff annotation counts](images/mqc_snpeff_plot.png) - -### QUAST - -
-Output files - -* `variants//quast/` - * `report.html`: Results report in HTML format. Also available in various other file formats i.e. `report.pdf`, `report.tex`, `report.tsv` and `report.txt`. - -**NB:** The value of `` in the output directory name above is determined by the `--callers` parameter (Default: 'ivar' for '--protocol amplicon' and 'bcftools' for '--protocol metagenomic'). - -
- -[QUAST](http://bioinf.spbau.ru/quast) is used to generate a single report with which to evaluate the quality of the consensus sequence across all of the samples provided to the pipeline. The HTML results can be opened within any browser (we recommend using Google Chrome). Please see the [QUAST output docs](http://quast.sourceforge.net/docs/manual.html#sec3) for more detailed information regarding the output files. - -### Pangolin - -
-Output files - -* `variants//pangolin/` - * `*.pangolin.csv`: Lineage analysis results from Pangolin. - -**NB:** The value of `` in the output directory name above is determined by the `--callers` parameter (Default: 'ivar' for '--protocol amplicon' and 'bcftools' for '--protocol metagenomic'). - -
- -Phylogenetic Assignment of Named Global Outbreak LINeages ([Pangolin](https://github.com/cov-lineages/pangolin)) has been used extensively during the COVID-19 pandemic in order to to assign lineages to SARS-CoV-2 genome sequenced samples. A [web application](https://pangolin.cog-uk.io/) also exists that allows users to upload genome sequences via a web browser to assign lineages to genome sequences of SARS-CoV-2, view descriptive characteristics of the assigned lineage(s), view the placement of the lineage in a phylogeny of global samples, and view the temporal and geographic distribution of the assigned lineage(s). - -### Nextclade - -
-Output files - -* `variants//nextclade/` - * `*.csv`: Analysis results from Nextlade containing genome clade assignment, mutation calling and sequence quality checks. - -**NB:** The value of `` in the output directory name above is determined by the `--callers` parameter (Default: 'ivar' for '--protocol amplicon' and 'bcftools' for '--protocol metagenomic'). - -
- -[Nextclade](https://github.com/nextstrain/nextclade) performs viral genome clade assignment, mutation calling and sequence quality checks for the consensus sequences generated in this pipeline. Similar to Pangolin, it has been used extensively during the COVID-19 pandemic. A [web application](https://clades.nextstrain.org/) also exists that allows users to upload genome sequences via a web browser. - -### ASCIIGenome - -
-Output files - -* `variants//asciigenome//` - * `*.pdf`: Individual variant screenshots with annotation tracks in PDF format. - -**NB:** The value of `` in the output directory name above is determined by the `--callers` parameter (Default: 'ivar' for '--protocol amplicon' and 'bcftools' for '--protocol metagenomic'). - -
- -As described in the documentation, [ASCIIGenome](https://asciigenome.readthedocs.io/en/latest/) is a command-line genome browser that can be run from a terminal window and is solely based on ASCII characters. The closest program to ASCIIGenome is probably [samtools tview](http://www.htslib.org/doc/samtools-tview.html) but ASCIIGenome offers much more flexibility, similar to popular GUI viewers like the [IGV](https://software.broadinstitute.org/software/igv/) browser. We are using the batch processing mode of ASCIIGenome in this pipeline to generate individual screenshots for all of the variant sites reported for each sample in the VCF files. This is incredibly useful to be able to quickly QC the variants called by the pipeline without having to tediously load all of the relevant tracks into a conventional genome browser. Where possible, the BAM read alignments, VCF variant file, primer BED file and GFF annotation track will be represented in the screenshot for contextual purposes. The screenshot below shows a SNP called relative to the MN908947.3 SARS-CoV-2 reference genome that overlaps the ORF7a protein and the nCoV-2019_91_LEFT primer from the ARIC v3 protocol. - -

- ASCIIGenome screenshot -

- -### BCFTools isec - -
-Output files - -* `variants/intersect//` - * `*.vcf.gz`: VCF file containing variants common to both variant callers. There will be one file for each caller - see `README.txt` for details. - * `*.vcf.gz.tbi`: Index for VCF file. - * `README.txt`: File containing command used and file name mappings. - * `sites.txt`: List of variants common to both variant callers in textual format. The last column indicates presence (1) or absence (0) amongst the 2 different callers. - -**NB:** This process will only be executed when both variant callers are specified to be run i.e. `--callers ivar,bcftools`. - -
- -[BCFTools isec](http://samtools.github.io/bcftools/bcftools.html#isec) can be used to intersect the variant calls generated by the 2 different callers used in the pipeline. This permits a quick assessment of how consistently a particular variant is being called using different algorithms and to prioritise the investigation of the variants. - -## Illumina: De novo assembly - -A file called `summary_assembly_metrics_mqc.csv` containing a selection of read alignment and *de novo* assembly related metrics will be saved in the `multiqc/` results directory. The same metrics will also be added to the top of the MultiQC report. - -### Cutadapt - -
-Output files - -* `assembly/cutadapt/log/` - * `*.cutadapt.log`: Cutadapt log file generated from stdout. -* `assembly/cutadapt/fastqc/` - * `*_fastqc.html`: FastQC report of the trimmed reads. - * `*_fastqc.zip`: Zip archive containing the FastQC report. - -
- -In the variant calling branch of the pipeline we are using [iVar trim](#ivar-trim) to remove primer sequences from the aligned BAM files for amplicon data. Since in the *de novo* assembly branch we don't align the reads, we use [Cutadapt](https://cutadapt.readthedocs.io/en/stable/guide.html) as an alternative option to remove and clean the primer sequences directly from FastQ files. - -![MultiQC - Cutadapt filtered reads plot](images/mqc_cutadapt_plot.png) - -### SPAdes - -
-Output files - -* `assembly/spades//` - * `*.scaffolds.fa`: SPAdes scaffold assembly. - * `*.contigs.fa`: SPAdes assembly contigs. - * `*.assembly.gfa`: SPAdes assembly graph in [GFA](https://github.com/GFA-spec/GFA-spec/blob/master/GFA1.md) format. -* `assembly/spades//bandage/` - * `*.png`: Bandage visualisation for SPAdes assembly graph in PNG format. - * `*.svg`: Bandage visualisation for SPAdes assembly graph in SVG format. - -**NB:** The value of `` in the output directory name above is determined by the `--spades_mode` parameter (Default: 'rnaviral'). - -
- -[SPAdes](http://cab.spbu.ru/software/spades/) is an assembly toolkit containing various assembly pipelines. Generically speaking, SPAdes is one of the most popular de Bruijn graph-based assembly algorithms used for bacterial/viral genome reconstruction. - -[Bandage](https://rrwick.github.io/Bandage/) is a program for visualising *de novo* assembly graphs. By displaying connections which are not present in the contigs file, Bandage opens up new possibilities for analysing *de novo* assemblies. - -### Unicycler - -
-Output files - -* `assembly/unicycler/` - * `*.scaffolds.fa`: Unicycler scaffold assembly. - * `*.assembly.gfa`: Unicycler assembly graph in GFA format. -* `assembly/unicycler/bandage/` - * `*.png`: Bandage visualisation for Unicycler assembly graph in PNG format. - * `*.svg`: Bandage visualisation for Unicycler assembly graph in SVG format. - -
- -[Unicycler](https://github.com/rrwick/Unicycler) is an assembly pipeline for bacterial genomes. It can assemble Illumina-only read sets where it functions as a SPAdes-optimiser. - -### minia - -
-Output files - -* `assembly/minia/` - * `*.contigs.fa`: Minia scaffold assembly. - * `*.unitigs.fa`: Minia unitigs fasta file. - * `*.h5`: Minia h5 output file. - -
- -[Minia](https://github.com/GATB/minia) is a short-read assembler based on a de Bruijn graph, capable of assembling a human genome on a desktop computer in a day. The output of Minia is a set of contigs. Minia produces results of similar contiguity and accuracy to other de Bruijn assemblers. - -### BLAST - -
-Output files - -* `assembly//blastn/` - * `*.blastn.txt`: BLAST results against the target virus. - * `*.filter.blastn.txt`: Filtered BLAST results. - -**NB:** The value of `` in the output directory name above is determined by the `--assemblers` parameter (Default: 'spades'). - -
- -[blastn](https://blast.ncbi.nlm.nih.gov/Blast.cgi?PAGE_TYPE=BlastSearch) is used to align the assembled contigs against the virus reference genome. - -### ABACAS - -
-Output files - -* `assembly//abacas/` - * `*.abacas.bin`: Bin file that contains contigs that are not used in ordering. - * `*.abacas.crunch`: Comparison file. - * `*.abacas.fasta`: Ordered and orientated sequence file. - * `*.abacas.gaps`: Gap information. - * `*.abacas.gaps.tab`: Gap information in tab-delimited format. - * `*.abacas.MULTIFASTA.fa`: A list of ordered and orientated contigs in a multi-fasta format. - * `*.abacas.tab`: Feature file - * `*.unused_contigs.out`: Information on contigs that have a mapping information but could not be used in the ordering. -* `assembly//abacas/nucmer/`: Folder containing the files generated by the NUCmer algorithm used by ABACAS. - -**NB:** The value of `` in the output directory name above is determined by the `--assemblers` parameter (Default: 'spades'). - -
- -[ABACAS](https://www.sanger.ac.uk/science/tools/pagit) was developed to rapidly contiguate (align, order, orientate), visualize and design primers to close gaps on shotgun assembled contigs based on a reference sequence. - -### PlasmidID - -
-Output files - -* `assembly//plasmidid//` - * `*_final_results.html`: Summary file with reference coverage stats and contigs for visualization. - * `*_final_results.tab`: Summary file with reference coverage stats and contigs. - * `images/_.png`: PNG file with the visualization of the alignment between the viral assembly and the reference viral genome. - * `logs/`: Log files. - -**NB:** The value of `` in the output directory name above is determined by the `--assemblers` parameter (Default: 'spades'). - -
- -[PlasmidID](https://github.com/BU-ISCIII/plasmidID) was used to graphically represent the alignment of the reference genome relative to a given assembly. This helps to visualize the coverage of the reference genome in the assembly. To find more information about the output files refer to the [documentation](https://github.com/BU-ISCIII/plasmidID/wiki/Understanding-the-image:-track-by-track). - -### Assembly QUAST - -
-Output files - -* `assembly//quast/` - * `report.html`: Results report in HTML format. Also available in various other file formats i.e. `report.pdf`, `report.tex`, `report.tsv` and `report.txt`. - -**NB:** The value of `` in the output directory name above is determined by the `--assemblers` parameter (Default: 'spades'). - -
- -[QUAST](http://bioinf.spbau.ru/quast) is used to generate a single report with which to evaluate the quality of the *de novo* assemblies across all of the samples provided to the pipeline. The HTML results can be opened within any browser (we recommend using Google Chrome). Please see the [QUAST output docs](http://quast.sourceforge.net/docs/manual.html#sec3) for more detailed information regarding the output files. - -![MultiQC - QUAST contig counts](images/mqc_quast_plot.png) - -## Illumina: Workflow reporting and genomes - -### MultiQC - -
-Output files - -* `multiqc/` - * `multiqc_report.html`: a standalone HTML file that can be viewed in your web browser. - * `multiqc_data/`: directory containing parsed statistics from the different tools used in the pipeline. - * `summary_variants_metrics_mqc.csv`: file containing a selection of read alignment and variant calling metrics. The same metrics will also be added to the top of the MultiQC report. - * `summary_assembly_metrics_mqc.csv`: file containing a selection of read alignment and *de novo* assembly related metrics. The same metrics will also be added to the top of the MultiQC report. - -
- -[MultiQC](http://multiqc.info) is a visualization tool that generates a single HTML report summarizing all samples in your project. Most of the pipeline QC results are visualised in the report and further statistics are available in the report data directory. - -Results generated by MultiQC collate pipeline QC from FastQC, fastp, Cutadapt, Bowtie 2, Kraken 2, samtools, picard CollectMultipleMetrics, BCFTools, SnpEff and QUAST. - -The default [`multiqc config file`](https://github.com/nf-core/viralrecon/blob/master/assets/multiqc_config_illumina.yaml) has been written in a way in which to structure these QC metrics to make them more interpretable in the final report. - -The pipeline has special steps which also allow the software versions to be reported in the MultiQC output for future traceability. For more information about how to use MultiQC reports, see . - -An example MultiQC report generated from a full-sized dataset can be viewed on the [nf-core website](https://nf-co.re/viralrecon/results). - -### Reference genome files - -
-Output files - -* `genome/` - * Unzipped genome fasta file for viral genome - * Unzipped genome annotation GFF file for viral genome -* `genome/index/` - * `bowtie2/`: Bowtie 2 index for viral genome. -* `genome/db/` - * `blast_db/`: BLAST database for viral genome. - * `kraken2_db/`: Kraken 2 database for host genome. - * `snpeff_db/`: SnpEff database for viral genome. - * `snpeff.config`: SnpEff config file for viral genome. - -
- -A number of genome-specific files are generated by the pipeline because they are required for the downstream processing of the results. If the `--save_reference` parameter is provided then the Bowtie 2 alignment indices, BLAST and Kraken 2 databases downloaded/generated by the pipeline will be saved in the `genome/` directory. It is recommended to use the `--save_reference` parameter if you are using the pipeline to build a Kraken 2 database for the host genome. This can be quite a time-consuming process and it permits their reuse for future runs of the pipeline or for other purposes. - -# Pipeline information - -
-Output files - -* `pipeline_info/` - * Reports generated by Nextflow: `execution_report.html`, `execution_timeline.html`, `execution_trace.txt` and `pipeline_dag.dot`/`pipeline_dag.svg`. - * Reports generated by the pipeline: `pipeline_report.html`, `pipeline_report.txt` and `software_versions.csv`. - * Reformatted samplesheet files used as input to the pipeline: `samplesheet.valid.csv`. - * Documentation for interpretation of results in HTML format: `results_description.html`. - -
- -[Nextflow](https://www.nextflow.io/docs/latest/tracing.html) provides excellent functionality for generating various reports relevant to the running and execution of the pipeline. This will allow you to troubleshoot errors with the running of the pipeline, and also provide you with other information such as launch commands, run times and resource usage. + +# Introduction + +This document describes the output produced by the pipeline. Most of the plots are taken from the MultiQC report, which summarises results at the end of the pipeline. + +The directories listed below will be created in the results directory after the pipeline has finished. All paths are relative to the top-level results directory. + +# Nanopore: Pipeline overview + +* [Preprocessing](#nanopore-preprocessing) + * [pycoQC](#nanopore-pycoqc) - Sequencing QC + * [artic guppyplex](#nanopore-artic-guppyplex) - Aggregate pre-demultiplexed reads from MinKNOW/Guppy + * [NanoPlot](#nanopore-nanoplot) - Read QC +* [Variant calling](#nanopore-variant-calling) + * [artic minion](#nanopore-artic-minion) - Align reads, call variants and generate consensus sequence +* [Downstream analysis](#nanopore-downstream-analysis) + * [SAMtools](#nanopore-samtools) - Remove unmapped reads and obtain alignment metrics + * [mosdepth](#nanopore-mosdepth) - Genome-wide and amplicon coverage QC plots + * [BCFTools](#nanopore-bcftools) - Variant count metrics + * [SnpEff and SnpSift](#nanopore-snpeff-and-snpsift) - Genetic variant annotation and functional effect prediction + * [QUAST](#nanopore-quast) - Consensus assessment report + * [Pangolin](#nanopore-pangolin) - Lineage analysis + * [Nextclade](#nanopore-nextclade) - Clade assignment, mutation calling and sequence quality checks + * [ASCIIGenome](#nanopore-asciigenome) - Individual variant screenshots with annotation tracks +* [Workflow reporting](#nanopore-workflow-reporting) + * [MultiQC](#nanopore-multiqc) - Present QC, visualisation and custom reporting for 
sequencing, raw reads, alignment and variant calling results + +## Nanopore: Preprocessing + +A file called `summary_variants_metrics_mqc.csv` containing a selection of read alignment and variant calling metrics will be saved in the `multiqc//` output directory which is determined by the `--artic_minion_caller` parameter (Default: `nanopolish/`). The same metrics will also be added to the top of the MultiQC report. + +### Nanopore: pycoQC + +
+Output files + +* `pycoqc/` + * `*.html` and `.json` file that includes a run summary and graphical representation of various QC metrics including distribution of read length, distribution of read quality scores, mean read quality per sequence length, output per channel over experiment time and percentage of reads per barcode. + +
+ +[PycoQC](https://github.com/a-slide/pycoQC) computes metrics and generates QC plots using the sequencing summary information generated by basecalling/demultiplexing tools such as Guppy e.g. distribution of read length, read length over time, number of reads per barcode and other general stats. + +

+ PycoQC - Number of reads per barcode +

+ +### Nanopore: artic guppyplex + +
+Output files + +* `guppyplex/` + * `*.fastq.gz` files generated by aggregating pre-demultiplexed reads from MinKNOW/Guppy. These files are not saved by default but can be via a custom config file such as the one below. + +```nextflow +params { + modules { + 'nanopore_artic_guppyplex' { + publish_files = ['fastq.gz':''] + } + } +} +``` + +
+ +The [artic guppyplex](https://artic.readthedocs.io/en/latest/commands/) tool from the [ARTIC field bioinformatics pipeline](https://github.com/artic-network/fieldbioinformatics) is used to perform length filtering of the demultiplexed Nanopore reads obtained per barcode. This essentially filters out chimeric reads that may be generated by the ARTIC protocol. The pipeline uses a default minimum and maximum read length of 400 and 700, respectively as tailored for the [nCoV-2019 primer set](https://artic.network/ncov-2019/ncov2019-bioinformatics-sop.html). However, you may need to adjust these for different primer schemes e.g. by using the minimum length of the amplicons (`--min-length`) as well as the maximum length plus 200 (`--max-length`). + +### Nanopore: NanoPlot + +
+Output files + +* `nanoplot//` + * Per-sample `*.html` files for QC metrics and individual `*.png` image files for plots. + +
+ +[NanoPlot](https://github.com/wdecoster/NanoPlot) is a tool that can be used to produce general quality metrics from various Nanopore-based input files including fastq files e.g. quality score distribution, read lengths and other general stats. + +

+ Nanoplot - Read quality vs read length +

+ +## Nanopore: Variant calling + +### Nanopore: artic minion + +
+Output files + +* `/` + * `*.consensus.fasta`: Consensus fasta file generated by artic minion. + * `*.pass.vcf.gz`: VCF file containing variants passing quality filters. + * `*.pass.vcf.gz.tbi`: VCF index file containing variants passing quality filters. + * `*.primers.vcf`: VCF file containing variants found in primer-binding regions. + * `*.merged.vcf`: VCF file containing all detected variants. + * `*.fail.vcf`: VCF file containing variants failing quality filters. + * `*.sorted.bam`: BAM file generated by initial alignment. + * `*.sorted.bam.bai`: BAM index file generated by initial alignment. + * `*.trimmed.rg.sorted.bam`: BAM file without primer-binding site trimming. + * `*.trimmed.rg.sorted.bam.bai`: BAM index file without primer-binding site trimming. + * `*.primertrimmed.rg.sorted.bam`: BAM file generated after primer-binding site trimming. + * `*.primertrimmed.rg.sorted.bam.bai`: BAM index file generated after primer-binding site trimming. + +**NB:** The value of `` in the output directory name above is determined by the `--artic_minion_caller` parameter (Default: 'nanopolish'). + +
+ +The [artic minion](https://artic.readthedocs.io/en/latest/commands/) tool from the [ARTIC field bioinformatics pipeline](https://github.com/artic-network/fieldbioinformatics) is used to align reads, call variants and to generate the consensus sequence. By default, artic minion uses [Minimap2](https://github.com/lh3/minimap2) to align the reads to the viral genome, however you can use [BWA](https://github.com/lh3/bwa) instead using the `--artic_minion_aligner bwa` parameter. Similarly, the default variant caller used by artic minion is [Nanopolish](https://github.com/jts/nanopolish), however, you can use [Medaka](https://github.com/nanoporetech/medaka) instead via the `--artic_minion_caller medaka` parameter. Medaka is faster than Nanopolish, performs mostly the same and can be run directly from `fastq` input files as opposed to requiring the `fastq`, `fast5` and `sequencing_summary.txt` files required to run Nanopolish. You must provide the appropriate [Medaka model](https://github.com/nanoporetech/medaka#models) via the `--artic_minion_medaka_model` parameter if using `--artic_minion_caller medaka`. + +## Nanopore: Downstream analysis + +### Nanopore: SAMtools + +
+Output files + +* `/` + * `*.mapped.sorted.bam`: Coordinate sorted BAM file containing read alignment information. + * `*.mapped.sorted.bam.bai`: Index file for coordinate sorted BAM file. +* `/samtools_stats/` + * SAMtools `*.mapped.sorted.bam.flagstat`, `*.mapped.sorted.bam.idxstats` and `*.mapped.sorted.bam.stats` files generated from the alignment files. + +**NB:** The value of `` in the output directory name above is determined by the `--artic_minion_caller` parameter (Default: 'nanopolish'). + +
+ +BAM files containing the original alignments from either Minimap2 or BWA are further processed with [SAMtools](http://samtools.sourceforge.net/) to remove unmapped reads as well as to generate read mapping statistics. + +![MultiQC - SAMtools alignment scores plot](images/mqc_samtools_stats_plot.png) + +### Nanopore: mosdepth + +
+Output files + +* `/mosdepth/genome/` + * `all_samples.mosdepth.coverage.tsv`: File aggregating genome-wide coverage values across all samples used for plotting. + * `*.mosdepth.coverage.pdf`: Whole-genome coverage plot. + * `*.mosdepth.coverage.tsv`: File containing coverage values for the above plot. + * `*.mosdepth.summary.txt`: Summary metrics including mean, min and max coverage values. +* `/mosdepth/amplicon/` + * `all_samples.mosdepth.coverage.tsv`: File aggregating per-amplicon coverage values across all samples used for plotting. + * `all_samples.mosdepth.heatmap.pdf`: Heatmap showing per-amplicon coverage across all samples. + * `*.mosdepth.coverage.pdf`: Bar plot showing per-amplicon coverage for an individual sample. + * `*.mosdepth.coverage.tsv`: File containing per-amplicon coverage values for the above plot. + * `*.mosdepth.summary.txt`: Summary metrics including mean, min and max coverage values. + +**NB:** The value of `` in the output directory name above is determined by the `--artic_minion_caller` parameter (Default: 'nanopolish'). + +
+ +[mosdepth](https://github.com/brentp/mosdepth) is a fast BAM/CRAM depth calculation tool for WGS, exome, or targeted sequencing. mosdepth is used in this pipeline to obtain genome-wide coverage values in 200bp windows and to obtain amplicon/region-specific coverage metrics. The results are then either rendered in MultiQC (genome-wide coverage) or are plotted using custom `R` scripts. + +![R - Samples amplicon coverage heatmap ](images/r_amplicon_heatmap.png) + +![R - Sample genome-wide coverage plot](images/r_genome_coverage.png) + +

+ R - Sample per-amplicon coverage plot +

+ +### Nanopore: BCFTools + +
+Output files + +* `/bcftools_stats/` + * `*.bcftools_stats.txt`: Statistics and counts obtained from VCF file. + +**NB:** The value of `` in the output directory name above is determined by the `--artic_minion_caller` parameter (Default: 'nanopolish'). + +
+ +[BCFtools](http://samtools.github.io/bcftools/bcftools.html) is a set of utilities that manipulate variant calls in [VCF](https://vcftools.github.io/specs.html) and its binary counterpart BCF format. It can also be used to generate statistics and counts obtained from VCF files as used here. + +![MultiQC - BCFTools variant counts](images/mqc_bcftools_stats_plot.png) + +### Nanopore: SnpEff and SnpSift + +
+Output files + +* `/snpeff/` + * `*.snpeff.csv`: Variant annotation csv file. + * `*.snpeff.genes.txt`: Gene table for annotated variants. + * `*.snpeff.summary.html`: Summary html file for variants. + * `*.snpeff.vcf.gz`: VCF file with variant annotations. + * `*.snpeff.vcf.gz.tbi`: Index for VCF file with variant annotations. + * `*.snpsift.txt`: SnpSift summary table. +* `/snpeff/bcftools_stats/` + * `*.snpeff.bcftools_stats.txt`: Statistics and counts obtained from SnpEff VCF file. + +**NB:** The value of `` in the output directory name above is determined by the `--artic_minion_caller` parameter (Default: 'nanopolish'). + +
+ +[SnpEff](http://snpeff.sourceforge.net/SnpEff.html) is a genetic variant annotation and functional effect prediction toolbox. It annotates and predicts the effects of genetic variants on genes and proteins (such as amino acid changes). + +[SnpSift](http://snpeff.sourceforge.net/SnpSift.html) annotates genomic variants using databases, filters, and manipulates genomic annotated variants. After annotation with SnpEff, you can use SnpSift to help filter large genomic datasets in order to find the most significant variants. + +![MultiQC - SnpEff annotation counts](images/mqc_snpeff_plot.png) + +### Nanopore: QUAST + +
+Output files + +* `/quast/` + * `report.html`: Results report in HTML format. Also available in various other file formats i.e. `report.pdf`, `report.tex`, `report.tsv` and `report.txt`. + +**NB:** The value of `` in the output directory name above is determined by the `--artic_minion_caller` parameter (Default: 'nanopolish'). + +
+ +[QUAST](http://bioinf.spbau.ru/quast) is used to generate a single report with which to evaluate the quality of the consensus sequence across all of the samples provided to the pipeline. The HTML results can be opened within any browser (we recommend using Google Chrome). Please see the [QUAST output docs](http://quast.sourceforge.net/docs/manual.html#sec3) for more detailed information regarding the output files. + +### Nanopore: Pangolin + +
+Output files + +* `/pangolin/` + * `*.pangolin.csv`: Lineage analysis results from Pangolin. + +**NB:** The value of `` in the output directory name above is determined by the `--artic_minion_caller` parameter (Default: 'nanopolish'). + +
+ +Phylogenetic Assignment of Named Global Outbreak LINeages ([Pangolin](https://github.com/cov-lineages/pangolin)) has been used extensively during the COVID-19 pandemic to assign lineages to SARS-CoV-2 genome sequenced samples. A [web application](https://pangolin.cog-uk.io/) also exists that allows users to upload genome sequences via a web browser to assign lineages to genome sequences of SARS-CoV-2, view descriptive characteristics of the assigned lineage(s), view the placement of the lineage in a phylogeny of global samples, and view the temporal and geographic distribution of the assigned lineage(s). + +### Nanopore: Nextclade + +
+Output files + +* `/nextclade/` + * `*.csv`: Analysis results from Nextclade containing genome clade assignment, mutation calling and sequence quality checks. + +**NB:** The value of `` in the output directory name above is determined by the `--artic_minion_caller` parameter (Default: 'nanopolish'). + +
+ +[Nextclade](https://github.com/nextstrain/nextclade) performs viral genome clade assignment, mutation calling and sequence quality checks for the consensus sequences generated in this pipeline. Similar to Pangolin, it has been used extensively during the COVID-19 pandemic. A [web application](https://clades.nextstrain.org/) also exists that allows users to upload genome sequences via a web browser. + +### Nanopore: ASCIIGenome + +
+Output files + +* `/asciigenome//` + * `*.pdf`: Individual variant screenshots with annotation tracks in PDF format. + +**NB:** The value of `` in the output directory name above is determined by the `--artic_minion_caller` parameter (Default: 'nanopolish'). + +
+ +As described in the documentation, [ASCIIGenome](https://asciigenome.readthedocs.io/en/latest/) is a command-line genome browser that can be run from a terminal window and is solely based on ASCII characters. The closest program to ASCIIGenome is probably [samtools tview](http://www.htslib.org/doc/samtools-tview.html) but ASCIIGenome offers much more flexibility, similar to popular GUI viewers like the [IGV](https://software.broadinstitute.org/software/igv/) browser. We are using the batch processing mode of ASCIIGenome in this pipeline to generate individual screenshots for all of the variant sites reported for each sample in the VCF files. This is incredibly useful to be able to quickly QC the variants called by the pipeline without having to tediously load all of the relevant tracks into a conventional genome browser. Where possible, the BAM read alignments, VCF variant file, primer BED file and GFF annotation track will be represented in the screenshot for contextual purposes. The screenshot below shows a SNP called relative to the MN908947.3 SARS-CoV-2 reference genome that overlaps the ORF7a protein and the nCoV-2019_91_LEFT primer from the ARTIC v3 protocol. + +

+ ASCIIGenome screenshot +

+ +## Nanopore: Workflow reporting + +### Nanopore: MultiQC + +
+Output files + +* `multiqc//` + * `multiqc_report.html`: a standalone HTML file that can be viewed in your web browser. + * `multiqc_data/`: directory containing parsed statistics from the different tools used in the pipeline. + * `summary_variants_metrics_mqc.csv`: file containing a selection of read alignment and variant calling metrics. The same metrics will also be added to the top of the MultiQC report. + +
+ +[MultiQC](http://multiqc.info) is a visualization tool that generates a single HTML report summarizing all samples in your project. Most of the pipeline QC results are visualised in the report and further statistics are available in the report data directory. + +Results generated by MultiQC collate pipeline QC from pycoQC, samtools, mosdepth, BCFTools, SnpEff and QUAST. + +The default [`multiqc config file`](https://github.com/nf-core/viralrecon/blob/master/assets/multiqc_config_nanopore.yaml) has been written in a way in which to structure these QC metrics to make them more interpretable in the final report. + +The pipeline has special steps which also allow the software versions to be reported in the MultiQC output for future traceability. For more information about how to use MultiQC reports, see <http://multiqc.info>. + +An example MultiQC report generated from a full-sized dataset can be viewed on the [nf-core website](https://nf-co.re/viralrecon/results). + +# Illumina: Pipeline overview + +* [Preprocessing](#illumina-preprocessing) + * [cat](#cat) - Merge re-sequenced FastQ files + * [FastQC](#fastqc) - Raw read QC + * [fastp](#fastp) - Adapter and quality trimming + * [Kraken 2](#kraken-2) - Removal/QC for host reads +* [Variant calling](#illumina-variant-calling) + * [Bowtie 2](#bowtie-2) - Read alignment relative to reference genome + * [SAMtools](#samtools) - Sort, index and generate metrics for alignments + * [iVar trim](#ivar-trim) - Primer sequence removal for amplicon data + * [picard MarkDuplicates](#picard-markduplicates) - Duplicate read marking and removal + * [picard CollectMultipleMetrics](#picard-collectmultiplemetrics) - Alignment metrics + * [mosdepth](#mosdepth) - Whole-genome and amplicon coverage metrics + * [iVar variants and iVar consensus](#ivar-variants-and-ivar-consensus) *||* [BCFTools and BEDTools](#bcftools-and-bedtools) - Variant calling and consensus sequence generation + * [SnpEff and SnpSift](#snpeff-and-snpsift) - Genetic variant annotation 
and functional effect prediction + * [QUAST](#quast) - Consensus assessment report + * [Pangolin](#pangolin) - Lineage analysis + * [Nextclade](#nextclade) - Clade assignment, mutation calling and sequence quality checks + * [ASCIIGenome](#asciigenome) - Individual variant screenshots with annotation tracks + * [BCFTools isec](#bcftools-isec) - Intersect variants across all callers +* [De novo assembly](#illumina-de-novo-assembly) + * [Cutadapt](#cutadapt) - Primer trimming for amplicon data + * [SPAdes](#spades) *||* [Unicycler](#unicycler) *||* [minia](#minia) - Viral genome assembly + * [BLAST](#blast) - Blast to reference assembly + * [ABACAS](#abacas) - Order contigs according to reference genome + * [PlasmidID](#plasmidid) - Assembly report and visualisation + * [Assembly QUAST](#assembly-quast) - Assembly quality assessment +* [Workflow reporting and genomes](#illumina-workflow-reporting-and-genomes) + * [MultiQC](#multiqc) - Present QC for raw reads, alignment, assembly and variant calling + * [Reference genome files](#reference-genome-files) - Save reference genome indices/files + +## Illumina: Preprocessing + +### cat + +
+Output files + +* `fastq/` + * `*.merged.fastq.gz`: These files are not saved by default but can be via a custom config file such as the one below. + +```nextflow +params { + modules { + 'illumina_cat_fastq' { + publish_files = null + } + } +} +``` + +
+ +If multiple libraries/runs have been provided for the same sample in the input samplesheet (e.g. to increase sequencing depth) then these will be merged at the very beginning of the pipeline in order to have consistent sample naming throughout the pipeline. Please refer to the [usage documentation](https://nf-co.re/viralrecon/usage#illumina-samplesheet-format) to see how to specify these samples in the input samplesheet. + +### FastQC + +
+Output files + +* `fastqc/raw/` + * `*_fastqc.html`: FastQC report containing quality metrics. + * `*_fastqc.zip`: Zip archive containing the FastQC report, tab-delimited data file and plot images. + +**NB:** The FastQC plots in this directory are generated relative to the raw, input reads. They may contain adapter sequence and regions of low quality. To see how your reads look after trimming please refer to the FastQC reports in the `fastqc/trim/` directory. + +
+ +[FastQC](http://www.bioinformatics.babraham.ac.uk/projects/fastqc/) gives general quality metrics about your sequenced reads. It provides information about the quality score distribution across your reads, per base sequence content (%A/T/G/C), adapter contamination and overrepresented sequences. For further reading and documentation see the [FastQC help pages](http://www.bioinformatics.babraham.ac.uk/projects/fastqc/Help/). + +![MultiQC - FastQC per base sequence plot](images/mqc_fastqc_plot.png) + +### fastp + +
+Output files + +* `fastp/` + * `*.fastp.html`: Trimming report in html format. + * `*.fastp.json`: Trimming report in json format. +* `fastp/log/` + * `*.fastp.log`: Trimming log file. +* `fastqc/trim/` + * `*_fastqc.html`: FastQC report of the trimmed reads. + * `*_fastqc.zip`: Zip archive containing the FastQC report, tab-delimited data file and plot images. + +
+ +[fastp](https://github.com/OpenGene/fastp) is a tool designed to provide fast, all-in-one preprocessing for FastQ files. It has been developed in C++ with multithreading support to achieve higher performance. fastp is used in this pipeline for standard adapter trimming and quality filtering. + +![MultiQC - fastp filtered reads plot](images/mqc_fastp_plot.png) + +### Kraken 2 + +
+Output files + +* `kraken2/` + * `*.kraken2.report.txt`: Kraken 2 taxonomic report. See [here](https://ccb.jhu.edu/software/kraken2/index.shtml?t=manual#sample-report-output-format) for a detailed description of the format. + +
+ +[Kraken 2](https://ccb.jhu.edu/software/kraken2/index.shtml?t=manual) is a sequence classifier that assigns taxonomic labels to DNA sequences. Kraken 2 examines the k-mers within a query sequence and uses the information within those k-mers to query a database. That database maps k-mers to the lowest common ancestor (LCA) of all genomes known to contain a given k-mer. + +We use a Kraken 2 database in this workflow to filter out reads specific to the host genome before performing the *de novo* assembly steps in the pipeline. This filtering is not performed in the variant calling arm of the pipeline by default but Kraken 2 is still run to obtain an estimate of host reads, however, the filtering can be amended via the `--kraken2_variants_host_filter` parameter. + +![MultiQC - Kraken 2 classification plot](images/mqc_kraken2_plot.png) + +## Illumina: Variant calling + +A file called `summary_variants_metrics_mqc.csv` containing a selection of read alignment and variant calling metrics will be saved in the `multiqc/` results directory. The same metrics will also be added to the top of the MultiQC report. + +### Bowtie 2 + +
+Output files + +* `variants/bowtie2/log/` + * `*.bowtie2.log`: Bowtie 2 mapping log file. + +
+ +[Bowtie 2](http://bowtie-bio.sourceforge.net/bowtie2/index.shtml) is an ultrafast and memory-efficient tool for aligning sequencing reads to long reference sequences. Bowtie 2 supports gapped, local, and paired-end alignment modes. + +![MultiQC - Bowtie2 alignment score plot](images/mqc_bowtie2_plot.png) + +### SAMtools + +
+Output files + +* `variants/bowtie2/` + * `.sorted.bam`: Coordinate sorted BAM file containing read alignment information. + * `.sorted.bam.bai`: Index file for coordinate sorted BAM file. +* `variants/bowtie2/samtools_stats/` + * SAMtools `.sorted.bam.flagstat`, `.sorted.bam.idxstats` and `.sorted.bam.stats` files generated from the alignment files. + +
+ +Bowtie 2 BAM files are further processed with [SAMtools](http://samtools.sourceforge.net/) to sort them by coordinate, for indexing, as well as to generate read mapping statistics. + +![MultiQC - SAMtools alignment scores plot](images/mqc_samtools_stats_plot.png) + +### iVar trim + +
+Output files + +* `variants/bowtie2/` + * `*.ivar_trim.sorted.bam`: Coordinate sorted BAM file after primer trimming. + * `*.ivar_trim.sorted.bam.bai`: Index file for coordinate sorted BAM file after primer trimming. +* `variants/bowtie2/samtools_stats/` + * SAMtools `*.ivar_trim.sorted.bam.flagstat`, `*.ivar_trim.sorted.bam.idxstats` and `*.ivar_trim.sorted.bam.stats` files generated from the primer trimmed alignment files. +* `variants/bowtie2/log/` + * `*.ivar_trim.ivar.log`: iVar trim log file obtained from stdout. + +
+ +If the `--protocol amplicon` parameter is provided then [iVar](http://gensoft.pasteur.fr/docs/ivar/1.0/manualpage.html) is used to trim amplicon primer sequences from the aligned reads. iVar uses the primer positions supplied in `--primer_bed` to soft clip primer sequences from a coordinate sorted BAM file. + +### picard MarkDuplicates + +
+Output files + +* `variants/bowtie2/` + * `*.markduplicates.sorted.bam`: Coordinate sorted BAM file after duplicate marking. + * `*.markduplicates.sorted.bam.bai`: Index file for coordinate sorted BAM file after duplicate marking. +* `variants/bowtie2/samtools_stats/` + * SAMtools `*.markduplicates.sorted.bam.flagstat`, `*.markduplicates.sorted.bam.idxstats` and `*.markduplicates.sorted.bam.stats` files generated from the duplicate marked alignment files. +* `variants/bowtie2/picard_metrics/` + * `*.markduplicates.sorted.MarkDuplicates.metrics.txt`: Metrics file from MarkDuplicates. + +
+ +Unless you are using [UMIs](https://emea.illumina.com/science/sequencing-method-explorer/kits-and-arrays/umi.html) it is not possible to establish whether the fragments you have sequenced from your sample were derived via true biological duplication (i.e. sequencing independent template fragments) or as a result of PCR biases introduced during the library preparation. [picard MarkDuplicates](https://gatk.broadinstitute.org/hc/en-us/articles/360037052812-MarkDuplicates-Picard-) isn't run by default because you anticipate high levels of duplication with viral data due to the size of the genome, however, you can activate it by adding `--skip_markduplicates false` to the command you use to run the pipeline. This will only *mark* the duplicate reads identified amongst the alignments to allow you to gauge the overall level of duplication in your samples. You can also choose to remove any reads identified as duplicates via the `--filter_duplicates` parameter. + +![MultiQC - Picard MarkDuplicates metrics plot](images/mqc_picard_duplicates_plot.png) + +### picard CollectMultipleMetrics + +
+Output files + +* `variants/bowtie2/picard_metrics/` + * `*.CollectMultipleMetrics.*`: Alignment QC files from picard CollectMultipleMetrics in `*_metrics` textual format. +* `variants/bowtie2/picard_metrics/pdf/` + * `*.pdf` plots for metrics obtained from CollectMultipleMetrics. + +
+ +[picard-tools](https://broadinstitute.github.io/picard/command-line-overview.html) is a set of command-line tools for manipulating high-throughput sequencing data. We use picard-tools in this pipeline to obtain mapping and coverage metrics. + +![MultiQC - Picard insert size plot](images/mqc_picard_insert_size_plot.png) + +### mosdepth + +
+Output files + +* `variants/bowtie2/mosdepth/genome/` + * `all_samples.mosdepth.coverage.tsv`: File aggregating genome-wide coverage values across all samples used for plotting. + * `*.mosdepth.coverage.pdf`: Whole-genome coverage plot. + * `*.mosdepth.coverage.tsv`: File containing coverage values for the above plot. + * `*.mosdepth.summary.txt`: Summary metrics including mean, min and max coverage values. +* `variants/bowtie2/mosdepth/amplicon/` + * `all_samples.mosdepth.coverage.tsv`: File aggregating per-amplicon coverage values across all samples used for plotting. + * `all_samples.mosdepth.heatmap.pdf`: Heatmap showing per-amplicon coverage across all samples. + * `*.mosdepth.coverage.pdf`: Bar plot showing per-amplicon coverage for an individual sample. + * `*.mosdepth.coverage.tsv`: File containing per-amplicon coverage values for the above plot. + * `*.mosdepth.summary.txt`: Summary metrics including mean, min and max coverage values. + +
+ +[mosdepth](https://github.com/brentp/mosdepth) is a fast BAM/CRAM depth calculation tool for WGS, exome, or targeted sequencing. mosdepth is used in this pipeline to obtain genome-wide coverage values in 200bp windows and for `--protocol amplicon` to obtain amplicon/region-specific coverage metrics. The results are then either rendered in MultiQC (genome-wide coverage) or are plotted using custom `R` scripts. + +![R - Samples amplicon coverage heatmap ](images/r_amplicon_heatmap.png) + +![R - Sample genome-wide coverage plot](images/r_genome_coverage.png) + +

+ R - Sample per-amplicon coverage plot +

+ +### iVar variants and iVar consensus + +
+Output files + +* `variants/ivar/` + * `*.tsv`: Original iVar variants in TSV format. + * `*.vcf.gz`: iVar variants in VCF format. + * `*.vcf.gz.tbi`: iVar variants in VCF index file. +* `variants/ivar/consensus/` + * `*.consensus.fa`: Consensus Fasta file generated by iVar. + * `*.consensus.qual.txt`: File with the average quality of each base in the consensus sequence. +* `variants/ivar/consensus/base_qc/` + * `*.ACTG_density.pdf`: Plot showing density of ACGT bases within the consensus sequence. + * `*.base_counts.pdf`: Plot showing frequency and percentages of all bases in consensus sequence. + * `*.base_counts.tsv`: File containing frequency and percentages of all bases in consensus sequence. + * `*.N_density.pdf`: Plot showing density of N bases within the consensus sequence. + * `*.N_run.tsv`: File containing start positions and width of N bases in consensus sequence. +* `variants/ivar/log/` + * `*.variant_counts.log`: Counts for type of variants called by iVar. +* `variants/ivar/bcftools_stats/` + * `*.bcftools_stats.txt`: Statistics and counts obtained from iVar variants VCF file. + +
+ +[iVar](https://github.com/andersen-lab/ivar/blob/master/docs/MANUAL.md) is a computational package that contains functions broadly useful for viral amplicon-based sequencing. We use iVar in this pipeline to [trim primer sequences](#ivar-trim) for amplicon input data as well as to call variants and for consensus sequence generation. + +![MultiQC - iVar variants called plot](images/mqc_ivar_variants_plot.png) + +### BCFTools and BEDTools + +
+Output files + +* `variants/bcftools/` + * `*.vcf.gz`: Variants VCF file. + * `*.vcf.gz.tbi`: Variants VCF index file. +* `variants/bcftools/consensus/` + * `*.consensus.fa`: Consensus Fasta file generated by integrating the variants called by BCFTools into the reference genome. +* `variants/bcftools/consensus/base_qc/` + * `*.ACTG_density.pdf`: Plot showing density of ACGT bases within the consensus sequence. + * `*.base_counts.pdf`: Plot showing frequency and percentages of all bases in consensus sequence. + * `*.base_counts.tsv`: File containing frequency and percentages of all bases in consensus sequence. + * `*.N_density.pdf`: Plot showing density of N bases within the consensus sequence. + * `*.N_run.tsv`: File containing start positions and width of N bases in consensus sequence. +* `variants/bcftools/bcftools_stats/` + * `*.bcftools_stats.txt`: Statistics and counts obtained from VCF file. + +
+ +[BCFtools](http://samtools.github.io/bcftools/bcftools.html) can be used to call variants directly from BAM alignment files. The functionality to call variants with BCFTools in this pipeline was inspired by work carried out by [Conor Walker](https://github.com/conorwalker/covid19/blob/3cb26ec399417bedb7e60487415c78a405f517d6/scripts/call_variants.sh). + +[BCFtools](http://samtools.github.io/bcftools/bcftools.html) is a set of utilities that manipulate variant calls in [VCF](https://vcftools.github.io/specs.html) and its binary counterpart BCF format. BCFTools is used in the variant calling and *de novo* assembly steps of this pipeline to obtain basic statistics from the VCF output. It can also be used to generate a consensus sequence by integrating variant calls into the reference genome. + +[BEDTools](https://bedtools.readthedocs.io/en/latest/) is a swiss-army knife of tools for a wide-range of genomics analysis tasks. In this pipeline we use `bedtools genomecov` to compute the per-base mapped read coverage in bedGraph format, and `bedtools maskfasta` to mask sequences in a Fasta file based on intervals defined in a feature file. This may be useful for creating your own masked genome file based on custom annotations or for masking all but your target regions when aligning sequence data from a targeted capture experiment. + +![MultiQC - BCFTools variant counts](images/mqc_bcftools_stats_plot.png) + +### SnpEff and SnpSift + +
+Output files + +* `variants//snpeff/` + * `*.snpeff.csv`: Variant annotation csv file. + * `*.snpeff.genes.txt`: Gene table for annotated variants. + * `*.snpeff.summary.html`: Summary html file for variants. + * `*.snpeff.vcf.gz`: VCF file with variant annotations. + * `*.snpeff.vcf.gz.tbi`: Index for VCF file with variant annotations. + * `*.snpsift.txt`: SnpSift summary table. +* `variants//snpeff/bcftools_stats/` + * `*.bcftools_stats.txt`: Statistics and counts obtained from VCF file. + +**NB:** The value of `` in the output directory name above is determined by the `--callers` parameter (Default: 'ivar' for '--protocol amplicon' and 'bcftools' for '--protocol metagenomic'). + +
+ +[SnpEff](http://snpeff.sourceforge.net/SnpEff.html) is a genetic variant annotation and functional effect prediction toolbox. It annotates and predicts the effects of genetic variants on genes and proteins (such as amino acid changes). + +[SnpSift](http://snpeff.sourceforge.net/SnpSift.html) annotates genomic variants using databases, filters, and manipulates genomic annotated variants. After annotation with SnpEff, you can use SnpSift to help filter large genomic datasets in order to find the most significant variants. + +![MultiQC - SnpEff annotation counts](images/mqc_snpeff_plot.png) + +### QUAST + +
+Output files + +* `variants//quast/` + * `report.html`: Results report in HTML format. Also available in various other file formats i.e. `report.pdf`, `report.tex`, `report.tsv` and `report.txt`. + +**NB:** The value of `` in the output directory name above is determined by the `--callers` parameter (Default: 'ivar' for '--protocol amplicon' and 'bcftools' for '--protocol metagenomic'). + +
+ +[QUAST](http://bioinf.spbau.ru/quast) is used to generate a single report with which to evaluate the quality of the consensus sequence across all of the samples provided to the pipeline. The HTML results can be opened within any browser (we recommend using Google Chrome). Please see the [QUAST output docs](http://quast.sourceforge.net/docs/manual.html#sec3) for more detailed information regarding the output files. + +### Pangolin + +
+Output files + +* `variants/<CALLER>/pangolin/` + * `*.pangolin.csv`: Lineage analysis results from Pangolin. + +**NB:** The value of `<CALLER>` in the output directory name above is determined by the `--callers` parameter (Default: 'ivar' for '--protocol amplicon' and 'bcftools' for '--protocol metagenomic'). + +
+ +Phylogenetic Assignment of Named Global Outbreak LINeages ([Pangolin](https://github.com/cov-lineages/pangolin)) has been used extensively during the COVID-19 pandemic in order to assign lineages to SARS-CoV-2 genome sequenced samples. A [web application](https://pangolin.cog-uk.io/) also exists that allows users to upload genome sequences via a web browser to assign lineages to genome sequences of SARS-CoV-2, view descriptive characteristics of the assigned lineage(s), view the placement of the lineage in a phylogeny of global samples, and view the temporal and geographic distribution of the assigned lineage(s). + +### Nextclade + +
+Output files + +* `variants/<CALLER>/nextclade/` + * `*.csv`: Analysis results from Nextclade containing genome clade assignment, mutation calling and sequence quality checks. + +**NB:** The value of `<CALLER>` in the output directory name above is determined by the `--callers` parameter (Default: 'ivar' for '--protocol amplicon' and 'bcftools' for '--protocol metagenomic'). + +
+ +[Nextclade](https://github.com/nextstrain/nextclade) performs viral genome clade assignment, mutation calling and sequence quality checks for the consensus sequences generated in this pipeline. Similar to Pangolin, it has been used extensively during the COVID-19 pandemic. A [web application](https://clades.nextstrain.org/) also exists that allows users to upload genome sequences via a web browser. + +### ASCIIGenome + +
+Output files + +* `variants/<CALLER>/asciigenome/<SAMPLE>/` + * `*.pdf`: Individual variant screenshots with annotation tracks in PDF format. + +**NB:** The value of `<CALLER>` in the output directory name above is determined by the `--callers` parameter (Default: 'ivar' for '--protocol amplicon' and 'bcftools' for '--protocol metagenomic'). + +
+ +As described in the documentation, [ASCIIGenome](https://asciigenome.readthedocs.io/en/latest/) is a command-line genome browser that can be run from a terminal window and is solely based on ASCII characters. The closest program to ASCIIGenome is probably [samtools tview](http://www.htslib.org/doc/samtools-tview.html) but ASCIIGenome offers much more flexibility, similar to popular GUI viewers like the [IGV](https://software.broadinstitute.org/software/igv/) browser. We are using the batch processing mode of ASCIIGenome in this pipeline to generate individual screenshots for all of the variant sites reported for each sample in the VCF files. This is incredibly useful to be able to quickly QC the variants called by the pipeline without having to tediously load all of the relevant tracks into a conventional genome browser. Where possible, the BAM read alignments, VCF variant file, primer BED file and GFF annotation track will be represented in the screenshot for contextual purposes. The screenshot below shows a SNP called relative to the MN908947.3 SARS-CoV-2 reference genome that overlaps the ORF7a protein and the nCoV-2019_91_LEFT primer from the ARTIC v3 protocol. + +

+ ASCIIGenome screenshot +

+ +### BCFTools isec + +
+Output files + +* `variants/intersect/<SAMPLE>/` + * `*.vcf.gz`: VCF file containing variants common to both variant callers. There will be one file for each caller - see `README.txt` for details. + * `*.vcf.gz.tbi`: Index for VCF file. + * `README.txt`: File containing command used and file name mappings. + * `sites.txt`: List of variants common to both variant callers in textual format. The last column indicates presence (1) or absence (0) amongst the 2 different callers. + +**NB:** This process will only be executed when both variant callers are specified to be run i.e. `--callers ivar,bcftools`. + +
+ +[BCFTools isec](http://samtools.github.io/bcftools/bcftools.html#isec) can be used to intersect the variant calls generated by the 2 different callers used in the pipeline. This permits a quick assessment of how consistently a particular variant is being called using different algorithms and to prioritise the investigation of the variants. + +## Illumina: De novo assembly + +A file called `summary_assembly_metrics_mqc.csv` containing a selection of read alignment and *de novo* assembly related metrics will be saved in the `multiqc/` results directory. The same metrics will also be added to the top of the MultiQC report. + +### Cutadapt + +
+Output files + +* `assembly/cutadapt/log/` + * `*.cutadapt.log`: Cutadapt log file generated from stdout. +* `assembly/cutadapt/fastqc/` + * `*_fastqc.html`: FastQC report of the trimmed reads. + * `*_fastqc.zip`: Zip archive containing the FastQC report. + +
+ +In the variant calling branch of the pipeline we are using [iVar trim](#ivar-trim) to remove primer sequences from the aligned BAM files for amplicon data. Since in the *de novo* assembly branch we don't align the reads, we use [Cutadapt](https://cutadapt.readthedocs.io/en/stable/guide.html) as an alternative option to remove and clean the primer sequences directly from FastQ files. + +![MultiQC - Cutadapt filtered reads plot](images/mqc_cutadapt_plot.png) + +### SPAdes + +
+Output files + +* `assembly/spades/<SPADES_MODE>/` + * `*.scaffolds.fa`: SPAdes scaffold assembly. + * `*.contigs.fa`: SPAdes assembly contigs. + * `*.assembly.gfa`: SPAdes assembly graph in [GFA](https://github.com/GFA-spec/GFA-spec/blob/master/GFA1.md) format. +* `assembly/spades/<SPADES_MODE>/bandage/` + * `*.png`: Bandage visualisation for SPAdes assembly graph in PNG format. + * `*.svg`: Bandage visualisation for SPAdes assembly graph in SVG format. + +**NB:** The value of `<SPADES_MODE>` in the output directory name above is determined by the `--spades_mode` parameter (Default: 'rnaviral'). + +
+ +[SPAdes](http://cab.spbu.ru/software/spades/) is an assembly toolkit containing various assembly pipelines. Generically speaking, SPAdes is one of the most popular de Bruijn graph-based assembly algorithms used for bacterial/viral genome reconstruction. + +[Bandage](https://rrwick.github.io/Bandage/) is a program for visualising *de novo* assembly graphs. By displaying connections which are not present in the contigs file, Bandage opens up new possibilities for analysing *de novo* assemblies. + +### Unicycler + +
+Output files + +* `assembly/unicycler/` + * `*.scaffolds.fa`: Unicycler scaffold assembly. + * `*.assembly.gfa`: Unicycler assembly graph in GFA format. +* `assembly/unicycler/bandage/` + * `*.png`: Bandage visualisation for Unicycler assembly graph in PNG format. + * `*.svg`: Bandage visualisation for Unicycler assembly graph in SVG format. + +
+ +[Unicycler](https://github.com/rrwick/Unicycler) is an assembly pipeline for bacterial genomes. It can assemble Illumina-only read sets where it functions as a SPAdes-optimiser. + +### minia + +
+Output files + +* `assembly/minia/` + * `*.contigs.fa`: Minia scaffold assembly. + * `*.unitigs.fa`: Minia unitigs fasta file. + * `*.h5`: Minia h5 output file. + +
+ +[Minia](https://github.com/GATB/minia) is a short-read assembler based on a de Bruijn graph, capable of assembling a human genome on a desktop computer in a day. The output of Minia is a set of contigs. Minia produces results of similar contiguity and accuracy to other de Bruijn assemblers. + +### BLAST + +
+Output files + +* `assembly/<ASSEMBLER>/blastn/` + * `*.blastn.txt`: BLAST results against the target virus. + * `*.filter.blastn.txt`: Filtered BLAST results. + +**NB:** The value of `<ASSEMBLER>` in the output directory name above is determined by the `--assemblers` parameter (Default: 'spades'). + +
+ +[blastn](https://blast.ncbi.nlm.nih.gov/Blast.cgi?PAGE_TYPE=BlastSearch) is used to align the assembled contigs against the virus reference genome. + +### ABACAS + +
+Output files + +* `assembly/<ASSEMBLER>/abacas/` + * `*.abacas.bin`: Bin file that contains contigs that are not used in ordering. + * `*.abacas.crunch`: Comparison file. + * `*.abacas.fasta`: Ordered and orientated sequence file. + * `*.abacas.gaps`: Gap information. + * `*.abacas.gaps.tab`: Gap information in tab-delimited format. + * `*.abacas.MULTIFASTA.fa`: A list of ordered and orientated contigs in a multi-fasta format. + * `*.abacas.tab`: Feature file. + * `*.unused_contigs.out`: Information on contigs that have mapping information but could not be used in the ordering. +* `assembly/<ASSEMBLER>/abacas/nucmer/`: Folder containing the files generated by the NUCmer algorithm used by ABACAS. + +**NB:** The value of `<ASSEMBLER>` in the output directory name above is determined by the `--assemblers` parameter (Default: 'spades'). + +
+ +[ABACAS](https://www.sanger.ac.uk/science/tools/pagit) was developed to rapidly contiguate (align, order, orientate), visualize and design primers to close gaps on shotgun assembled contigs based on a reference sequence. + +### PlasmidID + +
+Output files + +* `assembly/<ASSEMBLER>/plasmidid/<SAMPLE>/` + * `*_final_results.html`: Summary file with reference coverage stats and contigs for visualization. + * `*_final_results.tab`: Summary file with reference coverage stats and contigs. + * `images/<SAMPLE>_<REF_NAME>.png`: PNG file with the visualization of the alignment between the viral assembly and the reference viral genome. + * `logs/`: Log files. + +**NB:** The value of `<ASSEMBLER>` in the output directory name above is determined by the `--assemblers` parameter (Default: 'spades'). + +
+ +[PlasmidID](https://github.com/BU-ISCIII/plasmidID) was used to graphically represent the alignment of the reference genome relative to a given assembly. This helps to visualize the coverage of the reference genome in the assembly. To find more information about the output files refer to the [documentation](https://github.com/BU-ISCIII/plasmidID/wiki/Understanding-the-image:-track-by-track). + +### Assembly QUAST + +
+Output files + +* `assembly/<ASSEMBLER>/quast/` + * `report.html`: Results report in HTML format. Also available in various other file formats i.e. `report.pdf`, `report.tex`, `report.tsv` and `report.txt`. + +**NB:** The value of `<ASSEMBLER>` in the output directory name above is determined by the `--assemblers` parameter (Default: 'spades'). + +
+ +[QUAST](http://bioinf.spbau.ru/quast) is used to generate a single report with which to evaluate the quality of the *de novo* assemblies across all of the samples provided to the pipeline. The HTML results can be opened within any browser (we recommend using Google Chrome). Please see the [QUAST output docs](http://quast.sourceforge.net/docs/manual.html#sec3) for more detailed information regarding the output files. + +![MultiQC - QUAST contig counts](images/mqc_quast_plot.png) + +## Illumina: Workflow reporting and genomes + +### MultiQC + +
+Output files + +* `multiqc/` + * `multiqc_report.html`: a standalone HTML file that can be viewed in your web browser. + * `multiqc_data/`: directory containing parsed statistics from the different tools used in the pipeline. + * `summary_variants_metrics_mqc.csv`: file containing a selection of read alignment and variant calling metrics. The same metrics will also be added to the top of the MultiQC report. + * `summary_assembly_metrics_mqc.csv`: file containing a selection of read alignment and *de novo* assembly related metrics. The same metrics will also be added to the top of the MultiQC report. + +
+ +[MultiQC](http://multiqc.info) is a visualization tool that generates a single HTML report summarizing all samples in your project. Most of the pipeline QC results are visualised in the report and further statistics are available in the report data directory. + +Results generated by MultiQC collate pipeline QC from FastQC, fastp, Cutadapt, Bowtie 2, Kraken 2, samtools, picard CollectMultipleMetrics, BCFTools, SnpEff and QUAST. + +The default [`multiqc config file`](https://github.com/nf-core/viralrecon/blob/master/assets/multiqc_config_illumina.yaml) has been written in a way in which to structure these QC metrics to make them more interpretable in the final report. + +The pipeline has special steps which also allow the software versions to be reported in the MultiQC output for future traceability. For more information about how to use MultiQC reports, see <http://multiqc.info>. + +An example MultiQC report generated from a full-sized dataset can be viewed on the [nf-core website](https://nf-co.re/viralrecon/results). + +### Reference genome files + +
+Output files + +* `genome/` + * Unzipped genome fasta file for viral genome + * Unzipped genome annotation GFF file for viral genome +* `genome/index/` + * `bowtie2/`: Bowtie 2 index for viral genome. +* `genome/db/` + * `blast_db/`: BLAST database for viral genome. + * `kraken2_db/`: Kraken 2 database for host genome. + * `snpeff_db/`: SnpEff database for viral genome. + * `snpeff.config`: SnpEff config file for viral genome. + +
+ +A number of genome-specific files are generated by the pipeline because they are required for the downstream processing of the results. If the `--save_reference` parameter is provided then the Bowtie 2 alignment indices, BLAST and Kraken 2 databases downloaded/generated by the pipeline will be saved in the `genome/` directory. It is recommended to use the `--save_reference` parameter if you are using the pipeline to build a Kraken 2 database for the host genome. This can be quite a time-consuming process and it permits their reuse for future runs of the pipeline or for other purposes. + +# Pipeline information + +
+Output files + +* `pipeline_info/` + * Reports generated by Nextflow: `execution_report.html`, `execution_timeline.html`, `execution_trace.txt` and `pipeline_dag.dot`/`pipeline_dag.svg`. + * Reports generated by the pipeline: `pipeline_report.html`, `pipeline_report.txt` and `software_versions.csv`. + * Reformatted samplesheet files used as input to the pipeline: `samplesheet.valid.csv`. + * Documentation for interpretation of results in HTML format: `results_description.html`. + +
+ +[Nextflow](https://www.nextflow.io/docs/latest/tracing.html) provides excellent functionality for generating various reports relevant to the running and execution of the pipeline. This will allow you to troubleshoot errors with the running of the pipeline, and also provide you with other information such as launch commands, run times and resource usage. diff --git a/lib/Utils.groovy b/lib/Utils.groovy index 119855f6..a6e8a8fe 100755 --- a/lib/Utils.groovy +++ b/lib/Utils.groovy @@ -1,47 +1,47 @@ -/* - * This file holds several Groovy functions that could be useful for any Nextflow pipeline - */ - -import org.yaml.snakeyaml.Yaml - -class Utils { - - /* - * When running with -profile conda, warn if channels have not been set-up appropriately - */ - public static void checkCondaChannels(log) { - Yaml parser = new Yaml() - def channels = [] - try { - def config = parser.load("conda config --show channels".execute().text) - channels = config.channels - } catch(NullPointerException | IOException e) { - log.warn "Could not verify conda channel configuration." 
- return - } - - // Check that all channels are present - def required_channels = ['conda-forge', 'bioconda', 'defaults'] - def conda_check_failed = !required_channels.every { ch -> ch in channels } - - // Check that they are in the right order - conda_check_failed |= !(channels.indexOf('conda-forge') < channels.indexOf('bioconda')) - conda_check_failed |= !(channels.indexOf('bioconda') < channels.indexOf('defaults')) - - if (conda_check_failed) { - log.warn "=============================================================================\n" + - " There is a problem with your Conda configuration!\n\n" + - " You will need to set-up the conda-forge and bioconda channels correctly.\n" + - " Please refer to https://bioconda.github.io/user/install.html#set-up-channels\n" + - " NB: The order of the channels matters!\n" + - "===================================================================================" - } - } - - /* - * Join module args with appropriate spacing - */ - public static String joinModuleArgs(args_list) { - return ' ' + args_list.join(' ') - } -} +/* + * This file holds several Groovy functions that could be useful for any Nextflow pipeline + */ + +import org.yaml.snakeyaml.Yaml + +class Utils { + + /* + * When running with -profile conda, warn if channels have not been set-up appropriately + */ + public static void checkCondaChannels(log) { + Yaml parser = new Yaml() + def channels = [] + try { + def config = parser.load("conda config --show channels".execute().text) + channels = config.channels + } catch(NullPointerException | IOException e) { + log.warn "Could not verify conda channel configuration." 
+ return + } + + // Check that all channels are present + def required_channels = ['conda-forge', 'bioconda', 'defaults'] + def conda_check_failed = !required_channels.every { ch -> ch in channels } + + // Check that they are in the right order + conda_check_failed |= !(channels.indexOf('conda-forge') < channels.indexOf('bioconda')) + conda_check_failed |= !(channels.indexOf('bioconda') < channels.indexOf('defaults')) + + if (conda_check_failed) { + log.warn "=============================================================================\n" + + " There is a problem with your Conda configuration!\n\n" + + " You will need to set-up the conda-forge and bioconda channels correctly.\n" + + " Please refer to https://bioconda.github.io/user/install.html#set-up-channels\n" + + " NB: The order of the channels matters!\n" + + "===================================================================================" + } + } + + /* + * Join module args with appropriate spacing + */ + public static String joinModuleArgs(args_list) { + return ' ' + args_list.join(' ') + } +} diff --git a/modules/local/asciigenome.nf b/modules/local/asciigenome.nf index e8252ffd..d9c6acb3 100644 --- a/modules/local/asciigenome.nf +++ b/modules/local/asciigenome.nf @@ -1,58 +1,58 @@ -// Import generic module functions -include { initOptions; saveFiles; getSoftwareName } from './functions' - -params.options = [:] -options = initOptions(params.options) - -process ASCIIGENOME { - tag "$meta.id" - label 'process_medium' - publishDir "${params.outdir}", - mode: params.publish_dir_mode, - saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), meta:meta, publish_by_meta:['id']) } - - conda (params.enable_conda ? 
"bioconda::asciigenome=1.16.0" : null) - if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { - container "https://depot.galaxyproject.org/singularity/asciigenome:1.16.0--0" - } else { - container "quay.io/biocontainers/asciigenome:1.16.0--0" - } - - input: - tuple val(meta), path(bam), path(vcf) - path fasta - path gff - path bed - val window - val track_height - - output: - tuple val(meta), path("*pdf"), emit: pdf - path "*.version.txt" , emit: version - - script: - def software = getSoftwareName(task.process) - def prefix = options.suffix ? "${meta.id}${options.suffix}" : "${meta.id}" - def gff_track = gff ? "$gff" : '' - def bed_track = bed ? "$bed" : '' - def paired_end = meta.single_end ? '' : '&& readsAsPairs -on' - """ - zcat $vcf \\ - | grep -v '#' \\ - | awk -v FS='\t' -v OFS='\t' '{print \$1, (\$2-$window-1), (\$2+$window)}' \\ - > variants.bed - - ASCIIGenome \\ - -ni \\ - -x "trackHeight 0 bam#1 && trackHeight $track_height bam@2 $paired_end && filterVariantReads && save ${prefix}.%r.pdf" \\ - --batchFile variants.bed \\ - --fasta $fasta \\ - $bam \\ - $vcf \\ - $bed_track \\ - $gff_track \\ - > /dev/null - - echo \$(ASCIIGenome -ni --version 2>&1) | sed -e "s/ASCIIGenome //g" > ${software}.version.txt - """ -} +// Import generic module functions +include { initOptions; saveFiles; getSoftwareName } from './functions' + +params.options = [:] +options = initOptions(params.options) + +process ASCIIGENOME { + tag "$meta.id" + label 'process_medium' + publishDir "${params.outdir}", + mode: params.publish_dir_mode, + saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), meta:meta, publish_by_meta:['id']) } + + conda (params.enable_conda ? 
"bioconda::asciigenome=1.16.0" : null) + if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { + container "https://depot.galaxyproject.org/singularity/asciigenome:1.16.0--0" + } else { + container "quay.io/biocontainers/asciigenome:1.16.0--0" + } + + input: + tuple val(meta), path(bam), path(vcf) + path fasta + path gff + path bed + val window + val track_height + + output: + tuple val(meta), path("*pdf"), emit: pdf + path "*.version.txt" , emit: version + + script: + def software = getSoftwareName(task.process) + def prefix = options.suffix ? "${meta.id}${options.suffix}" : "${meta.id}" + def gff_track = gff ? "$gff" : '' + def bed_track = bed ? "$bed" : '' + def paired_end = meta.single_end ? '' : '&& readsAsPairs -on' + """ + zcat $vcf \\ + | grep -v '#' \\ + | awk -v FS='\t' -v OFS='\t' '{print \$1, (\$2-$window-1), (\$2+$window)}' \\ + > variants.bed + + ASCIIGenome \\ + -ni \\ + -x "trackHeight 0 bam#1 && trackHeight $track_height bam@2 $paired_end && filterVariantReads && save ${prefix}.%r.pdf" \\ + --batchFile variants.bed \\ + --fasta $fasta \\ + $bam \\ + $vcf \\ + $bed_track \\ + $gff_track \\ + > /dev/null + + echo \$(ASCIIGenome -ni --version 2>&1) | sed -e "s/ASCIIGenome //g" > ${software}.version.txt + """ +} diff --git a/modules/local/collapse_primers.nf b/modules/local/collapse_primers.nf index 8b5cbed7..2b81410f 100644 --- a/modules/local/collapse_primers.nf +++ b/modules/local/collapse_primers.nf @@ -1,36 +1,36 @@ -// Import generic module functions -include { saveFiles } from './functions' - -params.options = [:] - -process COLLAPSE_PRIMERS { - tag "$bed" - label 'process_medium' - publishDir "${params.outdir}", - mode: params.publish_dir_mode, - saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:'primers', meta:[:], publish_by_meta:[]) } - - conda (params.enable_conda ? 
"conda-forge::python=3.8.3" : null) - if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { - container "https://depot.galaxyproject.org/singularity/python:3.8.3" - } else { - container "quay.io/biocontainers/python:3.8.3" - } - - input: - path bed - val left_suffix - val right_suffix - - output: - path '*.bed', emit: bed - - script: - """ - collapse_primer_bed.py \\ - --left_primer_suffix $left_suffix \\ - --right_primer_suffix $right_suffix \\ - $bed \\ - ${bed.baseName}.collapsed.bed - """ -} +// Import generic module functions +include { saveFiles } from './functions' + +params.options = [:] + +process COLLAPSE_PRIMERS { + tag "$bed" + label 'process_medium' + publishDir "${params.outdir}", + mode: params.publish_dir_mode, + saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:'primers', meta:[:], publish_by_meta:[]) } + + conda (params.enable_conda ? "conda-forge::python=3.8.3" : null) + if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { + container "https://depot.galaxyproject.org/singularity/python:3.8.3" + } else { + container "quay.io/biocontainers/python:3.8.3" + } + + input: + path bed + val left_suffix + val right_suffix + + output: + path '*.bed', emit: bed + + script: + """ + collapse_primer_bed.py \\ + --left_primer_suffix $left_suffix \\ + --right_primer_suffix $right_suffix \\ + $bed \\ + ${bed.baseName}.collapsed.bed + """ +} diff --git a/modules/local/cutadapt.nf b/modules/local/cutadapt.nf index f51e2dc9..7d75385b 100644 --- a/modules/local/cutadapt.nf +++ b/modules/local/cutadapt.nf @@ -1,48 +1,48 @@ -// Import generic module functions -include { initOptions; saveFiles; getSoftwareName } from './functions' - -params.options = [:] -options = initOptions(params.options) - -process CUTADAPT { - tag "$meta.id" - label 'process_medium' - publishDir "${params.outdir}", - mode: params.publish_dir_mode, - saveAs: { filename -> 
saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), meta:meta, publish_by_meta:['id']) } - - conda (params.enable_conda ? 'bioconda::cutadapt=3.2' : null) - if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { - container 'https://depot.galaxyproject.org/singularity/cutadapt:3.2--py38h0213d0e_0' - } else { - container 'quay.io/biocontainers/cutadapt:3.2--py38h0213d0e_0' - } - - input: - tuple val(meta), path(reads) - path adapters - - output: - tuple val(meta), path('*.fastq.gz'), emit: reads - tuple val(meta), path('*.log') , emit: log - path '*.version.txt' , emit: version - - script: - def software = getSoftwareName(task.process) - def prefix = options.suffix ? "${meta.id}${options.suffix}" : "${meta.id}" - def paired = meta.single_end ? "-a file:adapters.sub.fa" : "-a file:adapters.sub.fa -A file:adapters.sub.fa" - def trimmed = meta.single_end ? "-o ${prefix}.fastq.gz" : "-o ${prefix}_1.fastq.gz -p ${prefix}_2.fastq.gz" - """ - sed -r '/^[ACTGactg]+\$/ s/\$/X/g' $adapters > adapters.sub.fa - - cutadapt \\ - --cores $task.cpus \\ - $options.args \\ - $paired \\ - $trimmed \\ - $reads \\ - > ${prefix}.cutadapt.log - - echo \$(cutadapt --version) > ${software}.version.txt - """ -} +// Import generic module functions +include { initOptions; saveFiles; getSoftwareName } from './functions' + +params.options = [:] +options = initOptions(params.options) + +process CUTADAPT { + tag "$meta.id" + label 'process_medium' + publishDir "${params.outdir}", + mode: params.publish_dir_mode, + saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), meta:meta, publish_by_meta:['id']) } + + conda (params.enable_conda ? 
'bioconda::cutadapt=3.2' : null) + if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { + container 'https://depot.galaxyproject.org/singularity/cutadapt:3.2--py38h0213d0e_0' + } else { + container 'quay.io/biocontainers/cutadapt:3.2--py38h0213d0e_0' + } + + input: + tuple val(meta), path(reads) + path adapters + + output: + tuple val(meta), path('*.fastq.gz'), emit: reads + tuple val(meta), path('*.log') , emit: log + path '*.version.txt' , emit: version + + script: + def software = getSoftwareName(task.process) + def prefix = options.suffix ? "${meta.id}${options.suffix}" : "${meta.id}" + def paired = meta.single_end ? "-a file:adapters.sub.fa" : "-a file:adapters.sub.fa -A file:adapters.sub.fa" + def trimmed = meta.single_end ? "-o ${prefix}.fastq.gz" : "-o ${prefix}_1.fastq.gz -p ${prefix}_2.fastq.gz" + """ + sed -r '/^[ACTGactg]+\$/ s/\$/X/g' $adapters > adapters.sub.fa + + cutadapt \\ + --cores $task.cpus \\ + $options.args \\ + $paired \\ + $trimmed \\ + $reads \\ + > ${prefix}.cutadapt.log + + echo \$(cutadapt --version) > ${software}.version.txt + """ +} diff --git a/modules/local/kraken2_build.nf b/modules/local/kraken2_build.nf index 229fab7b..ae7bc2f6 100644 --- a/modules/local/kraken2_build.nf +++ b/modules/local/kraken2_build.nf @@ -1,36 +1,36 @@ -// Import generic module functions -include { initOptions; saveFiles; getSoftwareName } from './functions' - -params.options = [:] -options = initOptions(params.options) - -process KRAKEN2_BUILD { - label 'process_high' - publishDir "${params.outdir}", - mode: params.publish_dir_mode, - saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), meta:[:], publish_by_meta:[]) } - - conda (params.enable_conda ? 
'bioconda::kraken2=2.1.1' : null) - if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { - container 'https://depot.galaxyproject.org/singularity/kraken2:2.1.1--pl526hc9558a2_0' - } else { - container 'quay.io/biocontainers/kraken2:2.1.1--pl526hc9558a2_0' - } - - input: - val library - - output: - path 'kraken2_db' , emit: db - path '*.version.txt', emit: version - - script: - def software = getSoftwareName(task.process) - """ - kraken2-build --db kraken2_db --threads $task.cpus $options.args --download-taxonomy - kraken2-build --db kraken2_db --threads $task.cpus $options.args2 --download-library $library - kraken2-build --db kraken2_db --threads $task.cpus $options.args3 --build - - echo \$(kraken2 --version 2>&1) | sed 's/^.*Kraken version //; s/ .*\$//' > ${software}.version.txt - """ -} +// Import generic module functions +include { initOptions; saveFiles; getSoftwareName } from './functions' + +params.options = [:] +options = initOptions(params.options) + +process KRAKEN2_BUILD { + label 'process_high' + publishDir "${params.outdir}", + mode: params.publish_dir_mode, + saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), meta:[:], publish_by_meta:[]) } + + conda (params.enable_conda ? 
'bioconda::kraken2=2.1.1' : null) + if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { + container 'https://depot.galaxyproject.org/singularity/kraken2:2.1.1--pl526hc9558a2_0' + } else { + container 'quay.io/biocontainers/kraken2:2.1.1--pl526hc9558a2_0' + } + + input: + val library + + output: + path 'kraken2_db' , emit: db + path '*.version.txt', emit: version + + script: + def software = getSoftwareName(task.process) + """ + kraken2-build --db kraken2_db --threads $task.cpus $options.args --download-taxonomy + kraken2-build --db kraken2_db --threads $task.cpus $options.args2 --download-library $library + kraken2-build --db kraken2_db --threads $task.cpus $options.args3 --build + + echo \$(kraken2 --version 2>&1) | sed 's/^.*Kraken version //; s/ .*\$//' > ${software}.version.txt + """ +} diff --git a/modules/local/multiqc_nanopore.nf b/modules/local/multiqc_nanopore.nf index a6072bf3..c4421d27 100644 --- a/modules/local/multiqc_nanopore.nf +++ b/modules/local/multiqc_nanopore.nf @@ -50,4 +50,3 @@ process MULTIQC { multiqc -f $options.args -e general_stats --ignore *pangolin_lineage_mqc.tsv $custom_config . """ } - diff --git a/modules/local/plot_base_density.nf b/modules/local/plot_base_density.nf index a7987000..d5ddfce1 100644 --- a/modules/local/plot_base_density.nf +++ b/modules/local/plot_base_density.nf @@ -1,36 +1,36 @@ -// Import generic module functions -include { initOptions; saveFiles; getSoftwareName } from './functions' - -params.options = [:] -options = initOptions(params.options) - -process PLOT_BASE_DENSITY { - tag "$fasta" - label 'process_medium' - publishDir "${params.outdir}", - mode: params.publish_dir_mode, - saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:'plots', meta:meta, publish_by_meta:['id']) } - - conda (params.enable_conda ? 
"conda-forge::r-base=4.0.3 conda-forge::r-reshape2=1.4.4 conda-forge::r-optparse=1.6.6 conda-forge::r-ggplot2=3.3.3 conda-forge::r-scales=1.1.1 conda-forge::r-viridis=0.5.1 conda-forge::r-tidyverse=1.3.0 bioconda::bioconductor-biostrings=2.58.0 bioconda::bioconductor-complexheatmap=2.6.2" : null) - if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { - container "https://depot.galaxyproject.org/singularity/mulled-v2-ad9dd5f398966bf899ae05f8e7c54d0fb10cdfa7:05678da05b8e5a7a5130e90a9f9a6c585b965afa-0" - } else { - container "quay.io/biocontainers/mulled-v2-ad9dd5f398966bf899ae05f8e7c54d0fb10cdfa7:05678da05b8e5a7a5130e90a9f9a6c585b965afa-0" - } - - input: - tuple val(meta), path(fasta) - - output: - tuple val(meta), path('*.pdf'), emit: pdf - tuple val(meta), path('*.tsv'), emit: tsv - - script: - def prefix = options.suffix ? "${meta.id}${options.suffix}" : "${meta.id}" - """ - plot_base_density.r \\ - --fasta_files $fasta \\ - --prefixes $prefix \\ - --output_dir ./ - """ -} +// Import generic module functions +include { initOptions; saveFiles; getSoftwareName } from './functions' + +params.options = [:] +options = initOptions(params.options) + +process PLOT_BASE_DENSITY { + tag "$fasta" + label 'process_medium' + publishDir "${params.outdir}", + mode: params.publish_dir_mode, + saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:'plots', meta:meta, publish_by_meta:['id']) } + + conda (params.enable_conda ? 
"conda-forge::r-base=4.0.3 conda-forge::r-reshape2=1.4.4 conda-forge::r-optparse=1.6.6 conda-forge::r-ggplot2=3.3.3 conda-forge::r-scales=1.1.1 conda-forge::r-viridis=0.5.1 conda-forge::r-tidyverse=1.3.0 bioconda::bioconductor-biostrings=2.58.0 bioconda::bioconductor-complexheatmap=2.6.2" : null) + if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { + container "https://depot.galaxyproject.org/singularity/mulled-v2-ad9dd5f398966bf899ae05f8e7c54d0fb10cdfa7:05678da05b8e5a7a5130e90a9f9a6c585b965afa-0" + } else { + container "quay.io/biocontainers/mulled-v2-ad9dd5f398966bf899ae05f8e7c54d0fb10cdfa7:05678da05b8e5a7a5130e90a9f9a6c585b965afa-0" + } + + input: + tuple val(meta), path(fasta) + + output: + tuple val(meta), path('*.pdf'), emit: pdf + tuple val(meta), path('*.tsv'), emit: tsv + + script: + def prefix = options.suffix ? "${meta.id}${options.suffix}" : "${meta.id}" + """ + plot_base_density.r \\ + --fasta_files $fasta \\ + --prefixes $prefix \\ + --output_dir ./ + """ +} diff --git a/modules/local/plot_mosdepth_regions.nf b/modules/local/plot_mosdepth_regions.nf index f6daf3d3..2af85c52 100644 --- a/modules/local/plot_mosdepth_regions.nf +++ b/modules/local/plot_mosdepth_regions.nf @@ -1,36 +1,36 @@ -// Import generic module functions -include { initOptions; saveFiles } from './functions' - -params.options = [:] -options = initOptions(params.options) - -process PLOT_MOSDEPTH_REGIONS { - label 'process_medium' - publishDir "${params.outdir}", - mode: params.publish_dir_mode, - saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:'mosdepth', meta:[:], publish_by_meta:[]) } - - conda (params.enable_conda ? 
"conda-forge::r-base=4.0.3 conda-forge::r-reshape2=1.4.4 conda-forge::r-optparse=1.6.6 conda-forge::r-ggplot2=3.3.3 conda-forge::r-scales=1.1.1 conda-forge::r-viridis=0.5.1 conda-forge::r-tidyverse=1.3.0 bioconda::bioconductor-biostrings=2.58.0 bioconda::bioconductor-complexheatmap=2.6.2" : null) - if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { - container "https://depot.galaxyproject.org/singularity/mulled-v2-ad9dd5f398966bf899ae05f8e7c54d0fb10cdfa7:05678da05b8e5a7a5130e90a9f9a6c585b965afa-0" - } else { - container "quay.io/biocontainers/mulled-v2-ad9dd5f398966bf899ae05f8e7c54d0fb10cdfa7:05678da05b8e5a7a5130e90a9f9a6c585b965afa-0" - } - - input: - path beds - - output: - path '*.pdf', emit: pdf - path '*.tsv', emit: tsv - - script: - def prefix = options.suffix ?: "mosdepth" - """ - plot_mosdepth_regions.r \\ - --input_files ${beds.join(',')} \\ - --output_dir ./ \\ - --output_suffix $prefix \\ - $options.args - """ -} +// Import generic module functions +include { initOptions; saveFiles } from './functions' + +params.options = [:] +options = initOptions(params.options) + +process PLOT_MOSDEPTH_REGIONS { + label 'process_medium' + publishDir "${params.outdir}", + mode: params.publish_dir_mode, + saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:'mosdepth', meta:[:], publish_by_meta:[]) } + + conda (params.enable_conda ? 
"conda-forge::r-base=4.0.3 conda-forge::r-reshape2=1.4.4 conda-forge::r-optparse=1.6.6 conda-forge::r-ggplot2=3.3.3 conda-forge::r-scales=1.1.1 conda-forge::r-viridis=0.5.1 conda-forge::r-tidyverse=1.3.0 bioconda::bioconductor-biostrings=2.58.0 bioconda::bioconductor-complexheatmap=2.6.2" : null) + if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { + container "https://depot.galaxyproject.org/singularity/mulled-v2-ad9dd5f398966bf899ae05f8e7c54d0fb10cdfa7:05678da05b8e5a7a5130e90a9f9a6c585b965afa-0" + } else { + container "quay.io/biocontainers/mulled-v2-ad9dd5f398966bf899ae05f8e7c54d0fb10cdfa7:05678da05b8e5a7a5130e90a9f9a6c585b965afa-0" + } + + input: + path beds + + output: + path '*.pdf', emit: pdf + path '*.tsv', emit: tsv + + script: + def prefix = options.suffix ?: "mosdepth" + """ + plot_mosdepth_regions.r \\ + --input_files ${beds.join(',')} \\ + --output_dir ./ \\ + --output_suffix $prefix \\ + $options.args + """ +} diff --git a/modules/local/snpeff_ann.nf b/modules/local/snpeff_ann.nf index 275cfe79..19ee442a 100644 --- a/modules/local/snpeff_ann.nf +++ b/modules/local/snpeff_ann.nf @@ -1,49 +1,49 @@ -// Import generic module functions -include { initOptions; saveFiles; getSoftwareName } from './functions' - -params.options = [:] -options = initOptions(params.options) - -process SNPEFF_ANN { - tag "$meta.id" - label 'process_medium' - publishDir "${params.outdir}", - mode: params.publish_dir_mode, - saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), meta:meta, publish_by_meta:['id']) } - - conda (params.enable_conda ? 
'bioconda::snpeff=5.0' : null) - if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { - container 'https://depot.galaxyproject.org/singularity/snpeff:5.0--0' - } else { - container 'quay.io/biocontainers/snpeff:5.0--0' - } - - input: - tuple val(meta), path(vcf) - path db - path config - path fasta - - output: - tuple val(meta), path("*.vcf") , emit: vcf - tuple val(meta), path("*.csv") , emit: csv - tuple val(meta), path("*.genes.txt"), emit: txt - tuple val(meta), path("*.html") , emit: html - path '*.version.txt' , emit: version - - script: - def software = getSoftwareName(task.process) - def prefix = options.suffix ? "${meta.id}${options.suffix}" : "${meta.id}" - """ - snpEff ${fasta.baseName} \\ - -config $config \\ - -dataDir $db \\ - $options.args \\ - $vcf \\ - -csvStats ${prefix}.snpeff.csv \\ - > ${prefix}.snpeff.vcf - mv snpEff_summary.html ${prefix}.snpeff.summary.html - - echo \$(snpEff -version 2>&1) | sed 's/^.*SnpEff //; s/ .*\$//' > ${software}.version.txt - """ -} +// Import generic module functions +include { initOptions; saveFiles; getSoftwareName } from './functions' + +params.options = [:] +options = initOptions(params.options) + +process SNPEFF_ANN { + tag "$meta.id" + label 'process_medium' + publishDir "${params.outdir}", + mode: params.publish_dir_mode, + saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), meta:meta, publish_by_meta:['id']) } + + conda (params.enable_conda ? 
'bioconda::snpeff=5.0' : null) + if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { + container 'https://depot.galaxyproject.org/singularity/snpeff:5.0--0' + } else { + container 'quay.io/biocontainers/snpeff:5.0--0' + } + + input: + tuple val(meta), path(vcf) + path db + path config + path fasta + + output: + tuple val(meta), path("*.vcf") , emit: vcf + tuple val(meta), path("*.csv") , emit: csv + tuple val(meta), path("*.genes.txt"), emit: txt + tuple val(meta), path("*.html") , emit: html + path '*.version.txt' , emit: version + + script: + def software = getSoftwareName(task.process) + def prefix = options.suffix ? "${meta.id}${options.suffix}" : "${meta.id}" + """ + snpEff ${fasta.baseName} \\ + -config $config \\ + -dataDir $db \\ + $options.args \\ + $vcf \\ + -csvStats ${prefix}.snpeff.csv \\ + > ${prefix}.snpeff.vcf + mv snpEff_summary.html ${prefix}.snpeff.summary.html + + echo \$(snpEff -version 2>&1) | sed 's/^.*SnpEff //; s/ .*\$//' > ${software}.version.txt + """ +} diff --git a/modules/local/snpeff_build.nf b/modules/local/snpeff_build.nf index d7aecc4f..0bbddffe 100644 --- a/modules/local/snpeff_build.nf +++ b/modules/local/snpeff_build.nf @@ -1,50 +1,50 @@ -// Import generic module functions -include { initOptions; saveFiles; getSoftwareName } from './functions' - -params.options = [:] -options = initOptions(params.options) - -process SNPEFF_BUILD { - tag "$fasta" - label 'process_low' - publishDir "${params.outdir}", - mode: params.publish_dir_mode, - saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), meta:[:], publish_by_meta:[]) } - - conda (params.enable_conda ? 
'bioconda::snpeff=5.0' : null) - if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { - container 'https://depot.galaxyproject.org/singularity/snpeff:5.0--0' - } else { - container 'quay.io/biocontainers/snpeff:5.0--0' - } - - input: - path fasta - path gff - - output: - path 'snpeff_db' , emit: db - path '*.config' , emit: config - path '*.version.txt', emit: version - - script: - def software = getSoftwareName(task.process) - def basename = fasta.baseName - """ - mkdir -p snpeff_db/genomes/ - cd snpeff_db/genomes/ - ln -s ../../$fasta ${basename}.fa - - cd ../../ - mkdir -p snpeff_db/${basename}/ - cd snpeff_db/${basename}/ - ln -s ../../$gff genes.gff - - cd ../../ - echo "${basename}.genome : ${basename}" > snpeff.config - - snpEff build -config snpeff.config -dataDir ./snpeff_db -gff3 -v ${basename} - - echo \$(snpEff -version 2>&1) | sed 's/^.*SnpEff //; s/ .*\$//' > ${software}.version.txt - """ -} +// Import generic module functions +include { initOptions; saveFiles; getSoftwareName } from './functions' + +params.options = [:] +options = initOptions(params.options) + +process SNPEFF_BUILD { + tag "$fasta" + label 'process_low' + publishDir "${params.outdir}", + mode: params.publish_dir_mode, + saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), meta:[:], publish_by_meta:[]) } + + conda (params.enable_conda ? 
'bioconda::snpeff=5.0' : null) + if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { + container 'https://depot.galaxyproject.org/singularity/snpeff:5.0--0' + } else { + container 'quay.io/biocontainers/snpeff:5.0--0' + } + + input: + path fasta + path gff + + output: + path 'snpeff_db' , emit: db + path '*.config' , emit: config + path '*.version.txt', emit: version + + script: + def software = getSoftwareName(task.process) + def basename = fasta.baseName + """ + mkdir -p snpeff_db/genomes/ + cd snpeff_db/genomes/ + ln -s ../../$fasta ${basename}.fa + + cd ../../ + mkdir -p snpeff_db/${basename}/ + cd snpeff_db/${basename}/ + ln -s ../../$gff genes.gff + + cd ../../ + echo "${basename}.genome : ${basename}" > snpeff.config + + snpEff build -config snpeff.config -dataDir ./snpeff_db -gff3 -v ${basename} + + echo \$(snpEff -version 2>&1) | sed 's/^.*SnpEff //; s/ .*\$//' > ${software}.version.txt + """ +} diff --git a/modules/local/snpsift_extractfields.nf b/modules/local/snpsift_extractfields.nf index 3eb99c1a..8e2cf30e 100644 --- a/modules/local/snpsift_extractfields.nf +++ b/modules/local/snpsift_extractfields.nf @@ -1,51 +1,51 @@ -// Import generic module functions -include { initOptions; saveFiles; getSoftwareName } from './functions' - -params.options = [:] -options = initOptions(params.options) - -process SNPSIFT_EXTRACTFIELDS { - tag "$meta.id" - label 'process_medium' - publishDir "${params.outdir}", - mode: params.publish_dir_mode, - saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), meta:meta, publish_by_meta:['id']) } - - conda (params.enable_conda ? 
'bioconda::snpsift=4.3.1t' : null) - if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { - container 'https://depot.galaxyproject.org/singularity/snpsift:4.3.1t--2' - } else { - container 'quay.io/biocontainers/snpsift:4.3.1t--2' - } - - input: - tuple val(meta), path(vcf) - - output: - tuple val(meta), path("*.snpsift.txt"), emit: txt - path '*.version.txt' , emit: version - - script: - def software = getSoftwareName(task.process) - def prefix = options.suffix ? "${meta.id}${options.suffix}" : "${meta.id}" - """ - SnpSift \\ - extractFields \\ - -s "," \\ - -e "." \\ - $options.args \\ - $vcf \\ - CHROM POS REF ALT \\ - "ANN[*].GENE" "ANN[*].GENEID" \\ - "ANN[*].IMPACT" "ANN[*].EFFECT" \\ - "ANN[*].FEATURE" "ANN[*].FEATUREID" \\ - "ANN[*].BIOTYPE" "ANN[*].RANK" "ANN[*].HGVS_C" \\ - "ANN[*].HGVS_P" "ANN[*].CDNA_POS" "ANN[*].CDNA_LEN" \\ - "ANN[*].CDS_POS" "ANN[*].CDS_LEN" "ANN[*].AA_POS" \\ - "ANN[*].AA_LEN" "ANN[*].DISTANCE" "EFF[*].EFFECT" \\ - "EFF[*].FUNCLASS" "EFF[*].CODON" "EFF[*].AA" "EFF[*].AA_LEN" \\ - > ${prefix}.snpsift.txt - - echo \$(SnpSift -h 2>&1) | sed 's/^.*SnpSift version //; s/ .*\$//' > ${software}.version.txt - """ -} +// Import generic module functions +include { initOptions; saveFiles; getSoftwareName } from './functions' + +params.options = [:] +options = initOptions(params.options) + +process SNPSIFT_EXTRACTFIELDS { + tag "$meta.id" + label 'process_medium' + publishDir "${params.outdir}", + mode: params.publish_dir_mode, + saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), meta:meta, publish_by_meta:['id']) } + + conda (params.enable_conda ? 
'bioconda::snpsift=4.3.1t' : null) + if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { + container 'https://depot.galaxyproject.org/singularity/snpsift:4.3.1t--2' + } else { + container 'quay.io/biocontainers/snpsift:4.3.1t--2' + } + + input: + tuple val(meta), path(vcf) + + output: + tuple val(meta), path("*.snpsift.txt"), emit: txt + path '*.version.txt' , emit: version + + script: + def software = getSoftwareName(task.process) + def prefix = options.suffix ? "${meta.id}${options.suffix}" : "${meta.id}" + """ + SnpSift \\ + extractFields \\ + -s "," \\ + -e "." \\ + $options.args \\ + $vcf \\ + CHROM POS REF ALT \\ + "ANN[*].GENE" "ANN[*].GENEID" \\ + "ANN[*].IMPACT" "ANN[*].EFFECT" \\ + "ANN[*].FEATURE" "ANN[*].FEATUREID" \\ + "ANN[*].BIOTYPE" "ANN[*].RANK" "ANN[*].HGVS_C" \\ + "ANN[*].HGVS_P" "ANN[*].CDNA_POS" "ANN[*].CDNA_LEN" \\ + "ANN[*].CDS_POS" "ANN[*].CDS_LEN" "ANN[*].AA_POS" \\ + "ANN[*].AA_LEN" "ANN[*].DISTANCE" "EFF[*].EFFECT" \\ + "EFF[*].FUNCLASS" "EFF[*].CODON" "EFF[*].AA" "EFF[*].AA_LEN" \\ + > ${prefix}.snpsift.txt + + echo \$(SnpSift -h 2>&1) | sed 's/^.*SnpSift version //; s/ .*\$//' > ${software}.version.txt + """ +} diff --git a/modules/local/sra_fastq_ftp.nf b/modules/local/sra_fastq_ftp.nf index 3e5344dc..fa0f29ee 100644 --- a/modules/local/sra_fastq_ftp.nf +++ b/modules/local/sra_fastq_ftp.nf @@ -1,50 +1,50 @@ -// Import generic module functions -include { initOptions; saveFiles; getSoftwareName } from './functions' - -params.options = [:] -options = initOptions(params.options) - -/* - * Download SRA data via FTP - */ -process SRA_FASTQ_FTP { - tag "$meta.id" - label 'process_medium' - label 'error_retry' - publishDir "${params.outdir}", - mode: params.publish_dir_mode, - saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), meta:meta, publish_by_meta:['id']) } - - conda (params.enable_conda ? 
"conda-forge::sed=4.7" : null) - if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { - container "https://containers.biocontainers.pro/s3/SingImgsRepo/biocontainers/v1.2.0_cv1/biocontainers_v1.2.0_cv1.img" - } else { - container "biocontainers/biocontainers:v1.2.0_cv1" - } - - input: - tuple val(meta), val(fastq) - - output: - tuple val(meta), path("*fastq.gz"), emit: fastq - tuple val(meta), path("*md5") , emit: md5 - - script: - if (meta.single_end) { - """ - bash -c 'until curl $options.args -L ${fastq[0]} -o ${meta.id}.fastq.gz; do sleep 1; done'; - echo "${meta.md5_1} ${meta.id}.fastq.gz" > ${meta.id}.fastq.gz.md5 - md5sum -c ${meta.id}.fastq.gz.md5 - """ - } else { - """ - bash -c 'until curl $options.args -L ${fastq[0]} -o ${meta.id}_1.fastq.gz; do sleep 1; done'; - echo "${meta.md5_1} ${meta.id}_1.fastq.gz" > ${meta.id}_1.fastq.gz.md5 - md5sum -c ${meta.id}_1.fastq.gz.md5 - - bash -c 'until curl $options.args -L ${fastq[1]} -o ${meta.id}_2.fastq.gz; do sleep 1; done'; - echo "${meta.md5_2} ${meta.id}_2.fastq.gz" > ${meta.id}_2.fastq.gz.md5 - md5sum -c ${meta.id}_2.fastq.gz.md5 - """ - } -} +// Import generic module functions +include { initOptions; saveFiles; getSoftwareName } from './functions' + +params.options = [:] +options = initOptions(params.options) + +/* + * Download SRA data via FTP + */ +process SRA_FASTQ_FTP { + tag "$meta.id" + label 'process_medium' + label 'error_retry' + publishDir "${params.outdir}", + mode: params.publish_dir_mode, + saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), meta:meta, publish_by_meta:['id']) } + + conda (params.enable_conda ? 
"conda-forge::sed=4.7" : null) + if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { + container "https://containers.biocontainers.pro/s3/SingImgsRepo/biocontainers/v1.2.0_cv1/biocontainers_v1.2.0_cv1.img" + } else { + container "biocontainers/biocontainers:v1.2.0_cv1" + } + + input: + tuple val(meta), val(fastq) + + output: + tuple val(meta), path("*fastq.gz"), emit: fastq + tuple val(meta), path("*md5") , emit: md5 + + script: + if (meta.single_end) { + """ + bash -c 'until curl $options.args -L ${fastq[0]} -o ${meta.id}.fastq.gz; do sleep 1; done'; + echo "${meta.md5_1} ${meta.id}.fastq.gz" > ${meta.id}.fastq.gz.md5 + md5sum -c ${meta.id}.fastq.gz.md5 + """ + } else { + """ + bash -c 'until curl $options.args -L ${fastq[0]} -o ${meta.id}_1.fastq.gz; do sleep 1; done'; + echo "${meta.md5_1} ${meta.id}_1.fastq.gz" > ${meta.id}_1.fastq.gz.md5 + md5sum -c ${meta.id}_1.fastq.gz.md5 + + bash -c 'until curl $options.args -L ${fastq[1]} -o ${meta.id}_2.fastq.gz; do sleep 1; done'; + echo "${meta.md5_2} ${meta.id}_2.fastq.gz" > ${meta.id}_2.fastq.gz.md5 + md5sum -c ${meta.id}_2.fastq.gz.md5 + """ + } +} diff --git a/modules/local/sra_ids_to_runinfo.nf b/modules/local/sra_ids_to_runinfo.nf index bb9b3c30..06d6ceff 100644 --- a/modules/local/sra_ids_to_runinfo.nf +++ b/modules/local/sra_ids_to_runinfo.nf @@ -1,34 +1,34 @@ -// Import generic module functions -include { saveFiles; getSoftwareName } from './functions' - -params.options = [:] - -/* - * Fetch SRA / ENA / GEO run information via the ENA API - */ -process SRA_IDS_TO_RUNINFO { - tag "$id" - label 'error_retry' - publishDir "${params.outdir}", - mode: params.publish_dir_mode, - saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), meta:[:], publish_by_meta:[]) } - - conda (params.enable_conda ? 
"conda-forge::requests=2.24.0" : null) - if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { - container "https://depot.galaxyproject.org/singularity/requests:2.24.0" - } else { - container "quay.io/biocontainers/requests:2.24.0" - } - - input: - val id - - output: - path "*.tsv", emit: tsv - - script: - """ - echo $id > id.txt - sra_ids_to_runinfo.py id.txt ${id}.runinfo.tsv - """ -} +// Import generic module functions +include { saveFiles; getSoftwareName } from './functions' + +params.options = [:] + +/* + * Fetch SRA / ENA / GEO run information via the ENA API + */ +process SRA_IDS_TO_RUNINFO { + tag "$id" + label 'error_retry' + publishDir "${params.outdir}", + mode: params.publish_dir_mode, + saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), meta:[:], publish_by_meta:[]) } + + conda (params.enable_conda ? "conda-forge::requests=2.24.0" : null) + if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { + container "https://depot.galaxyproject.org/singularity/requests:2.24.0" + } else { + container "quay.io/biocontainers/requests:2.24.0" + } + + input: + val id + + output: + path "*.tsv", emit: tsv + + script: + """ + echo $id > id.txt + sra_ids_to_runinfo.py id.txt ${id}.runinfo.tsv + """ +} diff --git a/modules/local/sra_merge_samplesheet.nf b/modules/local/sra_merge_samplesheet.nf index 77aa3770..78cb8457 100644 --- a/modules/local/sra_merge_samplesheet.nf +++ b/modules/local/sra_merge_samplesheet.nf @@ -1,34 +1,34 @@ -// Import generic module functions -include { saveFiles; getSoftwareName } from './functions' - -params.options = [:] - -/* - * Merge samplesheets across all samples - */ -process SRA_MERGE_SAMPLESHEET { - publishDir "${params.outdir}", - mode: params.publish_dir_mode, - saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), meta:[:], 
publish_by_meta:[]) } - - conda (params.enable_conda ? "conda-forge::sed=4.7" : null) - if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { - container "https://containers.biocontainers.pro/s3/SingImgsRepo/biocontainers/v1.2.0_cv1/biocontainers_v1.2.0_cv1.img" - } else { - container "biocontainers/biocontainers:v1.2.0_cv1" - } - - input: - path ('samplesheets/*') - - output: - path "*csv", emit: csv - - script: - """ - head -n 1 `ls ./samplesheets/* | head -n 1` > samplesheet.csv - for fileid in `ls ./samplesheets/*`; do - awk 'NR>1' \$fileid >> samplesheet.csv - done - """ -} +// Import generic module functions +include { saveFiles; getSoftwareName } from './functions' + +params.options = [:] + +/* + * Merge samplesheets across all samples + */ +process SRA_MERGE_SAMPLESHEET { + publishDir "${params.outdir}", + mode: params.publish_dir_mode, + saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), meta:[:], publish_by_meta:[]) } + + conda (params.enable_conda ? 
"conda-forge::sed=4.7" : null) + if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { + container "https://containers.biocontainers.pro/s3/SingImgsRepo/biocontainers/v1.2.0_cv1/biocontainers_v1.2.0_cv1.img" + } else { + container "biocontainers/biocontainers:v1.2.0_cv1" + } + + input: + path ('samplesheets/*') + + output: + path "*csv", emit: csv + + script: + """ + head -n 1 `ls ./samplesheets/* | head -n 1` > samplesheet.csv + for fileid in `ls ./samplesheets/*`; do + awk 'NR>1' \$fileid >> samplesheet.csv + done + """ +} diff --git a/modules/local/sra_runinfo_to_ftp.nf b/modules/local/sra_runinfo_to_ftp.nf index 431ded86..634abd0e 100644 --- a/modules/local/sra_runinfo_to_ftp.nf +++ b/modules/local/sra_runinfo_to_ftp.nf @@ -1,31 +1,31 @@ -// Import generic module functions -include { saveFiles; getSoftwareName } from './functions' - -params.options = [:] - -/* - * Create samplesheet for pipeline from SRA run information fetched via the ENA API - */ -process SRA_RUNINFO_TO_FTP { - publishDir "${params.outdir}", - mode: params.publish_dir_mode, - saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), meta:[:], publish_by_meta:[]) } - - conda (params.enable_conda ? 
"conda-forge::python=3.8.3" : null) - if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { - container "https://depot.galaxyproject.org/singularity/python:3.8.3" - } else { - container "quay.io/biocontainers/python:3.8.3" - } - - input: - path runinfo - - output: - path "*.tsv", emit: tsv - - script: - """ - sra_runinfo_to_ftp.py ${runinfo.join(',')} ${runinfo.toString().tokenize(".")[0]}.runinfo_ftp.tsv - """ -} +// Import generic module functions +include { saveFiles; getSoftwareName } from './functions' + +params.options = [:] + +/* + * Create samplesheet for pipeline from SRA run information fetched via the ENA API + */ +process SRA_RUNINFO_TO_FTP { + publishDir "${params.outdir}", + mode: params.publish_dir_mode, + saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), meta:[:], publish_by_meta:[]) } + + conda (params.enable_conda ? "conda-forge::python=3.8.3" : null) + if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { + container "https://depot.galaxyproject.org/singularity/python:3.8.3" + } else { + container "quay.io/biocontainers/python:3.8.3" + } + + input: + path runinfo + + output: + path "*.tsv", emit: tsv + + script: + """ + sra_runinfo_to_ftp.py ${runinfo.join(',')} ${runinfo.toString().tokenize(".")[0]}.runinfo_ftp.tsv + """ +} diff --git a/modules/local/sra_to_samplesheet.nf b/modules/local/sra_to_samplesheet.nf index ba92babe..d33c3571 100644 --- a/modules/local/sra_to_samplesheet.nf +++ b/modules/local/sra_to_samplesheet.nf @@ -1,46 +1,46 @@ -// Import generic module functions -include { saveFiles; getSoftwareName } from './functions' - -params.options = [:] -params.results_dir = '' - -/* - * Stage FastQ files downloaded by SRA and auto-create a samplesheet for the pipeline - */ -process SRA_TO_SAMPLESHEET { - tag "$meta.id" - publishDir "${params.outdir}", - mode: params.publish_dir_mode, - saveAs: { 
filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), meta:meta, publish_by_meta:['id']) } - - memory 100.MB - - input: - tuple val(meta), path(fastq) - - output: - tuple val(meta), path("*csv"), emit: csv - - exec: - // Remove custom keys needed to download the data - def meta_map = meta.clone() - meta_map.remove("id") - meta_map.remove("fastq_1") - meta_map.remove("fastq_2") - meta_map.remove("md5_1") - meta_map.remove("md5_2") - meta_map.remove("single_end") - - // Add required fields for the pipeline to the beginning of the map - pipeline_map = [ - sample : "${meta.id.split('_')[0..-2].join('_')}", - fastq_1 : "${params.outdir}/${params.results_dir}/${fastq[0]}", - fastq_2 : meta.single_end ? '' : "${params.outdir}/${params.results_dir}/${fastq[1]}" - ] - pipeline_map << meta_map - - // Write to file - def file = task.workDir.resolve("${meta.id}.samplesheet.csv") - file.write pipeline_map.keySet().collect{ '"' + it + '"'}.join(",") + '\n' - file.append(pipeline_map.values().collect{ '"' + it + '"'}.join(",")) + '\n' -} +// Import generic module functions +include { saveFiles; getSoftwareName } from './functions' + +params.options = [:] +params.results_dir = '' + +/* + * Stage FastQ files downloaded by SRA and auto-create a samplesheet for the pipeline + */ +process SRA_TO_SAMPLESHEET { + tag "$meta.id" + publishDir "${params.outdir}", + mode: params.publish_dir_mode, + saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), meta:meta, publish_by_meta:['id']) } + + memory 100.MB + + input: + tuple val(meta), path(fastq) + + output: + tuple val(meta), path("*csv"), emit: csv + + exec: + // Remove custom keys needed to download the data + def meta_map = meta.clone() + meta_map.remove("id") + meta_map.remove("fastq_1") + meta_map.remove("fastq_2") + meta_map.remove("md5_1") + meta_map.remove("md5_2") + meta_map.remove("single_end") + + // Add required 
fields for the pipeline to the beginning of the map + pipeline_map = [ + sample : "${meta.id.split('_')[0..-2].join('_')}", + fastq_1 : "${params.outdir}/${params.results_dir}/${fastq[0]}", + fastq_2 : meta.single_end ? '' : "${params.outdir}/${params.results_dir}/${fastq[1]}" + ] + pipeline_map << meta_map + + // Write to file + def file = task.workDir.resolve("${meta.id}.samplesheet.csv") + file.write pipeline_map.keySet().collect{ '"' + it + '"'}.join(",") + '\n' + file.append(pipeline_map.values().collect{ '"' + it + '"'}.join(",")) + '\n' +} diff --git a/subworkflows/local/assembly_minia.nf b/subworkflows/local/assembly_minia.nf index 24aee3b1..ab67aac4 100644 --- a/subworkflows/local/assembly_minia.nf +++ b/subworkflows/local/assembly_minia.nf @@ -1,76 +1,76 @@ -/* - * Assembly and downstream processing for minia scaffolds - */ - -params.minia_options = [:] -params.blastn_options = [:] -params.blastn_filter_options = [:] -params.abacas_options = [:] -params.plasmidid_options = [:] -params.quast_options = [:] - -include { MINIA } from '../../modules/nf-core/software/minia/main' addParams( options: params.minia_options ) -include { ASSEMBLY_QC } from './assembly_qc' addParams( blastn_options: params.blastn_options, blastn_filter_options: params.blastn_filter_options, abacas_options: params.abacas_options, plasmidid_options: params.plasmidid_options, quast_options: params.quast_options ) - -workflow ASSEMBLY_MINIA { - take: - reads // channel: [ val(meta), [ reads ] ] - fasta // channel: /path/to/genome.fasta - gff // channel: /path/to/genome.gff - blast_db // channel: /path/to/blast_db/ - blast_header // channel: /path/to/blast_header.txt - - main: - /* - * Assemble reads with minia - */ - MINIA ( reads ) - - /* - * Filter for empty contig files - */ - MINIA - .out - .contigs - .filter { meta, contig -> contig.size() > 0 } - .set { ch_contigs } - - /* - * Downstream assembly steps - */ - ASSEMBLY_QC ( - ch_contigs, - fasta, - gff, - blast_db, - blast_header - ) 
- - emit: - contigs = MINIA.out.contigs // channel: [ val(meta), [ contigs ] ] - unitigs = MINIA.out.unitigs // channel: [ val(meta), [ unitigs ] ] - h5 = MINIA.out.h5 // channel: [ val(meta), [ h5 ] ] - minia_version = MINIA.out.version // path: *.version.txt - - blast_txt = ASSEMBLY_QC.out.blast_txt // channel: [ val(meta), [ txt ] ] - blast_filter_txt = ASSEMBLY_QC.out.blast_filter_txt // channel: [ val(meta), [ txt ] ] - blast_version = ASSEMBLY_QC.out.blast_version // path: *.version.txt - - quast_results = ASSEMBLY_QC.out.quast_results // channel: [ val(meta), [ results ] ] - quast_tsv = ASSEMBLY_QC.out.quast_tsv // channel: [ val(meta), [ tsv ] ] - quast_version = ASSEMBLY_QC.out.quast_version // path: *.version.txt - - abacas_results = ASSEMBLY_QC.out.abacas_results // channel: [ val(meta), [ results ] ] - abacas_version = ASSEMBLY_QC.out.abacas_version // path: *.version.txt - - plasmidid_html = ASSEMBLY_QC.out.plasmidid_html // channel: [ val(meta), [ html ] ] - plasmidid_tab = ASSEMBLY_QC.out.plasmidid_tab // channel: [ val(meta), [ tab ] ] - plasmidid_images = ASSEMBLY_QC.out.plasmidid_images // channel: [ val(meta), [ images/ ] ] - plasmidid_logs = ASSEMBLY_QC.out.plasmidid_logs // channel: [ val(meta), [ logs/ ] ] - plasmidid_data = ASSEMBLY_QC.out.plasmidid_data // channel: [ val(meta), [ data/ ] ] - plasmidid_database = ASSEMBLY_QC.out.plasmidid_database // channel: [ val(meta), [ database/ ] ] - plasmidid_fasta = ASSEMBLY_QC.out.plasmidid_fasta // channel: [ val(meta), [ fasta_files/ ] ] - plasmidid_kmer = ASSEMBLY_QC.out.plasmidid_kmer // channel: [ val(meta), [ kmer/ ] ] - plasmidid_version = ASSEMBLY_QC.out.plasmidid_version // path: *.version.txt - -} +/* + * Assembly and downstream processing for minia scaffolds + */ + +params.minia_options = [:] +params.blastn_options = [:] +params.blastn_filter_options = [:] +params.abacas_options = [:] +params.plasmidid_options = [:] +params.quast_options = [:] + +include { MINIA } from 
'../../modules/nf-core/software/minia/main' addParams( options: params.minia_options ) +include { ASSEMBLY_QC } from './assembly_qc' addParams( blastn_options: params.blastn_options, blastn_filter_options: params.blastn_filter_options, abacas_options: params.abacas_options, plasmidid_options: params.plasmidid_options, quast_options: params.quast_options ) + +workflow ASSEMBLY_MINIA { + take: + reads // channel: [ val(meta), [ reads ] ] + fasta // channel: /path/to/genome.fasta + gff // channel: /path/to/genome.gff + blast_db // channel: /path/to/blast_db/ + blast_header // channel: /path/to/blast_header.txt + + main: + /* + * Assemble reads with minia + */ + MINIA ( reads ) + + /* + * Filter for empty contig files + */ + MINIA + .out + .contigs + .filter { meta, contig -> contig.size() > 0 } + .set { ch_contigs } + + /* + * Downstream assembly steps + */ + ASSEMBLY_QC ( + ch_contigs, + fasta, + gff, + blast_db, + blast_header + ) + + emit: + contigs = MINIA.out.contigs // channel: [ val(meta), [ contigs ] ] + unitigs = MINIA.out.unitigs // channel: [ val(meta), [ unitigs ] ] + h5 = MINIA.out.h5 // channel: [ val(meta), [ h5 ] ] + minia_version = MINIA.out.version // path: *.version.txt + + blast_txt = ASSEMBLY_QC.out.blast_txt // channel: [ val(meta), [ txt ] ] + blast_filter_txt = ASSEMBLY_QC.out.blast_filter_txt // channel: [ val(meta), [ txt ] ] + blast_version = ASSEMBLY_QC.out.blast_version // path: *.version.txt + + quast_results = ASSEMBLY_QC.out.quast_results // channel: [ val(meta), [ results ] ] + quast_tsv = ASSEMBLY_QC.out.quast_tsv // channel: [ val(meta), [ tsv ] ] + quast_version = ASSEMBLY_QC.out.quast_version // path: *.version.txt + + abacas_results = ASSEMBLY_QC.out.abacas_results // channel: [ val(meta), [ results ] ] + abacas_version = ASSEMBLY_QC.out.abacas_version // path: *.version.txt + + plasmidid_html = ASSEMBLY_QC.out.plasmidid_html // channel: [ val(meta), [ html ] ] + plasmidid_tab = ASSEMBLY_QC.out.plasmidid_tab // channel: [ 
val(meta), [ tab ] ] + plasmidid_images = ASSEMBLY_QC.out.plasmidid_images // channel: [ val(meta), [ images/ ] ] + plasmidid_logs = ASSEMBLY_QC.out.plasmidid_logs // channel: [ val(meta), [ logs/ ] ] + plasmidid_data = ASSEMBLY_QC.out.plasmidid_data // channel: [ val(meta), [ data/ ] ] + plasmidid_database = ASSEMBLY_QC.out.plasmidid_database // channel: [ val(meta), [ database/ ] ] + plasmidid_fasta = ASSEMBLY_QC.out.plasmidid_fasta // channel: [ val(meta), [ fasta_files/ ] ] + plasmidid_kmer = ASSEMBLY_QC.out.plasmidid_kmer // channel: [ val(meta), [ kmer/ ] ] + plasmidid_version = ASSEMBLY_QC.out.plasmidid_version // path: *.version.txt + +} diff --git a/subworkflows/local/assembly_qc.nf b/subworkflows/local/assembly_qc.nf index edc5ff03..b39939fe 100644 --- a/subworkflows/local/assembly_qc.nf +++ b/subworkflows/local/assembly_qc.nf @@ -1,112 +1,112 @@ -/* - * Downstream analysis for assembly scaffolds - */ - -params.blastn_options = [:] -params.blastn_filter_options = [:] -params.abacas_options = [:] -params.plasmidid_options = [:] -params.quast_options = [:] - -include { FILTER_BLASTN } from '../../modules/local/filter_blastn' addParams( options: params.blastn_filter_options ) -include { ABACAS } from '../../modules/nf-core/software/abacas/main' addParams( options: params.abacas_options ) -include { BLAST_BLASTN } from '../../modules/nf-core/software/blast/blastn/main' addParams( options: params.blastn_options ) -include { PLASMIDID } from '../../modules/nf-core/software/plasmidid/main' addParams( options: params.plasmidid_options ) -include { QUAST } from '../../modules/nf-core/software/quast/main' addParams( options: params.quast_options ) - -workflow ASSEMBLY_QC { - take: - scaffolds // channel: [ val(meta), [ scaffolds ] ] - fasta // channel: /path/to/genome.fasta - gff // channel: /path/to/genome.gff - blast_db // channel: /path/to/blast_db/ - blast_header // channel: /path/to/blast_header.txt - - main: - /* - * Run blastn on assembly scaffolds - */ - 
ch_blast_txt = Channel.empty() - ch_blast_filter_txt = Channel.empty() - ch_blast_version = Channel.empty() - if (!params.skip_blast) { - BLAST_BLASTN ( scaffolds, blast_db ) - ch_blast_txt = BLAST_BLASTN.out.txt - ch_blast_version = BLAST_BLASTN.out.version - - FILTER_BLASTN ( BLAST_BLASTN.out.txt, blast_header ) - ch_blast_filter_txt = FILTER_BLASTN.out.txt - } - - /* - * Assembly QC across all samples with QUAST - */ - ch_quast_results = Channel.empty() - ch_quast_tsv = Channel.empty() - ch_quast_version = Channel.empty() - if (!params.skip_assembly_quast) { - QUAST ( scaffolds.collect{ it[1] }, fasta, gff, true, params.gff ) - ch_quast_results = QUAST.out.results - ch_quast_tsv = QUAST.out.tsv - ch_quast_version = QUAST.out.version - } - - /* - * Contiguate assembly with ABACAS - */ - ch_abacas_results = Channel.empty() - ch_abacas_version = Channel.empty() - if (!params.skip_abacas) { - ABACAS ( scaffolds, fasta ) - ch_abacas_results = ABACAS.out.results - ch_abacas_version = ABACAS.out.version - } - - /* - * Assembly report with PlasmidID - */ - ch_plasmidid_html = Channel.empty() - ch_plasmidid_tab = Channel.empty() - ch_plasmidid_images = Channel.empty() - ch_plasmidid_logs = Channel.empty() - ch_plasmidid_data = Channel.empty() - ch_plasmidid_database = Channel.empty() - ch_plasmidid_fasta = Channel.empty() - ch_plasmidid_kmer = Channel.empty() - ch_plasmidid_version = Channel.empty() - if (!params.skip_plasmidid) { - PLASMIDID ( scaffolds, fasta ) - ch_plasmidid_html = PLASMIDID.out.html - ch_plasmidid_tab = PLASMIDID.out.tab - ch_plasmidid_images = PLASMIDID.out.images - ch_plasmidid_logs = PLASMIDID.out.logs - ch_plasmidid_data = PLASMIDID.out.data - ch_plasmidid_database = PLASMIDID.out.database - ch_plasmidid_fasta = PLASMIDID.out.fasta_files - ch_plasmidid_kmer = PLASMIDID.out.kmer - ch_plasmidid_version = PLASMIDID.out.version - } - - emit: - blast_txt = ch_blast_txt // channel: [ val(meta), [ txt ] ] - blast_filter_txt = ch_blast_filter_txt // 
channel: [ val(meta), [ txt ] ] - blast_version = ch_blast_version // path: *.version.txt - - quast_results = ch_quast_results // channel: [ val(meta), [ results ] ] - quast_tsv = ch_quast_tsv // channel: [ val(meta), [ tsv ] ] - quast_version = ch_quast_version // path: *.version.txt - - abacas_results = ch_abacas_results // channel: [ val(meta), [ results ] ] - abacas_version = ch_abacas_version // path: *.version.txt - - plasmidid_html = ch_plasmidid_html // channel: [ val(meta), [ html ] ] - plasmidid_tab = ch_plasmidid_tab // channel: [ val(meta), [ tab ] ] - plasmidid_images = ch_plasmidid_images // channel: [ val(meta), [ images/ ] ] - plasmidid_logs = ch_plasmidid_logs // channel: [ val(meta), [ logs/ ] ] - plasmidid_data = ch_plasmidid_data // channel: [ val(meta), [ data/ ] ] - plasmidid_database = ch_plasmidid_database // channel: [ val(meta), [ database/ ] ] - plasmidid_fasta = ch_plasmidid_fasta // channel: [ val(meta), [ fasta_files/ ] ] - plasmidid_kmer = ch_plasmidid_kmer // channel: [ val(meta), [ kmer/ ] ] - plasmidid_version = ch_plasmidid_version // path: *.version.txt - -} +/* + * Downstream analysis for assembly scaffolds + */ + +params.blastn_options = [:] +params.blastn_filter_options = [:] +params.abacas_options = [:] +params.plasmidid_options = [:] +params.quast_options = [:] + +include { FILTER_BLASTN } from '../../modules/local/filter_blastn' addParams( options: params.blastn_filter_options ) +include { ABACAS } from '../../modules/nf-core/software/abacas/main' addParams( options: params.abacas_options ) +include { BLAST_BLASTN } from '../../modules/nf-core/software/blast/blastn/main' addParams( options: params.blastn_options ) +include { PLASMIDID } from '../../modules/nf-core/software/plasmidid/main' addParams( options: params.plasmidid_options ) +include { QUAST } from '../../modules/nf-core/software/quast/main' addParams( options: params.quast_options ) + +workflow ASSEMBLY_QC { + take: + scaffolds // channel: [ val(meta), [ 
scaffolds ] ] + fasta // channel: /path/to/genome.fasta + gff // channel: /path/to/genome.gff + blast_db // channel: /path/to/blast_db/ + blast_header // channel: /path/to/blast_header.txt + + main: + /* + * Run blastn on assembly scaffolds + */ + ch_blast_txt = Channel.empty() + ch_blast_filter_txt = Channel.empty() + ch_blast_version = Channel.empty() + if (!params.skip_blast) { + BLAST_BLASTN ( scaffolds, blast_db ) + ch_blast_txt = BLAST_BLASTN.out.txt + ch_blast_version = BLAST_BLASTN.out.version + + FILTER_BLASTN ( BLAST_BLASTN.out.txt, blast_header ) + ch_blast_filter_txt = FILTER_BLASTN.out.txt + } + + /* + * Assembly QC across all samples with QUAST + */ + ch_quast_results = Channel.empty() + ch_quast_tsv = Channel.empty() + ch_quast_version = Channel.empty() + if (!params.skip_assembly_quast) { + QUAST ( scaffolds.collect{ it[1] }, fasta, gff, true, params.gff ) + ch_quast_results = QUAST.out.results + ch_quast_tsv = QUAST.out.tsv + ch_quast_version = QUAST.out.version + } + + /* + * Contiguate assembly with ABACAS + */ + ch_abacas_results = Channel.empty() + ch_abacas_version = Channel.empty() + if (!params.skip_abacas) { + ABACAS ( scaffolds, fasta ) + ch_abacas_results = ABACAS.out.results + ch_abacas_version = ABACAS.out.version + } + + /* + * Assembly report with PlasmidID + */ + ch_plasmidid_html = Channel.empty() + ch_plasmidid_tab = Channel.empty() + ch_plasmidid_images = Channel.empty() + ch_plasmidid_logs = Channel.empty() + ch_plasmidid_data = Channel.empty() + ch_plasmidid_database = Channel.empty() + ch_plasmidid_fasta = Channel.empty() + ch_plasmidid_kmer = Channel.empty() + ch_plasmidid_version = Channel.empty() + if (!params.skip_plasmidid) { + PLASMIDID ( scaffolds, fasta ) + ch_plasmidid_html = PLASMIDID.out.html + ch_plasmidid_tab = PLASMIDID.out.tab + ch_plasmidid_images = PLASMIDID.out.images + ch_plasmidid_logs = PLASMIDID.out.logs + ch_plasmidid_data = PLASMIDID.out.data + ch_plasmidid_database = PLASMIDID.out.database + 
ch_plasmidid_fasta = PLASMIDID.out.fasta_files + ch_plasmidid_kmer = PLASMIDID.out.kmer + ch_plasmidid_version = PLASMIDID.out.version + } + + emit: + blast_txt = ch_blast_txt // channel: [ val(meta), [ txt ] ] + blast_filter_txt = ch_blast_filter_txt // channel: [ val(meta), [ txt ] ] + blast_version = ch_blast_version // path: *.version.txt + + quast_results = ch_quast_results // channel: [ val(meta), [ results ] ] + quast_tsv = ch_quast_tsv // channel: [ val(meta), [ tsv ] ] + quast_version = ch_quast_version // path: *.version.txt + + abacas_results = ch_abacas_results // channel: [ val(meta), [ results ] ] + abacas_version = ch_abacas_version // path: *.version.txt + + plasmidid_html = ch_plasmidid_html // channel: [ val(meta), [ html ] ] + plasmidid_tab = ch_plasmidid_tab // channel: [ val(meta), [ tab ] ] + plasmidid_images = ch_plasmidid_images // channel: [ val(meta), [ images/ ] ] + plasmidid_logs = ch_plasmidid_logs // channel: [ val(meta), [ logs/ ] ] + plasmidid_data = ch_plasmidid_data // channel: [ val(meta), [ data/ ] ] + plasmidid_database = ch_plasmidid_database // channel: [ val(meta), [ database/ ] ] + plasmidid_fasta = ch_plasmidid_fasta // channel: [ val(meta), [ fasta_files/ ] ] + plasmidid_kmer = ch_plasmidid_kmer // channel: [ val(meta), [ kmer/ ] ] + plasmidid_version = ch_plasmidid_version // path: *.version.txt + +} diff --git a/subworkflows/local/assembly_spades.nf b/subworkflows/local/assembly_spades.nf index 0560bf05..f438e65a 100644 --- a/subworkflows/local/assembly_spades.nf +++ b/subworkflows/local/assembly_spades.nf @@ -1,114 +1,114 @@ -/* - * Assembly and downstream processing for SPAdes scaffolds - */ - -params.spades_options = [:] -params.bandage_options = [:] -params.blastn_options = [:] -params.blastn_filter_options = [:] -params.abacas_options = [:] -params.plasmidid_options = [:] -params.quast_options = [:] - -include { SPADES } from '../../modules/nf-core/software/spades/main' addParams( options: params.spades_options ) 
-include { BANDAGE_IMAGE } from '../../modules/nf-core/software/bandage/image/main' addParams( options: params.bandage_options ) -include { ASSEMBLY_QC } from './assembly_qc' addParams( blastn_options: params.blastn_options, blastn_filter_options: params.blastn_filter_options, abacas_options: params.abacas_options, plasmidid_options: params.plasmidid_options, quast_options: params.quast_options ) - -workflow ASSEMBLY_SPADES { - take: - reads // channel: [ val(meta), [ reads ] ] - hmm // channel: /path/to/spades.hmm - fasta // channel: /path/to/genome.fasta - gff // channel: /path/to/genome.gff - blast_db // channel: /path/to/blast_db/ - blast_header // channel: /path/to/blast_header.txt - - main: - /* - * Filter for paired-end samples if running metaSPAdes / metaviralSPAdes / metaplasmidSPAdes - */ - ch_reads = reads - if (params.spades_options.args.contains('--meta') || params.spades_options.args.contains('--bio')) { - reads - .filter { meta, fastq -> !meta.single_end } - .set { ch_reads } - } - - /* - * Assemble reads with SPAdes - */ - SPADES ( ch_reads, hmm ) - - /* - * Filter for empty scaffold files - */ - SPADES - .out - .scaffolds - .filter { meta, scaffold -> scaffold.size() > 0 } - .set { ch_scaffolds } - - SPADES - .out - .gfa - .filter { meta, gfa -> gfa.size() > 0 } - .set { ch_gfa } - - /* - * Generate assembly visualisation with Bandage - */ - ch_bandage_png = Channel.empty() - ch_bandage_svg = Channel.empty() - ch_bandage_version = Channel.empty() - if (!params.skip_bandage) { - BANDAGE_IMAGE ( ch_gfa ) - ch_bandage_version = BANDAGE_IMAGE.out.version - ch_bandage_png = BANDAGE_IMAGE.out.png - ch_bandage_svg = BANDAGE_IMAGE.out.svg - } - - /* - * Downstream assembly steps - */ - ASSEMBLY_QC ( - ch_scaffolds, - fasta, - gff, - blast_db, - blast_header - ) - - emit: - scaffolds = SPADES.out.scaffolds // channel: [ val(meta), [ scaffolds ] ] - contigs = SPADES.out.contigs // channel: [ val(meta), [ contigs ] ] - transcripts = SPADES.out.transcripts // 
channel: [ val(meta), [ transcripts ] ] - gene_clusters = SPADES.out.gene_clusters // channel: [ val(meta), [ gene_clusters ] ] - gfa = SPADES.out.gfa // channel: [ val(meta), [ gfa ] ] - log_out = SPADES.out.log // channel: [ val(meta), [ log ] ] - spades_version = SPADES.out.version // path: *.version.txt - - bandage_png = ch_bandage_png // channel: [ val(meta), [ png ] ] - bandage_svg = ch_bandage_svg // channel: [ val(meta), [ svg ] ] - bandage_version = ch_bandage_version // path: *.version.txt - - blast_txt = ASSEMBLY_QC.out.blast_txt // channel: [ val(meta), [ txt ] ] - blast_filter_txt = ASSEMBLY_QC.out.blast_filter_txt // channel: [ val(meta), [ txt ] ] - blast_version = ASSEMBLY_QC.out.blast_version // path: *.version.txt - - quast_results = ASSEMBLY_QC.out.quast_results // channel: [ val(meta), [ results ] ] - quast_tsv = ASSEMBLY_QC.out.quast_tsv // channel: [ val(meta), [ tsv ] ] - quast_version = ASSEMBLY_QC.out.quast_version // path: *.version.txt - - abacas_results = ASSEMBLY_QC.out.abacas_results // channel: [ val(meta), [ results ] ] - abacas_version = ASSEMBLY_QC.out.abacas_version // path: *.version.txt - - plasmidid_html = ASSEMBLY_QC.out.plasmidid_html // channel: [ val(meta), [ html ] ] - plasmidid_tab = ASSEMBLY_QC.out.plasmidid_tab // channel: [ val(meta), [ tab ] ] - plasmidid_images = ASSEMBLY_QC.out.plasmidid_images // channel: [ val(meta), [ images/ ] ] - plasmidid_logs = ASSEMBLY_QC.out.plasmidid_logs // channel: [ val(meta), [ logs/ ] ] - plasmidid_data = ASSEMBLY_QC.out.plasmidid_data // channel: [ val(meta), [ data/ ] ] - plasmidid_database = ASSEMBLY_QC.out.plasmidid_database // channel: [ val(meta), [ database/ ] ] - plasmidid_fasta = ASSEMBLY_QC.out.plasmidid_fasta // channel: [ val(meta), [ fasta_files/ ] ] - plasmidid_kmer = ASSEMBLY_QC.out.plasmidid_kmer // channel: [ val(meta), [ kmer/ ] ] - plasmidid_version = ASSEMBLY_QC.out.plasmidid_version // path: *.version.txt -} +/* + * Assembly and downstream processing for SPAdes 
scaffolds + */ + +params.spades_options = [:] +params.bandage_options = [:] +params.blastn_options = [:] +params.blastn_filter_options = [:] +params.abacas_options = [:] +params.plasmidid_options = [:] +params.quast_options = [:] + +include { SPADES } from '../../modules/nf-core/software/spades/main' addParams( options: params.spades_options ) +include { BANDAGE_IMAGE } from '../../modules/nf-core/software/bandage/image/main' addParams( options: params.bandage_options ) +include { ASSEMBLY_QC } from './assembly_qc' addParams( blastn_options: params.blastn_options, blastn_filter_options: params.blastn_filter_options, abacas_options: params.abacas_options, plasmidid_options: params.plasmidid_options, quast_options: params.quast_options ) + +workflow ASSEMBLY_SPADES { + take: + reads // channel: [ val(meta), [ reads ] ] + hmm // channel: /path/to/spades.hmm + fasta // channel: /path/to/genome.fasta + gff // channel: /path/to/genome.gff + blast_db // channel: /path/to/blast_db/ + blast_header // channel: /path/to/blast_header.txt + + main: + /* + * Filter for paired-end samples if running metaSPAdes / metaviralSPAdes / metaplasmidSPAdes + */ + ch_reads = reads + if (params.spades_options.args.contains('--meta') || params.spades_options.args.contains('--bio')) { + reads + .filter { meta, fastq -> !meta.single_end } + .set { ch_reads } + } + + /* + * Assemble reads with SPAdes + */ + SPADES ( ch_reads, hmm ) + + /* + * Filter for empty scaffold files + */ + SPADES + .out + .scaffolds + .filter { meta, scaffold -> scaffold.size() > 0 } + .set { ch_scaffolds } + + SPADES + .out + .gfa + .filter { meta, gfa -> gfa.size() > 0 } + .set { ch_gfa } + + /* + * Generate assembly visualisation with Bandage + */ + ch_bandage_png = Channel.empty() + ch_bandage_svg = Channel.empty() + ch_bandage_version = Channel.empty() + if (!params.skip_bandage) { + BANDAGE_IMAGE ( ch_gfa ) + ch_bandage_version = BANDAGE_IMAGE.out.version + ch_bandage_png = BANDAGE_IMAGE.out.png + ch_bandage_svg = 
BANDAGE_IMAGE.out.svg + } + + /* + * Downstream assembly steps + */ + ASSEMBLY_QC ( + ch_scaffolds, + fasta, + gff, + blast_db, + blast_header + ) + + emit: + scaffolds = SPADES.out.scaffolds // channel: [ val(meta), [ scaffolds ] ] + contigs = SPADES.out.contigs // channel: [ val(meta), [ contigs ] ] + transcripts = SPADES.out.transcripts // channel: [ val(meta), [ transcripts ] ] + gene_clusters = SPADES.out.gene_clusters // channel: [ val(meta), [ gene_clusters ] ] + gfa = SPADES.out.gfa // channel: [ val(meta), [ gfa ] ] + log_out = SPADES.out.log // channel: [ val(meta), [ log ] ] + spades_version = SPADES.out.version // path: *.version.txt + + bandage_png = ch_bandage_png // channel: [ val(meta), [ png ] ] + bandage_svg = ch_bandage_svg // channel: [ val(meta), [ svg ] ] + bandage_version = ch_bandage_version // path: *.version.txt + + blast_txt = ASSEMBLY_QC.out.blast_txt // channel: [ val(meta), [ txt ] ] + blast_filter_txt = ASSEMBLY_QC.out.blast_filter_txt // channel: [ val(meta), [ txt ] ] + blast_version = ASSEMBLY_QC.out.blast_version // path: *.version.txt + + quast_results = ASSEMBLY_QC.out.quast_results // channel: [ val(meta), [ results ] ] + quast_tsv = ASSEMBLY_QC.out.quast_tsv // channel: [ val(meta), [ tsv ] ] + quast_version = ASSEMBLY_QC.out.quast_version // path: *.version.txt + + abacas_results = ASSEMBLY_QC.out.abacas_results // channel: [ val(meta), [ results ] ] + abacas_version = ASSEMBLY_QC.out.abacas_version // path: *.version.txt + + plasmidid_html = ASSEMBLY_QC.out.plasmidid_html // channel: [ val(meta), [ html ] ] + plasmidid_tab = ASSEMBLY_QC.out.plasmidid_tab // channel: [ val(meta), [ tab ] ] + plasmidid_images = ASSEMBLY_QC.out.plasmidid_images // channel: [ val(meta), [ images/ ] ] + plasmidid_logs = ASSEMBLY_QC.out.plasmidid_logs // channel: [ val(meta), [ logs/ ] ] + plasmidid_data = ASSEMBLY_QC.out.plasmidid_data // channel: [ val(meta), [ data/ ] ] + plasmidid_database = ASSEMBLY_QC.out.plasmidid_database // channel: [ 
val(meta), [ database/ ] ] + plasmidid_fasta = ASSEMBLY_QC.out.plasmidid_fasta // channel: [ val(meta), [ fasta_files/ ] ] + plasmidid_kmer = ASSEMBLY_QC.out.plasmidid_kmer // channel: [ val(meta), [ kmer/ ] ] + plasmidid_version = ASSEMBLY_QC.out.plasmidid_version // path: *.version.txt +} diff --git a/subworkflows/local/assembly_unicycler.nf b/subworkflows/local/assembly_unicycler.nf index 63c047e9..9a965a1d 100644 --- a/subworkflows/local/assembly_unicycler.nf +++ b/subworkflows/local/assembly_unicycler.nf @@ -1,101 +1,101 @@ -/* - * Assembly and downstream processing for Unicycler scaffolds - */ - -params.unicycler_options = [:] -params.bandage_options = [:] -params.blastn_options = [:] -params.blastn_filter_options = [:] -params.abacas_options = [:] -params.plasmidid_options = [:] -params.quast_options = [:] - -include { UNICYCLER } from '../../modules/nf-core/software/unicycler/main' addParams( options: params.unicycler_options ) -include { BANDAGE_IMAGE } from '../../modules/nf-core/software/bandage/image/main' addParams( options: params.bandage_options ) -include { ASSEMBLY_QC } from './assembly_qc' addParams( blastn_options: params.blastn_options, blastn_filter_options: params.blastn_filter_options, abacas_options: params.abacas_options, plasmidid_options: params.plasmidid_options, quast_options: params.quast_options ) - -workflow ASSEMBLY_UNICYCLER { - take: - reads // channel: [ val(meta), [ reads ] ] - fasta // channel: /path/to/genome.fasta - gff // channel: /path/to/genome.gff - blast_db // channel: /path/to/blast_db/ - blast_header // channel: /path/to/blast_header.txt - - main: - /* - * Assemble reads with Unicycler - */ - UNICYCLER ( reads ) - - /* - * Filter for empty scaffold files - */ - UNICYCLER - .out - .scaffolds - .filter { meta, scaffold -> scaffold.size() > 0 } - .set { ch_scaffolds } - - UNICYCLER - .out - .gfa - .filter { meta, gfa -> gfa.size() > 0 } - .set { ch_gfa } - - /* - * Generate assembly visualisation with Bandage - */ - 
ch_bandage_png = Channel.empty() - ch_bandage_svg = Channel.empty() - ch_bandage_version = Channel.empty() - if (!params.skip_bandage) { - BANDAGE_IMAGE ( ch_gfa ) - ch_bandage_version = BANDAGE_IMAGE.out.version - ch_bandage_png = BANDAGE_IMAGE.out.png - ch_bandage_svg = BANDAGE_IMAGE.out.svg - } - - /* - * Downstream assembly steps - */ - ASSEMBLY_QC ( - ch_scaffolds, - fasta, - gff, - blast_db, - blast_header - ) - - emit: - scaffolds = UNICYCLER.out.scaffolds // channel: [ val(meta), [ scaffolds ] ] - gfa = UNICYCLER.out.gfa // channel: [ val(meta), [ gfa ] ] - log_out = UNICYCLER.out.log // channel: [ val(meta), [ log ] ] - unicycler_version = UNICYCLER.out.version // path: *.version.txt - - bandage_png = ch_bandage_png // channel: [ val(meta), [ png ] ] - bandage_svg = ch_bandage_svg // channel: [ val(meta), [ svg ] ] - bandage_version = ch_bandage_version // path: *.version.txt - - blast_txt = ASSEMBLY_QC.out.blast_txt // channel: [ val(meta), [ txt ] ] - blast_filter_txt = ASSEMBLY_QC.out.blast_filter_txt // channel: [ val(meta), [ txt ] ] - blast_version = ASSEMBLY_QC.out.blast_version // path: *.version.txt - - quast_results = ASSEMBLY_QC.out.quast_results // channel: [ val(meta), [ results ] ] - quast_tsv = ASSEMBLY_QC.out.quast_tsv // channel: [ val(meta), [ tsv ] ] - quast_version = ASSEMBLY_QC.out.quast_version // path: *.version.txt - - abacas_results = ASSEMBLY_QC.out.abacas_results // channel: [ val(meta), [ results ] ] - abacas_version = ASSEMBLY_QC.out.abacas_version // path: *.version.txt - - plasmidid_html = ASSEMBLY_QC.out.plasmidid_html // channel: [ val(meta), [ html ] ] - plasmidid_tab = ASSEMBLY_QC.out.plasmidid_tab // channel: [ val(meta), [ tab ] ] - plasmidid_images = ASSEMBLY_QC.out.plasmidid_images // channel: [ val(meta), [ images/ ] ] - plasmidid_logs = ASSEMBLY_QC.out.plasmidid_logs // channel: [ val(meta), [ logs/ ] ] - plasmidid_data = ASSEMBLY_QC.out.plasmidid_data // channel: [ val(meta), [ data/ ] ] - plasmidid_database = 
ASSEMBLY_QC.out.plasmidid_database // channel: [ val(meta), [ database/ ] ] - plasmidid_fasta = ASSEMBLY_QC.out.plasmidid_fasta // channel: [ val(meta), [ fasta_files/ ] ] - plasmidid_kmer = ASSEMBLY_QC.out.plasmidid_kmer // channel: [ val(meta), [ kmer/ ] ] - plasmidid_version = ASSEMBLY_QC.out.plasmidid_version // path: *.version.txt - -} +/* + * Assembly and downstream processing for Unicycler scaffolds + */ + +params.unicycler_options = [:] +params.bandage_options = [:] +params.blastn_options = [:] +params.blastn_filter_options = [:] +params.abacas_options = [:] +params.plasmidid_options = [:] +params.quast_options = [:] + +include { UNICYCLER } from '../../modules/nf-core/software/unicycler/main' addParams( options: params.unicycler_options ) +include { BANDAGE_IMAGE } from '../../modules/nf-core/software/bandage/image/main' addParams( options: params.bandage_options ) +include { ASSEMBLY_QC } from './assembly_qc' addParams( blastn_options: params.blastn_options, blastn_filter_options: params.blastn_filter_options, abacas_options: params.abacas_options, plasmidid_options: params.plasmidid_options, quast_options: params.quast_options ) + +workflow ASSEMBLY_UNICYCLER { + take: + reads // channel: [ val(meta), [ reads ] ] + fasta // channel: /path/to/genome.fasta + gff // channel: /path/to/genome.gff + blast_db // channel: /path/to/blast_db/ + blast_header // channel: /path/to/blast_header.txt + + main: + /* + * Assemble reads with Unicycler + */ + UNICYCLER ( reads ) + + /* + * Filter for empty scaffold files + */ + UNICYCLER + .out + .scaffolds + .filter { meta, scaffold -> scaffold.size() > 0 } + .set { ch_scaffolds } + + UNICYCLER + .out + .gfa + .filter { meta, gfa -> gfa.size() > 0 } + .set { ch_gfa } + + /* + * Generate assembly visualisation with Bandage + */ + ch_bandage_png = Channel.empty() + ch_bandage_svg = Channel.empty() + ch_bandage_version = Channel.empty() + if (!params.skip_bandage) { + BANDAGE_IMAGE ( ch_gfa ) + ch_bandage_version = 
BANDAGE_IMAGE.out.version + ch_bandage_png = BANDAGE_IMAGE.out.png + ch_bandage_svg = BANDAGE_IMAGE.out.svg + } + + /* + * Downstream assembly steps + */ + ASSEMBLY_QC ( + ch_scaffolds, + fasta, + gff, + blast_db, + blast_header + ) + + emit: + scaffolds = UNICYCLER.out.scaffolds // channel: [ val(meta), [ scaffolds ] ] + gfa = UNICYCLER.out.gfa // channel: [ val(meta), [ gfa ] ] + log_out = UNICYCLER.out.log // channel: [ val(meta), [ log ] ] + unicycler_version = UNICYCLER.out.version // path: *.version.txt + + bandage_png = ch_bandage_png // channel: [ val(meta), [ png ] ] + bandage_svg = ch_bandage_svg // channel: [ val(meta), [ svg ] ] + bandage_version = ch_bandage_version // path: *.version.txt + + blast_txt = ASSEMBLY_QC.out.blast_txt // channel: [ val(meta), [ txt ] ] + blast_filter_txt = ASSEMBLY_QC.out.blast_filter_txt // channel: [ val(meta), [ txt ] ] + blast_version = ASSEMBLY_QC.out.blast_version // path: *.version.txt + + quast_results = ASSEMBLY_QC.out.quast_results // channel: [ val(meta), [ results ] ] + quast_tsv = ASSEMBLY_QC.out.quast_tsv // channel: [ val(meta), [ tsv ] ] + quast_version = ASSEMBLY_QC.out.quast_version // path: *.version.txt + + abacas_results = ASSEMBLY_QC.out.abacas_results // channel: [ val(meta), [ results ] ] + abacas_version = ASSEMBLY_QC.out.abacas_version // path: *.version.txt + + plasmidid_html = ASSEMBLY_QC.out.plasmidid_html // channel: [ val(meta), [ html ] ] + plasmidid_tab = ASSEMBLY_QC.out.plasmidid_tab // channel: [ val(meta), [ tab ] ] + plasmidid_images = ASSEMBLY_QC.out.plasmidid_images // channel: [ val(meta), [ images/ ] ] + plasmidid_logs = ASSEMBLY_QC.out.plasmidid_logs // channel: [ val(meta), [ logs/ ] ] + plasmidid_data = ASSEMBLY_QC.out.plasmidid_data // channel: [ val(meta), [ data/ ] ] + plasmidid_database = ASSEMBLY_QC.out.plasmidid_database // channel: [ val(meta), [ database/ ] ] + plasmidid_fasta = ASSEMBLY_QC.out.plasmidid_fasta // channel: [ val(meta), [ fasta_files/ ] ] + plasmidid_kmer = 
ASSEMBLY_QC.out.plasmidid_kmer // channel: [ val(meta), [ kmer/ ] ] + plasmidid_version = ASSEMBLY_QC.out.plasmidid_version // path: *.version.txt + +} diff --git a/subworkflows/local/make_consensus.nf b/subworkflows/local/make_consensus.nf index ea295481..bfdfecc6 100644 --- a/subworkflows/local/make_consensus.nf +++ b/subworkflows/local/make_consensus.nf @@ -1,44 +1,44 @@ -/* - * Run various tools to generate a masked genome consensus sequence - */ - -params.genomecov_options = [:] -params.merge_options = [:] -params.mask_options = [:] -params.maskfasta_options = [:] -params.bcftools_options = [:] -params.plot_bases_options = [:] - -include { BEDTOOLS_GENOMECOV } from '../../modules/nf-core/software/bedtools/genomecov/main' addParams( options: params.genomecov_options ) -include { BEDTOOLS_MERGE } from '../../modules/nf-core/software/bedtools/merge/main' addParams( options: params.merge_options ) -include { BEDTOOLS_MASKFASTA } from '../../modules/nf-core/software/bedtools/maskfasta/main' addParams( options: params.maskfasta_options ) -include { BCFTOOLS_CONSENSUS } from '../../modules/nf-core/software/bcftools/consensus/main' addParams( options: params.bcftools_options ) -include { MAKE_BED_MASK } from '../../modules/local/make_bed_mask' addParams( options: params.mask_options ) -include { PLOT_BASE_DENSITY } from '../../modules/local/plot_base_density' addParams( options: params.plot_bases_options ) - -workflow MAKE_CONSENSUS { - take: - bam_vcf // channel: [ val(meta), [ bam ], [ vcf ], [ tbi ] ] - fasta - - main: - BEDTOOLS_GENOMECOV ( bam_vcf.map { meta, bam, vcf, tbi -> [ meta, bam ] } ) - - BEDTOOLS_MERGE ( BEDTOOLS_GENOMECOV.out.bed ) - - MAKE_BED_MASK ( bam_vcf.map { meta, bam, vcf, tbi -> [ meta, vcf ] }.join( BEDTOOLS_MERGE.out.bed, by: [0] ) ) - - BEDTOOLS_MASKFASTA ( MAKE_BED_MASK.out.bed, fasta ) - - BCFTOOLS_CONSENSUS ( bam_vcf.map { meta, bam, vcf, tbi -> [ meta, vcf, tbi ] }.join( BEDTOOLS_MASKFASTA.out.fasta, by: [0] ) ) - - PLOT_BASE_DENSITY ( 
BCFTOOLS_CONSENSUS.out.fasta ) - - emit: - fasta = BCFTOOLS_CONSENSUS.out.fasta // channel: [ val(meta), [ fasta ] ] - tsv = PLOT_BASE_DENSITY.out.tsv // channel: [ val(meta), [ tsv ] ] - pdf = PLOT_BASE_DENSITY.out.pdf // channel: [ val(meta), [ pdf ] ] - bedtools_version = BEDTOOLS_MERGE.out.version // path: *.version.txt - bcftools_version = BCFTOOLS_CONSENSUS.out.version // path: *.version.txt - -} +/* + * Run various tools to generate a masked genome consensus sequence + */ + +params.genomecov_options = [:] +params.merge_options = [:] +params.mask_options = [:] +params.maskfasta_options = [:] +params.bcftools_options = [:] +params.plot_bases_options = [:] + +include { BEDTOOLS_GENOMECOV } from '../../modules/nf-core/software/bedtools/genomecov/main' addParams( options: params.genomecov_options ) +include { BEDTOOLS_MERGE } from '../../modules/nf-core/software/bedtools/merge/main' addParams( options: params.merge_options ) +include { BEDTOOLS_MASKFASTA } from '../../modules/nf-core/software/bedtools/maskfasta/main' addParams( options: params.maskfasta_options ) +include { BCFTOOLS_CONSENSUS } from '../../modules/nf-core/software/bcftools/consensus/main' addParams( options: params.bcftools_options ) +include { MAKE_BED_MASK } from '../../modules/local/make_bed_mask' addParams( options: params.mask_options ) +include { PLOT_BASE_DENSITY } from '../../modules/local/plot_base_density' addParams( options: params.plot_bases_options ) + +workflow MAKE_CONSENSUS { + take: + bam_vcf // channel: [ val(meta), [ bam ], [ vcf ], [ tbi ] ] + fasta + + main: + BEDTOOLS_GENOMECOV ( bam_vcf.map { meta, bam, vcf, tbi -> [ meta, bam ] } ) + + BEDTOOLS_MERGE ( BEDTOOLS_GENOMECOV.out.bed ) + + MAKE_BED_MASK ( bam_vcf.map { meta, bam, vcf, tbi -> [ meta, vcf ] }.join( BEDTOOLS_MERGE.out.bed, by: [0] ) ) + + BEDTOOLS_MASKFASTA ( MAKE_BED_MASK.out.bed, fasta ) + + BCFTOOLS_CONSENSUS ( bam_vcf.map { meta, bam, vcf, tbi -> [ meta, vcf, tbi ] }.join( BEDTOOLS_MASKFASTA.out.fasta, by: [0] 
) ) + + PLOT_BASE_DENSITY ( BCFTOOLS_CONSENSUS.out.fasta ) + + emit: + fasta = BCFTOOLS_CONSENSUS.out.fasta // channel: [ val(meta), [ fasta ] ] + tsv = PLOT_BASE_DENSITY.out.tsv // channel: [ val(meta), [ tsv ] ] + pdf = PLOT_BASE_DENSITY.out.pdf // channel: [ val(meta), [ pdf ] ] + bedtools_version = BEDTOOLS_MERGE.out.version // path: *.version.txt + bcftools_version = BCFTOOLS_CONSENSUS.out.version // path: *.version.txt + +} diff --git a/subworkflows/local/prepare_genome_illumina.nf b/subworkflows/local/prepare_genome_illumina.nf index fd883197..75f6774f 100644 --- a/subworkflows/local/prepare_genome_illumina.nf +++ b/subworkflows/local/prepare_genome_illumina.nf @@ -1,161 +1,161 @@ -/* - * Uncompress and prepare reference genome files -*/ - -params.genome_options = [:] -params.index_options = [:] -params.db_options = [:] -params.bowtie2_build_options = [:] -params.collapse_primers_options = [:] -params.bedtools_getfasta_options = [:] -params.snpeff_build_options = [:] -params.makeblastdb_options = [:] -params.kraken2_build_options = [:] - -include { - GUNZIP as GUNZIP_FASTA - GUNZIP as GUNZIP_GFF - GUNZIP as GUNZIP_PRIMER_BED - GUNZIP as GUNZIP_PRIMER_FASTA } from '../../modules/nf-core/software/gunzip/main' addParams( options: params.genome_options ) -include { UNTAR as UNTAR_BOWTIE2_INDEX } from '../../modules/nf-core/software/untar/main' addParams( options: params.index_options ) -include { UNTAR as UNTAR_KRAKEN2_DB } from '../../modules/nf-core/software/untar/main' addParams( options: params.db_options ) -include { UNTAR as UNTAR_BLAST_DB } from '../../modules/nf-core/software/untar/main' addParams( options: params.db_options ) -include { BOWTIE2_BUILD } from '../../modules/nf-core/software/bowtie2/build/main' addParams( options: params.bowtie2_build_options ) -include { BLAST_MAKEBLASTDB } from '../../modules/nf-core/software/blast/makeblastdb/main' addParams( options: params.makeblastdb_options ) -include { BEDTOOLS_GETFASTA } from 
'../../modules/nf-core/software/bedtools/getfasta/main' addParams( options: params.bedtools_getfasta_options ) -include { COLLAPSE_PRIMERS } from '../../modules/local/collapse_primers' addParams( options: params.collapse_primers_options ) -include { KRAKEN2_BUILD } from '../../modules/local/kraken2_build' addParams( options: params.kraken2_build_options ) -include { SNPEFF_BUILD } from '../../modules/local/snpeff_build' addParams( options: params.snpeff_build_options ) - -workflow PREPARE_GENOME { - take: - dummy_file - - main: - /* - * Uncompress genome fasta file if required - */ - if (params.fasta.endsWith('.gz')) { - ch_fasta = GUNZIP_FASTA ( params.fasta ).gunzip - } else { - ch_fasta = file(params.fasta) - } - - /* - * Uncompress GFF annotation file - */ - if (params.gff) { - if (params.gff.endsWith('.gz')) { - ch_gff = GUNZIP_GFF ( params.gff ).gunzip - } else { - ch_gff = file(params.gff) - } - } else { - ch_gff = dummy_file - } - - /* - * Prepare reference files required for variant calling - */ - ch_kraken2_db = Channel.empty() - if (!params.skip_kraken2) { - if (params.kraken2_db) { - if (params.kraken2_db.endsWith('.tar.gz')) { - ch_kraken2_db = UNTAR_KRAKEN2_DB ( params.kraken2_db ).untar - } else { - ch_kraken2_db = file(params.kraken2_db) - } - } else { - ch_kraken2_db = KRAKEN2_BUILD ( params.kraken2_db_name ).db - } - } - - /* - * Prepare files required for amplicon data - */ - ch_primer_bed = Channel.empty() - ch_primer_fasta = Channel.empty() - ch_primer_collapsed_bed = Channel.empty() - if (params.protocol == 'amplicon') { - if (params.primer_bed) { - if (params.primer_bed.endsWith('.gz')) { - ch_primer_bed = GUNZIP_PRIMER_BED ( params.primer_bed ).gunzip - } else { - ch_primer_bed = file(params.primer_bed) - } - } - - if (!params.skip_variants && !params.skip_mosdepth) { - ch_primer_collapsed_bed = COLLAPSE_PRIMERS ( ch_primer_bed, params.primer_left_suffix, params.primer_right_suffix ) - } - - if (!params.skip_assembly && 
!params.skip_cutadapt) { - if (params.primer_fasta) { - if (params.primer_fasta.endsWith('.gz')) { - ch_primer_fasta = GUNZIP_PRIMER_FASTA ( params.primer_fasta ).gunzip - } else { - ch_primer_fasta = file(params.primer_fasta) - } - } else { - ch_primer_fasta = BEDTOOLS_GETFASTA ( ch_primer_bed, ch_fasta ).fasta - } - } - } - - /* - * Prepare reference files required for variant calling - */ - ch_bowtie2_index = Channel.empty() - if (!params.skip_variants) { - if (params.bowtie2_index) { - if (params.bowtie2_index.endsWith('.tar.gz')) { - ch_bowtie2_index = UNTAR_BOWTIE2_INDEX ( params.bowtie2_index ).untar - } else { - ch_bowtie2_index = file(params.bowtie2_index) - } - } else { - ch_bowtie2_index = BOWTIE2_BUILD ( ch_fasta ).index - } - } - - /* - * Prepare reference files required for de novo assembly - */ - ch_blast_db = Channel.empty() - if (!params.skip_assembly) { - if (!params.skip_blast) { - if (params.blast_db) { - if (params.blast_db.endsWith('.tar.gz')) { - ch_blast_db = UNTAR_BLAST_DB ( params.blast_db ).untar - } else { - ch_blast_db = file(params.blast_db) - } - } else { - ch_blast_db = BLAST_MAKEBLASTDB ( ch_fasta ).db - } - } - } - - /* - * Make snpEff database - */ - ch_snpeff_db = Channel.empty() - ch_snpeff_config = Channel.empty() - if (!params.skip_variants && params.gff && !params.skip_snpeff) { - SNPEFF_BUILD ( ch_fasta, ch_gff ) - ch_snpeff_db = SNPEFF_BUILD.out.db - ch_snpeff_config = SNPEFF_BUILD.out.config - } - - emit: - fasta = ch_fasta // path: genome.fasta - gff = ch_gff // path: genome.gff - bowtie2_index = ch_bowtie2_index // path: bowtie2/index/ - primer_bed = ch_primer_bed // path: primer.bed - primer_collapsed_bed = ch_primer_collapsed_bed // path: primer.collapsed.bed - primer_fasta = ch_primer_fasta // path: primer.fasta - blast_db = ch_blast_db // path: blast_db/ - kraken2_db = ch_kraken2_db // path: kraken2_db/ - snpeff_db = ch_snpeff_db // path: snpeff_db - snpeff_config = ch_snpeff_config // path: snpeff.config -} +/* + * 
Uncompress and prepare reference genome files +*/ + +params.genome_options = [:] +params.index_options = [:] +params.db_options = [:] +params.bowtie2_build_options = [:] +params.collapse_primers_options = [:] +params.bedtools_getfasta_options = [:] +params.snpeff_build_options = [:] +params.makeblastdb_options = [:] +params.kraken2_build_options = [:] + +include { + GUNZIP as GUNZIP_FASTA + GUNZIP as GUNZIP_GFF + GUNZIP as GUNZIP_PRIMER_BED + GUNZIP as GUNZIP_PRIMER_FASTA } from '../../modules/nf-core/software/gunzip/main' addParams( options: params.genome_options ) +include { UNTAR as UNTAR_BOWTIE2_INDEX } from '../../modules/nf-core/software/untar/main' addParams( options: params.index_options ) +include { UNTAR as UNTAR_KRAKEN2_DB } from '../../modules/nf-core/software/untar/main' addParams( options: params.db_options ) +include { UNTAR as UNTAR_BLAST_DB } from '../../modules/nf-core/software/untar/main' addParams( options: params.db_options ) +include { BOWTIE2_BUILD } from '../../modules/nf-core/software/bowtie2/build/main' addParams( options: params.bowtie2_build_options ) +include { BLAST_MAKEBLASTDB } from '../../modules/nf-core/software/blast/makeblastdb/main' addParams( options: params.makeblastdb_options ) +include { BEDTOOLS_GETFASTA } from '../../modules/nf-core/software/bedtools/getfasta/main' addParams( options: params.bedtools_getfasta_options ) +include { COLLAPSE_PRIMERS } from '../../modules/local/collapse_primers' addParams( options: params.collapse_primers_options ) +include { KRAKEN2_BUILD } from '../../modules/local/kraken2_build' addParams( options: params.kraken2_build_options ) +include { SNPEFF_BUILD } from '../../modules/local/snpeff_build' addParams( options: params.snpeff_build_options ) + +workflow PREPARE_GENOME { + take: + dummy_file + + main: + /* + * Uncompress genome fasta file if required + */ + if (params.fasta.endsWith('.gz')) { + ch_fasta = GUNZIP_FASTA ( params.fasta ).gunzip + } else { + ch_fasta = file(params.fasta) + } + 
+ /* + * Uncompress GFF annotation file + */ + if (params.gff) { + if (params.gff.endsWith('.gz')) { + ch_gff = GUNZIP_GFF ( params.gff ).gunzip + } else { + ch_gff = file(params.gff) + } + } else { + ch_gff = dummy_file + } + + /* + * Prepare reference files required for variant calling + */ + ch_kraken2_db = Channel.empty() + if (!params.skip_kraken2) { + if (params.kraken2_db) { + if (params.kraken2_db.endsWith('.tar.gz')) { + ch_kraken2_db = UNTAR_KRAKEN2_DB ( params.kraken2_db ).untar + } else { + ch_kraken2_db = file(params.kraken2_db) + } + } else { + ch_kraken2_db = KRAKEN2_BUILD ( params.kraken2_db_name ).db + } + } + + /* + * Prepare files required for amplicon data + */ + ch_primer_bed = Channel.empty() + ch_primer_fasta = Channel.empty() + ch_primer_collapsed_bed = Channel.empty() + if (params.protocol == 'amplicon') { + if (params.primer_bed) { + if (params.primer_bed.endsWith('.gz')) { + ch_primer_bed = GUNZIP_PRIMER_BED ( params.primer_bed ).gunzip + } else { + ch_primer_bed = file(params.primer_bed) + } + } + + if (!params.skip_variants && !params.skip_mosdepth) { + ch_primer_collapsed_bed = COLLAPSE_PRIMERS ( ch_primer_bed, params.primer_left_suffix, params.primer_right_suffix ) + } + + if (!params.skip_assembly && !params.skip_cutadapt) { + if (params.primer_fasta) { + if (params.primer_fasta.endsWith('.gz')) { + ch_primer_fasta = GUNZIP_PRIMER_FASTA ( params.primer_fasta ).gunzip + } else { + ch_primer_fasta = file(params.primer_fasta) + } + } else { + ch_primer_fasta = BEDTOOLS_GETFASTA ( ch_primer_bed, ch_fasta ).fasta + } + } + } + + /* + * Prepare reference files required for variant calling + */ + ch_bowtie2_index = Channel.empty() + if (!params.skip_variants) { + if (params.bowtie2_index) { + if (params.bowtie2_index.endsWith('.tar.gz')) { + ch_bowtie2_index = UNTAR_BOWTIE2_INDEX ( params.bowtie2_index ).untar + } else { + ch_bowtie2_index = file(params.bowtie2_index) + } + } else { + ch_bowtie2_index = BOWTIE2_BUILD ( ch_fasta ).index + } 
+ } + + /* + * Prepare reference files required for de novo assembly + */ + ch_blast_db = Channel.empty() + if (!params.skip_assembly) { + if (!params.skip_blast) { + if (params.blast_db) { + if (params.blast_db.endsWith('.tar.gz')) { + ch_blast_db = UNTAR_BLAST_DB ( params.blast_db ).untar + } else { + ch_blast_db = file(params.blast_db) + } + } else { + ch_blast_db = BLAST_MAKEBLASTDB ( ch_fasta ).db + } + } + } + + /* + * Make snpEff database + */ + ch_snpeff_db = Channel.empty() + ch_snpeff_config = Channel.empty() + if (!params.skip_variants && params.gff && !params.skip_snpeff) { + SNPEFF_BUILD ( ch_fasta, ch_gff ) + ch_snpeff_db = SNPEFF_BUILD.out.db + ch_snpeff_config = SNPEFF_BUILD.out.config + } + + emit: + fasta = ch_fasta // path: genome.fasta + gff = ch_gff // path: genome.gff + bowtie2_index = ch_bowtie2_index // path: bowtie2/index/ + primer_bed = ch_primer_bed // path: primer.bed + primer_collapsed_bed = ch_primer_collapsed_bed // path: primer.collapsed.bed + primer_fasta = ch_primer_fasta // path: primer.fasta + blast_db = ch_blast_db // path: blast_db/ + kraken2_db = ch_kraken2_db // path: kraken2_db/ + snpeff_db = ch_snpeff_db // path: snpeff_db + snpeff_config = ch_snpeff_config // path: snpeff.config +} diff --git a/subworkflows/local/prepare_genome_nanopore.nf b/subworkflows/local/prepare_genome_nanopore.nf index a0c040fe..6b01cd8e 100644 --- a/subworkflows/local/prepare_genome_nanopore.nf +++ b/subworkflows/local/prepare_genome_nanopore.nf @@ -1,81 +1,81 @@ -/* - * Uncompress and prepare reference genome files -*/ - -params.genome_options = [:] -params.collapse_primers_options = [:] -params.snpeff_build_options = [:] - -include { - GUNZIP as GUNZIP_FASTA - GUNZIP as GUNZIP_GFF - GUNZIP as GUNZIP_PRIMER_BED } from '../../modules/nf-core/software/gunzip/main' addParams( options: params.genome_options ) -include { COLLAPSE_PRIMERS } from '../../modules/local/collapse_primers' addParams( options: params.collapse_primers_options ) -include { 
SNPEFF_BUILD } from '../../modules/local/snpeff_build' addParams( options: params.snpeff_build_options ) - -workflow PREPARE_GENOME { - take: - dummy_file - - main: - /* - * Uncompress genome fasta file if required - */ - if (params.fasta.endsWith('.gz')) { - ch_fasta = GUNZIP_FASTA ( params.fasta ).gunzip - } else { - ch_fasta = file(params.fasta) - } - - /* - * Uncompress GFF annotation file - */ - if (params.gff) { - if (params.gff.endsWith('.gz')) { - ch_gff = GUNZIP_GFF ( params.gff ).gunzip - } else { - ch_gff = file(params.gff) - } - } else { - ch_gff = dummy_file - } - - /* - * Uncompress primer BED file - */ - ch_primer_bed = Channel.empty() - if (params.primer_bed) { - if (params.primer_bed.endsWith('.gz')) { - ch_primer_bed = GUNZIP_PRIMER_BED ( params.primer_bed ).gunzip - } else { - ch_primer_bed = file(params.primer_bed) - } - } - - /* - * Generate collapsed BED file - */ - ch_primer_collapsed_bed = Channel.empty() - if (!params.skip_mosdepth) { - ch_primer_collapsed_bed = COLLAPSE_PRIMERS ( ch_primer_bed, params.primer_left_suffix, params.primer_right_suffix ) - } - - /* - * Make snpEff database - */ - ch_snpeff_db = Channel.empty() - ch_snpeff_config = Channel.empty() - if (params.gff && !params.skip_snpeff) { - SNPEFF_BUILD ( ch_fasta, ch_gff ) - ch_snpeff_db = SNPEFF_BUILD.out.db - ch_snpeff_config = SNPEFF_BUILD.out.config - } - - emit: - fasta = ch_fasta // path: genome.fasta - gff = ch_gff // path: genome.gff - primer_bed = ch_primer_bed // path: primer.bed - primer_collapsed_bed = ch_primer_collapsed_bed // path: primer.collapsed.bed - snpeff_db = ch_snpeff_db // path: snpeff_db - snpeff_config = ch_snpeff_config // path: snpeff.config -} +/* + * Uncompress and prepare reference genome files +*/ + +params.genome_options = [:] +params.collapse_primers_options = [:] +params.snpeff_build_options = [:] + +include { + GUNZIP as GUNZIP_FASTA + GUNZIP as GUNZIP_GFF + GUNZIP as GUNZIP_PRIMER_BED } from '../../modules/nf-core/software/gunzip/main' 
addParams( options: params.genome_options ) +include { COLLAPSE_PRIMERS } from '../../modules/local/collapse_primers' addParams( options: params.collapse_primers_options ) +include { SNPEFF_BUILD } from '../../modules/local/snpeff_build' addParams( options: params.snpeff_build_options ) + +workflow PREPARE_GENOME { + take: + dummy_file + + main: + /* + * Uncompress genome fasta file if required + */ + if (params.fasta.endsWith('.gz')) { + ch_fasta = GUNZIP_FASTA ( params.fasta ).gunzip + } else { + ch_fasta = file(params.fasta) + } + + /* + * Uncompress GFF annotation file + */ + if (params.gff) { + if (params.gff.endsWith('.gz')) { + ch_gff = GUNZIP_GFF ( params.gff ).gunzip + } else { + ch_gff = file(params.gff) + } + } else { + ch_gff = dummy_file + } + + /* + * Uncompress primer BED file + */ + ch_primer_bed = Channel.empty() + if (params.primer_bed) { + if (params.primer_bed.endsWith('.gz')) { + ch_primer_bed = GUNZIP_PRIMER_BED ( params.primer_bed ).gunzip + } else { + ch_primer_bed = file(params.primer_bed) + } + } + + /* + * Generate collapsed BED file + */ + ch_primer_collapsed_bed = Channel.empty() + if (!params.skip_mosdepth) { + ch_primer_collapsed_bed = COLLAPSE_PRIMERS ( ch_primer_bed, params.primer_left_suffix, params.primer_right_suffix ) + } + + /* + * Make snpEff database + */ + ch_snpeff_db = Channel.empty() + ch_snpeff_config = Channel.empty() + if (params.gff && !params.skip_snpeff) { + SNPEFF_BUILD ( ch_fasta, ch_gff ) + ch_snpeff_db = SNPEFF_BUILD.out.db + ch_snpeff_config = SNPEFF_BUILD.out.config + } + + emit: + fasta = ch_fasta // path: genome.fasta + gff = ch_gff // path: genome.gff + primer_bed = ch_primer_bed // path: primer.bed + primer_collapsed_bed = ch_primer_collapsed_bed // path: primer.collapsed.bed + snpeff_db = ch_snpeff_db // path: snpeff_db + snpeff_config = ch_snpeff_config // path: snpeff.config +} diff --git a/subworkflows/local/primer_trim_ivar.nf b/subworkflows/local/primer_trim_ivar.nf index fe94be30..f9e6dfa3 100644 
--- a/subworkflows/local/primer_trim_ivar.nf +++ b/subworkflows/local/primer_trim_ivar.nf @@ -1,39 +1,39 @@ -/* - * iVar trim, sort, index BAM file and run samtools stats, flagstat and idxstats - */ - -params.ivar_trim_options = [:] -params.samtools_options = [:] - -include { IVAR_TRIM } from '../../modules/nf-core/software/ivar/trim/main' addParams( options: params.ivar_trim_options ) -include { SAMTOOLS_INDEX } from '../../modules/nf-core/software/samtools/index/main' addParams( options: params.samtools_options ) -include { BAM_SORT_SAMTOOLS } from '../nf-core/bam_sort_samtools' addParams( options: params.samtools_options ) - -workflow PRIMER_TRIM_IVAR { - take: - bam // channel: [ val(meta), [ bam ], [bai] ] - bed // path : bed - - main: - /* - * iVar trim primers - */ - IVAR_TRIM ( bam, bed ) - - /* - * Sort, index BAM file and run samtools stats, flagstat and idxstats - */ - BAM_SORT_SAMTOOLS ( IVAR_TRIM.out.bam ) - - emit: - bam_orig = IVAR_TRIM.out.bam // channel: [ val(meta), bam ] - log_out = IVAR_TRIM.out.log // channel: [ val(meta), log ] - ivar_version = IVAR_TRIM.out.version // path: *.version.txt - - bam = BAM_SORT_SAMTOOLS.out.bam // channel: [ val(meta), [ bam ] ] - bai = BAM_SORT_SAMTOOLS.out.bai // channel: [ val(meta), [ bai ] ] - stats = BAM_SORT_SAMTOOLS.out.stats // channel: [ val(meta), [ stats ] ] - flagstat = BAM_SORT_SAMTOOLS.out.flagstat // channel: [ val(meta), [ flagstat ] ] - idxstats = BAM_SORT_SAMTOOLS.out.idxstats // channel: [ val(meta), [ idxstats ] ] - samtools_version = BAM_SORT_SAMTOOLS.out.version // path: *.version.txt -} +/* + * iVar trim, sort, index BAM file and run samtools stats, flagstat and idxstats + */ + +params.ivar_trim_options = [:] +params.samtools_options = [:] + +include { IVAR_TRIM } from '../../modules/nf-core/software/ivar/trim/main' addParams( options: params.ivar_trim_options ) +include { SAMTOOLS_INDEX } from '../../modules/nf-core/software/samtools/index/main' addParams( options: params.samtools_options 
) +include { BAM_SORT_SAMTOOLS } from '../nf-core/bam_sort_samtools' addParams( options: params.samtools_options ) + +workflow PRIMER_TRIM_IVAR { + take: + bam // channel: [ val(meta), [ bam ], [bai] ] + bed // path : bed + + main: + /* + * iVar trim primers + */ + IVAR_TRIM ( bam, bed ) + + /* + * Sort, index BAM file and run samtools stats, flagstat and idxstats + */ + BAM_SORT_SAMTOOLS ( IVAR_TRIM.out.bam ) + + emit: + bam_orig = IVAR_TRIM.out.bam // channel: [ val(meta), bam ] + log_out = IVAR_TRIM.out.log // channel: [ val(meta), log ] + ivar_version = IVAR_TRIM.out.version // path: *.version.txt + + bam = BAM_SORT_SAMTOOLS.out.bam // channel: [ val(meta), [ bam ] ] + bai = BAM_SORT_SAMTOOLS.out.bai // channel: [ val(meta), [ bai ] ] + stats = BAM_SORT_SAMTOOLS.out.stats // channel: [ val(meta), [ stats ] ] + flagstat = BAM_SORT_SAMTOOLS.out.flagstat // channel: [ val(meta), [ flagstat ] ] + idxstats = BAM_SORT_SAMTOOLS.out.idxstats // channel: [ val(meta), [ idxstats ] ] + samtools_version = BAM_SORT_SAMTOOLS.out.version // path: *.version.txt +} diff --git a/subworkflows/local/snpeff_snpsift.nf b/subworkflows/local/snpeff_snpsift.nf index d6095a3c..40b05bba 100644 --- a/subworkflows/local/snpeff_snpsift.nf +++ b/subworkflows/local/snpeff_snpsift.nf @@ -1,43 +1,43 @@ -/* - * Run snpEff, bgzip, tabix, stats and SnpSift commands - */ - -params.snpeff_options = [:] -params.bgzip_options = [:] -params.tabix_options = [:] -params.stats_options = [:] -params.snpsift_options = [:] - -include { SNPEFF_ANN } from '../../modules/local/snpeff_ann' addParams( options: params.snpeff_options ) -include { SNPSIFT_EXTRACTFIELDS } from '../../modules/local/snpsift_extractfields' addParams( options: params.snpsift_options ) -include { VCF_BGZIP_TABIX_STATS } from '../nf-core/vcf_bgzip_tabix_stats' addParams( bgzip_options: params.bgzip_options, tabix_options: params.tabix_options, stats_options: params.stats_options ) - -workflow SNPEFF_SNPSIFT { - take: - vcf // channel: [ 
val(meta), [ vcf ] ] - db // path : snpEff database - config // path : snpEff config - fasta // path : genome.fasta - - main: - SNPEFF_ANN ( vcf, db, config, fasta ) - - VCF_BGZIP_TABIX_STATS ( SNPEFF_ANN.out.vcf ) - - SNPSIFT_EXTRACTFIELDS ( VCF_BGZIP_TABIX_STATS.out.vcf ) - - emit: - csv = SNPEFF_ANN.out.csv // channel: [ val(meta), [ csv ] ] - txt = SNPEFF_ANN.out.txt // channel: [ val(meta), [ txt ] ] - html = SNPEFF_ANN.out.html // channel: [ val(meta), [ html ] ] - snpeff_version = SNPEFF_ANN.out.version // path: *.version.txt - - vcf = VCF_BGZIP_TABIX_STATS.out.vcf // channel: [ val(meta), [ vcf.gz ] ] - tbi = VCF_BGZIP_TABIX_STATS.out.tbi // channel: [ val(meta), [ tbi ] ] - stats = VCF_BGZIP_TABIX_STATS.out.stats // channel: [ val(meta), [ txt ] ] - tabix_version = VCF_BGZIP_TABIX_STATS.out.tabix_version // path: *.version.txt - bcftools_version = VCF_BGZIP_TABIX_STATS.out.bcftools_version // path: *.version.txt - - snpsift_txt = SNPSIFT_EXTRACTFIELDS.out.txt // channel: [ val(meta), [ txt ] ] - snpsift_version = SNPSIFT_EXTRACTFIELDS.out.version // path: *.version.txt -} +/* + * Run snpEff, bgzip, tabix, stats and SnpSift commands + */ + +params.snpeff_options = [:] +params.bgzip_options = [:] +params.tabix_options = [:] +params.stats_options = [:] +params.snpsift_options = [:] + +include { SNPEFF_ANN } from '../../modules/local/snpeff_ann' addParams( options: params.snpeff_options ) +include { SNPSIFT_EXTRACTFIELDS } from '../../modules/local/snpsift_extractfields' addParams( options: params.snpsift_options ) +include { VCF_BGZIP_TABIX_STATS } from '../nf-core/vcf_bgzip_tabix_stats' addParams( bgzip_options: params.bgzip_options, tabix_options: params.tabix_options, stats_options: params.stats_options ) + +workflow SNPEFF_SNPSIFT { + take: + vcf // channel: [ val(meta), [ vcf ] ] + db // path : snpEff database + config // path : snpEff config + fasta // path : genome.fasta + + main: + SNPEFF_ANN ( vcf, db, config, fasta ) + + VCF_BGZIP_TABIX_STATS ( 
SNPEFF_ANN.out.vcf ) + + SNPSIFT_EXTRACTFIELDS ( VCF_BGZIP_TABIX_STATS.out.vcf ) + + emit: + csv = SNPEFF_ANN.out.csv // channel: [ val(meta), [ csv ] ] + txt = SNPEFF_ANN.out.txt // channel: [ val(meta), [ txt ] ] + html = SNPEFF_ANN.out.html // channel: [ val(meta), [ html ] ] + snpeff_version = SNPEFF_ANN.out.version // path: *.version.txt + + vcf = VCF_BGZIP_TABIX_STATS.out.vcf // channel: [ val(meta), [ vcf.gz ] ] + tbi = VCF_BGZIP_TABIX_STATS.out.tbi // channel: [ val(meta), [ tbi ] ] + stats = VCF_BGZIP_TABIX_STATS.out.stats // channel: [ val(meta), [ txt ] ] + tabix_version = VCF_BGZIP_TABIX_STATS.out.tabix_version // path: *.version.txt + bcftools_version = VCF_BGZIP_TABIX_STATS.out.bcftools_version // path: *.version.txt + + snpsift_txt = SNPSIFT_EXTRACTFIELDS.out.txt // channel: [ val(meta), [ txt ] ] + snpsift_version = SNPSIFT_EXTRACTFIELDS.out.version // path: *.version.txt +} diff --git a/subworkflows/local/variants_bcftools.nf b/subworkflows/local/variants_bcftools.nf index 7512f184..82bfff38 100644 --- a/subworkflows/local/variants_bcftools.nf +++ b/subworkflows/local/variants_bcftools.nf @@ -1,172 +1,172 @@ -/* - * Variant calling and downstream processing for BCFTools - */ - -params.bcftools_mpileup_options = [:] -params.quast_options = [:] -params.consensus_genomecov_options = [:] -params.consensus_merge_options = [:] -params.consensus_mask_options = [:] -params.consensus_maskfasta_options = [:] -params.consensus_bcftools_options = [:] -params.consensus_plot_options = [:] -params.snpeff_options = [:] -params.snpsift_options = [:] -params.snpeff_bgzip_options = [:] -params.snpeff_tabix_options = [:] -params.snpeff_stats_options = [:] -params.pangolin_options = [:] -params.nextclade_options = [:] -params.asciigenome_options = [:] - -include { BCFTOOLS_MPILEUP } from '../../modules/nf-core/software/bcftools/mpileup/main' addParams( options: params.bcftools_mpileup_options ) -include { QUAST } from '../../modules/nf-core/software/quast/main' 
addParams( options: params.quast_options ) -include { PANGOLIN } from '../../modules/nf-core/software/pangolin/main' addParams( options: params.pangolin_options ) -include { NEXTCLADE } from '../../modules/nf-core/software/nextclade/main' addParams( options: params.nextclade_options ) -include { ASCIIGENOME } from '../../modules/local/asciigenome' addParams( options: params.asciigenome_options ) -include { MAKE_CONSENSUS } from './make_consensus' addParams( genomecov_options: params.consensus_genomecov_options, merge_options: params.consensus_merge_options, mask_options: params.consensus_mask_options, maskfasta_options: params.consensus_maskfasta_options, bcftools_options: params.consensus_bcftools_options, plot_bases_options: params.consensus_plot_options ) -include { SNPEFF_SNPSIFT } from './snpeff_snpsift' addParams( snpeff_options: params.snpeff_options, snpsift_options: params.snpsift_options, bgzip_options: params.snpeff_bgzip_options, tabix_options: params.snpeff_tabix_options, stats_options: params.snpeff_stats_options ) - -workflow VARIANTS_BCFTOOLS { - take: - bam // channel: [ val(meta), [ bam ] ] - fasta // channel: /path/to/genome.fasta - gff // channel: /path/to/genome.gff - bed // channel: /path/to/primers.bed - snpeff_db // channel: /path/to/snpeff_db/ - snpeff_config // channel: /path/to/snpeff.config - - main: - /* - * Call variants - */ - BCFTOOLS_MPILEUP ( bam, fasta ) - - /* - * Create genome consensus using variants in VCF, run QUAST and pangolin - */ - ch_consensus = Channel.empty() - ch_bases_tsv = Channel.empty() - ch_bases_pdf = Channel.empty() - ch_bedtools_version = Channel.empty() - ch_quast_results = Channel.empty() - ch_quast_tsv = Channel.empty() - ch_quast_version = Channel.empty() - ch_pangolin_report = Channel.empty() - ch_pangolin_version = Channel.empty() - ch_nextclade_report = Channel.empty() - ch_nextclade_version = Channel.empty() - if (!params.skip_consensus) { - MAKE_CONSENSUS ( bam.join(BCFTOOLS_MPILEUP.out.vcf, by: 
[0]).join(BCFTOOLS_MPILEUP.out.tbi, by: [0]), fasta ) - ch_consensus = MAKE_CONSENSUS.out.fasta - ch_bases_tsv = MAKE_CONSENSUS.out.tsv - ch_bases_pdf = MAKE_CONSENSUS.out.pdf - ch_bedtools_version = MAKE_CONSENSUS.out.bedtools_version - - if (!params.skip_variants_quast) { - QUAST ( ch_consensus.collect{ it[1] }, fasta, gff, true, params.gff ) - ch_quast_results = QUAST.out.results - ch_quast_tsv = QUAST.out.tsv - ch_quast_version = QUAST.out.version - } - - if (!params.skip_pangolin) { - PANGOLIN ( ch_consensus ) - ch_pangolin_report = PANGOLIN.out.report - ch_pangolin_version = PANGOLIN.out.version - } - - if (!params.skip_nextclade) { - NEXTCLADE ( ch_consensus, 'csv' ) - ch_nextclade_report = NEXTCLADE.out.csv - ch_nextclade_version = NEXTCLADE.out.version - } - } - - /* - * Annotate variants - */ - ch_snpeff_vcf = Channel.empty() - ch_snpeff_tbi = Channel.empty() - ch_snpeff_stats = Channel.empty() - ch_snpeff_csv = Channel.empty() - ch_snpeff_txt = Channel.empty() - ch_snpeff_html = Channel.empty() - ch_snpsift_txt = Channel.empty() - ch_snpeff_version = Channel.empty() - ch_snpsift_version = Channel.empty() - if (params.gff && !params.skip_snpeff) { - SNPEFF_SNPSIFT ( BCFTOOLS_MPILEUP.out.vcf, snpeff_db, snpeff_config, fasta ) - ch_snpeff_vcf = SNPEFF_SNPSIFT.out.vcf - ch_snpeff_tbi = SNPEFF_SNPSIFT.out.tbi - ch_snpeff_stats = SNPEFF_SNPSIFT.out.stats - ch_snpeff_csv = SNPEFF_SNPSIFT.out.csv - ch_snpeff_txt = SNPEFF_SNPSIFT.out.txt - ch_snpeff_html = SNPEFF_SNPSIFT.out.html - ch_snpsift_txt = SNPEFF_SNPSIFT.out.snpsift_txt - ch_snpeff_version = SNPEFF_SNPSIFT.out.snpeff_version - ch_snpsift_version = SNPEFF_SNPSIFT.out.snpsift_version - } - - /* - * MODULE: Variant screenshots with ASCIIGenome - */ - ch_asciigenome_pdf = Channel.empty() - ch_asciigenome_version = Channel.empty() - if (!params.skip_asciigenome) { - bam - .join(BCFTOOLS_MPILEUP.out.vcf, by: [0]) - .join(BCFTOOLS_MPILEUP.out.stats, by: [0]) - .map { meta, bam, vcf, stats -> - if 
(WorkflowCommons.getNumVariantsFromBCFToolsStats(stats) > 0) { - return [ meta, bam, vcf ] - } - } - .set { ch_asciigenome } - - ASCIIGENOME ( - ch_asciigenome, - fasta, - gff, - bed, - params.asciigenome_window_size, - params.asciigenome_read_depth - ) - ch_asciigenome_pdf = ASCIIGENOME.out.pdf - ch_asciigenome_version = ASCIIGENOME.out.version - } - - emit: - vcf = BCFTOOLS_MPILEUP.out.vcf // channel: [ val(meta), [ vcf ] ] - tbi = BCFTOOLS_MPILEUP.out.tbi // channel: [ val(meta), [ tbi ] ] - stats = BCFTOOLS_MPILEUP.out.stats // channel: [ val(meta), [ txt ] ] - bcftools_version = BCFTOOLS_MPILEUP.out.version // path: *.version.txt - - consensus = ch_consensus // channel: [ val(meta), [ fasta ] ] - bases_tsv = ch_bases_tsv // channel: [ val(meta), [ tsv ] ] - bases_pdf = ch_bases_pdf // channel: [ val(meta), [ pdf ] ] - bedtools_version = ch_bedtools_version // path: *.version.txt - - quast_results = ch_quast_results // channel: [ val(meta), [ results ] ] - quast_tsv = ch_quast_tsv // channel: [ val(meta), [ tsv ] ] - quast_version = ch_quast_version // path: *.version.txt - - snpeff_vcf = ch_snpeff_vcf // channel: [ val(meta), [ vcf.gz ] ] - snpeff_tbi = ch_snpeff_tbi // channel: [ val(meta), [ tbi ] ] - snpeff_stats = ch_snpeff_stats // channel: [ val(meta), [ txt ] ] - snpeff_csv = ch_snpeff_csv // channel: [ val(meta), [ csv ] ] - snpeff_txt = ch_snpeff_txt // channel: [ val(meta), [ txt ] ] - snpeff_html = ch_snpeff_html // channel: [ val(meta), [ html ] ] - snpsift_txt = ch_snpsift_txt // channel: [ val(meta), [ txt ] ] - snpeff_version = ch_snpeff_version // path: *.version.txt - snpsift_version = ch_snpsift_version // path: *.version.txt - - pangolin_report = ch_pangolin_report // channel: [ val(meta), [ csv ] ] - pangolin_version = ch_pangolin_version // path: *.version.txt - - nextclade_report = ch_nextclade_report // channel: [ val(meta), [ csv ] ] - nextclade_version = ch_nextclade_version // path: *.version.txt - - asciigenome_pdf = 
ch_asciigenome_pdf // channel: [ val(meta), [ pdf ] ] - asciigenome_version = ch_asciigenome_version // path: *.version.txt -} +/* + * Variant calling and downstream processing for BCFTools + */ + +params.bcftools_mpileup_options = [:] +params.quast_options = [:] +params.consensus_genomecov_options = [:] +params.consensus_merge_options = [:] +params.consensus_mask_options = [:] +params.consensus_maskfasta_options = [:] +params.consensus_bcftools_options = [:] +params.consensus_plot_options = [:] +params.snpeff_options = [:] +params.snpsift_options = [:] +params.snpeff_bgzip_options = [:] +params.snpeff_tabix_options = [:] +params.snpeff_stats_options = [:] +params.pangolin_options = [:] +params.nextclade_options = [:] +params.asciigenome_options = [:] + +include { BCFTOOLS_MPILEUP } from '../../modules/nf-core/software/bcftools/mpileup/main' addParams( options: params.bcftools_mpileup_options ) +include { QUAST } from '../../modules/nf-core/software/quast/main' addParams( options: params.quast_options ) +include { PANGOLIN } from '../../modules/nf-core/software/pangolin/main' addParams( options: params.pangolin_options ) +include { NEXTCLADE } from '../../modules/nf-core/software/nextclade/main' addParams( options: params.nextclade_options ) +include { ASCIIGENOME } from '../../modules/local/asciigenome' addParams( options: params.asciigenome_options ) +include { MAKE_CONSENSUS } from './make_consensus' addParams( genomecov_options: params.consensus_genomecov_options, merge_options: params.consensus_merge_options, mask_options: params.consensus_mask_options, maskfasta_options: params.consensus_maskfasta_options, bcftools_options: params.consensus_bcftools_options, plot_bases_options: params.consensus_plot_options ) +include { SNPEFF_SNPSIFT } from './snpeff_snpsift' addParams( snpeff_options: params.snpeff_options, snpsift_options: params.snpsift_options, bgzip_options: params.snpeff_bgzip_options, tabix_options: params.snpeff_tabix_options, stats_options: 
params.snpeff_stats_options ) + +workflow VARIANTS_BCFTOOLS { + take: + bam // channel: [ val(meta), [ bam ] ] + fasta // channel: /path/to/genome.fasta + gff // channel: /path/to/genome.gff + bed // channel: /path/to/primers.bed + snpeff_db // channel: /path/to/snpeff_db/ + snpeff_config // channel: /path/to/snpeff.config + + main: + /* + * Call variants + */ + BCFTOOLS_MPILEUP ( bam, fasta ) + + /* + * Create genome consensus using variants in VCF, run QUAST and pangolin + */ + ch_consensus = Channel.empty() + ch_bases_tsv = Channel.empty() + ch_bases_pdf = Channel.empty() + ch_bedtools_version = Channel.empty() + ch_quast_results = Channel.empty() + ch_quast_tsv = Channel.empty() + ch_quast_version = Channel.empty() + ch_pangolin_report = Channel.empty() + ch_pangolin_version = Channel.empty() + ch_nextclade_report = Channel.empty() + ch_nextclade_version = Channel.empty() + if (!params.skip_consensus) { + MAKE_CONSENSUS ( bam.join(BCFTOOLS_MPILEUP.out.vcf, by: [0]).join(BCFTOOLS_MPILEUP.out.tbi, by: [0]), fasta ) + ch_consensus = MAKE_CONSENSUS.out.fasta + ch_bases_tsv = MAKE_CONSENSUS.out.tsv + ch_bases_pdf = MAKE_CONSENSUS.out.pdf + ch_bedtools_version = MAKE_CONSENSUS.out.bedtools_version + + if (!params.skip_variants_quast) { + QUAST ( ch_consensus.collect{ it[1] }, fasta, gff, true, params.gff ) + ch_quast_results = QUAST.out.results + ch_quast_tsv = QUAST.out.tsv + ch_quast_version = QUAST.out.version + } + + if (!params.skip_pangolin) { + PANGOLIN ( ch_consensus ) + ch_pangolin_report = PANGOLIN.out.report + ch_pangolin_version = PANGOLIN.out.version + } + + if (!params.skip_nextclade) { + NEXTCLADE ( ch_consensus, 'csv' ) + ch_nextclade_report = NEXTCLADE.out.csv + ch_nextclade_version = NEXTCLADE.out.version + } + } + + /* + * Annotate variants + */ + ch_snpeff_vcf = Channel.empty() + ch_snpeff_tbi = Channel.empty() + ch_snpeff_stats = Channel.empty() + ch_snpeff_csv = Channel.empty() + ch_snpeff_txt = Channel.empty() + ch_snpeff_html = Channel.empty() 
+ ch_snpsift_txt = Channel.empty() + ch_snpeff_version = Channel.empty() + ch_snpsift_version = Channel.empty() + if (params.gff && !params.skip_snpeff) { + SNPEFF_SNPSIFT ( BCFTOOLS_MPILEUP.out.vcf, snpeff_db, snpeff_config, fasta ) + ch_snpeff_vcf = SNPEFF_SNPSIFT.out.vcf + ch_snpeff_tbi = SNPEFF_SNPSIFT.out.tbi + ch_snpeff_stats = SNPEFF_SNPSIFT.out.stats + ch_snpeff_csv = SNPEFF_SNPSIFT.out.csv + ch_snpeff_txt = SNPEFF_SNPSIFT.out.txt + ch_snpeff_html = SNPEFF_SNPSIFT.out.html + ch_snpsift_txt = SNPEFF_SNPSIFT.out.snpsift_txt + ch_snpeff_version = SNPEFF_SNPSIFT.out.snpeff_version + ch_snpsift_version = SNPEFF_SNPSIFT.out.snpsift_version + } + + /* + * MODULE: Variant screenshots with ASCIIGenome + */ + ch_asciigenome_pdf = Channel.empty() + ch_asciigenome_version = Channel.empty() + if (!params.skip_asciigenome) { + bam + .join(BCFTOOLS_MPILEUP.out.vcf, by: [0]) + .join(BCFTOOLS_MPILEUP.out.stats, by: [0]) + .map { meta, bam, vcf, stats -> + if (WorkflowCommons.getNumVariantsFromBCFToolsStats(stats) > 0) { + return [ meta, bam, vcf ] + } + } + .set { ch_asciigenome } + + ASCIIGENOME ( + ch_asciigenome, + fasta, + gff, + bed, + params.asciigenome_window_size, + params.asciigenome_read_depth + ) + ch_asciigenome_pdf = ASCIIGENOME.out.pdf + ch_asciigenome_version = ASCIIGENOME.out.version + } + + emit: + vcf = BCFTOOLS_MPILEUP.out.vcf // channel: [ val(meta), [ vcf ] ] + tbi = BCFTOOLS_MPILEUP.out.tbi // channel: [ val(meta), [ tbi ] ] + stats = BCFTOOLS_MPILEUP.out.stats // channel: [ val(meta), [ txt ] ] + bcftools_version = BCFTOOLS_MPILEUP.out.version // path: *.version.txt + + consensus = ch_consensus // channel: [ val(meta), [ fasta ] ] + bases_tsv = ch_bases_tsv // channel: [ val(meta), [ tsv ] ] + bases_pdf = ch_bases_pdf // channel: [ val(meta), [ pdf ] ] + bedtools_version = ch_bedtools_version // path: *.version.txt + + quast_results = ch_quast_results // channel: [ val(meta), [ results ] ] + quast_tsv = ch_quast_tsv // channel: [ val(meta), [ tsv ] ] 
+ quast_version = ch_quast_version // path: *.version.txt + + snpeff_vcf = ch_snpeff_vcf // channel: [ val(meta), [ vcf.gz ] ] + snpeff_tbi = ch_snpeff_tbi // channel: [ val(meta), [ tbi ] ] + snpeff_stats = ch_snpeff_stats // channel: [ val(meta), [ txt ] ] + snpeff_csv = ch_snpeff_csv // channel: [ val(meta), [ csv ] ] + snpeff_txt = ch_snpeff_txt // channel: [ val(meta), [ txt ] ] + snpeff_html = ch_snpeff_html // channel: [ val(meta), [ html ] ] + snpsift_txt = ch_snpsift_txt // channel: [ val(meta), [ txt ] ] + snpeff_version = ch_snpeff_version // path: *.version.txt + snpsift_version = ch_snpsift_version // path: *.version.txt + + pangolin_report = ch_pangolin_report // channel: [ val(meta), [ csv ] ] + pangolin_version = ch_pangolin_version // path: *.version.txt + + nextclade_report = ch_nextclade_report // channel: [ val(meta), [ csv ] ] + nextclade_version = ch_nextclade_version // path: *.version.txt + + asciigenome_pdf = ch_asciigenome_pdf // channel: [ val(meta), [ pdf ] ] + asciigenome_version = ch_asciigenome_version // path: *.version.txt +} diff --git a/subworkflows/local/variants_ivar.nf b/subworkflows/local/variants_ivar.nf index 35b2364a..45b7f1f8 100644 --- a/subworkflows/local/variants_ivar.nf +++ b/subworkflows/local/variants_ivar.nf @@ -1,193 +1,193 @@ -/* - * Variant calling and downstream processing for IVar - */ - -params.ivar_variants_options = [:] -params.ivar_variants_to_vcf_options = [:] -params.tabix_bgzip_options = [:] -params.tabix_tabix_options = [:] -params.bcftools_stats_options = [:] -params.ivar_consensus_options = [:] -params.consensus_plot_options = [:] -params.quast_options = [:] -params.snpeff_options = [:] -params.snpsift_options = [:] -params.snpeff_bgzip_options = [:] -params.snpeff_tabix_options = [:] -params.snpeff_stats_options = [:] -params.pangolin_options = [:] -params.nextclade_options = [:] -params.asciigenome_options = [:] - -include { IVAR_VARIANTS_TO_VCF } from '../../modules/local/ivar_variants_to_vcf' 
addParams( options: params.ivar_variants_to_vcf_options ) -include { PLOT_BASE_DENSITY } from '../../modules/local/plot_base_density' addParams( options: params.consensus_plot_options ) -include { IVAR_VARIANTS } from '../../modules/nf-core/software/ivar/variants/main' addParams( options: params.ivar_variants_options ) -include { IVAR_CONSENSUS } from '../../modules/nf-core/software/ivar/consensus/main' addParams( options: params.ivar_consensus_options ) -include { QUAST } from '../../modules/nf-core/software/quast/main' addParams( options: params.quast_options ) -include { PANGOLIN } from '../../modules/nf-core/software/pangolin/main' addParams( options: params.pangolin_options ) -include { NEXTCLADE } from '../../modules/nf-core/software/nextclade/main' addParams( options: params.nextclade_options ) -include { ASCIIGENOME } from '../../modules/local/asciigenome' addParams( options: params.asciigenome_options ) -include { VCF_BGZIP_TABIX_STATS } from '../nf-core/vcf_bgzip_tabix_stats' addParams( bgzip_options: params.tabix_bgzip_options, tabix_options: params.tabix_tabix_options, stats_options: params.bcftools_stats_options ) -include { SNPEFF_SNPSIFT } from './snpeff_snpsift' addParams( snpeff_options: params.snpeff_options, snpsift_options: params.snpsift_options, bgzip_options: params.snpeff_bgzip_options, tabix_options: params.snpeff_tabix_options, stats_options: params.snpeff_stats_options ) - -workflow VARIANTS_IVAR { - take: - bam // channel: [ val(meta), [ bam ] ] - fasta // channel: /path/to/genome.fasta - gff // channel: /path/to/genome.gff - bed // channel: /path/to/primers.bed - snpeff_db // channel: /path/to/snpeff_db/ - snpeff_config // channel: /path/to/snpeff.config - ivar_multiqc_header // channel: /path/to/multiqc_header for ivar variants - - main: - /* - * Call variants - */ - IVAR_VARIANTS ( bam, fasta, gff ) - - /* - * Convert original iVar output to VCF, zip and index - */ - IVAR_VARIANTS_TO_VCF ( IVAR_VARIANTS.out.tsv, ivar_multiqc_header ) 
- - VCF_BGZIP_TABIX_STATS ( IVAR_VARIANTS_TO_VCF.out.vcf ) - - /* - * Create genome consensus - */ - ch_consensus = Channel.empty() - ch_consensus_qual = Channel.empty() - ch_bases_tsv = Channel.empty() - ch_bases_pdf = Channel.empty() - ch_quast_results = Channel.empty() - ch_quast_tsv = Channel.empty() - ch_quast_version = Channel.empty() - ch_pangolin_report = Channel.empty() - ch_pangolin_version = Channel.empty() - ch_nextclade_report = Channel.empty() - ch_nextclade_version = Channel.empty() - if (!params.skip_consensus) { - IVAR_CONSENSUS ( bam, fasta ) - ch_consensus = IVAR_CONSENSUS.out.fasta - ch_consensus_qual = IVAR_CONSENSUS.out.qual - - PLOT_BASE_DENSITY ( ch_consensus ) - ch_bases_tsv = PLOT_BASE_DENSITY.out.tsv - ch_bases_pdf = PLOT_BASE_DENSITY.out.pdf - - if (!params.skip_variants_quast) { - QUAST ( ch_consensus.collect{ it[1] }, fasta, gff, true, params.gff ) - ch_quast_results = QUAST.out.results - ch_quast_tsv = QUAST.out.tsv - ch_quast_version = QUAST.out.version - } - - if (!params.skip_pangolin) { - PANGOLIN ( ch_consensus ) - ch_pangolin_report = PANGOLIN.out.report - ch_pangolin_version = PANGOLIN.out.version - } - - if (!params.skip_nextclade) { - NEXTCLADE ( ch_consensus, 'csv' ) - ch_nextclade_report = NEXTCLADE.out.csv - ch_nextclade_version = NEXTCLADE.out.version - } - } - - /* - * Annotate variants - */ - ch_snpeff_vcf = Channel.empty() - ch_snpeff_tbi = Channel.empty() - ch_snpeff_stats = Channel.empty() - ch_snpeff_csv = Channel.empty() - ch_snpeff_txt = Channel.empty() - ch_snpeff_html = Channel.empty() - ch_snpsift_txt = Channel.empty() - ch_snpeff_version = Channel.empty() - ch_snpsift_version = Channel.empty() - if (params.gff && !params.skip_snpeff) { - SNPEFF_SNPSIFT ( VCF_BGZIP_TABIX_STATS.out.vcf, snpeff_db, snpeff_config, fasta ) - ch_snpeff_vcf = SNPEFF_SNPSIFT.out.vcf - ch_snpeff_tbi = SNPEFF_SNPSIFT.out.tbi - ch_snpeff_stats = SNPEFF_SNPSIFT.out.stats - ch_snpeff_csv = SNPEFF_SNPSIFT.out.csv - ch_snpeff_txt = 
SNPEFF_SNPSIFT.out.txt - ch_snpeff_html = SNPEFF_SNPSIFT.out.html - ch_snpsift_txt = SNPEFF_SNPSIFT.out.snpsift_txt - ch_snpeff_version = SNPEFF_SNPSIFT.out.snpeff_version - ch_snpsift_version = SNPEFF_SNPSIFT.out.snpsift_version - } - - /* - * MODULE: Variant screenshots with ASCIIGenome - */ - ch_asciigenome_pdf = Channel.empty() - ch_asciigenome_version = Channel.empty() - if (!params.skip_asciigenome) { - bam - .join(VCF_BGZIP_TABIX_STATS.out.vcf, by: [0]) - .join(VCF_BGZIP_TABIX_STATS.out.stats, by: [0]) - .map { meta, bam, vcf, stats -> - if (WorkflowCommons.getNumVariantsFromBCFToolsStats(stats) > 0) { - return [ meta, bam, vcf ] - } - } - .set { ch_asciigenome } - - ASCIIGENOME ( - ch_asciigenome, - fasta, - gff, - bed, - params.asciigenome_window_size, - params.asciigenome_read_depth - ) - ch_asciigenome_pdf = ASCIIGENOME.out.pdf - ch_asciigenome_version = ASCIIGENOME.out.version - } - - emit: - tsv = IVAR_VARIANTS.out.tsv // channel: [ val(meta), [ tsv ] ] - ivar_version = IVAR_VARIANTS.out.version // path: *.version.txt - - vcf_orig = IVAR_VARIANTS_TO_VCF.out.vcf // channel: [ val(meta), [ vcf ] ] - log_out = IVAR_VARIANTS_TO_VCF.out.log // channel: [ val(meta), [ log ] ] - multiqc_tsv = IVAR_VARIANTS_TO_VCF.out.tsv // channel: [ val(meta), [ tsv ] ] - - vcf = VCF_BGZIP_TABIX_STATS.out.vcf // channel: [ val(meta), [ vcf ] ] - tbi = VCF_BGZIP_TABIX_STATS.out.tbi // channel: [ val(meta), [ tbi ] ] - stats = VCF_BGZIP_TABIX_STATS.out.stats // channel: [ val(meta), [ txt ] ] - tabix_version = VCF_BGZIP_TABIX_STATS.out.tabix_version // path: *.version.txt - bcftools_version = VCF_BGZIP_TABIX_STATS.out.bcftools_version // path: *.version.txt - - consensus = ch_consensus // channel: [ val(meta), [ fasta ] ] - consensus_qual = ch_consensus_qual // channel: [ val(meta), [ fasta ] ] - bases_tsv = ch_bases_tsv // channel: [ val(meta), [ tsv ] ] - bases_pdf = ch_bases_pdf // channel: [ val(meta), [ pdf ] ] - - quast_results = ch_quast_results // channel: [ 
val(meta), [ results ] ] - quast_tsv = ch_quast_tsv // channel: [ val(meta), [ tsv ] ] - quast_version = ch_quast_version // path: *.version.txt - - snpeff_vcf = ch_snpeff_vcf // channel: [ val(meta), [ vcf.gz ] ] - snpeff_tbi = ch_snpeff_tbi // channel: [ val(meta), [ tbi ] ] - snpeff_stats = ch_snpeff_stats // channel: [ val(meta), [ txt ] ] - snpeff_csv = ch_snpeff_csv // channel: [ val(meta), [ csv ] ] - snpeff_txt = ch_snpeff_txt // channel: [ val(meta), [ txt ] ] - snpeff_html = ch_snpeff_html // channel: [ val(meta), [ html ] ] - snpsift_txt = ch_snpsift_txt // channel: [ val(meta), [ txt ] ] - snpeff_version = ch_snpeff_version // path: *.version.txt - snpsift_version = ch_snpsift_version // path: *.version.txt - - pangolin_report = ch_pangolin_report // channel: [ val(meta), [ csv ] ] - pangolin_version = ch_pangolin_version // path: *.version.txt - - nextclade_report = ch_nextclade_report // channel: [ val(meta), [ csv ] ] - nextclade_version = ch_nextclade_version // path: *.version.txt - - asciigenome_pdf = ch_asciigenome_pdf // channel: [ val(meta), [ pdf ] ] - asciigenome_version = ch_asciigenome_version // path: *.version.txt -} +/* + * Variant calling and downstream processing for IVar + */ + +params.ivar_variants_options = [:] +params.ivar_variants_to_vcf_options = [:] +params.tabix_bgzip_options = [:] +params.tabix_tabix_options = [:] +params.bcftools_stats_options = [:] +params.ivar_consensus_options = [:] +params.consensus_plot_options = [:] +params.quast_options = [:] +params.snpeff_options = [:] +params.snpsift_options = [:] +params.snpeff_bgzip_options = [:] +params.snpeff_tabix_options = [:] +params.snpeff_stats_options = [:] +params.pangolin_options = [:] +params.nextclade_options = [:] +params.asciigenome_options = [:] + +include { IVAR_VARIANTS_TO_VCF } from '../../modules/local/ivar_variants_to_vcf' addParams( options: params.ivar_variants_to_vcf_options ) +include { PLOT_BASE_DENSITY } from '../../modules/local/plot_base_density' 
addParams( options: params.consensus_plot_options ) +include { IVAR_VARIANTS } from '../../modules/nf-core/software/ivar/variants/main' addParams( options: params.ivar_variants_options ) +include { IVAR_CONSENSUS } from '../../modules/nf-core/software/ivar/consensus/main' addParams( options: params.ivar_consensus_options ) +include { QUAST } from '../../modules/nf-core/software/quast/main' addParams( options: params.quast_options ) +include { PANGOLIN } from '../../modules/nf-core/software/pangolin/main' addParams( options: params.pangolin_options ) +include { NEXTCLADE } from '../../modules/nf-core/software/nextclade/main' addParams( options: params.nextclade_options ) +include { ASCIIGENOME } from '../../modules/local/asciigenome' addParams( options: params.asciigenome_options ) +include { VCF_BGZIP_TABIX_STATS } from '../nf-core/vcf_bgzip_tabix_stats' addParams( bgzip_options: params.tabix_bgzip_options, tabix_options: params.tabix_tabix_options, stats_options: params.bcftools_stats_options ) +include { SNPEFF_SNPSIFT } from './snpeff_snpsift' addParams( snpeff_options: params.snpeff_options, snpsift_options: params.snpsift_options, bgzip_options: params.snpeff_bgzip_options, tabix_options: params.snpeff_tabix_options, stats_options: params.snpeff_stats_options ) + +workflow VARIANTS_IVAR { + take: + bam // channel: [ val(meta), [ bam ] ] + fasta // channel: /path/to/genome.fasta + gff // channel: /path/to/genome.gff + bed // channel: /path/to/primers.bed + snpeff_db // channel: /path/to/snpeff_db/ + snpeff_config // channel: /path/to/snpeff.config + ivar_multiqc_header // channel: /path/to/multiqc_header for ivar variants + + main: + /* + * Call variants + */ + IVAR_VARIANTS ( bam, fasta, gff ) + + /* + * Convert original iVar output to VCF, zip and index + */ + IVAR_VARIANTS_TO_VCF ( IVAR_VARIANTS.out.tsv, ivar_multiqc_header ) + + VCF_BGZIP_TABIX_STATS ( IVAR_VARIANTS_TO_VCF.out.vcf ) + + /* + * Create genome consensus + */ + ch_consensus = Channel.empty() + 
ch_consensus_qual = Channel.empty() + ch_bases_tsv = Channel.empty() + ch_bases_pdf = Channel.empty() + ch_quast_results = Channel.empty() + ch_quast_tsv = Channel.empty() + ch_quast_version = Channel.empty() + ch_pangolin_report = Channel.empty() + ch_pangolin_version = Channel.empty() + ch_nextclade_report = Channel.empty() + ch_nextclade_version = Channel.empty() + if (!params.skip_consensus) { + IVAR_CONSENSUS ( bam, fasta ) + ch_consensus = IVAR_CONSENSUS.out.fasta + ch_consensus_qual = IVAR_CONSENSUS.out.qual + + PLOT_BASE_DENSITY ( ch_consensus ) + ch_bases_tsv = PLOT_BASE_DENSITY.out.tsv + ch_bases_pdf = PLOT_BASE_DENSITY.out.pdf + + if (!params.skip_variants_quast) { + QUAST ( ch_consensus.collect{ it[1] }, fasta, gff, true, params.gff ) + ch_quast_results = QUAST.out.results + ch_quast_tsv = QUAST.out.tsv + ch_quast_version = QUAST.out.version + } + + if (!params.skip_pangolin) { + PANGOLIN ( ch_consensus ) + ch_pangolin_report = PANGOLIN.out.report + ch_pangolin_version = PANGOLIN.out.version + } + + if (!params.skip_nextclade) { + NEXTCLADE ( ch_consensus, 'csv' ) + ch_nextclade_report = NEXTCLADE.out.csv + ch_nextclade_version = NEXTCLADE.out.version + } + } + + /* + * Annotate variants + */ + ch_snpeff_vcf = Channel.empty() + ch_snpeff_tbi = Channel.empty() + ch_snpeff_stats = Channel.empty() + ch_snpeff_csv = Channel.empty() + ch_snpeff_txt = Channel.empty() + ch_snpeff_html = Channel.empty() + ch_snpsift_txt = Channel.empty() + ch_snpeff_version = Channel.empty() + ch_snpsift_version = Channel.empty() + if (params.gff && !params.skip_snpeff) { + SNPEFF_SNPSIFT ( VCF_BGZIP_TABIX_STATS.out.vcf, snpeff_db, snpeff_config, fasta ) + ch_snpeff_vcf = SNPEFF_SNPSIFT.out.vcf + ch_snpeff_tbi = SNPEFF_SNPSIFT.out.tbi + ch_snpeff_stats = SNPEFF_SNPSIFT.out.stats + ch_snpeff_csv = SNPEFF_SNPSIFT.out.csv + ch_snpeff_txt = SNPEFF_SNPSIFT.out.txt + ch_snpeff_html = SNPEFF_SNPSIFT.out.html + ch_snpsift_txt = SNPEFF_SNPSIFT.out.snpsift_txt + ch_snpeff_version = 
SNPEFF_SNPSIFT.out.snpeff_version + ch_snpsift_version = SNPEFF_SNPSIFT.out.snpsift_version + } + + /* + * MODULE: Variant screenshots with ASCIIGenome + */ + ch_asciigenome_pdf = Channel.empty() + ch_asciigenome_version = Channel.empty() + if (!params.skip_asciigenome) { + bam + .join(VCF_BGZIP_TABIX_STATS.out.vcf, by: [0]) + .join(VCF_BGZIP_TABIX_STATS.out.stats, by: [0]) + .map { meta, bam, vcf, stats -> + if (WorkflowCommons.getNumVariantsFromBCFToolsStats(stats) > 0) { + return [ meta, bam, vcf ] + } + } + .set { ch_asciigenome } + + ASCIIGENOME ( + ch_asciigenome, + fasta, + gff, + bed, + params.asciigenome_window_size, + params.asciigenome_read_depth + ) + ch_asciigenome_pdf = ASCIIGENOME.out.pdf + ch_asciigenome_version = ASCIIGENOME.out.version + } + + emit: + tsv = IVAR_VARIANTS.out.tsv // channel: [ val(meta), [ tsv ] ] + ivar_version = IVAR_VARIANTS.out.version // path: *.version.txt + + vcf_orig = IVAR_VARIANTS_TO_VCF.out.vcf // channel: [ val(meta), [ vcf ] ] + log_out = IVAR_VARIANTS_TO_VCF.out.log // channel: [ val(meta), [ log ] ] + multiqc_tsv = IVAR_VARIANTS_TO_VCF.out.tsv // channel: [ val(meta), [ tsv ] ] + + vcf = VCF_BGZIP_TABIX_STATS.out.vcf // channel: [ val(meta), [ vcf ] ] + tbi = VCF_BGZIP_TABIX_STATS.out.tbi // channel: [ val(meta), [ tbi ] ] + stats = VCF_BGZIP_TABIX_STATS.out.stats // channel: [ val(meta), [ txt ] ] + tabix_version = VCF_BGZIP_TABIX_STATS.out.tabix_version // path: *.version.txt + bcftools_version = VCF_BGZIP_TABIX_STATS.out.bcftools_version // path: *.version.txt + + consensus = ch_consensus // channel: [ val(meta), [ fasta ] ] + consensus_qual = ch_consensus_qual // channel: [ val(meta), [ fasta ] ] + bases_tsv = ch_bases_tsv // channel: [ val(meta), [ tsv ] ] + bases_pdf = ch_bases_pdf // channel: [ val(meta), [ pdf ] ] + + quast_results = ch_quast_results // channel: [ val(meta), [ results ] ] + quast_tsv = ch_quast_tsv // channel: [ val(meta), [ tsv ] ] + quast_version = ch_quast_version // path: *.version.txt + 
+ snpeff_vcf = ch_snpeff_vcf // channel: [ val(meta), [ vcf.gz ] ] + snpeff_tbi = ch_snpeff_tbi // channel: [ val(meta), [ tbi ] ] + snpeff_stats = ch_snpeff_stats // channel: [ val(meta), [ txt ] ] + snpeff_csv = ch_snpeff_csv // channel: [ val(meta), [ csv ] ] + snpeff_txt = ch_snpeff_txt // channel: [ val(meta), [ txt ] ] + snpeff_html = ch_snpeff_html // channel: [ val(meta), [ html ] ] + snpsift_txt = ch_snpsift_txt // channel: [ val(meta), [ txt ] ] + snpeff_version = ch_snpeff_version // path: *.version.txt + snpsift_version = ch_snpsift_version // path: *.version.txt + + pangolin_report = ch_pangolin_report // channel: [ val(meta), [ csv ] ] + pangolin_version = ch_pangolin_version // path: *.version.txt + + nextclade_report = ch_nextclade_report // channel: [ val(meta), [ csv ] ] + nextclade_version = ch_nextclade_version // path: *.version.txt + + asciigenome_pdf = ch_asciigenome_pdf // channel: [ val(meta), [ pdf ] ] + asciigenome_version = ch_asciigenome_version // path: *.version.txt +} diff --git a/subworkflows/nf-core/align_bowtie2.nf b/subworkflows/nf-core/align_bowtie2.nf index 0cbb658c..967228d4 100644 --- a/subworkflows/nf-core/align_bowtie2.nf +++ b/subworkflows/nf-core/align_bowtie2.nf @@ -1,39 +1,39 @@ -/* - * Alignment with BOWTIE2 - */ - -params.align_options = [:] -params.samtools_options = [:] - -include { BOWTIE2_ALIGN } from '../../modules/nf-core/software/bowtie2/align/main' addParams( options: params.align_options ) -include { BAM_SORT_SAMTOOLS } from '../nf-core/bam_sort_samtools' addParams( options: params.samtools_options ) - -workflow ALIGN_BOWTIE2 { - take: - reads // channel: [ val(meta), [ reads ] ] - index // channel: /path/to/bowtie2/index/ - - main: - /* - * Map reads with BOWTIE2 - */ - BOWTIE2_ALIGN ( reads, index ) - - /* - * Sort, index BAM file and run samtools stats, flagstat and idxstats - */ - BAM_SORT_SAMTOOLS ( BOWTIE2_ALIGN.out.bam ) - - emit: - bam_orig = BOWTIE2_ALIGN.out.bam // channel: [ val(meta), bam ] - 
log_out = BOWTIE2_ALIGN.out.log // channel: [ val(meta), log ] - fastq = BOWTIE2_ALIGN.out.fastq // channel: [ val(meta), fastq ] - bowtie2_version = BOWTIE2_ALIGN.out.version // path: *.version.txt - - bam = BAM_SORT_SAMTOOLS.out.bam // channel: [ val(meta), [ bam ] ] - bai = BAM_SORT_SAMTOOLS.out.bai // channel: [ val(meta), [ bai ] ] - stats = BAM_SORT_SAMTOOLS.out.stats // channel: [ val(meta), [ stats ] ] - flagstat = BAM_SORT_SAMTOOLS.out.flagstat // channel: [ val(meta), [ flagstat ] ] - idxstats = BAM_SORT_SAMTOOLS.out.idxstats // channel: [ val(meta), [ idxstats ] ] - samtools_version = BAM_SORT_SAMTOOLS.out.version // path: *.version.txt -} +/* + * Alignment with BOWTIE2 + */ + +params.align_options = [:] +params.samtools_options = [:] + +include { BOWTIE2_ALIGN } from '../../modules/nf-core/software/bowtie2/align/main' addParams( options: params.align_options ) +include { BAM_SORT_SAMTOOLS } from '../nf-core/bam_sort_samtools' addParams( options: params.samtools_options ) + +workflow ALIGN_BOWTIE2 { + take: + reads // channel: [ val(meta), [ reads ] ] + index // channel: /path/to/bowtie2/index/ + + main: + /* + * Map reads with BOWTIE2 + */ + BOWTIE2_ALIGN ( reads, index ) + + /* + * Sort, index BAM file and run samtools stats, flagstat and idxstats + */ + BAM_SORT_SAMTOOLS ( BOWTIE2_ALIGN.out.bam ) + + emit: + bam_orig = BOWTIE2_ALIGN.out.bam // channel: [ val(meta), bam ] + log_out = BOWTIE2_ALIGN.out.log // channel: [ val(meta), log ] + fastq = BOWTIE2_ALIGN.out.fastq // channel: [ val(meta), fastq ] + bowtie2_version = BOWTIE2_ALIGN.out.version // path: *.version.txt + + bam = BAM_SORT_SAMTOOLS.out.bam // channel: [ val(meta), [ bam ] ] + bai = BAM_SORT_SAMTOOLS.out.bai // channel: [ val(meta), [ bai ] ] + stats = BAM_SORT_SAMTOOLS.out.stats // channel: [ val(meta), [ stats ] ] + flagstat = BAM_SORT_SAMTOOLS.out.flagstat // channel: [ val(meta), [ flagstat ] ] + idxstats = BAM_SORT_SAMTOOLS.out.idxstats // channel: [ val(meta), [ idxstats ] ] + 
samtools_version = BAM_SORT_SAMTOOLS.out.version // path: *.version.txt +} diff --git a/subworkflows/nf-core/bam_sort_samtools.nf b/subworkflows/nf-core/bam_sort_samtools.nf index 42430501..e4431afb 100644 --- a/subworkflows/nf-core/bam_sort_samtools.nf +++ b/subworkflows/nf-core/bam_sort_samtools.nf @@ -1,27 +1,27 @@ -/* - * Sort, index BAM file and run samtools stats, flagstat and idxstats - */ - -params.options = [:] - -include { SAMTOOLS_SORT } from '../../modules/nf-core/software/samtools/sort/main' addParams( options: params.options ) -include { SAMTOOLS_INDEX } from '../../modules/nf-core/software/samtools/index/main' addParams( options: params.options ) -include { BAM_STATS_SAMTOOLS } from './bam_stats_samtools' addParams( options: params.options ) - -workflow BAM_SORT_SAMTOOLS { - take: - bam // channel: [ val(meta), [ bam ] ] - - main: - SAMTOOLS_SORT ( bam ) - SAMTOOLS_INDEX ( SAMTOOLS_SORT.out.bam ) - BAM_STATS_SAMTOOLS ( SAMTOOLS_SORT.out.bam.join(SAMTOOLS_INDEX.out.bai, by: [0]) ) - - emit: - bam = SAMTOOLS_SORT.out.bam // channel: [ val(meta), [ bam ] ] - bai = SAMTOOLS_INDEX.out.bai // channel: [ val(meta), [ bai ] ] - stats = BAM_STATS_SAMTOOLS.out.stats // channel: [ val(meta), [ stats ] ] - flagstat = BAM_STATS_SAMTOOLS.out.flagstat // channel: [ val(meta), [ flagstat ] ] - idxstats = BAM_STATS_SAMTOOLS.out.idxstats // channel: [ val(meta), [ idxstats ] ] - version = SAMTOOLS_SORT.out.version // path: *.version.txt -} +/* + * Sort, index BAM file and run samtools stats, flagstat and idxstats + */ + +params.options = [:] + +include { SAMTOOLS_SORT } from '../../modules/nf-core/software/samtools/sort/main' addParams( options: params.options ) +include { SAMTOOLS_INDEX } from '../../modules/nf-core/software/samtools/index/main' addParams( options: params.options ) +include { BAM_STATS_SAMTOOLS } from './bam_stats_samtools' addParams( options: params.options ) + +workflow BAM_SORT_SAMTOOLS { + take: + bam // channel: [ val(meta), [ bam ] ] + + main: 
+ SAMTOOLS_SORT ( bam ) + SAMTOOLS_INDEX ( SAMTOOLS_SORT.out.bam ) + BAM_STATS_SAMTOOLS ( SAMTOOLS_SORT.out.bam.join(SAMTOOLS_INDEX.out.bai, by: [0]) ) + + emit: + bam = SAMTOOLS_SORT.out.bam // channel: [ val(meta), [ bam ] ] + bai = SAMTOOLS_INDEX.out.bai // channel: [ val(meta), [ bai ] ] + stats = BAM_STATS_SAMTOOLS.out.stats // channel: [ val(meta), [ stats ] ] + flagstat = BAM_STATS_SAMTOOLS.out.flagstat // channel: [ val(meta), [ flagstat ] ] + idxstats = BAM_STATS_SAMTOOLS.out.idxstats // channel: [ val(meta), [ idxstats ] ] + version = SAMTOOLS_SORT.out.version // path: *.version.txt +} diff --git a/subworkflows/nf-core/bam_stats_samtools.nf b/subworkflows/nf-core/bam_stats_samtools.nf index d948ed49..4676d39c 100644 --- a/subworkflows/nf-core/bam_stats_samtools.nf +++ b/subworkflows/nf-core/bam_stats_samtools.nf @@ -1,25 +1,25 @@ -/* - * Run SAMtools stats, flagstat and idxstats - */ - -params.options = [:] - -include { SAMTOOLS_STATS } from '../../modules/nf-core/software/samtools/stats/main' addParams( options: params.options ) -include { SAMTOOLS_IDXSTATS } from '../../modules/nf-core/software/samtools/idxstats/main' addParams( options: params.options ) -include { SAMTOOLS_FLAGSTAT } from '../../modules/nf-core/software/samtools/flagstat/main' addParams( options: params.options ) - -workflow BAM_STATS_SAMTOOLS { - take: - bam_bai // channel: [ val(meta), [ bam ], [bai] ] - - main: - SAMTOOLS_STATS ( bam_bai ) - SAMTOOLS_FLAGSTAT ( bam_bai ) - SAMTOOLS_IDXSTATS ( bam_bai ) - - emit: - stats = SAMTOOLS_STATS.out.stats // channel: [ val(meta), [ stats ] ] - flagstat = SAMTOOLS_FLAGSTAT.out.flagstat // channel: [ val(meta), [ flagstat ] ] - idxstats = SAMTOOLS_IDXSTATS.out.idxstats // channel: [ val(meta), [ idxstats ] ] - version = SAMTOOLS_STATS.out.version // path: *.version.txt -} +/* + * Run SAMtools stats, flagstat and idxstats + */ + +params.options = [:] + +include { SAMTOOLS_STATS } from '../../modules/nf-core/software/samtools/stats/main' 
addParams( options: params.options ) +include { SAMTOOLS_IDXSTATS } from '../../modules/nf-core/software/samtools/idxstats/main' addParams( options: params.options ) +include { SAMTOOLS_FLAGSTAT } from '../../modules/nf-core/software/samtools/flagstat/main' addParams( options: params.options ) + +workflow BAM_STATS_SAMTOOLS { + take: + bam_bai // channel: [ val(meta), [ bam ], [bai] ] + + main: + SAMTOOLS_STATS ( bam_bai ) + SAMTOOLS_FLAGSTAT ( bam_bai ) + SAMTOOLS_IDXSTATS ( bam_bai ) + + emit: + stats = SAMTOOLS_STATS.out.stats // channel: [ val(meta), [ stats ] ] + flagstat = SAMTOOLS_FLAGSTAT.out.flagstat // channel: [ val(meta), [ flagstat ] ] + idxstats = SAMTOOLS_IDXSTATS.out.idxstats // channel: [ val(meta), [ idxstats ] ] + version = SAMTOOLS_STATS.out.version // path: *.version.txt +} diff --git a/subworkflows/nf-core/fastqc_fastp.nf b/subworkflows/nf-core/fastqc_fastp.nf index 23c62ec6..79f05132 100644 --- a/subworkflows/nf-core/fastqc_fastp.nf +++ b/subworkflows/nf-core/fastqc_fastp.nf @@ -1,62 +1,62 @@ -/* - * Read QC and trimming - */ - -params.fastqc_raw_options = [:] -params.fastqc_trim_options = [:] -params.fastp_options = [:] - -include { FASTQC as FASTQC_RAW } from '../../modules/nf-core/software/fastqc/main' addParams( options: params.fastqc_raw_options ) -include { FASTQC as FASTQC_TRIM } from '../../modules/nf-core/software/fastqc/main' addParams( options: params.fastqc_trim_options ) -include { FASTP } from '../../modules/nf-core/software/fastp/main' addParams( options: params.fastp_options ) - -workflow FASTQC_FASTP { - take: - reads // channel: [ val(meta), [ reads ] ] - - main: - fastqc_raw_html = Channel.empty() - fastqc_raw_zip = Channel.empty() - fastqc_version = Channel.empty() - if (!params.skip_fastqc) { - FASTQC_RAW ( reads ).html.set { fastqc_raw_html } - fastqc_raw_zip = FASTQC_RAW.out.zip - fastqc_version = FASTQC_RAW.out.version - } - - trim_reads = reads - trim_json = Channel.empty() - trim_html = Channel.empty() - trim_log = 
Channel.empty() - trim_reads_fail = Channel.empty() - fastp_version = Channel.empty() - fastqc_trim_html = Channel.empty() - fastqc_trim_zip = Channel.empty() - if (!params.skip_fastp) { - FASTP ( reads ).reads.set { trim_reads } - trim_json = FASTP.out.json - trim_html = FASTP.out.html - trim_log = FASTP.out.log - trim_reads_fail = FASTP.out.reads_fail - fastp_version = FASTP.out.version - - if (!params.skip_fastqc) { - FASTQC_TRIM ( trim_reads ).html.set { fastqc_trim_html } - fastqc_trim_zip = FASTQC_TRIM.out.zip - } - } - - emit: - reads = trim_reads // channel: [ val(meta), [ reads ] ] - trim_json // channel: [ val(meta), [ json ] ] - trim_html // channel: [ val(meta), [ html ] ] - trim_log // channel: [ val(meta), [ log ] ] - trim_reads_fail // channel: [ val(meta), [ fastq.gz ] ] - fastp_version // path: *.version.txt - - fastqc_raw_html // channel: [ val(meta), [ html ] ] - fastqc_raw_zip // channel: [ val(meta), [ zip ] ] - fastqc_trim_html // channel: [ val(meta), [ html ] ] - fastqc_trim_zip // channel: [ val(meta), [ zip ] ] - fastqc_version // path: *.version.txt -} +/* + * Read QC and trimming + */ + +params.fastqc_raw_options = [:] +params.fastqc_trim_options = [:] +params.fastp_options = [:] + +include { FASTQC as FASTQC_RAW } from '../../modules/nf-core/software/fastqc/main' addParams( options: params.fastqc_raw_options ) +include { FASTQC as FASTQC_TRIM } from '../../modules/nf-core/software/fastqc/main' addParams( options: params.fastqc_trim_options ) +include { FASTP } from '../../modules/nf-core/software/fastp/main' addParams( options: params.fastp_options ) + +workflow FASTQC_FASTP { + take: + reads // channel: [ val(meta), [ reads ] ] + + main: + fastqc_raw_html = Channel.empty() + fastqc_raw_zip = Channel.empty() + fastqc_version = Channel.empty() + if (!params.skip_fastqc) { + FASTQC_RAW ( reads ).html.set { fastqc_raw_html } + fastqc_raw_zip = FASTQC_RAW.out.zip + fastqc_version = FASTQC_RAW.out.version + } + + trim_reads = reads + 
trim_json = Channel.empty() + trim_html = Channel.empty() + trim_log = Channel.empty() + trim_reads_fail = Channel.empty() + fastp_version = Channel.empty() + fastqc_trim_html = Channel.empty() + fastqc_trim_zip = Channel.empty() + if (!params.skip_fastp) { + FASTP ( reads ).reads.set { trim_reads } + trim_json = FASTP.out.json + trim_html = FASTP.out.html + trim_log = FASTP.out.log + trim_reads_fail = FASTP.out.reads_fail + fastp_version = FASTP.out.version + + if (!params.skip_fastqc) { + FASTQC_TRIM ( trim_reads ).html.set { fastqc_trim_html } + fastqc_trim_zip = FASTQC_TRIM.out.zip + } + } + + emit: + reads = trim_reads // channel: [ val(meta), [ reads ] ] + trim_json // channel: [ val(meta), [ json ] ] + trim_html // channel: [ val(meta), [ html ] ] + trim_log // channel: [ val(meta), [ log ] ] + trim_reads_fail // channel: [ val(meta), [ fastq.gz ] ] + fastp_version // path: *.version.txt + + fastqc_raw_html // channel: [ val(meta), [ html ] ] + fastqc_raw_zip // channel: [ val(meta), [ zip ] ] + fastqc_trim_html // channel: [ val(meta), [ html ] ] + fastqc_trim_zip // channel: [ val(meta), [ zip ] ] + fastqc_version // path: *.version.txt +} diff --git a/subworkflows/nf-core/filter_bam_samtools.nf b/subworkflows/nf-core/filter_bam_samtools.nf index aff9495d..f113cdec 100644 --- a/subworkflows/nf-core/filter_bam_samtools.nf +++ b/subworkflows/nf-core/filter_bam_samtools.nf @@ -1,35 +1,35 @@ -/* - * filter co-ordinate sorted BAM, index and run samtools stats, flagstat and idxstats - */ - -params.samtools_view_options = [:] -params.samtools_index_options = [:] - -include { SAMTOOLS_VIEW } from '../../modules/nf-core/software/samtools/view/main' addParams( options: params.samtools_view_options ) -include { SAMTOOLS_INDEX } from '../../modules/nf-core/software/samtools/index/main' addParams( options: params.samtools_index_options ) -include { BAM_STATS_SAMTOOLS } from '../nf-core/bam_stats_samtools' addParams( options: params.samtools_index_options ) - -workflow 
FILTER_BAM_SAMTOOLS { - take: - bam // channel: [ val(meta), [ bam ] ] - - main: - /* - * Filter BAM using Samtools view - */ - SAMTOOLS_VIEW ( bam ) - - /* - * Index BAM file and run samtools stats, flagstat and idxstats - */ - SAMTOOLS_INDEX ( SAMTOOLS_VIEW.out.bam ) - BAM_STATS_SAMTOOLS ( SAMTOOLS_VIEW.out.bam.join(SAMTOOLS_INDEX.out.bai, by: [0]) ) - - emit: - bam = SAMTOOLS_VIEW.out.bam // channel: [ val(meta), [ bam ] ] - bai = SAMTOOLS_INDEX.out.bai // channel: [ val(meta), [ bai ] ] - stats = BAM_STATS_SAMTOOLS.out.stats // channel: [ val(meta), [ stats ] ] - flagstat = BAM_STATS_SAMTOOLS.out.flagstat // channel: [ val(meta), [ flagstat ] ] - idxstats = BAM_STATS_SAMTOOLS.out.idxstats // channel: [ val(meta), [ idxstats ] ] - samtools_version = SAMTOOLS_INDEX.out.version // path: *.version.txt -} +/* + * filter co-ordinate sorted BAM, index and run samtools stats, flagstat and idxstats + */ + +params.samtools_view_options = [:] +params.samtools_index_options = [:] + +include { SAMTOOLS_VIEW } from '../../modules/nf-core/software/samtools/view/main' addParams( options: params.samtools_view_options ) +include { SAMTOOLS_INDEX } from '../../modules/nf-core/software/samtools/index/main' addParams( options: params.samtools_index_options ) +include { BAM_STATS_SAMTOOLS } from '../nf-core/bam_stats_samtools' addParams( options: params.samtools_index_options ) + +workflow FILTER_BAM_SAMTOOLS { + take: + bam // channel: [ val(meta), [ bam ] ] + + main: + /* + * Filter BAM using Samtools view + */ + SAMTOOLS_VIEW ( bam ) + + /* + * Index BAM file and run samtools stats, flagstat and idxstats + */ + SAMTOOLS_INDEX ( SAMTOOLS_VIEW.out.bam ) + BAM_STATS_SAMTOOLS ( SAMTOOLS_VIEW.out.bam.join(SAMTOOLS_INDEX.out.bai, by: [0]) ) + + emit: + bam = SAMTOOLS_VIEW.out.bam // channel: [ val(meta), [ bam ] ] + bai = SAMTOOLS_INDEX.out.bai // channel: [ val(meta), [ bai ] ] + stats = BAM_STATS_SAMTOOLS.out.stats // channel: [ val(meta), [ stats ] ] + flagstat = 
BAM_STATS_SAMTOOLS.out.flagstat // channel: [ val(meta), [ flagstat ] ] + idxstats = BAM_STATS_SAMTOOLS.out.idxstats // channel: [ val(meta), [ idxstats ] ] + samtools_version = SAMTOOLS_INDEX.out.version // path: *.version.txt +} diff --git a/subworkflows/nf-core/mark_duplicates_picard.nf b/subworkflows/nf-core/mark_duplicates_picard.nf index 4c19592f..2893bd53 100644 --- a/subworkflows/nf-core/mark_duplicates_picard.nf +++ b/subworkflows/nf-core/mark_duplicates_picard.nf @@ -1,38 +1,38 @@ -/* - * Picard MarkDuplicates, sort, index BAM file and run samtools stats, flagstat and idxstats - */ - -params.markduplicates_options = [:] -params.samtools_options = [:] - -include { PICARD_MARKDUPLICATES } from '../../modules/nf-core/software/picard/markduplicates/main' addParams( options: params.markduplicates_options ) -include { SAMTOOLS_INDEX } from '../../modules/nf-core/software/samtools/index/main' addParams( options: params.samtools_options ) -include { BAM_STATS_SAMTOOLS } from './bam_stats_samtools' addParams( options: params.samtools_options ) - -workflow MARK_DUPLICATES_PICARD { - take: - bam // channel: [ val(meta), [ bam ] ] - - main: - /* - * Picard MarkDuplicates - */ - PICARD_MARKDUPLICATES ( bam ) - - /* - * Index BAM file and run samtools stats, flagstat and idxstats - */ - SAMTOOLS_INDEX ( PICARD_MARKDUPLICATES.out.bam ) - BAM_STATS_SAMTOOLS ( PICARD_MARKDUPLICATES.out.bam.join(SAMTOOLS_INDEX.out.bai, by: [0]) ) - - emit: - bam = PICARD_MARKDUPLICATES.out.bam // channel: [ val(meta), [ bam ] ] - metrics = PICARD_MARKDUPLICATES.out.metrics // channel: [ val(meta), [ metrics ] ] - picard_version = PICARD_MARKDUPLICATES.out.version // path: *.version.txt - - bai = SAMTOOLS_INDEX.out.bai // channel: [ val(meta), [ bai ] ] - stats = BAM_STATS_SAMTOOLS.out.stats // channel: [ val(meta), [ stats ] ] - flagstat = BAM_STATS_SAMTOOLS.out.flagstat // channel: [ val(meta), [ flagstat ] ] - idxstats = BAM_STATS_SAMTOOLS.out.idxstats // channel: [ val(meta), [ idxstats 
] ] - samtools_version = SAMTOOLS_INDEX.out.version // path: *.version.txt -} +/* + * Picard MarkDuplicates, sort, index BAM file and run samtools stats, flagstat and idxstats + */ + +params.markduplicates_options = [:] +params.samtools_options = [:] + +include { PICARD_MARKDUPLICATES } from '../../modules/nf-core/software/picard/markduplicates/main' addParams( options: params.markduplicates_options ) +include { SAMTOOLS_INDEX } from '../../modules/nf-core/software/samtools/index/main' addParams( options: params.samtools_options ) +include { BAM_STATS_SAMTOOLS } from './bam_stats_samtools' addParams( options: params.samtools_options ) + +workflow MARK_DUPLICATES_PICARD { + take: + bam // channel: [ val(meta), [ bam ] ] + + main: + /* + * Picard MarkDuplicates + */ + PICARD_MARKDUPLICATES ( bam ) + + /* + * Index BAM file and run samtools stats, flagstat and idxstats + */ + SAMTOOLS_INDEX ( PICARD_MARKDUPLICATES.out.bam ) + BAM_STATS_SAMTOOLS ( PICARD_MARKDUPLICATES.out.bam.join(SAMTOOLS_INDEX.out.bai, by: [0]) ) + + emit: + bam = PICARD_MARKDUPLICATES.out.bam // channel: [ val(meta), [ bam ] ] + metrics = PICARD_MARKDUPLICATES.out.metrics // channel: [ val(meta), [ metrics ] ] + picard_version = PICARD_MARKDUPLICATES.out.version // path: *.version.txt + + bai = SAMTOOLS_INDEX.out.bai // channel: [ val(meta), [ bai ] ] + stats = BAM_STATS_SAMTOOLS.out.stats // channel: [ val(meta), [ stats ] ] + flagstat = BAM_STATS_SAMTOOLS.out.flagstat // channel: [ val(meta), [ flagstat ] ] + idxstats = BAM_STATS_SAMTOOLS.out.idxstats // channel: [ val(meta), [ idxstats ] ] + samtools_version = SAMTOOLS_INDEX.out.version // path: *.version.txt +} diff --git a/subworkflows/nf-core/vcf_bgzip_tabix_stats.nf b/subworkflows/nf-core/vcf_bgzip_tabix_stats.nf index fa6cd5e9..46faf6ef 100644 --- a/subworkflows/nf-core/vcf_bgzip_tabix_stats.nf +++ b/subworkflows/nf-core/vcf_bgzip_tabix_stats.nf @@ -1,27 +1,27 @@ -/* - * Run BCFTools bgzip, tabix and stats commands - */ - 
-params.bgzip_options = [:] -params.tabix_options = [:] -params.stats_options = [:] - -include { TABIX_BGZIP } from '../../modules/nf-core/software/tabix/bgzip/main' addParams( options: params.bgzip_options ) -include { VCF_TABIX_STATS } from './vcf_tabix_stats' addParams( tabix_options: params.tabix_options, stats_options: params.stats_options ) - -workflow VCF_BGZIP_TABIX_STATS { - take: - vcf // channel: [ val(meta), [ vcf ] ] - - main: - TABIX_BGZIP ( vcf ) - VCF_TABIX_STATS ( TABIX_BGZIP.out.gz ) - - emit: - vcf = TABIX_BGZIP.out.gz // channel: [ val(meta), [ vcf.gz ] ] - tabix_version = TABIX_BGZIP.out.version // path: *.version.txt - - tbi = VCF_TABIX_STATS.out.tbi // channel: [ val(meta), [ tbi ] ] - stats = VCF_TABIX_STATS.out.stats // channel: [ val(meta), [ txt ] ] - bcftools_version = VCF_TABIX_STATS.out.bcftools_version // path: *.version.txt -} +/* + * Run BCFTools bgzip, tabix and stats commands + */ + +params.bgzip_options = [:] +params.tabix_options = [:] +params.stats_options = [:] + +include { TABIX_BGZIP } from '../../modules/nf-core/software/tabix/bgzip/main' addParams( options: params.bgzip_options ) +include { VCF_TABIX_STATS } from './vcf_tabix_stats' addParams( tabix_options: params.tabix_options, stats_options: params.stats_options ) + +workflow VCF_BGZIP_TABIX_STATS { + take: + vcf // channel: [ val(meta), [ vcf ] ] + + main: + TABIX_BGZIP ( vcf ) + VCF_TABIX_STATS ( TABIX_BGZIP.out.gz ) + + emit: + vcf = TABIX_BGZIP.out.gz // channel: [ val(meta), [ vcf.gz ] ] + tabix_version = TABIX_BGZIP.out.version // path: *.version.txt + + tbi = VCF_TABIX_STATS.out.tbi // channel: [ val(meta), [ tbi ] ] + stats = VCF_TABIX_STATS.out.stats // channel: [ val(meta), [ txt ] ] + bcftools_version = VCF_TABIX_STATS.out.bcftools_version // path: *.version.txt +} diff --git a/subworkflows/nf-core/vcf_tabix_stats.nf b/subworkflows/nf-core/vcf_tabix_stats.nf index 4430e617..d0916940 100644 --- a/subworkflows/nf-core/vcf_tabix_stats.nf +++ 
b/subworkflows/nf-core/vcf_tabix_stats.nf @@ -1,25 +1,25 @@ -/* - * Run BCFTools tabix and stats commands - */ - -params.tabix_options = [:] -params.stats_options = [:] - -include { TABIX_TABIX } from '../../modules/nf-core/software/tabix/tabix/main' addParams( options: params.tabix_options ) -include { BCFTOOLS_STATS } from '../../modules/nf-core/software/bcftools/stats/main' addParams( options: params.stats_options ) - -workflow VCF_TABIX_STATS { - take: - vcf // channel: [ val(meta), [ vcf ] ] - - main: - TABIX_TABIX ( vcf ) - BCFTOOLS_STATS ( vcf ) - - emit: - tbi = TABIX_TABIX.out.tbi // channel: [ val(meta), [ tbi ] ] - tabix_version = TABIX_TABIX.out.version // path: *.version.txt - - stats = BCFTOOLS_STATS.out.stats // channel: [ val(meta), [ txt ] ] - bcftools_version = BCFTOOLS_STATS.out.version // path: *.version.txt -} +/* + * Run BCFTools tabix and stats commands + */ + +params.tabix_options = [:] +params.stats_options = [:] + +include { TABIX_TABIX } from '../../modules/nf-core/software/tabix/tabix/main' addParams( options: params.tabix_options ) +include { BCFTOOLS_STATS } from '../../modules/nf-core/software/bcftools/stats/main' addParams( options: params.stats_options ) + +workflow VCF_TABIX_STATS { + take: + vcf // channel: [ val(meta), [ vcf ] ] + + main: + TABIX_TABIX ( vcf ) + BCFTOOLS_STATS ( vcf ) + + emit: + tbi = TABIX_TABIX.out.tbi // channel: [ val(meta), [ tbi ] ] + tabix_version = TABIX_TABIX.out.version // path: *.version.txt + + stats = BCFTOOLS_STATS.out.stats // channel: [ val(meta), [ txt ] ] + bcftools_version = BCFTOOLS_STATS.out.version // path: *.version.txt +} From 90b98e3b88b00a10280ce0164f9f123925fd0ca2 Mon Sep 17 00:00:00 2001 From: Harshil Patel Date: Tue, 11 May 2021 20:09:17 +0100 Subject: [PATCH 09/17] Fix files in conf directory --- .editorconfig | 3 --- conf/test.config | 36 +++++++++++++++++----------------- conf/test_full.config | 26 ++++++++++++------------ conf/test_full_nanopore.config | 29 
+++++++++++++-------------- conf/test_full_sispa.config | 22 ++++++++++----------- conf/test_nanopore.config | 35 ++++++++++++++++----------------- conf/test_sispa.config | 32 +++++++++++++++--------------- conf/test_sra.config | 16 +++++++-------- 8 files changed, 97 insertions(+), 102 deletions(-) diff --git a/.editorconfig b/.editorconfig index 7a3ba6aa..11c1681b 100644 --- a/.editorconfig +++ b/.editorconfig @@ -8,9 +8,6 @@ trim_trailing_whitespace = true indent_size = 4 indent_style = space -[*.{yml,yaml}] -indent_size = 2 - # These files are edited upstream in nf-core/modules [/modules/nf-core/**] charset = unset diff --git a/conf/test.config b/conf/test.config index 592e9c5e..70ae1954 100644 --- a/conf/test.config +++ b/conf/test.config @@ -8,26 +8,26 @@ */ params { - config_profile_name = 'Test profile' - config_profile_description = 'Minimal test dataset to check pipeline function' + config_profile_name = 'Test profile' + config_profile_description = 'Minimal test dataset to check pipeline function' - // Limit resources so that this can run on GitHub Actions - max_cpus = 2 - max_memory = 6.GB - max_time = 6.h + // Limit resources so that this can run on GitHub Actions + max_cpus = 2 + max_memory = 6.GB + max_time = 6.h - // Input data to test amplicon analysis - input = 'https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/samplesheet/samplesheet_test_illumina_amplicon.csv' - platform = 'illumina' - protocol = 'amplicon' - primer_set = 'artic' - primer_set_version = 1 + // Input data to test amplicon analysis + input = 'https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/samplesheet/samplesheet_test_illumina_amplicon.csv' + platform = 'illumina' + protocol = 'amplicon' + primer_set = 'artic' + primer_set_version = 1 - // Genome references - genome = 'MN908947.3' - kraken2_db = 'https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/genome/kraken2/kraken2_hs22.tar.gz' + // Genome references + genome = 'MN908947.3' + 
kraken2_db = 'https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/genome/kraken2/kraken2_hs22.tar.gz' - // Other pipeline options - callers = 'ivar,bcftools' - assemblers = 'spades,unicycler,minia' + // Other pipeline options + callers = 'ivar,bcftools' + assemblers = 'spades,unicycler,minia' } diff --git a/conf/test_full.config b/conf/test_full.config index 781948b9..8c248597 100644 --- a/conf/test_full.config +++ b/conf/test_full.config @@ -8,20 +8,20 @@ */ params { - config_profile_name = 'Full test profile' - config_profile_description = 'Full test dataset to check pipeline function' + config_profile_name = 'Full test profile' + config_profile_description = 'Full test dataset to check pipeline function' - // Input data for full test of amplicon analysis - input = 's3://nf-core-awsmegatests/viralrecon/input_data/210212_K00102_0557_AHKN3LBBXY/samplesheet.csv' - platform = 'illumina' - protocol = 'amplicon' - primer_set = 'artic' - primer_set_version = 3 + // Input data for full test of amplicon analysis + input = 's3://nf-core-awsmegatests/viralrecon/input_data/210212_K00102_0557_AHKN3LBBXY/samplesheet.csv' + platform = 'illumina' + protocol = 'amplicon' + primer_set = 'artic' + primer_set_version = 3 - // Genome references - genome = 'MN908947.3' + // Genome references + genome = 'MN908947.3' - // Other pipeline options - callers = 'ivar,bcftools' - assemblers = 'spades,unicycler,minia' + // Other pipeline options + callers = 'ivar,bcftools' + assemblers = 'spades,unicycler,minia' } diff --git a/conf/test_full_nanopore.config b/conf/test_full_nanopore.config index 14f22d31..e4ab3c71 100644 --- a/conf/test_full_nanopore.config +++ b/conf/test_full_nanopore.config @@ -8,21 +8,20 @@ */ params { - config_profile_name = 'Full test profile for nanopore data' - config_profile_description = 'Full test dataset to check pipeline function' + config_profile_name = 'Full test profile for nanopore data' + config_profile_description = 'Full test dataset to check 
pipeline function' - // Input data for full test of amplicon analysis - platform = 'nanopore' - input = 's3://nf-core-awsmegatests/viralrecon/input_data/20210205_1526_X4_FAP51364_21fa8135/samplesheet.csv' - fastq_dir = 's3://nf-core-awsmegatests/viralrecon/input_data/20210205_1526_X4_FAP51364_21fa8135/fastq_pass/' - fast5_dir = 's3://nf-core-awsmegatests/viralrecon/input_data/20210205_1526_X4_FAP51364_21fa8135/fast5_pass/' - sequencing_summary = 's3://nf-core-awsmegatests/viralrecon/input_data/20210205_1526_X4_FAP51364_21fa8135/sequencing_summary.txt' - - // Genome references - genome = 'MN908947.3' - primer_set_version = 3 - - // Other parameters - artic_minion_medaka_model = 's3://nf-core-awsmegatests/viralrecon/input_data/20210205_1526_X4_FAP51364_21fa8135/r941_min_high_g360_model.hdf5' + // Input data for full test of amplicon analysis + platform = 'nanopore' + input = 's3://nf-core-awsmegatests/viralrecon/input_data/20210205_1526_X4_FAP51364_21fa8135/samplesheet.csv' + fastq_dir = 's3://nf-core-awsmegatests/viralrecon/input_data/20210205_1526_X4_FAP51364_21fa8135/fastq_pass/' + fast5_dir = 's3://nf-core-awsmegatests/viralrecon/input_data/20210205_1526_X4_FAP51364_21fa8135/fast5_pass/' + sequencing_summary = 's3://nf-core-awsmegatests/viralrecon/input_data/20210205_1526_X4_FAP51364_21fa8135/sequencing_summary.txt' + + // Genome references + genome = 'MN908947.3' + primer_set_version = 3 + // Other parameters + artic_minion_medaka_model = 's3://nf-core-awsmegatests/viralrecon/input_data/20210205_1526_X4_FAP51364_21fa8135/r941_min_high_g360_model.hdf5' } \ No newline at end of file diff --git a/conf/test_full_sispa.config b/conf/test_full_sispa.config index fecc750b..de33e99c 100644 --- a/conf/test_full_sispa.config +++ b/conf/test_full_sispa.config @@ -8,18 +8,18 @@ */ params { - config_profile_name = 'Full test profile' - config_profile_description = 'Full test dataset to check pipeline function' + config_profile_name = 'Full test profile' + 
config_profile_description = 'Full test dataset to check pipeline function' - // Input data for full test of SISPA/metagenomics analysis - input = 'https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/samplesheet/samplesheet_full_illumina_sispa.csv' - platform = 'illumina' - protocol = 'metagenomic' + // Input data for full test of SISPA/metagenomics analysis + input = 'https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/samplesheet/samplesheet_full_illumina_sispa.csv' + platform = 'illumina' + protocol = 'metagenomic' - // Genome references - genome = 'MN908947.3' + // Genome references + genome = 'MN908947.3' - // Other pipeline options - callers = 'ivar,bcftools' - assemblers = 'spades,unicycler,minia' + // Other pipeline options + callers = 'ivar,bcftools' + assemblers = 'spades,unicycler,minia' } diff --git a/conf/test_nanopore.config b/conf/test_nanopore.config index b01a9f56..9079e375 100644 --- a/conf/test_nanopore.config +++ b/conf/test_nanopore.config @@ -8,26 +8,25 @@ */ params { - config_profile_name = 'Test profile' - config_profile_description = 'Minimal test dataset to check pipeline function for Nanopore data' + config_profile_name = 'Test profile' + config_profile_description = 'Minimal test dataset to check pipeline function for Nanopore data' - // Limit resources so that this can run on GitHub Actions - max_cpus = 2 - max_memory = 6.GB - max_time = 6.h + // Limit resources so that this can run on GitHub Actions + max_cpus = 2 + max_memory = 6.GB + max_time = 6.h - // Input data to test nanopore analysis - platform = 'nanopore' - input = 'https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/samplesheet/samplesheet_test_nanopore.csv' - fastq_dir = 's3://nf-core-awsmegatests/viralrecon/input_data/minion_test/fastq_pass/' - fast5_dir = 's3://nf-core-awsmegatests/viralrecon/input_data/minion_test/fast5_pass/' - sequencing_summary = 
's3://nf-core-awsmegatests/viralrecon/input_data/minion_test/sequencing_summary.txt' - - // Genome references - genome = 'MN908947.3' - primer_set_version = 3 + // Input data to test nanopore analysis + platform = 'nanopore' + input = 'https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/samplesheet/samplesheet_test_nanopore.csv' + fastq_dir = 's3://nf-core-awsmegatests/viralrecon/input_data/minion_test/fastq_pass/' + fast5_dir = 's3://nf-core-awsmegatests/viralrecon/input_data/minion_test/fast5_pass/' + sequencing_summary = 's3://nf-core-awsmegatests/viralrecon/input_data/minion_test/sequencing_summary.txt' - // Other parameters - artic_minion_medaka_model = 's3://nf-core-awsmegatests/viralrecon/input_data/minion_test/r941_min_high_g360_model.hdf5' + // Genome references + genome = 'MN908947.3' + primer_set_version = 3 + // Other parameters + artic_minion_medaka_model = 's3://nf-core-awsmegatests/viralrecon/input_data/minion_test/r941_min_high_g360_model.hdf5' } diff --git a/conf/test_sispa.config b/conf/test_sispa.config index 3ee52739..dea1546b 100644 --- a/conf/test_sispa.config +++ b/conf/test_sispa.config @@ -8,24 +8,24 @@ */ params { - config_profile_name = 'Test profile' - config_profile_description = 'Minimal test dataset to check pipeline function' + config_profile_name = 'Test profile' + config_profile_description = 'Minimal test dataset to check pipeline function' - // Limit resources so that this can run on GitHub Actions - max_cpus = 2 - max_memory = 6.GB - max_time = 6.h + // Limit resources so that this can run on GitHub Actions + max_cpus = 2 + max_memory = 6.GB + max_time = 6.h - // Input data to test SISPA/metagenomics analysis - input = 'https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/samplesheet/samplesheet_test_illumina_sispa.csv' - platform = 'illumina' - protocol = 'metagenomic' + // Input data to test SISPA/metagenomics analysis + input = 
'https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/samplesheet/samplesheet_test_illumina_sispa.csv' + platform = 'illumina' + protocol = 'metagenomic' - // Genome references - genome = 'MN908947.3' - kraken2_db = 'https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/genome/kraken2/kraken2_hs22.tar.gz' + // Genome references + genome = 'MN908947.3' + kraken2_db = 'https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/genome/kraken2/kraken2_hs22.tar.gz' - // Other pipeline options - callers = 'ivar,bcftools' - assemblers = 'spades,unicycler,minia' + // Other pipeline options + callers = 'ivar,bcftools' + assemblers = 'spades,unicycler,minia' } diff --git a/conf/test_sra.config b/conf/test_sra.config index 1a80ac41..a1311197 100644 --- a/conf/test_sra.config +++ b/conf/test_sra.config @@ -8,14 +8,14 @@ */ params { - config_profile_name = 'Test profile' - config_profile_description = 'Minimal test dataset to check pipeline function' + config_profile_name = 'Test profile' + config_profile_description = 'Minimal test dataset to check pipeline function' - // Limit resources so that this can run on GitHub Actions - max_cpus = 2 - max_memory = 6.GB - max_time = 6.h + // Limit resources so that this can run on GitHub Actions + max_cpus = 2 + max_memory = 6.GB + max_time = 6.h - // Input data to test SRA download functionality using SISPA/metagenomics data - public_data_ids = 'https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/samplesheet/samplesheet_test_sra.csv' + // Input data to test SRA download functionality using SISPA/metagenomics data + public_data_ids = 'https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/samplesheet/samplesheet_test_sra.csv' } From 9b974237d5707fb79e744a50cfaeeede598d3a26 Mon Sep 17 00:00:00 2001 From: Harshil Patel Date: Tue, 11 May 2021 21:05:19 +0100 Subject: [PATCH 10/17] YAML with 2 space indents --- .editorconfig | 3 +++ 1 file changed, 3 insertions(+) diff --git 
a/.editorconfig b/.editorconfig index 11c1681b..7a3ba6aa 100644 --- a/.editorconfig +++ b/.editorconfig @@ -8,6 +8,9 @@ trim_trailing_whitespace = true indent_size = 4 indent_style = space +[*.{yml,yaml}] +indent_size = 2 + # These files are edited upstream in nf-core/modules [/modules/nf-core/**] charset = unset From 1ceb722b7929968a594c864688a039221564ca5e Mon Sep 17 00:00:00 2001 From: Harshil Patel Date: Tue, 11 May 2021 22:35:15 +0100 Subject: [PATCH 11/17] Change Groovy comments everywhere --- conf/base.config | 17 +- conf/modules.config | 41 +++-- conf/test.config | 17 +- conf/test_full.config | 17 +- conf/test_full_nanopore.config | 17 +- conf/test_full_sispa.config | 17 +- conf/test_nanopore.config | 17 +- conf/test_sispa.config | 17 +- conf/test_sra.config | 17 +- lib/NfcoreSchema.groovy | 91 +++++----- lib/NfcoreTemplate.groovy | 48 +++--- lib/Utils.groovy | 18 +- lib/WorkflowCommons.groovy | 36 ++-- lib/WorkflowIllumina.groovy | 36 ++-- lib/WorkflowMain.groovy | 36 ++-- lib/WorkflowNanopore.groovy | 12 +- lib/WorkflowSraDownload.groovy | 12 +- main.nf | 26 +-- modules/local/get_software_versions.nf | 3 - modules/local/ivar_variants_to_vcf.nf | 3 - modules/local/make_bed_mask.nf | 3 - modules/local/sra_fastq_ftp.nf | 3 - modules/local/sra_ids_to_runinfo.nf | 3 - modules/local/sra_merge_samplesheet.nf | 3 - modules/local/sra_runinfo_to_ftp.nf | 3 - modules/local/sra_to_samplesheet.nf | 3 - nextflow.config | 11 +- subworkflows/local/assembly_minia.nf | 26 +-- subworkflows/local/assembly_qc.nf | 32 ++-- subworkflows/local/assembly_spades.nf | 37 +++-- subworkflows/local/assembly_unicycler.nf | 32 ++-- subworkflows/local/input_check.nf | 6 +- subworkflows/local/make_consensus.nf | 7 +- subworkflows/local/prepare_genome_illumina.nf | 49 +++--- subworkflows/local/prepare_genome_nanopore.nf | 37 +++-- subworkflows/local/primer_trim_ivar.nf | 19 ++- subworkflows/local/snpeff_snpsift.nf | 7 +- subworkflows/local/variants_bcftools.nf | 31 ++-- 
subworkflows/local/variants_ivar.nf | 37 +++-- subworkflows/nf-core/align_bowtie2.nf | 19 ++- subworkflows/nf-core/bam_sort_samtools.nf | 6 +- subworkflows/nf-core/bam_stats_samtools.nf | 6 +- subworkflows/nf-core/fastqc_fastp.nf | 6 +- subworkflows/nf-core/filter_bam_samtools.nf | 19 ++- .../nf-core/mark_duplicates_picard.nf | 19 ++- subworkflows/nf-core/vcf_bgzip_tabix_stats.nf | 6 +- subworkflows/nf-core/vcf_tabix_stats.nf | 6 +- workflows/illumina.nf | 157 +++++++++--------- workflows/nanopore.nf | 145 ++++++++-------- workflows/sra_download.nf | 37 ++--- 50 files changed, 637 insertions(+), 636 deletions(-) mode change 100755 => 100644 conf/modules.config diff --git a/conf/base.config b/conf/base.config index 72431ecb..dc250257 100644 --- a/conf/base.config +++ b/conf/base.config @@ -1,13 +1,12 @@ /* - * ------------------------------------------------- - * nf-core/viralrecon Nextflow base config file - * ------------------------------------------------- - * A 'blank slate' config file, appropriate for general - * use on most high performace compute environments. - * Assumes that all software is installed and available - * on the PATH. Runs in `local` mode - all jobs will be - * run on the logged in environment. - */ +======================================================================================== + nf-core/viralrecon Nextflow base config file +======================================================================================== + A 'blank slate' config file, appropriate for general use on most high performance + compute environments. Assumes that all software is installed and available on + the PATH. Runs in `local` mode - all jobs will be run on the logged in environment. 
+---------------------------------------------------------------------------------------- +*/ process { diff --git a/conf/modules.config b/conf/modules.config old mode 100755 new mode 100644 index 1d7da9dc..fc69eedc --- a/conf/modules.config +++ b/conf/modules.config @@ -1,25 +1,24 @@ /* - * -------------------------------------------------- - * Config file for defining DSL2 per module options - * -------------------------------------------------- - * - * Available keys to override module options: - * args = Additional arguments appended to command in module. - * args2 = Second set of arguments appended to command in module (multi-tool modules). - * args3 = Third set of arguments appended to command in module (multi-tool modules). - * publish_dir = Directory to publish results. - * publish_by_meta = Groovy list of keys available in meta map to append as directories to "publish_dir" path - * If publish_by_meta = true - Value of ${meta['id']} is appended as a directory to "publish_dir" path - * If publish_by_meta = ['id', 'custompath'] - If "id" is in meta map and "custompath" isn't then "${meta['id']}/custompath/" - * is appended as a directory to "publish_dir" path - * If publish_by_meta = false / null - No directories are appended to "publish_dir" path - * publish_files = Groovy map where key = "file_ext" and value = "directory" to publish results for that file extension - * The value of "directory" is appended to the standard "publish_dir" path as defined above. - * If publish_files = null (unspecified) - All files are published. - * If publish_files = false - No files are published. - * suffix = File name suffix for output files. 
- * - */ +======================================================================================== + Config file for defining DSL2 per module options +======================================================================================== + Available keys to override module options: + args = Additional arguments appended to command in module. + args2 = Second set of arguments appended to command in module (multi-tool modules). + args3 = Third set of arguments appended to command in module (multi-tool modules). + publish_dir = Directory to publish results. + publish_by_meta = Groovy list of keys available in meta map to append as directories to "publish_dir" path + If publish_by_meta = true - Value of ${meta['id']} is appended as a directory to "publish_dir" path + If publish_by_meta = ['id', 'custompath'] - If "id" is in meta map and "custompath" isn't then "${meta['id']}/custompath/" + is appended as a directory to "publish_dir" path + If publish_by_meta = false / null - No directories are appended to "publish_dir" path + publish_files = Groovy map where key = "file_ext" and value = "directory" to publish results for that file extension + The value of "directory" is appended to the standard "publish_dir" path as defined above. + If publish_files = null (unspecified) - All files are published. + If publish_files = false - No files are published. + suffix = File name suffix for output files. +---------------------------------------------------------------------------------------- +*/ params { modules { diff --git a/conf/test.config b/conf/test.config index 70ae1954..28450b79 100644 --- a/conf/test.config +++ b/conf/test.config @@ -1,11 +1,14 @@ /* - * ------------------------------------------------- - * Nextflow config file for running tests - * ------------------------------------------------- - * Defines bundled input files and everything required - * to run a fast and simple test. 
Use as follows: - * nextflow run nf-core/viralrecon -profile test, - */ +======================================================================================== + Nextflow config file for running minimal tests +======================================================================================== + Defines input files and everything required to run a fast and simple pipeline test. + + Use as follows: + nextflow run nf-core/viralrecon -profile test, + +---------------------------------------------------------------------------------------- +*/ params { config_profile_name = 'Test profile' diff --git a/conf/test_full.config b/conf/test_full.config index 8c248597..c44d4b0a 100644 --- a/conf/test_full.config +++ b/conf/test_full.config @@ -1,11 +1,14 @@ /* - * ------------------------------------------------- - * Nextflow config file for running full-size tests - * ------------------------------------------------- - * Defines bundled input files and everything required - * to run a full size pipeline test. Use as follows: - * nextflow run nf-core/viralrecon -profile test_full, - */ +======================================================================================== + Nextflow config file for running full-size tests +======================================================================================== + Defines input files and everything required to run a full size pipeline test. 
+ + Use as follows: + nextflow run nf-core/viralrecon -profile test_full, + +---------------------------------------------------------------------------------------- +*/ params { config_profile_name = 'Full test profile' diff --git a/conf/test_full_nanopore.config b/conf/test_full_nanopore.config index e4ab3c71..31e1c124 100644 --- a/conf/test_full_nanopore.config +++ b/conf/test_full_nanopore.config @@ -1,11 +1,14 @@ /* - * ------------------------------------------------- - * Nextflow config file for running full-size tests - * ------------------------------------------------- - * Defines bundled input files and everything required - * to run a full size pipeline test. Use as follows: - * nextflow run nf-core/viralrecon -profile test_full_nanopore, - */ +======================================================================================== + Nextflow config file for running full-size tests +======================================================================================== + Defines input files and everything required to run a full size pipeline test. + + Use as follows: + nextflow run nf-core/viralrecon -profile test_full_nanopore, + +---------------------------------------------------------------------------------------- +*/ params { config_profile_name = 'Full test profile for nanopore data' diff --git a/conf/test_full_sispa.config b/conf/test_full_sispa.config index de33e99c..49b295fc 100644 --- a/conf/test_full_sispa.config +++ b/conf/test_full_sispa.config @@ -1,11 +1,14 @@ /* - * ------------------------------------------------- - * Nextflow config file for running tests - * ------------------------------------------------- - * Defines bundled input files and everything required - * to run a full pipeline test. 
Use as follows: - * nextflow run nf-core/viralrecon -profile test_full_sispa, - */ +======================================================================================== + Nextflow config file for running full-size tests +======================================================================================== + Defines input files and everything required to run a full size pipeline test. + + Use as follows: + nextflow run nf-core/viralrecon -profile test_full_sispa, + +---------------------------------------------------------------------------------------- +*/ params { config_profile_name = 'Full test profile' diff --git a/conf/test_nanopore.config b/conf/test_nanopore.config index 9079e375..d7c6b945 100644 --- a/conf/test_nanopore.config +++ b/conf/test_nanopore.config @@ -1,11 +1,14 @@ /* - * ------------------------------------------------- - * Nextflow config file for running tests - * ------------------------------------------------- - * Defines bundled input files and everything required - * to run a fast and simple test. Use as follows: - * nextflow run nf-core/viralrecon -profile test_nanopore, - */ +======================================================================================== + Nextflow config file for running minimal tests +======================================================================================== + Defines input files and everything required to run a fast and simple pipeline test. 
+ + Use as follows: + nextflow run nf-core/viralrecon -profile test_nanopore, + +---------------------------------------------------------------------------------------- +*/ params { config_profile_name = 'Test profile' diff --git a/conf/test_sispa.config b/conf/test_sispa.config index dea1546b..b631f985 100644 --- a/conf/test_sispa.config +++ b/conf/test_sispa.config @@ -1,11 +1,14 @@ /* - * ------------------------------------------------- - * Nextflow config file for running tests - * ------------------------------------------------- - * Defines bundled input files and everything required - * to run a fast and simple test. Use as follows: - * nextflow run nf-core/viralrecon -profile test_sispa, - */ +======================================================================================== + Nextflow config file for running minimal tests +======================================================================================== + Defines input files and everything required to run a fast and simple pipeline test. + + Use as follows: + nextflow run nf-core/viralrecon -profile test_sispa, + +---------------------------------------------------------------------------------------- +*/ params { config_profile_name = 'Test profile' diff --git a/conf/test_sra.config b/conf/test_sra.config index a1311197..05ca823d 100644 --- a/conf/test_sra.config +++ b/conf/test_sra.config @@ -1,11 +1,14 @@ /* - * ------------------------------------------------- - * Nextflow config file for running tests - * ------------------------------------------------- - * Defines bundled input files and everything required - * to run a fast and simple test. 
Use as follows: - * nextflow run nf-core/viralrecon -profile test_sra, - */ +======================================================================================== + Nextflow config file for running minimal tests +======================================================================================== + Defines input files and everything required to run a fast and simple pipeline test. + + Use as follows: + nextflow run nf-core/viralrecon -profile test_sra, + +---------------------------------------------------------------------------------------- +*/ params { config_profile_name = 'Test profile' diff --git a/lib/NfcoreSchema.groovy b/lib/NfcoreSchema.groovy index 683b4747..852a2ad1 100755 --- a/lib/NfcoreSchema.groovy +++ b/lib/NfcoreSchema.groovy @@ -1,6 +1,6 @@ -/* - * This file holds several functions used to perform JSON parameter validation, help and summary rendering for the nf-core pipeline template. - */ +// +// This file holds several functions used to perform JSON parameter validation, help and summary rendering for the nf-core pipeline template. 
+// import org.everit.json.schema.Schema import org.everit.json.schema.loader.SchemaLoader @@ -13,17 +13,17 @@ import groovy.json.JsonBuilder class NfcoreSchema { - /* - * Resolve Schema path relative to main workflow directory - */ + // + // Resolve Schema path relative to main workflow directory + // public static String getSchemaPath(workflow, schema_filename='nextflow_schema.json') { return "${workflow.projectDir}/${schema_filename}" } - /* - * Function to loop over all parameters defined in schema and check - * whether the given parameters adhere to the specifications - */ + // + // Function to loop over all parameters defined in schema and check + // whether the given parameters adhere to the specifications + // /* groovylint-disable-next-line UnusedPrivateMethodParameter */ public static void validateParameters(workflow, params, log, schema_filename='nextflow_schema.json') { def has_error = false @@ -177,9 +177,9 @@ class NfcoreSchema { } } - /* - * Beautify parameters for --help - */ + // + // Beautify parameters for --help + // public static String paramsHelp(workflow, params, command, schema_filename='nextflow_schema.json') { Map colors = NfcoreTemplate.logColours(params.monochrome_logs) Integer num_hidden = 0 @@ -234,9 +234,9 @@ class NfcoreSchema { return output } - /* - * Groovy Map summarising parameters/workflow options used by the pipeline - */ + // + // Groovy Map summarising parameters/workflow options used by the pipeline + // public static LinkedHashMap paramsSummaryMap(workflow, params, schema_filename='nextflow_schema.json') { // Get a selection of core Nextflow workflow options def Map workflow_summary = [:] @@ -303,9 +303,9 @@ class NfcoreSchema { return [ 'Core Nextflow options' : workflow_summary ] << params_summary } - /* - * Beautify parameters for summary and return as string - */ + // + // Beautify parameters for summary and return as string + // public static String paramsSummaryLog(workflow, params) { Map colors = 
NfcoreTemplate.logColours(params.monochrome_logs) String output = '' @@ -326,9 +326,9 @@ class NfcoreSchema { return output } - /* - * Loop over nested exceptions and print the causingException - */ + // + // Loop over nested exceptions and print the causingException + // private static void printExceptions(ex_json, params_json, log) { def causingExceptions = ex_json['causingExceptions'] if (causingExceptions.length() == 0) { @@ -353,9 +353,9 @@ class NfcoreSchema { } } - /* - * Remove an element from a JSONArray - */ + // + // Remove an element from a JSONArray + // private static JSONArray removeElement(json_array, element) { def list = [] int len = json_array.length() @@ -367,9 +367,9 @@ class NfcoreSchema { return jsArray } - /* - * Remove ignored parameters - */ + // + // Remove ignored parameters + // private static JSONObject removeIgnoredParams(raw_schema, params) { // Remove anything that's in params.schema_ignore_params params.schema_ignore_params.split(',').each{ ignore_param -> @@ -399,9 +399,9 @@ class NfcoreSchema { return raw_schema } - /* - * Clean and check parameters relative to Nextflow native classes - */ + // + // Clean and check parameters relative to Nextflow native classes + // private static Map cleanParameters(params) { def new_params = params.getClass().newInstance(params) for (p in params) { @@ -425,9 +425,9 @@ class NfcoreSchema { return new_params } - /* - * This function tries to read a JSON params file - */ + // + // This function tries to read a JSON params file + // private static LinkedHashMap paramsLoad(String json_schema) { def params_map = new LinkedHashMap() try { @@ -439,15 +439,14 @@ class NfcoreSchema { return params_map } - /* - Method to actually read in JSON file using Groovy. - Group (as Key), values are all parameters - - Parameter1 as Key, Description as Value - - Parameter2 as Key, Description as Value - .... - Group - - - */ + // + // Method to actually read in JSON file using Groovy. 
+ // Group (as Key), values are all parameters + // - Parameter1 as Key, Description as Value + // - Parameter2 as Key, Description as Value + // .... + // Group + // - private static LinkedHashMap paramsRead(String json_schema) throws Exception { def json = new File(json_schema).text def Map schema_definitions = (Map) new JsonSlurper().parseText(json).get('definitions') @@ -499,9 +498,9 @@ class NfcoreSchema { return params_map } - /* - * Get maximum number of characters across all parameter names - */ + // + // Get maximum number of characters across all parameter names + // private static Integer paramsMaxChars(params_map) { Integer max_chars = 0 for (group in params_map.keySet()) { diff --git a/lib/NfcoreTemplate.groovy b/lib/NfcoreTemplate.groovy index 3ee255e1..dbf8e614 100755 --- a/lib/NfcoreTemplate.groovy +++ b/lib/NfcoreTemplate.groovy @@ -1,14 +1,14 @@ -/* - * This file holds several functions used within the nf-core pipeline template. - */ +// +// This file holds several functions used within the nf-core pipeline template. 
+// import org.yaml.snakeyaml.Yaml class NfcoreTemplate { - /* - * Check AWS Batch related parameters have been specified correctly - */ + // + // Check AWS Batch related parameters have been specified correctly + // public static void awsBatch(workflow, params) { if (workflow.profile.contains('awsbatch')) { // Check params.awsqueue and params.awsregion have been set if running on AWSBatch @@ -18,9 +18,9 @@ class NfcoreTemplate { } } - /* - * Check params.hostnames - */ + // + // Check params.hostnames + // public static void hostName(workflow, params, log) { Map colors = logColours(params.monochrome_logs) if (params.hostnames) { @@ -39,9 +39,9 @@ class NfcoreTemplate { } } - /* - * Construct and send completion email - */ + // + // Construct and send completion email + // public static void email(workflow, params, summary_params, projectDir, log, multiqc_report=[], fail_mapped_reads=[:]) { // Set up the e-mail variables @@ -157,9 +157,9 @@ class NfcoreTemplate { output_tf.withWriter { w -> w << email_txt } } - /* - * Print pipeline summary on completion - */ + // + // Print pipeline summary on completion + // public static void summary(workflow, params, log, fail_mapped_reads=[:], pass_mapped_reads=[:]) { Map colors = logColours(params.monochrome_logs) @@ -197,9 +197,9 @@ class NfcoreTemplate { } } - /* - * ANSII Colours used for terminal logging - */ + // + // ANSII Colours used for terminal logging + // public static Map logColours(Boolean monochrome_logs) { Map colorcodes = [:] @@ -265,17 +265,17 @@ class NfcoreTemplate { return colorcodes } - /* - * Does what is says on the tin - */ + // + // Does what is says on the tin + // public static String dashedLine(monochrome_logs) { Map colors = logColours(monochrome_logs) return "-${colors.dim}----------------------------------------------------${colors.reset}-" } - /* - * nf-core logo - */ + // + // nf-core logo + // public static String logo(workflow, monochrome_logs) { Map colors = logColours(monochrome_logs) 
String.format( diff --git a/lib/Utils.groovy b/lib/Utils.groovy index a6e8a8fe..18173e98 100755 --- a/lib/Utils.groovy +++ b/lib/Utils.groovy @@ -1,14 +1,14 @@ -/* - * This file holds several Groovy functions that could be useful for any Nextflow pipeline - */ +// +// This file holds several Groovy functions that could be useful for any Nextflow pipeline +// import org.yaml.snakeyaml.Yaml class Utils { - /* - * When running with -profile conda, warn if channels have not been set-up appropriately - */ + // + // When running with -profile conda, warn if channels have not been set-up appropriately + // public static void checkCondaChannels(log) { Yaml parser = new Yaml() def channels = [] @@ -38,9 +38,9 @@ class Utils { } } - /* - * Join module args with appropriate spacing - */ + // + // Join module args with appropriate spacing + // public static String joinModuleArgs(args_list) { return ' ' + args_list.join(' ') } diff --git a/lib/WorkflowCommons.groovy b/lib/WorkflowCommons.groovy index 4e7659b4..a9a94421 100755 --- a/lib/WorkflowCommons.groovy +++ b/lib/WorkflowCommons.groovy @@ -1,12 +1,12 @@ -/* - *This file holds several functions common to the multiple workflows in the nf-core/viralrecon pipeline - */ +// +// This file holds several functions common to the multiple workflows in the nf-core/viralrecon pipeline +// class WorkflowCommons { - /* - * Exit pipeline if incorrect --genome key provided - */ + // + // Exit pipeline if incorrect --genome key provided + // private static void genomeExistsError(params, log) { if (params.genomes && params.genome && !params.genomes.containsKey(params.genome)) { log.error "=============================================================================\n" + @@ -18,9 +18,9 @@ class WorkflowCommons { } } - /* - * Get workflow summary for MultiQC - */ + // + // Get workflow summary for MultiQC + // public static String paramsSummaryMultiqc(workflow, summary) { String summary_section = '' for (group in summary.keySet()) { @@ -45,9 
+45,9 @@ class WorkflowCommons { return yaml_file_text } - /* - * Function to check whether primer BED file has the correct suffixes as provided to the pipeline - */ + // + // Function to check whether primer BED file has the correct suffixes as provided to the pipeline + // public static void checkPrimerSuffixes(primer_bed_file, primer_left_suffix, primer_right_suffix, log) { def total = 0 def left = 0 @@ -73,9 +73,9 @@ class WorkflowCommons { } } - /* - * Function to get lineage from Pangolin output file - */ + // + // Function to get lineage from Pangolin output file + // public static String getPangolinLineage(pangolin_report) { def lineage = '' pangolin_report.eachLine { line -> @@ -84,9 +84,9 @@ class WorkflowCommons { return lineage } - /* - * Function to get number of variants reported in BCFTools stats file - */ + // + // Function to get number of variants reported in BCFTools stats file + // public static Integer getNumVariantsFromBCFToolsStats(bcftools_stats) { def num_vars = 0 bcftools_stats.eachLine { line -> diff --git a/lib/WorkflowIllumina.groovy b/lib/WorkflowIllumina.groovy index d05f1e0d..fbcc540f 100755 --- a/lib/WorkflowIllumina.groovy +++ b/lib/WorkflowIllumina.groovy @@ -1,14 +1,14 @@ -/* - * This file holds several functions specific to the workflow/illumina.nf in the nf-core/viralrecon pipeline - */ +// +// This file holds several functions specific to the workflow/illumina.nf in the nf-core/viralrecon pipeline +// import groovy.json.JsonSlurper class WorkflowIllumina { - /* - * Check and validate parameters - */ + // + // Check and validate parameters + // public static void initialise(params, log, valid_params) { WorkflowCommons.genomeExistsError(params, log) @@ -55,9 +55,9 @@ class WorkflowIllumina { } } - /* - * Print warning if genome fasta has more than one sequence - */ + // + // Print warning if genome fasta has more than one sequence + // public static void isMultiFasta(fasta_file, log) { def count = 0 def line = null @@ -78,9 
+78,9 @@ class WorkflowIllumina { } } - /* - * Function that parses and returns the number of mapped reasds from flagstat files - */ + // + // Function that parses and returns the number of mapped reasds from flagstat files + // public static ArrayList getFlagstatMappedReads(flagstat_file, params) { def mapped_reads = 0 flagstat_file.eachLine { line -> @@ -97,9 +97,9 @@ class WorkflowIllumina { return [ mapped_reads, pass ] } - /* - * Check if the primer BED file supplied to the pipeline is from the SWIFT/SNAP protocol - */ + // + // Check if the primer BED file supplied to the pipeline is from the SWIFT/SNAP protocol + // public static void checkIfSwiftProtocol(primer_bed_file, name_prefix, log) { def count = 0 def line = null @@ -122,9 +122,9 @@ class WorkflowIllumina { } } - /* - * Function that parses fastp json output file to get total number of reads after trimming - */ + // + // Function that parses fastp json output file to get total number of reads after trimming + // public static Integer getFastpReadsAfterFiltering(json_file) { def Map json = (Map) new JsonSlurper().parseText(json_file.text).get('summary') return json['after_filtering']['total_reads'].toInteger() diff --git a/lib/WorkflowMain.groovy b/lib/WorkflowMain.groovy index 16548934..b12aacc9 100755 --- a/lib/WorkflowMain.groovy +++ b/lib/WorkflowMain.groovy @@ -1,12 +1,12 @@ -/* - *This file holds several functions specific to the main.nf workflow in the nf-core/viralrecon pipeline - */ +// +// This file holds several functions specific to the main.nf workflow in the nf-core/viralrecon pipeline +// class WorkflowMain { - /* - * Citation string for pipeline - */ + // + // Citation string for pipeline + // public static String citation(workflow) { return "If you use ${workflow.manifest.name} for your analysis please cite:\n\n" + "* The pipeline\n" + @@ -17,9 +17,9 @@ class WorkflowMain { " https://github.com/${workflow.manifest.name}/blob/master/CITATIONS.md" } - /* - * Print help to screen if 
required - */ + // + // Print help to screen if required + // public static String help(workflow, params, log) { def command = "nextflow run nf-core/viralrecon --input samplesheet.csv --genome 'MN908947.3' -profile docker" def help_string = '' @@ -30,9 +30,9 @@ class WorkflowMain { return help_string } - /* - * Print parameter summary log to screen - */ + // + // Print parameter summary log to screen + // public static String paramsSummaryLog(workflow, params, log) { def summary_log = '' summary_log += NfcoreTemplate.logo(workflow, params.monochrome_logs) @@ -42,9 +42,9 @@ class WorkflowMain { return summary_log } - /* - * Validate parameters and print summary to screen - */ + // + // Validate parameters and print summary to screen + // public static void initialise(workflow, params, log) { // Print help to screen if required if (params.help) { @@ -84,9 +84,9 @@ class WorkflowMain { } } - /* - * Get attribute from genome config file e.g. fasta - */ + // + // Get attribute from genome config file e.g. 
fasta + // public static String getGenomeAttribute(params, attribute, log, primer_set='', primer_set_version=0) { def val = '' def support_link = " The default genome config used by the pipeline can be found here:\n" + diff --git a/lib/WorkflowNanopore.groovy b/lib/WorkflowNanopore.groovy index dd91367b..8b2eb74a 100755 --- a/lib/WorkflowNanopore.groovy +++ b/lib/WorkflowNanopore.groovy @@ -1,12 +1,12 @@ -/* - * This file holds several functions specific to the workflow/nanopore.nf in the nf-core/viralrecon pipeline - */ +// +// This file holds several functions specific to the workflow/nanopore.nf in the nf-core/viralrecon pipeline +// class WorkflowNanopore { - /* - * Check and validate parameters - */ + // + // Check and validate parameters + // public static void initialise(params, log, valid_params) { WorkflowCommons.genomeExistsError(params, log) diff --git a/lib/WorkflowSraDownload.groovy b/lib/WorkflowSraDownload.groovy index b80877e6..69e697da 100755 --- a/lib/WorkflowSraDownload.groovy +++ b/lib/WorkflowSraDownload.groovy @@ -1,12 +1,12 @@ -/* - * This file holds functions specific to the workflow/sra_download.nf in the nf-core/viralrecon pipeline - */ +// +// This file holds functions specific to the workflow/sra_download.nf in the nf-core/viralrecon pipeline +// class WorkflowSraDownload { - /* - * Print a warning after SRA download has completed - */ + // + // Print a warning after SRA download has completed + // public static void sraDownloadWarn(log) { log.warn "=============================================================================\n" + " Please double-check the samplesheet that has been auto-created using the\n" + diff --git a/main.nf b/main.nf index 959f7c5e..32b1f140 100644 --- a/main.nf +++ b/main.nf @@ -49,23 +49,23 @@ WorkflowMain.initialise(workflow, params, log) workflow NFCORE_VIRALRECON { - /* - * WORKFLOW: Get SRA run information for public database ids, download and md5sum check FastQ files, auto-create samplesheet - */ + // + // 
WORKFLOW: Get SRA run information for public database ids, download and md5sum check FastQ files, auto-create samplesheet + // if (params.public_data_ids) { include { SRA_DOWNLOAD } from './workflows/sra_download' SRA_DOWNLOAD () - /* - * WORKFLOW: Variant and de novo assembly analysis for Illumina data - */ + // + // WORKFLOW: Variant and de novo assembly analysis for Illumina data + // } else if (params.platform == 'illumina') { include { ILLUMINA } from './workflows/illumina' ILLUMINA () - /* - * WORKFLOW: Variant analysis for Nanopore data - */ + // + // WORKFLOW: Variant analysis for Nanopore data + // } else if (params.platform == 'nanopore') { include { NANOPORE } from './workflows/nanopore' NANOPORE () @@ -78,10 +78,10 @@ workflow NFCORE_VIRALRECON { ======================================================================================== */ -/* - * WORKFLOW: Execute a single named workflow for the pipeline - * See: https://github.com/nf-core/rnaseq/issues/619 - */ +// +// WORKFLOW: Execute a single named workflow for the pipeline +// See: https://github.com/nf-core/rnaseq/issues/619 +// workflow { NFCORE_VIRALRECON () } diff --git a/modules/local/get_software_versions.nf b/modules/local/get_software_versions.nf index f0aebb76..7078394a 100644 --- a/modules/local/get_software_versions.nf +++ b/modules/local/get_software_versions.nf @@ -3,9 +3,6 @@ include { saveFiles } from './functions' params.options = [:] -/* - * Parse software version numbers - */ process GET_SOFTWARE_VERSIONS { publishDir "${params.outdir}", mode: params.publish_dir_mode, diff --git a/modules/local/ivar_variants_to_vcf.nf b/modules/local/ivar_variants_to_vcf.nf index 4a1a3e6c..3ff46e0f 100644 --- a/modules/local/ivar_variants_to_vcf.nf +++ b/modules/local/ivar_variants_to_vcf.nf @@ -4,9 +4,6 @@ include { initOptions; saveFiles; getSoftwareName } from './functions' params.options = [:] options = initOptions(params.options) -/* - * Convert IVar tsv output to vcf - */ process 
IVAR_VARIANTS_TO_VCF { tag "$meta.id" publishDir "${params.outdir}", diff --git a/modules/local/make_bed_mask.nf b/modules/local/make_bed_mask.nf index bcdf814a..aa084a99 100644 --- a/modules/local/make_bed_mask.nf +++ b/modules/local/make_bed_mask.nf @@ -4,9 +4,6 @@ include { initOptions; saveFiles } from './functions' params.options = [:] options = initOptions(params.options) -/* - * Make a consensus mask - */ process MAKE_BED_MASK { tag "$meta.id" publishDir "${params.outdir}", diff --git a/modules/local/sra_fastq_ftp.nf b/modules/local/sra_fastq_ftp.nf index fa0f29ee..4b800d19 100644 --- a/modules/local/sra_fastq_ftp.nf +++ b/modules/local/sra_fastq_ftp.nf @@ -4,9 +4,6 @@ include { initOptions; saveFiles; getSoftwareName } from './functions' params.options = [:] options = initOptions(params.options) -/* - * Download SRA data via FTP - */ process SRA_FASTQ_FTP { tag "$meta.id" label 'process_medium' diff --git a/modules/local/sra_ids_to_runinfo.nf b/modules/local/sra_ids_to_runinfo.nf index 06d6ceff..baee3eb9 100644 --- a/modules/local/sra_ids_to_runinfo.nf +++ b/modules/local/sra_ids_to_runinfo.nf @@ -3,9 +3,6 @@ include { saveFiles; getSoftwareName } from './functions' params.options = [:] -/* - * Fetch SRA / ENA / GEO run information via the ENA API - */ process SRA_IDS_TO_RUNINFO { tag "$id" label 'error_retry' diff --git a/modules/local/sra_merge_samplesheet.nf b/modules/local/sra_merge_samplesheet.nf index 78cb8457..423be634 100644 --- a/modules/local/sra_merge_samplesheet.nf +++ b/modules/local/sra_merge_samplesheet.nf @@ -3,9 +3,6 @@ include { saveFiles; getSoftwareName } from './functions' params.options = [:] -/* - * Merge samplesheets across all samples - */ process SRA_MERGE_SAMPLESHEET { publishDir "${params.outdir}", mode: params.publish_dir_mode, diff --git a/modules/local/sra_runinfo_to_ftp.nf b/modules/local/sra_runinfo_to_ftp.nf index 634abd0e..de210b5e 100644 --- a/modules/local/sra_runinfo_to_ftp.nf +++ b/modules/local/sra_runinfo_to_ftp.nf 
@@ -3,9 +3,6 @@ include { saveFiles; getSoftwareName } from './functions' params.options = [:] -/* - * Create samplesheet for pipeline from SRA run information fetched via the ENA API - */ process SRA_RUNINFO_TO_FTP { publishDir "${params.outdir}", mode: params.publish_dir_mode, diff --git a/modules/local/sra_to_samplesheet.nf b/modules/local/sra_to_samplesheet.nf index d33c3571..ef4487d9 100644 --- a/modules/local/sra_to_samplesheet.nf +++ b/modules/local/sra_to_samplesheet.nf @@ -4,9 +4,6 @@ include { saveFiles; getSoftwareName } from './functions' params.options = [:] params.results_dir = '' -/* - * Stage FastQ files downloaded by SRA and auto-create a samplesheet for the pipeline - */ process SRA_TO_SAMPLESHEET { tag "$meta.id" publishDir "${params.outdir}", diff --git a/nextflow.config b/nextflow.config index 5461704c..a05bbfbc 100644 --- a/nextflow.config +++ b/nextflow.config @@ -1,9 +1,10 @@ /* - * ------------------------------------------------- - * nf-core/viralrecon Nextflow config file - * ------------------------------------------------- - * Default config options for all environments. 
- */ +======================================================================================== + nf-core/viralrecon Nextflow config file +======================================================================================== + Default config options for all compute environments +---------------------------------------------------------------------------------------- +*/ // Global default params, used in configs params { diff --git a/subworkflows/local/assembly_minia.nf b/subworkflows/local/assembly_minia.nf index ab67aac4..0626f42c 100644 --- a/subworkflows/local/assembly_minia.nf +++ b/subworkflows/local/assembly_minia.nf @@ -1,6 +1,6 @@ -/* - * Assembly and downstream processing for minia scaffolds - */ +// +// Assembly and downstream processing for minia scaffolds +// params.minia_options = [:] params.blastn_options = [:] @@ -21,23 +21,24 @@ workflow ASSEMBLY_MINIA { blast_header // channel: /path/to/blast_header.txt main: - /* - * Assemble reads with minia - */ + + // + // Assemble reads with minia + // MINIA ( reads ) - /* - * Filter for empty contig files - */ + // + // Filter for empty contig files + // MINIA .out .contigs .filter { meta, contig -> contig.size() > 0 } .set { ch_contigs } - /* - * Downstream assembly steps - */ + // + // Downstream assembly steps + // ASSEMBLY_QC ( ch_contigs, fasta, @@ -72,5 +73,4 @@ workflow ASSEMBLY_MINIA { plasmidid_fasta = ASSEMBLY_QC.out.plasmidid_fasta // channel: [ val(meta), [ fasta_files/ ] ] plasmidid_kmer = ASSEMBLY_QC.out.plasmidid_kmer // channel: [ val(meta), [ kmer/ ] ] plasmidid_version = ASSEMBLY_QC.out.plasmidid_version // path: *.version.txt - } diff --git a/subworkflows/local/assembly_qc.nf b/subworkflows/local/assembly_qc.nf index b39939fe..723b59d0 100644 --- a/subworkflows/local/assembly_qc.nf +++ b/subworkflows/local/assembly_qc.nf @@ -1,6 +1,6 @@ -/* - * Downstream analysis for assembly scaffolds - */ +// +// Downstream analysis for assembly scaffolds +// params.blastn_options = [:] 
params.blastn_filter_options = [:] @@ -23,9 +23,10 @@ workflow ASSEMBLY_QC { blast_header // channel: /path/to/blast_header.txt main: - /* - * Run blastn on assembly scaffolds - */ + + // + // Run blastn on assembly scaffolds + // ch_blast_txt = Channel.empty() ch_blast_filter_txt = Channel.empty() ch_blast_version = Channel.empty() @@ -38,9 +39,9 @@ workflow ASSEMBLY_QC { ch_blast_filter_txt = FILTER_BLASTN.out.txt } - /* - * Assembly QC across all samples with QUAST - */ + // + // Assembly QC across all samples with QUAST + // ch_quast_results = Channel.empty() ch_quast_tsv = Channel.empty() ch_quast_version = Channel.empty() @@ -51,9 +52,9 @@ workflow ASSEMBLY_QC { ch_quast_version = QUAST.out.version } - /* - * Contiguate assembly with ABACAS - */ + // + // Contiguate assembly with ABACAS + // ch_abacas_results = Channel.empty() ch_abacas_version = Channel.empty() if (!params.skip_abacas) { @@ -62,9 +63,9 @@ workflow ASSEMBLY_QC { ch_abacas_version = ABACAS.out.version } - /* - * Assembly report with PlasmidID - */ + // + // Assembly report with PlasmidID + // ch_plasmidid_html = Channel.empty() ch_plasmidid_tab = Channel.empty() ch_plasmidid_images = Channel.empty() @@ -108,5 +109,4 @@ workflow ASSEMBLY_QC { plasmidid_fasta = ch_plasmidid_fasta // channel: [ val(meta), [ fasta_files/ ] ] plasmidid_kmer = ch_plasmidid_kmer // channel: [ val(meta), [ kmer/ ] ] plasmidid_version = ch_plasmidid_version // path: *.version.txt - } diff --git a/subworkflows/local/assembly_spades.nf b/subworkflows/local/assembly_spades.nf index f438e65a..b9bf6c52 100644 --- a/subworkflows/local/assembly_spades.nf +++ b/subworkflows/local/assembly_spades.nf @@ -1,6 +1,6 @@ -/* - * Assembly and downstream processing for SPAdes scaffolds - */ +// +// Assembly and downstream processing for SPAdes scaffolds +// params.spades_options = [:] params.bandage_options = [:] @@ -24,9 +24,10 @@ workflow ASSEMBLY_SPADES { blast_header // channel: /path/to/blast_header.txt main: - /* - * Filter for 
paired-end samples if running metaSPAdes / metaviralSPAdes / metaplasmidSPAdes - */ + + // + // Filter for paired-end samples if running metaSPAdes / metaviralSPAdes / metaplasmidSPAdes + // ch_reads = reads if (params.spades_options.args.contains('--meta') || params.spades_options.args.contains('--bio')) { reads @@ -34,14 +35,14 @@ workflow ASSEMBLY_SPADES { .set { ch_reads } } - /* - * Assemble reads with SPAdes - */ + // + // Assemble reads with SPAdes + // SPADES ( ch_reads, hmm ) - /* - * Filter for empty scaffold files - */ + // + // Filter for empty scaffold files + // SPADES .out .scaffolds @@ -54,9 +55,9 @@ workflow ASSEMBLY_SPADES { .filter { meta, gfa -> gfa.size() > 0 } .set { ch_gfa } - /* - * Generate assembly visualisation with Bandage - */ + // + // Generate assembly visualisation with Bandage + // ch_bandage_png = Channel.empty() ch_bandage_svg = Channel.empty() ch_bandage_version = Channel.empty() @@ -67,9 +68,9 @@ workflow ASSEMBLY_SPADES { ch_bandage_svg = BANDAGE_IMAGE.out.svg } - /* - * Downstream assembly steps - */ + // + // Downstream assembly steps + // ASSEMBLY_QC ( ch_scaffolds, fasta, diff --git a/subworkflows/local/assembly_unicycler.nf b/subworkflows/local/assembly_unicycler.nf index 9a965a1d..f0d0b2ea 100644 --- a/subworkflows/local/assembly_unicycler.nf +++ b/subworkflows/local/assembly_unicycler.nf @@ -1,6 +1,6 @@ -/* - * Assembly and downstream processing for Unicycler scaffolds - */ +// +// Assembly and downstream processing for Unicycler scaffolds +// params.unicycler_options = [:] params.bandage_options = [:] @@ -23,14 +23,15 @@ workflow ASSEMBLY_UNICYCLER { blast_header // channel: /path/to/blast_header.txt main: - /* - * Assemble reads with Unicycler - */ + + // + // Assemble reads with Unicycler + // UNICYCLER ( reads ) - /* - * Filter for empty scaffold files - */ + // + // Filter for empty scaffold files + // UNICYCLER .out .scaffolds @@ -43,9 +44,9 @@ workflow ASSEMBLY_UNICYCLER { .filter { meta, gfa -> gfa.size() > 0 } 
.set { ch_gfa } - /* - * Generate assembly visualisation with Bandage - */ + // + // Generate assembly visualisation with Bandage + // ch_bandage_png = Channel.empty() ch_bandage_svg = Channel.empty() ch_bandage_version = Channel.empty() @@ -56,9 +57,9 @@ workflow ASSEMBLY_UNICYCLER { ch_bandage_svg = BANDAGE_IMAGE.out.svg } - /* - * Downstream assembly steps - */ + // + // Downstream assembly steps + // ASSEMBLY_QC ( ch_scaffolds, fasta, @@ -97,5 +98,4 @@ workflow ASSEMBLY_UNICYCLER { plasmidid_fasta = ASSEMBLY_QC.out.plasmidid_fasta // channel: [ val(meta), [ fasta_files/ ] ] plasmidid_kmer = ASSEMBLY_QC.out.plasmidid_kmer // channel: [ val(meta), [ kmer/ ] ] plasmidid_version = ASSEMBLY_QC.out.plasmidid_version // path: *.version.txt - } diff --git a/subworkflows/local/input_check.nf b/subworkflows/local/input_check.nf index 46fae8d6..7cfb85d4 100644 --- a/subworkflows/local/input_check.nf +++ b/subworkflows/local/input_check.nf @@ -1,6 +1,6 @@ -/* - * Check input samplesheet and get read channels - */ +// +// Check input samplesheet and get read channels +// params.options = [:] diff --git a/subworkflows/local/make_consensus.nf b/subworkflows/local/make_consensus.nf index bfdfecc6..342f2735 100644 --- a/subworkflows/local/make_consensus.nf +++ b/subworkflows/local/make_consensus.nf @@ -1,6 +1,6 @@ -/* - * Run various tools to generate a masked genome consensus sequence - */ +// +// Run various tools to generate a masked genome consensus sequence +// params.genomecov_options = [:] params.merge_options = [:] @@ -40,5 +40,4 @@ workflow MAKE_CONSENSUS { pdf = PLOT_BASE_DENSITY.out.pdf // channel: [ val(meta), [ pdf ] ] bedtools_version = BEDTOOLS_MERGE.out.version // path: *.version.txt bcftools_version = BCFTOOLS_CONSENSUS.out.version // path: *.version.txt - } diff --git a/subworkflows/local/prepare_genome_illumina.nf b/subworkflows/local/prepare_genome_illumina.nf index 75f6774f..86098445 100644 --- a/subworkflows/local/prepare_genome_illumina.nf +++ 
b/subworkflows/local/prepare_genome_illumina.nf @@ -1,6 +1,6 @@ -/* - * Uncompress and prepare reference genome files -*/ +// +// Uncompress and prepare reference genome files +// params.genome_options = [:] params.index_options = [:] @@ -32,18 +32,19 @@ workflow PREPARE_GENOME { dummy_file main: - /* - * Uncompress genome fasta file if required - */ + + // + // Uncompress genome fasta file if required + // if (params.fasta.endsWith('.gz')) { ch_fasta = GUNZIP_FASTA ( params.fasta ).gunzip } else { ch_fasta = file(params.fasta) } - /* - * Uncompress GFF annotation file - */ + // + // Uncompress GFF annotation file + // if (params.gff) { if (params.gff.endsWith('.gz')) { ch_gff = GUNZIP_GFF ( params.gff ).gunzip @@ -54,9 +55,9 @@ workflow PREPARE_GENOME { ch_gff = dummy_file } - /* - * Prepare reference files required for variant calling - */ + // + // Prepare reference files required for variant calling + // ch_kraken2_db = Channel.empty() if (!params.skip_kraken2) { if (params.kraken2_db) { @@ -70,9 +71,9 @@ workflow PREPARE_GENOME { } } - /* - * Prepare files required for amplicon data - */ + // + // Prepare files required for amplicon data + // ch_primer_bed = Channel.empty() ch_primer_fasta = Channel.empty() ch_primer_collapsed_bed = Channel.empty() @@ -102,9 +103,9 @@ workflow PREPARE_GENOME { } } - /* - * Prepare reference files required for variant calling - */ + // + // Prepare reference files required for variant calling + // ch_bowtie2_index = Channel.empty() if (!params.skip_variants) { if (params.bowtie2_index) { @@ -118,9 +119,9 @@ workflow PREPARE_GENOME { } } - /* - * Prepare reference files required for de novo assembly - */ + // + // Prepare reference files required for de novo assembly + // ch_blast_db = Channel.empty() if (!params.skip_assembly) { if (!params.skip_blast) { @@ -136,9 +137,9 @@ workflow PREPARE_GENOME { } } - /* - * Make snpEff database - */ + // + // Make snpEff database + // ch_snpeff_db = Channel.empty() ch_snpeff_config = 
Channel.empty() if (!params.skip_variants && params.gff && !params.skip_snpeff) { diff --git a/subworkflows/local/prepare_genome_nanopore.nf b/subworkflows/local/prepare_genome_nanopore.nf index 6b01cd8e..dfe7442a 100644 --- a/subworkflows/local/prepare_genome_nanopore.nf +++ b/subworkflows/local/prepare_genome_nanopore.nf @@ -1,6 +1,6 @@ -/* - * Uncompress and prepare reference genome files -*/ +// +// Uncompress and prepare reference genome files +// params.genome_options = [:] params.collapse_primers_options = [:] @@ -18,18 +18,19 @@ workflow PREPARE_GENOME { dummy_file main: - /* - * Uncompress genome fasta file if required - */ + + // + // Uncompress genome fasta file if required + // if (params.fasta.endsWith('.gz')) { ch_fasta = GUNZIP_FASTA ( params.fasta ).gunzip } else { ch_fasta = file(params.fasta) } - /* - * Uncompress GFF annotation file - */ + // + // Uncompress GFF annotation file + // if (params.gff) { if (params.gff.endsWith('.gz')) { ch_gff = GUNZIP_GFF ( params.gff ).gunzip @@ -40,9 +41,9 @@ workflow PREPARE_GENOME { ch_gff = dummy_file } - /* - * Uncompress primer BED file - */ + // + // Uncompress primer BED file + // ch_primer_bed = Channel.empty() if (params.primer_bed) { if (params.primer_bed.endsWith('.gz')) { @@ -52,17 +53,17 @@ workflow PREPARE_GENOME { } } - /* - * Generate collapsed BED file - */ + // + // Generate collapsed BED file + // ch_primer_collapsed_bed = Channel.empty() if (!params.skip_mosdepth) { ch_primer_collapsed_bed = COLLAPSE_PRIMERS ( ch_primer_bed, params.primer_left_suffix, params.primer_right_suffix ) } - /* - * Make snpEff database - */ + // + // Make snpEff database + // ch_snpeff_db = Channel.empty() ch_snpeff_config = Channel.empty() if (params.gff && !params.skip_snpeff) { diff --git a/subworkflows/local/primer_trim_ivar.nf b/subworkflows/local/primer_trim_ivar.nf index f9e6dfa3..b1589457 100644 --- a/subworkflows/local/primer_trim_ivar.nf +++ b/subworkflows/local/primer_trim_ivar.nf @@ -1,6 +1,6 @@ -/* - * 
iVar trim, sort, index BAM file and run samtools stats, flagstat and idxstats - */ +// +// iVar trim, sort, index BAM file and run samtools stats, flagstat and idxstats +// params.ivar_trim_options = [:] params.samtools_options = [:] @@ -15,14 +15,15 @@ workflow PRIMER_TRIM_IVAR { bed // path : bed main: - /* - * iVar trim primers - */ + + // + // iVar trim primers + // IVAR_TRIM ( bam, bed ) - /* - * Sort, index BAM file and run samtools stats, flagstat and idxstats - */ + // + // Sort, index BAM file and run samtools stats, flagstat and idxstats + // BAM_SORT_SAMTOOLS ( IVAR_TRIM.out.bam ) emit: diff --git a/subworkflows/local/snpeff_snpsift.nf b/subworkflows/local/snpeff_snpsift.nf index 40b05bba..05352199 100644 --- a/subworkflows/local/snpeff_snpsift.nf +++ b/subworkflows/local/snpeff_snpsift.nf @@ -1,6 +1,6 @@ -/* - * Run snpEff, bgzip, tabix, stats and SnpSift commands - */ +// +// Run snpEff, bgzip, tabix, stats and SnpSift commands +// params.snpeff_options = [:] params.bgzip_options = [:] @@ -20,6 +20,7 @@ workflow SNPEFF_SNPSIFT { fasta // path : genome.fasta main: + SNPEFF_ANN ( vcf, db, config, fasta ) VCF_BGZIP_TABIX_STATS ( SNPEFF_ANN.out.vcf ) diff --git a/subworkflows/local/variants_bcftools.nf b/subworkflows/local/variants_bcftools.nf index 82bfff38..22f84c07 100644 --- a/subworkflows/local/variants_bcftools.nf +++ b/subworkflows/local/variants_bcftools.nf @@ -1,6 +1,6 @@ -/* - * Variant calling and downstream processing for BCFTools - */ +// +// Variant calling and downstream processing for BCFTools +// params.bcftools_mpileup_options = [:] params.quast_options = [:] @@ -37,14 +37,15 @@ workflow VARIANTS_BCFTOOLS { snpeff_config // channel: /path/to/snpeff.config main: - /* - * Call variants - */ + + // + // Call variants + // BCFTOOLS_MPILEUP ( bam, fasta ) - /* - * Create genome consensus using variants in VCF, run QUAST and pangolin - */ + // + // Create genome consensus using variants in VCF, run QUAST and pangolin + // ch_consensus = 
Channel.empty() ch_bases_tsv = Channel.empty() ch_bases_pdf = Channel.empty() @@ -83,9 +84,9 @@ workflow VARIANTS_BCFTOOLS { } } - /* - * Annotate variants - */ + // + // Annotate variants + // ch_snpeff_vcf = Channel.empty() ch_snpeff_tbi = Channel.empty() ch_snpeff_stats = Channel.empty() @@ -108,9 +109,9 @@ workflow VARIANTS_BCFTOOLS { ch_snpsift_version = SNPEFF_SNPSIFT.out.snpsift_version } - /* - * MODULE: Variant screenshots with ASCIIGenome - */ + // + // Variant screenshots with ASCIIGenome + // ch_asciigenome_pdf = Channel.empty() ch_asciigenome_version = Channel.empty() if (!params.skip_asciigenome) { diff --git a/subworkflows/local/variants_ivar.nf b/subworkflows/local/variants_ivar.nf index 45b7f1f8..66afa779 100644 --- a/subworkflows/local/variants_ivar.nf +++ b/subworkflows/local/variants_ivar.nf @@ -1,6 +1,6 @@ -/* - * Variant calling and downstream processing for IVar - */ +// +// Variant calling and downstream processing for IVar +// params.ivar_variants_options = [:] params.ivar_variants_to_vcf_options = [:] @@ -41,21 +41,22 @@ workflow VARIANTS_IVAR { ivar_multiqc_header // channel: /path/to/multiqc_header for ivar variants main: - /* - * Call variants - */ + + // + // Call variants + // IVAR_VARIANTS ( bam, fasta, gff ) - /* - * Convert original iVar output to VCF, zip and index - */ + // + // Convert original iVar output to VCF, zip and index + // IVAR_VARIANTS_TO_VCF ( IVAR_VARIANTS.out.tsv, ivar_multiqc_header ) VCF_BGZIP_TABIX_STATS ( IVAR_VARIANTS_TO_VCF.out.vcf ) - /* - * Create genome consensus - */ + // + // Create genome consensus + // ch_consensus = Channel.empty() ch_consensus_qual = Channel.empty() ch_bases_tsv = Channel.empty() @@ -96,9 +97,9 @@ workflow VARIANTS_IVAR { } } - /* - * Annotate variants - */ + // + // Annotate variants + // ch_snpeff_vcf = Channel.empty() ch_snpeff_tbi = Channel.empty() ch_snpeff_stats = Channel.empty() @@ -121,9 +122,9 @@ workflow VARIANTS_IVAR { ch_snpsift_version = 
SNPEFF_SNPSIFT.out.snpsift_version } - /* - * MODULE: Variant screenshots with ASCIIGenome - */ + // + // Variant screenshots with ASCIIGenome + // ch_asciigenome_pdf = Channel.empty() ch_asciigenome_version = Channel.empty() if (!params.skip_asciigenome) { diff --git a/subworkflows/nf-core/align_bowtie2.nf b/subworkflows/nf-core/align_bowtie2.nf index 967228d4..2219a10b 100644 --- a/subworkflows/nf-core/align_bowtie2.nf +++ b/subworkflows/nf-core/align_bowtie2.nf @@ -1,6 +1,6 @@ -/* - * Alignment with BOWTIE2 - */ +// +// Alignment with Bowtie2 +// params.align_options = [:] params.samtools_options = [:] @@ -14,14 +14,15 @@ workflow ALIGN_BOWTIE2 { index // channel: /path/to/bowtie2/index/ main: - /* - * Map reads with BOWTIE2 - */ + + // + // Map reads with Bowtie2 + // BOWTIE2_ALIGN ( reads, index ) - /* - * Sort, index BAM file and run samtools stats, flagstat and idxstats - */ + // + // Sort, index BAM file and run samtools stats, flagstat and idxstats + // BAM_SORT_SAMTOOLS ( BOWTIE2_ALIGN.out.bam ) emit: diff --git a/subworkflows/nf-core/bam_sort_samtools.nf b/subworkflows/nf-core/bam_sort_samtools.nf index e4431afb..e410a706 100644 --- a/subworkflows/nf-core/bam_sort_samtools.nf +++ b/subworkflows/nf-core/bam_sort_samtools.nf @@ -1,6 +1,6 @@ -/* - * Sort, index BAM file and run samtools stats, flagstat and idxstats - */ +// +// Sort, index BAM file and run samtools stats, flagstat and idxstats +// params.options = [:] diff --git a/subworkflows/nf-core/bam_stats_samtools.nf b/subworkflows/nf-core/bam_stats_samtools.nf index 4676d39c..c262e003 100644 --- a/subworkflows/nf-core/bam_stats_samtools.nf +++ b/subworkflows/nf-core/bam_stats_samtools.nf @@ -1,6 +1,6 @@ -/* - * Run SAMtools stats, flagstat and idxstats - */ +// +// Run SAMtools stats, flagstat and idxstats +// params.options = [:] diff --git a/subworkflows/nf-core/fastqc_fastp.nf b/subworkflows/nf-core/fastqc_fastp.nf index 79f05132..2061d946 100644 --- a/subworkflows/nf-core/fastqc_fastp.nf +++ 
b/subworkflows/nf-core/fastqc_fastp.nf @@ -1,6 +1,6 @@ -/* - * Read QC and trimming - */ +// +// Read QC and trimming +// params.fastqc_raw_options = [:] params.fastqc_trim_options = [:] diff --git a/subworkflows/nf-core/filter_bam_samtools.nf b/subworkflows/nf-core/filter_bam_samtools.nf index f113cdec..fdd317e0 100644 --- a/subworkflows/nf-core/filter_bam_samtools.nf +++ b/subworkflows/nf-core/filter_bam_samtools.nf @@ -1,6 +1,6 @@ -/* - * filter co-ordinate sorted BAM, index and run samtools stats, flagstat and idxstats - */ +// +// Filter co-ordinate sorted BAM, index and run samtools stats, flagstat and idxstats +// params.samtools_view_options = [:] params.samtools_index_options = [:] @@ -14,14 +14,15 @@ workflow FILTER_BAM_SAMTOOLS { bam // channel: [ val(meta), [ bam ] ] main: - /* - * Filter BAM using Samtools view - */ + + // + // Filter BAM using Samtools view + // SAMTOOLS_VIEW ( bam ) - /* - * Index BAM file and run samtools stats, flagstat and idxstats - */ + // + // Index BAM file and run samtools stats, flagstat and idxstats + // SAMTOOLS_INDEX ( SAMTOOLS_VIEW.out.bam ) BAM_STATS_SAMTOOLS ( SAMTOOLS_VIEW.out.bam.join(SAMTOOLS_INDEX.out.bai, by: [0]) ) diff --git a/subworkflows/nf-core/mark_duplicates_picard.nf b/subworkflows/nf-core/mark_duplicates_picard.nf index 2893bd53..6c111de1 100644 --- a/subworkflows/nf-core/mark_duplicates_picard.nf +++ b/subworkflows/nf-core/mark_duplicates_picard.nf @@ -1,6 +1,6 @@ -/* - * Picard MarkDuplicates, sort, index BAM file and run samtools stats, flagstat and idxstats - */ +// +// Picard MarkDuplicates, sort, index BAM file and run samtools stats, flagstat and idxstats +// params.markduplicates_options = [:] params.samtools_options = [:] @@ -14,14 +14,15 @@ workflow MARK_DUPLICATES_PICARD { bam // channel: [ val(meta), [ bam ] ] main: - /* - * Picard MarkDuplicates - */ + + // + // Picard MarkDuplicates + // PICARD_MARKDUPLICATES ( bam ) - /* - * Index BAM file and run samtools stats, flagstat and idxstats - */ 
+ // + // Index BAM file and run samtools stats, flagstat and idxstats + // SAMTOOLS_INDEX ( PICARD_MARKDUPLICATES.out.bam ) BAM_STATS_SAMTOOLS ( PICARD_MARKDUPLICATES.out.bam.join(SAMTOOLS_INDEX.out.bai, by: [0]) ) diff --git a/subworkflows/nf-core/vcf_bgzip_tabix_stats.nf b/subworkflows/nf-core/vcf_bgzip_tabix_stats.nf index 46faf6ef..291b8b7d 100644 --- a/subworkflows/nf-core/vcf_bgzip_tabix_stats.nf +++ b/subworkflows/nf-core/vcf_bgzip_tabix_stats.nf @@ -1,6 +1,6 @@ -/* - * Run BCFTools bgzip, tabix and stats commands - */ +// +// Run BCFTools bgzip, tabix and stats commands +// params.bgzip_options = [:] params.tabix_options = [:] diff --git a/subworkflows/nf-core/vcf_tabix_stats.nf b/subworkflows/nf-core/vcf_tabix_stats.nf index d0916940..aedb68a3 100644 --- a/subworkflows/nf-core/vcf_tabix_stats.nf +++ b/subworkflows/nf-core/vcf_tabix_stats.nf @@ -1,6 +1,6 @@ -/* - * Run BCFTools tabix and stats commands - */ +// +// Run BCFTools tabix and stats commands +// params.tabix_options = [:] params.stats_options = [:] diff --git a/workflows/illumina.nf b/workflows/illumina.nf index 4188e586..d4be0c6f 100644 --- a/workflows/illumina.nf +++ b/workflows/illumina.nf @@ -76,9 +76,9 @@ include { MULTIQC_CUSTOM_TWOCOL_TSV as MULTIQC_CUSTOM_TWOCOL_TSV_FAIL_MAPPED include { MULTIQC_CUSTOM_TWOCOL_TSV as MULTIQC_CUSTOM_TWOCOL_TSV_IVAR_PANGOLIN } from '../modules/local/multiqc_custom_twocol_tsv' addParams( options: [publish_files: false] ) include { MULTIQC_CUSTOM_TWOCOL_TSV as MULTIQC_CUSTOM_TWOCOL_TSV_BCFTOOLS_PANGOLIN } from '../modules/local/multiqc_custom_twocol_tsv' addParams( options: [publish_files: false] ) -/* - * SUBWORKFLOW: Consisting of a mix of local and nf-core/modules - */ +// +// SUBWORKFLOW: Consisting of a mix of local and nf-core/modules +// def publish_genome_options = params.save_reference ? [publish_dir: 'genome'] : [publish_files: false] def publish_index_options = params.save_reference ? 
[publish_dir: 'genome/index'] : [publish_files: false] def publish_db_options = params.save_reference ? [publish_dir: 'genome/db'] : [publish_files: false] @@ -125,9 +125,9 @@ include { ASSEMBLY_MINIA } from '../subworkflows/local/assembly_minia' ======================================================================================== */ -/* - * MODULE: Installed directly from nf-core/modules - */ +// +// MODULE: Installed directly from nf-core/modules +// include { CAT_FASTQ } from '../modules/nf-core/software/cat/fastq/main' addParams( options: modules['illumina_cat_fastq'] ) include { FASTQC } from '../modules/nf-core/software/fastqc/main' addParams( options: modules['illumina_cutadapt_fastqc'] ) include { KRAKEN2_RUN } from '../modules/nf-core/software/kraken2/run/main' addParams( options: modules['illumina_kraken2_run'] ) @@ -135,9 +135,9 @@ include { PICARD_COLLECTMULTIPLEMETRICS } from '../modules/nf-core/software/pica include { MOSDEPTH as MOSDEPTH_GENOME } from '../modules/nf-core/software/mosdepth/main' addParams( options: modules['illumina_mosdepth_genome'] ) include { MOSDEPTH as MOSDEPTH_AMPLICON } from '../modules/nf-core/software/mosdepth/main' addParams( options: modules['illumina_mosdepth_amplicon'] ) -/* - * SUBWORKFLOW: Consisting entirely of nf-core/modules - */ +// +// SUBWORKFLOW: Consisting entirely of nf-core/modules +// def fastp_options = modules['illumina_fastp'] if (params.save_trimmed_fail) { fastp_options.publish_files.put('fail.fastq.gz','') } @@ -166,9 +166,9 @@ workflow ILLUMINA { ch_software_versions = Channel.empty() - /* - * SUBWORKFLOW: Uncompress and prepare reference genome files - */ + // + // SUBWORKFLOW: Uncompress and prepare reference genome files + // PREPARE_GENOME ( ch_dummy_file ) @@ -195,9 +195,9 @@ workflow ILLUMINA { } } - /* - * SUBWORKFLOW: Read in samplesheet, validate and stage input files - */ + // + // SUBWORKFLOW: Read in samplesheet, validate and stage input files + // INPUT_CHECK ( ch_input, params.platform 
@@ -217,18 +217,18 @@ workflow ILLUMINA { } .set { ch_fastq } - /* - * MODULE: Concatenate FastQ files from same sample if required - */ + // + // MODULE: Concatenate FastQ files from same sample if required + // CAT_FASTQ ( ch_fastq.multiple ) .mix(ch_fastq.single) .set { ch_cat_fastq } - /* - * SUBWORKFLOW: Read QC and trim adapters - */ + // + // SUBWORKFLOW: Read QC and trim adapters + // FASTQC_FASTP ( ch_cat_fastq ) @@ -236,9 +236,9 @@ workflow ILLUMINA { ch_software_versions = ch_software_versions.mix(FASTQC_FASTP.out.fastqc_version.first().ifEmpty(null)) ch_software_versions = ch_software_versions.mix(FASTQC_FASTP.out.fastp_version.first().ifEmpty(null)) - /* - * Filter empty FastQ files after adapter trimming - */ + // + // Filter empty FastQ files after adapter trimming + // if (!params.skip_fastp) { ch_variants_fastq .join(FASTQC_FASTP.out.trim_json) @@ -249,9 +249,9 @@ workflow ILLUMINA { .set { ch_variants_fastq } } - /* - * MODULE: Run Kraken2 for removal of host reads - */ + // + // MODULE: Run Kraken2 for removal of host reads + // ch_assembly_fastq = ch_variants_fastq ch_kraken2_multiqc = Channel.empty() if (!params.skip_kraken2) { @@ -271,9 +271,9 @@ workflow ILLUMINA { } } - /* - * SUBWORKFLOW: Alignment with Bowtie2 - */ + // + // SUBWORKFLOW: Alignment with Bowtie2 + // ch_bam = Channel.empty() ch_bai = Channel.empty() ch_bowtie2_multiqc = Channel.empty() @@ -291,9 +291,9 @@ workflow ILLUMINA { ch_software_versions = ch_software_versions.mix(ALIGN_BOWTIE2.out.samtools_version.first().ifEmpty(null)) } - /* - * Filter channels to get samples that passed Bowtie2 minimum mapped reads threshold - */ + // + // Filter channels to get samples that passed Bowtie2 minimum mapped reads threshold + // ch_fail_mapping_multiqc = Channel.empty() if (!params.skip_variants) { ch_bowtie2_flagstat_multiqc @@ -330,9 +330,9 @@ workflow ILLUMINA { .set { ch_fail_mapping_multiqc } } - /* - * SUBWORKFLOW: Trim primer sequences from reads with iVar - */ + // + // 
SUBWORKFLOW: Trim primer sequences from reads with iVar + // ch_ivar_trim_flagstat_multiqc = Channel.empty() if (!params.skip_variants && !params.skip_ivar_trim && params.protocol == 'amplicon') { PRIMER_TRIM_IVAR ( @@ -345,9 +345,9 @@ workflow ILLUMINA { ch_software_versions = ch_software_versions.mix(PRIMER_TRIM_IVAR.out.ivar_version.first().ifEmpty(null)) } - /* - * SUBWORKFLOW: Mark duplicate reads - */ + // + // SUBWORKFLOW: Mark duplicate reads + // ch_markduplicates_flagstat_multiqc = Channel.empty() if (!params.skip_variants && !params.skip_markduplicates) { MARK_DUPLICATES_PICARD ( @@ -359,9 +359,9 @@ workflow ILLUMINA { ch_software_versions = ch_software_versions.mix(MARK_DUPLICATES_PICARD.out.picard_version.first().ifEmpty(null)) } - /* - * MODULE: Picard metrics - */ + // + // MODULE: Picard metrics + // if (!params.skip_variants && !params.skip_picard_metrics) { PICARD_COLLECTMULTIPLEMETRICS ( ch_bam, @@ -370,9 +370,9 @@ workflow ILLUMINA { ch_software_versions = ch_software_versions.mix(PICARD_COLLECTMULTIPLEMETRICS.out.version.first().ifEmpty(null)) } - /* - * MODULE: Genome-wide and amplicon-specific coverage QC plots - */ + // + // MODULE: Genome-wide and amplicon-specific coverage QC plots + // ch_mosdepth_multiqc = Channel.empty() if (!params.skip_variants && !params.skip_mosdepth) { @@ -401,9 +401,9 @@ workflow ILLUMINA { } } - /* - * SUBWORKFLOW: Call variants with IVar - */ + // + // SUBWORKFLOW: Call variants with IVar + // ch_ivar_vcf = Channel.empty() ch_ivar_tbi = Channel.empty() ch_ivar_counts_multiqc = Channel.empty() @@ -438,9 +438,9 @@ workflow ILLUMINA { ch_software_versions = ch_software_versions.mix(VARIANTS_IVAR.out.nextclade_version.first().ifEmpty(null)) ch_software_versions = ch_software_versions.mix(VARIANTS_IVAR.out.asciigenome_version.first().ifEmpty(null)) - /* - * MODULE: Get Pangolin lineage information for MultiQC report - */ + // + // MODULE: Get Pangolin lineage information for MultiQC report + // 
ch_ivar_pangolin_report .map { meta, report -> def lineage = WorkflowCommons.getPangolinLineage(report) @@ -457,9 +457,9 @@ workflow ILLUMINA { .set { ch_ivar_pangolin_multiqc } } - /* - * SUBWORKFLOW: Call variants with BCFTools - */ + // + // SUBWORKFLOW: Call variants with BCFTools + // ch_bcftools_vcf = Channel.empty() ch_bcftools_tbi = Channel.empty() ch_bcftools_stats_multiqc = Channel.empty() @@ -490,9 +490,9 @@ workflow ILLUMINA { ch_software_versions = ch_software_versions.mix(VARIANTS_BCFTOOLS.out.nextclade_version.first().ifEmpty(null)) ch_software_versions = ch_software_versions.mix(VARIANTS_BCFTOOLS.out.asciigenome_version.first().ifEmpty(null)) - /* - * MODULE: Get Pangolin lineage information for MultiQC report - */ + // + // MODULE: Get Pangolin lineage information for MultiQC report + // ch_bcftools_pangolin_report .map { meta, report -> def lineage = WorkflowCommons.getPangolinLineage(report) @@ -509,9 +509,9 @@ workflow ILLUMINA { .set { ch_bcftools_pangolin_multiqc } } - /* - * MODULE: Intersect variants across callers - */ + // + // MODULE: Intersect variants across callers + // if (!params.skip_variants && callers.size() > 1) { BCFTOOLS_ISEC ( ch_ivar_vcf @@ -521,9 +521,9 @@ workflow ILLUMINA { ) } - /* - * MODULE: Primer trimming with Cutadapt - */ + // + // MODULE: Primer trimming with Cutadapt + // ch_cutadapt_multiqc = Channel.empty() if (params.protocol == 'amplicon' && !params.skip_assembly && !params.skip_cutadapt) { CUTADAPT ( @@ -541,9 +541,9 @@ workflow ILLUMINA { } } - /* - * SUBWORKFLOW: Run SPAdes assembly and downstream analysis - */ + // + // SUBWORKFLOW: Run SPAdes assembly and downstream analysis + // ch_spades_quast_multiqc = Channel.empty() if (!params.skip_assembly && 'spades' in assemblers) { ASSEMBLY_SPADES ( @@ -563,9 +563,9 @@ workflow ILLUMINA { ch_software_versions = ch_software_versions.mix(ASSEMBLY_SPADES.out.plasmidid_version.first().ifEmpty(null)) } - /* - * SUBWORKFLOW: Run Unicycler assembly and downstream 
analysis - */ + // + // SUBWORKFLOW: Run Unicycler assembly and downstream analysis + // ch_unicycler_quast_multiqc = Channel.empty() if (!params.skip_assembly && 'unicycler' in assemblers) { ASSEMBLY_UNICYCLER ( @@ -584,9 +584,9 @@ workflow ILLUMINA { ch_software_versions = ch_software_versions.mix(ASSEMBLY_UNICYCLER.out.plasmidid_version.first().ifEmpty(null)) } - /* - * SUBWORKFLOW: Run minia assembly and downstream analysis - */ + // + // SUBWORKFLOW: Run minia assembly and downstream analysis + // ch_minia_quast_multiqc = Channel.empty() if (!params.skip_assembly && 'minia' in assemblers) { ASSEMBLY_MINIA ( @@ -604,10 +604,9 @@ workflow ILLUMINA { ch_software_versions = ch_software_versions.mix(ASSEMBLY_MINIA.out.plasmidid_version.first().ifEmpty(null)) } - /* - * MODULE: Pipeline reporting - */ - // Get unique list of files containing version information + // + // MODULE: Pipeline reporting + // ch_software_versions .map { it -> if (it) [ it.baseName, it ] } .groupTuple() @@ -620,9 +619,9 @@ workflow ILLUMINA { ch_software_versions ) - /* - * MODULE: MultiQC - */ + // + // MODULE: MultiQC + // if (!params.skip_multiqc) { workflow_summary = WorkflowCommons.paramsSummaryMultiqc(workflow, summary_params) ch_workflow_summary = Channel.value(workflow_summary) diff --git a/workflows/nanopore.nf b/workflows/nanopore.nf index 4ef26ed2..23b378a4 100644 --- a/workflows/nanopore.nf +++ b/workflows/nanopore.nf @@ -68,9 +68,9 @@ include { MULTIQC_CUSTOM_TWOCOL_TSV as MULTIQC_CUSTOM_PANGOLIN } fro include { PLOT_MOSDEPTH_REGIONS as PLOT_MOSDEPTH_REGIONS_GENOME } from '../modules/local/plot_mosdepth_regions' addParams( options: modules['nanopore_plot_mosdepth_regions_genome'] ) include { PLOT_MOSDEPTH_REGIONS as PLOT_MOSDEPTH_REGIONS_AMPLICON } from '../modules/local/plot_mosdepth_regions' addParams( options: modules['nanopore_plot_mosdepth_regions_amplicon'] ) -/* - * SUBWORKFLOW: Consisting of a mix of local and nf-core/modules - */ +// +// SUBWORKFLOW: Consisting of a 
mix of local and nf-core/modules +// def publish_genome_options = params.save_reference ? [publish_dir: 'genome'] : [publish_files: false] def collapse_primers_options = modules['nanopore_collapse_primers'] def snpeff_build_options = modules['nanopore_snpeff_build'] @@ -89,9 +89,9 @@ include { SNPEFF_SNPSIFT } from '../subworkflows/local/snpeff_snpsift' ======================================================================================== */ -/* - * MODULE: Installed directly from nf-core/modules - */ +// +// MODULE: Installed directly from nf-core/modules +// def artic_minion_options = modules['nanopore_artic_minion'] artic_minion_options.args += params.artic_minion_caller == 'medaka' ? Utils.joinModuleArgs(['--medaka']) : '' @@ -108,9 +108,9 @@ include { NEXTCLADE } from '../modules/nf-core/software/next include { MOSDEPTH as MOSDEPTH_GENOME } from '../modules/nf-core/software/mosdepth/main' addParams( options: modules['nanopore_mosdepth_genome'] ) include { MOSDEPTH as MOSDEPTH_AMPLICON } from '../modules/nf-core/software/mosdepth/main' addParams( options: modules['nanopore_mosdepth_amplicon'] ) -/* - * SUBWORKFLOW: Consisting entirely of nf-core/modules - */ +// +// SUBWORKFLOW: Consisting entirely of nf-core/modules +// include { FILTER_BAM_SAMTOOLS } from '../subworkflows/nf-core/filter_bam_samtools' addParams( samtools_view_options: modules['nanopore_filter_bam'], samtools_index_options: modules['nanopore_filter_bam_stats'] ) /* @@ -128,9 +128,9 @@ workflow NANOPORE { ch_software_versions = Channel.empty() - /* - * MODULE: PycoQC on sequencing summary file - */ + // + // MODULE: PycoQC on sequencing summary file + // if (params.sequencing_summary && !params.skip_pycoqc) { PYCOQC ( ch_sequencing_summary @@ -138,9 +138,9 @@ workflow NANOPORE { } ch_software_versions = ch_software_versions.mix(PYCOQC.out.version.ifEmpty(null)) - /* - * SUBWORKFLOW: Uncompress and prepare reference genome files - */ + // + // SUBWORKFLOW: Uncompress and prepare reference 
genome files + // PREPARE_GENOME ( ch_dummy_file ) @@ -170,9 +170,9 @@ workflow NANOPORE { } .set { ch_fastq_dirs } - /* - * SUBWORKFLOW: Read in samplesheet containing sample to barcode mappings - */ + // + // SUBWORKFLOW: Read in samplesheet containing sample to barcode mappings + // if (params.input) { INPUT_CHECK ( ch_input, @@ -181,9 +181,9 @@ workflow NANOPORE { .join(ch_fastq_dirs, remainder: true) .set { ch_fastq_dirs } - /* - * MODULE: Create custom content file for MultiQC to report barcodes were allocated reads >= params.min_barcode_reads but no sample name in samplesheet - */ + // + // MODULE: Create custom content file for MultiQC to report barcodes were allocated reads >= params.min_barcode_reads but no sample name in samplesheet + // ch_fastq_dirs .filter { it[1] == null } .filter { it[-1] >= params.min_barcode_reads } @@ -198,9 +198,9 @@ workflow NANOPORE { ) ch_custom_no_sample_name_multiqc = MULTIQC_CUSTOM_FAIL_NO_SAMPLE_NAME.out - /* - * MODULE: Create custom content file for MultiQC to report samples that were in samplesheet but have no barcodes - */ + // + // MODULE: Create custom content file for MultiQC to report samples that were in samplesheet but have no barcodes + // ch_fastq_dirs .filter { it[-1] == null } .map { it -> [ "${it[1]}\t${it[0]}" ] } @@ -234,9 +234,9 @@ workflow NANOPORE { System.exit(1) } - /* - * MODULE: Create custom content file for MultiQC to report samples with reads < params.min_barcode_reads - */ + // + // MODULE: Create custom content file for MultiQC to report samples with reads < params.min_barcode_reads + // ch_fastq_dirs .branch { barcode, sample, dir, count -> pass: count > params.min_barcode_reads @@ -261,17 +261,17 @@ workflow NANOPORE { .map { barcode, sample, dir, count -> [ [ id: sample, barcode:barcode ], dir ] } .set { ch_fastq_dirs } - /* - * MODULE: Run Artic Guppyplex - */ + // + // MODULE: Run Artic Guppyplex + // ARTIC_GUPPYPLEX ( ch_fastq_dirs ) ch_software_versions = 
ch_software_versions.mix(ARTIC_GUPPYPLEX.out.version.first().ifEmpty(null)) - /* - * MODULE: Create custom content file for MultiQC to report samples with reads < params.min_guppyplex_reads - */ + // + // MODULE: Create custom content file for MultiQC to report samples with reads < params.min_guppyplex_reads + // ARTIC_GUPPYPLEX .out .fastq @@ -291,9 +291,9 @@ workflow NANOPORE { 'fail_guppyplex_count_samples' ) - /* - * MODULE: Nanoplot QC for FastQ files - */ + // + // MODULE: Nanoplot QC for FastQ files + // if (!params.skip_nanoplot) { NANOPLOT ( ARTIC_GUPPYPLEX.out.fastq @@ -301,9 +301,9 @@ workflow NANOPORE { ch_software_versions = ch_software_versions.mix(NANOPLOT.out.version.first().ifEmpty(null)) } - /* - * MODULE: Run Artic minion - */ + // + // MODULE: Run Artic minion + // ARTIC_MINION ( ARTIC_GUPPYPLEX.out.fastq.filter { it[-1].countFastq() > params.min_guppyplex_reads }, ch_fast5_dir, @@ -315,25 +315,25 @@ workflow NANOPORE { params.primer_set_version ) - /* - * SUBWORKFLOW: Filter unmapped reads from BAM - */ + // + // SUBWORKFLOW: Filter unmapped reads from BAM + // FILTER_BAM_SAMTOOLS ( ARTIC_MINION.out.bam ) ch_software_versions = ch_software_versions.mix(FILTER_BAM_SAMTOOLS.out.samtools_version.first().ifEmpty(null)) - /* - * MODULE: VCF stats with bcftools stats - */ + // + // MODULE: VCF stats with bcftools stats + // BCFTOOLS_STATS ( ARTIC_MINION.out.vcf ) ch_software_versions = ch_software_versions.mix(BCFTOOLS_STATS.out.version.ifEmpty(null)) - /* - * MODULE: Genome-wide and amplicon-specific coverage QC plots - */ + // + // MODULE: Genome-wide and amplicon-specific coverage QC plots + // ch_mosdepth_multiqc = Channel.empty() if (!params.skip_mosdepth) { @@ -360,9 +360,9 @@ workflow NANOPORE { ) } - /* - * MODULE: Lineage analysis with Pangolin - */ + // + // MODULE: Lineage analysis with Pangolin + // ch_pangolin_multiqc = Channel.empty() if (!params.skip_pangolin) { PANGOLIN ( @@ -370,9 +370,9 @@ workflow NANOPORE { ) ch_software_versions 
= ch_software_versions.mix(PANGOLIN.out.version.ifEmpty(null)) - /* - * MODULE: Get Pangolin lineage information for MultiQC report - */ + // + // MODULE: Get Pangolin lineage information for MultiQC report + // PANGOLIN .out .report @@ -391,9 +391,9 @@ workflow NANOPORE { .set { ch_pangolin_multiqc } } - /* - * MODULE: Clade assignment, mutation calling, and sequence quality checks with Nextclade - */ + // + // MODULE: Clade assignment, mutation calling, and sequence quality checks with Nextclade + // if (!params.skip_nextclade) { NEXTCLADE ( ARTIC_MINION.out.fasta, @@ -402,9 +402,9 @@ workflow NANOPORE { ch_software_versions = ch_software_versions.mix(NEXTCLADE.out.version.ifEmpty(null)) } - /* - * MODULE: Consensus QC across all samples with QUAST - */ + // + // MODULE: Consensus QC across all samples with QUAST + // ch_quast_multiqc = Channel.empty() if (!params.skip_variants_quast) { QUAST ( @@ -418,9 +418,9 @@ workflow NANOPORE { ch_software_versions = ch_software_versions.mix(QUAST.out.version.ifEmpty(null)) } - /* - * SUBWORKFLOW: Annotate variants with snpEff - */ + // + // SUBWORKFLOW: Annotate variants with snpEff + // ch_snpeff_multiqc = Channel.empty() if (params.gff && !params.skip_snpeff) { SNPEFF_SNPSIFT ( @@ -434,9 +434,9 @@ workflow NANOPORE { ch_software_versions = ch_software_versions.mix(SNPEFF_SNPSIFT.out.snpsift_version.ifEmpty(null)) } - /* - * MODULE: Variant screenshots with ASCIIGenome - */ + // + // MODULE: Variant screenshots with ASCIIGenome + // if (!params.skip_asciigenome) { ARTIC_MINION .out @@ -461,10 +461,9 @@ workflow NANOPORE { ch_software_versions = ch_software_versions.mix(ASCIIGENOME.out.version.ifEmpty(null)) } - /* - * MODULE: Pipeline reporting - */ - // Get unique list of files containing version information + // + // MODULE: Pipeline reporting + // ch_software_versions .map { it -> if (it) [ it.baseName, it ] } .groupTuple() @@ -477,9 +476,9 @@ workflow NANOPORE { ch_software_versions ) - /* - * MODULE: MultiQC - */ + 
// + // MODULE: MultiQC + // if (!params.skip_multiqc) { workflow_summary = WorkflowCommons.paramsSummaryMultiqc(workflow, summary_params) ch_workflow_summary = Channel.value(workflow_summary) diff --git a/workflows/sra_download.nf b/workflows/sra_download.nf index 9545239c..e3353cc1 100644 --- a/workflows/sra_download.nf +++ b/workflows/sra_download.nf @@ -38,16 +38,16 @@ include { SRA_MERGE_SAMPLESHEET } from '../modules/local/sra_merge_samplesheet' workflow SRA_DOWNLOAD { - /* - * MODULE: Get SRA run information for public database ids - */ + // + // MODULE: Get SRA run information for public database ids + // SRA_IDS_TO_RUNINFO ( ch_public_data_ids ) - /* - * MODULE: Parse SRA run information, create file containing FTP links and read into workflow as [ meta, [reads] ] - */ + // + // MODULE: Parse SRA run information, create file containing FTP links and read into workflow as [ meta, [reads] ] + // SRA_RUNINFO_TO_FTP ( SRA_IDS_TO_RUNINFO.out.tsv ) @@ -65,36 +65,35 @@ workflow SRA_DOWNLOAD { .set { ch_sra_reads } if (!params.skip_sra_fastq_download) { - /* - * MODULE: If FTP link is provided in run information then download FastQ directly via FTP and validate with md5sums - */ + // + // MODULE: If FTP link is provided in run information then download FastQ directly via FTP and validate with md5sums + // SRA_FASTQ_FTP ( ch_sra_reads.map { meta, reads -> if (meta.fastq_1) [ meta, reads ] } ) - /* - * MODULE: Stage FastQ files downloaded by SRA together and auto-create a samplesheet for the pipeline - */ + // + // MODULE: Stage FastQ files downloaded by SRA together and auto-create a samplesheet for the pipeline + // SRA_TO_SAMPLESHEET ( SRA_FASTQ_FTP.out.fastq ) - /* - * MODULE: Create a merged samplesheet across all samples for the pipeline - */ + // + // MODULE: Create a merged samplesheet across all samples for the pipeline + // SRA_MERGE_SAMPLESHEET ( SRA_TO_SAMPLESHEET.out.csv.collect{it[1]} ) - /* - * If ids don't have a direct FTP download link write them 
to file for download outside of the pipeline - */ + // + // If ids don't have a direct FTP download link write them to file for download outside of the pipeline + // def no_ids_file = ["${params.outdir}", "${modules['sra_fastq_ftp'].publish_dir}", "IDS_NOT_DOWNLOADED.txt" ].join(File.separator) ch_sra_reads .map { meta, reads -> if (!meta.fastq_1) "${meta.id.split('_')[0..-2].join('_')}" } .unique() .collectFile(name: no_ids_file, sort: true, newLine: true) - } } From 36949d82518824fe6d854fedc58d2c76a91d907e Mon Sep 17 00:00:00 2001 From: Harshil Patel Date: Tue, 11 May 2021 22:38:17 +0100 Subject: [PATCH 12/17] Fix ECLint agaaaaaainnnnn --- conf/base.config | 4 ++-- subworkflows/local/assembly_unicycler.nf | 2 +- subworkflows/local/prepare_genome_illumina.nf | 2 +- subworkflows/local/snpeff_snpsift.nf | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/conf/base.config b/conf/base.config index dc250257..eed1ffab 100644 --- a/conf/base.config +++ b/conf/base.config @@ -2,8 +2,8 @@ ======================================================================================== nf-core/viralrecon Nextflow base config file ======================================================================================== - A 'blank slate' config file, appropriate for general use on most high performance - compute environments. Assumes that all software is installed and available on + A 'blank slate' config file, appropriate for general use on most high performance + compute environments. Assumes that all software is installed and available on the PATH. Runs in `local` mode - all jobs will be run on the logged in environment. 
---------------------------------------------------------------------------------------- */ diff --git a/subworkflows/local/assembly_unicycler.nf b/subworkflows/local/assembly_unicycler.nf index f0d0b2ea..9bde9d4f 100644 --- a/subworkflows/local/assembly_unicycler.nf +++ b/subworkflows/local/assembly_unicycler.nf @@ -23,7 +23,7 @@ workflow ASSEMBLY_UNICYCLER { blast_header // channel: /path/to/blast_header.txt main: - + // // Assemble reads with Unicycler // diff --git a/subworkflows/local/prepare_genome_illumina.nf b/subworkflows/local/prepare_genome_illumina.nf index 86098445..35f61fd6 100644 --- a/subworkflows/local/prepare_genome_illumina.nf +++ b/subworkflows/local/prepare_genome_illumina.nf @@ -32,7 +32,7 @@ workflow PREPARE_GENOME { dummy_file main: - + // // Uncompress genome fasta file if required // diff --git a/subworkflows/local/snpeff_snpsift.nf b/subworkflows/local/snpeff_snpsift.nf index 05352199..cee772e3 100644 --- a/subworkflows/local/snpeff_snpsift.nf +++ b/subworkflows/local/snpeff_snpsift.nf @@ -20,7 +20,7 @@ workflow SNPEFF_SNPSIFT { fasta // path : genome.fasta main: - + SNPEFF_ANN ( vcf, db, config, fasta ) VCF_BGZIP_TABIX_STATS ( SNPEFF_ANN.out.vcf ) From bca618010683a626a155ae6403bd90326551d7e6 Mon Sep 17 00:00:00 2001 From: Harshil Patel Date: Tue, 11 May 2021 23:53:27 +0100 Subject: [PATCH 13/17] Change comments in functions.nf --- modules/local/functions.nf | 32 +++++++++++++++----------------- 1 file changed, 15 insertions(+), 17 deletions(-) diff --git a/modules/local/functions.nf b/modules/local/functions.nf index 9d0137e3..2e2a7956 100644 --- a/modules/local/functions.nf +++ b/modules/local/functions.nf @@ -1,19 +1,17 @@ -/* - * ----------------------------------------------------- - * Utility functions used in nf-core DSL2 module files - * ----------------------------------------------------- - */ +// +// Utility functions used in nf-core DSL2 module files +// -/* - * Extract name of software tool from process name using 
$task.process - */ +// +// Extract name of software tool from process name using $task.process +// def getSoftwareName(task_process) { return task_process.tokenize(':')[-1].tokenize('_')[0].toLowerCase() } -/* - * Function to initialise default values and to generate a Groovy Map of available options for nf-core modules - */ +// +// Function to initialise default values and to generate a Groovy Map of available options for nf-core modules +// def initOptions(Map args) { def Map options = [:] options.args = args.args ?: '' @@ -26,18 +24,18 @@ def initOptions(Map args) { return options } -/* - * Tidy up and join elements of a list to return a path string - */ +// +// Tidy up and join elements of a list to return a path string +// def getPathFromList(path_list) { def paths = path_list.findAll { item -> !item?.trim().isEmpty() } // Remove empty entries paths = paths.collect { it.trim().replaceAll("^[/]+|[/]+\$", "") } // Trim whitespace and trailing slashes return paths.join('/') } -/* - * Function to save/publish module results - */ +// +// Function to save/publish module results +// def saveFiles(Map args) { if (!args.filename.endsWith('.version.txt')) { def ioptions = initOptions(args.options) From efe0ebd8883467ada3292a88e21a8ac7bebe461e Mon Sep 17 00:00:00 2001 From: Harshil Patel Date: Wed, 12 May 2021 00:30:52 +0100 Subject: [PATCH 14/17] Update CHANGELOG --- CHANGELOG.md | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 283f9255..a51ae64d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -24,12 +24,13 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 * Default human `--kraken2_db` link has been changed from Zenodo to an AWS S3 bucket for more reliable downloads * Updated pipeline template to nf-core/tools `1.14` * Optimise MultiQC configuration and input files for faster run-time on huge sample numbers -* [#122](https://github.com/nf-core/viralrecon/issues/122) - 
Single SPAdes command to rule them all -* [#138](https://github.com/nf-core/viralrecon/issues/138) - Problem masking the consensus sequence -* [#142](https://github.com/nf-core/viralrecon/issues/142) - Unknown method invocation `toBytes` on String type -* [#169](https://github.com/nf-core/viralrecon/issues/169) - ggplot2 error when generating mosdepth amplicon plot with Swift v2 primers -* [#170](https://github.com/nf-core/viralrecon/issues/170) - ivar trimming of Swift libraries new offset feature -* [#175](https://github.com/nf-core/viralrecon/issues/175) - MultiQC report does not include all the metrics +* [[#122](https://github.com/nf-core/viralrecon/issues/122)] - Single SPAdes command to rule them all +* [[#138](https://github.com/nf-core/viralrecon/issues/138)] - Problem masking the consensus sequence +* [[#142](https://github.com/nf-core/viralrecon/issues/142)] - Unknown method invocation `toBytes` on String type +* [[#169](https://github.com/nf-core/viralrecon/issues/169)] - ggplot2 error when generating mosdepth amplicon plot with Swift v2 primers +* [[#170](https://github.com/nf-core/viralrecon/issues/170)] - ivar trimming of Swift libraries new offset feature +* [[#175](https://github.com/nf-core/viralrecon/issues/175)] - MultiQC report does not include all the metrics +* [[#188](https://github.com/nf-core/viralrecon/pull/188)] - Add and fix EditorConfig linting in entire pipeline ### Parameters From b5d09b8ebd646bf3de4821190847924454948563 Mon Sep 17 00:00:00 2001 From: Harshil Patel Date: Wed, 12 May 2021 09:22:34 +0100 Subject: [PATCH 15/17] Add VALIDATION_STRINGENCY=LENIENT for Picard CollectMultipleMetrics --- conf/modules.config | 1 + 1 file changed, 1 insertion(+) diff --git a/conf/modules.config b/conf/modules.config index fc69eedc..609199ef 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -215,6 +215,7 @@ params { publish_dir = 'variants/bowtie2' } 'illumina_picard_collectmultiplemetrics' { + args = 'VALIDATION_STRINGENCY=LENIENT 
TMP_DIR=tmp' publish_files = ['metrics':'picard_metrics', 'pdf': 'picard_metrics/pdf'] publish_dir = 'variants/bowtie2' } From 82b26599c37054d22412e58c64af4166f43f41d0 Mon Sep 17 00:00:00 2001 From: Harshil Patel Date: Wed, 12 May 2021 10:23:31 +0100 Subject: [PATCH 16/17] Add warning to fastq_dir_to_samplesheet script --- bin/fastq_dir_to_samplesheet.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/bin/fastq_dir_to_samplesheet.py b/bin/fastq_dir_to_samplesheet.py index 200b4e0d..74485715 100755 --- a/bin/fastq_dir_to_samplesheet.py +++ b/bin/fastq_dir_to_samplesheet.py @@ -54,6 +54,14 @@ def fastq_dir_to_samplesheet(fastq_dir, samplesheet_file, read1_extension='_R1_0 if len(reads) == 1: sample_info += ',' fout.write(f'{sample_info}\n') + else: + error_str = "\nWARNING: No FastQ files found so samplesheet has not been created!\n\n" + error_str += "Please check the values provided for the:\n" + error_str += " - Path to the directory containing the FastQ files\n" + error_str += " - '--read1_extension' parameter\n" + error_str += " - '--read2_extension' parameter\n" + print(error_str) + sys.exit(1) def main(args=None): args = parse_args(args) From 992183b8cf608c5a0158fbdff7726c92fbab1860 Mon Sep 17 00:00:00 2001 From: Harshil Patel Date: Wed, 12 May 2021 12:48:25 +0100 Subject: [PATCH 17/17] Fix CI markdownlint config location --- .github/workflows/linting.yml | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml index 8aaf33ef..46f42cb8 100644 --- a/.github/workflows/linting.yml +++ b/.github/workflows/linting.yml @@ -14,11 +14,11 @@ jobs: - uses: actions/checkout@v2 - uses: actions/setup-node@v1 with: - node-version: '10' + node-version: "10" - name: Install markdownlint run: npm install -g markdownlint-cli - name: Run Markdownlint - run: markdownlint ${GITHUB_WORKSPACE} -c ${GITHUB_WORKSPACE}/.markdownlint.yml + run: markdownlint . 
# If the above check failed, post a comment on the PR explaining the failure - name: Post PR comment @@ -36,7 +36,7 @@ jobs: * Everything else: [Install `npm`](https://www.npmjs.com/get-npm) then [install `markdownlint-cli`](https://www.npmjs.com/package/markdownlint-cli) (`npm install -g markdownlint-cli`) * Fix the markdown errors * Automatically: `markdownlint . --config .github/markdownlint.yml --fix` - * Manually resolve anything left from `markdownlint . --config .github/markdownlint.yml` + * Manually resolve anything left from `markdownlint .` Once you push these changes the test should pass, and you can hide this comment :+1: @@ -67,7 +67,7 @@ jobs: - uses: actions/checkout@v1 - uses: actions/setup-node@v1 with: - node-version: '10' + node-version: "10" - name: Install yaml-lint run: npm install -g yaml-lint - name: Run yaml-lint @@ -101,7 +101,6 @@ jobs: nf-core: runs-on: ubuntu-latest steps: - - name: Check out pipeline code uses: actions/checkout@v2 @@ -114,8 +113,8 @@ jobs: - uses: actions/setup-python@v1 with: - python-version: '3.6' - architecture: 'x64' + python-version: "3.6" + architecture: "x64" - name: Install dependencies run: | @@ -142,4 +141,3 @@ jobs: lint_log.txt lint_results.md PR_number.txt -