\\n Workflow | \\n Nextflow | \\n\\"
+ ],
+ [
+ "CUSTOM_DUMPSOFTWAREVERSIONS:",
+ " python: 3.11.7",
+ " yaml: 5.4.1",
+ "TOOL1:",
+ " tool1: 0.11.9",
+ "TOOL2:",
+ " tool2: '1.9'",
+ "Workflow:"
+ ]
],
- "timestamp": "2023-11-03T14:43:22.157011"
+ "timestamp": "2024-01-09T23:01:18.710682"
}
}
\ No newline at end of file
diff --git a/modules/nf-core/custom/getchromsizes/environment.yml b/modules/nf-core/custom/getchromsizes/environment.yml
new file mode 100644
index 0000000..2a01695
--- /dev/null
+++ b/modules/nf-core/custom/getchromsizes/environment.yml
@@ -0,0 +1,7 @@
+name: custom_getchromsizes
+channels:
+ - conda-forge
+ - bioconda
+ - defaults
+dependencies:
+ - bioconda::samtools=1.16.1
diff --git a/modules/nf-core/custom/getchromsizes/main.nf b/modules/nf-core/custom/getchromsizes/main.nf
new file mode 100644
index 0000000..e8084ea
--- /dev/null
+++ b/modules/nf-core/custom/getchromsizes/main.nf
@@ -0,0 +1,44 @@
+process CUSTOM_GETCHROMSIZES {
+ tag "$fasta"
+ label 'process_single'
+
+ conda "${moduleDir}/environment.yml"
+ container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+ 'https://depot.galaxyproject.org/singularity/samtools:1.16.1--h6899075_1' :
+ 'biocontainers/samtools:1.16.1--h6899075_1' }"
+
+ input:
+ tuple val(meta), path(fasta)
+
+ output:
+ tuple val(meta), path ("*.sizes"), emit: sizes
+ tuple val(meta), path ("*.fai") , emit: fai
+ tuple val(meta), path ("*.gzi") , emit: gzi, optional: true
+ path "versions.yml" , emit: versions
+
+ when:
+ task.ext.when == null || task.ext.when
+
+ script:
+ def args = task.ext.args ?: ''
+ """
+ samtools faidx $fasta
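+ # the first two columns of the .fai index (sequence name and length) become the chromosome sizes file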
+ cut -f 1,2 ${fasta}.fai > ${fasta}.sizes
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ getchromsizes: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
+ END_VERSIONS
+ """
+
+ stub:
+ """
+ touch ${fasta}.fai
+ touch ${fasta}.sizes
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ getchromsizes: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
+ END_VERSIONS
+ """
+}
diff --git a/modules/nf-core/custom/getchromsizes/meta.yml b/modules/nf-core/custom/getchromsizes/meta.yml
new file mode 100644
index 0000000..529be07
--- /dev/null
+++ b/modules/nf-core/custom/getchromsizes/meta.yml
@@ -0,0 +1,54 @@
+name: custom_getchromsizes
+description: Generates a file of chromosome sizes and a FASTA index file
+keywords:
+ - fasta
+ - chromosome
+ - indexing
+tools:
+ - samtools:
+ description: Tools for dealing with SAM, BAM and CRAM files
+ homepage: http://www.htslib.org/
+ documentation: http://www.htslib.org/doc/samtools.html
+ tool_dev_url: https://github.com/samtools/samtools
+ doi: 10.1093/bioinformatics/btp352
+ licence: ["MIT"]
+input:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - fasta:
+ type: file
+ description: FASTA file
+ pattern: "*.{fa,fasta,fna,fas}"
+output:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - sizes:
+ type: file
+ description: File containing chromosome lengths
+ pattern: "*.{sizes}"
+ - fai:
+ type: file
+ description: FASTA index file
+ pattern: "*.{fai}"
+ - gzi:
+ type: file
+ description: Optional gzip index file for compressed inputs
+ pattern: "*.gzi"
+ - versions:
+ type: file
+ description: File containing software versions
+ pattern: "versions.yml"
+authors:
+ - "@tamara-hodgetts"
+ - "@chris-cheshire"
+ - "@muffato"
+maintainers:
+ - "@tamara-hodgetts"
+ - "@chris-cheshire"
+ - "@muffato"
diff --git a/modules/nf-core/custom/getchromsizes/tests/main.nf.test b/modules/nf-core/custom/getchromsizes/tests/main.nf.test
new file mode 100644
index 0000000..9f6b564
--- /dev/null
+++ b/modules/nf-core/custom/getchromsizes/tests/main.nf.test
@@ -0,0 +1,62 @@
+nextflow_process {
+
+ name "Test Process CUSTOM_GETCHROMSIZES"
+ script "../main.nf"
+ process "CUSTOM_GETCHROMSIZES"
+
+ tag "modules"
+ tag "modules_nfcore"
+ tag "custom"
+ tag "custom/getchromsizes"
+
+ test("test_custom_getchromsizes") {
+
+ when {
+ params {
+ outdir = "$outputDir"
+ }
+ process {
+ """
+ input[0] = Channel.of([
+ [ id:'test' ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)
+ ])
+ """
+ }
+ }
+
+ then {
+ assertAll (
+ { assert process.success },
+ { assert snapshot(process.out).match() }
+ )
+ }
+
+ }
+
+ test("test_custom_getchromsizes_bgzip") {
+
+ when {
+ params {
+ outdir = "$outputDir"
+ }
+ process {
+ """
+ input[0] = Channel.of([
+ [ id:'test' ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true)
+ ])
+ """
+ }
+ }
+
+ then {
+ assertAll (
+ { assert process.success },
+ { assert snapshot(process.out).match() }
+ )
+ }
+
+ }
+
+}
diff --git a/modules/nf-core/custom/getchromsizes/tests/main.nf.test.snap b/modules/nf-core/custom/getchromsizes/tests/main.nf.test.snap
new file mode 100644
index 0000000..2e560bd
--- /dev/null
+++ b/modules/nf-core/custom/getchromsizes/tests/main.nf.test.snap
@@ -0,0 +1,114 @@
+{
+ "test_custom_getchromsizes": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test"
+ },
+ "genome.fasta.sizes:md5,a57c401f27ae5133823fb09fb21c8a3c"
+ ]
+ ],
+ "1": [
+ [
+ {
+ "id": "test"
+ },
+ "genome.fasta.fai:md5,9da2a56e2853dc8c0b86a9e7229c9fe5"
+ ]
+ ],
+ "2": [
+
+ ],
+ "3": [
+ "versions.yml:md5,45a83c5f3dddbc5dcab30035169f7ce8"
+ ],
+ "fai": [
+ [
+ {
+ "id": "test"
+ },
+ "genome.fasta.fai:md5,9da2a56e2853dc8c0b86a9e7229c9fe5"
+ ]
+ ],
+ "gzi": [
+
+ ],
+ "sizes": [
+ [
+ {
+ "id": "test"
+ },
+ "genome.fasta.sizes:md5,a57c401f27ae5133823fb09fb21c8a3c"
+ ]
+ ],
+ "versions": [
+ "versions.yml:md5,45a83c5f3dddbc5dcab30035169f7ce8"
+ ]
+ }
+ ],
+ "timestamp": "2024-01-17T17:48:35.562918"
+ },
+ "test_custom_getchromsizes_bgzip": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test"
+ },
+ "genome.fasta.gz.sizes:md5,a57c401f27ae5133823fb09fb21c8a3c"
+ ]
+ ],
+ "1": [
+ [
+ {
+ "id": "test"
+ },
+ "genome.fasta.gz.fai:md5,9da2a56e2853dc8c0b86a9e7229c9fe5"
+ ]
+ ],
+ "2": [
+ [
+ {
+ "id": "test"
+ },
+ "genome.fasta.gz.gzi:md5,7dea362b3fac8e00956a4952a3d4f474"
+ ]
+ ],
+ "3": [
+ "versions.yml:md5,45a83c5f3dddbc5dcab30035169f7ce8"
+ ],
+ "fai": [
+ [
+ {
+ "id": "test"
+ },
+ "genome.fasta.gz.fai:md5,9da2a56e2853dc8c0b86a9e7229c9fe5"
+ ]
+ ],
+ "gzi": [
+ [
+ {
+ "id": "test"
+ },
+ "genome.fasta.gz.gzi:md5,7dea362b3fac8e00956a4952a3d4f474"
+ ]
+ ],
+ "sizes": [
+ [
+ {
+ "id": "test"
+ },
+ "genome.fasta.gz.sizes:md5,a57c401f27ae5133823fb09fb21c8a3c"
+ ]
+ ],
+ "versions": [
+ "versions.yml:md5,45a83c5f3dddbc5dcab30035169f7ce8"
+ ]
+ }
+ ],
+ "timestamp": "2024-01-17T17:49:02.562311"
+ }
+}
\ No newline at end of file
diff --git a/modules/nf-core/custom/getchromsizes/tests/tags.yml b/modules/nf-core/custom/getchromsizes/tests/tags.yml
new file mode 100644
index 0000000..d89a805
--- /dev/null
+++ b/modules/nf-core/custom/getchromsizes/tests/tags.yml
@@ -0,0 +1,2 @@
+custom/getchromsizes:
+ - modules/nf-core/custom/getchromsizes/**
diff --git a/modules/nf-core/cutadapt/environment.yml b/modules/nf-core/cutadapt/environment.yml
new file mode 100644
index 0000000..d32a8f9
--- /dev/null
+++ b/modules/nf-core/cutadapt/environment.yml
@@ -0,0 +1,7 @@
+name: cutadapt
+channels:
+ - conda-forge
+ - bioconda
+ - defaults
+dependencies:
+ - bioconda::cutadapt=3.4
diff --git a/modules/nf-core/cutadapt/main.nf b/modules/nf-core/cutadapt/main.nf
new file mode 100644
index 0000000..e232a70
--- /dev/null
+++ b/modules/nf-core/cutadapt/main.nf
@@ -0,0 +1,50 @@
+process CUTADAPT {
+ tag "$meta.id"
+ label 'process_medium'
+
+ conda "${moduleDir}/environment.yml"
+ container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+ 'https://depot.galaxyproject.org/singularity/cutadapt:3.4--py39h38f01e4_1' :
+ 'biocontainers/cutadapt:3.4--py39h38f01e4_1' }"
+
+ input:
+ tuple val(meta), path(reads)
+
+ output:
+ tuple val(meta), path('*.trim.fastq.gz'), emit: reads
+ tuple val(meta), path('*.log') , emit: log
+ path "versions.yml" , emit: versions
+
+ when:
+ task.ext.when == null || task.ext.when
+
+ script:
+ def args = task.ext.args ?: ''
+ def prefix = task.ext.prefix ?: "${meta.id}"
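+ // cutadapt writes the trimmed R1 via -o; for paired-end data, -p adds the trimmed R2 output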
+ def trimmed = meta.single_end ? "-o ${prefix}.trim.fastq.gz" : "-o ${prefix}_1.trim.fastq.gz -p ${prefix}_2.trim.fastq.gz"
+ """
+ cutadapt \\
+ --cores $task.cpus \\
+ $args \\
+ $trimmed \\
+ $reads \\
+ > ${prefix}.cutadapt.log
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ cutadapt: \$(cutadapt --version)
+ END_VERSIONS
+ """
+
+ stub:
+ def prefix = task.ext.prefix ?: "${meta.id}"
+ def trimmed = meta.single_end ? "${prefix}.trim.fastq.gz" : "${prefix}_1.trim.fastq.gz ${prefix}_2.trim.fastq.gz"
+ """
+ touch ${prefix}.cutadapt.log
+ touch ${trimmed}
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ cutadapt: \$(cutadapt --version)
+ END_VERSIONS
+ """
+}
diff --git a/modules/nf-core/cutadapt/meta.yml b/modules/nf-core/cutadapt/meta.yml
new file mode 100644
index 0000000..5ecfe27
--- /dev/null
+++ b/modules/nf-core/cutadapt/meta.yml
@@ -0,0 +1,49 @@
+name: cutadapt
+description: Perform adapter/quality trimming on sequencing reads
+keywords:
+ - trimming
+ - adapter trimming
+ - adapters
+ - quality trimming
+tools:
+ - cutadapt:
+ description: |
+ Cutadapt finds and removes adapter sequences, primers, poly-A tails and other types of unwanted sequence from your high-throughput sequencing reads.
+ documentation: https://cutadapt.readthedocs.io/en/stable/index.html
+ doi: 10.14806/ej.17.1.200
+ licence: ["MIT"]
+input:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - reads:
+ type: file
+ description: |
+ List of input FastQ files of size 1 and 2 for single-end and paired-end data,
+ respectively.
+output:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - reads:
+ type: file
+ description: The trimmed/modified fastq reads
+ pattern: "*fastq.gz"
+ - log:
+ type: file
+ description: cutadapt log file
+ pattern: "*cutadapt.log"
+ - versions:
+ type: file
+ description: File containing software versions
+ pattern: "versions.yml"
+authors:
+ - "@drpatelh"
+ - "@kevinmenden"
+maintainers:
+ - "@drpatelh"
+ - "@kevinmenden"
diff --git a/modules/nf-core/cutadapt/tests/main.nf.test b/modules/nf-core/cutadapt/tests/main.nf.test
new file mode 100644
index 0000000..819c8f9
--- /dev/null
+++ b/modules/nf-core/cutadapt/tests/main.nf.test
@@ -0,0 +1,69 @@
+nextflow_process {
+
+ name "Test Process CUTADAPT"
+ script "../main.nf"
+ process "CUTADAPT"
+ tag "modules"
+ tag "modules_nfcore"
+ tag "cutadapt"
+
+ test("sarscov2 Illumina single end [fastq]") {
+
+ when {
+ params {
+ outdir = "$outputDir"
+ }
+ process {
+ """
+ input[0] = [
+ [ id: 'test', single_end:true ],
+ [
+ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true)
+ ]
+ ]
+ """
+ }
+ }
+
+ then {
+ assertAll (
+ { assert process.success },
+ { assert process.out.reads != null },
+ { assert process.out.reads.get(0).get(1) ==~ ".*.trim.fastq.gz" },
+ { assert snapshot(process.out.versions).match("versions") },
+ { assert snapshot(path(process.out.reads.get(0).get(1)).linesGzip[0]).match() }
+ )
+ }
+ }
+
+ test("sarscov2 Illumina paired-end [fastq]") {
+
+ when {
+ params {
+ outdir = "$outputDir"
+ }
+ process {
+ """
+ input[0] = [
+ [ id: 'test', single_end:false ],
+ [
+ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true),
+ file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true)
+ ]
+ ]
+ """
+ }
+ }
+
+ then {
+ assertAll (
+ { assert process.success },
+ { assert process.out.reads != null },
+ { assert process.out.reads.get(0).get(1).get(0) ==~ ".*.1.trim.fastq.gz" },
+ { assert process.out.reads.get(0).get(1).get(1) ==~ ".*.2.trim.fastq.gz" },
+ { assert snapshot(path(process.out.reads.get(0).get(1).get(1)).linesGzip[0]).match() },
+ { assert snapshot(process.out.versions).match("versions") }
+ )
+ }
+ }
+}
diff --git a/modules/nf-core/cutadapt/tests/main.nf.test.snap b/modules/nf-core/cutadapt/tests/main.nf.test.snap
new file mode 100644
index 0000000..4bdee0b
--- /dev/null
+++ b/modules/nf-core/cutadapt/tests/main.nf.test.snap
@@ -0,0 +1,22 @@
+{
+ "versions": {
+ "content": [
+ [
+ "versions.yml:md5,d37c5b9e465accf6d836972608795071"
+ ]
+ ],
+ "timestamp": "2023-10-24T11:22:34.352529"
+ },
+ "Single-Read": {
+ "content": [
+ "@ERR5069949.2151832 NS500628:121:HK3MMAFX2:2:21208:10793:15304/1"
+ ],
+ "timestamp": "2023-10-24T11:22:34.364556"
+ },
+ "Paired-Reads": {
+ "content": [
+ "@ERR5069949.2151832 NS500628:121:HK3MMAFX2:2:21208:10793:15304/2"
+ ],
+ "timestamp": "2023-10-24T11:22:41.877854"
+ }
+}
\ No newline at end of file
diff --git a/modules/nf-core/cutadapt/tests/nextflow.config b/modules/nf-core/cutadapt/tests/nextflow.config
new file mode 100644
index 0000000..6c3b425
--- /dev/null
+++ b/modules/nf-core/cutadapt/tests/nextflow.config
@@ -0,0 +1,7 @@
+process {
+
+ withName: CUTADAPT {
+ ext.args = '-q 25'
+ }
+
+}
diff --git a/modules/nf-core/cutadapt/tests/tags.yml b/modules/nf-core/cutadapt/tests/tags.yml
new file mode 100644
index 0000000..f64f997
--- /dev/null
+++ b/modules/nf-core/cutadapt/tests/tags.yml
@@ -0,0 +1,2 @@
+cutadapt:
+ - modules/nf-core/cutadapt/**
diff --git a/modules/nf-core/fastp/environment.yml b/modules/nf-core/fastp/environment.yml
new file mode 100644
index 0000000..70389e6
--- /dev/null
+++ b/modules/nf-core/fastp/environment.yml
@@ -0,0 +1,7 @@
+name: fastp
+channels:
+ - conda-forge
+ - bioconda
+ - defaults
+dependencies:
+ - bioconda::fastp=0.23.4
diff --git a/modules/nf-core/fastp/main.nf b/modules/nf-core/fastp/main.nf
new file mode 100644
index 0000000..2a3b679
--- /dev/null
+++ b/modules/nf-core/fastp/main.nf
@@ -0,0 +1,120 @@
+process FASTP {
+ tag "$meta.id"
+ label 'process_medium'
+
+ conda "${moduleDir}/environment.yml"
+ container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+ 'https://depot.galaxyproject.org/singularity/fastp:0.23.4--h5f740d0_0' :
+ 'biocontainers/fastp:0.23.4--h5f740d0_0' }"
+
+ input:
+ tuple val(meta), path(reads)
+ path adapter_fasta
+ val save_trimmed_fail
+ val save_merged
+
+ output:
+ tuple val(meta), path('*.fastp.fastq.gz') , optional:true, emit: reads
+ tuple val(meta), path('*.json') , emit: json
+ tuple val(meta), path('*.html') , emit: html
+ tuple val(meta), path('*.log') , emit: log
+ path "versions.yml" , emit: versions
+ tuple val(meta), path('*.fail.fastq.gz') , optional:true, emit: reads_fail
+ tuple val(meta), path('*.merged.fastq.gz'), optional:true, emit: reads_merged
+
+ when:
+ task.ext.when == null || task.ext.when
+
+ script:
+ def args = task.ext.args ?: ''
+ def prefix = task.ext.prefix ?: "${meta.id}"
+ def adapter_list = adapter_fasta ? "--adapter_fasta ${adapter_fasta}" : ""
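+ // single-end: --failed_out stores reads failing the filters; paired-end: --unpaired1/--unpaired2 store reads whose mate was discarded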
+ def fail_fastq = save_trimmed_fail && meta.single_end ? "--failed_out ${prefix}.fail.fastq.gz" : save_trimmed_fail && !meta.single_end ? "--unpaired1 ${prefix}_1.fail.fastq.gz --unpaired2 ${prefix}_2.fail.fastq.gz" : ''
+ // Added soft-links to original fastqs for consistent naming in MultiQC
+ // Use single ended for interleaved. Add --interleaved_in in config.
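+ // illustrative only, e.g. in a pipeline config: process { withName: FASTP { ext.args = '--interleaved_in' } } (see tests/nextflow.config)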
+ if ( task.ext.args?.contains('--interleaved_in') ) {
+ """
+ [ ! -f ${prefix}.fastq.gz ] && ln -sf $reads ${prefix}.fastq.gz
+
+ fastp \\
+ --stdout \\
+ --in1 ${prefix}.fastq.gz \\
+ --thread $task.cpus \\
+ --json ${prefix}.fastp.json \\
+ --html ${prefix}.fastp.html \\
+ $adapter_list \\
+ $fail_fastq \\
+ $args \\
+ 2> >(tee ${prefix}.fastp.log >&2) \\
+ | gzip -c > ${prefix}.fastp.fastq.gz
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ fastp: \$(fastp --version 2>&1 | sed -e "s/fastp //g")
+ END_VERSIONS
+ """
+ } else if (meta.single_end) {
+ """
+ [ ! -f ${prefix}.fastq.gz ] && ln -sf $reads ${prefix}.fastq.gz
+
+ fastp \\
+ --in1 ${prefix}.fastq.gz \\
+ --out1 ${prefix}.fastp.fastq.gz \\
+ --thread $task.cpus \\
+ --json ${prefix}.fastp.json \\
+ --html ${prefix}.fastp.html \\
+ $adapter_list \\
+ $fail_fastq \\
+ $args \\
+ 2> >(tee ${prefix}.fastp.log >&2)
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ fastp: \$(fastp --version 2>&1 | sed -e "s/fastp //g")
+ END_VERSIONS
+ """
+ } else {
+ def merge_fastq = save_merged ? "-m --merged_out ${prefix}.merged.fastq.gz" : ''
+ """
+ [ ! -f ${prefix}_1.fastq.gz ] && ln -sf ${reads[0]} ${prefix}_1.fastq.gz
+ [ ! -f ${prefix}_2.fastq.gz ] && ln -sf ${reads[1]} ${prefix}_2.fastq.gz
+ fastp \\
+ --in1 ${prefix}_1.fastq.gz \\
+ --in2 ${prefix}_2.fastq.gz \\
+ --out1 ${prefix}_1.fastp.fastq.gz \\
+ --out2 ${prefix}_2.fastp.fastq.gz \\
+ --json ${prefix}.fastp.json \\
+ --html ${prefix}.fastp.html \\
+ $adapter_list \\
+ $fail_fastq \\
+ $merge_fastq \\
+ --thread $task.cpus \\
+ --detect_adapter_for_pe \\
+ $args \\
+ 2> >(tee ${prefix}.fastp.log >&2)
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ fastp: \$(fastp --version 2>&1 | sed -e "s/fastp //g")
+ END_VERSIONS
+ """
+ }
+
+ stub:
+ def prefix = task.ext.prefix ?: "${meta.id}"
+ def is_single_output = task.ext.args?.contains('--interleaved_in') || meta.single_end
+ def touch_reads = is_single_output ? "${prefix}.fastp.fastq.gz" : "${prefix}_1.fastp.fastq.gz ${prefix}_2.fastp.fastq.gz"
+ def touch_merged = (!is_single_output && save_merged) ? "touch ${prefix}.merged.fastq.gz" : ""
+ """
+ touch $touch_reads
+ touch "${prefix}.fastp.json"
+ touch "${prefix}.fastp.html"
+ touch "${prefix}.fastp.log"
+ $touch_merged
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ fastp: \$(fastp --version 2>&1 | sed -e "s/fastp //g")
+ END_VERSIONS
+ """
+}
diff --git a/modules/nf-core/fastp/meta.yml b/modules/nf-core/fastp/meta.yml
new file mode 100644
index 0000000..c22a16a
--- /dev/null
+++ b/modules/nf-core/fastp/meta.yml
@@ -0,0 +1,75 @@
+name: fastp
+description: Perform adapter/quality trimming on sequencing reads
+keywords:
+ - trimming
+ - quality control
+ - fastq
+tools:
+ - fastp:
+ description: |
+ A tool designed to provide fast all-in-one preprocessing for FastQ files. The tool is developed in C++ with multithreading support to afford high performance.
+ documentation: https://github.com/OpenGene/fastp
+ doi: 10.1093/bioinformatics/bty560
+ licence: ["MIT"]
+input:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information. Use 'single_end: true' to specify single ended or interleaved FASTQs. Use 'single_end: false' for paired-end reads.
+ e.g. [ id:'test', single_end:false ]
+ - reads:
+ type: file
+ description: |
+ List of input FastQ files of size 1 and 2 for single-end and paired-end data,
+ respectively. If you wish to run interleaved paired-end data, supply as single-end data
+ but with `--interleaved_in` in your `modules.conf`'s `ext.args` for the module.
+ - adapter_fasta:
+ type: file
+ description: File in FASTA format containing possible adapters to remove.
+ pattern: "*.{fasta,fna,fas,fa}"
+ - save_trimmed_fail:
+ type: boolean
+ description: Specify true to save files that failed to pass trimming thresholds ending in `*.fail.fastq.gz`
+ - save_merged:
+ type: boolean
+ description: Specify true to save all merged reads to a file ending in `*.merged.fastq.gz`
+output:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - reads:
+ type: file
+ description: The trimmed/modified/unmerged fastq reads
+ pattern: "*fastp.fastq.gz"
+ - json:
+ type: file
+ description: Results in JSON format
+ pattern: "*.json"
+ - html:
+ type: file
+ description: Results in HTML format
+ pattern: "*.html"
+ - log:
+ type: file
+ description: fastp log file
+ pattern: "*.log"
+ - versions:
+ type: file
+ description: File containing software versions
+ pattern: "versions.yml"
+ - reads_fail:
+ type: file
+ description: Reads that failed the preprocessing
+ pattern: "*fail.fastq.gz"
+ - reads_merged:
+ type: file
+ description: Reads that were successfully merged
+ pattern: "*.{merged.fastq.gz}"
+authors:
+ - "@drpatelh"
+ - "@kevinmenden"
+maintainers:
+ - "@drpatelh"
+ - "@kevinmenden"
diff --git a/modules/nf-core/fastp/tests/main.nf.test b/modules/nf-core/fastp/tests/main.nf.test
new file mode 100644
index 0000000..fa7e5b4
--- /dev/null
+++ b/modules/nf-core/fastp/tests/main.nf.test
@@ -0,0 +1,723 @@
+nextflow_process {
+
+ name "Test Process FASTP"
+ script "../main.nf"
+ process "FASTP"
+ tag "modules"
+ tag "modules_nfcore"
+ tag "fastp"
+
+ test("test_fastp_single_end") {
+
+ when {
+ params {
+ outdir = "$outputDir"
+ }
+ process {
+ """
+ adapter_fasta = []
+ save_trimmed_fail = false
+ save_merged = false
+
+ input[0] = Channel.of([
+ [ id:'test', single_end:true ],
+ [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ]
+ ])
+ input[1] = adapter_fasta
+ input[2] = save_trimmed_fail
+ input[3] = save_merged
+ """
+ }
+ }
+
+ then {
+ def html_text = [ "Q20 bases:12.922000 K (92.984097%)",
+ "single end (151 cycles)" ]
+ def log_text = [ "Q20 bases: 12922(92.9841%)",
+ "reads passed filter: 99" ]
+ def read_lines = ["@ERR5069949.2151832 NS500628:121:HK3MMAFX2:2:21208:10793:15304/1",
+ "TCATAAACCAAAGCACTCACAGTGTCAACAATTTCAGCAGGACAACGCCGACAAGTTCCGAGGAACATGTCTGGACCTATAGTTTTCATAAGTCTACACACTGAATTGAAATATTCTGGTTCTAGTGTGCCCTTAGTTAGCAATGTGCGT",
+ "AAAAAAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEE
+ { assert path(process.out.reads.get(0).get(1)).linesGzip.contains(read_line) }
+ }
+ },
+ { html_text.each { html_part ->
+ { assert path(process.out.html.get(0).get(1)).getText().contains(html_part) }
+ }
+ },
+ { assert snapshot(process.out.json).match("test_fastp_single_end_json") },
+ { log_text.each { log_part ->
+ { assert path(process.out.log.get(0).get(1)).getText().contains(log_part) }
+ }
+ },
+ {
+ assert snapshot(
+ (
+ [process.out.reads[0][0].toString()] + // meta
+ process.out.reads.collect { file(it[1]).getName() } +
+ process.out.json.collect { file(it[1]).getName() } +
+ process.out.html.collect { file(it[1]).getName() } +
+ process.out.log.collect { file(it[1]).getName() } +
+ process.out.reads_fail.collect { file(it[1]).getName() } +
+ process.out.reads_merged.collect { file(it[1]).getName() }
+ ).sort()
+ ).match("test_fastp_single_end-for_stub_match")
+ },
+ { assert snapshot(process.out.versions).match("versions") }
+ )
+ }
+ }
+
+ test("test_fastp_single_end-stub") {
+
+ options '-stub'
+
+ when {
+ params {
+ outdir = "$outputDir"
+ }
+ process {
+ """
+ adapter_fasta = []
+ save_trimmed_fail = false
+ save_merged = false
+
+ input[0] = Channel.of([
+ [ id:'test', single_end:true ],
+ [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ]
+ ])
+ input[1] = adapter_fasta
+ input[2] = save_trimmed_fail
+ input[3] = save_merged
+ """
+ }
+ }
+
+ then {
+
+ assertAll(
+ { assert process.success },
+ {
+ assert snapshot(
+ (
+ [process.out.reads[0][0].toString()] + // meta
+ process.out.reads.collect { file(it[1]).getName() } +
+ process.out.json.collect { file(it[1]).getName() } +
+ process.out.html.collect { file(it[1]).getName() } +
+ process.out.log.collect { file(it[1]).getName() } +
+ process.out.reads_fail.collect { file(it[1]).getName() } +
+ process.out.reads_merged.collect { file(it[1]).getName() }
+ ).sort()
+ ).match("test_fastp_single_end-for_stub_match")
+ },
+ { assert snapshot(process.out.versions).match("versions") }
+ )
+ }
+ }
+
+ test("test_fastp_paired_end") {
+
+ when {
+ params {
+ outdir = "$outputDir"
+ }
+ process {
+ """
+ adapter_fasta = []
+ save_trimmed_fail = false
+ save_merged = false
+
+ input[0] = Channel.of([
+ [ id:'test', single_end:false ], // meta map
+ [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true),
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ]
+ ])
+ input[1] = adapter_fasta
+ input[2] = save_trimmed_fail
+ input[3] = save_merged
+ """
+ }
+ }
+
+ then {
+ def html_text = [ "Q20 bases: | 25.719000 K (93.033098%)",
+ "The input has little adapter percentage (~0.000000%), probably it's trimmed before."]
+ def log_text = [ "No adapter detected for read1",
+ "Q30 bases: 12281(88.3716%)"]
+ def json_text = ['"passed_filter_reads": 198']
+ def read1_lines = ["@ERR5069949.2151832 NS500628:121:HK3MMAFX2:2:21208:10793:15304/1",
+ "TCATAAACCAAAGCACTCACAGTGTCAACAATTTCAGCAGGACAACGCCGACAAGTTCCGAGGAACATGTCTGGACCTATAGTTTTCATAAGTCTACACACTGAATTGAAATATTCTGGTTCTAGTGTGCCCTTAGTTAGCAATGTGCGT",
+ "AAAAAAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEE
+ { assert path(process.out.reads.get(0).get(1).get(0)).linesGzip.contains(read1_line) }
+ }
+ },
+ { read2_lines.each { read2_line ->
+ { assert path(process.out.reads.get(0).get(1).get(1)).linesGzip.contains(read2_line) }
+ }
+ },
+ { html_text.each { html_part ->
+ { assert path(process.out.html.get(0).get(1)).getText().contains(html_part) }
+ }
+ },
+ { json_text.each { json_part ->
+ { assert path(process.out.json.get(0).get(1)).getText().contains(json_part) }
+ }
+ },
+ { log_text.each { log_part ->
+ { assert path(process.out.log.get(0).get(1)).getText().contains(log_part) }
+ }
+ },
+ {
+ assert snapshot(
+ (
+ [process.out.reads[0][0].toString()] + // meta
+ process.out.reads.collect { it[1].collect { item -> file(item).getName() } } +
+ process.out.json.collect { file(it[1]).getName() } +
+ process.out.html.collect { file(it[1]).getName() } +
+ process.out.log.collect { file(it[1]).getName() } +
+ process.out.reads_fail.collect { file(it[1]).getName() } +
+ process.out.reads_merged.collect { file(it[1]).getName() }
+ ).sort()
+ ).match("test_fastp_paired_end-for_stub_match")
+ },
+ { assert snapshot(process.out.versions).match("versions") }
+ )
+ }
+ }
+
+ test("test_fastp_paired_end-stub") {
+
+ options '-stub'
+
+ when {
+ params {
+ outdir = "$outputDir"
+ }
+ process {
+ """
+ adapter_fasta = []
+ save_trimmed_fail = false
+ save_merged = false
+
+ input[0] = Channel.of([
+ [ id:'test', single_end:false ], // meta map
+ [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true),
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ]
+ ])
+ input[1] = adapter_fasta
+ input[2] = save_trimmed_fail
+ input[3] = save_merged
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ {
+ assert snapshot(
+ (
+ [process.out.reads[0][0].toString()] + // meta
+ process.out.reads.collect { it[1].collect { item -> file(item).getName() } } +
+ process.out.json.collect { file(it[1]).getName() } +
+ process.out.html.collect { file(it[1]).getName() } +
+ process.out.log.collect { file(it[1]).getName() } +
+ process.out.reads_fail.collect { file(it[1]).getName() } +
+ process.out.reads_merged.collect { file(it[1]).getName() }
+ ).sort()
+ ).match("test_fastp_paired_end-for_stub_match")
+ },
+ { assert snapshot(process.out.versions).match("versions") }
+ )
+ }
+ }
+
+ test("fastp test_fastp_interleaved") {
+ config './nextflow.config'
+ when {
+ params {
+ outdir = "$outputDir"
+ }
+ process {
+ """
+ adapter_fasta = []
+ save_trimmed_fail = false
+ save_merged = false
+
+ input[0] = Channel.of([
+ [ id:'test', single_end:true ], // meta map
+ [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_interleaved.fastq.gz', checkIfExists: true) ]
+ ])
+ input[1] = adapter_fasta
+ input[2] = save_trimmed_fail
+ input[3] = save_merged
+ """
+ }
+ }
+
+ then {
+ def html_text = [ "Q20 bases: | 25.719000 K (93.033098%)",
+ "paired end (151 cycles + 151 cycles)"]
+ def log_text = [ "Q20 bases: 12922(92.9841%)",
+ "reads passed filter: 198"]
+ def read_lines = [ "@ERR5069949.2151832 NS500628:121:HK3MMAFX2:2:21208:10793:15304/1",
+ "TCATAAACCAAAGCACTCACAGTGTCAACAATTTCAGCAGGACAACGCCGACAAGTTCCGAGGAACATGTCTGGACCTATAGTTTTCATAAGTCTACACACTGAATTGAAATATTCTGGTTCTAGTGTGCCCTTAGTTAGCAATGTGCGT",
+ "AAAAAAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEE
+ { assert path(process.out.reads.get(0).get(1)).linesGzip.contains(read_line) }
+ }
+ },
+ { html_text.each { html_part ->
+ { assert path(process.out.html.get(0).get(1)).getText().contains(html_part) }
+ }
+ },
+ { assert snapshot(process.out.json).match("fastp test_fastp_interleaved_json") },
+ { log_text.each { log_part ->
+ { assert path(process.out.log.get(0).get(1)).getText().contains(log_part) }
+ }
+ },
+ {
+ assert snapshot(
+ (
+ [process.out.reads[0][0].toString()] + // meta
+ process.out.reads.collect { file(it[1]).getName() } +
+ process.out.json.collect { file(it[1]).getName() } +
+ process.out.html.collect { file(it[1]).getName() } +
+ process.out.log.collect { file(it[1]).getName() } +
+ process.out.reads_fail.collect { file(it[1]).getName() } +
+ process.out.reads_merged.collect { file(it[1]).getName() }
+ ).sort()
+ ).match("test_fastp_interleaved-for_stub_match")
+ },
+ { assert snapshot(process.out.versions).match("versions") }
+ )
+ }
+ }
+
+ test("fastp test_fastp_interleaved-stub") {
+
+ options '-stub'
+
+ config './nextflow.config'
+ when {
+ params {
+ outdir = "$outputDir"
+ }
+ process {
+ """
+ adapter_fasta = []
+ save_trimmed_fail = false
+ save_merged = false
+
+ input[0] = Channel.of([
+ [ id:'test', single_end:true ], // meta map
+ [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_interleaved.fastq.gz', checkIfExists: true) ]
+ ])
+ input[1] = adapter_fasta
+ input[2] = save_trimmed_fail
+ input[3] = save_merged
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ {
+ assert snapshot(
+ (
+ [process.out.reads[0][0].toString()] + // meta
+ process.out.reads.collect { file(it[1]).getName() } +
+ process.out.json.collect { file(it[1]).getName() } +
+ process.out.html.collect { file(it[1]).getName() } +
+ process.out.log.collect { file(it[1]).getName() } +
+ process.out.reads_fail.collect { file(it[1]).getName() } +
+ process.out.reads_merged.collect { file(it[1]).getName() }
+ ).sort()
+ ).match("test_fastp_interleaved-for_stub_match")
+ },
+ { assert snapshot(process.out.versions).match("versions") }
+ )
+ }
+ }
+
+ test("test_fastp_single_end_trim_fail") {
+
+ when {
+ params {
+ outdir = "$outputDir"
+ }
+ process {
+ """
+ adapter_fasta = []
+ save_trimmed_fail = true
+ save_merged = false
+
+ input[0] = Channel.of([
+ [ id:'test', single_end:true ], // meta map
+ [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ]
+ ])
+ input[1] = adapter_fasta
+ input[2] = save_trimmed_fail
+ input[3] = save_merged
+ """
+ }
+ }
+
+ then {
+ def html_text = [ "Q20 bases: | 12.922000 K (92.984097%)",
+ "single end (151 cycles)"]
+ def log_text = [ "Q20 bases: 12922(92.9841%)",
+ "reads passed filter: 99" ]
+ def read_lines = [ "@ERR5069949.2151832 NS500628:121:HK3MMAFX2:2:21208:10793:15304/1",
+ "TCATAAACCAAAGCACTCACAGTGTCAACAATTTCAGCAGGACAACGCCGACAAGTTCCGAGGAACATGTCTGGACCTATAGTTTTCATAAGTCTACACACTGAATTGAAATATTCTGGTTCTAGTGTGCCCTTAGTTAGCAATGTGCGT",
+ "AAAAAAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEE
+ { assert path(process.out.reads.get(0).get(1)).linesGzip.contains(read_line) }
+ }
+ },
+ { failed_read_lines.each { failed_read_line ->
+ { assert path(process.out.reads_fail.get(0).get(1)).linesGzip.contains(failed_read_line) }
+ }
+ },
+ { html_text.each { html_part ->
+ { assert path(process.out.html.get(0).get(1)).getText().contains(html_part) }
+ }
+ },
+ { assert snapshot(process.out.json).match("test_fastp_single_end_trim_fail_json") },
+ { log_text.each { log_part ->
+ { assert path(process.out.log.get(0).get(1)).getText().contains(log_part) }
+ }
+ },
+ { assert snapshot(process.out.versions).match("versions") }
+ )
+ }
+ }
+
+ test("test_fastp_paired_end_trim_fail") {
+
+ when {
+ params {
+ outdir = "$outputDir"
+ }
+ process {
+ """
+ adapter_fasta = []
+ save_trimmed_fail = true
+ save_merged = false
+
+ input[0] = Channel.of([
+ [ id:'test', single_end:false ], // meta map
+ [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true),
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true)]
+ ])
+ input[1] = adapter_fasta
+ input[2] = save_trimmed_fail
+ input[3] = save_merged
+ """
+ }
+ }
+
+ then {
+ def html_text = [ "Q20 bases: | 25.719000 K (93.033098%)",
+ "The input has little adapter percentage (~0.000000%), probably it's trimmed before."]
+ def log_text = [ "No adapter detected for read1",
+ "Q30 bases: 12281(88.3716%)"]
+ def json_text = ['"passed_filter_reads": 198']
+ def read1_lines = ["@ERR5069949.2151832 NS500628:121:HK3MMAFX2:2:21208:10793:15304/1",
+ "TCATAAACCAAAGCACTCACAGTGTCAACAATTTCAGCAGGACAACGCCGACAAGTTCCGAGGAACATGTCTGGACCTATAGTTTTCATAAGTCTACACACTGAATTGAAATATTCTGGTTCTAGTGTGCCCTTAGTTAGCAATGTGCGT",
+ "AAAAAAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEE
+ { assert path(process.out.reads.get(0).get(1).get(0)).linesGzip.contains(read1_line) }
+ }
+ },
+ { read2_lines.each { read2_line ->
+ { assert path(process.out.reads.get(0).get(1).get(1)).linesGzip.contains(read2_line) }
+ }
+ },
+ { failed_read2_lines.each { failed_read2_line ->
+ { assert path(process.out.reads_fail.get(0).get(1).get(1)).linesGzip.contains(failed_read2_line) }
+ }
+ },
+ { html_text.each { html_part ->
+ { assert path(process.out.html.get(0).get(1)).getText().contains(html_part) }
+ }
+ },
+ { json_text.each { json_part ->
+ { assert path(process.out.json.get(0).get(1)).getText().contains(json_part) }
+ }
+ },
+ { log_text.each { log_part ->
+ { assert path(process.out.log.get(0).get(1)).getText().contains(log_part) }
+ }
+ },
+ { assert snapshot(process.out.versions).match("versions") }
+ )
+ }
+ }
+
+ test("test_fastp_paired_end_merged") {
+
+ when {
+ params {
+ outdir = "$outputDir"
+ }
+ process {
+ """
+ adapter_fasta = []
+ save_trimmed_fail = false
+ save_merged = true
+ input[0] = Channel.of([
+ [ id:'test', single_end:false ], // meta map
+ [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true),
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ]
+ ])
+ input[1] = adapter_fasta
+ input[2] = save_trimmed_fail
+ input[3] = save_merged
+ """
+ }
+ }
+
+ then {
+ def html_text = [ ""]
+ def log_text = [ "Merged and filtered:",
+ "total reads: 75",
+ "total bases: 13683"]
+ def json_text = ['"merged_and_filtered": {', '"total_reads": 75', '"total_bases": 13683']
+ def read1_lines = [ "@ERR5069949.1066259 NS500628:121:HK3MMAFX2:1:11312:18369:8333/1",
+ "CCTTATGACAGCAAGAACTGTGTATGATGATGGTGCTAGGAGAGTGTGGACACTTATGAATGTCTTGACACTCGTTTATAAAGTTTATTATGGTAATGCTTTAGATCAAGCCATTTCCATGTGGGCTCTTATAATCTCTGTTACTTC",
+ "AAAAAEAEEAEEEEEEEEEEEEEEEEAEEEEAEEEEEEEEAEEEEEEEEEEEEEEEEE/EAEEEEEE/6EEEEEEEEEEAEEAEEE/EE/AEEAEEEEEAEEEA/EEAAEAE
+ { assert path(process.out.reads.get(0).get(1).get(0)).linesGzip.contains(read1_line) }
+ }
+ },
+ { read2_lines.each { read2_line ->
+ { assert path(process.out.reads.get(0).get(1).get(1)).linesGzip.contains(read2_line) }
+ }
+ },
+ { read_merged_lines.each { read_merged_line ->
+ { assert path(process.out.reads_merged.get(0).get(1)).linesGzip.contains(read_merged_line) }
+ }
+ },
+ { html_text.each { html_part ->
+ { assert path(process.out.html.get(0).get(1)).getText().contains(html_part) }
+ }
+ },
+ { json_text.each { json_part ->
+ { assert path(process.out.json.get(0).get(1)).getText().contains(json_part) }
+ }
+ },
+ { log_text.each { log_part ->
+ { assert path(process.out.log.get(0).get(1)).getText().contains(log_part) }
+ }
+ },
+ {
+ assert snapshot(
+ (
+ [process.out.reads[0][0].toString()] + // meta
+ process.out.reads.collect { it[1].collect { item -> file(item).getName() } } +
+ process.out.json.collect { file(it[1]).getName() } +
+ process.out.html.collect { file(it[1]).getName() } +
+ process.out.log.collect { file(it[1]).getName() } +
+ process.out.reads_fail.collect { file(it[1]).getName() } +
+ process.out.reads_merged.collect { file(it[1]).getName() }
+ ).sort()
+ ).match("test_fastp_paired_end_merged-for_stub_match")
+ },
+ { assert snapshot(process.out.versions).match("versions") }
+ )
+ }
+ }
+
+ test("test_fastp_paired_end_merged-stub") {
+
+ options '-stub'
+
+ when {
+ params {
+ outdir = "$outputDir"
+ }
+ process {
+ """
+ adapter_fasta = []
+ save_trimmed_fail = false
+ save_merged = true
+
+ input[0] = Channel.of([
+ [ id:'test', single_end:false ], // meta map
+ [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true),
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ]
+ ])
+ input[1] = adapter_fasta
+ input[2] = save_trimmed_fail
+ input[3] = save_merged
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ {
+ assert snapshot(
+ (
+ [process.out.reads[0][0].toString()] + // meta
+ process.out.reads.collect { it[1].collect { item -> file(item).getName() } } +
+ process.out.json.collect { file(it[1]).getName() } +
+ process.out.html.collect { file(it[1]).getName() } +
+ process.out.log.collect { file(it[1]).getName() } +
+ process.out.reads_fail.collect { file(it[1]).getName() } +
+ process.out.reads_merged.collect { file(it[1]).getName() }
+ ).sort()
+ ).match("test_fastp_paired_end_merged-for_stub_match")
+ },
+ { assert snapshot(process.out.versions).match("versions") }
+ )
+ }
+ }
+
+ test("test_fastp_paired_end_merged_adapterlist") {
+
+ when {
+ params {
+ outdir = "$outputDir"
+ }
+ process {
+ """
+ adapter_fasta = Channel.of([ file(params.modules_testdata_base_path + 'delete_me/fastp/adapters.fasta', checkIfExists: true) ])
+ save_trimmed_fail = false
+ save_merged = true
+
+ input[0] = Channel.of([
+ [ id:'test', single_end:false ], // meta map
+ [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true),
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ]
+ ])
+ input[1] = adapter_fasta
+ input[2] = save_trimmed_fail
+ input[3] = save_merged
+ """
+ }
+ }
+
+ then {
+ def html_text = [ ""]
+ def log_text = [ "Merged and filtered:",
+ "total reads: 75",
+ "total bases: 13683"]
+ def json_text = ['"merged_and_filtered": {', '"total_reads": 75', '"total_bases": 13683',"--adapter_fasta"]
+ def read1_lines = ["@ERR5069949.1066259 NS500628:121:HK3MMAFX2:1:11312:18369:8333/1",
+ "CCTTATGACAGCAAGAACTGTGTATGATGATGGTGCTAGGAGAGTGTGGACACTTATGAATGTCTTGACACTCGTTTATAAAGTTTATTATGGTAATGCTTTAGATCAAGCCATTTCCATGTGGGCTCTTATAATCTCTGTTACTTC",
+ "AAAAAEAEEAEEEEEEEEEEEEEEEEAEEEEAEEEEEEEEAEEEEEEEEEEEEEEEEE/EAEEEEEE/6EEEEEEEEEEAEEAEEE/EE/AEEAEEEEEAEEEA/EEAAEAE
+ { assert path(process.out.reads.get(0).get(1).get(0)).linesGzip.contains(read1_line) }
+ }
+ },
+ { read2_lines.each { read2_line ->
+ { assert path(process.out.reads.get(0).get(1).get(1)).linesGzip.contains(read2_line) }
+ }
+ },
+ { read_merged_lines.each { read_merged_line ->
+ { assert path(process.out.reads_merged.get(0).get(1)).linesGzip.contains(read_merged_line) }
+ }
+ },
+ { html_text.each { html_part ->
+ { assert path(process.out.html.get(0).get(1)).getText().contains(html_part) }
+ }
+ },
+ { json_text.each { json_part ->
+ { assert path(process.out.json.get(0).get(1)).getText().contains(json_part) }
+ }
+ },
+ { log_text.each { log_part ->
+ { assert path(process.out.log.get(0).get(1)).getText().contains(log_part) }
+ }
+ },
+ { assert snapshot(process.out.versions).match("versions") }
+ )
+ }
+ }
+}
diff --git a/modules/nf-core/fastp/tests/main.nf.test.snap b/modules/nf-core/fastp/tests/main.nf.test.snap
new file mode 100644
index 0000000..6a71b68
--- /dev/null
+++ b/modules/nf-core/fastp/tests/main.nf.test.snap
@@ -0,0 +1,107 @@
+{
+ "test_fastp_paired_end-for_stub_match": {
+ "content": [
+ [
+ [
+ "test_1.fastp.fastq.gz",
+ "test_2.fastp.fastq.gz"
+ ],
+ "test.fastp.html",
+ "test.fastp.json",
+ "test.fastp.log",
+ "{id=test, single_end=false}"
+ ]
+ ],
+ "timestamp": "2024-01-17T18:07:15.398827"
+ },
+ "fastp test_fastp_interleaved_json": {
+ "content": [
+ [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "test.fastp.json:md5,168f516f7bd4b7b6c32da7cba87299a4"
+ ]
+ ]
+ ],
+ "timestamp": "2024-01-17T18:08:06.123035"
+ },
+ "test_fastp_paired_end_merged-for_stub_match": {
+ "content": [
+ [
+ [
+ "test_1.fastp.fastq.gz",
+ "test_2.fastp.fastq.gz"
+ ],
+ "test.fastp.html",
+ "test.fastp.json",
+ "test.fastp.log",
+ "test.merged.fastq.gz",
+ "{id=test, single_end=false}"
+ ]
+ ],
+ "timestamp": "2024-01-17T18:10:13.467574"
+ },
+ "test_fastp_single_end_json": {
+ "content": [
+ [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "test.fastp.json:md5,c852d7a6dba5819e4ac8d9673bedcacc"
+ ]
+ ]
+ ],
+ "timestamp": "2024-01-17T18:06:00.223817"
+ },
+ "versions": {
+ "content": [
+ [
+ "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02"
+ ]
+ ],
+ "timestamp": "2024-01-17T18:06:00.248422"
+ },
+ "test_fastp_interleaved-for_stub_match": {
+ "content": [
+ [
+ "test.fastp.fastq.gz",
+ "test.fastp.html",
+ "test.fastp.json",
+ "test.fastp.log",
+ "{id=test, single_end=true}"
+ ]
+ ],
+ "timestamp": "2024-01-17T18:08:06.127974"
+ },
+ "test_fastp_single_end-for_stub_match": {
+ "content": [
+ [
+ "test.fastp.fastq.gz",
+ "test.fastp.html",
+ "test.fastp.json",
+ "test.fastp.log",
+ "{id=test, single_end=true}"
+ ]
+ ],
+ "timestamp": "2024-01-17T18:06:00.244202"
+ },
+ "test_fastp_single_end_trim_fail_json": {
+ "content": [
+ [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "test.fastp.json:md5,9a7ee180f000e8d00c7fb67f06293eb5"
+ ]
+ ]
+ ],
+ "timestamp": "2024-01-17T18:08:41.942317"
+ }
+}
\ No newline at end of file
diff --git a/modules/nf-core/fastp/tests/nextflow.config b/modules/nf-core/fastp/tests/nextflow.config
new file mode 100644
index 0000000..0f7849a
--- /dev/null
+++ b/modules/nf-core/fastp/tests/nextflow.config
@@ -0,0 +1,6 @@
+process {
+
+ withName: FASTP {
+ ext.args = "--interleaved_in"
+ }
+}
diff --git a/modules/nf-core/fastp/tests/tags.yml b/modules/nf-core/fastp/tests/tags.yml
new file mode 100644
index 0000000..c1afcce
--- /dev/null
+++ b/modules/nf-core/fastp/tests/tags.yml
@@ -0,0 +1,2 @@
+fastp:
+ - modules/nf-core/fastp/**
diff --git a/modules/nf-core/fastqc/tests/main.nf.test b/modules/nf-core/fastqc/tests/main.nf.test
index b9e8f92..1f21c66 100644
--- a/modules/nf-core/fastqc/tests/main.nf.test
+++ b/modules/nf-core/fastqc/tests/main.nf.test
@@ -3,24 +3,20 @@ nextflow_process {
name "Test Process FASTQC"
script "../main.nf"
process "FASTQC"
+
tag "modules"
tag "modules_nfcore"
tag "fastqc"
- test("Single-Read") {
+ test("sarscov2 single-end [fastq]") {
when {
- params {
- outdir = "$outputDir"
- }
process {
"""
- input[0] = [
+ input[0] = Channel.of([
[ id: 'test', single_end:true ],
- [
- file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true)
- ]
- ]
+ [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ]
+ ])
"""
}
}
@@ -28,82 +24,189 @@ nextflow_process {
then {
assertAll (
{ assert process.success },
+
// NOTE The report contains the date inside it, which means that the md5sum is stable per day, but not longer than that. So you can't md5sum it.
// looks like this:
// https://github.com/nf-core/modules/pull/3903#issuecomment-1743620039
- { assert process.out.html.get(0).get(1) ==~ ".*/test_fastqc.html" },
- { assert path(process.out.html.get(0).get(1)).getText().contains("File type | Conventional base calls | ") },
- { assert snapshot(process.out.versions).match("versions") },
- { assert process.out.zip.get(0).get(1) ==~ ".*/test_fastqc.zip" }
+
+ { assert process.out.html[0][1] ==~ ".*/test_fastqc.html" },
+ { assert process.out.zip[0][1] ==~ ".*/test_fastqc.zip" },
+ { assert path(process.out.html[0][1]).text.contains("File type | Conventional base calls | ") },
+
+ { assert snapshot(process.out.versions).match("versions") }
+ )
+ }
+ }
+
+ test("sarscov2 paired-end [fastq]") {
+
+ when {
+ process {
+ """
+ input[0] = Channel.of([
+ [id: 'test', single_end: false], // meta map
+ [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true),
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ]
+ ])
+ """
+ }
+ }
+
+ then {
+ assertAll (
+ { assert process.success },
+
+ { assert process.out.html[0][1][0] ==~ ".*/test_1_fastqc.html" },
+ { assert process.out.html[0][1][1] ==~ ".*/test_2_fastqc.html" },
+ { assert process.out.zip[0][1][0] ==~ ".*/test_1_fastqc.zip" },
+ { assert process.out.zip[0][1][1] ==~ ".*/test_2_fastqc.zip" },
+ { assert path(process.out.html[0][1][0]).text.contains("File type | Conventional base calls | ") },
+ { assert path(process.out.html[0][1][1]).text.contains("File type | Conventional base calls | ") },
+
+ { assert snapshot(process.out.versions).match("versions") }
+ )
+ }
+ }
+
+ test("sarscov2 interleaved [fastq]") {
+
+ when {
+ process {
+ """
+ input[0] = Channel.of([
+ [id: 'test', single_end: false], // meta map
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_interleaved.fastq.gz', checkIfExists: true)
+ ])
+ """
+ }
+ }
+
+ then {
+ assertAll (
+ { assert process.success },
+
+ { assert process.out.html[0][1] ==~ ".*/test_fastqc.html" },
+ { assert process.out.zip[0][1] ==~ ".*/test_fastqc.zip" },
+ { assert path(process.out.html[0][1]).text.contains("File type | Conventional base calls | ") },
+
+ { assert snapshot(process.out.versions).match("versions") }
)
}
}
-// TODO
-// //
-// // Test with paired-end data
-// //
-// workflow test_fastqc_paired_end {
-// input = [
-// [id: 'test', single_end: false], // meta map
-// [
-// file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true),
-// file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true)
-// ]
-// ]
-
-// FASTQC ( input )
-// }
-
-// //
-// // Test with interleaved data
-// //
-// workflow test_fastqc_interleaved {
-// input = [
-// [id: 'test', single_end: false], // meta map
-// file(params.test_data['sarscov2']['illumina']['test_interleaved_fastq_gz'], checkIfExists: true)
-// ]
-
-// FASTQC ( input )
-// }
-
-// //
-// // Test with bam data
-// //
-// workflow test_fastqc_bam {
-// input = [
-// [id: 'test', single_end: false], // meta map
-// file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true)
-// ]
-
-// FASTQC ( input )
-// }
-
-// //
-// // Test with multiple samples
-// //
-// workflow test_fastqc_multiple {
-// input = [
-// [id: 'test', single_end: false], // meta map
-// [
-// file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true),
-// file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true),
-// file(params.test_data['sarscov2']['illumina']['test2_1_fastq_gz'], checkIfExists: true),
-// file(params.test_data['sarscov2']['illumina']['test2_2_fastq_gz'], checkIfExists: true)
-// ]
-// ]
-
-// FASTQC ( input )
-// }
-
-// //
-// // Test with custom prefix
-// //
-// workflow test_fastqc_custom_prefix {
-// input = [
-// [ id:'mysample', single_end:true ], // meta map
-// file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true)
-// ]
-
-// FASTQC ( input )
-// }
+
+ test("sarscov2 paired-end [bam]") {
+
+ when {
+ process {
+ """
+ input[0] = Channel.of([
+ [id: 'test', single_end: false], // meta map
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true)
+ ])
+ """
+ }
+ }
+
+ then {
+ assertAll (
+ { assert process.success },
+
+ { assert process.out.html[0][1] ==~ ".*/test_fastqc.html" },
+ { assert process.out.zip[0][1] ==~ ".*/test_fastqc.zip" },
+ { assert path(process.out.html[0][1]).text.contains("File type | Conventional base calls | ") },
+
+ { assert snapshot(process.out.versions).match("versions") }
+ )
+ }
+ }
+
+ test("sarscov2 multiple [fastq]") {
+
+ when {
+ process {
+ """
+ input[0] = Channel.of([
+ [id: 'test', single_end: false], // meta map
+ [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true),
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true),
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test2_1.fastq.gz', checkIfExists: true),
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test2_2.fastq.gz', checkIfExists: true) ]
+ ])
+ """
+ }
+ }
+
+ then {
+ assertAll (
+ { assert process.success },
+
+ { assert process.out.html[0][1][0] ==~ ".*/test_1_fastqc.html" },
+ { assert process.out.html[0][1][1] ==~ ".*/test_2_fastqc.html" },
+ { assert process.out.html[0][1][2] ==~ ".*/test_3_fastqc.html" },
+ { assert process.out.html[0][1][3] ==~ ".*/test_4_fastqc.html" },
+ { assert process.out.zip[0][1][0] ==~ ".*/test_1_fastqc.zip" },
+ { assert process.out.zip[0][1][1] ==~ ".*/test_2_fastqc.zip" },
+ { assert process.out.zip[0][1][2] ==~ ".*/test_3_fastqc.zip" },
+ { assert process.out.zip[0][1][3] ==~ ".*/test_4_fastqc.zip" },
+ { assert path(process.out.html[0][1][0]).text.contains("File type | Conventional base calls | ") },
+ { assert path(process.out.html[0][1][1]).text.contains("File type | Conventional base calls | ") },
+ { assert path(process.out.html[0][1][2]).text.contains("File type | Conventional base calls | ") },
+ { assert path(process.out.html[0][1][3]).text.contains("File type | Conventional base calls | ") },
+
+ { assert snapshot(process.out.versions).match("versions") }
+ )
+ }
+ }
+
+ test("sarscov2 custom_prefix") {
+
+ when {
+ process {
+ """
+ input[0] = Channel.of([
+ [ id:'mysample', single_end:true ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true)
+ ])
+ """
+ }
+ }
+
+ then {
+ assertAll (
+ { assert process.success },
+
+ { assert process.out.html[0][1] ==~ ".*/mysample_fastqc.html" },
+ { assert process.out.zip[0][1] ==~ ".*/mysample_fastqc.zip" },
+ { assert path(process.out.html[0][1]).text.contains("File type | Conventional base calls | ") },
+
+ { assert snapshot(process.out.versions).match("versions") }
+ )
+ }
+ }
+
+ test("sarscov2 single-end [fastq] - stub") {
+
+ options "-stub"
+
+ when {
+ process {
+ """
+ input[0] = Channel.of([
+ [ id: 'test', single_end:true ],
+ [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ]
+ ])
+ """
+ }
+ }
+
+ then {
+ assertAll (
+ { assert process.success },
+ { assert snapshot(process.out.html.collect { file(it[1]).getName() } +
+ process.out.zip.collect { file(it[1]).getName() } +
+ process.out.versions ).match() }
+ )
+ }
+ }
+
}
diff --git a/modules/nf-core/fastqc/tests/main.nf.test.snap b/modules/nf-core/fastqc/tests/main.nf.test.snap
index 636a32c..5d624bb 100644
--- a/modules/nf-core/fastqc/tests/main.nf.test.snap
+++ b/modules/nf-core/fastqc/tests/main.nf.test.snap
@@ -1,10 +1,20 @@
{
+ "sarscov2 single-end [fastq] - stub": {
+ "content": [
+ [
+ "test.html",
+ "test.zip",
+ "versions.yml:md5,e1cc25ca8af856014824abd842e93978"
+ ]
+ ],
+ "timestamp": "2024-01-17T18:40:57.254299"
+ },
"versions": {
"content": [
[
"versions.yml:md5,e1cc25ca8af856014824abd842e93978"
]
],
- "timestamp": "2023-10-09T23:40:54+0000"
+ "timestamp": "2024-01-17T18:36:50.033627"
}
}
\ No newline at end of file
diff --git a/modules/nf-core/fq/subsample/environment.yml b/modules/nf-core/fq/subsample/environment.yml
new file mode 100644
index 0000000..c588de3
--- /dev/null
+++ b/modules/nf-core/fq/subsample/environment.yml
@@ -0,0 +1,7 @@
+name: fq_subsample
+channels:
+ - conda-forge
+ - bioconda
+ - defaults
+dependencies:
+ - bioconda::fq=0.9.1
diff --git a/modules/nf-core/fq/subsample/main.nf b/modules/nf-core/fq/subsample/main.nf
new file mode 100644
index 0000000..f3d8cc7
--- /dev/null
+++ b/modules/nf-core/fq/subsample/main.nf
@@ -0,0 +1,55 @@
+process FQ_SUBSAMPLE {
+ tag "$meta.id"
+ label 'process_single'
+
+ conda "${moduleDir}/environment.yml"
+ container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+ 'https://depot.galaxyproject.org/singularity/fq:0.9.1--h9ee0642_0':
+ 'biocontainers/fq:0.9.1--h9ee0642_0' }"
+
+ input:
+ tuple val(meta), path(fastq)
+
+ output:
+ tuple val(meta), path("*.fastq.gz"), emit: fastq
+ path "versions.yml" , emit: versions
+
+ when:
+ task.ext.when == null || task.ext.when
+
+ script:
+ /* args requires:
+ --probability : Probability read is kept, between 0 and 1. Mutually exclusive with record-count.
+ --record-count : Number of records to keep. Mutually exclusive with probability
+ */
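+ // illustrative only, e.g. in a pipeline config: process { withName: FQ_SUBSAMPLE { ext.args = '--record-count 1000 --seed 1' } }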
+ def args = task.ext.args ?: ''
+ def prob_exists = args =~ /-p|--probability/
+ def nrec_exists = args =~ /-n|--record-count/
+ if ( !(prob_exists || nrec_exists) ){
+ error "FQ/SUBSAMPLE requires --probability (-p) or --record-count (-n) specified in task.ext.args!"
+ }
+ def prefix = task.ext.prefix ?: "${meta.id}"
+ def n_fastq = fastq instanceof List ? fastq.size() : 1
+ log.debug "FQ/SUBSAMPLE found ${n_fastq} FASTQ files"
+ if ( n_fastq == 1 ){
+ fastq1_output = "--r1-dst ${prefix}.fastq.gz"
+ fastq2_output = ""
+ } else if ( n_fastq == 2 ){
+ fastq1_output = "--r1-dst ${prefix}_R1.fastq.gz"
+ fastq2_output = "--r2-dst ${prefix}_R2.fastq.gz"
+ } else {
+ error "FQ/SUBSAMPLE only accepts 1 or 2 FASTQ files!"
+ }
+ """
+ fq subsample \\
+ $args \\
+ $fastq \\
+ $fastq1_output \\
+ $fastq2_output
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ fq: \$(echo \$(fq subsample --version | sed 's/fq-subsample //g'))
+ END_VERSIONS
+ """
+}
diff --git a/modules/nf-core/fq/subsample/meta.yml b/modules/nf-core/fq/subsample/meta.yml
new file mode 100644
index 0000000..6c5e87f
--- /dev/null
+++ b/modules/nf-core/fq/subsample/meta.yml
@@ -0,0 +1,41 @@
+name: "fq_subsample"
+description: fq subsample outputs a subset of records from single or paired FASTQ files. This requires a seed (--seed) to be set in ext.args.
+keywords:
+ - fastq
+ - fq
+ - subsample
+tools:
+ - "fq":
+ description: "fq is a library to generate and validate FASTQ file pairs."
+ homepage: "https://github.com/stjude-rust-labs/fq"
+ documentation: "https://github.com/stjude-rust-labs/fq"
+ tool_dev_url: "https://github.com/stjude-rust-labs/fq"
+ licence: ["MIT"]
+input:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - fastq:
+ type: file
+ description: FASTQ file
+ pattern: "*.{fq,fastq}{,.gz}"
+output:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - versions:
+ type: file
+ description: File containing software versions
+ pattern: "versions.yml"
+ - fastq:
+ type: file
+ description: Randomly sampled FASTQ files.
+ pattern: "*_R[12].fastq.gz"
+authors:
+ - "@adamrtalbot"
+maintainers:
+ - "@adamrtalbot"
diff --git a/modules/nf-core/fq/subsample/tests/main.nf.test b/modules/nf-core/fq/subsample/tests/main.nf.test
new file mode 100644
index 0000000..285f30c
--- /dev/null
+++ b/modules/nf-core/fq/subsample/tests/main.nf.test
@@ -0,0 +1,140 @@
+nextflow_process {
+
+ name "Test Process FQ_SUBSAMPLE"
+ script "../main.nf"
+ process "FQ_SUBSAMPLE"
+
+ tag "modules"
+ tag "modules_nfcore"
+ tag "fq"
+ tag "fq/subsample"
+
+ test("test_fq_subsample_no_args") {
+ config "./nextflow_no_args.config"
+ when {
+ params {
+ outdir = "$outputDir"
+ }
+ process {
+ """
+ input[0] = Channel.of([
+ [ id:'test', single_end:false ], // meta map
+ [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true),
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true)]
+ ])
+ """
+ }
+ }
+
+ then {
+ assertAll (
+ { assert !process.success },
+ { assert snapshot(process.out).match() },
+ )
+ }
+
+ }
+
+ test("test_fq_subsample_probability") {
+ config "./nextflow_probability.config"
+ when {
+ params {
+ outdir = "$outputDir"
+ }
+ process {
+ """
+ input[0] = Channel.of([
+ [ id:'test', single_end:false ], // meta map
+ [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true),
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true)]
+ ])
+ """
+ }
+ }
+
+ then {
+ assertAll (
+ { assert process.success },
+ { assert snapshot(process.out).match() },
+ )
+ }
+
+ }
+
+ test("test_fq_subsample_record_count") {
+ config "./nextflow_record_count.config"
+ when {
+ params {
+ outdir = "$outputDir"
+ }
+ process {
+ """
+ input[0] = Channel.of([
+ [ id:'test', single_end:false ], // meta map
+ [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true),
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true)]
+ ])
+ """
+ }
+ }
+
+ then {
+ assertAll (
+ { assert process.success },
+ { assert snapshot(process.out).match() },
+ )
+ }
+
+ }
+
+ test("test_fq_subsample_single") {
+ config "./nextflow.config"
+ when {
+ params {
+ outdir = "$outputDir"
+ }
+ process {
+ """
+ input[0] = Channel.of([
+ [ id:'test', single_end:false ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true)
+ ])
+ """
+ }
+ }
+
+ then {
+ assertAll (
+ { assert process.success },
+ { assert snapshot(process.out).match() },
+ )
+ }
+
+ }
+
+ test("test_fq_subsample_no_input") {
+ config "./nextflow.config"
+ when {
+ params {
+ outdir = "$outputDir"
+ }
+ process {
+ """
+ input[0] = Channel.of([
+ [ id:'test', single_end:false ], // meta map
+ [ ]
+ ])
+ """
+ }
+ }
+
+ then {
+ assertAll (
+ { assert !process.success },
+ { assert snapshot(process.out).match() },
+ )
+ }
+
+ }
+
+}
\ No newline at end of file
diff --git a/modules/nf-core/fq/subsample/tests/main.nf.test.snap b/modules/nf-core/fq/subsample/tests/main.nf.test.snap
new file mode 100644
index 0000000..1e47918
--- /dev/null
+++ b/modules/nf-core/fq/subsample/tests/main.nf.test.snap
@@ -0,0 +1,145 @@
+{
+ "test_fq_subsample_probability": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ [
+ "test_R1.fastq.gz:md5,19326ff922a16c0cb81191f2a0a5c5fc",
+ "test_R2.fastq.gz:md5,ce7ff46296d89b68521ad55a3588bcfe"
+ ]
+ ]
+ ],
+ "1": [
+ "versions.yml:md5,ffbf935bd0de512fbc9e83b187bf924f"
+ ],
+ "fastq": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ [
+ "test_R1.fastq.gz:md5,19326ff922a16c0cb81191f2a0a5c5fc",
+ "test_R2.fastq.gz:md5,ce7ff46296d89b68521ad55a3588bcfe"
+ ]
+ ]
+ ],
+ "versions": [
+ "versions.yml:md5,ffbf935bd0de512fbc9e83b187bf924f"
+ ]
+ }
+ ],
+ "timestamp": "2024-01-17T17:57:15.446336"
+ },
+ "test_fq_subsample_record_count": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ [
+ "test_R1.fastq.gz:md5,394c7a233f1c1c1a167a34cf2895d26d",
+ "test_R2.fastq.gz:md5,32724cbdb5ab954a0a659ebcd56ca422"
+ ]
+ ]
+ ],
+ "1": [
+ "versions.yml:md5,ffbf935bd0de512fbc9e83b187bf924f"
+ ],
+ "fastq": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ [
+ "test_R1.fastq.gz:md5,394c7a233f1c1c1a167a34cf2895d26d",
+ "test_R2.fastq.gz:md5,32724cbdb5ab954a0a659ebcd56ca422"
+ ]
+ ]
+ ],
+ "versions": [
+ "versions.yml:md5,ffbf935bd0de512fbc9e83b187bf924f"
+ ]
+ }
+ ],
+ "timestamp": "2024-01-17T17:57:23.920058"
+ },
+ "test_fq_subsample_single": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.fastq.gz:md5,19326ff922a16c0cb81191f2a0a5c5fc"
+ ]
+ ],
+ "1": [
+ "versions.yml:md5,ffbf935bd0de512fbc9e83b187bf924f"
+ ],
+ "fastq": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.fastq.gz:md5,19326ff922a16c0cb81191f2a0a5c5fc"
+ ]
+ ],
+ "versions": [
+ "versions.yml:md5,ffbf935bd0de512fbc9e83b187bf924f"
+ ]
+ }
+ ],
+ "timestamp": "2024-01-17T17:57:31.908993"
+ },
+ "test_fq_subsample_no_args": {
+ "content": [
+ {
+ "0": [
+
+ ],
+ "1": [
+
+ ],
+ "fastq": [
+
+ ],
+ "versions": [
+
+ ]
+ }
+ ],
+ "timestamp": "2023-10-17T11:17:09.761156"
+ },
+ "test_fq_subsample_no_input": {
+ "content": [
+ {
+ "0": [
+
+ ],
+ "1": [
+
+ ],
+ "fastq": [
+
+ ],
+ "versions": [
+
+ ]
+ }
+ ],
+ "timestamp": "2023-10-17T11:17:37.555824"
+ }
+}
\ No newline at end of file
diff --git a/modules/nf-core/fq/subsample/tests/nextflow.config b/modules/nf-core/fq/subsample/tests/nextflow.config
new file mode 100644
index 0000000..41edd9f
--- /dev/null
+++ b/modules/nf-core/fq/subsample/tests/nextflow.config
@@ -0,0 +1,7 @@
+process {
+
+ withName: FQ_SUBSAMPLE {
+ ext.args = '--probability 0.1 -s 123'
+ }
+
+}
diff --git a/modules/nf-core/fq/subsample/tests/nextflow_no_args.config b/modules/nf-core/fq/subsample/tests/nextflow_no_args.config
new file mode 100644
index 0000000..88990d2
--- /dev/null
+++ b/modules/nf-core/fq/subsample/tests/nextflow_no_args.config
@@ -0,0 +1,7 @@
+process {
+
+ withName: FQ_SUBSAMPLE {
+ ext.args = ''
+ }
+
+}
diff --git a/modules/nf-core/fq/subsample/tests/nextflow_probability.config b/modules/nf-core/fq/subsample/tests/nextflow_probability.config
new file mode 100644
index 0000000..8cde0c7
--- /dev/null
+++ b/modules/nf-core/fq/subsample/tests/nextflow_probability.config
@@ -0,0 +1,7 @@
+process {
+
+ withName: FQ_SUBSAMPLE {
+ ext.args = '-p 0.1 -s 123'
+ }
+
+}
diff --git a/modules/nf-core/fq/subsample/tests/nextflow_record_count.config b/modules/nf-core/fq/subsample/tests/nextflow_record_count.config
new file mode 100644
index 0000000..1ea624e
--- /dev/null
+++ b/modules/nf-core/fq/subsample/tests/nextflow_record_count.config
@@ -0,0 +1,7 @@
+process {
+
+ withName: FQ_SUBSAMPLE {
+ ext.args = '-n 10 -s 123'
+ }
+
+}
diff --git a/modules/nf-core/fq/subsample/tests/tags.yml b/modules/nf-core/fq/subsample/tests/tags.yml
new file mode 100644
index 0000000..5156431
--- /dev/null
+++ b/modules/nf-core/fq/subsample/tests/tags.yml
@@ -0,0 +1,2 @@
+fq/subsample:
+ - modules/nf-core/fq/subsample/**
diff --git a/modules/nf-core/gffread/environment.yml b/modules/nf-core/gffread/environment.yml
new file mode 100644
index 0000000..5398f71
--- /dev/null
+++ b/modules/nf-core/gffread/environment.yml
@@ -0,0 +1,7 @@
+name: gffread
+channels:
+ - conda-forge
+ - bioconda
+ - defaults
+dependencies:
+ - bioconda::gffread=0.12.1
diff --git a/modules/nf-core/gffread/main.nf b/modules/nf-core/gffread/main.nf
new file mode 100644
index 0000000..d8a473e
--- /dev/null
+++ b/modules/nf-core/gffread/main.nf
@@ -0,0 +1,35 @@
+process GFFREAD {
+ tag "$gff"
+ label 'process_low'
+
+ conda "${moduleDir}/environment.yml"
+ container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+ 'https://depot.galaxyproject.org/singularity/gffread:0.12.1--h8b12597_0' :
+ 'biocontainers/gffread:0.12.1--h8b12597_0' }"
+
+ input:
+ path gff
+
+ output:
+ path "*.gtf" , emit: gtf , optional: true
+ path "*.gff3" , emit: gffread_gff , optional: true
+ path "versions.yml" , emit: versions
+
+ when:
+ task.ext.when == null || task.ext.when
+
+ script:
+ def args = task.ext.args ?: ''
+ def prefix = task.ext.prefix ?: "${gff.baseName}"
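+    // '-T' in ext.args asks gffread for GTF output; otherwise the result is written as GFF3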
+ def extension = args.contains("-T") ? 'gtf' : 'gffread.gff3'
+ """
+ gffread \\
+ $gff \\
+ $args \\
+ -o ${prefix}.${extension}
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ gffread: \$(gffread --version 2>&1)
+ END_VERSIONS
+ """
+}
diff --git a/modules/nf-core/gffread/meta.yml b/modules/nf-core/gffread/meta.yml
new file mode 100644
index 0000000..27ac310
--- /dev/null
+++ b/modules/nf-core/gffread/meta.yml
@@ -0,0 +1,36 @@
+name: gffread
+description: Validate, filter, convert and perform various other operations on GFF files
+keywords:
+ - gff
+ - conversion
+ - validation
+tools:
+ - gffread:
+ description: GFF/GTF utility providing format conversions, region filtering, FASTA sequence extraction and more.
+ homepage: http://ccb.jhu.edu/software/stringtie/gff.shtml#gffread
+ documentation: http://ccb.jhu.edu/software/stringtie/gff.shtml#gffread
+ tool_dev_url: https://github.com/gpertea/gffread
+ doi: 10.12688/f1000research.23297.1
+ licence: ["MIT"]
+input:
+ - gff:
+ type: file
+      description: A reference annotation file in GFF3, GFF2 or GTF format.
+      pattern: "*.{gff,gff3,gtf}"
+output:
+ - gtf:
+ type: file
+ description: GTF file resulting from the conversion of the GFF input file if '-T' argument is present
+ pattern: "*.{gtf}"
+ - gffread_gff:
+ type: file
+ description: GFF3 file resulting from the conversion of the GFF input file if '-T' argument is absent
+ pattern: "*.{gff3}"
+ - versions:
+ type: file
+ description: File containing software versions
+ pattern: "versions.yml"
+authors:
+ - "@emiller88"
+maintainers:
+ - "@emiller88"
diff --git a/modules/nf-core/gffread/tests/main.nf.test b/modules/nf-core/gffread/tests/main.nf.test
new file mode 100644
index 0000000..bdbc96a
--- /dev/null
+++ b/modules/nf-core/gffread/tests/main.nf.test
@@ -0,0 +1,65 @@
+nextflow_process {
+
+ name "Test Process GFFREAD"
+ script "../main.nf"
+ process "GFFREAD"
+
+ tag "gffread"
+ tag "modules_nfcore"
+ tag "modules"
+
+ test("sarscov2-gff3-gtf") {
+
+ config "./nextflow.config"
+
+ when {
+ params {
+ outdir = "$outputDir"
+ }
+ process {
+ """
+ input[0] = file(params.modules_testdata_base_path + "genomics/sarscov2/genome/genome.gff3", checkIfExists: true)
+ """
+ }
+ }
+
+ then {
+ assertAll (
+ { assert process.success },
+ { assert snapshot(
+ process.out.gtf,
+ process.out.versions
+ ).match() },
+ { assert process.out.gffread_gff == [] }
+ )
+ }
+
+ }
+
+ test("sarscov2-gff3-gff3") {
+
+ when {
+ params {
+ outdir = "$outputDir"
+ }
+ process {
+ """
+ input[0] = file(params.modules_testdata_base_path + "genomics/sarscov2/genome/genome.gff3", checkIfExists: true)
+ """
+ }
+ }
+
+ then {
+ assertAll (
+ { assert process.success },
+ { assert snapshot(
+ process.out.gffread_gff,
+ process.out.versions
+ ).match() },
+ { assert process.out.gtf == [] },
+ )
+ }
+
+ }
+
+}
diff --git a/modules/nf-core/gffread/tests/main.nf.test.snap b/modules/nf-core/gffread/tests/main.nf.test.snap
new file mode 100644
index 0000000..00a11a4
--- /dev/null
+++ b/modules/nf-core/gffread/tests/main.nf.test.snap
@@ -0,0 +1,24 @@
+{
+ "sarscov2-gff3-gtf": {
+ "content": [
+ [
+ "genome.gtf:md5,2394072d7d31530dfd590c4a117bf6e3"
+ ],
+ [
+ "versions.yml:md5,a71b6cdfa528dd206a238ec64bae13d6"
+ ]
+ ],
+ "timestamp": "2024-01-23T20:00:32.688779117"
+ },
+ "sarscov2-gff3-gff3": {
+ "content": [
+ [
+ "genome.gffread.gff3:md5,a7d40d99dcddac23ac673c473279ea2d"
+ ],
+ [
+ "versions.yml:md5,a71b6cdfa528dd206a238ec64bae13d6"
+ ]
+ ],
+ "timestamp": "2024-01-23T20:07:11.457356625"
+ }
+}
\ No newline at end of file
diff --git a/modules/nf-core/gffread/tests/nextflow.config b/modules/nf-core/gffread/tests/nextflow.config
new file mode 100644
index 0000000..74b2509
--- /dev/null
+++ b/modules/nf-core/gffread/tests/nextflow.config
@@ -0,0 +1,5 @@
+process {
+ withName: GFFREAD {
+ ext.args = '-T'
+ }
+}
diff --git a/modules/nf-core/gffread/tests/tags.yml b/modules/nf-core/gffread/tests/tags.yml
new file mode 100644
index 0000000..0557606
--- /dev/null
+++ b/modules/nf-core/gffread/tests/tags.yml
@@ -0,0 +1,2 @@
+gffread:
+ - modules/nf-core/gffread/**
diff --git a/modules/nf-core/gunzip/environment.yml b/modules/nf-core/gunzip/environment.yml
new file mode 100644
index 0000000..25910b3
--- /dev/null
+++ b/modules/nf-core/gunzip/environment.yml
@@ -0,0 +1,7 @@
+name: gunzip
+channels:
+ - conda-forge
+ - bioconda
+ - defaults
+dependencies:
+ - conda-forge::sed=4.7
diff --git a/modules/nf-core/gunzip/main.nf b/modules/nf-core/gunzip/main.nf
new file mode 100644
index 0000000..468a6f2
--- /dev/null
+++ b/modules/nf-core/gunzip/main.nf
@@ -0,0 +1,48 @@
+process GUNZIP {
+ tag "$archive"
+ label 'process_single'
+
+ conda "${moduleDir}/environment.yml"
+ container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+ 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' :
+ 'nf-core/ubuntu:20.04' }"
+
+ input:
+ tuple val(meta), path(archive)
+
+ output:
+ tuple val(meta), path("$gunzip"), emit: gunzip
+ path "versions.yml" , emit: versions
+
+ when:
+ task.ext.when == null || task.ext.when
+
+ script:
+ def args = task.ext.args ?: ''
+ gunzip = archive.toString() - '.gz'
+ """
+ # Not calling gunzip itself because it creates files
+ # with the original group ownership rather than the
+ # default one for that user / the work directory
+ gzip \\
+ -cd \\
+ $args \\
+ $archive \\
+ > $gunzip
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ gunzip: \$(echo \$(gunzip --version 2>&1) | sed 's/^.*(gzip) //; s/ Copyright.*\$//')
+ END_VERSIONS
+ """
+
+ stub:
+ gunzip = archive.toString() - '.gz'
+ """
+ touch $gunzip
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ gunzip: \$(echo \$(gunzip --version 2>&1) | sed 's/^.*(gzip) //; s/ Copyright.*\$//')
+ END_VERSIONS
+ """
+}
diff --git a/modules/nf-core/gunzip/meta.yml b/modules/nf-core/gunzip/meta.yml
new file mode 100644
index 0000000..231034f
--- /dev/null
+++ b/modules/nf-core/gunzip/meta.yml
@@ -0,0 +1,39 @@
+name: gunzip
+description: Decompresses files.
+keywords:
+ - gunzip
+ - compression
+ - decompression
+tools:
+ - gunzip:
+ description: |
+ gzip is a file format and a software application used for file compression and decompression.
+ documentation: https://www.gnu.org/software/gzip/manual/gzip.html
+ licence: ["GPL-3.0-or-later"]
+input:
+ - meta:
+ type: map
+ description: |
+ Optional groovy Map containing meta information
+ e.g. [ id:'test', single_end:false ]
+ - archive:
+ type: file
+ description: File to be compressed/uncompressed
+ pattern: "*.*"
+output:
+ - gunzip:
+ type: file
+ description: Compressed/uncompressed file
+ pattern: "*.*"
+ - versions:
+ type: file
+ description: File containing software versions
+ pattern: "versions.yml"
+authors:
+ - "@joseespinosa"
+ - "@drpatelh"
+ - "@jfy133"
+maintainers:
+ - "@joseespinosa"
+ - "@drpatelh"
+ - "@jfy133"
diff --git a/modules/nf-core/gunzip/tests/main.nf.test b/modules/nf-core/gunzip/tests/main.nf.test
new file mode 100644
index 0000000..6406008
--- /dev/null
+++ b/modules/nf-core/gunzip/tests/main.nf.test
@@ -0,0 +1,36 @@
+nextflow_process {
+
+ name "Test Process GUNZIP"
+ script "../main.nf"
+ process "GUNZIP"
+ tag "gunzip"
+ tag "modules_nfcore"
+ tag "modules"
+
+ test("Should run without failures") {
+
+ when {
+ params {
+ outdir = "$outputDir"
+ }
+ process {
+ """
+ input[0] = Channel.of([
+ [],
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true)
+ ]
+ )
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(process.out).match() }
+ )
+ }
+
+ }
+
+}
diff --git a/modules/nf-core/gunzip/tests/main.nf.test.snap b/modules/nf-core/gunzip/tests/main.nf.test.snap
new file mode 100644
index 0000000..720fd9f
--- /dev/null
+++ b/modules/nf-core/gunzip/tests/main.nf.test.snap
@@ -0,0 +1,31 @@
+{
+ "Should run without failures": {
+ "content": [
+ {
+ "0": [
+ [
+ [
+
+ ],
+ "test_1.fastq:md5,4161df271f9bfcd25d5845a1e220dbec"
+ ]
+ ],
+ "1": [
+ "versions.yml:md5,54376d32aca20e937a4ec26dac228e84"
+ ],
+ "gunzip": [
+ [
+ [
+
+ ],
+ "test_1.fastq:md5,4161df271f9bfcd25d5845a1e220dbec"
+ ]
+ ],
+ "versions": [
+ "versions.yml:md5,54376d32aca20e937a4ec26dac228e84"
+ ]
+ }
+ ],
+ "timestamp": "2023-10-17T15:35:37.690477896"
+ }
+}
\ No newline at end of file
diff --git a/modules/nf-core/gunzip/tests/tags.yml b/modules/nf-core/gunzip/tests/tags.yml
new file mode 100644
index 0000000..fd3f691
--- /dev/null
+++ b/modules/nf-core/gunzip/tests/tags.yml
@@ -0,0 +1,2 @@
+gunzip:
+ - modules/nf-core/gunzip/**
diff --git a/modules/nf-core/kallisto/index/environment.yml b/modules/nf-core/kallisto/index/environment.yml
new file mode 100644
index 0000000..471b006
--- /dev/null
+++ b/modules/nf-core/kallisto/index/environment.yml
@@ -0,0 +1,7 @@
+name: kallisto_index
+channels:
+ - conda-forge
+ - bioconda
+ - defaults
+dependencies:
+ - bioconda::kallisto=0.48.0
diff --git a/modules/nf-core/kallisto/index/main.nf b/modules/nf-core/kallisto/index/main.nf
new file mode 100644
index 0000000..28a47db
--- /dev/null
+++ b/modules/nf-core/kallisto/index/main.nf
@@ -0,0 +1,44 @@
+process KALLISTO_INDEX {
+ tag "$fasta"
+ label 'process_medium'
+
+ conda "${moduleDir}/environment.yml"
+ container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+ 'https://depot.galaxyproject.org/singularity/kallisto:0.48.0--h15996b6_2':
+ 'biocontainers/kallisto:0.48.0--h15996b6_2' }"
+
+ input:
+ tuple val(meta), path(fasta)
+
+ output:
+ tuple val(meta), path("kallisto") , emit: index
+ path "versions.yml" , emit: versions
+
+ when:
+ task.ext.when == null || task.ext.when
+
+ script:
+ def args = task.ext.args ?: ''
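+    // kallisto index writes a single index; '-i kallisto' names it 'kallisto' so it is captured by the 'index' output channel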
+ """
+ kallisto \\
+ index \\
+ $args \\
+ -i kallisto \\
+ $fasta
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ kallisto: \$(echo \$(kallisto 2>&1) | sed 's/^kallisto //; s/Usage.*\$//')
+ END_VERSIONS
+ """
+
+ stub:
+ """
+ touch kallisto
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ kallisto: \$(echo \$(kallisto 2>&1) | sed 's/^kallisto //; s/Usage.*\$//')
+ END_VERSIONS
+ """
+}
diff --git a/modules/nf-core/kallisto/index/meta.yml b/modules/nf-core/kallisto/index/meta.yml
new file mode 100644
index 0000000..d366aeb
--- /dev/null
+++ b/modules/nf-core/kallisto/index/meta.yml
@@ -0,0 +1,41 @@
+name: kallisto_index
+description: Create kallisto index
+keywords:
+ - kallisto
+ - kallisto/index
+ - index
+tools:
+ - kallisto:
+ description: Quantifying abundances of transcripts from bulk and single-cell RNA-Seq data, or more generally of target sequences using high-throughput sequencing reads.
+ homepage: https://pachterlab.github.io/kallisto/
+ documentation: https://pachterlab.github.io/kallisto/manual
+ tool_dev_url: https://github.com/pachterlab/kallisto
+ licence: ["BSD-2-Clause"]
+input:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing reference information
+ e.g. [ id:'test' ]
+ - fasta:
+ type: file
+ description: genome fasta file
+ pattern: "*.{fasta}"
+output:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing reference information
+ e.g. [ id:'test' ]
+ - index:
+ type: directory
+ description: Kallisto genome index
+ pattern: "*.idx"
+ - versions:
+ type: file
+ description: File containing software versions
+ pattern: "versions.yml"
+authors:
+ - "@ggabernet"
+maintainers:
+ - "@ggabernet"
diff --git a/modules/nf-core/kallisto/index/tests/main.nf.test b/modules/nf-core/kallisto/index/tests/main.nf.test
new file mode 100644
index 0000000..97933d6
--- /dev/null
+++ b/modules/nf-core/kallisto/index/tests/main.nf.test
@@ -0,0 +1,33 @@
+nextflow_process {
+
+ name "Test Process KALLISTO_INDEX"
+ script "../main.nf"
+ process "KALLISTO_INDEX"
+ tag "modules"
+ tag "modules_nfcore"
+ tag "kallisto"
+ tag "kallisto/index"
+
+ test("homo_sapiens genome_fasta") {
+
+ when {
+ process {
+ """
+ input[0] = [
+ [ id:'test_fasta' ], // meta map
+ [ file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) ]
+ ]
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(process.out).match() }
+ )
+ }
+
+ }
+
+}
diff --git a/modules/nf-core/kallisto/index/tests/main.nf.test.snap b/modules/nf-core/kallisto/index/tests/main.nf.test.snap
new file mode 100644
index 0000000..c0f45ac
--- /dev/null
+++ b/modules/nf-core/kallisto/index/tests/main.nf.test.snap
@@ -0,0 +1,31 @@
+{
+ "homo_sapiens genome_fasta": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test_fasta"
+ },
+ "kallisto:md5,2dab84e1456201beca5a43f4c514d67c"
+ ]
+ ],
+ "1": [
+ "versions.yml:md5,178f9b57d4228edc356911d571b958a4"
+ ],
+ "index": [
+ [
+ {
+ "id": "test_fasta"
+ },
+ "kallisto:md5,2dab84e1456201beca5a43f4c514d67c"
+ ]
+ ],
+ "versions": [
+ "versions.yml:md5,178f9b57d4228edc356911d571b958a4"
+ ]
+ }
+ ],
+ "timestamp": "2023-11-02T09:58:48.83625986"
+ }
+}
\ No newline at end of file
diff --git a/modules/nf-core/kallisto/index/tests/tags.yml b/modules/nf-core/kallisto/index/tests/tags.yml
new file mode 100644
index 0000000..9f47b88
--- /dev/null
+++ b/modules/nf-core/kallisto/index/tests/tags.yml
@@ -0,0 +1,2 @@
+kallisto/index:
+ - modules/nf-core/kallisto/index/**
diff --git a/modules/nf-core/multiqc/environment.yml b/modules/nf-core/multiqc/environment.yml
index bc0bdb5..7625b75 100644
--- a/modules/nf-core/multiqc/environment.yml
+++ b/modules/nf-core/multiqc/environment.yml
@@ -4,4 +4,4 @@ channels:
- bioconda
- defaults
dependencies:
- - bioconda::multiqc=1.18
+ - bioconda::multiqc=1.19
diff --git a/modules/nf-core/multiqc/main.nf b/modules/nf-core/multiqc/main.nf
index 00cc48d..1b9f7c4 100644
--- a/modules/nf-core/multiqc/main.nf
+++ b/modules/nf-core/multiqc/main.nf
@@ -3,8 +3,8 @@ process MULTIQC {
conda "${moduleDir}/environment.yml"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
- 'https://depot.galaxyproject.org/singularity/multiqc:1.18--pyhdfd78af_0' :
- 'biocontainers/multiqc:1.18--pyhdfd78af_0' }"
+ 'https://depot.galaxyproject.org/singularity/multiqc:1.19--pyhdfd78af_0' :
+ 'biocontainers/multiqc:1.19--pyhdfd78af_0' }"
input:
path multiqc_files, stageAs: "?/*"
@@ -43,7 +43,7 @@ process MULTIQC {
stub:
"""
- touch multiqc_data
+ mkdir multiqc_data
touch multiqc_plots
touch multiqc_report.html
diff --git a/modules/nf-core/multiqc/meta.yml b/modules/nf-core/multiqc/meta.yml
index f1aa660..45a9bc3 100644
--- a/modules/nf-core/multiqc/meta.yml
+++ b/modules/nf-core/multiqc/meta.yml
@@ -1,4 +1,3 @@
-# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
name: multiqc
description: Aggregate results from bioinformatics analyses across many samples into a single report
keywords:
diff --git a/modules/nf-core/multiqc/tests/main.nf.test b/modules/nf-core/multiqc/tests/main.nf.test
index c2dad21..d0438ed 100644
--- a/modules/nf-core/multiqc/tests/main.nf.test
+++ b/modules/nf-core/multiqc/tests/main.nf.test
@@ -7,12 +7,9 @@ nextflow_process {
tag "modules_nfcore"
tag "multiqc"
- test("MULTIQC: FASTQC") {
+ test("sarscov2 single-end [fastqc]") {
when {
- params {
- outdir = "$outputDir"
- }
process {
"""
input[0] = Channel.of([file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz_fastqc_zip'], checkIfExists: true)])
@@ -26,20 +23,17 @@ nextflow_process {
then {
assertAll(
{ assert process.success },
- { assert path(process.out.report.get(0)).exists() },
- { assert path(process.out.data.get(0)).exists() },
- { assert path(process.out.versions.get(0)).getText().contains("multiqc") }
+ { assert process.out.report[0] ==~ ".*/multiqc_report.html" },
+ { assert process.out.data[0] ==~ ".*/multiqc_data" },
+ { assert snapshot(process.out.versions).match("versions") }
)
}
}
- test("MULTIQC: FASTQC and a config file") {
+ test("sarscov2 single-end [fastqc] [config]") {
when {
- params {
- outdir = "$outputDir"
- }
process {
"""
input[0] = Channel.of([file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz_fastqc_zip'], checkIfExists: true)])
@@ -53,9 +47,35 @@ nextflow_process {
then {
assertAll(
{ assert process.success },
- { assert path(process.out.report.get(0)).exists() },
- { assert path(process.out.data.get(0)).exists() },
- { assert path(process.out.versions.get(0)).getText().contains("multiqc") }
+ { assert process.out.report[0] ==~ ".*/multiqc_report.html" },
+ { assert process.out.data[0] ==~ ".*/multiqc_data" },
+ { assert snapshot(process.out.versions).match("versions") }
+ )
+ }
+ }
+
+ test("sarscov2 single-end [fastqc] - stub") {
+
+ options "-stub"
+
+ when {
+ process {
+ """
+ input[0] = Channel.of([file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz_fastqc_zip'], checkIfExists: true)])
+ input[1] = []
+ input[2] = []
+ input[3] = []
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(process.out.report.collect { file(it).getName() } +
+ process.out.data.collect { file(it).getName() } +
+ process.out.plots.collect { file(it).getName() } +
+ process.out.versions ).match() }
)
}
diff --git a/modules/nf-core/rsem/preparereference/environment.yml b/modules/nf-core/rsem/preparereference/environment.yml
new file mode 100644
index 0000000..c80e691
--- /dev/null
+++ b/modules/nf-core/rsem/preparereference/environment.yml
@@ -0,0 +1,8 @@
+name: rsem_preparereference
+channels:
+ - conda-forge
+ - bioconda
+ - defaults
+dependencies:
+ - bioconda::rsem=1.3.3
+ - bioconda::star=2.7.10a
diff --git a/modules/nf-core/rsem/preparereference/main.nf b/modules/nf-core/rsem/preparereference/main.nf
new file mode 100644
index 0000000..44f76eb
--- /dev/null
+++ b/modules/nf-core/rsem/preparereference/main.nf
@@ -0,0 +1,72 @@
+process RSEM_PREPAREREFERENCE {
+ tag "$fasta"
+ label 'process_high'
+
+ conda "${moduleDir}/environment.yml"
+ container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+ 'https://depot.galaxyproject.org/singularity/mulled-v2-cf0123ef83b3c38c13e3b0696a3f285d3f20f15b:64aad4a4e144878400649e71f42105311be7ed87-0' :
+ 'biocontainers/mulled-v2-cf0123ef83b3c38c13e3b0696a3f285d3f20f15b:64aad4a4e144878400649e71f42105311be7ed87-0' }"
+
+ input:
+ path fasta, stageAs: "rsem/*"
+ path gtf
+
+ output:
+ path "rsem" , emit: index
+ path "*transcripts.fa", emit: transcript_fasta
+ path "versions.yml" , emit: versions
+
+ when:
+ task.ext.when == null || task.ext.when
+
+ script:
+ def args = task.ext.args ?: ''
+ def args2 = task.ext.args2 ?: ''
+ def args_list = args.tokenize()
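+    // When '--star' is present in ext.args, strip it, build the STAR index into rsem/, then run rsem-prepare-reference on the same directory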
+ if (args_list.contains('--star')) {
+ args_list.removeIf { it.contains('--star') }
+ def memory = task.memory ? "--limitGenomeGenerateRAM ${task.memory.toBytes() - 100000000}" : ''
+ """
+ STAR \\
+ --runMode genomeGenerate \\
+ --genomeDir rsem/ \\
+ --genomeFastaFiles $fasta \\
+ --sjdbGTFfile $gtf \\
+ --runThreadN $task.cpus \\
+ $memory \\
+ $args2
+
+ rsem-prepare-reference \\
+ --gtf $gtf \\
+ --num-threads $task.cpus \\
+ ${args_list.join(' ')} \\
+ $fasta \\
+ rsem/genome
+
+ cp rsem/genome.transcripts.fa .
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ rsem: \$(rsem-calculate-expression --version | sed -e "s/Current version: RSEM v//g")
+ star: \$(STAR --version | sed -e "s/STAR_//g")
+ END_VERSIONS
+ """
+ } else {
+ """
+ rsem-prepare-reference \\
+ --gtf $gtf \\
+ --num-threads $task.cpus \\
+ $args \\
+ $fasta \\
+ rsem/genome
+
+ cp rsem/genome.transcripts.fa .
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ rsem: \$(rsem-calculate-expression --version | sed -e "s/Current version: RSEM v//g")
+ star: \$(STAR --version | sed -e "s/STAR_//g")
+ END_VERSIONS
+ """
+ }
+}
diff --git a/modules/nf-core/rsem/preparereference/meta.yml b/modules/nf-core/rsem/preparereference/meta.yml
new file mode 100644
index 0000000..05aa44f
--- /dev/null
+++ b/modules/nf-core/rsem/preparereference/meta.yml
@@ -0,0 +1,42 @@
+name: rsem_preparereference
+description: Prepare a reference genome for RSEM
+keywords:
+ - rsem
+ - genome
+ - index
+tools:
+  - rsem:
+ description: |
+ RSEM: accurate transcript quantification from RNA-Seq data with or without a reference genome
+ homepage: https://github.com/deweylab/RSEM
+ documentation: https://github.com/deweylab/RSEM
+ doi: 10.1186/1471-2105-12-323
+ licence: ["GPL-3.0-or-later"]
+input:
+ - fasta:
+ type: file
+ description: The Fasta file of the reference genome
+ pattern: "*.{fasta,fa}"
+ - gtf:
+ type: file
+ description: The GTF file of the reference genome
+ pattern: "*.gtf"
+output:
+ - rsem:
+ type: directory
+ description: RSEM index directory
+ pattern: "rsem"
+ - transcript_fasta:
+ type: file
+ description: Fasta file of transcripts
+ pattern: "rsem/*transcripts.fa"
+ - versions:
+ type: file
+ description: File containing software versions
+ pattern: "versions.yml"
+authors:
+ - "@drpatelh"
+ - "@kevinmenden"
+maintainers:
+ - "@drpatelh"
+ - "@kevinmenden"
diff --git a/modules/nf-core/rsem/preparereference/tests/main.nf.test b/modules/nf-core/rsem/preparereference/tests/main.nf.test
new file mode 100644
index 0000000..a1d948d
--- /dev/null
+++ b/modules/nf-core/rsem/preparereference/tests/main.nf.test
@@ -0,0 +1,36 @@
+nextflow_process {
+
+ name "Test Process RSEM_PREPAREREFERENCE"
+ script "../main.nf"
+ process "RSEM_PREPAREREFERENCE"
+ tag "modules"
+ tag "modules_nfcore"
+ tag "rsem"
+ tag "rsem/preparereference"
+
+ test("homo_sapiens") {
+
+ when {
+ params {
+ outdir = "$outputDir"
+ }
+ process {
+ """
+ input[0] = Channel.of([file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)])
+ input[1] = Channel.of([file(params.test_data['homo_sapiens']['genome']['genome_gtf'], checkIfExists: true)])
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(process.out.index).match("index")},
+ { assert snapshot(process.out.transcript_fasta).match("transcript_fasta")},
+ { assert snapshot(process.out.versions).match("versions") }
+ )
+ }
+
+ }
+
+}
diff --git a/modules/nf-core/rsem/preparereference/tests/main.nf.test.snap b/modules/nf-core/rsem/preparereference/tests/main.nf.test.snap
new file mode 100644
index 0000000..0251065
--- /dev/null
+++ b/modules/nf-core/rsem/preparereference/tests/main.nf.test.snap
@@ -0,0 +1,35 @@
+{
+ "versions": {
+ "content": [
+ [
+ "versions.yml:md5,517611c42f6354d3609db1b35fffa397"
+ ]
+ ],
+ "timestamp": "2023-11-22T13:16:49.170587"
+ },
+ "index": {
+ "content": [
+ [
+ [
+ "genome.chrlist:md5,b190587cae0531f3cf25552d8aa674db",
+ "genome.fasta:md5,f315020d899597c1b57e5fe9f60f4c3e",
+ "genome.grp:md5,c2848a8b6d495956c11ec53efc1de67e",
+ "genome.idx.fa:md5,050c521a2719c2ae48267c1e65218f29",
+ "genome.n2g.idx.fa:md5,050c521a2719c2ae48267c1e65218f29",
+ "genome.seq:md5,94da0c6b88c33e63c9a052a11f4f57c1",
+ "genome.ti:md5,c9e4ae8d4d13a504eec2acf1b8589a66",
+ "genome.transcripts.fa:md5,050c521a2719c2ae48267c1e65218f29"
+ ]
+ ]
+ ],
+ "timestamp": "2023-11-22T13:16:49.140398"
+ },
+ "transcript_fasta": {
+ "content": [
+ [
+ "genome.transcripts.fa:md5,050c521a2719c2ae48267c1e65218f29"
+ ]
+ ],
+ "timestamp": "2023-11-22T13:16:49.159946"
+ }
+}
\ No newline at end of file
diff --git a/modules/nf-core/rsem/preparereference/tests/tags.yml b/modules/nf-core/rsem/preparereference/tests/tags.yml
new file mode 100644
index 0000000..1129045
--- /dev/null
+++ b/modules/nf-core/rsem/preparereference/tests/tags.yml
@@ -0,0 +1,2 @@
+rsem/preparereference:
+ - modules/nf-core/rsem/preparereference/**
diff --git a/modules/nf-core/salmon/index/environment.yml b/modules/nf-core/salmon/index/environment.yml
new file mode 100644
index 0000000..a6607d9
--- /dev/null
+++ b/modules/nf-core/salmon/index/environment.yml
@@ -0,0 +1,7 @@
+name: salmon_index
+channels:
+ - conda-forge
+ - bioconda
+ - defaults
+dependencies:
+ - bioconda::salmon=1.10.1
diff --git a/modules/nf-core/salmon/index/main.nf b/modules/nf-core/salmon/index/main.nf
new file mode 100644
index 0000000..88d9cf1
--- /dev/null
+++ b/modules/nf-core/salmon/index/main.nf
@@ -0,0 +1,47 @@
+process SALMON_INDEX {
+ tag "$transcript_fasta"
+ label "process_medium"
+
+ conda "${moduleDir}/environment.yml"
+ container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+ 'https://depot.galaxyproject.org/singularity/salmon:1.10.1--h7e5ed60_0' :
+ 'biocontainers/salmon:1.10.1--h7e5ed60_0' }"
+
+ input:
+ path genome_fasta
+ path transcript_fasta
+
+ output:
+ path "salmon" , emit: index
+ path "versions.yml", emit: versions
+
+ when:
+ task.ext.when == null || task.ext.when
+
+ script:
+ def args = task.ext.args ?: ''
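+    // Decoy-aware indexing: genome sequence names are written to decoys.txt and the genome is appended to the transcripts to form the gentrome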
+ def get_decoy_ids = "grep '^>' $genome_fasta | cut -d ' ' -f 1 | cut -d \$'\\t' -f 1 > decoys.txt"
+ def gentrome = "gentrome.fa"
+ if (genome_fasta.endsWith('.gz')) {
+ get_decoy_ids = "grep '^>' <(gunzip -c $genome_fasta) | cut -d ' ' -f 1 | cut -d \$'\\t' -f 1 > decoys.txt"
+ gentrome = "gentrome.fa.gz"
+ }
+ """
+ $get_decoy_ids
+ sed -i.bak -e 's/>//g' decoys.txt
+ cat $transcript_fasta $genome_fasta > $gentrome
+
+ salmon \\
+ index \\
+ --threads $task.cpus \\
+ -t $gentrome \\
+ -d decoys.txt \\
+ $args \\
+ -i salmon
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ salmon: \$(echo \$(salmon --version) | sed -e "s/salmon //g")
+ END_VERSIONS
+ """
+}
diff --git a/modules/nf-core/salmon/index/meta.yml b/modules/nf-core/salmon/index/meta.yml
new file mode 100644
index 0000000..fd94dd2
--- /dev/null
+++ b/modules/nf-core/salmon/index/meta.yml
@@ -0,0 +1,37 @@
+name: salmon_index
+description: Create index for salmon
+keywords:
+ - index
+ - fasta
+ - genome
+ - reference
+tools:
+ - salmon:
+ description: |
+ Salmon is a tool for wicked-fast transcript quantification from RNA-seq data
+ homepage: https://salmon.readthedocs.io/en/latest/salmon.html
+ manual: https://salmon.readthedocs.io/en/latest/salmon.html
+ doi: 10.1038/nmeth.4197
+ licence: ["GPL-3.0-or-later"]
+input:
+ - genome_fasta:
+ type: file
+ description: Fasta file of the reference genome
+ - transcriptome_fasta:
+ type: file
+ description: Fasta file of the reference transcriptome
+output:
+ - index:
+ type: directory
+      description: Folder containing the salmon index files
+ pattern: "salmon"
+ - versions:
+ type: file
+ description: File containing software versions
+ pattern: "versions.yml"
+authors:
+ - "@kevinmenden"
+ - "@drpatelh"
+maintainers:
+ - "@kevinmenden"
+ - "@drpatelh"
diff --git a/modules/nf-core/salmon/index/tests/main.nf.test b/modules/nf-core/salmon/index/tests/main.nf.test
new file mode 100644
index 0000000..24f95c0
--- /dev/null
+++ b/modules/nf-core/salmon/index/tests/main.nf.test
@@ -0,0 +1,35 @@
+nextflow_process {
+
+ name "Test Process SALMON_INDEX"
+ script "../main.nf"
+ process "SALMON_INDEX"
+ tag "modules"
+ tag "modules_nfcore"
+ tag "salmon"
+ tag "salmon/index"
+
+ test("sarscov2") {
+
+ when {
+ params {
+ outdir = "$outputDir"
+ }
+ process {
+ """
+ input[0] = Channel.of([file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)])
+ input[1] = Channel.of([file(params.test_data['sarscov2']['genome']['transcriptome_fasta'], checkIfExists: true)])
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert path(process.out.index.get(0)).exists() },
+ { assert snapshot(process.out.versions).match("versions") }
+ )
+ }
+
+ }
+
+}
\ No newline at end of file
diff --git a/modules/nf-core/salmon/index/tests/main.nf.test.snap b/modules/nf-core/salmon/index/tests/main.nf.test.snap
new file mode 100644
index 0000000..1e3e6b6
--- /dev/null
+++ b/modules/nf-core/salmon/index/tests/main.nf.test.snap
@@ -0,0 +1,10 @@
+{
+ "versions": {
+ "content": [
+ [
+ "versions.yml:md5,563eeafb4577be0b13801d7021c0bf42"
+ ]
+ ],
+ "timestamp": "2023-11-22T14:26:33.32036"
+ }
+}
\ No newline at end of file
diff --git a/modules/nf-core/salmon/index/tests/tags.yml b/modules/nf-core/salmon/index/tests/tags.yml
new file mode 100644
index 0000000..0299789
--- /dev/null
+++ b/modules/nf-core/salmon/index/tests/tags.yml
@@ -0,0 +1,2 @@
+salmon/index:
+ - modules/nf-core/salmon/index/**
diff --git a/modules/nf-core/salmon/quant/environment.yml b/modules/nf-core/salmon/quant/environment.yml
new file mode 100644
index 0000000..8626672
--- /dev/null
+++ b/modules/nf-core/salmon/quant/environment.yml
@@ -0,0 +1,7 @@
+name: salmon_quant
+channels:
+ - conda-forge
+ - bioconda
+ - defaults
+dependencies:
+ - bioconda::salmon=1.10.1
diff --git a/modules/nf-core/salmon/quant/main.nf b/modules/nf-core/salmon/quant/main.nf
new file mode 100644
index 0000000..e115d39
--- /dev/null
+++ b/modules/nf-core/salmon/quant/main.nf
@@ -0,0 +1,79 @@
+process SALMON_QUANT {
+ tag "$meta.id"
+ label "process_medium"
+
+ conda "${moduleDir}/environment.yml"
+ container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+ 'https://depot.galaxyproject.org/singularity/salmon:1.10.1--h7e5ed60_0' :
+ 'biocontainers/salmon:1.10.1--h7e5ed60_0' }"
+
+ input:
+ tuple val(meta), path(reads)
+ path index
+ path gtf
+ path transcript_fasta
+ val alignment_mode
+ val lib_type
+
+ output:
+ tuple val(meta), path("${prefix}") , emit: results
+ tuple val(meta), path("*info.json"), emit: json_info, optional: true
+ path "versions.yml" , emit: versions
+
+ when:
+ task.ext.when == null || task.ext.when
+
+ script:
+ def args = task.ext.args ?: ''
+ prefix = task.ext.prefix ?: "${meta.id}"
+
+ def reference = "--index $index"
+ def reads1 = [], reads2 = []
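+    // Split the flat reads list into mate-1/mate-2 lists by alternating position (R1, R2, R1, R2, ...)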
+ meta.single_end ? [reads].flatten().each{reads1 << it} : reads.eachWithIndex{ v, ix -> ( ix & 1 ? reads2 : reads1) << v }
+ def input_reads = meta.single_end ? "-r ${reads1.join(" ")}" : "-1 ${reads1.join(" ")} -2 ${reads2.join(" ")}"
+ if (alignment_mode) {
+ reference = "-t $transcript_fasta"
+ input_reads = "-a $reads"
+ }
+
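+    // Map the requested lib_type (or meta.strandedness when lib_type is empty) onto salmon's --libType codes; unknown values fall back to auto-detection ('A')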
+ def strandedness_opts = [
+ 'A', 'U', 'SF', 'SR',
+ 'IS', 'IU' , 'ISF', 'ISR',
+ 'OS', 'OU' , 'OSF', 'OSR',
+ 'MS', 'MU' , 'MSF', 'MSR'
+ ]
+ def strandedness = 'A'
+ if (lib_type) {
+ if (strandedness_opts.contains(lib_type)) {
+ strandedness = lib_type
+ } else {
+ log.info "[Salmon Quant] Invalid library type specified '--libType=${lib_type}', defaulting to auto-detection with '--libType=A'."
+ }
+ } else {
+ strandedness = meta.single_end ? 'U' : 'IU'
+ if (meta.strandedness == 'forward') {
+ strandedness = meta.single_end ? 'SF' : 'ISF'
+ } else if (meta.strandedness == 'reverse') {
+ strandedness = meta.single_end ? 'SR' : 'ISR'
+ }
+ }
+ """
+ salmon quant \\
+ --geneMap $gtf \\
+ --threads $task.cpus \\
+ --libType=$strandedness \\
+ $reference \\
+ $input_reads \\
+ $args \\
+ -o $prefix
+
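+    # Expose salmon's run metadata as <prefix>_meta_info.json for the optional json_info channel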
+ if [ -f $prefix/aux_info/meta_info.json ]; then
+ cp $prefix/aux_info/meta_info.json "${prefix}_meta_info.json"
+ fi
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ salmon: \$(echo \$(salmon --version) | sed -e "s/salmon //g")
+ END_VERSIONS
+ """
+}
diff --git a/modules/nf-core/salmon/quant/meta.yml b/modules/nf-core/salmon/quant/meta.yml
new file mode 100644
index 0000000..49d7137
--- /dev/null
+++ b/modules/nf-core/salmon/quant/meta.yml
@@ -0,0 +1,62 @@
+name: salmon_quant
+description: gene/transcript quantification with Salmon
+keywords:
+ - index
+ - fasta
+ - genome
+ - reference
+tools:
+ - salmon:
+ description: |
+ Salmon is a tool for wicked-fast transcript quantification from RNA-seq data
+ homepage: https://salmon.readthedocs.io/en/latest/salmon.html
+ manual: https://salmon.readthedocs.io/en/latest/salmon.html
+ doi: 10.1038/nmeth.4197
+ licence: ["GPL-3.0-or-later"]
+input:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - reads:
+ type: file
+ description: |
+ List of input FastQ files for single-end or paired-end data.
+ Multiple single-end fastqs or pairs of paired-end fastqs are
+ handled.
+ - index:
+ type: directory
+      description: Folder containing the salmon index files
+ - gtf:
+ type: file
+ description: GTF of the reference transcriptome
+ - transcriptome_fasta:
+ type: file
+ description: Fasta file of the reference transcriptome
+ - alignment_mode:
+ type: boolean
+ description: whether to run salmon in alignment mode
+ - lib_type:
+ type: string
+ description: |
+ Override library type inferred based on strandedness defined in meta object
+output:
+ - results:
+ type: directory
+ description: Folder containing the quantification results for a specific sample
+ pattern: "${prefix}"
+ - json_info:
+ type: file
+ description: File containing meta information from Salmon quant
+ pattern: "*info.json"
+ - versions:
+ type: file
+ description: File containing software versions
+ pattern: "versions.yml"
+authors:
+ - "@kevinmenden"
+ - "@drpatelh"
+maintainers:
+ - "@kevinmenden"
+ - "@drpatelh"
diff --git a/modules/nf-core/salmon/quant/tests/main.nf.test b/modules/nf-core/salmon/quant/tests/main.nf.test
new file mode 100644
index 0000000..7b28db3
--- /dev/null
+++ b/modules/nf-core/salmon/quant/tests/main.nf.test
@@ -0,0 +1,192 @@
+nextflow_process {
+
+ name "Test Process SALMON_QUANT"
+ script "../main.nf"
+ process "SALMON_QUANT"
+ config "./nextflow.config"
+ tag "modules"
+ tag "modules_nfcore"
+ tag "salmon"
+ tag "salmon/quant"
+
+ test("sarscov2 - single_end") {
+
+ setup {
+ run("SALMON_INDEX") {
+ script "../../../salmon/index/main.nf"
+ process {
+ """
+ input[0] = Channel.of([file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)])
+ input[1] = Channel.of([file(params.test_data['sarscov2']['genome']['transcriptome_fasta'], checkIfExists: true)])
+ """
+ }
+ }
+ }
+
+ when {
+ params {
+ outdir = "$outputDir"
+ }
+ process {
+ """
+ input[0] = Channel.of([
+ [ id:'test', single_end:true ], // meta map
+ [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) ]
+ ])
+ input[1] = SALMON_INDEX.out.index
+ input[2] = Channel.of([file(params.test_data['sarscov2']['genome']['genome_gtf'], checkIfExists: true)])
+ input[3] = Channel.of([file(params.test_data['sarscov2']['genome']['transcriptome_fasta'], checkIfExists: true)])
+ input[4] = false
+ input[5] = ''
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert path(process.out.json_info.get(0).get(1)).exists() },
+ { assert path(process.out.results.get(0).get(1)).exists() },
+ { assert snapshot(process.out.versions).match("versions") }
+ )
+ }
+
+ }
+
+ test("sarscov2 - single_end lib type A") {
+
+ setup {
+ run("SALMON_INDEX") {
+ script "../../../salmon/index/main.nf"
+ process {
+ """
+ input[0] = Channel.of([file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)])
+ input[1] = Channel.of([file(params.test_data['sarscov2']['genome']['transcriptome_fasta'], checkIfExists: true)])
+ """
+ }
+ }
+ }
+
+ when {
+ params {
+ outdir = "$outputDir"
+ }
+ process {
+ """
+ input[0] = Channel.of([
+ [ id:'test', single_end:true ], // meta map
+ [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) ]
+ ])
+ input[1] = SALMON_INDEX.out.index
+ input[2] = Channel.of([file(params.test_data['sarscov2']['genome']['genome_gtf'], checkIfExists: true)])
+ input[3] = Channel.of([file(params.test_data['sarscov2']['genome']['transcriptome_fasta'], checkIfExists: true)])
+ input[4] = false
+ input[5] = 'A'
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert path(process.out.json_info.get(0).get(1)).exists() },
+ { assert path(process.out.results.get(0).get(1)).exists() },
+ { assert snapshot(process.out.versions).match("versions") }
+ )
+ }
+
+ }
+
+ test("sarscov2 - pair_end") {
+
+ setup {
+ run("SALMON_INDEX") {
+ script "../../../salmon/index/main.nf"
+ process {
+ """
+ input[0] = Channel.of([file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)])
+ input[1] = Channel.of([file(params.test_data['sarscov2']['genome']['transcriptome_fasta'], checkIfExists: true)])
+ """
+ }
+ }
+ }
+
+ when {
+ params {
+ outdir = "$outputDir"
+ }
+ process {
+ """
+ input[0] = Channel.of([
+ [ id:'test', single_end:false ], // meta map
+ [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true),
+ file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) ]
+ ])
+ input[1] = SALMON_INDEX.out.index
+ input[2] = Channel.of([file(params.test_data['sarscov2']['genome']['genome_gtf'], checkIfExists: true)])
+ input[3] = Channel.of([file(params.test_data['sarscov2']['genome']['transcriptome_fasta'], checkIfExists: true)])
+ input[4] = false
+ input[5] = ''
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert path(process.out.json_info.get(0).get(1)).exists() },
+ { assert path(process.out.results.get(0).get(1)).exists() },
+ { assert snapshot(process.out.versions).match("versions") }
+ )
+ }
+
+ }
+ test("sarscov2 - pair_end multiple") {
+
+ setup {
+ run("SALMON_INDEX") {
+ script "../../../salmon/index/main.nf"
+ process {
+ """
+ input[0] = Channel.of([file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)])
+ input[1] = Channel.of([file(params.test_data['sarscov2']['genome']['transcriptome_fasta'], checkIfExists: true)])
+ """
+ }
+ }
+ }
+
+ when {
+ params {
+ outdir = "$outputDir"
+ }
+ process {
+ """
+ input[0] = Channel.of([
+ [ id:'test', single_end:false ], // meta map
+ [
+ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true),
+ file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true),
+ file(params.test_data['sarscov2']['illumina']['test2_1_fastq_gz'], checkIfExists: true),
+ file(params.test_data['sarscov2']['illumina']['test2_2_fastq_gz'], checkIfExists: true)
+ ]
+ ])
+ input[1] = SALMON_INDEX.out.index
+ input[2] = Channel.of([file(params.test_data['sarscov2']['genome']['genome_gtf'], checkIfExists: true)])
+ input[3] = Channel.of([file(params.test_data['sarscov2']['genome']['transcriptome_fasta'], checkIfExists: true)])
+ input[4] = false
+ input[5] = ''
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert path(process.out.json_info.get(0).get(1)).exists() },
+ { assert path(process.out.results.get(0).get(1)).exists() },
+ { assert snapshot(process.out.versions).match("versions") }
+ )
+ }
+
+ }
+}
\ No newline at end of file
diff --git a/modules/nf-core/salmon/quant/tests/main.nf.test.snap b/modules/nf-core/salmon/quant/tests/main.nf.test.snap
new file mode 100644
index 0000000..386a7a3
--- /dev/null
+++ b/modules/nf-core/salmon/quant/tests/main.nf.test.snap
@@ -0,0 +1,10 @@
+{
+ "versions": {
+ "content": [
+ [
+ "versions.yml:md5,80eb3d2ad36960c7e9263f81ede9d263"
+ ]
+ ],
+ "timestamp": "2023-11-22T14:47:10.132112"
+ }
+}
\ No newline at end of file
diff --git a/modules/nf-core/salmon/quant/tests/nextflow.config b/modules/nf-core/salmon/quant/tests/nextflow.config
new file mode 100644
index 0000000..37c0821
--- /dev/null
+++ b/modules/nf-core/salmon/quant/tests/nextflow.config
@@ -0,0 +1,7 @@
+process {
+
+ withName: SALMON_QUANT {
+ ext.args = '--minAssignedFrags 1'
+ }
+
+}
diff --git a/modules/nf-core/salmon/quant/tests/tags.yml b/modules/nf-core/salmon/quant/tests/tags.yml
new file mode 100644
index 0000000..048d816
--- /dev/null
+++ b/modules/nf-core/salmon/quant/tests/tags.yml
@@ -0,0 +1,2 @@
+salmon/quant:
+ - modules/nf-core/salmon/quant/**
diff --git a/modules/nf-core/sortmerna/environment.yml b/modules/nf-core/sortmerna/environment.yml
new file mode 100644
index 0000000..f40f995
--- /dev/null
+++ b/modules/nf-core/sortmerna/environment.yml
@@ -0,0 +1,7 @@
+name: sortmerna
+channels:
+ - conda-forge
+ - bioconda
+ - defaults
+dependencies:
+ - bioconda::sortmerna=4.3.6
diff --git a/modules/nf-core/sortmerna/main.nf b/modules/nf-core/sortmerna/main.nf
new file mode 100644
index 0000000..29c640c
--- /dev/null
+++ b/modules/nf-core/sortmerna/main.nf
@@ -0,0 +1,96 @@
+process SORTMERNA {
+ tag "$meta.id"
+ label 'process_high'
+
+ conda "${moduleDir}/environment.yml"
+ container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+ 'https://depot.galaxyproject.org/singularity/sortmerna:4.3.6--h9ee0642_0' :
+ 'biocontainers/sortmerna:4.3.6--h9ee0642_0' }"
+
+ input:
+ tuple val(meta), path(reads)
+ path fastas
+
+ output:
+ tuple val(meta), path("*non_rRNA.fastq.gz"), emit: reads
+ tuple val(meta), path("*.log") , emit: log
+ path "versions.yml" , emit: versions
+
+ when:
+ task.ext.when == null || task.ext.when
+
+ script:
+ def args = task.ext.args ?: ''
+ def prefix = task.ext.prefix ?: "${meta.id}"
+ if (meta.single_end) {
+ """
+ sortmerna \\
+ ${'--ref '+fastas.join(' --ref ')} \\
+ --reads $reads \\
+ --threads $task.cpus \\
+ --workdir . \\
+ --aligned rRNA_reads \\
+ --fastx \\
+ --other non_rRNA_reads \\
+ $args
+
+ mv non_rRNA_reads.f*q.gz ${prefix}.non_rRNA.fastq.gz
+ mv rRNA_reads.log ${prefix}.sortmerna.log
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ sortmerna: \$(echo \$(sortmerna --version 2>&1) | sed 's/^.*SortMeRNA version //; s/ Build Date.*\$//')
+ END_VERSIONS
+ """
+ } else {
+ """
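+    # --paired_in sends both mates to the rRNA (aligned) output when either mate matches; --out2 splits the remaining non-rRNA reads into forward/reverse files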
+ sortmerna \\
+ ${'--ref '+fastas.join(' --ref ')} \\
+ --reads ${reads[0]} \\
+ --reads ${reads[1]} \\
+ --threads $task.cpus \\
+ --workdir . \\
+ --aligned rRNA_reads \\
+ --fastx \\
+ --other non_rRNA_reads \\
+ --paired_in \\
+ --out2 \\
+ $args
+
+ mv non_rRNA_reads_fwd.f*q.gz ${prefix}_1.non_rRNA.fastq.gz
+ mv non_rRNA_reads_rev.f*q.gz ${prefix}_2.non_rRNA.fastq.gz
+ mv rRNA_reads.log ${prefix}.sortmerna.log
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ sortmerna: \$(echo \$(sortmerna --version 2>&1) | sed 's/^.*SortMeRNA version //; s/ Build Date.*\$//')
+ END_VERSIONS
+ """
+ }
+
+ stub:
+ def args = task.ext.args ?: ''
+ def prefix = task.ext.prefix ?: "${meta.id}"
+ if (meta.single_end) {
+ """
+ touch ${prefix}.non_rRNA.fastq.gz
+ touch ${prefix}.sortmerna.log
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ sortmerna: \$(echo \$(sortmerna --version 2>&1) | sed 's/^.*SortMeRNA version //; s/ Build Date.*\$//')
+ END_VERSIONS
+ """
+ } else {
+ """
+ touch ${prefix}_1.non_rRNA.fastq.gz
+ touch ${prefix}_2.non_rRNA.fastq.gz
+ touch ${prefix}.sortmerna.log
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ sortmerna: \$(echo \$(sortmerna --version 2>&1) | sed 's/^.*SortMeRNA version //; s/ Build Date.*\$//')
+ END_VERSIONS
+ """
+ }
+}
diff --git a/modules/nf-core/sortmerna/meta.yml b/modules/nf-core/sortmerna/meta.yml
new file mode 100644
index 0000000..de0b18e
--- /dev/null
+++ b/modules/nf-core/sortmerna/meta.yml
@@ -0,0 +1,53 @@
+name: sortmerna
+description: Local sequence alignment tool for filtering, mapping and clustering.
+keywords:
+ - filtering
+ - mapping
+ - clustering
+ - rRNA
+ - ribosomal RNA
+tools:
+ - SortMeRNA:
+ description: The core algorithm is based on approximate seeds and allows for sensitive analysis of NGS reads. The main application of SortMeRNA is filtering rRNA from metatranscriptomic data. SortMeRNA takes as input files of reads (fasta, fastq, fasta.gz, fastq.gz) and one or multiple rRNA database file(s), and sorts apart aligned and rejected reads into two files. Additional applications include clustering and taxonomy assignation available through QIIME v1.9.1. SortMeRNA works with Illumina, Ion Torrent and PacBio data, and can produce SAM and BLAST-like alignments.
+ homepage: https://hpc.nih.gov/apps/sortmeRNA.html
+ documentation: https://github.com/biocore/sortmerna/wiki/
+ licence: ["GPL-3.0-or-later"]
+input:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - reads:
+ type: file
+ description: |
+ List of input FastQ files of size 1 and 2 for single-end and paired-end data,
+ respectively.
+ - fastas:
+ type: file
+ description: |
+ Path to reference file(s)
+output:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - reads:
+ type: file
+ description: The filtered fastq reads
+ pattern: "*fastq.gz"
+ - log:
+ type: file
+ description: SortMeRNA log file
+ pattern: "*sortmerna.log"
+ - versions:
+ type: file
+ description: File containing software versions
+ pattern: "versions.yml"
+authors:
+ - "@drpatelh"
+ - "@mashehu"
+maintainers:
+ - "@drpatelh"
+ - "@mashehu"
diff --git a/modules/nf-core/sortmerna/tests/main.nf.test b/modules/nf-core/sortmerna/tests/main.nf.test
new file mode 100644
index 0000000..8a01e2a
--- /dev/null
+++ b/modules/nf-core/sortmerna/tests/main.nf.test
@@ -0,0 +1,144 @@
+nextflow_process {
+
+ name "Test Process SORTMERNA"
+ script "../main.nf"
+ process "SORTMERNA"
+ tag "modules"
+ tag "modules_nfcore"
+ tag "sortmerna"
+
+ test("sarscov2 single_end") {
+
+ when {
+ process {
+ """
+ input[0] = [ [ id:'test', single_end:true ], // meta map
+ [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) ]
+ ]
+ input[1] = [ file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) ]
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert process.out.reads },
+ { assert file(process.out.log[0][1]).text.contains("Total reads passing E-value threshold = 100 (100.00)") },
+ {
+ assert snapshot(
+ (
+ [process.out.reads[0][0].toString()] + // meta
+ process.out.reads.collect { file(it[1]).getName() } +
+ process.out.log.collect { file(it[1]).getName() }
+ ).sort()
+ ).match("sarscov2 single_end-for_stub_match")
+ },
+ { assert snapshot(process.out.versions).match("versions") }
+ )
+ }
+
+ }
+
+ test("sarscov2 single_end stub") {
+
+ options '-stub'
+
+ when {
+ process {
+ """
+ input[0] = [ [ id:'test', single_end:true ], // meta map
+ [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) ]
+ ]
+ input[1] = [ file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) ]
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ {
+ assert snapshot(
+ (
+ [process.out.reads[0][0].toString()] + // meta
+ process.out.reads.collect { file(it[1]).getName() } +
+ process.out.log.collect { file(it[1]).getName() }
+ ).sort()
+ ).match("sarscov2 single_end-for_stub_match")
+ },
+ { assert snapshot(process.out.versions).match("versions") }
+ )
+ }
+
+ }
+
+ test("sarscov2 paired_end") {
+
+ when {
+ process {
+ """
+ input[0] = [ [ id:'test', single_end:false ], // meta map
+ [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true),
+ file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) ]
+ ]
+ input[1] = [ file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) ]
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert process.out.reads },
+ { assert file(process.out.log[0][1]).text.contains("Total reads passing E-value threshold = 200 (100.00)") },
+ {
+ assert snapshot(
+ (
+ [process.out.reads[0][0].toString()] + // meta
+ process.out.reads.collect { it[1].collect { item -> file(item).getName() } } +
+ process.out.log.collect { file(it[1]).getName() }
+ ).sort()
+ ).match("sarscov2 paired_end-for_stub_match")
+ },
+ { assert snapshot(process.out.versions).match("versions") }
+ )
+ }
+
+ }
+
+ test("sarscov2 paired_end stub") {
+
+ options '-stub'
+
+ when {
+ process {
+ """
+ input[0] = [ [ id:'test', single_end:false ], // meta map
+ [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true),
+ file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) ]
+ ]
+ input[1] = [ file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) ]
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ {
+ assert snapshot(
+ (
+ [process.out.reads[0][0].toString()] + // meta
+ process.out.reads.collect { it[1].collect { item -> file(item).getName() } } +
+ process.out.log.collect { file(it[1]).getName() }
+ ).sort()
+ ).match("sarscov2 paired_end-for_stub_match")
+ },
+ { assert snapshot(process.out.versions).match("versions") }
+ )
+ }
+
+ }
+
+}
diff --git a/modules/nf-core/sortmerna/tests/main.nf.test.snap b/modules/nf-core/sortmerna/tests/main.nf.test.snap
new file mode 100644
index 0000000..e502000
--- /dev/null
+++ b/modules/nf-core/sortmerna/tests/main.nf.test.snap
@@ -0,0 +1,33 @@
+{
+ "sarscov2 single_end-for_stub_match": {
+ "content": [
+ [
+ "test.non_rRNA.fastq.gz",
+ "test.sortmerna.log",
+ "{id=test, single_end=true}"
+ ]
+ ],
+ "timestamp": "2023-12-21T11:56:00.15356"
+ },
+ "versions": {
+ "content": [
+ [
+ "versions.yml:md5,7df9d50209f351e1f75e05a1fad6ba4b"
+ ]
+ ],
+ "timestamp": "2023-12-21T11:56:00.200244"
+ },
+ "sarscov2 paired_end-for_stub_match": {
+ "content": [
+ [
+ [
+ "test_1.non_rRNA.fastq.gz",
+ "test_2.non_rRNA.fastq.gz"
+ ],
+ "test.sortmerna.log",
+ "{id=test, single_end=false}"
+ ]
+ ],
+ "timestamp": "2023-12-21T12:00:47.879193"
+ }
+}
\ No newline at end of file
diff --git a/modules/nf-core/sortmerna/tests/tags.yml b/modules/nf-core/sortmerna/tests/tags.yml
new file mode 100644
index 0000000..e088480
--- /dev/null
+++ b/modules/nf-core/sortmerna/tests/tags.yml
@@ -0,0 +1,2 @@
+sortmerna:
+ - modules/nf-core/sortmerna/**
diff --git a/modules/nf-core/star/genomegenerate/environment.yml b/modules/nf-core/star/genomegenerate/environment.yml
new file mode 100644
index 0000000..93e4476
--- /dev/null
+++ b/modules/nf-core/star/genomegenerate/environment.yml
@@ -0,0 +1,11 @@
+name: star_genomegenerate
+
+channels:
+ - conda-forge
+ - bioconda
+ - defaults
+
+dependencies:
+ - bioconda::samtools=1.18
+ - bioconda::star=2.7.10a
+ - conda-forge::gawk=5.1.0
diff --git a/modules/nf-core/star/genomegenerate/main.nf b/modules/nf-core/star/genomegenerate/main.nf
new file mode 100644
index 0000000..b885571
--- /dev/null
+++ b/modules/nf-core/star/genomegenerate/main.nf
@@ -0,0 +1,119 @@
+process STAR_GENOMEGENERATE {
+ tag "$fasta"
+ label 'process_high'
+
+ conda "${moduleDir}/environment.yml"
+ container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+ 'https://depot.galaxyproject.org/singularity/mulled-v2-1fa26d1ce03c295fe2fdcf85831a92fbcbd7e8c2:ded3841da0194af2701c780e9b3d653a85d27489-0' :
+ 'biocontainers/mulled-v2-1fa26d1ce03c295fe2fdcf85831a92fbcbd7e8c2:ded3841da0194af2701c780e9b3d653a85d27489-0' }"
+
+ input:
+ tuple val(meta), path(fasta)
+ tuple val(meta2), path(gtf)
+
+ output:
+ tuple val(meta), path("star") , emit: index
+ path "versions.yml" , emit: versions
+
+ when:
+ task.ext.when == null || task.ext.when
+
+ script:
+ def args = task.ext.args ?: ''
+ def args_list = args.tokenize()
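+ // When task.memory is set, cap STAR's genome-generate RAM at the allocation minus 100000000 bytes (~100 MB of headroom)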
+ def memory = task.memory ? "--limitGenomeGenerateRAM ${task.memory.toBytes() - 100000000}" : ''
+ def include_gtf = gtf ? "--sjdbGTFfile $gtf" : ''
+ if (args_list.contains('--genomeSAindexNbases')) {
+ """
+ mkdir star
+ STAR \\
+ --runMode genomeGenerate \\
+ --genomeDir star/ \\
+ --genomeFastaFiles $fasta \\
+ $include_gtf \\
+ --runThreadN $task.cpus \\
+ $memory \\
+ $args
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ star: \$(STAR --version | sed -e "s/STAR_//g")
+ samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
+ gawk: \$(echo \$(gawk --version 2>&1) | sed 's/^.*GNU Awk //; s/, .*\$//')
+ END_VERSIONS
+ """
+ } else {
+ """
+ samtools faidx $fasta
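+ # Following the STAR manual, scale --genomeSAindexNbases to min(14, log2(genome length)/2 - 1);
+ # e.g. a ~30 kb viral genome gives log2(30000)/2 - 1 ~ 6.4, which the gawk expression below prints as 6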
+ NUM_BASES=`gawk '{sum = sum + \$2}END{if ((log(sum)/log(2))/2 - 1 > 14) {printf "%.0f", 14} else {printf "%.0f", (log(sum)/log(2))/2 - 1}}' ${fasta}.fai`
+
+ mkdir star
+ STAR \\
+ --runMode genomeGenerate \\
+ --genomeDir star/ \\
+ --genomeFastaFiles $fasta \\
+ $include_gtf \\
+ --runThreadN $task.cpus \\
+ --genomeSAindexNbases \$NUM_BASES \\
+ $memory \\
+ $args
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ star: \$(STAR --version | sed -e "s/STAR_//g")
+ samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
+ gawk: \$(echo \$(gawk --version 2>&1) | sed 's/^.*GNU Awk //; s/, .*\$//')
+ END_VERSIONS
+ """
+ }
+
+ stub:
+ if (gtf) {
+ """
+ mkdir star
+ touch star/Genome
+ touch star/Log.out
+ touch star/SA
+ touch star/SAindex
+ touch star/chrLength.txt
+ touch star/chrName.txt
+ touch star/chrNameLength.txt
+ touch star/chrStart.txt
+ touch star/exonGeTrInfo.tab
+ touch star/exonInfo.tab
+ touch star/geneInfo.tab
+ touch star/genomeParameters.txt
+ touch star/sjdbInfo.txt
+ touch star/sjdbList.fromGTF.out.tab
+ touch star/sjdbList.out.tab
+ touch star/transcriptInfo.tab
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ star: \$(STAR --version | sed -e "s/STAR_//g")
+ samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
+ gawk: \$(echo \$(gawk --version 2>&1) | sed 's/^.*GNU Awk //; s/, .*\$//')
+ END_VERSIONS
+ """
+ } else {
+ """
+ mkdir star
+ touch star/Genome
+ touch star/Log.out
+ touch star/SA
+ touch star/SAindex
+ touch star/chrLength.txt
+ touch star/chrName.txt
+ touch star/chrNameLength.txt
+ touch star/chrStart.txt
+ touch star/genomeParameters.txt
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ star: \$(STAR --version | sed -e "s/STAR_//g")
+ samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
+ gawk: \$(echo \$(gawk --version 2>&1) | sed 's/^.*GNU Awk //; s/, .*\$//')
+ END_VERSIONS
+ """
+ }
+}
diff --git a/modules/nf-core/star/genomegenerate/meta.yml b/modules/nf-core/star/genomegenerate/meta.yml
new file mode 100644
index 0000000..1061e1b
--- /dev/null
+++ b/modules/nf-core/star/genomegenerate/meta.yml
@@ -0,0 +1,53 @@
+name: star_genomegenerate
+description: Create index for STAR
+keywords:
+ - index
+ - fasta
+ - genome
+ - reference
+tools:
+ - star:
+ description: |
+ STAR is a software package for mapping DNA sequences against
+ a large reference genome, such as the human genome.
+ homepage: https://github.com/alexdobin/STAR
+ manual: https://github.com/alexdobin/STAR/blob/master/doc/STARmanual.pdf
+ doi: 10.1093/bioinformatics/bts635
+ licence: ["MIT"]
+input:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - fasta:
+ type: file
+ description: Fasta file of the reference genome
+ - meta2:
+ type: map
+ description: |
+ Groovy Map containing reference information
+ e.g. [ id:'test' ]
+ - gtf:
+ type: file
+ description: GTF file of the reference genome
+output:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - index:
+ type: directory
+ description: Folder containing the star index files
+ pattern: "star"
+ - versions:
+ type: file
+ description: File containing software versions
+ pattern: "versions.yml"
+authors:
+ - "@kevinmenden"
+ - "@drpatelh"
+maintainers:
+ - "@kevinmenden"
+ - "@drpatelh"
diff --git a/modules/nf-core/star/genomegenerate/tests/main.nf.test b/modules/nf-core/star/genomegenerate/tests/main.nf.test
new file mode 100644
index 0000000..af0c942
--- /dev/null
+++ b/modules/nf-core/star/genomegenerate/tests/main.nf.test
@@ -0,0 +1,117 @@
+nextflow_process {
+
+ name "Test Process STAR_GENOMEGENERATE"
+ script "../main.nf"
+ process "STAR_GENOMEGENERATE"
+ tag "modules"
+ tag "modules_nfcore"
+ tag "star"
+ tag "star/genomegenerate"
+
+ test("homo_sapiens") {
+
+ when {
+ process {
+ """
+ input[0] = Channel.of([
+ [ id:'test_fasta' ],
+ [file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)]
+ ])
+ input[1] = Channel.of([
+ [ id:'test_gtf' ],
+ [file(params.test_data['homo_sapiens']['genome']['genome_gtf'], checkIfExists: true)]
+ ])
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(file(process.out.index[0][1]).listFiles().collect { it.getName() }.sort().toString()).match("index_with_gtf") },
+ { assert snapshot(process.out.versions).match("versions") }
+ )
+ }
+
+ }
+
+ test("homo_sapiens-stub") {
+
+ options '-stub'
+
+ when {
+ process {
+ """
+ input[0] = Channel.of([
+ [ id:'test_fasta' ],
+ [file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)]
+ ])
+ input[1] = Channel.of([
+ [ id:'test_gtf' ],
+ [file(params.test_data['homo_sapiens']['genome']['genome_gtf'], checkIfExists: true)]
+ ])
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(file(process.out.index[0][1]).listFiles().collect { it.getName() }.sort().toString()).match("index_with_gtf") },
+ { assert snapshot(process.out.versions).match("versions") }
+ )
+ }
+
+ }
+
+ test("homo_sapiens-without_gtf") {
+
+ when {
+ process {
+ """
+ input[0] = Channel.of([
+ [ id:'test_fasta' ],
+ [file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)]
+ ])
+ input[1] = Channel.of([ [], [] ])
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(file(process.out.index[0][1]).listFiles().collect { it.getName() }.sort().toString()).match("index_without_gtf") },
+ { assert snapshot(process.out.versions).match("versions") }
+ )
+ }
+
+ }
+
+ test("homo_sapiens-without_gtf-stub") {
+
+ options '-stub'
+
+ when {
+ process {
+ """
+ input[0] = Channel.of([
+ [ id:'test_fasta' ],
+ [file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)]
+ ])
+ input[1] = Channel.of([ [], [] ])
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(file(process.out.index[0][1]).listFiles().collect { it.getName() }.sort().toString()).match("index_without_gtf") },
+ { assert snapshot(process.out.versions).match("versions") }
+ )
+ }
+
+ }
+
+}
\ No newline at end of file
diff --git a/modules/nf-core/star/genomegenerate/tests/main.nf.test.snap b/modules/nf-core/star/genomegenerate/tests/main.nf.test.snap
new file mode 100644
index 0000000..9de08c7
--- /dev/null
+++ b/modules/nf-core/star/genomegenerate/tests/main.nf.test.snap
@@ -0,0 +1,22 @@
+{
+ "versions": {
+ "content": [
+ [
+ "versions.yml:md5,46b8f1f34bb7f23892cd1eb249ed4d7f"
+ ]
+ ],
+ "timestamp": "2023-12-19T11:05:51.741109"
+ },
+ "index_with_gtf": {
+ "content": [
+ "[Genome, Log.out, SA, SAindex, chrLength.txt, chrName.txt, chrNameLength.txt, chrStart.txt, exonGeTrInfo.tab, exonInfo.tab, geneInfo.tab, genomeParameters.txt, sjdbInfo.txt, sjdbList.fromGTF.out.tab, sjdbList.out.tab, transcriptInfo.tab]"
+ ],
+ "timestamp": "2023-12-19T11:38:14.551548"
+ },
+ "index_without_gtf": {
+ "content": [
+ "[Genome, Log.out, SA, SAindex, chrLength.txt, chrName.txt, chrNameLength.txt, chrStart.txt, genomeParameters.txt]"
+ ],
+ "timestamp": "2023-12-19T11:38:22.382905"
+ }
+}
\ No newline at end of file
diff --git a/modules/nf-core/star/genomegenerate/tests/tags.yml b/modules/nf-core/star/genomegenerate/tests/tags.yml
new file mode 100644
index 0000000..79f619b
--- /dev/null
+++ b/modules/nf-core/star/genomegenerate/tests/tags.yml
@@ -0,0 +1,2 @@
+star/genomegenerate:
+ - modules/nf-core/star/genomegenerate/**
diff --git a/modules/nf-core/trimgalore/environment.yml b/modules/nf-core/trimgalore/environment.yml
new file mode 100644
index 0000000..6cd0f51
--- /dev/null
+++ b/modules/nf-core/trimgalore/environment.yml
@@ -0,0 +1,7 @@
+name: trimgalore
+channels:
+ - conda-forge
+ - bioconda
+ - defaults
+dependencies:
+ - bioconda::trim-galore=0.6.7
diff --git a/modules/nf-core/trimgalore/main.nf b/modules/nf-core/trimgalore/main.nf
new file mode 100644
index 0000000..24ead87
--- /dev/null
+++ b/modules/nf-core/trimgalore/main.nf
@@ -0,0 +1,75 @@
+process TRIMGALORE {
+ tag "$meta.id"
+ label 'process_high'
+
+ conda "${moduleDir}/environment.yml"
+ container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+ 'https://depot.galaxyproject.org/singularity/trim-galore:0.6.7--hdfd78af_0' :
+ 'biocontainers/trim-galore:0.6.7--hdfd78af_0' }"
+
+ input:
+ tuple val(meta), path(reads)
+
+ output:
+ tuple val(meta), path("*{3prime,5prime,trimmed,val}*.fq.gz"), emit: reads
+ tuple val(meta), path("*report.txt") , emit: log , optional: true
+ tuple val(meta), path("*unpaired*.fq.gz") , emit: unpaired, optional: true
+ tuple val(meta), path("*.html") , emit: html , optional: true
+ tuple val(meta), path("*.zip") , emit: zip , optional: true
+ path "versions.yml" , emit: versions
+
+ when:
+ task.ext.when == null || task.ext.when
+
+ script:
+ def args = task.ext.args ?: ''
+ // Calculate number of --cores for TrimGalore based on value of task.cpus
+ // See: https://github.com/FelixKrueger/TrimGalore/blob/master/Changelog.md#version-060-release-on-1-mar-2019
+ // See: https://github.com/nf-core/atacseq/pull/65
+ def cores = 1
+ if (task.cpus) {
+ cores = (task.cpus as int) - 4
+ if (meta.single_end) cores = (task.cpus as int) - 3
+ if (cores < 1) cores = 1
+ if (cores > 8) cores = 8
+ }
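+ // e.g. task.cpus = 16 with paired-end data gives cores = 16 - 4 = 12, capped to 8; task.cpus = 2 would fall below 1 and is reset to 1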
+
+ // Added soft-links to original fastqs for consistent naming in MultiQC
+ def prefix = task.ext.prefix ?: "${meta.id}"
+ if (meta.single_end) {
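+ // Split args so each "--option value" pair stays together, then drop read-2-specific options (e.g. "--clip_r2 5") that do not apply to single-end data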
+ def args_list = args.split("\\s(?=--)").toList()
+ args_list.removeAll { it.toLowerCase().contains('_r2 ') }
+ """
+ [ ! -f ${prefix}.fastq.gz ] && ln -s $reads ${prefix}.fastq.gz
+ trim_galore \\
+ ${args_list.join(' ')} \\
+ --cores $cores \\
+ --gzip \\
+ ${prefix}.fastq.gz
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ trimgalore: \$(echo \$(trim_galore --version 2>&1) | sed 's/^.*version //; s/Last.*\$//')
+ cutadapt: \$(cutadapt --version)
+ END_VERSIONS
+ """
+ } else {
+ """
+ [ ! -f ${prefix}_1.fastq.gz ] && ln -s ${reads[0]} ${prefix}_1.fastq.gz
+ [ ! -f ${prefix}_2.fastq.gz ] && ln -s ${reads[1]} ${prefix}_2.fastq.gz
+ trim_galore \\
+ $args \\
+ --cores $cores \\
+ --paired \\
+ --gzip \\
+ ${prefix}_1.fastq.gz \\
+ ${prefix}_2.fastq.gz
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ trimgalore: \$(echo \$(trim_galore --version 2>&1) | sed 's/^.*version //; s/Last.*\$//')
+ cutadapt: \$(cutadapt --version)
+ END_VERSIONS
+ """
+ }
+}
diff --git a/modules/nf-core/trimgalore/meta.yml b/modules/nf-core/trimgalore/meta.yml
new file mode 100644
index 0000000..e649088
--- /dev/null
+++ b/modules/nf-core/trimgalore/meta.yml
@@ -0,0 +1,68 @@
+name: trimgalore
+description: Trim FastQ files using Trim Galore!
+keywords:
+ - trimming
+ - adapters
+ - sequencing adapters
+ - fastq
+tools:
+ - trimgalore:
+ description: |
+ A wrapper tool around Cutadapt and FastQC to consistently apply quality
+ and adapter trimming to FastQ files, with some extra functionality for
+ MspI-digested RRBS-type (Reduced Representation Bisulfite-Seq) libraries.
+ homepage: https://www.bioinformatics.babraham.ac.uk/projects/trim_galore/
+ documentation: https://github.com/FelixKrueger/TrimGalore/blob/master/Docs/Trim_Galore_User_Guide.md
+ licence: ["GPL-3.0-or-later"]
+input:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - reads:
+ type: file
+ description: |
+ List of input FastQ files of size 1 and 2 for single-end and paired-end data,
+ respectively.
+output:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - reads:
+ type: file
+ description: |
+ List of input adapter trimmed FastQ files of size 1 and 2 for
+ single-end and paired-end data, respectively.
+ pattern: "*{3prime,5prime,trimmed,val}*.fq.gz"
+ - unpaired:
+ type: file
+ description: |
+ FastQ files containing unpaired reads from read 1 or read 2
+ pattern: "*unpaired*.fq.gz"
+ - html:
+ type: file
+ description: FastQC report (optional)
+ pattern: "*_{fastqc.html}"
+ - zip:
+ type: file
+ description: FastQC report archive (optional)
+ pattern: "*_{fastqc.zip}"
+ - log:
+ type: file
+ description: Trim Galore! trimming report
+ pattern: "*_{report.txt}"
+ - versions:
+ type: file
+ description: File containing software versions
+ pattern: "versions.yml"
+authors:
+ - "@drpatelh"
+ - "@ewels"
+ - "@FelixKrueger"
+maintainers:
+ - "@drpatelh"
+ - "@ewels"
+ - "@FelixKrueger"
diff --git a/modules/nf-core/trimgalore/tests/main.nf.test b/modules/nf-core/trimgalore/tests/main.nf.test
new file mode 100644
index 0000000..bc6812c
--- /dev/null
+++ b/modules/nf-core/trimgalore/tests/main.nf.test
@@ -0,0 +1,105 @@
+nextflow_process {
+
+ name "Test Process TRIMGALORE"
+ script "../main.nf"
+ process "TRIMGALORE"
+ tag "modules"
+ tag "modules_nfcore"
+ tag "trimgalore"
+
+ test("test_trimgalore_single_end") {
+
+ when {
+ params {
+ outdir = "$outputDir"
+ }
+ process {
+ """
+ input[0] = [ [ id:'test', single_end:true ], // meta map
+ [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) ]
+ ]
+ """
+ }
+ }
+
+ then {
+ def read_lines = ["@ERR5069949.2151832 NS500628:121:HK3MMAFX2:2:21208:10793:15304/1",
+ "TCATAAACCAAAGCACTCACAGTGTCAACAATTTCAGCAGGACAACGCCGACAAGTTCCGAGGAACATGTCTGGACCTATAGTTTTCATAAGTCTACACACTGAATTGAAATATTCTGGTTCTAGTGTGCCCTTAGTTAGCAATGTGCGT",
+ "AAAAAAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEE
+ { assert path(process.out.reads.get(0).get(1)).linesGzip.contains(read_line) }
+ }
+ },
+ { report1_lines.each { report1_line ->
+ { assert path(process.out.log.get(0).get(1)).getText().contains(report1_line) }
+ }
+ }
+ )
+ }
+ }
+
+ test("test_trimgalore_paired_end") {
+
+ when {
+ params {
+ outdir = "$outputDir"
+ }
+ process {
+ """
+ input[0] = [ [ id:'test', single_end:false ], // meta map
+ [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true),
+ file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) ]
+ ]
+ """
+ }
+ }
+
+ then {
+ def read1_lines = ["@ERR5069949.2151832 NS500628:121:HK3MMAFX2:2:21208:10793:15304/1",
+ "TCATAAACCAAAGCACTCACAGTGTCAACAATTTCAGCAGGACAACGCCGACAAGTTCCGAGGAACATGTCTGGACCTATAGTTTTCATAAGTCTACACACTGAATTGAAATATTCTGGTTCTAGTGTGCCCTTAGTTAGCAATGTGCGT",
+ "AAAAAAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEE
+ { assert path(process.out.reads.get(0).get(1).get(0)).linesGzip.contains(read1_line) }
+ }
+ },
+ { read2_lines.each { read2_line ->
+ { assert path(process.out.reads.get(0).get(1).get(1)).linesGzip.contains(read2_line) }
+ }
+ },
+ { report1_lines.each { report1_line ->
+ { assert path(process.out.log.get(0).get(1).get(0)).getText().contains(report1_line) }
+ }
+ },
+ { report2_lines.each { report2_line ->
+ { assert path(process.out.log.get(0).get(1).get(1)).getText().contains(report2_line) }
+ }
+ }
+ )
+ }
+ }
+}
diff --git a/modules/nf-core/trimgalore/tests/main.nf.test.snap b/modules/nf-core/trimgalore/tests/main.nf.test.snap
new file mode 100644
index 0000000..84feacc
--- /dev/null
+++ b/modules/nf-core/trimgalore/tests/main.nf.test.snap
@@ -0,0 +1,148 @@
+{
+ "test_trimgalore_single_end": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "test_trimmed.fq.gz:md5,e0a7516b8ea8d6467d6306acb2cd13c4"
+ ]
+ ],
+ "1": [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "test.fastq.gz_trimming_report.txt:md5,a1ab3958205f1ddf48af623242b5b429"
+ ]
+ ],
+ "2": [
+
+ ],
+ "3": [
+
+ ],
+ "4": [
+
+ ],
+ "5": [
+ "versions.yml:md5,47d966cbb31c80eb8f7fe860d55659b7"
+ ],
+ "html": [
+
+ ],
+ "log": [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "test.fastq.gz_trimming_report.txt:md5,a1ab3958205f1ddf48af623242b5b429"
+ ]
+ ],
+ "reads": [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "test_trimmed.fq.gz:md5,e0a7516b8ea8d6467d6306acb2cd13c4"
+ ]
+ ],
+ "unpaired": [
+
+ ],
+ "versions": [
+ "versions.yml:md5,47d966cbb31c80eb8f7fe860d55659b7"
+ ],
+ "zip": [
+
+ ]
+ }
+ ],
+ "timestamp": "2023-10-17T15:24:57.782141441"
+ },
+ "test_trimgalore_paired_end": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ [
+ "test_1_val_1.fq.gz:md5,e0a7516b8ea8d6467d6306acb2cd13c4",
+ "test_2_val_2.fq.gz:md5,f3d61189e6d10202da7b8686f1dbb71b"
+ ]
+ ]
+ ],
+ "1": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ [
+ "test_1.fastq.gz_trimming_report.txt:md5,315d40465412f9909bbaabf52269274d",
+ "test_2.fastq.gz_trimming_report.txt:md5,34436303da1c78811103427a2fb57f7b"
+ ]
+ ]
+ ],
+ "2": [
+
+ ],
+ "3": [
+
+ ],
+ "4": [
+
+ ],
+ "5": [
+ "versions.yml:md5,47d966cbb31c80eb8f7fe860d55659b7"
+ ],
+ "html": [
+
+ ],
+ "log": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ [
+ "test_1.fastq.gz_trimming_report.txt:md5,315d40465412f9909bbaabf52269274d",
+ "test_2.fastq.gz_trimming_report.txt:md5,34436303da1c78811103427a2fb57f7b"
+ ]
+ ]
+ ],
+ "reads": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ [
+ "test_1_val_1.fq.gz:md5,e0a7516b8ea8d6467d6306acb2cd13c4",
+ "test_2_val_2.fq.gz:md5,f3d61189e6d10202da7b8686f1dbb71b"
+ ]
+ ]
+ ],
+ "unpaired": [
+
+ ],
+ "versions": [
+ "versions.yml:md5,47d966cbb31c80eb8f7fe860d55659b7"
+ ],
+ "zip": [
+
+ ]
+ }
+ ],
+ "timestamp": "2023-10-17T15:25:08.513589909"
+ }
+}
\ No newline at end of file
diff --git a/modules/nf-core/trimgalore/tests/tags.yml b/modules/nf-core/trimgalore/tests/tags.yml
new file mode 100644
index 0000000..e993769
--- /dev/null
+++ b/modules/nf-core/trimgalore/tests/tags.yml
@@ -0,0 +1,2 @@
+trimgalore:
+ - modules/nf-core/trimgalore/**
diff --git a/modules/nf-core/umitools/extract/environment.yml b/modules/nf-core/umitools/extract/environment.yml
new file mode 100644
index 0000000..7d08ac0
--- /dev/null
+++ b/modules/nf-core/umitools/extract/environment.yml
@@ -0,0 +1,7 @@
+name: umitools_extract
+channels:
+ - conda-forge
+ - bioconda
+ - defaults
+dependencies:
+ - bioconda::umi_tools=1.1.4
diff --git a/modules/nf-core/umitools/extract/main.nf b/modules/nf-core/umitools/extract/main.nf
new file mode 100644
index 0000000..4bd79e7
--- /dev/null
+++ b/modules/nf-core/umitools/extract/main.nf
@@ -0,0 +1,56 @@
+process UMITOOLS_EXTRACT {
+ tag "$meta.id"
+ label "process_single"
+ label "process_long"
+
+ conda "${moduleDir}/environment.yml"
+ container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+ 'https://depot.galaxyproject.org/singularity/umi_tools:1.1.4--py38hbff2b2d_1' :
+ 'biocontainers/umi_tools:1.1.4--py38hbff2b2d_1' }"
+
+ input:
+ tuple val(meta), path(reads)
+
+ output:
+ tuple val(meta), path("*.fastq.gz"), emit: reads
+ tuple val(meta), path("*.log") , emit: log
+ path "versions.yml" , emit: versions
+
+ when:
+ task.ext.when == null || task.ext.when
+
+ script:
+ def args = task.ext.args ?: ''
+ def prefix = task.ext.prefix ?: "${meta.id}"
+ if (meta.single_end) {
+ """
+ umi_tools \\
+ extract \\
+ -I $reads \\
+ -S ${prefix}.umi_extract.fastq.gz \\
+ $args \\
+ > ${prefix}.umi_extract.log
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ umitools: \$( umi_tools --version | sed '/version:/!d; s/.*: //' )
+ END_VERSIONS
+ """
+ } else {
+ """
+ umi_tools \\
+ extract \\
+ -I ${reads[0]} \\
+ --read2-in=${reads[1]} \\
+ -S ${prefix}.umi_extract_1.fastq.gz \\
+ --read2-out=${prefix}.umi_extract_2.fastq.gz \\
+ $args \\
+ > ${prefix}.umi_extract.log
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ umitools: \$( umi_tools --version | sed '/version:/!d; s/.*: //' )
+ END_VERSIONS
+ """
+ }
+}
diff --git a/modules/nf-core/umitools/extract/meta.yml b/modules/nf-core/umitools/extract/meta.yml
new file mode 100644
index 0000000..7695b27
--- /dev/null
+++ b/modules/nf-core/umitools/extract/meta.yml
@@ -0,0 +1,48 @@
+name: umitools_extract
+description: Extracts UMI barcode from a read and adds it to the read name, leaving any sample barcode in place
+keywords:
+ - UMI
+ - barcode
+ - extract
+ - umitools
+tools:
+ - umi_tools:
+ description: >
+ UMI-tools contains tools for dealing with Unique Molecular Identifiers (UMIs)/Random Molecular Tags (RMTs) and single cell RNA-Seq cell barcodes
+ documentation: https://umi-tools.readthedocs.io/en/latest/
+ license: "MIT"
+input:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - reads:
+ type: list
+ description: |
+ List of input FASTQ files whose UMIs will be extracted.
+output:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - reads:
+ type: file
+ description: >
+ Extracted FASTQ files. | For single-end reads, pattern is \${prefix}.umi_extract.fastq.gz. | For paired-end reads, pattern is \${prefix}.umi_extract_{1,2}.fastq.gz.
+ pattern: "*.{fastq.gz}"
+ - log:
+ type: file
+ description: Logfile for umi_tools
+ pattern: "*.{log}"
+ - versions:
+ type: file
+ description: File containing software versions
+ pattern: "versions.yml"
+authors:
+ - "@drpatelh"
+ - "@grst"
+maintainers:
+ - "@drpatelh"
+ - "@grst"
diff --git a/modules/nf-core/umitools/extract/tests/main.nf.test b/modules/nf-core/umitools/extract/tests/main.nf.test
new file mode 100644
index 0000000..22242d1
--- /dev/null
+++ b/modules/nf-core/umitools/extract/tests/main.nf.test
@@ -0,0 +1,35 @@
+nextflow_process {
+
+ name "Test Process UMITOOLS_EXTRACT"
+ script "../main.nf"
+ process "UMITOOLS_EXTRACT"
+ config "./nextflow.config"
+ tag "modules_nfcore"
+ tag "modules"
+ tag "umitools"
+ tag "umitools/extract"
+
+ test("Should run without failures") {
+
+ when {
+ params {
+ outdir = "$outputDir"
+ }
+ process {
+ """
+ input[0] = [ [ id:'test', single_end:true ], // meta map
+ [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) ]
+ ]
+ """
+ }
+ }
+
+ then {
+ assertAll (
+ { assert process.success },
+ { assert snapshot(process.out.versions).match("versions") }
+ )
+ }
+
+ }
+}
\ No newline at end of file
diff --git a/modules/nf-core/umitools/extract/tests/main.nf.test.snap b/modules/nf-core/umitools/extract/tests/main.nf.test.snap
new file mode 100644
index 0000000..6d5944f
--- /dev/null
+++ b/modules/nf-core/umitools/extract/tests/main.nf.test.snap
@@ -0,0 +1,10 @@
+{
+ "versions": {
+ "content": [
+ [
+ "versions.yml:md5,5a18da2d3a5a4de15e7aaae9082d7abb"
+ ]
+ ],
+ "timestamp": "2023-12-08T09:41:43.540658352"
+ }
+}
\ No newline at end of file
diff --git a/modules/nf-core/umitools/extract/tests/nextflow.config b/modules/nf-core/umitools/extract/tests/nextflow.config
new file mode 100644
index 0000000..c866f5a
--- /dev/null
+++ b/modules/nf-core/umitools/extract/tests/nextflow.config
@@ -0,0 +1,9 @@
+process {
+
+ publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }
+
+ withName: UMITOOLS_EXTRACT {
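+ // In umi_tools barcode patterns each N marks a UMI base, so "NNNN" treats the first four bases of every read as the UMI (a minimal pattern for the test data)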
+ ext.args = '--bc-pattern="NNNN"'
+ }
+
+}
diff --git a/modules/nf-core/umitools/extract/tests/tags.yml b/modules/nf-core/umitools/extract/tests/tags.yml
new file mode 100644
index 0000000..c3fb23d
--- /dev/null
+++ b/modules/nf-core/umitools/extract/tests/tags.yml
@@ -0,0 +1,2 @@
+umitools/extract:
+ - modules/nf-core/umitools/extract/**
diff --git a/modules/nf-core/untar/environment.yml b/modules/nf-core/untar/environment.yml
new file mode 100644
index 0000000..0c9cbb1
--- /dev/null
+++ b/modules/nf-core/untar/environment.yml
@@ -0,0 +1,11 @@
+name: untar
+
+channels:
+ - conda-forge
+ - bioconda
+ - defaults
+
+dependencies:
+ - conda-forge::grep=3.11
+ - conda-forge::sed=4.7
+ - conda-forge::tar=1.34
diff --git a/modules/nf-core/untar/main.nf b/modules/nf-core/untar/main.nf
new file mode 100644
index 0000000..8a75bb9
--- /dev/null
+++ b/modules/nf-core/untar/main.nf
@@ -0,0 +1,63 @@
+process UNTAR {
+ tag "$archive"
+ label 'process_single'
+
+ conda "${moduleDir}/environment.yml"
+ container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+ 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' :
+ 'nf-core/ubuntu:20.04' }"
+
+ input:
+ tuple val(meta), path(archive)
+
+ output:
+ tuple val(meta), path("$prefix"), emit: untar
+ path "versions.yml" , emit: versions
+
+ when:
+ task.ext.when == null || task.ext.when
+
+ script:
+ def args = task.ext.args ?: ''
+ def args2 = task.ext.args2 ?: ''
+ prefix = task.ext.prefix ?: ( meta.id ? "${meta.id}" : archive.baseName.toString().replaceFirst(/\.tar$/, ""))
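+ // e.g. with no meta.id an archive named "kraken2.tar.gz" gets prefix "kraken2" (baseName drops ".gz", the replaceFirst drops ".tar")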
+
+ """
+ mkdir $prefix
+
+ ## Ensures --strip-components only applied when top level of tar contents is a directory
+ ## If just files or multiple directories, place all in prefix
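+ ## e.g. when every entry starts with "kraken2/" there is a single top-level prefix, so that directory level is stripped
+ ## and the contents land directly in the prefix directory; flat archives (or several top-level directories) are extracted as-is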
+ if [[ \$(tar -taf ${archive} | grep -o -P "^.*?\\/" | uniq | wc -l) -eq 1 ]]; then
+ tar \\
+ -C $prefix --strip-components 1 \\
+ -xavf \\
+ $args \\
+ $archive \\
+ $args2
+ else
+ tar \\
+ -C $prefix \\
+ -xavf \\
+ $args \\
+ $archive \\
+ $args2
+ fi
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ untar: \$(echo \$(tar --version 2>&1) | sed 's/^.*(GNU tar) //; s/ Copyright.*\$//')
+ END_VERSIONS
+ """
+
+ stub:
+ prefix = task.ext.prefix ?: ( meta.id ? "${meta.id}" : archive.toString().replaceFirst(/\.[^\.]+(.gz)?$/, ""))
+ """
+ mkdir $prefix
+ touch ${prefix}/file.txt
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ untar: \$(echo \$(tar --version 2>&1) | sed 's/^.*(GNU tar) //; s/ Copyright.*\$//')
+ END_VERSIONS
+ """
+}
diff --git a/modules/nf-core/untar/meta.yml b/modules/nf-core/untar/meta.yml
new file mode 100644
index 0000000..a9a2110
--- /dev/null
+++ b/modules/nf-core/untar/meta.yml
@@ -0,0 +1,46 @@
+name: untar
+description: Extract files.
+keywords:
+ - untar
+ - uncompress
+ - extract
+tools:
+ - untar:
+ description: |
+ Extract tar.gz files.
+ documentation: https://www.gnu.org/software/tar/manual/
+ licence: ["GPL-3.0-or-later"]
+input:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - archive:
+ type: file
+ description: File to be untarred
+ pattern: "*.{tar}.{gz}"
+output:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - untar:
+ type: directory
+ description: Directory containing contents of archive
+ pattern: "*/"
+ - versions:
+ type: file
+ description: File containing software versions
+ pattern: "versions.yml"
+authors:
+ - "@joseespinosa"
+ - "@drpatelh"
+ - "@matthdsm"
+ - "@jfy133"
+maintainers:
+ - "@joseespinosa"
+ - "@drpatelh"
+ - "@matthdsm"
+ - "@jfy133"
diff --git a/modules/nf-core/untar/tests/main.nf.test b/modules/nf-core/untar/tests/main.nf.test
new file mode 100644
index 0000000..679e83c
--- /dev/null
+++ b/modules/nf-core/untar/tests/main.nf.test
@@ -0,0 +1,55 @@
+nextflow_process {
+
+ name "Test Process UNTAR"
+ script "../main.nf"
+ process "UNTAR"
+
+ tag "modules"
+ tag "modules_nfcore"
+ tag "untar"
+
+ test("test_untar") {
+
+ when {
+ params {
+ outdir = "$outputDir"
+ }
+ process {
+ """
+ input[0] = [ [], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/db/kraken2.tar.gz', checkIfExists: true) ]
+ """
+ }
+ }
+
+ then {
+ assertAll (
+ { assert process.success },
+ { assert snapshot(process.out.untar).match("test_untar") },
+ )
+ }
+
+ }
+
+ test("test_untar_onlyfiles") {
+
+ when {
+ params {
+ outdir = "$outputDir"
+ }
+ process {
+ """
+ input[0] = [ [], file(params.modules_testdata_base_path + 'generic/tar/hello.tar.gz', checkIfExists: true) ]
+ """
+ }
+ }
+
+ then {
+ assertAll (
+ { assert process.success },
+ { assert snapshot(process.out.untar).match("test_untar_onlyfiles") },
+ )
+ }
+
+ }
+
+}
diff --git a/modules/nf-core/untar/tests/main.nf.test.snap b/modules/nf-core/untar/tests/main.nf.test.snap
new file mode 100644
index 0000000..ace4257
--- /dev/null
+++ b/modules/nf-core/untar/tests/main.nf.test.snap
@@ -0,0 +1,34 @@
+{
+ "test_untar_onlyfiles": {
+ "content": [
+ [
+ [
+ [
+
+ ],
+ [
+ "hello.txt:md5,e59ff97941044f85df5297e1c302d260"
+ ]
+ ]
+ ]
+ ],
+ "timestamp": "2023-10-18T11:56:46.878844"
+ },
+ "test_untar": {
+ "content": [
+ [
+ [
+ [
+
+ ],
+ [
+ "hash.k2d:md5,8b8598468f54a7087c203ad0190555d9",
+ "opts.k2d:md5,a033d00cf6759407010b21700938f543",
+ "taxo.k2d:md5,094d5891cdccf2f1468088855c214b2c"
+ ]
+ ]
+ ]
+ ],
+ "timestamp": "2023-10-18T11:56:08.16574"
+ }
+}
\ No newline at end of file
diff --git a/modules/nf-core/untar/tests/tags.yml b/modules/nf-core/untar/tests/tags.yml
new file mode 100644
index 0000000..feb6f15
--- /dev/null
+++ b/modules/nf-core/untar/tests/tags.yml
@@ -0,0 +1,2 @@
+untar:
+ - modules/nf-core/untar/**
diff --git a/nextflow.config b/nextflow.config
index e2f3488..7308f0f 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -9,15 +9,91 @@
// Global default params, used in configs
params {
- // TODO nf-core: Specify your pipeline's command line flags
// Input options
input = null
// References
genome = null
- igenomes_base = 's3://ngi-igenomes/igenomes/'
+ splicesites = null
+ gtf_extra_attributes = 'gene_name'
+ gtf_group_features = 'gene_id'
+ skip_gtf_filter = false
+ skip_gtf_transcript_filter = false
+ igenomes_base = 's3://ngi-igenomes/igenomes'
igenomes_ignore = false
+ gencode = false
+ save_reference = false
+ featurecounts_group_type = 'gene_biotype'
+ featurecounts_feature_type = 'exon'
+ // UMI handling
+ with_umi = false
+ skip_umi_extract = false
+ umitools_extract_method = 'string'
+ umitools_grouping_method = 'directional'
+ umitools_dedup_stats = false
+ umitools_bc_pattern = null
+ umitools_bc_pattern2 = null
+ umitools_umi_separator = null
+ umi_discard_read = null
+ save_umi_intermeds = false
+
+ // Trimming
+ trimmer = 'trimgalore'
+ min_trimmed_reads = 10000
+ extra_trimgalore_args = null
+ extra_fastp_args = null
+ save_trimmed = false
+ skip_trimming = false
+ // BBSplit genome filtering
+ bbsplit_fasta_list = null
+ save_bbsplit_reads = false
+ skip_bbsplit = true
+
+ // Ribosomal RNA removal
+ remove_ribo_rna = true
+ save_non_ribo_reads = false
+ ribo_database_manifest = "${projectDir}/assets/rrna-db-defaults.txt"
+
+ // Alignment
+ aligner = 'star_salmon'
+ pseudo_aligner = null
+ pseudo_aligner_kmer_size = 31
+ seq_center = null
+ bam_csi_index = false
+ star_ignore_sjdbgtf = false
+ salmon_quant_libtype = null
+ hisat2_build_memory = '200.GB' // Amount of memory required to build HISAT2 index with splice sites
+ stringtie_ignore_gtf = false
+ min_mapped_reads = 5
+ extra_star_align_args = null
+ extra_salmon_quant_args = null
+ extra_kallisto_quant_args = null
+ kallisto_quant_fraglen = 200
+ kallisto_quant_fraglen_sd = 200
+ save_merged_fastq = false
+ save_unaligned = false
+ save_align_intermeds = false
+ skip_markduplicates = false
+ skip_alignment = true
+ skip_pseudo_alignment = false
+
+ // QC
+ skip_qc = false
+ skip_bigwig = false
+ skip_stringtie = false
+ skip_fastqc = false
+ skip_preseq = true
+ skip_dupradar = false
+ skip_qualimap = false
+ skip_rseqc = false
+ skip_biotype_qc = false
+ skip_deseq2_qc = false
+ skip_multiqc = false
+ deseq2_vst = true
+ rseqc_modules = 'bam_stat,inner_distance,infer_experiment,junction_annotation,junction_saturation,read_distribution,read_duplication'
+
// MultiQC options
multiqc_config = null
multiqc_title = null
@@ -43,6 +119,7 @@ params {
custom_config_base = "https://raw.githubusercontent.com/nf-core/configs/${params.custom_config_version}"
config_profile_contact = null
config_profile_url = null
+ test_data_base = 'https://raw.githubusercontent.com/nf-core/test-datasets/riboseq/testdata/'
// Max resource options
diff --git a/nextflow_schema.json b/nextflow_schema.json
index ecc8a6c..800ba5e 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -18,6 +18,7 @@
"exists": true,
"mimetype": "text/csv",
"pattern": "^\\S+\\.csv$",
+ "schema": "assets/schema_input.json",
"description": "Path to comma-separated file containing information about the samples in the experiment.",
"help_text": "You will need to create a design file with information about the samples in your experiment before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 3 columns, and a header row. See [usage docs](https://nf-co.re/riboseq/usage#samplesheet-input).",
"fa_icon": "fas fa-file-csv"
@@ -52,7 +53,7 @@
"type": "string",
"description": "Name of iGenomes reference.",
"fa_icon": "fas fa-book",
- "help_text": "If using a reference genome configured in the pipeline using iGenomes, use this parameter to give the ID for the reference. This is then used to build the full paths for all required reference genome files e.g. `--genome GRCh38`. \n\nSee the [nf-core website docs](https://nf-co.re/usage/reference_genomes) for more details."
+ "help_text": "If using a reference genome configured in the pipeline using iGenomes (not recommended), use this parameter to give the ID for the reference. This is then used to build the full paths for all required reference genome files e.g. `--genome GRCh38`. \n\nSee the [nf-core website docs](https://nf-co.re/usage/reference_genomes) for more details."
},
"fasta": {
"type": "string",
@@ -61,9 +62,148 @@
"mimetype": "text/plain",
"pattern": "^\\S+\\.fn?a(sta)?(\\.gz)?$",
"description": "Path to FASTA genome file.",
- "help_text": "This parameter is *mandatory* if `--genome` is not specified. If you don't have a BWA index available this will be generated for you automatically. Combine with `--save_reference` to save BWA index for future runs.",
+ "help_text": "This parameter is *mandatory* if `--genome` is not specified. If you don't have the appropriate alignment index available this will be generated for you automatically. Combine with `--save_reference` to save alignment index for future runs.",
"fa_icon": "far fa-file-code"
},
+ "gtf": {
+ "type": "string",
+ "format": "file-path",
+ "exists": true,
+ "mimetype": "text/plain",
+ "pattern": "^\\S+\\.gtf(\\.gz)?$",
+ "description": "Path to GTF annotation file.",
+ "fa_icon": "fas fa-code-branch",
+ "help_text": "This parameter is *mandatory* if `--genome` is not specified."
+ },
+ "gff": {
+ "type": "string",
+ "format": "file-path",
+ "exists": true,
+ "mimetype": "text/plain",
+ "pattern": "^\\S+\\.gff(\\.gz)?$",
+ "fa_icon": "fas fa-code-branch",
+ "description": "Path to GFF3 annotation file.",
+ "help_text": "This parameter must be specified if `--genome` or `--gtf` are not specified."
+ },
+ "gene_bed": {
+ "type": "string",
+ "format": "file-path",
+ "exists": true,
+ "mimetype": "text/plain",
+ "pattern": "^\\S+\\.bed(\\.gz)?$",
+ "fa_icon": "fas fa-procedures",
+ "description": "Path to BED file containing gene intervals. This will be created from the GTF file if not specified."
+ },
+ "transcript_fasta": {
+ "type": "string",
+ "format": "file-path",
+ "exists": true,
+ "mimetype": "text/plain",
+ "pattern": "^\\S+\\.fn?a(sta)?(\\.gz)?$",
+ "fa_icon": "far fa-file-code",
+ "description": "Path to FASTA transcriptome file."
+ },
+ "additional_fasta": {
+ "type": "string",
+ "format": "file-path",
+ "exists": true,
+ "mimetype": "text/plain",
+ "pattern": "^\\S+\\.fn?a(sta)?(\\.gz)?$",
+ "fa_icon": "far fa-file-code",
+ "description": "FASTA file to concatenate to genome FASTA file e.g. containing spike-in sequences.",
+ "help_text": "If provided, the sequences in this file will get concatenated to the existing genome FASTA file, a GTF file will be automatically created using the entire sequence as the gene, transcript, and exon features, and any alignment index will get created from the combined FASTA and GTF. It is recommended to save the reference with `--save_reference` to re-use the index for future runs so you do not need to create it again."
+ },
+ "splicesites": {
+ "type": "string",
+ "format": "file-path",
+ "exists": true,
+ "mimetype": "text/plain",
+ "fa_icon": "fas fa-hand-scissors",
+ "description": "Splice sites file required for HISAT2."
+ },
+ "star_index": {
+ "type": "string",
+ "format": "path",
+ "exists": true,
+ "fa_icon": "fas fa-bezier-curve",
+ "description": "Path to directory or tar.gz archive for pre-built STAR index."
+ },
+ "hisat2_index": {
+ "type": "string",
+ "format": "path",
+ "exists": true,
+ "fa_icon": "fas fa-bezier-curve",
+ "description": "Path to directory or tar.gz archive for pre-built HISAT2 index."
+ },
+ "rsem_index": {
+ "type": "string",
+ "format": "path",
+ "exists": true,
+ "fa_icon": "fas fa-bezier-curve",
+ "description": "Path to directory or tar.gz archive for pre-built RSEM index."
+ },
+ "salmon_index": {
+ "type": "string",
+ "format": "path",
+ "exists": true,
+ "fa_icon": "fas fa-bezier-curve",
+ "description": "Path to directory or tar.gz archive for pre-built Salmon index."
+ },
+ "kallisto_index": {
+ "type": "string",
+ "format": "path",
+ "exists": true,
+ "fa_icon": "fas fa-bezier-curve",
+ "description": "Path to directory or tar.gz archive for pre-built Kallisto index."
+ },
+ "hisat2_build_memory": {
+ "type": "string",
+ "default": "200.GB",
+ "fa_icon": "fas fa-memory",
+ "pattern": "^\\d+(\\.\\d+)?\\.?\\s*(K|M|G|T)?B$",
+ "description": "Minimum memory required to use splice sites and exons in the HiSAT2 index build process.",
+ "help_text": "HiSAT2 requires a huge amount of RAM to build a genome index for larger genomes, if including splice sites and exons e.g. the human genome might typically require 200GB. If you specify less than this threshold for the `HISAT2_BUILD` process then the splice sites and exons will be ignored, meaning that the process will require a lot less memory. If you are working with a small genome, set this parameter to a lower value to reduce the threshold for skipping this check. If using a larger genome, consider supplying more memory to the `HISAT2_BUILD` process."
+ },
+ "gencode": {
+ "type": "boolean",
+ "fa_icon": "fas fa-code-branch",
+ "description": "Specify if your GTF annotation is in GENCODE format.",
+ "help_text": "If your GTF file is in GENCODE format and you would like to run Salmon i.e. `--pseudo_aligner salmon`, you will need to provide this parameter in order to build the Salmon index appropriately."
+ },
+ "gtf_extra_attributes": {
+ "type": "string",
+ "default": "gene_name",
+ "fa_icon": "fas fa-plus-square",
+ "description": "By default, the pipeline uses the `gene_name` field to obtain additional gene identifiers from the input GTF file when running Salmon.",
+ "help_text": "This behaviour can be modified by specifying `--gtf_extra_attributes` when running the pipeline. Note that you can also specify more than one desired value, separated by a comma e.g. `--gtf_extra_attributes gene_id,...`.\n"
+ },
+ "gtf_group_features": {
+ "type": "string",
+ "default": "gene_id",
+ "description": "Define the attribute type used to group features in the GTF file when running Salmon.",
+ "fa_icon": "fas fa-layer-group"
+ },
+ "featurecounts_group_type": {
+ "type": "string",
+ "default": "gene_biotype",
+ "fa_icon": "fas fa-layer-group",
+ "description": "The attribute type used to group feature types in the GTF file when generating the biotype plot with featureCounts."
+ },
+ "featurecounts_feature_type": {
+ "type": "string",
+ "default": "exon",
+ "description": "By default, the pipeline assigns reads based on the 'exon' attribute within the GTF file.",
+ "fa_icon": "fas fa-indent",
+ "help_text": "The feature type used from the GTF file when generating the biotype plot with featureCounts."
+ },
+ "igenomes_base": {
+ "type": "string",
+ "format": "directory-path",
+ "description": "Directory / URL base for iGenomes references.",
+ "default": "s3://ngi-igenomes/igenomes",
+ "fa_icon": "fas fa-cloud-download-alt",
+ "hidden": true
+ },
"igenomes_ignore": {
"type": "boolean",
"description": "Do not load the iGenomes reference config.",
@@ -71,6 +211,421 @@
"hidden": true,
"help_text": "Do not load `igenomes.config` when running the pipeline. You may choose this option if you observe clashes between custom parameters and those supplied in `igenomes.config`."
}
+ },
+ "required": ["fasta"]
+ },
+ "read_trimming_options": {
+ "title": "Read trimming options",
+ "type": "object",
+ "fa_icon": "fas fa-cut",
+ "description": "Options to adjust read trimming criteria.",
+ "properties": {
+ "trimmer": {
+ "type": "string",
+ "default": "trimgalore",
+ "description": "Specifies the trimming tool to use - available options are 'trimgalore' and 'fastp'.",
+ "fa_icon": "fas fa-cut",
+ "enum": ["trimgalore", "fastp"]
+ },
+ "extra_trimgalore_args": {
+ "type": "string",
+ "description": "Extra arguments to pass to Trim Galore! command in addition to defaults defined by the pipeline.",
+ "fa_icon": "fas fa-plus"
+ },
+ "extra_fastp_args": {
+ "type": "string",
+ "description": "Extra arguments to pass to fastp command in addition to defaults defined by the pipeline.",
+ "fa_icon": "fas fa-plus"
+ },
+ "min_trimmed_reads": {
+ "type": "integer",
+ "default": 10000,
+ "fa_icon": "fas fa-hand-paper",
+ "description": "Minimum number of trimmed reads below which samples are removed from further processing. Some downstream steps in the pipeline will fail if this threshold is too low."
+ }
+ }
+ },
+ "read_filtering_options": {
+ "title": "Read filtering options",
+ "type": "object",
+ "description": "Options for filtering reads prior to alignment",
+ "default": "",
+ "properties": {
+ "bbsplit_fasta_list": {
+ "type": "string",
+ "format": "file-path",
+ "exists": true,
+ "mimetype": "text/plain",
+ "fa_icon": "fas fa-list-alt",
+ "description": "Path to comma-separated file containing a list of reference genomes to filter reads against with BBSplit. You have to also explicitly set `--skip_bbsplit false` if you want to use BBSplit.",
+ "help_text": "The file should contain 2 columns: short name and full path to reference genome(s) e.g. \n```\nmm10,/path/to/mm10.fa\necoli,/path/to/ecoli.fa\n```"
+ },
+ "bbsplit_index": {
+ "type": "string",
+ "format": "path",
+ "exists": true,
+ "fa_icon": "fas fa-bezier-curve",
+ "description": "Path to directory or tar.gz archive for pre-built BBSplit index.",
+ "help_text": "The BBSplit index will have to be built at least once with this pipeline (see `--save_reference` to save index). It can then be provided via `--bbsplit_index` for future runs."
+ },
+ "remove_ribo_rna": {
+ "type": "boolean",
+ "fa_icon": "fas fa-trash-alt",
+ "description": "Enable the removal of reads derived from ribosomal RNA using SortMeRNA.",
+ "help_text": "Any patterns found in the sequences defined by the '--ribo_database_manifest' parameter will be used."
+ },
+ "ribo_database_manifest": {
+ "type": "string",
+ "format": "file-path",
+ "exists": true,
+ "mimetype": "text/plain",
+ "default": "${projectDir}/assets/rrna-db-defaults.txt",
+ "fa_icon": "fas fa-database",
+ "description": "Text file containing paths to fasta files (one per line) that will be used to create the database for SortMeRNA.",
+ "help_text": "By default, [rRNA databases](https://github.com/biocore/sortmerna/tree/master/data/rRNA_databases) defined in the SortMeRNA GitHub repo are used. You can see an example in the pipeline Github repository in `assets/rrna-default-dbs.txt`.\nPlease note that commercial/non-academic entities require [`licensing for SILVA`](https://www.arb-silva.de/silva-license-information) for these default databases."
+ }
+ },
+ "fa_icon": "fas fa-trash-alt"
+ },
+ "umi_options": {
+ "title": "UMI options",
+ "type": "object",
+ "description": "Options for processing reads with unique molecular identifiers",
+ "default": "",
+ "properties": {
+ "with_umi": {
+ "type": "boolean",
+ "fa_icon": "fas fa-barcode",
+ "description": "Enable UMI-based read deduplication."
+ },
+ "umitools_extract_method": {
+ "type": "string",
+ "default": "string",
+ "fa_icon": "fas fa-barcode",
+ "description": "UMI pattern to use. Can be either 'string' (default) or 'regex'.",
+ "help_text": "More details can be found in the [UMI-tools documentation](https://umi-tools.readthedocs.io/en/latest/reference/extract.html#extract-method).\n"
+ },
+ "umitools_bc_pattern": {
+ "type": "string",
+ "fa_icon": "fas fa-barcode",
+ "help_text": "More details can be found in the [UMI-tools documentation](https://umi-tools.readthedocs.io/en/latest/reference/extract.html#extract-method).",
+ "description": "The UMI barcode pattern to use e.g. 'NNNNNN' indicates that the first 6 nucleotides of the read are from the UMI."
+ },
+ "umitools_bc_pattern2": {
+ "type": "string",
+ "fa_icon": "fas fa-barcode",
+ "description": "The UMI barcode pattern to use if the UMI is located in read 2."
+ },
+ "umi_discard_read": {
+ "type": "integer",
+ "fa_icon": "fas fa-barcode",
+ "description": "After UMI barcode extraction discard either R1 or R2 by setting this parameter to 1 or 2, respectively."
+ },
+ "umitools_umi_separator": {
+ "type": "string",
+ "fa_icon": "fas fa-star-half-alt",
+ "description": "The character that separates the UMI in the read name. Most likely a colon if you skipped the extraction with UMI-tools and used other software."
+ },
+ "umitools_grouping_method": {
+ "type": "string",
+ "default": "directional",
+ "fa_icon": "far fa-object-ungroup",
+ "description": "Method to use to determine read groups by subsuming those with similar UMIs. All methods start by identifying the reads with the same mapping position, but treat similar yet nonidentical UMIs differently.",
+ "enum": ["unique", "percentile", "cluster", "adjacency", "directional"]
+ },
+ "umitools_dedup_stats": {
+ "type": "boolean",
+ "fa_icon": "fas fa-barcode",
+ "help_text": "It can be quite time consuming generating these output stats - see [#827](https://github.com/nf-core/rnaseq/issues/827).",
+ "description": "Generate output stats when running \"umi_tools dedup\"."
+ }
+ },
+ "fa_icon": "fas fa-barcode"
+ },
+ "alignment_options": {
+ "title": "Alignment options",
+ "type": "object",
+ "fa_icon": "fas fa-map-signs",
+ "description": "Options to adjust parameters and filtering criteria for read alignments.",
+ "properties": {
+ "aligner": {
+ "type": "string",
+ "default": "star_salmon",
+ "description": "Specifies the alignment algorithm to use - available options are 'star_salmon', 'star_rsem' and 'hisat2'.",
+ "fa_icon": "fas fa-map-signs",
+ "enum": ["star_salmon", "star_rsem", "hisat2"]
+ },
+ "pseudo_aligner": {
+ "type": "string",
+ "description": "Specifies the pseudo aligner to use - available options are 'salmon'. Runs in addition to '--aligner'.",
+ "fa_icon": "fas fa-hamburger",
+ "enum": ["salmon", "kallisto"]
+ },
+ "pseudo_aligner_kmer_size": {
+ "type": "integer",
+ "default": 31,
+ "description": "Kmer length passed to indexing step of pseudoaligners",
+ "help_text": "Failure to set a good kmer size could cause issues with quantification with Kallisto or Salmon. This is mostly an issue for short reads (<50bp), where the default kmer size of 31 is an problem.",
+ "fa_icon": "fas fa-ruler-horizontal"
+ },
+ "bam_csi_index": {
+ "type": "boolean",
+ "description": "Create a CSI index for BAM files instead of the traditional BAI index. This will be required for genomes with larger chromosome sizes.",
+ "fa_icon": "fas fa-sort-alpha-down"
+ },
+ "star_ignore_sjdbgtf": {
+ "type": "boolean",
+ "fa_icon": "fas fa-ban",
+ "description": "When using pre-built STAR indices do not re-extract and use splice junctions from the GTF file."
+ },
+ "salmon_quant_libtype": {
+ "type": "string",
+ "fa_icon": "fas fa-fast-forward",
+ "description": " Override Salmon library type inferred based on strandedness defined in meta object.",
+ "help_text": "See [Salmon docs](https://salmon.readthedocs.io/en/latest/library_type.html).",
+ "enum": [
+ "A",
+ "IS",
+ "ISF",
+ "ISR",
+ "IU",
+ "MS",
+ "MSF",
+ "MSR",
+ "MU",
+ "OS",
+ "OSF",
+ "OSR",
+ "OU",
+ "SF",
+ "SR",
+ "U"
+ ]
+ },
+ "min_mapped_reads": {
+ "type": "number",
+ "default": 5,
+ "fa_icon": "fas fa-percentage",
+ "description": "Minimum percentage of uniquely mapped reads below which samples are removed from further processing.",
+ "help_text": "Some downstream steps in the pipeline will fail if this threshold is too low."
+ },
+ "seq_center": {
+ "type": "string",
+ "description": "Sequencing center information to be added to read group of BAM files.",
+ "fa_icon": "fas fa-synagogue"
+ },
+ "stringtie_ignore_gtf": {
+ "type": "boolean",
+ "description": "Perform reference-guided de novo assembly of transcripts using StringTie i.e. dont restrict to those in GTF file.",
+ "fa_icon": "fas fa-ban"
+ },
+ "extra_star_align_args": {
+ "type": "string",
+ "description": "Extra arguments to pass to STAR alignment command in addition to defaults defined by the pipeline. Only available for the STAR-Salmon route.",
+ "fa_icon": "fas fa-plus"
+ },
+ "extra_salmon_quant_args": {
+ "type": "string",
+ "description": "Extra arguments to pass to Salmon quant command in addition to defaults defined by the pipeline.",
+ "fa_icon": "fas fa-plus"
+ },
+ "extra_kallisto_quant_args": {
+ "type": "string",
+ "description": "Extra arguments to pass to Kallisto quant command in addition to defaults defined by the pipeline.",
+ "fa_icon": "fas fa-plus"
+ },
+ "kallisto_quant_fraglen": {
+ "type": "integer",
+ "description": "In single-end mode Kallisto requires an estimated fragment length. Specify a default value for that here. TODO: use existing RSeQC results to do this dynamically.",
+ "default": 200,
+ "fa_icon": "fas fa-ruler-horizontal"
+ },
+ "kallisto_quant_fraglen_sd": {
+ "type": "integer",
+ "description": "In single-end mode, Kallisto requires an estimated standard error for fragment length. Specify a default value for that here. TODO: use existing RSeQC results to do this dynamically.",
+ "default": 200,
+ "fa_icon": "fas fa-sort-amount-up-alt"
+ }
+ }
+ },
+ "optional_outputs": {
+ "title": "Optional outputs",
+ "type": "object",
+ "description": "Additional output files produces as intermediates that can be saved",
+ "default": "",
+ "properties": {
+ "save_merged_fastq": {
+ "type": "boolean",
+ "fa_icon": "fas fa-save",
+ "description": "Save FastQ files after merging re-sequenced libraries in the results directory."
+ },
+ "save_umi_intermeds": {
+ "type": "boolean",
+ "fa_icon": "fas fa-save",
+ "description": "If this option is specified, intermediate FastQ and BAM files produced by UMI-tools are also saved in the results directory."
+ },
+ "save_non_ribo_reads": {
+ "type": "boolean",
+ "fa_icon": "fas fa-save",
+ "description": "If this option is specified, intermediate FastQ files containing non-rRNA reads will be saved in the results directory."
+ },
+ "save_bbsplit_reads": {
+ "type": "boolean",
+ "fa_icon": "fas fa-save",
+ "description": "If this option is specified, FastQ files split by reference will be saved in the results directory."
+ },
+ "save_reference": {
+ "type": "boolean",
+ "description": "If generated by the pipeline save the STAR index in the results directory.",
+ "help_text": "If an alignment index is generated by the pipeline use this parameter to save it to your results folder. These can then be used for future pipeline runs, reducing processing times.",
+ "fa_icon": "fas fa-save"
+ },
+ "save_trimmed": {
+ "type": "boolean",
+ "description": "Save the trimmed FastQ files in the results directory.",
+ "help_text": "By default, trimmed FastQ files will not be saved to the results directory. Specify this flag (or set to true in your config file) to copy these files to the results directory when complete.",
+ "fa_icon": "fas fa-save"
+ },
+ "save_align_intermeds": {
+ "type": "boolean",
+ "description": "Save the intermediate BAM files from the alignment step.",
+ "help_text": "By default, intermediate BAM files will not be saved. The final BAM files created after the appropriate filtering step are always saved to limit storage usage. Set this parameter to also save other intermediate BAM files.",
+ "fa_icon": "fas fa-save"
+ },
+ "save_unaligned": {
+ "type": "boolean",
+ "fa_icon": "fas fa-save",
+ "description": "Where possible, save unaligned reads from either STAR, HISAT2 or Salmon to the results directory.",
+ "help_text": "This may either be in the form of FastQ or BAM files depending on the options available for that particular tool."
+ }
+ }
+ },
+ "quality_control": {
+ "title": "Quality Control",
+ "type": "object",
+ "description": "Additional quality control options.",
+ "default": "",
+ "properties": {
+ "deseq2_vst": {
+ "type": "boolean",
+ "description": "Use vst transformation instead of rlog with DESeq2.",
+ "help_text": "See [DESeq2 docs](http://bioconductor.org/packages/devel/bioc/vignettes/DESeq2/inst/doc/DESeq2.html#data-transformations-and-visualization).",
+ "fa_icon": "fas fa-dolly",
+ "default": true
+ },
+ "rseqc_modules": {
+ "type": "string",
+ "default": "bam_stat,inner_distance,infer_experiment,junction_annotation,junction_saturation,read_distribution,read_duplication",
+ "fa_icon": "fas fa-chart-pie",
+ "description": "Specify the RSeQC modules to run."
+ }
+ }
+ },
+ "process_skipping_options": {
+ "title": "Process skipping options",
+ "type": "object",
+ "fa_icon": "fas fa-fast-forward",
+ "description": "Options to skip various steps within the workflow.",
+ "properties": {
+ "skip_gtf_filter": {
+ "type": "boolean",
+ "fa_icon": "fas fa-forward",
+ "description": "Skip filtering of GTF for valid scaffolds and/ or transcript IDs.",
+ "help_text": "If you're confident on the validity of the GTF with respect to the genome fasta file, or wish to disregard failures thriggered by the filtering module, activate this option."
+ },
+ "skip_gtf_transcript_filter": {
+ "type": "boolean",
+ "fa_icon": "fas fa-forward",
+ "description": "Skip the 'transcript_id' checking component of the GTF filtering script used in the pipeline."
+ },
+ "skip_bbsplit": {
+ "type": "boolean",
+ "default": true,
+ "fa_icon": "fas fa-fast-forward",
+ "description": "Skip BBSplit for removal of non-reference genome reads."
+ },
+ "skip_umi_extract": {
+ "type": "boolean",
+ "fa_icon": "fas fa-compress-alt",
+ "description": "Skip the UMI extraction from the read in case the UMIs have been moved to the headers in advance of the pipeline run."
+ },
+ "skip_trimming": {
+ "type": "boolean",
+ "description": "Skip the adapter trimming step.",
+ "help_text": "Use this if your input FastQ files have already been trimmed outside of the workflow or if you're very confident that there is no adapter contamination in your data.",
+ "fa_icon": "fas fa-fast-forward"
+ },
+ "skip_alignment": {
+ "type": "boolean",
+ "fa_icon": "fas fa-fast-forward",
+ "description": "Skip all of the alignment-based processes within the pipeline."
+ },
+ "skip_pseudo_alignment": {
+ "type": "boolean",
+ "fa_icon": "fas fa-fast-forward",
+ "description": "Skip all of the pseudoalignment-based processes within the pipeline."
+ },
+ "skip_markduplicates": {
+ "type": "boolean",
+ "fa_icon": "fas fa-fast-forward",
+ "description": "Skip picard MarkDuplicates step."
+ },
+ "skip_bigwig": {
+ "type": "boolean",
+ "fa_icon": "fas fa-fast-forward",
+ "description": "Skip bigWig file creation."
+ },
+ "skip_stringtie": {
+ "type": "boolean",
+ "fa_icon": "fas fa-fast-forward",
+ "description": "Skip StringTie."
+ },
+ "skip_fastqc": {
+ "type": "boolean",
+ "description": "Skip FastQC.",
+ "fa_icon": "fas fa-fast-forward"
+ },
+ "skip_preseq": {
+ "type": "boolean",
+ "description": "Skip Preseq.",
+ "fa_icon": "fas fa-fast-forward",
+ "default": true
+ },
+ "skip_dupradar": {
+ "type": "boolean",
+ "fa_icon": "fas fa-fast-forward",
+ "description": "Skip dupRadar."
+ },
+ "skip_qualimap": {
+ "type": "boolean",
+ "fa_icon": "fas fa-fast-forward",
+ "description": "Skip Qualimap."
+ },
+ "skip_rseqc": {
+ "type": "boolean",
+ "fa_icon": "fas fa-fast-forward",
+ "description": "Skip RSeQC."
+ },
+ "skip_biotype_qc": {
+ "type": "boolean",
+ "fa_icon": "fas fa-fast-forward",
+ "description": "Skip additional featureCounts process for biotype QC."
+ },
+ "skip_deseq2_qc": {
+ "type": "boolean",
+ "fa_icon": "fas fa-fast-forward",
+ "description": "Skip DESeq2 PCA and heatmap plotting."
+ },
+ "skip_multiqc": {
+ "type": "boolean",
+ "description": "Skip MultiQC.",
+ "fa_icon": "fas fa-fast-forward"
+ },
+ "skip_qc": {
+ "type": "boolean",
+ "fa_icon": "fas fa-fast-forward",
+ "description": "Skip all QC steps except for MultiQC."
+ }
}
},
"institutional_config_options": {
@@ -118,6 +673,13 @@
"description": "Institutional config URL link.",
"hidden": true,
"fa_icon": "fas fa-users-cog"
+ },
+ "test_data_base": {
+ "type": "string",
+ "default": "https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq3",
+ "description": "Base path / URL for data used in the test profiles",
+ "help_text": "Warning: The `-profile test` samplesheet file itself contains remote paths. Setting this parameter does not alter the contents of that file.",
+ "hidden": true
}
}
},
@@ -201,7 +763,6 @@
"max_multiqc_email_size": {
"type": "string",
"description": "File size limit when attaching MultiQC reports to summary emails.",
- "pattern": "^\\d+(\\.\\d+)?\\.?\\s*(K|M|G|T)?B$",
"default": "25.MB",
"fa_icon": "fas fa-file-upload",
"hidden": true
@@ -222,18 +783,26 @@
"multiqc_config": {
"type": "string",
"format": "file-path",
+ "exists": true,
+ "mimetype": "text/plain",
"description": "Custom config file to supply to MultiQC.",
"fa_icon": "fas fa-cog",
"hidden": true
},
"multiqc_logo": {
"type": "string",
+ "format": "file-path",
+ "exists": true,
+ "mimetype": "text/plain",
"description": "Custom logo file to supply to MultiQC. File name must also be set in the MultiQC config file",
"fa_icon": "fas fa-image",
"hidden": true
},
"multiqc_methods_description": {
"type": "string",
+ "format": "file-path",
+ "exists": true,
+ "mimetype": "text/plain",
"description": "Custom MultiQC yaml file containing HTML including a methods description.",
"fa_icon": "fas fa-cog"
},
@@ -275,6 +844,27 @@
{
"$ref": "#/definitions/reference_genome_options"
},
+ {
+ "$ref": "#/definitions/read_trimming_options"
+ },
+ {
+ "$ref": "#/definitions/read_filtering_options"
+ },
+ {
+ "$ref": "#/definitions/umi_options"
+ },
+ {
+ "$ref": "#/definitions/alignment_options"
+ },
+ {
+ "$ref": "#/definitions/optional_outputs"
+ },
+ {
+ "$ref": "#/definitions/quality_control"
+ },
+ {
+ "$ref": "#/definitions/process_skipping_options"
+ },
{
"$ref": "#/definitions/institutional_config_options"
},
diff --git a/subworkflows/local/prepare_genome/main.nf b/subworkflows/local/prepare_genome/main.nf
new file mode 100644
index 0000000..0840c77
--- /dev/null
+++ b/subworkflows/local/prepare_genome/main.nf
@@ -0,0 +1,304 @@
+//
+// Uncompress and prepare reference genome files
+//
+
+include { GUNZIP as GUNZIP_FASTA } from '../../../modules/nf-core/gunzip'
+include { GUNZIP as GUNZIP_GTF } from '../../../modules/nf-core/gunzip'
+include { GUNZIP as GUNZIP_GFF } from '../../../modules/nf-core/gunzip'
+include { GUNZIP as GUNZIP_GENE_BED } from '../../../modules/nf-core/gunzip'
+include { GUNZIP as GUNZIP_TRANSCRIPT_FASTA } from '../../../modules/nf-core/gunzip'
+include { GUNZIP as GUNZIP_ADDITIONAL_FASTA } from '../../../modules/nf-core/gunzip'
+
+include { UNTAR as UNTAR_BBSPLIT_INDEX } from '../../../modules/nf-core/untar'
+include { UNTAR as UNTAR_STAR_INDEX } from '../../../modules/nf-core/untar'
+include { UNTAR as UNTAR_RSEM_INDEX } from '../../../modules/nf-core/untar'
+include { UNTAR as UNTAR_HISAT2_INDEX } from '../../../modules/nf-core/untar'
+include { UNTAR as UNTAR_SALMON_INDEX } from '../../../modules/nf-core/untar'
+include { UNTAR as UNTAR_KALLISTO_INDEX } from '../../../modules/nf-core/untar'
+
+include { CUSTOM_GETCHROMSIZES } from '../../../modules/nf-core/custom/getchromsizes'
+include { GFFREAD } from '../../../modules/nf-core/gffread'
+include { BBMAP_BBSPLIT } from '../../../modules/nf-core/bbmap/bbsplit'
+include { STAR_GENOMEGENERATE } from '../../../modules/nf-core/star/genomegenerate'
+include { HISAT2_EXTRACTSPLICESITES } from '../../../modules/nf-core/hisat2/extractsplicesites'
+include { HISAT2_BUILD } from '../../../modules/nf-core/hisat2/build'
+include { SALMON_INDEX } from '../../../modules/nf-core/salmon/index'
+include { KALLISTO_INDEX } from '../../../modules/nf-core/kallisto/index'
+include { RSEM_PREPAREREFERENCE as RSEM_PREPAREREFERENCE_GENOME } from '../../../modules/nf-core/rsem/preparereference'
+include { RSEM_PREPAREREFERENCE as MAKE_TRANSCRIPTS_FASTA } from '../../../modules/nf-core/rsem/preparereference'
+
+include { PREPROCESS_TRANSCRIPTS_FASTA_GENCODE } from '../../../modules/local/preprocess_transcripts_fasta_gencode'
+include { GTF2BED } from '../../../modules/local/gtf2bed'
+include { CAT_ADDITIONAL_FASTA } from '../../../modules/local/cat_additional_fasta'
+include { GTF_FILTER } from '../../../modules/local/gtf_filter'
+include { STAR_GENOMEGENERATE_IGENOMES } from '../../../modules/local/star_genomegenerate_igenomes'
+
+workflow PREPARE_GENOME {
+ take:
+ fasta // file: /path/to/genome.fasta
+ gtf // file: /path/to/genome.gtf
+ gff // file: /path/to/genome.gff
+ additional_fasta // file: /path/to/additional.fasta
+ transcript_fasta // file: /path/to/transcript.fasta
+ gene_bed // file: /path/to/gene.bed
+ splicesites // file: /path/to/splicesites.txt
+ bbsplit_fasta_list // file: /path/to/bbsplit_fasta_list.txt
+ star_index // directory: /path/to/star/index/
+ rsem_index // directory: /path/to/rsem/index/
+ salmon_index // directory: /path/to/salmon/index/
+ kallisto_index // directory: /path/to/kallisto/index/
+ hisat2_index // directory: /path/to/hisat2/index/
+    bbsplit_index            // directory: /path/to/bbsplit/index/
+ gencode // boolean: whether the genome is from GENCODE
+ is_aws_igenome // boolean: whether the genome files are from AWS iGenomes
+ biotype // string: if additional fasta file is provided biotype value to use when appending entries to GTF file
+ prepare_tool_indices // list: tools to prepare indices for
+ filter_gtf // boolean: whether to filter GTF file
+
+ main:
+
+ ch_versions = Channel.empty()
+
+ //
+ // Uncompress genome fasta file if required
+ //
+ if (fasta.endsWith('.gz')) {
+ ch_fasta = GUNZIP_FASTA ( [ [:], fasta ] ).gunzip.map { it[1] }
+ ch_versions = ch_versions.mix(GUNZIP_FASTA.out.versions)
+ } else {
+ ch_fasta = Channel.value(file(fasta))
+ }
+
+ //
+ // Uncompress GTF annotation file or create from GFF3 if required
+ //
+ if (gtf || gff) {
+ if (gtf) {
+ if (gtf.endsWith('.gz')) {
+ ch_gtf = GUNZIP_GTF ( [ [:], gtf ] ).gunzip.map { it[1] }
+ ch_versions = ch_versions.mix(GUNZIP_GTF.out.versions)
+ } else {
+ ch_gtf = Channel.value(file(gtf))
+ }
+ } else if (gff) {
+ if (gff.endsWith('.gz')) {
+ ch_gff = GUNZIP_GFF ( [ [:], gff ] ).gunzip.map { it[1] }
+ ch_versions = ch_versions.mix(GUNZIP_GFF.out.versions)
+ } else {
+ ch_gff = Channel.value(file(gff))
+ }
+ ch_gtf = GFFREAD ( ch_gff ).gtf
+ ch_versions = ch_versions.mix(GFFREAD.out.versions)
+ }
+
+ if (filter_gtf) {
+ GTF_FILTER ( ch_fasta, ch_gtf )
+ ch_gtf = GTF_FILTER.out.genome_gtf
+ ch_versions = ch_versions.mix(GTF_FILTER.out.versions)
+ }
+ }
+
+ //
+ // Uncompress additional fasta file and concatenate with reference fasta and gtf files
+ //
+ if (additional_fasta) {
+ if (additional_fasta.endsWith('.gz')) {
+ ch_add_fasta = GUNZIP_ADDITIONAL_FASTA ( [ [:], additional_fasta ] ).gunzip.map { it[1] }
+ ch_versions = ch_versions.mix(GUNZIP_ADDITIONAL_FASTA.out.versions)
+ } else {
+ ch_add_fasta = Channel.value(file(additional_fasta))
+ }
+ CAT_ADDITIONAL_FASTA ( ch_fasta, ch_gtf, ch_add_fasta, biotype )
+ ch_fasta = CAT_ADDITIONAL_FASTA.out.fasta
+ ch_gtf = CAT_ADDITIONAL_FASTA.out.gtf
+ ch_versions = ch_versions.mix(CAT_ADDITIONAL_FASTA.out.versions)
+ }
+
+ //
+ // Uncompress gene BED annotation file or create from GTF if required
+ //
+ if (gene_bed) {
+ if (gene_bed.endsWith('.gz')) {
+ ch_gene_bed = GUNZIP_GENE_BED ( [ [:], gene_bed ] ).gunzip.map { it[1] }
+ ch_versions = ch_versions.mix(GUNZIP_GENE_BED.out.versions)
+ } else {
+ ch_gene_bed = Channel.value(file(gene_bed))
+ }
+ } else {
+ ch_gene_bed = GTF2BED ( ch_gtf ).bed
+ ch_versions = ch_versions.mix(GTF2BED.out.versions)
+ }
+
+ //
+ // Uncompress transcript fasta file / create if required
+ //
+ if (transcript_fasta) {
+ if (transcript_fasta.endsWith('.gz')) {
+ ch_transcript_fasta = GUNZIP_TRANSCRIPT_FASTA ( [ [:], transcript_fasta ] ).gunzip.map { it[1] }
+ ch_versions = ch_versions.mix(GUNZIP_TRANSCRIPT_FASTA.out.versions)
+ } else {
+ ch_transcript_fasta = Channel.value(file(transcript_fasta))
+ }
+ if (gencode) {
+ PREPROCESS_TRANSCRIPTS_FASTA_GENCODE ( ch_transcript_fasta )
+ ch_transcript_fasta = PREPROCESS_TRANSCRIPTS_FASTA_GENCODE.out.fasta
+ ch_versions = ch_versions.mix(PREPROCESS_TRANSCRIPTS_FASTA_GENCODE.out.versions)
+ }
+ } else {
+ ch_transcript_fasta = MAKE_TRANSCRIPTS_FASTA ( ch_fasta, ch_gtf ).transcript_fasta
+ ch_versions = ch_versions.mix(MAKE_TRANSCRIPTS_FASTA.out.versions)
+ }
+
+ //
+ // Create chromosome sizes file
+ //
+ CUSTOM_GETCHROMSIZES ( ch_fasta.map { [ [:], it ] } )
+ ch_fai = CUSTOM_GETCHROMSIZES.out.fai.map { it[1] }
+ ch_chrom_sizes = CUSTOM_GETCHROMSIZES.out.sizes.map { it[1] }
+ ch_versions = ch_versions.mix(CUSTOM_GETCHROMSIZES.out.versions)
+
+ //
+ // Uncompress BBSplit index or generate from scratch if required
+ //
+ ch_bbsplit_index = Channel.empty()
+ if ('bbsplit' in prepare_tool_indices) {
+ if (bbsplit_index) {
+ if (bbsplit_index.endsWith('.tar.gz')) {
+ ch_bbsplit_index = UNTAR_BBSPLIT_INDEX ( [ [:], bbsplit_index ] ).untar.map { it[1] }
+ ch_versions = ch_versions.mix(UNTAR_BBSPLIT_INDEX.out.versions)
+ } else {
+ ch_bbsplit_index = Channel.value(file(bbsplit_index))
+ }
+ } else {
+ Channel
+ .from(file(bbsplit_fasta_list))
+ .splitCsv() // Read in 2 column csv file: short_name,path_to_fasta
+ .flatMap { id, fasta -> [ [ 'id', id ], [ 'fasta', file(fasta, checkIfExists: true) ] ] } // Flatten entries to be able to groupTuple by a common key
+ .groupTuple()
+ .map { it -> it[1] } // Get rid of keys and keep grouped values
+ .collect { [ it ] } // Collect entries as a list to pass as "tuple val(short_names), path(path_to_fasta)" to module
+ .set { ch_bbsplit_fasta_list }
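+            // Worked example of the chain above (file names are illustrative only): a csv with
+            //   human,/refs/human.fa
+            //   mouse,/refs/mouse.fa
+            // is split per row, flattened into [ 'id', <name> ] / [ 'fasta', <file> ] pairs,
+            // grouped into [ 'id', [ human, mouse ] ] and [ 'fasta', [ human.fa, mouse.fa ] ],
+            // and finally collected into a single value holding the grouped short names and
+            // fasta paths for the "tuple val(short_names), path(path_to_fasta)" BBSplit input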
+
+ ch_bbsplit_index = BBMAP_BBSPLIT ( [ [:], [] ], [], ch_fasta, ch_bbsplit_fasta_list, true ).index
+ ch_versions = ch_versions.mix(BBMAP_BBSPLIT.out.versions)
+ }
+ }
+
+ //
+ // Uncompress STAR index or generate from scratch if required
+ //
+ ch_star_index = Channel.empty()
+ if ('star_salmon' in prepare_tool_indices) {
+ if (star_index) {
+ if (star_index.endsWith('.tar.gz')) {
+ ch_star_index = UNTAR_STAR_INDEX ( [ [:], star_index ] ).untar.map { it[1] }
+ ch_versions = ch_versions.mix(UNTAR_STAR_INDEX.out.versions)
+ } else {
+ ch_star_index = Channel.value(file(star_index))
+ }
+ } else {
+ if (is_aws_igenome) {
+ ch_star_index = STAR_GENOMEGENERATE_IGENOMES ( ch_fasta, ch_gtf ).index
+ ch_versions = ch_versions.mix(STAR_GENOMEGENERATE_IGENOMES.out.versions)
+ } else {
+ ch_star_index = STAR_GENOMEGENERATE ( ch_fasta.map { [ [:], it ] }, ch_gtf.map { [ [:], it ] } ).index.map { it[1] }
+ ch_versions = ch_versions.mix(STAR_GENOMEGENERATE.out.versions)
+ }
+ }
+ }
+
+ //
+ // Uncompress RSEM index or generate from scratch if required
+ //
+ ch_rsem_index = Channel.empty()
+ if ('star_rsem' in prepare_tool_indices) {
+ if (rsem_index) {
+ if (rsem_index.endsWith('.tar.gz')) {
+ ch_rsem_index = UNTAR_RSEM_INDEX ( [ [:], rsem_index ] ).untar.map { it[1] }
+ ch_versions = ch_versions.mix(UNTAR_RSEM_INDEX.out.versions)
+ } else {
+ ch_rsem_index = Channel.value(file(rsem_index))
+ }
+ } else {
+ ch_rsem_index = RSEM_PREPAREREFERENCE_GENOME ( ch_fasta, ch_gtf ).index
+ ch_versions = ch_versions.mix(RSEM_PREPAREREFERENCE_GENOME.out.versions)
+ }
+ }
+
+ //
+ // Uncompress HISAT2 index or generate from scratch if required
+ //
+ ch_splicesites = Channel.empty()
+ ch_hisat2_index = Channel.empty()
+ if ('hisat2' in prepare_tool_indices) {
+ if (!splicesites) {
+ ch_splicesites = HISAT2_EXTRACTSPLICESITES ( ch_gtf.map { [ [:], it ] } ).txt.map { it[1] }
+ ch_versions = ch_versions.mix(HISAT2_EXTRACTSPLICESITES.out.versions)
+ } else {
+ ch_splicesites = Channel.value(file(splicesites))
+ }
+ if (hisat2_index) {
+ if (hisat2_index.endsWith('.tar.gz')) {
+ ch_hisat2_index = UNTAR_HISAT2_INDEX ( [ [:], hisat2_index ] ).untar.map { it[1] }
+ ch_versions = ch_versions.mix(UNTAR_HISAT2_INDEX.out.versions)
+ } else {
+ ch_hisat2_index = Channel.value(file(hisat2_index))
+ }
+ } else {
+ ch_hisat2_index = HISAT2_BUILD ( ch_fasta.map { [ [:], it ] }, ch_gtf.map { [ [:], it ] }, ch_splicesites.map { [ [:], it ] } ).index.map { it[1] }
+ ch_versions = ch_versions.mix(HISAT2_BUILD.out.versions)
+ }
+ }
+
+ //
+ // Uncompress Salmon index or generate from scratch if required
+ //
+ ch_salmon_index = Channel.empty()
+ if (salmon_index) {
+ if (salmon_index.endsWith('.tar.gz')) {
+ ch_salmon_index = UNTAR_SALMON_INDEX ( [ [:], salmon_index ] ).untar.map { it[1] }
+ ch_versions = ch_versions.mix(UNTAR_SALMON_INDEX.out.versions)
+ } else {
+ ch_salmon_index = Channel.value(file(salmon_index))
+ }
+ } else {
+ if ('salmon' in prepare_tool_indices) {
+ ch_salmon_index = SALMON_INDEX ( ch_fasta, ch_transcript_fasta ).index
+ ch_versions = ch_versions.mix(SALMON_INDEX.out.versions)
+ }
+ }
+
+ //
+ // Uncompress Kallisto index or generate from scratch if required
+ //
+ ch_kallisto_index = Channel.empty()
+ if (kallisto_index) {
+ if (kallisto_index.endsWith('.tar.gz')) {
+ ch_kallisto_index = UNTAR_KALLISTO_INDEX ( [ [:], kallisto_index ] ).untar
+ ch_versions = ch_versions.mix(UNTAR_KALLISTO_INDEX.out.versions)
+ } else {
+ ch_kallisto_index = Channel.value([[:], file(kallisto_index)])
+ }
+ } else {
+ if ('kallisto' in prepare_tool_indices) {
+ ch_kallisto_index = KALLISTO_INDEX ( ch_transcript_fasta.map{[ [:], it]} ).index
+ ch_versions = ch_versions.mix(KALLISTO_INDEX.out.versions)
+ }
+ }
+
+ emit:
+ fasta = ch_fasta // channel: path(genome.fasta)
+ gtf = ch_gtf // channel: path(genome.gtf)
+ fai = ch_fai // channel: path(genome.fai)
+ gene_bed = ch_gene_bed // channel: path(gene.bed)
+ transcript_fasta = ch_transcript_fasta // channel: path(transcript.fasta)
+ chrom_sizes = ch_chrom_sizes // channel: path(genome.sizes)
+ splicesites = ch_splicesites // channel: path(genome.splicesites.txt)
+ bbsplit_index = ch_bbsplit_index // channel: path(bbsplit/index/)
+ star_index = ch_star_index // channel: path(star/index/)
+ rsem_index = ch_rsem_index // channel: path(rsem/index/)
+ hisat2_index = ch_hisat2_index // channel: path(hisat2/index/)
+ salmon_index = ch_salmon_index // channel: path(salmon/index/)
+ kallisto_index = ch_kallisto_index // channel: [ meta, path(kallisto/index/) ]
+ versions = ch_versions.ifEmpty(null) // channel: [ versions.yml ]
+}
diff --git a/subworkflows/local/preprocess_rnaseq/main.nf b/subworkflows/local/preprocess_rnaseq/main.nf
new file mode 100644
index 0000000..6de3371
--- /dev/null
+++ b/subworkflows/local/preprocess_rnaseq/main.nf
@@ -0,0 +1,252 @@
+import groovy.json.JsonSlurper
+
+include { CAT_FASTQ } from '../../../modules/nf-core/cat/fastq/main'
+include { FASTQC } from '../../../modules/nf-core/fastqc/main'
+include { SORTMERNA } from '../../../modules/nf-core/sortmerna/main'
+
+include { FASTQ_SUBSAMPLE_FQ_SALMON } from '../../../subworkflows/nf-core/fastq_subsample_fq_salmon'
+include { FASTQ_FASTQC_UMITOOLS_TRIMGALORE } from '../../../subworkflows/nf-core/fastq_fastqc_umitools_trimgalore'
+include { FASTQ_FASTQC_UMITOOLS_FASTP } from '../../../subworkflows/nf-core/fastq_fastqc_umitools_fastp'
+
+def pass_trimmed_reads = [:]
+
+public static String getSalmonInferredStrandedness(json_file) {
+ def lib_type = new JsonSlurper().parseText(json_file.text).get('library_types')[0]
+ def strandedness = 'reverse'
+ if (lib_type) {
+ if (lib_type in ['U', 'IU']) {
+ strandedness = 'unstranded'
+ } else if (lib_type in ['SF', 'ISF']) {
+ strandedness = 'forward'
+ } else if (lib_type in ['SR', 'ISR']) {
+ strandedness = 'reverse'
+ }
+ }
+ return strandedness
+}
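+
+// Illustrative behaviour of the function above (example JSON values, not real pipeline output):
+// a Salmon lib_format_counts.json containing { "library_types": [ "ISR" ] } yields 'reverse',
+// 'SF'/'ISF' yield 'forward', 'U'/'IU' yield 'unstranded', and a missing or unrecognised
+// library type falls back to the 'reverse' default.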
+
+//
+// Create MultiQC tsv custom content from a list of values
+//
+public static String multiqcTsvFromList(tsv_data, header) {
+ def tsv_string = ""
+ if (tsv_data.size() > 0) {
+ tsv_string += "${header.join('\t')}\n"
+ tsv_string += tsv_data.join('\n')
+ }
+ return tsv_string
+}
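+
+// Minimal usage sketch (values invented for illustration):
+//   multiqcTsvFromList( [ "sample1\t150000", "sample2\t99" ], [ "Sample", "Reads after trimming" ] )
+// returns "Sample\tReads after trimming\nsample1\t150000\nsample2\t99", which is written out
+// below via collectFile() as MultiQC custom content.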
+
+workflow PREPROCESS_RNASEQ {
+
+ take:
+ ch_reads // channel: [ val(meta), [ reads ] ]
+ ch_fasta // channel: /path/to/genome.fasta
+ ch_transcript_fasta // channel: /path/to/transcript.fasta
+ ch_gtf // channel: /path/to/genome.gtf
+ make_salmon_index // boolean: Whether to create salmon index before running salmon quant
+ ch_salmon_index // channel: /path/to/salmon/index/ (optional)
+ skip_bbsplit // boolean: Skip BBSplit for removal of non-reference genome reads.
+ ch_bbsplit_index // channel: /path/to/bbsplit/index/ (optional)
+ skip_fastqc // boolean: true/false
+ skip_trimming // boolean: true/false
+ trimmer // string: 'fastp' or 'trimgalore'
+ min_trimmed_reads // integer: > 0
+ save_trimmed // boolean: true/false
+ remove_ribo_rna // boolean: true/false: whether to run sortmerna to remove rrnas
+ ch_ribo_db // Text file containing paths to fasta files (one per line) that will be used to create the database for SortMeRNA. (optional)
+ with_umi // boolean: true/false: Enable UMI-based read deduplication.
+ skip_umi_extract // boolean: true/false
+ umi_discard_read // integer: 0, 1 or 2
+
+ main:
+
+ ch_versions = Channel.empty()
+ ch_filtered_reads = Channel.empty()
+ ch_trim_read_count = Channel.empty()
+ ch_multiqc_files = Channel.empty()
+
+ ch_reads
+ .branch {
+ meta, fastqs ->
+ single : fastqs.size() == 1
+ return [ meta, fastqs.flatten() ]
+ multiple: fastqs.size() > 1
+ return [ meta, fastqs.flatten() ]
+ }
+ .set { ch_fastq }
+
+ //
+ // MODULE: Concatenate FastQ files from same sample if required
+ //
+ CAT_FASTQ (
+ ch_fastq.multiple
+ )
+ .reads
+ .mix(ch_fastq.single)
+ .set { ch_filtered_reads }
+
+ ch_versions = ch_versions.mix(CAT_FASTQ.out.versions.first().ifEmpty(null))
+
+ //
+ // MODULE: Remove ribosomal RNA reads
+ //
+ if (remove_ribo_rna) {
+ ch_sortmerna_fastas = Channel.from(ch_ribo_db.readLines()).map { row -> file(row, checkIfExists: true) }.collect()
+
+ SORTMERNA (
+ ch_filtered_reads,
+ ch_sortmerna_fastas
+ )
+ .reads
+ .set { ch_filtered_reads }
+
+ ch_multiqc_files = ch_multiqc_files.mix(SORTMERNA.out.log.map{it[1]})
+
+ ch_versions = ch_versions.mix(SORTMERNA.out.versions.first())
+ }
+
+ //
+ // SUBWORKFLOW: Read QC, extract UMI and trim adapters with TrimGalore!
+ //
+ if (trimmer == 'trimgalore') {
+ FASTQ_FASTQC_UMITOOLS_TRIMGALORE (
+ ch_filtered_reads,
+ skip_fastqc,
+ with_umi,
+ skip_umi_extract,
+ skip_trimming,
+ umi_discard_read,
+ min_trimmed_reads
+ )
+ ch_filtered_reads = FASTQ_FASTQC_UMITOOLS_TRIMGALORE.out.reads
+ ch_trim_read_count = FASTQ_FASTQC_UMITOOLS_TRIMGALORE.out.trim_read_count
+
+ ch_versions = ch_versions.mix(FASTQ_FASTQC_UMITOOLS_TRIMGALORE.out.versions)
+ ch_multiqc_files = FASTQ_FASTQC_UMITOOLS_TRIMGALORE.out.fastqc_zip
+ .mix(FASTQ_FASTQC_UMITOOLS_TRIMGALORE.out.trim_zip)
+ .mix(FASTQ_FASTQC_UMITOOLS_TRIMGALORE.out.trim_log)
+ .map{it[1]}
+ .mix(ch_multiqc_files)
+ }
+
+ //
+ // SUBWORKFLOW: Read QC, extract UMI and trim adapters with fastp
+ //
+ if (trimmer == 'fastp') {
+ FASTQ_FASTQC_UMITOOLS_FASTP (
+ ch_filtered_reads,
+ skip_fastqc,
+ with_umi,
+ skip_umi_extract,
+ umi_discard_read,
+ skip_trimming,
+ [],
+ save_trimmed,
+ save_trimmed,
+ min_trimmed_reads
+ )
+ ch_filtered_reads = FASTQ_FASTQC_UMITOOLS_FASTP.out.reads
+ ch_trim_read_count = FASTQ_FASTQC_UMITOOLS_FASTP.out.trim_read_count
+ ch_versions = ch_versions.mix(FASTQ_FASTQC_UMITOOLS_FASTP.out.versions)
+
+ ch_multiqc_files = FASTQ_FASTQC_UMITOOLS_FASTP.out.fastqc_zip
+ .mix(FASTQ_FASTQC_UMITOOLS_FASTP.out.trim_zip)
+ .mix(FASTQ_FASTQC_UMITOOLS_FASTP.out.trim_log)
+ .map{it[1]}
+ .mix(ch_multiqc_files)
+ }
+
+ //
+ // Get list of samples that failed trimming threshold for MultiQC report
+ //
+
+ ch_trim_read_count
+ .map {
+ meta, num_reads ->
+ pass_trimmed_reads[meta.id] = true
+ if (num_reads <= min_trimmed_reads.toFloat()) {
+ pass_trimmed_reads[meta.id] = false
+ return [ "$meta.id\t$num_reads" ]
+ }
+ }
+ .collect()
+ .map {
+ tsv_data ->
+ def header = ["Sample", "Reads after trimming"]
+ multiqcTsvFromList(tsv_data, header)
+ }
+ .set { ch_fail_trimming_multiqc }
+
+ ch_multiqc_files = ch_multiqc_files
+ .mix(
+ ch_fail_trimming_multiqc.collectFile(name: 'fail_trimmed_samples_mqc.tsv').ifEmpty([])
+ )
+
+ //
+ // MODULE: Remove genome contaminant reads
+ //
+ if (!skip_bbsplit) {
+ BBMAP_BBSPLIT (
+ ch_filtered_reads,
+ ch_bbsplit_index,
+ [],
+ [ [], [] ],
+ false
+ )
+ .primary_fastq
+ .set { ch_filtered_reads }
+ ch_versions = ch_versions.mix(BBMAP_BBSPLIT.out.versions.first())
+ }
+
+ // Branch FastQ channels if 'auto' specified to infer strandedness
+ ch_filtered_reads
+ .branch {
+ meta, fastq ->
+ auto_strand : meta.strandedness == 'auto'
+ return [ meta, fastq ]
+ known_strand: meta.strandedness != 'auto'
+ return [ meta, fastq ]
+ }
+ .set { ch_strand_fastq }
+
+ //
+ // SUBWORKFLOW: Sub-sample FastQ files and pseudoalign with Salmon to auto-infer strandedness
+ //
+ // Return empty channel if ch_strand_fastq.auto_strand is empty so salmon index isn't created
+
+ ch_fasta
+ .combine(ch_strand_fastq.auto_strand)
+ .map { it.first() }
+ .first()
+ .set { ch_genome_fasta }
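+    // combine() emits nothing when auto_strand is empty, so ch_genome_fasta is also empty
+    // and no Salmon index is built purely for strandedness inference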
+
+ FASTQ_SUBSAMPLE_FQ_SALMON (
+ ch_strand_fastq.auto_strand,
+ ch_genome_fasta,
+ ch_transcript_fasta,
+ ch_gtf,
+ ch_salmon_index,
+ make_salmon_index
+ )
+ ch_versions = ch_versions.mix(FASTQ_SUBSAMPLE_FQ_SALMON.out.versions)
+
+ FASTQ_SUBSAMPLE_FQ_SALMON
+ .out
+ .json_info
+ .join(ch_strand_fastq.auto_strand)
+ .map { meta, json, reads ->
+ return [ meta + [ strandedness: getSalmonInferredStrandedness(json) ], reads ]
+ }
+ .mix(ch_strand_fastq.known_strand)
+ .set { ch_strand_inferred_fastq }
+
+ emit:
+
+ reads = ch_strand_inferred_fastq
+ trim_read_count = ch_trim_read_count
+
+ multiqc_files = ch_multiqc_files
+ versions = ch_versions // channel: [ versions.yml ]
+}
+
diff --git a/subworkflows/nf-core/fastq_fastqc_umitools_fastp/main.nf b/subworkflows/nf-core/fastq_fastqc_umitools_fastp/main.nf
new file mode 100644
index 0000000..3dbb27e
--- /dev/null
+++ b/subworkflows/nf-core/fastq_fastqc_umitools_fastp/main.nf
@@ -0,0 +1,140 @@
+//
+// Read QC, UMI extraction and trimming
+//
+
+include { FASTQC as FASTQC_RAW } from '../../../modules/nf-core/fastqc/main'
+include { FASTQC as FASTQC_TRIM } from '../../../modules/nf-core/fastqc/main'
+include { UMITOOLS_EXTRACT } from '../../../modules/nf-core/umitools/extract/main'
+include { FASTP } from '../../../modules/nf-core/fastp/main'
+
+//
+// Function that parses fastp json output file to get total number of reads after trimming
+//
+import groovy.json.JsonSlurper
+
+def getFastpReadsAfterFiltering(json_file) {
+ def Map json = (Map) new JsonSlurper().parseText(json_file.text).get('summary')
+ return json['after_filtering']['total_reads'].toLong()
+}
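+
+// Illustrative sketch only: for a fastp report whose summary section looks like
+//   { "summary": { "after_filtering": { "total_reads": 198, ... } } }
+// the function above returns 198 as a long (for paired-end data fastp counts both mates).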
+
+workflow FASTQ_FASTQC_UMITOOLS_FASTP {
+ take:
+ reads // channel: [ val(meta), [ reads ] ]
+ skip_fastqc // boolean: true/false
+ with_umi // boolean: true/false
+ skip_umi_extract // boolean: true/false
+ umi_discard_read // integer: 0, 1 or 2
+ skip_trimming // boolean: true/false
+ adapter_fasta // file: adapter.fasta
+ save_trimmed_fail // boolean: true/false
+ save_merged // boolean: true/false
+ min_trimmed_reads // integer: > 0
+
+ main:
+ ch_versions = Channel.empty()
+ fastqc_raw_html = Channel.empty()
+ fastqc_raw_zip = Channel.empty()
+ if (!skip_fastqc) {
+ FASTQC_RAW (
+ reads
+ )
+ fastqc_raw_html = FASTQC_RAW.out.html
+ fastqc_raw_zip = FASTQC_RAW.out.zip
+ ch_versions = ch_versions.mix(FASTQC_RAW.out.versions.first())
+ }
+
+ umi_reads = reads
+ umi_log = Channel.empty()
+ if (with_umi && !skip_umi_extract) {
+ UMITOOLS_EXTRACT (
+ reads
+ )
+ umi_reads = UMITOOLS_EXTRACT.out.reads
+ umi_log = UMITOOLS_EXTRACT.out.log
+ ch_versions = ch_versions.mix(UMITOOLS_EXTRACT.out.versions.first())
+
+ // Discard R1 / R2 if required
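+        // (umi_discard_read = 1 keeps reads[1 % 2], i.e. R2; umi_discard_read = 2 keeps
+        // reads[2 % 2], i.e. R1; the surviving read is then re-flagged as single-end)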
+ if (umi_discard_read in [1,2]) {
+ UMITOOLS_EXTRACT
+ .out
+ .reads
+ .map {
+ meta, reads ->
+ meta.single_end ? [ meta, reads ] : [ meta + [single_end: true], reads[umi_discard_read % 2] ]
+ }
+ .set { umi_reads }
+ }
+ }
+
+ trim_reads = umi_reads
+ trim_json = Channel.empty()
+ trim_html = Channel.empty()
+ trim_log = Channel.empty()
+ trim_reads_fail = Channel.empty()
+ trim_reads_merged = Channel.empty()
+ fastqc_trim_html = Channel.empty()
+ fastqc_trim_zip = Channel.empty()
+ trim_read_count = Channel.empty()
+ if (!skip_trimming) {
+ FASTP (
+ umi_reads,
+ adapter_fasta,
+ save_trimmed_fail,
+ save_merged
+ )
+ trim_json = FASTP.out.json
+ trim_html = FASTP.out.html
+ trim_log = FASTP.out.log
+ trim_reads_fail = FASTP.out.reads_fail
+ trim_reads_merged = FASTP.out.reads_merged
+ ch_versions = ch_versions.mix(FASTP.out.versions.first())
+
+ //
+ // Filter FastQ files based on minimum trimmed read count after adapter trimming
+ //
+ FASTP
+ .out
+ .reads
+ .join(trim_json)
+ .map { meta, reads, json -> [ meta, reads, getFastpReadsAfterFiltering(json) ] }
+ .set { ch_num_trimmed_reads }
+
+ ch_num_trimmed_reads
+ .filter { meta, reads, num_reads -> num_reads >= min_trimmed_reads.toLong() }
+ .map { meta, reads, num_reads -> [ meta, reads ] }
+ .set { trim_reads }
+
+ ch_num_trimmed_reads
+ .map { meta, reads, num_reads -> [ meta, num_reads ] }
+ .set { trim_read_count }
+
+ if (!skip_fastqc) {
+ FASTQC_TRIM (
+ trim_reads
+ )
+ fastqc_trim_html = FASTQC_TRIM.out.html
+ fastqc_trim_zip = FASTQC_TRIM.out.zip
+ ch_versions = ch_versions.mix(FASTQC_TRIM.out.versions.first())
+ }
+ }
+
+ emit:
+ reads = trim_reads // channel: [ val(meta), [ reads ] ]
+
+ fastqc_raw_html // channel: [ val(meta), [ html ] ]
+ fastqc_raw_zip // channel: [ val(meta), [ zip ] ]
+
+ umi_log // channel: [ val(meta), [ log ] ]
+
+ trim_json // channel: [ val(meta), [ json ] ]
+ trim_html // channel: [ val(meta), [ html ] ]
+ trim_log // channel: [ val(meta), [ log ] ]
+ trim_reads_fail // channel: [ val(meta), [ fastq.gz ] ]
+ trim_reads_merged // channel: [ val(meta), [ fastq.gz ] ]
+ trim_read_count // channel: [ val(meta), val(count) ]
+
+ fastqc_trim_html // channel: [ val(meta), [ html ] ]
+ fastqc_trim_zip // channel: [ val(meta), [ zip ] ]
+
+ versions = ch_versions.ifEmpty(null) // channel: [ versions.yml ]
+}
diff --git a/subworkflows/nf-core/fastq_fastqc_umitools_fastp/meta.yml b/subworkflows/nf-core/fastq_fastqc_umitools_fastp/meta.yml
new file mode 100644
index 0000000..220e8db
--- /dev/null
+++ b/subworkflows/nf-core/fastq_fastqc_umitools_fastp/meta.yml
@@ -0,0 +1,128 @@
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json
+# yaml-language-server: $schema=yaml-schema.json
+name: "fastq_fastqc_umitools_fastp"
+description: Read QC, UMI extraction and trimming
+keywords:
+ - fastq
+ - fastqc
+ - qc
+ - UMI
+ - trimming
+ - fastp
+components:
+ - fastqc
+ - umitools/extract
+ - fastp
+input:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test' ]
+ - reads:
+ type: file
+ description: |
+ List of input FastQ files of size 1 and 2 for single-end and paired-end data,
+ respectively.
+ - skip_fastqc:
+ type: boolean
+ description: |
+ Skip fastqc process
+ - with_umi:
+ type: boolean
+ description: |
+ With or without umi detection
+ - skip_umi_extract:
+ type: boolean
+ description: |
+        With or without UMI extraction
+ - umi_discard_read:
+ type: integer
+ description: |
+ Discard R1 / R2 if required
+ - skip_trimming:
+ type: boolean
+ description: |
+        Allows skipping fastp execution
+ - adapter_fasta:
+ type: file
+ description: |
+ Fasta file of adapter sequences
+ - save_trimmed_fail:
+ type: boolean
+ description: |
+ Save trimmed fastqs of failed samples
+ - save_merged:
+ type: boolean
+ description: |
+ Save merged fastqs
+ - min_trimmed_reads:
+ type: integer
+ description: |
+        Inputs with fewer reads than this threshold will be filtered out of the "reads" output channel
+output:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test' ]
+ - reads:
+ type: file
+      description: |
+        Extracted FASTQ files.
+        For single-end reads, pattern is \${prefix}.umi_extract.fastq.gz.
+        For paired-end reads, pattern is \${prefix}.umi_extract_{1,2}.fastq.gz.
+      pattern: "*.{fastq.gz}"
+ - fastqc_html:
+ type: file
+ description: FastQC report
+ pattern: "*_{fastqc.html}"
+ - fastqc_zip:
+ type: file
+ description: FastQC report archive
+ pattern: "*_{fastqc.zip}"
+  - umi_log:
+ type: file
+ description: Logfile for umi_tools
+ pattern: "*.{log}"
+ - trim_json:
+ type: file
+ description: FastP Trimming report
+ pattern: "*.{fastp.json}"
+ - trim_html:
+ type: file
+ description: FastP Trimming report
+ pattern: "*.{fastp.html}"
+  - trim_log:
+ type: file
+ description: Logfile FastP
+ pattern: "*.{fastp.log}"
+ - trim_reads_fail:
+ type: file
+ description: Trimmed fastq files failing QC
+ pattern: "*.{fastq.gz}"
+ - trim_reads_merged:
+ type: file
+ description: Trimmed and merged fastq files
+ pattern: "*.{fastq.gz}"
+ - trim_read_count:
+ type: integer
+ description: Number of reads after trimming
+ - fastqc_trim_html:
+ type: file
+ description: FastQC report
+ pattern: "*_{fastqc.html}"
+ - fastqc_trim_zip:
+ type: file
+ description: FastQC report archive
+ pattern: "*_{fastqc.zip}"
+ - versions:
+ type: file
+ description: File containing software versions
+ pattern: "versions.yml"
+authors:
+ - "@robsyme"
+maintainers:
+ - "@robsyme"
diff --git a/subworkflows/nf-core/fastq_fastqc_umitools_fastp/tests/main.nf.test b/subworkflows/nf-core/fastq_fastqc_umitools_fastp/tests/main.nf.test
new file mode 100644
index 0000000..cdd7398
--- /dev/null
+++ b/subworkflows/nf-core/fastq_fastqc_umitools_fastp/tests/main.nf.test
@@ -0,0 +1,60 @@
+nextflow_workflow {
+
+ name "Test Workflow FASTQ_FASTQC_UMITOOLS_FASTP"
+ script "../main.nf"
+ workflow "FASTQ_FASTQC_UMITOOLS_FASTP"
+ tag "subworkflows"
+ tag "subworkflows_nfcore"
+ tag "subworkflows/fastq_fastqc_umitools_fastp"
+ tag "fastq_fastqc_umitools_fastp"
+ tag "fastqc"
+ tag "umitools/extract"
+ tag "fastp"
+
+
+ test("sarscov2 paired-end [fastq]") {
+
+ when {
+ workflow {
+ """
+ input[0] = [
+ [ id:'test', single_end:false ], // meta map
+ [
+ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true),
+ file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true)
+ ]
+ ]
+ input[1] = false // skip_fastqc
+ input[2] = false // with_umi
+ input[3] = false // skip_umi_extract
+ input[4] = 1 // umi_discard_read
+ input[5] = false // skip_trimming
+ input[6] = [] // adapter_fasta
+ input[7] = false // save_trimmed_fail
+ input[8] = false // save_merged
+ input[9] = 1 // min_trimmed_reads
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert workflow.success },
+ { assert snapshot(workflow.out.reads).match("reads") },
+ { assert snapshot(workflow.out.umi_log).match("umi_log") },
+ { assert snapshot(workflow.out.trim_json).match("trim_json") },
+ { assert snapshot(workflow.out.trim_reads_fail).match("trim_reads_fail") },
+ { assert snapshot(workflow.out.trim_reads_merged).match("trim_reads_merged") },
+ { assert snapshot(workflow.out.trim_read_count).match("trim_read_count") },
+ { assert snapshot(workflow.out.versions).match("versions") },
+
+ { assert workflow.out.fastqc_raw_html },
+ { assert workflow.out.fastqc_raw_zip },
+ { assert workflow.out.trim_html },
+ { assert workflow.out.trim_log },
+ { assert workflow.out.fastqc_trim_html },
+ { assert workflow.out.fastqc_trim_zip }
+ )
+ }
+ }
+}
diff --git a/subworkflows/nf-core/fastq_fastqc_umitools_fastp/tests/main.nf.test.snap b/subworkflows/nf-core/fastq_fastqc_umitools_fastp/tests/main.nf.test.snap
new file mode 100644
index 0000000..38a65ae
--- /dev/null
+++ b/subworkflows/nf-core/fastq_fastqc_umitools_fastp/tests/main.nf.test.snap
@@ -0,0 +1,81 @@
+{
+ "trim_reads_merged": {
+ "content": [
+ [
+
+ ]
+ ],
+ "timestamp": "2023-11-26T02:28:26.26920982"
+ },
+ "trim_reads_fail": {
+ "content": [
+ [
+
+ ]
+ ],
+ "timestamp": "2023-11-26T02:28:26.25861515"
+ },
+ "versions": {
+ "content": [
+ [
+ "versions.yml:md5,85bd0117e5778fff18e3920972a296ad",
+ "versions.yml:md5,c50aa59475ab901bc6f9a2cf7b1a14e0",
+ "versions.yml:md5,f3dcaae948e8eed92b4a5557b4c6668e"
+ ]
+ ],
+ "timestamp": "2023-11-26T02:28:26.30891403"
+ },
+ "trim_json": {
+ "content": [
+ [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.fastp.json:md5,1e0f8e27e71728e2b63fc64086be95cd"
+ ]
+ ]
+ ],
+ "timestamp": "2023-11-26T02:28:26.24768259"
+ },
+ "reads": {
+ "content": [
+ [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ [
+ "test_1.fastp.fastq.gz:md5,67b2bbae47f073e05a97a9c2edce23c7",
+ "test_2.fastp.fastq.gz:md5,25cbdca08e2083dbd4f0502de6b62f39"
+ ]
+ ]
+ ]
+ ],
+ "timestamp": "2023-12-04T11:30:32.061644815"
+ },
+ "umi_log": {
+ "content": [
+ [
+
+ ]
+ ],
+ "timestamp": "2023-11-26T02:28:26.238536"
+ },
+ "trim_read_count": {
+ "content": [
+ [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ 198
+ ]
+ ]
+ ],
+ "timestamp": "2023-11-26T02:28:26.27984169"
+ }
+}
\ No newline at end of file
diff --git a/subworkflows/nf-core/fastq_fastqc_umitools_fastp/tests/tags.yml b/subworkflows/nf-core/fastq_fastqc_umitools_fastp/tests/tags.yml
new file mode 100644
index 0000000..84a4b56
--- /dev/null
+++ b/subworkflows/nf-core/fastq_fastqc_umitools_fastp/tests/tags.yml
@@ -0,0 +1,2 @@
+subworkflows/fastq_fastqc_umitools_fastp:
+ - subworkflows/nf-core/fastq_fastqc_umitools_fastp/**
diff --git a/subworkflows/nf-core/fastq_fastqc_umitools_trimgalore/main.nf b/subworkflows/nf-core/fastq_fastqc_umitools_trimgalore/main.nf
new file mode 100644
index 0000000..db2e5b3
--- /dev/null
+++ b/subworkflows/nf-core/fastq_fastqc_umitools_trimgalore/main.nf
@@ -0,0 +1,123 @@
+//
+// Read QC, UMI extraction and trimming
+//
+
+include { FASTQC } from '../../../modules/nf-core/fastqc/main'
+include { UMITOOLS_EXTRACT } from '../../../modules/nf-core/umitools/extract/main'
+include { TRIMGALORE } from '../../../modules/nf-core/trimgalore/main'
+
+//
+// Function that parses TrimGalore log output file to get total number of reads after trimming
+//
+def getTrimGaloreReadsAfterFiltering(log_file) {
+ def total_reads = 0
+ def filtered_reads = 0
+ log_file.eachLine { line ->
+ def total_reads_matcher = line =~ /([\d\.]+)\ssequences processed in total/
+ def filtered_reads_matcher = line =~ /shorter than the length cutoff[^:]+:\s([\d\.]+)/
+ if (total_reads_matcher) total_reads = total_reads_matcher[0][1].toFloat()
+ if (filtered_reads_matcher) filtered_reads = filtered_reads_matcher[0][1].toFloat()
+ }
+ return total_reads - filtered_reads
+}
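+
+// Illustrative sketch only: the regexes above pick the counts out of Trim Galore! report
+// lines of roughly this shape (numbers invented):
+//   100 sequences processed in total
+//   Sequences removed because they became shorter than the length cutoff of 20 bp: 5 (5.0%)
+// giving 100 - 5 = 95 reads retained after trimming.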
+
+workflow FASTQ_FASTQC_UMITOOLS_TRIMGALORE {
+ take:
+ reads // channel: [ val(meta), [ reads ] ]
+ skip_fastqc // boolean: true/false
+ with_umi // boolean: true/false
+ skip_umi_extract // boolean: true/false
+ skip_trimming // boolean: true/false
+ umi_discard_read // integer: 0, 1 or 2
+ min_trimmed_reads // integer: > 0
+
+ main:
+ ch_versions = Channel.empty()
+ fastqc_html = Channel.empty()
+ fastqc_zip = Channel.empty()
+ if (!skip_fastqc) {
+ FASTQC (reads)
+ fastqc_html = FASTQC.out.html
+ fastqc_zip = FASTQC.out.zip
+ ch_versions = ch_versions.mix(FASTQC.out.versions.first())
+ }
+
+ umi_reads = reads
+ umi_log = Channel.empty()
+ if (with_umi && !skip_umi_extract) {
+ UMITOOLS_EXTRACT (reads)
+ umi_reads = UMITOOLS_EXTRACT.out.reads
+ umi_log = UMITOOLS_EXTRACT.out.log
+ ch_versions = ch_versions.mix(UMITOOLS_EXTRACT.out.versions.first())
+
+ // Discard R1 / R2 if required
+ if (umi_discard_read in [1,2]) {
+ UMITOOLS_EXTRACT
+ .out
+ .reads
+ .map {
+ meta, reads ->
+ meta.single_end ? [ meta, reads ] : [ meta + ['single_end': true], reads[umi_discard_read % 2] ]
+ }
+ .set { umi_reads }
+ }
+ }
+
+ trim_reads = umi_reads
+ trim_unpaired = Channel.empty()
+ trim_html = Channel.empty()
+ trim_zip = Channel.empty()
+ trim_log = Channel.empty()
+ trim_read_count = Channel.empty()
+ if (!skip_trimming) {
+ TRIMGALORE (umi_reads)
+ trim_unpaired = TRIMGALORE.out.unpaired
+ trim_html = TRIMGALORE.out.html
+ trim_zip = TRIMGALORE.out.zip
+ trim_log = TRIMGALORE.out.log
+ ch_versions = ch_versions.mix(TRIMGALORE.out.versions.first())
+
+ //
+ // Filter FastQ files based on minimum trimmed read count after adapter trimming
+ //
+ TRIMGALORE
+ .out
+ .reads
+ .join(trim_log, remainder: true)
+ .map {
+ meta, reads, trim_log ->
+ if (trim_log) {
+ num_reads = getTrimGaloreReadsAfterFiltering(meta.single_end ? trim_log : trim_log[-1])
+ [ meta, reads, num_reads ]
+ } else {
+ [ meta, reads, min_trimmed_reads.toFloat() + 1 ]
+ }
+ }
+ .set { ch_num_trimmed_reads }
+
+ ch_num_trimmed_reads
+ .filter { meta, reads, num_reads -> num_reads >= min_trimmed_reads.toFloat() }
+ .map { meta, reads, num_reads -> [ meta, reads ] }
+ .set { trim_reads }
+
+ ch_num_trimmed_reads
+ .map { meta, reads, num_reads -> [ meta, num_reads ] }
+ .set { trim_read_count }
+ }
+
+ emit:
+ reads = trim_reads // channel: [ val(meta), [ reads ] ]
+
+ fastqc_html // channel: [ val(meta), [ html ] ]
+ fastqc_zip // channel: [ val(meta), [ zip ] ]
+
+ umi_log // channel: [ val(meta), [ log ] ]
+
+ trim_unpaired // channel: [ val(meta), [ reads ] ]
+ trim_html // channel: [ val(meta), [ html ] ]
+ trim_zip // channel: [ val(meta), [ zip ] ]
+ trim_log // channel: [ val(meta), [ txt ] ]
+ trim_read_count // channel: [ val(meta), val(count) ]
+
+ versions = ch_versions.ifEmpty(null) // channel: [ versions.yml ]
+}
diff --git a/subworkflows/nf-core/fastq_fastqc_umitools_trimgalore/meta.yml b/subworkflows/nf-core/fastq_fastqc_umitools_trimgalore/meta.yml
new file mode 100644
index 0000000..a7df97f
--- /dev/null
+++ b/subworkflows/nf-core/fastq_fastqc_umitools_trimgalore/meta.yml
@@ -0,0 +1,101 @@
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json
+name: "fastq_fastqc_umitools_trimgalore"
+description: Read QC, UMI extraction and trimming
+keywords:
+ - fastq
+ - fastqc
+ - qc
+ - UMI
+ - trimming
+ - trimgalore
+components:
+ - fastqc
+ - umitools/extract
+ - trimgalore
+input:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test' ]
+ - reads:
+ type: file
+ description: |
+ List of input FastQ files of size 1 and 2 for single-end and paired-end data,
+ respectively.
+ - skip_fastqc:
+ type: boolean
+ description: |
+ Skip fastqc process
+ - with_umi:
+ type: boolean
+ description: |
+ With or without umi detection
+ - skip_umi_extract:
+ type: boolean
+ description: |
+        With or without UMI extraction
+ - skip_trimming:
+ type: boolean
+ description: |
+        Allows skipping trimgalore execution
+ - umi_discard_read:
+ type: integer
+ description: |
+ Discard R1 / R2 if required
+ - min_trimmed_reads:
+ type: integer
+ description: |
+        Inputs with fewer reads than this threshold will be filtered out of the "reads" output channel
+output:
+ - reads:
+ type: file
+      description: |
+        Extracted FASTQ files.
+        For single-end reads, pattern is \${prefix}.umi_extract.fastq.gz.
+        For paired-end reads, pattern is \${prefix}.umi_extract_{1,2}.fastq.gz.
+      pattern: "*.{fastq.gz}"
+ - fastqc_html:
+ type: file
+ description: FastQC report
+ pattern: "*_{fastqc.html}"
+ - fastqc_zip:
+ type: file
+ description: FastQC report archive
+ pattern: "*_{fastqc.zip}"
+  - umi_log:
+ type: file
+ description: Logfile for umi_tools
+ pattern: "*.{log}"
+ - trim_unpaired:
+ type: file
+ description: |
+ FastQ files containing unpaired reads from read 1 or read 2
+ pattern: "*unpaired*.fq.gz"
+ - trim_html:
+ type: file
+ description: FastQC report (optional)
+ pattern: "*_{fastqc.html}"
+ - trim_zip:
+ type: file
+ description: FastQC report archive (optional)
+ pattern: "*_{fastqc.zip}"
+ - trim_log:
+ type: file
+ description: Trim Galore! trimming report
+ pattern: "*_{report.txt}"
+ - trim_read_count:
+ type: integer
+ description: Number of reads remaining after trimming for all input samples
+ - versions:
+ type: file
+ description: File containing software versions
+ pattern: "versions.yml"
+authors:
+ - "@drpatelh"
+ - "@KamilMaliszArdigen"
+maintainers:
+ - "@drpatelh"
+ - "@KamilMaliszArdigen"
diff --git a/subworkflows/nf-core/fastq_subsample_fq_salmon/main.nf b/subworkflows/nf-core/fastq_subsample_fq_salmon/main.nf
new file mode 100644
index 0000000..0ac3e53
--- /dev/null
+++ b/subworkflows/nf-core/fastq_subsample_fq_salmon/main.nf
@@ -0,0 +1,54 @@
+//
+// Sub-sample FastQ files and pseudo-align with Salmon
+// can be used to infer strandedness of library
+//
+
+include { SALMON_INDEX } from '../../../modules/nf-core/salmon/index/main'
+include { FQ_SUBSAMPLE } from '../../../modules/nf-core/fq/subsample/main'
+include { SALMON_QUANT } from '../../../modules/nf-core/salmon/quant/main'
+
+workflow FASTQ_SUBSAMPLE_FQ_SALMON {
+ take:
+ ch_reads // channel: [ val(meta), [ reads ] ]
+ ch_genome_fasta // channel: /path/to/genome.fasta
+ ch_transcript_fasta // channel: /path/to/transcript.fasta
+ ch_gtf // channel: /path/to/genome.gtf
+ ch_index // channel: /path/to/salmon/index/
+ make_index // boolean: Whether to create salmon index before running salmon quant
+
+ main:
+
+ ch_versions = Channel.empty()
+
+ //
+ // Create Salmon index if required
+ //
+ if (make_index) {
+ ch_index = SALMON_INDEX ( ch_genome_fasta, ch_transcript_fasta ).index
+ ch_versions = ch_versions.mix(SALMON_INDEX.out.versions)
+ }
+
+ //
+ // Sub-sample FastQ files with fq
+ //
+ FQ_SUBSAMPLE ( ch_reads )
+ ch_versions = ch_versions.mix(FQ_SUBSAMPLE.out.versions.first())
+
+ //
+ // Pseudo-alignment with Salmon
+ //
+ def lib_type = 'A'
+ def alignment_mode = false
+ SALMON_QUANT ( FQ_SUBSAMPLE.out.fastq, ch_index, ch_gtf, ch_transcript_fasta, alignment_mode, lib_type )
+ ch_versions = ch_versions.mix(SALMON_QUANT.out.versions.first())
+
+ emit:
+ index = ch_index // channel: [ index ]
+
+ reads = FQ_SUBSAMPLE.out.fastq // channel: [ val(meta), fastq ]
+
+ results = SALMON_QUANT.out.results // channel: [ val(meta), results_dir ]
+    json_info = SALMON_QUANT.out.json_info // channel: [ val(meta), json_info ]
+
+ versions = ch_versions // channel: [ versions.yml ]
+}
diff --git a/subworkflows/nf-core/fastq_subsample_fq_salmon/meta.yml b/subworkflows/nf-core/fastq_subsample_fq_salmon/meta.yml
new file mode 100644
index 0000000..7e2f109
--- /dev/null
+++ b/subworkflows/nf-core/fastq_subsample_fq_salmon/meta.yml
@@ -0,0 +1,70 @@
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json
+name: "fastq_subsample_fq_salmon"
+description: Subsample fastq
+keywords:
+ - fastq
+ - subsample
+ - strandedness
+components:
+ - fq/subsample
+ - salmon/quant
+ - salmon/index
+input:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test' ]
+ - ch_reads:
+ type: file
+ description: |
+ List of input FastQ files of size 1 and 2 for single-end and paired-end data,
+ respectively.
+ - ch_genome_fasta:
+ type: file
+ description: Genome fasta file
+ pattern: "Path to genome sequence in fasta format"
+ - ch_transcript_fasta:
+ type: file
+ description: Transcript fasta file
+ pattern: "Path to transcript sequence in fasta format"
+ - ch_gtf:
+ type: file
+ description: GTF features file
+      pattern: "Path to features in GTF format"
+ - ch_index:
+ type: file
+ description: Salmon index files
+ pattern: "Directory containing Salmon index"
+ - make_index:
+ type: boolean
+ description: Whether to create salmon index before running salmon quant
+output:
+ - index:
+ type: directory
+ description: Directory containing salmon index
+ pattern: "salmon"
+ - reads:
+ type: file
+ description: Subsampled fastq reads.
+ pattern: "*.{fq,fastq}{,.gz}"
+ - results:
+ type: directory
+ description: Folder containing the quantification results for a specific sample
+ pattern: "${prefix}"
+ - json_info:
+ type: file
+ description: |
+ File containing meta information from Salmon quant
+ Which could be used to infer strandedness among other things
+ pattern: "*info.json"
+ - versions:
+ type: file
+ description: File containing software versions
+ pattern: "versions.yml"
+authors:
+ - "@robsyme"
+ - "@drpatelh"
+maintainers:
+ - "@robsyme"
+ - "@drpatelh"
diff --git a/subworkflows/nf-core/fastq_subsample_fq_salmon/tests/main.nf.test b/subworkflows/nf-core/fastq_subsample_fq_salmon/tests/main.nf.test
new file mode 100644
index 0000000..6342449
--- /dev/null
+++ b/subworkflows/nf-core/fastq_subsample_fq_salmon/tests/main.nf.test
@@ -0,0 +1,65 @@
+nextflow_workflow {
+
+ name "Test Workflow FASTQ_SUBSAMPLE_FQ_SALMON"
+ script "../main.nf"
+ workflow "FASTQ_SUBSAMPLE_FQ_SALMON"
+ config "./nextflow.config"
+ tag "subworkflows"
+ tag "subworkflows_nfcore"
+ tag "subworkflows/fastq_subsample_fq_salmon"
+ tag "fastq_subsample_fq_salmon"
+ tag "salmon/index"
+ tag "fq/subsample"
+ tag "salmon/quant"
+
+ test("homo_sapiens paired-end [fastq]") {
+
+ setup {
+ run("SALMON_INDEX") {
+ script "../../../../modules/nf-core/salmon/index/main.nf"
+ process {
+ """
+ input[0] = Channel.of(file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true)) // genome_fasta
+ input[1] = Channel.of(file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/transcriptome.fasta', checkIfExists: true)) // transcriptome_fasta
+ """
+ }
+ }
+ }
+
+ when {
+ workflow {
+ """
+ make_index = false
+
+ input[0] = Channel.of([
+ [ id:'test', single_end:false ], // meta map
+ [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_1.fastq.gz', checkIfExists: true),
+ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ]
+ ])
+ input[1] = Channel.of(file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true)) // genome_fasta
+ input[2] = Channel.of(file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/transcriptome.fasta', checkIfExists: true)) // transcriptome_fasta
+ input[3] = Channel.of(file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true)) // genome_gtf
+ input[4] = SALMON_INDEX.out.index
+ input[5] = make_index
+ """
+ }
+ }
+
+ then {
+ def readlines1 = path(workflow.out.reads[0][1][0]).linesGzip
+ def readlines2 = path(workflow.out.reads[0][1][1]).linesGzip
+ assertAll(
+ { assert workflow.success },
+ { assert snapshot(readlines1[0..5]).match("test_reads_1_lines") },
+ { assert snapshot(readlines1.size()).match("test_reads_1_size") },
+ { assert snapshot(readlines2[0..5]).match("test_reads_2_lines") },
+ { assert snapshot(readlines2.size()).match("test_reads_2_size") },
+ { assert snapshot(workflow.out.versions).match("versions") },
+
+ { assert workflow.out.index },
+ { assert workflow.out.results },
+ { assert workflow.out.json_info }
+ )
+ }
+ }
+}
diff --git a/subworkflows/nf-core/fastq_subsample_fq_salmon/tests/main.nf.test.snap b/subworkflows/nf-core/fastq_subsample_fq_salmon/tests/main.nf.test.snap
new file mode 100644
index 0000000..afbe0b5
--- /dev/null
+++ b/subworkflows/nf-core/fastq_subsample_fq_salmon/tests/main.nf.test.snap
@@ -0,0 +1,49 @@
+{
+ "test_reads_1_size": {
+ "content": [
+ 1066944
+ ],
+ "timestamp": "2024-01-11T10:33:54.747992124"
+ },
+ "versions": {
+ "content": [
+ [
+ "versions.yml:md5,12c0d1f67c2afb97470ae0974e5e01bb",
+ "versions.yml:md5,885fde9e7beac002b3a17b66b92db4bd"
+ ]
+ ],
+ "timestamp": "2023-11-26T16:41:10.396971682"
+ },
+ "test_reads_1_lines": {
+ "content": [
+ [
+ "@normal#21#998579#1/1",
+ "CCTTCTCCCTGCTGGGGTTGCTTGTCAGTAGCGGGCAAGGTAGGAGTGTGGCGCTTTATTGCATTTACTTTCCCTCCCCCTTCCCCCCGGCCAAGAGAGG",
+ "+",
+ "102302000331;3333;23133320233330*33/233333333333333/313232333/3;3;3/333000;11/00;;01//103*1032323233",
+ "@normal#21#998572#2/1",
+ "CTCCTCTCCTTCTACCTGCTGGGGTTGCTTGTCAGTAGCGGGCAAGGTCGGAGTGTTGCGCTTTATTGCATTTACTTTCCCTCCCCCTTCCACCCGGCCA"
+ ]
+ ],
+ "timestamp": "2024-01-11T10:33:54.730250665"
+ },
+ "test_reads_2_lines": {
+ "content": [
+ [
+ "@normal#21#998579#1/2",
+ "AAAAAAAAAGAAGAAGCAGAAGCTGTTTCCCTGGATATCCTGCTCACCGATTCCCCTCTCCAATTCTGTATTTTCCCTTCTCTTATTTAAGGGTCTCCAC",
+ "+",
+ "023333233332333310333302333211/3333;0300;*/;000/32;201003031/22;21333032;;11/23030322;2332333313/030",
+ "@normal#21#998572#2/2",
+ "TTCCCCTCTCCAATTGAGTATTTTCCCTTCTCTTATTTAAGGGTCTCCACACAAACAGATACAATTTTAGGGACAGCTAGGAGAAAGAACGAAAATAATAA"
+ ]
+ ],
+ "timestamp": "2024-01-11T10:33:54.756723613"
+ },
+ "test_reads_2_size": {
+ "content": [
+ 1066944
+ ],
+ "timestamp": "2024-01-11T10:33:54.763399473"
+ }
+}
\ No newline at end of file
diff --git a/subworkflows/nf-core/fastq_subsample_fq_salmon/tests/nextflow.config b/subworkflows/nf-core/fastq_subsample_fq_salmon/tests/nextflow.config
new file mode 100644
index 0000000..7fc4d63
--- /dev/null
+++ b/subworkflows/nf-core/fastq_subsample_fq_salmon/tests/nextflow.config
@@ -0,0 +1,9 @@
+process {
+
+ publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }
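+    // Illustrative example of the closure above: for task.process
+    // 'FASTQ_SUBSAMPLE_FQ_SALMON:SALMON_QUANT' the last ':'-token is 'SALMON_QUANT', its first
+    // '_'-separated part is 'SALMON', so outputs are published to "${params.outdir}/salmon"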
+
+ withName: FQ_SUBSAMPLE {
+ ext.args = '--record-count 1000000 --seed 1'
+ }
+
+}
diff --git a/subworkflows/nf-core/fastq_subsample_fq_salmon/tests/tags.yml b/subworkflows/nf-core/fastq_subsample_fq_salmon/tests/tags.yml
new file mode 100644
index 0000000..cc809c5
--- /dev/null
+++ b/subworkflows/nf-core/fastq_subsample_fq_salmon/tests/tags.yml
@@ -0,0 +1,2 @@
+subworkflows/fastq_subsample_fq_salmon:
+ - subworkflows/nf-core/fastq_subsample_fq_salmon/**
diff --git a/tests/nextflow.config b/tests/nextflow.config
index ec574b3..c82fb21 100644
--- a/tests/nextflow.config
+++ b/tests/nextflow.config
@@ -4,4 +4,30 @@
========================================================================================
*/
+process {
+
+ // Impose sensible resource limits for testing
+
+ withName: '.*' {
+ cpus = 2
+ memory = 3.GB
+ time = 2.h
+ }
+
+ // Override modules.config so module snapshots match
+
+ withName: FQ_SUBSAMPLE {
+ ext.prefix = ''
+ }
+ withName: GFFREAD {
+ ext.args = null
+ }
+}
+
+params {
+ test_data_base = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules'
+ modules_testdata_base_path = 's3://ngi-igenomes/testdata/nf-core/modules/'
+ hisat2_build_memory = '3.GB'
+}
+
includeConfig 'https://raw.githubusercontent.com/nf-core/modules/0094ae45ba8f5a2e30644d4e252970775a03ee91/tests/config/test_data.config'
diff --git a/workflows/riboseq.nf b/workflows/riboseq.nf
index 47abb81..e797e26 100644
--- a/workflows/riboseq.nf
+++ b/workflows/riboseq.nf
@@ -1,10 +1,29 @@
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ GENOME PARAMETER VALUES
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+*/
+
+params.fasta = WorkflowMain.getGenomeAttribute(params, 'fasta')
+params.transcript_fasta = WorkflowMain.getGenomeAttribute(params, 'transcript_fasta')
+params.additional_fasta = WorkflowMain.getGenomeAttribute(params, 'additional_fasta')
+params.gtf = WorkflowMain.getGenomeAttribute(params, 'gtf')
+params.gff = WorkflowMain.getGenomeAttribute(params, 'gff')
+params.gene_bed = WorkflowMain.getGenomeAttribute(params, 'bed12')
+params.bbsplit_index = WorkflowMain.getGenomeAttribute(params, 'bbsplit')
+params.star_index = WorkflowMain.getGenomeAttribute(params, 'star')
+params.hisat2_index = WorkflowMain.getGenomeAttribute(params, 'hisat2')
+params.rsem_index = WorkflowMain.getGenomeAttribute(params, 'rsem')
+params.salmon_index = WorkflowMain.getGenomeAttribute(params, 'salmon')
+params.kallisto_index = WorkflowMain.getGenomeAttribute(params, 'kallisto')
+
/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
PRINT PARAMS SUMMARY
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/
-include { paramsSummaryLog; paramsSummaryMap } from 'plugin/nf-validation'
+include { paramsSummaryLog; paramsSummaryMap; fromSamplesheet } from 'plugin/nf-validation'
def logo = NfcoreTemplate.logo(workflow, params.monochrome_logs)
def citation = '\n' + WorkflowMain.citation(workflow) + '\n'
@@ -13,8 +32,59 @@ def summary_params = paramsSummaryMap(workflow)
// Print parameter summary log to screen
log.info logo + paramsSummaryLog(workflow) + citation
+// Check if an AWS iGenome has been provided to use the appropriate version of STAR
+def is_aws_igenome = false
+if (params.fasta && params.gtf) {
+ if ((file(params.fasta).getName() - '.gz' == 'genome.fa') && (file(params.gtf).getName() - '.gz' == 'genes.gtf')) {
+ is_aws_igenome = true
+ }
+}
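+// e.g. an AWS iGenomes reference providing 'genome.fa'/'genome.fa.gz' and 'genes.gtf'/'genes.gtf.gz'
+// matches here, in which case PREPARE_GENOME builds the STAR index with the local
+// STAR_GENOMEGENERATE_IGENOMES module rather than the nf-core STAR_GENOMEGENERATE module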
+
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ VALIDATE INPUTS
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+*/
+
WorkflowRiboseq.initialise(params, log)
+// Check rRNA databases for sortmerna
+if (params.remove_ribo_rna) {
+ ch_ribo_db = file(params.ribo_database_manifest)
+ if (ch_ribo_db.isEmpty()) {exit 1, "File provided with --ribo_database_manifest is empty: ${ch_ribo_db.getName()}!"}
+}
+
+// Check if file with list of fastas is provided when running BBSplit
+if (!params.skip_bbsplit && !params.bbsplit_index && params.bbsplit_fasta_list) {
+ ch_bbsplit_fasta_list = file(params.bbsplit_fasta_list)
+ if (ch_bbsplit_fasta_list.isEmpty()) {exit 1, "File provided with --bbsplit_fasta_list is empty: ${ch_bbsplit_fasta_list.getName()}!"}
+}
+
+// Check alignment parameters
+def prepareToolIndices = []
+if (!params.skip_bbsplit) { prepareToolIndices << 'bbsplit' }
+if (!params.skip_alignment) { prepareToolIndices << params.aligner }
+if (!params.skip_pseudo_alignment && params.pseudo_aligner) { prepareToolIndices << params.pseudo_aligner }
+
+// Determine whether to filter the GTF or not
+def filterGtf =
+ ((
+ // Condition 1: Alignment is required and aligner is set
+ !params.skip_alignment && params.aligner
+ ) ||
+ (
+ // Condition 2: Pseudoalignment is required and pseudoaligner is set
+ !params.skip_pseudo_alignment && params.pseudo_aligner
+ ) ||
+ (
+ // Condition 3: Transcript FASTA file is not provided
+ !params.transcript_fasta
+ )) &&
+ (
+ // Condition 4: --skip_gtf_filter is not provided
+ !params.skip_gtf_filter
+ )
+
/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
CONFIG FILES
@@ -35,7 +105,8 @@ ch_multiqc_custom_methods_description = params.multiqc_methods_description ? fil
//
// SUBWORKFLOW: Consisting of a mix of local and nf-core/modules
//
-include { INPUT_CHECK } from '../subworkflows/local/input_check'
+include { PREPARE_GENOME } from '../subworkflows/local/prepare_genome'
+include { PREPROCESS_RNASEQ } from '../subworkflows/local/preprocess_rnaseq'
/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -46,7 +117,6 @@ include { INPUT_CHECK } from '../subworkflows/local/input_check'
//
// MODULE: Installed directly from nf-core/modules
//
-include { FASTQC } from '../modules/nf-core/fastqc/main'
include { MULTIQC } from '../modules/nf-core/multiqc/main'
include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/custom/dumpsoftwareversions/main'
@@ -61,53 +131,114 @@ def multiqc_report = []
workflow RIBOSEQ {
- ch_versions = Channel.empty()
-
+ ch_versions = Channel.empty()
+ ch_multiqc_files = Channel.empty()
+
//
- // SUBWORKFLOW: Read in samplesheet, validate and stage input files
+ // SUBWORKFLOW: Uncompress and prepare reference genome files
//
- INPUT_CHECK (
- file(params.input)
+ def biotype = params.gencode ? "gene_type" : params.featurecounts_group_type
+ PREPARE_GENOME (
+ params.fasta,
+ params.gtf,
+ params.gff,
+ params.additional_fasta,
+ params.transcript_fasta,
+ params.gene_bed,
+ params.splicesites,
+ params.bbsplit_fasta_list,
+ params.star_index,
+ params.rsem_index,
+ params.salmon_index,
+ params.kallisto_index,
+ params.hisat2_index,
+ params.bbsplit_index,
+ params.gencode,
+ is_aws_igenome,
+ biotype,
+ prepareToolIndices,
+ filterGtf
)
- ch_versions = ch_versions.mix(INPUT_CHECK.out.versions)
- // TODO: OPTIONAL, you can use nf-validation plugin to create an input channel from the samplesheet with Channel.fromSamplesheet("input")
- // See the documentation https://nextflow-io.github.io/nf-validation/samplesheets/fromSamplesheet/
- // ! There is currently no tooling to help you write a sample sheet schema
+ ch_versions = ch_versions.mix(PREPARE_GENOME.out.versions)
+
+    // Check that no contigs in the genome FASTA file are larger than 512 Mbp (the upper limit for BAI indices)
+ if (!params.skip_alignment && !params.bam_csi_index) {
+ PREPARE_GENOME
+ .out
+ .fai
+ .map { WorkflowRiboseq.checkMaxContigSize(it, log) }
+ }
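+    // (the mapped result is not used; checkMaxContigSize is called for its side effect of
+    // validating contig sizes against the BAI limit)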
//
- // MODULE: Run FastQC
+    // Create the input channel from the samplesheet provided via params.input
//
- FASTQC (
- INPUT_CHECK.out.reads
+ Channel
+ .fromSamplesheet("input")
+ .map {
+ meta, fastq_1, fastq_2 ->
+ if (!fastq_2) {
+ return [ meta.id, meta + [ single_end:true ], [ fastq_1 ] ]
+ } else {
+ return [ meta.id, meta + [ single_end:false ], [ fastq_1, fastq_2 ] ]
+ }
+ }
+ .groupTuple()
+ .map {
+ WorkflowRiboseq.validateInput(it)
+ }
+ .set { ch_fastq }
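+    // For example (hypothetical sample id), a single-end row becomes
+    //   [ 'SAMPLE1', [ id:'SAMPLE1', single_end:true ], [ 'SAMPLE1_R1.fastq.gz' ] ]
+    // and rows sharing the same id are grouped before WorkflowRiboseq.validateInput() runs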
+
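+    //
+    // SUBWORKFLOW: Pre-process reads: QC, UMI extraction, trimming, BBSplit filtering and
+    // rRNA removal, as configured by the options passed below
+    //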
+ PREPROCESS_RNASEQ (
+ ch_fastq,
+ PREPARE_GENOME.out.fasta,
+ PREPARE_GENOME.out.transcript_fasta,
+ PREPARE_GENOME.out.gtf,
+ PREPARE_GENOME.out.salmon_index,
+ !params.salmon_index && !('salmon' in prepareToolIndices),
+ params.skip_bbsplit,
+ PREPARE_GENOME.out.bbsplit_index,
+ params.skip_fastqc || params.skip_qc,
+ params.skip_trimming,
+ params.trimmer,
+ params.min_trimmed_reads,
+ params.save_trimmed,
+ params.remove_ribo_rna,
+ ch_ribo_db,
+ params.with_umi,
+ params.skip_umi_extract,
+ params.umi_discard_read
)
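+    // NB: the unnamed boolean argument above is true only when no --salmon_index was supplied
+    // and 'salmon' is not in prepareToolIndices, presumably signalling that the subworkflow
+    // needs to build its own Salmon index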
- ch_versions = ch_versions.mix(FASTQC.out.versions.first())
+ ch_multiqc_files = ch_multiqc_files.mix(PREPROCESS_RNASEQ.out.multiqc_files)
+ ch_versions = ch_versions.mix(PREPROCESS_RNASEQ.out.versions)
CUSTOM_DUMPSOFTWAREVERSIONS (
ch_versions.unique().collectFile(name: 'collated_versions.yml')
)
+ ch_multiqc_files = ch_multiqc_files.mix(CUSTOM_DUMPSOFTWAREVERSIONS.out.mqc_yml)
//
// MODULE: MultiQC
//
- workflow_summary = WorkflowRiboseq.paramsSummaryMultiqc(workflow, summary_params)
- ch_workflow_summary = Channel.value(workflow_summary)
+ if (!params.skip_multiqc) {
+ workflow_summary = WorkflowRiboseq.paramsSummaryMultiqc(workflow, summary_params)
+ ch_workflow_summary = Channel.value(workflow_summary)
- methods_description = WorkflowRiboseq.methodsDescriptionText(workflow, ch_multiqc_custom_methods_description, params)
- ch_methods_description = Channel.value(methods_description)
+ methods_description = WorkflowRiboseq.methodsDescriptionText(workflow, ch_multiqc_custom_methods_description, params)
+ ch_methods_description = Channel.value(methods_description)
- ch_multiqc_files = Channel.empty()
- ch_multiqc_files = ch_multiqc_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml'))
- ch_multiqc_files = ch_multiqc_files.mix(ch_methods_description.collectFile(name: 'methods_description_mqc.yaml'))
- ch_multiqc_files = ch_multiqc_files.mix(CUSTOM_DUMPSOFTWAREVERSIONS.out.mqc_yml.collect())
- ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip.collect{it[1]}.ifEmpty([]))
-
- MULTIQC (
- ch_multiqc_files.collect(),
- ch_multiqc_config.toList(),
- ch_multiqc_custom_config.toList(),
- ch_multiqc_logo.toList()
- )
- multiqc_report = MULTIQC.out.report.toList()
+ ch_multiqc_files = ch_multiqc_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml'))
+ ch_multiqc_files = ch_multiqc_files.mix(ch_methods_description.collectFile(name: 'methods_description_mqc.yaml'))
+ ch_multiqc_files = ch_multiqc_files.mix(CUSTOM_DUMPSOFTWAREVERSIONS.out.mqc_yml.collect())
+
+ MULTIQC (
+ ch_multiqc_files.collect(),
+ ch_multiqc_config.toList(),
+ ch_multiqc_custom_config.toList(),
+ ch_multiqc_logo.toList()
+ )
+ multiqc_report = MULTIQC.out.report.toList()
+
+ }
}
/*
|