From bd462262c3d12aeda948add55db6a59dae7833db Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Fri, 24 Jan 2025 14:37:56 +0100 Subject: [PATCH] Start replacing custom Bowtie2 modules with official nf-core ones --- modules.json | 10 + modules/nf-core/bowtie2/align/environment.yml | 7 + modules/nf-core/bowtie2/align/main.nf | 116 ++++ modules/nf-core/bowtie2/align/meta.yml | 132 ++++ .../bowtie2/align/tests/cram_crai.config | 5 + .../bowtie2/align/tests/large_index.config | 5 + .../nf-core/bowtie2/align/tests/main.nf.test | 623 ++++++++++++++++++ .../bowtie2/align/tests/main.nf.test.snap | 311 +++++++++ .../nf-core/bowtie2/align/tests/sam.config | 5 + .../nf-core/bowtie2/align/tests/sam2.config | 5 + modules/nf-core/bowtie2/align/tests/tags.yml | 2 + modules/nf-core/bowtie2/build/environment.yml | 5 + modules/nf-core/bowtie2/build/main.nf | 42 ++ modules/nf-core/bowtie2/build/meta.yml | 49 ++ .../nf-core/bowtie2/build/tests/main.nf.test | 31 + .../bowtie2/build/tests/main.nf.test.snap | 45 ++ modules/nf-core/bowtie2/build/tests/tags.yml | 2 + subworkflows/local/binning_preparation.nf | 45 +- subworkflows/local/shortread_preprocessing.nf | 38 +- 19 files changed, 1439 insertions(+), 39 deletions(-) create mode 100644 modules/nf-core/bowtie2/align/environment.yml create mode 100644 modules/nf-core/bowtie2/align/main.nf create mode 100644 modules/nf-core/bowtie2/align/meta.yml create mode 100644 modules/nf-core/bowtie2/align/tests/cram_crai.config create mode 100644 modules/nf-core/bowtie2/align/tests/large_index.config create mode 100644 modules/nf-core/bowtie2/align/tests/main.nf.test create mode 100644 modules/nf-core/bowtie2/align/tests/main.nf.test.snap create mode 100644 modules/nf-core/bowtie2/align/tests/sam.config create mode 100644 modules/nf-core/bowtie2/align/tests/sam2.config create mode 100644 modules/nf-core/bowtie2/align/tests/tags.yml create mode 100644 modules/nf-core/bowtie2/build/environment.yml create mode 100644 modules/nf-core/bowtie2/build/main.nf create mode 100644 modules/nf-core/bowtie2/build/meta.yml create mode 100644 modules/nf-core/bowtie2/build/tests/main.nf.test create mode 100644 modules/nf-core/bowtie2/build/tests/main.nf.test.snap create mode 100644 modules/nf-core/bowtie2/build/tests/tags.yml diff --git a/modules.json b/modules.json index 05e3b3dd..d241aa28 100644 --- a/modules.json +++ b/modules.json @@ -36,6 +36,16 @@ "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", "installed_by": ["modules"] }, + "bowtie2/align": { + "branch": "master", + "git_sha": "0e9cb409c32d3ec4f0d3804588e4778971c09b7e", + "installed_by": ["modules"] + }, + "bowtie2/build": { + "branch": "master", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "installed_by": ["modules"] + }, "cat/fastq": { "branch": "master", "git_sha": "5c460c5a4736974abde2843294f35307ee2b0e5e", diff --git a/modules/nf-core/bowtie2/align/environment.yml b/modules/nf-core/bowtie2/align/environment.yml new file mode 100644 index 00000000..9090f218 --- /dev/null +++ b/modules/nf-core/bowtie2/align/environment.yml @@ -0,0 +1,7 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::bowtie2=2.5.2 + - bioconda::samtools=1.18 + - conda-forge::pigz=2.6 diff --git a/modules/nf-core/bowtie2/align/main.nf b/modules/nf-core/bowtie2/align/main.nf new file mode 100644 index 00000000..7b2f25eb --- /dev/null +++ b/modules/nf-core/bowtie2/align/main.nf @@ -0,0 +1,116 @@ +process BOWTIE2_ALIGN { + tag "$meta.id" + label 'process_high' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/mulled-v2-ac74a7f02cebcfcc07d8e8d1d750af9c83b4d45a:f70b31a2db15c023d641c32f433fb02cd04df5a6-0' : + 'biocontainers/mulled-v2-ac74a7f02cebcfcc07d8e8d1d750af9c83b4d45a:f70b31a2db15c023d641c32f433fb02cd04df5a6-0' }" + + input: + tuple val(meta) , path(reads) + tuple val(meta2), path(index) + tuple val(meta3), path(fasta) + val save_unaligned + val sort_bam + + output: + tuple val(meta), path("*.sam") , emit: sam , optional:true + tuple val(meta), path("*.bam") , emit: bam , optional:true + tuple val(meta), path("*.cram") , emit: cram , optional:true + tuple val(meta), path("*.csi") , emit: csi , optional:true + tuple val(meta), path("*.crai") , emit: crai , optional:true + tuple val(meta), path("*.log") , emit: log + tuple val(meta), path("*fastq.gz") , emit: fastq , optional:true + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: "" + def args2 = task.ext.args2 ?: "" + def prefix = task.ext.prefix ?: "${meta.id}" + + def unaligned = "" + def reads_args = "" + if (meta.single_end) { + unaligned = save_unaligned ? "--un-gz ${prefix}.unmapped.fastq.gz" : "" + reads_args = "-U ${reads}" + } else { + unaligned = save_unaligned ? "--un-conc-gz ${prefix}.unmapped.fastq.gz" : "" + reads_args = "-1 ${reads[0]} -2 ${reads[1]}" + } + + def samtools_command = sort_bam ? 'sort' : 'view' + def extension_pattern = /(--output-fmt|-O)+\s+(\S+)/ + def extension_matcher = (args2 =~ extension_pattern) + def extension = extension_matcher.getCount() > 0 ? extension_matcher[0][2].toLowerCase() : "bam" + def reference = fasta && extension=="cram" ? "--reference ${fasta}" : "" + if (!fasta && extension=="cram") error "Fasta reference is required for CRAM output" + + """ + INDEX=`find -L ./ -name "*.rev.1.bt2" | sed "s/\\.rev.1.bt2\$//"` + [ -z "\$INDEX" ] && INDEX=`find -L ./ -name "*.rev.1.bt2l" | sed "s/\\.rev.1.bt2l\$//"` + [ -z "\$INDEX" ] && echo "Bowtie2 index files not found" 1>&2 && exit 1 + + bowtie2 \\ + -x \$INDEX \\ + $reads_args \\ + --threads $task.cpus \\ + $unaligned \\ + $args \\ + 2> >(tee ${prefix}.bowtie2.log >&2) \\ + | samtools $samtools_command $args2 --threads $task.cpus ${reference} -o ${prefix}.${extension} - + + if [ -f ${prefix}.unmapped.fastq.1.gz ]; then + mv ${prefix}.unmapped.fastq.1.gz ${prefix}.unmapped_1.fastq.gz + fi + + if [ -f ${prefix}.unmapped.fastq.2.gz ]; then + mv ${prefix}.unmapped.fastq.2.gz ${prefix}.unmapped_2.fastq.gz + fi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bowtie2: \$(echo \$(bowtie2 --version 2>&1) | sed 's/^.*bowtie2-align-s version //; s/ .*\$//') + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + pigz: \$( pigz --version 2>&1 | sed 's/pigz //g' ) + END_VERSIONS + """ + + stub: + def args2 = task.ext.args2 ?: "" + def prefix = task.ext.prefix ?: "${meta.id}" + def extension_pattern = /(--output-fmt|-O)+\s+(\S+)/ + def extension = (args2 ==~ extension_pattern) ? (args2 =~ extension_pattern)[0][2].toLowerCase() : "bam" + def create_unmapped = "" + if (meta.single_end) { + create_unmapped = save_unaligned ? "touch ${prefix}.unmapped.fastq.gz" : "" + } else { + create_unmapped = save_unaligned ? "touch ${prefix}.unmapped_1.fastq.gz && touch ${prefix}.unmapped_2.fastq.gz" : "" + } + if (!fasta && extension=="cram") error "Fasta reference is required for CRAM output" + + def create_index = "" + if (extension == "cram") { + create_index = "touch ${prefix}.crai" + } else if (extension == "bam") { + create_index = "touch ${prefix}.csi" + } + + """ + touch ${prefix}.${extension} + ${create_index} + touch ${prefix}.bowtie2.log + ${create_unmapped} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bowtie2: \$(echo \$(bowtie2 --version 2>&1) | sed 's/^.*bowtie2-align-s version //; s/ .*\$//') + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + pigz: \$( pigz --version 2>&1 | sed 's/pigz //g' ) + END_VERSIONS + """ + +} diff --git a/modules/nf-core/bowtie2/align/meta.yml b/modules/nf-core/bowtie2/align/meta.yml new file mode 100644 index 00000000..7436097b --- /dev/null +++ b/modules/nf-core/bowtie2/align/meta.yml @@ -0,0 +1,132 @@ +name: bowtie2_align +description: Align reads to a reference genome using bowtie2 +keywords: + - align + - map + - fasta + - fastq + - genome + - reference +tools: + - bowtie2: + description: | + Bowtie 2 is an ultrafast and memory-efficient tool for aligning + sequencing reads to long reference sequences. + homepage: http://bowtie-bio.sourceforge.net/bowtie2/index.shtml + documentation: http://bowtie-bio.sourceforge.net/bowtie2/manual.shtml + doi: 10.1038/nmeth.1923 + licence: ["GPL-3.0-or-later"] + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: | + List of input FastQ files of size 1 and 2 for single-end and paired-end data, + respectively. + - - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test', single_end:false ] + - index: + type: file + description: Bowtie2 genome index files + pattern: "*.ebwt" + - - meta3: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test', single_end:false ] + - fasta: + type: file + description: Bowtie2 genome fasta file + pattern: "*.fasta" + - - save_unaligned: + type: boolean + description: | + Save reads that do not map to the reference (true) or discard them (false) + (default: false) + - - sort_bam: + type: boolean + description: use samtools sort (true) or samtools view (false) + pattern: "true or false" +output: + - sam: + - meta: + type: file + description: Output SAM file containing read alignments + pattern: "*.sam" + - "*.sam": + type: file + description: Output SAM file containing read alignments + pattern: "*.sam" + - bam: + - meta: + type: file + description: Output BAM file containing read alignments + pattern: "*.bam" + - "*.bam": + type: file + description: Output BAM file containing read alignments + pattern: "*.bam" + - cram: + - meta: + type: file + description: Output CRAM file containing read alignments + pattern: "*.cram" + - "*.cram": + type: file + description: Output CRAM file containing read alignments + pattern: "*.cram" + - csi: + - meta: + type: file + description: Output SAM/BAM index for large inputs + pattern: "*.csi" + - "*.csi": + type: file + description: Output SAM/BAM index for large inputs + pattern: "*.csi" + - crai: + - meta: + type: file + description: Output CRAM index + pattern: "*.crai" + - "*.crai": + type: file + description: Output CRAM index + pattern: "*.crai" + - log: + - meta: + type: file + description: Alignment log + pattern: "*.log" + - "*.log": + type: file + description: Alignment log + pattern: "*.log" + - fastq: + - meta: + type: file + description: Unaligned FastQ files + pattern: "*.fastq.gz" + - "*fastq.gz": + type: file + description: Unaligned FastQ files + pattern: "*.fastq.gz" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@joseespinosa" + - "@drpatelh" +maintainers: + - "@joseespinosa" + - "@drpatelh" diff --git a/modules/nf-core/bowtie2/align/tests/cram_crai.config b/modules/nf-core/bowtie2/align/tests/cram_crai.config new file mode 100644 index 00000000..03f1d5e5 --- /dev/null +++ b/modules/nf-core/bowtie2/align/tests/cram_crai.config @@ -0,0 +1,5 @@ +process { + withName: BOWTIE2_ALIGN { + ext.args2 = '--output-fmt cram --write-index' + } +} diff --git a/modules/nf-core/bowtie2/align/tests/large_index.config b/modules/nf-core/bowtie2/align/tests/large_index.config new file mode 100644 index 00000000..fdc1c59d --- /dev/null +++ b/modules/nf-core/bowtie2/align/tests/large_index.config @@ -0,0 +1,5 @@ +process { + withName: BOWTIE2_BUILD { + ext.args = '--large-index' + } +} \ No newline at end of file diff --git a/modules/nf-core/bowtie2/align/tests/main.nf.test b/modules/nf-core/bowtie2/align/tests/main.nf.test new file mode 100644 index 00000000..0de5950f --- /dev/null +++ b/modules/nf-core/bowtie2/align/tests/main.nf.test @@ -0,0 +1,623 @@ +nextflow_process { + + name "Test Process BOWTIE2_ALIGN" + script "../main.nf" + process "BOWTIE2_ALIGN" + tag "modules" + tag "modules_nfcore" + tag "bowtie2" + tag "bowtie2/build" + tag "bowtie2/align" + + test("sarscov2 - fastq, index, fasta, false, false - bam") { + + setup { + run("BOWTIE2_BUILD") { + script "../../build/main.nf" + process { + """ + input[0] = [ + [ id:'test'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + """ + } + } + } + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ] + input[1] = BOWTIE2_BUILD.out.index + input[2] = [[ id:'test'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)] + input[3] = false //save_unaligned + input[4] = false //sort + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + file(process.out.bam[0][1]).name, + process.out.log, + process.out.fastq, + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - fastq, index, fasta, false, false - sam") { + + config "./sam.config" + setup { + run("BOWTIE2_BUILD") { + script "../../build/main.nf" + process { + """ + input[0] = [ + [ id:'test'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + """ + } + } + } + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ] + input[1] = BOWTIE2_BUILD.out.index + input[2] = [[ id:'test'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)] + input[3] = false //save_unaligned + input[4] = false //sort + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + file(process.out.sam[0][1]).readLines()[0..4], + process.out.log, + process.out.fastq, + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - fastq, index, fasta, false, false - sam2") { + + config "./sam2.config" + setup { + run("BOWTIE2_BUILD") { + script "../../build/main.nf" + process { + """ + input[0] = [ + [ id:'test'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + """ + } + } + } + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ] + input[1] = BOWTIE2_BUILD.out.index + input[2] = [[ id:'test'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)] + input[3] = false //save_unaligned + input[4] = false //sort + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + file(process.out.sam[0][1]).readLines()[0..4], + process.out.log, + process.out.fastq, + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - fastq, index, fasta, false, true - bam") { + + setup { + run("BOWTIE2_BUILD") { + script "../../build/main.nf" + process { + """ + input[0] = [ + [ id:'test'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + """ + } + } + } + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ] + input[1] = BOWTIE2_BUILD.out.index + input[2] = [[ id:'test'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)] + input[3] = false //save_unaligned + input[4] = false //sort + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + file(process.out.bam[0][1]).name, + process.out.log, + process.out.fastq, + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - [fastq1, fastq2], index, fasta, false, false - bam") { + + setup { + run("BOWTIE2_BUILD") { + script "../../build/main.nf" + process { + """ + input[0] = [ + [ id:'test'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + """ + } + } + } + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) + ] + ] + input[1] = BOWTIE2_BUILD.out.index + input[2] = [[ id:'test'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)] + input[3] = false //save_unaligned + input[4] = false //sort + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + file(process.out.bam[0][1]).name, + process.out.log, + process.out.fastq, + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - [fastq1, fastq2], index, fasta, false, true - bam") { + + setup { + run("BOWTIE2_BUILD") { + script "../../build/main.nf" + process { + """ + input[0] = [ + [ id:'test'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + """ + } + } + } + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) + ] + ] + input[1] = BOWTIE2_BUILD.out.index + input[2] = [[ id:'test'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)] + input[3] = false //save_unaligned + input[4] = false //sort + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + file(process.out.bam[0][1]).name, + process.out.log, + process.out.fastq, + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - fastq, large_index, fasta, false, false - bam") { + + config "./large_index.config" + setup { + run("BOWTIE2_BUILD") { + script "../../build/main.nf" + process { + """ + input[0] = [ + [ id:'test'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + """ + } + } + } + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ] + input[1] = BOWTIE2_BUILD.out.index + input[2] = [[ id:'test'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)] + input[3] = false //save_unaligned + input[4] = false //sort + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + file(process.out.bam[0][1]).name, + process.out.log, + process.out.fastq, + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - [fastq1, fastq2], large_index, fasta, false, false - bam") { + + config "./large_index.config" + setup { + run("BOWTIE2_BUILD") { + script "../../build/main.nf" + process { + """ + input[0] = [ + [ id:'test'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + """ + } + } + } + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) + ] + ] + input[1] = BOWTIE2_BUILD.out.index + input[2] = [[ id:'test'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)] + input[3] = false //save_unaligned + input[4] = false //sort + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + file(process.out.bam[0][1]).name, + process.out.log, + process.out.fastq, + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - [fastq1, fastq2], index, fasta, true, false - bam") { + + setup { + run("BOWTIE2_BUILD") { + script "../../build/main.nf" + process { + """ + input[0] = [ + [ id:'test'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + """ + } + } + } + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) + ] + ] + input[1] = BOWTIE2_BUILD.out.index + input[2] = [[ id:'test'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)] + input[3] = false //save_unaligned + input[4] = false //sort + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + file(process.out.bam[0][1]).name, + process.out.log, + process.out.fastq, + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - fastq, index, fasta, true, false - bam") { + + setup { + run("BOWTIE2_BUILD") { + script "../../build/main.nf" + process { + """ + input[0] = [ + [ id:'test'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + """ + } + } + } + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ] + input[1] = BOWTIE2_BUILD.out.index + input[2] = [[ id:'test'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)] + input[3] = false //save_unaligned + input[4] = false //sort + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + file(process.out.bam[0][1]).name, + process.out.log, + process.out.fastq, + process.out.versions + ).match() } + + ) + } + + } + + test("sarscov2 - [fastq1, fastq2], index, fasta, true, true - cram") { + + config "./cram_crai.config" + setup { + run("BOWTIE2_BUILD") { + script "../../build/main.nf" + process { + """ + input[0] = [ + [ id:'test'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + """ + } + } + } + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) + ] + ] + input[1] = BOWTIE2_BUILD.out.index + input[2] = [[ id:'test'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)] + input[3] = false //save_unaligned + input[4] = true //sort + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + file(process.out.cram[0][1]).name, + file(process.out.crai[0][1]).name + ).match() } + ) + } + + } + + test("sarscov2 - [fastq1, fastq2], index, fasta, false, false - stub") { + + options "-stub" + setup { + run("BOWTIE2_BUILD") { + script "../../build/main.nf" + process { + """ + input[0] = [ + [ id:'test'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + """ + } + } + } + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) + ] + ] + input[1] = BOWTIE2_BUILD.out.index + input[2] = [[ id:'test'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)] + input[3] = false //save_unaligned + input[4] = false //sort + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + file(process.out.bam[0][1]).name, + file(process.out.csi[0][1]).name, + file(process.out.log[0][1]).name, + process.out.fastq, + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - fastq, index, fasta, true, false - stub") { + + options "-stub" + setup { + run("BOWTIE2_BUILD") { + script "../../build/main.nf" + process { + """ + input[0] = [ + [ id:'test'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + """ + } + } + } + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ] + input[1] = BOWTIE2_BUILD.out.index + input[2] = [[ id:'test'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)] + input[3] = false //save_unaligned + input[4] = false //sort + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + file(process.out.bam[0][1]).name, + file(process.out.csi[0][1]).name, + file(process.out.log[0][1]).name, + process.out.fastq, + process.out.versions + ).match() } + ) + } + + } + +} diff --git a/modules/nf-core/bowtie2/align/tests/main.nf.test.snap b/modules/nf-core/bowtie2/align/tests/main.nf.test.snap new file mode 100644 index 00000000..028e7da6 --- /dev/null +++ b/modules/nf-core/bowtie2/align/tests/main.nf.test.snap @@ -0,0 +1,311 @@ +{ + "sarscov2 - [fastq1, fastq2], large_index, fasta, false, false - bam": { + "content": [ + "test.bam", + [ + [ + { + "id": "test", + "single_end": false + }, + "test.bowtie2.log:md5,bd89ce1b28c93bf822bae391ffcedd19" + ] + ], + [ + + ], + [ + "versions.yml:md5,01d18ab035146ea790e9a0f70adb758f" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-18T13:19:25.337323" + }, + "sarscov2 - fastq, index, fasta, false, false - sam2": { + "content": [ + [ + "ERR5069949.2151832\t16\tMT192765.1\t17453\t42\t150M\t*\t0\t0\tACGCACATTGCTAACTAAGGGCACACTAGAACCAGAATATTTCAATTCAGTGTGTAGACTTATGAAAACTATAGGTCCAGACATGTTCCTCGGAACTTGTCGGCGTTGTCCTGCTGAAATTGTTGACACTGTGAGTGCTTTGGTTTATGA\tAAAA versions.yml + "${task.process}": + bowtie2: \$(echo \$(bowtie2 --version 2>&1) | sed 's/^.*bowtie2-align-s version //; s/ .*\$//') + END_VERSIONS + """ + + stub: + """ + mkdir bowtie2 + touch bowtie2/${fasta.baseName}.{1..4}.bt2 + touch bowtie2/${fasta.baseName}.rev.{1,2}.bt2 + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bowtie2: \$(echo \$(bowtie2 --version 2>&1) | sed 's/^.*bowtie2-align-s version //; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/bowtie2/build/meta.yml b/modules/nf-core/bowtie2/build/meta.yml new file mode 100644 index 00000000..2729a92e --- /dev/null +++ b/modules/nf-core/bowtie2/build/meta.yml @@ -0,0 +1,49 @@ +name: bowtie2_build +description: Builds bowtie index for reference genome +keywords: + - build + - index + - fasta + - genome + - reference +tools: + - bowtie2: + description: | + Bowtie 2 is an ultrafast and memory-efficient tool for aligning + sequencing reads to long reference sequences. + homepage: http://bowtie-bio.sourceforge.net/bowtie2/index.shtml + documentation: http://bowtie-bio.sourceforge.net/bowtie2/manual.shtml + doi: 10.1038/nmeth.1923 + licence: ["GPL-3.0-or-later"] + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test', single_end:false ] + - fasta: + type: file + description: Input genome fasta file +output: + - index: + - meta: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test', single_end:false ] + - bowtie2: + type: file + description: Bowtie2 genome index files + pattern: "*.bt2" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@joseespinosa" + - "@drpatelh" +maintainers: + - "@joseespinosa" + - "@drpatelh" diff --git a/modules/nf-core/bowtie2/build/tests/main.nf.test b/modules/nf-core/bowtie2/build/tests/main.nf.test new file mode 100644 index 00000000..16376025 --- /dev/null +++ b/modules/nf-core/bowtie2/build/tests/main.nf.test @@ -0,0 +1,31 @@ +nextflow_process { + + name "Test Process BOWTIE2_BUILD" + script "modules/nf-core/bowtie2/build/main.nf" + process "BOWTIE2_BUILD" + tag "modules" + tag "modules_nfcore" + tag "bowtie2" + tag "bowtie2/build" + + test("Should run without failures") { + + when { + process { + """ + input[0] = [ + [ id:'test' ], + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + ] + """ + } + } + + then { + assert process.success + assert snapshot(process.out).match() + } + + } + +} diff --git a/modules/nf-core/bowtie2/build/tests/main.nf.test.snap b/modules/nf-core/bowtie2/build/tests/main.nf.test.snap new file mode 100644 index 00000000..6875e021 --- /dev/null +++ b/modules/nf-core/bowtie2/build/tests/main.nf.test.snap @@ -0,0 +1,45 @@ +{ + "Should run without failures": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + [ + "genome.1.bt2:md5,cbe3d0bbea55bc57c99b4bfa25b5fbdf", + "genome.2.bt2:md5,47b153cd1319abc88dda532462651fcf", + "genome.3.bt2:md5,4ed93abba181d8dfab2e303e33114777", + "genome.4.bt2:md5,c25be5f8b0378abf7a58c8a880b87626", + "genome.rev.1.bt2:md5,52be6950579598a990570fbcf5372184", + "genome.rev.2.bt2:md5,e3b4ef343dea4dd571642010a7d09597" + ] + ] + ], + "1": [ + "versions.yml:md5,1df11e9b82891527271c889c880d3974" + ], + "index": [ + [ + { + "id": "test" + }, + [ + "genome.1.bt2:md5,cbe3d0bbea55bc57c99b4bfa25b5fbdf", + "genome.2.bt2:md5,47b153cd1319abc88dda532462651fcf", + "genome.3.bt2:md5,4ed93abba181d8dfab2e303e33114777", + "genome.4.bt2:md5,c25be5f8b0378abf7a58c8a880b87626", + "genome.rev.1.bt2:md5,52be6950579598a990570fbcf5372184", + "genome.rev.2.bt2:md5,e3b4ef343dea4dd571642010a7d09597" + ] + ] + ], + "versions": [ + "versions.yml:md5,1df11e9b82891527271c889c880d3974" + ] + } + ], + "timestamp": "2023-11-23T11:51:01.107681997" + } +} \ No newline at end of file diff --git a/modules/nf-core/bowtie2/build/tests/tags.yml b/modules/nf-core/bowtie2/build/tests/tags.yml new file mode 100644 index 00000000..81aa61da --- /dev/null +++ b/modules/nf-core/bowtie2/build/tests/tags.yml @@ -0,0 +1,2 @@ +bowtie2/build: + - modules/nf-core/bowtie2/build/** diff --git a/subworkflows/local/binning_preparation.nf b/subworkflows/local/binning_preparation.nf index 60f63a26..f885c3db 100644 --- a/subworkflows/local/binning_preparation.nf +++ b/subworkflows/local/binning_preparation.nf @@ -2,50 +2,49 @@ * Binning preparation with Bowtie2 */ -include { BOWTIE2_ASSEMBLY_BUILD } from '../../modules/local/bowtie2_assembly_build' -include { BOWTIE2_ASSEMBLY_ALIGN } from '../../modules/local/bowtie2_assembly_align' +include { BOWTIE2_BUILD as BOWTIE2_ASSEMBLY_BUILD } from '../../modules/nf-core/bowtie2/build' +include { BOWTIE2_ALIGN as BOWTIE2_ASSEMBLY_ALIGN } from '../../modules/nf-core/bowtie2/align' workflow BINNING_PREPARATION { take: - assemblies // channel: [ val(meta), path(assembly) ] - reads // channel: [ val(meta), [ reads ] ] + assemblies // channel: [ val(meta), path(assembly) ] + reads // channel: [ val(meta), [ reads ] ] main: // build bowtie2 index for all assemblies - BOWTIE2_ASSEMBLY_BUILD ( assemblies ) + BOWTIE2_ASSEMBLY_BUILD(assemblies) // combine assemblies with sample reads for binning depending on specified mapping mode - if (params.binning_map_mode == 'all'){ + if (params.binning_map_mode == 'all') { // combine assemblies with reads of all samples - ch_bowtie2_input = BOWTIE2_ASSEMBLY_BUILD.out.assembly_index - .combine(reads) - } else if (params.binning_map_mode == 'group'){ + ch_bowtie2_input = BOWTIE2_ASSEMBLY_BUILD.out.index.combine(reads) + } + else if (params.binning_map_mode == 'group') { // combine assemblies with reads of samples from same group - ch_reads_bowtie2 = reads.map{ meta, reads -> [ meta.group, meta, reads ] } - ch_bowtie2_input = BOWTIE2_ASSEMBLY_BUILD.out.assembly_index - .map { meta, assembly, index -> [ meta.group, meta, assembly, index ] } + ch_reads_bowtie2 = reads.map { meta, fastq -> [meta.group, meta, fastq] } + ch_bowtie2_input = BOWTIE2_ASSEMBLY_BUILD.out.index + .map { meta, assembly, index -> [meta.group, meta, assembly, index] } .combine(ch_reads_bowtie2, by: 0) - .map { group, assembly_meta, assembly, index, reads_meta, reads -> [ assembly_meta, assembly, index, reads_meta, reads ] } - - } else { + .map { _group, assembly_meta, assembly, index, reads_meta, fastq -> [assembly_meta, assembly, index, reads_meta, fastq] } + } + else { // i.e. --binning_map_mode 'own' // combine assemblies (not co-assembled) with reads from own sample - ch_reads_bowtie2 = reads.map{ meta, reads -> [ meta.id, meta, reads ] } - ch_bowtie2_input = BOWTIE2_ASSEMBLY_BUILD.out.assembly_index - .map { meta, assembly, index -> [ meta.id, meta, assembly, index ] } + ch_reads_bowtie2 = reads.map { meta, fastq -> [meta.id, meta, fastq] } + ch_bowtie2_input = BOWTIE2_ASSEMBLY_BUILD.out.index + .map { meta, assembly, index -> [meta.id, meta, assembly, index] } .combine(ch_reads_bowtie2, by: 0) - .map { id, assembly_meta, assembly, index, reads_meta, reads -> [ assembly_meta, assembly, index, reads_meta, reads ] } - + .map { _id, assembly_meta, assembly, index, reads_meta, fastq -> [assembly_meta, assembly, index, reads_meta, fastq] } } - BOWTIE2_ASSEMBLY_ALIGN ( ch_bowtie2_input ) + BOWTIE2_ASSEMBLY_ALIGN(ch_bowtie2_input) // group mappings for one assembly ch_grouped_mappings = BOWTIE2_ASSEMBLY_ALIGN.out.mappings .groupTuple(by: 0) - .map { meta, assembly, bams, bais -> [ meta, assembly.sort()[0], bams, bais ] } // multiple symlinks to the same assembly -> use first of sorted list + .map { meta, assembly, bams, bais -> [meta, assembly.sort()[0], bams, bais] } emit: - bowtie2_assembly_multiqc = BOWTIE2_ASSEMBLY_ALIGN.out.log.map { assembly_meta, reads_meta, log -> [ log ] } + bowtie2_assembly_multiqc = BOWTIE2_ASSEMBLY_ALIGN.out.log.map { _assembly_meta, _reads_meta, log -> [log] } bowtie2_version = BOWTIE2_ASSEMBLY_ALIGN.out.versions grouped_mappings = ch_grouped_mappings } diff --git a/subworkflows/local/shortread_preprocessing.nf b/subworkflows/local/shortread_preprocessing.nf index abcee2c3..9a633b8f 100644 --- a/subworkflows/local/shortread_preprocessing.nf +++ b/subworkflows/local/shortread_preprocessing.nf @@ -2,18 +2,18 @@ * SHORTREAD_PREPROCESSING: Preprocessing and QC for short reads */ -include { FASTQC as FASTQC_RAW } from '../../modules/nf-core/fastqc/main' -include { FASTQC as FASTQC_TRIMMED } from '../../modules/nf-core/fastqc/main' -include { FASTP } from '../../modules/nf-core/fastp/main' -include { ADAPTERREMOVAL as ADAPTERREMOVAL_PE } from '../../modules/nf-core/adapterremoval/main' -include { ADAPTERREMOVAL as ADAPTERREMOVAL_SE } from '../../modules/nf-core/adapterremoval/main' -include { BOWTIE2_REMOVAL_BUILD as BOWTIE2_HOST_REMOVAL_BUILD } from '../../modules/local/bowtie2_removal_build' -include { BOWTIE2_REMOVAL_ALIGN as BOWTIE2_HOST_REMOVAL_ALIGN } from '../../modules/local/bowtie2_removal_align' -include { BOWTIE2_REMOVAL_BUILD as BOWTIE2_PHIX_REMOVAL_BUILD } from '../../modules/local/bowtie2_removal_build' -include { BOWTIE2_REMOVAL_ALIGN as BOWTIE2_PHIX_REMOVAL_ALIGN } from '../../modules/local/bowtie2_removal_align' -include { CAT_FASTQ } from '../../modules/nf-core/cat/fastq/main' -include { SEQTK_MERGEPE } from '../../modules/nf-core/seqtk/mergepe/main' -include { BBMAP_BBNORM } from '../../modules/nf-core/bbmap/bbnorm/main' +include { FASTQC as FASTQC_RAW } from '../../modules/nf-core/fastqc/main' +include { FASTQC as FASTQC_TRIMMED } from '../../modules/nf-core/fastqc/main' +include { FASTP } from '../../modules/nf-core/fastp/main' +include { ADAPTERREMOVAL as ADAPTERREMOVAL_PE } from '../../modules/nf-core/adapterremoval/main' +include { ADAPTERREMOVAL as ADAPTERREMOVAL_SE } from '../../modules/nf-core/adapterremoval/main' +include { BOWTIE2_BUILD as BOWTIE2_HOST_REMOVAL_BUILD } from '../../modules/nf-core/bowtie2/build/main.nf' +include { BOWTIE2_ALIGN as BOWTIE2_HOST_REMOVAL_ALIGN } from '../../modules/nf-core/bowtie2/align/main.nf' +include { BOWTIE2_BUILD as BOWTIE2_PHIX_REMOVAL_BUILD } from '../../modules/nf-core/bowtie2/build/main.nf' +include { BOWTIE2_ALIGN as BOWTIE2_PHIX_REMOVAL_ALIGN } from '../../modules/nf-core/bowtie2/align/main.nf' +include { CAT_FASTQ } from '../../modules/nf-core/cat/fastq/main' +include { SEQTK_MERGEPE } from '../../modules/nf-core/seqtk/mergepe/main' +include { BBMAP_BBNORM } from '../../modules/nf-core/bbmap/bbnorm/main' workflow SHORTREAD_PREPROCESSING { take: @@ -75,7 +75,7 @@ workflow SHORTREAD_PREPROCESSING { } else { BOWTIE2_HOST_REMOVAL_BUILD( - ch_host_fasta + ch_host_fasta.map { fasta -> [[id: fasta.getSimpleName()], fasta] } ) ch_host_bowtie2index = BOWTIE2_HOST_REMOVAL_BUILD.out.index } @@ -88,8 +88,11 @@ workflow SHORTREAD_PREPROCESSING { BOWTIE2_HOST_REMOVAL_ALIGN( ch_short_reads_prepped, ch_host_bowtie2index, + [[:], []], + true, + false, ) - ch_short_reads_hostremoved = BOWTIE2_HOST_REMOVAL_ALIGN.out.reads + ch_short_reads_hostremoved = BOWTIE2_HOST_REMOVAL_ALIGN.out.fastq ch_versions = ch_versions.mix(BOWTIE2_HOST_REMOVAL_ALIGN.out.versions.first()) ch_multiqc_files = ch_multiqc_files.mix(BOWTIE2_HOST_REMOVAL_ALIGN.out.log) } @@ -99,13 +102,16 @@ workflow SHORTREAD_PREPROCESSING { if (!params.keep_phix) { BOWTIE2_PHIX_REMOVAL_BUILD( - ch_phix_db_file + ch_phix_db_file.map { fasta -> [[id: fasta.getSimpleName()], fasta] } ) BOWTIE2_PHIX_REMOVAL_ALIGN( ch_short_reads_hostremoved, BOWTIE2_PHIX_REMOVAL_BUILD.out.index, + [[:], []], + true, + false, ) - ch_short_reads_phixremoved = BOWTIE2_PHIX_REMOVAL_ALIGN.out.reads + ch_short_reads_phixremoved = BOWTIE2_PHIX_REMOVAL_ALIGN.out.fastq ch_versions = ch_versions.mix(BOWTIE2_PHIX_REMOVAL_ALIGN.out.versions.first()) ch_multiqc_files = ch_multiqc_files.mix(BOWTIE2_PHIX_REMOVAL_ALIGN.out.log) }