# Patch summary (nf-core/funcscan, changelog entry #243): compress the combined AMPcombi summary table.
#
# What the hunks below do:
#   - CHANGELOG.md: adds the #243 entry under `Fixed`.
#   - LICENSE, nextflow.config (manifest.author): add Louisa Perelo and Moritz E. Beber to the author list.
#   - conf/modules.config: adds a `withName: TABIX_BGZIP` selector that publishes to
#     ${params.outdir}/reports/ampcombi using params.publish_dir_mode and skips versions.yml.
#   - docs/output.md: documents the summary as `ampcombi_complete_summary.csv.gz`.
#   - modules.json: registers the nf-core module `tabix/bgzip` pinned to a git_sha.
#   - modules/nf-core/tabix/bgzip/{main.nf,meta.yml}: new files defining process TABIX_BGZIP, which runs
#     `bgzip -c` on its input, or `bgzip -d -c` when the input extension is gz/bgz/bgzf.
#   - subworkflows/local/amp.nf: includes TABIX_BGZIP; the collectFile() call no longer uses storeDir.
#     Its result is combined with the meta map [id:'ampcombi_complete_summary'] and passed to TABIX_BGZIP.
#     With prefix falling back to meta.id and a `.csv` input, the script names its output
#     ampcombi_complete_summary.csv.gz.
#
# NOTE(review): no line in the visible amp.nf hunks mixes TABIX_BGZIP.out.versions into ch_versions,
#   while the subworkflow emits `versions = ch_versions` -- confirm whether that is done outside these hunks.
# NOTE(review): the index-name guard in main.nf calls args.matches() with patterns such as "(^| )-i\\b".
#   If this resolves to java.lang.String.matches (whole-string match), it would only fire when args
#   consists of that single flag -- confirm against the upstream module before relying on `-i` in ext.args.
# NOTE(review): when decompressing, `script:` names the output "${prefix}.${extension}" but `stub:` uses
#   input.getBaseName(); the two can differ when prefix is not the input's base name.
# NOTE(review): meta.yml declares the output pattern as "*." -- looks truncated; confirm upstream.
# NOTE(review): this copy appears to have its line breaks collapsed; it presumably will not apply with
#   `git apply` until the original newlines and indentation are restored.
#
diff --git a/CHANGELOG.md b/CHANGELOG.md index 13cc42f4..a5c654e7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Fixed` +- [#243](https://github.com/nf-core/funcscan/pull/243) Compress the ampcombi_complete_summary.csv in the output directory (by @louperelo) - [#237](https://github.com/nf-core/funcscan/pull/237) Reactivate DeepARG automatic database downloading and CI tests as server is now back up. (by @jfy133) ### `Dependencies` diff --git a/LICENSE b/LICENSE index 076b7ed6..a5c91c03 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,6 @@ MIT License -Copyright (c) Jasmin Frangenberg, Anan Ibrahim, James A. Fellows Yates +Copyright (c) Jasmin Frangenberg, Anan Ibrahim, Louisa Perelo, Moritz E. Beber, James A. Fellows Yates Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/conf/modules.config b/conf/modules.config index 345e4b64..89c62af3 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -454,6 +454,14 @@ process { ext.args = "--tooldict '${ext.tooldict}' --cutoff ${params.amp_ampcombi_cutoff}" } + withName: TABIX_BGZIP { + publishDir = [ + path: { "${params.outdir}/reports/ampcombi" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + ] + } + withName: COMBGC { publishDir = [ path: { "${params.outdir}/reports/combgc" }, diff --git a/docs/output.md b/docs/output.md index 2c25b1fe..84cccff2 100644 --- a/docs/output.md +++ b/docs/output.md @@ -384,7 +384,7 @@ Output Summaries: Output files - `ampcombi/` - - `ampcombi_complete_summary.csv`: summarised output from all AMP workflow tools (except hmmer_hmmsearch) in csv format + - `ampcombi_complete_summary.csv.gz`: summarised output from all AMP workflow tools (except hmmer_hmmsearch) in compressed csv format - `ampcombi.log`: a log file generated by ampcombi - `*_ampcombi.csv`: summarised output in csv for each sample - `*_amp.faa*`: fasta file containing the amino acid sequences for all AMP hits for each sample diff --git a/modules.json b/modules.json index 5dcad8af..9cd43e1c 100644 --- a/modules.json +++ b/modules.json @@ -166,6 +166,11 @@ "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", "installed_by": ["modules"] }, + "tabix/bgzip": { + "branch": "master", + "git_sha": "90294980a903ecebd99ac31d8b6c66af48fa8259", + "installed_by": ["modules"] + }, "untar": { "branch": "master", "git_sha": "cc1f997fab6d8fde5dc0e6e2a310814df5b53ce7", diff --git a/modules/nf-core/tabix/bgzip/main.nf b/modules/nf-core/tabix/bgzip/main.nf new file mode 100644 index 00000000..6dd4e202 --- /dev/null +++ b/modules/nf-core/tabix/bgzip/main.nf @@ -0,0 +1,54 @@ +process TABIX_BGZIP { + tag "$meta.id" + label 'process_single' + + conda "bioconda::tabix=1.11" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/tabix:1.11--hdfd78af_0' : + 'quay.io/biocontainers/tabix:1.11--hdfd78af_0' }" + + input: + tuple val(meta), path(input) + + output: + tuple val(meta), path("${output}") , emit: output + tuple val(meta), path("${output}.gzi"), emit: gzi, optional: true + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + in_bgzip = ["gz", "bgz", "bgzf"].contains(input.getExtension()) + extension = in_bgzip ? input.getBaseName().tokenize(".")[-1] : input.getExtension() + output = in_bgzip ? "${prefix}.${extension}" : "${prefix}.${extension}.gz" + command = in_bgzip ? '-d' : '' + // Name the index according to $prefix, unless a name has been requested + if ((args.matches("(^| )-i\\b") || args.matches("(^| )--index(\$| )")) && !args.matches("(^| )-I\\b") && !args.matches("(^| )--index-name\\b")) { + args = args + " -I ${output}.gzi" + } + """ + bgzip $command -c $args -@${task.cpus} $input > ${output} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + tabix: \$(echo \$(tabix -h 2>&1) | sed 's/^.*Version: //; s/ .*\$//') + END_VERSIONS + """ + + stub: + prefix = task.ext.prefix ?: "${meta.id}" + in_bgzip = ["gz", "bgz", "bgzf"].contains(input.getExtension()) + output = in_bgzip ? 
input.getBaseName() : "${prefix}.${input.getExtension()}.gz" + + """ + touch ${output} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + tabix: \$(echo \$(tabix -h 2>&1) | sed 's/^.*Version: //; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/tabix/bgzip/meta.yml b/modules/nf-core/tabix/bgzip/meta.yml new file mode 100644 index 00000000..c3ea2107 --- /dev/null +++ b/modules/nf-core/tabix/bgzip/meta.yml @@ -0,0 +1,47 @@ +name: tabix_bgzip +description: Compresses/decompresses files +keywords: + - compress + - decompress + - bgzip + - tabix +tools: + - bgzip: + description: | + Bgzip compresses or decompresses files in a similar manner to, and compatible with, gzip. + homepage: https://www.htslib.org/doc/tabix.html + documentation: http://www.htslib.org/doc/bgzip.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - input: + type: file + description: file to compress or to decompress +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - output: + type: file + description: Output compressed/decompressed file + pattern: "*." + - gzi: + type: file + description: Optional gzip index file for compressed inputs + pattern: "*.gzi" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@joseespinosa" + - "@drpatelh" + - "@maxulysse" + - "@nvnieuwk" diff --git a/nextflow.config b/nextflow.config index f8562c09..72180a81 100644 --- a/nextflow.config +++ b/nextflow.config @@ -339,7 +339,7 @@ dag { manifest { name = 'nf-core/funcscan' - author = """Jasmin Frangenberg, Anan Ibrahim, James A. Fellows Yates""" + author = """Jasmin Frangenberg, Anan Ibrahim, Louisa Perelo, Moritz E. Beber, James A. 
Fellows Yates""" homePage = 'https://github.com/nf-core/funcscan' description = """Pipeline for screening for functional components of assembled contigs""" mainScript = 'main.nf' diff --git a/subworkflows/local/amp.nf b/subworkflows/local/amp.nf index faed052c..5d24cda1 100644 --- a/subworkflows/local/amp.nf +++ b/subworkflows/local/amp.nf @@ -8,6 +8,8 @@ include { AMPLIFY_PREDICT } from '../. include { AMPIR } from '../../modules/nf-core/ampir/main' include { AMPCOMBI } from '../../modules/nf-core/ampcombi/main' include { GUNZIP as GUNZIP_MACREL ; GUNZIP as GUNZIP_HMMER } from '../../modules/nf-core/gunzip/main' +include { TABIX_BGZIP } from '../../modules/nf-core/tabix/bgzip/main' + workflow AMP { take: @@ -96,7 +98,11 @@ workflow AMP { input: [ it[0] ] summary: it[1] } - ch_ampcombi_summaries_out.summary.collectFile(name: 'ampcombi_complete_summary.csv', storeDir: "${params.outdir}/reports/ampcombi", keepHeader:true) + + ch_tabix_input = Channel.of(['id':'ampcombi_complete_summary']) + .combine(ch_ampcombi_summaries_out.summary.collectFile(name: 'ampcombi_complete_summary.csv', keepHeader:true)) + + TABIX_BGZIP(ch_tabix_input) emit: versions = ch_versions