diff --git a/subworkflows/local/annotation.nf b/subworkflows/local/annotation.nf
index c1c8e332..a59fe561 100644
--- a/subworkflows/local/annotation.nf
+++ b/subworkflows/local/annotation.nf
@@ -19,72 +19,75 @@ workflow ANNOTATION {
     fasta // tuple val(meta), path(contigs)

     main:
-    ch_versions = Channel.empty()
+    ch_versions      = Channel.empty()
     ch_multiqc_files = Channel.empty()

-    if ( params.annotation_tool == "pyrodigal" || ( params.annotation_tool == "prodigal" && params.run_bgc_screening == true && ( !params.bgc_skip_antismash || !params.bgc_skip_deepbgc || !params.bgc_skip_gecco ) ) || ( params.annotation_tool == "prodigal" && params.run_amp_screening == true ) ) { // Need to use Pyrodigal for most BGC tools and AMPcombi because Prodigal GBK annotation format is incompatible with them.
+    if (params.annotation_tool == "pyrodigal" || (params.annotation_tool == "prodigal" && params.run_bgc_screening == true && (!params.bgc_skip_antismash || !params.bgc_skip_deepbgc || !params.bgc_skip_gecco)) || (params.annotation_tool == "prodigal" && params.run_amp_screening == true)) {
+        // Need to use Pyrodigal for most BGC tools and AMPcombi because Prodigal GBK annotation format is incompatible with them.

-        if ( params.annotation_tool == "prodigal" && params.run_bgc_screening == true && ( !params.bgc_skip_antismash || !params.bgc_skip_deepbgc || !params.bgc_skip_gecco ) ) {
-            log.warn("[nf-core/funcscan] Switching annotation tool to: Pyrodigal. This is because Prodigal annotations (in GBK format) are incompatible with antiSMASH, DeepBGC, and GECCO. If you specifically wish to run Prodigal instead, please skip antiSMASH, DeepBGC, and GECCO or provide a pre-annotated GBK file in the samplesheet.")
-        } else if ( params.annotation_tool == "prodigal" && params.run_amp_screening == true ) {
-            log.warn("[nf-core/funcscan] Switching annotation tool to: Pyrodigal. This is because Prodigal annotations (in GBK format) are incompatible with AMPcombi. If you specifically wish to run Prodigal instead, please skip AMP workflow or provide a pre-annotated GBK file in the samplesheet.")
-        }
-
-        PYRODIGAL ( fasta, "gbk" )
-        GUNZIP_PYRODIGAL_FAA ( PYRODIGAL.out.faa )
-        GUNZIP_PYRODIGAL_FNA ( PYRODIGAL.out.fna)
-        GUNZIP_PYRODIGAL_GBK ( PYRODIGAL.out.annotations )
-        ch_versions = ch_versions.mix(PYRODIGAL.out.versions)
-        ch_versions = ch_versions.mix(GUNZIP_PYRODIGAL_FAA.out.versions)
-        ch_versions = ch_versions.mix(GUNZIP_PYRODIGAL_FNA.out.versions)
-        ch_versions = ch_versions.mix(GUNZIP_PYRODIGAL_GBK.out.versions)
-        ch_annotation_faa = GUNZIP_PYRODIGAL_FAA.out.gunzip
-        ch_annotation_fna = GUNZIP_PYRODIGAL_FNA.out.gunzip
-        ch_annotation_gbk = GUNZIP_PYRODIGAL_GBK.out.gunzip
-
-    } else if ( params.annotation_tool == "prodigal" ) {
-
-        PRODIGAL ( fasta, "gbk" )
-        GUNZIP_PRODIGAL_FAA ( PRODIGAL.out.amino_acid_fasta )
-        GUNZIP_PRODIGAL_FNA ( PRODIGAL.out.nucleotide_fasta)
-        GUNZIP_PRODIGAL_GBK ( PRODIGAL.out.gene_annotations )
-        ch_versions = ch_versions.mix(PRODIGAL.out.versions)
-        ch_versions = ch_versions.mix(GUNZIP_PRODIGAL_FAA.out.versions)
-        ch_versions = ch_versions.mix(GUNZIP_PRODIGAL_FNA.out.versions)
-        ch_versions = ch_versions.mix(GUNZIP_PRODIGAL_GBK.out.versions)
-        ch_annotation_faa = GUNZIP_PRODIGAL_FAA.out.gunzip
-        ch_annotation_fna = GUNZIP_PRODIGAL_FNA.out.gunzip
-        ch_annotation_gbk = GUNZIP_PRODIGAL_GBK.out.gunzip
-
-    } else if ( params.annotation_tool == "prokka" ) {
+        if (params.annotation_tool == "prodigal" && params.run_bgc_screening == true && (!params.bgc_skip_antismash || !params.bgc_skip_deepbgc || !params.bgc_skip_gecco)) {
+            log.warn("[nf-core/funcscan] Switching annotation tool to: Pyrodigal. This is because Prodigal annotations (in GBK format) are incompatible with antiSMASH, DeepBGC, and GECCO. If you specifically wish to run Prodigal instead, please skip antiSMASH, DeepBGC, and GECCO or provide a pre-annotated GBK file in the samplesheet.")
+        }
+        else if (params.annotation_tool == "prodigal" && params.run_amp_screening == true) {
+            log.warn("[nf-core/funcscan] Switching annotation tool to: Pyrodigal. This is because Prodigal annotations (in GBK format) are incompatible with AMPcombi. If you specifically wish to run Prodigal instead, please skip AMP workflow or provide a pre-annotated GBK file in the samplesheet.")
+        }

-        PROKKA ( fasta, [], [] )
-        ch_versions = ch_versions.mix(PROKKA.out.versions)
-        ch_multiqc_files = PROKKA.out.txt.collect{it[1]}.ifEmpty([])
-        ch_annotation_faa = PROKKA.out.faa
-        ch_annotation_fna = PROKKA.out.fna
-        ch_annotation_gbk = PROKKA.out.gbk
+        PYRODIGAL(fasta, "gbk")
+        GUNZIP_PYRODIGAL_FAA(PYRODIGAL.out.faa)
+        GUNZIP_PYRODIGAL_FNA(PYRODIGAL.out.fna)
+        GUNZIP_PYRODIGAL_GBK(PYRODIGAL.out.annotations)
+        ch_versions = ch_versions.mix(PYRODIGAL.out.versions)
+        ch_versions = ch_versions.mix(GUNZIP_PYRODIGAL_FAA.out.versions)
+        ch_versions = ch_versions.mix(GUNZIP_PYRODIGAL_FNA.out.versions)
+        ch_versions = ch_versions.mix(GUNZIP_PYRODIGAL_GBK.out.versions)
+        ch_annotation_faa = GUNZIP_PYRODIGAL_FAA.out.gunzip
+        ch_annotation_fna = GUNZIP_PYRODIGAL_FNA.out.gunzip
+        ch_annotation_gbk = GUNZIP_PYRODIGAL_GBK.out.gunzip
+    }
+    else if (params.annotation_tool == "prodigal") {

-    } else if ( params.annotation_tool == "bakta" ) {
+        PRODIGAL(fasta, "gbk")
+        GUNZIP_PRODIGAL_FAA(PRODIGAL.out.amino_acid_fasta)
+        GUNZIP_PRODIGAL_FNA(PRODIGAL.out.nucleotide_fasta)
+        GUNZIP_PRODIGAL_GBK(PRODIGAL.out.gene_annotations)
+        ch_versions = ch_versions.mix(PRODIGAL.out.versions)
+        ch_versions = ch_versions.mix(GUNZIP_PRODIGAL_FAA.out.versions)
+        ch_versions = ch_versions.mix(GUNZIP_PRODIGAL_FNA.out.versions)
+        ch_versions = ch_versions.mix(GUNZIP_PRODIGAL_GBK.out.versions)
+        ch_annotation_faa = GUNZIP_PRODIGAL_FAA.out.gunzip
+        ch_annotation_fna = GUNZIP_PRODIGAL_FNA.out.gunzip
+        ch_annotation_gbk = GUNZIP_PRODIGAL_GBK.out.gunzip
+    }
+    else if (params.annotation_tool == "prokka") {

-        // BAKTA prepare download
-        if ( params.annotation_bakta_db ) {
-            ch_bakta_db = Channel
-                .fromPath( params.annotation_bakta_db )
-                .first()
-        } else {
-            BAKTA_BAKTADBDOWNLOAD ( )
-            ch_versions = ch_versions.mix( BAKTA_BAKTADBDOWNLOAD.out.versions )
-            ch_bakta_db = ( BAKTA_BAKTADBDOWNLOAD.out.db )
-        }
+        PROKKA(fasta, [], [])
+        ch_versions = ch_versions.mix(PROKKA.out.versions)
+        ch_multiqc_files = PROKKA.out.txt.collect { it[1] }.ifEmpty([])
+        ch_annotation_faa = PROKKA.out.faa
+        ch_annotation_fna = PROKKA.out.fna
+        ch_annotation_gbk = PROKKA.out.gbk
+    }
+    else if (params.annotation_tool == "bakta") {

-        BAKTA_BAKTA ( fasta, ch_bakta_db, [], [] )
-        ch_versions = ch_versions.mix(BAKTA_BAKTA.out.versions)
-        ch_multiqc_files = BAKTA_BAKTA.out.txt.collect{it[1]}.ifEmpty([])
-        ch_annotation_faa = BAKTA_BAKTA.out.faa
-        ch_annotation_fna = BAKTA_BAKTA.out.fna
-        ch_annotation_gbk = BAKTA_BAKTA.out.gbff
+        // BAKTA prepare download
+        if (params.annotation_bakta_db) {
+            ch_bakta_db = Channel
+                .fromPath(params.annotation_bakta_db, checkIfExists: true)
+                .first()
         }
+        else {
+            BAKTA_BAKTADBDOWNLOAD()
+            ch_versions = ch_versions.mix(BAKTA_BAKTADBDOWNLOAD.out.versions)
+            ch_bakta_db = BAKTA_BAKTADBDOWNLOAD.out.db
+        }
+
+        BAKTA_BAKTA(fasta, ch_bakta_db, [], [])
+        ch_versions = ch_versions.mix(BAKTA_BAKTA.out.versions)
+        ch_multiqc_files = BAKTA_BAKTA.out.txt.collect { it[1] }.ifEmpty([])
+        ch_annotation_faa = BAKTA_BAKTA.out.faa
+        ch_annotation_fna = BAKTA_BAKTA.out.fna
+        ch_annotation_gbk = BAKTA_BAKTA.out.gbff
+    }

     emit:
     versions = ch_versions
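Note on the Bakta branch above: it is one instance of the database-staging idiom this diff normalizes everywhere a reference database is needed. A user-supplied path becomes a value channel via Channel.fromPath(...).first(), so every downstream task can read it; otherwise a download module provides the database. A minimal sketch of the idiom, with the hypothetical MYTOOL_DBDOWNLOAD standing in for modules such as BAKTA_BAKTADBDOWNLOAD:

    if (params.mytool_db) {
        // checkIfExists makes a mistyped path fail at channel creation,
        // not at the first process that stages the database
        ch_mytool_db = Channel
            .fromPath(params.mytool_db, checkIfExists: true)
            .first()
    }
    else {
        MYTOOL_DBDOWNLOAD()
        ch_versions  = ch_versions.mix(MYTOOL_DBDOWNLOAD.out.versions)
        ch_mytool_db = MYTOOL_DBDOWNLOAD.out.db
    }

The .first() call is what turns the queue channel into a value channel; without it the database would be consumed by a single task and every later task would starve.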
diff --git a/subworkflows/local/arg.nf b/subworkflows/local/arg.nf
index af618c5c..182f9648 100644
--- a/subworkflows/local/arg.nf
+++ b/subworkflows/local/arg.nf
@@ -2,26 +2,26 @@
     Run ARG screening tools
 */

-include { ABRICATE_RUN } from '../../modules/nf-core/abricate/run/main'
-include { AMRFINDERPLUS_UPDATE } from '../../modules/nf-core/amrfinderplus/update/main'
-include { AMRFINDERPLUS_RUN } from '../../modules/nf-core/amrfinderplus/run/main'
-include { DEEPARG_DOWNLOADDATA } from '../../modules/nf-core/deeparg/downloaddata/main'
-include { DEEPARG_PREDICT } from '../../modules/nf-core/deeparg/predict/main'
-include { FARGENE } from '../../modules/nf-core/fargene/main'
-include { RGI_CARDANNOTATION } from '../../modules/nf-core/rgi/cardannotation/main'
-include { RGI_MAIN } from '../../modules/nf-core/rgi/main/main'
-include { UNTAR as UNTAR_CARD } from '../../modules/nf-core/untar/main'
-include { TABIX_BGZIP as ARG_TABIX_BGZIP } from '../../modules/nf-core/tabix/bgzip/main'
-include { MERGE_TAXONOMY_HAMRONIZATION } from '../../modules/local/merge_taxonomy_hamronization'
-include { HAMRONIZATION_RGI } from '../../modules/nf-core/hamronization/rgi/main'
-include { HAMRONIZATION_FARGENE } from '../../modules/nf-core/hamronization/fargene/main'
-include { HAMRONIZATION_SUMMARIZE } from '../../modules/nf-core/hamronization/summarize/main'
-include { HAMRONIZATION_ABRICATE } from '../../modules/nf-core/hamronization/abricate/main'
-include { HAMRONIZATION_DEEPARG } from '../../modules/nf-core/hamronization/deeparg/main'
-include { HAMRONIZATION_AMRFINDERPLUS } from '../../modules/nf-core/hamronization/amrfinderplus/main'
-include { ARGNORM as ARGNORM_DEEPARG } from '../../modules/nf-core/argnorm/main'
-include { ARGNORM as ARGNORM_ABRICATE } from '../../modules/nf-core/argnorm/main'
-include { ARGNORM as ARGNORM_AMRFINDERPLUS } from '../../modules/nf-core/argnorm/main'
+include { ABRICATE_RUN                     } from '../../modules/nf-core/abricate/run/main'
+include { AMRFINDERPLUS_UPDATE             } from '../../modules/nf-core/amrfinderplus/update/main'
+include { AMRFINDERPLUS_RUN                } from '../../modules/nf-core/amrfinderplus/run/main'
+include { DEEPARG_DOWNLOADDATA             } from '../../modules/nf-core/deeparg/downloaddata/main'
+include { DEEPARG_PREDICT                  } from '../../modules/nf-core/deeparg/predict/main'
+include { FARGENE                          } from '../../modules/nf-core/fargene/main'
+include { RGI_CARDANNOTATION               } from '../../modules/nf-core/rgi/cardannotation/main'
+include { RGI_MAIN                         } from '../../modules/nf-core/rgi/main/main'
+include { UNTAR as UNTAR_CARD              } from '../../modules/nf-core/untar/main'
+include { TABIX_BGZIP as ARG_TABIX_BGZIP   } from '../../modules/nf-core/tabix/bgzip/main'
+include { MERGE_TAXONOMY_HAMRONIZATION     } from '../../modules/local/merge_taxonomy_hamronization'
+include { HAMRONIZATION_RGI                } from '../../modules/nf-core/hamronization/rgi/main'
+include { HAMRONIZATION_FARGENE            } from '../../modules/nf-core/hamronization/fargene/main'
+include { HAMRONIZATION_SUMMARIZE          } from '../../modules/nf-core/hamronization/summarize/main'
+include { HAMRONIZATION_ABRICATE           } from '../../modules/nf-core/hamronization/abricate/main'
+include { HAMRONIZATION_DEEPARG            } from '../../modules/nf-core/hamronization/deeparg/main'
+include { HAMRONIZATION_AMRFINDERPLUS      } from '../../modules/nf-core/hamronization/amrfinderplus/main'
+include { ARGNORM as ARGNORM_DEEPARG       } from '../../modules/nf-core/argnorm/main'
+include { ARGNORM as ARGNORM_ABRICATE      } from '../../modules/nf-core/argnorm/main'
+include { ARGNORM as ARGNORM_AMRFINDERPLUS } from '../../modules/nf-core/argnorm/main'

 workflow ARG {
     take:
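The include block above imports the same ARGNORM module three times under different aliases, one per upstream tool. Each alias is a distinct invocation target, so each can be configured independently (for example via withName selectors in the pipeline configuration). A minimal sketch of the aliasing idiom, assuming the module path from the diff and placeholder report files:

    include { ARGNORM as ARGNORM_DEEPARG  } from '../../modules/nf-core/argnorm/main'
    include { ARGNORM as ARGNORM_ABRICATE } from '../../modules/nf-core/argnorm/main'

    workflow {
        ch_deeparg_tsv  = Channel.of([[id: 'sample1'], file('deeparg.hamronized.tsv')])
        ch_abricate_tsv = Channel.of([[id: 'sample1'], file('abricate.hamronized.tsv')])

        // Each alias runs independently with tool-specific arguments
        ARGNORM_DEEPARG(ch_deeparg_tsv, 'deeparg', 'deeparg')
        ARGNORM_ABRICATE(ch_abricate_tsv, 'abricate', 'ncbi')
    }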
@@ -36,181 +36,179 @@ workflow ARG {
     ch_input_to_hamronization_summarize = Channel.empty()

     // AMRfinderplus run
-    // Prepare channel for database
-    if ( !params.arg_skip_amrfinderplus && params.arg_amrfinderplus_db ) {
+    // Prepare channel for database
+    if (!params.arg_skip_amrfinderplus && params.arg_amrfinderplus_db) {
         ch_amrfinderplus_db = Channel
-            .fromPath( params.arg_amrfinderplus_db )
+            .fromPath(params.arg_amrfinderplus_db, checkIfExists: true)
             .first()
-    } else if ( !params.arg_skip_amrfinderplus && !params.arg_amrfinderplus_db ) {
-        AMRFINDERPLUS_UPDATE( )
-        ch_versions = ch_versions.mix( AMRFINDERPLUS_UPDATE.out.versions )
+    }
+    else if (!params.arg_skip_amrfinderplus && !params.arg_amrfinderplus_db) {
+        AMRFINDERPLUS_UPDATE()
+        ch_versions = ch_versions.mix(AMRFINDERPLUS_UPDATE.out.versions)
         ch_amrfinderplus_db = AMRFINDERPLUS_UPDATE.out.db
     }

-    if ( !params.arg_skip_amrfinderplus ) {
-        AMRFINDERPLUS_RUN ( fastas, ch_amrfinderplus_db )
-        ch_versions = ch_versions.mix( AMRFINDERPLUS_RUN.out.versions )
-
-        // Reporting
-        HAMRONIZATION_AMRFINDERPLUS ( AMRFINDERPLUS_RUN.out.report, 'tsv', AMRFINDERPLUS_RUN.out.tool_version, AMRFINDERPLUS_RUN.out.db_version )
-        ch_versions = ch_versions.mix( HAMRONIZATION_AMRFINDERPLUS.out.versions )
-        ch_input_to_hamronization_summarize = ch_input_to_hamronization_summarize.mix( HAMRONIZATION_AMRFINDERPLUS.out.tsv )
+    if (!params.arg_skip_amrfinderplus) {
+        AMRFINDERPLUS_RUN(fastas, ch_amrfinderplus_db)
+        ch_versions = ch_versions.mix(AMRFINDERPLUS_RUN.out.versions)

-        if ( !params.arg_skip_argnorm ) {
-            ch_input_to_argnorm_amrfinderplus = HAMRONIZATION_AMRFINDERPLUS.out.tsv.filter{ meta, file -> !file.isEmpty() }
-            ARGNORM_AMRFINDERPLUS ( ch_input_to_argnorm_amrfinderplus, 'amrfinderplus', 'ncbi' )
-            ch_versions = ch_versions.mix( ARGNORM_AMRFINDERPLUS.out.versions )
+        // Reporting
+        HAMRONIZATION_AMRFINDERPLUS(AMRFINDERPLUS_RUN.out.report, 'tsv', AMRFINDERPLUS_RUN.out.tool_version, AMRFINDERPLUS_RUN.out.db_version)
+        ch_versions = ch_versions.mix(HAMRONIZATION_AMRFINDERPLUS.out.versions)
+        ch_input_to_hamronization_summarize = ch_input_to_hamronization_summarize.mix(HAMRONIZATION_AMRFINDERPLUS.out.tsv)
+
+        if (!params.arg_skip_argnorm) {
+            ch_input_to_argnorm_amrfinderplus = HAMRONIZATION_AMRFINDERPLUS.out.tsv.filter { meta, file -> !file.isEmpty() }
+            ARGNORM_AMRFINDERPLUS(ch_input_to_argnorm_amrfinderplus, 'amrfinderplus', 'ncbi')
+            ch_versions = ch_versions.mix(ARGNORM_AMRFINDERPLUS.out.versions)
         }
     }

     // fARGene run
-    if ( !params.arg_skip_fargene ) {
-        ch_fargene_classes = Channel.fromList( params.arg_fargene_hmmmodel.tokenize(',') )
+    if (!params.arg_skip_fargene) {
+        ch_fargene_classes = Channel.fromList(params.arg_fargene_hmmmodel.tokenize(','))

         ch_fargene_input = fastas
-            .combine( ch_fargene_classes )
-            .map {
-                meta, fastas, hmm_class ->
-                    def meta_new = meta.clone()
-                    meta_new['hmm_class'] = hmm_class
-                    [ meta_new, fastas, hmm_class ]
-            }
-            .multiMap {
-                fastas: [ it[0], it[1] ]
-                hmmclass: it[2]
-            }
-
-        FARGENE ( ch_fargene_input.fastas, ch_fargene_input.hmmclass )
-        ch_versions = ch_versions.mix( FARGENE.out.versions )
+            .combine(ch_fargene_classes)
+            .map { meta, fastas, hmm_class ->
+                def meta_new = meta.clone()
+                meta_new['hmm_class'] = hmm_class
+                [meta_new, fastas, hmm_class]
+            }
+            .multiMap {
+                fastas: [it[0], it[1]]
+                hmmclass: it[2]
+            }
+
+        FARGENE(ch_fargene_input.fastas, ch_fargene_input.hmmclass)
+        ch_versions = ch_versions.mix(FARGENE.out.versions)

         // Reporting
         // Note: currently hardcoding versions, has to be updated with every fARGene-update
-        HAMRONIZATION_FARGENE( FARGENE.out.hmm_genes.transpose(), 'tsv', '0.1', '0.1' )
-        ch_versions = ch_versions.mix( HAMRONIZATION_FARGENE.out.versions )
-        ch_input_to_hamronization_summarize = ch_input_to_hamronization_summarize.mix( HAMRONIZATION_FARGENE.out.tsv )
+        HAMRONIZATION_FARGENE(FARGENE.out.hmm_genes.transpose(), 'tsv', '0.1', '0.1')
+        ch_versions = ch_versions.mix(HAMRONIZATION_FARGENE.out.versions)
+        ch_input_to_hamronization_summarize = ch_input_to_hamronization_summarize.mix(HAMRONIZATION_FARGENE.out.tsv)
     }

     // RGI run
-    if ( !params.arg_skip_rgi ) {
+    if (!params.arg_skip_rgi) {

-        if ( !params.arg_rgi_db ) {
+        if (!params.arg_rgi_db) {
             // Download and untar CARD
-            UNTAR_CARD ( [ [], file('https://card.mcmaster.ca/latest/data', checkIfExists: true) ] )
-            ch_versions = ch_versions.mix( UNTAR_CARD.out.versions )
-            rgi_db = UNTAR_CARD.out.untar.map{ it[1] }
-            RGI_CARDANNOTATION ( rgi_db )
+            UNTAR_CARD([[], file('https://card.mcmaster.ca/latest/data', checkIfExists: true)])
+            ch_versions = ch_versions.mix(UNTAR_CARD.out.versions)
+            rgi_db = UNTAR_CARD.out.untar.map { it[1] }
+            RGI_CARDANNOTATION(rgi_db)
             card = RGI_CARDANNOTATION.out.db
-            ch_versions = ch_versions.mix( RGI_CARDANNOTATION.out.versions )
-
-        } else {
+            ch_versions = ch_versions.mix(RGI_CARDANNOTATION.out.versions)
+        }
+        else {
             // Use user-supplied database
-            rgi_db = params.arg_rgi_db
-            if (! rgi_db.contains("card_database_processed") ) {
-                RGI_CARDANNOTATION ( rgi_db )
+            rgi_db = file(params.arg_rgi_db, checkIfExists: true)
+            if (!rgi_db.toString().contains("card_database_processed")) {
+                RGI_CARDANNOTATION(rgi_db)
                 card = RGI_CARDANNOTATION.out.db
-                ch_versions = ch_versions.mix( RGI_CARDANNOTATION.out.versions )
-            } else {
+                ch_versions = ch_versions.mix(RGI_CARDANNOTATION.out.versions)
+            }
+            else {
                 card = rgi_db
             }
         }

-        RGI_MAIN ( fastas, card, [] )
-        ch_versions = ch_versions.mix( RGI_MAIN.out.versions )
+        RGI_MAIN(fastas, card, [])
+        ch_versions = ch_versions.mix(RGI_MAIN.out.versions)

         // Reporting
-        HAMRONIZATION_RGI ( RGI_MAIN.out.tsv, 'tsv', RGI_MAIN.out.tool_version, RGI_MAIN.out.db_version )
-        ch_versions = ch_versions.mix( HAMRONIZATION_RGI.out.versions )
-        ch_input_to_hamronization_summarize = ch_input_to_hamronization_summarize.mix( HAMRONIZATION_RGI.out.tsv )
+        HAMRONIZATION_RGI(RGI_MAIN.out.tsv, 'tsv', RGI_MAIN.out.tool_version, RGI_MAIN.out.db_version)
+        ch_versions = ch_versions.mix(HAMRONIZATION_RGI.out.versions)
+        ch_input_to_hamronization_summarize = ch_input_to_hamronization_summarize.mix(HAMRONIZATION_RGI.out.tsv)
     }

     // DeepARG prepare download
-    if ( !params.arg_skip_deeparg && params.arg_deeparg_db ) {
+    if (!params.arg_skip_deeparg && params.arg_deeparg_db) {
         ch_deeparg_db = Channel
-            .fromPath( params.arg_deeparg_db )
+            .fromPath(params.arg_deeparg_db, checkIfExists: true)
             .first()
-    } else if ( !params.arg_skip_deeparg && !params.arg_deeparg_db ) {
-        DEEPARG_DOWNLOADDATA( )
-        ch_versions = ch_versions.mix( DEEPARG_DOWNLOADDATA.out.versions )
+    }
+    else if (!params.arg_skip_deeparg && !params.arg_deeparg_db) {
+        DEEPARG_DOWNLOADDATA()
+        ch_versions = ch_versions.mix(DEEPARG_DOWNLOADDATA.out.versions)
         ch_deeparg_db = DEEPARG_DOWNLOADDATA.out.db
     }
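The fARGene block above uses a combine-then-multiMap fan-out: combine emits one tuple per (sample, HMM class) pair, and multiMap splits each tuple into two channels that stay in lockstep, so the process receives the FASTA and its class as separate but matched inputs. A self-contained sketch of the same fan-out with made-up sample data:

    workflow {
        ch_fastas  = Channel.of([[id: 'sample1'], file('sample1.fa')])
        ch_classes = Channel.fromList(['class_a', 'class_b'])

        ch_input = ch_fastas
            .combine(ch_classes)
            // record the class in the meta map, equivalent to the clone-and-assign in the diff
            .map { meta, fasta, hmm_class -> [meta + [hmm_class: hmm_class], fasta, hmm_class] }
            .multiMap {
                fastas: [it[0], it[1]]   // [meta, fasta] per pair
                hmmclass: it[2]          // matching class name per pair
            }

        ch_input.fastas.view()
        ch_input.hmmclass.view()
    }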
     // DeepARG run
-    if ( !params.arg_skip_deeparg ) {
+    if (!params.arg_skip_deeparg) {
         annotations
-            .map {
-                it ->
-                    def meta = it[0]
-                    def anno = it[1]
-                    def model = params.arg_deeparg_model
+            .map { it ->
+                def meta = it[0]
+                def anno = it[1]
+                def model = params.arg_deeparg_model

-                    [ meta, anno, model ]
-            }
-            .set { ch_input_for_deeparg }
+                [meta, anno, model]
+            }
+            .set { ch_input_for_deeparg }

-        DEEPARG_PREDICT ( ch_input_for_deeparg, ch_deeparg_db )
-        ch_versions = ch_versions.mix( DEEPARG_PREDICT.out.versions )
+        DEEPARG_PREDICT(ch_input_for_deeparg, ch_deeparg_db)
+        ch_versions = ch_versions.mix(DEEPARG_PREDICT.out.versions)

         // Reporting
         // Note: currently hardcoding versions as unreported by DeepARG
         // Make sure to update on version bump.
-        ch_input_to_hamronization_deeparg = DEEPARG_PREDICT.out.arg.mix( DEEPARG_PREDICT.out.potential_arg )
-        HAMRONIZATION_DEEPARG ( ch_input_to_hamronization_deeparg, 'tsv', '1.0.4', params.arg_deeparg_db_version )
-        ch_versions = ch_versions.mix( HAMRONIZATION_DEEPARG.out.versions )
-        ch_input_to_hamronization_summarize = ch_input_to_hamronization_summarize.mix( HAMRONIZATION_DEEPARG.out.tsv )
-
-        if ( !params.arg_skip_argnorm ) {
-            ch_input_to_argnorm_deeparg = HAMRONIZATION_DEEPARG.out.tsv.filter{ meta, file -> !file.isEmpty() }
-            ARGNORM_DEEPARG ( ch_input_to_argnorm_deeparg, 'deeparg', 'deeparg' )
-            ch_versions = ch_versions.mix( ARGNORM_DEEPARG.out.versions )
+        ch_input_to_hamronization_deeparg = DEEPARG_PREDICT.out.arg.mix(DEEPARG_PREDICT.out.potential_arg)
+        HAMRONIZATION_DEEPARG(ch_input_to_hamronization_deeparg, 'tsv', '1.0.4', params.arg_deeparg_db_version)
+        ch_versions = ch_versions.mix(HAMRONIZATION_DEEPARG.out.versions)
+        ch_input_to_hamronization_summarize = ch_input_to_hamronization_summarize.mix(HAMRONIZATION_DEEPARG.out.tsv)
+
+        if (!params.arg_skip_argnorm) {
+            ch_input_to_argnorm_deeparg = HAMRONIZATION_DEEPARG.out.tsv.filter { meta, file -> !file.isEmpty() }
+            ARGNORM_DEEPARG(ch_input_to_argnorm_deeparg, 'deeparg', 'deeparg')
+            ch_versions = ch_versions.mix(ARGNORM_DEEPARG.out.versions)
         }
     }

     // ABRicate run
-    if ( !params.arg_skip_abricate ) {
+    if (!params.arg_skip_abricate) {
         abricate_dbdir = params.arg_abricate_db ? file(params.arg_abricate_db, checkIfExists: true) : []
-        ABRICATE_RUN ( fastas, abricate_dbdir )
-        ch_versions = ch_versions.mix( ABRICATE_RUN.out.versions )
-
-        HAMRONIZATION_ABRICATE ( ABRICATE_RUN.out.report, 'tsv', '1.0.1', '2021-Mar-27' )
-        ch_versions = ch_versions.mix( HAMRONIZATION_ABRICATE.out.versions )
-        ch_input_to_hamronization_summarize = ch_input_to_hamronization_summarize.mix( HAMRONIZATION_ABRICATE.out.tsv )
-
-        if ( ( params.arg_abricate_db_id == 'ncbi' ||
-            params.arg_abricate_db_id == 'resfinder' ||
-            params.arg_abricate_db_id == 'argannot' ||
-            params.arg_abricate_db_id == 'megares') && !params.arg_skip_argnorm ) {
-            ch_input_to_argnorm_abricate = HAMRONIZATION_ABRICATE.out.tsv.filter{ meta, file -> !file.isEmpty() }
-            ARGNORM_ABRICATE ( ch_input_to_argnorm_abricate, 'abricate', params.arg_abricate_db_id )
-            ch_versions = ch_versions.mix( ARGNORM_ABRICATE.out.versions )
+        ABRICATE_RUN(fastas, abricate_dbdir)
+        ch_versions = ch_versions.mix(ABRICATE_RUN.out.versions)
+
+        HAMRONIZATION_ABRICATE(ABRICATE_RUN.out.report, 'tsv', '1.0.1', '2021-Mar-27')
+        ch_versions = ch_versions.mix(HAMRONIZATION_ABRICATE.out.versions)
+        ch_input_to_hamronization_summarize = ch_input_to_hamronization_summarize.mix(HAMRONIZATION_ABRICATE.out.tsv)
+
+        if ((params.arg_abricate_db_id == 'ncbi' || params.arg_abricate_db_id == 'resfinder' || params.arg_abricate_db_id == 'argannot' || params.arg_abricate_db_id == 'megares') && !params.arg_skip_argnorm) {
+            ch_input_to_argnorm_abricate = HAMRONIZATION_ABRICATE.out.tsv.filter { meta, file -> !file.isEmpty() }
+            ARGNORM_ABRICATE(ch_input_to_argnorm_abricate, 'abricate', params.arg_abricate_db_id)
+            ch_versions = ch_versions.mix(ARGNORM_ABRICATE.out.versions)
         }
     }

     ch_input_to_hamronization_summarize
-        .map{
+        .map {
             it[1]
         }
         .collect()
         .set { ch_input_for_hamronization_summarize }

-    HAMRONIZATION_SUMMARIZE( ch_input_for_hamronization_summarize, params.arg_hamronization_summarizeformat )
-    ch_versions = ch_versions.mix( HAMRONIZATION_SUMMARIZE.out.versions )
+    HAMRONIZATION_SUMMARIZE(ch_input_for_hamronization_summarize, params.arg_hamronization_summarizeformat)
+    ch_versions = ch_versions.mix(HAMRONIZATION_SUMMARIZE.out.versions)

     // MERGE_TAXONOMY
-    if ( params.run_taxa_classification ) {
+    if (params.run_taxa_classification) {

-        ch_mmseqs_taxonomy_list = tsvs.map{ it[1] }.collect()
-        MERGE_TAXONOMY_HAMRONIZATION( HAMRONIZATION_SUMMARIZE.out.tsv, ch_mmseqs_taxonomy_list )
-        ch_versions = ch_versions.mix( MERGE_TAXONOMY_HAMRONIZATION.out.versions )
+        ch_mmseqs_taxonomy_list = tsvs.map { it[1] }.collect()
+        MERGE_TAXONOMY_HAMRONIZATION(HAMRONIZATION_SUMMARIZE.out.tsv, ch_mmseqs_taxonomy_list)
+        ch_versions = ch_versions.mix(MERGE_TAXONOMY_HAMRONIZATION.out.versions)

-        ch_tabix_input = Channel.of( [ 'id':'hamronization_combined_report' ] )
+        ch_tabix_input = Channel
+            .of(['id': 'hamronization_combined_report'])
             .combine(MERGE_TAXONOMY_HAMRONIZATION.out.tsv)
-        ARG_TABIX_BGZIP( ch_tabix_input )
-        ch_versions = ch_versions.mix( ARG_TABIX_BGZIP.out.versions )
+        ARG_TABIX_BGZIP(ch_tabix_input)
+        ch_versions = ch_versions.mix(ARG_TABIX_BGZIP.out.versions)
     }

     emit:
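Ahead of HAMRONIZATION_SUMMARIZE above, every per-tool report channel is mixed into one channel, stripped to the bare file with .map { it[1] }, and gathered into a single emission with .collect(), so the summarizer runs exactly once over all tools and samples. A minimal sketch of that gather step with placeholder tuples:

    workflow {
        ch_reports = Channel.of(
            [[id: 'sample1'], file('deeparg.tsv')],
            [[id: 'sample2'], file('rgi.tsv')]
        )

        ch_reports
            .map { it[1] }   // drop the meta map, keep the report file
            .collect()       // one emission holding all report files
            .view()
    }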
diff --git a/subworkflows/local/taxa_class.nf b/subworkflows/local/taxa_class.nf
index d76e1dff..0bf67312 100644
--- a/subworkflows/local/taxa_class.nf
+++ b/subworkflows/local/taxa_class.nf
@@ -12,47 +12,48 @@ workflow TAXA_CLASS {
     contigs // tuple val(meta), path(contigs)

     main:
-    ch_versions = Channel.empty()
-    ch_mmseqs_db = Channel.empty()
-    ch_taxonomy_querydb = Channel.empty()
+    ch_versions               = Channel.empty()
+    ch_mmseqs_db              = Channel.empty()
+    ch_taxonomy_querydb       = Channel.empty()
     ch_taxonomy_querydb_taxdb = Channel.empty()
-    ch_taxonomy_tsv = Channel.empty()
+    ch_taxonomy_tsv           = Channel.empty()

-    if ( params.taxa_classification_tool == 'mmseqs2') {
+    if (params.taxa_classification_tool == 'mmseqs2') {

         // Download the ref db if not supplied by user
         // MMSEQS_DATABASE
-        if ( params.taxa_classification_mmseqs_db != null ) {
+        if (params.taxa_classification_mmseqs_db != null) {
             ch_mmseqs_db = Channel
-                .fromPath( params.taxa_classification_mmseqs_db )
+                .fromPath(params.taxa_classification_mmseqs_db, checkIfExists: true)
                 .first()
-        } else {
-            MMSEQS_DATABASES ( params.taxa_classification_mmseqs_db_id )
-            ch_versions = ch_versions.mix( MMSEQS_DATABASES.out.versions )
-            ch_mmseqs_db = ( MMSEQS_DATABASES.out.database )
+        }
+        else {
+            MMSEQS_DATABASES(params.taxa_classification_mmseqs_db_id)
+            ch_versions = ch_versions.mix(MMSEQS_DATABASES.out.versions)
+            ch_mmseqs_db = MMSEQS_DATABASES.out.database
         }

         // Create db for query contigs, assign taxonomy and convert to table format
         // MMSEQS_CREATEDB
-        MMSEQS_CREATEDB ( contigs )
-        ch_versions = ch_versions.mix( MMSEQS_CREATEDB.out.versions )
+        MMSEQS_CREATEDB(contigs)
+        ch_versions = ch_versions.mix(MMSEQS_CREATEDB.out.versions)

         // MMSEQS_TAXONOMY
-        MMSEQS_TAXONOMY ( MMSEQS_CREATEDB.out.db, ch_mmseqs_db )
-        ch_versions = ch_versions.mix( MMSEQS_TAXONOMY.out.versions )
+        MMSEQS_TAXONOMY(MMSEQS_CREATEDB.out.db, ch_mmseqs_db)
+        ch_versions = ch_versions.mix(MMSEQS_TAXONOMY.out.versions)
         ch_taxonomy_querydb_taxdb = MMSEQS_TAXONOMY.out.db_taxonomy

         // Join together to ensure in sync
         ch_taxonomy_input_for_createtsv = MMSEQS_CREATEDB.out.db
-            .join(MMSEQS_TAXONOMY.out.db_taxonomy)
-            .multiMap { meta, db, db_taxonomy ->
-                db: [ meta,db ]
-                taxdb: [ meta, db_taxonomy ]
-            }
+            .join(MMSEQS_TAXONOMY.out.db_taxonomy)
+            .multiMap { meta, db, db_taxonomy ->
+                db: [meta, db]
+                taxdb: [meta, db_taxonomy]
+            }

         // MMSEQS_CREATETSV
-        MMSEQS_CREATETSV ( ch_taxonomy_input_for_createtsv.taxdb, [[:],[]], ch_taxonomy_input_for_createtsv.db )
-        ch_versions = ch_versions.mix( MMSEQS_CREATETSV.out.versions )
+        MMSEQS_CREATETSV(ch_taxonomy_input_for_createtsv.taxdb, [[:], []], ch_taxonomy_input_for_createtsv.db)
+        ch_versions = ch_versions.mix(MMSEQS_CREATETSV.out.versions)
         ch_taxonomy_tsv = MMSEQS_CREATETSV.out.tsv
     }

diff --git a/workflows/funcscan.nf b/workflows/funcscan.nf
index 2d6c51fd..48542dd8 100644
--- a/workflows/funcscan.nf
+++ b/workflows/funcscan.nf
@@ -4,11 +4,11 @@
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 */

-include { MULTIQC } from '../modules/nf-core/multiqc/main'
-include { paramsSummaryMap } from 'plugin/nf-schema'
-include { paramsSummaryMultiqc } from '../subworkflows/nf-core/utils_nfcore_pipeline'
-include { softwareVersionsToYAML } from '../subworkflows/nf-core/utils_nfcore_pipeline'
-include { methodsDescriptionText } from '../subworkflows/local/utils_nfcore_funcscan_pipeline'
+include { MULTIQC                } from '../modules/nf-core/multiqc/main'
+include { paramsSummaryMap       } from 'plugin/nf-schema'
+include { paramsSummaryMultiqc   } from '../subworkflows/nf-core/utils_nfcore_pipeline'
+include { softwareVersionsToYAML } from '../subworkflows/nf-core/utils_nfcore_pipeline'
+include { methodsDescriptionText } from '../subworkflows/local/utils_nfcore_funcscan_pipeline'

 /*
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -96,13 +96,18 @@ workflow FUNCSCAN {
         fastas: true
     }

+    println('########## FINISHED GUNZIPPING ################# ')
+
     // Duplicate and filter the duplicated file for long contigs only for BGC
     // This is to speed up BGC run and prevent 'no hits found' fails
     if (params.run_bgc_screening) {
+        println('########## GOING TO FILTER ################# ')
        SEQKIT_SEQ(ch_intermediate_input.fastas.map { meta, fasta, faa, gbk -> [meta, fasta] })
         ch_input_for_annotation = ch_intermediate_input.fastas
+            .dump(tag: 'ch_premap_seqkit')
             .map { meta, fasta, protein, gbk -> [meta, fasta] }
             .mix(SEQKIT_SEQ.out.fastx.map { meta, fasta -> [meta + [category: 'long'], fasta] })
+            .dump(tag: 'ch_prefilterseqkit')
             .filter { meta, fasta ->
                 if (fasta != [] && fasta.isEmpty()) {
                     log.warn("[nf-core/funcscan] Sample ${meta.id} does not have contigs longer than ${params.bgc_mincontiglength} bp. Will not be screened for BGCs.")
@@ -112,6 +117,7 @@ workflow FUNCSCAN {
         ch_versions = ch_versions.mix(SEQKIT_SEQ.out.versions)
     }
     else {
+        println('########## DID NOT FILTER ################# ')
         ch_input_for_annotation = ch_intermediate_input.fastas.map { meta, fasta, protein, gbk -> [meta, fasta] }
     }

@@ -121,21 +127,26 @@ workflow FUNCSCAN {

     // Some tools require annotated FASTAs
     if ((params.run_arg_screening && !params.arg_skip_deeparg) || params.run_amp_screening || params.run_bgc_screening) {
+        println('########## GOING TO ANNOTATE ################# ')
         ANNOTATION(ch_input_for_annotation)
         ch_versions = ch_versions.mix(ANNOTATION.out.versions)

         ch_new_annotation = ch_input_for_annotation
+            .dump(tag: 'preannotationjoin')
             .join(ANNOTATION.out.faa)
             .join(ANNOTATION.out.gbk)
+            .dump(tag: 'postannotationjoin')
     }
     else {
-        ch_new_annotation = ch_intermediate_input.fastas
+        println('########## DID NOT ANNOTATE ################# ')
+        ch_new_annotation = ch_intermediate_input.fastas.dump(tag: 'noannotation')
     }

     // Mix back the preannotated samples with the newly annotated ones
     ch_prepped_input = ch_new_annotation
         .filter { meta, fasta, faa, gbk -> meta.category != 'long' }
         .mix(ch_intermediate_input.preannotated)
+        .dump(tag: 'ch_prepped_input_fastas_premultimap_mixpreannotated')
         .multiMap { meta, fasta, faa, gbk ->
             fastas: [meta, fasta]
             faas: [meta, faa]
@@ -143,10 +154,11 @@ workflow FUNCSCAN {
     }

     if (params.run_bgc_screening) {
-
+        println('########## PREPPING BGC SCREENING ################# ')
         ch_prepped_input_long = ch_new_annotation
             .filter { meta, fasta, faa, gbk -> meta.category == 'long' }
             .mix(ch_intermediate_input.preannotated)
+            .dump(tag: 'ch_prepped_input_fastas_premultimap_annotation')
             .multiMap { meta, fasta, faa, gbk ->
                 fastas: [meta, fasta]
                 faas: [meta, faa]
@@ -158,15 +170,22 @@ workflow FUNCSCAN {
         TAXONOMIC CLASSIFICATION
     */

+    ch_prepped_input.fastas.dump(tag: 'ch_prepped_input_fastas_tax_fasta')
+    ch_prepped_input.faas.dump(tag: 'ch_prepped_input_fastas_tax_faas')
+    ch_prepped_input.gbks.dump(tag: 'ch_prepped_input_fastas_tax_gbks')
+
     // The final subworkflow reports need taxonomic classification.
     // This can be either on NT or AA level depending on annotation.
     // TODO: Only NT at the moment. AA tax. classification will be added only when its PR is merged.
     if (params.run_taxa_classification) {
-        TAXA_CLASS(ch_prepped_input.fastas)
+        println('########## GOING TO TAX CLASS ################# ')
+        TAXA_CLASS(ch_prepped_input.fastas.dump(tag: 'ch_prepped_input_fastas_tax'))
         ch_versions = ch_versions.mix(TAXA_CLASS.out.versions)
         ch_taxonomy_tsv = TAXA_CLASS.out.sample_taxonomy
+        println('########## FINISHED TAX CLASS ################# ')
     }
     else {
+        println('########## DID NOT TAX CLASS ################# ')
         ch_mmseqs_db = Channel.empty()
         ch_taxonomy_querydb = Channel.empty()
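The println banners and .dump(tag: ...) calls added to workflows/funcscan.nf are channel-debugging aids. dump is a no-op by default: the tagged channel contents are only printed when the pipeline is launched with the -dump-channels option, optionally restricted to a comma-separated list of tags, e.g.:

    nextflow run . -profile test,docker --outdir results -dump-channels postannotationjoin,ch_prepped_input_fastas_tax_fasta

Unlike println, which fires unconditionally when the workflow is constructed, dump leaves the channel untouched and does not affect normal runs, so tagged channels can be inspected selectively while debugging the joins around ANNOTATION.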