From 6f38499f76edfbf2e0c8712a52e43c5ccab33a1a Mon Sep 17 00:00:00 2001 From: "Diego Alvarez S." Date: Mon, 28 Oct 2024 17:31:36 -0300 Subject: [PATCH 1/8] Testing param --- nextflow.config | 1 + nextflow_schema.json | 5 +++++ workflows/mag.nf | 20 ++++++++++++-------- 3 files changed, 18 insertions(+), 8 deletions(-) diff --git a/nextflow.config b/nextflow.config index 07325acf..ea0a87e5 100644 --- a/nextflow.config +++ b/nextflow.config @@ -121,6 +121,7 @@ params { refine_bins_dastool = false refine_bins_dastool_threshold = 0.5 postbinning_input = 'raw_bins_only' + include_unbins_in_postbinning = true // Bin QC skip_binqc = false diff --git a/nextflow_schema.json b/nextflow_schema.json index ceb3ac08..26342674 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -704,6 +704,11 @@ "type": "integer", "default": 3000, "description": "Minimum contig length for Tiara to use for domain classification. For accurate classification, should be longer than 3000 bp." + }, + "include_unbins_in_postbinning": { + "type": "boolean", + "description": "Include unbinned contigs in the post-binning output (.", + "default": true } } }, diff --git a/workflows/mag.nf b/workflows/mag.nf index 7afb4316..27262116 100644 --- a/workflows/mag.nf +++ b/workflows/mag.nf @@ -769,7 +769,11 @@ workflow MAG { ch_input_for_postbinning_bins_unbins = ch_binning_results_bins.mix(ch_binning_results_unbins) } - DEPTHS(ch_input_for_postbinning_bins_unbins, BINNING.out.metabat2depths, ch_short_reads) + def ch_input_for_postbinning = params.include_unbins_in_postbinning + ? 
ch_input_for_postbinning_bins_unbins + : ch_input_for_postbinning_bins + + DEPTHS(ch_input_for_postbinning, BINNING.out.metabat2depths, ch_short_reads) ch_input_for_binsummary = DEPTHS.out.depths_summary ch_versions = ch_versions.mix(DEPTHS.out.versions) @@ -777,7 +781,7 @@ workflow MAG { * Bin QC subworkflows: for checking bin completeness with either BUSCO, CHECKM, and/or GUNC */ - ch_input_bins_for_qc = ch_input_for_postbinning_bins_unbins.transpose() + ch_input_bins_for_qc = ch_input_for_postbinning.transpose() if (!params.skip_binqc && params.binqc_tool == 'busco') { /* @@ -821,7 +825,7 @@ workflow MAG { ch_versions = ch_versions.mix(GUNC_QC.out.versions) } else if (params.run_gunc) { - ch_input_bins_for_gunc = ch_input_for_postbinning_bins_unbins.filter { meta, bins -> + ch_input_bins_for_gunc = ch_input_for_postbinning.filter { meta, bins -> meta.domain != "eukarya" } GUNC_QC(ch_input_bins_for_qc, ch_gunc_db, []) @@ -830,7 +834,7 @@ workflow MAG { ch_quast_bins_summary = Channel.empty() if (!params.skip_quast) { - ch_input_for_quast_bins = ch_input_for_postbinning_bins_unbins + ch_input_for_quast_bins = ch_input_for_postbinning .groupTuple() .map { meta, bins -> def new_bins = bins.flatten() @@ -859,7 +863,7 @@ workflow MAG { ch_cat_db = CAT_DB_GENERATE.out.db } CAT( - ch_input_for_postbinning_bins_unbins, + ch_input_for_postbinning, ch_cat_db ) // Group all classification results for each sample in a single file @@ -890,7 +894,7 @@ workflow MAG { ch_gtdbtk_summary = Channel.empty() if (gtdb) { - ch_gtdb_bins = ch_input_for_postbinning_bins_unbins.filter { meta, bins -> + ch_gtdb_bins = ch_input_for_postbinning.filter { meta, bins -> meta.domain != "eukarya" } @@ -925,7 +929,7 @@ workflow MAG { */ if (!params.skip_prokka) { - ch_bins_for_prokka = ch_input_for_postbinning_bins_unbins + ch_bins_for_prokka = ch_input_for_postbinning .transpose() .map { meta, bin -> def meta_new = meta + [id: bin.getBaseName()] @@ -944,7 +948,7 @@ workflow MAG { } if 
(!params.skip_metaeuk && (params.metaeuk_db || params.metaeuk_mmseqs_db)) { - ch_bins_for_metaeuk = ch_input_for_postbinning_bins_unbins + ch_bins_for_metaeuk = ch_input_for_postbinning .transpose() .filter { meta, bin -> meta.domain in ["eukarya", "unclassified"] From dca7b71aa40d958a014e03b967ae62b7d5f45081 Mon Sep 17 00:00:00 2001 From: "Diego Alvarez S." Date: Mon, 28 Oct 2024 17:55:26 -0300 Subject: [PATCH 2/8] Complete schema description --- nextflow_schema.json | 2 +- workflows/mag.nf | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/nextflow_schema.json b/nextflow_schema.json index 26342674..f11868b6 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -707,7 +707,7 @@ }, "include_unbins_in_postbinning": { "type": "boolean", - "description": "Include unbinned contigs in the post-binning output (.", + "description": "Include unbinned contigs in the post-binning steps (bin qc, taxonomic classification and annotation).", "default": true } } diff --git a/workflows/mag.nf b/workflows/mag.nf index 27262116..9a02d1b7 100644 --- a/workflows/mag.nf +++ b/workflows/mag.nf @@ -769,7 +769,7 @@ workflow MAG { ch_input_for_postbinning_bins_unbins = ch_binning_results_bins.mix(ch_binning_results_unbins) } - def ch_input_for_postbinning = params.include_unbins_in_postbinning + ch_input_for_postbinning = params.include_unbins_in_postbinning ? ch_input_for_postbinning_bins_unbins : ch_input_for_postbinning_bins From dd7f135f9816c483c2bc3d0b23c704b85f554396 Mon Sep 17 00:00:00 2001 From: "Diego Alvarez S." 
Date: Mon, 28 Oct 2024 17:55:33 -0300 Subject: [PATCH 3/8] Fix channel --- workflows/mag.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflows/mag.nf b/workflows/mag.nf index 9a02d1b7..19ea6be2 100644 --- a/workflows/mag.nf +++ b/workflows/mag.nf @@ -828,7 +828,7 @@ workflow MAG { ch_input_bins_for_gunc = ch_input_for_postbinning.filter { meta, bins -> meta.domain != "eukarya" } - GUNC_QC(ch_input_bins_for_qc, ch_gunc_db, []) + GUNC_QC(ch_input_bins_for_gunc, ch_gunc_db, []) ch_versions = ch_versions.mix(GUNC_QC.out.versions) } From 705198e3d2437ca1759bde33cff4586cee8b9224 Mon Sep 17 00:00:00 2001 From: "Diego Alvarez S." Date: Mon, 28 Oct 2024 18:13:07 -0300 Subject: [PATCH 4/8] Invert parameter --- nextflow.config | 2 +- nextflow_schema.json | 6 +++--- workflows/mag.nf | 6 +++--- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/nextflow.config b/nextflow.config index ea0a87e5..2bca0b93 100644 --- a/nextflow.config +++ b/nextflow.config @@ -121,7 +121,7 @@ params { refine_bins_dastool = false refine_bins_dastool_threshold = 0.5 postbinning_input = 'raw_bins_only' - include_unbins_in_postbinning = true + exclude_unbins_in_postbinning = false // Bin QC skip_binqc = false diff --git a/nextflow_schema.json b/nextflow_schema.json index f11868b6..233f042a 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -705,10 +705,10 @@ "default": 3000, "description": "Minimum contig length for Tiara to use for domain classification. For accurate classification, should be longer than 3000 bp." 
}, - "include_unbins_in_postbinning": { + "exclude_unbins_in_postbinning": { "type": "boolean", - "description": "Include unbinned contigs in the post-binning steps (bin qc, taxonomic classification and annotation).", - "default": true + "description": "Exclude unbinned contigs in the post-binning steps (bin qc, taxonomic classification and annotation).", + "default": false } } }, diff --git a/workflows/mag.nf b/workflows/mag.nf index 19ea6be2..6ae9bce7 100644 --- a/workflows/mag.nf +++ b/workflows/mag.nf @@ -769,9 +769,9 @@ workflow MAG { ch_input_for_postbinning_bins_unbins = ch_binning_results_bins.mix(ch_binning_results_unbins) } - ch_input_for_postbinning = params.include_unbins_in_postbinning - ? ch_input_for_postbinning_bins_unbins - : ch_input_for_postbinning_bins + ch_input_for_postbinning = params.exclude_unbins_in_postbinning + ? ch_input_for_postbinning_bins + : ch_input_for_postbinning_bins_unbins DEPTHS(ch_input_for_postbinning, BINNING.out.metabat2depths, ch_short_reads) ch_input_for_binsummary = DEPTHS.out.depths_summary From 9504788600dd56f5721050ae03ae32da2dcdb26e Mon Sep 17 00:00:00 2001 From: "Diego Alvarez S." Date: Wed, 30 Oct 2024 02:17:47 -0300 Subject: [PATCH 5/8] Apply suggestions from code review Co-authored-by: James A. 
Fellows Yates --- nextflow.config | 2 +- nextflow_schema.json | 5 +++-- workflows/mag.nf | 2 +- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/nextflow.config b/nextflow.config index 2bca0b93..1ecabbdc 100644 --- a/nextflow.config +++ b/nextflow.config @@ -121,7 +121,7 @@ params { refine_bins_dastool = false refine_bins_dastool_threshold = 0.5 postbinning_input = 'raw_bins_only' - exclude_unbins_in_postbinning = false + exclude_unbins_from_postbinning = false // Bin QC skip_binqc = false diff --git a/nextflow_schema.json b/nextflow_schema.json index 233f042a..923306e5 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -705,9 +705,10 @@ "default": 3000, "description": "Minimum contig length for Tiara to use for domain classification. For accurate classification, should be longer than 3000 bp." }, - "exclude_unbins_in_postbinning": { + "exclude_unbins_from_postbinning": { "type": "boolean", - "description": "Exclude unbinned contigs in the post-binning steps (bin qc, taxonomic classification and annotation).", + "description": "Exclude unbinned contigs in the post-binning steps (bin QC, taxonomic classification, and annotation steps).", + "help": "If you're not interested in assemblies results that are not considered 'genome level', excluding unbinned contigs can greatly speed up downstream steps such as Prokka, that can be quite slow and spin up many tasks.", "default": false } } diff --git a/workflows/mag.nf b/workflows/mag.nf index 6ae9bce7..356c39fe 100644 --- a/workflows/mag.nf +++ b/workflows/mag.nf @@ -769,7 +769,7 @@ workflow MAG { ch_input_for_postbinning_bins_unbins = ch_binning_results_bins.mix(ch_binning_results_unbins) } - ch_input_for_postbinning = params.exclude_unbins_in_postbinning + ch_input_for_postbinning = params.exclude_unbins_from_postbinning ? ch_input_for_postbinning_bins : ch_input_for_postbinning_bins_unbins From af7300f290954bc8a3f82f52ff2576296fb095cc Mon Sep 17 00:00:00 2001 From: "Diego Alvarez S." 
Date: Wed, 30 Oct 2024 02:24:25 -0300 Subject: [PATCH 6/8] Update changelog --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index a902cd7b..7e7c3b9f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,10 +6,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## dev [unreleased] ### `Added` +- [#708](https://github.com/nf-core/mag/pull/708) - Added `dev-exclude-unbinned` parameter to exclude unbinned contigs from post-binning processes (added by @dialvarezs) ### `Changed` ### `Fixed` +- [#708](https://github.com/nf-core/mag/pull/708) - Fixed channel passed as GUNC input (added by @dialvarezs) ### `Dependencies` From aa2520147105030d3a4072ba2e61cd705bcaaba1 Mon Sep 17 00:00:00 2001 From: "Diego Alvarez S." Date: Wed, 30 Oct 2024 02:25:37 -0300 Subject: [PATCH 7/8] Pre-commit --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7e7c3b9f..77b15c09 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,11 +6,13 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## dev [unreleased] ### `Added` + - [#708](https://github.com/nf-core/mag/pull/708) - Added `dev-exclude-unbinned` parameter to exclude unbinned contigs from post-binning processes (added by @dialvarezs) ### `Changed` ### `Fixed` + - [#708](https://github.com/nf-core/mag/pull/708) - Fixed channel passed as GUNC input (added by @dialvarezs) ### `Dependencies` From 1a00c14f3e2a6ead1f03f9ac73052f80d9f58e3d Mon Sep 17 00:00:00 2001 From: "James A. 
Fellows Yates" Date: Wed, 30 Oct 2024 10:48:55 +0100 Subject: [PATCH 8/8] Apply suggestions from code review --- CHANGELOG.md | 2 +- nextflow_schema.json | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 77b15c09..f46adf9c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,7 +7,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Added` -- [#708](https://github.com/nf-core/mag/pull/708) - Added `dev-exclude-unbinned` parameter to exclude unbinned contigs from post-binning processes (added by @dialvarezs) +- [#708](https://github.com/nf-core/mag/pull/708) - Added `--exclude_unbins_from_postbinning` parameter to exclude unbinned contigs from post-binning processes, speeding up Prokka in some cases (added by @dialvarezs) ### `Changed` diff --git a/nextflow_schema.json b/nextflow_schema.json index 923306e5..0875606d 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -708,7 +708,7 @@ "exclude_unbins_from_postbinning": { "type": "boolean", "description": "Exclude unbinned contigs in the post-binning steps (bin QC, taxonomic classification, and annotation steps).", - "help": "If you're not interested in assemblies results that are not considered 'genome level', excluding unbinned contigs can greatly speed up downstream steps such as Prokka, that can be quite slow and spin up many tasks.", + "help": "If you're not interested in assembly results that are not considered 'genome level', excluding unbinned contigs can greatly speed up downstream steps such as Prokka, that can be quite slow and spin up many tasks.", "default": false } }