diff --git a/.editorconfig b/.editorconfig index 72dda289..6d9b74cc 100644 --- a/.editorconfig +++ b/.editorconfig @@ -31,3 +31,7 @@ indent_size = unset # ignore python and markdown [*.{py,md}] indent_style = unset + +# ignore ro-crate metadata files +[**/ro-crate-metadata.json] +insert_final_newline = unset diff --git a/.github/ISSUE_TEMPLATE/bug_report.yml b/.github/ISSUE_TEMPLATE/bug_report.yml index 146d5516..816733b0 100644 --- a/.github/ISSUE_TEMPLATE/bug_report.yml +++ b/.github/ISSUE_TEMPLATE/bug_report.yml @@ -9,7 +9,6 @@ body: - [nf-core website: troubleshooting](https://nf-co.re/usage/troubleshooting) - [nf-core/mag pipeline documentation](https://nf-co.re/mag/usage) - - type: textarea id: description attributes: diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index e2b06df8..829555e4 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -51,6 +51,8 @@ jobs: - name: Check out pipeline code uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4 + with: + fetch-depth: 0 - name: Set up Nextflow uses: nf-core/setup-nextflow@v2 diff --git a/.github/workflows/download_pipeline.yml b/.github/workflows/download_pipeline.yml index 2576cc0c..ab06316e 100644 --- a/.github/workflows/download_pipeline.yml +++ b/.github/workflows/download_pipeline.yml @@ -28,8 +28,23 @@ env: NXF_ANSI_LOG: false jobs: + configure: + runs-on: ubuntu-latest + outputs: + REPO_LOWERCASE: ${{ steps.get_repo_properties.outputs.REPO_LOWERCASE }} + REPOTITLE_LOWERCASE: ${{ steps.get_repo_properties.outputs.REPOTITLE_LOWERCASE }} + REPO_BRANCH: ${{ steps.get_repo_properties.outputs.REPO_BRANCH }} + steps: + - name: Get the repository name and current branch + id: get_repo_properties + run: | + echo "REPO_LOWERCASE=${GITHUB_REPOSITORY,,}" >> "$GITHUB_OUTPUT" + echo "REPOTITLE_LOWERCASE=$(basename ${GITHUB_REPOSITORY,,})" >> "$GITHUB_OUTPUT" + echo "REPO_BRANCH=${{ github.event.inputs.testbranch || 'dev' }}" >> "$GITHUB_OUTPUT" + download: runs-on: ubuntu-latest + needs: configure steps: - name: Install Nextflow uses: nf-core/setup-nextflow@v2 @@ -52,12 +67,6 @@ jobs: python -m pip install --upgrade pip pip install git+https://github.com/nf-core/tools.git@dev - - name: Get the repository name and current branch set as environment variable - run: | - echo "REPO_LOWERCASE=${GITHUB_REPOSITORY,,}" >> ${GITHUB_ENV} - echo "REPOTITLE_LOWERCASE=$(basename ${GITHUB_REPOSITORY,,})" >> ${GITHUB_ENV} - echo "REPO_BRANCH=${{ github.event.inputs.testbranch || 'dev' }}" >> ${GITHUB_ENV} - - name: Make a cache directory for the container images run: | mkdir -p ./singularity_container_images @@ -66,9 +75,9 @@ jobs: env: NXF_SINGULARITY_CACHEDIR: ./singularity_container_images run: | - nf-core pipelines download ${{ env.REPO_LOWERCASE }} \ - --revision ${{ env.REPO_BRANCH }} \ - --outdir ./${{ env.REPOTITLE_LOWERCASE }} \ + nf-core pipelines download ${{ needs.configure.outputs.REPO_LOWERCASE }} \ + --revision ${{ needs.configure.outputs.REPO_BRANCH }} \ + --outdir ./${{ needs.configure.outputs.REPOTITLE_LOWERCASE }} \ --compress "none" \ --container-system 'singularity' \ --container-library "quay.io" -l "docker.io" -l "community.wave.seqera.io/library/" \ @@ -76,14 +85,17 @@ jobs: --download-configuration 'yes' - name: Inspect download - run: tree ./${{ env.REPOTITLE_LOWERCASE }} + run: tree ./${{ needs.configure.outputs.REPOTITLE_LOWERCASE }} + + - name: Inspect container images + run: tree ./singularity_container_images | tee ./container_initial - name: Count the downloaded number 
of container images id: count_initial run: | image_count=$(ls -1 ./singularity_container_images | wc -l | xargs) echo "Initial container image count: $image_count" - echo "IMAGE_COUNT_INITIAL=$image_count" >> ${GITHUB_ENV} + echo "IMAGE_COUNT_INITIAL=$image_count" >> "$GITHUB_OUTPUT" - name: Run the downloaded pipeline (stub) id: stub_run_pipeline continue-on-error: true env: NXF_SINGULARITY_CACHEDIR: ./singularity_container_images NXF_SINGULARITY_HOME_MOUNT: true - run: nextflow run ./${{ env.REPOTITLE_LOWERCASE }}/$( sed 's/\W/_/g' <<< ${{ env.REPO_BRANCH }}) -stub -profile test,singularity --outdir ./results + run: nextflow run ./${{ needs.configure.outputs.REPOTITLE_LOWERCASE }}/$( sed 's/\W/_/g' <<< ${{ needs.configure.outputs.REPO_BRANCH }}) -stub -profile test,singularity --outdir ./results - name: Run the downloaded pipeline (stub run not supported) id: run_pipeline - if: ${{ job.steps.stub_run_pipeline.status == failure() }} + if: ${{ steps.stub_run_pipeline.outcome == 'failure' }} env: NXF_SINGULARITY_CACHEDIR: ./singularity_container_images NXF_SINGULARITY_HOME_MOUNT: true - run: nextflow run ./${{ env.REPOTITLE_LOWERCASE }}/$( sed 's/\W/_/g' <<< ${{ env.REPO_BRANCH }}) -profile test,singularity --outdir ./results + run: nextflow run ./${{ needs.configure.outputs.REPOTITLE_LOWERCASE }}/$( sed 's/\W/_/g' <<< ${{ needs.configure.outputs.REPO_BRANCH }}) -profile test,singularity --outdir ./results - name: Count the downloaded number of container images id: count_afterwards run: | image_count=$(ls -1 ./singularity_container_images | wc -l | xargs) echo "Post-pipeline run container image count: $image_count" - echo "IMAGE_COUNT_AFTER=$image_count" >> ${GITHUB_ENV} + echo "IMAGE_COUNT_AFTER=$image_count" >> "$GITHUB_OUTPUT" - name: Compare container image counts run: | if [ "${{ steps.count_initial.outputs.IMAGE_COUNT_INITIAL }}" -ne "${{ steps.count_afterwards.outputs.IMAGE_COUNT_AFTER }}" ]; then initial_count=${{ steps.count_initial.outputs.IMAGE_COUNT_INITIAL }} final_count=${{ steps.count_afterwards.outputs.IMAGE_COUNT_AFTER }} difference=$((final_count - initial_count)) echo "$difference additional container images were downloaded at runtime. The pipeline has no support for offline runs!" - tree ./singularity_container_images + tree ./singularity_container_images > ./container_afterwards + diff ./container_initial ./container_afterwards exit 1 else echo "The pipeline can be downloaded successfully!" diff --git a/.nf-core.yml b/.nf-core.yml index d15e9969..21c50125 100644 --- a/.nf-core.yml +++ b/.nf-core.yml @@ -5,7 +5,7 @@ lint: - config_defaults: - params.phix_reference - params.lambda_reference -nf_core_version: 3.1.0 +nf_core_version: 3.1.2 repository_type: pipeline template: author: "Hadrien Gourlé, Daniel Straub, Sabrina Krakau, James A. Fellows Yates, diff --git a/.prettierignore b/.prettierignore index 437d763d..edd29f01 100644 --- a/.prettierignore +++ b/.prettierignore @@ -10,3 +10,4 @@ testing/ testing* *.pyc bin/ +ro-crate-metadata.json diff --git a/CHANGELOG.md b/CHANGELOG.md index 5cc1d549..6840f609 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,17 +3,23 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 
-## dev [unreleased] +## v3.3.1dev - [unreleased] ### `Added` ### `Changed` +- [#747](https://github.com/nf-core/mag/pull/747) - Updated to nf-core 3.1.2 `TEMPLATE` (by @jfy133) + ### `Fixed` -### `Dependencies` +- [#748](https://github.com/nf-core/mag/pull/748) - Fix broken phix reference channel when skipping phix removal (reported by @amizeranschi, fix by @muabnezor) +- [#752](https://github.com/nf-core/mag/pull/752) - Fix QUAST results not being displayed when skipping certain steps (reported by @amizeranschi, fix by @jfy133) +- [#753](https://github.com/nf-core/mag/pull/753) - Fix iGenomes reference support for host removal reference genome (reported by @Thomieh73, fix by @jfy133) -### `Dependencies` +### `Dependencies` + +### `Deprecated` ## 3.3.0 [2024-12-19] diff --git a/LICENSE b/LICENSE index d90d555c..4502ee82 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,6 @@ MIT License -Copyright (c) Hadrien Gourlé, Daniel Straub, Sabrina Krakau, James A. Fellows Yates, Maxime Borry +Copyright (c) The nf-core/mag team Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/README.md b/README.md index a5ae232f..bee71ce3 100644 --- a/README.md +++ b/README.md @@ -7,7 +7,8 @@ [![GitHub Actions CI Status](https://github.com/nf-core/mag/actions/workflows/ci.yml/badge.svg)](https://github.com/nf-core/mag/actions/workflows/ci.yml) [![GitHub Actions Linting Status](https://github.com/nf-core/mag/actions/workflows/linting.yml/badge.svg)](https://github.com/nf-core/mag/actions/workflows/linting.yml)[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/mag/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.3589527-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.3589527) -[![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com)[![Cite Publication](https://img.shields.io/badge/Cite%20Us!-Cite%20Publication-orange)](https://doi.org/10.1093/nargab/lqac007) +[![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com) +[![Cite Publication](https://img.shields.io/badge/Cite%20Us!-Cite%20Publication-orange)](https://doi.org/10.1093/nargab/lqac007) [![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A524.04.2-23aa62.svg)](https://www.nextflow.io/) [![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/) @@ -27,6 +28,11 @@ ## Pipeline summary +## Usage + +> [!NOTE] +> If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data. + By default, the pipeline currently performs the following: it supports both short and long reads, quality trims the reads and adapters with [fastp](https://github.com/OpenGene/fastp) and [Porechop](https://github.com/rrwick/Porechop), and performs basic QC with [FastQC](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/), and merges multiple sequencing runs. 
The pipeline then: diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml index 883cf516..5edd0881 100644 --- a/assets/multiqc_config.yml +++ b/assets/multiqc_config.yml @@ -1,8 +1,7 @@ report_comment: > - This report has been generated by the nf-core/mag analysis pipeline. For information about how to - interpret these results, please see the documentation. + This report has been generated by the nf-core/mag + analysis pipeline. For information about how to interpret these results, please see the + documentation. report_section_order: "nf-core-mag-methods-description": order: -1000 @@ -19,7 +18,7 @@ data_format: "yaml" run_modules: - fastqc - fastp - - adapterRemoval + - adapterremoval - custom_content - bowtie2 - busco @@ -36,7 +35,7 @@ top_modules: path_filters_exclude: - "*trimmed*" - "fastp" - - "adapterRemoval" + - "adapterremoval" - "porechop" - "filtlong" - "fastqc": @@ -119,12 +118,12 @@ custom_data: sp: host_removal: fn: "host_removal_metrics.tsv" - adapterRemoval: + adapterremoval: fn: "*_ar2.settings" kraken: fn_re: ".*[kraken2|centrifuge].*report.txt" quast: - fn_re: "report.*.tsv" + fn: "report*.tsv" filtlong: num_lines: 20 fn_re: ".*_filtlong.log" diff --git a/conf/modules.config b/conf/modules.config index 60c681e1..abb4d764 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -28,20 +28,20 @@ process { "--cut_front", "--cut_tail", "--cut_mean_quality ${params.fastp_cut_mean_quality}", - "--length_required ${params.reads_minlength}" + "--length_required ${params.reads_minlength}", ].join(' ').trim() publishDir = [ [ path: { "${params.outdir}/QC_shortreads/fastp/${meta.id}" }, mode: params.publish_dir_mode, - pattern: "*.{html,json}" + pattern: "*.{html,json}", ], [ path: { "${params.outdir}/QC_shortreads/fastp/${meta.id}" }, mode: params.publish_dir_mode, pattern: "*.fastq.gz", - enabled: params.save_clipped_reads - ] + enabled: params.save_clipped_reads, + ], ] ext.prefix = { "${meta.id}_run${meta.run}_fastp" } tag = { "${meta.id}_run${meta.run}" } @@ -52,20 +52,20 @@ process { "--minlength ${params.reads_minlength}", "--adapter1 ${params.adapterremoval_adapter1} --adapter2 ${params.adapterremoval_adapter2}", "--minquality ${params.adapterremoval_minquality} --trimns", - params.adapterremoval_trim_quality_stretch ? "--trim_qualities" : "--trimwindows 4" + params.adapterremoval_trim_quality_stretch ? "--trim_qualities" : "--trimwindows 4", ].join(' ').trim() publishDir = [ [ path: { "${params.outdir}/QC_shortreads/adapterremoval/${meta.id}" }, mode: params.publish_dir_mode, - pattern: "*.{settings}" + pattern: "*.{settings}", ], [ path: { "${params.outdir}/QC_shortreads/adapterremoval/${meta.id}" }, mode: params.publish_dir_mode, pattern: "*.{truncated,discarded}.gz", - enabled: params.save_clipped_reads - ] + enabled: params.save_clipped_reads, + ], ] ext.prefix = { "${meta.id}_run${meta.run}_ar2" } tag = { "${meta.id}_run${meta.run}" } @@ -76,12 +76,12 @@ process { "--minlength ${params.reads_minlength}", "--adapter1 ${params.adapterremoval_adapter1}", "--minquality ${params.adapterremoval_minquality} --trimns", - params.adapterremoval_trim_quality_stretch ? "--trim_qualities" : "--trimwindows 4" + params.adapterremoval_trim_quality_stretch ? 
"--trim_qualities" : "--trimwindows 4", ].join(' ').trim() publishDir = [ path: { "${params.outdir}/QC_shortreads/adapterremoval/${meta.id}" }, mode: params.publish_dir_mode, - pattern: "*.{settings}" + pattern: "*.{settings}", ] ext.prefix = { "${meta.id}_run${meta.run}_ar2" } tag = { "${meta.id}_run${meta.run}" } @@ -93,14 +93,14 @@ process { [ path: { "${params.outdir}/QC_shortreads/remove_phix" }, mode: params.publish_dir_mode, - pattern: "*.log" + pattern: "*.log", ], [ path: { "${params.outdir}/QC_shortreads/remove_phix" }, mode: params.publish_dir_mode, pattern: "*.unmapped*.fastq.gz", - enabled: params.save_phixremoved_reads - ] + enabled: params.save_phixremoved_reads, + ], ] tag = { "${meta.id}_run${meta.run}" } } @@ -113,14 +113,14 @@ process { [ path: { "${params.outdir}/QC_shortreads/remove_host" }, mode: params.publish_dir_mode, - pattern: "*{.log,read_ids.txt}" + pattern: "*{.log,read_ids.txt}", ], [ path: { "${params.outdir}/QC_shortreads/remove_host" }, mode: params.publish_dir_mode, pattern: "*.unmapped*.fastq.gz", - enabled: params.save_hostremoved_reads - ] + enabled: params.save_hostremoved_reads, + ], ] tag = { "${meta.id}_run${meta.run}" } } @@ -131,7 +131,7 @@ process { publishDir = [ path: { "${params.outdir}/QC_shortreads/fastqc" }, mode: params.publish_dir_mode, - pattern: "*.html" + pattern: "*.html", ] tag = { "${meta.id}_run${meta.run}" } } @@ -139,22 +139,22 @@ process { withName: BBMAP_BBNORM { ext.args = [ params.bbnorm_target ? "target=${params.bbnorm_target}" : '', - params.bbnorm_min ? "min=${params.bbnorm_min}" : '' + params.bbnorm_min ? "min=${params.bbnorm_min}" : '', ].join(' ').trim() publishDir = [ [ path: { "${params.outdir}/bbmap/bbnorm/logs" }, enabled: params.save_bbnorm_reads, mode: params.publish_dir_mode, - pattern: "*.log" + pattern: "*.log", ], [ path: { "${params.outdir}/bbmap/bbnorm/" }, mode: 'copy', enabled: params.save_bbnorm_reads, mode: params.publish_dir_mode, - pattern: "*.fastq.gz" - ] + pattern: "*.fastq.gz", + ], ] } @@ -163,7 +163,7 @@ process { path: { "${params.outdir}/QC_longreads/porechop" }, mode: params.publish_dir_mode, pattern: "*_porechop_trimmed.fastq.gz", - enabled: params.save_porechop_reads + enabled: params.save_porechop_reads, ] ext.prefix = { "${meta.id}_run${meta.run}_porechop_trimmed" } } @@ -173,7 +173,7 @@ process { path: { "${params.outdir}/QC_longreads/porechop" }, mode: params.publish_dir_mode, pattern: "*_porechop-abi_trimmed.fastq.gz", - enabled: params.save_porechop_reads + enabled: params.save_porechop_reads, ] ext.prefix = { "${meta.id}_run${meta.run}_porechop-abi_trimmed" } } @@ -190,7 +190,7 @@ process { path: { "${params.outdir}/QC_longreads/Filtlong" }, mode: params.publish_dir_mode, pattern: "*_filtlong.fastq.gz", - enabled: params.save_filtered_longreads + enabled: params.save_filtered_longreads, ] ext.prefix = { "${meta.id}_run${meta.run}_filtlong" } } @@ -199,20 +199,20 @@ process { ext.args = [ "--min-len ${params.longreads_min_length}", params.longreads_min_quality ? 
"--min-qual ${params.longreads_min_quality}" : '', - "-vv" + "-vv", ].join(' ').trim() publishDir = [ [ path: { "${params.outdir}/QC_longreads/Nanoq" }, mode: params.publish_dir_mode, pattern: "*_nanoq_filtered.fastq.gz", - enabled: params.save_filtered_longreads + enabled: params.save_filtered_longreads, ], [ path: { "${params.outdir}/QC_longreads/Nanoq" }, mode: params.publish_dir_mode, - pattern: "*_nanoq_filtered.stats" - ] + pattern: "*_nanoq_filtered.stats", + ], ] ext.prefix = { "${meta.id}_run${meta.run}_nanoq_filtered" } } @@ -222,14 +222,14 @@ process { [ path: { "${params.outdir}/QC_longreads/NanoLyse" }, mode: params.publish_dir_mode, - pattern: "*.log" + pattern: "*.log", ], [ path: { "${params.outdir}/QC_longreads/NanoLyse" }, mode: params.publish_dir_mode, pattern: "*_nanolyse.fastq.gz", - enabled: params.save_lambdaremoved_reads - ] + enabled: params.save_lambdaremoved_reads, + ], ] ext.prefix = { "${meta.id}_run${meta.run}_lambdafiltered" } } @@ -237,20 +237,20 @@ process { withName: CHOPPER { ext.args2 = [ params.longreads_min_quality ? "--quality ${params.longreads_min_quality}" : '', - params.longreads_min_length ? "--minlength ${params.longreads_min_length}" : '' + params.longreads_min_length ? "--minlength ${params.longreads_min_length}" : '', ].join(' ').trim() publishDir = [ [ path: { "${params.outdir}/QC_longreads/Chopper" }, mode: params.publish_dir_mode, - pattern: "*.log" + pattern: "*.log", ], [ path: { "${params.outdir}/QC_longreads/Chopper" }, mode: params.publish_dir_mode, pattern: "*_chopper.fastq.gz", - enabled: params.save_lambdaremoved_reads || params.save_filtered_longreads - ] + enabled: params.save_lambdaremoved_reads || params.save_filtered_longreads, + ], ] ext.prefix = { "${meta.id}_run${meta.run}_chopper" } } @@ -261,13 +261,13 @@ process { [ "-p raw_", "--title ${meta.id}_raw", - "-c darkblue" + "-c darkblue", ].join(' ').trim() } publishDir = [ path: { "${params.outdir}/QC_longreads/NanoPlot/${meta.id}" }, mode: params.publish_dir_mode, - pattern: "*.{png,html,txt}" + pattern: "*.{png,html,txt}", ] } @@ -276,13 +276,13 @@ process { [ "-p filtered_", "--title ${meta.id}_filtered", - "-c darkblue" + "-c darkblue", ].join(' ').trim() } publishDir = [ path: { "${params.outdir}/QC_longreads/NanoPlot/${meta.id}" }, mode: params.publish_dir_mode, - pattern: "*.{png,html,txt}" + pattern: "*.{png,html,txt}", ] } @@ -354,7 +354,7 @@ process { publishDir = [ path: { "${params.outdir}/Taxonomy/kraken2/${meta.id}" }, mode: params.publish_dir_mode, - pattern: "*.txt" + pattern: "*.txt", ] } @@ -403,12 +403,12 @@ process { ext.args = [ "--cleanup", "--min-score ${params.genomad_min_score}", - "--splits ${params.genomad_splits}" + "--splits ${params.genomad_splits}", ].join(' ').trim() publishDir = [ path: { "${params.outdir}/VirusIdentification/geNomad/${meta.id}" }, mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename }, ] } @@ -419,14 +419,14 @@ process { [ path: { "${params.outdir}/Assembly/${assembly_meta.assembler}/QC/${assembly_meta.id}" }, mode: params.publish_dir_mode, - pattern: "*.log" + pattern: "*.log", ], [ path: { "${params.outdir}/Assembly/${assembly_meta.assembler}/QC/${assembly_meta.id}" }, mode: params.publish_dir_mode, pattern: "*.{bam,bai}", - enabled: params.save_assembly_mapped_reads - ] + enabled: params.save_assembly_mapped_reads, + ], ] } @@ -438,7 +438,7 @@ process { publishDir = [ path: { "${params.outdir}/GenomeBinning" }, mode: params.publish_dir_mode, - pattern: "*.{png,tsv}" + pattern: "*.{png,tsv}", ] } @@ -453,7 +453,7 @@ process { publishDir = [ path: { "${params.outdir}/GenomeBinning/QC/BUSCO" }, mode: params.publish_dir_mode, - pattern: "*.{log,err,faa.gz,fna.gz,gff,txt}" + pattern: "*.{log,err,faa.gz,fna.gz,gff,txt}", ] } @@ -475,7 +475,7 @@ process { publishDir = [ path: { "${params.outdir}/GenomeBinning/QC/CheckM" }, mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, ] } @@ -485,7 +485,7 @@ process { publishDir = [ path: { "${params.outdir}/GenomeBinning/QC/CheckM" }, mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, ] } @@ -494,7 +494,7 @@ process { publishDir = [ path: { "${params.outdir}/GenomeBinning/QC" }, mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, ] } @@ -504,7 +504,7 @@ process { mode: params.publish_dir_mode, overwrite: false, saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, - enabled: params.save_checkm2_data + enabled: params.save_checkm2_data, ] } @@ -513,7 +513,7 @@ process { publishDir = [ path: { "${params.outdir}/GenomeBinning/QC/CheckM2" }, mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, ] } @@ -522,7 +522,7 @@ process { path: { "${params.outdir}/GenomeBinning/QC/GUNC" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, - enabled: params.gunc_save_db + enabled: params.gunc_save_db, ] } @@ -531,7 +531,7 @@ process { publishDir = [ path: { "${params.outdir}/GenomeBinning/QC/GUNC/raw/${meta.assembler}-${meta.binner}-${meta.domain}-${meta.refinement}-${meta.id}/${fasta.baseName}/" }, mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, ] } @@ -540,7 +540,7 @@ process { publishDir = [ path: { "${params.outdir}/GenomeBinning/QC/GUNC/checkmmerged/${meta.assembler}-${meta.binner}-${meta.domain}-${meta.refinement}-${meta.id}/${checkm_file.baseName}" }, mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename }, ] } @@ -561,13 +561,13 @@ process { "--extension fa", "--min_perc_aa ${params.gtdbtk_min_perc_aa}", "--min_af ${params.gtdbtk_min_af}", - "--pplacer_cpus ${params.gtdbtk_pplacer_cpus}" + "--pplacer_cpus ${params.gtdbtk_pplacer_cpus}", ].join(' ') ext.prefix = { "${meta.assembler}-${meta.binner}-${meta.domain}-${meta.refinement}-${meta.id}" } publishDir = [ path: { "${params.outdir}/Taxonomy/GTDB-Tk/${meta.assembler}/${meta.binner}/${meta.id}" }, mode: params.publish_dir_mode, - pattern: "*.{log,tsv,tree.gz,fasta,fasta.gz}" + pattern: "*.{log,tsv,tree.gz,fasta,fasta.gz}", ] } @@ -604,7 +604,7 @@ process { publishDir = [ path: { "${params.outdir}/Ancient_DNA/variant_calling/consensus" }, mode: params.publish_dir_mode, - pattern: "*.fa" + pattern: "*.fa", ] } @@ -614,7 +614,7 @@ process { publishDir = [ path: { "${params.outdir}/Ancient_DNA/variant_calling/index" }, mode: params.publish_dir_mode, - enabled: false + enabled: false, ] } @@ -622,7 +622,7 @@ process { ext.prefix = { "${meta.assembler}-${meta.id}" } publishDir = [ path: { "${params.outdir}/Ancient_DNA/pydamage/analyze/${meta.assembler}-${meta.id}/" }, - mode: params.publish_dir_mode + mode: params.publish_dir_mode, ] } @@ -631,7 +631,7 @@ process { ext.args = "-t ${params.pydamage_accuracy}" publishDir = [ path: { "${params.outdir}/Ancient_DNA/pydamage/filter/${meta.assembler}-${meta.id}/" }, - mode: params.publish_dir_mode + mode: params.publish_dir_mode, ] } @@ -640,7 +640,7 @@ process { publishDir = [ path: { "${params.outdir}/Ancient_DNA/samtools/faidx" }, mode: params.publish_dir_mode, - enabled: false + enabled: false, ] } @@ -656,7 +656,7 @@ process { ext.args = [ params.min_contig_size < 1500 ? "-m 1500" : "-m ${params.min_contig_size}", "--unbinned", - "--seed ${params.metabat_rng_seed}" + "--seed ${params.metabat_rng_seed}", ].join(' ').trim() } @@ -665,13 +665,13 @@ process { [ path: { "${params.outdir}/GenomeBinning/MaxBin2/discarded" }, mode: params.publish_dir_mode, - pattern: '*.tooshort.gz' + pattern: '*.tooshort.gz', ], [ path: { "${params.outdir}/GenomeBinning/MaxBin2/" }, mode: params.publish_dir_mode, - pattern: '*.{summary,abundance}' - ] + pattern: '*.{summary,abundance}', + ], ] ext.prefix = { "${meta.assembler}-MaxBin2-${meta.id}" } } @@ -681,7 +681,7 @@ process { [ path: { "${params.outdir}/GenomeBinning/MaxBin2/bins/" }, mode: params.publish_dir_mode, - pattern: '*.fa.gz' + pattern: '*.fa.gz', ] ] } @@ -691,14 +691,14 @@ process { [ path: { "${params.outdir}/GenomeBinning/CONCOCT/stats/" }, mode: params.publish_dir_mode, - pattern: "*.{txt,csv,tsv}" + pattern: "*.{txt,csv,tsv}", ], [ path: { "${params.outdir}/GenomeBinning/CONCOCT/bins" }, mode: params.publish_dir_mode, saveAs: { filename -> new File(filename).getName() }, - pattern: "*/*.fa.gz" - ] + pattern: "*/*.fa.gz", + ], ] ext.prefix = { "${meta.assembler}-CONCOCT-${meta.id}" } } @@ -728,7 +728,7 @@ process { [ path: { "${params.outdir}/GenomeBinning/DASTool" }, mode: params.publish_dir_mode, - pattern: '*.{tsv,log,eval,seqlength}' + pattern: '*.{tsv,log,eval,seqlength}', ] ] ext.prefix = { "${meta.assembler}-DASTool-${meta.id}" } @@ -740,13 +740,13 @@ process { [ path: { "${params.outdir}/GenomeBinning/DASTool/unbinned" }, mode: params.publish_dir_mode, - pattern: '*-DASToolUnbinned-*.fa' + pattern: '*-DASToolUnbinned-*.fa', ], [ path: { "${params.outdir}/GenomeBinning/DASTool/bins" }, mode: params.publish_dir_mode, - pattern: '*-{MetaBAT2,MaxBin2,CONCOCT}Refined-*.fa' - ] + pattern: '*-{MetaBAT2,MaxBin2,CONCOCT}Refined-*.fa', + 
], ] } @@ -754,7 +754,7 @@ process { publishDir = [ path: { "${params.outdir}/Taxonomy/Tiara/" }, mode: params.publish_dir_mode, - pattern: "*.txt" + pattern: "*.txt", ] ext.args = { "--min_len ${params.tiara_min_length} --probabilities" } ext.prefix = { "${meta.assembler}-${meta.id}.tiara" } @@ -781,11 +781,11 @@ process { publishDir = [path: { "${params.outdir}/Annotation/MetaEuk/${meta.assembler}/${meta.id}" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename }] } withName: MULTIQC { - ext.args = { params.multiqc_title ? "--title \"$params.multiqc_title\"" : '' } + ext.args = { params.multiqc_title ? "--title \"${params.multiqc_title}\"" : '' } publishDir = [ path: { "${params.outdir}/multiqc" }, mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, ] } } diff --git a/docs/usage.md b/docs/usage.md index 9a64db8c..98f82c20 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -200,7 +200,7 @@ Several generic profiles are bundled with the pipeline which instruct the pipeli > [!IMPORTANT] > We highly recommend the use of Docker or Singularity containers for full pipeline reproducibility, however when this is not possible, Conda is also supported. -The pipeline also dynamically loads configurations from [https://github.com/nf-core/configs](https://github.com/nf-core/configs) when it runs, making multiple config profiles for various institutional clusters available at run time. For more information and to check if your system is suported, please see the [nf-core/configs documentation](https://github.com/nf-core/configs#documentation). +The pipeline also dynamically loads configurations from [https://github.com/nf-core/configs](https://github.com/nf-core/configs) when it runs, making multiple config profiles for various institutional clusters available at run time. For more information and to check if your system is supported, please see the [nf-core/configs documentation](https://github.com/nf-core/configs#documentation). Note that multiple profiles can be loaded, for example: `-profile test,docker` - the order of arguments is important! They are loaded in sequence, so later profiles can overwrite earlier profiles. 
diff --git a/nextflow.config b/nextflow.config index fe3ca17a..65df7452 100644 --- a/nextflow.config +++ b/nextflow.config @@ -123,16 +123,16 @@ params { save_filtered_longreads = false // binning options - skip_metabat2 = false - skip_maxbin2 = false - skip_concoct = false - bin_domain_classification = false - bin_domain_classification_tool = 'tiara' - tiara_min_length = 3000 - refine_bins_dastool = false - refine_bins_dastool_threshold = 0.5 - postbinning_input = 'raw_bins_only' - exclude_unbins_from_postbinning = false + skip_metabat2 = false + skip_maxbin2 = false + skip_concoct = false + bin_domain_classification = false + bin_domain_classification_tool = 'tiara' + tiara_min_length = 3000 + refine_bins_dastool = false + refine_bins_dastool_threshold = 0.5 + postbinning_input = 'raw_bins_only' + exclude_unbins_from_postbinning = false // Bin QC skip_binqc = false @@ -145,7 +145,8 @@ params { checkm_db = null save_checkm_data = false checkm2_db = null - checkm2_db_version = 5571251 // corresponds to Zenodo record ID + checkm2_db_version = 5571251 + // corresponds to Zenodo record ID save_checkm2_data = false run_gunc = false gunc_database_type = 'progenomes' @@ -166,40 +167,41 @@ params { // References //genome = null // we use --host_genome instead - igenomes_base = 's3://ngi-igenomes/igenomes/' - igenomes_ignore = false + igenomes_base = 's3://ngi-igenomes/igenomes/' + igenomes_ignore = false // MultiQC options - multiqc_config = null - multiqc_title = null - multiqc_logo = null - max_multiqc_email_size = '25.MB' - multiqc_methods_description = null + multiqc_config = null + multiqc_title = null + multiqc_logo = null + max_multiqc_email_size = '25.MB' + multiqc_methods_description = null // Boilerplate options - outdir = null - publish_dir_mode = 'copy' - email = null - email_on_fail = null - plaintext_email = false - monochrome_logs = false - hook_url = null - help = false - help_full = false - show_hidden = false - version = false - pipelines_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/' - trace_report_suffix = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss')// Config options - config_profile_name = null - config_profile_description = null - - custom_config_version = 'master' - custom_config_base = "https://raw.githubusercontent.com/nf-core/configs/${params.custom_config_version}" - config_profile_contact = null - config_profile_url = null + outdir = null + publish_dir_mode = 'copy' + email = null + email_on_fail = null + plaintext_email = false + monochrome_logs = false + hook_url = null + help = false + help_full = false + show_hidden = false + version = false + pipelines_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/' + trace_report_suffix = new java.util.Date().format('yyyy-MM-dd_HH-mm-ss') + // Config options + config_profile_name = null + config_profile_description = null + + custom_config_version = 'master' + custom_config_base = "https://raw.githubusercontent.com/nf-core/configs/${params.custom_config_version}" + config_profile_contact = null + config_profile_url = null // Schema validation default options - validate_params = true + validate_params = true } // Load base.config by default for all pipelines @@ -207,90 +209,90 @@ includeConfig 'conf/base.config' profiles { debug { - dumpHashes = true - process.beforeScript = 'echo $HOSTNAME' - cleanup = false + dumpHashes = true + process.beforeScript = 'echo $HOSTNAME' + cleanup = false nextflow.enable.configProcessNamesValidation = true } conda { - 
conda.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - conda.channels = ['conda-forge', 'bioconda'] - apptainer.enabled = false + conda.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + conda.channels = ['conda-forge', 'bioconda'] + apptainer.enabled = false } mamba { - conda.enabled = true - conda.useMamba = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - apptainer.enabled = false + conda.enabled = true + conda.useMamba = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + apptainer.enabled = false } docker { - docker.enabled = true - conda.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - apptainer.enabled = false - docker.runOptions = '-u $(id -u):$(id -g)' + docker.enabled = true + conda.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + apptainer.enabled = false + docker.runOptions = '-u $(id -u):$(id -g)' } arm { - docker.runOptions = '-u $(id -u):$(id -g) --platform=linux/amd64' + docker.runOptions = '-u $(id -u):$(id -g) --platform=linux/amd64' } singularity { - singularity.enabled = true - singularity.autoMounts = true - conda.enabled = false - docker.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - apptainer.enabled = false + singularity.enabled = true + singularity.autoMounts = true + conda.enabled = false + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + apptainer.enabled = false } podman { - podman.enabled = true - conda.enabled = false - docker.enabled = false - singularity.enabled = false - shifter.enabled = false - charliecloud.enabled = false - apptainer.enabled = false + podman.enabled = true + conda.enabled = false + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + apptainer.enabled = false } shifter { - shifter.enabled = true - conda.enabled = false - docker.enabled = false - singularity.enabled = false - podman.enabled = false - charliecloud.enabled = false - apptainer.enabled = false + shifter.enabled = true + conda.enabled = false + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + apptainer.enabled = false } charliecloud { - charliecloud.enabled = true - conda.enabled = false - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - apptainer.enabled = false + charliecloud.enabled = true + conda.enabled = false + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + apptainer.enabled = false } apptainer { - apptainer.enabled = true - apptainer.autoMounts = true - conda.enabled = false - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false + apptainer.enabled = true + apptainer.autoMounts = true + conda.enabled = false + docker.enabled = false + singularity.enabled = false + podman.enabled = false + 
shifter.enabled = false + charliecloud.enabled = false } wave { apptainer.ociAutoPull = true @@ -300,34 +302,68 @@ profiles { wave.strategy = 'conda,container' } gitpod { - executor.name = 'local' - executor.cpus = 4 - executor.memory = 8.GB + executor.name = 'local' + executor.cpus = 4 + executor.memory = 8.GB process { resourceLimits = [ memory: 8.GB, - cpus : 4, - time : 1.h + cpus: 4, + time: 1.h, ] } } - test { includeConfig 'conf/test.config' } - test_full { includeConfig 'conf/test_full.config' } - test_host_rm { includeConfig 'conf/test_host_rm.config' } - test_hybrid { includeConfig 'conf/test_hybrid.config' } - test_hybrid_host_rm { includeConfig 'conf/test_hybrid_host_rm.config' } - test_busco_auto { includeConfig 'conf/test_busco_auto.config' } - test_ancient_dna { includeConfig 'conf/test_ancient_dna.config' } - test_adapterremoval { includeConfig 'conf/test_adapterremoval.config' } - test_binning_entry { includeConfig 'conf/test_binning_entry.config' } - test_binrefinement { includeConfig 'conf/test_binrefinement.config' } - test_no_clipping { includeConfig 'conf/test_no_clipping.config' } - test_bbnorm { includeConfig 'conf/test_bbnorm.config' } - test_nothing { includeConfig 'conf/test_nothing.config' } - test_virus_identification { includeConfig 'conf/test_virus_identification.config' } - test_single_end { includeConfig 'conf/test_single_end.config' } - test_concoct { includeConfig 'conf/test_concoct.config' } - test_longread { includeConfig 'conf/test_longread.config' } + test { + includeConfig 'conf/test.config' + } + test_full { + includeConfig 'conf/test_full.config' + } + test_host_rm { + includeConfig 'conf/test_host_rm.config' + } + test_hybrid { + includeConfig 'conf/test_hybrid.config' + } + test_hybrid_host_rm { + includeConfig 'conf/test_hybrid_host_rm.config' + } + test_busco_auto { + includeConfig 'conf/test_busco_auto.config' + } + test_ancient_dna { + includeConfig 'conf/test_ancient_dna.config' + } + test_adapterremoval { + includeConfig 'conf/test_adapterremoval.config' + } + test_binning_entry { + includeConfig 'conf/test_binning_entry.config' + } + test_binrefinement { + includeConfig 'conf/test_binrefinement.config' + } + test_no_clipping { + includeConfig 'conf/test_no_clipping.config' + } + test_bbnorm { + includeConfig 'conf/test_bbnorm.config' + } + test_nothing { + includeConfig 'conf/test_nothing.config' + } + test_virus_identification { + includeConfig 'conf/test_virus_identification.config' + } + test_single_end { + includeConfig 'conf/test_single_end.config' + } + test_concoct { + includeConfig 'conf/test_concoct.config' + } + test_longread { + includeConfig 'conf/test_longread.config' + } } // Load nf-core custom profiles from different Institutions @@ -339,10 +375,10 @@ includeConfig !System.getenv('NXF_OFFLINE') && params.custom_config_base ? 
"${pa // Set default registry for Apptainer, Docker, Podman, Charliecloud and Singularity independent of -profile // Will not be used unless Apptainer / Docker / Podman / Charliecloud / Singularity are enabled // Set to your registry if you have a mirror of containers -apptainer.registry = 'quay.io' -docker.registry = 'quay.io' -podman.registry = 'quay.io' -singularity.registry = 'quay.io' +apptainer.registry = 'quay.io' +docker.registry = 'quay.io' +podman.registry = 'quay.io' +singularity.registry = 'quay.io' charliecloud.registry = 'quay.io' // Load igenomes.config if required @@ -391,71 +427,72 @@ dag { manifest { name = 'nf-core/mag' - author = """Hadrien Gourlé, Daniel Straub, Sabrina Krakau, James A. Fellows Yates, Maxime Borry""" // The author field is deprecated from Nextflow version 24.10.0, use contributors instead + author = """Hadrien Gourlé, Daniel Straub, Sabrina Krakau, James A. Fellows Yates, Maxime Borry""" + // The author field is deprecated from Nextflow version 24.10.0, use contributors instead contributors = [ [ name: 'Hadrien Gourlé', affiliation: 'Department of Animal Breeding and Genetics, Swedish University of Agricultural Sciences, Uppsala, Swden', email: '', github: 'HadrienG', - contribution: ['author'], // List of contribution types ('author', 'maintainer' or 'contributor') - orcid: '0000-0001-9807-1082 ' + contribution: ['author'], + orcid: '0000-0001-9807-1082 ', ], [ name: 'Daniel Straub', affiliation: 'Quantitative Biology Center (QBiC), University of Tübingen, Tübingen, Germany', email: '', github: 'd4straub', - contribution: ['author', 'maintainer'], // List of contribution types ('author', 'maintainer' or 'contributor') - orcid: '0000-0002-2553-0660 ' + contribution: ['author', 'maintainer'], + orcid: '0000-0002-2553-0660 ', ], [ name: 'Sabrina Krakau', affiliation: 'Quantitative Biology Center (QBiC), University of Tübingen, Tübingen, Germany', email: '', github: 'skrakau', - contribution: ['contributor'], // List of contribution types ('author', 'maintainer' or 'contributor') - orcid: '0000-0003-0603-7907 ' + contribution: ['contributor'], + orcid: '0000-0003-0603-7907 ', ], [ name: 'Antonia Schuster', affiliation: 'Quantitative Biology Center (QBiC), University of Tübingen, Tübingen, Germany', email: '', github: 'AntoniaSchuster', - contribution: ['author'], // List of contribution types ('author', 'maintainer' or 'contributor') - orcid: '' + contribution: ['author'], + orcid: '', ], [ name: 'James A. 
Fellows Yates', affiliation: 'Department of Archaeogenetics, Max Planck Institute for Evolutionary Anthropology, Leipzig, Germany', email: 'jfy133@gmail.com', github: 'jfy133', - contribution: ['maintainer'], // List of contribution types ('author', 'maintainer' or 'contributor') - orcid: '0000-0001-5585-6277' + contribution: ['maintainer'], + orcid: '0000-0001-5585-6277', ], [ name: 'Maxime Borry', affiliation: 'Department of Archaeogenetics, Max Planck Institute for Evolutionary Anthropology, Leipzig, Germany', email: '', github: 'maxibor', - contribution: ['contributor'], // List of contribution types ('author', 'maintainer' or 'contributor') - orcid: '0000-0001-9140-7559' + contribution: ['contributor'], + orcid: '0000-0001-9140-7559', ], [ name: 'Jim Downie', affiliation: 'Wellcome Sanger Institute, Hinxton, UK', email: '', github: 'prototaxites', - contribution: ['contributor'], // List of contribution types ('author', 'maintainer' or 'contributor') - orcid: '0000-0002-7175-0533' + contribution: ['contributor'], + orcid: '0000-0002-7175-0533', ], [ name: 'Carson Miller', affiliation: 'University of Washington, Seattle, USA', email: '0000-0002-7175-0533', github: 'CarsonJM', - contribution: ['contributor'], // List of contribution types ('author', 'maintainer' or 'contributor') - orcid: '0000-0001-9861-4884' + contribution: ['contributor'], + orcid: '0000-0001-9861-4884', ], ] homePage = 'https://github.com/nf-core/mag' @@ -469,18 +506,18 @@ manifest { // Nextflow plugins plugins { - id 'nf-schema@2.1.2' // Validation of pipeline parameters and creation of an input channel from a sample sheet + id 'nf-schema@2.3.0' } validation { defaultIgnoreParams = ["genomes"] - monochromeLogs = params.monochrome_logs + monochromeLogs = params.monochrome_logs help { - enabled = true - command = "nextflow run nf-core/mag -profile --input samplesheet.csv --outdir " - fullParameter = "help_full" + enabled = true + command = "nextflow run nf-core/mag -profile --input samplesheet.csv --outdir " + fullParameter = "help_full" showHiddenParameter = "show_hidden" - beforeText = """ + beforeText = """ -\033[2m----------------------------------------------------\033[0m- \033[0;32m,--.\033[0;30m/\033[0;32m,-.\033[0m \033[0;34m ___ __ __ __ ___ \033[0;32m/,-._.--~\'\033[0m @@ -490,7 +527,7 @@ validation { \033[0;35m nf-core/mag ${manifest.version}\033[0m -\033[2m----------------------------------------------------\033[0m- """ - afterText = """${manifest.doi ? "\n* The pipeline\n" : ""}${manifest.doi.tokenize(",").collect { " https://doi.org/${it.trim().replace('https://doi.org/','')}"}.join("\n")}${manifest.doi ? "\n" : ""} + afterText = """${manifest.doi ? "\n* The pipeline\n" : ""}${manifest.doi.tokenize(",").collect { " https://doi.org/${it.trim().replace('https://doi.org/', '')}" }.join("\n")}${manifest.doi ? "\n" : ""} * The nf-core framework https://doi.org/10.1038/s41587-020-0439-x @@ -500,7 +537,7 @@ validation { } summary { beforeText = validation.help.beforeText - afterText = validation.help.afterText + afterText = validation.help.afterText } } diff --git a/nextflow_schema.json b/nextflow_schema.json index 070447bf..aad98236 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -509,7 +509,7 @@ }, "gtdbtk_min_completeness": { "type": "number", - "default": 50, + "default": 50.0, "description": "Min. bin completeness (in %) required to apply GTDB-tk classification.", "help_text": "Completeness assessed with BUSCO analysis (100% - %Missing). Must be greater than 0 (min. 
0.01) to avoid GTDB-tk errors. If too low, GTDB-tk classification results can be impaired due to not enough marker genes!", "minimum": 0.01, @@ -517,7 +517,7 @@ }, "gtdbtk_max_contamination": { "type": "number", - "default": 10, + "default": 10.0, "description": "Max. bin contamination (in %) allowed to apply GTDB-tk classification.", "help_text": "Contamination approximated based on BUSCO analysis (%Complete and duplicated). If too high, GTDB-tk classification results can be impaired due to contamination!", "minimum": 0, @@ -525,7 +525,7 @@ }, "gtdbtk_min_perc_aa": { "type": "number", - "default": 10, + "default": 10.0, "description": "Min. fraction of AA (in %) in the MSA for bins to be kept.", "minimum": 0, "maximum": 100 diff --git a/ro-crate-metadata.json b/ro-crate-metadata.json index 6ae77b8c..de24da83 100644 --- a/ro-crate-metadata.json +++ b/ro-crate-metadata.json @@ -22,8 +22,8 @@ "@id": "./", "@type": "Dataset", "creativeWorkStatus": "InProgress", - "datePublished": "2024-12-19T13:13:43+00:00", - "description": "

<h1>\n  <picture>\n    <source media=\"(prefers-color-scheme: dark)\" srcset=\"docs/images/nf-core-mag_logo_dark.png\">\n    <img alt=\"nf-core/mag\" src=\"docs/images/nf-core-mag_logo_light.png\">\n  </picture>\n</h1>

\n\n[![GitHub Actions CI Status](https://github.com/nf-core/mag/actions/workflows/ci.yml/badge.svg)](https://github.com/nf-core/mag/actions/workflows/ci.yml)\n[![GitHub Actions Linting Status](https://github.com/nf-core/mag/actions/workflows/linting.yml/badge.svg)](https://github.com/nf-core/mag/actions/workflows/linting.yml)[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/mag/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.3589527-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.3589527)\n[![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com)[![Cite Publication](https://img.shields.io/badge/Cite%20Us!-Cite%20Publication-orange)](https://doi.org/10.1093/nargab/lqac007)\n\n[![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A524.04.2-23aa62.svg)](https://www.nextflow.io/)\n[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/)\n[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/)\n[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)\n[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/nf-core/mag)\n\n[![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23mag-4A154B?labelColor=000000&logo=slack)](https://nfcore.slack.com/channels/mag)[![Follow on Twitter](http://img.shields.io/badge/twitter-%40nf__core-1DA1F2?labelColor=000000&logo=twitter)](https://twitter.com/nf_core)[![Follow on Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF&logo=mastodon)](https://mstdn.science/@nf_core)[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000&logo=youtube)](https://www.youtube.com/c/nf-core)\n\n## Introduction\n\n**nf-core/mag** is a bioinformatics best-practise analysis pipeline for assembly, binning and annotation of metagenomes.\n\n

\n \"nf-core/mag\n

\n\n## Pipeline summary\n\nBy default, the pipeline currently performs the following: it supports both short and long reads, quality trims the reads and adapters with [fastp](https://github.com/OpenGene/fastp) and [Porechop](https://github.com/rrwick/Porechop), and performs basic QC with [FastQC](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/), and merge multiple sequencing runs.\n\nThe pipeline then:\n\n- assigns taxonomy to reads using [Centrifuge](https://ccb.jhu.edu/software/centrifuge/) and/or [Kraken2](https://github.com/DerrickWood/kraken2/wiki)\n- performs assembly using [MEGAHIT](https://github.com/voutcn/megahit) and [SPAdes](http://cab.spbu.ru/software/spades/), and checks their quality using [Quast](http://quast.sourceforge.net/quast)\n- (optionally) performs ancient DNA assembly validation using [PyDamage](https://github.com/maxibor/pydamage) and contig consensus sequence recalling with [Freebayes](https://github.com/freebayes/freebayes) and [BCFtools](http://samtools.github.io/bcftools/bcftools.html)\n- predicts protein-coding genes for the assemblies using [Prodigal](https://github.com/hyattpd/Prodigal), and bins with [Prokka](https://github.com/tseemann/prokka) and optionally [MetaEuk](https://www.google.com/search?channel=fs&client=ubuntu-sn&q=MetaEuk)\n- performs metagenome binning using [MetaBAT2](https://bitbucket.org/berkeleylab/metabat/src/master/), [MaxBin2](https://sourceforge.net/projects/maxbin2/), and/or with [CONCOCT](https://github.com/BinPro/CONCOCT), and checks the quality of the genome bins using [Busco](https://busco.ezlab.org/), [CheckM](https://ecogenomics.github.io/CheckM/), or [CheckM2](https://github.com/chklovski/CheckM2) and optionally [GUNC](https://grp-bork.embl-community.io/gunc/).\n- Performs ancient DNA validation and repair with [pyDamage](https://github.com/maxibor/pydamage) and [freebayes](https://github.com/freebayes/freebayes)\n- optionally refines bins with [DAS Tool](https://github.com/cmks/DAS_Tool)\n- assigns taxonomy to bins using [GTDB-Tk](https://github.com/Ecogenomics/GTDBTk) and/or [CAT](https://github.com/dutilh/CAT) and optionally identifies viruses in assemblies using [geNomad](https://github.com/apcamargo/genomad), or Eukaryotes with [Tiara](https://github.com/ibe-uw/tiara)\n\nFurthermore, the pipeline creates various reports in the results directory specified, including a [MultiQC](https://multiqc.info/) report summarizing some of the findings and software versions.\n\n## Usage\n\n> [!NOTE]\n> If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data.\n\n```bash\nnextflow run nf-core/mag -profile --input '*_R{1,2}.fastq.gz' --outdir \n```\n\nor\n\n```bash\nnextflow run nf-core/mag -profile --input samplesheet.csv --outdir \n```\n\n> [!WARNING]\n> Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. 
Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; see [docs](https://nf-co.re/docs/usage/getting_started/configuration#custom-configuration-files).\n\nFor more details and further functionality, please refer to the [usage documentation](https://nf-co.re/mag/usage) and the [parameter documentation](https://nf-co.re/mag/parameters).\n\n## Pipeline output\n\nTo see the results of an example test run with a full size dataset refer to the [results](https://nf-co.re/mag/results) tab on the nf-core website pipeline page.\nFor more details about the output files and reports, please refer to the\n[output documentation](https://nf-co.re/mag/output).\n\n### Group-wise co-assembly and co-abundance computation\n\nEach sample has an associated group ID (see [input specifications](https://nf-co.re/mag/usage#input_specifications)). This group information can be used for group-wise co-assembly with `MEGAHIT` or `SPAdes` and/or to compute co-abundances for the binning step with `MetaBAT2`. By default, group-wise co-assembly is disabled, while the computation of group-wise co-abundances is enabled. For more information about how this group information can be used see the documentation for the parameters [`--coassemble_group`](https://nf-co.re/mag/parameters#coassemble_group) and [`--binning_map_mode`](https://nf-co.re/mag/parameters#binning_map_mode).\n\nWhen group-wise co-assembly is enabled, `SPAdes` is run on accordingly pooled read files, since `metaSPAdes` does not yet allow the input of multiple samples or libraries. In contrast, `MEGAHIT` is run for each group while supplying lists of the individual readfiles.\n\n## Credits\n\nnf-core/mag was written by [Hadrien Gourl\u00e9](https://hadriengourle.com) at [SLU](https://slu.se), [Daniel Straub](https://github.com/d4straub) and [Sabrina Krakau](https://github.com/skrakau) at the [Quantitative Biology Center (QBiC)](http://qbic.life). [James A. 
Fellows Yates](https://github.com/jfy133) and [Maxime Borry](https://github.com/maxibor) at the [Max Planck Institute for Evolutionary Anthropology](https://www.eva.mpg.de) joined in version 2.2.0.\n\nOther code contributors include:\n\n- [Antonia Schuster](https://github.com/AntoniaSchuster)\n- [Alexander Ramos](https://github.com/alxndrdiaz)\n- [Carson Miller](https://github.com/CarsonJM)\n- [Daniel Lundin](https://github.com/erikrikarddaniel)\n- [Danielle Callan](https://github.com/d-callan)\n- [Gregory Sprenger](https://github.com/gregorysprenger)\n- [Jim Downie](https://github.com/prototaxites)\n- [Phil Palmer](https://github.com/PhilPalmer)\n- [@willros](https://github.com/willros)\n- [Adam Rosenbaum](https://github.com/muabnezor)\n- [Diego Alvarez](https://github.com/dialvarezs)\n\nLong read processing was inspired by [caspargross/HybridAssembly](https://github.com/caspargross/HybridAssembly) written by Caspar Gross [@caspargross](https://github.com/caspargross)\n\nWe thank the following people for their extensive assistance in the development of this pipeline:\n\n- [Alexander Peltzer](https://github.com/apeltzer)\n- [Phil Ewels](https://github.com/ewels)\n- [Gisela Gabernet](https://github.com/ggabernet)\n- [Harshil Patel](https://github.com/drpatelh)\n- [Johannes Alneberg](https://github.com/alneberg)\n- [Maxime Garcia](https://github.com/MaxUlysse)\n- [Michael L Heuer](https://github.com/heuermh)\n- [Alex H\u00fcbner](https://github.com/alexhbnr)\n\n## Contributions and Support\n\nIf you would like to contribute to this pipeline, please see the [contributing guidelines](.github/CONTRIBUTING.md).\n\nFor further information or help, don't hesitate to get in touch on the [Slack `#mag` channel](https://nfcore.slack.com/channels/mag) (you can join with [this invite](https://nf-co.re/join/slack)).\n\n## Citations\n\nIf you use nf-core/mag for your analysis, please cite the preprint as follows:\n\n> **nf-core/mag: a best-practice pipeline for metagenome hybrid assembly and binning**\n>\n> Sabrina Krakau, Daniel Straub, Hadrien Gourl\u00e9, Gisela Gabernet, Sven Nahnsen.\n>\n> NAR Genom Bioinform. 2022 Feb 2;4(1):lqac007. doi: [10.1093/nargab/lqac007](https://doi.org/10.1093/nargab/lqac007).\n\nAdditionally you can cite the pipeline directly with the following doi: [10.5281/zenodo.3589527](https://doi.org/10.5281/zenodo.3589527)\n\nAn extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file.\n\nYou can cite the `nf-core` publication as follows:\n\n> **The nf-core framework for community-curated bioinformatics pipelines.**\n>\n> Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso & Sven Nahnsen.\n>\n> _Nat Biotechnol._ 2020 Feb 13. doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x).\n", + "datePublished": "2025-01-20T14:35:56+00:00", + "description": "

<h1>\n  <picture>\n    <source media=\"(prefers-color-scheme: dark)\" srcset=\"docs/images/nf-core-mag_logo_dark.png\">\n    <img alt=\"nf-core/mag\" src=\"docs/images/nf-core-mag_logo_light.png\">\n  </picture>\n</h1>

\n\n[![GitHub Actions CI Status](https://github.com/nf-core/mag/actions/workflows/ci.yml/badge.svg)](https://github.com/nf-core/mag/actions/workflows/ci.yml)\n[![GitHub Actions Linting Status](https://github.com/nf-core/mag/actions/workflows/linting.yml/badge.svg)](https://github.com/nf-core/mag/actions/workflows/linting.yml)[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/mag/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.XXXXXXX-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.XXXXXXX)\n[![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com)\n\n[![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A524.04.2-23aa62.svg)](https://www.nextflow.io/)\n[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/)\n[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/)\n[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)\n[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/nf-core/mag)\n\n[![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23mag-4A154B?labelColor=000000&logo=slack)](https://nfcore.slack.com/channels/mag)[![Follow on Twitter](http://img.shields.io/badge/twitter-%40nf__core-1DA1F2?labelColor=000000&logo=twitter)](https://twitter.com/nf_core)[![Follow on Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF&logo=mastodon)](https://mstdn.science/@nf_core)[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000&logo=youtube)](https://www.youtube.com/c/nf-core)\n\n## Introduction\n\n**nf-core/mag** is a bioinformatics pipeline that ...\n\n\n\n\n1. Read QC ([`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/))2. Present QC for raw reads ([`MultiQC`](http://multiqc.info/))\n\n## Usage\n\n> [!NOTE]\n> If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data.\n\n\n\nNow, you can run the pipeline using:\n\n\n\n```bash\nnextflow run nf-core/mag \\\n -profile \\\n --input samplesheet.csv \\\n --outdir \n```\n\n> [!WARNING]\n> Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. 
Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; see [docs](https://nf-co.re/docs/usage/getting_started/configuration#custom-configuration-files).\n\nFor more details and further functionality, please refer to the [usage documentation](https://nf-co.re/mag/usage) and the [parameter documentation](https://nf-co.re/mag/parameters).\n\n## Pipeline output\n\nTo see the results of an example test run with a full size dataset refer to the [results](https://nf-co.re/mag/results) tab on the nf-core website pipeline page.\nFor more details about the output files and reports, please refer to the\n[output documentation](https://nf-co.re/mag/output).\n\n## Credits\n\nnf-core/mag was originally written by Hadrien Gourl\u00e9, Daniel Straub, Sabrina Krakau, James A. Fellows Yates, Maxime Borry.\n\nWe thank the following people for their extensive assistance in the development of this pipeline:\n\n\n\n## Contributions and Support\n\nIf you would like to contribute to this pipeline, please see the [contributing guidelines](.github/CONTRIBUTING.md).\n\nFor further information or help, don't hesitate to get in touch on the [Slack `#mag` channel](https://nfcore.slack.com/channels/mag) (you can join with [this invite](https://nf-co.re/join/slack)).\n\n## Citations\n\n\n\n\n\n\nAn extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file.\n\nYou can cite the `nf-core` publication as follows:\n\n> **The nf-core framework for community-curated bioinformatics pipelines.**\n>\n> Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso & Sven Nahnsen.\n>\n> _Nat Biotechnol._ 2020 Feb 13. 
doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x).\n", "hasPart": [ { "@id": "main.nf" @@ -105,7 +105,7 @@ }, "mentions": [ { - "@id": "#69a630c2-4ecb-45c8-aa72-507b7379fdc9" + "@id": "#7529fbd9-35e2-468b-8f87-4e11f77f53b2" } ], "name": "nf-core/mag" @@ -149,7 +149,7 @@ } ], "dateCreated": "", - "dateModified": "2024-12-19T14:13:43Z", + "dateModified": "2025-01-20T14:35:56Z", "dct:conformsTo": "https://bioschemas.org/profiles/ComputationalWorkflow/1.0-RELEASE/", "keywords": [ "nf-core", @@ -201,11 +201,11 @@ "version": "!>=24.04.2" }, { - "@id": "#69a630c2-4ecb-45c8-aa72-507b7379fdc9", + "@id": "#7529fbd9-35e2-468b-8f87-4e11f77f53b2", "@type": "TestSuite", "instance": [ { - "@id": "#9a41f8df-23bd-4f5e-be1a-719ffff430a4" + "@id": "#befa69d0-369e-4215-9af0-e21f944de36e" } ], "mainEntity": { @@ -214,7 +214,7 @@ "name": "Test suite for nf-core/mag" }, { - "@id": "#9a41f8df-23bd-4f5e-be1a-719ffff430a4", + "@id": "#befa69d0-369e-4215-9af0-e21f944de36e", "@type": "TestInstance", "name": "GitHub Actions workflow for testing nf-core/mag", "resource": "repos/nf-core/mag/actions/workflows/ci.yml", diff --git a/subworkflows/local/shortread_preprocessing.nf b/subworkflows/local/shortread_preprocessing.nf index ad33b56f..abcee2c3 100644 --- a/subworkflows/local/shortread_preprocessing.nf +++ b/subworkflows/local/shortread_preprocessing.nf @@ -17,10 +17,11 @@ include { BBMAP_BBNORM } from '../../modu workflow SHORTREAD_PREPROCESSING { take: - ch_raw_short_reads // [ [meta] , fastq1, fastq2] (mandatory) - ch_host_fasta // [fasta] (optional) - ch_phix_db_file // [fasta] (optional) - ch_metaeuk_db // [fasta] (optional) + ch_raw_short_reads // [ [meta] , fastq1, fastq2] (mandatory) + ch_host_fasta // [fasta] (optional) + ch_host_genome_index // fasta (optional) + ch_phix_db_file // [fasta] (optional) + ch_metaeuk_db // [fasta] (optional) main: ch_versions = Channel.empty() @@ -38,12 +39,11 @@ workflow SHORTREAD_PREPROCESSING { ch_raw_short_reads, [], params.fastp_save_trimmed_fail, - [] + [], ) ch_short_reads_prepped = FASTP.out.reads ch_versions = ch_versions.mix(FASTP.out.versions.first()) ch_multiqc_files = ch_multiqc_files.mix(FASTP.out.json) - } else if (params.clip_tool == 'adapterremoval') { @@ -80,11 +80,14 @@ workflow SHORTREAD_PREPROCESSING { ch_host_bowtie2index = BOWTIE2_HOST_REMOVAL_BUILD.out.index } } + else if (params.host_genome) { + ch_host_bowtie2index = ch_host_genome_index + } if (params.host_fasta || params.host_genome) { BOWTIE2_HOST_REMOVAL_ALIGN( ch_short_reads_prepped, - ch_host_bowtie2index + ch_host_bowtie2index, ) ch_short_reads_hostremoved = BOWTIE2_HOST_REMOVAL_ALIGN.out.reads ch_versions = ch_versions.mix(BOWTIE2_HOST_REMOVAL_ALIGN.out.versions.first()) @@ -100,7 +103,7 @@ workflow SHORTREAD_PREPROCESSING { ) BOWTIE2_PHIX_REMOVAL_ALIGN( ch_short_reads_hostremoved, - BOWTIE2_PHIX_REMOVAL_BUILD.out.index + BOWTIE2_PHIX_REMOVAL_BUILD.out.index, ) ch_short_reads_phixremoved = BOWTIE2_PHIX_REMOVAL_ALIGN.out.reads ch_versions = ch_versions.mix(BOWTIE2_PHIX_REMOVAL_ALIGN.out.versions.first()) @@ -171,8 +174,8 @@ workflow SHORTREAD_PREPROCESSING { } emit: - short_reads = ch_short_reads + short_reads = ch_short_reads short_reads_assembly = ch_short_reads_assembly - versions = ch_versions - multiqc_files = ch_multiqc_files + versions = ch_versions + multiqc_files = ch_multiqc_files } diff --git a/subworkflows/local/utils_nfcore_mag_pipeline/main.nf b/subworkflows/local/utils_nfcore_mag_pipeline/main.nf index c16f1a71..66268c17 100644 --- 
a/subworkflows/local/utils_nfcore_mag_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_mag_pipeline/main.nf @@ -42,7 +42,7 @@ workflow PIPELINE_INITIALISATION { version, true, outdir, - workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1 + workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1, ) // @@ -51,7 +51,7 @@ workflow PIPELINE_INITIALISATION { UTILS_NFSCHEMA_PLUGIN( workflow, validate_params, - null + null, ) // @@ -179,6 +179,7 @@ workflow PIPELINE_COMPLETION { main: summary_params = paramsSummaryMap(workflow, parameters_schema: "nextflow_schema.json") def multiqc_reports = multiqc_report.toList() + // // Completion email and summary // @@ -338,6 +339,7 @@ def validateInputSamplesheet(meta, sr1, sr2, lr) { // // Get attribute from genome config file e.g. fasta // +// Note: user uses --host_genome in mag def getGenomeAttribute(attribute) { if (params.genomes && params.host_genome && params.genomes.containsKey(params.host_genome)) { if (params.genomes[params.host_genome].containsKey(attribute)) { @@ -346,13 +348,14 @@ def getGenomeAttribute(attribute) { } return null } - // // Exit pipeline if incorrect --genome key provided // +// Note: user uses --host_genome in mag + def genomeExistsError() { - if (params.genomes && params.host_genome && !params.genomes.containsKey(params.genome)) { - def error_string = "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + " Genome '${params.host_genome}' not found in any config files provided to the pipeline.\n" + " Currently, the available genome keys are:\n" + " ${params.host_genomes.keySet().join(", ")}\n" + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" + if (params.genomes && params.host_genome && !params.genomes.containsKey(params.host_genome)) { + def error_string = "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + " Genome '${params.host_genome}' not found in any config files provided to the pipeline.\n" + " Currently, the available genome keys are:\n" + " ${params.genomes.keySet().join(", ")}\n" + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" error(error_string) } } @@ -367,7 +370,7 @@ def toolCitationText() { "Tools used in the workflow included:", "FastQC (Andrews 2010),", "MultiQC (Ewels et al. 2016)", - "." + ".", ].join(' ').trim() return citation_text @@ -379,7 +382,7 @@ def toolBibliographyText() { // Uncomment function in methodsDescriptionText to render in MultiQC report def reference_text = [ "
  • Andrews S, (2010) FastQC, URL: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/).
  • ", - "
  • Ewels, P., Magnusson, M., Lundin, S., & Käller, M. (2016). MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics , 32(19), 3047–3048. doi: /10.1093/bioinformatics/btw354
  • " + "
  • Ewels, P., Magnusson, M., Lundin, S., & Käller, M. (2016). MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics , 32(19), 3047–3048. doi: /10.1093/bioinformatics/btw354
  • ", ].join(' ').trim() return reference_text diff --git a/workflows/mag.nf b/workflows/mag.nf index 836eeb29..dc6d6b21 100644 --- a/workflows/mag.nf +++ b/workflows/mag.nf @@ -58,7 +58,7 @@ include { COMBINE_TSV as COMBINE_SUMMARY_TSV } from '../modul workflow MAG { take: - ch_raw_short_reads // channel: samplesheet read in from --input + ch_raw_short_reads // channel: samplesheet read in from --input ch_raw_long_reads ch_input_assemblies @@ -75,13 +75,21 @@ workflow MAG { host_fasta = params.genomes[params.host_genome].fasta ?: false ch_host_fasta = Channel.value(file("${host_fasta}")) host_bowtie2index = params.genomes[params.host_genome].bowtie2 ?: false - ch_host_bowtie2index = Channel.value(file("${host_bowtie2index}/*")) + ch_host_bowtie2index = Channel.fromPath("${host_bowtie2index}", checkIfExists: true).first() } else if (params.host_fasta) { - ch_host_fasta = Channel.value(file("${params.host_fasta}")) + ch_host_fasta = Channel.fromPath("${params.host_fasta}", checkIfExists: true).first() ?: false + + if (params.host_fasta_bowtie2index) { + ch_host_bowtie2index = Channel.fromPath("${params.host_fasta_bowtie2index}", checkIfExists: true).first() + } + else { + ch_host_bowtie2index = Channel.empty() + } } else { ch_host_fasta = Channel.empty() + ch_host_bowtie2index = Channel.empty() } if (params.kraken2_db) { @@ -110,10 +118,14 @@ workflow MAG { } else { ch_phix_db_file = Channel.empty() } + else { + ch_phix_db_file = Channel.empty() + } if (!params.keep_lambda) { - ch_lambda_db = Channel.value(file( "${params.lambda_reference}" )) - } else { + ch_lambda_db = Channel.value(file("${params.lambda_reference}")) + } + else { ch_lambda_db = Channel.empty() } @@ -158,15 +170,15 @@ workflow MAG { SHORTREAD_PREPROCESSING( ch_raw_short_reads, ch_host_fasta, + ch_host_bowtie2index, ch_phix_db_file, - ch_metaeuk_db + ch_metaeuk_db, ) ch_versions = ch_versions.mix(SHORTREAD_PREPROCESSING.out.versions) ch_multiqc_files = ch_multiqc_files.mix(SHORTREAD_PREPROCESSING.out.multiqc_files.collect { it[1] }.ifEmpty([])) ch_short_reads = SHORTREAD_PREPROCESSING.out.short_reads ch_short_reads_assembly = SHORTREAD_PREPROCESSING.out.short_reads_assembly - } else { ch_short_reads = ch_raw_short_reads.map { meta, reads -> @@ -216,7 +228,7 @@ workflow MAG { ch_short_reads, ch_db_for_centrifuge, false, - false + false, ) ch_versions = ch_versions.mix(CENTRIFUGE_CENTRIFUGE.out.versions.first()) @@ -253,7 +265,7 @@ workflow MAG { KRAKEN2( ch_short_reads, - ch_db_for_kraken2 + ch_db_for_kraken2, ) ch_versions = ch_versions.mix(KRAKEN2.out.versions.first()) @@ -285,7 +297,7 @@ workflow MAG { KRONA_KTIMPORTTAXONOMY( ch_tax_classifications, - ch_krona_db + ch_krona_db, ) ch_versions = ch_versions.mix(KRONA_KTIMPORTTAXONOMY.out.versions.first()) } @@ -343,7 +355,7 @@ workflow MAG { if (!params.skip_prodigal) { PRODIGAL( ch_assemblies, - 'gff' + 'gff', ) ch_versions = ch_versions.mix(PRODIGAL.out.versions.first()) } @@ -402,13 +414,13 @@ workflow MAG { if (params.ancient_dna && !params.skip_ancient_damagecorrection) { BINNING( BINNING_PREPARATION.out.grouped_mappings.join(ANCIENT_DNA_ASSEMBLY_VALIDATION.out.contigs_recalled).map { it -> [it[0], it[4], it[2], it[3]] }, - ch_short_reads + ch_short_reads, ) } else { BINNING( BINNING_PREPARATION.out.grouped_mappings, - ch_short_reads + ch_short_reads, ) } ch_versions = ch_versions.mix(BINNING.out.versions) @@ -551,7 +563,7 @@ workflow MAG { } CAT( ch_input_for_postbinning, - ch_cat_db + ch_cat_db, ) // Group all classification results for each sample in a single file 
ch_cat_summary = CAT.out.tax_classification_names.collectFile(keepHeader: true) { meta, classification -> @@ -589,7 +601,7 @@ workflow MAG { ch_gtdb_bins, ch_bin_qc_summary, gtdb, - gtdb_mash + gtdb_mash, ) ch_versions = ch_versions.mix(GTDBTK.out.versions.first()) ch_gtdbtk_summary = GTDBTK.out.summary @@ -606,7 +618,7 @@ workflow MAG { ch_quast_bins_summary.ifEmpty([]), ch_gtdbtk_summary.ifEmpty([]), ch_cat_global_summary.ifEmpty([]), - params.binqc_tool + params.binqc_tool, ) } @@ -628,7 +640,7 @@ workflow MAG { PROKKA( ch_bins_for_prokka, [], - [] + [], ) ch_versions = ch_versions.mix(PROKKA.out.versions.first()) } @@ -655,9 +667,9 @@ workflow MAG { softwareVersionsToYAML(ch_versions) .collectFile( storeDir: "${params.outdir}/pipeline_info", - name: 'nf_core_' + 'mag_software_' + 'mqc_' + 'versions.yml', + name: 'nf_core_' + 'mag_software_' + 'mqc_' + 'versions.yml', sort: true, - newLine: true + newLine: true, ) .set { ch_collated_versions } @@ -695,7 +707,7 @@ workflow MAG { ch_multiqc_files = ch_multiqc_files.mix( ch_methods_description.collectFile( name: 'methods_description_mqc.yaml', - sort: true + sort: true, ) ) @@ -729,7 +741,7 @@ workflow MAG { ch_multiqc_custom_config.toList(), ch_multiqc_logo.toList(), [], - [] + [], ) emit: