diff --git a/.github/workflows/add_fastani_ref.yml b/.github/workflows/add_fastani_ref.yml index 0c7b263..8de3d3f 100755 --- a/.github/workflows/add_fastani_ref.yml +++ b/.github/workflows/add_fastani_ref.yml @@ -61,7 +61,7 @@ jobs: - name: Run Grandeur with ref list run: | - nextflow run . -profile docker -c .github/workflows/github_actions.config --fastas fastas --fastani_ref_list fastani_ref_list.txt + nextflow run . -profile docker -c .github/workflows/github_actions.config --fastas fastas --fastani_ref_list fastani_ref_list.txt --publish_dir_mode link cat grandeur/grandeur_summary.tsv cat grandeur/summary/software_versions.yml @@ -69,18 +69,22 @@ jobs: - name: Run Grandeur with refs run: | - nextflow run . -profile docker -c .github/workflows/github_actions.config --fastas fastas -resume --fastani_ref GCA_009665515.2_ASM966551v2_genomic.fna.gz,GCA_009763645.1_ASM976364v1_genomic.fna.gz --outdir grandeur2 + nextflow run . -profile docker -c .github/workflows/github_actions.config --fastas fastas -resume --fastani_ref GCA_009665515.2_ASM966551v2_genomic.fna.gz,GCA_009763645.1_ASM976364v1_genomic.fna.gz --outdir grandeur2 --publish_dir_mode link cat grandeur2/grandeur_summary.tsv cat grandeur2/summary/software_versions.yml ls grandeur2/multiqc/multiqc_report.html + - name: Clean up Disk space + uses: jlumbroso/free-disk-space@main + - name: Run Grandeur with refs and ref list run: | - nextflow run . -profile docker -c .github/workflows/github_actions.config --fastas fastas -resume --fastani_ref_list fastani_ref_list.txt --fastani_ref GCA_009665515.2_ASM966551v2_genomic.fna.gz,GCA_009763645.1_ASM976364v1_genomic.fna.gz --outdir grandeur3 + nextflow run . -profile docker -c .github/workflows/github_actions.config --fastas fastas -resume --fastani_ref_list fastani_ref_list.txt --fastani_ref GCA_009665515.2_ASM966551v2_genomic.fna.gz,GCA_009763645.1_ASM976364v1_genomic.fna.gz --outdir grandeur3 --publish_dir_mode link cat grandeur3/grandeur_summary.tsv cat grandeur3/summary/software_versions.yml ls grandeur3/multiqc/multiqc_report.html - \ No newline at end of file + - name: tree + run: tree grandeur* diff --git a/.github/workflows/current.yml b/.github/workflows/current.yml index 43b725b..27d8521 100644 --- a/.github/workflows/current.yml +++ b/.github/workflows/current.yml @@ -10,17 +10,21 @@ jobs: - name: Checkout uses: actions/checkout@v4 + - name: Clean up Disk space + uses: jlumbroso/free-disk-space@main + - name: Install Nextflow run: | wget -qO- get.nextflow.io | bash sudo mv nextflow /usr/local/bin/ nextflow -version - - - name: Run Grandeur - run: | - docker --version + - name: Test docker + run: docker --version + + - name: Get files + run: | wget https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/904/864/595/GCA_904864595.1_INF333/GCA_904864595.1_INF333_genomic.fna.gz && gzip -d GCA_904864595.1_INF333_genomic.fna.gz wget https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/013/783/245/GCA_013783245.1_ASM1378324v1/GCA_013783245.1_ASM1378324v1_genomic.fna.gz && gzip -d GCA_013783245.1_ASM1378324v1_genomic.fna.gz wget https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/026/626/185/GCA_026626185.1_ASM2662618v1/GCA_026626185.1_ASM2662618v1_genomic.fna.gz && gzip -d GCA_026626185.1_ASM2662618v1_genomic.fna.gz @@ -30,8 +34,16 @@ jobs: mkdir fastas mv *fna fastas/. - nextflow run . -profile docker --current_datasets -c .github/workflows/github_actions.config --fastas fastas + - name: Run Grandeur + run: | + nextflow run . 
-profile docker --current_datasets -c .github/workflows/github_actions.config --fastas fastas --publish_dir_mode link + - name: Check files + run: | cat grandeur/grandeur_summary.tsv cat grandeur/summary/software_versions.yml ls grandeur/multiqc/multiqc_report.html + + - name: tree + run: tree grandeur* + diff --git a/.github/workflows/ecoli.yml b/.github/workflows/ecoli.yml index 26f90e2..f184cfb 100644 --- a/.github/workflows/ecoli.yml +++ b/.github/workflows/ecoli.yml @@ -10,6 +10,9 @@ jobs: - name: Checkout uses: actions/checkout@v4 + - name: Clean up Disk space + uses: jlumbroso/free-disk-space@main + - name: Install Nextflow run: | wget -qO- get.nextflow.io | bash @@ -33,7 +36,7 @@ jobs: mv ${accession}_genomic.fna fastas/. done - nextflow run . -profile docker -c .github/workflows/github_actions.config --fastas fastas + nextflow run . -profile docker -c .github/workflows/github_actions.config --fastas fastas --publish_dir_mode link cat grandeur/grandeur_summary.tsv cat grandeur/summary/software_versions.yml ls grandeur/multiqc/multiqc_report.html @@ -45,4 +48,6 @@ jobs: head $file wc -l $file done - \ No newline at end of file + + - name: tree + run: tree grandeur* diff --git a/.github/workflows/gc.yml b/.github/workflows/gc.yml new file mode 100755 index 0000000..5cc10f3 --- /dev/null +++ b/.github/workflows/gc.yml @@ -0,0 +1,54 @@ +name: Test Grandeur workflow with Neisseria meningitidis + +on: [pull_request, workflow_dispatch] + +jobs: + + test: + runs-on: ubuntu-20.04 + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Clean up Disk space + uses: jlumbroso/free-disk-space@main + + - name: Install Nextflow + run: | + wget -qO- get.nextflow.io | bash + sudo mv nextflow /usr/local/bin/ + + nextflow -version + + - name: Run Grandeur + run: | + docker --version + + mkdir fastas + for accession in GCA_022869645.1_ASM2286964v1 + do + all=$(echo $accession | cut -f 2 -d "_") + fir=$(echo $all | cut -c 1-3) + mid=$(echo $all | cut -c 4-6) + end=$(echo $all | cut -c 7-9) + wget https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/${fir}/${mid}/${end}/${accession}/${accession}_genomic.fna.gz + gzip -d ${accession}_genomic.fna.gz + mv ${accession}_genomic.fna fastas/. + done + + nextflow run . -profile docker -c .github/workflows/github_actions.config --fastas fastas --publish_dir_mode link + cat grandeur/grandeur_summary.tsv + cat grandeur/summary/software_versions.yml + ls grandeur/multiqc/multiqc_report.html + + - name: Check Neisseria file + run: | + for file in grandeur/meningotype/meningotype_summary.tsv + do + head $file + wc -l $file + done + + + - name: tree + run: tree grandeur* diff --git a/.github/workflows/github_actions.config b/.github/workflows/github_actions.config index a82a592..994f3e3 100755 --- a/.github/workflows/github_actions.config +++ b/.github/workflows/github_actions.config @@ -29,153 +29,153 @@ process { memory = { 12.GB * task.attempt } } - withName:amrfinderplus { + withName:AMRFINDER { errorStrategy = { task.attempt < 2 ? 'retry' : 'terminate'} } - withName:bbduk { + withName:BAKTA { errorStrategy = { task.attempt < 2 ? 'retry' : 'terminate'} } - withName:blastn { + withName:BLASTN { errorStrategy = { task.attempt < 2 ? 'retry' : 'terminate'} } - withName:blobtools_create { + withName:BLOBTOOLS_CREATE { errorStrategy = { task.attempt < 2 ? 'retry' : 'terminate'} } - withName:blobtools_plot { + withName:BLOBTOOLS_PLOT { errorStrategy = { task.attempt < 2 ? 
'retry' : 'terminate'} } - withName:blobtools_view { + withName:BLOBTOOLS_VIEW { errorStrategy = { task.attempt < 2 ? 'retry' : 'terminate'} } - withName:circulocov { + withName:CIRCULOCOV { errorStrategy = { task.attempt < 2 ? 'retry' : 'terminate'} } - withName:core_genome_evaluation { + withName:CORE_GENOME_EVALUATION { errorStrategy = { task.attempt < 2 ? 'retry' : 'terminate'} } - withName:datasets_download { + withName:DATASETS_DOWNLOAD { errorStrategy = { task.attempt < 2 ? 'retry' : 'terminate'} } - withName:datasets_summary { + withName:DATASETS_SUMMARY { errorStrategy = { task.attempt < 2 ? 'retry' : 'terminate'} } - withName:download_sra { + withName:DOWNLOAD_SRA { errorStrategy = { task.attempt < 2 ? 'retry' : 'terminate'} } - withName:drprg { + withName:DRPRG { errorStrategy = { task.attempt < 2 ? 'retry' : 'terminate'} } - withName:elgato { + withName:ELGATO { errorStrategy = { task.attempt < 2 ? 'retry' : 'terminate'} } - withName:emmtyper { + withName:EMMTYPER { errorStrategy = { task.attempt < 2 ? 'retry' : 'terminate'} } - withName:fastani { + withName:ENA_DOWNLOAD { errorStrategy = { task.attempt < 2 ? 'retry' : 'terminate'} } - withName:fastp { + withName:FASTANI { errorStrategy = { task.attempt < 2 ? 'retry' : 'terminate'} } - withName:fastqc { + withName:FASTP { errorStrategy = { task.attempt < 2 ? 'retry' : 'terminate'} } - withName:flag { + withName:FASTQC { errorStrategy = { task.attempt < 2 ? 'retry' : 'terminate'} } - withName:heatcluster { + withName:HEATCLUSTER { errorStrategy = { task.attempt < 2 ? 'retry' : 'terminate'} } - withName:iqtree2 { + withName:IQTREE2 { errorStrategy = { task.attempt < 2 ? 'retry' : 'terminate'} } - withName:json_convert { + withName:JSON_CONVERT { errorStrategy = { task.attempt < 2 ? 'retry' : 'terminate'} } - withName:kaptive { + withName:KAPTIVE { errorStrategy = { task.attempt < 2 ? 'retry' : 'terminate'} } - withName:kleborate { + withName:KLEBORATE { errorStrategy = { task.attempt < 2 ? 'retry' : 'terminate'} } - withName:kraken2 { + withName:KRAKEN2 { errorStrategy = { task.attempt < 2 ? 'retry' : 'terminate'} } - withName:mash_dist { + withName:MASHTREE { errorStrategy = { task.attempt < 2 ? 'retry' : 'terminate'} } - withName:mash_err { + withName:MASH_DIST { errorStrategy = { task.attempt < 2 ? 'retry' : 'terminate'} } - withName:mash_sketch_fasta { + withName:MASH_SKETCH { errorStrategy = { task.attempt < 2 ? 'retry' : 'terminate'} } - withName:mash_sketch_fastq { + withName:MENINGOTYPE { errorStrategy = { task.attempt < 2 ? 'retry' : 'terminate'} } - withName:mashtree { + withName:MLST { errorStrategy = { task.attempt < 2 ? 'retry' : 'terminate'} } - withName:mlst { + withName:MQC_PREP { errorStrategy = { task.attempt < 2 ? 'retry' : 'terminate'} } - withName:mqc_prep { + withName:MULTIQC { errorStrategy = { task.attempt < 2 ? 'retry' : 'terminate'} } - withName:multiqc { + withName:MYKROBE { errorStrategy = { task.attempt < 2 ? 'retry' : 'terminate'} } - withName:mykrobe { + withName:NAMES { errorStrategy = { task.attempt < 2 ? 'retry' : 'terminate'} } - withName:names { - errorStrategy = { task.attempt < 2 ? 'retry' : 'terminate'} - } - withName:panaroo { + withName:PANAROO { publishDir = [ path: "grandeur", mode: 'link', pattern: "panaroo/*"] errorStrategy = { task.attempt < 2 ? 'retry' : 'terminate'} } - withName:pbptyper { + withName:PBPTYPER { errorStrategy = { task.attempt < 2 ? 'retry' : 'terminate'} } - withName:phytreeviz { + withName:PHYTREEVIZ { errorStrategy = { task.attempt < 2 ? 
'retry' : 'terminate'} } - withName:plasmidfinder { + withName:PLASMIDFINDER { errorStrategy = { task.attempt < 2 ? 'retry' : 'terminate'} } - withName:prokka { + withName:PROKKA { publishDir = [ path: "grandeur", mode: 'link', pattern: "gff/*.gff" ] errorStrategy = { task.attempt < 2 ? 'retry' : 'terminate'} } - withName:quast { + withName:QUAST { + errorStrategy = { task.attempt < 2 ? 'retry' : 'terminate'} + } + withName:REFERENCES { errorStrategy = { task.attempt < 2 ? 'retry' : 'terminate'} } - withName:references { + withName:ROARY { errorStrategy = { task.attempt < 2 ? 'retry' : 'terminate'} } - withName:seqsero2 { + withName:SEQSERO2 { errorStrategy = { task.attempt < 2 ? 'retry' : 'terminate'} } - withName:serotypefinder { + withName:SEROTYPEFINDER { errorStrategy = { task.attempt < 2 ? 'retry' : 'terminate'} } - withName:shigatyper { + withName:SHIGATYPER { errorStrategy = { task.attempt < 2 ? 'retry' : 'terminate'} } - withName:snp_dists { + withName:SNPDISTS { errorStrategy = { task.attempt < 2 ? 'retry' : 'terminate'} } - withName:spades { + withName:SPADES { errorStrategy = { task.attempt < 2 ? 'retry' : 'terminate'} } - withName:species { + withName:SPECIES { errorStrategy = { task.attempt < 2 ? 'retry' : 'terminate'} } - withName:summary { + withName:SUMMARY { errorStrategy = { task.attempt < 2 ? 'retry' : 'terminate'} } - withName:versions { + withName:VERSIONS { errorStrategy = { task.attempt < 2 ? 'retry' : 'terminate'} } } diff --git a/.github/workflows/just_msa.yml b/.github/workflows/just_msa.yml index 807738e..05e6f5b 100644 --- a/.github/workflows/just_msa.yml +++ b/.github/workflows/just_msa.yml @@ -10,6 +10,9 @@ jobs: - name: Checkout uses: actions/checkout@v4 + - name: Clean up Disk space + uses: jlumbroso/free-disk-space@main + - name: Install Nextflow run: | wget -qO- get.nextflow.io | bash @@ -30,7 +33,7 @@ jobs: mkdir fastas mv *fna fastas/. - nextflow run . -profile docker,just_msa -c .github/workflows/github_actions.config --fastas fastas + nextflow run . -profile docker,just_msa -c .github/workflows/github_actions.config --fastas fastas --publish_dir_mode link - name: Check MSA files run: | @@ -39,3 +42,6 @@ jobs: head $file wc -l $file done + + - name: tree + run: tree grandeur* diff --git a/.github/workflows/klebsiella.yml b/.github/workflows/klebsiella.yml index 8f847c9..95f127c 100755 --- a/.github/workflows/klebsiella.yml +++ b/.github/workflows/klebsiella.yml @@ -10,6 +10,9 @@ jobs: - name: Checkout uses: actions/checkout@v4 + - name: Clean up Disk space + uses: jlumbroso/free-disk-space@main + - name: Install Nextflow run: | wget -qO- get.nextflow.io | bash @@ -33,7 +36,7 @@ jobs: mv ${accession}_genomic.fna fastas/. done - nextflow run . -profile docker -c .github/workflows/github_actions.config --fastas fastas + nextflow run . 
-profile docker -c .github/workflows/github_actions.config --fastas fastas --publish_dir_mode link cat grandeur/grandeur_summary.tsv cat grandeur/summary/software_versions.yml ls grandeur/multiqc/multiqc_report.html @@ -44,4 +47,8 @@ jobs: do head $file wc -l $file - done \ No newline at end of file + done + + + - name: tree + run: tree grandeur* diff --git a/.github/workflows/legionella.yml b/.github/workflows/legionella.yml index 22eef66..55ba7ee 100644 --- a/.github/workflows/legionella.yml +++ b/.github/workflows/legionella.yml @@ -10,6 +10,9 @@ jobs: - name: Checkout uses: actions/checkout@v4 + - name: Clean up Disk space + uses: jlumbroso/free-disk-space@main + - name: Install Nextflow run: | wget -qO- get.nextflow.io | bash @@ -33,7 +36,7 @@ jobs: mv ${accession}_genomic.fna fastas/. done - nextflow run . -profile docker -c .github/workflows/github_actions.config --fastas fastas + nextflow run . -profile docker -c .github/workflows/github_actions.config --fastas fastas --publish_dir_mode link cat grandeur/grandeur_summary.tsv cat grandeur/summary/software_versions.yml @@ -46,4 +49,6 @@ jobs: head $file wc -l $file done - \ No newline at end of file + + - name: tree + run: tree grandeur* diff --git a/.github/workflows/myco.yml b/.github/workflows/myco.yml index a4634e3..ff17bba 100755 --- a/.github/workflows/myco.yml +++ b/.github/workflows/myco.yml @@ -9,6 +9,9 @@ jobs: steps: - name: Checkout uses: actions/checkout@v4 + + - name: Clean up Disk space + uses: jlumbroso/free-disk-space@main - name: Install Nextflow run: | @@ -33,7 +36,7 @@ jobs: mv ${accession}_genomic.fna fastas/. done - nextflow run . -profile docker -c .github/workflows/github_actions.config --fastas fastas + nextflow run . -profile docker -c .github/workflows/github_actions.config --fastas fastas --publish_dir_mode link cat grandeur/grandeur_summary.tsv cat grandeur/summary/software_versions.yml ls grandeur/multiqc/multiqc_report.html @@ -44,4 +47,7 @@ jobs: do head $file wc -l $file - done \ No newline at end of file + done + + - name: tree + run: tree grandeur* diff --git a/.github/workflows/phylogenetic_workflow.yml b/.github/workflows/phylogenetic_workflow.yml index af37834..e6e4b5f 100644 --- a/.github/workflows/phylogenetic_workflow.yml +++ b/.github/workflows/phylogenetic_workflow.yml @@ -9,6 +9,9 @@ jobs: steps: - name: Checkout uses: actions/checkout@v4 + + - name: Clean up Disk space + uses: jlumbroso/free-disk-space@main - name: Install Nextflow run: | @@ -30,7 +33,7 @@ jobs: mkdir fastas mv *fna fastas/. - nextflow run . -profile docker,msa -c .github/workflows/github_actions.config --fastas fastas + nextflow run . 
-profile docker,msa -c .github/workflows/github_actions.config --fastas fastas --publish_dir_mode link cat grandeur/grandeur_summary.tsv cat grandeur/summary/software_versions.yml @@ -44,3 +47,6 @@ jobs: wc -l $file done + + - name: tree + run: tree grandeur* diff --git a/.github/workflows/prokka.yml b/.github/workflows/prokka.yml new file mode 100755 index 0000000..f2b88dd --- /dev/null +++ b/.github/workflows/prokka.yml @@ -0,0 +1,49 @@ +name: Test Grandeur just_msa with prokka workflow + +on: [pull_request, workflow_dispatch] + +jobs: + + test: + runs-on: ubuntu-20.04 + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Clean up Disk space + uses: jlumbroso/free-disk-space@main + + - name: Install Nextflow + run: | + wget -qO- get.nextflow.io | bash + sudo mv nextflow /usr/local/bin/ + + nextflow -version + + - name: Run Grandeur + run: | + docker --version + + wget -q https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/904/864/595/GCA_904864595.1_INF333/GCA_904864595.1_INF333_genomic.fna.gz && gzip -d GCA_904864595.1_INF333_genomic.fna.gz + wget -q https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/013/783/245/GCA_013783245.1_ASM1378324v1/GCA_013783245.1_ASM1378324v1_genomic.fna.gz && gzip -d GCA_013783245.1_ASM1378324v1_genomic.fna.gz + wget -q https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/026/626/185/GCA_026626185.1_ASM2662618v1/GCA_026626185.1_ASM2662618v1_genomic.fna.gz && gzip -d GCA_026626185.1_ASM2662618v1_genomic.fna.gz + wget -q https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/020/808/985/GCA_020808985.1_ASM2080898v1/GCA_020808985.1_ASM2080898v1_genomic.fna.gz && gzip -d GCA_020808985.1_ASM2080898v1_genomic.fna.gz + wget -q https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/904/863/225/GCA_904863225.1_KSB1_6J/GCA_904863225.1_KSB1_6J_genomic.fna.gz && gzip -d GCA_904863225.1_KSB1_6J_genomic.fna.gz + + mkdir fastas + mv *fna fastas/. + + nextflow run . 
-profile docker,just_msa -c .github/workflows/github_actions.config --fastas fastas --annotator prokka --publish_dir_mode link + + - name: Check MSA files + run: | + for file in grandeur/*/summary_statistics.txt grandeur/iqtree2/iqtree.treefile.nwk grandeur/snp-dists/snp_matrix.txt grandeur/gff/*gff + do + head $file + wc -l $file + done + + + - name: tree + run: tree grandeur* + diff --git a/.github/workflows/prokka_roary.yml b/.github/workflows/prokka_roary.yml new file mode 100755 index 0000000..775df57 --- /dev/null +++ b/.github/workflows/prokka_roary.yml @@ -0,0 +1,49 @@ +name: Test Grandeur just_msa with prokka and roary workflow + +on: [pull_request, workflow_dispatch] + +jobs: + + test: + runs-on: ubuntu-20.04 + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Clean up Disk space + uses: jlumbroso/free-disk-space@main + + - name: Install Nextflow + run: | + wget -qO- get.nextflow.io | bash + sudo mv nextflow /usr/local/bin/ + + nextflow -version + + - name: Run Grandeur + run: | + docker --version + + wget -q https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/904/864/595/GCA_904864595.1_INF333/GCA_904864595.1_INF333_genomic.fna.gz && gzip -d GCA_904864595.1_INF333_genomic.fna.gz + wget -q https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/013/783/245/GCA_013783245.1_ASM1378324v1/GCA_013783245.1_ASM1378324v1_genomic.fna.gz && gzip -d GCA_013783245.1_ASM1378324v1_genomic.fna.gz + wget -q https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/026/626/185/GCA_026626185.1_ASM2662618v1/GCA_026626185.1_ASM2662618v1_genomic.fna.gz && gzip -d GCA_026626185.1_ASM2662618v1_genomic.fna.gz + wget -q https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/020/808/985/GCA_020808985.1_ASM2080898v1/GCA_020808985.1_ASM2080898v1_genomic.fna.gz && gzip -d GCA_020808985.1_ASM2080898v1_genomic.fna.gz + wget -q https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/904/863/225/GCA_904863225.1_KSB1_6J/GCA_904863225.1_KSB1_6J_genomic.fna.gz && gzip -d GCA_904863225.1_KSB1_6J_genomic.fna.gz + + mkdir fastas + mv *fna fastas/. + + nextflow run . -profile docker,just_msa -c .github/workflows/github_actions.config --fastas fastas --annotator prokka --aligner roary --publish_dir_mode link + + - name: Check MSA files + run: | + for file in grandeur/*/summary_statistics.txt grandeur/iqtree2/iqtree.treefile.nwk grandeur/snp-dists/snp_matrix.txt grandeur/*/*gff* + do + head $file + wc -l $file + done + + + - name: tree + run: tree grandeur* + diff --git a/.github/workflows/roary.yml b/.github/workflows/roary.yml index 18b0e4b..722f971 100755 --- a/.github/workflows/roary.yml +++ b/.github/workflows/roary.yml @@ -10,6 +10,9 @@ jobs: - name: Checkout uses: actions/checkout@v4 + - name: Clean up Disk space + uses: jlumbroso/free-disk-space@main + - name: Install Nextflow run: | wget -qO- get.nextflow.io | bash @@ -30,12 +33,15 @@ jobs: mkdir fastas mv *fna fastas/. - nextflow run . -profile docker,just_msa -c .github/workflows/github_actions.config --fastas fastas --aligner roary + nextflow run . 
-profile docker,just_msa -c .github/workflows/github_actions.config --fastas fastas --aligner roary --publish_dir_mode link - name: Check MSA files run: | - for file in grandeur/*/summary_statistics.txt grandeur/iqtree2/iqtree.treefile.nwk grandeur/snp-dists/snp_matrix.txt + for file in grandeur/*/summary_statistics.txt grandeur/iqtree2/iqtree.treefile.nwk grandeur/snp-dists/snp_matrix.txt grandeur/gff/*gff do head $file wc -l $file done + + - name: tree + run: tree grandeur* diff --git a/.github/workflows/run_workflow.yml b/.github/workflows/run_workflow.yml index 5a70bf1..f61b2d2 100644 --- a/.github/workflows/run_workflow.yml +++ b/.github/workflows/run_workflow.yml @@ -10,6 +10,9 @@ jobs: - name: Checkout uses: actions/checkout@v4 + - name: Clean up Disk space + uses: jlumbroso/free-disk-space@main + - name: Install Nextflow run: | wget -qO- get.nextflow.io | bash @@ -27,7 +30,7 @@ jobs: wget -q ftp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR283/002/SRR2838702/SRR2838702_2.fastq.gz mv *fastq.gz reads/. - nextflow run . -profile docker -c .github/workflows/github_actions.config --reads reads + nextflow run . -profile docker -c .github/workflows/github_actions.config --reads reads --publish_dir_mode link cat grandeur/grandeur_summary.tsv cat grandeur/summary/software_versions.yml ls grandeur/multiqc/multiqc_report.html @@ -39,3 +42,6 @@ jobs: head $file wc -l $file done + + - name: tree + run: tree grandeur* diff --git a/.github/workflows/salmonella.yml b/.github/workflows/salmonella.yml index 04ca863..2767302 100644 --- a/.github/workflows/salmonella.yml +++ b/.github/workflows/salmonella.yml @@ -10,6 +10,9 @@ jobs: - name: Checkout uses: actions/checkout@v4 + - name: Clean up Disk space + uses: jlumbroso/free-disk-space@main + - name: Install Nextflow run: | wget -qO- get.nextflow.io | bash @@ -35,7 +38,7 @@ jobs: ls fastas/* > fastas.txt - nextflow run . -profile docker -c .github/workflows/github_actions.config --fasta_list fastas.txt + nextflow run . -profile docker -c .github/workflows/github_actions.config --fasta_list fastas.txt --publish_dir_mode link cat grandeur/grandeur_summary.tsv cat grandeur/summary/software_versions.yml @@ -48,4 +51,6 @@ jobs: head $file wc -l $file done - \ No newline at end of file + + - name: tree + run: tree grandeur* diff --git a/.github/workflows/sample_sheet.yml b/.github/workflows/sample_sheet.yml index deb0f44..eafcf07 100755 --- a/.github/workflows/sample_sheet.yml +++ b/.github/workflows/sample_sheet.yml @@ -10,6 +10,9 @@ jobs: - name: Checkout uses: actions/checkout@v4 + - name: Clean up Disk space + uses: jlumbroso/free-disk-space@main + - name: Install Nextflow run: | wget -qO- get.nextflow.io | bash @@ -30,7 +33,7 @@ jobs: echo "sample,fastq_1,fastq_2" > sample_sheet.csv echo "SRR2838702,reads/SRR2838702_1.fastq.gz,reads/SRR2838702_2.fastq.gz" >> sample_sheet.csv - nextflow run . -profile docker -c .github/workflows/github_actions.config --sample_sheet sample_sheet.csv --skip_extras + nextflow run . 
-profile docker -c .github/workflows/github_actions.config --sample_sheet sample_sheet.csv --skip_extras --publish_dir_mode link - name: Check summary files run: | @@ -38,4 +41,7 @@ jobs: do head $file wc -l $file - done \ No newline at end of file + done + + - name: tree + run: tree grandeur* diff --git a/.github/workflows/strepA.yml b/.github/workflows/strepA.yml index 7db4fa8..15764e0 100644 --- a/.github/workflows/strepA.yml +++ b/.github/workflows/strepA.yml @@ -10,6 +10,9 @@ jobs: - name: Checkout uses: actions/checkout@v4 + - name: Clean up Disk space + uses: jlumbroso/free-disk-space@main + - name: Install Nextflow run: | wget -qO- get.nextflow.io | bash @@ -33,7 +36,7 @@ jobs: mv ${accession}_genomic.fna fastas/. done - nextflow run . -profile docker -c .github/workflows/github_actions.config --fastas fastas + nextflow run . -profile docker -c .github/workflows/github_actions.config --fastas fastas --publish_dir_mode link cat grandeur/grandeur_summary.tsv cat grandeur/summary/software_versions.yml ls grandeur/multiqc/multiqc_report.html @@ -45,3 +48,6 @@ jobs: head $file wc -l $file done + + - name: tree + run: tree grandeur* diff --git a/.github/workflows/strep_pneumo.yml b/.github/workflows/strep_pneumo.yml index 605411e..7009f2e 100644 --- a/.github/workflows/strep_pneumo.yml +++ b/.github/workflows/strep_pneumo.yml @@ -10,6 +10,9 @@ jobs: - name: Checkout uses: actions/checkout@v4 + - name: Clean up Disk space + uses: jlumbroso/free-disk-space@main + - name: Install Nextflow run: | wget -qO- get.nextflow.io | bash @@ -33,7 +36,7 @@ jobs: mv ${accession}_genomic.fna fastas/. done - nextflow run . -profile docker -c .github/workflows/github_actions.config --fastas fastas + nextflow run . -profile docker -c .github/workflows/github_actions.config --fastas fastas --publish_dir_mode link cat grandeur/grandeur_summary.tsv cat grandeur/summary/software_versions.yml ls grandeur/multiqc/multiqc_report.html @@ -45,4 +48,7 @@ jobs: head $file wc -l $file done + + - name: tree + run: tree grandeur* \ No newline at end of file diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index e5dc979..76148bf 100755 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -1,4 +1,4 @@ -name: Test Grandeur workflow profile test0 +name: Test Grandeur workflow profile test on: [pull_request, workflow_dispatch] @@ -10,6 +10,9 @@ jobs: - name: Checkout uses: actions/checkout@v4 + - name: Clean up Disk space + uses: jlumbroso/free-disk-space@main + - name: Install Nextflow run: | wget -qO- get.nextflow.io | bash @@ -19,12 +22,15 @@ jobs: - name: Run Grandeur run: | - nextflow run . -profile docker,test0 -c .github/workflows/github_actions.config + nextflow run . 
-profile docker,test -c .github/workflows/github_actions.config --publish_dir_mode link - name: Check contig files run: | - for file in grandeur/contigs/*_contigs.fa + for file in grandeur/contigs/*_contigs.fa grandeur/grandeur_summary.tsv do head $file wc -l $file done + + - name: tree + run: tree grandeur* diff --git a/.github/workflows/test0.yml b/.github/workflows/test0.yml new file mode 100755 index 0000000..a31b9b3 --- /dev/null +++ b/.github/workflows/test0.yml @@ -0,0 +1,36 @@ +name: Test Grandeur workflow profile test0 + +on: [pull_request, workflow_dispatch] + +jobs: + + test: + runs-on: ubuntu-20.04 + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Clean up Disk space + uses: jlumbroso/free-disk-space@main + + - name: Install Nextflow + run: | + wget -qO- get.nextflow.io | bash + sudo mv nextflow /usr/local/bin/ + + nextflow -version + + - name: Run Grandeur + run: | + nextflow run . -profile docker,test0 -c .github/workflows/github_actions.config --publish_dir_mode link + + - name: Check contig files + run: | + for file in grandeur/contigs/*_contigs.fa + do + head $file + wc -l $file + done + + - name: tree + run: tree grandeur* diff --git a/.github/workflows/test1.yml b/.github/workflows/test1.yml new file mode 100755 index 0000000..84c91a2 --- /dev/null +++ b/.github/workflows/test1.yml @@ -0,0 +1,36 @@ +name: Test Grandeur workflow profile test1 + +on: [pull_request, workflow_dispatch] + +jobs: + + test: + runs-on: ubuntu-20.04 + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Clean up Disk space + uses: jlumbroso/free-disk-space@main + + - name: Install Nextflow + run: | + wget -qO- get.nextflow.io | bash + sudo mv nextflow /usr/local/bin/ + + nextflow -version + + - name: Run Grandeur + run: | + nextflow run . -profile docker,test1 -c .github/workflows/github_actions.config --publish_dir_mode link + + - name: Check contig files + run: | + for file in grandeur/contigs/*_contigs.fa grandeur/grandeur_summary.tsv + do + head $file + wc -l $file + done + + - name: tree + run: tree grandeur* diff --git a/.github/workflows/test2.yml b/.github/workflows/test2.yml new file mode 100755 index 0000000..d30739f --- /dev/null +++ b/.github/workflows/test2.yml @@ -0,0 +1,36 @@ +name: Test Grandeur workflow profile test2 + +on: [pull_request, workflow_dispatch] + +jobs: + + test: + runs-on: ubuntu-20.04 + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Clean up Disk space + uses: jlumbroso/free-disk-space@main + + - name: Install Nextflow + run: | + wget -qO- get.nextflow.io | bash + sudo mv nextflow /usr/local/bin/ + + nextflow -version + + - name: Run Grandeur + run: | + nextflow run . 
-profile docker,test2 -c .github/workflows/github_actions.config --publish_dir_mode link + + - name: Check contig files + run: | + for file in grandeur/contigs/*_contigs.fa grandeur/grandeur_summary.tsv + do + head $file + wc -l $file + done + + - name: tree + run: tree grandeur* diff --git a/.github/workflows/test3.yml b/.github/workflows/test3.yml new file mode 100755 index 0000000..2a00692 --- /dev/null +++ b/.github/workflows/test3.yml @@ -0,0 +1,36 @@ +name: Test Grandeur workflow profile test3 + +on: [pull_request, workflow_dispatch] + +jobs: + + test: + runs-on: ubuntu-20.04 + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Clean up Disk space + uses: jlumbroso/free-disk-space@main + + - name: Install Nextflow + run: | + wget -qO- get.nextflow.io | bash + sudo mv nextflow /usr/local/bin/ + + nextflow -version + + - name: Run Grandeur + run: | + nextflow run . -profile docker,test3 -c .github/workflows/github_actions.config --publish_dir_mode link + + - name: Check contig files + run: | + for file in grandeur/contigs/*_contigs.fa + do + head $file + wc -l $file + done + + - name: tree + run: tree grandeur* diff --git a/.github/workflows/test4.yml b/.github/workflows/test4.yml new file mode 100755 index 0000000..2961911 --- /dev/null +++ b/.github/workflows/test4.yml @@ -0,0 +1,36 @@ +name: Test Grandeur workflow profile test4 + +on: [pull_request, workflow_dispatch] + +jobs: + + test: + runs-on: ubuntu-20.04 + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Clean up Disk space + uses: jlumbroso/free-disk-space@main + + - name: Install Nextflow + run: | + wget -qO- get.nextflow.io | bash + sudo mv nextflow /usr/local/bin/ + + nextflow -version + + - name: Run Grandeur + run: | + nextflow run . -profile docker,test4 -c .github/workflows/github_actions.config --publish_dir_mode link + + - name: Check contig files + run: | + for file in grandeur/contigs/*_contigs.fa grandeur/grandeur_summary.tsv + do + head $file + wc -l $file + done + + - name: tree + run: tree grandeur* diff --git a/.github/workflows/test5.yml b/.github/workflows/test5.yml new file mode 100755 index 0000000..4271fff --- /dev/null +++ b/.github/workflows/test5.yml @@ -0,0 +1,36 @@ +name: Test Grandeur workflow profile test5 + +on: [pull_request, workflow_dispatch] + +jobs: + + test: + runs-on: ubuntu-20.04 + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Clean up Disk space + uses: jlumbroso/free-disk-space@main + + - name: Install Nextflow + run: | + wget -qO- get.nextflow.io | bash + sudo mv nextflow /usr/local/bin/ + + nextflow -version + + - name: Run Grandeur + run: | + nextflow run . 
-profile docker,test5 -c .github/workflows/github_actions.config --publish_dir_mode link + + - name: Check contig files + run: | + for file in grandeur/contigs/*_contigs.fa grandeur/grandeur_summary.tsv + do + head $file + wc -l $file + done + + - name: tree + run: tree grandeur* diff --git a/.github/workflows/test6.yml b/.github/workflows/test6.yml new file mode 100755 index 0000000..ecfa7e1 --- /dev/null +++ b/.github/workflows/test6.yml @@ -0,0 +1,36 @@ +name: Test Grandeur workflow profile test6 + +on: [pull_request, workflow_dispatch] + +jobs: + + test: + runs-on: ubuntu-20.04 + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Clean up Disk space + uses: jlumbroso/free-disk-space@main + + - name: Install Nextflow + run: | + wget -qO- get.nextflow.io | bash + sudo mv nextflow /usr/local/bin/ + + nextflow -version + + - name: Run Grandeur + run: | + nextflow run . -profile docker,test6 -c .github/workflows/github_actions.config --publish_dir_mode link + + - name: Check contig files + run: | + for file in grandeur/contigs/*_contigs.fa grandeur/grandeur_summary.tsv + do + head $file + wc -l $file + done + + - name: tree + run: tree grandeur* diff --git a/.github/workflows/test7.yml b/.github/workflows/test7.yml new file mode 100755 index 0000000..33ece3c --- /dev/null +++ b/.github/workflows/test7.yml @@ -0,0 +1,36 @@ +name: Test Grandeur workflow profile test7 + +on: [pull_request, workflow_dispatch] + +jobs: + + test: + runs-on: ubuntu-20.04 + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Clean up Disk space + uses: jlumbroso/free-disk-space@main + + - name: Install Nextflow + run: | + wget -qO- get.nextflow.io | bash + sudo mv nextflow /usr/local/bin/ + + nextflow -version + + - name: Run Grandeur + run: | + nextflow run . -profile docker,test7 -c .github/workflows/github_actions.config --publish_dir_mode link + + - name: Check contig files + run: | + for file in grandeur/contigs/*_contigs.fa grandeur/grandeur_summary.tsv + do + head $file + wc -l $file + done + + - name: tree + run: tree grandeur* diff --git a/.github/workflows/test8.yml b/.github/workflows/test8.yml new file mode 100755 index 0000000..b6f9a3c --- /dev/null +++ b/.github/workflows/test8.yml @@ -0,0 +1,36 @@ +name: Test Grandeur workflow profile test8 + +on: [pull_request, workflow_dispatch] + +jobs: + + test: + runs-on: ubuntu-20.04 + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Clean up Disk space + uses: jlumbroso/free-disk-space@main + + - name: Install Nextflow + run: | + wget -qO- get.nextflow.io | bash + sudo mv nextflow /usr/local/bin/ + + nextflow -version + + - name: Run Grandeur + run: | + nextflow run . -profile docker,test8 -c .github/workflows/github_actions.config --publish_dir_mode link + + - name: Check contig files + run: | + for file in grandeur/grandeur_summary.tsv + do + head $file + wc -l $file + done + + - name: tree + run: tree grandeur* diff --git a/.github/workflows/vibrio.yml b/.github/workflows/vibrio.yml index daa467b..b7aba74 100644 --- a/.github/workflows/vibrio.yml +++ b/.github/workflows/vibrio.yml @@ -10,6 +10,9 @@ jobs: - name: Checkout uses: actions/checkout@v4 + - name: Clean up Disk space + uses: jlumbroso/free-disk-space@main + - name: Install Nextflow run: | wget -qO- get.nextflow.io | bash @@ -33,7 +36,7 @@ jobs: mv ${accession}_genomic.fna fastas/. done - nextflow run . -profile docker -c .github/workflows/github_actions.config --fastas fastas + nextflow run . 
-profile docker -c .github/workflows/github_actions.config --fastas fastas --publish_dir_mode link cat grandeur/grandeur_summary.tsv cat grandeur/summary/software_versions.yml ls grandeur/multiqc/multiqc_report.html diff --git a/.github/workflows/withoutfastani.yml b/.github/workflows/withoutfastani.yml index b23a3a1..72a070a 100644 --- a/.github/workflows/withoutfastani.yml +++ b/.github/workflows/withoutfastani.yml @@ -10,6 +10,9 @@ jobs: - name: Checkout uses: actions/checkout@v4 + - name: Clean up Disk space + uses: jlumbroso/free-disk-space@main + - name: Install Nextflow run: | wget -qO- get.nextflow.io | bash @@ -30,7 +33,7 @@ jobs: mkdir fastas mv *fna fastas/. - nextflow run . -profile docker,msa -c .github/workflows/github_actions.config --fastas fastas --exclude_top_hit + nextflow run . -profile docker,msa -c .github/workflows/github_actions.config --fastas fastas --exclude_top_hit --publish_dir_mode link cat grandeur/grandeur_summary.tsv cat grandeur/summary/software_versions.yml diff --git a/.nf-core.yml b/.nf-core.yml new file mode 100755 index 0000000..313462b --- /dev/null +++ b/.nf-core.yml @@ -0,0 +1,28 @@ +repository_type: pipeline +lint: + actions_awsfulltest: False + files_exist: False + files_unchanged: False + multiqc_config: False + template_strings: False + version_consistencey: False + nextflow_config: + - "process.cpus" + - "process.memory" + - "process.time" + - "manifest.name" + - "manifest.homePage" +nf_core_version: 3.0.2 +org_path: null +repository_type: pipeline +template: + author: Erin Young + description: reference free assembly and typing + force: true + is_nfcore: false + name: Grandeur + org: UPHL-BioNGS/Grandeur + outdir: . + skip_features: null + version: 1.0.0 +update: null \ No newline at end of file diff --git a/bin/summary.py b/bin/summary.py index e3f7328..03b4b80 100755 --- a/bin/summary.py +++ b/bin/summary.py @@ -33,6 +33,7 @@ legsta = 'legsta_summary.csv' mash = 'mash_summary.csv' mash_err = 'mash_err_summary.csv' +meningotype = 'meningotype_summary.tsv' mlst = 'mlst_summary.tsv' mykrobe = 'mykrobe_summary.csv' pbptyper = 'pbptyper_summary.tsv' @@ -112,8 +113,8 @@ print("Adding results for " + file) analysis = "amrfinder" new_df = pd.read_table(file, dtype = str, index_col= False) - new_df = new_df.sort_values('Gene symbol') - new_df['genes (per cov/per ident)'] = new_df['Gene symbol'] + ' (' + new_df['% Coverage of reference sequence'] + '/' + new_df['% Identity to reference sequence'] + ')' + new_df = new_df.sort_values('Element symbol') + new_df['genes (per cov/per ident)'] = new_df['Element symbol'] + ' (' + new_df['% Coverage of reference'] + '/' + new_df['% Identity to reference'] + ')' new_df = new_df[['Name', 'genes (per cov/per ident)']] new_df = new_df.groupby('Name', as_index=False).agg({'genes (per cov/per ident)': lambda x: list(x)}) new_df = new_df.add_prefix(analysis + '_') @@ -277,6 +278,19 @@ summary_df = pd.merge(summary_df, new_df, left_on="sample", right_on=analysis + "_sample", how = 'left') summary_df.drop(analysis + "_sample", axis=1, inplace=True) +# meningotype : renaming column and reformatting for matching +if exists(meningotype) : + file = meningotype + print("Adding results for " + file) + analysis = "meningotype" + new_df = pd.read_table(file, dtype = str, index_col= False) + new_df = new_df.add_prefix(analysis + "_") + new_df.columns = [x.lower() for x in new_df.columns] + new_df[analysis + "_sample"] = new_df[analysis + "_sample_id"].str.replace(r'\.(fasta|fna|fa)$', '', regex=True) + summary_df = 
pd.merge(summary_df, new_df, left_on="sample", right_on=analysis + "_sample", how = 'left') + summary_df.drop([analysis + "_sample_id", analysis + "_sample"], axis=1, inplace=True) + + # plasmidfinder : merging relevant rows into one if exists(plasmidfinder) : file = plasmidfinder @@ -346,31 +360,31 @@ summary_df.drop(analysis + "_sample", axis=1, inplace=True) # multiqc : bbduk and fastp -if exists(multiqc_json) : - file = multiqc_json - print("Adding analysis parsed via multiqc in " + file) - with open(file) as multiqc_data: - data = json.load(multiqc_data) +# if exists(multiqc_json) : +# file = multiqc_json +# print("Adding analysis parsed via multiqc in " + file) +# with open(file) as multiqc_data: +# data = json.load(multiqc_data) - # fastp filtered reads - if "fastp_filtered_reads_plot" in data["report_plot_data"].keys(): - samples = [sample.replace("_rmphix_R1", "") for sample in data["report_plot_data"]['fastp_filtered_reads_plot']['samples'][0]] - fastp_passedreads_df = pd.DataFrame(samples, columns=['fastp_sample']) - fastp_passedreads_df['fastp_passed_reads'] = data["report_plot_data"]['fastp_filtered_reads_plot']['datasets'][0][0]['data'] - summary_df = pd.merge(summary_df, fastp_passedreads_df, left_on="sample", right_on="fastp_sample", how = 'left') - summary_df.drop("fastp_sample", axis=1, inplace=True) +# # fastp filtered reads +# if "fastp_filtered_reads_plot" in data["report_plot_data"].keys(): +# samples = [sample.replace("_rmphix_R1", "") for sample in data["report_plot_data"]['fastp_filtered_reads_plot']['samples'][0]] +# fastp_passedreads_df = pd.DataFrame(samples, columns=['fastp_sample']) +# fastp_passedreads_df['fastp_passed_reads'] = data["report_plot_data"]['fastp_filtered_reads_plot']['datasets'][0][0]['data'] +# summary_df = pd.merge(summary_df, fastp_passedreads_df, left_on="sample", right_on="fastp_sample", how = 'left') +# summary_df.drop("fastp_sample", axis=1, inplace=True) - # bbduk phix reads - if "bbmap" in data['report_saved_raw_data'].keys(): - print("Adding in phix reads from bbmap") - samples = [sample.replace(".phix", "") for sample in data['report_saved_raw_data']['bbmap']['stats'].keys()] - phix_reads=[] - for sample in data['report_saved_raw_data']['bbmap']['stats'].keys() : - phix_reads.append(data['report_saved_raw_data']['bbmap']['stats'][sample]['kv']['Matched']) - bbduk_phixreads_df = pd.DataFrame(samples, columns=['bbduk_sample']) - bbduk_phixreads_df['bbduk_phix_reads'] = phix_reads - summary_df = pd.merge(summary_df, bbduk_phixreads_df, left_on="sample", right_on="bbduk_sample", how = 'left') - summary_df.drop("bbduk_sample", axis=1, inplace=True) +# # bbduk phix reads +# if "bbmap" in data['report_saved_raw_data'].keys(): +# print("Adding in phix reads from bbmap") +# samples = [sample.replace(".phix", "") for sample in data['report_saved_raw_data']['bbmap']['stats'].keys()] +# phix_reads=[] +# for sample in data['report_saved_raw_data']['bbmap']['stats'].keys() : +# phix_reads.append(data['report_saved_raw_data']['bbmap']['stats'][sample]['kv']['Matched']) +# bbduk_phixreads_df = pd.DataFrame(samples, columns=['bbduk_sample']) +# bbduk_phixreads_df['bbduk_phix_reads'] = phix_reads +# summary_df = pd.merge(summary_df, bbduk_phixreads_df, left_on="sample", right_on="bbduk_sample", how = 'left') +# summary_df.drop("bbduk_sample", axis=1, inplace=True) if exists(multiqc_stats) : file = multiqc_stats @@ -387,6 +401,17 @@ summary_df.drop("Sample", axis=1, inplace=True) summary_df.drop("possible_fastqc_name", axis=1, inplace=True) + if 
"fastp-pct_surviving" in new_df.columns : + tmp_df = new_df[["Sample","fastp-pct_surviving"]].copy() + tmp_df["fastp_pct_passed_reads"] = tmp_df["fastp-pct_surviving"].astype(float).round(2) + tmp_df.drop("fastp-pct_surviving", axis=1, inplace=True) + tmp_df = tmp_df.dropna(subset=['fastp_pct_passed_reads']) + + summary_df["possible_fastp_name"] = summary_df['file'].str.split(" ").str[0].str.split(".").str[0] + summary_df = pd.merge(summary_df, tmp_df, left_on="possible_fastp_name", right_on="Sample", how = 'left') + summary_df.drop("Sample", axis=1, inplace=True) + summary_df.drop("possible_fastp_name", axis=1, inplace=True) + # core genome analysis file is also from multiqc if exists(core): file = core @@ -597,8 +622,7 @@ def fill_coverage(row): 'fastqc_total_sequences', 'fastqc_flagged_sequences', 'fastqc_avg_length', - 'fastp_passed_reads', - 'bbduk_phix_reads', + 'fastp_pct_passed_reads', 'quast_#_contigs', 'quast_gc_(%)', 'warnings', @@ -634,6 +658,7 @@ def fill_coverage(row): 'kaptive_best_match_locus_O', 'kaptive_best_match_locus_K', 'elgato_st', + 'meningotype_serogroup', 'mykrobe_phylo_group', 'mykrobe_species', 'mykrobe_lineage', diff --git a/configs/UPHL.config b/conf/UPHL.config similarity index 70% rename from configs/UPHL.config rename to conf/UPHL.config index fd3f3b4..c297db0 100644 --- a/configs/UPHL.config +++ b/conf/UPHL.config @@ -1,6 +1,5 @@ -singularity.enabled = true -singularity.autoMounts = true -singularity.cacheDir = '/Volumes/IDGenomics_NAS/singularity' +docker.enabled = true +docker.runOptions = '-u $(id -u):$(id -g)' params.blast_db = '/Volumes/IDGenomics_NAS/Data/refseq/222/blast_db/' params.blast_db_type = 'ref_prok_rep_genomes' diff --git a/conf/base.config b/conf/base.config new file mode 100755 index 0000000..5da0f83 --- /dev/null +++ b/conf/base.config @@ -0,0 +1,59 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + UPHL-BioNGS/Grandeur Nextflow base config file +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + A 'blank slate' config file, appropriate for general use on most high performance + compute environments. Assumes that all software is installed and available on + the PATH. Runs in `local` mode - all jobs will be run on the logged in environment. +---------------------------------------------------------------------------------------- +*/ + +process { + cpus = { 1 } + memory = { 6.GB } + time = { 4.h * task.attempt } + + errorStrategy = { task.exitStatus in ((130..145) + 104) ? 'retry' : 'finish' } + maxRetries = 1 + maxErrors = '-1' + + // Process-specific resource requirements + // NOTE - Please try and re-use the labels below as much as possible. + // These labels are used and recognised by default in DSL2 files hosted on nf-core/modules. + // If possible, it would be nice to keep the same label naming convention when + // adding in your local modules too. 
+ // See https://www.nextflow.io/docs/latest/config.html#config-process-selectors + withLabel:process_single { + cpus = { 1 } + memory = { 6.GB } + time = { 4.h * task.attempt } + } + withLabel:process_low { + cpus = { 2 } + memory = { 12.GB } + time = { 4.h * task.attempt } + } + withLabel:process_medium { + cpus = { 6 } + memory = { 36.GB } + time = { 8.h * task.attempt } + } + withLabel:process_high { + cpus = { 12 } + memory = { 72.GB } + time = { 16.h * task.attempt } + } + withLabel:process_long { + time = { 20.h * task.attempt } + } + withLabel:process_high_memory { + memory = { 200.GB } + } + withLabel:error_ignore { + errorStrategy = 'ignore' + } + withLabel:error_retry { + errorStrategy = 'retry' + maxRetries = 2 + } +} \ No newline at end of file diff --git a/configs/grandeur_params.yml b/conf/grandeur_params.yml similarity index 100% rename from configs/grandeur_params.yml rename to conf/grandeur_params.yml diff --git a/configs/grandeur_template.config b/conf/grandeur_template.config similarity index 100% rename from configs/grandeur_template.config rename to conf/grandeur_template.config diff --git a/conf/modules.config b/conf/modules.config new file mode 100755 index 0000000..a2d2383 --- /dev/null +++ b/conf/modules.config @@ -0,0 +1,413 @@ +process { + + withName: AMRFINDER { + publishDir = [ + path: { "${params.outdir}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + withName: BAKTA { + publishDir = [ + path: { "${params.outdir}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + withName: BLASTN { + publishDir = [ + path: { "${params.outdir}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + withName: BLOBTOOLS_CREATE { + publishDir = [ + path: { "${params.outdir}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + withName: BLOBTOOLS_VIEW { + publishDir = [ + path: { "${params.outdir}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + withName: BLOBTOOLS_PLOT { + publishDir = [ + [ + path: { "${params.outdir}" }, + mode: params.publish_dir_mode, + pattern: 'logs/*/*log' + ], + [ + path: { "${params.outdir}" }, + mode: params.publish_dir_mode, + pattern: 'blobtools/*.stats.txt' + ], + [ + path: { "${params.outdir}" }, + mode: params.publish_dir_mode, + pattern: 'blobtools/*png' + ] + ] + } + withName: CIRCULOCOV { + publishDir = [ + [ + path: { "${params.outdir}" }, + mode: params.publish_dir_mode, + pattern: 'logs/*/*log' + ], + [ + path: { "${params.outdir}" }, + mode: params.publish_dir_mode, + pattern: 'circulocov/*' + ], + [ + path: { "${params.outdir}" }, + mode: params.publish_dir_mode, + pattern: 'circulocov/*/*' + ] + ] + } + // withName: DATASETS_SUMMARY { + // publishDir = [ + // path: { "${params.outdir}" }, + // mode: params.publish_dir_mode, + // saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + // ] + // } + withName: DATASETS_DOWNLOAD { + publishDir = [ + path: { "${params.outdir}" }, + mode: params.publish_dir_mode, + pattern: "logs/*/*log" + ] + } + withName: DRPRG { + publishDir = [ + [ + path: { "${params.outdir}" }, + mode: params.publish_dir_mode, + pattern: 'logs/*/*log' + ], + [ + path: { "${params.outdir}" }, + mode: params.publish_dir_mode, + pattern: 'drprg/*/*' + ] + ] + } + withName: ELGATO { + publishDir = [ + path: { "${params.outdir}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + withName: EMMTYPER { + publishDir = [ + path: { "${params.outdir}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + withName: ENA_DOWNLOAD { + publishDir = [ + path: { "${params.outdir}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + withName: FASTANI { + publishDir = [ + [ + path: { "${params.outdir}" }, + mode: params.publish_dir_mode, + pattern: 'logs/*/*log' + ], + [ + path: { "${params.outdir}" }, + mode: params.publish_dir_mode, + pattern: 'fastani/*txt' + ] + ] + } + withName: FASTP { + publishDir = [ + path: { "${params.outdir}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + withName: FASTQC { + publishDir = [ + path: { "${params.outdir}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + withName: HEATCLUSTER { + publishDir = [ + path: { "${params.outdir}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + withName: IQTREE2 { + publishDir = [ + path: { "${params.outdir}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + withName: KAPTIVE { + publishDir = [ + path: { "${params.outdir}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + withName: KLEBORATE { + publishDir = [ + [ + path: { "${params.outdir}" }, + mode: params.publish_dir_mode, + pattern: "logs/*/*log" + ], + [ + path: { "${params.outdir}" }, + mode: params.publish_dir_mode, + pattern: "kleborate/*/*output.txt" + ] + ] + } + withName: KRAKEN2 { + publishDir = [ + path: { "${params.outdir}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + withName: CORE_GENOME_EVALUATION { + publishDir = [ + [ + path: { "${params.outdir}" }, + mode: params.publish_dir_mode, + pattern: 'logs/*/*log' + ], + [ + path: { "${params.outdir}" }, + mode: params.publish_dir_mode, + pattern: 'core_genome_evaluation/core_genome_evaluation.csv' + ] + ] + } + //withName: JSON_CONVERT { + //} + + //withName: MQC_PREP { + //} + + //withName: NAMES { + //} + + //withName: REFERENCES { + //} + + withName: SPECIES { + publishDir = [ + path: { "${params.outdir}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + withName: SUMMARY { + publishDir = [ + path: { "${params.outdir}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + ] + } + withName: MASH_SKETCH { + publishDir = [ + path: { "${params.outdir}" }, + mode: params.publish_dir_mode, + pattern: "logs/*/*.log" + ] + } + withName: MASH_DIST { + publishDir = [ + [ + path: { "${params.outdir}" }, + mode: params.publish_dir_mode, + pattern: "logs/*/*.log" + ], + [ + path: { "${params.outdir}" }, + mode: params.publish_dir_mode, + pattern: "mash/*.mashdist.txt" + ] + ] + } + withName: MASHTREE { + publishDir = [ + path: { "${params.outdir}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + withName: MENINGOTYPE { + publishDir = [ + path: { "${params.outdir}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + withName: MLST { + publishDir = [ + path: { "${params.outdir}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + withName: MULTIQC { + publishDir = [ + path: { "${params.outdir}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + withName: VERSIONS { + publishDir = [ + path: { "${params.outdir}/summary" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + withName: MYKROBE { + publishDir = [ + path: { "${params.outdir}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + withName: PANAROO { + publishDir = [ + path: { "${params.outdir}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + withName: PBPTYPER { + publishDir = [ + path: { "${params.outdir}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + withName: PHYTREEVIZ { + publishDir = [ + path: { "${params.outdir}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + withName: PLASMIDFINDER { + publishDir = [ + [ + path: { "${params.outdir}" }, + mode: params.publish_dir_mode, + pattern: "logs/*/*log" + ], + [ + path: { "${params.outdir}" }, + mode: params.publish_dir_mode, + pattern: "plasmidfinder/*/*" + ] + ] + } + withName: PROKKA { + publishDir = [ + path: { "${params.outdir}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + withName: QUAST { + publishDir = [ + path: { "${params.outdir}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> (filename.equals('versions.yml') || filename.startsWith('quast/') && filename.endsWith('.tsv')) ? null : filename } + ] + } + withName: ROARY { + publishDir = [ + path: { "${params.outdir}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + withName: SEQSERO2 { + publishDir = [ + path: { "${params.outdir}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + ] + } + withName: SEROTYPEFINDER { + publishDir = [ + [ + path: { "${params.outdir}" }, + mode: params.publish_dir_mode, + pattern: "logs/*/*log" + ], + [ + path: { "${params.outdir}" }, + mode: params.publish_dir_mode, + pattern: "serotypefinder/*/*" + ] + ] + } + withName: SHIGATYPER { + publishDir = [ + path: { "${params.outdir}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + withName: SNPDISTS { + publishDir = [ + path: { "${params.outdir}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + withName: SPADES { + publishDir = [ + [ + path: { "${params.outdir}" }, + mode: params.publish_dir_mode, + pattern: 'logs/*/*log' + ], + [ + path: { "${params.outdir}" }, + mode: params.publish_dir_mode, + pattern: 'spades/*' + ], + [ + path: { "${params.outdir}" }, + mode: params.publish_dir_mode, + pattern: 'spades/*/*' + ], + [ + path: { "${params.outdir}" }, + mode: params.publish_dir_mode, + pattern: 'contigs/*' + ] + ] + } +} \ No newline at end of file diff --git a/configs/tower_suggestions.config b/conf/tower_suggestions.config similarity index 100% rename from configs/tower_suggestions.config rename to conf/tower_suggestions.config diff --git a/main.nf b/main.nf index b4ec524..3c4c2bb 100644 --- a/main.nf +++ b/main.nf @@ -1,447 +1,70 @@ #!/usr/bin/env nextflow - -//# For aesthetics - and, yes, we are aware that there are better ways to write this than a bunch of 'println' statements -println('') -println(' /^^^^ /^^^^^^^ /^ /^^^ /^^ /^^^^^ /^^^^^^^^ /^^ /^^ /^^^^^^^ ') -println(' /^ /^^ /^^ /^^ /^ ^^ /^ /^^ /^^ /^^ /^^ /^^ /^^ /^^ /^^ /^^ ') -println('/^^ /^^ /^^ /^ /^^ /^^ /^^ /^^ /^^ /^^ /^^ /^^ /^^ /^^ /^^ ') -println('/^^ /^ /^^ /^^ /^^ /^^ /^^ /^^ /^^ /^^ /^^^^^^ /^^ /^^ /^ /^^ ') -println('/^^ /^^^^ /^^ /^^ /^^^^^^ /^^ /^^ /^ /^^ /^^ /^^ /^^ /^^ /^^ /^^ /^^ ') -println(' /^^ /^ /^^ /^^ /^^ /^^ /^^ /^ ^^ /^^ /^^ /^^ /^^ /^^ /^^ /^^ ') -println(' /^^^^^ /^^ /^^ /^^ /^^ /^^ /^^ /^^^^^ /^^^^^^^^ /^^^^^ /^^ /^^') -println('') - -// ##### ##### ##### ##### ##### ##### ##### ##### ##### ##### - -// Welcome to this workflow! Issues and contributions are gladly accepted at https://github.com/UPHL-BioNGS/Grandeur . 
- -// ##### ##### ##### ##### ##### ##### ##### ##### ##### ##### - -println("Currently using the Grandeur workflow for use with microbial sequencing.") -println("The view is great from 8299 feet (2530 meters) above sea level.\n") -println("Author: Erin Young") -println("email: eriny@utah.gov") -println("Version: ${workflow.manifest.version}") -println("") - -nextflow.enable.dsl = 2 -nextflow.enable.strict = true - -// ##### ##### ##### ##### ##### ##### ##### ##### ##### ##### - -// Getting config file - -// ##### ##### ##### ##### ##### ##### ##### ##### ##### ##### - -params.config_file = false -if ( params.config_file ) { - def src = new File("${workflow.projectDir}/configs/grandeur_template.config") - def dst = new File("${workflow.launchDir}/edit_me.config") - dst << src.text - println("A config file can be found at ${workflow.launchDir}/edit_me.config") - - def src1 = new File("${workflow.projectDir}/configs/grandeur_params.yml") - def dst1 = new File("${workflow.launchDir}/edit_me.yml") - dst1 << src1.text - println("A params file can be found at ${workflow.launchDir}/edit_me.yml") - exit 0 -} - -// ##### ##### ##### ##### ##### ##### ##### ##### ##### ##### - -// Defining params - -// ##### ##### ##### ##### ##### ##### ##### ##### ##### ##### - -params.outdir = "grandeur" - -// input files -params.reads = "" -params.fastas = "" -params.sample_sheet = "" -params.fasta_list = "" - -// external files -params.kraken2_db = "" -params.blast_db = "" -params.blast_db_type = "" -params.mash_db = "" -params.fastani_ref = "" -params.fastani_ref_list = "" -params.genome_sizes = workflow.projectDir + "/assets/genome_sizes.json" - -// for downloading from databases -params.sra_accessions = [] - -// thresholds and other params -params.minimum_reads = 10000 -params.datasets_max_genomes = 5 -params.mash_max_hits = 25 -params.min_core_genes = 1500 -params.iqtree2_outgroup = "" - -// subworkflow flags -params.current_datasets = false -params.skip_extras = false -params.exclude_top_hit = false -params.msa = false -params.aligner = 'panaroo' - - -// ##### ##### ##### ##### ##### ##### ##### ##### ##### ##### - -// Checking params - -// ##### ##### ##### ##### ##### ##### ##### ##### ##### ##### - -def paramCheck(keys) { - set_keys = [ - "outdir", - "fastas", - "msa", - "kraken2_db", - "mash_db", - "config_file", - "reads", - "sample_sheet", - "fasta_list", - "blast_db", - "blast_db_type", - "fastani_ref", - "fastani_ref_list", - "iqtree2_outgroup", - "genome_sizes", - "sra_accessions", - "minimum_reads", - "datasets_max_genomes", - "mash_max_hits", - "min_core_genes", - "current_datasets", - "skip_extras", - "exclude_top_hit", - "aligner"] - - for(key in keys){ - if (key !in set_keys){ - println("FATAL: ${key} isn't a supported param!") - println("Supported params: ${set_keys}") - exit 1 - } - } -} - -paramCheck(params.keySet()) - -// ##### ##### ##### ##### ##### ##### ##### ##### ##### ##### - -// Sharing params with subworkflows - -// ##### ##### ##### ##### ##### ##### ##### ##### ##### ##### - -// TODO : https://oldsite.nf-co.re/pipeline_schema_builder nf-core schema build - -include { average_nucleotide_identity } from "./subworkflows/average_nucleotide_identity" addParams(params) -include { blobtools } from "./subworkflows/blobtools" addParams(params) -include { de_novo_alignment } from "./subworkflows/de_novo_alignment" addParams(params) -include { information } from "./subworkflows/information" addParams(params) -include { kmer_taxonomic_classification } from 
"./subworkflows/kmer_taxonomic_classification" addParams(params) -include { min_hash } from "./subworkflows/min_hash" addParams(params) -include { phylogenetic_analysis } from "./subworkflows/phylogenetic_analysis" addParams(params) -include { quality_assessment } from "./subworkflows/quality_assessment" addParams(params) -include { report } from "./subworkflows/report" addParams(params) -include { test } from "./subworkflows/test" addParams(params) - -// ##### ##### ##### ##### ##### ##### ##### ##### ##### ##### - -// Channels for scripts - -// ##### ##### ##### ##### ##### ##### ##### ##### ##### ##### - -dataset_script = Channel.fromPath(workflow.projectDir + "/bin/datasets_download.py", type: "file") -evaluat_script = Channel.fromPath(workflow.projectDir + "/bin/evaluate.py", type: "file") -jsoncon_script = Channel.fromPath(workflow.projectDir + "/bin/json_convert.py", type: "file") -multiqc_script = Channel.fromPath(workflow.projectDir + "/bin/for_multiqc.py", type: "file") -summary_script = Channel.fromPath(workflow.projectDir + "/bin/summary.py", type: "file") -summfle_script = Channel.fromPath(workflow.projectDir + "/bin/summary_file.py", type: "file") -version_script = Channel.fromPath(workflow.projectDir + "/bin/versions.py", type: "file") - -// ##### ##### ##### ##### ##### ##### ##### ##### ##### ##### - -// Channels for input files - -// ##### ##### ##### ##### ##### ##### ##### ##### ##### ##### - -if (params.sample_sheet) { - // using a sample sheet with the column header of 'sample,fastq_1,fastq_2' - Channel - .fromPath("${params.sample_sheet}", type: "file") - .view { "Sample sheet found : ${it}" } - .splitCsv( header: true, sep: ',' ) - .map { row -> - meta = [id:row.sample] - tuple( meta, [ - file("${row.fastq_1}", checkIfExists: true), - file("${row.fastq_2}", checkIfExists: true)]) - } - .set {ch_reads} - -} else { - // Getting the fastq files from a directory - ch_reads = params.reads - ? Channel - .fromFilePairs(["${params.reads}/*_R{1,2}*.{fastq,fastq.gz,fq,fq.gz}", - "${params.reads}/*_{1,2}*.{fastq,fastq.gz,fq,fq.gz}"], size: 2 ) - .map { it -> - meta = [id:it[0].replaceAll(~/_S[0-9]+_L[0-9]+/,"")] - tuple( meta, [ - file(it[1][0], checkIfExists: true), - file(it[1][1], checkIfExists: true)]) - } - .unique() - .view { "Paired-end fastq files found : ${it[0].id}" } - : Channel.empty() -} - -if (params.fasta_list) { - // getting fastas from a file - Channel - .fromPath("${params.fasta_list}", type: "file") - .view { "Fasta list found : ${it}" } - .splitText() - .map{ it -> it.trim()} - .map{ it -> file(it) } - .map { it -> - meta = [id:it.baseName] - tuple( meta, it) - } - .set{ ch_fastas } -} else { - // getting fastas from a directory - ch_fastas = params.fastas - ? Channel - .fromPath("${params.fastas}/*{.fa,.fasta,.fna}") - .view { "Fasta file found : ${it.baseName}" } - .map { it -> - meta = [id: it.baseName] - tuple( meta, file(it, checkIfExists: true)) - } - .unique() - : Channel.empty() -} - -// Getting accession for downloading - -// from SRA -ch_sra_accessions = Channel.from( params.sra_accessions ) - -// ##### ##### ##### ##### ##### ##### ##### ##### ##### ##### - -// Channels for database files - -// ##### ##### ##### ##### ##### ##### ##### ##### ##### ##### - -// Getting the file with genome sizes of common organisms for fastqcscan. 
The End User can use their own file and set with a param -Channel - .fromPath(params.genome_sizes, type: "file") - .ifEmpty{ - println("The genome sizes file for this workflow are missing!") - exit 1} - .set { ch_genome_sizes } - -// Getting the database for blobtools -ch_blast_db = params.blast_db - ? Channel - .fromPath(params.blast_db, type: "dir") - .ifEmpty{ - println("No blast database was found at ${params.blast_db}") - println("Set 'params.blast_db' to directory with blast database") - exit 1 - } - .view { "Local Blast Database for Blobtools : $it" } - : Channel.empty() - -// Getting the kraken2 database -ch_kraken2_db = params.kraken2_db - ? Channel - .fromPath(params.kraken2_db, type: "dir") - .ifEmpty{ - println("No kraken2 database was found at ${params.kraken2_db}") - println("Set 'params.kraken2_db' to directory with kraken2 database") - exit 1 - } - .view { "Local kraken2 database : $it" } - : Channel.empty() - -// Getting the mash reference -ch_mash_db = params.mash_db - ? Channel - .fromPath(params.mash_db, type: "file") - .ifEmpty{ - println("No mash database was found at ${params.mash_db}") - println("Set 'params.mash_db' to file of pre-sketched mash reference") - exit 1 - } - .view { "Mash reference : $it" } - : Channel.empty() - -//# user supplied fastani reference genomes -ch_fastani_genomes = Channel.empty() - -if ( params.fastani_ref ) { - Channel - .of( params.fastani_ref ) - .splitCsv() - .flatten() - // no meta id - .map { it -> file(it) } - .view{ "Additional fastani reference genomes : $it" } - .set { ch_fastani_genomes_input } - - ch_fastani_genomes = ch_fastani_genomes.mix(ch_fastani_genomes_input) -} - -if ( params.fastani_ref_list ) { - Channel.fromPath(params.fastani_ref_list, type: "file") - .splitText() - .map( it -> it.trim()) - .map{ it -> file(it) } - .view{ "Additional fastani reference genome from file : $it" } - .set{ ch_fastani_ref_list } - - ch_fastani_genomes = ch_fastani_genomes.mix(ch_fastani_ref_list) -} - -println("The files and directory for results is " + params.outdir ) - -// ##### ##### ##### ##### ##### ##### ##### ##### ##### ##### - -// Workflow - -// ##### ##### ##### ##### ##### ##### ##### ##### ##### ##### - +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + UPHL-BioNGS/Grandeur +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Github : https://github.com/UPHL-BioNGS/Grandeur +---------------------------------------------------------------------------------------- +*/ + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + IMPORT FUNCTIONS / MODULES / SUBWORKFLOWS / WORKFLOWS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +include { INITIALIZE } from './subworkflows/local/initialize' +include { GRANDEUR } from './workflows/grandeur' + + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + RUN MAIN WORKFLOW +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ workflow { - ch_for_multiqc = Channel.empty() - ch_for_summary = ch_genome_sizes - ch_for_flag = Channel.empty() - ch_versions = Channel.empty() - - // getting test files - if ( ! 
params.sra_accessions.isEmpty() ) { - test(ch_sra_accessions) - ch_raw_reads = ch_reads.mix(test.out.fastq) - } else { - ch_raw_reads = ch_reads - } - - if ( params.sample_sheet || params.reads || params.sra_accessions ) { - de_novo_alignment(ch_raw_reads) - - ch_assembled = de_novo_alignment.out.contigs - ch_contigs = ch_fastas.mix(de_novo_alignment.out.contigs) - ch_reads_contigs = ch_fastas.map{it -> tuple{it[0], it[1], null}}.mix(de_novo_alignment.out.reads_contigs) - ch_clean_reads = de_novo_alignment.out.clean_reads - ch_for_multiqc = ch_for_multiqc.mix(de_novo_alignment.out.for_multiqc) - ch_versions = ch_versions.mix(de_novo_alignment.out.versions) - - } else { - ch_contigs = ch_fastas - ch_reads_contigs = Channel.empty() - ch_clean_reads = Channel.empty() - ch_assembled = Channel.empty() - } - // getting a summary of everything - if ( ! params.skip_extras ) { - quality_assessment( - ch_raw_reads, - ch_contigs, - ch_reads_contigs, - summfle_script) + main: + // + // SUBWORKFLOW: Initialize files and tasks + // + INITIALIZE () + + // + // WORKFLOW: Run main workflow + // + GRANDEUR ( + INITIALIZE.out.reads, + INITIALIZE.out.fastas, + INITIALIZE.out.fastani_genomes, + INITIALIZE.out.versions, + INITIALIZE.out.genome_sizes, + INITIALIZE.out.mash_db, + INITIALIZE.out.kraken2_db, + INITIALIZE.out.blast_db, + INITIALIZE.out.dataset_script, + INITIALIZE.out.evaluat_script, + INITIALIZE.out.jsoncon_script, + INITIALIZE.out.multiqc_script, + INITIALIZE.out.summary_script, + INITIALIZE.out.summfle_script, + INITIALIZE.out.version_script + ) - ch_for_multiqc = ch_for_multiqc.mix(quality_assessment.out.for_multiqc) - ch_for_summary = ch_for_summary.mix(quality_assessment.out.for_summary) - ch_versions = ch_versions.mix(quality_assessment.out.versions) - - // optional subworkflow blobtools (useful for interspecies contamination) - if ( params.blast_db && ( params.sample_sheet || params.reads || params.sra_accessions )) { - blobtools(quality_assessment.out.bams, ch_blast_db ) - - ch_for_summary = ch_for_summary.mix(blobtools.out.for_summary) - ch_for_flag = ch_for_flag.mix(blobtools.out.for_flag) - - ch_versions = ch_versions.mix(blobtools.out.versions) - } - - // optional subworkflow kraken2 (useful for interspecies contamination) - if ( params.kraken2_db && ( params.sample_sheet || params.reads || params.sra_accessions )) { - kmer_taxonomic_classification(ch_clean_reads, ch_kraken2_db ) - - ch_for_multiqc = ch_for_multiqc.mix(kmer_taxonomic_classification.out.for_multiqc) - ch_for_summary = ch_for_summary.mix(kmer_taxonomic_classification.out.for_summary) - ch_for_flag = ch_for_flag.mix(kmer_taxonomic_classification.out.for_flag) - ch_versions = ch_versions.mix(kmer_taxonomic_classification.out.versions) - } - - // subworkflow mash for species determination - min_hash(ch_clean_reads, ch_fastas, ch_mash_db) - - // determining organisms in sample - average_nucleotide_identity( - ch_for_summary.mix(min_hash.out.for_summary).collect(), - ch_contigs, - ch_fastani_genomes.ifEmpty([]), - dataset_script) - - ch_for_flag = ch_for_flag.mix(average_nucleotide_identity.out.for_flag).mix(min_hash.out.for_flag) - ch_top_hit = average_nucleotide_identity.out.top_hit - - // getting all the other information - information( - ch_contigs, - ch_for_flag, - summfle_script, - jsoncon_script) - - ch_for_summary = ch_for_summary.mix(information.out.for_summary).mix(min_hash.out.for_summary).mix(average_nucleotide_identity.out.for_summary) - ch_versions = 
ch_versions.mix(min_hash.out.versions).mix(average_nucleotide_identity.out.versions).mix(information.out.versions)
-  } else {
-    ch_top_hit = Channel.empty()
-  }
-
-  // optional subworkflow for comparing shared genes
-  if ( params.msa ) {
-    phylogenetic_analysis(
-      evaluat_script,
-      ch_contigs.ifEmpty([]),
-      ch_top_hit.ifEmpty([]))
-
-    ch_for_multiqc = ch_for_multiqc.mix(phylogenetic_analysis.out.for_multiqc)
-    ch_versions = ch_versions.mix(phylogenetic_analysis.out.versions)
-  }
-
-  // getting a summary of everything
-  if ( ! params.skip_extras ) {
-    report(
-      ch_raw_reads,
-      ch_fastas,
-      ch_for_multiqc.collect(),
-      ch_for_summary.concat(summary_script).collect(),
-      ch_versions.collect(),
-      multiqc_script,
-      version_script)
-  }
 }
 
-// ##### ##### ##### ##### ##### ##### ##### ##### ##### #####
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    THE END
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+*/
 
-// Final Steps
-// ##### ##### ##### ##### ##### ##### ##### ##### ##### #####
 workflow.onComplete {
-  println("Pipeline completed at: $workflow.complete")
-  println("MultiQC report can be found at ${params.outdir}/multiqc/multiqc_report.html")
-  println("Summary can be found at ${params.outdir}/grandeur_summary.tsv")
-  println("Execution status: ${ workflow.success ? 'OK' : 'failed' }")
-}
+  println("Pipeline completed at: $workflow.complete")
+  println("MultiQC report can be found at ${params.outdir}/multiqc/multiqc_report.html")
+  println("Summary can be found at ${params.outdir}/grandeur_summary.tsv")
+  println("Execution status: ${ workflow.success ? 'OK' : 'failed' }")
+}
\ No newline at end of file
diff --git a/modules.json b/modules.json
new file mode 100755
index 0000000..a1e5017
--- /dev/null
+++ b/modules.json
@@ -0,0 +1,5 @@
+{
+  "name": "UPHL-BioNGS/Grandeur",
+  "homePage": "https://github.com/UPHL-BioNGS/Grandeur",
+  "repos": {}
+}
\ No newline at end of file
diff --git a/modules/local/amrfinderplus.nf b/modules/local/amrfinderplus.nf
index e921e20..c26fecd 100644
--- a/modules/local/amrfinderplus.nf
+++ b/modules/local/amrfinderplus.nf
@@ -1,27 +1,25 @@
-process amrfinderplus {
+process AMRFINDER {
   tag "${meta.id}"
   label "process_high"
-  publishDir params.outdir, mode: 'copy', saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
-  container 'staphb/ncbi-amrfinderplus:3.12.8-2024-07-22.1'
-  errorStrategy { task.attempt < 2 ? 'retry' : 'terminate'}
-  time '30m'
+  container 'staphb/ncbi-amrfinderplus:4.0.3-2024-10-22.1'
 
   input:
   tuple val(meta), file(contigs), val(genus), val(species)
 
   output:
-  path "ncbi-AMRFinderplus/*_amrfinder_plus.txt", emit: collect, optional: true
-  path "logs/${task.process}/*.log", emit: log
-  path "versions.yml", emit: versions
+  path "amrfinder/*_amrfinder.txt", emit: collect, optional: true
+  val meta, emit: meta
+  path "logs/*/*.log", emit: log
+  path "versions.yml", emit: versions
 
   when:
   task.ext.when == null || task.ext.when
 
-  shell:
-  def args = task.ext.args ?: ''
+  script:
+  def args = task.ext.args ?: '--plus'
   def prefix = task.ext.prefix ?: "${meta.id}"
   """
-  mkdir -p ncbi-AMRFinderplus logs/${task.process}
+  mkdir -p amrfinder logs/${task.process}
   log_file=logs/${task.process}/${prefix}.${workflow.sessionId}.log
 
   organism=\$(amrfinder -l | tr " " "\\n" | grep -i ${genus} | grep -i ${species} | sed 's/,//g' | head -n 1 )
@@ -43,9 +41,8 @@ process amrfinderplus {
     --nucleotide ${contigs} \
     --threads ${task.cpus} \
     --name ${prefix} \
-    --output ncbi-AMRFinderplus/${prefix}_amrfinder_plus.txt \
+    --output amrfinder/${prefix}_amrfinder.txt \
     \$organism_check \
-    --plus \
     | tee -a \$log_file
 
   cat <<-END_VERSIONS > versions.yml
diff --git a/modules/local/bakta.nf b/modules/local/bakta.nf
new file mode 100644
index 0000000..2dde20b
--- /dev/null
+++ b/modules/local/bakta.nf
@@ -0,0 +1,49 @@
+process BAKTA {
+  tag "${meta.id}"
+  label "process_high"
+  container 'staphb/bakta:1.9.4-5.1-light'
+  time '30m'
+
+  input:
+  tuple val(meta), file(contigs), val(organism)
+
+  output:
+  path "bakta/*" , emit: bakta_files
+  path "bakta/*.txt" , emit: for_multiqc, optional: true
+  path "gff/*gff" , emit: gff, optional: true
+  path "logs/*/*.log" , emit: log
+  val meta , emit: meta
+  path "versions.yml" , emit: versions
+
+  when:
+  task.ext.when == null || task.ext.when
+
+  script:
+  def args = task.ext.args ?: '--min-contig-length 500 --compliant --skip-plot'
+  def prefix = task.ext.prefix ?: "${meta.id}"
+  def gen_sp = organism ? "--genus ${organism[0]} --species ${organism[1]}" : ""
+  """
+  mkdir -p bakta gff logs/${task.process}
+  log_file=logs/${task.process}/${prefix}.${workflow.sessionId}.log
+
+  bakta ${args} \
+    --threads ${task.cpus} \
+    --output bakta \
+    --prefix ${prefix} \
+    ${gen_sp} \
+    --force ${contigs} \
+    | tee -a \$log_file
+
+  if [ -f bakta/${prefix}.gff3 ]
+  then
+    cp bakta/${prefix}.gff3 gff/${prefix}.gff
+    echo "##FASTA" >> gff/${prefix}.gff
+    cat bakta/${prefix}.fna >> gff/${prefix}.gff
+  fi
+
+  cat <<-END_VERSIONS > versions.yml
+  "${task.process}":
+    bakta: \$(echo \$(bakta --version ) | awk '{print \$2}')
+  END_VERSIONS
+  """
+}
diff --git a/modules/local/bbduk.nf b/modules/local/bbduk.nf
deleted file mode 100644
index 3536873..0000000
--- a/modules/local/bbduk.nf
+++ /dev/null
@@ -1,47 +0,0 @@
-process bbduk {
-  tag "${meta.id}"
-  label "process_medium"
-  publishDir params.outdir, mode: 'copy', saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
-  container 'staphb/bbtools:39.01'
-  errorStrategy { task.attempt < 2 ?
'retry' : 'ignore'} - time '10m' - - input: - tuple val(meta), file(reads) - - output: - tuple val(meta), file("bbduk/*_rmphix_R{1,2}.fastq.gz"), emit: fastq, optional: true - path "bbduk/*", emit: files - path "bbduk/*.phix.stats.txt", emit: stats - path "logs/${task.process}/*.log", emit: log - path "versions.yml", emit: versions - - when: - task.ext.when == null || task.ext.when - - shell: - def args = task.ext.args ?: 'k=31 hdist=1' - def prefix = task.ext.prefix ?: "${meta.id}" - """ - mkdir -p bbduk logs/${task.process} - log_file=logs/${task.process}/${prefix}.${workflow.sessionId}.log - - bbduk.sh ${args} \ - in1=${reads[0]} \ - in2=${reads[1]} \ - out1=bbduk/${prefix}_rmphix_R1.fastq.gz \ - out2=bbduk/${prefix}_rmphix_R2.fastq.gz \ - outm=bbduk/${prefix}.matched_phix.fastq.gz \ - ref=/opt/bbmap/resources/phix174_ill.ref.fa.gz \ - stats=bbduk/${prefix}.phix.stats.txt \ - threads=${task.cpus} \ - | tee -a \$log_file - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - bbduk: "\$(bbduk.sh --version 2>&1 | grep -v java | grep version | awk '{print \$NF}')" - END_VERSIONS - """ -} - -//ref=/bbmap/resources/phix174_ill.ref.fa.gz \ \ No newline at end of file diff --git a/modules/local/blast.nf b/modules/local/blast.nf index 79bb282..07db6af 100644 --- a/modules/local/blast.nf +++ b/modules/local/blast.nf @@ -1,23 +1,20 @@ -process blastn { +process BLASTN { tag "${meta.id}" label "process_medium" - publishDir params.outdir, mode: 'copy', saveAs: { filename -> filename.equals('versions.yml') ? null : filename } container 'staphb/blast:2.16.0' - errorStrategy { task.attempt < 2 ? 'retry' : 'ignore'} - time '2h' input: tuple val(meta), file(contig), path(blastdb) output: - tuple val(meta), file("blastn/*.tsv"), emit: blastn + tuple val(meta), file("blastn/*.tsv"), emit: blastn path "logs/${task.process}/*.${workflow.sessionId}.log", emit: log - path "versions.yml", emit: versions + path "versions.yml", emit: versions when: task.ext.when == null || task.ext.when - shell: + script: def args = task.ext.args ?: '-max_target_seqs 10 -max_hsps 1 -evalue 1e-25' def prefix = task.ext.prefix ?: "${meta.id}" """ diff --git a/modules/local/blobtools.nf b/modules/local/blobtools.nf index 0150bcd..8971d37 100644 --- a/modules/local/blobtools.nf +++ b/modules/local/blobtools.nf @@ -1,10 +1,7 @@ -process blobtools_create { +process BLOBTOOLS_CREATE { tag "${meta.id}" label "process_medium" - publishDir params.outdir, mode: 'copy', saveAs: { filename -> filename.equals('versions.yml') ? null : filename } container 'chrishah/blobtools:v1.1.1' - time '45m' - errorStrategy { task.attempt < 2 ? 'retry' : 'ignore'} input: tuple val(meta), file(contig), file(bam), file(blastn) @@ -18,7 +15,7 @@ process blobtools_create { when: task.ext.when == null || task.ext.when - shell: + script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" """ @@ -39,12 +36,10 @@ process blobtools_create { """ } -process blobtools_view { +process BLOBTOOLS_VIEW { tag "${meta.id}" - publishDir params.outdir, mode: 'copy', saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + label "process_medium" container 'chrishah/blobtools:v1.1.1' - time '10m' - errorStrategy { task.attempt < 2 ? 
'retry' : 'ignore'} input: tuple val(meta), file(json) @@ -57,7 +52,7 @@ process blobtools_view { when: task.ext.when == null || task.ext.when - shell: + script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" """ @@ -76,12 +71,10 @@ process blobtools_view { """ } -process blobtools_plot { +process BLOBTOOLS_PLOT { tag "${meta.id}" - publishDir params.outdir, mode: 'copy', saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + label "process_medium" container 'chrishah/blobtools:v1.1.1' - time '10m' - errorStrategy { task.attempt < 2 ? 'retry' : 'ignore'} input: tuple val(meta), file(json) @@ -96,7 +89,7 @@ process blobtools_plot { when: task.ext.when == null || task.ext.when - shell: + script: def args = task.ext.args ?: '--format png -r species' def prefix = task.ext.prefix ?: "${meta.id}" """ diff --git a/modules/local/circulocov.nf b/modules/local/circulocov.nf index 7c7835b..8914441 100755 --- a/modules/local/circulocov.nf +++ b/modules/local/circulocov.nf @@ -1,13 +1,8 @@ -process circulocov { +process CIRCULOCOV { tag "${meta.id}" label "process_medium" - stageInMode "copy" - publishDir path: params.outdir, mode: 'copy', pattern: 'logs/*/*log' - publishDir path: params.outdir, mode: 'copy', pattern: 'circulocov/*' - publishDir path: params.outdir, mode: 'copy', pattern: 'circulocov/*/*' + //stageInMode "copy" container 'staphb/circulocov:0.1.20240104' - time '30m' - errorStrategy { task.attempt < 2 ? 'retry' : 'ignore'} input: tuple val(meta), file(contigs), file(fastqs) @@ -24,7 +19,7 @@ process circulocov { when: task.ext.when == null || task.ext.when - shell: + script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" def reads = fastqs.join(" ") diff --git a/modules/local/datasets.nf b/modules/local/datasets.nf index ce0738b..288097f 100644 --- a/modules/local/datasets.nf +++ b/modules/local/datasets.nf @@ -1,10 +1,7 @@ -process datasets_summary { +process DATASETS_SUMMARY { tag "${taxon}" label "process_single" - publishDir params.outdir, mode: 'copy', saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - container 'staphb/ncbi-datasets:16.30.0' - time '1h' - errorStrategy { task.attempt < 2 ? 'retry' : 'ignore' } + container 'staphb/ncbi-datasets:16.35.0' input: tuple val(taxon), file(script) @@ -16,7 +13,7 @@ process datasets_summary { when: task.ext.when == null || task.ext.when - shell: + script: def args = task.ext.args ?: '--reference --mag exclude' def args2 = task.ext.args2 ?: '--annotated --assembly-level complete,scaffold --mag exclude' def fields = task.ext.fields ?: 'accession,assminfo-refseq-category,assminfo-level,organism-name,assmstats-total-ungapped-len' @@ -47,14 +44,10 @@ process datasets_summary { // It is faster if datasets can download the entire list at a time, but there is a 20 minute timeout for downloading. // The '||' is to allow each genome to be downloaded on its own, which is longer overall but each genome should be less than 20 minutes. -process datasets_download { +process DATASETS_DOWNLOAD { tag "Downloading Genomes" - // because there's no way to specify threads label "process_medium" - publishDir path: "${params.outdir}", mode: 'copy', pattern: "logs/*/*log" - container 'staphb/ncbi-datasets:16.30.0' - time '5h' - errorStrategy { task.attempt < 2 ? 
'retry' : 'ignore'} + container 'staphb/ncbi-datasets:16.35.0' input: file(ids) @@ -66,7 +59,7 @@ process datasets_download { when: task.ext.when == null || task.ext.when - shell: + script: """ mkdir -p datasets genomes @@ -85,7 +78,6 @@ process datasets_download { accession=\$(echo \$fasta | cut -f 4 -d / | cut -f 1,2 -d _ ) organism=\$(head -n 1 \$fasta | awk '{print \$2 "_" \$3 }' | sed 's/,//g' | sed 's/\\]//g' | sed 's/\\[//g' ) cat \$fasta | sed 's/ /_/g' | sed 's/,//g' > genomes/\${organism}_\${accession}_ds.fna - gzip genomes/\${organism}_\${accession}_ds.fna done # removing MAGS diff --git a/modules/local/drprg.nf b/modules/local/drprg.nf index 1762616..df4cab7 100644 --- a/modules/local/drprg.nf +++ b/modules/local/drprg.nf @@ -1,11 +1,7 @@ -process drprg { +process DRPRG { tag "${meta.id}" label "process_medium" - stageInMode "copy" - publishDir path: params.outdir, mode: 'copy', saveAs: { filename -> filename.equals('versions.yml') ? null : filename } container 'staphb/drprg:0.1.1' - time '10m' - errorStrategy { task.attempt < 2 ? 'retry' : 'ignore'} input: tuple val(meta), file(contigs) @@ -17,9 +13,9 @@ process drprg { path "versions.yml", emit: versions when: - (task.ext.when == null || task.ext.when) + task.ext.when == null || task.ext.when - shell: + script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" """ @@ -31,7 +27,7 @@ process drprg { -i ${contigs} \ -o drprg/${prefix} \ --sample ${prefix} \ - | tee =a \$log_file + | tee -a \$log_file cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/local/elgato.nf b/modules/local/elgato.nf index fff910e..d2814ec 100755 --- a/modules/local/elgato.nf +++ b/modules/local/elgato.nf @@ -1,10 +1,7 @@ -process elgato { +process ELGATO { tag "${meta.id}" label "process_medium" - publishDir path: params.outdir, mode: 'copy', saveAs: { filename -> filename.equals('versions.yml') ? null : filename } container 'staphb/elgato:1.20.1' - time '10m' - errorStrategy { task.attempt < 2 ? 'retry' : 'ignore'} input: tuple val(meta), file(contigs) @@ -13,11 +10,12 @@ process elgato { path "elgato/*/possible_mlsts.txt", emit: collect path "logs/${task.process}/*.log" , emit: log path "versions.yml" , emit: versions + val meta , emit: meta when: - (task.ext.when == null || task.ext.when) + task.ext.when == null || task.ext.when - shell: + script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" """ diff --git a/modules/local/emmtyper.nf b/modules/local/emmtyper.nf index 5af6307..300ec0b 100644 --- a/modules/local/emmtyper.nf +++ b/modules/local/emmtyper.nf @@ -1,14 +1,7 @@ -process emmtyper { +process EMMTYPER { tag "${meta.id}" label "process_medium" - stageInMode "copy" - publishDir path: params.outdir, mode: 'copy', saveAs: { filename -> filename.equals('versions.yml') ? null : filename } container 'staphb/emmtyper:0.2.0' - time '10m' - errorStrategy { task.attempt < 2 ? 
'retry' : 'ignore'} - - when: - (task.ext.when == null || task.ext.when) input: tuple val(meta), file(contigs), file(script) @@ -18,8 +11,12 @@ process emmtyper { path "emmtyper/*" , emit: everything path "logs/${task.process}/*.log", emit: log path "versions.yml" , emit: versions + val meta , emit: meta + + when: + task.ext.when == null || task.ext.when - shell: + script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" """ @@ -40,7 +37,7 @@ process emmtyper { cat <<-END_VERSIONS > versions.yml "${task.process}": - emmtyper: \$( echo \$(emmtyper --version 2>&1) | sed 's/^.*emmtyper v//' ) + emmtyper: \$( echo \$(emmtyper --version 2>&1) | sed 's/^.*emmtyper v//' ) END_VERSIONS """ } diff --git a/modules/local/ena.nf b/modules/local/ena.nf new file mode 100755 index 0000000..1e83204 --- /dev/null +++ b/modules/local/ena.nf @@ -0,0 +1,35 @@ +process ENA_DOWNLOAD { + tag "${SRR}" + label "process_single" + container 'staphb/enabrowsertools:1.7.1' + + input: + val(SRR) + + output: + tuple val(SRR), file("*/*{1,2}.fastq.gz"), emit: fastq, optional: true + path "logs/*/*.log", emit: log + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${SRR}" + """ + mkdir -p reads logs/${task.process} + log_file=logs/${task.process}/${prefix}.${workflow.sessionId}.log + + enaDataGet \ + ${args} \ + -f fastq \ + ${SRR} \ + | tee -a \$log_file + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + enaDataGet: \$( enaDataGet -v | awk '{print \$NF}' ) + END_VERSIONS + """ +} \ No newline at end of file diff --git a/modules/local/fastani.nf b/modules/local/fastani.nf index 301b42e..f6552aa 100644 --- a/modules/local/fastani.nf +++ b/modules/local/fastani.nf @@ -1,19 +1,14 @@ -process fastani { +process FASTANI { tag "${meta.id}" label "process_medium" - stageInMode "copy" - publishDir path: params.outdir, mode: 'copy', pattern: 'logs/*/*log' - publishDir path: params.outdir, mode: 'copy', pattern: 'fastani/*' container 'staphb/fastani:1.34' - time '10m' - errorStrategy { task.attempt < 2 ? 'retry' : 'ignore'} input: tuple val(meta), file(contigs), file(genomes) output: tuple val(meta), file("fastani/*_fastani.csv"), emit: results, optional: true - tuple val(meta), env(top_hit), path("top_hit/*"), emit: top_hit, optional: true + tuple val(meta), env("top_hit"), path("top_hit/*"), emit: top_hit, optional: true path "fastani/*_fastani_len.csv", emit: top_len, optional: true path "fastani/*", emit: everything path "logs/${task.process}/*.log", emit: log @@ -22,7 +17,7 @@ process fastani { when: task.ext.when == null || task.ext.when - shell: + script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" def ends = genomes.collect { it.Name[-6..-1] }.flatten().unique().join(' *') diff --git a/modules/local/fastp.nf b/modules/local/fastp.nf index 61fc087..e0c04d6 100644 --- a/modules/local/fastp.nf +++ b/modules/local/fastp.nf @@ -1,10 +1,7 @@ -process fastp { +process FASTP { tag "${meta.id}" label "process_low" - publishDir params.outdir, mode: 'copy', saveAs: { filename -> filename.equals('versions.yml') ? null : filename } container 'staphb/fastp:0.23.4' - time '30m' - errorStrategy { task.attempt < 2 ? 
'retry' : 'ignore'} input: tuple val(meta), file(reads) @@ -14,13 +11,13 @@ process fastp { path "fastp/*_fastp.html", emit: html, optional: true path "fastp/*_fastp.json", emit: fastp_files, optional: true path "logs/${task.process}/*.{log,err}", emit: log - tuple val(meta), file("fastp/*_fastp_R{1,2}.fastq.gz"), env(passed_reads), emit: fastp_results + tuple val(meta), file("fastp/*_fastp_R{1,2}.fastq.gz"), env("passed_reads"), emit: fastp_results path "versions.yml", emit: versions when: task.ext.when == null || task.ext.when - shell: + script: def args = task.ext.args ?: '--detect_adapter_for_pe' def prefix = task.ext.prefix ?: "${meta.id}" """ @@ -38,6 +35,7 @@ process fastp { 2>> \$err_file | tee -a \$log_file passed_reads=\$(grep "reads passed filter" \$err_file | tail -n 1 | cut -f 2 -d ":" | sed 's/ //g' ) + if [ -z "\$passed_reads" ] ; then passed_reads="0" ; fi cat <<-END_VERSIONS > versions.yml diff --git a/modules/local/fastqc.nf b/modules/local/fastqc.nf index 75f63ee..5942128 100644 --- a/modules/local/fastqc.nf +++ b/modules/local/fastqc.nf @@ -1,10 +1,7 @@ -process fastqc { +process FASTQC { tag "${meta.id}" label "process_single" - publishDir params.outdir, mode: 'copy', saveAs: { filename -> filename.equals('versions.yml') ? null : filename } container 'staphb/fastqc:0.12.1' - time '10m' - errorStrategy { task.attempt < 2 ? 'retry' : 'ignore'} input: tuple val(meta), file(fastq) @@ -15,11 +12,12 @@ process fastqc { path "fastqc/*_summary.csv" , emit: collect path "logs/${task.process}/*.log", emit: log_files path "versions.yml" , emit: versions + val meta , emit: meta when: task.ext.when == null || task.ext.when - shell: + script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" def reads = fastq.join(" ") diff --git a/modules/local/heatcluster.nf b/modules/local/heatcluster.nf index 9958c92..edb7094 100755 --- a/modules/local/heatcluster.nf +++ b/modules/local/heatcluster.nf @@ -1,16 +1,13 @@ -process heatcluster { +process HEATCLUSTER { tag "HeatCluster" label "process_single" - publishDir params.outdir, mode: 'copy', saveAs: { filename -> filename.equals('versions.yml') ? null : filename } container 'staphb/heatcluster:1.0.2c' - time '10m' - errorStrategy { task.attempt < 2 ? 'retry' : 'ignore'} input: file(matrix) output: - path "heatcluster/*", optional : true + path "heatcluster/*", optional : true, emit: files path "heatcluster/heatcluster.png", optional : true, emit: for_multiqc path "logs/${task.process}/*.log", emit: log_files path "versions.yml", emit: versions @@ -18,7 +15,7 @@ process heatcluster { when: task.ext.when == null || task.ext.when - shell: + script: def args = task.ext.args ?: '-t png' """ mkdir -p heatcluster logs/${task.process} diff --git a/modules/local/iqtree2.nf b/modules/local/iqtree2.nf index 4698a85..cf6c256 100644 --- a/modules/local/iqtree2.nf +++ b/modules/local/iqtree2.nf @@ -1,10 +1,7 @@ -process iqtree2 { +process IQTREE2 { tag "Phylogenetic analysis" label "process_high" - publishDir params.outdir, mode: 'copy', saveAs: { filename -> filename.equals('versions.yml') ? null : filename } container 'staphb/iqtree2:2.3.6' - time '24h' - errorStrategy { task.attempt < 2 ? 
'retry' : 'ignore'} input: file(msa) @@ -13,12 +10,12 @@ process iqtree2 { path "iqtree2/iqtree*" , emit: tree tuple val("iqtree"), file("iqtree2/iqtree.contree"), optional: true , emit: newick path "logs/${task.process}/${task.process}.${workflow.sessionId}.log", emit: log - path "versions.yml" , emit: versions + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when - shell: + script: def args = task.ext.args ?: '-t RANDOM -m GTR+F+I -bb 1000 -alrt 1000' def outgroup = params.iqtree2_outgroup ? "-o ${params.iqtree2_outgroup}" : "" """ diff --git a/modules/local/kaptive.nf b/modules/local/kaptive.nf index 8019606..905658f 100644 --- a/modules/local/kaptive.nf +++ b/modules/local/kaptive.nf @@ -1,11 +1,7 @@ -process kaptive { +process KAPTIVE { tag "${meta.id}" label "process_medium" - stageInMode "copy" - publishDir path: params.outdir, mode: 'copy', saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - container 'staphb/kaptive:2.0.8' - time '30m' - errorStrategy { task.attempt < 2 ? 'retry' : 'ignore'} + container 'staphb/kaptive:3.0.0b6' input: tuple val(meta), file(contigs) @@ -17,36 +13,40 @@ process kaptive { path "versions.yml", emit: versions when: - (task.ext.when == null || task.ext.when) + task.ext.when == null || task.ext.when - shell: + script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" """ mkdir -p kaptive logs/${task.process} log_file=logs/${task.process}/${prefix}.${workflow.sessionId}.log - kaptive.py ${args} \ - --k_refs /kaptive/reference_database/VibrioPara_Kaptivedb_K.gbk \ - --assembly ${contigs} \ + kaptive\ + assembly \ + ${args} \ + /kaptive/reference_database/VibrioPara_Kaptivedb_K.gbk \ + ${contigs} \ --threads ${task.cpus} \ - --out kaptive/${prefix}_VibrioPara_Kaptivedb_K \ + --out kaptive/${prefix}_VibrioPara_Kaptivedb_K.txt \ | tee -a \$log_file - kaptive.py ${args} \ - --k_refs /kaptive/reference_database/VibrioPara_Kaptivedb_O.gbk \ - --assembly ${contigs} \ + kaptive \ + assembly \ + ${args} \ + /kaptive/reference_database/VibrioPara_Kaptivedb_O.gbk \ + ${contigs} \ --threads ${task.cpus} \ - --out kaptive/${prefix}_VibrioPara_Kaptivedb_O \ + --out kaptive/${prefix}_VibrioPara_Kaptivedb_O.txt \ | tee -a \$log_file - grep -h "Other genes" kaptive/*table.txt | head -n 1 > ${prefix}_table.txt - grep -h ${prefix} kaptive/*table.txt >> ${prefix}_table.txt + grep -h "Other genes" kaptive/${prefix}* | head -n 1 > ${prefix}_table.txt + grep -h ${prefix} kaptive/${prefix}* >> ${prefix}_table.txt mv ${prefix}_table.txt kaptive/${prefix}_table.txt cat <<-END_VERSIONS > versions.yml "${task.process}": - kaptive.py: \$( echo \$(kaptive.py --version | sed 's/Kaptive v//;')) + kaptive: \$( echo \$(kaptive --version | sed 's/Kaptive v//;')) END_VERSIONS """ } \ No newline at end of file diff --git a/modules/local/kleborate.nf b/modules/local/kleborate.nf index a8d84ad..a31bcd5 100644 --- a/modules/local/kleborate.nf +++ b/modules/local/kleborate.nf @@ -1,40 +1,41 @@ -process kleborate { +process KLEBORATE { tag "${meta.id}" label "process_medium" - publishDir params.outdir, mode: 'copy', saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - container 'staphb/kleborate:2.4.1' - time '10m' - errorStrategy { task.attempt < 2 ? 
'retry' : 'ignore'} + container 'staphb/kleborate:3.1.2' input: tuple val(meta), file(contig), file(script) output: - path "kleborate/*_results.tsv" , emit: collect, optional: true - path "kleborate/*_results.txt" , emit: result + path "kleborate/*_results.tsv", emit: collect, optional: true + path "kleborate/*/*_output.txt", emit: result, optional: true path "logs/${task.process}/*.log", emit: log - path "versions.yml" , emit: versions + path "versions.yml", emit: versions + val meta, emit: meta when: - (task.ext.when == null || task.ext.when) + task.ext.when == null || task.ext.when - shell: - def args = task.ext.args ?: '--all' + script: + def args = task.ext.args ?: '-p kpsc --trim_headers' def prefix = task.ext.prefix ?: "${meta.id}" """ mkdir -p kleborate logs/${task.process} log_file=logs/${task.process}/${prefix}.${workflow.sessionId}.log kleborate ${args} \ - -o kleborate/${prefix}_results.txt \ + -o kleborate/${prefix} \ -a ${contig} \ | tee -a \$log_file - python3 ${script} kleborate/${prefix}_results.txt kleborate/${prefix}_results.tsv kleborate ${prefix} + if ls kleborate/${prefix}/*output.txt 1>/dev/null 2>&1 + then + python3 ${script} kleborate/${prefix}/*output.txt kleborate/${prefix}_results.tsv kleborate ${prefix} + fi cat <<-END_VERSIONS > versions.yml "${task.process}": - kleborate: \$( echo \$(kleborate --version | sed 's/Kleborate v//;')) + kleborate: \$( echo \$(kleborate --version | sed 's/Kleborate v//;')) END_VERSIONS """ } diff --git a/modules/local/kraken2.nf b/modules/local/kraken2.nf index f701581..10a0a72 100644 --- a/modules/local/kraken2.nf +++ b/modules/local/kraken2.nf @@ -1,10 +1,7 @@ -process kraken2 { +process KRAKEN2 { tag "${meta.id}" label "process_high" - publishDir params.outdir, mode: 'copy', saveAs: { filename -> filename.equals('versions.yml') ? null : filename } container 'staphb/kraken2:2.1.3' - errorStrategy { task.attempt < 2 ? 'retry' : 'ignore'} - time '1h' input: tuple val(meta), file(fastq), path(kraken2_db) @@ -19,7 +16,7 @@ process kraken2 { when: task.ext.when == null || task.ext.when - shell: + script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" """ diff --git a/modules/local/local.nf b/modules/local/local.nf index 53f1c93..dc67652 100644 --- a/modules/local/local.nf +++ b/modules/local/local.nf @@ -1,27 +1,25 @@ -process core_genome_evaluation { +process CORE_GENOME_EVALUATION { tag "Evaluating core genome" label "process_single" - publishDir path: params.outdir, mode: 'copy', pattern: 'logs/*/*log' - publishDir path: params.outdir, mode: 'copy', pattern: 'core_genome_evaluation/core_genome_evaluation.csv' - container 'quay.io/biocontainers/pandas:1.5.2' - time '10m' - errorStrategy { task.attempt < 2 ? 
'retry' : 'ignore'} + container 'staphb/pandas:2.2.3' input: tuple file(fasta), file(summary), file(script) output: - tuple file(fasta), env(num_samples), env(num_core_genes), emit: evaluation + tuple file(fasta), env("num_samples"), env("num_core_genes"), emit: evaluation path "core_genome_evaluation/core_genome_evaluation.csv", emit: for_multiqc path "logs/${task.process}/*.log" , emit: log_files - shell: - def args = task.ext.args ?: '' + when: + task.ext.when == null || task.ext.when + + script: """ mkdir -p core_genome_evaluation logs/${task.process} log_file=logs/${task.process}/${task.process}.${workflow.sessionId}.log - python ${script} | tee -a \$log_file + python3 ${script} | tee -a \$log_file num_samples=\$(wc -l core_genome_evaluation.csv | awk '{print \$1}' ) num_core_genes=\$(cut -f 3 core_genome_evaluation.csv -d "," | tail -n 1 | cut -f 1 -d "." ) @@ -29,49 +27,10 @@ process core_genome_evaluation { """ } -process download_sra { - tag "${SRR}" - label "process_single" - publishDir params.outdir, mode: 'copy' - container 'quay.io/biocontainers/pandas:1.5.2' - time '2h' - errorStrategy { task.attempt < 2 ? 'retry' : 'ignore'} - - input: - val(SRR) - - output: - tuple val(SRR), file("reads/${SRR}_{1,2}.fastq.gz"), emit: fastq - path "logs/${task.process}/*.log", emit: log - - when: - task.ext.when == null || task.ext.when - - shell: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${SRR}" - """ - mkdir -p reads logs/${task.process} - log_file=logs/${task.process}/${SRR}.${workflow.sessionId}.log - - echo "fasterq-dump failed. Attempting download from ENA" | tee -a \$log_file - - sra=${SRR} - - wget ftp://ftp.sra.ebi.ac.uk/vol1/fastq/\${sra:0:6}/0\${sra: -2}/${SRR}/${SRR}_1.fastq.gz - wget ftp://ftp.sra.ebi.ac.uk/vol1/fastq/\${sra:0:6}/0\${sra: -2 }/${SRR}/${SRR}_2.fastq.gz - - mv *fastq.gz reads/. - """ -} - -process json_convert { +process JSON_CONVERT { tag "${meta.id}" label "process_single" - // no publishDir - container 'quay.io/biocontainers/pandas:1.5.2' - time '10m' - errorStrategy { task.attempt < 2 ? 'retry' : 'ignore'} + container 'staphb/pandas:2.2.3' input: tuple val(meta), val(analysis), file(json), file(script) @@ -79,76 +38,44 @@ process json_convert { output: path "${analysis}/*_${analysis}*", emit: collect - shell: - """ - mkdir -p ${analysis} - - python3 ${script} ${json} ${analysis} - - mv *${analysis}*tsv ${analysis}/. - """ -} - -process mash_err { - tag "${meta.id}" - // no publishDir - label "process_single" - container 'quay.io/biocontainers/pandas:1.5.2' - time '10m' - errorStrategy { task.attempt < 2 ? 'retry' : 'ignore'} - - input: - tuple val(meta), file(error_file) - - output: - path "mash_estimates.csv", emit: summary - when: task.ext.when == null || task.ext.when - shell: - def prefix = task.ext.prefix ?: "${meta.id}" + script: """ + mkdir -p ${analysis} - genome_size=\$(grep "Estimated genome size:" ${error_file} | awk '{print \$NF}') - coverage=\$(grep "Estimated coverage:" ${error_file} | awk '{print \$NF}') + python3 ${script} ${json} ${analysis} - echo "sample,mash_estimated_genome_size,mash_estimated_coverage" > mash_estimates.csv - echo "${prefix},\$genome_size,\$coverage" >> mash_estimates.csv + mv *${analysis}*tsv ${analysis}/. """ } -process mqc_prep { +process MQC_PREP { tag "prepping files" - // no publishDir label "process_single" - container 'quay.io/biocontainers/pandas:1.5.2' - time '10m' - errorStrategy { task.attempt < 2 ? 
'retry' : 'ignore'} + container 'staphb/pandas:2.2.3' input: file(input) file(script) output: - path "*mqc*", emit: for_multiqc + path "*mqc*", emit: for_multiqc, optional: true when: task.ext.when == null || task.ext.when - shell: + script: """ python3 ${script} """ } -process names { +process NAMES { tag "${meta.id}" - // no publishDir label "process_single" - container 'quay.io/biocontainers/pandas:1.5.2' - time '10m' - errorStrategy { task.attempt < 2 ? 'retry' : 'ignore'} + container 'staphb/pandas:2.2.3' input: tuple val(meta), file(input) @@ -159,7 +86,7 @@ process names { when: task.ext.when == null || task.ext.when - shell: + script: def prefix = task.ext.prefix ?: "${meta.id}" def files = input.join(" ") """ @@ -170,13 +97,10 @@ process names { """ } -process references { +process REFERENCES { tag "Preparing references" - // no publishDir label "process_single" - container 'quay.io/uphl/grandeur_ref:2024-06-26' - time '10m' - errorStrategy { task.attempt < 2 ? 'retry' : 'ignore'} + container 'staphb/grandeur_ref:4.5' output: path "ref/*", emit: fastas @@ -184,7 +108,7 @@ process references { when: task.ext.when == null || task.ext.when - shell: + script: """ mkdir ref @@ -192,13 +116,10 @@ process references { """ } -process species { +process SPECIES { tag "Creating list of species" label "process_single" - publishDir params.outdir, mode: 'copy' - container 'quay.io/biocontainers/pandas:1.5.2' - time '10m' - errorStrategy { task.attempt < 2 ? 'retry' : 'ignore'} + container 'staphb/pandas:2.2.3' input: file(results) @@ -209,7 +130,7 @@ process species { when: task.ext.when == null || task.ext.when - shell: + script: """ mkdir -p datasets @@ -232,31 +153,27 @@ process species { """ } -process summary { +process SUMMARY { tag "Creating summary files" - publishDir params.outdir, mode: 'copy' - container 'quay.io/biocontainers/pandas:1.5.2' + container 'staphb/pandas:2.2.3' label "process_single" - time '10m' - errorStrategy { task.attempt < 2 ? 'retry' : 'ignore'} input: file(input) output: - path "grandeur_summary.tsv" , emit: summary_tsv - path "grandeur_summary.txt" , emit: summary_txt - path "summary/grandeur_extended_summary.tsv", emit: extended_tsv - path "summary/grandeur_extended_summary.txt", emit: extended_txt + path "grandeur_summary.tsv" , emit: summary_tsv, optional: true + path "grandeur_summary.txt" , emit: summary_txt, optional: true + path "summary/grandeur_extended_summary.tsv", emit: extended_tsv, optional: true + path "summary/grandeur_extended_summary.txt", emit: extended_txt, optional: true when: task.ext.when == null || task.ext.when - shell: - def args = task.ext.args ?: '' + script: """ mkdir -p summary - python summary.py + python3 summary.py """ } diff --git a/modules/local/mash.nf b/modules/local/mash.nf index c90a31b..55609d8 100644 --- a/modules/local/mash.nf +++ b/modules/local/mash.nf @@ -1,74 +1,41 @@ -process mash_sketch_fastq { +process MASH_SKETCH { tag "${meta.id}" label "process_medium" - publishDir params.outdir, mode: 'copy', saveAs: { filename -> filename.equals('versions.yml') ? null : filename } container 'staphb/mash:2.3' - errorStrategy { task.attempt < 2 ? 
'retry' : 'ignore'} - time '10m' input: - tuple val(meta), file(fastq) + tuple val(meta), file(files) output: tuple val(meta), file("mash/*.msh"), emit: msh tuple val(meta), file("mash/*.err"), optional: true, emit: err - path "logs/${task.process}/*.log", emit: log - path "versions.yml", emit: versions + path "mash/*estimates.csv", optional: true, emit: summary + path "logs/${task.process}/*.log", emit: log + path "versions.yml", emit: versions when: task.ext.when == null || task.ext.when - shell: - def args = task.ext.args ?: "-m 2" + script: + def args = task.ext.args ?: "" + def mode = files[1] ? "-m 2 $args" : "$args" def prefix = task.ext.prefix ?: "${meta.id}" + def input = files.join(" ") """ mkdir -p mash logs/${task.process} log_file=logs/${task.process}/${prefix}.${workflow.sessionId}.log err_file=mash/${prefix}.${workflow.sessionId}.err - cat ${fastq[0]} ${fastq[1]} | \ - mash sketch ${args} \ - -o mash/${prefix}.fastq - \ + cat ${input} | \ + mash sketch ${mode} \ + -o mash/${prefix} - \ 2>> \$err_file | tee -a \$log_file - cat <<-END_VERSIONS > versions.yml - "${task.process}": - mash: \$( mash --version ) - END_VERSIONS - """ -} + genome_size=\$(grep "Estimated genome size:" \$err_file | awk '{print \$NF}') + coverage=\$(grep "Estimated coverage:" \$err_file | awk '{print \$NF}') -process mash_sketch_fasta { - tag "${meta.id}" - label "process_medium" - publishDir params.outdir, mode: 'copy', saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - container 'staphb/mash:2.3' - errorStrategy { task.attempt < 2 ? 'retry' : 'ignore'} - time '10m' - - input: - tuple val(meta), file(fasta) - - output: - tuple val(meta), file("mash/*.msh"), emit: msh - path "logs/${task.process}/*.log", emit: log - path "versions.yml", emit: versions - - when: - task.ext.when == null || task.ext.when - - shell: - def args = task.ext.args ?: "" - def prefix = task.ext.prefix ?: "${meta.id}" - - """ - mkdir -p mash logs/${task.process} - log_file=logs/${task.process}/${prefix}.${workflow.sessionId}.log - - mash sketch ${args} \ - -o mash/${prefix}.fasta \ - ${fasta} \ - | tee -a \$log_file + echo "sample,mash_estimated_genome_size,mash_estimated_coverage" > mash/${prefix}_mash_estimates.csv + echo "${prefix},\$genome_size,\$coverage" >> mash/${prefix}_mash_estimates.csv cat <<-END_VERSIONS > versions.yml "${task.process}": @@ -77,13 +44,10 @@ process mash_sketch_fasta { """ } -process mash_dist { +process MASH_DIST { tag "${meta.id}" label "process_medium" - publishDir params.outdir, mode: 'copy', saveAs: { filename -> filename.equals('versions.yml') ? null : filename } container 'staphb/mash:2.3' - errorStrategy { task.attempt < 2 ? 
'retry' : 'ignore'}
-  time '10m'
 
   input:
   tuple val(meta), file(msh), file(reference)
@@ -97,7 +61,7 @@
   when:
   task.ext.when == null || task.ext.when
 
-  shell:
+  script:
   def args = task.ext.args ?: "-v 0 -d 0.5"
   def prefix = task.ext.prefix ?: "${meta.id}"
   if ( reference =~ "input" ) {
@@ -178,30 +142,3 @@
   """
   }
 }
-
-// process mash_screen {
-//   tag "${meta.id}"
-//   label "process_medium"
-//   publishDir params.outdir, mode: 'copy'
-//   container 'staphb/mash:2.3'
-//   time '10m'
-
-//   input:
-//   tuple val(meta), file(fasta), file(fastq), file(reference)
-
-//   output:
-//   path "mash/*", emit: mashdist
-//   path "logs/${task.process}/*.log", emit: log
-//   path "versions.yml", emit: versions
-
-//   when:
-//   task.ext.when == null || task.ext.when
-
-//   shell:
-//   def args = task.ext.args ?: "-m 2"
-//   def prefix = task.ext.prefix ?: "${meta.id}"
-
-//   """
-//   echo "whatever"
-//   """
-// }
\ No newline at end of file
diff --git a/modules/local/mashtree.nf b/modules/local/mashtree.nf
index c7223ec..b175507 100755
--- a/modules/local/mashtree.nf
+++ b/modules/local/mashtree.nf
@@ -1,11 +1,7 @@
-process mashtree {
+process MASHTREE {
   tag "Phylogenetic analysis"
   label "process_medium"
-  publishDir params.outdir, mode: 'copy', saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
   container 'staphb/mashtree:1.4.6'
-  stageInMode 'copy'
-  time '4h'
-  errorStrategy { task.attempt < 2 ? 'retry' : 'ignore'}
 
   input:
   file(assemblies)
@@ -19,7 +15,7 @@
   when:
   task.ext.when == null || task.ext.when
 
-  shell:
+  script:
   def prefix = task.ext.prefix ?: "mashtree"
   def args = task.ext.args ?: "--outmatrix mashtree/${prefix}.txt"
   def input = assemblies.join(" ")
diff --git a/modules/local/meningotype.nf b/modules/local/meningotype.nf
new file mode 100644
index 0000000..ac64698
--- /dev/null
+++ b/modules/local/meningotype.nf
@@ -0,0 +1,33 @@
+process MENINGOTYPE {
+  tag "${meta.id}"
+  label "process_medium"
+  container 'staphb/meningotype:0.8.5'
+
+  input:
+  tuple val(meta), file(contigs)
+
+  output:
+  path "meningotype/*.tsv", emit: files
+  path "versions.yml", emit: versions
+  val meta, emit: meta
+
+  when:
+  task.ext.when == null || task.ext.when
+
+  script:
+  def args = task.ext.args ?: '--finetype'
+  def prefix = task.ext.prefix ?: "${meta.id}"
+  """
+  mkdir -p meningotype
+
+  meningotype \
+    ${args} \
+    ${contigs} \
+    > meningotype/${prefix}.tsv
+
+  cat <<-END_VERSIONS > versions.yml
+  "${task.process}":
+    meningotype: \$(echo \$(meningotype --version 2>&1 | grep meningotype | awk '{print \$NF}'))
+  END_VERSIONS
+  """
+}
\ No newline at end of file
diff --git a/modules/local/mlst.nf b/modules/local/mlst.nf
index ef6735c..0a6df2e 100644
--- a/modules/local/mlst.nf
+++ b/modules/local/mlst.nf
@@ -1,11 +1,7 @@
-process mlst {
+process MLST {
   tag "${meta.id}"
   label "process_medium"
-  publishDir params.outdir, mode: 'copy', saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
-  container 'staphb/mlst:2.23.0-2024-11-01'
-  maxForks 10
-  errorStrategy { task.attempt < 2 ?
'retry' : 'ignore'} - time '10m' + container 'staphb/mlst:2.23.0-2025-01-01' input: tuple val(meta), file(contig), file(script) @@ -13,11 +9,12 @@ process mlst { output: path "mlst/*_mlst.tsv", emit: collect path "versions.yml" , emit: versions + val meta , emit: meta when: task.ext.when == null || task.ext.when - shell: + script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" """ diff --git a/modules/local/multiqc.nf b/modules/local/multiqc.nf index 1215905..7969ae5 100644 --- a/modules/local/multiqc.nf +++ b/modules/local/multiqc.nf @@ -1,10 +1,7 @@ -process multiqc { +process MULTIQC { tag "multiqc" label "process_single" - publishDir params.outdir, mode: 'copy' - container 'staphb/multiqc:1.19' - time '10m' - errorStrategy { task.attempt < 2 ? 'retry' : 'ignore'} + container 'staphb/multiqc:1.26' input: file(input) @@ -17,7 +14,7 @@ process multiqc { when: task.ext.when == null || task.ext.when - shell: + script: def args = task.ext.args ?: '' """ mkdir -p multiqc quast logs/${task.process} @@ -38,13 +35,10 @@ process multiqc { """ } -process versions { +process VERSIONS { tag "extracting versions" label "process_single" - publishDir "${params.outdir}/summary/", mode: 'copy', pattern: 'software_versions.yml' - container 'staphb/multiqc:1.19' - time '10m' - errorStrategy { task.attempt < 2 ? 'retry' : 'ignore'} + container 'staphb/multiqc:1.26' input: file(input) @@ -57,10 +51,10 @@ process versions { when: task.ext.when == null || task.ext.when - shell: + script: """ cat <<-END_VERSIONS >> versions.yml - "report:multiqc": + "REPORT:MULTIQC": multiqc: \$( multiqc --version | sed -e "s/multiqc, version //g" ) END_VERSIONS diff --git a/modules/local/mykrobe.nf b/modules/local/mykrobe.nf index e5ed3af..c0dc71a 100644 --- a/modules/local/mykrobe.nf +++ b/modules/local/mykrobe.nf @@ -1,38 +1,33 @@ -process mykrobe { +process MYKROBE { tag "${meta.id}" label "process_medium" - stageInMode "copy" - publishDir path: params.outdir, mode: 'copy', saveAs: { filename -> filename.equals('versions.yml') ? null : filename } container 'staphb/mykrobe:0.13.0' - time '1h' - errorStrategy { task.attempt < 2 ? 'retry' : 'ignore'} input: tuple val(meta), file(contigs) output: - path "mykrobe/*.csv", emit: collect - path "mykrobe/*.json", emit: json + path "mykrobe/*.csv", optional: true, emit: collect + path "mykrobe/*.json", optional: true, emit: json path "logs/${task.process}/*.log", emit: log path "versions.yml", emit: versions + val meta, emit: meta when: - (task.ext.when == null || task.ext.when) + task.ext.when == null || task.ext.when - shell: - def args = task.ext.args ?: '' + script: + def args = task.ext.args ?: '--species tb --format json_and_csv' def prefix = task.ext.prefix ?: "${meta.id}" """ mkdir -p tmp/ logs/${task.process} log_file=logs/${task.process}/${prefix}.${workflow.sessionId}.log mykrobe predict ${args} \ - --species tb \ --sample ${prefix} \ --output mykrobe/${prefix} \ --seq ${contigs} \ --threads ${task.cpus} \ - --format json_and_csv \ | tee -a \$log_file cat <<-END_VERSIONS > versions.yml diff --git a/modules/local/panaroo.nf b/modules/local/panaroo.nf index 3c17fb0..7252677 100755 --- a/modules/local/panaroo.nf +++ b/modules/local/panaroo.nf @@ -1,36 +1,39 @@ -process panaroo { +process PANAROO { tag "Core Genome Alignment" label "process_high" - publishDir params.outdir, mode: 'copy', saveAs: { filename -> filename.equals('versions.yml') ? null : filename } container 'staphb/panaroo:1.5.0' - errorStrategy { task.attempt < 2 ? 
'retry' : 'ignore'} - time '10h' input: file(gff) output: path "panaroo/*" , emit: files - tuple path("panaroo/core_gene_alignment.aln"), path("panaroo/gene_presence_absence.Rtab"), emit: core_gene_alignment, optional: true + tuple path("panaroo/core_gene_alignment.aln"), path("panaroo/gene_presence_absence.Rtab"), emit: core_gene_alignment path "logs/${task.process}/*.log" , emit: log_files path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when - shell: + script: def args = task.ext.args ?: '--clean-mode strict --remove-invalid-genes --alignment core' def prefix = task.ext.prefix ?: 'panaroo' def assemblies = gff.join(' ') """ - mkdir -p logs/${task.process} + mkdir -p input logs/${task.process} log_file=logs/${task.process}/${task.process}.${workflow.sessionId}.log + for assembly in ${assemblies} + do + cp \$assembly input/. + done + + ls input/* > input_genomes.txt + panaroo ${args} \ -t ${task.cpus} \ -o ${prefix} \ - -i ${assemblies} \ - -a core \ + -i input_genomes.txt \ | tee -a \$log_file cat <<-END_VERSIONS > versions.yml diff --git a/modules/local/pbptyper.nf b/modules/local/pbptyper.nf index 85a0e3c..bebc7b3 100644 --- a/modules/local/pbptyper.nf +++ b/modules/local/pbptyper.nf @@ -1,11 +1,8 @@ -process pbptyper { +process PBPTYPER { tag "${meta.id}" label "process_medium" - stageInMode "copy" - publishDir path: params.outdir, mode: 'copy', saveAs: { filename -> filename.equals('versions.yml') ? null : filename } container 'staphb/pbptyper:2.0.0' - time '1h' - errorStrategy { task.attempt < 2 ? 'retry' : 'ignore'} + input: tuple val(meta), file(contigs) @@ -17,9 +14,9 @@ process pbptyper { path "versions.yml" , emit: versions when: - (task.ext.when == null || task.ext.when) + task.ext.when == null || task.ext.when - shell: + script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" """ diff --git a/modules/local/phytreeviz.nf b/modules/local/phytreeviz.nf index e557296..0f59849 100755 --- a/modules/local/phytreeviz.nf +++ b/modules/local/phytreeviz.nf @@ -1,10 +1,8 @@ -process phytreeviz { +process PHYTREEVIZ { tag "${analysis}" label "process_medium" - publishDir params.outdir, mode: 'copy', saveAs: { filename -> filename.equals('versions.yml') ? null : filename } container 'staphb/phytreeviz:0.2.0' - time '1h' - errorStrategy { task.attempt < 2 ? 'retry' : 'ignore'} + input: tuple val(analysis), file(newick) @@ -17,7 +15,7 @@ process phytreeviz { when: task.ext.when == null || task.ext.when - shell: + script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${analysis}" """ diff --git a/modules/local/plasmidfinder.nf b/modules/local/plasmidfinder.nf index 7a847ec..418cf8f 100644 --- a/modules/local/plasmidfinder.nf +++ b/modules/local/plasmidfinder.nf @@ -1,10 +1,8 @@ -process plasmidfinder { +process PLASMIDFINDER { tag "${meta.id}" label "process_medium" - publishDir params.outdir, mode: 'copy', saveAs: { filename -> filename.equals('versions.yml') ? null : filename } container 'staphb/plasmidfinder:2.1.6_2024-03-07' - time '10m' - errorStrategy { task.attempt < 2 ? 
'retry' : 'ignore'} + input: tuple val(meta), file(file), file(script) @@ -14,11 +12,12 @@ process plasmidfinder { path "plasmidfinder/*_plasmidfinder.tsv", emit: collect, optional: true path "logs/${task.process}/*.log" , emit: log path "versions.yml" , emit: versions + val meta , emit: meta when: task.ext.when == null || task.ext.when - shell: + script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" """ @@ -33,6 +32,8 @@ process plasmidfinder { python3 ${script} plasmidfinder/${prefix}/results_tab.tsv plasmidfinder/${prefix}_plasmidfinder.tsv plasmidfinder ${prefix} + rm -rf plasmidfinder/${prefix}/tmp + cat <<-END_VERSIONS > versions.yml "${task.process}": plasmidfinder: "${task.container}" diff --git a/modules/local/prokka.nf b/modules/local/prokka.nf index e3eec08..2f24727 100644 --- a/modules/local/prokka.nf +++ b/modules/local/prokka.nf @@ -1,10 +1,7 @@ -process prokka { +process PROKKA { tag "${meta.id}" label "process_high" - publishDir params.outdir, mode: 'copy', saveAs: { filename -> filename.equals('versions.yml') ? null : filename } container 'staphb/prokka:1.14.6' - time '2h' - errorStrategy { task.attempt < 2 ? 'retry' : 'ignore'} input: tuple val(meta), file(contigs), val(organism) @@ -15,11 +12,12 @@ process prokka { path "gff/*.gff" , emit: gff, optional: true path "logs/${task.process}/*.log" , emit: log path "versions.yml" , emit: versions + val meta , emit: meta when: task.ext.when == null || task.ext.when - shell: + script: def args = task.ext.args ?: '--mincontiglen 500 --compliant --locustag locus_tag --centre STAPHB' def prefix = task.ext.prefix ?: "${meta.id}" def gen_sp = organism ? "--genus ${organism[0]} --species ${organism[1]}" : "" diff --git a/modules/local/quast.nf b/modules/local/quast.nf index 2e4545c..ace90de 100644 --- a/modules/local/quast.nf +++ b/modules/local/quast.nf @@ -1,31 +1,28 @@ -process quast { +process QUAST { tag "${meta.id}" label "process_medium" - publishDir params.outdir, mode: 'copy', saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - container 'staphb/quast:5.2.0' - time '10m' - errorStrategy { task.attempt < 2 ? 'retry' : 'ignore'} + container 'staphb/quast:5.3.0' input: tuple val(meta), file(contigs), file(reads) output: - path "quast/*" , emit: files - path "quast/*_quast_report.tsv" , optional: true, emit: for_multiqc + path "quast/*", emit: files, optional: true + path "quast/*_quast_report.tsv", optional: true, emit: for_multiqc tuple val(meta), file("quast/*_quast_report.tsv"), optional: true, emit: results - path "quast/*/quast_transposed_report.tsv" , optional: true, emit: collect - path "quast/*/quast_transposed_report_contig.tsv" , optional: true, emit: collect_contig - path "logs/${task.process}/*.log" , emit: log - path "versions.yml" , emit: versions + path "quast/*quast_transposed_report.tsv", optional: true, emit: collect + path "quast/*quast_transposed_report_contig.tsv", optional: true, emit: collect_contig + path "logs/${task.process}/*.log", emit: log + path "versions.yml", emit: versions when: task.ext.when == null || task.ext.when - shell: - def args = task.ext.args ?: '' + script: + def args = task.ext.args ?: '--no-icarus' def prefix = task.ext.prefix ?: "${meta.id}" def fastq = reads[1] ? "--pe1 ${reads[0]} --pe2 ${reads[1]}" : "" - def fin = reads[1] ? "quast/${prefix}/quast_transposed_report.tsv" : "quast/${prefix}/quast_transposed_report_contig.tsv" + def fin = reads[1] ? 
"quast/${prefix}_quast_transposed_report.tsv" : "quast/${prefix}_quast_transposed_report_contig.tsv" """ mkdir -p ${task.process} logs/${task.process} log_file=logs/${task.process}/${prefix}.${workflow.sessionId}.log diff --git a/modules/local/roary.nf b/modules/local/roary.nf index 35fbcda..72f9899 100755 --- a/modules/local/roary.nf +++ b/modules/local/roary.nf @@ -1,22 +1,21 @@ -process roary { +process ROARY { tag "Core Genome Alignment" label "process_high" - publishDir params.outdir, mode: 'copy', saveAs: { filename -> filename.equals('versions.yml') ? null : filename } container 'staphb/roary:3.13.0' - errorStrategy { task.attempt < 2 ? 'retry' : 'ignore'} - time '10h' input: file(contigs) output: path "roary/*" , emit: files - path "roary/fixed_input_files/*" , emit: roary_input_files tuple path("roary/core_gene_alignment.aln"), path("roary/gene_presence_absence.Rtab"), emit: core_gene_alignment, optional: true path "logs/${task.process}/${task.process}.${workflow.sessionId}.log" , emit: log_files path "versions.yml" , emit: versions - shell: + when: + task.ext.when == null || task.ext.when + + script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: 'roary' """ @@ -25,7 +24,7 @@ process roary { roary ${args} \ -p ${task.cpus} \ - -f roary \ + -f ${prefix} \ -e -n \ *.gff \ | tee -a \$log_file diff --git a/modules/local/seqsero2.nf b/modules/local/seqsero2.nf index 41849e1..62fd90c 100644 --- a/modules/local/seqsero2.nf +++ b/modules/local/seqsero2.nf @@ -1,24 +1,23 @@ -process seqsero2 { +process SEQSERO2 { tag "${meta.id}" label "process_medium" - publishDir params.outdir, mode: 'copy', saveAs: { filename -> filename.equals('versions.yml') ? null : filename } container 'staphb/seqsero2:1.3.1' - time '10m' - errorStrategy { task.attempt < 2 ? 'retry' : 'ignore'} + input: tuple val(meta), file(file) output: - path "seqsero2/*/*" , emit: files - path "seqsero2/*/SeqSero_result.tsv" , emit: collect + path "seqsero2/*/*", emit: files + path "seqsero2/*/SeqSero_result.tsv", emit: collect path "logs/${task.process}/*.log", emit: log - path "versions.yml" , emit: versions + path "versions.yml", emit: versions + val meta, emit: meta when: - (task.ext.when == null || task.ext.when) + task.ext.when == null || task.ext.when - shell: + script: def args = task.ext.args ?: '-m a -b mem' def prefix = task.ext.prefix ?: "${meta.id}" """ diff --git a/modules/local/serotypefinder.nf b/modules/local/serotypefinder.nf index a4d6193..911726a 100644 --- a/modules/local/serotypefinder.nf +++ b/modules/local/serotypefinder.nf @@ -1,11 +1,8 @@ -process serotypefinder { +process SEROTYPEFINDER { tag "${meta.id}" label "process_medium" - publishDir params.outdir, mode: 'copy', saveAs: { filename -> filename.equals('versions.yml') ? null : filename } container 'staphb/serotypefinder:2.0.2' - maxForks 10 - time '10m' - errorStrategy { task.attempt < 2 ? 
'retry' : 'ignore'} + input: tuple val(meta), file(file), file(script) @@ -15,11 +12,12 @@ process serotypefinder { path "serotypefinder/*_serotypefinder.tsv", emit: collect, optional: true path "logs/${task.process}/*.log" , emit: log path "versions.yml" , emit: versions + val meta , emit: meta when: - (task.ext.when == null || task.ext.when) + task.ext.when == null || task.ext.when - shell: + script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" """ @@ -36,6 +34,8 @@ process serotypefinder { python3 ${script} serotypefinder/${prefix}/results_tab.tsv serotypefinder/${prefix}_serotypefinder.tsv serotypefinder ${prefix} + rm -rf serotypefinder/${prefix}/tmp + cat <<-END_VERSIONS > versions.yml "${task.process}": serotypefinder.py: ${task.container} diff --git a/modules/local/shigatyper.nf b/modules/local/shigatyper.nf index 46ed8d0..3fb44f6 100644 --- a/modules/local/shigatyper.nf +++ b/modules/local/shigatyper.nf @@ -1,12 +1,8 @@ -process shigatyper { +process SHIGATYPER { tag "${meta.id}" label "process_medium" - publishDir params.outdir, mode: 'copy', saveAs: { filename -> filename.equals('versions.yml') ? null : filename } container 'staphb/shigatyper:2.0.5' - stageInMode 'copy' - errorStrategy { task.attempt < 2 ? 'retry' : 'ignore'} - time '10m' - + input: tuple val(meta), file(input), file(script) @@ -15,11 +11,12 @@ process shigatyper { path "shigatyper/*_shigatyper-hits.tsv", optional: true, emit: collect path "logs/${task.process}/*.log", emit: log path "versions.yml", emit: versions + val meta, emit: meta when: - (task.ext.when == null || task.ext.when) + task.ext.when == null || task.ext.when - shell: + script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" """ @@ -34,7 +31,7 @@ process shigatyper { python3 ${script} ${prefix}-hits.tsv shigatyper/${prefix}_shigatyper-hits.tsv shigatyper ${prefix} if [ -f "${prefix}.tsv" ] ; then cp ${prefix}.tsv shigatyper/${prefix}_shigatyper.tsv ; fi - + cat <<-END_VERSIONS > versions.yml "${task.process}": shigatyper: \$(echo \$(shigatyper --version 2>&1) | sed 's/^.*ShigaTyper //' ) diff --git a/modules/local/snp-dists.nf b/modules/local/snp-dists.nf index f0bdcdd..8008686 100644 --- a/modules/local/snp-dists.nf +++ b/modules/local/snp-dists.nf @@ -1,11 +1,8 @@ -process snp_dists { +process SNPDISTS { tag "SNP matrix" label "process_medium" - publishDir params.outdir, mode: 'copy', saveAs: { filename -> filename.equals('versions.yml') ? null : filename } container 'staphb/snp-dists:0.8.2' - maxForks 10 - time '2h' - errorStrategy { task.attempt < 2 ? 'retry' : 'ignore'} + input: file(contigs) @@ -17,7 +14,7 @@ process snp_dists { when: task.ext.when == null || task.ext.when - shell: + script: def args = task.ext.args ?: '-c' """ mkdir -p snp-dists diff --git a/modules/local/spades.nf b/modules/local/spades.nf index 138c97d..5f67c52 100644 --- a/modules/local/spades.nf +++ b/modules/local/spades.nf @@ -1,13 +1,7 @@ -process spades { +process SPADES { tag "${meta.id}" label "process_high" - publishDir path: params.outdir, mode: 'copy', pattern: 'logs/*/*log' - publishDir path: params.outdir, mode: 'copy', pattern: 'spades/*' - publishDir path: params.outdir, mode: 'copy', pattern: 'spades/*/*' - publishDir path: params.outdir, mode: 'copy', pattern: 'contigs/*' container 'staphb/spades:4.0.0' - errorStrategy { task.attempt < 2 ? 
'retry' : 'ignore'} - time '5h' input: tuple val(meta), file(reads) @@ -22,7 +16,7 @@ process spades { when: task.ext.when == null || task.ext.when - shell: + script: def args = task.ext.args ?: '--isolate' def prefix = task.ext.prefix ?: "${meta.id}" """ @@ -38,6 +32,8 @@ process spades { if [ -f "spades/${prefix}/contigs.fasta" ] ; then cp spades/${prefix}/contigs.fasta contigs/${prefix}_contigs.fa ; fi + rm -rf spades/${prefix}/tmp + cat <<-END_VERSIONS > versions.yml "${task.process}": spades: \$(spades.py --version 2>&1 | sed 's/^.*SPAdes genome assembler v//; s/ .*\$//') diff --git a/nextflow.config b/nextflow.config index 1a984fd..146d605 100644 --- a/nextflow.config +++ b/nextflow.config @@ -1,14 +1,87 @@ -manifest { - name = 'Grandeur' - author = 'Erin Young' - homePage = 'https://github.com/UPHL-BioNGS/Grandeur' - mainScript = 'main.nf' - version = '4.5.24310' - defaultBranch = 'main' - description = 'Grandeur is short-read de novo assembly pipeline with serotyping.' - nextflowVersion = '!>=22.10.1' + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + UPHL-BioNGS/Grandeur Nextflow config file +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Default config options for all compute environments +---------------------------------------------------------------------------------------- +*/ + +params { + outdir = "grandeur" + + // Input options + reads = null + fastas = null + input = null + sample_sheet = params.input + fasta_list = null + + // external files + kraken2_db = null + blast_db = null + blast_db_type = "" + mash_db = null + fastani_ref = null + fastani_ref_list = null + + // for downloading from databases + sra_accessions = [] + genome_accessions = [] + + // thresholds and other params + minimum_reads = 10000 + datasets_max_genomes = 5 + mash_max_hits = 25 + min_core_genes = 1500 + iqtree2_outgroup = "" + + // subworkflow flags + current_datasets = false + skip_extras = false + exclude_top_hit = false + msa = false + + // specifying the core workflow + aligner = 'panaroo' + annotator = 'bakta' + + // for qc more than anything + genome_sizes = "${baseDir}/assets/genome_sizes.json" + + // the following were stolen from other nf-core workflows and may have no functionality here + + // Boilerplate options + publish_dir_mode = 'copy' + email = null + email_on_fail = null + plaintext_email = false + monochrome_logs = false + hook_url = null + help = false + version = false + pipelines_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/' + + // Config options + config_profile_name = null + config_profile_description = null + custom_config_version = 'master' + custom_config_base = "https://raw.githubusercontent.com/nf-core/configs/${params.custom_config_version}" + config_profile_contact = null + config_profile_url = null + + // Schema validation default options + validationFailUnrecognisedParams = false + validationLenientMode = false + validationShowHiddenParams = false + validate_params = true } + +// Load nf-core custom profiles from different Institutions +includeConfig !System.getenv('NXF_OFFLINE') && params.custom_config_base ? 
"${params.custom_config_base}/nfcore_custom.config" : "/dev/null" + + //########## Setting Profiles ########## profiles { @@ -41,7 +114,7 @@ profiles { } test { // default workflow - params.sra_accessions = ["SRR11725329", "SRR13643280", "SRR14436834", "SRR14634837", "SRR7738178", "SRR7889058"] + params.sra_accessions = ["SRR11725329", "SRR13643280", "SRR14436834", "SRR14634837", "SRR7738178", "SRR7889058"] } test0 { // default workflow while skipping extras @@ -84,38 +157,67 @@ profiles { params.aligner = 'roary' } uphl { - includeConfig './configs/UPHL.config' + includeConfig './conf/UPHL.config' + } + test7 { + // phylogenetic analysis with prokka and exclude fastani top hit + params.sra_accessions = ["SRR22314961", "SRR22314960", "SRR22314959", "SRR22314958" ] + params.msa = true + params.annotator = 'prokka' } + test8 { + // phylogenetic analysis with genome accessions + params.genome_accessions = ["GCF_900475035.1", "GCF_022869605.1", "GCF_002055535.1", "GCF_004028355.1" ] + params.msa = true + } + } -process { - maxRetries = 1 - maxErrors = '-1' +def trace_timestamp = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss') +timeline { + enabled = true + file = "${params.outdir}/pipeline_info/execution_timeline_${trace_timestamp}.html" +} +report { + enabled = true + file = "${params.outdir}/pipeline_info/execution_report_${trace_timestamp}.html" +} +trace { + enabled = true + file = "${params.outdir}/pipeline_info/execution_trace_${trace_timestamp}.txt" +} +dag { + enabled = true + file = "${params.outdir}/pipeline_info/pipeline_dag_${trace_timestamp}.html" +} - withLabel:process_single { - cpus = { 1 } - memory = { 4.GB } - time = { 30.m } - } - withLabel:process_low { - cpus = { 2 } - memory = { 12.GB } - time = { 2.h } - } - withLabel:process_medium { - cpus = { 6 } - memory = { 36.GB } - time = { 4.h } - } - withLabel:process_high { - cpus = { 12 } - memory = { 72.GB } - time = { 16.h } - } - withLabel:process_long { - time = { 20.h } - } - withLabel:process_high_memory { - memory = { 200.GB } +manifest { + name = 'UPHL-BioNGS/Grandeur' + author = 'Erin Young' + homePage = 'https://github.com/UPHL-BioNGS/Grandeur' + mainScript = 'main.nf' + version = '4.9.24345' + defaultBranch = 'main' + description = 'Grandeur is short-read de novo assembly pipeline with serotyping.' 
+ nextflowVersion = '!>=24.04.4' +} + + +//######## nf-core assistance ##### +validation { + help { + enabled = true + command = "nextflow run UPHL-BioNGS/Grandeur -profile --sample_sheet samplesheet.csv --outdir grandeur" + fullParameter = "help_full" + showHiddenParameter = "show_hidden" } } + + +// Load base.config by default for all pipelines +includeConfig 'conf/base.config' + +// Load modules.config for DSL2 module specific options +includeConfig 'conf/modules.config' + + diff --git a/nextflow_schema.json b/nextflow_schema.json old mode 100644 new mode 100755 index f7b3dd6..228363f --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -1,145 +1,312 @@ { - "$schema": "http://json-schema.org/draft-07/schema", - "$id": "https://raw.githubusercontent.com/UPHL-BioNGS/Grandeur/main/nextflow_schema.json", - "title": "Grandeur Parameters", + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://raw.githubusercontent.com/UPHL-BioNGS/Grandeur/master/nextflow_schema.json", + "title": "UPHL-BioNGS/Grandeur pipeline parameters", "description": "Grandeur is short-read de novo assembly pipeline with serotyping.", "type": "object", - "properties": { - "aligner": { - "type": "string", - "hidden": true, - "description": "chooses core genome aligner (params.msa must be set to true)", - "default": "panaroo", - "enum": [ - "roary", - "panaroo" - ] + "$defs": { + "input_output_options": { + "title": "Input/output options", + "type": "object", + "fa_icon": "fas fa-terminal", + "description": "Define where the pipeline should find input data and save output data.", + "required": [ + "outdir" + ], + "properties": { + "input": { + "type": "string", + "format": "file-path", + "exists": true, + "schema": "assets/schema_input.json", + "mimetype": "text/csv", + "pattern": "^\\S+\\.csv$", + "description": "Path to comma-separated file containing information about the samples in the experiment.", + "help_text": "You will need to create a design file with information about the samples in your experiment before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 3 columns, and a header row.", + "fa_icon": "fas fa-file-csv", + "hidden": true + }, + "sample_sheet": { + "type": "string", + "description": "csv with sample,read1,read2" + }, + "fastas": { + "type": "string", + "hidden": true, + "description": "directory with fasta files (not compatible with cloud resources)" + }, + "fasta_list": { + "type": "string", + "description": "A sample sheet for fasta files" + }, + "reads": { + "type": "string", + "hidden": true, + "description": "directory with paired-end illumina fastq files (not compatible with cloud resources)" + }, + "outdir": { + "type": "string", + "format": "directory-path", + "description": "The output directory where the results will be saved. 
You have to use absolute paths to storage on Cloud infrastructure.", + "fa_icon": "fas fa-folder-open", + "default": "grandeur" + }, + "sra_accessions": { + "type": "string", + "default": [], + "hidden": true, + "description": "list of accessions to download from the SRA" + }, + "genome_accessions": { + "type": "string", + "default": [], + "hidden": true, + "description": "list of accessions to download from genomes" + }, + "genome_sizes": { + "type": "string", + "default": null, + "hidden": true, + "description": "text of genome sizes" + } + } }, - "blast_db": { - "type": "string", - "hidden": false, - "description": "directory with blast database" - }, - "blast_db_type": { - "type": "string", - "hidden": false, - "description": "type of blast database (i.e. 'nt')" - }, - "config_file": { - "type": "boolean", - "hidden": true, - "description": "if true, copies config and params template and exits" - }, - "current_datasets": { - "type": "boolean", - "hidden": true, - "default": false, - "description": "toggles whether or not genomes are downloaded from NCBI" - }, - "datasets_max_genomes": { - "type": "number", - "hidden": true, - "default": 5.0, - "description": "the maxiumum number of genomes to download per organism" - }, - "exclude_top_hit": { - "type": "boolean", - "hidden": true, - "default": false, - "description": "removes fastani top hit from msa" - }, - "fasta_list": { - "type": "string", - "hidden": false, - "description": "A sample sheet for fasta files" - }, - "fastani_ref": { - "type": "string", - "hidden": true, - "description": "additional fasta files for fastani references" - }, - "fastani_ref_list": { - "type": "string", - "hidden": true, - "description": "list of genomes for fastani references" - }, - "fastas": { - "type": "string", - "hidden": false, - "default": "fastas", - "description": "directory with fasta files" - }, - "genome_sizes": { - "type": "string", - "hidden": true, - "description": "file with pre-prepared genome sizes" - }, - "iqtree2_outgroup": { - "type": "string", - "hidden": true, - "description": "to specify outgroup in iqtree2" - }, - "kraken2_db": { - "type": "string", - "hidden": false, - "description": "directory of kraken2 database" - }, - "mash_db": { - "type": "string", - "hidden": true, - "description": "prepared mash reference msh file" - }, - "mash_max_hits": { - "type": "number", - "hidden": true, - "default": 25.0, - "description": "the number of mash hits allowed in result file" - }, - "min_core_genes": { - "type": "number", - "hidden": true, - "default": 1500.0, - "description": "minimum number of genes in core genome alignment for iqtree2" - }, - "minimum_reads": { - "type": "number", - "hidden": true, - "default": 10000.0, - "description": "the minimum number of reads in a fastq file required to move to de novo alignment" - }, - "msa": { - "type": "string", - "hidden": false, + "reference_files_paths": { + "title": "Reference files/paths", + "type": "object", + "description": "", "default": "", - "description": "toggles whether or not phylogenetic analysis will be run on samples" + "properties": { + "kraken2_db": { + "type": "string", + "description": "directory of kraken2 database" + }, + "blast_db": { + "type": "string", + "description": "directory of blast database. Compressed directories can be found at https://ftp.ncbi.nlm.nih.gov/blast/db/v5/." 
+ },
+ "blast_db_type": {
+ "type": "string",
+ "description": "type of blast database (ex: 'nt', 'nt_prok', or 'ref_prok_rep_genomes')"
+ },
+ "mash_db": {
+ "type": "string",
+ "description": "prepared mash reference msh file"
+ },
+ "fastani_ref": {
+ "type": "string",
+ "description": "additional fasta files for fastani references"
+ },
+ "fastani_ref_list": {
+ "type": "string",
+ "description": "list of genomes (in fasta format) for fastani references"
+ }
+ }
},
- "outdir": {
- "type": "string",
- "hidden": false,
- "description": "result output directory",
- "default": "grandeur"
+ "workflow_values": {
+ "title": "workflow values",
+ "type": "object",
+ "description": "",
+ "default": "",
+ "properties": {
+ "datasets_max_genomes": {
+ "type": "integer",
+ "default": 5,
+ "hidden": true,
+ "description": "the maximum number of genomes to download per organism"
+ },
+ "mash_max_hits": {
+ "type": "integer",
+ "default": 25,
+ "hidden": true,
+ "description": "the number of mash hits allowed in result file"
+ },
+ "min_core_genes": {
+ "type": "integer",
+ "default": 1500,
+ "description": "minimum number of genes in core genome alignment for iqtree2"
+ },
+ "iqtree2_outgroup": {
+ "type": "string",
+ "hidden": true,
+ "description": "to specify outgroup in iqtree2"
+ },
+ "minimum_reads": {
+ "type": "integer",
+ "default": 10000,
+ "hidden": true,
+ "description": "the minimum number of reads in a fastq file required to move to de novo alignment"
+ }
+ }
+ },
+ "subworkflow_toggles": {
+ "title": "Subworkflow toggles",
+ "type": "object",
+ "description": "",
+ "default": "",
+ "properties": {
+ "exclude_top_hit": {
+ "type": "boolean",
+ "hidden": true,
+ "description": "removes fastani top hit from msa"
+ },
+ "msa": {
+ "type": "boolean",
+ "description": "toggles whether or not phylogenetic analysis will be run on samples"
+ },
+ "aligner": {
+ "type": "string",
+ "default": "panaroo",
+ "hidden": true,
+ "description": "chooses core genome aligner (params.msa must be set to true)",
+ "enum": [
+ "roary",
+ "panaroo"
+ ]
+ },
+ "annotator": {
+ "type": "string",
+ "default": "bakta",
+ "hidden": true,
+ "description": "chooses annotator (params.msa must be set to true)",
+ "enum": [
+ "bakta",
+ "prokka"
+ ]
+ },
+ "skip_extras": {
+ "type": "boolean",
+ "hidden": true,
+ "description": "turns off blobtools, kraken2, fastani, mash, and report generation subworkflows"
+ },
+ "current_datasets": {
+ "type": "boolean",
+ "hidden": true,
+ "description": "toggles whether or not genomes are downloaded from NCBI"
+ }
+ }
+ },
+ "generic_options": {
+ "title": "Generic options",
+ "type": "object",
+ "fa_icon": "fas fa-file-import",
+ "description": "Less common options for the pipeline, typically set in a config file.",
+ "help_text": "These options are common to all nf-core pipelines and allow you to customise some of the core preferences for how the pipeline runs.\n\nTypically these options would be set in a Nextflow config file loaded for all pipeline runs, such as `~/.nextflow/config`.",
+ "properties": {
+ "version": {
+ "type": "boolean",
+ "description": "Display version and exit.",
+ "fa_icon": "fas fa-question-circle",
+ "hidden": true
+ },
+ "publish_dir_mode": {
+ "type": "string",
+ "default": "copy",
+ "description": "Stolen from example and might not do anything.",
+ "hidden": true
+ },
+ "email_on_fail": {
+ "type": "string",
+ "description": "Stolen from example and might not do anything.",
+ "hidden": true
+ },
+ "plaintext_email": {
+ "type": "boolean",
"description": "Stolen from example and might not do anything.", + "hidden": true + }, + "monochrome_logs": { + "type": "boolean", + "description": "Stolen from example and might not do anything.", + "hidden": true + }, + "email": { + "type": "string", + "description": "Stolen from example and might not do anything.", + "hidden": true + }, + "hook_url": { + "type": "string", + "description": "Stolen from example and might not do anything.", + "hidden": true + }, + "pipelines_testdata_base_path": { + "type": "string", + "default": "https://raw.githubusercontent.com/nf-core/test-datasets/", + "description": "Stolen from example and might not do anything.", + "hidden": true + }, + "config_profile_description": { + "type": "string", + "description": "Stolen from example and might not do anything.", + "hidden": true + }, + "config_profile_name": { + "type": "string", + "description": "Stolen from example and might not do anything.", + "hidden": true + }, + "custom_config_version": { + "type": "string", + "default": "master", + "description": "Stolen from example and might not do anything.", + "hidden": true + }, + "custom_config_base": { + "type": "string", + "default": "https://raw.githubusercontent.com/nf-core/configs/master", + "description": "Stolen from example and might not do anything.", + "hidden": true + }, + "config_profile_url": { + "type": "string", + "description": "Stolen from example and might not do anything.", + "hidden": true + }, + "validationLenientMode": { + "type": "boolean", + "description": "Stolen from example and might not do anything.", + "hidden": true + }, + "validationShowHiddenParams": { + "type": "boolean", + "description": "Stolen from example and might not do anything.", + "hidden": true + }, + "validationFailUnrecognisedParams": { + "type": "boolean", + "description": "Stolen from example and might not do anything.", + "hidden": true + }, + "config_profile_contact": { + "type": "string", + "description": "Stolen from example and might not do anything.", + "hidden": true + }, + "validate_params": { + "type": "boolean", + "description": "Stolen from example and might not do anything.", + "fa_icon": "fas fa-check-square", + "hidden": true + } + } + } + }, + "allOf": [ + { + "$ref": "#/$defs/input_output_options" }, - "reads": { - "type": "string", - "default": "reads", - "hidden": true, - "description": "directory of reads" + { + "$ref": "#/$defs/reference_files_paths" }, - "sample_sheet": { - "type": "string", - "hidden": false, - "description": "csv with sample,read1,read2" + { + "$ref": "#/$defs/workflow_values" }, - "skip_extras": { - "type": "boolean", - "hidden": true, - "default": false, - "description": "turns off blobtools, kraken2, fastani, mash, and report generation subworkflows" + { + "$ref": "#/$defs/subworkflow_toggles" }, - "sra_accessions": { - "type": "string", - "hidden": true, - "description": "list of SRA accessions to download" + { + "$ref": "#/$defs/generic_options" } - } -} + ] +} \ No newline at end of file diff --git a/subworkflows/average_nucleotide_identity.nf b/subworkflows/average_nucleotide_identity.nf deleted file mode 100644 index 8e7ef4e..0000000 --- a/subworkflows/average_nucleotide_identity.nf +++ /dev/null @@ -1,76 +0,0 @@ -include { datasets_summary } from '../modules/local/datasets' addParams(params) -include { datasets_download } from '../modules/local/datasets' addParams(params) -include { fastani } from '../modules/local/fastani' addParams(params) -include { references } from '../modules/local/local' addParams(params) 
-include { species } from '../modules/local/local' addParams(params) - -workflow average_nucleotide_identity { - take: - ch_species - ch_contigs - ch_fastani_ref - dataset_script - - main: - ch_versions = Channel.empty() - if ( params.current_datasets ) { - species(ch_species) - - species.out.species - .splitText() - .map(it -> it.trim()) - .set{ ch_species_list } - - datasets_summary(ch_species_list.combine(dataset_script)) - datasets_download(datasets_summary.out.genomes.collect()) - - ch_fastani_ref = ch_fastani_ref.mix(datasets_download.out.genomes.flatten()) - - ch_versions = ch_versions.mix(datasets_summary.out.versions.first()).mix(datasets_download.out.versions) - - datasets_summary.out.genomes - .collectFile( - storeDir: "${params.outdir}/datasets/", - keepHeader: true, - sort: { file -> file.text }, - name: "datasets_summary.csv") - .set { datasets_summary } - - } else { - datasets_summary = Channel.empty() - } - - references() - - ch_fastani_ref - .mix(references.out.fastas.flatten()) - .unique() - .collect() - .map { it -> tuple([it])} - .set{ch_fastani_genomes} - - fastani(ch_contigs.combine(ch_fastani_genomes)) - - fastani.out.results - .map { it -> it [1] } - .collectFile( - storeDir: "${params.outdir}/fastani/", - keepHeader: true, - sort: { file -> file.text }, - name: "fastani_summary.csv") - .set { summary } - - fastani.out.top_len - .collectFile( - keepHeader: true, - name: "fastani_top_len.csv") - .set { fastani_len_summary } - - ch_versions = ch_versions.mix(fastani.out.versions.first()) - - emit: - for_flag = fastani.out.results - for_summary = summary.mix(datasets_summary).mix(fastani_len_summary) - top_hit = fastani.out.top_hit - versions = ch_versions -} diff --git a/subworkflows/blobtools.nf b/subworkflows/blobtools.nf deleted file mode 100644 index 4b5ea05..0000000 --- a/subworkflows/blobtools.nf +++ /dev/null @@ -1,31 +0,0 @@ -include { blastn } from '../modules/local/blast' addParams(params) -include { blobtools_create } from '../modules/local/blobtools' addParams(params) -include { blobtools_plot } from '../modules/local/blobtools' addParams(params) -include { blobtools_view } from '../modules/local/blobtools' addParams(params) - -workflow blobtools { - take: - ch_contig_bams - ch_blast_db - - main: - ch_contigs = ch_contig_bams.filter{it[1]}.map{it -> tuple(it[0], it[1])} - - blastn(ch_contigs.combine(ch_blast_db)) - blobtools_create(ch_contig_bams.join(blastn.out.blastn, by: 0, failOnMismatch: false, remainder: false)) - blobtools_view(blobtools_create.out.json) - blobtools_plot(blobtools_create.out.json) - - blobtools_plot.out.collect - .collectFile( - storeDir: "${params.outdir}/blobtools/", - keepHeader: true, - sort: { file -> file.text }, - name: "blobtools_summary.txt") - .set{ summary } - - emit: - for_flag = blobtools_plot.out.results - for_summary = summary - versions = blastn.out.versions.first().mix(blobtools_create.out.versions.first()).mix(blobtools_view.out.versions.first()).mix(blobtools_plot.out.versions.first()) -} diff --git a/subworkflows/de_novo_alignment.nf b/subworkflows/de_novo_alignment.nf deleted file mode 100644 index 5b0f4e0..0000000 --- a/subworkflows/de_novo_alignment.nf +++ /dev/null @@ -1,29 +0,0 @@ -include { fastp } from '../modules/local/fastp' addParams(params) -include { bbduk } from '../modules/local/bbduk' addParams(params) -include { spades } from '../modules/local/spades' addParams(params) - -workflow de_novo_alignment { - take: - reads - - main: - bbduk(reads) - fastp(bbduk.out.fastq) - - fastp.out.fastp_results - 
.filter ({ it[2] as int >= params.minimum_reads }) - .map ( it -> tuple (it[0], it[1])) - .set{ read_check } - - spades(read_check) - - emit: - // for downstream analyses - reads_contigs = spades.out.reads_contigs - clean_reads = fastp.out.fastq - contigs = spades.out.contigs.filter{it[1] != null} - - // for multiqc - for_multiqc = fastp.out.fastp_files.mix(bbduk.out.stats) - versions = bbduk.out.versions.first().mix(fastp.out.versions.first()).mix(spades.out.versions.first()) -} diff --git a/subworkflows/local/average_nucleotide_identity.nf b/subworkflows/local/average_nucleotide_identity.nf new file mode 100644 index 0000000..10a8316 --- /dev/null +++ b/subworkflows/local/average_nucleotide_identity.nf @@ -0,0 +1,76 @@ +include { DATASETS_SUMMARY } from '../../modules/local/datasets' +include { DATASETS_DOWNLOAD } from '../../modules/local/datasets' +include { FASTANI } from '../../modules/local/fastani' +include { REFERENCES } from '../../modules/local/local' +include { SPECIES } from '../../modules/local/local' + +workflow AVERAGE_NUCLEOTIDE_IDENTITY { + take: + ch_species + ch_contigs + ch_fastani_ref + dataset_script + + main: + ch_versions = Channel.empty() + if ( params.current_datasets ) { + SPECIES(ch_species) + + SPECIES.out.species + .splitText() + .map{ it -> it.trim()} + .set{ ch_species_list } + + DATASETS_SUMMARY(ch_species_list.combine(dataset_script)) + DATASETS_DOWNLOAD(DATASETS_SUMMARY.out.genomes.collect()) + + ch_fastani_ref = ch_fastani_ref.mix(DATASETS_DOWNLOAD.out.genomes.flatten()) + + ch_versions = ch_versions.mix(DATASETS_SUMMARY.out.versions.first()).mix(DATASETS_DOWNLOAD.out.versions) + + DATASETS_SUMMARY.out.genomes + .collectFile( + storeDir: "${params.outdir}/datasets/", + keepHeader: true, + sort: { file -> file.text }, + name: "datasets_summary.csv") + .set { ch_datasets_summary } + + } else { + ch_datasets_summary = Channel.empty() + } + + REFERENCES() + + ch_fastani_ref + .mix(REFERENCES.out.fastas.flatten()) + .unique() + .collect() + .map { it -> tuple([it])} + .set{ch_fastani_genomes} + + FASTANI(ch_contigs.combine(ch_fastani_genomes)) + + FASTANI.out.results + .map { it -> it [1] } + .collectFile( + storeDir: "${params.outdir}/fastani/", + keepHeader: true, + sort: { file -> file.text }, + name: "fastani_summary.csv") + .set { summary } + + FASTANI.out.top_len + .collectFile( + keepHeader: true, + name: "fastani_top_len.csv") + .set { fastani_len_summary } + + ch_versions = ch_versions.mix(FASTANI.out.versions.first()) + + emit: + for_flag = FASTANI.out.results + for_summary = summary.mix(ch_datasets_summary).mix(fastani_len_summary) + top_hit = FASTANI.out.top_hit + versions = ch_versions +} diff --git a/subworkflows/local/blobtools.nf b/subworkflows/local/blobtools.nf new file mode 100644 index 0000000..54ad6c3 --- /dev/null +++ b/subworkflows/local/blobtools.nf @@ -0,0 +1,39 @@ +include { BLASTN } from '../../modules/local/blast' +include { BLOBTOOLS_CREATE as CREATE } from '../../modules/local/blobtools' +include { BLOBTOOLS_PLOT as PLOT } from '../../modules/local/blobtools' +include { BLOBTOOLS_VIEW as VIEW } from '../../modules/local/blobtools' + +workflow BLOBTOOLS { + take: + ch_contig_bams + ch_blast_db + + main: + ch_versions = Channel.empty() + ch_contigs = ch_contig_bams.filter{it[1]}.map{it -> tuple(it[0], it[1])} + + BLASTN(ch_contigs.combine(ch_blast_db)) + ch_versions = ch_versions.mix(BLASTN.out.versions.first()) + + CREATE(ch_contig_bams.join(BLASTN.out.blastn, by: 0, failOnMismatch: false, remainder: false)) + ch_versions = 
ch_versions.mix(CREATE.out.versions.first()) + + VIEW(CREATE.out.json) + ch_versions = ch_versions.mix(VIEW.out.versions.first()) + + PLOT(CREATE.out.json) + ch_versions = ch_versions.mix(PLOT.out.versions.first()) + + PLOT.out.collect + .collectFile( + storeDir: "${params.outdir}/blobtools/", + keepHeader: true, + sort: { file -> file.text }, + name: "blobtools_summary.txt") + .set{ summary } + + emit: + for_flag = PLOT.out.results + for_summary = summary + versions = ch_versions +} diff --git a/subworkflows/local/de_novo_alignment.nf b/subworkflows/local/de_novo_alignment.nf new file mode 100644 index 0000000..814a100 --- /dev/null +++ b/subworkflows/local/de_novo_alignment.nf @@ -0,0 +1,35 @@ +include { FASTP } from '../../modules/local/fastp' +include { SPADES } from '../../modules/local/spades' + +workflow DE_NOVO_ALIGNMENT { + take: + reads + + main: + ch_versions = Channel.empty() + + FASTP(reads) + + FASTP.out.fastp_results + .filter ({ it[2] as int >= params.minimum_reads }) + .map { it -> + tuple (it[0], it[1]) + } + .set{ read_check } + + ch_versions = ch_versions.mix(FASTP.out.versions.first()) + + SPADES(read_check) + + ch_versions = ch_versions.mix(SPADES.out.versions.first()) + + emit: + // for downstream analyses + reads_contigs = SPADES.out.reads_contigs + clean_reads = FASTP.out.fastq + contigs = SPADES.out.contigs.filter{it[1] != null} + + // for multiqc + for_multiqc = FASTP.out.fastp_files + versions = ch_versions +} diff --git a/subworkflows/information.nf b/subworkflows/local/info.nf similarity index 65% rename from subworkflows/information.nf rename to subworkflows/local/info.nf index a2d78d4..321294b 100644 --- a/subworkflows/information.nf +++ b/subworkflows/local/info.nf @@ -1,15 +1,19 @@ -include { amrfinderplus } from '../modules/local/amrfinderplus' addParams(params) -include { drprg } from '../modules/local/drprg' addParams(params) -include { elgato } from '../modules/local/elgato' addParams(params) -include { emmtyper } from '../modules/local/emmtyper' addParams(params) -include { json_convert } from '../modules/local/local' addParams(params) -include { kaptive } from '../modules/local/kaptive' addParams(params) -include { kleborate } from '../modules/local/kleborate' addParams(params) -include { mykrobe } from '../modules/local/mykrobe' addParams(params) -include { pbptyper } from '../modules/local/pbptyper' addParams(params) -include { seqsero2 } from '../modules/local/seqsero2' addParams(params) -include { serotypefinder } from '../modules/local/serotypefinder' addParams(params) -include { shigatyper } from '../modules/local/shigatyper' addParams(params) + +import groovy.json.JsonSlurper + +include { AMRFINDER } from '../../modules/local/amrfinderplus' +include { DRPRG } from '../../modules/local/drprg' +include { ELGATO } from '../../modules/local/elgato' +include { EMMTYPER } from '../../modules/local/emmtyper' +include { JSON_CONVERT } from '../../modules/local/local' +include { KAPTIVE } from '../../modules/local/kaptive' +include { KLEBORATE } from '../../modules/local/kleborate' +include { MENINGOTYPE } from '../../modules/local/meningotype' +include { MYKROBE } from '../../modules/local/mykrobe' +include { PBPTYPER } from '../../modules/local/pbptyper' +include { SEQSERO2 } from '../../modules/local/seqsero2' +include { SEROTYPEFINDER } from '../../modules/local/serotypefinder' +include { SHIGATYPER } from '../../modules/local/shigatyper' def flagOrg(org_files, phrases) { def found = false @@ -17,9 +21,9 @@ def flagOrg(org_files, phrases) { if 
(org_file && org_file.exists()) { def count = 0 org_file.withReader { reader -> - while (reader.ready() && count < 10 && !found) { + while(reader.ready() && count < 10 && !found) { def line = reader.readLine() - count++ + count = count + 1 phrases.each { phrase -> if (line.toString().contains(phrase)) { if (org_file.getName().contains('fastani')) { @@ -53,7 +57,7 @@ def topOrg(org_files) { if (lines.size() > 1) { def secondLine = lines[1].split(',') if (secondLine.size() >= 4 && secondLine[3].toFloat() > 90) { - hit = secondLine[2].trim()+ '_unknown' + def hit = secondLine[2].trim()+ '_unknown' genus = hit.split('_')[0] species = hit.split('_')[1] } @@ -66,10 +70,10 @@ def topOrg(org_files) { def line = reader.readLine() def columns = line.split('\t') if (columns.size() > 1) { - hit = columns[1].trim() + def hit = columns[1].trim() if (!['name', 'all', 'no-hit', 'undel'].contains(hit)) { if (columns.size() == 14 && columns[-1].toFloat() > 50) { - name = hit + '_unknown' + def name = hit + '_unknown' genus = name.split('_')[0] species = name.split('_')[1] return @@ -85,7 +89,7 @@ def topOrg(org_files) { if (lines.size() > 1) { def secondLine = lines[1].split(',') if (secondLine.size() >= 2 && secondLine[1].toFloat() > 50) { - hit = secondLine[-1].trim() + '_unknown' + def hit = secondLine[-1].trim() + '_unknown' genus = hit.split('_')[0] species = hit.split('_')[1] } @@ -97,7 +101,7 @@ def topOrg(org_files) { if (lines.size() > 1) { def secondLine = lines[1].split(',') if (secondLine.size() >= 4 && secondLine[3].toFloat() < 0.1) { - hit = secondLine[-1].trim() + '_unknown' + def hit = secondLine[-1].trim() + '_unknown' genus = hit.split('_')[0] species = hit.split('_')[1] } @@ -106,7 +110,7 @@ def topOrg(org_files) { return [genus, species] } -workflow information { +workflow INFO { take: ch_contigs ch_flag @@ -114,6 +118,8 @@ workflow information { jsoncon_script main: + ch_summary = Channel.empty() + ch_versions = Channel.empty() // species specific // branch + join = faster than groupTuple @@ -199,39 +205,41 @@ workflow information { .map { it -> tuple(it[0], it[1])} .set {ch_myco} + + // Neisseria meningitidis + ch_for_flag + .filter{flagOrg(it[2], ['Neisseria'])} + .map { it -> tuple(it[0], it[1])} + .set {ch_gc} + // Getting the top organism for each sample // for amrfinderplus // for prokka ch_for_flag .map { it -> - genus_species = topOrg(it[2]) + def genus_species = topOrg(it[2]) tuple (it[0], it[1], genus_species[0], genus_species[1]) } .set { ch_organism } - amrfinderplus(ch_organism) - drprg(ch_myco) - emmtyper(ch_gas.combine(summfle_script)) - kaptive(ch_vibrio) - kleborate(ch_kleb.combine(summfle_script)) - elgato(ch_legionella) - mykrobe(ch_myco) - pbptyper(ch_strep) - seqsero2(ch_salmonella) - serotypefinder(ch_ecoli.combine(summfle_script)) - shigatyper(ch_ecoli.combine(summfle_script)) - - json_convert(drprg.out.json.combine(jsoncon_script)) - - amrfinderplus.out.collect + AMRFINDER(ch_organism) + + AMRFINDER.out.collect .collectFile(name: 'amrfinderplus.txt', keepHeader: true, sort: { file -> file.text }, - storeDir: "${params.outdir}/ncbi-AMRFinderplus") + storeDir: "${params.outdir}/amrfinder") .set{ amrfinderplus_summary } - json_convert.out.collect + ch_summary = ch_summary.mix(amrfinderplus_summary) + ch_versions = ch_versions.mix(AMRFINDER.out.versions.first()) + + DRPRG(ch_myco) + + JSON_CONVERT(DRPRG.out.json.combine(jsoncon_script)) + + JSON_CONVERT.out.collect .filter( ~/.*drprg.tsv/ ) .collectFile(name: 'drprg_summary.tsv', keepHeader: true, @@ -239,104 
+247,137 @@ workflow information { storeDir: "${params.outdir}/drprg") .set{ drprg_summary } - elgato.out.collect - .collectFile(name: 'elgato_summary.tsv', - keepHeader: true, - sort: { file -> file.text }, - storeDir: "${params.outdir}/elgato") - .set{ elgato_summary } + ch_summary = ch_summary.mix(drprg_summary) + ch_versions = ch_versions.mix(DRPRG.out.versions.first()) - emmtyper.out.collect + EMMTYPER(ch_gas.combine(summfle_script)) + + EMMTYPER.out.collect .collectFile(name: 'emmtyper_summary.tsv', keepHeader: true, sort: { file -> file.text }, storeDir: "${params.outdir}/emmtyper") .set{ emmtyper_summary } - kaptive.out.collect + ch_summary = ch_summary.mix(emmtyper_summary) + ch_versions = ch_versions.mix(EMMTYPER.out.versions.first()) + + KAPTIVE(ch_vibrio) + + KAPTIVE.out.collect .collectFile(name: 'kaptive_summary.txt', keepHeader: true, sort: { file -> file.text }, storeDir: "${params.outdir}/kaptive") .set{ kaptive_summary } - kleborate.out.collect + ch_summary = ch_summary.mix(kaptive_summary) + ch_versions = ch_versions.mix(KAPTIVE.out.versions.first()) + + KLEBORATE(ch_kleb.combine(summfle_script)) + + KLEBORATE.out.collect .collectFile(name: 'kleborate_results.tsv', keepHeader: true, sort: { file -> file.text }, storeDir: "${params.outdir}/kleborate") .set{ kleborate_summary } + + ch_summary = ch_summary.mix(kleborate_summary) + ch_versions = ch_versions.mix(KLEBORATE.out.versions.first()) + + ELGATO(ch_legionella) - mykrobe.out.collect + ELGATO.out.collect + .collectFile(name: 'elgato_summary.tsv', + keepHeader: true, + sort: { file -> file.text }, + storeDir: "${params.outdir}/elgato") + .set{ elgato_summary } + + ch_summary = ch_summary.mix(elgato_summary) + ch_versions = ch_versions.mix(ELGATO.out.versions.first()) + + MYKROBE(ch_myco) + + MYKROBE.out.collect .collectFile(name: 'mykrobe_summary.csv', keepHeader: true, sort: { file -> file.text }, storeDir: "${params.outdir}/mykrobe") .set{ mykrobe_summary } - pbptyper.out.collect + ch_summary = ch_summary.mix(mykrobe_summary) + ch_versions = ch_versions.mix(MYKROBE.out.versions.first()) + + MENINGOTYPE(ch_gc) + + MENINGOTYPE.out.files + .collectFile(name: 'meningotype_summary.tsv', + keepHeader: true, + sort: {file -> file.text }, + storeDir: "${params.outdir}/meningotype") + .set{ meningotype_summary } + + ch_summary = ch_summary.mix(meningotype_summary) + ch_versions = ch_versions.mix(MENINGOTYPE.out.versions.first()) + + PBPTYPER(ch_strep) + + PBPTYPER.out.collect .collectFile(name: 'pbptyper_summary.tsv', keepHeader: true, sort: { file -> file.text }, storeDir: "${params.outdir}/pbptyper") .set{ pbptyper_summary } - seqsero2.out.collect + ch_summary = ch_summary.mix(pbptyper_summary) + ch_versions = ch_versions.mix(PBPTYPER.out.versions.first()) + + SEQSERO2(ch_salmonella) + + SEQSERO2.out.collect .collectFile(name: 'seqsero2_results.txt', keepHeader: true, sort: { file -> file.text }, storeDir: "${params.outdir}/seqsero2") .set{ seqsero2_summary } - serotypefinder.out.collect + ch_summary = ch_summary.mix(seqsero2_summary) + ch_versions = ch_versions.mix(SEQSERO2.out.versions.first()) + + SEROTYPEFINDER(ch_ecoli.combine(summfle_script)) + + SEROTYPEFINDER.out.collect .collectFile(name: 'serotypefinder_results.txt', keepHeader: true, sort: { file -> file.text }, storeDir: "${params.outdir}/serotypefinder") .set{ serotypefinder_summary } + + ch_summary = ch_summary.mix(serotypefinder_summary) + ch_versions = ch_versions.mix(SEROTYPEFINDER.out.versions.first()) + + SHIGATYPER(ch_ecoli.combine(summfle_script)) - 
shigatyper.out.collect + SHIGATYPER.out.collect .collectFile(name: 'shigatyper_hits.txt', keepHeader: true, sort: { file -> file.text }, storeDir: "${params.outdir}/shigatyper") .set{ shigatyper_hits } - shigatyper.out.files + SHIGATYPER.out.files .collectFile(name: 'shigatyper_summary.txt', keepHeader: true, sort: { file -> file.text }, storeDir: "${params.outdir}/shigatyper") .set{ shigatyper_summary } - amrfinderplus_summary - .mix(drprg_summary) - .mix(elgato_summary) - .mix(emmtyper_summary) - .mix(kaptive_summary) - .mix(kleborate_summary) - .mix(mykrobe_summary) - .mix(pbptyper_summary) - .mix(seqsero2_summary) - .mix(serotypefinder_summary) - .mix(shigatyper_hits) - .mix(shigatyper_summary) - .set { for_summary } - - amrfinderplus.out.versions.first() - .mix(drprg.out.versions) - .mix(elgato.out.versions) - .mix(emmtyper.out.versions) - .mix(kaptive.out.versions) - .mix(kleborate.out.versions) - .mix(mykrobe.out.versions) - .mix(pbptyper.out.versions) - .mix(seqsero2.out.versions) - .mix(serotypefinder.out.versions) - .mix(shigatyper.out.versions) - .set { for_versions } + ch_summary = ch_summary.mix(shigatyper_hits).mix(shigatyper_summary) + ch_versions = ch_versions.mix(SHIGATYPER.out.versions.first()) emit: - for_summary = for_summary.collect() - versions = for_versions + for_summary = ch_summary.collect() + versions = ch_versions } diff --git a/subworkflows/local/initialize.nf b/subworkflows/local/initialize.nf new file mode 100755 index 0000000..61aee09 --- /dev/null +++ b/subworkflows/local/initialize.nf @@ -0,0 +1,310 @@ +include { TEST } from "../../subworkflows/local/test" + +def paramCheck(keys) { + def set_keys = [ + "outdir", + "input", + "fastas", + "msa", + "kraken2_db", + "mash_db", + "config_file", + "reads", + "sample_sheet", + "fasta_list", + "blast_db", + "blast_db_type", + "fastani_ref", + "fastani_ref_list", + "iqtree2_outgroup", + "genome_sizes", + "sra_accessions", + "genome_accessions", + "minimum_reads", + "datasets_max_genomes", + "mash_max_hits", + "min_core_genes", + "current_datasets", + "annotator", + "skip_extras", + "exclude_top_hit", + "aligner", + "publish_dir_mode", + "email", + "email_on_fail", + "plaintext_email", + "monochrome_logs", + "hook_url", + "help", + "version", + "pipelines_testdata_base_path", + "config_profile_name", + "config_profile_description", + "custom_config_version", + "custom_config_base", + "config_profile_contact", + "config_profile_url", + "validation-fail-unrecognised-params", + "validationFailUnrecognisedParams", + "validation-lenient-mode", + "validationLenientMode", + "validationShowHiddenParams", + "validation-show-hidden-params", + "validate_params" + ] + + keys.each { x -> + if (x !in set_keys){ + println("WARNING: ${x} isn't a supported param!") + println("Supported params: ${set_keys}") + } + } +} + +workflow INITIALIZE { + main: + ch_fastas = Channel.empty() + ch_versions = Channel.empty() + + //# For aesthetics - and, yes, we are aware that there are better ways to write this than a bunch of 'println' statements + println('') + println(' /^^^^ /^^^^^^^ /^ /^^^ /^^ /^^^^^ /^^^^^^^^ /^^ /^^ /^^^^^^^ ') + println(' /^ /^^ /^^ /^^ /^ ^^ /^ /^^ /^^ /^^ /^^ /^^ /^^ /^^ /^^ /^^ ') + println('/^^ /^^ /^^ /^ /^^ /^^ /^^ /^^ /^^ /^^ /^^ /^^ /^^ /^^ /^^ ') + println('/^^ /^ /^^ /^^ /^^ /^^ /^^ /^^ /^^ /^^ /^^^^^^ /^^ /^^ /^ /^^ ') + println('/^^ /^^^^ /^^ /^^ /^^^^^^ /^^ /^^ /^ /^^ /^^ /^^ /^^ /^^ /^^ /^^ /^^ ') + println(' /^^ /^ /^^ /^^ /^^ /^^ /^^ /^ ^^ /^^ /^^ /^^ /^^ /^^ /^^ /^^ ') + println(' /^^^^^ /^^ /^^ /^^ /^^ 
/^^ /^^ /^^^^^ /^^^^^^^^ /^^^^^ /^^ /^^') + println('') + + println("Currently using the Grandeur workflow for use with microbial sequencing.") + println("The view is great from 8299 feet (2530 meters) above sea level.\n") + println("Author: Erin Young") + println("email: eriny@utah.gov") + println("Version: ${workflow.manifest.version}") + println("") + + + // ##### ##### ##### ##### ##### ##### ##### ##### ##### ##### + + // Getting config file + + // ##### ##### ##### ##### ##### ##### ##### ##### ##### ##### + + params.config_file = false + if ( params.config_file ) { + def src = new File("${workflow.projectDir}/configs/grandeur_template.config") + def dst = new File("${workflow.launchDir}/edit_me.config") + dst << src.text + println("A config file can be found at ${workflow.launchDir}/edit_me.config") + + def src1 = new File("${workflow.projectDir}/configs/grandeur_params.yml") + def dst1 = new File("${workflow.launchDir}/edit_me.yml") + dst1 << src1.text + println("A params file can be found at ${workflow.launchDir}/edit_me.yml") + exit 0 + } + + // ##### ##### ##### ##### ##### ##### ##### ##### ##### ##### + + // Checking params + + // ##### ##### ##### ##### ##### ##### ##### ##### ##### ##### + + paramCheck(params.keySet()) + + + + // ##### ##### ##### ##### ##### ##### ##### ##### ##### ##### + + // Channels for scripts + + // ##### ##### ##### ##### ##### ##### ##### ##### ##### ##### + + dataset_script = Channel.fromPath(workflow.projectDir + "/bin/datasets_download.py", type: "file") + evaluat_script = Channel.fromPath(workflow.projectDir + "/bin/evaluate.py", type: "file") + jsoncon_script = Channel.fromPath(workflow.projectDir + "/bin/json_convert.py", type: "file") + multiqc_script = Channel.fromPath(workflow.projectDir + "/bin/for_multiqc.py", type: "file") + summary_script = Channel.fromPath(workflow.projectDir + "/bin/summary.py", type: "file") + summfle_script = Channel.fromPath(workflow.projectDir + "/bin/summary_file.py", type: "file") + version_script = Channel.fromPath(workflow.projectDir + "/bin/versions.py", type: "file") + + // ##### ##### ##### ##### ##### ##### ##### ##### ##### ##### + + // Channels for input files + + // ##### ##### ##### ##### ##### ##### ##### ##### ##### ##### + + if (params.sample_sheet) { + // using a sample sheet with the column header of 'sample,fastq_1,fastq_2' + Channel + .fromPath("${params.sample_sheet}", type: "file") + .view { "Sample sheet found : ${it}" } + .splitCsv( header: true, sep: ',' ) + .map { row -> + def meta = [id:row.sample] + tuple( meta, [ + file("${row.fastq_1}", checkIfExists: true), + file("${row.fastq_2}", checkIfExists: true)]) + } + .set {ch_reads} + + } else { + // Getting the fastq files from a directory + ch_reads = params.reads + ? 
Channel + .fromFilePairs(["${params.reads}/*_R{1,2}*.{fastq,fastq.gz,fq,fq.gz}", + "${params.reads}/*_{1,2}*.{fastq,fastq.gz,fq,fq.gz}"], size: 2 ) + .map { it -> + def meta = [id:it[0].replaceAll(~/_S[0-9]+_L[0-9]+/,"")] + tuple( meta, [ + file(it[1][0], checkIfExists: true), + file(it[1][1], checkIfExists: true)]) + } + .unique() + .view { "Paired-end fastq files found : ${it[0].id}" } + : Channel.empty() + } + + if (params.fasta_list) { + // getting fastas from a file + Channel + .fromPath("${params.fasta_list}", type: "file") + .view { "Fasta list found : ${it}" } + .splitText() + .map{ it -> it.trim()} + .map{ it -> file(it) } + .map { it -> + def meta = [id:it.baseName] + tuple( meta, it) + } + .set{ ch_fastas } + } else { + // getting fastas from a directory + ch_fastas = params.fastas + ? Channel + .fromPath("${params.fastas}/*{.fa,.fasta,.fna}") + .view { "Fasta file found : ${it.baseName}" } + .map { it -> + def meta = [id: it.baseName] + tuple( meta, file(it, checkIfExists: true)) + } + .unique() + : Channel.empty() + } + + // Getting accession for downloading + + // from SRA + ch_sra_accessions = Channel.from( params.sra_accessions ) + + // from genomes + ch_genome_accessions = Channel.from( params.genome_accessions) + + // ##### ##### ##### ##### ##### ##### ##### ##### ##### ##### + + // Channels for database files + + // ##### ##### ##### ##### ##### ##### ##### ##### ##### ##### + + // Getting the file with genome sizes of common organisms for fastqcscan. The End User can use their own file and set with a param + Channel + .fromPath(params.genome_sizes, type: "file") + .ifEmpty{ + println("The genome sizes file for this workflow are missing!") + exit 1} + .set { ch_genome_sizes } + + // Getting the database for blobtools + ch_blast_db = params.blast_db + ? Channel + .fromPath(params.blast_db, type: "dir") + .ifEmpty{ + println("No blast database was found at ${params.blast_db}") + println("Set 'params.blast_db' to directory with blast database") + exit 1 + } + .view { "Local Blast Database for Blobtools : $it" } + : Channel.empty() + + // Getting the kraken2 database + ch_kraken2_db = params.kraken2_db + ? Channel + .fromPath(params.kraken2_db, type: "dir") + .ifEmpty{ + println("No kraken2 database was found at ${params.kraken2_db}") + println("Set 'params.kraken2_db' to directory with kraken2 database") + exit 1 + } + .view { "Local kraken2 database : $it" } + : Channel.empty() + + // Getting the mash reference + ch_mash_db = params.mash_db + ? 
Channel + .fromPath(params.mash_db, type: "file") + .ifEmpty{ + println("No mash database was found at ${params.mash_db}") + println("Set 'params.mash_db' to file of pre-sketched mash reference") + exit 1 + } + .view { "Mash reference : $it" } + : Channel.empty() + + //# user supplied fastani reference genomes + ch_fastani_genomes = Channel.empty() + + if ( params.fastani_ref ) { + Channel + .of( params.fastani_ref ) + .splitCsv() + .flatten() + // no meta id + .map { it -> file(it) } + .view{ "Additional fastani reference genomes : $it" } + .set { ch_fastani_genomes_input } + + ch_fastani_genomes = ch_fastani_genomes.mix(ch_fastani_genomes_input) + } + + if ( params.fastani_ref_list ) { + Channel.fromPath(params.fastani_ref_list, type: "file") + .splitText() + .map{ it -> it.trim()} + .map{ it -> file(it) } + .view{ "Additional fastani reference genome from file : $it" } + .set{ ch_fastani_ref_list } + + ch_fastani_genomes = ch_fastani_genomes.mix(ch_fastani_ref_list) + } + + println("The files and directory for results is " + params.outdir ) + + // getting test files + if ( ! params.sra_accessions.isEmpty() || ! params.genome_accessions.isEmpty() ) { + TEST( + ch_sra_accessions.ifEmpty([]), + ch_genome_accessions.ifEmpty([]) + ) + ch_reads = ch_reads.mix(TEST.out.fastq) + ch_fastas = ch_fastas.mix(TEST.out.fasta) + ch_versions = TEST.out.versions + } + + emit: + reads = ch_reads + fastas = ch_fastas + fastani_genomes = ch_fastani_genomes + versions = ch_versions + genome_sizes = ch_genome_sizes + mash_db = ch_mash_db + kraken2_db = ch_kraken2_db + blast_db = ch_blast_db + dataset_script = dataset_script + evaluat_script = evaluat_script + jsoncon_script = jsoncon_script + multiqc_script = multiqc_script + summary_script = summary_script + summfle_script = summfle_script + version_script = version_script +} \ No newline at end of file diff --git a/subworkflows/kmer_taxonomic_classification.nf b/subworkflows/local/kmer_taxonomic_classification.nf similarity index 50% rename from subworkflows/kmer_taxonomic_classification.nf rename to subworkflows/local/kmer_taxonomic_classification.nf index aca27e0..e527f52 100644 --- a/subworkflows/kmer_taxonomic_classification.nf +++ b/subworkflows/local/kmer_taxonomic_classification.nf @@ -1,14 +1,14 @@ -include { kraken2 } from '../modules/local/kraken2' addParams(params) +include { KRAKEN2 } from '../../modules/local/kraken2' -workflow kmer_taxonomic_classification { +workflow KMER_TAXONOMIC_CLASSIFICATION { take: ch_reads ch_kraken2_db - + main: - kraken2(ch_reads.combine(ch_kraken2_db)) + KRAKEN2(ch_reads.combine(ch_kraken2_db)) - kraken2.out.results + KRAKEN2.out.results .map { it -> it [1] } .collectFile( storeDir: "${params.outdir}/kraken2/", @@ -17,9 +17,11 @@ workflow kmer_taxonomic_classification { name: "kraken2_summary.csv") .set { summary } + ch_versions = KRAKEN2.out.versions.first() + emit: - for_flag = kraken2.out.results + for_flag = KRAKEN2.out.results for_summary = summary - for_multiqc = kraken2.out.for_multiqc - versions = kraken2.out.versions.first() + for_multiqc = KRAKEN2.out.for_multiqc + versions = ch_versions } diff --git a/subworkflows/local/min_hash.nf b/subworkflows/local/min_hash.nf new file mode 100644 index 0000000..6357396 --- /dev/null +++ b/subworkflows/local/min_hash.nf @@ -0,0 +1,46 @@ +include { MASH_SKETCH as SKETCH } from '../../modules/local/mash' +include { MASH_DIST as DIST } from '../../modules/local/mash' + +workflow MIN_HASH { + take: + ch_reads + ch_fastas + ch_mash_db + + main: + ch_versions = 
Channel.empty() + + SKETCH(ch_reads.mix(ch_fastas)) + + SKETCH.out.summary + .collectFile( + storeDir: "${params.outdir}/mash/", + keepHeader: true, + sort: { file -> file.text }, + name: "mash_err_summary.csv") + .set { mash_err_summary } + + ch_versions = ch_versions.mix(SKETCH.out.versions.first()) + + if (params.mash_db) { + DIST(SKETCH.out.msh.filter({it[1].size() > 0 }).combine(ch_mash_db)) + } else { + DIST(SKETCH.out.msh.filter({it[1].size() > 0 }).map{it -> tuple(it[0], it[1], null)}) + } + + DIST.out.results + .map { it -> it [1] } + .collectFile( + storeDir: "${params.outdir}/mash/", + keepHeader: true, + sort: { file -> file.text }, + name: "mash_summary.csv") + .set { mash_summary } + + ch_versions = ch_versions.mix(DIST.out.versions.first()) + + emit: + for_summary = mash_summary.mix(mash_err_summary) + for_flag = DIST.out.results + versions = ch_versions +} diff --git a/subworkflows/local/phylogenetic_analysis.nf b/subworkflows/local/phylogenetic_analysis.nf new file mode 100644 index 0000000..4d064f4 --- /dev/null +++ b/subworkflows/local/phylogenetic_analysis.nf @@ -0,0 +1,114 @@ +include { CORE_GENOME_EVALUATION } from '../../modules/local/local' +include { BAKTA } from '../../modules/local/bakta' +include { HEATCLUSTER } from '../../modules/local/heatcluster' +include { IQTREE2 } from '../../modules/local/iqtree2' +include { MASHTREE } from '../../modules/local/mashtree' +include { PANAROO } from '../../modules/local/panaroo' +include { PHYTREEVIZ } from '../../modules/local/phytreeviz' +include { PROKKA } from '../../modules/local/prokka' +include { ROARY } from '../../modules/local/roary' +include { SNPDISTS } from '../../modules/local/snp-dists' + +workflow PHYLOGENETIC_ANALYSIS { + take: + evaluat_script + ch_contigs + ch_top_hit + + main: + ch_versions = Channel.empty() + ch_multiqc = Channel.empty() + + // adding in organism and top ani hit + if ( ! params.skip_extras ) { + ch_organism = ch_top_hit.map { it -> if (it) { tuple( it[0] , [ it[1].split("_")[0], it[1].split("_")[1]] )}} + + if ( ! 
params.exclude_top_hit ) { + ch_top_hit + .map { it -> if (it) { tuple( it[1].split("_", 3)[2], it[2], it[1].split("_")[0, 1]) }} + .groupTuple(by: 0) + .map { it -> + if (it) { + def meta = [id:it[1][0].baseName] + tuple( meta, it[1][0], it[2][0] ) }} + .unique() + .set { ch_representative } + + ch_preannotation = ch_contigs.join( ch_organism, by: 0, remainder: true).mix(ch_representative) + } else { + ch_preannotation = ch_contigs.join( ch_organism, by: 0, remainder: true) + } + } else { + // skipping ani and top hit + ch_preannotation = ch_contigs.map{ it -> tuple(it[0], it[1], null)} + } + + if (params.annotator == 'prokka' ) { + PROKKA(ch_preannotation.unique()) + + ch_versions = ch_versions.mix(PROKKA.out.versions.first()) + ch_multiqc = ch_multiqc.mix(PROKKA.out.for_multiqc) + ch_gff = PROKKA.out.gff + } else if (params.annotator == 'bakta') { + BAKTA(ch_preannotation.unique()) + + ch_versions = ch_versions.mix(BAKTA.out.versions.first()) + ch_multiqc = ch_multiqc.mix(BAKTA.out.for_multiqc) + ch_gff = BAKTA.out.gff + + } else { + ch_gff = Channel.empty() + + } + + if (params.aligner == 'panaroo') { + PANAROO(ch_gff.unique().collect()) + + ch_core = PANAROO.out.core_gene_alignment + ch_versions = ch_versions.mix(PANAROO.out.versions) + + } else if (params.aligner == 'roary') { + ROARY(ch_gff.unique().collect()) + + ch_core = ROARY.out.core_gene_alignment + ch_versions = ch_versions.mix(ROARY.out.versions) + } else { + ch_core = Channel.empty() + } + + CORE_GENOME_EVALUATION(ch_core.combine(evaluat_script)) + + CORE_GENOME_EVALUATION.out.evaluation + .filter({it[1] as int >= 4}) + .filter({it[2] as int >= params.min_core_genes}) + .map { it -> it[0] } + .set{ ch_core_genome } + + ch_multiqc = ch_multiqc.mix(CORE_GENOME_EVALUATION.out.for_multiqc) + + // TODO : if channel doesn't go to to iqtree2, then send to mashtree + + // phylogenetic trees + MASHTREE(ch_preannotation.map{it -> if (it) { tuple( it[1]) }}.collect()) + ch_versions = ch_versions.mix(MASHTREE.out.versions) + + IQTREE2(ch_core_genome) + ch_versions = ch_versions.mix(IQTREE2.out.versions) + + PHYTREEVIZ(IQTREE2.out.newick.mix(MASHTREE.out.newick)) + ch_versions = ch_versions.mix(PHYTREEVIZ.out.versions.first()) + ch_multiqc = ch_multiqc.mix(PHYTREEVIZ.out.for_multiqc) + + // SNP matrix + SNPDISTS(CORE_GENOME_EVALUATION.out.evaluation.map{ it -> it[0] }) + ch_versions = ch_versions.mix(SNPDISTS.out.versions) + ch_multiqc = ch_multiqc.mix(SNPDISTS.out.snp_matrix) + + HEATCLUSTER(SNPDISTS.out.snp_matrix) + ch_versions = ch_versions.mix(HEATCLUSTER.out.versions) + ch_multiqc = ch_multiqc.mix(HEATCLUSTER.out.for_multiqc) + + emit: + for_multiqc = ch_multiqc + versions = ch_versions +} diff --git a/subworkflows/local/quality_assessment.nf b/subworkflows/local/quality_assessment.nf new file mode 100755 index 0000000..feaacb0 --- /dev/null +++ b/subworkflows/local/quality_assessment.nf @@ -0,0 +1,98 @@ +include { CIRCULOCOV } from '../../modules/local/circulocov' +include { FASTQC } from '../../modules/local/fastqc' +include { MLST } from '../../modules/local/mlst' +include { PLASMIDFINDER } from '../../modules/local/plasmidfinder' +include { QUAST } from '../../modules/local/quast' + +workflow QUALITY_ASSESSMENT { + take: + ch_reads + ch_contigs + ch_reads_contigs + summfle_script + + main: + for_multiqc = Channel.empty() + ch_versions = Channel.empty() + ch_summary = Channel.empty() + ch_bams = Channel.empty() + + // fastq files only + if ( params.sample_sheet || params.reads || params.sra_accessions ) { + FASTQC(ch_reads) + 
ch_versions = ch_versions.mix(FASTQC.out.versions.first()) + for_multiqc = for_multiqc.mix(FASTQC.out.for_multiqc) + + + FASTQC.out.collect + .collectFile(name: "fastqc_summary.csv", + keepHeader: true, + sort: { file -> file.text }, + storeDir: "${params.outdir}/fastqc") + .set{ fastqc_summary } + + ch_summary = ch_summary.mix(fastqc_summary) + + CIRCULOCOV(ch_reads_contigs.filter{it[1]}.filter{it[2]}) + ch_versions = ch_versions.mix(CIRCULOCOV.out.versions.first()) + + CIRCULOCOV.out.collect + .collectFile(name: "circulocov_summary.tsv", + keepHeader: true, + sort: { file -> file.text }, + storeDir: "${params.outdir}/circulocov") + .set{ circulocov_summary } + + ch_summary = ch_summary.mix(circulocov_summary) + ch_bams = ch_bams.mix(CIRCULOCOV.out.contig_bam) + } + + // contigs + QUAST(ch_reads_contigs) + ch_versions = ch_versions.mix(QUAST.out.versions.first()) + + QUAST.out.collect + .collectFile(name: "quast_report.tsv", + keepHeader: true, + sort: { file -> file.text }, + storeDir: "${params.outdir}/quast") + .set{ quast_summary } + + ch_summary = ch_summary.mix(quast_summary) + + QUAST.out.collect_contig + .collectFile(name: "quast_contig_report.tsv", + keepHeader: true, + sort: { file -> file.text }, + storeDir: "${params.outdir}/quast") + .set{ quast_contig_summary } + ch_summary = ch_summary.mix(quast_contig_summary) + + MLST(ch_contigs.combine(summfle_script)) + ch_versions = ch_versions.mix(MLST.out.versions.first()) + + MLST.out.collect + .collectFile(name: "mlst_summary.tsv", + keepHeader: true, + sort: { file -> file.text }, + storeDir: "${params.outdir}/mlst") + .set{ mlst_summary } + ch_summary = ch_summary.mix(mlst_summary) + + PLASMIDFINDER(ch_contigs.combine(summfle_script)) + ch_versions = ch_versions.mix(PLASMIDFINDER.out.versions.first()) + + PLASMIDFINDER.out.collect + .collectFile(name: "plasmidfinder_result.tsv", + keepHeader: true, + sort: { file -> file.text }, + storeDir: "${params.outdir}/plasmidfinder") + .set{ plasmidfinder_summary } + ch_summary = ch_summary.mix(plasmidfinder_summary) + + emit: + bams = ch_bams + for_summary = ch_summary.collect() + for_multiqc = for_multiqc.mix(QUAST.out.for_multiqc).collect() + versions = ch_versions +} diff --git a/subworkflows/local/report.nf b/subworkflows/local/report.nf new file mode 100644 index 0000000..24dbd5a --- /dev/null +++ b/subworkflows/local/report.nf @@ -0,0 +1,44 @@ +include { NAMES } from '../../modules/local/local' +include { MQC_PREP } from '../../modules/local/local' +include { MULTIQC } from '../../modules/local/multiqc' +include { SUMMARY } from '../../modules/local/local' +include { VERSIONS } from '../../modules/local/multiqc' + +workflow REPORT { + take: + ch_reads + ch_fastas + for_multiqc + for_summary + ch_versions + multiqc_script + version_script + + main: + ch_versions + .collectFile( + keepHeader: false, + name: "versions.yml") + .set { ch_collated_versions } + + VERSIONS(ch_collated_versions, version_script) + + MQC_PREP(for_multiqc.mix(for_summary).collect(), multiqc_script) + + MULTIQC(for_multiqc.mix(for_summary).mix(MQC_PREP.out.for_multiqc).mix(VERSIONS.out.for_multiqc).collect()) + + NAMES(ch_reads.mix(ch_fastas)) + + NAMES.out.collect + .collectFile( + keepHeader: true, + sort: { file -> file.text }, + name: "input_files.csv") + .set { ch_names } + + SUMMARY(for_summary.mix(ch_names).mix(MULTIQC.out.data_folder).collect()) + + emit: + summary = SUMMARY.out.extended_tsv + versions = ch_versions +} \ No newline at end of file diff --git a/subworkflows/local/test.nf 
b/subworkflows/local/test.nf new file mode 100644 index 0000000..079786d --- /dev/null +++ b/subworkflows/local/test.nf @@ -0,0 +1,45 @@ +include { ENA_DOWNLOAD as DOWNLOAD_FASTQ } from '../../modules/local/ena' +include { DATASETS_DOWNLOAD as DOWNLOAD_GENOME } from '../../modules/local/datasets' + +workflow TEST { + take: + ch_sra_accessions + ch_genome_accessions + + main: + ch_versions = Channel.empty() + + if ( ! params.sra_accessions.isEmpty() || ! params.genome_accessions.isEmpty() ) { + DOWNLOAD_FASTQ(ch_sra_accessions.filter({it[0]})) + ch_versions = ch_versions.mix(DOWNLOAD_FASTQ.out.versions.first()) + + DOWNLOAD_FASTQ.out.fastq + .map { it -> + def meta = [id:it[0]] + tuple( meta, [file(it[1][0]), file(it[1][1])]) + } + .set { ch_fastq } + } else { + ch_fastq = Channel.empty() + } + + if ( ! params.genome_accessions.isEmpty() ) { + DOWNLOAD_GENOME(ch_genome_accessions.collectFile(name: 'ids.csv', newLine: true)) + ch_versions = ch_versions.mix(DOWNLOAD_GENOME.out.versions.first()) + + DOWNLOAD_GENOME.out.genomes + .flatten() + .map { it -> + def meta = [id:it.baseName] + tuple( meta, it) + } + .set { ch_fasta } + } else { + ch_fasta = Channel.empty() + } + + emit: + fastq = ch_fastq + fasta = ch_fasta + versions = ch_versions +} \ No newline at end of file diff --git a/subworkflows/min_hash.nf b/subworkflows/min_hash.nf deleted file mode 100644 index bf02302..0000000 --- a/subworkflows/min_hash.nf +++ /dev/null @@ -1,64 +0,0 @@ -include { mash_sketch_fastq } from '../modules/local/mash' addParams(params) -include { mash_sketch_fasta } from '../modules/local/mash' addParams(params) -include { mash_dist } from '../modules/local/mash' addParams(params) -include { mash_err } from '../modules/local/local' addParams(params) -//include { mash_screen } from '../modules/local/mash' addParams(params) - -workflow min_hash { - take: - ch_reads - ch_fastas - ch_mash_db - - main: - ch_mash_sketches = Channel.empty() - ch_versions = Channel.empty() - - if ( params.sample_sheet || params.reads || params.sra_accessions ) { - mash_sketch_fastq(ch_reads) - - mash_err(mash_sketch_fastq.out.err) - - ch_mash_sketches = ch_mash_sketches.mix(mash_sketch_fastq.out.msh.filter({it[1].size() > 0 })) - - mash_err.out.summary - .collectFile( - storeDir: "${params.outdir}/mash/", - keepHeader: true, - sort: { file -> file.text }, - name: "mash_err_summary.csv") - .set { mash_err_summary } - - ch_versions = ch_versions.mix(mash_sketch_fastq.out.versions.first()) - } else { - mash_err_summary = Channel.empty() - } - - if ( params.fastas || params.fasta_list ) { - mash_sketch_fasta(ch_fastas) - ch_mash_sketches = ch_mash_sketches.mix(mash_sketch_fasta.out.msh.filter({it[1].size() > 0 })) - ch_versions = ch_versions.mix(mash_sketch_fasta.out.versions.first()) - } - - if (params.mash_db) { - mash_dist(ch_mash_sketches.combine(ch_mash_db)) - } else { - mash_dist(ch_mash_sketches.map{it -> tuple(it[0], it[1], null)}) - } - - mash_dist.out.results - .map { it -> it [1] } - .collectFile( - storeDir: "${params.outdir}/mash/", - keepHeader: true, - sort: { file -> file.text }, - name: "mash_summary.csv") - .set { mash_summary } - - ch_versions = ch_versions.mix(mash_dist.out.versions.first()) - - emit: - for_summary = mash_summary.mix(mash_err_summary) - for_flag = mash_dist.out.results - versions = ch_versions -} diff --git a/subworkflows/phylogenetic_analysis.nf b/subworkflows/phylogenetic_analysis.nf deleted file mode 100644 index 34c381e..0000000 --- a/subworkflows/phylogenetic_analysis.nf +++ /dev/null @@ -1,86 
+0,0 @@ -include { core_genome_evaluation } from '../modules/local/local' addParams(params) -include { heatcluster } from '../modules/local/heatcluster' addParams(params) -include { iqtree2 } from '../modules/local/iqtree2' addParams(params) -include { mashtree } from '../modules/local/mashtree' addParams(params) -include { panaroo } from '../modules/local/panaroo' addParams(params) -include { phytreeviz } from '../modules/local/phytreeviz' addParams(params) -include { prokka } from '../modules/local/prokka' addParams(params) -include { roary } from '../modules/local/roary' addParams(params) -include { snp_dists } from '../modules/local/snp-dists' addParams(params) - -workflow phylogenetic_analysis { - take: - evaluat_script - ch_contigs - ch_top_hit - - main: - ch_versions = Channel.empty() - - // adding in organism and top ani hit - if ( ! params.skip_extras ) { - ch_organism = ch_top_hit.map { it -> if (it) { tuple( it[0] , [ it[1].split("_")[0], it[1].split("_")[1]] )}} - - if ( ! params.exclude_top_hit ) { - ch_top_hit - .map { it -> if (it) { tuple( it[1].split("_", 3)[2], it[2], it[1].split("_")[0, 1]) }} - .groupTuple(by: 0) - .map { it -> - if (it) { - meta = [id:it[1][0].baseName] - tuple( meta, it[1][0], it[2][0] ) }} - .unique() - .set { ch_representative } - - for_prokka = ch_contigs.join( ch_organism, by: 0, remainder: true).mix(ch_representative) - } else { - for_prokka = ch_contigs.join( ch_organism, by: 0, remainder: true) - } - } else { - // skipping ani and top hit - ch_organism = Channel.empty() - for_prokka = ch_contigs.map{ it -> tuple(it[0], it[1], null)} - } - - prokka(for_prokka.unique()) - - if (params.aligner == 'panaroo') { - panaroo(prokka.out.gff.unique().collect()) - - ch_core = panaroo.out.core_gene_alignment - ch_versions = ch_versions.mix(panaroo.out.versions) - - } else if (params.aligner == 'roary') { - roary(prokka.out.gff.unique().collect()) - - ch_core = roary.out.core_gene_alignment - ch_versions = ch_versions.mix(roary.out.versions) - - } else { - ch_core = Channel.empty() - } - - - core_genome_evaluation(ch_core.combine(evaluat_script)) - - core_genome_evaluation.out.evaluation - .filter({it[1] as int >= 4}) - .filter({it[2] as int >= params.min_core_genes}) - .map ( it -> it[0] ) - .set{ ch_core_genome } - - // TODO : if channel doesn't go to to iqtree2, then send to mashtree - - // phylogenetic trees - mashtree(for_prokka.map{it -> if (it) { tuple( it[1]) }}.collect()) - iqtree2(ch_core_genome) - phytreeviz(iqtree2.out.newick.mix(mashtree.out.newick)) - - // SNP matrix - snp_dists(core_genome_evaluation.out.evaluation.map( it -> it[0] )) - heatcluster(snp_dists.out.snp_matrix) - - emit: - for_multiqc = prokka.out.for_multiqc.mix(snp_dists.out.snp_matrix).mix(heatcluster.out.for_multiqc).mix(phytreeviz.out.for_multiqc).mix(core_genome_evaluation.out.for_multiqc) - versions = ch_versions.mix(prokka.out.versions.first()).mix(mashtree.out.versions).mix(iqtree2.out.versions).mix(phytreeviz.out.versions.first()).mix(snp_dists.out.versions).mix(heatcluster.out.versions) -} diff --git a/subworkflows/quality_assessment.nf b/subworkflows/quality_assessment.nf deleted file mode 100755 index daa65df..0000000 --- a/subworkflows/quality_assessment.nf +++ /dev/null @@ -1,98 +0,0 @@ -include { circulocov } from '../modules/local/circulocov' addParams(params) -include { fastqc } from '../modules/local/fastqc' addParams(params) -include { mlst } from '../modules/local/mlst' addParams(params) -include { plasmidfinder } from '../modules/local/plasmidfinder' 
addParams(params) -include { quast } from '../modules/local/quast' addParams(params) - -workflow quality_assessment { - take: - ch_reads - ch_contigs - ch_reads_contigs - summfle_script - - main: - for_multiqc = Channel.empty() - ch_versions = Channel.empty() - ch_summary = Channel.empty() - ch_bams = Channel.empty() - - // fastq files only - if ( params.sample_sheet || params.reads || params.sra_accessions ) { - fastqc(ch_reads) - - circulocov(ch_reads_contigs.filter{it[1]}.filter{it[2]}) - - for_multiqc = for_multiqc.mix(fastqc.out.for_multiqc) - - fastqc.out.collect - .collectFile(name: "fastqc_summary.csv", - keepHeader: true, - sort: { file -> file.text }, - storeDir: "${params.outdir}/fastqc") - .set{ fastqc_summary } - - circulocov.out.collect - .collectFile(name: "circulocov_summary.tsv", - keepHeader: true, - sort: { file -> file.text }, - storeDir: "${params.outdir}/circulocov") - .set{ circulocov_summary } - - ch_summary = ch_summary.mix(circulocov_summary).mix(fastqc_summary) - ch_versions = ch_versions.mix(fastqc.out.versions.first()).mix(circulocov.out.versions.first()) - ch_bams = ch_bams.mix(circulocov.out.contig_bam) - } - - // contigs - quast(ch_reads_contigs.filter{it[2]}) - mlst(ch_contigs.combine(summfle_script)) - plasmidfinder(ch_contigs.combine(summfle_script)) - - mlst.out.collect - .collectFile(name: "mlst_summary.tsv", - keepHeader: true, - sort: { file -> file.text }, - storeDir: "${params.outdir}/mlst") - .set{ mlst_summary } - - plasmidfinder.out.collect - .collectFile(name: "plasmidfinder_result.tsv", - keepHeader: true, - sort: { file -> file.text }, - storeDir: "${params.outdir}/plasmidfinder") - .set{ plasmidfinder_summary } - - quast.out.collect - .collectFile(name: "quast_report.tsv", - keepHeader: true, - sort: { file -> file.text }, - storeDir: "${params.outdir}/quast") - .set{ quast_summary } - - quast.out.collect_contig - .collectFile(name: "quast_contig_report.tsv", - keepHeader: true, - sort: { file -> file.text }, - storeDir: "${params.outdir}/quast") - .set{ quast_contig_summary } - - ch_summary - .mix(mlst_summary) - .mix(plasmidfinder_summary) - .mix(quast_summary) - .mix(quast_contig_summary) - .set { for_summary } - - ch_versions - .mix(mlst.out.versions.first()) - .mix(plasmidfinder.out.versions.first()) - .mix(quast.out.versions.first()) - .set { for_versions } - - emit: - bams = ch_bams - for_summary = for_summary.collect() - for_multiqc = for_multiqc.mix(quast.out.for_multiqc).collect() - versions = for_versions -} diff --git a/subworkflows/report.nf b/subworkflows/report.nf deleted file mode 100644 index 91a8f69..0000000 --- a/subworkflows/report.nf +++ /dev/null @@ -1,41 +0,0 @@ -include { names } from '../modules/local/local' addParams(params) -include { mqc_prep } from '../modules/local/local' addParams(params) -include { multiqc } from '../modules/local/multiqc' addParams(params) -include { summary } from '../modules/local/local' addParams(params) -include { versions } from '../modules/local/multiqc' addParams(params) - -workflow report { - take: - ch_reads - ch_fastas - for_multiqc - for_summary - ch_versions - multiqc_script - version_script - - main: - ch_versions - .collectFile( - keepHeader: false, - name: "versions.yml") - .set { ch_collated_versions } - - versions(ch_collated_versions, version_script) - - mqc_prep(for_multiqc.mix(for_summary).collect(), multiqc_script) - - multiqc(for_multiqc.mix(for_summary).mix(mqc_prep.out.for_multiqc).mix(versions.out.for_multiqc).collect()) - - names(ch_reads.mix(ch_fastas)) - - 
names.out.collect - .collectFile( - storeDir: "${params.outdir}/summary/", - keepHeader: true, - sort: { file -> file.text }, - name: "input_files.csv") - .set { ch_names } - - summary(for_summary.mix(ch_names).mix(multiqc.out.data_folder).collect()) -} \ No newline at end of file diff --git a/subworkflows/test.nf b/subworkflows/test.nf deleted file mode 100644 index fec5ed5..0000000 --- a/subworkflows/test.nf +++ /dev/null @@ -1,18 +0,0 @@ -include { download_sra } from '../modules/local/local' addParams(params) - -workflow test { - take: - ch_accessions - - main: - download_sra(ch_accessions) - - download_sra.out.fastq - .map { it -> - meta = [id:it[0]] - tuple( meta, [file(it[1][0]), file(it[1][1])])} - .set { ch_fastq } - - emit: - fastq = ch_fastq -} \ No newline at end of file diff --git a/workflows/grandeur.nf b/workflows/grandeur.nf new file mode 100755 index 0000000..3c56f19 --- /dev/null +++ b/workflows/grandeur.nf @@ -0,0 +1,139 @@ +include { AVERAGE_NUCLEOTIDE_IDENTITY } from "../subworkflows/local/average_nucleotide_identity" +include { BLOBTOOLS } from "../subworkflows/local/blobtools" +include { DE_NOVO_ALIGNMENT } from "../subworkflows/local/de_novo_alignment" +include { INFO } from "../subworkflows/local/info" +include { KMER_TAXONOMIC_CLASSIFICATION } from "../subworkflows/local/kmer_taxonomic_classification" +include { MIN_HASH } from "../subworkflows/local/min_hash" +include { PHYLOGENETIC_ANALYSIS } from "../subworkflows/local/phylogenetic_analysis" +include { QUALITY_ASSESSMENT } from "../subworkflows/local/quality_assessment" +include { REPORT } from "../subworkflows/local/report" + +workflow GRANDEUR { + take: + ch_raw_reads + ch_fastas + ch_fastani_genomes + ch_versions + ch_genome_sizes + ch_mash_db + ch_kraken2_db + ch_blast_db + dataset_script + evaluat_script + jsoncon_script + multiqc_script + summary_script + summfle_script + version_script + + main: + ch_for_multiqc = Channel.empty() + ch_for_summary = ch_genome_sizes + ch_for_flag = Channel.empty() + ch_versions = Channel.empty() + ch_reads_contigs = ch_fastas.map{it -> tuple(it[0], it[1], null)} + + + if ( params.sample_sheet || params.reads || params.sra_accessions ) { + DE_NOVO_ALIGNMENT(ch_raw_reads) + + ch_assembled = DE_NOVO_ALIGNMENT.out.contigs + ch_contigs = ch_fastas.mix(DE_NOVO_ALIGNMENT.out.contigs) + ch_reads_contigs = ch_reads_contigs.mix(DE_NOVO_ALIGNMENT.out.reads_contigs) + ch_clean_reads = DE_NOVO_ALIGNMENT.out.clean_reads + ch_for_multiqc = ch_for_multiqc.mix(DE_NOVO_ALIGNMENT.out.for_multiqc) + ch_versions = ch_versions.mix(DE_NOVO_ALIGNMENT.out.versions) + + } else { + ch_contigs = ch_fastas + ch_clean_reads = Channel.empty() + ch_assembled = Channel.empty() + } + + // getting a summary of everything + if ( ! 
params.skip_extras ) { + QUALITY_ASSESSMENT( + ch_raw_reads, + ch_contigs, + ch_reads_contigs, + summfle_script) + + ch_for_multiqc = ch_for_multiqc.mix(QUALITY_ASSESSMENT.out.for_multiqc) + ch_for_summary = ch_for_summary.mix(QUALITY_ASSESSMENT.out.for_summary) + ch_versions = ch_versions.mix(QUALITY_ASSESSMENT.out.versions) + + + // optional subworkflow blobtools (useful for interspecies contamination) + if ( params.blast_db && ( params.sample_sheet || params.reads || params.sra_accessions )) { + BLOBTOOLS(QUALITY_ASSESSMENT.out.bams, ch_blast_db ) + + ch_for_summary = ch_for_summary.mix(BLOBTOOLS.out.for_summary) + ch_for_flag = ch_for_flag.mix(BLOBTOOLS.out.for_flag) + ch_versions = ch_versions.mix(BLOBTOOLS.out.versions) + } + + // optional subworkflow kraken2 (useful for interspecies contamination) + if ( params.kraken2_db && ( params.sample_sheet || params.reads || params.sra_accessions )) { + KMER_TAXONOMIC_CLASSIFICATION(ch_clean_reads, ch_kraken2_db ) + + ch_for_multiqc = ch_for_multiqc.mix(KMER_TAXONOMIC_CLASSIFICATION.out.for_multiqc) + ch_for_summary = ch_for_summary.mix(KMER_TAXONOMIC_CLASSIFICATION.out.for_summary) + ch_for_flag = ch_for_flag.mix(KMER_TAXONOMIC_CLASSIFICATION.out.for_flag) + ch_versions = ch_versions.mix(KMER_TAXONOMIC_CLASSIFICATION.out.versions) + } + + // subworkflow mash for species determination + MIN_HASH(ch_clean_reads, ch_fastas, ch_mash_db) + ch_versions = ch_versions.mix(MIN_HASH.out.versions) + ch_for_summary = ch_for_summary.mix(MIN_HASH.out.for_summary) + + // determining organisms in sample + AVERAGE_NUCLEOTIDE_IDENTITY( + ch_for_summary.collect(), + ch_contigs, + ch_fastani_genomes.ifEmpty([]), + dataset_script) + + ch_versions = ch_versions.mix(AVERAGE_NUCLEOTIDE_IDENTITY.out.versions) + ch_for_flag = ch_for_flag.mix(AVERAGE_NUCLEOTIDE_IDENTITY.out.for_flag).mix(MIN_HASH.out.for_flag) + ch_top_hit = AVERAGE_NUCLEOTIDE_IDENTITY.out.top_hit + ch_for_summary = ch_for_summary.mix(AVERAGE_NUCLEOTIDE_IDENTITY.out.for_summary) + + + // getting all the other information + INFO( + ch_contigs, + ch_for_flag, + summfle_script, + jsoncon_script) + + ch_for_summary = ch_for_summary.mix(INFO.out.for_summary) + ch_versions = ch_versions.mix(INFO.out.versions) + } else { + ch_top_hit = Channel.empty() + } + + // optional subworkflow for comparing shared genes + if ( params.msa ) { + PHYLOGENETIC_ANALYSIS( + evaluat_script, + ch_contigs.ifEmpty([]), + ch_top_hit.ifEmpty([])) + + ch_for_multiqc = ch_for_multiqc.mix(PHYLOGENETIC_ANALYSIS.out.for_multiqc) + ch_versions = ch_versions.mix(PHYLOGENETIC_ANALYSIS.out.versions) + } + + // getting a summary of everything + if ( ! params.skip_extras ) { + REPORT( + ch_raw_reads, + ch_fastas, + ch_for_multiqc.collect(), + ch_for_summary.concat(summary_script).collect(), + ch_versions.collect(), + multiqc_script, + version_script + ) + } +} \ No newline at end of file
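For context on the sample-sheet input path added above: the new initialization code reads a CSV with the header 'sample,fastq_1,fastq_2' and turns each row into a (meta, [fastq_1, fastq_2]) tuple. The snippet below is a minimal, standalone sketch of that same splitCsv pattern so it can be tried outside the full workflow; the sample sheet name and fastq paths are placeholders, not files shipped with Grandeur.

// sketch.nf - illustrative only; assumes sample_sheet.csv and the listed fastq files exist locally
params.sample_sheet = 'sample_sheet.csv'

workflow {
    Channel
        .fromPath(params.sample_sheet, type: 'file')
        .splitCsv(header: true, sep: ',')
        .map { row ->
            // same shape as the tuples emitted by the sample-sheet branch in the diff
            def meta = [id: row.sample]
            tuple(meta, [file(row.fastq_1, checkIfExists: true),
                         file(row.fastq_2, checkIfExists: true)])
        }
        .view()
}

Run with something like `nextflow run sketch.nf --sample_sheet sample_sheet.csv` (hypothetical file names); each emitted element should print as [[id:sample], [fastq_1, fastq_2]], matching what the downstream subworkflows in this patch expect from ch_reads.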