From 393928587d5ea47fcc0c614f2363d6a6700f2233 Mon Sep 17 00:00:00 2001 From: replikation Date: Tue, 3 Aug 2021 16:17:32 +0200 Subject: [PATCH] covarplot fix, min depth parameter inclusion --- nextflow.config | 1 + poreCov.nf | 3 ++- workflows/process/artic.nf | 26 +++++++++++++++++---- workflows/process/covarplot.nf | 7 +++--- workflows/process/filter_fastq_by_length.nf | 2 +- workflows/process/kraken2.nf | 2 +- 6 files changed, 30 insertions(+), 11 deletions(-) diff --git a/nextflow.config b/nextflow.config index 37ea938..5cb7f6c 100644 --- a/nextflow.config +++ b/nextflow.config @@ -24,6 +24,7 @@ params { reference_for_qc = '' seq_threshold = '0.90' n_threshold = '0.05' + min_depth = '20' // settings buildDB = false diff --git a/poreCov.nf b/poreCov.nf index 7356909..be1682d 100755 --- a/poreCov.nf +++ b/poreCov.nf @@ -428,6 +428,7 @@ ${c_yellow}Parameters - nCov genome reconstruction${c_reset} --rapid use rapid-barcoding-kit [default: ${params.rapid}] --minLength min length filter raw reads [default: 350 (primer-scheme: V1-3); 500 (primer-scheme: V1200)] --maxLength max length filter raw reads [default: 700 (primer-scheme: V1-3); 1500 (primer-scheme: V1200)] + --min_depth nucleotides below min depth will be masked to "N" [default ${params.min_depth}] --medaka_model medaka model for the artic workflow [default: ${params.medaka_model}] ${c_yellow}Parameters - Genome quality control${c_reset} @@ -517,7 +518,7 @@ def rki() { RKI output for german DESH upload: \033[2mOutput stored at: $params.output/$params.rkidir Min Identity to NC_045512.2: $params.seq_threshold [--seq_threshold] - Min Coverage: 20 [ no parameter] + Min Depth: $params.min_depth [--min_depth] Proportion cutoff N: $params.n_threshold [--n_threshold]\u001B[0m \u001B[1;30m______________________________________\033[0m """.stripIndent() diff --git a/workflows/process/artic.nf b/workflows/process/artic.nf index 316ae02..18d7015 100755 --- a/workflows/process/artic.nf +++ b/workflows/process/artic.nf @@ -10,12 +10,24 @@ process artic_medaka { tuple val(name), path("*.consensus.fasta"), emit: fasta tuple val(name), path("${name}_mapped_*.primertrimmed.sorted.bam"), path("${name}_mapped_*.primertrimmed.sorted.bam.bai"), emit: reference_bam tuple val(name), path("SNP_${name}.pass.vcf"), emit: vcf - //tuple val(name), path("${name}.pass.vcf.gz"), path("${name}.coverage_mask.txt.nCoV-2019_1.depths"), path("${name}.coverage_mask.txt.nCoV-2019_2.depths"), emit: covarplot - tuple val(name), path("${name}.pass.vcf.gz"), emit: covarplot + tuple val(name), path("${name}.pass.vcf.gz"), path("${name}.coverage_mask.txt.nCoV-2019_1.depths"), path("${name}.coverage_mask.txt.nCoV-2019_2.depths"), emit: covarplot script: """ - artic minion --medaka --medaka-model ${params.medaka_model} --normalise 500 --threads ${task.cpus} --scheme-directory ${external_scheme} \ - --read-file ${reads} nCoV-2019/${params.primerV} ${name} + artic minion --medaka \ + --medaka-model ${params.medaka_model} \ + --min-depth ${params.min_depth} \ + --normalise 500 \ + --threads ${task.cpus} \ + --scheme-directory ${external_scheme} \ + --read-file ${reads} \ + nCoV-2019/${params.primerV} ${name} + + # generate depth files + artic_make_depth_mask --depth ${params.min_depth} \ + --store-rg-depths ${external_scheme}/nCoV-2019/${params.primerV}/nCoV-2019.reference.fasta \ + ${name}.primertrimmed.rg.sorted.bam \ + ${name}.coverage_mask.txt + zcat ${name}.pass.vcf.gz > SNP_${name}.pass.vcf sed -i "1s/.*/>${name}/" *.consensus.fasta @@ -60,6 +72,12 @@ process artic_nanopolish { --sequencing-summary sequencing_summary*.txt \ nCoV-2019/${params.primerV} ${name} + # generate depth files + artic_make_depth_mask --depth ${params.min_depth} \ + --store-rg-depths ${external_scheme}/nCoV-2019/${params.primerV}/nCoV-2019.reference.fasta \ + ${name}.primertrimmed.rg.sorted.bam \ + ${name}.coverage_mask.txt + zcat ${name}.pass.vcf.gz > SNP_${name}.pass.vcf sed -i "1s/.*/>${name}/" *.consensus.fasta diff --git a/workflows/process/covarplot.nf b/workflows/process/covarplot.nf index 8c2e477..50971f3 100755 --- a/workflows/process/covarplot.nf +++ b/workflows/process/covarplot.nf @@ -2,15 +2,14 @@ process covarplot { label "covarplot" publishDir "${params.output}/${params.genomedir}/${name}/", mode: 'copy' input: - //tuple val(name), path(vcf), path(depth1), path(depth2), path(primer_bed) - tuple val(name), path(vcf), path(primer_bed) + tuple val(name), path(vcf), path(depth1), path(depth2), path(primer_bed) output: tuple val(name), path("${name}_amplicon_coverage*.png") script: """ - covarplot.py -v ${vcf} -b ${primer_bed}/nCoV-2019/${params.primerV}/nCoV-2019.scheme.bed -s . + covarplot.py -v ${vcf} -d1 ${depth1} -d2 ${depth2} -b ${primer_bed}/nCoV-2019/${params.primerV}/nCoV-2019.scheme.bed -s . mv ${name}.CoVarPlot.png ${name}_amplicon_coverage.png - covarplot.py -v ${vcf} -b ${primer_bed}/nCoV-2019/${params.primerV}/nCoV-2019.scheme.bed -s . --log + covarplot.py -v ${vcf} -d1 ${depth1} -d2 ${depth2} -b ${primer_bed}/nCoV-2019/${params.primerV}/nCoV-2019.scheme.bed -s . --log mv ${name}.CoVarPlot.png ${name}_amplicon_coverage_log.png """ stub: diff --git a/workflows/process/filter_fastq_by_length.nf b/workflows/process/filter_fastq_by_length.nf index f0300d6..a6b951c 100644 --- a/workflows/process/filter_fastq_by_length.nf +++ b/workflows/process/filter_fastq_by_length.nf @@ -1,6 +1,6 @@ process filter_fastq_by_length { label 'ubuntu' - publishDir "${params.output}/${params.readsdir}/2.filtered_reads/", mode: 'copy', pattern: "${name}_filtered.fastq.gz" + publishDir "${params.output}/${params.readsdir}/filtered_reads/", mode: 'copy', pattern: "${name}_filtered.fastq.gz" input: tuple val(name), path(reads) output: diff --git a/workflows/process/kraken2.nf b/workflows/process/kraken2.nf index 49b928b..5c79409 100644 --- a/workflows/process/kraken2.nf +++ b/workflows/process/kraken2.nf @@ -1,6 +1,6 @@ process kraken2 { label 'kraken2' - publishDir "${params.output}/${params.readqcdir}/1.read_classification", mode: 'copy' + publishDir "${params.output}/${params.readqcdir}/${name}/tax_read_classification", mode: 'copy' input: tuple val(name), path(reads) path(database)