From a3db3ec5603f51928ee3eceb68e9e54c495926de Mon Sep 17 00:00:00 2001 From: Jonathan Manning Date: Wed, 20 Dec 2023 16:08:45 +0000 Subject: [PATCH 01/30] Start adding preprocessing components from rnaseq --- assets/rrna-db-defaults.txt | 8 + assets/schema_input.json | 18 +- bin/filter_gtf.py | 73 +++ bin/gtf2bed | 122 +++++ conf/modules.config | 22 + conf/test.config | 8 +- lib/WorkflowRiboseq.groovy | 364 ++++++++++++- modules.json | 94 ++++ modules/local/cat_additional_fasta/main.nf | 42 ++ modules/local/gtf2bed/main.nf | 31 ++ modules/local/gtf_filter/main.nf | 32 ++ .../main.nf | 31 ++ .../star_genomegenerate_igenomes/main.nf | 68 +++ modules/nf-core/bbmap/bbsplit/environment.yml | 7 + modules/nf-core/bbmap/bbsplit/main.nf | 89 +++ modules/nf-core/bbmap/bbsplit/meta.yml | 74 +++ modules/nf-core/cat/fastq/environment.yml | 7 + modules/nf-core/cat/fastq/main.nf | 80 +++ modules/nf-core/cat/fastq/meta.yml | 42 ++ modules/nf-core/cat/fastq/tests/main.nf.test | 143 +++++ .../nf-core/cat/fastq/tests/main.nf.test.snap | 78 +++ modules/nf-core/cat/fastq/tests/tags.yml | 2 + .../custom/getchromsizes/environment.yml | 7 + modules/nf-core/custom/getchromsizes/main.nf | 44 ++ modules/nf-core/custom/getchromsizes/meta.yml | 54 ++ .../custom/getchromsizes/tests/main.nf.test | 60 ++ .../getchromsizes/tests/main.nf.test.snap | 124 +++++ .../custom/getchromsizes/tests/tags.yml | 2 + modules/nf-core/cutadapt/environment.yml | 7 + modules/nf-core/cutadapt/main.nf | 50 ++ modules/nf-core/cutadapt/meta.yml | 49 ++ modules/nf-core/cutadapt/tests/main.nf.test | 69 +++ .../nf-core/cutadapt/tests/main.nf.test.snap | 22 + .../nf-core/cutadapt/tests/nextflow.config | 7 + modules/nf-core/cutadapt/tests/tags.yml | 2 + modules/nf-core/fastp/environment.yml | 7 + modules/nf-core/fastp/main.nf | 102 ++++ modules/nf-core/fastp/meta.yml | 75 +++ modules/nf-core/fastp/tests/main.nf.test | 485 +++++++++++++++++ modules/nf-core/fastp/tests/main.nf.test.snap | 52 ++ 
modules/nf-core/fastp/tests/nextflow.config | 6 + modules/nf-core/fastp/tests/tags.yml | 2 + modules/nf-core/fq/subsample/environment.yml | 7 + modules/nf-core/fq/subsample/main.nf | 55 ++ modules/nf-core/fq/subsample/meta.yml | 40 ++ .../nf-core/fq/subsample/tests/main.nf.test | 139 +++++ .../fq/subsample/tests/main.nf.test.snap | 145 +++++ .../fq/subsample/tests/nextflow.config | 7 + .../subsample/tests/nextflow_no_args.config | 7 + .../tests/nextflow_probability.config | 7 + .../tests/nextflow_record_count.config | 7 + modules/nf-core/fq/subsample/tests/tags.yml | 2 + modules/nf-core/gffread/environment.yml | 7 + modules/nf-core/gffread/main.nf | 35 ++ modules/nf-core/gffread/meta.yml | 36 ++ modules/nf-core/gffread/tests/main.nf.test | 61 +++ .../nf-core/gffread/tests/main.nf.test.snap | 52 ++ modules/nf-core/gffread/tests/nextflow.config | 5 + modules/nf-core/gffread/tests/tags.yml | 2 + modules/nf-core/gunzip/environment.yml | 7 + modules/nf-core/gunzip/main.nf | 48 ++ modules/nf-core/gunzip/meta.yml | 39 ++ modules/nf-core/gunzip/tests/main.nf.test | 35 ++ .../nf-core/gunzip/tests/main.nf.test.snap | 31 ++ modules/nf-core/gunzip/tests/tags.yml | 2 + .../nf-core/kallisto/index/environment.yml | 7 + modules/nf-core/kallisto/index/main.nf | 44 ++ modules/nf-core/kallisto/index/meta.yml | 41 ++ .../nf-core/kallisto/index/tests/main.nf.test | 33 ++ .../kallisto/index/tests/main.nf.test.snap | 31 ++ modules/nf-core/kallisto/index/tests/tags.yml | 2 + .../rsem/preparereference/environment.yml | 8 + modules/nf-core/rsem/preparereference/main.nf | 72 +++ .../nf-core/rsem/preparereference/meta.yml | 42 ++ .../rsem/preparereference/tests/main.nf.test | 36 ++ .../preparereference/tests/main.nf.test.snap | 35 ++ .../rsem/preparereference/tests/tags.yml | 2 + modules/nf-core/salmon/index/environment.yml | 7 + modules/nf-core/salmon/index/main.nf | 47 ++ modules/nf-core/salmon/index/meta.yml | 37 ++ .../nf-core/salmon/index/tests/main.nf.test | 35 ++ 
.../salmon/index/tests/main.nf.test.snap | 10 + modules/nf-core/salmon/index/tests/tags.yml | 2 + modules/nf-core/salmon/quant/environment.yml | 7 + modules/nf-core/salmon/quant/main.nf | 79 +++ modules/nf-core/salmon/quant/meta.yml | 62 +++ .../nf-core/salmon/quant/tests/main.nf.test | 192 +++++++ .../salmon/quant/tests/main.nf.test.snap | 10 + .../salmon/quant/tests/nextflow.config | 7 + modules/nf-core/salmon/quant/tests/tags.yml | 2 + modules/nf-core/sortmerna/environment.yml | 7 + modules/nf-core/sortmerna/main.nf | 70 +++ modules/nf-core/sortmerna/meta.yml | 53 ++ modules/nf-core/sortmerna/tests/main.nf.test | 59 ++ .../nf-core/sortmerna/tests/main.nf.test.snap | 49 ++ modules/nf-core/sortmerna/tests/tags.yml | 2 + .../star/genomegenerate/environment.yml | 9 + modules/nf-core/star/genomegenerate/main.nf | 96 ++++ modules/nf-core/star/genomegenerate/meta.yml | 53 ++ .../star/genomegenerate/tests/main.nf.test | 38 ++ .../genomegenerate/tests/main.nf.test.snap | 16 + .../star/genomegenerate/tests/tags.yml | 2 + modules/nf-core/trimgalore/environment.yml | 7 + modules/nf-core/trimgalore/main.nf | 75 +++ modules/nf-core/trimgalore/meta.yml | 68 +++ modules/nf-core/trimgalore/tests/main.nf.test | 105 ++++ .../trimgalore/tests/main.nf.test.snap | 148 +++++ modules/nf-core/trimgalore/tests/tags.yml | 2 + .../nf-core/umitools/extract/environment.yml | 7 + modules/nf-core/umitools/extract/main.nf | 56 ++ modules/nf-core/umitools/extract/meta.yml | 48 ++ .../umitools/extract/tests/main.nf.test | 35 ++ .../umitools/extract/tests/main.nf.test.snap | 10 + .../umitools/extract/tests/nextflow.config | 9 + .../nf-core/umitools/extract/tests/tags.yml | 2 + modules/nf-core/untar/environment.yml | 9 + modules/nf-core/untar/main.nf | 63 +++ modules/nf-core/untar/meta.yml | 46 ++ modules/nf-core/untar/tests/main.nf.test | 77 +++ modules/nf-core/untar/tests/main.nf.test.snap | 513 ++++++++++++++++++ modules/nf-core/untar/tests/tags.yml | 2 + nextflow.config | 62 +++ 
nextflow_schema.json | 1 + subworkflows/local/prepare_genome/main.nf | 304 +++++++++++ subworkflows/local/preprocess_rnaseq.nf | 196 +++++++ .../fastq_fastqc_umitools_fastp/main.nf | 140 +++++ .../fastq_fastqc_umitools_fastp/meta.yml | 128 +++++ .../tests/main.nf.test | 60 ++ .../tests/main.nf.test.snap | 81 +++ .../tests/tags.yml | 2 + .../fastq_fastqc_umitools_trimgalore/main.nf | 123 +++++ .../fastq_fastqc_umitools_trimgalore/meta.yml | 101 ++++ .../nf-core/fastq_subsample_fq_salmon/main.nf | 54 ++ .../fastq_subsample_fq_salmon/meta.yml | 70 +++ .../tests/main.nf.test | 61 +++ .../tests/main.nf.test.snap | 28 + .../tests/nextflow.config | 9 + .../fastq_subsample_fq_salmon/tests/tags.yml | 2 + workflows/riboseq.nf | 152 +++++- 139 files changed, 7566 insertions(+), 31 deletions(-) create mode 100644 assets/rrna-db-defaults.txt create mode 100755 bin/filter_gtf.py create mode 100755 bin/gtf2bed create mode 100644 modules/local/cat_additional_fasta/main.nf create mode 100644 modules/local/gtf2bed/main.nf create mode 100644 modules/local/gtf_filter/main.nf create mode 100644 modules/local/preprocess_transcripts_fasta_gencode/main.nf create mode 100644 modules/local/star_genomegenerate_igenomes/main.nf create mode 100644 modules/nf-core/bbmap/bbsplit/environment.yml create mode 100644 modules/nf-core/bbmap/bbsplit/main.nf create mode 100644 modules/nf-core/bbmap/bbsplit/meta.yml create mode 100644 modules/nf-core/cat/fastq/environment.yml create mode 100644 modules/nf-core/cat/fastq/main.nf create mode 100644 modules/nf-core/cat/fastq/meta.yml create mode 100644 modules/nf-core/cat/fastq/tests/main.nf.test create mode 100644 modules/nf-core/cat/fastq/tests/main.nf.test.snap create mode 100644 modules/nf-core/cat/fastq/tests/tags.yml create mode 100644 modules/nf-core/custom/getchromsizes/environment.yml create mode 100644 modules/nf-core/custom/getchromsizes/main.nf create mode 100644 modules/nf-core/custom/getchromsizes/meta.yml create mode 100644 
modules/nf-core/custom/getchromsizes/tests/main.nf.test create mode 100644 modules/nf-core/custom/getchromsizes/tests/main.nf.test.snap create mode 100644 modules/nf-core/custom/getchromsizes/tests/tags.yml create mode 100644 modules/nf-core/cutadapt/environment.yml create mode 100644 modules/nf-core/cutadapt/main.nf create mode 100644 modules/nf-core/cutadapt/meta.yml create mode 100644 modules/nf-core/cutadapt/tests/main.nf.test create mode 100644 modules/nf-core/cutadapt/tests/main.nf.test.snap create mode 100644 modules/nf-core/cutadapt/tests/nextflow.config create mode 100644 modules/nf-core/cutadapt/tests/tags.yml create mode 100644 modules/nf-core/fastp/environment.yml create mode 100644 modules/nf-core/fastp/main.nf create mode 100644 modules/nf-core/fastp/meta.yml create mode 100644 modules/nf-core/fastp/tests/main.nf.test create mode 100644 modules/nf-core/fastp/tests/main.nf.test.snap create mode 100644 modules/nf-core/fastp/tests/nextflow.config create mode 100644 modules/nf-core/fastp/tests/tags.yml create mode 100644 modules/nf-core/fq/subsample/environment.yml create mode 100644 modules/nf-core/fq/subsample/main.nf create mode 100644 modules/nf-core/fq/subsample/meta.yml create mode 100644 modules/nf-core/fq/subsample/tests/main.nf.test create mode 100644 modules/nf-core/fq/subsample/tests/main.nf.test.snap create mode 100644 modules/nf-core/fq/subsample/tests/nextflow.config create mode 100644 modules/nf-core/fq/subsample/tests/nextflow_no_args.config create mode 100644 modules/nf-core/fq/subsample/tests/nextflow_probability.config create mode 100644 modules/nf-core/fq/subsample/tests/nextflow_record_count.config create mode 100644 modules/nf-core/fq/subsample/tests/tags.yml create mode 100644 modules/nf-core/gffread/environment.yml create mode 100644 modules/nf-core/gffread/main.nf create mode 100644 modules/nf-core/gffread/meta.yml create mode 100644 modules/nf-core/gffread/tests/main.nf.test create mode 100644 
modules/nf-core/gffread/tests/main.nf.test.snap create mode 100644 modules/nf-core/gffread/tests/nextflow.config create mode 100644 modules/nf-core/gffread/tests/tags.yml create mode 100644 modules/nf-core/gunzip/environment.yml create mode 100644 modules/nf-core/gunzip/main.nf create mode 100644 modules/nf-core/gunzip/meta.yml create mode 100644 modules/nf-core/gunzip/tests/main.nf.test create mode 100644 modules/nf-core/gunzip/tests/main.nf.test.snap create mode 100644 modules/nf-core/gunzip/tests/tags.yml create mode 100644 modules/nf-core/kallisto/index/environment.yml create mode 100644 modules/nf-core/kallisto/index/main.nf create mode 100644 modules/nf-core/kallisto/index/meta.yml create mode 100644 modules/nf-core/kallisto/index/tests/main.nf.test create mode 100644 modules/nf-core/kallisto/index/tests/main.nf.test.snap create mode 100644 modules/nf-core/kallisto/index/tests/tags.yml create mode 100644 modules/nf-core/rsem/preparereference/environment.yml create mode 100644 modules/nf-core/rsem/preparereference/main.nf create mode 100644 modules/nf-core/rsem/preparereference/meta.yml create mode 100644 modules/nf-core/rsem/preparereference/tests/main.nf.test create mode 100644 modules/nf-core/rsem/preparereference/tests/main.nf.test.snap create mode 100644 modules/nf-core/rsem/preparereference/tests/tags.yml create mode 100644 modules/nf-core/salmon/index/environment.yml create mode 100644 modules/nf-core/salmon/index/main.nf create mode 100644 modules/nf-core/salmon/index/meta.yml create mode 100644 modules/nf-core/salmon/index/tests/main.nf.test create mode 100644 modules/nf-core/salmon/index/tests/main.nf.test.snap create mode 100644 modules/nf-core/salmon/index/tests/tags.yml create mode 100644 modules/nf-core/salmon/quant/environment.yml create mode 100644 modules/nf-core/salmon/quant/main.nf create mode 100644 modules/nf-core/salmon/quant/meta.yml create mode 100644 modules/nf-core/salmon/quant/tests/main.nf.test create mode 100644 
modules/nf-core/salmon/quant/tests/main.nf.test.snap create mode 100644 modules/nf-core/salmon/quant/tests/nextflow.config create mode 100644 modules/nf-core/salmon/quant/tests/tags.yml create mode 100644 modules/nf-core/sortmerna/environment.yml create mode 100644 modules/nf-core/sortmerna/main.nf create mode 100644 modules/nf-core/sortmerna/meta.yml create mode 100644 modules/nf-core/sortmerna/tests/main.nf.test create mode 100644 modules/nf-core/sortmerna/tests/main.nf.test.snap create mode 100644 modules/nf-core/sortmerna/tests/tags.yml create mode 100644 modules/nf-core/star/genomegenerate/environment.yml create mode 100644 modules/nf-core/star/genomegenerate/main.nf create mode 100644 modules/nf-core/star/genomegenerate/meta.yml create mode 100644 modules/nf-core/star/genomegenerate/tests/main.nf.test create mode 100644 modules/nf-core/star/genomegenerate/tests/main.nf.test.snap create mode 100644 modules/nf-core/star/genomegenerate/tests/tags.yml create mode 100644 modules/nf-core/trimgalore/environment.yml create mode 100644 modules/nf-core/trimgalore/main.nf create mode 100644 modules/nf-core/trimgalore/meta.yml create mode 100644 modules/nf-core/trimgalore/tests/main.nf.test create mode 100644 modules/nf-core/trimgalore/tests/main.nf.test.snap create mode 100644 modules/nf-core/trimgalore/tests/tags.yml create mode 100644 modules/nf-core/umitools/extract/environment.yml create mode 100644 modules/nf-core/umitools/extract/main.nf create mode 100644 modules/nf-core/umitools/extract/meta.yml create mode 100644 modules/nf-core/umitools/extract/tests/main.nf.test create mode 100644 modules/nf-core/umitools/extract/tests/main.nf.test.snap create mode 100644 modules/nf-core/umitools/extract/tests/nextflow.config create mode 100644 modules/nf-core/umitools/extract/tests/tags.yml create mode 100644 modules/nf-core/untar/environment.yml create mode 100644 modules/nf-core/untar/main.nf create mode 100644 modules/nf-core/untar/meta.yml create mode 100644 
modules/nf-core/untar/tests/main.nf.test create mode 100644 modules/nf-core/untar/tests/main.nf.test.snap create mode 100644 modules/nf-core/untar/tests/tags.yml create mode 100644 subworkflows/local/prepare_genome/main.nf create mode 100644 subworkflows/local/preprocess_rnaseq.nf create mode 100644 subworkflows/nf-core/fastq_fastqc_umitools_fastp/main.nf create mode 100644 subworkflows/nf-core/fastq_fastqc_umitools_fastp/meta.yml create mode 100644 subworkflows/nf-core/fastq_fastqc_umitools_fastp/tests/main.nf.test create mode 100644 subworkflows/nf-core/fastq_fastqc_umitools_fastp/tests/main.nf.test.snap create mode 100644 subworkflows/nf-core/fastq_fastqc_umitools_fastp/tests/tags.yml create mode 100644 subworkflows/nf-core/fastq_fastqc_umitools_trimgalore/main.nf create mode 100644 subworkflows/nf-core/fastq_fastqc_umitools_trimgalore/meta.yml create mode 100644 subworkflows/nf-core/fastq_subsample_fq_salmon/main.nf create mode 100644 subworkflows/nf-core/fastq_subsample_fq_salmon/meta.yml create mode 100644 subworkflows/nf-core/fastq_subsample_fq_salmon/tests/main.nf.test create mode 100644 subworkflows/nf-core/fastq_subsample_fq_salmon/tests/main.nf.test.snap create mode 100644 subworkflows/nf-core/fastq_subsample_fq_salmon/tests/nextflow.config create mode 100644 subworkflows/nf-core/fastq_subsample_fq_salmon/tests/tags.yml diff --git a/assets/rrna-db-defaults.txt b/assets/rrna-db-defaults.txt new file mode 100644 index 0000000..4223356 --- /dev/null +++ b/assets/rrna-db-defaults.txt @@ -0,0 +1,8 @@ +https://raw.githubusercontent.com/biocore/sortmerna/v4.3.4/data/rRNA_databases/rfam-5.8s-database-id98.fasta +https://raw.githubusercontent.com/biocore/sortmerna/v4.3.4/data/rRNA_databases/rfam-5s-database-id98.fasta +https://raw.githubusercontent.com/biocore/sortmerna/v4.3.4/data/rRNA_databases/silva-arc-16s-id95.fasta +https://raw.githubusercontent.com/biocore/sortmerna/v4.3.4/data/rRNA_databases/silva-arc-23s-id98.fasta 
+https://raw.githubusercontent.com/biocore/sortmerna/v4.3.4/data/rRNA_databases/silva-bac-16s-id90.fasta +https://raw.githubusercontent.com/biocore/sortmerna/v4.3.4/data/rRNA_databases/silva-bac-23s-id98.fasta +https://raw.githubusercontent.com/biocore/sortmerna/v4.3.4/data/rRNA_databases/silva-euk-18s-id95.fasta +https://raw.githubusercontent.com/biocore/sortmerna/v4.3.4/data/rRNA_databases/silva-euk-28s-id98.fasta diff --git a/assets/schema_input.json b/assets/schema_input.json index 9f610bd..da69e6a 100644 --- a/assets/schema_input.json +++ b/assets/schema_input.json @@ -10,27 +10,37 @@ "sample": { "type": "string", "pattern": "^\\S+$", - "errorMessage": "Sample name must be provided and cannot contain spaces" + "errorMessage": "Sample name must be provided and cannot contain spaces", + "meta": ["id"] }, "fastq_1": { "type": "string", + "format": "file-path", + "exists": true, "pattern": "^\\S+\\.f(ast)?q\\.gz$", "errorMessage": "FastQ file for reads 1 must be provided, cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'" }, "fastq_2": { "errorMessage": "FastQ file for reads 2 cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'", + "type": "string", + "format": "file-path", + "exists": true, "anyOf": [ { - "type": "string", "pattern": "^\\S+\\.f(ast)?q\\.gz$" }, { - "type": "string", "maxLength": 0 } ] + }, + "strandedness": { + "type": "string", + "errorMessage": "Strandedness must be provided and be one of 'auto', 'forward', 'reverse' or 'unstranded'", + "enum": ["forward", "reverse", "unstranded", "auto"], + "meta": ["strandedness"] } }, - "required": ["sample", "fastq_1"] + "required": ["sample", "fastq_1", "strandedness"] } } diff --git a/bin/filter_gtf.py b/bin/filter_gtf.py new file mode 100755 index 0000000..b2215fd --- /dev/null +++ b/bin/filter_gtf.py @@ -0,0 +1,73 @@ +#!/usr/bin/env python + +# Written by Olga Botvinnik with subsequent reworking by Jonathan Manning. Released under the MIT license. 
+ +import logging +import argparse +import re +import statistics +from typing import Set + +# Create a logger +logging.basicConfig(format="%(name)s - %(asctime)s %(levelname)s: %(message)s") +logger = logging.getLogger("fasta_gtf_filter") +logger.setLevel(logging.INFO) + + +def extract_fasta_seq_names(fasta_name: str) -> Set[str]: + """Extracts the sequence names from a FASTA file.""" + with open(fasta_name) as fasta: + return {line[1:].split(None, 1)[0] for line in fasta if line.startswith(">")} + + +def tab_delimited(file: str) -> float: + """Check if file is tab-delimited and return median number of tabs.""" + with open(file, "r") as f: + data = f.read(102400) + return statistics.median(line.count("\t") for line in data.split("\n")) + + +def filter_gtf(fasta: str, gtf_in: str, filtered_gtf_out: str, skip_transcript_id_check: bool) -> None: + """Filter GTF file based on FASTA sequence names.""" + if tab_delimited(gtf_in) != 8: + raise ValueError("Invalid GTF file: Expected 9 tab-separated columns.") + + seq_names_in_genome = extract_fasta_seq_names(fasta) + logger.info(f"Extracted chromosome sequence names from {fasta}") + logger.debug("All sequence IDs from FASTA: " + ", ".join(sorted(seq_names_in_genome))) + + seq_names_in_gtf = set() + try: + with open(gtf_in) as gtf, open(filtered_gtf_out, "w") as out: + line_count = 0 + for line in gtf: + seq_name = line.split("\t")[0] + seq_names_in_gtf.add(seq_name) # Add sequence name to the set + + if seq_name in seq_names_in_genome: + if skip_transcript_id_check or re.search(r'transcript_id "([^"]+)"', line): + out.write(line) + line_count += 1 + + if line_count == 0: + raise ValueError("All GTF lines removed by filters") + + except IOError as e: + logger.error(f"File operation failed: {e}") + return + + logger.debug("All sequence IDs from GTF: " + ", ".join(sorted(seq_names_in_gtf))) + logger.info(f"Extracted {line_count} matching sequences from {gtf_in} into {filtered_gtf_out}") + + +if __name__ == "__main__": + 
parser = argparse.ArgumentParser(description="Filters a GTF file based on sequence names in a FASTA file.") + parser.add_argument("--gtf", type=str, required=True, help="GTF file") + parser.add_argument("--fasta", type=str, required=True, help="Genome fasta file") + parser.add_argument("--prefix", dest="prefix", default="genes", type=str, help="Prefix for output GTF files") + parser.add_argument( + "--skip_transcript_id_check", action="store_true", help="Skip checking for transcript IDs in the GTF file" + ) + + args = parser.parse_args() + filter_gtf(args.fasta, args.gtf, args.prefix + ".filtered.gtf", args.skip_transcript_id_check) diff --git a/bin/gtf2bed b/bin/gtf2bed new file mode 100755 index 0000000..4da5c04 --- /dev/null +++ b/bin/gtf2bed @@ -0,0 +1,122 @@ +#!/usr/bin/env perl + +# Copyright (c) 2011 Erik Aronesty (erik@q32.com) +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. +# +# ALSO, IT WOULD BE NICE IF YOU LET ME KNOW YOU USED IT. 
+ +use Getopt::Long; + +my $extended; +GetOptions("x"=>\$extended); + +$in = shift @ARGV; + +my $in_cmd =($in =~ /\.gz$/ ? "gunzip -c $in|" : $in =~ /\.zip$/ ? "unzip -p $in|" : "$in") || die "Can't open $in: $!\n"; +open IN, $in_cmd; + +while (<IN>) { + $gff = 2 if /^##gff-version 2/; + $gff = 3 if /^##gff-version 3/; + next if /^#/ && $gff; + + s/\s+$//; + # 0-chr 1-src 2-feat 3-beg 4-end 5-scor 6-dir 7-fram 8-attr + my @f = split /\t/; + if ($gff) { + # most ver 2's stick gene names in the id field + ($id) = $f[8]=~ /\bID="([^"]+)"/; + # most ver 3's stick unquoted names in the name field + ($id) = $f[8]=~ /\bName=([^";]+)/ if !$id && $gff == 3; + } else { + ($id) = $f[8]=~ /transcript_id "([^"]+)"/; + } + + next unless $id && $f[0]; + + if ($f[2] eq 'exon') { + die "no position at exon on line $." if ! $f[3]; + # gff3 puts :\d in exons sometimes + $id =~ s/:\d+$// if $gff == 3; + push @{$exons{$id}}, \@f; + # save lowest start + $trans{$id} = \@f if !$trans{$id}; + } elsif ($f[2] eq 'start_codon') { + #optional, output codon start/stop as "thick" region in bed + $sc{$id}->[0] = $f[3]; + } elsif ($f[2] eq 'stop_codon') { + $sc{$id}->[1] = $f[4]; + } elsif ($f[2] eq 'miRNA' ) { + $trans{$id} = \@f if !$trans{$id}; + push @{$exons{$id}}, \@f; + } +} + +for $id ( + # sort by chr then pos + sort { + $trans{$a}->[0] eq $trans{$b}->[0] ? 
+ $trans{$a}->[3] <=> $trans{$b}->[3] : + $trans{$a}->[0] cmp $trans{$b}->[0] + } (keys(%trans)) ) { + my ($chr, undef, undef, undef, undef, undef, $dir, undef, $attr, undef, $cds, $cde) = @{$trans{$id}}; + my ($cds, $cde); + ($cds, $cde) = @{$sc{$id}} if $sc{$id}; + + # sort by pos + my @ex = sort { + $a->[3] <=> $b->[3] + } @{$exons{$id}}; + + my $beg = $ex[0][3]; + my $end = $ex[-1][4]; + + if ($dir eq '-') { + # swap + $tmp=$cds; + $cds=$cde; + $cde=$tmp; + $cds -= 2 if $cds; + $cde += 2 if $cde; + } + + # not specified, just use exons + $cds = $beg if !$cds; + $cde = $end if !$cde; + + # adjust start for bed + --$beg; --$cds; + + my $exn = @ex; # exon count + my $exst = join ",", map {$_->[3]-$beg-1} @ex; # exon start + my $exsz = join ",", map {$_->[4]-$_->[3]+1} @ex; # exon size + + my $gene_id; + my $extend = ""; + if ($extended) { + ($gene_id) = $attr =~ /gene_name "([^"]+)"/; + ($gene_id) = $attr =~ /gene_id "([^"]+)"/ unless $gene_id; + $extend="\t$gene_id"; + } + # added an extra comma to make it look exactly like ucsc's beds + print "$chr\t$beg\t$end\t$id\t0\t$dir\t$cds\t$cde\t0\t$exn\t$exsz,\t$exst,$extend\n"; +} + +close IN; diff --git a/conf/modules.config b/conf/modules.config index 39e8138..62d5b2b 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -29,6 +29,28 @@ process { withName: FASTQC { ext.args = '--quiet' } + + withName: 'FQ_SUBSAMPLE' { + ext.args = '--record-count 1000000 --seed 1' + ext.prefix = { "${meta.id}.subsampled" } + publishDir = [ + enabled: false + ] + } + + withName: '.*:FASTQ_SUBSAMPLE_FQ_SALMON:SALMON_QUANT' { + ext.args = '--skipQuant' + publishDir = [ + enabled: false + ] + } + + withName: 'SALMON_INDEX' { + ext.args = '-k 21' + publishDir = [ + enabled: false + ] + } withName: CUSTOM_DUMPSOFTWAREVERSIONS { publishDir = [ diff --git a/conf/test.config b/conf/test.config index e10ad7e..c2dc1e4 100644 --- a/conf/test.config +++ b/conf/test.config @@ -16,10 +16,12 @@ params { // Limit resources so that this can 
run on GitHub Actions max_cpus = 2 - max_memory = '6.GB' - max_time = '6.h' + max_memory = '16.GB' + max_time = '16.h' // Input data input = 'https://raw.githubusercontent.com/nf-core/test-datasets/002aa1f1df77b4dbfc816340c8d7d629805b166b/testdata/samplesheet.csv' - fasta = 'http://ftp.ensembl.org/pub/release-110/fasta/mus_musculus/dna/Mus_musculus.GRCm39.dna.chromosome.1.fa.gz' + //fasta = 'http://ftp.ensembl.org/pub/release-110/fasta/mus_musculus/dna/Mus_musculus.GRCm39.dna.chromosome.1.fa.gz' + fasta = 'http://ftp.ensembl.org/pub/release-110/fasta/mus_musculus/dna/Mus_musculus.GRCm39.dna.primary_assembly.fa.gz' + gtf = 'http://ftp.ensembl.org/pub/release-110/gtf/mus_musculus/Mus_musculus.GRCm39.110.gtf.gz' } diff --git a/lib/WorkflowRiboseq.groovy b/lib/WorkflowRiboseq.groovy index a4cd33d..39d4b8d 100755 --- a/lib/WorkflowRiboseq.groovy +++ b/lib/WorkflowRiboseq.groovy @@ -1,8 +1,9 @@ // -// This file holds several functions specific to the workflow/riboseq.nf in the nf-core/riboseq pipeline +// This file holds several functions specific to the workflow/riboseq.nf in the nf-core/riboseq pipeline // import nextflow.Nextflow +import groovy.json.JsonSlurper import groovy.text.SimpleTemplateEngine class WorkflowRiboseq { @@ -11,13 +12,237 @@ class WorkflowRiboseq { // Check and validate parameters // public static void initialise(params, log) { - genomeExistsError(params, log) + if (!params.gtf && !params.gff) { + Nextflow.error("No GTF or GFF3 annotation specified! 
The pipeline requires at least one of these files.") + } + + if (params.gtf) { + if (params.gff) { + gtfGffWarn(log) + } + if (params.genome == 'GRCh38' && params.gtf.contains('Homo_sapiens/NCBI/GRCh38/Annotation/Genes/genes.gtf')) { + ncbiGenomeWarn(log) + } + if (params.gtf.contains('/UCSC/') && params.gtf.contains('Annotation/Genes/genes.gtf')) { + ucscGenomeWarn(log) + } + } + + if (params.transcript_fasta) { + transcriptsFastaWarn(log) + } + + if (!params.skip_bbsplit && !params.bbsplit_index && !params.bbsplit_fasta_list) { + Nextflow.error("Please provide either --bbsplit_fasta_list / --bbsplit_index to run BBSplit.") + } + + if (params.remove_ribo_rna && !params.ribo_database_manifest) { + Nextflow.error("Please provide --ribo_database_manifest to remove ribosomal RNA with SortMeRNA.") + } + + + if (params.with_umi && !params.skip_umi_extract) { + if (!params.umitools_bc_pattern && !params.umitools_bc_pattern2) { + Nextflow.error("UMI-tools requires a barcode pattern to extract barcodes from the reads.") + } + } + + if (params.skip_alignment) { + skipAlignmentWarn(log) + } + + if (!params.skip_pseudo_alignment && params.pseudo_aligner) { + if (!(params.salmon_index || params.transcript_fasta || (params.fasta && (params.gtf || params.gff)))) { + Nextflow.error("To use `--pseudo_aligner 'salmon'`, you must provide either --salmon_index or --transcript_fasta or both --fasta and --gtf / --gff.") + } + } + + // Checks when running --aligner star_rsem + if (!params.skip_alignment && params.aligner == 'star_rsem') { + if (params.with_umi) { + rsemUmiError(log) + } + if (params.rsem_index && params.star_index) { + rsemStarIndexWarn(log) + } + if (params.aligner == 'star_rsem' && params.extra_star_align_args) { + rsemStarExtraArgumentsWarn(log) + } + } - if (!params.fasta) { - Nextflow.error "Genome fasta file not specified with e.g. '--fasta genome.fa' or via a detectable config file." 
+ // Warn if --additional_fasta provided with aligner index + if (!params.skip_alignment && params.additional_fasta) { + def index = '' + if (params.aligner == 'star_salmon' && params.star_index) { + index = 'star' + } + if (params.aligner == 'star_rsem' && params.rsem_index) { + index = 'rsem' + } + if (params.aligner == 'hisat2' && params.hisat2_index) { + index = 'hisat2' + } + if (index) { + additionaFastaIndexWarn(index, log) + } } + + // Check which RSeQC modules we are running + def valid_rseqc_modules = ['bam_stat', 'inner_distance', 'infer_experiment', 'junction_annotation', 'junction_saturation', 'read_distribution', 'read_duplication', 'tin'] + def rseqc_modules = params.rseqc_modules ? params.rseqc_modules.split(',').collect{ it.trim().toLowerCase() } : [] + if ((valid_rseqc_modules + rseqc_modules).unique().size() != valid_rseqc_modules.size()) { + Nextflow.error("Invalid option: ${params.rseqc_modules}. Valid options for '--rseqc_modules': ${valid_rseqc_modules.join(', ')}") + } + } + + // + // Function to validate channels from input samplesheet + // + public static ArrayList validateInput(input) { + def (metas, fastqs) = input[1..2] + + // Check that multiple runs of the same sample are of the same strandedness + def strandedness_ok = metas.collect{ it.strandedness }.unique().size == 1 + if (!strandedness_ok) { + Nextflow.error("Please check input samplesheet -> Multiple runs of a sample must have the same strandedness!: ${metas[0].id}") + } + + // Check that multiple runs of the same sample are of the same datatype i.e. single-end / paired-end + def endedness_ok = metas.collect{ it.single_end }.unique().size == 1 + if (!endedness_ok) { + Nextflow.error("Please check input samplesheet -> Multiple runs of a sample must be of the same datatype i.e. 
single-end or paired-end: ${metas[0].id}") + } + + return [ metas[0], fastqs ] + } + + // + // Function to check whether biotype field exists in GTF file + // + public static Boolean biotypeInGtf(gtf_file, biotype, log) { + def hits = 0 + gtf_file.eachLine { line -> + def attributes = line.split('\t')[-1].split() + if (attributes.contains(biotype)) { + hits += 1 + } + } + if (hits) { + return true + } else { + log.warn "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + + " Biotype attribute '${biotype}' not found in the last column of the GTF file!\n\n" + + " Biotype QC will be skipped to circumvent the issue below:\n" + + " https://github.com/nf-core/rnaseq/issues/460\n\n" + + " Amend '--featurecounts_group_type' to change this behaviour.\n" + + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" + return false + } + } + + // + // Function to generate an error if contigs in genome fasta file > 512 Mbp + // + public static void checkMaxContigSize(fai_file, log) { + def max_size = 512000000 + fai_file.eachLine { line -> + def lspl = line.split('\t') + def chrom = lspl[0] + def size = lspl[1] + if (size.toInteger() > max_size) { + def error_string = "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + + " Contig longer than ${max_size}bp found in reference genome!\n\n" + + " ${chrom}: ${size}\n\n" + + " Provide the '--bam_csi_index' parameter to use a CSI instead of BAI index.\n\n" + + " Please see:\n" + + " https://github.com/nf-core/rnaseq/issues/744\n" + + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" + Nextflow.error(error_string) + } + } + } + + // + // Function that parses Salmon quant 'meta_info.json' output file to get inferred strandedness + // + public static String getSalmonInferredStrandedness(json_file) { + def lib_type = new JsonSlurper().parseText(json_file.text).get('library_types')[0] + def strandedness = 'reverse' + if (lib_type) { + if 
(lib_type in ['U', 'IU']) { + strandedness = 'unstranded' + } else if (lib_type in ['SF', 'ISF']) { + strandedness = 'forward' + } else if (lib_type in ['SR', 'ISR']) { + strandedness = 'reverse' + } + } + return strandedness + } + + // + // Function that parses TrimGalore log output file to get total number of reads after trimming + // + public static Integer getTrimGaloreReadsAfterFiltering(log_file) { + def total_reads = 0 + def filtered_reads = 0 + log_file.eachLine { line -> + def total_reads_matcher = line =~ /([\d\.]+)\ssequences processed in total/ + def filtered_reads_matcher = line =~ /shorter than the length cutoff[^:]+:\s([\d\.]+)/ + if (total_reads_matcher) total_reads = total_reads_matcher[0][1].toFloat() + if (filtered_reads_matcher) filtered_reads = filtered_reads_matcher[0][1].toFloat() + } + return total_reads - filtered_reads + } + + // + // Function that parses and returns the alignment rate from the STAR log output + // + public static ArrayList getStarPercentMapped(params, align_log) { + def percent_aligned = 0 + def pattern = /Uniquely mapped reads %\s*\|\s*([\d\.]+)%/ + align_log.eachLine { line -> + def matcher = line =~ pattern + if (matcher) { + percent_aligned = matcher[0][1].toFloat() + } + } + + def pass = false + if (percent_aligned >= params.min_mapped_reads.toFloat()) { + pass = true + } + return [ percent_aligned, pass ] + } + + // + // Function that parses and returns the predicted strandedness from the RSeQC infer_experiment.py output + // + public static ArrayList getInferexperimentStrandedness(inferexperiment_file, cutoff=30) { + def sense = 0 + def antisense = 0 + def undetermined = 0 + inferexperiment_file.eachLine { line -> + def undetermined_matcher = line =~ /Fraction of reads failed to determine:\s([\d\.]+)/ + def se_sense_matcher = line =~ /Fraction of reads explained by "\++,--":\s([\d\.]+)/ + def se_antisense_matcher = line =~ /Fraction of reads explained by "\+-,-\+":\s([\d\.]+)/ + def pe_sense_matcher = line =~ 
/Fraction of reads explained by "1\++,1--,2\+-,2-\+":\s([\d\.]+)/ + def pe_antisense_matcher = line =~ /Fraction of reads explained by "1\+-,1-\+,2\+\+,2--":\s([\d\.]+)/ + if (undetermined_matcher) undetermined = undetermined_matcher[0][1].toFloat() * 100 + if (se_sense_matcher) sense = se_sense_matcher[0][1].toFloat() * 100 + if (se_antisense_matcher) antisense = se_antisense_matcher[0][1].toFloat() * 100 + if (pe_sense_matcher) sense = pe_sense_matcher[0][1].toFloat() * 100 + if (pe_antisense_matcher) antisense = pe_antisense_matcher[0][1].toFloat() * 100 + } + def strandedness = 'unstranded' + if (sense >= 100-cutoff) { + strandedness = 'forward' + } else if (antisense >= 100-cutoff) { + strandedness = 'reverse' + } + return [ strandedness, sense, antisense, undetermined ] } // @@ -50,9 +275,7 @@ class WorkflowRiboseq { // // Generate methods description for MultiQC // - public static String toolCitationText(params) { - // TODO nf-core: Optionally add in-text citation tools to this list. // Can use ternary operators to dynamically construct based conditions, e.g. params["run_xyz"] ? "Tool (Foo et al. 2023)" : "", // Uncomment function in methodsDescriptionText to render in MultiQC report @@ -67,7 +290,6 @@ class WorkflowRiboseq { } public static String toolBibliographyText(params) { - // TODO Optionally add bibliographic entries to this list. // Can use ternary operators to dynamically construct based conditions, e.g. params["run_xyz"] ? "
<li>Author (2023) Pub name, Journal, DOI</li>
  • " : "", // Uncomment function in methodsDescriptionText to render in MultiQC report @@ -97,7 +319,6 @@ class WorkflowRiboseq { //meta["tool_citations"] = toolCitationText(params).replaceAll(", \\.", ".").replaceAll("\\. \\.", ".").replaceAll(", \\.", ".") //meta["tool_bibliography"] = toolBibliographyText(params) - def methods_text = mqc_methods_yaml.text def engine = new SimpleTemplateEngine() @@ -106,6 +327,18 @@ class WorkflowRiboseq { return description_html } + // + // Create MultiQC tsv custom content from a list of values + // + public static String multiqcTsvFromList(tsv_data, header) { + def tsv_string = "" + if (tsv_data.size() > 0) { + tsv_string += "${header.join('\t')}\n" + tsv_string += tsv_data.join('\n') + } + return tsv_string + } + // // Exit pipeline if incorrect --genome key provided // @@ -119,4 +352,119 @@ class WorkflowRiboseq { Nextflow.error(error_string) } } + + // + // Print a warning if using GRCh38 assembly from igenomes.config + // + private static void ncbiGenomeWarn(log) { + log.warn "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + + " When using '--genome GRCh38' the assembly is from the NCBI and NOT Ensembl.\n" + + " Biotype QC will be skipped to circumvent the issue below:\n" + + " https://github.com/nf-core/rnaseq/issues/460\n\n" + + " If you would like to use the soft-masked Ensembl assembly instead please see:\n" + + " https://github.com/nf-core/rnaseq/issues/159#issuecomment-501184312\n" + + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" + } + + // + // Print a warning if using a UCSC assembly from igenomes.config + // + private static void ucscGenomeWarn(log) { + log.warn "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + + " When using UCSC assemblies the 'gene_biotype' field is absent from the GTF file.\n" + + " Biotype QC will be skipped to circumvent the issue below:\n" + + " https://github.com/nf-core/rnaseq/issues/460\n\n" + + " If 
you would like to use the soft-masked Ensembl assembly instead please see:\n" + + " https://github.com/nf-core/rnaseq/issues/159#issuecomment-501184312\n" + + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" + } + + // + // Print a warning if both GTF and GFF have been provided + // + private static void gtfGffWarn(log) { + log.warn "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + + " Both '--gtf' and '--gff' parameters have been provided.\n" + + " Using GTF file as priority.\n" + + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" + } + + // + // Print a warning if using '--transcript_fasta' + // + private static void transcriptsFastaWarn(log) { + log.warn "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + + " '--transcript_fasta' parameter has been provided.\n" + + " Make sure transcript names in this file match those in the GFF/GTF file.\n\n" + + " Please see:\n" + + " https://github.com/nf-core/rnaseq/issues/753\n" + + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" + } + + // + // Print a warning if --skip_alignment has been provided + // + private static void skipAlignmentWarn(log) { + log.warn "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + + " '--skip_alignment' parameter has been provided.\n" + + " Skipping alignment, genome-based quantification and all downstream QC processes.\n" + + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" + } + + // + // Print a warning if using '--aligner star_rsem' and '--with_umi' + // + private static void rsemUmiError(log) { + def error_string = "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + + " When using '--aligner star_rsem', STAR is run by RSEM itself and so it is\n" + + " not possible to remove UMIs before the quantification.\n\n" + + " If you would like to remove UMI barcodes using the 
'--with_umi' option\n" + + " please use either '--aligner star_salmon' or '--aligner hisat2'.\n" + + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" + Nextflow.error(error_string) + } + + // + // Print a warning if using '--aligner star_rsem' and providing both '--rsem_index' and '--star_index' + // + private static void rsemStarIndexWarn(log) { + log.warn "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + + " When using '--aligner star_rsem', both the STAR and RSEM indices should\n" + + " be present in the path specified by '--rsem_index'.\n\n" + + " This warning has been generated because you have provided both\n" + + " '--rsem_index' and '--star_index'. The pipeline will ignore the latter.\n\n" + + " Please see:\n" + + " https://github.com/nf-core/rnaseq/issues/568\n" + + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" + } + + // + // Print a warning if using '--aligner star_rsem' and providing '--star_extra_alignment_args' + // + private static void rsemStarExtraArgumentsWarn(log) { + log.warn "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + + " No additional arguments can be passed to STAR when using RSEM.\n" + + " Because RSEM enforces its own parameters for STAR, any extra arguments\n" + + " to STAR will be ignored. 
Alternatively, choose the STAR+Salmon route.\n\n" + + " This warning has been generated because you have provided both\n" + + " '--aligner star_rsem' and '--extra_star_align_args'.\n\n" + + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" + } + + // + // Print a warning if using '--additional_fasta' and '--_index' + // + private static void additionaFastaIndexWarn(index, log) { + log.warn "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + + " When using '--additional_fasta ' the aligner index will not\n" + + " be re-built with the transgenes incorporated by default since you have \n" + + " already provided an index via '--${index}_index '.\n\n" + + " Set '--additional_fasta --${index}_index false --gene_bed false --save_reference'\n" + + " to re-build the index with transgenes included and the index and gene BED file will be saved in\n" + + " 'results/genome/index/${index}/' for re-use with '--${index}_index'.\n\n" + + " Ignore this warning if you know that the index already contains transgenes.\n\n" + + " Please see:\n" + + " https://github.com/nf-core/rnaseq/issues/556\n" + + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" + } } diff --git a/modules.json b/modules.json index 5f7f69d..5579b49 100644 --- a/modules.json +++ b/modules.json @@ -5,16 +5,56 @@ "https://github.com/nf-core/modules.git": { "modules": { "nf-core": { + "bbmap/bbsplit": { + "branch": "master", + "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", + "installed_by": ["modules"] + }, + "cat/fastq": { + "branch": "master", + "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", + "installed_by": ["modules"] + }, "custom/dumpsoftwareversions": { "branch": "master", "git_sha": "bba7e362e4afead70653f84d8700588ea28d0f9e", "installed_by": ["modules"] }, + "custom/getchromsizes": { + "branch": "master", + "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", + "installed_by": ["modules"] + }, + "cutadapt": { + 
"branch": "master", + "git_sha": "07881e42938b4f0070e864b45d424b01745bc3a4", + "installed_by": ["modules"] + }, + "fastp": { + "branch": "master", + "git_sha": "3c77ca9aac783e76c3614a06db3bfe4fef619bde", + "installed_by": ["modules"] + }, "fastqc": { "branch": "master", "git_sha": "65ad3e0b9a4099592e1102e92e10455dc661cf53", "installed_by": ["modules"] }, + "fq/subsample": { + "branch": "master", + "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", + "installed_by": ["fastq_subsample_fq_salmon"] + }, + "gffread": { + "branch": "master", + "git_sha": "a2d6c3082c5c44b4155a3246daff36701ee49af8", + "installed_by": ["modules"] + }, + "gunzip": { + "branch": "master", + "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", + "installed_by": ["modules"] + }, "hisat2/build": { "branch": "master", "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", @@ -25,10 +65,64 @@ "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", "installed_by": ["modules"] }, + "kallisto/index": { + "branch": "master", + "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", + "installed_by": ["modules"] + }, "multiqc": { "branch": "master", "git_sha": "4ab13872435962dadc239979554d13709e20bf29", "installed_by": ["modules"] + }, + "rsem/preparereference": { + "branch": "master", + "git_sha": "8ec12d844837b0167540c910afe4d3f920cd7114", + "installed_by": ["modules"] + }, + "salmon/index": { + "branch": "master", + "git_sha": "ce801ced94947b2ff01002b4120eef2f79489b34", + "installed_by": ["fastq_subsample_fq_salmon", "modules"] + }, + "salmon/quant": { + "branch": "master", + "git_sha": "ce801ced94947b2ff01002b4120eef2f79489b34", + "installed_by": ["fastq_subsample_fq_salmon"] + }, + "sortmerna": { + "branch": "master", + "git_sha": "a20b6b1e9114a08007608528e4a2b0fbbb8a9ca2", + "installed_by": ["modules"] + }, + "star/genomegenerate": { + "branch": "master", + "git_sha": "a64788f5ad388f1d2ac5bd5f1f3f8fc81476148c", + "installed_by": ["modules"] + }, + "trimgalore": { + "branch": "master", + 
"git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", + "installed_by": ["modules"] + }, + "umitools/extract": { + "branch": "master", + "git_sha": "65ad3e0b9a4099592e1102e92e10455dc661cf53", + "installed_by": ["modules"] + }, + "untar": { + "branch": "master", + "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", + "installed_by": ["modules"] + } + } + }, + "subworkflows": { + "nf-core": { + "fastq_subsample_fq_salmon": { + "branch": "master", + "git_sha": "c5ef823ea08007ee393baa7a172913d1ad1de457", + "installed_by": ["subworkflows"] } } } diff --git a/modules/local/cat_additional_fasta/main.nf b/modules/local/cat_additional_fasta/main.nf new file mode 100644 index 0000000..72af4bc --- /dev/null +++ b/modules/local/cat_additional_fasta/main.nf @@ -0,0 +1,42 @@ +process CAT_ADDITIONAL_FASTA { + tag "$add_fasta" + + conda "conda-forge::python=3.9.5" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/python:3.9--1' : + 'biocontainers/python:3.9--1' }" + + input: + path fasta + path gtf + path add_fasta + val biotype + + output: + path "${name}.fasta", emit: fasta + path "${name}.gtf" , emit: gtf + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def genome_name = params.genome ? params.genome : fasta.getBaseName() + def biotype_name = biotype ? 
"-b $biotype" : '' + def add_name = add_fasta.getBaseName() + name = "${genome_name}_${add_name}" + """ + fasta2gtf.py \\ + -o ${add_fasta.baseName}.gtf \\ + $biotype_name \\ + $add_fasta + + cat $fasta $add_fasta > ${name}.fasta + cat $gtf ${add_fasta.baseName}.gtf > ${name}.gtf + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + python: \$(python --version | sed 's/Python //g') + END_VERSIONS + """ +} diff --git a/modules/local/gtf2bed/main.nf b/modules/local/gtf2bed/main.nf new file mode 100644 index 0000000..8a4bae8 --- /dev/null +++ b/modules/local/gtf2bed/main.nf @@ -0,0 +1,31 @@ +process GTF2BED { + tag "$gtf" + label 'process_low' + + conda "conda-forge::perl=5.26.2" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/perl:5.26.2' : + 'biocontainers/perl:5.26.2' }" + + input: + path gtf + + output: + path '*.bed' , emit: bed + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: // This script is bundled with the pipeline, in nf-core/rnaseq/bin/ + """ + gtf2bed \\ + $gtf \\ + > ${gtf.baseName}.bed + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + perl: \$(echo \$(perl --version 2>&1) | sed 's/.*v\\(.*\\)) built.*/\\1/') + END_VERSIONS + """ +} diff --git a/modules/local/gtf_filter/main.nf b/modules/local/gtf_filter/main.nf new file mode 100644 index 0000000..d14e8ff --- /dev/null +++ b/modules/local/gtf_filter/main.nf @@ -0,0 +1,32 @@ +process GTF_FILTER { + tag "$fasta" + + conda "conda-forge::python=3.9.5" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/python:3.9--1' : + 'biocontainers/python:3.9--1' }" + + input: + path fasta + path gtf + + output: + path "*.filtered.gtf", emit: genome_gtf + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: // filter_gtf.py is bundled with the pipeline, in nf-core/rnaseq/bin/ + """ + filter_gtf.py \\ + --gtf $gtf \\ + --fasta $fasta \\ + --prefix ${fasta.baseName} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + python: \$(python --version | sed 's/Python //g') + END_VERSIONS + """ +} diff --git a/modules/local/preprocess_transcripts_fasta_gencode/main.nf b/modules/local/preprocess_transcripts_fasta_gencode/main.nf new file mode 100644 index 0000000..41f1d42 --- /dev/null +++ b/modules/local/preprocess_transcripts_fasta_gencode/main.nf @@ -0,0 +1,31 @@ +process PREPROCESS_TRANSCRIPTS_FASTA_GENCODE { + tag "$fasta" + + conda "conda-forge::sed=4.7" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : + 'nf-core/ubuntu:20.04' }" + + input: + path fasta + + output: + path "*.fa" , emit: fasta + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def gzipped = fasta.toString().endsWith('.gz') + def outfile = gzipped ? file(fasta.baseName).baseName : fasta.baseName + def command = gzipped ? 
'zcat' : 'cat' + """ + $command $fasta | cut -d "|" -f1 > ${outfile}.fixed.fa + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + sed: \$(echo \$(sed --version 2>&1) | sed 's/^.*GNU sed) //; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/local/star_genomegenerate_igenomes/main.nf b/modules/local/star_genomegenerate_igenomes/main.nf new file mode 100644 index 0000000..7766faa --- /dev/null +++ b/modules/local/star_genomegenerate_igenomes/main.nf @@ -0,0 +1,68 @@ +process STAR_GENOMEGENERATE_IGENOMES { + tag "$fasta" + label 'process_high' + + conda "bioconda::star=2.6.1d bioconda::samtools=1.10 conda-forge::gawk=5.1.0" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/mulled-v2-1fa26d1ce03c295fe2fdcf85831a92fbcbd7e8c2:59cdd445419f14abac76b31dd0d71217994cbcc9-0' : + 'biocontainers/mulled-v2-1fa26d1ce03c295fe2fdcf85831a92fbcbd7e8c2:59cdd445419f14abac76b31dd0d71217994cbcc9-0' }" + + input: + path fasta + path gtf + + output: + path "star" , emit: index + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def args_list = args.tokenize() + def memory = task.memory ? 
"--limitGenomeGenerateRAM ${task.memory.toBytes() - 100000000}" : '' + if (args_list.contains('--genomeSAindexNbases')) { + """ + mkdir star + STAR \\ + --runMode genomeGenerate \\ + --genomeDir star/ \\ + --genomeFastaFiles $fasta \\ + --sjdbGTFfile $gtf \\ + --runThreadN $task.cpus \\ + $memory \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + star: \$(STAR --version | sed -e "s/STAR_//g") + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + gawk: \$(echo \$(gawk --version 2>&1) | sed 's/^.*GNU Awk //; s/, .*\$//') + END_VERSIONS + """ + } else { + """ + samtools faidx $fasta + NUM_BASES=`gawk '{sum = sum + \$2}END{if ((log(sum)/log(2))/2 - 1 > 14) {printf "%.0f", 14} else {printf "%.0f", (log(sum)/log(2))/2 - 1}}' ${fasta}.fai` + + mkdir star + STAR \\ + --runMode genomeGenerate \\ + --genomeDir star/ \\ + --genomeFastaFiles $fasta \\ + --sjdbGTFfile $gtf \\ + --runThreadN $task.cpus \\ + --genomeSAindexNbases \$NUM_BASES \\ + $memory \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + star: \$(STAR --version | sed -e "s/STAR_//g") + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + gawk: \$(echo \$(gawk --version 2>&1) | sed 's/^.*GNU Awk //; s/, .*\$//') + END_VERSIONS + """ + } +} diff --git a/modules/nf-core/bbmap/bbsplit/environment.yml b/modules/nf-core/bbmap/bbsplit/environment.yml new file mode 100644 index 0000000..4c6db62 --- /dev/null +++ b/modules/nf-core/bbmap/bbsplit/environment.yml @@ -0,0 +1,7 @@ +name: bbmap_bbsplit +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::bbmap=39.01 diff --git a/modules/nf-core/bbmap/bbsplit/main.nf b/modules/nf-core/bbmap/bbsplit/main.nf new file mode 100644 index 0000000..b1ae2c8 --- /dev/null +++ b/modules/nf-core/bbmap/bbsplit/main.nf @@ -0,0 +1,89 @@ +process BBMAP_BBSPLIT { + tag "$meta.id" + label 'process_high' + label 'error_retry' + + conda 
"${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/bbmap:39.01--h5c4e2a8_0': + 'biocontainers/bbmap:39.01--h5c4e2a8_0' }" + + input: + tuple val(meta), path(reads) + path index + path primary_ref + tuple val(other_ref_names), path (other_ref_paths) + val only_build_index + + output: + path "bbsplit" , optional:true, emit: index + tuple val(meta), path('*primary*fastq.gz'), optional:true, emit: primary_fastq + tuple val(meta), path('*fastq.gz') , optional:true, emit: all_fastq + tuple val(meta), path('*txt') , optional:true, emit: stats + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + + def avail_mem = 3072 + if (!task.memory) { + log.info '[BBSplit] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' + } else { + avail_mem = (task.memory.mega*0.8).intValue() + } + + def other_refs = [] + other_ref_names.eachWithIndex { name, index -> + other_refs << "ref_${name}=${other_ref_paths[index]}" + } + if (only_build_index) { + if (primary_ref && other_ref_names && other_ref_paths) { + """ + bbsplit.sh \\ + -Xmx${avail_mem}M \\ + ref_primary=$primary_ref \\ + ${other_refs.join(' ')} \\ + path=bbsplit \\ + threads=$task.cpus \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bbmap: \$(bbversion.sh | grep -v "Duplicate cpuset") + END_VERSIONS + """ + } else { + log.error 'ERROR: Please specify as input a primary fasta file along with names and paths to non-primary fasta files.' 
+ } + } else { + def index_files = '' + if (index) { + index_files = "path=$index" + } else if (primary_ref && other_ref_names && other_ref_paths) { + index_files = "ref_primary=${primary_ref} ${other_refs.join(' ')}" + } else { + log.error 'ERROR: Please either specify a BBSplit index as input or a primary fasta file along with names and paths to non-primary fasta files.' + } + def fastq_in = meta.single_end ? "in=${reads}" : "in=${reads[0]} in2=${reads[1]}" + def fastq_out = meta.single_end ? "basename=${prefix}_%.fastq.gz" : "basename=${prefix}_%_#.fastq.gz" + """ + bbsplit.sh \\ + -Xmx${avail_mem}M \\ + $index_files \\ + threads=$task.cpus \\ + $fastq_in \\ + $fastq_out \\ + refstats=${prefix}.stats.txt \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bbmap: \$(bbversion.sh | grep -v "Duplicate cpuset") + END_VERSIONS + """ + } +} diff --git a/modules/nf-core/bbmap/bbsplit/meta.yml b/modules/nf-core/bbmap/bbsplit/meta.yml new file mode 100644 index 0000000..f4e0a5e --- /dev/null +++ b/modules/nf-core/bbmap/bbsplit/meta.yml @@ -0,0 +1,74 @@ +name: bbmap_bbsplit +description: Split sequencing reads by mapping them to multiple references simultaneously +keywords: + - align + - map + - fastq + - genome + - reference +tools: + - bbmap: + description: BBMap is a short read aligner, as well as various other bioinformatic tools. + homepage: https://jgi.doe.gov/data-and-tools/bbtools/bb-tools-user-guide/ + documentation: https://jgi.doe.gov/data-and-tools/bbtools/bb-tools-user-guide/ + licence: ["UC-LBL license (see package)"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: | + List of input FastQ files of size 1 and 2 for single-end and paired-end data, + respectively. 
+ - index: + type: directory + description: Directory to place generated index + pattern: "*" + - primary_ref: + type: file + description: Path to the primary reference + pattern: "*" + - other_ref_names: + type: list + description: List of other reference ids apart from the primary + - other_ref_paths: + type: list + description: Path to other references paths corresponding to "other_ref_names" + - only_build_index: + type: string + description: true = only build index; false = mapping +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - index: + type: directory + description: Directory with index files + pattern: "bbsplit" + - primary_fastq: + type: file + description: Output reads that map to the primary reference + pattern: "*primary*fastq.gz" + - all_fastq: + type: file + description: All reads mapping to any of the references + pattern: "*fastq.gz" + - stats: + type: file + description: Tab-delimited text file containing mapping statistics + pattern: "*.txt" +authors: + - "@joseespinosa" + - "@drpatelh" +maintainers: + - "@joseespinosa" + - "@drpatelh" diff --git a/modules/nf-core/cat/fastq/environment.yml b/modules/nf-core/cat/fastq/environment.yml new file mode 100644 index 0000000..bff93ad --- /dev/null +++ b/modules/nf-core/cat/fastq/environment.yml @@ -0,0 +1,7 @@ +name: cat_fastq +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - conda-forge::sed=4.7 diff --git a/modules/nf-core/cat/fastq/main.nf b/modules/nf-core/cat/fastq/main.nf new file mode 100644 index 0000000..3d96378 --- /dev/null +++ b/modules/nf-core/cat/fastq/main.nf @@ -0,0 +1,80 @@ +process CAT_FASTQ { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : + 'nf-core/ubuntu:20.04' }" + + input: + tuple val(meta), path(reads, stageAs: "input*/*") + + output: + tuple val(meta), path("*.merged.fastq.gz"), emit: reads + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def readList = reads instanceof List ? reads.collect{ it.toString() } : [reads.toString()] + if (meta.single_end) { + if (readList.size >= 1) { + """ + cat ${readList.join(' ')} > ${prefix}.merged.fastq.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + cat: \$(echo \$(cat --version 2>&1) | sed 's/^.*coreutils) //; s/ .*\$//') + END_VERSIONS + """ + } + } else { + if (readList.size >= 2) { + def read1 = [] + def read2 = [] + readList.eachWithIndex{ v, ix -> ( ix & 1 ? read2 : read1 ) << v } + """ + cat ${read1.join(' ')} > ${prefix}_1.merged.fastq.gz + cat ${read2.join(' ')} > ${prefix}_2.merged.fastq.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + cat: \$(echo \$(cat --version 2>&1) | sed 's/^.*coreutils) //; s/ .*\$//') + END_VERSIONS + """ + } + } + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + def readList = reads instanceof List ? 
reads.collect{ it.toString() } : [reads.toString()] + if (meta.single_end) { + if (readList.size > 1) { + """ + touch ${prefix}.merged.fastq.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + cat: \$(echo \$(cat --version 2>&1) | sed 's/^.*coreutils) //; s/ .*\$//') + END_VERSIONS + """ + } + } else { + if (readList.size > 2) { + """ + touch ${prefix}_1.merged.fastq.gz + touch ${prefix}_2.merged.fastq.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + cat: \$(echo \$(cat --version 2>&1) | sed 's/^.*coreutils) //; s/ .*\$//') + END_VERSIONS + """ + } + } + +} diff --git a/modules/nf-core/cat/fastq/meta.yml b/modules/nf-core/cat/fastq/meta.yml new file mode 100644 index 0000000..db4ac3c --- /dev/null +++ b/modules/nf-core/cat/fastq/meta.yml @@ -0,0 +1,42 @@ +name: cat_fastq +description: Concatenates fastq files +keywords: + - cat + - fastq + - concatenate +tools: + - cat: + description: | + The cat utility reads files sequentially, writing them to the standard output. + documentation: https://www.gnu.org/software/coreutils/manual/html_node/cat-invocation.html + licence: ["GPL-3.0-or-later"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: | + List of input FastQ files to be concatenated. +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - reads: + type: file + description: Merged fastq file + pattern: "*.{merged.fastq.gz}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@joseespinosa" + - "@drpatelh" +maintainers: + - "@joseespinosa" + - "@drpatelh" diff --git a/modules/nf-core/cat/fastq/tests/main.nf.test b/modules/nf-core/cat/fastq/tests/main.nf.test new file mode 100644 index 0000000..f5f9418 --- /dev/null +++ b/modules/nf-core/cat/fastq/tests/main.nf.test @@ -0,0 +1,143 @@ +nextflow_process { + + name "Test Process CAT_FASTQ" + script "../main.nf" + process "CAT_FASTQ" + tag "modules" + tag "modules_nfcore" + tag "cat" + tag "cat/fastq" + + test("test_cat_fastq_single_end") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test2_1_fastq_gz'], checkIfExists: true) ] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.reads).match() }, + { assert path(process.out.versions.get(0)).getText().contains("cat") } + ) + } + } + + test("test_cat_fastq_paired_end") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test2_1_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test2_2_fastq_gz'], checkIfExists: true) ] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.reads).match() }, + { assert 
path(process.out.versions.get(0)).getText().contains("cat") } + ) + } + } + + test("test_cat_fastq_single_end_same_name") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) ] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.reads).match() }, + { assert path(process.out.versions.get(0)).getText().contains("cat") } + ) + } + } + + test("test_cat_fastq_paired_end_same_name") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) ] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.reads).match() }, + { assert path(process.out.versions.get(0)).getText().contains("cat") } + ) + } + } + + test("test_cat_fastq_single_end_single_file") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true)] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.reads).match() }, + { assert path(process.out.versions.get(0)).getText().contains("cat") } + ) + } + } +} diff --git a/modules/nf-core/cat/fastq/tests/main.nf.test.snap b/modules/nf-core/cat/fastq/tests/main.nf.test.snap new file 
mode 100644 index 0000000..ec2342e --- /dev/null +++ b/modules/nf-core/cat/fastq/tests/main.nf.test.snap @@ -0,0 +1,78 @@ +{ + "test_cat_fastq_single_end": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true + }, + "test.merged.fastq.gz:md5,f9cf5e375f7de81a406144a2c70cc64d" + ] + ] + ], + "timestamp": "2023-10-17T23:19:12.990284837" + }, + "test_cat_fastq_single_end_same_name": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true + }, + "test.merged.fastq.gz:md5,63f817db7a29a03eb538104495556f66" + ] + ] + ], + "timestamp": "2023-10-17T23:19:31.554568147" + }, + "test_cat_fastq_single_end_single_file": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true + }, + "test.merged.fastq.gz:md5,e325ef7deb4023447a1f074e285761af" + ] + ] + ], + "timestamp": "2023-10-17T23:19:49.629360033" + }, + "test_cat_fastq_paired_end_same_name": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_1.merged.fastq.gz:md5,63f817db7a29a03eb538104495556f66", + "test_2.merged.fastq.gz:md5,fe9f266f43a6fc3dcab690a18419a56e" + ] + ] + ] + ], + "timestamp": "2023-10-17T23:19:40.711617539" + }, + "test_cat_fastq_paired_end": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_1.merged.fastq.gz:md5,f9cf5e375f7de81a406144a2c70cc64d", + "test_2.merged.fastq.gz:md5,77c8e966e130d8c6b6ec9be52fcb2bda" + ] + ] + ] + ], + "timestamp": "2023-10-18T07:53:20.923560211" + } +} \ No newline at end of file diff --git a/modules/nf-core/cat/fastq/tests/tags.yml b/modules/nf-core/cat/fastq/tests/tags.yml new file mode 100644 index 0000000..6ac4361 --- /dev/null +++ b/modules/nf-core/cat/fastq/tests/tags.yml @@ -0,0 +1,2 @@ +cat/fastq: + - modules/nf-core/cat/fastq/** diff --git a/modules/nf-core/custom/getchromsizes/environment.yml b/modules/nf-core/custom/getchromsizes/environment.yml new file mode 100644 index 0000000..2a01695 --- /dev/null +++ b/modules/nf-core/custom/getchromsizes/environment.yml @@ -0,0 +1,7 
@@ +name: custom_getchromsizes +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::samtools=1.16.1 diff --git a/modules/nf-core/custom/getchromsizes/main.nf b/modules/nf-core/custom/getchromsizes/main.nf new file mode 100644 index 0000000..e8084ea --- /dev/null +++ b/modules/nf-core/custom/getchromsizes/main.nf @@ -0,0 +1,44 @@ +process CUSTOM_GETCHROMSIZES { + tag "$fasta" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/samtools:1.16.1--h6899075_1' : + 'biocontainers/samtools:1.16.1--h6899075_1' }" + + input: + tuple val(meta), path(fasta) + + output: + tuple val(meta), path ("*.sizes"), emit: sizes + tuple val(meta), path ("*.fai") , emit: fai + tuple val(meta), path ("*.gzi") , emit: gzi, optional: true + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + """ + samtools faidx $fasta + cut -f 1,2 ${fasta}.fai > ${fasta}.sizes + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + getchromsizes: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ + + stub: + """ + touch ${fasta}.fai + touch ${fasta}.sizes + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + getchromsizes: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/custom/getchromsizes/meta.yml b/modules/nf-core/custom/getchromsizes/meta.yml new file mode 100644 index 0000000..529be07 --- /dev/null +++ b/modules/nf-core/custom/getchromsizes/meta.yml @@ -0,0 +1,54 @@ +name: custom_getchromsizes +description: Generates a file of chromosome sizes and a FASTA index file +keywords: + - fasta + - chromosome + - indexing +tools: + - samtools: + description: Tools for dealing with SAM, BAM
and CRAM files + homepage: http://www.htslib.org/ + documentation: http://www.htslib.org/doc/samtools.html + tool_dev_url: https://github.com/samtools/samtools + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - fasta: + type: file + description: FASTA file + pattern: "*.{fa,fasta,fna,fas}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - sizes: + type: file + description: File containing chromosome lengths + pattern: "*.{sizes}" + - fai: + type: file + description: FASTA index file + pattern: "*.{fai}" + - gzi: + type: file + description: Optional gzip index file for compressed inputs + pattern: "*.gzi" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@tamara-hodgetts" + - "@chris-cheshire" + - "@muffato" +maintainers: + - "@tamara-hodgetts" + - "@chris-cheshire" + - "@muffato" diff --git a/modules/nf-core/custom/getchromsizes/tests/main.nf.test b/modules/nf-core/custom/getchromsizes/tests/main.nf.test new file mode 100644 index 0000000..844bd99 --- /dev/null +++ b/modules/nf-core/custom/getchromsizes/tests/main.nf.test @@ -0,0 +1,60 @@ +nextflow_process { + + name "Test Process CUSTOM_GETCHROMSIZES" + script "../main.nf" + process "CUSTOM_GETCHROMSIZES" + + tag "modules" + tag "modules_nfcore" + tag "custom" + tag "custom/getchromsizes" + + test("test_custom_getchromsizes") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ [ id:'test', single_end:false ], // meta map + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("test_custom_getchromsizes_bgzip") { + + when { + params { 
+ outdir = "$outputDir" + } + process { + """ + input[0] = [ [ id:'test', single_end:false ], // meta map + file(params.test_data['sarscov2']['genome']['genome_fasta_gz'], checkIfExists: true) + ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git a/modules/nf-core/custom/getchromsizes/tests/main.nf.test.snap b/modules/nf-core/custom/getchromsizes/tests/main.nf.test.snap new file mode 100644 index 0000000..142298e --- /dev/null +++ b/modules/nf-core/custom/getchromsizes/tests/main.nf.test.snap @@ -0,0 +1,124 @@ +{ + "test_custom_getchromsizes": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "genome.fasta.sizes:md5,a57c401f27ae5133823fb09fb21c8a3c" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "genome.fasta.fai:md5,9da2a56e2853dc8c0b86a9e7229c9fe5" + ] + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,45a83c5f3dddbc5dcab30035169f7ce8" + ], + "fai": [ + [ + { + "id": "test", + "single_end": false + }, + "genome.fasta.fai:md5,9da2a56e2853dc8c0b86a9e7229c9fe5" + ] + ], + "gzi": [ + + ], + "sizes": [ + [ + { + "id": "test", + "single_end": false + }, + "genome.fasta.sizes:md5,a57c401f27ae5133823fb09fb21c8a3c" + ] + ], + "versions": [ + "versions.yml:md5,45a83c5f3dddbc5dcab30035169f7ce8" + ] + } + ], + "timestamp": "2023-10-17T10:20:42.652464" + }, + "test_custom_getchromsizes_bgzip": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "genome.fasta.gz.sizes:md5,a57c401f27ae5133823fb09fb21c8a3c" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "genome.fasta.gz.fai:md5,9da2a56e2853dc8c0b86a9e7229c9fe5" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": false + }, + "genome.fasta.gz.gzi:md5,7dea362b3fac8e00956a4952a3d4f474" + ] + ], + "3": [ + "versions.yml:md5,45a83c5f3dddbc5dcab30035169f7ce8" + ], + "fai": [ + [ + { + "id": "test", + "single_end": false + }, + 
"genome.fasta.gz.fai:md5,9da2a56e2853dc8c0b86a9e7229c9fe5" + ] + ], + "gzi": [ + [ + { + "id": "test", + "single_end": false + }, + "genome.fasta.gz.gzi:md5,7dea362b3fac8e00956a4952a3d4f474" + ] + ], + "sizes": [ + [ + { + "id": "test", + "single_end": false + }, + "genome.fasta.gz.sizes:md5,a57c401f27ae5133823fb09fb21c8a3c" + ] + ], + "versions": [ + "versions.yml:md5,45a83c5f3dddbc5dcab30035169f7ce8" + ] + } + ], + "timestamp": "2023-10-17T10:22:25.185203" + } +} \ No newline at end of file diff --git a/modules/nf-core/custom/getchromsizes/tests/tags.yml b/modules/nf-core/custom/getchromsizes/tests/tags.yml new file mode 100644 index 0000000..d89a805 --- /dev/null +++ b/modules/nf-core/custom/getchromsizes/tests/tags.yml @@ -0,0 +1,2 @@ +custom/getchromsizes: + - modules/nf-core/custom/getchromsizes/** diff --git a/modules/nf-core/cutadapt/environment.yml b/modules/nf-core/cutadapt/environment.yml new file mode 100644 index 0000000..d32a8f9 --- /dev/null +++ b/modules/nf-core/cutadapt/environment.yml @@ -0,0 +1,7 @@ +name: cutadapt +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::cutadapt=3.4 diff --git a/modules/nf-core/cutadapt/main.nf b/modules/nf-core/cutadapt/main.nf new file mode 100644 index 0000000..e232a70 --- /dev/null +++ b/modules/nf-core/cutadapt/main.nf @@ -0,0 +1,50 @@ +process CUTADAPT { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/cutadapt:3.4--py39h38f01e4_1' : + 'biocontainers/cutadapt:3.4--py39h38f01e4_1' }" + + input: + tuple val(meta), path(reads) + + output: + tuple val(meta), path('*.trim.fastq.gz'), emit: reads + tuple val(meta), path('*.log') , emit: log + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def trimmed = meta.single_end ? "-o ${prefix}.trim.fastq.gz" : "-o ${prefix}_1.trim.fastq.gz -p ${prefix}_2.trim.fastq.gz" + """ + cutadapt \\ + --cores $task.cpus \\ + $args \\ + $trimmed \\ + $reads \\ + > ${prefix}.cutadapt.log + cat <<-END_VERSIONS > versions.yml + "${task.process}": + cutadapt: \$(cutadapt --version) + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + def trimmed = meta.single_end ? "${prefix}.trim.fastq.gz" : "${prefix}_1.trim.fastq.gz ${prefix}_2.trim.fastq.gz" + """ + touch ${prefix}.cutadapt.log + touch ${trimmed} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + cutadapt: \$(cutadapt --version) + END_VERSIONS + """ +} diff --git a/modules/nf-core/cutadapt/meta.yml b/modules/nf-core/cutadapt/meta.yml new file mode 100644 index 0000000..5ecfe27 --- /dev/null +++ b/modules/nf-core/cutadapt/meta.yml @@ -0,0 +1,49 @@ +name: cutadapt +description: Perform adapter/quality trimming on sequencing reads +keywords: + - trimming + - adapter trimming + - adapters + - quality trimming +tools: + - cutadapt: + description: | + Cutadapt finds and removes adapter sequences, primers, poly-A tails and other types of unwanted sequence from your high-throughput sequencing reads. + documentation: https://cutadapt.readthedocs.io/en/stable/index.html + doi: 10.14806/ej.17.1.200 + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g.
[ id:'test', single_end:false ] + - reads: + type: file + description: | + List of input FastQ files of size 1 and 2 for single-end and paired-end data, + respectively. +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: The trimmed/modified fastq reads + pattern: "*fastq.gz" + - log: + type: file + description: cutadapt log file + pattern: "*cutadapt.log" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" + - "@kevinmenden" +maintainers: + - "@drpatelh" + - "@kevinmenden" diff --git a/modules/nf-core/cutadapt/tests/main.nf.test b/modules/nf-core/cutadapt/tests/main.nf.test new file mode 100644 index 0000000..819c8f9 --- /dev/null +++ b/modules/nf-core/cutadapt/tests/main.nf.test @@ -0,0 +1,69 @@ +nextflow_process { + + name "Test Process CUTADAPT" + script "../main.nf" + process "CUTADAPT" + tag "modules" + tag "modules_nfcore" + tag "cutadapt" + + test("sarscov2 Illumina single end [fastq]") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ + [ id: 'test', single_end:true ], + [ + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) + ] + ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert process.out.reads != null }, + { assert process.out.reads.get(0).get(1) ==~ ".*.trim.fastq.gz" }, + { assert snapshot(process.out.versions).match("versions") }, + { assert snapshot(path(process.out.reads.get(0).get(1)).linesGzip[0]).match() } + ) + } + } + + test("sarscov2 Illumina paired-end [fastq]") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ + [ id: 'test', single_end:false ], + [ + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'],
checkIfExists: true) + ] + ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert process.out.reads != null }, + { assert process.out.reads.get(0).get(1).get(0) ==~ ".*.1.trim.fastq.gz" }, + { assert process.out.reads.get(0).get(1).get(1) ==~ ".*.2.trim.fastq.gz" }, + { assert snapshot(path(process.out.reads.get(0).get(1).get(1)).linesGzip[0]).match() }, + { assert snapshot(process.out.versions).match("versions") } + ) + } + } +} diff --git a/modules/nf-core/cutadapt/tests/main.nf.test.snap b/modules/nf-core/cutadapt/tests/main.nf.test.snap new file mode 100644 index 0000000..4bdee0b --- /dev/null +++ b/modules/nf-core/cutadapt/tests/main.nf.test.snap @@ -0,0 +1,22 @@ +{ + "versions": { + "content": [ + [ + "versions.yml:md5,d37c5b9e465accf6d836972608795071" + ] + ], + "timestamp": "2023-10-24T11:22:34.352529" + }, + "Single-Read": { + "content": [ + "@ERR5069949.2151832 NS500628:121:HK3MMAFX2:2:21208:10793:15304/1" + ], + "timestamp": "2023-10-24T11:22:34.364556" + }, + "Paired-Reads": { + "content": [ + "@ERR5069949.2151832 NS500628:121:HK3MMAFX2:2:21208:10793:15304/2" + ], + "timestamp": "2023-10-24T11:22:41.877854" + } +} \ No newline at end of file diff --git a/modules/nf-core/cutadapt/tests/nextflow.config b/modules/nf-core/cutadapt/tests/nextflow.config new file mode 100644 index 0000000..6c3b425 --- /dev/null +++ b/modules/nf-core/cutadapt/tests/nextflow.config @@ -0,0 +1,7 @@ +process { + + withName: CUTADAPT { + ext.args = '-q 25' + } + +} diff --git a/modules/nf-core/cutadapt/tests/tags.yml b/modules/nf-core/cutadapt/tests/tags.yml new file mode 100644 index 0000000..f64f997 --- /dev/null +++ b/modules/nf-core/cutadapt/tests/tags.yml @@ -0,0 +1,2 @@ +cutadapt: + - modules/nf-core/cutadapt/** diff --git a/modules/nf-core/fastp/environment.yml b/modules/nf-core/fastp/environment.yml new file mode 100644 index 0000000..70389e6 --- /dev/null +++ b/modules/nf-core/fastp/environment.yml @@ -0,0 +1,7 @@ +name: fastp +channels: + - 
conda-forge + - bioconda + - defaults +dependencies: + - bioconda::fastp=0.23.4 diff --git a/modules/nf-core/fastp/main.nf b/modules/nf-core/fastp/main.nf new file mode 100644 index 0000000..5fac3c1 --- /dev/null +++ b/modules/nf-core/fastp/main.nf @@ -0,0 +1,102 @@ +process FASTP { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/fastp:0.23.4--h5f740d0_0' : + 'biocontainers/fastp:0.23.4--h5f740d0_0' }" + + input: + tuple val(meta), path(reads) + path adapter_fasta + val save_trimmed_fail + val save_merged + + output: + tuple val(meta), path('*.fastp.fastq.gz') , optional:true, emit: reads + tuple val(meta), path('*.json') , emit: json + tuple val(meta), path('*.html') , emit: html + tuple val(meta), path('*.log') , emit: log + path "versions.yml" , emit: versions + tuple val(meta), path('*.fail.fastq.gz') , optional:true, emit: reads_fail + tuple val(meta), path('*.merged.fastq.gz'), optional:true, emit: reads_merged + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def adapter_list = adapter_fasta ? "--adapter_fasta ${adapter_fasta}" : "" + def fail_fastq = save_trimmed_fail && meta.single_end ? "--failed_out ${prefix}.fail.fastq.gz" : save_trimmed_fail && !meta.single_end ? "--unpaired1 ${prefix}_1.fail.fastq.gz --unpaired2 ${prefix}_2.fail.fastq.gz" : '' + // Added soft-links to original fastqs for consistent naming in MultiQC + // Use single ended for interleaved. Add --interleaved_in in config. + if ( task.ext.args?.contains('--interleaved_in') ) { + """ + [ ! 
-f ${prefix}.fastq.gz ] && ln -sf $reads ${prefix}.fastq.gz + + fastp \\ + --stdout \\ + --in1 ${prefix}.fastq.gz \\ + --thread $task.cpus \\ + --json ${prefix}.fastp.json \\ + --html ${prefix}.fastp.html \\ + $adapter_list \\ + $fail_fastq \\ + $args \\ + 2> >(tee ${prefix}.fastp.log >&2) \\ + | gzip -c > ${prefix}.fastp.fastq.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + fastp: \$(fastp --version 2>&1 | sed -e "s/fastp //g") + END_VERSIONS + """ + } else if (meta.single_end) { + """ + [ ! -f ${prefix}.fastq.gz ] && ln -sf $reads ${prefix}.fastq.gz + + fastp \\ + --in1 ${prefix}.fastq.gz \\ + --out1 ${prefix}.fastp.fastq.gz \\ + --thread $task.cpus \\ + --json ${prefix}.fastp.json \\ + --html ${prefix}.fastp.html \\ + $adapter_list \\ + $fail_fastq \\ + $args \\ + 2> >(tee ${prefix}.fastp.log >&2) + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + fastp: \$(fastp --version 2>&1 | sed -e "s/fastp //g") + END_VERSIONS + """ + } else { + def merge_fastq = save_merged ? "-m --merged_out ${prefix}.merged.fastq.gz" : '' + """ + [ ! -f ${prefix}_1.fastq.gz ] && ln -sf ${reads[0]} ${prefix}_1.fastq.gz + [ ! 
-f ${prefix}_2.fastq.gz ] && ln -sf ${reads[1]} ${prefix}_2.fastq.gz + fastp \\ + --in1 ${prefix}_1.fastq.gz \\ + --in2 ${prefix}_2.fastq.gz \\ + --out1 ${prefix}_1.fastp.fastq.gz \\ + --out2 ${prefix}_2.fastp.fastq.gz \\ + --json ${prefix}.fastp.json \\ + --html ${prefix}.fastp.html \\ + $adapter_list \\ + $fail_fastq \\ + $merge_fastq \\ + --thread $task.cpus \\ + --detect_adapter_for_pe \\ + $args \\ + 2> >(tee ${prefix}.fastp.log >&2) + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + fastp: \$(fastp --version 2>&1 | sed -e "s/fastp //g") + END_VERSIONS + """ + } +} diff --git a/modules/nf-core/fastp/meta.yml b/modules/nf-core/fastp/meta.yml new file mode 100644 index 0000000..c22a16a --- /dev/null +++ b/modules/nf-core/fastp/meta.yml @@ -0,0 +1,75 @@ +name: fastp +description: Perform adapter/quality trimming on sequencing reads +keywords: + - trimming + - quality control + - fastq +tools: + - fastp: + description: | + A tool designed to provide fast all-in-one preprocessing for FastQ files. This tool is developed in C++ with multithreading supported to afford high performance. + documentation: https://github.com/OpenGene/fastp + doi: 10.1093/bioinformatics/bty560 + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information. Use 'single_end: true' to specify single ended or interleaved FASTQs. Use 'single_end: false' for paired-end reads. + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: | + List of input FastQ files of size 1 and 2 for single-end and paired-end data, + respectively. If you wish to run interleaved paired-end data, supply as single-end data + but with `--interleaved_in` in your `modules.config`'s `ext.args` for the module. + - adapter_fasta: + type: file + description: File in FASTA format containing possible adapters to remove.
+ pattern: "*.{fasta,fna,fas,fa}" + - save_trimmed_fail: + type: boolean + description: Specify true to save files that failed to pass trimming thresholds ending in `*.fail.fastq.gz` + - save_merged: + type: boolean + description: Specify true to save all merged reads to a file ending in `*.merged.fastq.gz` +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: The trimmed/modified/unmerged fastq reads + pattern: "*fastp.fastq.gz" + - json: + type: file + description: Results in JSON format + pattern: "*.json" + - html: + type: file + description: Results in HTML format + pattern: "*.html" + - log: + type: file + description: fastp log file + pattern: "*.log" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - reads_fail: + type: file + description: Reads that failed the preprocessing + pattern: "*fail.fastq.gz" + - reads_merged: + type: file + description: Reads that were successfully merged + pattern: "*.{merged.fastq.gz}" +authors: + - "@drpatelh" + - "@kevinmenden" +maintainers: + - "@drpatelh" + - "@kevinmenden" diff --git a/modules/nf-core/fastp/tests/main.nf.test b/modules/nf-core/fastp/tests/main.nf.test new file mode 100644 index 0000000..f610b73 --- /dev/null +++ b/modules/nf-core/fastp/tests/main.nf.test @@ -0,0 +1,485 @@ +nextflow_process { + + name "Test Process FASTP" + script "../main.nf" + process "FASTP" + tag "modules" + tag "modules_nfcore" + tag "fastp" + + test("test_fastp_single_end") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + adapter_fasta = [] + save_trimmed_fail = false + save_merged = false + + input[0] = [ + [ id:'test', single_end:true ], + [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) ] + ] + + input[1] = adapter_fasta + input[2] = save_trimmed_fail + input[3] = save_merged + """ + } + } + + then {
+ def html_text = [ "Q20 bases:12.922000 K (92.984097%)", + "single end (151 cycles)" ] + def log_text = [ "Q20 bases: 12922(92.9841%)", + "reads passed filter: 99" ] + def read_lines = ["@ERR5069949.2151832 NS500628:121:HK3MMAFX2:2:21208:10793:15304/1", + "TCATAAACCAAAGCACTCACAGTGTCAACAATTTCAGCAGGACAACGCCGACAAGTTCCGAGGAACATGTCTGGACCTATAGTTTTCATAAGTCTACACACTGAATTGAAATATTCTGGTTCTAGTGTGCCCTTAGTTAGCAATGTGCGT", + "AAAAAAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEE + { assert path(process.out.reads.get(0).get(1)).linesGzip.contains(read_line) } + } + }, + { html_text.each { html_part -> + { assert path(process.out.html.get(0).get(1)).getText().contains(html_part) } + } + }, + { assert snapshot(process.out.json).match("test_fastp_single_end_json") }, + { log_text.each { log_part -> + { assert path(process.out.log.get(0).get(1)).getText().contains(log_part) } + } + }, + { assert snapshot(process.out.versions).match("versions") } + ) + } + } + + test("test_fastp_paired_end") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + adapter_fasta = [] + save_trimmed_fail = false + save_merged = false + + input[0] = [ + [ id:'test', single_end:false ], // meta map + [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) ] + ] + + input[1] = adapter_fasta + input[2] = save_trimmed_fail + input[3] = save_merged + """ + } + } + + then { + def html_text = [ "Q20 bases:25.719000 K (93.033098%)", + "The input has little adapter percentage (~0.000000%), probably it's trimmed before."] + def log_text = [ "No adapter detected for read1", + "Q30 bases: 12281(88.3716%)"] + def json_text = ['"passed_filter_reads": 198'] + def read1_lines = ["@ERR5069949.2151832 NS500628:121:HK3MMAFX2:2:21208:10793:15304/1", + 
"TCATAAACCAAAGCACTCACAGTGTCAACAATTTCAGCAGGACAACGCCGACAAGTTCCGAGGAACATGTCTGGACCTATAGTTTTCATAAGTCTACACACTGAATTGAAATATTCTGGTTCTAGTGTGCCCTTAGTTAGCAATGTGCGT", + "AAAAAAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEE + { assert path(process.out.reads.get(0).get(1).get(0)).linesGzip.contains(read1_line) } + } + }, + { read2_lines.each { read2_line -> + { assert path(process.out.reads.get(0).get(1).get(1)).linesGzip.contains(read2_line) } + } + }, + { html_text.each { html_part -> + { assert path(process.out.html.get(0).get(1)).getText().contains(html_part) } + } + }, + { json_text.each { json_part -> + { assert path(process.out.json.get(0).get(1)).getText().contains(json_part) } + } + }, + { log_text.each { log_part -> + { assert path(process.out.log.get(0).get(1)).getText().contains(log_part) } + } + }, + { assert snapshot(process.out.versions).match("versions") } + ) + } + } + + test("fastp test_fastp_interleaved") { + config './nextflow.config' + when { + params { + outdir = "$outputDir" + } + process { + """ + adapter_fasta = [] + save_trimmed_fail = false + save_merged = false + + input[0] = [ [ id:'test', single_end:true ], // meta map + [ file(params.test_data['sarscov2']['illumina']['test_interleaved_fastq_gz'], checkIfExists: true) ] + ] + + input[1] = adapter_fasta + input[2] = save_trimmed_fail + input[3] = save_merged + """ + } + } + + then { + def html_text = [ "Q20 bases:25.719000 K (93.033098%)", + "paired end (151 cycles + 151 cycles)"] + def log_text = [ "Q20 bases: 12922(92.9841%)", + "reads passed filter: 198"] + def read_lines = [ "@ERR5069949.2151832 NS500628:121:HK3MMAFX2:2:21208:10793:15304/1", + "TCATAAACCAAAGCACTCACAGTGTCAACAATTTCAGCAGGACAACGCCGACAAGTTCCGAGGAACATGTCTGGACCTATAGTTTTCATAAGTCTACACACTGAATTGAAATATTCTGGTTCTAGTGTGCCCTTAGTTAGCAATGTGCGT", + "AAAAAAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEE + { assert 
path(process.out.reads.get(0).get(1)).linesGzip.contains(read_line) } + } + }, + { html_text.each { html_part -> + { assert path(process.out.html.get(0).get(1)).getText().contains(html_part) } + } + }, + { assert snapshot(process.out.json).match("fastp test_fastp_interleaved_json") }, + { log_text.each { log_part -> + { assert path(process.out.log.get(0).get(1)).getText().contains(log_part) } + } + }, + { assert snapshot(process.out.versions).match("versions") } + ) + } + } + + test("test_fastp_single_end_trim_fail") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + adapter_fasta = [] + save_trimmed_fail = true + save_merged = false + + input[0] = [ [ id:'test', single_end:true ], // meta map + [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) ] + ] + input[1] = adapter_fasta + input[2] = save_trimmed_fail + input[3] = save_merged + """ + } + } + + then { + def html_text = [ "Q20 bases:12.922000 K (92.984097%)", + "single end (151 cycles)"] + def log_text = [ "Q20 bases: 12922(92.9841%)", + "reads passed filter: 99" ] + def read_lines = [ "@ERR5069949.2151832 NS500628:121:HK3MMAFX2:2:21208:10793:15304/1", + "TCATAAACCAAAGCACTCACAGTGTCAACAATTTCAGCAGGACAACGCCGACAAGTTCCGAGGAACATGTCTGGACCTATAGTTTTCATAAGTCTACACACTGAATTGAAATATTCTGGTTCTAGTGTGCCCTTAGTTAGCAATGTGCGT", + "AAAAAAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEE + { assert path(process.out.reads.get(0).get(1)).linesGzip.contains(read_line) } + } + }, + { failed_read_lines.each { failed_read_line -> + { assert path(process.out.reads_fail.get(0).get(1)).linesGzip.contains(failed_read_line) } + } + }, + { html_text.each { html_part -> + { assert path(process.out.html.get(0).get(1)).getText().contains(html_part) } + } + }, + { assert snapshot(process.out.json).match("test_fastp_single_end_trim_fail_json") }, + { log_text.each { log_part -> + { assert 
path(process.out.log.get(0).get(1)).getText().contains(log_part) } + } + }, + { assert snapshot(process.out.versions).match("versions") } + ) + } + } + + test("test_fastp_paired_end_trim_fail") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + adapter_fasta = [] + save_trimmed_fail = true + save_merged = false + + input[0] = [ + [ id:'test', single_end:false ], // meta map + [ + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) + ] + ] + input[1] = adapter_fasta + input[2] = save_trimmed_fail + input[3] = save_merged + """ + } + } + + then { + def html_text = [ "Q20 bases:25.719000 K (93.033098%)", + "The input has little adapter percentage (~0.000000%), probably it's trimmed before."] + def log_text = [ "No adapter detected for read1", + "Q30 bases: 12281(88.3716%)"] + def json_text = ['"passed_filter_reads": 198'] + def read1_lines = ["@ERR5069949.2151832 NS500628:121:HK3MMAFX2:2:21208:10793:15304/1", + "TCATAAACCAAAGCACTCACAGTGTCAACAATTTCAGCAGGACAACGCCGACAAGTTCCGAGGAACATGTCTGGACCTATAGTTTTCATAAGTCTACACACTGAATTGAAATATTCTGGTTCTAGTGTGCCCTTAGTTAGCAATGTGCGT", + "AAAAAAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEE + { assert path(process.out.reads.get(0).get(1).get(0)).linesGzip.contains(read1_line) } + } + }, + { read2_lines.each { read2_line -> + { assert path(process.out.reads.get(0).get(1).get(1)).linesGzip.contains(read2_line) } + } + }, + { failed_read2_lines.each { failed_read2_line -> + { assert path(process.out.reads_fail.get(0).get(1).get(1)).linesGzip.contains(failed_read2_line) } + } + }, + { html_text.each { html_part -> + { assert path(process.out.html.get(0).get(1)).getText().contains(html_part) } + } + }, + { json_text.each { json_part -> + { assert path(process.out.json.get(0).get(1)).getText().contains(json_part) } + } + }, + { log_text.each { 
log_part -> + { assert path(process.out.log.get(0).get(1)).getText().contains(log_part) } + } + }, + { assert snapshot(process.out.versions).match("versions") } + ) + } + } + + test("test_fastp_paired_end_merged") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + adapter_fasta = [] + save_trimmed_fail = false + save_merged = true + + input[0] = [ [ id:'test', single_end:false ], // meta map + [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) ] + ] + input[1] = adapter_fasta + input[2] = save_trimmed_fail + input[3] = save_merged + """ + } + } + + then { + def html_text = [ "
    "] + def log_text = [ "Merged and filtered:", + "total reads: 75", + "total bases: 13683"] + def json_text = ['"merged_and_filtered": {', '"total_reads": 75', '"total_bases": 13683'] + def read1_lines = [ "@ERR5069949.1066259 NS500628:121:HK3MMAFX2:1:11312:18369:8333/1", + "CCTTATGACAGCAAGAACTGTGTATGATGATGGTGCTAGGAGAGTGTGGACACTTATGAATGTCTTGACACTCGTTTATAAAGTTTATTATGGTAATGCTTTAGATCAAGCCATTTCCATGTGGGCTCTTATAATCTCTGTTACTTC", + "AAAAAEAEEAEEEEEEEEEEEEEEEEAEEEEAEEEEEEEEAEEEEEEEEEEEEEEEEE/EAEEEEEE/6EEEEEEEEEEAEEAEEE/EE/AEEAEEEEEAEEEA/EEAAEAE + { assert path(process.out.reads.get(0).get(1).get(0)).linesGzip.contains(read1_line) } + } + }, + { read2_lines.each { read2_line -> + { assert path(process.out.reads.get(0).get(1).get(1)).linesGzip.contains(read2_line) } + } + }, + { read_merged_lines.each { read_merged_line -> + { assert path(process.out.reads_merged.get(0).get(1)).linesGzip.contains(read_merged_line) } + } + }, + { html_text.each { html_part -> + { assert path(process.out.html.get(0).get(1)).getText().contains(html_part) } + } + }, + { json_text.each { json_part -> + { assert path(process.out.json.get(0).get(1)).getText().contains(json_part) } + } + }, + { log_text.each { log_part -> + { assert path(process.out.log.get(0).get(1)).getText().contains(log_part) } + } + }, + { assert snapshot(process.out.versions).match("versions") } + ) + } + } + + test("test_fastp_paired_end_merged_adapterlist") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + adapter_fasta = file("https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/fastp/adapters.fasta", checkIfExists: true) + save_trimmed_fail = false + save_merged = true + + input[0] = [ [ id:'test', single_end:false ], // meta map + [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) ] + ] + input[1] = adapter_fasta + input[2] = save_trimmed_fail + input[3] = 
save_merged + """ + } + } + + then { + def html_text = [ "
    "] + def log_text = [ "Merged and filtered:", + "total reads: 75", + "total bases: 13683"] + def json_text = ['"merged_and_filtered": {', '"total_reads": 75', '"total_bases": 13683',"--adapter_fasta"] + def read1_lines = ["@ERR5069949.1066259 NS500628:121:HK3MMAFX2:1:11312:18369:8333/1", + "CCTTATGACAGCAAGAACTGTGTATGATGATGGTGCTAGGAGAGTGTGGACACTTATGAATGTCTTGACACTCGTTTATAAAGTTTATTATGGTAATGCTTTAGATCAAGCCATTTCCATGTGGGCTCTTATAATCTCTGTTACTTC", + "AAAAAEAEEAEEEEEEEEEEEEEEEEAEEEEAEEEEEEEEAEEEEEEEEEEEEEEEEE/EAEEEEEE/6EEEEEEEEEEAEEAEEE/EE/AEEAEEEEEAEEEA/EEAAEAE + { assert path(process.out.reads.get(0).get(1).get(0)).linesGzip.contains(read1_line) } + } + }, + { read2_lines.each { read2_line -> + { assert path(process.out.reads.get(0).get(1).get(1)).linesGzip.contains(read2_line) } + } + }, + { read_merged_lines.each { read_merged_line -> + { assert path(process.out.reads_merged.get(0).get(1)).linesGzip.contains(read_merged_line) } + } + }, + { html_text.each { html_part -> + { assert path(process.out.html.get(0).get(1)).getText().contains(html_part) } + } + }, + { json_text.each { json_part -> + { assert path(process.out.json.get(0).get(1)).getText().contains(json_part) } + } + }, + { log_text.each { log_part -> + { assert path(process.out.log.get(0).get(1)).getText().contains(log_part) } + } + }, + { assert snapshot(process.out.versions).match("versions") } + ) + } + } +} diff --git a/modules/nf-core/fastp/tests/main.nf.test.snap b/modules/nf-core/fastp/tests/main.nf.test.snap new file mode 100644 index 0000000..0fa68c7 --- /dev/null +++ b/modules/nf-core/fastp/tests/main.nf.test.snap @@ -0,0 +1,52 @@ +{ + "fastp test_fastp_interleaved_json": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.json:md5,168f516f7bd4b7b6c32da7cba87299a4" + ] + ] + ], + "timestamp": "2023-10-17T11:04:45.794175881" + }, + "test_fastp_single_end_json": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true + }, + 
"test.fastp.json:md5,c852d7a6dba5819e4ac8d9673bedcacc" + ] + ] + ], + "timestamp": "2023-10-17T11:04:10.566343705" + }, + "versions": { + "content": [ + [ + "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02" + ] + ], + "timestamp": "2023-10-17T11:04:10.582076024" + }, + "test_fastp_single_end_trim_fail_json": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.json:md5,9a7ee180f000e8d00c7fb67f06293eb5" + ] + ] + ], + "timestamp": "2023-10-17T11:05:00.379878948" + } +} \ No newline at end of file diff --git a/modules/nf-core/fastp/tests/nextflow.config b/modules/nf-core/fastp/tests/nextflow.config new file mode 100644 index 0000000..0f7849a --- /dev/null +++ b/modules/nf-core/fastp/tests/nextflow.config @@ -0,0 +1,6 @@ +process { + + withName: FASTP { + ext.args = "--interleaved_in" + } +} diff --git a/modules/nf-core/fastp/tests/tags.yml b/modules/nf-core/fastp/tests/tags.yml new file mode 100644 index 0000000..c1afcce --- /dev/null +++ b/modules/nf-core/fastp/tests/tags.yml @@ -0,0 +1,2 @@ +fastp: + - modules/nf-core/fastp/** diff --git a/modules/nf-core/fq/subsample/environment.yml b/modules/nf-core/fq/subsample/environment.yml new file mode 100644 index 0000000..c588de3 --- /dev/null +++ b/modules/nf-core/fq/subsample/environment.yml @@ -0,0 +1,7 @@ +name: fq_subsample +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::fq=0.9.1 diff --git a/modules/nf-core/fq/subsample/main.nf b/modules/nf-core/fq/subsample/main.nf new file mode 100644 index 0000000..f3d8cc7 --- /dev/null +++ b/modules/nf-core/fq/subsample/main.nf @@ -0,0 +1,55 @@ +process FQ_SUBSAMPLE { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/fq:0.9.1--h9ee0642_0': + 'biocontainers/fq:0.9.1--h9ee0642_0' }" + + input: + tuple val(meta), path(fastq) + + output: + tuple val(meta), path("*.fastq.gz"), emit: fastq + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + /* args requires: + --probability : Probability read is kept, between 0 and 1. Mutually exclusive with record-count. + --record-count : Number of records to keep. Mutually exclusive with probability + */ + def args = task.ext.args ?: '' + def prob_exists = args =~ /-p|--probability/ + def nrec_exists = args =~ /-n|--record-count/ + if ( !(prob_exists || nrec_exists) ){ + error "FQ/SUBSAMPLE requires --probability (-p) or --record-count (-n) specified in task.ext.args!" + } + def prefix = task.ext.prefix ?: "${meta.id}" + def n_fastq = fastq instanceof List ? fastq.size() : 1 + log.debug "FQ/SUBSAMPLE found ${n_fastq} FASTQ files" + if ( n_fastq == 1 ){ + fastq1_output = "--r1-dst ${prefix}.fastq.gz" + fastq2_output = "" + } else if ( n_fastq == 2 ){ + fastq1_output = "--r1-dst ${prefix}_R1.fastq.gz" + fastq2_output = "--r2-dst ${prefix}_R2.fastq.gz" + } else { + error "FQ/SUBSAMPLE only accepts 1 or 2 FASTQ files!" + } + """ + fq subsample \\ + $args \\ + $fastq \\ + $fastq1_output \\ + $fastq2_output + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + fq: \$(echo \$(fq subsample --version | sed 's/fq-subsample //g')) + END_VERSIONS + """ +} diff --git a/modules/nf-core/fq/subsample/meta.yml b/modules/nf-core/fq/subsample/meta.yml new file mode 100644 index 0000000..d4f1d1f --- /dev/null +++ b/modules/nf-core/fq/subsample/meta.yml @@ -0,0 +1,40 @@ +name: "fq_subsample" +description: fq subsample outputs a subset of records from single or paired FASTQ files. This requires a seed (--seed) to be set in ext.args. +keywords: + - fastq + - sample +tools: + - "fq": + description: "fq is a library to generate and validate FASTQ file pairs." 
+ homepage: "https://github.com/stjude-rust-labs/fq" + documentation: "https://github.com/stjude-rust-labs/fq" + tool_dev_url: "https://github.com/stjude-rust-labs/fq" + licence: "['MIT']" +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - fastq: + type: file + description: FASTQ file + pattern: "*.{fq,fastq}{,.gz}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - fastq: + type: file + description: Randomly sampled FASTQ files. + pattern: "*_R[12].fastq.gz" +authors: + - "@adamrtalbot" +maintainers: + - "@adamrtalbot" diff --git a/modules/nf-core/fq/subsample/tests/main.nf.test b/modules/nf-core/fq/subsample/tests/main.nf.test new file mode 100644 index 0000000..d1b4c9f --- /dev/null +++ b/modules/nf-core/fq/subsample/tests/main.nf.test @@ -0,0 +1,139 @@ +nextflow_process { + + name "Test Process FQ_SUBSAMPLE" + script "../main.nf" + process "FQ_SUBSAMPLE" + + tag "modules" + tag "modules_nfcore" + tag "fq" + tag "fq/subsample" + + test("test_fq_subsample_no_args") { + config "./nextflow_no_args.config" + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ [ id:'test', single_end:false ], // meta map + [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) + ] + ] + """ + } + } + + then { + assertAll ( + { assert !process.success }, + { assert snapshot(process.out).match() }, + ) + } + + } + + test("test_fq_subsample_probability") { + config "./nextflow_probability.config" + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ [ id:'test', single_end:false ], // meta map + [ 
file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) + ] + ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() }, + ) + } + + } + + test("test_fq_subsample_record_count") { + config "./nextflow_record_count.config" + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ [ id:'test', single_end:false ], // meta map + [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) + ] + ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() }, + ) + } + + } + + test("test_fq_subsample_single") { + config "./nextflow.config" + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ [ id:'test', single_end:false ], // meta map + [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), + ] + ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() }, + ) + } + + } + + test("test_fq_subsample_no_input") { + config "./nextflow.config" + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ [ id:'test', single_end:false ], // meta map + [] + ] + """ + } + } + + then { + assertAll ( + { assert !process.success }, + { assert snapshot(process.out).match() }, + ) + } + + } + +} \ No newline at end of file diff --git a/modules/nf-core/fq/subsample/tests/main.nf.test.snap b/modules/nf-core/fq/subsample/tests/main.nf.test.snap new file mode 100644 index 0000000..2eea16b --- /dev/null +++ b/modules/nf-core/fq/subsample/tests/main.nf.test.snap @@ -0,0 +1,145 @@ +{ + "test_fq_subsample_probability": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + 
}, + [ + "test_R1.fastq.gz:md5,b5da750e06284b86b6d02229ac63949e", + "test_R2.fastq.gz:md5,b22a836a135a226b578a5f7813de3d6e" + ] + ] + ], + "1": [ + "versions.yml:md5,ffbf935bd0de512fbc9e83b187bf924f" + ], + "fastq": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_R1.fastq.gz:md5,b5da750e06284b86b6d02229ac63949e", + "test_R2.fastq.gz:md5,b22a836a135a226b578a5f7813de3d6e" + ] + ] + ], + "versions": [ + "versions.yml:md5,ffbf935bd0de512fbc9e83b187bf924f" + ] + } + ], + "timestamp": "2023-10-17T11:17:16.981462" + }, + "test_fq_subsample_record_count": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_R1.fastq.gz:md5,5d93c45561d7a77299fe6dfdaaaa02ce", + "test_R2.fastq.gz:md5,3a29e0716b14d81bf84be41af5c9eb78" + ] + ] + ], + "1": [ + "versions.yml:md5,ffbf935bd0de512fbc9e83b187bf924f" + ], + "fastq": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_R1.fastq.gz:md5,5d93c45561d7a77299fe6dfdaaaa02ce", + "test_R2.fastq.gz:md5,3a29e0716b14d81bf84be41af5c9eb78" + ] + ] + ], + "versions": [ + "versions.yml:md5,ffbf935bd0de512fbc9e83b187bf924f" + ] + } + ], + "timestamp": "2023-10-17T11:17:25.17608" + }, + "test_fq_subsample_single": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastq.gz:md5,b5da750e06284b86b6d02229ac63949e" + ] + ], + "1": [ + "versions.yml:md5,ffbf935bd0de512fbc9e83b187bf924f" + ], + "fastq": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastq.gz:md5,b5da750e06284b86b6d02229ac63949e" + ] + ], + "versions": [ + "versions.yml:md5,ffbf935bd0de512fbc9e83b187bf924f" + ] + } + ], + "timestamp": "2023-10-17T11:17:32.536769" + }, + "test_fq_subsample_no_args": { + "content": [ + { + "0": [ + + ], + "1": [ + + ], + "fastq": [ + + ], + "versions": [ + + ] + } + ], + "timestamp": "2023-10-17T11:17:09.761156" + }, + "test_fq_subsample_no_input": { + "content": [ + { + "0": [ + + ], + "1": [ + + ], + "fastq": [ + + ], + "versions": [ + + ] + 
} + ], + "timestamp": "2023-10-17T11:17:37.555824" + } +} \ No newline at end of file diff --git a/modules/nf-core/fq/subsample/tests/nextflow.config b/modules/nf-core/fq/subsample/tests/nextflow.config new file mode 100644 index 0000000..41edd9f --- /dev/null +++ b/modules/nf-core/fq/subsample/tests/nextflow.config @@ -0,0 +1,7 @@ +process { + + withName: FQ_SUBSAMPLE { + ext.args = '--probability 0.1 -s 123' + } + +} diff --git a/modules/nf-core/fq/subsample/tests/nextflow_no_args.config b/modules/nf-core/fq/subsample/tests/nextflow_no_args.config new file mode 100644 index 0000000..88990d2 --- /dev/null +++ b/modules/nf-core/fq/subsample/tests/nextflow_no_args.config @@ -0,0 +1,7 @@ +process { + + withName: FQ_SUBSAMPLE { + ext.args = '' + } + +} diff --git a/modules/nf-core/fq/subsample/tests/nextflow_probability.config b/modules/nf-core/fq/subsample/tests/nextflow_probability.config new file mode 100644 index 0000000..8cde0c7 --- /dev/null +++ b/modules/nf-core/fq/subsample/tests/nextflow_probability.config @@ -0,0 +1,7 @@ +process { + + withName: FQ_SUBSAMPLE { + ext.args = '-p 0.1 -s 123' + } + +} diff --git a/modules/nf-core/fq/subsample/tests/nextflow_record_count.config b/modules/nf-core/fq/subsample/tests/nextflow_record_count.config new file mode 100644 index 0000000..1ea624e --- /dev/null +++ b/modules/nf-core/fq/subsample/tests/nextflow_record_count.config @@ -0,0 +1,7 @@ +process { + + withName: FQ_SUBSAMPLE { + ext.args = '-n 10 -s 123' + } + +} diff --git a/modules/nf-core/fq/subsample/tests/tags.yml b/modules/nf-core/fq/subsample/tests/tags.yml new file mode 100644 index 0000000..5156431 --- /dev/null +++ b/modules/nf-core/fq/subsample/tests/tags.yml @@ -0,0 +1,2 @@ +fq/subsample: + - modules/nf-core/fq/subsample/** diff --git a/modules/nf-core/gffread/environment.yml b/modules/nf-core/gffread/environment.yml new file mode 100644 index 0000000..5398f71 --- /dev/null +++ b/modules/nf-core/gffread/environment.yml @@ -0,0 +1,7 @@ +name: gffread 
+channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::gffread=0.12.1 diff --git a/modules/nf-core/gffread/main.nf b/modules/nf-core/gffread/main.nf new file mode 100644 index 0000000..d8a473e --- /dev/null +++ b/modules/nf-core/gffread/main.nf @@ -0,0 +1,35 @@ +process GFFREAD { + tag "$gff" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/gffread:0.12.1--h8b12597_0' : + 'biocontainers/gffread:0.12.1--h8b12597_0' }" + + input: + path gff + + output: + path "*.gtf" , emit: gtf , optional: true + path "*.gff3" , emit: gffread_gff , optional: true + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${gff.baseName}" + def extension = args.contains("-T") ? 'gtf' : 'gffread.gff3' + """ + gffread \\ + $gff \\ + $args \\ + -o ${prefix}.${extension} + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gffread: \$(gffread --version 2>&1) + END_VERSIONS + """ +} diff --git a/modules/nf-core/gffread/meta.yml b/modules/nf-core/gffread/meta.yml new file mode 100644 index 0000000..27ac310 --- /dev/null +++ b/modules/nf-core/gffread/meta.yml @@ -0,0 +1,36 @@ +name: gffread +description: Validate, filter, convert and perform various other operations on GFF files +keywords: + - gff + - conversion + - validation +tools: + - gffread: + description: GFF/GTF utility providing format conversions, region filtering, FASTA sequence extraction and more. 
+ homepage: http://ccb.jhu.edu/software/stringtie/gff.shtml#gffread + documentation: http://ccb.jhu.edu/software/stringtie/gff.shtml#gffread + tool_dev_url: https://github.com/gpertea/gffread + doi: 10.12688/f1000research.23297.1 + licence: ["MIT"] +input: + - gff: + type: file + description: A reference file in either the GFF3, GFF2 or GTF format. + pattern: "*.{gff, gtf}" +output: + - gtf: + type: file + description: GTF file resulting from the conversion of the GFF input file if '-T' argument is present + pattern: "*.{gtf}" + - gffread_gff: + type: file + description: GFF3 file resulting from the conversion of the GFF input file if '-T' argument is absent + pattern: "*.{gff3}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@emiller88" +maintainers: + - "@emiller88" diff --git a/modules/nf-core/gffread/tests/main.nf.test b/modules/nf-core/gffread/tests/main.nf.test new file mode 100644 index 0000000..3c064b3 --- /dev/null +++ b/modules/nf-core/gffread/tests/main.nf.test @@ -0,0 +1,61 @@ +nextflow_process { + + name "Test Process GFFREAD" + script "../main.nf" + process "GFFREAD" + + tag "gffread" + tag "modules_nfcore" + tag "modules" + + test("sarscov2-gff3-gtf") { + + config "./nextflow.config" + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = file(params.test_data['sarscov2']['genome']['genome_gff3'], checkIfExists: true) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert process.out.gtf != null }, + { assert process.out.gffread_gff == [] } + ) + } + + } + + test("sarscov2-gff3-gff3") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = file(params.test_data['sarscov2']['genome']['genome_gff3'], checkIfExists: true) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert process.out.gtf == [] 
}, + { assert process.out.gffread_gff != null }, + ) + } + + } + +} diff --git a/modules/nf-core/gffread/tests/main.nf.test.snap b/modules/nf-core/gffread/tests/main.nf.test.snap new file mode 100644 index 0000000..1f1342e --- /dev/null +++ b/modules/nf-core/gffread/tests/main.nf.test.snap @@ -0,0 +1,52 @@ +{ + "sarscov2-gff3-gtf": { + "content": [ + { + "0": [ + "genome.gtf:md5,2394072d7d31530dfd590c4a117bf6e3" + ], + "1": [ + + ], + "2": [ + "versions.yml:md5,a71b6cdfa528dd206a238ec64bae13d6" + ], + "gffread_gff": [ + + ], + "gtf": [ + "genome.gtf:md5,2394072d7d31530dfd590c4a117bf6e3" + ], + "versions": [ + "versions.yml:md5,a71b6cdfa528dd206a238ec64bae13d6" + ] + } + ], + "timestamp": "2023-11-29T15:39:30.006985" + }, + "sarscov2-gff3-gff3": { + "content": [ + { + "0": [ + + ], + "1": [ + "genome.gffread.gff3:md5,a7d40d99dcddac23ac673c473279ea2d" + ], + "2": [ + "versions.yml:md5,a71b6cdfa528dd206a238ec64bae13d6" + ], + "gffread_gff": [ + "genome.gffread.gff3:md5,a7d40d99dcddac23ac673c473279ea2d" + ], + "gtf": [ + + ], + "versions": [ + "versions.yml:md5,a71b6cdfa528dd206a238ec64bae13d6" + ] + } + ], + "timestamp": "2023-11-29T15:39:34.636061" + } +} \ No newline at end of file diff --git a/modules/nf-core/gffread/tests/nextflow.config b/modules/nf-core/gffread/tests/nextflow.config new file mode 100644 index 0000000..74b2509 --- /dev/null +++ b/modules/nf-core/gffread/tests/nextflow.config @@ -0,0 +1,5 @@ +process { + withName: GFFREAD { + ext.args = '-T' + } +} diff --git a/modules/nf-core/gffread/tests/tags.yml b/modules/nf-core/gffread/tests/tags.yml new file mode 100644 index 0000000..0557606 --- /dev/null +++ b/modules/nf-core/gffread/tests/tags.yml @@ -0,0 +1,2 @@ +gffread: + - modules/nf-core/gffread/** diff --git a/modules/nf-core/gunzip/environment.yml b/modules/nf-core/gunzip/environment.yml new file mode 100644 index 0000000..25910b3 --- /dev/null +++ b/modules/nf-core/gunzip/environment.yml @@ -0,0 +1,7 @@ +name: gunzip +channels: + - conda-forge + 
- bioconda + - defaults +dependencies: + - conda-forge::sed=4.7 diff --git a/modules/nf-core/gunzip/main.nf b/modules/nf-core/gunzip/main.nf new file mode 100644 index 0000000..468a6f2 --- /dev/null +++ b/modules/nf-core/gunzip/main.nf @@ -0,0 +1,48 @@ +process GUNZIP { + tag "$archive" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : + 'nf-core/ubuntu:20.04' }" + + input: + tuple val(meta), path(archive) + + output: + tuple val(meta), path("$gunzip"), emit: gunzip + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + gunzip = archive.toString() - '.gz' + """ + # Not calling gunzip itself because it creates files + # with the original group ownership rather than the + # default one for that user / the work directory + gzip \\ + -cd \\ + $args \\ + $archive \\ + > $gunzip + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gunzip: \$(echo \$(gunzip --version 2>&1) | sed 's/^.*(gzip) //; s/ Copyright.*\$//') + END_VERSIONS + """ + + stub: + gunzip = archive.toString() - '.gz' + """ + touch $gunzip + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gunzip: \$(echo \$(gunzip --version 2>&1) | sed 's/^.*(gzip) //; s/ Copyright.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/gunzip/meta.yml b/modules/nf-core/gunzip/meta.yml new file mode 100644 index 0000000..231034f --- /dev/null +++ b/modules/nf-core/gunzip/meta.yml @@ -0,0 +1,39 @@ +name: gunzip +description: Compresses and decompresses files. +keywords: + - gunzip + - compression + - decompression +tools: + - gunzip: + description: | + gzip is a file format and a software application used for file compression and decompression. 
+ documentation: https://www.gnu.org/software/gzip/manual/gzip.html + licence: ["GPL-3.0-or-later"] +input: + - meta: + type: map + description: | + Optional groovy Map containing meta information + e.g. [ id:'test', single_end:false ] + - archive: + type: file + description: File to be compressed/uncompressed + pattern: "*.*" +output: + - gunzip: + type: file + description: Compressed/uncompressed file + pattern: "*.*" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@joseespinosa" + - "@drpatelh" + - "@jfy133" +maintainers: + - "@joseespinosa" + - "@drpatelh" + - "@jfy133" diff --git a/modules/nf-core/gunzip/tests/main.nf.test b/modules/nf-core/gunzip/tests/main.nf.test new file mode 100644 index 0000000..d031792 --- /dev/null +++ b/modules/nf-core/gunzip/tests/main.nf.test @@ -0,0 +1,35 @@ +nextflow_process { + + name "Test Process GUNZIP" + script "../main.nf" + process "GUNZIP" + tag "gunzip" + tag "modules_nfcore" + tag "modules" + + test("Should run without failures") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ + [], + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git a/modules/nf-core/gunzip/tests/main.nf.test.snap b/modules/nf-core/gunzip/tests/main.nf.test.snap new file mode 100644 index 0000000..720fd9f --- /dev/null +++ b/modules/nf-core/gunzip/tests/main.nf.test.snap @@ -0,0 +1,31 @@ +{ + "Should run without failures": { + "content": [ + { + "0": [ + [ + [ + + ], + "test_1.fastq:md5,4161df271f9bfcd25d5845a1e220dbec" + ] + ], + "1": [ + "versions.yml:md5,54376d32aca20e937a4ec26dac228e84" + ], + "gunzip": [ + [ + [ + + ], + "test_1.fastq:md5,4161df271f9bfcd25d5845a1e220dbec" + ] + ], + "versions": [ + "versions.yml:md5,54376d32aca20e937a4ec26dac228e84" + ] + } + ], + 
"timestamp": "2023-10-17T15:35:37.690477896" + } +} \ No newline at end of file diff --git a/modules/nf-core/gunzip/tests/tags.yml b/modules/nf-core/gunzip/tests/tags.yml new file mode 100644 index 0000000..fd3f691 --- /dev/null +++ b/modules/nf-core/gunzip/tests/tags.yml @@ -0,0 +1,2 @@ +gunzip: + - modules/nf-core/gunzip/** diff --git a/modules/nf-core/kallisto/index/environment.yml b/modules/nf-core/kallisto/index/environment.yml new file mode 100644 index 0000000..471b006 --- /dev/null +++ b/modules/nf-core/kallisto/index/environment.yml @@ -0,0 +1,7 @@ +name: kallisto_index +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::kallisto=0.48.0 diff --git a/modules/nf-core/kallisto/index/main.nf b/modules/nf-core/kallisto/index/main.nf new file mode 100644 index 0000000..28a47db --- /dev/null +++ b/modules/nf-core/kallisto/index/main.nf @@ -0,0 +1,44 @@ +process KALLISTO_INDEX { + tag "$fasta" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/kallisto:0.48.0--h15996b6_2': + 'biocontainers/kallisto:0.48.0--h15996b6_2' }" + + input: + tuple val(meta), path(fasta) + + output: + tuple val(meta), path("kallisto") , emit: index + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + """ + kallisto \\ + index \\ + $args \\ + -i kallisto \\ + $fasta + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + kallisto: \$(echo \$(kallisto 2>&1) | sed 's/^kallisto //; s/Usage.*\$//') + END_VERSIONS + """ + + stub: + """ + touch kallisto + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + kallisto: \$(echo \$(kallisto 2>&1) | sed 's/^kallisto //; s/Usage.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/kallisto/index/meta.yml b/modules/nf-core/kallisto/index/meta.yml new file mode 100644 index 0000000..d366aeb --- /dev/null +++ b/modules/nf-core/kallisto/index/meta.yml @@ -0,0 +1,41 @@ +name: kallisto_index +description: Create kallisto index +keywords: + - kallisto + - kallisto/index + - index +tools: + - kallisto: + description: Quantifying abundances of transcripts from bulk and single-cell RNA-Seq data, or more generally of target sequences using high-throughput sequencing reads. + homepage: https://pachterlab.github.io/kallisto/ + documentation: https://pachterlab.github.io/kallisto/manual + tool_dev_url: https://github.com/pachterlab/kallisto + licence: ["BSD-2-Clause"] +input: + - meta: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test' ] + - fasta: + type: file + description: genome fasta file + pattern: "*.{fasta}" +output: + - meta: + type: map + description: | + Groovy Map containing reference information + e.g. 
[ id:'test' ] + - index: + type: directory + description: Kallisto genome index + pattern: "*.idx" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@ggabernet" +maintainers: + - "@ggabernet" diff --git a/modules/nf-core/kallisto/index/tests/main.nf.test b/modules/nf-core/kallisto/index/tests/main.nf.test new file mode 100644 index 0000000..97933d6 --- /dev/null +++ b/modules/nf-core/kallisto/index/tests/main.nf.test @@ -0,0 +1,33 @@ +nextflow_process { + + name "Test Process KALLISTO_INDEX" + script "../main.nf" + process "KALLISTO_INDEX" + tag "modules" + tag "modules_nfcore" + tag "kallisto" + tag "kallisto/index" + + test("homo_sapiens genome_fasta") { + + when { + process { + """ + input[0] = [ + [ id:'test_fasta' ], // meta map + [ file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) ] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git a/modules/nf-core/kallisto/index/tests/main.nf.test.snap b/modules/nf-core/kallisto/index/tests/main.nf.test.snap new file mode 100644 index 0000000..c0f45ac --- /dev/null +++ b/modules/nf-core/kallisto/index/tests/main.nf.test.snap @@ -0,0 +1,31 @@ +{ + "homo_sapiens genome_fasta": { + "content": [ + { + "0": [ + [ + { + "id": "test_fasta" + }, + "kallisto:md5,2dab84e1456201beca5a43f4c514d67c" + ] + ], + "1": [ + "versions.yml:md5,178f9b57d4228edc356911d571b958a4" + ], + "index": [ + [ + { + "id": "test_fasta" + }, + "kallisto:md5,2dab84e1456201beca5a43f4c514d67c" + ] + ], + "versions": [ + "versions.yml:md5,178f9b57d4228edc356911d571b958a4" + ] + } + ], + "timestamp": "2023-11-02T09:58:48.83625986" + } +} \ No newline at end of file diff --git a/modules/nf-core/kallisto/index/tests/tags.yml b/modules/nf-core/kallisto/index/tests/tags.yml new file mode 100644 index 0000000..9f47b88 --- /dev/null +++ 
b/modules/nf-core/kallisto/index/tests/tags.yml @@ -0,0 +1,2 @@ +kallisto/index: + - modules/nf-core/kallisto/index/** diff --git a/modules/nf-core/rsem/preparereference/environment.yml b/modules/nf-core/rsem/preparereference/environment.yml new file mode 100644 index 0000000..c80e691 --- /dev/null +++ b/modules/nf-core/rsem/preparereference/environment.yml @@ -0,0 +1,8 @@ +name: rsem_preparereference +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::rsem=1.3.3 + - bioconda::star=2.7.10a diff --git a/modules/nf-core/rsem/preparereference/main.nf b/modules/nf-core/rsem/preparereference/main.nf new file mode 100644 index 0000000..44f76eb --- /dev/null +++ b/modules/nf-core/rsem/preparereference/main.nf @@ -0,0 +1,72 @@ +process RSEM_PREPAREREFERENCE { + tag "$fasta" + label 'process_high' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/mulled-v2-cf0123ef83b3c38c13e3b0696a3f285d3f20f15b:64aad4a4e144878400649e71f42105311be7ed87-0' : + 'biocontainers/mulled-v2-cf0123ef83b3c38c13e3b0696a3f285d3f20f15b:64aad4a4e144878400649e71f42105311be7ed87-0' }" + + input: + path fasta, stageAs: "rsem/*" + path gtf + + output: + path "rsem" , emit: index + path "*transcripts.fa", emit: transcript_fasta + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' + def args_list = args.tokenize() + if (args_list.contains('--star')) { + args_list.removeIf { it.contains('--star') } + def memory = task.memory ? 
"--limitGenomeGenerateRAM ${task.memory.toBytes() - 100000000}" : '' + """ + STAR \\ + --runMode genomeGenerate \\ + --genomeDir rsem/ \\ + --genomeFastaFiles $fasta \\ + --sjdbGTFfile $gtf \\ + --runThreadN $task.cpus \\ + $memory \\ + $args2 + + rsem-prepare-reference \\ + --gtf $gtf \\ + --num-threads $task.cpus \\ + ${args_list.join(' ')} \\ + $fasta \\ + rsem/genome + + cp rsem/genome.transcripts.fa . + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + rsem: \$(rsem-calculate-expression --version | sed -e "s/Current version: RSEM v//g") + star: \$(STAR --version | sed -e "s/STAR_//g") + END_VERSIONS + """ + } else { + """ + rsem-prepare-reference \\ + --gtf $gtf \\ + --num-threads $task.cpus \\ + $args \\ + $fasta \\ + rsem/genome + + cp rsem/genome.transcripts.fa . + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + rsem: \$(rsem-calculate-expression --version | sed -e "s/Current version: RSEM v//g") + star: \$(STAR --version | sed -e "s/STAR_//g") + END_VERSIONS + """ + } +} diff --git a/modules/nf-core/rsem/preparereference/meta.yml b/modules/nf-core/rsem/preparereference/meta.yml new file mode 100644 index 0000000..05aa44f --- /dev/null +++ b/modules/nf-core/rsem/preparereference/meta.yml @@ -0,0 +1,42 @@ +name: rsem_preparereference +description: Prepare a reference genome for RSEM +keywords: + - rsem + - genome + - index +tools: + - rseqc: + description: | + RSEM: accurate transcript quantification from RNA-Seq data with or without a reference genome + homepage: https://github.com/deweylab/RSEM + documentation: https://github.com/deweylab/RSEM + doi: 10.1186/1471-2105-12-323 + licence: ["GPL-3.0-or-later"] +input: + - fasta: + type: file + description: The Fasta file of the reference genome + pattern: "*.{fasta,fa}" + - gtf: + type: file + description: The GTF file of the reference genome + pattern: "*.gtf" +output: + - rsem: + type: directory + description: RSEM index directory + pattern: "rsem" + - transcript_fasta: + type: file + 
description: Fasta file of transcripts + pattern: "rsem/*transcripts.fa" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" + - "@kevinmenden" +maintainers: + - "@drpatelh" + - "@kevinmenden" diff --git a/modules/nf-core/rsem/preparereference/tests/main.nf.test b/modules/nf-core/rsem/preparereference/tests/main.nf.test new file mode 100644 index 0000000..a1d948d --- /dev/null +++ b/modules/nf-core/rsem/preparereference/tests/main.nf.test @@ -0,0 +1,36 @@ +nextflow_process { + + name "Test Process RSEM_PREPAREREFERENCE" + script "../main.nf" + process "RSEM_PREPAREREFERENCE" + tag "modules" + tag "modules_nfcore" + tag "rsem" + tag "rsem/preparereference" + + test("homo_sapiens") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = Channel.of([file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)]) + input[1] = Channel.of([file(params.test_data['homo_sapiens']['genome']['genome_gtf'], checkIfExists: true)]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.index).match("index")}, + { assert snapshot(process.out.transcript_fasta).match("transcript_fasta")}, + { assert snapshot(process.out.versions).match("versions") } + ) + } + + } + +} diff --git a/modules/nf-core/rsem/preparereference/tests/main.nf.test.snap b/modules/nf-core/rsem/preparereference/tests/main.nf.test.snap new file mode 100644 index 0000000..0251065 --- /dev/null +++ b/modules/nf-core/rsem/preparereference/tests/main.nf.test.snap @@ -0,0 +1,35 @@ +{ + "versions": { + "content": [ + [ + "versions.yml:md5,517611c42f6354d3609db1b35fffa397" + ] + ], + "timestamp": "2023-11-22T13:16:49.170587" + }, + "index": { + "content": [ + [ + [ + "genome.chrlist:md5,b190587cae0531f3cf25552d8aa674db", + "genome.fasta:md5,f315020d899597c1b57e5fe9f60f4c3e", + "genome.grp:md5,c2848a8b6d495956c11ec53efc1de67e", + 
"genome.idx.fa:md5,050c521a2719c2ae48267c1e65218f29", + "genome.n2g.idx.fa:md5,050c521a2719c2ae48267c1e65218f29", + "genome.seq:md5,94da0c6b88c33e63c9a052a11f4f57c1", + "genome.ti:md5,c9e4ae8d4d13a504eec2acf1b8589a66", + "genome.transcripts.fa:md5,050c521a2719c2ae48267c1e65218f29" + ] + ] + ], + "timestamp": "2023-11-22T13:16:49.140398" + }, + "transcript_fasta": { + "content": [ + [ + "genome.transcripts.fa:md5,050c521a2719c2ae48267c1e65218f29" + ] + ], + "timestamp": "2023-11-22T13:16:49.159946" + } +} \ No newline at end of file diff --git a/modules/nf-core/rsem/preparereference/tests/tags.yml b/modules/nf-core/rsem/preparereference/tests/tags.yml new file mode 100644 index 0000000..1129045 --- /dev/null +++ b/modules/nf-core/rsem/preparereference/tests/tags.yml @@ -0,0 +1,2 @@ +rsem/preparereference: + - modules/nf-core/rsem/preparereference/** diff --git a/modules/nf-core/salmon/index/environment.yml b/modules/nf-core/salmon/index/environment.yml new file mode 100644 index 0000000..a6607d9 --- /dev/null +++ b/modules/nf-core/salmon/index/environment.yml @@ -0,0 +1,7 @@ +name: salmon_index +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::salmon=1.10.1 diff --git a/modules/nf-core/salmon/index/main.nf b/modules/nf-core/salmon/index/main.nf new file mode 100644 index 0000000..88d9cf1 --- /dev/null +++ b/modules/nf-core/salmon/index/main.nf @@ -0,0 +1,47 @@ +process SALMON_INDEX { + tag "$transcript_fasta" + label "process_medium" + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/salmon:1.10.1--h7e5ed60_0' : + 'biocontainers/salmon:1.10.1--h7e5ed60_0' }" + + input: + path genome_fasta + path transcript_fasta + + output: + path "salmon" , emit: index + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def get_decoy_ids = "grep '^>' $genome_fasta | cut -d ' ' -f 1 | cut -d \$'\\t' -f 1 > decoys.txt" + def gentrome = "gentrome.fa" + if (genome_fasta.endsWith('.gz')) { + get_decoy_ids = "grep '^>' <(gunzip -c $genome_fasta) | cut -d ' ' -f 1 | cut -d \$'\\t' -f 1 > decoys.txt" + gentrome = "gentrome.fa.gz" + } + """ + $get_decoy_ids + sed -i.bak -e 's/>//g' decoys.txt + cat $transcript_fasta $genome_fasta > $gentrome + + salmon \\ + index \\ + --threads $task.cpus \\ + -t $gentrome \\ + -d decoys.txt \\ + $args \\ + -i salmon + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + salmon: \$(echo \$(salmon --version) | sed -e "s/salmon //g") + END_VERSIONS + """ +} diff --git a/modules/nf-core/salmon/index/meta.yml b/modules/nf-core/salmon/index/meta.yml new file mode 100644 index 0000000..fd94dd2 --- /dev/null +++ b/modules/nf-core/salmon/index/meta.yml @@ -0,0 +1,37 @@ +name: salmon_index +description: Create index for salmon +keywords: + - index + - fasta + - genome + - reference +tools: + - salmon: + description: | + Salmon is a tool for wicked-fast transcript quantification from RNA-seq data + homepage: https://salmon.readthedocs.io/en/latest/salmon.html + manual: https://salmon.readthedocs.io/en/latest/salmon.html + doi: 10.1038/nmeth.4197 + licence: ["GPL-3.0-or-later"] +input: + - genome_fasta: + type: file + description: Fasta file of the reference genome + - transcriptome_fasta: + type: file + description: Fasta file of the reference transcriptome +output: + - index: + type: directory + description: Folder containing the salmon index files + pattern: "salmon" + - versions: + type: file + description:
File containing software versions + pattern: "versions.yml" +authors: + - "@kevinmenden" + - "@drpatelh" +maintainers: + - "@kevinmenden" + - "@drpatelh" diff --git a/modules/nf-core/salmon/index/tests/main.nf.test b/modules/nf-core/salmon/index/tests/main.nf.test new file mode 100644 index 0000000..24f95c0 --- /dev/null +++ b/modules/nf-core/salmon/index/tests/main.nf.test @@ -0,0 +1,35 @@ +nextflow_process { + + name "Test Process SALMON_INDEX" + script "../main.nf" + process "SALMON_INDEX" + tag "modules" + tag "modules_nfcore" + tag "salmon" + tag "salmon/index" + + test("sarscov2") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = Channel.of([file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)]) + input[1] = Channel.of([file(params.test_data['sarscov2']['genome']['transcriptome_fasta'], checkIfExists: true)]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert path(process.out.index.get(0)).exists() }, + { assert snapshot(process.out.versions).match("versions") } + ) + } + + } + +} \ No newline at end of file diff --git a/modules/nf-core/salmon/index/tests/main.nf.test.snap b/modules/nf-core/salmon/index/tests/main.nf.test.snap new file mode 100644 index 0000000..1e3e6b6 --- /dev/null +++ b/modules/nf-core/salmon/index/tests/main.nf.test.snap @@ -0,0 +1,10 @@ +{ + "versions": { + "content": [ + [ + "versions.yml:md5,563eeafb4577be0b13801d7021c0bf42" + ] + ], + "timestamp": "2023-11-22T14:26:33.32036" + } +} \ No newline at end of file diff --git a/modules/nf-core/salmon/index/tests/tags.yml b/modules/nf-core/salmon/index/tests/tags.yml new file mode 100644 index 0000000..0299789 --- /dev/null +++ b/modules/nf-core/salmon/index/tests/tags.yml @@ -0,0 +1,2 @@ +salmon/index: + - modules/nf-core/salmon/index/** diff --git a/modules/nf-core/salmon/quant/environment.yml b/modules/nf-core/salmon/quant/environment.yml new file mode 100644 index 0000000..8626672 --- /dev/null +++ 
b/modules/nf-core/salmon/quant/environment.yml @@ -0,0 +1,7 @@ +name: salmon_quant +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::salmon=1.10.1 diff --git a/modules/nf-core/salmon/quant/main.nf b/modules/nf-core/salmon/quant/main.nf new file mode 100644 index 0000000..e115d39 --- /dev/null +++ b/modules/nf-core/salmon/quant/main.nf @@ -0,0 +1,79 @@ +process SALMON_QUANT { + tag "$meta.id" + label "process_medium" + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/salmon:1.10.1--h7e5ed60_0' : + 'biocontainers/salmon:1.10.1--h7e5ed60_0' }" + + input: + tuple val(meta), path(reads) + path index + path gtf + path transcript_fasta + val alignment_mode + val lib_type + + output: + tuple val(meta), path("${prefix}") , emit: results + tuple val(meta), path("*info.json"), emit: json_info, optional: true + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + + def reference = "--index $index" + def reads1 = [], reads2 = [] + meta.single_end ? [reads].flatten().each{reads1 << it} : reads.eachWithIndex{ v, ix -> ( ix & 1 ? reads2 : reads1) << v } + def input_reads = meta.single_end ? "-r ${reads1.join(" ")}" : "-1 ${reads1.join(" ")} -2 ${reads2.join(" ")}" + if (alignment_mode) { + reference = "-t $transcript_fasta" + input_reads = "-a $reads" + } + + def strandedness_opts = [ + 'A', 'U', 'SF', 'SR', + 'IS', 'IU' , 'ISF', 'ISR', + 'OS', 'OU' , 'OSF', 'OSR', + 'MS', 'MU' , 'MSF', 'MSR' + ] + def strandedness = 'A' + if (lib_type) { + if (strandedness_opts.contains(lib_type)) { + strandedness = lib_type + } else { + log.info "[Salmon Quant] Invalid library type specified '--libType=${lib_type}', defaulting to auto-detection with '--libType=A'." 
+ } + } else { + strandedness = meta.single_end ? 'U' : 'IU' + if (meta.strandedness == 'forward') { + strandedness = meta.single_end ? 'SF' : 'ISF' + } else if (meta.strandedness == 'reverse') { + strandedness = meta.single_end ? 'SR' : 'ISR' + } + } + """ + salmon quant \\ + --geneMap $gtf \\ + --threads $task.cpus \\ + --libType=$strandedness \\ + $reference \\ + $input_reads \\ + $args \\ + -o $prefix + + if [ -f $prefix/aux_info/meta_info.json ]; then + cp $prefix/aux_info/meta_info.json "${prefix}_meta_info.json" + fi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + salmon: \$(echo \$(salmon --version) | sed -e "s/salmon //g") + END_VERSIONS + """ +} diff --git a/modules/nf-core/salmon/quant/meta.yml b/modules/nf-core/salmon/quant/meta.yml new file mode 100644 index 0000000..49d7137 --- /dev/null +++ b/modules/nf-core/salmon/quant/meta.yml @@ -0,0 +1,62 @@ +name: salmon_quant +description: gene/transcript quantification with Salmon +keywords: + - index + - fasta + - genome + - reference +tools: + - salmon: + description: | + Salmon is a tool for wicked-fast transcript quantification from RNA-seq data + homepage: https://salmon.readthedocs.io/en/latest/salmon.html + manual: https://salmon.readthedocs.io/en/latest/salmon.html + doi: 10.1038/nmeth.4197 + licence: ["GPL-3.0-or-later"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: | + List of input FastQ files for single-end or paired-end data. + Multiple single-end fastqs or pairs of paired-end fastqs are + handled. 
+ - index: + type: directory + description: Folder containing the salmon index files + - gtf: + type: file + description: GTF of the reference transcriptome + - transcriptome_fasta: + type: file + description: Fasta file of the reference transcriptome + - alignment_mode: + type: boolean + description: whether to run salmon in alignment mode + - lib_type: + type: string + description: | + Override library type inferred based on strandedness defined in meta object +output: + - results: + type: directory + description: Folder containing the quantification results for a specific sample + pattern: "${prefix}" + - json_info: + type: file + description: File containing meta information from Salmon quant + pattern: "*info.json" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@kevinmenden" + - "@drpatelh" +maintainers: + - "@kevinmenden" + - "@drpatelh" diff --git a/modules/nf-core/salmon/quant/tests/main.nf.test b/modules/nf-core/salmon/quant/tests/main.nf.test new file mode 100644 index 0000000..7b28db3 --- /dev/null +++ b/modules/nf-core/salmon/quant/tests/main.nf.test @@ -0,0 +1,192 @@ +nextflow_process { + + name "Test Process SALMON_QUANT" + script "../main.nf" + process "SALMON_QUANT" + config "./nextflow.config" + tag "modules" + tag "modules_nfcore" + tag "salmon" + tag "salmon/quant" + + test("sarscov2 - single_end") { + + setup { + run("SALMON_INDEX") { + script "../../../salmon/index/main.nf" + process { + """ + input[0] = Channel.of([file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)]) + input[1] = Channel.of([file(params.test_data['sarscov2']['genome']['transcriptome_fasta'], checkIfExists: true)]) + """ + } + } + } + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:true ], // meta map + [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) ] + ]) + input[1] =
SALMON_INDEX.out.index + input[2] = Channel.of([file(params.test_data['sarscov2']['genome']['genome_gtf'], checkIfExists: true)]) + input[3] = Channel.of([file(params.test_data['sarscov2']['genome']['transcriptome_fasta'], checkIfExists: true)]) + input[4] = false + input[5] = '' + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert path(process.out.json_info.get(0).get(1)).exists() }, + { assert path(process.out.results.get(0).get(1)).exists() }, + { assert snapshot(process.out.versions).match("versions") } + ) + } + + } + + test("sarscov2 - single_end lib type A") { + + setup { + run("SALMON_INDEX") { + script "../../../salmon/index/main.nf" + process { + """ + input[0] = Channel.of([file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)]) + input[1] = Channel.of([file(params.test_data['sarscov2']['genome']['transcriptome_fasta'], checkIfExists: true)]) + """ + } + } + } + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:true ], // meta map + [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) ] + ]) + input[1] = SALMON_INDEX.out.index + input[2] = Channel.of([file(params.test_data['sarscov2']['genome']['genome_gtf'], checkIfExists: true)]) + input[3] = Channel.of([file(params.test_data['sarscov2']['genome']['transcriptome_fasta'], checkIfExists: true)]) + input[4] = false + input[5] = 'A' + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert path(process.out.json_info.get(0).get(1)).exists() }, + { assert path(process.out.results.get(0).get(1)).exists() }, + { assert snapshot(process.out.versions).match("versions") } + ) + } + + } + + test("sarscov2 - pair_end") { + + setup { + run("SALMON_INDEX") { + script "../../../salmon/index/main.nf" + process { + """ + input[0] = Channel.of([file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)]) + input[1] = 
Channel.of([file(params.test_data['sarscov2']['genome']['transcriptome_fasta'], checkIfExists: true)]) + """ + } + } + } + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) ] + ]) + input[1] = SALMON_INDEX.out.index + input[2] = Channel.of([file(params.test_data['sarscov2']['genome']['genome_gtf'], checkIfExists: true)]) + input[3] = Channel.of([file(params.test_data['sarscov2']['genome']['transcriptome_fasta'], checkIfExists: true)]) + input[4] = false + input[5] = '' + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert path(process.out.json_info.get(0).get(1)).exists() }, + { assert path(process.out.results.get(0).get(1)).exists() }, + { assert snapshot(process.out.versions).match("versions") } + ) + } + + } + test("sarscov2 - pair_end multiple") { + + setup { + run("SALMON_INDEX") { + script "../../../salmon/index/main.nf" + process { + """ + input[0] = Channel.of([file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)]) + input[1] = Channel.of([file(params.test_data['sarscov2']['genome']['transcriptome_fasta'], checkIfExists: true)]) + """ + } + } + } + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test2_1_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test2_2_fastq_gz'], checkIfExists: true) + ] + ]) + input[1] = SALMON_INDEX.out.index + input[2] = 
Channel.of([file(params.test_data['sarscov2']['genome']['genome_gtf'], checkIfExists: true)]) + input[3] = Channel.of([file(params.test_data['sarscov2']['genome']['transcriptome_fasta'], checkIfExists: true)]) + input[4] = false + input[5] = '' + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert path(process.out.json_info.get(0).get(1)).exists() }, + { assert path(process.out.results.get(0).get(1)).exists() }, + { assert snapshot(process.out.versions).match("versions") } + ) + } + + } +} \ No newline at end of file diff --git a/modules/nf-core/salmon/quant/tests/main.nf.test.snap b/modules/nf-core/salmon/quant/tests/main.nf.test.snap new file mode 100644 index 0000000..386a7a3 --- /dev/null +++ b/modules/nf-core/salmon/quant/tests/main.nf.test.snap @@ -0,0 +1,10 @@ +{ + "versions": { + "content": [ + [ + "versions.yml:md5,80eb3d2ad36960c7e9263f81ede9d263" + ] + ], + "timestamp": "2023-11-22T14:47:10.132112" + } +} \ No newline at end of file diff --git a/modules/nf-core/salmon/quant/tests/nextflow.config b/modules/nf-core/salmon/quant/tests/nextflow.config new file mode 100644 index 0000000..37c0821 --- /dev/null +++ b/modules/nf-core/salmon/quant/tests/nextflow.config @@ -0,0 +1,7 @@ +process { + + withName: SALMON_QUANT { + ext.args = '--minAssignedFrags 1' + } + +} diff --git a/modules/nf-core/salmon/quant/tests/tags.yml b/modules/nf-core/salmon/quant/tests/tags.yml new file mode 100644 index 0000000..048d816 --- /dev/null +++ b/modules/nf-core/salmon/quant/tests/tags.yml @@ -0,0 +1,2 @@ +salmon/quant: + - modules/nf-core/salmon/quant/** diff --git a/modules/nf-core/sortmerna/environment.yml b/modules/nf-core/sortmerna/environment.yml new file mode 100644 index 0000000..3dae00a --- /dev/null +++ b/modules/nf-core/sortmerna/environment.yml @@ -0,0 +1,7 @@ +name: sortmerna +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::sortmerna=4.3.4 diff --git a/modules/nf-core/sortmerna/main.nf 
b/modules/nf-core/sortmerna/main.nf new file mode 100644 index 0000000..53ccb97 --- /dev/null +++ b/modules/nf-core/sortmerna/main.nf @@ -0,0 +1,70 @@ +process SORTMERNA { + tag "$meta.id" + label "process_high" + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/sortmerna:4.3.4--h9ee0642_0' : + 'biocontainers/sortmerna:4.3.4--h9ee0642_0' }" + + input: + tuple val(meta), path(reads) + path fastas + + output: + tuple val(meta), path("*non_rRNA.fastq.gz"), emit: reads + tuple val(meta), path("*.log") , emit: log + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + if (meta.single_end) { + """ + sortmerna \\ + ${'--ref '+fastas.join(' --ref ')} \\ + --reads $reads \\ + --threads $task.cpus \\ + --workdir . \\ + --aligned rRNA_reads \\ + --fastx \\ + --other non_rRNA_reads \\ + $args + + mv non_rRNA_reads.f*q.gz ${prefix}.non_rRNA.fastq.gz + mv rRNA_reads.log ${prefix}.sortmerna.log + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + sortmerna: \$(echo \$(sortmerna --version 2>&1) | sed 's/^.*SortMeRNA version //; s/ Build Date.*\$//') + END_VERSIONS + """ + } else { + """ + sortmerna \\ + ${'--ref '+fastas.join(' --ref ')} \\ + --reads ${reads[0]} \\ + --reads ${reads[1]} \\ + --threads $task.cpus \\ + --workdir . 
\\ + --aligned rRNA_reads \\ + --fastx \\ + --other non_rRNA_reads \\ + --paired_in \\ + --out2 \\ + $args + + mv non_rRNA_reads_fwd.f*q.gz ${prefix}_1.non_rRNA.fastq.gz + mv non_rRNA_reads_rev.f*q.gz ${prefix}_2.non_rRNA.fastq.gz + mv rRNA_reads.log ${prefix}.sortmerna.log + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + sortmerna: \$(echo \$(sortmerna --version 2>&1) | sed 's/^.*SortMeRNA version //; s/ Build Date.*\$//') + END_VERSIONS + """ + } +} diff --git a/modules/nf-core/sortmerna/meta.yml b/modules/nf-core/sortmerna/meta.yml new file mode 100644 index 0000000..de0b18e --- /dev/null +++ b/modules/nf-core/sortmerna/meta.yml @@ -0,0 +1,53 @@ +name: sortmerna +description: Local sequence alignment tool for filtering, mapping and clustering. +keywords: + - filtering + - mapping + - clustering + - rRNA + - ribosomal RNA +tools: + - SortMeRNA: + description: The core algorithm is based on approximate seeds and allows for sensitive analysis of NGS reads. The main application of SortMeRNA is filtering rRNA from metatranscriptomic data. SortMeRNA takes as input files of reads (fasta, fastq, fasta.gz, fastq.gz) and one or multiple rRNA database file(s), and sorts apart aligned and rejected reads into two files. Additional applications include clustering and taxonomy assignation available through QIIME v1.9.1. SortMeRNA works with Illumina, Ion Torrent and PacBio data, and can produce SAM and BLAST-like alignments. + homepage: https://hpc.nih.gov/apps/sortmeRNA.html + documentation: https://github.com/biocore/sortmerna/wiki/ + licence: ["GPL-3.0-or-later"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: | + List of input FastQ files of size 1 and 2 for single-end and paired-end data, + respectively. 
+ - fastas: + type: file + description: | + Path to reference file(s) +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: The filtered fastq reads + pattern: "*fastq.gz" + - log: + type: file + description: SortMeRNA log file + pattern: "*sortmerna.log" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" + - "@mashehu" +maintainers: + - "@drpatelh" + - "@mashehu" diff --git a/modules/nf-core/sortmerna/tests/main.nf.test b/modules/nf-core/sortmerna/tests/main.nf.test new file mode 100644 index 0000000..3ec2692 --- /dev/null +++ b/modules/nf-core/sortmerna/tests/main.nf.test @@ -0,0 +1,59 @@ +nextflow_process { + + name "Test Process SORTMERNA" + script "../main.nf" + process "SORTMERNA" + tag "modules" + tag "modules_nfcore" + tag "sortmerna" + + test("sarscov2 single_end") { + + when { + process { + """ + input[0] = [ [ id:'test', single_end:true ], // meta map + [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) ] + ] + input[1] = [ file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.reads).match("se_reads") }, + { assert process.out.log }, + { assert snapshot(process.out.versions).match("se_versions") } + ) + } + + } + + test("sarscov2 paired_end") { + + when { + process { + """ + input[0] = [ [ id:'test', single_end:false ], // meta map + [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) ] + ] + input[1] = [ file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert 
snapshot(process.out.reads).match("pe_reads") }, + { assert process.out.log }, + { assert snapshot(process.out.versions).match("pe_versions") } + ) + } + + } + +} diff --git a/modules/nf-core/sortmerna/tests/main.nf.test.snap b/modules/nf-core/sortmerna/tests/main.nf.test.snap new file mode 100644 index 0000000..f1bedb7 --- /dev/null +++ b/modules/nf-core/sortmerna/tests/main.nf.test.snap @@ -0,0 +1,49 @@ +{ + "se_versions": { + "content": [ + [ + "versions.yml:md5,96553a18cad5237fbf76d5a6c966360e" + ] + ], + "timestamp": "2023-11-22T14:25:07.95908694" + }, + "pe_reads": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_1.non_rRNA.fastq.gz:md5,e62ff0123a74adfc6903d59a449cbdb0", + "test_2.non_rRNA.fastq.gz:md5,e62ff0123a74adfc6903d59a449cbdb0" + ] + ] + ] + ], + "timestamp": "2023-11-22T14:25:19.098771475" + }, + "se_reads": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true + }, + "test.non_rRNA.fastq.gz:md5,e62ff0123a74adfc6903d59a449cbdb0" + ] + ] + ], + "timestamp": "2023-11-22T14:25:07.949212892" + }, + "pe_versions": { + "content": [ + [ + "versions.yml:md5,96553a18cad5237fbf76d5a6c966360e" + ] + ], + "timestamp": "2023-11-22T14:25:19.105098985" + } +} \ No newline at end of file diff --git a/modules/nf-core/sortmerna/tests/tags.yml b/modules/nf-core/sortmerna/tests/tags.yml new file mode 100644 index 0000000..e088480 --- /dev/null +++ b/modules/nf-core/sortmerna/tests/tags.yml @@ -0,0 +1,2 @@ +sortmerna: + - modules/nf-core/sortmerna/** diff --git a/modules/nf-core/star/genomegenerate/environment.yml b/modules/nf-core/star/genomegenerate/environment.yml new file mode 100644 index 0000000..350a459 --- /dev/null +++ b/modules/nf-core/star/genomegenerate/environment.yml @@ -0,0 +1,9 @@ +name: star_genomegenerate +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::star=2.7.10a + - bioconda::samtools=1.18 + - conda-forge::gawk=5.1.0 diff --git 
a/modules/nf-core/star/genomegenerate/main.nf b/modules/nf-core/star/genomegenerate/main.nf new file mode 100644 index 0000000..2bc3e29 --- /dev/null +++ b/modules/nf-core/star/genomegenerate/main.nf @@ -0,0 +1,96 @@ +process STAR_GENOMEGENERATE { + tag "$fasta" + label 'process_high' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/mulled-v2-1fa26d1ce03c295fe2fdcf85831a92fbcbd7e8c2:ded3841da0194af2701c780e9b3d653a85d27489-0' : + 'biocontainers/mulled-v2-1fa26d1ce03c295fe2fdcf85831a92fbcbd7e8c2:ded3841da0194af2701c780e9b3d653a85d27489-0' }" + + input: + tuple val(meta), path(fasta) + tuple val(meta2), path(gtf) + + output: + tuple val(meta), path("star") , emit: index + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def args_list = args.tokenize() + def memory = task.memory ? 
"--limitGenomeGenerateRAM ${task.memory.toBytes() - 100000000}" : '' + if (args_list.contains('--genomeSAindexNbases')) { + """ + mkdir star + STAR \\ + --runMode genomeGenerate \\ + --genomeDir star/ \\ + --genomeFastaFiles $fasta \\ + --sjdbGTFfile $gtf \\ + --runThreadN $task.cpus \\ + $memory \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + star: \$(STAR --version | sed -e "s/STAR_//g") + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + gawk: \$(echo \$(gawk --version 2>&1) | sed 's/^.*GNU Awk //; s/, .*\$//') + END_VERSIONS + """ + } else { + """ + samtools faidx $fasta + NUM_BASES=`gawk '{sum = sum + \$2}END{if ((log(sum)/log(2))/2 - 1 > 14) {printf "%.0f", 14} else {printf "%.0f", (log(sum)/log(2))/2 - 1}}' ${fasta}.fai` + + mkdir star + STAR \\ + --runMode genomeGenerate \\ + --genomeDir star/ \\ + --genomeFastaFiles $fasta \\ + --sjdbGTFfile $gtf \\ + --runThreadN $task.cpus \\ + --genomeSAindexNbases \$NUM_BASES \\ + $memory \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + star: \$(STAR --version | sed -e "s/STAR_//g") + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + gawk: \$(echo \$(gawk --version 2>&1) | sed 's/^.*GNU Awk //; s/, .*\$//') + END_VERSIONS + """ + } + + stub: + """ + mkdir star + touch star/Genome + touch star/Log.out + touch star/SA + touch star/SAindex + touch star/chrLength.txt + touch star/chrName.txt + touch star/chrNameLength.txt + touch star/chrStart.txt + touch star/exonGeTrInfo.tab + touch star/exonInfo.tab + touch star/geneInfo.tab + touch star/genomeParameters.txt + touch star/sjdbInfo.txt + touch star/sjdbList.fromGTF.out.tab + touch star/sjdbList.out.tab + touch star/transcriptInfo.tab + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + star: \$(STAR --version | sed -e "s/STAR_//g") + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + gawk: \$(echo 
\$(gawk --version 2>&1) | sed 's/^.*GNU Awk //; s/, .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/star/genomegenerate/meta.yml b/modules/nf-core/star/genomegenerate/meta.yml new file mode 100644 index 0000000..1061e1b --- /dev/null +++ b/modules/nf-core/star/genomegenerate/meta.yml @@ -0,0 +1,53 @@ +name: star_genomegenerate +description: Create index for STAR +keywords: + - index + - fasta + - genome + - reference +tools: + - star: + description: | + STAR is a software package for mapping DNA sequences against + a large reference genome, such as the human genome. + homepage: https://github.com/alexdobin/STAR + manual: https://github.com/alexdobin/STAR/blob/master/doc/STARmanual.pdf + doi: 10.1093/bioinformatics/bts635 + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - fasta: + type: file + description: Fasta file of the reference genome + - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test' ] + - gtf: + type: file + description: GTF file of the reference genome +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - index: + type: directory + description: Folder containing the star index files + pattern: "star" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@kevinmenden" + - "@drpatelh" +maintainers: + - "@kevinmenden" + - "@drpatelh" diff --git a/modules/nf-core/star/genomegenerate/tests/main.nf.test b/modules/nf-core/star/genomegenerate/tests/main.nf.test new file mode 100644 index 0000000..eed8292 --- /dev/null +++ b/modules/nf-core/star/genomegenerate/tests/main.nf.test @@ -0,0 +1,38 @@ +nextflow_process { + + name "Test Process STAR_GENOMEGENERATE" + script "../main.nf" + process "STAR_GENOMEGENERATE" + tag "modules" + tag "modules_nfcore" + tag "star" + tag "star/genomegenerate" + + test("homo_sapiens") { + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test_fasta' ], + [file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)] + ]) + input[1] = Channel.of([ + [ id:'test_gtf' ], + [file(params.test_data['homo_sapiens']['genome']['genome_gtf'], checkIfExists: true)] + ]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(file(process.out.index[0][1]).name).match("index") }, + { assert snapshot(process.out.versions).match("versions") } + ) + } + + } + +} \ No newline at end of file diff --git a/modules/nf-core/star/genomegenerate/tests/main.nf.test.snap b/modules/nf-core/star/genomegenerate/tests/main.nf.test.snap new file mode 100644 index 0000000..e7bb6ee --- /dev/null +++ b/modules/nf-core/star/genomegenerate/tests/main.nf.test.snap @@ -0,0 +1,16 @@ +{ + "versions": { + "content": [ + [ + "versions.yml:md5,46b8f1f34bb7f23892cd1eb249ed4d7f" + ] + ], + "timestamp": "2023-12-04T18:01:27.298248806" + }, + "index": { + "content": [ + "star" + ], + "timestamp": "2023-11-23T11:31:47.560528" + } +} \ No newline at end of file diff --git a/modules/nf-core/star/genomegenerate/tests/tags.yml 
b/modules/nf-core/star/genomegenerate/tests/tags.yml new file mode 100644 index 0000000..79f619b --- /dev/null +++ b/modules/nf-core/star/genomegenerate/tests/tags.yml @@ -0,0 +1,2 @@ +star/genomegenerate: + - modules/nf-core/star/genomegenerate/** diff --git a/modules/nf-core/trimgalore/environment.yml b/modules/nf-core/trimgalore/environment.yml new file mode 100644 index 0000000..6cd0f51 --- /dev/null +++ b/modules/nf-core/trimgalore/environment.yml @@ -0,0 +1,7 @@ +name: trimgalore +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::trim-galore=0.6.7 diff --git a/modules/nf-core/trimgalore/main.nf b/modules/nf-core/trimgalore/main.nf new file mode 100644 index 0000000..24ead87 --- /dev/null +++ b/modules/nf-core/trimgalore/main.nf @@ -0,0 +1,75 @@ +process TRIMGALORE { + tag "$meta.id" + label 'process_high' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/trim-galore:0.6.7--hdfd78af_0' : + 'biocontainers/trim-galore:0.6.7--hdfd78af_0' }" + + input: + tuple val(meta), path(reads) + + output: + tuple val(meta), path("*{3prime,5prime,trimmed,val}*.fq.gz"), emit: reads + tuple val(meta), path("*report.txt") , emit: log , optional: true + tuple val(meta), path("*unpaired*.fq.gz") , emit: unpaired, optional: true + tuple val(meta), path("*.html") , emit: html , optional: true + tuple val(meta), path("*.zip") , emit: zip , optional: true + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + // Calculate number of --cores for TrimGalore based on value of task.cpus + // See: https://github.com/FelixKrueger/TrimGalore/blob/master/Changelog.md#version-060-release-on-1-mar-2019 + // See: https://github.com/nf-core/atacseq/pull/65 + def cores = 1 + if (task.cpus) { + cores = (task.cpus as int) - 4 + if (meta.single_end) cores = (task.cpus as int) - 3 + if (cores < 1) cores = 1 + if (cores > 8) cores = 8 + } + + // Added soft-links to original fastqs for consistent naming in MultiQC + def prefix = task.ext.prefix ?: "${meta.id}" + if (meta.single_end) { + def args_list = args.split("\\s(?=--)").toList() + args_list.removeAll { it.toLowerCase().contains('_r2 ') } + """ + [ ! -f ${prefix}.fastq.gz ] && ln -s $reads ${prefix}.fastq.gz + trim_galore \\ + ${args_list.join(' ')} \\ + --cores $cores \\ + --gzip \\ + ${prefix}.fastq.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + trimgalore: \$(echo \$(trim_galore --version 2>&1) | sed 's/^.*version //; s/Last.*\$//') + cutadapt: \$(cutadapt --version) + END_VERSIONS + """ + } else { + """ + [ ! -f ${prefix}_1.fastq.gz ] && ln -s ${reads[0]} ${prefix}_1.fastq.gz + [ ! 
-f ${prefix}_2.fastq.gz ] && ln -s ${reads[1]} ${prefix}_2.fastq.gz + trim_galore \\ + $args \\ + --cores $cores \\ + --paired \\ + --gzip \\ + ${prefix}_1.fastq.gz \\ + ${prefix}_2.fastq.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + trimgalore: \$(echo \$(trim_galore --version 2>&1) | sed 's/^.*version //; s/Last.*\$//') + cutadapt: \$(cutadapt --version) + END_VERSIONS + """ + } +} diff --git a/modules/nf-core/trimgalore/meta.yml b/modules/nf-core/trimgalore/meta.yml new file mode 100644 index 0000000..e649088 --- /dev/null +++ b/modules/nf-core/trimgalore/meta.yml @@ -0,0 +1,68 @@ +name: trimgalore +description: Trim FastQ files using Trim Galore! +keywords: + - trimming + - adapters + - sequencing adapters + - fastq +tools: + - trimgalore: + description: | + A wrapper tool around Cutadapt and FastQC to consistently apply quality + and adapter trimming to FastQ files, with some extra functionality for + MspI-digested RRBS-type (Reduced Representation Bisulfite-Seq) libraries. + homepage: https://www.bioinformatics.babraham.ac.uk/projects/trim_galore/ + documentation: https://github.com/FelixKrueger/TrimGalore/blob/master/Docs/Trim_Galore_User_Guide.md + licence: ["GPL-3.0-or-later"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: | + List of input FastQ files of size 1 and 2 for single-end and paired-end data, + respectively. +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: | + List of input adapter trimmed FastQ files of size 1 and 2 for + single-end and paired-end data, respectively.
+ pattern: "*{3prime,5prime,trimmed,val}*.fq.gz" + - unpaired: + type: file + description: | + FastQ files containing unpaired reads from read 1 or read 2 + pattern: "*unpaired*.fq.gz" + - html: + type: file + description: FastQC report (optional) + pattern: "*_{fastqc.html}" + - zip: + type: file + description: FastQC report archive (optional) + pattern: "*_{fastqc.zip}" + - log: + type: file + description: Trim Galore! trimming report + pattern: "*_{report.txt}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" + - "@ewels" + - "@FelixKrueger" +maintainers: + - "@drpatelh" + - "@ewels" + - "@FelixKrueger" diff --git a/modules/nf-core/trimgalore/tests/main.nf.test b/modules/nf-core/trimgalore/tests/main.nf.test new file mode 100644 index 0000000..bc6812c --- /dev/null +++ b/modules/nf-core/trimgalore/tests/main.nf.test @@ -0,0 +1,105 @@ +nextflow_process { + + name "Test Process TRIMGALORE" + script "../main.nf" + process "TRIMGALORE" + tag "modules" + tag "modules_nfcore" + tag "trimgalore" + + test("test_trimgalore_single_end") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ [ id:'test', single_end:true ], // meta map + [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) ] + ] + """ + } + } + + then { + def read_lines = ["@ERR5069949.2151832 NS500628:121:HK3MMAFX2:2:21208:10793:15304/1", + "TCATAAACCAAAGCACTCACAGTGTCAACAATTTCAGCAGGACAACGCCGACAAGTTCCGAGGAACATGTCTGGACCTATAGTTTTCATAAGTCTACACACTGAATTGAAATATTCTGGTTCTAGTGTGCCCTTAGTTAGCAATGTGCGT", + "AAAAAAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEE + { assert path(process.out.reads.get(0).get(1)).linesGzip.contains(read_line) } + } + }, + { report1_lines.each { report1_line -> + { assert path(process.out.log.get(0).get(1)).getText().contains(report1_line) } + } + } + ) + } + } + + 
test("test_trimgalore_paired_end") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ [ id:'test', single_end:false ], // meta map + [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) ] + ] + """ + } + } + + then { + def read1_lines = ["@ERR5069949.2151832 NS500628:121:HK3MMAFX2:2:21208:10793:15304/1", + "TCATAAACCAAAGCACTCACAGTGTCAACAATTTCAGCAGGACAACGCCGACAAGTTCCGAGGAACATGTCTGGACCTATAGTTTTCATAAGTCTACACACTGAATTGAAATATTCTGGTTCTAGTGTGCCCTTAGTTAGCAATGTGCGT", + "AAAAAAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEE + { assert path(process.out.reads.get(0).get(1).get(0)).linesGzip.contains(read1_line) } + } + }, + { read2_lines.each { read2_line -> + { assert path(process.out.reads.get(0).get(1).get(1)).linesGzip.contains(read2_line) } + } + }, + { report1_lines.each { report1_line -> + { assert path(process.out.log.get(0).get(1).get(0)).getText().contains(report1_line) } + } + }, + { report2_lines.each { report2_line -> + { assert path(process.out.log.get(0).get(1).get(1)).getText().contains(report2_line) } + } + } + ) + } + } +} diff --git a/modules/nf-core/trimgalore/tests/main.nf.test.snap b/modules/nf-core/trimgalore/tests/main.nf.test.snap new file mode 100644 index 0000000..84feacc --- /dev/null +++ b/modules/nf-core/trimgalore/tests/main.nf.test.snap @@ -0,0 +1,148 @@ +{ + "test_trimgalore_single_end": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test_trimmed.fq.gz:md5,e0a7516b8ea8d6467d6306acb2cd13c4" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastq.gz_trimming_report.txt:md5,a1ab3958205f1ddf48af623242b5b429" + ] + ], + "2": [ + + ], + "3": [ + + ], + "4": [ + + ], + "5": [ + "versions.yml:md5,47d966cbb31c80eb8f7fe860d55659b7" + ], + "html": [ + + ], + "log": [ + [ + { + 
"id": "test", + "single_end": true + }, + "test.fastq.gz_trimming_report.txt:md5,a1ab3958205f1ddf48af623242b5b429" + ] + ], + "reads": [ + [ + { + "id": "test", + "single_end": true + }, + "test_trimmed.fq.gz:md5,e0a7516b8ea8d6467d6306acb2cd13c4" + ] + ], + "unpaired": [ + + ], + "versions": [ + "versions.yml:md5,47d966cbb31c80eb8f7fe860d55659b7" + ], + "zip": [ + + ] + } + ], + "timestamp": "2023-10-17T15:24:57.782141441" + }, + "test_trimgalore_paired_end": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_1_val_1.fq.gz:md5,e0a7516b8ea8d6467d6306acb2cd13c4", + "test_2_val_2.fq.gz:md5,f3d61189e6d10202da7b8686f1dbb71b" + ] + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_1.fastq.gz_trimming_report.txt:md5,315d40465412f9909bbaabf52269274d", + "test_2.fastq.gz_trimming_report.txt:md5,34436303da1c78811103427a2fb57f7b" + ] + ] + ], + "2": [ + + ], + "3": [ + + ], + "4": [ + + ], + "5": [ + "versions.yml:md5,47d966cbb31c80eb8f7fe860d55659b7" + ], + "html": [ + + ], + "log": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_1.fastq.gz_trimming_report.txt:md5,315d40465412f9909bbaabf52269274d", + "test_2.fastq.gz_trimming_report.txt:md5,34436303da1c78811103427a2fb57f7b" + ] + ] + ], + "reads": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_1_val_1.fq.gz:md5,e0a7516b8ea8d6467d6306acb2cd13c4", + "test_2_val_2.fq.gz:md5,f3d61189e6d10202da7b8686f1dbb71b" + ] + ] + ], + "unpaired": [ + + ], + "versions": [ + "versions.yml:md5,47d966cbb31c80eb8f7fe860d55659b7" + ], + "zip": [ + + ] + } + ], + "timestamp": "2023-10-17T15:25:08.513589909" + } +} \ No newline at end of file diff --git a/modules/nf-core/trimgalore/tests/tags.yml b/modules/nf-core/trimgalore/tests/tags.yml new file mode 100644 index 0000000..e993769 --- /dev/null +++ b/modules/nf-core/trimgalore/tests/tags.yml @@ -0,0 +1,2 @@ +trimgalore: + - modules/nf-core/trimgalore/** diff --git 
a/modules/nf-core/umitools/extract/environment.yml b/modules/nf-core/umitools/extract/environment.yml new file mode 100644 index 0000000..7d08ac0 --- /dev/null +++ b/modules/nf-core/umitools/extract/environment.yml @@ -0,0 +1,7 @@ +name: umitools_extract +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::umi_tools=1.1.4 diff --git a/modules/nf-core/umitools/extract/main.nf b/modules/nf-core/umitools/extract/main.nf new file mode 100644 index 0000000..4bd79e7 --- /dev/null +++ b/modules/nf-core/umitools/extract/main.nf @@ -0,0 +1,56 @@ +process UMITOOLS_EXTRACT { + tag "$meta.id" + label "process_single" + label "process_long" + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/umi_tools:1.1.4--py38hbff2b2d_1' : + 'biocontainers/umi_tools:1.1.4--py38hbff2b2d_1' }" + + input: + tuple val(meta), path(reads) + + output: + tuple val(meta), path("*.fastq.gz"), emit: reads + tuple val(meta), path("*.log") , emit: log + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + if (meta.single_end) { + """ + umi_tools \\ + extract \\ + -I $reads \\ + -S ${prefix}.umi_extract.fastq.gz \\ + $args \\ + > ${prefix}.umi_extract.log + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + umitools: \$( umi_tools --version | sed '/version:/!d; s/.*: //' ) + END_VERSIONS + """ + } else { + """ + umi_tools \\ + extract \\ + -I ${reads[0]} \\ + --read2-in=${reads[1]} \\ + -S ${prefix}.umi_extract_1.fastq.gz \\ + --read2-out=${prefix}.umi_extract_2.fastq.gz \\ + $args \\ + > ${prefix}.umi_extract.log + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + umitools: \$( umi_tools --version | sed '/version:/!d; s/.*: //' ) + END_VERSIONS + """ + } +} diff --git 
a/modules/nf-core/umitools/extract/meta.yml b/modules/nf-core/umitools/extract/meta.yml new file mode 100644 index 0000000..7695b27 --- /dev/null +++ b/modules/nf-core/umitools/extract/meta.yml @@ -0,0 +1,48 @@ +name: umitools_extract +description: Extracts UMI barcode from a read and add it to the read name, leaving any sample barcode in place +keywords: + - UMI + - barcode + - extract + - umitools +tools: + - umi_tools: + description: > + UMI-tools contains tools for dealing with Unique Molecular Identifiers (UMIs)/Random Molecular Tags (RMTs) and single cell RNA-Seq cell barcodes + documentation: https://umi-tools.readthedocs.io/en/latest/ + license: "MIT" +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: list + description: | + List of input FASTQ files whose UMIs will be extracted. +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: > + Extracted FASTQ files. | For single-end reads, pattern is \${prefix}.umi_extract.fastq.gz. | For paired-end reads, pattern is \${prefix}.umi_extract_{1,2}.fastq.gz. 
+ pattern: "*.{fastq.gz}" + - log: + type: file + description: Logfile for umi_tools + pattern: "*.{log}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" + - "@grst" +maintainers: + - "@drpatelh" + - "@grst" diff --git a/modules/nf-core/umitools/extract/tests/main.nf.test b/modules/nf-core/umitools/extract/tests/main.nf.test new file mode 100644 index 0000000..22242d1 --- /dev/null +++ b/modules/nf-core/umitools/extract/tests/main.nf.test @@ -0,0 +1,35 @@ +nextflow_process { + + name "Test Process UMITOOLS_EXTRACT" + script "../main.nf" + process "UMITOOLS_EXTRACT" + config "./nextflow.config" + tag "modules_nfcore" + tag "modules" + tag "umitools" + tag "umitools/extract" + + test("Should run without failures") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ [ id:'test', single_end:true ], // meta map + [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) ] + ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out.versions).match("versions") } + ) + } + + } +} \ No newline at end of file diff --git a/modules/nf-core/umitools/extract/tests/main.nf.test.snap b/modules/nf-core/umitools/extract/tests/main.nf.test.snap new file mode 100644 index 0000000..6d5944f --- /dev/null +++ b/modules/nf-core/umitools/extract/tests/main.nf.test.snap @@ -0,0 +1,10 @@ +{ + "versions": { + "content": [ + [ + "versions.yml:md5,5a18da2d3a5a4de15e7aaae9082d7abb" + ] + ], + "timestamp": "2023-12-08T09:41:43.540658352" + } +} \ No newline at end of file diff --git a/modules/nf-core/umitools/extract/tests/nextflow.config b/modules/nf-core/umitools/extract/tests/nextflow.config new file mode 100644 index 0000000..c866f5a --- /dev/null +++ b/modules/nf-core/umitools/extract/tests/nextflow.config @@ -0,0 +1,9 @@ +process { + + publishDir = { 
"${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } + + withName: UMITOOLS_EXTRACT { + ext.args = '--bc-pattern="NNNN"' + } + +} diff --git a/modules/nf-core/umitools/extract/tests/tags.yml b/modules/nf-core/umitools/extract/tests/tags.yml new file mode 100644 index 0000000..c3fb23d --- /dev/null +++ b/modules/nf-core/umitools/extract/tests/tags.yml @@ -0,0 +1,2 @@ +umitools/extract: + - modules/nf-core/umitools/extract/** diff --git a/modules/nf-core/untar/environment.yml b/modules/nf-core/untar/environment.yml new file mode 100644 index 0000000..d6917da --- /dev/null +++ b/modules/nf-core/untar/environment.yml @@ -0,0 +1,9 @@ +name: untar +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - conda-forge::sed=4.7 + - conda-forge::grep=3.11 + - conda-forge::tar=1.34 diff --git a/modules/nf-core/untar/main.nf b/modules/nf-core/untar/main.nf new file mode 100644 index 0000000..8a75bb9 --- /dev/null +++ b/modules/nf-core/untar/main.nf @@ -0,0 +1,63 @@ +process UNTAR { + tag "$archive" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : + 'nf-core/ubuntu:20.04' }" + + input: + tuple val(meta), path(archive) + + output: + tuple val(meta), path("$prefix"), emit: untar + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' + prefix = task.ext.prefix ?: ( meta.id ? 
"${meta.id}" : archive.baseName.toString().replaceFirst(/\.tar$/, "")) + + """ + mkdir $prefix + + ## Ensures --strip-components only applied when top level of tar contents is a directory + ## If just files or multiple directories, place all in prefix + if [[ \$(tar -taf ${archive} | grep -o -P "^.*?\\/" | uniq | wc -l) -eq 1 ]]; then + tar \\ + -C $prefix --strip-components 1 \\ + -xavf \\ + $args \\ + $archive \\ + $args2 + else + tar \\ + -C $prefix \\ + -xavf \\ + $args \\ + $archive \\ + $args2 + fi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + untar: \$(echo \$(tar --version 2>&1) | sed 's/^.*(GNU tar) //; s/ Copyright.*\$//') + END_VERSIONS + """ + + stub: + prefix = task.ext.prefix ?: ( meta.id ? "${meta.id}" : archive.toString().replaceFirst(/\.[^\.]+(.gz)?$/, "")) + """ + mkdir $prefix + touch ${prefix}/file.txt + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + untar: \$(echo \$(tar --version 2>&1) | sed 's/^.*(GNU tar) //; s/ Copyright.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/untar/meta.yml b/modules/nf-core/untar/meta.yml new file mode 100644 index 0000000..a9a2110 --- /dev/null +++ b/modules/nf-core/untar/meta.yml @@ -0,0 +1,46 @@ +name: untar +description: Extract files. +keywords: + - untar + - uncompress + - extract +tools: + - untar: + description: | + Extract tar.gz files. + documentation: https://www.gnu.org/software/tar/manual/ + licence: ["GPL-3.0-or-later"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - archive: + type: file + description: File to be untar + pattern: "*.{tar}.{gz}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - untar: + type: directory + description: Directory containing contents of archive + pattern: "*/" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@joseespinosa" + - "@drpatelh" + - "@matthdsm" + - "@jfy133" +maintainers: + - "@joseespinosa" + - "@drpatelh" + - "@matthdsm" + - "@jfy133" diff --git a/modules/nf-core/untar/tests/main.nf.test b/modules/nf-core/untar/tests/main.nf.test new file mode 100644 index 0000000..d40db13 --- /dev/null +++ b/modules/nf-core/untar/tests/main.nf.test @@ -0,0 +1,77 @@ +nextflow_process { + + name "Test Process UNTAR" + script "../main.nf" + process "UNTAR" + + tag "modules" + tag "modules_nfcore" + tag "untar" + + test("test_untar") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ [], file(params.test_data['sarscov2']['genome']['kraken2_tar_gz'], checkIfExists: true) ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out.untar).match("test_untar") }, + ) + } + + } + + test("test_untar_different_output_path") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ [], file(params.test_data['homo_sapiens']['illumina']['test_flowcell'], checkIfExists: true) ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out.untar).match("test_untar_different_output_path") }, + ) + } + + } + + test("test_untar_onlyfiles") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ [], file(params.test_data['generic']['tar']['tar_gz'], checkIfExists: true) ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out.untar).match("test_untar_onlyfiles") }, + ) + } + + } + +} diff --git a/modules/nf-core/untar/tests/main.nf.test.snap b/modules/nf-core/untar/tests/main.nf.test.snap new file mode 100644 index 0000000..146c867 
--- /dev/null +++ b/modules/nf-core/untar/tests/main.nf.test.snap @@ -0,0 +1,513 @@ +{ + "test_untar_different_output_path": { + "content": [ + [ + [ + [ + + ], + [ + [ + [ + [ + [ + [ + "s_1_1101.bcl:md5,ad01889e2ff43e2f194224e20bdb600c", + "s_1_1101.stats:md5,4bbbf103454b37fbc3138fadf1b4446b" + ], + [ + "s_1_1101.bcl:md5,565384bbe67a694dfd690bae6d1d30c2", + "s_1_1101.stats:md5,55e5abd8f129ff38ef169873547abdb8" + ], + [ + "s_1_1101.bcl:md5,650fa58a630a9148835ba79e323d4237", + "s_1_1101.stats:md5,77403669ca1b05340c390dff64425c1e" + ], + [ + "s_1_1101.bcl:md5,54471c9e97299cd141e202e204637702", + "s_1_1101.stats:md5,67b14c9a89b7f8556674a7524d5cfb2d" + ], + [ + "s_1_1101.bcl:md5,74e4f929fc7476c380fd9d741ddb6700", + "s_1_1101.stats:md5,5730a4c35463eaa12a06b6758710b98c" + ], + [ + "s_1_1101.bcl:md5,c785f472f4350c120c02c888c8189590", + "s_1_1101.stats:md5,fee4ec63895ea81007e06ee6a36ba5e0" + ], + [ + "s_1_1101.bcl:md5,b7ea50bb25f08d43c301741d77050a9b", + "s_1_1101.stats:md5,fa7c68f3122c74d14364e6f7b011af70" + ], + [ + "s_1_1101.bcl:md5,9d5087dc4bcae39d66486363d4f68ecf", + "s_1_1101.stats:md5,23cdceee4d82c4b8e7c60018b9276ace" + ], + [ + "s_1_1101.bcl:md5,581e0c5ee94e8f2de14b2b1d8e777530", + "s_1_1101.stats:md5,9a3536d573c97f66bb56b49463612607" + ], + [ + "s_1_1101.bcl:md5,296fc026bb34c67bbe2b44845fe0d1de", + "s_1_1101.stats:md5,a7f57a7770fb9c5ae2a0fb1ef403ec4f" + ], + [ + "s_1_1101.bcl:md5,2a3ca15531556c36d10d132a9e051de8", + "s_1_1101.stats:md5,2d0bcdb0a1b51d3d79e415db2ab2d3b1" + ], + [ + "s_1_1101.bcl:md5,1150d46a2ccd4ac58aee0585d3e4ffd7", + "s_1_1101.stats:md5,2e97550bd5b5864ffd0565bb7a3f6d40" + ], + [ + "s_1_1101.bcl:md5,0b85c4b3da0de95e7b862d849c5333ae", + "s_1_1101.stats:md5,6eab9746fbeb783b0cd70398f44e0c1a" + ], + [ + "s_1_1101.bcl:md5,e0e9c91f4698804d7a6d1058ef68b34f", + "s_1_1101.stats:md5,790022cdc7878a02b2ebd166e1ddf0a7" + ], + [ + "s_1_1101.bcl:md5,38cd0ad4de359e651c8ac0d5777ea625", + "s_1_1101.stats:md5,a1b1d5ea5371d326abb029774483c5e6" + ], + [ + 
"s_1_1101.bcl:md5,b0ddc05c4012ccba24e712a1cfec748f", + "s_1_1101.stats:md5,af3d232f839d720f76f40ba06caa2987" + ], + [ + "s_1_1101.bcl:md5,af32fcc5dc3b836cf7a5ba3db85a75dd", + "s_1_1101.stats:md5,f93f2c09bd4e486c74a5f6e2040f7296" + ], + [ + "s_1_1101.bcl:md5,54b7428e037ca87816107647d4a3d9db", + "s_1_1101.stats:md5,e5ac77a72cd7bed5e9bf03cccda0e48c" + ], + [ + "s_1_1101.bcl:md5,fc8b4eacd493bf3d0b20bc23998dc7ff", + "s_1_1101.stats:md5,190315e159e2f4bc4c057ded7470dc52" + ], + [ + "s_1_1101.bcl:md5,9484ecffda489927fce424ac6a44fa9d", + "s_1_1101.stats:md5,0825feeb457ecc9efcf6f8526ba32311" + ], + [ + "s_1_1101.bcl:md5,eec59e21036e31c95ce1e847bfb0a9c4", + "s_1_1101.stats:md5,9acc13f63c98e5a8445e7be70d49222b" + ], + [ + "s_1_1101.bcl:md5,a9fb24476f87cba4fba68e2b3c3f2c07", + "s_1_1101.stats:md5,dc0aa7db9790733291c3e6480ca2a0fc" + ], + [ + "s_1_1101.bcl:md5,ed950b3e82c500927c2e236c9df005c6", + "s_1_1101.stats:md5,dccb71ec47d1f9d33a192da6d5660a45" + ], + [ + "s_1_1101.bcl:md5,b3e992025e995ca56b5ea2820144ef47", + "s_1_1101.stats:md5,a6a829bf2cffb26ac5d9dc3012057699" + ], + [ + "s_1_1101.bcl:md5,89edc726a5a4e0b4ff8ca3899ed0232b", + "s_1_1101.stats:md5,5b9b4fd8110577a59b82d0c419519d29" + ], + [ + "s_1_1101.bcl:md5,4dc696149169f232c451225f563cb5cd", + "s_1_1101.stats:md5,d3514a71ea3adc60e2943c6b8f6e2598" + ], + [ + "s_1_1101.bcl:md5,35b992d0318afb7c825ceaa31b0755e6", + "s_1_1101.stats:md5,2826093acc175c16c3795de7c4ca8f07" + ], + [ + "s_1_1101.bcl:md5,7bc927f56a362e49c00b5d76ee048901", + "s_1_1101.stats:md5,e47d862b795fd6b88a31d7d482ab22f6" + ], + [ + "s_1_1101.bcl:md5,84742233ff2a651626fe9036f27f7cb2", + "s_1_1101.stats:md5,b78fad11d3c50bc76b722cdc03e3028b" + ], + [ + "s_1_1101.bcl:md5,3935341c86263a7938e8c49620ef39f8", + "s_1_1101.stats:md5,cc6585b2daac5354073d150874da9704" + ], + [ + "s_1_1101.bcl:md5,3627f4fd548bf6e64aaf08fba3a342be", + "s_1_1101.stats:md5,120ae4831ae004ff7d16728aef36e82f" + ], + [ + "s_1_1101.bcl:md5,07631014bc35124149fabd80ef19f933", + 
"s_1_1101.stats:md5,eadd63d91f47cc6db6b6f0a967a23927" + ], + [ + "s_1_1101.bcl:md5,a1149c80415dc2f34d768eeb397c43fb", + "s_1_1101.stats:md5,ca89a9def67611a9151c6ce685b7cce1" + ], + [ + "s_1_1101.bcl:md5,eb5f71d4741d2f40618756bc72eaf8b4", + "s_1_1101.stats:md5,90f48501e735e5915b843478e23d1ae2" + ], + [ + "s_1_1101.bcl:md5,9bf270fe3f6add1a591ebc24fff10078", + "s_1_1101.stats:md5,a4e429671d4098034293c638aa655e16" + ], + [ + "s_1_1101.bcl:md5,219bedcbd24bae54fe4cf05dae05282c", + "s_1_1101.stats:md5,dd97525b65b68207137d51fcf19132c7" + ], + [ + "s_1_1101.bcl:md5,5163bc00a68fd57ae50cae0b76350892", + "s_1_1101.stats:md5,b606a5368eff1f012f3ea5d11ccdf2e0" + ], + [ + "s_1_1101.bcl:md5,fc429195a5af59a59e0cc4c48e6c05ea", + "s_1_1101.stats:md5,d809aa19698053f90d639da4dcad8008" + ], + [ + "s_1_1101.bcl:md5,383340219a1dd77076a092a64a71a7e4", + "s_1_1101.stats:md5,b204a5cf256378679ffc906c15cc1bae" + ], + [ + "s_1_1101.bcl:md5,0c369540d3e24696cf1f9c55bab69315", + "s_1_1101.stats:md5,a2bc69a4031a22ce9621dcc623a0bf4b" + ], + [ + "s_1_1101.bcl:md5,3127abc8016ba8eb954f8f8015dff387", + "s_1_1101.stats:md5,5deafff31150b7bf757f814e49a53bc2" + ], + [ + "s_1_1101.bcl:md5,045f40c82de676bafec3d59f91376a7a", + "s_1_1101.stats:md5,890700edc20687c090ef52248c7884b1" + ], + [ + "s_1_1101.bcl:md5,78af269aa2b39a1d765703f0a4739a86", + "s_1_1101.stats:md5,303cf457aa1543a8208544f694cbc531" + ], + [ + "s_1_1101.bcl:md5,0ab8c781959b783b62888e9274364a46", + "s_1_1101.stats:md5,2605b0e8322f83aa4d0dae5da4ec7a7a" + ], + [ + "s_1_1101.bcl:md5,d0cf823ffe352e8b3f75d589544ab617", + "s_1_1101.stats:md5,efa3c0e01e3db71e12fd961cb2d03739" + ], + [ + "s_1_1101.bcl:md5,db4ca4ab7a01e03c246f9160c3758d82", + "s_1_1101.stats:md5,f61550d9e4a90df6b860e68f41f82f60" + ], + [ + "s_1_1101.bcl:md5,1af39a2c7e5ff20ece91cb8160b51d17", + "s_1_1101.stats:md5,d0e20879afcaf6dfcd88c73f1c5c78cf" + ], + [ + "s_1_1101.bcl:md5,4cf7123bb0fffcd79266df03aef01665", + "s_1_1101.stats:md5,29bff4075109a121b087116b58d7e927" + ], + [ + 
"s_1_1101.bcl:md5,aa9980428cb60cd6320f4b48f4dd0d74", + "s_1_1101.stats:md5,6b0e20bde93133117a8d1a6df3d6f37b" + ], + [ + "s_1_1101.bcl:md5,0f6e440374e15b9b491d52fb83a8adfe", + "s_1_1101.stats:md5,55cb5eb0ecdabd23dca39ab8c4607598" + ], + [ + "s_1_1101.bcl:md5,2c645d7bdaddaa403f6e304d36df9e4b", + "s_1_1101.stats:md5,53acf33d21f832779b400c2447386ce4" + ], + [ + "s_1_1101.bcl:md5,3bbf0863b423b770c879203644420206", + "s_1_1101.stats:md5,579bdc7293cac8c3d7407249cacf4c25" + ], + [ + "s_1_1101.bcl:md5,6658a08409e81d29cfeb2d096b491985", + "s_1_1101.stats:md5,bb559ffbea46d612f9933cefa84c4c03" + ], + [ + "s_1_1101.bcl:md5,1700d9a13d3d4f7643af2943ef838acb", + "s_1_1101.stats:md5,f01cb6050ebfb15da1e0399ebd791eb4" + ], + [ + "s_1_1101.bcl:md5,1ac7aa9ffae25eb103f755f33e4a39c6", + "s_1_1101.stats:md5,0b9d45d7929ccf336d5e5b95373ed3c2" + ], + [ + "s_1_1101.bcl:md5,812a97af2e983a53226e18c75190b06c", + "s_1_1101.stats:md5,d2410c7b0e506dab2972e77e2398de1e" + ], + [ + "s_1_1101.bcl:md5,c981e8e4dcc434956c2b86159da268bc", + "s_1_1101.stats:md5,e9c826e85361ce673f1f248786c9a611" + ], + [ + "s_1_1101.bcl:md5,88e09e99a0a4ef3357b203a41b22f77c", + "s_1_1101.stats:md5,ef06f2e5ad667bbd383f9ed6a05b7b42" + ], + [ + "s_1_1101.bcl:md5,461c8b146fc8a7938be38689978ecd09", + "s_1_1101.stats:md5,65115693935da66f9791b27136e22fb0" + ], + [ + "s_1_1101.bcl:md5,c7b827df5ce20e0f21916fe60860ca3f", + "s_1_1101.stats:md5,87be73613aeb507847f94d3cac5bb30a" + ], + [ + "s_1_1101.bcl:md5,7c4cc3dc9c8a1b0f15917b282dfb40ce", + "s_1_1101.stats:md5,bdd9181fa89debbfafe7b6ea3e064065" + ], + [ + "s_1_1101.bcl:md5,19f4debaf91e118aca8934517179ac33", + "s_1_1101.stats:md5,1143082719e136241d21b14a6b19b8a2" + ], + [ + "s_1_1101.bcl:md5,38aa256ad2d697d84b0b2c0e876a3eba", + "s_1_1101.stats:md5,64dd82f03df23f7f437eede2671ed4fe" + ], + [ + "s_1_1101.bcl:md5,b7929970378949571fed922c1b8cab32", + "s_1_1101.stats:md5,3d6d7985a41629fe196e4342d7fe36aa" + ], + [ + "s_1_1101.bcl:md5,fb2ed0bf6e89d79624ee78754e773491", + 
"s_1_1101.stats:md5,f34940810ff255aee79953496a12716d" + ], + [ + "s_1_1101.bcl:md5,4f8a8311f5f9c3a7629c1a973a7b280e", + "s_1_1101.stats:md5,4fd7cd28c09f4e152e7c2ad1ab541cd2" + ], + [ + "s_1_1101.bcl:md5,9eb46c903d0344e25af51f88cc311d60", + "s_1_1101.stats:md5,df3abd5f620d9e7f99496098d9fd3f7f" + ], + [ + "s_1_1101.bcl:md5,3ecbc17f3660e2014b58d7fe70ae62d5", + "s_1_1101.stats:md5,8e89a13c85a6d6ab3ccd251b66d1f165" + ], + [ + "s_1_1101.bcl:md5,5d59cc2499a77791233a64f73fe82894", + "s_1_1101.stats:md5,32ec99cd400f4b80cb26e2fa8e07ece0" + ], + [ + "s_1_1101.bcl:md5,1c052da47b9ae8554388f0fa3aade482", + "s_1_1101.stats:md5,d23f438772673688aa7bc92421dc6dce" + ], + [ + "s_1_1101.bcl:md5,1a52bd4f23130c0c96bc967ccd448a2b", + "s_1_1101.stats:md5,9b597e3388d59ef1f61aba30ac90ea79" + ], + [ + "s_1_1101.bcl:md5,8a1e84b79cf3f80794c20e3a0cc84688", + "s_1_1101.stats:md5,9561f7b6ef4b1849afc72b2bb49792bd" + ], + [ + "s_1_1101.bcl:md5,75c00111051f3fa95d04286823cb9109", + "s_1_1101.stats:md5,1fe786cdf8181767deafbd60b3c76610" + ], + [ + "s_1_1101.bcl:md5,529255d8deee0873ed5565e6d1a2ebda", + "s_1_1101.stats:md5,3fa7f467e97a75880f32d17b7429d316" + ], + [ + "s_1_1101.bcl:md5,ea4d960e3d9355d2149da71b88a21df4", + "s_1_1101.stats:md5,2540fe65586e8e800c1ddd8cddd1e8cd" + ], + [ + "s_1_1101.bcl:md5,0dfe1fd92a2dce2f23119aa483429744", + "s_1_1101.stats:md5,78257b2169fb9f0cf40966e06e847e86" + ], + [ + "s_1_1101.bcl:md5,f692ddc9aa3ab849271d07c666d0b3b9", + "s_1_1101.stats:md5,aa2ec6a3e3a9c116e34fe74a21e6459e" + ], + [ + "s_1_1101.bcl:md5,29cc4c239eae7c871c9a1adf92ebdb98", + "s_1_1101.stats:md5,263184813090acd740a5bf25304aed3a" + ], + [ + "s_1_1101.bcl:md5,e005af6a84925e326afbfe264241f047", + "s_1_1101.stats:md5,b6fb20868eebaffcc19daa694a449795" + ], + [ + "s_1_1101.bcl:md5,02f1a699b1ba9967accccf99a7af3d24", + "s_1_1101.stats:md5,4f007efacecaf26dc0e0231aede28754" + ], + [ + "s_1_1101.bcl:md5,df308c72a2dcc655cd95e98f5457187a", + "s_1_1101.stats:md5,130c4b07f4c14030bab012824cbe34da" + ], + [ + 
"s_1_1101.bcl:md5,f3ce10d8d2406b72355023bfa8c96822", + "s_1_1101.stats:md5,2638f4db393ed5b699ec2ce59ff0ec19" + ], + [ + "s_1_1101.bcl:md5,cc2f6d675ad1593ff96f734b172d249e", + "s_1_1101.stats:md5,f5b13f1e1ababc9e1a7a73b0b993cbf1" + ], + [ + "s_1_1101.bcl:md5,7938a0b21448305a951b023b1845b3a7", + "s_1_1101.stats:md5,fcd57511adabfc3ba1ac045165330006" + ], + [ + "s_1_1101.bcl:md5,44879bc6a38df1fee8def61868115041", + "s_1_1101.stats:md5,517e20e4b58a8023a37f9af62e0e2036" + ], + [ + "s_1_1101.bcl:md5,8749611e62406a7d2f34c610a55e56af", + "s_1_1101.stats:md5,8ccf24b3676ef84f2e513be8f2a9f3d1" + ], + [ + "s_1_1101.bcl:md5,a9846a037611cda3721958088f714c0e", + "s_1_1101.stats:md5,6438fa5a1892f328cab1605a95d80a3b" + ], + [ + "s_1_1101.bcl:md5,d6c4a2a726496476eb826532f974ed5f", + "s_1_1101.stats:md5,8c2c65b5e8b00dbf61ada65252aeb266" + ], + [ + "s_1_1101.bcl:md5,be3dde6cae7dd85855a6bf295ebfacfe", + "s_1_1101.stats:md5,93bc13f3b0749b2b8d8bcb0b1199f4f0" + ], + [ + "s_1_1101.bcl:md5,7c64514735a6cf1565b60647edd17d20", + "s_1_1101.stats:md5,4a0aa6c49b24f876415e5878cef7f805" + ], + [ + "s_1_1101.bcl:md5,3983b4043bc9df4b505202a5134ccf03", + "s_1_1101.stats:md5,1c9d9a8558adc1279ca27c96bc1b9758" + ], + [ + "s_1_1101.bcl:md5,a0b8d77f116ec95975f9253dcb768136", + "s_1_1101.stats:md5,c3992b786756e7ec42f65ef4b13b50d4" + ], + [ + "s_1_1101.bcl:md5,43c95ba35d06bb7c57fbd16f3d1cfd6c", + "s_1_1101.stats:md5,3cb69d04698c39f97f962e5bf1eea7f0" + ], + [ + "s_1_1101.bcl:md5,3dbeea0cad7052f19f53ff6f19dd4d90", + "s_1_1101.stats:md5,58bbc8254f0f5f4a244531e8e9c12a04" + ], + [ + "s_1_1101.bcl:md5,da56d088996376c898d855b6cd0a7dfc", + "s_1_1101.stats:md5,9f2d78af6908ce1576b89cdc059844ff" + ], + [ + "s_1_1101.bcl:md5,7b641a5565f095e9a6ffcad9e4305033", + "s_1_1101.stats:md5,3ada06c59b4fb41b83ab6abd0979e9fc" + ], + [ + "s_1_1101.bcl:md5,a3843d397a01d51657825bb652c191e5", + "s_1_1101.stats:md5,19341e52a4bfc7d9d48e9d2acc68c519" + ], + [ + "s_1_1101.bcl:md5,048e3ebfc8efeb8012def6b741c9060d", + 
"s_1_1101.stats:md5,88bd38deca1e87d700effab1fd099565" + ], + [ + "s_1_1101.bcl:md5,b340db0e07e829dd5da22371916a1a9e", + "s_1_1101.stats:md5,e44cfaddcc4ffb968e5b1a2f41ac48a5" + ], + [ + "s_1_1101.bcl:md5,e6011ec6eabbc2b8792deb283c621ce0", + "s_1_1101.stats:md5,090875dcd1a431af24bc631333f089c4" + ], + [ + "s_1_1101.bcl:md5,a08f216e3352345031ed100ec4245082", + "s_1_1101.stats:md5,97b949ef4b96219e1369f673cf5f8a6c" + ], + [ + "s_1_1101.bcl:md5,b43337c76fb037dfcf5f8f7bcb3618e5", + "s_1_1101.stats:md5,ddef585805e79951f69d23ab7354f69b" + ], + [ + "s_1_1101.bcl:md5,8c61fd004104397b360855e058bbf1bf", + "s_1_1101.stats:md5,0f8d253816d594dcfea3ccf48c826401" + ], + [ + "s_1_1101.bcl:md5,594d06310d328b188aa0b3edfff22cb2", + "s_1_1101.stats:md5,3160bf271b39aeb7590e4fd2984710ba" + ], + [ + "s_1_1101.bcl:md5,4c9eada67c9d55437211d83e111961d5", + "s_1_1101.stats:md5,2901b46ab16ec4863d30e4c84ec29c97" + ], + [ + "s_1_1101.bcl:md5,e03971ae5282f0accc0c1b7374d9ef1b", + "s_1_1101.stats:md5,60d2a19ce59bf70a21a28555484cead8" + ], + [ + "s_1_1101.bcl:md5,e1c6f7a06e63d149895d3e48e63df155", + "s_1_1101.stats:md5,44beb10af847ea3dddaf06dda7031126" + ], + [ + "s_1_1101.bcl:md5,960a99bf29a8f9d936e9b8582d46c9c6", + "s_1_1101.stats:md5,544cd1a7aaaa841914b40ece43399334" + ], + [ + "s_1_1101.bcl:md5,5706679f349fd4a6b6313bc2c41c7a42", + "s_1_1101.stats:md5,627eea844b26dae033848c2f9f69177b" + ], + [ + "s_1_1101.bcl:md5,21da5abc4b0402bbac14b5ab998b0b4f", + "s_1_1101.stats:md5,515bd140b095ad90473ca7a9a69877ab" + ], + "s_1_1101.control:md5,08a72e2198ae95150718e8adf011d105", + "s_1_1101.filter:md5,3a72bc73b323c8cb0ac5bfeb62d98989" + ] + ], + [ + "s_1_1101.locs:md5,0827ea802e5257cc5b20e757a33d4c98" + ], + "RTAConfiguration.xml:md5,c7d6e257bc374f142dc64b9d2281d4c9", + "config.xml:md5,9a4cc7ec01fefa2f1ce9bcb45bbad6e9" + ] + ], + [ + "ControlMetricsOut.bin:md5,6d77b38d0793a6e1ce1e85706e488953", + "CorrectedIntMetricsOut.bin:md5,2bbf84d3be72734addaa2fe794711434", + 
"ErrorMetricsOut.bin:md5,38c88def138e9bb832539911affdb286", + "ExtractionMetricsOut.bin:md5,7497c3178837eea8f09350b5cd252e99", + "IndexMetricsOut.bin:md5,d41d8cd98f00b204e9800998ecf8427e", + "QMetricsOut.bin:md5,7e9f198d53ebdfbb699a5f94cf1ed51c", + "TileMetricsOut.bin:md5,83891751ec1c91a425a524b476b6ca3c" + ], + "RunInfo.xml:md5,03038959f4dd181c86bc97ae71fe270a" + ] + ] + ] + ], + "timestamp": "2023-10-18T11:56:39.562418" + }, + "test_untar_onlyfiles": { + "content": [ + [ + [ + [ + + ], + [ + "hello.txt:md5,e59ff97941044f85df5297e1c302d260" + ] + ] + ] + ], + "timestamp": "2023-10-18T11:56:46.878844" + }, + "test_untar": { + "content": [ + [ + [ + [ + + ], + [ + "hash.k2d:md5,8b8598468f54a7087c203ad0190555d9", + "opts.k2d:md5,a033d00cf6759407010b21700938f543", + "taxo.k2d:md5,094d5891cdccf2f1468088855c214b2c" + ] + ] + ] + ], + "timestamp": "2023-10-18T11:56:08.16574" + } +} \ No newline at end of file diff --git a/modules/nf-core/untar/tests/tags.yml b/modules/nf-core/untar/tests/tags.yml new file mode 100644 index 0000000..feb6f15 --- /dev/null +++ b/modules/nf-core/untar/tests/tags.yml @@ -0,0 +1,2 @@ +untar: + - modules/nf-core/untar/** diff --git a/nextflow.config b/nextflow.config index ea0de5f..5a4e939 100644 --- a/nextflow.config +++ b/nextflow.config @@ -13,11 +13,73 @@ params { // Input options input = null // References + skip_gtf_filter = false genome = null + splicesites = null igenomes_base = 's3://ngi-igenomes/igenomes' igenomes_ignore = false + gencode = false + featurecounts_group_type = 'gene_biotype' + // UMI handling + with_umi = false + skip_umi_extract = false + umitools_extract_method = 'string' + umitools_grouping_method = 'directional' + umitools_dedup_stats = false + umitools_bc_pattern = null + umitools_bc_pattern2 = null + umitools_umi_separator = null + umi_discard_read = null + save_umi_intermeds = false + // BBSplit genome filtering + bbsplit_fasta_list = null + save_bbsplit_reads = false + skip_bbsplit = true + + // Ribosomal RNA 
removal + remove_ribo_rna = true + save_non_ribo_reads = false + ribo_database_manifest = "${projectDir}/assets/rrna-db-defaults.txt" + + // Alignment + aligner = 'star_salmon' + pseudo_aligner = null + seq_center = null + bam_csi_index = false + star_ignore_sjdbgtf = false + salmon_quant_libtype = null + hisat2_build_memory = '200.GB' // Amount of memory required to build HISAT2 index with splice sites + stringtie_ignore_gtf = false + min_mapped_reads = 5 + extra_star_align_args = null + extra_salmon_quant_args = null + extra_kallisto_quant_args = null + kallisto_quant_fraglen = 200 + kallisto_quant_fraglen_sd = 200 + save_merged_fastq = false + save_unaligned = false + save_align_intermeds = false + skip_markduplicates = false + skip_alignment = true + skip_pseudo_alignment = false + + // QC + skip_qc = false + skip_bigwig = false + skip_stringtie = false + skip_fastqc = false + skip_preseq = true + skip_dupradar = false + skip_qualimap = false + skip_rseqc = false + skip_biotype_qc = false + skip_deseq2_qc = false + skip_multiqc = false + deseq2_vst = true + rseqc_modules = 'bam_stat,inner_distance,infer_experiment,junction_annotation,junction_saturation,read_distribution,read_duplication' + // MultiQC options multiqc_config = null multiqc_title = null diff --git a/nextflow_schema.json b/nextflow_schema.json index ecc8a6c..61e8772 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -17,6 +17,7 @@ "format": "file-path", "exists": true, "mimetype": "text/csv", + "schema": "assets/schema_input.json", "pattern": "^\\S+\\.csv$", "description": "Path to comma-separated file containing information about the samples in the experiment.", "help_text": "You will need to create a design file with information about the samples in your experiment before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 3 columns, and a header row. 
See [usage docs](https://nf-co.re/riboseq/usage#samplesheet-input).", diff --git a/subworkflows/local/prepare_genome/main.nf b/subworkflows/local/prepare_genome/main.nf new file mode 100644 index 0000000..0840c77 --- /dev/null +++ b/subworkflows/local/prepare_genome/main.nf @@ -0,0 +1,304 @@ +// +// Uncompress and prepare reference genome files +// + +include { GUNZIP as GUNZIP_FASTA } from '../../../modules/nf-core/gunzip' +include { GUNZIP as GUNZIP_GTF } from '../../../modules/nf-core/gunzip' +include { GUNZIP as GUNZIP_GFF } from '../../../modules/nf-core/gunzip' +include { GUNZIP as GUNZIP_GENE_BED } from '../../../modules/nf-core/gunzip' +include { GUNZIP as GUNZIP_TRANSCRIPT_FASTA } from '../../../modules/nf-core/gunzip' +include { GUNZIP as GUNZIP_ADDITIONAL_FASTA } from '../../../modules/nf-core/gunzip' + +include { UNTAR as UNTAR_BBSPLIT_INDEX } from '../../../modules/nf-core/untar' +include { UNTAR as UNTAR_STAR_INDEX } from '../../../modules/nf-core/untar' +include { UNTAR as UNTAR_RSEM_INDEX } from '../../../modules/nf-core/untar' +include { UNTAR as UNTAR_HISAT2_INDEX } from '../../../modules/nf-core/untar' +include { UNTAR as UNTAR_SALMON_INDEX } from '../../../modules/nf-core/untar' +include { UNTAR as UNTAR_KALLISTO_INDEX } from '../../../modules/nf-core/untar' + +include { CUSTOM_GETCHROMSIZES } from '../../../modules/nf-core/custom/getchromsizes' +include { GFFREAD } from '../../../modules/nf-core/gffread' +include { BBMAP_BBSPLIT } from '../../../modules/nf-core/bbmap/bbsplit' +include { STAR_GENOMEGENERATE } from '../../../modules/nf-core/star/genomegenerate' +include { HISAT2_EXTRACTSPLICESITES } from '../../../modules/nf-core/hisat2/extractsplicesites' +include { HISAT2_BUILD } from '../../../modules/nf-core/hisat2/build' +include { SALMON_INDEX } from '../../../modules/nf-core/salmon/index' +include { KALLISTO_INDEX } from '../../../modules/nf-core/kallisto/index' +include { RSEM_PREPAREREFERENCE as RSEM_PREPAREREFERENCE_GENOME } from 
'../../../modules/nf-core/rsem/preparereference' +include { RSEM_PREPAREREFERENCE as MAKE_TRANSCRIPTS_FASTA } from '../../../modules/nf-core/rsem/preparereference' + +include { PREPROCESS_TRANSCRIPTS_FASTA_GENCODE } from '../../../modules/local/preprocess_transcripts_fasta_gencode' +include { GTF2BED } from '../../../modules/local/gtf2bed' +include { CAT_ADDITIONAL_FASTA } from '../../../modules/local/cat_additional_fasta' +include { GTF_FILTER } from '../../../modules/local/gtf_filter' +include { STAR_GENOMEGENERATE_IGENOMES } from '../../../modules/local/star_genomegenerate_igenomes' + +workflow PREPARE_GENOME { + take: + fasta // file: /path/to/genome.fasta + gtf // file: /path/to/genome.gtf + gff // file: /path/to/genome.gff + additional_fasta // file: /path/to/additional.fasta + transcript_fasta // file: /path/to/transcript.fasta + gene_bed // file: /path/to/gene.bed + splicesites // file: /path/to/splicesites.txt + bbsplit_fasta_list // file: /path/to/bbsplit_fasta_list.txt + star_index // directory: /path/to/star/index/ + rsem_index // directory: /path/to/rsem/index/ + salmon_index // directory: /path/to/salmon/index/ + kallisto_index // directory: /path/to/kallisto/index/ + hisat2_index // directory: /path/to/hisat2/index/ + bbsplit_index // directory: /path/to/rsem/index/ + gencode // boolean: whether the genome is from GENCODE + is_aws_igenome // boolean: whether the genome files are from AWS iGenomes + biotype // string: if additional fasta file is provided biotype value to use when appending entries to GTF file + prepare_tool_indices // list: tools to prepare indices for + filter_gtf // boolean: whether to filter GTF file + + main: + + ch_versions = Channel.empty() + + // + // Uncompress genome fasta file if required + // + if (fasta.endsWith('.gz')) { + ch_fasta = GUNZIP_FASTA ( [ [:], fasta ] ).gunzip.map { it[1] } + ch_versions = ch_versions.mix(GUNZIP_FASTA.out.versions) + } else { + ch_fasta = Channel.value(file(fasta)) + } + + // + // Uncompress 
GTF annotation file or create from GFF3 if required + // + if (gtf || gff) { + if (gtf) { + if (gtf.endsWith('.gz')) { + ch_gtf = GUNZIP_GTF ( [ [:], gtf ] ).gunzip.map { it[1] } + ch_versions = ch_versions.mix(GUNZIP_GTF.out.versions) + } else { + ch_gtf = Channel.value(file(gtf)) + } + } else if (gff) { + if (gff.endsWith('.gz')) { + ch_gff = GUNZIP_GFF ( [ [:], gff ] ).gunzip.map { it[1] } + ch_versions = ch_versions.mix(GUNZIP_GFF.out.versions) + } else { + ch_gff = Channel.value(file(gff)) + } + ch_gtf = GFFREAD ( ch_gff ).gtf + ch_versions = ch_versions.mix(GFFREAD.out.versions) + } + + if (filter_gtf) { + GTF_FILTER ( ch_fasta, ch_gtf ) + ch_gtf = GTF_FILTER.out.genome_gtf + ch_versions = ch_versions.mix(GTF_FILTER.out.versions) + } + } + + // + // Uncompress additional fasta file and concatenate with reference fasta and gtf files + // + if (additional_fasta) { + if (additional_fasta.endsWith('.gz')) { + ch_add_fasta = GUNZIP_ADDITIONAL_FASTA ( [ [:], additional_fasta ] ).gunzip.map { it[1] } + ch_versions = ch_versions.mix(GUNZIP_ADDITIONAL_FASTA.out.versions) + } else { + ch_add_fasta = Channel.value(file(additional_fasta)) + } + CAT_ADDITIONAL_FASTA ( ch_fasta, ch_gtf, ch_add_fasta, biotype ) + ch_fasta = CAT_ADDITIONAL_FASTA.out.fasta + ch_gtf = CAT_ADDITIONAL_FASTA.out.gtf + ch_versions = ch_versions.mix(CAT_ADDITIONAL_FASTA.out.versions) + } + + // + // Uncompress gene BED annotation file or create from GTF if required + // + if (gene_bed) { + if (gene_bed.endsWith('.gz')) { + ch_gene_bed = GUNZIP_GENE_BED ( [ [:], gene_bed ] ).gunzip.map { it[1] } + ch_versions = ch_versions.mix(GUNZIP_GENE_BED.out.versions) + } else { + ch_gene_bed = Channel.value(file(gene_bed)) + } + } else { + ch_gene_bed = GTF2BED ( ch_gtf ).bed + ch_versions = ch_versions.mix(GTF2BED.out.versions) + } + + // + // Uncompress transcript fasta file / create if required + // + if (transcript_fasta) { + if (transcript_fasta.endsWith('.gz')) { + ch_transcript_fasta = 
GUNZIP_TRANSCRIPT_FASTA ( [ [:], transcript_fasta ] ).gunzip.map { it[1] } + ch_versions = ch_versions.mix(GUNZIP_TRANSCRIPT_FASTA.out.versions) + } else { + ch_transcript_fasta = Channel.value(file(transcript_fasta)) + } + if (gencode) { + PREPROCESS_TRANSCRIPTS_FASTA_GENCODE ( ch_transcript_fasta ) + ch_transcript_fasta = PREPROCESS_TRANSCRIPTS_FASTA_GENCODE.out.fasta + ch_versions = ch_versions.mix(PREPROCESS_TRANSCRIPTS_FASTA_GENCODE.out.versions) + } + } else { + ch_transcript_fasta = MAKE_TRANSCRIPTS_FASTA ( ch_fasta, ch_gtf ).transcript_fasta + ch_versions = ch_versions.mix(MAKE_TRANSCRIPTS_FASTA.out.versions) + } + + // + // Create chromosome sizes file + // + CUSTOM_GETCHROMSIZES ( ch_fasta.map { [ [:], it ] } ) + ch_fai = CUSTOM_GETCHROMSIZES.out.fai.map { it[1] } + ch_chrom_sizes = CUSTOM_GETCHROMSIZES.out.sizes.map { it[1] } + ch_versions = ch_versions.mix(CUSTOM_GETCHROMSIZES.out.versions) + + // + // Uncompress BBSplit index or generate from scratch if required + // + ch_bbsplit_index = Channel.empty() + if ('bbsplit' in prepare_tool_indices) { + if (bbsplit_index) { + if (bbsplit_index.endsWith('.tar.gz')) { + ch_bbsplit_index = UNTAR_BBSPLIT_INDEX ( [ [:], bbsplit_index ] ).untar.map { it[1] } + ch_versions = ch_versions.mix(UNTAR_BBSPLIT_INDEX.out.versions) + } else { + ch_bbsplit_index = Channel.value(file(bbsplit_index)) + } + } else { + Channel + .from(file(bbsplit_fasta_list)) + .splitCsv() // Read in 2 column csv file: short_name,path_to_fasta + .flatMap { id, fasta -> [ [ 'id', id ], [ 'fasta', file(fasta, checkIfExists: true) ] ] } // Flatten entries to be able to groupTuple by a common key + .groupTuple() + .map { it -> it[1] } // Get rid of keys and keep grouped values + .collect { [ it ] } // Collect entries as a list to pass as "tuple val(short_names), path(path_to_fasta)" to module + .set { ch_bbsplit_fasta_list } + + ch_bbsplit_index = BBMAP_BBSPLIT ( [ [:], [] ], [], ch_fasta, ch_bbsplit_fasta_list, true ).index + ch_versions = 
ch_versions.mix(BBMAP_BBSPLIT.out.versions) + } + } + + // + // Uncompress STAR index or generate from scratch if required + // + ch_star_index = Channel.empty() + if ('star_salmon' in prepare_tool_indices) { + if (star_index) { + if (star_index.endsWith('.tar.gz')) { + ch_star_index = UNTAR_STAR_INDEX ( [ [:], star_index ] ).untar.map { it[1] } + ch_versions = ch_versions.mix(UNTAR_STAR_INDEX.out.versions) + } else { + ch_star_index = Channel.value(file(star_index)) + } + } else { + if (is_aws_igenome) { + ch_star_index = STAR_GENOMEGENERATE_IGENOMES ( ch_fasta, ch_gtf ).index + ch_versions = ch_versions.mix(STAR_GENOMEGENERATE_IGENOMES.out.versions) + } else { + ch_star_index = STAR_GENOMEGENERATE ( ch_fasta.map { [ [:], it ] }, ch_gtf.map { [ [:], it ] } ).index.map { it[1] } + ch_versions = ch_versions.mix(STAR_GENOMEGENERATE.out.versions) + } + } + } + + // + // Uncompress RSEM index or generate from scratch if required + // + ch_rsem_index = Channel.empty() + if ('star_rsem' in prepare_tool_indices) { + if (rsem_index) { + if (rsem_index.endsWith('.tar.gz')) { + ch_rsem_index = UNTAR_RSEM_INDEX ( [ [:], rsem_index ] ).untar.map { it[1] } + ch_versions = ch_versions.mix(UNTAR_RSEM_INDEX.out.versions) + } else { + ch_rsem_index = Channel.value(file(rsem_index)) + } + } else { + ch_rsem_index = RSEM_PREPAREREFERENCE_GENOME ( ch_fasta, ch_gtf ).index + ch_versions = ch_versions.mix(RSEM_PREPAREREFERENCE_GENOME.out.versions) + } + } + + // + // Uncompress HISAT2 index or generate from scratch if required + // + ch_splicesites = Channel.empty() + ch_hisat2_index = Channel.empty() + if ('hisat2' in prepare_tool_indices) { + if (!splicesites) { + ch_splicesites = HISAT2_EXTRACTSPLICESITES ( ch_gtf.map { [ [:], it ] } ).txt.map { it[1] } + ch_versions = ch_versions.mix(HISAT2_EXTRACTSPLICESITES.out.versions) + } else { + ch_splicesites = Channel.value(file(splicesites)) + } + if (hisat2_index) { + if (hisat2_index.endsWith('.tar.gz')) { + ch_hisat2_index = 
UNTAR_HISAT2_INDEX ( [ [:], hisat2_index ] ).untar.map { it[1] } + ch_versions = ch_versions.mix(UNTAR_HISAT2_INDEX.out.versions) + } else { + ch_hisat2_index = Channel.value(file(hisat2_index)) + } + } else { + ch_hisat2_index = HISAT2_BUILD ( ch_fasta.map { [ [:], it ] }, ch_gtf.map { [ [:], it ] }, ch_splicesites.map { [ [:], it ] } ).index.map { it[1] } + ch_versions = ch_versions.mix(HISAT2_BUILD.out.versions) + } + } + + // + // Uncompress Salmon index or generate from scratch if required + // + ch_salmon_index = Channel.empty() + if (salmon_index) { + if (salmon_index.endsWith('.tar.gz')) { + ch_salmon_index = UNTAR_SALMON_INDEX ( [ [:], salmon_index ] ).untar.map { it[1] } + ch_versions = ch_versions.mix(UNTAR_SALMON_INDEX.out.versions) + } else { + ch_salmon_index = Channel.value(file(salmon_index)) + } + } else { + if ('salmon' in prepare_tool_indices) { + ch_salmon_index = SALMON_INDEX ( ch_fasta, ch_transcript_fasta ).index + ch_versions = ch_versions.mix(SALMON_INDEX.out.versions) + } + } + + // + // Uncompress Kallisto index or generate from scratch if required + // + ch_kallisto_index = Channel.empty() + if (kallisto_index) { + if (kallisto_index.endsWith('.tar.gz')) { + ch_kallisto_index = UNTAR_KALLISTO_INDEX ( [ [:], kallisto_index ] ).untar + ch_versions = ch_versions.mix(UNTAR_KALLISTO_INDEX.out.versions) + } else { + ch_kallisto_index = Channel.value([[:], file(kallisto_index)]) + } + } else { + if ('kallisto' in prepare_tool_indices) { + ch_kallisto_index = KALLISTO_INDEX ( ch_transcript_fasta.map{[ [:], it]} ).index + ch_versions = ch_versions.mix(KALLISTO_INDEX.out.versions) + } + } + + emit: + fasta = ch_fasta // channel: path(genome.fasta) + gtf = ch_gtf // channel: path(genome.gtf) + fai = ch_fai // channel: path(genome.fai) + gene_bed = ch_gene_bed // channel: path(gene.bed) + transcript_fasta = ch_transcript_fasta // channel: path(transcript.fasta) + chrom_sizes = ch_chrom_sizes // channel: path(genome.sizes) + splicesites = 
ch_splicesites // channel: path(genome.splicesites.txt) + bbsplit_index = ch_bbsplit_index // channel: path(bbsplit/index/) + star_index = ch_star_index // channel: path(star/index/) + rsem_index = ch_rsem_index // channel: path(rsem/index/) + hisat2_index = ch_hisat2_index // channel: path(hisat2/index/) + salmon_index = ch_salmon_index // channel: path(salmon/index/) + kallisto_index = ch_kallisto_index // channel: [ meta, path(kallisto/index/) ] + versions = ch_versions.ifEmpty(null) // channel: [ versions.yml ] +} diff --git a/subworkflows/local/preprocess_rnaseq.nf b/subworkflows/local/preprocess_rnaseq.nf new file mode 100644 index 0000000..8848273 --- /dev/null +++ b/subworkflows/local/preprocess_rnaseq.nf @@ -0,0 +1,196 @@ +include { CAT_FASTQ } from '../../modules/nf-core/cat/fastq/main' +include { FASTQC } from '../../modules/nf-core/fastqc/main' +include { SORTMERNA } from '../../modules/nf-core/sortmerna/main' + +include { FASTQ_SUBSAMPLE_FQ_SALMON } from '../../subworkflows/nf-core/fastq_subsample_fq_salmon' + +workflow PREPROCESS_RNASEQ { + + take: + ch_fastq_in // channel: [ val(meta), [ fastq ] ] + ch_fasta + ch_transcript_fasta + ch_gtf + ch_salmon_index + ch_bbsplit_index + make_salmon_index + ch_ribo_db + + main: + + ch_versions = Channel.empty() + ch_filtered_reads = Channel.empty() + ch_fastqc_raw_multiqc = Channel.empty() + ch_fastqc_trim_multiqc = Channel.empty() + ch_trim_log_multiqc = Channel.empty() + ch_trim_read_count = Channel.empty() + + ch_fastq_in + .branch { + meta, fastqs -> + single : fastqs.size() == 1 + return [ meta, fastqs.flatten() ] + multiple: fastqs.size() > 1 + return [ meta, fastqs.flatten() ] + } + .set { ch_fastq } + + // + // MODULE: Concatenate FastQ files from same sample if required + // + CAT_FASTQ ( + ch_fastq.multiple + ) + .reads + .mix(ch_fastq.single) + .set { ch_filtered_reads } + + ch_versions = ch_versions.mix(CAT_FASTQ.out.versions.first().ifEmpty(null)) + + // + // MODULE: Remove ribosomal RNA reads + // 
+ ch_sortmerna_multiqc = Channel.empty() + if (params.remove_ribo_rna) { + ch_sortmerna_fastas = Channel.from(ch_ribo_db.readLines()).map { row -> file(row, checkIfExists: true) }.collect() + + SORTMERNA ( + ch_filtered_reads, + ch_sortmerna_fastas + ) + .reads + .set { ch_filtered_reads } + + ch_sortmerna_multiqc = SORTMERNA.out.log + ch_versions = ch_versions.mix(SORTMERNA.out.versions.first()) + } + + // Branch FastQ channels if 'auto' specified to infer strandedness + ch_filtered_reads + .branch { + meta, fastq -> + auto_strand : meta.strandedness == 'auto' + return [ meta, fastq ] + known_strand: meta.strandedness != 'auto' + return [ meta, fastq ] + } + .set { ch_strand_fastq } + + // + // SUBWORKFLOW: Sub-sample FastQ files and pseudoalign with Salmon to auto-infer strandedness + // + // Return empty channel if ch_strand_fastq.auto_strand is empty so salmon index isn't created + ch_fasta + .combine(ch_strand_fastq.auto_strand) + .map { it.first() } + .first() + .set { ch_genome_fasta } + + ch_strand_fastq.auto_strand.view() + ch_strand_fastq.known_strand.view() + + FASTQ_SUBSAMPLE_FQ_SALMON ( + ch_strand_fastq.auto_strand, + ch_genome_fasta, + ch_transcript_fasta, + ch_gtf, + ch_salmon_index, + make_salmon_index + ) + ch_versions = ch_versions.mix(FASTQ_SUBSAMPLE_FQ_SALMON.out.versions) + + FASTQ_SUBSAMPLE_FQ_SALMON + .out + .json_info + .join(ch_strand_fastq.auto_strand) + .map { meta, json, reads -> + return [ meta + [ strandedness: WorkflowRnaseq.getSalmonInferredStrandedness(json) ], reads ] + } + .mix(ch_strand_fastq.known_strand) + + // + // SUBWORKFLOW: Read QC, extract UMI and trim adapters with TrimGalore! 
+ // + if (params.trimmer == 'trimgalore') { + FASTQ_FASTQC_UMITOOLS_TRIMGALORE ( + ch_strand_inferred_fastq, + params.skip_fastqc || params.skip_qc, + params.with_umi, + params.skip_umi_extract, + params.skip_trimming, + params.umi_discard_read, + params.min_trimmed_reads + ) + ch_filtered_reads = FASTQ_FASTQC_UMITOOLS_TRIMGALORE.out.reads + ch_fastqc_raw_multiqc = FASTQ_FASTQC_UMITOOLS_TRIMGALORE.out.fastqc_zip + ch_fastqc_trim_multiqc = FASTQ_FASTQC_UMITOOLS_TRIMGALORE.out.trim_zip + ch_trim_log_multiqc = FASTQ_FASTQC_UMITOOLS_TRIMGALORE.out.trim_log + ch_trim_read_count = FASTQ_FASTQC_UMITOOLS_TRIMGALORE.out.trim_read_count + ch_versions = ch_versions.mix(FASTQ_FASTQC_UMITOOLS_TRIMGALORE.out.versions) + } + + // + // SUBWORKFLOW: Read QC, extract UMI and trim adapters with fastp + // + if (params.trimmer == 'fastp') { + FASTQ_FASTQC_UMITOOLS_FASTP ( + ch_strand_inferred_fastq, + params.skip_fastqc || params.skip_qc, + params.with_umi, + params.skip_umi_extract, + params.umi_discard_read, + params.skip_trimming, + [], + params.save_trimmed, + params.save_trimmed, + params.min_trimmed_reads + ) + ch_filtered_reads = FASTQ_FASTQC_UMITOOLS_FASTP.out.reads + ch_fastqc_raw_multiqc = FASTQ_FASTQC_UMITOOLS_FASTP.out.fastqc_raw_zip + ch_fastqc_trim_multiqc = FASTQ_FASTQC_UMITOOLS_FASTP.out.fastqc_trim_zip + ch_trim_log_multiqc = FASTQ_FASTQC_UMITOOLS_FASTP.out.trim_json + ch_trim_read_count = FASTQ_FASTQC_UMITOOLS_FASTP.out.trim_read_count + ch_versions = ch_versions.mix(FASTQ_FASTQC_UMITOOLS_FASTP.out.versions) + } + + // + // Get list of samples that failed trimming threshold for MultiQC report + // + ch_trim_read_count + .map { + meta, num_reads -> + pass_trimmed_reads[meta.id] = true + if (num_reads <= params.min_trimmed_reads.toFloat()) { + pass_trimmed_reads[meta.id] = false + return [ "$meta.id\t$num_reads" ] + } + } + .collect() + .map { + tsv_data -> + def header = ["Sample", "Reads after trimming"] + WorkflowRnaseq.multiqcTsvFromList(tsv_data, header) + } + 
.set { ch_fail_trimming_multiqc } + + // + // MODULE: Remove genome contaminant reads + // + if (!params.skip_bbsplit) { + BBMAP_BBSPLIT ( + ch_filtered_reads, + ch_bbsplit_index, + [], + [ [], [] ], + false + ) + .primary_fastq + .set { ch_filtered_reads } + ch_versions = ch_versions.mix(BBMAP_BBSPLIT.out.versions.first()) + } + + emit: + + versions = ch_versions // channel: [ versions.yml ] +} + diff --git a/subworkflows/nf-core/fastq_fastqc_umitools_fastp/main.nf b/subworkflows/nf-core/fastq_fastqc_umitools_fastp/main.nf new file mode 100644 index 0000000..3dbb27e --- /dev/null +++ b/subworkflows/nf-core/fastq_fastqc_umitools_fastp/main.nf @@ -0,0 +1,140 @@ +// +// Read QC, UMI extraction and trimming +// + +include { FASTQC as FASTQC_RAW } from '../../../modules/nf-core/fastqc/main' +include { FASTQC as FASTQC_TRIM } from '../../../modules/nf-core/fastqc/main' +include { UMITOOLS_EXTRACT } from '../../../modules/nf-core/umitools/extract/main' +include { FASTP } from '../../../modules/nf-core/fastp/main' + +// +// Function that parses fastp json output file to get total number of reads after trimming +// +import groovy.json.JsonSlurper + +def getFastpReadsAfterFiltering(json_file) { + def Map json = (Map) new JsonSlurper().parseText(json_file.text).get('summary') + return json['after_filtering']['total_reads'].toLong() +} + +workflow FASTQ_FASTQC_UMITOOLS_FASTP { + take: + reads // channel: [ val(meta), [ reads ] ] + skip_fastqc // boolean: true/false + with_umi // boolean: true/false + skip_umi_extract // boolean: true/false + umi_discard_read // integer: 0, 1 or 2 + skip_trimming // boolean: true/false + adapter_fasta // file: adapter.fasta + save_trimmed_fail // boolean: true/false + save_merged // boolean: true/false + min_trimmed_reads // integer: > 0 + + main: + ch_versions = Channel.empty() + fastqc_raw_html = Channel.empty() + fastqc_raw_zip = Channel.empty() + if (!skip_fastqc) { + FASTQC_RAW ( + reads + ) + fastqc_raw_html = FASTQC_RAW.out.html + 
fastqc_raw_zip = FASTQC_RAW.out.zip + ch_versions = ch_versions.mix(FASTQC_RAW.out.versions.first()) + } + + umi_reads = reads + umi_log = Channel.empty() + if (with_umi && !skip_umi_extract) { + UMITOOLS_EXTRACT ( + reads + ) + umi_reads = UMITOOLS_EXTRACT.out.reads + umi_log = UMITOOLS_EXTRACT.out.log + ch_versions = ch_versions.mix(UMITOOLS_EXTRACT.out.versions.first()) + + // Discard R1 / R2 if required + if (umi_discard_read in [1,2]) { + UMITOOLS_EXTRACT + .out + .reads + .map { + meta, reads -> + meta.single_end ? [ meta, reads ] : [ meta + [single_end: true], reads[umi_discard_read % 2] ] + } + .set { umi_reads } + } + } + + trim_reads = umi_reads + trim_json = Channel.empty() + trim_html = Channel.empty() + trim_log = Channel.empty() + trim_reads_fail = Channel.empty() + trim_reads_merged = Channel.empty() + fastqc_trim_html = Channel.empty() + fastqc_trim_zip = Channel.empty() + trim_read_count = Channel.empty() + if (!skip_trimming) { + FASTP ( + umi_reads, + adapter_fasta, + save_trimmed_fail, + save_merged + ) + trim_json = FASTP.out.json + trim_html = FASTP.out.html + trim_log = FASTP.out.log + trim_reads_fail = FASTP.out.reads_fail + trim_reads_merged = FASTP.out.reads_merged + ch_versions = ch_versions.mix(FASTP.out.versions.first()) + + // + // Filter FastQ files based on minimum trimmed read count after adapter trimming + // + FASTP + .out + .reads + .join(trim_json) + .map { meta, reads, json -> [ meta, reads, getFastpReadsAfterFiltering(json) ] } + .set { ch_num_trimmed_reads } + + ch_num_trimmed_reads + .filter { meta, reads, num_reads -> num_reads >= min_trimmed_reads.toLong() } + .map { meta, reads, num_reads -> [ meta, reads ] } + .set { trim_reads } + + ch_num_trimmed_reads + .map { meta, reads, num_reads -> [ meta, num_reads ] } + .set { trim_read_count } + + if (!skip_fastqc) { + FASTQC_TRIM ( + trim_reads + ) + fastqc_trim_html = FASTQC_TRIM.out.html + fastqc_trim_zip = FASTQC_TRIM.out.zip + ch_versions = 
ch_versions.mix(FASTQC_TRIM.out.versions.first()) + } + } + + emit: + reads = trim_reads // channel: [ val(meta), [ reads ] ] + + fastqc_raw_html // channel: [ val(meta), [ html ] ] + fastqc_raw_zip // channel: [ val(meta), [ zip ] ] + + umi_log // channel: [ val(meta), [ log ] ] + + trim_json // channel: [ val(meta), [ json ] ] + trim_html // channel: [ val(meta), [ html ] ] + trim_log // channel: [ val(meta), [ log ] ] + trim_reads_fail // channel: [ val(meta), [ fastq.gz ] ] + trim_reads_merged // channel: [ val(meta), [ fastq.gz ] ] + trim_read_count // channel: [ val(meta), val(count) ] + + fastqc_trim_html // channel: [ val(meta), [ html ] ] + fastqc_trim_zip // channel: [ val(meta), [ zip ] ] + + versions = ch_versions.ifEmpty(null) // channel: [ versions.yml ] +} diff --git a/subworkflows/nf-core/fastq_fastqc_umitools_fastp/meta.yml b/subworkflows/nf-core/fastq_fastqc_umitools_fastp/meta.yml new file mode 100644 index 0000000..220e8db --- /dev/null +++ b/subworkflows/nf-core/fastq_fastqc_umitools_fastp/meta.yml @@ -0,0 +1,128 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +# yaml-language-server: $schema=yaml-schema.json +name: "fastq_fastqc_umitools_fastp" +description: Read QC, UMI extraction and trimming +keywords: + - fastq + - fastqc + - qc + - UMI + - trimming + - fastp +components: + - fastqc + - umitools/extract + - fastp +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - reads: + type: file + description: | + List of input FastQ files of size 1 and 2 for single-end and paired-end data, + respectively. 
+ - skip_fastqc: + type: boolean + description: | + Skip fastqc process + - with_umi: + type: boolean + description: | + With or without umi detection + - skip_umi_extract: + type: boolean + description: | + With or without umi extraction + - umi_discard_read: + type: integer + description: | + Discard R1 / R2 if required + - skip_trimming: + type: boolean + description: | + Allows to skip fastp execution + - adapter_fasta: + type: file + description: | + Fasta file of adapter sequences + - save_trimmed_fail: + type: boolean + description: | + Save trimmed fastqs of failed samples + - save_merged: + type: boolean + description: | + Save merged fastqs + - min_trimmed_reads: + type: integer + description: | + Inputs with fewer reads than this will be filtered out of the "reads" output channel +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - reads: + type: file + description: > + Extracted FASTQ files. | For single-end reads, pattern is \${prefix}.umi_extract.fastq.gz. | + + + + For paired-end reads, pattern is \${prefix}.umi_extract_{1,2}.fastq.gz.
+ pattern: "*.{fastq.gz}" + - fastqc_html: + type: file + description: FastQC report + pattern: "*_{fastqc.html}" + - fastqc_zip: + type: file + description: FastQC report archive + pattern: "*_{fastqc.zip}" + - log: + type: file + description: Logfile for umi_tools + pattern: "*.{log}" + - trim_json: + type: file + description: FastP Trimming report + pattern: "*.{fastp.json}" + - trim_html: + type: file + description: FastP Trimming report + pattern: "*.{fastp.html}" + - log: + type: file + description: Logfile FastP + pattern: "*.{fastp.log}" + - trim_reads_fail: + type: file + description: Trimmed fastq files failing QC + pattern: "*.{fastq.gz}" + - trim_reads_merged: + type: file + description: Trimmed and merged fastq files + pattern: "*.{fastq.gz}" + - trim_read_count: + type: integer + description: Number of reads after trimming + - fastqc_trim_html: + type: file + description: FastQC report + pattern: "*_{fastqc.html}" + - fastqc_trim_zip: + type: file + description: FastQC report archive + pattern: "*_{fastqc.zip}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@robsyme" +maintainers: + - "@robsyme" diff --git a/subworkflows/nf-core/fastq_fastqc_umitools_fastp/tests/main.nf.test b/subworkflows/nf-core/fastq_fastqc_umitools_fastp/tests/main.nf.test new file mode 100644 index 0000000..cdd7398 --- /dev/null +++ b/subworkflows/nf-core/fastq_fastqc_umitools_fastp/tests/main.nf.test @@ -0,0 +1,60 @@ +nextflow_workflow { + + name "Test Workflow FASTQ_FASTQC_UMITOOLS_FASTP" + script "../main.nf" + workflow "FASTQ_FASTQC_UMITOOLS_FASTP" + tag "subworkflows" + tag "subworkflows_nfcore" + tag "subworkflows/fastq_fastqc_umitools_fastp" + tag "fastq_fastqc_umitools_fastp" + tag "fastqc" + tag "umitools/extract" + tag "fastp" + + + test("sarscov2 paired-end [fastq]") { + + when { + workflow { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + [ + 
file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) + ] + ] + input[1] = false // skip_fastqc + input[2] = false // with_umi + input[3] = false // skip_umi_extract + input[4] = 1 // umi_discard_read + input[5] = false // skip_trimming + input[6] = [] // adapter_fasta + input[7] = false // save_trimmed_fail + input[8] = false // save_merged + input[9] = 1 // min_trimmed_reads + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot(workflow.out.reads).match("reads") }, + { assert snapshot(workflow.out.umi_log).match("umi_log") }, + { assert snapshot(workflow.out.trim_json).match("trim_json") }, + { assert snapshot(workflow.out.trim_reads_fail).match("trim_reads_fail") }, + { assert snapshot(workflow.out.trim_reads_merged).match("trim_reads_merged") }, + { assert snapshot(workflow.out.trim_read_count).match("trim_read_count") }, + { assert snapshot(workflow.out.versions).match("versions") }, + + { assert workflow.out.fastqc_raw_html }, + { assert workflow.out.fastqc_raw_zip }, + { assert workflow.out.trim_html }, + { assert workflow.out.trim_log }, + { assert workflow.out.fastqc_trim_html }, + { assert workflow.out.fastqc_trim_zip } + ) + } + } +} diff --git a/subworkflows/nf-core/fastq_fastqc_umitools_fastp/tests/main.nf.test.snap b/subworkflows/nf-core/fastq_fastqc_umitools_fastp/tests/main.nf.test.snap new file mode 100644 index 0000000..38a65ae --- /dev/null +++ b/subworkflows/nf-core/fastq_fastqc_umitools_fastp/tests/main.nf.test.snap @@ -0,0 +1,81 @@ +{ + "trim_reads_merged": { + "content": [ + [ + + ] + ], + "timestamp": "2023-11-26T02:28:26.26920982" + }, + "trim_reads_fail": { + "content": [ + [ + + ] + ], + "timestamp": "2023-11-26T02:28:26.25861515" + }, + "versions": { + "content": [ + [ + "versions.yml:md5,85bd0117e5778fff18e3920972a296ad", + "versions.yml:md5,c50aa59475ab901bc6f9a2cf7b1a14e0", + 
"versions.yml:md5,f3dcaae948e8eed92b4a5557b4c6668e" + ] + ], + "timestamp": "2023-11-26T02:28:26.30891403" + }, + "trim_json": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.json:md5,1e0f8e27e71728e2b63fc64086be95cd" + ] + ] + ], + "timestamp": "2023-11-26T02:28:26.24768259" + }, + "reads": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_1.fastp.fastq.gz:md5,67b2bbae47f073e05a97a9c2edce23c7", + "test_2.fastp.fastq.gz:md5,25cbdca08e2083dbd4f0502de6b62f39" + ] + ] + ] + ], + "timestamp": "2023-12-04T11:30:32.061644815" + }, + "umi_log": { + "content": [ + [ + + ] + ], + "timestamp": "2023-11-26T02:28:26.238536" + }, + "trim_read_count": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + 198 + ] + ] + ], + "timestamp": "2023-11-26T02:28:26.27984169" + } +} \ No newline at end of file diff --git a/subworkflows/nf-core/fastq_fastqc_umitools_fastp/tests/tags.yml b/subworkflows/nf-core/fastq_fastqc_umitools_fastp/tests/tags.yml new file mode 100644 index 0000000..84a4b56 --- /dev/null +++ b/subworkflows/nf-core/fastq_fastqc_umitools_fastp/tests/tags.yml @@ -0,0 +1,2 @@ +subworkflows/fastq_fastqc_umitools_fastp: + - subworkflows/nf-core/fastq_fastqc_umitools_fastp/** diff --git a/subworkflows/nf-core/fastq_fastqc_umitools_trimgalore/main.nf b/subworkflows/nf-core/fastq_fastqc_umitools_trimgalore/main.nf new file mode 100644 index 0000000..db2e5b3 --- /dev/null +++ b/subworkflows/nf-core/fastq_fastqc_umitools_trimgalore/main.nf @@ -0,0 +1,123 @@ +// +// Read QC, UMI extraction and trimming +// + +include { FASTQC } from '../../../modules/nf-core/fastqc/main' +include { UMITOOLS_EXTRACT } from '../../../modules/nf-core/umitools/extract/main' +include { TRIMGALORE } from '../../../modules/nf-core/trimgalore/main' + +// +// Function that parses TrimGalore log output file to get total number of reads after trimming +// +def getTrimGaloreReadsAfterFiltering(log_file) { + def 
total_reads = 0 + def filtered_reads = 0 + log_file.eachLine { line -> + def total_reads_matcher = line =~ /([\d\.]+)\ssequences processed in total/ + def filtered_reads_matcher = line =~ /shorter than the length cutoff[^:]+:\s([\d\.]+)/ + if (total_reads_matcher) total_reads = total_reads_matcher[0][1].toFloat() + if (filtered_reads_matcher) filtered_reads = filtered_reads_matcher[0][1].toFloat() + } + return total_reads - filtered_reads +} + +workflow FASTQ_FASTQC_UMITOOLS_TRIMGALORE { + take: + reads // channel: [ val(meta), [ reads ] ] + skip_fastqc // boolean: true/false + with_umi // boolean: true/false + skip_umi_extract // boolean: true/false + skip_trimming // boolean: true/false + umi_discard_read // integer: 0, 1 or 2 + min_trimmed_reads // integer: > 0 + + main: + ch_versions = Channel.empty() + fastqc_html = Channel.empty() + fastqc_zip = Channel.empty() + if (!skip_fastqc) { + FASTQC (reads) + fastqc_html = FASTQC.out.html + fastqc_zip = FASTQC.out.zip + ch_versions = ch_versions.mix(FASTQC.out.versions.first()) + } + + umi_reads = reads + umi_log = Channel.empty() + if (with_umi && !skip_umi_extract) { + UMITOOLS_EXTRACT (reads) + umi_reads = UMITOOLS_EXTRACT.out.reads + umi_log = UMITOOLS_EXTRACT.out.log + ch_versions = ch_versions.mix(UMITOOLS_EXTRACT.out.versions.first()) + + // Discard R1 / R2 if required + if (umi_discard_read in [1,2]) { + UMITOOLS_EXTRACT + .out + .reads + .map { + meta, reads -> + meta.single_end ? 
[ meta, reads ] : [ meta + ['single_end': true], reads[umi_discard_read % 2] ] + } + .set { umi_reads } + } + } + + trim_reads = umi_reads + trim_unpaired = Channel.empty() + trim_html = Channel.empty() + trim_zip = Channel.empty() + trim_log = Channel.empty() + trim_read_count = Channel.empty() + if (!skip_trimming) { + TRIMGALORE (umi_reads) + trim_unpaired = TRIMGALORE.out.unpaired + trim_html = TRIMGALORE.out.html + trim_zip = TRIMGALORE.out.zip + trim_log = TRIMGALORE.out.log + ch_versions = ch_versions.mix(TRIMGALORE.out.versions.first()) + + // + // Filter FastQ files based on minimum trimmed read count after adapter trimming + // + TRIMGALORE + .out + .reads + .join(trim_log, remainder: true) + .map { + meta, reads, trim_log -> + if (trim_log) { + num_reads = getTrimGaloreReadsAfterFiltering(meta.single_end ? trim_log : trim_log[-1]) + [ meta, reads, num_reads ] + } else { + [ meta, reads, min_trimmed_reads.toFloat() + 1 ] + } + } + .set { ch_num_trimmed_reads } + + ch_num_trimmed_reads + .filter { meta, reads, num_reads -> num_reads >= min_trimmed_reads.toFloat() } + .map { meta, reads, num_reads -> [ meta, reads ] } + .set { trim_reads } + + ch_num_trimmed_reads + .map { meta, reads, num_reads -> [ meta, num_reads ] } + .set { trim_read_count } + } + + emit: + reads = trim_reads // channel: [ val(meta), [ reads ] ] + + fastqc_html // channel: [ val(meta), [ html ] ] + fastqc_zip // channel: [ val(meta), [ zip ] ] + + umi_log // channel: [ val(meta), [ log ] ] + + trim_unpaired // channel: [ val(meta), [ reads ] ] + trim_html // channel: [ val(meta), [ html ] ] + trim_zip // channel: [ val(meta), [ zip ] ] + trim_log // channel: [ val(meta), [ txt ] ] + trim_read_count // channel: [ val(meta), val(count) ] + + versions = ch_versions.ifEmpty(null) // channel: [ versions.yml ] +} diff --git a/subworkflows/nf-core/fastq_fastqc_umitools_trimgalore/meta.yml b/subworkflows/nf-core/fastq_fastqc_umitools_trimgalore/meta.yml new file mode 100644 index 
0000000..a7df97f --- /dev/null +++ b/subworkflows/nf-core/fastq_fastqc_umitools_trimgalore/meta.yml @@ -0,0 +1,101 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: "fastq_fastqc_umitools_trimgalore" +description: Read QC, UMI extraction and trimming +keywords: + - fastq + - fastqc + - qc + - UMI + - trimming + - trimgalore +components: + - fastqc + - umitools/extract + - trimgalore +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - reads: + type: file + description: | + List of input FastQ files of size 1 and 2 for single-end and paired-end data, + respectively. + - skip_fastqc: + type: boolean + description: | + Skip fastqc process + - with_umi: + type: boolean + description: | + With or without umi detection + - skip_umi_extract: + type: boolean + description: | + With or without umi extraction + - skip_trimming: + type: boolean + description: | + Allows to skip trimgalore execution + - umi_discard_read: + type: integer + description: | + Discard R1 / R2 if required + - min_trimmed_reads: + type: integer + description: | + Inputs with fewer than this many reads will be filtered out of the "reads" output channel +output: + - reads: + type: file + description: > + Extracted FASTQ files. | For single-end reads, pattern is \${prefix}.umi_extract.fastq.gz. | + + + + For paired-end reads, pattern is \${prefix}.umi_extract_{1,2}.fastq.gz. 
+ pattern: "*.{fastq.gz}" + - fastqc_html: + type: file + description: FastQC report + pattern: "*_{fastqc.html}" + - fastqc_zip: + type: file + description: FastQC report archive + pattern: "*_{fastqc.zip}" + - log: + type: file + description: Logfile for umi_tools + pattern: "*.{log}" + - trim_unpaired: + type: file + description: | + FastQ files containing unpaired reads from read 1 or read 2 + pattern: "*unpaired*.fq.gz" + - trim_html: + type: file + description: FastQC report (optional) + pattern: "*_{fastqc.html}" + - trim_zip: + type: file + description: FastQC report archive (optional) + pattern: "*_{fastqc.zip}" + - trim_log: + type: file + description: Trim Galore! trimming report + pattern: "*_{report.txt}" + - trim_read_count: + type: integer + description: Number of reads remaining after trimming for all input samples + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" + - "@KamilMaliszArdigen" +maintainers: + - "@drpatelh" + - "@KamilMaliszArdigen" diff --git a/subworkflows/nf-core/fastq_subsample_fq_salmon/main.nf b/subworkflows/nf-core/fastq_subsample_fq_salmon/main.nf new file mode 100644 index 0000000..0ac3e53 --- /dev/null +++ b/subworkflows/nf-core/fastq_subsample_fq_salmon/main.nf @@ -0,0 +1,54 @@ +// +// Sub-sample FastQ files and pseudo-align with Salmon +// can be used to infer strandedness of library +// + +include { SALMON_INDEX } from '../../../modules/nf-core/salmon/index/main' +include { FQ_SUBSAMPLE } from '../../../modules/nf-core/fq/subsample/main' +include { SALMON_QUANT } from '../../../modules/nf-core/salmon/quant/main' + +workflow FASTQ_SUBSAMPLE_FQ_SALMON { + take: + ch_reads // channel: [ val(meta), [ reads ] ] + ch_genome_fasta // channel: /path/to/genome.fasta + ch_transcript_fasta // channel: /path/to/transcript.fasta + ch_gtf // channel: /path/to/genome.gtf + ch_index // channel: /path/to/salmon/index/ + make_index // boolean: Whether to create 
salmon index before running salmon quant + + main: + + ch_versions = Channel.empty() + + // + // Create Salmon index if required + // + if (make_index) { + ch_index = SALMON_INDEX ( ch_genome_fasta, ch_transcript_fasta ).index + ch_versions = ch_versions.mix(SALMON_INDEX.out.versions) + } + + // + // Sub-sample FastQ files with fq + // + FQ_SUBSAMPLE ( ch_reads ) + ch_versions = ch_versions.mix(FQ_SUBSAMPLE.out.versions.first()) + + // + // Pseudo-alignment with Salmon + // + def lib_type = 'A' + def alignment_mode = false + SALMON_QUANT ( FQ_SUBSAMPLE.out.fastq, ch_index, ch_gtf, ch_transcript_fasta, alignment_mode, lib_type ) + ch_versions = ch_versions.mix(SALMON_QUANT.out.versions.first()) + + emit: + index = ch_index // channel: [ index ] + + reads = FQ_SUBSAMPLE.out.fastq // channel: [ val(meta), fastq ] + + results = SALMON_QUANT.out.results // channel: [ val(meta), results_dir ] + json_info = SALMON_QUANT.out.json_info // channel: [ val(meta), json_info ] + + versions = ch_versions // channel: [ versions.yml ] +} diff --git a/subworkflows/nf-core/fastq_subsample_fq_salmon/meta.yml b/subworkflows/nf-core/fastq_subsample_fq_salmon/meta.yml new file mode 100644 index 0000000..7e2f109 --- /dev/null +++ b/subworkflows/nf-core/fastq_subsample_fq_salmon/meta.yml @@ -0,0 +1,70 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: "fastq_subsample_fq_salmon" +description: Subsample fastq +keywords: + - fastq + - subsample + - strandedness +components: + - fq/subsample + - salmon/quant + - salmon/index +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - ch_reads: + type: file + description: | + List of input FastQ files of size 1 and 2 for single-end and paired-end data, + respectively. 
+ - ch_genome_fasta: + type: file + description: Genome fasta file + pattern: "Path to genome sequence in fasta format" + - ch_transcript_fasta: + type: file + description: Transcript fasta file + pattern: "Path to transcript sequence in fasta format" + - ch_gtf: + type: file + description: GTF features file + pattern: "Path features in GTF format" + - ch_index: + type: file + description: Salmon index files + pattern: "Directory containing Salmon index" + - make_index: + type: boolean + description: Whether to create salmon index before running salmon quant +output: + - index: + type: directory + description: Directory containing salmon index + pattern: "salmon" + - reads: + type: file + description: Subsampled fastq reads. + pattern: "*.{fq,fastq}{,.gz}" + - results: + type: directory + description: Folder containing the quantification results for a specific sample + pattern: "${prefix}" + - json_info: + type: file + description: | + File containing meta information from Salmon quant + Which could be used to infer strandedness among other things + pattern: "*info.json" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@robsyme" + - "@drpatelh" +maintainers: + - "@robsyme" + - "@drpatelh" diff --git a/subworkflows/nf-core/fastq_subsample_fq_salmon/tests/main.nf.test b/subworkflows/nf-core/fastq_subsample_fq_salmon/tests/main.nf.test new file mode 100644 index 0000000..19289c7 --- /dev/null +++ b/subworkflows/nf-core/fastq_subsample_fq_salmon/tests/main.nf.test @@ -0,0 +1,61 @@ +nextflow_workflow { + + name "Test Workflow FASTQ_SUBSAMPLE_FQ_SALMON" + script "../main.nf" + workflow "FASTQ_SUBSAMPLE_FQ_SALMON" + config "./nextflow.config" + tag "subworkflows" + tag "subworkflows_nfcore" + tag "subworkflows/fastq_subsample_fq_salmon" + tag "fastq_subsample_fq_salmon" + tag "salmon/index" + tag "fq/subsample" + tag "salmon/quant" + + + test("homo_sapiens paired-end [fastq]") { + + setup { + 
run("SALMON_INDEX") { + script "../../../../modules/nf-core/salmon/index/main.nf" + process { + """ + input[0] = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) // genome_fasta + input[1] = file(params.test_data['homo_sapiens']['genome']['transcriptome_fasta'], checkIfExists: true) // transcript_fasta + """ + } + } + } + + when { + workflow { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + [ + file(params.test_data['homo_sapiens']['illumina']['test_1_fastq_gz'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test_2_fastq_gz'], checkIfExists: true) + ] + ] + input[1] = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) // genome_fasta + input[2] = file(params.test_data['homo_sapiens']['genome']['transcriptome_fasta'], checkIfExists: true) // transcript_fasta + input[3] = file(params.test_data['homo_sapiens']['genome']['genome_gtf'], checkIfExists: true) // gtf + input[4] = SALMON_INDEX.out.index + input[5] = false + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot(workflow.out.reads).match("reads") }, + { assert snapshot(workflow.out.versions).match("versions") }, + + { assert workflow.out.index }, + { assert workflow.out.results }, + { assert workflow.out.json_info } + ) + } + } +} diff --git a/subworkflows/nf-core/fastq_subsample_fq_salmon/tests/main.nf.test.snap b/subworkflows/nf-core/fastq_subsample_fq_salmon/tests/main.nf.test.snap new file mode 100644 index 0000000..0146e50 --- /dev/null +++ b/subworkflows/nf-core/fastq_subsample_fq_salmon/tests/main.nf.test.snap @@ -0,0 +1,28 @@ +{ + "versions": { + "content": [ + [ + "versions.yml:md5,12c0d1f67c2afb97470ae0974e5e01bb", + "versions.yml:md5,885fde9e7beac002b3a17b66b92db4bd" + ] + ], + "timestamp": "2023-11-26T16:41:10.396971682" + }, + "reads": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + [ + 
"test_R1.fastq.gz:md5,eda1cd0cfb2d3269f7f100cdcee1f286", + "test_R2.fastq.gz:md5,3e2ea75a2f0fb0178ac6a8b41ad1a5dd" + ] + ] + ] + ], + "timestamp": "2023-11-26T16:41:10.167020985" + } +} \ No newline at end of file diff --git a/subworkflows/nf-core/fastq_subsample_fq_salmon/tests/nextflow.config b/subworkflows/nf-core/fastq_subsample_fq_salmon/tests/nextflow.config new file mode 100644 index 0000000..7fc4d63 --- /dev/null +++ b/subworkflows/nf-core/fastq_subsample_fq_salmon/tests/nextflow.config @@ -0,0 +1,9 @@ +process { + + publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } + + withName: FQ_SUBSAMPLE { + ext.args = '--record-count 1000000 --seed 1' + } + +} diff --git a/subworkflows/nf-core/fastq_subsample_fq_salmon/tests/tags.yml b/subworkflows/nf-core/fastq_subsample_fq_salmon/tests/tags.yml new file mode 100644 index 0000000..cc809c5 --- /dev/null +++ b/subworkflows/nf-core/fastq_subsample_fq_salmon/tests/tags.yml @@ -0,0 +1,2 @@ +subworkflows/fastq_subsample_fq_salmon: + - subworkflows/nf-core/fastq_subsample_fq_salmon/** diff --git a/workflows/riboseq.nf b/workflows/riboseq.nf index 47abb81..5f5487f 100644 --- a/workflows/riboseq.nf +++ b/workflows/riboseq.nf @@ -1,10 +1,29 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + GENOME PARAMETER VALUES +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +params.fasta = WorkflowMain.getGenomeAttribute(params, 'fasta') +params.transcript_fasta = WorkflowMain.getGenomeAttribute(params, 'transcript_fasta') +params.additional_fasta = WorkflowMain.getGenomeAttribute(params, 'additional_fasta') +params.gtf = WorkflowMain.getGenomeAttribute(params, 'gtf') +params.gff = WorkflowMain.getGenomeAttribute(params, 'gff') +params.gene_bed = WorkflowMain.getGenomeAttribute(params, 'bed12') +params.bbsplit_index = WorkflowMain.getGenomeAttribute(params, 'bbsplit') +params.star_index = 
WorkflowMain.getGenomeAttribute(params, 'star') +params.hisat2_index = WorkflowMain.getGenomeAttribute(params, 'hisat2') +params.rsem_index = WorkflowMain.getGenomeAttribute(params, 'rsem') +params.salmon_index = WorkflowMain.getGenomeAttribute(params, 'salmon') +params.kallisto_index = WorkflowMain.getGenomeAttribute(params, 'kallisto') + /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PRINT PARAMS SUMMARY ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -include { paramsSummaryLog; paramsSummaryMap } from 'plugin/nf-validation' +include { paramsSummaryLog; paramsSummaryMap; fromSamplesheet } from 'plugin/nf-validation' def logo = NfcoreTemplate.logo(workflow, params.monochrome_logs) def citation = '\n' + WorkflowMain.citation(workflow) + '\n' @@ -13,8 +32,59 @@ def summary_params = paramsSummaryMap(workflow) // Print parameter summary log to screen log.info logo + paramsSummaryLog(workflow) + citation +// Check if an AWS iGenome has been provided to use the appropriate version of STAR +def is_aws_igenome = false +if (params.fasta && params.gtf) { + if ((file(params.fasta).getName() - '.gz' == 'genome.fa') && (file(params.gtf).getName() - '.gz' == 'genes.gtf')) { + is_aws_igenome = true + } +} + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + VALIDATE INPUTS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + WorkflowRiboseq.initialise(params, log) +// Check rRNA databases for sortmerna +if (params.remove_ribo_rna) { + ch_ribo_db = file(params.ribo_database_manifest) + if (ch_ribo_db.isEmpty()) {exit 1, "File provided with --ribo_database_manifest is empty: ${ch_ribo_db.getName()}!"} +} + +// Check if file with list of fastas is provided when running BBSplit +if (!params.skip_bbsplit && !params.bbsplit_index && params.bbsplit_fasta_list) { + ch_bbsplit_fasta_list = 
file(params.bbsplit_fasta_list) + if (ch_bbsplit_fasta_list.isEmpty()) {exit 1, "File provided with --bbsplit_fasta_list is empty: ${ch_bbsplit_fasta_list.getName()}!"} +} + +// Check alignment parameters +def prepareToolIndices = [] +if (!params.skip_bbsplit) { prepareToolIndices << 'bbsplit' } +if (!params.skip_alignment) { prepareToolIndices << params.aligner } +if (!params.skip_pseudo_alignment && params.pseudo_aligner) { prepareToolIndices << params.pseudo_aligner } + +// Determine whether to filter the GTF or not +def filterGtf = + (( + // Condition 1: Alignment is required and aligner is set + !params.skip_alignment && params.aligner + ) || + ( + // Condition 2: Pseudoalignment is required and pseudoaligner is set + !params.skip_pseudo_alignment && params.pseudo_aligner + ) || + ( + // Condition 3: Transcript FASTA file is not provided + !params.transcript_fasta + )) && + ( + // Condition 4: --skip_gtf_filter is not provided + !params.skip_gtf_filter + ) + /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ CONFIG FILES @@ -35,7 +105,8 @@ ch_multiqc_custom_methods_description = params.multiqc_methods_description ? 
fil // // SUBWORKFLOW: Consisting of a mix of local and nf-core/modules // -include { INPUT_CHECK } from '../subworkflows/local/input_check' +include { PREPARE_GENOME } from '../subworkflows/local/prepare_genome' +include { PREPROCESS_RNASEQ } from '../subworkflows/local/preprocess_rnaseq' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -46,7 +117,6 @@ include { INPUT_CHECK } from '../subworkflows/local/input_check' // // MODULE: Installed directly from nf-core/modules // -include { FASTQC } from '../modules/nf-core/fastqc/main' include { MULTIQC } from '../modules/nf-core/multiqc/main' include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/custom/dumpsoftwareversions/main' @@ -62,25 +132,76 @@ def multiqc_report = [] workflow RIBOSEQ { ch_versions = Channel.empty() - + // - // SUBWORKFLOW: Read in samplesheet, validate and stage input files + // SUBWORKFLOW: Uncompress and prepare reference genome files // - INPUT_CHECK ( - file(params.input) + def biotype = params.gencode ? "gene_type" : params.featurecounts_group_type + PREPARE_GENOME ( + params.fasta, + params.gtf, + params.gff, + params.additional_fasta, + params.transcript_fasta, + params.gene_bed, + params.splicesites, + params.bbsplit_fasta_list, + params.star_index, + params.rsem_index, + params.salmon_index, + params.kallisto_index, + params.hisat2_index, + params.bbsplit_index, + params.gencode, + is_aws_igenome, + biotype, + prepareToolIndices, + filterGtf ) - ch_versions = ch_versions.mix(INPUT_CHECK.out.versions) - // TODO: OPTIONAL, you can use nf-validation plugin to create an input channel from the samplesheet with Channel.fromSamplesheet("input") - // See the documentation https://nextflow-io.github.io/nf-validation/samplesheets/fromSamplesheet/ - // ! 
There is currently no tooling to help you write a sample sheet schema + ch_versions = ch_versions.mix(PREPARE_GENOME.out.versions) + + // Check if contigs in genome fasta file > 512 Mbp + if (!params.skip_alignment && !params.bam_csi_index) { + PREPARE_GENOME + .out + .fai + .map { WorkflowRiboseq.checkMaxContigSize(it, log) } + } + + // + // Create input channel from input file provided through params.input + // + Channel + .fromSamplesheet("input") + .map { + meta, fastq_1, fastq_2 -> + if (!fastq_2) { + return [ meta.id, meta + [ single_end:true ], [ fastq_1 ] ] + } else { + return [ meta.id, meta + [ single_end:false ], [ fastq_1, fastq_2 ] ] + } + } + .groupTuple() + .map { + WorkflowRiboseq.validateInput(it) + } + .set { ch_fastq } // - // MODULE: Run FastQC + // SUBWORKFLOW: preprocess using same methodology as RNA-seq // - FASTQC ( - INPUT_CHECK.out.reads + + PREPROCESS_RNASEQ ( + ch_fastq, + PREPARE_GENOME.out.fasta, + PREPARE_GENOME.out.transcript_fasta, + PREPARE_GENOME.out.gtf, + PREPARE_GENOME.out.salmon_index, + PREPARE_GENOME.out.bbsplit_index, + !params.salmon_index && !('salmon' in prepareToolIndices), + ch_ribo_db ) - ch_versions = ch_versions.mix(FASTQC.out.versions.first()) + CUSTOM_DUMPSOFTWAREVERSIONS ( ch_versions.unique().collectFile(name: 'collated_versions.yml') @@ -99,7 +220,6 @@ workflow RIBOSEQ { ch_multiqc_files = ch_multiqc_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml')) ch_multiqc_files = ch_multiqc_files.mix(ch_methods_description.collectFile(name: 'methods_description_mqc.yaml')) ch_multiqc_files = ch_multiqc_files.mix(CUSTOM_DUMPSOFTWAREVERSIONS.out.mqc_yml.collect()) - ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip.collect{it[1]}.ifEmpty([])) MULTIQC ( ch_multiqc_files.collect(), From d3be5ed0017829e81fe1473a2bd972ed5913a70a Mon Sep 17 00:00:00 2001 From: Jonathan Manning Date: Thu, 21 Dec 2023 11:35:10 +0000 Subject: [PATCH 02/30] [skip ci] Unrestrict test resources while I'm mucking about 
--- conf/test.config | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/conf/test.config b/conf/test.config index c2dc1e4..61016e3 100644 --- a/conf/test.config +++ b/conf/test.config @@ -15,9 +15,9 @@ params { config_profile_description = 'Minimal test dataset to check pipeline function' // Limit resources so that this can run on GitHub Actions - max_cpus = 2 - max_memory = '16.GB' - max_time = '16.h' + //max_cpus = 4 + //max_memory = '20.GB' + //max_time = '16.h' // Input data input = 'https://raw.githubusercontent.com/nf-core/test-datasets/002aa1f1df77b4dbfc816340c8d7d629805b166b/testdata/samplesheet.csv' From 6d96c751a3ac14d875b746736095aedd04d2f207 Mon Sep 17 00:00:00 2001 From: Jonathan Manning Date: Thu, 21 Dec 2023 11:40:13 +0000 Subject: [PATCH 03/30] Add gtf to schema --- nextflow_schema.json | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/nextflow_schema.json b/nextflow_schema.json index 61e8772..fa85a9a 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -65,6 +65,16 @@ "help_text": "This parameter is *mandatory* if `--genome` is not specified. If you don't have a BWA index available this will be generated for you automatically. Combine with `--save_reference` to save BWA index for future runs.", "fa_icon": "far fa-file-code" }, + "gtf": { + "type": "string", + "format": "file-path", + "exists": true, + "mimetype": "text/plain", + "pattern": "^\\S+\\.gtf(\\.gz)?$", + "description": "Path to GTF annotation file.", + "fa_icon": "fas fa-code-branch", + "help_text": "This parameter is *mandatory* if `--genome` is not specified." 
+ }, "igenomes_ignore": { "type": "boolean", "description": "Do not load the iGenomes reference config.", From c99d04204fa5ff0f4fa59fa52f5ee7544146c112 Mon Sep 17 00:00:00 2001 From: Jonathan Manning Date: Thu, 21 Dec 2023 18:56:10 +0000 Subject: [PATCH 04/30] Reduce kmer size --- conf/modules.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/modules.config b/conf/modules.config index 62d5b2b..63780fd 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -46,7 +46,7 @@ process { } withName: 'SALMON_INDEX' { - ext.args = '-k 21' + ext.args = '-k 17' publishDir = [ enabled: false ] From 71316504ff0e721d25eab97c2617d2ea39f8ba6f Mon Sep 17 00:00:00 2001 From: Jonathan Manning Date: Thu, 21 Dec 2023 18:57:06 +0000 Subject: [PATCH 05/30] Trim before attempting strand detection --- nextflow.config | 8 +++ subworkflows/local/preprocess_rnaseq.nf | 93 +++++++++++++------------ 2 files changed, 56 insertions(+), 45 deletions(-) diff --git a/nextflow.config b/nextflow.config index 5a4e939..9528655 100644 --- a/nextflow.config +++ b/nextflow.config @@ -32,6 +32,14 @@ params { umitools_umi_separator = null umi_discard_read = null save_umi_intermeds = false + + // Trimming + trimmer = 'trimgalore' + min_trimmed_reads = 10000 + extra_trimgalore_args = null + extra_fastp_args = null + save_trimmed = false + skip_trimming = false // BBSplit genome filtering bbsplit_fasta_list = null diff --git a/subworkflows/local/preprocess_rnaseq.nf b/subworkflows/local/preprocess_rnaseq.nf index 8848273..eb3afd5 100644 --- a/subworkflows/local/preprocess_rnaseq.nf +++ b/subworkflows/local/preprocess_rnaseq.nf @@ -3,6 +3,7 @@ include { FASTQC } from '../../modules/nf-core/fastqc/main' include { SORTMERNA } from '../../modules/nf-core/sortmerna/main' include { FASTQ_SUBSAMPLE_FQ_SALMON } from '../../subworkflows/nf-core/fastq_subsample_fq_salmon' +include { FASTQ_FASTQC_UMITOOLS_TRIMGALORE } from '../../subworkflows/nf-core/fastq_fastqc_umitools_trimgalore' 
workflow PREPROCESS_RNASEQ { @@ -65,55 +66,12 @@ workflow PREPROCESS_RNASEQ { ch_versions = ch_versions.mix(SORTMERNA.out.versions.first()) } - // Branch FastQ channels if 'auto' specified to infer strandedness - ch_filtered_reads - .branch { - meta, fastq -> - auto_strand : meta.strandedness == 'auto' - return [ meta, fastq ] - known_strand: meta.strandedness != 'auto' - return [ meta, fastq ] - } - .set { ch_strand_fastq } - - // - // SUBWORKFLOW: Sub-sample FastQ files and pseudoalign with Salmon to auto-infer strandedness - // - // Return empty channel if ch_strand_fastq.auto_strand is empty so salmon index isn't created - ch_fasta - .combine(ch_strand_fastq.auto_strand) - .map { it.first() } - .first() - .set { ch_genome_fasta } - - ch_strand_fastq.auto_strand.view() - ch_strand_fastq.known_strand.view() - - FASTQ_SUBSAMPLE_FQ_SALMON ( - ch_strand_fastq.auto_strand, - ch_genome_fasta, - ch_transcript_fasta, - ch_gtf, - ch_salmon_index, - make_salmon_index - ) - ch_versions = ch_versions.mix(FASTQ_SUBSAMPLE_FQ_SALMON.out.versions) - - FASTQ_SUBSAMPLE_FQ_SALMON - .out - .json_info - .join(ch_strand_fastq.auto_strand) - .map { meta, json, reads -> - return [ meta + [ strandedness: WorkflowRnaseq.getSalmonInferredStrandedness(json) ], reads ] - } - .mix(ch_strand_fastq.known_strand) - // // SUBWORKFLOW: Read QC, extract UMI and trim adapters with TrimGalore! 
// if (params.trimmer == 'trimgalore') { FASTQ_FASTQC_UMITOOLS_TRIMGALORE ( - ch_strand_inferred_fastq, + ch_filtered_reads, params.skip_fastqc || params.skip_qc, params.with_umi, params.skip_umi_extract, @@ -134,7 +92,7 @@ workflow PREPROCESS_RNASEQ { // if (params.trimmer == 'fastp') { FASTQ_FASTQC_UMITOOLS_FASTP ( - ch_strand_inferred_fastq, + ch_filtered_reads, params.skip_fastqc || params.skip_qc, params.with_umi, params.skip_umi_extract, @@ -188,6 +146,51 @@ workflow PREPROCESS_RNASEQ { .set { ch_filtered_reads } ch_versions = ch_versions.mix(BBMAP_BBSPLIT.out.versions.first()) } + + // Branch FastQ channels if 'auto' specified to infer strandedness + ch_filtered_reads + .branch { + meta, fastq -> + auto_strand : meta.strandedness == 'auto' + return [ meta, fastq ] + known_strand: meta.strandedness != 'auto' + return [ meta, fastq ] + } + .set { ch_strand_fastq } + + // + // SUBWORKFLOW: Sub-sample FastQ files and pseudoalign with Salmon to auto-infer strandedness + // + // Return empty channel if ch_strand_fastq.auto_strand is empty so salmon index isn't created + + ch_fasta + .combine(ch_strand_fastq.auto_strand) + .map { it.first() } + .first() + .set { ch_genome_fasta } + + ch_strand_fastq.auto_strand.view() + ch_strand_fastq.known_strand.view() + + FASTQ_SUBSAMPLE_FQ_SALMON ( + ch_strand_fastq.auto_strand, + ch_genome_fasta, + ch_transcript_fasta, + ch_gtf, + ch_salmon_index, + make_salmon_index + ) + ch_versions = ch_versions.mix(FASTQ_SUBSAMPLE_FQ_SALMON.out.versions) + + FASTQ_SUBSAMPLE_FQ_SALMON + .out + .json_info + .join(ch_strand_fastq.auto_strand) + .map { meta, json, reads -> + return [ meta + [ strandedness: WorkflowRnaseq.getSalmonInferredStrandedness(json) ], reads ] + } + .mix(ch_strand_fastq.known_strand) + .set { ch_strand_inferred_fastq } emit: From a0f25d66e02d2c66417ad5eb41b87c8e0faa0b6f Mon Sep 17 00:00:00 2001 From: Jonathan Manning Date: Thu, 21 Dec 2023 19:21:34 +0000 Subject: [PATCH 06/30] Add pass_trimmed_reads --- 
subworkflows/local/preprocess_rnaseq.nf | 2 ++ 1 file changed, 2 insertions(+) diff --git a/subworkflows/local/preprocess_rnaseq.nf b/subworkflows/local/preprocess_rnaseq.nf index eb3afd5..9610740 100644 --- a/subworkflows/local/preprocess_rnaseq.nf +++ b/subworkflows/local/preprocess_rnaseq.nf @@ -5,6 +5,8 @@ include { SORTMERNA } from '../../modules/nf-core/sortmerna/ma include { FASTQ_SUBSAMPLE_FQ_SALMON } from '../../subworkflows/nf-core/fastq_subsample_fq_salmon' include { FASTQ_FASTQC_UMITOOLS_TRIMGALORE } from '../../subworkflows/nf-core/fastq_fastqc_umitools_trimgalore' +def pass_trimmed_reads = [:] + workflow PREPROCESS_RNASEQ { take: From 6c0f58e3d6459d0b351a5652acd9b1851618ce26 Mon Sep 17 00:00:00 2001 From: Jonathan Manning Date: Thu, 21 Dec 2023 19:59:49 +0000 Subject: [PATCH 07/30] Fix lib calls --- subworkflows/local/preprocess_rnaseq.nf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/subworkflows/local/preprocess_rnaseq.nf b/subworkflows/local/preprocess_rnaseq.nf index 9610740..819d14b 100644 --- a/subworkflows/local/preprocess_rnaseq.nf +++ b/subworkflows/local/preprocess_rnaseq.nf @@ -129,7 +129,7 @@ workflow PREPROCESS_RNASEQ { .map { tsv_data -> def header = ["Sample", "Reads after trimming"] - WorkflowRnaseq.multiqcTsvFromList(tsv_data, header) + WorkflowRiboseq.multiqcTsvFromList(tsv_data, header) } .set { ch_fail_trimming_multiqc } @@ -189,7 +189,7 @@ workflow PREPROCESS_RNASEQ { .json_info .join(ch_strand_fastq.auto_strand) .map { meta, json, reads -> - return [ meta + [ strandedness: WorkflowRnaseq.getSalmonInferredStrandedness(json) ], reads ] + return [ meta + [ strandedness: WorkflowRiboseq.getSalmonInferredStrandedness(json) ], reads ] } .mix(ch_strand_fastq.known_strand) .set { ch_strand_inferred_fastq } From c8309d52738fe0a6257221cdcd4a04f26462e49c Mon Sep 17 00:00:00 2001 From: Jonathan Manning Date: Thu, 21 Dec 2023 20:32:56 +0000 Subject: [PATCH 08/30] Fix test profile --- conf/test.config | 5 ++++- 1 file 
changed, 4 insertions(+), 1 deletion(-) diff --git a/conf/test.config b/conf/test.config index 61016e3..9737ff2 100644 --- a/conf/test.config +++ b/conf/test.config @@ -20,8 +20,11 @@ params { //max_time = '16.h' // Input data - input = 'https://raw.githubusercontent.com/nf-core/test-datasets/002aa1f1df77b4dbfc816340c8d7d629805b166b/testdata/samplesheet.csv' + //input = 'https://raw.githubusercontent.com/nf-core/test-datasets/002aa1f1df77b4dbfc816340c8d7d629805b166b/testdata/samplesheet.csv' + input = 'https://raw.githubusercontent.com/nf-core/test-datasets/riboseq/testdata/samplesheet.csv' + //fasta = 'http://ftp.ensembl.org/pub/release-110/fasta/mus_musculus/dna/Mus_musculus.GRCm39.dna.chromosome.1.fa.gz' fasta = 'http://ftp.ensembl.org/pub/release-110/fasta/mus_musculus/dna/Mus_musculus.GRCm39.dna.primary_assembly.fa.gz' gtf = 'http://ftp.ensembl.org/pub/release-110/gtf/mus_musculus/Mus_musculus.GRCm39.110.gtf.gz' + min_trimmed_reads = 1000 } From 0e243eebf6cbc68599ac3eb97959bdaacbc34132 Mon Sep 17 00:00:00 2001 From: Jonathan Manning Date: Thu, 21 Dec 2023 21:35:55 +0000 Subject: [PATCH 09/30] Update conf --- conf/test.config | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/conf/test.config b/conf/test.config index 9737ff2..d363bb6 100644 --- a/conf/test.config +++ b/conf/test.config @@ -20,11 +20,9 @@ params { //max_time = '16.h' // Input data - //input = 'https://raw.githubusercontent.com/nf-core/test-datasets/002aa1f1df77b4dbfc816340c8d7d629805b166b/testdata/samplesheet.csv' input = 'https://raw.githubusercontent.com/nf-core/test-datasets/riboseq/testdata/samplesheet.csv' - //fasta = 'http://ftp.ensembl.org/pub/release-110/fasta/mus_musculus/dna/Mus_musculus.GRCm39.dna.chromosome.1.fa.gz' - fasta = 'http://ftp.ensembl.org/pub/release-110/fasta/mus_musculus/dna/Mus_musculus.GRCm39.dna.primary_assembly.fa.gz' + fasta = 'http://ftp.ensembl.org/pub/release-110/fasta/mus_musculus/dna/Mus_musculus.GRCm39.dna.chromosome.1.fa.gz' gtf = 
'http://ftp.ensembl.org/pub/release-110/gtf/mus_musculus/Mus_musculus.GRCm39.110.gtf.gz' min_trimmed_reads = 1000 } From 601aed2db4223d37af43de14c89a515486d7ae82 Mon Sep 17 00:00:00 2001 From: Jonathan Manning Date: Fri, 22 Dec 2023 10:53:44 +0000 Subject: [PATCH 10/30] Messy but working rnaseq-style proprocessing --- assets/multiqc_config.yml | 174 +++++++++++++++++++++++- subworkflows/local/preprocess_rnaseq.nf | 83 ++++++++--- workflows/riboseq.nf | 44 +++--- 3 files changed, 257 insertions(+), 44 deletions(-) diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml index f25fe6d..4085d95 100644 --- a/assets/multiqc_config.yml +++ b/assets/multiqc_config.yml @@ -1,13 +1,179 @@ report_comment: > - This report has been generated by the nf-core/riboseq + This report has been generated by the nf-core/rnaseq analysis pipeline. For information about how to interpret these results, please see the - documentation. + documentation. report_section_order: - "nf-core-riboseq-methods-description": + "nf-core-rnaseq-methods-description": order: -1000 software_versions: order: -1001 - "nf-core-riboseq-summary": + "nf-core-rnaseq-summary": order: -1002 export_plots: true + +# Run only these modules +run_modules: + - custom_content + - fastqc + - cutadapt + - fastp + - sortmerna + - star + - hisat2 + - rsem + - salmon + - kallisto + - samtools + - picard + - preseq + - rseqc + - qualimap + +# Order of modules +top_modules: + - "fail_trimmed_samples" + - "fail_mapped_samples" + - "fail_strand_check" + - "star_rsem_deseq2_pca" + - "star_rsem_deseq2_clustering" + - "star_salmon_deseq2_pca" + - "star_salmon_deseq2_clustering" + - "salmon_deseq2_pca" + - "salmon_deseq2_clustering" + - "biotype_counts" + - "dupradar" + +module_order: + - fastqc: + name: "FastQC (raw)" + info: "This section of the report shows FastQC results before adapter trimming." 
+ path_filters: + - "./fastqc/raw/*.zip" + - cutadapt + - fastp + - fastqc: + name: "FastQC (trimmed)" + info: "This section of the report shows FastQC results after adapter trimming." + path_filters: + - "./fastqc/trim/*.zip" + +# Don't show % Dups in the General Stats table (we have this from Picard) +table_columns_visible: + fastqc: + percent_duplicates: False + +extra_fn_clean_exts: + - ".umi_dedup" + - "_val" + - ".markdup" + - "_primary" + +# Customise the module search patterns to speed up execution time +# - Skip module sub-tools that we are not interested in +# - Replace file-content searching with filename pattern searching +# - Don't add anything that is the same as the MultiQC default +# See https://multiqc.info/docs/#optimise-file-search-patterns for details +sp: + cutadapt: + fn: "*trimming_report.txt" + + fastp: + fn: "*.fastp.json" + + sortmerna: + fn: "*.sortmerna.log" + + hisat2: + fn: "*.hisat2.summary.log" + + salmon/meta: + fn: "meta_info.json" + + preseq: + fn: "*.lc_extrap.txt" + + samtools/stats: + fn: "*.stats" + samtools/flagstat: + fn: "*.flagstat" + samtools/idxstats: + fn: "*.idxstats*" + + rseqc/bam_stat: + fn: "*.bam_stat.txt" + rseqc/gene_body_coverage: + skip: true + rseqc/junction_annotation: + fn: "*.junction_annotation.log" + rseqc/read_gc: + skip: true + rseqc/read_distribution: + fn: "*.read_distribution.txt" + rseqc/infer_experiment: + fn: "*.infer_experiment.txt" + rseqc/tin: + fn: "*.summary.txt" + + picard/markdups: + fn: "*.MarkDuplicates.metrics.txt" + picard/alignment_metrics: + skip: true + picard/basedistributionbycycle: + skip: true + picard/gcbias: + skip: true + picard/hsmetrics: + skip: true + picard/insertsize: + skip: true + picard/oxogmetrics: + skip: true + picard/pcr_metrics: + skip: true + picard/quality_by_cycle: + skip: true + picard/quality_score_distribution: + skip: true + picard/quality_yield_metrics: + skip: true + picard/rnaseqmetrics: + skip: true + picard/rrbs_metrics: + skip: true + 
picard/sam_file_validation: + skip: true + picard/variant_calling_metrics: + skip: true + picard/wgs_metrics: + skip: true + +# See https://github.com/ewels/MultiQC_TestData/blob/master/data/custom_content/with_config/table_headerconfig/multiqc_config.yaml +custom_data: + fail_trimmed_samples: + section_name: "WARNING: Fail Trimming Check" + description: "List of samples that failed the minimum trimmed reads threshold specified via the '--min_trimmed_reads' parameter, and hence were ignored for the downstream processing steps." + plot_type: "table" + pconfig: + id: "fail_trimmed_samples_table" + table_title: "Samples failed trimming threshold" + namespace: "Samples failed trimming threshold" + format: "{:.0f}" + fail_mapped_samples: + section_name: "WARNING: Fail Alignment Check" + description: "List of samples that failed the STAR minimum mapped reads threshold specified via the '--min_mapped_reads' parameter, and hence were ignored for the downstream processing steps." + plot_type: "table" + pconfig: + id: "fail_mapped_samples_table" + table_title: "Samples failed mapping threshold" + namespace: "Samples failed mapping threshold" + format: "{:.2f}" + fail_strand_check: + section_name: "WARNING: Fail Strand Check" + description: "List of samples that failed the strandedness check between that provided in the samplesheet and calculated by the RSeQC infer_experiment.py tool." 
+ plot_type: "table" + pconfig: + id: "fail_strand_check_table" + table_title: "Samples failed strandedness check" + namespace: "Samples failed strandedness check" + format: "{:.2f}" diff --git a/subworkflows/local/preprocess_rnaseq.nf b/subworkflows/local/preprocess_rnaseq.nf index 819d14b..5beb3c5 100644 --- a/subworkflows/local/preprocess_rnaseq.nf +++ b/subworkflows/local/preprocess_rnaseq.nf @@ -1,3 +1,5 @@ +import groovy.json.JsonSlurper + include { CAT_FASTQ } from '../../modules/nf-core/cat/fastq/main' include { FASTQC } from '../../modules/nf-core/fastqc/main' include { SORTMERNA } from '../../modules/nf-core/sortmerna/main' @@ -7,6 +9,33 @@ include { FASTQ_FASTQC_UMITOOLS_TRIMGALORE } from '../../subworkflows/nf-core/fa def pass_trimmed_reads = [:] +public static String getSalmonInferredStrandedness(json_file) { + def lib_type = new JsonSlurper().parseText(json_file.text).get('library_types')[0] + def strandedness = 'reverse' + if (lib_type) { + if (lib_type in ['U', 'IU']) { + strandedness = 'unstranded' + } else if (lib_type in ['SF', 'ISF']) { + strandedness = 'forward' + } else if (lib_type in ['SR', 'ISR']) { + strandedness = 'reverse' + } + } + return strandedness +} + +// +// Create MultiQC tsv custom content from a list of values +// +public static String multiqcTsvFromList(tsv_data, header) { + def tsv_string = "" + if (tsv_data.size() > 0) { + tsv_string += "${header.join('\t')}\n" + tsv_string += tsv_data.join('\n') + } + return tsv_string +} + workflow PREPROCESS_RNASEQ { take: @@ -18,15 +47,14 @@ workflow PREPROCESS_RNASEQ { ch_bbsplit_index make_salmon_index ch_ribo_db + trimmer main: ch_versions = Channel.empty() ch_filtered_reads = Channel.empty() - ch_fastqc_raw_multiqc = Channel.empty() - ch_fastqc_trim_multiqc = Channel.empty() - ch_trim_log_multiqc = Channel.empty() ch_trim_read_count = Channel.empty() + ch_multiqc_files = Channel.empty() ch_fastq_in .branch { @@ -47,13 +75,12 @@ workflow PREPROCESS_RNASEQ { .reads 
.mix(ch_fastq.single) .set { ch_filtered_reads } - + ch_versions = ch_versions.mix(CAT_FASTQ.out.versions.first().ifEmpty(null)) // // MODULE: Remove ribosomal RNA reads // - ch_sortmerna_multiqc = Channel.empty() if (params.remove_ribo_rna) { ch_sortmerna_fastas = Channel.from(ch_ribo_db.readLines()).map { row -> file(row, checkIfExists: true) }.collect() @@ -64,14 +91,15 @@ workflow PREPROCESS_RNASEQ { .reads .set { ch_filtered_reads } - ch_sortmerna_multiqc = SORTMERNA.out.log + ch_multiqc_files = ch_multiqc_files.mix(SORTMERNA.out.log.map{it[1]}) + ch_versions = ch_versions.mix(SORTMERNA.out.versions.first()) } // // SUBWORKFLOW: Read QC, extract UMI and trim adapters with TrimGalore! // - if (params.trimmer == 'trimgalore') { + if (trimmer == 'trimgalore') { FASTQ_FASTQC_UMITOOLS_TRIMGALORE ( ch_filtered_reads, params.skip_fastqc || params.skip_qc, @@ -82,17 +110,20 @@ workflow PREPROCESS_RNASEQ { params.min_trimmed_reads ) ch_filtered_reads = FASTQ_FASTQC_UMITOOLS_TRIMGALORE.out.reads - ch_fastqc_raw_multiqc = FASTQ_FASTQC_UMITOOLS_TRIMGALORE.out.fastqc_zip - ch_fastqc_trim_multiqc = FASTQ_FASTQC_UMITOOLS_TRIMGALORE.out.trim_zip - ch_trim_log_multiqc = FASTQ_FASTQC_UMITOOLS_TRIMGALORE.out.trim_log ch_trim_read_count = FASTQ_FASTQC_UMITOOLS_TRIMGALORE.out.trim_read_count + ch_versions = ch_versions.mix(FASTQ_FASTQC_UMITOOLS_TRIMGALORE.out.versions) + ch_multiqc_files = FASTQ_FASTQC_UMITOOLS_TRIMGALORE.out.fastqc_zip + .mix(FASTQ_FASTQC_UMITOOLS_TRIMGALORE.out.trim_zip) + .mix(FASTQ_FASTQC_UMITOOLS_TRIMGALORE.out.trim_log) + .map{it[1]} + .mix(ch_multiqc_files) } // // SUBWORKFLOW: Read QC, extract UMI and trim adapters with fastp // - if (params.trimmer == 'fastp') { + if (trimmer == 'fastp') { FASTQ_FASTQC_UMITOOLS_FASTP ( ch_filtered_reads, params.skip_fastqc || params.skip_qc, @@ -106,16 +137,20 @@ workflow PREPROCESS_RNASEQ { params.min_trimmed_reads ) ch_filtered_reads = FASTQ_FASTQC_UMITOOLS_FASTP.out.reads - ch_fastqc_raw_multiqc = 
FASTQ_FASTQC_UMITOOLS_FASTP.out.fastqc_raw_zip - ch_fastqc_trim_multiqc = FASTQ_FASTQC_UMITOOLS_FASTP.out.fastqc_trim_zip - ch_trim_log_multiqc = FASTQ_FASTQC_UMITOOLS_FASTP.out.trim_json ch_trim_read_count = FASTQ_FASTQC_UMITOOLS_FASTP.out.trim_read_count ch_versions = ch_versions.mix(FASTQ_FASTQC_UMITOOLS_FASTP.out.versions) + + ch_multiqc_files = FASTQ_FASTQC_UMITOOLS_FASTP.out.fastqc_zip + .mix(FASTQ_FASTQC_UMITOOLS_FASTP.out.trim_zip) + .mix(FASTQ_FASTQC_UMITOOLS_FASTP.out.trim_log) + .map{it[1]} + .mix(ch_multiqc_files) } - + // // Get list of samples that failed trimming threshold for MultiQC report // + ch_trim_read_count .map { meta, num_reads -> @@ -129,9 +164,14 @@ workflow PREPROCESS_RNASEQ { .map { tsv_data -> def header = ["Sample", "Reads after trimming"] - WorkflowRiboseq.multiqcTsvFromList(tsv_data, header) + multiqcTsvFromList(tsv_data, header) } .set { ch_fail_trimming_multiqc } + + ch_multiqc_files = ch_multiqc_files + .mix( + ch_fail_trimming_multiqc.collectFile(name: 'fail_trimmed_samples_mqc.tsv').ifEmpty([]) + ) // // MODULE: Remove genome contaminant reads @@ -171,9 +211,6 @@ workflow PREPROCESS_RNASEQ { .first() .set { ch_genome_fasta } - ch_strand_fastq.auto_strand.view() - ch_strand_fastq.known_strand.view() - FASTQ_SUBSAMPLE_FQ_SALMON ( ch_strand_fastq.auto_strand, ch_genome_fasta, @@ -189,13 +226,17 @@ workflow PREPROCESS_RNASEQ { .json_info .join(ch_strand_fastq.auto_strand) .map { meta, json, reads -> - return [ meta + [ strandedness: WorkflowRiboseq.getSalmonInferredStrandedness(json) ], reads ] + return [ meta + [ strandedness: getSalmonInferredStrandedness(json) ], reads ] } .mix(ch_strand_fastq.known_strand) .set { ch_strand_inferred_fastq } emit: - versions = ch_versions // channel: [ versions.yml ] + reads = ch_strand_inferred_fastq + trim_read_count = ch_trim_read_count + + multiqc_files = ch_multiqc_files + versions = ch_versions // channel: [ versions.yml ] } diff --git a/workflows/riboseq.nf b/workflows/riboseq.nf index 
5f5487f..4927731 100644 --- a/workflows/riboseq.nf +++ b/workflows/riboseq.nf @@ -131,7 +131,8 @@ def multiqc_report = [] workflow RIBOSEQ { - ch_versions = Channel.empty() + ch_versions = Channel.empty() + ch_multiqc_files = Channel.empty() // // SUBWORKFLOW: Uncompress and prepare reference genome files @@ -199,35 +200,40 @@ workflow RIBOSEQ { PREPARE_GENOME.out.salmon_index, PREPARE_GENOME.out.bbsplit_index, !params.salmon_index && !('salmon' in prepareToolIndices), - ch_ribo_db + ch_ribo_db, + params.trimmer ) - + ch_multiqc_files = ch_multiqc_files.mix(PREPROCESS_RNASEQ.out.multiqc_files) + ch_versions = ch_versions.mix(PREPROCESS_RNASEQ.out.versions) CUSTOM_DUMPSOFTWAREVERSIONS ( ch_versions.unique().collectFile(name: 'collated_versions.yml') ) + ch_multiqc_files = ch_multiqc_files.mix(CUSTOM_DUMPSOFTWAREVERSIONS.out.mqc_yml) // // MODULE: MultiQC // - workflow_summary = WorkflowRiboseq.paramsSummaryMultiqc(workflow, summary_params) - ch_workflow_summary = Channel.value(workflow_summary) + if (!params.skip_multiqc) { + workflow_summary = WorkflowRiboseq.paramsSummaryMultiqc(workflow, summary_params) + ch_workflow_summary = Channel.value(workflow_summary) - methods_description = WorkflowRiboseq.methodsDescriptionText(workflow, ch_multiqc_custom_methods_description, params) - ch_methods_description = Channel.value(methods_description) + methods_description = WorkflowRiboseq.methodsDescriptionText(workflow, ch_multiqc_custom_methods_description, params) + ch_methods_description = Channel.value(methods_description) - ch_multiqc_files = Channel.empty() - ch_multiqc_files = ch_multiqc_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml')) - ch_multiqc_files = ch_multiqc_files.mix(ch_methods_description.collectFile(name: 'methods_description_mqc.yaml')) - ch_multiqc_files = ch_multiqc_files.mix(CUSTOM_DUMPSOFTWAREVERSIONS.out.mqc_yml.collect()) - - MULTIQC ( - ch_multiqc_files.collect(), - ch_multiqc_config.toList(), - 
ch_multiqc_custom_config.toList(), - ch_multiqc_logo.toList() - ) - multiqc_report = MULTIQC.out.report.toList() + ch_multiqc_files = ch_multiqc_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml')) + ch_multiqc_files = ch_multiqc_files.mix(ch_methods_description.collectFile(name: 'methods_description_mqc.yaml')) + ch_multiqc_files = ch_multiqc_files.mix(CUSTOM_DUMPSOFTWAREVERSIONS.out.mqc_yml.collect()) + + MULTIQC ( + ch_multiqc_files.collect(), + ch_multiqc_config.toList(), + ch_multiqc_custom_config.toList(), + ch_multiqc_logo.toList() + ) + multiqc_report = MULTIQC.out.report.toList() + + } } /* From 216ad1ae320f7c98fab0d655c893591ef7c00b40 Mon Sep 17 00:00:00 2001 From: Jonathan Manning Date: Fri, 22 Dec 2023 12:00:33 +0000 Subject: [PATCH 11/30] Encapsulated preprocessing --- subworkflows/local/preprocess_rnaseq.nf | 68 ++++++++++++++----------- workflows/riboseq.nf | 17 ++++--- 2 files changed, 50 insertions(+), 35 deletions(-) diff --git a/subworkflows/local/preprocess_rnaseq.nf b/subworkflows/local/preprocess_rnaseq.nf index 5beb3c5..f0a614b 100644 --- a/subworkflows/local/preprocess_rnaseq.nf +++ b/subworkflows/local/preprocess_rnaseq.nf @@ -1,11 +1,12 @@ import groovy.json.JsonSlurper include { CAT_FASTQ } from '../../modules/nf-core/cat/fastq/main' -include { FASTQC } from '../../modules/nf-core/fastqc/main' -include { SORTMERNA } from '../../modules/nf-core/sortmerna/main' +include { FASTQC } from '../../modules/nf-core/fastqc/main' +include { SORTMERNA } from '../../modules/nf-core/sortmerna/main' include { FASTQ_SUBSAMPLE_FQ_SALMON } from '../../subworkflows/nf-core/fastq_subsample_fq_salmon' include { FASTQ_FASTQC_UMITOOLS_TRIMGALORE } from '../../subworkflows/nf-core/fastq_fastqc_umitools_trimgalore' +include { FASTQ_FASTQC_UMITOOLS_FASTP } from '../../subworkflows/nf-core/fastq_fastqc_umitools_fastp' def pass_trimmed_reads = [:] @@ -39,15 +40,24 @@ public static String multiqcTsvFromList(tsv_data, header) { workflow 
PREPROCESS_RNASEQ { take: - ch_fastq_in // channel: [ val(meta), [ fastq ] ] - ch_fasta - ch_transcript_fasta - ch_gtf - ch_salmon_index - ch_bbsplit_index - make_salmon_index - ch_ribo_db - trimmer + ch_reads // channel: [ val(meta), [ reads ] ] + ch_fasta // channel: /path/to/genome.fasta + ch_transcript_fasta // channel: /path/to/transcript.fasta + ch_gtf // channel: /path/to/genome.gtf + make_salmon_index // boolean: Whether to create salmon index before running salmon quant + ch_salmon_index // channel: /path/to/salmon/index/ (optional) + skip_bbsplit // boolean: Skip BBSplit for removal of non-reference genome reads. + ch_bbsplit_index // channel: /path/to/bbsplit/index/ (optional) + skip_fastqc // boolean: true/false + skip_trimming // boolean: true/false + trimmer // string: 'fastp' or 'trimgalore' + min_trimmed_reads // integer: > 0 + save_trimmed // boolean: true/false + remove_ribo_rna // boolean: true/false: whether to run sortmerna to remove rrnas + ch_ribo_db // Text file containing paths to fasta files (one per line) that will be used to create the database for SortMeRNA. (optional) + with_umi // boolean: true/false: Enable UMI-based read deduplication. 
+ skip_umi_extract // boolean: true/false + umi_discard_read // integer: 0, 1 or 2 main: @@ -56,7 +66,7 @@ workflow PREPROCESS_RNASEQ { ch_trim_read_count = Channel.empty() ch_multiqc_files = Channel.empty() - ch_fastq_in + ch_reads .branch { meta, fastqs -> single : fastqs.size() == 1 @@ -81,7 +91,7 @@ workflow PREPROCESS_RNASEQ { // // MODULE: Remove ribosomal RNA reads // - if (params.remove_ribo_rna) { + if (remove_ribo_rna) { ch_sortmerna_fastas = Channel.from(ch_ribo_db.readLines()).map { row -> file(row, checkIfExists: true) }.collect() SORTMERNA ( @@ -102,12 +112,12 @@ workflow PREPROCESS_RNASEQ { if (trimmer == 'trimgalore') { FASTQ_FASTQC_UMITOOLS_TRIMGALORE ( ch_filtered_reads, - params.skip_fastqc || params.skip_qc, - params.with_umi, - params.skip_umi_extract, - params.skip_trimming, - params.umi_discard_read, - params.min_trimmed_reads + skip_fastqc, + with_umi, + skip_umi_extract, + skip_trimming, + umi_discard_read, + min_trimmed_reads ) ch_filtered_reads = FASTQ_FASTQC_UMITOOLS_TRIMGALORE.out.reads ch_trim_read_count = FASTQ_FASTQC_UMITOOLS_TRIMGALORE.out.trim_read_count @@ -126,15 +136,15 @@ workflow PREPROCESS_RNASEQ { if (trimmer == 'fastp') { FASTQ_FASTQC_UMITOOLS_FASTP ( ch_filtered_reads, - params.skip_fastqc || params.skip_qc, - params.with_umi, - params.skip_umi_extract, - params.umi_discard_read, - params.skip_trimming, + skip_fastqc, + with_umi, + skip_umi_extract, + umi_discard_read, + skip_trimming, [], - params.save_trimmed, - params.save_trimmed, - params.min_trimmed_reads + save_trimmed, + save_trimmed, + min_trimmed_reads ) ch_filtered_reads = FASTQ_FASTQC_UMITOOLS_FASTP.out.reads ch_trim_read_count = FASTQ_FASTQC_UMITOOLS_FASTP.out.trim_read_count @@ -155,7 +165,7 @@ workflow PREPROCESS_RNASEQ { .map { meta, num_reads -> pass_trimmed_reads[meta.id] = true - if (num_reads <= params.min_trimmed_reads.toFloat()) { + if (num_reads <= min_trimmed_reads.toFloat()) { pass_trimmed_reads[meta.id] = false return [ "$meta.id\t$num_reads" ] } 
@@ -176,7 +186,7 @@ workflow PREPROCESS_RNASEQ { // // MODULE: Remove genome contaminant reads // - if (!params.skip_bbsplit) { + if (!skip_bbsplit) { BBMAP_BBSPLIT ( ch_filtered_reads, ch_bbsplit_index, diff --git a/workflows/riboseq.nf b/workflows/riboseq.nf index 4927731..e797e26 100644 --- a/workflows/riboseq.nf +++ b/workflows/riboseq.nf @@ -188,20 +188,25 @@ workflow RIBOSEQ { } .set { ch_fastq } - // - // SUBWORKFLOW: preprocess using same methodology as RNA-seq - // - PREPROCESS_RNASEQ ( ch_fastq, PREPARE_GENOME.out.fasta, PREPARE_GENOME.out.transcript_fasta, PREPARE_GENOME.out.gtf, PREPARE_GENOME.out.salmon_index, - PREPARE_GENOME.out.bbsplit_index, !params.salmon_index && !('salmon' in prepareToolIndices), + params.skip_bbsplit, + PREPARE_GENOME.out.bbsplit_index, + params.skip_fastqc || params.skip_qc, + params.skip_trimming, + params.trimmer, + params.min_trimmed_reads, + params.save_trimmed, + params.remove_ribo_rna, ch_ribo_db, - params.trimmer + params.with_umi, + params.skip_umi_extract, + params.umi_discard_read ) ch_multiqc_files = ch_multiqc_files.mix(PREPROCESS_RNASEQ.out.multiqc_files) ch_versions = ch_versions.mix(PREPROCESS_RNASEQ.out.versions) From 2647cbcfaff3b8f803f86b3e754f24309f5bf185 Mon Sep 17 00:00:00 2001 From: Jonathan Manning Date: Fri, 22 Dec 2023 14:56:51 +0000 Subject: [PATCH 12/30] Borrow more config from rnaseq --- conf/modules.config | 1137 +++++++++++++++++++++++++++++++++++++++++- conf/test.config | 6 +- main.nf | 16 +- nextflow.config | 8 +- nextflow_schema.json | 595 +++++++++++++++++++++- 5 files changed, 1723 insertions(+), 39 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index 63780fd..4665219 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -10,63 +10,1154 @@ ---------------------------------------------------------------------------------------- */ -process { +def rseqc_modules = params.rseqc_modules ? 
params.rseqc_modules.split(',').collect{ it.trim().toLowerCase() } : [] + +// +// General configuration options +// +process { publishDir = [ path: { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] - withName: SAMPLESHEET_CHECK { + withName: 'CUSTOM_DUMPSOFTWAREVERSIONS' { publishDir = [ path: { "${params.outdir}/pipeline_info" }, mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + pattern: '*_versions.yml' ] } +} + +// +// Genome preparation options +// - withName: FASTQC { - ext.args = '--quiet' +process { + withName: 'GUNZIP_.*|MAKE_TRANSCRIPTS_FASTA' { + publishDir = [ + path: { "${params.outdir}/genome" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : params.save_reference ? filename : null } + ] } - - withName: 'FQ_SUBSAMPLE' { - ext.args = '--record-count 1000000 --seed 1' - ext.prefix = { "${meta.id}.subsampled" } + + withName: 'UNTAR_.*' { + ext.args2 = '--no-same-owner' + } + + withName: 'UNTAR_.*|STAR_GENOMEGENERATE|STAR_GENOMEGENERATE_IGENOMES|HISAT2_BUILD' { publishDir = [ - enabled: false + path: { "${params.outdir}/genome/index" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : params.save_reference ? filename : null } ] } - withName: '.*:FASTQ_SUBSAMPLE_FQ_SALMON:SALMON_QUANT' { - ext.args = '--skipQuant' + withName: 'GFFREAD' { + ext.args = '--keep-exon-attrs -F -T' publishDir = [ - enabled: false + path: { "${params.outdir}/genome" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : params.save_reference ? 
filename : null } ] } - + + withName: 'HISAT2_EXTRACTSPLICESITES' { + publishDir = [ + path: { "${params.outdir}/genome/index" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : params.save_reference ? filename : null } + ] + } + withName: 'SALMON_INDEX' { - ext.args = '-k 17' + ext.args = { [ + params.gencode ? '--gencode' : '', + params.pseudo_aligner_kmer_size ? "-k ${params.pseudo_aligner_kmer_size}": '' + ].join(' ').trim() } publishDir = [ - enabled: false + path: { "${params.outdir}/genome/index" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : params.save_reference ? filename : null } ] } - withName: CUSTOM_DUMPSOFTWAREVERSIONS { + withName: 'KALLISTO_INDEX' { + ext.args = params.pseudo_aligner_kmer_size ? "-k ${params.pseudo_aligner_kmer_size}" : '' publishDir = [ - path: { "${params.outdir}/pipeline_info" }, + path: { "${params.outdir}/genome/index" }, mode: params.publish_dir_mode, - pattern: '*_versions.yml' + saveAs: { filename -> filename.equals('versions.yml') ? null : params.save_reference ? filename : null } + ] + } + + withName: 'RSEM_PREPAREREFERENCE_GENOME' { + ext.args = '--star' + publishDir = [ + path: { "${params.outdir}/genome/index" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : params.save_reference ? filename : null } + ] + } + + withName: 'GTF2BED' { + publishDir = [ + path: { "${params.outdir}/genome" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : params.save_reference ? filename : null } + ] + } + + withName: 'CAT_ADDITIONAL_FASTA|PREPROCESS_TRANSCRIPTS_FASTA_GENCODE' { + publishDir = [ + path: { "${params.outdir}/genome" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : params.save_reference ? 
filename : null } + ] + } + + withName: 'GTF_FILTER' { + ext.args = { params.skip_gtf_transcript_filter ?: '--skip_transcript_id_check' } + publishDir = [ + path: { "${params.outdir}/genome" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : params.save_reference ? filename : null } ] } - withName: 'MULTIQC' { - ext.args = params.multiqc_title ? "--title \"$params.multiqc_title\"" : '' + withName: 'CUSTOM_GETCHROMSIZES' { publishDir = [ - path: { "${params.outdir}/multiqc" }, + path: { "${params.outdir}/genome" }, mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + saveAs: { filename -> filename.equals('versions.yml') ? null : params.save_reference ? filename : null } ] } + withName: 'CAT_FASTQ' { + publishDir = [ + path: { "${params.outdir}/fastq" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : params.save_merged_fastq ? filename : null } + ] + } +} + +if (!params.skip_bbsplit && params.bbsplit_fasta_list) { + process { + withName: '.*:PREPARE_GENOME:BBMAP_BBSPLIT' { + ext.args = 'build=1' + publishDir = [ + path: { "${params.outdir}/genome/index" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : params.save_reference ? 
filename : null } + ] + } + } +} + +// +// Read subsampling and strand inferring options +// + +process { + withName: 'FQ_SUBSAMPLE' { + ext.args = '--record-count 1000000 --seed 1' + ext.prefix = { "${meta.id}.subsampled" } + publishDir = [ + enabled: false + ] + } + + withName: '.*:FASTQ_SUBSAMPLE_FQ_SALMON:SALMON_QUANT' { + ext.args = '--skipQuant' + publishDir = [ + enabled: false + ] + } +} + +// +// Read QC and trimming options +// + +if (!(params.skip_fastqc || params.skip_qc)) { + if (params.trimmer == 'trimgalore') { + process { + withName: '.*:FASTQ_FASTQC_UMITOOLS_TRIMGALORE:FASTQC' { + ext.args = '--quiet' + } + } + } + + if (params.trimmer == 'fastp') { + process { + withName: '.*:FASTQ_FASTQC_UMITOOLS_FASTP:FASTQC_RAW' { + ext.args = '--quiet' + } + + withName: '.*:FASTQ_FASTQC_UMITOOLS_FASTP:FASTQC_TRIM' { + ext.args = '--quiet' + publishDir = [ + path: { "${params.outdir}/${params.trimmer}/fastqc" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + } + } +} + +if (!params.skip_trimming) { + if (params.trimmer == 'trimgalore') { + process { + withName: '.*:FASTQ_FASTQC_UMITOOLS_TRIMGALORE:TRIMGALORE' { + ext.args = { + [ + "--fastqc_args '-t ${task.cpus}'", + params.extra_trimgalore_args ? params.extra_trimgalore_args.split("\\s(?=--)") : '' + ].flatten().unique(false).join(' ').trim() + } + publishDir = [ + [ + path: { "${params.outdir}/${params.trimmer}/fastqc" }, + mode: params.publish_dir_mode, + pattern: "*.{html,zip}" + ], + [ + path: { "${params.outdir}/${params.trimmer}" }, + mode: params.publish_dir_mode, + pattern: "*.fq.gz", + saveAs: { params.save_trimmed ? 
it : null } + ], + [ + path: { "${params.outdir}/${params.trimmer}" }, + mode: params.publish_dir_mode, + pattern: "*.txt" + ] + ] + } + } + } + + if (params.trimmer == 'fastp') { + process { + withName: '.*:FASTQ_FASTQC_UMITOOLS_FASTP:FASTP' { + ext.args = { params.extra_fastp_args ?: '' } + publishDir = [ + [ + path: { "${params.outdir}/${params.trimmer}" }, + mode: params.publish_dir_mode, + pattern: "*.{json,html}" + ], + [ + path: { "${params.outdir}/${params.trimmer}/log" }, + mode: params.publish_dir_mode, + pattern: "*.log" + ], + [ + path: { "${params.outdir}/${params.trimmer}" }, + mode: params.publish_dir_mode, + pattern: "*.fastq.gz", + saveAs: { params.save_trimmed ? it : null } + ] + ] + } + } + } +} + +if (params.with_umi && !params.skip_umi_extract) { + process { + withName: 'UMITOOLS_EXTRACT' { + ext.args = { [ + params.umitools_extract_method ? "--extract-method=${params.umitools_extract_method}" : '', + params.umitools_bc_pattern ? "--bc-pattern='${params.umitools_bc_pattern}'" : '', + params.umitools_bc_pattern2 ? "--bc-pattern2='${params.umitools_bc_pattern2}'" : '', + params.umitools_umi_separator ? "--umi-separator='${params.umitools_umi_separator}'" : '' + ].join(' ').trim() } + publishDir = [ + [ + path: { "${params.outdir}/umitools" }, + mode: params.publish_dir_mode, + pattern: "*.log" + ], + [ + path: { "${params.outdir}/umitools" }, + mode: params.publish_dir_mode, + pattern: "*.fastq.gz", + saveAs: { params.save_umi_intermeds ? it : null } + ] + ] + } + } +} + +// +// Contaminant removal options +// + +if (!params.skip_bbsplit) { + process { + withName: 'BBMAP_BBSPLIT' { + ext.args = 'build=1 ambiguous2=all maxindel=150000' + publishDir = [ + [ + path: { "${params.outdir}/bbsplit" }, + mode: params.publish_dir_mode, + pattern: '*.txt' + ], + [ + path: { "${params.outdir}/bbsplit" }, + mode: params.publish_dir_mode, + pattern: '*.fastq.gz', + saveAs: { params.save_bbsplit_reads ? 
it : null } + ] + ] + } + } +} + +if (params.remove_ribo_rna) { + process { + withName: 'SORTMERNA' { + ext.args = '--num_alignments 1 -v' + publishDir = [ + [ + path: { "${params.outdir}/sortmerna" }, + mode: params.publish_dir_mode, + pattern: "*.log" + ], + [ + path: { "${params.outdir}/sortmerna" }, + mode: params.publish_dir_mode, + pattern: "*.fastq.gz", + saveAs: { params.save_non_ribo_reads ? it : null } + ] + ] + } + } +} + +// +// General alignment options +// + +if (!params.skip_alignment) { + process { + withName: 'NFCORE_RNASEQ:RNASEQ:.*:BAM_SORT_STATS_SAMTOOLS:BAM_STATS_SAMTOOLS:.*' { + ext.prefix = { "${meta.id}.sorted.bam" } + publishDir = [ + path: { "${params.outdir}/${params.aligner}/samtools_stats" }, + mode: params.publish_dir_mode, + pattern: "*.{stats,flagstat,idxstats}" + ] + } + + withName: 'NFCORE_RNASEQ:RNASEQ:.*:BAM_SORT_STATS_SAMTOOLS:SAMTOOLS_SORT' { + ext.prefix = { "${meta.id}.sorted" } + publishDir = [ + path: { "${params.outdir}/${params.aligner}" }, + mode: params.publish_dir_mode, + pattern: "*.bam", + saveAs: { ( ['star_salmon','hisat2'].contains(params.aligner) && + ( params.save_align_intermeds || ( !params.with_umi && params.skip_markduplicates ) ) + ) || params.save_align_intermeds || params.skip_markduplicates ? it : null } + ] + } + + withName: 'NFCORE_RNASEQ:RNASEQ:.*:BAM_SORT_STATS_SAMTOOLS:SAMTOOLS_INDEX' { + ext.args = { params.bam_csi_index ? '-c' : '' } + publishDir = [ + path: { "${params.outdir}/${params.aligner}" }, + mode: params.publish_dir_mode, + pattern: "*.{bai,csi}", + saveAs: { ( ['star_salmon','hisat2'].contains(params.aligner) && + ( params.save_align_intermeds || ( !params.with_umi && params.skip_markduplicates ) ) + ) || params.save_align_intermeds || params.skip_markduplicates ? 
it : null } + ] + } + } + + if (!params.skip_markduplicates && !params.with_umi) { + process { + withName: '.*:BAM_MARKDUPLICATES_PICARD:PICARD_MARKDUPLICATES' { + ext.args = '--ASSUME_SORTED true --REMOVE_DUPLICATES false --VALIDATION_STRINGENCY LENIENT --TMP_DIR tmp' + ext.prefix = { "${meta.id}.markdup.sorted" } + publishDir = [ + [ + path: { "${params.outdir}/${params.aligner}/picard_metrics" }, + mode: params.publish_dir_mode, + pattern: '*metrics.txt' + ], + [ + path: { "${params.outdir}/${params.aligner}" }, + mode: params.publish_dir_mode, + pattern: '*.bam' + ] + ] + } + + withName: '.*:BAM_MARKDUPLICATES_PICARD:SAMTOOLS_INDEX' { + ext.args = { params.bam_csi_index ? '-c' : '' } + ext.prefix = { "${meta.id}.markdup.sorted" } + publishDir = [ + path: { "${params.outdir}/${params.aligner}" }, + mode: params.publish_dir_mode, + pattern: '*.{bai,csi}' + ] + } + + withName: '.*:BAM_MARKDUPLICATES_PICARD:BAM_STATS_SAMTOOLS:.*' { + ext.prefix = { "${meta.id}.markdup.sorted.bam" } + publishDir = [ + path: { "${params.outdir}/${params.aligner}/samtools_stats" }, + mode: params.publish_dir_mode, + pattern: '*.{stats,flagstat,idxstats}' + ] + } + } + } + + if (params.with_umi && ['star_salmon','hisat2'].contains(params.aligner)) { + process { + withName: '.*:BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_GENOME:UMITOOLS_DEDUP' { + ext.args = { [ + meta.single_end ? '' : '--unpaired-reads=discard --chimeric-pairs=discard', + params.umitools_grouping_method ? "--method='${params.umitools_grouping_method}'" : '', + params.umitools_umi_separator ? 
"--umi-separator='${params.umitools_umi_separator}'" : '' + ].join(' ').trim() } + ext.prefix = { "${meta.id}.umi_dedup.sorted" } + publishDir = [ + [ + path: { "${params.outdir}/${params.aligner}/umitools" }, + mode: params.publish_dir_mode, + pattern: '*.tsv' + ], + [ + path: { "${params.outdir}/${params.aligner}" }, + mode: params.publish_dir_mode, + pattern: '*.bam', + saveAs: { params.save_align_intermeds || params.with_umi || params.save_umi_intermeds ? it : null } + ] + ] + } + + withName: '.*:BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_GENOME:SAMTOOLS_INDEX' { + ext.args = { params.bam_csi_index ? '-c' : '' } + ext.prefix = { "${meta.id}.umi_dedup.sorted" } + publishDir = [ + path: { "${params.outdir}/${params.aligner}" }, + mode: params.publish_dir_mode, + pattern: '*.{bai,csi}', + saveAs: { params.save_align_intermeds || params.with_umi || params.save_umi_intermeds ? it : null } + ] + } + + withName: '.*:BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_GENOME:BAM_STATS_SAMTOOLS:.*' { + ext.prefix = { "${meta.id}.umi_dedup.sorted.bam" } + publishDir = [ + path: { "${params.outdir}/${params.aligner}/samtools_stats" }, + mode: params.publish_dir_mode, + pattern: '*.{stats,flagstat,idxstats}' + ] + } + } + } + + if (!params.skip_bigwig) { + process { + withName: 'BEDTOOLS_GENOMECOV' { + ext.args = '-split -du' + publishDir = [ + enabled: false + ] + } + + withName: '.*:BEDGRAPH_BEDCLIP_BEDGRAPHTOBIGWIG_FORWARD:UCSC_BEDCLIP' { + ext.prefix = { "${meta.id}.clip.forward" } + publishDir = [ + enabled: false + ] + } + + withName: '.*:BEDGRAPH_BEDCLIP_BEDGRAPHTOBIGWIG_FORWARD:UCSC_BEDGRAPHTOBIGWIG' { + ext.prefix = { "${meta.id}.forward" } + publishDir = [ + path: { "${params.outdir}/${params.aligner}/bigwig" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + ] + } + + withName: '.*:BEDGRAPH_BEDCLIP_BEDGRAPHTOBIGWIG_REVERSE:UCSC_BEDCLIP' { + ext.prefix = { "${meta.id}.clip.reverse" } + publishDir = [ + enabled: false + ] + } + + withName: '.*:BEDGRAPH_BEDCLIP_BEDGRAPHTOBIGWIG_REVERSE:UCSC_BEDGRAPHTOBIGWIG' { + ext.prefix = { "${meta.id}.reverse" } + publishDir = [ + path: { "${params.outdir}/${params.aligner}/bigwig" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + } + } + + if (!params.skip_stringtie) { + process { + withName: 'STRINGTIE_STRINGTIE' { + ext.args = { [ + '-v', + params.stringtie_ignore_gtf ? '' : '-e' + ].join(' ').trim() } + publishDir = [ + path: { "${params.outdir}/${params.aligner}/stringtie" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + } + } +} + +// +// STAR Salmon alignment options +// + +if (!params.skip_alignment && params.aligner == 'star_salmon') { + process { + withName: '.*:ALIGN_STAR:STAR_ALIGN|.*:ALIGN_STAR:STAR_ALIGN_IGENOMES' { + ext.args = { [ + '--quantMode TranscriptomeSAM', + '--twopassMode Basic', + '--outSAMtype BAM Unsorted', + '--readFilesCommand zcat', + '--runRNGseed 0', + '--outFilterMultimapNmax 20', + '--alignSJDBoverhangMin 1', + '--outSAMattributes NH HI AS NM MD', + '--quantTranscriptomeBan Singleend', + '--outSAMstrandField intronMotif', + params.save_unaligned ? '--outReadsUnmapped Fastx' : '', + params.extra_star_align_args ? params.extra_star_align_args.split("\\s(?=--)") : '' + ].flatten().unique(false).join(' ').trim() } + publishDir = [ + [ + path: { "${params.outdir}/${params.aligner}/log" }, + mode: params.publish_dir_mode, + pattern: '*.{out,tab}' + ], + [ + path: { "${params.outdir}/${params.aligner}" }, + mode: params.publish_dir_mode, + pattern: '*.bam', + saveAs: { params.save_align_intermeds ? 
it : null } + ], + [ + path: { "${params.outdir}/${params.aligner}/unmapped" }, + mode: params.publish_dir_mode, + pattern: '*.fastq.gz', + saveAs: { params.save_unaligned ? it : null } + ] + ] + } + + withName: '.*:QUANTIFY_STAR_SALMON:SALMON_QUANT' { + ext.args = { params.extra_salmon_quant_args ?: '' } + publishDir = [ + path: { "${params.outdir}/${params.aligner}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') || filename.endsWith('_meta_info.json') ? null : filename } + ] + } + + withName: '.*:QUANTIFY_STAR_SALMON:TX2GENE' { + publishDir = [ + path: { "${params.outdir}/${params.aligner}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: '.*:QUANTIFY_STAR_SALMON:TXIMPORT' { + ext.prefix = { "${quant_type}.merged" } + publishDir = [ + path: { "${params.outdir}/${params.aligner}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: '.*:QUANTIFY_STAR_SALMON:SE_.*' { + publishDir = [ + path: { "${params.outdir}/${params.aligner}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + } + + if (params.with_umi) { + process { + withName: 'NFCORE_RNASEQ:RNASEQ:SAMTOOLS_SORT' { + ext.args = '-n' + ext.prefix = { "${meta.id}.umi_dedup.transcriptome" } + publishDir = [ + path: { "${params.outdir}/${params.aligner}" }, + mode: params.publish_dir_mode, + pattern: '*.bam', + saveAs: { params.save_align_intermeds || params.save_umi_intermeds ? 
it : null } + ] + } + + withName: 'NFCORE_RNASEQ:RNASEQ:UMITOOLS_PREPAREFORSALMON' { + ext.prefix = { "${meta.id}.umi_dedup.transcriptome.filtered" } + publishDir = [ + [ + path: { "${params.outdir}/${params.aligner}/umitools/log" }, + mode: params.publish_dir_mode, + pattern: '*.log' + ], + [ + path: { "${params.outdir}/${params.aligner}" }, + mode: params.publish_dir_mode, + pattern: '*.bam', + saveAs: { params.save_align_intermeds || params.save_umi_intermeds ? it : null } + ] + ] + } + + withName: 'NFCORE_RNASEQ:RNASEQ:BAM_SORT_STATS_SAMTOOLS:SAMTOOLS_SORT' { + ext.prefix = { "${meta.id}.transcriptome.sorted" } + publishDir = [ + path: { "${params.outdir}/${params.aligner}" }, + mode: params.publish_dir_mode, + pattern: '*.bam', + saveAs: { params.save_align_intermeds || params.save_umi_intermeds ? it : null } + ] + } + + withName: 'NFCORE_RNASEQ:RNASEQ:BAM_SORT_STATS_SAMTOOLS:SAMTOOLS_INDEX' { + publishDir = [ + path: { "${params.outdir}/${params.aligner}" }, + mode: params.publish_dir_mode, + pattern: '*.bai', + saveAs: { params.save_align_intermeds || params.save_umi_intermeds ? it : null } + ] + } + + withName: 'NFCORE_RNASEQ:RNASEQ:BAM_SORT_STATS_SAMTOOLS:BAM_STATS_SAMTOOLS:.*' { + ext.prefix = { "${meta.id}.transcriptome.sorted.bam" } + publishDir = [ + path: { "${params.outdir}/${params.aligner}/samtools_stats" }, + mode: params.publish_dir_mode, + pattern: '*.{stats,flagstat,idxstats}', + saveAs: { params.save_align_intermeds || params.save_umi_intermeds ? it : null } + ] + } + + withName: '.*:BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_TRANSCRIPTOME:UMITOOLS_DEDUP' { + ext.args = { [ + meta.single_end ? '' : '--unpaired-reads=discard --chimeric-pairs=discard', + params.umitools_grouping_method ? "--method='${params.umitools_grouping_method}'" : '', + params.umitools_umi_separator ? 
"--umi-separator='${params.umitools_umi_separator}'" : '' + ].join(' ').trim() } + ext.prefix = { "${meta.id}.umi_dedup.transcriptome.sorted" } + publishDir = [ + [ + path: { "${params.outdir}/${params.aligner}/umitools" }, + mode: params.publish_dir_mode, + pattern: '*.tsv' + ], + [ + path: { "${params.outdir}/${params.aligner}" }, + mode: params.publish_dir_mode, + pattern: '*.bam', + saveAs: { params.save_align_intermeds || params.save_umi_intermeds ? it : null } + ] + ] + } + + withName: '.*:BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_TRANSCRIPTOME:SAMTOOLS_INDEX' { + publishDir = [ + path: { "${params.outdir}/${params.aligner}" }, + mode: params.publish_dir_mode, + pattern: '*.bai', + saveAs: { params.save_align_intermeds || params.save_umi_intermeds ? it : null } + ] + } + + withName: '.*:BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_TRANSCRIPTOME:BAM_STATS_SAMTOOLS:.*' { + ext.prefix = { "${meta.id}.umi_dedup.transcriptome.sorted.bam" } + publishDir = [ + path: { "${params.outdir}/${params.aligner}/samtools_stats" }, + mode: params.publish_dir_mode, + pattern: '*.{stats,flagstat,idxstats}' + ] + } + } + } + + if (!params.skip_qc & !params.skip_deseq2_qc) { + process { + withName: 'DESEQ2_QC_STAR_SALMON' { + ext.args = { [ + "--id_col 1", + "--sample_suffix ''", + "--count_col 3", + params.deseq2_vst ? 
'--vst TRUE' : '' + ].join(' ').trim() } + ext.args2 = 'star_salmon' + publishDir = [ + path: { "${params.outdir}/${params.aligner}/deseq2_qc" }, + mode: params.publish_dir_mode, + pattern: "*{RData,pca.vals.txt,plots.pdf,sample.dists.txt,size_factors,log}" + ] + } + } + } +} + +// +// STAR RSEM alignment options +// + +if (!params.skip_alignment && params.aligner == 'star_rsem') { + process { + withName: '.*:QUANTIFY_RSEM:RSEM_CALCULATEEXPRESSION' { + ext.args = [ + '--star', + '--star-output-genome-bam', + '--star-gzipped-read-file', + '--estimate-rspd', + '--seed 1' + ].join(' ').trim() + publishDir = [ + [ + path: { "${params.outdir}/${params.aligner}" }, + mode: params.publish_dir_mode, + pattern: "*.{stat,results}" + ], + [ + path: { "${params.outdir}/${params.aligner}" }, + mode: params.publish_dir_mode, + pattern: "*.bam", + saveAs: { params.save_align_intermeds ? it : null } + ], + [ + path: { "${params.outdir}/${params.aligner}/log" }, + mode: params.publish_dir_mode, + pattern: "*.log" + ] + ] + } + + withName: '.*:QUANTIFY_RSEM:RSEM_MERGE_COUNTS' { + publishDir = [ + path: { "${params.outdir}/${params.aligner}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + } + + if (!params.skip_qc & !params.skip_deseq2_qc) { + process { + withName: 'DESEQ2_QC_RSEM' { + ext.args = { [ + "--id_col 1", + "--sample_suffix ''", + "--count_col 3", + params.deseq2_vst ? 
'--vst TRUE' : '' + ].join(' ').trim() } + ext.args2 = 'star_rsem' + publishDir = [ + path: { "${params.outdir}/${params.aligner}/deseq2_qc" }, + mode: params.publish_dir_mode, + pattern: "*{RData,pca.vals.txt,plots.pdf,sample.dists.txt,size_factors,log}" + ] + } + } + } +} + +// +// HISAT2 alignment options +// + +if (!params.skip_alignment && params.aligner == 'hisat2') { + process { + withName: '.*:FASTQ_ALIGN_HISAT2:HISAT2_ALIGN' { + ext.args = '--met-stderr --new-summary --dta' + publishDir = [ + [ + path: { "${params.outdir}/${params.aligner}/log" }, + mode: params.publish_dir_mode, + pattern: '*.log' + ], + [ + path: { "${params.outdir}/${params.aligner}" }, + mode: params.publish_dir_mode, + pattern: '*.bam', + saveAs: { params.save_align_intermeds ? it : null } + ], + [ + path: { "${params.outdir}/${params.aligner}/unmapped" }, + mode: params.publish_dir_mode, + pattern: '*.fastq.gz', + saveAs: { params.save_unaligned ? it : null } + ] + ] + } + } +} + +// +// Post-alignment QC options +// + +if (!params.skip_alignment && !params.skip_qc) { + if (!params.skip_preseq) { + process { + withName: 'PRESEQ_LCEXTRAP' { + ext.args = '-verbose -bam -seed 1 -seg_len 100000000' + publishDir = [ + [ + path: { "${params.outdir}/${params.aligner}/preseq" }, + mode: params.publish_dir_mode, + pattern: "*.txt" + ], + [ + path: { "${params.outdir}/${params.aligner}/preseq/log" }, + mode: params.publish_dir_mode, + pattern: "*.log" + ] + ] + } + } + } + + if (!params.skip_qualimap) { + process { + withName: 'QUALIMAP_RNASEQ' { + publishDir = [ + path: { "${params.outdir}/${params.aligner}/qualimap" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + ] + } + } + } + + if (!params.skip_dupradar) { + process { + withName: 'DUPRADAR' { + publishDir = [ + [ + path: { "${params.outdir}/${params.aligner}/dupradar/scatter_plot" }, + mode: params.publish_dir_mode, + pattern: "*Dens.pdf" + ], + [ + path: { "${params.outdir}/${params.aligner}/dupradar/box_plot" }, + mode: params.publish_dir_mode, + pattern: "*Boxplot.pdf" + ], + [ + path: { "${params.outdir}/${params.aligner}/dupradar/histogram" }, + mode: params.publish_dir_mode, + pattern: "*Hist.pdf" + ], + [ + path: { "${params.outdir}/${params.aligner}/dupradar/gene_data" }, + mode: params.publish_dir_mode, + pattern: "*Matrix.txt" + ], + [ + path: { "${params.outdir}/${params.aligner}/dupradar/intercepts_slope" }, + mode: params.publish_dir_mode, + pattern: "*slope.txt" + ] + ] + } + } + } + + if (!params.skip_biotype_qc && params.featurecounts_group_type) { + process { + withName: 'SUBREAD_FEATURECOUNTS' { + ext.args = { [ + '-B -C', + params.gencode ? "-g gene_type" : "-g $params.featurecounts_group_type", + "-t $params.featurecounts_feature_type" + ].join(' ').trim() } + publishDir = [ + path: { "${params.outdir}/${params.aligner}/featurecounts" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: 'MULTIQC_CUSTOM_BIOTYPE' { + publishDir = [ + path: { "${params.outdir}/${params.aligner}/featurecounts" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + } + } + + if (!params.skip_rseqc && 'bam_stat' in rseqc_modules) { + process { + withName: '.*:BAM_RSEQC:RSEQC_BAMSTAT' { + publishDir = [ + path: { "${params.outdir}/${params.aligner}/rseqc/bam_stat" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + ] + } + } + } + + if (!params.skip_rseqc && 'infer_experiment' in rseqc_modules) { + process { + withName: '.*:BAM_RSEQC:RSEQC_INFEREXPERIMENT' { + publishDir = [ + path: { "${params.outdir}/${params.aligner}/rseqc/infer_experiment" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + } + } + + if (!params.skip_rseqc && 'junction_annotation' in rseqc_modules) { + process { + withName: '.*:BAM_RSEQC:RSEQC_JUNCTIONANNOTATION' { + publishDir = [ + [ + path: { "${params.outdir}/${params.aligner}/rseqc/junction_annotation/pdf" }, + mode: params.publish_dir_mode, + pattern: '*.pdf' + ], + [ + path: { "${params.outdir}/${params.aligner}/rseqc/junction_annotation/bed" }, + mode: params.publish_dir_mode, + pattern: '*.bed' + ], + [ + path: { "${params.outdir}/${params.aligner}/rseqc/junction_annotation/xls" }, + mode: params.publish_dir_mode, + pattern: '*.xls' + ], + [ + path: { "${params.outdir}/${params.aligner}/rseqc/junction_annotation/log" }, + mode: params.publish_dir_mode, + pattern: '*.log' + ], + [ + path: { "${params.outdir}/${params.aligner}/rseqc/junction_annotation/rscript" }, + mode: params.publish_dir_mode, + pattern: '*.r' + ] + ] + } + } + } + + if (!params.skip_rseqc && 'junction_saturation' in rseqc_modules) { + process { + withName: '.*:BAM_RSEQC:RSEQC_JUNCTIONSATURATION' { + publishDir = [ + [ + path: { "${params.outdir}/${params.aligner}/rseqc/junction_saturation/pdf" }, + mode: params.publish_dir_mode, + pattern: '*.pdf' + ], + [ + path: { "${params.outdir}/${params.aligner}/rseqc/junction_saturation/rscript" }, + mode: params.publish_dir_mode, + pattern: '*.r' + ] + ] + } + } + } + + if (!params.skip_rseqc && 'read_duplication' in rseqc_modules) { + process { + withName: '.*:BAM_RSEQC:RSEQC_READDUPLICATION' { + publishDir = [ + [ + path: { "${params.outdir}/${params.aligner}/rseqc/read_duplication/pdf" }, + mode: params.publish_dir_mode, + pattern: '*.pdf' + ], 
+ [ + path: { "${params.outdir}/${params.aligner}/rseqc/read_duplication/xls" }, + mode: params.publish_dir_mode, + pattern: '*.xls' + ], + [ + path: { "${params.outdir}/${params.aligner}/rseqc/read_duplication/rscript" }, + mode: params.publish_dir_mode, + pattern: '*.r' + ] + ] + } + } + } + + if (!params.skip_rseqc && 'read_distribution' in rseqc_modules && !params.bam_csi_index) { + process { + withName: '.*:BAM_RSEQC:RSEQC_READDISTRIBUTION' { + publishDir = [ + path: { "${params.outdir}/${params.aligner}/rseqc/read_distribution" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + } + } + + if (!params.skip_rseqc && 'inner_distance' in rseqc_modules && !params.bam_csi_index) { + process { + withName: '.*:BAM_RSEQC:RSEQC_INNERDISTANCE' { + publishDir = [ + [ + path: { "${params.outdir}/${params.aligner}/rseqc/inner_distance/txt" }, + mode: params.publish_dir_mode, + pattern: '*.txt', + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ], + [ + path: { "${params.outdir}/${params.aligner}/rseqc/inner_distance/pdf" }, + mode: params.publish_dir_mode, + pattern: '*.pdf' + ], + [ + path: { "${params.outdir}/${params.aligner}/rseqc/inner_distance/rscript" }, + mode: params.publish_dir_mode, + pattern: '*.r' + ] + ] + } + } + } + + if (!params.skip_rseqc && 'tin' in rseqc_modules && !params.bam_csi_index) { + process { + withName: '.*:BAM_RSEQC:RSEQC_TIN' { + publishDir = [ + path: { "${params.outdir}/${params.aligner}/rseqc/tin" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + } + } +} + +if (!params.skip_multiqc) { + process { + withName: 'MULTIQC' { + ext.args = { params.multiqc_title ? "--title \"$params.multiqc_title\"" : '' } + ext.prefix = "multiqc_report" + publishDir = [ + path: { [ + "${params.outdir}/multiqc", + params.skip_alignment? 
'' : "/${params.aligner}" + ].join('') }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + } +} + +// +// Salmon/ Kallisto pseudoalignment options +// + +if (!params.skip_pseudo_alignment && params.pseudo_aligner == 'salmon') { + process { + withName: '.*:QUANTIFY_PSEUDO_ALIGNMENT:SALMON_QUANT' { + ext.args = { params.extra_salmon_quant_args ?: '' } + publishDir = [ + path: { "${params.outdir}/${params.pseudo_aligner}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') || filename.endsWith('_meta_info.json') ? null : filename } + ] + } + } +} + +if (!params.skip_pseudo_alignment && params.pseudo_aligner == 'kallisto') { + process { + withName: '.*:QUANTIFY_PSEUDO_ALIGNMENT:KALLISTO_QUANT' { + ext.args = { params.extra_kallisto_quant_args ?: '' } + publishDir = [ + path: { "${params.outdir}/${params.pseudo_aligner}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') || filename.endsWith('.run_info.json') || filename.endsWith('.log') ? null : filename } + ] + } + } +} + +if (!params.skip_pseudo_alignment && params.pseudo_aligner) { + process { + withName: '.*:QUANTIFY_PSEUDO_ALIGNMENT:TX2GENE' { + publishDir = [ + path: { "${params.outdir}/${params.pseudo_aligner}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: '.*:QUANTIFY_PSEUDO_ALIGNMENT:TXIMPORT' { + ext.prefix = { "${quant_type}.merged" } + publishDir = [ + path: { "${params.outdir}/${params.pseudo_aligner}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: '.*:QUANTIFY_PSEUDO_ALIGNMENT:SE_.*' { + publishDir = [ + path: { "${params.outdir}/${params.pseudo_aligner}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + ] + } + } + + if (!params.skip_qc & !params.skip_deseq2_qc) { + process { + withName: 'DESEQ2_QC_PSEUDO' { + ext.args = { [ + "--id_col 1", + "--sample_suffix ''", + "--count_col 3", + params.deseq2_vst ? '--vst TRUE' : '' + ].join(' ').trim() } + ext.args2 = { params.pseudo_aligner } + publishDir = [ + path: { "${params.outdir}/${params.pseudo_aligner}/deseq2_qc" }, + mode: params.publish_dir_mode, + pattern: "*{RData,pca.vals.txt,plots.pdf,sample.dists.txt,size_factors,log}" + ] + } + } + } } diff --git a/conf/test.config b/conf/test.config index d363bb6..0c91574 100644 --- a/conf/test.config +++ b/conf/test.config @@ -15,9 +15,9 @@ params { config_profile_description = 'Minimal test dataset to check pipeline function' // Limit resources so that this can run on GitHub Actions - //max_cpus = 4 - //max_memory = '20.GB' - //max_time = '16.h' + max_cpus = 4 + max_memory = '20.GB' + max_time = '16.h' // Input data input = 'https://raw.githubusercontent.com/nf-core/test-datasets/riboseq/testdata/samplesheet.csv' diff --git a/main.nf b/main.nf index 91453db..12ab6bb 100644 --- a/main.nf +++ b/main.nf @@ -17,10 +17,18 @@ nextflow.enable.dsl = 2 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -// TODO nf-core: Remove this line if you don't need a FASTA file -// This is an example of how to use getGenomeAttribute() to fetch parameters -// from igenomes.config using `--genome` -params.fasta = WorkflowMain.getGenomeAttribute(params, 'fasta') +params.fasta = WorkflowMain.getGenomeAttribute(params, 'fasta') +params.transcript_fasta = WorkflowMain.getGenomeAttribute(params, 'transcript_fasta') +params.additional_fasta = WorkflowMain.getGenomeAttribute(params, 'additional_fasta') +params.gtf = WorkflowMain.getGenomeAttribute(params, 'gtf') +params.gff = WorkflowMain.getGenomeAttribute(params, 'gff') +params.gene_bed = WorkflowMain.getGenomeAttribute(params, 'bed12') +params.bbsplit_index = 
WorkflowMain.getGenomeAttribute(params, 'bbsplit') +params.star_index = WorkflowMain.getGenomeAttribute(params, 'star') +params.hisat2_index = WorkflowMain.getGenomeAttribute(params, 'hisat2') +params.rsem_index = WorkflowMain.getGenomeAttribute(params, 'rsem') +params.salmon_index = WorkflowMain.getGenomeAttribute(params, 'salmon') +params.kallisto_index = WorkflowMain.getGenomeAttribute(params, 'kallisto') /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/nextflow.config b/nextflow.config index 9528655..3b422b5 100644 --- a/nextflow.config +++ b/nextflow.config @@ -9,17 +9,22 @@ // Global default params, used in configs params { - // TODO nf-core: Specify your pipeline's command line flags // Input options input = null // References skip_gtf_filter = false genome = null splicesites = null + gtf_extra_attributes = 'gene_name' + gtf_group_features = 'gene_id' + skip_gtf_filter = false + skip_gtf_transcript_filter = false igenomes_base = 's3://ngi-igenomes/igenomes' igenomes_ignore = false gencode = false + save_reference = false featurecounts_group_type = 'gene_biotype' + featurecounts_feature_type = 'exon' // UMI handling with_umi = false @@ -54,6 +59,7 @@ params { // Alignment aligner = 'star_salmon' pseudo_aligner = null + pseudo_aligner_kmer_size = 31 seq_center = null bam_csi_index = false star_ignore_sjdbgtf = false diff --git a/nextflow_schema.json b/nextflow_schema.json index fa85a9a..f696c76 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -1,8 +1,8 @@ { "$schema": "http://json-schema.org/draft-07/schema", - "$id": "https://raw.githubusercontent.com/nf-core/riboseq/master/nextflow_schema.json", - "title": "nf-core/riboseq pipeline parameters", - "description": "Analysis of ribosome profiling, or Ribo-Seq (also named ribosome footprinting)", + "$id": "https://raw.githubusercontent.com/nf-core/rnaseq/master/nextflow_schema.json", + "title": "nf-core/rnaseq pipeline parameters", + 
"description": "RNA sequencing analysis pipeline for gene/isoform quantification and extensive quality control.", "type": "object", "definitions": { "input_output_options": { @@ -17,10 +17,10 @@ "format": "file-path", "exists": true, "mimetype": "text/csv", - "schema": "assets/schema_input.json", "pattern": "^\\S+\\.csv$", + "schema": "assets/schema_input.json", "description": "Path to comma-separated file containing information about the samples in the experiment.", - "help_text": "You will need to create a design file with information about the samples in your experiment before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 3 columns, and a header row. See [usage docs](https://nf-co.re/riboseq/usage#samplesheet-input).", + "help_text": "You will need to create a design file with information about the samples in your experiment before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 4 columns, and a header row. See [usage docs](https://nf-co.re/rnaseq/usage#samplesheet-input).", "fa_icon": "fas fa-file-csv" }, "outdir": { @@ -53,7 +53,7 @@ "type": "string", "description": "Name of iGenomes reference.", "fa_icon": "fas fa-book", - "help_text": "If using a reference genome configured in the pipeline using iGenomes, use this parameter to give the ID for the reference. This is then used to build the full paths for all required reference genome files e.g. `--genome GRCh38`. \n\nSee the [nf-core website docs](https://nf-co.re/usage/reference_genomes) for more details." + "help_text": "If using a reference genome configured in the pipeline using iGenomes (not recommended), use this parameter to give the ID for the reference. This is then used to build the full paths for all required reference genome files e.g. `--genome GRCh38`. \n\nSee the [nf-core website docs](https://nf-co.re/usage/reference_genomes) for more details." 
}, "fasta": { "type": "string", @@ -62,7 +62,7 @@ "mimetype": "text/plain", "pattern": "^\\S+\\.fn?a(sta)?(\\.gz)?$", "description": "Path to FASTA genome file.", - "help_text": "This parameter is *mandatory* if `--genome` is not specified. If you don't have a BWA index available this will be generated for you automatically. Combine with `--save_reference` to save BWA index for future runs.", + "help_text": "This parameter is *mandatory* if `--genome` is not specified. If you don't have the appropriate alignment index available this will be generated for you automatically. Combine with `--save_reference` to save alignment index for future runs.", "fa_icon": "far fa-file-code" }, "gtf": { @@ -75,6 +75,135 @@ "fa_icon": "fas fa-code-branch", "help_text": "This parameter is *mandatory* if `--genome` is not specified." }, + "gff": { + "type": "string", + "format": "file-path", + "exists": true, + "mimetype": "text/plain", + "pattern": "^\\S+\\.gff(\\.gz)?$", + "fa_icon": "fas fa-code-branch", + "description": "Path to GFF3 annotation file.", + "help_text": "This parameter must be specified if `--genome` or `--gtf` are not specified." + }, + "gene_bed": { + "type": "string", + "format": "file-path", + "exists": true, + "mimetype": "text/plain", + "pattern": "^\\S+\\.bed(\\.gz)?$", + "fa_icon": "fas fa-procedures", + "description": "Path to BED file containing gene intervals. This will be created from the GTF file if not specified." + }, + "transcript_fasta": { + "type": "string", + "format": "file-path", + "exists": true, + "mimetype": "text/plain", + "pattern": "^\\S+\\.fn?a(sta)?(\\.gz)?$", + "fa_icon": "far fa-file-code", + "description": "Path to FASTA transcriptome file." + }, + "additional_fasta": { + "type": "string", + "format": "file-path", + "exists": true, + "mimetype": "text/plain", + "pattern": "^\\S+\\.fn?a(sta)?(\\.gz)?$", + "fa_icon": "far fa-file-code", + "description": "FASTA file to concatenate to genome FASTA file e.g. 
containing spike-in sequences.", + "help_text": "If provided, the sequences in this file will get concatenated to the existing genome FASTA file, a GTF file will be automatically created using the entire sequence as the gene, transcript, and exon features, and any alignment index will get created from the combined FASTA and GTF. It is recommended to save the reference with `--save_reference` to re-use the index for future runs so you do not need to create it again." + }, + "splicesites": { + "type": "string", + "format": "file-path", + "exists": true, + "mimetype": "text/plain", + "fa_icon": "fas fa-hand-scissors", + "description": "Splice sites file required for HISAT2." + }, + "star_index": { + "type": "string", + "format": "path", + "exists": true, + "fa_icon": "fas fa-bezier-curve", + "description": "Path to directory or tar.gz archive for pre-built STAR index." + }, + "hisat2_index": { + "type": "string", + "format": "path", + "exists": true, + "fa_icon": "fas fa-bezier-curve", + "description": "Path to directory or tar.gz archive for pre-built HISAT2 index." + }, + "rsem_index": { + "type": "string", + "format": "path", + "exists": true, + "fa_icon": "fas fa-bezier-curve", + "description": "Path to directory or tar.gz archive for pre-built RSEM index." + }, + "salmon_index": { + "type": "string", + "format": "path", + "exists": true, + "fa_icon": "fas fa-bezier-curve", + "description": "Path to directory or tar.gz archive for pre-built Salmon index." + }, + "kallisto_index": { + "type": "string", + "format": "path", + "exists": true, + "fa_icon": "fas fa-bezier-curve", + "description": "Path to directory or tar.gz archive for pre-built Kallisto index." 
+ }, + "hisat2_build_memory": { + "type": "string", + "default": "200.GB", + "fa_icon": "fas fa-memory", + "pattern": "^\\d+(\\.\\d+)?\\.?\\s*(K|M|G|T)?B$", + "description": "Minimum memory required to use splice sites and exons in the HiSAT2 index build process.", + "help_text": "HiSAT2 requires a huge amount of RAM to build a genome index for larger genomes, if including splice sites and exons e.g. the human genome might typically require 200GB. If you specify less than this threshold for the `HISAT2_BUILD` process then the splice sites and exons will be ignored, meaning that the process will require a lot less memory. If you are working with a small genome, set this parameter to a lower value to reduce the threshold for skipping this check. If using a larger genome, consider supplying more memory to the `HISAT2_BUILD` process." + }, + "gencode": { + "type": "boolean", + "fa_icon": "fas fa-code-branch", + "description": "Specify if your GTF annotation is in GENCODE format.", + "help_text": "If your GTF file is in GENCODE format and you would like to run Salmon i.e. `--pseudo_aligner salmon`, you will need to provide this parameter in order to build the Salmon index appropriately." + }, + "gtf_extra_attributes": { + "type": "string", + "default": "gene_name", + "fa_icon": "fas fa-plus-square", + "description": "By default, the pipeline uses the `gene_name` field to obtain additional gene identifiers from the input GTF file when running Salmon.", + "help_text": "This behaviour can be modified by specifying `--gtf_extra_attributes` when running the pipeline. Note that you can also specify more than one desired value, separated by a comma e.g. 
`--gtf_extra_attributes gene_id,...`.\n" + }, + "gtf_group_features": { + "type": "string", + "default": "gene_id", + "description": "Define the attribute type used to group features in the GTF file when running Salmon.", + "fa_icon": "fas fa-layer-group" + }, + "featurecounts_group_type": { + "type": "string", + "default": "gene_biotype", + "fa_icon": "fas fa-layer-group", + "description": "The attribute type used to group feature types in the GTF file when generating the biotype plot with featureCounts." + }, + "featurecounts_feature_type": { + "type": "string", + "default": "exon", + "description": "By default, the pipeline assigns reads based on the 'exon' attribute within the GTF file.", + "fa_icon": "fas fa-indent", + "help_text": "The feature type used from the GTF file when generating the biotype plot with featureCounts." + }, + "igenomes_base": { + "type": "string", + "format": "directory-path", + "description": "Directory / URL base for iGenomes references.", + "default": "s3://ngi-igenomes/igenomes", + "fa_icon": "fas fa-cloud-download-alt", + "hidden": true + }, "igenomes_ignore": { "type": "boolean", "description": "Do not load the iGenomes reference config.", @@ -82,6 +211,421 @@ "hidden": true, "help_text": "Do not load `igenomes.config` when running the pipeline. You may choose this option if you observe clashes between custom parameters and those supplied in `igenomes.config`." } + }, + "required": ["fasta"] + }, + "read_trimming_options": { + "title": "Read trimming options", + "type": "object", + "fa_icon": "fas fa-cut", + "description": "Options to adjust read trimming criteria.", + "properties": { + "trimmer": { + "type": "string", + "default": "trimgalore", + "description": "Specifies the trimming tool to use - available options are 'trimgalore' and 'fastp'.", + "fa_icon": "fas fa-cut", + "enum": ["trimgalore", "fastp"] + }, + "extra_trimgalore_args": { + "type": "string", + "description": "Extra arguments to pass to Trim Galore! 
command in addition to defaults defined by the pipeline.", + "fa_icon": "fas fa-plus" + }, + "extra_fastp_args": { + "type": "string", + "description": "Extra arguments to pass to fastp command in addition to defaults defined by the pipeline.", + "fa_icon": "fas fa-plus" + }, + "min_trimmed_reads": { + "type": "integer", + "default": 10000, + "fa_icon": "fas fa-hand-paper", + "description": "Minimum number of trimmed reads below which samples are removed from further processing. Some downstream steps in the pipeline will fail if this threshold is too low." + } + } + }, + "read_filtering_options": { + "title": "Read filtering options", + "type": "object", + "description": "Options for filtering reads prior to alignment", + "default": "", + "properties": { + "bbsplit_fasta_list": { + "type": "string", + "format": "file-path", + "exists": true, + "mimetype": "text/plain", + "fa_icon": "fas fa-list-alt", + "description": "Path to comma-separated file containing a list of reference genomes to filter reads against with BBSplit. You have to also explicitly set `--skip_bbsplit false` if you want to use BBSplit.", + "help_text": "The file should contain 2 columns: short name and full path to reference genome(s) e.g. \n```\nmm10,/path/to/mm10.fa\necoli,/path/to/ecoli.fa\n```" + }, + "bbsplit_index": { + "type": "string", + "format": "path", + "exists": true, + "fa_icon": "fas fa-bezier-curve", + "description": "Path to directory or tar.gz archive for pre-built BBSplit index.", + "help_text": "The BBSplit index will have to be built at least once with this pipeline (see `--save_reference` to save index). It can then be provided via `--bbsplit_index` for future runs." + }, + "remove_ribo_rna": { + "type": "boolean", + "fa_icon": "fas fa-trash-alt", + "description": "Enable the removal of reads derived from ribosomal RNA using SortMeRNA.", + "help_text": "Any patterns found in the sequences defined by the '--ribo_database_manifest' parameter will be used." 
+ }, + "ribo_database_manifest": { + "type": "string", + "format": "file-path", + "exists": true, + "mimetype": "text/plain", + "default": "${projectDir}/assets/rrna-db-defaults.txt", + "fa_icon": "fas fa-database", + "description": "Text file containing paths to fasta files (one per line) that will be used to create the database for SortMeRNA.", + "help_text": "By default, [rRNA databases](https://github.com/biocore/sortmerna/tree/master/data/rRNA_databases) defined in the SortMeRNA GitHub repo are used. You can see an example in the pipeline GitHub repository in `assets/rrna-db-defaults.txt`.\nPlease note that commercial/non-academic entities require [`licensing for SILVA`](https://www.arb-silva.de/silva-license-information) for these default databases." + } + }, + "fa_icon": "fas fa-trash-alt" + }, + "umi_options": { + "title": "UMI options", + "type": "object", + "description": "Options for processing reads with unique molecular identifiers", + "default": "", + "properties": { + "with_umi": { + "type": "boolean", + "fa_icon": "fas fa-barcode", + "description": "Enable UMI-based read deduplication." + }, + "umitools_extract_method": { + "type": "string", + "default": "string", + "fa_icon": "fas fa-barcode", + "description": "UMI pattern to use. Can be either 'string' (default) or 'regex'.", + "help_text": "More details can be found in the [UMI-tools documentation](https://umi-tools.readthedocs.io/en/latest/reference/extract.html#extract-method).\n" + }, + "umitools_bc_pattern": { + "type": "string", + "fa_icon": "fas fa-barcode", + "help_text": "More details can be found in the [UMI-tools documentation](https://umi-tools.readthedocs.io/en/latest/reference/extract.html#extract-method).", + "description": "The UMI barcode pattern to use e.g. 'NNNNNN' indicates that the first 6 nucleotides of the read are from the UMI."
+ }, + "umitools_bc_pattern2": { + "type": "string", + "fa_icon": "fas fa-barcode", + "description": "The UMI barcode pattern to use if the UMI is located in read 2." + }, + "umi_discard_read": { + "type": "integer", + "fa_icon": "fas fa-barcode", + "description": "After UMI barcode extraction discard either R1 or R2 by setting this parameter to 1 or 2, respectively." + }, + "umitools_umi_separator": { + "type": "string", + "fa_icon": "fas fa-star-half-alt", + "description": "The character that separates the UMI in the read name. Most likely a colon if you skipped the extraction with UMI-tools and used other software." + }, + "umitools_grouping_method": { + "type": "string", + "default": "directional", + "fa_icon": "far fa-object-ungroup", + "description": "Method to use to determine read groups by subsuming those with similar UMIs. All methods start by identifying the reads with the same mapping position, but treat similar yet nonidentical UMIs differently.", + "enum": ["unique", "percentile", "cluster", "adjacency", "directional"] + }, + "umitools_dedup_stats": { + "type": "boolean", + "fa_icon": "fas fa-barcode", + "help_text": "It can be quite time consuming generating these output stats - see [#827](https://github.com/nf-core/rnaseq/issues/827).", + "description": "Generate output stats when running \"umi_tools dedup\"." 
+ } + }, + "fa_icon": "fas fa-barcode" + }, + "alignment_options": { + "title": "Alignment options", + "type": "object", + "fa_icon": "fas fa-map-signs", + "description": "Options to adjust parameters and filtering criteria for read alignments.", + "properties": { + "aligner": { + "type": "string", + "default": "star_salmon", + "description": "Specifies the alignment algorithm to use - available options are 'star_salmon', 'star_rsem' and 'hisat2'.", + "fa_icon": "fas fa-map-signs", + "enum": ["star_salmon", "star_rsem", "hisat2"] + }, + "pseudo_aligner": { + "type": "string", + "description": "Specifies the pseudo aligner to use - available options are 'salmon' and 'kallisto'. Runs in addition to '--aligner'.", + "fa_icon": "fas fa-hamburger", + "enum": ["salmon", "kallisto"] + }, + "pseudo_aligner_kmer_size": { + "type": "integer", + "default": 31, + "description": "Kmer length passed to indexing step of pseudoaligners", + "help_text": "Failure to set a good kmer size could cause issues with quantification with Kallisto or Salmon. This is mostly an issue for short reads (<50bp), where the default kmer size of 31 is a problem.", + "fa_icon": "fas fa-ruler-horizontal" + }, + "bam_csi_index": { + "type": "boolean", + "description": "Create a CSI index for BAM files instead of the traditional BAI index. This will be required for genomes with larger chromosome sizes.", + "fa_icon": "fas fa-sort-alpha-down" + }, + "star_ignore_sjdbgtf": { + "type": "boolean", + "fa_icon": "fas fa-ban", + "description": "When using pre-built STAR indices do not re-extract and use splice junctions from the GTF file."
+ }, + "salmon_quant_libtype": { + "type": "string", + "fa_icon": "fas fa-fast-forward", + "description": "Override Salmon library type inferred based on strandedness defined in meta object.", + "help_text": "See [Salmon docs](https://salmon.readthedocs.io/en/latest/library_type.html).", + "enum": [ + "A", + "IS", + "ISF", + "ISR", + "IU", + "MS", + "MSF", + "MSR", + "MU", + "OS", + "OSF", + "OSR", + "OU", + "SF", + "SR", + "U" + ] + }, + "min_mapped_reads": { + "type": "number", + "default": 5, + "fa_icon": "fas fa-percentage", + "description": "Minimum percentage of uniquely mapped reads below which samples are removed from further processing.", + "help_text": "Some downstream steps in the pipeline will fail if this threshold is too low." + }, + "seq_center": { + "type": "string", + "description": "Sequencing center information to be added to read group of BAM files.", + "fa_icon": "fas fa-synagogue" + }, + "stringtie_ignore_gtf": { + "type": "boolean", + "description": "Perform reference-guided de novo assembly of transcripts using StringTie i.e. don't restrict to those in GTF file.", + "fa_icon": "fas fa-ban" + }, + "extra_star_align_args": { + "type": "string", + "description": "Extra arguments to pass to STAR alignment command in addition to defaults defined by the pipeline. Only available for the STAR-Salmon route.", + "fa_icon": "fas fa-plus" + }, + "extra_salmon_quant_args": { + "type": "string", + "description": "Extra arguments to pass to Salmon quant command in addition to defaults defined by the pipeline.", + "fa_icon": "fas fa-plus" + }, + "extra_kallisto_quant_args": { + "type": "string", + "description": "Extra arguments to pass to Kallisto quant command in addition to defaults defined by the pipeline.", + "fa_icon": "fas fa-plus" + }, + "kallisto_quant_fraglen": { + "type": "integer", + "description": "In single-end mode Kallisto requires an estimated fragment length. Specify a default value for that here.
TODO: use existing RSeQC results to do this dynamically.", + "default": 200, + "fa_icon": "fas fa-ruler-horizontal" + }, + "kallisto_quant_fraglen_sd": { + "type": "integer", + "description": "In single-end mode, Kallisto requires an estimated standard deviation for fragment length. Specify a default value for that here. TODO: use existing RSeQC results to do this dynamically.", + "default": 200, + "fa_icon": "fas fa-sort-amount-up-alt" + } + } + }, + "optional_outputs": { + "title": "Optional outputs", + "type": "object", + "description": "Additional output files produced as intermediates that can be saved", + "default": "", + "properties": { + "save_merged_fastq": { + "type": "boolean", + "fa_icon": "fas fa-save", + "description": "Save FastQ files after merging re-sequenced libraries in the results directory." + }, + "save_umi_intermeds": { + "type": "boolean", + "fa_icon": "fas fa-save", + "description": "If this option is specified, intermediate FastQ and BAM files produced by UMI-tools are also saved in the results directory." + }, + "save_non_ribo_reads": { + "type": "boolean", + "fa_icon": "fas fa-save", + "description": "If this option is specified, intermediate FastQ files containing non-rRNA reads will be saved in the results directory." + }, + "save_bbsplit_reads": { + "type": "boolean", + "fa_icon": "fas fa-save", + "description": "If this option is specified, FastQ files split by reference will be saved in the results directory." + }, + "save_reference": { + "type": "boolean", + "description": "If generated by the pipeline save the STAR index in the results directory.", + "help_text": "If an alignment index is generated by the pipeline use this parameter to save it to your results folder.
These can then be used for future pipeline runs, reducing processing times.", + "fa_icon": "fas fa-save" + }, + "save_trimmed": { + "type": "boolean", + "description": "Save the trimmed FastQ files in the results directory.", + "help_text": "By default, trimmed FastQ files will not be saved to the results directory. Specify this flag (or set to true in your config file) to copy these files to the results directory when complete.", + "fa_icon": "fas fa-save" + }, + "save_align_intermeds": { + "type": "boolean", + "description": "Save the intermediate BAM files from the alignment step.", + "help_text": "By default, intermediate BAM files will not be saved. The final BAM files created after the appropriate filtering step are always saved to limit storage usage. Set this parameter to also save other intermediate BAM files.", + "fa_icon": "fas fa-save" + }, + "save_unaligned": { + "type": "boolean", + "fa_icon": "fas fa-save", + "description": "Where possible, save unaligned reads from either STAR, HISAT2 or Salmon to the results directory.", + "help_text": "This may either be in the form of FastQ or BAM files depending on the options available for that particular tool." + } + } + }, + "quality_control": { + "title": "Quality Control", + "type": "object", + "description": "Additional quality control options.", + "default": "", + "properties": { + "deseq2_vst": { + "type": "boolean", + "description": "Use vst transformation instead of rlog with DESeq2.", + "help_text": "See [DESeq2 docs](http://bioconductor.org/packages/devel/bioc/vignettes/DESeq2/inst/doc/DESeq2.html#data-transformations-and-visualization).", + "fa_icon": "fas fa-dolly", + "default": true + }, + "rseqc_modules": { + "type": "string", + "default": "bam_stat,inner_distance,infer_experiment,junction_annotation,junction_saturation,read_distribution,read_duplication", + "fa_icon": "fas fa-chart-pie", + "description": "Specify the RSeQC modules to run." 
+ } + } + }, + "process_skipping_options": { + "title": "Process skipping options", + "type": "object", + "fa_icon": "fas fa-fast-forward", + "description": "Options to skip various steps within the workflow.", + "properties": { + "skip_gtf_filter": { + "type": "boolean", + "fa_icon": "fas fa-forward", + "description": "Skip filtering of GTF for valid scaffolds and/or transcript IDs.", + "help_text": "If you're confident in the validity of the GTF with respect to the genome fasta file, or wish to disregard failures triggered by the filtering module, activate this option." + }, + "skip_gtf_transcript_filter": { + "type": "boolean", + "fa_icon": "fas fa-forward", + "description": "Skip the 'transcript_id' checking component of the GTF filtering script used in the pipeline." + }, + "skip_bbsplit": { + "type": "boolean", + "default": true, + "fa_icon": "fas fa-fast-forward", + "description": "Skip BBSplit for removal of non-reference genome reads." + }, + "skip_umi_extract": { + "type": "boolean", + "fa_icon": "fas fa-compress-alt", + "description": "Skip the UMI extraction from the read in case the UMIs have been moved to the headers in advance of the pipeline run." + }, + "skip_trimming": { + "type": "boolean", + "description": "Skip the adapter trimming step.", + "help_text": "Use this if your input FastQ files have already been trimmed outside of the workflow or if you're very confident that there is no adapter contamination in your data.", + "fa_icon": "fas fa-fast-forward" + }, + "skip_alignment": { + "type": "boolean", + "fa_icon": "fas fa-fast-forward", + "description": "Skip all of the alignment-based processes within the pipeline." + }, + "skip_pseudo_alignment": { + "type": "boolean", + "fa_icon": "fas fa-fast-forward", + "description": "Skip all of the pseudoalignment-based processes within the pipeline." + }, + "skip_markduplicates": { + "type": "boolean", + "fa_icon": "fas fa-fast-forward", + "description": "Skip picard MarkDuplicates step."
+ }, + "skip_bigwig": { + "type": "boolean", + "fa_icon": "fas fa-fast-forward", + "description": "Skip bigWig file creation." + }, + "skip_stringtie": { + "type": "boolean", + "fa_icon": "fas fa-fast-forward", + "description": "Skip StringTie." + }, + "skip_fastqc": { + "type": "boolean", + "description": "Skip FastQC.", + "fa_icon": "fas fa-fast-forward" + }, + "skip_preseq": { + "type": "boolean", + "description": "Skip Preseq.", + "fa_icon": "fas fa-fast-forward", + "default": true + }, + "skip_dupradar": { + "type": "boolean", + "fa_icon": "fas fa-fast-forward", + "description": "Skip dupRadar." + }, + "skip_qualimap": { + "type": "boolean", + "fa_icon": "fas fa-fast-forward", + "description": "Skip Qualimap." + }, + "skip_rseqc": { + "type": "boolean", + "fa_icon": "fas fa-fast-forward", + "description": "Skip RSeQC." + }, + "skip_biotype_qc": { + "type": "boolean", + "fa_icon": "fas fa-fast-forward", + "description": "Skip additional featureCounts process for biotype QC." + }, + "skip_deseq2_qc": { + "type": "boolean", + "fa_icon": "fas fa-fast-forward", + "description": "Skip DESeq2 PCA and heatmap plotting." + }, + "skip_multiqc": { + "type": "boolean", + "description": "Skip MultiQC.", + "fa_icon": "fas fa-fast-forward" + }, + "skip_qc": { + "type": "boolean", + "fa_icon": "fas fa-fast-forward", + "description": "Skip all QC steps except for MultiQC." + } } }, "institutional_config_options": { @@ -129,6 +673,13 @@ "description": "Institutional config URL link.", "hidden": true, "fa_icon": "fas fa-users-cog" + }, + "test_data_base": { + "type": "string", + "default": "https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq3", + "description": "Base path / URL for data used in the test profiles", + "help_text": "Warning: The `-profile test` samplesheet file itself contains remote paths. 
Setting this parameter does not alter the contents of that file.", + "hidden": true } } }, @@ -212,7 +763,6 @@ "max_multiqc_email_size": { "type": "string", "description": "File size limit when attaching MultiQC reports to summary emails.", - "pattern": "^\\d+(\\.\\d+)?\\.?\\s*(K|M|G|T)?B$", "default": "25.MB", "fa_icon": "fas fa-file-upload", "hidden": true @@ -233,18 +783,26 @@ "multiqc_config": { "type": "string", "format": "file-path", + "exists": true, + "mimetype": "text/plain", "description": "Custom config file to supply to MultiQC.", "fa_icon": "fas fa-cog", "hidden": true }, "multiqc_logo": { "type": "string", + "format": "file-path", + "exists": true, + "mimetype": "text/plain", "description": "Custom logo file to supply to MultiQC. File name must also be set in the MultiQC config file", "fa_icon": "fas fa-image", "hidden": true }, "multiqc_methods_description": { "type": "string", + "format": "file-path", + "exists": true, + "mimetype": "text/plain", "description": "Custom MultiQC yaml file containing HTML including a methods description.", "fa_icon": "fas fa-cog" }, @@ -286,6 +844,27 @@ { "$ref": "#/definitions/reference_genome_options" }, + { + "$ref": "#/definitions/read_trimming_options" + }, + { + "$ref": "#/definitions/read_filtering_options" + }, + { + "$ref": "#/definitions/umi_options" + }, + { + "$ref": "#/definitions/alignment_options" + }, + { + "$ref": "#/definitions/optional_outputs" + }, + { + "$ref": "#/definitions/quality_control" + }, + { + "$ref": "#/definitions/process_skipping_options" + }, { "$ref": "#/definitions/institutional_config_options" }, From 22922dd5d7bd84fa0b67fb11c0d15d7b1b5543af Mon Sep 17 00:00:00 2001 From: Jonathan Manning Date: Fri, 22 Dec 2023 17:30:52 +0000 Subject: [PATCH 13/30] Reset test resources back --- conf/test.config | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/conf/test.config b/conf/test.config index 0c91574..2884e4d 100644 --- a/conf/test.config +++ b/conf/test.config @@ 
-15,9 +15,9 @@ params { config_profile_description = 'Minimal test dataset to check pipeline function' // Limit resources so that this can run on GitHub Actions - max_cpus = 4 - max_memory = '20.GB' - max_time = '16.h' + max_cpus = 2 + max_memory = '6.GB' + max_time = '6.h' // Input data input = 'https://raw.githubusercontent.com/nf-core/test-datasets/riboseq/testdata/samplesheet.csv' From 5933b288d60066f85fb42bc3fb2640df28acdc1b Mon Sep 17 00:00:00 2001 From: Jonathan Manning Date: Wed, 3 Jan 2024 10:42:40 +0000 Subject: [PATCH 14/30] Add test_data_base default --- nextflow.config | 1 + 1 file changed, 1 insertion(+) diff --git a/nextflow.config b/nextflow.config index d887f8b..7308f0f 100644 --- a/nextflow.config +++ b/nextflow.config @@ -119,6 +119,7 @@ params { custom_config_base = "https://raw.githubusercontent.com/nf-core/configs/${params.custom_config_version}" config_profile_contact = null config_profile_url = null + test_data_base = 'https://raw.githubusercontent.com/nf-core/test-datasets/riboseq/testdata/' // Max resource options From 58ce68c471e4bb8547904787da99537aadad37d4 Mon Sep 17 00:00:00 2001 From: Jonathan Manning Date: Wed, 3 Jan 2024 10:43:51 +0000 Subject: [PATCH 15/30] Fix multiqc config --- assets/multiqc_config.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml index 2b6536e..5a2a6ee 100644 --- a/assets/multiqc_config.yml +++ b/assets/multiqc_config.yml @@ -1,13 +1,13 @@ report_comment: > This report has been generated by the nf-core/riboseq analysis pipeline. For information about how to interpret these results, please see the - documentation. + documentation. 
report_section_order: - "nf-core-rnaseq-methods-description": + "nf-core-riboseq-methods-description": order: -1000 software_versions: order: -1001 - "nf-core-rnaseq-summary": + "nf-core-riboseq-summary": order: -1002 export_plots: true From 9db1555896f9a2a95f37558091207bbbb1510da6 Mon Sep 17 00:00:00 2001 From: Jonathan Manning Date: Thu, 11 Jan 2024 15:28:49 +0000 Subject: [PATCH 16/30] Bump to fix subworkflow tests --- modules.json | 2 +- .../tests/main.nf.test | 7 ++- .../tests/main.nf.test.snap | 45 ++++++++++++++----- 3 files changed, 40 insertions(+), 14 deletions(-) diff --git a/modules.json b/modules.json index 5579b49..2213c6e 100644 --- a/modules.json +++ b/modules.json @@ -121,7 +121,7 @@ "nf-core": { "fastq_subsample_fq_salmon": { "branch": "master", - "git_sha": "c5ef823ea08007ee393baa7a172913d1ad1de457", + "git_sha": "bdfd7bc8d2e5a93531e14bd94c4128d9a72a2435", "installed_by": ["subworkflows"] } } diff --git a/subworkflows/nf-core/fastq_subsample_fq_salmon/tests/main.nf.test b/subworkflows/nf-core/fastq_subsample_fq_salmon/tests/main.nf.test index 19289c7..28e6b65 100644 --- a/subworkflows/nf-core/fastq_subsample_fq_salmon/tests/main.nf.test +++ b/subworkflows/nf-core/fastq_subsample_fq_salmon/tests/main.nf.test @@ -47,9 +47,14 @@ nextflow_workflow { } then { + def readlines1 = path(workflow.out.reads[0][1][0]).linesGzip + def readlines2 = path(workflow.out.reads[0][1][1]).linesGzip assertAll( { assert workflow.success }, - { assert snapshot(workflow.out.reads).match("reads") }, + { assert snapshot(readlines1[0..5]).match("test_reads_1_lines") }, + { assert snapshot(readlines1.size()).match("test_reads_1_size") }, + { assert snapshot(readlines2[0..5]).match("test_reads_2_lines") }, + { assert snapshot(readlines2.size()).match("test_reads_2_size") }, { assert snapshot(workflow.out.versions).match("versions") }, { assert workflow.out.index }, diff --git a/subworkflows/nf-core/fastq_subsample_fq_salmon/tests/main.nf.test.snap 
b/subworkflows/nf-core/fastq_subsample_fq_salmon/tests/main.nf.test.snap index 0146e50..afbe0b5 100644 --- a/subworkflows/nf-core/fastq_subsample_fq_salmon/tests/main.nf.test.snap +++ b/subworkflows/nf-core/fastq_subsample_fq_salmon/tests/main.nf.test.snap @@ -1,4 +1,10 @@ { + "test_reads_1_size": { + "content": [ + 1066944 + ], + "timestamp": "2024-01-11T10:33:54.747992124" + }, "versions": { "content": [ [ @@ -8,21 +14,36 @@ ], "timestamp": "2023-11-26T16:41:10.396971682" }, - "reads": { + "test_reads_1_lines": { "content": [ [ - [ - { - "id": "test", - "single_end": false - }, - [ - "test_R1.fastq.gz:md5,eda1cd0cfb2d3269f7f100cdcee1f286", - "test_R2.fastq.gz:md5,3e2ea75a2f0fb0178ac6a8b41ad1a5dd" - ] - ] + "@normal#21#998579#1/1", + "CCTTCTCCCTGCTGGGGTTGCTTGTCAGTAGCGGGCAAGGTAGGAGTGTGGCGCTTTATTGCATTTACTTTCCCTCCCCCTTCCCCCCGGCCAAGAGAGG", + "+", + "102302000331;3333;23133320233330*33/233333333333333/313232333/3;3;3/333000;11/00;;01//103*1032323233", + "@normal#21#998572#2/1", + "CTCCTCTCCTTCTACCTGCTGGGGTTGCTTGTCAGTAGCGGGCAAGGTCGGAGTGTTGCGCTTTATTGCATTTACTTTCCCTCCCCCTTCCACCCGGCCA" ] ], - "timestamp": "2023-11-26T16:41:10.167020985" + "timestamp": "2024-01-11T10:33:54.730250665" + }, + "test_reads_2_lines": { + "content": [ + [ + "@normal#21#998579#1/2", + "AAAAAAAAAGAAGAAGCAGAAGCTGTTTCCCTGGATATCCTGCTCACCGATTCCCCTCTCCAATTCTGTATTTTCCCTTCTCTTATTTAAGGGTCTCCAC", + "+", + "023333233332333310333302333211/3333;0300;*/;000/32;201003031/22;21333032;;11/23030322;2332333313/030", + "@normal#21#998572#2/2", + "TTCCCCTCTCCAATTGAGTATTTTCCCTTCTCTTATTTAAGGGTCTCCACACAAACAGATACAATTTTAGGGACAGCTAGGAGAAAGAACGAAAATAATAA" + ] + ], + "timestamp": "2024-01-11T10:33:54.756723613" + }, + "test_reads_2_size": { + "content": [ + 1066944 + ], + "timestamp": "2024-01-11T10:33:54.763399473" } } \ No newline at end of file From 81de6edc119f5f08c055c62dbb50dbe06655fc4d Mon Sep 17 00:00:00 2001 From: Jonathan Manning Date: Thu, 11 Jan 2024 16:08:11 +0000 Subject: [PATCH 17/30] Bump sortmerna --- 
modules.json | 2 +- modules/nf-core/sortmerna/environment.yml | 2 +- modules/nf-core/sortmerna/main.nf | 32 +++++- modules/nf-core/sortmerna/tests/main.nf.test | 97 +++++++++++++++++-- .../nf-core/sortmerna/tests/main.nf.test.snap | 46 +++------ 5 files changed, 137 insertions(+), 42 deletions(-) diff --git a/modules.json b/modules.json index 2213c6e..d4e8294 100644 --- a/modules.json +++ b/modules.json @@ -92,7 +92,7 @@ }, "sortmerna": { "branch": "master", - "git_sha": "a20b6b1e9114a08007608528e4a2b0fbbb8a9ca2", + "git_sha": "ce558e30784469b88a16923ca96d81899d240b42", "installed_by": ["modules"] }, "star/genomegenerate": { diff --git a/modules/nf-core/sortmerna/environment.yml b/modules/nf-core/sortmerna/environment.yml index 3dae00a..f40f995 100644 --- a/modules/nf-core/sortmerna/environment.yml +++ b/modules/nf-core/sortmerna/environment.yml @@ -4,4 +4,4 @@ channels: - bioconda - defaults dependencies: - - bioconda::sortmerna=4.3.4 + - bioconda::sortmerna=4.3.6 diff --git a/modules/nf-core/sortmerna/main.nf b/modules/nf-core/sortmerna/main.nf index 53ccb97..29c640c 100644 --- a/modules/nf-core/sortmerna/main.nf +++ b/modules/nf-core/sortmerna/main.nf @@ -1,11 +1,11 @@ process SORTMERNA { tag "$meta.id" - label "process_high" + label 'process_high' conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/sortmerna:4.3.4--h9ee0642_0' : - 'biocontainers/sortmerna:4.3.4--h9ee0642_0' }" + 'https://depot.galaxyproject.org/singularity/sortmerna:4.3.6--h9ee0642_0' : + 'biocontainers/sortmerna:4.3.6--h9ee0642_0' }" input: tuple val(meta), path(reads) @@ -67,4 +67,30 @@ process SORTMERNA { END_VERSIONS """ } + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + if (meta.single_end) { + """ + touch ${prefix}.non_rRNA.fastq.gz + touch ${prefix}.sortmerna.log + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + sortmerna: \$(echo \$(sortmerna --version 2>&1) | sed 's/^.*SortMeRNA version //; s/ Build Date.*\$//') + END_VERSIONS + """ + } else { + """ + touch ${prefix}_1.non_rRNA.fastq.gz + touch ${prefix}_2.non_rRNA.fastq.gz + touch ${prefix}.sortmerna.log + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + sortmerna: \$(echo \$(sortmerna --version 2>&1) | sed 's/^.*SortMeRNA version //; s/ Build Date.*\$//') + END_VERSIONS + """ + } } diff --git a/modules/nf-core/sortmerna/tests/main.nf.test b/modules/nf-core/sortmerna/tests/main.nf.test index 3ec2692..8a01e2a 100644 --- a/modules/nf-core/sortmerna/tests/main.nf.test +++ b/modules/nf-core/sortmerna/tests/main.nf.test @@ -23,9 +23,51 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert snapshot(process.out.reads).match("se_reads") }, - { assert process.out.log }, - { assert snapshot(process.out.versions).match("se_versions") } + { assert process.out.reads }, + { assert file(process.out.log[0][1]).text.contains("Total reads passing E-value threshold = 100 (100.00)") }, + { + assert snapshot( + ( + [process.out.reads[0][0].toString()] + // meta + process.out.reads.collect { file(it[1]).getName() } + + process.out.log.collect { file(it[1]).getName() } + ).sort() + ).match("sarscov2 single_end-for_stub_match") + }, + { assert snapshot(process.out.versions).match("versions") } + ) + } + + } + + 
test("sarscov2 single_end stub") { + + options '-stub' + + when { + process { + """ + input[0] = [ [ id:'test', single_end:true ], // meta map + [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) ] + ] + input[1] = [ file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { + assert snapshot( + ( + [process.out.reads[0][0].toString()] + // meta + process.out.reads.collect { file(it[1]).getName() } + + process.out.log.collect { file(it[1]).getName() } + ).sort() + ).match("sarscov2 single_end-for_stub_match") + }, + { assert snapshot(process.out.versions).match("versions") } ) } @@ -48,9 +90,52 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert snapshot(process.out.reads).match("pe_reads") }, - { assert process.out.log }, - { assert snapshot(process.out.versions).match("pe_versions") } + { assert process.out.reads }, + { assert file(process.out.log[0][1]).text.contains("Total reads passing E-value threshold = 200 (100.00)") }, + { + assert snapshot( + ( + [process.out.reads[0][0].toString()] + // meta + process.out.reads.collect { it[1].collect { item -> file(item).getName() } } + + process.out.log.collect { file(it[1]).getName() } + ).sort() + ).match("sarscov2 paired_end-for_stub_match") + }, + { assert snapshot(process.out.versions).match("versions") } + ) + } + + } + + test("sarscov2 paired_end stub") { + + options '-stub' + + when { + process { + """ + input[0] = [ [ id:'test', single_end:false ], // meta map + [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) ] + ] + input[1] = [ file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { + assert snapshot( + ( + 
[process.out.reads[0][0].toString()] + // meta + process.out.reads.collect { it[1].collect { item -> file(item).getName() } } + + process.out.log.collect { file(it[1]).getName() } + ).sort() + ).match("sarscov2 paired_end-for_stub_match") + }, + { assert snapshot(process.out.versions).match("versions") } ) } diff --git a/modules/nf-core/sortmerna/tests/main.nf.test.snap b/modules/nf-core/sortmerna/tests/main.nf.test.snap index f1bedb7..e502000 100644 --- a/modules/nf-core/sortmerna/tests/main.nf.test.snap +++ b/modules/nf-core/sortmerna/tests/main.nf.test.snap @@ -1,49 +1,33 @@ { - "se_versions": { + "sarscov2 single_end-for_stub_match": { "content": [ [ - "versions.yml:md5,96553a18cad5237fbf76d5a6c966360e" + "test.non_rRNA.fastq.gz", + "test.sortmerna.log", + "{id=test, single_end=true}" ] ], - "timestamp": "2023-11-22T14:25:07.95908694" + "timestamp": "2023-12-21T11:56:00.15356" }, - "pe_reads": { + "versions": { "content": [ [ - [ - { - "id": "test", - "single_end": false - }, - [ - "test_1.non_rRNA.fastq.gz:md5,e62ff0123a74adfc6903d59a449cbdb0", - "test_2.non_rRNA.fastq.gz:md5,e62ff0123a74adfc6903d59a449cbdb0" - ] - ] + "versions.yml:md5,7df9d50209f351e1f75e05a1fad6ba4b" ] ], - "timestamp": "2023-11-22T14:25:19.098771475" + "timestamp": "2023-12-21T11:56:00.200244" }, - "se_reads": { + "sarscov2 paired_end-for_stub_match": { "content": [ [ [ - { - "id": "test", - "single_end": true - }, - "test.non_rRNA.fastq.gz:md5,e62ff0123a74adfc6903d59a449cbdb0" - ] - ] - ], - "timestamp": "2023-11-22T14:25:07.949212892" - }, - "pe_versions": { - "content": [ - [ - "versions.yml:md5,96553a18cad5237fbf76d5a6c966360e" + "test_1.non_rRNA.fastq.gz", + "test_2.non_rRNA.fastq.gz" + ], + "test.sortmerna.log", + "{id=test, single_end=false}" ] ], - "timestamp": "2023-11-22T14:25:19.105098985" + "timestamp": "2023-12-21T12:00:47.879193" } } \ No newline at end of file From 81a01a77e404f8fe4538f054a744d61f95c5f42d Mon Sep 17 00:00:00 2001 From: Jonathan Manning Date: Thu, 11 Jan 2024 
18:11:45 +0000 Subject: [PATCH 18/30] Fix HISAT2 tests --- tests/nextflow.config | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/tests/nextflow.config b/tests/nextflow.config index ec574b3..464bb5a 100644 --- a/tests/nextflow.config +++ b/tests/nextflow.config @@ -4,4 +4,17 @@ ======================================================================================== */ +// Impose sensible resource limits for testing +process { + withName: '.*' { + cpus = 2 + memory = 3.GB + time = 2.h + } +} + +params { + hisat2_build_memory = '3.GB' +} + includeConfig 'https://raw.githubusercontent.com/nf-core/modules/0094ae45ba8f5a2e30644d4e252970775a03ee91/tests/config/test_data.config' From a4cc7ed4e3a1b2518574a7d6da6452d52d052868 Mon Sep 17 00:00:00 2001 From: Jonathan Manning Date: Tue, 23 Jan 2024 14:58:46 +0000 Subject: [PATCH 19/30] Bump cat/fastq and update config to fix --- modules.json | 2 +- modules/nf-core/cat/fastq/tests/main.nf.test | 63 +++--- .../nf-core/cat/fastq/tests/main.nf.test.snap | 185 +++++++++++++----- tests/nextflow.config | 2 + 4 files changed, 170 insertions(+), 82 deletions(-) diff --git a/modules.json b/modules.json index d4e8294..6443846 100644 --- a/modules.json +++ b/modules.json @@ -12,7 +12,7 @@ }, "cat/fastq": { "branch": "master", - "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", + "git_sha": "02fd5bd7275abad27aad32d5c852e0a9b1b98882", "installed_by": ["modules"] }, "custom/dumpsoftwareversions": { diff --git a/modules/nf-core/cat/fastq/tests/main.nf.test b/modules/nf-core/cat/fastq/tests/main.nf.test index f5f9418..dab2e14 100644 --- a/modules/nf-core/cat/fastq/tests/main.nf.test +++ b/modules/nf-core/cat/fastq/tests/main.nf.test @@ -16,11 +16,11 @@ nextflow_process { } process { """ - input[0] = [ - [ id:'test', single_end:true ], // meta map - [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), - file(params.test_data['sarscov2']['illumina']['test2_1_fastq_gz'], checkIfExists: 
true) ] - ] + input[0] = Channel.of([ + [ id:'test', single_end:true ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true)] + ]) """ } } @@ -28,8 +28,7 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert snapshot(process.out.reads).match() }, - { assert path(process.out.versions.get(0)).getText().contains("cat") } + { assert snapshot(process.out).match() } ) } } @@ -42,13 +41,13 @@ nextflow_process { } process { """ - input[0] = [ + input[0] = Channel.of([ [ id:'test', single_end:false ], // meta map - [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), - file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true), - file(params.test_data['sarscov2']['illumina']['test2_1_fastq_gz'], checkIfExists: true), - file(params.test_data['sarscov2']['illumina']['test2_2_fastq_gz'], checkIfExists: true) ] - ] + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test2_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test2_2.fastq.gz', checkIfExists: true)] + ]) """ } } @@ -56,8 +55,7 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert snapshot(process.out.reads).match() }, - { assert path(process.out.versions.get(0)).getText().contains("cat") } + { assert snapshot(process.out).match() } ) } } @@ -70,11 +68,11 @@ nextflow_process { } process { """ - input[0] = [ + input[0] = Channel.of([ [ id:'test', single_end:true ], // meta map - [ 
file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), - file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) ] - ] + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true)] + ]) """ } } @@ -82,8 +80,7 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert snapshot(process.out.reads).match() }, - { assert path(process.out.versions.get(0)).getText().contains("cat") } + { assert snapshot(process.out).match() } ) } } @@ -96,13 +93,13 @@ nextflow_process { } process { """ - input[0] = [ + input[0] = Channel.of([ [ id:'test', single_end:false ], // meta map - [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), - file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true), - file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), - file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) ] - ] + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true)] + ]) """ } } @@ -110,8 +107,7 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert snapshot(process.out.reads).match() }, - { assert path(process.out.versions.get(0)).getText().contains("cat") } + { assert snapshot(process.out).match() } ) } } @@ -124,10 +120,10 @@ nextflow_process { } process { """ - 
input[0] = [ + input[0] = Channel.of([ [ id:'test', single_end:true ], // meta map - [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true)] - ] + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true)] + ]) """ } } @@ -135,8 +131,7 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert snapshot(process.out.reads).match() }, - { assert path(process.out.versions.get(0)).getText().contains("cat") } + { assert snapshot(process.out).match() } ) } } diff --git a/modules/nf-core/cat/fastq/tests/main.nf.test.snap b/modules/nf-core/cat/fastq/tests/main.nf.test.snap index ec2342e..43dfe28 100644 --- a/modules/nf-core/cat/fastq/tests/main.nf.test.snap +++ b/modules/nf-core/cat/fastq/tests/main.nf.test.snap @@ -1,78 +1,169 @@ { "test_cat_fastq_single_end": { "content": [ - [ - [ - { - "id": "test", - "single_end": true - }, - "test.merged.fastq.gz:md5,f9cf5e375f7de81a406144a2c70cc64d" + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.merged.fastq.gz:md5,ee314a9bd568d06617171b0c85f508da" + ] + ], + "1": [ + "versions.yml:md5,d42d6e24d67004608495883e00bd501b" + ], + "reads": [ + [ + { + "id": "test", + "single_end": true + }, + "test.merged.fastq.gz:md5,ee314a9bd568d06617171b0c85f508da" + ] + ], + "versions": [ + "versions.yml:md5,d42d6e24d67004608495883e00bd501b" ] - ] + } ], - "timestamp": "2023-10-17T23:19:12.990284837" + "timestamp": "2024-01-17T17:30:39.816981" }, "test_cat_fastq_single_end_same_name": { "content": [ - [ - [ - { - "id": "test", - "single_end": true - }, - "test.merged.fastq.gz:md5,63f817db7a29a03eb538104495556f66" + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.merged.fastq.gz:md5,3ad9406595fafec8172368f9cd0b6a22" + ] + ], + "1": [ + "versions.yml:md5,d42d6e24d67004608495883e00bd501b" + ], + "reads": [ + [ + { + "id": "test", + "single_end": true + }, + 
"test.merged.fastq.gz:md5,3ad9406595fafec8172368f9cd0b6a22" + ] + ], + "versions": [ + "versions.yml:md5,d42d6e24d67004608495883e00bd501b" ] - ] + } ], - "timestamp": "2023-10-17T23:19:31.554568147" + "timestamp": "2024-01-17T17:32:35.229332" }, "test_cat_fastq_single_end_single_file": { "content": [ - [ - [ - { - "id": "test", - "single_end": true - }, - "test.merged.fastq.gz:md5,e325ef7deb4023447a1f074e285761af" + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.merged.fastq.gz:md5,4161df271f9bfcd25d5845a1e220dbec" + ] + ], + "1": [ + "versions.yml:md5,d42d6e24d67004608495883e00bd501b" + ], + "reads": [ + [ + { + "id": "test", + "single_end": true + }, + "test.merged.fastq.gz:md5,4161df271f9bfcd25d5845a1e220dbec" + ] + ], + "versions": [ + "versions.yml:md5,d42d6e24d67004608495883e00bd501b" ] - ] + } ], - "timestamp": "2023-10-17T23:19:49.629360033" + "timestamp": "2024-01-17T17:34:00.058829" }, "test_cat_fastq_paired_end_same_name": { "content": [ - [ - [ - { - "id": "test", - "single_end": false - }, + { + "0": [ [ - "test_1.merged.fastq.gz:md5,63f817db7a29a03eb538104495556f66", - "test_2.merged.fastq.gz:md5,fe9f266f43a6fc3dcab690a18419a56e" + { + "id": "test", + "single_end": false + }, + [ + "test_1.merged.fastq.gz:md5,3ad9406595fafec8172368f9cd0b6a22", + "test_2.merged.fastq.gz:md5,a52cab0b840c7178b0ea83df1fdbe8d5" + ] ] + ], + "1": [ + "versions.yml:md5,d42d6e24d67004608495883e00bd501b" + ], + "reads": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_1.merged.fastq.gz:md5,3ad9406595fafec8172368f9cd0b6a22", + "test_2.merged.fastq.gz:md5,a52cab0b840c7178b0ea83df1fdbe8d5" + ] + ] + ], + "versions": [ + "versions.yml:md5,d42d6e24d67004608495883e00bd501b" ] - ] + } ], - "timestamp": "2023-10-17T23:19:40.711617539" + "timestamp": "2024-01-17T17:33:33.031555" }, "test_cat_fastq_paired_end": { "content": [ - [ - [ - { - "id": "test", - "single_end": false - }, + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + [ + 
"test_1.merged.fastq.gz:md5,3ad9406595fafec8172368f9cd0b6a22", + "test_2.merged.fastq.gz:md5,a52cab0b840c7178b0ea83df1fdbe8d5" + ] + ] + ], + "1": [ + "versions.yml:md5,d42d6e24d67004608495883e00bd501b" + ], + "reads": [ [ - "test_1.merged.fastq.gz:md5,f9cf5e375f7de81a406144a2c70cc64d", - "test_2.merged.fastq.gz:md5,77c8e966e130d8c6b6ec9be52fcb2bda" + { + "id": "test", + "single_end": false + }, + [ + "test_1.merged.fastq.gz:md5,3ad9406595fafec8172368f9cd0b6a22", + "test_2.merged.fastq.gz:md5,a52cab0b840c7178b0ea83df1fdbe8d5" + ] ] + ], + "versions": [ + "versions.yml:md5,d42d6e24d67004608495883e00bd501b" ] - ] + } ], - "timestamp": "2023-10-18T07:53:20.923560211" + "timestamp": "2024-01-17T17:32:02.270935" } } \ No newline at end of file diff --git a/tests/nextflow.config b/tests/nextflow.config index 464bb5a..9b7d61a 100644 --- a/tests/nextflow.config +++ b/tests/nextflow.config @@ -14,6 +14,8 @@ process { } params { + test_data_base = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules' + modules_testdata_base_path = 's3://ngi-igenomes/testdata/nf-core/modules/' hisat2_build_memory = '3.GB' } From 5e33157689825ba4b78f5d9b5da76b7543602603 Mon Sep 17 00:00:00 2001 From: Jonathan Manning Date: Tue, 23 Jan 2024 18:42:26 +0000 Subject: [PATCH 20/30] Override workflow config for testing fq/subsample --- tests/nextflow.config | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/tests/nextflow.config b/tests/nextflow.config index 9b7d61a..c070064 100644 --- a/tests/nextflow.config +++ b/tests/nextflow.config @@ -4,13 +4,21 @@ ======================================================================================== */ -// Impose sensible resource limits for testing process { + + // Impose sensible resource limits for testing + withName: '.*' { cpus = 2 memory = 3.GB time = 2.h } + + // Override modules.config so module snapshots match + + withName: FQ_SUBSAMPLE { + ext.prefix = '' + } } params { From 
b560b6e96060770808c3bfa4771985f09ac6976b Mon Sep 17 00:00:00 2001 From: Jonathan Manning Date: Tue, 23 Jan 2024 18:42:55 +0000 Subject: [PATCH 21/30] Bump fq/subsample --- modules.json | 4 +- modules/nf-core/fq/subsample/meta.yml | 5 ++- .../nf-core/fq/subsample/tests/main.nf.test | 45 ++++++++++--------- .../fq/subsample/tests/main.nf.test.snap | 26 +++++------ .../tests/main.nf.test | 27 ++++++----- 5 files changed, 54 insertions(+), 53 deletions(-) diff --git a/modules.json b/modules.json index 6443846..cc7d7e8 100644 --- a/modules.json +++ b/modules.json @@ -42,7 +42,7 @@ }, "fq/subsample": { "branch": "master", - "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", + "git_sha": "59d1faf07ace4f7a00f7fa7778bce4e1f1dcdd63", "installed_by": ["fastq_subsample_fq_salmon"] }, "gffread": { @@ -121,7 +121,7 @@ "nf-core": { "fastq_subsample_fq_salmon": { "branch": "master", - "git_sha": "bdfd7bc8d2e5a93531e14bd94c4128d9a72a2435", + "git_sha": "a4bceac1aecee5aa0a5dbc601baf0e2e61013fb2", "installed_by": ["subworkflows"] } } diff --git a/modules/nf-core/fq/subsample/meta.yml b/modules/nf-core/fq/subsample/meta.yml index d4f1d1f..6c5e87f 100644 --- a/modules/nf-core/fq/subsample/meta.yml +++ b/modules/nf-core/fq/subsample/meta.yml @@ -2,14 +2,15 @@ name: "fq_subsample" description: fq subsample outputs a subset of records from single or paired FASTQ files. This requires a seed (--seed) to be set in ext.args. keywords: - fastq - - sample + - fq + - subsample tools: - "fq": description: "fq is a library to generate and validate FASTQ file pairs." 
homepage: "https://github.com/stjude-rust-labs/fq" documentation: "https://github.com/stjude-rust-labs/fq" tool_dev_url: "https://github.com/stjude-rust-labs/fq" - licence: "['MIT']" + licence: ["MIT"] input: - meta: type: map diff --git a/modules/nf-core/fq/subsample/tests/main.nf.test b/modules/nf-core/fq/subsample/tests/main.nf.test index d1b4c9f..285f30c 100644 --- a/modules/nf-core/fq/subsample/tests/main.nf.test +++ b/modules/nf-core/fq/subsample/tests/main.nf.test @@ -17,11 +17,11 @@ nextflow_process { } process { """ - input[0] = [ [ id:'test', single_end:false ], // meta map - [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), - file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) - ] - ] + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true)] + ]) """ } } @@ -43,11 +43,11 @@ nextflow_process { } process { """ - input[0] = [ [ id:'test', single_end:false ], // meta map - [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), - file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) - ] - ] + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true)] + ]) """ } } @@ -69,11 +69,11 @@ nextflow_process { } process { """ - input[0] = [ [ id:'test', single_end:false ], // meta map - [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), - 
file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) - ] - ] + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true)] + ]) """ } } @@ -95,10 +95,10 @@ nextflow_process { } process { """ - input[0] = [ [ id:'test', single_end:false ], // meta map - [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), - ] - ] + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ]) """ } } @@ -120,9 +120,10 @@ nextflow_process { } process { """ - input[0] = [ [ id:'test', single_end:false ], // meta map - [] - ] + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ ] + ]) """ } } diff --git a/modules/nf-core/fq/subsample/tests/main.nf.test.snap b/modules/nf-core/fq/subsample/tests/main.nf.test.snap index 2eea16b..1e47918 100644 --- a/modules/nf-core/fq/subsample/tests/main.nf.test.snap +++ b/modules/nf-core/fq/subsample/tests/main.nf.test.snap @@ -9,8 +9,8 @@ "single_end": false }, [ - "test_R1.fastq.gz:md5,b5da750e06284b86b6d02229ac63949e", - "test_R2.fastq.gz:md5,b22a836a135a226b578a5f7813de3d6e" + "test_R1.fastq.gz:md5,19326ff922a16c0cb81191f2a0a5c5fc", + "test_R2.fastq.gz:md5,ce7ff46296d89b68521ad55a3588bcfe" ] ] ], @@ -24,8 +24,8 @@ "single_end": false }, [ - "test_R1.fastq.gz:md5,b5da750e06284b86b6d02229ac63949e", - "test_R2.fastq.gz:md5,b22a836a135a226b578a5f7813de3d6e" + "test_R1.fastq.gz:md5,19326ff922a16c0cb81191f2a0a5c5fc", + "test_R2.fastq.gz:md5,ce7ff46296d89b68521ad55a3588bcfe" ] ] ], @@ -34,7 +34,7 @@ ] } ], - "timestamp": "2023-10-17T11:17:16.981462" + "timestamp": "2024-01-17T17:57:15.446336" 
}, "test_fq_subsample_record_count": { "content": [ @@ -46,8 +46,8 @@ "single_end": false }, [ - "test_R1.fastq.gz:md5,5d93c45561d7a77299fe6dfdaaaa02ce", - "test_R2.fastq.gz:md5,3a29e0716b14d81bf84be41af5c9eb78" + "test_R1.fastq.gz:md5,394c7a233f1c1c1a167a34cf2895d26d", + "test_R2.fastq.gz:md5,32724cbdb5ab954a0a659ebcd56ca422" ] ] ], @@ -61,8 +61,8 @@ "single_end": false }, [ - "test_R1.fastq.gz:md5,5d93c45561d7a77299fe6dfdaaaa02ce", - "test_R2.fastq.gz:md5,3a29e0716b14d81bf84be41af5c9eb78" + "test_R1.fastq.gz:md5,394c7a233f1c1c1a167a34cf2895d26d", + "test_R2.fastq.gz:md5,32724cbdb5ab954a0a659ebcd56ca422" ] ] ], @@ -71,7 +71,7 @@ ] } ], - "timestamp": "2023-10-17T11:17:25.17608" + "timestamp": "2024-01-17T17:57:23.920058" }, "test_fq_subsample_single": { "content": [ @@ -82,7 +82,7 @@ "id": "test", "single_end": false }, - "test.fastq.gz:md5,b5da750e06284b86b6d02229ac63949e" + "test.fastq.gz:md5,19326ff922a16c0cb81191f2a0a5c5fc" ] ], "1": [ @@ -94,7 +94,7 @@ "id": "test", "single_end": false }, - "test.fastq.gz:md5,b5da750e06284b86b6d02229ac63949e" + "test.fastq.gz:md5,19326ff922a16c0cb81191f2a0a5c5fc" ] ], "versions": [ @@ -102,7 +102,7 @@ ] } ], - "timestamp": "2023-10-17T11:17:32.536769" + "timestamp": "2024-01-17T17:57:31.908993" }, "test_fq_subsample_no_args": { "content": [ diff --git a/subworkflows/nf-core/fastq_subsample_fq_salmon/tests/main.nf.test b/subworkflows/nf-core/fastq_subsample_fq_salmon/tests/main.nf.test index 28e6b65..6342449 100644 --- a/subworkflows/nf-core/fastq_subsample_fq_salmon/tests/main.nf.test +++ b/subworkflows/nf-core/fastq_subsample_fq_salmon/tests/main.nf.test @@ -12,7 +12,6 @@ nextflow_workflow { tag "fq/subsample" tag "salmon/quant" - test("homo_sapiens paired-end [fastq]") { setup { @@ -20,8 +19,8 @@ nextflow_workflow { script "../../../../modules/nf-core/salmon/index/main.nf" process { """ - input[0] = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) // genome_fasta - input[1] = 
file(params.test_data['homo_sapiens']['genome']['transcriptome_fasta'], checkIfExists: true) // transcript_fasta + input[0] = Channel.of(file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true)) // genome_fasta + input[1] = Channel.of(file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/transcriptome.fasta', checkIfExists: true)) // transcriptome_fasta """ } } @@ -30,18 +29,18 @@ nextflow_workflow { when { workflow { """ - input[0] = [ - [ id:'test', single_end:false ], // meta map - [ - file(params.test_data['homo_sapiens']['illumina']['test_1_fastq_gz'], checkIfExists: true), - file(params.test_data['homo_sapiens']['illumina']['test_2_fastq_gz'], checkIfExists: true) - ] - ] - input[1] = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) // genome_fasta - input[2] = file(params.test_data['homo_sapiens']['genome']['transcriptome_fasta'], checkIfExists: true) // transcript_fasta - input[3] = file(params.test_data['homo_sapiens']['genome']['genome_gtf'], checkIfExists: true) // gtf + make_index = false + + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ] + ]) + input[1] = Channel.of(file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true)) // genome_fasta + input[2] = Channel.of(file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/transcriptome.fasta', checkIfExists: true)) // transcriptome_fasta + input[3] = Channel.of(file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true)) // genome_gtf input[4] = SALMON_INDEX.out.index - input[5] = false + input[5] = make_index """ } } From 
0b963ecc6e4f0b6c3117bcd6d3e9c4ac58bcbc1b Mon Sep 17 00:00:00 2001 From: Jonathan Manning Date: Tue, 23 Jan 2024 19:45:19 +0000 Subject: [PATCH 22/30] Set GFFREAD test args --- tests/nextflow.config | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/nextflow.config b/tests/nextflow.config index c070064..76431f9 100644 --- a/tests/nextflow.config +++ b/tests/nextflow.config @@ -19,6 +19,9 @@ process { withName: FQ_SUBSAMPLE { ext.prefix = '' } + withName: GFFREAD { + ext.args = '-T' + } } params { From aae53f565e7157b336210882999bf1318c2dca6e Mon Sep 17 00:00:00 2001 From: Jonathan Manning Date: Tue, 23 Jan 2024 19:48:20 +0000 Subject: [PATCH 23/30] Bump gffread --- modules.json | 2 +- modules/nf-core/gffread/tests/main.nf.test | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/modules.json b/modules.json index cc7d7e8..e82b21c 100644 --- a/modules.json +++ b/modules.json @@ -47,7 +47,7 @@ }, "gffread": { "branch": "master", - "git_sha": "a2d6c3082c5c44b4155a3246daff36701ee49af8", + "git_sha": "b8858b10356b87db4325341872816f9672541b7b", "installed_by": ["modules"] }, "gunzip": { diff --git a/modules/nf-core/gffread/tests/main.nf.test b/modules/nf-core/gffread/tests/main.nf.test index 3c064b3..c4dfbdf 100644 --- a/modules/nf-core/gffread/tests/main.nf.test +++ b/modules/nf-core/gffread/tests/main.nf.test @@ -18,7 +18,7 @@ nextflow_process { } process { """ - input[0] = file(params.test_data['sarscov2']['genome']['genome_gff3'], checkIfExists: true) + input[0] = file(params.modules_testdata_base_path + "genomics/sarscov2/genome/genome.gff3", checkIfExists: true) """ } } @@ -42,7 +42,7 @@ nextflow_process { } process { """ - input[0] = file(params.test_data['sarscov2']['genome']['genome_gff3'], checkIfExists: true) + input[0] = file(params.modules_testdata_base_path + "genomics/sarscov2/genome/genome.gff3", checkIfExists: true) """ } } From d9c4795f8887689e9d0c6ca6a7b969eee61d6e77 Mon Sep 17 00:00:00 2001 From: Jonathan Manning Date: Tue, 23 
Jan 2024 20:36:01 +0000 Subject: [PATCH 24/30] Totally suppress workflow ext.args for GFFREAD --- tests/nextflow.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/nextflow.config b/tests/nextflow.config index 76431f9..c82fb21 100644 --- a/tests/nextflow.config +++ b/tests/nextflow.config @@ -20,7 +20,7 @@ process { ext.prefix = '' } withName: GFFREAD { - ext.args = '-T' + ext.args = null } } From 08526e68f7d03dea7694185a537b17d8db3f82a0 Mon Sep 17 00:00:00 2001 From: Jonathan Manning Date: Tue, 23 Jan 2024 20:53:02 +0000 Subject: [PATCH 25/30] Run prettier --- .devcontainer/devcontainer.json | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index 4ecfbfe..4a9bc5c 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -18,11 +18,11 @@ "python.linting.flake8Path": "/opt/conda/bin/flake8", "python.linting.pycodestylePath": "/opt/conda/bin/pycodestyle", "python.linting.pydocstylePath": "/opt/conda/bin/pydocstyle", - "python.linting.pylintPath": "/opt/conda/bin/pylint" + "python.linting.pylintPath": "/opt/conda/bin/pylint", }, // Add the IDs of extensions you want installed when the container is created. 
- "extensions": ["ms-python.python", "ms-python.vscode-pylance", "nf-core.nf-core-extensionpack"] - } - } + "extensions": ["ms-python.python", "ms-python.vscode-pylance", "nf-core.nf-core-extensionpack"], + }, + }, } From 086846e756b987629108a3c798aa5c8796f29a87 Mon Sep 17 00:00:00 2001 From: Jonathan Manning Date: Tue, 23 Jan 2024 21:08:08 +0000 Subject: [PATCH 26/30] Arrange subworkflow better --- .../local/{preprocess_rnaseq.nf => preprocess_rnaseq/main.nf} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename subworkflows/local/{preprocess_rnaseq.nf => preprocess_rnaseq/main.nf} (100%) diff --git a/subworkflows/local/preprocess_rnaseq.nf b/subworkflows/local/preprocess_rnaseq/main.nf similarity index 100% rename from subworkflows/local/preprocess_rnaseq.nf rename to subworkflows/local/preprocess_rnaseq/main.nf From 8cffca823e3530d5c9fc9c6b0dcf052dc4eb9717 Mon Sep 17 00:00:00 2001 From: Jonathan Manning Date: Tue, 23 Jan 2024 21:16:02 +0000 Subject: [PATCH 27/30] Fix relative includes for new subworkflow location --- subworkflows/local/preprocess_rnaseq/main.nf | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/subworkflows/local/preprocess_rnaseq/main.nf b/subworkflows/local/preprocess_rnaseq/main.nf index f0a614b..6de3371 100644 --- a/subworkflows/local/preprocess_rnaseq/main.nf +++ b/subworkflows/local/preprocess_rnaseq/main.nf @@ -1,12 +1,12 @@ import groovy.json.JsonSlurper -include { CAT_FASTQ } from '../../modules/nf-core/cat/fastq/main' -include { FASTQC } from '../../modules/nf-core/fastqc/main' -include { SORTMERNA } from '../../modules/nf-core/sortmerna/main' +include { CAT_FASTQ } from '../../../modules/nf-core/cat/fastq/main' +include { FASTQC } from '../../../modules/nf-core/fastqc/main' +include { SORTMERNA } from '../../../modules/nf-core/sortmerna/main' -include { FASTQ_SUBSAMPLE_FQ_SALMON } from '../../subworkflows/nf-core/fastq_subsample_fq_salmon' -include { FASTQ_FASTQC_UMITOOLS_TRIMGALORE } from 
'../../subworkflows/nf-core/fastq_fastqc_umitools_trimgalore' -include { FASTQ_FASTQC_UMITOOLS_FASTP } from '../../subworkflows/nf-core/fastq_fastqc_umitools_fastp' +include { FASTQ_SUBSAMPLE_FQ_SALMON } from '../../../subworkflows/nf-core/fastq_subsample_fq_salmon' +include { FASTQ_FASTQC_UMITOOLS_TRIMGALORE } from '../../../subworkflows/nf-core/fastq_fastqc_umitools_trimgalore' +include { FASTQ_FASTQC_UMITOOLS_FASTP } from '../../../subworkflows/nf-core/fastq_fastqc_umitools_fastp' def pass_trimmed_reads = [:] From bc548a6bf38d16691fcb405941a3b4721649bbdd Mon Sep 17 00:00:00 2001 From: Jonathan Manning Date: Tue, 23 Jan 2024 21:32:55 +0000 Subject: [PATCH 28/30] Bump outdated modules --- modules.json | 18 +- .../dumpsoftwareversions/environment.yml | 2 +- .../custom/dumpsoftwareversions/main.nf | 4 +- .../dumpsoftwareversions/tests/main.nf.test | 7 +- .../tests/main.nf.test.snap | 48 +- .../custom/getchromsizes/tests/main.nf.test | 14 +- .../getchromsizes/tests/main.nf.test.snap | 34 +- modules/nf-core/fastp/main.nf | 18 + modules/nf-core/fastp/tests/main.nf.test | 302 +++++++++-- modules/nf-core/fastp/tests/main.nf.test.snap | 63 ++- modules/nf-core/fastqc/tests/main.nf.test | 265 +++++++--- .../nf-core/fastqc/tests/main.nf.test.snap | 12 +- modules/nf-core/gffread/tests/main.nf.test | 20 +- .../nf-core/gffread/tests/main.nf.test.snap | 56 +- modules/nf-core/gunzip/tests/main.nf.test | 9 +- modules/nf-core/multiqc/environment.yml | 2 +- modules/nf-core/multiqc/main.nf | 6 +- modules/nf-core/multiqc/meta.yml | 1 - modules/nf-core/multiqc/tests/main.nf.test | 48 +- .../star/genomegenerate/environment.yml | 4 +- modules/nf-core/star/genomegenerate/main.nf | 83 +-- .../star/genomegenerate/tests/main.nf.test | 81 ++- .../genomegenerate/tests/main.nf.test.snap | 14 +- modules/nf-core/untar/environment.yml | 4 +- modules/nf-core/untar/tests/main.nf.test | 26 +- modules/nf-core/untar/tests/main.nf.test.snap | 479 ------------------ 26 files changed, 827 insertions(+), 
793 deletions(-) diff --git a/modules.json b/modules.json index e82b21c..ffdc080 100644 --- a/modules.json +++ b/modules.json @@ -17,12 +17,12 @@ }, "custom/dumpsoftwareversions": { "branch": "master", - "git_sha": "bba7e362e4afead70653f84d8700588ea28d0f9e", + "git_sha": "8ec825f465b9c17f9d83000022995b4f7de6fe93", "installed_by": ["modules"] }, "custom/getchromsizes": { "branch": "master", - "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", + "git_sha": "1b0ffa4e5aed5b7e3cd4311af31bd3b2c8345051", "installed_by": ["modules"] }, "cutadapt": { @@ -32,12 +32,12 @@ }, "fastp": { "branch": "master", - "git_sha": "3c77ca9aac783e76c3614a06db3bfe4fef619bde", + "git_sha": "c9488585ce7bd35ccd2a30faa2371454c8112fb9", "installed_by": ["modules"] }, "fastqc": { "branch": "master", - "git_sha": "65ad3e0b9a4099592e1102e92e10455dc661cf53", + "git_sha": "c9488585ce7bd35ccd2a30faa2371454c8112fb9", "installed_by": ["modules"] }, "fq/subsample": { @@ -47,12 +47,12 @@ }, "gffread": { "branch": "master", - "git_sha": "b8858b10356b87db4325341872816f9672541b7b", + "git_sha": "9a6dc58f7d65c1e6f0ee0107521aea85bbed83cb", "installed_by": ["modules"] }, "gunzip": { "branch": "master", - "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", + "git_sha": "3a5fef109d113b4997c9822198664ca5f2716208", "installed_by": ["modules"] }, "hisat2/build": { @@ -72,7 +72,7 @@ }, "multiqc": { "branch": "master", - "git_sha": "4ab13872435962dadc239979554d13709e20bf29", + "git_sha": "8ec825f465b9c17f9d83000022995b4f7de6fe93", "installed_by": ["modules"] }, "rsem/preparereference": { @@ -97,7 +97,7 @@ }, "star/genomegenerate": { "branch": "master", - "git_sha": "a64788f5ad388f1d2ac5bd5f1f3f8fc81476148c", + "git_sha": "d87a6e2156c2099c09280fa70776eaf0a824817a", "installed_by": ["modules"] }, "trimgalore": { @@ -112,7 +112,7 @@ }, "untar": { "branch": "master", - "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", + "git_sha": "e719354ba77df0a1bd310836aa2039b45c29d620", "installed_by": ["modules"] } } diff 
--git a/modules/nf-core/custom/dumpsoftwareversions/environment.yml b/modules/nf-core/custom/dumpsoftwareversions/environment.yml index f0c63f6..9b3272b 100644 --- a/modules/nf-core/custom/dumpsoftwareversions/environment.yml +++ b/modules/nf-core/custom/dumpsoftwareversions/environment.yml @@ -4,4 +4,4 @@ channels: - bioconda - defaults dependencies: - - bioconda::multiqc=1.17 + - bioconda::multiqc=1.19 diff --git a/modules/nf-core/custom/dumpsoftwareversions/main.nf b/modules/nf-core/custom/dumpsoftwareversions/main.nf index 7685b33..f218761 100644 --- a/modules/nf-core/custom/dumpsoftwareversions/main.nf +++ b/modules/nf-core/custom/dumpsoftwareversions/main.nf @@ -4,8 +4,8 @@ process CUSTOM_DUMPSOFTWAREVERSIONS { // Requires `pyyaml` which does not have a dedicated container but is in the MultiQC container conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/multiqc:1.17--pyhdfd78af_0' : - 'biocontainers/multiqc:1.17--pyhdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/multiqc:1.19--pyhdfd78af_0' : + 'biocontainers/multiqc:1.19--pyhdfd78af_0' }" input: path versions diff --git a/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test b/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test index eec1db1..b1e1630 100644 --- a/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test +++ b/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test @@ -31,7 +31,12 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert snapshot(process.out).match() } + { assert snapshot( + process.out.versions, + file(process.out.mqc_yml[0]).readLines()[0..10], + file(process.out.yml[0]).readLines()[0..7] + ).match() + } ) } } diff --git a/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test.snap b/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test.snap index 
7fef262..5f59a93 100644 --- a/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test.snap +++ b/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test.snap @@ -1,27 +1,33 @@ { "Should run without failures": { "content": [ - { - "0": [ - "software_versions.yml:md5,1c851188476409cda5752ce971b20b58" - ], - "1": [ - "software_versions_mqc.yml:md5,2570f4ba271ad08357b0d3d32a9cf84d" - ], - "2": [ - "versions.yml:md5,3843ac526e762117eedf8825b40683df" - ], - "mqc_yml": [ - "software_versions_mqc.yml:md5,2570f4ba271ad08357b0d3d32a9cf84d" - ], - "versions": [ - "versions.yml:md5,3843ac526e762117eedf8825b40683df" - ], - "yml": [ - "software_versions.yml:md5,1c851188476409cda5752ce971b20b58" - ] - } + [ + "versions.yml:md5,76d454d92244589d32455833f7c1ba6d" + ], + [ + "data: \"\\n\\n \\n \\n \\n \\n \\n \\n \\n\\", + " \\n\\n\\n \\n \\n\\", + " \\ \\n\\n\\n\\n \\n \\", + " \\ \\n \\n\\n\\n\\n\\", + " \\n\\n \\n \\n\\", + " \\ \\n\\n\\n\\n\\n\\n \\n\\", + " \\ \\n \\n\\n\\n\\n\\", + " \\n\\n \\n \\n\\" + ], + [ + "CUSTOM_DUMPSOFTWAREVERSIONS:", + " python: 3.11.7", + " yaml: 5.4.1", + "TOOL1:", + " tool1: 0.11.9", + "TOOL2:", + " tool2: '1.9'", + "Workflow:" + ] ], - "timestamp": "2023-11-03T14:43:22.157011" + "timestamp": "2024-01-09T23:01:18.710682" } } \ No newline at end of file diff --git a/modules/nf-core/custom/getchromsizes/tests/main.nf.test b/modules/nf-core/custom/getchromsizes/tests/main.nf.test index 844bd99..9f6b564 100644 --- a/modules/nf-core/custom/getchromsizes/tests/main.nf.test +++ b/modules/nf-core/custom/getchromsizes/tests/main.nf.test @@ -17,9 +17,10 @@ nextflow_process { } process { """ - input[0] = [ [ id:'test', single_end:false ], // meta map - file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) - ] + input[0] = Channel.of([ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ]) """ } } @@ -41,9 +42,10 @@ nextflow_process { 
} process { """ - input[0] = [ [ id:'test', single_end:false ], // meta map - file(params.test_data['sarscov2']['genome']['genome_fasta_gz'], checkIfExists: true) - ] + input[0] = Channel.of([ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true) + ]) """ } } diff --git a/modules/nf-core/custom/getchromsizes/tests/main.nf.test.snap b/modules/nf-core/custom/getchromsizes/tests/main.nf.test.snap index 142298e..2e560bd 100644 --- a/modules/nf-core/custom/getchromsizes/tests/main.nf.test.snap +++ b/modules/nf-core/custom/getchromsizes/tests/main.nf.test.snap @@ -5,8 +5,7 @@ "0": [ [ { - "id": "test", - "single_end": false + "id": "test" }, "genome.fasta.sizes:md5,a57c401f27ae5133823fb09fb21c8a3c" ] @@ -14,8 +13,7 @@ "1": [ [ { - "id": "test", - "single_end": false + "id": "test" }, "genome.fasta.fai:md5,9da2a56e2853dc8c0b86a9e7229c9fe5" ] @@ -29,8 +27,7 @@ "fai": [ [ { - "id": "test", - "single_end": false + "id": "test" }, "genome.fasta.fai:md5,9da2a56e2853dc8c0b86a9e7229c9fe5" ] @@ -41,8 +38,7 @@ "sizes": [ [ { - "id": "test", - "single_end": false + "id": "test" }, "genome.fasta.sizes:md5,a57c401f27ae5133823fb09fb21c8a3c" ] @@ -52,7 +48,7 @@ ] } ], - "timestamp": "2023-10-17T10:20:42.652464" + "timestamp": "2024-01-17T17:48:35.562918" }, "test_custom_getchromsizes_bgzip": { "content": [ @@ -60,8 +56,7 @@ "0": [ [ { - "id": "test", - "single_end": false + "id": "test" }, "genome.fasta.gz.sizes:md5,a57c401f27ae5133823fb09fb21c8a3c" ] @@ -69,8 +64,7 @@ "1": [ [ { - "id": "test", - "single_end": false + "id": "test" }, "genome.fasta.gz.fai:md5,9da2a56e2853dc8c0b86a9e7229c9fe5" ] @@ -78,8 +72,7 @@ "2": [ [ { - "id": "test", - "single_end": false + "id": "test" }, "genome.fasta.gz.gzi:md5,7dea362b3fac8e00956a4952a3d4f474" ] @@ -90,8 +83,7 @@ "fai": [ [ { - "id": "test", - "single_end": false + "id": "test" }, "genome.fasta.gz.fai:md5,9da2a56e2853dc8c0b86a9e7229c9fe5" ] @@ -99,8 +91,7 @@ 
"gzi": [ [ { - "id": "test", - "single_end": false + "id": "test" }, "genome.fasta.gz.gzi:md5,7dea362b3fac8e00956a4952a3d4f474" ] @@ -108,8 +99,7 @@ "sizes": [ [ { - "id": "test", - "single_end": false + "id": "test" }, "genome.fasta.gz.sizes:md5,a57c401f27ae5133823fb09fb21c8a3c" ] @@ -119,6 +109,6 @@ ] } ], - "timestamp": "2023-10-17T10:22:25.185203" + "timestamp": "2024-01-17T17:49:02.562311" } } \ No newline at end of file diff --git a/modules/nf-core/fastp/main.nf b/modules/nf-core/fastp/main.nf index 5fac3c1..2a3b679 100644 --- a/modules/nf-core/fastp/main.nf +++ b/modules/nf-core/fastp/main.nf @@ -99,4 +99,22 @@ process FASTP { END_VERSIONS """ } + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + def is_single_output = task.ext.args?.contains('--interleaved_in') || meta.single_end + def touch_reads = is_single_output ? "${prefix}.fastp.fastq.gz" : "${prefix}_1.fastp.fastq.gz ${prefix}_2.fastp.fastq.gz" + def touch_merged = (!is_single_output && save_merged) ? "touch ${prefix}.merged.fastq.gz" : "" + """ + touch $touch_reads + touch "${prefix}.fastp.json" + touch "${prefix}.fastp.html" + touch "${prefix}.fastp.log" + $touch_merged + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + fastp: \$(fastp --version 2>&1 | sed -e "s/fastp //g") + END_VERSIONS + """ } diff --git a/modules/nf-core/fastp/tests/main.nf.test b/modules/nf-core/fastp/tests/main.nf.test index f610b73..fa7e5b4 100644 --- a/modules/nf-core/fastp/tests/main.nf.test +++ b/modules/nf-core/fastp/tests/main.nf.test @@ -19,11 +19,10 @@ nextflow_process { save_trimmed_fail = false save_merged = false - input[0] = [ + input[0] = Channel.of([ [ id:'test', single_end:true ], - [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) ] - ] - + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ] + ]) input[1] = adapter_fasta input[2] = save_trimmed_fail input[3] = save_merged @@ -57,6 +56,66 
@@ nextflow_process { { assert path(process.out.log.get(0).get(1)).getText().contains(log_part) } } }, + { + assert snapshot( + ( + [process.out.reads[0][0].toString()] + // meta + process.out.reads.collect { file(it[1]).getName() } + + process.out.json.collect { file(it[1]).getName() } + + process.out.html.collect { file(it[1]).getName() } + + process.out.log.collect { file(it[1]).getName() } + + process.out.reads_fail.collect { file(it[1]).getName() } + + process.out.reads_merged.collect { file(it[1]).getName() } + ).sort() + ).match("test_fastp_single_end-for_stub_match") + }, + { assert snapshot(process.out.versions).match("versions") } + ) + } + } + + test("test_fastp_single_end-stub") { + + options '-stub' + + when { + params { + outdir = "$outputDir" + } + process { + """ + adapter_fasta = [] + save_trimmed_fail = false + save_merged = false + + input[0] = Channel.of([ + [ id:'test', single_end:true ], + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ] + ]) + input[1] = adapter_fasta + input[2] = save_trimmed_fail + input[3] = save_merged + """ + } + } + + then { + + assertAll( + { assert process.success }, + { + assert snapshot( + ( + [process.out.reads[0][0].toString()] + // meta + process.out.reads.collect { file(it[1]).getName() } + + process.out.json.collect { file(it[1]).getName() } + + process.out.html.collect { file(it[1]).getName() } + + process.out.log.collect { file(it[1]).getName() } + + process.out.reads_fail.collect { file(it[1]).getName() } + + process.out.reads_merged.collect { file(it[1]).getName() } + ).sort() + ).match("test_fastp_single_end-for_stub_match") + }, { assert snapshot(process.out.versions).match("versions") } ) } @@ -74,12 +133,11 @@ nextflow_process { save_trimmed_fail = false save_merged = false - input[0] = [ + input[0] = Channel.of([ [ id:'test', single_end:false ], // meta map - [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], 
checkIfExists: true), - file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) ] - ] - + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ] + ]) input[1] = adapter_fasta input[2] = save_trimmed_fail input[3] = save_merged @@ -127,6 +185,66 @@ nextflow_process { { assert path(process.out.log.get(0).get(1)).getText().contains(log_part) } } }, + { + assert snapshot( + ( + [process.out.reads[0][0].toString()] + // meta + process.out.reads.collect { it[1].collect { item -> file(item).getName() } } + + process.out.json.collect { file(it[1]).getName() } + + process.out.html.collect { file(it[1]).getName() } + + process.out.log.collect { file(it[1]).getName() } + + process.out.reads_fail.collect { file(it[1]).getName() } + + process.out.reads_merged.collect { file(it[1]).getName() } + ).sort() + ).match("test_fastp_paired_end-for_stub_match") + }, + { assert snapshot(process.out.versions).match("versions") } + ) + } + } + + test("test_fastp_paired_end-stub") { + + options '-stub' + + when { + params { + outdir = "$outputDir" + } + process { + """ + adapter_fasta = [] + save_trimmed_fail = false + save_merged = false + + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ] + ]) + input[1] = adapter_fasta + input[2] = save_trimmed_fail + input[3] = save_merged + """ + } + } + + then { + assertAll( + { assert process.success }, + { + assert snapshot( + ( + [process.out.reads[0][0].toString()] + // meta + process.out.reads.collect { it[1].collect { item -> file(item).getName() } } + + process.out.json.collect { 
file(it[1]).getName() } + + process.out.html.collect { file(it[1]).getName() } + + process.out.log.collect { file(it[1]).getName() } + + process.out.reads_fail.collect { file(it[1]).getName() } + + process.out.reads_merged.collect { file(it[1]).getName() } + ).sort() + ).match("test_fastp_paired_end-for_stub_match") + }, { assert snapshot(process.out.versions).match("versions") } ) } @@ -144,10 +262,10 @@ nextflow_process { save_trimmed_fail = false save_merged = false - input[0] = [ [ id:'test', single_end:true ], // meta map - [ file(params.test_data['sarscov2']['illumina']['test_interleaved_fastq_gz'], checkIfExists: true) ] - ] - + input[0] = Channel.of([ + [ id:'test', single_end:true ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_interleaved.fastq.gz', checkIfExists: true) ] + ]) input[1] = adapter_fasta input[2] = save_trimmed_fail input[3] = save_merged @@ -181,6 +299,66 @@ nextflow_process { { assert path(process.out.log.get(0).get(1)).getText().contains(log_part) } } }, + { + assert snapshot( + ( + [process.out.reads[0][0].toString()] + // meta + process.out.reads.collect { file(it[1]).getName() } + + process.out.json.collect { file(it[1]).getName() } + + process.out.html.collect { file(it[1]).getName() } + + process.out.log.collect { file(it[1]).getName() } + + process.out.reads_fail.collect { file(it[1]).getName() } + + process.out.reads_merged.collect { file(it[1]).getName() } + ).sort() + ).match("test_fastp_interleaved-for_stub_match") + }, + { assert snapshot(process.out.versions).match("versions") } + ) + } + } + + test("fastp test_fastp_interleaved-stub") { + + options '-stub' + + config './nextflow.config' + when { + params { + outdir = "$outputDir" + } + process { + """ + adapter_fasta = [] + save_trimmed_fail = false + save_merged = false + + input[0] = Channel.of([ + [ id:'test', single_end:true ], // meta map + [ file(params.modules_testdata_base_path + 
'genomics/sarscov2/illumina/fastq/test_interleaved.fastq.gz', checkIfExists: true) ] + ]) + input[1] = adapter_fasta + input[2] = save_trimmed_fail + input[3] = save_merged + """ + } + } + + then { + assertAll( + { assert process.success }, + { + assert snapshot( + ( + [process.out.reads[0][0].toString()] + // meta + process.out.reads.collect { file(it[1]).getName() } + + process.out.json.collect { file(it[1]).getName() } + + process.out.html.collect { file(it[1]).getName() } + + process.out.log.collect { file(it[1]).getName() } + + process.out.reads_fail.collect { file(it[1]).getName() } + + process.out.reads_merged.collect { file(it[1]).getName() } + ).sort() + ).match("test_fastp_interleaved-for_stub_match") + }, { assert snapshot(process.out.versions).match("versions") } ) } @@ -198,9 +376,10 @@ nextflow_process { save_trimmed_fail = true save_merged = false - input[0] = [ [ id:'test', single_end:true ], // meta map - [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) ] - ] + input[0] = Channel.of([ + [ id:'test', single_end:true ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ] + ]) input[1] = adapter_fasta input[2] = save_trimmed_fail input[3] = save_merged @@ -258,13 +437,11 @@ nextflow_process { save_trimmed_fail = true save_merged = false - input[0] = [ + input[0] = Channel.of([ [ id:'test', single_end:false ], // meta map - [ - file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), - file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) - ] - ] + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true)] + ]) input[1] = adapter_fasta input[2] = save_trimmed_fail input[3] = save_merged @@ -337,11 
+514,11 @@ nextflow_process { adapter_fasta = [] save_trimmed_fail = false save_merged = true - - input[0] = [ [ id:'test', single_end:false ], // meta map - [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), - file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) ] - ] + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ] + ]) input[1] = adapter_fasta input[2] = save_trimmed_fail input[3] = save_merged @@ -399,6 +576,66 @@ nextflow_process { { assert path(process.out.log.get(0).get(1)).getText().contains(log_part) } } }, + { + assert snapshot( + ( + [process.out.reads[0][0].toString()] + // meta + process.out.reads.collect { it[1].collect { item -> file(item).getName() } } + + process.out.json.collect { file(it[1]).getName() } + + process.out.html.collect { file(it[1]).getName() } + + process.out.log.collect { file(it[1]).getName() } + + process.out.reads_fail.collect { file(it[1]).getName() } + + process.out.reads_merged.collect { file(it[1]).getName() } + ).sort() + ).match("test_fastp_paired_end_merged-for_stub_match") + }, + { assert snapshot(process.out.versions).match("versions") } + ) + } + } + + test("test_fastp_paired_end_merged-stub") { + + options '-stub' + + when { + params { + outdir = "$outputDir" + } + process { + """ + adapter_fasta = [] + save_trimmed_fail = false + save_merged = true + + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ] + ]) + input[1] = adapter_fasta + 
input[2] = save_trimmed_fail + input[3] = save_merged + """ + } + } + + then { + assertAll( + { assert process.success }, + { + assert snapshot( + ( + [process.out.reads[0][0].toString()] + // meta + process.out.reads.collect { it[1].collect { item -> file(item).getName() } } + + process.out.json.collect { file(it[1]).getName() } + + process.out.html.collect { file(it[1]).getName() } + + process.out.log.collect { file(it[1]).getName() } + + process.out.reads_fail.collect { file(it[1]).getName() } + + process.out.reads_merged.collect { file(it[1]).getName() } + ).sort() + ).match("test_fastp_paired_end_merged-for_stub_match") + }, { assert snapshot(process.out.versions).match("versions") } ) } @@ -412,14 +649,15 @@ nextflow_process { } process { """ - adapter_fasta = file("https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/fastp/adapters.fasta", checkIfExists: true) + adapter_fasta = Channel.of([ file(params.modules_testdata_base_path + 'delete_me/fastp/adapters.fasta', checkIfExists: true) ]) save_trimmed_fail = false save_merged = true - input[0] = [ [ id:'test', single_end:false ], // meta map - [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), - file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) ] - ] + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ] + ]) input[1] = adapter_fasta input[2] = save_trimmed_fail input[3] = save_merged diff --git a/modules/nf-core/fastp/tests/main.nf.test.snap b/modules/nf-core/fastp/tests/main.nf.test.snap index 0fa68c7..6a71b68 100644 --- a/modules/nf-core/fastp/tests/main.nf.test.snap +++ b/modules/nf-core/fastp/tests/main.nf.test.snap @@ -1,4 +1,19 @@ { + 
"test_fastp_paired_end-for_stub_match": { + "content": [ + [ + [ + "test_1.fastp.fastq.gz", + "test_2.fastp.fastq.gz" + ], + "test.fastp.html", + "test.fastp.json", + "test.fastp.log", + "{id=test, single_end=false}" + ] + ], + "timestamp": "2024-01-17T18:07:15.398827" + }, "fastp test_fastp_interleaved_json": { "content": [ [ @@ -11,7 +26,23 @@ ] ] ], - "timestamp": "2023-10-17T11:04:45.794175881" + "timestamp": "2024-01-17T18:08:06.123035" + }, + "test_fastp_paired_end_merged-for_stub_match": { + "content": [ + [ + [ + "test_1.fastp.fastq.gz", + "test_2.fastp.fastq.gz" + ], + "test.fastp.html", + "test.fastp.json", + "test.fastp.log", + "test.merged.fastq.gz", + "{id=test, single_end=false}" + ] + ], + "timestamp": "2024-01-17T18:10:13.467574" }, "test_fastp_single_end_json": { "content": [ @@ -25,7 +56,7 @@ ] ] ], - "timestamp": "2023-10-17T11:04:10.566343705" + "timestamp": "2024-01-17T18:06:00.223817" }, "versions": { "content": [ @@ -33,7 +64,31 @@ "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02" ] ], - "timestamp": "2023-10-17T11:04:10.582076024" + "timestamp": "2024-01-17T18:06:00.248422" + }, + "test_fastp_interleaved-for_stub_match": { + "content": [ + [ + "test.fastp.fastq.gz", + "test.fastp.html", + "test.fastp.json", + "test.fastp.log", + "{id=test, single_end=true}" + ] + ], + "timestamp": "2024-01-17T18:08:06.127974" + }, + "test_fastp_single_end-for_stub_match": { + "content": [ + [ + "test.fastp.fastq.gz", + "test.fastp.html", + "test.fastp.json", + "test.fastp.log", + "{id=test, single_end=true}" + ] + ], + "timestamp": "2024-01-17T18:06:00.244202" }, "test_fastp_single_end_trim_fail_json": { "content": [ @@ -47,6 +102,6 @@ ] ] ], - "timestamp": "2023-10-17T11:05:00.379878948" + "timestamp": "2024-01-17T18:08:41.942317" } } \ No newline at end of file diff --git a/modules/nf-core/fastqc/tests/main.nf.test b/modules/nf-core/fastqc/tests/main.nf.test index b9e8f92..1f21c66 100644 --- a/modules/nf-core/fastqc/tests/main.nf.test +++ 
b/modules/nf-core/fastqc/tests/main.nf.test @@ -3,24 +3,20 @@ nextflow_process { name "Test Process FASTQC" script "../main.nf" process "FASTQC" + tag "modules" tag "modules_nfcore" tag "fastqc" - test("Single-Read") { + test("sarscov2 single-end [fastq]") { when { - params { - outdir = "$outputDir" - } process { """ - input[0] = [ + input[0] = Channel.of([ [ id: 'test', single_end:true ], - [ - file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) - ] - ] + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ] + ]) """ } } @@ -28,82 +24,189 @@ nextflow_process { then { assertAll ( { assert process.success }, + // NOTE The report contains the date inside it, which means that the md5sum is stable per day, but not longer than that. So you can't md5sum it. // looks like this:
    Mon 2 Oct 2023
    test.gz
    // https://github.com/nf-core/modules/pull/3903#issuecomment-1743620039 - { assert process.out.html.get(0).get(1) ==~ ".*/test_fastqc.html" }, - { assert path(process.out.html.get(0).get(1)).getText().contains("") }, - { assert snapshot(process.out.versions).match("versions") }, - { assert process.out.zip.get(0).get(1) ==~ ".*/test_fastqc.zip" } + + { assert process.out.html[0][1] ==~ ".*/test_fastqc.html" }, + { assert process.out.zip[0][1] ==~ ".*/test_fastqc.zip" }, + { assert path(process.out.html[0][1]).text.contains("") }, + + { assert snapshot(process.out.versions).match("versions") } + ) + } + } + + test("sarscov2 paired-end [fastq]") { + + when { + process { + """ + input[0] = Channel.of([ + [id: 'test', single_end: false], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ] + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + + { assert process.out.html[0][1][0] ==~ ".*/test_1_fastqc.html" }, + { assert process.out.html[0][1][1] ==~ ".*/test_2_fastqc.html" }, + { assert process.out.zip[0][1][0] ==~ ".*/test_1_fastqc.zip" }, + { assert process.out.zip[0][1][1] ==~ ".*/test_2_fastqc.zip" }, + { assert path(process.out.html[0][1][0]).text.contains("") }, + { assert path(process.out.html[0][1][1]).text.contains("") }, + + { assert snapshot(process.out.versions).match("versions") } + ) + } + } + + test("sarscov2 interleaved [fastq]") { + + when { + process { + """ + input[0] = Channel.of([ + [id: 'test', single_end: false], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_interleaved.fastq.gz', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + + { assert process.out.html[0][1] ==~ ".*/test_fastqc.html" }, + { assert process.out.zip[0][1] ==~ 
".*/test_fastqc.zip" }, + { assert path(process.out.html[0][1]).text.contains("") }, + + { assert snapshot(process.out.versions).match("versions") } ) } } -// TODO -// // -// // Test with paired-end data -// // -// workflow test_fastqc_paired_end { -// input = [ -// [id: 'test', single_end: false], // meta map -// [ -// file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), -// file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) -// ] -// ] - -// FASTQC ( input ) -// } - -// // -// // Test with interleaved data -// // -// workflow test_fastqc_interleaved { -// input = [ -// [id: 'test', single_end: false], // meta map -// file(params.test_data['sarscov2']['illumina']['test_interleaved_fastq_gz'], checkIfExists: true) -// ] - -// FASTQC ( input ) -// } - -// // -// // Test with bam data -// // -// workflow test_fastqc_bam { -// input = [ -// [id: 'test', single_end: false], // meta map -// file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true) -// ] - -// FASTQC ( input ) -// } - -// // -// // Test with multiple samples -// // -// workflow test_fastqc_multiple { -// input = [ -// [id: 'test', single_end: false], // meta map -// [ -// file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), -// file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true), -// file(params.test_data['sarscov2']['illumina']['test2_1_fastq_gz'], checkIfExists: true), -// file(params.test_data['sarscov2']['illumina']['test2_2_fastq_gz'], checkIfExists: true) -// ] -// ] - -// FASTQC ( input ) -// } - -// // -// // Test with custom prefix -// // -// workflow test_fastqc_custom_prefix { -// input = [ -// [ id:'mysample', single_end:true ], // meta map -// file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) -// ] - -// FASTQC ( input ) -// } + + test("sarscov2 paired-end [bam]") { + + when { 
+ process { + """ + input[0] = Channel.of([ + [id: 'test', single_end: false], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + + { assert process.out.html[0][1] ==~ ".*/test_fastqc.html" }, + { assert process.out.zip[0][1] ==~ ".*/test_fastqc.zip" }, + { assert path(process.out.html[0][1]).text.contains("") }, + + { assert snapshot(process.out.versions).match("versions") } + ) + } + } + + test("sarscov2 multiple [fastq]") { + + when { + process { + """ + input[0] = Channel.of([ + [id: 'test', single_end: false], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test2_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test2_2.fastq.gz', checkIfExists: true) ] + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + + { assert process.out.html[0][1][0] ==~ ".*/test_1_fastqc.html" }, + { assert process.out.html[0][1][1] ==~ ".*/test_2_fastqc.html" }, + { assert process.out.html[0][1][2] ==~ ".*/test_3_fastqc.html" }, + { assert process.out.html[0][1][3] ==~ ".*/test_4_fastqc.html" }, + { assert process.out.zip[0][1][0] ==~ ".*/test_1_fastqc.zip" }, + { assert process.out.zip[0][1][1] ==~ ".*/test_2_fastqc.zip" }, + { assert process.out.zip[0][1][2] ==~ ".*/test_3_fastqc.zip" }, + { assert process.out.zip[0][1][3] ==~ ".*/test_4_fastqc.zip" }, + { assert path(process.out.html[0][1][0]).text.contains("") }, + { assert path(process.out.html[0][1][1]).text.contains("") }, + { assert path(process.out.html[0][1][2]).text.contains("") }, + { assert 
path(process.out.html[0][1][3]).text.contains("") }, + + { assert snapshot(process.out.versions).match("versions") } + ) + } + } + + test("sarscov2 custom_prefix") { + + when { + process { + """ + input[0] = Channel.of([ + [ id:'mysample', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + + { assert process.out.html[0][1] ==~ ".*/mysample_fastqc.html" }, + { assert process.out.zip[0][1] ==~ ".*/mysample_fastqc.zip" }, + { assert path(process.out.html[0][1]).text.contains("") }, + + { assert snapshot(process.out.versions).match("versions") } + ) + } + } + + test("sarscov2 single-end [fastq] - stub") { + + options "-stub" + + when { + process { + """ + input[0] = Channel.of([ + [ id: 'test', single_end:true ], + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ] + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out.html.collect { file(it[1]).getName() } + + process.out.zip.collect { file(it[1]).getName() } + + process.out.versions ).match() } + ) + } + } + } diff --git a/modules/nf-core/fastqc/tests/main.nf.test.snap b/modules/nf-core/fastqc/tests/main.nf.test.snap index 636a32c..5d624bb 100644 --- a/modules/nf-core/fastqc/tests/main.nf.test.snap +++ b/modules/nf-core/fastqc/tests/main.nf.test.snap @@ -1,10 +1,20 @@ { + "sarscov2 single-end [fastq] - stub": { + "content": [ + [ + "test.html", + "test.zip", + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ] + ], + "timestamp": "2024-01-17T18:40:57.254299" + }, "versions": { "content": [ [ "versions.yml:md5,e1cc25ca8af856014824abd842e93978" ] ], - "timestamp": "2023-10-09T23:40:54+0000" + "timestamp": "2024-01-17T18:36:50.033627" } } \ No newline at end of file diff --git a/modules/nf-core/gffread/tests/main.nf.test 
b/modules/nf-core/gffread/tests/main.nf.test index c4dfbdf..bdbc96a 100644 --- a/modules/nf-core/gffread/tests/main.nf.test +++ b/modules/nf-core/gffread/tests/main.nf.test @@ -25,10 +25,12 @@ nextflow_process { then { assertAll ( - { assert process.success }, - { assert snapshot(process.out).match() }, - { assert process.out.gtf != null }, - { assert process.out.gffread_gff == [] } + { assert process.success }, + { assert snapshot( + process.out.gtf, + process.out.versions + ).match() }, + { assert process.out.gffread_gff == [] } ) } @@ -49,10 +51,12 @@ nextflow_process { then { assertAll ( - { assert process.success }, - { assert snapshot(process.out).match() }, - { assert process.out.gtf == [] }, - { assert process.out.gffread_gff != null }, + { assert process.success }, + { assert snapshot( + process.out.gffread_gff, + process.out.versions + ).match() }, + { assert process.out.gtf == [] }, ) } diff --git a/modules/nf-core/gffread/tests/main.nf.test.snap b/modules/nf-core/gffread/tests/main.nf.test.snap index 1f1342e..00a11a4 100644 --- a/modules/nf-core/gffread/tests/main.nf.test.snap +++ b/modules/nf-core/gffread/tests/main.nf.test.snap @@ -1,52 +1,24 @@ { "sarscov2-gff3-gtf": { "content": [ - { - "0": [ - "genome.gtf:md5,2394072d7d31530dfd590c4a117bf6e3" - ], - "1": [ - - ], - "2": [ - "versions.yml:md5,a71b6cdfa528dd206a238ec64bae13d6" - ], - "gffread_gff": [ - - ], - "gtf": [ - "genome.gtf:md5,2394072d7d31530dfd590c4a117bf6e3" - ], - "versions": [ - "versions.yml:md5,a71b6cdfa528dd206a238ec64bae13d6" - ] - } + [ + "genome.gtf:md5,2394072d7d31530dfd590c4a117bf6e3" + ], + [ + "versions.yml:md5,a71b6cdfa528dd206a238ec64bae13d6" + ] ], - "timestamp": "2023-11-29T15:39:30.006985" + "timestamp": "2024-01-23T20:00:32.688779117" }, "sarscov2-gff3-gff3": { "content": [ - { - "0": [ - - ], - "1": [ - "genome.gffread.gff3:md5,a7d40d99dcddac23ac673c473279ea2d" - ], - "2": [ - "versions.yml:md5,a71b6cdfa528dd206a238ec64bae13d6" - ], - "gffread_gff": [ - 
"genome.gffread.gff3:md5,a7d40d99dcddac23ac673c473279ea2d" - ], - "gtf": [ - - ], - "versions": [ - "versions.yml:md5,a71b6cdfa528dd206a238ec64bae13d6" - ] - } + [ + "genome.gffread.gff3:md5,a7d40d99dcddac23ac673c473279ea2d" + ], + [ + "versions.yml:md5,a71b6cdfa528dd206a238ec64bae13d6" + ] ], - "timestamp": "2023-11-29T15:39:34.636061" + "timestamp": "2024-01-23T20:07:11.457356625" } } \ No newline at end of file diff --git a/modules/nf-core/gunzip/tests/main.nf.test b/modules/nf-core/gunzip/tests/main.nf.test index d031792..6406008 100644 --- a/modules/nf-core/gunzip/tests/main.nf.test +++ b/modules/nf-core/gunzip/tests/main.nf.test @@ -15,10 +15,11 @@ nextflow_process { } process { """ - input[0] = [ - [], - file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) - ] + input[0] = Channel.of([ + [], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ] + ) """ } } diff --git a/modules/nf-core/multiqc/environment.yml b/modules/nf-core/multiqc/environment.yml index bc0bdb5..7625b75 100644 --- a/modules/nf-core/multiqc/environment.yml +++ b/modules/nf-core/multiqc/environment.yml @@ -4,4 +4,4 @@ channels: - bioconda - defaults dependencies: - - bioconda::multiqc=1.18 + - bioconda::multiqc=1.19 diff --git a/modules/nf-core/multiqc/main.nf b/modules/nf-core/multiqc/main.nf index 00cc48d..1b9f7c4 100644 --- a/modules/nf-core/multiqc/main.nf +++ b/modules/nf-core/multiqc/main.nf @@ -3,8 +3,8 @@ process MULTIQC { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/multiqc:1.18--pyhdfd78af_0' : - 'biocontainers/multiqc:1.18--pyhdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/multiqc:1.19--pyhdfd78af_0' : + 'biocontainers/multiqc:1.19--pyhdfd78af_0' }" input: path multiqc_files, stageAs: "?/*" @@ -43,7 +43,7 @@ process MULTIQC { stub: """ - touch multiqc_data + mkdir multiqc_data touch multiqc_plots touch multiqc_report.html diff --git a/modules/nf-core/multiqc/meta.yml b/modules/nf-core/multiqc/meta.yml index f1aa660..45a9bc3 100644 --- a/modules/nf-core/multiqc/meta.yml +++ b/modules/nf-core/multiqc/meta.yml @@ -1,4 +1,3 @@ -# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json name: multiqc description: Aggregate results from bioinformatics analyses across many samples into a single report keywords: diff --git a/modules/nf-core/multiqc/tests/main.nf.test b/modules/nf-core/multiqc/tests/main.nf.test index c2dad21..d0438ed 100644 --- a/modules/nf-core/multiqc/tests/main.nf.test +++ b/modules/nf-core/multiqc/tests/main.nf.test @@ -7,12 +7,9 @@ nextflow_process { tag "modules_nfcore" tag "multiqc" - test("MULTIQC: FASTQC") { + test("sarscov2 single-end [fastqc]") { when { - params { - outdir = "$outputDir" - } process { """ input[0] = Channel.of([file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz_fastqc_zip'], checkIfExists: true)]) @@ -26,20 +23,17 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert path(process.out.report.get(0)).exists() }, - { assert path(process.out.data.get(0)).exists() }, - { assert path(process.out.versions.get(0)).getText().contains("multiqc") } + { assert process.out.report[0] ==~ ".*/multiqc_report.html" }, + { assert process.out.data[0] ==~ ".*/multiqc_data" }, + { assert snapshot(process.out.versions).match("versions") } ) } } - test("MULTIQC: FASTQC and a config file") { + test("sarscov2 single-end [fastqc] [config]") { when { - params { - 
outdir = "$outputDir" - } process { """ input[0] = Channel.of([file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz_fastqc_zip'], checkIfExists: true)]) @@ -53,9 +47,35 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert path(process.out.report.get(0)).exists() }, - { assert path(process.out.data.get(0)).exists() }, - { assert path(process.out.versions.get(0)).getText().contains("multiqc") } + { assert process.out.report[0] ==~ ".*/multiqc_report.html" }, + { assert process.out.data[0] ==~ ".*/multiqc_data" }, + { assert snapshot(process.out.versions).match("versions") } + ) + } + } + + test("sarscov2 single-end [fastqc] - stub") { + + options "-stub" + + when { + process { + """ + input[0] = Channel.of([file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz_fastqc_zip'], checkIfExists: true)]) + input[1] = [] + input[2] = [] + input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.report.collect { file(it).getName() } + + process.out.data.collect { file(it).getName() } + + process.out.plots.collect { file(it).getName() } + + process.out.versions ).match() } ) } diff --git a/modules/nf-core/star/genomegenerate/environment.yml b/modules/nf-core/star/genomegenerate/environment.yml index 350a459..93e4476 100644 --- a/modules/nf-core/star/genomegenerate/environment.yml +++ b/modules/nf-core/star/genomegenerate/environment.yml @@ -1,9 +1,11 @@ name: star_genomegenerate + channels: - conda-forge - bioconda - defaults + dependencies: - - bioconda::star=2.7.10a - bioconda::samtools=1.18 + - bioconda::star=2.7.10a - conda-forge::gawk=5.1.0 diff --git a/modules/nf-core/star/genomegenerate/main.nf b/modules/nf-core/star/genomegenerate/main.nf index 2bc3e29..b885571 100644 --- a/modules/nf-core/star/genomegenerate/main.nf +++ b/modules/nf-core/star/genomegenerate/main.nf @@ -19,9 +19,10 @@ process STAR_GENOMEGENERATE { task.ext.when == null || task.ext.when script: - def 
args = task.ext.args ?: '' - def args_list = args.tokenize() - def memory = task.memory ? "--limitGenomeGenerateRAM ${task.memory.toBytes() - 100000000}" : '' + def args = task.ext.args ?: '' + def args_list = args.tokenize() + def memory = task.memory ? "--limitGenomeGenerateRAM ${task.memory.toBytes() - 100000000}" : '' + def include_gtf = gtf ? "--sjdbGTFfile $gtf" : '' if (args_list.contains('--genomeSAindexNbases')) { """ mkdir star @@ -29,7 +30,7 @@ process STAR_GENOMEGENERATE { --runMode genomeGenerate \\ --genomeDir star/ \\ --genomeFastaFiles $fasta \\ - --sjdbGTFfile $gtf \\ + $include_gtf \\ --runThreadN $task.cpus \\ $memory \\ $args @@ -51,7 +52,7 @@ process STAR_GENOMEGENERATE { --runMode genomeGenerate \\ --genomeDir star/ \\ --genomeFastaFiles $fasta \\ - --sjdbGTFfile $gtf \\ + $include_gtf \\ --runThreadN $task.cpus \\ --genomeSAindexNbases \$NUM_BASES \\ $memory \\ @@ -67,30 +68,52 @@ process STAR_GENOMEGENERATE { } stub: - """ - mkdir star - touch star/Genome - touch star/Log.out - touch star/SA - touch star/SAindex - touch star/chrLength.txt - touch star/chrName.txt - touch star/chrNameLength.txt - touch star/chrStart.txt - touch star/exonGeTrInfo.tab - touch star/exonInfo.tab - touch star/geneInfo.tab - touch star/genomeParameters.txt - touch star/sjdbInfo.txt - touch star/sjdbList.fromGTF.out.tab - touch star/sjdbList.out.tab - touch star/transcriptInfo.tab + if (gtf) { + """ + mkdir star + touch star/Genome + touch star/Log.out + touch star/SA + touch star/SAindex + touch star/chrLength.txt + touch star/chrName.txt + touch star/chrNameLength.txt + touch star/chrStart.txt + touch star/exonGeTrInfo.tab + touch star/exonInfo.tab + touch star/geneInfo.tab + touch star/genomeParameters.txt + touch star/sjdbInfo.txt + touch star/sjdbList.fromGTF.out.tab + touch star/sjdbList.out.tab + touch star/transcriptInfo.tab - cat <<-END_VERSIONS > versions.yml - "${task.process}": - star: \$(STAR --version | sed -e "s/STAR_//g") - samtools: \$(echo 
\$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') - gawk: \$(echo \$(gawk --version 2>&1) | sed 's/^.*GNU Awk //; s/, .*\$//') - END_VERSIONS - """ + cat <<-END_VERSIONS > versions.yml + "${task.process}": + star: \$(STAR --version | sed -e "s/STAR_//g") + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + gawk: \$(echo \$(gawk --version 2>&1) | sed 's/^.*GNU Awk //; s/, .*\$//') + END_VERSIONS + """ + } else { + """ + mkdir star + touch star/Genome + touch star/Log.out + touch star/SA + touch star/SAindex + touch star/chrLength.txt + touch star/chrName.txt + touch star/chrNameLength.txt + touch star/chrStart.txt + touch star/genomeParameters.txt + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + star: \$(STAR --version | sed -e "s/STAR_//g") + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + gawk: \$(echo \$(gawk --version 2>&1) | sed 's/^.*GNU Awk //; s/, .*\$//') + END_VERSIONS + """ + } } diff --git a/modules/nf-core/star/genomegenerate/tests/main.nf.test b/modules/nf-core/star/genomegenerate/tests/main.nf.test index eed8292..af0c942 100644 --- a/modules/nf-core/star/genomegenerate/tests/main.nf.test +++ b/modules/nf-core/star/genomegenerate/tests/main.nf.test @@ -28,7 +28,86 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert snapshot(file(process.out.index[0][1]).name).match("index") }, + { assert snapshot(file(process.out.index[0][1]).listFiles().collect { it.getName() }.sort().toString()).match("index_with_gtf") }, + { assert snapshot(process.out.versions).match("versions") } + ) + } + + } + + test("homo_sapiens-stub") { + + options '-stub' + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test_fasta' ], + [file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)] + ]) + input[1] = Channel.of([ + [ id:'test_gtf' ], + [file(params.test_data['homo_sapiens']['genome']['genome_gtf'], 
checkIfExists: true)] + ]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(file(process.out.index[0][1]).listFiles().collect { it.getName() }.sort().toString()).match("index_with_gtf") }, + { assert snapshot(process.out.versions).match("versions") } + ) + } + + } + + test("homo_sapiens-without_gtf") { + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test_fasta' ], + [file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)] + ]) + input[1] = Channel.of([ [], [] ]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(file(process.out.index[0][1]).listFiles().collect { it.getName() }.sort().toString()).match("index_without_gtf") }, + { assert snapshot(process.out.versions).match("versions") } + ) + } + + } + + test("homo_sapiens-without_gtf-stub") { + + options '-stub' + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test_fasta' ], + [file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)] + ]) + input[1] = Channel.of([ [], [] ]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(file(process.out.index[0][1]).listFiles().collect { it.getName() }.sort().toString()).match("index_without_gtf") }, { assert snapshot(process.out.versions).match("versions") } ) } diff --git a/modules/nf-core/star/genomegenerate/tests/main.nf.test.snap b/modules/nf-core/star/genomegenerate/tests/main.nf.test.snap index e7bb6ee..9de08c7 100644 --- a/modules/nf-core/star/genomegenerate/tests/main.nf.test.snap +++ b/modules/nf-core/star/genomegenerate/tests/main.nf.test.snap @@ -5,12 +5,18 @@ "versions.yml:md5,46b8f1f34bb7f23892cd1eb249ed4d7f" ] ], - "timestamp": "2023-12-04T18:01:27.298248806" + "timestamp": "2023-12-19T11:05:51.741109" }, - "index": { + "index_with_gtf": { "content": [ - "star" + "[Genome, Log.out, SA, SAindex, chrLength.txt, chrName.txt, chrNameLength.txt, chrStart.txt, 
exonGeTrInfo.tab, exonInfo.tab, geneInfo.tab, genomeParameters.txt, sjdbInfo.txt, sjdbList.fromGTF.out.tab, sjdbList.out.tab, transcriptInfo.tab]" ], - "timestamp": "2023-11-23T11:31:47.560528" + "timestamp": "2023-12-19T11:38:14.551548" + }, + "index_without_gtf": { + "content": [ + "[Genome, Log.out, SA, SAindex, chrLength.txt, chrName.txt, chrNameLength.txt, chrStart.txt, genomeParameters.txt]" + ], + "timestamp": "2023-12-19T11:38:22.382905" } } \ No newline at end of file diff --git a/modules/nf-core/untar/environment.yml b/modules/nf-core/untar/environment.yml index d6917da..0c9cbb1 100644 --- a/modules/nf-core/untar/environment.yml +++ b/modules/nf-core/untar/environment.yml @@ -1,9 +1,11 @@ name: untar + channels: - conda-forge - bioconda - defaults + dependencies: - - conda-forge::sed=4.7 - conda-forge::grep=3.11 + - conda-forge::sed=4.7 - conda-forge::tar=1.34 diff --git a/modules/nf-core/untar/tests/main.nf.test b/modules/nf-core/untar/tests/main.nf.test index d40db13..679e83c 100644 --- a/modules/nf-core/untar/tests/main.nf.test +++ b/modules/nf-core/untar/tests/main.nf.test @@ -16,7 +16,7 @@ nextflow_process { } process { """ - input[0] = [ [], file(params.test_data['sarscov2']['genome']['kraken2_tar_gz'], checkIfExists: true) ] + input[0] = [ [], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/db/kraken2.tar.gz', checkIfExists: true) ] """ } } @@ -30,28 +30,6 @@ nextflow_process { } - test("test_untar_different_output_path") { - - when { - params { - outdir = "$outputDir" - } - process { - """ - input[0] = [ [], file(params.test_data['homo_sapiens']['illumina']['test_flowcell'], checkIfExists: true) ] - """ - } - } - - then { - assertAll ( - { assert process.success }, - { assert snapshot(process.out.untar).match("test_untar_different_output_path") }, - ) - } - - } - test("test_untar_onlyfiles") { when { @@ -60,7 +38,7 @@ nextflow_process { } process { """ - input[0] = [ [], file(params.test_data['generic']['tar']['tar_gz'], 
checkIfExists: true) ] + input[0] = [ [], file(params.modules_testdata_base_path + 'generic/tar/hello.tar.gz', checkIfExists: true) ] """ } } diff --git a/modules/nf-core/untar/tests/main.nf.test.snap b/modules/nf-core/untar/tests/main.nf.test.snap index 146c867..ace4257 100644 --- a/modules/nf-core/untar/tests/main.nf.test.snap +++ b/modules/nf-core/untar/tests/main.nf.test.snap @@ -1,483 +1,4 @@ { - "test_untar_different_output_path": { - "content": [ - [ - [ - [ - - ], - [ - [ - [ - [ - [ - [ - "s_1_1101.bcl:md5,ad01889e2ff43e2f194224e20bdb600c", - "s_1_1101.stats:md5,4bbbf103454b37fbc3138fadf1b4446b" - ], - [ - "s_1_1101.bcl:md5,565384bbe67a694dfd690bae6d1d30c2", - "s_1_1101.stats:md5,55e5abd8f129ff38ef169873547abdb8" - ], - [ - "s_1_1101.bcl:md5,650fa58a630a9148835ba79e323d4237", - "s_1_1101.stats:md5,77403669ca1b05340c390dff64425c1e" - ], - [ - "s_1_1101.bcl:md5,54471c9e97299cd141e202e204637702", - "s_1_1101.stats:md5,67b14c9a89b7f8556674a7524d5cfb2d" - ], - [ - "s_1_1101.bcl:md5,74e4f929fc7476c380fd9d741ddb6700", - "s_1_1101.stats:md5,5730a4c35463eaa12a06b6758710b98c" - ], - [ - "s_1_1101.bcl:md5,c785f472f4350c120c02c888c8189590", - "s_1_1101.stats:md5,fee4ec63895ea81007e06ee6a36ba5e0" - ], - [ - "s_1_1101.bcl:md5,b7ea50bb25f08d43c301741d77050a9b", - "s_1_1101.stats:md5,fa7c68f3122c74d14364e6f7b011af70" - ], - [ - "s_1_1101.bcl:md5,9d5087dc4bcae39d66486363d4f68ecf", - "s_1_1101.stats:md5,23cdceee4d82c4b8e7c60018b9276ace" - ], - [ - "s_1_1101.bcl:md5,581e0c5ee94e8f2de14b2b1d8e777530", - "s_1_1101.stats:md5,9a3536d573c97f66bb56b49463612607" - ], - [ - "s_1_1101.bcl:md5,296fc026bb34c67bbe2b44845fe0d1de", - "s_1_1101.stats:md5,a7f57a7770fb9c5ae2a0fb1ef403ec4f" - ], - [ - "s_1_1101.bcl:md5,2a3ca15531556c36d10d132a9e051de8", - "s_1_1101.stats:md5,2d0bcdb0a1b51d3d79e415db2ab2d3b1" - ], - [ - "s_1_1101.bcl:md5,1150d46a2ccd4ac58aee0585d3e4ffd7", - "s_1_1101.stats:md5,2e97550bd5b5864ffd0565bb7a3f6d40" - ], - [ - "s_1_1101.bcl:md5,0b85c4b3da0de95e7b862d849c5333ae", - 
"s_1_1101.stats:md5,6eab9746fbeb783b0cd70398f44e0c1a" - ], - [ - "s_1_1101.bcl:md5,e0e9c91f4698804d7a6d1058ef68b34f", - "s_1_1101.stats:md5,790022cdc7878a02b2ebd166e1ddf0a7" - ], - [ - "s_1_1101.bcl:md5,38cd0ad4de359e651c8ac0d5777ea625", - "s_1_1101.stats:md5,a1b1d5ea5371d326abb029774483c5e6" - ], - [ - "s_1_1101.bcl:md5,b0ddc05c4012ccba24e712a1cfec748f", - "s_1_1101.stats:md5,af3d232f839d720f76f40ba06caa2987" - ], - [ - "s_1_1101.bcl:md5,af32fcc5dc3b836cf7a5ba3db85a75dd", - "s_1_1101.stats:md5,f93f2c09bd4e486c74a5f6e2040f7296" - ], - [ - "s_1_1101.bcl:md5,54b7428e037ca87816107647d4a3d9db", - "s_1_1101.stats:md5,e5ac77a72cd7bed5e9bf03cccda0e48c" - ], - [ - "s_1_1101.bcl:md5,fc8b4eacd493bf3d0b20bc23998dc7ff", - "s_1_1101.stats:md5,190315e159e2f4bc4c057ded7470dc52" - ], - [ - "s_1_1101.bcl:md5,9484ecffda489927fce424ac6a44fa9d", - "s_1_1101.stats:md5,0825feeb457ecc9efcf6f8526ba32311" - ], - [ - "s_1_1101.bcl:md5,eec59e21036e31c95ce1e847bfb0a9c4", - "s_1_1101.stats:md5,9acc13f63c98e5a8445e7be70d49222b" - ], - [ - "s_1_1101.bcl:md5,a9fb24476f87cba4fba68e2b3c3f2c07", - "s_1_1101.stats:md5,dc0aa7db9790733291c3e6480ca2a0fc" - ], - [ - "s_1_1101.bcl:md5,ed950b3e82c500927c2e236c9df005c6", - "s_1_1101.stats:md5,dccb71ec47d1f9d33a192da6d5660a45" - ], - [ - "s_1_1101.bcl:md5,b3e992025e995ca56b5ea2820144ef47", - "s_1_1101.stats:md5,a6a829bf2cffb26ac5d9dc3012057699" - ], - [ - "s_1_1101.bcl:md5,89edc726a5a4e0b4ff8ca3899ed0232b", - "s_1_1101.stats:md5,5b9b4fd8110577a59b82d0c419519d29" - ], - [ - "s_1_1101.bcl:md5,4dc696149169f232c451225f563cb5cd", - "s_1_1101.stats:md5,d3514a71ea3adc60e2943c6b8f6e2598" - ], - [ - "s_1_1101.bcl:md5,35b992d0318afb7c825ceaa31b0755e6", - "s_1_1101.stats:md5,2826093acc175c16c3795de7c4ca8f07" - ], - [ - "s_1_1101.bcl:md5,7bc927f56a362e49c00b5d76ee048901", - "s_1_1101.stats:md5,e47d862b795fd6b88a31d7d482ab22f6" - ], - [ - "s_1_1101.bcl:md5,84742233ff2a651626fe9036f27f7cb2", - "s_1_1101.stats:md5,b78fad11d3c50bc76b722cdc03e3028b" - ], - [ - 
"s_1_1101.bcl:md5,3935341c86263a7938e8c49620ef39f8", - "s_1_1101.stats:md5,cc6585b2daac5354073d150874da9704" - ], - [ - "s_1_1101.bcl:md5,3627f4fd548bf6e64aaf08fba3a342be", - "s_1_1101.stats:md5,120ae4831ae004ff7d16728aef36e82f" - ], - [ - "s_1_1101.bcl:md5,07631014bc35124149fabd80ef19f933", - "s_1_1101.stats:md5,eadd63d91f47cc6db6b6f0a967a23927" - ], - [ - "s_1_1101.bcl:md5,a1149c80415dc2f34d768eeb397c43fb", - "s_1_1101.stats:md5,ca89a9def67611a9151c6ce685b7cce1" - ], - [ - "s_1_1101.bcl:md5,eb5f71d4741d2f40618756bc72eaf8b4", - "s_1_1101.stats:md5,90f48501e735e5915b843478e23d1ae2" - ], - [ - "s_1_1101.bcl:md5,9bf270fe3f6add1a591ebc24fff10078", - "s_1_1101.stats:md5,a4e429671d4098034293c638aa655e16" - ], - [ - "s_1_1101.bcl:md5,219bedcbd24bae54fe4cf05dae05282c", - "s_1_1101.stats:md5,dd97525b65b68207137d51fcf19132c7" - ], - [ - "s_1_1101.bcl:md5,5163bc00a68fd57ae50cae0b76350892", - "s_1_1101.stats:md5,b606a5368eff1f012f3ea5d11ccdf2e0" - ], - [ - "s_1_1101.bcl:md5,fc429195a5af59a59e0cc4c48e6c05ea", - "s_1_1101.stats:md5,d809aa19698053f90d639da4dcad8008" - ], - [ - "s_1_1101.bcl:md5,383340219a1dd77076a092a64a71a7e4", - "s_1_1101.stats:md5,b204a5cf256378679ffc906c15cc1bae" - ], - [ - "s_1_1101.bcl:md5,0c369540d3e24696cf1f9c55bab69315", - "s_1_1101.stats:md5,a2bc69a4031a22ce9621dcc623a0bf4b" - ], - [ - "s_1_1101.bcl:md5,3127abc8016ba8eb954f8f8015dff387", - "s_1_1101.stats:md5,5deafff31150b7bf757f814e49a53bc2" - ], - [ - "s_1_1101.bcl:md5,045f40c82de676bafec3d59f91376a7a", - "s_1_1101.stats:md5,890700edc20687c090ef52248c7884b1" - ], - [ - "s_1_1101.bcl:md5,78af269aa2b39a1d765703f0a4739a86", - "s_1_1101.stats:md5,303cf457aa1543a8208544f694cbc531" - ], - [ - "s_1_1101.bcl:md5,0ab8c781959b783b62888e9274364a46", - "s_1_1101.stats:md5,2605b0e8322f83aa4d0dae5da4ec7a7a" - ], - [ - "s_1_1101.bcl:md5,d0cf823ffe352e8b3f75d589544ab617", - "s_1_1101.stats:md5,efa3c0e01e3db71e12fd961cb2d03739" - ], - [ - "s_1_1101.bcl:md5,db4ca4ab7a01e03c246f9160c3758d82", - 
"s_1_1101.stats:md5,f61550d9e4a90df6b860e68f41f82f60" - ], - [ - "s_1_1101.bcl:md5,1af39a2c7e5ff20ece91cb8160b51d17", - "s_1_1101.stats:md5,d0e20879afcaf6dfcd88c73f1c5c78cf" - ], - [ - "s_1_1101.bcl:md5,4cf7123bb0fffcd79266df03aef01665", - "s_1_1101.stats:md5,29bff4075109a121b087116b58d7e927" - ], - [ - "s_1_1101.bcl:md5,aa9980428cb60cd6320f4b48f4dd0d74", - "s_1_1101.stats:md5,6b0e20bde93133117a8d1a6df3d6f37b" - ], - [ - "s_1_1101.bcl:md5,0f6e440374e15b9b491d52fb83a8adfe", - "s_1_1101.stats:md5,55cb5eb0ecdabd23dca39ab8c4607598" - ], - [ - "s_1_1101.bcl:md5,2c645d7bdaddaa403f6e304d36df9e4b", - "s_1_1101.stats:md5,53acf33d21f832779b400c2447386ce4" - ], - [ - "s_1_1101.bcl:md5,3bbf0863b423b770c879203644420206", - "s_1_1101.stats:md5,579bdc7293cac8c3d7407249cacf4c25" - ], - [ - "s_1_1101.bcl:md5,6658a08409e81d29cfeb2d096b491985", - "s_1_1101.stats:md5,bb559ffbea46d612f9933cefa84c4c03" - ], - [ - "s_1_1101.bcl:md5,1700d9a13d3d4f7643af2943ef838acb", - "s_1_1101.stats:md5,f01cb6050ebfb15da1e0399ebd791eb4" - ], - [ - "s_1_1101.bcl:md5,1ac7aa9ffae25eb103f755f33e4a39c6", - "s_1_1101.stats:md5,0b9d45d7929ccf336d5e5b95373ed3c2" - ], - [ - "s_1_1101.bcl:md5,812a97af2e983a53226e18c75190b06c", - "s_1_1101.stats:md5,d2410c7b0e506dab2972e77e2398de1e" - ], - [ - "s_1_1101.bcl:md5,c981e8e4dcc434956c2b86159da268bc", - "s_1_1101.stats:md5,e9c826e85361ce673f1f248786c9a611" - ], - [ - "s_1_1101.bcl:md5,88e09e99a0a4ef3357b203a41b22f77c", - "s_1_1101.stats:md5,ef06f2e5ad667bbd383f9ed6a05b7b42" - ], - [ - "s_1_1101.bcl:md5,461c8b146fc8a7938be38689978ecd09", - "s_1_1101.stats:md5,65115693935da66f9791b27136e22fb0" - ], - [ - "s_1_1101.bcl:md5,c7b827df5ce20e0f21916fe60860ca3f", - "s_1_1101.stats:md5,87be73613aeb507847f94d3cac5bb30a" - ], - [ - "s_1_1101.bcl:md5,7c4cc3dc9c8a1b0f15917b282dfb40ce", - "s_1_1101.stats:md5,bdd9181fa89debbfafe7b6ea3e064065" - ], - [ - "s_1_1101.bcl:md5,19f4debaf91e118aca8934517179ac33", - "s_1_1101.stats:md5,1143082719e136241d21b14a6b19b8a2" - ], - [ - 
"s_1_1101.bcl:md5,38aa256ad2d697d84b0b2c0e876a3eba", - "s_1_1101.stats:md5,64dd82f03df23f7f437eede2671ed4fe" - ], - [ - "s_1_1101.bcl:md5,b7929970378949571fed922c1b8cab32", - "s_1_1101.stats:md5,3d6d7985a41629fe196e4342d7fe36aa" - ], - [ - "s_1_1101.bcl:md5,fb2ed0bf6e89d79624ee78754e773491", - "s_1_1101.stats:md5,f34940810ff255aee79953496a12716d" - ], - [ - "s_1_1101.bcl:md5,4f8a8311f5f9c3a7629c1a973a7b280e", - "s_1_1101.stats:md5,4fd7cd28c09f4e152e7c2ad1ab541cd2" - ], - [ - "s_1_1101.bcl:md5,9eb46c903d0344e25af51f88cc311d60", - "s_1_1101.stats:md5,df3abd5f620d9e7f99496098d9fd3f7f" - ], - [ - "s_1_1101.bcl:md5,3ecbc17f3660e2014b58d7fe70ae62d5", - "s_1_1101.stats:md5,8e89a13c85a6d6ab3ccd251b66d1f165" - ], - [ - "s_1_1101.bcl:md5,5d59cc2499a77791233a64f73fe82894", - "s_1_1101.stats:md5,32ec99cd400f4b80cb26e2fa8e07ece0" - ], - [ - "s_1_1101.bcl:md5,1c052da47b9ae8554388f0fa3aade482", - "s_1_1101.stats:md5,d23f438772673688aa7bc92421dc6dce" - ], - [ - "s_1_1101.bcl:md5,1a52bd4f23130c0c96bc967ccd448a2b", - "s_1_1101.stats:md5,9b597e3388d59ef1f61aba30ac90ea79" - ], - [ - "s_1_1101.bcl:md5,8a1e84b79cf3f80794c20e3a0cc84688", - "s_1_1101.stats:md5,9561f7b6ef4b1849afc72b2bb49792bd" - ], - [ - "s_1_1101.bcl:md5,75c00111051f3fa95d04286823cb9109", - "s_1_1101.stats:md5,1fe786cdf8181767deafbd60b3c76610" - ], - [ - "s_1_1101.bcl:md5,529255d8deee0873ed5565e6d1a2ebda", - "s_1_1101.stats:md5,3fa7f467e97a75880f32d17b7429d316" - ], - [ - "s_1_1101.bcl:md5,ea4d960e3d9355d2149da71b88a21df4", - "s_1_1101.stats:md5,2540fe65586e8e800c1ddd8cddd1e8cd" - ], - [ - "s_1_1101.bcl:md5,0dfe1fd92a2dce2f23119aa483429744", - "s_1_1101.stats:md5,78257b2169fb9f0cf40966e06e847e86" - ], - [ - "s_1_1101.bcl:md5,f692ddc9aa3ab849271d07c666d0b3b9", - "s_1_1101.stats:md5,aa2ec6a3e3a9c116e34fe74a21e6459e" - ], - [ - "s_1_1101.bcl:md5,29cc4c239eae7c871c9a1adf92ebdb98", - "s_1_1101.stats:md5,263184813090acd740a5bf25304aed3a" - ], - [ - "s_1_1101.bcl:md5,e005af6a84925e326afbfe264241f047", - 
"s_1_1101.stats:md5,b6fb20868eebaffcc19daa694a449795" - ], - [ - "s_1_1101.bcl:md5,02f1a699b1ba9967accccf99a7af3d24", - "s_1_1101.stats:md5,4f007efacecaf26dc0e0231aede28754" - ], - [ - "s_1_1101.bcl:md5,df308c72a2dcc655cd95e98f5457187a", - "s_1_1101.stats:md5,130c4b07f4c14030bab012824cbe34da" - ], - [ - "s_1_1101.bcl:md5,f3ce10d8d2406b72355023bfa8c96822", - "s_1_1101.stats:md5,2638f4db393ed5b699ec2ce59ff0ec19" - ], - [ - "s_1_1101.bcl:md5,cc2f6d675ad1593ff96f734b172d249e", - "s_1_1101.stats:md5,f5b13f1e1ababc9e1a7a73b0b993cbf1" - ], - [ - "s_1_1101.bcl:md5,7938a0b21448305a951b023b1845b3a7", - "s_1_1101.stats:md5,fcd57511adabfc3ba1ac045165330006" - ], - [ - "s_1_1101.bcl:md5,44879bc6a38df1fee8def61868115041", - "s_1_1101.stats:md5,517e20e4b58a8023a37f9af62e0e2036" - ], - [ - "s_1_1101.bcl:md5,8749611e62406a7d2f34c610a55e56af", - "s_1_1101.stats:md5,8ccf24b3676ef84f2e513be8f2a9f3d1" - ], - [ - "s_1_1101.bcl:md5,a9846a037611cda3721958088f714c0e", - "s_1_1101.stats:md5,6438fa5a1892f328cab1605a95d80a3b" - ], - [ - "s_1_1101.bcl:md5,d6c4a2a726496476eb826532f974ed5f", - "s_1_1101.stats:md5,8c2c65b5e8b00dbf61ada65252aeb266" - ], - [ - "s_1_1101.bcl:md5,be3dde6cae7dd85855a6bf295ebfacfe", - "s_1_1101.stats:md5,93bc13f3b0749b2b8d8bcb0b1199f4f0" - ], - [ - "s_1_1101.bcl:md5,7c64514735a6cf1565b60647edd17d20", - "s_1_1101.stats:md5,4a0aa6c49b24f876415e5878cef7f805" - ], - [ - "s_1_1101.bcl:md5,3983b4043bc9df4b505202a5134ccf03", - "s_1_1101.stats:md5,1c9d9a8558adc1279ca27c96bc1b9758" - ], - [ - "s_1_1101.bcl:md5,a0b8d77f116ec95975f9253dcb768136", - "s_1_1101.stats:md5,c3992b786756e7ec42f65ef4b13b50d4" - ], - [ - "s_1_1101.bcl:md5,43c95ba35d06bb7c57fbd16f3d1cfd6c", - "s_1_1101.stats:md5,3cb69d04698c39f97f962e5bf1eea7f0" - ], - [ - "s_1_1101.bcl:md5,3dbeea0cad7052f19f53ff6f19dd4d90", - "s_1_1101.stats:md5,58bbc8254f0f5f4a244531e8e9c12a04" - ], - [ - "s_1_1101.bcl:md5,da56d088996376c898d855b6cd0a7dfc", - "s_1_1101.stats:md5,9f2d78af6908ce1576b89cdc059844ff" - ], - [ - 
"s_1_1101.bcl:md5,7b641a5565f095e9a6ffcad9e4305033", - "s_1_1101.stats:md5,3ada06c59b4fb41b83ab6abd0979e9fc" - ], - [ - "s_1_1101.bcl:md5,a3843d397a01d51657825bb652c191e5", - "s_1_1101.stats:md5,19341e52a4bfc7d9d48e9d2acc68c519" - ], - [ - "s_1_1101.bcl:md5,048e3ebfc8efeb8012def6b741c9060d", - "s_1_1101.stats:md5,88bd38deca1e87d700effab1fd099565" - ], - [ - "s_1_1101.bcl:md5,b340db0e07e829dd5da22371916a1a9e", - "s_1_1101.stats:md5,e44cfaddcc4ffb968e5b1a2f41ac48a5" - ], - [ - "s_1_1101.bcl:md5,e6011ec6eabbc2b8792deb283c621ce0", - "s_1_1101.stats:md5,090875dcd1a431af24bc631333f089c4" - ], - [ - "s_1_1101.bcl:md5,a08f216e3352345031ed100ec4245082", - "s_1_1101.stats:md5,97b949ef4b96219e1369f673cf5f8a6c" - ], - [ - "s_1_1101.bcl:md5,b43337c76fb037dfcf5f8f7bcb3618e5", - "s_1_1101.stats:md5,ddef585805e79951f69d23ab7354f69b" - ], - [ - "s_1_1101.bcl:md5,8c61fd004104397b360855e058bbf1bf", - "s_1_1101.stats:md5,0f8d253816d594dcfea3ccf48c826401" - ], - [ - "s_1_1101.bcl:md5,594d06310d328b188aa0b3edfff22cb2", - "s_1_1101.stats:md5,3160bf271b39aeb7590e4fd2984710ba" - ], - [ - "s_1_1101.bcl:md5,4c9eada67c9d55437211d83e111961d5", - "s_1_1101.stats:md5,2901b46ab16ec4863d30e4c84ec29c97" - ], - [ - "s_1_1101.bcl:md5,e03971ae5282f0accc0c1b7374d9ef1b", - "s_1_1101.stats:md5,60d2a19ce59bf70a21a28555484cead8" - ], - [ - "s_1_1101.bcl:md5,e1c6f7a06e63d149895d3e48e63df155", - "s_1_1101.stats:md5,44beb10af847ea3dddaf06dda7031126" - ], - [ - "s_1_1101.bcl:md5,960a99bf29a8f9d936e9b8582d46c9c6", - "s_1_1101.stats:md5,544cd1a7aaaa841914b40ece43399334" - ], - [ - "s_1_1101.bcl:md5,5706679f349fd4a6b6313bc2c41c7a42", - "s_1_1101.stats:md5,627eea844b26dae033848c2f9f69177b" - ], - [ - "s_1_1101.bcl:md5,21da5abc4b0402bbac14b5ab998b0b4f", - "s_1_1101.stats:md5,515bd140b095ad90473ca7a9a69877ab" - ], - "s_1_1101.control:md5,08a72e2198ae95150718e8adf011d105", - "s_1_1101.filter:md5,3a72bc73b323c8cb0ac5bfeb62d98989" - ] - ], - [ - "s_1_1101.locs:md5,0827ea802e5257cc5b20e757a33d4c98" - ], - 
"RTAConfiguration.xml:md5,c7d6e257bc374f142dc64b9d2281d4c9", - "config.xml:md5,9a4cc7ec01fefa2f1ce9bcb45bbad6e9" - ] - ], - [ - "ControlMetricsOut.bin:md5,6d77b38d0793a6e1ce1e85706e488953", - "CorrectedIntMetricsOut.bin:md5,2bbf84d3be72734addaa2fe794711434", - "ErrorMetricsOut.bin:md5,38c88def138e9bb832539911affdb286", - "ExtractionMetricsOut.bin:md5,7497c3178837eea8f09350b5cd252e99", - "IndexMetricsOut.bin:md5,d41d8cd98f00b204e9800998ecf8427e", - "QMetricsOut.bin:md5,7e9f198d53ebdfbb699a5f94cf1ed51c", - "TileMetricsOut.bin:md5,83891751ec1c91a425a524b476b6ca3c" - ], - "RunInfo.xml:md5,03038959f4dd181c86bc97ae71fe270a" - ] - ] - ] - ], - "timestamp": "2023-10-18T11:56:39.562418" - }, "test_untar_onlyfiles": { "content": [ [ From 4b88f155ca90a075f16935050a6d2680bef9806a Mon Sep 17 00:00:00 2001 From: Jonathan Manning Date: Fri, 26 Jan 2024 10:53:55 +0000 Subject: [PATCH 29/30] Update modules/local/gtf2bed/main.nf Co-authored-by: Maxime U Garcia --- modules/local/gtf2bed/main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/local/gtf2bed/main.nf b/modules/local/gtf2bed/main.nf index 8a4bae8..65d3f7a 100644 --- a/modules/local/gtf2bed/main.nf +++ b/modules/local/gtf2bed/main.nf @@ -17,7 +17,7 @@ process GTF2BED { when: task.ext.when == null || task.ext.when - script: // This script is bundled with the pipeline, in nf-core/rnaseq/bin/ + script: // This script is bundled with the pipeline, in nf-core/riboseq/bin/ """ gtf2bed \\ $gtf \\ From aeffe214c411a7d3bd38bd1fffaeed91e3792184 Mon Sep 17 00:00:00 2001 From: Jonathan Manning Date: Fri, 26 Jan 2024 11:27:34 +0000 Subject: [PATCH 30/30] Apply rnaseq -> riboseq fixes from code review Co-authored-by: Maxime U Garcia --- modules/local/gtf_filter/main.nf | 2 +- nextflow_schema.json | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/modules/local/gtf_filter/main.nf b/modules/local/gtf_filter/main.nf index d14e8ff..b61d9cd 100644 --- a/modules/local/gtf_filter/main.nf +++ 
b/modules/local/gtf_filter/main.nf @@ -17,7 +17,7 @@ process GTF_FILTER { when: task.ext.when == null || task.ext.when - script: // filter_gtf.py is bundled with the pipeline, in nf-core/rnaseq/bin/ + script: // filter_gtf.py is bundled with the pipeline, in nf-core/riboseq/bin/ """ filter_gtf.py \\ --gtf $gtf \\ diff --git a/nextflow_schema.json b/nextflow_schema.json index f696c76..800ba5e 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -1,8 +1,8 @@ { "$schema": "http://json-schema.org/draft-07/schema", - "$id": "https://raw.githubusercontent.com/nf-core/rnaseq/master/nextflow_schema.json", - "title": "nf-core/rnaseq pipeline parameters", - "description": "RNA sequencing analysis pipeline for gene/isoform quantification and extensive quality control.", + "$id": "https://raw.githubusercontent.com/nf-core/riboseq/master/nextflow_schema.json", + "title": "nf-core/riboseq pipeline parameters", + "description": "Analysis of ribosome profiling, or Ribo-Seq (also named ribosome footprinting)", "type": "object", "definitions": { "input_output_options": { @@ -20,7 +20,7 @@ "pattern": "^\\S+\\.csv$", "schema": "assets/schema_input.json", "description": "Path to comma-separated file containing information about the samples in the experiment.", - "help_text": "You will need to create a design file with information about the samples in your experiment before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 4 columns, and a header row. See [usage docs](https://nf-co.re/rnaseq/usage#samplesheet-input).", + "help_text": "You will need to create a design file with information about the samples in your experiment before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 3 columns, and a header row. See [usage docs](https://nf-co.re/riboseq/usage#samplesheet-input).", "fa_icon": "fas fa-file-csv" }, "outdir": {
    Process Name \\", + " \\ Software Version
    CUSTOM_DUMPSOFTWAREVERSIONSpython3.11.7
    yaml5.4.1
    TOOL1tool10.11.9
    TOOL2tool21.9
    WorkflowNextflow
    File typeConventional base calls
    File typeConventional base calls
    File typeConventional base calls
    File typeConventional base calls
    File typeConventional base calls
    File typeConventional base calls
    File typeConventional base calls
    File typeConventional base calls
    File typeConventional base calls
    File typeConventional base calls
    File typeConventional base calls