Skip to content

Commit

Permalink
Refactor processes, workflows, output block with static types
Browse files Browse the repository at this point in the history
Signed-off-by: Ben Sherman <[email protected]>
  • Loading branch information
bentsherman committed Nov 5, 2024
1 parent 1815dc2 commit 8fc8342
Show file tree
Hide file tree
Showing 7 changed files with 124 additions and 65 deletions.
79 changes: 55 additions & 24 deletions main.nf
Original file line number Diff line number Diff line change
@@ -1,39 +1,70 @@
#!/usr/bin/env nextflow
#!/usr/bin/env nextflow

/*
* Proof of concept of an RNAseq pipeline implemented with Nextflow
*/

nextflow.preview.types = true

/*
* Default pipeline parameters. They can be overriden on the command line eg.
* given `params.foo` specify on the run command line `--foo some_value`.
* Default pipeline parameters. They can be overridden on the command line, e.g.
* `params.reads` can be specified on the command line as `--reads some_value`.
*/

params.reads = "$baseDir/data/ggal/ggal_gut_{1,2}.fq"
params.transcriptome = "$baseDir/data/ggal/ggal_1_48850000_49020000.Ggal71.500bpflank.fa"
params.outdir = "results"
params.multiqc = "$baseDir/multiqc"
params.reads = "${projectDir}/data/ggal/ggal_gut_{1,2}.fq"
params.transcriptome = "${projectDir}/data/ggal/ggal_1_48850000_49020000.Ggal71.500bpflank.fa"
params.multiqc = "${projectDir}/multiqc"


// import modules
include { RNASEQ } from './modules/rnaseq'
include { FastqPair ; Sample } from './modules/rnaseq'
include { MULTIQC } from './modules/multiqc'

/*
* main script flow
*/
// Entry workflow. Turns the read files matched by `params.reads` into
// FastqPair records, runs RNASEQ on them with the transcriptome file,
// feeds every per-sample fastqc/quant path to MULTIQC, and publishes the
// index, the samples and the MultiQC summary.
workflow {
main:
// Log a banner with the effective inputs and the output directory.
log.info """\
R N A S E Q - N F P I P E L I N E
===================================
transcriptome: ${params.transcriptome}
reads : ${params.reads}
outdir : ${workflow.outputDir}
""".stripIndent()

// Each (id, reads) pair from fromFilePairs becomes a FastqPair built from
// reads[0] and reads[1]; RNASEQ receives the piped channel plus the
// transcriptome file and its result is destructured into index and samples.
let (index, samples) = params.reads
|> Channel.fromFilePairs( checkIfExists: true ) // Channel<(String,List<Path>)>
|> map { (id, reads) ->
new FastqPair(id, reads[0], reads[1])
} // Channel<FastqPair>
|> RNASEQ( file(params.transcriptome) ) // NamedTuple(index: Path, samples: Channel<Sample>)

// Flatten each sample into its fastqc and quant paths, collect them all,
// and pass them to MULTIQC together with the `params.multiqc` file.
let summary = samples
|> flatMap { s -> [ s.fastqc, s.quant ] } // Channel<Path>
|> collect // Bag<Path> (future)
|> MULTIQC( file(params.multiqc) ) // Path (future)

// Completion handler: point at the report on success, otherwise log a
// generic failure message.
workflow.onComplete {
log.info ( workflow.success
? "\nDone! Open the following report in your browser --> ${workflow.outputDir}/multiqc_report.html\n"
: "Oops .. something went wrong" )
}

// Send each result to a named publish target.
// NOTE(review): the names presumably match the entries declared in the
// `output` block of this script -- confirm.
publish:
index >> 'index'
samples >> 'samples'
summary >> 'summary'
}

output {
index: Path {
path '.'
}

samples: Sample {
path { sample -> sample.id }
index {
path 'samples.json'
}
}

log.info """\
R N A S E Q - N F P I P E L I N E
===================================
transcriptome: ${params.transcriptome}
reads : ${params.reads}
outdir : ${params.outdir}
"""

read_pairs_ch = channel.fromFilePairs( params.reads, checkIfExists: true )
RNASEQ( params.transcriptome, read_pairs_ch )
MULTIQC( RNASEQ.out, params.multiqc )
summary: Path {
path '.'
}
}
16 changes: 8 additions & 8 deletions modules/fastqc/main.nf
Original file line number Diff line number Diff line change
@@ -1,18 +1,18 @@
params.outdir = 'results'

process FASTQC {
tag "FASTQC on $sample_id"
tag "FASTQC on $id"
conda 'bioconda::fastqc=0.12.1'
publishDir params.outdir, mode:'copy'

input:
tuple val(sample_id), path(reads)

output:
path "fastqc_${sample_id}_logs", emit: logs
id : String
fastq_1 : Path
fastq_2 : Path

script:
"""
fastqc.sh "$sample_id" "$reads"
fastqc.sh $id "$fastq_1 $fastq_2"
"""

output:
file("fastqc_${id}_logs")
}
10 changes: 5 additions & 5 deletions modules/index/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,15 @@
process INDEX {
tag "$transcriptome.simpleName"
conda 'bioconda::salmon=1.10.3'

input:
path transcriptome

output:
path 'index'
input:
transcriptome : Path

script:
"""
salmon index --threads $task.cpus -t $transcriptome -i index
"""

output:
file('index')
}
12 changes: 5 additions & 7 deletions modules/multiqc/main.nf
Original file line number Diff line number Diff line change
@@ -1,20 +1,18 @@
params.outdir = 'results'

process MULTIQC {
conda 'bioconda::multiqc=1.25'
publishDir params.outdir, mode:'copy'

input:
path '*'
path config

output:
path 'multiqc_report.html', emit: report
inputs : Bag<Path>
config : Path

script:
"""
cp $config/* .
echo "custom_logo: \$PWD/logo.png" >> multiqc_config.yaml
multiqc -o multiqc_report.html .
"""

output:
file('multiqc_report.html')
}
22 changes: 15 additions & 7 deletions modules/quant/main.nf
Original file line number Diff line number Diff line change
@@ -1,17 +1,25 @@

process QUANT {
tag "$pair_id"
tag "$id"
conda 'bioconda::salmon=1.10.3'

input:
path index
tuple val(pair_id), path(reads)

output:
path pair_id
index : Path
id : String
fastq_1 : Path
fastq_2 : Path

script:
"""
salmon quant --threads $task.cpus --libType=U -i $index -1 ${reads[0]} -2 ${reads[1]} -o $pair_id
salmon quant \
--threads $task.cpus \
--libType=U \
-i $index \
-1 ${fastq_1} \
-2 ${fastq_2} \
-o quant_${id}
"""

output:
file("quant_${id}")
}
38 changes: 29 additions & 9 deletions modules/rnaseq.nf
Original file line number Diff line number Diff line change
@@ -1,19 +1,39 @@
params.outdir = 'results'

include { INDEX } from './index'
include { QUANT } from './quant'
include { FASTQC } from './fastqc'

workflow RNASEQ {
take:
transcriptome
read_pairs_ch
pairs : Channel<FastqPair>
transcriptome : Path

main:
INDEX(transcriptome)
FASTQC(read_pairs_ch)
QUANT(INDEX.out, read_pairs_ch)
transcriptome // Path
|> INDEX // Path (future)
|> set { index } // Path (future)

pairs // Channel<FastqPair>
|> map { pair ->
let (id, fastq_1, fastq_2) = (pair.id, pair.fastq_1, pair.fastq_2)
let fastqc = FASTQC(id, fastq_1, fastq_2)
let quant = QUANT(index, id, fastq_1, fastq_2)
new Sample(id, fastqc, quant)
} // Channel<Sample>
|> set { samples } // Channel<Sample>

emit:
index : Path
samples : Channel<Sample>
}

// Input record for the RNASEQ workflow: a sample identifier and two FASTQ
// file paths. RNASEQ reads `id`, `fastq_1` and `fastq_2` from each pair and
// passes them on to FASTQC and QUANT.
record FastqPair {
id : String // sample identifier
fastq_1 : Path // first FASTQ file of the pair
fastq_2 : Path // second FASTQ file of the pair
}

emit:
QUANT.out | concat(FASTQC.out) | collect
// Per-sample result record built in the RNASEQ workflow: the sample
// identifier together with the outputs of FASTQC and QUANT for that sample.
record Sample {
id : String // sample identifier, copied from the input FastqPair
fastqc : Path // value returned by FASTQC for this sample
quant : Path // value returned by QUANT for this sample
}
12 changes: 7 additions & 5 deletions nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,15 @@ manifest {
}

/*
* default params
* config params
*/
params.outdir = 'results'

params.outdir = "results"
params.reads = "${projectDir}/data/ggal/ggal_gut_{1,2}.fq"
params.transcriptome = "${projectDir}/data/ggal/ggal_1_48850000_49020000.Ggal71.500bpflank.fa"
params.multiqc = "${projectDir}/multiqc"
/*
* configure outputs publishing
*/
outputDir = params.outdir
workflow.output.mode = 'copy'

/*
* defines execution profiles for different environments
Expand Down

0 comments on commit 8fc8342

Please sign in to comment.