Skip to content

Commit

Permalink
Merge pull request #243 from czbiohub/olgabot/featurecounts-merge-memory
Browse files Browse the repository at this point in the history
Minimize memory footprint of `merge_featurecounts`
  • Loading branch information
apeltzer authored Jul 7, 2019
2 parents 3c72549 + 2be0b27 commit 034ee2a
Show file tree
Hide file tree
Showing 2 changed files with 11 additions and 5 deletions.
1 change: 0 additions & 1 deletion environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@ dependencies:
- preseq=2.0.3
- deeptools=3.2.1
- gffread=0.11.4
- csvtk=0.18.2
- qualimap=2.2.2c
- rseqc=3.0.0
- subread=1.6.4
Expand Down
15 changes: 11 additions & 4 deletions main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -1042,10 +1042,13 @@ process featureCounts {
"""
}



/*
* STEP 10 - Merge featurecounts
*/
process merge_featureCounts {
label "mid_memory"
tag "${input_files[0].baseName - '.sorted'}"
publishDir "${params.outdir}/featureCounts", mode: 'copy'

Expand All @@ -1056,10 +1059,14 @@ process merge_featureCounts {
file 'merged_gene_counts.txt' into featurecounts_merged

script:
//if we only have 1 file, just use cat and pipe output to csvtk. Else join all files first, and then remove unwanted column names.
def merge = input_files instanceof Path ? 'cat' : 'csvtk join -t -f "Geneid,Start,Length,End,Chr,Strand,gene_name"'
// Process substitution (the `<(...)`) for the win! Each file is streamed to `paste` instead of being loaded into memory.
// Geneid in 1st column and gene_name in 7th
gene_ids = "<(tail -n +2 ${input_files[0]} | cut -f1,7 )"
counts = input_files.collect{filename ->
// Remove the first line, strip the `.sorted.bam` suffix, and take the eighth column
"<(tail -n +2 ${filename} | sed 's:.sorted.bam::' | cut -f8)"}.join(" ")
"""
$merge $input_files | csvtk cut -t -f "-Start,-Chr,-End,-Length,-Strand" | sed 's/Aligned.sortedByCoord.out.markDups.bam//g' > merged_gene_counts.txt
paste $gene_ids $counts > merged_gene_counts.txt
"""
}

Expand Down Expand Up @@ -1122,7 +1129,7 @@ if (params.pseudo_aligner == 'salmon'){
}

process salmon_merge {
label 'low_memory'
label 'mid_memory'
publishDir "${params.outdir}/salmon", mode: 'copy'

input:
Expand Down

0 comments on commit 034ee2a

Please sign in to comment.