Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add name of each ID step #176

Merged
merged 17 commits into from
May 6, 2022
4 changes: 4 additions & 0 deletions assets/multiqc_config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,3 +15,7 @@ export_plots: true
custom_logo: "./nf-core-quantms_logo_light.png"
custom_logo_url: "https://github.com/bigbio/quantms"
custom_logo_title: "quantms"

sp:
quantms/exp_design:
fn: "*_design.tsv"
7 changes: 6 additions & 1 deletion bin/msstats_tmt.R
Original file line number Diff line number Diff line change
Expand Up @@ -210,6 +210,11 @@ if(typeof(reference_norm) == 'character'){
reference_norm <- char_to_boolean[reference_norm]
}

if (length(args)<12) {
# outputPrefix
args[12] <- './msstatsiso'
}

csv_input <- args[1]
contrast_str <- args[2]
control_str <- args[3]
Expand Down Expand Up @@ -241,5 +246,5 @@ if (l == 1) {
test.MSstatsTMT <- groupComparisonTMT(contrast.matrix=contrast_mat, data=processed.quant)

#TODO allow manual input (e.g. proteins of interest)
write.table(test.MSstatsTMT$ComparisonResult, file=paste0("msstatsiso_results.csv"), quote=FALSE, sep='\t', row.names = FALSE)
write.table(test.MSstatsTMT$ComparisonResult, file=paste0(args[12],"_comparisons.csv"), quote=FALSE, sep='\t', row.names = FALSE)
}
6 changes: 4 additions & 2 deletions modules/local/msstats/main.nf
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
process MSSTATS {
tag "$msstats_csv_input.Name"
label 'process_medium'

conda (params.enable_conda ? "bioconda::bioconductor-msstats=4.2.0" : null)
Expand All @@ -9,7 +10,7 @@ process MSSTATS {
}

input:
path out_msstats
path msstats_csv_input

output:
// The generation of the PDFs from MSstats is very unstable, especially with auto-contrasts.
Expand All @@ -25,13 +26,14 @@ process MSSTATS {

"""
msstats_plfq.R \\
${out_msstats} \\
${msstats_csv_input} \\
${params.contrasts} \\
"${ref_con}" \\
${params.msstats_remove_one_feat_prot} \\
${params.msstatslfq_removeFewMeasurements} \\
${params.msstatslfq_feature_subset_protein} \\
${params.msstatslfq_quant_summary_method} \\
${msstats_csv_input.baseName} \\
$args \\
> msstats.log \\
|| echo "Optional MSstats step failed. Please check logs and re-run or do a manual statistical analysis."
Expand Down
6 changes: 4 additions & 2 deletions modules/local/msstatstmt/main.nf
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
process MSSTATSTMT {
tag "$msstatstmt_csv_input.Name"
label 'process_medium'

conda (params.enable_conda ? "bioconda::bioconductor-msstatstmt=2.2.0" : null)
Expand All @@ -9,7 +10,7 @@ process MSSTATSTMT {
}

input:
path out_msstats_tmt
path msstatstmt_csv_input

output:
// The generation of the PDFs from MSstatsTMT is very unstable, especially with auto-contrasts.
Expand All @@ -25,7 +26,7 @@ process MSSTATSTMT {

"""
msstats_tmt.R \\
${out_msstats_tmt} \\
${msstatstmt_csv_input} \\
${params.contrasts} \\
"${ref_con}" \\
${params.msstats_remove_one_feat_prot} \\
Expand All @@ -36,6 +37,7 @@ process MSSTATSTMT {
${params.msstatsiso_global_norm} \\
${params.msstatsiso_remove_norm_channel} \\
${params.msstatsiso_reference_normalization} \\
${msstatstmt_csv_input.baseName} \\
$args \\
> msstats_tmt.log \\
|| echo "Optional MSstatsTMT step failed. Please check logs and re-run or do a manual statistical analysis."
Expand Down
1 change: 1 addition & 0 deletions modules/local/openms/consensusid/main.nf
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
process CONSENSUSID {
tag "$meta.id"
label 'process_medium'
// TODO could be easily parallelized
label 'process_single_thread'
Expand Down
1 change: 1 addition & 0 deletions modules/local/openms/extractpsmfeatures/main.nf
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
process EXTRACTPSMFEATURES {
tag "$meta.id"
label 'process_very_low'
label 'process_single_thread'
label 'openms'
Expand Down
1 change: 1 addition & 0 deletions modules/local/openms/falsediscoveryrate/main.nf
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
process FALSEDISCOVERYRATE {
tag "$meta.id"
label 'process_low'
label 'process_single_thread'
label 'openms'
Expand Down
2 changes: 1 addition & 1 deletion modules/local/openms/idfilter/main.nf
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
process IDFILTER {

tag {task.ext.suffix == ".idXML" ? "$meta.id" : "$id_file.baseName"}
label 'process_very_low'
label 'process_single_thread'
label 'openms'
Expand Down
1 change: 1 addition & 0 deletions modules/local/openms/idpep/main.nf
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
process IDPEP {
tag "$meta.id"
label 'process_very_low'

conda (params.enable_conda ? "bioconda::openms=2.8.0" : null)
Expand Down
1 change: 1 addition & 0 deletions modules/local/openms/indexpeptides/main.nf
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
process INDEXPEPTIDES {
tag "$meta.id"
label 'process_low'

conda (params.enable_conda ? "bioconda::openms=2.8.0" : null)
Expand Down
3 changes: 2 additions & 1 deletion modules/local/openms/msstatsconverter/main.nf
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
process MSSTATSCONVERTER {
tag "$exp_file.Name"
label 'process_low'

conda (params.enable_conda ? "bioconda::openms=2.8.0" : null)
Expand All @@ -24,7 +25,7 @@ process MSSTATSCONVERTER {
-in ${consensusXML} \\
-in_design ${exp_file} \\
-method ${quant_method} \\
-out out_msstats.csv \\
-out ${exp_file.baseName}_out_msstats.csv \\
$args \\
|& tee MSstatsConverter.log

Expand Down
17 changes: 9 additions & 8 deletions modules/local/openms/proteomicslfq/main.nf
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
process PROTEOMICSLFQ {
tag "${expdes.baseName - ~/_design$/}"
label 'process_high'

conda (params.enable_conda ? "bioconda::openms=2.8.0" : null)
Expand All @@ -13,10 +14,10 @@ process PROTEOMICSLFQ {
path(fasta)

output:
path "out.mzTab", emit: out_mztab
path "out.consensusXML", emit: out_consensusXML
path "out_msstats.csv", emit: out_msstats optional true
path "out_triqler.tsv", emit: out_triqler optional true
path "${expdes.baseName - ~/_design$/}.mzTab", emit: out_mztab
path "${expdes.baseName - ~/_design$/}.consensusXML", emit: out_consensusXML
path "*out_msstats.csv", emit: out_msstats optional true
path "*out_triqler.tsv", emit: out_triqler optional true
path "debug_mergedIDs.idXML", emit: debug_mergedIDs optional true
path "debug_mergedIDs_inference.idXML", emit: debug_mergedIDs_inference optional true
path "debug_mergedIDsGreedyResolved.idXML", emit: debug_mergedIDsGreedyResolved optional true
Expand All @@ -28,8 +29,8 @@ process PROTEOMICSLFQ {

script:
def args = task.ext.args ?: ''
def msstats_present = params.quantification_method == "feature_intensity" ? '-out_msstats out_msstats.csv' : ''
def triqler_present = (params.quantification_method == "feature_intensity") && (params.add_triqler_output) ? '-out_triqler out_triqler.tsv' : ''
def msstats_present = params.quantification_method == "feature_intensity" ? "-out_msstats ${expdes.baseName - ~/_design$/}_msstats_in.csv" : ""
def triqler_present = (params.quantification_method == "feature_intensity") && (params.add_triqler_output) ? "-out_triqler ${expdes.baseName - ~/_design$/}_triqler_in.tsv" : ""
def decoys_present = (params.quantify_decoys || ((params.quantification_method == "feature_intensity") && params.add_triqler_output)) ? '-PeptideQuantification:quantify_decoys' : ''

"""
Expand All @@ -46,12 +47,12 @@ process PROTEOMICSLFQ {
-protein_quantification ${params.protein_quant} \\
-alignment_order ${params.alignment_order} \\
-picked_proteinFDR true \\
-out out.mzTab \\
-out ${expdes.baseName - ~/_design$/}.mzTab \\
-threads ${task.cpus} \\
${msstats_present} \\
${triqler_present} \\
${decoys_present} \\
-out_cxml out.consensusXML \\
-out_cxml ${expdes.baseName - ~/_design$/}.consensusXML \\
-proteinFDR ${params.protein_level_fdr_cutoff} \\
$args \\
|& tee proteomicslfq.log
Expand Down
1 change: 1 addition & 0 deletions modules/local/openms/thirdparty/percolator/main.nf
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
process PERCOLATOR {
tag "$meta.id"
label 'process_medium'

conda (params.enable_conda ? "bioconda::openms-thirdparty=2.8.0" : null)
Expand Down
6 changes: 3 additions & 3 deletions modules/local/pmultiqc/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -14,14 +14,14 @@ process PMULTIQC {

output:
path "*.html", emit: ch_pmultiqc_report
path "*.db", optional:true, emit: ch_pmultiqc_db
path "*.db", optional: true, emit: ch_pmultiqc_db
path "versions.yml", emit: versions
path "*_data", emit: data
path "*_plots", optional:true, emit: plots
path "*_plots", optional: true, emit: plots

script:
def args = task.ext.args ?: ''
def disable_pmultqic = params.enable_pmultiqc ? "": "--disable_plugin"
def disable_pmultqic = params.enable_pmultiqc ? "" : "--disable_plugin"

"""
multiqc \\
Expand Down
9 changes: 5 additions & 4 deletions modules/local/preprocess_expdesign.nf
Original file line number Diff line number Diff line change
Expand Up @@ -5,24 +5,25 @@
process PREPROCESS_EXPDESIGN {
label 'process_very_low'
label 'process_single_thread'
tag "$design.Name"

container "frolvlad/alpine-bash"

input:
path design

output:
path "experimental_design.tsv", emit: ch_expdesign
path "config.tsv", emit: ch_config
path "${design.baseName}_design.tsv", emit: ch_expdesign
path "${design.baseName}_config.tsv", emit: ch_config

script:

"""
# Since we know that we will need to convert from raw to mzML for all tools that need the design (i.e., OpenMS tools),
# we edit the design here and change the file extensions.
sed 's/.raw\\t/.mzML\\t/I' $design > experimental_design.tsv
sed 's/.raw\\t/.mzML\\t/I' ${design} > ${design.baseName}_design.tsv

# here we extract the filenames and fake an empty config (since the config values will be deduced from the workflow params)
a=\$(grep -n '^\$' $design | head -n1| awk -F":" '{print \$1}'); sed -e ''"\${a}"',\$d' $design > config.tsv
a=\$(grep -n '^\$' ${design} | head -n1| awk -F":" '{print \$1}'); sed -e ''"\${a}"',\$d' ${design} > ${design.baseName}_config.tsv
"""
}
11 changes: 7 additions & 4 deletions modules/local/sdrfparsing/main.nf
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
process SDRFPARSING {
tag "$sdrf.Name"
label 'process_low'

conda (params.enable_conda ? "conda-forge::pandas_schema bioconda::sdrf-pipelines=0.0.21" : null)
Expand All @@ -10,9 +11,9 @@ process SDRFPARSING {
path sdrf

output:
path "experimental_design.tsv", optional:true, emit: ch_expdesign
path "openms.tsv", optional:true, emit: ch_sdrf_config_file
path "*.xml", optional:true, emit: mqpar
path "${sdrf.baseName}_design.tsv", optional: true, emit: ch_expdesign
path "${sdrf.baseName}_config.tsv", optional: true, emit: ch_sdrf_config_file
path "*.xml", optional: true, emit: mqpar
path "*.log", emit: log
path "versions.yml", emit: version

Expand All @@ -24,7 +25,9 @@ process SDRFPARSING {
## -l for legacy behavior to always add sample columns
## TODO Update sdrf-pipelines to print versions dynamically

parse_sdrf convert-openms -t2 -l -s ${sdrf} |& tee sdrf_parsing.log
parse_sdrf convert-openms -t2 -l -s ${sdrf} |& tee ${sdrf.baseName}_parsing.log
mv openms.tsv ${sdrf.baseName}_config.tsv
mv experimental_design.tsv ${sdrf.baseName}_design.tsv

cat <<-END_VERSIONS > versions.yml
"${task.process}":
Expand Down