Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Revert "first PR about identification subworkflow" #361

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions assets/adaptivecard.json
Original file line number Diff line number Diff line change
Expand Up @@ -54,8 +54,7 @@
"body": [
{
"type": "FactSet",
"facts": [<% out << summary.collect{ k,v -> "{\"title\": \"$k\", \"value\" : \"$v\"}"
}.join(",\n") %>
"facts": [<% out << summary.collect{ k,v -> "{\"title\": \"$k\", \"value\" : \"$v\"}"}.join(",\n") %>
]
}
]
Expand Down
24 changes: 5 additions & 19 deletions bin/mzml_statistics.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,12 @@
import sys
from pathlib import Path
import sqlite3
import re

import pandas as pd
from pyopenms import MSExperiment, MzMLFile


def ms_dataframe(ms_path: str, id_only: bool = False) -> None:
def ms_dataframe(ms_path: str) -> None:
file_columns = [
"SpectrumID",
"MSLevel",
Expand All @@ -25,9 +25,8 @@ def ms_dataframe(ms_path: str, id_only: bool = False) -> None:
"AcquisitionDateTime",
]

def parse_mzml(file_name: str, file_columns: list, id_only: bool = False):
def parse_mzml(file_name: str, file_columns: list):
info = []
psm_part_info = []
exp = MSExperiment()
acquisition_datetime = exp.getDateTime().get()
MzMLFile().load(file_name, exp)
Expand Down Expand Up @@ -55,23 +54,11 @@ def parse_mzml(file_name: str, file_columns: list, id_only: bool = False):
charge_state = spectrum.getPrecursors()[0].getCharge()
emz = spectrum.getPrecursors()[0].getMZ() if spectrum.getPrecursors()[0].getMZ() else None
info_list = [id_, MSLevel, charge_state, peak_per_ms, bpc, tic, rt, emz, acquisition_datetime]
mz_array = peaks_tuple[0]
intensity_array = peaks_tuple[1]
else:
info_list = [id_, MSLevel, None, None, None, None, rt, None, acquisition_datetime]

if id_only and MSLevel == 2:
psm_part_info.append([re.findall(r"[scan|spectrum]=(\d+)", id_)[0], MSLevel, mz_array, intensity_array])
info.append(info_list)

if id_only and len(psm_part_info) > 0:
pd.DataFrame(psm_part_info, columns=["scan", "ms_level", "mz", "intensity"]).to_csv(
f"{Path(ms_path).stem}_spectrum_df.csv",
mode="w",
index=False,
header=True,
)

return pd.DataFrame(info, columns=file_columns)

def parse_bruker_d(file_name: str, file_columns: list):
Expand Down Expand Up @@ -152,7 +139,7 @@ def parse_bruker_d(file_name: str, file_columns: list):
if Path(ms_path).suffix == ".d" and Path(ms_path).is_dir():
ms_df = parse_bruker_d(ms_path, file_columns)
elif Path(ms_path).suffix in [".mzML", ".mzml"]:
ms_df = parse_mzml(ms_path, file_columns, id_only)
ms_df = parse_mzml(ms_path, file_columns)
else:
msg = f"Unrecognized or inexistent mass spec file '{ms_path}'"
raise RuntimeError(msg)
Expand All @@ -168,8 +155,7 @@ def parse_bruker_d(file_name: str, file_columns: list):

def main():
ms_path = sys.argv[1]
id_only = sys.argv[2]
ms_dataframe(ms_path, id_only)
ms_dataframe(ms_path)


if __name__ == "__main__":
Expand Down
117 changes: 0 additions & 117 deletions bin/psm_conversion.py

This file was deleted.

10 changes: 0 additions & 10 deletions conf/modules.config
Original file line number Diff line number Diff line change
Expand Up @@ -245,16 +245,6 @@ process {
]
}

withName: '.*:DDA_ID:PSMFDRCONTROL:IDFILTER' {
ext.args = "-score:pep \"$params.run_fdr_cutoff\""
ext.suffix = '.idXML'
publishDir = [
path: { "${params.outdir}/idfilter" },
mode: params.publish_dir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
}

// PROTEOMICSLFQ
withName: '.*:LFQ:PROTEOMICSLFQ' {
ext.args = "-debug $params.plfq_debug"
Expand Down
36 changes: 0 additions & 36 deletions modules/local/extract_psm/main.nf

This file was deleted.

34 changes: 0 additions & 34 deletions modules/local/extract_psm/meta.yml

This file was deleted.

2 changes: 0 additions & 2 deletions modules/local/mzmlstatistics/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@ process MZMLSTATISTICS {

output:
path "*_ms_info.tsv", emit: ms_statistics
tuple val(meta), path("*_spectrum_df.csv"), emit: spectrum_df
path "versions.yml", emit: version
path "*.log", emit: log

Expand All @@ -25,7 +24,6 @@ process MZMLSTATISTICS {

"""
mzml_statistics.py "${ms_file}" \\
$params.id_only \\
2>&1 | tee mzml_statistics.log

cat <<-END_VERSIONS > versions.yml
Expand Down
4 changes: 0 additions & 4 deletions modules/local/mzmlstatistics/meta.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,6 @@ output:
type: file
description: mzMLs statistics file
pattern: "*_mzml_info.tsv"
- spectrum_df:
type: file
description: spectrum data file
pattern: "_spectrum_df.csv"
- version:
type: file
description: File containing software version
Expand Down
4 changes: 1 addition & 3 deletions modules/local/openms/thirdparty/searchenginemsgf/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -49,11 +49,9 @@ process SEARCHENGINEMSGF {
}

num_enzyme_termini = ""
max_missed_cleavages = "-max_missed_cleavages ${params.allowed_missed_cleavages}"
if (meta.enzyme == "unspecific cleavage")
{
num_enzyme_termini = "none"
max_missed_cleavages = ""
}
else if (params.num_enzyme_termini == "fully")
{
Expand All @@ -77,7 +75,7 @@ process SEARCHENGINEMSGF {
-max_precursor_charge $params.max_precursor_charge \\
-min_peptide_length $params.min_peptide_length \\
-max_peptide_length $params.max_peptide_length \\
${max_missed_cleavages} \\
-max_missed_cleavages $params.allowed_missed_cleavages \\
-isotope_error_range $params.isotope_error_range \\
-enzyme "${enzyme}" \\
-tryptic ${msgf_num_enzyme_termini} \\
Expand Down
5 changes: 0 additions & 5 deletions nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -15,15 +15,13 @@ params {
local_input_type = 'mzML'
database = null
acquisition_method = null
id_only = false

// Input options
input = null

// Tools flags
posterior_probabilities = 'percolator'
add_decoys = false
skip_rescoring = false
search_engines = 'comet'
sage_processes = 1
run_fdr_cutoff = 0.10
Expand Down Expand Up @@ -108,9 +106,6 @@ params {
// IDPEP flags
outlier_handling = "none"

// DDA_ID flags
export_decoy_psm = true

// Percolator flags
train_FDR = 0.05
test_FDR = 0.05
Expand Down
18 changes: 0 additions & 18 deletions nextflow_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -58,18 +58,6 @@
"description": "Proteomics data acquisition method",
"enum": ["dda", "dia"],
"fa_icon": "far fa-list-ol"
},
"id_only": {
"type": "boolean",
"description": "Only perform identification subworkflow.",
"fa_icon": "far fa-check-square",
"help_text": "Only perform identification subworkflow for specific cases."
},
"export_decoy_psm": {
"type": "boolean",
"description": "Whether export PSM from decoy in final identification results",
"fa_icon": "far fa-check-square",
"help_text": "Whether export PSM from decoy in final identification results for dda_id subworkflow for specific cases."
}
}
},
Expand Down Expand Up @@ -428,12 +416,6 @@
"description": "Choose between different rescoring/posterior probability calculation methods and set them up.",
"default": "",
"properties": {
"skip_rescoring": {
"type": "boolean",
"description": "Skip PSM rescoring steps for specific cases, such as studying pure search engine results and search engine ranks",
"default": false,
"fa_icon": "far fa-check-square"
},
"posterior_probabilities": {
"type": "string",
"description": "How to calculate posterior probabilities for PSMs:\n\n* 'percolator' = Re-score based on PSM-feature-based SVM and transform distance\n to hyperplane for posteriors\n* 'fit_distributions' = Fit positive and negative distributions to scores\n (similar to PeptideProphet)",
Expand Down
Loading
Loading