bigbio · daichengxin · Mar 24, 2024
diff --git a/assets/adaptivecard.json b/assets/adaptivecard.json
@@ -54,8 +54,7 @@
                             "body": [
                                 {
                                     "type": "FactSet",
-                                    "facts": [<% out << summary.collect{ k,v -> "{\"title\": \"$k\", \"value\" : \"$v\"}"
-                                        }.join(",\n") %>
+                                    "facts": [<% out << summary.collect{ k,v -> "{\"title\": \"$k\", \"value\" : \"$v\"}"}.join(",\n") %>
                                     ]
                                 }
                             ]

diff --git a/bin/mzml_statistics.py b/bin/mzml_statistics.py
@@ -7,12 +7,12 @@
 import sys
 from pathlib import Path
 import sqlite3
-import re
+
 import pandas as pd
 from pyopenms import MSExperiment, MzMLFile
 
 
-def ms_dataframe(ms_path: str, id_only: bool = False) -> None:
+def ms_dataframe(ms_path: str) -> None:
     file_columns = [
         "SpectrumID",
         "MSLevel",
@@ -25,9 +25,8 @@ def ms_dataframe(ms_path: str, id_only: bool = False) -> None:
         "AcquisitionDateTime",
     ]
 
-    def parse_mzml(file_name: str, file_columns: list, id_only: bool = False):
+    def parse_mzml(file_name: str, file_columns: list):
         info = []
-        psm_part_info = []
         exp = MSExperiment()
         acquisition_datetime = exp.getDateTime().get()
         MzMLFile().load(file_name, exp)
@@ -55,23 +54,11 @@ def parse_mzml(file_name: str, file_columns: list, id_only: bool = False):
                 charge_state = spectrum.getPrecursors()[0].getCharge()
                 emz = spectrum.getPrecursors()[0].getMZ() if spectrum.getPrecursors()[0].getMZ() else None
                 info_list = [id_, MSLevel, charge_state, peak_per_ms, bpc, tic, rt, emz, acquisition_datetime]
-                mz_array = peaks_tuple[0]
-                intensity_array = peaks_tuple[1]
             else:
                 info_list = [id_, MSLevel, None, None, None, None, rt, None, acquisition_datetime]
 
-            if id_only and MSLevel == 2:
-                psm_part_info.append([re.findall(r"[scan|spectrum]=(\d+)", id_)[0], MSLevel, mz_array, intensity_array])
             info.append(info_list)
 
-        if id_only and len(psm_part_info) > 0:
-            pd.DataFrame(psm_part_info, columns=["scan", "ms_level", "mz", "intensity"]).to_csv(
-                f"{Path(ms_path).stem}_spectrum_df.csv",
-                mode="w",
-                index=False,
-                header=True,
-            )
-
         return pd.DataFrame(info, columns=file_columns)
 
     def parse_bruker_d(file_name: str, file_columns: list):
@@ -152,7 +139,7 @@ def parse_bruker_d(file_name: str, file_columns: list):
     if Path(ms_path).suffix == ".d" and Path(ms_path).is_dir():
         ms_df = parse_bruker_d(ms_path, file_columns)
     elif Path(ms_path).suffix in [".mzML", ".mzml"]:
-        ms_df = parse_mzml(ms_path, file_columns, id_only)
+        ms_df = parse_mzml(ms_path, file_columns)
     else:
         msg = f"Unrecognized or inexistent mass spec file '{ms_path}'"
         raise RuntimeError(msg)
@@ -168,8 +155,7 @@ def parse_bruker_d(file_name: str, file_columns: list):
 
 def main():
     ms_path = sys.argv[1]
-    id_only = sys.argv[2]
-    ms_dataframe(ms_path, id_only)
+    ms_dataframe(ms_path)
 
 
 if __name__ == "__main__":

diff --git a/bin/psm_conversion.py b/bin/psm_conversion.py
diff --git a/conf/modules.config b/conf/modules.config
@@ -245,16 +245,6 @@ process {
         ]
     }
 
-    withName: '.*:DDA_ID:PSMFDRCONTROL:IDFILTER' {
-        ext.args    = "-score:pep \"$params.run_fdr_cutoff\""
-        ext.suffix  = '.idXML'
-        publishDir  = [
-            path: { "${params.outdir}/idfilter" },
-            mode: params.publish_dir_mode,
-            saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
-        ]
-    }
-
     // PROTEOMICSLFQ
     withName: '.*:LFQ:PROTEOMICSLFQ' {
         ext.args    = "-debug $params.plfq_debug"

diff --git a/modules/local/extract_psm/main.nf b/modules/local/extract_psm/main.nf
diff --git a/modules/local/extract_psm/meta.yml b/modules/local/extract_psm/meta.yml
diff --git a/modules/local/mzmlstatistics/main.nf b/modules/local/mzmlstatistics/main.nf
@@ -15,7 +15,6 @@ process MZMLSTATISTICS {
 
     output:
     path "*_ms_info.tsv", emit: ms_statistics
-    tuple val(meta), path("*_spectrum_df.csv"), emit: spectrum_df
     path "versions.yml", emit: version
     path "*.log", emit: log
 
@@ -25,7 +24,6 @@ process MZMLSTATISTICS {
 
     """
     mzml_statistics.py "${ms_file}" \\
-        $params.id_only \\
         2>&1 | tee mzml_statistics.log
 
     cat <<-END_VERSIONS > versions.yml

diff --git a/modules/local/mzmlstatistics/meta.yml b/modules/local/mzmlstatistics/meta.yml
@@ -19,10 +19,6 @@ output:
       type: file
       description: mzMLs statistics file
       pattern: "*_mzml_info.tsv"
-  - spectrum_df:
-      type: file
-      description: spectrum data file
-      pattern: "_spectrum_df.csv"
   - version:
       type: file
       description: File containing software version

diff --git a/modules/local/openms/thirdparty/searchenginemsgf/main.nf b/modules/local/openms/thirdparty/searchenginemsgf/main.nf
@@ -49,11 +49,9 @@ process SEARCHENGINEMSGF {
     }
 
     num_enzyme_termini = ""
-    max_missed_cleavages = "-max_missed_cleavages ${params.allowed_missed_cleavages}"
     if (meta.enzyme == "unspecific cleavage")
     {
         num_enzyme_termini = "none"
-        max_missed_cleavages = ""
     }
     else if (params.num_enzyme_termini == "fully")
     {
@@ -77,7 +75,7 @@ process SEARCHENGINEMSGF {
         -max_precursor_charge $params.max_precursor_charge \\
         -min_peptide_length $params.min_peptide_length \\
         -max_peptide_length $params.max_peptide_length \\
-        ${max_missed_cleavages} \\
+        -max_missed_cleavages $params.allowed_missed_cleavages \\
         -isotope_error_range $params.isotope_error_range \\
         -enzyme "${enzyme}" \\
         -tryptic ${msgf_num_enzyme_termini} \\

diff --git a/nextflow.config b/nextflow.config
@@ -15,15 +15,13 @@ params {
     local_input_type   = 'mzML'
     database           = null
     acquisition_method = null
-    id_only            = false
 
     // Input options
     input                      = null
 
     // Tools flags
     posterior_probabilities  = 'percolator'
     add_decoys               = false
-    skip_rescoring           = false
     search_engines           = 'comet'
     sage_processes           = 1
     run_fdr_cutoff           = 0.10
@@ -108,9 +106,6 @@ params {
     // IDPEP flags
     outlier_handling = "none"
 
-    // DDA_ID flags
-    export_decoy_psm   = true
-
     // Percolator flags
     train_FDR                    = 0.05
     test_FDR                     = 0.05

diff --git a/nextflow_schema.json b/nextflow_schema.json
@@ -58,18 +58,6 @@
                     "description": "Proteomics data acquisition method",
                     "enum": ["dda", "dia"],
                     "fa_icon": "far fa-list-ol"
-                },
-                "id_only": {
-                    "type": "boolean",
-                    "description": "Only perform identification subworkflow.",
-                    "fa_icon": "far fa-check-square",
-                    "help_text": "Only perform identification subworkflow for specific cases."
-                },
-                "export_decoy_psm": {
-                    "type": "boolean",
-                    "description": "Whether export PSM from decoy in final identification results",
-                    "fa_icon": "far fa-check-square",
-                    "help_text": "Whether export PSM from decoy in final identification results for dda_id subworkflow for specific cases."
                 }
             }
         },
@@ -428,12 +416,6 @@
             "description": "Choose between different rescoring/posterior probability calculation methods and set them up.",
             "default": "",
             "properties": {
-                "skip_rescoring": {
-                    "type": "boolean",
-                    "description": "Skip PSM rescoring steps for specific cases, such as studying pure search engine results and search engine ranks",
-                    "default": false,
-                    "fa_icon": "far fa-check-square"
-                },
                 "posterior_probabilities": {
                     "type": "string",
                     "description": "How to calculate posterior probabilities for PSMs:\n\n* 'percolator' = Re-score based on PSM-feature-based SVM and transform distance\n    to hyperplane for posteriors\n* 'fit_distributions' = Fit positive and negative distributions to scores\n    (similar to PeptideProphet)",