From a436649a65aafdf61113fa399dc1f39cd70e781b Mon Sep 17 00:00:00 2001 From: William Fondrie Date: Fri, 12 Jul 2024 15:07:45 -0700 Subject: [PATCH 01/13] Add option to disable ontologies --- bin/check_samplesheet.py | 27 +++++++++++++++++---------- modules/local/samplesheet_check.nf | 3 ++- nextflow.config | 1 + nextflow_schema.json | 6 ++++++ subworkflows/local/input_check.nf | 2 +- 5 files changed, 27 insertions(+), 12 deletions(-) diff --git a/bin/check_samplesheet.py b/bin/check_samplesheet.py index dd125b4e..384d90f7 100755 --- a/bin/check_samplesheet.py +++ b/bin/check_samplesheet.py @@ -20,6 +20,7 @@ def parse_args(args=None): parser = argparse.ArgumentParser(description=Description, epilog=Epilog) parser.add_argument("SDRF", help="SDRF/Expdesign file to be validated") parser.add_argument("ISSDRF", help="SDRF file or Expdesign file") + parser.add_argument("VALIDATE_ONTOLOGIES", help="Validate ontology terms.") parser.add_argument("--CHECK_MS", help="check mass spectrometry fields in SDRF.", action="store_true") return parser.parse_args(args) @@ -44,20 +45,26 @@ def print_error(error, context="Line", context_str=""): sys.exit(1) -def check_sdrf(check_ms, sdrf): +def check_sdrf(check_ms, sdrf, validate_ontologies): df = SdrfDataFrame.parse(sdrf) - errors = df.validate(DEFAULT_TEMPLATE) - if check_ms: - errors = errors + df.validate(MASS_SPECTROMETRY) - for error in errors: - print(error) - if not errors: - print("Everying seems to be fine. Well done.") + if validate_ontologies: + errors = df.validate(DEFAULT_TEMPLATE) + if check_ms: + errors = errors + df.validate(MASS_SPECTROMETRY) + for error in errors: + print(error) + if not errors: + print("Everying seems to be fine. Well done.") + else: + print("There were validation errors!") else: - print("There were validation errors!") + errors = False + print("No ontology term validation was performed.") + sys.exit(bool(errors)) + def check_expdesign(expdesign): data = pd.read_csv(expdesign, sep="\t", header=0, dtype=str) data = data.dropna() @@ -117,7 +124,7 @@ def main(args=None): args = parse_args(args) if args.ISSDRF == "true": - check_sdrf(args.CHECK_MS, args.SDRF) + check_sdrf(args.CHECK_MS, args.SDRF, args.VALIDATE_ONTOLOGIES) else: check_expdesign(args.SDRF) diff --git a/modules/local/samplesheet_check.nf b/modules/local/samplesheet_check.nf index 67d6a5f4..7015c002 100644 --- a/modules/local/samplesheet_check.nf +++ b/modules/local/samplesheet_check.nf @@ -13,6 +13,7 @@ process SAMPLESHEET_CHECK { input: path input_file val is_sdrf + val validate_ontologies output: path "*.log", emit: log @@ -27,7 +28,7 @@ process SAMPLESHEET_CHECK { def args = task.ext.args ?: '' """ - check_samplesheet.py "${input_file}" ${is_sdrf} --CHECK_MS 2>&1 | tee input_check.log + check_samplesheet.py "${input_file}" ${is_sdrf} ${validate_ontologies} --CHECK_MS 2>&1 | tee input_check.log cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/nextflow.config b/nextflow.config index 3aa01d83..a48ce8a7 100644 --- a/nextflow.config +++ b/nextflow.config @@ -19,6 +19,7 @@ params { // Input options input = null + validate_ontologies = true // Tools flags posterior_probabilities = 'percolator' diff --git a/nextflow_schema.json b/nextflow_schema.json index 8d1244a8..982402f3 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -70,6 +70,12 @@ "description": "Whether export PSM from decoy in final identification results", "fa_icon": "far fa-check-square", "help_text": "Whether export PSM from decoy in final identification results for dda_id subworkflow for specific cases." + }, + "validate_ontologies": { + "type": "boolean", + "description": "Check that ontology terms in an input SDRF file exist.", + "fa_icon": "far fa-check-square", + "help_text": "If false, only a basic readability check is performed on an input SDRF file. This option is useful when ontology providers are inaccessible." } } }, diff --git a/subworkflows/local/input_check.nf b/subworkflows/local/input_check.nf index 002bf9cd..fc9c4543 100644 --- a/subworkflows/local/input_check.nf +++ b/subworkflows/local/input_check.nf @@ -18,7 +18,7 @@ workflow INPUT_CHECK { exit 1 } } - SAMPLESHEET_CHECK ( input_file, is_sdrf ) + SAMPLESHEET_CHECK ( input_file, is_sdrf, params.validate_ontologies ) emit: ch_input_file = SAMPLESHEET_CHECK.out.checked_file From bbc06b1fb72762f0f66f3d1e85a93b7c4d9fc2fa Mon Sep 17 00:00:00 2001 From: William Fondrie Date: Fri, 12 Jul 2024 15:54:03 -0700 Subject: [PATCH 02/13] Fix boolean --- bin/check_samplesheet.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bin/check_samplesheet.py b/bin/check_samplesheet.py index 384d90f7..3e2d41a1 100755 --- a/bin/check_samplesheet.py +++ b/bin/check_samplesheet.py @@ -124,7 +124,7 @@ def main(args=None): args = parse_args(args) if args.ISSDRF == "true": - check_sdrf(args.CHECK_MS, args.SDRF, args.VALIDATE_ONTOLOGIES) + check_sdrf(args.CHECK_MS, args.SDRF, args.VALIDATE_ONTOLOGIES == "true") else: check_expdesign(args.SDRF) From d609e5657e2fbb1c1dd8b1a2a5c7694a22b9cd0d Mon Sep 17 00:00:00 2001 From: William Fondrie Date: Fri, 12 Jul 2024 16:25:37 -0700 Subject: [PATCH 03/13] bumped changelog --- CHANGELOG.md | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 318d735d..da830459 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,22 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [Unreleased] nfcore/quantms + +### `Added` + +- [#386](https://github.com/bigbio/quantms/pull/386) Make validation of ontology terms optional + +### `Changed` + +### `Fixed` + +### `Dependencies` + +### `Parameters` + +- `validate_ontologies`: enable or disable validating ontologies in the input SDRF file. + ## [1.3.0] nfcore/quantms - [08/04/2024] - Santiago de Cuba ### `Added` From 0e3e2fbaa9ef3063638443b8a7e1c14d3d923609 Mon Sep 17 00:00:00 2001 From: Chengxin Dai <37200167+daichengxin@users.noreply.github.com> Date: Tue, 16 Jul 2024 14:15:21 +0800 Subject: [PATCH 04/13] Update main.nf --- modules/local/ms2rescore/main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/local/ms2rescore/main.nf b/modules/local/ms2rescore/main.nf index 31640fe4..c9059589 100644 --- a/modules/local/ms2rescore/main.nf +++ b/modules/local/ms2rescore/main.nf @@ -2,7 +2,7 @@ process MS2RESCORE { tag "$meta.mzml_id" label 'process_high' - conda "bioconda::ms2rescore=3.0.3 bioconda::psm-utils=0.8.0 conda-forge::pydantic=1.10" + conda "bioconda::ms2rescore=3.0.3 bioconda::psm-utils=0.8.0 conda-forge::pydantic=1.10 pygam=0.9.1" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/ms2rescore:3.0.3--pyhdfd78af_0': 'biocontainers/ms2rescore:3.0.3--pyhdfd78af_0' }" From b8231a7cc269937073820908914d34ca0c377356 Mon Sep 17 00:00:00 2001 From: Chengxin Dai <37200167+daichengxin@users.noreply.github.com> Date: Tue, 16 Jul 2024 14:46:11 +0800 Subject: [PATCH 05/13] Update main.nf --- modules/local/ms2rescore/main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/local/ms2rescore/main.nf b/modules/local/ms2rescore/main.nf index c9059589..b0ca1dac 100644 --- a/modules/local/ms2rescore/main.nf +++ b/modules/local/ms2rescore/main.nf @@ -2,7 +2,7 @@ process MS2RESCORE { tag "$meta.mzml_id" label 'process_high' - conda "bioconda::ms2rescore=3.0.3 bioconda::psm-utils=0.8.0 conda-forge::pydantic=1.10 pygam=0.9.1" + conda "bioconda::ms2rescore=3.0.3 bioconda::psm-utils=0.8.0 conda-forge::pydantic=1.10 pygam=0.9.1 bioconda::deeplc=2.2.27" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/ms2rescore:3.0.3--pyhdfd78af_0': 'biocontainers/ms2rescore:3.0.3--pyhdfd78af_0' }" From 3c49d89f8112a363a25c4501c79fa96ce724e83f Mon Sep 17 00:00:00 2001 From: Chengxin Dai <37200167+daichengxin@users.noreply.github.com> Date: Tue, 16 Jul 2024 15:13:53 +0800 Subject: [PATCH 06/13] Update main.nf --- modules/local/ms2rescore/main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/local/ms2rescore/main.nf b/modules/local/ms2rescore/main.nf index b0ca1dac..41e745cf 100644 --- a/modules/local/ms2rescore/main.nf +++ b/modules/local/ms2rescore/main.nf @@ -2,7 +2,7 @@ process MS2RESCORE { tag "$meta.mzml_id" label 'process_high' - conda "bioconda::ms2rescore=3.0.3 bioconda::psm-utils=0.8.0 conda-forge::pydantic=1.10 pygam=0.9.1 bioconda::deeplc=2.2.27" + conda "bioconda::ms2rescore=3.0.3 bioconda::psm-utils=0.8.2 conda-forge::pydantic=1.10.14 pygam=0.9.1 bioconda::deeplc=2.2.27 bioconda::ms2pip=4.0.0.dev8 bioconda::deeplcretrainer=0.2.11" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/ms2rescore:3.0.3--pyhdfd78af_0': 'biocontainers/ms2rescore:3.0.3--pyhdfd78af_0' }" From 1246727f2928070abdb5f89b34c304b6fd413250 Mon Sep 17 00:00:00 2001 From: Chengxin Dai <37200167+daichengxin@users.noreply.github.com> Date: Tue, 16 Jul 2024 16:11:26 +0800 Subject: [PATCH 07/13] Update main.nf --- modules/local/ms2rescore/main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/local/ms2rescore/main.nf b/modules/local/ms2rescore/main.nf index 41e745cf..473a2e63 100644 --- a/modules/local/ms2rescore/main.nf +++ b/modules/local/ms2rescore/main.nf @@ -2,7 +2,7 @@ process MS2RESCORE { tag "$meta.mzml_id" label 'process_high' - conda "bioconda::ms2rescore=3.0.3 bioconda::psm-utils=0.8.2 conda-forge::pydantic=1.10.14 pygam=0.9.1 bioconda::deeplc=2.2.27 bioconda::ms2pip=4.0.0.dev8 bioconda::deeplcretrainer=0.2.11" + conda "bioconda::ms2rescore=3.0.3 bioconda::psm-utils=0.8.2 conda-forge::pydantic=1.10.14 pygam=0.9.1 bioconda::deeplc=2.2.27 bioconda::ms2pip=4.0.0.dev8 bioconda::deeplcretrainer=0.2.11 conda-forge::scikit-learn=1.4.2 conda-forge::scipy=1.13.0" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/ms2rescore:3.0.3--pyhdfd78af_0': 'biocontainers/ms2rescore:3.0.3--pyhdfd78af_0' }" From d34ee73cde795babd499b83263ee584b6c564e48 Mon Sep 17 00:00:00 2001 From: Chengxin Dai <37200167+daichengxin@users.noreply.github.com> Date: Tue, 16 Jul 2024 16:56:30 +0800 Subject: [PATCH 08/13] Update psm_conversion.py --- bin/psm_conversion.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/bin/psm_conversion.py b/bin/psm_conversion.py index 767859a0..5abd8e21 100755 --- a/bin/psm_conversion.py +++ b/bin/psm_conversion.py @@ -17,6 +17,8 @@ def mods_position(peptide): + if peptide.startswith("."): + peptide = peptide[1:] pattern = re.compile(r"\((.*?)\)") original_mods = pattern.findall(peptide) peptide = re.sub(r"\(.*?\)", ".", peptide) From dce32154cd718e98f2313db999502c18f1cf730b Mon Sep 17 00:00:00 2001 From: Yasset Perez-Riverol Date: Wed, 17 Jul 2024 13:44:22 +0100 Subject: [PATCH 09/13] sdrf-pipelines 0.0.26 -> 0.0.27 --- modules/local/preprocess_expdesign.nf | 6 +++--- modules/local/samplesheet_check.nf | 6 +++--- modules/local/sdrfparsing/main.nf | 6 +++--- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/modules/local/preprocess_expdesign.nf b/modules/local/preprocess_expdesign.nf index dbe01a79..76ea14fe 100644 --- a/modules/local/preprocess_expdesign.nf +++ b/modules/local/preprocess_expdesign.nf @@ -6,11 +6,11 @@ process PREPROCESS_EXPDESIGN { tag "$design.Name" label 'process_low' - conda "bioconda::sdrf-pipelines=0.0.26" + conda "bioconda::sdrf-pipelines=0.0.27" if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { - container "https://depot.galaxyproject.org/singularity/sdrf-pipelines:0.0.26--pyhdfd78af_0" + container "https://depot.galaxyproject.org/singularity/sdrf-pipelines:0.0.27--pyhdfd78af_0" } else { - container "biocontainers/sdrf-pipelines:0.0.26--pyhdfd78af_0" + container "biocontainers/sdrf-pipelines:0.0.27--pyhdfd78af_0" } input: diff --git a/modules/local/samplesheet_check.nf b/modules/local/samplesheet_check.nf index 7015c002..9097151c 100644 --- a/modules/local/samplesheet_check.nf +++ b/modules/local/samplesheet_check.nf @@ -3,11 +3,11 @@ process SAMPLESHEET_CHECK { tag "$input_file" label 'process_single' - conda "bioconda::sdrf-pipelines=0.0.26" + conda "bioconda::sdrf-pipelines=0.0.27" if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { - container "https://depot.galaxyproject.org/singularity/sdrf-pipelines:0.0.26--pyhdfd78af_0" + container "https://depot.galaxyproject.org/singularity/sdrf-pipelines:0.0.27--pyhdfd78af_0" } else { - container "biocontainers/sdrf-pipelines:0.0.26--pyhdfd78af_0" + container "biocontainers/sdrf-pipelines:0.0.27--pyhdfd78af_0" } input: diff --git a/modules/local/sdrfparsing/main.nf b/modules/local/sdrfparsing/main.nf index 5236c5de..2a425c11 100644 --- a/modules/local/sdrfparsing/main.nf +++ b/modules/local/sdrfparsing/main.nf @@ -2,10 +2,10 @@ process SDRFPARSING { tag "$sdrf.Name" label 'process_low' - conda "conda-forge::pandas_schema bioconda::sdrf-pipelines=0.0.26" + conda "conda-forge::pandas_schema bioconda::sdrf-pipelines=0.0.27" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/sdrf-pipelines:0.0.26--pyhdfd78af_0' : - 'biocontainers/sdrf-pipelines:0.0.26--pyhdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/sdrf-pipelines:0.0.27--pyhdfd78af_0' : + 'biocontainers/sdrf-pipelines:0.0.27--pyhdfd78af_0' }" input: path sdrf From bdcaed1d9f687a89fe351238d1ad4b8d6708e0d2 Mon Sep 17 00:00:00 2001 From: Yasset Perez-Riverol Date: Wed, 17 Jul 2024 15:55:39 +0100 Subject: [PATCH 10/13] small changes in mamba --- .github/workflows/ci.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index a7289004..35546611 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -61,7 +61,8 @@ jobs: wget -qO- https://micromamba.snakepit.net/api/micromamba/linux-64/latest | tar -xvj bin/micromamba echo "$(pwd)/bin" >> $GITHUB_PATH echo "$(pwd)/micromamba/bin" >> $GITHUB_PATH - ./bin/micromamba shell init -s bash -p ./micromamba + export MAMBA_ROOT_PREFIX="./micromamba" + ./bin/micromamba shell init -s bash echo $'channels:\n - conda-forge\n - bioconda\n - defaults\nuse_lockfiles: false' >> ~/.mambarc - name: Run pipeline with test data From 182b9e10d099e4e94d64d15ad3ad096eb22e6953 Mon Sep 17 00:00:00 2001 From: Yasset Perez-Riverol Date: Wed, 17 Jul 2024 16:02:10 +0100 Subject: [PATCH 11/13] small changes in mamba --- .github/workflows/ci.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 35546611..a7289004 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -61,8 +61,7 @@ jobs: wget -qO- https://micromamba.snakepit.net/api/micromamba/linux-64/latest | tar -xvj bin/micromamba echo "$(pwd)/bin" >> $GITHUB_PATH echo "$(pwd)/micromamba/bin" >> $GITHUB_PATH - export MAMBA_ROOT_PREFIX="./micromamba" - ./bin/micromamba shell init -s bash + ./bin/micromamba shell init -s bash -p ./micromamba echo $'channels:\n - conda-forge\n - bioconda\n - defaults\nuse_lockfiles: false' >> ~/.mambarc - name: Run pipeline with test data From 6bd2be2831dafdbaac07a1348c61bf21732b43de Mon Sep 17 00:00:00 2001 From: Yasset Perez-Riverol Date: Wed, 17 Jul 2024 16:26:08 +0100 Subject: [PATCH 12/13] small changes in mamba --- .github/workflows/ci.yml | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index a7289004..925421ba 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -56,12 +56,14 @@ jobs: uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1 - name: Install micromamba + env: + MAMBA_ROOT_PREFIX: ${{ github.workspace }}/.micromamba if: matrix.exec_profile == 'conda' run: | wget -qO- https://micromamba.snakepit.net/api/micromamba/linux-64/latest | tar -xvj bin/micromamba echo "$(pwd)/bin" >> $GITHUB_PATH echo "$(pwd)/micromamba/bin" >> $GITHUB_PATH - ./bin/micromamba shell init -s bash -p ./micromamba + ./bin/micromamba shell init -s bash echo $'channels:\n - conda-forge\n - bioconda\n - defaults\nuse_lockfiles: false' >> ~/.mambarc - name: Run pipeline with test data @@ -72,11 +74,14 @@ jobs: run: | nextflow run ${GITHUB_WORKSPACE} -profile $TEST_PROFILE,$EXEC_PROFILE --outdir ${TEST_PROFILE}_${EXEC_PROFILE}_results - name: Run pipeline with test data in conda profile (and single-threaded) + env: + MAMBA_ROOT_PREFIX: ${{ github.workspace }}/.micromamba if: matrix.exec_profile == 'conda' # TODO nf-core: You can customise CI pipeline run tests as required # For example: adding multiple test runs with different parameters # Remember that you can parallelise this by using strategy.matrix run: | + source $MAMBA_ROOT_PREFIX/etc/profile.d/micromamba.sh nextflow run ${GITHUB_WORKSPACE} -profile $TEST_PROFILE,micromamba --outdir ${TEST_PROFILE}_${EXEC_PROFILE}_results - name: Gather failed logs if: failure() || cancelled() From d4d0d1f47fdcf00e843c28dc5cb4571a6c6d5983 Mon Sep 17 00:00:00 2001 From: Yasset Perez-Riverol Date: Wed, 17 Jul 2024 16:31:57 +0100 Subject: [PATCH 13/13] small changes in mamba --- .github/workflows/ci.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 925421ba..a372b35d 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -81,7 +81,6 @@ jobs: # For example: adding multiple test runs with different parameters # Remember that you can parallelise this by using strategy.matrix run: | - source $MAMBA_ROOT_PREFIX/etc/profile.d/micromamba.sh nextflow run ${GITHUB_WORKSPACE} -profile $TEST_PROFILE,micromamba --outdir ${TEST_PROFILE}_${EXEC_PROFILE}_results - name: Gather failed logs if: failure() || cancelled()