From d6f54b7cbf1931e364151edf7f4a80fda118d4f6 Mon Sep 17 00:00:00 2001 From: fleisch Date: Tue, 31 Oct 2023 00:01:20 +0100 Subject: [PATCH] added ConsensusAnnotations for compounds, added denovo FeatureAnnotations, fixed some bugs, did some refactorings --- .../controller/AlignedFeaturesController.java | 10 +- .../annotations/BinaryFingerprint.java | 2 +- .../annotations/CanopusLevels.java | 2 +- .../annotations/CanopusPrediction.java | 2 +- .../annotations/CompoundClass.java | 4 +- .../annotations/CompoundClasses.java | 5 +- .../annotations/ConsensusAnnotations.java | 50 +++++ .../annotations/ConsensusAnnotationsCSI.java | 64 ++++++ .../ConsensusAnnotationsDeNovo.java | 44 ++++ .../FeatureAnnotations.java} | 17 +- .../annotations/FormulaCandidate.java | 14 +- .../annotations/FragmentNode.java | 2 +- .../annotations/FragmentationTree.java | 4 +- .../{features => }/annotations/LossEdge.java | 2 +- .../annotations/StructureCandidate.java | 16 +- .../StructureCandidateFormula.java} | 44 ++-- .../annotations/StructureCandidateScored.java | 46 +++++ .../middleware/model/compounds/Compound.java | 45 ++--- .../model/features/AlignedFeature.java | 56 +++--- .../service/annotations/AnnotationUtils.java | 189 ++++++++++++++++++ .../middleware/service/projects/Project.java | 18 +- .../projects/SiriusProjectSpaceImpl.java | 122 +++++++---- 22 files changed, 593 insertions(+), 165 deletions(-) rename sirius_rest_service/src/main/java/de/unijena/bioinf/ms/middleware/model/{features => }/annotations/BinaryFingerprint.java (96%) rename sirius_rest_service/src/main/java/de/unijena/bioinf/ms/middleware/model/{features => }/annotations/CanopusLevels.java (96%) rename sirius_rest_service/src/main/java/de/unijena/bioinf/ms/middleware/model/{features => }/annotations/CanopusPrediction.java (97%) rename sirius_rest_service/src/main/java/de/unijena/bioinf/ms/middleware/model/{features => }/annotations/CompoundClass.java (95%) rename 
sirius_rest_service/src/main/java/de/unijena/bioinf/ms/middleware/model/{features => }/annotations/CompoundClasses.java (97%) create mode 100644 sirius_rest_service/src/main/java/de/unijena/bioinf/ms/middleware/model/annotations/ConsensusAnnotations.java create mode 100644 sirius_rest_service/src/main/java/de/unijena/bioinf/ms/middleware/model/annotations/ConsensusAnnotationsCSI.java create mode 100644 sirius_rest_service/src/main/java/de/unijena/bioinf/ms/middleware/model/annotations/ConsensusAnnotationsDeNovo.java rename sirius_rest_service/src/main/java/de/unijena/bioinf/ms/middleware/model/{features/annotations/Annotations.java => annotations/FeatureAnnotations.java} (78%) rename sirius_rest_service/src/main/java/de/unijena/bioinf/ms/middleware/model/{features => }/annotations/FormulaCandidate.java (84%) rename sirius_rest_service/src/main/java/de/unijena/bioinf/ms/middleware/model/{features => }/annotations/FragmentNode.java (95%) rename sirius_rest_service/src/main/java/de/unijena/bioinf/ms/middleware/model/{features => }/annotations/FragmentationTree.java (96%) rename sirius_rest_service/src/main/java/de/unijena/bioinf/ms/middleware/model/{features => }/annotations/LossEdge.java (94%) rename sirius_rest_service/src/main/java/de/unijena/bioinf/ms/middleware/model/{features => }/annotations/StructureCandidate.java (76%) rename sirius_rest_service/src/main/java/de/unijena/bioinf/ms/middleware/model/{features/annotations/StructureCandidateExt.java => annotations/StructureCandidateFormula.java} (69%) create mode 100644 sirius_rest_service/src/main/java/de/unijena/bioinf/ms/middleware/model/annotations/StructureCandidateScored.java create mode 100644 sirius_rest_service/src/main/java/de/unijena/bioinf/ms/middleware/service/annotations/AnnotationUtils.java diff --git a/sirius_rest_service/src/main/java/de/unijena/bioinf/ms/middleware/controller/AlignedFeaturesController.java 
b/sirius_rest_service/src/main/java/de/unijena/bioinf/ms/middleware/controller/AlignedFeaturesController.java index 20e0f3f5bf..36a8f5a9f1 100644 --- a/sirius_rest_service/src/main/java/de/unijena/bioinf/ms/middleware/controller/AlignedFeaturesController.java +++ b/sirius_rest_service/src/main/java/de/unijena/bioinf/ms/middleware/controller/AlignedFeaturesController.java @@ -20,8 +20,8 @@ package de.unijena.bioinf.ms.middleware.controller; import de.unijena.bioinf.ms.middleware.model.SearchQueryType; +import de.unijena.bioinf.ms.middleware.model.annotations.*; import de.unijena.bioinf.ms.middleware.model.features.AlignedFeature; -import de.unijena.bioinf.ms.middleware.model.features.annotations.*; import de.unijena.bioinf.ms.middleware.model.spectra.AnnotatedSpectrum; import de.unijena.bioinf.ms.middleware.service.projects.ProjectsProvider; import io.swagger.v3.oas.annotations.Hidden; @@ -111,12 +111,12 @@ public void deleteAlignedFeature(@PathVariable String projectId, @PathVariable S * @return StructureCandidate of this feature (aligned over runs) candidate with specified optional fields. */ @GetMapping(value = "/{alignedFeatureId}/structures", produces = MediaType.APPLICATION_JSON_VALUE) - public Page getStructureCandidates( + public Page getStructureCandidates( @PathVariable String projectId, @PathVariable String alignedFeatureId, @ParameterObject Pageable pageable, @RequestParam(required = false) String searchQuery, @RequestParam(defaultValue = "LUCENE") SearchQueryType querySyntax, - @RequestParam(defaultValue = "") EnumSet optFields + @RequestParam(defaultValue = "") EnumSet optFields ) { return projectsProvider.getProjectOrThrow(projectId) .findStructureCandidatesByFeatureId(alignedFeatureId, pageable, optFields); @@ -177,12 +177,12 @@ public FormulaCandidate getFormulaCandidate( * @return StructureCandidate of this formula candidate with specified optional fields. 
*/ @GetMapping(value = "/{alignedFeatureId}/formulas/{formulaId}/structures", produces = MediaType.APPLICATION_JSON_VALUE) - public Page getStructureCandidatesByFormula( + public Page getStructureCandidatesByFormula( @PathVariable String projectId, @PathVariable String alignedFeatureId, @PathVariable String formulaId, @ParameterObject Pageable pageable, @RequestParam(required = false) String searchQuery, @RequestParam(defaultValue = "LUCENE") SearchQueryType querySyntax, - @RequestParam(defaultValue = "") EnumSet optFields + @RequestParam(defaultValue = "") EnumSet optFields ) { return projectsProvider.getProjectOrThrow(projectId) .findStructureCandidatesByFeatureIdAndFormulaId(formulaId, alignedFeatureId, pageable, optFields); diff --git a/sirius_rest_service/src/main/java/de/unijena/bioinf/ms/middleware/model/features/annotations/BinaryFingerprint.java b/sirius_rest_service/src/main/java/de/unijena/bioinf/ms/middleware/model/annotations/BinaryFingerprint.java similarity index 96% rename from sirius_rest_service/src/main/java/de/unijena/bioinf/ms/middleware/model/features/annotations/BinaryFingerprint.java rename to sirius_rest_service/src/main/java/de/unijena/bioinf/ms/middleware/model/annotations/BinaryFingerprint.java index 7bcc8d17ff..2e6c29bd35 100644 --- a/sirius_rest_service/src/main/java/de/unijena/bioinf/ms/middleware/model/features/annotations/BinaryFingerprint.java +++ b/sirius_rest_service/src/main/java/de/unijena/bioinf/ms/middleware/model/annotations/BinaryFingerprint.java @@ -18,7 +18,7 @@ * You should have received a copy of the GNU Lesser General Public License along with SIRIUS. 
If not, see */ -package de.unijena.bioinf.ms.middleware.model.features.annotations; +package de.unijena.bioinf.ms.middleware.model.annotations; import de.unijena.bioinf.ChemistryBase.fp.Fingerprint; import lombok.Getter; diff --git a/sirius_rest_service/src/main/java/de/unijena/bioinf/ms/middleware/model/features/annotations/CanopusLevels.java b/sirius_rest_service/src/main/java/de/unijena/bioinf/ms/middleware/model/annotations/CanopusLevels.java similarity index 96% rename from sirius_rest_service/src/main/java/de/unijena/bioinf/ms/middleware/model/features/annotations/CanopusLevels.java rename to sirius_rest_service/src/main/java/de/unijena/bioinf/ms/middleware/model/annotations/CanopusLevels.java index 101ebd1d0c..4f310eb74d 100644 --- a/sirius_rest_service/src/main/java/de/unijena/bioinf/ms/middleware/model/features/annotations/CanopusLevels.java +++ b/sirius_rest_service/src/main/java/de/unijena/bioinf/ms/middleware/model/annotations/CanopusLevels.java @@ -18,7 +18,7 @@ * You should have received a copy of the GNU Lesser General Public License along with SIRIUS. 
If not, see */ -package de.unijena.bioinf.ms.middleware.model.features.annotations; +package de.unijena.bioinf.ms.middleware.model.annotations; import de.unijena.bioinf.ChemistryBase.fp.NPCFingerprintVersion; diff --git a/sirius_rest_service/src/main/java/de/unijena/bioinf/ms/middleware/model/features/annotations/CanopusPrediction.java b/sirius_rest_service/src/main/java/de/unijena/bioinf/ms/middleware/model/annotations/CanopusPrediction.java similarity index 97% rename from sirius_rest_service/src/main/java/de/unijena/bioinf/ms/middleware/model/features/annotations/CanopusPrediction.java rename to sirius_rest_service/src/main/java/de/unijena/bioinf/ms/middleware/model/annotations/CanopusPrediction.java index 957fcb9554..a263e701e8 100644 --- a/sirius_rest_service/src/main/java/de/unijena/bioinf/ms/middleware/model/features/annotations/CanopusPrediction.java +++ b/sirius_rest_service/src/main/java/de/unijena/bioinf/ms/middleware/model/annotations/CanopusPrediction.java @@ -18,7 +18,7 @@ * You should have received a copy of the GNU Lesser General Public License along with SIRIUS. 
If not, see */ -package de.unijena.bioinf.ms.middleware.model.features.annotations; +package de.unijena.bioinf.ms.middleware.model.annotations; import de.unijena.bioinf.ChemistryBase.fp.ClassyfireProperty; import de.unijena.bioinf.ChemistryBase.fp.FPIter; diff --git a/sirius_rest_service/src/main/java/de/unijena/bioinf/ms/middleware/model/features/annotations/CompoundClass.java b/sirius_rest_service/src/main/java/de/unijena/bioinf/ms/middleware/model/annotations/CompoundClass.java similarity index 95% rename from sirius_rest_service/src/main/java/de/unijena/bioinf/ms/middleware/model/features/annotations/CompoundClass.java rename to sirius_rest_service/src/main/java/de/unijena/bioinf/ms/middleware/model/annotations/CompoundClass.java index 68216b5767..1786edfcc1 100644 --- a/sirius_rest_service/src/main/java/de/unijena/bioinf/ms/middleware/model/features/annotations/CompoundClass.java +++ b/sirius_rest_service/src/main/java/de/unijena/bioinf/ms/middleware/model/annotations/CompoundClass.java @@ -3,7 +3,7 @@ * This file is part of the SIRIUS library for analyzing MS and MS/MS data * * Copyright (C) 2013-2020 Kai Dührkop, Markus Fleischauer, Marcus Ludwig, Martin A. Hoffman, Fleming Kretschmer and Sebastian Böcker, - * Chair of Bioinformatics, Friedrich-Schilller University. + * Chair of Bioinformatics, Friedrich-Schiller University. * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public @@ -18,7 +18,7 @@ * You should have received a copy of the GNU Lesser General Public License along with SIRIUS. 
If not, see */ -package de.unijena.bioinf.ms.middleware.model.features.annotations; +package de.unijena.bioinf.ms.middleware.model.annotations; import de.unijena.bioinf.ChemistryBase.fp.ClassyfireProperty; import de.unijena.bioinf.ChemistryBase.fp.NPCFingerprintVersion; diff --git a/sirius_rest_service/src/main/java/de/unijena/bioinf/ms/middleware/model/features/annotations/CompoundClasses.java b/sirius_rest_service/src/main/java/de/unijena/bioinf/ms/middleware/model/annotations/CompoundClasses.java similarity index 97% rename from sirius_rest_service/src/main/java/de/unijena/bioinf/ms/middleware/model/features/annotations/CompoundClasses.java rename to sirius_rest_service/src/main/java/de/unijena/bioinf/ms/middleware/model/annotations/CompoundClasses.java index 72b7cb93f6..3e3ba63b6e 100644 --- a/sirius_rest_service/src/main/java/de/unijena/bioinf/ms/middleware/model/features/annotations/CompoundClasses.java +++ b/sirius_rest_service/src/main/java/de/unijena/bioinf/ms/middleware/model/annotations/CompoundClasses.java @@ -3,7 +3,7 @@ * This file is part of the SIRIUS library for analyzing MS and MS/MS data * * Copyright (C) 2013-2020 Kai Dührkop, Markus Fleischauer, Marcus Ludwig, Martin A. Hoffman, Fleming Kretschmer and Sebastian Böcker, - * Chair of Bioinformatics, Friedrich-Schilller University. + * Chair of Bioinformatics, Friedrich-Schiller University. * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public @@ -18,8 +18,7 @@ * You should have received a copy of the GNU Lesser General Public License along with SIRIUS. 
If not, see */ - -package de.unijena.bioinf.ms.middleware.model.features.annotations; +package de.unijena.bioinf.ms.middleware.model.annotations; import de.unijena.bioinf.ChemistryBase.fp.*; import de.unijena.bioinf.canopus.CanopusResult; diff --git a/sirius_rest_service/src/main/java/de/unijena/bioinf/ms/middleware/model/annotations/ConsensusAnnotations.java b/sirius_rest_service/src/main/java/de/unijena/bioinf/ms/middleware/model/annotations/ConsensusAnnotations.java new file mode 100644 index 0000000000..ba206bd80a --- /dev/null +++ b/sirius_rest_service/src/main/java/de/unijena/bioinf/ms/middleware/model/annotations/ConsensusAnnotations.java @@ -0,0 +1,50 @@ +/* + * + * This file is part of the SIRIUS library for analyzing MS and MS/MS data + * + * Copyright (C) 2013-2020 Kai Dührkop, Markus Fleischauer, Marcus Ludwig, Martin A. Hoffman, Fleming Kretschmer and Sebastian Böcker, + * Chair of Bioinformatics, Friedrich-Schiller University. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 3 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License along with SIRIUS. If not, see + */ + +package de.unijena.bioinf.ms.middleware.model.annotations; + +import lombok.Getter; +import lombok.Setter; +import lombok.experimental.SuperBuilder; + +import java.util.List; + +@Getter +@Setter +@SuperBuilder +abstract class ConsensusAnnotations { + + /** + * Molecular formula of the consensus annotation + * Might be null if no consensus formula is available. 
+ */ + protected String molecularFormula; + + /** + * Compound classes (predicted with CANOPUS) corresponding to the molecularFormula + * Might be null if no fingerprints or compound classes are available. + */ + protected CompoundClasses compoundClasses; + + /** + * FeatureIds where the topAnnotation supports this annotation. + */ + protected List supportingFeatureIds; +} diff --git a/sirius_rest_service/src/main/java/de/unijena/bioinf/ms/middleware/model/annotations/ConsensusAnnotationsCSI.java b/sirius_rest_service/src/main/java/de/unijena/bioinf/ms/middleware/model/annotations/ConsensusAnnotationsCSI.java new file mode 100644 index 0000000000..5cd4226aa4 --- /dev/null +++ b/sirius_rest_service/src/main/java/de/unijena/bioinf/ms/middleware/model/annotations/ConsensusAnnotationsCSI.java @@ -0,0 +1,64 @@ +/* + * + * This file is part of the SIRIUS library for analyzing MS and MS/MS data + * + * Copyright (C) 2013-2020 Kai Dührkop, Markus Fleischauer, Marcus Ludwig, Martin A. Hoffman, Fleming Kretschmer and Sebastian Böcker, + * Chair of Bioinformatics, Friedrich-Schiller University. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 3 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License along with SIRIUS. 
If not, see + */ + +package de.unijena.bioinf.ms.middleware.model.annotations; + +import lombok.Getter; +import lombok.Setter; +import lombok.experimental.SuperBuilder; + +@Getter +@Setter +@SuperBuilder +public class ConsensusAnnotationsCSI extends ConsensusAnnotations { + public enum Criterion { + MAJORITY_STRUCTURE, + CONFIDENCE_STRUCTURE, + SINGLETON_STRUCTURE, + MAJORITY_FORMULA, + TOP_FORMULA, + SINGLETON_FORMULA + } + + + /** + * Null if this is a custom selection + */ + protected Criterion selectionCriterion; + + /** + * Database structure candidate (searched with CSI:FingerID), that also defines the molecularFormula + * Might be null if no consensus structure is available. + */ + protected StructureCandidate csiFingerIdStructure; + + /** + * Confidence value that represents the certainty that reported consensus structure is exactly the measured one + * If multiple features support this consensus structure the maximum confidence is reported + */ + protected Double confidenceExactMatch; + + /** + * Confidence value that represents the certainty that the exact consensus structure or a very similar + * structure (e.g. measured by Maximum Common Edge Subgraph Distance) is the measured one. + * If multiple features support this consensus structure the maximum confidence is reported + */ + protected Double confidenceApproxMatch; +} diff --git a/sirius_rest_service/src/main/java/de/unijena/bioinf/ms/middleware/model/annotations/ConsensusAnnotationsDeNovo.java b/sirius_rest_service/src/main/java/de/unijena/bioinf/ms/middleware/model/annotations/ConsensusAnnotationsDeNovo.java new file mode 100644 index 0000000000..92f4eae07c --- /dev/null +++ b/sirius_rest_service/src/main/java/de/unijena/bioinf/ms/middleware/model/annotations/ConsensusAnnotationsDeNovo.java @@ -0,0 +1,44 @@ +/* + * + * This file is part of the SIRIUS library for analyzing MS and MS/MS data + * + * Copyright (C) 2013-2020 Kai Dührkop, Markus Fleischauer, Marcus Ludwig, Martin A. 
Hoffman, Fleming Kretschmer and Sebastian Böcker, + * Chair of Bioinformatics, Friedrich-Schiller University. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 3 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License along with SIRIUS. If not, see + */ + +package de.unijena.bioinf.ms.middleware.model.annotations; + +import lombok.Getter; +import lombok.Setter; +import lombok.experimental.SuperBuilder; + +@Getter +@Setter +@SuperBuilder +public class ConsensusAnnotationsDeNovo extends ConsensusAnnotations{ + public enum Criterion { + MAJORITY_FORMULA, + TOP_FORMULA, + SINGLETON_FORMULA + } + + protected Criterion selectionCriterion; + +// /** +// * DeNovo Structure candidate (predicted with MSNovelist) corresponding to the molecularFormula +// * Might be null if no consensus structure is available. 
+// */ +// protected StructureCandidate structureMsNovelist; +} diff --git a/sirius_rest_service/src/main/java/de/unijena/bioinf/ms/middleware/model/features/annotations/Annotations.java b/sirius_rest_service/src/main/java/de/unijena/bioinf/ms/middleware/model/annotations/FeatureAnnotations.java similarity index 78% rename from sirius_rest_service/src/main/java/de/unijena/bioinf/ms/middleware/model/features/annotations/Annotations.java rename to sirius_rest_service/src/main/java/de/unijena/bioinf/ms/middleware/model/annotations/FeatureAnnotations.java index 370b3fde3f..db8ae7bf86 100644 --- a/sirius_rest_service/src/main/java/de/unijena/bioinf/ms/middleware/model/features/annotations/Annotations.java +++ b/sirius_rest_service/src/main/java/de/unijena/bioinf/ms/middleware/model/annotations/FeatureAnnotations.java @@ -17,7 +17,7 @@ * You should have received a copy of the GNU Affero General Public License along with SIRIUS. If not, see */ -package de.unijena.bioinf.ms.middleware.model.features.annotations; +package de.unijena.bioinf.ms.middleware.model.annotations; import com.fasterxml.jackson.annotation.JsonInclude; import lombok.Getter; @@ -25,7 +25,6 @@ /** * Summary of the results of a feature (aligned over runs). Can be added to a AlignedFeature. - * It is null within a AlignedFeature if it was not requested und non-null otherwise. * The different annotation fields within this summary object are null if the corresponding * feature does not contain the represented results. If fields are non-null * the corresponding result has been computed but might still be empty. @@ -33,9 +32,17 @@ @Getter @Setter @JsonInclude(JsonInclude.Include.NON_NULL) -public class Annotations { - //result previews +public class FeatureAnnotations { + /** + * Best matching FormulaCandidate. 
+ */ protected FormulaCandidate formulaAnnotation; // SIRIUS + ZODIAC - protected StructureCandidate structureAnnotation; // CSI:FingerID + /** + * Best matching StructureCandidate ranked by CSI:FingerID Score over all FormulaCandidates. + */ + protected StructureCandidateScored structureAnnotation; // CSI:FingerID or MSNovelist + /** + * Best matching compound classes that correspond to the formulaAnnotation + */ protected CompoundClasses compoundClassAnnotation; // CANOPUS } diff --git a/sirius_rest_service/src/main/java/de/unijena/bioinf/ms/middleware/model/features/annotations/FormulaCandidate.java b/sirius_rest_service/src/main/java/de/unijena/bioinf/ms/middleware/model/annotations/FormulaCandidate.java similarity index 84% rename from sirius_rest_service/src/main/java/de/unijena/bioinf/ms/middleware/model/features/annotations/FormulaCandidate.java rename to sirius_rest_service/src/main/java/de/unijena/bioinf/ms/middleware/model/annotations/FormulaCandidate.java index 2989c6c3c2..f23e4b6ea1 100644 --- a/sirius_rest_service/src/main/java/de/unijena/bioinf/ms/middleware/model/features/annotations/FormulaCandidate.java +++ b/sirius_rest_service/src/main/java/de/unijena/bioinf/ms/middleware/model/annotations/FormulaCandidate.java @@ -18,24 +18,12 @@ * You should have received a copy of the GNU Lesser General Public License along with SIRIUS. 
If not, see */ - -package de.unijena.bioinf.ms.middleware.model.features.annotations; +package de.unijena.bioinf.ms.middleware.model.annotations; import com.fasterxml.jackson.annotation.JsonInclude; import de.unijena.bioinf.ChemistryBase.ms.Deviation; -import de.unijena.bioinf.ChemistryBase.ms.ft.FTree; -import de.unijena.bioinf.GibbsSampling.ZodiacScore; import de.unijena.bioinf.ms.middleware.model.spectra.AnnotatedSpectrum; -import de.unijena.bioinf.projectspace.FormulaResult; -import de.unijena.bioinf.projectspace.FormulaResultId; -import de.unijena.bioinf.projectspace.FormulaScoring; -import de.unijena.bioinf.sirius.FTreeMetricsHelper; -import de.unijena.bioinf.sirius.scores.IsotopeScore; -import de.unijena.bioinf.sirius.scores.SiriusScore; -import de.unijena.bioinf.sirius.scores.TreeScore; import lombok.*; -import org.jetbrains.annotations.NotNull; -import org.jetbrains.annotations.Nullable; /** * Molecular formula candidate that holds a unique identifier (molecular formula + adduct). diff --git a/sirius_rest_service/src/main/java/de/unijena/bioinf/ms/middleware/model/features/annotations/FragmentNode.java b/sirius_rest_service/src/main/java/de/unijena/bioinf/ms/middleware/model/annotations/FragmentNode.java similarity index 95% rename from sirius_rest_service/src/main/java/de/unijena/bioinf/ms/middleware/model/features/annotations/FragmentNode.java rename to sirius_rest_service/src/main/java/de/unijena/bioinf/ms/middleware/model/annotations/FragmentNode.java index 0487f1b2d6..457dd59c4f 100644 --- a/sirius_rest_service/src/main/java/de/unijena/bioinf/ms/middleware/model/features/annotations/FragmentNode.java +++ b/sirius_rest_service/src/main/java/de/unijena/bioinf/ms/middleware/model/annotations/FragmentNode.java @@ -18,7 +18,7 @@ * You should have received a copy of the GNU Lesser General Public License along with SIRIUS. 
If not, see */ -package de.unijena.bioinf.ms.middleware.model.features.annotations; +package de.unijena.bioinf.ms.middleware.model.annotations; import lombok.Getter; import lombok.Setter; diff --git a/sirius_rest_service/src/main/java/de/unijena/bioinf/ms/middleware/model/features/annotations/FragmentationTree.java b/sirius_rest_service/src/main/java/de/unijena/bioinf/ms/middleware/model/annotations/FragmentationTree.java similarity index 96% rename from sirius_rest_service/src/main/java/de/unijena/bioinf/ms/middleware/model/features/annotations/FragmentationTree.java rename to sirius_rest_service/src/main/java/de/unijena/bioinf/ms/middleware/model/annotations/FragmentationTree.java index f991326e87..99508b393e 100644 --- a/sirius_rest_service/src/main/java/de/unijena/bioinf/ms/middleware/model/features/annotations/FragmentationTree.java +++ b/sirius_rest_service/src/main/java/de/unijena/bioinf/ms/middleware/model/annotations/FragmentationTree.java @@ -3,7 +3,7 @@ * This file is part of the SIRIUS library for analyzing MS and MS/MS data * * Copyright (C) 2013-2020 Kai Dührkop, Markus Fleischauer, Marcus Ludwig, Martin A. Hoffman, Fleming Kretschmer and Sebastian Böcker, - * Chair of Bioinformatics, Friedrich-Schilller University. + * Chair of Bioinformatics, Friedrich-Schiller University. * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public @@ -18,7 +18,7 @@ * You should have received a copy of the GNU Lesser General Public License along with SIRIUS. 
If not, see */ -package de.unijena.bioinf.ms.middleware.model.features.annotations; +package de.unijena.bioinf.ms.middleware.model.annotations; import de.unijena.bioinf.ChemistryBase.ms.AnnotatedPeak; import de.unijena.bioinf.ChemistryBase.ms.Deviation; diff --git a/sirius_rest_service/src/main/java/de/unijena/bioinf/ms/middleware/model/features/annotations/LossEdge.java b/sirius_rest_service/src/main/java/de/unijena/bioinf/ms/middleware/model/annotations/LossEdge.java similarity index 94% rename from sirius_rest_service/src/main/java/de/unijena/bioinf/ms/middleware/model/features/annotations/LossEdge.java rename to sirius_rest_service/src/main/java/de/unijena/bioinf/ms/middleware/model/annotations/LossEdge.java index 23f6377123..4787cc5dfc 100644 --- a/sirius_rest_service/src/main/java/de/unijena/bioinf/ms/middleware/model/features/annotations/LossEdge.java +++ b/sirius_rest_service/src/main/java/de/unijena/bioinf/ms/middleware/model/annotations/LossEdge.java @@ -18,7 +18,7 @@ * You should have received a copy of the GNU Lesser General Public License along with SIRIUS. 
If not, see */ -package de.unijena.bioinf.ms.middleware.model.features.annotations; +package de.unijena.bioinf.ms.middleware.model.annotations; import lombok.Getter; import lombok.Setter; diff --git a/sirius_rest_service/src/main/java/de/unijena/bioinf/ms/middleware/model/features/annotations/StructureCandidate.java b/sirius_rest_service/src/main/java/de/unijena/bioinf/ms/middleware/model/annotations/StructureCandidate.java similarity index 76% rename from sirius_rest_service/src/main/java/de/unijena/bioinf/ms/middleware/model/features/annotations/StructureCandidate.java rename to sirius_rest_service/src/main/java/de/unijena/bioinf/ms/middleware/model/annotations/StructureCandidate.java index 227e01c0b0..3b655f66d1 100644 --- a/sirius_rest_service/src/main/java/de/unijena/bioinf/ms/middleware/model/features/annotations/StructureCandidate.java +++ b/sirius_rest_service/src/main/java/de/unijena/bioinf/ms/middleware/model/annotations/StructureCandidate.java @@ -3,7 +3,7 @@ * This file is part of the SIRIUS library for analyzing MS and MS/MS data * * Copyright (C) 2013-2020 Kai Dührkop, Markus Fleischauer, Marcus Ludwig, Martin A. Hoffman, Fleming Kretschmer and Sebastian Böcker, - * Chair of Bioinformatics, Friedrich-Schilller University. + * Chair of Bioinformatics, Friedrich-Schiller University. * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public @@ -18,7 +18,7 @@ * You should have received a copy of the GNU Lesser General Public License along with SIRIUS. 
If not, see */ -package de.unijena.bioinf.ms.middleware.model.features.annotations; +package de.unijena.bioinf.ms.middleware.model.annotations; import com.fasterxml.jackson.annotation.JsonIgnoreProperties; import com.fasterxml.jackson.annotation.JsonInclude; @@ -31,27 +31,17 @@ @Getter @Setter @JsonInclude(JsonInclude.Include.NON_NULL) -@JsonIgnoreProperties({ "molecularFormula", "adduct"}) +@JsonIgnoreProperties({ "molecularFormula", "adduct", "csiScore", "tanimotoSimilarity", "confidenceExactMatch", "confidenceApproxMatch", "fingerprint"}) public class StructureCandidate { - public enum OptFields {fingerprint, dbLinks, refSpectraLinks, pubmedIds} protected String structureName; protected String smiles; - protected Double csiScore; - protected Double tanimotoSimilarity; - protected Double confidenceScore; - protected Integer numOfPubMedIds; protected Double xlogP; protected String inchiKey; //Extended Results - /** - * Array containing the indices of the molecular fingerprint that are available in the structure (1 if present) - * OPTIONAL: needs to be added by parameter - */ - BinaryFingerprint fingerprint; /** * List of structure database links belonging to this structure candidate * OPTIONAL: needs to be added by parameter diff --git a/sirius_rest_service/src/main/java/de/unijena/bioinf/ms/middleware/model/features/annotations/StructureCandidateExt.java b/sirius_rest_service/src/main/java/de/unijena/bioinf/ms/middleware/model/annotations/StructureCandidateFormula.java similarity index 69% rename from sirius_rest_service/src/main/java/de/unijena/bioinf/ms/middleware/model/features/annotations/StructureCandidateExt.java rename to sirius_rest_service/src/main/java/de/unijena/bioinf/ms/middleware/model/annotations/StructureCandidateFormula.java index 8503eb491c..36ce5e0d73 100644 --- a/sirius_rest_service/src/main/java/de/unijena/bioinf/ms/middleware/model/features/annotations/StructureCandidateExt.java +++ 
b/sirius_rest_service/src/main/java/de/unijena/bioinf/ms/middleware/model/annotations/StructureCandidateFormula.java @@ -3,7 +3,7 @@ * This file is part of the SIRIUS library for analyzing MS and MS/MS data * * Copyright (C) 2013-2020 Kai Dührkop, Markus Fleischauer, Marcus Ludwig, Martin A. Hoffman, Fleming Kretschmer and Sebastian Böcker, - * Chair of Bioinformatics, Friedrich-Schilller University. + * Chair of Bioinformatics, Friedrich-Schiller University. * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public @@ -18,8 +18,9 @@ * You should have received a copy of the GNU Lesser General Public License along with SIRIUS. If not, see */ -package de.unijena.bioinf.ms.middleware.model.features.annotations; +package de.unijena.bioinf.ms.middleware.model.annotations; +import com.fasterxml.jackson.annotation.JsonIgnoreProperties; import com.fasterxml.jackson.annotation.JsonInclude; import de.unijena.bioinf.ChemistryBase.algorithm.scoring.Scored; import de.unijena.bioinf.ChemistryBase.chem.MolecularFormula; @@ -40,7 +41,8 @@ @Getter @Setter @JsonInclude(JsonInclude.Include.NON_NULL) -public class StructureCandidateExt extends StructureCandidate { +@JsonIgnoreProperties({}) +public class StructureCandidateFormula extends StructureCandidateScored { /** * molecular formula of this candidate */ @@ -50,45 +52,45 @@ public class StructureCandidateExt extends StructureCandidate { */ protected String adduct; - public static StructureCandidateExt of(Scored can, FormulaScoring scorings, - EnumSet optFields, - FormulaResultId fid + public static StructureCandidateFormula of(Scored can, FormulaScoring scorings, + EnumSet optFields, + FormulaResultId fid ) { return of(can, null, scorings, optFields, fid.getMolecularFormula(), fid.getIonType()); } - public static StructureCandidateExt of(Scored can, FormulaScoring scorings, - EnumSet optFields, - MolecularFormula formula, - PrecursorIonType adduct + public static 
StructureCandidateFormula of(Scored can, FormulaScoring scorings, + EnumSet optFields, + MolecularFormula formula, + PrecursorIonType adduct ) { return of(can, null, scorings, optFields, formula, adduct); } - public static StructureCandidateExt of(Scored can, @Nullable Fingerprint fp, - @Nullable FormulaScoring confidenceScoreProvider, - EnumSet optFields, - FormulaResultId fid + public static StructureCandidateFormula of(Scored can, @Nullable Fingerprint fp, + @Nullable FormulaScoring confidenceScoreProvider, + EnumSet optFields, + FormulaResultId fid ) { return of(can, fp, confidenceScoreProvider, optFields, fid.getMolecularFormula(), fid.getIonType()); } - public static StructureCandidateExt of(Scored can, @Nullable Fingerprint fp, - @Nullable FormulaScoring confidenceScoreProvider, - EnumSet optFields, - MolecularFormula formula, - PrecursorIonType adduct + public static StructureCandidateFormula of(Scored can, @Nullable Fingerprint fp, + @Nullable FormulaScoring confidenceScoreProvider, + EnumSet optFields, + MolecularFormula formula, + PrecursorIonType adduct ) { - final StructureCandidateExt sSum = new StructureCandidateExt(); + final StructureCandidateFormula sSum = new StructureCandidateFormula(); sSum.setMolecularFormula(formula.toString()); sSum.setAdduct(adduct.toString()); // scores sSum.setCsiScore(can.getScore()); sSum.setTanimotoSimilarity(can.getCandidate().getTanimoto()); if (confidenceScoreProvider != null) - confidenceScoreProvider.getAnnotation(ConfidenceScore.class).map(ConfidenceScore::score).ifPresent(sSum::setConfidenceScore); + confidenceScoreProvider.getAnnotation(ConfidenceScore.class).map(ConfidenceScore::score).ifPresent(sSum::setConfidenceExactMatch); //Structure information //check for "null" strings since the database might not be perfectly curated diff --git a/sirius_rest_service/src/main/java/de/unijena/bioinf/ms/middleware/model/annotations/StructureCandidateScored.java 
b/sirius_rest_service/src/main/java/de/unijena/bioinf/ms/middleware/model/annotations/StructureCandidateScored.java new file mode 100644 index 0000000000..d07e3c27de --- /dev/null +++ b/sirius_rest_service/src/main/java/de/unijena/bioinf/ms/middleware/model/annotations/StructureCandidateScored.java @@ -0,0 +1,46 @@ +/* + * + * This file is part of the SIRIUS library for analyzing MS and MS/MS data + * + * Copyright (C) 2013-2020 Kai Dührkop, Markus Fleischauer, Marcus Ludwig, Martin A. Hoffman, Fleming Kretschmer and Sebastian Böcker, + * Chair of Bioinformatics, Friedrich-Schiller University. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 3 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License along with SIRIUS. 
If not, see + */ + +package de.unijena.bioinf.ms.middleware.model.annotations; + +import com.fasterxml.jackson.annotation.JsonIgnoreProperties; +import com.fasterxml.jackson.annotation.JsonInclude; +import lombok.Getter; +import lombok.Setter; + +@Getter +@Setter +@JsonInclude(JsonInclude.Include.NON_NULL) +@JsonIgnoreProperties({ "molecularFormula", "adduct"}) +public class StructureCandidateScored extends StructureCandidate { + public enum OptFields {fingerprint, dbLinks, refSpectraLinks, pubmedIds} + + protected Double csiScore; + protected Double tanimotoSimilarity; + protected Double confidenceExactMatch; + protected Double confidenceApproxMatch; + + //Extended Results + /** + * Array containing the indices of the molecular fingerprint that are available in the structure (1 if present) + * OPTIONAL: needs to be added by parameter + */ + BinaryFingerprint fingerprint; +} diff --git a/sirius_rest_service/src/main/java/de/unijena/bioinf/ms/middleware/model/compounds/Compound.java b/sirius_rest_service/src/main/java/de/unijena/bioinf/ms/middleware/model/compounds/Compound.java index 06446f263d..9613fcf6eb 100644 --- a/sirius_rest_service/src/main/java/de/unijena/bioinf/ms/middleware/model/compounds/Compound.java +++ b/sirius_rest_service/src/main/java/de/unijena/bioinf/ms/middleware/model/compounds/Compound.java @@ -3,27 +3,7 @@ * This file is part of the SIRIUS library for analyzing MS and MS/MS data * * Copyright (C) 2013-2020 Kai Dührkop, Markus Fleischauer, Marcus Ludwig, Martin A. Hoffman, Fleming Kretschmer and Sebastian Böcker, - * Chair of Bioinformatics, Friedrich-Schilller University. - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 3 of the License, or (at your option) any later version. 
- * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License along with SIRIUS. If not, see - */ - -/* - * - * This file is part of the SIRIUS library for analyzing MS and MS/MS data - * - * Copyright (C) 2013-2020 Kai Dührkop, Markus Fleischauer, Marcus Ludwig, Martin A. Hoffman, Fleming Kretschmer and Sebastian Böcker, - * Chair of Bioinformatics, Friedrich-Schilller University. + * Chair of Bioinformatics, Friedrich-Schiller University. * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public @@ -40,6 +20,8 @@ package de.unijena.bioinf.ms.middleware.model.compounds; +import de.unijena.bioinf.ms.middleware.model.annotations.ConsensusAnnotationsCSI; +import de.unijena.bioinf.ms.middleware.model.annotations.ConsensusAnnotationsDeNovo; import de.unijena.bioinf.ms.middleware.model.features.AlignedFeature; import lombok.Builder; import lombok.Getter; @@ -52,7 +34,7 @@ @Setter @Builder public class Compound { - public enum OptFields {consensusAnnotations, customAnnotations} + public enum OptFields {consensusAnnotations, consensusAnnotationsDeNovo, customAnnotations} /** * uid of this compound Entity @@ -60,7 +42,6 @@ public enum OptFields {consensusAnnotations, customAnnotations} @NotNull protected String compoundId; - /** * Some (optional) human-readable name */ @@ -70,6 +51,7 @@ public enum OptFields {consensusAnnotations, customAnnotations} * The merged/consensus retention time start (earliest rt) of this compound */ protected Double rtStartSeconds; + /** * The merged/consensus retention time end (latest rt) of this compound */ @@ -85,4 +67,21 @@ public enum OptFields {consensusAnnotations, customAnnotations} * List of 
aligned features (adducts) that belong to the same (this) compound */ protected List features; + + /** + * The consensus of the top annotations from all the features of this compound. + * Null if it was not requested and non-null otherwise. Might contain empty fields if results are not available + */ + ConsensusAnnotationsCSI consensusAnnotations; + + /** + * The consensus of the top de novo annotations from all the features of this compound. + * Null if it was not requested and non-null otherwise. Might contain empty fields if results are not available + */ + ConsensusAnnotationsDeNovo consensusAnnotationsDeNovo; + + /** + * Alternative annotations selected by the User. + */ + ConsensusAnnotationsCSI customAnnotations; } diff --git a/sirius_rest_service/src/main/java/de/unijena/bioinf/ms/middleware/model/features/AlignedFeature.java b/sirius_rest_service/src/main/java/de/unijena/bioinf/ms/middleware/model/features/AlignedFeature.java index 8d00ca3f4c..9ea825400c 100644 --- a/sirius_rest_service/src/main/java/de/unijena/bioinf/ms/middleware/model/features/AlignedFeature.java +++ b/sirius_rest_service/src/main/java/de/unijena/bioinf/ms/middleware/model/features/AlignedFeature.java @@ -1,23 +1,3 @@ -/* - * - * This file is part of the SIRIUS library for analyzing MS and MS/MS data - * - * Copyright (C) 2013-2020 Kai Dührkop, Markus Fleischauer, Marcus Ludwig, Martin A. Hoffman, Fleming Kretschmer and Sebastian Böcker, - * Chair of Bioinformatics, Friedrich-Schilller University. - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 3 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License along with SIRIUS. If not, see - */ - /* * This file is part of the SIRIUS Software for analyzing MS and MS/MS data * @@ -40,16 +20,11 @@ package de.unijena.bioinf.ms.middleware.model.features; import com.fasterxml.jackson.annotation.JsonInclude; -import de.unijena.bioinf.ChemistryBase.chem.PrecursorIonType; -import de.unijena.bioinf.ChemistryBase.ms.CompoundQuality; -import de.unijena.bioinf.ms.middleware.model.features.annotations.Annotations; -import de.unijena.bioinf.projectspace.CompoundContainerId; +import de.unijena.bioinf.ms.middleware.model.annotations.FeatureAnnotations; import lombok.Getter; import lombok.Setter; import org.jetbrains.annotations.NotNull; -import java.util.EnumSet; - /** * The AlignedFeature contains the ID of a featured (aligned over runs) together with some read-only information * that might be displayed in some summary view. @@ -58,7 +33,7 @@ @Setter @JsonInclude(JsonInclude.Include.NON_NULL) public class AlignedFeature { - public enum OptFields {topAnnotations, msData} + public enum OptFields {msData, topAnnotations, topAnnotationsDeNovo} // identifier @NotNull @@ -75,12 +50,33 @@ public enum OptFields {topAnnotations, msData} protected Double rtStartSeconds; protected Double rtEndSeconds; - //Summary of the results of the compounds - protected Annotations topAnnotations; + /** + * Mass Spec data of this feature (input data) + */ protected MsData msData; /** - * Wirte lock for this feature. If the feature is locked no write operations are possible. + * Top annotations of this feature. + * If a CSI:FingerID structureAnnotation is available, the FormulaCandidate that corresponds to the + * structureAnnotation is returned. Otherwise, the FormulaCandidate with the highest SiriusScore is returned.
+ * CANOPUS Compound classes correspond to the FormulaCandidate no matter how it was selected + * + * Null if it was not requested and non-null otherwise. + */ + protected FeatureAnnotations topAnnotations; + + /** + * Top de novo annotations of this feature. + * The FormulaCandidate with the highest SiriusScore is returned. MSNovelist structureAnnotation and + * CANOPUS compoundClasses correspond to the FormulaCandidate. + * + * Null if it was not requested and non-null otherwise. + */ + protected FeatureAnnotations topAnnotationsDeNovo; + + + /** + * Write lock for this feature. If the feature is locked no write operations are possible. * True if any computation is modifying this feature or its results */ protected boolean computing; diff --git a/sirius_rest_service/src/main/java/de/unijena/bioinf/ms/middleware/service/annotations/AnnotationUtils.java b/sirius_rest_service/src/main/java/de/unijena/bioinf/ms/middleware/service/annotations/AnnotationUtils.java new file mode 100644 index 0000000000..30279c26ec --- /dev/null +++ b/sirius_rest_service/src/main/java/de/unijena/bioinf/ms/middleware/service/annotations/AnnotationUtils.java @@ -0,0 +1,189 @@ +/* + * + * This file is part of the SIRIUS library for analyzing MS and MS/MS data + * + * Copyright (C) 2013-2020 Kai Dührkop, Markus Fleischauer, Marcus Ludwig, Martin A. Hoffman, Fleming Kretschmer and Sebastian Böcker, + * Chair of Bioinformatics, Friedrich-Schiller University. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 3 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details.
+ * + * You should have received a copy of the GNU Lesser General Public License along with SIRIUS. If not, see + */ + +package de.unijena.bioinf.ms.middleware.service.annotations; + +import de.unijena.bioinf.ms.middleware.model.annotations.*; +import de.unijena.bioinf.ms.middleware.model.features.AlignedFeature; +import lombok.extern.slf4j.Slf4j; + +import java.util.*; +import java.util.stream.Collectors; + +@Slf4j +public class AnnotationUtils { + private AnnotationUtils() { + // just to prevent instantiation + } + + public static ConsensusAnnotationsDeNovo buildConsensusAnnotationsDeNovo(Collection features) { + //formula based consensus + Map> formulaAnnotationAgreement = features.stream() + .collect(Collectors.groupingBy(f -> Optional.of(f) + .map(AlignedFeature::getTopAnnotations) + .map(FeatureAnnotations::getFormulaAnnotation) + .map(FormulaCandidate::getMolecularFormula) + .orElse(""), Collectors.toList())); + //filter features with no valid formula candidate + formulaAnnotationAgreement.remove(""); + + if (!formulaAnnotationAgreement.isEmpty()) { + Map.Entry> max = formulaAnnotationAgreement.entrySet().stream() + .max(Comparator.comparing(e -> e.getValue().size())).orElseThrow(); + + if (formulaAnnotationAgreement.values().stream().filter(v -> v.size() == max.getValue().size()).count() == 1) { + return consensusDeNovo(max.getValue(), max.getValue().size() == 1 + ? 
ConsensusAnnotationsDeNovo.Criterion.SINGLETON_FORMULA + : ConsensusAnnotationsDeNovo.Criterion.MAJORITY_FORMULA); + } + return consensusDeNovo( + formulaAnnotationAgreement.values().stream().flatMap(List::stream).toList(), + ConsensusAnnotationsDeNovo.Criterion.TOP_FORMULA); + } + //empty results + return ConsensusAnnotationsDeNovo.builder().build(); + } + + public static ConsensusAnnotationsCSI buildConsensusAnnotationsCSI(Collection features) { + { + Map> structureAnnotationAgreement = features.stream() + .collect(Collectors.groupingBy(f -> Optional.of(f) + .map(AlignedFeature::getTopAnnotations) + .map(FeatureAnnotations::getStructureAnnotation) + .map(StructureCandidateScored::getInchiKey) + .orElse(""), Collectors.toList())); + + //filter features with no valid structure candidate + structureAnnotationAgreement.remove(""); + + //structure based consensus + if (!structureAnnotationAgreement.isEmpty()) { + Map.Entry> max = structureAnnotationAgreement.entrySet().stream() + .max(Comparator.comparing(e -> e.getValue().size())).orElseThrow(); + + if (structureAnnotationAgreement.values().stream().filter(v -> v.size() == max.getValue().size()).count() == 1) { + return consensusByStructureCSI(max.getValue(), max.getValue().size() == 1 + ? 
ConsensusAnnotationsCSI.Criterion.SINGLETON_STRUCTURE + : ConsensusAnnotationsCSI.Criterion.MAJORITY_STRUCTURE); + } + return consensusByStructureCSI( + structureAnnotationAgreement.values().stream().flatMap(List::stream).toList(), + ConsensusAnnotationsCSI.Criterion.CONFIDENCE_STRUCTURE); + } + } + //formula based consensus + Map> formulaAnnotationAgreement = features.stream() + .collect(Collectors.groupingBy(f -> Optional.of(f) + .map(AlignedFeature::getTopAnnotations) + .map(FeatureAnnotations::getFormulaAnnotation) + .map(FormulaCandidate::getMolecularFormula) + .orElse(""), Collectors.toList())); + //filter features with no valid formula candidate + formulaAnnotationAgreement.remove(""); + + if (!formulaAnnotationAgreement.isEmpty()) { + Map.Entry> max = formulaAnnotationAgreement.entrySet().stream() + .max(Comparator.comparing(e -> e.getValue().size())).orElseThrow(); + + if (formulaAnnotationAgreement.values().stream().filter(v -> v.size() == max.getValue().size()).count() == 1) { + return consensusByFormulaCSI(max.getValue(), max.getValue().size() == 1 + ? 
ConsensusAnnotationsCSI.Criterion.SINGLETON_FORMULA + : ConsensusAnnotationsCSI.Criterion.MAJORITY_FORMULA); + } + return consensusByFormulaCSI( + formulaAnnotationAgreement.values().stream().flatMap(List::stream).toList(), + ConsensusAnnotationsCSI.Criterion.TOP_FORMULA); + } + //empty results + return ConsensusAnnotationsCSI.builder().build(); + } + + private static ConsensusAnnotationsDeNovo consensusDeNovo(Collection features, + ConsensusAnnotationsDeNovo.Criterion type) { + //prefer candidate with compound classes + AlignedFeature top = features.stream() + .filter(f -> f.getTopAnnotations().getCompoundClassAnnotation() != null) + .min(Comparator.comparing(f -> f.getTopAnnotations().getFormulaAnnotation().getSiriusScore())) + .orElse(null); + + // fallback to non compound class candidate + if (top == null) + top = features.stream() + .min(Comparator.comparing(f -> f.getTopAnnotations().getFormulaAnnotation().getSiriusScore())) + .orElseThrow(() -> new IllegalStateException("No Formula Candidate Found!")); + + return ConsensusAnnotationsDeNovo.builder() + .selectionCriterion(type) + .compoundClasses(top.getTopAnnotations().getCompoundClassAnnotation()) + .molecularFormula(top.getTopAnnotations().getFormulaAnnotation().getMolecularFormula()) + .supportingFeatureIds(ConsensusAnnotationsDeNovo.Criterion.TOP_FORMULA == type + ? 
List.of(top.getAlignedFeatureId()) + : features.stream().map(AlignedFeature::getAlignedFeatureId).toList() + ).build(); + } + + + private static ConsensusAnnotationsCSI consensusByFormulaCSI(Collection features, + ConsensusAnnotationsCSI.Criterion type) { + + AlignedFeature top = features.stream() + .min(Comparator.comparing(f -> f.getTopAnnotations().getFormulaAnnotation().getSiriusScore())) + .orElseThrow(() -> new IllegalStateException("No Formula Candidate Found!")); + + + return ConsensusAnnotationsCSI.builder() + .selectionCriterion(type) + .molecularFormula(top.getTopAnnotations().getFormulaAnnotation().getMolecularFormula()) + .compoundClasses(top.getTopAnnotations().getCompoundClassAnnotation()) + .supportingFeatureIds(ConsensusAnnotationsCSI.Criterion.TOP_FORMULA == type + ? List.of(top.getAlignedFeatureId()) + : features.stream().map(AlignedFeature::getAlignedFeatureId).toList() + ).build(); + } + + private static ConsensusAnnotationsCSI consensusByStructureCSI(Collection features, + ConsensusAnnotationsCSI.Criterion type) { + final boolean mixedStructures = ConsensusAnnotationsCSI.Criterion.CONFIDENCE_STRUCTURE == type; + //todo use approx confidence if available + AlignedFeature topConf = features.stream() + .min(Comparator.comparing(f -> f.getTopAnnotations().getStructureAnnotation().getConfidenceExactMatch())) + .orElseThrow(() -> new IllegalStateException("No Structure Candidate Found!")); + + Double topConfExact = topConf.getTopAnnotations().getStructureAnnotation().getConfidenceExactMatch(); + Double topConfApprox = mixedStructures + ? 
topConf.getTopAnnotations().getStructureAnnotation().getConfidenceApproxMatch() + : features.stream() + .map(f -> f.getTopAnnotations().getStructureAnnotation().getConfidenceExactMatch()) + .min(Double::compareTo) + .orElse(null); + + return ConsensusAnnotationsCSI.builder() + .selectionCriterion(type) + .csiFingerIdStructure(topConf.getTopAnnotations().getStructureAnnotation()) + .compoundClasses(topConf.getTopAnnotations().getCompoundClassAnnotation()) + .confidenceExactMatch(topConfExact) + .confidenceApproxMatch(topConfApprox) + .molecularFormula(topConf.getTopAnnotations().getFormulaAnnotation().getMolecularFormula()) + .supportingFeatureIds(mixedStructures + ? List.of(topConf.getAlignedFeatureId()) + : features.stream().map(AlignedFeature::getAlignedFeatureId).toList() + ).build(); + } +} diff --git a/sirius_rest_service/src/main/java/de/unijena/bioinf/ms/middleware/service/projects/Project.java b/sirius_rest_service/src/main/java/de/unijena/bioinf/ms/middleware/service/projects/Project.java index 1ce0813530..609927ee00 100644 --- a/sirius_rest_service/src/main/java/de/unijena/bioinf/ms/middleware/service/projects/Project.java +++ b/sirius_rest_service/src/main/java/de/unijena/bioinf/ms/middleware/service/projects/Project.java @@ -23,9 +23,9 @@ import de.unijena.bioinf.ms.middleware.model.compounds.Compound; import de.unijena.bioinf.ms.middleware.model.features.AlignedFeature; import de.unijena.bioinf.ms.middleware.model.features.AlignedFeatureQuality; -import de.unijena.bioinf.ms.middleware.model.features.annotations.FormulaCandidate; -import de.unijena.bioinf.ms.middleware.model.features.annotations.StructureCandidate; -import de.unijena.bioinf.ms.middleware.model.features.annotations.StructureCandidateExt; +import de.unijena.bioinf.ms.middleware.model.annotations.FormulaCandidate; +import de.unijena.bioinf.ms.middleware.model.annotations.StructureCandidateScored; +import de.unijena.bioinf.ms.middleware.model.annotations.StructureCandidateFormula; import 
org.springframework.data.domain.Page; import org.springframework.data.domain.Pageable; @@ -95,21 +95,21 @@ default FormulaCandidate findFormulaCandidateByFeatureIdAndId(String formulaId, return findFormulaCandidateByFeatureIdAndId(formulaId, alignedFeatureId, EnumSet.copyOf(List.of(optFields))); } - Page findStructureCandidatesByFeatureIdAndFormulaId(String formulaId, String alignedFeatureId, Pageable pageable, EnumSet optFields); + Page findStructureCandidatesByFeatureIdAndFormulaId(String formulaId, String alignedFeatureId, Pageable pageable, EnumSet optFields); - default Page findStructureCandidatesByFeatureIdAndFormulaId(String formulaId, String alignedFeatureId, Pageable pageable, StructureCandidate.OptFields... optFields) { + default Page findStructureCandidatesByFeatureIdAndFormulaId(String formulaId, String alignedFeatureId, Pageable pageable, StructureCandidateScored.OptFields... optFields) { return findStructureCandidatesByFeatureIdAndFormulaId(formulaId, alignedFeatureId, pageable, EnumSet.copyOf(List.of(optFields))); } - Page findStructureCandidatesByFeatureId(String alignedFeatureId, Pageable pageable, EnumSet optFields); + Page findStructureCandidatesByFeatureId(String alignedFeatureId, Pageable pageable, EnumSet optFields); - default Page findStructureCandidatesByFeatureId(String alignedFeatureId, Pageable pageable, StructureCandidate.OptFields... optFields) { + default Page findStructureCandidatesByFeatureId(String alignedFeatureId, Pageable pageable, StructureCandidateScored.OptFields... optFields) { return findStructureCandidatesByFeatureId(alignedFeatureId, pageable, EnumSet.copyOf(List.of(optFields))); } - StructureCandidate findTopStructureCandidateByFeatureId(String alignedFeatureId, EnumSet optFields); + StructureCandidateScored findTopStructureCandidateByFeatureId(String alignedFeatureId, EnumSet optFields); - default StructureCandidate findTopStructureCandidateByFeatureId(String alignedFeatureId, StructureCandidate.OptFields... 
optFields) { + default StructureCandidateScored findTopStructureCandidateByFeatureId(String alignedFeatureId, StructureCandidateScored.OptFields... optFields) { return findTopStructureCandidateByFeatureId(alignedFeatureId, EnumSet.copyOf(List.of(optFields))); } } diff --git a/sirius_rest_service/src/main/java/de/unijena/bioinf/ms/middleware/service/projects/SiriusProjectSpaceImpl.java b/sirius_rest_service/src/main/java/de/unijena/bioinf/ms/middleware/service/projects/SiriusProjectSpaceImpl.java index bff8e6d6fe..3d7ace8705 100644 --- a/sirius_rest_service/src/main/java/de/unijena/bioinf/ms/middleware/service/projects/SiriusProjectSpaceImpl.java +++ b/sirius_rest_service/src/main/java/de/unijena/bioinf/ms/middleware/service/projects/SiriusProjectSpaceImpl.java @@ -41,13 +41,14 @@ import de.unijena.bioinf.ms.annotations.DataAnnotation; import de.unijena.bioinf.ms.frontend.core.ApplicationCore; import de.unijena.bioinf.ms.middleware.controller.AlignedFeaturesController; +import de.unijena.bioinf.ms.middleware.model.annotations.*; import de.unijena.bioinf.ms.middleware.model.compounds.Compound; import de.unijena.bioinf.ms.middleware.model.features.AlignedFeature; import de.unijena.bioinf.ms.middleware.model.features.AlignedFeatureQuality; import de.unijena.bioinf.ms.middleware.model.features.LCMSFeatureQuality; import de.unijena.bioinf.ms.middleware.model.features.MsData; -import de.unijena.bioinf.ms.middleware.model.features.annotations.*; import de.unijena.bioinf.ms.middleware.model.spectra.AnnotatedSpectrum; +import de.unijena.bioinf.ms.middleware.service.annotations.AnnotationUtils; import de.unijena.bioinf.projectspace.*; import de.unijena.bioinf.projectspace.fingerid.FBCandidateNumber; import de.unijena.bioinf.sirius.FTreeMetricsHelper; @@ -199,33 +200,33 @@ public FormulaCandidate findFormulaCandidateByFeatureIdAndId(String formulaId, S } @Override - public Page findStructureCandidatesByFeatureIdAndFormulaId(String formulaId, String alignedFeatureId, Pageable 
pageable, EnumSet optFields) { - List> para = (optFields.contains(StructureCandidate.OptFields.fingerprint) + public Page findStructureCandidatesByFeatureIdAndFormulaId(String formulaId, String alignedFeatureId, Pageable pageable, EnumSet optFields) { + List> para = (optFields.contains(StructureCandidateScored.OptFields.fingerprint) ? List.of(FormulaScoring.class, FBCandidates.class, FBCandidateFingerprints.class) : List.of(FormulaScoring.class, FBCandidates.class)); Instance instance = loadInstance(alignedFeatureId); FormulaResultId fidObj = parseFID(instance, formulaId); return loadStructureCandidates(instance, fidObj, pageable, para, optFields) - .map(l -> l.stream().map(c -> (StructureCandidate)c).toList()) - .map(it -> (Page) new PageImpl<>(it)) + .map(l -> l.stream().map(c -> (StructureCandidateScored) c).toList()) + .map(it -> (Page) new PageImpl<>(it)) .orElse(Page.empty(pageable)); //todo number of candidates for page. } @Override - public Page findStructureCandidatesByFeatureId(String alignedFeatureId, Pageable pageable, EnumSet optFields) { - List> para = (optFields.contains(StructureCandidate.OptFields.fingerprint) + public Page findStructureCandidatesByFeatureId(String alignedFeatureId, Pageable pageable, EnumSet optFields) { + List> para = (optFields.contains(StructureCandidateScored.OptFields.fingerprint) ? 
List.of(FormulaScoring.class, FBCandidates.class, FBCandidateFingerprints.class) : List.of(FormulaScoring.class, FBCandidates.class)); Instance instance = loadInstance(alignedFeatureId); - List candidates = instance.loadFormulaResults(FormulaScoring.class).stream() + List candidates = instance.loadFormulaResults(FormulaScoring.class).stream() .filter(fr -> fr.getCandidate().getAnnotation(FormulaScoring.class) .flatMap(s -> s.getAnnotation(TopCSIScore.class)).isPresent()) .map(fr -> fr.getCandidate().getId()) .map(fid -> loadStructureCandidates(instance, fid, pageable, para, optFields)) .filter(Optional::isPresent).flatMap(Optional::stream).flatMap(List::stream) - .sorted(Comparator.comparing(StructureCandidate::getCsiScore).reversed()) + .sorted(Comparator.comparing(StructureCandidateScored::getCsiScore).reversed()) .skip(pageable.getOffset()) .limit(pageable.getPageSize()).toList(); @@ -233,8 +234,8 @@ public Page findStructureCandidatesByFeatureId(String ali } @Override - public StructureCandidateExt findTopStructureCandidateByFeatureId(String alignedFeatureId, EnumSet optFields) { - List> para = (optFields.contains(StructureCandidate.OptFields.fingerprint) + public StructureCandidateFormula findTopStructureCandidateByFeatureId(String alignedFeatureId, EnumSet optFields) { + List> para = (optFields.contains(StructureCandidateScored.OptFields.fingerprint) ? 
List.of(FormulaScoring.class, FBCandidates.class, FBCandidateFingerprints.class) : List.of(FormulaScoring.class, FBCandidates.class)); @@ -245,7 +246,7 @@ public StructureCandidateExt findTopStructureCandidateByFeatureId(String aligned return instance.loadFormulaResult(fr.getId(), (Class[]) para.toArray(Class[]::new)) .flatMap(fr2 -> fr2.getAnnotation(FBCandidates.class).map(FBCandidates::getResults) .filter(l -> !l.isEmpty()).map(r -> r.get(0)) - .map(sc -> StructureCandidateExt.of(sc, + .map(sc -> StructureCandidateFormula.of(sc, fr2.getAnnotation(FBCandidateFingerprints.class) .map(FBCandidateFingerprints::getFingerprints) .map(fps -> fps.isEmpty() ? null : fps.get(0)) @@ -260,7 +261,9 @@ private AlignedFeature asAlignedFeature(CompoundContainerId cid, EnumSet cids, EnumSet optFields, EnumSet optFeatureFields) { - //todo handle optional if available fields //compound with ID Compound.CompoundBuilder c = Compound.builder() .compoundId(cids.stream().map(CompoundContainerId::getGroupId) .filter(Optional::isPresent).flatMap(Optional::stream).findFirst().orElseThrow()); - // features - List features = cids.stream().map(cid -> asAlignedFeature(cid, optFeatureFields)).toList(); - c.features(features); + { + // merge optional field config + final EnumSet mergedFeatureFields = EnumSet.copyOf(optFeatureFields); + if (optFields.contains(Compound.OptFields.consensusAnnotations)) + mergedFeatureFields.add(AlignedFeature.OptFields.topAnnotations); + if (optFields.contains(Compound.OptFields.consensusAnnotationsDeNovo)) + mergedFeatureFields.add(AlignedFeature.OptFields.topAnnotationsDeNovo); + + // features + List features = cids.stream().map(cid -> asAlignedFeature(cid, mergedFeatureFields)).toList(); + c.features(features); + + if (optFields.contains(Compound.OptFields.consensusAnnotations)) + c.consensusAnnotations(AnnotationUtils.buildConsensusAnnotationsCSI(features)); + if (optFields.contains(Compound.OptFields.consensusAnnotationsDeNovo)) + 
c.consensusAnnotationsDeNovo(AnnotationUtils.buildConsensusAnnotationsDeNovo(features)); + if (optFields.contains(Compound.OptFields.customAnnotations)) + c.customAnnotations(ConsensusAnnotationsCSI.builder().build()); //todo implement custom annotations -> storage needed + + //remove optionals if not requested + if (!optFeatureFields.contains(AlignedFeature.OptFields.topAnnotations)) + features.forEach(f -> f.setTopAnnotations(null)); + if (!optFeatureFields.contains(AlignedFeature.OptFields.topAnnotationsDeNovo)) + features.forEach(f -> f.setTopAnnotationsDeNovo(null)); + } //compound RT RetentionTime rt = cids.stream().map(CompoundContainerId::getGroupRt) @@ -301,8 +325,15 @@ private Compound asCompound(List cids, EnumSet NUMBER_FORMAT.format(r.getMiddleTime() / 60)).orElse("N/A") + "-m" + NUMBER_FORMAT.format(co.getNeutralMass())); return co; } @@ -341,39 +372,39 @@ protected FormulaResultId parseFID(Instance instance, String fid) { } - private static Optional> loadStructureCandidates( + private static Optional> loadStructureCandidates( Instance instance, FormulaResultId fidObj, Pageable pageable, List> para, - EnumSet optFields + EnumSet optFields ) { long topK = pageable.getOffset() + pageable.getPageSize(); fidObj.setAnnotation(FBCandidateNumber.class, topK <= 0 ? 
FBCandidateNumber.ALL : new FBCandidateNumber((int) topK)); FormulaResult fr = instance.loadFormulaResult(fidObj, (Class[]) para.toArray(Class[]::new)).orElseThrow(); return fr.getAnnotation(FBCandidates.class).map(FBCandidates::getResults).map(l -> { - List candidates = new ArrayList(); + List candidates = new ArrayList(); Iterator> it = l.stream().skip(pageable.getOffset()).limit(pageable.getPageSize()).iterator(); - if (optFields.contains(StructureCandidate.OptFields.fingerprint)) { + if (optFields.contains(StructureCandidateScored.OptFields.fingerprint)) { Iterator fps = fr.getAnnotationOrThrow(FBCandidateFingerprints.class).getFingerprints() .stream().skip(pageable.getOffset()).limit(pageable.getPageSize()).iterator(); if (it.hasNext())//tophit - candidates.add(StructureCandidateExt.of(it.next(), fps.next(), + candidates.add(StructureCandidateFormula.of(it.next(), fps.next(), fr.getAnnotationOrNull(FormulaScoring.class), optFields, fidObj)); while (it.hasNext()) - candidates.add(StructureCandidateExt.of(it.next(), fps.next(), + candidates.add(StructureCandidateFormula.of(it.next(), fps.next(), null, optFields, fidObj)); } else { if (it.hasNext())//tophit - candidates.add(StructureCandidateExt.of(it.next(), null, + candidates.add(StructureCandidateFormula.of(it.next(), null, fr.getAnnotationOrNull(FormulaScoring.class), optFields, fidObj)); while (it.hasNext()) - candidates.add(StructureCandidateExt.of(it.next(), null, + candidates.add(StructureCandidateFormula.of(it.next(), null, null, optFields, fidObj)); } return candidates; @@ -489,12 +520,35 @@ public static Optional asSimulatedIsotopePattern(Instance ins .map(AnnotatedSpectrum::new); } - public static Annotations asCompoundSummary(Instance inst) { - return inst.loadTopFormulaResult(List.of(TopCSIScore.class)).map(de.unijena.bioinf.projectspace.FormulaResult::getId).flatMap(frid -> { + public static FeatureAnnotations extractTopAnnotationsDeNovo(Instance inst) { + return 
inst.loadTopFormulaResult(List.of(SiriusScore.class)).map(FormulaResult::getId) + .flatMap(frid -> inst.loadFormulaResult(frid, FormulaScoring.class, FTree.class, CanopusResult.class) + .map(topHit -> { + final FeatureAnnotations cSum = new FeatureAnnotations(); +// + //add formula summary + cSum.setFormulaAnnotation(asFormulaCandidate(topHit)); + + // todo add msnovelist candidatas +// topHit.getAnnotation(FBCandidates.class).map(FBCandidates::getResults) +// .filter(l -> !l.isEmpty()).map(r -> r.get(0)).map(s -> +// StructureCandidateFormula.of(s, topHit.getAnnotationOrThrow(FormulaScoring.class), +// EnumSet.of(StructureCandidateScored.OptFields.dbLinks, StructureCandidateScored.OptFields.pubmedIds, StructureCandidateScored.OptFields.refSpectraLinks), topHit.getId())) +// .ifPresent(cSum::setStructureAnnotation); + + topHit.getAnnotation(CanopusResult.class).map(CompoundClasses::of). + ifPresent(cSum::setCompoundClassAnnotation); + return cSum; + + })).orElseGet(FeatureAnnotations::new); + } + + public static FeatureAnnotations extractTopAnnotations(Instance inst) { + return inst.loadTopFormulaResult(List.of(TopCSIScore.class, SiriusScore.class)).map(FormulaResult::getId).flatMap(frid -> { frid.setAnnotation(FBCandidateNumber.class, new FBCandidateNumber(1)); return inst.loadFormulaResult(frid, FormulaScoring.class, FTree.class, FBCandidates.class, CanopusResult.class) .map(topHit -> { - final Annotations cSum = new Annotations(); + final FeatureAnnotations cSum = new FeatureAnnotations(); // //add formula summary cSum.setFormulaAnnotation(asFormulaCandidate(topHit)); @@ -502,8 +556,8 @@ public static Annotations asCompoundSummary(Instance inst) { // fingerid result topHit.getAnnotation(FBCandidates.class).map(FBCandidates::getResults) .filter(l -> !l.isEmpty()).map(r -> r.get(0)).map(s -> - StructureCandidateExt.of(s, topHit.getAnnotationOrThrow(FormulaScoring.class), - EnumSet.of(StructureCandidate.OptFields.dbLinks, StructureCandidate.OptFields.pubmedIds, 
StructureCandidate.OptFields.refSpectraLinks), topHit.getId())) + StructureCandidateFormula.of(s, topHit.getAnnotationOrThrow(FormulaScoring.class), + EnumSet.of(StructureCandidateScored.OptFields.dbLinks, StructureCandidateScored.OptFields.pubmedIds, StructureCandidateScored.OptFields.refSpectraLinks), topHit.getId())) .ifPresent(cSum::setStructureAnnotation); topHit.getAnnotation(CanopusResult.class).map(CompoundClasses::of). @@ -511,7 +565,7 @@ public static Annotations asCompoundSummary(Instance inst) { return cSum; }); - }).orElseGet(Annotations::new); + }).orElseGet(FeatureAnnotations::new); } public static MsData asCompoundMsData(Instance instance) {