diff --git a/chemical_db_oss/src/main/java/de/unijena/bioinf/chemdb/DataSource.java b/chemical_db_oss/src/main/java/de/unijena/bioinf/chemdb/DataSource.java index 237c18f75d..bafff80f13 100644 --- a/chemical_db_oss/src/main/java/de/unijena/bioinf/chemdb/DataSource.java +++ b/chemical_db_oss/src/main/java/de/unijena/bioinf/chemdb/DataSource.java @@ -131,6 +131,10 @@ public static boolean isBioOnly(long flags) { return flags != 0 && (flags & BIO.flag) != 0; } + public static boolean isInAll(long flags){ + return flags != 0 && (flags & ALL.flag) != 0; + } + public boolean isBioOnly() { return isBioOnly(flag); } @@ -147,11 +151,8 @@ public static DataSource[] valuesNoALL() { return Arrays.stream(DataSource.values()).filter(it -> it != ALL).toArray(DataSource[]::new); } - public static DataSource[] valuesNoALLNoMINES() { - return Arrays.stream(DataSource.values()).filter(it -> it != ALL && !it.mines).toArray(DataSource[]::new); - } - private final static DataSource[] BioDatabases = new DataSource[] {MESH, HMDB, KNAPSACK,CHEBI,KEGG,HSDB,MACONDA,METACYC,GNPS,TRAIN,YMDB,PLANTCYC,NORMAN,SUPERNATURAL,COCONUT,BloodExposome,TeroMol,PUBCHEMANNOTATIONBIO,PUBCHEMANNOTATIONDRUG,PUBCHEMANNOTATIONSAFETYANDTOXIC,PUBCHEMANNOTATIONFOOD,LOTUS,FooDB,MiMeDB,LIPIDMAPS,LIPID}; + private final static DataSource[] BIO_DATABASES = new DataSource[] {MESH, HMDB, KNAPSACK,CHEBI,KEGG,HSDB,MACONDA,METACYC,GNPS,TRAIN,YMDB,PLANTCYC,NORMAN,SUPERNATURAL,COCONUT,BloodExposome,TeroMol,PUBCHEMANNOTATIONBIO,PUBCHEMANNOTATIONDRUG,PUBCHEMANNOTATIONSAFETYANDTOXIC,PUBCHEMANNOTATIONFOOD,LOTUS,FooDB,MiMeDB,LIPIDMAPS,LIPID}; // 4294401852 private static long makeBIOFLAG() { diff --git a/chemical_db_oss/src/main/java/de/unijena/bioinf/chemdb/custom/CustomDataSources.java b/chemical_db_oss/src/main/java/de/unijena/bioinf/chemdb/custom/CustomDataSources.java index 8aaf5b60b3..a0e4e2f984 100644 --- a/chemical_db_oss/src/main/java/de/unijena/bioinf/chemdb/custom/CustomDataSources.java +++ 
b/chemical_db_oss/src/main/java/de/unijena/bioinf/chemdb/custom/CustomDataSources.java @@ -258,14 +258,6 @@ public static List getAllSelectableDbs() { .collect(Collectors.toList()); } - public static List getNonInSilicoSelectableDbs() { - return Arrays.stream(DataSource.valuesNoALLNoMINES()) - .map(DataSource::name) - .filter(CustomDataSources::isSearchable) - .map(CustomDataSources::getSourceFromName) - .filter(Objects::nonNull) - .toList(); - } // listener stuff public static void notifyListeners(Source changed, boolean removed) { diff --git a/chemistry_base/src/main/java/de/unijena/bioinf/ChemistryBase/ms/DetectedAdducts.java b/chemistry_base/src/main/java/de/unijena/bioinf/ChemistryBase/ms/DetectedAdducts.java index 1f23552cf1..16f8e800fd 100644 --- a/chemistry_base/src/main/java/de/unijena/bioinf/ChemistryBase/ms/DetectedAdducts.java +++ b/chemistry_base/src/main/java/de/unijena/bioinf/ChemistryBase/ms/DetectedAdducts.java @@ -21,8 +21,10 @@ package de.unijena.bioinf.ChemistryBase.ms; import de.unijena.bioinf.ChemistryBase.chem.PrecursorIonType; +import de.unijena.bioinf.ChemistryBase.ms.ft.model.AdductSettings; import de.unijena.bioinf.ms.annotations.Ms2ExperimentAnnotation; import org.jetbrains.annotations.NotNull; +import org.slf4j.LoggerFactory; import java.util.*; import java.util.concurrent.ConcurrentHashMap; @@ -34,29 +36,195 @@ * This is intended to collect Adducts from different detection sources. * Can be attached to MsExperiment */ -public final class DetectedAdducts extends ConcurrentHashMap implements Ms2ExperimentAnnotation, Cloneable { //todo ElementFilter: ConcurrentHashMap is not immutable. Hence, in princlipe, this could be cleared. Should Ms2ExperimentAnnotation be immutable? 
- public enum Source {INPUT_FILE, LCMS_ALIGN, MS1_PREPROCESSOR, SPECTRAL_LIBRARY_SEARCH, UNSPECIFIED_SOURCE} //todo implement PossibleAdducts by library search +public final class DetectedAdducts extends ConcurrentHashMap implements Ms2ExperimentAnnotation, Cloneable { //todo ElementFilter: ConcurrentHashMap is not immutable. Hence, in princlipe, this could be cleared. Should Ms2ExperimentAnnotation be immutable? + public enum Source { + /** + * this source indicates adducts specified in the input file + */ + INPUT_FILE(true, false, true), + /** + * adducts found during SIRIUS LCMS preprocessing. May contain unknown adduct to indicate to add fallback adducts. + */ + LCMS_ALIGN(true, true, false), + /** + * adducts detected based on MS1 only are never very confident. Hence, we will always add the fallback adducts. + */ + MS1_PREPROCESSOR(true, true, false), + + //special sources. These are only added additionally, but not used as primary source. Hence. if only these are available, we add the fallbacks. + + /** + * adducts found by spectral library search. May be additionally added. + */ + SPECTRAL_LIBRARY_SEARCH(false, false, false), + /** + * this is never directly specified. It is only used to make sure the map can be parsed from string. Unknown sources are mapped to this enum value. May be additionally added. 
+ */ + UNSPECIFIED_SOURCE(false, false, false); + + + private final boolean isPrimarySource; + private final boolean canBeEmpty; + private final boolean forbidAdditionalSources; + + Source(boolean isPrimarySource, boolean canBeEmpty, boolean forbidAdditionalSources) { + this.isPrimarySource = isPrimarySource; + this.canBeEmpty = canBeEmpty; + this.forbidAdditionalSources = forbidAdditionalSources; + } + + public boolean isPrimaryDetectionSource() { + return isPrimarySource; + } + + public boolean isAdditionalDetectionSource() { + return !isPrimarySource; + } + + public boolean isForbidAdditionalSources() { + return forbidAdditionalSources; + } + } + + public static DetectedAdducts singleton(Source source, PrecursorIonType ionType) { + DetectedAdducts det = new DetectedAdducts(); + det.put(source, new PossibleAdducts(ionType)); + return det; + } public DetectedAdducts() { super(); } - public Optional getAdducts() { - return getAdducts(Source.values()); + /** + * + * @return primary adducts, if available. If no adducts are available returns Optional.empty and not empty PossibleAdducts. + */ + protected Optional getPrimaryAdducts() { + return getAdducts((Source[]) Arrays.stream(Source.values()).filter(Source::isPrimaryDetectionSource).toArray(l -> new Source[l])).flatMap(pa -> pa.isEmpty() ? Optional.empty() : Optional.of(pa)); } - public Optional getAdducts(Source... keyPrio) { - return getAdducts(Arrays.stream(keyPrio).map(Source::name).toArray(String[]::new)); + /** + * + * @return additional adducts, if available. If no adducts are available returns Optional.empty and not empty PossibleAdducts. + */ + protected Optional getAdditionalAdducts() { + return getUnionOfAdducts((Source[]) Arrays.stream(Source.values()).filter(Source::isAdditionalDetectionSource).toArray(l -> new Source[l])).flatMap(pa -> pa.isEmpty() ? 
Optional.empty() : Optional.of(pa)); + } + + protected boolean hasPrimarySourceThatForbidsAdditionalSources() { + return Arrays.stream(Source.values()).filter(Source::isPrimaryDetectionSource).filter(Source::isForbidAdditionalSources).anyMatch(source -> !getOrDefault(source, PossibleAdducts.empty()).isEmpty()); + } + + + /** + * returns adducts by the following rules + * 1. input file adducts are returned if prioritized + * 2. adducts of the most important primary source (may be from input file) are selected + * 3. adducts of the additional sources are added if primary source allows for addition + * 4. fallback adducts are added if set of adducts is empty so far or if the set contains an unknown {@link PrecursorIonType} [M+?] to specify fallback adduct shall be added + * 5. the intersection of the set of adducts and the detectable adducts is calculated + * 6. enforced adducts are added + * 7. the final set is cleaned of unknown adducts and is returned + * @return set of adducts that shall be considered for compound annotation + */ + public PossibleAdducts getDetectedAdductsAndOrFallback(AdductSettings adductSettings, int charge) { + if (adductSettings.isPrioritizeInputFileAdducts() && containsKey(Source.INPUT_FILE)) { + PossibleAdducts inputAdducts = get(Source.INPUT_FILE); + if (inputAdducts.isEmpty()){ + warnIsEmpty(Source.INPUT_FILE); + } else if (!inputAdducts.hasUnknownIontype()) { + return inputAdducts; + } else { + //input file detected adduct annotation contains unknown adduct. + //Probably a very uncommon way to specify this + } + } + PossibleAdducts primaryAdductsOrFallback = getPrimaryAdducts().map(pa -> + (allowFallbackAdducts(pa)) ? 
PossibleAdducts.union(pa, adductSettings.getFallback(charge)) : pa) + .orElse(new PossibleAdducts(adductSettings.getFallback(charge))); + + if (hasPrimarySourceThatForbidsAdditionalSources()) return processwithAdductSettingsAndClean(primaryAdductsOrFallback, adductSettings, charge); + + Optional additionalAdducts = getAdditionalAdducts(); + if (additionalAdducts.isEmpty()) return processwithAdductSettingsAndClean(primaryAdductsOrFallback, adductSettings, charge); + else return processwithAdductSettingsAndClean(PossibleAdducts.union(primaryAdductsOrFallback, additionalAdducts.get()), adductSettings, charge); + } + + private boolean allowFallbackAdducts(PossibleAdducts pa) { + //an unknown PrecursorIonType such as [M+?]+ means that we are not sure and still want to add fallback adducts + return pa.hasUnknownIontype() || pa.isEmpty(); } - public Optional getAdducts(String... keyPrio) { - for (String key : keyPrio) - if (containsKey(key)) - return Optional.of(get(key)); + private void warnIsEmpty(Source source) { + LoggerFactory.getLogger(this.getClass()).warn("Detected adduct source '" + source + "' specified, but adducts are empty."); + } + + + private static final PrecursorIonType M_PLUS = PrecursorIonType.getPrecursorIonType("[M]+"); + private static final PrecursorIonType M_H_PLUS = PrecursorIonType.getPrecursorIonType("[M+H]+"); + + /** + * 1. remove unknown adducts + * 2. guarantees that never both, [M]+ and [M+H]+, are contained. This prevents issues with duplicate structure candidates in subsequent steps. [M+H]+ is favored. 
+ * @param possibleAdducts + * @return + */ + private PossibleAdducts cleanAdducts(PossibleAdducts possibleAdducts) { + Set adducts = possibleAdducts.getAdducts().stream().filter(a -> !a.isIonizationUnknown()).collect(Collectors.toCollection(HashSet::new)); + if (adducts.contains(M_PLUS) && adducts.contains(M_H_PLUS)) adducts.remove(M_PLUS); + return new PossibleAdducts(adducts); + } + + /** + * intersect with detectable adducts, add enforced and clean unknown adducts + * @param possibleAdducts + * @param as + * @param charge + * @return + */ + private PossibleAdducts processwithAdductSettingsAndClean(PossibleAdducts possibleAdducts, AdductSettings as, int charge) { + possibleAdducts = PossibleAdducts.intersection(possibleAdducts, as.getDetectable()); + + if (!as.getEnforced(charge).isEmpty()) + possibleAdducts = PossibleAdducts.union(possibleAdducts, as.getEnforced(charge)); + possibleAdducts = cleanAdducts(possibleAdducts); + + if (possibleAdducts.isEmpty()) + LoggerFactory.getLogger(this.getClass()).error("Final set of selected adducts is empty."); + + return possibleAdducts; + } + + public Optional getAdducts(Source... keyPrio) { + for (Source key : keyPrio) { + if (containsKey(key)) { + if (key.canBeEmpty || !get(key).isEmpty()) { + return Optional.of(get(key)); + } else { + warnIsEmpty(key); + } + } + } return Optional.empty(); } + /** + * not so efficient for many sources. But best if most of the times less or equal 1 sources are expected. + * @param keys + * @return + */ + protected Optional getUnionOfAdducts(Source... keys) { + PossibleAdducts union = null; + for (Source key : keys) + if (containsKey(key)) { + if (union == null) union = get(key); + else union = PossibleAdducts.union(union, get(key)); + } + return union == null ? 
Optional.empty() : Optional.of(union); + } + public PossibleAdducts getAllAdducts() { return values().stream().flatMap(it -> it.getAdducts().stream()).collect(Collectors.collectingAndThen(Collectors.toSet(), PossibleAdducts::new)); } @@ -68,34 +236,41 @@ public boolean hasAdducts() { return values().stream().anyMatch(it -> !it.isEmpty()); } - public boolean containsKey(Source key) { - return containsKey(key.name()); - } - - public PossibleAdducts get(Source key) { - return get(key.name()); + public Set getSources() { + return keySet(); } public PossibleAdducts put(@NotNull Source key, @NotNull PossibleAdducts value) { - return put(key.name(), value); + if (key == Source.MS1_PREPROCESSOR) + return super.put(key, ensureMS1PreprocessorAllowsToAddFallbackAdducts(value)); + else + return super.put(key, value); } - public Set getSourceStrings() { - //todo ElementFilter: Do we require String keys for flexibility or can be change it to the Source enum? - return Collections.unmodifiableSet(keySet()); - } - - public Set getSources() { - Set sourceSet = new HashSet<>(); - for (String key : keySet()) { - try { - Source source = Source.valueOf(key); - sourceSet.add(source); - } catch (IllegalArgumentException e) { - sourceSet.add(Source.UNSPECIFIED_SOURCE); + private PossibleAdducts ensureMS1PreprocessorAllowsToAddFallbackAdducts(@NotNull PossibleAdducts value) { + //this ensures we add fallback adducts, because we are never certain enough with MS1_PREPROCESSOR + //both, empty list or unknown adduct indicate to add fallback + if (!value.isEmpty() && !value.hasUnknownIontype()) { + Set adducts = new HashSet<>(value.getAdducts()); + //to indicate that MS1_PREPROCESSOR is always combined with fallback adducts + if (adducts.stream().anyMatch(PrecursorIonType::isPositive)) { + adducts.add(PrecursorIonType.unknown(1)); + } else { + adducts.add(PrecursorIonType.unknown(-1)); } + return new PossibleAdducts(adducts); + } else { + return value; } - return sourceSet; + } + + /** + * + * 
@param source + * @return true if {@link DetectedAdducts} contain adducts from a {@link Source} that is more important than the queried. + */ + public boolean hasMoreImportantSource(Source source) { + return keySet().stream().anyMatch(k -> k.compareTo(source)<0); } @Override @@ -120,8 +295,16 @@ public static DetectedAdducts fromString(String json) { String[] keyValue = mapping.replace("}", "").split("\\s*(:|->)\\s*\\{\\s*"); PossibleAdducts val = keyValue.length > 1 ? Arrays.stream(keyValue[1].split(",")).filter(Objects::nonNull).filter(s -> !s.isBlank()).map(PrecursorIonType::parsePrecursorIonType).flatMap(Optional::stream) .collect(Collectors.collectingAndThen(Collectors.toSet(), PossibleAdducts::new)) : new PossibleAdducts(); - if (keyValue.length > 0) - ads.put(keyValue[0], val); + if (keyValue.length > 0){ + Source source; + try { + source = Source.valueOf(keyValue[0]); + } catch (IllegalArgumentException e) { + source = Source.UNSPECIFIED_SOURCE; + } + ads.put(source, val); + } + } return ads; diff --git a/chemistry_base/src/main/java/de/unijena/bioinf/ChemistryBase/ms/Ms2Experiment.java b/chemistry_base/src/main/java/de/unijena/bioinf/ChemistryBase/ms/Ms2Experiment.java index 31c6a5cf9b..404be20504 100644 --- a/chemistry_base/src/main/java/de/unijena/bioinf/ChemistryBase/ms/Ms2Experiment.java +++ b/chemistry_base/src/main/java/de/unijena/bioinf/ChemistryBase/ms/Ms2Experiment.java @@ -60,21 +60,6 @@ public interface Ms2Experiment extends Cloneable, AnnotatedWithDefaults getDetectedAdducts() { - final PossibleAdducts adducts = getAnnotation(DetectedAdducts.class).flatMap(DetectedAdducts::getAdducts) - .orElseGet(PossibleAdducts::empty); - return adducts.isEmpty() ? 
Optional.empty() : Optional.of(adducts); - } - /** * Returns a list of detected adducts, if no adducts are found a fallback list will be returned @@ -83,12 +68,9 @@ default Optional getDetectedAdducts() { */ @NotNull default PossibleAdducts getPossibleAdductsOrFallback() { - final PossibleAdducts detectedOrFallback = getDetectedAdducts(). - orElseGet(() -> getAnnotation(AdductSettings.class).map(as -> as.getFallback(getPrecursorIonType().getCharge())).map(PossibleAdducts::new).orElseGet(() -> new PossibleAdducts(PropertyManager.DEFAULTS.createInstanceWithDefaults(AdductSettings.class).getFallback(getPrecursorIonType().getCharge()))) - ); - final PossibleAdducts detected = getAnnotation(AdductSettings.class).map(as -> PossibleAdducts.union(detectedOrFallback, as.getEnforced(getPrecursorIonType().getCharge()))).orElse(detectedOrFallback); - - return detected; + return getAnnotation(DetectedAdducts.class).orElseGet(DetectedAdducts::new).getDetectedAdductsAndOrFallback( + getAnnotation(AdductSettings.class).orElse(PropertyManager.DEFAULTS.createInstanceWithDefaults(AdductSettings.class)), getPrecursorIonType().getCharge() + ); } diff --git a/chemistry_base/src/main/java/de/unijena/bioinf/ChemistryBase/ms/PossibleAdducts.java b/chemistry_base/src/main/java/de/unijena/bioinf/ChemistryBase/ms/PossibleAdducts.java index 8fe817cdb5..61157596a7 100644 --- a/chemistry_base/src/main/java/de/unijena/bioinf/ChemistryBase/ms/PossibleAdducts.java +++ b/chemistry_base/src/main/java/de/unijena/bioinf/ChemistryBase/ms/PossibleAdducts.java @@ -84,6 +84,10 @@ public boolean hasNegativeCharge() { return false; } + public boolean hasUnknownIontype() { + return value.stream().anyMatch(PrecursorIonType::isIonizationUnknown); + } + public PossibleAdducts keepOnlyPositive() { return new PossibleAdducts(value.stream().filter(it -> it.getCharge() > 1).collect(Collectors.toSet())); } diff --git a/chemistry_base/src/main/java/de/unijena/bioinf/ChemistryBase/ms/ft/model/AdductSettings.java 
b/chemistry_base/src/main/java/de/unijena/bioinf/ChemistryBase/ms/ft/model/AdductSettings.java index 4ba1a9b5d5..2762404599 100644 --- a/chemistry_base/src/main/java/de/unijena/bioinf/ChemistryBase/ms/ft/model/AdductSettings.java +++ b/chemistry_base/src/main/java/de/unijena/bioinf/ChemistryBase/ms/ft/model/AdductSettings.java @@ -40,21 +40,23 @@ public class AdductSettings implements Ms2ExperimentAnnotation { @NotNull protected final Set enforced; @NotNull protected final Set detectable; @NotNull protected final Set fallback; + @NotNull protected final boolean prioritizeInputFileAdducts; /** * @param enforced Enforced ion modes that are always considered. * @param detectable Detectable ion modes which are only considered if there is an indication in the MS1 scan (e.g. correct mass delta). * @param fallback Fallback ion modes which are considered if the auto detection did not find any indication for an ion mode. + * @param prioritizeInputFileAdducts Adducts specified in the input file are used as is independent of what enforced/detectable/fallback adducts are set. 
*/ @DefaultInstanceProvider - public static AdductSettings newInstance(@DefaultProperty(propertyKey = "enforced") Set enforced, @DefaultProperty(propertyKey = "detectable") Set detectable, @DefaultProperty(propertyKey = "fallback") Set fallback) { - return new AdductSettings(enforced, detectable, fallback); + public static AdductSettings newInstance(@DefaultProperty(propertyKey = "enforced") Set enforced, @DefaultProperty(propertyKey = "detectable") Set detectable, @DefaultProperty(propertyKey = "fallback") Set fallback, @DefaultProperty(propertyKey = "prioritizeInputFileAdducts") boolean prioritizeInputFileAdducts) { + return new AdductSettings(enforced, detectable, fallback, prioritizeInputFileAdducts); } public AdductSettings withEnforced(Set enforced) { final HashSet enforcedJoin = new HashSet<>(this.enforced); enforcedJoin.addAll(enforced); - return new AdductSettings(enforcedJoin, detectable, fallback); + return new AdductSettings(enforcedJoin, detectable, fallback, prioritizeInputFileAdducts); } @@ -62,12 +64,14 @@ protected AdductSettings() { this.enforced = new HashSet<>(); this.detectable = new HashSet<>(); this.fallback = new HashSet<>(); + this.prioritizeInputFileAdducts = true; } - public AdductSettings(@NotNull Set enforced, @NotNull Set detectable, @NotNull Set fallback) { + public AdductSettings(@NotNull Set enforced, @NotNull Set detectable, @NotNull Set fallback, @NotNull boolean prioritizeInputFileAdducts) { this.enforced = enforced; this.detectable = detectable; this.fallback = fallback; + this.prioritizeInputFileAdducts = prioritizeInputFileAdducts; } public Set getEnforced() { @@ -124,4 +128,8 @@ private Set availableAdductsForIonizations(Set e.g. 
for checking want went wrong /** - * apply {@link de.unijena.bioinf.ChemistryBase.chem.FormulaConstraints} (chemical alphabet / element filter, and filters such as {@link de.unijena.bioinf.ChemistryBase.chem.utils.ValenceFilter}) to formulas from bottum-up search + * apply chemical alphabet / element filter specified via {@link de.unijena.bioinf.ChemistryBase.chem.FormulaConstraints} to formulas from bottom-up search. + * Filters such as {@link de.unijena.bioinf.ChemistryBase.chem.utils.ValenceFilter} are always applied */ public final boolean applyFormulaConstraintsToBottomUp; diff --git a/chemistry_base/src/main/java/de/unijena/bioinf/ChemistryBase/utils/DataQuality.java b/chemistry_base/src/main/java/de/unijena/bioinf/ChemistryBase/utils/DataQuality.java index 449c62ff88..e94b1c594f 100644 --- a/chemistry_base/src/main/java/de/unijena/bioinf/ChemistryBase/utils/DataQuality.java +++ b/chemistry_base/src/main/java/de/unijena/bioinf/ChemistryBase/utils/DataQuality.java @@ -1,16 +1,9 @@ package de.unijena.bioinf.ChemistryBase.utils; -public enum DataQuality { +import io.swagger.v3.oas.annotations.media.Schema; - /** - * Features with the lowest quality are probably not real signals. We would rather delete then. - */ - LOWEST(0f), - /** - * Features with the lowest quality that have dependencies (e.g. are adducts or isotopes) should not - * be deleted for this reason. - */ - LOWEST_WITH_DEPENDENCIES(0f), +@Schema(enumAsRef = true, nullable = true) +public enum DataQuality { /** * If a subcategory is "NOT_APPLICABLE" then it should not be considered for quality check @@ -18,6 +11,11 @@ public enum DataQuality { */ NOT_APPLICABLE(0f), + /** + * Features with the lowest quality are probably not real signals. We would rather delete them. + */ + LOWEST(0f), + /** * features with bad quality are maybe real features. Still, we don't want to list them * in our feature list by default. 
diff --git a/chemistry_base/src/main/resources/de.unijena.bioinf.ms.defaults/chemistry_base.auto.config b/chemistry_base/src/main/resources/de.unijena.bioinf.ms.defaults/chemistry_base.auto.config index 5b128e68f2..61b4652179 100644 --- a/chemistry_base/src/main/resources/de.unijena.bioinf.ms.defaults/chemistry_base.auto.config +++ b/chemistry_base/src/main/resources/de.unijena.bioinf.ms.defaults/chemistry_base.auto.config @@ -54,6 +54,9 @@ AdductSettings.detectable = [M+H]+,[M+K]+,[M+Na]+,[M+H-H2O]+,[M+H-H4O2]+,[M+NH4] # Fallback ion modes which are considered if the auto detection did not find any indication for # an ion mode. AdductSettings.fallback = [M+H]+,[M-H]-,[M+Na]+,[M+K]+ +# Adducts specified in the input file are used as is independent of what +# enforced/detectable/fallback adducts are set. +AdductSettings.prioritizeInputFileAdducts = true # This configuration holds a set of neutral formulas to be used as candidates for SIRIUS. # The formulas may be provided by the user, from a database or from the input file. diff --git a/confidence_score_predict_oss/src/main/java/de/unijena/bioinf/confidence_score/ExpansiveSearchConfidenceMode.java b/confidence_score_predict_oss/src/main/java/de/unijena/bioinf/confidence_score/ExpansiveSearchConfidenceMode.java index 013c589f6a..b54931345b 100644 --- a/confidence_score_predict_oss/src/main/java/de/unijena/bioinf/confidence_score/ExpansiveSearchConfidenceMode.java +++ b/confidence_score_predict_oss/src/main/java/de/unijena/bioinf/confidence_score/ExpansiveSearchConfidenceMode.java @@ -3,6 +3,7 @@ import de.unijena.bioinf.ChemistryBase.utils.DescriptiveOptions; import de.unijena.bioinf.ms.annotations.Ms2ExperimentAnnotation; import de.unijena.bioinf.ms.properties.DefaultProperty; +import io.swagger.v3.oas.annotations.media.Schema; /** * Expansive search parameters. 
@@ -12,7 +13,7 @@ public class ExpansiveSearchConfidenceMode implements Ms2ExperimentAnnotation { - + @Schema(enumAsRef = true, name = "ConfidenceMode") public enum Mode implements DescriptiveOptions { OFF("No expansive search is performed."), EXACT("Use confidence score in exact mode: Only molecular structures identical to the true structure should count as correct identification."),//todo NewWorkflow: change description -> should be javadoc instead to allow for automatic usage in config files, CLI and GUI diff --git a/document-storage/src/main/java/de/unijena/bioinf/storage/db/nosql/Database.java b/document-storage/src/main/java/de/unijena/bioinf/storage/db/nosql/Database.java index 803ad709ca..e911bcdd72 100644 --- a/document-storage/src/main/java/de/unijena/bioinf/storage/db/nosql/Database.java +++ b/document-storage/src/main/java/de/unijena/bioinf/storage/db/nosql/Database.java @@ -92,8 +92,12 @@ default int modify(Object primaryKey, Class clazz, Consumer modifier) Iterable find(Filter filter, Class clazz, String sortField, SortOrder sortOrder, String... withOptionalFields) throws IOException; + Iterable find(Filter filter, Class clazz, String[] sortFields, SortOrder[] sortOrders, String... withOptionalFields) throws IOException; + Iterable find(Filter filter, Class clazz, long offset, int pageSize, String sortField, SortOrder sortOrder, String... withOptionalFields) throws IOException; + Iterable find(Filter filter, Class clazz, long offset, int pageSize, String[] sortFields, SortOrder[] sortOrders, String... withOptionalFields) throws IOException; + Iterable find(String collectionName, Filter filter, String... withOptionalFields) throws IOException; Iterable find(String collectionName, Filter filter, long offset, int pageSize, String... withOptionalFields) throws IOException; @@ -108,8 +112,12 @@ default int modify(Object primaryKey, Class clazz, Consumer modifier) Iterable findAll(Class clazz, String sortField, SortOrder sortOrder, String... 
withOptionalFields) throws IOException; + Iterable findAll(Class clazz, String[] sortFields, SortOrder[] sortOrders, String... withOptionalFields) throws IOException; + Iterable findAll(Class clazz, long offset, int pageSize, String sortField, SortOrder sortOrder, String... withOptionalFields) throws IOException; + Iterable findAll(Class clazz, long offset, int pageSize, String[] sortFields, SortOrder[] sortOrders, String... withOptionalFields) throws IOException; + Iterable findAll(String collectionName, String... withOptionalFields) throws IOException; Iterable findAll(String collectionName, long offset, int pageSize, String... withOptionalFields) throws IOException; @@ -271,10 +279,18 @@ default Stream findStr(Filter filter, Class clazz, String sortField, S return StreamSupport.stream(find(filter, clazz, sortField, sortOrder, withOptionalFields).spliterator(), false); } + default Stream findStr(Filter filter, Class clazz, String[] sortFields, SortOrder[] sortOrders, String... withOptionalFields) throws IOException { + return StreamSupport.stream(find(filter, clazz, sortFields, sortOrders, withOptionalFields).spliterator(), false); + } + default Stream findStr(Filter filter, Class clazz, long offset, int pageSize, String sortField, SortOrder sortOrder, String... withOptionalFields) throws IOException { return StreamSupport.stream(find(filter, clazz, offset, pageSize, sortField, sortOrder, withOptionalFields).spliterator(), false); } + default Stream findStr(Filter filter, Class clazz, long offset, int pageSize, String[] sortFields, SortOrder[] sortOrders, String... withOptionalFields) throws IOException { + return StreamSupport.stream(find(filter, clazz, offset, pageSize, sortFields, sortOrders, withOptionalFields).spliterator(), false); + } + default Stream findStr(String collectionName, Filter filter, String... 
withOptionalFields) throws IOException { return StreamSupport.stream(find(collectionName, filter, withOptionalFields).spliterator(), false); } @@ -303,10 +319,18 @@ default Stream findAllStr(Class clazz, String sortField, SortOrder sor return StreamSupport.stream(findAll(clazz, sortField, sortOrder, withOptionalFields).spliterator(), false); } + default Stream findAllStr(Class clazz, String[] sortFields, SortOrder[] sortOrders, String... withOptionalFields) throws IOException { + return StreamSupport.stream(findAll(clazz, sortFields, sortOrders, withOptionalFields).spliterator(), false); + } + default Stream findAllStr(Class clazz, long offset, int pageSize, String sortField, SortOrder sortOrder, String... withOptionalFields) throws IOException { return StreamSupport.stream(findAll(clazz, offset, pageSize, sortField, sortOrder, withOptionalFields).spliterator(), false); } + default Stream findAllStr(Class clazz, long offset, int pageSize, String[] sortFields, SortOrder[] sortOrders, String... withOptionalFields) throws IOException { + return StreamSupport.stream(findAll(clazz, offset, pageSize, sortFields, sortOrders, withOptionalFields).spliterator(), false); + } + default Stream findAllStr(String collectionName, String... 
withOptionalFields) throws IOException { return StreamSupport.stream(findAll(collectionName, withOptionalFields).spliterator(), false); } diff --git a/document-storage/src/main/java/de/unijena/bioinf/storage/db/nosql/nitrite/NitriteDatabase.java b/document-storage/src/main/java/de/unijena/bioinf/storage/db/nosql/nitrite/NitriteDatabase.java index a32bb35e9c..5efee53f39 100644 --- a/document-storage/src/main/java/de/unijena/bioinf/storage/db/nosql/nitrite/NitriteDatabase.java +++ b/document-storage/src/main/java/de/unijena/bioinf/storage/db/nosql/nitrite/NitriteDatabase.java @@ -479,6 +479,18 @@ private RecordStream maybeProjectDocuments(String collectionName, @Nul return CustomDocumentStream.of(cursor).project(omittedFields); } + private FindOptions translateSort(String[] sortFields, SortOrder[] sortOrder) { + if (sortFields.length == sortOrder.length && sortFields.length > 0) { + FindOptions options = FindOptions.orderBy(sortFields[0], (sortOrder[0] == SortOrder.ASCENDING) ? org.dizitart.no2.common.SortOrder.Ascending : org.dizitart.no2.common.SortOrder.Descending); + for (int i = 1; i < sortFields.length; i++) { + options.thenOrderBy(sortFields[i], (sortOrder[i] == SortOrder.ASCENDING) ? org.dizitart.no2.common.SortOrder.Ascending : org.dizitart.no2.common.SortOrder.Descending); + } + return options; + } else { + return new FindOptions(); + } + } + private Iterable doFind(Class clazz, @Nullable Filter filter, @Nullable FindOptions findOptions) throws IOException { if (filter != null && findOptions != null) { return getRepository(clazz).find(getFilter(filter), findOptions); @@ -671,11 +683,22 @@ public Iterable find(Filter filter, Class clazz, String sortField, Sor return this.read(() -> maybeProject(clazz, filter, FindOptions.orderBy(sortField, (sortOrder == SortOrder.ASCENDING) ? 
org.dizitart.no2.common.SortOrder.Ascending : org.dizitart.no2.common.SortOrder.Descending), withOptionalFields)); } + @Override + public Iterable find(Filter filter, Class clazz, String[] sortFields, SortOrder[] sortOrders, String... withOptionalFields) throws IOException { + return this.read(() -> maybeProject(clazz, filter, translateSort(sortFields, sortOrders), withOptionalFields)); + } + @Override public Iterable find(Filter filter, Class clazz, long offset, int pageSize, String sortField, SortOrder sortOrder, String... withOptionalFields) throws IOException { return this.read(() -> maybeProject(clazz, filter, FindOptions.orderBy(sortField, (sortOrder == SortOrder.ASCENDING) ? org.dizitart.no2.common.SortOrder.Ascending : org.dizitart.no2.common.SortOrder.Descending).skip(offset).limit(pageSize), withOptionalFields)); } + @Override + public Iterable find(Filter filter, Class clazz, long offset, int pageSize, String[] sortFields, SortOrder[] sortOrders, String... withOptionalFields) throws IOException { + FindOptions options = translateSort(sortFields, sortOrders); + return this.read(() -> maybeProject(clazz, filter, options.skip(offset).limit(pageSize), withOptionalFields)); + } + @Override public Iterable find(String collectionName, Filter filter, String... withOptionalFields) throws IOException { return this.read(() -> maybeProjectDocuments(collectionName, filter, null, withOptionalFields)); @@ -711,11 +734,22 @@ public Iterable findAll(Class clazz, String sortField, SortOrder sortO return this.read(() -> maybeProject(clazz, null, FindOptions.orderBy(sortField, (sortOrder == SortOrder.ASCENDING) ? org.dizitart.no2.common.SortOrder.Ascending : org.dizitart.no2.common.SortOrder.Descending), withOptionalFields)); } + @Override + public Iterable findAll(Class clazz, String[] sortFields, SortOrder[] sortOrders, String... 
withOptionalFields) throws IOException { + return this.read(() -> maybeProject(clazz, null, translateSort(sortFields, sortOrders), withOptionalFields)); + } + @Override public Iterable findAll(Class clazz, long offset, int pageSize, String sortField, SortOrder sortOrder, String... withOptionalFields) throws IOException { return this.read(() -> maybeProject(clazz, null, FindOptions.orderBy(sortField, (sortOrder == SortOrder.ASCENDING) ? org.dizitart.no2.common.SortOrder.Ascending : org.dizitart.no2.common.SortOrder.Descending).skip(offset).limit(pageSize), withOptionalFields)); } + @Override + public Iterable findAll(Class clazz, long offset, int pageSize, String[] sortFields, SortOrder[] sortOrders, String... withOptionalFields) throws IOException { + FindOptions options = translateSort(sortFields, sortOrders); + return this.read(() -> maybeProject(clazz, null, options.skip(offset).limit(pageSize), withOptionalFields)); + } + @Override public Iterable findAll(String collectionName, String... 
withOptionalFields) throws IOException { return this.read(() -> maybeProjectDocuments(collectionName, null, null, withOptionalFields)); diff --git a/fragmentation_tree/fragmentation_tree_construction/src/main/java/de/unijena/bioinf/sirius/plugins/BottomUpSearch.java b/fragmentation_tree/fragmentation_tree_construction/src/main/java/de/unijena/bioinf/sirius/plugins/BottomUpSearch.java index f4f354cad4..6162bc95fa 100644 --- a/fragmentation_tree/fragmentation_tree_construction/src/main/java/de/unijena/bioinf/sirius/plugins/BottomUpSearch.java +++ b/fragmentation_tree/fragmentation_tree_construction/src/main/java/de/unijena/bioinf/sirius/plugins/BottomUpSearch.java @@ -16,10 +16,7 @@ import java.io.IOException; import java.io.ObjectInputStream; -import java.util.ArrayList; -import java.util.HashSet; -import java.util.List; -import java.util.Set; +import java.util.*; import java.util.stream.Collectors; import java.util.zip.GZIPInputStream; @@ -86,10 +83,14 @@ private static List generateDecompositions(ProcessedInput input, Whiteset ws = input.getAnnotationOrThrow(Whiteset.class); FormulaSearchSettings formulaSearchSettings = input.getAnnotation(FormulaSearchSettings.class, FormulaSearchSettings::bottomUpOnly); + final FormulaConstraints formulaConstraints = input.getAnnotationOrThrow(FormulaConstraints.class); + PossibleAdducts possibleAdducts = input.getAnnotationOrThrow(PossibleAdducts.class); if (formulaSearchSettings.applyFormulaConstraintsToBottomUp) { - final FormulaConstraints formulaConstraints = input.getAnnotationOrThrow(FormulaConstraints.class); - PossibleAdducts possibleAdducts = input.getAnnotationOrThrow(PossibleAdducts.class); formulas = Whiteset.filterMeasuredFormulas(formulas, formulaConstraints, possibleAdducts.getAdducts().stream().filter(x->x.isSupportedForFragmentationTreeComputation()).collect(Collectors.toSet())); + } else { + //filter need to be applied because later we cannot differientiate formulas with and without applied filter anyways but 
need this to select adducts. + FormulaConstraints formulaConstraintsWithAllElements = new FormulaConstraints(formulaConstraints.getChemicalAlphabet().extend(formulas.stream().flatMap(mf -> mf.elements().stream()).filter(Objects::isNull).distinct().toArray(Element[]::new)), formulaConstraints.getFilters()); + formulas = Whiteset.filterMeasuredFormulas(formulas, formulaConstraintsWithAllElements, possibleAdducts.getAdducts().stream().filter(x->x.isSupportedForFragmentationTreeComputation()).collect(Collectors.toSet())); } Whiteset bottomUpWs = Whiteset.ofMeasuredFormulas(formulas, BottomUpSearch.class); diff --git a/io/src/main/java/de/unijena/bioinf/babelms/mgf/MgfParser.java b/io/src/main/java/de/unijena/bioinf/babelms/mgf/MgfParser.java index f2fe8ba90b..324ff42b71 100644 --- a/io/src/main/java/de/unijena/bioinf/babelms/mgf/MgfParser.java +++ b/io/src/main/java/de/unijena/bioinf/babelms/mgf/MgfParser.java @@ -379,7 +379,9 @@ private synchronized Ms2Experiment parseInst(URI source) throws IOException { exp.getMs2Spectra().add(new MutableMs2Spectrum(spec.spectrum)); } if (exp.getPrecursorIonType() == null || exp.getPrecursorIonType().isUnknownNoCharge()) { - exp.setPrecursorIonType(spec.ionType); + exp.setPrecursorIonType(PrecursorIonType.unknown(spec.ionType.getCharge())); + if (!spec.ionType.isIonizationUnknown()) + exp.setAnnotation(DetectedAdducts.class, DetectedAdducts.singleton(DetectedAdducts.Source.INPUT_FILE, spec.ionType)); } if (spec.inchi != null && spec.inchi.startsWith("InChI=")) { exp.setAnnotation(InChI.class, newInChI(null, spec.inchi)); diff --git a/io/src/main/java/de/unijena/bioinf/babelms/ms/JenaMsParser.java b/io/src/main/java/de/unijena/bioinf/babelms/ms/JenaMsParser.java index ef1c46a6c6..e5fc10eb05 100644 --- a/io/src/main/java/de/unijena/bioinf/babelms/ms/JenaMsParser.java +++ b/io/src/main/java/de/unijena/bioinf/babelms/ms/JenaMsParser.java @@ -449,7 +449,9 @@ private void flushCompound() { 
exp.setPrecursorIonType(PrecursorIonType.unknown(charge)); } } else { - exp.setPrecursorIonType(ionization); + exp.setPrecursorIonType(PrecursorIonType.unknown(ionization.getCharge())); + if (!ionization.isIonizationUnknown()) + exp.setAnnotation(DetectedAdducts.class, DetectedAdducts.singleton(DetectedAdducts.Source.INPUT_FILE, ionization)); } exp.setMs1Spectra(ms1spectra); exp.setMs2Spectra(ms2spectra); diff --git a/lcms2/src/main/java/de/unijena/bioinf/lcms/LCMSProcessing.java b/lcms2/src/main/java/de/unijena/bioinf/lcms/LCMSProcessing.java index 121a164c62..b3e98e9253 100644 --- a/lcms2/src/main/java/de/unijena/bioinf/lcms/LCMSProcessing.java +++ b/lcms2/src/main/java/de/unijena/bioinf/lcms/LCMSProcessing.java @@ -2,6 +2,8 @@ import com.google.common.collect.Range; import de.unijena.bioinf.ChemistryBase.jobs.SiriusJobs; +import de.unijena.bioinf.ChemistryBase.math.Statistics; +import de.unijena.bioinf.ChemistryBase.ms.Deviation; import de.unijena.bioinf.ChemistryBase.ms.utils.SimpleSpectrum; import de.unijena.bioinf.jjobs.BasicJJob; import de.unijena.bioinf.jjobs.JJob; @@ -31,6 +33,8 @@ import de.unijena.bioinf.ms.persistence.model.core.feature.AlignedFeatures; import de.unijena.bioinf.ms.persistence.model.core.run.*; import it.unimi.dsi.fastutil.doubles.DoubleArrayList; +import it.unimi.dsi.fastutil.doubles.DoubleList; +import it.unimi.dsi.fastutil.floats.FloatArrayList; import it.unimi.dsi.fastutil.ints.Int2ObjectMap; import it.unimi.dsi.fastutil.ints.Int2ObjectOpenHashMap; import it.unimi.dsi.fastutil.ints.IntArrayList; @@ -172,6 +176,7 @@ public AlignmentBackbone align() throws IOException { samples.get(0).getPolarity(), samples.size() // TODO: better use sample idx 0? 
); + makeMergeStatistics(merged, alignmentBackbone.getSamples()); samples.add(merged); sampleByIdx.put(merged.getUid(), merged); alignmentBackbone = alignmentStrategy.align(merged, alignmentBackbone, Arrays.asList(alignmentBackbone.getSamples()), alignmentAlgorithm, alignmentScorerFull); @@ -183,6 +188,26 @@ public AlignmentBackbone align() throws IOException { return alignmentBackbone; } + private void makeMergeStatistics(ProcessedSample merged, ProcessedSample[] samples) { + FloatArrayList ms2NoiseLevels = new FloatArrayList(); + FloatArrayList ppmsWithinTraces = new FloatArrayList(), ppmsBetweenTraces = new FloatArrayList(); + FloatArrayList absWithinTraces = new FloatArrayList(), absBetweenTraces = new FloatArrayList(); + for (ProcessedSample sample : samples) { + SampleStats s = sample.getStorage().getStatistics(); + ms2NoiseLevels.add(s.getMs2NoiseLevel()); + ppmsWithinTraces.add((float)s.getMs1MassDeviationWithinTraces().getPpm()); + ppmsBetweenTraces.add((float)s.getMinimumMs1MassDeviationBetweenTraces().getPpm()); + absWithinTraces.add((float)s.getMs1MassDeviationWithinTraces().getAbsolute()); + absBetweenTraces.add((float)s.getMinimumMs1MassDeviationBetweenTraces().getAbsolute()); + } + SampleStats statistics = new SampleStats( + new float[0], (float)Statistics.robustAverage(ms2NoiseLevels.toFloatArray()), + new Deviation(Statistics.robustAverage(ppmsWithinTraces.toFloatArray()),Statistics.robustAverage(absWithinTraces.toFloatArray())), + new Deviation(Statistics.robustAverage(ppmsBetweenTraces.toFloatArray()),Statistics.robustAverage(absBetweenTraces.toFloatArray())) + ); + merged.getStorage().setStatistics(statistics); + } + public ProcessedSample merge(AlignmentBackbone backbone) { ProcessedSample merged = this.samples.get(samples.size()-1); mergeStrategy.merge(merged, backbone); @@ -241,20 +266,28 @@ private SampleStatistics collectFinalStatistics(ProcessedSample merged, Alignmen * this code relies on lazy evaluation of streams. 
If that is not the case we might have a huge memory peak here :/ */ DoubleArrayList fwhms = new DoubleArrayList(); - DoubleArrayList ms2Noise = new DoubleArrayList(); + DoubleArrayList heightDividedByfwhms = new DoubleArrayList(); siriusDatabaseAdapter.getImportedFeatureStream(true) .filter(x -> x.getRunId() == merged.getRun().getRunId()) .filter(x -> x.getMSData().get().getIsotopePattern() != null && x.getMSData().get().getIsotopePattern().size() >= 2) .forEach(x->{ fwhms.add(x.getFwhm().doubleValue()); + heightDividedByfwhms.add(x.getApexIntensity()/x.getFwhm()); }); SampleStats st = merged.getStorage().getStatistics(); fwhms.sort(null); + DoubleList ms2NoiseLevel = new DoubleArrayList(); + for (ProcessedSample sample : alignmentBackbone.getSamples()) { + SampleStats statistics = sample.getStorage().getStatistics(); + ms2NoiseLevel.add(statistics.getMs2NoiseLevel()); + } return new SampleStatistics( st.getMs1MassDeviationWithinTraces(), alignmentBackbone.getStatistics().getExpectedMassDeviationBetweenSamples(), alignmentBackbone.getStatistics().getExpectedRetentionTimeDeviation(), - fwhms.getDouble(fwhms.size()/2), (int)alignmentBackbone.getStatistics().getMedianNumberOfAlignments(), + fwhms.getDouble(fwhms.size()/2), + heightDividedByfwhms.getDouble(heightDividedByfwhms.size()/2), + (int)alignmentBackbone.getStatistics().getMedianNumberOfAlignments(), st.ms2NoiseLevel() ); diff --git a/lcms2/src/main/java/de/unijena/bioinf/lcms/adducts/AdductNetwork.java b/lcms2/src/main/java/de/unijena/bioinf/lcms/adducts/AdductNetwork.java index 8da4a438a9..2418137e0e 100644 --- a/lcms2/src/main/java/de/unijena/bioinf/lcms/adducts/AdductNetwork.java +++ b/lcms2/src/main/java/de/unijena/bioinf/lcms/adducts/AdductNetwork.java @@ -2,6 +2,7 @@ import com.google.common.collect.Range; import de.unijena.bioinf.ChemistryBase.algorithm.BinarySearch; +import de.unijena.bioinf.ChemistryBase.chem.PrecursorIonType; import de.unijena.bioinf.ChemistryBase.chem.RetentionTime; import 
de.unijena.bioinf.ChemistryBase.jobs.SiriusJobs; import de.unijena.bioinf.ChemistryBase.ms.Deviation; @@ -35,6 +36,7 @@ public class AdductNetwork { protected AdductNode[] rtOrderedNodes; protected AdductManager adductManager; List> subgraphs = new ArrayList<>(); + List singletons = new ArrayList<>(); protected Deviation deviation; ProjectSpaceTraceProvider provider; @@ -170,6 +172,7 @@ protected NetworkResult compute() throws Exception { if (!visited.get(rtOrderedNodes[k].index)) { List nodes = spread(rtOrderedNodes[k], visited); if (nodes.size()>1) subgraphs.add(nodes); + else singletons.add(nodes.get(0)); } } } @@ -227,6 +230,15 @@ protected Object compute() throws Exception { } })); } + jobs.add(manager.submitJob(new BasicJJob() { + @Override + protected Object compute() throws Exception { + for (AdductNode node : singletons) { + updateRoutine.accept(singletonCompound(node)); + } + return ""; + } + })); jobs.forEach(JJob::takeResult); } @@ -372,6 +384,11 @@ private Compound extractCompound(Map assignments, maxRt = Math.max(maxRt, n.features.getRetentionTime().getEndTime()); } rt /= intens; + if (compound.size()==1) { + // damned, have to look closer into that. 
But if a compound cannot be resolved properly, then + // adduct detection is likely wrong + compound.get(0).features.getDetectedAdducts().add(DetectedAdduct.builder().adduct(PrecursorIonType.unknown(compound.get(0).getFeature().getCharge())).score(0.5d).source(de.unijena.bioinf.ChemistryBase.ms.DetectedAdducts.Source.LCMS_ALIGN).build()); + } return new Compound( 0, new RetentionTime(minRt, maxRt, rt), @@ -382,6 +399,22 @@ private Compound extractCompound(Map assignments, ); } + public Compound singletonCompound(AdductNode n) { + AlignedFeatures f = n.features; + if (f.getDetectedAdducts()==null) { + f.setDetectedAdducts(DetectedAdducts.singleton(de.unijena.bioinf.ChemistryBase.ms.DetectedAdducts.Source.LCMS_ALIGN, PrecursorIonType.unknown(f.getCharge()))); + } + return new Compound( + 0, + f.getRetentionTime(), + null, + f.getName(), + n.hasMsMs, + new ArrayList<>(List.of(f)), + new ArrayList<>() + ); + } + private String pp(SimpleSpectrum spec) { StringBuilder buf = new StringBuilder(); SimpleSpectrum xs =Spectrums.getNormalizedSpectrum(spec, Normalization.Max); diff --git a/lcms2/src/main/java/de/unijena/bioinf/lcms/merge/MergeTracesWithoutGapFilling.java b/lcms2/src/main/java/de/unijena/bioinf/lcms/merge/MergeTracesWithoutGapFilling.java index 117a484667..1f73558f69 100644 --- a/lcms2/src/main/java/de/unijena/bioinf/lcms/merge/MergeTracesWithoutGapFilling.java +++ b/lcms2/src/main/java/de/unijena/bioinf/lcms/merge/MergeTracesWithoutGapFilling.java @@ -11,6 +11,7 @@ import it.unimi.dsi.fastutil.ints.Int2ObjectOpenHashMap; import it.unimi.dsi.fastutil.ints.IntArrayList; import it.unimi.dsi.fastutil.ints.IntOpenHashSet; +import org.slf4j.LoggerFactory; import java.util.ArrayList; import java.util.Arrays; @@ -59,12 +60,12 @@ protected Object compute() throws Exception { jobs.clear(); sample.inactive(); } - System.out.println("Average number of Alignmments in backbone: " + alignment.getStatistics().getAverageNumberOfAlignments()); - System.out.println("Median number 
of Alignmments in backbone: " + alignment.getStatistics().getMedianNumberOfAlignments()); + LoggerFactory.getLogger(MergeTracesWithoutGapFilling.class).debug("Average number of Alignmments in backbone: " + alignment.getStatistics().getAverageNumberOfAlignments()); + LoggerFactory.getLogger(MergeTracesWithoutGapFilling.class).debug("Median number of Alignmments in backbone: " + alignment.getStatistics().getMedianNumberOfAlignments()); for (int k=0; k < mergedNoiseLevelPerScan.length; ++k) { mergedNoiseLevelPerScan[k] /= alignment.getStatistics().getAverageNumberOfAlignments(); } - merged.getStorage().setStatistics(SampleStats.builder().noiseLevelPerScan(mergedNoiseLevelPerScan).ms2NoiseLevel(0f).ms1MassDeviationWithinTraces(new Deviation(10)).minimumMs1MassDeviationBetweenTraces(new Deviation(10)).build()); + merged.getStorage().setStatistics(merged.getStorage().getStatistics().withNoiseLevelPerScan(mergedNoiseLevelPerScan)); } @@ -79,9 +80,6 @@ private void prepareRects(ProcessedSample merged, AlignmentBackbone alignment) { MergeStorage mergeStorage = merged.getStorage().getMergeStorage(); TraceRectangleMap rectangleMap = mergeStorage.getRectangleMap(); for (MoI m : merged.getStorage().getAlignmentStorage()) { - if (m.isIsotopePeak()) { - System.err.println("That shouldn't happen, right?"); - } final AlignedMoI moi = (AlignedMoI)m; Rect r = new Rect(moi.getRect()); r.minMz = (float)moi.getMz(); diff --git a/lcms2/src/main/java/de/unijena/bioinf/lcms/msms/MergedSpectrum.java b/lcms2/src/main/java/de/unijena/bioinf/lcms/msms/MergedSpectrum.java index 17d3c29025..debb9d082a 100644 --- a/lcms2/src/main/java/de/unijena/bioinf/lcms/msms/MergedSpectrum.java +++ b/lcms2/src/main/java/de/unijena/bioinf/lcms/msms/MergedSpectrum.java @@ -65,7 +65,7 @@ public void merge(MsMsQuerySpectrum msms) { headers[headers.length-1] = msms.header; sampleIds.add(msms.sampleId); - this.chimericPollution += weight * (msms.ms1Intensity/msms.chimericPollution); + this.chimericPollution += 
weight * (msms.chimericPollution/msms.ms1Intensity); } public double getChimericPollutionRatio() { diff --git a/lcms2/src/main/java/de/unijena/bioinf/lcms/projectspace/PickFeaturesAndImportToSirius.java b/lcms2/src/main/java/de/unijena/bioinf/lcms/projectspace/PickFeaturesAndImportToSirius.java index 89aef6572f..56e603dac4 100644 --- a/lcms2/src/main/java/de/unijena/bioinf/lcms/projectspace/PickFeaturesAndImportToSirius.java +++ b/lcms2/src/main/java/de/unijena/bioinf/lcms/projectspace/PickFeaturesAndImportToSirius.java @@ -207,6 +207,7 @@ private void extractMs1(DbMapper dbMapper, ProcessedSample mergedSample, MergedT if (featuresParent[i]!=null) { isotopePatterns[i] = new SimpleMutableSpectrum(); isotopePatterns[i].addPeak(mergedTraceParent.averagedMz(), 1d); + featuresParent[i].setHasMs1(true); } } @@ -464,6 +465,7 @@ public void assignMs2(DbMapper dbMapper, ProcessedSample mergedSample, MergedTra merged = Spectrums.mergeSpectra(new Deviation(10), true, false, Arrays.stream(msn).map(MergedMSnSpectrum::getPeaks).toArray(SimpleSpectrum[]::new)); } else merged = msn[0].getPeaks(); feature.getMSData().get().setMergedMSnSpectrum(Spectrums.getNormalizedSpectrum(merged, Normalization.Sum)); + feature.setHasMsMs(true); } } diff --git a/lcms2/src/main/java/de/unijena/bioinf/lcms/quality/CheckAlignmentQuality.java b/lcms2/src/main/java/de/unijena/bioinf/lcms/quality/CheckAlignmentQuality.java index a464125730..f265228b9b 100644 --- a/lcms2/src/main/java/de/unijena/bioinf/lcms/quality/CheckAlignmentQuality.java +++ b/lcms2/src/main/java/de/unijena/bioinf/lcms/quality/CheckAlignmentQuality.java @@ -41,7 +41,7 @@ public void addToReport(QualityReport report, MergedLCMSRun run, AlignedFeatures double[] ints = feature.getFeatures().stream().flatMap(List::stream).mapToDouble(AbstractFeature::getApexIntensity).toArray(); double max = Arrays.stream(ints).max().orElse(1d); int intensiveFeatures = (int)Arrays.stream(ints).filter(x->x>max*0.33d).count(); - if (intensiveFeatures >= 
minimumNumber) { + if (intensiveFeatures < minimumNumber) { peakQuality.getItems().add(new QualityReport.Item( "feature alignment is very imbalanced with only " + intensiveFeatures + " have a high apex intensity.", DataQuality.BAD, QualityReport.Weight.MINOR )); diff --git a/lcms2/src/main/java/de/unijena/bioinf/lcms/quality/CheckMs2Quality.java b/lcms2/src/main/java/de/unijena/bioinf/lcms/quality/CheckMs2Quality.java index b01cc9a687..5de62bee0c 100644 --- a/lcms2/src/main/java/de/unijena/bioinf/lcms/quality/CheckMs2Quality.java +++ b/lcms2/src/main/java/de/unijena/bioinf/lcms/quality/CheckMs2Quality.java @@ -27,8 +27,8 @@ public void addToReport(QualityReport report, MergedLCMSRun run, AlignedFeatures // 1. number of peaks above 10*noise level int numberOfIntensivePeaks = 0, bareMinimum = 0; - final double threshold = run.getSampleStats().getMs2NoiseLevel()*10; - final double threshold2 = run.getSampleStats().getMs2NoiseLevel()*5; + final double threshold = run.getSampleStats().getMs2NoiseLevel()*4; + final double threshold2 = run.getSampleStats().getMs2NoiseLevel()*2; findSpec: for (MergedMSnSpectrum spec : spectra) { int sofar=numberOfIntensivePeaks, bsofar=bareMinimum; @@ -38,7 +38,7 @@ public void addToReport(QualityReport report, MergedLCMSRun run, AlignedFeatures if (peaks.getMzAt(k) >= spec.getMergedPrecursorMz()-6) break; if (peaks.getIntensityAt(k) >= threshold) { - ++numberOfIntensivePeaks; + ++numberOfIntensivePeaks;++bareMinimum; if (numberOfIntensivePeaks>=8) break findSpec; } else if (peaks.getIntensityAt(k) >= threshold2) ++bareMinimum; } diff --git a/lcms2/src/main/java/de/unijena/bioinf/lcms/quality/CheckPeakQuality.java b/lcms2/src/main/java/de/unijena/bioinf/lcms/quality/CheckPeakQuality.java index e055aa4564..f7fbf674cd 100644 --- a/lcms2/src/main/java/de/unijena/bioinf/lcms/quality/CheckPeakQuality.java +++ b/lcms2/src/main/java/de/unijena/bioinf/lcms/quality/CheckPeakQuality.java @@ -19,15 +19,15 @@ public void addToReport(QualityReport 
report, MergedLCMSRun run, AlignedFeatures QualityReport.Category peakQuality = new QualityReport.Category(QualityReport.PEAK_QUALITY); // 1. Peak should be clearly above noise level final double ratio = feature.getSnr(); - if (ratio >= 20) { + if (ratio >= 50) { peakQuality.getItems().add(new QualityReport.Item( String.format(Locale.US, "peak intensity is %.1f-fold above noise intensity", ratio), DataQuality.GOOD, QualityReport.Weight.MAJOR )); - } else if (ratio >= 5) { + } else if (ratio >= 10) { peakQuality.getItems().add(new QualityReport.Item( String.format(Locale.US, "peak intensity is %.1f-fold above noise intensity", ratio), DataQuality.DECENT, QualityReport.Weight.MAJOR )); - } else if (ratio > 1) { + } else if (ratio > 2) { peakQuality.getItems().add(new QualityReport.Item( String.format(Locale.US, "peak intensity is %.1f-fold above noise intensity", ratio), DataQuality.BAD, QualityReport.Weight.MAJOR )); @@ -37,20 +37,35 @@ public void addToReport(QualityReport report, MergedLCMSRun run, AlignedFeatures )); } - // 2. Peak should be not too wide - final double medianPeakWidthOfIntensivePeaks = run.getSampleStats().getMedianPeakWidthInSeconds(); - final double minimumPeakWidth = (retentionTimes[1]-retentionTimes[0])*6; - - if (feature.getFwhm() < minimumPeakWidth) { + // 2. 
Peak should be not too short + final int dp = feature.getTraceRef().getEnd()-feature.getTraceRef().getStart()+1; + if (dp <= 4) { + peakQuality.getItems().add(new QualityReport.Item( + String.format(Locale.US, "peak has too few data points (%d datapoints in merged trace)", dp), DataQuality.BAD, QualityReport.Weight.MAJOR + )); + } else if (dp <= 8) { peakQuality.getItems().add(new QualityReport.Item( - String.format(Locale.US, "peak has too few data points (fwhm is %.1f seconds)", feature.getFwhm()), DataQuality.BAD, QualityReport.Weight.MAJOR + String.format(Locale.US, "peak has few data points (%d datapoints in merged trace)", dp), DataQuality.DECENT, QualityReport.Weight.MAJOR )); } else { - if (feature.getFwhm() <= 3*medianPeakWidthOfIntensivePeaks) { + peakQuality.getItems().add(new QualityReport.Item( + String.format(Locale.US, "peak has many data points (%d datapoints in merged trace)", dp), DataQuality.GOOD, QualityReport.Weight.MAJOR + )); + } + // 2. Peak should be not too wide + final double medianSquareness = run.getSampleStats().getMedianHeightDividedByPeakWidth(); + final double medianPeakWidth = run.getSampleStats().getMedianPeakWidthInSeconds(); + final double squareness = feature.getApexIntensity()/feature.getFwhm(); + { + if (squareness >= medianSquareness) { peakQuality.getItems().add(new QualityReport.Item( - String.format(Locale.US, "proper peak width of %.1f seconds", feature.getFwhm()), DataQuality.GOOD, QualityReport.Weight.MAJOR + String.format(Locale.US, "peak has proper shape.", feature.getFwhm()), DataQuality.GOOD, QualityReport.Weight.MAJOR )); - } else { + } else if (feature.getFwhm() >= medianPeakWidth && squareness >= medianSquareness*0.5) { + peakQuality.getItems().add(new QualityReport.Item( + String.format(Locale.US, "peak is too wide with fwhm is %.1f seconds", feature.getFwhm()), DataQuality.DECENT, QualityReport.Weight.MAJOR + )); + } else if (feature.getFwhm() >= medianPeakWidth) { peakQuality.getItems().add(new 
QualityReport.Item( String.format(Locale.US, "peak is too wide with fwhm is %.1f seconds", feature.getFwhm()), DataQuality.BAD, QualityReport.Weight.MAJOR )); @@ -74,22 +89,26 @@ public void addToReport(QualityReport report, MergedLCMSRun run, AlignedFeatures final MergedTrace trace = traceProvider.getMergeTrace(feature).orElse(null); if (trace!=null) { float start = trace.getIntensities().getFloat(feature.getTraceRef().getStart()); - float end = trace.getIntensities().getFloat(feature.getTraceRef().getStart()); + float end = trace.getIntensities().getFloat(feature.getTraceRef().getEnd()); double ratioStart = feature.getApexIntensity()/ start; double ratioEnd = feature.getApexIntensity() / end; boolean goodStart = (start <= 2*noise && ratioStart>3) || ratioStart>=10; boolean goodEnd = (end <= 2*noise && ratioEnd>3) || ratioEnd>=10; if (goodStart && goodEnd) { peakQuality.getItems().add(new QualityReport.Item( - "peak has clearly defined start and end points.", DataQuality.GOOD, QualityReport.Weight.MINOR + "peak has clearly defined start and end points.", DataQuality.GOOD, QualityReport.Weight.MAJOR )); } else if (goodStart) { peakQuality.getItems().add(new QualityReport.Item( - "the right edge of the peak is not clearly defined", DataQuality.BAD, QualityReport.Weight.MINOR + "the right edge of the peak is not clearly defined", DataQuality.BAD, QualityReport.Weight.MAJOR )); } else if (goodEnd) { peakQuality.getItems().add(new QualityReport.Item( - "the left edge of the peak is not clearly defined", DataQuality.BAD, QualityReport.Weight.MINOR + "the left edge of the peak is not clearly defined", DataQuality.BAD, QualityReport.Weight.MAJOR + )); + } else { + peakQuality.getItems().add(new QualityReport.Item( + "the peak has no clearly defined edges.", DataQuality.BAD, QualityReport.Weight.MAJOR )); } } diff --git a/lcms2/src/main/java/de/unijena/bioinf/lcms/quality/QualityAssessment.java b/lcms2/src/main/java/de/unijena/bioinf/lcms/quality/QualityAssessment.java 
index 2ddeedb131..ee08bcd792 100644 --- a/lcms2/src/main/java/de/unijena/bioinf/lcms/quality/QualityAssessment.java +++ b/lcms2/src/main/java/de/unijena/bioinf/lcms/quality/QualityAssessment.java @@ -36,18 +36,31 @@ public void addToReport(QualityReport report, MergedLCMSRun run, AlignedFeatures ++count; } } + float c = report.getCategories().get(QualityReport.MS2_QUALITY).getOverallQuality().getScore(); if (count==0) { report.setOverallQuality(DataQuality.NOT_APPLICABLE); } else { score /= count; - if (score >= 2.66) { + if (score >= 2.5 && c>=2) { report.setOverallQuality(DataQuality.GOOD); - } else if (score >= 1.66) { + } else if (score >= 1.6) { report.setOverallQuality(DataQuality.DECENT); - } else if (score >= 0.66) { + } else if (score >= 0.6) { report.setOverallQuality(DataQuality.BAD); } else report.setOverallQuality(DataQuality.LOWEST); } + // we add some additional rules: + // - whenever we have a good MS2 we KEEP the data. ALWAYS. + // - if almost everything is bad (including the spectra) we downgrade the quality level to lowest + // this essentially marks a compound for "deletion". 
+ float c2 = report.getCategories().get(QualityReport.PEAK_QUALITY).getOverallQuality().getScore(); + float c3 = report.getCategories().get(QualityReport.ADDUCT_QUALITY).getOverallQuality().getScore(); + float c4 = report.getCategories().get(QualityReport.ISOTOPE_QUALITY).getOverallQuality().getScore(); + if (report.getOverallQuality().getScore() > 1 || c >= 2 || Math.max(c2,Math.max(c3,c4))>=3 || (c2+c3+c4) >= 5 ) { + if (report.getOverallQuality().getScore()<0) report.setOverallQuality(DataQuality.BAD); + } else { + report.setOverallQuality(DataQuality.LOWEST); + } } @@ -58,30 +71,29 @@ public void addToReport(QualityReport report, MergedLCMSRun run, AlignedFeatures public static DataQuality scoreCategory(QualityReport.Category category) { if (category.getItems().isEmpty()) return DataQuality.NOT_APPLICABLE; float minScore= DataQuality.GOOD.getScore(), maxScore = DataQuality.LOWEST.getScore(); - float tiebreaker = 0f; int tiebreakcount=0; + float flexibleScore = 0, count = 0; for (QualityReport.Item item : category.getItems()) { if (item.getWeight()== QualityReport.Weight.MAJOR) { minScore = Math.min(minScore, item.getQuality().getScore()); maxScore = Math.max(maxScore, item.getQuality().getScore()); + flexibleScore += item.getQuality().getScore(); + count+=1; } else { - tiebreakcount++; - tiebreaker += item.getQuality().getScore(); + flexibleScore += item.getQuality().getScore()*0.2f; + count += 0.2f; } } - if (tiebreakcount==0) tiebreaker = 1; - else tiebreaker/=tiebreakcount; - float score = (maxScore+minScore)/2f; - if (score <= 0) { - return DataQuality.LOWEST; - } else if (score < 1) { - return tiebreaker >= 1 ? DataQuality.BAD : DataQuality.LOWEST; - } else if (score == 1) {return DataQuality.BAD; - } else if (score < 2) { - return tiebreaker >= 2 ? DataQuality.DECENT : DataQuality.BAD; - } else if (score == 2) { - return DataQuality.DECENT; - } else if (score < 3) { - return tiebreaker >= 3 ? 
DataQuality.GOOD : DataQuality.DECENT; - } else return DataQuality.GOOD; + float score = Math.min(minScore+1, flexibleScore/count); + if (count==0) { + return DataQuality.NOT_APPLICABLE; + } else { + if (score >= 2.66) { + return DataQuality.GOOD; + } else if (score >= 1.66) { + return DataQuality.DECENT; + } else if (score >= 0.66) { + return DataQuality.BAD; + } else return DataQuality.LOWEST; + } } } diff --git a/lcms2/src/main/java/de/unijena/bioinf/lcms/statistics/MedianNoiseCollectionStrategy.java b/lcms2/src/main/java/de/unijena/bioinf/lcms/statistics/MedianNoiseCollectionStrategy.java index f8c0257f03..906b4de24d 100644 --- a/lcms2/src/main/java/de/unijena/bioinf/lcms/statistics/MedianNoiseCollectionStrategy.java +++ b/lcms2/src/main/java/de/unijena/bioinf/lcms/statistics/MedianNoiseCollectionStrategy.java @@ -52,8 +52,11 @@ public void processMs2(Ms2SpectrumHeader header, SimpleSpectrum ms2Spectrum) { // ignore spectra that are almost empty return; } - int perc = Math.min(xs.length-10, Math.max(60, (int)(0.8*xs.length))); + int perc = Math.min(xs.length-10, (int)(0.75*xs.length)); double noiseLevel = Quickselect.quickselectInplace(xs, 0, xs.length, perc); + double minimumIntensity = Arrays.stream(xs).min().orElse(noiseLevel); + // I have the feeling the noise level is a bit too high, so I incorporate the minimum noise to it. 
+ noiseLevel = (2*noiseLevel+minimumIntensity)/3d; ms2Noise.add((float)noiseLevel); } diff --git a/lcms2/src/main/java/de/unijena/bioinf/lcms/statistics/SampleStats.java b/lcms2/src/main/java/de/unijena/bioinf/lcms/statistics/SampleStats.java index 735bf411ef..b36d3fed7f 100644 --- a/lcms2/src/main/java/de/unijena/bioinf/lcms/statistics/SampleStats.java +++ b/lcms2/src/main/java/de/unijena/bioinf/lcms/statistics/SampleStats.java @@ -18,7 +18,7 @@ public class SampleStats { * Assigns a noise level to each spectrum in the sample * The noise level is the intensity where we think all peaks below it are noise */ - private float[] noiseLevelPerScan; + @With private float[] noiseLevelPerScan; /** * Assigns a noise level to all MS/MS in the sample diff --git a/lcms2/src/main/java/de/unijena/bioinf/lcms/trace/Rect.java b/lcms2/src/main/java/de/unijena/bioinf/lcms/trace/Rect.java index c52425fbde..a09e5e1267 100644 --- a/lcms2/src/main/java/de/unijena/bioinf/lcms/trace/Rect.java +++ b/lcms2/src/main/java/de/unijena/bioinf/lcms/trace/Rect.java @@ -73,7 +73,7 @@ public String toString() { } public Spatial toKey() { - return new MVSpatialKey(id, minMz, maxMz, minRt, maxRt); + return new MVSpatialKey(id, minMz-Rect.FLOATING_POINT_TOLERANCE, maxMz+Rect.FLOATING_POINT_TOLERANCE, minRt, maxRt); } public Rect recalibrateRt(RecalibrationFunction rtRecalibration) { diff --git a/ms_persistence_oss/ms_persistence_document_storage_oss/src/main/java/de/unijena/bioinf/ms/persistence/storage/StorageUtils.java b/ms_persistence_oss/ms_persistence_document_storage_oss/src/main/java/de/unijena/bioinf/ms/persistence/storage/StorageUtils.java index 7ff80acf71..d90c0c3596 100644 --- a/ms_persistence_oss/ms_persistence_document_storage_oss/src/main/java/de/unijena/bioinf/ms/persistence/storage/StorageUtils.java +++ b/ms_persistence_oss/ms_persistence_document_storage_oss/src/main/java/de/unijena/bioinf/ms/persistence/storage/StorageUtils.java @@ -35,7 +35,8 @@ import 
de.unijena.bioinf.ms.persistence.model.core.spectrum.MSData; import de.unijena.bioinf.ms.persistence.model.core.spectrum.MergedMSnSpectrum; import de.unijena.bioinf.ms.properties.ParameterConfig; -import de.unijena.bioinf.sirius.Ms2Preprocessor; +import de.unijena.bioinf.sirius.Sirius; +import de.unijena.bioinf.sirius.SiriusCachedFactory; import lombok.extern.slf4j.Slf4j; import org.jetbrains.annotations.NotNull; import org.jetbrains.annotations.Nullable; @@ -47,6 +48,14 @@ @Slf4j public class StorageUtils { + private static SiriusCachedFactory SIRIUS_PROVIDER; + + public synchronized static SiriusCachedFactory siriusProvider() { + if (SIRIUS_PROVIDER == null) + SIRIUS_PROVIDER = new SiriusCachedFactory(); + return SIRIUS_PROVIDER; + } + public static Ms2Experiment toMs2Experiment(@NotNull AlignedFeatures feature, @NotNull ParameterConfig config) { MSData spectra = feature.getMSData().orElseThrow(); @@ -76,6 +85,10 @@ public static Ms2Experiment toMs2Experiment(@NotNull AlignedFeatures feature, @N } public static AlignedFeatures fromMs2Experiment(Ms2Experiment exp) { + Sirius sirius = siriusProvider().sirius(exp.getAnnotation(MsInstrumentation.class) + .orElse(MsInstrumentation.Unknown) + .getRecommendedProfile()); + SimpleSpectrum mergedMs1 = exp.getMergedMs1Spectrum() != null ? (SimpleSpectrum) exp.getMergedMs1Spectrum() : Spectrums.mergeSpectra(exp.getMs1Spectra()); @@ -90,13 +103,20 @@ public static AlignedFeatures fromMs2Experiment(Ms2Experiment exp) { MSData.MSDataBuilder builder = MSData.builder() .isotopePattern(isotopePattern != null ? 
new IsotopePattern(isotopePattern, IsotopePattern.Type.MERGED_APEX) : null) .mergedMs1Spectrum(mergedMs1) - .mergedMSnSpectrum(Spectrums.from(new Ms2Preprocessor().preprocess(exp).getMergedPeaks())) + .mergedMSnSpectrum(Spectrums.from(sirius.getMs2Preprocessor().preprocess(exp).getMergedPeaks())) .msnSpectra(exp.getMs2Spectra().stream().map(StorageUtils::msnSpectrumFrom).toList()); MSData msData = builder.build(); - de.unijena.bioinf.ChemistryBase.ms.DetectedAdducts det =exp.getAnnotation(de.unijena.bioinf.ChemistryBase.ms.DetectedAdducts.class).orElse(new de.unijena.bioinf.ChemistryBase.ms.DetectedAdducts()); + + //detect adducts for the first time + if (exp.getMs1Spectra() != null && !exp.getMs1Spectra().isEmpty()) + sirius.getMs1Preprocessor().preprocess(exp); + de.unijena.bioinf.ChemistryBase.ms.DetectedAdducts det = exp.getAnnotation(de.unijena.bioinf.ChemistryBase.ms.DetectedAdducts.class).orElse(new de.unijena.bioinf.ChemistryBase.ms.DetectedAdducts()); if (!exp.getPrecursorIonType().isIonizationUnknown()) { - det.put(de.unijena.bioinf.ChemistryBase.ms.DetectedAdducts.Source.UNSPECIFIED_SOURCE, new PossibleAdducts(exp.getPrecursorIonType())); + PossibleAdducts inputFileAdducts = det.get(de.unijena.bioinf.ChemistryBase.ms.DetectedAdducts.Source.INPUT_FILE); + PossibleAdducts ionTypeAdducts = new PossibleAdducts(exp.getPrecursorIonType()); + det.put(de.unijena.bioinf.ChemistryBase.ms.DetectedAdducts.Source.INPUT_FILE, + inputFileAdducts == null ? ionTypeAdducts : PossibleAdducts.union(inputFileAdducts, ionTypeAdducts)); } int charge = exp.getPrecursorIonType().getCharge(); @@ -109,9 +129,8 @@ public static AlignedFeatures fromMs2Experiment(Ms2Experiment exp) { Feature feature = Feature.builder() .dataSource(DataSource.fromPath(exp.getSourceString())) .retentionTime(exp.getAnnotation(RetentionTime.class).orElse(null)) - //todo @MEL: wir habe im modell kein MZ of interest, aber letztendlich ist das einfach average mz oder? 
Gibt ja nur ein window keine wirkliche mzofinterest .averageMass(exp.getMs2Spectra().stream().mapToDouble(Ms2Spectrum::getPrecursorMz).average().orElse(Double.NaN)) - .charge((byte)exp.getPrecursorIonType().getCharge()) + .charge((byte) charge) //todo @MEL ich habe die mal als nullable wrapper objekte gemacht, da wir diese info fuer peak list daten nicht wirklich haben. // .apexIntensity() // .apexMass() @@ -123,9 +142,8 @@ public static AlignedFeatures fromMs2Experiment(Ms2Experiment exp) { alignedFeature.setExternalFeatureId(exp.getFeatureId()); alignedFeature.setMolecularFormula(exp.getMolecularFormula()); alignedFeature.setDetectedAdducts(StorageUtils.fromMs2ExpAnnotation(det)); - //todo how do we want to handle detected adducts without losing scores? - if (exp.hasAnnotation(de.unijena.bioinf.ChemistryBase.ms.DetectedAdducts.class)) - log.warn("Experiment '" + exp.getName() + "' contains Detected adducts that which will not preserved during import!"); + alignedFeature.setHasMs1(msData.getMergedMs1Spectrum() != null); + alignedFeature.setHasMsMs((msData.getMsnSpectra() != null && !msData.getMsnSpectra().isEmpty()) || (msData.getMergedMSnSpectrum() != null)); return alignedFeature; } @@ -144,7 +162,7 @@ public static DetectedAdducts fromMs2ExpAnnotation(@Nullable de.unijena.bioinf.C if (adducts == null) return null; List featureAdducts = adducts.entrySet().stream().flatMap(e -> e.getValue().getAdducts().stream() - .map(p -> DetectedAdduct.builder().adduct(p).source(de.unijena.bioinf.ChemistryBase.ms.DetectedAdducts.Source.valueOf(e.getKey())).build())) + .map(p -> DetectedAdduct.builder().adduct(p).source(e.getKey()).build())) .toList(); DetectedAdducts featureDetectedAdducts = new DetectedAdducts(); diff --git a/ms_persistence_oss/ms_persistence_model_oss/src/main/java/de/unijena/bioinf/ms/persistence/model/core/feature/AbstractAlignedFeatures.java 
b/ms_persistence_oss/ms_persistence_model_oss/src/main/java/de/unijena/bioinf/ms/persistence/model/core/feature/AbstractAlignedFeatures.java index a97e06e05a..5fe2b288bb 100644 --- a/ms_persistence_oss/ms_persistence_model_oss/src/main/java/de/unijena/bioinf/ms/persistence/model/core/feature/AbstractAlignedFeatures.java +++ b/ms_persistence_oss/ms_persistence_model_oss/src/main/java/de/unijena/bioinf/ms/persistence/model/core/feature/AbstractAlignedFeatures.java @@ -52,6 +52,9 @@ public Optional> getFeatures() { return Optional.ofNullable(features); } + private boolean hasMs1; + private boolean hasMsMs; + @JsonIgnore @ToString.Exclude protected MSData msData; diff --git a/ms_persistence_oss/ms_persistence_model_oss/src/main/java/de/unijena/bioinf/ms/persistence/model/core/run/SampleStatistics.java b/ms_persistence_oss/ms_persistence_model_oss/src/main/java/de/unijena/bioinf/ms/persistence/model/core/run/SampleStatistics.java index b4ae28c6cf..4d74624f84 100644 --- a/ms_persistence_oss/ms_persistence_model_oss/src/main/java/de/unijena/bioinf/ms/persistence/model/core/run/SampleStatistics.java +++ b/ms_persistence_oss/ms_persistence_model_oss/src/main/java/de/unijena/bioinf/ms/persistence/model/core/run/SampleStatistics.java @@ -24,13 +24,16 @@ public class SampleStatistics { private int medianNumberOfAlignments; + private double medianHeightDividedByPeakWidth; + protected SampleStatistics() { } - public SampleStatistics(Deviation ms1MassDeviationsWithinSamples, Deviation ms1MassDeviationsBetweenSamples, double retentionTimeDeviationsInSeconds, double medianPeakWidthInSeconds, int medianNumberOfAlignments, double ms2NoiseLevel) { + public SampleStatistics(Deviation ms1MassDeviationsWithinSamples, Deviation ms1MassDeviationsBetweenSamples, double retentionTimeDeviationsInSeconds, double medianPeakWidthInSeconds, double medianHeightDividedByPeakWidth, int medianNumberOfAlignments, double ms2NoiseLevel) { this.ms1MassDeviationsWithinSamples = 
ms1MassDeviationsWithinSamples; this.ms1MassDeviationsBetweenSamples = ms1MassDeviationsBetweenSamples; this.retentionTimeDeviationsInSeconds = retentionTimeDeviationsInSeconds; + this.medianHeightDividedByPeakWidth = medianHeightDividedByPeakWidth; this.ms2NoiseLevel = ms2NoiseLevel; this.medianPeakWidthInSeconds =medianPeakWidthInSeconds; this.medianNumberOfAlignments = medianNumberOfAlignments; diff --git a/preprocessing/src/main/java/de/unijena/bioinf/sirius/Ms1Preprocessor.java b/preprocessing/src/main/java/de/unijena/bioinf/sirius/Ms1Preprocessor.java index 42271f5c1a..04963c27bc 100644 --- a/preprocessing/src/main/java/de/unijena/bioinf/sirius/Ms1Preprocessor.java +++ b/preprocessing/src/main/java/de/unijena/bioinf/sirius/Ms1Preprocessor.java @@ -33,7 +33,6 @@ import de.unijena.bioinf.sirius.deisotope.TargetedIsotopePatternDetection; import de.unijena.bioinf.sirius.elementdetection.DeepNeuralNetworkElementDetector; import de.unijena.bioinf.sirius.elementdetection.ElementDetection; -import de.unijena.bioinf.sirius.iondetection.AdductDetection; import de.unijena.bioinf.sirius.iondetection.DetectIonsFromMs1; import de.unijena.bioinf.sirius.merging.Ms1Merging; import de.unijena.bioinf.sirius.validation.Ms1Validator; @@ -51,7 +50,7 @@ public class Ms1Preprocessor implements SiriusPreprocessor { protected Ms2ExperimentValidator validator = new Ms1Validator(); protected ElementDetection elementDetection = new DeepNeuralNetworkElementDetector(); - protected AdductDetection ionModeDetection = new DetectIonsFromMs1(); + protected DetectIonsFromMs1 ms1IonAdductDetection = new DetectIonsFromMs1(); protected Ms1Merging ms1Merging = new Ms1Merging(); protected IsotopePatternDetection deisotoper = new TargetedIsotopePatternDetection(); @@ -119,7 +118,7 @@ public void adductDetection(ProcessedInput pinput) { //todo we need to write to the original data here. to keep the predicted adducts. maybe this should be part of the IDResult instead???? 
final Ms2Experiment exp = pinput.getOriginalInput(); - // if input file contains an adduct annotation, disable adduct detection + //todo this is contained of historical reasons. Remove if absolutely sure that this is not necessary anymore. Currently it still seems to be needed for detectElements() in the compute panel if (!exp.getPrecursorIonType().isIonizationUnknown()) { pinput.setAnnotation(PossibleAdducts.class, new PossibleAdducts(exp.getPrecursorIonType())); return; @@ -128,11 +127,12 @@ public void adductDetection(ProcessedInput pinput) { if (pinput.hasAnnotation(PossibleAdducts.class)) return; final DetectedAdducts detAdds = exp.computeAnnotationIfAbsent(DetectedAdducts.class, DetectedAdducts::new); - if (!detAdds.containsKey(DetectedAdducts.Source.MS1_PREPROCESSOR) && !detAdds.containsKey(DetectedAdducts.Source.LCMS_ALIGN)) { + if (!detAdds.containsKey(DetectedAdducts.Source.MS1_PREPROCESSOR) && !detAdds.hasMoreImportantSource(DetectedAdducts.Source.MS1_PREPROCESSOR)) { + //Source.MS1_PREPROCESSOR shall only be present for peaklist data for which adducts are not specified in input file final int charge = exp.getPrecursorIonType().getCharge(); final AdductSettings settings = pinput.getAnnotationOrDefault(AdductSettings.class); - final PossibleAdducts ionModes = ionModeDetection.detect(pinput, settings.getDetectable(charge)); + final PossibleAdducts ionModes = ms1IonAdductDetection.detect(pinput, settings.getDetectable(charge)); if (ionModes != null) detAdds.put(DetectedAdducts.Source.MS1_PREPROCESSOR, new PossibleAdducts(ionModes.getAdducts())); diff --git a/settings.gradle b/settings.gradle index 7159097c28..ba1b488b43 100644 --- a/settings.gradle +++ b/settings.gradle @@ -77,7 +77,6 @@ include ':fingerid_project_space_oss' include ':web_service_oss' include ':web_service_oss:sirius_web_client' - include ':web_service_oss:amqp_client_oss' include ':web_service_oss:web_core_oss' include ':web_service_oss:rest_client_oss' include 
':web_service_oss:client_auth_oss' diff --git a/sirius_api/src/main/java/de/unijena/bioinf/sirius/Sirius.java b/sirius_api/src/main/java/de/unijena/bioinf/sirius/Sirius.java index c04a9bcabe..a92b54d164 100644 --- a/sirius_api/src/main/java/de/unijena/bioinf/sirius/Sirius.java +++ b/sirius_api/src/main/java/de/unijena/bioinf/sirius/Sirius.java @@ -701,7 +701,7 @@ private List resolveAdductIfPossible(IdentificationResult List resolvedTrees = new ArrayList<>(); for (PrecursorIonType pa : possibleAdducts.getAdducts(ionType.getIonization())) { - boolean checkElementFilter = false; + boolean checkElementFilter = false; //should be sufficient to check when adding to whiteset. since filter is not applied to all formulas. E.g. bottom up may be excluded. if(isValidNeutralFormula(measuredMF, pa, ws, constraints, checkElementFilter)) { resolvedTrees.add(new IdentificationResult(new IonTreeUtils().treeToNeutralTree(tree, pa, true), siriusScore)); } @@ -709,7 +709,7 @@ private List resolveAdductIfPossible(IdentificationResult if (resolvedTrees.size()==0) { //should not happen anymore LoggerFactory.getLogger(getClass()).warn("No valid adducts found for ionization " + ionType.getIonization() + " for compound " + experiment.getName()); - return Collections.singletonList(identificationResult); + //return Collections.singletonList(identificationResult); } return resolvedTrees; diff --git a/web_service_oss/amqp_client_oss/build.gradle b/web_service_oss/amqp_client_oss/build.gradle deleted file mode 100644 index 9f9d5e65b6..0000000000 --- a/web_service_oss/amqp_client_oss/build.gradle +++ /dev/null @@ -1,5 +0,0 @@ - -dependencies { - api project(':web_service_oss:web_core_oss') - api project(':rabbitmq-support') -} diff --git a/web_service_oss/amqp_client_oss/src/main/java/de/unijena/bioinf/ms/amqp/client/AmqpClient.java b/web_service_oss/amqp_client_oss/src/main/java/de/unijena/bioinf/ms/amqp/client/AmqpClient.java deleted file mode 100644 index 228568c229..0000000000 --- 
a/web_service_oss/amqp_client_oss/src/main/java/de/unijena/bioinf/ms/amqp/client/AmqpClient.java +++ /dev/null @@ -1,193 +0,0 @@ -/* - * - * This file is part of the SIRIUS library for analyzing MS and MS/MS data - * - * Copyright (C) 2013-2020 Kai Dührkop, Markus Fleischauer, Marcus Ludwig, Martin A. Hoffman, Fleming Kretschmer and Sebastian Böcker, - * Chair of Bioinformatics, Friedrich-Schilller University. - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 3 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License along with SIRIUS. 
If not, see - */ - -package de.unijena.bioinf.ms.amqp.client; - -import com.fasterxml.jackson.core.type.TypeReference; -import com.fasterxml.jackson.databind.ObjectMapper; -import com.rabbitmq.client.AMQP; -import com.rabbitmq.client.Channel; -import com.rabbitmq.client.DefaultConsumer; -import com.rabbitmq.client.Envelope; -import de.unijena.bioinf.ChemistryBase.jobs.SiriusJobs; -import de.unijena.bioinf.ChemistryBase.utils.IOFunctions; -import de.unijena.bioinf.rest.NetUtils; -import de.unijena.bioinf.fingerid.connection_pooling.PooledConnection; -import de.unijena.bioinf.jjobs.BasicJJob; -import de.unijena.bioinf.ms.amqp.client.jobs.AmqpWebJJob; -import de.unijena.bioinf.ms.amqp.client.jobs.JobMessage; -import de.unijena.bioinf.ms.properties.PropertyManager; -import de.unijena.bioinf.rabbitmq.RabbitMqChannelPool; -import org.jetbrains.annotations.NotNull; -import org.slf4j.LoggerFactory; - -import java.io.IOException; -import java.nio.charset.StandardCharsets; -import java.util.ArrayList; -import java.util.List; -import java.util.Map; -import java.util.concurrent.ConcurrentHashMap; -import java.util.concurrent.TimeoutException; -import java.util.concurrent.atomic.AtomicLong; -import java.util.function.Function; - -public class AmqpClient { - protected static AtomicLong MESSAGE_COUNTER = new AtomicLong(0); - - public static long JOB_TIME_OUT = PropertyManager.getLong("de.unijena.bioinf.fingerid.web.job.timeout", 1000L * 60L * 60L); //default 1h - protected static final String REGISTER_PREFIX = PropertyManager.getProperty("de.unijena.bioinf.ms.sirius.amqp.prefix.register", null, "register"); - protected static final String CLIENT_EXCHANGE = PropertyManager.getProperty("", null, "sirius.client.in"); - protected static final String CLIENT_TYPE = PropertyManager.getProperty("de.unijena.bioinf.ms.sirius.amqp.client", null, "sirius"); - protected final String clientID; //aka session key to allow multiple queues per userid -> multiple clients - protected final String 
userID; - protected final RabbitMqChannelPool channelPool; - - protected final List consumerThreads = new ArrayList<>(); - protected final String consumerQ; - protected final String registerRKey; - protected final int threads; - - - protected final Map> messageJobs = new ConcurrentHashMap<>(); - - - public AmqpClient(@NotNull RabbitMqChannelPool channelPool, @NotNull String userID, @NotNull String clientID, int consumerThreads) { - this.channelPool = channelPool; - this.userID = userID; - this.clientID = clientID; - this.threads = consumerThreads; - this.consumerQ = CLIENT_TYPE + "." + userID + "." + clientID; - this.registerRKey = REGISTER_PREFIX + "." + CLIENT_TYPE + "." + userID + "." + clientID; - - } - - public void startConsuming(long timeout) { - NetUtils.tryAndWaitAsJJob(() -> { - try (PooledConnection connection = channelPool.orderConnection()) { - connection.connection.basicPublish(CLIENT_EXCHANGE, registerRKey, defaultProps().build(), new byte[]{}); - connection.connection.waitForConfirms(5000); - } - }, timeout); - - AMQP.Queue.DeclareOk ok = NetUtils.tryAndWaitAsJJob(() -> { - try (PooledConnection connection = channelPool.orderConnection()) { - return connection.connection.queueDeclarePassive(consumerQ); - } - }, timeout); - - if (!ok.getQueue().equals(consumerQ)) - throw new IllegalArgumentException("Illegal q name returned from Server"); - - LoggerFactory.getLogger(getClass()).info("Successfully created callback queue!"); - - consumerThreads.add(NetUtils.tryAndWaitAsJJob(() -> { - final Channel channel = channelPool.orderConnection().connection; - return channel.basicConsume(consumerQ, false, - new DefaultConsumer(channel) { - @Override - public void handleDelivery(String consumerTag, Envelope envelope, - AMQP.BasicProperties properties, byte[] body) throws IOException { - long deliveryTag = envelope.getDeliveryTag(); - //handle Message, should be submitted to SIRIUS Jobs System to do unwrapping in parallel without - // having many connection or 
blocking them too long - SiriusJobs.getGlobalJobManager().submitJob(new AMPQCallbackJJob(consumerTag, properties, body)); - channel.basicAck(deliveryTag, false); - } - }); - - }, timeout)); - } - - public AmqpWebJJob publish(@NotNull String routingPrefix, T jacksonSerializable, @NotNull Function> jobBuilder) throws IOException { - return publish(routingPrefix, jacksonSerializable, (body) -> new ObjectMapper().writeValueAsString(body), jobBuilder); - } - - public AmqpWebJJob publish(@NotNull String routingPrefix, T body, @NotNull IOFunctions.IOFunction jsonizer, @NotNull Function> jobBuilder) throws IOException { - return publish(routingPrefix, jsonizer.apply(body), jobBuilder); - } - - public AmqpWebJJob publish(@NotNull String routingPrefix, String jsonBody, @NotNull Function> jobBuilder) throws IOException { - return publish(routingPrefix, jsonBody.getBytes(StandardCharsets.UTF_8.name()), jobBuilder); - } - - public AmqpWebJJob publish(@NotNull String routingPrefix, byte[] body, @NotNull Function> jobBuilder) throws IOException { - //MessageID is used to identify the corresponding webJJob - //The receiving Service needs to return a JobMessage with this ID. - final String messageID = routingPrefix + "." 
+ MESSAGE_COUNTER.incrementAndGet(); - final AmqpWebJJob job = jobBuilder.apply(messageID); - assert !messageJobs.containsKey(messageID); - messageJobs.put(messageID, job); - try (PooledConnection connection = channelPool.orderConnection()) { - connection.connection.basicPublish(CLIENT_EXCHANGE, decorateRoutingPrefix(routingPrefix), - defaultProps().messageId(job.getJobId()).build(), body); - try { - if (!connection.connection.waitForConfirms(5000)) - LoggerFactory.getLogger(getClass()).warn("Could not confirm publication of Job '" + messageID + "' Jobs might not be delivered an is likely to timeout."); - } catch (TimeoutException | InterruptedException e) { - //should - LoggerFactory.getLogger(getClass()).warn("Could not confirm publication of Job: " + messageID + System.lineSeparator() + e.getMessage()); - } - return job; - } catch (InterruptedException e) { - throw new IOException(e); - } - } - - - private String decorateRoutingPrefix(String prefix) { - return prefix + "." + userID + "." 
+ clientID; - } - - private AMQP.BasicProperties.Builder defaultProps() { - return new AMQP.BasicProperties.Builder() - .contentEncoding(StandardCharsets.UTF_8.name()) - .contentType("application/json") - .userId(userID) - .appId("SIRIUS"); - } - - public boolean isConnected() { - //todo send test message - return !consumerThreads.isEmpty(); - } - - public class AMPQCallbackJJob extends BasicJJob> { - private final AMQP.BasicProperties properties; - private final byte[] body; - private final String consumerTag; - - public AMPQCallbackJJob(String consumerTag, AMQP.BasicProperties properties, byte[] body) { - this.properties = properties; - this.body = body; - this.consumerTag = consumerTag; - } - - - @Override - protected JobMessage compute() throws Exception { - JobMessage messageJob = new ObjectMapper().readValue(body, new TypeReference<>() { - }); - AmqpWebJJob job = messageJobs.get(messageJob.getID()); - assert job.getJobId().equals(messageJob.getID()); - job.update(messageJob); - return messageJob; - } - } -} diff --git a/web_service_oss/amqp_client_oss/src/main/java/de/unijena/bioinf/ms/amqp/client/AmqpClients.java b/web_service_oss/amqp_client_oss/src/main/java/de/unijena/bioinf/ms/amqp/client/AmqpClients.java deleted file mode 100644 index 354467567a..0000000000 --- a/web_service_oss/amqp_client_oss/src/main/java/de/unijena/bioinf/ms/amqp/client/AmqpClients.java +++ /dev/null @@ -1,39 +0,0 @@ -/* - * - * This file is part of the SIRIUS library for analyzing MS and MS/MS data - * - * Copyright (C) 2013-2020 Kai Dührkop, Markus Fleischauer, Marcus Ludwig, Martin A. Hoffman, Fleming Kretschmer and Sebastian Böcker, - * Chair of Bioinformatics, Friedrich-Schilller University. - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 3 of the License, or (at your option) any later version. 
- * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License along with SIRIUS. If not, see - */ -package de.unijena.bioinf.ms.amqp.client; - -import de.unijena.bioinf.ms.properties.PropertyManager; - -public class AmqpClients { - - protected static final String WORKER_QUEUE_CATEGORY = PropertyManager.getProperty("de.unijena.bioinf.sirius.amqp.type.worker", null, "worker"); - protected static final String WORKER_QUEUE_TYPE = PropertyManager.getProperty("de.unijena.bioinf.sirius.amqp.type.job", null, "job"); - protected static final String WORKER_SUFFIX = PropertyManager.getProperty("de.unijena.bioinf.sirius.amqp.suffix.worker", null, "ce"); - protected static final String DATA_QUEUE_CATEGORY = PropertyManager.getProperty("de.unijena.bioinf.sirius.amqp.type.data", null, "data"); - - - public static String jobRoutePrefix(String jobType, boolean pos) { - return WORKER_QUEUE_CATEGORY + "." + jobType + "." + (pos ? "pos" : "neg") + "." + WORKER_SUFFIX + "." + WORKER_QUEUE_TYPE; - } - - public static String dataRoutingKeyPrefix(String data, boolean pos) { - return DATA_QUEUE_CATEGORY + ".client"; - } -} diff --git a/web_service_oss/amqp_client_oss/src/main/java/de/unijena/bioinf/ms/amqp/client/jobs/AmqpWebJJob.java b/web_service_oss/amqp_client_oss/src/main/java/de/unijena/bioinf/ms/amqp/client/jobs/AmqpWebJJob.java deleted file mode 100644 index 88cbc82c84..0000000000 --- a/web_service_oss/amqp_client_oss/src/main/java/de/unijena/bioinf/ms/amqp/client/jobs/AmqpWebJJob.java +++ /dev/null @@ -1,38 +0,0 @@ -/* - * - * This file is part of the SIRIUS library for analyzing MS and MS/MS data - * - * Copyright (C) 2013-2020 Kai Dührkop, Markus Fleischauer, Marcus Ludwig, Martin A. 
Hoffman, Fleming Kretschmer and Sebastian Böcker, - * Chair of Bioinformatics, Friedrich-Schilller University. - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 3 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License along with SIRIUS. If not, see - */ - -package de.unijena.bioinf.ms.amqp.client.jobs; - -import de.unijena.bioinf.ChemistryBase.utils.IOFunctions; -import de.unijena.bioinf.ms.webapi.WebJJob; -import org.jetbrains.annotations.NotNull; -import org.jetbrains.annotations.Nullable; - -public class AmqpWebJJob extends WebJJob { - - public AmqpWebJJob(String jobId, @NotNull IOFunctions.IOFunction outputConverter) { - this(jobId, null, outputConverter); - } - - public AmqpWebJJob(@NotNull String jobId, @Nullable I input, @NotNull IOFunctions.IOFunction outputConverter) { - super(jobId, outputConverter); - this.input = input; - } -} diff --git a/web_service_oss/amqp_client_oss/src/main/java/de/unijena/bioinf/ms/amqp/client/jobs/JobMessage.java b/web_service_oss/amqp_client_oss/src/main/java/de/unijena/bioinf/ms/amqp/client/jobs/JobMessage.java deleted file mode 100644 index fe9402b0e8..0000000000 --- a/web_service_oss/amqp_client_oss/src/main/java/de/unijena/bioinf/ms/amqp/client/jobs/JobMessage.java +++ /dev/null @@ -1,52 +0,0 @@ -/* - * - * This file is part of the SIRIUS library for analyzing MS and MS/MS data - * - * Copyright (C) 2013-2020 Kai Dührkop, Markus Fleischauer, Marcus Ludwig, Martin A. 
Hoffman, Fleming Kretschmer and Sebastian Böcker, - * Chair of Bioinformatics, Friedrich-Schilller University. - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 3 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License along with SIRIUS. If not, see - */ - -package de.unijena.bioinf.ms.amqp.client.jobs; - -import com.fasterxml.jackson.databind.annotation.JsonDeserialize; -import de.unijena.bioinf.ms.webapi.JobUpdate; - - -@JsonDeserialize(using = JobMessageDeserializer.class) -public class JobMessage extends JobMessageBase implements JobUpdate { - protected D data; - - public JobMessage(JobMessageBase base, D data) { - super(data.getClass()); - this.data = data; - this.jobID = base.jobID; - this.state = base.state; - this.errorMessage = base.errorMessage; - } - - public void setData(D data) { - this.data = data; - } - - @Override - public Class getDataType() { - return (Class) dataType; - } - - public D getData() { - return data; - } - -} diff --git a/web_service_oss/amqp_client_oss/src/main/java/de/unijena/bioinf/ms/amqp/client/jobs/JobMessageBase.java b/web_service_oss/amqp_client_oss/src/main/java/de/unijena/bioinf/ms/amqp/client/jobs/JobMessageBase.java deleted file mode 100644 index 2018a3f5aa..0000000000 --- a/web_service_oss/amqp_client_oss/src/main/java/de/unijena/bioinf/ms/amqp/client/jobs/JobMessageBase.java +++ /dev/null @@ -1,60 +0,0 @@ -/* - * - * This file is part of the SIRIUS library for analyzing MS and MS/MS data - * - * Copyright (C) 2013-2020 Kai Dührkop, 
Markus Fleischauer, Marcus Ludwig, Martin A. Hoffman, Fleming Kretschmer and Sebastian Böcker, - * Chair of Bioinformatics, Friedrich-Schilller University. - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 3 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License along with SIRIUS. If not, see - */ - -package de.unijena.bioinf.ms.amqp.client.jobs; - -import com.fasterxml.jackson.annotation.JsonIgnoreProperties; - -@JsonIgnoreProperties(ignoreUnknown = true) -public class JobMessageBase { - public final Class dataType; - - protected String jobID; - protected Integer state; - protected String errorMessage; - - public JobMessageBase(Class dataType) { - this.dataType = dataType; - } - - public String getID() { - return jobID; - } - - public void setJobID(String jobID) { - this.jobID = jobID; - } - - public String getErrorMessage() { - return errorMessage; - } - - public void setErrorMessage(String errorMessage) { - this.errorMessage = errorMessage; - } - - public Integer getState() { - return state; - } - - public void setState(Integer state) { - this.state = state; - } -} diff --git a/web_service_oss/amqp_client_oss/src/main/java/de/unijena/bioinf/ms/amqp/client/jobs/JobMessageDeserializer.java b/web_service_oss/amqp_client_oss/src/main/java/de/unijena/bioinf/ms/amqp/client/jobs/JobMessageDeserializer.java deleted file mode 100644 index 99b8fffee7..0000000000 --- a/web_service_oss/amqp_client_oss/src/main/java/de/unijena/bioinf/ms/amqp/client/jobs/JobMessageDeserializer.java 
+++ /dev/null @@ -1,41 +0,0 @@ -/* - * - * This file is part of the SIRIUS library for analyzing MS and MS/MS data - * - * Copyright (C) 2013-2020 Kai Dührkop, Markus Fleischauer, Marcus Ludwig, Martin A. Hoffman, Fleming Kretschmer and Sebastian Böcker, - * Chair of Bioinformatics, Friedrich-Schilller University. - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 3 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License along with SIRIUS. If not, see - */ - -package de.unijena.bioinf.ms.amqp.client.jobs; - -import com.fasterxml.jackson.core.JsonParser; -import com.fasterxml.jackson.core.TreeNode; -import com.fasterxml.jackson.databind.DeserializationContext; -import com.fasterxml.jackson.databind.JsonDeserializer; -import com.fasterxml.jackson.databind.ObjectMapper; - -import java.io.IOException; - -public class JobMessageDeserializer extends JsonDeserializer> { - - @Override - public JobMessage deserialize(JsonParser p, DeserializationContext ctxt) throws IOException { - final ObjectMapper mapper = new ObjectMapper(); - final TreeNode tree = mapper.readTree(p); - final JobMessageBase baseInfo = mapper.readValue(tree.traverse(), JobMessageBase.class); - final Object data = mapper.readValue(tree.get("data").traverse(), baseInfo.dataType); - return new JobMessage<>(baseInfo, data); - } -} diff --git a/web_service_oss/sirius_web_client/src/main/java/de/unijena/bioinf/chemdb/WebWithCustomDatabase.java 
b/web_service_oss/sirius_web_client/src/main/java/de/unijena/bioinf/chemdb/WebWithCustomDatabase.java index 6fa4b273fe..8fdfbb46f4 100644 --- a/web_service_oss/sirius_web_client/src/main/java/de/unijena/bioinf/chemdb/WebWithCustomDatabase.java +++ b/web_service_oss/sirius_web_client/src/main/java/de/unijena/bioinf/chemdb/WebWithCustomDatabase.java @@ -154,9 +154,9 @@ public CandidateResult loadCompoundsByFormula(MolecularFormula formula, Collecti final long requestFilter = extractFilterBits(dbs).orElse(-1); if (requestFilter >= 0 || includeRestAllDb) { - final long searchFilter = includeRestAllDb ? DataSource.ALL.searchFlag : requestFilter; + final long searchFilter = includeRestAllDb ? 0 : requestFilter; result = api.applyStructureDB(searchFilter, restCache, restDb -> new CandidateResult( - restDb.lookupStructuresAndFingerprintsByFormula(formula), searchFilter, requestFilter)); + restDb.lookupStructuresAndFingerprintsByFormula(formula).stream().filter(s -> DataSource.isInAll(s.getBitset())).toList(), searchFilter, requestFilter)); } else { logger.warn("No filter for Rest DBs found bits in DB list: '" + dbs.stream().map(CustomDataSources.Source::name).collect(Collectors.joining(",")) + "'. 
Returning empty search list from REST DB"); result = new CandidateResult(); @@ -508,9 +508,9 @@ private Optional> getAllDbCandidatesOpt() { return Optional.of(restDbInChIs); } - + //Filtering for this happens earlier in loadCompoundsByFormula, not sure how useful this part still is public boolean containsAllDb() { - return restFilter == DataSource.ALL.searchFlag; + return restFilter == 0 || restFilter == DataSource.ALL.searchFlag; } public void merge(@NotNull CandidateResult other) { diff --git a/web_service_oss/sirius_web_client/src/main/java/de/unijena/bioinf/chemdb/annotations/SearchableDBAnnotation.java b/web_service_oss/sirius_web_client/src/main/java/de/unijena/bioinf/chemdb/annotations/SearchableDBAnnotation.java index d1a68db3a3..cf3f7f95a6 100644 --- a/web_service_oss/sirius_web_client/src/main/java/de/unijena/bioinf/chemdb/annotations/SearchableDBAnnotation.java +++ b/web_service_oss/sirius_web_client/src/main/java/de/unijena/bioinf/chemdb/annotations/SearchableDBAnnotation.java @@ -55,8 +55,6 @@ public long getDBFlag() { public static List makeDB(@NotNull String names) { if (names.equalsIgnoreCase(DataSource.ALL.name()) || names.equalsIgnoreCase(DataSource.ALL.realName())) return CustomDataSources.getAllSelectableDbs(); - if (names.equalsIgnoreCase("ALL_BUT_INSILICO") || names.equalsIgnoreCase("All but combinatorial DBs")) //just for legacy command support - return CustomDataSources.getNonInSilicoSelectableDbs(); return Arrays.stream(names.trim().split("\\s*,\\s*")) .map(CustomDataSources::getSourceFromName).filter(Objects::nonNull).distinct().toList(); diff --git a/web_service_oss/sirius_web_client/src/main/java/de/unijena/bioinf/chemdb/custom/CustomDatabaseImporter.java b/web_service_oss/sirius_web_client/src/main/java/de/unijena/bioinf/chemdb/custom/CustomDatabaseImporter.java index fde93e1dad..7fdc260fd5 100644 --- a/web_service_oss/sirius_web_client/src/main/java/de/unijena/bioinf/chemdb/custom/CustomDatabaseImporter.java +++ 
b/web_service_oss/sirius_web_client/src/main/java/de/unijena/bioinf/chemdb/custom/CustomDatabaseImporter.java @@ -418,8 +418,8 @@ private void downloadAndAnnotateMissingCandidates(final ConcurrentHashMap { - List cans = db.lookupStructuresAndFingerprintsByFormula(formula); + api.consumeStructureDB(0, db -> { + List cans = db.lookupStructuresAndFingerprintsByFormula(formula).stream().filter(s -> DataSource.isInAll(s.getBitset())).toList(); for (FingerprintCandidate can : cans) { checkCancellation(); Comp toAdd = key2DToComp.get(can.getInchi().key2D()); diff --git a/web_service_oss/sirius_web_client/src/main/java/de/unijena/bioinf/fingerid/FingerblastJJob.java b/web_service_oss/sirius_web_client/src/main/java/de/unijena/bioinf/fingerid/FingerblastJJob.java index 8827f4e26f..ae5eeb041b 100644 --- a/web_service_oss/sirius_web_client/src/main/java/de/unijena/bioinf/fingerid/FingerblastJJob.java +++ b/web_service_oss/sirius_web_client/src/main/java/de/unijena/bioinf/fingerid/FingerblastJJob.java @@ -39,7 +39,6 @@ import de.unijena.bioinf.fingerid.blast.FingerblastResult; import de.unijena.bioinf.fingerid.blast.parameters.ParameterStore; import de.unijena.bioinf.jjobs.BasicMasterJJob; -import de.unijena.bioinf.jjobs.JJob; import de.unijena.bioinf.ms.properties.PropertyManager; import de.unijena.bioinf.ms.rest.model.canopus.CanopusJobInput; import de.unijena.bioinf.ms.rest.model.covtree.CovtreeJobInput; @@ -76,7 +75,7 @@ private static boolean useConfidenceScore() { private StructureSearchResult structureSearchResult; - Set> webJJobs = new HashSet<>(); + Set webJJobs = new HashSet<>(); public FingerblastJJob(@NotNull CSIPredictor predictor, @NotNull WebAPI webAPI) { this(predictor, webAPI, null); @@ -180,35 +179,34 @@ protected List compute() throws Exception { return s; }, this::checkForInterruption); - // Loop over all fingerprints and start SearchJjobs for each one. 
Jobs gets saved in searchJJobs, jobFidResult maps a searchjob back to the FingerIDresult - for (int i = 0; i < idResults.size(); i++) { - final FingerIdResult fingeridInput = idResults.get(i); + { + //compute missing trees + WebJJob[] bayesJobs = new WebJJob[scorings.length]; + for (int i = 0; i < scorings.length; i++) { + if (scorings[i] == null) { + logInfo("Starting new BayesTree Job."); + bayesJobs[i] = webAPI.submitCovtreeJob(idResults.get(i).getMolecularFormula(), predictor.predictorType); + webJJobs.add(bayesJobs[i]); + } + } - final FingerblastSearchJJob blastJob; - if (scorings[i] != null) { - blastJob = FingerblastSearchJJob.of(predictor, scorings[i], fingeridInput); - searchJJobs.add(blastJob); - } else { - System.out.println("Executing Bayes tree job!!!!!"); - // bayesnetScoring is null --> make a prepare job which computes the bayessian network (covTree) for the - // given molecular formula - blastJob = FingerblastSearchJJob.of(predictor, fingeridInput); + // Loop over all fingerprints and start SearchJjobs for each one. 
Jobs gets saved in searchJJobs, jobFidResult maps a searchjob back to the FingerIDresult + for (int i = 0; i < idResults.size(); i++) { + final FingerIdResult fingeridInput = idResults.get(i); + + if (scorings[i] == null) { + if (bayesJobs[i] == null) + throw new IllegalStateException("Expected bayes tree job missing."); + scorings[i] = bayesJobs[i].awaitResult(); + webJJobs.remove(bayesJobs[i]); + } + final FingerblastSearchJJob blastJob = FingerblastSearchJJob.of(predictor, scorings[i], fingeridInput); searchJJobs.add(blastJob); - WebJJob covTreeJob = - webAPI.submitCovtreeJob(fingeridInput.getMolecularFormula(), predictor.predictorType); - webJJobs.add(covTreeJob); - - blastJob.addRequiredJob(covTreeJob); - //remove jobs to free up memory - blastJob.addJobProgressListener(jobProgressEvent -> { - if (((JJob) jobProgressEvent.getSource()).isFinished()) - webJJobs.remove(covTreeJob); - }); - } - blastJob.addRequiredJob(formulaJobs.get(i)); - submitJob(blastJob); //no submitsubjob because we are waiting for web job. + blastJob.addRequiredJob(formulaJobs.get(i)); + submitJob(blastJob); //no submitsubjob because we are waiting for web job. 
+ } } } //search job are now prepared and submitted @@ -444,7 +442,7 @@ private ConfidenceJJob executeConfidenceStack(ArrayList canopusWebJJob = webAPI.submitCanopusJob( requestedMergedCandidates.get(0).getCandidate().getInchi().extractFormulaOrThrow(), diff --git a/web_service_oss/sirius_web_client/src/main/java/de/unijena/bioinf/fingerid/FingerblastSearchJJob.java b/web_service_oss/sirius_web_client/src/main/java/de/unijena/bioinf/fingerid/FingerblastSearchJJob.java index c860ba3f78..f571c7752b 100644 --- a/web_service_oss/sirius_web_client/src/main/java/de/unijena/bioinf/fingerid/FingerblastSearchJJob.java +++ b/web_service_oss/sirius_web_client/src/main/java/de/unijena/bioinf/fingerid/FingerblastSearchJJob.java @@ -35,7 +35,6 @@ import de.unijena.bioinf.jjobs.BasicJJob; import de.unijena.bioinf.jjobs.JJob; import de.unijena.bioinf.ms.annotations.AnnotationJJob; -import de.unijena.bioinf.ms.webapi.WebJJob; import org.jetbrains.annotations.NotNull; import org.jetbrains.annotations.Nullable; import org.openscience.cdk.interfaces.IAtomContainer; @@ -85,14 +84,6 @@ public synchronized void handleFinishedRequiredJob(JJob required) { candidates = job.result(); } } - - if (bayesnetScoring == null) { - if (required instanceof WebJJob) { - Object r = required.result(); - if (r instanceof BayesnetScoring) - bayesnetScoring = (BayesnetScoring) r; - } - } } public List> getAllScoredCandidates() { diff --git a/web_service_oss/sirius_web_client/src/main/java/de/unijena/bioinf/fingerid/FormulaWhiteListJob.java b/web_service_oss/sirius_web_client/src/main/java/de/unijena/bioinf/fingerid/FormulaWhiteListJob.java index 1aa77fc9ec..8b4bb5014e 100644 --- a/web_service_oss/sirius_web_client/src/main/java/de/unijena/bioinf/fingerid/FormulaWhiteListJob.java +++ b/web_service_oss/sirius_web_client/src/main/java/de/unijena/bioinf/fingerid/FormulaWhiteListJob.java @@ -60,11 +60,10 @@ private FormulaWhiteListJob(WebWithCustomDatabase searchDB, List dbToSearch, Ms2Experiment experiment , 
PrecursorIonType[] allowedIonTypes) { + public static FormulaWhiteListJob create(WebWithCustomDatabase searchDB, List dbToSearch, Ms2Experiment experiment) { final double precursorMass = experiment.getIonMass(); final Deviation massDev = getMassDeviation(experiment); - return new FormulaWhiteListJob(searchDB, dbToSearch, precursorMass, massDev, allowedIonTypes); + return new FormulaWhiteListJob(searchDB, dbToSearch, precursorMass, massDev, experiment.getPossibleAdductsOrFallback().getAdducts().toArray(PrecursorIonType[]::new)); } /** diff --git a/web_service_oss/sirius_web_client/src/main/java/de/unijena/bioinf/fingerid/MsNovelistFingerblastJJob.java b/web_service_oss/sirius_web_client/src/main/java/de/unijena/bioinf/fingerid/MsNovelistFingerblastJJob.java index 1ebe17c2b4..bb2b30b311 100644 --- a/web_service_oss/sirius_web_client/src/main/java/de/unijena/bioinf/fingerid/MsNovelistFingerblastJJob.java +++ b/web_service_oss/sirius_web_client/src/main/java/de/unijena/bioinf/fingerid/MsNovelistFingerblastJJob.java @@ -211,11 +211,11 @@ protected List> compute() { // bayesnetScoring is null --> make a job which computes the bayessian network (covTree) for the // given molecular formula + //todo can probably be optimized by starting them before other local computations if (bayesnetScoring == null) { WebJJob covTreeJob = webAPI.submitCovtreeJob(idResult.getMolecularFormula(), predictor.predictorType); - covTreeJob.awaitResult(); - bayesnetScoring = covTreeJob.getResult(); + bayesnetScoring = covTreeJob.awaitResult(); } checkForInterruption();