Skip to content

Commit

Permalink
Merge branch 'refs/heads/master' into 99-enable-reference-spectra-par…
Browse files Browse the repository at this point in the history
…sing-from-sdf-files
  • Loading branch information
mfleisch committed May 28, 2024
2 parents 96ec84d + 82945f4 commit 24b5ff9
Show file tree
Hide file tree
Showing 47 changed files with 557 additions and 640 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,10 @@ public static boolean isBioOnly(long flags) {
return flags != 0 && (flags & BIO.flag) != 0;
}

/**
 * Tests whether a flag bit set overlaps with the flag of {@code ALL}.
 *
 * @param flags bit set of data source flags
 * @return {@code true} if at least one bit of {@code ALL.flag} is set in {@code flags}
 */
public static boolean isInAll(long flags) {
    // a non-zero intersection already implies that flags itself is non-zero
    final long shared = flags & ALL.flag;
    return shared != 0;
}

/**
 * Instance variant of {@link #isBioOnly(long)} that checks this source's own flag.
 *
 * @return result of the static check applied to {@code this.flag}
 */
public boolean isBioOnly() {
    final long ownFlag = this.flag;
    return isBioOnly(ownFlag);
}
Expand All @@ -147,11 +151,8 @@ public static DataSource[] valuesNoALL() {
return Arrays.stream(DataSource.values()).filter(it -> it != ALL).toArray(DataSource[]::new);
}

/**
 * Lists all data sources except {@code ALL} and those whose {@code mines} field is set.
 *
 * @return a newly created array, in the order given by {@code DataSource.values()}
 */
public static DataSource[] valuesNoALLNoMINES() {
    return Arrays.stream(DataSource.values())
            .filter(source -> source != ALL)
            .filter(source -> !source.mines)
            .toArray(DataSource[]::new);
}

private final static DataSource[] BioDatabases = new DataSource[] {MESH, HMDB, KNAPSACK,CHEBI,KEGG,HSDB,MACONDA,METACYC,GNPS,TRAIN,YMDB,PLANTCYC,NORMAN,SUPERNATURAL,COCONUT,BloodExposome,TeroMol,PUBCHEMANNOTATIONBIO,PUBCHEMANNOTATIONDRUG,PUBCHEMANNOTATIONSAFETYANDTOXIC,PUBCHEMANNOTATIONFOOD,LOTUS,FooDB,MiMeDB,LIPIDMAPS,LIPID};
// Data sources that are grouped as biological databases.
// NOTE(review): presumably folded into the BIO flag by makeBIOFLAG() - confirm against that method.
private final static DataSource[] BIO_DATABASES = new DataSource[] {MESH, HMDB, KNAPSACK,CHEBI,KEGG,HSDB,MACONDA,METACYC,GNPS,TRAIN,YMDB,PLANTCYC,NORMAN,SUPERNATURAL,COCONUT,BloodExposome,TeroMol,PUBCHEMANNOTATIONBIO,PUBCHEMANNOTATIONDRUG,PUBCHEMANNOTATIONSAFETYANDTOXIC,PUBCHEMANNOTATIONFOOD,LOTUS,FooDB,MiMeDB,LIPIDMAPS,LIPID};

// 4294401852
private static long makeBIOFLAG() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -258,14 +258,6 @@ public static List<Source> getAllSelectableDbs() {
.collect(Collectors.toList());
}

/**
 * Collects the searchable sources for every {@link DataSource} returned by
 * {@code DataSource.valuesNoALLNoMINES()}.
 * <p>
 * Each data source is addressed by its enum name; names that are not searchable or that
 * do not resolve to a source are dropped.
 *
 * @return unmodifiable list of the resolved sources
 */
public static List<Source> getNonInSilicoSelectableDbs() {
    return Arrays.stream(DataSource.valuesNoALLNoMINES())
            .map(dataSource -> dataSource.name())
            .filter(name -> CustomDataSources.isSearchable(name))
            .map(name -> CustomDataSources.getSourceFromName(name))
            .filter(source -> source != null)
            .toList();
}

// listener stuff
public static void notifyListeners(Source changed, boolean removed) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,10 @@
package de.unijena.bioinf.ChemistryBase.ms;

import de.unijena.bioinf.ChemistryBase.chem.PrecursorIonType;
import de.unijena.bioinf.ChemistryBase.ms.ft.model.AdductSettings;
import de.unijena.bioinf.ms.annotations.Ms2ExperimentAnnotation;
import org.jetbrains.annotations.NotNull;
import org.slf4j.LoggerFactory;

import java.util.*;
import java.util.concurrent.ConcurrentHashMap;
Expand All @@ -34,29 +36,195 @@
* This is intended to collect Adducts from different detection sources.
* Can be attached to MsExperiment
*/
public final class DetectedAdducts extends ConcurrentHashMap<String, PossibleAdducts> implements Ms2ExperimentAnnotation, Cloneable { //todo ElementFilter: ConcurrentHashMap is not immutable. Hence, in princlipe, this could be cleared. Should Ms2ExperimentAnnotation be immutable?
public enum Source {INPUT_FILE, LCMS_ALIGN, MS1_PREPROCESSOR, SPECTRAL_LIBRARY_SEARCH, UNSPECIFIED_SOURCE} //todo implement PossibleAdducts by library search
public final class DetectedAdducts extends ConcurrentHashMap<DetectedAdducts.Source, PossibleAdducts> implements Ms2ExperimentAnnotation, Cloneable { //todo ElementFilter: ConcurrentHashMap is not immutable. Hence, in princlipe, this could be cleared. Should Ms2ExperimentAnnotation be immutable?
/**
 * Origin of a set of detected adducts.
 * <p>
 * Each constant carries three flags, in constructor order: whether it is a primary detection
 * source, whether an empty adduct set is accepted for it, and whether it forbids adding
 * adducts from additional sources.
 */
public enum Source {
    /** Adducts that were specified in the input file. */
    INPUT_FILE(true, false, true),

    /**
     * Adducts found during SIRIUS LCMS preprocessing. May contain an unknown adduct to
     * indicate that fallback adducts shall be added.
     */
    LCMS_ALIGN(true, true, false),

    /**
     * Adducts detected based on MS1 only. These are never very confident, hence the
     * fallback adducts are always added.
     */
    MS1_PREPROCESSOR(true, true, false),

    // Special sources: these are only added on top of a primary source and are never used
    // as primary source themselves. Hence, if only these are available, the fallbacks are added.

    /** Adducts found by spectral library search. May be added additionally. */
    SPECTRAL_LIBRARY_SEARCH(false, false, false),

    /**
     * Never specified directly. Only used so that the map can be parsed from a string:
     * unknown sources are mapped to this value. May be added additionally.
     */
    UNSPECIFIED_SOURCE(false, false, false);

    private final boolean isPrimarySource;
    private final boolean canBeEmpty;
    private final boolean forbidAdditionalSources;

    Source(boolean primary, boolean emptyAllowed, boolean exclusive) {
        this.isPrimarySource = primary;
        this.canBeEmpty = emptyAllowed;
        this.forbidAdditionalSources = exclusive;
    }

    /** @return {@code true} if this source is flagged as primary detection source */
    public boolean isPrimaryDetectionSource() {
        return isPrimarySource;
    }

    /** @return {@code true} if this source is not flagged as primary detection source */
    public boolean isAdditionalDetectionSource() {
        return !isPrimarySource;
    }

    /** @return {@code true} if adducts of additional sources must not be combined with this source */
    public boolean isForbidAdditionalSources() {
        return forbidAdditionalSources;
    }
}

/**
 * Creates a {@link DetectedAdducts} instance holding exactly one entry.
 *
 * @param source  source under which the ion type is registered
 * @param ionType the single ion type wrapped into a {@link PossibleAdducts}
 * @return a new instance containing only the given mapping
 */
public static DetectedAdducts singleton(Source source, PrecursorIonType ionType) {
    final DetectedAdducts detected = new DetectedAdducts();
    final PossibleAdducts single = new PossibleAdducts(ionType);
    detected.put(source, single);
    return detected;
}

public DetectedAdducts() {
super();
}

public Optional<PossibleAdducts> getAdducts() {
return getAdducts(Source.values());
/**
 * Looks up the adducts of the primary detection sources, queried in the declaration order
 * of {@link Source}.
 *
 * @return the primary adducts, if available. An empty adduct set is never returned;
 * {@code Optional.empty()} is returned instead.
 */
protected Optional<PossibleAdducts> getPrimaryAdducts() {
    final Source[] primarySources = Arrays.stream(Source.values())
            .filter(Source::isPrimaryDetectionSource)
            .toArray(Source[]::new);
    return getAdducts(primarySources).filter(adducts -> !adducts.isEmpty());
}

public Optional<PossibleAdducts> getAdducts(Source... keyPrio) {
return getAdducts(Arrays.stream(keyPrio).map(Source::name).toArray(String[]::new));
/**
 * Builds the union of the adducts of all additional (non-primary) detection sources.
 *
 * @return the additional adducts, if available. An empty adduct set is never returned;
 * {@code Optional.empty()} is returned instead.
 */
protected Optional<PossibleAdducts> getAdditionalAdducts() {
    final Source[] additionalSources = Arrays.stream(Source.values())
            .filter(Source::isAdditionalDetectionSource)
            .toArray(Source[]::new);
    return getUnionOfAdducts(additionalSources).filter(adducts -> !adducts.isEmpty());
}

/**
 * Checks whether a primary source that forbids additional sources contributes adducts.
 *
 * @return {@code true} if at least one such source is mapped to a non-empty adduct set
 */
protected boolean hasPrimarySourceThatForbidsAdditionalSources() {
    for (Source source : Source.values()) {
        if (!source.isPrimaryDetectionSource() || !source.isForbidAdditionalSources()) {
            continue;
        }
        // sources without an entry are treated like sources with an empty adduct set
        if (!getOrDefault(source, PossibleAdducts.empty()).isEmpty()) {
            return true;
        }
    }
    return false;
}


/**
 * Selects the adducts that shall be considered for compound annotation by the following rules:
 * <ol>
 * <li>If input file adducts are prioritized and present, non-empty and free of unknown ion
 *     types, they are returned directly, without any further processing.</li>
 * <li>Otherwise the adducts of the most important available primary source (may be the input
 *     file) are selected.</li>
 * <li>Fallback adducts are added if there are no primary adducts or if they contain an unknown
 *     {@link PrecursorIonType} such as [M+?], which signals that fallback adducts shall be added.</li>
 * <li>Adducts of the additional sources are added, unless a primary source that forbids
 *     additional sources contributes adducts.</li>
 * <li>The result is intersected with the detectable adducts, the enforced adducts are added
 *     and unknown adducts are removed.</li>
 * </ol>
 *
 * @param adductSettings settings providing fallback, detectable and enforced adducts
 * @param charge         charge passed on to the fallback and enforced adduct lookups
 * @return set of adducts that shall be considered for compound annotation
 */
public PossibleAdducts getDetectedAdductsAndOrFallback(AdductSettings adductSettings, int charge) {
    if (adductSettings.isPrioritizeInputFileAdducts() && containsKey(Source.INPUT_FILE)) {
        final PossibleAdducts fromInputFile = get(Source.INPUT_FILE);
        if (fromInputFile.isEmpty()) {
            warnIsEmpty(Source.INPUT_FILE);
        } else if (!fromInputFile.hasUnknownIontype()) {
            return fromInputFile;
        }
        // Remaining case: the input file annotation contains an unknown adduct. This is probably
        // a very uncommon way to specify adducts; continue with the regular selection below.
    }

    final Optional<PossibleAdducts> primary = getPrimaryAdducts();
    PossibleAdducts selected = null;
    if (primary.isPresent()) {
        final PossibleAdducts primaryAdducts = primary.get();
        selected = allowFallbackAdducts(primaryAdducts)
                ? PossibleAdducts.union(primaryAdducts, adductSettings.getFallback(charge))
                : primaryAdducts;
    }
    // the pure fallback set is always constructed, even if it ends up unused
    final PossibleAdducts fallbackOnly = new PossibleAdducts(adductSettings.getFallback(charge));
    if (selected == null) {
        selected = fallbackOnly;
    }

    if (!hasPrimarySourceThatForbidsAdditionalSources()) {
        final Optional<PossibleAdducts> additional = getAdditionalAdducts();
        if (additional.isPresent()) {
            selected = PossibleAdducts.union(selected, additional.get());
        }
    }
    return processwithAdductSettingsAndClean(selected, adductSettings, charge);
}

/**
 * Decides whether fallback adducts shall be added to the given adducts.
 *
 * @param pa adducts to inspect
 * @return {@code true} if {@code pa} contains an unknown ion type or is empty
 */
private boolean allowFallbackAdducts(PossibleAdducts pa) {
    // an unknown PrecursorIonType such as [M+?]+ means that we are not sure and still want fallback adducts
    if (pa.hasUnknownIontype()) {
        return true;
    }
    return pa.isEmpty();
}

public Optional<PossibleAdducts> getAdducts(String... keyPrio) {
for (String key : keyPrio)
if (containsKey(key))
return Optional.of(get(key));
/**
 * Logs a warning that a source is present but has no adducts.
 *
 * @param source the source whose adduct set is empty
 */
private void warnIsEmpty(Source source) {
    final String message = "Detected adduct source '" + source + "' specified, but adducts are empty.";
    LoggerFactory.getLogger(this.getClass()).warn(message);
}


// Ion types parsed once from their string notation; cleanAdducts compares against them
// so that [M]+ and [M+H]+ are never both kept.
private static final PrecursorIonType M_PLUS = PrecursorIonType.getPrecursorIonType("[M]+");
private static final PrecursorIonType M_H_PLUS = PrecursorIonType.getPrecursorIonType("[M+H]+");

/**
 * Cleans a set of adducts:
 * <ol>
 * <li>adducts with unknown ionization are removed;</li>
 * <li>it is guaranteed that [M]+ and [M+H]+ are never both contained, which prevents issues
 *     with duplicate structure candidates in subsequent steps. [M+H]+ is favored.</li>
 * </ol>
 *
 * @param possibleAdducts adducts to clean; not modified
 * @return a new {@link PossibleAdducts} holding the cleaned set
 */
private PossibleAdducts cleanAdducts(PossibleAdducts possibleAdducts) {
    final Set<PrecursorIonType> known = new HashSet<>(possibleAdducts.getAdducts());
    known.removeIf(PrecursorIonType::isIonizationUnknown);
    if (known.contains(M_PLUS) && known.contains(M_H_PLUS)) {
        known.remove(M_PLUS);
    }
    return new PossibleAdducts(known);
}

/**
 * Intersects the adducts with the detectable adducts, adds the enforced adducts and removes
 * unknown adducts. An error is logged if the final set is empty.
 *
 * @param possibleAdducts adducts selected so far
 * @param as              settings providing the detectable and enforced adducts
 * @param charge          charge used to look up the enforced adducts
 * @return the processed and cleaned adducts
 */
private PossibleAdducts processwithAdductSettingsAndClean(PossibleAdducts possibleAdducts, AdductSettings as, int charge) {
    final PossibleAdducts detectableOnly = PossibleAdducts.intersection(possibleAdducts, as.getDetectable());

    // enforced adducts are added after the intersection, so they are kept even if not detectable
    final PossibleAdducts withEnforced = as.getEnforced(charge).isEmpty()
            ? detectableOnly
            : PossibleAdducts.union(detectableOnly, as.getEnforced(charge));

    final PossibleAdducts cleaned = cleanAdducts(withEnforced);
    if (cleaned.isEmpty()) {
        LoggerFactory.getLogger(this.getClass()).error("Final set of selected adducts is empty.");
    }
    return cleaned;
}

/**
 * Returns the adducts of the first usable source in the given priority order.
 * <p>
 * A source is usable if it has an entry and either accepts empty adduct sets or its adduct
 * set is non-empty. For present sources that are skipped because they are empty, a warning
 * is logged.
 *
 * @param keyPrio sources to query, most important first
 * @return adducts of the first usable source, or {@code Optional.empty()} if there is none
 */
public Optional<PossibleAdducts> getAdducts(Source... keyPrio) {
    for (Source source : keyPrio) {
        if (!containsKey(source)) {
            continue;
        }
        if (!source.canBeEmpty && get(source).isEmpty()) {
            warnIsEmpty(source);
            continue;
        }
        return Optional.of(get(source));
    }
    return Optional.empty();
}

/**
 * Builds the union of the adducts of all given sources that have an entry.
 * <p>
 * Not so efficient for many sources, but well suited if at most one source is expected
 * most of the time.
 *
 * @param keys sources whose adducts shall be merged
 * @return the merged adducts, or {@code Optional.empty()} if none of the sources has an entry
 */
protected Optional<PossibleAdducts> getUnionOfAdducts(Source... keys) {
    PossibleAdducts merged = null;
    for (Source source : keys) {
        if (!containsKey(source)) {
            continue;
        }
        final PossibleAdducts current = get(source);
        merged = (merged == null) ? current : PossibleAdducts.union(merged, current);
    }
    return Optional.ofNullable(merged);
}

/**
 * Merges the adducts of all entries, regardless of their source.
 *
 * @return a new {@link PossibleAdducts} containing every ion type of every entry
 */
public PossibleAdducts getAllAdducts() {
    final Set<PrecursorIonType> all = new HashSet<>();
    for (PossibleAdducts adducts : values()) {
        all.addAll(adducts.getAdducts());
    }
    return new PossibleAdducts(all);
}
Expand All @@ -68,34 +236,41 @@ public boolean hasAdducts() {
return values().stream().anyMatch(it -> !it.isEmpty());
}

/**
 * Checks whether an entry is stored under the name of the given source.
 *
 * @param key source to look up; its enum name is used as the actual key
 * @return result of the name-based lookup
 */
public boolean containsKey(Source key) {
return containsKey(key.name());
}

public PossibleAdducts get(Source key) {
return get(key.name());
/**
 * Returns the sources for which an entry exists.
 *
 * @return the result of {@code keySet()}, passed through unchanged (no defensive copy is made here)
 */
public Set<Source> getSources() {
return keySet();
}

public PossibleAdducts put(@NotNull Source key, @NotNull PossibleAdducts value) {
return put(key.name(), value);
if (key == Source.MS1_PREPROCESSOR)
return super.put(key, ensureMS1PreprocessorAllowsToAddFallbackAdducts(value));
else
return super.put(key, value);
}

/**
 * Returns the keys of this map as strings.
 *
 * @return unmodifiable wrapper around {@code keySet()}
 */
public Set<String> getSourceStrings() {
//todo ElementFilter: Do we require String keys for flexibility, or can we change them to the Source enum?
return Collections.unmodifiableSet(keySet());
}

public Set<Source> getSources() {
Set<Source> sourceSet = new HashSet<>();
for (String key : keySet()) {
try {
Source source = Source.valueOf(key);
sourceSet.add(source);
} catch (IllegalArgumentException e) {
sourceSet.add(Source.UNSPECIFIED_SOURCE);
private PossibleAdducts ensureMS1PreprocessorAllowsToAddFallbackAdducts(@NotNull PossibleAdducts value) {
//this ensures we add fallback adducts, because we are never certain enough with MS1_PREPROCESSOR
//both, empty list or unknown adduct indicate to add fallback
if (!value.isEmpty() && !value.hasUnknownIontype()) {
Set<PrecursorIonType> adducts = new HashSet<>(value.getAdducts());
//to indicate that MS1_PREPROCESSOR is always combined with fallback adducts
if (adducts.stream().anyMatch(PrecursorIonType::isPositive)) {
adducts.add(PrecursorIonType.unknown(1));
} else {
adducts.add(PrecursorIonType.unknown(-1));
}
return new PossibleAdducts(adducts);
} else {
return value;
}
return sourceSet;
}

/**
 * Checks whether an entry exists for a source that is more important than the queried one.
 * Importance follows the natural enum order of {@link Source}: a source that compares as
 * smaller is more important.
 *
 * @param source the source to compare against
 * @return {@code true} if this map has a key that is more important than {@code source}
 */
public boolean hasMoreImportantSource(Source source) {
    for (Source present : keySet()) {
        if (present.compareTo(source) < 0) {
            return true;
        }
    }
    return false;
}

@Override
Expand All @@ -120,8 +295,16 @@ public static DetectedAdducts fromString(String json) {
String[] keyValue = mapping.replace("}", "").split("\\s*(:|->)\\s*\\{\\s*");
PossibleAdducts val = keyValue.length > 1 ? Arrays.stream(keyValue[1].split(",")).filter(Objects::nonNull).filter(s -> !s.isBlank()).map(PrecursorIonType::parsePrecursorIonType).flatMap(Optional::stream)
.collect(Collectors.collectingAndThen(Collectors.toSet(), PossibleAdducts::new)) : new PossibleAdducts();
if (keyValue.length > 0)
ads.put(keyValue[0], val);
if (keyValue.length > 0){
Source source;
try {
source = Source.valueOf(keyValue[0]);
} catch (IllegalArgumentException e) {
source = Source.UNSPECIFIED_SOURCE;
}
ads.put(source, val);
}

}

return ads;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -60,21 +60,6 @@ public interface Ms2Experiment extends Cloneable, AnnotatedWithDefaults<Ms2Exper
@NotNull PrecursorIonType getPrecursorIonType();


/**
 * Returns the detected adducts, if available.
 * <p>
 * This does not add any enforced or fallback adducts!
 * Use {@link #getPossibleAdductsOrFallback()} for that!
 *
 * @return the detected adducts, or {@code Optional.empty()} if there is no
 * {@link DetectedAdducts} annotation, it provides no adducts, or the adducts are empty
 */
@NotNull
default Optional<PossibleAdducts> getDetectedAdducts() {
    return getAnnotation(DetectedAdducts.class)
            .flatMap(DetectedAdducts::getAdducts)
            .filter(adducts -> !adducts.isEmpty());
}


/**
* Returns a list of detected adducts, if no adducts are found a fallback list will be returned
Expand All @@ -83,12 +68,9 @@ default Optional<PossibleAdducts> getDetectedAdducts() {
*/
@NotNull
default PossibleAdducts getPossibleAdductsOrFallback() {
final PossibleAdducts detectedOrFallback = getDetectedAdducts().
orElseGet(() -> getAnnotation(AdductSettings.class).map(as -> as.getFallback(getPrecursorIonType().getCharge())).map(PossibleAdducts::new).orElseGet(() -> new PossibleAdducts(PropertyManager.DEFAULTS.createInstanceWithDefaults(AdductSettings.class).getFallback(getPrecursorIonType().getCharge())))
);
final PossibleAdducts detected = getAnnotation(AdductSettings.class).map(as -> PossibleAdducts.union(detectedOrFallback, as.getEnforced(getPrecursorIonType().getCharge()))).orElse(detectedOrFallback);

return detected;
return getAnnotation(DetectedAdducts.class).orElseGet(DetectedAdducts::new).getDetectedAdductsAndOrFallback(
getAnnotation(AdductSettings.class).orElse(PropertyManager.DEFAULTS.createInstanceWithDefaults(AdductSettings.class)), getPrecursorIonType().getCharge()
);
}


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,10 @@ public boolean hasNegativeCharge() {
return false;
}

/**
 * Checks for ion types with unknown ionization.
 *
 * @return {@code true} if at least one contained ion type reports an unknown ionization
 */
public boolean hasUnknownIontype() {
    for (PrecursorIonType ionType : value) {
        if (ionType.isIonizationUnknown()) {
            return true;
        }
    }
    return false;
}

/**
 * Creates a copy that contains only the positively charged ion types.
 *
 * @return a new {@link PossibleAdducts} holding every ion type with a charge greater than zero
 */
public PossibleAdducts keepOnlyPositive() {
    // compare against 0, not 1: singly charged ions (+1) are positive and must be kept
    return new PossibleAdducts(value.stream().filter(it -> it.getCharge() > 0).collect(Collectors.toSet()));
}
Expand Down
Loading

0 comments on commit 24b5ff9

Please sign in to comment.