Skip to content

Commit

Permalink
Merge branch '110-prioritize-custom-db-names-over-remote-db-names-independently-of-length' into 'master'
Browse files Browse the repository at this point in the history

Resolve "Prioritize custom db names over remote db names independently of length"

Closes #110

See merge request bright-giant/sirius/sirius-libs!70
  • Loading branch information
Markus Fleischauer committed May 24, 2024
2 parents 5e2f866 + 08fc42b commit ac97b01
Show file tree
Hide file tree
Showing 2 changed files with 36 additions and 17 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -336,25 +336,24 @@ public int hashCode() {
}


public static List<FingerprintCandidate> mergeCompounds(Collection<FingerprintCandidate> compounds) {
return mergeCompounds(compounds, false);
private static List<FingerprintCandidate> mergeCompounds(Collection<FingerprintCandidate> compounds, Set<String> customNames) {
return mergeCompounds(compounds, customNames, false);
}


/**
* Merges compounds that share the same 2D InChIKey into a single candidate.
*/
public static List<FingerprintCandidate> mergeCompounds(Collection<FingerprintCandidate> compounds, boolean onlyContained) {
private static List<FingerprintCandidate> mergeCompounds(Collection<FingerprintCandidate> compounds, Set<String> customNames, boolean onlyContained) {
HashMap<String, FingerprintCandidate> it = new HashMap<>();
mergeCompounds(compounds, it, onlyContained);
mergeCompounds(compounds, it, customNames, onlyContained,false);
return new ArrayList<>(it.values());
}

public static Set<FingerprintCandidate> mergeCompounds(Collection<FingerprintCandidate> compounds, final HashMap<String, FingerprintCandidate> mergeMap) {
return mergeCompounds(compounds, mergeMap, false);
private static Set<FingerprintCandidate> mergeCompounds(Collection<FingerprintCandidate> compounds, final HashMap<String, FingerprintCandidate> mergeMap, Set<String> customNames) {
return mergeCompounds(compounds, mergeMap, customNames, false,false);
}

public static Set<FingerprintCandidate> mergeCompounds(Collection<FingerprintCandidate> compounds, final HashMap<String, FingerprintCandidate> mergeMap, boolean onlyContained) {
private static Set<FingerprintCandidate> mergeCompounds(Collection<FingerprintCandidate> compounds, final HashMap<String, FingerprintCandidate> mergeMap, Set<String> customNames, boolean onlyContained, boolean fromCustomDB) {
final Set<FingerprintCandidate> mergedCandidates = new HashSet<>(compounds.size());
for (FingerprintCandidate c : compounds) {
final String key = c.getInchiKey2D();
Expand All @@ -366,7 +365,25 @@ public static Set<FingerprintCandidate> mergeCompounds(Collection<FingerprintCan
x.setQLayer(x.getQLayer() | c.getQLayer());
x.mergeDBLinks(c.links);
x.mergeBits(c.bitset);
x.mergeCompoundName(c.getName());
if (customNames.contains(key)){
if (fromCustomDB)
//search the shortest name among custom names
x.mergeCompoundName(c.getName());
else if (c.getName()!=null && !c.getName().isBlank())
//replace remote name with custom name
x.setName(c.getName());
}else {
if (fromCustomDB){
//replace remote name with custom name
if (c.getName()!=null && !c.getName().isBlank()){
x.setName(c.getName());
customNames.add(key);
}
}else {
//search the shortest name among remote names
x.mergeCompoundName(c.getName());
}
}
} else {
if (onlyContained)
continue;
Expand All @@ -379,6 +396,8 @@ public static Set<FingerprintCandidate> mergeCompounds(Collection<FingerprintCan

public static class CandidateResult {
final HashMap<String, FingerprintCandidate> cs = new HashMap<>();
final Set<String> customNames = new HashSet<>();

final HashMap<String, Set<FingerprintCandidate>> customInChIs = new HashMap<>();
final Set<FingerprintCandidate> restDbInChIs;
private long requestFilter;
Expand All @@ -405,21 +424,21 @@ private CandidateResult() {
private CandidateResult(List<FingerprintCandidate> compounds, long appliedFilter, long requestFilter) {
restFilter = appliedFilter;
this.requestFilter = requestFilter;
restDbInChIs = mergeCompounds(compounds, cs);
restDbInChIs = mergeCompounds(compounds, cs, customNames);
}

private void addRequestedCustom(String name, List<FingerprintCandidate> compounds) {
if (customInChIs.containsKey(name))
throw new IllegalArgumentException("Custom db already exists: '" + name + "'");
customInChIs.put(name, mergeCompounds(compounds, cs, false));
customInChIs.put(name, mergeCompounds(compounds, cs, customNames, false,true));
}

private void addAdditionalCustom(String name, List<FingerprintCandidate> compounds) {
if (customInChIs.containsKey(name))
throw new IllegalArgumentException("Custom db already exists: '" + name + "'");
HashMap<String, FingerprintCandidate> candidates = new HashMap<>(cs);
candidates.keySet().retainAll(getReqCandidatesInChIs());
customInChIs.put(name, mergeCompounds(compounds, candidates, true));
customInChIs.put(name, mergeCompounds(compounds, candidates, customNames, true,false));
}

public Set<String> getCombCandidatesInChIs() {
Expand Down Expand Up @@ -498,8 +517,8 @@ public void merge(@NotNull CandidateResult other) {
if (other.requestFilter != requestFilter || other.restFilter != restFilter)
throw new IllegalArgumentException("Instances with different filters cannot be merged!");

restDbInChIs.addAll(mergeCompounds(other.restDbInChIs, cs));
other.customInChIs.forEach((k, v) -> customInChIs.get(k).addAll(mergeCompounds(v, cs)));
restDbInChIs.addAll(mergeCompounds(other.restDbInChIs, cs, customNames));
other.customInChIs.forEach((k, v) -> customInChIs.get(k).addAll(mergeCompounds(v, cs, customNames)));
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -577,7 +577,7 @@ private void mergeLinksAndNames(@NotNull Comp comp) {
final HashSet<DBLink> links = new HashSet<>(fc.getMutableLinks());

if (!molecule.ids.isEmpty()) {
molecule.ids.stream().map(id -> new DBLink(null, id)).forEach(links::add);
molecule.ids.stream().filter(Objects::nonNull).map(id -> new DBLink(null, id)).forEach(links::add);
if (fc.getName() == null || fc.getName().isBlank())
fc.setName(molecule.ids.iterator().next());
}
Expand All @@ -602,7 +602,7 @@ private void clearAndCreateLinksAndName(@NotNull Comp comp) {

//override remote db links.
if (!molecule.ids.isEmpty()) {
fc.setLinks(molecule.ids.stream().map(id -> new DBLink(null, id)).toList()); //we add just id so that names can be added during db retrieval
fc.setLinks(molecule.ids.stream().filter(Objects::nonNull).map(id -> new DBLink(null, id)).toList()); //we add just id so that names can be added during db retrieval
if (fc.getName() == null || fc.getName().isBlank())
fc.setName(molecule.ids.iterator().next());
}
Expand Down Expand Up @@ -671,7 +671,7 @@ private FingerprintCandidateWrapper computeNewCandidate(Molecule molecule) throw
fc.setName(molecule.name);

if (!molecule.ids.isEmpty()) {
fc.setLinks(molecule.ids.stream().map(id -> new DBLink(null, id)).toList());
fc.setLinks(molecule.ids.stream().filter(Objects::nonNull).map(id -> new DBLink(null, id)).toList());
if (fc.getName() == null || fc.getName().isEmpty())
fc.setName(molecule.ids.iterator().next()); //set id as name if no name was set
}
Expand Down

0 comments on commit ac97b01

Please sign in to comment.