Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Provide option in namesvalidator to speed up searches and validation #310

Merged
merged 2 commits into from
Jan 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,14 @@ default Substance findBySubstanceReference(SubstanceReference substanceReference
Optional<SubstanceSummary> findSummaryByUuid(UUID uuid);

List<SubstanceSummary> findByNames_NameIgnoreCase(String name);

// Use an explicit query to prevent Hibernate from inserting a call to UPPER(), which
// slows down processing on some RDBMSs.
@Query("select s from Substance s join s.names n where n.name = ?1")
List<SubstanceSummary> findByNames_NameIgnoreCaseImplicit(String name);

List<SubstanceSummary> findByNames_Name(String name);

List<SubstanceSummary> findByNames_StdNameIgnoreCase(String stdName);

List<SubstanceSummary> findByCodes_CodeIgnoreCase(String code);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,9 @@ public class NamesValidator extends AbstractValidatorPlugin<Substance> {
private SubstanceRepository substanceRepository;
// Currently, this is false at FDA; it may be confusing if used together with TagsValidator.
boolean extractLocators = false;
private boolean duplicateNameIsError = false;

private String caseSearchType = "Explicit";

// Keep consistent with NamesUtilities
// This and other replacers should be handled later in a new NameStandardizer class similar to HTMLNameStandardizer
Expand Down Expand Up @@ -105,7 +108,7 @@ public void validate(Substance s, Substance objold, ValidatorCallback callback)
boolean preferred = false;
int display = 0;
Iterator<Name> nameIterator = s.names.iterator();
while(nameIterator.hasNext()){
while (nameIterator.hasNext()) {
Name n = nameIterator.next();
if (n == null) {
GinasProcessingMessage mes = GinasProcessingMessage
Expand All @@ -120,7 +123,7 @@ public void validate(Substance s, Substance objold, ValidatorCallback callback)
continue;

}
if(n.getName() ==null){
if (n.getName() == null) {
callback.addMessage(GinasProcessingMessage.ERROR_MESSAGE("name can not be null"));
continue;
}
Expand All @@ -140,13 +143,13 @@ public void validate(Substance s, Substance objold, ValidatorCallback callback)
// shown which name(s) have been changed in the warning.
TagUtilities.BracketExtraction be = TagUtilities.getBracketExtraction(n.getName());
List<String> locators = be.getTagTerms();
if(!locators.isEmpty()){
if (!locators.isEmpty()) {
GinasProcessingMessage mes = GinasProcessingMessage
.WARNING_MESSAGE(
"Names of form \"<NAME> [<TEXT>]\" are transformed to locators. The following locators will be added:%s",
locators.toString())
locators.toString())
.appliableChange(true);
callback.addMessage(mes, ()->{
callback.addMessage(mes, () -> {
for (String loc : locators) {
// Name is changed to just the namePart!
n.name = be.getNamePart();
Expand Down Expand Up @@ -182,7 +185,7 @@ public void validate(Substance s, Substance objold, ValidatorCallback callback)
for (Replacer r : replacers.get()) {
//check for Null
String name = n.getName();
if(name!=null && r.matches(name)) {
if (name != null && r.matches(name)) {
GinasProcessingMessage mes = GinasProcessingMessage
.WARNING_MESSAGE(
r.getMessage(name))
Expand All @@ -191,17 +194,17 @@ public void validate(Substance s, Substance objold, ValidatorCallback callback)

}
}
if(n.getAccess().isEmpty()){
if (n.getAccess().isEmpty()) {
boolean hasPublicReference = n.getReferences().stream()
.map(r->r.getValue())
.map(r->s.getReferenceByUUID(r))
.map(r -> r.getValue())
.map(r -> s.getReferenceByUUID(r))
.filter(Objects::nonNull)
.filter(r->r.isPublic())
.filter(r->r.isPublicDomain())
.filter(r -> r.isPublic())
.filter(r -> r.isPublicDomain())
.findAny()
.isPresent();

if(!hasPublicReference){
if (!hasPublicReference) {
GinasProcessingMessage mes = GinasProcessingMessage
.ERROR_MESSAGE("The name :\"%s\" needs an unprotected reference marked \"Public Domain\" in order to be made public.",
n.getName());
Expand Down Expand Up @@ -238,61 +241,71 @@ public void validate(Substance s, Substance objold, ValidatorCallback callback)

Map<String, Set<String>> nameSetByLanguage = new HashMap<>();

Optional<Name> oldDisplayName= (objold!=null && objold.names !=null) ? objold.names.stream().filter(n->n!=null && n.displayName).findFirst() : Optional.empty();
LogUtil.trace(()->String.format("oldDisplayName: present: %b; value: %s", oldDisplayName.isPresent(),
Optional<Name> oldDisplayName = (objold != null && objold.names != null) ? objold.names.stream().filter(n -> n != null && n.displayName).findFirst() : Optional.empty();
LogUtil.trace(() -> String.format("oldDisplayName: present: %b; value: %s", oldDisplayName.isPresent(),
oldDisplayName.isPresent() ? oldDisplayName.get().getName() : ""));

for (Name n : s.names) {
if( n==null || n.getName() == null) {
if (n == null || n.getName() == null) {
//skip over null names
continue;
}
String name = n.getName();
Iterator<Keyword> iter = n.languages.iterator();
String uppercasedName = name.toUpperCase();

while(iter.hasNext()){
while (iter.hasNext()) {
String language = iter.next().getValue();
// System.out.println("language for " + n + " = " + language);
Set<String> names = nameSetByLanguage.computeIfAbsent(language, k->new HashSet<>());
if(!names.add(uppercasedName)){
Set<String> names = nameSetByLanguage.computeIfAbsent(language, k -> new HashSet<>());
if (!names.add(uppercasedName)) {
GinasProcessingMessage mes;
mes = GinasProcessingMessage
.WARNING_MESSAGE("Name '%s' is a duplicate name in the record.", name)
.markPossibleDuplicate();
.WARNING_MESSAGE("Name '%s' is a duplicate name in the record.", name)
.markPossibleDuplicate();
callback.addMessage(mes);
}

}
//nameSet.add(n.getName());
try {
List<SubstanceRepository.SubstanceSummary> sr = substanceRepository.findByNames_NameIgnoreCase(n.name);
List<SubstanceRepository.SubstanceSummary> sr =
(!this.caseSearchType.equalsIgnoreCase("IMPLICIT"))
? substanceRepository.findByNames_NameIgnoreCase(n.name)
: substanceRepository.findByNames_Name(n.name);
if (sr != null && !sr.isEmpty()) {
SubstanceRepository.SubstanceSummary s2 = sr.iterator().next();
if (!s2.getUuid().equals(s.getOrGenerateUUID())) {
GinasProcessingMessage mes = GinasProcessingMessage
.WARNING_MESSAGE("Name '%s' collides (possible duplicate) with existing name for substance:", n.name)
//TODO katzelda Feb 2021: add link back
. addLink(ValidationUtils.createSubstanceLink(s2.toSubstanceReference()))
;
//TODO katzelda Feb 2021: add link back
.addLink(ValidationUtils.createSubstanceLink(s2.toSubstanceReference()));
callback.addMessage(mes);
}
}
}
} catch (Exception e) {
e.printStackTrace();
}
if(oldDisplayName.isPresent() && n.displayName && !oldDisplayName.get().getName().equalsIgnoreCase(n.getName())
&& (s.changeReason==null || !s.changeReason.equalsIgnoreCase(CHANGE_REASON_DISPLAYNAME_CHANGED))) {
if (oldDisplayName.isPresent() && n.displayName && !oldDisplayName.get().getName().equalsIgnoreCase(n.getName())
&& (s.changeReason == null || !s.changeReason.equalsIgnoreCase(CHANGE_REASON_DISPLAYNAME_CHANGED))) {
GinasProcessingMessage mes = GinasProcessingMessage
.WARNING_MESSAGE(
"Preferred Name has been changed from '%s' to '%s'. Please confirm that this change is intentional by submitting.",
oldDisplayName.get().getName(), n.getName());
callback.addMessage(mes);
}
}
}

/**
 * Returns the configured case search type consulted by {@code validate} when it
 * looks up existing substances by name. The field is initialised to "Explicit".
 *
 * @return the current case search type
 */
public String getCaseSearchType() {
    return this.caseSearchType;
}

/**
 * Sets the case search type consulted by {@code validate} when it looks up
 * existing substances by name. The value "Implicit" (compared case-insensitively)
 * selects the exact-match repository query; any other value selects the
 * ignore-case query.
 *
 * @param caseSearchType the search type; {@code null} is replaced by the default
 *                       "Explicit"
 */
public void setCaseSearchType(String caseSearchType) {
    // validate() calls this.caseSearchType.equalsIgnoreCase(...); a null value would
    // throw there and the duplicate-name lookup would be skipped, so fall back to
    // the field's default instead of storing null.
    this.caseSearchType = (caseSearchType == null) ? "Explicit" : caseSearchType;
}


/**
 * Stores the given value in this validator's
 * {@code replaceSingleLinefeedPrecededByCertainCharactersWithBlank} flag.
 * NOTE(review): presumably toggles one of the name replacers; confirm against the
 * replacer setup, which is not visible here.
 *
 * @param replaceSingleLinefeedPrecededByCertainCharactersWithBlank the new flag value
 */
public void setReplaceSingleLinefeedPrecededByCertainCharactersWithBlank(
        boolean replaceSingleLinefeedPrecededByCertainCharactersWithBlank) {
    this.replaceSingleLinefeedPrecededByCertainCharactersWithBlank =
            replaceSingleLinefeedPrecededByCertainCharactersWithBlank;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,10 @@ gsrs.validators.substances = [
"validatorClass" = "ix.ginas.utils.validation.validators.NamesValidator",
"newObjClass" = "ix.ginas.models.v1.Substance",
"type" = "PRIMARY"
"configClass" = "SubstanceValidatorConfig"
"configClass" = "SubstanceValidatorConfig",
"parameters"= {
"caseSearchType": "Explicit"
}
},

{
Expand Down
Loading