Skip to content

Commit

Permalink
Merge pull request #452 from geneontology/dev
Browse files Browse the repository at this point in the history
Bring master up to date with dev
  • Loading branch information
balhoff authored Jan 28, 2022
2 parents 33c0583 + 8893160 commit efd9247
Show file tree
Hide file tree
Showing 18 changed files with 1,749 additions and 189 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -315,14 +315,23 @@ public static void main(String[] args) {
gpad_options.addOption("cat", "catalog", true, "Catalog file for tbox ontology. " +
"Use this to specify local copies of the ontology and or its imports to " +
"speed and control the process. If not used, will download the tbox and all its imports.");
gpad_options.addOption("ontojournal", "ontojournal", true, "Specify a blazegraph journal file containing the merged, pre-reasoned tbox aka go-lego.owl");
cmd = parser.parse(gpad_options, args, false);
String inputDB = cmd.getOptionValue("input");
String gpadOutputFolder = cmd.getOptionValue("gpad-output");
String modelIdPrefix = cmd.getOptionValue("model-id-prefix");
String modelIdcurie = cmd.getOptionValue("model-id-curie");
String ontologyIRI = cmd.getOptionValue("ontology");
String catalog = cmd.getOptionValue("catalog");
legoToAnnotationsSPARQL(modelIdPrefix, modelIdcurie, inputDB, gpadOutputFolder, ontologyIRI, catalog);
String go_lego_journal_file = null;
if(cmd.hasOption("ontojournal")) {
go_lego_journal_file = cmd.getOptionValue("ontojournal");
}
if(go_lego_journal_file==null) {
System.err.println("Missing -- ontojournal . Need to specify blazegraph journal file containing the merged go-lego tbox (neo, GO-plus, etc..)");
System.exit(-1);
}
legoToAnnotationsSPARQL(modelIdPrefix, modelIdcurie, inputDB, gpadOutputFolder, ontologyIRI, catalog, go_lego_journal_file);
}else if(cmd.hasOption("version")) {
printVersion();
}else if(cmd.hasOption("validate-go-cams")) {
Expand Down Expand Up @@ -711,7 +720,7 @@ public static void owl2LegoJson(String input, String output, boolean usePretty)
* @param ontologyIRI
* @throws Exception
*/
public static void legoToAnnotationsSPARQL(String modelIdPrefix, String modelIdcurie, String inputDB, String gpadOutputFolder, String ontologyIRI, String catalog) throws Exception {
public static void legoToAnnotationsSPARQL(String modelIdPrefix, String modelIdcurie, String inputDB, String gpadOutputFolder, String ontologyIRI, String catalog, String go_lego_journal_file) throws Exception {
if(modelIdPrefix==null) {
modelIdPrefix = "http://model.geneontology.org/";
}
Expand All @@ -738,13 +747,13 @@ public static void legoToAnnotationsSPARQL(String modelIdPrefix, String modelIdc
OWLOntology ontology = ontman.loadOntology(IRI.create(ontologyIRI));
CurieMappings localMappings = new CurieMappings.SimpleCurieMappings(Collections.singletonMap(modelIdcurie, modelIdPrefix));
CurieHandler curieHandler = new MappedCurieHandler(DefaultCurieHandler.loadDefaultMappings(), localMappings);
BlazegraphMolecularModelManager<Void> m3 = new BlazegraphMolecularModelManager<>(ontology, curieHandler, modelIdPrefix, inputDB, null, null);
BlazegraphMolecularModelManager<Void> m3 = new BlazegraphMolecularModelManager<>(ontology, curieHandler, modelIdPrefix, inputDB, null, go_lego_journal_file);
final String immutableModelIdPrefix = modelIdPrefix;
final String immutableGpadOutputFolder = gpadOutputFolder;
m3.getAvailableModelIds().stream().parallel().forEach(modelIRI -> {
try {
//TODO investigate whether changing to a neo-lite model has an impact on this - may need to make use of ontology journal
String gpad = new GPADSPARQLExport(curieHandler, m3.getLegacyRelationShorthandIndex(), m3.getTboxShorthandIndex()).exportGPAD(m3.createInferredModel(modelIRI), modelIRI);
String gpad = new GPADSPARQLExport(curieHandler, m3.getLegacyRelationShorthandIndex(), m3.getTboxShorthandIndex(), m3.getGolego_repo().regulatorsToRegulated).exportGPAD(m3.createInferredModel(modelIRI), modelIRI);
String fileName = StringUtils.replaceOnce(modelIRI.toString(), immutableModelIdPrefix, "") + ".gpad";
Writer writer = new OutputStreamWriter(new FileOutputStream(Paths.get(immutableGpadOutputFolder, fileName).toFile()), StandardCharsets.UTF_8);
writer.write(gpad);
Expand Down Expand Up @@ -920,13 +929,13 @@ else if(!model_iris.add(modeluri)) {
// TODO Auto-generated catch block
e1.printStackTrace();
}

if(checkShex) {
if(checkShex) {
shex.setActive(true);
}else {
shex.setActive(false);
}
shex.setActive(true);
}else {
shex.setActive(false);
}

//shex validator is ready, now build the inference provider (which provides access to the shex validator and provides inferences useful for shex)
String reasonerOpt = "arachne";
LOGGER.info("Building OWL inference provider: "+reasonerOpt);
Expand Down Expand Up @@ -1038,7 +1047,7 @@ else if(!model_iris.add(modeluri)) {
int n_rows_gpad = 0;
if(isConsistent) {
try {
Set<GPADData> gpad = new GPADSPARQLExport(curieHandler, m3.getLegacyRelationShorthandIndex(), m3.getTboxShorthandIndex()).getGPAD(m3.createInferredModel(modelIRI), modelIRI);
Set<GPADData> gpad = new GPADSPARQLExport(curieHandler, m3.getLegacyRelationShorthandIndex(), m3.getTboxShorthandIndex(), m3.getGolego_repo().regulatorsToRegulated).getGPAD(m3.createInferredModel(modelIRI), modelIRI);
if(gpad!=null) {
n_rows_gpad = gpad.size();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,9 @@

import scala.collection.JavaConverters;

import static java.util.stream.Collectors.mapping;
import static java.util.stream.Collectors.toSet;

/* Note: the example GPAD files are available at this link: http://www.informatics.jax.org/downloads/reports/mgi.gpa.gz */
public class GPADSPARQLExport {
private static final Logger LOG = Logger.getLogger(GPADSPARQLExport.class);
Expand All @@ -54,6 +57,9 @@ public class GPADSPARQLExport {
private static final String BP = "http://purl.obolibrary.org/obo/GO_0008150";
private static final String CC = "http://purl.obolibrary.org/obo/GO_0005575";
private static final Set<String> rootTerms = new HashSet<>(Arrays.asList(MF, BP, CC));
private static final String ENABLES = "http://purl.obolibrary.org/obo/RO_0002327";
private static final String CONTRIBUTES_TO = "http://purl.obolibrary.org/obo/RO_0002326";
private static final Set<String> functionRelations = new HashSet<>(Arrays.asList(ENABLES, CONTRIBUTES_TO));
private static final String EMAPA_NAMESPACE = "http://purl.obolibrary.org/obo/EMAPA_";
private static final String UBERON_NAMESPACE = "http://purl.obolibrary.org/obo/UBERON_";
private static final String inconsistentQuery =
Expand Down Expand Up @@ -96,11 +102,13 @@ public class GPADSPARQLExport {
private final CurieHandler curieHandler;
private final Map<IRI, String> relationShorthandIndex;
private final Map<IRI, String> tboxShorthandIndex;
private final Map<IRI, Set<IRI>> regulators;

public GPADSPARQLExport(CurieHandler handler, Map<IRI, String> shorthandIndex, Map<IRI, String> tboxShorthandIndex) {
public GPADSPARQLExport(CurieHandler handler, Map<IRI, String> shorthandIndex, Map<IRI, String> tboxShorthandIndex, Map<IRI, Set<IRI>> regulators) {
this.curieHandler = handler;
this.relationShorthandIndex = shorthandIndex;
this.tboxShorthandIndex = tboxShorthandIndex;
this.regulators = regulators;
}

public String exportGPAD(WorkingMemory wm, IRI modelIRI) throws InconsistentOntologyException {
Expand Down Expand Up @@ -147,11 +155,15 @@ public Set<GPADData> getGPAD(WorkingMemory wm, IRI modelIRI) throws Inconsistent
possibleExtensions.forEach(ae -> statementsToExplain.add(ae.getTriple()));
Map<Triple, Set<Explanation>> allExplanations = statementsToExplain.stream().collect(Collectors.toMap(Function.identity(), s -> toJava(wm.explain(Bridge.tripleFromJena(s)))));

Map<Triple, Set<GPADEvidence>> allEvidences = evidencesForFacts(allExplanations.values().stream().flatMap(es -> es.stream()).flatMap(e -> toJava(e.facts()).stream().map(t -> Bridge.jenaFromTriple(t))).collect(Collectors.toSet()), model, modelID, modelLevelAnnotations);
Set<Node> gpNodesWithOtherThanRootMF = basicAnnotations.stream().filter(a -> !a.getOntologyClass().toString().equals(MF)).map(a -> a.getObjectNode()).collect(Collectors.toSet());
Map<Triple, Set<GPADEvidence>> allEvidences = evidencesForFacts(allExplanations.values().stream().flatMap(es -> es.stream()).flatMap(e -> toJava(e.facts()).stream().map(t -> Bridge.jenaFromTriple(t))).collect(toSet()), model, modelID, modelLevelAnnotations);
Set<IRI> gpsWithAnyMFNotRootMF = basicAnnotations.stream().filter(a -> functionRelations.contains(a.getQualifier().toString())).filter(a -> !a.getOntologyClass().toString().equals(MF)).map(a -> a.getObject()).collect(toSet());
Map<Node, Set<IRI>> nodesToOntologyClasses = basicAnnotations.stream().collect(Collectors.groupingBy(BasicGPADData::getObjectNode, mapping(BasicGPADData::getOntologyClass, toSet())));
for (BasicGPADData annotation : basicAnnotations) {
Set<IRI> termsRegulatedByAnnotationsForThisGPNode = nodesToOntologyClasses.get(annotation.getObjectNode()).stream().flatMap(term -> regulators.getOrDefault(term, Collections.emptySet()).stream()).collect(toSet());
boolean regulationViolation = termsRegulatedByAnnotationsForThisGPNode.contains(annotation.getOntologyClass());
if (regulationViolation) continue;
for (Explanation explanation : allExplanations.get(Triple.create(annotation.getObjectNode(), NodeFactory.createURI(annotation.getQualifier().toString()), annotation.getOntologyClassNode()))) {
Set<Triple> requiredFacts = toJava(explanation.facts()).stream().map(t -> Bridge.jenaFromTriple(t)).collect(Collectors.toSet());
Set<Triple> requiredFacts = toJava(explanation.facts()).stream().map(t -> Bridge.jenaFromTriple(t)).collect(toSet());
// Every statement in the explanation must have at least one evidence, unless the statement is a class assertion
if (requiredFacts.stream().filter(t -> !t.getPredicate().getURI().equals(RDF.type.getURI())).allMatch(f -> !(allEvidences.get(f).isEmpty()))) {
// The evidence used for the annotation must be on an edge to or from the target node
Expand Down Expand Up @@ -179,8 +191,8 @@ public Set<GPADData> getGPAD(WorkingMemory wm, IRI modelIRI) throws Inconsistent
if (rootTerms.contains(annotation.getOntologyClass().toString())) {
rootViolation = !ND.equals(currentEvidence.getEvidence().toString());
} else { rootViolation = false; }
final boolean rootMFWithBP = annotation.getOntologyClass().toString().equals(MF) && gpNodesWithOtherThanRootMF.contains(annotation.getObjectNode());
if (!rootViolation && !rootMFWithBP) {
final boolean rootMFWithOtherMF = annotation.getOntologyClass().toString().equals(MF) && gpsWithAnyMFNotRootMF.contains(annotation.getObject());
if (!rootViolation && !rootMFWithOtherMF) {
DefaultGPADData defaultGPADData = new DefaultGPADData(annotation.getObject(), annotation.getQualifier(), annotation.getOntologyClass(), goodExtensions,
reference, currentEvidence.getEvidence(), currentEvidence.getWithOrFrom(), Optional.empty(), currentEvidence.getModificationDate(),
currentEvidence.getAssignedBy(), currentEvidence.getAnnotations());
Expand Down Expand Up @@ -250,7 +262,7 @@ private Map<Triple, Set<GPADEvidence>> evidencesForFacts(Set<Triple> facts, Mode
Optional<String> with = Optional.ofNullable(eqs.getLiteral("with")).map(Literal::getLexicalForm);
Set<Pair<String, String>> annotationAnnotations = new HashSet<>();
annotationAnnotations.add(Pair.of("noctua-model-id", modelID));
annotationAnnotations.addAll(getContributors(eqs).stream().map(c -> Pair.of("contributor", c)).collect(Collectors.toSet()));
annotationAnnotations.addAll(getContributors(eqs).stream().map(c -> Pair.of("contributor", c)).collect(toSet()));
String modificationDate = eqs.getLiteral("modification_date").getLexicalForm();
Optional<String> creationDate = Optional.ofNullable(eqs.getLiteral("creation_date")).map(Literal::getLexicalForm);
creationDate.ifPresent(date -> annotationAnnotations.add(Pair.of("creation-date", date)));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,6 @@ VALUES ?extension_rel {
<http://purl.obolibrary.org/obo/RO_0002490>
<http://purl.obolibrary.org/obo/RO_0002491>
<http://purl.obolibrary.org/obo/RO_0000053>
<http://purl.obolibrary.org/obo/RO_0000057>
<http://purl.obolibrary.org/obo/RO_0000087>
<http://purl.obolibrary.org/obo/RO_0001025>
<http://purl.obolibrary.org/obo/RO_0002007>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ FILTER(?evidence_type NOT IN (rdfs:Resource, owl:Thing, owl:NamedIndividual, BFO
?evidence dc:contributor ?contributor .
?evidence dc:date ?modification_date .
?evidence dc:source ?source_pub .
OPTIONAL { ?axiom dct:created ?creation_date }
OPTIONAL { ?evidence dct:created ?creation_date }
OPTIONAL { ?axiom dct:dateAccepted ?import_date }
OPTIONAL { ?evidence pav:providedBy ?provided_by . }
OPTIONAL { ?evidence <http://geneontology.org/lego/evidence-with> ?with }
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,7 @@
import scala.collection.JavaConverters;

import java.io.File;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.*;
import java.util.stream.Collectors;

public class GPADSPARQLTest {
Expand All @@ -47,7 +44,7 @@ public static void setupRules() throws OWLOntologyCreationException {
@BeforeClass
public static void setupExporter() {
JenaSystem.init();
exporter = new GPADSPARQLExport(DefaultCurieHandler.getDefaultHandler(), new HashMap<IRI, String>(), new HashMap<IRI, String>());
exporter = new GPADSPARQLExport(DefaultCurieHandler.getDefaultHandler(), new HashMap<IRI, String>(), new HashMap<IRI, String>(), new HashMap<>());
}

@Test
Expand Down Expand Up @@ -124,21 +121,23 @@ public void testGPADContainsAcceptedAndCreatedDates() throws Exception {
Set<Triple> triples = model.listStatements().toList().stream().map(s -> Bridge.tripleFromJena(s.asTriple())).collect(Collectors.toSet());
WorkingMemory mem = arachne.processTriples(JavaConverters.asScalaSetConverter(triples).asScala());
Set<GPADData> annotations = exporter.getGPAD(mem, IRI.create("http://test.org"));
IRI gene = IRI.create("http://identifiers.org/mgi/MGI:1922815");
Pair<String, String> creationDate = Pair.of("creation-date", "2012-09-17");
IRI gene = IRI.create("http://identifiers.org/wormbase/WBGene00001326");
Pair<String, String> creationDate = Pair.of("creation-date", "2021-05-13");
Assert.assertTrue(annotations.stream().anyMatch(a -> a.getObject().equals(gene) && a.getAnnotations().contains(creationDate)));
}

@Test
public void testFilterRootMFWhenRootBP() throws Exception {
public void testFilterRootMFWhenOtherMF() throws Exception {
IRI rootMF = IRI.create("http://purl.obolibrary.org/obo/GO_0003674");
IRI rootBP = IRI.create("http://purl.obolibrary.org/obo/GO_0008150");
IRI rootCC = IRI.create("http://purl.obolibrary.org/obo/GO_0005575");

Model model = ModelFactory.createDefaultModel();
model.read(this.getClass().getResourceAsStream("/test_root_mf_filter.ttl"), "", "ttl");
Set<Triple> triples = model.listStatements().toList().stream().map(s -> Bridge.tripleFromJena(s.asTriple())).collect(Collectors.toSet());
WorkingMemory mem = arachne.processTriples(JavaConverters.asScalaSetConverter(triples).asScala());
Set<GPADData> annotations = exporter.getGPAD(mem, IRI.create("http://test.org"));
IRI gene = IRI.create("http://identifiers.org/mgi/MGI:2153470");
IRI rootMF = IRI.create("http://purl.obolibrary.org/obo/GO_0003674");
IRI rootBP = IRI.create("http://purl.obolibrary.org/obo/GO_0008150");
Assert.assertTrue(annotations.stream().noneMatch(a -> a.getObject().equals(gene) && a.getOntologyClass().equals(rootMF)));

Model model2 = ModelFactory.createDefaultModel();
Expand All @@ -149,6 +148,33 @@ public void testFilterRootMFWhenRootBP() throws Exception {
IRI gene2 = IRI.create("http://identifiers.org/mgi/MGI:98392");
Assert.assertTrue(annotations2.stream().anyMatch(a -> a.getObject().equals(gene2) && a.getOntologyClass().equals(rootMF)));
Assert.assertTrue(annotations2.stream().anyMatch(a -> a.getObject().equals(gene2) && a.getOntologyClass().equals(rootBP)));

Model model3 = ModelFactory.createDefaultModel();
model3.read(this.getClass().getResourceAsStream("/test_root_mf_filter3.ttl"), "", "ttl");
Set<Triple> triples3 = model3.listStatements().toList().stream().map(s -> Bridge.tripleFromJena(s.asTriple())).collect(Collectors.toSet());
WorkingMemory mem3 = arachne.processTriples(JavaConverters.asScalaSetConverter(triples3).asScala());
Set<GPADData> annotations3 = exporter.getGPAD(mem3, IRI.create("http://test.org"));
IRI gene3 = IRI.create("http://identifiers.org/sgd/S000002650");
Assert.assertTrue(annotations3.stream().anyMatch(a -> a.getObject().equals(gene3) && a.getOntologyClass().equals(rootMF)));
Assert.assertTrue(annotations3.stream().anyMatch(a -> a.getObject().equals(gene3) && a.getOntologyClass().equals(rootBP)));
Assert.assertTrue(annotations3.stream().anyMatch(a -> a.getObject().equals(gene3) && a.getOntologyClass().equals(rootCC)));
}

/**
 * Verifies the exporter's handling of a regulator/regulated term pair.
 *
 * The exporter is built with a map stating that GO:0030511 regulates GO:0007179.
 * After exporting the model in {@code /test_filter_regulated_process.ttl}, the gene
 * MGI:2148811 must still carry an annotation to the regulator term, and must carry
 * no annotation to the regulated term.
 *
 * @throws Exception if the model cannot be read or the export fails
 */
@Test
public void testFilterAnnotationsToRegulatedProcess() throws Exception {
    final IRI gene = IRI.create("http://identifiers.org/mgi/MGI:2148811");
    final IRI regulator = IRI.create("http://purl.obolibrary.org/obo/GO_0030511");
    final IRI regulated = IRI.create("http://purl.obolibrary.org/obo/GO_0007179");

    // Regulator term -> terms it regulates; handed to an exporter local to this test
    // so the class-level exporter (built with an empty map) is left untouched.
    HashMap<IRI, Set<IRI>> regulatorIndex = new HashMap<>();
    regulatorIndex.put(regulator, Collections.singleton(regulated));
    GPADSPARQLExport regulationAwareExporter = new GPADSPARQLExport(
            DefaultCurieHandler.getDefaultHandler(),
            new HashMap<IRI, String>(),
            new HashMap<IRI, String>(),
            regulatorIndex);

    Model testModel = ModelFactory.createDefaultModel();
    testModel.read(this.getClass().getResourceAsStream("/test_filter_regulated_process.ttl"), "", "ttl");
    Set<Triple> modelTriples = testModel.listStatements().toList().stream()
            .map(statement -> Bridge.tripleFromJena(statement.asTriple()))
            .collect(Collectors.toSet());
    WorkingMemory memory = arachne.processTriples(JavaConverters.asScalaSetConverter(modelTriples).asScala());
    Set<GPADData> exported = regulationAwareExporter.getGPAD(memory, IRI.create("http://test.org"));

    // The annotation to the regulator term is kept ...
    Assert.assertTrue(exported.stream()
            .anyMatch(row -> row.getObject().equals(gene) && row.getOntologyClass().equals(regulator)));
    // ... while the annotation to the term it regulates is absent.
    Assert.assertTrue(exported.stream()
            .noneMatch(row -> row.getObject().equals(gene) && row.getOntologyClass().equals(regulated)));
}

}
Loading

0 comments on commit efd9247

Please sign in to comment.