Skip to content

Commit

Permalink
Merge pull request #352 from geneontology/speed-up-model-stats
Browse files Browse the repository at this point in the history
No impact on anything aside from stats calculation. All tests look good locally and in Travis. Merging.
  • Loading branch information
goodb authored Oct 2, 2020
2 parents c9488d4 + 3503153 commit 731390d
Show file tree
Hide file tree
Showing 10 changed files with 5,263 additions and 24,397 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -939,7 +939,7 @@ else if(!model_iris.add(modeluri)) {
}
LOGGER.info("preparing model stats...");
//OWLOntology gocam_owl = m3.getModelAbox(modelIRI);//no imports
GoCamModel gcm = new GoCamModel(gocam, m3.getGolego_repo());
GoCamModel gcm = new GoCamModel(gocam, m3);
String title = "title";
if(gcm.getTitle()!=null) {
title = makeColSafe(gcm.getTitle());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -370,7 +370,32 @@ public Map<OWLNamedIndividual, Set<String>> getSuperCategoryMapForIndividuals(Se
}

/**
 * Returns a new set in which every URI of {@code uris} that has a replacement is
 * swapped for that replacement; URIs without one are carried over unchanged.
 *
 * The old-to-new mapping is obtained from {@link #mapDeprecated(Set)} for exactly the
 * given URIs. Callers that need to rewrite several sets against the same mapping
 * should call {@code mapDeprecated} once and use the two-argument overload instead.
 *
 * @param uris the URIs to check; not modified
 * @return a fresh set with replacements applied
 */
public Set<String> replaceDeprecated(Set<String> uris){
    // Delegate so the substitution loop lives in one place; the result starts empty
    // (not as a copy of uris), otherwise the replaced URIs would remain in the output.
    return replaceDeprecated(uris, mapDeprecated(uris));
}

/**
 * Applies a precomputed old-to-new mapping to a set of URIs.
 *
 * Each URI whose entry in {@code old_new} is non-null is emitted as that mapped
 * value; every other URI (absent from the map, or mapped to null) is emitted as is.
 *
 * @param uris    the URIs to rewrite; not modified
 * @param old_new mapping from a URI to the URI that should stand in for it
 * @return a new set holding the rewritten URIs
 */
public Set<String> replaceDeprecated(Set<String> uris, Map<String, String> old_new){
    Set<String> rewritten = new HashSet<>();
    for(String uri : uris) {
        String substitute = old_new.get(uri);
        rewritten.add(substitute != null ? substitute : uri);
    }
    return rewritten;
}

public Map<String, String> mapDeprecated(Set<String> uris){
Map<String, String> old_new = new HashMap<String, String>();
BigdataSailRepositoryConnection connection;
try {
connection = go_lego_repo.getReadOnlyConnection();
Expand All @@ -392,9 +417,7 @@ public Set<String> replaceDeprecated(Set<String> uris){
BindingSet binding = result.next();
Value c = binding.getValue("c");
Value replacement = binding.getValue("replacement");
if(fixed.remove(c.stringValue())) {
fixed.add(replacement.stringValue());
}
old_new.put(c.stringValue(),replacement.stringValue());
}
} catch (MalformedQueryException e) {
// TODO Auto-generated catch block
Expand All @@ -409,7 +432,7 @@ public Set<String> replaceDeprecated(Set<String> uris){
// TODO Auto-generated catch block
e1.printStackTrace();
}
return fixed;
return old_new;
}

public Map<String, Set<String>> getSuperCategoryMap(Set<String> uris) throws IOException {
Expand Down Expand Up @@ -707,6 +730,8 @@ public String getLabel(String entity) throws IOException {
return label;
}



public boolean exists(String entity) throws IOException {
boolean exists = false;
String query = "select * "
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,15 @@
import java.util.Set;

import org.apache.commons.math3.stat.descriptive.DescriptiveStatistics;
import org.apache.log4j.Logger;
import org.geneontology.minerva.BlazegraphMolecularModelManager;
import org.geneontology.minerva.BlazegraphOntologyManager;
import org.geneontology.minerva.CoreMolecularModelManager;
import org.openrdf.query.BindingSet;
import org.openrdf.query.MalformedQueryException;
import org.openrdf.query.QueryEvaluationException;
import org.openrdf.query.TupleQueryResult;
import org.openrdf.repository.RepositoryException;
import org.semanticweb.owlapi.model.IRI;
import org.semanticweb.owlapi.model.OWLAnnotation;
import org.semanticweb.owlapi.model.OWLClass;
Expand All @@ -16,6 +24,7 @@
import org.semanticweb.owlapi.model.OWLOntology;

public class GoCamModel extends ProvenanceAnnotated{
private static Logger LOG = Logger.getLogger(GoCamModel.class);
BlazegraphOntologyManager go_lego;
String modelstate;
Set<String> in_taxon;
Expand All @@ -33,25 +42,73 @@ public class GoCamModel extends ProvenanceAnnotated{
GoCamModelStats stats;
Map<OWLObjectProperty, Integer> causal_count;

/**
 * Builds the model view over an ABox ontology.
 *
 * Resolves the OWL class handles used for categorisation (reacto molecular_event,
 * GO_0003674, GO_0008150, GO_0005575), takes the ontology manager from
 * {@code m3.getGolego_repo()}, records the ontology IRI, and then runs, in order:
 * {@code addAnnotations()}, {@code setIndTypesWithSparql(m3, iri)},
 * {@code addActivities()} and {@code setGoCamModelStats()}.
 *
 * @param abox the model's ABox ontology
 * @param m3   model manager; supplies the ontology manager and is queried via SPARQL
 *             for the types of the model's individuals
 * @throws IOException              propagated from the initialisation steps
 * @throws MalformedQueryException  propagated from the SPARQL type lookup
 * @throws QueryEvaluationException propagated from the SPARQL type lookup
 * @throws RepositoryException      propagated from the SPARQL type lookup
 */
public GoCamModel(OWLOntology abox, BlazegraphMolecularModelManager m3) throws IOException, MalformedQueryException, QueryEvaluationException, RepositoryException {
    ont = abox;
    me = ont.getOWLOntologyManager().getOWLDataFactory().getOWLClass(IRI.create("http://purl.obolibrary.org/obo/go/extensions/reacto.owl#molecular_event"));
    mf = ont.getOWLOntologyManager().getOWLDataFactory().getOWLClass(IRI.create("http://purl.obolibrary.org/obo/GO_0003674"));
    bp = ont.getOWLOntologyManager().getOWLDataFactory().getOWLClass(IRI.create("http://purl.obolibrary.org/obo/GO_0008150"));
    cc = ont.getOWLOntologyManager().getOWLDataFactory().getOWLClass(IRI.create("http://purl.obolibrary.org/obo/GO_0005575"));
    causal_count = new HashMap<OWLObjectProperty, Integer>();
    // The ontology manager now comes from m3; there is no separate manager argument.
    go_lego = m3.getGolego_repo();
    // NOTE(review): get() is called without a presence check; an ontology without an
    // IRI would presumably fail here - confirm callers always pass a named ontology.
    iri = abox.getOntologyID().getOntologyIRI().get().toString();
    ind_entity = new HashMap<OWLNamedIndividual, GoCamEntity>();
    addAnnotations();
    // Individual types are read with a SPARQL query through m3;
    // setIndTypesWithOwl() is the alternative that works from the ontology itself.
    setIndTypesWithSparql(m3, iri);
    addActivities();
    this.setGoCamModelStats();
}


/**
 * Fills {@code ind_types} with, for each named individual in the given graph, the
 * union of the super-category sets of its asserted types.
 *
 * Steps: (1) query the graph for every (instance, type) pair where the instance is an
 * owl:NamedIndividual, ignoring the owl:NamedIndividual type itself; (2) build one
 * old-to-new mapping for all types seen and look up super categories for the
 * corrected types in a single call; (3) per individual, apply the mapping to its
 * types and collect the super categories of each.
 *
 * @param m3       manager used to execute the SPARQL query
 * @param graph_id IRI of the named graph to query; inserted verbatim into the query
 * @throws MalformedQueryException  propagated from query execution
 * @throws QueryEvaluationException propagated from query execution or result iteration
 * @throws RepositoryException      propagated from query execution
 * @throws IOException              propagated from the super-category lookup
 */
private void setIndTypesWithSparql(BlazegraphMolecularModelManager m3, String graph_id) throws MalformedQueryException, QueryEvaluationException, RepositoryException, IOException {
    Map<OWLNamedIndividual, Set<String>> i_types = new HashMap<OWLNamedIndividual, Set<String>>();
    Set<String> all_types = new HashSet<String>();
    // NOTE(review): the second argument (100) is presumably a timeout - confirm units.
    TupleQueryResult r = (TupleQueryResult) m3.executeSPARQLQuery(""
            + "PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> "
            + "select ?instance ?type where {"
            + "GRAPH <"+graph_id+"> { "
            + "?instance rdf:type <http://www.w3.org/2002/07/owl#NamedIndividual> ."
            + "?instance rdf:type ?type ."
            + "filter (?type != <http://www.w3.org/2002/07/owl#NamedIndividual> ) "
            + "}}", 100);
    try {
        while(r.hasNext()) {
            BindingSet bs = r.next();
            String instance = bs.getValue("instance").stringValue();
            String type = bs.getValue("type").stringValue();
            OWLNamedIndividual i = ont.getOWLOntologyManager().getOWLDataFactory().getOWLNamedIndividual(IRI.create(instance));
            Set<String> types = i_types.get(i);
            if(types==null) {
                types = new HashSet<String>();
                i_types.put(i, types);
            }
            types.add(type);
            all_types.add(type);
        }
    } finally {
        // Release the result even when iteration fails part-way through.
        r.close();
    }
    // One mapping and one super-category lookup for the whole model, reused per individual below.
    Map<String, String> old_new = go_lego.mapDeprecated(all_types);
    Set<String> corrected_types = go_lego.replaceDeprecated(all_types, old_new);
    Map<String, Set<String>> type_roots = go_lego.getSuperCategoryMap(corrected_types);
    //set global
    ind_types = new HashMap<OWLNamedIndividual, Set<String>>();
    for(Map.Entry<OWLNamedIndividual, Set<String>> entry : i_types.entrySet()) {
        //fix deprecated
        Set<String> types = go_lego.replaceDeprecated(entry.getValue(), old_new);
        //convert to root types
        Set<String> roots = new HashSet<String>();
        for(String type : types) {
            Set<String> roots_for_type = type_roots.get(type);
            // A type with no entry in the lookup contributes nothing; addAll(null) would throw.
            if(roots_for_type!=null) {
                roots.addAll(roots_for_type);
            }
        }
        ind_types.put(entry.getKey(), roots);
    }
}

/**
 * Sets {@code ind_types} from the ontology itself: hands every individual in the
 * signature of {@code ont} to
 * {@code go_lego.getSuperCategoryMapForIndividuals} and stores the returned map.
 *
 * @throws IOException propagated from the ontology manager lookup
 */
private void setIndTypesWithOwl() throws IOException {
    // Third argument is the "fix deprecated" switch, always enabled here.
    ind_types = go_lego.getSuperCategoryMapForIndividuals(ont.getIndividualsInSignature(), ont, true);
}

private void addActivities() throws IOException {
activities = new HashSet<ActivityUnit> ();
boolean fix_deprecated = true;
ind_types = go_lego.getSuperCategoryMapForIndividuals(ont.getIndividualsInSignature(), ont, fix_deprecated);
for(OWLNamedIndividual ind : ind_types.keySet()) {
Set<String> types = ind_types.get(ind);
if(types!=null) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ public class GoCamModelStats {
int n_activity_units = 0;
int n_complete_activity_units = 0;
int n_connected_processes = 0;
int n_causal_out_relation_assertions = 0;
int n_causal_in_relation_assertions = 0;
int n_unconnected = 0;
int n_unconnected_out = 0;
int n_unconnected_in = 0;
Expand Down Expand Up @@ -149,11 +149,11 @@ public GoCamModelStats(GoCamModel model) {
}
}
n_connected_processes = p.size();
if(a.causal_out!=null) {
for(OWLObjectProperty prop : a.causal_out.keySet()) {
Set<GoCamOccurent> ocs = a.causal_out.get(prop);
if(a.causal_in!=null) {
for(OWLObjectProperty prop : a.causal_in.keySet()) {
Set<GoCamOccurent> ocs = a.causal_in.get(prop);
for(GoCamOccurent oc : ocs ) {
n_causal_out_relation_assertions++;
n_causal_in_relation_assertions++;
}
}
}
Expand All @@ -180,7 +180,7 @@ public String toString() {
g+=" n unlocated activity units "+n_no_location+"\n";
g+=" n activity units unconnected to a BP "+n_no_bp+"\n";
g+=" n connected biological processes "+n_connected_processes+"\n";
g+=" n causal relation assertions "+n_causal_out_relation_assertions+"\n";
g+=" n causal relation assertions "+n_causal_in_relation_assertions+"\n";
g+=" n unconnected activities "+n_unconnected+"\n";
g+=" n activities with no outgoing connections "+n_unconnected_out+"\n";
g+=" n activities with no incoming connections "+n_unconnected_in+"\n";
Expand All @@ -189,11 +189,11 @@ public String toString() {
g+=" descriptive statistics for depth in ontology for BP terms containing activity units \n"+stats2string(bp_depth);
g+=" descriptive statistics for depth in ontology for CC terms used as locations for activity units \n"+stats2string(cc_depth);
return g;
}
}

/**
 * Renders the statistics as a single tab-separated row.
 *
 * Column order: n_activity_units, n_complete_activity_units, n_raw_mf, n_raw_bp,
 * n_raw_cc, n_no_enabler, n_no_location, n_no_bp, n_connected_processes,
 * n_causal_in_relation_assertions, n_unconnected, n_unconnected_out,
 * n_unconnected_in, max_connected_graph, then the 50th percentile of mf_depth,
 * bp_depth and cc_depth.
 *
 * @return the row, without a trailing tab or newline
 */
public String stats2cols() {
    // Each column appears exactly once; the causal count is read from
    // n_causal_in_relation_assertions, the same field toString() reports.
    String r = n_activity_units
            +"\t"+n_complete_activity_units
            +"\t"+n_raw_mf
            +"\t"+n_raw_bp
            +"\t"+n_raw_cc
            +"\t"+n_no_enabler
            +"\t"+n_no_location
            +"\t"+n_no_bp
            +"\t"+n_connected_processes
            +"\t"+n_causal_in_relation_assertions
            +"\t"+n_unconnected
            +"\t"+n_unconnected_out
            +"\t"+n_unconnected_in
            +"\t"+max_connected_graph
            +"\t"+mf_depth.getPercentile(50)
            +"\t"+bp_depth.getPercentile(50)
            +"\t"+cc_depth.getPercentile(50);
    return r;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,11 @@
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.Test;
import org.openrdf.query.BindingSet;
import org.openrdf.query.MalformedQueryException;
import org.openrdf.query.QueryEvaluationException;
import org.openrdf.query.QueryResult;
import org.openrdf.query.TupleQueryResult;
import org.openrdf.repository.RepositoryException;
import org.openrdf.rio.RDFHandlerException;
import org.openrdf.rio.RDFParseException;
Expand Down Expand Up @@ -40,7 +45,7 @@ public static void tearDownAfterClass() throws Exception {
}

@Test
public void testWithM3() throws IOException, OWLOntologyCreationException, RepositoryException, RDFParseException, RDFHandlerException {
public void testGoModelStats() throws Exception {
OWLOntologyManager man = OWLManager.createOWLOntologyManager();
OWLOntology tbox_ontology = man.loadOntology(IRI.create("http://purl.obolibrary.org/obo/go/extensions/go-lego.owl"));
CurieHandler curieHandler = new MappedCurieHandler();
Expand All @@ -66,13 +71,32 @@ public void testWithM3() throws IOException, OWLOntologyCreationException, Repos
}
//read it back out and check on stats
for(IRI modelIRI : m3.getAvailableModelIds()) {
//the following results in very odd behavior where sometimes the title goes missing from the model
ModelContainer mc = m3.getModel(modelIRI);
OWLOntology gocam_via_mc = mc.getAboxOntology();
GoCamModel g = new GoCamModel(gocam_via_mc, onto_repo);
OWLOntology gocam_via_mc = mc.getAboxOntology();
GoCamModel g = new GoCamModel(gocam_via_mc, m3);
//testing for an issue with the OWL blazegraph loader
assertFalse("title not read out of M3 retrieved model "+modelIRI, (g.getTitle()==null));
System.out.println(g.toString()+"\t"+g.getStats().stats2cols());
//note these test cases from reactome contain some reactions that are not officially 'part of' the model
//these reactions are not counted as activities, but causal relations coming from them are counted.
if(modelIRI.toString().contains("R-HSA-5654719")) {
//SHC-mediated cascade:FGFR4
assertTrue("wrong n activities "+g.getStats().n_activity_units, g.getStats().n_activity_units==4);
assertTrue("wrong n complete activities "+g.getStats().n_complete_activity_units, g.getStats().n_complete_activity_units==2);
assertTrue("wrong n unenabled activities "+g.getStats().n_no_enabler, g.getStats().n_no_enabler==2);
assertTrue("wrong n causal relations "+g.getStats().n_causal_in_relation_assertions, g.getStats().n_causal_in_relation_assertions==6);
}else if(modelIRI.toString().contains("R-HSA-201688")) {
//WNT mediated activation of DVL
assertTrue("wrong n activities "+g.getStats().n_activity_units, g.getStats().n_activity_units==4);
assertTrue("wrong n complete activities "+g.getStats().n_complete_activity_units, g.getStats().n_complete_activity_units==3);
assertTrue("wrong n unenabled activities "+g.getStats().n_no_enabler, g.getStats().n_no_enabler==1);
assertTrue("wrong n causal relations "+g.getStats().n_causal_in_relation_assertions, g.getStats().n_causal_in_relation_assertions==3);
}else if(modelIRI.toString().contains("R-HSA-5654733")) {
//Negative regulation of FGFR4 signaling
assertTrue("wrong n activities "+g.getStats().n_activity_units, g.getStats().n_activity_units==3);
assertTrue("wrong n complete activities "+g.getStats().n_complete_activity_units, g.getStats().n_complete_activity_units==2);
assertTrue("wrong n unenabled activities "+g.getStats().n_no_enabler, g.getStats().n_no_enabler==1);
assertTrue("wrong n causal relations "+g.getStats().n_causal_in_relation_assertions, g.getStats().n_causal_in_relation_assertions==3);
}
}
}

Expand Down
Loading

0 comments on commit 731390d

Please sign in to comment.