Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

update to model statistics calculation #352

Merged
merged 1 commit into from
Oct 2, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -939,7 +939,7 @@ else if(!model_iris.add(modeluri)) {
}
LOGGER.info("preparing model stats...");
//OWLOntology gocam_owl = m3.getModelAbox(modelIRI);//no imports
GoCamModel gcm = new GoCamModel(gocam, m3.getGolego_repo());
GoCamModel gcm = new GoCamModel(gocam, m3);
String title = "title";
if(gcm.getTitle()!=null) {
title = makeColSafe(gcm.getTitle());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -370,7 +370,32 @@ public Map<OWLNamedIndividual, Set<String>> getSuperCategoryMapForIndividuals(Se
}

/**
 * Returns a new set built from {@code uris} in which every entry that has a
 * non-null value in the map returned by {@code mapDeprecated(uris)} is
 * replaced by that value; entries without a mapping are copied unchanged.
 *
 * @param uris the URIs to check
 * @return a newly allocated set holding the replacement or original URIs
 */
public Set<String> replaceDeprecated(Set<String> uris){
// NOTE(review): `fixed` is declared twice below (copy-of-input form and empty form);
// this looks like removed/added diff lines shown together — confirm which one is live.
Set<String> fixed = new HashSet<String>(uris);
Set<String> fixed = new HashSet<String>();
// lookup table keyed by the original URI; presumably deprecated term -> replacement (verify against mapDeprecated's query)
Map<String, String> old_new = mapDeprecated(uris);
for(String t : uris) {
if(old_new.get(t)!=null) {
// a replacement exists: keep the replacement instead of the original
fixed.add(old_new.get(t));
}else {
// no replacement known: keep the original URI
fixed.add(t);
}
}
return fixed;
}

/**
 * Applies a precomputed replacement table to a set of URIs.
 *
 * Each element of {@code uris} is looked up in {@code old_new}; when the table
 * holds a non-null value for it, that value is emitted instead of the element,
 * otherwise the element itself is emitted. The input set is left untouched.
 *
 * @param uris    the URIs to translate
 * @param old_new replacement table keyed by original URI
 * @return a new set containing the translated URIs
 */
public Set<String> replaceDeprecated(Set<String> uris, Map<String, String> old_new){
    Set<String> translated = new HashSet<String>();
    for(String uri : uris) {
        String replacement = old_new.get(uri);
        // a missing key and a key mapped to null are both treated as "no replacement"
        translated.add(replacement == null ? uri : replacement);
    }
    return translated;
}

public Map<String, String> mapDeprecated(Set<String> uris){
Map<String, String> old_new = new HashMap<String, String>();
BigdataSailRepositoryConnection connection;
try {
connection = go_lego_repo.getReadOnlyConnection();
Expand All @@ -392,9 +417,7 @@ public Set<String> replaceDeprecated(Set<String> uris){
BindingSet binding = result.next();
Value c = binding.getValue("c");
Value replacement = binding.getValue("replacement");
if(fixed.remove(c.stringValue())) {
fixed.add(replacement.stringValue());
}
old_new.put(c.stringValue(),replacement.stringValue());
}
} catch (MalformedQueryException e) {
// TODO Auto-generated catch block
Expand All @@ -409,7 +432,7 @@ public Set<String> replaceDeprecated(Set<String> uris){
// TODO Auto-generated catch block
e1.printStackTrace();
}
return fixed;
return old_new;
}

public Map<String, Set<String>> getSuperCategoryMap(Set<String> uris) throws IOException {
Expand Down Expand Up @@ -707,6 +730,8 @@ public String getLabel(String entity) throws IOException {
return label;
}



public boolean exists(String entity) throws IOException {
boolean exists = false;
String query = "select * "
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,15 @@
import java.util.Set;

import org.apache.commons.math3.stat.descriptive.DescriptiveStatistics;
import org.apache.log4j.Logger;
import org.geneontology.minerva.BlazegraphMolecularModelManager;
import org.geneontology.minerva.BlazegraphOntologyManager;
import org.geneontology.minerva.CoreMolecularModelManager;
import org.openrdf.query.BindingSet;
import org.openrdf.query.MalformedQueryException;
import org.openrdf.query.QueryEvaluationException;
import org.openrdf.query.TupleQueryResult;
import org.openrdf.repository.RepositoryException;
import org.semanticweb.owlapi.model.IRI;
import org.semanticweb.owlapi.model.OWLAnnotation;
import org.semanticweb.owlapi.model.OWLClass;
Expand All @@ -16,6 +24,7 @@
import org.semanticweb.owlapi.model.OWLOntology;

public class GoCamModel extends ProvenanceAnnotated{
private static Logger LOG = Logger.getLogger(GoCamModel.class);
BlazegraphOntologyManager go_lego;
String modelstate;
Set<String> in_taxon;
Expand All @@ -33,25 +42,73 @@ public class GoCamModel extends ProvenanceAnnotated{
GoCamModelStats stats;
Map<OWLObjectProperty, Integer> causal_count;

// NOTE(review): two constructor signature lines and two `go_lego` assignments appear below
// (a BlazegraphOntologyManager form and a BlazegraphMolecularModelManager form); this looks
// like removed/added diff lines shown together — confirm which pair is live.
/**
 * Builds a model wrapper around the given ABox ontology.
 *
 * Creates OWL class handles for the reacto molecular_event class and for
 * GO_0003674 ({@code mf}), GO_0008150 ({@code bp}) and GO_0005575 ({@code cc}),
 * initialises the causal-count and individual-entity maps, records the ontology
 * IRI, and then runs, in order: {@code addAnnotations()},
 * {@code setIndTypesWithSparql(m3, iri)}, {@code addActivities()} and
 * {@code setGoCamModelStats()}.
 *
 * @param abox the model's ABox ontology; its ontology IRI is read via
 *             {@code getOntologyIRI().get()} (presumably fails when no IRI is set — confirm)
 * @param m3   model manager used for the SPARQL type lookup and as the source of
 *             the ontology manager ({@code getGolego_repo()})
 * @throws IOException declared by the helper methods called here
 * @throws MalformedQueryException declared by {@code setIndTypesWithSparql}
 * @throws QueryEvaluationException declared by {@code setIndTypesWithSparql}
 * @throws RepositoryException declared by {@code setIndTypesWithSparql}
 */
public GoCamModel(OWLOntology abox, BlazegraphOntologyManager go_lego_manager) throws IOException {
public GoCamModel(OWLOntology abox, BlazegraphMolecularModelManager m3) throws IOException, MalformedQueryException, QueryEvaluationException, RepositoryException {
ont = abox;
me = ont.getOWLOntologyManager().getOWLDataFactory().getOWLClass(IRI.create("http://purl.obolibrary.org/obo/go/extensions/reacto.owl#molecular_event"));
mf = ont.getOWLOntologyManager().getOWLDataFactory().getOWLClass(IRI.create("http://purl.obolibrary.org/obo/GO_0003674"));
bp = ont.getOWLOntologyManager().getOWLDataFactory().getOWLClass(IRI.create("http://purl.obolibrary.org/obo/GO_0008150"));
cc = ont.getOWLOntologyManager().getOWLDataFactory().getOWLClass(IRI.create("http://purl.obolibrary.org/obo/GO_0005575"));
causal_count = new HashMap<OWLObjectProperty, Integer>();
go_lego = go_lego_manager;
go_lego = m3.getGolego_repo();
// the ontology IRI doubles as the named-graph id passed to the SPARQL lookup below
iri = abox.getOntologyID().getOntologyIRI().get().toString();
ind_entity = new HashMap<OWLNamedIndividual, GoCamEntity>();
addAnnotations();
// OWL-API based type lookup is disabled in favour of the SPARQL-based one
//setIndTypesWithOwl();
setIndTypesWithSparql(m3, iri);
addActivities();
this.setGoCamModelStats();
}


/**
 * Populates {@code ind_types} by querying the model's named graph for the
 * asserted types of every named individual.
 *
 * Steps: (1) run a SPARQL query for all (instance, type) pairs in the graph,
 * excluding the owl:NamedIndividual type itself; (2) build a replacement table
 * for the collected types via {@code go_lego.mapDeprecated}; (3) fetch the
 * super-category map for the corrected types; (4) for each individual, store
 * the union of the super categories of its corrected types.
 *
 * The query result is always closed, even if iteration fails. Types that have
 * no entry in the super-category map contribute nothing instead of causing a
 * NullPointerException.
 *
 * @param m3       manager used to execute the SPARQL query
 * @param graph_id IRI of the named graph holding the model; inserted verbatim
 *                 into the query text
 * @throws MalformedQueryException  if the query cannot be parsed
 * @throws QueryEvaluationException if evaluating or closing the result fails
 * @throws RepositoryException      if the underlying repository fails
 * @throws IOException              declared by the ontology-manager lookups
 */
private void setIndTypesWithSparql(BlazegraphMolecularModelManager m3, String graph_id) throws MalformedQueryException, QueryEvaluationException, RepositoryException, IOException {
    Map<OWLNamedIndividual, Set<String>> i_types = new HashMap<OWLNamedIndividual, Set<String>>();
    Set<String> all_types = new HashSet<String>();
    // NOTE(review): the meaning of the trailing 100 (timeout? limit?) is not visible here — confirm against executeSPARQLQuery
    TupleQueryResult r = (TupleQueryResult) m3.executeSPARQLQuery(""
            + "PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> "
            + "select ?instance ?type where {"
            + "GRAPH <"+graph_id+"> { "
            + "?instance rdf:type <http://www.w3.org/2002/07/owl#NamedIndividual> ."
            + "?instance rdf:type ?type ."
            + "filter (?type != <http://www.w3.org/2002/07/owl#NamedIndividual> ) "
            + "}}", 100);
    try {
        while(r.hasNext()) {
            BindingSet bs = r.next();
            String instance = bs.getBinding("instance").getValue().stringValue();
            String type = bs.getBinding("type").getValue().stringValue();
            OWLNamedIndividual i = ont.getOWLOntologyManager().getOWLDataFactory().getOWLNamedIndividual(IRI.create(instance));
            Set<String> types = i_types.get(i);
            if(types==null) {
                // first type seen for this individual: register its set once
                types = new HashSet<String>();
                i_types.put(i, types);
            }
            types.add(type);
            all_types.add(type);
        }
    } finally {
        // release the query result even when iteration throws
        r.close();
    }
    // resolve replacements once for all types, then reuse the table per individual
    Map<String, String> old_new = go_lego.mapDeprecated(all_types);
    Set<String> corrected_types = go_lego.replaceDeprecated(all_types, old_new);
    Map<String, Set<String>> type_roots = go_lego.getSuperCategoryMap(corrected_types);
    //set global
    ind_types = new HashMap<OWLNamedIndividual, Set<String>>();
    for(Map.Entry<OWLNamedIndividual, Set<String>> entry : i_types.entrySet()) {
        //fix deprecated
        Set<String> types = go_lego.replaceDeprecated(entry.getValue(), old_new);
        //convert to root types
        Set<String> roots = new HashSet<String>();
        for(String type : types) {
            Set<String> roots_for_type = type_roots.get(type);
            // a type absent from the super-category map has no roots to add
            if(roots_for_type!=null) {
                roots.addAll(roots_for_type);
            }
        }
        ind_types.put(entry.getKey(), roots);
    }
}

/**
 * Populates {@code ind_types} from the ontology itself: collects every named
 * individual in the ontology signature and asks the ontology manager for the
 * super-category map of those individuals.
 *
 * @throws IOException declared by the ontology-manager lookup
 */
private void setIndTypesWithOwl() throws IOException {
    final Set<OWLNamedIndividual> individuals = ont.getIndividualsInSignature();
    // third argument is the "fix deprecated" flag, always enabled here
    ind_types = go_lego.getSuperCategoryMapForIndividuals(individuals, ont, true);
}

private void addActivities() throws IOException {
activities = new HashSet<ActivityUnit> ();
boolean fix_deprecated = true;
ind_types = go_lego.getSuperCategoryMapForIndividuals(ont.getIndividualsInSignature(), ont, fix_deprecated);
for(OWLNamedIndividual ind : ind_types.keySet()) {
Set<String> types = ind_types.get(ind);
if(types!=null) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ public class GoCamModelStats {
int n_activity_units = 0;
int n_complete_activity_units = 0;
int n_connected_processes = 0;
int n_causal_out_relation_assertions = 0;
int n_causal_in_relation_assertions = 0;
int n_unconnected = 0;
int n_unconnected_out = 0;
int n_unconnected_in = 0;
Expand Down Expand Up @@ -149,11 +149,11 @@ public GoCamModelStats(GoCamModel model) {
}
}
n_connected_processes = p.size();
if(a.causal_out!=null) {
for(OWLObjectProperty prop : a.causal_out.keySet()) {
Set<GoCamOccurent> ocs = a.causal_out.get(prop);
if(a.causal_in!=null) {
for(OWLObjectProperty prop : a.causal_in.keySet()) {
Set<GoCamOccurent> ocs = a.causal_in.get(prop);
for(GoCamOccurent oc : ocs ) {
n_causal_out_relation_assertions++;
n_causal_in_relation_assertions++;
}
}
}
Expand All @@ -180,7 +180,7 @@ public String toString() {
g+=" n unlocated activity units "+n_no_location+"\n";
g+=" n activity units unconnected to a BP "+n_no_bp+"\n";
g+=" n connected biological processes "+n_connected_processes+"\n";
g+=" n causal relation assertions "+n_causal_out_relation_assertions+"\n";
g+=" n causal relation assertions "+n_causal_in_relation_assertions+"\n";
g+=" n unconnected activities "+n_unconnected+"\n";
g+=" n activities with no outgoing connections "+n_unconnected_out+"\n";
g+=" n activities with no incoming connections "+n_unconnected_in+"\n";
Expand All @@ -189,11 +189,11 @@ public String toString() {
g+=" descriptive statistics for depth in ontology for BP terms containing activity units \n"+stats2string(bp_depth);
g+=" descriptive statistics for depth in ontology for CC terms used as locations for activity units \n"+stats2string(cc_depth);
return g;
}
}

/**
 * Renders the statistics as a single tab-separated row.
 *
 * Column order: activity units, complete activity units, raw MF, raw BP,
 * raw CC, no enabler, no location, no BP, connected processes, causal
 * relation assertions, unconnected, unconnected out, unconnected in,
 * max connected graph, then the 50th percentile of {@code mf_depth},
 * {@code bp_depth} and {@code cc_depth}.
 *
 * @return the tab-separated values, without a trailing newline
 */
public String stats2cols() {
String r = n_activity_units+"\t"+n_complete_activity_units+"\t"+n_raw_mf+"\t"+n_raw_bp+"\t"+n_raw_cc+"\t"+n_no_enabler+"\t"+n_no_location+"\t"+n_no_bp+
// NOTE(review): the next two continuation lines differ only in the causal counter
// (n_causal_out_... vs n_causal_in_...); this looks like removed/added diff lines shown
// together — confirm which one is live.
"\t"+n_connected_processes+"\t"+n_causal_out_relation_assertions+"\t"+n_unconnected+"\t"+n_unconnected_out+"\t"+n_unconnected_in+"\t"+max_connected_graph+
"\t"+n_connected_processes+"\t"+n_causal_in_relation_assertions+"\t"+n_unconnected+"\t"+n_unconnected_out+"\t"+n_unconnected_in+"\t"+max_connected_graph+
"\t"+mf_depth.getPercentile(50)+"\t"+bp_depth.getPercentile(50)+"\t"+cc_depth.getPercentile(50);
return r;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,11 @@
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.Test;
import org.openrdf.query.BindingSet;
import org.openrdf.query.MalformedQueryException;
import org.openrdf.query.QueryEvaluationException;
import org.openrdf.query.QueryResult;
import org.openrdf.query.TupleQueryResult;
import org.openrdf.repository.RepositoryException;
import org.openrdf.rio.RDFHandlerException;
import org.openrdf.rio.RDFParseException;
Expand Down Expand Up @@ -40,7 +45,7 @@ public static void tearDownAfterClass() throws Exception {
}

@Test
public void testWithM3() throws IOException, OWLOntologyCreationException, RepositoryException, RDFParseException, RDFHandlerException {
public void testGoModelStats() throws Exception {
OWLOntologyManager man = OWLManager.createOWLOntologyManager();
OWLOntology tbox_ontology = man.loadOntology(IRI.create("http://purl.obolibrary.org/obo/go/extensions/go-lego.owl"));
CurieHandler curieHandler = new MappedCurieHandler();
Expand All @@ -66,13 +71,32 @@ public void testWithM3() throws IOException, OWLOntologyCreationException, Repos
}
//read it back out and check on stats
for(IRI modelIRI : m3.getAvailableModelIds()) {
//the following results in very odd behavior where sometimes the title goes missing from the model
ModelContainer mc = m3.getModel(modelIRI);
OWLOntology gocam_via_mc = mc.getAboxOntology();
GoCamModel g = new GoCamModel(gocam_via_mc, onto_repo);
OWLOntology gocam_via_mc = mc.getAboxOntology();
GoCamModel g = new GoCamModel(gocam_via_mc, m3);
//testing for an issue with the OWL blazegraph loader
assertFalse("title not read out of M3 retrieved model "+modelIRI, (g.getTitle()==null));
System.out.println(g.toString()+"\t"+g.getStats().stats2cols());
//note these test cases from reactome contain some reactions that are not officially 'part of' the model
//these reactions are not counted as activities, but causal relations coming from them are counted.
if(modelIRI.toString().contains("R-HSA-5654719")) {
//SHC-mediated cascade:FGFR4
assertTrue("wrong n activities "+g.getStats().n_activity_units, g.getStats().n_activity_units==4);
assertTrue("wrong n complete activities "+g.getStats().n_complete_activity_units, g.getStats().n_complete_activity_units==2);
assertTrue("wrong n unenabled activities "+g.getStats().n_no_enabler, g.getStats().n_no_enabler==2);
assertTrue("wrong n causal relations "+g.getStats().n_causal_in_relation_assertions, g.getStats().n_causal_in_relation_assertions==6);
}else if(modelIRI.toString().contains("R-HSA-201688")) {
//WNT mediated activation of DVL
assertTrue("wrong n activities "+g.getStats().n_activity_units, g.getStats().n_activity_units==4);
assertTrue("wrong n complete activities "+g.getStats().n_complete_activity_units, g.getStats().n_complete_activity_units==3);
assertTrue("wrong n unenabled activities "+g.getStats().n_no_enabler, g.getStats().n_no_enabler==1);
assertTrue("wrong n causal relations "+g.getStats().n_causal_in_relation_assertions, g.getStats().n_causal_in_relation_assertions==3);
}else if(modelIRI.toString().contains("R-HSA-5654733")) {
//Negative regulation of FGFR4 signaling
assertTrue("wrong n activities "+g.getStats().n_activity_units, g.getStats().n_activity_units==3);
assertTrue("wrong n complete activities "+g.getStats().n_complete_activity_units, g.getStats().n_complete_activity_units==2);
assertTrue("wrong n unenabled activities "+g.getStats().n_no_enabler, g.getStats().n_no_enabler==1);
assertTrue("wrong n causal relations "+g.getStats().n_causal_in_relation_assertions, g.getStats().n_causal_in_relation_assertions==3);
}
}
}

Expand Down
Loading