From 6556408facb68e75216d62cbaf597fc762f4eb8f Mon Sep 17 00:00:00 2001 From: Jim Balhoff Date: Fri, 22 Jul 2022 18:24:50 -0400 Subject: [PATCH 1/3] Add command to dump models to JSON files. --- .../minerva/cli/CommandLineInterface.java | 106 +++++++++++++++++- .../server/handler/OperationsTools.java | 2 +- 2 files changed, 103 insertions(+), 5 deletions(-) diff --git a/minerva-cli/src/main/java/org/geneontology/minerva/cli/CommandLineInterface.java b/minerva-cli/src/main/java/org/geneontology/minerva/cli/CommandLineInterface.java index 80f958c6..3e151438 100644 --- a/minerva-cli/src/main/java/org/geneontology/minerva/cli/CommandLineInterface.java +++ b/minerva-cli/src/main/java/org/geneontology/minerva/cli/CommandLineInterface.java @@ -28,6 +28,7 @@ import org.geneontology.minerva.model.GoCamModel; import org.geneontology.minerva.model.GoCamModelStats; import org.geneontology.minerva.server.StartUpTool; +import org.geneontology.minerva.server.handler.OperationsTools; import org.geneontology.minerva.server.inferences.InferenceProviderCreator; import org.geneontology.minerva.server.validation.MinervaShexValidator; import org.geneontology.minerva.util.BlazegraphMutationCounter; @@ -37,12 +38,16 @@ import org.geneontology.minerva.validation.pipeline.BatchPipelineValidationReport; import org.geneontology.minerva.validation.pipeline.ErrorMessage; import org.obolibrary.robot.CatalogXmlIRIMapper; +import org.openrdf.model.Statement; +import org.openrdf.model.ValueFactory; +import org.openrdf.model.impl.URIImpl; +import org.openrdf.model.vocabulary.OWL; +import org.openrdf.model.vocabulary.RDF; import org.openrdf.query.MalformedQueryException; import org.openrdf.query.QueryLanguage; import org.openrdf.query.UpdateExecutionException; import org.openrdf.repository.RepositoryException; -import org.openrdf.rio.RDFHandlerException; -import org.openrdf.rio.RDFParseException; +import org.openrdf.rio.*; import org.semanticweb.owlapi.apibinding.OWLManager; import org.semanticweb.owlapi.formats.TurtleDocumentFormat; import org.semanticweb.owlapi.io.IRIDocumentSource; @@ -62,6 +67,8 @@ import java.nio.file.Paths; import java.util.*; +import static org.geneontology.minerva.server.handler.OperationsTools.createModelRenderer; + public class CommandLineInterface { private static final Logger LOGGER = Logger.getLogger(CommandLineInterface.class); @@ -72,6 +79,12 @@ public static void main(String[] args) { Options main_options = new Options(); OptionGroup methods = new OptionGroup(); methods.setRequired(true); + Option dumpJSON = Option.builder() + .longOpt("dump-owl-json") + .desc("export JSON format GO-CAM models from journal") + .hasArg(false) + .build(); + methods.addOption(dumpJSON); Option dump = Option.builder() .longOpt("dump-owl-models") .desc("export OWL GO-CAM models from journal") @@ -210,6 +223,19 @@ public static void main(String[] args) { String outputFolder = cmd.getOptionValue("f"); //--folder String modelIdPrefix = cmd.getOptionValue("p"); //--prefix modelsToOWL(journalFilePath, outputFolder, modelIdPrefix); + } else if (cmd.hasOption("dump-owl-json")) { + Options jsonDumpOptions = new Options(); + jsonDumpOptions.addOption(dumpJSON); + jsonDumpOptions.addOption("j", "journal", true, "Sets the Blazegraph journal file for the database"); + jsonDumpOptions.addOption("ontojournal", "ontojournal", true, "Specify a blazegraph journal file containing the merged, pre-reasoned tbox aka go-lego.owl"); + jsonDumpOptions.addOption("f", "folder", true, "Sets the output folder the GO-CAM model files"); + jsonDumpOptions.addOption("p", "model-id-prefix", true, "prefix for GO-CAM model ids"); + cmd = parser.parse(jsonDumpOptions, args, false); + String journalFilePath = cmd.getOptionValue("j"); //--journal + String ontojournalFilePath = cmd.getOptionValue("ontojournal"); + String outputFolder = cmd.getOptionValue("f"); //--folder + String modelIdPrefix = cmd.getOptionValue("p"); //--prefix + modelsToJSON(journalFilePath, ontojournalFilePath, outputFolder, modelIdPrefix); } else if (cmd.hasOption("import-owl-models")) { Options import_options = new Options(); import_options.addOption(import_owl); @@ -423,6 +449,78 @@ public static void modelsToOWL(String journalFilePath, String outputFolder, Stri m3.dispose(); } + /** + * Given a blazegraph journal with go-cams in it, write them all out as JSON files. + * cli --dump-owl-json + * + * @param journalFilePath + * @param outputFolder + * @param modelIdPrefix + * @throws Exception + */ + public static void modelsToJSON(String journalFilePath, String ontojournalFilePath, String outputFolder, String modelIdPrefix) throws Exception { + final String idPrefix; + if (modelIdPrefix == null) { + idPrefix = "http://model.geneontology.org/"; + } else { + idPrefix = modelIdPrefix; + } + // minimal inputs + if (journalFilePath == null) { + System.err.println("No journal file was configured."); + System.exit(-1); + return; + } + if (ontojournalFilePath == null) { + System.err.println("No ontology journal file was configured."); + System.exit(-1); + return; + } + if (outputFolder == null) { + System.err.println("No output folder was configured."); + System.exit(-1); + return; + } + OWLOntology dummy = OWLManager.createOWLOntologyManager().createOntology(IRI.create("http://example.org/dummy")); + CurieHandler curieHandler = new MappedCurieHandler(); + BlazegraphMolecularModelManager m3 = new BlazegraphMolecularModelManager<>(dummy, curieHandler, modelIdPrefix, journalFilePath, outputFolder, ontojournalFilePath, true); + InferenceProvider inferenceProvider = null; + Gson gson = new Gson(); + FileUtils.forceMkdir(new File(outputFolder)); + m3.getStoredModelIds().stream().parallel().forEach(iri -> { + ModelContainer mc = m3.getModel(iri); + final MolecularModelJsonRenderer renderer = OperationsTools.createModelRenderer(mc, m3.getGolego_repo(), inferenceProvider, curieHandler, m3.getTboxLabelIndex()); + JsonModel jsonModel = renderer.renderModel(); + String fileName = StringUtils.replaceOnce(iri.toString(), idPrefix, "") + ".json"; + File targetFile = new File(outputFolder, fileName).getAbsoluteFile(); + if (targetFile.exists()) { + if (targetFile.isFile() == false) { + throw new RuntimeException(new IOException("For modelId: '" + iri + "', the resulting path is not a file: " + targetFile.getAbsolutePath())); + } + if (targetFile.canWrite() == false) { + throw new RuntimeException(new IOException("For modelId: '" + iri + "', Cannot write to the file: " + targetFile.getAbsolutePath())); + } + } + File tempFile = null; + try { + // create tempFile + String prefix = iri.toString(); // TODO escape + tempFile = File.createTempFile(prefix, ".json"); + try (FileWriter writer = new FileWriter(tempFile)) { + gson.toJson(jsonModel, writer); + } + FileUtils.copyFile(tempFile, targetFile); + } catch (IOException e) { + throw new RuntimeException(e); + } finally { + // delete temp file + FileUtils.deleteQuietly(tempFile); + } + m3.unlinkModel(iri); + }); + m3.dispose(); + } + /** * Load the go-cam files in the input folder into the journal * cli import-owl-models @@ -852,7 +950,7 @@ public static void validateGoCams(String input, String outputFolder, modelid_filename.put(modeluri, file.getName()); } } catch (OWLOntologyCreationException | RepositoryException | RDFParseException - | RDFHandlerException | IOException e) { + | RDFHandlerException | IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } @@ -863,7 +961,7 @@ public static void validateGoCams(String input, String outputFolder, try { m3.importModelToDatabase(i, true); } catch (OWLOntologyCreationException | RepositoryException | RDFParseException - | RDFHandlerException | IOException e) { + | RDFHandlerException | IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } diff --git a/minerva-server/src/main/java/org/geneontology/minerva/server/handler/OperationsTools.java b/minerva-server/src/main/java/org/geneontology/minerva/server/handler/OperationsTools.java index b2f0aac2..959b05c0 100644 --- a/minerva-server/src/main/java/org/geneontology/minerva/server/handler/OperationsTools.java +++ b/minerva-server/src/main/java/org/geneontology/minerva/server/handler/OperationsTools.java @@ -109,7 +109,7 @@ static MolecularModelJsonRenderer createModelRenderer( } //BlazegraphOntologyManager - static MolecularModelJsonRenderer createModelRenderer( + public static MolecularModelJsonRenderer createModelRenderer( final ModelContainer model, final BlazegraphOntologyManager go_lego_repo, final InferenceProvider inferenceProvider, From a16995b44aeb74e7bed8f2f799736bf8c7383d9c Mon Sep 17 00:00:00 2001 From: Jim Balhoff Date: Thu, 11 Aug 2022 16:06:50 -0400 Subject: [PATCH 2/3] Set up CURIE handler correctly. --- .../minerva/cli/CommandLineInterface.java | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/minerva-cli/src/main/java/org/geneontology/minerva/cli/CommandLineInterface.java b/minerva-cli/src/main/java/org/geneontology/minerva/cli/CommandLineInterface.java index 3e151438..b0dd6c0e 100644 --- a/minerva-cli/src/main/java/org/geneontology/minerva/cli/CommandLineInterface.java +++ b/minerva-cli/src/main/java/org/geneontology/minerva/cli/CommandLineInterface.java @@ -230,12 +230,14 @@ public static void main(String[] args) { jsonDumpOptions.addOption("ontojournal", "ontojournal", true, "Specify a blazegraph journal file containing the merged, pre-reasoned tbox aka go-lego.owl"); jsonDumpOptions.addOption("f", "folder", true, "Sets the output folder the GO-CAM model files"); jsonDumpOptions.addOption("p", "model-id-prefix", true, "prefix for GO-CAM model ids"); + jsonDumpOptions.addOption("prefixes", "prefixes", true, "Prefix mappings file"); cmd = parser.parse(jsonDumpOptions, args, false); String journalFilePath = cmd.getOptionValue("j"); //--journal String ontojournalFilePath = cmd.getOptionValue("ontojournal"); String outputFolder = cmd.getOptionValue("f"); //--folder String modelIdPrefix = cmd.getOptionValue("p"); //--prefix - modelsToJSON(journalFilePath, ontojournalFilePath, outputFolder, modelIdPrefix); + String prefixMappingsFileLoc = cmd.getOptionValue("prefixes"); + modelsToJSON(journalFilePath, ontojournalFilePath, outputFolder, modelIdPrefix, prefixMappingsFileLoc); } else if (cmd.hasOption("import-owl-models")) { Options import_options = new Options(); import_options.addOption(import_owl); @@ -458,7 +460,7 @@ public static void modelsToOWL(String journalFilePath, String outputFolder, Stri * @param modelIdPrefix * @throws Exception */ - public static void modelsToJSON(String journalFilePath, String ontojournalFilePath, String outputFolder, String modelIdPrefix) throws Exception { + public static void modelsToJSON(String journalFilePath, String ontojournalFilePath, String outputFolder, String modelIdPrefix, String prefixMappingsFilePath) throws Exception { final String idPrefix; if (modelIdPrefix == null) { idPrefix = "http://model.geneontology.org/"; @@ -481,9 +483,16 @@ public static void modelsToJSON(String journalFilePath, String ontojournalFilePa System.exit(-1); return; } + final CurieMappings mappings; + if (prefixMappingsFilePath != null) { + mappings = DefaultCurieHandler.loadMappingsFromFile(new File(prefixMappingsFilePath)); + } else { + mappings = DefaultCurieHandler.loadDefaultMappings(); + } + CurieMappings localMappings = new CurieMappings.SimpleCurieMappings(Collections.singletonMap("gomodel", idPrefix)); + CurieHandler curieHandler = new MappedCurieHandler(mappings, localMappings); OWLOntology dummy = OWLManager.createOWLOntologyManager().createOntology(IRI.create("http://example.org/dummy")); - CurieHandler curieHandler = new MappedCurieHandler(); - BlazegraphMolecularModelManager m3 = new BlazegraphMolecularModelManager<>(dummy, curieHandler, modelIdPrefix, journalFilePath, outputFolder, ontojournalFilePath, true); + BlazegraphMolecularModelManager m3 = new BlazegraphMolecularModelManager<>(dummy, curieHandler, idPrefix, journalFilePath, outputFolder, ontojournalFilePath, true); InferenceProvider inferenceProvider = null; Gson gson = new Gson(); FileUtils.forceMkdir(new File(outputFolder)); From 6c0b6ae3a5ea7104e0808c089f26f310e47ae329 Mon Sep 17 00:00:00 2001 From: Jim Balhoff Date: Tue, 16 Aug 2022 14:44:43 -0400 Subject: [PATCH 3/3] =?UTF-8?q?Don=E2=80=99t=20run=20JSON=20dump=20in=20pa?= =?UTF-8?q?rallel.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../org/geneontology/minerva/cli/CommandLineInterface.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/minerva-cli/src/main/java/org/geneontology/minerva/cli/CommandLineInterface.java b/minerva-cli/src/main/java/org/geneontology/minerva/cli/CommandLineInterface.java index b0dd6c0e..2dfa0f01 100644 --- a/minerva-cli/src/main/java/org/geneontology/minerva/cli/CommandLineInterface.java +++ b/minerva-cli/src/main/java/org/geneontology/minerva/cli/CommandLineInterface.java @@ -496,17 +496,17 @@ public static void modelsToJSON(String journalFilePath, String ontojournalFilePa InferenceProvider inferenceProvider = null; Gson gson = new Gson(); FileUtils.forceMkdir(new File(outputFolder)); - m3.getStoredModelIds().stream().parallel().forEach(iri -> { + m3.getStoredModelIds().forEach(iri -> { ModelContainer mc = m3.getModel(iri); final MolecularModelJsonRenderer renderer = OperationsTools.createModelRenderer(mc, m3.getGolego_repo(), inferenceProvider, curieHandler, m3.getTboxLabelIndex()); JsonModel jsonModel = renderer.renderModel(); String fileName = StringUtils.replaceOnce(iri.toString(), idPrefix, "") + ".json"; File targetFile = new File(outputFolder, fileName).getAbsoluteFile(); if (targetFile.exists()) { - if (targetFile.isFile() == false) { + if (!targetFile.isFile()) { throw new RuntimeException(new IOException("For modelId: '" + iri + "', the resulting path is not a file: " + targetFile.getAbsolutePath())); } - if (targetFile.canWrite() == false) { + if (!targetFile.canWrite()) { throw new RuntimeException(new IOException("For modelId: '" + iri + "', Cannot write to the file: " + targetFile.getAbsolutePath())); } }