From f769bcbc8ab67b29906af383ad4cd0759af3e06d Mon Sep 17 00:00:00 2001 From: Jesper Friis Date: Wed, 1 May 2024 15:48:11 +0200 Subject: [PATCH] Make it possible to run HermiT on EMMO (#740) # Description Make it possible to run HermiT on EMMO, by: * Add support for additional keyword arguments that `sync_reasoner()` may pass to the `save()` method. * Remove custom datatypes before running HermiT. Readd the removed data properties after HermiT completes. * Add `--java-executable` and `--java-memory` arguments to ontoconvert to control how to run java. ## Type of change - [x] Bug fix. - [ ] New feature. - [ ] Documentation update. - [ ] Test update. Co-authored-by: francescalb --- ontopy/ontology.py | 51 +++++++++++++++++++++++++++++++++++++++++++--- tools/ontoconvert | 18 +++++++++++++++- 2 files changed, 65 insertions(+), 4 deletions(-) diff --git a/ontopy/ontology.py b/ontopy/ontology.py index 47425c771..5eacec242 100644 --- a/ontopy/ontology.py +++ b/ontopy/ontology.py @@ -871,6 +871,7 @@ def save( write_catalog_file=False, append_catalog=False, catalog_file="catalog-v001.xml", + **kwargs, ) -> Path: """Writes the ontology to file. 
@@ -915,6 +916,7 @@ def save( # pylint: disable=redefined-builtin,too-many-arguments # pylint: disable=too-many-statements,too-many-branches # pylint: disable=too-many-locals,arguments-renamed,invalid-name + if not _validate_installed_version( package="rdflib", min_version="6.0.0" ) and format == FMAP.get("ttl", ""): @@ -940,7 +942,9 @@ def save( raise TypeError("`filename` and `format` cannot both be None.") else: file = filename - filepath = os.path.join(dir, file) + filepath = os.path.join( + dir, file if isinstance(file, (str, Path)) else file.name + ) returnpath = filepath dir = Path(filepath).resolve().parent @@ -980,6 +984,7 @@ def save( recursive=False, squash=False, write_catalog_file=False, + **kwargs, ) if write_catalog_file: @@ -1045,7 +1050,7 @@ def save( graph.serialize(destination=filepath, format=format) elif format in OWLREADY2_FORMATS: - super().save(file=filepath, format=fmt) + super().save(file=filepath, format=fmt, **kwargs) else: # The try-finally clause is needed for cleanup and because # we have to provide delete=False to NamedTemporaryFile @@ -1056,7 +1061,7 @@ def save( suffix=".owl", delete=False ) as handle: tmpfile = handle.name - super().save(tmpfile, format="ntriples") + super().save(tmpfile, format="ntriples", **kwargs) graph = rdflib.Graph() graph.parse(tmpfile, format="ntriples") graph.namespace_manager.bind( @@ -1331,12 +1336,46 @@ def sync_reasoner( Keyword arguments are passed to the underlying owlready2 function. 
""" + # pylint: disable=too-many-branches + + removed_equivalent = defaultdict(list) + removed_subclasses = defaultdict(list) + if reasoner == "FaCT++": sync = sync_reasoner_factpp elif reasoner == "Pellet": sync = owlready2.sync_reasoner_pellet elif reasoner == "HermiT": sync = owlready2.sync_reasoner_hermit + + # Remove custom data properties, otherwise HermiT will crash + datatype_iri = "http://www.w3.org/2000/01/rdf-schema#Datatype" + + for cls in self.classes(imported=include_imported): + remove_eq = [] + for i, r in enumerate(cls.equivalent_to): + if isinstance(r, owlready2.Restriction): + if ( + hasattr(r.value.__class__, "iri") + and r.value.__class__.iri == datatype_iri + ): + remove_eq.append(i) + removed_equivalent[cls].append(r) + for i in reversed(remove_eq): + del cls.equivalent_to[i] + + remove_subcls = [] + for i, r in enumerate(cls.is_a): + if isinstance(r, owlready2.Restriction): + if ( + hasattr(r.value.__class__, "iri") + and r.value.__class__.iri == datatype_iri + ): + remove_subcls.append(i) + removed_subclasses[cls].append(r) + for i in reversed(remove_subcls): + del cls.is_a[i] + else: raise ValueError( f"Unknown reasoner '{reasoner}'. Supported reasoners " @@ -1353,6 +1392,12 @@ def sync_reasoner( else: sync(self, **kwargs) + # Restore removed custom data properties + for cls, eqs in removed_equivalent.items(): + cls.extend(eqs) + for cls, subcls in removed_subclasses.items(): + cls.extend(subcls) + def sync_attributes( # pylint: disable=too-many-branches self, name_policy=None, diff --git a/tools/ontoconvert b/tools/ontoconvert index 4e06cdf79..d97123bd9 100755 --- a/tools/ontoconvert +++ b/tools/ontoconvert @@ -9,6 +9,9 @@ from rdflib.util import guess_format from ontopy import get_ontology from ontopy.utils import annotate_source, rename_iris, copy_annotation +import owlready2 # pylint: disable=wrong-import-order +import owlready2.reasoning # pylint: disable=wrong-import-order + def main(argv: list = None): """Main run function. 
@@ -116,7 +119,14 @@ def main(argv: list = None): action="store_true", help="Do not infer imported ontologies.", ) - # To be implemented... + parser.add_argument( + "--java-executable", + help="Path to Java executable to use. Default is `java`.", + ) + parser.add_argument( + "--java-memory", + help="Maximum memory allocated to Java in MB. Default is 2000.", + ) parser.add_argument( "--iri", "-I", @@ -218,6 +228,12 @@ def main(argv: list = None): if not output_format: output_format = "xml" + # Settings for running Java + if args.java_executable: + owlready2.JAVA_EXE = args.java_executable + if args.java_memory: + owlready2.reasoning.JAVA_MEMORY = int(args.java_memory) + # Annotations to copy with --copy-emmo-annotations if args.copy_emmo_annotations: args.copy_annotation.extend(