From f769bcbc8ab67b29906af383ad4cd0759af3e06d Mon Sep 17 00:00:00 2001
From: Jesper Friis <jesper-friis@users.noreply.github.com>
Date: Wed, 1 May 2024 15:48:11 +0200
Subject: [PATCH] Make it possible to run HermiT on EMMO (#740)

# Description
Make it possible to run HermiT on EMMO by:
* Adding support for additional keyword arguments that `sync_reasoner()` may pass to the `save()` method.
* Removing restrictions on custom datatypes before running HermiT and re-adding them after HermiT completes.
* Adding `--java-executable` and `--java-memory` arguments to ontoconvert to control how Java is run.

## Type of change
- [x] Bug fix.
- [ ] New feature.
- [ ] Documentation update.
- [ ] Test update.

Co-authored-by: francescalb <francesca.l.bleken@sintef.no>
---
 ontopy/ontology.py | 51 +++++++++++++++++++++++++++++++++++++++++++---
 tools/ontoconvert  | 18 +++++++++++++++-
 2 files changed, 65 insertions(+), 4 deletions(-)

diff --git a/ontopy/ontology.py b/ontopy/ontology.py
index 47425c771..5eacec242 100644
--- a/ontopy/ontology.py
+++ b/ontopy/ontology.py
@@ -871,6 +871,7 @@ def save(
         write_catalog_file=False,
         append_catalog=False,
         catalog_file="catalog-v001.xml",
+        **kwargs,
     ) -> Path:
         """Writes the ontology to file.
 
@@ -915,6 +916,7 @@ def save(
         # pylint: disable=redefined-builtin,too-many-arguments
         # pylint: disable=too-many-statements,too-many-branches
         # pylint: disable=too-many-locals,arguments-renamed,invalid-name
+
         if not _validate_installed_version(
             package="rdflib", min_version="6.0.0"
         ) and format == FMAP.get("ttl", ""):
@@ -940,7 +942,9 @@ def save(
                 raise TypeError("`filename` and `format` cannot both be None.")
         else:
             file = filename
-        filepath = os.path.join(dir, file)
+        filepath = os.path.join(
+            dir, file if isinstance(file, (str, Path)) else file.name
+        )
         returnpath = filepath
 
         dir = Path(filepath).resolve().parent
@@ -980,6 +984,7 @@ def save(
                     recursive=False,
                     squash=False,
                     write_catalog_file=False,
+                    **kwargs,
                 )
 
             if write_catalog_file:
@@ -1045,7 +1050,7 @@ def save(
 
             graph.serialize(destination=filepath, format=format)
         elif format in OWLREADY2_FORMATS:
-            super().save(file=filepath, format=fmt)
+            super().save(file=filepath, format=fmt, **kwargs)
         else:
             # The try-finally clause is needed for cleanup and because
             # we have to provide delete=False to NamedTemporaryFile
@@ -1056,7 +1061,7 @@ def save(
                     suffix=".owl", delete=False
                 ) as handle:
                     tmpfile = handle.name
-                super().save(tmpfile, format="ntriples")
+                super().save(tmpfile, format="ntriples", **kwargs)
                 graph = rdflib.Graph()
                 graph.parse(tmpfile, format="ntriples")
                 graph.namespace_manager.bind(
@@ -1331,12 +1336,46 @@ def sync_reasoner(
 
         Keyword arguments are passed to the underlying owlready2 function.
         """
+        # pylint: disable=too-many-branches
+
+        removed_equivalent = defaultdict(list)
+        removed_subclasses = defaultdict(list)
+
         if reasoner == "FaCT++":
             sync = sync_reasoner_factpp
         elif reasoner == "Pellet":
             sync = owlready2.sync_reasoner_pellet
         elif reasoner == "HermiT":
             sync = owlready2.sync_reasoner_hermit
+
+            # Remove custom data properties, otherwise HermiT will crash
+            datatype_iri = "http://www.w3.org/2000/01/rdf-schema#Datatype"
+
+            for cls in self.classes(imported=include_imported):
+                remove_eq = []
+                for i, r in enumerate(cls.equivalent_to):
+                    if isinstance(r, owlready2.Restriction):
+                        if (
+                            hasattr(r.value.__class__, "iri")
+                            and r.value.__class__.iri == datatype_iri
+                        ):
+                            remove_eq.append(i)
+                            removed_equivalent[cls].append(r)
+                for i in reversed(remove_eq):
+                    del cls.equivalent_to[i]
+
+                remove_subcls = []
+                for i, r in enumerate(cls.is_a):
+                    if isinstance(r, owlready2.Restriction):
+                        if (
+                            hasattr(r.value.__class__, "iri")
+                            and r.value.__class__.iri == datatype_iri
+                        ):
+                            remove_subcls.append(i)
+                            removed_subclasses[cls].append(r)
+                for i in reversed(remove_subcls):
+                    del cls.is_a[i]
+
         else:
             raise ValueError(
                 f"Unknown reasoner '{reasoner}'. Supported reasoners "
@@ -1353,6 +1392,12 @@ def sync_reasoner(
             else:
                 sync(self, **kwargs)
 
+        # Re-add the datatype restrictions removed before running HermiT
+        for cls, eqs in removed_equivalent.items():
+            cls.equivalent_to.extend(eqs)
+        for cls, subcls in removed_subclasses.items():
+            cls.is_a.extend(subcls)
+
     def sync_attributes(  # pylint: disable=too-many-branches
         self,
         name_policy=None,
diff --git a/tools/ontoconvert b/tools/ontoconvert
index 4e06cdf79..d97123bd9 100755
--- a/tools/ontoconvert
+++ b/tools/ontoconvert
@@ -9,6 +9,9 @@ from rdflib.util import guess_format
 from ontopy import get_ontology
 from ontopy.utils import annotate_source, rename_iris, copy_annotation
 
+import owlready2  # pylint: disable=wrong-import-order
+import owlready2.reasoning  # pylint: disable=wrong-import-order
+
 
 def main(argv: list = None):
     """Main run function.
@@ -116,7 +119,14 @@ def main(argv: list = None):
         action="store_true",
         help="Do not infer imported ontologies.",
     )
-    # To be implemented...
+    parser.add_argument(
+        "--java-executable",
+        help="Path to Java executable to use. Default is `java`.",
+    )
+    parser.add_argument(
+        "--java-memory",
+        help="Maximum memory allocated to Java in MB. Default is 2000.",
+    )
     parser.add_argument(
         "--iri",
         "-I",
@@ -218,6 +228,12 @@ def main(argv: list = None):
     if not output_format:
         output_format = "xml"
 
+    # Settings for running Java
+    if args.java_executable:
+        owlready2.JAVA_EXE = args.java_executable
+    if args.java_memory:
+        owlready2.reasoning.JAVA_MEMORY = int(args.java_memory)
+
     # Annotations to copy with --copy-emmo-annotations
     if args.copy_emmo_annotations:
         args.copy_annotation.extend(