From 18d1d70d37e8ed55fc37a8eade195299a05e4331 Mon Sep 17 00:00:00 2001
From: Ashley Sommer <ashleysommer@gmail.com>
Date: Thu, 24 Oct 2024 07:49:40 +1000
Subject: [PATCH 1/8] Revert previous commit that made html support
 non-optional. html support is now optional again, and it uses html5rdf rather
 than html5lib/html5lib-modern.

---
 devtools/constraints.min                   |  2 +-
 docker/latest/requirements.in              |  2 +-
 docker/latest/requirements.txt             |  2 +-
 poetry.lock                                | 12 +++---
 pyproject.toml                             |  3 +-
 rdflib/__init__.py                         |  4 +-
 rdflib/term.py                             | 45 +++++++++++++++-------
 test/test_literal/test_literal.py          | 10 ++---
 test/test_literal/test_literal_html5lib.py | 10 +++--
 test/test_literal/test_xmlliterals.py      |  8 ++--
 tox.ini                                    |  4 +-
 11 files changed, 62 insertions(+), 40 deletions(-)

diff --git a/devtools/constraints.min b/devtools/constraints.min
index 66deec089..2a3f256b5 100644
--- a/devtools/constraints.min
+++ b/devtools/constraints.min
@@ -6,6 +6,6 @@ pyparsing==2.1.0
 importlib-metadata==4.0.0
 berkeleydb==18.1.2
 networkx==2.0
-html5lib-modern==1.2.0
+html5rdf==1.2.0
 lxml==4.3.0
 orjson==3.9.14
diff --git a/docker/latest/requirements.in b/docker/latest/requirements.in
index 8579291d7..4c2e3ec71 100644
--- a/docker/latest/requirements.in
+++ b/docker/latest/requirements.in
@@ -1,6 +1,6 @@
 # This file is used for building a docker image of the latest rdflib release. It
 # will be updated by dependabot when new releases are made.
 rdflib==7.1.0
-html5lib-modern==1.2.0
+html5rdf==1.2.0
 # isodate is required to allow the Dockerfile to build on with pre-RDFLib-7.1 releases.
 isodate==0.7.2
diff --git a/docker/latest/requirements.txt b/docker/latest/requirements.txt
index dd96e0dd7..8cbd3cbd3 100644
--- a/docker/latest/requirements.txt
+++ b/docker/latest/requirements.txt
@@ -4,7 +4,7 @@
 #
 #    pip-compile docker/latest/requirements.in
 #
-html5lib-modern==1.2
+html5rdf==1.2
     # via
     #   -r docker/latest/requirements.in
     #   rdflib
diff --git a/poetry.lock b/poetry.lock
index dc1c6f5e0..99421313d 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -339,14 +339,14 @@ files = [
 test = ["pytest (>=6)"]
 
 [[package]]
-name = "html5lib-modern"
+name = "html5rdf"
 version = "1.2"
 description = "HTML parser based on the WHATWG HTML specification"
-optional = false
+optional = true
 python-versions = ">=3.8"
 files = [
-    {file = "html5lib_modern-1.2-py2.py3-none-any.whl", hash = "sha256:3458b6e31525ede4fcaac0ff42d9eeb5efaf755473768103cb56e0275caa8d99"},
-    {file = "html5lib_modern-1.2.tar.gz", hash = "sha256:1fadbfc27ea955431270e4e79a4a4c290ba11c3a3098a95cc22dc73e312a1768"},
+    {file = "html5rdf-1.2-py2.py3-none-any.whl", hash = "sha256:08169aa52a98ee3a6d3456d83feb36211fb5edcbcf3e05f6d19e0136f581638c"},
+    {file = "html5rdf-1.2.tar.gz", hash = "sha256:08378cbbbb63993ba7bb5eb1eac44bf9ca7b1a23dbee3d2afef5376597fb00a5"},
 ]
 
 [package.extras]
@@ -1107,7 +1107,6 @@ files = [
     {file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"},
     {file = "PyYAML-6.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28"},
     {file = "PyYAML-6.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9"},
-    {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a08c6f0fe150303c1c6b71ebcd7213c2858041a7e01975da3a99aed1e7a378ef"},
     {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0"},
     {file = "PyYAML-6.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4"},
     {file = "PyYAML-6.0.1-cp312-cp312-win32.whl", hash = "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54"},
@@ -1456,6 +1455,7 @@ testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "p
 
 [extras]
 berkeleydb = ["berkeleydb"]
+html = ["html5rdf", "lxml"]
 lxml = ["lxml"]
 networkx = ["networkx"]
 orjson = ["orjson"]
@@ -1463,4 +1463,4 @@ orjson = ["orjson"]
 [metadata]
 lock-version = "2.0"
 python-versions = "^3.8.1"
-content-hash = "b0303e119538a8c1ca41f75206ce1c2fb3016699a00a02039bcafae17da6b03b"
+content-hash = "214d37624612043042464f0e154a4e551ec43177be1aabe4b9aced3ace7182de"
diff --git a/pyproject.toml b/pyproject.toml
index 71fbc5541..df698997f 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -43,7 +43,7 @@ isodate = {version=">=0.7.2,<1.0.0", python = "<3.11"}
 pyparsing = ">=2.1.0,<4"
 berkeleydb = {version = "^18.1.0", optional = true}
 networkx = {version = ">=2,<4", optional = true}
-html5lib-modern = "^1.2"
+html5rdf = {version = ">=1.2,<2", optional = true}
 lxml = {version = ">=4.3,<6.0", optional = true}
 orjson = {version = ">=3.9.14,<4", optional = true}
 
@@ -74,6 +74,7 @@ ruff = ">=0.0.286,<0.8.0"
 [tool.poetry.extras]
 berkeleydb = ["berkeleydb"]
 networkx = ["networkx"]
+html = ["html5rdf", "lxml"]
 lxml = ["lxml"]
 orjson = ["orjson"]
 
diff --git a/rdflib/__init__.py b/rdflib/__init__.py
index 30faeee5b..dfc45e5b3 100644
--- a/rdflib/__init__.py
+++ b/rdflib/__init__.py
@@ -47,11 +47,11 @@
 import sys
 from importlib import metadata
 
-_DISTRIBUTION_METADATA = metadata.metadata("rdflib")
+#_DISTRIBUTION_METADATA = metadata.metadata("rdflib")
 
 __docformat__ = "restructuredtext en"
 
-__version__: str = _DISTRIBUTION_METADATA["Version"]
+__version__: str = "0.0.0" #_DISTRIBUTION_METADATA["Version"]
 __date__ = "2024-10-17"
 
 __all__ = [
diff --git a/rdflib/term.py b/rdflib/term.py
index 9503d7779..aa39d398a 100644
--- a/rdflib/term.py
+++ b/rdflib/term.py
@@ -65,8 +65,6 @@
 from urllib.parse import urldefrag, urljoin, urlparse
 from uuid import uuid4
 
-import html5lib
-
 import rdflib
 import rdflib.util
 from rdflib.compat import long_type
@@ -86,6 +84,14 @@
     from .namespace import NamespaceManager
     from .paths import AlternativePath, InvPath, NegatedPath, Path, SequencePath
 
+_HAS_HTML5RDF = False
+
+try:
+    import html5rdf
+
+    _HAS_HTML5RDF = True
+except ImportError:
+    html5rdf = None
 
 _SKOLEM_DEFAULT_AUTHORITY = "https://rdflib.github.io"
 
@@ -1668,19 +1674,19 @@ def _parseXML(xmlstring: str) -> xml.dom.minidom.Document:  # noqa: N802
 def _parse_html(lexical_form: str) -> xml.dom.minidom.DocumentFragment:
     """
     Parse the lexical form of an HTML literal into a document fragment
-    using the ``dom`` from html5lib tree builder.
+    using the ``dom`` from html5rdf tree builder.
 
     :param lexical_form: The lexical form of the HTML literal.
     :return: A document fragment representing the HTML literal.
-    :raises: `html5lib.html5parser.ParseError` if the lexical form is
+    :raises: `html5rdf.html5parser.ParseError` if the lexical form is
         not valid HTML.
     """
-    parser = html5lib.HTMLParser(
-        tree=html5lib.treebuilders.getTreeBuilder("dom"), strict=True
+    parser = html5rdf.HTMLParser(
+        tree=html5rdf.treebuilders.getTreeBuilder("dom"), strict=True
     )
     try:
         result: xml.dom.minidom.DocumentFragment = parser.parseFragment(lexical_form)
-    except html5lib.html5parser.ParseError as e:
+    except html5rdf.html5parser.ParseError as e:
         logger.info(f"Failed to parse HTML: {e}")
         raise e
     result.normalize()
@@ -1695,7 +1701,7 @@ def _write_html(value: xml.dom.minidom.DocumentFragment) -> bytes:
     :param value: A document fragment representing an HTML literal.
     :return: The lexical form of the HTML literal.
     """
-    result = html5lib.serialize(value, tree="dom")
+    result = html5rdf.serialize(value, tree="dom")
     return result
 
 
@@ -2012,14 +2018,21 @@ def _castPythonToLiteral(  # noqa: N802
     (Duration, (lambda i: duration_isoformat(i), _XSD_DURATION)),
     (timedelta, (lambda i: duration_isoformat(i), _XSD_DAYTIMEDURATION)),
     (xml.dom.minidom.Document, (_writeXML, _RDF_XMLLITERAL)),
-    # This is a bit dirty, by accident the html5lib parser produces
-    # DocumentFragments, and the xml parser Documents, letting this
-    # decide what datatype to use makes roundtripping easier, but it a
-    # bit random.
-    (xml.dom.minidom.DocumentFragment, (_write_html, _RDF_HTMLLITERAL)),
     (Fraction, (None, _OWL_RATIONAL)),
 ]
 
+if html5rdf is not None:
+    # This is a bit dirty, by accident the html5rdf parser produces
+    # DocumentFragments, and the xml parser Documents, letting this
+    # decide what datatype to use makes roundtripping easier, but it's a
+    # bit random.
+
+    # This must happen before _GenericPythonToXSDRules is assigned to
+    # _OriginalGenericPythonToXSDRules.
+    _GenericPythonToXSDRules.append(
+        (xml.dom.minidom.DocumentFragment, (_write_html, _RDF_HTMLLITERAL))
+    )
+
 _OriginalGenericPythonToXSDRules = list(_GenericPythonToXSDRules)
 
 _SpecificPythonToXSDRules: List[
@@ -2069,10 +2082,14 @@ def _castPythonToLiteral(  # noqa: N802
     URIRef(_XSD_PFX + "double"): float,
     URIRef(_XSD_PFX + "base64Binary"): b64decode,
     URIRef(_XSD_PFX + "anyURI"): None,
-    _RDF_HTMLLITERAL: _parse_html,
     _RDF_XMLLITERAL: _parseXML,
 }
 
+if html5rdf is not None:
+    # It is probably best to keep this close to the definition of
+    # _GenericPythonToXSDRules so nobody misses it.
+    XSDToPython[_RDF_HTMLLITERAL] = _parse_html
+
 _check_well_formed_types: Dict[URIRef, Callable[[Union[str, bytes], Any], bool]] = {
     URIRef(_XSD_PFX + "boolean"): _well_formed_boolean,
     URIRef(_XSD_PFX + "nonPositiveInteger"): _well_formed_non_positive_integer,
diff --git a/test/test_literal/test_literal.py b/test/test_literal/test_literal.py
index bc2bc3b45..a28e67e12 100644
--- a/test/test_literal/test_literal.py
+++ b/test/test_literal/test_literal.py
@@ -23,11 +23,11 @@
 
 
 try:
-    import html5lib as _  # noqa: F401
+    import html5rdf as _  # noqa: F401
 
-    _HAVE_HTML5LIB = True
+    _HAVE_HTML5RDF = True
 except ImportError:
-    _HAVE_HTML5LIB = False
+    _HAVE_HTML5RDF = False
 
 import pytest
 
@@ -981,7 +981,7 @@ def __eq__(self, __value: object) -> bool:
         (
             lambda: Literal("<body>", datatype=RDF.HTML),
             LiteralChecker(
-                ..., None, RDF.HTML, True if _HAVE_HTML5LIB else None, "<body>"
+                ..., None, RDF.HTML, True if _HAVE_HTML5RDF else None, "<body>"
             ),
         ),
         (
@@ -990,7 +990,7 @@ def __eq__(self, __value: object) -> bool:
                 ...,
                 None,
                 RDF.HTML,
-                False if _HAVE_HTML5LIB else None,
+                False if _HAVE_HTML5RDF else None,
                 "<table></table>",
             ),
         ),
diff --git a/test/test_literal/test_literal_html5lib.py b/test/test_literal/test_literal_html5lib.py
index b77feb94b..bbde5f0f7 100644
--- a/test/test_literal/test_literal_html5lib.py
+++ b/test/test_literal/test_literal_html5lib.py
@@ -1,7 +1,6 @@
 import xml.dom.minidom
 from typing import Callable
 
-import html5lib  # noqa: F401
 import pytest
 
 import rdflib.term
@@ -10,8 +9,13 @@
 from test.utils.literal import LiteralChecker
 from test.utils.outcome import OutcomeChecker, OutcomePrimitives
 
+try:
+    import html5rdf as _  # noqa: F401
+except ImportError:
+    pytest.skip("html5rdf not installed", allow_module_level=True)
 
-def test_has_html5lib() -> None:
+def test_has_html5rdf() -> None:
+    assert rdflib.term._HAS_HTML5RDF is True
     assert RDF.HTML in rdflib.term.XSDToPython
     rule = next(
         (
@@ -29,7 +33,7 @@ def test_has_html5lib() -> None:
     ["factory", "outcome"],
     [
         # Ill-typed literals, these have lexical forms that result in
-        # errors when parsed as HTML by html5lib.
+        # errors when parsed as HTML by html5rdf.
         (
             lambda: Literal("<body><h1>Hello, World!</h1></body>", datatype=RDF.HTML),
             LiteralChecker(
diff --git a/test/test_literal/test_xmlliterals.py b/test/test_literal/test_xmlliterals.py
index 38ae549d4..40227bc63 100644
--- a/test/test_literal/test_xmlliterals.py
+++ b/test/test_literal/test_xmlliterals.py
@@ -9,11 +9,11 @@
 from rdflib import RDF, Literal
 
 try:
-    import html5lib  # noqa: F401
+    import html5rdf  # noqa: F401
 
-    have_html5lib = True
+    have_html5rdf = True
 except ImportError:
-    have_html5lib = False
+    have_html5rdf = False
 
 
 def testPythonRoundtrip():  # noqa: N802
@@ -90,7 +90,7 @@ def testRoundtrip():  # noqa: N802
     roundtrip("nt")
 
 
-@pytest.mark.skipif(not have_html5lib, reason="requires html5lib")
+@pytest.mark.skipif(not have_html5rdf, reason="requires html5rdf")
 def testHTML():  # noqa: N802
     l1 = Literal("<msg>hello</msg>", datatype=RDF.XMLLiteral)
     assert l1.value is not None, "xml must have been parsed"
diff --git a/tox.ini b/tox.ini
index ef02ff4d2..9ec80d516 100644
--- a/tox.ini
+++ b/tox.ini
@@ -15,7 +15,7 @@ setenv =
     COVERAGE_FILE = {env:COVERAGE_FILE:{toxinidir}/.coverage.{envname}}
     MYPY_CACHE_DIR = {envdir}/.mypy_cache
     docs: POETRY_ARGS_docs = --only=docs
-    extensive: POETRY_ARGS_extensive = --extras=berkeleydb --extras=networkx --extras=orjson
+    extensive: POETRY_ARGS_extensive = --extras=berkeleydb --extras=networkx --extras=html --extras=orjson
     lxml: POETRY_ARGS_lxml = --extras=lxml
 commands_pre =
     py3{8,9,10,11}: python -c 'import os; print("\n".join(f"{key}={value}" for key, value in os.environ.items()))'
@@ -59,7 +59,7 @@ setenv =
     PYTHONHASHSEED = 0
 commands_pre =
     poetry lock --check
-    poetry install --only=main --only=docs
+    poetry install --only=main --only=docs --extras=html
     poetry env info
 commands =
     poetry run sphinx-build -T -W -b html -d {envdir}/doctree docs docs/_build/html

From e7dc73798c75132b4d4f0ef8dae4567cecceea9b Mon Sep 17 00:00:00 2001
From: Ashley Sommer <ashleysommer@gmail.com>
Date: Thu, 24 Oct 2024 08:58:58 +1000
Subject: [PATCH 2/8] Revert "Auxiliary commit to revert individual files from
 18d1d70d37e8ed55fc37a8eade195299a05e4331"

This reverts commit 6f80ad9cd2cc4b76ff4e1bc0998951768aff7573.
---
 rdflib/__init__.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/rdflib/__init__.py b/rdflib/__init__.py
index dfc45e5b3..30faeee5b 100644
--- a/rdflib/__init__.py
+++ b/rdflib/__init__.py
@@ -47,11 +47,11 @@
 import sys
 from importlib import metadata
 
-#_DISTRIBUTION_METADATA = metadata.metadata("rdflib")
+_DISTRIBUTION_METADATA = metadata.metadata("rdflib")
 
 __docformat__ = "restructuredtext en"
 
-__version__: str = "0.0.0" #_DISTRIBUTION_METADATA["Version"]
+__version__: str = _DISTRIBUTION_METADATA["Version"]
 __date__ = "2024-10-17"
 
 __all__ = [

From 1d33786f007ad5b4ffd8e22293dd25d4a96592fc Mon Sep 17 00:00:00 2001
From: Ashley Sommer <ashleysommer@gmail.com>
Date: Thu, 24 Oct 2024 09:45:35 +1000
Subject: [PATCH 3/8] Add html5lib-modern back into dockerfile dependencies so
 the docker image can be built with the released rdflib v7.1.0

---
 docker/latest/requirements.in  | 4 ++--
 docker/latest/requirements.txt | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/docker/latest/requirements.in b/docker/latest/requirements.in
index 4c2e3ec71..42fb39ae7 100644
--- a/docker/latest/requirements.in
+++ b/docker/latest/requirements.in
@@ -2,5 +2,5 @@
 # will be updated by dependabot when new releases are made.
 rdflib==7.1.0
 html5rdf==1.2.0
-# isodate is required to allow the Dockerfile to build on with pre-RDFLib-7.1 releases.
-isodate==0.7.2
+# html5lib-modern is required to allow the Dockerfile to build with pre-RDFLib-7.1.1 releases.
+html5lib-modern==1.2.0
diff --git a/docker/latest/requirements.txt b/docker/latest/requirements.txt
index 8cbd3cbd3..570502462 100644
--- a/docker/latest/requirements.txt
+++ b/docker/latest/requirements.txt
@@ -8,7 +8,7 @@ html5rdf==1.2
     # via
     #   -r docker/latest/requirements.in
     #   rdflib
-isodate==0.7.2
+html5lib-modern==1.2
     # via -r docker/latest/requirements.in
 pyparsing==3.0.9
     # via rdflib

From 09640950c01f0d15d17c922e16f725ee6f1916ed Mon Sep 17 00:00:00 2001
From: Ashley Sommer <ashleysommer@gmail.com>
Date: Thu, 24 Oct 2024 10:00:56 +1000
Subject: [PATCH 4/8] Reformat test file again.

---
 test/test_literal/test_literal_html5lib.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/test/test_literal/test_literal_html5lib.py b/test/test_literal/test_literal_html5lib.py
index bbde5f0f7..b29d78a52 100644
--- a/test/test_literal/test_literal_html5lib.py
+++ b/test/test_literal/test_literal_html5lib.py
@@ -14,6 +14,7 @@
 except ImportError:
     pytest.skip("html5rdf not installed", allow_module_level=True)
 
+
 def test_has_html5rdf() -> None:
     assert rdflib.term._HAS_HTML5RDF is True
     assert RDF.HTML in rdflib.term.XSDToPython

From c57bd7eadf39a11b641d468990f1b213fd578a4d Mon Sep 17 00:00:00 2001
From: Ashley Sommer <ashleysommer@gmail.com>
Date: Thu, 24 Oct 2024 10:38:52 +1000
Subject: [PATCH 5/8] lxml is not actually required for operation of html5rdf

---
 poetry.lock    | 4 ++--
 pyproject.toml | 4 +++-
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/poetry.lock b/poetry.lock
index 99421313d..a07dd5f75 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -1455,7 +1455,7 @@ testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "p
 
 [extras]
 berkeleydb = ["berkeleydb"]
-html = ["html5rdf", "lxml"]
+html = ["html5rdf"]
 lxml = ["lxml"]
 networkx = ["networkx"]
 orjson = ["orjson"]
@@ -1463,4 +1463,4 @@ orjson = ["orjson"]
 [metadata]
 lock-version = "2.0"
 python-versions = "^3.8.1"
-content-hash = "214d37624612043042464f0e154a4e551ec43177be1aabe4b9aced3ace7182de"
+content-hash = "71704ba175e33528872fab8121cb609041bd97b6a99f8f04022a26904941b27c"
diff --git a/pyproject.toml b/pyproject.toml
index df698997f..6067009e7 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -74,7 +74,9 @@ ruff = ">=0.0.286,<0.8.0"
 [tool.poetry.extras]
 berkeleydb = ["berkeleydb"]
 networkx = ["networkx"]
-html = ["html5rdf", "lxml"]
+# html support is optional, it is used only in tokenizing `rdf:HTML` type Literals
+html = ["html5rdf"]
+# lxml support is optional, it is used only for parsing XML-formatted SPARQL results
 lxml = ["lxml"]
 orjson = ["orjson"]
 

From 0a865b014f24406ef5a0cb1da724097b7419981c Mon Sep 17 00:00:00 2001
From: Ashley Sommer <ashleysommer@gmail.com>
Date: Thu, 24 Oct 2024 10:39:55 +1000
Subject: [PATCH 6/8] Fix differences in comparison of XML and HTML nodes when
 html5rdf is used vs when it is not used.

---
 rdflib/term.py                             | 25 ++++++++++++++++------
 test/test_literal/test_literal_html5lib.py |  2 +-
 2 files changed, 20 insertions(+), 7 deletions(-)

diff --git a/rdflib/term.py b/rdflib/term.py
index aa39d398a..501777508 100644
--- a/rdflib/term.py
+++ b/rdflib/term.py
@@ -1113,7 +1113,7 @@ def __gt__(self, other: Any) -> bool:
         if other is None:
             return True  # Everything is greater than None
         if isinstance(other, Literal):
-            # Fast path for comapring numeric literals
+            # Fast path for comparing numeric literals
             # that are not ill-typed and don't have a None value
             if (
                 (
@@ -1356,9 +1356,15 @@ def eq(self, other: Any) -> bool:
 
         """
         if isinstance(other, Literal):
+            # Fast path for comparing numeric literals
+            # that are not ill-typed and don't have a None value
             if (
-                self.datatype in _NUMERIC_LITERAL_TYPES
-                and other.datatype in _NUMERIC_LITERAL_TYPES
+                (
+                    self.datatype in _NUMERIC_LITERAL_TYPES
+                    and other.datatype in _NUMERIC_LITERAL_TYPES
+                )
+                and ((not self.ill_typed) and (not other.ill_typed))
+                and (self.value is not None and other.value is not None)
             ):
                 if self.value is not None and other.value is not None:
                     return self.value == other.value
@@ -1380,6 +1386,13 @@ def eq(self, other: Any) -> bool:
                 # string/plain literals, compare on lexical form
                 return str.__eq__(self, other)
 
+            # XML can be compared to HTML, only if html5rdf is enabled
+            if ((dtself in _XML_COMPARABLE and dtother in _XML_COMPARABLE) and
+                # Ill-typed can be none if unknown, but we don't want it to be False.
+                ((self.ill_typed is not False) and (other.ill_typed is not False)) and
+                (self.value is not None and other.value is not None)):
+                return _isEqualXMLNode(self.value, other.value)
+
             if dtself != dtother:
                 if rdflib.DAWG_LITERAL_COLLATION:
                     raise TypeError(
@@ -1393,9 +1406,6 @@ def eq(self, other: Any) -> bool:
             # maybe there are counter examples
 
             if self.value is not None and other.value is not None:
-                if self.datatype in (_RDF_XMLLITERAL, _RDF_HTMLLITERAL):
-                    return _isEqualXMLNode(self.value, other.value)
-
                 return self.value == other.value
             else:
                 if str.__eq__(self, other):
@@ -2089,6 +2099,9 @@ def _castPythonToLiteral(  # noqa: N802
     # It is probably best to keep this close to the definition of
     # _GenericPythonToXSDRules so nobody misses it.
     XSDToPython[_RDF_HTMLLITERAL] = _parse_html
+    _XML_COMPARABLE = (_RDF_XMLLITERAL, _RDF_HTMLLITERAL)
+else:
+    _XML_COMPARABLE = (_RDF_XMLLITERAL,)
 
 _check_well_formed_types: Dict[URIRef, Callable[[Union[str, bytes], Any], bool]] = {
     URIRef(_XSD_PFX + "boolean"): _well_formed_boolean,
diff --git a/test/test_literal/test_literal_html5lib.py b/test/test_literal/test_literal_html5lib.py
index b29d78a52..a6dfc19e7 100644
--- a/test/test_literal/test_literal_html5lib.py
+++ b/test/test_literal/test_literal_html5lib.py
@@ -52,7 +52,7 @@ def test_has_html5rdf() -> None:
             ),
         ),
         # Well-typed literals, these have lexical forms that parse
-        # without errors with html5lib.
+        # without errors with html5rdf.
         (
             lambda: Literal("<table></table>", datatype=RDF.HTML),
             LiteralChecker(..., None, RDF.HTML, False, "<table></table>"),

From 5214ba28b38b92d416d619831193705ea05058d5 Mon Sep 17 00:00:00 2001
From: Ashley Sommer <ashleysommer@gmail.com>
Date: Thu, 24 Oct 2024 10:44:29 +1000
Subject: [PATCH 7/8] Add correct type hint to _XML_COMPARABLE variable.

---
 rdflib/term.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/rdflib/term.py b/rdflib/term.py
index 501777508..a2d506572 100644
--- a/rdflib/term.py
+++ b/rdflib/term.py
@@ -2099,7 +2099,7 @@ def _castPythonToLiteral(  # noqa: N802
     # It is probably best to keep this close to the definition of
     # _GenericPythonToXSDRules so nobody misses it.
     XSDToPython[_RDF_HTMLLITERAL] = _parse_html
-    _XML_COMPARABLE = (_RDF_XMLLITERAL, _RDF_HTMLLITERAL)
+    _XML_COMPARABLE: Tuple[URIRef, ...] = (_RDF_XMLLITERAL, _RDF_HTMLLITERAL)
 else:
     _XML_COMPARABLE = (_RDF_XMLLITERAL,)
 

From c32fcbce299ea2d78a7a8441ea90368cd253c8e4 Mon Sep 17 00:00:00 2001
From: Ashley Sommer <ashleysommer@gmail.com>
Date: Thu, 24 Oct 2024 10:58:39 +1000
Subject: [PATCH 8/8] Fix logic in using non-ill-typed XML literals for
 comparison

---
 rdflib/term.py                        | 11 +++++++----
 test/test_literal/test_xmlliterals.py |  4 ++--
 2 files changed, 9 insertions(+), 6 deletions(-)

diff --git a/rdflib/term.py b/rdflib/term.py
index a2d506572..3e397674b 100644
--- a/rdflib/term.py
+++ b/rdflib/term.py
@@ -1387,10 +1387,13 @@ def eq(self, other: Any) -> bool:
                 return str.__eq__(self, other)
 
             # XML can be compared to HTML, only if html5rdf is enabled
-            if ((dtself in _XML_COMPARABLE and dtother in _XML_COMPARABLE) and
-                # Ill-typed can be none if unknown, but we don't want it to be False.
-                ((self.ill_typed is not False) and (other.ill_typed is not False)) and
-                (self.value is not None and other.value is not None)):
+            if (
+                (dtself in _XML_COMPARABLE and dtother in _XML_COMPARABLE)
+                and
+                # Ill-typed can be None if unknown, but we don't want it to be True.
+                ((self.ill_typed is not True) and (other.ill_typed is not True))
+                and (self.value is not None and other.value is not None)
+            ):
                 return _isEqualXMLNode(self.value, other.value)
 
             if dtself != dtother:
diff --git a/test/test_literal/test_xmlliterals.py b/test/test_literal/test_xmlliterals.py
index 40227bc63..3f2e2637f 100644
--- a/test/test_literal/test_xmlliterals.py
+++ b/test/test_literal/test_xmlliterals.py
@@ -126,7 +126,7 @@ def testHTML():  # noqa: N802
                         textwrap.dedent(
                             """\
                     <!DOCTYPE example>
-                    <something/>
+                    <something2/>
                     """
                         )
                     ),
@@ -137,7 +137,7 @@ def testHTML():  # noqa: N802
                         textwrap.dedent(
                             """\
                     <!DOCTYPE example>
-                    <something />
+                    <something2 />
                     """
                         )
                     ),