From 18d1d70d37e8ed55fc37a8eade195299a05e4331 Mon Sep 17 00:00:00 2001 From: Ashley Sommer Date: Thu, 24 Oct 2024 07:49:40 +1000 Subject: [PATCH 1/8] Revert previous commit that made html support non-optional. html support is now optional again, and it uses html5rdf rather than html5lib/html5lib-modern. --- devtools/constraints.min | 2 +- docker/latest/requirements.in | 2 +- docker/latest/requirements.txt | 2 +- poetry.lock | 12 +++--- pyproject.toml | 3 +- rdflib/__init__.py | 4 +- rdflib/term.py | 45 +++++++++++++++------- test/test_literal/test_literal.py | 10 ++--- test/test_literal/test_literal_html5lib.py | 10 +++-- test/test_literal/test_xmlliterals.py | 8 ++-- tox.ini | 4 +- 11 files changed, 62 insertions(+), 40 deletions(-) diff --git a/devtools/constraints.min b/devtools/constraints.min index 66deec089..2a3f256b5 100644 --- a/devtools/constraints.min +++ b/devtools/constraints.min @@ -6,6 +6,6 @@ pyparsing==2.1.0 importlib-metadata==4.0.0 berkeleydb==18.1.2 networkx==2.0 -html5lib-modern==1.2.0 +html5rdf==1.2.0 lxml==4.3.0 orjson==3.9.14 diff --git a/docker/latest/requirements.in b/docker/latest/requirements.in index 8579291d7..4c2e3ec71 100644 --- a/docker/latest/requirements.in +++ b/docker/latest/requirements.in @@ -1,6 +1,6 @@ # This file is used for building a docker image of the latest rdflib release. It # will be updated by dependabot when new releases are made. rdflib==7.1.0 -html5lib-modern==1.2.0 +html5rdf==1.2.0 # isodate is required to allow the Dockerfile to build on with pre-RDFLib-7.1 releases. 
isodate==0.7.2 diff --git a/docker/latest/requirements.txt b/docker/latest/requirements.txt index dd96e0dd7..8cbd3cbd3 100644 --- a/docker/latest/requirements.txt +++ b/docker/latest/requirements.txt @@ -4,7 +4,7 @@ # # pip-compile docker/latest/requirements.in # -html5lib-modern==1.2 +html5rdf==1.2 # via # -r docker/latest/requirements.in # rdflib diff --git a/poetry.lock b/poetry.lock index dc1c6f5e0..99421313d 100644 --- a/poetry.lock +++ b/poetry.lock @@ -339,14 +339,14 @@ files = [ test = ["pytest (>=6)"] [[package]] -name = "html5lib-modern" +name = "html5rdf" version = "1.2" description = "HTML parser based on the WHATWG HTML specification" -optional = false +optional = true python-versions = ">=3.8" files = [ - {file = "html5lib_modern-1.2-py2.py3-none-any.whl", hash = "sha256:3458b6e31525ede4fcaac0ff42d9eeb5efaf755473768103cb56e0275caa8d99"}, - {file = "html5lib_modern-1.2.tar.gz", hash = "sha256:1fadbfc27ea955431270e4e79a4a4c290ba11c3a3098a95cc22dc73e312a1768"}, + {file = "html5rdf-1.2-py2.py3-none-any.whl", hash = "sha256:08169aa52a98ee3a6d3456d83feb36211fb5edcbcf3e05f6d19e0136f581638c"}, + {file = "html5rdf-1.2.tar.gz", hash = "sha256:08378cbbbb63993ba7bb5eb1eac44bf9ca7b1a23dbee3d2afef5376597fb00a5"}, ] [package.extras] @@ -1107,7 +1107,6 @@ files = [ {file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"}, {file = "PyYAML-6.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28"}, {file = "PyYAML-6.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9"}, - {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a08c6f0fe150303c1c6b71ebcd7213c2858041a7e01975da3a99aed1e7a378ef"}, {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0"}, {file = "PyYAML-6.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4"}, {file = "PyYAML-6.0.1-cp312-cp312-win32.whl", hash = "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54"}, @@ -1456,6 +1455,7 @@ testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "p [extras] berkeleydb = ["berkeleydb"] +html = ["html5rdf", "lxml"] lxml = ["lxml"] networkx = ["networkx"] orjson = ["orjson"] @@ -1463,4 +1463,4 @@ orjson = ["orjson"] [metadata] lock-version = "2.0" python-versions = "^3.8.1" -content-hash = "b0303e119538a8c1ca41f75206ce1c2fb3016699a00a02039bcafae17da6b03b" +content-hash = "214d37624612043042464f0e154a4e551ec43177be1aabe4b9aced3ace7182de" diff --git a/pyproject.toml b/pyproject.toml index 71fbc5541..df698997f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -43,7 +43,7 @@ isodate = {version=">=0.7.2,<1.0.0", python = "<3.11"} pyparsing = ">=2.1.0,<4" berkeleydb = {version = "^18.1.0", optional = true} networkx = {version = ">=2,<4", optional = true} -html5lib-modern = "^1.2" +html5rdf = {version = ">=1.2,<2", optional = true} lxml = {version = ">=4.3,<6.0", optional = true} orjson = {version = ">=3.9.14,<4", optional = true} @@ -74,6 +74,7 @@ ruff = ">=0.0.286,<0.8.0" [tool.poetry.extras] berkeleydb = ["berkeleydb"] networkx = ["networkx"] +html = ["html5rdf", "lxml"] lxml = ["lxml"] orjson = ["orjson"] diff --git a/rdflib/__init__.py b/rdflib/__init__.py index 30faeee5b..dfc45e5b3 100644 --- a/rdflib/__init__.py +++ b/rdflib/__init__.py @@ -47,11 +47,11 @@ import sys from importlib import metadata -_DISTRIBUTION_METADATA = metadata.metadata("rdflib") +#_DISTRIBUTION_METADATA = metadata.metadata("rdflib") __docformat__ = "restructuredtext en" -__version__: str = _DISTRIBUTION_METADATA["Version"] +__version__: str = "0.0.0" #_DISTRIBUTION_METADATA["Version"] __date__ = 
"2024-10-17" __all__ = [ diff --git a/rdflib/term.py b/rdflib/term.py index 9503d7779..aa39d398a 100644 --- a/rdflib/term.py +++ b/rdflib/term.py @@ -65,8 +65,6 @@ from urllib.parse import urldefrag, urljoin, urlparse from uuid import uuid4 -import html5lib - import rdflib import rdflib.util from rdflib.compat import long_type @@ -86,6 +84,14 @@ from .namespace import NamespaceManager from .paths import AlternativePath, InvPath, NegatedPath, Path, SequencePath +_HAS_HTML5RDF = False + +try: + import html5rdf + + _HAS_HTML5RDF = True +except ImportError: + html5rdf = None _SKOLEM_DEFAULT_AUTHORITY = "https://rdflib.github.io" @@ -1668,19 +1674,19 @@ def _parseXML(xmlstring: str) -> xml.dom.minidom.Document: # noqa: N802 def _parse_html(lexical_form: str) -> xml.dom.minidom.DocumentFragment: """ Parse the lexical form of an HTML literal into a document fragment - using the ``dom`` from html5lib tree builder. + using the ``dom`` from html5rdf tree builder. :param lexical_form: The lexical form of the HTML literal. :return: A document fragment representing the HTML literal. - :raises: `html5lib.html5parser.ParseError` if the lexical form is + :raises: `html5rdf.html5parser.ParseError` if the lexical form is not valid HTML. """ - parser = html5lib.HTMLParser( - tree=html5lib.treebuilders.getTreeBuilder("dom"), strict=True + parser = html5rdf.HTMLParser( + tree=html5rdf.treebuilders.getTreeBuilder("dom"), strict=True ) try: result: xml.dom.minidom.DocumentFragment = parser.parseFragment(lexical_form) - except html5lib.html5parser.ParseError as e: + except html5rdf.html5parser.ParseError as e: logger.info(f"Failed to parse HTML: {e}") raise e result.normalize() @@ -1695,7 +1701,7 @@ def _write_html(value: xml.dom.minidom.DocumentFragment) -> bytes: :param value: A document fragment representing an HTML literal. :return: The lexical form of the HTML literal. 
""" - result = html5lib.serialize(value, tree="dom") + result = html5rdf.serialize(value, tree="dom") return result @@ -2012,14 +2018,21 @@ def _castPythonToLiteral( # noqa: N802 (Duration, (lambda i: duration_isoformat(i), _XSD_DURATION)), (timedelta, (lambda i: duration_isoformat(i), _XSD_DAYTIMEDURATION)), (xml.dom.minidom.Document, (_writeXML, _RDF_XMLLITERAL)), - # This is a bit dirty, by accident the html5lib parser produces - # DocumentFragments, and the xml parser Documents, letting this - # decide what datatype to use makes roundtripping easier, but it a - # bit random. - (xml.dom.minidom.DocumentFragment, (_write_html, _RDF_HTMLLITERAL)), (Fraction, (None, _OWL_RATIONAL)), ] +if html5rdf is not None: + # This is a bit dirty, by accident the html5rdf parser produces + # DocumentFragments, and the xml parser Documents, letting this + # decide what datatype to use makes roundtripping easier, but its a + # bit random. + + # This must happen before _GenericPythonToXSDRules is assigned to + # _OriginalGenericPythonToXSDRules. + _GenericPythonToXSDRules.append( + (xml.dom.minidom.DocumentFragment, (_write_html, _RDF_HTMLLITERAL)) + ) + _OriginalGenericPythonToXSDRules = list(_GenericPythonToXSDRules) _SpecificPythonToXSDRules: List[ @@ -2069,10 +2082,14 @@ def _castPythonToLiteral( # noqa: N802 URIRef(_XSD_PFX + "double"): float, URIRef(_XSD_PFX + "base64Binary"): b64decode, URIRef(_XSD_PFX + "anyURI"): None, - _RDF_HTMLLITERAL: _parse_html, _RDF_XMLLITERAL: _parseXML, } +if html5rdf is not None: + # It is probably best to keep this close to the definition of + # _GenericPythonToXSDRules so nobody misses it. 
+ XSDToPython[_RDF_HTMLLITERAL] = _parse_html + _check_well_formed_types: Dict[URIRef, Callable[[Union[str, bytes], Any], bool]] = { URIRef(_XSD_PFX + "boolean"): _well_formed_boolean, URIRef(_XSD_PFX + "nonPositiveInteger"): _well_formed_non_positive_integer, diff --git a/test/test_literal/test_literal.py b/test/test_literal/test_literal.py index bc2bc3b45..a28e67e12 100644 --- a/test/test_literal/test_literal.py +++ b/test/test_literal/test_literal.py @@ -23,11 +23,11 @@ try: - import html5lib as _ # noqa: F401 + import html5rdf as _ # noqa: F401 - _HAVE_HTML5LIB = True + _HAVE_HTML5RDF = True except ImportError: - _HAVE_HTML5LIB = False + _HAVE_HTML5RDF = False import pytest @@ -981,7 +981,7 @@ def __eq__(self, __value: object) -> bool: ( lambda: Literal("", datatype=RDF.HTML), LiteralChecker( - ..., None, RDF.HTML, True if _HAVE_HTML5LIB else None, "" + ..., None, RDF.HTML, True if _HAVE_HTML5RDF else None, "" ), ), ( @@ -990,7 +990,7 @@ def __eq__(self, __value: object) -> bool: ..., None, RDF.HTML, - False if _HAVE_HTML5LIB else None, + False if _HAVE_HTML5RDF else None, "
", ), ), diff --git a/test/test_literal/test_literal_html5lib.py b/test/test_literal/test_literal_html5lib.py index b77feb94b..bbde5f0f7 100644 --- a/test/test_literal/test_literal_html5lib.py +++ b/test/test_literal/test_literal_html5lib.py @@ -1,7 +1,6 @@ import xml.dom.minidom from typing import Callable -import html5lib # noqa: F401 import pytest import rdflib.term @@ -10,8 +9,13 @@ from test.utils.literal import LiteralChecker from test.utils.outcome import OutcomeChecker, OutcomePrimitives +try: + import html5rdf as _ # noqa: F401 +except ImportError: + pytest.skip("html5rdf not installed", allow_module_level=True) -def test_has_html5lib() -> None: +def test_has_html5rdf() -> None: + assert rdflib.term._HAS_HTML5RDF is True assert RDF.HTML in rdflib.term.XSDToPython rule = next( ( @@ -29,7 +33,7 @@ def test_has_html5lib() -> None: ["factory", "outcome"], [ # Ill-typed literals, these have lexical forms that result in - # errors when parsed as HTML by html5lib. + # errors when parsed as HTML by html5rdf. ( lambda: Literal("

Hello, World!

", datatype=RDF.HTML), LiteralChecker( diff --git a/test/test_literal/test_xmlliterals.py b/test/test_literal/test_xmlliterals.py index 38ae549d4..40227bc63 100644 --- a/test/test_literal/test_xmlliterals.py +++ b/test/test_literal/test_xmlliterals.py @@ -9,11 +9,11 @@ from rdflib import RDF, Literal try: - import html5lib # noqa: F401 + import html5rdf # noqa: F401 - have_html5lib = True + have_html5rdf = True except ImportError: - have_html5lib = False + have_html5rdf = False def testPythonRoundtrip(): # noqa: N802 @@ -90,7 +90,7 @@ def testRoundtrip(): # noqa: N802 roundtrip("nt") -@pytest.mark.skipif(not have_html5lib, reason="requires html5lib") +@pytest.mark.skipif(not have_html5rdf, reason="requires html5rdf") def testHTML(): # noqa: N802 l1 = Literal("hello", datatype=RDF.XMLLiteral) assert l1.value is not None, "xml must have been parsed" diff --git a/tox.ini b/tox.ini index ef02ff4d2..9ec80d516 100644 --- a/tox.ini +++ b/tox.ini @@ -15,7 +15,7 @@ setenv = COVERAGE_FILE = {env:COVERAGE_FILE:{toxinidir}/.coverage.{envname}} MYPY_CACHE_DIR = {envdir}/.mypy_cache docs: POETRY_ARGS_docs = --only=docs - extensive: POETRY_ARGS_extensive = --extras=berkeleydb --extras=networkx --extras=orjson + extensive: POETRY_ARGS_extensive = --extras=berkeleydb --extras=networkx --extras=html --extras=orjson lxml: POETRY_ARGS_lxml = --extras=lxml commands_pre = py3{8,9,10,11}: python -c 'import os; print("\n".join(f"{key}={value}" for key, value in os.environ.items()))' @@ -59,7 +59,7 @@ setenv = PYTHONHASHSEED = 0 commands_pre = poetry lock --check - poetry install --only=main --only=docs + poetry install --only=main --only=docs --extras=html poetry env info commands = poetry run sphinx-build -T -W -b html -d {envdir}/doctree docs docs/_build/html From e7dc73798c75132b4d4f0ef8dae4567cecceea9b Mon Sep 17 00:00:00 2001 From: Ashley Sommer Date: Thu, 24 Oct 2024 08:58:58 +1000 Subject: [PATCH 2/8] Revert "Auxiliary commit to revert individual files from 
18d1d70d37e8ed55fc37a8eade195299a05e4331" This reverts commit 6f80ad9cd2cc4b76ff4e1bc0998951768aff7573. --- rdflib/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rdflib/__init__.py b/rdflib/__init__.py index dfc45e5b3..30faeee5b 100644 --- a/rdflib/__init__.py +++ b/rdflib/__init__.py @@ -47,11 +47,11 @@ import sys from importlib import metadata -#_DISTRIBUTION_METADATA = metadata.metadata("rdflib") +_DISTRIBUTION_METADATA = metadata.metadata("rdflib") __docformat__ = "restructuredtext en" -__version__: str = "0.0.0" #_DISTRIBUTION_METADATA["Version"] +__version__: str = _DISTRIBUTION_METADATA["Version"] __date__ = "2024-10-17" __all__ = [ From 1d33786f007ad5b4ffd8e22293dd25d4a96592fc Mon Sep 17 00:00:00 2001 From: Ashley Sommer Date: Thu, 24 Oct 2024 09:45:35 +1000 Subject: [PATCH 3/8] Add html5lib-modern back into dockerfile dependencies so the docker image can be built with the released rdflib v7.1.0 --- docker/latest/requirements.in | 4 ++-- docker/latest/requirements.txt | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/docker/latest/requirements.in b/docker/latest/requirements.in index 4c2e3ec71..42fb39ae7 100644 --- a/docker/latest/requirements.in +++ b/docker/latest/requirements.in @@ -2,5 +2,5 @@ # will be updated by dependabot when new releases are made. rdflib==7.1.0 html5rdf==1.2.0 -# isodate is required to allow the Dockerfile to build on with pre-RDFLib-7.1 releases. -isodate==0.7.2 +# html5lib-modern is required to allow the Dockerfile to build on with pre-RDFLib-7.1.1 releases. 
+html5lib-modern==1.2.0 diff --git a/docker/latest/requirements.txt b/docker/latest/requirements.txt index 8cbd3cbd3..570502462 100644 --- a/docker/latest/requirements.txt +++ b/docker/latest/requirements.txt @@ -8,7 +8,7 @@ html5rdf==1.2 # via # -r docker/latest/requirements.in # rdflib -isodate==0.7.2 +html5lib-modern==1.2 # via -r docker/latest/requirements.in pyparsing==3.0.9 # via rdflib From 09640950c01f0d15d17c922e16f725ee6f1916ed Mon Sep 17 00:00:00 2001 From: Ashley Sommer Date: Thu, 24 Oct 2024 10:00:56 +1000 Subject: [PATCH 4/8] Reformat test file again. --- test/test_literal/test_literal_html5lib.py | 1 + 1 file changed, 1 insertion(+) diff --git a/test/test_literal/test_literal_html5lib.py b/test/test_literal/test_literal_html5lib.py index bbde5f0f7..b29d78a52 100644 --- a/test/test_literal/test_literal_html5lib.py +++ b/test/test_literal/test_literal_html5lib.py @@ -14,6 +14,7 @@ except ImportError: pytest.skip("html5rdf not installed", allow_module_level=True) + def test_has_html5rdf() -> None: assert rdflib.term._HAS_HTML5RDF is True assert RDF.HTML in rdflib.term.XSDToPython From c57bd7eadf39a11b641d468990f1b213fd578a4d Mon Sep 17 00:00:00 2001 From: Ashley Sommer Date: Thu, 24 Oct 2024 10:38:52 +1000 Subject: [PATCH 5/8] lxml is not actually required for operation of html5rdf --- poetry.lock | 4 ++-- pyproject.toml | 4 +++- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/poetry.lock b/poetry.lock index 99421313d..a07dd5f75 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1455,7 +1455,7 @@ testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "p [extras] berkeleydb = ["berkeleydb"] -html = ["html5rdf", "lxml"] +html = ["html5rdf"] lxml = ["lxml"] networkx = ["networkx"] orjson = ["orjson"] @@ -1463,4 +1463,4 @@ orjson = ["orjson"] [metadata] lock-version = "2.0" python-versions = "^3.8.1" -content-hash = "214d37624612043042464f0e154a4e551ec43177be1aabe4b9aced3ace7182de" +content-hash = 
"71704ba175e33528872fab8121cb609041bd97b6a99f8f04022a26904941b27c" diff --git a/pyproject.toml b/pyproject.toml index df698997f..6067009e7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -74,7 +74,9 @@ ruff = ">=0.0.286,<0.8.0" [tool.poetry.extras] berkeleydb = ["berkeleydb"] networkx = ["networkx"] -html = ["html5rdf", "lxml"] +# html support is optional, it is used only in tokenizing `rdf:HTML` type Literals +html = ["html5rdf"] +# lxml support is optional, it is used only for parsing XML-formatted SPARQL results lxml = ["lxml"] orjson = ["orjson"] From 0a865b014f24406ef5a0cb1da724097b7419981c Mon Sep 17 00:00:00 2001 From: Ashley Sommer Date: Thu, 24 Oct 2024 10:39:55 +1000 Subject: [PATCH 6/8] Fix differences in comparison of XML and HTML nodes when html5rdf is used vs when it is not used. --- rdflib/term.py | 25 ++++++++++++++++------ test/test_literal/test_literal_html5lib.py | 2 +- 2 files changed, 20 insertions(+), 7 deletions(-) diff --git a/rdflib/term.py b/rdflib/term.py index aa39d398a..501777508 100644 --- a/rdflib/term.py +++ b/rdflib/term.py @@ -1113,7 +1113,7 @@ def __gt__(self, other: Any) -> bool: if other is None: return True # Everything is greater than None if isinstance(other, Literal): - # Fast path for comapring numeric literals + # Fast path for comparing numeric literals # that are not ill-typed and don't have a None value if ( ( @@ -1356,9 +1356,15 @@ def eq(self, other: Any) -> bool: """ if isinstance(other, Literal): + # Fast path for comparing numeric literals + # that are not ill-typed and don't have a None value if ( - self.datatype in _NUMERIC_LITERAL_TYPES - and other.datatype in _NUMERIC_LITERAL_TYPES + ( + self.datatype in _NUMERIC_LITERAL_TYPES + and other.datatype in _NUMERIC_LITERAL_TYPES + ) + and ((not self.ill_typed) and (not other.ill_typed)) + and (self.value is not None and other.value is not None) ): if self.value is not None and other.value is not None: return self.value == other.value @@ -1380,6 +1386,13 @@ def
eq(self, other: Any) -> bool: # string/plain literals, compare on lexical form return str.__eq__(self, other) + # XML can be compared to HTML, only if html5rdf is enabled + if ((dtself in _XML_COMPARABLE and dtother in _XML_COMPARABLE) and + # Ill-typed can be none if unknown, but we don't want it to be False. + ((self.ill_typed is not False) and (other.ill_typed is not False)) and + (self.value is not None and other.value is not None)): + return _isEqualXMLNode(self.value, other.value) + if dtself != dtother: if rdflib.DAWG_LITERAL_COLLATION: raise TypeError( @@ -1393,9 +1406,6 @@ def eq(self, other: Any) -> bool: # maybe there are counter examples if self.value is not None and other.value is not None: - if self.datatype in (_RDF_XMLLITERAL, _RDF_HTMLLITERAL): - return _isEqualXMLNode(self.value, other.value) - return self.value == other.value else: if str.__eq__(self, other): @@ -2089,6 +2099,9 @@ def _castPythonToLiteral( # noqa: N802 # It is probably best to keep this close to the definition of # _GenericPythonToXSDRules so nobody misses it. XSDToPython[_RDF_HTMLLITERAL] = _parse_html + _XML_COMPARABLE = (_RDF_XMLLITERAL, _RDF_HTMLLITERAL) +else: + _XML_COMPARABLE = (_RDF_XMLLITERAL,) _check_well_formed_types: Dict[URIRef, Callable[[Union[str, bytes], Any], bool]] = { URIRef(_XSD_PFX + "boolean"): _well_formed_boolean, diff --git a/test/test_literal/test_literal_html5lib.py b/test/test_literal/test_literal_html5lib.py index b29d78a52..a6dfc19e7 100644 --- a/test/test_literal/test_literal_html5lib.py +++ b/test/test_literal/test_literal_html5lib.py @@ -52,7 +52,7 @@ def test_has_html5rdf() -> None: ), ), # Well-typed literals, these have lexical forms that parse - # without errors with html5lib. + # without errors with html5rdf. ( lambda: Literal("
", datatype=RDF.HTML), LiteralChecker(..., None, RDF.HTML, False, "
"), From 5214ba28b38b92d416d619831193705ea05058d5 Mon Sep 17 00:00:00 2001 From: Ashley Sommer Date: Thu, 24 Oct 2024 10:44:29 +1000 Subject: [PATCH 7/8] Add correct type hint to _XML_COMPARABLE variable. --- rdflib/term.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rdflib/term.py b/rdflib/term.py index 501777508..a2d506572 100644 --- a/rdflib/term.py +++ b/rdflib/term.py @@ -2099,7 +2099,7 @@ def _castPythonToLiteral( # noqa: N802 # It is probably best to keep this close to the definition of # _GenericPythonToXSDRules so nobody misses it. XSDToPython[_RDF_HTMLLITERAL] = _parse_html - _XML_COMPARABLE = (_RDF_XMLLITERAL, _RDF_HTMLLITERAL) + _XML_COMPARABLE: Tuple[URIRef, ...] = (_RDF_XMLLITERAL, _RDF_HTMLLITERAL) else: _XML_COMPARABLE = (_RDF_XMLLITERAL,) From c32fcbce299ea2d78a7a8441ea90368cd253c8e4 Mon Sep 17 00:00:00 2001 From: Ashley Sommer Date: Thu, 24 Oct 2024 10:58:39 +1000 Subject: [PATCH 8/8] Fix logic in using non-ill-typed XML literals for comparison --- rdflib/term.py | 11 +++++++---- test/test_literal/test_xmlliterals.py | 4 ++-- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/rdflib/term.py b/rdflib/term.py index a2d506572..3e397674b 100644 --- a/rdflib/term.py +++ b/rdflib/term.py @@ -1387,10 +1387,13 @@ def eq(self, other: Any) -> bool: return str.__eq__(self, other) # XML can be compared to HTML, only if html5rdf is enabled - if ((dtself in _XML_COMPARABLE and dtother in _XML_COMPARABLE) and - # Ill-typed can be none if unknown, but we don't want it to be False. - ((self.ill_typed is not False) and (other.ill_typed is not False)) and - (self.value is not None and other.value is not None)): + if ( + (dtself in _XML_COMPARABLE and dtother in _XML_COMPARABLE) + and + # Ill-typed can be None if unknown, but we don't want it to be True. 
+ ((self.ill_typed is not True) and (other.ill_typed is not True)) + and (self.value is not None and other.value is not None) + ): return _isEqualXMLNode(self.value, other.value) if dtself != dtother: diff --git a/test/test_literal/test_xmlliterals.py b/test/test_literal/test_xmlliterals.py index 40227bc63..3f2e2637f 100644 --- a/test/test_literal/test_xmlliterals.py +++ b/test/test_literal/test_xmlliterals.py @@ -126,7 +126,7 @@ def testHTML(): # noqa: N802 textwrap.dedent( """\ - + """ ) ), @@ -137,7 +137,7 @@ def testHTML(): # noqa: N802 textwrap.dedent( """\ - + """ ) ),