diff --git a/.gitignore b/.gitignore index 13236940b..43d030d5d 100644 --- a/.gitignore +++ b/.gitignore @@ -2,6 +2,7 @@ RDFLib.sublime-project /docs/_build/ RDFLib.sublime-workspace coverage/ +cov.xml /.hgtags /.hgignore build/ diff --git a/CHANGELOG.md b/CHANGELOG.md index 27c9fc414..d006172c7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -248,6 +248,25 @@ and will be removed for release. + + + + + + + +- Fixed handling of `Literal` `datatype` to correctly differentiate between + blank string values and undefined values, also changed the datatype of + `rdflib.term.Literal.datatype` from `Optional[str]` to `Optional[URIRef]` now + that all non-`URIRef` `str` values will be converted to `URIRef`. + [PR #2076](https://github.com/RDFLib/rdflib/pull/2076). + + + + + + + diff --git a/Taskfile.yml b/Taskfile.yml index a4c83aad2..0683b3e65 100644 --- a/Taskfile.yml +++ b/Taskfile.yml @@ -148,7 +148,7 @@ tasks: mypy: desc: Run mypy cmds: - - "{{._PYTHON | shellQuote}} -m mypy --show-error-context --show-error-codes" + - "{{._PYTHON | shellQuote}} -m mypy --show-error-context --show-error-codes {{.CLI_ARGS}}" lint:fix: desc: Fix auto-fixable linting errors diff --git a/rdflib/plugins/stores/memory.py b/rdflib/plugins/stores/memory.py index 07515e6ea..93c069710 100644 --- a/rdflib/plugins/stores/memory.py +++ b/rdflib/plugins/stores/memory.py @@ -217,14 +217,12 @@ def bind(self, prefix: str, namespace: "URIRef", override: bool = True) -> None: self.__namespace[prefix] = namespace else: # type error: Invalid index type "Optional[URIRef]" for "Dict[URIRef, str]"; expected type "URIRef" - # type error: Incompatible types in assignment (expression has type "Optional[str]", target has type "str") - self.__prefix[_coalesce(bound_namespace, namespace)] = _coalesce( # type: ignore[index, assignment] - bound_prefix, prefix + self.__prefix[_coalesce(bound_namespace, namespace)] = _coalesce( # type: ignore[index] + bound_prefix, default=prefix ) # type error: Invalid index type "Optional[str]" for "Dict[str, URIRef]"; expected type "str" - # type error: Incompatible types in assignment (expression has type "Optional[URIRef]", target has type "URIRef") - self.__namespace[_coalesce(bound_prefix, prefix)] = _coalesce( # type: ignore[index, assignment] - bound_namespace, namespace + self.__namespace[_coalesce(bound_prefix, prefix)] = _coalesce( # type: ignore[index] + bound_namespace, default=namespace ) def namespace(self, prefix: str) -> Optional["URIRef"]: @@ -538,14 +536,13 @@ def bind(self, prefix: str, namespace: "URIRef", override: bool = True) -> None: self.__namespace[prefix] = namespace else: # type error: Invalid index type "Optional[URIRef]" for "Dict[URIRef, str]"; expected type "URIRef" - # type error: Incompatible types in assignment (expression has type "Optional[str]", target has type "str") - self.__prefix[_coalesce(bound_namespace, namespace)] = _coalesce( # type: ignore[index, assignment] - bound_prefix, prefix + self.__prefix[_coalesce(bound_namespace, namespace)] = _coalesce( # type: ignore[index] + bound_prefix, default=prefix ) # type error: Invalid index type "Optional[str]" for "Dict[str, URIRef]"; expected type "str" # type error: Incompatible types in assignment (expression has type "Optional[URIRef]", target has type "URIRef") - self.__namespace[_coalesce(bound_prefix, prefix)] = _coalesce( # type: ignore[index, assignment] - bound_namespace, namespace + self.__namespace[_coalesce(bound_prefix, prefix)] = _coalesce( # type: ignore[index] + bound_namespace, default=namespace ) def namespace(self, prefix: str) -> Optional["URIRef"]: diff --git a/rdflib/term.py b/rdflib/term.py index e68f1a7dc..5d1377a6f 100644 --- a/rdflib/term.py +++ b/rdflib/term.py @@ -69,6 +69,7 @@ ) import rdflib +import rdflib.util from rdflib.compat import long_type if TYPE_CHECKING: @@ -598,7 +599,7 @@ class Literal(Identifier): _value: Any _language: Optional[str] # NOTE: _datatype should maybe be of type URIRef, and not optional. - _datatype: Optional[str] + _datatype: Optional[URIRef] _ill_typed: Optional[bool] __slots__ = ("_language", "_datatype", "_value", "_ill_typed") @@ -624,7 +625,7 @@ def __new__( if lang is not None and not _is_valid_langtag(lang): raise ValueError(f"'{str(lang)}' is not a valid language tag!") - if datatype: + if datatype is not None: datatype = URIRef(datatype) value = None @@ -633,7 +634,7 @@ def __new__( # create from another Literal instance lang = lang or lexical_or_value.language - if datatype: + if datatype is not None: # override datatype value = _castLexicalToPython(lexical_or_value, datatype) else: @@ -644,7 +645,7 @@ def __new__( # passed a string # try parsing lexical form of datatyped literal value = _castLexicalToPython(lexical_or_value, datatype) - if datatype and datatype in _toPythonMapping: + if datatype is not None and datatype in _toPythonMapping: # datatype is a recognized datatype IRI: # https://www.w3.org/TR/rdf11-concepts/#dfn-recognized-datatype-iris dt_uri: URIRef = URIRef(datatype) @@ -661,10 +662,12 @@ def __new__( value = lexical_or_value _value, _datatype = _castPythonToLiteral(lexical_or_value, datatype) - datatype = datatype or _datatype + _datatype = None if _datatype is None else URIRef(_datatype) + + datatype = rdflib.util._coalesce(datatype, _datatype) if _value is not None: lexical_or_value = _value - if datatype: + if datatype is not None: lang = None if isinstance(lexical_or_value, bytes): @@ -729,7 +732,7 @@ def language(self) -> Optional[str]: return self._language @property - def datatype(self) -> Optional[str]: + def datatype(self) -> Optional[URIRef]: return self._datatype def __reduce__( @@ -743,7 +746,7 @@ def __reduce__( def __getstate__(self) -> Tuple[None, Dict[str, Union[str, None]]]: return (None, dict(language=self.language, datatype=self.datatype)) - def __setstate__(self, arg: Tuple[Any, Dict[str, str]]) -> None: + def __setstate__(self, arg: Tuple[Any, Dict[str, Any]]) -> None: _, d = arg self._language = d["language"] self._datatype = d["datatype"] @@ -1096,8 +1099,8 @@ def __gt__(self, other: Any) -> bool: # plain-literals and xsd:string literals # are "the same" - dtself = self.datatype or _XSD_STRING - dtother = other.datatype or _XSD_STRING + dtself = rdflib.util._coalesce(self.datatype, default=_XSD_STRING) + dtother = rdflib.util._coalesce(other.datatype, default=_XSD_STRING) if dtself != dtother: if rdflib.DAWG_LITERAL_COLLATION: @@ -1129,9 +1132,9 @@ def __gt__(self, other: Any) -> bool: # same language, same lexical form, check real dt # plain-literals come before xsd:string! if self.datatype != other.datatype: - if not self.datatype: + if self.datatype is None: return False - elif not other.datatype: + elif other.datatype is None: return True else: return self.datatype > other.datatype @@ -1186,7 +1189,7 @@ def _comparable_to(self, other: Any) -> bool: rich-compare with this literal """ if isinstance(other, Literal): - if self.datatype and other.datatype: + if self.datatype is not None and other.datatype is not None: # two datatyped literals if ( self.datatype not in XSDToPython @@ -1247,7 +1250,7 @@ def __hash__(self) -> int: # type: ignore[override] # Directly accessing the member is faster than the property. if self._language: res ^= hash(self._language.lower()) - if self._datatype: + if self._datatype is not None: res ^= hash(self._datatype) return res @@ -1342,8 +1345,8 @@ def eq(self, other: Any) -> bool: if (self.language or "").lower() != (other.language or "").lower(): return False - dtself = self.datatype or _XSD_STRING - dtother = other.datatype or _XSD_STRING + dtself = rdflib.util._coalesce(self.datatype, default=_XSD_STRING) + dtother = rdflib.util._coalesce(other.datatype, default=_XSD_STRING) if dtself == _XSD_STRING and dtother == _XSD_STRING: # string/plain literals, compare on lexical form @@ -1556,7 +1559,7 @@ def _literal_n3( datatype = self.datatype quoted_dt = None - if datatype: + if datatype is not None: if qname_callback: quoted_dt = qname_callback(datatype) if not quoted_dt: @@ -1906,16 +1909,18 @@ def _well_formed_negative_integer(lexical: Union[str, bytes], value: Any) -> boo URIRef(_XSD_PFX + "token"), ) +_StrT = TypeVar("_StrT", bound=str) + def _py2literal( obj: Any, pType: Any, # noqa: N803 castFunc: Optional[Callable[[Any], Any]], - dType: Optional[str], -) -> Tuple[Any, Optional[str]]: - if castFunc: + dType: Optional[_StrT], +) -> Tuple[Any, Optional[_StrT]]: + if castFunc is not None: return castFunc(obj), dType - elif dType: + elif dType is not None: return obj, dType else: return obj, None @@ -2062,7 +2067,7 @@ def _reset_bindings() -> None: def _castLexicalToPython( # noqa: N802 - lexical: Union[str, bytes], datatype: Optional[str] + lexical: Union[str, bytes], datatype: Optional[URIRef] ) -> Any: """ Map a lexical form to the value-space for the given datatype diff --git a/rdflib/util.py b/rdflib/util.py index c3d04065c..c0fba7895 100644 --- a/rdflib/util.py +++ b/rdflib/util.py @@ -1,3 +1,5 @@ +from __future__ import annotations + """ Some utility functions. @@ -35,13 +37,14 @@ Set, Tuple, TypeVar, + overload, ) from urllib.parse import quote, urlsplit, urlunsplit import rdflib.graph # avoid circular dependency +import rdflib.namespace +import rdflib.term from rdflib.compat import sign -from rdflib.namespace import XSD, Namespace, NamespaceManager -from rdflib.term import BNode, Literal, Node, URIRef if TYPE_CHECKING: from rdflib.graph import Graph @@ -117,11 +120,11 @@ def to_term(s, default=None): if not s: return default elif s.startswith("<") and s.endswith(">"): - return URIRef(s[1:-1]) + return rdflib.term.URIRef(s[1:-1]) elif s.startswith('"') and s.endswith('"'): - return Literal(s[1:-1]) + return rdflib.term.Literal(s[1:-1]) elif s.startswith("_"): - return BNode(s) + return rdflib.term.BNode(s) else: msg = "Unrecognised term syntax: '%s'" % s raise Exception(msg) @@ -131,6 +134,8 @@ def from_n3(s: str, default=None, backend=None, nsm=None): r''' Creates the Identifier corresponding to the given n3 string. + >>> from rdflib.term import URIRef, Literal + >>> from rdflib.namespace import NamespaceManager >>> from_n3('') == URIRef('http://ex.com/foo') True >>> from_n3('"foo"@de') == Literal('foo', lang='de') @@ -159,7 +164,9 @@ def from_n3(s: str, default=None, backend=None, nsm=None): if s.startswith("<"): # Hack: this should correctly handle strings with either native unicode # characters, or \u1234 unicode escapes. - return URIRef(s[1:-1].encode("raw-unicode-escape").decode("unicode-escape")) + return rdflib.term.URIRef( + s[1:-1].encode("raw-unicode-escape").decode("unicode-escape") + ) elif s.startswith('"'): if s.startswith('"""'): quotes = '"""' @@ -189,9 +196,9 @@ def from_n3(s: str, default=None, backend=None, nsm=None): # Hack: this should correctly handle strings with either native unicode # characters, or \u1234 unicode escapes. value = value.encode("raw-unicode-escape").decode("unicode-escape") - return Literal(value, language, datatype) + return rdflib.term.Literal(value, language, datatype) elif s == "true" or s == "false": - return Literal(s == "true") + return rdflib.term.Literal(s == "true") elif ( s.lower() .replace(".", "", 1) @@ -200,10 +207,10 @@ def from_n3(s: str, default=None, backend=None, nsm=None): .isnumeric() ): if "e" in s.lower(): - return Literal(s, datatype=XSD.double) + return rdflib.term.Literal(s, datatype=rdflib.namespace.XSD.double) if "." in s: - return Literal(float(s), datatype=XSD.decimal) - return Literal(int(s), datatype=XSD.integer) + return rdflib.term.Literal(float(s), datatype=rdflib.namespace.XSD.decimal) + return rdflib.term.Literal(int(s), datatype=rdflib.namespace.XSD.integer) elif s.startswith("{"): identifier = from_n3(s[1:-1]) @@ -212,16 +219,16 @@ def from_n3(s: str, default=None, backend=None, nsm=None): identifier = from_n3(s[1:-1]) return rdflib.graph.Graph(backend, identifier) elif s.startswith("_:"): - return BNode(s[2:]) + return rdflib.term.BNode(s[2:]) elif ":" in s: if nsm is None: # instantiate default NamespaceManager and rely on its defaults - nsm = NamespaceManager(rdflib.graph.Graph()) + nsm = rdflib.namespace.NamespaceManager(rdflib.graph.Graph()) prefix, last_part = s.split(":", 1) ns = dict(nsm.namespaces())[prefix] - return Namespace(ns)[last_part] + return rdflib.namespace.Namespace(ns)[last_part] else: - return BNode(s) + return rdflib.term.BNode(s) def date_time(t=None, local_time_zone=False): @@ -382,8 +389,10 @@ def _get_ext(fpath, lower=True): def find_roots( - graph: "Graph", prop: "URIRef", roots: Optional[Set["Node"]] = None -) -> Set["Node"]: + graph: "Graph", + prop: "rdflib.term.URIRef", + roots: Optional[Set["rdflib.term.Node"]] = None, +) -> Set["rdflib.term.Node"]: """ Find the roots in some sort of transitive hierarchy. @@ -395,7 +404,7 @@ def find_roots( """ - non_roots: Set[Node] = set() + non_roots: Set[rdflib.term.Node] = set() if roots is None: roots = set() for x, y in graph.subject_objects(prop): @@ -409,13 +418,13 @@ def find_roots( def get_tree( graph: "Graph", - root: "Node", - prop: "URIRef", - mapper: Callable[["Node"], "Node"] = lambda x: x, + root: "rdflib.term.Node", + prop: "rdflib.term.URIRef", + mapper: Callable[["rdflib.term.Node"], "rdflib.term.Node"] = lambda x: x, sortkey: Optional[Callable[[Any], Any]] = None, - done: Optional[Set["Node"]] = None, + done: Optional[Set["rdflib.term.Node"]] = None, dir: str = "down", -) -> Optional[Tuple[Node, List[Any]]]: +) -> Optional[Tuple["rdflib.term.Node", List[Any]]]: """ Return a nested list/tuple structure representing the tree built by the transitive property given, starting from the root given @@ -442,7 +451,7 @@ def get_tree( done.add(root) tree = [] - branches: Iterator[Node] + branches: Iterator[rdflib.term.Node] if dir == "down": branches = graph.subjects(prop, root) else: @@ -459,24 +468,41 @@ def get_tree( _AnyT = TypeVar("_AnyT") -def _coalesce(*args: Optional[_AnyT]) -> Optional[_AnyT]: +@overload +def _coalesce(*args: Optional[_AnyT], default: _AnyT) -> _AnyT: + ... + + +@overload +def _coalesce( + *args: Optional[_AnyT], default: Optional[_AnyT] = ... +) -> Optional[_AnyT]: + ... + + +def _coalesce( + *args: Optional[_AnyT], default: Optional[_AnyT] = None +) -> Optional[_AnyT]: """ This is a null coalescing function, it will return the first non-`None` - argument passed to it, otherwise it will return `None`. + argument passed to it, otherwise it will return ``default`` which is `None` + by default. - For more info regarding the rationale of this function see deferred `PEP - 505 `_. + For more info regarding the rationale of this function see deferred `PEP 505 + `_. :param args: Values to consider as candidates to return, the first arg that is not `None` will be returned. If no argument is passed this function will return None. - :return: The first ``arg`` that is not `None`, otherwise `None` if there - are no args or if all args are `None`. + :param default: The default value to return if none of the args are not + `None`. + :return: The first ``args`` that is not `None`, otherwise the value of + ``default`` if there are no ``args`` or if all ``args`` are `None`. """ for arg in args: if arg is not None: return arg - return None + return default def _iri2uri(iri: str) -> str: diff --git a/test/test_literal/test_literal.py b/test/test_literal/test_literal.py index 41d3021c3..074abe1e6 100644 --- a/test/test_literal/test_literal.py +++ b/test/test_literal/test_literal.py @@ -12,7 +12,8 @@ from contextlib import ExitStack from decimal import Decimal from test.utils import affix_tuples -from typing import Any, Generator, Optional, Type, Union +from test.utils.literal import LiteralChecker +from typing import Any, Callable, Generator, Iterable, Optional, Type, Union import isodate import pytest @@ -962,3 +963,74 @@ def unlexify(s: str) -> str: and caplog.records[0].exc_info and str(caplog.records[0].exc_info[1]) == "TEST_EXCEPTION" ) + + +@pytest.mark.parametrize( + ["literal_maker", "checks"], + [ + ( + lambda: Literal("foo"), + LiteralChecker("foo", None, None, None, "foo"), + ), + ( + lambda: Literal("foo", None, ""), + LiteralChecker(None, None, URIRef(""), None, "foo"), + ), + ( + lambda: Literal("foo", None, XSD.string), + LiteralChecker("foo", None, XSD.string, False, "foo"), + ), + ( + lambda: Literal("1", None, XSD.integer), + LiteralChecker(1, None, XSD.integer, False, "1"), + ), + ( + lambda: Literal("1", "en", XSD.integer), + TypeError, + ), + ( + lambda: Literal(Literal("1", None, XSD.integer)), + Literal("1", None, XSD.integer), + ), + ( + lambda: Literal(Literal("1", None, "")), + [LiteralChecker(None, None, URIRef(""), None, "1"), Literal("1", None, "")], + ), + (lambda: Literal(Literal("1")), Literal("1")), + ( + lambda: Literal(Literal("blue sky", "en")), + Literal("blue sky", "en"), + ), + ], +) +def test_literal_construction( + literal_maker: Callable[[], Literal], + checks: Union[ + Iterable[Union[LiteralChecker, Literal]], + LiteralChecker, + Literal, + Type[Exception], + ], +) -> None: + check_error: Optional[Type[Exception]] = None + if isinstance(checks, type) and issubclass(checks, Exception): + check_error = checks + checks = [] + elif not isinstance(checks, Iterable): + checks = [checks] + + catcher: Optional[pytest.ExceptionInfo[Exception]] = None + with ExitStack() as xstack: + if check_error is not None: + catcher = xstack.enter_context(pytest.raises(check_error)) + literal = literal_maker() + + if check_error is not None: + assert catcher is not None + assert catcher.value is not None + + for check in checks: + if isinstance(check, LiteralChecker): + check.check(literal) + else: + check = literal diff --git a/test/test_util.py b/test/test_util.py index 3ca542705..f15e35cb1 100644 --- a/test/test_util.py +++ b/test/test_util.py @@ -347,19 +347,45 @@ def test_util_from_n3_not_escapes_xf(self, string: str) -> None: @pytest.mark.parametrize( - ["params", "expected_result"], + ["params", "default", "expected_result"], [ - ([], None), - (["something"], "something"), - ([False, "something"], False), - (["", "something"], ""), - ([0, "something"], 0), - ([None, "something", 1], "something"), - (["something", None, 1], "something"), + ([], ..., None), + (["something"], ..., "something"), + ([False, "something"], ..., False), + (["", "something"], ..., ""), + ([0, "something"], ..., 0), + ([None, "something", 1], ..., "something"), + (["something", None, 1], ..., "something"), + (["something", None, 1], 5, "something"), + ([], 5, 5), + ([None], 5, 5), + ([None, None], 5, 5), + ([None, None], 5, 5), ], ) -def test__coalesce(params: Collection[Any], expected_result: Any) -> None: - assert expected_result == _coalesce(*params) +def test__coalesce(params: Collection[Any], default: Any, expected_result: Any) -> None: + if default == Ellipsis: + result = _coalesce(*params) + else: + result = _coalesce(*params, default) + assert expected_result == result + + +def test__coalesce_typing() -> None: + """ + type checking for _coalesce behaves as expected. + """ + str_value: str + optional_str_value: Optional[str] + + optional_str_value = _coalesce(None, "a", None) + assert optional_str_value == "a" + + str_value = _coalesce(None, "a", None) # type: ignore[assignment] + assert str_value == "a" + + str_value = _coalesce(None, "a", None, default="3") + assert str_value == "a" @pytest.mark.parametrize( diff --git a/test/utils/literal.py b/test/utils/literal.py new file mode 100644 index 000000000..1b3f37988 --- /dev/null +++ b/test/utils/literal.py @@ -0,0 +1,28 @@ +from __future__ import annotations + +import builtins +from dataclasses import dataclass +from typing import Any, Union + +from rdflib.term import Literal, URIRef + + +@dataclass +class LiteralChecker: + value: Union[builtins.ellipsis, Any] = ... + language: Union[builtins.ellipsis, str, None] = ... + datatype: Union[builtins.ellipsis, URIRef, None] = ... + ill_typed: Union[builtins.ellipsis, bool, None] = ... + lexical: Union[builtins.ellipsis, str] = ... + + def check(self, actual: Literal) -> None: + if self.value is not Ellipsis: + assert self.value == actual.value + if self.lexical is not Ellipsis: + assert self.lexical == f"{actual}" + if self.ill_typed is not Ellipsis: + assert self.ill_typed == actual.ill_typed + if self.language is not Ellipsis: + assert self.language == actual.language + if self.datatype is not Ellipsis: + assert self.datatype == actual.datatype