Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Redo XSD Datetime, Date, Time, Duration parser and serializers #2929

Merged
merged 14 commits into from
Oct 16, 2024
Merged
Show file tree
Hide file tree
Changes from 12 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions devtools/constraints.min
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
# This file selects minimum versions to ensure that the test suite passes on
# these versions. The file's extension (`.min`) is chosen to evade Dependabot
# which operates on `*.{txt,in}` files.
isodate==0.6.0
isodate==0.7.2; python_version < "3.11"
pyparsing==2.1.0
importlib-metadata==4.0.0
berkeleydb==18.1.2
networkx==2.0
html5lib==1.0.1
html5lib-modern==1.2.0
lxml==4.3.0
orjson==3.9.14
6 changes: 4 additions & 2 deletions docker/latest/requirements.in
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
# This file is used for building a docker image of hte latest rdflib release. It
# This file is used for building a docker image of the latest rdflib release. It
# will be updated by dependabot when new releases are made.
rdflib==7.0.0
html5lib
html5lib-modern==1.2.0
# isodate is required to allow the Dockerfile to build with pre-RDFLib-7.1 releases.
isodate==0.7.2
8 changes: 4 additions & 4 deletions docker/latest/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,12 @@
#
html5lib-modern==1.2
# via -r docker/latest/requirements.in
isodate==0.6.1
isodate==0.7.2; python_version < "3.11"
# via rdflib
pyparsing==3.0.9
# via rdflib
rdflib==7.0.0
# via -r docker/latest/requirements.in
six==1.16.0
# via
# isodate
# isodate is required to allow the Dockerfile to build with pre-RDFLib-7.1 releases.
isodate==0.7.2
# via -r docker/latest/requirements.in
3 changes: 2 additions & 1 deletion docs/rdf_terms.rst
Original file line number Diff line number Diff line change
Expand Up @@ -207,7 +207,8 @@ rdf:HTML :class:`xml.dom.minidom.DocumentFragment`
.. [#f1] plain literals map directly to value space

.. [#f2] Date, time and datetime literals are mapped to Python
instances using the `isodate <http://pypi.python.org/pypi/isodate/>`_
instances using the RDFLib xsd_datetime module, which is based
on the `isodate <http://pypi.python.org/pypi/isodate/>`_
package.

.. [#f3] this is a bit dirty - by accident the ``html5lib`` parser
Expand Down
25 changes: 5 additions & 20 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ rdfgraphisomorphism = 'rdflib.tools.graphisomorphism:main'

[tool.poetry.dependencies]
python = "^3.8.1"
isodate = "^0.6.0"
isodate = {version=">=0.7.2,<1.0.0", python = "<3.11"}
pyparsing = ">=2.1.0,<4"
berkeleydb = {version = "^18.1.0", optional = true}
networkx = {version = ">=2,<4", optional = true}
Expand Down
19 changes: 12 additions & 7 deletions rdflib/plugins/sparql/operators.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@
from typing import Any, Callable, Dict, NoReturn, Optional, Tuple, Union, overload
from urllib.parse import quote

import isodate
from pyparsing import ParseResults

from rdflib.namespace import RDF, XSD
Expand All @@ -47,6 +46,7 @@
URIRef,
Variable,
)
from rdflib.xsd_datetime import Duration, parse_datetime # type: ignore[attr-defined]


def Builtin_IRI(expr: Expr, ctx: FrozenBindings) -> URIRef:
Expand Down Expand Up @@ -521,8 +521,13 @@ def Builtin_TZ(e: Expr, ctx) -> Literal:
if not d.tzinfo:
return Literal("")
n = d.tzinfo.tzname(d)
if n == "UTC":
if n is None:
n = ""
elif n == "UTC":
n = "Z"
elif n.startswith("UTC"):
# Replace tzname like "UTC-05:00" with simply "-05:00" to match Jena tz fn
n = n[3:]
Copy link
Contributor Author

@ashleysommer ashleysommer Oct 15, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This change was required because the previous implementation used tzinfo (timezone) instances from isodate, whereas the new implementation always uses timezones from the stdlib. The SPARQL builtin TZ() returns the "timezone name" if known; in Python this comes from the tzname() method.
The difference is that stdlib timezones and isodate timezones generate their tzname() values differently. In isodate, an unnamed timezone with a "-5H" offset has a tzname of "-05:00", but in the stdlib an unnamed timezone with a "-5H" offset has a tzname of "UTC-05:00" (more correct IMHO).

So, for consistency and backwards compatibility, this change was added to normalize the tzname output. I also tested Jena's TZ() SPARQL function, and it outputs "-05:00" too, just as isodate did.

return Literal(n)


Expand Down Expand Up @@ -687,7 +692,7 @@ def default_cast(e: Expr, ctx: FrozenBindings) -> Literal: # type: ignore[retur
if x.datatype and x.datatype not in (XSD.dateTime, XSD.string):
raise SPARQLError("Cannot cast %r to XSD:dateTime" % x.datatype)
try:
return Literal(isodate.parse_datetime(x), datatype=e.iri)
return Literal(parse_datetime(x), datatype=e.iri)
except: # noqa: E722
raise SPARQLError("Cannot interpret '%r' as datetime" % x)

Expand Down Expand Up @@ -1085,7 +1090,7 @@ def dateTimeObjects(expr: Literal) -> Any:
def isCompatibleDateTimeDatatype( # type: ignore[return]
obj1: Union[py_datetime.date, py_datetime.datetime],
dt1: URIRef,
obj2: Union[isodate.Duration, py_datetime.timedelta],
obj2: Union[Duration, py_datetime.timedelta],
dt2: URIRef,
) -> bool:
"""
Expand All @@ -1098,7 +1103,7 @@ def isCompatibleDateTimeDatatype( # type: ignore[return]
return True
elif dt2 == XSD.dayTimeDuration or dt2 == XSD.Duration:
# checking if the dayTimeDuration has no Time Component
# else it wont be compatible with Date Literal
# else it won't be compatible with Date Literal
if "T" in str(obj2):
return False
else:
Expand All @@ -1110,7 +1115,7 @@ def isCompatibleDateTimeDatatype( # type: ignore[return]
elif dt2 == XSD.dayTimeDuration or dt2 == XSD.Duration:
# checking if the dayTimeDuration has no Date Component
# (by checking if the format is "PT...." )
# else it wont be compatible with Time Literal
# else it won't be compatible with Time Literal
if "T" == str(obj2)[1]:
return True
else:
Expand Down Expand Up @@ -1139,7 +1144,7 @@ def calculateDuration(
def calculateFinalDateTime(
obj1: Union[py_datetime.date, py_datetime.datetime],
dt1: URIRef,
obj2: Union[isodate.Duration, py_datetime.timedelta],
obj2: Union[Duration, py_datetime.timedelta],
dt2: URIRef,
operation: str,
) -> Literal:
Expand Down
4 changes: 1 addition & 3 deletions rdflib/plugins/sparql/sparql.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,6 @@
Union,
)

import isodate

import rdflib.plugins.sparql
from rdflib.graph import ConjunctiveGraph, Dataset, Graph
from rdflib.namespace import NamespaceManager
Expand Down Expand Up @@ -302,7 +300,7 @@ def __init__(
@property
def now(self) -> datetime.datetime:
if self._now is None:
self._now = datetime.datetime.now(isodate.tzinfo.UTC)
self._now = datetime.datetime.now(datetime.timezone.utc)
return self._now

def clone(
Expand Down
32 changes: 17 additions & 15 deletions rdflib/term.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,6 @@
"Literal",
"Variable",
]

import logging
import math
import warnings
Expand Down Expand Up @@ -67,19 +66,22 @@
from uuid import uuid4

import html5lib
from isodate import (

import rdflib
import rdflib.util
from rdflib.compat import long_type

from .xsd_datetime import ( # type: ignore[attr-defined]
Duration,
duration_isoformat,
parse_date,
parse_datetime,
parse_duration,
parse_time,
parse_xsd_date,
parse_xsd_duration,
parse_xsd_gyear,
parse_xsd_gyearmonth,
)

import rdflib
import rdflib.util
from rdflib.compat import long_type

if TYPE_CHECKING:
from .namespace import NamespaceManager
from .paths import AlternativePath, InvPath, NegatedPath, Path, SequencePath
Expand Down Expand Up @@ -1424,7 +1426,7 @@ def eq(self, other: Any) -> bool:
):
return self.value == other
# NOTE for type ignore: bool is a subclass of int so this won't ever run.
elif isinstance(other, bool): # type: ignore[unreachable]
elif isinstance(other, bool): # type: ignore[unreachable, unused-ignore]
if self.datatype == _XSD_BOOLEAN:
return self.value == other

Expand Down Expand Up @@ -2030,13 +2032,13 @@ def _castPythonToLiteral( # noqa: N802
XSDToPython: Dict[Optional[str], Optional[Callable[[str], Any]]] = {
None: None, # plain literals map directly to value space
URIRef(_XSD_PFX + "time"): parse_time,
URIRef(_XSD_PFX + "date"): parse_date,
URIRef(_XSD_PFX + "gYear"): parse_date,
URIRef(_XSD_PFX + "gYearMonth"): parse_date,
URIRef(_XSD_PFX + "date"): parse_xsd_date,
URIRef(_XSD_PFX + "gYear"): parse_xsd_gyear,
URIRef(_XSD_PFX + "gYearMonth"): parse_xsd_gyearmonth,
URIRef(_XSD_PFX + "dateTime"): parse_datetime,
URIRef(_XSD_PFX + "duration"): parse_duration,
URIRef(_XSD_PFX + "dayTimeDuration"): parse_duration,
URIRef(_XSD_PFX + "yearMonthDuration"): parse_duration,
URIRef(_XSD_PFX + "duration"): parse_xsd_duration,
URIRef(_XSD_PFX + "dayTimeDuration"): parse_xsd_duration,
URIRef(_XSD_PFX + "yearMonthDuration"): parse_xsd_duration,
URIRef(_XSD_PFX + "hexBinary"): _unhexlify,
URIRef(_XSD_PFX + "string"): None,
URIRef(_XSD_PFX + "normalizedString"): None,
Expand Down
Loading
Loading