From 453b6310ad43a4fe508ba80aa3ed6110b60aee0c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kai=20M=C3=BChlbauer?= Date: Tue, 17 Dec 2024 10:57:39 +0100 Subject: [PATCH 01/16] enhance and move iso-8601-parser to coding.times --- xarray/coding/cftime_offsets.py | 3 +- xarray/coding/cftimeindex.py | 73 +------------------------------- xarray/coding/times.py | 70 ++++++++++++++++++++++++++++++ xarray/tests/test_cftimeindex.py | 4 +- 4 files changed, 77 insertions(+), 73 deletions(-) diff --git a/xarray/coding/cftime_offsets.py b/xarray/coding/cftime_offsets.py index 2cd8eccd6f3..1f75d4a8613 100644 --- a/xarray/coding/cftime_offsets.py +++ b/xarray/coding/cftime_offsets.py @@ -53,9 +53,10 @@ import pandas as pd from packaging.version import Version -from xarray.coding.cftimeindex import CFTimeIndex, _parse_iso8601_with_reso +from xarray.coding.cftimeindex import CFTimeIndex from xarray.coding.times import ( _is_standard_calendar, + _parse_iso8601_with_reso, _should_cftime_be_used, convert_time_or_go_back, format_cftime_datetime, diff --git a/xarray/coding/cftimeindex.py b/xarray/coding/cftimeindex.py index 0494952fc9c..2285c5db99d 100644 --- a/xarray/coding/cftimeindex.py +++ b/xarray/coding/cftimeindex.py @@ -42,7 +42,6 @@ from __future__ import annotations import math -import re import warnings from datetime import timedelta from typing import TYPE_CHECKING, Any @@ -53,6 +52,8 @@ from xarray.coding.times import ( _STANDARD_CALENDARS, + _parse_iso8601_with_reso, + _parse_iso8601_without_reso, cftime_to_nptime, infer_calendar_name, ) @@ -78,71 +79,6 @@ OUT_OF_BOUNDS_TIMEDELTA_ERRORS = (OverflowError,) -def named(name, pattern): - return "(?P<" + name + ">" + pattern + ")" - - -def optional(x): - return "(?:" + x + ")?" - - -def trailing_optional(xs): - if not xs: - return "" - return xs[0] + optional(trailing_optional(xs[1:])) - - -def build_pattern(date_sep=r"\-", datetime_sep=r"T", time_sep=r"\:", micro_sep=r"."): - pieces = [ - (None, "year", r"\d{4}"), - (date_sep, "month", r"\d{2}"), - (date_sep, "day", r"\d{2}"), - (datetime_sep, "hour", r"\d{2}"), - (time_sep, "minute", r"\d{2}"), - (time_sep, "second", r"\d{2}"), - (micro_sep, "microsecond", r"\d{1,6}"), - ] - pattern_list = [] - for sep, name, sub_pattern in pieces: - pattern_list.append((sep if sep else "") + named(name, sub_pattern)) - # TODO: allow timezone offsets? - return "^" + trailing_optional(pattern_list) + "$" - - -_BASIC_PATTERN = build_pattern(date_sep="", time_sep="") -_EXTENDED_PATTERN = build_pattern() -_CFTIME_PATTERN = build_pattern(datetime_sep=" ") -_PATTERNS = [_BASIC_PATTERN, _EXTENDED_PATTERN, _CFTIME_PATTERN] - - -def parse_iso8601_like(datetime_string): - for pattern in _PATTERNS: - match = re.match(pattern, datetime_string) - if match: - return match.groupdict() - raise ValueError( - f"no ISO-8601 or cftime-string-like match for string: {datetime_string}" - ) - - -def _parse_iso8601_with_reso(date_type, timestr): - _ = attempt_import("cftime") - - default = date_type(1, 1, 1) - result = parse_iso8601_like(timestr) - replace = {} - - for attr in ["year", "month", "day", "hour", "minute", "second", "microsecond"]: - value = result.get(attr, None) - if value is not None: - if attr == "microsecond": - # convert match string into valid microsecond value - value = 10 ** (6 - len(value)) * int(value) - replace[attr] = int(value) - resolution = attr - return default.replace(**replace), resolution - - def _parsed_string_to_bounds(date_type, resolution, parsed): """Generalization of pandas.tseries.index.DatetimeIndex._parsed_string_to_bounds @@ -811,11 +747,6 @@ def is_leap_year(self): return func(self.year, calendar=self.calendar) -def _parse_iso8601_without_reso(date_type, datetime_str): - date, _ = _parse_iso8601_with_reso(date_type, datetime_str) - return date - - def _parse_array_of_cftime_strings(strings, date_type): """Create a numpy array from an array of strings. diff --git a/xarray/coding/times.py b/xarray/coding/times.py index 4622298e152..2bd001e1588 100644 --- a/xarray/coding/times.py +++ b/xarray/coding/times.py @@ -189,6 +189,76 @@ def _unpack_netcdf_time_units(units: str) -> tuple[str, str]: return delta_units, ref_date +def named(name, pattern): + return "(?P<" + name + ">" + pattern + ")" + + +def optional(x): + return "(?:" + x + ")?" + + +def trailing_optional(xs): + if not xs: + return "" + return xs[0] + optional(trailing_optional(xs[1:])) + + +def build_pattern(date_sep=r"\-", datetime_sep=r"T", time_sep=r"\:", micro_sep=r"."): + pieces = [ + (None, "year", r"[+-]?\d{4,5}"), + (date_sep, "month", r"\d{2}"), + (date_sep, "day", r"\d{2}"), + (datetime_sep, "hour", r"\d{2}"), + (time_sep, "minute", r"\d{2}"), + (time_sep, "second", r"\d{2}"), + (micro_sep, "microsecond", r"\d{1,6}"), + ] + pattern_list = [] + for sep, name, sub_pattern in pieces: + pattern_list.append((sep if sep else "") + named(name, sub_pattern)) + # TODO: allow timezone offsets? + return "^" + trailing_optional(pattern_list) + "$" + + +_BASIC_PATTERN = build_pattern(date_sep="", time_sep="") +_EXTENDED_PATTERN = build_pattern() +_CFTIME_PATTERN = build_pattern(datetime_sep=" ") +_PATTERNS = [_BASIC_PATTERN, _EXTENDED_PATTERN, _CFTIME_PATTERN] + + +def parse_iso8601_like(datetime_string): + for pattern in _PATTERNS: + match = re.match(pattern, datetime_string) + if match: + return match.groupdict() + raise ValueError( + f"no ISO-8601 or cftime-string-like match for string: {datetime_string}" + ) + + +def _parse_iso8601_with_reso(date_type, timestr): + default = date_type(1, 1, 1) + result = parse_iso8601_like(timestr) + replace = {} + + for attr in ["year", "month", "day", "hour", "minute", "second", "microsecond"]: + value = result.get(attr, None) + if value is not None: + resolution = attr + if attr == "microsecond": + if len(value) <= 3: + resolution = "millisecond" + # convert match string into valid microsecond value + value = 10 ** (6 - len(value)) * int(value) + replace[attr] = int(value) + return default.replace(**replace), resolution + + +def _parse_iso8601_without_reso(date_type, datetime_str): + date, _ = _parse_iso8601_with_reso(date_type, datetime_str) + return date + + def _unpack_time_units_and_ref_date(units: str) -> tuple[str, pd.Timestamp]: # same us _unpack_netcdf_time_units but finalizes ref_date for # processing in encode_cf_datetime diff --git a/xarray/tests/test_cftimeindex.py b/xarray/tests/test_cftimeindex.py index d1fccc52a9a..1ef50cf925b 100644 --- a/xarray/tests/test_cftimeindex.py +++ b/xarray/tests/test_cftimeindex.py @@ -12,9 +12,11 @@ from xarray.coding.cftimeindex import ( CFTimeIndex, _parse_array_of_cftime_strings, - _parse_iso8601_with_reso, _parsed_string_to_bounds, assert_all_valid_date_type, +) +from xarray.coding.times import ( + _parse_iso8601_with_reso, parse_iso8601_like, ) from xarray.tests import ( From e358a30ce45d1369191e17c97b80e0036945eacc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kai=20M=C3=BChlbauer?= Date: Tue, 17 Dec 2024 10:59:48 +0100 Subject: [PATCH 02/16] enhance test for iso 8601 parser --- xarray/tests/test_cftimeindex.py | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/xarray/tests/test_cftimeindex.py b/xarray/tests/test_cftimeindex.py index 1ef50cf925b..fb061f4c89d 100644 --- a/xarray/tests/test_cftimeindex.py +++ b/xarray/tests/test_cftimeindex.py @@ -134,16 +134,26 @@ def date_dict( list(ISO8601_LIKE_STRING_TESTS.values()), ids=list(ISO8601_LIKE_STRING_TESTS.keys()), ) -def test_parse_iso8601_like(string, expected): - result = parse_iso8601_like(string) +@pytest.mark.parametrize("five", [False, True], ids=["4Y", "5Y"]) +@pytest.mark.parametrize("sign", ["", "+", "-"], ids=["None", "plus", "minus"]) +def test_parse_iso8601_like(five, sign, string, expected): + pre = "1" if five else "" + datestring = sign + pre + string + result = parse_iso8601_like(datestring) + expected = expected.copy() + expected.update(year=sign + pre + expected["year"]) assert result == expected - if result["microsecond"] is None: + # check malformed single digit addendum + # tests for year/month/day excluded as year can be 4 or 5 digits + if result["microsecond"] is None and result["hour"] is not None: with pytest.raises(ValueError): - parse_iso8601_like(string + "3") - if result["second"] is None: + parse_iso8601_like(datestring + "3") + + # check malformed floating point addendum + if result["second"] is None or result["microsecond"] is not None: with pytest.raises(ValueError): - parse_iso8601_like(string + ".3") + parse_iso8601_like(datestring + ".3") _CFTIME_CALENDARS = [ From c395bc82e565a197ba1e6f9323a1f4a7f7c38e67 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kai=20M=C3=BChlbauer?= Date: Tue, 17 Dec 2024 12:00:44 +0100 Subject: [PATCH 03/16] add whats-new.rst entry --- doc/whats-new.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index ecf1702c356..f7c895bcd50 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -61,6 +61,8 @@ Internal Changes ~~~~~~~~~~~~~~~~ - Move non-CF related ``ensure_dtype_not_object`` from conventions to backends (:pull:`9828`). By `Kai Mühlbauer `_. +- Move ISO-8601 parser from coding.cftimeindex to coding.times to make it available there (prevents circular import) (:pull:`9899`). + By `Kai Mühlbauer `_. .. _whats-new.2024.11.0: From 88573ba48ac4ef82b2a3bdfb8fc84320b0341f7e Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Tue, 17 Dec 2024 08:23:09 -0700 Subject: [PATCH 04/16] add datetime property test --- properties/test_encode_decode.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/properties/test_encode_decode.py b/properties/test_encode_decode.py index e7eece7e81e..211c33ff6b5 100644 --- a/properties/test_encode_decode.py +++ b/properties/test_encode_decode.py @@ -11,10 +11,12 @@ # isort: split import hypothesis.extra.numpy as npst +import hypothesis.strategies as st import numpy as np from hypothesis import given import xarray as xr +from xarray.coding.times import _parse_iso8601_without_reso from xarray.testing.strategies import variables @@ -43,3 +45,10 @@ def test_CFScaleOffset_coder_roundtrip(original) -> None: coder = xr.coding.variables.CFScaleOffsetCoder() roundtripped = coder.decode(coder.encode(original)) xr.testing.assert_identical(original, roundtripped) + + +# TODO: add cftime.datetime +@given(dt=st.datetimes()) +def test_iso8601_decode(dt): + iso = dt.isoformat() + assert dt == _parse_iso8601_without_reso(type(dt), iso) From dd16f39a1b38b6d2d6d62935c1ef7bc259eaad55 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kai=20M=C3=BChlbauer?= Date: Tue, 17 Dec 2024 17:06:26 +0100 Subject: [PATCH 05/16] Apply suggestions from code review Co-authored-by: Deepak Cherian --- xarray/tests/test_cftimeindex.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/xarray/tests/test_cftimeindex.py b/xarray/tests/test_cftimeindex.py index fb061f4c89d..49988658550 100644 --- a/xarray/tests/test_cftimeindex.py +++ b/xarray/tests/test_cftimeindex.py @@ -134,7 +134,7 @@ def date_dict( list(ISO8601_LIKE_STRING_TESTS.values()), ids=list(ISO8601_LIKE_STRING_TESTS.keys()), ) -@pytest.mark.parametrize("five", [False, True], ids=["4Y", "5Y"]) +@pytest.mark.parametrize("five-digit-year", [False, True], ids=["4Y", "5Y"]) @pytest.mark.parametrize("sign", ["", "+", "-"], ids=["None", "plus", "minus"]) def test_parse_iso8601_like(five, sign, string, expected): pre = "1" if five else "" @@ -145,7 +145,11 @@ def test_parse_iso8601_like(five, sign, string, expected): assert result == expected # check malformed single digit addendum - # tests for year/month/day excluded as year can be 4 or 5 digits + # this check is only performed when we have at least "hour" given + # like "1999010101", where a single added digit should raise + # for "1999" (year), "199901" (month) and "19990101" (day) + # and a single added digit the string would just be interpreted + # as having a 5-digit year. if result["microsecond"] is None and result["hour"] is not None: with pytest.raises(ValueError): parse_iso8601_like(datestring + "3") From fc861604fb750977c19aaccdbf8f61bfd9a147fe Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 17 Dec 2024 16:07:54 +0000 Subject: [PATCH 06/16] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- xarray/tests/test_cftimeindex.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/xarray/tests/test_cftimeindex.py b/xarray/tests/test_cftimeindex.py index 49988658550..ef30516b468 100644 --- a/xarray/tests/test_cftimeindex.py +++ b/xarray/tests/test_cftimeindex.py @@ -145,10 +145,10 @@ def test_parse_iso8601_like(five, sign, string, expected): assert result == expected # check malformed single digit addendum - # this check is only performed when we have at least "hour" given + # this check is only performed when we have at least "hour" given # like "1999010101", where a single added digit should raise - # for "1999" (year), "199901" (month) and "19990101" (day) - # and a single added digit the string would just be interpreted + # for "1999" (year), "199901" (month) and "19990101" (day) + # and a single added digit the string would just be interpreted # as having a 5-digit year. if result["microsecond"] is None and result["hour"] is not None: with pytest.raises(ValueError): From 0af4ff1af950165f30a0ddbfce93bc8e7b8a078c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kai=20M=C3=BChlbauer?= Date: Tue, 17 Dec 2024 17:40:47 +0100 Subject: [PATCH 07/16] fix test --- xarray/tests/test_cftimeindex.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/xarray/tests/test_cftimeindex.py b/xarray/tests/test_cftimeindex.py index ef30516b468..20d14b053cf 100644 --- a/xarray/tests/test_cftimeindex.py +++ b/xarray/tests/test_cftimeindex.py @@ -134,10 +134,10 @@ def date_dict( list(ISO8601_LIKE_STRING_TESTS.values()), ids=list(ISO8601_LIKE_STRING_TESTS.keys()), ) -@pytest.mark.parametrize("five-digit-year", [False, True], ids=["4Y", "5Y"]) +@pytest.mark.parametrize("five_digit_year", [False, True], ids=["4Y", "5Y"]) @pytest.mark.parametrize("sign", ["", "+", "-"], ids=["None", "plus", "minus"]) -def test_parse_iso8601_like(five, sign, string, expected): - pre = "1" if five else "" +def test_parse_iso8601_like(five_digit_year, sign, string, expected): + pre = "1" if five_digit_year else "" datestring = sign + pre + string result = parse_iso8601_like(datestring) expected = expected.copy() From a6f41c0c0b566ee4838401cf5be19c40d64d1c57 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kai=20M=C3=BChlbauer?= Date: Sun, 29 Dec 2024 13:55:26 +0100 Subject: [PATCH 08/16] Update xarray/tests/test_cftimeindex.py Co-authored-by: Spencer Clark --- xarray/tests/test_cftimeindex.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/tests/test_cftimeindex.py b/xarray/tests/test_cftimeindex.py index 20d14b053cf..09d9f69d062 100644 --- a/xarray/tests/test_cftimeindex.py +++ b/xarray/tests/test_cftimeindex.py @@ -134,7 +134,7 @@ def date_dict( list(ISO8601_LIKE_STRING_TESTS.values()), ids=list(ISO8601_LIKE_STRING_TESTS.keys()), ) -@pytest.mark.parametrize("five_digit_year", [False, True], ids=["4Y", "5Y"]) +@pytest.mark.parametrize("five_digit_year", [False, True], ids=["four-digit-year", "five-digit-year"]) @pytest.mark.parametrize("sign", ["", "+", "-"], ids=["None", "plus", "minus"]) def test_parse_iso8601_like(five_digit_year, sign, string, expected): pre = "1" if five_digit_year else "" From a2de3077ef57c4f2ef2f64413c3924ecc52f62d7 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sun, 29 Dec 2024 12:55:43 +0000 Subject: [PATCH 09/16] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- xarray/tests/test_cftimeindex.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/xarray/tests/test_cftimeindex.py b/xarray/tests/test_cftimeindex.py index 09d9f69d062..4f2880ddc31 100644 --- a/xarray/tests/test_cftimeindex.py +++ b/xarray/tests/test_cftimeindex.py @@ -134,7 +134,9 @@ def date_dict( list(ISO8601_LIKE_STRING_TESTS.values()), ids=list(ISO8601_LIKE_STRING_TESTS.keys()), ) -@pytest.mark.parametrize("five_digit_year", [False, True], ids=["four-digit-year", "five-digit-year"]) +@pytest.mark.parametrize( + "five_digit_year", [False, True], ids=["four-digit-year", "five-digit-year"] +) @pytest.mark.parametrize("sign", ["", "+", "-"], ids=["None", "plus", "minus"]) def test_parse_iso8601_like(five_digit_year, sign, string, expected): pre = "1" if five_digit_year else "" From 3eb8ba7077a1d66e6aa52445b872255b725d2b22 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kai=20M=C3=BChlbauer?= Date: Sun, 29 Dec 2024 14:09:38 +0100 Subject: [PATCH 10/16] Update doc/whats-new.rst --- doc/whats-new.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index c59aa3f6421..9bba768cd9a 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -73,7 +73,7 @@ Internal Changes within ``as_compatible_data``. This is consistent with how lists of these objects will be converted (:pull:`9900`). By `Kai Mühlbauer `_. -- Move ISO-8601 parser from coding.cftimeindex to coding.times to make it available there (prevents circular import) (:pull:`9899`). +- Move ISO-8601 parser from coding.cftimeindex to coding.times to make it available there (prevents circular import), add capability to parse negative and/or five-digit years (:pull:`9899`). By `Kai Mühlbauer `_. From 81d44ef1ed699949dd8d1eb57a5fda2839a1dd09 Mon Sep 17 00:00:00 2001 From: Spencer Clark Date: Sun, 29 Dec 2024 10:05:28 -0500 Subject: [PATCH 11/16] Add hypothesis strategy for cftime objects (#2) --- properties/test_encode_decode.py | 13 ++++++++---- xarray/testing/strategies.py | 34 ++++++++++++++++++++++++++++++++ 2 files changed, 43 insertions(+), 4 deletions(-) diff --git a/properties/test_encode_decode.py b/properties/test_encode_decode.py index 211c33ff6b5..374c6b33b93 100644 --- a/properties/test_encode_decode.py +++ b/properties/test_encode_decode.py @@ -5,6 +5,8 @@ """ +import warnings + import pytest pytest.importorskip("hypothesis") @@ -17,7 +19,8 @@ import xarray as xr from xarray.coding.times import _parse_iso8601_without_reso -from xarray.testing.strategies import variables +from xarray.testing.strategies import CFTimeStrategyISO8601, variables +from xarray.tests import requires_cftime @pytest.mark.slow @@ -47,8 +50,10 @@ def test_CFScaleOffset_coder_roundtrip(original) -> None: xr.testing.assert_identical(original, roundtripped) -# TODO: add cftime.datetime -@given(dt=st.datetimes()) +@requires_cftime +@given(dt=st.datetimes() | CFTimeStrategyISO8601()) def test_iso8601_decode(dt): iso = dt.isoformat() - assert dt == _parse_iso8601_without_reso(type(dt), iso) + with warnings.catch_warnings(): + warnings.filterwarnings("ignore", message=".*date/calendar/year zero.*") + assert dt == _parse_iso8601_without_reso(type(dt), iso) diff --git a/xarray/testing/strategies.py b/xarray/testing/strategies.py index cfa226d991c..340988f8036 100644 --- a/xarray/testing/strategies.py +++ b/xarray/testing/strategies.py @@ -1,3 +1,5 @@ +import datetime +import warnings from collections.abc import Hashable, Iterable, Mapping, Sequence from typing import TYPE_CHECKING, Any, Protocol, overload @@ -8,6 +10,7 @@ import xarray as xr from xarray.core.types import T_DuckArray from xarray.core.utils import attempt_import +from xarray.tests.test_coding_times import _all_cftime_date_types if TYPE_CHECKING: from xarray.core.types import _DTypeLikeNested, _ShapeLike @@ -473,3 +476,34 @@ def unique_subset_of( return ( {k: objs[k] for k in subset_keys} if isinstance(objs, Mapping) else subset_keys ) + + +class CFTimeStategy(st.SearchStrategy): + def __init__(self, min_value, max_value): + self.min_value = min_value + self.max_value = max_value + + def do_draw(self, data): + unit_microsecond = datetime.timedelta(microseconds=1) + timespan_microseconds = (self.max_value - self.min_value) // unit_microsecond + result = data.draw_integer(0, timespan_microseconds) + with warnings.catch_warnings(): + warnings.filterwarnings("ignore", message=".*date/calendar/year zero.*") + return self.min_value + datetime.timedelta(microseconds=result) + + +class CFTimeStrategyISO8601(st.SearchStrategy): + def __init__(self): + self.date_types = _all_cftime_date_types() + self.calendars = list(self.date_types) + + def do_draw(self, data): + calendar = data.draw(st.sampled_from(self.calendars)) + date_type = self.date_types[calendar] + with warnings.catch_warnings(): + warnings.filterwarnings("ignore", message=".*date/calendar/year zero.*") + daysinmonth = date_type(99999, 12, 1).daysinmonth + min_value = date_type(-99999, 1, 1) + max_value = date_type(99999, 12, daysinmonth, 23, 59, 59, 999999) + strategy = CFTimeStategy(min_value, max_value) + return strategy.do_draw(data) From f7340c41f36edbe4c1cfdcb913ef7d85fe28cfd7 Mon Sep 17 00:00:00 2001 From: Spencer Clark Date: Sun, 29 Dec 2024 11:37:18 -0500 Subject: [PATCH 12/16] Fix doc build --- xarray/testing/strategies.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/xarray/testing/strategies.py b/xarray/testing/strategies.py index 340988f8036..e60572fbddd 100644 --- a/xarray/testing/strategies.py +++ b/xarray/testing/strategies.py @@ -10,7 +10,6 @@ import xarray as xr from xarray.core.types import T_DuckArray from xarray.core.utils import attempt_import -from xarray.tests.test_coding_times import _all_cftime_date_types if TYPE_CHECKING: from xarray.core.types import _DTypeLikeNested, _ShapeLike @@ -494,6 +493,8 @@ def do_draw(self, data): class CFTimeStrategyISO8601(st.SearchStrategy): def __init__(self): + from xarray.tests.test_coding_times import _all_cftime_date_types + self.date_types = _all_cftime_date_types() self.calendars = list(self.date_types) From bd0825b06bdfca685f53f702081e15dff6aeaa3c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kai=20M=C3=BChlbauer?= Date: Mon, 30 Dec 2024 12:25:21 +0100 Subject: [PATCH 13/16] Apply suggestions from code review Co-authored-by: Michael Niklas --- xarray/coding/times.py | 10 +++++----- xarray/tests/test_cftimeindex.py | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/xarray/coding/times.py b/xarray/coding/times.py index 2bd001e1588..e5421033e20 100644 --- a/xarray/coding/times.py +++ b/xarray/coding/times.py @@ -189,21 +189,21 @@ def _unpack_netcdf_time_units(units: str) -> tuple[str, str]: return delta_units, ref_date -def named(name, pattern): +def named(name: str, pattern: str) -> str: return "(?P<" + name + ">" + pattern + ")" -def optional(x): +def optional(x: str) -> str: return "(?:" + x + ")?" -def trailing_optional(xs): +def trailing_optional(xs: list[str]) -> str: if not xs: return "" return xs[0] + optional(trailing_optional(xs[1:])) -def build_pattern(date_sep=r"\-", datetime_sep=r"T", time_sep=r"\:", micro_sep=r"."): +def build_pattern(date_sep: str = r"\-", datetime_sep: str = r"T", time_sep: str = r"\:", micro_sep: str = r".") -> str: pieces = [ (None, "year", r"[+-]?\d{4,5}"), (date_sep, "month", r"\d{2}"), @@ -226,7 +226,7 @@ def build_pattern(date_sep=r"\-", datetime_sep=r"T", time_sep=r"\:", micro_sep=r _PATTERNS = [_BASIC_PATTERN, _EXTENDED_PATTERN, _CFTIME_PATTERN] -def parse_iso8601_like(datetime_string): +def parse_iso8601_like(datetime_string: str) -> dict[str, str]: for pattern in _PATTERNS: match = re.match(pattern, datetime_string) if match: diff --git a/xarray/tests/test_cftimeindex.py b/xarray/tests/test_cftimeindex.py index 4f2880ddc31..f9c186da446 100644 --- a/xarray/tests/test_cftimeindex.py +++ b/xarray/tests/test_cftimeindex.py @@ -138,7 +138,7 @@ def date_dict( "five_digit_year", [False, True], ids=["four-digit-year", "five-digit-year"] ) @pytest.mark.parametrize("sign", ["", "+", "-"], ids=["None", "plus", "minus"]) -def test_parse_iso8601_like(five_digit_year, sign, string, expected): +def test_parse_iso8601_like(five_digit_year: bool, sign: str, string: str, expected: dict) -> None: pre = "1" if five_digit_year else "" datestring = sign + pre + string result = parse_iso8601_like(datestring) From 8a4d1afd7ab0e56eb12020a58a879c9bd67b93cc Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 30 Dec 2024 11:25:39 +0000 Subject: [PATCH 14/16] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- xarray/coding/times.py | 7 ++++++- xarray/tests/test_cftimeindex.py | 4 +++- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/xarray/coding/times.py b/xarray/coding/times.py index e5421033e20..252943c297b 100644 --- a/xarray/coding/times.py +++ b/xarray/coding/times.py @@ -203,7 +203,12 @@ def trailing_optional(xs: list[str]) -> str: return xs[0] + optional(trailing_optional(xs[1:])) -def build_pattern(date_sep: str = r"\-", datetime_sep: str = r"T", time_sep: str = r"\:", micro_sep: str = r".") -> str: +def build_pattern( + date_sep: str = r"\-", + datetime_sep: str = r"T", + time_sep: str = r"\:", + micro_sep: str = r".", +) -> str: pieces = [ (None, "year", r"[+-]?\d{4,5}"), (date_sep, "month", r"\d{2}"), diff --git a/xarray/tests/test_cftimeindex.py b/xarray/tests/test_cftimeindex.py index f9c186da446..80548b2af80 100644 --- a/xarray/tests/test_cftimeindex.py +++ b/xarray/tests/test_cftimeindex.py @@ -138,7 +138,9 @@ def date_dict( "five_digit_year", [False, True], ids=["four-digit-year", "five-digit-year"] ) @pytest.mark.parametrize("sign", ["", "+", "-"], ids=["None", "plus", "minus"]) -def test_parse_iso8601_like(five_digit_year: bool, sign: str, string: str, expected: dict) -> None: +def test_parse_iso8601_like( + five_digit_year: bool, sign: str, string: str, expected: dict +) -> None: pre = "1" if five_digit_year else "" datestring = sign + pre + string result = parse_iso8601_like(datestring) From 616a7e30e2baf7e9d643b1eee9dab777759fd817 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kai=20M=C3=BChlbauer?= Date: Mon, 30 Dec 2024 13:25:43 +0100 Subject: [PATCH 15/16] fix typing --- xarray/coding/times.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/coding/times.py b/xarray/coding/times.py index 252943c297b..7414ff4d3f1 100644 --- a/xarray/coding/times.py +++ b/xarray/coding/times.py @@ -231,7 +231,7 @@ def build_pattern( _PATTERNS = [_BASIC_PATTERN, _EXTENDED_PATTERN, _CFTIME_PATTERN] -def parse_iso8601_like(datetime_string: str) -> dict[str, str]: +def parse_iso8601_like(datetime_string: str) -> dict[str, str | None]: for pattern in _PATTERNS: match = re.match(pattern, datetime_string) if match: From c61f2cea350e6911b1551bd626966139296e8633 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kai=20M=C3=BChlbauer?= Date: Mon, 30 Dec 2024 13:32:24 +0100 Subject: [PATCH 16/16] rename _parse_iso8601_with_reso to _parse_iso8601, remove _parse_iso8601_without_reso --- properties/test_encode_decode.py | 5 +++-- xarray/coding/cftime_offsets.py | 4 ++-- xarray/coding/cftimeindex.py | 13 ++++++------- xarray/coding/times.py | 7 +------ xarray/tests/test_cftimeindex.py | 4 ++-- 5 files changed, 14 insertions(+), 19 deletions(-) diff --git a/properties/test_encode_decode.py b/properties/test_encode_decode.py index 374c6b33b93..1d5b43a6da6 100644 --- a/properties/test_encode_decode.py +++ b/properties/test_encode_decode.py @@ -18,7 +18,7 @@ from hypothesis import given import xarray as xr -from xarray.coding.times import _parse_iso8601_without_reso +from xarray.coding.times import _parse_iso8601 from xarray.testing.strategies import CFTimeStrategyISO8601, variables from xarray.tests import requires_cftime @@ -56,4 +56,5 @@ def test_iso8601_decode(dt): iso = dt.isoformat() with warnings.catch_warnings(): warnings.filterwarnings("ignore", message=".*date/calendar/year zero.*") - assert dt == _parse_iso8601_without_reso(type(dt), iso) + parsed, _ = _parse_iso8601(type(dt), iso) + assert dt == parsed diff --git a/xarray/coding/cftime_offsets.py b/xarray/coding/cftime_offsets.py index 1f75d4a8613..50b048a8e29 100644 --- a/xarray/coding/cftime_offsets.py +++ b/xarray/coding/cftime_offsets.py @@ -56,7 +56,7 @@ from xarray.coding.cftimeindex import CFTimeIndex from xarray.coding.times import ( _is_standard_calendar, - _parse_iso8601_with_reso, + _parse_iso8601, _should_cftime_be_used, convert_time_or_go_back, format_cftime_datetime, @@ -844,7 +844,7 @@ def to_cftime_datetime(date_str_or_date, calendar=None): "If converting a string to a cftime.datetime object, " "a calendar type must be provided" ) - date, _ = _parse_iso8601_with_reso(get_date_type(calendar), date_str_or_date) + date, _ = _parse_iso8601(get_date_type(calendar), date_str_or_date) return date elif isinstance(date_str_or_date, cftime.datetime): return date_str_or_date diff --git a/xarray/coding/cftimeindex.py b/xarray/coding/cftimeindex.py index 2285c5db99d..596a51a0dcf 100644 --- a/xarray/coding/cftimeindex.py +++ b/xarray/coding/cftimeindex.py @@ -52,8 +52,7 @@ from xarray.coding.times import ( _STANDARD_CALENDARS, - _parse_iso8601_with_reso, - _parse_iso8601_without_reso, + _parse_iso8601, cftime_to_nptime, infer_calendar_name, ) @@ -372,7 +371,7 @@ def _partial_date_slice(self, resolution, parsed): def _get_string_slice(self, key): """Adapted from pandas.tseries.index.DatetimeIndex._get_string_slice""" - parsed, resolution = _parse_iso8601_with_reso(self.date_type, key) + parsed, resolution = _parse_iso8601(self.date_type, key) try: loc = self._partial_date_slice(resolution, parsed) except KeyError as err: @@ -419,7 +418,7 @@ def _maybe_cast_slice_bound(self, label, side): if not isinstance(label, str): return label - parsed, resolution = _parse_iso8601_with_reso(self.date_type, label) + parsed, resolution = _parse_iso8601(self.date_type, label) start, end = _parsed_string_to_bounds(self.date_type, resolution, parsed) if self.is_monotonic_decreasing and len(self) > 1: return end if side == "left" else start @@ -764,9 +763,9 @@ def _parse_array_of_cftime_strings(strings, date_type): ------- np.array """ - return np.array( - [_parse_iso8601_without_reso(date_type, s) for s in strings.ravel()] - ).reshape(strings.shape) + return np.array([_parse_iso8601(date_type, s)[0] for s in strings.ravel()]).reshape( + strings.shape + ) def _contains_datetime_timedeltas(array): diff --git a/xarray/coding/times.py b/xarray/coding/times.py index 7414ff4d3f1..f111dcef5bf 100644 --- a/xarray/coding/times.py +++ b/xarray/coding/times.py @@ -241,7 +241,7 @@ def parse_iso8601_like(datetime_string: str) -> dict[str, str | None]: ) -def _parse_iso8601_with_reso(date_type, timestr): +def _parse_iso8601(date_type, timestr): default = date_type(1, 1, 1) result = parse_iso8601_like(timestr) replace = {} @@ -259,11 +259,6 @@ def _parse_iso8601_with_reso(date_type, timestr): return default.replace(**replace), resolution -def _parse_iso8601_without_reso(date_type, datetime_str): - date, _ = _parse_iso8601_with_reso(date_type, datetime_str) - return date - - def _unpack_time_units_and_ref_date(units: str) -> tuple[str, pd.Timestamp]: # same us _unpack_netcdf_time_units but finalizes ref_date for # processing in encode_cf_datetime diff --git a/xarray/tests/test_cftimeindex.py b/xarray/tests/test_cftimeindex.py index 80548b2af80..2f527bf298e 100644 --- a/xarray/tests/test_cftimeindex.py +++ b/xarray/tests/test_cftimeindex.py @@ -16,7 +16,7 @@ assert_all_valid_date_type, ) from xarray.coding.times import ( - _parse_iso8601_with_reso, + _parse_iso8601, parse_iso8601_like, ) from xarray.tests import ( @@ -368,7 +368,7 @@ def test_cftimeindex_days_in_month_accessor(index): def test_parse_iso8601_with_reso(date_type, string, date_args, reso): expected_date = date_type(*date_args) expected_reso = reso - result_date, result_reso = _parse_iso8601_with_reso(date_type, string) + result_date, result_reso = _parse_iso8601(date_type, string) assert result_date == expected_date assert result_reso == expected_reso