From 5cb989b7ec5403422918a90d82f73cdfa6efe2d4 Mon Sep 17 00:00:00 2001 From: Marco Gorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Sun, 9 Jun 2024 14:51:48 +0100 Subject: [PATCH] feat(python!): consistently convert to given time zone in Series constructor --- py-polars/polars/__init__.py | 2 + .../polars/_utils/construction/series.py | 72 ++++++++++++------- py-polars/polars/selectors.py | 13 ++-- .../testing/parametric/strategies/core.py | 10 ++- .../unit/constructors/test_constructors.py | 8 +-- .../tests/unit/constructors/test_series.py | 25 +++++-- py-polars/tests/unit/dataframe/test_df.py | 17 +++-- .../tests/unit/datatypes/test_temporal.py | 18 ++--- .../functions/range/test_datetime_range.py | 2 +- py-polars/tests/unit/io/test_delta.py | 6 +- .../temporal/test_add_business_days.py | 21 +++++- .../namespaces/temporal/test_datetime.py | 8 ++- .../namespaces/temporal/test_offset_by.py | 10 ++- .../operations/namespaces/test_strptime.py | 6 +- .../tests/unit/operations/test_ewm_by.py | 18 +++-- .../tests/unit/operations/test_interpolate.py | 33 +++++++-- .../unit/series/buffers/test_from_buffers.py | 18 +++-- py-polars/tests/unit/series/test_series.py | 4 +- 18 files changed, 197 insertions(+), 94 deletions(-) diff --git a/py-polars/polars/__init__.py b/py-polars/polars/__init__.py index e869efec4370..cbf3f88087f0 100644 --- a/py-polars/polars/__init__.py +++ b/py-polars/polars/__init__.py @@ -91,6 +91,7 @@ SQLInterfaceError, SQLSyntaxError, StructFieldNotFoundError, + TimeZoneAwareConstructorWarning, UnstableWarning, ) from polars.expr import Expr @@ -246,6 +247,7 @@ "ChronoFormatWarning", "MapWithoutReturnDtypeWarning", "PolarsWarning", + "TimeZoneAwareConstructorWarning", "UnstableWarning", # core classes "DataFrame", diff --git a/py-polars/polars/_utils/construction/series.py b/py-polars/polars/_utils/construction/series.py index aee1da92abd3..87d888dc410a 100644 --- a/py-polars/polars/_utils/construction/series.py +++ b/py-polars/polars/_utils/construction/series.py @@ -74,6 +74,30 @@ from polars.dependencies import pandas as pd from polars.type_aliases import PolarsDataType +TZ_NAIVE_VALUES_WITH_TZ_AWARE_DTYPE_MSG = ( + "Constructing a Series with time-zone-naive " + "datetimes and a time-zone-aware dtype results in a Series where " + "the datetimes are converted to the given time zone as if starting " + "from UTC.\n\n" + "Note: this is a breaking change since pre-1.0.0 behaviour.\n\n" + "Hint: to silence this warning, you can filter out " + "warnings of class pl.TimeZoneAwareConstructorWarning.\n" + "Alternatively, you can replace " + "`pl.Series(values, dtype=pl.Datetime({}, {}))` with one of:\n" + "- `pl.Series(values, dtype=pl.Datetime(time_unit)).dt.replace_time_zone(time_zone)`\n" + "- `pl.Series(values, dtype=pl.Datetime(time_unit)).dt.convert_time_zone(time_zone)`\n" + "depending on whether you want to replace or convert the time zone." +) +TZ_AWARE_VALUES_WITH_TZ_NAIVE_DTYPE_MSG = ( + "Constructing a Series with time-zone-aware " + "datetimes and a time-zone-naive dtype results in a Series where " + "the datetimes are converted to UTC.\n\n" + "Hint: to silence this warning, you can filter out " + "warnings of class pl.TimeZoneAwareConstructorWarning.\n" + "Alternatively, you can set the time zone in the `dtype`, e.g.:\n" + " pl.Series(values, dtype=pl.Datetime({}, 'UTC'))`" +) + def sequence_to_pyseries( name: str, @@ -203,41 +227,37 @@ def sequence_to_pyseries( s = wrap_s(py_series).dt.cast_time_unit(time_unit) if (values_dtype == Date) & (dtype == Datetime): - return ( - s.cast(Datetime(time_unit or "us")) - .dt.replace_time_zone( - time_zone, - ambiguous="raise" if strict else "null", - non_existent="raise" if strict else "null", - ) - ._s - ) + result = s.cast(Datetime(time_unit or "us")) + if time_zone is not None: + if time_zone != "UTC": + warnings.warn( + TZ_NAIVE_VALUES_WITH_TZ_AWARE_DTYPE_MSG.format( + time_unit or "us", time_zone + ), + TimeZoneAwareConstructorWarning, + stacklevel=find_stacklevel(), + ) + result = result.dt.convert_time_zone(time_zone) + return result._s if (dtype == Datetime) and (value.tzinfo is not None or time_zone is not None): values_tz = str(value.tzinfo) if value.tzinfo is not None else None dtype_tz = time_zone - if values_tz is not None and (dtype_tz is not None and dtype_tz != "UTC"): - msg = ( - "time-zone-aware datetimes are converted to UTC" - "\n\nPlease either drop the time zone from the dtype, or set it to 'UTC'." - " To convert to a different time zone, please use `.dt.convert_time_zone`." + if values_tz is not None and dtype_tz is None and values_tz != "UTC": + warnings.warn( + TZ_AWARE_VALUES_WITH_TZ_NAIVE_DTYPE_MSG.format(time_unit or "us"), + TimeZoneAwareConstructorWarning, + stacklevel=find_stacklevel(), ) - raise ValueError(msg) - if values_tz != "UTC" and dtype_tz is None: + if values_tz is None and dtype_tz is not None and dtype_tz != "UTC": warnings.warn( - "Constructing a Series with time-zone-aware " - "datetimes results in a Series with UTC time zone. " - "To silence this warning, you can filter " - "warnings of class TimeZoneAwareConstructorWarning, or " - "set 'UTC' as the time zone of your datatype.", + TZ_NAIVE_VALUES_WITH_TZ_AWARE_DTYPE_MSG.format( + time_unit or "us", time_zone + ), TimeZoneAwareConstructorWarning, stacklevel=find_stacklevel(), ) - return s.dt.replace_time_zone( - dtype_tz or "UTC", - ambiguous="raise" if strict else "null", - non_existent="raise" if strict else "null", - )._s + return s.dt.convert_time_zone(dtype_tz or "UTC")._s return s._s elif ( diff --git a/py-polars/polars/selectors.py b/py-polars/polars/selectors.py index 976839dd23f2..2000937e3f8f 100644 --- a/py-polars/polars/selectors.py +++ b/py-polars/polars/selectors.py @@ -1307,17 +1307,20 @@ def datetime( Examples -------- - >>> from datetime import datetime, date + >>> from datetime import datetime, date, timezone >>> import polars.selectors as cs + >>> from zoneinfo import ZoneInfo + >>> tokyo_tz = ZoneInfo("Asia/Tokyo") + >>> utc_tz = timezone.utc >>> df = pl.DataFrame( ... { ... "tstamp_tokyo": [ - ... datetime(1999, 7, 21, 5, 20, 16, 987654), - ... datetime(2000, 5, 16, 6, 21, 21, 123465), + ... datetime(1999, 7, 21, 5, 20, 16, 987654, tzinfo=tokyo_tz), + ... datetime(2000, 5, 16, 6, 21, 21, 123465, tzinfo=tokyo_tz), ... ], ... "tstamp_utc": [ - ... datetime(2023, 4, 10, 12, 14, 16, 999000), - ... datetime(2025, 8, 25, 14, 18, 22, 666000), + ... datetime(2023, 4, 10, 12, 14, 16, 999000, tzinfo=utc_tz), + ... datetime(2025, 8, 25, 14, 18, 22, 666000, tzinfo=utc_tz), ... ], ... "tstamp": [ ... datetime(2000, 11, 20, 18, 12, 16, 600000), diff --git a/py-polars/polars/testing/parametric/strategies/core.py b/py-polars/polars/testing/parametric/strategies/core.py index df4d27e500b0..d5996d5f9b9d 100644 --- a/py-polars/polars/testing/parametric/strategies/core.py +++ b/py-polars/polars/testing/parametric/strategies/core.py @@ -1,5 +1,6 @@ from __future__ import annotations +import warnings from dataclasses import dataclass from typing import TYPE_CHECKING, Any, Collection, Mapping, Sequence, overload @@ -9,6 +10,7 @@ from polars._utils.deprecation import issue_deprecation_warning from polars.dataframe import DataFrame from polars.datatypes import DataType, DataTypeClass, Null +from polars.exceptions import TimeZoneAwareConstructorWarning from polars.series import Series from polars.string_cache import StringCache from polars.testing.parametric.strategies._utils import flexhash @@ -203,7 +205,13 @@ def series( # noqa: D417 ) ) - s = Series(name=name, values=values, dtype=dtype) + with warnings.catch_warnings(): + warnings.filterwarnings( + "ignore", + "Constructing a Series with time-zone-naive", + category=TimeZoneAwareConstructorWarning, + ) + s = Series(name=name, values=values, dtype=dtype) # Apply chunking if allow_chunks and size > 1 and draw(st.booleans()): diff --git a/py-polars/tests/unit/constructors/test_constructors.py b/py-polars/tests/unit/constructors/test_constructors.py index 0e0f626beb2a..0910b2204f29 100644 --- a/py-polars/tests/unit/constructors/test_constructors.py +++ b/py-polars/tests/unit/constructors/test_constructors.py @@ -897,17 +897,13 @@ def test_init_1d_sequence() -> None: [datetime(2020, 1, 1, tzinfo=timezone.utc)], schema={"ts": pl.Datetime("ms")} ) assert df.schema == {"ts": pl.Datetime("ms", "UTC")} - with pytest.warns( - TimeZoneAwareConstructorWarning, match="Series with UTC time zone" - ): + with pytest.warns(TimeZoneAwareConstructorWarning, match="converted to UTC"): df = pl.DataFrame( [datetime(2020, 1, 1, tzinfo=timezone(timedelta(hours=1)))], schema={"ts": pl.Datetime("ms")}, ) assert df.schema == {"ts": pl.Datetime("ms", "UTC")} - with pytest.warns( - TimeZoneAwareConstructorWarning, match="Series with UTC time zone" - ): + with pytest.warns(TimeZoneAwareConstructorWarning, match="converted to UTC"): df = pl.DataFrame( [datetime(2020, 1, 1, tzinfo=ZoneInfo("Asia/Kathmandu"))], schema={"ts": pl.Datetime("ms")}, diff --git a/py-polars/tests/unit/constructors/test_series.py b/py-polars/tests/unit/constructors/test_series.py index 2135a61df88a..73a27afe5853 100644 --- a/py-polars/tests/unit/constructors/test_series.py +++ b/py-polars/tests/unit/constructors/test_series.py @@ -105,11 +105,15 @@ def test_series_init_ambiguous_datetime() -> None: value = datetime(2001, 10, 28, 2) dtype = pl.Datetime(time_zone="Europe/Belgrade") - with pytest.raises(pl.ComputeError, match="ambiguous"): - pl.Series([value], dtype=dtype, strict=True) + with pytest.warns(pl.TimeZoneAwareConstructorWarning, match="converted to"): + result = pl.Series([value], dtype=dtype, strict=True) + expected = pl.Series([datetime(2001, 10, 28, 3)]).dt.replace_time_zone( + "Europe/Belgrade" + ) + assert_series_equal(result, expected) - result = pl.Series([value], dtype=dtype, strict=False) - expected = pl.Series([None], dtype=dtype) + with pytest.warns(pl.TimeZoneAwareConstructorWarning, match="converted to"): + result = pl.Series([value], dtype=dtype, strict=False) assert_series_equal(result, expected) @@ -117,11 +121,18 @@ def test_series_init_nonexistent_datetime() -> None: value = datetime(2024, 3, 31, 2, 30) dtype = pl.Datetime(time_zone="Europe/Amsterdam") - with pytest.raises(pl.ComputeError, match="non-existent"): + with pytest.warns( + pl.TimeZoneAwareConstructorWarning, match="converted to the given time zone" + ): pl.Series([value], dtype=dtype, strict=True) - result = pl.Series([value], dtype=dtype, strict=False) - expected = pl.Series([None], dtype=dtype) + with pytest.warns( + pl.TimeZoneAwareConstructorWarning, match="converted to the given time zone" + ): + result = pl.Series([value], dtype=dtype, strict=False) + expected = pl.Series([datetime(2024, 3, 31, 4, 30)]).dt.replace_time_zone( + "Europe/Amsterdam" + ) assert_series_equal(result, expected) diff --git a/py-polars/tests/unit/dataframe/test_df.py b/py-polars/tests/unit/dataframe/test_df.py index f9f78d1a9bc1..ae8220733419 100644 --- a/py-polars/tests/unit/dataframe/test_df.py +++ b/py-polars/tests/unit/dataframe/test_df.py @@ -2427,7 +2427,10 @@ def test_init_datetimes_with_timezone() -> None: }, ): result = pl.DataFrame( # type: ignore[arg-type] - data={"d1": [dtm], "d2": [dtm]}, + data={ + "d1": [dtm.replace(tzinfo=ZoneInfo(tz_us))], + "d2": [dtm.replace(tzinfo=ZoneInfo(tz_europe))], + }, **type_overrides, ) expected = pl.DataFrame( @@ -2476,7 +2479,7 @@ def test_init_vs_strptime_consistency( expected_item: datetime, warn: bool, ) -> None: - msg = r"UTC time zone" + msg = r"converted to UTC" context_manager: contextlib.AbstractContextManager[pytest.WarningsRecorder | None] if warn: context_manager = pytest.warns(TimeZoneAwareConstructorWarning, match=msg) @@ -2497,11 +2500,11 @@ def test_init_vs_strptime_consistency( def test_init_vs_strptime_consistency_raises() -> None: msg = "-aware datetimes are converted to UTC" - with pytest.raises(ValueError, match=msg): - pl.Series( - [datetime(2020, 1, 1, tzinfo=timezone(timedelta(hours=-8)))], - dtype=pl.Datetime("us", "US/Pacific"), - ) + result = pl.Series( + [datetime(2020, 1, 1, tzinfo=timezone(timedelta(hours=-8)))], + dtype=pl.Datetime("us", "US/Pacific"), + ).item() + assert result == datetime(2020, 1, 1, 0, 0, tzinfo=ZoneInfo(key="US/Pacific")) with pytest.raises(ComputeError, match=msg): pl.Series(["2020-01-01 00:00-08:00"]).str.strptime( pl.Datetime("us", "US/Pacific") diff --git a/py-polars/tests/unit/datatypes/test_temporal.py b/py-polars/tests/unit/datatypes/test_temporal.py index 85762f216438..03ce62c4e8a6 100644 --- a/py-polars/tests/unit/datatypes/test_temporal.py +++ b/py-polars/tests/unit/datatypes/test_temporal.py @@ -340,9 +340,7 @@ def test_datetime_consistency() -> None: datetime(3099, 12, 31, 23, 59, 59, 123456, tzinfo=ZoneInfo("Asia/Kathmandu")), datetime(9999, 12, 31, 23, 59, 59, 999999, tzinfo=ZoneInfo("Asia/Kathmandu")), ] - with pytest.warns( - TimeZoneAwareConstructorWarning, match="Series with UTC time zone" - ): + with pytest.warns(TimeZoneAwareConstructorWarning, match="converted to UTC"): ddf = pl.DataFrame({"dtm": test_data}).with_columns( pl.col("dtm").dt.nanosecond().alias("ns") ) @@ -359,9 +357,7 @@ def test_datetime_consistency() -> None: datetime(2021, 11, 7, 1, 0, fold=1, tzinfo=ZoneInfo("US/Central")), datetime(2021, 11, 7, 2, 0, tzinfo=ZoneInfo("US/Central")), ] - with pytest.warns( - TimeZoneAwareConstructorWarning, match="Series with UTC time zone" - ): + with pytest.warns(TimeZoneAwareConstructorWarning, match="converted to UTC"): ddf = pl.DataFrame({"dtm": test_data}).select( pl.col("dtm").dt.convert_time_zone("US/Central") ) @@ -1135,8 +1131,8 @@ def test_replace_time_zone_non_existent_null() -> None: .str.to_datetime() .dt.replace_time_zone("Europe/Warsaw", non_existent="null") ) - expected = pl.Series( - [None, datetime(2021, 3, 28, 3, 30)], dtype=pl.Datetime("us", "Europe/Warsaw") + expected = pl.Series([None, datetime(2021, 3, 28, 3, 30)]).dt.replace_time_zone( + "Europe/Warsaw" ) assert_series_equal(result, expected) @@ -1281,9 +1277,7 @@ def test_tz_datetime_duration_arithm_5221() -> None: def test_auto_infer_time_zone() -> None: dt = datetime(2022, 10, 17, 10, tzinfo=ZoneInfo("Asia/Shanghai")) - with pytest.warns( - TimeZoneAwareConstructorWarning, match="Series with UTC time zone" - ): + with pytest.warns(TimeZoneAwareConstructorWarning, match="converted to UTC"): s = pl.Series([dt]) assert s.dtype == pl.Datetime("us", "UTC") assert s[0] == dt @@ -2350,7 +2344,7 @@ def test_series_is_temporal() -> None: ) def test_misc_precision_any_value_conversion(time_zone: Any, warn: bool) -> None: context_manager: contextlib.AbstractContextManager[pytest.WarningsRecorder | None] - msg = r"UTC time zone" + msg = r"converted to UTC" if warn: context_manager = pytest.warns(TimeZoneAwareConstructorWarning, match=msg) else: diff --git a/py-polars/tests/unit/functions/range/test_datetime_range.py b/py-polars/tests/unit/functions/range/test_datetime_range.py index eebe12e2e176..2b2252e18ee7 100644 --- a/py-polars/tests/unit/functions/range/test_datetime_range.py +++ b/py-polars/tests/unit/functions/range/test_datetime_range.py @@ -107,7 +107,7 @@ def test_datetime_range_invalid_time_unit() -> None: def test_datetime_range_lazy_time_zones_warning() -> None: start = datetime(2020, 1, 1, tzinfo=ZoneInfo("Asia/Kathmandu")) stop = datetime(2020, 1, 2, tzinfo=ZoneInfo("Asia/Kathmandu")) - with pytest.warns(TimeZoneAwareConstructorWarning, match="Series with UTC"): + with pytest.warns(TimeZoneAwareConstructorWarning, match="converted to UTC"): ( pl.DataFrame({"start": [start], "stop": [stop]}) .with_columns( diff --git a/py-polars/tests/unit/io/test_delta.py b/py-polars/tests/unit/io/test_delta.py index ca939b2dc153..c5cc44da21c1 100644 --- a/py-polars/tests/unit/io/test_delta.py +++ b/py-polars/tests/unit/io/test_delta.py @@ -269,10 +269,8 @@ def test_write_delta_overwrite_schema_deprecated( dtype=pl.List(pl.List(pl.List(pl.List(pl.UInt16)))), ), pl.Series( - "date_ns", - [datetime(2010, 1, 1, 0, 0)], - dtype=pl.Datetime(time_unit="ns", time_zone="Australia/Lord_Howe"), - ), + "date_ns", [datetime(2010, 1, 1, 0, 0)], dtype=pl.Datetime(time_unit="ns") + ).dt.replace_time_zone("Australia/Lord_Howe"), pl.Series( "date_us", [datetime(2010, 1, 1, 0, 0)], diff --git a/py-polars/tests/unit/operations/namespaces/temporal/test_add_business_days.py b/py-polars/tests/unit/operations/namespaces/temporal/test_add_business_days.py index fb86302fc2a4..4fe1235db775 100644 --- a/py-polars/tests/unit/operations/namespaces/temporal/test_add_business_days.py +++ b/py-polars/tests/unit/operations/namespaces/temporal/test_add_business_days.py @@ -1,6 +1,7 @@ from __future__ import annotations import datetime as dt +import sys from datetime import date, datetime, timedelta from typing import TYPE_CHECKING @@ -10,11 +11,19 @@ from hypothesis import assume, given import polars as pl +from polars.dependencies import _ZONEINFO_AVAILABLE from polars.testing import assert_series_equal if TYPE_CHECKING: from polars.type_aliases import Roll, TimeUnit +if sys.version_info >= (3, 9): + from zoneinfo import ZoneInfo +elif _ZONEINFO_AVAILABLE: + # Import from submodule due to typing issue with backports.zoneinfo package: + # https://github.com/pganssle/zoneinfo/issues/125 + from backports.zoneinfo._zoneinfo import ZoneInfo + def test_add_business_days() -> None: # (Expression, expression) @@ -173,8 +182,14 @@ def test_add_business_days_w_roll() -> None: @pytest.mark.parametrize("time_zone", [None, "Europe/London", "Asia/Kathmandu"]) @pytest.mark.parametrize("time_unit", ["ms", "us", "ns"]) def test_add_business_days_datetime(time_zone: str | None, time_unit: TimeUnit) -> None: + tzinfo = ZoneInfo(time_zone) if time_zone is not None else None df = pl.DataFrame( - {"start": [datetime(2020, 3, 28, 1), datetime(2020, 1, 10, 4)]}, + { + "start": [ + datetime(2020, 3, 28, 1, tzinfo=tzinfo), + datetime(2020, 1, 10, 4, tzinfo=tzinfo), + ] + }, schema={"start": pl.Datetime(time_unit, time_zone)}, ) result = df.select( @@ -183,8 +198,8 @@ def test_add_business_days_datetime(time_zone: str | None, time_unit: TimeUnit) expected = pl.Series( "result", [datetime(2020, 3, 30, 1), datetime(2020, 1, 12, 4)], - pl.Datetime(time_unit, time_zone), - ) + pl.Datetime(time_unit), + ).dt.replace_time_zone(time_zone) assert_series_equal(result, expected) with pytest.raises(pl.ComputeError, match="is not a business date"): diff --git a/py-polars/tests/unit/operations/namespaces/temporal/test_datetime.py b/py-polars/tests/unit/operations/namespaces/temporal/test_datetime.py index 4f9643aa5c61..88946503dafd 100644 --- a/py-polars/tests/unit/operations/namespaces/temporal/test_datetime.py +++ b/py-polars/tests/unit/operations/namespaces/temporal/test_datetime.py @@ -918,7 +918,13 @@ def test_offset_by_expressions() -> None: { "c": [None, None, datetime(2020, 10, 26), datetime(2021, 1, 12), None], "d": [None, None, datetime(2020, 10, 26), datetime(2021, 1, 12), None], - "e": [None, None, datetime(2020, 10, 26), datetime(2021, 1, 12), None], + "e": [ + None, + None, + datetime(2020, 10, 26, tzinfo=ZoneInfo("Europe/London")), + datetime(2021, 1, 12, tzinfo=ZoneInfo("Europe/London")), + None, + ], "f": [None, None, date(2020, 10, 26), date(2021, 1, 12), None], }, schema_overrides={ diff --git a/py-polars/tests/unit/operations/namespaces/temporal/test_offset_by.py b/py-polars/tests/unit/operations/namespaces/temporal/test_offset_by.py index 9f14679cedc1..072faa175c8a 100644 --- a/py-polars/tests/unit/operations/namespaces/temporal/test_offset_by.py +++ b/py-polars/tests/unit/operations/namespaces/temporal/test_offset_by.py @@ -107,8 +107,12 @@ def test_datetime_offset_by( time_unit: TimeUnit, time_zone: str | None, ) -> None: - result = pl.Series(inputs, dtype=pl.Datetime(time_unit, time_zone)).dt.offset_by( - offset + result = ( + pl.Series(inputs, dtype=pl.Datetime(time_unit)) + .dt.replace_time_zone(time_zone) + .dt.offset_by(offset) + ) + expected = pl.Series(outputs, dtype=pl.Datetime(time_unit)).dt.replace_time_zone( + time_zone ) - expected = pl.Series(outputs, dtype=pl.Datetime(time_unit, time_zone)) assert_series_equal(result, expected) diff --git a/py-polars/tests/unit/operations/namespaces/test_strptime.py b/py-polars/tests/unit/operations/namespaces/test_strptime.py index ab49e64dbb88..058e343b529e 100644 --- a/py-polars/tests/unit/operations/namespaces/test_strptime.py +++ b/py-polars/tests/unit/operations/namespaces/test_strptime.py @@ -176,7 +176,7 @@ def test_non_exact_short_elements_10223(value: str, attr: str) -> None: def test_to_datetime_non_exact_strptime( offset: str, time_zone: str | None, tzinfo: timezone | None, format: str ) -> None: - msg = "Series with UTC time zone" + msg = "converted to UTC" context_manager: contextlib.AbstractContextManager[pytest.WarningsRecorder | None] if offset: context_manager = pytest.warns(TimeZoneAwareConstructorWarning, match=msg) @@ -692,9 +692,7 @@ def test_to_datetime_naive_format_and_time_zone() -> None: result = pl.Series(["2020-01-01"]).str.to_datetime( format="%Y-%m-%d", time_zone="Asia/Kathmandu" ) - expected = pl.Series( - [datetime(2020, 1, 1)], dtype=pl.Datetime("us", "Asia/Kathmandu") - ) + expected = pl.Series([datetime(2020, 1, 1)]).dt.replace_time_zone("Asia/Kathmandu") assert_series_equal(result, expected) # format-inferred path result = pl.Series(["2020-01-01"]).str.to_datetime(time_zone="Asia/Kathmandu") diff --git a/py-polars/tests/unit/operations/test_ewm_by.py b/py-polars/tests/unit/operations/test_ewm_by.py index 0f806f262bbe..180e5f7e106e 100644 --- a/py-polars/tests/unit/operations/test_ewm_by.py +++ b/py-polars/tests/unit/operations/test_ewm_by.py @@ -1,16 +1,25 @@ from __future__ import annotations +import sys from datetime import date, datetime, timedelta from typing import TYPE_CHECKING import pytest import polars as pl +from polars.dependencies import _ZONEINFO_AVAILABLE from polars.testing import assert_frame_equal, assert_series_equal if TYPE_CHECKING: from polars.type_aliases import PolarsIntegerType, TimeUnit +if sys.version_info >= (3, 9): + from zoneinfo import ZoneInfo +elif _ZONEINFO_AVAILABLE: + # Import from submodule due to typing issue with backports.zoneinfo package: + # https://github.com/pganssle/zoneinfo/issues/125 + from backports.zoneinfo._zoneinfo import ZoneInfo + @pytest.mark.parametrize("sort", [True, False]) def test_ewma_by_date(sort: bool) -> None: @@ -110,15 +119,16 @@ def test_ewma_by_datetime(time_unit: TimeUnit, time_zone: str | None) -> None: @pytest.mark.parametrize("time_unit", ["ms", "us", "ns"]) def test_ewma_by_datetime_tz_aware(time_unit: TimeUnit) -> None: + tzinfo = ZoneInfo("Asia/Kathmandu") df = pl.DataFrame( { "values": [3.0, 1.0, 2.0, None, 4.0], "times": [ None, - datetime(2020, 1, 4), - datetime(2020, 1, 11), - datetime(2020, 1, 16), - datetime(2020, 1, 18), + datetime(2020, 1, 4, tzinfo=tzinfo), + datetime(2020, 1, 11, tzinfo=tzinfo), + datetime(2020, 1, 16, tzinfo=tzinfo), + datetime(2020, 1, 18, tzinfo=tzinfo), ], }, schema_overrides={"times": pl.Datetime(time_unit, "Asia/Kathmandu")}, diff --git a/py-polars/tests/unit/operations/test_interpolate.py b/py-polars/tests/unit/operations/test_interpolate.py index 904fe23b41a8..e3e9b45ff71a 100644 --- a/py-polars/tests/unit/operations/test_interpolate.py +++ b/py-polars/tests/unit/operations/test_interpolate.py @@ -1,16 +1,25 @@ from __future__ import annotations +import sys from datetime import date, datetime, time, timedelta from typing import TYPE_CHECKING, Any import pytest import polars as pl +from polars.dependencies import _ZONEINFO_AVAILABLE from polars.testing import assert_frame_equal if TYPE_CHECKING: from polars.type_aliases import PolarsDataType, PolarsTemporalType +if sys.version_info >= (3, 9): + from zoneinfo import ZoneInfo +elif _ZONEINFO_AVAILABLE: + # Import from submodule due to typing issue with backports.zoneinfo package: + # https://github.com/pganssle/zoneinfo/issues/125 + from backports.zoneinfo._zoneinfo import ZoneInfo + @pytest.mark.parametrize( ("input_dtype", "output_dtype"), @@ -53,9 +62,17 @@ def test_interpolate_linear( [datetime(2020, 1, 1), datetime(2020, 1, 1, 12), datetime(2020, 1, 2)], ), ( - [datetime(2020, 1, 1), None, datetime(2020, 1, 2)], + [ + datetime(2020, 1, 1, tzinfo=ZoneInfo("Asia/Kathmandu")), + None, + datetime(2020, 1, 2, tzinfo=ZoneInfo("Asia/Kathmandu")), + ], pl.Datetime("us", "Asia/Kathmandu"), - [datetime(2020, 1, 1), datetime(2020, 1, 1, 12), datetime(2020, 1, 2)], + [ + datetime(2020, 1, 1, tzinfo=ZoneInfo("Asia/Kathmandu")), + datetime(2020, 1, 1, 12, tzinfo=ZoneInfo("Asia/Kathmandu")), + datetime(2020, 1, 2, tzinfo=ZoneInfo("Asia/Kathmandu")), + ], ), ([time(1), None, time(2)], pl.Time, [time(1), time(1, 30), time(2)]), ( @@ -112,9 +129,17 @@ def test_interpolate_nearest(input_dtype: PolarsDataType) -> None: [datetime(2020, 1, 1), datetime(2020, 1, 2), datetime(2020, 1, 2)], ), ( - [datetime(2020, 1, 1), None, datetime(2020, 1, 2)], + [ + datetime(2020, 1, 1, tzinfo=ZoneInfo("Asia/Kathmandu")), + None, + datetime(2020, 1, 2, tzinfo=ZoneInfo("Asia/Kathmandu")), + ], pl.Datetime("us", "Asia/Kathmandu"), - [datetime(2020, 1, 1), datetime(2020, 1, 2), datetime(2020, 1, 2)], + [ + datetime(2020, 1, 1, tzinfo=ZoneInfo("Asia/Kathmandu")), + datetime(2020, 1, 2, tzinfo=ZoneInfo("Asia/Kathmandu")), + datetime(2020, 1, 2, tzinfo=ZoneInfo("Asia/Kathmandu")), + ], ), ([time(1), None, time(2)], pl.Time, [time(1), time(2), time(2)]), ( diff --git a/py-polars/tests/unit/series/buffers/test_from_buffers.py b/py-polars/tests/unit/series/buffers/test_from_buffers.py index 1c2c64c9a84e..24cdbc130eb2 100644 --- a/py-polars/tests/unit/series/buffers/test_from_buffers.py +++ b/py-polars/tests/unit/series/buffers/test_from_buffers.py @@ -1,6 +1,7 @@ from __future__ import annotations from datetime import datetime +from typing import TYPE_CHECKING import pytest from hypothesis import given @@ -20,6 +21,12 @@ {pl.Date, pl.Time} | pl.DURATION_DTYPES | DATETIME_DTYPES ) +if TYPE_CHECKING: + from zoneinfo import ZoneInfo + +else: + from polars._utils.convert import string_to_zoneinfo as ZoneInfo + @given( s=series( @@ -89,11 +96,12 @@ def test_series_from_buffers_boolean() -> None: def test_series_from_buffers_datetime() -> None: dtype = pl.Datetime(time_zone="Europe/Amsterdam") + tzinfo = ZoneInfo("Europe/Amsterdam") data = pl.Series( [ - datetime(2022, 2, 10, 6), - datetime(2022, 2, 11, 12), - datetime(2022, 2, 12, 18), + datetime(2022, 2, 10, 6, tzinfo=tzinfo), + datetime(2022, 2, 11, 12, tzinfo=tzinfo), + datetime(2022, 2, 12, 18, tzinfo=tzinfo), ], dtype=dtype, ).cast(pl.Int64) @@ -103,9 +111,9 @@ def test_series_from_buffers_datetime() -> None: expected = pl.Series( [ - datetime(2022, 2, 10, 6), + datetime(2022, 2, 10, 6, tzinfo=tzinfo), None, - datetime(2022, 2, 12, 18), + datetime(2022, 2, 12, 18, tzinfo=tzinfo), ], dtype=dtype, ) diff --git a/py-polars/tests/unit/series/test_series.py b/py-polars/tests/unit/series/test_series.py index 87409793d7fb..ca61e4d2a31c 100644 --- a/py-polars/tests/unit/series/test_series.py +++ b/py-polars/tests/unit/series/test_series.py @@ -131,7 +131,9 @@ def test_init_inputs(monkeypatch: Any) -> None: # conversion of Date to Datetime with specified timezone and units tu: TimeUnit = "ms" tz = "America/Argentina/Rio_Gallegos" - s = pl.Series([date(2023, 1, 1), date(2023, 1, 2)], dtype=pl.Datetime(tu, tz)) + s = pl.Series( + [date(2023, 1, 1), date(2023, 1, 2)], dtype=pl.Datetime(tu) + ).dt.replace_time_zone(tz) d1 = datetime(2023, 1, 1, 0, 0, 0, 0, ZoneInfo(tz)) d2 = datetime(2023, 1, 2, 0, 0, 0, 0, ZoneInfo(tz)) assert s.to_list() == [d1, d2]