Skip to content

Commit

Permalink
feat(python!): consistently convert to given time zone in Series constructor
Browse files Browse the repository at this point in the history
  • Loading branch information
MarcoGorelli committed Jun 9, 2024
1 parent c81ef69 commit 5cb989b
Show file tree
Hide file tree
Showing 18 changed files with 197 additions and 94 deletions.
2 changes: 2 additions & 0 deletions py-polars/polars/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,7 @@
SQLInterfaceError,
SQLSyntaxError,
StructFieldNotFoundError,
TimeZoneAwareConstructorWarning,
UnstableWarning,
)
from polars.expr import Expr
Expand Down Expand Up @@ -246,6 +247,7 @@
"ChronoFormatWarning",
"MapWithoutReturnDtypeWarning",
"PolarsWarning",
"TimeZoneAwareConstructorWarning",
"UnstableWarning",
# core classes
"DataFrame",
Expand Down
72 changes: 46 additions & 26 deletions py-polars/polars/_utils/construction/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,30 @@
from polars.dependencies import pandas as pd
from polars.type_aliases import PolarsDataType

# Warning text used when time-zone-naive values are combined with a
# time-zone-aware dtype. Takes two positional `str.format` arguments, in
# order: the time unit and the time zone. Both are rendered with `!r` so the
# quoted snippet shows them as string literals, e.g. `pl.Datetime('us', 'Asia/Tokyo')`,
# rather than as bare, non-runnable names.
TZ_NAIVE_VALUES_WITH_TZ_AWARE_DTYPE_MSG = (
    "Constructing a Series with time-zone-naive "
    "datetimes and a time-zone-aware dtype results in a Series where "
    "the datetimes are converted to the given time zone as if starting "
    "from UTC.\n\n"
    "Note: this is a breaking change since pre-1.0.0 behaviour.\n\n"
    "Hint: to silence this warning, you can filter out "
    "warnings of class pl.TimeZoneAwareConstructorWarning.\n"
    "Alternatively, you can replace "
    "`pl.Series(values, dtype=pl.Datetime({!r}, {!r}))` with one of:\n"
    "- `pl.Series(values, dtype=pl.Datetime(time_unit)).dt.replace_time_zone(time_zone)`\n"
    "- `pl.Series(values, dtype=pl.Datetime(time_unit)).dt.convert_time_zone(time_zone)`\n"
    "depending on whether you want to replace or convert the time zone."
)

# Warning text used when time-zone-aware values are combined with a
# time-zone-naive dtype. Takes one positional `str.format` argument: the time
# unit (rendered with `!r`, matching the already-quoted 'UTC' literal). The
# code sample is wrapped in a balanced pair of backticks.
TZ_AWARE_VALUES_WITH_TZ_NAIVE_DTYPE_MSG = (
    "Constructing a Series with time-zone-aware "
    "datetimes and a time-zone-naive dtype results in a Series where "
    "the datetimes are converted to UTC.\n\n"
    "Hint: to silence this warning, you can filter out "
    "warnings of class pl.TimeZoneAwareConstructorWarning.\n"
    "Alternatively, you can set the time zone in the `dtype`, e.g.:\n"
    " `pl.Series(values, dtype=pl.Datetime({!r}, 'UTC'))`"
)


def sequence_to_pyseries(
name: str,
Expand Down Expand Up @@ -203,41 +227,37 @@ def sequence_to_pyseries(
s = wrap_s(py_series).dt.cast_time_unit(time_unit)

if (values_dtype == Date) & (dtype == Datetime):
return (
s.cast(Datetime(time_unit or "us"))
.dt.replace_time_zone(
time_zone,
ambiguous="raise" if strict else "null",
non_existent="raise" if strict else "null",
)
._s
)
result = s.cast(Datetime(time_unit or "us"))
if time_zone is not None:
if time_zone != "UTC":
warnings.warn(
TZ_NAIVE_VALUES_WITH_TZ_AWARE_DTYPE_MSG.format(
time_unit or "us", time_zone
),
TimeZoneAwareConstructorWarning,
stacklevel=find_stacklevel(),
)
result = result.dt.convert_time_zone(time_zone)
return result._s

if (dtype == Datetime) and (value.tzinfo is not None or time_zone is not None):
values_tz = str(value.tzinfo) if value.tzinfo is not None else None
dtype_tz = time_zone
if values_tz is not None and (dtype_tz is not None and dtype_tz != "UTC"):
msg = (
"time-zone-aware datetimes are converted to UTC"
"\n\nPlease either drop the time zone from the dtype, or set it to 'UTC'."
" To convert to a different time zone, please use `.dt.convert_time_zone`."
if values_tz is not None and dtype_tz is None and values_tz != "UTC":
warnings.warn(
TZ_AWARE_VALUES_WITH_TZ_NAIVE_DTYPE_MSG.format(time_unit or "us"),
TimeZoneAwareConstructorWarning,
stacklevel=find_stacklevel(),
)
raise ValueError(msg)
if values_tz != "UTC" and dtype_tz is None:
if values_tz is None and dtype_tz is not None and dtype_tz != "UTC":
warnings.warn(
"Constructing a Series with time-zone-aware "
"datetimes results in a Series with UTC time zone. "
"To silence this warning, you can filter "
"warnings of class TimeZoneAwareConstructorWarning, or "
"set 'UTC' as the time zone of your datatype.",
TZ_NAIVE_VALUES_WITH_TZ_AWARE_DTYPE_MSG.format(
time_unit or "us", time_zone
),
TimeZoneAwareConstructorWarning,
stacklevel=find_stacklevel(),
)
return s.dt.replace_time_zone(
dtype_tz or "UTC",
ambiguous="raise" if strict else "null",
non_existent="raise" if strict else "null",
)._s
return s.dt.convert_time_zone(dtype_tz or "UTC")._s
return s._s

elif (
Expand Down
13 changes: 8 additions & 5 deletions py-polars/polars/selectors.py
Original file line number Diff line number Diff line change
Expand Up @@ -1307,17 +1307,20 @@ def datetime(
Examples
--------
>>> from datetime import datetime, date
>>> from datetime import datetime, date, timezone
>>> import polars.selectors as cs
>>> from zoneinfo import ZoneInfo
>>> tokyo_tz = ZoneInfo("Asia/Tokyo")
>>> utc_tz = timezone.utc
>>> df = pl.DataFrame(
... {
... "tstamp_tokyo": [
... datetime(1999, 7, 21, 5, 20, 16, 987654),
... datetime(2000, 5, 16, 6, 21, 21, 123465),
... datetime(1999, 7, 21, 5, 20, 16, 987654, tzinfo=tokyo_tz),
... datetime(2000, 5, 16, 6, 21, 21, 123465, tzinfo=tokyo_tz),
... ],
... "tstamp_utc": [
... datetime(2023, 4, 10, 12, 14, 16, 999000),
... datetime(2025, 8, 25, 14, 18, 22, 666000),
... datetime(2023, 4, 10, 12, 14, 16, 999000, tzinfo=utc_tz),
... datetime(2025, 8, 25, 14, 18, 22, 666000, tzinfo=utc_tz),
... ],
... "tstamp": [
... datetime(2000, 11, 20, 18, 12, 16, 600000),
Expand Down
10 changes: 9 additions & 1 deletion py-polars/polars/testing/parametric/strategies/core.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from __future__ import annotations

import warnings
from dataclasses import dataclass
from typing import TYPE_CHECKING, Any, Collection, Mapping, Sequence, overload

Expand All @@ -9,6 +10,7 @@
from polars._utils.deprecation import issue_deprecation_warning
from polars.dataframe import DataFrame
from polars.datatypes import DataType, DataTypeClass, Null
from polars.exceptions import TimeZoneAwareConstructorWarning
from polars.series import Series
from polars.string_cache import StringCache
from polars.testing.parametric.strategies._utils import flexhash
Expand Down Expand Up @@ -203,7 +205,13 @@ def series( # noqa: D417
)
)

s = Series(name=name, values=values, dtype=dtype)
with warnings.catch_warnings():
warnings.filterwarnings(
"ignore",
"Constructing a Series with time-zone-naive",
category=TimeZoneAwareConstructorWarning,
)
s = Series(name=name, values=values, dtype=dtype)

# Apply chunking
if allow_chunks and size > 1 and draw(st.booleans()):
Expand Down
8 changes: 2 additions & 6 deletions py-polars/tests/unit/constructors/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -897,17 +897,13 @@ def test_init_1d_sequence() -> None:
[datetime(2020, 1, 1, tzinfo=timezone.utc)], schema={"ts": pl.Datetime("ms")}
)
assert df.schema == {"ts": pl.Datetime("ms", "UTC")}
with pytest.warns(
TimeZoneAwareConstructorWarning, match="Series with UTC time zone"
):
with pytest.warns(TimeZoneAwareConstructorWarning, match="converted to UTC"):
df = pl.DataFrame(
[datetime(2020, 1, 1, tzinfo=timezone(timedelta(hours=1)))],
schema={"ts": pl.Datetime("ms")},
)
assert df.schema == {"ts": pl.Datetime("ms", "UTC")}
with pytest.warns(
TimeZoneAwareConstructorWarning, match="Series with UTC time zone"
):
with pytest.warns(TimeZoneAwareConstructorWarning, match="converted to UTC"):
df = pl.DataFrame(
[datetime(2020, 1, 1, tzinfo=ZoneInfo("Asia/Kathmandu"))],
schema={"ts": pl.Datetime("ms")},
Expand Down
25 changes: 18 additions & 7 deletions py-polars/tests/unit/constructors/test_series.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,23 +105,34 @@ def test_series_init_ambiguous_datetime() -> None:
value = datetime(2001, 10, 28, 2)
dtype = pl.Datetime(time_zone="Europe/Belgrade")

with pytest.raises(pl.ComputeError, match="ambiguous"):
pl.Series([value], dtype=dtype, strict=True)
with pytest.warns(pl.TimeZoneAwareConstructorWarning, match="converted to"):
result = pl.Series([value], dtype=dtype, strict=True)
expected = pl.Series([datetime(2001, 10, 28, 3)]).dt.replace_time_zone(
"Europe/Belgrade"
)
assert_series_equal(result, expected)

result = pl.Series([value], dtype=dtype, strict=False)
expected = pl.Series([None], dtype=dtype)
with pytest.warns(pl.TimeZoneAwareConstructorWarning, match="converted to"):
result = pl.Series([value], dtype=dtype, strict=False)
assert_series_equal(result, expected)


def test_series_init_nonexistent_datetime() -> None:
value = datetime(2024, 3, 31, 2, 30)
dtype = pl.Datetime(time_zone="Europe/Amsterdam")

with pytest.raises(pl.ComputeError, match="non-existent"):
with pytest.warns(
pl.TimeZoneAwareConstructorWarning, match="converted to the given time zone"
):
pl.Series([value], dtype=dtype, strict=True)

result = pl.Series([value], dtype=dtype, strict=False)
expected = pl.Series([None], dtype=dtype)
with pytest.warns(
pl.TimeZoneAwareConstructorWarning, match="converted to the given time zone"
):
result = pl.Series([value], dtype=dtype, strict=False)
expected = pl.Series([datetime(2024, 3, 31, 4, 30)]).dt.replace_time_zone(
"Europe/Amsterdam"
)
assert_series_equal(result, expected)


Expand Down
17 changes: 10 additions & 7 deletions py-polars/tests/unit/dataframe/test_df.py
Original file line number Diff line number Diff line change
Expand Up @@ -2427,7 +2427,10 @@ def test_init_datetimes_with_timezone() -> None:
},
):
result = pl.DataFrame( # type: ignore[arg-type]
data={"d1": [dtm], "d2": [dtm]},
data={
"d1": [dtm.replace(tzinfo=ZoneInfo(tz_us))],
"d2": [dtm.replace(tzinfo=ZoneInfo(tz_europe))],
},
**type_overrides,
)
expected = pl.DataFrame(
Expand Down Expand Up @@ -2476,7 +2479,7 @@ def test_init_vs_strptime_consistency(
expected_item: datetime,
warn: bool,
) -> None:
msg = r"UTC time zone"
msg = r"converted to UTC"
context_manager: contextlib.AbstractContextManager[pytest.WarningsRecorder | None]
if warn:
context_manager = pytest.warns(TimeZoneAwareConstructorWarning, match=msg)
Expand All @@ -2497,11 +2500,11 @@ def test_init_vs_strptime_consistency(

def test_init_vs_strptime_consistency_raises() -> None:
msg = "-aware datetimes are converted to UTC"
with pytest.raises(ValueError, match=msg):
pl.Series(
[datetime(2020, 1, 1, tzinfo=timezone(timedelta(hours=-8)))],
dtype=pl.Datetime("us", "US/Pacific"),
)
result = pl.Series(
[datetime(2020, 1, 1, tzinfo=timezone(timedelta(hours=-8)))],
dtype=pl.Datetime("us", "US/Pacific"),
).item()
assert result == datetime(2020, 1, 1, 0, 0, tzinfo=ZoneInfo(key="US/Pacific"))
with pytest.raises(ComputeError, match=msg):
pl.Series(["2020-01-01 00:00-08:00"]).str.strptime(
pl.Datetime("us", "US/Pacific")
Expand Down
18 changes: 6 additions & 12 deletions py-polars/tests/unit/datatypes/test_temporal.py
Original file line number Diff line number Diff line change
Expand Up @@ -340,9 +340,7 @@ def test_datetime_consistency() -> None:
datetime(3099, 12, 31, 23, 59, 59, 123456, tzinfo=ZoneInfo("Asia/Kathmandu")),
datetime(9999, 12, 31, 23, 59, 59, 999999, tzinfo=ZoneInfo("Asia/Kathmandu")),
]
with pytest.warns(
TimeZoneAwareConstructorWarning, match="Series with UTC time zone"
):
with pytest.warns(TimeZoneAwareConstructorWarning, match="converted to UTC"):
ddf = pl.DataFrame({"dtm": test_data}).with_columns(
pl.col("dtm").dt.nanosecond().alias("ns")
)
Expand All @@ -359,9 +357,7 @@ def test_datetime_consistency() -> None:
datetime(2021, 11, 7, 1, 0, fold=1, tzinfo=ZoneInfo("US/Central")),
datetime(2021, 11, 7, 2, 0, tzinfo=ZoneInfo("US/Central")),
]
with pytest.warns(
TimeZoneAwareConstructorWarning, match="Series with UTC time zone"
):
with pytest.warns(TimeZoneAwareConstructorWarning, match="converted to UTC"):
ddf = pl.DataFrame({"dtm": test_data}).select(
pl.col("dtm").dt.convert_time_zone("US/Central")
)
Expand Down Expand Up @@ -1135,8 +1131,8 @@ def test_replace_time_zone_non_existent_null() -> None:
.str.to_datetime()
.dt.replace_time_zone("Europe/Warsaw", non_existent="null")
)
expected = pl.Series(
[None, datetime(2021, 3, 28, 3, 30)], dtype=pl.Datetime("us", "Europe/Warsaw")
expected = pl.Series([None, datetime(2021, 3, 28, 3, 30)]).dt.replace_time_zone(
"Europe/Warsaw"
)
assert_series_equal(result, expected)

Expand Down Expand Up @@ -1281,9 +1277,7 @@ def test_tz_datetime_duration_arithm_5221() -> None:

def test_auto_infer_time_zone() -> None:
dt = datetime(2022, 10, 17, 10, tzinfo=ZoneInfo("Asia/Shanghai"))
with pytest.warns(
TimeZoneAwareConstructorWarning, match="Series with UTC time zone"
):
with pytest.warns(TimeZoneAwareConstructorWarning, match="converted to UTC"):
s = pl.Series([dt])
assert s.dtype == pl.Datetime("us", "UTC")
assert s[0] == dt
Expand Down Expand Up @@ -2350,7 +2344,7 @@ def test_series_is_temporal() -> None:
)
def test_misc_precision_any_value_conversion(time_zone: Any, warn: bool) -> None:
context_manager: contextlib.AbstractContextManager[pytest.WarningsRecorder | None]
msg = r"UTC time zone"
msg = r"converted to UTC"
if warn:
context_manager = pytest.warns(TimeZoneAwareConstructorWarning, match=msg)
else:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ def test_datetime_range_invalid_time_unit() -> None:
def test_datetime_range_lazy_time_zones_warning() -> None:
start = datetime(2020, 1, 1, tzinfo=ZoneInfo("Asia/Kathmandu"))
stop = datetime(2020, 1, 2, tzinfo=ZoneInfo("Asia/Kathmandu"))
with pytest.warns(TimeZoneAwareConstructorWarning, match="Series with UTC"):
with pytest.warns(TimeZoneAwareConstructorWarning, match="converted to UTC"):
(
pl.DataFrame({"start": [start], "stop": [stop]})
.with_columns(
Expand Down
6 changes: 2 additions & 4 deletions py-polars/tests/unit/io/test_delta.py
Original file line number Diff line number Diff line change
Expand Up @@ -269,10 +269,8 @@ def test_write_delta_overwrite_schema_deprecated(
dtype=pl.List(pl.List(pl.List(pl.List(pl.UInt16)))),
),
pl.Series(
"date_ns",
[datetime(2010, 1, 1, 0, 0)],
dtype=pl.Datetime(time_unit="ns", time_zone="Australia/Lord_Howe"),
),
"date_ns", [datetime(2010, 1, 1, 0, 0)], dtype=pl.Datetime(time_unit="ns")
).dt.replace_time_zone("Australia/Lord_Howe"),
pl.Series(
"date_us",
[datetime(2010, 1, 1, 0, 0)],
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from __future__ import annotations

import datetime as dt
import sys
from datetime import date, datetime, timedelta
from typing import TYPE_CHECKING

Expand All @@ -10,11 +11,19 @@
from hypothesis import assume, given

import polars as pl
from polars.dependencies import _ZONEINFO_AVAILABLE
from polars.testing import assert_series_equal

if TYPE_CHECKING:
from polars.type_aliases import Roll, TimeUnit

if sys.version_info >= (3, 9):
from zoneinfo import ZoneInfo
elif _ZONEINFO_AVAILABLE:
# Import from submodule due to typing issue with backports.zoneinfo package:
# https://github.com/pganssle/zoneinfo/issues/125
from backports.zoneinfo._zoneinfo import ZoneInfo


def test_add_business_days() -> None:
# (Expression, expression)
Expand Down Expand Up @@ -173,8 +182,14 @@ def test_add_business_days_w_roll() -> None:
@pytest.mark.parametrize("time_zone", [None, "Europe/London", "Asia/Kathmandu"])
@pytest.mark.parametrize("time_unit", ["ms", "us", "ns"])
def test_add_business_days_datetime(time_zone: str | None, time_unit: TimeUnit) -> None:
tzinfo = ZoneInfo(time_zone) if time_zone is not None else None
df = pl.DataFrame(
{"start": [datetime(2020, 3, 28, 1), datetime(2020, 1, 10, 4)]},
{
"start": [
datetime(2020, 3, 28, 1, tzinfo=tzinfo),
datetime(2020, 1, 10, 4, tzinfo=tzinfo),
]
},
schema={"start": pl.Datetime(time_unit, time_zone)},
)
result = df.select(
Expand All @@ -183,8 +198,8 @@ def test_add_business_days_datetime(time_zone: str | None, time_unit: TimeUnit)
expected = pl.Series(
"result",
[datetime(2020, 3, 30, 1), datetime(2020, 1, 12, 4)],
pl.Datetime(time_unit, time_zone),
)
pl.Datetime(time_unit),
).dt.replace_time_zone(time_zone)
assert_series_equal(result, expected)

with pytest.raises(pl.ComputeError, match="is not a business date"):
Expand Down
Loading

0 comments on commit 5cb989b

Please sign in to comment.