From a8bc696010fa94fa0be44fba2570bee0eab83ba2 Mon Sep 17 00:00:00 2001 From: Shuchu Han Date: Tue, 16 Jul 2024 15:22:30 -0400 Subject: [PATCH] fix: Retire the datetime.utcnow(). (#4352) * fix: Retire the datetime.utcnow(). Signed-off-by: Shuchu Han * fix: Remove unnecessary unit test. Signed-off-by: Shuchu Han --------- Signed-off-by: Shuchu Han --- sdk/python/feast/driver_test_data.py | 43 ++++++++++++++++------ sdk/python/feast/type_map.py | 3 +- sdk/python/feast/utils.py | 4 +- sdk/python/tests/unit/test_datetime.py | 6 --- sdk/python/tests/utils/feature_records.py | 4 +- sdk/python/tests/utils/test_log_creator.py | 2 +- 6 files changed, 38 insertions(+), 24 deletions(-) delete mode 100644 sdk/python/tests/unit/test_datetime.py diff --git a/sdk/python/feast/driver_test_data.py b/sdk/python/feast/driver_test_data.py index 7959046e6e..defeb404a3 100644 --- a/sdk/python/feast/driver_test_data.py +++ b/sdk/python/feast/driver_test_data.py @@ -61,11 +61,11 @@ def create_orders_df( df["order_is_success"] = np.random.randint(0, 2, size=order_count).astype(np.int32) df[DEFAULT_ENTITY_DF_EVENT_TIMESTAMP_COL] = [ _convert_event_timestamp( - pd.Timestamp(dt, unit="ms", tz="UTC").round("ms"), + pd.Timestamp(dt, unit="ms").round("ms"), EventTimestampType(idx % 4), ) for idx, dt in enumerate( - pd.date_range(start=start_date, end=end_date, periods=order_count) + pd.date_range(start=start_date, end=end_date, periods=order_count, tz="UTC") ) ] df.sort_values( @@ -101,9 +101,13 @@ def create_driver_hourly_stats_df(drivers, start_date, end_date) -> pd.DataFrame df_hourly = pd.DataFrame( { "event_timestamp": [ - pd.Timestamp(dt, unit="ms", tz="UTC").round("ms") + pd.Timestamp(dt, unit="ms").round("ms") for dt in pd.date_range( - start=start_date, end=end_date, freq="1h", inclusive="left" + start=start_date, + end=end_date, + freq="1h", + inclusive="left", + tz="UTC", ) ] # include a fixed timestamp for get_historical_features in the quickstart @@ -162,9 +166,13 @@ def create_customer_daily_profile_df(customers, start_date, end_date) -> pd.Data df_daily = pd.DataFrame( { "event_timestamp": [ - pd.Timestamp(dt, unit="ms", tz="UTC").round("ms") + pd.Timestamp(dt, unit="ms").round("ms") for dt in pd.date_range( - start=start_date, end=end_date, freq="1D", inclusive="left" + start=start_date, + end=end_date, + freq="1D", + inclusive="left", + tz="UTC", ) ] } @@ -207,9 +215,13 @@ def create_location_stats_df(locations, start_date, end_date) -> pd.DataFrame: df_hourly = pd.DataFrame( { "event_timestamp": [ - pd.Timestamp(dt, unit="ms", tz="UTC").round("ms") + pd.Timestamp(dt, unit="ms").round("ms") for dt in pd.date_range( - start=start_date, end=end_date, freq="1h", inclusive="left" + start=start_date, + end=end_date, + freq="1h", + inclusive="left", + tz="UTC", ) ] } @@ -254,9 +266,16 @@ def create_global_daily_stats_df(start_date, end_date) -> pd.DataFrame: df_daily = pd.DataFrame( { "event_timestamp": [ - pd.Timestamp(dt, unit="ms", tz="UTC").round("ms") + pd.Timestamp( + dt, + unit="ms", + ).round("ms") for dt in pd.date_range( - start=start_date, end=end_date, freq="1D", inclusive="left" + start=start_date, + end=end_date, + freq="1D", + inclusive="left", + tz="UTC", ) ] } @@ -286,11 +305,11 @@ def create_field_mapping_df(start_date, end_date) -> pd.DataFrame: df["column_name"] = np.random.randint(1, 100, size=size).astype(np.int32) df[DEFAULT_ENTITY_DF_EVENT_TIMESTAMP_COL] = [ _convert_event_timestamp( - pd.Timestamp(dt, unit="ms", tz="UTC").round("ms"), + pd.Timestamp(dt, unit="ms").round("ms"), EventTimestampType(idx % 4), ) for idx, dt in enumerate( - pd.date_range(start=start_date, end=end_date, periods=size) + pd.date_range(start=start_date, end=end_date, periods=size, tz="UTC") ) ] df["created"] = pd.to_datetime(pd.Timestamp.now(tz=None).round("ms")) diff --git a/sdk/python/feast/type_map.py b/sdk/python/feast/type_map.py index 6ba61fc8c5..703c1dc7c5 100644 --- a/sdk/python/feast/type_map.py +++ b/sdk/python/feast/type_map.py @@ -162,7 +162,8 @@ def python_type_to_feast_value_type( "timestamp": ValueType.UNIX_TIMESTAMP, "datetime": ValueType.UNIX_TIMESTAMP, "datetime64[ns]": ValueType.UNIX_TIMESTAMP, - "datetime64[ns, tz]": ValueType.UNIX_TIMESTAMP, + "datetime64[ns, tz]": ValueType.UNIX_TIMESTAMP, # special dtype of pandas + "datetime64[ns, utc]": ValueType.UNIX_TIMESTAMP, "category": ValueType.STRING, } diff --git a/sdk/python/feast/utils.py b/sdk/python/feast/utils.py index 1a1d757fc1..0467393aa2 100644 --- a/sdk/python/feast/utils.py +++ b/sdk/python/feast/utils.py @@ -5,7 +5,7 @@ import typing import warnings from collections import Counter, defaultdict -from datetime import datetime +from datetime import datetime, timezone from pathlib import Path from typing import ( Any, @@ -1055,4 +1055,4 @@ def tags_str_to_dict(tags: str = "") -> dict[str, str]: def _utc_now() -> datetime: - return datetime.utcnow() + return datetime.now(tz=timezone.utc) diff --git a/sdk/python/tests/unit/test_datetime.py b/sdk/python/tests/unit/test_datetime.py deleted file mode 100644 index aaab507ed0..0000000000 --- a/sdk/python/tests/unit/test_datetime.py +++ /dev/null @@ -1,6 +0,0 @@ -# -*- coding: utf-8 -*- - - -""" -Test the retirement of datetime.utcnow() function. -""" diff --git a/sdk/python/tests/utils/feature_records.py b/sdk/python/tests/utils/feature_records.py index 2c26f3c000..bd3567c9ee 100644 --- a/sdk/python/tests/utils/feature_records.py +++ b/sdk/python/tests/utils/feature_records.py @@ -1,4 +1,4 @@ -from datetime import datetime, timedelta +from datetime import datetime, timedelta, timezone from typing import Any, Dict, List, Optional import numpy as np @@ -520,7 +520,7 @@ def get_last_feature_row(df: pd.DataFrame, driver_id, max_date: datetime): """Manually extract last feature value from a dataframe for a given driver_id with up to `max_date` date""" filtered = df[ (df["driver_id"] == driver_id) - & (df["event_timestamp"] < max_date.replace(tzinfo=utc)) + & (df["event_timestamp"] < max_date.replace(tzinfo=timezone.utc)) ] max_ts = filtered.loc[filtered["event_timestamp"].idxmax()]["event_timestamp"] filtered_by_ts = filtered[filtered["event_timestamp"] == max_ts] diff --git a/sdk/python/tests/utils/test_log_creator.py b/sdk/python/tests/utils/test_log_creator.py index f072f4c886..987c8d77ef 100644 --- a/sdk/python/tests/utils/test_log_creator.py +++ b/sdk/python/tests/utils/test_log_creator.py @@ -117,7 +117,7 @@ def prepare_logs( f"{destination_field}__status" ].mask( logs_df[f"{destination_field}__timestamp"] - < (_utc_now() - view.ttl), + < (_utc_now() - view.ttl).replace(tzinfo=None), FieldStatus.OUTSIDE_MAX_AGE, )