diff --git a/sdk/python/feast/infra/offline_stores/file.py b/sdk/python/feast/infra/offline_stores/file.py index 0e5064ba78..0b873a2091 100644 --- a/sdk/python/feast/infra/offline_stores/file.py +++ b/sdk/python/feast/infra/offline_stores/file.py @@ -4,6 +4,7 @@ from pathlib import Path from typing import Any, Callable, List, Literal, Optional, Tuple, Union +import dask import dask.dataframe as dd import pandas as pd import pyarrow @@ -42,6 +43,11 @@ _run_dask_field_mapping, ) +# FileRetrievalJob will cast string objects to string[pyarrow] from dask version 2023.7.1 +# This is not the desired behavior for our use case, so we set the convert-string option to False +# See (https://github.com/dask/dask/issues/10881#issuecomment-1923327936) +dask.config.set({"dataframe.convert-string": False}) + class FileOfflineStoreConfig(FeastConfigBaseModel): """Offline store config for local (file-based) store""" @@ -366,8 +372,6 @@ def evaluate_offline_job(): source_df[DUMMY_ENTITY_ID] = DUMMY_ENTITY_VAL columns_to_extract.add(DUMMY_ENTITY_ID) - source_df = source_df.persist() - return source_df[list(columns_to_extract)].persist() # When materializing a single feature view, we don't need full feature names. On demand transforms aren't materialized diff --git a/sdk/python/requirements/py3.10-ci-requirements.txt b/sdk/python/requirements/py3.10-ci-requirements.txt index f20bc05df9..3414fd0c3b 100644 --- a/sdk/python/requirements/py3.10-ci-requirements.txt +++ b/sdk/python/requirements/py3.10-ci-requirements.txt @@ -59,11 +59,11 @@ black==22.12.0 # via feast (setup.py) bleach==6.1.0 # via nbconvert -boto3==1.34.42 +boto3==1.34.44 # via # feast (setup.py) # moto -botocore==1.34.42 +botocore==1.34.44 # via # boto3 # moto @@ -215,7 +215,7 @@ google-api-core[grpc]==2.17.1 # google-cloud-storage google-api-python-client==2.118.0 # via firebase-admin -google-auth==2.27.0 +google-auth==2.28.0 # via # google-api-core # google-api-python-client @@ -226,10 +226,6 @@ google-auth==2.27.0 google-auth-httplib2==0.2.0 # via google-api-python-client google-cloud-bigquery[pandas]==3.12.0 - # via - # feast (setup.py) - # google-cloud-bigquery -google-cloud-bigquery-storage==2.24.0 # via feast (setup.py) google-cloud-bigquery-storage==2.24.0 # via feast (setup.py) @@ -264,7 +260,7 @@ googleapis-common-protos[grpc]==1.62.0 # google-api-core # grpc-google-iam-v1 # grpcio-status -great-expectations==0.18.8 +great-expectations==0.18.9 # via feast (setup.py) greenlet==3.0.3 # via sqlalchemy @@ -491,7 +487,7 @@ nodeenv==1.8.0 # via pre-commit notebook==7.1.0 # via great-expectations -notebook-shim==0.2.3 +notebook-shim==0.2.4 # via # jupyterlab # notebook @@ -529,7 +525,7 @@ packaging==23.2 # pytest # snowflake-connector-python # sphinx -pandas==1.5.3 +pandas==2.2.0 # via # altair # db-dtypes @@ -538,7 +534,7 @@ pandas==1.5.3 # great-expectations # pandavro # snowflake-connector-python -pandavro==1.5.2 +pandavro==1.8.0 # via feast (setup.py) pandocfilters==1.5.1 # via nbconvert @@ -552,7 +548,7 @@ pbr==6.0.0 # via mock pexpect==4.9.0 # via ipython -pip-tools==7.3.0 +pip-tools==7.4.0 # via feast (setup.py) platformdirs==3.11.0 # via @@ -666,7 +662,9 @@ pyparsing==3.1.1 # great-expectations # httplib2 pyproject-hooks==1.0.0 - # via build + # via + # build + # pip-tools pyspark==3.5.0 # via feast (setup.py) pytest==7.4.4 @@ -795,7 +793,6 @@ six==1.16.0 # isodate # kubernetes # mock - # pandavro # python-dateutil # rfc3339-validator # thriftpy2 @@ -826,7 +823,9 @@ sphinxcontrib-qthelp==1.0.7 sphinxcontrib-serializinghtml==1.1.10 # via sphinx sqlalchemy[mypy]==1.4.51 - # via feast (setup.py) + # via + # feast (setup.py) + # sqlalchemy sqlalchemy2-stubs==0.0.2a38 # via sqlalchemy stack-data==0.6.3 @@ -893,7 +892,7 @@ traitlets==5.14.1 # nbclient # nbconvert # nbformat -trino==0.327.0 +trino==0.328.0 # via feast (setup.py) typeguard==4.1.5 # via feast (setup.py) @@ -913,11 +912,11 @@ types-pytz==2024.1.0.20240203 # via feast (setup.py) types-pyyaml==6.0.12.12 # via feast (setup.py) -types-redis==4.6.0.20240106 +types-redis==4.6.0.20240218 # via feast (setup.py) types-requests==2.30.0.0 # via feast (setup.py) -types-setuptools==69.0.0.20240125 +types-setuptools==69.1.0.20240217 # via feast (setup.py) types-tabulate==0.9.0.20240106 # via feast (setup.py) @@ -936,7 +935,10 @@ typing-extensions==4.9.0 # pydantic-core # snowflake-connector-python # sqlalchemy2-stubs + # typeguard # uvicorn +tzdata==2024.1 + # via pandas tzlocal==5.2 # via # great-expectations diff --git a/sdk/python/requirements/py3.10-requirements.txt b/sdk/python/requirements/py3.10-requirements.txt index 3943662d01..07e2faeadb 100644 --- a/sdk/python/requirements/py3.10-requirements.txt +++ b/sdk/python/requirements/py3.10-requirements.txt @@ -121,11 +121,11 @@ packaging==23.2 # via # dask # gunicorn -pandas==1.5.3 +pandas==2.2.0 # via # feast (setup.py) # pandavro -pandavro==1.5.2 +pandavro==1.8.0 # via feast (setup.py) partd==1.4.1 # via dask @@ -171,9 +171,7 @@ rpds-py==0.18.0 # jsonschema # referencing six==1.16.0 - # via - # pandavro - # python-dateutil + # via python-dateutil sniffio==1.3.0 # via # anyio @@ -212,13 +210,14 @@ typing-extensions==4.9.0 # pydantic # pydantic-core # sqlalchemy2-stubs + # typeguard # uvicorn -urllib3==2.2.0 +tzdata==2024.1 + # via pandas +urllib3==2.2.1 # via requests uvicorn[standard]==0.27.1 - # via - # feast (setup.py) - # uvicorn + # via feast (setup.py) uvloop==0.19.0 # via uvicorn volatile==2.1.0 diff --git a/sdk/python/requirements/py3.8-ci-requirements.txt b/sdk/python/requirements/py3.8-ci-requirements.txt index afa43ec2a2..9a4e38b716 100644 --- a/sdk/python/requirements/py3.8-ci-requirements.txt +++ b/sdk/python/requirements/py3.8-ci-requirements.txt @@ -4,7 +4,6 @@ # # pip-compile --extra=ci --output-file=sdk/python/requirements/py3.8-ci-requirements.txt # - alabaster==0.7.13 # via sphinx altair==4.2.2 @@ -66,11 +65,11 @@ black==22.12.0 # via feast (setup.py) bleach==6.1.0 # via nbconvert -boto3==1.34.42 +boto3==1.34.44 # via # feast (setup.py) # moto -botocore==1.34.42 +botocore==1.34.44 # via # boto3 # moto @@ -221,7 +220,7 @@ google-api-core[grpc]==2.17.1 # google-cloud-storage google-api-python-client==2.118.0 # via firebase-admin -google-auth==2.27.0 +google-auth==2.28.0 # via # google-api-core # google-api-python-client @@ -232,10 +231,6 @@ google-auth==2.27.0 google-auth-httplib2==0.2.0 # via google-api-python-client google-cloud-bigquery[pandas]==3.12.0 - # via - # feast (setup.py) - # google-cloud-bigquery -google-cloud-bigquery-storage==2.24.0 # via feast (setup.py) google-cloud-bigquery-storage==2.24.0 # via feast (setup.py) @@ -270,7 +265,7 @@ googleapis-common-protos[grpc]==1.62.0 # google-api-core # grpc-google-iam-v1 # grpcio-status -great-expectations==0.18.8 +great-expectations==0.18.9 # via feast (setup.py) greenlet==3.0.3 # via sqlalchemy @@ -344,6 +339,7 @@ importlib-metadata==6.11.0 # jupyterlab-server # nbconvert # sphinx + # typeguard importlib-resources==6.1.1 # via # feast (setup.py) @@ -508,7 +504,7 @@ nodeenv==1.8.0 # via pre-commit notebook==7.1.0 # via great-expectations -notebook-shim==0.2.3 +notebook-shim==0.2.4 # via # jupyterlab # notebook @@ -555,7 +551,7 @@ pandas==1.5.3 # great-expectations # pandavro # snowflake-connector-python -pandavro==1.5.2 +pandavro==1.8.0 # via feast (setup.py) pandocfilters==1.5.1 # via nbconvert @@ -571,7 +567,7 @@ pexpect==4.9.0 # via ipython pickleshare==0.7.5 # via ipython -pip-tools==7.3.0 +pip-tools==7.4.0 # via feast (setup.py) pkgutil-resolve-name==1.3.10 # via jsonschema @@ -687,7 +683,9 @@ pyparsing==3.1.1 # great-expectations # httplib2 pyproject-hooks==1.0.0 - # via build + # via + # build + # pip-tools pyspark==3.5.0 # via feast (setup.py) pytest==7.4.4 @@ -819,7 +817,6 @@ six==1.16.0 # isodate # kubernetes # mock - # pandavro # python-dateutil # rfc3339-validator # thriftpy2 @@ -850,7 +847,9 @@ sphinxcontrib-qthelp==1.0.3 sphinxcontrib-serializinghtml==1.1.5 # via sphinx sqlalchemy[mypy]==1.4.51 - # via feast (setup.py) + # via + # feast (setup.py) + # sqlalchemy sqlalchemy2-stubs==0.0.2a38 # via sqlalchemy stack-data==0.6.3 @@ -917,7 +916,7 @@ traitlets==5.14.1 # nbclient # nbconvert # nbformat -trino==0.327.0 +trino==0.328.0 # via feast (setup.py) typeguard==4.1.5 # via feast (setup.py) @@ -937,11 +936,11 @@ types-pytz==2024.1.0.20240203 # via feast (setup.py) types-pyyaml==6.0.12.12 # via feast (setup.py) -types-redis==4.6.0.20240106 +types-redis==4.6.0.20240218 # via feast (setup.py) types-requests==2.30.0.0 # via feast (setup.py) -types-setuptools==69.0.0.20240125 +types-setuptools==69.1.0.20240217 # via feast (setup.py) types-tabulate==0.9.0.20240106 # via feast (setup.py) @@ -949,6 +948,7 @@ types-urllib3==1.26.25.14 # via types-requests typing-extensions==4.9.0 # via + # annotated-types # anyio # async-lru # azure-core @@ -963,7 +963,10 @@ typing-extensions==4.9.0 # snowflake-connector-python # sqlalchemy2-stubs # starlette + # typeguard # uvicorn +tzdata==2024.1 + # via pandas tzlocal==5.2 # via # great-expectations diff --git a/sdk/python/requirements/py3.8-requirements.txt b/sdk/python/requirements/py3.8-requirements.txt index 079064a9ec..e64ebf9d94 100644 --- a/sdk/python/requirements/py3.8-requirements.txt +++ b/sdk/python/requirements/py3.8-requirements.txt @@ -89,6 +89,7 @@ importlib-metadata==6.11.0 # via # dask # feast (setup.py) + # typeguard importlib-resources==6.1.1 # via # feast (setup.py) @@ -128,7 +129,7 @@ pandas==1.5.3 # via # feast (setup.py) # pandavro -pandavro==1.5.2 +pandavro==1.8.0 # via feast (setup.py) partd==1.4.1 # via dask @@ -176,15 +177,15 @@ rpds-py==0.18.0 # jsonschema # referencing six==1.16.0 - # via - # pandavro - # python-dateutil + # via python-dateutil sniffio==1.3.0 # via # anyio # httpx sqlalchemy[mypy]==1.4.51 - # via feast (setup.py) + # via + # feast (setup.py) + # sqlalchemy sqlalchemy2-stubs==0.0.2a38 # via sqlalchemy starlette==0.36.3 @@ -209,6 +210,7 @@ types-protobuf==4.24.0.20240129 # via mypy-protobuf typing-extensions==4.9.0 # via + # annotated-types # anyio # fastapi # mypy @@ -216,13 +218,14 @@ typing-extensions==4.9.0 # pydantic-core # sqlalchemy2-stubs # starlette + # typeguard # uvicorn -urllib3==2.2.0 +tzdata==2024.1 + # via pandas +urllib3==2.2.1 # via requests uvicorn[standard]==0.27.1 - # via - # feast (setup.py) - # uvicorn + # via feast (setup.py) uvloop==0.19.0 # via uvicorn volatile==2.1.0 diff --git a/sdk/python/requirements/py3.9-ci-requirements.txt b/sdk/python/requirements/py3.9-ci-requirements.txt index 6c26f889e2..0e52a90ab8 100644 --- a/sdk/python/requirements/py3.9-ci-requirements.txt +++ b/sdk/python/requirements/py3.9-ci-requirements.txt @@ -59,11 +59,11 @@ black==22.12.0 # via feast (setup.py) bleach==6.1.0 # via nbconvert -boto3==1.34.42 +boto3==1.34.44 # via # feast (setup.py) # moto -botocore==1.34.42 +botocore==1.34.44 # via # boto3 # moto @@ -215,7 +215,7 @@ google-api-core[grpc]==2.17.1 # google-cloud-storage google-api-python-client==2.118.0 # via firebase-admin -google-auth==2.27.0 +google-auth==2.28.0 # via # google-api-core # google-api-python-client @@ -226,10 +226,6 @@ google-auth==2.27.0 google-auth-httplib2==0.2.0 # via google-api-python-client google-cloud-bigquery[pandas]==3.12.0 - # via - # feast (setup.py) - # google-cloud-bigquery -google-cloud-bigquery-storage==2.24.0 # via feast (setup.py) google-cloud-bigquery-storage==2.24.0 # via feast (setup.py) @@ -264,7 +260,7 @@ googleapis-common-protos[grpc]==1.62.0 # google-api-core # grpc-google-iam-v1 # grpcio-status -great-expectations==0.18.8 +great-expectations==0.18.9 # via feast (setup.py) greenlet==3.0.3 # via sqlalchemy @@ -338,6 +334,7 @@ importlib-metadata==6.11.0 # jupyterlab-server # nbconvert # sphinx + # typeguard importlib-resources==6.1.1 # via feast (setup.py) iniconfig==2.0.0 @@ -498,7 +495,7 @@ nodeenv==1.8.0 # via pre-commit notebook==7.1.0 # via great-expectations -notebook-shim==0.2.3 +notebook-shim==0.2.4 # via # jupyterlab # notebook @@ -536,7 +533,7 @@ packaging==23.2 # pytest # snowflake-connector-python # sphinx -pandas==1.5.3 +pandas==2.2.0 # via # altair # db-dtypes @@ -545,7 +542,7 @@ pandas==1.5.3 # great-expectations # pandavro # snowflake-connector-python -pandavro==1.5.2 +pandavro==1.8.0 # via feast (setup.py) pandocfilters==1.5.1 # via nbconvert @@ -559,7 +556,7 @@ pbr==6.0.0 # via mock pexpect==4.9.0 # via ipython -pip-tools==7.3.0 +pip-tools==7.4.0 # via feast (setup.py) platformdirs==3.11.0 # via @@ -673,7 +670,9 @@ pyparsing==3.1.1 # great-expectations # httplib2 pyproject-hooks==1.0.0 - # via build + # via + # build + # pip-tools pyspark==3.5.0 # via feast (setup.py) pytest==7.4.4 @@ -804,7 +803,6 @@ six==1.16.0 # isodate # kubernetes # mock - # pandavro # python-dateutil # rfc3339-validator # thriftpy2 @@ -835,7 +833,9 @@ sphinxcontrib-qthelp==1.0.7 sphinxcontrib-serializinghtml==1.1.10 # via sphinx sqlalchemy[mypy]==1.4.51 - # via feast (setup.py) + # via + # feast (setup.py) + # sqlalchemy sqlalchemy2-stubs==0.0.2a38 # via sqlalchemy stack-data==0.6.3 @@ -902,7 +902,7 @@ traitlets==5.14.1 # nbclient # nbconvert # nbformat -trino==0.327.0 +trino==0.328.0 # via feast (setup.py) typeguard==4.1.5 # via feast (setup.py) @@ -922,11 +922,11 @@ types-pytz==2024.1.0.20240203 # via feast (setup.py) types-pyyaml==6.0.12.12 # via feast (setup.py) -types-redis==4.6.0.20240106 +types-redis==4.6.0.20240218 # via feast (setup.py) types-requests==2.30.0.0 # via feast (setup.py) -types-setuptools==69.0.0.20240125 +types-setuptools==69.1.0.20240217 # via feast (setup.py) types-tabulate==0.9.0.20240106 # via feast (setup.py) @@ -948,7 +948,10 @@ typing-extensions==4.9.0 # snowflake-connector-python # sqlalchemy2-stubs # starlette + # typeguard # uvicorn +tzdata==2024.1 + # via pandas tzlocal==5.2 # via # great-expectations diff --git a/sdk/python/requirements/py3.9-requirements.txt b/sdk/python/requirements/py3.9-requirements.txt index 182cb7ad07..40d6a6d8db 100644 --- a/sdk/python/requirements/py3.9-requirements.txt +++ b/sdk/python/requirements/py3.9-requirements.txt @@ -89,6 +89,7 @@ importlib-metadata==6.11.0 # via # dask # feast (setup.py) + # typeguard importlib-resources==6.1.1 # via feast (setup.py) jinja2==3.1.3 @@ -121,11 +122,11 @@ packaging==23.2 # via # dask # gunicorn -pandas==1.5.3 +pandas==2.2.0 # via # feast (setup.py) # pandavro -pandavro==1.5.2 +pandavro==1.8.0 # via feast (setup.py) partd==1.4.1 # via dask @@ -171,15 +172,15 @@ rpds-py==0.18.0 # jsonschema # referencing six==1.16.0 - # via - # pandavro - # python-dateutil + # via python-dateutil sniffio==1.3.0 # via # anyio # httpx sqlalchemy[mypy]==1.4.51 - # via feast (setup.py) + # via + # feast (setup.py) + # sqlalchemy sqlalchemy2-stubs==0.0.2a38 # via sqlalchemy starlette==0.36.3 @@ -211,8 +212,11 @@ typing-extensions==4.9.0 # pydantic-core # sqlalchemy2-stubs # starlette + # typeguard # uvicorn -urllib3==2.2.0 +tzdata==2024.1 + # via pandas +urllib3==2.2.1 # via requests uvicorn[standard]==0.27.1 # via feast (setup.py) diff --git a/sdk/python/tests/integration/e2e/test_validation.py b/sdk/python/tests/integration/e2e/test_validation.py index f49ed80a26..fdf182be57 100644 --- a/sdk/python/tests/integration/e2e/test_validation.py +++ b/sdk/python/tests/integration/e2e/test_validation.py @@ -167,7 +167,7 @@ def test_logged_features_validation(environment, universal_data_sources): { "customer_id": 2000 + i, "driver_id": 6000 + i, - "event_timestamp": datetime.datetime.now(), + "event_timestamp": make_tzaware(datetime.datetime.now()), } ] ), diff --git a/setup.py b/setup.py index c14d64557a..8250d17658 100644 --- a/setup.py +++ b/setup.py @@ -54,9 +54,8 @@ "jsonschema", "mmh3", "numpy>=1.22,<1.25", - "pandas>=1.4.3,<2", - # For some reason pandavro higher than 1.5.* only support pandas less than 1.3. - "pandavro~=1.5.0", + "pandas>=1.4.3,<3", + "pandavro>=1.7.1", # Higher than 4.23.4 seems to cause a seg fault "protobuf<4.23.4,>3.20", "proto-plus>=1.20.0,<2",