From 050f623f9688cf572e10513db16d1998bd013d67 Mon Sep 17 00:00:00 2001 From: Hai Nguyen Date: Thu, 28 Sep 2023 11:05:18 +0700 Subject: [PATCH] feat: Support GCS filesystem for bytewax engine Signed-off-by: Hai Nguyen --- .../contrib/bytewax/bytewax_materialization_dataflow.py | 4 +--- setup.py | 1 + 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/sdk/python/feast/infra/materialization/contrib/bytewax/bytewax_materialization_dataflow.py b/sdk/python/feast/infra/materialization/contrib/bytewax/bytewax_materialization_dataflow.py index bf5229303ab..fe2a7f35c17 100644 --- a/sdk/python/feast/infra/materialization/contrib/bytewax/bytewax_materialization_dataflow.py +++ b/sdk/python/feast/infra/materialization/contrib/bytewax/bytewax_materialization_dataflow.py @@ -2,7 +2,6 @@ import pyarrow as pa import pyarrow.parquet as pq -import s3fs from bytewax.dataflow import Dataflow # type: ignore from bytewax.execution import cluster_main from bytewax.inputs import ManualInputConfig, distribute @@ -29,8 +28,7 @@ def __init__( self._run_dataflow() def process_path(self, path): - fs = s3fs.S3FileSystem() - dataset = pq.ParquetDataset(path, filesystem=fs, use_legacy_dataset=False) + dataset = pq.ParquetDataset(path, use_legacy_dataset=False) batches = [] for fragment in dataset.fragments: for batch in fragment.to_table().to_batches(): diff --git a/setup.py b/setup.py index f7b1ff04175..16a6462cc2a 100644 --- a/setup.py +++ b/setup.py @@ -91,6 +91,7 @@ "google-cloud-datastore>=2.1.0,<3", "google-cloud-storage>=1.34.0,<3", "google-cloud-bigtable>=2.11.0,<3", + "gcsfs>=2023.3.0,<2024.0.0", ] REDIS_REQUIRED = [