From 1d24e71041e35e26e126aa4508ed7384e8aa031c Mon Sep 17 00:00:00 2001 From: Kevin Liu Date: Tue, 25 Feb 2025 17:36:53 -0500 Subject: [PATCH] Turn `ObjectStoreLocationProvider` off by default (#1722) Closes #1721 Otherwise, there's a default behavior change in new 0.9.0 release. Previous versions will write to `data/`, new version will write to `data//` --- pyiceberg/table/__init__.py | 2 +- .../integration/test_writes/test_partitioned_writes.py | 9 +++++---- tests/table/test_locations.py | 10 +++++++--- 3 files changed, 13 insertions(+), 8 deletions(-) diff --git a/pyiceberg/table/__init__.py b/pyiceberg/table/__init__.py index e625b848b2..679f74d107 100644 --- a/pyiceberg/table/__init__.py +++ b/pyiceberg/table/__init__.py @@ -202,7 +202,7 @@ class TableProperties: WRITE_PY_LOCATION_PROVIDER_IMPL = "write.py-location-provider.impl" OBJECT_STORE_ENABLED = "write.object-storage.enabled" - OBJECT_STORE_ENABLED_DEFAULT = True + OBJECT_STORE_ENABLED_DEFAULT = False WRITE_OBJECT_STORE_PARTITIONED_PATHS = "write.object-storage.partitioned-paths" WRITE_OBJECT_STORE_PARTITIONED_PATHS_DEFAULT = True diff --git a/tests/integration/test_writes/test_partitioned_writes.py b/tests/integration/test_writes/test_partitioned_writes.py index 1e6ea1b797..a299036e6b 100644 --- a/tests/integration/test_writes/test_partitioned_writes.py +++ b/tests/integration/test_writes/test_partitioned_writes.py @@ -294,13 +294,14 @@ def test_object_storage_location_provider_excludes_partition_path( PartitionField(source_id=nested_field.field_id, field_id=1001, transform=IdentityTransform(), name=part_col) ) - # write.object-storage.enabled and write.object-storage.partitioned-paths don't need to be specified as they're on by default - assert TableProperties.OBJECT_STORE_ENABLED_DEFAULT - assert TableProperties.WRITE_OBJECT_STORE_PARTITIONED_PATHS_DEFAULT + # Enable `write.object-storage.enabled` which is False by default + # `write.object-storage.partitioned-paths` is True by default + assert TableProperties.OBJECT_STORE_ENABLED_DEFAULT is False + assert TableProperties.WRITE_OBJECT_STORE_PARTITIONED_PATHS_DEFAULT is True tbl = _create_table( session_catalog=session_catalog, identifier=f"default.arrow_table_v{format_version}_with_null_partitioned_on_col_{part_col}", - properties={"format-version": str(format_version)}, + properties={"format-version": str(format_version), TableProperties.OBJECT_STORE_ENABLED: True}, data=[arrow_table_with_null], partition_spec=partition_spec, ) diff --git a/tests/table/test_locations.py b/tests/table/test_locations.py index d66bf18792..4efa64326a 100644 --- a/tests/table/test_locations.py +++ b/tests/table/test_locations.py @@ -74,7 +74,7 @@ def test_custom_location_provider_not_found(caplog: Any) -> None: def test_object_storage_no_partition() -> None: - provider = load_location_provider(table_location="table_location", table_properties=EMPTY_DICT) + provider = load_location_provider(table_location="table_location", table_properties={"write.object-storage.enabled": "true"}) location = provider.new_data_location("test.parquet") parts = location.split("/") @@ -111,6 +111,7 @@ def test_object_storage_partitioned_paths_disabled(partition_key: Optional[Parti provider = load_location_provider( table_location="table_location", table_properties={ + "write.object-storage.enabled": "true", "write.object-storage.partitioned-paths": "false", }, ) @@ -131,7 +132,7 @@ def test_object_storage_partitioned_paths_disabled(partition_key: Optional[Parti ], ) def test_hash_injection(data_file_name: str, expected_hash: str) -> None: - provider = load_location_provider(table_location="table_location", table_properties=EMPTY_DICT) + provider = load_location_provider(table_location="table_location", table_properties={"write.object-storage.enabled": "true"}) assert provider.new_data_location(data_file_name) == f"table_location/data/{expected_hash}/{data_file_name}" @@ -139,7 +140,10 @@ def test_hash_injection(data_file_name: str, expected_hash: str) -> None: def test_object_location_provider_write_data_path() -> None: provider = load_location_provider( table_location="s3://table-location/table", - table_properties={TableProperties.WRITE_DATA_PATH: "s3://table-location/custom/data/path"}, + table_properties={ + "write.object-storage.enabled": "true", + TableProperties.WRITE_DATA_PATH: "s3://table-location/custom/data/path", + }, ) assert (