From 92228bd7b3c36687c6b20b29ff92a17acaad0616 Mon Sep 17 00:00:00 2001 From: Ion Koutsouris <15728914+ion-elgreco@users.noreply.github.com> Date: Mon, 22 Apr 2024 18:08:23 +0200 Subject: [PATCH] adds remove actions during overwrite create on existing table --- crates/core/src/operations/create.rs | 9 ++++++++- python/tests/test_create.py | 13 ++++++++++++- 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/crates/core/src/operations/create.rs b/crates/core/src/operations/create.rs index b2092cafe8..9ad5bcfb9b 100644 --- a/crates/core/src/operations/create.rs +++ b/crates/core/src/operations/create.rs @@ -310,6 +310,7 @@ impl CreateBuilder { }; let mut actions = vec![Action::Protocol(protocol), Action::Metadata(metadata)]; + actions.extend( self.actions .into_iter() @@ -329,7 +330,7 @@ impl std::future::IntoFuture for CreateBuilder { Box::pin(async move { let mode = this.mode; let app_metadata = this.metadata.clone().unwrap_or_default(); - let (mut table, actions, operation) = this.into_table_and_actions()?; + let (mut table, mut actions, operation) = this.into_table_and_actions()?; let log_store = table.log_store(); let table_state = if log_store.is_delta_table_location().await? { @@ -342,6 +343,12 @@ impl std::future::IntoFuture for CreateBuilder { } SaveMode::Overwrite => { table.load().await?; + let remove_actions = table + .snapshot()? + .log_data() + .into_iter() + .map(|p| p.remove_action(true).into()); + actions.extend(remove_actions); Some(table.snapshot()?) } } diff --git a/python/tests/test_create.py b/python/tests/test_create.py index 3852fc2bab..ceca8178c3 100644 --- a/python/tests/test_create.py +++ b/python/tests/test_create.py @@ -3,7 +3,7 @@ import pyarrow as pa import pytest -from deltalake import DeltaTable +from deltalake import DeltaTable, write_deltalake from deltalake.exceptions import DeltaError @@ -54,3 +54,14 @@ def test_create_schema(tmp_path: pathlib.Path, sample_data: pa.Table): ) assert dt.schema().to_pyarrow() == sample_data.schema + + +def test_create_or_replace_existing_table( + tmp_path: pathlib.Path, sample_data: pa.Table +): + write_deltalake(table_or_uri=tmp_path, data=sample_data) + dt = DeltaTable.create( + tmp_path, sample_data.schema, partition_by=["utf8"], mode="overwrite" + ) + + assert dt.files() == []