diff --git a/java/core/lance-jni/src/blocking_dataset.rs b/java/core/lance-jni/src/blocking_dataset.rs
index 6384d266dc..7c0ffaf717 100644
--- a/java/core/lance-jni/src/blocking_dataset.rs
+++ b/java/core/lance-jni/src/blocking_dataset.rs
@@ -657,3 +657,36 @@ fn inner_list_indexes<'local>(
 
     Ok(array_list)
 }
+
+//////////////////////////////
+// Schema evolution Methods //
+//////////////////////////////
+/// JNI entry point backing `Dataset.nativeDropColumns` on the Java side.
+///
+/// Forwards to [`inner_drop_columns`]; the `Result` is passed to
+/// `ok_or_throw_without_return!`, which presumably raises a Java exception on `Err`.
+#[no_mangle]
+pub extern "system" fn Java_com_lancedb_lance_Dataset_nativeDropColumns(
+    mut env: JNIEnv,
+    java_dataset: JObject,
+    columns_obj: JObject, // List<String>
+) {
+    ok_or_throw_without_return!(env, inner_drop_columns(&mut env, java_dataset, columns_obj))
+}
+
+/// Reads the column names from `columns_obj` and drops them from the `BlockingDataset`
+/// stored in the `NATIVE_DATASET` field of `java_dataset`, blocking on `RT` until done.
+fn inner_drop_columns(
+    env: &mut JNIEnv,
+    java_dataset: JObject,
+    columns_obj: JObject, // List<String>
+) -> Result<()> {
+    let columns: Vec<String> = env.get_strings(&columns_obj)?;
+    let columns_slice: Vec<&str> = columns.iter().map(AsRef::as_ref).collect();
+    // SAFETY: assumes the `NATIVE_DATASET` field of `java_dataset` holds a `BlockingDataset`
+    // (TODO confirm against the code that sets this field).
+    let mut dataset_guard =
+        unsafe { env.get_rust_field::<_, _, BlockingDataset>(java_dataset, NATIVE_DATASET) }?;
+    RT.block_on(dataset_guard.inner.drop_columns(&columns_slice))?;
+    Ok(())
+}
diff --git a/java/core/src/main/java/com/lancedb/lance/Dataset.java b/java/core/src/main/java/com/lancedb/lance/Dataset.java
index c0c1e33cb8..73e4190418 100644
--- a/java/core/src/main/java/com/lancedb/lance/Dataset.java
+++ b/java/core/src/main/java/com/lancedb/lance/Dataset.java
@@ -253,6 +253,23 @@ public static native Dataset commitAppend(
    */
   public static native void drop(String path, Map<String, String> storageOptions);
 
+  /**
+   * Drop columns from the dataset.
+   *
+   * <p>Holds the write lock while the native call runs. The precondition check fails with
+   * "Dataset is closed" if the native handle is 0.
+   *
+   * @param columns The columns to drop
+   */
+  public void dropColumns(List<String> columns) {
+    try (LockManager.WriteLock writeLock = lockManager.acquireWriteLock()) {
+      Preconditions.checkArgument(nativeDatasetHandle != 0, "Dataset is closed");
+      nativeDropColumns(columns);
+    }
+  }
+
+  private native void nativeDropColumns(List<String> columns);
+
   /**
    * Create a new Dataset Scanner.
    *
diff --git a/java/core/src/test/java/com/lancedb/lance/DatasetTest.java b/java/core/src/test/java/com/lancedb/lance/DatasetTest.java
index 4d5ba75843..0115bb35f0 100644
--- a/java/core/src/test/java/com/lancedb/lance/DatasetTest.java
+++ b/java/core/src/test/java/com/lancedb/lance/DatasetTest.java
@@ -13,6 +13,9 @@
 
 import org.apache.arrow.memory.BufferAllocator;
 import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.Schema;
 import org.junit.jupiter.api.AfterAll;
 import org.junit.jupiter.api.BeforeAll;
 import org.junit.jupiter.api.Test;
@@ -21,7 +24,9 @@
 import java.io.IOException;
 import java.net.URISyntaxException;
 import java.nio.file.Path;
+import java.util.Collections;
 import java.util.HashMap;
+import java.util.stream.Collectors;
 
 import static org.junit.jupiter.api.Assertions.assertEquals;
 import static org.junit.jupiter.api.Assertions.assertThrows;
@@ -193,6 +198,31 @@ void testGetSchemaWithClosedDataset() {
     }
   }
 
+  @Test
+  void testDropColumns() {
+    String testMethodName = new Object() {}.getClass().getEnclosingMethod().getName();
+    String datasetPath = tempDir.resolve(testMethodName).toString();
+    try (RootAllocator allocator = new RootAllocator(Long.MAX_VALUE)) {
+      TestUtils.SimpleTestDataset testDataset =
+          new TestUtils.SimpleTestDataset(allocator, datasetPath);
+      dataset = testDataset.createEmptyDataset();
+      assertEquals(testDataset.getSchema(), dataset.getSchema());
+      dataset.dropColumns(Collections.singletonList("name"));
+
+      // After dropping "name", only the "id" field is expected to remain.
+      Schema changedSchema =
+          new Schema(
+              Collections.singletonList(Field.nullable("id", new ArrowType.Int(32, true))), null);
+
+      assertEquals(changedSchema.getFields().size(), dataset.getSchema().getFields().size());
+      assertEquals(
+          changedSchema.getFields().stream().map(Field::getName).collect(Collectors.toList()),
+          dataset.getSchema().getFields().stream()
+              .map(Field::getName)
+              .collect(Collectors.toList()));
+    }
+  }
+
   @Test
   void testDropPath() {
     String testMethodName = new Object() {}.getClass().getEnclosingMethod().getName();