Skip to content

Commit

Permalink
feat(java): support drop columns for dataset
Browse files Browse the repository at this point in the history
  • Loading branch information
yanghua committed Dec 12, 2024
1 parent 7ec23f0 commit 0088d4b
Show file tree
Hide file tree
Showing 3 changed files with 68 additions and 0 deletions.
25 changes: 25 additions & 0 deletions java/core/lance-jni/src/blocking_dataset.rs
Original file line number Diff line number Diff line change
Expand Up @@ -657,3 +657,28 @@ fn inner_list_indexes<'local>(

Ok(array_list)
}

//////////////////////////////
// Schema evolution Methods //
//////////////////////////////
#[no_mangle]
pub extern "system" fn Java_com_lancedb_lance_Dataset_nativeDropColumns(
mut env: JNIEnv,
java_dataset: JObject,
columns_obj: JObject, // List<String>
) {
ok_or_throw_without_return!(env, inner_drop_columns(&mut env, java_dataset, columns_obj))
}

/// Drops the named columns from the dataset attached to `java_dataset`.
///
/// The column names are read from the Java `List<String>` first; only then is the
/// `BlockingDataset` stored in the `NATIVE_DATASET` field borrowed, and the
/// `drop_columns` future is driven to completion on `RT`.
///
/// # Errors
///
/// Any failure while reading the strings, fetching the Rust field, or dropping
/// the columns is propagated to the caller via `?`.
fn inner_drop_columns(
    env: &mut JNIEnv,
    java_dataset: JObject,
    columns_obj: JObject, // List<String>
) -> Result<()> {
    let column_names: Vec<String> = env.get_strings(&columns_obj)?;
    // `drop_columns` is given borrowed names, so build a view over the owned strings.
    let column_refs: Vec<&str> = column_names.iter().map(String::as_str).collect();

    // SAFETY: assumes the `NATIVE_DATASET` field of `java_dataset` was populated with a
    // `BlockingDataset` when the Java object was set up — TODO confirm against the
    // code path that attaches the native handle.
    let mut dataset_guard =
        unsafe { env.get_rust_field::<_, _, BlockingDataset>(java_dataset, NATIVE_DATASET) }?;

    let drop_future = dataset_guard.inner.drop_columns(&column_refs);
    RT.block_on(drop_future)?;
    Ok(())
}
14 changes: 14 additions & 0 deletions java/core/src/main/java/com/lancedb/lance/Dataset.java
Original file line number Diff line number Diff line change
Expand Up @@ -253,6 +253,20 @@ public static native Dataset commitAppend(
*/
public static native void drop(String path, Map<String, String> storageOptions);

/**
 * Removes the given columns from this dataset.
 *
 * <p>The call runs while holding the dataset's write lock. It is rejected by
 * {@code Preconditions.checkArgument} with the message "Dataset is closed" when the native
 * dataset handle is 0.
 *
 * @param columns names of the columns to drop
 */
public void dropColumns(List<String> columns) {
  try (LockManager.WriteLock lock = lockManager.acquireWriteLock()) {
    boolean datasetOpen = nativeDatasetHandle != 0;
    Preconditions.checkArgument(datasetOpen, "Dataset is closed");
    nativeDropColumns(columns);
  }
}

/**
 * Native counterpart of {@link #dropColumns(List)}.
 *
 * <p>The matching JNI symbol is {@code Java_com_lancedb_lance_Dataset_nativeDropColumns} in
 * {@code lance-jni/src/blocking_dataset.rs}.
 *
 * @param columns names of the columns to drop
 */
private native void nativeDropColumns(List<String> columns);

/**
* Create a new Dataset Scanner.
*
Expand Down
29 changes: 29 additions & 0 deletions java/core/src/test/java/com/lancedb/lance/DatasetTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,9 @@

import org.apache.arrow.memory.BufferAllocator;
import org.apache.arrow.memory.RootAllocator;
import org.apache.arrow.vector.types.pojo.ArrowType;
import org.apache.arrow.vector.types.pojo.Field;
import org.apache.arrow.vector.types.pojo.Schema;
import org.junit.jupiter.api.AfterAll;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Test;
Expand All @@ -21,7 +24,9 @@
import java.io.IOException;
import java.net.URISyntaxException;
import java.nio.file.Path;
import java.util.Collections;
import java.util.HashMap;
import java.util.stream.Collectors;

import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertThrows;
Expand Down Expand Up @@ -193,6 +198,30 @@ void testGetSchemaWithClosedDataset() {
}
}

/** Dropping the "name" column from the simple test dataset leaves only the "id" column. */
@Test
void testDropColumns() {
  // The anonymous class is declared inside this method, so its enclosing method is this test;
  // the test name is used as a per-test directory under tempDir.
  String methodName = new Object() {}.getClass().getEnclosingMethod().getName();
  String path = tempDir.resolve(methodName).toString();
  try (RootAllocator allocator = new RootAllocator(Long.MAX_VALUE)) {
    TestUtils.SimpleTestDataset fixture = new TestUtils.SimpleTestDataset(allocator, path);
    dataset = fixture.createEmptyDataset();
    // Sanity check: the freshly created dataset reports the fixture's schema.
    assertEquals(fixture.getSchema(), dataset.getSchema());

    dataset.dropColumns(Collections.singletonList("name"));

    // Expected schema after the drop: a single nullable signed 32-bit "id" field.
    Field idField = Field.nullable("id", new ArrowType.Int(32, true));
    Schema expected = new Schema(Collections.singletonList(idField), null);

    // Compare the field count first, then the field names in order.
    assertEquals(expected.getFields().size(), dataset.getSchema().getFields().size());
    assertEquals(
        expected.getFields().stream().map(Field::getName).collect(Collectors.toList()),
        dataset.getSchema().getFields().stream()
            .map(Field::getName)
            .collect(Collectors.toList()));
  }
}

@Test
void testDropPath() {
String testMethodName = new Object() {}.getClass().getEnclosingMethod().getName();
Expand Down

0 comments on commit 0088d4b

Please sign in to comment.