Skip to content

Commit

Permalink
Expand testing to make sure we return nice error messages for the unsupported operations
Browse files Browse the repository at this point in the history
  • Loading branch information
westonpace committed Nov 5, 2024
1 parent 5fe765f commit 74a6ba3
Show file tree
Hide file tree
Showing 5 changed files with 211 additions and 134 deletions.
74 changes: 74 additions & 0 deletions python/python/tests/test_balanced.py
Original file line number Diff line number Diff line change
Expand Up @@ -199,6 +199,80 @@ def test_compaction(tmp_path, big_val):
)


def test_schema(balanced_dataset):
    # The dataset schema must still expose the blob column, carrying the
    # storage-class metadata that marks it as blob-backed, next to the
    # ordinary idx column.
    blob_field = pa.field(
        "blobs",
        pa.large_binary(),
        metadata={"lance-schema:storage-class": "blob"},
    )
    idx_field = pa.field("idx", pa.uint64())
    assert balanced_dataset.schema == pa.schema([blob_field, idx_field])


def test_sample(balanced_dataset):
    # Sampling only the non-blob column is supported and returns the
    # requested number of rows.
    assert balanced_dataset.sample(10, columns=["idx"]).num_rows == 10

    # Any sample that touches the blob column is not supported yet.
    # The error message is not the most obvious but hopefully not long lived.
    addr_to_id_err = "Not supported.*mapping from row addresses to row ids"
    with pytest.raises(OSError, match=addr_to_id_err):
        balanced_dataset.sample(10)
    with pytest.raises(OSError, match=addr_to_id_err):
        balanced_dataset.sample(10, columns=["blobs"])


def test_add_columns(tmp_path, balanced_dataset):
    # Adding a derived column is fine as long as the expression never
    # touches the blob column.
    balanced_dataset.add_columns({"idx2": "idx * 2"})

    expected = pa.table(
        {
            "idx": pa.array(range(128), pa.uint64()),
            "idx2": pa.array(range(0, 256, 2), pa.uint64()),
        }
    )
    assert balanced_dataset.to_table() == expected

    # A new column whose value requires scanning blob (non-default) storage
    # must be rejected with a helpful error message.
    with pytest.raises(
        OSError, match="Not supported.*adding columns.*scanning non-default storage"
    ):
        balanced_dataset.add_columns({"blobs2": "blobs"})


def test_unsupported(balanced_dataset, big_val):
    # Update and merge-insert both add/modify rows, which would require
    # keeping the sibling (blob) datasets in sync.  That is not implemented
    # yet, so each operation must fail with a useful error message.

    with pytest.raises(
        ValueError, match="Not supported.*Updating.*non-default storage"
    ):
        balanced_dataset.update({"idx": "0"})

    with pytest.raises(
        # This error could be nicer but it's fine for now
        OSError,
        match="Not supported.*Scanning.*non-default storage",
    ):
        merge = balanced_dataset.merge_insert("idx").when_not_matched_insert_all()
        merge.execute(make_table(0, 1, big_val))


# TODO: Once https://github.com/lancedb/lance/pull/3041 merges we will
# want to test partial appends. We need to make sure an append of
# non-blob data is supported. In order to do this we need to make
Expand Down
11 changes: 1 addition & 10 deletions rust/lance/src/dataset.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1199,12 +1199,7 @@ impl Dataset {
row_indices: &[u64],
projection: impl Into<ProjectionRequest>,
) -> Result<RecordBatch> {
take::take(
self,
row_indices,
&projection.into().into_projection_plan(self.schema())?,
)
.await
take::take(self, row_indices, projection.into()).await
}

/// Take Rows by the internal ROW ids.
Expand Down Expand Up @@ -1605,10 +1600,6 @@ impl Dataset {
.collect())
}

// Leaving this here so it is more obvious to future readers that we can do this and
// someone doesn't go off and create a new function to do this. Delete this comment
// if you use this method.
#[allow(unused)]
/// Filter the given row addresses down to those that are not deleted.
///
/// Thin wrapper over `filter_addr_or_ids`, passing `addrs` as both the
/// address and id arguments (valid because for this call the two views
/// are the same slice).
pub(crate) async fn filter_deleted_addresses(&self, addrs: &[u64]) -> Result<Vec<u64>> {
    self.filter_addr_or_ids(addrs, addrs).await
}
Expand Down
8 changes: 8 additions & 0 deletions rust/lance/src/dataset/fragment.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1186,6 +1186,14 @@ impl FileFragment {
}
schema = schema.project(&projection)?;
}

if schema.fields.iter().any(|f| !f.is_default_storage()) {
return Err(Error::NotSupported {
source: "adding columns whose value depends on scanning non-default storage".into(),
location: location!(),
});
}

// If there is no projection, we at least need to read the row addresses
with_row_addr |= schema.fields.is_empty();

Expand Down
Loading

0 comments on commit 74a6ba3

Please sign in to comment.