Skip to content

Commit

Permalink
fix: fix a panic that could happen when scanning only the row id from fragment with deleted rows (#2302)
Browse files Browse the repository at this point in the history
  • Loading branch information
westonpace authored May 6, 2024
1 parent 85a6656 commit 7c8333f
Show file tree
Hide file tree
Showing 2 changed files with 11 additions and 2 deletions.
8 changes: 8 additions & 0 deletions python/python/tests/test_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -1333,6 +1333,14 @@ def test_scan_no_columns(tmp_path: Path):
with pytest.raises(ValueError, match="no columns were selected"):
dataset.scanner(columns=[]).to_table()

# also test with deleted data to make sure deleted ids not included
dataset.delete("a = 5")
num_rows = 0
for batch in dataset.scanner(columns=[], with_row_id=True).to_batches():
num_rows += batch.num_rows

assert num_rows == 99


def test_scan_prefilter(tmp_path: Path):
base_dir = tmp_path / "dataset"
Expand Down
5 changes: 3 additions & 2 deletions rust/lance/src/dataset/fragment.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1328,12 +1328,13 @@ impl FragmentReader {
u64::from(RowAddress::new_from_parts(self.fragment_id as u32, *row_id))
})
.collect();
+ let num_intact_rows = row_ids.len() as u32;
  let row_ids_array = UInt64Array::from(row_ids);
  let row_id_schema = Arc::new(self.output_schema.clone());
- let tasks = (0..total_num_rows)
+ let tasks = (0..num_intact_rows)
  .step_by(batch_size as usize)
  .map(move |offset| {
- let length = batch_size.min(total_num_rows - offset);
+ let length = batch_size.min(num_intact_rows - offset);
let array = Arc::new(row_ids_array.slice(offset as usize, length as usize));
let batch = RecordBatch::try_new(row_id_schema.clone(), vec![array]);
std::future::ready(batch.map_err(Error::from)).boxed()
Expand Down

0 comments on commit 7c8333f

Please sign in to comment.