Skip to content

Commit

Permalink
perf: don't concat the batches when merging partitions
Browse files Browse the repository at this point in the history
Signed-off-by: BubbleCal <[email protected]>
  • Loading branch information
BubbleCal committed Jan 23, 2025
1 parent aae351b commit d1b394b
Showing 1 changed file with 11 additions and 7 deletions.
18 changes: 11 additions & 7 deletions rust/lance/src/index/vector/builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -665,16 +665,20 @@ impl<S: IvfSubIndex + 'static, Q: Quantization + 'static> IvfIndexBuilder<S, Q>
)?
.try_collect::<Vec<_>>()
.await?;
- let batch = arrow::compute::concat_batches(&batches[0].schema(), batches.iter())?;
+ let num_rows = batches.iter().map(|b| b.num_rows()).sum::<usize>();
if storage_writer.is_none() {
storage_writer = Some(FileWriter::try_new(
self.store.create(&storage_path).await?,
- batch.schema_ref().as_ref().try_into()?,
+ batches[0].schema_ref().as_ref().try_into()?,
Default::default(),
)?);
}
- storage_writer.as_mut().unwrap().write_batch(&batch).await?;
- storage_ivf.add_partition(batch.num_rows() as u32);
+ storage_writer
+ .as_mut()
+ .unwrap()
+ .write_batches(batches.iter())
+ .await?;
+ storage_ivf.add_partition(num_rows as u32);
}

if index_size == 0 {
Expand All @@ -699,9 +703,9 @@ impl<S: IvfSubIndex + 'static, Q: Quantization + 'static> IvfIndexBuilder<S, Q>
)?
.try_collect::<Vec<_>>()
.await?;
- let batch = arrow::compute::concat_batches(&batches[0].schema(), batches.iter())?;
- index_writer.write_batch(&batch).await?;
- index_ivf.add_partition(batch.num_rows() as u32);
+ let num_rows = batches.iter().map(|b| b.num_rows()).sum::<usize>();
+ index_writer.write_batches(batches.iter()).await?;
+ index_ivf.add_partition(num_rows as u32);
partition_index_metadata.push(
reader
.schema()
Expand Down

0 comments on commit d1b394b

Please sign in to comment.