Skip to content

Commit

Permalink
feat: store IVF in global buffer (lancedb#2449)
Browse files: browse the repository at this point in the history
  • Loading branch information
BubbleCal authored Jun 11, 2024
1 parent 0a1944f commit 40ba433
Show file tree
Hide file tree
Showing 6 changed files with 28 additions and 22 deletions.
1 change: 0 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,6 @@ half = { "version" = "2.4.1", default-features = false, features = [
"num-traits",
"std",
] }
hex = "0.4"
bitvec = "1"
bytes = "1.4"
byteorder = "1.5"
Expand Down
1 change: 0 additions & 1 deletion rust/lance-index/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,6 @@ datafusion.workspace = true
deepsize.workspace = true
futures.workspace = true
half.workspace = true
hex.workspace = true
itertools.workspace = true
lance-arrow.workspace = true
lance-core.workspace = true
Expand Down
12 changes: 8 additions & 4 deletions rust/lance-index/src/vector/storage.rs
Original file line number Diff line number Diff line change
Expand Up @@ -172,16 +172,20 @@ impl<Q: Quantization> IvfQuantizationStorage<Q> {
.as_str(),
)?;

let ivf_pb_bytes =
hex::decode(schema.metadata.get(IVF_METADATA_KEY).ok_or(Error::Index {
let ivf_pos = schema
.metadata
.get(IVF_METADATA_KEY)
.ok_or(Error::Index {
message: format!("{} not found", IVF_METADATA_KEY),
location: location!(),
})?)
})?
.parse()
.map_err(|e| Error::Index {
message: format!("Failed to decode IVF metadata: {}", e),
location: location!(),
})?;
let ivf = IvfData::try_from(pb::Ivf::decode(ivf_pb_bytes.as_ref())?)?;
let ivf_bytes = reader.read_global_buffer(ivf_pos).await?;
let ivf = IvfData::try_from(pb::Ivf::decode(ivf_bytes)?)?;

let quantizer_metadata: Q::Metadata = serde_json::from_str(
schema
Expand Down
1 change: 0 additions & 1 deletion rust/lance/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,6 @@ dashmap = "5"
deepsize.workspace = true
# matches arrow-rs use
half.workspace = true
hex.workspace = true
itertools.workspace = true
object_store = { workspace = true, features = ["aws", "gcp", "azure"] }
aws-credential-types.workspace = true
Expand Down
14 changes: 8 additions & 6 deletions rust/lance/src/index/vector/builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -303,19 +303,21 @@ impl<S: IvfSubIndex, Q: Quantization + Clone> IvfIndexBuilder<S, Q> {
let mut storage_writer = storage_writer.unwrap();
let storage_ivf_pb = pb::Ivf::try_from(&storage_ivf)?;
storage_writer.add_schema_metadata(DISTANCE_TYPE_KEY, self.distance_type.to_string());
storage_writer.add_schema_metadata(
IVF_METADATA_KEY,
hex::encode(storage_ivf_pb.encode_to_vec()),
);
let ivf_buffer_pos = storage_writer
.add_global_buffer(storage_ivf_pb.encode_to_vec().into())
.await?;
storage_writer.add_schema_metadata(IVF_METADATA_KEY, ivf_buffer_pos.to_string());
storage_writer.add_schema_metadata(
Q::metadata_key(),
self.quantizer.metadata(None)?.to_string(),
);

let index_ivf_pb = pb::Ivf::try_from(&index_ivf)?;
index_writer.add_schema_metadata(DISTANCE_TYPE_KEY, self.distance_type.to_string());
index_writer
.add_schema_metadata(IVF_METADATA_KEY, hex::encode(index_ivf_pb.encode_to_vec()));
let ivf_buffer_pos = index_writer
.add_global_buffer(index_ivf_pb.encode_to_vec().into())
.await?;
index_writer.add_schema_metadata(IVF_METADATA_KEY, ivf_buffer_pos.to_string());

storage_writer.finish().await?;
index_writer.finish().await?;
Expand Down
21 changes: 12 additions & 9 deletions rust/lance/src/index/vector/ivf/v2.rs
Original file line number Diff line number Diff line change
Expand Up @@ -107,18 +107,21 @@ impl<I: IvfSubIndex + 'static, Q: Quantization> IVFIndex<I, Q> {
.as_str(),
)?;

let ivf_pb_bytes =
hex::decode(index_reader.schema().metadata.get(IVF_METADATA_KEY).ok_or(
Error::Index {
message: format!("{} not found", IVF_METADATA_KEY),
location: location!(),
},
)?)
let ivf_pos = index_reader
.schema()
.metadata
.get(IVF_METADATA_KEY)
.ok_or(Error::Index {
message: format!("{} not found", IVF_METADATA_KEY),
location: location!(),
})?
.parse()
.map_err(|e| Error::Index {
message: format!("Failed to decode IVF metadata: {}", e),
message: format!("Failed to decode IVF position: {}", e),
location: location!(),
})?;
let ivf = Ivf::try_from(&pb::Ivf::decode(ivf_pb_bytes.as_ref())?)?;
let ivf_pb_bytes = index_reader.read_global_buffer(ivf_pos).await?;
let ivf = Ivf::try_from(&pb::Ivf::decode(ivf_pb_bytes)?)?;

let storage_reader = FileReader::try_open(
scheduler
Expand Down

0 comments on commit 40ba433

Please sign in to comment.