diff --git a/Cargo.toml b/Cargo.toml index 59673e3a22..5103573897 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -79,7 +79,6 @@ half = { "version" = "2.4.1", default-features = false, features = [ "num-traits", "std", ] } -hex = "0.4" bitvec = "1" bytes = "1.4" byteorder = "1.5" diff --git a/rust/lance-index/Cargo.toml b/rust/lance-index/Cargo.toml index d21d9691af..2f88fa684f 100644 --- a/rust/lance-index/Cargo.toml +++ b/rust/lance-index/Cargo.toml @@ -28,7 +28,6 @@ datafusion.workspace = true deepsize.workspace = true futures.workspace = true half.workspace = true -hex.workspace = true itertools.workspace = true lance-arrow.workspace = true lance-core.workspace = true diff --git a/rust/lance-index/src/vector/storage.rs b/rust/lance-index/src/vector/storage.rs index 8c40844cc9..a515f5473d 100644 --- a/rust/lance-index/src/vector/storage.rs +++ b/rust/lance-index/src/vector/storage.rs @@ -172,16 +172,20 @@ impl IvfQuantizationStorage { .as_str(), )?; - let ivf_pb_bytes = - hex::decode(schema.metadata.get(IVF_METADATA_KEY).ok_or(Error::Index { + let ivf_pos = schema + .metadata + .get(IVF_METADATA_KEY) + .ok_or(Error::Index { message: format!("{} not found", IVF_METADATA_KEY), location: location!(), - })?) + })? + .parse() .map_err(|e| Error::Index { message: format!("Failed to decode IVF metadata: {}", e), location: location!(), })?; - let ivf = IvfData::try_from(pb::Ivf::decode(ivf_pb_bytes.as_ref())?)?; + let ivf_bytes = reader.read_global_buffer(ivf_pos).await?; + let ivf = IvfData::try_from(pb::Ivf::decode(ivf_bytes)?)?; let quantizer_metadata: Q::Metadata = serde_json::from_str( schema diff --git a/rust/lance/Cargo.toml b/rust/lance/Cargo.toml index daa5ab44ec..f5f78a0b2d 100644 --- a/rust/lance/Cargo.toml +++ b/rust/lance/Cargo.toml @@ -44,7 +44,6 @@ dashmap = "5" deepsize.workspace = true # matches arrow-rs use half.workspace = true -hex.workspace = true itertools.workspace = true object_store = { workspace = true, features = ["aws", "gcp", "azure"] } aws-credential-types.workspace = true diff --git a/rust/lance/src/index/vector/builder.rs b/rust/lance/src/index/vector/builder.rs index 248b472cbc..8085342038 100644 --- a/rust/lance/src/index/vector/builder.rs +++ b/rust/lance/src/index/vector/builder.rs @@ -303,10 +303,10 @@ impl IvfIndexBuilder { let mut storage_writer = storage_writer.unwrap(); let storage_ivf_pb = pb::Ivf::try_from(&storage_ivf)?; storage_writer.add_schema_metadata(DISTANCE_TYPE_KEY, self.distance_type.to_string()); - storage_writer.add_schema_metadata( - IVF_METADATA_KEY, - hex::encode(storage_ivf_pb.encode_to_vec()), - ); + let ivf_buffer_pos = storage_writer + .add_global_buffer(storage_ivf_pb.encode_to_vec().into()) + .await?; + storage_writer.add_schema_metadata(IVF_METADATA_KEY, ivf_buffer_pos.to_string()); storage_writer.add_schema_metadata( Q::metadata_key(), self.quantizer.metadata(None)?.to_string(), @@ -314,8 +314,10 @@ impl IvfIndexBuilder { let index_ivf_pb = pb::Ivf::try_from(&index_ivf)?; index_writer.add_schema_metadata(DISTANCE_TYPE_KEY, self.distance_type.to_string()); - index_writer - .add_schema_metadata(IVF_METADATA_KEY, hex::encode(index_ivf_pb.encode_to_vec())); + let ivf_buffer_pos = index_writer + .add_global_buffer(index_ivf_pb.encode_to_vec().into()) + .await?; + index_writer.add_schema_metadata(IVF_METADATA_KEY, ivf_buffer_pos.to_string()); storage_writer.finish().await?; index_writer.finish().await?; diff --git a/rust/lance/src/index/vector/ivf/v2.rs b/rust/lance/src/index/vector/ivf/v2.rs index 63a01d32b9..3b5ef10bb3 100644 --- a/rust/lance/src/index/vector/ivf/v2.rs +++ b/rust/lance/src/index/vector/ivf/v2.rs @@ -107,18 +107,21 @@ impl IVFIndex { .as_str(), )?; - let ivf_pb_bytes = - hex::decode(index_reader.schema().metadata.get(IVF_METADATA_KEY).ok_or( - Error::Index { - message: format!("{} not found", IVF_METADATA_KEY), - location: location!(), - }, - )?) + let ivf_pos = index_reader + .schema() + .metadata + .get(IVF_METADATA_KEY) + .ok_or(Error::Index { + message: format!("{} not found", IVF_METADATA_KEY), + location: location!(), + })? + .parse() .map_err(|e| Error::Index { - message: format!("Failed to decode IVF metadata: {}", e), + message: format!("Failed to decode IVF position: {}", e), location: location!(), })?; - let ivf = Ivf::try_from(&pb::Ivf::decode(ivf_pb_bytes.as_ref())?)?; + let ivf_pb_bytes = index_reader.read_global_buffer(ivf_pos).await?; + let ivf = Ivf::try_from(&pb::Ivf::decode(ivf_pb_bytes)?)?; let storage_reader = FileReader::try_open( scheduler