Skip to content

Commit

Permalink
revert back to hashmap
Browse files: browse the repository at this point in the history.
  • Loading branch information
chebbyChefNEQ committed Dec 6, 2023
1 parent 04d0e88 commit 49061bc
Show file tree
Hide file tree
Showing 3 changed files with 13 additions and 14 deletions.
16 changes: 8 additions & 8 deletions rust/lance-index/src/vector/ivf.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@

//! IVF - Inverted File Index
+ use std::collections::HashMap;
use std::ops::Range;
use std::sync::Arc;

Expand All @@ -38,7 +39,6 @@ use lance_linalg::{
MatrixView,
};
use log::{debug, info};
- use nohash_hasher::IntMap;
use snafu::{location, Location};
use tracing::{instrument, Instrument};

Expand All @@ -58,7 +58,7 @@ fn new_ivf_impl<T: ArrowFloatType + Dot + Cosine + L2 + 'static>(
metric_type: MetricType,
transforms: Vec<Arc<dyn Transformer>>,
range: Option<Range<u32>>,
- precomputed_partitions: Option<IntMap<u64, u32>>,
+ precomputed_partitions: Option<HashMap<u64, u32>>,
) -> Arc<dyn Ivf> {
let mat = MatrixView::<T>::new(Arc::new(centroids.clone()), dimension);
Arc::new(IvfImpl::<T>::new(
Expand All @@ -85,7 +85,7 @@ pub fn new_ivf(
metric_type: MetricType,
transforms: Vec<Arc<dyn Transformer>>,
range: Option<Range<u32>>,
- precomputed_partitions: Option<IntMap<u64, u32>>,
+ precomputed_partitions: Option<HashMap<u64, u32>>,
) -> Result<Arc<dyn Ivf>> {
match centroids.data_type() {
DataType::Float16 => Ok(new_ivf_impl::<Float16Type>(
Expand Down Expand Up @@ -129,7 +129,7 @@ fn new_ivf_with_pq_impl<T: ArrowFloatType + Dot + Cosine + L2 + 'static>(
vector_column: &str,
pq: Arc<dyn ProductQuantizer>,
range: Option<Range<u32>>,
- precomputed_partitions: Option<IntMap<u64, u32>>,
+ precomputed_partitions: Option<HashMap<u64, u32>>,
) -> Arc<dyn Ivf> {
let mat = MatrixView::<T>::new(Arc::new(centroids.clone()), dimension);
Arc::new(IvfImpl::<T>::new_with_pq(
Expand All @@ -149,7 +149,7 @@ pub fn new_ivf_with_pq(
vector_column: &str,
pq: Arc<dyn ProductQuantizer>,
range: Option<Range<u32>>,
- precomputed_partitions: Option<IntMap<u64, u32>>,
+ precomputed_partitions: Option<HashMap<u64, u32>>,
) -> Result<Arc<dyn Ivf>> {
match centroids.data_type() {
DataType::Float16 => Ok(new_ivf_with_pq_impl::<Float16Type>(
Expand Down Expand Up @@ -255,7 +255,7 @@ pub struct IvfImpl<T: ArrowFloatType + Dot + L2 + Cosine> {
/// Only covers a range of partitions.
partition_range: Option<Range<u32>>,

- precomputed_partitions: Option<IntMap<u64, u32>>,
+ precomputed_partitions: Option<HashMap<u64, u32>>,
}

impl<T: ArrowFloatType + Dot + L2 + Cosine + 'static> IvfImpl<T> {
Expand All @@ -264,7 +264,7 @@ impl<T: ArrowFloatType + Dot + L2 + Cosine + 'static> IvfImpl<T> {
metric_type: MetricType,
transforms: Vec<Arc<dyn Transformer>>,
range: Option<Range<u32>>,
- precomputed_partitions: Option<IntMap<u64, u32>>,
+ precomputed_partitions: Option<HashMap<u64, u32>>,
) -> Self {
Self {
centroids,
Expand All @@ -281,7 +281,7 @@ impl<T: ArrowFloatType + Dot + L2 + Cosine + 'static> IvfImpl<T> {
vector_column: &str,
pq: Arc<dyn ProductQuantizer>,
range: Option<Range<u32>>,
- precomputed_partitions: Option<IntMap<u64, u32>>,
+ precomputed_partitions: Option<HashMap<u64, u32>>,
) -> Self {
Self {
centroids: centroids.clone(),
Expand Down
7 changes: 3 additions & 4 deletions rust/lance/src/index/vector/ivf.rs
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ use lance_index::{
};
use lance_linalg::distance::{Cosine, Dot, MetricType, L2};
use log::{debug, info};
- use nohash_hasher::{BuildNoHashHasher, IntMap};
+ use nohash_hasher::IntMap;
use rand::{rngs::SmallRng, SeedableRng};
use roaring::RoaringBitmap;
use serde::Serialize;
Expand Down Expand Up @@ -816,8 +816,7 @@ pub async fn build_ivf_pq_index(
)
.await?;

- let mut partition_lookup =
- HashMap::with_capacity_and_hasher(reader.len(), BuildNoHashHasher::default());
+ let mut partition_lookup = HashMap::with_capacity(reader.len());

for i in 0..reader.num_batches() {
let batch = reader.read_batch(i as i32, RangeFull, &schema).await?;
Expand Down Expand Up @@ -1000,7 +999,7 @@ async fn write_index_file(
pq: Arc<dyn ProductQuantizer>,
metric_type: MetricType,
stream: impl RecordBatchStream + Unpin + 'static,
- precomputed_partitons: Option<IntMap<u64, u32>>,
+ precomputed_partitons: Option<HashMap<u64, u32>>,
) -> Result<()> {
let object_store = dataset.object_store();
let path = dataset.indices_dir().child(uuid).child(INDEX_FILE_NAME);
Expand Down
4 changes: 2 additions & 2 deletions rust/lance/src/index/vector/ivf/builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
// limitations under the License.

use std::ops::Range;
+ use std::collections::HashMap;
use std::sync::Arc;

use arrow_schema::{DataType, Field, Schema};
Expand All @@ -30,7 +31,6 @@ use lance_index::vector::pq::ProductQuantizer;
use lance_index::vector::{PART_ID_COLUMN, PQ_CODE_COLUMN};
use lance_linalg::distance::MetricType;
use log::info;
- use nohash_hasher::IntMap;
use snafu::{location, Location};
use tracing::instrument;

Expand Down Expand Up @@ -121,7 +121,7 @@ pub(super) async fn build_partitions(
pq: Arc<dyn ProductQuantizer>,
metric_type: MetricType,
part_range: Range<u32>,
- precomputed_partitons: Option<IntMap<u64, u32>>,
+ precomputed_partitons: Option<HashMap<u64, u32>>,
) -> Result<()> {
let schema = data.schema();
if schema.column_with_name(column).is_none() {
Expand Down

0 comments on commit 49061bc

Please sign in to comment.