From d783f4555d38f258dc208bc8550238b79d8d0fd2 Mon Sep 17 00:00:00 2001
From: BubbleCal
Date: Tue, 17 Dec 2024 15:21:21 +0800
Subject: [PATCH] fix: list indices shows vector index type is always IVF_PQ

Previously every index whose columns include a FixedSizeList field was
reported as IndexType::Vector without opening it; only the remaining
indices were opened (as scalar indices) to read their type. Open each
index through open_generic_index instead and report the type the opened
index returns. The new test expects "IVF_PQ", "IVF_HNSW_SQ" and
"IVF_HNSW_PQ" to be reported for indices created with those types.

Signed-off-by: BubbleCal
---
 python/python/tests/test_vector_index.py | 31 ++++++++++++++++++++++++
 python/src/dataset.rs                    | 22 ++++++-----------
 2 files changed, 38 insertions(+), 15 deletions(-)

diff --git a/python/python/tests/test_vector_index.py b/python/python/tests/test_vector_index.py
index df9e461286..43f890ad27 100644
--- a/python/python/tests/test_vector_index.py
+++ b/python/python/tests/test_vector_index.py
@@ -373,6 +373,37 @@ def test_has_index(dataset, tmp_path):
     assert ann_ds.list_indices()[0]["fields"] == ["vector"]
 
 
+def test_index_type(dataset, tmp_path):
+    ann_ds = lance.write_dataset(dataset.to_table(), tmp_path / "indexed.lance")
+
+    ann_ds = ann_ds.create_index(
+        "vector",
+        index_type="IVF_PQ",
+        num_partitions=4,
+        num_sub_vectors=16,
+        replace=True,
+    )
+    assert ann_ds.list_indices()[0]["type"] == "IVF_PQ"
+
+    ann_ds = ann_ds.create_index(
+        "vector",
+        index_type="IVF_HNSW_SQ",
+        num_partitions=4,
+        num_sub_vectors=16,
+        replace=True,
+    )
+    assert ann_ds.list_indices()[0]["type"] == "IVF_HNSW_SQ"
+
+    ann_ds = ann_ds.create_index(
+        "vector",
+        index_type="IVF_HNSW_PQ",
+        num_partitions=4,
+        num_sub_vectors=16,
+        replace=True,
+    )
+    assert ann_ds.list_indices()[0]["type"] == "IVF_HNSW_PQ"
+
+
 def test_create_dot_index(dataset, tmp_path):
     assert not dataset.has_index
     ann_ds = lance.write_dataset(dataset.to_table(), tmp_path / "indexed.lance")
diff --git a/python/src/dataset.rs b/python/src/dataset.rs
index f55c0646ba..b274a7dc39 100644
--- a/python/src/dataset.rs
+++ b/python/src/dataset.rs
@@ -568,23 +568,15 @@ impl Dataset {
 
                 let idx_schema = schema.project_by_ids(idx.fields.as_slice(), true);
 
-                let is_vector = idx_schema
-                    .fields
-                    .iter()
-                    .any(|f| matches!(f.data_type(), DataType::FixedSizeList(_, _)));
-
-                let idx_type = if is_vector {
-                    IndexType::Vector
-                } else {
-                    let ds = self_.ds.clone();
-                    RT.block_on(Some(self_.py()), async {
-                        let scalar_idx = ds
-                            .open_scalar_index(&idx_schema.fields[0].name, &idx.uuid.to_string())
+                let ds = self_.ds.clone();
+                let idx_type = RT
+                    .block_on(Some(self_.py()), async {
+                        let idx = ds
+                            .open_generic_index(&idx_schema.fields[0].name, &idx.uuid.to_string())
                             .await?;
-                        Ok::<_, lance::Error>(scalar_idx.index_type())
+                        Ok::<_, lance::Error>(idx.index_type())
                     })?
-                    .map_err(|e| PyIOError::new_err(e.to_string()))?
-                };
+                    .map_err(|e| PyIOError::new_err(e.to_string()))?;
 
                 let field_names = idx_schema
                     .fields