From c5c4dcf0fda9c479c7f8826daaab912cce6f60ba Mon Sep 17 00:00:00 2001 From: BubbleCal Date: Fri, 6 Dec 2024 14:13:39 +0800 Subject: [PATCH] docs: add doc and test for 4bit PQ Signed-off-by: BubbleCal --- python/python/lance/dataset.py | 13 ++++++++++--- python/python/tests/test_vector_index.py | 15 +++++++++++++++ 2 files changed, 25 insertions(+), 3 deletions(-) diff --git a/python/python/lance/dataset.py b/python/python/lance/dataset.py index d19df694d1..a3f6199a2b 100644 --- a/python/python/lance/dataset.py +++ b/python/python/lance/dataset.py @@ -216,9 +216,13 @@ def __getstate__(self): ) def __setstate__(self, state): - self._uri, self._storage_options, version, manifest, default_scan_options = ( - state - ) + ( + self._uri, + self._storage_options, + version, + manifest, + default_scan_options, + ) = state self._ds = _Dataset( self._uri, version, @@ -1663,6 +1667,9 @@ def create_index( Optional parameters for "IVF_PQ": ivf_centroids : K-mean centroids for IVF clustering. + num_bits : int, optional + The number of bits for PQ (Product Quantization). Default is 8. + Only 4 and 8 are supported. 
Optional parameters for "IVF_HNSW_*": max_level : int diff --git a/python/python/tests/test_vector_index.py b/python/python/tests/test_vector_index.py index 7a85586379..df9e461286 100644 --- a/python/python/tests/test_vector_index.py +++ b/python/python/tests/test_vector_index.py @@ -386,6 +386,21 @@ def test_create_dot_index(dataset, tmp_path): assert ann_ds.has_index +def test_create_4bit_ivf_pq_index(dataset, tmp_path): + assert not dataset.has_index + ann_ds = lance.write_dataset(dataset.to_table(), tmp_path / "indexed.lance") + ann_ds = ann_ds.create_index( + "vector", + index_type="IVF_PQ", + num_partitions=1, + num_sub_vectors=16, + num_bits=4, + metric="l2", + ) + index = ann_ds.stats.index_stats("vector_idx") + assert index["indices"][0]["sub_index"]["nbits"] == 4 + + def test_create_ivf_hnsw_pq_index(dataset, tmp_path): assert not dataset.has_index ann_ds = lance.write_dataset(dataset.to_table(), tmp_path / "indexed.lance")