diff --git a/python/DEVELOPMENT.md b/python/DEVELOPMENT.md index 5b44144d52..9b86fba08f 100644 --- a/python/DEVELOPMENT.md +++ b/python/DEVELOPMENT.md @@ -110,6 +110,10 @@ Note the parameter `--benchmark-min-time`: this controls how many seconds to run the benchmark in each round (default 5 rounds). The default is very low but you can increase this so that the profile gets more samples. +You can drop the `--no-inline` flag to have the program try to identify which functions +were inlined to get more detail, though this will make the processing take +considerably longer. + This will only work on Linux. Note that you'll want to run the benchmarks once prior to profiling, so that diff --git a/python/python/benchmarks/test_search.py b/python/python/benchmarks/test_search.py index b2e6abbb05..b5fda299d3 100644 --- a/python/python/benchmarks/test_search.py +++ b/python/python/benchmarks/test_search.py @@ -46,12 +46,13 @@ def test_dataset(data_dir: Path) -> lance.LanceDataset: @pytest.mark.benchmark(group="query_ann") def test_knn_search(test_dataset, benchmark): - q = pa.FixedSizeListArray.from_arrays(pc.random(N_DIMS).cast(pa.float32()), N_DIMS) + q = pc.random(N_DIMS).cast(pa.float32()) result = benchmark( test_dataset.to_table, nearest=dict( + column="vector", q=q, - k=10, + k=100, nprobes=10, use_index=False, ), @@ -61,12 +62,13 @@ def test_knn_search(test_dataset, benchmark): @pytest.mark.benchmark(group="query_ann") def test_flat_index_search(test_dataset, benchmark): - q = pa.FixedSizeListArray.from_arrays(pc.random(N_DIMS).cast(pa.float32()), N_DIMS) + q = pc.random(N_DIMS).cast(pa.float32()) result = benchmark( test_dataset.to_table, nearest=dict( + column="vector", q=q, - k=10, + k=100, nprobes=10, ), ) @@ -75,12 +77,13 @@ def test_flat_index_search(test_dataset, benchmark): @pytest.mark.benchmark(group="query_ann") def test_ivf_pq_index_search(test_dataset, benchmark): - q = pa.FixedSizeListArray.from_arrays(pc.random(N_DIMS).cast(pa.float32()), N_DIMS) + q = 
pc.random(N_DIMS).cast(pa.float32()) result = benchmark( test_dataset.to_table, nearest=dict( + column="vector", q=q, - k=10, + k=100, nprobes=10, refine_factor=2, ),