diff --git a/contrib/evaluation.py b/contrib/evaluation.py
index f78e33951d..9c762b6081 100644
--- a/contrib/evaluation.py
+++ b/contrib/evaluation.py
@@ -226,7 +226,7 @@ def compute_PR_for(q):
 # Functions that compare search results with a reference result.
 # They are intended for use in tests
 
-def test_ref_knn_with_draws(Dref, Iref, Dnew, Inew):
+def check_ref_knn_with_draws(Dref, Iref, Dnew, Inew):
     """ test that knn search results are identical, raise if not """
     np.testing.assert_array_almost_equal(Dref, Dnew, decimal=5)
     # here we have to be careful because of draws
@@ -243,14 +243,14 @@ def test_ref_knn_with_draws(Dref, Iref, Dnew, Inew):
             testcase.assertEqual(set(Iref[i, mask]), set(Inew[i, mask]))
 
 
-def test_ref_range_results(lims_ref, Dref, Iref,
-                           lims_new, Dnew, Inew):
+def check_ref_range_results(Lref, Dref, Iref,
+                            Lnew, Dnew, Inew):
     """ compare range search results wrt. a reference result,
     throw if it fails """
-    np.testing.assert_array_equal(lims_ref, lims_new)
-    nq = len(lims_ref) - 1
+    np.testing.assert_array_equal(Lref, Lnew)
+    nq = len(Lref) - 1
     for i in range(nq):
-        l0, l1 = lims_ref[i], lims_ref[i + 1]
+        l0, l1 = Lref[i], Lref[i + 1]
         Ii_ref = Iref[l0:l1]
         Ii_new = Inew[l0:l1]
         Di_ref = Dref[l0:l1]
diff --git a/contrib/exhaustive_search.py b/contrib/exhaustive_search.py
index fa7f1b11c4..19b452a25d 100644
--- a/contrib/exhaustive_search.py
+++ b/contrib/exhaustive_search.py
@@ -11,7 +11,6 @@
 
 LOG = logging.getLogger(__name__)
 
-
 def knn_ground_truth(xq, db_iterator, k, metric_type=faiss.METRIC_L2):
     """Computes the exact KNN search results for a dataset that possibly
     does not fit in RAM but for which we have an iterator that
@@ -51,47 +50,82 @@ def knn_ground_truth(xq, db_iterator, k, metric_type=faiss.METRIC_L2):
 
 
-def range_search_gpu(xq, r2, index_gpu, index_cpu):
+def range_search_gpu(xq, r2, index_gpu, index_cpu, gpu_k=1024):
     """GPU does not support range search, so we emulate it with
     knn search + fallback to CPU index.
 
-    The index_cpu can either be a CPU index or a numpy table that will
-    be used to construct a Flat index if needed.
+    The index_cpu can either be:
+    - a CPU index that supports range search
+    - a numpy table that will be used to construct a Flat index if needed.
+    - None. In that case, at most gpu_k results will be returned
     """
     nq, d = xq.shape
-    LOG.debug("GPU search %d queries" % nq)
-    k = min(index_gpu.ntotal, 1024)
+    k = min(index_gpu.ntotal, gpu_k)
+    keep_max = faiss.is_similarity_metric(index_gpu.metric_type)
+    LOG.debug(f"GPU search {nq} queries with {k=:}")
+    t0 = time.time()
     D, I = index_gpu.search(xq, k)
-    if index_gpu.metric_type == faiss.METRIC_L2:
-        mask = D[:, k - 1] < r2
-    else:
-        mask = D[:, k - 1] > r2
-    if mask.sum() > 0:
-        LOG.debug("CPU search remain %d" % mask.sum())
-        if isinstance(index_cpu, np.ndarray):
-            # then it in fact an array that we have to make flat
-            xb = index_cpu
-            index_cpu = faiss.IndexFlat(d, index_gpu.metric_type)
-            index_cpu.add(xb)
-        lim_remain, D_remain, I_remain = index_cpu.range_search(xq[mask], r2)
+    t1 = time.time() - t0
+    t2 = 0
+    lim_remain = None
+    if index_cpu is not None:
+        if not keep_max:
+            mask = D[:, k - 1] < r2
+        else:
+            mask = D[:, k - 1] > r2
+        if mask.sum() > 0:
+            LOG.debug("CPU search remain %d" % mask.sum())
+            t0 = time.time()
+            if isinstance(index_cpu, np.ndarray):
+                # then it is in fact an array that we have to make flat
+                xb = index_cpu
+                index_cpu = faiss.IndexFlat(d, index_gpu.metric_type)
+                index_cpu.add(xb)
+            lim_remain, D_remain, I_remain = index_cpu.range_search(xq[mask], r2)
+            t2 = time.time() - t0
     LOG.debug("combine")
-    D_res, I_res = [], []
-    nr = 0
-    for i in range(nq):
-        if not mask[i]:
-            if index_gpu.metric_type == faiss.METRIC_L2:
-                nv = (D[i, :] < r2).sum()
+    t0 = time.time()
+
+    combiner = faiss.CombinerRangeKNN(nq, k, float(r2), keep_max)
+    if True:
+        sp = faiss.swig_ptr
+        combiner.I = sp(I)
+        combiner.D = sp(D)
+        # combiner.set_knn_result(sp(I), sp(D))
+        if lim_remain is not None:
+            combiner.mask = sp(mask)
+            combiner.D_remain = sp(D_remain)
+            combiner.lim_remain = sp(lim_remain.view("int64"))
+            combiner.I_remain = sp(I_remain)
+            # combiner.set_range_result(sp(mask), sp(lim_remain.view("int64")), sp(D_remain), sp(I_remain))
+        L_res = np.empty(nq + 1, dtype='int64')
+        combiner.compute_sizes(sp(L_res))
+        nres = L_res[-1]
+        D_res = np.empty(nres, dtype='float32')
+        I_res = np.empty(nres, dtype='int64')
+        combiner.write_result(sp(D_res), sp(I_res))
+    else:
+        D_res, I_res = [], []
+        nr = 0
+        for i in range(nq):
+            if not mask[i]:
+                if index_gpu.metric_type == faiss.METRIC_L2:
+                    nv = (D[i, :] < r2).sum()
+                else:
+                    nv = (D[i, :] > r2).sum()
+                D_res.append(D[i, :nv])
+                I_res.append(I[i, :nv])
             else:
-                nv = (D[i, :] > r2).sum()
-            D_res.append(D[i, :nv])
-            I_res.append(I[i, :nv])
-        else:
-            l0, l1 = lim_remain[nr], lim_remain[nr + 1]
-            D_res.append(D_remain[l0:l1])
-            I_res.append(I_remain[l0:l1])
-            nr += 1
-    lims = np.cumsum([0] + [len(di) for di in D_res])
-    return lims, np.hstack(D_res), np.hstack(I_res)
+                l0, l1 = lim_remain[nr], lim_remain[nr + 1]
+                D_res.append(D_remain[l0:l1])
+                I_res.append(I_remain[l0:l1])
+                nr += 1
+        L_res = np.cumsum([0] + [len(di) for di in D_res])
+        D_res = np.hstack(D_res)
+        I_res = np.hstack(I_res)
+    t3 = time.time() - t0
+    LOG.debug(f"times {t1:.3f}s {t2:.3f}s {t3:.3f}s")
+    return L_res, D_res, I_res
 
 
 def range_ground_truth(xq, db_iterator, threshold, metric_type=faiss.METRIC_L2,
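For reference, a minimal sketch of how the reworked range_search_gpu is meant
to be called. The dataset, index type and threshold below are illustrative
assumptions, not part of the patch; passing the raw database array as
index_cpu builds the Flat fallback on the fly, and passing None disables the
CPU fallback entirely, capping each query at gpu_k results:

    import numpy as np
    import faiss
    from faiss.contrib.exhaustive_search import range_search_gpu

    d = 32
    rs = np.random.RandomState(0)
    xb = rs.rand(10000, d).astype('float32')  # database (illustrative)
    xq = rs.rand(100, d).astype('float32')    # queries (illustrative)

    index_gpu = faiss.index_cpu_to_all_gpus(faiss.IndexFlat(d))
    index_gpu.add(xb)

    # pick a radius that returns a handful of results per query
    D, _ = index_gpu.search(xq, 10)
    r2 = float(np.median(D[:, 5]))

    # GPU kNN pass capped at gpu_k results; queries whose gpu_k-th neighbor
    # is still within the radius fall back to a CPU range search over xb
    lims, D_res, I_res = range_search_gpu(xq, r2, index_gpu, xb, gpu_k=64)
    print(I_res[lims[0]:lims[1]])  # neighbors of query 0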
diff --git a/contrib/ivf_tools.py b/contrib/ivf_tools.py
index 3ea8827315..26ada886a1 100644
--- a/contrib/ivf_tools.py
+++ b/contrib/ivf_tools.py
@@ -77,7 +77,7 @@ def range_search_preassigned(index_ivf, x, radius, list_nos, coarse_dis=None):
 
     res = faiss.RangeSearchResult(n)
     sp = faiss.swig_ptr
-    index_ivf.range_search_preassigned(
+    index_ivf.range_search_preassigned_c(
         n, sp(x),
         radius,
         sp(list_nos), sp(coarse_dis),
         res
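For context, the contrib helper above is driven like this: the coarse
quantizer assignment is computed once and then reused across searches. A
sketch, with an assumed synthetic dataset and a radius derived from a kNN run:

    import numpy as np
    import faiss
    from faiss.contrib import ivf_tools
    from faiss.contrib.datasets import SyntheticDataset

    ds = SyntheticDataset(32, 1000, 2000, 10)
    index = faiss.index_factory(ds.d, "IVF32,Flat")
    index.train(ds.get_train())
    index.add(ds.get_database())
    index.nprobe = 4

    D, _ = index.search(ds.get_queries(), 10)
    radius = float(np.median(D[:, 5]))

    # coarse assignment computed once, reusable for several searches
    Dq, Iq = index.quantizer.search(ds.get_queries(), index.nprobe)
    lims, D_res, I_res = ivf_tools.range_search_preassigned(
        index, ds.get_queries(), radius, Iq, Dq)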
diff --git a/faiss/gpu/GpuCloner.cpp b/faiss/gpu/GpuCloner.cpp
index 80e1fa20cb..9109981a73 100644
--- a/faiss/gpu/GpuCloner.cpp
+++ b/faiss/gpu/GpuCloner.cpp
@@ -309,6 +309,7 @@ Index* ToGpuClonerMultiple::clone_Index_to_shards(const Index* index) {
 
     std::vector<faiss::Index*> shards(n);
 
+#pragma omp parallel for
     for (idx_t i = 0; i < n; i++) {
         // make a shallow copy
         if (reserveVecs) {
diff --git a/faiss/gpu/test/test_contrib_gpu.py b/faiss/gpu/test/test_contrib_gpu.py
index 061df7da5b..faeeb5d0d8 100644
--- a/faiss/gpu/test/test_contrib_gpu.py
+++ b/faiss/gpu/test/test_contrib_gpu.py
@@ -12,7 +12,7 @@
 from faiss.contrib import datasets, evaluation, big_batch_search
 from faiss.contrib.exhaustive_search import knn_ground_truth, \
-    range_ground_truth
+    range_ground_truth, range_search_gpu
 
 
 class TestComputeGT(unittest.TestCase):
@@ -51,7 +51,7 @@ def do_test_range(self, metric):
             xq, ds.database_iterator(bs=100), threshold,
             metric_type=metric)
-        evaluation.test_ref_range_results(
+        evaluation.check_ref_range_results(
             ref_lims, ref_D, ref_I,
             new_lims, new_D, new_I
         )
@@ -131,3 +131,49 @@ def knn_function(xq, xb, k, metric=faiss.METRIC_L2, thread_id=None):
 
     def test_Flat(self):
         self.do_test("IVF64,Flat")
+
+
+class TestRangeSearchGpu(unittest.TestCase):
+
+    def do_test(self, factory_string):
+        ds = datasets.SyntheticDataset(32, 2000, 4000, 1000)
+        k = 10
+        index_gpu = faiss.index_cpu_to_all_gpus(
+            faiss.index_factory(ds.d, factory_string)
+        )
+        index_gpu.train(ds.get_train())
+        index_gpu.add(ds.get_database())
+        # just to find a reasonable threshold
+        D, _ = index_gpu.search(ds.get_queries(), k)
+        threshold = np.median(D[:, 5])
+
+        # ref run
+        index_cpu = faiss.index_gpu_to_cpu(index_gpu)
+        Lref, Dref, Iref = index_cpu.range_search(ds.get_queries(), threshold)
+        nres_per_query = Lref[1:] - Lref[:-1]
+        # make sure some entries were computed by CPU and some by GPU
+        assert np.any(nres_per_query > 4) and not np.all(nres_per_query > 4)
+
+        # mixed GPU / CPU run
+        Lnew, Dnew, Inew = range_search_gpu(
+            ds.get_queries(), threshold, index_gpu, index_cpu, gpu_k=4)
+        evaluation.check_ref_range_results(
+            Lref, Dref, Iref,
+            Lnew, Dnew, Inew
+        )
+
+        # also test the version without CPU search
+        Lnew2, Dnew2, Inew2 = range_search_gpu(
+            ds.get_queries(), threshold, index_gpu, None, gpu_k=4)
+        for q in range(ds.nq):
+            ref = Iref[Lref[q]:Lref[q+1]]
+            new = Inew2[Lnew2[q]:Lnew2[q+1]]
+            if nres_per_query[q] <= 4:
+                self.assertEqual(set(ref), set(new))
+            else:
+                ref = set(ref)
+                for v in new:
+                    self.assertIn(v, ref)
+
+    def test_ivf(self):
+        self.do_test("IVF64,Flat")
diff --git a/faiss/python/__init__.py b/faiss/python/__init__.py
index 49343043bd..c0f0f9456d 100644
--- a/faiss/python/__init__.py
+++ b/faiss/python/__init__.py
@@ -22,7 +22,7 @@
 from faiss.extra_wrappers import kmin, kmax, pairwise_distances, rand, randint, \
     lrand, randn, rand_smooth_vectors, eval_intersection, normalize_L2, \
     ResultHeap, knn, Kmeans, checksum, matrix_bucket_sort_inplace, bucket_sort, \
-    merge_knn_results
+    merge_knn_results, MapInt64ToInt64
 
 
 __version__ = "%d.%d.%d" % (FAISS_VERSION_MAJOR,
diff --git a/faiss/python/class_wrappers.py b/faiss/python/class_wrappers.py
index 54539797f5..7d410d4afb 100644
--- a/faiss/python/class_wrappers.py
+++ b/faiss/python/class_wrappers.py
@@ -544,6 +544,7 @@ def replacement_range_search(self, x, thresh, *, params=None):
         n, d = x.shape
         assert d == self.d
         x = np.ascontiguousarray(x, dtype='float32')
+        thresh = float(thresh)
 
         res = RangeSearchResult(n)
self.range_search_c(n, swig_ptr(x), thresh, res, params) @@ -618,6 +619,64 @@ def replacement_search_preassigned(self, x, k, Iq, Dq, *, params=None, D=None, I ) return D, I + def replacement_range_search_preassigned(self, x, thresh, Iq, Dq, *, params=None): + """Search vectors that are within a distance of the query vectors. + + Parameters + ---------- + x : array_like + Query vectors, shape (n, d) where d is appropriate for the index. + `dtype` must be float32. + thresh : float + Threshold to select neighbors. All elements within this radius are returned, + except for maximum inner product indexes, where the elements above the + threshold are returned + Iq : array_like, optional + Nearest centroids, size (n, nprobe) + Dq : array_like, optional + Distance array to the centroids, size (n, nprobe) + params : SearchParameters + Search parameters of the current search (overrides the class-level params) + + + Returns + ------- + lims: array_like + Starting index of the results for each query vector, size n+1. + D : array_like + Distances of the nearest neighbors, shape `lims[n]`. The distances for + query i are in `D[lims[i]:lims[i+1]]`. + I : array_like + Labels of nearest neighbors, shape `lims[n]`. The labels for query i + are in `I[lims[i]:lims[i+1]]`. + + """ + n, d = x.shape + assert d == self.d + x = np.ascontiguousarray(x, dtype='float32') + + Iq = np.ascontiguousarray(Iq, dtype='int64') + assert params is None, "params not supported" + assert Iq.shape == (n, self.nprobe) + + if Dq is not None: + Dq = np.ascontiguousarray(Dq, dtype='float32') + assert Dq.shape == Iq.shape + + thresh = float(thresh) + res = RangeSearchResult(n) + self.range_search_preassigned_c( + n, swig_ptr(x), thresh, + swig_ptr(Iq), swig_ptr(Dq), + res + ) + # get pointers and copy them + lims = rev_swig_ptr(res.lims, n + 1).copy() + nd = int(lims[-1]) + D = rev_swig_ptr(res.distances, nd).copy() + I = rev_swig_ptr(res.labels, nd).copy() + return lims, D, I + def replacement_sa_encode(self, x, codes=None): n, d = x.shape assert d == self.d @@ -675,8 +734,12 @@ def replacement_permute_entries(self, perm): ignore_missing=True) replace_method(the_class, 'search_and_reconstruct', replacement_search_and_reconstruct, ignore_missing=True) + + # these ones are IVF-specific replace_method(the_class, 'search_preassigned', replacement_search_preassigned, ignore_missing=True) + replace_method(the_class, 'range_search_preassigned', + replacement_range_search_preassigned, ignore_missing=True) replace_method(the_class, 'sa_encode', replacement_sa_encode) replace_method(the_class, 'sa_decode', replacement_sa_decode) replace_method(the_class, 'add_sa_codes', replacement_add_sa_codes, @@ -776,6 +839,36 @@ def replacement_range_search(self, x, thresh): I = rev_swig_ptr(res.labels, nd).copy() return lims, D, I + def replacement_range_search_preassigned(self, x, thresh, Iq, Dq, *, params=None): + n, d = x.shape + x = _check_dtype_uint8(x) + assert d * 8 == self.d + + Iq = np.ascontiguousarray(Iq, dtype='int64') + assert params is None, "params not supported" + assert Iq.shape == (n, self.nprobe) + + if Dq is not None: + Dq = np.ascontiguousarray(Dq, dtype='int32') + assert Dq.shape == Iq.shape + + thresh = int(thresh) + res = RangeSearchResult(n) + self.range_search_preassigned_c( + n, swig_ptr(x), thresh, + swig_ptr(Iq), swig_ptr(Dq), + res + ) + # get pointers and copy them + lims = rev_swig_ptr(res.lims, n + 1).copy() + nd = int(lims[-1]) + D = rev_swig_ptr(res.distances, nd).copy() + I = rev_swig_ptr(res.labels, nd).copy() + return 
lims, D, I + + + + def replacement_remove_ids(self, x): if isinstance(x, IDSelector): sel = x @@ -794,6 +887,8 @@ def replacement_remove_ids(self, x): replace_method(the_class, 'remove_ids', replacement_remove_ids) replace_method(the_class, 'search_preassigned', replacement_search_preassigned, ignore_missing=True) + replace_method(the_class, 'range_search_preassigned', + replacement_range_search_preassigned, ignore_missing=True) def handle_VectorTransform(the_class): @@ -937,7 +1032,7 @@ def handle_MapLong2Long(the_class): def replacement_map_add(self, keys, vals): n, = keys.shape - assert (n,) == keys.shape + assert (n,) == vals.shape self.add_c(n, swig_ptr(keys), swig_ptr(vals)) def replacement_map_search_multiple(self, keys): diff --git a/faiss/python/extra_wrappers.py b/faiss/python/extra_wrappers.py index deff3c8995..d264767724 100644 --- a/faiss/python/extra_wrappers.py +++ b/faiss/python/extra_wrappers.py @@ -297,6 +297,34 @@ def merge_knn_results(Dall, Iall, keep_max=False): ) return Dnew, Inew +###################################################### +# Efficient ID to ID map +###################################################### + +class MapInt64ToInt64: + + def __init__(self, capacity): + self.log2_capacity = int(np.log2(capacity)) + assert capacity == 2 ** self.log2_capacity, "need power of 2 capacity" + self.capacity = capacity + self.tab = np.empty((capacity, 2), dtype='int64') + faiss.hashtable_int64_to_int64_init(self.log2_capacity, swig_ptr(self.tab)) + + def add(self, keys, vals): + n, = keys.shape + assert vals.shape == (n,) + faiss.hashtable_int64_to_int64_add( + self.log2_capacity, swig_ptr(self.tab), + n, swig_ptr(keys), swig_ptr(vals)) + + def lookup(self, keys): + n, = keys.shape + vals = np.empty((n,), dtype='int64') + faiss.hashtable_int64_to_int64_lookup( + self.log2_capacity, swig_ptr(self.tab), + n, swig_ptr(keys), swig_ptr(vals)) + return vals + ###################################################### # KNN function ###################################################### diff --git a/faiss/python/swigfaiss.swig b/faiss/python/swigfaiss.swig index a57aa93536..b49dcd80e2 100644 --- a/faiss/python/swigfaiss.swig +++ b/faiss/python/swigfaiss.swig @@ -794,17 +794,141 @@ faiss::Quantizer * downcast_Quantizer (faiss::Quantizer *aq) #endif +/******************************************************************* + * How should the template objects appear in the scripting language? 
+ *******************************************************************/
+
+// answer: the same as the C++ typedefs, but we still have to redefine them
+
+%template() faiss::CMin<float, int64_t>;
+%template() faiss::CMin<int, int64_t>;
+%template() faiss::CMax<float, int64_t>;
+%template() faiss::CMax<int, int64_t>;
+
+%template(float_minheap_array_t) faiss::HeapArray<faiss::CMin<float, int64_t> >;
+%template(int_minheap_array_t) faiss::HeapArray<faiss::CMin<int, int64_t> >;
+%template(float_maxheap_array_t) faiss::HeapArray<faiss::CMax<float, int64_t> >;
+%template(int_maxheap_array_t) faiss::HeapArray<faiss::CMax<int, int64_t> >;
+
+%template(CMin_float_partition_fuzzy)
+    faiss::partition_fuzzy<faiss::CMin<float, int64_t> >;
+%template(CMax_float_partition_fuzzy)
+    faiss::partition_fuzzy<faiss::CMax<float, int64_t> >;
+
+%template(AlignedTableUint8) faiss::AlignedTable<uint8_t>;
+%template(AlignedTableUint16) faiss::AlignedTable<uint16_t>;
+%template(AlignedTableFloat32) faiss::AlignedTable<float>;
+
+
+// SWIG seems to have some trouble resolving function template types here, so
+// declare explicitly
+
+%define INSTANTIATE_uint16_partition_fuzzy(C, id_t)
+
+%inline %{
+
+uint16_t C ## _uint16_partition_fuzzy(
+    uint16_t *vals, id_t *ids, size_t n,
+    size_t q_min, size_t q_max, size_t * q_out)
+{
+    return faiss::partition_fuzzy<faiss::C<uint16_t, id_t> >(
+        vals, ids, n, q_min, q_max, q_out);
+}
+
+%}
+
+%enddef
+
+INSTANTIATE_uint16_partition_fuzzy(CMin, int64_t)
+INSTANTIATE_uint16_partition_fuzzy(CMax, int64_t)
+INSTANTIATE_uint16_partition_fuzzy(CMin, int)
+INSTANTIATE_uint16_partition_fuzzy(CMax, int)
+
+// Same for merge_knn_results
+
+// same define as explicit instantiation in Heap.cpp
+%define INSTANTIATE_merge_knn_results(C, distance_t)
+
+%inline %{
+void merge_knn_results_ ## C(
+    size_t n, size_t k, int nshard,
+    const distance_t *all_distances, const faiss::idx_t *all_labels,
+    distance_t *distances, faiss::idx_t *labels)
+{
+    faiss::merge_knn_results<faiss::idx_t, faiss::C<distance_t, int64_t>>(
+            n, k, nshard, all_distances, all_labels, distances, labels);
+}
+%}
+
+%enddef
+
+INSTANTIATE_merge_knn_results(CMin, float);
+INSTANTIATE_merge_knn_results(CMax, float);
+INSTANTIATE_merge_knn_results(CMin, int32_t);
+INSTANTIATE_merge_knn_results(CMax, int32_t);
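The instantiations above back the Python-side faiss.merge_knn_results wrapper
defined in faiss/python/extra_wrappers.py. The expected call pattern, for
reference (shapes follow the wrapper's convention of one sorted kNN result
table per shard; the data here is made up):

    import numpy as np
    import faiss

    nshard, n, k = 3, 5, 4
    rs = np.random.RandomState(0)
    # per-shard distances must be sorted along the last axis
    Dall = np.sort(rs.rand(nshard, n, k).astype('float32'), axis=-1)
    Iall = rs.randint(1000, size=(nshard, n, k)).astype('int64')
    Dnew, Inew = faiss.merge_knn_results(Dall, Iall)  # merged (n, k) results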
+
+
+%inline %{
+
+/*******************************************************************
+ * numpy misses a hash table implementation, hence this class. It
+ * represents not found values as -1 like in the Index implementation
+ *******************************************************************/
+
+
+struct MapLong2Long {
+    std::unordered_map<int64_t, int64_t> map;
+
+    void add(size_t n, const int64_t *keys, const int64_t *vals) {
+        map.reserve(map.size() + n);
+        for (size_t i = 0; i < n; i++) {
+            map[keys[i]] = vals[i];
+        }
+    }
+
+    int64_t search(int64_t key) const {
+        auto ptr = map.find(key);
+        if (ptr == map.end()) {
+            return -1;
+        } else {
+            return ptr->second;
+        }
+    }
+
+    void search_multiple(size_t n, int64_t *keys, int64_t * vals) {
+        for (size_t i = 0; i < n; i++) {
+            vals[i] = search(keys[i]);
+        }
+    }
+};
+
+%}
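The struct above is exposed to Python as faiss.MapLong2Long, with array-valued
wrappers installed by handle_MapLong2Long in class_wrappers.py (whose add-time
shape check is fixed in this same diff). Typical use, for reference:

    import numpy as np
    import faiss

    m = faiss.MapLong2Long()
    m.add(np.array([10, 20, 30], dtype='int64'),
          np.array([1, 2, 3], dtype='int64'))
    print(m.search(20))                                          # 2
    print(m.search_multiple(np.array([20, 40], dtype='int64')))  # [2, -1]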
+
+
+/*******************************************************************
+ * Expose a few basic functions
+ *******************************************************************/
+
+
+void omp_set_num_threads (int num_threads);
+int omp_get_max_threads ();
+void *memcpy(void *dest, const void *src, size_t n);
+
+
+
+
+/*******************************************************************
+ * Python-specific: do not release GIL any more, as functions below
+ * use the Python/C API
+ *******************************************************************/
 
-// Python-specific: do not release GIL any more, as functions below
-// use the Python/C API
 #ifdef SWIGPYTHON
 %exception;
 #endif
 
-
-
 /*******************************************************************
  * Python specific: numpy array <-> C++ pointer interface
  *******************************************************************/
@@ -859,6 +983,9 @@ PyObject *swig_ptr (PyObject *a)
     if(PyArray_TYPE(ao) == NPY_INT32) {
         return SWIG_NewPointerObj(data, SWIGTYPE_p_int, 0);
     }
+    if(PyArray_TYPE(ao) == NPY_BOOL) {
+        return SWIG_NewPointerObj(data, SWIGTYPE_p_bool, 0);
+    }
     if(PyArray_TYPE(ao) == NPY_UINT64) {
 #ifdef SWIGWORDSIZE64
         return SWIG_NewPointerObj(data, SWIGTYPE_p_unsigned_long, 0);
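The NPY_BOOL case added above is what lets range_search_gpu hand the boolean
validity mask straight to CombinerRangeKNN. A sketch of the effect (array
contents are illustrative; before this patch there was no bool case, so such
arrays were not accepted by swig_ptr):

    import numpy as np
    import faiss

    mask = np.zeros(10, dtype=bool)  # one validity flag per query
    ptr = faiss.swig_ptr(mask)       # now maps to a C++ bool* (SWIGTYPE_p_bool)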
@@ -935,90 +1062,6 @@ REV_SWIG_PTR(uint64_t, NPY_UINT64);
 
 
 
-/*******************************************************************
- * How should the template objects appear in the scripting language?
- *******************************************************************/
-
-// answer: the same as the C++ typedefs, but we still have to redefine them
-
-%template() faiss::CMin<float, int64_t>;
-%template() faiss::CMin<int, int64_t>;
-%template() faiss::CMax<float, int64_t>;
-%template() faiss::CMax<int, int64_t>;
-
-%template(float_minheap_array_t) faiss::HeapArray<faiss::CMin<float, int64_t> >;
-%template(int_minheap_array_t) faiss::HeapArray<faiss::CMin<int, int64_t> >;
-%template(float_maxheap_array_t) faiss::HeapArray<faiss::CMax<float, int64_t> >;
-%template(int_maxheap_array_t) faiss::HeapArray<faiss::CMax<int, int64_t> >;
-
-%template(CMin_float_partition_fuzzy)
-    faiss::partition_fuzzy<faiss::CMin<float, int64_t> >;
-%template(CMax_float_partition_fuzzy)
-    faiss::partition_fuzzy<faiss::CMax<float, int64_t> >;
-
-%template(AlignedTableUint8) faiss::AlignedTable<uint8_t>;
-%template(AlignedTableUint16) faiss::AlignedTable<uint16_t>;
-%template(AlignedTableFloat32) faiss::AlignedTable<float>;
-
-
-// SWIG seems to have some trouble resolving function template types here, so
-// declare explicitly
-
-%define INSTANTIATE_uint16_partition_fuzzy(C, id_t)
-
-%inline %{
-
-uint16_t C ## _uint16_partition_fuzzy(
-    uint16_t *vals, id_t *ids, size_t n,
-    size_t q_min, size_t q_max, size_t * q_out)
-{
-    return faiss::partition_fuzzy<faiss::C<uint16_t, id_t> >(
-        vals, ids, n, q_min, q_max, q_out);
-}
-
-%}
-
-%enddef
-
-INSTANTIATE_uint16_partition_fuzzy(CMin, int64_t)
-INSTANTIATE_uint16_partition_fuzzy(CMax, int64_t)
-INSTANTIATE_uint16_partition_fuzzy(CMin, int)
-INSTANTIATE_uint16_partition_fuzzy(CMax, int)
-
-// Same for merge_knn_results
-
-// same define as explicit instanciation in Heap.cpp
-%define INSTANTIATE_merge_knn_results(C, distance_t)
-
-%inline %{
-void merge_knn_results_ ## C(
-    size_t n, size_t k, int nshard,
-    const distance_t *all_distances, const faiss::idx_t *all_labels,
-    distance_t *distances, faiss::idx_t *labels)
-{
-    faiss::merge_knn_results<faiss::idx_t, faiss::C<distance_t, int64_t>>(
-            n, k, nshard, all_distances, all_labels, distances, labels);
-}
-%}
-
-%enddef
-
-INSTANTIATE_merge_knn_results(CMin, float);
-INSTANTIATE_merge_knn_results(CMax, float);
-INSTANTIATE_merge_knn_results(CMin, int32_t);
-INSTANTIATE_merge_knn_results(CMax, int32_t);
-
-
-/*******************************************************************
- * Expose a few basic functions
- *******************************************************************/
-
-
-void omp_set_num_threads (int num_threads);
-int omp_get_max_threads ();
-void *memcpy(void *dest, const void *src, size_t n);
-
-
 /*******************************************************************
  * For Faiss/Pytorch interop via pointers encoded as longs
  *******************************************************************/
@@ -1047,42 +1090,6 @@ void * cast_integer_to_void_ptr (int64_t x) {
 %}
 
 
-/*******************************************************************
- * Range search interface
- *******************************************************************/
-
-
-%inline %{
-
-// numpy misses a hash table implementation, hence this class. It
-// represents not found values as -1 like in the Index implementation
-
-struct MapLong2Long {
-    std::unordered_map<int64_t, int64_t> map;
-
-    void add(size_t n, const int64_t *keys, const int64_t *vals) {
-        map.reserve(map.size() + n);
-        for (size_t i = 0; i < n; i++) {
-            map[keys[i]] = vals[i];
-        }
-    }
-
-    int64_t search(int64_t key) {
-        if (map.count(key) == 0) {
-            return -1;
-        } else {
-            return map[key];
-        }
-    }
-
-    void search_multiple(size_t n, int64_t *keys, int64_t * vals) {
-        for (size_t i = 0; i < n; i++) {
-            vals[i] = search(keys[i]);
-        }
-    }
-};
-
-%}
diff --git a/faiss/utils/partitioning.h b/faiss/utils/partitioning.h
index 90157d2312..cd63592401 100644
--- a/faiss/utils/partitioning.h
+++ b/faiss/utils/partitioning.h
@@ -71,4 +71,5 @@ struct PartitionStats {
 // global var that collects them all
 FAISS_API extern PartitionStats partition_stats;
 
+
 } // namespace faiss
diff --git a/faiss/utils/sorting.cpp b/faiss/utils/sorting.cpp
index 18c3f963a5..67dd51bf73 100644
--- a/faiss/utils/sorting.cpp
+++ b/faiss/utils/sorting.cpp
@@ -689,4 +689,144 @@ void matrix_bucket_sort_inplace(
     }
 }
 
+/** Hashtable implementation for int64 -> int64 with external storage
+ * implemented for speed and parallel processing.
+ */
+
+namespace {
+
+int log2_capacity_to_log2_nbucket(int log2_capacity) {
+    return log2_capacity < 12 ? 0
+            : log2_capacity < 20 ? log2_capacity - 12
+                                 : 10;
+}
+
+// https://bigprimes.org/
+int64_t bigprime = 8955327411143;
+
+inline int64_t hash_function(int64_t x) {
+    return (x * 1000003) % bigprime;
+}
+
+} // anonymous namespace
+
+void hashtable_int64_to_int64_init(int log2_capacity, int64_t* tab) {
+    size_t capacity = (size_t)1 << log2_capacity;
+#pragma omp parallel for
+    for (int64_t i = 0; i < capacity; i++) {
+        tab[2 * i] = -1;
+        tab[2 * i + 1] = -1;
+    }
+}
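To make the bucketing concrete: with log2_capacity = 16,
log2_capacity_to_log2_nbucket gives 16 - 12 = 4, so the 65536 slots are split
into 16 buckets of 4096 slots, and a key only ever probes inside the bucket
selected by the top 4 bits of its hash. This is what makes the add phase below
parallelizable: after the bucket_sort, each bucket is owned by a single thread
that writes only slots in [k0, k1). A quick check of the arithmetic in Python:

    log2_capacity = 16
    log2_nbucket = log2_capacity - 12 if log2_capacity < 20 else 10  # = 4
    nbucket = 1 << log2_nbucket                              # 16 buckets
    slots_per_bucket = 1 << (log2_capacity - log2_nbucket)   # 4096 slots each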
+
+void hashtable_int64_to_int64_add(
+        int log2_capacity,
+        int64_t* tab,
+        size_t n,
+        const int64_t* keys,
+        const int64_t* vals) {
+    size_t capacity = (size_t)1 << log2_capacity;
+    std::vector<int64_t> hk(n);
+    std::vector<uint64_t> bucket_no(n);
+    int64_t mask = capacity - 1;
+    int log2_nbucket = log2_capacity_to_log2_nbucket(log2_capacity);
+    size_t nbucket = (size_t)1 << log2_nbucket;
+
+#pragma omp parallel for
+    for (int64_t i = 0; i < n; i++) {
+        hk[i] = hash_function(keys[i]) & mask;
+        bucket_no[i] = hk[i] >> (log2_capacity - log2_nbucket);
+    }
+
+    std::vector<int64_t> lims(nbucket + 1);
+    std::vector<int64_t> perm(n);
+    bucket_sort(
+            n,
+            bucket_no.data(),
+            nbucket,
+            lims.data(),
+            perm.data(),
+            omp_get_max_threads());
+
+    int num_errors = 0;
+#pragma omp parallel for reduction(+ : num_errors)
+    for (int64_t bucket = 0; bucket < nbucket; bucket++) {
+        size_t k0 = bucket << (log2_capacity - log2_nbucket);
+        size_t k1 = (bucket + 1) << (log2_capacity - log2_nbucket);
+
+        for (size_t i = lims[bucket]; i < lims[bucket + 1]; i++) {
+            int64_t j = perm[i];
+            assert(bucket_no[j] == bucket);
+            assert(hk[j] >= k0 && hk[j] < k1);
+            size_t slot = hk[j];
+            for (;;) {
+                if (tab[slot * 2] == -1) { // found!
+                    tab[slot * 2] = keys[j];
+                    tab[slot * 2 + 1] = vals[j];
+                    break;
+                } else if (tab[slot * 2] == keys[j]) { // overwrite!
+                    tab[slot * 2 + 1] = vals[j];
+                    break;
+                }
+                slot++;
+                if (slot == k1) {
+                    slot = k0;
+                }
+                if (slot == hk[j]) { // no free slot left in bucket
+                    num_errors++;
+                    break;
+                }
+            }
+            if (num_errors > 0) {
+                break;
+            }
+        }
+    }
+    FAISS_THROW_IF_NOT_MSG(num_errors == 0, "hashtable capacity exhausted");
+}
+
+void hashtable_int64_to_int64_lookup(
+        int log2_capacity,
+        const int64_t* tab,
+        size_t n,
+        const int64_t* keys,
+        int64_t* vals) {
+    size_t capacity = (size_t)1 << log2_capacity;
+    std::vector<int64_t> hk(n), bucket_no(n);
+    int64_t mask = capacity - 1;
+    int log2_nbucket = log2_capacity_to_log2_nbucket(log2_capacity);
+    size_t nbucket = (size_t)1 << log2_nbucket;
+
+#pragma omp parallel for
+    for (int64_t i = 0; i < n; i++) {
+        int64_t k = keys[i];
+        int64_t hk = hash_function(k) & mask;
+        size_t slot = hk;
+
+        if (tab[2 * slot] == -1) { // not in table
+            vals[i] = -1;
+        } else if (tab[2 * slot] == k) { // found!
+            vals[i] = tab[2 * slot + 1];
+        } else { // need to search in [k0, k1)
+            size_t bucket = hk >> (log2_capacity - log2_nbucket);
+            size_t k0 = bucket << (log2_capacity - log2_nbucket);
+            size_t k1 = (bucket + 1) << (log2_capacity - log2_nbucket);
+            for (;;) {
+                if (tab[slot * 2] == k) { // found!
+                    vals[i] = tab[2 * slot + 1];
+                    break;
+                }
+                slot++;
+                if (slot == k1) {
+                    slot = k0;
+                }
+                if (slot == hk) { // bucket is full and not found
+                    vals[i] = -1;
+                    break;
+                }
+            }
+        }
+    }
+}
+
 } // namespace faiss
diff --git a/faiss/utils/sorting.h b/faiss/utils/sorting.h
index a7dd783b1e..593d952ae0 100644
--- a/faiss/utils/sorting.h
+++ b/faiss/utils/sorting.h
@@ -68,4 +68,31 @@ void matrix_bucket_sort_inplace(
         int64_t* lims,
         int nt = 0);
 
+/** Hashtable implementation for int64 -> int64 with external storage
+ * implemented for fast batch add and lookup.
+ *
+ * tab is of size 2 * (1 << log2_capacity)
+ * n is the number of elements to add or search
+ *
+ * Adding several values with the same key in one batch: an arbitrary
+ * one gets added. Adding them in different batches: the newer batch
+ * overwrites. Raises an exception if the capacity is exhausted.
+ */
+
+void hashtable_int64_to_int64_init(int log2_capacity, int64_t* tab);
+
+void hashtable_int64_to_int64_add(
+        int log2_capacity,
+        int64_t* tab,
+        size_t n,
+        const int64_t* keys,
+        const int64_t* vals);
+
+void hashtable_int64_to_int64_lookup(
+        int log2_capacity,
+        const int64_t* tab,
+        size_t n,
+        const int64_t* keys,
+        int64_t* vals);
+
 } // namespace faiss
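These declarations are the C++ face of the new MapInt64ToInt64 class in
faiss/python/extra_wrappers.py: the table is a user-owned numpy array of shape
(capacity, 2) holding key/value pairs, with -1 marking empty slots and
not-found results. A short usage sketch (the capacity must be a power of 2
and, since the table is open-addressed, should comfortably exceed the number
of keys; the key values are illustrative):

    import numpy as np
    import faiss

    rs = np.random.RandomState(123)
    keys = rs.choice(2 ** 20, size=1000, replace=False).astype('int64')
    vals = rs.randint(2 ** 20, size=1000).astype('int64')

    tab = faiss.MapInt64ToInt64(4096)  # 2^12 slots for 1000 keys
    tab.add(keys, vals)
    assert np.all(tab.lookup(keys) == vals)
    # missing keys come back as -1
    assert tab.lookup(np.array([2 ** 40], dtype='int64'))[0] == -1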
diff --git a/faiss/utils/utils.cpp b/faiss/utils/utils.cpp
index 894653b1cd..54a3700672 100644
--- a/faiss/utils/utils.cpp
+++ b/faiss/utils/utils.cpp
@@ -528,4 +528,66 @@ bool check_openmp() {
     return true;
 }
 
+namespace {
+
+int64_t count_lt(int64_t n, const float* row, float threshold) {
+    for (int64_t i = 0; i < n; i++) {
+        if (!(row[i] < threshold)) {
+            return i;
+        }
+    }
+    return n;
+}
+
+int64_t count_gt(int64_t n, const float* row, float threshold) {
+    for (int64_t i = 0; i < n; i++) {
+        if (!(row[i] > threshold)) {
+            return i;
+        }
+    }
+    return n;
+}
+
+} // namespace
+
+void CombinerRangeKNN::compute_sizes(int64_t* L_res) {
+    this->L_res = L_res;
+    L_res[0] = 0;
+    int64_t j = 0;
+    for (int64_t i = 0; i < nq; i++) {
+        int64_t n_in;
+        if (!mask || !mask[i]) {
+            const float* row = D + i * k;
+            n_in = keep_max ? count_gt(k, row, r2) : count_lt(k, row, r2);
+        } else {
+            n_in = lim_remain[j + 1] - lim_remain[j];
+            j++;
+        }
+        L_res[i + 1] = n_in; // sizes first, cumulative sum below
+    }
+    // cumsum
+    for (int64_t i = 0; i < nq; i++) {
+        L_res[i + 1] += L_res[i];
+    }
+}
+
+void CombinerRangeKNN::write_result(float* D_res, int64_t* I_res) {
+    FAISS_THROW_IF_NOT(L_res);
+    int64_t j = 0;
+    for (int64_t i = 0; i < nq; i++) {
+        int64_t n_in = L_res[i + 1] - L_res[i];
+        float* D_row = D_res + L_res[i];
+        int64_t* I_row = I_res + L_res[i];
+        if (!mask || !mask[i]) {
+            memcpy(D_row, D + i * k, n_in * sizeof(*D_row));
+            memcpy(I_row, I + i * k, n_in * sizeof(*I_row));
+        } else {
+            memcpy(D_row, D_remain + lim_remain[j], n_in * sizeof(*D_row));
+            memcpy(I_row, I_remain + lim_remain[j], n_in * sizeof(*I_row));
+            j++;
+        }
+    }
+}
+
 } // namespace faiss
diff --git a/faiss/utils/utils.h b/faiss/utils/utils.h
index b1080a929b..af7c677e39 100644
--- a/faiss/utils/utils.h
+++ b/faiss/utils/utils.h
@@ -163,6 +163,39 @@ uint64_t hash_bytes(const uint8_t* bytes, int64_t n);
 /** Whether OpenMP annotations were respected. */
 bool check_openmp();
 
+/** This class is used to combine range and knn search results
+ * in contrib.exhaustive_search.range_search_gpu */
+
+struct CombinerRangeKNN {
+    int64_t nq;    /// nb of queries
+    size_t k;      /// number of neighbors for the knn search part
+    float r2;      /// range search radius
+    bool keep_max; /// whether to keep max values instead of min.
+
+    CombinerRangeKNN(int64_t nq, size_t k, float r2, bool keep_max)
+            : nq(nq), k(k), r2(r2), keep_max(keep_max) {}
+
+    /// Knn search results
+    const int64_t* I = nullptr; /// size nq * k
+    const float* D = nullptr;   /// size nq * k
+
+    /// optional: range search results (ignored if mask is NULL)
+    const bool* mask = nullptr; /// mask for where knn results are valid, size nq
+    // range search results for remaining entries nrange = sum(mask)
+    const int64_t* lim_remain = nullptr; /// size nrange + 1
+    const float* D_remain = nullptr;     /// size lim_remain[nrange]
+    const int64_t* I_remain = nullptr;   /// size lim_remain[nrange]
+
+    const int64_t* L_res = nullptr; /// size nq + 1
+    // Phase 1: compute sizes into limits array (of size nq + 1)
+    void compute_sizes(int64_t* L_res);
+
+    /// Phase 2: caller allocates D_res and I_res (size L_res[nq])
+    /// Phase 3: fill in D_res and I_res
+    void write_result(float* D_res, int64_t* I_res);
+};
+
 } // namespace faiss
 
 #endif /* FAISS_utils_h */
diff --git a/tests/test_build_blocks.py b/tests/test_build_blocks.py
index 342d56443a..189c785998 100644
--- a/tests/test_build_blocks.py
+++ b/tests/test_build_blocks.py
@@ -693,3 +693,35 @@ def test_max_int(self):
 
     def test_max_float(self):
         self.do_test(ismax=True, dtype='float32')
+
+
+class TestMapInt64ToInt64(unittest.TestCase):
+
+    def do_test(self, capacity, n):
+        """ test that we are able to lookup """
+        rs = np.random.RandomState(123)
+        # make sure we have unique values
+        keys = np.unique(rs.choice(2 ** 29, size=n))
+        rs.shuffle(keys)
+        n = keys.size
+        vals = rs.choice(2 ** 30, size=n).astype('int64')
+        tab = faiss.MapInt64ToInt64(capacity)
+        tab.add(keys, vals)
+
+        # lookup and check
+        vals2 = tab.lookup(keys)
+        np.testing.assert_array_equal(vals, vals2)
+
+        # make a few keys that we know are not there
+        mask = rs.rand(n) < 0.3
+        keys[mask] = rs.choice(2 ** 29, size=n)[mask] + 2 ** 29
+        vals2 = tab.lookup(keys)
+        np.testing.assert_array_equal(-1, vals2[mask])
+        np.testing.assert_array_equal(vals[~mask], vals2[~mask])
+
+    def test_small(self):
+        self.do_test(16384, 10000)
+
+    def 
xx_test_large(self): + # don't run by default because it's slow + self.do_test(2 ** 21, 10 ** 6) diff --git a/tests/test_contrib.py b/tests/test_contrib.py index 10ac3224c7..ad85a3ddd0 100644 --- a/tests/test_contrib.py +++ b/tests/test_contrib.py @@ -130,7 +130,7 @@ def do_test_range(self, metric): xq, ds.database_iterator(bs=100), threshold, ngpu=0, metric_type=metric) - evaluation.test_ref_range_results( + evaluation.check_ref_range_results( ref_lims, ref_D, ref_I, new_lims, new_D, new_I ) @@ -161,7 +161,7 @@ def matrix_iterator(xb, bs): _, new_lims, new_D, new_I = range_search_max_results( index, matrix_iterator(xq, 100), threshold, max_results=1e10) - evaluation.test_ref_range_results( + evaluation.check_ref_range_results( ref_lims, ref_D, ref_I, new_lims, new_D, new_I ) @@ -175,7 +175,7 @@ def matrix_iterator(xb, bs): ref_lims, ref_D, ref_I = index.range_search(xq, new_threshold) - evaluation.test_ref_range_results( + evaluation.check_ref_range_results( ref_lims, ref_D, ref_I, new_lims, new_D, new_I ) @@ -435,7 +435,7 @@ def do_test(self, metric_type): min_results=Dref.size, clip_to_min=True ) - evaluation.test_ref_range_results( + evaluation.check_ref_range_results( lims_ref, Dref, Iref, lims_new, Dnew, Inew ) diff --git a/tests/test_index.py b/tests/test_index.py index bc9392d7ea..0e828e08c1 100644 --- a/tests/test_index.py +++ b/tests/test_index.py @@ -1112,9 +1112,6 @@ def test_IVFSQ(self): def test_IVFPQ(self): self.do_test("IVF5,PQ4x4np") -if __name__ == '__main__': - unittest.main() - class TestValidIndexParams(unittest.TestCase): diff --git a/tests/test_partition.py b/tests/test_partition.py index 08087fb7c3..02de7e8c2c 100644 --- a/tests/test_partition.py +++ b/tests/test_partition.py @@ -40,6 +40,8 @@ def test_partition_fuzzy_2(self): self.do_partition(160, (70, 80)) +def pointer_to_minus1(): + return np.array([-1], dtype='int64').view("uint64") class TestPartitioningFloat(unittest.TestCase, PartitionTests): @@ -67,7 +69,7 @@ def do_partition(self, n, q, maxval=None, seed=None): ) else: q_min, q_max = q - q = np.array([-1], dtype='uint64') + q = pointer_to_minus1() faiss.CMax_float_partition_fuzzy( sp(vals), sp(ids), n, q_min, q_max, sp(q) @@ -117,7 +119,7 @@ def do_partition(self, n, q, maxval=None, seed=None): ) else: q_min, q_max = q - q = np.array([-1], dtype='uint64') + q = pointer_to_minus1() faiss.CMin_float_partition_fuzzy( sp(vals), sp(ids), n, q_min, q_max, sp(q) @@ -164,7 +166,7 @@ def do_partition(self, n, q, maxval=65536, seed=None): tab_a.get(), sp(ids), n, q, q, None) else: q_min, q_max = q - q = np.array([-1], dtype='uint64') + q = pointer_to_minus1() faiss.CMax_uint16_partition_fuzzy( tab_a.get(), sp(ids), n, q_min, q_max, sp(q) @@ -213,7 +215,7 @@ def do_partition(self, n, q, maxval=65536, seed=None): tab_a.get(), sp(ids), n, q, q, None) else: q_min, q_max = q - q = np.array([-1], dtype='uint64') + q = pointer_to_minus1() thresh2 = faiss.CMin_uint16_partition_fuzzy( tab_a.get(), sp(ids), n, q_min, q_max, sp(q) diff --git a/tests/test_search_params.py b/tests/test_search_params.py index 2c76f151dc..7b83bbfcc5 100644 --- a/tests/test_search_params.py +++ b/tests/test_search_params.py @@ -9,7 +9,7 @@ import unittest from faiss.contrib import datasets -from faiss.contrib.evaluation import sort_range_res_2 +from faiss.contrib.evaluation import sort_range_res_2, check_ref_range_results faiss.omp_set_num_threads(4) @@ -416,3 +416,28 @@ def test_12_92(self): len(ids), sp(ids), sp(j01[:1]), sp(j01[1:])) print(j01) assert j01[0] >= j01[1] + +class 
TestPrecomputed(unittest.TestCase): + + def test_knn_and_range(self): + ds = datasets.SyntheticDataset(32, 1000, 100, 20) + index = faiss.index_factory(ds.d, "IVF32,Flat") + index.train(ds.get_train()) + index.add(ds.get_database()) + index.nprobe = 5 + Dref, Iref = index.search(ds.get_queries(), 10) + + Dq, Iq = index.quantizer.search(ds.get_queries(), index.nprobe) + Dnew, Inew = index.search_preassigned(ds.get_queries(), 10, Iq, Dq) + np.testing.assert_equal(Iref, Inew) + np.testing.assert_equal(Dref, Dnew) + + r2 = float(np.median(Dref[:, 5])) + Lref, Dref, Iref = index.range_search(ds.get_queries(), r2) + assert Lref.size > 10 # make sure there is something to test... + + Lnew, Dnew, Inew = index.range_search_preassigned(ds.get_queries(), r2, Iq, Dq) + check_ref_range_results( + Lref, Dref, Iref, + Lnew, Dnew, Inew + )
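The binary-index counterpart added in class_wrappers.py follows the same
pattern. A sketch with an assumed IndexBinaryIVF (the database, nlist and
radius are illustrative; note that for binary indexes the quantizer distances
are int32 and the radius is a Hamming threshold):

    import numpy as np
    import faiss

    d = 64
    rs = np.random.RandomState(0)
    xb = rs.randint(256, size=(2000, d // 8)).astype('uint8')
    xq = xb[:10]  # queries taken from the database, so matches are guaranteed

    quantizer = faiss.IndexBinaryFlat(d)
    index = faiss.IndexBinaryIVF(quantizer, d, 16)
    index.train(xb)
    index.add(xb)
    index.nprobe = 4

    Dq, Iq = index.quantizer.search(xq, index.nprobe)
    # radius 10 in Hamming distance, reusing the precomputed assignment
    lims, D, I = index.range_search_preassigned(xq, 10, Iq, Dq)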