From c6def12e171e4da45364447c14b862d0359e673d Mon Sep 17 00:00:00 2001 From: yhmo Date: Fri, 9 Jul 2021 18:26:45 +0800 Subject: [PATCH 1/2] implement calc_distance() Signed-off-by: yhmo --- pymilvus_orm/utility.py | 50 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) diff --git a/pymilvus_orm/utility.py b/pymilvus_orm/utility.py index 0feedb7..3837583 100644 --- a/pymilvus_orm/utility.py +++ b/pymilvus_orm/utility.py @@ -260,3 +260,53 @@ def list_collections(timeout=None, using="default") -> list: >>> utility.list_collections() """ return _get_connection(using).list_collections() + + +def calc_distance(vectors_left, vectors_right, params=None, timeout=None, using="default"): + """ + Calculate distance between two vector arrays. + + :param vectors_left: The vectors on the left of operator. + :type vectors_left: dict + `{"ids": [1, 2, 3, .... n], "collection": "c_1", "partition": "p_1", "field": "v_1"}` + or + `{"float_vectors": [[1.0, 2.0], [3.0, 4.0], ... [9.0, 10.0]]}` + or + `{"bin_vectors": [b'\x94', b'N', ... b'\xca']}` + + :param vectors_right: The vectors on the right of operator. + :type vectors_right: dict + `{"ids": [1, 2, 3, .... n], "collection": "col_1", "partition": "p_1", "field": "v_1"}` + or + `{"float_vectors": [[1.0, 2.0], [3.0, 4.0], ... [9.0, 10.0]]}` + or + `{"bin_vectors": [b'\x94', b'N', ... 
b'\xca']}` + + :param params: parameters, currently only support "metric_type", default value is "L2" + extra parameter for "L2" distance: "sqrt", true or false, default is false + extra parameter for "HAMMING" and "TANIMOTO": "dim", set this value if dimension is not a multiple of 8, otherwise the dimension will be calculated by list length + :type params: dict + There are examples of supported metric_type: + `{"metric": "L2"}` + `{"metric": "IP"}` + `{"metric": "HAMMING"}` + `{"metric": "TANIMOTO"}` + Note: "L2", "IP", "HAMMING", "TANIMOTO" are case insensitive + + :return: 2-d array distances + :rtype: list[list[int]] for "HAMMING" or list[list[float]] for others + Assume the vectors_left: L_1, L_2, L_3 + Assume the vectors_right: R_a, R_b + Distance between L_n and R_m we called "D_n_m" + The returned distances are arranged like this: + [D_1_a, D_1_b, D_2_a, D_2_b, D_3_a, D_3_b] + + :example: + >>> vectors_l = [[random.random() for _ in range(64)] for _ in range(5)] + >>> vectors_r = [[random.random() for _ in range(64)] for _ in range(10)] + >>> op_l = {"float_vectors": vectors_l} + >>> op_r = {"float_vectors": vectors_r} + >>> params = {"metric": "L2", "sqrt": True} + >>> results = utility.calc_distance(vectors_left=op_l, vectors_right=op_r, params=params) + """ + return _get_connection(using).calc_distance(vectors_left, vectors_right, params, timeout) \ No newline at end of file From 46b420047988bc8696337fb6b35ad2cb552e8f10 Mon Sep 17 00:00:00 2001 From: yhmo Date: Tue, 13 Jul 2021 10:54:52 +0800 Subject: [PATCH 2/2] implement calc_distance() Signed-off-by: yhmo --- pymilvus_orm/utility.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pymilvus_orm/utility.py b/pymilvus_orm/utility.py index 3837583..b4a16d7 100644 --- a/pymilvus_orm/utility.py +++ b/pymilvus_orm/utility.py @@ -309,4 +309,4 @@ def calc_distance(vectors_left, vectors_right, params=None, timeout=None, using= >>> params = {"metric": "L2", "sqrt": True} >>> results = 
utility.calc_distance(vectors_left=op_l, vectors_right=op_r, params=params) """ - return _get_connection(using).calc_distance(vectors_left, vectors_right, params, timeout) \ No newline at end of file + return _get_connection(using).calc_distance(vectors_left, vectors_right, params, timeout)