Skip to content

Commit

Permalink
MDEV-35450 VEC_DISTANCE() function to autouse the available index type
Browse files Browse the repository at this point in the history
  • Loading branch information
vuvova committed Jan 21, 2025
1 parent 528249a commit e11592a
Show file tree
Hide file tree
Showing 6 changed files with 210 additions and 2 deletions.
100 changes: 100 additions & 0 deletions mysql-test/main/vector_funcs.result
Original file line number Diff line number Diff line change
Expand Up @@ -155,3 +155,103 @@ select vec_totext(`null`) from (values (null),(0x00000000)) x;
vec_totext(`null`)
NULL
[0]
# End of 11.7 tests
#
# MDEV-35450 VEC_DISTANCE() function to autouse the available index type
#
create table t1 (a int primary key, b vector(5) not null, vector index (b) distance=euclidean);
insert t1 values (0,vec_fromtext('[1,2,3,4,5]')), (1,vec_fromtext('[2,2,3,4,5]')),
(2,vec_fromtext('[1,3,3,4,5]')), (3,vec_fromtext('[1,2,4,4,5]')),
(4,vec_fromtext('[1,2,4,5,5]'));
create table t2 (c int primary key, d vector(5) not null, vector index (d) distance=cosine);
insert t2 select * from t1;
create table t3 (e int primary key, f vector(5) not null);
insert t3 select * from t1;
select a,c,vec_distance(b,d),vec_distance_euclidean(b,d) from t1, t2 where a=(c+11)*13%5;
a c vec_distance(b,d) vec_distance_euclidean(b,d)
3 0 1 1
1 1 0 0
4 2 1.73205 1.73205
2 3 1.41421 1.41421
0 4 1.41421 1.41421
select a,c,vec_distance(d,b),vec_distance_cosine(b,d) from t1, t2 where a=(c+11)*13%5;
a c vec_distance(d,b) vec_distance_cosine(b,d)
3 0 0.00676 0.00676
1 1 0 0
4 2 0.01943 0.01943
2 3 0.01626 0.01626
0 4 0.00784 0.00784
select a,e,vec_distance(b,f),vec_distance_euclidean(b,f) from t1, t3 where a=(e+11)*13%5;
a e vec_distance(b,f) vec_distance_euclidean(b,f)
3 0 1 1
1 1 0 0
4 2 1.73205 1.73205
2 3 1.41421 1.41421
0 4 1.41421 1.41421
select e,c,vec_distance(f,d),vec_distance_cosine(d,f) from t2, t3 where e=(c+11)*13%5;
e c vec_distance(f,d) vec_distance_cosine(d,f)
3 0 0.00676 0.00676
1 1 0 0
4 2 0.01943 0.01943
2 3 0.01626 0.01626
0 4 0.00784 0.00784
select a,vec_distance(b,vec_fromtext('[5,4,3,2,1]')),vec_distance_euclidean(b,vec_fromtext('[5,4,3,2,1]')) from t1;
a vec_distance(b,vec_fromtext('[5,4,3,2,1]')) vec_distance_euclidean(b,vec_fromtext('[5,4,3,2,1]'))
0 6.32455 6.32455
1 5.74456 5.74456
2 6.08276 6.08276
3 6.40312 6.40312
4 6.78232 6.78232
select c,vec_distance(d,vec_fromtext('[5,4,3,2,1]')),vec_distance_cosine(d,vec_fromtext('[5,4,3,2,1]')) from t2;
c vec_distance(d,vec_fromtext('[5,4,3,2,1]')) vec_distance_cosine(d,vec_fromtext('[5,4,3,2,1]'))
0 0.36363 0.36363
1 0.29178 0.29178
2 0.32109 0.32109
3 0.34926 0.34926
4 0.35989 0.35989
select e,vec_distance(f,vec_fromtext('[5,4,3,2,1]')) from t3;
ERROR HY000: Cannot determine distance type for VEC_DISTANCE, index is not found
drop table t1, t2, t3;
#
# Item_func_vec_distance::do_get_copy()
#
create table t1 (a vector(1) not null, vector(a));
create algorithm=temptable view v1 as select * from t1;
select * from v1 where vec_distance(a,0x30303030) > 0;
a
drop view v1;
drop table t1;
#
# MDEV-35724 VEC_DISTANCE does not work in HAVING clause
#
create table t (v vector(1) not null, vector(v));
insert t values (0x31313131),(0x32323232);
select v from t having vec_distance(v,0x30303030) > 0;
v
1111
2222
drop table t;
#
# MDEV-35752 VEC_DISTANCE does not work in triggers
#
create table t (id int primary key default 1, v vector(1) not null default 0x30303030, vector(v), d float);
create trigger tr before insert on t for each row set new.d = vec_distance(new.v,0x30303030);
insert t (v) values (0x31313131);
select vec_distance(default(v), 0x31313131) from t;
vec_distance(default(v), 0x31313131)
0.00000
insert t (v) values (0x32323232) on duplicate key update d=vec_distance(values(v), 0x31313131);
drop table t;
#
# MDEV-35778 Server crashes in Item_func_vec_distance::fix_length_and_dec upon using VEC_DISTANCE with temptable view
#
create table t (x vector(1) not null, vector(x));
insert into t values (0x31313131),(0x32323232);
create algorithm=temptable view v as select * from t;
select * from v order by vec_distance(0x30303030, x);
x
1111
2222
drop view v;
drop table t;
# End of 11.8 tests
72 changes: 72 additions & 0 deletions mysql-test/main/vector_funcs.test
Original file line number Diff line number Diff line change
Expand Up @@ -71,3 +71,75 @@ select vec_fromtext(0x00000000);
--echo # MDEV-35220 Assertion `!item->null_value' failed upon VEC_TOTEXT call
--echo #
select vec_totext(`null`) from (values (null),(0x00000000)) x;

--echo # End of 11.7 tests

--echo #
--echo # MDEV-35450 VEC_DISTANCE() function to autouse the available index type
--echo #

# Fixture: three tables holding the same five 5-dimensional vectors.
#   t1 - vector index on b declared with distance=euclidean
#   t2 - vector index on d declared with distance=cosine
#   t3 - no vector index at all
create table t1 (a int primary key, b vector(5) not null, vector index (b) distance=euclidean);
insert t1 values (0,vec_fromtext('[1,2,3,4,5]')), (1,vec_fromtext('[2,2,3,4,5]')),
(2,vec_fromtext('[1,3,3,4,5]')), (3,vec_fromtext('[1,2,4,4,5]')),
(4,vec_fromtext('[1,2,4,5,5]'));
create table t2 (c int primary key, d vector(5) not null, vector index (d) distance=cosine);
insert t2 select * from t1;
create table t3 (e int primary key, f vector(5) not null);
insert t3 select * from t1;

# Every query below is preceded by --replace_regex, which keeps only the
# first five digits after the decimal point in the printed distances.
#
# The join condition "x=(y+11)*13%5" pairs each key 0..4 of one table with
# exactly one key of the other (0->3, 1->1, 2->4, 3->2, 4->0 in the recorded
# result), so each query returns five rows.
#
# In each query the second selected distance uses an explicitly named metric
# and is the reference value the VEC_DISTANCE() column is expected to equal.

# Both arguments are indexed columns with different metrics; b comes first
# and the recorded result equals the euclidean distance.
--replace_regex /(\.\d{5})\d+/\1/
select a,c,vec_distance(b,d),vec_distance_euclidean(b,d) from t1, t2 where a=(c+11)*13%5;
# Same columns with the arguments swapped: d comes first and the recorded
# result equals the cosine distance.
--replace_regex /(\.\d{5})\d+/\1/
select a,c,vec_distance(d,b),vec_distance_cosine(b,d) from t1, t2 where a=(c+11)*13%5;
# Only the first argument (b, euclidean) has a vector index.
--replace_regex /(\.\d{5})\d+/\1/
select a,e,vec_distance(b,f),vec_distance_euclidean(b,f) from t1, t3 where a=(e+11)*13%5;
# Only the second argument (d, cosine) has a vector index.
--replace_regex /(\.\d{5})\d+/\1/
select e,c,vec_distance(f,d),vec_distance_cosine(d,f) from t2, t3 where e=(c+11)*13%5;
# Indexed column against a constant vector: euclidean index.
--replace_regex /(\.\d{5})\d+/\1/
select a,vec_distance(b,vec_fromtext('[5,4,3,2,1]')),vec_distance_euclidean(b,vec_fromtext('[5,4,3,2,1]')) from t1;
# Indexed column against a constant vector: cosine index.
--replace_regex /(\.\d{5})\d+/\1/
select c,vec_distance(d,vec_fromtext('[5,4,3,2,1]')),vec_distance_cosine(d,vec_fromtext('[5,4,3,2,1]')) from t2;
# Neither argument is a column with a vector index, so the metric cannot be
# deduced and the statement must fail with ER_VEC_DISTANCE_TYPE.
--error ER_VEC_DISTANCE_TYPE
select e,vec_distance(f,vec_fromtext('[5,4,3,2,1]')) from t3;

drop table t1, t2, t3;

--echo #
--echo # Item_func_vec_distance::do_get_copy()
--echo #
# VEC_DISTANCE() in the WHERE clause of a query over a temptable view.
# No rows are inserted, so the recorded result is an empty set: the test
# only requires the statement to execute without error.
# NOTE(review): presumably this is the path that reaches
# Item_func_vec_distance::do_get_copy() - confirm against the server code.
create table t1 (a vector(1) not null, vector(a));
create algorithm=temptable view v1 as select * from t1;
select * from v1 where vec_distance(a,0x30303030) > 0;
drop view v1;
drop table t1;

--echo #
--echo # MDEV-35724 VEC_DISTANCE does not work in HAVING clause
--echo #
# VEC_DISTANCE() used in a HAVING clause on a column with a vector index.
# Both inserted rows are expected back in the recorded result.
create table t (v vector(1) not null, vector(v));
insert t values (0x31313131),(0x32323232);
select v from t having vec_distance(v,0x30303030) > 0;
drop table t;

--echo #
--echo # MDEV-35752 VEC_DISTANCE does not work in triggers
--echo #
# VEC_DISTANCE() applied to column references that are not plain fields:
# new.v inside a trigger, default(v), and values(v).
create table t (id int primary key default 1, v vector(1) not null default 0x30303030, vector(v), d float);
# BEFORE INSERT trigger fills d from the distance of the incoming vector.
create trigger tr before insert on t for each row set new.d = vec_distance(new.v,0x30303030);
insert t (v) values (0x31313131);
# Printed distance is cut to five decimals; the recorded value is 0.00000.
--replace_regex /(\.\d{5})\d+/\1/
select vec_distance(default(v), 0x31313131) from t;
# id is omitted again, so it takes its default 1 and collides with the row
# inserted above; the ON DUPLICATE KEY UPDATE branch is what gets exercised.
insert t (v) values (0x32323232) on duplicate key update d=vec_distance(values(v), 0x31313131);
drop table t;

--echo #
--echo # MDEV-35778 Server crashes in Item_func_vec_distance::fix_length_and_dec upon using VEC_DISTANCE with temptable view
--echo #
# Regression test for the crash named in the section header: ORDER BY
# VEC_DISTANCE() over a temptable view, with the constant as the first
# argument and the view column as the second.
create table t (x vector(1) not null, vector(x));
insert into t values (0x31313131),(0x32323232);
create algorithm=temptable view v as select * from t;
select * from v order by vec_distance(0x30303030, x);
drop view v;
drop table t;

--echo # End of 11.8 tests
17 changes: 17 additions & 0 deletions sql/item_create.cc
Original file line number Diff line number Diff line change
Expand Up @@ -6287,6 +6287,22 @@ class Create_func_vec_distance_cosine: public Create_func_arg2

Create_func_vec_distance_cosine Create_func_vec_distance_cosine::s_singleton;

/**
  Builder for the two-argument SQL function VEC_DISTANCE().

  The item is created with kind Item_func_vec_distance::AUTO, i.e. no
  distance metric is chosen here. Instances are not created by callers:
  the constructor is protected and the single instance is s_singleton.
*/
class Create_func_vec_distance: public Create_func_arg2
{
protected:
  Create_func_vec_distance() = default;
  virtual ~Create_func_vec_distance() = default;

public:
  static Create_func_vec_distance s_singleton;

  /**
    Allocate the VEC_DISTANCE item on the statement memory root.

    @param thd   current thread; its mem_root owns the new item
    @param arg1  first vector argument
    @param arg2  second vector argument
    @return the new item, as produced by the placement new on thd->mem_root
  */
  Item *create_2_arg(THD *thd, Item *arg1, Item *arg2) override
  {
    Item *distance_item=
      new (thd->mem_root) Item_func_vec_distance(thd, arg1, arg2,
                                                 Item_func_vec_distance::AUTO);
    return distance_item;
  }
};

Create_func_vec_distance Create_func_vec_distance::s_singleton;

class Create_func_vec_totext: public Create_func_arg1
{
public:
Expand Down Expand Up @@ -6549,6 +6565,7 @@ const Native_func_registry func_array[] =
{ { STRING_WITH_LEN("UUID_SHORT") }, BUILDER(Create_func_uuid_short)},
{ { STRING_WITH_LEN("VEC_DISTANCE_EUCLIDEAN") }, BUILDER(Create_func_vec_distance_euclidean)},
{ { STRING_WITH_LEN("VEC_DISTANCE_COSINE") }, BUILDER(Create_func_vec_distance_cosine)},
{ { STRING_WITH_LEN("VEC_DISTANCE") }, BUILDER(Create_func_vec_distance)},
{ { STRING_WITH_LEN("VEC_FROMTEXT") }, BUILDER(Create_func_vec_fromtext)},
{ { STRING_WITH_LEN("VEC_TOTEXT") }, BUILDER(Create_func_vec_totext)},
{ { STRING_WITH_LEN("VERSION") }, BUILDER(Create_func_version)},
Expand Down
16 changes: 16 additions & 0 deletions sql/item_vectorfunc.cc
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,22 @@ bool Item_func_vec_distance::fix_length_and_dec(THD *thd)
switch (kind) {
case EUCLIDEAN: calc_distance= calc_distance_euclidean; break;
case COSINE: calc_distance= calc_distance_cosine; break;
case AUTO:
for (uint i=0; i < 2; i++)
if (auto *item= dynamic_cast<Item_field*>(args[i]->real_item()))
{
TABLE_SHARE *share= item->field->orig_table->s;
Field *f= share->field[item->field->field_index];
KEY *kinfo= share->key_info;
for (uint j= share->keys; j < share->total_keys; j++)
if (kinfo[j].algorithm == HA_KEY_ALG_VECTOR && f->key_start.is_set(j))
{
kind= mhnsw_uses_distance(f->table, kinfo + j);
return fix_length_and_dec(thd);
}
}
my_error(ER_VEC_DISTANCE_TYPE, MYF(0));
return 1;
}
set_maybe_null(); // if wrong dimensions
return Item_real_func::fix_length_and_dec(thd);
Expand Down
5 changes: 3 additions & 2 deletions sql/item_vectorfunc.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,13 +39,14 @@ class Item_func_vec_distance: public Item_real_func
double (*calc_distance)(float *v1, float *v2, size_t v_len);

public:
enum distance_kind { EUCLIDEAN, COSINE } kind;
enum distance_kind { EUCLIDEAN, COSINE, AUTO } kind;
Item_func_vec_distance(THD *thd, Item *a, Item *b, distance_kind kind);
/**
  SQL-visible name of the function for the current distance kind.

  The table is indexed directly by `kind`, so its entries must stay in the
  same order as enum distance_kind: EUCLIDEAN, COSINE, AUTO.

  @return the name entry selected by `kind`
*/
LEX_CSTRING func_name_cstring() const override
{
  // Exactly one entry per distance_kind value; the array size is fixed at 3.
  static LEX_CSTRING name[3]= {
    { STRING_WITH_LEN("VEC_DISTANCE_EUCLIDEAN") },
    { STRING_WITH_LEN("VEC_DISTANCE_COSINE") },
    { STRING_WITH_LEN("VEC_DISTANCE") }
  };
  return name[kind];
}
Expand Down
2 changes: 2 additions & 0 deletions sql/share/errmsg-utf8.txt
Original file line number Diff line number Diff line change
Expand Up @@ -12291,3 +12291,5 @@ ER_VECTOR_BINARY_FORMAT_INVALID
eng "Invalid binary vector format. Must use IEEE standard float representation in little-endian format. Use VEC_FromText() to generate it."
ER_VECTOR_FORMAT_INVALID
eng "Invalid vector format at offset: %d for '%-.100s'. Must be a valid JSON array of numbers."
ER_VEC_DISTANCE_TYPE
eng "Cannot determine distance type for VEC_DISTANCE, index is not found"

0 comments on commit e11592a

Please sign in to comment.