Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

planner: don't calc the heavy expression used in ORDER BY stmt twice #58208

Merged
merged 10 commits into from
Dec 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pkg/expression/integration_test/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ go_test(
"main_test.go",
],
flaky = True,
shard_count = 48,
shard_count = 50,
deps = [
"//pkg/config",
"//pkg/domain",
Expand Down
209 changes: 203 additions & 6 deletions pkg/expression/integration_test/integration_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -339,12 +339,11 @@ func TestVectorConstantExplain(t *testing.T) {
))
tk.MustQuery(`EXPLAIN format = 'brief' SELECT VEC_COSINE_DISTANCE(c, '[1,2,3,4,5,6,7,8,9,10,11]') AS d FROM t ORDER BY d LIMIT 10;`).Check(testkit.Rows(
"Projection 10.00 root vec_cosine_distance(test.t.c, [1,2,3,4,5,(6 more)...])->Column#3",
"└─Projection 10.00 root test.t.c",
" └─TopN 10.00 root Column#4, offset:0, count:10",
" └─Projection 10.00 root test.t.c, vec_cosine_distance(test.t.c, [1,2,3,4,5,(6 more)...])->Column#4",
" └─TableReader 10.00 root data:TopN",
" └─TopN 10.00 cop[tikv] vec_cosine_distance(test.t.c, [1,2,3,4,5,(6 more)...]), offset:0, count:10",
" └─TableFullScan 10000.00 cop[tikv] table:t keep order:false, stats:pseudo",
"└─TopN 10.00 root Column#4, offset:0, count:10",
" └─TableReader 10.00 root data:TopN",
" └─TopN 10.00 cop[tikv] Column#4, offset:0, count:10",
" └─Projection 10.00 cop[tikv] test.t.c, vec_cosine_distance(test.t.c, [1,2,3,4,5,(6 more)...])->Column#4",
" └─TableFullScan 10000.00 cop[tikv] table:t keep order:false, stats:pseudo",
))

// Prepare a large Vector string
Expand Down Expand Up @@ -849,6 +848,204 @@ func TestVectorMiscFunctions(t *testing.T) {
tk.MustQuery(`SELECT * FROM a;`).Check(testkit.Rows("1 [2,10,14] 3"))
}

func testVectorSearchInternal(tk *testkit.TestKit) {
tk.MustExec(`
create table t1 (
id int primary key,
vec vector(3),
a int,
b int,
c vector(3),
d vector,
VECTOR INDEX idx_embedding ((VEC_COSINE_DISTANCE(vec)))
)
`)
tk.MustExec(`
insert into t1 values
(1, '[1,1,1]', 11, 111, '[1,1,1]', '[1,1,1]'),
(2, '[2,2,2]', 22, 222, '[2,2,2]', '[2,2,2]'),
(3, '[3,3,3]', 33, 333, '[3,3,3]', '[3,3,3]');
`)
tk.MustExec("analyze table t1")

tk.MustQuery("select id from t1 order by id").Check(testkit.Rows(
"1",
"2",
"3",
))
tk.MustQuery("select id from t1 order by vec_l2_distance(vec, '[3,3,3]') limit 10").Check(testkit.Rows(
"3",
"2",
"1",
))
tk.MustQuery("select id from t1 order by vec_l2_distance(vec, '[3,3,3]') limit 1").Check(testkit.Rows(
"3",
))
tk.MustQuery("select id from t1 order by vec_l2_distance(vec, '[3,3,3]')").Check(testkit.Rows(
"3",
"2",
"1",
))
tk.MustQuery("select * from t1 order by vec_l2_distance(vec, '[3,3,3]') limit 10").Check(testkit.Rows(
"3 [3,3,3] 33 333 [3,3,3] [3,3,3]",
"2 [2,2,2] 22 222 [2,2,2] [2,2,2]",
"1 [1,1,1] 11 111 [1,1,1] [1,1,1]",
))
tk.MustQuery("select id, a, b from t1 order by vec_l2_distance(vec, '[3,3,3]') limit 10").Check(testkit.Rows(
"3 33 333",
"2 22 222",
"1 11 111",
))
tk.MustQuery("select a, id, b from t1 order by vec_l2_distance(vec, '[3,3,3]') limit 10").Check(testkit.Rows(
"33 3 333",
"22 2 222",
"11 1 111",
))
tk.MustQuery("select id, vec_l2_distance(vec, '[3,3,3]') as d from t1 order by d limit 10").Check(testkit.Rows(
"3 0",
"2 1.7320508075688772",
"1 3.4641016151377544",
))
tk.MustQuery("select id, vec_l2_distance(vec, '[3,3,3]') as d from t1 order by d").Check(testkit.Rows(
"3 0",
"2 1.7320508075688772",
"1 3.4641016151377544",
))
tk.MustQuery("select *, vec_l2_distance(vec, '[3,3,3]') as d from t1 order by d limit 10").Check(testkit.Rows(
"3 [3,3,3] 33 333 [3,3,3] [3,3,3] 0",
"2 [2,2,2] 22 222 [2,2,2] [2,2,2] 1.7320508075688772",
"1 [1,1,1] 11 111 [1,1,1] [1,1,1] 3.4641016151377544",
))
tk.MustQuery("select id, vec_l2_distance(vec, '[3,3,3]') as d, a, b from t1 order by d limit 10").Check(testkit.Rows(
"3 0 33 333",
"2 1.7320508075688772 22 222",
"1 3.4641016151377544 11 111",
))
tk.MustQuery("select id, a, b, vec_l2_distance(vec, '[3,3,3]') as d from t1 order by d limit 10").Check(testkit.Rows(
"3 33 333 0",
"2 22 222 1.7320508075688772",
"1 11 111 3.4641016151377544",
))

tk.MustExec(`
create table tp (
id int,
vec vector(3) comment 'hnsw(distance=cosine)',
a int, b int,
store_id int
) PARTITION BY RANGE COLUMNS(store_id) (
PARTITION p0 VALUES LESS THAN (100),
PARTITION p1 VALUES LESS THAN (200),
PARTITION p2 VALUES LESS THAN (MAXVALUE)
);
`)
tk.MustExec(`
insert into tp values
(1, '[1,1,1]', 11, 111, 50),
(2, '[2,2,2]', 22, 222, 150),
(3, '[3,3,3]', 33, 333, 250);
`)
tk.MustExec("analyze table tp")

tk.MustQuery("select id from tp order by id").Check(testkit.Rows(
"1",
"2",
"3",
))
tk.MustQuery("select id from tp order by vec_l2_distance(vec, '[3,3,3]') limit 10").Check(testkit.Rows(
"3",
"2",
"1",
))
tk.MustQuery("select id from tp order by vec_l2_distance(vec, '[3,3,3]')").Check(testkit.Rows(
"3",
"2",
"1",
))
tk.MustQuery("select * from tp order by vec_l2_distance(vec, '[3,3,3]') limit 10").Check(testkit.Rows(
"3 [3,3,3] 33 333 250",
"2 [2,2,2] 22 222 150",
"1 [1,1,1] 11 111 50",
))
tk.MustQuery("select id, a, b from tp order by vec_l2_distance(vec, '[3,3,3]') limit 10").Check(testkit.Rows(
"3 33 333",
"2 22 222",
"1 11 111",
))
tk.MustQuery("select id, vec_l2_distance(vec, '[3,3,3]') as d from tp order by d limit 10").Check(testkit.Rows(
"3 0",
"2 1.7320508075688772",
"1 3.4641016151377544",
))
tk.MustQuery("select *, vec_l2_distance(vec, '[3,3,3]') as d from tp order by d limit 10").Check(testkit.Rows(
"3 [3,3,3] 33 333 250 0",
"2 [2,2,2] 22 222 150 1.7320508075688772",
"1 [1,1,1] 11 111 50 3.4641016151377544",
))
tk.MustQuery("select id, vec_l2_distance(vec, '[3,3,3]') as d, a, b from tp order by d limit 10").Check(testkit.Rows(
"3 0 33 333",
"2 1.7320508075688772 22 222",
"1 3.4641016151377544 11 111",
))
tk.MustQuery("select id, vec_l2_distance(vec, '[3,3,3]') as d, a, b from tp order by d").Check(testkit.Rows(
"3 0 33 333",
"2 1.7320508075688772 22 222",
"1 3.4641016151377544 11 111",
))

tk.MustQuery("select id from tp partition (p0) order by vec_l2_distance(vec, '[3,3,3]') limit 10").Check(testkit.Rows(
"1",
))
tk.MustQuery("select * from tp partition (p0) order by vec_l2_distance(vec, '[3,3,3]') limit 10").Check(testkit.Rows(
"1 [1,1,1] 11 111 50",
))
tk.MustQuery("select id, a, b from tp partition (p0) order by vec_l2_distance(vec, '[3,3,3]') limit 10").Check(testkit.Rows(
"1 11 111",
))
tk.MustQuery("select id, vec_l2_distance(vec, '[3,3,3]') as d from tp partition (p0) order by d limit 10").Check(testkit.Rows(
"1 3.4641016151377544",
))
tk.MustQuery("select *, vec_l2_distance(vec, '[3,3,3]') as d from tp partition (p0) order by d limit 10").Check(testkit.Rows(
"1 [1,1,1] 11 111 50 3.4641016151377544",
))
tk.MustQuery("select id, vec_l2_distance(vec, '[3,3,3]') as d, a, b from tp partition (p0) order by d limit 10").Check(testkit.Rows(
"1 3.4641016151377544 11 111",
))
tk.MustQuery("select id, vec_l2_distance(vec, '[3,3,3]') as d, a, b from tp partition (p0) order by d").Check(testkit.Rows(
"1 3.4641016151377544 11 111",
))
}

func TestVectorSearchExtractProj(t *testing.T) {
{
store, _ := testkit.CreateMockStoreAndDomainWithSchemaLease(t, 200*time.Millisecond, mockstore.WithMockTiFlash(1))
tk := testkit.NewTestKit(t, store)
tk.MustExec("USE test;")
testVectorSearchInternal(tk)
}
{
store, _ := testkit.CreateMockStoreAndDomainWithSchemaLease(t, 200*time.Millisecond, mockstore.WithMockTiFlash(1))
tk := testkit.NewTestKit(t, store)
tk.MustExec("USE test;")
tk.MustExec("SET SESSION tidb_opt_fix_control = '56318:OFF';")
testVectorSearchInternal(tk)
}
}

func TestVectorSearchPreparedStatement(t *testing.T) {
store, _ := testkit.CreateMockStoreAndDomainWithSchemaLease(t, 200*time.Millisecond, mockstore.WithMockTiFlash(1))
tk := testkit.NewTestKit(t, store)
tk.MustExec("USE test;")
tk.MustExec("CREATE TABLE t1 (pk INT PRIMARY KEY, vec vector(3), VECTOR INDEX idx_embedding ((VEC_COSINE_DISTANCE(vec))) );")
tk.MustExec("INSERT INTO t1 VALUES (1, '[1,2,3]'), (2, '[4,5,6]'), (3, '[7,8,9]');")
tk.MustExec("ANALYZE TABLE t1;")

tk.MustExec("PREPARE stmt FROM 'SELECT pk FROM t1 ORDER BY vec_cosine_distance(vec, ?) LIMIT ?';")
tk.MustExec("SET @pvec = '[7,8,9]';")
tk.MustExec("SET @plimit = 10;")
tk.MustQuery("EXECUTE stmt USING @pvec, @plimit;").Check(testkit.Rows("3", "2", "1"))
}

func TestGetLock(t *testing.T) {
ctx := context.Background()
store := testkit.CreateMockStore(t, mockstore.WithStoreType(mockstore.EmbedUnistore))
Expand Down
2 changes: 1 addition & 1 deletion pkg/planner/core/casetest/vectorsearch/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ go_test(
],
data = glob(["testdata/**"]),
flaky = True,
shard_count = 5,
shard_count = 7,
deps = [
"//pkg/config",
"//pkg/domain",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,5 +44,41 @@
"explain select * from t1 where store_id between 80 and 120 order by vec_cosine_distance(vec, '[1,1,1]') limit 1",
"explain select * from t1 partition (p0) order by vec_cosine_distance(vec, '[1,1,1]') limit 1"
]
},
{
"name": "TestVectorSearchWithPKAuto",
"cases": [
"explain select id from t1",

"explain select id from t1 order by vec_cosine_distance(vec, '[1,1,1]') limit 10",
EricZequan marked this conversation as resolved.
Show resolved Hide resolved
"explain select id from t1 order by vec_cosine_distance(vec, '[1,1,1]')",
"explain select * from t1 order by vec_cosine_distance(vec, '[1,1,1]') limit 10",
"explain select id, a, b from t1 order by vec_cosine_distance(vec, '[1,1,1]') limit 10",
"explain select a, id, b from t1 order by vec_cosine_distance(vec, '[1,1,1]') limit 10",

"explain select id, vec_cosine_distance(vec, '[1,1,1]') as d from t1 order by d limit 10",
"explain select id, vec_cosine_distance(vec, '[1,1,1]') as d from t1 order by d",
"explain select *, vec_cosine_distance(vec, '[1,1,1]') as d from t1 order by d limit 10",
"explain select id, vec_cosine_distance(vec, '[1,1,1]') as d, a, b from t1 order by d limit 10",
"explain select id, a, b, vec_cosine_distance(vec, '[1,1,1]') as d from t1 order by d limit 10"
]
},
{
"name": "TestVectorSearchWithPKForceTiKV",
"cases": [
"explain select id from t1",

"explain select id from t1 order by vec_cosine_distance(vec, '[1,1,1]') limit 10",
"explain select id from t1 order by vec_cosine_distance(vec, '[1,1,1]')",
"explain select * from t1 order by vec_cosine_distance(vec, '[1,1,1]') limit 10",
"explain select id, a, b from t1 order by vec_cosine_distance(vec, '[1,1,1]') limit 10",
"explain select a, id, b from t1 order by vec_cosine_distance(vec, '[1,1,1]') limit 10",

"explain select id, vec_cosine_distance(vec, '[1,1,1]') as d from t1 order by d limit 10",
"explain select id, vec_cosine_distance(vec, '[1,1,1]') as d from t1 order by d",
"explain select *, vec_cosine_distance(vec, '[1,1,1]') as d from t1 order by d limit 10",
"explain select id, vec_cosine_distance(vec, '[1,1,1]') as d, a, b from t1 order by d limit 10",
"explain select id, a, b, vec_cosine_distance(vec, '[1,1,1]') as d from t1 order by d limit 10"
]
}
]
Loading