From 146231ec0d34f475f174f8d302f8192b77bbe5fe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CEricZequan=E2=80=9D?= Date: Thu, 12 Dec 2024 17:30:28 +0800 Subject: [PATCH 1/9] Reuse the vector distance column MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: “EricZequan” --- .../core/casetest/vectorsearch/BUILD.bazel | 2 +- .../testdata/ann_index_suite_in.json | 36 +++ .../testdata/ann_index_suite_out.json | 274 ++++++++++++++++++ .../vectorsearch/vector_index_test.go | 119 +++++++- pkg/planner/core/task.go | 124 +++++++- pkg/planner/util/fixcontrol/get.go | 3 + 6 files changed, 543 insertions(+), 15 deletions(-) diff --git a/pkg/planner/core/casetest/vectorsearch/BUILD.bazel b/pkg/planner/core/casetest/vectorsearch/BUILD.bazel index ca2f66ab0a98c..db58d928154b0 100644 --- a/pkg/planner/core/casetest/vectorsearch/BUILD.bazel +++ b/pkg/planner/core/casetest/vectorsearch/BUILD.bazel @@ -9,7 +9,7 @@ go_test( ], data = glob(["testdata/**"]), flaky = True, - shard_count = 5, + shard_count = 7, deps = [ "//pkg/config", "//pkg/domain", diff --git a/pkg/planner/core/casetest/vectorsearch/testdata/ann_index_suite_in.json b/pkg/planner/core/casetest/vectorsearch/testdata/ann_index_suite_in.json index 43ca98699edd7..be40b93f6a9c7 100644 --- a/pkg/planner/core/casetest/vectorsearch/testdata/ann_index_suite_in.json +++ b/pkg/planner/core/casetest/vectorsearch/testdata/ann_index_suite_in.json @@ -44,5 +44,41 @@ "explain select * from t1 where store_id between 80 and 120 order by vec_cosine_distance(vec, '[1,1,1]') limit 1", "explain select * from t1 partition (p0) order by vec_cosine_distance(vec, '[1,1,1]') limit 1" ] + }, + { + "name": "TestVectorSearchWithPKAuto", + "cases": [ + "explain select id from t1", + + "explain select id from t1 order by vec_cosine_distance(vec, '[1,1,1]') limit 10", + "explain select id from t1 order by vec_cosine_distance(vec, '[1,1,1]')", + "explain select * from t1 order by vec_cosine_distance(vec, '[1,1,1]') limit 10", + "explain select id, a, b from t1 order by vec_cosine_distance(vec, '[1,1,1]') limit 10", + "explain select a, id, b from t1 order by vec_cosine_distance(vec, '[1,1,1]') limit 10", + + "explain select id, vec_cosine_distance(vec, '[1,1,1]') as d from t1 order by d limit 10", + "explain select id, vec_cosine_distance(vec, '[1,1,1]') as d from t1 order by d", + "explain select *, vec_cosine_distance(vec, '[1,1,1]') as d from t1 order by d limit 10", + "explain select id, vec_cosine_distance(vec, '[1,1,1]') as d, a, b from t1 order by d limit 10", + "explain select id, a, b, vec_cosine_distance(vec, '[1,1,1]') as d from t1 order by d limit 10" + ] + }, + { + "name": "TestVectorSearchWithPKForceTiKV", + "cases": [ + "explain select id from t1", + + "explain select id from t1 order by vec_cosine_distance(vec, '[1,1,1]') limit 10", + "explain select id from t1 order by vec_cosine_distance(vec, '[1,1,1]')", + "explain select * from t1 order by vec_cosine_distance(vec, '[1,1,1]') limit 10", + "explain select id, a, b from t1 order by vec_cosine_distance(vec, '[1,1,1]') limit 10", + "explain select a, id, b from t1 order by vec_cosine_distance(vec, '[1,1,1]') limit 10", + + "explain select id, vec_cosine_distance(vec, '[1,1,1]') as d from t1 order by d limit 10", + "explain select id, vec_cosine_distance(vec, '[1,1,1]') as d from t1 order by d", + "explain select *, vec_cosine_distance(vec, '[1,1,1]') as d from t1 order by d limit 10", + "explain select id, vec_cosine_distance(vec, '[1,1,1]') as d, a, b from t1 order by d limit 10", + "explain select id, a, b, vec_cosine_distance(vec, '[1,1,1]') as d from t1 order by d limit 10" + ] } ] diff --git a/pkg/planner/core/casetest/vectorsearch/testdata/ann_index_suite_out.json b/pkg/planner/core/casetest/vectorsearch/testdata/ann_index_suite_out.json index aabb45980d0fe..2c339f6ab23b8 100644 --- a/pkg/planner/core/casetest/vectorsearch/testdata/ann_index_suite_out.json +++ b/pkg/planner/core/casetest/vectorsearch/testdata/ann_index_suite_out.json @@ -507,5 +507,279 @@ "Warn": null } ] + }, + { + "Name": "TestVectorSearchWithPKAuto", + "Cases": [ + { + "SQL": "explain select id from t1", + "Plan": [ + "TableReader_14 6000.00 root MppVersion: 2, data:ExchangeSender_13", + "└─ExchangeSender_13 6000.00 mpp[tiflash] ExchangeType: PassThrough", + " └─TableFullScan_11 6000.00 mpp[tiflash] table:t1 keep order:false" + ], + "Warn": null + }, + { + "SQL": "explain select id from t1 order by vec_cosine_distance(vec, '[1,1,1]') limit 10", + "Plan": [ + "Projection_7 10.00 root test.t1.id", + "└─TopN_11 10.00 root Column#9, offset:0, count:10", + " └─TableReader_28 10.00 root MppVersion: 2, data:ExchangeSender_27", + " └─ExchangeSender_27 10.00 mpp[tiflash] ExchangeType: PassThrough", + " └─TopN_26 10.00 mpp[tiflash] Column#9, offset:0, count:10", + " └─Projection_25 10.00 mpp[tiflash] test.t1.id, test.t1.vec, vec_cosine_distance(test.t1.vec, [1,1,1])->Column#9", + " └─TableFullScan_24 10.00 mpp[tiflash] table:t1, index:idx_embedding(vec) keep order:false, annIndex:COSINE(vec..[1,1,1], limit:10)" + ], + "Warn": null + }, + { + "SQL": "explain select id from t1 order by vec_cosine_distance(vec, '[1,1,1]')", + "Plan": [ + "Projection_5 6000.00 root test.t1.id", + "└─Projection_14 6000.00 root test.t1.id, test.t1.vec", + " └─Sort_7 6000.00 root Column#8", + " └─Projection_15 6000.00 root test.t1.id, test.t1.vec, vec_cosine_distance(test.t1.vec, [1,1,1])->Column#8", + " └─TableReader_12 6000.00 root MppVersion: 2, data:ExchangeSender_11", + " └─ExchangeSender_11 6000.00 mpp[tiflash] ExchangeType: PassThrough", + " └─TableFullScan_10 6000.00 mpp[tiflash] table:t1 keep order:false" + ], + "Warn": null + }, + { + "SQL": "explain select * from t1 order by vec_cosine_distance(vec, '[1,1,1]') limit 10", + "Plan": [ + "Projection_6 10.00 root test.t1.id, test.t1.vec, test.t1.a, test.t1.b, test.t1.c, test.t1.d", + "└─TopN_10 10.00 root Column#8, offset:0, count:10", + " └─TableReader_27 10.00 root MppVersion: 2, data:ExchangeSender_26", + " └─ExchangeSender_26 10.00 mpp[tiflash] ExchangeType: PassThrough", + " └─TopN_25 10.00 mpp[tiflash] Column#8, offset:0, count:10", + " └─Projection_24 10.00 mpp[tiflash] test.t1.id, test.t1.vec, test.t1.a, test.t1.b, test.t1.c, test.t1.d, vec_cosine_distance(test.t1.vec, [1,1,1])->Column#8", + " └─TableFullScan_23 10.00 mpp[tiflash] table:t1, index:idx_embedding(vec) keep order:false, annIndex:COSINE(vec..[1,1,1], limit:10)" + ], + "Warn": null + }, + { + "SQL": "explain select id, a, b from t1 order by vec_cosine_distance(vec, '[1,1,1]') limit 10", + "Plan": [ + "Projection_7 10.00 root test.t1.id, test.t1.a, test.t1.b", + "└─TopN_11 10.00 root Column#11, offset:0, count:10", + " └─TableReader_28 10.00 root MppVersion: 2, data:ExchangeSender_27", + " └─ExchangeSender_27 10.00 mpp[tiflash] ExchangeType: PassThrough", + " └─TopN_26 10.00 mpp[tiflash] Column#11, offset:0, count:10", + " └─Projection_25 10.00 mpp[tiflash] test.t1.id, test.t1.vec, test.t1.a, test.t1.b, vec_cosine_distance(test.t1.vec, [1,1,1])->Column#11", + " └─TableFullScan_24 10.00 mpp[tiflash] table:t1, index:idx_embedding(vec) keep order:false, annIndex:COSINE(vec..[1,1,1], limit:10)" + ], + "Warn": null + }, + { + "SQL": "explain select a, id, b from t1 order by vec_cosine_distance(vec, '[1,1,1]') limit 10", + "Plan": [ + "Projection_7 10.00 root test.t1.a, test.t1.id, test.t1.b", + "└─TopN_11 10.00 root Column#11, offset:0, count:10", + " └─TableReader_28 10.00 root MppVersion: 2, data:ExchangeSender_27", + " └─ExchangeSender_27 10.00 mpp[tiflash] ExchangeType: PassThrough", + " └─TopN_26 10.00 mpp[tiflash] Column#11, offset:0, count:10", + " └─Projection_25 10.00 mpp[tiflash] test.t1.id, test.t1.vec, test.t1.a, test.t1.b, vec_cosine_distance(test.t1.vec, [1,1,1])->Column#11", + " └─TableFullScan_24 10.00 mpp[tiflash] table:t1, index:idx_embedding(vec) keep order:false, annIndex:COSINE(vec..[1,1,1], limit:10)" + ], + "Warn": null + }, + { + "SQL": "explain select id, vec_cosine_distance(vec, '[1,1,1]') as d from t1 order by d limit 10", + "Plan": [ + "Projection_6 10.00 root test.t1.id, vec_cosine_distance(test.t1.vec, [1,1,1])->Column#7", + "└─TopN_10 10.00 root Column#9, offset:0, count:10", + " └─TableReader_27 10.00 root MppVersion: 2, data:ExchangeSender_26", + " └─ExchangeSender_26 10.00 mpp[tiflash] ExchangeType: PassThrough", + " └─TopN_25 10.00 mpp[tiflash] Column#9, offset:0, count:10", + " └─Projection_24 10.00 mpp[tiflash] test.t1.id, test.t1.vec, vec_cosine_distance(test.t1.vec, [1,1,1])->Column#9", + " └─TableFullScan_23 10.00 mpp[tiflash] table:t1, index:idx_embedding(vec) keep order:false, annIndex:COSINE(vec..[1,1,1], limit:10)" + ], + "Warn": null + }, + { + "SQL": "explain select id, vec_cosine_distance(vec, '[1,1,1]') as d from t1 order by d", + "Plan": [ + "Sort_4 6000.00 root Column#7", + "└─TableReader_17 6000.00 root MppVersion: 2, data:ExchangeSender_16", + " └─ExchangeSender_16 6000.00 mpp[tiflash] ExchangeType: PassThrough", + " └─Projection_7 6000.00 mpp[tiflash] test.t1.id, vec_cosine_distance(test.t1.vec, [1,1,1])->Column#7", + " └─TableFullScan_14 6000.00 mpp[tiflash] table:t1 keep order:false" + ], + "Warn": null + }, + { + "SQL": "explain select *, vec_cosine_distance(vec, '[1,1,1]') as d from t1 order by d limit 10", + "Plan": [ + "Projection_6 10.00 root test.t1.id, test.t1.vec, test.t1.a, test.t1.b, test.t1.c, test.t1.d, vec_cosine_distance(test.t1.vec, [1,1,1])->Column#7", + "└─TopN_10 10.00 root Column#9, offset:0, count:10", + " └─TableReader_27 10.00 root MppVersion: 2, data:ExchangeSender_26", + " └─ExchangeSender_26 10.00 mpp[tiflash] ExchangeType: PassThrough", + " └─TopN_25 10.00 mpp[tiflash] Column#9, offset:0, count:10", + " └─Projection_24 10.00 mpp[tiflash] test.t1.id, test.t1.vec, test.t1.a, test.t1.b, test.t1.c, test.t1.d, vec_cosine_distance(test.t1.vec, [1,1,1])->Column#9", + " └─TableFullScan_23 10.00 mpp[tiflash] table:t1, index:idx_embedding(vec) keep order:false, annIndex:COSINE(vec..[1,1,1], limit:10)" + ], + "Warn": null + }, + { + "SQL": "explain select id, vec_cosine_distance(vec, '[1,1,1]') as d, a, b from t1 order by d limit 10", + "Plan": [ + "Projection_6 10.00 root test.t1.id, vec_cosine_distance(test.t1.vec, [1,1,1])->Column#7, test.t1.a, test.t1.b", + "└─TopN_10 10.00 root Column#9, offset:0, count:10", + " └─TableReader_27 10.00 root MppVersion: 2, data:ExchangeSender_26", + " └─ExchangeSender_26 10.00 mpp[tiflash] ExchangeType: PassThrough", + " └─TopN_25 10.00 mpp[tiflash] Column#9, offset:0, count:10", + " └─Projection_24 10.00 mpp[tiflash] test.t1.id, test.t1.vec, test.t1.a, test.t1.b, vec_cosine_distance(test.t1.vec, [1,1,1])->Column#9", + " └─TableFullScan_23 10.00 mpp[tiflash] table:t1, index:idx_embedding(vec) keep order:false, annIndex:COSINE(vec..[1,1,1], limit:10)" + ], + "Warn": null + }, + { + "SQL": "explain select id, a, b, vec_cosine_distance(vec, '[1,1,1]') as d from t1 order by d limit 10", + "Plan": [ + "Projection_6 10.00 root test.t1.id, test.t1.a, test.t1.b, vec_cosine_distance(test.t1.vec, [1,1,1])->Column#7", + "└─TopN_10 10.00 root Column#9, offset:0, count:10", + " └─TableReader_27 10.00 root MppVersion: 2, data:ExchangeSender_26", + " └─ExchangeSender_26 10.00 mpp[tiflash] ExchangeType: PassThrough", + " └─TopN_25 10.00 mpp[tiflash] Column#9, offset:0, count:10", + " └─Projection_24 10.00 mpp[tiflash] test.t1.id, test.t1.vec, test.t1.a, test.t1.b, vec_cosine_distance(test.t1.vec, [1,1,1])->Column#9", + " └─TableFullScan_23 10.00 mpp[tiflash] table:t1, index:idx_embedding(vec) keep order:false, annIndex:COSINE(vec..[1,1,1], limit:10)" + ], + "Warn": null + } + ] + }, + { + "Name": "TestVectorSearchWithPKForceTiKV", + "Cases": [ + { + "SQL": "explain select id from t1", + "Plan": [ + "TableReader_8 6000.00 root data:Projection_4", + "└─Projection_4 6000.00 cop[tikv] test.t1.id", + " └─TableFullScan_7 6000.00 cop[tikv] table:t1 keep order:false" + ], + "Warn": null + }, + { + "SQL": "explain select id from t1 order by vec_cosine_distance(vec, '[1,1,1]') limit 10", + "Plan": [ + "Projection_7 10.00 root test.t1.id", + "└─TopN_8 10.00 root Column#8, offset:0, count:10", + " └─TableReader_15 10.00 root data:TopN_14", + " └─TopN_14 10.00 cop[tikv] Column#8, offset:0, count:10", + " └─Projection_13 10.00 cop[tikv] test.t1.id, test.t1.vec, vec_cosine_distance(test.t1.vec, [1,1,1])->Column#8", + " └─TableFullScan_12 6000.00 cop[tikv] table:t1 keep order:false" + ], + "Warn": null + }, + { + "SQL": "explain select id from t1 order by vec_cosine_distance(vec, '[1,1,1]')", + "Plan": [ + "Projection_5 6000.00 root test.t1.id", + "└─Projection_9 6000.00 root test.t1.id, test.t1.vec", + " └─Sort_6 6000.00 root Column#8", + " └─Projection_10 6000.00 root test.t1.id, test.t1.vec, vec_cosine_distance(test.t1.vec, [1,1,1])->Column#8", + " └─TableReader_8 6000.00 root data:TableFullScan_7", + " └─TableFullScan_7 6000.00 cop[tikv] table:t1 keep order:false" + ], + "Warn": null + }, + { + "SQL": "explain select * from t1 order by vec_cosine_distance(vec, '[1,1,1]') limit 10", + "Plan": [ + "Projection_6 10.00 root test.t1.id, test.t1.vec, test.t1.a, test.t1.b, test.t1.c, test.t1.d", + "└─TopN_7 10.00 root Column#7, offset:0, count:10", + " └─TableReader_14 10.00 root data:TopN_13", + " └─TopN_13 10.00 cop[tikv] Column#7, offset:0, count:10", + " └─Projection_12 10.00 cop[tikv] test.t1.id, test.t1.vec, test.t1.a, test.t1.b, test.t1.c, test.t1.d, vec_cosine_distance(test.t1.vec, [1,1,1])->Column#7", + " └─TableFullScan_11 6000.00 cop[tikv] table:t1 keep order:false" + ], + "Warn": null + }, + { + "SQL": "explain select id, a, b from t1 order by vec_cosine_distance(vec, '[1,1,1]') limit 10", + "Plan": [ + "Projection_7 10.00 root test.t1.id, test.t1.a, test.t1.b", + "└─TopN_8 10.00 root Column#10, offset:0, count:10", + " └─TableReader_15 10.00 root data:TopN_14", + " └─TopN_14 10.00 cop[tikv] Column#10, offset:0, count:10", + " └─Projection_13 10.00 cop[tikv] test.t1.id, test.t1.vec, test.t1.a, test.t1.b, vec_cosine_distance(test.t1.vec, [1,1,1])->Column#10", + " └─TableFullScan_12 6000.00 cop[tikv] table:t1 keep order:false" + ], + "Warn": null + }, + { + "SQL": "explain select a, id, b from t1 order by vec_cosine_distance(vec, '[1,1,1]') limit 10", + "Plan": [ + "Projection_7 10.00 root test.t1.a, test.t1.id, test.t1.b", + "└─TopN_8 10.00 root Column#10, offset:0, count:10", + " └─TableReader_15 10.00 root data:TopN_14", + " └─TopN_14 10.00 cop[tikv] Column#10, offset:0, count:10", + " └─Projection_13 10.00 cop[tikv] test.t1.id, test.t1.vec, test.t1.a, test.t1.b, vec_cosine_distance(test.t1.vec, [1,1,1])->Column#10", + " └─TableFullScan_12 6000.00 cop[tikv] table:t1 keep order:false" + ], + "Warn": null + }, + { + "SQL": "explain select id, vec_cosine_distance(vec, '[1,1,1]') as d from t1 order by d limit 10", + "Plan": [ + "Projection_6 10.00 root test.t1.id, vec_cosine_distance(test.t1.vec, [1,1,1])->Column#7", + "└─TopN_7 10.00 root Column#8, offset:0, count:10", + " └─TableReader_14 10.00 root data:TopN_13", + " └─TopN_13 10.00 cop[tikv] Column#8, offset:0, count:10", + " └─Projection_12 10.00 cop[tikv] test.t1.id, test.t1.vec, vec_cosine_distance(test.t1.vec, [1,1,1])->Column#8", + " └─TableFullScan_11 6000.00 cop[tikv] table:t1 keep order:false" + ], + "Warn": null + }, + { + "SQL": "explain select id, vec_cosine_distance(vec, '[1,1,1]') as d from t1 order by d", + "Plan": [ + "Sort_4 6000.00 root Column#7", + "└─TableReader_11 6000.00 root data:Projection_7", + " └─Projection_7 6000.00 cop[tikv] test.t1.id, vec_cosine_distance(test.t1.vec, [1,1,1])->Column#7", + " └─TableFullScan_10 6000.00 cop[tikv] table:t1 keep order:false" + ], + "Warn": null + }, + { + "SQL": "explain select *, vec_cosine_distance(vec, '[1,1,1]') as d from t1 order by d limit 10", + "Plan": [ + "Projection_6 10.00 root test.t1.id, test.t1.vec, test.t1.a, test.t1.b, test.t1.c, test.t1.d, vec_cosine_distance(test.t1.vec, [1,1,1])->Column#7", + "└─TopN_7 10.00 root Column#8, offset:0, count:10", + " └─TableReader_14 10.00 root data:TopN_13", + " └─TopN_13 10.00 cop[tikv] Column#8, offset:0, count:10", + " └─Projection_12 10.00 cop[tikv] test.t1.id, test.t1.vec, test.t1.a, test.t1.b, test.t1.c, test.t1.d, vec_cosine_distance(test.t1.vec, [1,1,1])->Column#8", + " └─TableFullScan_11 6000.00 cop[tikv] table:t1 keep order:false" + ], + "Warn": null + }, + { + "SQL": "explain select id, vec_cosine_distance(vec, '[1,1,1]') as d, a, b from t1 order by d limit 10", + "Plan": [ + "Projection_6 10.00 root test.t1.id, vec_cosine_distance(test.t1.vec, [1,1,1])->Column#7, test.t1.a, test.t1.b", + "└─TopN_7 10.00 root Column#8, offset:0, count:10", + " └─TableReader_14 10.00 root data:TopN_13", + " └─TopN_13 10.00 cop[tikv] Column#8, offset:0, count:10", + " └─Projection_12 10.00 cop[tikv] test.t1.id, test.t1.vec, test.t1.a, test.t1.b, vec_cosine_distance(test.t1.vec, [1,1,1])->Column#8", + " └─TableFullScan_11 6000.00 cop[tikv] table:t1 keep order:false" + ], + "Warn": null + }, + { + "SQL": "explain select id, a, b, vec_cosine_distance(vec, '[1,1,1]') as d from t1 order by d limit 10", + "Plan": [ + "Projection_6 10.00 root test.t1.id, test.t1.a, test.t1.b, vec_cosine_distance(test.t1.vec, [1,1,1])->Column#7", + "└─TopN_7 10.00 root Column#8, offset:0, count:10", + " └─TableReader_14 10.00 root data:TopN_13", + " └─TopN_13 10.00 cop[tikv] Column#8, offset:0, count:10", + " └─Projection_12 10.00 cop[tikv] test.t1.id, test.t1.vec, test.t1.a, test.t1.b, vec_cosine_distance(test.t1.vec, [1,1,1])->Column#8", + " └─TableFullScan_11 6000.00 cop[tikv] table:t1 keep order:false" + ], + "Warn": null + } + ] } ] diff --git a/pkg/planner/core/casetest/vectorsearch/vector_index_test.go b/pkg/planner/core/casetest/vectorsearch/vector_index_test.go index aed89b570593c..3c097e7b90d88 100644 --- a/pkg/planner/core/casetest/vectorsearch/vector_index_test.go +++ b/pkg/planner/core/casetest/vectorsearch/vector_index_test.go @@ -16,6 +16,7 @@ package vectorsearch import ( "context" + "fmt" "strings" "testing" "time" @@ -173,15 +174,13 @@ func TestANNIndexNormalizedPlan(t *testing.T) { tk.MustExec("explain select * from t order by vec_cosine_distance(vec, '[0,0,0]') limit 1") p1, d1 := getNormalizedPlan() require.Equal(t, []string{ - " Projection root test.t.vec", - " └─TopN root ?", - " └─Projection root test.t.vec, vec_cosine_distance(test.t.vec, ?)", - " └─TableReader root ", - " └─ExchangeSender cop[tiflash] ", - " └─Projection cop[tiflash] test.t.vec", - " └─TopN cop[tiflash] ?", - " └─Projection cop[tiflash] test.t.vec, vec_cosine_distance(test.t.vec, ?)", - " └─TableFullScan cop[tiflash] table:t, index:vector_index(vec), range:[?,?], keep order:false, annIndex:COSINE(vec..[?], limit:?)", + " Projection root test.t.vec", + " └─TopN root ?", + " └─TableReader root ", + " └─ExchangeSender cop[tiflash] ", + " └─TopN cop[tiflash] ?", + " └─Projection cop[tiflash] test.t.vec, vec_cosine_distance(test.t.vec, ?)", + " └─TableFullScan cop[tiflash] table:t, index:vector_index(vec), range:[?,?], keep order:false, annIndex:COSINE(vec..[?], limit:?)", }, p1) tk.MustExec("explain select * from t order by vec_cosine_distance(vec, '[1,2,3]') limit 3") @@ -314,3 +313,105 @@ func TestANNIndexWithNonIntClusteredPk(t *testing.T) { require.Equal(t, types.KindMinNotNull, tableScan.Ranges[0].LowVal[0].Kind()) require.Equal(t, types.KindMaxValue, tableScan.Ranges[0].HighVal[0].Kind()) } +func prepareVectorSearchWithPK(t *testing.T) *testkit.TestKit { + store, dom := testkit.CreateMockStoreAndDomainWithSchemaLease(t, 200*time.Millisecond, mockstore.WithMockTiFlash(2)) + tk := testkit.NewTestKit(t, store) + + tk.MustExec("use test") + tk.MustExec("drop table if exists t1") + tk.MustExec("drop table if exists tp") + tk.MustExec("drop table if exists doc") + + // A non-partitioned table + tk.MustExec(` + create table t1 ( + id int primary key, + vec vector(3), + a int, + b int, + c vector(3), + d vector, + VECTOR INDEX idx_embedding ((VEC_COSINE_DISTANCE(vec))) + ) + `) + for i := 0; i < 2000; i++ { + tk.MustExec(fmt.Sprintf(` + insert into t1 values + (%d, '[1,1,1]', 1, 1, '[1,1,1]', '[1,1,1]'), + (%d, '[2,2,2]', 2, 2, '[2,2,2]', '[2,2,2]'), + (%d, '[3,3,3]', 3, 3, '[3,3,3]', '[3,3,3]'); + `, i, 2000+i, 2000*2+i)) + } + tk.MustExec("analyze table t1") + + // Another table for join + tk.MustExec("create table doc(id INT, doc LONGTEXT)") + + testkit.SetTiFlashReplica(t, dom, "test", "t1") + testkit.SetTiFlashReplica(t, dom, "test", "tp") + + return tk +} + +func TestVectorSearchWithPKAuto(t *testing.T) { + tk := prepareVectorSearchWithPK(t) + var input []string + var output []struct { + SQL string + Plan []string + Warn []string + } + suiteData := GetANNIndexSuiteData() + suiteData.LoadTestCases(t, &input, &output) + for i, tt := range input { + testdata.OnRecord(func() { + output[i].SQL = tt + }) + if strings.HasPrefix(tt, "set") || strings.HasPrefix(tt, "UPDATE") { + tk.MustExec(tt) + continue + } + testdata.OnRecord(func() { + output[i].SQL = tt + output[i].Plan = testdata.ConvertRowsToStrings(tk.MustQuery(tt).Rows()) + output[i].Warn = testdata.ConvertSQLWarnToStrings(tk.Session().GetSessionVars().StmtCtx.GetWarnings()) + }) + res := tk.MustQuery(tt) + res.Check(testkit.Rows(output[i].Plan...)) + require.Equal(t, output[i].Warn, testdata.ConvertSQLWarnToStrings(tk.Session().GetSessionVars().StmtCtx.GetWarnings())) + } +} + +func TestVectorSearchWithPKForceTiKV(t *testing.T) { + tk := prepareVectorSearchWithPK(t) + tk.MustExec("set @@tidb_isolation_read_engines = 'tikv'") + // enable the tikv AllowProjectionPushDown to use vector search optimization. + // https://github.com/tidbcloud/tidb-cse/pull/1426 + tk.Session().GetSessionVars().AllowProjectionPushDown = true + + var input []string + var output []struct { + SQL string + Plan []string + Warn []string + } + suiteData := GetANNIndexSuiteData() + suiteData.LoadTestCases(t, &input, &output) + for i, tt := range input { + testdata.OnRecord(func() { + output[i].SQL = tt + }) + if strings.HasPrefix(tt, "set") || strings.HasPrefix(tt, "UPDATE") { + tk.MustExec(tt) + continue + } + testdata.OnRecord(func() { + output[i].SQL = tt + output[i].Plan = testdata.ConvertRowsToStrings(tk.MustQuery(tt).Rows()) + output[i].Warn = testdata.ConvertSQLWarnToStrings(tk.Session().GetSessionVars().StmtCtx.GetWarnings()) + }) + res := tk.MustQuery(tt) + res.Check(testkit.Rows(output[i].Plan...)) + require.Equal(t, output[i].Warn, testdata.ConvertSQLWarnToStrings(tk.Session().GetSessionVars().StmtCtx.GetWarnings())) + } +} diff --git a/pkg/planner/core/task.go b/pkg/planner/core/task.go index b4a479c72701a..204000b56145e 100644 --- a/pkg/planner/core/task.go +++ b/pkg/planner/core/task.go @@ -32,6 +32,7 @@ import ( "github.com/pingcap/tidb/pkg/planner/core/operator/logicalop" "github.com/pingcap/tidb/pkg/planner/property" "github.com/pingcap/tidb/pkg/planner/util" + "github.com/pingcap/tidb/pkg/planner/util/fixcontrol" "github.com/pingcap/tidb/pkg/types" "github.com/pingcap/tidb/pkg/util/chunk" "github.com/pingcap/tidb/pkg/util/collate" @@ -41,6 +42,19 @@ import ( "go.uber.org/zap" ) +// HeavyFunctionNameMap stores function names that is worth to do HeavyFunctionOptimize. +// Currently this only applies to Vector data types and their functions. The HeavyFunctionOptimize +// eliminate the usage of the function in TopN operators to avoid vector distance re-calculation +// of TopN in the root task. +var HeavyFunctionNameMap = map[string]bool{ + "vec_cosine_distance": true, + "vec_l1_distance": true, + "vec_l2_distance": true, + "vec_negative_inner_product": true, + "vec_dims": true, + "vec_l2_norm": true, +} + func attachPlan2Task(p base.PhysicalPlan, t base.Task) base.Task { switch v := t.(type) { case *CopTask: @@ -861,7 +875,23 @@ func (p *NominalSort) Attach2Task(tasks ...base.Task) base.Task { return t } -func (p *PhysicalTopN) getPushedDownTopN(childPlan base.PhysicalPlan) *PhysicalTopN { +func (p *PhysicalTopN) getPushedDownTopN(childPlan base.PhysicalPlan, storeTp kv.StoreType) (*PhysicalTopN, *PhysicalTopN) { + var newGlobalTopN *PhysicalTopN + + fixValue := fixcontrol.GetBoolWithDefault(p.SCtx().GetSessionVars().GetOptimizerFixControlMap(), fixcontrol.Fix56318, true) + // HeavyFunctionOptimize: if TopN's ByItems is a HeavyFunction (currently mainly for Vector Search), we will change + // the ByItems in order to reuse the function result. + if fixValue && ContainHeavyFunction(p.ByItems[0].Expr) { + x, err := p.Clone(p.SCtx()) + if err != nil { + return nil, nil + } + newGlobalTopN = x.(*PhysicalTopN) + // the projecton's construction cannot be create if the AllowProjectionPushDown is disable. + if storeTp == kv.TiKV && !p.SCtx().GetSessionVars().AllowProjectionPushDown { + newGlobalTopN = nil + } + } newByItems := make([]*util.ByItems, 0, len(p.ByItems)) for _, expr := range p.ByItems { newByItems = append(newByItems, expr.Clone()) @@ -875,13 +905,69 @@ func (p *PhysicalTopN) getPushedDownTopN(childPlan base.PhysicalPlan) *PhysicalT // Strictly speaking, for the row count of pushed down TopN, we should multiply newCount with "regionNum", // but "regionNum" is unknown since the copTask can be a double read, so we ignore it now. stats := util.DeriveLimitStats(childProfile, float64(newCount)) + + if newGlobalTopN != nil { + // create a new PhysicalProjection to calculate the distance columns, and add it into plan route + bottomProjSchemaCols := make([]*expression.Column, 0, len(childPlan.Schema().Columns)) + bottomProjExprs := make([]expression.Expression, 0, len(childPlan.Schema().Columns)) + for _, col := range childPlan.Schema().Columns { + newCol := col.Clone().(*expression.Column) + bottomProjSchemaCols = append(bottomProjSchemaCols, newCol) + bottomProjExprs = append(bottomProjExprs, newCol) + } + bottomProjExprs = append(bottomProjExprs, newGlobalTopN.ByItems[0].Expr) + distanceCol := &expression.Column{ + UniqueID: newGlobalTopN.SCtx().GetSessionVars().AllocPlanColumnID(), + RetType: newGlobalTopN.ByItems[0].Expr.GetType(p.SCtx().GetExprCtx().GetEvalCtx()), + } + bottomProjSchemaCols = append(bottomProjSchemaCols, distanceCol) + + bottomProj := PhysicalProjection{ + Exprs: bottomProjExprs, + }.Init(p.SCtx(), stats, p.QueryBlockOffset(), p.GetChildReqProps(0)) + bottomProj.SetSchema(expression.NewSchema(bottomProjSchemaCols...)) + bottomProj.SetChildren(childPlan) + + topN := PhysicalTopN{ + ByItems: newByItems, + PartitionBy: newPartitionBy, + Count: newCount, + }.Init(p.SCtx(), stats, p.QueryBlockOffset(), p.GetChildReqProps(0)) + // mppTask's topN + topN.ByItems[0].Expr = distanceCol + // rootTask's topn, need reuse the distance col + newGlobalTopN.ByItems[0].Expr = distanceCol + topN.SetChildren(bottomProj) + + return topN, newGlobalTopN + } + topN := PhysicalTopN{ ByItems: newByItems, PartitionBy: newPartitionBy, Count: newCount, }.Init(p.SCtx(), stats, p.QueryBlockOffset(), p.GetChildReqProps(0)) topN.SetChildren(childPlan) - return topN + return topN, newGlobalTopN +} + +// ContainHeavyFunction check if the expr contains a function that need to do HeavyFunctionOptimize. Currently this only applies +// to Vector data types and their functions. The HeavyFunctionOptimize eliminate the usage of the function in TopN operators +// to avoid vector distance re-calculation of TopN in the root task. +func ContainHeavyFunction(expr expression.Expression) bool { + sf, ok := expr.(*expression.ScalarFunction) + if !ok { + return false + } + if _, ok := HeavyFunctionNameMap[sf.FuncName.L]; ok { + return true + } + for _, arg := range sf.GetArgs() { + if ContainHeavyFunction(arg) { + return true + } + } + return false } // canPushToIndexPlan checks if this TopN can be pushed to the index side of copTask. @@ -978,18 +1064,46 @@ func (p *PhysicalTopN) Attach2Task(tasks ...base.Task) base.Task { // If all columns in topN are from index plan, we push it to index plan, otherwise we finish the index plan and // push it to table plan. var pushedDownTopN *PhysicalTopN + var newGlobalTopN *PhysicalTopN if !copTask.indexPlanFinished && p.canPushToIndexPlan(copTask.indexPlan, cols) { - pushedDownTopN = p.getPushedDownTopN(copTask.indexPlan) + pushedDownTopN, newGlobalTopN = p.getPushedDownTopN(copTask.indexPlan, copTask.getStoreType()) copTask.indexPlan = pushedDownTopN + if newGlobalTopN != nil { + rootTask := t.ConvertToRootTask(newGlobalTopN.SCtx()) + // Skip TopN with partition on the root. This is a derived topN and window function + // will take care of the filter. + if len(p.GetPartitionBy()) > 0 { + return t + } + return attachPlan2Task(newGlobalTopN, rootTask) + } } else { // It works for both normal index scan and index merge scan. copTask.finishIndexPlan() - pushedDownTopN = p.getPushedDownTopN(copTask.tablePlan) + pushedDownTopN, newGlobalTopN = p.getPushedDownTopN(copTask.tablePlan, copTask.getStoreType()) copTask.tablePlan = pushedDownTopN + if newGlobalTopN != nil { + rootTask := t.ConvertToRootTask(newGlobalTopN.SCtx()) + // Skip TopN with partition on the root. This is a derived topN and window function + // will take care of the filter. + if len(p.GetPartitionBy()) > 0 { + return t + } + return attachPlan2Task(newGlobalTopN, rootTask) + } } } else if mppTask, ok := t.(*MppTask); ok && needPushDown && p.canPushDownToTiFlash(mppTask) { - pushedDownTopN := p.getPushedDownTopN(mppTask.p) + pushedDownTopN, newGlobalTopN := p.getPushedDownTopN(mppTask.p, kv.TiFlash) mppTask.p = pushedDownTopN + if newGlobalTopN != nil { + rootTask := t.ConvertToRootTask(newGlobalTopN.SCtx()) + // Skip TopN with partition on the root. This is a derived topN and window function + // will take care of the filter. + if len(p.GetPartitionBy()) > 0 { + return t + } + return attachPlan2Task(newGlobalTopN, rootTask) + } } rootTask := t.ConvertToRootTask(p.SCtx()) // Skip TopN with partition on the root. This is a derived topN and window function diff --git a/pkg/planner/util/fixcontrol/get.go b/pkg/planner/util/fixcontrol/get.go index 13422682290e0..8fe66b36f70e2 100644 --- a/pkg/planner/util/fixcontrol/get.go +++ b/pkg/planner/util/fixcontrol/get.go @@ -71,6 +71,9 @@ const ( Fix52869 uint64 = 52869 // Fix54337 controls whether to apply or not range intersection for index access. Fix54337 uint64 = 54337 + // Fix56318 controls whether to do HeavyFunctionOptimize. Currently this only applies to Vector data types and + // their functions. The HeavyFunctionOptimize eliminate the usage of the function in TopN operators + Fix56318 uint64 = 56318 ) // GetStr fetches the given key from the fix control map as a string type. From d48ed8d909f2fb968e9d5739ae52199fbe0edd9b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CEricZequan=E2=80=9D?= Date: Thu, 12 Dec 2024 17:48:37 +0800 Subject: [PATCH 2/9] update MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: “EricZequan” --- pkg/expression/integration_test/BUILD.bazel | 2 +- .../integration_test/integration_test.go | 198 ++++++++++++++++++ 2 files changed, 199 insertions(+), 1 deletion(-) diff --git a/pkg/expression/integration_test/BUILD.bazel b/pkg/expression/integration_test/BUILD.bazel index ae2ec434f590d..fc10d03844631 100644 --- a/pkg/expression/integration_test/BUILD.bazel +++ b/pkg/expression/integration_test/BUILD.bazel @@ -8,7 +8,7 @@ go_test( "main_test.go", ], flaky = True, - shard_count = 48, + shard_count = 50, deps = [ "//pkg/config", "//pkg/domain", diff --git a/pkg/expression/integration_test/integration_test.go b/pkg/expression/integration_test/integration_test.go index b81177c23610e..767e882d9f7ed 100644 --- a/pkg/expression/integration_test/integration_test.go +++ b/pkg/expression/integration_test/integration_test.go @@ -849,6 +849,204 @@ func TestVectorMiscFunctions(t *testing.T) { tk.MustQuery(`SELECT * FROM a;`).Check(testkit.Rows("1 [2,10,14] 3")) } +func testVectorSearchInternal(tk *testkit.TestKit) { + tk.MustExec(` + create table t1 ( + id int primary key, + vec vector(3), + a int, + b int, + c vector(3), + d vector, + VECTOR INDEX idx_embedding ((VEC_COSINE_DISTANCE(vec))) + ) + `) + tk.MustExec(` + insert into t1 values + (1, '[1,1,1]', 11, 111, '[1,1,1]', '[1,1,1]'), + (2, '[2,2,2]', 22, 222, '[2,2,2]', '[2,2,2]'), + (3, '[3,3,3]', 33, 333, '[3,3,3]', '[3,3,3]'); + `) + tk.MustExec("analyze table t1") + + tk.MustQuery("select id from t1 order by id").Check(testkit.Rows( + "1", + "2", + "3", + )) + tk.MustQuery("select id from t1 order by vec_l2_distance(vec, '[3,3,3]') limit 10").Check(testkit.Rows( + "3", + "2", + "1", + )) + tk.MustQuery("select id from t1 order by vec_l2_distance(vec, '[3,3,3]') limit 1").Check(testkit.Rows( + "3", + )) + tk.MustQuery("select id from t1 order by vec_l2_distance(vec, '[3,3,3]')").Check(testkit.Rows( + "3", + "2", + "1", + )) + tk.MustQuery("select * from t1 order by vec_l2_distance(vec, '[3,3,3]') limit 10").Check(testkit.Rows( + "3 [3,3,3] 33 333 [3,3,3] [3,3,3]", + "2 [2,2,2] 22 222 [2,2,2] [2,2,2]", + "1 [1,1,1] 11 111 [1,1,1] [1,1,1]", + )) + tk.MustQuery("select id, a, b from t1 order by vec_l2_distance(vec, '[3,3,3]') limit 10").Check(testkit.Rows( + "3 33 333", + "2 22 222", + "1 11 111", + )) + tk.MustQuery("select a, id, b from t1 order by vec_l2_distance(vec, '[3,3,3]') limit 10").Check(testkit.Rows( + "33 3 333", + "22 2 222", + "11 1 111", + )) + tk.MustQuery("select id, vec_l2_distance(vec, '[3,3,3]') as d from t1 order by d limit 10").Check(testkit.Rows( + "3 0", + "2 1.7320508075688772", + "1 3.4641016151377544", + )) + tk.MustQuery("select id, vec_l2_distance(vec, '[3,3,3]') as d from t1 order by d").Check(testkit.Rows( + "3 0", + "2 1.7320508075688772", + "1 3.4641016151377544", + )) + tk.MustQuery("select *, vec_l2_distance(vec, '[3,3,3]') as d from t1 order by d limit 10").Check(testkit.Rows( + "3 [3,3,3] 33 333 [3,3,3] [3,3,3] 0", + "2 [2,2,2] 22 222 [2,2,2] [2,2,2] 1.7320508075688772", + "1 [1,1,1] 11 111 [1,1,1] [1,1,1] 3.4641016151377544", + )) + tk.MustQuery("select id, vec_l2_distance(vec, '[3,3,3]') as d, a, b from t1 order by d limit 10").Check(testkit.Rows( + "3 0 33 333", + "2 1.7320508075688772 22 222", + "1 3.4641016151377544 11 111", + )) + tk.MustQuery("select id, a, b, vec_l2_distance(vec, '[3,3,3]') as d from t1 order by d limit 10").Check(testkit.Rows( + "3 33 333 0", + "2 22 222 1.7320508075688772", + "1 11 111 3.4641016151377544", + )) + + tk.MustExec(` + create table tp ( + id int, + vec vector(3) comment 'hnsw(distance=cosine)', + a int, b int, + store_id int + ) PARTITION BY RANGE COLUMNS(store_id) ( + PARTITION p0 VALUES LESS THAN (100), + PARTITION p1 VALUES LESS THAN (200), + PARTITION p2 VALUES LESS THAN (MAXVALUE) + ); + `) + tk.MustExec(` + insert into tp values + (1, '[1,1,1]', 11, 111, 50), + (2, '[2,2,2]', 22, 222, 150), + (3, '[3,3,3]', 33, 333, 250); + `) + tk.MustExec("analyze table tp") + + tk.MustQuery("select id from tp order by id").Check(testkit.Rows( + "1", + "2", + "3", + )) + tk.MustQuery("select id from tp order by vec_l2_distance(vec, '[3,3,3]') limit 10").Check(testkit.Rows( + "3", + "2", + "1", + )) + tk.MustQuery("select id from tp order by vec_l2_distance(vec, '[3,3,3]')").Check(testkit.Rows( + "3", + "2", + "1", + )) + tk.MustQuery("select * from tp order by vec_l2_distance(vec, '[3,3,3]') limit 10").Check(testkit.Rows( + "3 [3,3,3] 33 333 250", + "2 [2,2,2] 22 222 150", + "1 [1,1,1] 11 111 50", + )) + tk.MustQuery("select id, a, b from tp order by vec_l2_distance(vec, '[3,3,3]') limit 10").Check(testkit.Rows( + "3 33 333", + "2 22 222", + "1 11 111", + )) + tk.MustQuery("select id, vec_l2_distance(vec, '[3,3,3]') as d from tp order by d limit 10").Check(testkit.Rows( + "3 0", + "2 1.7320508075688772", + "1 3.4641016151377544", + )) + tk.MustQuery("select *, vec_l2_distance(vec, '[3,3,3]') as d from tp order by d limit 10").Check(testkit.Rows( + "3 [3,3,3] 33 333 250 0", + "2 [2,2,2] 22 222 150 1.7320508075688772", + "1 [1,1,1] 11 111 50 3.4641016151377544", + )) + tk.MustQuery("select id, vec_l2_distance(vec, '[3,3,3]') as d, a, b from tp order by d limit 10").Check(testkit.Rows( + "3 0 33 333", + "2 1.7320508075688772 22 222", + "1 3.4641016151377544 11 111", + )) + tk.MustQuery("select id, vec_l2_distance(vec, '[3,3,3]') as d, a, b from tp order by d").Check(testkit.Rows( + "3 0 33 333", + "2 1.7320508075688772 22 222", + "1 3.4641016151377544 11 111", + )) + + tk.MustQuery("select id from tp partition (p0) order by vec_l2_distance(vec, '[3,3,3]') limit 10").Check(testkit.Rows( + "1", + )) + tk.MustQuery("select * from tp partition (p0) order by vec_l2_distance(vec, '[3,3,3]') limit 10").Check(testkit.Rows( + "1 [1,1,1] 11 111 50", + )) + tk.MustQuery("select id, a, b from tp partition (p0) order by vec_l2_distance(vec, '[3,3,3]') limit 10").Check(testkit.Rows( + "1 11 111", + )) + tk.MustQuery("select id, vec_l2_distance(vec, '[3,3,3]') as d from tp partition (p0) order by d limit 10").Check(testkit.Rows( + "1 3.4641016151377544", + )) + tk.MustQuery("select *, vec_l2_distance(vec, '[3,3,3]') as d from tp partition (p0) order by d limit 10").Check(testkit.Rows( + "1 [1,1,1] 11 111 50 3.4641016151377544", + )) + tk.MustQuery("select id, vec_l2_distance(vec, '[3,3,3]') as d, a, b from tp partition (p0) order by d limit 10").Check(testkit.Rows( + "1 3.4641016151377544 11 111", + )) + tk.MustQuery("select id, vec_l2_distance(vec, '[3,3,3]') as d, a, b from tp partition (p0) order by d").Check(testkit.Rows( + "1 3.4641016151377544 11 111", + )) +} + +func TestVectorSearchExtractProj(t *testing.T) { + { + store, _ := testkit.CreateMockStoreAndDomainWithSchemaLease(t, 200*time.Millisecond, mockstore.WithMockTiFlash(1)) + tk := testkit.NewTestKit(t, store) + tk.MustExec("USE test;") + testVectorSearchInternal(tk) + } + { + store, _ := testkit.CreateMockStoreAndDomainWithSchemaLease(t, 200*time.Millisecond, mockstore.WithMockTiFlash(1)) + tk := testkit.NewTestKit(t, store) + tk.MustExec("USE test;") + tk.MustExec("SET SESSION tidb_opt_fix_control = '56318:OFF';") + testVectorSearchInternal(tk) + } +} + +func TestVectorSearchPreparedStatement(t *testing.T) { + store, _ := testkit.CreateMockStoreAndDomainWithSchemaLease(t, 200*time.Millisecond, mockstore.WithMockTiFlash(1)) + tk := testkit.NewTestKit(t, store) + tk.MustExec("USE test;") + tk.MustExec("CREATE TABLE t1 (pk INT PRIMARY KEY, vec vector(3) comment 'hnsw(distance=cosine)');") + tk.MustExec("INSERT INTO t1 VALUES (1, '[1,2,3]'), (2, '[4,5,6]'), (3, '[7,8,9]');") + tk.MustExec("ANALYZE TABLE t1;") + + tk.MustExec("PREPARE stmt FROM 'SELECT pk FROM t1 ORDER BY vec_cosine_distance(vec, ?) LIMIT ?';") + tk.MustExec("SET @pvec = '[7,8,9]';") + tk.MustExec("SET @plimit = 10;") + tk.MustQuery("EXECUTE stmt USING @pvec, @plimit;").Check(testkit.Rows("3", "2", "1")) +} + func TestGetLock(t *testing.T) { ctx := context.Background() store := testkit.CreateMockStore(t, mockstore.WithStoreType(mockstore.EmbedUnistore)) From cc9bb5004451e6261c04e655779b9d1a317d677c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CEricZequan=E2=80=9D?= Date: Thu, 12 Dec 2024 21:43:22 +0800 Subject: [PATCH 3/9] fix comment MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: “EricZequan” --- .../integration_test/integration_test.go | 2 +- .../testdata/ann_index_suite_out.json | 487 ++++++++---------- .../vectorsearch/vector_index_test.go | 11 +- pkg/planner/core/task.go | 15 +- pkg/planner/util/fixcontrol/get.go | 4 +- 5 files changed, 231 insertions(+), 288 deletions(-) diff --git a/pkg/expression/integration_test/integration_test.go b/pkg/expression/integration_test/integration_test.go index 767e882d9f7ed..88a8c0013f5c1 100644 --- a/pkg/expression/integration_test/integration_test.go +++ b/pkg/expression/integration_test/integration_test.go @@ -1037,7 +1037,7 @@ func TestVectorSearchPreparedStatement(t *testing.T) { store, _ := testkit.CreateMockStoreAndDomainWithSchemaLease(t, 200*time.Millisecond, mockstore.WithMockTiFlash(1)) tk := testkit.NewTestKit(t, store) tk.MustExec("USE test;") - tk.MustExec("CREATE TABLE t1 (pk INT PRIMARY KEY, vec vector(3) comment 'hnsw(distance=cosine)');") + tk.MustExec("CREATE TABLE t1 (pk INT PRIMARY KEY, vec vector(3), VECTOR INDEX idx_embedding ((VEC_COSINE_DISTANCE(vec))) );") tk.MustExec("INSERT INTO t1 VALUES (1, '[1,2,3]'), (2, '[4,5,6]'), (3, '[7,8,9]');") tk.MustExec("ANALYZE TABLE t1;") diff --git a/pkg/planner/core/casetest/vectorsearch/testdata/ann_index_suite_out.json b/pkg/planner/core/casetest/vectorsearch/testdata/ann_index_suite_out.json index 2c339f6ab23b8..01fee9da6153c 100644 --- a/pkg/planner/core/casetest/vectorsearch/testdata/ann_index_suite_out.json +++ b/pkg/planner/core/casetest/vectorsearch/testdata/ann_index_suite_out.json @@ -67,13 +67,11 @@ "Plan": [ "Projection 1.00 root test.t1.vec, test.t1.a, test.t1.b, test.t1.c, test.t1.d", "└─TopN 1.00 root Column#8, offset:0, count:1", - " └─Projection 1.00 root test.t1.vec, test.t1.a, test.t1.b, test.t1.c, test.t1.d, vec_cosine_distance(test.t1.vec, [1,1,1])->Column#8", - " └─TableReader 1.00 root MppVersion: 2, data:ExchangeSender", - " └─ExchangeSender 1.00 mpp[tiflash] ExchangeType: PassThrough", - " └─Projection 1.00 mpp[tiflash] test.t1.vec, test.t1.a, test.t1.b, test.t1.c, test.t1.d", - " └─TopN 1.00 mpp[tiflash] Column#7, offset:0, count:1", - " └─Projection 1.00 mpp[tiflash] test.t1.vec, test.t1.a, test.t1.b, test.t1.c, test.t1.d, vec_cosine_distance(test.t1.vec, [1,1,1])->Column#7", - " └─TableFullScan 1.00 mpp[tiflash] table:t1, index:vector_index(vec) keep order:false, stats:pseudo, annIndex:COSINE(vec..[1,1,1], limit:1)" + " └─TableReader 1.00 root MppVersion: 2, data:ExchangeSender", + " └─ExchangeSender 1.00 mpp[tiflash] ExchangeType: PassThrough", + " └─TopN 1.00 mpp[tiflash] Column#8, offset:0, count:1", + " └─Projection 1.00 mpp[tiflash] test.t1.vec, test.t1.a, test.t1.b, test.t1.c, test.t1.d, vec_cosine_distance(test.t1.vec, [1,1,1])->Column#8", + " └─TableFullScan 1.00 mpp[tiflash] table:t1, index:vector_index(vec) keep order:false, stats:pseudo, annIndex:COSINE(vec..[1,1,1], limit:1)" ], "Warn": null }, @@ -82,13 +80,11 @@ "Plan": [ "Projection 1.00 root test.t1.vec, test.t1.a, test.t1.b, test.t1.c, test.t1.d", "└─TopN 1.00 root Column#8, offset:0, count:1", - " └─Projection 1.00 root test.t1.vec, test.t1.a, test.t1.b, test.t1.c, test.t1.d, vec_cosine_distance(test.t1.vec, [1,1,1])->Column#8", - " └─TableReader 1.00 root MppVersion: 2, data:ExchangeSender", - " └─ExchangeSender 1.00 mpp[tiflash] ExchangeType: PassThrough", - " └─Projection 1.00 mpp[tiflash] test.t1.vec, test.t1.a, test.t1.b, test.t1.c, test.t1.d", - " └─TopN 1.00 mpp[tiflash] Column#7, offset:0, count:1", - " └─Projection 1.00 mpp[tiflash] test.t1.vec, test.t1.a, test.t1.b, test.t1.c, test.t1.d, vec_cosine_distance(test.t1.vec, [1,1,1])->Column#7", - " └─TableFullScan 1.00 mpp[tiflash] table:t1, index:vector_index(vec) keep order:false, stats:pseudo, annIndex:COSINE(vec..[1,1,1], limit:1)" + " └─TableReader 1.00 root MppVersion: 2, data:ExchangeSender", + " └─ExchangeSender 1.00 mpp[tiflash] ExchangeType: PassThrough", + " └─TopN 1.00 mpp[tiflash] Column#8, offset:0, count:1", + " └─Projection 1.00 mpp[tiflash] test.t1.vec, test.t1.a, test.t1.b, test.t1.c, test.t1.d, vec_cosine_distance(test.t1.vec, [1,1,1])->Column#8", + " └─TableFullScan 1.00 mpp[tiflash] table:t1, index:vector_index(vec) keep order:false, stats:pseudo, annIndex:COSINE(vec..[1,1,1], limit:1)" ], "Warn": null }, @@ -96,14 +92,12 @@ "SQL": "explain format = 'brief' select * from t1 order by vec_cosine_distance(vec, '[1,1,1]') desc limit 1", "Plan": [ "Projection 1.00 root test.t1.vec, test.t1.a, test.t1.b, test.t1.c, test.t1.d", - "└─TopN 1.00 root Column#8:desc, offset:0, count:1", - " └─Projection 1.00 root test.t1.vec, test.t1.a, test.t1.b, test.t1.c, test.t1.d, vec_cosine_distance(test.t1.vec, [1,1,1])->Column#8", - " └─TableReader 1.00 root MppVersion: 2, data:ExchangeSender", - " └─ExchangeSender 1.00 mpp[tiflash] ExchangeType: PassThrough", - " └─Projection 1.00 mpp[tiflash] test.t1.vec, test.t1.a, test.t1.b, test.t1.c, test.t1.d", - " └─TopN 1.00 mpp[tiflash] Column#7:desc, offset:0, count:1", - " └─Projection 49152.00 mpp[tiflash] test.t1.vec, test.t1.a, test.t1.b, test.t1.c, test.t1.d, vec_cosine_distance(test.t1.vec, [1,1,1])->Column#7", - " └─TableFullScan 49152.00 mpp[tiflash] table:t1 keep order:false, stats:pseudo" + "└─TopN 1.00 root Column#7:desc, offset:0, count:1", + " └─TableReader 1.00 root MppVersion: 2, data:ExchangeSender", + " └─ExchangeSender 1.00 mpp[tiflash] ExchangeType: PassThrough", + " └─TopN 1.00 mpp[tiflash] Column#7:desc, offset:0, count:1", + " └─Projection 1.00 mpp[tiflash] test.t1.vec, test.t1.a, test.t1.b, test.t1.c, test.t1.d, vec_cosine_distance(test.t1.vec, [1,1,1])->Column#7", + " └─TableFullScan 49152.00 mpp[tiflash] table:t1 keep order:false, stats:pseudo" ], "Warn": null }, @@ -111,14 +105,12 @@ "SQL": "explain format = 'brief' select * from t1 order by vec_cosine_distance(vec, vec_from_text('[1,1,1]')) desc limit 1", "Plan": [ "Projection 1.00 root test.t1.vec, test.t1.a, test.t1.b, test.t1.c, test.t1.d", - "└─TopN 1.00 root Column#8:desc, offset:0, count:1", - " └─Projection 1.00 root test.t1.vec, test.t1.a, test.t1.b, test.t1.c, test.t1.d, vec_cosine_distance(test.t1.vec, [1,1,1])->Column#8", - " └─TableReader 1.00 root MppVersion: 2, data:ExchangeSender", - " └─ExchangeSender 1.00 mpp[tiflash] ExchangeType: PassThrough", - " └─Projection 1.00 mpp[tiflash] test.t1.vec, test.t1.a, test.t1.b, test.t1.c, test.t1.d", - " └─TopN 1.00 mpp[tiflash] Column#7:desc, offset:0, count:1", - " └─Projection 49152.00 mpp[tiflash] test.t1.vec, test.t1.a, test.t1.b, test.t1.c, test.t1.d, vec_cosine_distance(test.t1.vec, [1,1,1])->Column#7", - " └─TableFullScan 49152.00 mpp[tiflash] table:t1 keep order:false, stats:pseudo" + "└─TopN 1.00 root Column#7:desc, offset:0, count:1", + " └─TableReader 1.00 root MppVersion: 2, data:ExchangeSender", + " └─ExchangeSender 1.00 mpp[tiflash] ExchangeType: PassThrough", + " └─TopN 1.00 mpp[tiflash] Column#7:desc, offset:0, count:1", + " └─Projection 1.00 mpp[tiflash] test.t1.vec, test.t1.a, test.t1.b, test.t1.c, test.t1.d, vec_cosine_distance(test.t1.vec, [1,1,1])->Column#7", + " └─TableFullScan 49152.00 mpp[tiflash] table:t1 keep order:false, stats:pseudo" ], "Warn": null }, @@ -126,14 +118,12 @@ "SQL": "explain format = 'brief' select * from t1 order by vec_cosine_distance(vec, '[1,1,1]')+1 limit 1", "Plan": [ "Projection 1.00 root test.t1.vec, test.t1.a, test.t1.b, test.t1.c, test.t1.d", - "└─TopN 1.00 root Column#8, offset:0, count:1", - " └─Projection 1.00 root test.t1.vec, test.t1.a, test.t1.b, test.t1.c, test.t1.d, plus(vec_cosine_distance(test.t1.vec, [1,1,1]), 1)->Column#8", - " └─TableReader 1.00 root MppVersion: 2, data:ExchangeSender", - " └─ExchangeSender 1.00 mpp[tiflash] ExchangeType: PassThrough", - " └─Projection 1.00 mpp[tiflash] test.t1.vec, test.t1.a, test.t1.b, test.t1.c, test.t1.d", - " └─TopN 1.00 mpp[tiflash] Column#7, offset:0, count:1", - " └─Projection 49152.00 mpp[tiflash] test.t1.vec, test.t1.a, test.t1.b, test.t1.c, test.t1.d, plus(vec_cosine_distance(test.t1.vec, [1,1,1]), 1)->Column#7", - " └─TableFullScan 49152.00 mpp[tiflash] table:t1 keep order:false, stats:pseudo" + "└─TopN 1.00 root Column#7, offset:0, count:1", + " └─TableReader 1.00 root MppVersion: 2, data:ExchangeSender", + " └─ExchangeSender 1.00 mpp[tiflash] ExchangeType: PassThrough", + " └─TopN 1.00 mpp[tiflash] Column#7, offset:0, count:1", + " └─Projection 1.00 mpp[tiflash] test.t1.vec, test.t1.a, test.t1.b, test.t1.c, test.t1.d, plus(vec_cosine_distance(test.t1.vec, [1,1,1]), 1)->Column#7", + " └─TableFullScan 49152.00 mpp[tiflash] table:t1 keep order:false, stats:pseudo" ], "Warn": null }, @@ -141,14 +131,12 @@ "SQL": "explain format = 'brief' select * from t1 order by vec_cosine_distance(vec, '[1,1,1]'),vec limit 1", "Plan": [ "Projection 1.00 root test.t1.vec, test.t1.a, test.t1.b, test.t1.c, test.t1.d", - "└─TopN 1.00 root Column#8, test.t1.vec, offset:0, count:1", - " └─Projection 1.00 root test.t1.vec, test.t1.a, test.t1.b, test.t1.c, test.t1.d, vec_cosine_distance(test.t1.vec, [1,1,1])->Column#8", - " └─TableReader 1.00 root MppVersion: 2, data:ExchangeSender", - " └─ExchangeSender 1.00 mpp[tiflash] ExchangeType: PassThrough", - " └─Projection 1.00 mpp[tiflash] test.t1.vec, test.t1.a, test.t1.b, test.t1.c, test.t1.d", - " └─TopN 1.00 mpp[tiflash] Column#7, test.t1.vec, offset:0, count:1", - " └─Projection 49152.00 mpp[tiflash] test.t1.vec, test.t1.a, test.t1.b, test.t1.c, test.t1.d, vec_cosine_distance(test.t1.vec, [1,1,1])->Column#7", - " └─TableFullScan 49152.00 mpp[tiflash] table:t1 keep order:false, stats:pseudo" + "└─TopN 1.00 root Column#7, test.t1.vec, offset:0, count:1", + " └─TableReader 1.00 root MppVersion: 2, data:ExchangeSender", + " └─ExchangeSender 1.00 mpp[tiflash] ExchangeType: PassThrough", + " └─TopN 1.00 mpp[tiflash] Column#7, test.t1.vec, offset:0, count:1", + " └─Projection 1.00 mpp[tiflash] test.t1.vec, test.t1.a, test.t1.b, test.t1.c, test.t1.d, vec_cosine_distance(test.t1.vec, [1,1,1])->Column#7", + " └─TableFullScan 49152.00 mpp[tiflash] table:t1 keep order:false, stats:pseudo" ], "Warn": null }, @@ -156,14 +144,12 @@ "SQL": "explain format = 'brief' select * from t1 order by vec_l2_distance(vec, '[1,1,1]') limit 1", "Plan": [ "Projection 1.00 root test.t1.vec, test.t1.a, test.t1.b, test.t1.c, test.t1.d", - "└─TopN 1.00 root Column#8, offset:0, count:1", - " └─Projection 1.00 root test.t1.vec, test.t1.a, test.t1.b, test.t1.c, test.t1.d, vec_l2_distance(test.t1.vec, [1,1,1])->Column#8", - " └─TableReader 1.00 root MppVersion: 2, data:ExchangeSender", - " └─ExchangeSender 1.00 mpp[tiflash] ExchangeType: PassThrough", - " └─Projection 1.00 mpp[tiflash] test.t1.vec, test.t1.a, test.t1.b, test.t1.c, test.t1.d", - " └─TopN 1.00 mpp[tiflash] Column#7, offset:0, count:1", - " └─Projection 49152.00 mpp[tiflash] test.t1.vec, test.t1.a, test.t1.b, test.t1.c, test.t1.d, vec_l2_distance(test.t1.vec, [1,1,1])->Column#7", - " └─TableFullScan 49152.00 mpp[tiflash] table:t1 keep order:false, stats:pseudo" + "└─TopN 1.00 root Column#7, offset:0, count:1", + " └─TableReader 1.00 root MppVersion: 2, data:ExchangeSender", + " └─ExchangeSender 1.00 mpp[tiflash] ExchangeType: PassThrough", + " └─TopN 1.00 mpp[tiflash] Column#7, offset:0, count:1", + " └─Projection 1.00 mpp[tiflash] test.t1.vec, test.t1.a, test.t1.b, test.t1.c, test.t1.d, vec_l2_distance(test.t1.vec, [1,1,1])->Column#7", + " └─TableFullScan 49152.00 mpp[tiflash] table:t1 keep order:false, stats:pseudo" ], "Warn": null }, @@ -171,14 +157,12 @@ "SQL": "explain format = 'brief' select * from t1 order by vec_l1_distance(vec, '[1,1,1]') limit 1", "Plan": [ "Projection 1.00 root test.t1.vec, test.t1.a, test.t1.b, test.t1.c, test.t1.d", - "└─TopN 1.00 root Column#8, offset:0, count:1", - " └─Projection 1.00 root test.t1.vec, test.t1.a, test.t1.b, test.t1.c, test.t1.d, vec_l1_distance(test.t1.vec, [1,1,1])->Column#8", - " └─TableReader 1.00 root MppVersion: 2, data:ExchangeSender", - " └─ExchangeSender 1.00 mpp[tiflash] ExchangeType: PassThrough", - " └─Projection 1.00 mpp[tiflash] test.t1.vec, test.t1.a, test.t1.b, test.t1.c, test.t1.d", - " └─TopN 1.00 mpp[tiflash] Column#7, offset:0, count:1", - " └─Projection 49152.00 mpp[tiflash] test.t1.vec, test.t1.a, test.t1.b, test.t1.c, test.t1.d, vec_l1_distance(test.t1.vec, [1,1,1])->Column#7", - " └─TableFullScan 49152.00 mpp[tiflash] table:t1 keep order:false, stats:pseudo" + "└─TopN 1.00 root Column#7, offset:0, count:1", + " └─TableReader 1.00 root MppVersion: 2, data:ExchangeSender", + " └─ExchangeSender 1.00 mpp[tiflash] ExchangeType: PassThrough", + " └─TopN 1.00 mpp[tiflash] Column#7, offset:0, count:1", + " └─Projection 1.00 mpp[tiflash] test.t1.vec, test.t1.a, test.t1.b, test.t1.c, test.t1.d, vec_l1_distance(test.t1.vec, [1,1,1])->Column#7", + " └─TableFullScan 49152.00 mpp[tiflash] table:t1 keep order:false, stats:pseudo" ], "Warn": null }, @@ -186,14 +170,12 @@ "SQL": "explain format = 'brief' select * from t1 order by vec_l2_distance(c, '[1,1,1]') limit 1", "Plan": [ "Projection 1.00 root test.t1.vec, test.t1.a, test.t1.b, test.t1.c, test.t1.d", - "└─TopN 1.00 root Column#8, offset:0, count:1", - " └─Projection 1.00 root test.t1.vec, test.t1.a, test.t1.b, test.t1.c, test.t1.d, vec_l2_distance(test.t1.c, [1,1,1])->Column#8", - " └─TableReader 1.00 root MppVersion: 2, data:ExchangeSender", - " └─ExchangeSender 1.00 mpp[tiflash] ExchangeType: PassThrough", - " └─Projection 1.00 mpp[tiflash] test.t1.vec, test.t1.a, test.t1.b, test.t1.c, test.t1.d", - " └─TopN 1.00 mpp[tiflash] Column#7, offset:0, count:1", - " └─Projection 49152.00 mpp[tiflash] test.t1.vec, test.t1.a, test.t1.b, test.t1.c, test.t1.d, vec_l2_distance(test.t1.c, [1,1,1])->Column#7", - " └─TableFullScan 49152.00 mpp[tiflash] table:t1 keep order:false, stats:pseudo" + "└─TopN 1.00 root Column#7, offset:0, count:1", + " └─TableReader 1.00 root MppVersion: 2, data:ExchangeSender", + " └─ExchangeSender 1.00 mpp[tiflash] ExchangeType: PassThrough", + " └─TopN 1.00 mpp[tiflash] Column#7, offset:0, count:1", + " └─Projection 1.00 mpp[tiflash] test.t1.vec, test.t1.a, test.t1.b, test.t1.c, test.t1.d, vec_l2_distance(test.t1.c, [1,1,1])->Column#7", + " └─TableFullScan 49152.00 mpp[tiflash] table:t1 keep order:false, stats:pseudo" ], "Warn": null }, @@ -201,14 +183,12 @@ "SQL": "explain format = 'brief' select * from t1 order by vec_l2_distance(d, '[1,1,1]') limit 1", "Plan": [ "Projection 1.00 root test.t1.vec, test.t1.a, test.t1.b, test.t1.c, test.t1.d", - "└─TopN 1.00 root Column#8, offset:0, count:1", - " └─Projection 1.00 root test.t1.vec, test.t1.a, test.t1.b, test.t1.c, test.t1.d, vec_l2_distance(test.t1.d, [1,1,1])->Column#8", - " └─TableReader 1.00 root MppVersion: 2, data:ExchangeSender", - " └─ExchangeSender 1.00 mpp[tiflash] ExchangeType: PassThrough", - " └─Projection 1.00 mpp[tiflash] test.t1.vec, test.t1.a, test.t1.b, test.t1.c, test.t1.d", - " └─TopN 1.00 mpp[tiflash] Column#7, offset:0, count:1", - " └─Projection 49152.00 mpp[tiflash] test.t1.vec, test.t1.a, test.t1.b, test.t1.c, test.t1.d, vec_l2_distance(test.t1.d, [1,1,1])->Column#7", - " └─TableFullScan 49152.00 mpp[tiflash] table:t1 keep order:false, stats:pseudo" + "└─TopN 1.00 root Column#7, offset:0, count:1", + " └─TableReader 1.00 root MppVersion: 2, data:ExchangeSender", + " └─ExchangeSender 1.00 mpp[tiflash] ExchangeType: PassThrough", + " └─TopN 1.00 mpp[tiflash] Column#7, offset:0, count:1", + " └─Projection 1.00 mpp[tiflash] test.t1.vec, test.t1.a, test.t1.b, test.t1.c, test.t1.d, vec_l2_distance(test.t1.d, [1,1,1])->Column#7", + " └─TableFullScan 49152.00 mpp[tiflash] table:t1 keep order:false, stats:pseudo" ], "Warn": null }, @@ -216,14 +196,12 @@ "SQL": "explain format = 'brief' select * from t1 order by vec_cosine_distance(d, '[1,1,1]') limit 1", "Plan": [ "Projection 1.00 root test.t1.vec, test.t1.a, test.t1.b, test.t1.c, test.t1.d", - "└─TopN 1.00 root Column#8, offset:0, count:1", - " └─Projection 1.00 root test.t1.vec, test.t1.a, test.t1.b, test.t1.c, test.t1.d, vec_cosine_distance(test.t1.d, [1,1,1])->Column#8", - " └─TableReader 1.00 root MppVersion: 2, data:ExchangeSender", - " └─ExchangeSender 1.00 mpp[tiflash] ExchangeType: PassThrough", - " └─Projection 1.00 mpp[tiflash] test.t1.vec, test.t1.a, test.t1.b, test.t1.c, test.t1.d", - " └─TopN 1.00 mpp[tiflash] Column#7, offset:0, count:1", - " └─Projection 49152.00 mpp[tiflash] test.t1.vec, test.t1.a, test.t1.b, test.t1.c, test.t1.d, vec_cosine_distance(test.t1.d, [1,1,1])->Column#7", - " └─TableFullScan 49152.00 mpp[tiflash] table:t1 keep order:false, stats:pseudo" + "└─TopN 1.00 root Column#7, offset:0, count:1", + " └─TableReader 1.00 root MppVersion: 2, data:ExchangeSender", + " └─ExchangeSender 1.00 mpp[tiflash] ExchangeType: PassThrough", + " └─TopN 1.00 mpp[tiflash] Column#7, offset:0, count:1", + " └─Projection 1.00 mpp[tiflash] test.t1.vec, test.t1.a, test.t1.b, test.t1.c, test.t1.d, vec_cosine_distance(test.t1.d, [1,1,1])->Column#7", + " └─TableFullScan 49152.00 mpp[tiflash] table:t1 keep order:false, stats:pseudo" ], "Warn": null }, @@ -231,14 +209,12 @@ "SQL": "explain format = 'brief' select * from t1 order by vec_l1_distance(d, '[1,1,1]') limit 1", "Plan": [ "Projection 1.00 root test.t1.vec, test.t1.a, test.t1.b, test.t1.c, test.t1.d", - "└─TopN 1.00 root Column#8, offset:0, count:1", - " └─Projection 1.00 root test.t1.vec, test.t1.a, test.t1.b, test.t1.c, test.t1.d, vec_l1_distance(test.t1.d, [1,1,1])->Column#8", - " └─TableReader 1.00 root MppVersion: 2, data:ExchangeSender", - " └─ExchangeSender 1.00 mpp[tiflash] ExchangeType: PassThrough", - " └─Projection 1.00 mpp[tiflash] test.t1.vec, test.t1.a, test.t1.b, test.t1.c, test.t1.d", - " └─TopN 1.00 mpp[tiflash] Column#7, offset:0, count:1", - " └─Projection 49152.00 mpp[tiflash] test.t1.vec, test.t1.a, test.t1.b, test.t1.c, test.t1.d, vec_l1_distance(test.t1.d, [1,1,1])->Column#7", - " └─TableFullScan 49152.00 mpp[tiflash] table:t1 keep order:false, stats:pseudo" + "└─TopN 1.00 root Column#7, offset:0, count:1", + " └─TableReader 1.00 root MppVersion: 2, data:ExchangeSender", + " └─ExchangeSender 1.00 mpp[tiflash] ExchangeType: PassThrough", + " └─TopN 1.00 mpp[tiflash] Column#7, offset:0, count:1", + " └─Projection 1.00 mpp[tiflash] test.t1.vec, test.t1.a, test.t1.b, test.t1.c, test.t1.d, vec_l1_distance(test.t1.d, [1,1,1])->Column#7", + " └─TableFullScan 49152.00 mpp[tiflash] table:t1 keep order:false, stats:pseudo" ], "Warn": null }, @@ -257,15 +233,12 @@ "SQL": "explain format = 'brief' select vec_cosine_distance(vec, '[1,1,1]') as d from t1 order by d limit 1", "Plan": [ "Projection 1.00 root vec_cosine_distance(test.t1.vec, [1,1,1])->Column#7", - "└─Projection 1.00 root test.t1.vec", - " └─TopN 1.00 root Column#9, offset:0, count:1", - " └─Projection 1.00 root test.t1.vec, vec_cosine_distance(test.t1.vec, [1,1,1])->Column#9", - " └─TableReader 1.00 root MppVersion: 2, data:ExchangeSender", - " └─ExchangeSender 1.00 mpp[tiflash] ExchangeType: PassThrough", - " └─Projection 1.00 mpp[tiflash] test.t1.vec", - " └─TopN 1.00 mpp[tiflash] Column#8, offset:0, count:1", - " └─Projection 1.00 mpp[tiflash] test.t1.vec, vec_cosine_distance(test.t1.vec, [1,1,1])->Column#8", - " └─TableFullScan 1.00 mpp[tiflash] table:t1, index:vector_index(vec) keep order:false, stats:pseudo, annIndex:COSINE(vec..[1,1,1], limit:1)" + "└─TopN 1.00 root Column#9, offset:0, count:1", + " └─TableReader 1.00 root MppVersion: 2, data:ExchangeSender", + " └─ExchangeSender 1.00 mpp[tiflash] ExchangeType: PassThrough", + " └─TopN 1.00 mpp[tiflash] Column#9, offset:0, count:1", + " └─Projection 1.00 mpp[tiflash] test.t1.vec, vec_cosine_distance(test.t1.vec, [1,1,1])->Column#9", + " └─TableFullScan 1.00 mpp[tiflash] table:t1, index:vector_index(vec) keep order:false, stats:pseudo, annIndex:COSINE(vec..[1,1,1], limit:1)" ], "Warn": null }, @@ -273,15 +246,12 @@ "SQL": "explain format = 'brief' select vec_cosine_distance(vec, '[1,1,1]') as d, vec, a, b from t1 order by d limit 1", "Plan": [ "Projection 1.00 root vec_cosine_distance(test.t1.vec, [1,1,1])->Column#7, test.t1.vec, test.t1.a, test.t1.b", - "└─Projection 1.00 root test.t1.vec, test.t1.a, test.t1.b", - " └─TopN 1.00 root Column#9, offset:0, count:1", - " └─Projection 1.00 root test.t1.vec, test.t1.a, test.t1.b, vec_cosine_distance(test.t1.vec, [1,1,1])->Column#9", - " └─TableReader 1.00 root MppVersion: 2, data:ExchangeSender", - " └─ExchangeSender 1.00 mpp[tiflash] ExchangeType: PassThrough", - " └─Projection 1.00 mpp[tiflash] test.t1.vec, test.t1.a, test.t1.b", - " └─TopN 1.00 mpp[tiflash] Column#8, offset:0, count:1", - " └─Projection 1.00 mpp[tiflash] test.t1.vec, test.t1.a, test.t1.b, vec_cosine_distance(test.t1.vec, [1,1,1])->Column#8", - " └─TableFullScan 1.00 mpp[tiflash] table:t1, index:vector_index(vec) keep order:false, stats:pseudo, annIndex:COSINE(vec..[1,1,1], limit:1)" + "└─TopN 1.00 root Column#9, offset:0, count:1", + " └─TableReader 1.00 root MppVersion: 2, data:ExchangeSender", + " └─ExchangeSender 1.00 mpp[tiflash] ExchangeType: PassThrough", + " └─TopN 1.00 mpp[tiflash] Column#9, offset:0, count:1", + " └─Projection 1.00 mpp[tiflash] test.t1.vec, test.t1.a, test.t1.b, vec_cosine_distance(test.t1.vec, [1,1,1])->Column#9", + " └─TableFullScan 1.00 mpp[tiflash] table:t1, index:vector_index(vec) keep order:false, stats:pseudo, annIndex:COSINE(vec..[1,1,1], limit:1)" ], "Warn": null }, @@ -289,14 +259,12 @@ "SQL": "explain format = 'brief' select * from t1 where a=0 order by vec_cosine_distance(vec, '[1,1,1]') limit 1", "Plan": [ "Projection 1.00 root test.t1.vec, test.t1.a, test.t1.b, test.t1.c, test.t1.d", - "└─TopN 1.00 root Column#8, offset:0, count:1", - " └─Projection 1.00 root test.t1.vec, test.t1.a, test.t1.b, test.t1.c, test.t1.d, vec_cosine_distance(test.t1.vec, [1,1,1])->Column#8", - " └─TableReader 1.00 root MppVersion: 2, data:ExchangeSender", - " └─ExchangeSender 1.00 mpp[tiflash] ExchangeType: PassThrough", - " └─Projection 1.00 mpp[tiflash] test.t1.vec, test.t1.a, test.t1.b, test.t1.c, test.t1.d", - " └─TopN 1.00 mpp[tiflash] Column#7, offset:0, count:1", - " └─Projection 49.15 mpp[tiflash] test.t1.vec, test.t1.a, test.t1.b, test.t1.c, test.t1.d, vec_cosine_distance(test.t1.vec, [1,1,1])->Column#7", - " └─TableFullScan 49.15 mpp[tiflash] table:t1 pushed down filter:eq(test.t1.a, 0), keep order:false, stats:pseudo" + "└─TopN 1.00 root Column#7, offset:0, count:1", + " └─TableReader 1.00 root MppVersion: 2, data:ExchangeSender", + " └─ExchangeSender 1.00 mpp[tiflash] ExchangeType: PassThrough", + " └─TopN 1.00 mpp[tiflash] Column#7, offset:0, count:1", + " └─Projection 1.00 mpp[tiflash] test.t1.vec, test.t1.a, test.t1.b, test.t1.c, test.t1.d, vec_cosine_distance(test.t1.vec, [1,1,1])->Column#7", + " └─TableFullScan 49.15 mpp[tiflash] table:t1 pushed down filter:eq(test.t1.a, 0), keep order:false, stats:pseudo" ], "Warn": null } @@ -342,75 +310,59 @@ " └─ExchangeSender_11 49152.00 mpp[tiflash] ExchangeType: PassThrough", " └─TableFullScan_10 49152.00 mpp[tiflash] table:t1 keep order:false, PartitionTableScan:true" ], - "Warn": [ - "ANN index not used: only Top N queries (like ORDER BY ... LIMIT ...) can use ANN index" - ] + "Warn": null }, { "SQL": "explain select * from t1 order by vec_cosine_distance(vec, '[1,1,1]') limit 1", "Plan": [ - "Projection_21 1.00 root test.t1.vec, test.t1.a, test.t1.b, test.t1.store_id", - "└─TopN_10 1.00 root Column#7, offset:0, count:1", - " └─Projection_22 1.00 root test.t1.vec, test.t1.a, test.t1.b, test.t1.store_id, vec_cosine_distance(test.t1.vec, [1,1,1])->Column#7", - " └─TableReader_18 1.00 root partition:all MppVersion: 2, data:ExchangeSender_17", - " └─ExchangeSender_17 1.00 mpp[tiflash] ExchangeType: PassThrough", - " └─Projection_19 1.00 mpp[tiflash] test.t1.vec, test.t1.a, test.t1.b, test.t1.store_id", - " └─TopN_16 1.00 mpp[tiflash] Column#6, offset:0, count:1", - " └─Projection_20 1.00 mpp[tiflash] test.t1.vec, test.t1.a, test.t1.b, test.t1.store_id, vec_cosine_distance(test.t1.vec, [1,1,1])->Column#6", - " └─TableFullScan_15 1.00 mpp[tiflash] table:t1 annIndex:COSINE(test.t1.vec..[1,1,1], limit:1), keep order:false, PartitionTableScan:true" + "Projection_6 1.00 root test.t1.vec, test.t1.a, test.t1.b, test.t1.store_id", + "└─TopN_10 1.00 root Column#6, offset:0, count:1", + " └─TableReader_20 1.00 root partition:all MppVersion: 2, data:ExchangeSender_19", + " └─ExchangeSender_19 1.00 mpp[tiflash] ExchangeType: PassThrough", + " └─TopN_18 1.00 mpp[tiflash] Column#6, offset:0, count:1", + " └─Projection_17 1.00 mpp[tiflash] test.t1.vec, test.t1.a, test.t1.b, test.t1.store_id, vec_cosine_distance(test.t1.vec, [1,1,1])->Column#6", + " └─TableFullScan_16 49152.00 mpp[tiflash] table:t1 keep order:false, PartitionTableScan:true" ], "Warn": null }, { "SQL": "explain select * from t1 order by vec_cosine_distance(vec, '[1,1,1]') desc limit 1", "Plan": [ - "Projection_21 1.00 root test.t1.vec, test.t1.a, test.t1.b, test.t1.store_id", - "└─TopN_10 1.00 root Column#7:desc, offset:0, count:1", - " └─Projection_22 1.00 root test.t1.vec, test.t1.a, test.t1.b, test.t1.store_id, vec_cosine_distance(test.t1.vec, [1,1,1])->Column#7", - " └─TableReader_18 1.00 root partition:all MppVersion: 2, data:ExchangeSender_17", - " └─ExchangeSender_17 1.00 mpp[tiflash] ExchangeType: PassThrough", - " └─Projection_19 1.00 mpp[tiflash] test.t1.vec, test.t1.a, test.t1.b, test.t1.store_id", - " └─TopN_16 1.00 mpp[tiflash] Column#6:desc, offset:0, count:1", - " └─Projection_20 49152.00 mpp[tiflash] test.t1.vec, test.t1.a, test.t1.b, test.t1.store_id, vec_cosine_distance(test.t1.vec, [1,1,1])->Column#6", - " └─TableFullScan_15 49152.00 mpp[tiflash] table:t1 keep order:false, PartitionTableScan:true" + "Projection_6 1.00 root test.t1.vec, test.t1.a, test.t1.b, test.t1.store_id", + "└─TopN_10 1.00 root Column#6:desc, offset:0, count:1", + " └─TableReader_19 1.00 root partition:all MppVersion: 2, data:ExchangeSender_18", + " └─ExchangeSender_18 1.00 mpp[tiflash] ExchangeType: PassThrough", + " └─TopN_17 1.00 mpp[tiflash] Column#6:desc, offset:0, count:1", + " └─Projection_16 1.00 mpp[tiflash] test.t1.vec, test.t1.a, test.t1.b, test.t1.store_id, vec_cosine_distance(test.t1.vec, [1,1,1])->Column#6", + " └─TableFullScan_15 49152.00 mpp[tiflash] table:t1 keep order:false, PartitionTableScan:true" ], - "Warn": [ - "ANN index not used: index can be used only when ordering by vec_cosine_distance() in ASC order" - ] + "Warn": null }, { "SQL": "explain select * from t1 order by vec_cosine_distance(vec, vec_from_text('[1,1,1]')) desc limit 1", "Plan": [ - "Projection_21 1.00 root test.t1.vec, test.t1.a, test.t1.b, test.t1.store_id", - "└─TopN_10 1.00 root Column#7:desc, offset:0, count:1", - " └─Projection_22 1.00 root test.t1.vec, test.t1.a, test.t1.b, test.t1.store_id, vec_cosine_distance(test.t1.vec, [1,1,1])->Column#7", - " └─TableReader_18 1.00 root partition:all MppVersion: 2, data:ExchangeSender_17", - " └─ExchangeSender_17 1.00 mpp[tiflash] ExchangeType: PassThrough", - " └─Projection_19 1.00 mpp[tiflash] test.t1.vec, test.t1.a, test.t1.b, test.t1.store_id", - " └─TopN_16 1.00 mpp[tiflash] Column#6:desc, offset:0, count:1", - " └─Projection_20 49152.00 mpp[tiflash] test.t1.vec, test.t1.a, test.t1.b, test.t1.store_id, vec_cosine_distance(test.t1.vec, [1,1,1])->Column#6", - " └─TableFullScan_15 49152.00 mpp[tiflash] table:t1 keep order:false, PartitionTableScan:true" + "Projection_6 1.00 root test.t1.vec, test.t1.a, test.t1.b, test.t1.store_id", + "└─TopN_10 1.00 root Column#6:desc, offset:0, count:1", + " └─TableReader_19 1.00 root partition:all MppVersion: 2, data:ExchangeSender_18", + " └─ExchangeSender_18 1.00 mpp[tiflash] ExchangeType: PassThrough", + " └─TopN_17 1.00 mpp[tiflash] Column#6:desc, offset:0, count:1", + " └─Projection_16 1.00 mpp[tiflash] test.t1.vec, test.t1.a, test.t1.b, test.t1.store_id, vec_cosine_distance(test.t1.vec, [1,1,1])->Column#6", + " └─TableFullScan_15 49152.00 mpp[tiflash] table:t1 keep order:false, PartitionTableScan:true" ], - "Warn": [ - "ANN index not used: index can be used only when ordering by vec_cosine_distance() in ASC order" - ] + "Warn": null }, { "SQL": "explain select * from t1 order by vec_cosine_distance(vec, '[1,1,1]')+1 limit 1", "Plan": [ - "Projection_21 1.00 root test.t1.vec, test.t1.a, test.t1.b, test.t1.store_id", - "└─TopN_10 1.00 root Column#7, offset:0, count:1", - " └─Projection_22 1.00 root test.t1.vec, test.t1.a, test.t1.b, test.t1.store_id, plus(vec_cosine_distance(test.t1.vec, [1,1,1]), 1)->Column#7", - " └─TableReader_18 1.00 root partition:all MppVersion: 2, data:ExchangeSender_17", - " └─ExchangeSender_17 1.00 mpp[tiflash] ExchangeType: PassThrough", - " └─Projection_19 1.00 mpp[tiflash] test.t1.vec, test.t1.a, test.t1.b, test.t1.store_id", - " └─TopN_16 1.00 mpp[tiflash] Column#6, offset:0, count:1", - " └─Projection_20 49152.00 mpp[tiflash] test.t1.vec, test.t1.a, test.t1.b, test.t1.store_id, plus(vec_cosine_distance(test.t1.vec, [1,1,1]), 1)->Column#6", - " └─TableFullScan_15 49152.00 mpp[tiflash] table:t1 keep order:false, PartitionTableScan:true" + "Projection_6 1.00 root test.t1.vec, test.t1.a, test.t1.b, test.t1.store_id", + "└─TopN_10 1.00 root Column#6, offset:0, count:1", + " └─TableReader_19 1.00 root partition:all MppVersion: 2, data:ExchangeSender_18", + " └─ExchangeSender_18 1.00 mpp[tiflash] ExchangeType: PassThrough", + " └─TopN_17 1.00 mpp[tiflash] Column#6, offset:0, count:1", + " └─Projection_16 1.00 mpp[tiflash] test.t1.vec, test.t1.a, test.t1.b, test.t1.store_id, plus(vec_cosine_distance(test.t1.vec, [1,1,1]), 1)->Column#6", + " └─TableFullScan_15 49152.00 mpp[tiflash] table:t1 keep order:false, PartitionTableScan:true" ], - "Warn": [ - "ANN index not used: not ordering by a vector distance function" - ] + "Warn": null }, { "SQL": "explain select vec_cosine_distance(vec, '[1,1,1]') as d from t1 order by d", @@ -421,23 +373,18 @@ " └─Projection_7 49152.00 mpp[tiflash] vec_cosine_distance(test.t1.vec, [1,1,1])->Column#6", " └─TableFullScan_11 49152.00 mpp[tiflash] table:t1 keep order:false, PartitionTableScan:true" ], - "Warn": [ - "ANN index not used: only Top N queries (like ORDER BY ... LIMIT ...) can use ANN index" - ] + "Warn": null }, { "SQL": "explain select vec_cosine_distance(vec, '[1,1,1]') as d from t1 order by d limit 1", "Plan": [ "Projection_6 1.00 root vec_cosine_distance(test.t1.vec, [1,1,1])->Column#6", - "└─Projection_21 1.00 root test.t1.vec", - " └─TopN_10 1.00 root Column#8, offset:0, count:1", - " └─Projection_22 1.00 root test.t1.vec, vec_cosine_distance(test.t1.vec, [1,1,1])->Column#8", - " └─TableReader_18 1.00 root partition:all MppVersion: 2, data:ExchangeSender_17", - " └─ExchangeSender_17 1.00 mpp[tiflash] ExchangeType: PassThrough", - " └─Projection_19 1.00 mpp[tiflash] test.t1.vec", - " └─TopN_16 1.00 mpp[tiflash] Column#7, offset:0, count:1", - " └─Projection_20 1.00 mpp[tiflash] test.t1.vec, vec_cosine_distance(test.t1.vec, [1,1,1])->Column#7", - " └─TableFullScan_15 1.00 mpp[tiflash] table:t1 annIndex:COSINE(test.t1.vec..[1,1,1], limit:1), keep order:false, PartitionTableScan:true" + "└─TopN_10 1.00 root Column#7, offset:0, count:1", + " └─TableReader_20 1.00 root partition:all MppVersion: 2, data:ExchangeSender_19", + " └─ExchangeSender_19 1.00 mpp[tiflash] ExchangeType: PassThrough", + " └─TopN_18 1.00 mpp[tiflash] Column#7, offset:0, count:1", + " └─Projection_17 1.00 mpp[tiflash] test.t1.vec, vec_cosine_distance(test.t1.vec, [1,1,1])->Column#7", + " └─TableFullScan_16 49152.00 mpp[tiflash] table:t1 keep order:false, PartitionTableScan:true" ], "Warn": null }, @@ -445,64 +392,51 @@ "SQL": "explain select vec_cosine_distance(vec, '[1,1,1]') as d, vec, a, b from t1 order by d limit 1", "Plan": [ "Projection_6 1.00 root vec_cosine_distance(test.t1.vec, [1,1,1])->Column#6, test.t1.vec, test.t1.a, test.t1.b", - "└─Projection_21 1.00 root test.t1.vec, test.t1.a, test.t1.b", - " └─TopN_10 1.00 root Column#8, offset:0, count:1", - " └─Projection_22 1.00 root test.t1.vec, test.t1.a, test.t1.b, vec_cosine_distance(test.t1.vec, [1,1,1])->Column#8", - " └─TableReader_18 1.00 root partition:all MppVersion: 2, data:ExchangeSender_17", - " └─ExchangeSender_17 1.00 mpp[tiflash] ExchangeType: PassThrough", - " └─Projection_19 1.00 mpp[tiflash] test.t1.vec, test.t1.a, test.t1.b", - " └─TopN_16 1.00 mpp[tiflash] Column#7, offset:0, count:1", - " └─Projection_20 1.00 mpp[tiflash] test.t1.vec, test.t1.a, test.t1.b, vec_cosine_distance(test.t1.vec, [1,1,1])->Column#7", - " └─TableFullScan_15 1.00 mpp[tiflash] table:t1 annIndex:COSINE(test.t1.vec..[1,1,1], limit:1), keep order:false, PartitionTableScan:true" + "└─TopN_10 1.00 root Column#7, offset:0, count:1", + " └─TableReader_20 1.00 root partition:all MppVersion: 2, data:ExchangeSender_19", + " └─ExchangeSender_19 1.00 mpp[tiflash] ExchangeType: PassThrough", + " └─TopN_18 1.00 mpp[tiflash] Column#7, offset:0, count:1", + " └─Projection_17 1.00 mpp[tiflash] test.t1.vec, test.t1.a, test.t1.b, vec_cosine_distance(test.t1.vec, [1,1,1])->Column#7", + " └─TableFullScan_16 49152.00 mpp[tiflash] table:t1 keep order:false, PartitionTableScan:true" ], "Warn": null }, { "SQL": "explain select * from t1 where a=0 order by vec_cosine_distance(vec, '[1,1,1]') limit 1", "Plan": [ - "Projection_24 0.00 root test.t1.vec, test.t1.a, test.t1.b, test.t1.store_id", - "└─TopN_11 0.00 root Column#7, offset:0, count:1", - " └─Projection_25 0.00 root test.t1.vec, test.t1.a, test.t1.b, test.t1.store_id, vec_cosine_distance(test.t1.vec, [1,1,1])->Column#7", - " └─TableReader_21 0.00 root partition:all MppVersion: 2, data:ExchangeSender_20", - " └─ExchangeSender_20 0.00 mpp[tiflash] ExchangeType: PassThrough", - " └─Projection_22 0.00 mpp[tiflash] test.t1.vec, test.t1.a, test.t1.b, test.t1.store_id", - " └─TopN_19 0.00 mpp[tiflash] Column#6, offset:0, count:1", - " └─Projection_23 0.00 mpp[tiflash] test.t1.vec, test.t1.a, test.t1.b, test.t1.store_id, vec_cosine_distance(test.t1.vec, [1,1,1])->Column#6", - " └─TableFullScan_17 0.00 mpp[tiflash] table:t1 pushed down filter:eq(test.t1.a, 0), keep order:false, PartitionTableScan:true" + "Projection_7 0.00 root test.t1.vec, test.t1.a, test.t1.b, test.t1.store_id", + "└─TopN_11 0.00 root Column#6, offset:0, count:1", + " └─TableReader_22 0.00 root partition:all MppVersion: 2, data:ExchangeSender_21", + " └─ExchangeSender_21 0.00 mpp[tiflash] ExchangeType: PassThrough", + " └─TopN_20 0.00 mpp[tiflash] Column#6, offset:0, count:1", + " └─Projection_19 0.00 mpp[tiflash] test.t1.vec, test.t1.a, test.t1.b, test.t1.store_id, vec_cosine_distance(test.t1.vec, [1,1,1])->Column#6", + " └─TableFullScan_17 0.00 mpp[tiflash] table:t1 pushed down filter:eq(test.t1.a, 0), keep order:false, PartitionTableScan:true" ], - "Warn": [ - "ANN index not used: cannot utilize ANN index when there is a WHERE or HAVING clause" - ] + "Warn": null }, { "SQL": "explain select * from t1 where store_id between 80 and 120 order by vec_cosine_distance(vec, '[1,1,1]') limit 1", "Plan": [ - "Projection_24 0.00 root test.t1.vec, test.t1.a, test.t1.b, test.t1.store_id", - "└─TopN_11 0.00 root Column#7, offset:0, count:1", - " └─Projection_25 0.00 root test.t1.vec, test.t1.a, test.t1.b, test.t1.store_id, vec_cosine_distance(test.t1.vec, [1,1,1])->Column#7", - " └─TableReader_21 0.00 root partition:p0,p1 MppVersion: 2, data:ExchangeSender_20", - " └─ExchangeSender_20 0.00 mpp[tiflash] ExchangeType: PassThrough", - " └─Projection_22 0.00 mpp[tiflash] test.t1.vec, test.t1.a, test.t1.b, test.t1.store_id", - " └─TopN_19 0.00 mpp[tiflash] Column#6, offset:0, count:1", - " └─Projection_23 0.00 mpp[tiflash] test.t1.vec, test.t1.a, test.t1.b, test.t1.store_id, vec_cosine_distance(test.t1.vec, [1,1,1])->Column#6", - " └─TableFullScan_17 0.00 mpp[tiflash] table:t1 pushed down filter:ge(test.t1.store_id, 80), le(test.t1.store_id, 120), keep order:false, PartitionTableScan:true" + "Projection_7 0.00 root test.t1.vec, test.t1.a, test.t1.b, test.t1.store_id", + "└─TopN_11 0.00 root Column#6, offset:0, count:1", + " └─TableReader_22 0.00 root partition:p0,p1 MppVersion: 2, data:ExchangeSender_21", + " └─ExchangeSender_21 0.00 mpp[tiflash] ExchangeType: PassThrough", + " └─TopN_20 0.00 mpp[tiflash] Column#6, offset:0, count:1", + " └─Projection_19 0.00 mpp[tiflash] test.t1.vec, test.t1.a, test.t1.b, test.t1.store_id, vec_cosine_distance(test.t1.vec, [1,1,1])->Column#6", + " └─TableFullScan_17 0.00 mpp[tiflash] table:t1 pushed down filter:ge(test.t1.store_id, 80), le(test.t1.store_id, 120), keep order:false, PartitionTableScan:true" ], - "Warn": [ - "ANN index not used: cannot utilize ANN index when there is a WHERE or HAVING clause" - ] + "Warn": null }, { "SQL": "explain select * from t1 partition (p0) order by vec_cosine_distance(vec, '[1,1,1]') limit 1", "Plan": [ - "Projection_21 1.00 root test.t1.vec, test.t1.a, test.t1.b, test.t1.store_id", - "└─TopN_10 1.00 root Column#7, offset:0, count:1", - " └─Projection_22 1.00 root test.t1.vec, test.t1.a, test.t1.b, test.t1.store_id, vec_cosine_distance(test.t1.vec, [1,1,1])->Column#7", - " └─TableReader_18 1.00 root partition:p0 MppVersion: 2, data:ExchangeSender_17", - " └─ExchangeSender_17 1.00 mpp[tiflash] ExchangeType: PassThrough", - " └─Projection_19 1.00 mpp[tiflash] test.t1.vec, test.t1.a, test.t1.b, test.t1.store_id", - " └─TopN_16 1.00 mpp[tiflash] Column#6, offset:0, count:1", - " └─Projection_20 1.00 mpp[tiflash] test.t1.vec, test.t1.a, test.t1.b, test.t1.store_id, vec_cosine_distance(test.t1.vec, [1,1,1])->Column#6", - " └─TableFullScan_15 1.00 mpp[tiflash] table:t1 annIndex:COSINE(test.t1.vec..[1,1,1], limit:1), keep order:false, PartitionTableScan:true" + "Projection_6 1.00 root test.t1.vec, test.t1.a, test.t1.b, test.t1.store_id", + "└─TopN_10 1.00 root Column#6, offset:0, count:1", + " └─TableReader_20 1.00 root partition:p0 MppVersion: 2, data:ExchangeSender_19", + " └─ExchangeSender_19 1.00 mpp[tiflash] ExchangeType: PassThrough", + " └─TopN_18 1.00 mpp[tiflash] Column#6, offset:0, count:1", + " └─Projection_17 1.00 mpp[tiflash] test.t1.vec, test.t1.a, test.t1.b, test.t1.store_id, vec_cosine_distance(test.t1.vec, [1,1,1])->Column#6", + " └─TableFullScan_16 49152.00 mpp[tiflash] table:t1 keep order:false, PartitionTableScan:true" ], "Warn": null } @@ -524,12 +458,12 @@ "SQL": "explain select id from t1 order by vec_cosine_distance(vec, '[1,1,1]') limit 10", "Plan": [ "Projection_7 10.00 root test.t1.id", - "└─TopN_11 10.00 root Column#9, offset:0, count:10", - " └─TableReader_28 10.00 root MppVersion: 2, data:ExchangeSender_27", - " └─ExchangeSender_27 10.00 mpp[tiflash] ExchangeType: PassThrough", - " └─TopN_26 10.00 mpp[tiflash] Column#9, offset:0, count:10", - " └─Projection_25 10.00 mpp[tiflash] test.t1.id, test.t1.vec, vec_cosine_distance(test.t1.vec, [1,1,1])->Column#9", - " └─TableFullScan_24 10.00 mpp[tiflash] table:t1, index:idx_embedding(vec) keep order:false, annIndex:COSINE(vec..[1,1,1], limit:10)" + "└─TopN_11 10.00 root Column#10, offset:0, count:10", + " └─TableReader_29 10.00 root MppVersion: 2, data:ExchangeSender_28", + " └─ExchangeSender_28 10.00 mpp[tiflash] ExchangeType: PassThrough", + " └─TopN_27 10.00 mpp[tiflash] Column#10, offset:0, count:10", + " └─Projection_26 10.00 mpp[tiflash] test.t1.id, test.t1.vec, vec_cosine_distance(test.t1.vec, [1,1,1])->Column#10", + " └─TableFullScan_25 10.00 mpp[tiflash] table:t1, index:idx_embedding(vec) keep order:false, annIndex:COSINE(vec..[1,1,1], limit:10)" ], "Warn": null }, @@ -550,12 +484,12 @@ "SQL": "explain select * from t1 order by vec_cosine_distance(vec, '[1,1,1]') limit 10", "Plan": [ "Projection_6 10.00 root test.t1.id, test.t1.vec, test.t1.a, test.t1.b, test.t1.c, test.t1.d", - "└─TopN_10 10.00 root Column#8, offset:0, count:10", - " └─TableReader_27 10.00 root MppVersion: 2, data:ExchangeSender_26", - " └─ExchangeSender_26 10.00 mpp[tiflash] ExchangeType: PassThrough", - " └─TopN_25 10.00 mpp[tiflash] Column#8, offset:0, count:10", - " └─Projection_24 10.00 mpp[tiflash] test.t1.id, test.t1.vec, test.t1.a, test.t1.b, test.t1.c, test.t1.d, vec_cosine_distance(test.t1.vec, [1,1,1])->Column#8", - " └─TableFullScan_23 10.00 mpp[tiflash] table:t1, index:idx_embedding(vec) keep order:false, annIndex:COSINE(vec..[1,1,1], limit:10)" + "└─TopN_10 10.00 root Column#9, offset:0, count:10", + " └─TableReader_28 10.00 root MppVersion: 2, data:ExchangeSender_27", + " └─ExchangeSender_27 10.00 mpp[tiflash] ExchangeType: PassThrough", + " └─TopN_26 10.00 mpp[tiflash] Column#9, offset:0, count:10", + " └─Projection_25 10.00 mpp[tiflash] test.t1.id, test.t1.vec, test.t1.a, test.t1.b, test.t1.c, test.t1.d, vec_cosine_distance(test.t1.vec, [1,1,1])->Column#9", + " └─TableFullScan_24 10.00 mpp[tiflash] table:t1, index:idx_embedding(vec) keep order:false, annIndex:COSINE(vec..[1,1,1], limit:10)" ], "Warn": null }, @@ -563,12 +497,12 @@ "SQL": "explain select id, a, b from t1 order by vec_cosine_distance(vec, '[1,1,1]') limit 10", "Plan": [ "Projection_7 10.00 root test.t1.id, test.t1.a, test.t1.b", - "└─TopN_11 10.00 root Column#11, offset:0, count:10", - " └─TableReader_28 10.00 root MppVersion: 2, data:ExchangeSender_27", - " └─ExchangeSender_27 10.00 mpp[tiflash] ExchangeType: PassThrough", - " └─TopN_26 10.00 mpp[tiflash] Column#11, offset:0, count:10", - " └─Projection_25 10.00 mpp[tiflash] test.t1.id, test.t1.vec, test.t1.a, test.t1.b, vec_cosine_distance(test.t1.vec, [1,1,1])->Column#11", - " └─TableFullScan_24 10.00 mpp[tiflash] table:t1, index:idx_embedding(vec) keep order:false, annIndex:COSINE(vec..[1,1,1], limit:10)" + "└─TopN_11 10.00 root Column#12, offset:0, count:10", + " └─TableReader_29 10.00 root MppVersion: 2, data:ExchangeSender_28", + " └─ExchangeSender_28 10.00 mpp[tiflash] ExchangeType: PassThrough", + " └─TopN_27 10.00 mpp[tiflash] Column#12, offset:0, count:10", + " └─Projection_26 10.00 mpp[tiflash] test.t1.id, test.t1.vec, test.t1.a, test.t1.b, vec_cosine_distance(test.t1.vec, [1,1,1])->Column#12", + " └─TableFullScan_25 10.00 mpp[tiflash] table:t1, index:idx_embedding(vec) keep order:false, annIndex:COSINE(vec..[1,1,1], limit:10)" ], "Warn": null }, @@ -576,12 +510,12 @@ "SQL": "explain select a, id, b from t1 order by vec_cosine_distance(vec, '[1,1,1]') limit 10", "Plan": [ "Projection_7 10.00 root test.t1.a, test.t1.id, test.t1.b", - "└─TopN_11 10.00 root Column#11, offset:0, count:10", - " └─TableReader_28 10.00 root MppVersion: 2, data:ExchangeSender_27", - " └─ExchangeSender_27 10.00 mpp[tiflash] ExchangeType: PassThrough", - " └─TopN_26 10.00 mpp[tiflash] Column#11, offset:0, count:10", - " └─Projection_25 10.00 mpp[tiflash] test.t1.id, test.t1.vec, test.t1.a, test.t1.b, vec_cosine_distance(test.t1.vec, [1,1,1])->Column#11", - " └─TableFullScan_24 10.00 mpp[tiflash] table:t1, index:idx_embedding(vec) keep order:false, annIndex:COSINE(vec..[1,1,1], limit:10)" + "└─TopN_11 10.00 root Column#12, offset:0, count:10", + " └─TableReader_29 10.00 root MppVersion: 2, data:ExchangeSender_28", + " └─ExchangeSender_28 10.00 mpp[tiflash] ExchangeType: PassThrough", + " └─TopN_27 10.00 mpp[tiflash] Column#12, offset:0, count:10", + " └─Projection_26 10.00 mpp[tiflash] test.t1.id, test.t1.vec, test.t1.a, test.t1.b, vec_cosine_distance(test.t1.vec, [1,1,1])->Column#12", + " └─TableFullScan_25 10.00 mpp[tiflash] table:t1, index:idx_embedding(vec) keep order:false, annIndex:COSINE(vec..[1,1,1], limit:10)" ], "Warn": null }, @@ -589,12 +523,12 @@ "SQL": "explain select id, vec_cosine_distance(vec, '[1,1,1]') as d from t1 order by d limit 10", "Plan": [ "Projection_6 10.00 root test.t1.id, vec_cosine_distance(test.t1.vec, [1,1,1])->Column#7", - "└─TopN_10 10.00 root Column#9, offset:0, count:10", - " └─TableReader_27 10.00 root MppVersion: 2, data:ExchangeSender_26", - " └─ExchangeSender_26 10.00 mpp[tiflash] ExchangeType: PassThrough", - " └─TopN_25 10.00 mpp[tiflash] Column#9, offset:0, count:10", - " └─Projection_24 10.00 mpp[tiflash] test.t1.id, test.t1.vec, vec_cosine_distance(test.t1.vec, [1,1,1])->Column#9", - " └─TableFullScan_23 10.00 mpp[tiflash] table:t1, index:idx_embedding(vec) keep order:false, annIndex:COSINE(vec..[1,1,1], limit:10)" + "└─TopN_10 10.00 root Column#10, offset:0, count:10", + " └─TableReader_28 10.00 root MppVersion: 2, data:ExchangeSender_27", + " └─ExchangeSender_27 10.00 mpp[tiflash] ExchangeType: PassThrough", + " └─TopN_26 10.00 mpp[tiflash] Column#10, offset:0, count:10", + " └─Projection_25 10.00 mpp[tiflash] test.t1.id, test.t1.vec, vec_cosine_distance(test.t1.vec, [1,1,1])->Column#10", + " └─TableFullScan_24 10.00 mpp[tiflash] table:t1, index:idx_embedding(vec) keep order:false, annIndex:COSINE(vec..[1,1,1], limit:10)" ], "Warn": null }, @@ -613,12 +547,12 @@ "SQL": "explain select *, vec_cosine_distance(vec, '[1,1,1]') as d from t1 order by d limit 10", "Plan": [ "Projection_6 10.00 root test.t1.id, test.t1.vec, test.t1.a, test.t1.b, test.t1.c, test.t1.d, vec_cosine_distance(test.t1.vec, [1,1,1])->Column#7", - "└─TopN_10 10.00 root Column#9, offset:0, count:10", - " └─TableReader_27 10.00 root MppVersion: 2, data:ExchangeSender_26", - " └─ExchangeSender_26 10.00 mpp[tiflash] ExchangeType: PassThrough", - " └─TopN_25 10.00 mpp[tiflash] Column#9, offset:0, count:10", - " └─Projection_24 10.00 mpp[tiflash] test.t1.id, test.t1.vec, test.t1.a, test.t1.b, test.t1.c, test.t1.d, vec_cosine_distance(test.t1.vec, [1,1,1])->Column#9", - " └─TableFullScan_23 10.00 mpp[tiflash] table:t1, index:idx_embedding(vec) keep order:false, annIndex:COSINE(vec..[1,1,1], limit:10)" + "└─TopN_10 10.00 root Column#10, offset:0, count:10", + " └─TableReader_28 10.00 root MppVersion: 2, data:ExchangeSender_27", + " └─ExchangeSender_27 10.00 mpp[tiflash] ExchangeType: PassThrough", + " └─TopN_26 10.00 mpp[tiflash] Column#10, offset:0, count:10", + " └─Projection_25 10.00 mpp[tiflash] test.t1.id, test.t1.vec, test.t1.a, test.t1.b, test.t1.c, test.t1.d, vec_cosine_distance(test.t1.vec, [1,1,1])->Column#10", + " └─TableFullScan_24 10.00 mpp[tiflash] table:t1, index:idx_embedding(vec) keep order:false, annIndex:COSINE(vec..[1,1,1], limit:10)" ], "Warn": null }, @@ -626,12 +560,12 @@ "SQL": "explain select id, vec_cosine_distance(vec, '[1,1,1]') as d, a, b from t1 order by d limit 10", "Plan": [ "Projection_6 10.00 root test.t1.id, vec_cosine_distance(test.t1.vec, [1,1,1])->Column#7, test.t1.a, test.t1.b", - "└─TopN_10 10.00 root Column#9, offset:0, count:10", - " └─TableReader_27 10.00 root MppVersion: 2, data:ExchangeSender_26", - " └─ExchangeSender_26 10.00 mpp[tiflash] ExchangeType: PassThrough", - " └─TopN_25 10.00 mpp[tiflash] Column#9, offset:0, count:10", - " └─Projection_24 10.00 mpp[tiflash] test.t1.id, test.t1.vec, test.t1.a, test.t1.b, vec_cosine_distance(test.t1.vec, [1,1,1])->Column#9", - " └─TableFullScan_23 10.00 mpp[tiflash] table:t1, index:idx_embedding(vec) keep order:false, annIndex:COSINE(vec..[1,1,1], limit:10)" + "└─TopN_10 10.00 root Column#10, offset:0, count:10", + " └─TableReader_28 10.00 root MppVersion: 2, data:ExchangeSender_27", + " └─ExchangeSender_27 10.00 mpp[tiflash] ExchangeType: PassThrough", + " └─TopN_26 10.00 mpp[tiflash] Column#10, offset:0, count:10", + " └─Projection_25 10.00 mpp[tiflash] test.t1.id, test.t1.vec, test.t1.a, test.t1.b, vec_cosine_distance(test.t1.vec, [1,1,1])->Column#10", + " └─TableFullScan_24 10.00 mpp[tiflash] table:t1, index:idx_embedding(vec) keep order:false, annIndex:COSINE(vec..[1,1,1], limit:10)" ], "Warn": null }, @@ -639,12 +573,12 @@ "SQL": "explain select id, a, b, vec_cosine_distance(vec, '[1,1,1]') as d from t1 order by d limit 10", "Plan": [ "Projection_6 10.00 root test.t1.id, test.t1.a, test.t1.b, vec_cosine_distance(test.t1.vec, [1,1,1])->Column#7", - "└─TopN_10 10.00 root Column#9, offset:0, count:10", - " └─TableReader_27 10.00 root MppVersion: 2, data:ExchangeSender_26", - " └─ExchangeSender_26 10.00 mpp[tiflash] ExchangeType: PassThrough", - " └─TopN_25 10.00 mpp[tiflash] Column#9, offset:0, count:10", - " └─Projection_24 10.00 mpp[tiflash] test.t1.id, test.t1.vec, test.t1.a, test.t1.b, vec_cosine_distance(test.t1.vec, [1,1,1])->Column#9", - " └─TableFullScan_23 10.00 mpp[tiflash] table:t1, index:idx_embedding(vec) keep order:false, annIndex:COSINE(vec..[1,1,1], limit:10)" + "└─TopN_10 10.00 root Column#10, offset:0, count:10", + " └─TableReader_28 10.00 root MppVersion: 2, data:ExchangeSender_27", + " └─ExchangeSender_27 10.00 mpp[tiflash] ExchangeType: PassThrough", + " └─TopN_26 10.00 mpp[tiflash] Column#10, offset:0, count:10", + " └─Projection_25 10.00 mpp[tiflash] test.t1.id, test.t1.vec, test.t1.a, test.t1.b, vec_cosine_distance(test.t1.vec, [1,1,1])->Column#10", + " └─TableFullScan_24 10.00 mpp[tiflash] table:t1, index:idx_embedding(vec) keep order:false, annIndex:COSINE(vec..[1,1,1], limit:10)" ], "Warn": null } @@ -656,9 +590,8 @@ { "SQL": "explain select id from t1", "Plan": [ - "TableReader_8 6000.00 root data:Projection_4", - "└─Projection_4 6000.00 cop[tikv] test.t1.id", - " └─TableFullScan_7 6000.00 cop[tikv] table:t1 keep order:false" + "TableReader_5 6000.00 root data:TableFullScan_4", + "└─TableFullScan_4 6000.00 cop[tikv] table:t1 keep order:false" ], "Warn": null }, @@ -738,9 +671,9 @@ "SQL": "explain select id, vec_cosine_distance(vec, '[1,1,1]') as d from t1 order by d", "Plan": [ "Sort_4 6000.00 root Column#7", - "└─TableReader_11 6000.00 root data:Projection_7", - " └─Projection_7 6000.00 cop[tikv] test.t1.id, vec_cosine_distance(test.t1.vec, [1,1,1])->Column#7", - " └─TableFullScan_10 6000.00 cop[tikv] table:t1 keep order:false" + "└─Projection_6 6000.00 root test.t1.id, vec_cosine_distance(test.t1.vec, [1,1,1])->Column#7", + " └─TableReader_8 6000.00 root data:TableFullScan_7", + " └─TableFullScan_7 6000.00 cop[tikv] table:t1 keep order:false" ], "Warn": null }, @@ -782,4 +715,4 @@ } ] } -] +] \ No newline at end of file diff --git a/pkg/planner/core/casetest/vectorsearch/vector_index_test.go b/pkg/planner/core/casetest/vectorsearch/vector_index_test.go index 3c097e7b90d88..3110204efe121 100644 --- a/pkg/planner/core/casetest/vectorsearch/vector_index_test.go +++ b/pkg/planner/core/casetest/vectorsearch/vector_index_test.go @@ -206,9 +206,9 @@ func TestANNIndexNormalizedPlan(t *testing.T) { require.Equal(t, []string{ " Projection root test.t.vec", " └─TopN root ?", - " └─Projection root test.t.vec, vec_cosine_distance(test.t.vec, ?)", - " └─TableReader root ", - " └─TopN cop vec_cosine_distance(test.t.vec, ?)", + " └─TableReader root ", + " └─TopN cop ?", + " └─Projection cop test.t.vec, vec_cosine_distance(test.t.vec, ?)", " └─TableFullScan cop table:t, range:[?,?], keep order:false", }, p2) tbl.Meta().TiFlashReplica.Available = true @@ -313,6 +313,7 @@ func TestANNIndexWithNonIntClusteredPk(t *testing.T) { require.Equal(t, types.KindMinNotNull, tableScan.Ranges[0].LowVal[0].Kind()) require.Equal(t, types.KindMaxValue, tableScan.Ranges[0].HighVal[0].Kind()) } + func prepareVectorSearchWithPK(t *testing.T) *testkit.TestKit { store, dom := testkit.CreateMockStoreAndDomainWithSchemaLease(t, 200*time.Millisecond, mockstore.WithMockTiFlash(2)) tk := testkit.NewTestKit(t, store) @@ -348,7 +349,6 @@ func prepareVectorSearchWithPK(t *testing.T) *testkit.TestKit { tk.MustExec("create table doc(id INT, doc LONGTEXT)") testkit.SetTiFlashReplica(t, dom, "test", "t1") - testkit.SetTiFlashReplica(t, dom, "test", "tp") return tk } @@ -385,9 +385,6 @@ func TestVectorSearchWithPKAuto(t *testing.T) { func TestVectorSearchWithPKForceTiKV(t *testing.T) { tk := prepareVectorSearchWithPK(t) tk.MustExec("set @@tidb_isolation_read_engines = 'tikv'") - // enable the tikv AllowProjectionPushDown to use vector search optimization. - // https://github.com/tidbcloud/tidb-cse/pull/1426 - tk.Session().GetSessionVars().AllowProjectionPushDown = true var input []string var output []struct { diff --git a/pkg/planner/core/task.go b/pkg/planner/core/task.go index 204000b56145e..9724032f79b3d 100644 --- a/pkg/planner/core/task.go +++ b/pkg/planner/core/task.go @@ -878,7 +878,7 @@ func (p *NominalSort) Attach2Task(tasks ...base.Task) base.Task { func (p *PhysicalTopN) getPushedDownTopN(childPlan base.PhysicalPlan, storeTp kv.StoreType) (*PhysicalTopN, *PhysicalTopN) { var newGlobalTopN *PhysicalTopN - fixValue := fixcontrol.GetBoolWithDefault(p.SCtx().GetSessionVars().GetOptimizerFixControlMap(), fixcontrol.Fix56318, true) + fixValue := fixcontrol.GetBoolWithDefault(p.SCtx().GetSessionVars().GetOptimizerFixControlMap(), fixcontrol.Fix58217, true) // HeavyFunctionOptimize: if TopN's ByItems is a HeavyFunction (currently mainly for Vector Search), we will change // the ByItems in order to reuse the function result. if fixValue && ContainHeavyFunction(p.ByItems[0].Expr) { @@ -906,6 +906,19 @@ func (p *PhysicalTopN) getPushedDownTopN(childPlan base.PhysicalPlan, storeTp kv // but "regionNum" is unknown since the copTask can be a double read, so we ignore it now. stats := util.DeriveLimitStats(childProfile, float64(newCount)) + // Add a extra physicalProjection to save the distance column, a example like : + // select id from t order by vec_distance(vec, '[1,2,3]') limit x + // The Plan will be modified like: + // + // Original: DataSource(id, vec) -> TopN(by vec->dis) -> Projection(id) + // └─Byitem: vec_distance(vec, '[1,2,3]') + // └─Schema: id, vec + // + // New: DataSource(id, vec) -> Projection(id, vec->dis) -> TopN(by dis) -> Projection(id) + // └─Byitem: dis + // └─Schema: id, dis + // + // Note that for plan now, TopN has its own schema and does not use the schema of children. if newGlobalTopN != nil { // create a new PhysicalProjection to calculate the distance columns, and add it into plan route bottomProjSchemaCols := make([]*expression.Column, 0, len(childPlan.Schema().Columns)) diff --git a/pkg/planner/util/fixcontrol/get.go b/pkg/planner/util/fixcontrol/get.go index 8fe66b36f70e2..d6f4b1657cb15 100644 --- a/pkg/planner/util/fixcontrol/get.go +++ b/pkg/planner/util/fixcontrol/get.go @@ -71,9 +71,9 @@ const ( Fix52869 uint64 = 52869 // Fix54337 controls whether to apply or not range intersection for index access. Fix54337 uint64 = 54337 - // Fix56318 controls whether to do HeavyFunctionOptimize. Currently this only applies to Vector data types and + // Fix58217 controls whether to do HeavyFunctionOptimize. Currently this only applies to Vector data types and // their functions. The HeavyFunctionOptimize eliminate the usage of the function in TopN operators - Fix56318 uint64 = 56318 + Fix58217 uint64 = 58217 ) // GetStr fetches the given key from the fix control map as a string type. From 51c9f63b1da1ff854ff2a9134718fa73a0a1cff2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CEricZequan=E2=80=9D?= Date: Thu, 12 Dec 2024 21:56:45 +0800 Subject: [PATCH 4/9] update MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: “EricZequan” --- pkg/planner/core/task.go | 2 +- pkg/planner/util/fixcontrol/get.go | 5 ++--- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/pkg/planner/core/task.go b/pkg/planner/core/task.go index 9724032f79b3d..60005c182bc61 100644 --- a/pkg/planner/core/task.go +++ b/pkg/planner/core/task.go @@ -878,7 +878,7 @@ func (p *NominalSort) Attach2Task(tasks ...base.Task) base.Task { func (p *PhysicalTopN) getPushedDownTopN(childPlan base.PhysicalPlan, storeTp kv.StoreType) (*PhysicalTopN, *PhysicalTopN) { var newGlobalTopN *PhysicalTopN - fixValue := fixcontrol.GetBoolWithDefault(p.SCtx().GetSessionVars().GetOptimizerFixControlMap(), fixcontrol.Fix58217, true) + fixValue := fixcontrol.GetBoolWithDefault(p.SCtx().GetSessionVars().GetOptimizerFixControlMap(), fixcontrol.Fix56318, true) // HeavyFunctionOptimize: if TopN's ByItems is a HeavyFunction (currently mainly for Vector Search), we will change // the ByItems in order to reuse the function result. if fixValue && ContainHeavyFunction(p.ByItems[0].Expr) { diff --git a/pkg/planner/util/fixcontrol/get.go b/pkg/planner/util/fixcontrol/get.go index d6f4b1657cb15..eb4b7818f7ee6 100644 --- a/pkg/planner/util/fixcontrol/get.go +++ b/pkg/planner/util/fixcontrol/get.go @@ -71,9 +71,8 @@ const ( Fix52869 uint64 = 52869 // Fix54337 controls whether to apply or not range intersection for index access. Fix54337 uint64 = 54337 - // Fix58217 controls whether to do HeavyFunctionOptimize. Currently this only applies to Vector data types and - // their functions. The HeavyFunctionOptimize eliminate the usage of the function in TopN operators - Fix58217 uint64 = 58217 + // Fix56318 controls whether to do HeavyFunctionOptimize. The HeavyFunctionOptimize eliminate the usage of the function in TopN operators + Fix56318 uint64 = 56318 ) // GetStr fetches the given key from the fix control map as a string type. From ea66d1ce3ce62fb0cf2c9f5905347bed03907cde Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CEricZequan=E2=80=9D?= Date: Thu, 12 Dec 2024 22:01:20 +0800 Subject: [PATCH 5/9] fix ci MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: “EricZequan” --- pkg/planner/util/fixcontrol/get.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pkg/planner/util/fixcontrol/get.go b/pkg/planner/util/fixcontrol/get.go index eb4b7818f7ee6..a3049e49d99e3 100644 --- a/pkg/planner/util/fixcontrol/get.go +++ b/pkg/planner/util/fixcontrol/get.go @@ -71,7 +71,8 @@ const ( Fix52869 uint64 = 52869 // Fix54337 controls whether to apply or not range intersection for index access. Fix54337 uint64 = 54337 - // Fix56318 controls whether to do HeavyFunctionOptimize. The HeavyFunctionOptimize eliminate the usage of the function in TopN operators + // Fix56318 controls whether to do HeavyFunctionOptimize. The HeavyFunctionOptimize eliminate the usage of + // the function in TopN operators Fix56318 uint64 = 56318 ) From c8c70ad1b45871ef6dd51f55ba84bdf668dc45b6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CEricZequan=E2=80=9D?= Date: Fri, 13 Dec 2024 12:14:40 +0800 Subject: [PATCH 6/9] fix ci MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: “EricZequan” --- pkg/expression/integration_test/integration_test.go | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/pkg/expression/integration_test/integration_test.go b/pkg/expression/integration_test/integration_test.go index 88a8c0013f5c1..3943b9239ee0e 100644 --- a/pkg/expression/integration_test/integration_test.go +++ b/pkg/expression/integration_test/integration_test.go @@ -339,12 +339,11 @@ func TestVectorConstantExplain(t *testing.T) { )) tk.MustQuery(`EXPLAIN format = 'brief' SELECT VEC_COSINE_DISTANCE(c, '[1,2,3,4,5,6,7,8,9,10,11]') AS d FROM t ORDER BY d LIMIT 10;`).Check(testkit.Rows( "Projection 10.00 root vec_cosine_distance(test.t.c, [1,2,3,4,5,(6 more)...])->Column#3", - "└─Projection 10.00 root test.t.c", - " └─TopN 10.00 root Column#4, offset:0, count:10", - " └─Projection 10.00 root test.t.c, vec_cosine_distance(test.t.c, [1,2,3,4,5,(6 more)...])->Column#4", - " └─TableReader 10.00 root data:TopN", - " └─TopN 10.00 cop[tikv] vec_cosine_distance(test.t.c, [1,2,3,4,5,(6 more)...]), offset:0, count:10", - " └─TableFullScan 10000.00 cop[tikv] table:t keep order:false, stats:pseudo", + "└─TopN 10.00 root Column#4, offset:0, count:10", + " └─TableReader 10.00 root data:TopN", + " └─TopN 10.00 cop[tikv] Column#4, offset:0, count:10", + " └─Projection 10.00 cop[tikv] test.t.c, vec_cosine_distance(test.t.c, [1,2,3,4,5,(6 more)...])->Column#4", + " └─TableFullScan 10000.00 cop[tikv] table:t keep order:false, stats:pseudo", )) // Prepare a large Vector string From 027997461700a785774f16635944cd48d1ae64df Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CEricZequan=E2=80=9D?= Date: Thu, 19 Dec 2024 11:02:55 +0800 Subject: [PATCH 7/9] fix ci MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: “EricZequan” --- .../testdata/ann_index_suite_out.json | 35 ++++++++++--------- 1 file changed, 18 insertions(+), 17 deletions(-) diff --git a/pkg/planner/core/casetest/vectorsearch/testdata/ann_index_suite_out.json b/pkg/planner/core/casetest/vectorsearch/testdata/ann_index_suite_out.json index 270561bcc5af7..232b1dc9eac01 100644 --- a/pkg/planner/core/casetest/vectorsearch/testdata/ann_index_suite_out.json +++ b/pkg/planner/core/casetest/vectorsearch/testdata/ann_index_suite_out.json @@ -99,7 +99,7 @@ " └─ExchangeSender 1.00 mpp[tiflash] ExchangeType: PassThrough", " └─TopN 1.00 mpp[tiflash] Column#7:desc, offset:0, count:1", " └─Projection 1.00 mpp[tiflash] test.t1.vec, test.t1.a, test.t1.b, test.t1.c, test.t1.d, vec_cosine_distance(test.t1.vec, [1,1,1])->Column#7", - " └─TableFullScan 49152.00 mpp[tiflash] table:t1 keep order:false, stats:pseudo" + " └─TableFullScan 48.00 mpp[tiflash] table:t1 keep order:false, stats:pseudo" ], "Warn": null }, @@ -112,7 +112,7 @@ " └─ExchangeSender 1.00 mpp[tiflash] ExchangeType: PassThrough", " └─TopN 1.00 mpp[tiflash] Column#7:desc, offset:0, count:1", " └─Projection 1.00 mpp[tiflash] test.t1.vec, test.t1.a, test.t1.b, test.t1.c, test.t1.d, vec_cosine_distance(test.t1.vec, [1,1,1])->Column#7", - " └─TableFullScan 49152.00 mpp[tiflash] table:t1 keep order:false, stats:pseudo" + " └─TableFullScan 48.00 mpp[tiflash] table:t1 keep order:false, stats:pseudo" ], "Warn": null }, @@ -125,7 +125,7 @@ " └─ExchangeSender 1.00 mpp[tiflash] ExchangeType: PassThrough", " └─TopN 1.00 mpp[tiflash] Column#7, offset:0, count:1", " └─Projection 1.00 mpp[tiflash] test.t1.vec, test.t1.a, test.t1.b, test.t1.c, test.t1.d, plus(vec_cosine_distance(test.t1.vec, [1,1,1]), 1)->Column#7", - " └─TableFullScan 49152.00 mpp[tiflash] table:t1 keep order:false, stats:pseudo" + " └─TableFullScan 48.00 mpp[tiflash] table:t1 keep order:false, stats:pseudo" ], "Warn": null }, @@ -138,7 +138,7 @@ " └─ExchangeSender 1.00 mpp[tiflash] ExchangeType: PassThrough", " └─TopN 1.00 mpp[tiflash] Column#7, test.t1.vec, offset:0, count:1", " └─Projection 1.00 mpp[tiflash] test.t1.vec, test.t1.a, test.t1.b, test.t1.c, test.t1.d, vec_cosine_distance(test.t1.vec, [1,1,1])->Column#7", - " └─TableFullScan 49152.00 mpp[tiflash] table:t1 keep order:false, stats:pseudo" + " └─TableFullScan 48.00 mpp[tiflash] table:t1 keep order:false, stats:pseudo" ], "Warn": null }, @@ -151,7 +151,7 @@ " └─ExchangeSender 1.00 mpp[tiflash] ExchangeType: PassThrough", " └─TopN 1.00 mpp[tiflash] Column#7, offset:0, count:1", " └─Projection 1.00 mpp[tiflash] test.t1.vec, test.t1.a, test.t1.b, test.t1.c, test.t1.d, vec_l2_distance(test.t1.vec, [1,1,1])->Column#7", - " └─TableFullScan 49152.00 mpp[tiflash] table:t1 keep order:false, stats:pseudo" + " └─TableFullScan 48.00 mpp[tiflash] table:t1 keep order:false, stats:pseudo" ], "Warn": null }, @@ -164,7 +164,7 @@ " └─ExchangeSender 1.00 mpp[tiflash] ExchangeType: PassThrough", " └─TopN 1.00 mpp[tiflash] Column#7, offset:0, count:1", " └─Projection 1.00 mpp[tiflash] test.t1.vec, test.t1.a, test.t1.b, test.t1.c, test.t1.d, vec_l1_distance(test.t1.vec, [1,1,1])->Column#7", - " └─TableFullScan 49152.00 mpp[tiflash] table:t1 keep order:false, stats:pseudo" + " └─TableFullScan 48.00 mpp[tiflash] table:t1 keep order:false, stats:pseudo" ], "Warn": null }, @@ -177,7 +177,7 @@ " └─ExchangeSender 1.00 mpp[tiflash] ExchangeType: PassThrough", " └─TopN 1.00 mpp[tiflash] Column#7, offset:0, count:1", " └─Projection 1.00 mpp[tiflash] test.t1.vec, test.t1.a, test.t1.b, test.t1.c, test.t1.d, vec_l2_distance(test.t1.c, [1,1,1])->Column#7", - " └─TableFullScan 49152.00 mpp[tiflash] table:t1 keep order:false, stats:pseudo" + " └─TableFullScan 48.00 mpp[tiflash] table:t1 keep order:false, stats:pseudo" ], "Warn": null }, @@ -190,7 +190,7 @@ " └─ExchangeSender 1.00 mpp[tiflash] ExchangeType: PassThrough", " └─TopN 1.00 mpp[tiflash] Column#7, offset:0, count:1", " └─Projection 1.00 mpp[tiflash] test.t1.vec, test.t1.a, test.t1.b, test.t1.c, test.t1.d, vec_l2_distance(test.t1.d, [1,1,1])->Column#7", - " └─TableFullScan 49152.00 mpp[tiflash] table:t1 keep order:false, stats:pseudo" + " └─TableFullScan 48.00 mpp[tiflash] table:t1 keep order:false, stats:pseudo" ], "Warn": null }, @@ -203,7 +203,7 @@ " └─ExchangeSender 1.00 mpp[tiflash] ExchangeType: PassThrough", " └─TopN 1.00 mpp[tiflash] Column#7, offset:0, count:1", " └─Projection 1.00 mpp[tiflash] test.t1.vec, test.t1.a, test.t1.b, test.t1.c, test.t1.d, vec_cosine_distance(test.t1.d, [1,1,1])->Column#7", - " └─TableFullScan 49152.00 mpp[tiflash] table:t1 keep order:false, stats:pseudo" + " └─TableFullScan 48.00 mpp[tiflash] table:t1 keep order:false, stats:pseudo" ], "Warn": null }, @@ -216,7 +216,7 @@ " └─ExchangeSender 1.00 mpp[tiflash] ExchangeType: PassThrough", " └─TopN 1.00 mpp[tiflash] Column#7, offset:0, count:1", " └─Projection 1.00 mpp[tiflash] test.t1.vec, test.t1.a, test.t1.b, test.t1.c, test.t1.d, vec_l1_distance(test.t1.d, [1,1,1])->Column#7", - " └─TableFullScan 49152.00 mpp[tiflash] table:t1 keep order:false, stats:pseudo" + " └─TableFullScan 48.00 mpp[tiflash] table:t1 keep order:false, stats:pseudo" ], "Warn": null }, @@ -260,13 +260,14 @@ { "SQL": "explain format = 'brief' select * from t1 where a=0 order by vec_cosine_distance(vec, '[1,1,1]') limit 1", "Plan": [ - "Projection 1.00 root test.t1.vec, test.t1.a, test.t1.b, test.t1.c, test.t1.d", - "└─TopN 1.00 root Column#7, offset:0, count:1", - " └─TableReader 1.00 root MppVersion: 2, data:ExchangeSender", - " └─ExchangeSender 1.00 mpp[tiflash] ExchangeType: PassThrough", - " └─TopN 1.00 mpp[tiflash] Column#7, offset:0, count:1", - " └─Projection 1.00 mpp[tiflash] test.t1.vec, test.t1.a, test.t1.b, test.t1.c, test.t1.d, vec_cosine_distance(test.t1.vec, [1,1,1])->Column#7", - " └─TableFullScan 49.15 mpp[tiflash] table:t1 pushed down filter:eq(test.t1.a, 0), keep order:false, stats:pseudo" + "Projection 0.05 root test.t1.vec, test.t1.a, test.t1.b, test.t1.c, test.t1.d", + "└─TopN 0.05 root Column#7, offset:0, count:1", + " └─TableReader 0.05 root MppVersion: 2, data:ExchangeSender", + " └─ExchangeSender 0.05 mpp[tiflash] ExchangeType: PassThrough", + " └─TopN 0.05 mpp[tiflash] Column#7, offset:0, count:1", + " └─Projection 0.05 mpp[tiflash] test.t1.vec, test.t1.a, test.t1.b, test.t1.c, test.t1.d, vec_cosine_distance(test.t1.vec, [1,1,1])->Column#7", + " └─Selection 0.05 mpp[tiflash] eq(test.t1.a, 0)", + " └─TableFullScan 48.00 mpp[tiflash] table:t1 pushed down filter:empty, keep order:false, stats:pseudo" ], "Warn": null } From 40785c97af2a2fd3035682156100c3da44d4dd0e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CEricZequan=E2=80=9D?= Date: Thu, 19 Dec 2024 20:23:14 +0800 Subject: [PATCH 8/9] fix comment MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: “EricZequan” --- pkg/planner/core/task.go | 40 ++++++++++++++++++++++++++++++++-------- 1 file changed, 32 insertions(+), 8 deletions(-) diff --git a/pkg/planner/core/task.go b/pkg/planner/core/task.go index 94def20be562b..82daee30a607c 100644 --- a/pkg/planner/core/task.go +++ b/pkg/planner/core/task.go @@ -881,7 +881,13 @@ func (p *PhysicalTopN) getPushedDownTopN(childPlan base.PhysicalPlan, storeTp kv fixValue := fixcontrol.GetBoolWithDefault(p.SCtx().GetSessionVars().GetOptimizerFixControlMap(), fixcontrol.Fix56318, true) // HeavyFunctionOptimize: if TopN's ByItems is a HeavyFunction (currently mainly for Vector Search), we will change // the ByItems in order to reuse the function result. - if fixValue && ContainHeavyFunction(p.ByItems[0].Expr) { + byItemIndex := make([]int, 0) + for i, byItem := range p.ByItems { + if ContainHeavyFunction(byItem.Expr) { + byItemIndex = append(byItemIndex, i) + } + } + if fixValue && len(byItemIndex) > 0 { x, err := p.Clone(p.SCtx()) if err != nil { return nil, nil @@ -928,12 +934,25 @@ func (p *PhysicalTopN) getPushedDownTopN(childPlan base.PhysicalPlan, storeTp kv bottomProjSchemaCols = append(bottomProjSchemaCols, newCol) bottomProjExprs = append(bottomProjExprs, newCol) } - bottomProjExprs = append(bottomProjExprs, newGlobalTopN.ByItems[0].Expr) - distanceCol := &expression.Column{ - UniqueID: newGlobalTopN.SCtx().GetSessionVars().AllocPlanColumnID(), - RetType: newGlobalTopN.ByItems[0].Expr.GetType(p.SCtx().GetExprCtx().GetEvalCtx()), + type DistanceColItem struct { + Index int + DistanceCol *expression.Column + } + distanceCols := make([]DistanceColItem, 0) + for _, idx := range byItemIndex { + bottomProjExprs = append(bottomProjExprs, newGlobalTopN.ByItems[idx].Expr) + distanceCol := &expression.Column{ + UniqueID: newGlobalTopN.SCtx().GetSessionVars().AllocPlanColumnID(), + RetType: newGlobalTopN.ByItems[idx].Expr.GetType(p.SCtx().GetExprCtx().GetEvalCtx()), + } + distanceCols = append(distanceCols, DistanceColItem{ + Index: idx, + DistanceCol: distanceCol, + }) + } + for _, dis := range distanceCols { + bottomProjSchemaCols = append(bottomProjSchemaCols, dis.DistanceCol) } - bottomProjSchemaCols = append(bottomProjSchemaCols, distanceCol) bottomProj := PhysicalProjection{ Exprs: bottomProjExprs, @@ -947,9 +966,14 @@ func (p *PhysicalTopN) getPushedDownTopN(childPlan base.PhysicalPlan, storeTp kv Count: newCount, }.Init(p.SCtx(), stats, p.QueryBlockOffset(), p.GetChildReqProps(0)) // mppTask's topN - topN.ByItems[0].Expr = distanceCol + for _, expr := range distanceCols { + topN.ByItems[expr.Index].Expr = expr.DistanceCol + } + // rootTask's topn, need reuse the distance col - newGlobalTopN.ByItems[0].Expr = distanceCol + for _, expr := range distanceCols { + newGlobalTopN.ByItems[expr.Index].Expr = expr.DistanceCol + } topN.SetChildren(bottomProj) return topN, newGlobalTopN From 0c3027e121e250c2e9f8aca12c80f7ada05a0427 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CEricZequan=E2=80=9D?= Date: Fri, 20 Dec 2024 15:41:48 +0800 Subject: [PATCH 9/9] fix comment MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: “EricZequan” --- pkg/planner/core/casetest/vectorsearch/vector_index_test.go | 1 - 1 file changed, 1 deletion(-) diff --git a/pkg/planner/core/casetest/vectorsearch/vector_index_test.go b/pkg/planner/core/casetest/vectorsearch/vector_index_test.go index b967f4c6b2c8e..1f1129bd3f81e 100644 --- a/pkg/planner/core/casetest/vectorsearch/vector_index_test.go +++ b/pkg/planner/core/casetest/vectorsearch/vector_index_test.go @@ -322,7 +322,6 @@ func prepareVectorSearchWithPK(t *testing.T) *testkit.TestKit { tk.MustExec("use test") tk.MustExec("drop table if exists t1") - tk.MustExec("drop table if exists tp") tk.MustExec("drop table if exists doc") // A non-partitioned table