Skip to content

Commit

Permalink
opt: remove incorrect query plans for trigram similarity filters
Browse files Browse the repository at this point in the history
Previously, the optimizer would produce incorrect query plans for
queries with trigram similarity filters when
`pg_trgm.similarity_threshold == 0`, producing incorrect results.

To address this, this patch adds a check to return early if
`pg_trgm.similarity_threshold == 0` in trigram similarity queries on
inverted indices.

Fixes: #122443

Release note (bug fix): The optimizer could produce incorrect query
plans for queries using trigram similarity filters (e.g. `col % 'val'`) when
`pg_trgm.similarity_threshold` was set to 0. This bug was introduced in
v22.2.0 and is now fixed. Note that this issue does not affect v24.2.0+
releases when the `optimizer_use_trigram_similarity_optimization`
session variable (introduced in v24.2.0) is set to its default value
`true`, as it would skip this behaviour.
  • Loading branch information
normanchenn committed Jan 28, 2025
1 parent 87f4a22 commit 496f35f
Show file tree
Hide file tree
Showing 3 changed files with 33 additions and 0 deletions.
26 changes: 26 additions & 0 deletions pkg/sql/logictest/testdata/logic_test/trigram_indexes
Original file line number Diff line number Diff line change
Expand Up @@ -358,3 +358,29 @@ FROM
AS tab (col1_1, col1_2) ON
(tab2.col1) = (tab.col1_1)
AND (tab2.col2) = (tab.col1_2);

subtest trigram_similarity_zero_threshold_inverted_index

statement ok
SET pg_trgm.similarity_threshold = 0;

statement ok
CREATE TABLE trigram_similarity_zero_threshold_inverted_index_a (
a INT PRIMARY KEY,
b STRING,
INVERTED INDEX c (b gin_trgm_ops)
);

statement ok
SET optimizer_use_trigram_similarity_optimization = false;

statement error pq: index "c" is inverted and cannot be used for this query
SELECT * FROM trigram_similarity_zero_threshold_inverted_index_a@c WHERE b % 'foo';

statement ok
SET optimizer_use_trigram_similarity_optimization = true;

statement error pq: index "c" is inverted and cannot be used for this query
SELECT * FROM trigram_similarity_zero_threshold_inverted_index_a@c WHERE b % 'foo';

subtest end
6 changes: 6 additions & 0 deletions pkg/sql/opt/invertedidx/trigram.go
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,12 @@ func (t *trigramFilterPlanner) extractInvertedFilterConditionFromLeaf(
if evalCtx.SessionData().OptimizerUseTrigramSimilarityOptimization {
return inverted.NonInvertedColExpression{}, expr, nil
}

// Do not plan inverted index scans when the trigram similarity threshold is 0
// because all strings will be matched.
if evalCtx.SessionData().TrigramSimilarityThreshold == 0 {
return inverted.NonInvertedColExpression{}, expr, nil
}
// If we're doing a % expression (similarity threshold), we need to
// construct an OR out of the spans: we need to find results that match any
// of the trigrams in the constant datum, and we'll filter the results
Expand Down
1 change: 1 addition & 0 deletions pkg/sql/opt/invertedidx/trigram_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ func TestTryFilterTrigram(t *testing.T) {
semaCtx := tree.MakeSemaContext(nil /* resolver */)
st := cluster.MakeTestingClusterSettings()
evalCtx := eval.NewTestingEvalContext(st)
evalCtx.SessionData().TrigramSimilarityThreshold = 0.3

tc := testcat.New()
if _, err := tc.ExecuteDDL(
Expand Down

0 comments on commit 496f35f

Please sign in to comment.