Skip to content

Commit

Permalink
Fix case insensitive query on wildcard field (#15882) (#15936)
Browse files Browse the repository at this point in the history
* fix case insensitive query on wildcard field
* fix YAML test
* add change log

---------

(cherry picked from commit aaa92ae)

Signed-off-by: gesong.samuel <[email protected]>
Signed-off-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
Signed-off-by: gaobinlong <[email protected]>
Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
Co-authored-by: gesong.samuel <[email protected]>
Co-authored-by: gaobinlong <[email protected]>
  • Loading branch information
4 people authored Oct 2, 2024
1 parent e8d498d commit afd1cbc
Show file tree
Hide file tree
Showing 3 changed files with 49 additions and 8 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),

  ### Fixed
  - Fix wildcard query containing escaped character ([#15737](https://github.com/opensearch-project/OpenSearch/pull/15737))
+ - Fix case-insensitive query on wildcard field ([#15882](https://github.com/opensearch-project/OpenSearch/pull/15882))
  - Add validation for the search backpressure cancellation settings ([#15501](https://github.com/opensearch-project/OpenSearch/pull/15501))
  - Fix search_as_you_type not supporting multi-fields ([#15988](https://github.com/opensearch-project/OpenSearch/pull/15988))
  - Avoid infinite loop when `flat_object` field contains invalid token ([#15985](https://github.com/opensearch-project/OpenSearch/pull/15985))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,12 @@ setup:
  id: 6
  body:
  other_field: "test"
+ - do:
+ index:
+ index: test
+ id: 7
+ body:
+ my_field: "ABCD"
  - do:
  indices.refresh: {}

Expand Down Expand Up @@ -90,8 +96,9 @@ setup:
  query:
  term:
  my_field.lower: "abcd"
- - match: { hits.total.value: 1 }
+ - match: { hits.total.value: 2 }
  - match: { hits.hits.0._id: "5" }
+ - match: { hits.hits.1._id: "7" }

  - do:
  search:
Expand All @@ -100,8 +107,9 @@ setup:
  query:
  term:
  my_field.lower: "ABCD"
- - match: { hits.total.value: 1 }
+ - match: { hits.total.value: 2 }
  - match: { hits.hits.0._id: "5" }
+ - match: { hits.hits.1._id: "7" }

  - do:
  search:
Expand Down Expand Up @@ -215,7 +223,7 @@ setup:
  wildcard:
  my_field:
  value: "*"
- - match: { hits.total.value: 5 }
+ - match: { hits.total.value: 6 }
  ---
  "regexp match-all works":
  - do:
Expand All @@ -226,7 +234,7 @@ setup:
  regexp:
  my_field:
  value: ".*"
- - match: { hits.total.value: 5 }
+ - match: { hits.total.value: 6 }
  ---
  "terms query on wildcard field matches":
  - do:
Expand All @@ -237,3 +245,28 @@ setup:
  terms: { my_field: ["AbCd"] }
  - match: { hits.total.value: 1 }
  - match: { hits.hits.0._id: "5" }
+ ---
+ "case insensitive query on wildcard field":
+ - do:
+ search:
+ index: test
+ body:
+ query:
+ wildcard:
+ my_field:
+ value: "AbCd"
+ - match: { hits.total.value: 1 }
+ - match: { hits.hits.0._id: "5" }
+
+ - do:
+ search:
+ index: test
+ body:
+ query:
+ wildcard:
+ my_field:
+ value: "AbCd"
+ case_insensitive: true
+ - match: { hits.total.value: 2 }
+ - match: { hits.hits.0._id: "5" }
+ - match: { hits.hits.1._id: "7" }
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@
  import org.apache.lucene.util.automaton.RegExp;
  import org.opensearch.common.lucene.BytesRefs;
  import org.opensearch.common.lucene.Lucene;
+ import org.opensearch.common.lucene.search.AutomatonQueries;
  import org.opensearch.common.unit.Fuzziness;
  import org.opensearch.core.xcontent.XContentParser;
  import org.opensearch.index.analysis.IndexAnalyzers;
Expand Down Expand Up @@ -464,7 +465,7 @@ public Query wildcardQuery(String value, MultiTermQuery.RewriteMethod method, bo
  return existsQuery(context);
  }
  } else {
- approximation = matchAllTermsQuery(name(), requiredNGrams);
+ approximation = matchAllTermsQuery(name(), requiredNGrams, caseInsensitive);
  }
  return new WildcardMatchingQuery(name(), approximation, matchPredicate, value, context, this);
  }
Expand Down Expand Up @@ -678,7 +679,7 @@ public Query termsQuery(List<?> values, QueryShardContext context) {
  StringBuilder pattern = new StringBuilder();
  for (Object value : values) {
  String stringVal = BytesRefs.toString(value);
- builder.add(matchAllTermsQuery(name(), getRequiredNGrams(stringVal)), BooleanClause.Occur.SHOULD);
+ builder.add(matchAllTermsQuery(name(), getRequiredNGrams(stringVal), false), BooleanClause.Occur.SHOULD);
  expectedValues.add(stringVal);
  if (pattern.length() > 0) {
  pattern.append('|');
Expand All @@ -688,10 +689,16 @@ public Query termsQuery(List<?> values, QueryShardContext context) {
  return new WildcardMatchingQuery(name(), builder.build(), expectedValues::contains, pattern.toString(), context, this);
  }

- private static BooleanQuery matchAllTermsQuery(String fieldName, Set<String> terms) {
+ private static BooleanQuery matchAllTermsQuery(String fieldName, Set<String> terms, boolean caseInsensitive) {
  BooleanQuery.Builder matchAllTermsBuilder = new BooleanQuery.Builder();
+ Query query;
  for (String term : terms) {
- matchAllTermsBuilder.add(new TermQuery(new Term(fieldName, term)), BooleanClause.Occur.FILTER);
+ if (caseInsensitive) {
+ query = AutomatonQueries.caseInsensitiveTermQuery(new Term(fieldName, term));
+ } else {
+ query = new TermQuery(new Term(fieldName, term));
+ }
+ matchAllTermsBuilder.add(query, BooleanClause.Occur.FILTER);
  }
  return matchAllTermsBuilder.build();
  }
Expand Down

0 comments on commit afd1cbc

Please sign in to comment.