Merge remote-tracking branch 'upstream/main' into fix_remove

opensearch-project · Jan 3, 2024 · e707b70 · e707b70
2 parents bb39c9b + 7b1c2c7
commit e707b70
Show file tree

Hide file tree

Showing 50 changed files with 4,324 additions and 256 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -119,6 +119,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 - Create separate transport action for render search template action ([#11170](https://github.com/opensearch-project/OpenSearch/pull/11170))
 - Add additional handling in SearchTemplateRequest when simulate is set to true ([#11591](https://github.com/opensearch-project/OpenSearch/pull/11591))
 - Introduce cluster level setting `cluster.index.restrict.replication.type` to prevent replication type setting override during index creations([#11583](https://github.com/opensearch-project/OpenSearch/pull/11583))
+- Add match_only_text field that is optimized for storage by trading off positional queries performance ([#11039](https://github.com/opensearch-project/OpenSearch/pull/11039))
 
 ### Dependencies
 - Bumps jetty version to 9.4.52.v20230823 to fix GMS-2023-1857 ([#9822](https://github.com/opensearch-project/OpenSearch/pull/9822))
@@ -138,7 +139,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 - Bump `actions/github-script` from 6 to 7 ([#11271](https://github.com/opensearch-project/OpenSearch/pull/11271))
 - Bump `jackson` and `jackson_databind` from 2.15.2 to 2.16.0 ([#11273](https://github.com/opensearch-project/OpenSearch/pull/11273))
 - Bump `netty` from 4.1.100.Final to 4.1.101.Final ([#11294](https://github.com/opensearch-project/OpenSearch/pull/11294))
-- Bump `com.avast.gradle:gradle-docker-compose-plugin` from 0.16.12 to 0.17.5 ([#10163](https://github.com/opensearch-project/OpenSearch/pull/10163))
+- Bump `com.avast.gradle:gradle-docker-compose-plugin` from 0.16.12 to 0.17.6 ([#10163](https://github.com/opensearch-project/OpenSearch/pull/10163), [#11692](https://github.com/opensearch-project/OpenSearch/pull/11692))
 - Bump `com.squareup.okhttp3:okhttp` from 4.11.0 to 4.12.0 ([#10861](https://github.com/opensearch-project/OpenSearch/pull/10861))
 - Bump `org.apache.commons:commons-text` from 1.10.0 to 1.11.0 ([#11344](https://github.com/opensearch-project/OpenSearch/pull/11344))
 - Bump `reactor-netty-core` from 1.1.12 to 1.1.13 ([#11350](https://github.com/opensearch-project/OpenSearch/pull/11350))
@@ -204,6 +205,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 - Fix for stuck update action in a bulk with `retry_on_conflict` property ([#11152](https://github.com/opensearch-project/OpenSearch/issues/11152))
 - Fix template setting override for replication type ([#11417](https://github.com/opensearch-project/OpenSearch/pull/11417))
 - Fix Automatic addition of protocol broken in #11512 ([#11609](https://github.com/opensearch-project/OpenSearch/pull/11609))
+- Fix issue when calling Delete PIT endpoint and no PITs exist ([#11711](https://github.com/opensearch-project/OpenSearch/pull/11711))
 
 ### Security
 

diff --git a/buildSrc/build.gradle b/buildSrc/build.gradle
@@ -115,7 +115,7 @@ dependencies {
   api 'org.jdom:jdom2:2.0.6.1'
   api "org.jetbrains.kotlin:kotlin-stdlib-jdk8:${props.getProperty('kotlin')}"
   api 'de.thetaphi:forbiddenapis:3.6'
-  api 'com.avast.gradle:gradle-docker-compose-plugin:0.17.5'
+  api 'com.avast.gradle:gradle-docker-compose-plugin:0.17.6'
   api "org.yaml:snakeyaml:${props.getProperty('snakeyaml')}"
   api 'org.apache.maven:maven-model:3.9.6'
   api 'com.networknt:json-schema-validator:1.0.86'

diff --git a/...yamlRestTest/resources/rest-api-spec/test/search.query/11_match_field_match_only_text.yml b/...yamlRestTest/resources/rest-api-spec/test/search.query/11_match_field_match_only_text.yml
@@ -0,0 +1,70 @@
+# integration tests for queries with specific analysis chains
+
+"match query with stacked stems":
+  - skip:
+      version: " - 2.99.99"
+      reason: "match_only_text was added in 3.0"
+  # Checks that the search analyzer "stacks" each stemmed token on top of its
+  # unstemmed form at the same position, so an AND match for "fox runs" on a
+  # match_only_text field hits documents containing either "runs" or "run".
+  - do:
+      indices.create:
+        index: test
+        body:
+          settings:
+            number_of_shards: 1
+            number_of_replicas: 1
+            analysis:
+              analyzer:
+                index:
+                  tokenizer: standard
+                  filter: [lowercase]
+                search:
+                  tokenizer: standard
+                  filter: [lowercase, keyword_repeat, porter_stem, unique_stem]
+              filter:
+                unique_stem:
+                  type: unique
+                  only_on_same_position: true
+          mappings:
+            properties:
+              text:
+                type: match_only_text
+                analyzer: index
+                search_analyzer: search
+
+  - do:
+      index:
+        index: test
+        id: 1
+        body: { "text": "the fox runs across the street" }
+        refresh: true
+
+  - do:
+      search:
+        rest_total_hits_as_int: true
+        body:
+          query:
+            match:
+              text:
+                query: fox runs
+                operator: AND
+  - match: {hits.total: 1}  # doc 1 contains both "fox" and "runs"
+
+  - do:
+      index:
+        index: test
+        id: 2
+        body: { "text": "run fox run" }
+        refresh: true
+
+  - do:
+      search:
+        rest_total_hits_as_int: true
+        body:
+          query:
+            match:
+              text:
+                query: fox runs
+                operator: AND
+  - match: {hits.total: 2}  # doc 2 only has the stem "run" and must match too
diff --git a/...tTest/resources/rest-api-spec/test/search.query/20_ngram_search_field_match_only_text.yml b/...tTest/resources/rest-api-spec/test/search.query/20_ngram_search_field_match_only_text.yml
@@ -0,0 +1,144 @@
+"ngram search":  # match query on a match_only_text field analyzed into bigrams
+  - skip:
+      version: " - 2.99.99"
+      reason: "match_only_text was added in 3.0"
+  - do:
+      indices.create:
+        index: test
+        body:
+          settings:
+            number_of_shards: 1
+            number_of_replicas: 0
+            analysis:
+              analyzer:
+                my_analyzer:
+                  tokenizer: standard
+                  filter: [my_ngram]
+              filter:
+                my_ngram:
+                  type: ngram
+                  min_gram: 2  # bigrams only; the ngram filter reads min_gram/max_gram
+                  max_gram: 2
+          mappings:
+            properties:
+              text:
+                type: match_only_text
+                analyzer: my_analyzer
+
+  - do:
+      index:
+        index: test
+        id: 1
+        body: { "text": "foo bar baz" }
+        refresh: true
+
+  - do:
+      search:
+        rest_total_hits_as_int: true
+        body:
+          query:
+            match:
+              text:
+                query: foa  # shares the bigram "fo" with the indexed "foo"
+  - match: {hits.total: 1}
+
+---
+"testNGramCopyField":  # ngram-analyzed copy_to target on match_only_text fields
+  - skip:
+      version: " - 2.99.99"
+      reason: "match_only_text was added in 3.0"
+  - do:
+      indices.create:
+        index: test
+        body:
+          settings:
+            number_of_shards: 1
+            number_of_replicas: 0
+            max_ngram_diff: 9  # permits the 1..10 gram range configured below
+            analysis:
+              analyzer:
+                my_ngram_analyzer:
+                  tokenizer: my_ngram_tokenizer
+              tokenizer:
+                my_ngram_tokenizer:
+                  type: ngram
+                  min_gram: 1  # the ngram tokenizer reads min_gram/max_gram
+                  max_gram: 10
+                  token_chars: []
+          mappings:
+            properties:
+              origin:
+                type: match_only_text
+                copy_to: meta
+              meta:
+                type: match_only_text
+                analyzer: my_ngram_analyzer
+
+  - do:
+      index:
+        index: test
+        id: 1
+        body: { "origin": "C.A1234.5678" }
+        refresh: true
+
+  - do:
+      search:
+        rest_total_hits_as_int: true
+        body:
+          query:
+            match:
+              meta:
+                query: "1234"  # quoted so YAML keeps the query text a string
+  - match: {hits.total: 1}
+
+  - do:
+      search:
+        rest_total_hits_as_int: true
+        body:
+          query:
+            match:
+              meta:
+                query: "1234.56"  # quoted so YAML does not read it as a float
+  - match: {hits.total: 1}
+
+  - do:
+      search:
+        rest_total_hits_as_int: true
+        body:
+          query:
+            match:
+              meta:
+                query: A1234
+  - match: {hits.total: 1}
+
+  - do:
+      search:
+        rest_total_hits_as_int: true
+        body:
+          query:
+            term:
+              meta:
+                value: a1234
+  - match: {hits.total: 0}  # no indexed gram equals the lowercase "a1234"
+
+  - do:
+      search:
+        rest_total_hits_as_int: true
+        body:
+          query:
+            match:
+              meta:
+                query: A1234
+                analyzer: my_ngram_analyzer
+  - match: {hits.total: 1}
+
+  - do:
+      search:
+        rest_total_hits_as_int: true
+        body:
+          query:
+            match:
+              meta:
+                query: a1234
+                analyzer: my_ngram_analyzer
+  - match: {hits.total: 1}  # the analyzed query still shares digit grams with the doc
diff --git a/...resources/rest-api-spec/test/search.query/30_ngram_highligthing_field_match_only_text.yml b/...resources/rest-api-spec/test/search.query/30_ngram_highligthing_field_match_only_text.yml
@@ -0,0 +1,137 @@
+"ngram highlighting":  # highlighting on ngram-analyzed match_only_text fields
+  - skip:
+      version: ' - 2.99.99'
+      reason: 'match_only_text was added in 3.0'
+  - do:
+      indices.create:
+        index: test
+        body:
+          settings:
+            index.max_ngram_diff: 19  # allows the 1..20 gram range used below
+            number_of_shards: 1
+            number_of_replicas: 0
+            analysis:
+              analyzer:
+                name_index_analyzer:  # grams produced by the tokenizer
+                  tokenizer: my_ngramt
+                name2_index_analyzer:  # whitespace words expanded by the gram filter
+                  filter: [my_ngram]
+                  tokenizer: whitespace
+                name_search_analyzer:  # query text is split on whitespace only
+                  tokenizer: whitespace
+              tokenizer:
+                my_ngramt:
+                  type: ngram
+                  token_chars: letter,digit
+                  min_gram: 1
+                  max_gram: 20
+              filter:
+                my_ngram:
+                  type: ngram
+                  min_gram: 1
+                  max_gram: 20
+          mappings:
+            properties:
+              name:
+                type: match_only_text
+                analyzer: name_index_analyzer
+                search_analyzer: name_search_analyzer
+                term_vector: with_positions_offsets
+              name2:
+                type: match_only_text
+                analyzer: name2_index_analyzer
+                search_analyzer: name_search_analyzer
+                term_vector: with_positions_offsets
+
+  - do:
+      index:
+        index: test
+        id: 1
+        body:
+          name: logicacmg ehemals avinci - the know how company
+          name2: logicacmg ehemals avinci - the know how company
+        refresh: true
+
+  - do:  # name / "logica m": expects "logica" and every "m" to be wrapped
+      search:
+        rest_total_hits_as_int: true
+        body:
+          query:
+            match:
+              name:
+                query: logica m
+          highlight:
+            fields:
+              - name: {}
+  - match: { hits.total: 1 }
+  - match: { hits.hits.0.highlight.name.0: "<em>logica</em>c<em>m</em>g ehe<em>m</em>als avinci - the know how co<em>m</em>pany" }
+
+  - do:  # name / "logica ma": expects "logica" and the single "ma" to be wrapped
+      search:
+        rest_total_hits_as_int: true
+        body:
+          query:
+            match:
+              name:
+                query: logica ma
+          highlight:
+            fields:
+              - name: {}
+  - match: { hits.total: 1 }
+  - match: { hits.hits.0.highlight.name.0: "<em>logica</em>cmg ehe<em>ma</em>ls avinci - the know how company" }
+
+  - do:  # name / "logica": expects only the "logica" fragment to be wrapped
+      search:
+        rest_total_hits_as_int: true
+        body:
+          query:
+            match:
+              name:
+                query: logica
+          highlight:
+            fields:
+              - name: {}
+  - match: { hits.total: 1 }
+  - match: { hits.hits.0.highlight.name.0: "<em>logica</em>cmg ehemals avinci - the know how company" }
+
+  - do:  # name2 / "logica m": expects whole words containing a match to be wrapped
+      search:
+        rest_total_hits_as_int: true
+        body:
+          query:
+            match:
+              name2:
+                query: logica m
+          highlight:
+            fields:
+              - name2: {}
+  - match: { hits.total: 1 }
+  - match: { hits.hits.0.highlight.name2.0: "<em>logicacmg</em> <em>ehemals</em> avinci - the know how <em>company</em>" }
+
+  - do:  # name2 / "logica ma": expects the two words containing a match to be wrapped
+      search:
+        rest_total_hits_as_int: true
+        body:
+          query:
+            match:
+              name2:
+                query: logica ma
+          highlight:
+            fields:
+              - name2: {}
+  - match: { hits.total: 1 }
+  - match: { hits.hits.0.highlight.name2.0: "<em>logicacmg</em> <em>ehemals</em> avinci - the know how company" }
+
+  - do:  # name2 / "logica": expects only the first word to be wrapped
+      search:
+        rest_total_hits_as_int: true
+        body:
+          query:
+            match:
+              name2:
+                query: logica
+          highlight:
+            fields:
+              - name2: {}
+  - match: { hits.total: 1 }
+  - match: { hits.hits.0.highlight.name2.0: "<em>logicacmg</em> ehemals avinci - the know how company" }