-
Notifications
You must be signed in to change notification settings - Fork 132
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add support for search using the "fields" parameter with knn_vector field #2314
base: main
Are you sure you want to change the base?
Changes from 4 commits
b49e6b2
064fdbc
57c41e5
0dc9f1b
c595c3d
d8c10f9
1c26853
2afb483
28225e0
c4094b0
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -11,6 +11,7 @@ | |
import org.apache.lucene.util.BytesRef; | ||
import org.opensearch.index.fielddata.IndexFieldData; | ||
import org.opensearch.index.mapper.MappedFieldType; | ||
import org.opensearch.index.mapper.ArraySourceValueFetcher; | ||
import org.opensearch.index.mapper.TextSearchInfo; | ||
import org.opensearch.index.mapper.ValueFetcher; | ||
import org.opensearch.index.query.QueryShardContext; | ||
|
@@ -51,7 +52,12 @@ public KNNVectorFieldType(String name, Map<String, String> metadata, VectorDataT | |
|
||
@Override | ||
public ValueFetcher valueFetcher(QueryShardContext context, SearchLookup searchLookup, String format) { | ||
throw new UnsupportedOperationException("KNN Vector do not support fields search"); | ||
return new ArraySourceValueFetcher(name(), context) { | ||
martin-gaievski marked this conversation as resolved.
Show resolved
Hide resolved
|
||
@Override | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Please add a UT for this. |
||
protected Object parseSourceValue(Object value) { | ||
return value; | ||
} | ||
}; | ||
} | ||
|
||
@Override | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -13,7 +13,7 @@ | |
|
||
import com.google.common.collect.ImmutableList; | ||
import com.google.common.primitives.Floats; | ||
import java.util.Locale; | ||
|
||
import lombok.SneakyThrows; | ||
import org.apache.hc.core5.http.ParseException; | ||
import org.junit.BeforeClass; | ||
|
@@ -39,6 +39,7 @@ | |
import java.net.URL; | ||
import java.util.Arrays; | ||
import java.util.List; | ||
import java.util.Locale; | ||
import java.util.Map; | ||
import java.util.TreeMap; | ||
|
||
|
@@ -900,6 +901,136 @@ public void testKNNIndex_whenBuildVectorGraphThresholdIsProvidedEndToEnd_thenBui | |
deleteKNNIndex(indexName); | ||
} | ||
|
||
public void testKNNIndexSearchFieldsParameter() throws Exception { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Also lets add an IT test where some the docs don't have vector field and some have vector field in it. |
||
createKnnIndex(INDEX_NAME, createKnnIndexMapping(Arrays.asList("vector1", "vector2", "vector3"), Arrays.asList(2, 3, 5))); | ||
for (int i = 1; i <= 10; i++) { | ||
Float[] vector1 = { (float) i, (float) (i + 1) }; | ||
Float[] vector2 = { (float) i, (float) (i + 1), (float) (i + 2) }; | ||
Float[] vector3 = { (float) i, (float) (i + 1), (float) (i + 2), (float) (i + 3), (float) (i + 4) }; | ||
addKnnDoc( | ||
INDEX_NAME, | ||
Integer.toString(i), | ||
Arrays.asList("vector1", "vector2", "vector3"), | ||
Arrays.asList(vector1, vector2, vector3) | ||
); | ||
} | ||
int k = 100; // nearest 100 neighbors | ||
|
||
// Create search body, all fields | ||
XContentBuilder builder = XContentFactory.jsonBuilder() | ||
.startObject() | ||
.field("fields", new String[] { "*" }) | ||
.startObject("query") | ||
.startObject("match_all") | ||
Comment on lines
+945
to
+949
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. why we are using a match all query here? Since a query is a query and it will solve the purpose, I would recommend having both match_all and a k-NN query to ensure that all cases are covered. |
||
.endObject() | ||
.endObject() | ||
.endObject(); | ||
Response response = searchKNNIndex(INDEX_NAME, builder, k); | ||
List<KNNResult> resultsField1 = parseSearchResponseScriptFields(EntityUtils.toString(response.getEntity()), "vector1"); | ||
List<KNNResult> resultsField2 = parseSearchResponseScriptFields(EntityUtils.toString(response.getEntity()), "vector2"); | ||
List<KNNResult> resultsField3 = parseSearchResponseScriptFields(EntityUtils.toString(response.getEntity()), "vector3"); | ||
assertEquals(10, resultsField1.size()); | ||
assertEquals(10, resultsField2.size()); | ||
assertEquals(10, resultsField3.size()); | ||
|
||
// Create search body, some fields | ||
builder = XContentFactory.jsonBuilder() | ||
.startObject() | ||
.field("fields", new String[] { "vector1", "vector2" }) | ||
.startObject("query") | ||
.startObject("match_all") | ||
.endObject() | ||
.endObject() | ||
.endObject(); | ||
Response response2 = searchKNNIndex(INDEX_NAME, builder, k); | ||
resultsField1 = parseSearchResponseScriptFields(EntityUtils.toString(response.getEntity()), "vector1"); | ||
e-emoto marked this conversation as resolved.
Show resolved
Hide resolved
|
||
resultsField2 = parseSearchResponseScriptFields(EntityUtils.toString(response.getEntity()), "vector2"); | ||
expectThrows( | ||
NullPointerException.class, | ||
() -> parseSearchResponseScriptFields(EntityUtils.toString(response2.getEntity()), "vector3") | ||
); | ||
assertEquals(10, resultsField1.size()); | ||
assertEquals(10, resultsField2.size()); | ||
} | ||
e-emoto marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
public void testKNNIndexSearchFieldsParameterWithOtherFields() throws Exception { | ||
XContentBuilder xContentBuilder = XContentFactory.jsonBuilder() | ||
.startObject() | ||
.startObject("properties") | ||
.startObject("vector1") | ||
.field("type", "knn_vector") | ||
.field("dimension", "2") | ||
.endObject() | ||
.startObject("vector2") | ||
.field("type", "knn_vector") | ||
.field("dimension", "3") | ||
.endObject() | ||
.startObject("float1") | ||
.field("type", "float") | ||
.endObject() | ||
.startObject("float2") | ||
.field("type", "float") | ||
.endObject() | ||
.endObject() | ||
.endObject(); | ||
createKnnIndex(INDEX_NAME, xContentBuilder.toString()); | ||
for (int i = 1; i <= 10; i++) { | ||
Float[] vector1 = { (float) i, (float) (i + 1) }; | ||
Float[] vector2 = { (float) i, (float) (i + 1), (float) (i + 2) }; | ||
Float[] text1 = { (float) i }; | ||
Float[] text2 = { (float) (i + 1) }; | ||
addKnnDoc( | ||
INDEX_NAME, | ||
Integer.toString(i), | ||
Arrays.asList("vector1", "vector2", "float1", "float2"), | ||
Arrays.asList(vector1, vector2, text1, text2) | ||
); | ||
} | ||
int k = 100; // nearest 100 neighbors | ||
|
||
// Create search body, all fields | ||
XContentBuilder builder = XContentFactory.jsonBuilder() | ||
.startObject() | ||
.field("fields", new String[] { "*" }) | ||
.startObject("query") | ||
.startObject("match_all") | ||
.endObject() | ||
.endObject() | ||
.endObject(); | ||
Response response = searchKNNIndex(INDEX_NAME, builder, k); | ||
List<KNNResult> resultsField1 = parseSearchResponseScriptFields(EntityUtils.toString(response.getEntity()), "vector1"); | ||
List<KNNResult> resultsField2 = parseSearchResponseScriptFields(EntityUtils.toString(response.getEntity()), "vector2"); | ||
List<KNNResult> resultsField3 = parseSearchResponseScriptFields(EntityUtils.toString(response.getEntity()), "float1"); | ||
List<KNNResult> resultsField4 = parseSearchResponseScriptFields(EntityUtils.toString(response.getEntity()), "float2"); | ||
assertEquals(10, resultsField1.size()); | ||
assertEquals(10, resultsField2.size()); | ||
assertEquals(10, resultsField3.size()); | ||
assertEquals(10, resultsField4.size()); | ||
|
||
// Create search body, some fields | ||
builder = XContentFactory.jsonBuilder() | ||
.startObject() | ||
.field("fields", new String[] { "vector1", "float2" }) | ||
.startObject("query") | ||
.startObject("match_all") | ||
.endObject() | ||
.endObject() | ||
.endObject(); | ||
Response response2 = searchKNNIndex(INDEX_NAME, builder, k); | ||
resultsField1 = parseSearchResponseScriptFields(EntityUtils.toString(response.getEntity()), "vector1"); | ||
expectThrows( | ||
NullPointerException.class, | ||
() -> parseSearchResponseScriptFields(EntityUtils.toString(response2.getEntity()), "vector2") | ||
); | ||
expectThrows( | ||
NullPointerException.class, | ||
() -> parseSearchResponseScriptFields(EntityUtils.toString(response2.getEntity()), "float1") | ||
); | ||
resultsField4 = parseSearchResponseScriptFields(EntityUtils.toString(response.getEntity()), "float2"); | ||
assertEquals(10, resultsField1.size()); | ||
assertEquals(10, resultsField4.size()); | ||
} | ||
|
||
private List<KNNResult> getResults(final String indexName, final String fieldName, final float[] vector, final int k) | ||
throws IOException, ParseException { | ||
final Response searchResponseField = searchKNNIndex(indexName, new KNNQueryBuilder(fieldName, vector, k), k); | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
What does this format variable do? Do we have to add any check for that?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Also do you see problem with backward compatibility? Does this work for index created before 2.19?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm not sure exactly what it does, in my testing it was always null when
valueFetcher
was called for KNNVectorFieldType.I don't see any reason why it would have issues with backward compatibility, but how would I test this to confirm it?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Since this is change in query path, I don't see any reason why BWC will fail.