Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add positive_score_impact to rank_features type #69994

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 34 additions & 3 deletions docs/reference/mapping/types/rank-features.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -20,16 +20,24 @@ PUT my-index-000001
"properties": {
"topics": {
"type": "rank_features" <1>
},
"negative_reviews" : {
"type": "rank_features",
"positive_score_impact": false <2>
}
}
}
}
PUT my-index-000001/_doc/1
{
"topics": { <2>
"topics": { <3>
"politics": 20,
"economics": 50.8
},
"negative_reviews": {
"1star": 10,
"2star": 100
}
}
Expand All @@ -38,21 +46,38 @@ PUT my-index-000001/_doc/2
"topics": {
"politics": 5.2,
"sports": 80.1
},
"negative_reviews": {
"1star": 1,
"2star": 10
}
}
GET my-index-000001/_search
{
"query": {
"query": { <4>
"rank_feature": {
"field": "topics.politics"
}
}
}
GET my-index-000001/_search
{
"query": { <5>
"rank_feature": {
"field": "negative_reviews.1star"
}
}
}
--------------------------------------------------

<1> Rank features fields must use the `rank_features` field type
<2> Rank features fields must be a hash with string keys and strictly positive numeric values
<2> Rank features that correlate negatively with the score must declare it by setting `positive_score_impact` to `false`
<3> Rank features fields must be a hash with string keys and strictly positive numeric values
<4> This query ranks documents by how much they are about the "politics" topic.
<5> This query ranks documents inversely to the number of "1star" reviews they received.


NOTE: `rank_features` fields only support single-valued features and strictly
positive values. Multi-valued fields and zero or negative values will be rejected.
Expand All @@ -63,3 +88,9 @@ only be queried using <<query-dsl-rank-feature-query,`rank_feature`>> queries.
NOTE: `rank_features` fields only preserve 9 significant bits for the
precision, which translates to a relative error of about 0.4%.

Rank features that correlate negatively with the score should set
`positive_score_impact` to `false` (defaults to `true`). The
<<query-dsl-rank-feature-query,`rank_feature`>> query uses this setting to adjust
the scoring formula so that the score decreases, rather than increases, as the
value of the feature grows.

Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@
import org.elasticsearch.search.lookup.SearchLookup;

import java.io.IOException;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.function.Supplier;
Expand All @@ -30,8 +29,14 @@ public class RankFeaturesFieldMapper extends FieldMapper {

public static final String CONTENT_TYPE = "rank_features";

/**
 * Narrows a generic {@link FieldMapper} to a {@link RankFeaturesFieldMapper} and
 * returns its field type.
 *
 * @param in the mapper to narrow; the cast fails with a {@link ClassCastException}
 *           if it is not a {@code RankFeaturesFieldMapper}
 * @return the field type of the given mapper
 */
private static RankFeaturesFieldType ft(FieldMapper in) {
    final RankFeaturesFieldMapper rankFeaturesMapper = (RankFeaturesFieldMapper) in;
    return rankFeaturesMapper.fieldType();
}

public static class Builder extends FieldMapper.Builder {

private final Parameter<Boolean> positiveScoreImpact
= Parameter.boolParam("positive_score_impact", false, m -> ft(m).positiveScoreImpact, true);
private final Parameter<Map<String, String>> meta = Parameter.metaParam();

public Builder(String name) {
Expand All @@ -40,30 +45,37 @@ public Builder(String name) {

/**
 * Returns the mapping parameters this builder exposes.
 *
 * NOTE(review): this page is a diff view with its +/- markers stripped, so the two
 * {@code return} lines below appear to be the removed and the added version of the
 * same statement rather than two statements of one method body.
 */
@Override
protected List<Parameter<?>> getParameters() {
// Apparently the pre-change line: only the meta parameter was exposed.
return Collections.singletonList(meta);
// Apparently the post-change line: positive_score_impact is exposed alongside meta.
return List.of(positiveScoreImpact, meta);
}

/**
 * Builds the {@link RankFeaturesFieldMapper} for this builder's field.
 *
 * NOTE(review): this page is a diff view with its +/- markers stripped; the two pairs
 * of argument lines below appear to be the removed and the added version of the same
 * constructor call.
 *
 * @param contentPath path used to compute the full field name and to build multi-fields
 * @return the mapper configured from this builder's parameter values
 */
@Override
public RankFeaturesFieldMapper build(ContentPath contentPath) {
return new RankFeaturesFieldMapper(
// Apparently the pre-change arguments: field type built from name and meta only.
name, new RankFeaturesFieldType(buildFullName(contentPath), meta.getValue()),
multiFieldsBuilder.build(this, contentPath), copyTo.build());
// Apparently the post-change arguments: the positive_score_impact value is handed to
// both the field type and the mapper itself.
name, new RankFeaturesFieldType(buildFullName(contentPath), meta.getValue(), positiveScoreImpact.getValue()),
multiFieldsBuilder.build(this, contentPath), copyTo.build(), positiveScoreImpact.getValue());
}
}

public static final TypeParser PARSER = new TypeParser((n, c) -> new Builder(n));

public static final class RankFeaturesFieldType extends MappedFieldType {

public RankFeaturesFieldType(String name, Map<String, String> meta) {
private final boolean positiveScoreImpact;

/**
 * Creates the field type for a {@code rank_features} field.
 *
 * @param name                full name of the field
 * @param meta                metadata attached to the field
 * @param positiveScoreImpact value of the {@code positive_score_impact} mapping parameter
 */
public RankFeaturesFieldType(String name, Map<String, String> meta, boolean positiveScoreImpact) {
// NOTE(review): the three boolean flags are defined by MappedFieldType, outside this
// view — confirm their meaning there before changing them.
super(name, false, false, false, TextSearchInfo.NONE, meta);
this.positiveScoreImpact = positiveScoreImpact;
}

/** Returns the mapping type name of this field type, i.e. {@link #CONTENT_TYPE}. */
@Override
public String typeName() {
return CONTENT_TYPE;
}

/**
 * Returns the {@code positiveScoreImpact} flag this field type was constructed with.
 *
 * @return {@code true} unless the field was mapped with a {@code false} flag
 */
public boolean positiveScoreImpact() {
return positiveScoreImpact;
}

@Override
public Query existsQuery(SearchExecutionContext context) {
throw new IllegalArgumentException("[rank_features] fields do not support [exists] queries");
Expand All @@ -85,9 +97,12 @@ public Query termQuery(Object value, SearchExecutionContext context) {
}
}

private final boolean positiveScoreImpact;

/*
 * Creates the mapper and records whether its features impact the score positively.
 *
 * NOTE(review): this page is a diff view with its +/- markers stripped; the two
 * signature continuation lines below appear to be the removed and the added version
 * of the same line.
 */
private RankFeaturesFieldMapper(String simpleName, MappedFieldType mappedFieldType,
MultiFields multiFields, CopyTo copyTo) {
MultiFields multiFields, CopyTo copyTo, boolean positiveScoreImpact) {
super(simpleName, mappedFieldType, Lucene.KEYWORD_ANALYZER, multiFields, copyTo);
// Kept on the mapper because parse() checks it and indexes 1 / value when it is false.
this.positiveScoreImpact = positiveScoreImpact;
}

@Override
Expand Down Expand Up @@ -124,6 +139,9 @@ public void parse(ParseContext context) throws IOException {
throw new IllegalArgumentException("[rank_features] fields do not support indexing multiple values for the same " +
"rank feature [" + key + "] in the same document");
}
if (positiveScoreImpact == false) {
value = 1 / value;
}
context.doc().addWithKey(key, new FeatureField(name(), feature, value));
} else {
throw new IllegalArgumentException("[rank_features] fields take hashes that map a feature to a strictly positive " +
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,8 +45,8 @@ protected void minimalMapping(XContentBuilder b) throws IOException {
}

/*
 * Registers the parameter checks for the rank_features mapper test.
 *
 * NOTE(review): this page is a diff view with its +/- markers stripped; the two
 * signature lines and the "no parameters" comment appear to be the removed and added
 * versions of the method.
 */
@Override
protected void registerParameters(ParameterChecker checker) {
// no parameters to configure
protected void registerParameters(ParameterChecker checker) throws IOException {
// Registers a conflict check that sets positive_score_impact to false; presumably the
// base test asserts that this update is rejected — registerConflictCheck is defined
// outside this view, TODO confirm.
checker.registerConflictCheck("positive_score_impact", b -> b.field("positive_score_impact", false));
}

@Override
Expand Down Expand Up @@ -80,6 +80,33 @@ public void testDefaults() throws Exception {
assertTrue(freq1 < freq2);
}

/**
 * Checks that a {@code rank_features} field mapped with {@code positive_score_impact: false}
 * indexes the "ten" feature with a higher frequency than the "twenty" feature.
 * NOTE(review): the document comes from {@code writeField}, which is outside this view;
 * presumably it writes ten=10 and twenty=20 — confirm.
 */
public void testNegativeScoreImpact() throws Exception {
    DocumentMapper negativeImpactMapper = createDocumentMapper(
        fieldMapping(builder -> builder.field("type", "rank_features").field("positive_score_impact", false))
    );

    ParsedDocument parsed = negativeImpactMapper.parse(source(this::writeField));
    IndexableField[] indexed = parsed.rootDoc().getFields("field");

    assertEquals(2, indexed.length);
    assertThat(indexed[0], Matchers.instanceOf(FeatureField.class));

    // Pick out each feature by name; any other feature name is a test setup error.
    FeatureField tenFeature = null;
    FeatureField twentyFeature = null;
    for (IndexableField candidate : indexed) {
        switch (candidate.stringValue()) {
            case "ten":
                tenFeature = (FeatureField) candidate;
                break;
            case "twenty":
                twentyFeature = (FeatureField) candidate;
                break;
            default:
                throw new UnsupportedOperationException();
        }
    }

    int tenFrequency = RankFeatureFieldMapperTests.getFrequency(tenFeature.tokenStream(null, null));
    int twentyFrequency = RankFeatureFieldMapperTests.getFrequency(twentyFeature.tokenStream(null, null));
    assertTrue(tenFrequency > twentyFrequency);
}

public void testRejectMultiValuedFields() throws MapperParsingException, IOException {
DocumentMapper mapper = createDocumentMapper(mapping(b -> {
b.startObject("field").field("type", "rank_features").endObject();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
public class RankFeaturesFieldTypeTests extends FieldTypeTestCase {

/*
 * Checks that a rank_features field type reports itself as not aggregatable.
 *
 * NOTE(review): this page is a diff view with its +/- markers stripped; the two
 * fieldType declarations appear to be the removed (two-argument) and the added
 * (three-argument) version of the same line.
 */
public void testIsNotAggregatable() {
MappedFieldType fieldType = new RankFeaturesFieldMapper.RankFeaturesFieldType("field", Collections.emptyMap());
MappedFieldType fieldType = new RankFeaturesFieldMapper.RankFeaturesFieldType("field", Collections.emptyMap(), true);
assertFalse(fieldType.isAggregatable());
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,10 @@ setup:
properties:
tags:
type: rank_features
negative_reviews:
type: rank_features
positive_score_impact: false


- do:
index:
Expand All @@ -18,6 +22,9 @@ setup:
tags:
foo: 3
bar: 5
negative_reviews:
1star: 10
2star: 1

- do:
index:
Expand All @@ -27,6 +34,9 @@ setup:
tags:
bar: 6
quux: 10
negative_reviews:
1star: 1
2star: 10

- do:
indices.refresh: {}
Expand Down Expand Up @@ -122,3 +132,35 @@ setup:
hits.hits.1._id: "1"
- match:
hits.hits.1._score: 5.0


---
# Checks ranking on features of the negative_reviews field, which the setup maps
# with positive_score_impact: false: the document with the smaller feature value
# is expected to rank first.
"Linear negative impact":

# Rank by the 1star feature; document "2" is expected first.
# NOTE(review): the document ids are outside this view — presumably document "1"
# holds 1star: 10 and document "2" holds 1star: 1; confirm against the setup.
- do:
search:
index: test
body:
query:
rank_feature:
field: negative_reviews.1star
linear: {}

- match:
hits.hits.0._id: "2"
- match:
hits.hits.1._id: "1"

# Rank by the 2star feature, whose values are swapped between the two documents,
# so the expected order is reversed.
- do:
search:
index: test
body:
query:
rank_feature:
field: negative_reviews.2star
linear: {}

- match:
hits.hits.0._id: "1"
- match:
hits.hits.1._id: "2"