From 1c1688e9399ad483cdb400c526fef93f41f5163b Mon Sep 17 00:00:00 2001 From: Adrien Grand Date: Fri, 8 Sep 2017 16:25:22 +0200 Subject: [PATCH] Deduplicate `_field_names`. This is a minor optimization that should save some utf8 conversions and indexing. --- .../index/mapper/FieldNamesFieldMapper.java | 14 +++++++++-- .../mapper/FieldNamesFieldMapperTests.java | 23 ++++++++++++++++++- 2 files changed, 34 insertions(+), 3 deletions(-) diff --git a/core/src/main/java/org/elasticsearch/index/mapper/FieldNamesFieldMapper.java b/core/src/main/java/org/elasticsearch/index/mapper/FieldNamesFieldMapper.java index 4396cba58cc28..c2923be4c74ab 100644 --- a/core/src/main/java/org/elasticsearch/index/mapper/FieldNamesFieldMapper.java +++ b/core/src/main/java/org/elasticsearch/index/mapper/FieldNamesFieldMapper.java @@ -258,9 +258,19 @@ protected void parseCreateField(ParseContext context, List field return; } for (ParseContext.Document document : context.docs()) { - final List paths = new ArrayList<>(); + final List paths = new ArrayList<>(document.getFields().size()); + String previousPath = ""; // used as a sentinel - field names can't be empty for (IndexableField field : document.getFields()) { - paths.add(field.name()); + final String path = field.name(); + if (path.equals(previousPath)) { + // Sometimes mappers create multiple Lucene fields, eg. one for indexing, + // one for doc values and one for storing. Deduplicating is not required + // for correctness but this simple check helps save utf-8 conversions and + // gives Lucene fewer values to deal with. + continue; + } + paths.add(path); + previousPath = path; } for (String path : paths) { for (String fieldName : extractFieldNames(path)) { diff --git a/core/src/test/java/org/elasticsearch/index/mapper/FieldNamesFieldMapperTests.java b/core/src/test/java/org/elasticsearch/index/mapper/FieldNamesFieldMapperTests.java index dde37962af586..9d25e2b70b89c 100644 --- a/core/src/test/java/org/elasticsearch/index/mapper/FieldNamesFieldMapperTests.java +++ b/core/src/test/java/org/elasticsearch/index/mapper/FieldNamesFieldMapperTests.java @@ -38,6 +38,7 @@ import java.util.Collections; import java.util.List; import java.util.Map; +import java.util.Set; import java.util.SortedSet; import java.util.TreeSet; import java.util.function.Supplier; @@ -56,7 +57,7 @@ private static SortedSet set(T... values) { return new TreeSet<>(Arrays.asList(values)); } - void assertFieldNames(SortedSet expected, ParsedDocument doc) { + void assertFieldNames(Set expected, ParsedDocument doc) { String[] got = doc.rootDoc().getValues("_field_names"); assertEquals(expected, set(got)); } @@ -120,6 +121,26 @@ public void testExplicitEnabled() throws Exception { assertFieldNames(set("field", "field.keyword", "_id", "_version", "_seq_no", "_primary_term", "_source"), doc); } + public void testDedup() throws Exception { + String mapping = XContentFactory.jsonBuilder().startObject().startObject("type") + .startObject("_field_names").field("enabled", true).endObject() + .endObject().endObject().string(); + DocumentMapper docMapper = createIndex("test").mapperService().documentMapperParser().parse("type", new CompressedXContent(mapping)); + FieldNamesFieldMapper fieldNamesMapper = docMapper.metadataMapper(FieldNamesFieldMapper.class); + assertTrue(fieldNamesMapper.fieldType().isEnabled()); + + ParsedDocument doc = docMapper.parse(SourceToParse.source("test", "type", "1", XContentFactory.jsonBuilder() + .startObject() + .field("field", 3) // will create 2 lucene fields under the hood: index and doc values + .endObject() + .bytes(), + XContentType.JSON)); + + Set fields = set("field", "_id", "_version", "_seq_no", "_primary_term", "_source"); + assertFieldNames(fields, doc); + assertEquals(fields.size(), doc.rootDoc().getValues("_field_names").length); + } + public void testDisabled() throws Exception { String mapping = XContentFactory.jsonBuilder().startObject().startObject("type") .startObject("_field_names").field("enabled", false).endObject()