Skip to content

Commit c47cd48

Browse files
committed
Deduplicate _field_names. (#26550)
This is a minor optimization that should save some UTF-8 conversions and give Lucene fewer values to index.
1 parent 46c1c3a commit c47cd48

File tree

2 files changed

+34
-3
lines changed

2 files changed

+34
-3
lines changed

core/src/main/java/org/elasticsearch/index/mapper/FieldNamesFieldMapper.java

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -258,9 +258,19 @@ protected void parseCreateField(ParseContext context, List<IndexableField> field
258258
return;
259259
}
260260
for (ParseContext.Document document : context.docs()) {
261-
final List<String> paths = new ArrayList<>();
261+
final List<String> paths = new ArrayList<>(document.getFields().size());
262+
String previousPath = ""; // used as a sentinel - field names can't be empty
262263
for (IndexableField field : document.getFields()) {
263-
paths.add(field.name());
264+
final String path = field.name();
265+
if (path.equals(previousPath)) {
266+
// Sometimes mappers create multiple Lucene fields, e.g. one for indexing,
267+
// one for doc values and one for storing. Deduplicating is not required
268+
// for correctness but this simple check helps save UTF-8 conversions and
269+
// gives Lucene fewer values to deal with.
270+
continue;
271+
}
272+
paths.add(path);
273+
previousPath = path;
264274
}
265275
for (String path : paths) {
266276
for (String fieldName : extractFieldNames(path)) {

core/src/test/java/org/elasticsearch/index/mapper/FieldNamesFieldMapperTests.java

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@
3838
import java.util.Collections;
3939
import java.util.List;
4040
import java.util.Map;
41+
import java.util.Set;
4142
import java.util.SortedSet;
4243
import java.util.TreeSet;
4344
import java.util.function.Supplier;
@@ -56,7 +57,7 @@ private static <T> SortedSet<T> set(T... values) {
5657
return new TreeSet<>(Arrays.asList(values));
5758
}
5859

59-
void assertFieldNames(SortedSet<String> expected, ParsedDocument doc) {
60+
void assertFieldNames(Set<String> expected, ParsedDocument doc) {
6061
String[] got = doc.rootDoc().getValues("_field_names");
6162
assertEquals(expected, set(got));
6263
}
@@ -120,6 +121,26 @@ public void testExplicitEnabled() throws Exception {
120121
assertFieldNames(set("field", "field.keyword", "_id", "_version", "_seq_no", "_primary_term", "_source"), doc);
121122
}
122123

124+
public void testDedup() throws Exception {
125+
String mapping = XContentFactory.jsonBuilder().startObject().startObject("type")
126+
.startObject("_field_names").field("enabled", true).endObject()
127+
.endObject().endObject().string();
128+
DocumentMapper docMapper = createIndex("test").mapperService().documentMapperParser().parse("type", new CompressedXContent(mapping));
129+
FieldNamesFieldMapper fieldNamesMapper = docMapper.metadataMapper(FieldNamesFieldMapper.class);
130+
assertTrue(fieldNamesMapper.fieldType().isEnabled());
131+
132+
ParsedDocument doc = docMapper.parse(SourceToParse.source("test", "type", "1", XContentFactory.jsonBuilder()
133+
.startObject()
134+
.field("field", 3) // will create 2 lucene fields under the hood: index and doc values
135+
.endObject()
136+
.bytes(),
137+
XContentType.JSON));
138+
139+
Set<String> fields = set("field", "_id", "_version", "_seq_no", "_primary_term", "_source");
140+
assertFieldNames(fields, doc);
141+
assertEquals(fields.size(), doc.rootDoc().getValues("_field_names").length);
142+
}
143+
123144
public void testDisabled() throws Exception {
124145
String mapping = XContentFactory.jsonBuilder().startObject().startObject("type")
125146
.startObject("_field_names").field("enabled", false).endObject()

0 commit comments

Comments
 (0)