Skip to content

Commit 9750c8f

Browse files
author
Christoph Büscher
committed
Improve lookup for "include_unmapped" field pattern (#69984)
Currently we use a CharacterRunAutomaton build from all field pattern that have the "include_unmapped" option set. This can lead to unnecessarily large automata for cases where the user unessesarily list all known fields and adds the "include_unmapped" option. We only really need the automaton for pattern that contain wildcards and can look up any other field path directly and only need to do so if the field path isn't indeed mapped. Relates to #69983
1 parent 62f9b3e commit 9750c8f

File tree

2 files changed

+88
-40
lines changed

2 files changed

+88
-40
lines changed

server/src/main/java/org/elasticsearch/search/fetch/subphase/FieldFetcher.java

+70-40
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
import org.elasticsearch.common.Nullable;
1414
import org.elasticsearch.common.document.DocumentField;
1515
import org.elasticsearch.common.regex.Regex;
16+
import org.elasticsearch.common.xcontent.support.XContentMapValues;
1617
import org.elasticsearch.index.mapper.MappedFieldType;
1718
import org.elasticsearch.index.mapper.NestedValueFetcher;
1819
import org.elasticsearch.index.mapper.ObjectMapper;
@@ -38,6 +39,11 @@
3839
*/
3940
public class FieldFetcher {
4041

42+
/**
43+
* Default maximum number of states in the automaton that looks up unmapped fields.
44+
*/
45+
private static final int AUTOMATON_MAX_DETERMINIZED_STATES = 100000;
46+
4147
public static FieldFetcher create(SearchExecutionContext context,
4248
Collection<FieldAndFormat> fieldAndFormats) {
4349
Set<String> nestedMappingPaths = context.hasNested()
@@ -115,23 +121,33 @@ private static FieldFetcher create(SearchExecutionContext context,
115121
}
116122

117123
CharacterRunAutomaton unmappedFieldsFetchAutomaton = null;
118-
if (unmappedFetchPattern.isEmpty() == false) {
124+
// We separate the "include_unmapped" field patters with wildcards from the rest in order to use less
125+
// space in the lookup automaton
126+
Map<Boolean, List<String>> partitions = unmappedFetchPattern.stream()
127+
.collect(Collectors.partitioningBy((s -> Regex.isSimpleMatchPattern(s))));
128+
List<String> unmappedWildcardPattern = partitions.get(true);
129+
List<String> unmappedConcreteFields = partitions.get(false);
130+
if (unmappedWildcardPattern.isEmpty() == false) {
119131
unmappedFieldsFetchAutomaton = new CharacterRunAutomaton(
120-
Regex.simpleMatchToAutomaton(unmappedFetchPattern.toArray(new String[unmappedFetchPattern.size()]))
132+
Regex.simpleMatchToAutomaton(unmappedWildcardPattern.toArray(new String[unmappedWildcardPattern.size()])),
133+
AUTOMATON_MAX_DETERMINIZED_STATES
121134
);
122135
}
123-
return new FieldFetcher(fieldContexts, unmappedFieldsFetchAutomaton);
136+
return new FieldFetcher(fieldContexts, unmappedFieldsFetchAutomaton, unmappedConcreteFields);
124137
}
125138

126139
private final Map<String, FieldContext> fieldContexts;
127140
private final CharacterRunAutomaton unmappedFieldsFetchAutomaton;
141+
private final List<String> unmappedConcreteFields;
128142

129143
private FieldFetcher(
130144
Map<String, FieldContext> fieldContexts,
131-
@Nullable CharacterRunAutomaton unmappedFieldsFetchAutomaton
145+
@Nullable CharacterRunAutomaton unmappedFieldsFetchAutomaton,
146+
@Nullable List<String> unmappedConcreteFields
132147
) {
133148
this.fieldContexts = fieldContexts;
134149
this.unmappedFieldsFetchAutomaton = unmappedFieldsFetchAutomaton;
150+
this.unmappedConcreteFields = unmappedConcreteFields;
135151
}
136152

137153
public Map<String, DocumentField> fetch(SourceLookup sourceLookup) throws IOException {
@@ -145,51 +161,65 @@ public Map<String, DocumentField> fetch(SourceLookup sourceLookup) throws IOExce
145161
documentFields.put(field, new DocumentField(field, parsedValues));
146162
}
147163
}
148-
if (this.unmappedFieldsFetchAutomaton != null) {
149-
collectUnmapped(documentFields, sourceLookup.source(), "", 0);
150-
}
164+
collectUnmapped(documentFields, sourceLookup.source(), "", 0);
151165
return documentFields;
152166
}
153167

154168
private void collectUnmapped(Map<String, DocumentField> documentFields, Map<String, Object> source, String parentPath, int lastState) {
155-
for (String key : source.keySet()) {
156-
Object value = source.get(key);
157-
String currentPath = parentPath + key;
158-
if (this.fieldContexts.containsKey(currentPath)) {
159-
continue;
160-
}
161-
int currentState = step(this.unmappedFieldsFetchAutomaton, key, lastState);
162-
if (currentState == -1) {
163-
// current path doesn't match any fields pattern
164-
continue;
165-
}
166-
if (value instanceof Map) {
167-
// one step deeper into source tree
168-
collectUnmapped(
169-
documentFields,
170-
(Map<String, Object>) value,
171-
currentPath + ".",
172-
step(this.unmappedFieldsFetchAutomaton, ".", currentState)
173-
);
174-
} else if (value instanceof List) {
175-
// iterate through list values
176-
collectUnmappedList(documentFields, (List<?>) value, currentPath, currentState);
177-
} else {
178-
// we have a leaf value
179-
if (this.unmappedFieldsFetchAutomaton.isAccept(currentState)) {
180-
if (value != null) {
181-
DocumentField currentEntry = documentFields.get(currentPath);
182-
if (currentEntry == null) {
183-
List<Object> list = new ArrayList<>();
184-
list.add(value);
185-
documentFields.put(currentPath, new DocumentField(currentPath, list));
186-
} else {
187-
currentEntry.getValues().add(value);
169+
// lookup field patterns containing wildcards
170+
if (this.unmappedFieldsFetchAutomaton != null) {
171+
for (String key : source.keySet()) {
172+
Object value = source.get(key);
173+
String currentPath = parentPath + key;
174+
if (this.fieldContexts.containsKey(currentPath)) {
175+
continue;
176+
}
177+
int currentState = step(this.unmappedFieldsFetchAutomaton, key, lastState);
178+
if (currentState == -1) {
179+
// current path doesn't match any fields pattern
180+
continue;
181+
}
182+
if (value instanceof Map) {
183+
// one step deeper into source tree
184+
collectUnmapped(
185+
documentFields,
186+
(Map<String, Object>) value,
187+
currentPath + ".",
188+
step(this.unmappedFieldsFetchAutomaton, ".", currentState)
189+
);
190+
} else if (value instanceof List) {
191+
// iterate through list values
192+
collectUnmappedList(documentFields, (List<?>) value, currentPath, currentState);
193+
} else {
194+
// we have a leaf value
195+
if (this.unmappedFieldsFetchAutomaton.isAccept(currentState)) {
196+
if (value != null) {
197+
DocumentField currentEntry = documentFields.get(currentPath);
198+
if (currentEntry == null) {
199+
List<Object> list = new ArrayList<>();
200+
list.add(value);
201+
documentFields.put(currentPath, new DocumentField(currentPath, list));
202+
} else {
203+
currentEntry.getValues().add(value);
204+
}
188205
}
189206
}
190207
}
191208
}
192209
}
210+
211+
// lookup concrete fields
212+
if (this.unmappedConcreteFields != null) {
213+
for (String path : unmappedConcreteFields) {
214+
if (this.fieldContexts.containsKey(path)) {
215+
continue; // this is actually a mapped field
216+
}
217+
List<Object> values = XContentMapValues.extractRawValues(path, source);
218+
if (values.isEmpty() == false) {
219+
documentFields.put(path, new DocumentField(path, values));
220+
}
221+
}
222+
}
193223
}
194224

195225
private void collectUnmappedList(Map<String, DocumentField> documentFields, Iterable<?> iterable, String parentPath, int lastState) {

server/src/test/java/org/elasticsearch/search/fetch/subphase/FieldFetcherTests.java

+18
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88

99
package org.elasticsearch.search.fetch.subphase;
1010

11+
import org.apache.lucene.util.automaton.TooComplexToDeterminizeException;
1112
import org.elasticsearch.Version;
1213
import org.elasticsearch.cluster.metadata.IndexMetadata;
1314
import org.elasticsearch.common.Strings;
@@ -844,6 +845,23 @@ public void testLastFormatWins() throws IOException {
844845
assertThat(fields.get("date_field").getValues().get(1), equalTo("12"));
845846
}
846847

848+
/**
849+
* Field patterns retrieved with "include_unmapped" use an automaton with a maximal allowed size internally.
850+
* This test checks we have a bound in place to avoid misuse of this with exceptionally large field patterns
851+
*/
852+
public void testTooManyUnmappedFieldWildcardPattern() throws IOException {
853+
MapperService mapperService = createMapperService();
854+
855+
XContentBuilder source = XContentFactory.jsonBuilder().startObject().field("a", "foo").endObject();
856+
857+
List<FieldAndFormat> fieldAndFormatList = new ArrayList<>();
858+
boolean includeUnmapped = true;
859+
for (int i = 0; i < 1000; i++) {
860+
fieldAndFormatList.add(new FieldAndFormat(randomAlphaOfLength(150) + "*", null, includeUnmapped));
861+
}
862+
expectThrows(TooComplexToDeterminizeException.class, () -> fetchFields(mapperService, source, fieldAndFormatList));
863+
}
864+
847865
private List<FieldAndFormat> fieldAndFormatList(String name, String format, boolean includeUnmapped) {
848866
return Collections.singletonList(new FieldAndFormat(name, format, includeUnmapped));
849867
}

0 commit comments

Comments
 (0)