Skip to content

Commit 955bd8d

Browse files
[7.x][ML] Exclude nested fields in data frame analytics (elastic#71400) (elastic#71415)
Previously, the destination index was sorted, which meant it could not contain `nested` fields. That is no longer the case, so `nested` fields may now be present. They were handled incorrectly: the _explain API reported that they could be included in the analysis, which is not true. This commit fixes the issue by detecting `nested` fields, and the children of those `nested` fields, and excluding them from the analysis. Because a `nested` field may contain multiple inner fields, they are collapsed into a single entry holding the path to the top-level nested field, which avoids noise in the API response. Backport of elastic#71400
1 parent 754cda8 commit 955bd8d

File tree

3 files changed

+164
-28
lines changed

3 files changed

+164
-28
lines changed

x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/dataframe/extractor/ExtractedFieldsDetector.java

Lines changed: 64 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@
4747
import java.util.List;
4848
import java.util.Map;
4949
import java.util.Objects;
50+
import java.util.Optional;
5051
import java.util.Set;
5152
import java.util.TreeSet;
5253
import java.util.stream.Collectors;
@@ -65,6 +66,7 @@ public class ExtractedFieldsDetector {
6566
private final int docValueFieldsLimit;
6667
private final FieldCapabilitiesResponse fieldCapabilitiesResponse;
6768
private final Map<String, Long> cardinalitiesForFieldsWithConstraints;
69+
private final List<String> topNestedFieldPrefixes;
6870

6971
ExtractedFieldsDetector(DataFrameAnalyticsConfig config,
7072
int docValueFieldsLimit,
@@ -74,6 +76,26 @@ public class ExtractedFieldsDetector {
7476
this.docValueFieldsLimit = docValueFieldsLimit;
7577
this.fieldCapabilitiesResponse = Objects.requireNonNull(fieldCapabilitiesResponse);
7678
this.cardinalitiesForFieldsWithConstraints = Objects.requireNonNull(cardinalitiesForFieldsWithConstraints);
79+
this.topNestedFieldPrefixes = findTopNestedFieldPrefixes(fieldCapabilitiesResponse);
80+
}
81+
82+
private List<String> findTopNestedFieldPrefixes(FieldCapabilitiesResponse fieldCapabilitiesResponse) {
83+
List<String> sortedNestedFieldPrefixes = fieldCapabilitiesResponse.get().keySet().stream()
84+
.filter(field -> isNested(getMappingTypes(field)))
85+
.map(field -> field + ".")
86+
.sorted()
87+
.collect(Collectors.toList());
88+
Iterator<String> iterator = sortedNestedFieldPrefixes.iterator();
89+
String previousNestedFieldPrefix = null;
90+
while (iterator.hasNext()) {
91+
String nestedFieldPrefix = iterator.next();
92+
if (previousNestedFieldPrefix != null && nestedFieldPrefix.startsWith(previousNestedFieldPrefix)) {
93+
iterator.remove();
94+
} else {
95+
previousNestedFieldPrefix = nestedFieldPrefix;
96+
}
97+
}
98+
return Collections.unmodifiableList(sortedNestedFieldPrefixes);
7799
}
78100

79101
public Tuple<ExtractedFields, List<FieldSelection>> detect() {
@@ -139,7 +161,14 @@ private void validateFieldsRequireForProcessors(Set<String> processorFields) {
139161
}
140162
removeObjects(fieldsForProcessor);
141163
if (fieldsForProcessor.size() < processorFields.size()) {
142-
throw ExceptionsHelper.badRequestException("fields for feature_processors must not be objects");
164+
throw ExceptionsHelper.badRequestException("fields for feature_processors must not be objects or nested");
165+
}
166+
for (String field : fieldsForProcessor) {
167+
Optional<String> matchingNestedFieldPattern = findMatchingNestedFieldPattern(field);
168+
if (matchingNestedFieldPattern.isPresent()) {
169+
throw ExceptionsHelper.badRequestException("nested fields [{}] cannot be used in a feature_processor",
170+
matchingNestedFieldPattern.get());
171+
}
143172
}
144173
Collection<String> errorFields = new ArrayList<>();
145174
for (String fieldName : fieldsForProcessor) {
@@ -190,7 +219,7 @@ private void removeObjects(Set<String> fields) {
190219
while (fieldsIterator.hasNext()) {
191220
String field = fieldsIterator.next();
192221
Set<String> types = getMappingTypes(field);
193-
if (isObject(types)) {
222+
if (isObject(types) || isNested(types)) {
194223
fieldsIterator.remove();
195224
}
196225
}
@@ -210,6 +239,11 @@ private void addExcludedField(String field, String reason, Set<FieldSelection> f
210239
fieldSelection.add(FieldSelection.excluded(field, getMappingTypes(field), reason));
211240
}
212241

242+
private void addExcludedNestedPattern(String pattern, Set<FieldSelection> fieldSelection) {
243+
fieldSelection.add(FieldSelection.excluded(
244+
pattern, Collections.singleton(ObjectMapper.NESTED_CONTENT_TYPE), "nested fields are not supported"));
245+
}
246+
213247
private Set<String> getMappingTypes(String field) {
214248
Map<String, FieldCapabilities> fieldCaps = fieldCapabilitiesResponse.getField(field);
215249
return fieldCaps == null ? Collections.emptySet() : fieldCaps.keySet();
@@ -223,6 +257,11 @@ private void removeFieldsWithIncompatibleTypes(Set<String> fields, Set<FieldSele
223257
addExcludedField(field, "unsupported type; supported types are " + getSupportedTypes(), fieldSelection);
224258
fieldsIterator.remove();
225259
}
260+
Optional<String> matchingNestedFieldPattern = findMatchingNestedFieldPattern(field);
261+
if (matchingNestedFieldPattern.isPresent()) {
262+
addExcludedNestedPattern(matchingNestedFieldPattern.get(), fieldSelection);
263+
fieldsIterator.remove();
264+
}
226265
}
227266
}
228267

@@ -257,6 +296,10 @@ private Set<String> getSupportedTypes() {
257296
return supportedTypes;
258297
}
259298

299+
private Optional<String> findMatchingNestedFieldPattern(String field) {
300+
return topNestedFieldPrefixes.stream().filter(prefix -> field.startsWith(prefix)).map(prefix -> prefix + "*").findFirst();
301+
}
302+
260303
private void includeAndExcludeFields(Set<String> fields, Set<FieldSelection> fieldSelection) {
261304
FetchSourceContext analyzedFields = config.getAnalyzedFields();
262305
if (analyzedFields == null) {
@@ -294,10 +337,10 @@ private void includeAndExcludeFields(Set<String> fields, Set<FieldSelection> fie
294337

295338
private void checkIncludesExcludesAreNotObjects(FetchSourceContext analyzedFields) {
296339
List<String> objectFields = Stream.concat(Arrays.stream(analyzedFields.includes()), Arrays.stream(analyzedFields.excludes()))
297-
.filter(field -> isObject(getMappingTypes(field)))
340+
.filter(field -> isObject(getMappingTypes(field)) || isNested(getMappingTypes(field)))
298341
.collect(Collectors.toList());
299342
if (objectFields.isEmpty() == false) {
300-
throw ExceptionsHelper.badRequestException("{} must not include or exclude object fields: {}",
343+
throw ExceptionsHelper.badRequestException("{} must not include or exclude object or nested fields: {}",
301344
DataFrameAnalyticsConfig.ANALYZED_FIELDS.getPreferredName(), objectFields);
302345
}
303346
}
@@ -317,10 +360,15 @@ private void applyIncludesExcludes(Set<String> fields, Set<String> includes, Set
317360
}
318361
} else {
319362
fieldsIterator.remove();
320-
if (hasCompatibleType(field)) {
321-
addExcludedField(field, "field not in includes list", fieldSelection);
322-
} else {
363+
if (hasCompatibleType(field) == false) {
323364
addExcludedField(field, "unsupported type; supported types are " + getSupportedTypes(), fieldSelection);
365+
} else {
366+
Optional<String> matchingNestedFieldPattern = findMatchingNestedFieldPattern(field);
367+
if (matchingNestedFieldPattern.isPresent()) {
368+
addExcludedNestedPattern(matchingNestedFieldPattern.get(), fieldSelection);
369+
} else {
370+
addExcludedField(field, "field not in includes list", fieldSelection);
371+
}
324372
}
325373
}
326374
}
@@ -337,6 +385,10 @@ private void checkFieldsHaveCompatibleTypes(Set<String> fields) {
337385
throw ExceptionsHelper.badRequestException("field [{}] has unsupported type {}. Supported types are {}.", field,
338386
fieldCaps.keySet(), getSupportedTypes());
339387
}
388+
Optional<String> matchingNestedFieldPattern = findMatchingNestedFieldPattern(field);
389+
if (matchingNestedFieldPattern.isPresent()) {
390+
throw ExceptionsHelper.badRequestException("nested fields [{}] are not supported", matchingNestedFieldPattern.get());
391+
}
340392
}
341393
}
342394

@@ -601,7 +653,11 @@ private static boolean isBoolean(Set<String> types) {
601653
return types.size() == 1 && types.contains(BooleanFieldMapper.CONTENT_TYPE);
602654
}
603655

604-
private boolean isObject(Set<String> types) {
656+
private static boolean isObject(Set<String> types) {
605657
return types.size() == 1 && types.contains(ObjectMapper.CONTENT_TYPE);
606658
}
659+
660+
private static boolean isNested(Set<String> types) {
661+
return types.size() == 1 && types.contains(ObjectMapper.NESTED_CONTENT_TYPE);
662+
}
607663
}

x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/extractor/ExtractedFields.java

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -205,13 +205,16 @@ private boolean isMultiField(String field, String parent) {
205205
return false;
206206
}
207207
Map<String, FieldCapabilities> parentFieldCaps = fieldsCapabilities.getField(parent);
208-
if (parentFieldCaps == null || (parentFieldCaps.size() == 1 && parentFieldCaps.containsKey("object"))) {
209-
// We check if the parent is an object which is indicated by field caps containing an "object" entry.
210-
// If an object, it's not a multi field
208+
if (parentFieldCaps == null || (parentFieldCaps.size() == 1 && isNestedOrObject(parentFieldCaps))) {
209+
// We check if the parent is an object or nested field. If so, it's not a multi field.
211210
return false;
212211
}
213212
return true;
214213
}
214+
215+
private static boolean isNestedOrObject(Map<String, FieldCapabilities> fieldCaps) {
216+
return fieldCaps.containsKey("object") || fieldCaps.containsKey("nested");
217+
}
215218
}
216219

217220
/**

0 commit comments

Comments
 (0)