Skip to content

Commit 790cac2

Browse files
[7.6][ML] DF Analytics _explain API should skip object fields (elastic#51115) (elastic#51148)
Object fields cannot be used as features. At the moment the _explain API includes them and, even worse, it does not return an error when an object field is excluded. This leads the user to expect that all child fields will also be excluded, which is not the case. This commit omits object fields from the _explain API and also adds an error if an object field is included or excluded. Backport of elastic#51115
1 parent a3c20d9 commit 790cac2

File tree

3 files changed

+77
-2
lines changed

3 files changed

+77
-2
lines changed

docs/reference/ml/df-analytics/apis/explain-dfanalytics.asciidoc

+3-2
Original file line numberDiff line numberDiff line change
@@ -44,9 +44,10 @@ The following explanations are provided:
4444

4545
* which fields are included or not in the analysis and why,
4646
* how much memory is estimated to be required. The estimate can be used when
47-
deciding the appropriate value for `model_memory_limit` setting later on,
47+
deciding the appropriate value for `model_memory_limit` setting later on.
4848

49-
about either an existing {dfanalytics-job} or one that has not been created yet.
49+
If you have object fields or fields that are excluded via source filtering,
50+
they are not included in the explanation.
5051

5152

5253
[[ml-explain-dfanalytics-path-params]]

x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/dataframe/extractor/ExtractedFieldsDetector.java

+31
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
import org.elasticsearch.common.regex.Regex;
1616
import org.elasticsearch.index.IndexSettings;
1717
import org.elasticsearch.index.mapper.BooleanFieldMapper;
18+
import org.elasticsearch.index.mapper.ObjectMapper;
1819
import org.elasticsearch.search.fetch.subphase.FetchSourceContext;
1920
import org.elasticsearch.xpack.core.ml.dataframe.DataFrameAnalyticsConfig;
2021
import org.elasticsearch.xpack.core.ml.dataframe.analyses.DataFrameAnalysis;
@@ -40,6 +41,7 @@
4041
import java.util.Set;
4142
import java.util.TreeSet;
4243
import java.util.stream.Collectors;
44+
import java.util.stream.Stream;
4345

4446
public class ExtractedFieldsDetector {
4547

@@ -82,6 +84,7 @@ private Set<String> getIncludedFields(Set<FieldSelection> fieldSelection) {
8284
Set<String> fields = new TreeSet<>(fieldCapabilitiesResponse.get().keySet());
8385
fields.removeAll(IGNORE_FIELDS);
8486
removeFieldsUnderResultsField(fields);
87+
removeObjects(fields);
8588
applySourceFiltering(fields);
8689
FetchSourceContext analyzedFields = config.getAnalyzedFields();
8790

@@ -112,6 +115,17 @@ private void removeFieldsUnderResultsField(Set<String> fields) {
112115
fields.removeIf(field -> field.startsWith(resultsField + "."));
113116
}
114117

118+
private void removeObjects(Set<String> fields) {
119+
Iterator<String> fieldsIterator = fields.iterator();
120+
while (fieldsIterator.hasNext()) {
121+
String field = fieldsIterator.next();
122+
Set<String> types = getMappingTypes(field);
123+
if (isObject(types)) {
124+
fieldsIterator.remove();
125+
}
126+
}
127+
}
128+
115129
private void applySourceFiltering(Set<String> fields) {
116130
Iterator<String> fieldsIterator = fields.iterator();
117131
while (fieldsIterator.hasNext()) {
@@ -178,6 +192,9 @@ private void includeAndExcludeFields(Set<String> fields, Set<FieldSelection> fie
178192
if (analyzedFields == null) {
179193
return;
180194
}
195+
196+
checkIncludesExcludesAreNotObjects(analyzedFields);
197+
181198
String includes = analyzedFields.includes().length == 0 ? "*" : Strings.arrayToCommaDelimitedString(analyzedFields.includes());
182199
String excludes = Strings.arrayToCommaDelimitedString(analyzedFields.excludes());
183200

@@ -205,6 +222,16 @@ private void includeAndExcludeFields(Set<String> fields, Set<FieldSelection> fie
205222
}
206223
}
207224

225+
private void checkIncludesExcludesAreNotObjects(FetchSourceContext analyzedFields) {
226+
List<String> objectFields = Stream.concat(Arrays.stream(analyzedFields.includes()), Arrays.stream(analyzedFields.excludes()))
227+
.filter(field -> isObject(getMappingTypes(field)))
228+
.collect(Collectors.toList());
229+
if (objectFields.isEmpty() == false) {
230+
throw ExceptionsHelper.badRequestException("{} must not include or exclude object fields: {}",
231+
DataFrameAnalyticsConfig.ANALYZED_FIELDS.getPreferredName(), objectFields);
232+
}
233+
}
234+
208235
private void applyIncludesExcludes(Set<String> fields, Set<String> includes, Set<String> excludes,
209236
Set<FieldSelection> fieldSelection) {
210237
Iterator<String> fieldsIterator = fields.iterator();
@@ -394,4 +421,8 @@ static Set<String> getCategoricalFields(ExtractedFields extractedFields, DataFra
394421
private static boolean isBoolean(Set<String> types) {
395422
return types.size() == 1 && types.contains(BooleanFieldMapper.CONTENT_TYPE);
396423
}
424+
425+
private boolean isObject(Set<String> types) {
426+
return types.size() == 1 && types.contains(ObjectMapper.CONTENT_TYPE);
427+
}
397428
}

x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/dataframe/extractor/ExtractedFieldsDetectorTests.java

+43
Original file line numberDiff line numberDiff line change
@@ -861,6 +861,49 @@ public void testDetect_GivenSourceFilteringWithExcludes() {
861861
FieldSelection.included("field_22", Collections.singleton("float"), false, FieldSelection.FeatureType.NUMERICAL));
862862
}
863863

864+
public void testDetect_GivenObjectFields() {
865+
FieldCapabilitiesResponse fieldCapabilities = new MockFieldCapsResponseBuilder()
866+
.addAggregatableField("float_field", "float")
867+
.addNonAggregatableField("object_field_1", "object")
868+
.addNonAggregatableField("object_field_2", "object").build();
869+
870+
ExtractedFieldsDetector extractedFieldsDetector = new ExtractedFieldsDetector(
871+
SOURCE_INDEX, buildOutlierDetectionConfig(), 100, fieldCapabilities, Collections.emptyMap());
872+
Tuple<ExtractedFields, List<FieldSelection>> fieldExtraction = extractedFieldsDetector.detect();
873+
874+
List<ExtractedField> allFields = fieldExtraction.v1().getAllFields();
875+
assertThat(allFields, hasSize(1));
876+
assertThat(allFields.get(0).getName(), equalTo("float_field"));
877+
}
878+
879+
public void testDetect_GivenAnalyzedFieldIncludesObjectField() {
880+
FieldCapabilitiesResponse fieldCapabilities = new MockFieldCapsResponseBuilder()
881+
.addAggregatableField("float_field", "float")
882+
.addNonAggregatableField("object_field", "object").build();
883+
884+
analyzedFields = new FetchSourceContext(true, new String[] { "float_field", "object_field" }, null);
885+
886+
ExtractedFieldsDetector extractedFieldsDetector = new ExtractedFieldsDetector(
887+
SOURCE_INDEX, buildOutlierDetectionConfig(), 100, fieldCapabilities, Collections.emptyMap());
888+
ElasticsearchStatusException e = expectThrows(ElasticsearchStatusException.class, extractedFieldsDetector::detect);
889+
890+
assertThat(e.getMessage(), equalTo("analyzed_fields must not include or exclude object fields: [object_field]"));
891+
}
892+
893+
public void testDetect_GivenAnalyzedFieldExcludesObjectField() {
894+
FieldCapabilitiesResponse fieldCapabilities = new MockFieldCapsResponseBuilder()
895+
.addAggregatableField("float_field", "float")
896+
.addNonAggregatableField("object_field", "object").build();
897+
898+
analyzedFields = new FetchSourceContext(true, null, new String[] { "object_field" });
899+
900+
ExtractedFieldsDetector extractedFieldsDetector = new ExtractedFieldsDetector(
901+
SOURCE_INDEX, buildOutlierDetectionConfig(), 100, fieldCapabilities, Collections.emptyMap());
902+
ElasticsearchStatusException e = expectThrows(ElasticsearchStatusException.class, extractedFieldsDetector::detect);
903+
904+
assertThat(e.getMessage(), equalTo("analyzed_fields must not include or exclude object fields: [object_field]"));
905+
}
906+
864907
private DataFrameAnalyticsConfig buildOutlierDetectionConfig() {
865908
return new DataFrameAnalyticsConfig.Builder()
866909
.setId("foo")

0 commit comments

Comments
 (0)