Skip to content

Commit 801665a

Browse files
[FEATURE][ML] Only write numeric fields to data frame (#35961)
1 parent 3f49eef commit 801665a

File tree

3 files changed

+157
-2
lines changed

3 files changed

+157
-2
lines changed

x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportRunAnalyticsAction.java

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -185,6 +185,9 @@ private void runPipelineAnalytics(String index, ActionListener<AcknowledgedRespo
185185
listener::onFailure
186186
);
187187

188+
// TODO Creating the data extractor factory below can fail, in which case the copied index is left behind.
189+
// We could delete the index in case of failure or we could try building the factory before reindexing
190+
// to catch the error early on.
188191
DataFrameDataExtractorFactory.create(client, Collections.emptyMap(), index, dataExtractorFactoryListener);
189192
}
190193
}

x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/analytics/DataFrameDataExtractorFactory.java

Lines changed: 39 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,23 +7,29 @@
77

88
import org.elasticsearch.ResourceNotFoundException;
99
import org.elasticsearch.action.ActionListener;
10+
import org.elasticsearch.action.fieldcaps.FieldCapabilities;
1011
import org.elasticsearch.action.fieldcaps.FieldCapabilitiesAction;
1112
import org.elasticsearch.action.fieldcaps.FieldCapabilitiesRequest;
1213
import org.elasticsearch.action.fieldcaps.FieldCapabilitiesResponse;
1314
import org.elasticsearch.client.Client;
1415
import org.elasticsearch.index.IndexNotFoundException;
16+
import org.elasticsearch.index.mapper.NumberFieldMapper;
1517
import org.elasticsearch.index.query.QueryBuilders;
1618
import org.elasticsearch.xpack.core.ClientHelper;
19+
import org.elasticsearch.xpack.core.ml.utils.ExceptionsHelper;
1720
import org.elasticsearch.xpack.ml.datafeed.extractor.fields.ExtractedField;
1821
import org.elasticsearch.xpack.ml.datafeed.extractor.fields.ExtractedFields;
1922

2023
import java.util.ArrayList;
2124
import java.util.Arrays;
2225
import java.util.Collections;
26+
import java.util.Iterator;
2327
import java.util.List;
2428
import java.util.Map;
2529
import java.util.Objects;
2630
import java.util.Set;
31+
import java.util.stream.Collectors;
32+
import java.util.stream.Stream;
2733

2834
public class DataFrameDataExtractorFactory {
2935

@@ -33,6 +39,20 @@ public class DataFrameDataExtractorFactory {
3339
private static final List<String> IGNORE_FIELDS = Arrays.asList("_id", "_field_names", "_index", "_parent", "_routing", "_seq_no",
3440
"_source", "_type", "_uid", "_version", "_feature", "_ignored");
3541

42+
/**
43+
* The types supported by data frames
44+
*/
45+
private static final Set<String> COMPATIBLE_FIELD_TYPES;
46+
47+
static {
48+
Set<String> compatibleTypes = Stream.of(NumberFieldMapper.NumberType.values())
49+
.map(NumberFieldMapper.NumberType::typeName)
50+
.collect(Collectors.toSet());
51+
compatibleTypes.add("scaled_float"); // have to add manually since scaled_float is in a module
52+
53+
COMPATIBLE_FIELD_TYPES = Collections.unmodifiableSet(compatibleTypes);
54+
}
55+
3656
private final Client client;
3757
private final String index;
3858
private final ExtractedFields extractedFields;
@@ -82,10 +102,27 @@ public static void create(Client client, Map<String, String> headers, String ind
82102
});
83103
}
84104

85-
private static ExtractedFields detectExtractedFields(FieldCapabilitiesResponse fieldCapabilitiesResponse) {
105+
// Visible for testing
106+
static ExtractedFields detectExtractedFields(FieldCapabilitiesResponse fieldCapabilitiesResponse) {
86107
Set<String> fields = fieldCapabilitiesResponse.get().keySet();
87108
fields.removeAll(IGNORE_FIELDS);
88-
return ExtractedFields.build(new ArrayList<>(fields), Collections.emptySet(), fieldCapabilitiesResponse)
109+
removeFieldsWithIncompatibleTypes(fields, fieldCapabilitiesResponse);
110+
ExtractedFields extractedFields = ExtractedFields.build(new ArrayList<>(fields), Collections.emptySet(), fieldCapabilitiesResponse)
89111
.filterFields(ExtractedField.ExtractionMethod.DOC_VALUE);
112+
if (extractedFields.getAllFields().isEmpty()) {
113+
throw ExceptionsHelper.badRequestException("No compatible fields could be detected");
114+
}
115+
return extractedFields;
116+
}
117+
118+
private static void removeFieldsWithIncompatibleTypes(Set<String> fields, FieldCapabilitiesResponse fieldCapabilitiesResponse) {
119+
Iterator<String> fieldsIterator = fields.iterator();
120+
while (fieldsIterator.hasNext()) {
121+
String field = fieldsIterator.next();
122+
Map<String, FieldCapabilities> fieldCaps = fieldCapabilitiesResponse.getField(field);
123+
if (fieldCaps == null || COMPATIBLE_FIELD_TYPES.containsAll(fieldCaps.keySet()) == false) {
124+
fieldsIterator.remove();
125+
}
126+
}
90127
}
91128
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,115 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the Elastic License;
4+
* you may not use this file except in compliance with the Elastic License.
5+
*/
6+
package org.elasticsearch.xpack.ml.analytics;
7+
8+
import org.elasticsearch.ElasticsearchStatusException;
9+
import org.elasticsearch.action.fieldcaps.FieldCapabilities;
10+
import org.elasticsearch.action.fieldcaps.FieldCapabilitiesResponse;
11+
import org.elasticsearch.test.ESTestCase;
12+
import org.elasticsearch.xpack.ml.datafeed.extractor.fields.ExtractedField;
13+
import org.elasticsearch.xpack.ml.datafeed.extractor.fields.ExtractedFields;
14+
15+
import java.util.HashMap;
16+
import java.util.List;
17+
import java.util.Map;
18+
import java.util.stream.Collectors;
19+
20+
import static org.hamcrest.Matchers.containsInAnyOrder;
21+
import static org.hamcrest.Matchers.equalTo;
22+
import static org.mockito.Mockito.mock;
23+
import static org.mockito.Mockito.when;
24+
25+
public class DataFrameDataExtractorFactoryTests extends ESTestCase {
26+
27+
public void testDetectExtractedFields_GivenFloatField() {
28+
FieldCapabilitiesResponse fieldCapabilities= new MockFieldCapsResponseBuilder()
29+
.addAggregatableField("some_float", "float").build();
30+
31+
ExtractedFields extractedFields = DataFrameDataExtractorFactory.detectExtractedFields(fieldCapabilities);
32+
33+
List<ExtractedField> allFields = extractedFields.getAllFields();
34+
assertThat(allFields.size(), equalTo(1));
35+
assertThat(allFields.get(0).getName(), equalTo("some_float"));
36+
}
37+
38+
public void testDetectExtractedFields_GivenNumericFieldWithMultipleTypes() {
39+
FieldCapabilitiesResponse fieldCapabilities= new MockFieldCapsResponseBuilder()
40+
.addAggregatableField("some_number", "long", "integer", "short", "byte", "double", "float", "half_float", "scaled_float")
41+
.build();
42+
43+
ExtractedFields extractedFields = DataFrameDataExtractorFactory.detectExtractedFields(fieldCapabilities);
44+
45+
List<ExtractedField> allFields = extractedFields.getAllFields();
46+
assertThat(allFields.size(), equalTo(1));
47+
assertThat(allFields.get(0).getName(), equalTo("some_number"));
48+
}
49+
50+
public void testDetectExtractedFields_GivenNonNumericField() {
51+
FieldCapabilitiesResponse fieldCapabilities= new MockFieldCapsResponseBuilder()
52+
.addAggregatableField("some_keyword", "keyword").build();
53+
54+
ElasticsearchStatusException e = expectThrows(ElasticsearchStatusException.class,
55+
() -> DataFrameDataExtractorFactory.detectExtractedFields(fieldCapabilities));
56+
assertThat(e.getMessage(), equalTo("No compatible fields could be detected"));
57+
}
58+
59+
public void testDetectExtractedFields_GivenFieldWithNumericAndNonNumericTypes() {
60+
FieldCapabilitiesResponse fieldCapabilities= new MockFieldCapsResponseBuilder()
61+
.addAggregatableField("indecisive_field", "float", "keyword").build();
62+
63+
ElasticsearchStatusException e = expectThrows(ElasticsearchStatusException.class,
64+
() -> DataFrameDataExtractorFactory.detectExtractedFields(fieldCapabilities));
65+
assertThat(e.getMessage(), equalTo("No compatible fields could be detected"));
66+
}
67+
68+
public void testDetectExtractedFields_GivenMultipleFields() {
69+
FieldCapabilitiesResponse fieldCapabilities= new MockFieldCapsResponseBuilder()
70+
.addAggregatableField("some_float", "float")
71+
.addAggregatableField("some_long", "long")
72+
.addAggregatableField("some_keyword", "keyword")
73+
.build();
74+
75+
ExtractedFields extractedFields = DataFrameDataExtractorFactory.detectExtractedFields(fieldCapabilities);
76+
77+
List<ExtractedField> allFields = extractedFields.getAllFields();
78+
assertThat(allFields.size(), equalTo(2));
79+
assertThat(allFields.stream().map(ExtractedField::getName).collect(Collectors.toSet()),
80+
containsInAnyOrder("some_float", "some_long"));
81+
}
82+
83+
public void testDetectExtractedFields_GivenIgnoredField() {
84+
FieldCapabilitiesResponse fieldCapabilities= new MockFieldCapsResponseBuilder()
85+
.addAggregatableField("_id", "float").build();
86+
87+
ElasticsearchStatusException e = expectThrows(ElasticsearchStatusException.class,
88+
() -> DataFrameDataExtractorFactory.detectExtractedFields(fieldCapabilities));
89+
assertThat(e.getMessage(), equalTo("No compatible fields could be detected"));
90+
}
91+
92+
private static class MockFieldCapsResponseBuilder {
93+
94+
private final Map<String, Map<String, FieldCapabilities>> fieldCaps = new HashMap<>();
95+
96+
private MockFieldCapsResponseBuilder addAggregatableField(String field, String... types) {
97+
Map<String, FieldCapabilities> caps = new HashMap<>();
98+
for (String type : types) {
99+
caps.put(type, new FieldCapabilities(field, type, true, true));
100+
}
101+
fieldCaps.put(field, caps);
102+
return this;
103+
}
104+
105+
private FieldCapabilitiesResponse build() {
106+
FieldCapabilitiesResponse response = mock(FieldCapabilitiesResponse.class);
107+
when(response.get()).thenReturn(fieldCaps);
108+
109+
for (String field : fieldCaps.keySet()) {
110+
when(response.getField(field)).thenReturn(fieldCaps.get(field));
111+
}
112+
return response;
113+
}
114+
}
115+
}

0 commit comments

Comments
 (0)