Skip to content

Commit 601faf6

Browse files
[7.5][ML] Prevent fetching multi-field from source (#48770) (#48798)
Aggregatable mutli-fields are at the moment wrongly mapped as normal doc_value fields and thus they support fetching from source. However, they do not exist in the source. This results to failure to extract such fields. This commit fixes this bug. While a fix could be worked out on top of the existing code, it is evident the extraction logic has become difficult to understand and maintain. As we also want to deduplicate multi-fields for data frame analytics, it seemed appropriate to refactor the code to simplify and better handle the extraction of multi-fields. Relates #48756 Backport of #48770
1 parent 1a34fe4 commit 601faf6

30 files changed

+1214
-675
lines changed

x-pack/plugin/ml/qa/native-multi-node-tests/src/test/java/org/elasticsearch/xpack/ml/integration/DatafeedJobsRestIT.java

+10-11
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ protected boolean preserveTemplatesUponCompletion() {
5555
return true;
5656
}
5757

58-
private void setupDataAccessRole(String index) throws IOException {
58+
private static void setupDataAccessRole(String index) throws IOException {
5959
Request request = new Request("PUT", "/_security/role/test_data_access");
6060
request.setJsonEntity("{"
6161
+ " \"indices\" : ["
@@ -283,10 +283,12 @@ public void testLookbackOnlyWithSourceDisabled() throws Exception {
283283
new LookbackOnlyTestHelper("test-lookback-only-with-source-disabled", "airline-data-disabled-source").execute();
284284
}
285285

286-
@AwaitsFix(bugUrl = "This test uses painless which is not available in the integTest phase")
287286
public void testLookbackOnlyWithScriptFields() throws Exception {
288-
new LookbackOnlyTestHelper("test-lookback-only-with-script-fields", "airline-data-disabled-source")
289-
.setAddScriptedFields(true).execute();
287+
new LookbackOnlyTestHelper("test-lookback-only-with-script-fields", "airline-data")
288+
.setScriptedFields(
289+
"{\"scripted_airline\":{\"script\":{\"lang\":\"painless\",\"source\":\"doc['airline.keyword'].value\"}}}")
290+
.setAirlineVariant("scripted_airline")
291+
.execute();
290292
}
291293

292294
public void testLookbackOnlyWithNestedFields() throws Exception {
@@ -1088,7 +1090,7 @@ private class LookbackOnlyTestHelper {
10881090
private String jobId;
10891091
private String airlineVariant;
10901092
private String dataIndex;
1091-
private boolean addScriptedFields;
1093+
private String scriptedFields;
10921094
private boolean shouldSucceedInput;
10931095
private boolean shouldSucceedProcessing;
10941096

@@ -1100,8 +1102,8 @@ private class LookbackOnlyTestHelper {
11001102
this.airlineVariant = "airline";
11011103
}
11021104

1103-
public LookbackOnlyTestHelper setAddScriptedFields(boolean value) {
1104-
addScriptedFields = value;
1105+
public LookbackOnlyTestHelper setScriptedFields(String scriptFields) {
1106+
this.scriptedFields = scriptFields;
11051107
return this;
11061108
}
11071109

@@ -1124,10 +1126,7 @@ public LookbackOnlyTestHelper setShouldSucceedProcessing(boolean value) {
11241126
public void execute() throws Exception {
11251127
createJob(jobId, airlineVariant);
11261128
String datafeedId = "datafeed-" + jobId;
1127-
new DatafeedBuilder(datafeedId, jobId, dataIndex)
1128-
.setScriptedFields(addScriptedFields ?
1129-
"{\"airline\":{\"script\":{\"lang\":\"painless\",\"inline\":\"doc['airline'].value\"}}}" : null)
1130-
.build();
1129+
new DatafeedBuilder(datafeedId, jobId, dataIndex).setScriptedFields(scriptedFields).build();
11311130
openJob(client(), jobId);
11321131

11331132
startDatafeedAndWaitUntilStopped(datafeedId);

x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/datafeed/extractor/scroll/ScrollDataExtractor.java

+1-1
Original file line numberDiff line numberDiff line change
@@ -130,7 +130,7 @@ private SearchRequestBuilder buildSearchRequest(long start) {
130130
context.query, context.extractedFields.timeField(), start, context.end));
131131

132132
for (ExtractedField docValueField : context.extractedFields.getDocValueFields()) {
133-
searchRequestBuilder.addDocValueField(docValueField.getName(), docValueField.getDocValueFormat());
133+
searchRequestBuilder.addDocValueField(docValueField.getSearchField(), docValueField.getDocValueFormat());
134134
}
135135
String[] sourceFields = context.extractedFields.getSourceFields();
136136
if (sourceFields.length == 0) {

x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/datafeed/extractor/scroll/SearchHitToJsonProcessor.java

+1-1
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ class SearchHitToJsonProcessor implements Releasable {
2929
public void process(SearchHit hit) throws IOException {
3030
jsonBuilder.startObject();
3131
for (ExtractedField field : fields.getAllFields()) {
32-
writeKeyValue(field.getAlias(), field.value(hit));
32+
writeKeyValue(field.getName(), field.value(hit));
3333
}
3434
jsonBuilder.endObject();
3535
}

x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/datafeed/extractor/scroll/TimeBasedExtractedFields.java

+8-11
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,6 @@
1414

1515
import java.util.ArrayList;
1616
import java.util.Arrays;
17-
import java.util.Collections;
1817
import java.util.List;
1918
import java.util.Objects;
2019
import java.util.Set;
@@ -42,13 +41,13 @@ public String timeField() {
4241
public Long timeFieldValue(SearchHit hit) {
4342
Object[] value = timeField.value(hit);
4443
if (value.length != 1) {
45-
throw new RuntimeException("Time field [" + timeField.getAlias() + "] expected a single value; actual was: "
44+
throw new RuntimeException("Time field [" + timeField.getName() + "] expected a single value; actual was: "
4645
+ Arrays.toString(value));
4746
}
4847
if (value[0] instanceof Long) {
4948
return (Long) value[0];
5049
}
51-
throw new RuntimeException("Time field [" + timeField.getAlias() + "] expected a long value; actual was: " + value[0]);
50+
throw new RuntimeException("Time field [" + timeField.getName() + "] expected a long value; actual was: " + value[0]);
5251
}
5352

5453
public static TimeBasedExtractedFields build(Job job, DatafeedConfig datafeed, FieldCapabilitiesResponse fieldsCapabilities) {
@@ -58,20 +57,18 @@ public static TimeBasedExtractedFields build(Job job, DatafeedConfig datafeed, F
5857
if (scriptFields.contains(timeField) == false && extractionMethodDetector.isAggregatable(timeField) == false) {
5958
throw new IllegalArgumentException("cannot retrieve time field [" + timeField + "] because it is not aggregatable");
6059
}
61-
ExtractedField timeExtractedField = extractedTimeField(timeField, scriptFields, fieldsCapabilities);
60+
ExtractedField timeExtractedField = extractedTimeField(timeField, scriptFields);
6261
List<String> remainingFields = job.allInputFields().stream().filter(f -> !f.equals(timeField)).collect(Collectors.toList());
6362
List<ExtractedField> allExtractedFields = new ArrayList<>(remainingFields.size() + 1);
6463
allExtractedFields.add(timeExtractedField);
6564
remainingFields.stream().forEach(field -> allExtractedFields.add(extractionMethodDetector.detect(field)));
65+
6666
return new TimeBasedExtractedFields(timeExtractedField, allExtractedFields);
6767
}
6868

69-
private static ExtractedField extractedTimeField(String timeField, Set<String> scriptFields,
70-
FieldCapabilitiesResponse fieldCapabilities) {
71-
if (scriptFields.contains(timeField)) {
72-
return ExtractedField.newTimeField(timeField, Collections.emptySet(), ExtractedField.ExtractionMethod.SCRIPT_FIELD);
73-
}
74-
return ExtractedField.newTimeField(timeField, fieldCapabilities.getField(timeField).keySet(),
75-
ExtractedField.ExtractionMethod.DOC_VALUE);
69+
private static ExtractedField extractedTimeField(String timeField, Set<String> scriptFields) {
70+
ExtractedField.Method method = scriptFields.contains(timeField) ? ExtractedField.Method.SCRIPT_FIELD
71+
: ExtractedField.Method.DOC_VALUE;
72+
return ExtractedFields.newTimeField(timeField, method);
7673
}
7774
}

x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/dataframe/extractor/DataFrameDataExtractor.java

+3-3
Original file line numberDiff line numberDiff line change
@@ -24,8 +24,8 @@
2424
import org.elasticsearch.search.sort.SortOrder;
2525
import org.elasticsearch.xpack.core.ClientHelper;
2626
import org.elasticsearch.xpack.core.ml.dataframe.analyses.DataFrameAnalysis;
27-
import org.elasticsearch.xpack.ml.extractor.ExtractedField;
2827
import org.elasticsearch.xpack.ml.dataframe.DataFrameAnalyticsIndex;
28+
import org.elasticsearch.xpack.ml.extractor.ExtractedField;
2929

3030
import java.io.IOException;
3131
import java.util.ArrayList;
@@ -138,7 +138,7 @@ private SearchRequestBuilder buildSearchRequest() {
138138
setFetchSource(searchRequestBuilder);
139139

140140
for (ExtractedField docValueField : context.extractedFields.getDocValueFields()) {
141-
searchRequestBuilder.addDocValueField(docValueField.getName(), docValueField.getDocValueFormat());
141+
searchRequestBuilder.addDocValueField(docValueField.getSearchField(), docValueField.getDocValueFormat());
142142
}
143143

144144
return searchRequestBuilder;
@@ -231,7 +231,7 @@ private void clearScroll(String scrollId) {
231231
}
232232

233233
public List<String> getFieldNames() {
234-
return context.extractedFields.getAllFields().stream().map(ExtractedField::getAlias).collect(Collectors.toList());
234+
return context.extractedFields.getAllFields().stream().map(ExtractedField::getName).collect(Collectors.toList());
235235
}
236236

237237
public DataSummary collectDataSummary() {

x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/dataframe/extractor/ExtractedFieldsDetector.java

+4-35
Original file line numberDiff line numberDiff line change
@@ -11,11 +11,9 @@
1111
import org.elasticsearch.action.fieldcaps.FieldCapabilities;
1212
import org.elasticsearch.action.fieldcaps.FieldCapabilitiesResponse;
1313
import org.elasticsearch.common.Strings;
14-
import org.elasticsearch.common.document.DocumentField;
1514
import org.elasticsearch.common.regex.Regex;
1615
import org.elasticsearch.index.IndexSettings;
1716
import org.elasticsearch.index.mapper.BooleanFieldMapper;
18-
import org.elasticsearch.search.SearchHit;
1917
import org.elasticsearch.search.fetch.subphase.FetchSourceContext;
2018
import org.elasticsearch.xpack.core.ml.dataframe.DataFrameAnalyticsConfig;
2119
import org.elasticsearch.xpack.core.ml.dataframe.DataFrameAnalyticsDest;
@@ -24,9 +22,9 @@
2422
import org.elasticsearch.xpack.core.ml.job.messages.Messages;
2523
import org.elasticsearch.xpack.core.ml.utils.ExceptionsHelper;
2624
import org.elasticsearch.xpack.core.ml.utils.NameResolver;
25+
import org.elasticsearch.xpack.ml.dataframe.DataFrameAnalyticsIndex;
2726
import org.elasticsearch.xpack.ml.extractor.ExtractedField;
2827
import org.elasticsearch.xpack.ml.extractor.ExtractedFields;
29-
import org.elasticsearch.xpack.ml.dataframe.DataFrameAnalyticsIndex;
3028

3129
import java.util.ArrayList;
3230
import java.util.Arrays;
@@ -264,13 +262,13 @@ private ExtractedFields fetchBooleanFieldsAsIntegers(ExtractedFields extractedFi
264262
List<ExtractedField> adjusted = new ArrayList<>(extractedFields.getAllFields().size());
265263
for (ExtractedField field : extractedFields.getAllFields()) {
266264
if (isBoolean(field.getTypes())) {
267-
if (config.getAnalysis().getAllowedCategoricalTypes(field.getAlias()).contains(BooleanFieldMapper.CONTENT_TYPE)) {
265+
if (config.getAnalysis().getAllowedCategoricalTypes(field.getName()).contains(BooleanFieldMapper.CONTENT_TYPE)) {
268266
// We convert boolean field to string if it is a categorical dependent variable
269-
adjusted.add(new BooleanMapper<>(field, Boolean.TRUE.toString(), Boolean.FALSE.toString()));
267+
adjusted.add(ExtractedFields.applyBooleanMapping(field, Boolean.TRUE.toString(), Boolean.FALSE.toString()));
270268
} else {
271269
// We convert boolean fields to integers with values 0, 1 as this is the preferred
272270
// way to consume such features in the analytics process.
273-
adjusted.add(new BooleanMapper<>(field, 1, 0));
271+
adjusted.add(ExtractedFields.applyBooleanMapping(field, 1, 0));
274272
}
275273
} else {
276274
adjusted.add(field);
@@ -282,33 +280,4 @@ private ExtractedFields fetchBooleanFieldsAsIntegers(ExtractedFields extractedFi
282280
private static boolean isBoolean(Set<String> types) {
283281
return types.size() == 1 && types.contains(BooleanFieldMapper.CONTENT_TYPE);
284282
}
285-
286-
/**
287-
* {@link BooleanMapper} makes boolean field behave as a field of different type.
288-
*/
289-
private static final class BooleanMapper<T> extends ExtractedField {
290-
291-
private final T trueValue;
292-
private final T falseValue;
293-
294-
BooleanMapper(ExtractedField field, T trueValue, T falseValue) {
295-
super(field.getAlias(), field.getName(), Collections.singleton(BooleanFieldMapper.CONTENT_TYPE), ExtractionMethod.DOC_VALUE);
296-
this.trueValue = trueValue;
297-
this.falseValue = falseValue;
298-
}
299-
300-
@Override
301-
public Object[] value(SearchHit hit) {
302-
DocumentField keyValue = hit.field(name);
303-
if (keyValue != null) {
304-
return keyValue.getValues().stream().map(v -> Boolean.TRUE.equals(v) ? trueValue : falseValue).toArray();
305-
}
306-
return new Object[0];
307-
}
308-
309-
@Override
310-
public boolean supportsFromSource() {
311-
return false;
312-
}
313-
}
314283
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the Elastic License;
4+
* you may not use this file except in compliance with the Elastic License.
5+
*/
6+
package org.elasticsearch.xpack.ml.extractor;
7+
8+
import org.elasticsearch.common.document.DocumentField;
9+
import org.elasticsearch.search.SearchHit;
10+
11+
import java.util.List;
12+
import java.util.Objects;
13+
import java.util.Set;
14+
15+
abstract class AbstractField implements ExtractedField {
16+
17+
private final String name;
18+
19+
private final Set<String> types;
20+
21+
AbstractField(String name, Set<String> types) {
22+
this.name = Objects.requireNonNull(name);
23+
this.types = Objects.requireNonNull(types);
24+
}
25+
26+
@Override
27+
public String getName() {
28+
return name;
29+
}
30+
31+
@Override
32+
public String getSearchField() {
33+
return name;
34+
}
35+
36+
@Override
37+
public Set<String> getTypes() {
38+
return types;
39+
}
40+
41+
protected Object[] getFieldValue(SearchHit hit) {
42+
DocumentField keyValue = hit.field(getSearchField());
43+
if (keyValue != null) {
44+
List<Object> values = keyValue.getValues();
45+
return values.toArray(new Object[0]);
46+
}
47+
return new Object[0];
48+
}
49+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the Elastic License;
4+
* you may not use this file except in compliance with the Elastic License.
5+
*/
6+
package org.elasticsearch.xpack.ml.extractor;
7+
8+
import org.elasticsearch.common.Nullable;
9+
import org.elasticsearch.search.SearchHit;
10+
11+
import java.util.Set;
12+
13+
public class DocValueField extends AbstractField {
14+
15+
public DocValueField(String name, Set<String> types) {
16+
super(name, types);
17+
}
18+
19+
@Override
20+
public Method getMethod() {
21+
return Method.DOC_VALUE;
22+
}
23+
24+
@Override
25+
public Object[] value(SearchHit hit) {
26+
return getFieldValue(hit);
27+
}
28+
29+
@Override
30+
public boolean supportsFromSource() {
31+
return true;
32+
}
33+
34+
@Override
35+
public ExtractedField newFromSource() {
36+
return new SourceField(getSearchField(), getTypes());
37+
}
38+
39+
@Override
40+
public boolean isMultiField() {
41+
return false;
42+
}
43+
44+
@Nullable
45+
public String getDocValueFormat() {
46+
return null;
47+
}
48+
}

0 commit comments

Comments
 (0)