From 9b3e32ff6a887413658bd98957d94bf62e302eb3 Mon Sep 17 00:00:00 2001 From: Dimitris Athanasiou Date: Tue, 27 Nov 2018 15:10:10 +0000 Subject: [PATCH 1/4] [FEATURE][ML] Only write numeric fields to data frame --- .../action/TransportRunAnalyticsAction.java | 3 + .../DataFrameDataExtractorFactory.java | 31 ++++- .../DataFrameDataExtractorFactoryTests.java | 115 ++++++++++++++++++ 3 files changed, 147 insertions(+), 2 deletions(-) create mode 100644 x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/analytics/DataFrameDataExtractorFactoryTests.java diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportRunAnalyticsAction.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportRunAnalyticsAction.java index 2a6199486a2ac..7d7b194de8ae6 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportRunAnalyticsAction.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportRunAnalyticsAction.java @@ -185,6 +185,9 @@ private void runPipelineAnalytics(String index, ActionListener IGNORE_FIELDS = Arrays.asList("_id", "_field_names", "_index", "_parent", "_routing", "_seq_no", "_source", "_type", "_uid", "_version", "_feature", "_ignored"); + /** + * The types supported by data frames + */ + private static final Set COMPATIBLE_FIELD_TYPES = new HashSet<>(Arrays.asList( + "long", "integer", "short", "byte", "double", "float", "half_float", "scaled_float")); + private final Client client; private final String index; private final ExtractedFields extractedFields; @@ -82,10 +92,27 @@ public static void create(Client client, Map headers, String ind }); } - private static ExtractedFields detectExtractedFields(FieldCapabilitiesResponse fieldCapabilitiesResponse) { + // Visible for testing + static ExtractedFields detectExtractedFields(FieldCapabilitiesResponse fieldCapabilitiesResponse) { Set fields = fieldCapabilitiesResponse.get().keySet(); fields.removeAll(IGNORE_FIELDS); - return ExtractedFields.build(new ArrayList<>(fields), Collections.emptySet(), fieldCapabilitiesResponse) + removeFieldsWithIncompatibleTypes(fields, fieldCapabilitiesResponse); + ExtractedFields extractedFields = ExtractedFields.build(new ArrayList<>(fields), Collections.emptySet(), fieldCapabilitiesResponse) .filterFields(ExtractedField.ExtractionMethod.DOC_VALUE); + if (extractedFields.getAllFields().isEmpty()) { + throw ExceptionsHelper.badRequestException("No compatible fields could be detected"); + } + return extractedFields; + } + + private static void removeFieldsWithIncompatibleTypes(Set fields, FieldCapabilitiesResponse fieldCapabilitiesResponse) { + Iterator fieldsIterator = fields.iterator(); + while (fieldsIterator.hasNext()) { + String field = fieldsIterator.next(); + Map fieldCaps = fieldCapabilitiesResponse.getField(field); + if (fieldCaps == null || COMPATIBLE_FIELD_TYPES.containsAll(fieldCaps.keySet()) == false) { + fieldsIterator.remove(); + } + } } } diff --git a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/analytics/DataFrameDataExtractorFactoryTests.java b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/analytics/DataFrameDataExtractorFactoryTests.java new file mode 100644 index 0000000000000..1a43b2893baef --- /dev/null +++ b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/analytics/DataFrameDataExtractorFactoryTests.java @@ -0,0 +1,115 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License. + */ +package org.elasticsearch.xpack.ml.analytics; + +import org.elasticsearch.ElasticsearchStatusException; +import org.elasticsearch.action.fieldcaps.FieldCapabilities; +import org.elasticsearch.action.fieldcaps.FieldCapabilitiesResponse; +import org.elasticsearch.test.ESTestCase; +import org.elasticsearch.xpack.ml.datafeed.extractor.fields.ExtractedField; +import org.elasticsearch.xpack.ml.datafeed.extractor.fields.ExtractedFields; + +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; + +import static org.hamcrest.Matchers.containsInAnyOrder; +import static org.hamcrest.Matchers.equalTo; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +public class DataFrameDataExtractorFactoryTests extends ESTestCase { + + public void testDetectExtractedFields_GivenFloatField() { + FieldCapabilitiesResponse fieldCapabilities= new MockFieldCapsResponseBuilder() + .addAggregatableField("some_float", "float").build(); + + ExtractedFields extractedFields = DataFrameDataExtractorFactory.detectExtractedFields(fieldCapabilities); + + List allFields = extractedFields.getAllFields(); + assertThat(allFields.size(), equalTo(1)); + assertThat(allFields.get(0).getName(), equalTo("some_float")); + } + + public void testDetectExtractedFields_GivenNumericFieldWithMultipleTypes() { + FieldCapabilitiesResponse fieldCapabilities= new MockFieldCapsResponseBuilder() + .addAggregatableField("some_number", "long", "integer", "short", "byte", "double", "float", "half_float", "scaled_float") + .build(); + + ExtractedFields extractedFields = DataFrameDataExtractorFactory.detectExtractedFields(fieldCapabilities); + + List allFields = extractedFields.getAllFields(); + assertThat(allFields.size(), equalTo(1)); + assertThat(allFields.get(0).getName(), equalTo("some_number")); + } + + public void testDetectExtractedFields_GivenNonNumericField() { + FieldCapabilitiesResponse fieldCapabilities= new MockFieldCapsResponseBuilder() + .addAggregatableField("some_keyword", "keyword").build(); + + ElasticsearchStatusException e = expectThrows(ElasticsearchStatusException.class, + () -> DataFrameDataExtractorFactory.detectExtractedFields(fieldCapabilities)); + assertThat(e.getMessage(), equalTo("No compatible fields could be detected")); + } + + public void testDetectExtractedFields_GivenFieldWithNumericAndNonNumericTypes() { + FieldCapabilitiesResponse fieldCapabilities= new MockFieldCapsResponseBuilder() + .addAggregatableField("indecisive_field", "float", "keyword").build(); + + ElasticsearchStatusException e = expectThrows(ElasticsearchStatusException.class, + () -> DataFrameDataExtractorFactory.detectExtractedFields(fieldCapabilities)); + assertThat(e.getMessage(), equalTo("No compatible fields could be detected")); + } + + public void testDetectExtractedFields_GivenMultipleFields() { + FieldCapabilitiesResponse fieldCapabilities= new MockFieldCapsResponseBuilder() + .addAggregatableField("some_float", "float") + .addAggregatableField("some_long", "long") + .addAggregatableField("some_keyword", "keyword") + .build(); + + ExtractedFields extractedFields = DataFrameDataExtractorFactory.detectExtractedFields(fieldCapabilities); + + List allFields = extractedFields.getAllFields(); + assertThat(allFields.size(), equalTo(2)); + assertThat(allFields.stream().map(ExtractedField::getName).collect(Collectors.toSet()), + containsInAnyOrder("some_float", "some_long")); + } + + public void testDetectExtractedFields_GivenIgnoredField() { + FieldCapabilitiesResponse fieldCapabilities= new MockFieldCapsResponseBuilder() + .addAggregatableField("_id", "float").build(); + + ElasticsearchStatusException e = expectThrows(ElasticsearchStatusException.class, + () -> DataFrameDataExtractorFactory.detectExtractedFields(fieldCapabilities)); + assertThat(e.getMessage(), equalTo("No compatible fields could be detected")); + } + + private static class MockFieldCapsResponseBuilder { + + private final Map> fieldCaps = new HashMap<>(); + + private MockFieldCapsResponseBuilder addAggregatableField(String field, String... types) { + Map caps = new HashMap<>(); + for (String type : types) { + caps.put(type, new FieldCapabilities(field, type, true, true)); + } + fieldCaps.put(field, caps); + return this; + } + + private FieldCapabilitiesResponse build() { + FieldCapabilitiesResponse response = mock(FieldCapabilitiesResponse.class); + when(response.get()).thenReturn(fieldCaps); + + for (String field : fieldCaps.keySet()) { + when(response.getField(field)).thenReturn(fieldCaps.get(field)); + } + return response; + } + } +} From 25e3b9bdf77adfb6c1c704a3cc5e810292bb1664 Mon Sep 17 00:00:00 2001 From: Benjamin Trent Date: Wed, 28 Nov 2018 12:24:05 +0000 Subject: [PATCH 2/4] Use NumberType values Co-Authored-By: dimitris-athanasiou --- .../ml/analytics/DataFrameDataExtractorFactory.java | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/analytics/DataFrameDataExtractorFactory.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/analytics/DataFrameDataExtractorFactory.java index 713d47dcadf92..94b91309bd74e 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/analytics/DataFrameDataExtractorFactory.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/analytics/DataFrameDataExtractorFactory.java @@ -40,7 +40,14 @@ public class DataFrameDataExtractorFactory { /** * The types supported by data frames */ - private static final Set COMPATIBLE_FIELD_TYPES = new HashSet<>(Arrays.asList( + private static final Set COMPATIBLE_FIELD_TYPES; + static { + Set types = Stream.of(NumberFieldMapper.NumberType.values()) + .map(NumberFieldMapper.NumberType::typeName) + .collect(Collectors.toSet()); + types.add("scaled_float"); // have to add manually since scaled_float is in a module + COMPATIBLE_FIELD_TYPES = types; + } "long", "integer", "short", "byte", "double", "float", "half_float", "scaled_float")); private final Client client; From 18adbd416b9548c484f01ddf336b46af291f15f0 Mon Sep 17 00:00:00 2001 From: Dimitris Athanasiou Date: Wed, 28 Nov 2018 12:26:58 +0000 Subject: [PATCH 3/4] Fix github apply-suggestion fallout --- .../ml/analytics/DataFrameDataExtractorFactory.java | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/analytics/DataFrameDataExtractorFactory.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/analytics/DataFrameDataExtractorFactory.java index 94b91309bd74e..1bc94def80b74 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/analytics/DataFrameDataExtractorFactory.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/analytics/DataFrameDataExtractorFactory.java @@ -13,6 +13,7 @@ import org.elasticsearch.action.fieldcaps.FieldCapabilitiesResponse; import org.elasticsearch.client.Client; import org.elasticsearch.index.IndexNotFoundException; +import org.elasticsearch.index.mapper.NumberFieldMapper; import org.elasticsearch.index.query.QueryBuilders; import org.elasticsearch.xpack.core.ClientHelper; import org.elasticsearch.xpack.core.ml.utils.ExceptionsHelper; @@ -22,12 +23,13 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; -import java.util.HashSet; import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Objects; import java.util.Set; +import java.util.stream.Collectors; +import java.util.stream.Stream; public class DataFrameDataExtractorFactory { @@ -41,14 +43,13 @@ public class DataFrameDataExtractorFactory { * The types supported by data frames */ private static final Set COMPATIBLE_FIELD_TYPES; + static { - Set types = Stream.of(NumberFieldMapper.NumberType.values()) + COMPATIBLE_FIELD_TYPES = Stream.of(NumberFieldMapper.NumberType.values()) .map(NumberFieldMapper.NumberType::typeName) .collect(Collectors.toSet()); - types.add("scaled_float"); // have to add manually since scaled_float is in a module - COMPATIBLE_FIELD_TYPES = types; + COMPATIBLE_FIELD_TYPES.add("scaled_float"); // have to add manually since scaled_float is in a module } - "long", "integer", "short", "byte", "double", "float", "half_float", "scaled_float")); private final Client client; private final String index; From a734bb01eef02c4803e9ddb9800f0756a28ff6da Mon Sep 17 00:00:00 2001 From: Dimitris Athanasiou Date: Wed, 28 Nov 2018 12:59:20 +0000 Subject: [PATCH 4/4] Make COMPATIBLE_TYPES unmodifiable --- .../xpack/ml/analytics/DataFrameDataExtractorFactory.java | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/analytics/DataFrameDataExtractorFactory.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/analytics/DataFrameDataExtractorFactory.java index 1bc94def80b74..35085d282c87f 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/analytics/DataFrameDataExtractorFactory.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/analytics/DataFrameDataExtractorFactory.java @@ -45,10 +45,12 @@ public class DataFrameDataExtractorFactory { private static final Set COMPATIBLE_FIELD_TYPES; static { - COMPATIBLE_FIELD_TYPES = Stream.of(NumberFieldMapper.NumberType.values()) + Set compatibleTypes = Stream.of(NumberFieldMapper.NumberType.values()) .map(NumberFieldMapper.NumberType::typeName) .collect(Collectors.toSet()); - COMPATIBLE_FIELD_TYPES.add("scaled_float"); // have to add manually since scaled_float is in a module + compatibleTypes.add("scaled_float"); // have to add manually since scaled_float is in a module + + COMPATIBLE_FIELD_TYPES = Collections.unmodifiableSet(compatibleTypes); } private final Client client;