Skip to content

Commit 55fe93e

Browse files
authored
[ML] Support the unsigned_long type in data frame analytics (#64066)
Adds support for the unsigned_long type to data frame analytics. This type is handled in the same way as the long type. Values sent to the ML native processes are converted to floats, so any value outside the range in which a float can represent every long value exactly will lose accuracy. Relates to #60050
1 parent 695584e commit 55fe93e

File tree

7 files changed

+23
-20
lines changed

7 files changed

+23
-20
lines changed

x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/dataframe/analyses/Types.java

+4-3
Original file line numberDiff line numberDiff line change
@@ -28,12 +28,13 @@ private Types() {}
2828
.collect(Collectors.toUnmodifiableSet());
2929

3030
private static final Set<String> NUMERICAL_TYPES =
31-
Stream.concat(Stream.of(NumberType.values()).map(NumberType::typeName), Stream.of("scaled_float"))
31+
Stream.concat(Stream.of(NumberType.values()).map(NumberType::typeName), Stream.of("scaled_float", "unsigned_long"))
3232
.collect(Collectors.toUnmodifiableSet());
3333

3434
private static final Set<String> DISCRETE_NUMERICAL_TYPES =
35-
Stream.of(NumberType.BYTE, NumberType.SHORT, NumberType.INTEGER, NumberType.LONG)
36-
.map(NumberType::typeName)
35+
Stream.concat(
36+
Stream.of(NumberType.BYTE, NumberType.SHORT, NumberType.INTEGER, NumberType.LONG).map(NumberType::typeName),
37+
Stream.of("unsigned_long"))
3738
.collect(Collectors.toUnmodifiableSet());
3839

3940
private static final Set<String> BOOL_TYPES = Collections.singleton(BooleanFieldMapper.CONTENT_TYPE);

x-pack/plugin/ml/qa/native-multi-node-tests/src/javaRestTest/java/org/elasticsearch/xpack/ml/integration/DataFrameAnalysisCustomFeatureIT.java

+1-1
Original file line numberDiff line numberDiff line change
@@ -167,7 +167,7 @@ private static void createIndex(String index, boolean isDatastream) {
167167
" \"type\": \"double\"\n" +
168168
" }," +
169169
" \""+ DISCRETE_NUMERICAL_FIELD + "\": {\n" +
170-
" \"type\": \"integer\"\n" +
170+
" \"type\": \"unsigned_long\"\n" +
171171
" }," +
172172
" \""+ TEXT_FIELD + "\": {\n" +
173173
" \"type\": \"text\"\n" +

x-pack/plugin/ml/qa/native-multi-node-tests/src/javaRestTest/java/org/elasticsearch/xpack/ml/integration/ExplainDataFrameAnalyticsIT.java

+2-2
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ public void testSourceQueryIsApplied() throws IOException {
5252
client().admin().indices().prepareCreate(sourceIndex)
5353
.setMapping(
5454
"numeric_1", "type=double",
55-
"numeric_2", "type=float",
55+
"numeric_2", "type=unsigned_long",
5656
"categorical", "type=keyword",
5757
"filtered_field", "type=keyword")
5858
.get();
@@ -64,7 +64,7 @@ public void testSourceQueryIsApplied() throws IOException {
6464
IndexRequest indexRequest = new IndexRequest(sourceIndex);
6565
indexRequest.source(
6666
"numeric_1", 1.0,
67-
"numeric_2", 2.0,
67+
"numeric_2", 2,
6868
"categorical", i % 2 == 0 ? "class_1" : "class_2",
6969
"filtered_field", i < 2 ? "bingo" : "rest"); // We tag bingo on the first two docs to ensure we have 2 classes
7070
bulkRequestBuilder.add(indexRequest);

x-pack/plugin/ml/qa/native-multi-node-tests/src/javaRestTest/java/org/elasticsearch/xpack/ml/integration/RegressionIT.java

+1-1
Original file line numberDiff line numberDiff line change
@@ -698,7 +698,7 @@ static void indexData(String sourceIndex, int numTrainingRows, int numNonTrainin
698698
" \"type\": \"double\"\n" +
699699
" }," +
700700
" \"" + DISCRETE_NUMERICAL_FEATURE_FIELD + "\": {\n" +
701-
" \"type\": \"long\"\n" +
701+
" \"type\": \"unsigned_long\"\n" +
702702
" }," +
703703
" \"" + DEPENDENT_VARIABLE_FIELD + "\": {\n" +
704704
" \"type\": \"double\"\n" +

x-pack/plugin/ml/qa/native-multi-node-tests/src/javaRestTest/java/org/elasticsearch/xpack/ml/integration/RunDataFrameAnalyticsIT.java

+2-2
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,7 @@ public void testOutlierDetectionWithFewDocuments() throws Exception {
7171
String sourceIndex = "test-outlier-detection-with-few-docs";
7272

7373
client().admin().indices().prepareCreate(sourceIndex)
74-
.setMapping("numeric_1", "type=double", "numeric_2", "type=float", "categorical_1", "type=keyword")
74+
.setMapping("numeric_1", "type=double", "numeric_2", "type=unsigned_long", "categorical_1", "type=keyword")
7575
.get();
7676

7777
BulkRequestBuilder bulkRequestBuilder = client().prepareBulk();
@@ -83,7 +83,7 @@ public void testOutlierDetectionWithFewDocuments() throws Exception {
8383
// We insert one odd value out of 5 for one feature
8484
String docId = i == 0 ? "outlier" : "normal" + i;
8585
indexRequest.id(docId);
86-
indexRequest.source("numeric_1", i == 0 ? 100.0 : 1.0, "numeric_2", 1.0, "categorical_1", "foo_" + i);
86+
indexRequest.source("numeric_1", i == 0 ? 100.0 : 1.0, "numeric_2", 1, "categorical_1", "foo_" + i);
8787
bulkRequestBuilder.add(indexRequest);
8888
}
8989
BulkResponse bulkResponse = bulkRequestBuilder.get();

x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/dataframe/extractor/ExtractedFieldsDetectorTests.java

+11-9
Original file line numberDiff line numberDiff line change
@@ -105,7 +105,7 @@ public void testDetect_GivenOutlierDetectionAndNonNumericField() {
105105
assertThat(fieldExtraction.v2().get(0).getName(), equalTo("some_keyword"));
106106
assertThat(fieldExtraction.v2().get(0).isIncluded(), is(false));
107107
assertThat(fieldExtraction.v2().get(0).getReason(), equalTo("unsupported type; supported types are " +
108-
"[boolean, byte, double, float, half_float, integer, long, scaled_float, short]"));
108+
"[boolean, byte, double, float, half_float, integer, long, scaled_float, short, unsigned_long]"));
109109
}
110110

111111
public void testDetect_GivenOutlierDetectionAndFieldWithNumericAndNonNumericTypes() {
@@ -121,7 +121,7 @@ public void testDetect_GivenOutlierDetectionAndFieldWithNumericAndNonNumericType
121121
assertThat(fieldExtraction.v2().get(0).getName(), equalTo("indecisive_field"));
122122
assertThat(fieldExtraction.v2().get(0).isIncluded(), is(false));
123123
assertThat(fieldExtraction.v2().get(0).getReason(), equalTo("unsupported type; supported types are " +
124-
"[boolean, byte, double, float, half_float, integer, long, scaled_float, short]"));
124+
"[boolean, byte, double, float, half_float, integer, long, scaled_float, short, unsigned_long]"));
125125
}
126126

127127
public void testDetect_GivenOutlierDetectionAndMultipleFields() {
@@ -147,7 +147,7 @@ public void testDetect_GivenOutlierDetectionAndMultipleFields() {
147147
FieldSelection.included("some_boolean", Collections.singleton("boolean"), false, FieldSelection.FeatureType.NUMERICAL),
148148
FieldSelection.included("some_float", Collections.singleton("float"), false, FieldSelection.FeatureType.NUMERICAL),
149149
FieldSelection.excluded("some_keyword", Collections.singleton("keyword"), "unsupported type; " +
150-
"supported types are [boolean, byte, double, float, half_float, integer, long, scaled_float, short]"),
150+
"supported types are [boolean, byte, double, float, half_float, integer, long, scaled_float, short, unsigned_long]"),
151151
FieldSelection.included("some_long", Collections.singleton("long"), false, FieldSelection.FeatureType.NUMERICAL)
152152
);
153153
}
@@ -282,7 +282,7 @@ public void testDetect_GivenRegressionAndRequiredFieldHasInvalidType() {
282282
ElasticsearchStatusException e = expectThrows(ElasticsearchStatusException.class, extractedFieldsDetector::detect);
283283

284284
assertThat(e.getMessage(), equalTo("invalid types [keyword] for required field [foo]; " +
285-
"expected types are [byte, double, float, half_float, integer, long, scaled_float, short]"));
285+
"expected types are [byte, double, float, half_float, integer, long, scaled_float, short, unsigned_long]"));
286286
}
287287

288288
public void testDetect_GivenClassificationAndRequiredFieldHasInvalidType() {
@@ -298,7 +298,7 @@ public void testDetect_GivenClassificationAndRequiredFieldHasInvalidType() {
298298
ElasticsearchStatusException e = expectThrows(ElasticsearchStatusException.class, extractedFieldsDetector::detect);
299299

300300
assertThat(e.getMessage(), equalTo("invalid types [float] for required field [some_float]; " +
301-
"expected types are [boolean, byte, integer, ip, keyword, long, short, text]"));
301+
"expected types are [boolean, byte, integer, ip, keyword, long, short, text, unsigned_long]"));
302302
}
303303

304304
public void testDetect_GivenClassificationAndDependentVariableHasInvalidCardinality() {
@@ -371,7 +371,8 @@ public void testDetect_GivenExcludedFieldIsUnsupported() {
371371

372372
assertFieldSelectionContains(fieldExtraction.v2(),
373373
FieldSelection.excluded("categorical", Collections.singleton("keyword"),
374-
"unsupported type; supported types are [boolean, byte, double, float, half_float, integer, long, scaled_float, short]"),
374+
"unsupported type; supported types are " +
375+
"[boolean, byte, double, float, half_float, integer, long, scaled_float, short, unsigned_long]"),
375376
FieldSelection.included("numeric", Collections.singleton("float"), false, FieldSelection.FeatureType.NUMERICAL)
376377
);
377378
}
@@ -471,7 +472,7 @@ public void testDetect_GivenIncludedFieldHasUnsupportedType() {
471472
ElasticsearchStatusException e = expectThrows(ElasticsearchStatusException.class, extractedFieldsDetector::detect);
472473

473474
assertThat(e.getMessage(), equalTo("field [your_keyword] has unsupported type [keyword]. " +
474-
"Supported types are [boolean, byte, double, float, half_float, integer, long, scaled_float, short]."));
475+
"Supported types are [boolean, byte, double, float, half_float, integer, long, scaled_float, short, unsigned_long]."));
475476
}
476477

477478
public void testDetect_GivenNotIncludedFieldHasUnsupportedType() {
@@ -492,7 +493,8 @@ public void testDetect_GivenNotIncludedFieldHasUnsupportedType() {
492493

493494
assertFieldSelectionContains(fieldExtraction.v2(),
494495
FieldSelection.excluded("categorical", Collections.singleton("keyword"),
495-
"unsupported type; supported types are [boolean, byte, double, float, half_float, integer, long, scaled_float, short]"),
496+
"unsupported type; supported types are " +
497+
"[boolean, byte, double, float, half_float, integer, long, scaled_float, short, unsigned_long]"),
496498
FieldSelection.included("numeric", Collections.singleton("float"), false, FieldSelection.FeatureType.NUMERICAL)
497499
);
498500
}
@@ -517,7 +519,7 @@ public void testDetect_GivenIndexContainsResultsField() {
517519
FieldSelection.included("my_field1", Collections.singleton("float"), false, FieldSelection.FeatureType.NUMERICAL),
518520
FieldSelection.included("your_field2", Collections.singleton("float"), false, FieldSelection.FeatureType.NUMERICAL),
519521
FieldSelection.excluded("your_keyword", Collections.singleton("keyword"), "unsupported type; supported types " +
520-
"are [boolean, byte, double, float, half_float, integer, long, scaled_float, short]")
522+
"are [boolean, byte, double, float, half_float, integer, long, scaled_float, short, unsigned_long]")
521523
);
522524
}
523525

x-pack/plugin/src/test/resources/rest-api-spec/test/ml/explain_data_frame_analytics.yml

+2-2
Original file line numberDiff line numberDiff line change
@@ -225,7 +225,7 @@
225225
- match: { field_selection.2.is_included: false }
226226
- match: { field_selection.2.is_required: false }
227227
- is_false: field_selection.2.feature_type
228-
- match: { field_selection.2.reason: "unsupported type; supported types are [boolean, byte, double, float, half_float, integer, ip, keyword, long, scaled_float, short, text]" }
228+
- match: { field_selection.2.reason: "unsupported type; supported types are [boolean, byte, double, float, half_float, integer, ip, keyword, long, scaled_float, short, text, unsigned_long]" }
229229
- match: { field_selection.3.name: "field_4" }
230230
- match: { field_selection.3.mapping_types: ["text"] }
231231
- match: { field_selection.3.is_included: false }
@@ -299,7 +299,7 @@
299299
- match: { field_selection.2.is_included: false }
300300
- match: { field_selection.2.is_required: false }
301301
- is_false: field_selection.2.feature_type
302-
- match: { field_selection.2.reason: "unsupported type; supported types are [boolean, byte, double, float, half_float, integer, ip, keyword, long, scaled_float, short, text]" }
302+
- match: { field_selection.2.reason: "unsupported type; supported types are [boolean, byte, double, float, half_float, integer, ip, keyword, long, scaled_float, short, text, unsigned_long]" }
303303
- match: { field_selection.3.name: "field_4" }
304304
- match: { field_selection.3.mapping_types: ["text"] }
305305
- match: { field_selection.3.is_included: false }

0 commit comments

Comments
 (0)