Skip to content

Commit 40a1435

Browse files
committed
[ML] Support the unsigned_long type in data frame analytics
Adds support for the unsigned_long type to data frame analytics. This type is handled in the same way as the long type. Values sent to the ML native processes are converted to floats, so they lose accuracy when they fall outside the range in which a float can exactly represent integer values.

Relates to elastic#60050.
1 parent 3f8097b commit 40a1435

File tree

5 files changed: 10 additions and 9 deletions.

x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/dataframe/analyses/Types.java

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -28,12 +28,13 @@ private Types() {}
2828
.collect(Collectors.toUnmodifiableSet());
2929

3030
private static final Set<String> NUMERICAL_TYPES =
31-
Stream.concat(Stream.of(NumberType.values()).map(NumberType::typeName), Stream.of("scaled_float"))
31+
Stream.concat(Stream.of(NumberType.values()).map(NumberType::typeName), Stream.of("scaled_float", "unsigned_long"))
3232
.collect(Collectors.toUnmodifiableSet());
3333

3434
private static final Set<String> DISCRETE_NUMERICAL_TYPES =
35-
Stream.of(NumberType.BYTE, NumberType.SHORT, NumberType.INTEGER, NumberType.LONG)
36-
.map(NumberType::typeName)
35+
Stream.concat(
36+
Stream.of(NumberType.BYTE, NumberType.SHORT, NumberType.INTEGER, NumberType.LONG).map(NumberType::typeName),
37+
Stream.of("unsigned_long"))
3738
.collect(Collectors.toUnmodifiableSet());
3839

3940
private static final Set<String> BOOL_TYPES = Collections.singleton(BooleanFieldMapper.CONTENT_TYPE);

x-pack/plugin/ml/qa/native-multi-node-tests/src/javaRestTest/java/org/elasticsearch/xpack/ml/integration/DataFrameAnalysisCustomFeatureIT.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -167,7 +167,7 @@ private static void createIndex(String index, boolean isDatastream) {
167167
" \"type\": \"double\"\n" +
168168
" }," +
169169
" \""+ DISCRETE_NUMERICAL_FIELD + "\": {\n" +
170-
" \"type\": \"integer\"\n" +
170+
" \"type\": \"unsigned_long\"\n" +
171171
" }," +
172172
" \""+ TEXT_FIELD + "\": {\n" +
173173
" \"type\": \"text\"\n" +

x-pack/plugin/ml/qa/native-multi-node-tests/src/javaRestTest/java/org/elasticsearch/xpack/ml/integration/ExplainDataFrameAnalyticsIT.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ public void testSourceQueryIsApplied() throws IOException {
5252
client().admin().indices().prepareCreate(sourceIndex)
5353
.setMapping(
5454
"numeric_1", "type=double",
55-
"numeric_2", "type=float",
55+
"numeric_2", "type=unsigned_long",
5656
"categorical", "type=keyword",
5757
"filtered_field", "type=keyword")
5858
.get();
@@ -64,7 +64,7 @@ public void testSourceQueryIsApplied() throws IOException {
6464
IndexRequest indexRequest = new IndexRequest(sourceIndex);
6565
indexRequest.source(
6666
"numeric_1", 1.0,
67-
"numeric_2", 2.0,
67+
"numeric_2", 2,
6868
"categorical", i % 2 == 0 ? "class_1" : "class_2",
6969
"filtered_field", i < 2 ? "bingo" : "rest"); // We tag bingo on the first two docs to ensure we have 2 classes
7070
bulkRequestBuilder.add(indexRequest);

x-pack/plugin/ml/qa/native-multi-node-tests/src/javaRestTest/java/org/elasticsearch/xpack/ml/integration/RegressionIT.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -698,7 +698,7 @@ static void indexData(String sourceIndex, int numTrainingRows, int numNonTrainin
698698
" \"type\": \"double\"\n" +
699699
" }," +
700700
" \"" + DISCRETE_NUMERICAL_FEATURE_FIELD + "\": {\n" +
701-
" \"type\": \"long\"\n" +
701+
" \"type\": \"unsigned_long\"\n" +
702702
" }," +
703703
" \"" + DEPENDENT_VARIABLE_FIELD + "\": {\n" +
704704
" \"type\": \"double\"\n" +

x-pack/plugin/ml/qa/native-multi-node-tests/src/javaRestTest/java/org/elasticsearch/xpack/ml/integration/RunDataFrameAnalyticsIT.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,7 @@ public void testOutlierDetectionWithFewDocuments() throws Exception {
7171
String sourceIndex = "test-outlier-detection-with-few-docs";
7272

7373
client().admin().indices().prepareCreate(sourceIndex)
74-
.setMapping("numeric_1", "type=double", "numeric_2", "type=float", "categorical_1", "type=keyword")
74+
.setMapping("numeric_1", "type=double", "numeric_2", "type=unsigned_long", "categorical_1", "type=keyword")
7575
.get();
7676

7777
BulkRequestBuilder bulkRequestBuilder = client().prepareBulk();
@@ -83,7 +83,7 @@ public void testOutlierDetectionWithFewDocuments() throws Exception {
8383
// We insert one odd value out of 5 for one feature
8484
String docId = i == 0 ? "outlier" : "normal" + i;
8585
indexRequest.id(docId);
86-
indexRequest.source("numeric_1", i == 0 ? 100.0 : 1.0, "numeric_2", 1.0, "categorical_1", "foo_" + i);
86+
indexRequest.source("numeric_1", i == 0 ? 100.0 : 1.0, "numeric_2", 1, "categorical_1", "foo_" + i);
8787
bulkRequestBuilder.add(indexRequest);
8888
}
8989
BulkResponse bulkResponse = bulkRequestBuilder.get();

0 commit comments

Comments
 (0)