Skip to content

Commit 1225107

Browse files
committed
ML: changing automatic check_window calculation (#35643)
* ML: changing automatic check_window calculation * adding docs on how we calculate the default
1 parent 7df652a commit 1225107

File tree

5 files changed

+9
-13
lines changed

5 files changed

+9
-13
lines changed

docs/java-rest/high-level/ml/put-datafeed.asciidoc

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,7 @@ The window must be larger than the Job's bucket size, but smaller than 24 hours,
7272
and span less than 10,000 buckets.
7373
Defaults to `null`, which causes an appropriate window span to be calculated when
7474
the datafeed runs.
75+
The default `check_window` span is the maximum of `2h` and `8 * bucket_span`.
7576
To explicitly disable, pass `DelayedDataCheckConfig.disabledDelayedDataCheckConfig()`.
7677

7778
["source","java",subs="attributes,callouts,macros"]

docs/reference/ml/apis/datafeedresource.asciidoc

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,7 @@ The configuration object has the following properties:
111111
(time units) The window of time before the latest finalized bucket that should be searched
112112
for late data. Defaults to `null` which causes an appropriate `check_window` to be calculated
113113
when the real-time {dfeed} runs.
114+
The default `check_window` span is the maximum of `2h` and `8 * bucket_span`.
114115

115116
[float]
116117
[[ml-datafeed-counts]]

x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/job/messages/Messages.java

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -23,11 +23,9 @@ public final class Messages {
2323
"script_fields cannot be used in combination with aggregations";
2424
public static final String DATAFEED_CONFIG_INVALID_OPTION_VALUE = "Invalid {0} value ''{1}'' in datafeed configuration";
2525
public static final String DATAFEED_CONFIG_DELAYED_DATA_CHECK_TOO_SMALL =
26-
"delayed_data_check_window [{0}] must be greater than the bucket_span [{1}]";
27-
public static final String DATAFEED_CONFIG_DELAYED_DATA_CHECK_TOO_LARGE =
28-
"delayed_data_check_window [{0}] must be less than or equal to [24h]";
26+
"delayed_data_check_config: check_window [{0}] must be greater than the bucket_span [{1}]";
2927
public static final String DATAFEED_CONFIG_DELAYED_DATA_CHECK_SPANS_TOO_MANY_BUCKETS =
30-
"delayed_data_check_window [{0}] must be less than 10,000x the bucket_span [{1}]";
28+
"delayed_data_check_config: check_window [{0}] must be less than 10,000x the bucket_span [{1}]";
3129

3230
public static final String DATAFEED_DOES_NOT_SUPPORT_JOB_WITH_LATENCY = "A job configured with datafeed cannot support latency";
3331
public static final String DATAFEED_NOT_FOUND = "No datafeed with id [{0}] exists";

x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/datafeed/delayeddatacheck/DelayedDataDetectorFactory.java

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,8 @@
2121
public class DelayedDataDetectorFactory {
2222

2323
// There are eight 15min buckets in a two hour span, so matching that number as the fallback for very long buckets
24-
private static final int FALLBACK_NUMBER_OF_BUCKETS_TO_SPAN = 8;
25-
private static final TimeValue DEFAULT_CHECK_WINDOW = TimeValue.timeValueHours(2);
24+
private static final int DEFAULT_NUMBER_OF_BUCKETS_TO_SPAN = 8;
25+
private static final long DEFAULT_CHECK_WINDOW_MS = 7_200_000L; // 2 hours in Milliseconds
2626

2727
/**
2828
* This will build the appropriate detector given the parameters.
@@ -57,11 +57,7 @@ private static long validateAndCalculateWindowLength(TimeValue bucketSpan, TimeV
5757
return 0;
5858
}
5959
if (currentWindow == null) { // we should provide a good default as the user did not specify a window
60-
if(bucketSpan.compareTo(DEFAULT_CHECK_WINDOW) >= 0) {
61-
return FALLBACK_NUMBER_OF_BUCKETS_TO_SPAN * bucketSpan.millis();
62-
} else {
63-
return DEFAULT_CHECK_WINDOW.millis();
64-
}
60+
return Math.max(DEFAULT_CHECK_WINDOW_MS, DEFAULT_NUMBER_OF_BUCKETS_TO_SPAN * bucketSpan.millis());
6561
}
6662
if (currentWindow.compareTo(bucketSpan) < 0) {
6763
throw new IllegalArgumentException(

x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/datafeed/delayeddatacheck/DelayedDataDetectorFactoryTests.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -52,13 +52,13 @@ public void testBuilder() {
5252
assertEquals(Messages.getMessage(
5353
Messages.DATAFEED_CONFIG_DELAYED_DATA_CHECK_SPANS_TOO_MANY_BUCKETS, "12h", "2s"), e.getMessage());
5454

55-
Job withBigBucketSpan = createJob(TimeValue.timeValueHours(3));
55+
Job withBigBucketSpan = createJob(TimeValue.timeValueHours(1));
5656
datafeedConfig = createDatafeed(true, null);
5757

5858
// Should not throw
5959
DelayedDataDetector delayedDataDetector =
6060
DelayedDataDetectorFactory.buildDetector(withBigBucketSpan, datafeedConfig, mock(Client.class));
61-
assertThat(delayedDataDetector.getWindow(), equalTo(TimeValue.timeValueHours(3).millis() * 8));
61+
assertThat(delayedDataDetector.getWindow(), equalTo(TimeValue.timeValueHours(1).millis() * 8));
6262

6363
datafeedConfig = createDatafeed(true, null);
6464

0 commit comments

Comments
 (0)