Skip to content

Commit cc254e3

Browse files
[ML] Account for gaps in data counts after job is reopened (#30294)
This commit fixes an issue with the data diagnostics where empty buckets are not reported even though they should be. Once a job is reopened, the diagnostics do not get initialized from the current data counts (especially the latest record timestamp). The result is that if the data that is sent has a time gap compared to the previously sent data, that gap is not accounted for in the empty bucket count. This commit fixes that by initializing the diagnostics with the current data counts. Closes #30080
1 parent b83ad84 commit cc254e3

File tree

7 files changed

+129
-20
lines changed

7 files changed

+129
-20
lines changed

docs/CHANGELOG.asciidoc

+4
Original file line numberDiff line numberDiff line change
@@ -399,6 +399,10 @@ Security::
399399
* Fixed `saml-metadata` env file such that it sources the appropriate
400400
environment file.
401401

402+
Machine Learning::
403+
404+
* Account for gaps in data counts after job is reopened ({pull}30294[#30294])
405+
402406
//[float]
403407
//=== Regressions
404408

x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/job/process/DataCountsReporter.java

+1-1
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,7 @@ public DataCountsReporter(Settings settings, Job job, DataCounts counts, JobData
8282

8383
totalRecordStats = counts;
8484
incrementalRecordStats = new DataCounts(job.getId());
85-
diagnostics = new DataStreamDiagnostics(job);
85+
diagnostics = new DataStreamDiagnostics(job, counts);
8686

8787
acceptablePercentDateParseErrors = ACCEPTABLE_PERCENTAGE_DATE_PARSE_ERRORS_SETTING.get(settings);
8888
acceptablePercentOutOfOrderErrors = ACCEPTABLE_PERCENTAGE_OUT_OF_ORDER_ERRORS_SETTING.get(settings);

x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/job/process/diagnostics/BucketDiagnostics.java

+9-1
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,11 @@
66
package org.elasticsearch.xpack.ml.job.process.diagnostics;
77

88
import org.elasticsearch.xpack.core.ml.job.config.Job;
9+
import org.elasticsearch.xpack.core.ml.job.process.autodetect.state.DataCounts;
910
import org.elasticsearch.xpack.core.ml.utils.Intervals;
1011

12+
import java.util.Date;
13+
1114
/**
1215
* A moving window of buckets that allow keeping
1316
* track of some statistics like the bucket count,
@@ -33,12 +36,17 @@ class BucketDiagnostics {
3336
private long latestFlushedBucketStartMs = -1;
3437
private final BucketFlushListener bucketFlushListener;
3538

36-
BucketDiagnostics(Job job, BucketFlushListener bucketFlushListener) {
39+
BucketDiagnostics(Job job, DataCounts dataCounts, BucketFlushListener bucketFlushListener) {
3740
bucketSpanMs = job.getAnalysisConfig().getBucketSpan().millis();
3841
latencyMs = job.getAnalysisConfig().getLatency() == null ? 0 : job.getAnalysisConfig().getLatency().millis();
3942
maxSize = Math.max((int) (Intervals.alignToCeil(latencyMs, bucketSpanMs) / bucketSpanMs), MIN_BUCKETS);
4043
buckets = new long[maxSize];
4144
this.bucketFlushListener = bucketFlushListener;
45+
46+
Date latestRecordTimestamp = dataCounts.getLatestRecordTimeStamp();
47+
if (latestRecordTimestamp != null) {
48+
addRecord(latestRecordTimestamp.getTime());
49+
}
4250
}
4351

4452
void addRecord(long recordTimestampMs) {

x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/job/process/diagnostics/DataStreamDiagnostics.java

+3-2
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
import org.apache.logging.log4j.Logger;
99
import org.elasticsearch.common.logging.Loggers;
1010
import org.elasticsearch.xpack.core.ml.job.config.Job;
11+
import org.elasticsearch.xpack.core.ml.job.process.autodetect.state.DataCounts;
1112

1213
import java.util.Date;
1314

@@ -32,8 +33,8 @@ public class DataStreamDiagnostics {
3233
private long sparseBucketCount = 0;
3334
private long latestSparseBucketTime = -1;
3435

35-
public DataStreamDiagnostics(Job job) {
36-
bucketDiagnostics = new BucketDiagnostics(job, createBucketFlushListener());
36+
public DataStreamDiagnostics(Job job, DataCounts dataCounts) {
37+
bucketDiagnostics = new BucketDiagnostics(job, dataCounts, createBucketFlushListener());
3738
}
3839

3940
private BucketDiagnostics.BucketFlushListener createBucketFlushListener() {

x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/job/process/diagnostics/DataStreamDiagnosticsTests.java

+17-14
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
import org.elasticsearch.xpack.core.ml.job.config.DataDescription;
1212
import org.elasticsearch.xpack.core.ml.job.config.Detector;
1313
import org.elasticsearch.xpack.core.ml.job.config.Job;
14+
import org.elasticsearch.xpack.core.ml.job.process.autodetect.state.DataCounts;
1415
import org.junit.Before;
1516

1617
import java.util.Arrays;
@@ -20,6 +21,7 @@ public class DataStreamDiagnosticsTests extends ESTestCase {
2021

2122
private static final long BUCKET_SPAN = 60000;
2223
private Job job;
24+
private DataCounts dataCounts;
2325

2426
@Before
2527
public void setUpMocks() {
@@ -32,10 +34,11 @@ public void setUpMocks() {
3234
builder.setAnalysisConfig(acBuilder);
3335
builder.setDataDescription(new DataDescription.Builder());
3436
job = createJob(TimeValue.timeValueMillis(BUCKET_SPAN), null);
37+
dataCounts = new DataCounts(job.getId());
3538
}
3639

3740
public void testIncompleteBuckets() {
38-
DataStreamDiagnostics d = new DataStreamDiagnostics(job);
41+
DataStreamDiagnostics d = new DataStreamDiagnostics(job, dataCounts);
3942

4043
d.checkRecord(1000);
4144
d.checkRecord(2000);
@@ -81,7 +84,7 @@ public void testIncompleteBuckets() {
8184
}
8285

8386
public void testSimple() {
84-
DataStreamDiagnostics d = new DataStreamDiagnostics(job);
87+
DataStreamDiagnostics d = new DataStreamDiagnostics(job, dataCounts);
8588

8689
d.checkRecord(70000);
8790
d.checkRecord(130000);
@@ -103,7 +106,7 @@ public void testSimple() {
103106
}
104107

105108
public void testSimpleReverse() {
106-
DataStreamDiagnostics d = new DataStreamDiagnostics(job);
109+
DataStreamDiagnostics d = new DataStreamDiagnostics(job, dataCounts);
107110

108111
d.checkRecord(610000);
109112
d.checkRecord(550000);
@@ -126,7 +129,7 @@ public void testSimpleReverse() {
126129

127130
public void testWithLatencyLessThanTenBuckets() {
128131
job = createJob(TimeValue.timeValueMillis(BUCKET_SPAN), TimeValue.timeValueMillis(3 * BUCKET_SPAN));
129-
DataStreamDiagnostics d = new DataStreamDiagnostics(job);
132+
DataStreamDiagnostics d = new DataStreamDiagnostics(job, dataCounts);
130133

131134
long timestamp = 70000;
132135
while (timestamp < 70000 + 20 * BUCKET_SPAN) {
@@ -141,7 +144,7 @@ public void testWithLatencyLessThanTenBuckets() {
141144

142145
public void testWithLatencyGreaterThanTenBuckets() {
143146
job = createJob(TimeValue.timeValueMillis(BUCKET_SPAN), TimeValue.timeValueMillis(13 * BUCKET_SPAN + 10000));
144-
DataStreamDiagnostics d = new DataStreamDiagnostics(job);
147+
DataStreamDiagnostics d = new DataStreamDiagnostics(job, dataCounts);
145148

146149
long timestamp = 70000;
147150
while (timestamp < 70000 + 20 * BUCKET_SPAN) {
@@ -155,7 +158,7 @@ public void testWithLatencyGreaterThanTenBuckets() {
155158
}
156159

157160
public void testEmptyBuckets() {
158-
DataStreamDiagnostics d = new DataStreamDiagnostics(job);
161+
DataStreamDiagnostics d = new DataStreamDiagnostics(job, dataCounts);
159162

160163
d.checkRecord(10000);
161164
d.checkRecord(70000);
@@ -177,7 +180,7 @@ public void testEmptyBuckets() {
177180
}
178181

179182
public void testEmptyBucketsStartLater() {
180-
DataStreamDiagnostics d = new DataStreamDiagnostics(job);
183+
DataStreamDiagnostics d = new DataStreamDiagnostics(job, dataCounts);
181184

182185
d.checkRecord(1110000);
183186
d.checkRecord(1170000);
@@ -199,7 +202,7 @@ public void testEmptyBucketsStartLater() {
199202
}
200203

201204
public void testSparseBuckets() {
202-
DataStreamDiagnostics d = new DataStreamDiagnostics(job);
205+
DataStreamDiagnostics d = new DataStreamDiagnostics(job, dataCounts);
203206

204207
sendManyDataPoints(d, 10000, 69000, 1000);
205208
sendManyDataPoints(d, 70000, 129000, 1200);
@@ -227,7 +230,7 @@ public void testSparseBuckets() {
227230
* signal
228231
*/
229232
public void testSparseBucketsLast() {
230-
DataStreamDiagnostics d = new DataStreamDiagnostics(job);
233+
DataStreamDiagnostics d = new DataStreamDiagnostics(job, dataCounts);
231234

232235
sendManyDataPoints(d, 10000, 69000, 1000);
233236
sendManyDataPoints(d, 70000, 129000, 1200);
@@ -255,7 +258,7 @@ public void testSparseBucketsLast() {
255258
* signal on the 2nd to last
256259
*/
257260
public void testSparseBucketsLastTwo() {
258-
DataStreamDiagnostics d = new DataStreamDiagnostics(job);
261+
DataStreamDiagnostics d = new DataStreamDiagnostics(job, dataCounts);
259262

260263
sendManyDataPoints(d, 10000, 69000, 1000);
261264
sendManyDataPoints(d, 70000, 129000, 1200);
@@ -280,7 +283,7 @@ public void testSparseBucketsLastTwo() {
280283
}
281284

282285
public void testMixedEmptyAndSparseBuckets() {
283-
DataStreamDiagnostics d = new DataStreamDiagnostics(job);
286+
DataStreamDiagnostics d = new DataStreamDiagnostics(job, dataCounts);
284287

285288
sendManyDataPoints(d, 10000, 69000, 1000);
286289
sendManyDataPoints(d, 70000, 129000, 1200);
@@ -308,7 +311,7 @@ public void testMixedEmptyAndSparseBuckets() {
308311
* whether counts are right.
309312
*/
310313
public void testEmptyBucketsLongerOutage() {
311-
DataStreamDiagnostics d = new DataStreamDiagnostics(job);
314+
DataStreamDiagnostics d = new DataStreamDiagnostics(job, dataCounts);
312315

313316
d.checkRecord(10000);
314317
d.checkRecord(70000);
@@ -336,7 +339,7 @@ public void testEmptyBucketsLongerOutage() {
336339
* The number of sparse buckets should not be to much, it could be normal.
337340
*/
338341
public void testSparseBucketsLongerPeriod() {
339-
DataStreamDiagnostics d = new DataStreamDiagnostics(job);
342+
DataStreamDiagnostics d = new DataStreamDiagnostics(job, dataCounts);
340343

341344
sendManyDataPoints(d, 10000, 69000, 1000);
342345
sendManyDataPoints(d, 70000, 129000, 1200);
@@ -374,7 +377,7 @@ private static Job createJob(TimeValue bucketSpan, TimeValue latency) {
374377
}
375378

376379
public void testFlushAfterZeroRecords() {
377-
DataStreamDiagnostics d = new DataStreamDiagnostics(job);
380+
DataStreamDiagnostics d = new DataStreamDiagnostics(job, dataCounts);
378381
d.flush();
379382
assertEquals(0, d.getBucketCount());
380383
}

x-pack/qa/ml-basic-multi-node/src/test/java/org/elasticsearch/xpack/ml/integration/MlBasicMultiNodeIT.java

+2-2
Original file line numberDiff line numberDiff line change
@@ -241,7 +241,7 @@ public void testMiniFarequoteReopen() throws Exception {
241241
assertEquals(0, responseBody.get("invalid_date_count"));
242242
assertEquals(0, responseBody.get("missing_field_count"));
243243
assertEquals(0, responseBody.get("out_of_order_timestamp_count"));
244-
assertEquals(0, responseBody.get("bucket_count"));
244+
assertEquals(1000, responseBody.get("bucket_count"));
245245

246246
// unintuitive: should return the earliest record timestamp of this feed???
247247
assertEquals(null, responseBody.get("earliest_record_timestamp"));
@@ -266,7 +266,7 @@ public void testMiniFarequoteReopen() throws Exception {
266266
assertEquals(0, dataCountsDoc.get("invalid_date_count"));
267267
assertEquals(0, dataCountsDoc.get("missing_field_count"));
268268
assertEquals(0, dataCountsDoc.get("out_of_order_timestamp_count"));
269-
assertEquals(0, dataCountsDoc.get("bucket_count"));
269+
assertEquals(1000, dataCountsDoc.get("bucket_count"));
270270
assertEquals(1403481600000L, dataCountsDoc.get("earliest_record_timestamp"));
271271
assertEquals(1407082000000L, dataCountsDoc.get("latest_record_timestamp"));
272272

Original file line numberDiff line numberDiff line change
@@ -0,0 +1,93 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the Elastic License;
4+
* you may not use this file except in compliance with the Elastic License.
5+
*/
6+
package org.elasticsearch.xpack.ml.integration;
7+
8+
import org.elasticsearch.common.unit.TimeValue;
9+
import org.elasticsearch.xpack.core.ml.action.GetBucketsAction;
10+
import org.elasticsearch.xpack.core.ml.job.config.AnalysisConfig;
11+
import org.elasticsearch.xpack.core.ml.job.config.DataDescription;
12+
import org.elasticsearch.xpack.core.ml.job.config.Detector;
13+
import org.elasticsearch.xpack.core.ml.job.config.Job;
14+
import org.elasticsearch.xpack.core.ml.job.process.autodetect.state.DataCounts;
15+
import org.junit.After;
16+
17+
import java.util.ArrayList;
18+
import java.util.Collections;
19+
import java.util.HashMap;
20+
import java.util.List;
21+
import java.util.Map;
22+
import java.util.stream.Collectors;
23+
24+
import static org.hamcrest.Matchers.equalTo;
25+
26+
/**
27+
* Tests that after reopening a job and sending more
28+
* data after a gap, data counts are reported correctly.
29+
*/
30+
public class ReopenJobWithGapIT extends MlNativeAutodetectIntegTestCase {
31+
32+
private static final String JOB_ID = "reopen-job-with-gap-test";
33+
private static final long BUCKET_SPAN_SECONDS = 3600;
34+
35+
@After
36+
public void cleanUpTest() {
37+
cleanUp();
38+
}
39+
40+
public void test() throws Exception {
41+
AnalysisConfig.Builder analysisConfig = new AnalysisConfig.Builder(
42+
Collections.singletonList(new Detector.Builder("count", null).build()));
43+
analysisConfig.setBucketSpan(TimeValue.timeValueSeconds(BUCKET_SPAN_SECONDS));
44+
DataDescription.Builder dataDescription = new DataDescription.Builder();
45+
dataDescription.setTimeFormat("epoch");
46+
Job.Builder job = new Job.Builder(JOB_ID);
47+
job.setAnalysisConfig(analysisConfig);
48+
job.setDataDescription(dataDescription);
49+
50+
registerJob(job);
51+
putJob(job);
52+
openJob(job.getId());
53+
54+
long timestamp = 1483228800L; // 2017-01-01T00:00:00Z
55+
List<String> data = new ArrayList<>();
56+
for (int i = 0; i < 10; i++) {
57+
data.add(createJsonRecord(createRecord(timestamp)));
58+
timestamp += BUCKET_SPAN_SECONDS;
59+
}
60+
61+
postData(job.getId(), data.stream().collect(Collectors.joining()));
62+
flushJob(job.getId(), true);
63+
closeJob(job.getId());
64+
65+
GetBucketsAction.Request request = new GetBucketsAction.Request(job.getId());
66+
request.setExcludeInterim(true);
67+
assertThat(client().execute(GetBucketsAction.INSTANCE, request).actionGet().getBuckets().count(), equalTo(9L));
68+
assertThat(getJobStats(job.getId()).get(0).getDataCounts().getBucketCount(), equalTo(9L));
69+
70+
timestamp += 10 * BUCKET_SPAN_SECONDS;
71+
data = new ArrayList<>();
72+
for (int i = 0; i < 10; i++) {
73+
data.add(createJsonRecord(createRecord(timestamp)));
74+
timestamp += BUCKET_SPAN_SECONDS;
75+
}
76+
77+
openJob(job.getId());
78+
postData(job.getId(), data.stream().collect(Collectors.joining()));
79+
flushJob(job.getId(), true);
80+
closeJob(job.getId());
81+
82+
assertThat(client().execute(GetBucketsAction.INSTANCE, request).actionGet().getBuckets().count(), equalTo(29L));
83+
DataCounts dataCounts = getJobStats(job.getId()).get(0).getDataCounts();
84+
assertThat(dataCounts.getBucketCount(), equalTo(29L));
85+
assertThat(dataCounts.getEmptyBucketCount(), equalTo(10L));
86+
}
87+
88+
private static Map<String, Object> createRecord(long timestamp) {
89+
Map<String, Object> record = new HashMap<>();
90+
record.put("time", timestamp);
91+
return record;
92+
}
93+
}

0 commit comments

Comments
 (0)