Skip to content

Commit 76067e4

Browse files
committed
Add delayed datacheck to the datafeed job runner (#35387)
* ML: Adding missing datacheck to datafeedjob
* Adding client side and docs
* Making adjustments to validations
* Making values default to on, having more sensible limits
* Intermittent commit, still need to figure out interval
* Adjusting delayed data check interval
* updating docs
* Making parameter Boolean, so it is nullable
* bumping bwc to 7 before backport
* changing to version current
* moving delayed data check config its own object
* Separation of duties for delayed data detection
* fixing checkstyles
* fixing checkstyles
* Adjusting default behavior so that null windows are allowed
* Mentioning the default value
* Fixing comments, syncing up validations
1 parent 71a1066 commit 76067e4

File tree

31 files changed

+1136
-166
lines changed

31 files changed

+1136
-166
lines changed

client/rest-high-level/src/main/java/org/elasticsearch/client/ml/datafeed/DatafeedConfig.java

Lines changed: 33 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,7 @@ public class DatafeedConfig implements ToXContentObject {
6262
public static final ParseField AGGREGATIONS = new ParseField("aggregations");
6363
public static final ParseField SCRIPT_FIELDS = new ParseField("script_fields");
6464
public static final ParseField CHUNKING_CONFIG = new ParseField("chunking_config");
65+
public static final ParseField DELAYED_DATA_CHECK_CONFIG = new ParseField("delayed_data_check_config");
6566

6667
public static final ConstructingObjectParser<Builder, Void> PARSER = new ConstructingObjectParser<>(
6768
"datafeed_config", true, a -> new Builder((String)a[0], (String)a[1]));
@@ -88,6 +89,7 @@ public class DatafeedConfig implements ToXContentObject {
8889
}, SCRIPT_FIELDS);
8990
PARSER.declareInt(Builder::setScrollSize, SCROLL_SIZE);
9091
PARSER.declareObject(Builder::setChunkingConfig, ChunkingConfig.PARSER, CHUNKING_CONFIG);
92+
PARSER.declareObject(Builder::setDelayedDataCheckConfig, DelayedDataCheckConfig.PARSER, DELAYED_DATA_CHECK_CONFIG);
9193
}
9294

9395
private static BytesReference parseBytes(XContentParser parser) throws IOException {
@@ -107,10 +109,12 @@ private static BytesReference parseBytes(XContentParser parser) throws IOExcepti
107109
private final List<SearchSourceBuilder.ScriptField> scriptFields;
108110
private final Integer scrollSize;
109111
private final ChunkingConfig chunkingConfig;
112+
private final DelayedDataCheckConfig delayedDataCheckConfig;
113+
110114

111115
private DatafeedConfig(String id, String jobId, TimeValue queryDelay, TimeValue frequency, List<String> indices, List<String> types,
112116
BytesReference query, BytesReference aggregations, List<SearchSourceBuilder.ScriptField> scriptFields,
113-
Integer scrollSize, ChunkingConfig chunkingConfig) {
117+
Integer scrollSize, ChunkingConfig chunkingConfig, DelayedDataCheckConfig delayedDataCheckConfig) {
114118
this.id = id;
115119
this.jobId = jobId;
116120
this.queryDelay = queryDelay;
@@ -122,6 +126,7 @@ private DatafeedConfig(String id, String jobId, TimeValue queryDelay, TimeValue
122126
this.scriptFields = scriptFields == null ? null : Collections.unmodifiableList(scriptFields);
123127
this.scrollSize = scrollSize;
124128
this.chunkingConfig = chunkingConfig;
129+
this.delayedDataCheckConfig = delayedDataCheckConfig;
125130
}
126131

127132
public String getId() {
@@ -168,6 +173,10 @@ public ChunkingConfig getChunkingConfig() {
168173
return chunkingConfig;
169174
}
170175

176+
public DelayedDataCheckConfig getDelayedDataCheckConfig() {
177+
return delayedDataCheckConfig;
178+
}
179+
171180
@Override
172181
public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
173182
builder.startObject();
@@ -204,6 +213,9 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws
204213
if (chunkingConfig != null) {
205214
builder.field(CHUNKING_CONFIG.getPreferredName(), chunkingConfig);
206215
}
216+
if (delayedDataCheckConfig != null) {
217+
builder.field(DELAYED_DATA_CHECK_CONFIG.getPreferredName(), delayedDataCheckConfig);
218+
}
207219

208220
builder.endObject();
209221
return builder;
@@ -244,7 +256,8 @@ public boolean equals(Object other) {
244256
&& Objects.equals(this.scrollSize, that.scrollSize)
245257
&& Objects.equals(asMap(this.aggregations), asMap(that.aggregations))
246258
&& Objects.equals(this.scriptFields, that.scriptFields)
247-
&& Objects.equals(this.chunkingConfig, that.chunkingConfig);
259+
&& Objects.equals(this.chunkingConfig, that.chunkingConfig)
260+
&& Objects.equals(this.delayedDataCheckConfig, that.delayedDataCheckConfig);
248261
}
249262

250263
/**
@@ -255,7 +268,7 @@ public boolean equals(Object other) {
255268
@Override
256269
public int hashCode() {
257270
return Objects.hash(id, jobId, frequency, queryDelay, indices, types, asMap(query), scrollSize, asMap(aggregations), scriptFields,
258-
chunkingConfig);
271+
chunkingConfig, delayedDataCheckConfig);
259272
}
260273

261274
public static Builder builder(String id, String jobId) {
@@ -275,6 +288,7 @@ public static class Builder {
275288
private List<SearchSourceBuilder.ScriptField> scriptFields;
276289
private Integer scrollSize;
277290
private ChunkingConfig chunkingConfig;
291+
private DelayedDataCheckConfig delayedDataCheckConfig;
278292

279293
public Builder(String id, String jobId) {
280294
this.id = Objects.requireNonNull(id, ID.getPreferredName());
@@ -293,6 +307,7 @@ public Builder(DatafeedConfig config) {
293307
this.scriptFields = config.scriptFields;
294308
this.scrollSize = config.scrollSize;
295309
this.chunkingConfig = config.chunkingConfig;
310+
this.delayedDataCheckConfig = config.getDelayedDataCheckConfig();
296311
}
297312

298313
public Builder setIndices(List<String> indices) {
@@ -366,9 +381,23 @@ public Builder setChunkingConfig(ChunkingConfig chunkingConfig) {
366381
return this;
367382
}
368383

384+
/**
385+
* This sets the {@link DelayedDataCheckConfig} settings.
386+
*
387+
* See {@link DelayedDataCheckConfig} for more information.
388+
*
389+
* @param delayedDataCheckConfig the delayed data check configuration
390+
* Default value is enabled, with `check_window` being null. This means the actual window is
391+
* calculated when the real-time Datafeed runs.
392+
*/
393+
public Builder setDelayedDataCheckConfig(DelayedDataCheckConfig delayedDataCheckConfig) {
394+
this.delayedDataCheckConfig = delayedDataCheckConfig;
395+
return this;
396+
}
397+
369398
public DatafeedConfig build() {
370399
return new DatafeedConfig(id, jobId, queryDelay, frequency, indices, types, query, aggregations, scriptFields, scrollSize,
371-
chunkingConfig);
400+
chunkingConfig, delayedDataCheckConfig);
372401
}
373402

374403
private static BytesReference xContentToBytes(ToXContentObject object) throws IOException {

client/rest-high-level/src/main/java/org/elasticsearch/client/ml/datafeed/DatafeedUpdate.java

Lines changed: 23 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,9 @@ public class DatafeedUpdate implements ToXContentObject {
7777
}, DatafeedConfig.SCRIPT_FIELDS);
7878
PARSER.declareInt(Builder::setScrollSize, DatafeedConfig.SCROLL_SIZE);
7979
PARSER.declareObject(Builder::setChunkingConfig, ChunkingConfig.PARSER, DatafeedConfig.CHUNKING_CONFIG);
80+
PARSER.declareObject(Builder::setDelayedDataCheckConfig,
81+
DelayedDataCheckConfig.PARSER,
82+
DatafeedConfig.DELAYED_DATA_CHECK_CONFIG);
8083
}
8184

8285
private static BytesReference parseBytes(XContentParser parser) throws IOException {
@@ -96,10 +99,11 @@ private static BytesReference parseBytes(XContentParser parser) throws IOExcepti
9699
private final List<SearchSourceBuilder.ScriptField> scriptFields;
97100
private final Integer scrollSize;
98101
private final ChunkingConfig chunkingConfig;
102+
private final DelayedDataCheckConfig delayedDataCheckConfig;
99103

100104
private DatafeedUpdate(String id, String jobId, TimeValue queryDelay, TimeValue frequency, List<String> indices, List<String> types,
101105
BytesReference query, BytesReference aggregations, List<SearchSourceBuilder.ScriptField> scriptFields,
102-
Integer scrollSize, ChunkingConfig chunkingConfig) {
106+
Integer scrollSize, ChunkingConfig chunkingConfig, DelayedDataCheckConfig delayedDataCheckConfig) {
103107
this.id = id;
104108
this.jobId = jobId;
105109
this.queryDelay = queryDelay;
@@ -111,6 +115,7 @@ private DatafeedUpdate(String id, String jobId, TimeValue queryDelay, TimeValue
111115
this.scriptFields = scriptFields;
112116
this.scrollSize = scrollSize;
113117
this.chunkingConfig = chunkingConfig;
118+
this.delayedDataCheckConfig = delayedDataCheckConfig;
114119
}
115120

116121
/**
@@ -146,6 +151,9 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws
146151
}
147152
builder.endObject();
148153
}
154+
if (delayedDataCheckConfig != null) {
155+
builder.field(DatafeedConfig.DELAYED_DATA_CHECK_CONFIG.getPreferredName(), delayedDataCheckConfig);
156+
}
149157
addOptionalField(builder, DatafeedConfig.SCROLL_SIZE, scrollSize);
150158
addOptionalField(builder, DatafeedConfig.CHUNKING_CONFIG, chunkingConfig);
151159
builder.endObject();
@@ -198,6 +206,10 @@ public ChunkingConfig getChunkingConfig() {
198206
return chunkingConfig;
199207
}
200208

209+
public DelayedDataCheckConfig getDelayedDataCheckConfig() {
210+
return delayedDataCheckConfig;
211+
}
212+
201213
private static Map<String, Object> asMap(BytesReference bytesReference) {
202214
return bytesReference == null ? null : XContentHelper.convertToMap(bytesReference, true, XContentType.JSON).v2();
203215
}
@@ -232,6 +244,7 @@ public boolean equals(Object other) {
232244
&& Objects.equals(asMap(this.query), asMap(that.query))
233245
&& Objects.equals(this.scrollSize, that.scrollSize)
234246
&& Objects.equals(asMap(this.aggregations), asMap(that.aggregations))
247+
&& Objects.equals(this.delayedDataCheckConfig, that.delayedDataCheckConfig)
235248
&& Objects.equals(this.scriptFields, that.scriptFields)
236249
&& Objects.equals(this.chunkingConfig, that.chunkingConfig);
237250
}
@@ -244,7 +257,7 @@ public boolean equals(Object other) {
244257
@Override
245258
public int hashCode() {
246259
return Objects.hash(id, jobId, frequency, queryDelay, indices, types, asMap(query), scrollSize, asMap(aggregations), scriptFields,
247-
chunkingConfig);
260+
chunkingConfig, delayedDataCheckConfig);
248261
}
249262

250263
public static Builder builder(String id) {
@@ -264,6 +277,7 @@ public static class Builder {
264277
private List<SearchSourceBuilder.ScriptField> scriptFields;
265278
private Integer scrollSize;
266279
private ChunkingConfig chunkingConfig;
280+
private DelayedDataCheckConfig delayedDataCheckConfig;
267281

268282
public Builder(String id) {
269283
this.id = Objects.requireNonNull(id, DatafeedConfig.ID.getPreferredName());
@@ -281,6 +295,7 @@ public Builder(DatafeedUpdate config) {
281295
this.scriptFields = config.scriptFields;
282296
this.scrollSize = config.scrollSize;
283297
this.chunkingConfig = config.chunkingConfig;
298+
this.delayedDataCheckConfig = config.delayedDataCheckConfig;
284299
}
285300

286301
public Builder setJobId(String jobId) {
@@ -359,9 +374,14 @@ public Builder setChunkingConfig(ChunkingConfig chunkingConfig) {
359374
return this;
360375
}
361376

377+
public Builder setDelayedDataCheckConfig(DelayedDataCheckConfig delayedDataCheckConfig) {
378+
this.delayedDataCheckConfig = delayedDataCheckConfig;
379+
return this;
380+
}
381+
362382
public DatafeedUpdate build() {
363383
return new DatafeedUpdate(id, jobId, queryDelay, frequency, indices, types, query, aggregations, scriptFields, scrollSize,
364-
chunkingConfig);
384+
chunkingConfig, delayedDataCheckConfig);
365385
}
366386

367387
private static BytesReference xContentToBytes(ToXContentObject object) throws IOException {
Lines changed: 130 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,130 @@
1+
/*
2+
* Licensed to Elasticsearch under one or more contributor
3+
* license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright
5+
* ownership. Elasticsearch licenses this file to you under
6+
* the Apache License, Version 2.0 (the "License"); you may
7+
* not use this file except in compliance with the License.
8+
* You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
package org.elasticsearch.client.ml.datafeed;
20+
21+
import org.elasticsearch.common.Nullable;
22+
import org.elasticsearch.common.ParseField;
23+
import org.elasticsearch.common.unit.TimeValue;
24+
import org.elasticsearch.common.xcontent.ConstructingObjectParser;
25+
import org.elasticsearch.common.xcontent.ObjectParser;
26+
import org.elasticsearch.common.xcontent.ToXContentObject;
27+
import org.elasticsearch.common.xcontent.XContentBuilder;
28+
import org.elasticsearch.common.xcontent.XContentParser;
29+
30+
import java.io.IOException;
31+
import java.util.Objects;
32+
33+
/**
34+
* The configuration object containing the delayed data check settings.
35+
*
36+
* See {@link DelayedDataCheckConfig#enabledDelayedDataCheckConfig(TimeValue)} for creating a new
37+
* enabled delayed data check with the given check_window.
38+
*
39+
* See {@link DelayedDataCheckConfig#disabledDelayedDataCheckConfig()} for creating a config for disabling
40+
* delayed data checking.
41+
*/
42+
public class DelayedDataCheckConfig implements ToXContentObject {
43+
44+
public static final ParseField ENABLED = new ParseField("enabled");
45+
public static final ParseField CHECK_WINDOW = new ParseField("check_window");
46+
47+
// The client parses leniently (the `true` below makes the parser ignore unknown fields) to allow for enhancements on the server side
48+
public static final ConstructingObjectParser<DelayedDataCheckConfig, Void> PARSER = new ConstructingObjectParser<>(
49+
"delayed_data_check_config", true, a -> new DelayedDataCheckConfig((Boolean) a[0], (TimeValue) a[1]));
50+
static {
51+
PARSER.declareBoolean(ConstructingObjectParser.constructorArg(), ENABLED);
52+
PARSER.declareField(ConstructingObjectParser.optionalConstructorArg(), p -> {
53+
if (p.currentToken() == XContentParser.Token.VALUE_STRING) {
54+
return TimeValue.parseTimeValue(p.text(), CHECK_WINDOW.getPreferredName());
55+
}
56+
throw new IllegalArgumentException("Unsupported token [" + p.currentToken() + "]");
57+
}, CHECK_WINDOW, ObjectParser.ValueType.STRING);
58+
}
59+
60+
/**
61+
* This creates a new DelayedDataCheckConfig that has a check_window of the passed `timeValue`
62+
*
63+
* The check window ends at the latest finalized bucket and reaches this TimeValue back into the past. The index is queried over that
64+
* window to see if any data has been indexed since the data was read by the Datafeed.
65+
*
66+
* The window must be larger than the {@link org.elasticsearch.client.ml.job.config.AnalysisConfig#bucketSpan}, less than
67+
* 24 hours, and span fewer than 10,000 buckets.
68+
*
69+
*
70+
* @param timeValue The time length in the past from the latest finalized bucket to look for latent data.
71+
* If `null` is provided, the appropriate window is calculated when it is used
72+
**/
73+
public static DelayedDataCheckConfig enabledDelayedDataCheckConfig(TimeValue timeValue) {
74+
return new DelayedDataCheckConfig(true, timeValue);
75+
}
76+
77+
/**
78+
* This creates a new DelayedDataCheckConfig that disables the data check.
79+
*/
80+
public static DelayedDataCheckConfig disabledDelayedDataCheckConfig() {
81+
return new DelayedDataCheckConfig(false, null);
82+
}
83+
84+
private final boolean enabled;
85+
private final TimeValue checkWindow;
86+
87+
DelayedDataCheckConfig(Boolean enabled, TimeValue checkWindow) {
88+
this.enabled = enabled;
89+
this.checkWindow = checkWindow;
90+
}
91+
92+
public boolean isEnabled() {
93+
return enabled;
94+
}
95+
96+
@Nullable
97+
public TimeValue getCheckWindow() {
98+
return checkWindow;
99+
}
100+
101+
@Override
102+
public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
103+
builder.startObject();
104+
builder.field(ENABLED.getPreferredName(), enabled);
105+
if (checkWindow != null) {
106+
builder.field(CHECK_WINDOW.getPreferredName(), checkWindow.getStringRep());
107+
}
108+
builder.endObject();
109+
return builder;
110+
}
111+
112+
@Override
113+
public int hashCode() {
114+
return Objects.hash(enabled, checkWindow);
115+
}
116+
117+
@Override
118+
public boolean equals(Object obj) {
119+
if (this == obj) {
120+
return true;
121+
}
122+
if (obj == null || getClass() != obj.getClass()) {
123+
return false;
124+
}
125+
126+
DelayedDataCheckConfig other = (DelayedDataCheckConfig) obj;
127+
return Objects.equals(this.enabled, other.enabled) && Objects.equals(this.checkWindow, other.checkWindow);
128+
}
129+
130+
}

client/rest-high-level/src/test/java/org/elasticsearch/client/documentation/MlClientDocumentationIT.java

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,7 @@
9595
import org.elasticsearch.client.ml.datafeed.DatafeedConfig;
9696
import org.elasticsearch.client.ml.datafeed.DatafeedStats;
9797
import org.elasticsearch.client.ml.datafeed.DatafeedUpdate;
98+
import org.elasticsearch.client.ml.datafeed.DelayedDataCheckConfig;
9899
import org.elasticsearch.client.ml.job.config.AnalysisConfig;
99100
import org.elasticsearch.client.ml.job.config.AnalysisLimits;
100101
import org.elasticsearch.client.ml.job.config.DataDescription;
@@ -583,6 +584,14 @@ public void testPutDatafeed() throws Exception {
583584
datafeedBuilder.setQueryDelay(TimeValue.timeValueMinutes(1)); // <1>
584585
// end::put-datafeed-config-set-query-delay
585586

587+
// tag::put-datafeed-config-set-delayed-data-check-config
588+
datafeedBuilder.setDelayedDataCheckConfig(DelayedDataCheckConfig
589+
.enabledDelayedDataCheckConfig(TimeValue.timeValueHours(1))); // <1>
590+
// end::put-datafeed-config-set-delayed-data-check-config
591+
592+
// reset the config so that we do not accidentally trip internal validations due to job bucket size
593+
datafeedBuilder.setDelayedDataCheckConfig(null);
594+
586595
List<SearchSourceBuilder.ScriptField> scriptFields = Collections.emptyList();
587596
// tag::put-datafeed-config-set-script-fields
588597
datafeedBuilder.setScriptFields(scriptFields); // <1>

client/rest-high-level/src/test/java/org/elasticsearch/client/ml/datafeed/DatafeedConfigTests.java

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,9 @@ public static DatafeedConfig.Builder createRandomBuilder() {
103103
if (randomBoolean()) {
104104
builder.setChunkingConfig(ChunkingConfigTests.createRandomizedChunk());
105105
}
106+
if (randomBoolean()) {
107+
builder.setDelayedDataCheckConfig(DelayedDataCheckConfigTests.createRandomizedConfig());
108+
}
106109
return builder;
107110
}
108111

client/rest-high-level/src/test/java/org/elasticsearch/client/ml/datafeed/DatafeedUpdateTests.java

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,9 @@ public static DatafeedUpdate createRandom() {
8383
if (randomBoolean()) {
8484
builder.setChunkingConfig(ChunkingConfigTests.createRandomizedChunk());
8585
}
86+
if (randomBoolean()) {
87+
builder.setDelayedDataCheckConfig(DelayedDataCheckConfigTests.createRandomizedConfig());
88+
}
8689
return builder.build();
8790
}
8891

0 commit comments

Comments
 (0)