Skip to content

Commit fd83c18

Browse files
authored
[ML] Add lazy assignment job config option (#47726)
This change adds: - A new option, allow_lazy_open, to anomaly detection jobs - A new option, allow_lazy_start, to data frame analytics jobs Both work in the same way: they allow a job to be opened/started even if no ML node exists that can accommodate the job immediately. In this situation the job waits in the opening/starting state until ML node capacity is available. (The starting state for data frame analytics jobs is new in this change.) Additionally, the ML nightly maintenance tasks now create audit warnings for ML jobs that are unassigned. This means that jobs that cannot be assigned to an ML node for a very long time will show a yellow warning triangle in the UI. A final change is that it is now possible to close a job that is not assigned to a node without using force. This is because previously jobs that were open but not assigned to a node were an aberration, whereas after this change they'll be relatively common.
1 parent 29ac95a commit fd83c18

File tree

46 files changed

+723
-197
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

46 files changed

+723
-197
lines changed

client/rest-high-level/src/main/java/org/elasticsearch/client/ml/dataframe/DataFrameAnalyticsConfig.java

+22-4
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,7 @@ public static Builder builder() {
5656
private static final ParseField MODEL_MEMORY_LIMIT = new ParseField("model_memory_limit");
5757
private static final ParseField CREATE_TIME = new ParseField("create_time");
5858
private static final ParseField VERSION = new ParseField("version");
59+
private static final ParseField ALLOW_LAZY_START = new ParseField("allow_lazy_start");
5960

6061
private static ObjectParser<Builder, Void> PARSER = new ObjectParser<>("data_frame_analytics_config", true, Builder::new);
6162

@@ -86,6 +87,7 @@ public static Builder builder() {
8687
},
8788
VERSION,
8889
ValueType.STRING);
90+
PARSER.declareBoolean(Builder::setAllowLazyStart, ALLOW_LAZY_START);
8991
}
9092

9193
private static DataFrameAnalysis parseAnalysis(XContentParser parser) throws IOException {
@@ -105,11 +107,12 @@ private static DataFrameAnalysis parseAnalysis(XContentParser parser) throws IOE
105107
private final ByteSizeValue modelMemoryLimit;
106108
private final Instant createTime;
107109
private final Version version;
110+
private final Boolean allowLazyStart;
108111

109112
private DataFrameAnalyticsConfig(@Nullable String id, @Nullable String description, @Nullable DataFrameAnalyticsSource source,
110113
@Nullable DataFrameAnalyticsDest dest, @Nullable DataFrameAnalysis analysis,
111114
@Nullable FetchSourceContext analyzedFields, @Nullable ByteSizeValue modelMemoryLimit,
112-
@Nullable Instant createTime, @Nullable Version version) {
115+
@Nullable Instant createTime, @Nullable Version version, @Nullable Boolean allowLazyStart) {
113116
this.id = id;
114117
this.description = description;
115118
this.source = source;
@@ -119,6 +122,7 @@ private DataFrameAnalyticsConfig(@Nullable String id, @Nullable String descripti
119122
this.modelMemoryLimit = modelMemoryLimit;
120123
this.createTime = createTime == null ? null : Instant.ofEpochMilli(createTime.toEpochMilli());;
121124
this.version = version;
125+
this.allowLazyStart = allowLazyStart;
122126
}
123127

124128
public String getId() {
@@ -157,6 +161,10 @@ public Version getVersion() {
157161
return version;
158162
}
159163

164+
public Boolean getAllowLazyStart() {
165+
return allowLazyStart;
166+
}
167+
160168
@Override
161169
public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
162170
builder.startObject();
@@ -190,6 +198,9 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws
190198
if (version != null) {
191199
builder.field(VERSION.getPreferredName(), version);
192200
}
201+
if (allowLazyStart != null) {
202+
builder.field(ALLOW_LAZY_START.getPreferredName(), allowLazyStart);
203+
}
193204
builder.endObject();
194205
return builder;
195206
}
@@ -208,12 +219,13 @@ public boolean equals(Object o) {
208219
&& Objects.equals(analyzedFields, other.analyzedFields)
209220
&& Objects.equals(modelMemoryLimit, other.modelMemoryLimit)
210221
&& Objects.equals(createTime, other.createTime)
211-
&& Objects.equals(version, other.version);
222+
&& Objects.equals(version, other.version)
223+
&& Objects.equals(allowLazyStart, other.allowLazyStart);
212224
}
213225

214226
@Override
215227
public int hashCode() {
216-
return Objects.hash(id, description, source, dest, analysis, analyzedFields, modelMemoryLimit, createTime, version);
228+
return Objects.hash(id, description, source, dest, analysis, analyzedFields, modelMemoryLimit, createTime, version, allowLazyStart);
217229
}
218230

219231
@Override
@@ -232,6 +244,7 @@ public static class Builder {
232244
private ByteSizeValue modelMemoryLimit;
233245
private Instant createTime;
234246
private Version version;
247+
private Boolean allowLazyStart;
235248

236249
private Builder() {}
237250

@@ -280,9 +293,14 @@ public Builder setVersion(Version version) {
280293
return this;
281294
}
282295

296+
public Builder setAllowLazyStart(Boolean allowLazyStart) {
297+
this.allowLazyStart = allowLazyStart;
298+
return this;
299+
}
300+
283301
public DataFrameAnalyticsConfig build() {
284302
return new DataFrameAnalyticsConfig(id, description, source, dest, analysis, analyzedFields, modelMemoryLimit, createTime,
285-
version);
303+
version, allowLazyStart);
286304
}
287305
}
288306
}

client/rest-high-level/src/main/java/org/elasticsearch/client/ml/dataframe/DataFrameAnalyticsState.java

+1-1
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@
2222
import java.util.Locale;
2323

2424
public enum DataFrameAnalyticsState {
25-
STARTED, REINDEXING, ANALYZING, STOPPING, STOPPED;
25+
STARTED, REINDEXING, ANALYZING, STOPPING, STOPPED, STARTING;
2626

2727
public static DataFrameAnalyticsState fromString(String name) {
2828
return valueOf(name.trim().toUpperCase(Locale.ROOT));

client/rest-high-level/src/main/java/org/elasticsearch/client/ml/job/config/Job.java

+23-4
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,7 @@ public class Job implements ToXContentObject {
6767
public static final ParseField MODEL_SNAPSHOT_ID = new ParseField("model_snapshot_id");
6868
public static final ParseField RESULTS_INDEX_NAME = new ParseField("results_index_name");
6969
public static final ParseField DELETING = new ParseField("deleting");
70+
public static final ParseField ALLOW_LAZY_OPEN = new ParseField("allow_lazy_open");
7071

7172
public static final ObjectParser<Builder, Void> PARSER = new ObjectParser<>("job_details", true, Builder::new);
7273

@@ -96,6 +97,7 @@ public class Job implements ToXContentObject {
9697
PARSER.declareStringOrNull(Builder::setModelSnapshotId, MODEL_SNAPSHOT_ID);
9798
PARSER.declareString(Builder::setResultsIndexName, RESULTS_INDEX_NAME);
9899
PARSER.declareBoolean(Builder::setDeleting, DELETING);
100+
PARSER.declareBoolean(Builder::setAllowLazyOpen, ALLOW_LAZY_OPEN);
99101
}
100102

101103
private final String jobId;
@@ -117,13 +119,14 @@ public class Job implements ToXContentObject {
117119
private final String modelSnapshotId;
118120
private final String resultsIndexName;
119121
private final Boolean deleting;
122+
private final Boolean allowLazyOpen;
120123

121124
private Job(String jobId, String jobType, List<String> groups, String description,
122125
Date createTime, Date finishedTime,
123126
AnalysisConfig analysisConfig, AnalysisLimits analysisLimits, DataDescription dataDescription,
124127
ModelPlotConfig modelPlotConfig, Long renormalizationWindowDays, TimeValue backgroundPersistInterval,
125128
Long modelSnapshotRetentionDays, Long resultsRetentionDays, Map<String, Object> customSettings,
126-
String modelSnapshotId, String resultsIndexName, Boolean deleting) {
129+
String modelSnapshotId, String resultsIndexName, Boolean deleting, Boolean allowLazyOpen) {
127130

128131
this.jobId = jobId;
129132
this.jobType = jobType;
@@ -143,6 +146,7 @@ private Job(String jobId, String jobType, List<String> groups, String descriptio
143146
this.modelSnapshotId = modelSnapshotId;
144147
this.resultsIndexName = resultsIndexName;
145148
this.deleting = deleting;
149+
this.allowLazyOpen = allowLazyOpen;
146150
}
147151

148152
/**
@@ -271,6 +275,10 @@ public Boolean getDeleting() {
271275
return deleting;
272276
}
273277

278+
public Boolean getAllowLazyOpen() {
279+
return allowLazyOpen;
280+
}
281+
274282
@Override
275283
public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
276284
builder.startObject();
@@ -326,6 +334,9 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws
326334
if (deleting != null) {
327335
builder.field(DELETING.getPreferredName(), deleting);
328336
}
337+
if (allowLazyOpen != null) {
338+
builder.field(ALLOW_LAZY_OPEN.getPreferredName(), allowLazyOpen);
339+
}
329340
builder.endObject();
330341
return builder;
331342
}
@@ -358,15 +369,16 @@ public boolean equals(Object other) {
358369
&& Objects.equals(this.customSettings, that.customSettings)
359370
&& Objects.equals(this.modelSnapshotId, that.modelSnapshotId)
360371
&& Objects.equals(this.resultsIndexName, that.resultsIndexName)
361-
&& Objects.equals(this.deleting, that.deleting);
372+
&& Objects.equals(this.deleting, that.deleting)
373+
&& Objects.equals(this.allowLazyOpen, that.allowLazyOpen);
362374
}
363375

364376
@Override
365377
public int hashCode() {
366378
return Objects.hash(jobId, jobType, groups, description, createTime, finishedTime,
367379
analysisConfig, analysisLimits, dataDescription, modelPlotConfig, renormalizationWindowDays,
368380
backgroundPersistInterval, modelSnapshotRetentionDays, resultsRetentionDays, customSettings,
369-
modelSnapshotId, resultsIndexName, deleting);
381+
modelSnapshotId, resultsIndexName, deleting, allowLazyOpen);
370382
}
371383

372384
@Override
@@ -398,6 +410,7 @@ public static class Builder {
398410
private String modelSnapshotId;
399411
private String resultsIndexName;
400412
private Boolean deleting;
413+
private Boolean allowLazyOpen;
401414

402415
private Builder() {
403416
}
@@ -425,6 +438,7 @@ public Builder(Job job) {
425438
this.modelSnapshotId = job.getModelSnapshotId();
426439
this.resultsIndexName = job.getResultsIndexNameNoPrefix();
427440
this.deleting = job.getDeleting();
441+
this.allowLazyOpen = job.getAllowLazyOpen();
428442
}
429443

430444
public Builder setId(String id) {
@@ -521,6 +535,11 @@ Builder setDeleting(Boolean deleting) {
521535
return this;
522536
}
523537

538+
Builder setAllowLazyOpen(Boolean allowLazyOpen) {
539+
this.allowLazyOpen = allowLazyOpen;
540+
return this;
541+
}
542+
524543
/**
525544
* Builds a job.
526545
*
@@ -533,7 +552,7 @@ public Job build() {
533552
id, jobType, groups, description, createTime, finishedTime,
534553
analysisConfig, analysisLimits, dataDescription, modelPlotConfig, renormalizationWindowDays,
535554
backgroundPersistInterval, modelSnapshotRetentionDays, resultsRetentionDays, customSettings,
536-
modelSnapshotId, resultsIndexName, deleting);
555+
modelSnapshotId, resultsIndexName, deleting, allowLazyOpen);
537556
}
538557
}
539558
}

client/rest-high-level/src/main/java/org/elasticsearch/client/ml/job/config/JobUpdate.java

+23-4
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@ public class JobUpdate implements ToXContentObject {
5454
PARSER.declareLong(Builder::setModelSnapshotRetentionDays, Job.MODEL_SNAPSHOT_RETENTION_DAYS);
5555
PARSER.declareStringArray(Builder::setCategorizationFilters, AnalysisConfig.CATEGORIZATION_FILTERS);
5656
PARSER.declareField(Builder::setCustomSettings, (p, c) -> p.map(), Job.CUSTOM_SETTINGS, ObjectParser.ValueType.OBJECT);
57+
PARSER.declareBoolean(Builder::setAllowLazyOpen, Job.ALLOW_LAZY_OPEN);
5758
}
5859

5960
private final String jobId;
@@ -68,13 +69,14 @@ public class JobUpdate implements ToXContentObject {
6869
private final Long resultsRetentionDays;
6970
private final List<String> categorizationFilters;
7071
private final Map<String, Object> customSettings;
72+
private final Boolean allowLazyOpen;
7173

7274
private JobUpdate(String jobId, @Nullable List<String> groups, @Nullable String description,
7375
@Nullable List<DetectorUpdate> detectorUpdates, @Nullable ModelPlotConfig modelPlotConfig,
7476
@Nullable AnalysisLimits analysisLimits, @Nullable TimeValue backgroundPersistInterval,
7577
@Nullable Long renormalizationWindowDays, @Nullable Long resultsRetentionDays,
7678
@Nullable Long modelSnapshotRetentionDays, @Nullable List<String> categorisationFilters,
77-
@Nullable Map<String, Object> customSettings) {
79+
@Nullable Map<String, Object> customSettings, @Nullable Boolean allowLazyOpen) {
7880
this.jobId = jobId;
7981
this.groups = groups;
8082
this.description = description;
@@ -87,6 +89,7 @@ private JobUpdate(String jobId, @Nullable List<String> groups, @Nullable String
8789
this.resultsRetentionDays = resultsRetentionDays;
8890
this.categorizationFilters = categorisationFilters;
8991
this.customSettings = customSettings;
92+
this.allowLazyOpen = allowLazyOpen;
9093
}
9194

9295
public String getJobId() {
@@ -137,6 +140,10 @@ public Map<String, Object> getCustomSettings() {
137140
return customSettings;
138141
}
139142

143+
public Boolean getAllowLazyOpen() {
144+
return allowLazyOpen;
145+
}
146+
140147
@Override
141148
public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
142149
builder.startObject();
@@ -174,6 +181,9 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws
174181
if (customSettings != null) {
175182
builder.field(Job.CUSTOM_SETTINGS.getPreferredName(), customSettings);
176183
}
184+
if (allowLazyOpen != null) {
185+
builder.field(Job.ALLOW_LAZY_OPEN.getPreferredName(), allowLazyOpen);
186+
}
177187
builder.endObject();
178188
return builder;
179189
}
@@ -201,13 +211,15 @@ public boolean equals(Object other) {
201211
&& Objects.equals(this.modelSnapshotRetentionDays, that.modelSnapshotRetentionDays)
202212
&& Objects.equals(this.resultsRetentionDays, that.resultsRetentionDays)
203213
&& Objects.equals(this.categorizationFilters, that.categorizationFilters)
204-
&& Objects.equals(this.customSettings, that.customSettings);
214+
&& Objects.equals(this.customSettings, that.customSettings)
215+
&& Objects.equals(this.allowLazyOpen, that.allowLazyOpen);
205216
}
206217

207218
@Override
208219
public int hashCode() {
209220
return Objects.hash(jobId, groups, description, detectorUpdates, modelPlotConfig, analysisLimits, renormalizationWindowDays,
210-
backgroundPersistInterval, modelSnapshotRetentionDays, resultsRetentionDays, categorizationFilters, customSettings);
221+
backgroundPersistInterval, modelSnapshotRetentionDays, resultsRetentionDays, categorizationFilters, customSettings,
222+
allowLazyOpen);
211223
}
212224

213225
public static class DetectorUpdate implements ToXContentObject {
@@ -303,6 +315,7 @@ public static class Builder {
303315
private Long resultsRetentionDays;
304316
private List<String> categorizationFilters;
305317
private Map<String, Object> customSettings;
318+
private Boolean allowLazyOpen;
306319

307320
/**
308321
* New {@link JobUpdate.Builder} object for the existing job
@@ -446,9 +459,15 @@ public Builder setCustomSettings(Map<String, Object> customSettings) {
446459
return this;
447460
}
448461

462+
public Builder setAllowLazyOpen(boolean allowLazyOpen) {
463+
this.allowLazyOpen = allowLazyOpen;
464+
return this;
465+
}
466+
449467
public JobUpdate build() {
450468
return new JobUpdate(jobId, groups, description, detectorUpdates, modelPlotConfig, analysisLimits, backgroundPersistInterval,
451-
renormalizationWindowDays, resultsRetentionDays, modelSnapshotRetentionDays, categorizationFilters, customSettings);
469+
renormalizationWindowDays, resultsRetentionDays, modelSnapshotRetentionDays, categorizationFilters, customSettings,
470+
allowLazyOpen);
452471
}
453472
}
454473
}

client/rest-high-level/src/test/java/org/elasticsearch/client/ml/dataframe/DataFrameAnalyticsConfigTests.java

+3
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,9 @@ public static DataFrameAnalyticsConfig randomDataFrameAnalyticsConfig() {
6666
if (randomBoolean()) {
6767
builder.setVersion(Version.CURRENT);
6868
}
69+
if (randomBoolean()) {
70+
builder.setAllowLazyStart(randomBoolean());
71+
}
6972
return builder.build();
7073
}
7174

client/rest-high-level/src/test/java/org/elasticsearch/client/ml/job/config/JobTests.java

+3
Original file line numberDiff line numberDiff line change
@@ -159,6 +159,9 @@ public static Job.Builder createRandomizedJobBuilder() {
159159
if (randomBoolean()) {
160160
builder.setDeleting(randomBoolean());
161161
}
162+
if (randomBoolean()) {
163+
builder.setAllowLazyOpen(randomBoolean());
164+
}
162165
return builder;
163166
}
164167

client/rest-high-level/src/test/java/org/elasticsearch/client/ml/job/config/JobUpdateTests.java

+3
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,9 @@ public static JobUpdate createRandom(String jobId) {
7979
if (randomBoolean()) {
8080
update.setCustomSettings(Collections.singletonMap(randomAlphaOfLength(10), randomAlphaOfLength(10)));
8181
}
82+
if (randomBoolean()) {
83+
update.setAllowLazyOpen(randomBoolean());
84+
}
8285

8386
return update.build();
8487
}

docs/reference/ml/anomaly-detection/apis/get-job.asciidoc

+2-1
Original file line numberDiff line numberDiff line change
@@ -122,7 +122,8 @@ The API returns the following results:
122122
"time_format": "epoch_ms"
123123
},
124124
"model_snapshot_retention_days": 1,
125-
"results_index_name": "shared"
125+
"results_index_name": "shared",
126+
"allow_lazy_open": false
126127
}
127128
]
128129
}

docs/reference/ml/anomaly-detection/apis/jobresource.asciidoc

+13
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,19 @@ so do not set the `background_persist_interval` value too low.
9595
deleted from Elasticsearch. The default value is null, which means results
9696
are retained.
9797

98+
`allow_lazy_open`::
99+
(boolean) Advanced configuration option.
100+
Whether this job should be allowed to open when there is insufficient
101+
{ml} node capacity for it to be immediately assigned to a node.
102+
The default is `false`, which means that the <<ml-open-job>>
103+
will return an error if a {ml} node with capacity to run the
104+
job cannot immediately be found. (However, this is also subject to
105+
the cluster-wide `xpack.ml.max_lazy_ml_nodes` setting - see
106+
<<advanced-ml-settings>>.) If this option is set to `true` then
107+
the <<ml-open-job>> will not return an error, and the job will
108+
wait in the `opening` state until sufficient {ml} node capacity
109+
is available.
110+
98111
[[ml-analysisconfig]]
99112
==== Analysis Configuration Objects
100113

docs/reference/ml/anomaly-detection/apis/put-job.asciidoc

+2-1
Original file line numberDiff line numberDiff line change
@@ -149,7 +149,8 @@ When the job is created, you receive the following results:
149149
"time_format" : "epoch_ms"
150150
},
151151
"model_snapshot_retention_days" : 1,
152-
"results_index_name" : "shared"
152+
"results_index_name" : "shared",
153+
"allow_lazy_open" : false
153154
}
154155
----
155156
// TESTRESPONSE[s/"job_version" : "8.0.0"/"job_version" : $body.job_version/]

0 commit comments

Comments
 (0)