Skip to content

Commit 4bfd076

Browse files
authored
[ML] Fix monitoring if orphaned anomaly detector persistent tasks exist (#57235)
Since #51888 the ML job stats endpoint has returned entries for jobs that have a persistent task but no job config. Such orphaned tasks caused monitoring to fail, because the usage code assumed that every job stats entry had a corresponding job config. This change ignores any such corrupt jobs for monitoring purposes.
1 parent 80778f5 commit 4bfd076

File tree

2 files changed

+45
-1
lines changed

2 files changed

+45
-1
lines changed

x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/MachineLearningUsageTransportAction.java

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -184,9 +184,14 @@ private void addJobsUsage(GetJobsStatsAction.Response response, List<Job> jobs,
184184
Map<String, Long> allJobsCreatedBy = jobs.stream().map(this::jobCreatedBy)
185185
.collect(Collectors.groupingBy(item -> item, Collectors.counting()));;
186186
for (GetJobsStatsAction.Response.JobStats jobStats : jobsStats) {
187-
ModelSizeStats modelSizeStats = jobStats.getModelSizeStats();
188187
Job job = jobMap.get(jobStats.getJobId());
188+
if (job == null) {
189+
// It's possible we can get job stats without a corresponding job config, if a
190+
// persistent task is orphaned. Omit these corrupt jobs from the usage info.
191+
continue;
192+
}
189193
int detectorsCount = job.getAnalysisConfig().getDetectors().size();
194+
ModelSizeStats modelSizeStats = jobStats.getModelSizeStats();
190195
double modelSize = modelSizeStats == null ? 0.0
191196
: jobStats.getModelSizeStats().getModelBytes();
192197

x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/MachineLearningInfoTransportActionTests.java

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -327,6 +327,45 @@ public void testUsage() throws Exception {
327327
}
328328
}
329329

330+
public void testUsageWithOrphanedTask() throws Exception {
331+
when(licenseState.isAllowed(XPackLicenseState.Feature.MACHINE_LEARNING)).thenReturn(true);
332+
Settings.Builder settings = Settings.builder().put(commonSettings);
333+
settings.put("xpack.ml.enabled", true);
334+
335+
Job opened1 = buildJob("opened1", Collections.singletonList(buildMinDetector("foo")),
336+
Collections.singletonMap("created_by", randomFrom("a-cool-module", "a_cool_module", "a cool module")));
337+
GetJobsStatsAction.Response.JobStats opened1JobStats = buildJobStats("opened1", JobState.OPENED, 100L, 3L);
338+
// NB: we have JobStats but no Job for "opened2"
339+
GetJobsStatsAction.Response.JobStats opened2JobStats = buildJobStats("opened2", JobState.OPENED, 200L, 8L);
340+
Job closed1 = buildJob("closed1", Arrays.asList(buildMinDetector("foo"), buildMinDetector("bar"), buildMinDetector("foobar")));
341+
GetJobsStatsAction.Response.JobStats closed1JobStats = buildJobStats("closed1", JobState.CLOSED, 300L, 0);
342+
givenJobs(Arrays.asList(opened1, closed1), Arrays.asList(opened1JobStats, opened2JobStats, closed1JobStats));
343+
344+
var usageAction = newUsageAction(settings.build());
345+
PlainActionFuture<XPackUsageFeatureResponse> future = new PlainActionFuture<>();
346+
usageAction.masterOperation(null, null, ClusterState.EMPTY_STATE, future);
347+
XPackFeatureSet.Usage usage = future.get().getUsage();
348+
349+
XContentSource source;
350+
try (XContentBuilder builder = XContentFactory.jsonBuilder()) {
351+
usage.toXContent(builder, ToXContent.EMPTY_PARAMS);
352+
source = new XContentSource(builder);
353+
}
354+
355+
// The orphaned job should be excluded from the usage info
356+
assertThat(source.getValue("jobs._all.count"), equalTo(2));
357+
assertThat(source.getValue("jobs._all.detectors.min"), equalTo(1.0));
358+
assertThat(source.getValue("jobs._all.detectors.max"), equalTo(3.0));
359+
assertThat(source.getValue("jobs._all.detectors.total"), equalTo(4.0));
360+
assertThat(source.getValue("jobs._all.detectors.avg"), equalTo(2.0));
361+
assertThat(source.getValue("jobs._all.model_size.min"), equalTo(100.0));
362+
assertThat(source.getValue("jobs._all.model_size.max"), equalTo(300.0));
363+
assertThat(source.getValue("jobs._all.model_size.total"), equalTo(400.0));
364+
assertThat(source.getValue("jobs._all.model_size.avg"), equalTo(200.0));
365+
assertThat(source.getValue("jobs._all.created_by.a_cool_module"), equalTo(1));
366+
assertThat(source.getValue("jobs._all.created_by.unknown"), equalTo(1));
367+
}
368+
330369
public void testUsageDisabledML() throws Exception {
331370
when(licenseState.isAllowed(XPackLicenseState.Feature.MACHINE_LEARNING)).thenReturn(true);
332371
Settings.Builder settings = Settings.builder().put(commonSettings);

0 commit comments

Comments
 (0)