Skip to content

Commit 4fa85de

Browse files
authored
[7.8][ML] Fix monitoring if orphaned anomaly detector persistent tasks exist (#57242)
Since #51888 the ML job stats endpoint has returned entries for jobs that have a persistent task but no job config. Such orphaned tasks caused monitoring to fail. This change ignores any such corrupt jobs for monitoring purposes. Backport of #57235.
1 parent a5631b4 commit 4fa85de

File tree

2 files changed

+46
-1
lines changed

2 files changed

+46
-1
lines changed

x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/MachineLearningFeatureSet.java

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -215,9 +215,14 @@ private void addJobsUsage(GetJobsStatsAction.Response response, List<Job> jobs)
215215
Map<String, Long> allJobsCreatedBy = jobs.stream().map(this::jobCreatedBy)
216216
.collect(Collectors.groupingBy(item -> item, Collectors.counting()));;
217217
for (GetJobsStatsAction.Response.JobStats jobStats : jobsStats) {
218-
ModelSizeStats modelSizeStats = jobStats.getModelSizeStats();
219218
Job job = jobMap.get(jobStats.getJobId());
219+
if (job == null) {
220+
// It's possible we can get job stats without a corresponding job config, if a
221+
// persistent task is orphaned. Omit these corrupt jobs from the usage info.
222+
continue;
223+
}
220224
int detectorsCount = job.getAnalysisConfig().getDetectors().size();
225+
ModelSizeStats modelSizeStats = jobStats.getModelSizeStats();
221226
double modelSize = modelSizeStats == null ? 0.0
222227
: jobStats.getModelSizeStats().getModelBytes();
223228

x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/MachineLearningFeatureSetTests.java

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -337,6 +337,46 @@ public void testUsage() throws Exception {
337337
}
338338
}
339339

340+
public void testUsageWithOrphanedTask() throws Exception {
341+
when(licenseState.isAllowed(XPackLicenseState.Feature.MACHINE_LEARNING)).thenReturn(true);
342+
Settings.Builder settings = Settings.builder().put(commonSettings);
343+
settings.put("xpack.ml.enabled", true);
344+
345+
Job opened1 = buildJob("opened1", Collections.singletonList(buildMinDetector("foo")),
346+
Collections.singletonMap("created_by", randomFrom("a-cool-module", "a_cool_module", "a cool module")));
347+
GetJobsStatsAction.Response.JobStats opened1JobStats = buildJobStats("opened1", JobState.OPENED, 100L, 3L);
348+
// NB: we have JobStats but no Job for "opened2"
349+
GetJobsStatsAction.Response.JobStats opened2JobStats = buildJobStats("opened2", JobState.OPENED, 200L, 8L);
350+
Job closed1 = buildJob("closed1", Arrays.asList(buildMinDetector("foo"), buildMinDetector("bar"), buildMinDetector("foobar")));
351+
GetJobsStatsAction.Response.JobStats closed1JobStats = buildJobStats("closed1", JobState.CLOSED, 300L, 0);
352+
givenJobs(Arrays.asList(opened1, closed1), Arrays.asList(opened1JobStats, opened2JobStats, closed1JobStats));
353+
354+
MachineLearningFeatureSet featureSet = new MachineLearningFeatureSet(TestEnvironment.newEnvironment(settings.build()),
355+
clusterService, client, licenseState, jobManagerHolder);
356+
PlainActionFuture<Usage> future = new PlainActionFuture<>();
357+
featureSet.usage(future);
358+
XPackFeatureSet.Usage usage = future.get();
359+
360+
XContentSource source;
361+
try (XContentBuilder builder = XContentFactory.jsonBuilder()) {
362+
usage.toXContent(builder, ToXContent.EMPTY_PARAMS);
363+
source = new XContentSource(builder);
364+
}
365+
366+
// The orphaned job should be excluded from the usage info
367+
assertThat(source.getValue("jobs._all.count"), equalTo(2));
368+
assertThat(source.getValue("jobs._all.detectors.min"), equalTo(1.0));
369+
assertThat(source.getValue("jobs._all.detectors.max"), equalTo(3.0));
370+
assertThat(source.getValue("jobs._all.detectors.total"), equalTo(4.0));
371+
assertThat(source.getValue("jobs._all.detectors.avg"), equalTo(2.0));
372+
assertThat(source.getValue("jobs._all.model_size.min"), equalTo(100.0));
373+
assertThat(source.getValue("jobs._all.model_size.max"), equalTo(300.0));
374+
assertThat(source.getValue("jobs._all.model_size.total"), equalTo(400.0));
375+
assertThat(source.getValue("jobs._all.model_size.avg"), equalTo(200.0));
376+
assertThat(source.getValue("jobs._all.created_by.a_cool_module"), equalTo(1));
377+
assertThat(source.getValue("jobs._all.created_by.unknown"), equalTo(1));
378+
}
379+
340380
public void testUsageDisabledML() throws Exception {
341381
when(licenseState.isAllowed(XPackLicenseState.Feature.MACHINE_LEARNING)).thenReturn(true);
342382
Settings.Builder settings = Settings.builder().put(commonSettings);

0 commit comments

Comments
 (0)