Skip to content

Commit f7b2638

Browse files
authored
[ML] Improve messages related to assigning machine learning jobs (elastic#69752) (elastic#69905)
1 parent 32b6f3d commit f7b2638

File tree

4 files changed: 26 additions and 26 deletions.

x-pack/plugin/ml/src/internalClusterTest/java/org/elasticsearch/xpack/ml/integration/TooManyJobsIT.java

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -175,13 +175,13 @@ private void verifyMaxNumberOfJobsLimit(int numNodes, int maxNumberOfJobsPerNode
175175
if (expectMemoryLimitBeforeCountLimit) {
176176
int expectedJobsAlreadyOpenOnNode = (i - 1) / numNodes;
177177
assertTrue(detailedMessage,
178-
detailedMessage.endsWith("because this node has insufficient available memory. Available memory for ML [" +
178+
detailedMessage.endsWith("node has insufficient available memory. Available memory for ML [" +
179179
maxMlMemoryPerNode + "], memory required by existing jobs [" +
180180
(expectedJobsAlreadyOpenOnNode * memoryFootprintPerJob) + "], estimated memory required for this job [" +
181-
memoryFootprintPerJob + "]]"));
181+
memoryFootprintPerJob + "].]"));
182182
} else {
183-
assertTrue(detailedMessage, detailedMessage.endsWith("because this node is full. Number of opened jobs [" +
184-
maxNumberOfJobsPerNode + "], xpack.ml.max_open_jobs [" + maxNumberOfJobsPerNode + "]]"));
183+
assertTrue(detailedMessage, detailedMessage.endsWith("node is full. Number of opened jobs [" +
184+
maxNumberOfJobsPerNode + "], xpack.ml.max_open_jobs [" + maxNumberOfJobsPerNode + "].]"));
185185
}
186186
logger.info("good news everybody --> reached maximum number of allowed opened jobs, after trying to open the {}th job", i);
187187

x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/job/JobNodeSelector.java

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -60,7 +60,7 @@ public class JobNodeSelector {
6060
private static String createReason(String job, String node, String msg, Object... params) {
6161
String preamble = String.format(
6262
Locale.ROOT,
63-
"Not opening job [%s] on node [%s], because ",
63+
"Not opening job [%s] on node [%s]. Reason: ",
6464
job,
6565
node);
6666
return preamble + ParameterizedMessage.format(msg, params);
@@ -94,7 +94,7 @@ public JobNodeSelector(ClusterState clusterState,
9494
if (MachineLearning.isMlNode(node)) {
9595
return (nodeFilter != null) ? nodeFilter.apply(node) : null;
9696
}
97-
return createReason(jobId, nodeNameOrId(node), "this node isn't a ml node.");
97+
return createReason(jobId, nodeNameOrId(node), "This node isn't a machine learning node.");
9898
};
9999
}
100100

@@ -181,7 +181,7 @@ public PersistentTasksCustomMetadata.Assignment selectNode(int dynamicMaxOpenJob
181181
if (currentLoad.getNumAllocatingJobs() >= maxConcurrentJobAllocations) {
182182
reason = createReason(jobId,
183183
nodeNameAndMlAttributes(node),
184-
"node exceeds [{}] the maximum number of jobs [{}] in opening state",
184+
"Node exceeds [{}] the maximum number of jobs [{}] in opening state.",
185185
currentLoad.getNumAllocatingJobs(),
186186
maxConcurrentJobAllocations);
187187
logger.trace(reason);
@@ -193,7 +193,7 @@ public PersistentTasksCustomMetadata.Assignment selectNode(int dynamicMaxOpenJob
193193
if (availableCount == 0) {
194194
reason = createReason(jobId,
195195
nodeNameAndMlAttributes(node),
196-
"this node is full. Number of opened jobs [{}], {} [{}]",
196+
"This node is full. Number of opened jobs [{}], {} [{}].",
197197
currentLoad.getNumAssignedJobs(),
198198
MAX_OPEN_JOBS_PER_NODE.getKey(),
199199
maxNumberOfOpenJobs);
@@ -220,9 +220,9 @@ public PersistentTasksCustomMetadata.Assignment selectNode(int dynamicMaxOpenJob
220220
if (estimatedMemoryFootprint > availableMemory) {
221221
reason = createReason(jobId,
222222
nodeNameAndMlAttributes(node),
223-
"this node has insufficient available memory. Available memory for ML [{} ({})], "
223+
"This node has insufficient available memory. Available memory for ML [{} ({})], "
224224
+ "memory required by existing jobs [{} ({})], "
225-
+ "estimated memory required for this job [{} ({})]",
225+
+ "estimated memory required for this job [{} ({})].",
226226
currentLoad.getMaxMlMemory(),
227227
ByteSizeValue.ofBytes(currentLoad.getMaxMlMemory()).toString(),
228228
currentLoad.getAssignedJobMemory(),

x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/action/TransportStartDataFrameAnalyticsActionTests.java

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -93,9 +93,9 @@ public void testGetAssignment_NoMlNodes() {
9393
assertThat(
9494
assignment.getExplanation(),
9595
allOf(
96-
containsString("Not opening job [data_frame_id] on node [_node_name0], because this node isn't a ml node."),
97-
containsString("Not opening job [data_frame_id] on node [_node_name1], because this node isn't a ml node."),
98-
containsString("Not opening job [data_frame_id] on node [_node_name2], because this node isn't a ml node.")));
96+
containsString("Not opening job [data_frame_id] on node [_node_name0]. Reason: This node isn't a machine learning node."),
97+
containsString("Not opening job [data_frame_id] on node [_node_name1]. Reason: This node isn't a machine learning node."),
98+
containsString("Not opening job [data_frame_id] on node [_node_name2]. Reason: This node isn't a machine learning node.")));
9999
}
100100

101101
// Cannot assign the node because none of the existing nodes is appropriate:

x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/job/JobNodeSelectorTests.java

Lines changed: 13 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -159,7 +159,7 @@ public void testSelectLeastLoadedMlNodeForAnomalyDetectorJob_maxCapacityCountLim
159159
isMemoryTrackerRecentlyRefreshed,
160160
false);
161161
assertNull(result.getExecutorNode());
162-
assertThat(result.getExplanation(), containsString("because this node is full. Number of opened jobs ["
162+
assertThat(result.getExplanation(), containsString("node is full. Number of opened jobs ["
163163
+ maxRunningJobsPerNode + "], xpack.ml.max_open_jobs [" + maxRunningJobsPerNode + "]"));
164164
}
165165

@@ -187,7 +187,7 @@ public void testSelectLeastLoadedMlNodeForDataFrameAnalyticsJob_maxCapacityCount
187187
isMemoryTrackerRecentlyRefreshed,
188188
false);
189189
assertNull(result.getExecutorNode());
190-
assertThat(result.getExplanation(), containsString("because this node is full. Number of opened jobs ["
190+
assertThat(result.getExplanation(), containsString("node is full. Number of opened jobs ["
191191
+ maxRunningJobsPerNode + "], xpack.ml.max_open_jobs [" + maxRunningJobsPerNode + "]"));
192192
}
193193

@@ -220,7 +220,7 @@ public void testSelectLeastLoadedMlNodeForAnomalyDetectorJob_maxCapacityMemoryLi
220220
isMemoryTrackerRecentlyRefreshed,
221221
false);
222222
assertNull(result.getExecutorNode());
223-
assertThat(result.getExplanation(), containsString("because this node has insufficient available memory. "
223+
assertThat(result.getExplanation(), containsString("node has insufficient available memory. "
224224
+ "Available memory for ML [" + currentlyRunningJobMemory + " (" + ByteSizeValue.ofBytes(currentlyRunningJobMemory)
225225
+ ")], memory required by existing jobs ["
226226
+ currentlyRunningJobMemory + " (" + ByteSizeValue.ofBytes(currentlyRunningJobMemory)
@@ -277,7 +277,7 @@ public void testSelectLeastLoadedMlNodeForAnomalyDetectorJob_firstJobTooBigMemor
277277
isMemoryTrackerRecentlyRefreshed,
278278
false);
279279
assertNull(result.getExecutorNode());
280-
assertThat(result.getExplanation(), containsString("because this node has insufficient available memory. "
280+
assertThat(result.getExplanation(), containsString("node has insufficient available memory. "
281281
+ "Available memory for ML [" + (firstJobTotalMemory - 1) + " (" + ByteSizeValue.ofBytes((firstJobTotalMemory - 1))
282282
+ ")], memory required by existing jobs [0 (0b)], estimated memory required for this job ["
283283
+ firstJobTotalMemory + " (" + ByteSizeValue.ofBytes(firstJobTotalMemory) + ")]"));
@@ -314,7 +314,7 @@ public void testSelectLeastLoadedMlNodeForDataFrameAnalyticsJob_maxCapacityMemor
314314
isMemoryTrackerRecentlyRefreshed,
315315
false);
316316
assertNull(result.getExecutorNode());
317-
assertThat(result.getExplanation(), containsString("because this node has insufficient available memory. "
317+
assertThat(result.getExplanation(), containsString("node has insufficient available memory. "
318318
+ "Available memory for ML [" + currentlyRunningJobMemory + " (" + ByteSizeValue.ofBytes(currentlyRunningJobMemory)
319319
+")], memory required by existing jobs [" + currentlyRunningJobMemory + " (" + ByteSizeValue.ofBytes(currentlyRunningJobMemory)
320320
+")], estimated memory required for this job [" + JOB_MEMORY_REQUIREMENT.getBytes() + " ("
@@ -347,7 +347,7 @@ public void testSelectLeastLoadedMlNodeForDataFrameAnalyticsJob_firstJobTooBigMe
347347
isMemoryTrackerRecentlyRefreshed,
348348
false);
349349
assertNull(result.getExecutorNode());
350-
assertThat(result.getExplanation(), containsString("because this node has insufficient available memory. "
350+
assertThat(result.getExplanation(), containsString("node has insufficient available memory. "
351351
+ "Available memory for ML [" + (firstJobTotalMemory - 1) + " (" + ByteSizeValue.ofBytes(firstJobTotalMemory - 1)
352352
+ ")], memory required by existing jobs [0 (0b)], estimated memory required for this job ["
353353
+ firstJobTotalMemory + " (" + ByteSizeValue.ofBytes(firstJobTotalMemory) + ")]"));
@@ -382,7 +382,7 @@ public void testSelectLeastLoadedMlNode_noMlNodes() {
382382
MAX_JOB_BYTES,
383383
isMemoryTrackerRecentlyRefreshed,
384384
false);
385-
assertTrue(result.getExplanation().contains("because this node isn't a ml node"));
385+
assertTrue(result.getExplanation().contains("node isn't a machine learning node"));
386386
assertNull(result.getExecutorNode());
387387
}
388388

@@ -447,7 +447,7 @@ public void testSelectLeastLoadedMlNode_maxConcurrentOpeningJobs() {
447447
isMemoryTrackerRecentlyRefreshed,
448448
false);
449449
assertNull("no node selected, because OPENING state", result.getExecutorNode());
450-
assertTrue(result.getExplanation().contains("because node exceeds [2] the maximum number of jobs [2] in opening state"));
450+
assertTrue(result.getExplanation().contains("Node exceeds [2] the maximum number of jobs [2] in opening state"));
451451

452452
tasksBuilder = PersistentTasksCustomMetadata.builder(tasks);
453453
tasksBuilder.reassignTask(MlTasks.jobTaskId(job6.getId()),
@@ -461,7 +461,7 @@ public void testSelectLeastLoadedMlNode_maxConcurrentOpeningJobs() {
461461
node -> nodeFilter(node, job7));
462462
result = jobNodeSelector.selectNode(10, 2, 30, MAX_JOB_BYTES, isMemoryTrackerRecentlyRefreshed, false);
463463
assertNull("no node selected, because stale task", result.getExecutorNode());
464-
assertTrue(result.getExplanation().contains("because node exceeds [2] the maximum number of jobs [2] in opening state"));
464+
assertTrue(result.getExplanation().contains("Node exceeds [2] the maximum number of jobs [2] in opening state"));
465465

466466
tasksBuilder = PersistentTasksCustomMetadata.builder(tasks);
467467
tasksBuilder.updateTaskState(MlTasks.jobTaskId(job6.getId()), null);
@@ -475,7 +475,7 @@ public void testSelectLeastLoadedMlNode_maxConcurrentOpeningJobs() {
475475
node -> nodeFilter(node, job7));
476476
result = jobNodeSelector.selectNode(10, 2, 30, MAX_JOB_BYTES, isMemoryTrackerRecentlyRefreshed, false);
477477
assertNull("no node selected, because null state", result.getExecutorNode());
478-
assertTrue(result.getExplanation().contains("because node exceeds [2] the maximum number of jobs [2] in opening state"));
478+
assertTrue(result.getExplanation().contains("Node exceeds [2] the maximum number of jobs [2] in opening state"));
479479
}
480480

481481
public void testSelectLeastLoadedMlNode_concurrentOpeningJobsAndStaleFailedJob() {
@@ -537,7 +537,7 @@ public void testSelectLeastLoadedMlNode_concurrentOpeningJobsAndStaleFailedJob()
537537
node -> nodeFilter(node, job8));
538538
result = jobNodeSelector.selectNode(10, 2, 30, MAX_JOB_BYTES, isMemoryTrackerRecentlyRefreshed, false);
539539
assertNull("no node selected, because OPENING state", result.getExecutorNode());
540-
assertTrue(result.getExplanation().contains("because node exceeds [2] the maximum number of jobs [2] in opening state"));
540+
assertTrue(result.getExplanation().contains("Node exceeds [2] the maximum number of jobs [2] in opening state"));
541541
}
542542

543543
public void testSelectLeastLoadedMlNode_noCompatibleJobTypeNodes() {
@@ -576,7 +576,7 @@ public void testSelectLeastLoadedMlNode_noCompatibleJobTypeNodes() {
576576
MAX_JOB_BYTES,
577577
isMemoryTrackerRecentlyRefreshed,
578578
false);
579-
assertThat(result.getExplanation(), containsString("because this node does not support jobs of type [incompatible_type]"));
579+
assertThat(result.getExplanation(), containsString("node does not support jobs of type [incompatible_type]"));
580580
assertNull(result.getExecutorNode());
581581
}
582582

@@ -614,7 +614,7 @@ public void testSelectLeastLoadedMlNode_noNodesMatchingModelSnapshotMinVersion()
614614
isMemoryTrackerRecentlyRefreshed,
615615
false);
616616
assertThat(result.getExplanation(), containsString(
617-
"because the job's model snapshot requires a node of version [6.3.0] or higher"));
617+
"job's model snapshot requires a node of version [6.3.0] or higher"));
618618
assertNull(result.getExecutorNode());
619619
}
620620

0 commit comments

Comments
 (0)