Commit 77aa8c0

[ML] Include start params in _stats for non-started model deployments (#89091)
Adds the missing start parameters to the _stats API response for non-started deployments.
1 parent 4e1a063 commit 77aa8c0
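For illustration only (not part of the commit), below is a minimal sketch of how the fields this change adds could be read from the _stats response of a deployment that has not yet been allocated. It assumes it runs inside the PyTorchModelIT test class shown further down, so that class's helpers (getTrainedModelStats, entityAsMap) and imports are available; the field names are the ones asserted by the new test.

    // Sketch only (hypothetical usage): relies on PyTorchModelIT helpers and the field names from the new test.
    Response statsResponse = getTrainedModelStats("model_2");
    Map<String, Object> responseMap = entityAsMap(statsResponse);
    List<Map<String, Object>> stats = (List<Map<String, Object>>) responseMap.get("trained_model_stats");

    // Before this change these came back null for a deployment that had not started;
    // now they echo the parameters given to the _start request.
    Object numberOfAllocations = XContentMapValues.extractValue("deployment_stats.number_of_allocations", stats.get(0));
    Object threadsPerAllocation = XContentMapValues.extractValue("deployment_stats.threads_per_allocation", stats.get(0));
    Object queueCapacity = XContentMapValues.extractValue("deployment_stats.queue_capacity", stats.get(0));
    Object cacheSize = XContentMapValues.extractValue("deployment_stats.cache_size", stats.get(0));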

File tree: 3 files changed, +81 -1 lines changed


docs/changelog/89091.yaml

Lines changed: 5 additions & 0 deletions
@@ -0,0 +1,5 @@
+pr: 89091
+summary: Include start params in `_stats` for non-started model deployments
+area: Machine Learning
+type: bug
+issues: []

x-pack/plugin/ml/qa/native-multi-node-tests/src/javaRestTest/java/org/elasticsearch/xpack/ml/integration/PyTorchModelIT.java

Lines changed: 65 additions & 0 deletions
@@ -813,6 +813,71 @@ public void testStartDeployment_TooManyAllocations() throws IOException {
         assertThat(EntityUtils.toString(response.getEntity()), not(containsString("deployment_stats")));
     }
 
+    @SuppressWarnings("unchecked")
+    public void testStartDeployment_GivenNoProcessorsLeft_AndLazyStartEnabled() throws Exception {
+        // We start 2 models. The first needs so many allocations it won't possibly
+        // get them all. This would leave no space to allocate the second model at all.
+
+        // Enable lazy starting so that the deployments start even if they cannot get fully allocated.
+        // The setting is cleared in the cleanup method of these tests.
+        Request loggingSettings = new Request("PUT", "_cluster/settings");
+        loggingSettings.setJsonEntity("""
+            {"persistent" : {
+                    "xpack.ml.max_lazy_ml_nodes": 5
+            }}""");
+        client().performRequest(loggingSettings);
+
+        String modelId1 = "model_1";
+        createTrainedModel(modelId1);
+        putModelDefinition(modelId1);
+        putVocabulary(List.of("these", "are", "my", "words"), modelId1);
+
+        String modelId2 = "model_2";
+        createTrainedModel(modelId2);
+        putModelDefinition(modelId2);
+        putVocabulary(List.of("these", "are", "my", "words"), modelId2);
+
+        startDeployment(modelId1, AllocationStatus.State.STARTED.toString(), 100, 1);
+
+        {
+            Request request = new Request(
+                "POST",
+                "/_ml/trained_models/"
+                    + modelId2
+                    + "/deployment/_start?timeout=40s&wait_for=starting&"
+                    + "number_of_allocations=4&threads_per_allocation=2&queue_capacity=500&cache_size=100Kb"
+            );
+            client().performRequest(request);
+        }
+
+        // Check second model did not get any allocations
+        assertAllocationCount(modelId2, 0);
+
+        // Verify stats shows model is starting and deployment settings are present
+        {
+            Response statsResponse = getTrainedModelStats(modelId2);
+            var responseMap = entityAsMap(statsResponse);
+            List<Map<String, Object>> stats = (List<Map<String, Object>>) responseMap.get("trained_model_stats");
+            assertThat(stats, hasSize(1));
+            String statusState = (String) XContentMapValues.extractValue("deployment_stats.allocation_status.state", stats.get(0));
+            assertThat(statusState, equalTo("starting"));
+            int numberOfAllocations = (int) XContentMapValues.extractValue("deployment_stats.number_of_allocations", stats.get(0));
+            assertThat(numberOfAllocations, equalTo(4));
+            int threadsPerAllocation = (int) XContentMapValues.extractValue("deployment_stats.threads_per_allocation", stats.get(0));
+            assertThat(threadsPerAllocation, equalTo(2));
+            int queueCapacity = (int) XContentMapValues.extractValue("deployment_stats.queue_capacity", stats.get(0));
+            assertThat(queueCapacity, equalTo(500));
+            ByteSizeValue cacheSize = ByteSizeValue.parseBytesSizeValue(
+                (String) XContentMapValues.extractValue("deployment_stats.cache_size", stats.get(0)),
+                "cache_size"
+            );
+            assertThat(cacheSize, equalTo(ByteSizeValue.ofKb(100)));
+        }
+
+        stopDeployment(modelId1);
+        stopDeployment(modelId2);
+    }
+
     @SuppressWarnings("unchecked")
     private void assertAllocationCount(String modelId, int expectedAllocationCount) throws IOException {
         Response response = getTrainedModelStats(modelId);

x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportGetDeploymentStatsAction.java

Lines changed: 11 additions & 1 deletion
@@ -269,7 +269,17 @@ static GetDeploymentStatsAction.Response addFailedRoutes(
 
                 nodeStats.sort(Comparator.comparing(n -> n.getNode().getId()));
 
-                updatedAssignmentStats.add(new AssignmentStats(modelId, null, null, null, null, assignment.getStartTime(), nodeStats));
+                updatedAssignmentStats.add(
+                    new AssignmentStats(
+                        modelId,
+                        assignment.getTaskParams().getThreadsPerAllocation(),
+                        assignment.getTaskParams().getNumberOfAllocations(),
+                        assignment.getTaskParams().getQueueCapacity(),
+                        assignment.getTaskParams().getCacheSize().orElse(null),
+                        assignment.getStartTime(),
+                        nodeStats
+                    )
+                );
             }
         }
