Commit a1ff9c3

[ML] Include start params in _stats for non-started model deployments (#89091) (#89092)
Adds the missing start parameters to the _stats API response for non-started deployments.
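For a deployment that has not yet been allocated to any node, the _stats response now carries the parameters the deployment was started with instead of nulls. As a rough sketch, the fragment below shows the shape the new integration test asserts on (field nesting inferred from the test's extractValue paths; the values are the illustrative ones used in that test, not output copied from a running cluster):

{
  "trained_model_stats": [
    {
      "model_id": "model_2",
      "deployment_stats": {
        "allocation_status": { "state": "starting" },
        "number_of_allocations": 4,
        "threads_per_allocation": 2,
        "queue_capacity": 500,
        "cache_size": "100kb"
      }
    }
  ]
}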

3 files changed: +81, -1 lines


docs/changelog/89091.yaml

Lines changed: 5 additions & 0 deletions
@@ -0,0 +1,5 @@
+pr: 89091
+summary: Include start params in `_stats` for non-started model deployments
+area: Machine Learning
+type: bug
+issues: []

x-pack/plugin/ml/qa/native-multi-node-tests/src/javaRestTest/java/org/elasticsearch/xpack/ml/integration/PyTorchModelIT.java

Lines changed: 65 additions & 0 deletions
@@ -812,6 +812,71 @@ public void testStartDeployment_TooManyAllocations() throws IOException {
         assertThat(EntityUtils.toString(response.getEntity()), not(containsString("deployment_stats")));
     }

+    @SuppressWarnings("unchecked")
+    public void testStartDeployment_GivenNoProcessorsLeft_AndLazyStartEnabled() throws Exception {
+        // We start 2 models. The first needs so many allocations it won't possibly
+        // get them all. This would leave no space to allocate the second model at all.
+
+        // Enable lazy starting so that the deployments start even if they cannot get fully allocated.
+        // The setting is cleared in the cleanup method of these tests.
+        Request loggingSettings = new Request("PUT", "_cluster/settings");
+        loggingSettings.setJsonEntity("""
+            {"persistent" : {
+                "xpack.ml.max_lazy_ml_nodes": 5
+            }}""");
+        client().performRequest(loggingSettings);
+
+        String modelId1 = "model_1";
+        createTrainedModel(modelId1);
+        putModelDefinition(modelId1);
+        putVocabulary(List.of("these", "are", "my", "words"), modelId1);
+
+        String modelId2 = "model_2";
+        createTrainedModel(modelId2);
+        putModelDefinition(modelId2);
+        putVocabulary(List.of("these", "are", "my", "words"), modelId2);
+
+        startDeployment(modelId1, AllocationStatus.State.STARTED.toString(), 100, 1);
+
+        {
+            Request request = new Request(
+                "POST",
+                "/_ml/trained_models/"
+                    + modelId2
+                    + "/deployment/_start?timeout=40s&wait_for=starting&"
+                    + "number_of_allocations=4&threads_per_allocation=2&queue_capacity=500&cache_size=100Kb"
+            );
+            client().performRequest(request);
+        }
+
+        // Check second model did not get any allocations
+        assertAllocationCount(modelId2, 0);
+
+        // Verify stats shows model is starting and deployment settings are present
+        {
+            Response statsResponse = getTrainedModelStats(modelId2);
+            var responseMap = entityAsMap(statsResponse);
+            List<Map<String, Object>> stats = (List<Map<String, Object>>) responseMap.get("trained_model_stats");
+            assertThat(stats, hasSize(1));
+            String statusState = (String) XContentMapValues.extractValue("deployment_stats.allocation_status.state", stats.get(0));
+            assertThat(statusState, equalTo("starting"));
+            int numberOfAllocations = (int) XContentMapValues.extractValue("deployment_stats.number_of_allocations", stats.get(0));
+            assertThat(numberOfAllocations, equalTo(4));
+            int threadsPerAllocation = (int) XContentMapValues.extractValue("deployment_stats.threads_per_allocation", stats.get(0));
+            assertThat(threadsPerAllocation, equalTo(2));
+            int queueCapacity = (int) XContentMapValues.extractValue("deployment_stats.queue_capacity", stats.get(0));
+            assertThat(queueCapacity, equalTo(500));
+            ByteSizeValue cacheSize = ByteSizeValue.parseBytesSizeValue(
+                (String) XContentMapValues.extractValue("deployment_stats.cache_size", stats.get(0)),
+                "cache_size"
+            );
+            assertThat(cacheSize, equalTo(ByteSizeValue.ofKb(100)));
+        }
+
+        stopDeployment(modelId1);
+        stopDeployment(modelId2);
+    }
+
     @SuppressWarnings("unchecked")
     private void assertAllocationCount(String modelId, int expectedAllocationCount) throws IOException {
         Response response = getTrainedModelStats(modelId);

x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportGetDeploymentStatsAction.java

Lines changed: 11 additions & 1 deletion
@@ -269,7 +269,17 @@ static GetDeploymentStatsAction.Response addFailedRoutes(

                 nodeStats.sort(Comparator.comparing(n -> n.getNode().getId()));

-                updatedAssignmentStats.add(new AssignmentStats(modelId, null, null, null, null, assignment.getStartTime(), nodeStats));
+                updatedAssignmentStats.add(
+                    new AssignmentStats(
+                        modelId,
+                        assignment.getTaskParams().getThreadsPerAllocation(),
+                        assignment.getTaskParams().getNumberOfAllocations(),
+                        assignment.getTaskParams().getQueueCapacity(),
+                        assignment.getTaskParams().getCacheSize().orElse(null),
+                        assignment.getStartTime(),
+                        nodeStats
+                    )
+                );
             }
         }
