@@ -813,6 +813,71 @@ public void testStartDeployment_TooManyAllocations() throws IOException {
813
813
assertThat (EntityUtils .toString (response .getEntity ()), not (containsString ("deployment_stats" )));
814
814
}
815
815
816
+ @ SuppressWarnings ("unchecked" )
817
+ public void testStartDeployment_GivenNoProcessorsLeft_AndLazyStartEnabled () throws Exception {
818
+ // We start 2 models. The first needs so many allocations it won't possibly
819
+ // get them all. This would leave no space to allocate the second model at all.
820
+
821
+ // Enable lazy starting so that the deployments start even if they cannot get fully allocated.
822
+ // The setting is cleared in the cleanup method of these tests.
823
+ Request loggingSettings = new Request ("PUT" , "_cluster/settings" );
824
+ loggingSettings .setJsonEntity ("""
825
+ {"persistent" : {
826
+ "xpack.ml.max_lazy_ml_nodes": 5
827
+ }}""" );
828
+ client ().performRequest (loggingSettings );
829
+
830
+ String modelId1 = "model_1" ;
831
+ createTrainedModel (modelId1 );
832
+ putModelDefinition (modelId1 );
833
+ putVocabulary (List .of ("these" , "are" , "my" , "words" ), modelId1 );
834
+
835
+ String modelId2 = "model_2" ;
836
+ createTrainedModel (modelId2 );
837
+ putModelDefinition (modelId2 );
838
+ putVocabulary (List .of ("these" , "are" , "my" , "words" ), modelId2 );
839
+
840
+ startDeployment (modelId1 , AllocationStatus .State .STARTED .toString (), 100 , 1 );
841
+
842
+ {
843
+ Request request = new Request (
844
+ "POST" ,
845
+ "/_ml/trained_models/"
846
+ + modelId2
847
+ + "/deployment/_start?timeout=40s&wait_for=starting&"
848
+ + "number_of_allocations=4&threads_per_allocation=2&queue_capacity=500&cache_size=100Kb"
849
+ );
850
+ client ().performRequest (request );
851
+ }
852
+
853
+ // Check second model did not get any allocations
854
+ assertAllocationCount (modelId2 , 0 );
855
+
856
+ // Verify stats shows model is starting and deployment settings are present
857
+ {
858
+ Response statsResponse = getTrainedModelStats (modelId2 );
859
+ var responseMap = entityAsMap (statsResponse );
860
+ List <Map <String , Object >> stats = (List <Map <String , Object >>) responseMap .get ("trained_model_stats" );
861
+ assertThat (stats , hasSize (1 ));
862
+ String statusState = (String ) XContentMapValues .extractValue ("deployment_stats.allocation_status.state" , stats .get (0 ));
863
+ assertThat (statusState , equalTo ("starting" ));
864
+ int numberOfAllocations = (int ) XContentMapValues .extractValue ("deployment_stats.number_of_allocations" , stats .get (0 ));
865
+ assertThat (numberOfAllocations , equalTo (4 ));
866
+ int threadsPerAllocation = (int ) XContentMapValues .extractValue ("deployment_stats.threads_per_allocation" , stats .get (0 ));
867
+ assertThat (threadsPerAllocation , equalTo (2 ));
868
+ int queueCapacity = (int ) XContentMapValues .extractValue ("deployment_stats.queue_capacity" , stats .get (0 ));
869
+ assertThat (queueCapacity , equalTo (500 ));
870
+ ByteSizeValue cacheSize = ByteSizeValue .parseBytesSizeValue (
871
+ (String ) XContentMapValues .extractValue ("deployment_stats.cache_size" , stats .get (0 )),
872
+ "cache_size)"
873
+ );
874
+ assertThat (cacheSize , equalTo (ByteSizeValue .ofKb (100 )));
875
+ }
876
+
877
+ stopDeployment (modelId1 );
878
+ stopDeployment (modelId2 );
879
+ }
880
+
816
881
@ SuppressWarnings ("unchecked" )
817
882
private void assertAllocationCount (String modelId , int expectedAllocationCount ) throws IOException {
818
883
Response response = getTrainedModelStats (modelId );
0 commit comments