@@ -812,6 +812,71 @@ public void testStartDeployment_TooManyAllocations() throws IOException {
812
812
assertThat (EntityUtils .toString (response .getEntity ()), not (containsString ("deployment_stats" )));
813
813
}
814
814
815
+ @ SuppressWarnings ("unchecked" )
816
+ public void testStartDeployment_GivenNoProcessorsLeft_AndLazyStartEnabled () throws Exception {
817
+ // We start 2 models. The first needs so many allocations it won't possibly
818
+ // get them all. This would leave no space to allocate the second model at all.
819
+
820
+ // Enable lazy starting so that the deployments start even if they cannot get fully allocated.
821
+ // The setting is cleared in the cleanup method of these tests.
822
+ Request loggingSettings = new Request ("PUT" , "_cluster/settings" );
823
+ loggingSettings .setJsonEntity ("""
824
+ {"persistent" : {
825
+ "xpack.ml.max_lazy_ml_nodes": 5
826
+ }}""" );
827
+ client ().performRequest (loggingSettings );
828
+
829
+ String modelId1 = "model_1" ;
830
+ createTrainedModel (modelId1 );
831
+ putModelDefinition (modelId1 );
832
+ putVocabulary (List .of ("these" , "are" , "my" , "words" ), modelId1 );
833
+
834
+ String modelId2 = "model_2" ;
835
+ createTrainedModel (modelId2 );
836
+ putModelDefinition (modelId2 );
837
+ putVocabulary (List .of ("these" , "are" , "my" , "words" ), modelId2 );
838
+
839
+ startDeployment (modelId1 , AllocationStatus .State .STARTED .toString (), 100 , 1 );
840
+
841
+ {
842
+ Request request = new Request (
843
+ "POST" ,
844
+ "/_ml/trained_models/"
845
+ + modelId2
846
+ + "/deployment/_start?timeout=40s&wait_for=starting&"
847
+ + "number_of_allocations=4&threads_per_allocation=2&queue_capacity=500&cache_size=100Kb"
848
+ );
849
+ client ().performRequest (request );
850
+ }
851
+
852
+ // Check second model did not get any allocations
853
+ assertAllocationCount (modelId2 , 0 );
854
+
855
+ // Verify stats shows model is starting and deployment settings are present
856
+ {
857
+ Response statsResponse = getTrainedModelStats (modelId2 );
858
+ var responseMap = entityAsMap (statsResponse );
859
+ List <Map <String , Object >> stats = (List <Map <String , Object >>) responseMap .get ("trained_model_stats" );
860
+ assertThat (stats , hasSize (1 ));
861
+ String statusState = (String ) XContentMapValues .extractValue ("deployment_stats.allocation_status.state" , stats .get (0 ));
862
+ assertThat (statusState , equalTo ("starting" ));
863
+ int numberOfAllocations = (int ) XContentMapValues .extractValue ("deployment_stats.number_of_allocations" , stats .get (0 ));
864
+ assertThat (numberOfAllocations , equalTo (4 ));
865
+ int threadsPerAllocation = (int ) XContentMapValues .extractValue ("deployment_stats.threads_per_allocation" , stats .get (0 ));
866
+ assertThat (threadsPerAllocation , equalTo (2 ));
867
+ int queueCapacity = (int ) XContentMapValues .extractValue ("deployment_stats.queue_capacity" , stats .get (0 ));
868
+ assertThat (queueCapacity , equalTo (500 ));
869
+ ByteSizeValue cacheSize = ByteSizeValue .parseBytesSizeValue (
870
+ (String ) XContentMapValues .extractValue ("deployment_stats.cache_size" , stats .get (0 )),
871
+ "cache_size)"
872
+ );
873
+ assertThat (cacheSize , equalTo (ByteSizeValue .ofKb (100 )));
874
+ }
875
+
876
+ stopDeployment (modelId1 );
877
+ stopDeployment (modelId2 );
878
+ }
879
+
815
880
@ SuppressWarnings ("unchecked" )
816
881
private void assertAllocationCount (String modelId , int expectedAllocationCount ) throws IOException {
817
882
Response response = getTrainedModelStats (modelId );
0 commit comments