Commit 3e8054e

Server: add test for num slots
1 parent 4dba7e8 commit 3e8054e

2 files changed, +52 -0 lines changed

examples/server/tests/features/results.feature (+34)

@@ -10,6 +10,40 @@ Feature: Results
     And 256 KV cache size
     And 128 max tokens to predict
 
+  Scenario Outline: num slots invariant
+    Given 1 slots
+    # And 0 as draft
+    And 128 KV cache size
+    And continuous batching
+    Then the server is starting
+    Then the server is healthy
+
+    Given 1 prompts "Write a very long story about AI." with seed 42
+    And concurrent completion requests
+
+    Then the server is busy
+    Then the server is idle
+    And all slots are idle
+
+    Given <n_slots> slots
+    And <n_kv> KV cache size
+    Then the server is starting
+    Then the server is healthy
+
+    Given 8 prompts "Write a very long story about AI." with seed 42
+    And concurrent completion requests
+
+    Then the server is busy
+    Then the server is idle
+    And all slots are idle
+    Then all predictions are equal
+    Examples:
+      | n_slots | n_kv |
+      | 1       | 128  |
+      | 2       | 256  |
+      | 4       | 512  |
+      | 8       | 1024 |
+
   Scenario Outline: Multi users completion
     Given <n_slots> slots
     And continuous batching
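
The last step of the new scenario, "Then all predictions are equal", is the actual invariant: with the seed fixed at 42, the completions produced with 1, 2, 4, and 8 slots should be identical. A minimal sketch of what such a check amounts to is given below; the helper name and the assumption that completions are collected as plain strings are illustrative only, not the repository's step implementation.

# Sketch only: every collected completion must match the first one.
def assert_all_predictions_equal(completions):
    assert completions, "expected at least one completion"
    first = completions[0]
    for i, content in enumerate(completions[1:], start=1):
        assert content == first, (
            f"completion {i} differs from completion 0:\n{content!r}\nvs\n{first!r}")

# With a fixed seed and deterministic decoding the outputs should agree:
assert_all_predictions_equal(["Once upon a time", "Once upon a time"])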

examples/server/tests/features/steps/steps.py (+18)

@@ -3,6 +3,7 @@
 import json
 import os
 import re
+import signal
 import socket
 import subprocess
 import sys

@@ -457,6 +458,14 @@ def step_a_prompt_prompt(context, prompt):
     context.n_prompts = len(context.prompts)
 
 
+@step('{num_prompts:d} prompts {prompt} with seed {seed:d}')
+def step_many_prompts(context, num_prompts, prompt, seed):
+    for _ in range(num_prompts):
+        context.seed = seed
+        context.prompts.append(prompt)
+    context.n_prompts = len(context.prompts)
+
+
 @step('concurrent completion requests')
 @async_run_until_complete()
 async def step_concurrent_completion_requests(context):
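
The new step text, '{num_prompts:d} prompts {prompt} with seed {seed:d}', is matched by behave's default "parse"-style matcher, so num_prompts and seed arrive as ints while prompt captures the raw text between the fixed words, quotation marks included. A small sketch of that binding, assuming the third-party parse package that behave's matcher is built on:

import parse  # the library behind behave's default step matcher

pattern = '{num_prompts:d} prompts {prompt} with seed {seed:d}'
line = '8 prompts "Write a very long story about AI." with seed 42'

result = parse.parse(pattern, line)
print(result['num_prompts'])  # 8 (an int, via the :d conversion)
print(result['prompt'])       # '"Write a very long story about AI."' (quotes are part of the capture)
print(result['seed'])         # 42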

@@ -1223,6 +1232,15 @@ def start_server_background(context):
         'stdout': subprocess.PIPE,
         'stderr': subprocess.PIPE
     }
+
+    # Shut down previous server if there is one:
+    if context.server_process is not None:
+        if os.name == 'nt':
+            interrupt = signal.CTRL_C_EVENT
+        else:
+            interrupt = signal.SIGINT
+        context.server_process.send_signal(interrupt)
+
     context.server_process = subprocess.Popen(
         [str(arg) for arg in [context.server_path, *server_args]],
         **pkwargs)
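
start_server_background now interrupts any server left over from an earlier step before spawning a new one, which is what lets the scenario re-start the server with a different slot count. The snippet below is a stand-alone sketch of the same portable-interrupt idea (CTRL_C_EVENT on Windows, SIGINT elsewhere); the interrupt_and_wait helper and the kill() fallback are illustrative additions, not part of this commit.

import os
import signal
import subprocess
import sys

def interrupt_and_wait(proc, timeout=10.0):
    # Pick the platform-appropriate interrupt. Note that delivering
    # CTRL_C_EVENT on Windows depends on how the child's console and
    # process group were set up when it was spawned.
    interrupt = signal.CTRL_C_EVENT if os.name == 'nt' else signal.SIGINT
    proc.send_signal(interrupt)
    try:
        return proc.wait(timeout=timeout)
    except subprocess.TimeoutExpired:
        proc.kill()  # hard fallback if the interrupt is ignored
        return proc.wait()

if __name__ == '__main__':
    # Spawn a child that would sleep for a minute, then interrupt it.
    child = subprocess.Popen([sys.executable, '-c', 'import time; time.sleep(60)'])
    print('exit code:', interrupt_and_wait(child))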
