Commit 3e8054e

Server: add test for num slots
1 parent 4dba7e8 commit 3e8054e

2 files changed, +52 -0 lines changed

examples/server/tests/features/results.feature (+34)

@@ -10,6 +10,40 @@ Feature: Results
     And 256 KV cache size
     And 128 max tokens to predict
 
+  Scenario Outline: num slots invariant
+    Given 1 slots
+    # And 0 as draft
+    And 128 KV cache size
+    And continuous batching
+    Then the server is starting
+    Then the server is healthy
+
+    Given 1 prompts "Write a very long story about AI." with seed 42
+    And concurrent completion requests
+
+    Then the server is busy
+    Then the server is idle
+    And all slots are idle
+
+    Given <n_slots> slots
+    And <n_kv> KV cache size
+    Then the server is starting
+    Then the server is healthy
+
+    Given 8 prompts "Write a very long story about AI." with seed 42
+    And concurrent completion requests
+
+    Then the server is busy
+    Then the server is idle
+    And all slots are idle
+    Then all predictions are equal
+    Examples:
+      | n_slots | n_kv |
+      | 1       | 128  |
+      | 2       | 256  |
+      | 4       | 512  |
+      | 8       | 1024 |
+
   Scenario Outline: Multi users completion
     Given <n_slots> slots
     And continuous batching
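
The last step of the new scenario, "Then all predictions are equal", is the actual invariant: with the seed fixed at 42, the completions produced with 1, 2, 4, and 8 slots should be identical. A minimal sketch of what such a check amounts to is given below; the helper name and the assumption that completions are collected as plain strings are illustrative only, not the repository's step implementation.

# Sketch only: every collected completion must match the first one.
def assert_all_predictions_equal(completions):
    assert completions, "expected at least one completion"
    first = completions[0]
    for i, content in enumerate(completions[1:], start=1):
        assert content == first, (
            f"completion {i} differs from completion 0:\n{content!r}\nvs\n{first!r}")

# With a fixed seed and deterministic decoding the outputs should agree:
assert_all_predictions_equal(["Once upon a time", "Once upon a time"])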

examples/server/tests/features/steps/steps.py (+18)

@@ -3,6 +3,7 @@
 import json
 import os
 import re
+import signal
 import socket
 import subprocess
 import sys

@@ -457,6 +458,14 @@ def step_a_prompt_prompt(context, prompt):
     context.n_prompts = len(context.prompts)
 
 
+@step('{num_prompts:d} prompts {prompt} with seed {seed:d}')
+def step_many_prompts(context, num_prompts, prompt, seed):
+    for _ in range(num_prompts):
+        context.seed = seed
+        context.prompts.append(prompt)
+    context.n_prompts = len(context.prompts)
+
+
 @step('concurrent completion requests')
 @async_run_until_complete()
 async def step_concurrent_completion_requests(context):
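
The new step text, '{num_prompts:d} prompts {prompt} with seed {seed:d}', is matched by behave's default "parse"-style matcher, so num_prompts and seed arrive as ints while prompt captures the raw text between the fixed words, quotation marks included. A small sketch of that binding, assuming the third-party parse package that behave's matcher is built on:

import parse  # the library behind behave's default step matcher

pattern = '{num_prompts:d} prompts {prompt} with seed {seed:d}'
line = '8 prompts "Write a very long story about AI." with seed 42'

result = parse.parse(pattern, line)
print(result['num_prompts'])  # 8 (an int, via the :d conversion)
print(result['prompt'])       # '"Write a very long story about AI."' (quotes are part of the capture)
print(result['seed'])         # 42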

@@ -1223,6 +1232,15 @@ def start_server_background(context):
         'stdout': subprocess.PIPE,
         'stderr': subprocess.PIPE
     }
+
+    # Shut down previous server if there is one:
+    if context.server_process is not None:
+        if os.name == 'nt':
+            interrupt = signal.CTRL_C_EVENT
+        else:
+            interrupt = signal.SIGINT
+        context.server_process.send_signal(interrupt)
+
     context.server_process = subprocess.Popen(
         [str(arg) for arg in [context.server_path, *server_args]],
         **pkwargs)
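
start_server_background now interrupts any server left over from an earlier step before spawning a new one, which is what lets the scenario re-start the server with a different slot count. The snippet below is a stand-alone sketch of the same portable-interrupt idea (CTRL_C_EVENT on Windows, SIGINT elsewhere); the interrupt_and_wait helper and the kill() fallback are illustrative additions, not part of this commit.

import os
import signal
import subprocess
import sys

def interrupt_and_wait(proc, timeout=10.0):
    # Pick the platform-appropriate interrupt. Note that delivering
    # CTRL_C_EVENT on Windows depends on how the child's console and
    # process group were set up when it was spawned.
    interrupt = signal.CTRL_C_EVENT if os.name == 'nt' else signal.SIGINT
    proc.send_signal(interrupt)
    try:
        return proc.wait(timeout=timeout)
    except subprocess.TimeoutExpired:
        proc.kill()  # hard fallback if the interrupt is ignored
        return proc.wait()

if __name__ == '__main__':
    # Spawn a child that would sleep for a minute, then interrupt it.
    child = subprocess.Popen([sys.executable, '-c', 'import time; time.sleep(60)'])
    print('exit code:', interrupt_and_wait(child))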
