@@ -301,6 +301,104 @@ run_serving_tests() {
301
301
kill_gpu_processes
302
302
}
303
303
304
run_genai_perf_tests() {
  # Run genai-perf benchmark cases against the currently selected serving engine.
  #
  # Arguments:
  #   $1 - path to a JSON file listing genai-perf test cases. Each entry holds
  #        .test_name, .qps_list, .common_parameters and per-engine
  #        .<engine>_server_parameters objects.
  #
  # Globals (read): TEST_SELECTOR, CURRENT_LLM_SERVING_ENGINE, gpu_count,
  #                 VLLM_SOURCE_CODE_LOC
  # Helpers defined elsewhere in this file: kill_gpu_processes, wait_for_server
  local genai_perf_test_file
  genai_perf_test_file=$1

  # Iterate over genai-perf test cases, one compact JSON object per line.
  jq -c '.[]' "$genai_perf_test_file" | while read -r params; do
    test_name=$(echo "$params" | jq -r '.test_name')

    # If TEST_SELECTOR is set, only run the test cases matching the selector.
    # NOTE: $TEST_SELECTOR is intentionally unquoted on the RHS of =~ so it is
    # treated as a regex, not a literal string.
    if [[ -n "$TEST_SELECTOR" ]] && [[ ! "$test_name" =~ $TEST_SELECTOR ]]; then
      echo "Skip test case $test_name."
      continue
    fi

    # Prepend the current serving engine to the test name.
    test_name=${CURRENT_LLM_SERVING_ENGINE}_${test_name}

    # Extract common parameters shared by server and client.
    common_params=$(echo "$params" | jq -r '.common_parameters')
    model=$(echo "$common_params" | jq -r '.model')
    tp=$(echo "$common_params" | jq -r '.tp')
    dataset_name=$(echo "$common_params" | jq -r '.dataset_name')
    dataset_path=$(echo "$common_params" | jq -r '.dataset_path')
    port=$(echo "$common_params" | jq -r '.port')
    num_prompts=$(echo "$common_params" | jq -r '.num_prompts')
    reuse_server=$(echo "$common_params" | jq -r '.reuse_server')

    # Engine-specific server arguments and the QPS sweep for this case.
    server_params=$(echo "$params" | jq -r ".${CURRENT_LLM_SERVING_ENGINE}_server_parameters")
    qps_list=$(echo "$params" | jq -r '.qps_list')
    qps_list=$(echo "$qps_list" | jq -r '.[] | @sh')
    echo "Running over qps list $qps_list"

    # Skip the case if this host lacks enough GPUs for the tensor parallelism.
    if [[ $gpu_count -lt $tp ]]; then
      echo "Required num-shard $tp but only $gpu_count GPU found. Skip testcase $test_name."
      continue
    fi

    # Either reuse the server launched by a previous case, or tear down any
    # running GPU processes and launch a fresh one.
    if [[ $reuse_server == "true" ]]; then
      echo "Reuse previous server for test case $test_name"
    else
      kill_gpu_processes
      bash "$VLLM_SOURCE_CODE_LOC/.buildkite/nightly-benchmarks/scripts/launch-server.sh" \
        "$server_params" "$common_params"
    fi

    if wait_for_server; then
      echo ""
      echo "$CURRENT_LLM_SERVING_ENGINE server is up and running."
    else
      echo ""
      echo "$CURRENT_LLM_SERVING_ENGINE failed to start within the timeout period."
      break
    fi

    # Iterate over the QPS values for this test case.
    for qps in $qps_list; do
      # "inf" means unthrottled: send all prompts at once by using the prompt
      # count as the request rate.
      if [[ "$qps" == *"inf"* ]]; then
        echo "qps was $qps"
        qps=$num_prompts
        echo "now qps is $qps"
      fi

      new_test_name=$test_name"_qps_"$qps
      backend=$CURRENT_LLM_SERVING_ENGINE

      # Collapse engine variants (e.g. "vllm_055") onto the plain backend name.
      # NOTE(review): $backend is computed but the client command below
      # hard-codes "--backend vllm" — confirm whether non-vllm engines should
      # pass "$backend" instead.
      if [[ "$backend" == *"vllm"* ]]; then
        backend="vllm"
      fi
      # TODO: add output dir.
      client_command="genai-perf profile \
        -m $model \
        --service-kind openai \
        --backend vllm \
        --endpoint-type chat \
        --streaming \
        --url localhost:$port \
        --request-rate $qps \
        --num-prompts $num_prompts \
      "

      echo "Client command: $client_command"

      eval "$client_command"

      # TODO: process/record outputs (new_test_name, dataset_name and
      # dataset_path are currently unused pending that work).
    done
  done

  kill_gpu_processes

}
304
402
305
403
prepare_dataset () {
306
404
@@ -328,12 +426,17 @@ main() {
328
426
329
427
pip install -U transformers
330
428
429
+ pip install -r requirements-dev.txt
430
+ which genai-perf
431
+
331
432
# check storage
332
433
df -h
333
434
334
435
ensure_installed wget
335
436
ensure_installed curl
336
437
ensure_installed jq
438
+ # genai-perf dependency
439
+ ensure_installed libb64-0d
337
440
338
441
prepare_dataset
339
442
@@ -345,6 +448,10 @@ main() {
345
448
# run the test
346
449
run_serving_tests " $BENCHMARK_ROOT /tests/nightly-tests.json"
347
450
451
+ # run genai-perf tests
452
+ run_genai_perf_tests " $BENCHMARK_ROOT /tests/genai-perf-tests.json"
453
+ mv artifacts/ $RESULTS_FOLDER /
454
+
348
455
# upload benchmark results to buildkite
349
456
python3 -m pip install tabulate pandas
350
457
python3 " $BENCHMARK_ROOT /scripts/summary-nightly-results.py"
0 commit comments