@@ -358,16 +358,26 @@ run_serving_tests() {
358
358
# run the server
359
359
echo " Running test case $test_name "
360
360
echo " Server command: $server_command "
361
- bash -c " $server_command " &
362
- server_pid=$!
363
-
364
- # wait until the server is alive
365
- if wait_for_server; then
366
- echo " "
367
- echo " vllm server is up and running."
361
+ # support remote vllm server
362
+ client_remote_args=" "
363
+ if [[ -z " ${REMOTE_HOST} " ]]; then
364
+ bash -c " $server_command " &
365
+ server_pid=$!
366
+ # wait until the server is alive
367
+ if wait_for_server; then
368
+ echo " "
369
+ echo " vllm server is up and running."
370
+ else
371
+ echo " "
372
+ echo " vllm failed to start within the timeout period."
373
+ fi
368
374
else
369
- echo " "
370
- echo " vllm failed to start within the timeout period."
375
+ server_command=" Using Remote Server $REMOTE_HOST $REMOTE_PORT "
376
+ if [[ ${REMOTE_PORT} ]]; then
377
+ client_remote_args=" --host=$REMOTE_HOST --port=$REMOTE_PORT "
378
+ else
379
+ client_remote_args=" --host=$REMOTE_HOST "
380
+ fi
371
381
fi
372
382
373
383
# iterate over different QPS
@@ -389,7 +399,7 @@ run_serving_tests() {
389
399
--result-filename ${new_test_name} .json \
390
400
--request-rate $qps \
391
401
--metadata " tensor_parallel_size=$tp " \
392
- $client_args "
402
+ $client_args $client_remote_args "
393
403
394
404
echo " Running test case $test_name with qps $qps "
395
405
echo " Client command: $client_command "
0 commit comments