Commit 6685a3c

Miss some files
1 parent 1543060 commit 6685a3c

File tree

2 files changed (+25 -23 lines changed)


examples/mini_testbench.sh

Lines changed: 24 additions & 20 deletions
@@ -2,37 +2,41 @@
 log_sum="log/service_model_device.txt"
 
 model_ids=("TinyLlama/TinyLlama-1.1B-Chat-v1.0") # "facebook/opt-1.3b" "huggyllama/llama-7b")
-num_devices=(2)
+num_lpu_devices=(2) #4
+num_gpu_devices=(0)
 
 current_datetime=$(date "+%Y-%m-%d %H:%M:%S")
 echo "$current_datetime"
 echo "$current_datetime" >> ${log_sum}
 
 # LLMEngine Test
 for model_id in "${model_ids[@]}"; do
-for num_device in "${num_devices[@]}"; do
+for num_lpu_device in "${num_lpu_devices[@]}"; do
+for num_gpu_device in "${num_gpu_devices[@]}"; do
 #IFS='\' read -ra parts <<< "$model_id"
 #model_name="${parts[-1]}"
 model_name=$(echo "$model_id" | awk -F'/' '{print $NF}')
 echo "*********************************"
-echo "**** Start inference_${model_name}_${num_device}"
+echo "**** Start inference_${model_name}_${num_lpu_device}_${num_gpu_device}"
 echo "*********************************"
-python lpu_inference_arg.py -m ${model_id} -n ${num_device} > log/inference_${model_name}_${num_device}.txt
+python lpu_inference_arg.py -m ${model_id} -l ${num_lpu_device} -g ${num_gpu_device} > log/inference_${model_name}_${num_lpu_device}_${num_gpu_device}.txt
 echo "*********************************" >> ${log_sum}
-echo "[Testbench] The Result of log/inference_${model_name}_${num_device}.txt" >> ${log_sum}
-tail -n 1 "log/inference_${model_name}_${num_device}.txt" >> ${log_sum}
+echo "[Testbench] The Result of log/inference_${model_name}_${num_lpu_device}_${num_gpu_device}.txt" >> ${log_sum}
+tail -n 1 "log/inference_${model_name}_${num_lpu_device}_${num_gpu_device}.txt" >> ${log_sum}
 echo "" >> ${log_sum}
+done
 done
 done
 
 # LLMEngineAsync Test with vLLM serve
 for model_id in "${model_ids[@]}"; do
-for num_device in "${num_devices[@]}"; do
+for num_lpu_device in "${num_lpu_devices[@]}"; do
+for num_gpu_device in "${num_gpu_devices[@]}"; do
 model_name=$(echo "$model_id" | awk -F'/' '{print $NF}')
 echo "*********************************"
-echo "**** Start serving_${model_name}_${num_device}"
+echo "**** Start serving_${model_name}_${num_lpu_device}_${num_gpu_device}"
 echo "*********************************"
-python -m vllm.entrypoints.api_server --model ${model_id} --device fpga --tensor-parallel-size ${num_device} &
+python -m vllm.entrypoints.api_server --model ${model_id} --device fpga --num-lpu-devices ${num_lpu_device} --num-gpu-devices ${num_gpu_device} &
 
 # Waiting for server
 while ! nc -z localhost "8000"; do
@@ -41,7 +45,7 @@ for model_id in "${model_ids[@]}"; do
 done
 echo "[Testbench] The server is ready!"
 
-python lpu_client.py > log/vllm_serve_${model_name}_${num_device}.txt
+python lpu_client.py > log/vllm_serve_${model_name}_${num_lpu_device}_${num_gpu_device}.txt
 
 # Waiting for process kill
 PID=$(jobs -p | tail -n 1)
@@ -60,22 +64,24 @@ for model_id in "${model_ids[@]}"; do
 
 # Write log in text file
 echo "*********************************" >> ${log_sum}
-echo "The Result of log/vllm_serve_${model_name}_${num_device}.txt" >> ${log_sum}
-tail -n 1 "log/vllm_serve_${model_name}_${num_device}.txt" >> ${log_sum}
+echo "The Result of log/vllm_serve_${model_name}_${num_lpu_device}_${num_gpu_device}.txt" >> ${log_sum}
+tail -n 1 "log/vllm_serve_${model_name}_${num_lpu_device}_${num_gpu_device}.txt" >> ${log_sum}
 echo "" >> ${log_sum}
+done
 done
 done
 
 
 
 # OpenAI API Test
 model_id=${model_ids[0]}
-num_device=${num_devices[0]}
+num_lpu_device=${num_lpu_devices[0]}
+num_gpu_device=${num_gpu_devices[0]}
 model_name=$(echo "$model_id" | awk -F'/' '{print $NF}')
 echo "*********************************"
-echo "**** Start serving_${model_name}_${num_device}"
+echo "**** Start serving_${model_name}_${num_lpu_device}_${num_gpu_device}"
 echo "*********************************"
-python -m vllm.entrypoints.api_server --model ${model_id} --device fpga --tensor-parallel-size ${num_device} &
+python -m vllm.entrypoints.openai.api_server --model ${model_id} --device fpga --num-lpu-devices ${num_lpu_device} --num-gpu-devices ${num_gpu_device} &
 
 # Waiting for server
 while ! nc -z localhost "8000"; do
@@ -84,7 +90,7 @@ while ! nc -z localhost "8000"; do
 done
 echo "[Testbench] The server is ready!"
 
-python lpu_openai_completion_client.py > log/openai_serve_${model_name}_${num_device}.txt
+python lpu_openai_completion_client.py > log/openai_serve_${model_name}_${num_lpu_device}_${num_gpu_device}.txt
 
 # Waiting for process kill
 PID=$(jobs -p | tail -n 1)
@@ -103,8 +109,6 @@ fi
 
 # Write log in text file
 echo "*********************************" >> ${log_sum}
-echo "The Result of log/openai_serve_${model_name}_${num_device}.txt" >> ${log_sum}
-tail -n 1 "log/openai_serve_${model_name}_${num_device}.txt" >> ${log_sum}
+echo "The Result of log/openai_serve_${model_name}_${num_lpu_device}_${num_gpu_device}.txt" >> ${log_sum}
+tail -n 1 "log/openai_serve_${model_name}_${num_lpu_device}_${num_gpu_device}.txt" >> ${log_sum}
 echo "" >> ${log_sum}
-
-
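
The serve tests above repeat one pattern three times: launch the API server in the background, poll port 8000 with nc until it accepts connections, run a client against it, then kill the background job. As a reading aid, here is a minimal sketch of that pattern folded into a single bash function; run_serve_test is a hypothetical helper, not part of this commit, and the sleep interval is an assumption since the wait loop's body is elided in the diff.

#!/usr/bin/env bash
# Hypothetical helper mirroring the launch/wait/run/kill pattern above.
run_serve_test() {
    local model_id="$1" num_lpu="$2" num_gpu="$3" client="$4" log_file="$5"

    # Start the server in the background with the flags this commit introduces.
    python -m vllm.entrypoints.api_server \
        --model "${model_id}" --device fpga \
        --num-lpu-devices "${num_lpu}" --num-gpu-devices "${num_gpu}" &
    local server_pid=$!

    # Poll until the port accepts connections, as the testbench does with nc.
    while ! nc -z localhost 8000; do
        sleep 1  # assumed interval; the original loop body is not shown
    done
    echo "[Testbench] The server is ready!"

    # Run the client, capture its output, then stop the server.
    python "${client}" > "${log_file}"
    kill "${server_pid}"
    wait "${server_pid}" 2>/dev/null
}

# Example call matching the LLMEngineAsync test above: 2 LPUs, 0 GPUs.
run_serve_test "TinyLlama/TinyLlama-1.1B-Chat-v1.0" 2 0 \
    lpu_client.py log/vllm_serve_TinyLlama-1.1B-Chat-v1.0_2_0.txt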

examples/vllm_serve.sh

Lines changed: 1 addition & 3 deletions
@@ -1,4 +1,2 @@
 
-#python -m vllm.entrypoints.api_server --model facebook/opt-1.3b --device fpga --tensor-parallel-size 2
-python -m vllm.entrypoints.api_server --model facebook/opt-1.3b --device fpga --num-gpu-devices 1 --num-lpu-devices 2
-#python -m vllm.entrypoints.api_server --model facebook/opt-1.3b --device fpga --num_gpu_devices 1 --num_lpu_devices 2
+python -m vllm.entrypoints.api_server --model huggyllama/llama-7b --device fpga --num-gpu-devices 0 --num-lpu-devices 2
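
For a quick smoke test of the server this script starts, upstream vLLM's demo api_server exposes a POST /generate endpoint that takes a JSON prompt plus sampling parameters. Assuming this fork keeps that request schema (the diff does not confirm it), a request from the shell looks like:

# Hypothetical smoke test; prompt and sampling values are illustrative.
curl -s http://localhost:8000/generate \
    -H "Content-Type: application/json" \
    -d '{"prompt": "Hello, my name is", "max_tokens": 32, "temperature": 0.0}'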
