Skip to content

Commit 72d890c

Browse files
committed
pre-commit fix and throughput test fixes
Signed-off-by: Tsai, Louie <[email protected]>
1 parent 0fbb0d2 commit 72d890c

File tree

5 files changed

+27
-56
lines changed

5 files changed

+27
-56
lines changed

.buildkite/nightly-benchmarks/scripts/convert-results-json-to-markdown.py

Lines changed: 16 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,7 @@ def results_to_json(latency, throughput, serving):
7575
}
7676
)
7777

78+
7879
def get_size(bytes, suffix="B"):
7980
"""
8081
Scale bytes to its proper format
@@ -88,6 +89,7 @@ def get_size(bytes, suffix="B"):
8889
return f"{bytes:.2f}{unit}{suffix}"
8990
bytes /= factor
9091

92+
9193
if __name__ == "__main__":
9294
# collect results
9395
for test_file in results_folder.glob("*.json"):
@@ -168,20 +170,23 @@ def get_size(bytes, suffix="B"):
168170
serving_results = pd.DataFrame.from_dict(serving_results)
169171
throughput_results = pd.DataFrame.from_dict(throughput_results)
170172

171-
from cpuinfo import get_cpu_info
173+
import pandas as pd
172174
import psutil
175+
from cpuinfo import get_cpu_info
173176
from numa import info
174-
import pandas as pd
177+
175178
svmem = psutil.virtual_memory()
176179
numa_size = info.get_num_configured_nodes()
177180
platform_data = {
178-
'CPU Brand': [get_cpu_info()['brand_raw']],
179-
'Physical cores': [psutil.cpu_count(logical=False)],
180-
'Total cores': [psutil.cpu_count(logical=True)],
181-
'Total Memory': [get_size(svmem.total)],
182-
'Total NUMA nodes': [numa_size]
181+
"CPU Brand": [get_cpu_info()["brand_raw"]],
182+
"Physical cores": [psutil.cpu_count(logical=False)],
183+
"Total cores": [psutil.cpu_count(logical=True)],
184+
"Total Memory": [get_size(svmem.total)],
185+
"Total NUMA nodes": [numa_size],
183186
}
184-
platform_results = pd.DataFrame.from_dict(platform_data, orient='index', columns=['Platform Info'])
187+
platform_results = pd.DataFrame.from_dict(
188+
platform_data, orient="index", columns=["Platform Info"]
189+
)
185190

186191
raw_results_json = results_to_json(
187192
latency_results, throughput_results, serving_results
@@ -228,10 +233,9 @@ def get_size(bytes, suffix="B"):
228233
throughput_md_table = tabulate(
229234
throughput_results, headers="keys", tablefmt="pipe", showindex=False
230235
)
231-
platform_md_table = tabulate(platform_results,
232-
headers='keys',
233-
tablefmt='pipe',
234-
showindex=True)
236+
platform_md_table = tabulate(
237+
platform_results, headers="keys", tablefmt="pipe", showindex=True
238+
)
235239

236240
# document the result
237241
with open(results_folder / "benchmark_results.md", "w") as f:

.buildkite/nightly-benchmarks/scripts/run-performance-benchmarks.sh

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -272,7 +272,7 @@ run_throughput_tests() {
272272
fi
273273
fi
274274

275-
throughput_command="python3 benchmark_throughput.py \
275+
throughput_command=" $throughput_envs python3 benchmark_throughput.py \
276276
--output-json $RESULTS_FOLDER/${test_name}.json \
277277
$throughput_args"
278278

@@ -329,7 +329,7 @@ run_serving_tests() {
329329
qps_list=$(echo "$qps_list" | jq -r '.[] | @sh')
330330
echo "Running over qps list $qps_list"
331331

332-
# check if there is enough resouces to run the test
332+
# check if there is enough resources to run the test
333333
tp=$(echo "$server_params" | jq -r '.tensor_parallel_size')
334334
if [ "$ON_CPU" == "1" ];then
335335
if [[ $numa_count -lt $tp ]]; then
@@ -455,7 +455,7 @@ main() {
455455

456456
# postprocess benchmarking results
457457
pip install tabulate pandas
458-
ON_CPU=$ON_CPU python3 $QUICK_BENCHMARK_ROOT/scripts/convert-results-json-to-markdown.py
458+
python3 $QUICK_BENCHMARK_ROOT/scripts/convert-results-json-to-markdown.py
459459

460460
upload_to_buildkite
461461
}

.buildkite/nightly-benchmarks/tests/latency-tests-cpu.json

Lines changed: 4 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -16,35 +16,19 @@
1616
}
1717
},
1818
{
19-
"test_name": "latency_llama70B_tp4",
19+
"test_name": "latency_llama8B_tp4",
2020
"environment_variables": {
2121
"VLLM_RPC_TIMEOUT": 1000000,
2222
"VLLM_ALLOW_LONG_MAX_MODEL_LEN": 1,
2323
"VLLM_ENGINE_ITERATION_TIMEOUT_S": 600,
2424
"VLLM_CPU_KVCACHE_SPACE": 40
2525
},
2626
"parameters": {
27-
"model": "meta-llama/Meta-Llama-3.1-70B-Instruct",
27+
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
2828
"tensor_parallel_size": 4,
2929
"load_format": "dummy",
30-
"num-iters-warmup": 5,
31-
"num-iters": 15
32-
}
33-
},
34-
{
35-
"test_name": "latency_mixtral8x7B_tp2",
36-
"environment_variables": {
37-
"VLLM_RPC_TIMEOUT": 1000000,
38-
"VLLM_ALLOW_LONG_MAX_MODEL_LEN": 1,
39-
"VLLM_ENGINE_ITERATION_TIMEOUT_S": 600,
40-
"VLLM_CPU_KVCACHE_SPACE": 40
41-
},
42-
"parameters": {
43-
"model": "mistralai/Mixtral-8x7B-Instruct-v0.1",
44-
"tensor_parallel_size": 2,
45-
"load_format": "dummy",
46-
"num-iters-warmup": 5,
47-
"num-iters": 15
30+
"num_iters_warmup": 5,
31+
"num_iters": 15
4832
}
4933
}
5034
]

.buildkite/nightly-benchmarks/tests/serving-tests-cpu.json

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@
4545
"distributed_executor_backend": "mp",
4646
"block_size": 128,
4747
"trust_remote_code": "",
48-
"enable_chunked_prefill": "True",
48+
"enable_chunked_prefill": "",
4949
"disable_log_stats": "",
5050
"disable_log_requests": "",
5151
"load_format": "dummy"
@@ -77,7 +77,7 @@
7777
"distributed_executor_backend": "mp",
7878
"block_size": 128,
7979
"trust_remote_code": "",
80-
"enable_chunked_prefill": "True",
80+
"enable_chunked_prefill": "",
8181
"disable_log_stats": "",
8282
"disable_log_requests": "",
8383
"load_format": "dummy"

.buildkite/nightly-benchmarks/tests/throughput-tests-cpu.json

Lines changed: 2 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -17,37 +17,20 @@
1717
}
1818
},
1919
{
20-
"test_name": "throughput_llama70B_tp4",
20+
"test_name": "throughput_llama8B_tp4",
2121
"environment_variables": {
2222
"VLLM_RPC_TIMEOUT": 1000000,
2323
"VLLM_ALLOW_LONG_MAX_MODEL_LEN": 1,
2424
"VLLM_ENGINE_ITERATION_TIMEOUT_S": 600,
2525
"VLLM_CPU_KVCACHE_SPACE": 40
2626
},
2727
"parameters": {
28-
"model": "meta-llama/Meta-Llama-3.1-70B-Instruct",
28+
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
2929
"tensor_parallel_size": 4,
3030
"load_format": "dummy",
3131
"dataset": "./ShareGPT_V3_unfiltered_cleaned_split.json",
3232
"num_prompts": 200,
3333
"backend": "vllm"
3434
}
35-
},
36-
{
37-
"test_name": "throughput_mixtral8x7B_tp2",
38-
"environment_variables": {
39-
"VLLM_RPC_TIMEOUT": 1000000,
40-
"VLLM_ALLOW_LONG_MAX_MODEL_LEN": 1,
41-
"VLLM_ENGINE_ITERATION_TIMEOUT_S": 600,
42-
"VLLM_CPU_KVCACHE_SPACE": 40
43-
},
44-
"parameters": {
45-
"model": "mistralai/Mixtral-8x7B-Instruct-v0.1",
46-
"tensor_parallel_size": 2,
47-
"load_format": "dummy",
48-
"dataset": "./ShareGPT_V3_unfiltered_cleaned_split.json",
49-
"num_prompts": 200,
50-
"backend": "vllm"
51-
}
5235
}
5336
]

0 commit comments

Comments
 (0)