Skip to content

Commit 2da84c3

Browse files
b8zhong authored and DamonFool committed
[Bugfix] Correctly call cudaProfilerStop in benchmarks script (vllm-project#14183)
Signed-off-by: Brayden Zhong <[email protected]>
1 parent 81c0a0e commit 2da84c3

6 files changed

+5
-6
lines changed

benchmarks/kernels/benchmark_layernorm.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ def run_cuda_benchmark(num_iters: int, profile: bool = False) -> float:
4040

4141
end_time = time.perf_counter()
4242
if profile:
43-
torch.cuda.cudart().cudaProfilerStart()
43+
torch.cuda.cudart().cudaProfilerStop()
4444
return (end_time - start_time) / num_iters
4545

4646
# Warmup.

benchmarks/kernels/benchmark_lora.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -153,7 +153,6 @@ def ref_group_gemm(ref_out: torch.Tensor, input: torch.Tensor,
153153
result = torch.nn.functional.linear(x, w)
154154
result *= scaling
155155
out_list.append(result)
156-
torch.cat(out_list, dim=0)
157156

158157
cat_result = torch.cat(out_list, dim=0)
159158

benchmarks/kernels/benchmark_machete.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,6 @@ def terse_type_name(dt):
4545
torch.float16: "fp16",
4646
torch.int8: "int8",
4747
torch.float8_e4m3fn: "fp8",
48-
torch.bfloat16: "bf16",
4948
torch.float: "float",
5049
torch.int: "int",
5150
}[dt]
@@ -259,7 +258,7 @@ def machete_create_bench_fn(bt: BenchmarkTensors,
259258

260259
return lambda: ops.machete_mm(
261260
a=bt.a,
262-
b_q=bt.w_q,
261+
b_q=w_q,
263262
b_type=bt.wtype,
264263
b_group_scales=bt.w_g_s,
265264
b_group_zeros=w_g_zp,

benchmarks/kernels/benchmark_moe.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
# SPDX-License-Identifier: Apache-2.0
22

33
import argparse
4+
import json
45
import time
56
from contextlib import nullcontext
67
from datetime import datetime

benchmarks/kernels/benchmark_paged_attention.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -176,7 +176,7 @@ def run_cuda_benchmark(num_iters: int, profile: bool = False) -> float:
176176

177177
end_time = time.perf_counter()
178178
if profile:
179-
torch.cuda.cudart().cudaProfilerStart()
179+
torch.cuda.cudart().cudaProfilerStop()
180180
return (end_time - start_time) / num_iters
181181

182182
# Warmup.

benchmarks/kernels/benchmark_quant.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ def run_cuda_benchmark(num_iters: int, profile: bool = False) -> float:
4040

4141
end_time = time.perf_counter()
4242
if profile:
43-
torch.cuda.cudart().cudaProfilerStart()
43+
torch.cuda.cudart().cudaProfilerStop()
4444
return (end_time - start_time) / num_iters
4545

4646
# Warmup.

0 commit comments

Comments (0)