Fix benchmark_moe.py tuning for CUDA devices (vllm-project#14164)

mgoin · lulmer · commit a83467c7d423 · 2025-04-07T07:38:21.000Z
Signed-off-by: Louis Ulmer <ulmerlouis@gmail.com>
diff --git a/benchmarks/kernels/benchmark_moe.py b/benchmarks/kernels/benchmark_moe.py
@@ -2,6 +2,7 @@
 
 import argparse
 import time
+from contextlib import nullcontext
 from datetime import datetime
 from itertools import product
 from typing import Any, TypedDict
@@ -412,7 +413,8 @@ def tune(
                                                    hidden_size, search_space,
                                                    is_fp16, topk)
 
-        with torch.cuda.device(self.device_id):
+        with torch.cuda.device(self.device_id) if current_platform.is_rocm(
+        ) else nullcontext():
             for config in tqdm(search_space):
                 try:
                     kernel_time = benchmark_config(