
Commit 3a3f009

gshtras authored and shreyankg committed
[ROCm][Bugfix] Bring back fallback to eager mode removed in vllm-project#14917, but for ROCm only (vllm-project#15413)
Signed-off-by: Gregory Shtrasberg <[email protected]>
1 parent 1e0e21d commit 3a3f009

File tree

1 file changed: +8 -1 lines changed


vllm/config.py

Lines changed: 8 additions & 1 deletion
@@ -29,7 +29,7 @@
 from vllm.model_executor.layers.quantization import (QUANTIZATION_METHODS,
                                                      get_quantization_config)
 from vllm.model_executor.models import ModelRegistry
-from vllm.platforms import CpuArchEnum
+from vllm.platforms import CpuArchEnum, current_platform
 from vllm.sampling_params import GuidedDecodingParams
 from vllm.tracing import is_otel_available, otel_import_error_traceback
 from vllm.transformers_utils.config import (
@@ -684,6 +684,13 @@ def _verify_cuda_graph(self) -> None:
             self.max_seq_len_to_capture = self.max_model_len
         self.max_seq_len_to_capture = min(self.max_seq_len_to_capture,
                                           self.max_model_len)
+        ROCM_UNSUPPORTED_MODELS = ['mllama']
+        if (self.hf_config.model_type in ROCM_UNSUPPORTED_MODELS
+                and not self.enforce_eager and current_platform.is_rocm()):
+            logger.warning(
+                "CUDA graph is not supported for %s on ROCm yet, fallback "
+                "to the eager mode.", self.hf_config.model_type)
+            self.enforce_eager = True
 
     def _verify_bnb_config(self) -> None:
         """

0 commit comments
