1 file changed: +8 −1 lines changed
@@ -29,7 +29,7 @@
 from vllm.model_executor.layers.quantization import (QUANTIZATION_METHODS,
                                                      get_quantization_config)
 from vllm.model_executor.models import ModelRegistry
-from vllm.platforms import CpuArchEnum
+from vllm.platforms import CpuArchEnum, current_platform
 from vllm.sampling_params import GuidedDecodingParams
 from vllm.tracing import is_otel_available, otel_import_error_traceback
 from vllm.transformers_utils.config import (
@@ -684,6 +684,13 @@ def _verify_cuda_graph(self) -> None:
             self.max_seq_len_to_capture = self.max_model_len
         self.max_seq_len_to_capture = min(self.max_seq_len_to_capture,
                                           self.max_model_len)
+        ROCM_UNSUPPORTED_MODELS = ['mllama']
+        if (self.hf_config.model_type in ROCM_UNSUPPORTED_MODELS
+                and not self.enforce_eager and current_platform.is_rocm()):
+            logger.warning(
+                "CUDA graph is not supported for %s on ROCm yet, fallback "
+                "to the eager mode.", self.hf_config.model_type)
+            self.enforce_eager = True
 
     def _verify_bnb_config(self) -> None:
         """