
Commit d06e824

[Bugfix] Set enforce_eager automatically for mllama (#12127)
Signed-off-by: Chen Zhang <[email protected]>
1 parent: 62b06ba

3 files changed: +5, -5 lines

examples/offline_inference/vision_language.py

Lines changed: 0 additions & 1 deletion
```diff
@@ -325,7 +325,6 @@ def run_mllama(question: str, modality: str):
         model=model_name,
         max_model_len=4096,
         max_num_seqs=16,
-        enforce_eager=True,
         disable_mm_preprocessor_cache=args.disable_mm_preprocessor_cache,
     )
```

examples/offline_inference/vision_language_multi_image.py

Lines changed: 0 additions & 1 deletion
```diff
@@ -186,7 +186,6 @@ def load_mllama(question, image_urls: List[str]) -> ModelRequestData:
         model=model_name,
         max_model_len=4096,
         max_num_seqs=16,
-        enforce_eager=True,
         limit_mm_per_prompt={"image": len(image_urls)},
     )
```
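
With this commit, neither example needs to request eager mode itself; the engine detects the model type and falls back on its own. A minimal sketch of the simplified call site, where the checkpoint name is an assumption for illustration (any mllama-architecture model would behave the same):

```python
from vllm import LLM

# Checkpoint name assumed for illustration: an mllama-architecture
# (Llama 3.2 Vision) model.
llm = LLM(
    model="meta-llama/Llama-3.2-11B-Vision-Instruct",
    max_model_len=4096,
    max_num_seqs=16,
    # enforce_eager=True is no longer needed here: vllm/config.py now
    # sets it automatically for model types without CUDA graph support.
)
```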

vllm/config.py

Lines changed: 5 additions & 3 deletions
```diff
@@ -607,10 +607,12 @@ def _verify_cuda_graph(self) -> None:
         self.max_seq_len_to_capture = min(self.max_seq_len_to_capture,
                                           self.max_model_len)
 
-        if (self.hf_config.model_type == 'deepseek_v3'
+        MODEL_NOT_SUPPORT_CUDA_GRAPH = ['deepseek_v3', 'mllama']
+        if (self.hf_config.model_type in MODEL_NOT_SUPPORT_CUDA_GRAPH
                 and not self.enforce_eager):
-            logger.warning("CUDA graph is not supported for Deepseek V3 yet, "
-                           "fallback to the eager mode.")
+            logger.warning(
+                "CUDA graph is not supported for %s yet, fallback to the eager "
+                "mode.", self.hf_config.model_type)
             self.enforce_eager = True
 
     def _verify_bnb_config(self) -> None:
```
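
The fix generalizes the existing Deepseek V3 special case into a small deny-list of model types that cannot be captured into CUDA graphs yet. A self-contained sketch of the equivalent check, simplified from `ModelConfig._verify_cuda_graph` (the free function below is illustrative, not the real method, which also clamps `max_seq_len_to_capture` to `max_model_len`):

```python
import logging

logger = logging.getLogger("vllm.config")

# Model types without CUDA graph support yet, per this commit.
MODEL_NOT_SUPPORT_CUDA_GRAPH = ['deepseek_v3', 'mllama']

def effective_enforce_eager(model_type: str, enforce_eager: bool) -> bool:
    """Return the enforce_eager value the engine will actually use."""
    if model_type in MODEL_NOT_SUPPORT_CUDA_GRAPH and not enforce_eager:
        logger.warning(
            "CUDA graph is not supported for %s yet, fallback to the eager "
            "mode.", model_type)
        return True  # Force eager mode for unsupported model types.
    return enforce_eager

# mllama is silently switched to eager mode; other models are untouched.
assert effective_enforce_eager('mllama', False) is True
assert effective_enforce_eager('llama', False) is False
```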
