diff --git a/vllm/config.py b/vllm/config.py index a5f2161068d..b7d53fe9a22 100644 --- a/vllm/config.py +++ b/vllm/config.py @@ -3172,7 +3172,8 @@ def __post_init__(self): if self.compilation_config is None: self.compilation_config = CompilationConfig() - if envs.VLLM_USE_V1 and not self.model_config.enforce_eager: + if envs.VLLM_USE_V1 and self.model_config is not None and \ + not self.model_config.enforce_eager: # NOTE(woosuk): Currently, we use inductor because the piecewise # CUDA graphs do not work properly with the custom CUDA kernels. # FIXME(woosuk): Disable inductor to reduce the compilation time