Commit 31074c2

fix: make vllm --version work
1 parent 292aec2 commit 31074c2

4 files changed: 36 additions & 33 deletions

vllm/config.py

Lines changed: 3 additions & 5 deletions

@@ -28,8 +28,7 @@
 import vllm.envs as envs
 from vllm.compilation.inductor_pass import CallableInductorPass, InductorPass
 from vllm.logger import init_logger
-from vllm.model_executor.layers.quantization import (QUANTIZATION_METHODS,
-                                                     get_quantization_config)
+from vllm.model_executor.layers.quantization import QUANTIZATION_METHODS
 from vllm.platforms import CpuArchEnum
 from vllm.sampling_params import GuidedDecodingParams
 from vllm.tracing import is_otel_available, otel_import_error_traceback
@@ -42,8 +41,8 @@
 from vllm.transformers_utils.utils import is_s3, maybe_model_redirect
 from vllm.utils import (GiB_bytes, LayerBlockType, LazyLoader,
                         cuda_device_count_stateless, get_cpu_memory,
-                        get_open_port, is_torch_equal_or_newer,
-                        random_uuid, resolve_obj_by_qualname)
+                        get_open_port, is_torch_equal_or_newer, random_uuid,
+                        resolve_obj_by_qualname)
 
 if TYPE_CHECKING:
     from _typeshed import DataclassInstance
@@ -68,7 +67,6 @@
     HfOverrides = None
     QuantizationConfig = None
 
-from packaging.version import Version
 
 logger = init_logger(__name__)
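All three hunks chase the same goal: everything imported at the top of vllm/config.py runs on the way to `vllm --version`, so the commit stops pulling in `get_quantization_config` and `packaging.version` at module scope. A minimal sketch of the deferred-import pattern this enables, with a hypothetical helper name not taken from the commit:

def _torch_is_at_least(current: str, required: str) -> bool:
    # Deferred import: packaging is loaded on the first version check,
    # not when the enclosing module is imported.
    from packaging.version import Version
    return Version(current) >= Version(required)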

vllm/engine/arg_utils.py

Lines changed: 3 additions & 1 deletion

@@ -291,7 +291,9 @@ def can_be_type(cls: TypeHint, type: TypeHintT) -> TypeIs[TypeHintT]:
 
 def is_custom_type(cls: TypeHint) -> bool:
     """Check if the class is a custom type."""
-    return cls.__module__ != "builtins"
+    if isinstance(cls, type):
+        return cls.__module__ != "builtins"
+    return True
 
 def get_kwargs(cls: type[Any]) -> dict[str, Any]:
     cls_docs = get_attr_docs(cls)
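The old body assumed every type hint is a class carrying a `__module__` attribute. With postponed annotation evaluation (see the chat_utils.py change below), hints can arrive as plain strings, and typing constructs such as `Optional[int]` are not instances of `type` either, so the `isinstance` guard is needed. A small self-contained sketch of the resulting behavior; the string argument is illustrative, not from the commit:

from typing import Optional

def is_custom_type(cls) -> bool:
    # Only real classes can be builtins; anything else counts as custom.
    if isinstance(cls, type):
        return cls.__module__ != "builtins"
    return True

print(is_custom_type(int))             # False: builtin class
print(is_custom_type("ChatParams"))    # True: a string hint no longer crashes
print(is_custom_type(Optional[int]))   # True: typing construct, not a class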

vllm/entrypoints/chat_utils.py

Lines changed: 2 additions & 0 deletions

@@ -1,5 +1,7 @@
 # SPDX-License-Identifier: Apache-2.0
 
+from __future__ import annotations
+
 import asyncio
 import json
 from abc import ABC, abstractmethod
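`from __future__ import annotations` (PEP 563) turns every annotation in the file into a string that is never evaluated at runtime, so modules referenced only in type hints do not have to be imported when the file loads. A self-contained sketch of the effect, with numpy standing in for any heavy dependency:

from __future__ import annotations

from typing import TYPE_CHECKING

if TYPE_CHECKING:
    import numpy as np  # seen by type checkers, never imported at runtime

def normalize(x: np.ndarray) -> np.ndarray:
    return x

# Annotations are stored as plain strings; numpy was never loaded.
print(normalize.__annotations__)  # {'x': 'np.ndarray', 'return': 'np.ndarray'}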

vllm/transformers_utils/config.py

Lines changed: 28 additions & 27 deletions

@@ -28,39 +28,40 @@
 # yapf: disable
 if TYPE_CHECKING:
     import huggingface_hub as hfhub
-    import huggingface_hub.utils as hfhub_utils
-    from transformers import GenerationConfig, PretrainedConfig
+    import huggingface_hub.errors as hfhub_errors
+    from transformers.configuration_utils import PretrainedConfig
+    from transformers.generation.configuration_utils import GenerationConfig
 else:
     hfhub = LazyLoader("hfhub", globals(), "huggingface_hub")
-    hfhub_utils = LazyLoader("hfhub_utils", globals(), "huggingface_hub.utils")
+    hfhub_errors = LazyLoader("hfhub_errors", globals(), "huggingface_hub.errors")
 
 _CONFIG_REGISTRY_OVERRIDE_HF: Dict[str, str] = {
     "mllama": "MllamaConfig"
 }
 
-_CONFIG_REGISTRY: Dict[str, Type[PretrainedConfig]] = {
-    "chatglm": ChatGLMConfig,
-    "cohere2": Cohere2Config,
-    "dbrx": DbrxConfig,
-    "deepseek_vl_v2": DeepseekVLV2Config,
-    "kimi_vl": KimiVLConfig,
-    "mpt": MPTConfig,
-    "RefinedWeb": RWConfig,  # For tiiuae/falcon-40b(-instruct)
-    "RefinedWebModel": RWConfig,  # For tiiuae/falcon-7b(-instruct)
-    "jais": JAISConfig,
-    "mlp_speculator": MLPSpeculatorConfig,
-    "medusa": MedusaConfig,
-    "eagle": EAGLEConfig,
-    "exaone": ExaoneConfig,
-    "h2ovl_chat": H2OVLChatConfig,
-    "internvl_chat": InternVLChatConfig,
-    "nemotron": NemotronConfig,
-    "NVLM_D": NVLM_D_Config,
-    "olmo2": Olmo2Config,
-    "solar": SolarConfig,
-    "skywork_chat": SkyworkR1VChatConfig,
-    "telechat": Telechat2Config,
-    "ultravox": UltravoxConfig,
+_CONFIG_REGISTRY: Dict[str, str] = {
+    "chatglm": "ChatGLMConfig",
+    "cohere2": "Cohere2Config",
+    "dbrx": "DbrxConfig",
+    "deepseek_vl_v2": "DeepseekVLV2Config",
+    "kimi_vl": "KimiVLConfig",
+    "mpt": "MPTConfig",
+    "RefinedWeb": "RWConfig",  # For tiiuae/falcon-40b(-instruct)
+    "RefinedWebModel": "RWConfig",  # For tiiuae/falcon-7b(-instruct)
+    "jais": "JAISConfig",
+    "mlp_speculator": "MLPSpeculatorConfig",
+    "medusa": "MedusaConfig",
+    "eagle": "EAGLEConfig",
+    "exaone": "ExaoneConfig",
+    "h2ovl_chat": "H2OVLChatConfig",
+    "internvl_chat": "InternVLChatConfig",
+    "nemotron": "NemotronConfig",
+    "NVLM_D": "NVLM_D_Config",
+    "olmo2": "Olmo2Config",
+    "solar": "SolarConfig",
+    "skywork_chat": "SkyworkR1VChatConfig",
+    "telechat": "Telechat2Config",
+    "ultravox": "UltravoxConfig",
     **_CONFIG_REGISTRY_OVERRIDE_HF
 }
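The registry now maps model types to class names instead of the classes themselves, so the dict no longer forces every custom config module to be imported up front; the class has to be resolved lazily at lookup time instead. A minimal sketch of that resolution step, where the module path is an assumption rather than something this commit shows:

import importlib

_CONFIG_REGISTRY = {
    "chatglm": "ChatGLMConfig",
    "mpt": "MPTConfig",
}

def _get_config_class(model_type: str):
    # Import the module holding the config classes only on first use;
    # the path below is hypothetical, not taken from the commit.
    module = importlib.import_module("vllm.transformers_utils.configs")
    return getattr(module, _CONFIG_REGISTRY[model_type])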

@@ -371,7 +372,7 @@ def try_get_local_file(model: Union[str, Path],
                                                  revision=revision)
         if isinstance(cached_filepath, str):
             return Path(cached_filepath)
-    except hfhub_utils.HFValidationError:
+    except hfhub_errors.HFValidationError:
         ...
     return None
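At runtime `hfhub_errors` is only a LazyLoader proxy, so huggingface_hub.errors is imported the first time this except clause actually touches `HFValidationError`. A minimal sketch of how such a proxy can work; vLLM's own LazyLoader in vllm.utils may differ in detail:

import importlib
import types

class LazyLoader(types.ModuleType):
    # Module stand-in that defers the real import to first attribute use.

    def __init__(self, local_name: str, parent_globals: dict, name: str):
        self._local_name = local_name
        self._parent_globals = parent_globals
        super().__init__(name)

    def __getattr__(self, item: str):
        # First access: import for real and replace the proxy binding.
        module = importlib.import_module(self.__name__)
        self._parent_globals[self._local_name] = module
        self.__dict__.update(module.__dict__)
        return getattr(module, item)

hfhub_errors = LazyLoader("hfhub_errors", globals(), "huggingface_hub.errors")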
