
Commit 5d73641

refactor: move backends lists to config.py
Signed-off-by: Travis Johnson <[email protected]>
1 parent 353a0a1 commit 5d73641

4 files changed: 16 additions(+), 21 deletions(−)

vllm/config.py

Lines changed: 5 additions & 2 deletions
@@ -27,8 +27,6 @@
 import vllm.envs as envs
 from vllm.compilation.inductor_pass import CallableInductorPass, InductorPass
 from vllm.logger import init_logger
-from vllm.model_executor.guided_decoding import (GUIDED_DECODING_BACKENDS_V0,
-                                                 GUIDED_DECODING_BACKENDS_V1)
 from vllm.model_executor.layers.quantization import (QUANTIZATION_METHODS,
                                                      get_quantization_config)
 from vllm.model_executor.models import ModelRegistry
@@ -64,6 +62,11 @@
 
 logger = init_logger(__name__)
 
+GUIDED_DECODING_BACKENDS_V0 = [
+    "outlines", "lm-format-enforcer", "xgrammar", "guidance", "auto"
+]
+GUIDED_DECODING_BACKENDS_V1 = ["xgrammar", "guidance", "auto"]
+
 # This value is chosen to have a balance between ITL and TTFT. Note it is
 # not optimized for throughput.
 _DEFAULT_MAX_NUM_BATCHED_TOKENS = 2048
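
With the lists defined at module level in vllm/config.py, config-layer code can validate a requested backend without importing from vllm.model_executor, which itself imports vllm.config. A minimal sketch of that usage pattern follows; DecodingConfigSketch and its validation are illustrative, not vllm's actual config class:

# Minimal sketch, assuming only the two lists added above; the class below
# is hypothetical and only demonstrates config-side validation.
from dataclasses import dataclass

GUIDED_DECODING_BACKENDS_V0 = [
    "outlines", "lm-format-enforcer", "xgrammar", "guidance", "auto"
]


@dataclass
class DecodingConfigSketch:
    guided_decoding_backend: str = "auto"

    def __post_init__(self) -> None:
        # Reject unknown backends at construction time.
        if self.guided_decoding_backend not in GUIDED_DECODING_BACKENDS_V0:
            raise ValueError(
                f"Invalid guided_decoding_backend "
                f"'{self.guided_decoding_backend}', must be one of "
                f"{GUIDED_DECODING_BACKENDS_V0}")


DecodingConfigSketch("xgrammar")  # passes validation
# DecodingConfigSketch("typo")    # would raise ValueError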

vllm/engine/arg_utils.py

Lines changed: 4 additions & 5 deletions
@@ -16,9 +16,9 @@
 
 import vllm.envs as envs
 from vllm import version
-from vllm.config import (CacheConfig, CompilationConfig, ConfigFormat,
-                         DecodingConfig, DeviceConfig,
-                         DistributedExecutorBackend, HfOverrides,
+from vllm.config import (GUIDED_DECODING_BACKENDS_V1, CacheConfig,
+                         CompilationConfig, ConfigFormat, DecodingConfig,
+                         DeviceConfig, DistributedExecutorBackend, HfOverrides,
                          KVTransferConfig, LoadConfig, LoadFormat, LoRAConfig,
                          ModelConfig, ModelImpl, ObservabilityConfig,
                          ParallelConfig, PoolerConfig, PromptAdapterConfig,
@@ -27,14 +27,13 @@
                          get_attr_docs)
 from vllm.executor.executor_base import ExecutorBase
 from vllm.logger import init_logger
-from vllm.model_executor.guided_decoding import GUIDED_DECODING_BACKENDS_V1
 from vllm.model_executor.layers.quantization import QUANTIZATION_METHODS
 from vllm.plugins import load_general_plugins
 from vllm.reasoning import ReasoningParserManager
 from vllm.test_utils import MODEL_WEIGHTS_S3_BUCKET, MODELS_ON_S3
 from vllm.transformers_utils.utils import check_gguf_file
 from vllm.usage.usage_lib import UsageContext
-from vllm.utils import FlexibleArgumentParser, StoreBoolean, is_in_ray_actor
+from vllm.utils import FlexibleArgumentParser, is_in_ray_actor
 
 # yapf: enable
 
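arg_utils.py now sources GUIDED_DECODING_BACKENDS_V1 from vllm.config. A hedged sketch of how such a constant typically drives CLI validation; plain argparse stands in for vllm's FlexibleArgumentParser, and the flag wiring is illustrative rather than copied from arg_utils.py:

# Illustrative only: a relocated constant feeding an argparse choices list.
import argparse

GUIDED_DECODING_BACKENDS_V1 = ["xgrammar", "guidance", "auto"]  # as in vllm.config

parser = argparse.ArgumentParser()
parser.add_argument(
    "--guided-decoding-backend",
    choices=GUIDED_DECODING_BACKENDS_V1,
    default="auto",
    help="Backend for guided decoding on the V1 engine.")

args = parser.parse_args(["--guided-decoding-backend", "guidance"])
print(args.guided_decoding_backend)  # -> guidance

argparse rejects any value outside choices, so the list of valid backends only needs to be maintained in one place.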
vllm/entrypoints/chat_utils.py

Lines changed: 7 additions & 9 deletions
@@ -7,8 +7,8 @@
 from collections.abc import Awaitable, Iterable
 from functools import cache, lru_cache, partial
 from pathlib import Path
-from typing import (TYPE_CHECKING, Any, Callable, Generic, Literal, Optional,
-                    TypeVar, Union, cast)
+from typing import (Any, Callable, Generic, Literal, Optional, TypeVar, Union,
+                    cast)
 
 import jinja2.nodes
 import transformers.utils.chat_template_utils as hf_chat_utils
@@ -33,15 +33,13 @@
                     ProcessorMixin)
 from typing_extensions import Required, TypeAlias, TypedDict
 
+from vllm.config import ModelConfig
 from vllm.logger import init_logger
 from vllm.multimodal import MULTIMODAL_REGISTRY, MultiModalDataDict
 from vllm.multimodal.utils import MediaConnector
 from vllm.transformers_utils.processor import cached_get_processor
 from vllm.transformers_utils.tokenizer import AnyTokenizer, MistralTokenizer
 
-if TYPE_CHECKING:
-    from vllm.config import ModelConfig
-
 logger = init_logger(__name__)
 
 
@@ -449,7 +447,7 @@ class BaseMultiModalItemTracker(ABC, Generic[_T]):
     maximum per prompt.
     """
 
-    def __init__(self, model_config: "ModelConfig", tokenizer: AnyTokenizer):
+    def __init__(self, model_config: ModelConfig, tokenizer: AnyTokenizer):
         super().__init__()
 
         self._model_config = model_config
@@ -458,7 +456,7 @@ def __init__(self, model_config: "ModelConfig", tokenizer: AnyTokenizer):
         self._items_by_modality = defaultdict[str, list[_T]](list)
 
     @property
-    def model_config(self) -> "ModelConfig":
+    def model_config(self) -> ModelConfig:
         return self._model_config
 
     @property
@@ -1124,7 +1122,7 @@ def _postprocess_messages(messages: list[ConversationMessage]) -> None:
 
 def parse_chat_messages(
     messages: list[ChatCompletionMessageParam],
-    model_config: "ModelConfig",
+    model_config: ModelConfig,
     tokenizer: AnyTokenizer,
     content_format: _ChatTemplateContentFormat,
 ) -> tuple[list[ConversationMessage], Optional[MultiModalDataDict]]:
@@ -1147,7 +1145,7 @@ def parse_chat_messages(
 
 def parse_chat_messages_futures(
     messages: list[ChatCompletionMessageParam],
-    model_config: "ModelConfig",
+    model_config: ModelConfig,
     tokenizer: AnyTokenizer,
     content_format: _ChatTemplateContentFormat,
 ) -> tuple[list[ConversationMessage], Awaitable[Optional[MultiModalDataDict]]]:
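
These chat_utils.py changes are the payoff of the move: once vllm.config stopped importing from vllm.model_executor.guided_decoding, the import cycle that forced a TYPE_CHECKING-only import of ModelConfig disappeared, so the quoted "ModelConfig" forward references can become plain annotations. A small contrast of the two patterns; the module names match the diff, the functions are illustrative:

# Old pattern: the class is importable only for type checkers, so every
# annotation must be a quoted forward reference.
from typing import TYPE_CHECKING

if TYPE_CHECKING:
    from vllm.config import ModelConfig  # not executed at runtime


def old_style(model_config: "ModelConfig") -> None:  # quoted name required
    ...


# New pattern: with the cycle broken, a plain runtime import works and
# annotations can name the class directly.
from vllm.config import ModelConfig


def new_style(model_config: ModelConfig) -> None:
    ...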

vllm/model_executor/guided_decoding/__init__.py

Lines changed: 0 additions & 5 deletions
@@ -19,11 +19,6 @@
 
 logger = init_logger(__name__)
 
-GUIDED_DECODING_BACKENDS_V0 = [
-    "outlines", "lm-format-enforcer", "xgrammar", "guidance", "auto"
-]
-GUIDED_DECODING_BACKENDS_V1 = ["xgrammar", "guidance", "auto"]
-
 
 def maybe_backend_fallback(
     guided_params: GuidedDecodingParams) -> GuidedDecodingParams:
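
One consequence worth flagging: the lists are deleted from the guided_decoding package without a re-export, so any downstream code still importing them from the old path will break. Assuming the tree exactly as this commit leaves it:

# The old path no longer resolves after this commit:
#     from vllm.model_executor.guided_decoding import GUIDED_DECODING_BACKENDS_V1
# now raises ImportError; the canonical location is vllm.config.
from vllm.config import (GUIDED_DECODING_BACKENDS_V0,
                         GUIDED_DECODING_BACKENDS_V1)

assert "outlines" in GUIDED_DECODING_BACKENDS_V0
assert GUIDED_DECODING_BACKENDS_V1 == ["xgrammar", "guidance", "auto"]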
