Skip to content

Commit d532ed8

Browse files
DarkLight1337 authored and amitm02 committed
[Bugfix] Fix Mistral-format models with sliding window (vllm-project#18693)
Signed-off-by: DarkLight1337 <[email protected]>
Signed-off-by: amit <[email protected]>
1 parent 7646523 commit d532ed8

File tree

1 file changed

+10
-4
lines changed

1 file changed

+10
-4
lines changed

vllm/config.py

Lines changed: 10 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -542,8 +542,10 @@ def __post_init__(self) -> None:
542542
sliding_window = getattr(self.hf_text_config, "sliding_window", None)
543543
sliding_window_pattern = getattr(self.hf_text_config,
544544
"sliding_window_pattern", None)
545+
has_interleaved_attention = sliding_window_pattern is not None or (
546+
isinstance(sliding_window, list))
545547

546-
if not (self.disable_sliding_window or sliding_window_pattern is None):
548+
if not self.disable_sliding_window and has_interleaved_attention:
547549
if (backend :=
548550
envs.VLLM_ATTENTION_BACKEND) in ("XFORMERS", "FLASHINFER"):
549551
sliding_window_len_min = get_min_sliding_window(
@@ -563,7 +565,10 @@ def __post_init__(self) -> None:
563565
# only the attention layer itself is aware of the sliding
564566
# window, and use the window size to compute the attention.
565567
self.hf_text_config.interleaved_sliding_window = sliding_window
566-
delattr(self.hf_text_config, "sliding_window")
568+
569+
if hasattr(self.hf_text_config, "sliding_window"):
570+
delattr(self.hf_text_config, "sliding_window")
571+
567572
sliding_window = None
568573

569574
self.max_model_len = _get_and_verify_max_len(
@@ -1041,7 +1046,8 @@ def verify_with_parallel_config(
10411046
if self.use_async_output_proc:
10421047
self.use_async_output_proc = False
10431048

1044-
def get_hf_config_sliding_window(self) -> Optional[int]:
1049+
def get_hf_config_sliding_window(
1050+
self) -> Union[Optional[int], list[Optional[int]]]:
10451051
"""Get the sliding window size, or None if disabled."""
10461052

10471053
# Some models, like Qwen2 and Qwen1.5, use `use_sliding_window` in
@@ -1052,7 +1058,7 @@ def get_hf_config_sliding_window(self) -> Optional[int]:
10521058
return None
10531059
return getattr(self.hf_text_config, "sliding_window", None)
10541060

1055-
def get_sliding_window(self) -> Optional[int]:
1061+
def get_sliding_window(self) -> Optional[Union[int, list[Optional[int]]]]:
10561062
"""Get the sliding window size, or None if disabled.
10571063
"""
10581064
# If user disables sliding window, return None.

0 commit comments

Comments
 (0)