@@ -542,8 +542,10 @@ def __post_init__(self) -> None:
542
542
sliding_window = getattr (self .hf_text_config , "sliding_window" , None )
543
543
sliding_window_pattern = getattr (self .hf_text_config ,
544
544
"sliding_window_pattern" , None )
545
+ has_interleaved_attention = sliding_window_pattern is not None or (
546
+ isinstance (sliding_window , list ))
545
547
546
- if not ( self .disable_sliding_window or sliding_window_pattern is None ) :
548
+ if not self .disable_sliding_window and has_interleaved_attention :
547
549
if (backend :=
548
550
envs .VLLM_ATTENTION_BACKEND ) in ("XFORMERS" , "FLASHINFER" ):
549
551
sliding_window_len_min = get_min_sliding_window (
@@ -563,7 +565,10 @@ def __post_init__(self) -> None:
563
565
# only the attention layer itself is aware of the sliding
564
566
# window, and use the window size to compute the attention.
565
567
self .hf_text_config .interleaved_sliding_window = sliding_window
566
- delattr (self .hf_text_config , "sliding_window" )
568
+
569
+ if hasattr (self .hf_text_config , "sliding_window" ):
570
+ delattr (self .hf_text_config , "sliding_window" )
571
+
567
572
sliding_window = None
568
573
569
574
self .max_model_len = _get_and_verify_max_len (
@@ -1041,7 +1046,8 @@ def verify_with_parallel_config(
1041
1046
if self .use_async_output_proc :
1042
1047
self .use_async_output_proc = False
1043
1048
1044
- def get_hf_config_sliding_window (self ) -> Optional [int ]:
1049
+ def get_hf_config_sliding_window (
1050
+ self ) -> Union [Optional [int ], list [Optional [int ]]]:
1045
1051
"""Get the sliding window size, or None if disabled."""
1046
1052
1047
1053
# Some models, like Qwen2 and Qwen1.5, use `use_sliding_window` in
@@ -1052,7 +1058,7 @@ def get_hf_config_sliding_window(self) -> Optional[int]:
1052
1058
return None
1053
1059
return getattr (self .hf_text_config , "sliding_window" , None )
1054
1060
1055
- def get_sliding_window (self ) -> Optional [int ]:
1061
+ def get_sliding_window (self ) -> Optional [Union [ int , list [ Optional [ int ]]] ]:
1056
1062
"""Get the sliding window size, or None if disabled.
1057
1063
"""
1058
1064
# If user disables sliding window, return None.
0 commit comments