Skip to content

Commit fcf2e3d

Browse files
authored
[Bugfix] Fix OpenVINO model runner (#12750)
1 parent 58b218d commit fcf2e3d

File tree

3 files changed

+12
-12
lines changed

3 files changed

+12
-12
lines changed

vllm/attention/backends/openvino.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -140,3 +140,7 @@ class OpenVINOAttentionMetadata:
     # `model_executable`.
     multi_modal_placeholder_index_maps: Optional[Dict[
         str, MultiModalPlaceholderMap.IndexMap]]
+
+    # Enable/disable KV scales calculation. This is so that we can disable the
+    # calculation until after prefill and cuda graph capture.
+    enable_kv_scales_calculation: bool

vllm/model_executor/model_loader/openvino.py

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@

 import vllm.envs as envs
 from vllm.attention.backends.openvino import OpenVINOAttentionMetadata
-from vllm.config import DeviceConfig, ModelConfig
+from vllm.config import ModelConfig, VllmConfig, set_current_vllm_config
 from vllm.logger import init_logger
 from vllm.model_executor.layers.logits_processor import (LogitsProcessor,
                                                          _prune_hidden_states)
@@ -103,7 +103,6 @@ def __init__(
         self,
         ov_core: ov.Core,
         model_config: ModelConfig,
-        device_config: DeviceConfig,
         kv_cache_dtype: ov.Type,
     ) -> None:
         super().__init__()
@@ -187,8 +186,7 @@ def sample(


 def get_model(
-    model_config: ModelConfig,
-    device_config: DeviceConfig,
+    vllm_config: VllmConfig,
     kv_cache_dtype: ov.Type,
     **kwargs,
 ) -> torch.nn.Module:
@@ -201,5 +199,6 @@ def get_model(
         "be added in the future. If this is important to you, "
         "please open an issue on github.")

-    return OpenVINOCausalLM(ov_core, model_config, device_config,
-                            kv_cache_dtype)
+    with set_current_vllm_config(vllm_config):
+        return OpenVINOCausalLM(ov_core, vllm_config.model_config,
+                                kv_cache_dtype)

vllm/worker/openvino_model_runner.py

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -54,15 +54,13 @@ def __init__(
     ):
         self.ov_core = ov_core
         ModelRunnerBase.__init__(self, vllm_config=vllm_config)
-        cache_config = self.cache_config
-        model_config = self.model_config
         self.is_driver_worker = is_driver_worker

         self.device = self.device_config.device

         self.kv_cache_dtype = kv_cache_dtype
-        self.sliding_window = model_config.get_sliding_window()
-        self.block_size = cache_config.block_size
+        self.sliding_window = self.model_config.get_sliding_window()
+        self.block_size = self.cache_config.block_size

         self.attn_backend = get_attn_backend(
             self.model_config.get_head_size(),
@@ -81,8 +79,7 @@ def __init__(
         self.model: nn.Module  # Set after init_Model

     def load_model(self) -> None:
-        self.model = get_model(model_config=self.model_config,
-                               device_config=self.device_config,
+        self.model = get_model(vllm_config=self.vllm_config,
                                kv_cache_dtype=self.kv_cache_dtype,
                                ov_core=self.ov_core)

0 commit comments

Comments
 (0)