File tree 1 file changed +4
-3
lines changed
1 file changed +4
-3
lines changed Original file line number Diff line number Diff line change 18
18
from vllm .model_executor import set_random_seed
19
19
from vllm .utils import STR_DTYPE_TO_TORCH_DTYPE
20
20
from vllm .v1 .core .sched .output import SchedulerOutput
21
- from vllm .v1 .kv_cache_interface import (FullAttentionSpec , KVCacheConfig ,
21
+ from vllm .v1 .kv_cache_interface import (AttentionSpec , KVCacheConfig ,
22
22
KVCacheSpec )
23
23
from vllm .v1 .outputs import ModelRunnerOutput
24
24
from vllm .v1 .utils import bind_kv_cache
@@ -137,7 +137,7 @@ def determine_available_memory(self) -> int:
137
137
kv_caches : dict [str , torch .Tensor ] = {}
138
138
kv_cache_spec = self .model_runner .get_kv_cache_spec ()
139
139
for layer_name , layer_spec in kv_cache_spec .items ():
140
- if isinstance (layer_spec , FullAttentionSpec ):
140
+ if isinstance (layer_spec , AttentionSpec ):
141
141
dtype = layer_spec .dtype
142
142
143
143
# Use an empty tensor instead of `None` to force Dynamo to pass
@@ -147,7 +147,8 @@ def determine_available_memory(self) -> int:
147
147
device = self .device )
148
148
kv_caches [layer_name ] = tpu_kv_cache
149
149
else :
150
- raise NotImplementedError
150
+ raise NotImplementedError (
151
+ f"Unsupported KV cache spec '{ type (layer_spec )} '" )
151
152
152
153
runner_kv_caches : list [torch .Tensor ] = []
153
154
bind_kv_cache (
You can’t perform that action at this time.
0 commit comments