1 parent c710306 commit 9111a91
vllm/v1/engine/core.py
@@ -79,12 +79,11 @@ def _initialize_kv_caches(self,
 
         # Get the kv cache tensor size
         kv_cache_configs = []
-        num_gpu_blocks = None
         for kv_cache_spec in kv_cache_specs:
             kv_cache_config = get_kv_cache_config(vllm_config, kv_cache_spec,
                                                   available_gpu_memory)
             kv_cache_configs.append(kv_cache_config)
-        assert len(set(config.num_blocks for config in kv_cache_configs)) == 1,
+        assert len(set(config.num_blocks for config in kv_cache_configs)) == 1, \
             f"num_gpu_blocks need to be the same across workers: {num_gpu_blocks} != {kv_cache_config.num_blocks}"
         num_gpu_blocks = kv_cache_configs[0].num_blocks
         num_cpu_blocks = 0
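
The change adds a trailing backslash so the two-line assert parses as a single statement: without it, the first line ends on a dangling comma and is a SyntaxError. The following minimal, self-contained sketch (not from the commit; FakeConfig is a hypothetical stand-in for a per-worker KV cache config) shows the corrected pattern in isolation:

# Sketch of the corrected pattern. Without the trailing `\`, the assert
# would end at the newline and the dangling comma would be a SyntaxError;
# with it, the f-string on the next line becomes the assert's message.

class FakeConfig:
    # Hypothetical stand-in for a per-worker KV cache config.
    def __init__(self, num_blocks):
        self.num_blocks = num_blocks

kv_cache_configs = [FakeConfig(16), FakeConfig(16)]

# All workers must report the same block count; adjacent f-strings are
# implicitly concatenated into one assertion message.
assert len(set(config.num_blocks for config in kv_cache_configs)) == 1, \
    f"num_gpu_blocks need to be the same across workers: " \
    f"{[config.num_blocks for config in kv_cache_configs]}"

num_gpu_blocks = kv_cache_configs[0].num_blocks  # 16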