We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 4fa51d2, commit a895df4 (Copy full SHA for a895df4)
vllm/worker/tpu_worker.py
@@ -163,8 +163,8 @@ def determine_num_available_blocks(self) -> Tuple[int, int]:
163
usable_memory_size = int(total_memory_size *
164
self.cache_config.gpu_memory_utilization)
165
tpu_kv_cache_bytes = max(usable_memory_size - profiled, 0)
166
- dtype_btyes = get_dtype_size(self.cache_dtype)
167
- block_size_bytes = (dtype_btyes * self.cache_config.block_size *
+ dtype_bytes = get_dtype_size(self.cache_dtype)
+ block_size_bytes = (dtype_bytes * self.cache_config.block_size *
168
num_layers * 2 * head_size * num_kv_heads)
169
num_tpu_blocks = tpu_kv_cache_bytes // block_size_bytes
170
num_tpu_blocks = (num_tpu_blocks // 8) * 8 # Round down to 8.
0 commit comments