We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 03d2cb9 — commit 934cf70 (Copy full SHA for 934cf70)
vllm/v1/attention/backends/flash_attn.py
@@ -264,7 +264,7 @@ def make_local_attention_virtual_batches(
264          np.arange(pages_per_local_batch, dtype=np.int32),
265          (virtual_batches, pages_per_local_batch)) \
266              + np.expand_dims(block_starts, axis=1)
267 -    block_indices = block_indices.flatten()
267 +    block_indices = block_indices.flatten().clip(max=block_table.shape[1] - 1)
268      batch_indices = np.repeat(np.arange(actual_batch_size, dtype=np.int32),
269                                local_blocks * pages_per_local_batch)
270      block_table_local = block_table[batch_indices, block_indices]\
0 commit comments