We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 606a702, commit 56a31a3 (Copy full SHA for 56a31a3)
vllm/v1/spec_decode/eagle.py
@@ -250,13 +250,12 @@ def prepare_input_kernel(
250
num_tokens = end_pos - start_pos
251
252
index_start = tl.load(cu_query_lens_ptr + pid)
253
- indices = index_start + tl.arange(0, BLOCK_SIZE)
254
255
num_blocks = tl.cdiv(num_tokens, BLOCK_SIZE)
256
for i in tl.range(num_blocks):
257
offset = i * BLOCK_SIZE + tl.arange(0, BLOCK_SIZE)
258
tl.store(
259
out_ptr + start_pos + offset,
260
- indices,
+ index_start + offset,
261
mask=offset < num_tokens,
262
)
0 commit comments