[Spec Decode] Fix input triton kernel for eagle (vllm-project#15909)

ekagra-ranjan · Alex4210987 · commit 56a31a33c566 · 2025-04-05T14:42:05.000Z
Signed-off-by: xinyuxiao <xinyuxiao2024@gmail.com>
diff --git a/vllm/v1/spec_decode/eagle.py b/vllm/v1/spec_decode/eagle.py
@@ -250,13 +250,12 @@ def prepare_input_kernel(
     num_tokens = end_pos - start_pos
 
     index_start = tl.load(cu_query_lens_ptr + pid)
-    indices = index_start + tl.arange(0, BLOCK_SIZE)
 
     num_blocks = tl.cdiv(num_tokens, BLOCK_SIZE)
     for i in tl.range(num_blocks):
         offset = i * BLOCK_SIZE + tl.arange(0, BLOCK_SIZE)
         tl.store(
             out_ptr + start_pos + offset,
-            indices,
+            index_start + offset,
             mask=offset < num_tokens,
         )