Skip to content

Commit 5d240a0

Browse files
committed
two more mistakes in chunk_scan_fwd
Signed-off-by: Yu Chin Fabian Lim <[email protected]>
1 parent eba332a commit 5d240a0

File tree

1 file changed

+11
-2
lines changed

1 file changed

+11
-2
lines changed

vllm/model_executor/layers/mamba/ops/ssd_chunk_scan.py

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -293,7 +293,11 @@ def _chunk_scan_fwd_kernel(
293293
dA_cs_m_boundary = tl.load(
294294
dA_cumsum_ptr +
295295
(pid_m * BLOCK_SIZE_M + c_off - 1) * stride_dA_cs_csize,
296-
mask=(pid_m * BLOCK_SIZE_M + c_off - 1) > -1,
296+
mask=(
297+
((pid_m * BLOCK_SIZE_M + c_off - 1) > -1)
298+
and
299+
((pid_m * BLOCK_SIZE_M + c_off) < chunk_size)
300+
),
297301
other=0.0).to(tl.float32)
298302

299303
if HAS_SEQ_IDX:
@@ -463,12 +467,17 @@ def _seq_idx_to_chunk_indices_offsets(seq_idx, chunk_size: int):
463467
p += (s % chunk_size > 0)
464468

465469
# get the dimensions
466-
_s, _e = s // chunk_size + p, e // chunk_size + p + 1
470+
# - _e is incremented by 1 to shift the end boundary by one chunk
471+
# - this shift is not needed when chunk_size divides e evenly (e % chunk_size == 0)
472+
_s, _e = s // chunk_size + p, e // chunk_size + p + (e % chunk_size > 0)
467473

468474
# adjust inidces and offsets
469475
chunk_indices[_s:_e] -= p
470476
chunk_offsets[_s] = s % chunk_size
471477

478+
#if (chunk_indices < 0).sum() > 0:
479+
# import pdb; pdb.set_trace()
480+
472481
return chunk_indices, chunk_offsets
473482

474483

0 commit comments

Comments
 (0)