We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent 58d3593, commit 9907bc1 (Copy full SHA for 9907bc1)
python/flashinfer/prefill.py
@@ -1215,8 +1215,8 @@ def __init__(
1215
raise ValueError(
1216
"kv_indptr_buf should be a torch.Tensor in cuda graph mode"
1217
)
1218
- self._fixed_batch_size = len(qo_indptr_buf)
1219
- if len(kv_indptr_buf) != self._fixed_batch_size:
+ self._fixed_batch_size = len(qo_indptr_buf) - 1
+ if len(kv_indptr_buf) != self._fixed_batch_size + 1:
1220
1221
"The length of kv_indptr_buf ({}) should be the same as qo_indptr_buf ({}).".format(
1222
len(kv_indptr_buf), self._fixed_batch_size
0 commit comments