Skip to content

Commit 594febe

Browse files
authored
misc: Temporarily disable POD from AOT wheels (#956)
We currently don't generate the AOT implementations for POD Attention. This causes missing symbols when loading the AOT shared library. I'll try to work on #791 later this week so that we can have a unified code path for AOT and JIT.
1 parent 30b2838 commit 594febe

File tree

2 files changed

+3
-2
lines changed

2 files changed

+3
-2
lines changed

csrc/flashinfer_ops.cu

+2-1
Original file line numberDiff line numberDiff line change
@@ -284,7 +284,8 @@ TORCH_LIBRARY_FRAGMENT(TORCH_EXTENSION_NAME, m) {
284284
m.def("batch_prefill_with_paged_kv_cache_run", BatchPrefillWithPagedKVCacheRun);
285285

286286
// pod-attention
287-
m.def("pod_with_kv_cache_tensor", pod_with_kv_cache_tensor);
287+
// Temporarily disabled because we don't generate the implementation yet.
288+
// m.def("pod_with_kv_cache_tensor", pod_with_kv_cache_tensor);
288289

289290
// quantization
290291
// GPU packbits operator

setup.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -243,7 +243,7 @@ def __init__(self, *args, **kwargs) -> None:
243243
"csrc/batch_prefill.cu",
244244
"csrc/single_decode.cu",
245245
"csrc/single_prefill.cu",
246-
"csrc/pod.cu",
246+
# "csrc/pod.cu", # Temporarily disabled
247247
"csrc/flashinfer_ops.cu",
248248
]
249249
kernel_sm90_sources = [

0 commit comments

Comments
 (0)