Skip to content

Commit 594febe

Browse files
authored
misc: Temporarily disable POD from AOT wheels (#956)
We currently don't generate the AOT implementations for POD Attention. This causes missing symbols when loading the AOT shared library. I'll try to work on #791 later this week so that we can have a unified code path for AOT and JIT.
1 parent 30b2838 commit 594febe

File tree

2 files changed

+3
-2
lines changed

2 files changed

+3
-2
lines changed

csrc/flashinfer_ops.cu

+2-1
Original file line numberDiff line numberDiff line change
@@ -284,7 +284,8 @@ TORCH_LIBRARY_FRAGMENT(TORCH_EXTENSION_NAME, m) {
284284
m.def("batch_prefill_with_paged_kv_cache_run", BatchPrefillWithPagedKVCacheRun);
285285

286286
// pod-attention
287-
m.def("pod_with_kv_cache_tensor", pod_with_kv_cache_tensor);
287+
// Temporarily disabled because we don't generate the implementation yet.
288+
// m.def("pod_with_kv_cache_tensor", pod_with_kv_cache_tensor);
288289

289290
// quantization
290291
// GPU packbits operator

setup.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -243,7 +243,7 @@ def __init__(self, *args, **kwargs) -> None:
243243
"csrc/batch_prefill.cu",
244244
"csrc/single_decode.cu",
245245
"csrc/single_prefill.cu",
246-
"csrc/pod.cu",
246+
# "csrc/pod.cu", # Temporarily disabled
247247
"csrc/flashinfer_ops.cu",
248248
]
249249
kernel_sm90_sources = [

0 commit comments

Comments
 (0)