Skip to content

Commit 034fc18

Browse files
authored
bugfix: Fix compilation on cuda 12.2 (#961)
Compiling FlashInfer on CUDA 12.2 triggers errors such as those shown below. This PR aims to fix this issue. ``` #10 1380.2 /usr/flexflow-serve/deps/flashinfer/include/flashinfer/attention/../vec_dtypes.cuh(55): error: more than one instance of function "flashinfer::__hmul" matches the argument list: #10 1380.2 function "__hmul(__nv_bfloat16, __nv_bfloat16)" (declared at line 3518 of /usr/local/cuda/include/cuda_bf16.hpp) #10 1380.2 function "flashinfer::__hmul(__nv_bfloat16, __nv_bfloat16)" (declared at line 44) #10 1380.2 argument types are: (const __nv_bfloat16, const __nv_bfloat16) #10 1380.2 val.x = __hmul(a.x, b.x); #10 1380.2 ^ #10 1380.2 #10 1380.2 /usr/flexflow-serve/deps/flashinfer/include/flashinfer/attention/../vec_dtypes.cuh(56): error: more than one instance of function "flashinfer::__hmul" matches the argument list: #10 1380.2 function "__hmul(__nv_bfloat16, __nv_bfloat16)" (declared at line 3518 of /usr/local/cuda/include/cuda_bf16.hpp) #10 1380.2 function "flashinfer::__hmul(__nv_bfloat16, __nv_bfloat16)" (declared at line 44) #10 1380.2 argument types are: (const __nv_bfloat16, const __nv_bfloat16) #10 1380.2 val.y = __hmul(a.y, b.y); #10 1380.2 ^ #10 1380.2 #10 1380.2 /usr/flexflow-serve/deps/flashinfer/include/flashinfer/attention/../vec_dtypes.cuh(1158): error: more than one instance of function "flashinfer::make_bfloat162" matches the argument list: #10 1380.2 function "make_bfloat162(__nv_bfloat16, __nv_bfloat16)" (declared at line 1189 of /usr/local/cuda/include/cuda_bf16.hpp) #10 1380.2 function "flashinfer::make_bfloat162(__nv_bfloat16, __nv_bfloat16)" (declared at line 37) #10 1380.2 argument types are: (nv_bfloat16, nv_bfloat16) #10 1380.2 data = make_bfloat162(val, val); #10 1380.2 ^ #10 1380.2 #10 1380.2 /usr/flexflow-serve/deps/flashinfer/include/flashinfer/attention/../vec_dtypes.cuh(1203): error: more than one instance of function "flashinfer::make_bfloat162" matches the argument list: #10 1380.2 function 
"make_bfloat162(__nv_bfloat16, __nv_bfloat16)" (declared at line 1189 of /usr/local/cuda/include/cuda_bf16.hpp) #10 1380.2 function "flashinfer::make_bfloat162(__nv_bfloat16, __nv_bfloat16)" (declared at line 37) #10 1380.2 argument types are: (nv_bfloat16, nv_bfloat16) #10 1380.2 *(nv_bfloat162*)(&data.x) = make_bfloat162(val, val); #10 1380.2 ^ #10 1380.2 #10 1380.2 /usr/flexflow-serve/deps/flashinfer/include/flashinfer/attention/../vec_dtypes.cuh(1204): error: more than one instance of function "flashinfer::make_bfloat162" matches the argument list: #10 1380.2 function "make_bfloat162(__nv_bfloat16, __nv_bfloat16)" (declared at line 1189 of /usr/local/cuda/include/cuda_bf16.hpp) #10 1380.2 function "flashinfer::make_bfloat162(__nv_bfloat16, __nv_bfloat16)" (declared at line 37) #10 1380.2 argument types are: (nv_bfloat16, nv_bfloat16) #10 1380.2 *(nv_bfloat162*)(&data.y) = make_bfloat162(val, val); #10 1380.2 ^ #10 1380.2 #10 1384.6 /usr/flexflow-serve/deps/flashinfer/include/flashinfer/attention/../mma.cuh(524): warning #177-D: variable "s_u32" was declared but never referenced #10 1384.6 uint32_t* s_u32 = (uint32_t*)(s); #10 1384.6 ^ #10 1384.6 detected during: #10 1384.6 instantiation of "void flashinfer::<unnamed>::compute_sfm_v<KTraits>(flashinfer::smem_t<KTraits::SWIZZLE_MODE_KV> *, uint32_t *, KTraits::DTypeQKAccum (*)[KTraits::NUM_MMA_KV][8], float (*)[KTraits::NUM_MMA_D_VO][8], float (*)[2]) [with KTraits=flashinfer::KernelTraits<flashinfer::MaskMode::kNone, 128U, 2U, 8U, 4U, 4U, 4U, 1U, flashinfer::PosEncodingMode::kNone, half, half, half, float, int32_t, flashinfer::DefaultAttention<false, false, false, false>>]" at line 2127 of /usr/flexflow-serve/deps/flashinfer/include/flashinfer/attention/prefill.cuh ```
1 parent 2be9ad7 commit 034fc18

File tree

1 file changed: +2 −2 lines changed

include/flashinfer/vec_dtypes.cuh

+2-2
Diff for include/flashinfer/vec_dtypes.cuh (+2 −2):

```diff
@@ -31,9 +31,9 @@ namespace flashinfer {

 #define FLASHINFER_INLINE inline __attribute__((always_inline)) __device__

-#if (__CUDACC_VER_MAJOR__ * 10000 + __CUDACC_VER_MINOR__ * 100 < 120400) && \
+#if (__CUDACC_VER_MAJOR__ * 10000 + __CUDACC_VER_MINOR__ * 100 < 120200) && \
     (defined(__CUDA_ARCH__) && (__CUDA_ARCH__ < 800))
-// CUDA version < 12.4 and GPU architecture < 80
+// CUDA version < 12.2 and GPU architecture < 80
 FLASHINFER_INLINE __nv_bfloat162 make_bfloat162(const __nv_bfloat16 x, const __nv_bfloat16 y) {
   __nv_bfloat162 t;
   t.x = x;
```

0 commit comments

Comments
 (0)