You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
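Compiling FlashInfer with CUDA 12.2 fails with ambiguous-overload errors such as those shown
below: calls to `__hmul` and `make_bfloat162` in `vec_dtypes.cuh` match both FlashInfer's own definitions (`flashinfer::__hmul`, `flashinfer::make_bfloat162`) and the functions of the same name declared in CUDA's `cuda_bf16.hpp`. This PR aims to fix this issue.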
```
#10 1380.2 /usr/flexflow-serve/deps/flashinfer/include/flashinfer/attention/../vec_dtypes.cuh(55): error: more than one instance of function "flashinfer::__hmul" matches the argument list:
#10 1380.2 function "__hmul(__nv_bfloat16, __nv_bfloat16)" (declared at line 3518 of /usr/local/cuda/include/cuda_bf16.hpp)
#10 1380.2 function "flashinfer::__hmul(__nv_bfloat16, __nv_bfloat16)" (declared at line 44)
#10 1380.2 argument types are: (const __nv_bfloat16, const __nv_bfloat16)
#10 1380.2 val.x = __hmul(a.x, b.x);
#10 1380.2 ^
#10 1380.2
#10 1380.2 /usr/flexflow-serve/deps/flashinfer/include/flashinfer/attention/../vec_dtypes.cuh(56): error: more than one instance of function "flashinfer::__hmul" matches the argument list:
#10 1380.2 function "__hmul(__nv_bfloat16, __nv_bfloat16)" (declared at line 3518 of /usr/local/cuda/include/cuda_bf16.hpp)
#10 1380.2 function "flashinfer::__hmul(__nv_bfloat16, __nv_bfloat16)" (declared at line 44)
#10 1380.2 argument types are: (const __nv_bfloat16, const __nv_bfloat16)
#10 1380.2 val.y = __hmul(a.y, b.y);
#10 1380.2 ^
#10 1380.2
#10 1380.2 /usr/flexflow-serve/deps/flashinfer/include/flashinfer/attention/../vec_dtypes.cuh(1158): error: more than one instance of function "flashinfer::make_bfloat162" matches the argument list:
#10 1380.2 function "make_bfloat162(__nv_bfloat16, __nv_bfloat16)" (declared at line 1189 of /usr/local/cuda/include/cuda_bf16.hpp)
#10 1380.2 function "flashinfer::make_bfloat162(__nv_bfloat16, __nv_bfloat16)" (declared at line 37)
#10 1380.2 argument types are: (nv_bfloat16, nv_bfloat16)
#10 1380.2 data = make_bfloat162(val, val);
#10 1380.2 ^
#10 1380.2
#10 1380.2 /usr/flexflow-serve/deps/flashinfer/include/flashinfer/attention/../vec_dtypes.cuh(1203): error: more than one instance of function "flashinfer::make_bfloat162" matches the argument list:
#10 1380.2 function "make_bfloat162(__nv_bfloat16, __nv_bfloat16)" (declared at line 1189 of /usr/local/cuda/include/cuda_bf16.hpp)
#10 1380.2 function "flashinfer::make_bfloat162(__nv_bfloat16, __nv_bfloat16)" (declared at line 37)
#10 1380.2 argument types are: (nv_bfloat16, nv_bfloat16)
#10 1380.2 *(nv_bfloat162*)(&data.x) = make_bfloat162(val, val);
#10 1380.2 ^
#10 1380.2
#10 1380.2 /usr/flexflow-serve/deps/flashinfer/include/flashinfer/attention/../vec_dtypes.cuh(1204): error: more than one instance of function "flashinfer::make_bfloat162" matches the argument list:
#10 1380.2 function "make_bfloat162(__nv_bfloat16, __nv_bfloat16)" (declared at line 1189 of /usr/local/cuda/include/cuda_bf16.hpp)
#10 1380.2 function "flashinfer::make_bfloat162(__nv_bfloat16, __nv_bfloat16)" (declared at line 37)
#10 1380.2 argument types are: (nv_bfloat16, nv_bfloat16)
#10 1380.2 *(nv_bfloat162*)(&data.y) = make_bfloat162(val, val);
#10 1380.2 ^
#10 1380.2
#10 1384.6 /usr/flexflow-serve/deps/flashinfer/include/flashinfer/attention/../mma.cuh(524): warning #177-D: variable "s_u32" was declared but never referenced
#10 1384.6 uint32_t* s_u32 = (uint32_t*)(s);
#10 1384.6 ^
#10 1384.6 detected during:
#10 1384.6 instantiation of "void flashinfer::<unnamed>::compute_sfm_v<KTraits>(flashinfer::smem_t<KTraits::SWIZZLE_MODE_KV> *, uint32_t *, KTraits::DTypeQKAccum (*)[KTraits::NUM_MMA_KV][8], float (*)[KTraits::NUM_MMA_D_VO][8], float (*)[2]) [with KTraits=flashinfer::KernelTraits<flashinfer::MaskMode::kNone, 128U, 2U, 8U, 4U, 4U, 4U, 1U, flashinfer::PosEncodingMode::kNone, half, half, half, float, int32_t, flashinfer::DefaultAttention<false, false, false, false>>]" at line 2127 of /usr/flexflow-serve/deps/flashinfer/include/flashinfer/attention/prefill.cuh
```
0 commit comments