We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent c66115b commit b19fefe Copy full SHA for b19fefe
ggml-cuda.cu
@@ -17,6 +17,7 @@
17
#define CUBLAS_TF32_TENSOR_OP_MATH 0
18
#define CUDA_R_16F HIPBLAS_R_16F
19
#define CUDA_R_32F HIPBLAS_R_32F
20
+#define __shfl_xor_sync(mask, var, laneMask, width) __shfl_xor(var, laneMask, width)
21
#define cublasCreate hipblasCreate
22
#define cublasGemmEx hipblasGemmEx
23
#define cublasHandle_t hipblasHandle_t
0 commit comments