Skip to content

Commit a1caa48

Browse files
committed
add more cuda defines
This is so 'slaren/cuda-f16f32' would merge.
1 parent ecc0565 commit a1caa48

File tree

1 file changed

+10
-0
lines changed

1 file changed

+10
-0
lines changed

ggml-cuda.h

+10
Original file line number | Diff line number | Diff line change
@@ -6,26 +6,36 @@
66
#define CUBLAS_OP_N HIPBLAS_OP_N
77
#define CUBLAS_OP_T HIPBLAS_OP_T
88
#define CUBLAS_STATUS_SUCCESS HIPBLAS_STATUS_SUCCESS
9+
// Placeholder: unlike the neighbouring aliases, this cuBLAS name is defined as
// the literal 0 rather than mapped onto a HIPBLAS_* symbol.
// NOTE(review): presumably hipBLAS has no matching enumerator — confirm.
#define CUBLAS_TF32_TENSOR_OP_MATH 0
910
#define cublasCreate hipblasCreate
1011
#define cublasGemmEx hipblasGemmEx
1112
#define cublasHandle_t hipblasHandle_t
13+
// Function-like macro: discards both arguments (h, m) and expands to
// HIPBLAS_STATUS_SUCCESS, so every call site sees a success status and the
// argument expressions are never evaluated.
// NOTE(review): presumably a no-op because hipBLAS lacks a math-mode setter — confirm.
#define cublasSetMathMode(h, m) HIPBLAS_STATUS_SUCCESS
1214
#define cublasSetStream hipblasSetStream
1315
#define cublasSgemm hipblasSgemm
1416
#define cublasStatus_t hipblasStatus_t
1517
#define CUDA_R_16F HIPBLAS_R_16F
1618
#define CUDA_R_32F HIPBLAS_R_32F
19+
#define cudaDeviceSynchronize hipDeviceSynchronize
1720
#define cudaError_t hipError_t
21+
#define cudaEvent_t hipEvent_t
22+
#define cudaEventCreateWithFlags hipEventCreateWithFlags
23+
#define cudaEventDisableTiming hipEventDisableTiming
24+
#define cudaEventRecord hipEventRecord
1825
#define cudaFree hipFree
26+
#define cudaFreeHost hipFreeHost
1927
#define cudaGetErrorString hipGetErrorString
2028
#define cudaGetLastError hipGetLastError
2129
#define cudaMalloc hipMalloc
30+
#define cudaMallocHost hipMallocHost
2231
#define cudaMemcpyAsync hipMemcpyAsync
2332
#define cudaMemcpyDeviceToHost hipMemcpyDeviceToHost
2433
#define cudaMemcpyHostToDevice hipMemcpyHostToDevice
2534
#define cudaStream_t hipStream_t
2635
#define cudaStreamCreateWithFlags hipStreamCreateWithFlags
2736
#define cudaStreamNonBlocking hipStreamNonBlocking
2837
#define cudaStreamSynchronize hipStreamSynchronize
38+
#define cudaStreamWaitEvent hipStreamWaitEvent
2939
#define cudaSuccess hipSuccess
3040
#define GGML_USE_CUBLAS
3141
#else

0 commit comments

Comments
 (0)