|
5 | 5 | #include <stdio.h>
|
6 | 6 | #include <atomic>
|
7 | 7 | #include <assert.h>
|
8 |
| -#if defined(GGML_USE_HIPBLAS) |
9 |
| -#include <hip/hip_runtime.h> |
10 |
| -#include <hipblas/hipblas.h> |
11 |
| -#include <hip/hip_fp16.h> |
12 |
| -#define CUBLAS_COMPUTE_32F HIPBLAS_R_32F |
13 |
| -#define CUBLAS_COMPUTE_32F_FAST_16F HIPBLAS_R_32F |
14 |
| -#define CUBLAS_GEMM_DEFAULT HIPBLAS_GEMM_DEFAULT |
15 |
| -#define CUBLAS_OP_N HIPBLAS_OP_N |
16 |
| -#define CUBLAS_OP_T HIPBLAS_OP_T |
17 |
| -#define CUBLAS_STATUS_SUCCESS HIPBLAS_STATUS_SUCCESS |
18 |
| -#define CUBLAS_TF32_TENSOR_OP_MATH 0 |
19 |
| -#define CUDA_R_16F HIPBLAS_R_16F |
20 |
| -#define CUDA_R_32F HIPBLAS_R_32F |
21 |
| -#define __shfl_xor_sync(mask, var, laneMask, width) __shfl_xor(var, laneMask, width) |
22 |
| -#define cublasCreate hipblasCreate |
23 |
| -#define cublasGemmEx hipblasGemmEx |
24 |
| -#define cublasHandle_t hipblasHandle_t |
25 |
| -#define cublasSetMathMode(handle, mode) CUBLAS_STATUS_SUCCESS |
26 |
| -#define cublasSetStream hipblasSetStream |
27 |
| -#define cublasSgemm hipblasSgemm |
28 |
| -#define cublasStatus_t hipblasStatus_t |
29 |
| -#define cudaDeviceProp hipDeviceProp_t |
30 |
| -#define cudaDeviceSynchronize hipDeviceSynchronize |
31 |
| -#define cudaError_t hipError_t |
32 |
| -#define cudaEventCreateWithFlags hipEventCreateWithFlags |
33 |
| -#define cudaEventDisableTiming hipEventDisableTiming |
34 |
| -#define cudaEventRecord hipEventRecord |
35 |
| -#define cudaEvent_t hipEvent_t |
36 |
| -#define cudaFree hipFree |
37 |
| -#define cudaFreeHost hipHostFree |
38 |
| -#define cudaGetDevice hipGetDevice |
39 |
| -#define cudaGetDeviceCount hipGetDeviceCount |
40 |
| -#define cudaGetDeviceProperties hipGetDeviceProperties |
41 |
| -#define cudaGetErrorString hipGetErrorString |
42 |
| -#define cudaGetLastError hipGetLastError |
43 |
| -#define cudaMalloc hipMalloc |
44 |
| -#define cudaMallocHost(ptr, size) hipHostMalloc(ptr, size, hipHostMallocDefault) |
45 |
| -#define cudaMemcpy hipMemcpy |
46 |
| -#define cudaMemcpy2DAsync hipMemcpy2DAsync |
47 |
| -#define cudaMemcpyAsync hipMemcpyAsync |
48 |
| -#define cudaMemcpyDeviceToDevice hipMemcpyDeviceToDevice |
49 |
| -#define cudaMemcpyDeviceToHost hipMemcpyDeviceToHost |
50 |
| -#define cudaMemcpyHostToDevice hipMemcpyHostToDevice |
51 |
| -#define cudaMemcpyKind hipMemcpyKind |
52 |
| -#define cudaMemset hipMemset |
53 |
| -#define cudaOccupancyMaxPotentialBlockSize hipOccupancyMaxPotentialBlockSize |
54 |
| -#define cudaSetDevice hipSetDevice |
55 |
| -#define cudaStreamCreateWithFlags hipStreamCreateWithFlags |
56 |
| -#define cudaStreamNonBlocking hipStreamNonBlocking |
57 |
| -#define cudaStreamSynchronize hipStreamSynchronize |
58 |
| -#define cudaStreamWaitEvent hipStreamWaitEvent |
59 |
| -#define cudaStream_t hipStream_t |
60 |
| -#define cudaSuccess hipSuccess |
61 |
| -#else |
| 8 | + |
| 9 | +#ifndef GGML_USE_HIPBLAS |
62 | 10 | #include <cuda_runtime.h>
|
63 | 11 | #include <cublas_v2.h>
|
64 | 12 | #include <cuda_fp16.h>
|
|
0 commit comments