Skip to content

Commit 1f6294d

Browse files
authored
Fix multi-GPU on multiple AMD architectures with rocblas_initialize() (ggml-org#5)
* Initialize rocBLAS
1 parent 3db70b5 commit 1f6294d

File tree

1 file changed

+5
-0
lines changed

1 file changed

+5
-0
lines changed

ggml-cuda.cu

+5
Original file line number | Diff line number | Diff line change
@@ -10,6 +10,7 @@
1010
#include <hip/hip_runtime.h>
1111
#include <hipblas/hipblas.h>
1212
#include <hip/hip_fp16.h>
13+
#include "rocblas/rocblas.h"
1314
#define CUBLAS_COMPUTE_32F HIPBLAS_R_32F
1415
#define CUBLAS_COMPUTE_32F_FAST_16F HIPBLAS_R_32F
1516
#define CUBLAS_GEMM_DEFAULT HIPBLAS_GEMM_DEFAULT
@@ -2531,6 +2532,10 @@ void ggml_init_cublas() {
25312532
static bool initialized = false;
25322533

25332534
if (!initialized) {
2535+
#ifdef GGML_USE_HIPBLAS
2536+
rocblas_initialize();
2537+
hipDeviceSynchronize();
2538+
#endif
25342539
CUDA_CHECK(cudaGetDeviceCount(&g_device_count));
25352540
GGML_ASSERT(g_device_count <= GGML_CUDA_MAX_DEVICES);
25362541
int64_t total_vram = 0;

0 commit comments

Comments
 (0)