Skip to content

Commit 9351f91

Browse files
authored
[BugFix][ROCm] Fix GGUF MoE Dispatch Block_Dim for ROCm (#16247)
Signed-off-by: Tianyuan Wu <[email protected]>
1 parent 5a1e1c8 commit 9351f91

File tree

1 file changed: 10 additions (+10) and 10 deletions (-10)

1 file changed

+10
-10
lines changed

csrc/quantization/gguf/moe.cuh

Lines changed: 10 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -129,7 +129,7 @@ static __device__ __forceinline__ void moe_q(
129129
}
130130

131131
#if defined(USE_ROCM)
132-
#define MOE_X_Q4_0 64
132+
#define MOE_X_Q4_0 8
133133
#define MOE_Y_Q4_0 128
134134
#define NWARPS_Q4_0 8
135135
#else
@@ -190,7 +190,7 @@ static void ggml_moe_q4_0_q8_1_cuda(
190190
}
191191

192192
#if defined(USE_ROCM)
193-
#define MOE_X_Q4_1 64
193+
#define MOE_X_Q4_1 8
194194
#define MOE_Y_Q4_1 128
195195
#define NWARPS_Q4_1 8
196196
#else
@@ -251,7 +251,7 @@ static void ggml_moe_q4_1_q8_1_cuda(
251251
}
252252

253253
#if defined(USE_ROCM)
254-
#define MOE_X_Q5_0 64
254+
#define MOE_X_Q5_0 8
255255
#define MOE_Y_Q5_0 128
256256
#define NWARPS_Q5_0 8
257257
#else
@@ -312,7 +312,7 @@ static void ggml_moe_q5_0_q8_1_cuda(
312312
}
313313

314314
#if defined(USE_ROCM)
315-
#define MOE_X_Q5_1 64
315+
#define MOE_X_Q5_1 8
316316
#define MOE_Y_Q5_1 128
317317
#define NWARPS_Q5_1 8
318318
#else
@@ -373,7 +373,7 @@ static void ggml_moe_q5_1_q8_1_cuda(
373373
}
374374

375375
#if defined(USE_ROCM)
376-
#define MOE_X_Q8_0 64
376+
#define MOE_X_Q8_0 8
377377
#define MOE_Y_Q8_0 128
378378
#define NWARPS_Q8_0 8
379379
#else
@@ -434,7 +434,7 @@ static void ggml_moe_q8_0_q8_1_cuda(
434434
}
435435

436436
#if defined(USE_ROCM)
437-
#define MOE_X_Q2_K 64
437+
#define MOE_X_Q2_K 8
438438
#define MOE_Y_Q2_K 128
439439
#define NWARPS_Q2_K 8
440440
#else
@@ -495,7 +495,7 @@ static void ggml_moe_q2_K_q8_1_cuda(
495495
}
496496

497497
#if defined(USE_ROCM)
498-
#define MOE_X_Q3_K 64
498+
#define MOE_X_Q3_K 8
499499
#define MOE_Y_Q3_K 128
500500
#define NWARPS_Q3_K 8
501501
#else
@@ -556,7 +556,7 @@ static void ggml_moe_q3_K_q8_1_cuda(
556556
}
557557

558558
#if defined(USE_ROCM)
559-
#define MOE_X_Q4_K 64
559+
#define MOE_X_Q4_K 8
560560
#define MOE_Y_Q4_K 128
561561
#define NWARPS_Q4_K 8
562562
#else
@@ -617,7 +617,7 @@ static void ggml_moe_q4_K_q8_1_cuda(
617617
}
618618

619619
#if defined(USE_ROCM)
620-
#define MOE_X_Q5_K 64
620+
#define MOE_X_Q5_K 8
621621
#define MOE_Y_Q5_K 128
622622
#define NWARPS_Q5_K 8
623623
#else
@@ -678,7 +678,7 @@ static void ggml_moe_q5_K_q8_1_cuda(
678678
}
679679

680680
#if defined(USE_ROCM)
681-
#define MOE_X_Q6_K 64
681+
#define MOE_X_Q6_K 8
682682
#define MOE_Y_Q6_K 128
683683
#define NWARPS_Q6_K 8
684684
#else

0 commit comments

Comments (0)