Skip to content

Commit 1ba4ce4

Browse files
committed
Revert "warp size fixes"
It seems like 32 is faster, for me at least, and it won't cause so many conflicts. This reverts commit 5d6eb72.
1 parent 5d6eb72 commit 1ba4ce4

File tree

1 file changed

+2
-6
lines changed

1 file changed

+2
-6
lines changed

ggml-cuda.cu

+2-6
Original file line numberDiff line numberDiff line change
@@ -182,11 +182,7 @@ typedef struct {
182182
} block_q6_k;
183183
static_assert(sizeof(block_q6_k) == sizeof(ggml_fp16_t) + 13*QK_K/16, "wrong q6_k block size/padding");
184184

185-
#if defined(GGML_USE_HIPBLAS)
186-
#define WARP_SIZE warpSize
187-
#else
188185
#define WARP_SIZE 32
189-
#endif
190186

191187
#define CUDA_MUL_BLOCK_SIZE 256
192188

@@ -683,8 +679,8 @@ static __global__ void dequantize_mul_mat_vec(const void * vx, const float * y,
683679
// sum up partial sums and write back result
684680
__syncthreads();
685681
#pragma unroll
686-
for (int mask = WARP_SIZE/2; mask > 0; mask >>= 1) {
687-
tmp += __shfl_xor_sync(0xffffffff, tmp, mask, WARP_SIZE);
682+
for (int mask = 16; mask > 0; mask >>= 1) {
683+
tmp += __shfl_xor_sync(0xffffffff, tmp, mask, 32);
688684
}
689685

690686
if (tid == 0) {

0 commit comments

Comments
 (0)