Skip to content

Commit 600ace3

Browse files
committed
update warp size
1 parent b19fefe commit 600ace3

File tree

1 file changed

+1
-1
lines changed

1 file changed

+1
-1
lines changed

ggml-cuda.cu

+1 -1
Original file line number | Diff line number | Diff line change
@@ -132,7 +132,7 @@ static_assert(sizeof(block_q8_0) == sizeof(ggml_fp16_t) + QK8_0, "wrong q8_0 blo
132 132

133 133
#define CUDA_MUL_BLOCK_SIZE 256
134 134
#define CUDA_DEQUANTIZE_BLOCK_SIZE 256
135-
#define CUDA_DMMV_BLOCK_SIZE 32 // dmmv = dequantize_mul_mat_vec
135+
#define CUDA_DMMV_BLOCK_SIZE 64 // dmmv = dequantize_mul_mat_vec
136 136

137 137
static __global__ void mul_f32(const float * x, const float * y, float * dst, const int kx, const int ky) {
138 138
const int i = blockDim.x*blockIdx.x + threadIdx.x;

0 commit comments

Comments
 (0)