Skip to content

Commit 40e5ce0

Browse files
committed
CLBlast: Fix temporary buffer size for f16 conversion (wsize)
Fix a buffer overflow in the temporary buffer used for f16 conversion. Reduce the requested size to fit just one 2D slice, and assert that the provided buffer is large enough.
1 parent a5e8c1d commit 40e5ce0

File tree

1 file changed

+7 -4 lines changed

1 file changed

+7 -4 lines changed

ggml-opencl.cpp

Lines changed: 7 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1568,7 +1568,7 @@ static void ggml_cl_mul_mat_f32(const ggml_tensor * src0, const ggml_tensor * sr
15681568
ggml_cl_pool_free(d_D, d_size);
15691569
}
15701570

1571-
static void ggml_cl_mul_mat_f16(const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst, void * wdata, size_t /* wsize */) {
1571+
static void ggml_cl_mul_mat_f16(const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst, void * wdata, size_t wsize) {
15721572
GGML_ASSERT(fp16_support);
15731573

15741574
const int64_t ne00 = src0->ne[0];
@@ -1598,6 +1598,10 @@ static void ggml_cl_mul_mat_f16(const ggml_tensor * src0, const ggml_tensor * sr
15981598
const int y_ne = ne11 * ne10;
15991599
const int d_ne = ne11 * ne01;
16001600

1601+
GGML_ASSERT(wsize >= sizeof(ggml_fp16_t) * y_ne);
1602+
GGML_ASSERT(wsize >= sizeof(ggml_fp16_t) * d_ne);
1603+
ggml_fp16_t * const tmp = (ggml_fp16_t *) wdata;
1604+
16011605
size_t x_size;
16021606
size_t y_size;
16031607
size_t d_size;
@@ -1634,7 +1638,6 @@ static void ggml_cl_mul_mat_f16(const ggml_tensor * src0, const ggml_tensor * sr
16341638

16351639
// convert src1 to fp16
16361640
// TODO: use multiple threads
1637-
ggml_fp16_t * const tmp = (ggml_fp16_t *) wdata + (ne11 * ne10) * (i13 * ne12 + i12);
16381641
char * src1i = (char *) src1->data + i13*nb13 + i12*nb12;
16391642
if (src1_cont_rows) {
16401643
if (src1_cont_cols) {
@@ -1897,8 +1900,8 @@ void ggml_cl_mul_mat(const struct ggml_tensor * src0, const struct ggml_tensor *
18971900
}
18981901

18991902
size_t ggml_cl_mul_mat_get_wsize(const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst) {
1900-
if (ggml_cl_mul_mat_use_f16(src0, src1, dst)) {
1901-
return ggml_nelements(src1) * sizeof(ggml_fp16_t);
1903+
if (src0->type == GGML_TYPE_F16 && ggml_cl_mul_mat_use_f16(src0, src1, dst)) {
1904+
return sizeof(ggml_fp16_t) * std::max(src1->ne[0] * src1->ne[1], dst->ne[0] * dst->ne[1]);
19021905
}
19031906
return 0;
19041907
}

0 commit comments

Comments
 (0)