Skip to content

Commit 9dba0c9

Browse files
SlyEcho, ardfork, and KerfuffleV2
authored and committed
Fix merge
---------

Co-authored-by: ardfork <[email protected]>
Co-authored-by: Kerfuffle <[email protected]>
1 parent f570b5c commit 9dba0c9

File tree

1 file changed

+3
-3
lines changed

1 file changed

+3
-3
lines changed

ggml-cuda.cu

+3 -3
Original file line number | Diff line number | Diff line change
@@ -1641,8 +1641,8 @@ template <int vdr> static __device__ __forceinline__ float vec_dot_q8_1_q8_1_imp
1641 1641   #else
1642 1642   const float2 dm8f = __half22float2(dm8);
1643 1643   const float2 ds8f = __half22float2(ds8);
1644      - const float d8d8 = dm8f.x * ds8f.x;
1645      - const float m8s8 = dm8f.y * ds8f.y;
     1644 + const float d8d8 = __low2float(dm8) * __low2float(ds8);
     1645 + const float m8s8 = __high2float(dm8) * __high2float(ds8);
1646 1646   #endif // GGML_CUDA_F16
1647 1647
1648 1648   // scale second part of sum by QI8_1/ vdr to compensate for multiple threads adding it
@@ -3281,7 +3281,7 @@ static __global__ void mul_mat_q(
3281 3281   *dsi_dst = *dsi_src;
3282 3282   } else {
3283 3283   float * dfi_dst = (float *) dsi_dst;
3284      - *dfi_dst = (*dsi_src).x;
     3284 + *dfi_dst = __low2half(*dsi_src);
3285 3285   }
3286 3286   }
3287 3287
0 commit comments

Comments
 (0)