Skip to content

Commit 8fbd593

Browse files
committed
ggml-quants : attempt to fix Arm 32-bit support
1 parent ec50944 commit 8fbd593

File tree

2 files changed

+8
-11
lines changed

2 files changed

+8
-11
lines changed

Diff for: ggml/src/ggml-impl.h

+4 -7
Original file line number | Diff line number | Diff line change
@@ -177,7 +177,7 @@ typedef __fp16 ggml_fp16_internal_t;
177 177

178 178
// 32-bit ARM compatibility
179 179

180-
// vaddvq_s16
180+
// vaddlvq_s16
181 181
// vpaddq_s16
182 182
// vpaddq_s32
183 183
// vaddvq_s32
@@ -187,12 +187,9 @@ typedef __fp16 ggml_fp16_internal_t;
187 187
// vzip1_u8
188 188
// vzip2_u8
189 189

190-
inline static int32_t vaddvq_s16(int16x8_t v) {
191-
return
192-
(int32_t)vgetq_lane_s16(v, 0) + (int32_t)vgetq_lane_s16(v, 1) +
193-
(int32_t)vgetq_lane_s16(v, 2) + (int32_t)vgetq_lane_s16(v, 3) +
194-
(int32_t)vgetq_lane_s16(v, 4) + (int32_t)vgetq_lane_s16(v, 5) +
195-
(int32_t)vgetq_lane_s16(v, 6) + (int32_t)vgetq_lane_s16(v, 7);
190+
inline static int32_t vaddlvq_s16(int16x8_t v) {
191+
int32x4_t v0 = vreinterpretq_s32_s64(vpaddlq_s32(vpaddlq_s16(v)));
192+
return vgetq_lane_s32(v0, 0) + vgetq_lane_s32(v0, 2);
196 193
}
197 194

198 195
inline static int16x8_t vpaddq_s16(int16x8_t a, int16x8_t b) {

Diff for: ggml/src/ggml-quants.c

+4 -4
Original file line number | Diff line number | Diff line change
@@ -11483,10 +11483,10 @@ void ggml_vec_dot_q1_3_q8_0(int n, float * restrict s, size_t bs, const void * r
11483 11483
// WARNING: reading 3 bytes further than necessary
11484 11484
const uint8x16_t x13b = vld1q_u8((const uint8_t *) x);
11485 11485

11486-
uint8x16_t x0 = vqtbl1q_u8(x13b, mask0);
11487-
uint8x16_t x1 = vqtbl1q_u8(x13b, mask1);
11488-
uint8x16_t x2 = vqtbl1q_u8(x13b, mask2);
11489-
uint8x16_t x3 = vqtbl1q_u8(x13b, mask3);
11486+
uint8x16_t x0 = ggml_vqtbl1q_u8(x13b, mask0);
11487+
uint8x16_t x1 = ggml_vqtbl1q_u8(x13b, mask1);
11488+
uint8x16_t x2 = ggml_vqtbl1q_u8(x13b, mask2);
11489+
uint8x16_t x3 = ggml_vqtbl1q_u8(x13b, mask3);
11490 11490

11491 11491
x0 = vmulq_u8(x0, shift0);
11492 11492
x1 = vmulq_u8(x1, shift0);

0 commit comments

Comments
 (0)