@@ -1836,7 +1836,7 @@ static const quantize_fns_t quantize_fns[GGML_TYPE_COUNT] = {
1836
1836
.dequantize_row_q = dequantize_row_q4_0 ,
1837
1837
.quantize_row_q = quantize_row_q4_0 ,
1838
1838
.quantize_row_q_reference = (quantize_row_q_t ) quantize_row_q4_0_reference ,
1839
- .quantize_row_q_dot = quantize_row_q8_1 ,
1839
+ .quantize_row_q_dot = quantize_row_q8_0 ,
1840
1840
.vec_dot_q = ggml_vec_dot_q4_0_q8_0 ,
1841
1841
},
1842
1842
[GGML_TYPE_Q4_1 ] = {
@@ -1850,7 +1850,7 @@ static const quantize_fns_t quantize_fns[GGML_TYPE_COUNT] = {
1850
1850
.dequantize_row_q = dequantize_row_q4_2 ,
1851
1851
.quantize_row_q = quantize_row_q4_2 ,
1852
1852
.quantize_row_q_reference = (quantize_row_q_t ) quantize_row_q4_2_reference ,
1853
- .quantize_row_q_dot = quantize_row_q8_1 ,
1853
+ .quantize_row_q_dot = quantize_row_q8_0 ,
1854
1854
.vec_dot_q = ggml_vec_dot_q4_2_q8_0 ,
1855
1855
},
1856
1856
[GGML_TYPE_Q4_3 ] = {
@@ -2482,7 +2482,7 @@ static void ggml_vec_dot_q4_0_q8_0(const int n, float * restrict s, const void *
2482
2482
assert (nb % 2 == 0 );
2483
2483
2484
2484
const block_q4_0 * restrict x = vx ;
2485
- const block_q8_1 * restrict y = vy ;
2485
+ const block_q8_0 * restrict y = vy ;
2486
2486
2487
2487
#if defined(__ARM_NEON )
2488
2488
float32x4_t sumv0 = vdupq_n_f32 (0.0f );
@@ -2491,8 +2491,8 @@ static void ggml_vec_dot_q4_0_q8_0(const int n, float * restrict s, const void *
2491
2491
for (int i = 0 ; i < nb ; i += 2 ) {
2492
2492
const block_q4_0 * restrict x0 = & x [i + 0 ];
2493
2493
const block_q4_0 * restrict x1 = & x [i + 1 ];
2494
- const block_q8_1 * restrict y0 = & y [i + 0 ];
2495
- const block_q8_1 * restrict y1 = & y [i + 1 ];
2494
+ const block_q8_0 * restrict y0 = & y [i + 0 ];
2495
+ const block_q8_0 * restrict y1 = & y [i + 1 ];
2496
2496
2497
2497
const uint8x16_t m4b = vdupq_n_u8 (0xf );
2498
2498
const int8x16_t s8b = vdupq_n_s8 (0x8 );
@@ -2786,7 +2786,7 @@ static void ggml_vec_dot_q4_2_q8_0(const int n, float * restrict s, const void *
2786
2786
assert (QK8_1 == 2 * QK4_2 );
2787
2787
2788
2788
const block_q4_2 * restrict x = vx ;
2789
- const block_q8_1 * restrict y = vy ;
2789
+ const block_q8_0 * restrict y = vy ;
2790
2790
2791
2791
#if defined(__ARM_NEON )
2792
2792
float32x4_t sumv0 = vdupq_n_f32 (0.0f );
@@ -2798,8 +2798,8 @@ static void ggml_vec_dot_q4_2_q8_0(const int n, float * restrict s, const void *
2798
2798
const block_q4_2 * restrict x1_0 = & x [2 * (i + 1 ) + 0 ];
2799
2799
const block_q4_2 * restrict x1_1 = & x [2 * (i + 1 ) + 1 ];
2800
2800
2801
- const block_q8_1 * restrict y0 = & y [i + 0 ];
2802
- const block_q8_1 * restrict y1 = & y [i + 1 ];
2801
+ const block_q8_0 * restrict y0 = & y [i + 0 ];
2802
+ const block_q8_0 * restrict y1 = & y [i + 1 ];
2803
2803
2804
2804
const uint8x16_t m4b = vdupq_n_u8 (0xf );
2805
2805
const int8x16_t s8b = vdupq_n_s8 (0x8 );
0 commit comments