61
61
GGML_METAL_KERNEL_TYPE_GET_ROWS_IQ2_XXS,
62
62
GGML_METAL_KERNEL_TYPE_GET_ROWS_IQ2_XS,
63
63
GGML_METAL_KERNEL_TYPE_GET_ROWS_IQ3_XXS,
64
+ GGML_METAL_KERNEL_TYPE_GET_ROWS_IQ1_S,
64
65
GGML_METAL_KERNEL_TYPE_GET_ROWS_I32,
65
66
GGML_METAL_KERNEL_TYPE_RMS_NORM,
66
67
GGML_METAL_KERNEL_TYPE_GROUP_NORM,
83
84
GGML_METAL_KERNEL_TYPE_MUL_MV_IQ2_XXS_F32,
84
85
GGML_METAL_KERNEL_TYPE_MUL_MV_IQ2_XS_F32,
85
86
GGML_METAL_KERNEL_TYPE_MUL_MV_IQ3_XXS_F32,
87
+ GGML_METAL_KERNEL_TYPE_MUL_MV_IQ1_S_F32,
86
88
GGML_METAL_KERNEL_TYPE_MUL_MV_ID_F32_F32,
87
89
// GGML_METAL_KERNEL_TYPE_MUL_MV_ID_F16_F16,
88
90
GGML_METAL_KERNEL_TYPE_MUL_MV_ID_F16_F32,
101
103
GGML_METAL_KERNEL_TYPE_MUL_MV_ID_IQ2_XXS_F32,
102
104
GGML_METAL_KERNEL_TYPE_MUL_MV_ID_IQ2_XS_F32,
103
105
GGML_METAL_KERNEL_TYPE_MUL_MV_ID_IQ3_XXS_F32,
106
+ GGML_METAL_KERNEL_TYPE_MUL_MV_ID_IQ1_S_F32,
104
107
GGML_METAL_KERNEL_TYPE_MUL_MM_F32_F32,
105
108
GGML_METAL_KERNEL_TYPE_MUL_MM_F16_F32,
106
109
GGML_METAL_KERNEL_TYPE_MUL_MM_Q4_0_F32,
116
119
GGML_METAL_KERNEL_TYPE_MUL_MM_IQ2_XXS_F32,
117
120
GGML_METAL_KERNEL_TYPE_MUL_MM_IQ2_XS_F32,
118
121
GGML_METAL_KERNEL_TYPE_MUL_MM_IQ3_XXS_F32,
122
+ GGML_METAL_KERNEL_TYPE_MUL_MM_IQ1_S_F32,
119
123
GGML_METAL_KERNEL_TYPE_MUL_MM_ID_F32_F32,
120
124
GGML_METAL_KERNEL_TYPE_MUL_MM_ID_F16_F32,
121
125
GGML_METAL_KERNEL_TYPE_MUL_MM_ID_Q4_0_F32,
131
135
GGML_METAL_KERNEL_TYPE_MUL_MM_ID_IQ2_XXS_F32,
132
136
GGML_METAL_KERNEL_TYPE_MUL_MM_ID_IQ2_XS_F32,
133
137
GGML_METAL_KERNEL_TYPE_MUL_MM_ID_IQ3_XXS_F32,
138
+ GGML_METAL_KERNEL_TYPE_MUL_MM_ID_IQ1_S_F32,
134
139
GGML_METAL_KERNEL_TYPE_ROPE_F32,
135
140
GGML_METAL_KERNEL_TYPE_ROPE_F16,
136
141
GGML_METAL_KERNEL_TYPE_ALIBI_F32,
@@ -433,6 +438,7 @@ static void ggml_metal_log(enum ggml_log_level level, const char * format, ...){
433
438
GGML_METAL_ADD_KERNEL (GGML_METAL_KERNEL_TYPE_GET_ROWS_IQ2_XXS, get_rows_iq2_xxs, true );
434
439
GGML_METAL_ADD_KERNEL (GGML_METAL_KERNEL_TYPE_GET_ROWS_IQ2_XS, get_rows_iq2_xs, true );
435
440
GGML_METAL_ADD_KERNEL (GGML_METAL_KERNEL_TYPE_GET_ROWS_IQ3_XXS, get_rows_iq3_xxs, true );
441
+ GGML_METAL_ADD_KERNEL (GGML_METAL_KERNEL_TYPE_GET_ROWS_IQ1_S, get_rows_iq1_s, true );
436
442
GGML_METAL_ADD_KERNEL (GGML_METAL_KERNEL_TYPE_GET_ROWS_I32, get_rows_i32, true );
437
443
GGML_METAL_ADD_KERNEL (GGML_METAL_KERNEL_TYPE_RMS_NORM, rms_norm, ctx->support_simdgroup_reduction );
438
444
GGML_METAL_ADD_KERNEL (GGML_METAL_KERNEL_TYPE_GROUP_NORM, group_norm, ctx->support_simdgroup_reduction );
@@ -455,6 +461,7 @@ static void ggml_metal_log(enum ggml_log_level level, const char * format, ...){
455
461
GGML_METAL_ADD_KERNEL (GGML_METAL_KERNEL_TYPE_MUL_MV_IQ2_XXS_F32, mul_mv_iq2_xxs_f32, ctx->support_simdgroup_reduction );
456
462
GGML_METAL_ADD_KERNEL (GGML_METAL_KERNEL_TYPE_MUL_MV_IQ2_XS_F32, mul_mv_iq2_xs_f32, ctx->support_simdgroup_reduction );
457
463
GGML_METAL_ADD_KERNEL (GGML_METAL_KERNEL_TYPE_MUL_MV_IQ3_XXS_F32, mul_mv_iq3_xxs_f32, ctx->support_simdgroup_reduction );
464
+ GGML_METAL_ADD_KERNEL (GGML_METAL_KERNEL_TYPE_MUL_MV_IQ1_S_F32, mul_mv_iq1_s_f32, ctx->support_simdgroup_reduction );
458
465
GGML_METAL_ADD_KERNEL (GGML_METAL_KERNEL_TYPE_MUL_MV_ID_F32_F32, mul_mv_id_f32_f32, ctx->support_simdgroup_reduction );
459
466
// GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_MUL_MV_ID_F16_F16, mul_mv_id_f16_f16, ctx->support_simdgroup_reduction);
460
467
GGML_METAL_ADD_KERNEL (GGML_METAL_KERNEL_TYPE_MUL_MV_ID_F16_F32, mul_mv_id_f16_f32, ctx->support_simdgroup_reduction );
@@ -473,6 +480,7 @@ static void ggml_metal_log(enum ggml_log_level level, const char * format, ...){
473
480
GGML_METAL_ADD_KERNEL (GGML_METAL_KERNEL_TYPE_MUL_MV_ID_IQ2_XXS_F32, mul_mv_id_iq2_xxs_f32, ctx->support_simdgroup_reduction );
474
481
GGML_METAL_ADD_KERNEL (GGML_METAL_KERNEL_TYPE_MUL_MV_ID_IQ2_XS_F32, mul_mv_id_iq2_xs_f32, ctx->support_simdgroup_reduction );
475
482
GGML_METAL_ADD_KERNEL (GGML_METAL_KERNEL_TYPE_MUL_MV_ID_IQ3_XXS_F32, mul_mv_id_iq3_xxs_f32, ctx->support_simdgroup_reduction );
483
+ GGML_METAL_ADD_KERNEL (GGML_METAL_KERNEL_TYPE_MUL_MV_ID_IQ1_S_F32, mul_mv_id_iq1_s_f32, ctx->support_simdgroup_reduction );
476
484
GGML_METAL_ADD_KERNEL (GGML_METAL_KERNEL_TYPE_MUL_MM_F32_F32, mul_mm_f32_f32, ctx->support_simdgroup_mm );
477
485
GGML_METAL_ADD_KERNEL (GGML_METAL_KERNEL_TYPE_MUL_MM_F16_F32, mul_mm_f16_f32, ctx->support_simdgroup_mm );
478
486
GGML_METAL_ADD_KERNEL (GGML_METAL_KERNEL_TYPE_MUL_MM_Q4_0_F32, mul_mm_q4_0_f32, ctx->support_simdgroup_mm );
@@ -488,6 +496,7 @@ static void ggml_metal_log(enum ggml_log_level level, const char * format, ...){
488
496
GGML_METAL_ADD_KERNEL (GGML_METAL_KERNEL_TYPE_MUL_MM_IQ2_XXS_F32, mul_mm_iq2_xxs_f32, ctx->support_simdgroup_mm );
489
497
GGML_METAL_ADD_KERNEL (GGML_METAL_KERNEL_TYPE_MUL_MM_IQ2_XS_F32, mul_mm_iq2_xs_f32, ctx->support_simdgroup_mm );
490
498
GGML_METAL_ADD_KERNEL (GGML_METAL_KERNEL_TYPE_MUL_MM_IQ3_XXS_F32, mul_mm_iq3_xxs_f32, ctx->support_simdgroup_mm );
499
+ GGML_METAL_ADD_KERNEL (GGML_METAL_KERNEL_TYPE_MUL_MM_IQ1_S_F32, mul_mm_iq1_s_f32, ctx->support_simdgroup_mm );
491
500
GGML_METAL_ADD_KERNEL (GGML_METAL_KERNEL_TYPE_MUL_MM_ID_F32_F32, mul_mm_id_f32_f32, ctx->support_simdgroup_mm );
492
501
GGML_METAL_ADD_KERNEL (GGML_METAL_KERNEL_TYPE_MUL_MM_ID_F16_F32, mul_mm_id_f16_f32, ctx->support_simdgroup_mm );
493
502
GGML_METAL_ADD_KERNEL (GGML_METAL_KERNEL_TYPE_MUL_MM_ID_Q4_0_F32, mul_mm_id_q4_0_f32, ctx->support_simdgroup_mm );
@@ -503,6 +512,7 @@ static void ggml_metal_log(enum ggml_log_level level, const char * format, ...){
503
512
GGML_METAL_ADD_KERNEL (GGML_METAL_KERNEL_TYPE_MUL_MM_ID_IQ2_XXS_F32, mul_mm_id_iq2_xxs_f32, ctx->support_simdgroup_mm );
504
513
GGML_METAL_ADD_KERNEL (GGML_METAL_KERNEL_TYPE_MUL_MM_ID_IQ2_XS_F32, mul_mm_id_iq2_xs_f32, ctx->support_simdgroup_mm );
505
514
GGML_METAL_ADD_KERNEL (GGML_METAL_KERNEL_TYPE_MUL_MM_ID_IQ3_XXS_F32, mul_mm_id_iq3_xxs_f32, ctx->support_simdgroup_mm );
515
+ GGML_METAL_ADD_KERNEL (GGML_METAL_KERNEL_TYPE_MUL_MM_ID_IQ1_S_F32, mul_mm_id_iq1_s_f32, ctx->support_simdgroup_mm );
506
516
GGML_METAL_ADD_KERNEL (GGML_METAL_KERNEL_TYPE_ROPE_F32, rope_f32, true );
507
517
GGML_METAL_ADD_KERNEL (GGML_METAL_KERNEL_TYPE_ROPE_F16, rope_f16, true );
508
518
GGML_METAL_ADD_KERNEL (GGML_METAL_KERNEL_TYPE_ALIBI_F32, alibi_f32, true );
@@ -1297,6 +1307,7 @@ static bool ggml_metal_graph_compute(
1297
1307
case GGML_TYPE_IQ2_XXS: pipeline = ctx->kernels [GGML_METAL_KERNEL_TYPE_MUL_MM_IQ2_XXS_F32].pipeline ; break ;
1298
1308
case GGML_TYPE_IQ2_XS: pipeline = ctx->kernels [GGML_METAL_KERNEL_TYPE_MUL_MM_IQ2_XS_F32 ].pipeline ; break ;
1299
1309
case GGML_TYPE_IQ3_XXS: pipeline = ctx->kernels [GGML_METAL_KERNEL_TYPE_MUL_MM_IQ3_XXS_F32].pipeline ; break ;
1310
+ case GGML_TYPE_IQ1_S: pipeline = ctx->kernels [GGML_METAL_KERNEL_TYPE_MUL_MM_IQ1_S_F32 ].pipeline ; break ;
1300
1311
default : GGML_ASSERT (false && " MUL MAT-MAT not implemented" );
1301
1312
}
1302
1313
@@ -1431,6 +1442,12 @@ static bool ggml_metal_graph_compute(
1431
1442
nth1 = 16 ;
1432
1443
pipeline = ctx->kernels [GGML_METAL_KERNEL_TYPE_MUL_MV_IQ3_XXS_F32].pipeline ;
1433
1444
} break ;
1445
+ case GGML_TYPE_IQ1_S:
1446
+ {
1447
+ nth0 = 4 ;
1448
+ nth1 = 16 ;
1449
+ pipeline = ctx->kernels [GGML_METAL_KERNEL_TYPE_MUL_MV_IQ1_S_F32].pipeline ;
1450
+ } break ;
1434
1451
default :
1435
1452
{
1436
1453
GGML_METAL_LOG_ERROR (" Asserting on type %d \n " , (int )src0t);
@@ -1465,7 +1482,7 @@ static bool ggml_metal_graph_compute(
1465
1482
1466
1483
if (src0t == GGML_TYPE_Q4_0 || src0t == GGML_TYPE_Q4_1 ||
1467
1484
src0t == GGML_TYPE_Q5_0 || src0t == GGML_TYPE_Q5_1 || src0t == GGML_TYPE_Q8_0 ||
1468
- src0t == GGML_TYPE_Q2_K) { // || src0t == GGML_TYPE_Q4_K) {
1485
+ src0t == GGML_TYPE_Q2_K || src0t == GGML_TYPE_IQ1_S ) { // || src0t == GGML_TYPE_Q4_K) {
1469
1486
[encoder dispatchThreadgroups: MTLSizeMake ((ne01 + 7 )/8 , ne11, ne12*ne13) threadsPerThreadgroup: MTLSizeMake (nth0, nth1, 1 )];
1470
1487
}
1471
1488
else if (src0t == GGML_TYPE_IQ2_XXS || src0t == GGML_TYPE_IQ2_XS) {
@@ -1573,6 +1590,7 @@ static bool ggml_metal_graph_compute(
1573
1590
case GGML_TYPE_IQ2_XXS: pipeline = ctx->kernels [GGML_METAL_KERNEL_TYPE_MUL_MM_ID_IQ2_XXS_F32].pipeline ; break ;
1574
1591
case GGML_TYPE_IQ2_XS: pipeline = ctx->kernels [GGML_METAL_KERNEL_TYPE_MUL_MM_ID_IQ2_XS_F32 ].pipeline ; break ;
1575
1592
case GGML_TYPE_IQ3_XXS: pipeline = ctx->kernels [GGML_METAL_KERNEL_TYPE_MUL_MM_ID_IQ3_XXS_F32].pipeline ; break ;
1593
+ case GGML_TYPE_IQ1_S: pipeline = ctx->kernels [GGML_METAL_KERNEL_TYPE_MUL_MM_ID_IQ1_S_F32 ].pipeline ; break ;
1576
1594
default : GGML_ASSERT (false && " MUL_MAT_ID not implemented" );
1577
1595
}
1578
1596
@@ -1710,6 +1728,12 @@ static bool ggml_metal_graph_compute(
1710
1728
nth1 = 16 ;
1711
1729
pipeline = ctx->kernels [GGML_METAL_KERNEL_TYPE_MUL_MV_ID_IQ3_XXS_F32].pipeline ;
1712
1730
} break ;
1731
+ case GGML_TYPE_IQ1_S:
1732
+ {
1733
+ nth0 = 4 ;
1734
+ nth1 = 16 ;
1735
+ pipeline = ctx->kernels [GGML_METAL_KERNEL_TYPE_MUL_MV_ID_IQ1_S_F32].pipeline ;
1736
+ } break ;
1713
1737
default :
1714
1738
{
1715
1739
GGML_METAL_LOG_ERROR (" Asserting on type %d \n " , (int )src2t);
@@ -1760,7 +1784,7 @@ static bool ggml_metal_graph_compute(
1760
1784
1761
1785
if (src2t == GGML_TYPE_Q4_0 || src2t == GGML_TYPE_Q4_1 ||
1762
1786
src2t == GGML_TYPE_Q5_0 || src2t == GGML_TYPE_Q5_1 || src2t == GGML_TYPE_Q8_0 ||
1763
- src2t == GGML_TYPE_Q2_K) { // || src2t == GGML_TYPE_Q4_K) {
1787
+ src2t == GGML_TYPE_Q2_K || src2t == GGML_TYPE_IQ1_S ) { // || src2t == GGML_TYPE_Q4_K) {
1764
1788
[encoder dispatchThreadgroups: MTLSizeMake ((ne21 + 7 )/8 , _ne1, ne01*ne12*ne13) threadsPerThreadgroup: MTLSizeMake (nth0, nth1, 1 )];
1765
1789
}
1766
1790
else if (src2t == GGML_TYPE_IQ2_XXS || src2t == GGML_TYPE_IQ2_XS) {
@@ -1814,6 +1838,7 @@ static bool ggml_metal_graph_compute(
1814
1838
case GGML_TYPE_IQ2_XXS: pipeline = ctx->kernels [GGML_METAL_KERNEL_TYPE_GET_ROWS_IQ2_XXS].pipeline ; break ;
1815
1839
case GGML_TYPE_IQ2_XS: pipeline = ctx->kernels [GGML_METAL_KERNEL_TYPE_GET_ROWS_IQ2_XS ].pipeline ; break ;
1816
1840
case GGML_TYPE_IQ3_XXS: pipeline = ctx->kernels [GGML_METAL_KERNEL_TYPE_GET_ROWS_IQ3_XXS].pipeline ; break ;
1841
+ case GGML_TYPE_IQ1_S: pipeline = ctx->kernels [GGML_METAL_KERNEL_TYPE_GET_ROWS_IQ1_S ].pipeline ; break ;
1817
1842
case GGML_TYPE_I32: pipeline = ctx->kernels [GGML_METAL_KERNEL_TYPE_GET_ROWS_I32 ].pipeline ; break ;
1818
1843
default : GGML_ASSERT (false && " not implemented" );
1819
1844
}
0 commit comments