61
61
GGML_METAL_KERNEL_TYPE_GET_ROWS_IQ2_XXS,
62
62
GGML_METAL_KERNEL_TYPE_GET_ROWS_IQ2_XS,
63
63
GGML_METAL_KERNEL_TYPE_GET_ROWS_IQ3_XXS,
64
+ GGML_METAL_KERNEL_TYPE_GET_ROWS_IQ1_S,
64
65
GGML_METAL_KERNEL_TYPE_GET_ROWS_I32,
65
66
GGML_METAL_KERNEL_TYPE_RMS_NORM,
66
67
GGML_METAL_KERNEL_TYPE_GROUP_NORM,
83
84
GGML_METAL_KERNEL_TYPE_MUL_MV_IQ2_XXS_F32,
84
85
GGML_METAL_KERNEL_TYPE_MUL_MV_IQ2_XS_F32,
85
86
GGML_METAL_KERNEL_TYPE_MUL_MV_IQ3_XXS_F32,
87
+ GGML_METAL_KERNEL_TYPE_MUL_MV_IQ1_S_F32,
86
88
GGML_METAL_KERNEL_TYPE_MUL_MV_ID_F32_F32,
87
89
// GGML_METAL_KERNEL_TYPE_MUL_MV_ID_F16_F16,
88
90
GGML_METAL_KERNEL_TYPE_MUL_MV_ID_F16_F32,
101
103
GGML_METAL_KERNEL_TYPE_MUL_MV_ID_IQ2_XXS_F32,
102
104
GGML_METAL_KERNEL_TYPE_MUL_MV_ID_IQ2_XS_F32,
103
105
GGML_METAL_KERNEL_TYPE_MUL_MV_ID_IQ3_XXS_F32,
106
+ GGML_METAL_KERNEL_TYPE_MUL_MV_ID_IQ1_S_F32,
104
107
GGML_METAL_KERNEL_TYPE_MUL_MM_F32_F32,
105
108
GGML_METAL_KERNEL_TYPE_MUL_MM_F16_F32,
106
109
GGML_METAL_KERNEL_TYPE_MUL_MM_Q4_0_F32,
116
119
GGML_METAL_KERNEL_TYPE_MUL_MM_IQ2_XXS_F32,
117
120
GGML_METAL_KERNEL_TYPE_MUL_MM_IQ2_XS_F32,
118
121
GGML_METAL_KERNEL_TYPE_MUL_MM_IQ3_XXS_F32,
122
+ GGML_METAL_KERNEL_TYPE_MUL_MM_IQ1_S_F32,
119
123
GGML_METAL_KERNEL_TYPE_MUL_MM_ID_F32_F32,
120
124
GGML_METAL_KERNEL_TYPE_MUL_MM_ID_F16_F32,
121
125
GGML_METAL_KERNEL_TYPE_MUL_MM_ID_Q4_0_F32,
131
135
GGML_METAL_KERNEL_TYPE_MUL_MM_ID_IQ2_XXS_F32,
132
136
GGML_METAL_KERNEL_TYPE_MUL_MM_ID_IQ2_XS_F32,
133
137
GGML_METAL_KERNEL_TYPE_MUL_MM_ID_IQ3_XXS_F32,
138
+ GGML_METAL_KERNEL_TYPE_MUL_MM_ID_IQ1_S_F32,
134
139
GGML_METAL_KERNEL_TYPE_ROPE_F32,
135
140
GGML_METAL_KERNEL_TYPE_ROPE_F16,
136
141
GGML_METAL_KERNEL_TYPE_ALIBI_F32,
@@ -433,6 +438,7 @@ static void ggml_metal_log(enum ggml_log_level level, const char * format, ...){
433
438
GGML_METAL_ADD_KERNEL (GGML_METAL_KERNEL_TYPE_GET_ROWS_IQ2_XXS, get_rows_iq2_xxs, true );
434
439
GGML_METAL_ADD_KERNEL (GGML_METAL_KERNEL_TYPE_GET_ROWS_IQ2_XS, get_rows_iq2_xs, true );
435
440
GGML_METAL_ADD_KERNEL (GGML_METAL_KERNEL_TYPE_GET_ROWS_IQ3_XXS, get_rows_iq3_xxs, true );
441
+ GGML_METAL_ADD_KERNEL (GGML_METAL_KERNEL_TYPE_GET_ROWS_IQ1_S, get_rows_iq1_s, true );
436
442
GGML_METAL_ADD_KERNEL (GGML_METAL_KERNEL_TYPE_GET_ROWS_I32, get_rows_i32, true );
437
443
GGML_METAL_ADD_KERNEL (GGML_METAL_KERNEL_TYPE_RMS_NORM, rms_norm, ctx->support_simdgroup_reduction );
438
444
GGML_METAL_ADD_KERNEL (GGML_METAL_KERNEL_TYPE_GROUP_NORM, group_norm, ctx->support_simdgroup_reduction );
@@ -455,6 +461,7 @@ static void ggml_metal_log(enum ggml_log_level level, const char * format, ...){
455
461
GGML_METAL_ADD_KERNEL (GGML_METAL_KERNEL_TYPE_MUL_MV_IQ2_XXS_F32, mul_mv_iq2_xxs_f32, ctx->support_simdgroup_reduction );
456
462
GGML_METAL_ADD_KERNEL (GGML_METAL_KERNEL_TYPE_MUL_MV_IQ2_XS_F32, mul_mv_iq2_xs_f32, ctx->support_simdgroup_reduction );
457
463
GGML_METAL_ADD_KERNEL (GGML_METAL_KERNEL_TYPE_MUL_MV_IQ3_XXS_F32, mul_mv_iq3_xxs_f32, ctx->support_simdgroup_reduction );
464
+ GGML_METAL_ADD_KERNEL (GGML_METAL_KERNEL_TYPE_MUL_MV_IQ1_S_F32, mul_mv_iq1_s_f32, ctx->support_simdgroup_reduction );
458
465
GGML_METAL_ADD_KERNEL (GGML_METAL_KERNEL_TYPE_MUL_MV_ID_F32_F32, mul_mv_id_f32_f32, ctx->support_simdgroup_reduction );
459
466
// GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_MUL_MV_ID_F16_F16, mul_mv_id_f16_f16, ctx->support_simdgroup_reduction);
460
467
GGML_METAL_ADD_KERNEL (GGML_METAL_KERNEL_TYPE_MUL_MV_ID_F16_F32, mul_mv_id_f16_f32, ctx->support_simdgroup_reduction );
@@ -473,6 +480,7 @@ static void ggml_metal_log(enum ggml_log_level level, const char * format, ...){
473
480
GGML_METAL_ADD_KERNEL (GGML_METAL_KERNEL_TYPE_MUL_MV_ID_IQ2_XXS_F32, mul_mv_id_iq2_xxs_f32, ctx->support_simdgroup_reduction );
474
481
GGML_METAL_ADD_KERNEL (GGML_METAL_KERNEL_TYPE_MUL_MV_ID_IQ2_XS_F32, mul_mv_id_iq2_xs_f32, ctx->support_simdgroup_reduction );
475
482
GGML_METAL_ADD_KERNEL (GGML_METAL_KERNEL_TYPE_MUL_MV_ID_IQ3_XXS_F32, mul_mv_id_iq3_xxs_f32, ctx->support_simdgroup_reduction );
483
+ GGML_METAL_ADD_KERNEL (GGML_METAL_KERNEL_TYPE_MUL_MV_ID_IQ1_S_F32, mul_mv_id_iq1_s_f32, ctx->support_simdgroup_reduction );
476
484
GGML_METAL_ADD_KERNEL (GGML_METAL_KERNEL_TYPE_MUL_MM_F32_F32, mul_mm_f32_f32, ctx->support_simdgroup_mm );
477
485
GGML_METAL_ADD_KERNEL (GGML_METAL_KERNEL_TYPE_MUL_MM_F16_F32, mul_mm_f16_f32, ctx->support_simdgroup_mm );
478
486
GGML_METAL_ADD_KERNEL (GGML_METAL_KERNEL_TYPE_MUL_MM_Q4_0_F32, mul_mm_q4_0_f32, ctx->support_simdgroup_mm );
@@ -488,6 +496,7 @@ static void ggml_metal_log(enum ggml_log_level level, const char * format, ...){
488
496
GGML_METAL_ADD_KERNEL (GGML_METAL_KERNEL_TYPE_MUL_MM_IQ2_XXS_F32, mul_mm_iq2_xxs_f32, ctx->support_simdgroup_mm );
489
497
GGML_METAL_ADD_KERNEL (GGML_METAL_KERNEL_TYPE_MUL_MM_IQ2_XS_F32, mul_mm_iq2_xs_f32, ctx->support_simdgroup_mm );
490
498
GGML_METAL_ADD_KERNEL (GGML_METAL_KERNEL_TYPE_MUL_MM_IQ3_XXS_F32, mul_mm_iq3_xxs_f32, ctx->support_simdgroup_mm );
499
+ GGML_METAL_ADD_KERNEL (GGML_METAL_KERNEL_TYPE_MUL_MM_IQ1_S_F32, mul_mm_iq1_s_f32, ctx->support_simdgroup_mm );
491
500
GGML_METAL_ADD_KERNEL (GGML_METAL_KERNEL_TYPE_MUL_MM_ID_F32_F32, mul_mm_id_f32_f32, ctx->support_simdgroup_mm );
492
501
GGML_METAL_ADD_KERNEL (GGML_METAL_KERNEL_TYPE_MUL_MM_ID_F16_F32, mul_mm_id_f16_f32, ctx->support_simdgroup_mm );
493
502
GGML_METAL_ADD_KERNEL (GGML_METAL_KERNEL_TYPE_MUL_MM_ID_Q4_0_F32, mul_mm_id_q4_0_f32, ctx->support_simdgroup_mm );
@@ -503,6 +512,7 @@ static void ggml_metal_log(enum ggml_log_level level, const char * format, ...){
503
512
GGML_METAL_ADD_KERNEL (GGML_METAL_KERNEL_TYPE_MUL_MM_ID_IQ2_XXS_F32, mul_mm_id_iq2_xxs_f32, ctx->support_simdgroup_mm );
504
513
GGML_METAL_ADD_KERNEL (GGML_METAL_KERNEL_TYPE_MUL_MM_ID_IQ2_XS_F32, mul_mm_id_iq2_xs_f32, ctx->support_simdgroup_mm );
505
514
GGML_METAL_ADD_KERNEL (GGML_METAL_KERNEL_TYPE_MUL_MM_ID_IQ3_XXS_F32, mul_mm_id_iq3_xxs_f32, ctx->support_simdgroup_mm );
515
+ GGML_METAL_ADD_KERNEL (GGML_METAL_KERNEL_TYPE_MUL_MM_ID_IQ1_S_F32, mul_mm_id_iq1_s_f32, ctx->support_simdgroup_mm );
506
516
GGML_METAL_ADD_KERNEL (GGML_METAL_KERNEL_TYPE_ROPE_F32, rope_f32, true );
507
517
GGML_METAL_ADD_KERNEL (GGML_METAL_KERNEL_TYPE_ROPE_F16, rope_f16, true );
508
518
GGML_METAL_ADD_KERNEL (GGML_METAL_KERNEL_TYPE_ALIBI_F32, alibi_f32, true );
@@ -1318,6 +1328,7 @@ static bool ggml_metal_graph_compute(
1318
1328
case GGML_TYPE_IQ2_XXS: pipeline = ctx->kernels [GGML_METAL_KERNEL_TYPE_MUL_MM_IQ2_XXS_F32].pipeline ; break ;
1319
1329
case GGML_TYPE_IQ2_XS: pipeline = ctx->kernels [GGML_METAL_KERNEL_TYPE_MUL_MM_IQ2_XS_F32 ].pipeline ; break ;
1320
1330
case GGML_TYPE_IQ3_XXS: pipeline = ctx->kernels [GGML_METAL_KERNEL_TYPE_MUL_MM_IQ3_XXS_F32].pipeline ; break ;
1331
+ case GGML_TYPE_IQ1_S: pipeline = ctx->kernels [GGML_METAL_KERNEL_TYPE_MUL_MM_IQ1_S_F32 ].pipeline ; break ;
1321
1332
default : GGML_ASSERT (false && " MUL MAT-MAT not implemented" );
1322
1333
}
1323
1334
@@ -1452,6 +1463,12 @@ static bool ggml_metal_graph_compute(
1452
1463
nth1 = 16 ;
1453
1464
pipeline = ctx->kernels [GGML_METAL_KERNEL_TYPE_MUL_MV_IQ3_XXS_F32].pipeline ;
1454
1465
} break ;
1466
+ case GGML_TYPE_IQ1_S:
1467
+ {
1468
+ nth0 = 4 ;
1469
+ nth1 = 16 ;
1470
+ pipeline = ctx->kernels [GGML_METAL_KERNEL_TYPE_MUL_MV_IQ1_S_F32].pipeline ;
1471
+ } break ;
1455
1472
default :
1456
1473
{
1457
1474
GGML_METAL_LOG_ERROR (" Asserting on type %d \n " , (int )src0t);
@@ -1486,7 +1503,7 @@ static bool ggml_metal_graph_compute(
1486
1503
1487
1504
if (src0t == GGML_TYPE_Q4_0 || src0t == GGML_TYPE_Q4_1 ||
1488
1505
src0t == GGML_TYPE_Q5_0 || src0t == GGML_TYPE_Q5_1 || src0t == GGML_TYPE_Q8_0 ||
1489
- src0t == GGML_TYPE_Q2_K) { // || src0t == GGML_TYPE_Q4_K) {
1506
+ src0t == GGML_TYPE_Q2_K || src0t == GGML_TYPE_IQ1_S ) { // || src0t == GGML_TYPE_Q4_K) {
1490
1507
[encoder dispatchThreadgroups: MTLSizeMake ((ne01 + 7 )/8 , ne11, ne12*ne13) threadsPerThreadgroup: MTLSizeMake (nth0, nth1, 1 )];
1491
1508
}
1492
1509
else if (src0t == GGML_TYPE_IQ2_XXS || src0t == GGML_TYPE_IQ2_XS) {
@@ -1592,6 +1609,7 @@ static bool ggml_metal_graph_compute(
1592
1609
case GGML_TYPE_IQ2_XXS: pipeline = ctx->kernels [GGML_METAL_KERNEL_TYPE_MUL_MM_ID_IQ2_XXS_F32].pipeline ; break ;
1593
1610
case GGML_TYPE_IQ2_XS: pipeline = ctx->kernels [GGML_METAL_KERNEL_TYPE_MUL_MM_ID_IQ2_XS_F32 ].pipeline ; break ;
1594
1611
case GGML_TYPE_IQ3_XXS: pipeline = ctx->kernels [GGML_METAL_KERNEL_TYPE_MUL_MM_ID_IQ3_XXS_F32].pipeline ; break ;
1612
+ case GGML_TYPE_IQ1_S: pipeline = ctx->kernels [GGML_METAL_KERNEL_TYPE_MUL_MM_ID_IQ1_S_F32 ].pipeline ; break ;
1595
1613
default : GGML_ASSERT (false && " MUL_MAT_ID not implemented" );
1596
1614
}
1597
1615
@@ -1729,6 +1747,12 @@ static bool ggml_metal_graph_compute(
1729
1747
nth1 = 16 ;
1730
1748
pipeline = ctx->kernels [GGML_METAL_KERNEL_TYPE_MUL_MV_ID_IQ3_XXS_F32].pipeline ;
1731
1749
} break ;
1750
+ case GGML_TYPE_IQ1_S:
1751
+ {
1752
+ nth0 = 4 ;
1753
+ nth1 = 16 ;
1754
+ pipeline = ctx->kernels [GGML_METAL_KERNEL_TYPE_MUL_MV_ID_IQ1_S_F32].pipeline ;
1755
+ } break ;
1732
1756
default :
1733
1757
{
1734
1758
GGML_METAL_LOG_ERROR (" Asserting on type %d \n " , (int )src2t);
@@ -1779,7 +1803,7 @@ static bool ggml_metal_graph_compute(
1779
1803
1780
1804
if (src2t == GGML_TYPE_Q4_0 || src2t == GGML_TYPE_Q4_1 ||
1781
1805
src2t == GGML_TYPE_Q5_0 || src2t == GGML_TYPE_Q5_1 || src2t == GGML_TYPE_Q8_0 ||
1782
- src2t == GGML_TYPE_Q2_K) { // || src2t == GGML_TYPE_Q4_K) {
1806
+ src2t == GGML_TYPE_Q2_K || src2t == GGML_TYPE_IQ1_S ) { // || src2t == GGML_TYPE_Q4_K) {
1783
1807
[encoder dispatchThreadgroups: MTLSizeMake ((ne21 + 7 )/8 , _ne1, ne01*ne12*ne13) threadsPerThreadgroup: MTLSizeMake (nth0, nth1, 1 )];
1784
1808
}
1785
1809
else if (src2t == GGML_TYPE_IQ2_XXS || src2t == GGML_TYPE_IQ2_XS) {
@@ -1833,6 +1857,7 @@ static bool ggml_metal_graph_compute(
1833
1857
case GGML_TYPE_IQ2_XXS: pipeline = ctx->kernels [GGML_METAL_KERNEL_TYPE_GET_ROWS_IQ2_XXS].pipeline ; break ;
1834
1858
case GGML_TYPE_IQ2_XS: pipeline = ctx->kernels [GGML_METAL_KERNEL_TYPE_GET_ROWS_IQ2_XS ].pipeline ; break ;
1835
1859
case GGML_TYPE_IQ3_XXS: pipeline = ctx->kernels [GGML_METAL_KERNEL_TYPE_GET_ROWS_IQ3_XXS].pipeline ; break ;
1860
+ case GGML_TYPE_IQ1_S: pipeline = ctx->kernels [GGML_METAL_KERNEL_TYPE_GET_ROWS_IQ1_S ].pipeline ; break ;
1836
1861
case GGML_TYPE_I32: pipeline = ctx->kernels [GGML_METAL_KERNEL_TYPE_GET_ROWS_I32 ].pipeline ; break ;
1837
1862
default : GGML_ASSERT (false && " not implemented" );
1838
1863
}
0 commit comments