@@ -811,6 +811,7 @@ enum e_model {
     MODEL_7B,
     MODEL_13B,
     MODEL_30B,
+    MODEL_40B,
     MODEL_65B,
     MODEL_70B,
 };
@@ -1489,9 +1490,10 @@ static const char * llama_model_type_name(e_model type) {
         case MODEL_7B:  return "7B";
         case MODEL_13B: return "13B";
         case MODEL_30B: return "30B";
+        case MODEL_40B: return "40B";
         case MODEL_65B: return "65B";
         case MODEL_70B: return "70B";
-        default:        GGML_ASSERT(false);
+        default:        return "?B";
     }
 }
 
@@ -1555,40 +1557,29 @@ static void llm_load_hparams(
         case LLM_ARCH_LLAMA:
             {
                 GGUF_GET_KEY(ctx, hparams.f_norm_rms_eps, gguf_get_val_f32, GGUF_TYPE_FLOAT32, true, kv(LLM_KV_ATTENTION_LAYERNORM_RMS_EPS));
+
+                switch (hparams.n_layer) {
+                    case 26: model.type = e_model::MODEL_3B; break;
+                    case 32: model.type = e_model::MODEL_7B; break;
+                    case 40: model.type = e_model::MODEL_13B; break;
+                    case 60: model.type = e_model::MODEL_30B; break;
+                    case 80: model.type = hparams.n_head == hparams.n_head_kv ? e_model::MODEL_65B : e_model::MODEL_70B; break;
+                    default: model.type = e_model::MODEL_UNKNOWN;
+                }
             } break;
         case LLM_ARCH_FALCON:
             {
                 GGUF_GET_KEY(ctx, hparams.f_norm_eps, gguf_get_val_f32, GGUF_TYPE_FLOAT32, true, kv(LLM_KV_ATTENTION_LAYERNORM_EPS));
+
+                switch (hparams.n_layer) {
+                    case 32: model.type = e_model::MODEL_7B; break;
+                    case 60: model.type = e_model::MODEL_40B; break;
+                    default: model.type = e_model::MODEL_UNKNOWN;
+                }
             } break;
         default: (void)0;
     };
 
-    // TODO: generalize to non-LLaMA models
-    switch (hparams.n_layer) {
-        case 26: model.type = e_model::MODEL_3B; break;
-        case 32: model.type = e_model::MODEL_7B; break;
-        case 40: model.type = e_model::MODEL_13B; break;
-        case 60: model.type = e_model::MODEL_30B; break;
-        case 80: model.type = e_model::MODEL_65B; break;
-        default:
-            {
-                if (hparams.n_layer < 32) {
-                    model.type = e_model::MODEL_7B;
-                }
-            } break;
-    }
-
-    // LLaMAv2
-    // TODO: probably not needed
-    {
-        const auto n_gqa = hparams.n_gqa();
-
-        if (model.type == e_model::MODEL_65B && n_gqa == 8) {
-            LLAMA_LOG_WARN("%s: assuming 70B model based on GQA == %d\n", __func__, n_gqa);
-            model.type = e_model::MODEL_70B;
-        }
-    }
-
     model.ftype = ml.ftype;
 
     hparams.n_ctx = n_ctx;
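For reference, the 80-layer LLaMA case above folds in the GQA heuristic that the removed block used to apply afterwards: LLaMA 65B has as many KV heads as attention heads, while LLaMA 2 70B uses grouped-query attention with fewer KV heads, so comparing n_head with n_head_kv is enough to tell the two apart. A minimal standalone sketch of that check, with hypothetical head counts (not part of the patch):

#include <cstdint>
#include <cstdio>

int main() {
    // Hypothetical hyperparameters: a LLaMA 2 70B file reports 64 attention
    // heads but only 8 KV heads (GQA); a LLaMA 65B file reports 64 of each.
    const uint32_t n_head    = 64;
    const uint32_t n_head_kv = 8;

    const char * type = (n_head == n_head_kv) ? "65B" : "70B";
    printf("n_gqa = %u -> %s\n", n_head / n_head_kv, type);
    return 0;
}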
@@ -5015,7 +5006,10 @@ int llama_model_n_embd(const struct llama_model * model) {
 }
 
 int llama_model_type(const struct llama_model * model, char * buf, size_t buf_size) {
-    return snprintf(buf, buf_size, "LLaMA %s %s", llama_model_type_name(model->type), llama_model_ftype_name(model->ftype).c_str());
+    return snprintf(buf, buf_size, "%s %s %s",
+            model->name.c_str(),
+            llama_model_type_name(model->type),
+            llama_model_ftype_name(model->ftype).c_str());
 }
 
 int llama_model_quantize(
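A minimal usage sketch of the updated description string, assuming a model already loaded through the llama.cpp C API; the helper name, buffer size, and example output are hypothetical:

#include <cstdio>
#include "llama.h"

// Hypothetical caller: with this change the description starts with the name
// stored in the model's metadata rather than a hard-coded "LLaMA" prefix,
// e.g. something like "Falcon 40B mostly Q4_0" for a Falcon GGUF.
static void print_model_description(const struct llama_model * model) {
    char desc[128];
    llama_model_type(model, desc, sizeof(desc));
    printf("%s\n", desc);
}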