@@ -1705,6 +1705,8 @@ enum e_model {
1705
1705
MODEL_MEDIUM,
1706
1706
MODEL_LARGE,
1707
1707
MODEL_XL,
1708
+ MODEL_8x7B,
1709
+ MODEL_8x22B,
1708
1710
};
1709
1711
1710
1712
static const size_t kiB = 1024;
@@ -3558,6 +3560,8 @@ static const char * llama_model_type_name(e_model type) {
3558
3560
case MODEL_MEDIUM: return "0.4B";
3559
3561
case MODEL_LARGE: return "0.8B";
3560
3562
case MODEL_XL: return "1.5B";
3563
+ case MODEL_8x7B: return "8x7B";
3564
+ case MODEL_8x22B: return "8x22B";
3561
3565
default: return "?B";
3562
3566
}
3563
3567
}
@@ -3672,15 +3676,23 @@ static void llm_load_hparams(
3672
3676
{
3673
3677
ml.get_key(LLM_KV_ATTENTION_LAYERNORM_RMS_EPS, hparams.f_norm_rms_eps);
3674
3678
3675
- switch (hparams.n_layer) {
3676
- case 22: model.type = e_model::MODEL_1B; break;
3677
- case 26: model.type = e_model::MODEL_3B; break;
3678
- case 32: model.type = e_model::MODEL_7B; break;
3679
- case 40: model.type = e_model::MODEL_13B; break;
3680
- case 48: model.type = e_model::MODEL_34B; break;
3681
- case 60: model.type = e_model::MODEL_30B; break;
3682
- case 80: model.type = hparams.n_head == hparams.n_head_kv ? e_model::MODEL_65B : e_model::MODEL_70B; break;
3683
- default: model.type = e_model::MODEL_UNKNOWN;
3679
+ if (hparams.n_expert == 8) {
3680
+ switch (hparams.n_layer) {
3681
+ case 32: model.type = e_model::MODEL_8x7B; break;
3682
+ case 56: model.type = e_model::MODEL_8x22B; break;
3683
+ default: model.type = e_model::MODEL_UNKNOWN;
3684
+ }
3685
+ } else {
3686
+ switch (hparams.n_layer) {
3687
+ case 22: model.type = e_model::MODEL_1B; break;
3688
+ case 26: model.type = e_model::MODEL_3B; break;
3689
+ case 32: model.type = e_model::MODEL_7B; break;
3690
+ case 40: model.type = e_model::MODEL_13B; break;
3691
+ case 48: model.type = e_model::MODEL_34B; break;
3692
+ case 60: model.type = e_model::MODEL_30B; break;
3693
+ case 80: model.type = hparams.n_head == hparams.n_head_kv ? e_model::MODEL_65B : e_model::MODEL_70B; break;
3694
+ default: model.type = e_model::MODEL_UNKNOWN;
3695
+ }
3684
3696
}
3685
3697
} break;
3686
3698
case LLM_ARCH_MINICPM:
0 commit comments