
Commit 176ea71

llama : better model naming and size reporting
1 parent e729965 commit 176ea71

File tree

3 files changed: 28 additions, 30 deletions


convert-falcon-hf-to-gguf.py

Lines changed: 1 addition & 1 deletion
@@ -94,7 +94,7 @@ def count_model_parts(dir_model: str) -> int:
 
 block_count = hparams["n_layer"]
 
-gguf_writer.add_name(last_dir)
+gguf_writer.add_name("Falcon")
 gguf_writer.add_context_length(2048) # not in config.json
 gguf_writer.add_tensor_data_layout("jploski") # qkv tensor transform
 gguf_writer.add_embedding_length(hparams["hidden_size"])
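
Note (not part of the diff): the string passed to gguf_writer.add_name is stored under the general.name metadata key of the resulting GGUF file, which llama.cpp later picks up as the model name. A minimal sketch of reading it back with ggml's GGUF C API; the file path is a placeholder:

#include <cstdio>
#include "ggml.h"

int main() {
    // open the GGUF file without allocating tensor data; only metadata is needed
    struct gguf_init_params params = { /*.no_alloc =*/ true, /*.ctx =*/ NULL };
    struct gguf_context * ctx = gguf_init_from_file("falcon-7b.gguf", params);
    if (ctx == NULL) {
        fprintf(stderr, "failed to open GGUF file\n");
        return 1;
    }

    // look up the key written by gguf_writer.add_name(...)
    const int key_id = gguf_find_key(ctx, "general.name");
    if (key_id >= 0) {
        printf("general.name = %s\n", gguf_get_val_str(ctx, key_id)); // e.g. "Falcon"
    }

    gguf_free(ctx);
    return 0;
}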

convert.py

Lines changed: 5 additions & 1 deletion
@@ -733,7 +733,11 @@ def __init__(self, fname_out: Path) -> None:
         self.gguf = gguf.GGUFWriter(fname_out, gguf.MODEL_ARCH_NAMES[ARCH])
 
     def add_meta_arch(self, params: Params) -> None:
-        self.gguf.add_name                ("LLaMA")
+        ver = None
+        if (params.n_ctx == 4096):
+            ver = "v2"
+
+        self.gguf.add_name                ("LLaMA" if ver == None else "LLaMA " + ver)
         self.gguf.add_context_length      (params.n_ctx)
         self.gguf.add_embedding_length    (params.n_embd)
         self.gguf.add_block_count         (params.n_layer)
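
For context: the original LLaMA checkpoints use a 2048-token context while LLaMA 2 uses 4096, so the converter treats n_ctx == 4096 as a signal that the model is v2. A minimal standalone sketch of the same naming rule, with the context lengths hard-coded for illustration:

#include <cstdio>
#include <string>

// mirrors the heuristic in add_meta_arch: a 4096-token training context
// is taken to mean LLaMA v2, anything else is reported as plain LLaMA
static std::string llama_display_name(int n_ctx) {
    return n_ctx == 4096 ? "LLaMA v2" : "LLaMA";
}

int main() {
    printf("%s\n", llama_display_name(2048).c_str()); // LLaMA
    printf("%s\n", llama_display_name(4096).c_str()); // LLaMA v2
    return 0;
}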

llama.cpp

Lines changed: 22 additions & 28 deletions
@@ -811,6 +811,7 @@ enum e_model {
     MODEL_7B,
     MODEL_13B,
     MODEL_30B,
+    MODEL_40B,
     MODEL_65B,
     MODEL_70B,
 };
@@ -1489,9 +1490,10 @@ static const char * llama_model_type_name(e_model type) {
         case MODEL_7B:  return "7B";
         case MODEL_13B: return "13B";
         case MODEL_30B: return "30B";
+        case MODEL_40B: return "40B";
         case MODEL_65B: return "65B";
         case MODEL_70B: return "70B";
-        default:        GGML_ASSERT(false);
+        default:        return "?B";
     }
 }
 
@@ -1555,40 +1557,29 @@ static void llm_load_hparams(
         case LLM_ARCH_LLAMA:
             {
                 GGUF_GET_KEY(ctx, hparams.f_norm_rms_eps, gguf_get_val_f32, GGUF_TYPE_FLOAT32, true, kv(LLM_KV_ATTENTION_LAYERNORM_RMS_EPS));
+
+                switch (hparams.n_layer) {
+                    case 26: model.type = e_model::MODEL_3B; break;
+                    case 32: model.type = e_model::MODEL_7B; break;
+                    case 40: model.type = e_model::MODEL_13B; break;
+                    case 60: model.type = e_model::MODEL_30B; break;
+                    case 80: model.type = hparams.n_head == hparams.n_head_kv ? e_model::MODEL_65B : e_model::MODEL_70B; break;
+                    default: model.type = e_model::MODEL_UNKNOWN;
+                }
             } break;
         case LLM_ARCH_FALCON:
             {
                 GGUF_GET_KEY(ctx, hparams.f_norm_eps, gguf_get_val_f32, GGUF_TYPE_FLOAT32, true, kv(LLM_KV_ATTENTION_LAYERNORM_EPS));
+
+                switch (hparams.n_layer) {
+                    case 32: model.type = e_model::MODEL_7B; break;
+                    case 60: model.type = e_model::MODEL_40B; break;
+                    default: model.type = e_model::MODEL_UNKNOWN;
+                }
             } break;
         default: (void)0;
     };
 
-    // TODO: generalize to non-LLaMA models
-    switch (hparams.n_layer) {
-        case 26: model.type = e_model::MODEL_3B; break;
-        case 32: model.type = e_model::MODEL_7B; break;
-        case 40: model.type = e_model::MODEL_13B; break;
-        case 60: model.type = e_model::MODEL_30B; break;
-        case 80: model.type = e_model::MODEL_65B; break;
-        default:
-            {
-                if (hparams.n_layer < 32) {
-                    model.type = e_model::MODEL_7B;
-                }
-            } break;
-    }
-
-    // LLaMAv2
-    // TODO: probably not needed
-    {
-        const auto n_gqa = hparams.n_gqa();
-
-        if (model.type == e_model::MODEL_65B && n_gqa == 8) {
-            LLAMA_LOG_WARN("%s: assuming 70B model based on GQA == %d\n", __func__, n_gqa);
-            model.type = e_model::MODEL_70B;
-        }
-    }
-
     model.ftype = ml.ftype;
 
     hparams.n_ctx = n_ctx;
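
The 80-layer case above replaces the removed GQA warning: LLaMA 65B uses standard multi-head attention (n_head == n_head_kv), while LLaMA 2 70B uses grouped-query attention with fewer KV heads, so comparing the two head counts is enough to tell them apart. A small standalone illustration with a stand-in enum (the real code uses llama.cpp's e_model and hparams):

#include <cstdint>
#include <cstdio>

// stand-in for llama.cpp's e_model, reduced to the two cases of interest
enum class e_model { MODEL_65B, MODEL_70B };

// 80 layers + multi-head attention    -> 65B
// 80 layers + grouped-query attention -> 70B (LLaMA 2)
static e_model pick_80_layer_type(uint32_t n_head, uint32_t n_head_kv) {
    return n_head == n_head_kv ? e_model::MODEL_65B : e_model::MODEL_70B;
}

int main() {
    // LLaMA 65B: 64 attention heads, 64 KV heads
    printf("%s\n", pick_80_layer_type(64, 64) == e_model::MODEL_65B ? "65B" : "70B");
    // LLaMA 2 70B: 64 attention heads, 8 KV heads
    printf("%s\n", pick_80_layer_type(64, 8) == e_model::MODEL_70B ? "70B" : "65B");
    return 0;
}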
@@ -5015,7 +5006,10 @@ int llama_model_n_embd(const struct llama_model * model) {
 }
 
 int llama_model_type(const struct llama_model * model, char * buf, size_t buf_size) {
-    return snprintf(buf, buf_size, "LLaMA %s %s", llama_model_type_name(model->type), llama_model_ftype_name(model->ftype).c_str());
+    return snprintf(buf, buf_size, "%s %s %s",
+            model->name.c_str(),
+            llama_model_type_name(model->type),
+            llama_model_ftype_name(model->ftype).c_str());
 }
 
 int llama_model_quantize(
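
Taken together, llama_model_type() now reports "<general.name> <size> <ftype>" instead of a hard-coded "LLaMA" prefix. A minimal usage sketch against the llama.h API as of this commit; the model path and the printed string are examples, not guaranteed output:

#include <cstdio>
#include "llama.h"

int main() {
    llama_backend_init(false /* numa */);

    // at this commit llama_load_model_from_file still takes llama_context_params
    struct llama_context_params params = llama_context_default_params();
    struct llama_model * model = llama_load_model_from_file("models/7B/ggml-model-q4_0.gguf", params);
    if (model == NULL) {
        fprintf(stderr, "failed to load model\n");
        return 1;
    }

    char desc[128];
    llama_model_type(model, desc, sizeof(desc));
    printf("%s\n", desc); // e.g. "LLaMA v2 7B mostly Q4_0"

    llama_free_model(model);
    llama_backend_free();
    return 0;
}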
