
Commit 180b693

comex authored and blackhole89 committed
Print model version.
Also improve model type printing, and fix indentation of an unrelated switch statement.
1 parent f963b63 · commit 180b693

File tree: 1 file changed, +46 −23 lines


llama.cpp (+46 −23)
@@ -356,17 +356,17 @@ struct llama_load_tensor {
         LLAMA_ASSERT(shards.size() <= UINT32_MAX);
         uint32_t n_shards = (uint32_t) shards.size();
         switch (split_type) {
-        case SPLIT_NONE:
-            ne = first_shard.ne;
-            break;
-        case SPLIT_BY_COLUMNS:
-            ne = {checked_mul<uint32_t>(first_shard.ne[0], n_shards),
-                  first_shard.ne[1]};
-            break;
-        case SPLIT_BY_ROWS:
-            ne = {first_shard.ne[0],
-                  checked_mul<uint32_t>(first_shard.ne[1], n_shards)};
-            break;
+            case SPLIT_NONE:
+                ne = first_shard.ne;
+                break;
+            case SPLIT_BY_COLUMNS:
+                ne = {checked_mul<uint32_t>(first_shard.ne[0], n_shards),
+                      first_shard.ne[1]};
+                break;
+            case SPLIT_BY_ROWS:
+                ne = {first_shard.ne[0],
+                      checked_mul<uint32_t>(first_shard.ne[1], n_shards)};
+                break;
         }
     }
 
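The split cases above scale one shard dimension by the shard count through checked_mul, a helper defined elsewhere in llama.cpp and not shown in this diff. A minimal sketch of such an overflow-checked multiply, assuming unsigned operands (a hypothetical stand-in, not the project's actual definition):

    #include <cassert>

    // Hypothetical sketch of an overflow-checked multiply like the
    // checked_mul<uint32_t> used above; llama.cpp's real helper is
    // defined elsewhere and is not part of this commit.
    template <typename T>
    static T checked_mul(T a, T b) {
        T ret = a * b;
        // For unsigned T: if a != 0, the product wrapped iff ret / a != b.
        assert(a == 0 || ret / a == b);
        return ret;
    }

With this shape, checked_mul<uint32_t>(first_shard.ne[0], n_shards) fails loudly instead of silently wrapping if a file declares an implausible shard count.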

@@ -806,6 +806,25 @@ bool llama_mlock_supported() {
 // model loading
 //
 
+static const char *llama_file_version_name(llama_file_version version) {
+    switch (version) {
+        case LLAMA_FILE_VERSION_GGML: return "'ggml' (old version with low tokenizer quality and no mmap support)";
+        case LLAMA_FILE_VERSION_GGMF_V1: return "ggmf v1 (old version with no mmap support)";
+        case LLAMA_FILE_VERSION_GGJT_V1: return "ggjt v1 (latest)";
+        default: LLAMA_ASSERT(false);
+    }
+}
+
+static const char *llama_model_type_name(e_model type) {
+    switch (type) {
+        case MODEL_7B: return "7B";
+        case MODEL_13B: return "13B";
+        case MODEL_30B: return "30B";
+        case MODEL_65B: return "65B";
+        default: LLAMA_ASSERT(false);
+    }
+}
+
 static void llama_model_load_internal(
         const std::string & fname,
         llama_context & lctx,
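
Both helpers switch over enums declared elsewhere in the file; only the enumerator names appear in this diff. Purely for orientation, a sketch of the shapes the two functions assume (names taken from the cases above; ordering and any explicit values are guesses):

    // Sketch only; the real definitions live elsewhere in llama.cpp.
    enum llama_file_version {
        LLAMA_FILE_VERSION_GGML,    // unversioned original, no mmap
        LLAMA_FILE_VERSION_GGMF_V1, // versioned, still no mmap
        LLAMA_FILE_VERSION_GGJT_V1, // current format, mmap-able
    };

    enum e_model {
        MODEL_7B,
        MODEL_13B,
        MODEL_30B,
        MODEL_65B,
    };

Note the default: LLAMA_ASSERT(false); arms: an enumerator missing from either switch aborts rather than printing garbage.
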
@@ -823,8 +842,9 @@ static void llama_model_load_internal(
 
     lctx.vocab = std::move(ml->file_loaders.at(0)->vocab);
     auto & model = lctx.model;
+    model.hparams = ml->file_loaders.at(0)->hparams;
+    llama_file_version file_version = ml->file_loaders.at(0)->file_version;
     auto & hparams = model.hparams;
-    hparams = ml->file_loaders.at(0)->hparams;
     uint32_t n_ff = ((2*(4*hparams.n_embd)/3 + hparams.n_mult - 1)/hparams.n_mult)*hparams.n_mult;
 
     {
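
The n_ff context line that survives this hunk computes the feed-forward width: roughly 8/3 of n_embd, rounded up to the next multiple of n_mult. A quick worked check with 7B-style values (n_embd = 4096 and n_mult = 256 are illustrative assumptions, not values read from this diff):

    #include <cstdint>
    #include <cstdio>

    int main() {
        uint32_t n_embd = 4096; // assumed 7B-style value
        uint32_t n_mult = 256;  // assumed 7B-style value
        // 2*(4*4096)/3 = 10922, rounded up to a multiple of 256 -> 11008
        uint32_t n_ff = ((2*(4*n_embd)/3 + n_mult - 1)/n_mult)*n_mult;
        printf("n_ff = %u\n", n_ff); // prints: n_ff = 11008
        return 0;
    }
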
@@ -836,18 +856,21 @@ static void llama_model_load_internal(
         }
 
         hparams.n_ctx = n_ctx;
+    }
 
-        fprintf(stderr, "%s: n_vocab = %u\n", __func__, hparams.n_vocab);
-        fprintf(stderr, "%s: n_ctx   = %u\n", __func__, hparams.n_ctx);
-        fprintf(stderr, "%s: n_embd  = %u\n", __func__, hparams.n_embd);
-        fprintf(stderr, "%s: n_mult  = %u\n", __func__, hparams.n_mult);
-        fprintf(stderr, "%s: n_head  = %u\n", __func__, hparams.n_head);
-        fprintf(stderr, "%s: n_layer = %u\n", __func__, hparams.n_layer);
-        fprintf(stderr, "%s: n_rot   = %u\n", __func__, hparams.n_rot);
-        fprintf(stderr, "%s: f16     = %u\n", __func__, hparams.f16);
-        fprintf(stderr, "%s: n_ff    = %u\n", __func__, n_ff);
-        fprintf(stderr, "%s: n_parts = %zu\n", __func__, ml->file_loaders.size());
-        fprintf(stderr, "%s: type    = %u\n", __func__, model.type);
+    {
+        fprintf(stderr, "%s: format     = %s\n",  __func__, llama_file_version_name(file_version));
+        fprintf(stderr, "%s: n_vocab    = %u\n",  __func__, hparams.n_vocab);
+        fprintf(stderr, "%s: n_ctx      = %u\n",  __func__, hparams.n_ctx);
+        fprintf(stderr, "%s: n_embd     = %u\n",  __func__, hparams.n_embd);
+        fprintf(stderr, "%s: n_mult     = %u\n",  __func__, hparams.n_mult);
+        fprintf(stderr, "%s: n_head     = %u\n",  __func__, hparams.n_head);
+        fprintf(stderr, "%s: n_layer    = %u\n",  __func__, hparams.n_layer);
+        fprintf(stderr, "%s: n_rot      = %u\n",  __func__, hparams.n_rot);
+        fprintf(stderr, "%s: f16        = %u\n",  __func__, hparams.f16);
+        fprintf(stderr, "%s: n_ff       = %u\n",  __func__, n_ff);
+        fprintf(stderr, "%s: n_parts    = %zu\n", __func__, ml->file_loaders.size());
+        fprintf(stderr, "%s: model size = %s\n",  __func__, llama_model_type_name(model.type));
     }
 
     if (vocab_only) {
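
Taken together, the startup banner now leads with the file format and ends with a human-readable model size instead of the raw type enum. Illustrative output only, with hypothetical values for a 7B ggjt file:

    llama_model_load_internal: format     = ggjt v1 (latest)
    llama_model_load_internal: n_vocab    = 32000
    llama_model_load_internal: n_ctx      = 512
    llama_model_load_internal: n_embd     = 4096
    llama_model_load_internal: n_parts    = 1
    llama_model_load_internal: model size = 7B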
