Skip to content

Commit 737b228

Browse files
slarenolexiyb
authored and committed
llama : add functions to get the model's metadata (ggml-org#4013)
* llama : add functions to get the model's metadata * format -> std::to_string * better documentation
1 parent d1b0896 commit 737b228

File tree

4 files changed

+167
-7
lines changed

4 files changed

+167
-7
lines changed

ggml.c

+25
Original file line numberDiff line numberDiff line change
@@ -18452,95 +18452,120 @@ int gguf_find_key(const struct gguf_context * ctx, const char * key) {
1845218452
}
1845318453

1845418454
// Returns the key string of the key/value pair at index key_id.
// Asserts (GGML_ASSERT) that key_id lies in [0, gguf_get_n_kv(ctx)) before indexing ctx->kv.
// The returned pointer is the entry's own key.data field, not a copy.
const char * gguf_get_key(const struct gguf_context * ctx, int key_id) {
18455+
GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
1845518456
return ctx->kv[key_id].key.data;
1845618457
}
1845718458

1845818459
// Returns the gguf_type tag of the value stored at index key_id.
// Asserts (GGML_ASSERT) that key_id lies in [0, gguf_get_n_kv(ctx)) before indexing ctx->kv.
enum gguf_type gguf_get_kv_type(const struct gguf_context * ctx, int key_id) {
18460+
GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
1845918461
return ctx->kv[key_id].type;
1846018462
}
1846118463

1846218464
// Returns the element type of the array stored at index key_id.
// Asserts (GGML_ASSERT) that key_id is in range and that the entry's type is GGUF_TYPE_ARRAY.
enum gguf_type gguf_get_arr_type(const struct gguf_context * ctx, int key_id) {
18465+
GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
1846318466
GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_ARRAY);
1846418467
return ctx->kv[key_id].value.arr.type;
1846518468
}
1846618469

1846718470
// Returns the untyped data pointer of the array stored at index key_id.
// Asserts (GGML_ASSERT) that key_id is in range and that the entry's type is GGUF_TYPE_ARRAY.
// The pointer is the entry's own arr.data field; the caller must interpret it
// according to the array's element type (see gguf_get_arr_type).
const void * gguf_get_arr_data(const struct gguf_context * ctx, int key_id) {
18471+
GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
1846818472
GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_ARRAY);
1846918473
return ctx->kv[key_id].value.arr.data;
1847018474
}
1847118475

1847218476
const char * gguf_get_arr_str(const struct gguf_context * ctx, int key_id, int i) {
18477+
GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
1847318478
GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_ARRAY);
1847418479
struct gguf_kv * kv = &ctx->kv[key_id];
1847518480
struct gguf_str * str = &((struct gguf_str *) kv->value.arr.data)[i];
1847618481
return str->data;
1847718482
}
1847818483

1847918484
// Returns the number of elements of the array stored at index key_id.
// Asserts (GGML_ASSERT) that key_id is in range and that the entry's type is GGUF_TYPE_ARRAY.
// NOTE(review): arr.n is returned through an int; its declared type is not
// visible here - confirm that no narrowing can occur.
int gguf_get_arr_n(const struct gguf_context * ctx, int key_id) {
18485+
GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
1848018486
GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_ARRAY);
1848118487
return ctx->kv[key_id].value.arr.n;
1848218488
}
1848318489

1848418490
// Returns the uint8 value stored at index key_id.
// Asserts (GGML_ASSERT) that key_id is in range and that the entry's type is GGUF_TYPE_UINT8.
uint8_t gguf_get_val_u8(const struct gguf_context * ctx, int key_id) {
18491+
GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
1848518492
GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_UINT8);
1848618493
return ctx->kv[key_id].value.uint8;
1848718494
}
1848818495

1848918496
// Returns the int8 value stored at index key_id.
// Asserts (GGML_ASSERT) that key_id is in range and that the entry's type is GGUF_TYPE_INT8.
int8_t gguf_get_val_i8(const struct gguf_context * ctx, int key_id) {
18497+
GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
1849018498
GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_INT8);
1849118499
return ctx->kv[key_id].value.int8;
1849218500
}
1849318501

1849418502
// Returns the uint16 value stored at index key_id.
// Asserts (GGML_ASSERT) that key_id is in range and that the entry's type is GGUF_TYPE_UINT16.
uint16_t gguf_get_val_u16(const struct gguf_context * ctx, int key_id) {
18503+
GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
1849518504
GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_UINT16);
1849618505
return ctx->kv[key_id].value.uint16;
1849718506
}
1849818507

1849918508
// Returns the int16 value stored at index key_id.
// Asserts (GGML_ASSERT) that key_id is in range and that the entry's type is GGUF_TYPE_INT16.
int16_t gguf_get_val_i16(const struct gguf_context * ctx, int key_id) {
18509+
GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
1850018510
GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_INT16);
1850118511
return ctx->kv[key_id].value.int16;
1850218512
}
1850318513

1850418514
// Returns the uint32 value stored at index key_id.
// Asserts (GGML_ASSERT) that key_id is in range and that the entry's type is GGUF_TYPE_UINT32.
uint32_t gguf_get_val_u32(const struct gguf_context * ctx, int key_id) {
18515+
GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
1850518516
GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_UINT32);
1850618517
return ctx->kv[key_id].value.uint32;
1850718518
}
1850818519

1850918520
// Returns the int32 value stored at index key_id.
// Asserts (GGML_ASSERT) that key_id is in range and that the entry's type is GGUF_TYPE_INT32.
int32_t gguf_get_val_i32(const struct gguf_context * ctx, int key_id) {
18521+
GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
1851018522
GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_INT32);
1851118523
return ctx->kv[key_id].value.int32;
1851218524
}
1851318525

1851418526
// Returns the float32 value stored at index key_id.
// Asserts (GGML_ASSERT) that key_id is in range and that the entry's type is GGUF_TYPE_FLOAT32.
float gguf_get_val_f32(const struct gguf_context * ctx, int key_id) {
18527+
GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
1851518528
GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_FLOAT32);
1851618529
return ctx->kv[key_id].value.float32;
1851718530
}
1851818531

1851918532
// Returns the uint64 value stored at index key_id.
// Asserts (GGML_ASSERT) that key_id is in range and that the entry's type is GGUF_TYPE_UINT64.
uint64_t gguf_get_val_u64(const struct gguf_context * ctx, int key_id) {
18533+
GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
1852018534
GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_UINT64);
1852118535
return ctx->kv[key_id].value.uint64;
1852218536
}
1852318537

1852418538
// Returns the int64 value stored at index key_id.
// Asserts (GGML_ASSERT) that key_id is in range and that the entry's type is GGUF_TYPE_INT64.
int64_t gguf_get_val_i64(const struct gguf_context * ctx, int key_id) {
18539+
GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
1852518540
GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_INT64);
1852618541
return ctx->kv[key_id].value.int64;
1852718542
}
1852818543

1852918544
// Returns the float64 value stored at index key_id.
// Asserts (GGML_ASSERT) that key_id is in range and that the entry's type is GGUF_TYPE_FLOAT64.
double gguf_get_val_f64(const struct gguf_context * ctx, int key_id) {
18545+
GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
1853018546
GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_FLOAT64);
1853118547
return ctx->kv[key_id].value.float64;
1853218548
}
1853318549

1853418550
// Returns the boolean value stored at index key_id.
// Asserts (GGML_ASSERT) that key_id is in range and that the entry's type is GGUF_TYPE_BOOL.
bool gguf_get_val_bool(const struct gguf_context * ctx, int key_id) {
18551+
GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
1853518552
GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_BOOL);
1853618553
return ctx->kv[key_id].value.bool_;
1853718554
}
1853818555

1853918556
// Returns the string value stored at index key_id.
// Asserts (GGML_ASSERT) that key_id is in range and that the entry's type is GGUF_TYPE_STRING.
// The returned pointer is the entry's own value.str.data field, not a copy.
const char * gguf_get_val_str(const struct gguf_context * ctx, int key_id) {
18557+
GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
1854018558
GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_STRING);
1854118559
return ctx->kv[key_id].value.str.data;
1854218560
}
1854318561

18562+
// Returns an untyped pointer to the value storage of the entry at index key_id.
// Asserts (GGML_ASSERT) that key_id is in range and that the entry is neither an
// array nor a string, i.e. only the remaining (scalar) types are accepted.
// The pointer is the address of the entry's value member; the caller must
// interpret it according to the entry's type (see gguf_get_kv_type).
const void * gguf_get_val_data(const struct gguf_context * ctx, int key_id) {
18563+
GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
18564+
GGML_ASSERT(ctx->kv[key_id].type != GGUF_TYPE_ARRAY);
18565+
GGML_ASSERT(ctx->kv[key_id].type != GGUF_TYPE_STRING);
18566+
return &ctx->kv[key_id].value;
18567+
}
18568+
1854418569
// Returns the tensor count recorded in the context's header (header.n_tensors).
// NOTE(review): the field is returned through an int; its declared type is not
// visible here - confirm that no narrowing can occur.
int gguf_get_n_tensors(const struct gguf_context * ctx) {
1854518570
return ctx->header.n_tensors;
1854618571
}

ggml.h

+1
Original file line numberDiff line numberDiff line change
@@ -2045,6 +2045,7 @@ extern "C" {
20452045
GGML_API double gguf_get_val_f64 (const struct gguf_context * ctx, int key_id);
20462046
GGML_API bool gguf_get_val_bool(const struct gguf_context * ctx, int key_id);
20472047
GGML_API const char * gguf_get_val_str (const struct gguf_context * ctx, int key_id);
2048+
GGML_API const void * gguf_get_val_data(const struct gguf_context * ctx, int key_id);
20482049
GGML_API int gguf_get_arr_n (const struct gguf_context * ctx, int key_id);
20492050
GGML_API const void * gguf_get_arr_data(const struct gguf_context * ctx, int key_id);
20502051
GGML_API const char * gguf_get_arr_str (const struct gguf_context * ctx, int key_id, int i);

llama.cpp

+124-7
Original file line numberDiff line numberDiff line change
@@ -604,6 +604,60 @@ static int8_t llama_rope_scaling_type_from_string(const std::string & name) {
604604
return LLAMA_ROPE_SCALING_UNSPECIFIED;
605605
}
606606

607+
// Formats element i of a buffer of GGUF scalar values as a string.
//
//   type - element type; selects how data is reinterpreted
//   data - untyped pointer to the first element
//   i    - element index (callers pass 0 for a single scalar value)
//
// Numeric types are formatted with std::to_string, booleans as "true"/"false".
// Any other type (including string and array) yields "unknown type <n>".
// No bounds checking is performed on i; the caller must pass a valid index.
static std::string gguf_data_to_str(enum gguf_type type, const void * data, int i) {
608+
switch (type) {
609+
case GGUF_TYPE_UINT8: return std::to_string(((const uint8_t *)data)[i]);
610+
case GGUF_TYPE_INT8: return std::to_string(((const int8_t *)data)[i]);
611+
case GGUF_TYPE_UINT16: return std::to_string(((const uint16_t *)data)[i]);
612+
case GGUF_TYPE_INT16: return std::to_string(((const int16_t *)data)[i]);
613+
case GGUF_TYPE_UINT32: return std::to_string(((const uint32_t *)data)[i]);
614+
case GGUF_TYPE_INT32: return std::to_string(((const int32_t *)data)[i]);
615+
case GGUF_TYPE_UINT64: return std::to_string(((const uint64_t *)data)[i]);
616+
case GGUF_TYPE_INT64: return std::to_string(((const int64_t *)data)[i]);
617+
case GGUF_TYPE_FLOAT32: return std::to_string(((const float *)data)[i]);
618+
case GGUF_TYPE_FLOAT64: return std::to_string(((const double *)data)[i]);
619+
case GGUF_TYPE_BOOL: return ((const bool *)data)[i] ? "true" : "false";
620+
default: return format("unknown type %d", type);
621+
}
622+
}
623+
624+
// Renders the value of the i-th key/value pair of ctx_gguf as a string.
//
//   - string values are returned as-is
//   - arrays are rendered as "[e0, e1, ...]": string elements are wrapped in
//     double quotes with backslashes and double quotes escaped, nested arrays
//     are rendered as the placeholder ???, other elements go through
//     gguf_data_to_str
//   - all remaining types are formatted by gguf_data_to_str from the pointer
//     returned by gguf_get_val_data
static std::string gguf_kv_to_str(struct gguf_context * ctx_gguf, int i) {
625+
const enum gguf_type type = gguf_get_kv_type(ctx_gguf, i);
626+
627+
switch (type) {
628+
case GGUF_TYPE_STRING:
629+
return gguf_get_val_str(ctx_gguf, i);
630+
case GGUF_TYPE_ARRAY:
631+
{
632+
const enum gguf_type arr_type = gguf_get_arr_type(ctx_gguf, i);
633+
int arr_n = gguf_get_arr_n(ctx_gguf, i);
634+
const void * data = gguf_get_arr_data(ctx_gguf, i);
635+
std::stringstream ss;
636+
ss << "[";
637+
for (int j = 0; j < arr_n; j++) {
638+
if (arr_type == GGUF_TYPE_STRING) {
639+
std::string val = gguf_get_arr_str(ctx_gguf, i, j);
640+
// escape backslashes first, then double quotes, so the backslashes
// introduced for the quotes are not escaped a second time
641+
replace_all(val, "\\", "\\\\");
642+
replace_all(val, "\"", "\\\"");
643+
ss << '"' << val << '"';
644+
} else if (arr_type == GGUF_TYPE_ARRAY) {
645+
// nested arrays are not rendered; emit a placeholder instead
ss << "???";
646+
} else {
647+
ss << gguf_data_to_str(arr_type, data, j);
648+
}
649+
// separator between elements, none after the last one
if (j < arr_n - 1) {
650+
ss << ", ";
651+
}
652+
}
653+
ss << "]";
654+
return ss.str();
655+
}
656+
default:
657+
return gguf_data_to_str(type, gguf_get_val_data(ctx_gguf, i), 0);
658+
}
659+
}
660+
607661
//
608662
// ggml helpers
609663
//
@@ -1327,6 +1381,9 @@ struct llama_model {
13271381

13281382
int n_gpu_layers;
13291383

1384+
// gguf metadata
1385+
std::unordered_map<std::string, std::string> gguf_kv;
1386+
13301387
// context
13311388
struct ggml_context * ctx = NULL;
13321389

@@ -1785,10 +1842,10 @@ struct llama_model_loader {
17851842
case GGML_TYPE_Q5_K: ftype = LLAMA_FTYPE_MOSTLY_Q5_K_M; break;
17861843
case GGML_TYPE_Q6_K: ftype = LLAMA_FTYPE_MOSTLY_Q6_K; break;
17871844
default:
1788-
{
1789-
LLAMA_LOG_WARN("%s: unknown type %s\n", __func__, ggml_type_name(type_max));
1790-
ftype = LLAMA_FTYPE_ALL_F32;
1791-
} break;
1845+
{
1846+
LLAMA_LOG_WARN("%s: unknown type %s\n", __func__, ggml_type_name(type_max));
1847+
ftype = LLAMA_FTYPE_ALL_F32;
1848+
} break;
17921849
}
17931850

17941851
// this is a way to mark that we have "guessed" the file type
@@ -1802,10 +1859,20 @@ struct llama_model_loader {
18021859
}
18031860

18041861
for (int i = 0; i < n_kv; i++) {
1805-
const char * name = gguf_get_key(ctx_gguf, i);
1806-
const enum gguf_type type = gguf_get_kv_type(ctx_gguf, i);
1862+
const char * name = gguf_get_key(ctx_gguf, i);
1863+
const enum gguf_type type = gguf_get_kv_type(ctx_gguf, i);
1864+
const std::string type_name =
1865+
type == GGUF_TYPE_ARRAY
1866+
? format("%s[%s,%d]", gguf_type_name(type), gguf_type_name(gguf_get_arr_type(ctx_gguf, i)), gguf_get_arr_n(ctx_gguf, i))
1867+
: gguf_type_name(type);
1868+
1869+
std::string value = gguf_kv_to_str(ctx_gguf, i);
1870+
const size_t MAX_VALUE_LEN = 40;
1871+
if (value.size() > MAX_VALUE_LEN) {
1872+
value = format("%s...", value.substr(0, MAX_VALUE_LEN - 3).c_str());
1873+
}
18071874

1808-
LLAMA_LOG_INFO("%s: - kv %3d: %42s %-8s\n", __func__, i, name, gguf_type_name(type));
1875+
LLAMA_LOG_INFO("%s: - kv %3d: %42s %-16s = %s\n", __func__, i, name, type_name.c_str(), value.c_str());
18091876
}
18101877

18111878
// print type counts
@@ -2100,6 +2167,17 @@ static void llm_load_hparams(
21002167

21012168
auto & hparams = model.hparams;
21022169

2170+
// get metadata as string
2171+
for (int i = 0; i < gguf_get_n_kv(ctx); i++) {
2172+
enum gguf_type type = gguf_get_kv_type(ctx, i);
2173+
if (type == GGUF_TYPE_ARRAY) {
2174+
continue;
2175+
}
2176+
const char * name = gguf_get_key(ctx, i);
2177+
const std::string value = gguf_kv_to_str(ctx, i);
2178+
model.gguf_kv.emplace(name, value);
2179+
}
2180+
21032181
// get general kv
21042182
GGUF_GET_KEY(ctx, model.name, gguf_get_val_str, GGUF_TYPE_STRING, false, kv(LLM_KV_GENERAL_NAME));
21052183

@@ -8671,6 +8749,45 @@ float llama_rope_freq_scale_train(const struct llama_model * model) {
86718749
return model->hparams.rope_freq_scale_train;
86728750
}
86738751

8752+
// Copies the metadata value stored under key into buf as a string.
//
//   model    - model whose gguf_kv map is searched
//   key      - metadata key; used to construct a std::string for the lookup,
//              so it must not be NULL
//   buf      - destination buffer
//   buf_size - size of buf in bytes
//
// Returns -1 if the key is not present; in that case buf is set to the empty
// string when buf_size > 0. Otherwise returns the result of snprintf, i.e. the
// length of the full value, which can exceed buf_size - 1 when the copy in buf
// was truncated.
int llama_model_meta_val_str(const struct llama_model * model, const char * key, char * buf, size_t buf_size) {
8753+
const auto & it = model->gguf_kv.find(key);
8754+
if (it == model->gguf_kv.end()) {
8755+
if (buf_size > 0) {
8756+
buf[0] = '\0';
8757+
}
8758+
return -1;
8759+
}
8760+
return snprintf(buf, buf_size, "%s", it->second.c_str());
8761+
}
8762+
8763+
// Returns the number of entries in the model's gguf_kv metadata map, cast to int.
int llama_model_meta_count(const struct llama_model * model) {
8764+
return (int)model->gguf_kv.size();
8765+
}
8766+
8767+
// Copies the key of the i-th entry of the model's gguf_kv metadata map into buf.
//
// Returns -1 if i is outside [0, gguf_kv.size()); in that case buf is set to
// the empty string when buf_size > 0. Otherwise returns the result of snprintf,
// i.e. the length of the full key, which can exceed buf_size - 1 when the copy
// in buf was truncated.
//
// The i-th entry is found by advancing an iterator from begin(), so the cost
// grows with i, and the index follows the map's iteration order.
int llama_model_meta_key_by_index(const struct llama_model * model, int i, char * buf, size_t buf_size) {
8768+
if (i < 0 || i >= (int)model->gguf_kv.size()) {
8769+
if (buf_size > 0) {
8770+
buf[0] = '\0';
8771+
}
8772+
return -1;
8773+
}
8774+
auto it = model->gguf_kv.begin();
8775+
std::advance(it, i);
8776+
return snprintf(buf, buf_size, "%s", it->first.c_str());
8777+
}
8778+
8779+
// Copies the value of the i-th entry of the model's gguf_kv metadata map into buf.
//
// Returns -1 if i is outside [0, gguf_kv.size()); in that case buf is set to
// the empty string when buf_size > 0. Otherwise returns the result of snprintf,
// i.e. the length of the full value, which can exceed buf_size - 1 when the
// copy in buf was truncated.
//
// The i-th entry is found by advancing an iterator from begin(), so the cost
// grows with i, and the index follows the map's iteration order.
int llama_model_meta_val_str_by_index(const struct llama_model * model, int i, char * buf, size_t buf_size) {
8780+
if (i < 0 || i >= (int)model->gguf_kv.size()) {
8781+
if (buf_size > 0) {
8782+
buf[0] = '\0';
8783+
}
8784+
return -1;
8785+
}
8786+
auto it = model->gguf_kv.begin();
8787+
std::advance(it, i);
8788+
return snprintf(buf, buf_size, "%s", it->second.c_str());
8789+
}
8790+
86748791
int llama_model_desc(const struct llama_model * model, char * buf, size_t buf_size) {
86758792
return snprintf(buf, buf_size, "%s %s %s",
86768793
llama_model_arch_name(model->arch).c_str(),

llama.h

+17
Original file line numberDiff line numberDiff line change
@@ -301,6 +301,23 @@ extern "C" {
301301
// Get the model's RoPE frequency scaling factor
302302
LLAMA_API float llama_rope_freq_scale_train(const struct llama_model * model);
303303

304+
// Functions to access the model's GGUF metadata scalar values
305+
// - The functions that write a string return its full length on success (this may exceed buf_size - 1 if the output was truncated), or -1 on failure
306+
// - The output string is always null-terminated and cleared on failure
307+
// - GGUF array values are not supported by these functions
308+
309+
// Get metadata value as a string by key name
310+
LLAMA_API int llama_model_meta_val_str(const struct llama_model * model, const char * key, char * buf, size_t buf_size);
311+
312+
// Get the number of metadata key/value pairs
313+
LLAMA_API int llama_model_meta_count(const struct llama_model * model);
314+
315+
// Get metadata key name by index
316+
LLAMA_API int llama_model_meta_key_by_index(const struct llama_model * model, int i, char * buf, size_t buf_size);
317+
318+
// Get metadata value as a string by index
319+
LLAMA_API int llama_model_meta_val_str_by_index(const struct llama_model * model, int i, char * buf, size_t buf_size);
320+
304321
// Get a string describing the model type
305322
LLAMA_API int llama_model_desc(const struct llama_model * model, char * buf, size_t buf_size);
306323

0 commit comments

Comments
 (0)