@@ -604,6 +604,60 @@ static int8_t llama_rope_scaling_type_from_string(const std::string & name) {
     return LLAMA_ROPE_SCALING_UNSPECIFIED;
 }
 
+static std::string gguf_data_to_str(enum gguf_type type, const void * data, int i) {
+    switch (type) {
+        case GGUF_TYPE_UINT8:   return std::to_string(((const uint8_t  *)data)[i]);
+        case GGUF_TYPE_INT8:    return std::to_string(((const int8_t   *)data)[i]);
+        case GGUF_TYPE_UINT16:  return std::to_string(((const uint16_t *)data)[i]);
+        case GGUF_TYPE_INT16:   return std::to_string(((const int16_t  *)data)[i]);
+        case GGUF_TYPE_UINT32:  return std::to_string(((const uint32_t *)data)[i]);
+        case GGUF_TYPE_INT32:   return std::to_string(((const int32_t  *)data)[i]);
+        case GGUF_TYPE_UINT64:  return std::to_string(((const uint64_t *)data)[i]);
+        case GGUF_TYPE_INT64:   return std::to_string(((const int64_t  *)data)[i]);
+        case GGUF_TYPE_FLOAT32: return std::to_string(((const float    *)data)[i]);
+        case GGUF_TYPE_FLOAT64: return std::to_string(((const double   *)data)[i]);
+        case GGUF_TYPE_BOOL:    return ((const bool *)data)[i] ? "true" : "false";
+        default:                return format("unknown type %d", type);
+    }
+}
+
+static std::string gguf_kv_to_str(struct gguf_context * ctx_gguf, int i) {
+    const enum gguf_type type = gguf_get_kv_type(ctx_gguf, i);
+
+    switch (type) {
+        case GGUF_TYPE_STRING:
+            return gguf_get_val_str(ctx_gguf, i);
+        case GGUF_TYPE_ARRAY:
+            {
+                const enum gguf_type arr_type = gguf_get_arr_type(ctx_gguf, i);
+                int arr_n = gguf_get_arr_n(ctx_gguf, i);
+                const void * data = gguf_get_arr_data(ctx_gguf, i);
+                std::stringstream ss;
+                ss << "[";
+                for (int j = 0; j < arr_n; j++) {
+                    if (arr_type == GGUF_TYPE_STRING) {
+                        std::string val = gguf_get_arr_str(ctx_gguf, i, j);
+                        // escape quotes
+                        replace_all(val, "\\", "\\\\");
+                        replace_all(val, "\"", "\\\"");
+                        ss << '"' << val << '"';
+                    } else if (arr_type == GGUF_TYPE_ARRAY) {
+                        ss << "???";
+                    } else {
+                        ss << gguf_data_to_str(arr_type, data, j);
+                    }
+                    if (j < arr_n - 1) {
+                        ss << ", ";
+                    }
+                }
+                ss << "]";
+                return ss.str();
+            }
+        default:
+            return gguf_data_to_str(type, gguf_get_val_data(ctx_gguf, i), 0);
+    }
+}
+
 
 //
 // ggml helpers
 //
@@ -1327,6 +1381,9 @@ struct llama_model {
 
     int n_gpu_layers;
 
+    // gguf metadata
+    std::unordered_map<std::string, std::string> gguf_kv;
+
     // context
     struct ggml_context * ctx = NULL;
 
@@ -1785,10 +1842,10 @@ struct llama_model_loader {
             case GGML_TYPE_Q5_K: ftype = LLAMA_FTYPE_MOSTLY_Q5_K_M; break;
             case GGML_TYPE_Q6_K: ftype = LLAMA_FTYPE_MOSTLY_Q6_K;   break;
             default:
-            {
-                LLAMA_LOG_WARN("%s: unknown type %s\n", __func__, ggml_type_name(type_max));
-                ftype = LLAMA_FTYPE_ALL_F32;
-            } break;
+                {
+                    LLAMA_LOG_WARN("%s: unknown type %s\n", __func__, ggml_type_name(type_max));
+                    ftype = LLAMA_FTYPE_ALL_F32;
+                } break;
         }
 
         // this is a way to mark that we have "guessed" the file type
@@ -1802,10 +1859,20 @@ struct llama_model_loader {
         }
 
         for (int i = 0; i < n_kv; i++) {
-            const char * name         = gguf_get_key(ctx_gguf, i);
-            const enum gguf_type type = gguf_get_kv_type(ctx_gguf, i);
+            const char * name           = gguf_get_key(ctx_gguf, i);
+            const enum gguf_type type   = gguf_get_kv_type(ctx_gguf, i);
+            const std::string type_name =
+                type == GGUF_TYPE_ARRAY
+                ? format("%s[%s,%d]", gguf_type_name(type), gguf_type_name(gguf_get_arr_type(ctx_gguf, i)), gguf_get_arr_n(ctx_gguf, i))
+                : gguf_type_name(type);
+
+            std::string value          = gguf_kv_to_str(ctx_gguf, i);
+            const size_t MAX_VALUE_LEN = 40;
+            if (value.size() > MAX_VALUE_LEN) {
+                value = format("%s...", value.substr(0, MAX_VALUE_LEN - 3).c_str());
+            }
 
-            LLAMA_LOG_INFO("%s: - kv %3d: %42s %-8s\n", __func__, i, name, gguf_type_name(type));
+            LLAMA_LOG_INFO("%s: - kv %3d: %42s %-16s = %s\n", __func__, i, name, type_name.c_str(), value.c_str());
         }
 
         // print type counts
@@ -2100,6 +2167,17 @@ static void llm_load_hparams(
 
     auto & hparams = model.hparams;
 
+    // get metadata as string
+    for (int i = 0; i < gguf_get_n_kv(ctx); i++) {
+        enum gguf_type type = gguf_get_kv_type(ctx, i);
+        if (type == GGUF_TYPE_ARRAY) {
+            continue;
+        }
+        const char * name = gguf_get_key(ctx, i);
+        const std::string value = gguf_kv_to_str(ctx, i);
+        model.gguf_kv.emplace(name, value);
+    }
+
     // get general kv
     GGUF_GET_KEY(ctx, model.name, gguf_get_val_str, GGUF_TYPE_STRING, false, kv(LLM_KV_GENERAL_NAME));
 
@@ -8671,6 +8749,45 @@ float llama_rope_freq_scale_train(const struct llama_model * model) {
     return model->hparams.rope_freq_scale_train;
 }
 
+int llama_model_meta_val_str(const struct llama_model * model, const char * key, char * buf, size_t buf_size) {
+    const auto & it = model->gguf_kv.find(key);
+    if (it == model->gguf_kv.end()) {
+        if (buf_size > 0) {
+            buf[0] = '\0';
+        }
+        return -1;
+    }
+    return snprintf(buf, buf_size, "%s", it->second.c_str());
+}
+
+int llama_model_meta_count(const struct llama_model * model) {
+    return (int)model->gguf_kv.size();
+}
+
+int llama_model_meta_key_by_index(const struct llama_model * model, int i, char * buf, size_t buf_size) {
+    if (i < 0 || i >= (int)model->gguf_kv.size()) {
+        if (buf_size > 0) {
+            buf[0] = '\0';
+        }
+        return -1;
+    }
+    auto it = model->gguf_kv.begin();
+    std::advance(it, i);
+    return snprintf(buf, buf_size, "%s", it->first.c_str());
+}
+
+int llama_model_meta_val_str_by_index(const struct llama_model * model, int i, char * buf, size_t buf_size) {
+    if (i < 0 || i >= (int)model->gguf_kv.size()) {
+        if (buf_size > 0) {
+            buf[0] = '\0';
+        }
+        return -1;
+    }
+    auto it = model->gguf_kv.begin();
+    std::advance(it, i);
+    return snprintf(buf, buf_size, "%s", it->second.c_str());
+}
+
 int llama_model_desc(const struct llama_model * model, char * buf, size_t buf_size) {
     return snprintf(buf, buf_size, "%s %s %s",
             llama_model_arch_name(model->arch).c_str(),
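
As a rough illustration of how a caller might consume the new metadata API (not part of this diff; dump_model_metadata, the buffer sizes, and the use of llama_load_model_from_file are assumptions for the sketch):

#include <stdio.h>

#include "llama.h"

// Hypothetical helper: print every metadata key/value pair of an already
// loaded model using the functions introduced above. Buffer sizes are
// illustrative only.
static void dump_model_metadata(const struct llama_model * model) {
    const int n_kv = llama_model_meta_count(model);
    for (int i = 0; i < n_kv; i++) {
        char key[128];
        char val[256];
        if (llama_model_meta_key_by_index(model, i, key, sizeof(key)) < 0) {
            continue; // index out of range
        }
        // values longer than the buffer are truncated (snprintf semantics)
        llama_model_meta_val_str_by_index(model, i, val, sizeof(val));
        printf("%-40s = %s\n", key, val);
    }
}

Because the functions return the snprintf result, a non-negative return value is the length of the full (untruncated) string, so a caller could in principle query the required size first and allocate an exact buffer.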