@@ -929,23 +929,22 @@ static const size_t kB = 1024;
 static const size_t MB = kB*kB;
 static const size_t GB = kB*kB*kB;
 
-// default hparams (LLaMA 7B)
 struct llama_hparams {
-    uint32_t n_vocab     = 32000;
-    uint32_t n_ctx_train = 2048;  // the context size used during training
-    uint32_t n_ctx       = 512;   // the context size used during inference
-    uint32_t n_embd      = 4096;
-    uint32_t n_head      = 32;
-    uint32_t n_head_kv   = 32;
-    uint32_t n_layer     = 32;
-    uint32_t n_rot       = 64;
-    uint32_t n_ff        = 11008;
-
-    float f_norm_eps     = 1e-5;
-    float f_norm_rms_eps = 1e-5;
-
-    float rope_freq_base  = 10000.0f;
-    float rope_freq_scale = 1.0f;
+    uint32_t n_vocab;
+    uint32_t n_ctx_train; // context size the model was trained on
+    uint32_t n_ctx;       // context size used during inference
+    uint32_t n_embd;
+    uint32_t n_head;
+    uint32_t n_head_kv;
+    uint32_t n_layer;
+    uint32_t n_rot;
+    uint32_t n_ff;
+
+    float f_norm_eps;
+    float f_norm_rms_eps;
+
+    float rope_freq_base;
+    float rope_freq_scale;
 
     bool operator!=(const llama_hparams & other) const {
         return static_cast<bool>(memcmp(this, &other, sizeof(llama_hparams))); // NOLINT
@@ -1076,7 +1075,7 @@ struct llama_model {
 
     std::string name = "n/a";
 
-    llama_hparams hparams;
+    llama_hparams hparams = {};
     llama_vocab   vocab;
 
     struct ggml_tensor * tok_embeddings;
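
With the in-struct defaults gone, the "= {}" initializer above does real work:
value-initializing the aggregate zeroes every member, so any field the GGUF
loader does not set reads as 0 / 0.0f (the "unset" sentinel used below) rather
than indeterminate garbage, and the memcmp-based operator!= stays well-defined.
A minimal, self-contained sketch of that behavior, using a hypothetical
stand-in struct rather than the real llama_hparams:

    #include <cstdint>
    #include <cstdio>

    struct hparams_sketch {      // hypothetical stand-in for llama_hparams
        uint32_t n_vocab;
        float    rope_freq_base;
    };

    int main() {
        hparams_sketch hp = {};  // value-init: all members become 0 / 0.0f
        if (hp.rope_freq_base == 0.0f) {
            hp.rope_freq_base = 10000.0f; // loader-style fallback for "unset"
        }
        std::printf("n_vocab=%u base=%.1f\n", hp.n_vocab, hp.rope_freq_base);
    }
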
@@ -1674,28 +1673,17 @@ static void llm_load_hparams(
     hparams.n_head_kv = hparams.n_head;
     GGUF_GET_KEY(ctx, hparams.n_head_kv, gguf_get_val_u32, GGUF_TYPE_UINT32, false, kv(LLM_KV_ATTENTION_HEAD_COUNT_KV));
 
-    // TODO: manually setting rope freq base and scale should override this
-    // FIXME: partial fix when the param specified is not the default value, but
-    //        will not work for overriding the model value to the params default
-
-    llama_context_params defaults = llama_context_default_params();
-
-    // rope_freq_base
-    {
-        float ropebase = 10000.0f;
-        GGUF_GET_KEY(ctx, ropebase, gguf_get_val_f32, GGUF_TYPE_FLOAT32, false, kv(LLM_KV_ROPE_FREQ_BASE));
-        if (ropebase != 10000.0f && rope_freq_base == defaults.rope_freq_base) {
-            rope_freq_base = ropebase;
-        }
+    // rope_freq_base (optional)
+    if (rope_freq_base == 0.0f) {
+        rope_freq_base = 10000.0f;
+        GGUF_GET_KEY(ctx, rope_freq_base, gguf_get_val_f32, GGUF_TYPE_FLOAT32, false, kv(LLM_KV_ROPE_FREQ_BASE));
     }
 
     // rope_freq_scale (inverse of the kv) is optional
-    {
+    if (rope_freq_scale == 0.0f) {
         float ropescale = 1.0f;
         GGUF_GET_KEY(ctx, ropescale, gguf_get_val_f32, GGUF_TYPE_FLOAT32, false, kv(LLM_KV_ROPE_SCALE_LINEAR));
-        if (ropescale != 1.0f && rope_freq_scale == defaults.rope_freq_scale) {
-            rope_freq_scale = 1.0f/ropescale;
-        }
+        rope_freq_scale = 1.0f/ropescale;
     }
 
     // sanity check for n_rot (optional)
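
The rewritten logic treats 0.0f as "not set by the caller": a nonzero
rope_freq_base / rope_freq_scale passed in is left untouched; otherwise the
built-in fallback is installed first and the optional GGUF key, if present,
overwrites it. A minimal sketch of the resulting precedence (user value >
model value > built-in default), with model_has_key / model_value standing in
for the GGUF_GET_KEY lookup:

    float resolve_rope_freq_base(float user_value, bool model_has_key, float model_value) {
        if (user_value != 0.0f) {
            return user_value;   // explicit user setting always wins
        }
        float v = 10000.0f;      // built-in fallback
        if (model_has_key) {
            v = model_value;     // optional GGUF value overrides the fallback
        }
        return v;
    }
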
@@ -6188,8 +6176,8 @@ struct llama_context_params llama_context_default_params() {
         /*.n_gpu_layers                =*/ 0,
         /*.main_gpu                    =*/ 0,
         /*.tensor_split                =*/ nullptr,
-        /*.rope_freq_base              =*/ 10000.0f,
-        /*.rope_freq_scale             =*/ 1.0f,
+        /*.rope_freq_base              =*/ 0.0f,
+        /*.rope_freq_scale             =*/ 0.0f,
         /*.progress_callback           =*/ nullptr,
         /*.progress_callback_user_data =*/ nullptr,
         /*.low_vram                    =*/ false,
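
With the defaults switched to the 0.0f sentinel, callers that never touch the
rope fields automatically pick up whatever the model file specifies, while an
explicit override still wins. A brief usage sketch:

    llama_context_params params = llama_context_default_params();
    // params.rope_freq_base  == 0.0f -> use the model's value (or 10000.0f)
    // params.rope_freq_scale == 0.0f -> use the model's value (or 1.0f)

    params.rope_freq_base = 20000.0f; // hypothetical explicit override;
                                      // any nonzero value is kept as-is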