File tree 1 file changed +4
-1
lines changed
1 file changed +4
-1
lines changed Original file line number Diff line number Diff line change @@ -259,6 +259,7 @@ def loadOriginalParamsJson(model: LazyModel, config_path: Path) -> Params:
259
259
260
260
n_experts = None
261
261
n_experts_used = None
262
+ f_rope_freq_base = None
262
263
263
264
# hack to determine LLaMA v1 vs v2 vs CodeLlama
264
265
if config.get("moe"):
@@ -281,6 +282,8 @@ def loadOriginalParamsJson(model: LazyModel, config_path: Path) -> Params:
281
282
n_ff = model["layers.0.feed_forward.experts.0.w1.weight"].shape[0]
282
283
n_experts = config["moe"]["num_experts"]
283
284
n_experts_used = config["moe"]["num_experts_per_tok"]
285
+ f_rope_freq_base = 1e6
286
+
284
287
285
288
return Params(
286
289
n_vocab = model["tok_embeddings.weight"].shape[0],
@@ -293,7 +296,7 @@ def loadOriginalParamsJson(model: LazyModel, config_path: Path) -> Params:
293
296
n_experts = n_experts,
294
297
n_experts_used = n_experts_used,
295
298
f_norm_eps = config["norm_eps"],
296
- f_rope_freq_base = config.get("rope_theta"),
299
+ f_rope_freq_base = config.get("rope_theta", f_rope_freq_base),
297
300
)
298
301
299
302
@staticmethod
You can’t perform that action at this time.
0 commit comments