
Commit 7dc75e3

convert : use 1e6 rope_freq_base for mixtral
1 parent 296c945 commit 7dc75e3

1 file changed: +4 -1 lines changed

convert.py (+4 -1)
@@ -259,6 +259,7 @@ def loadOriginalParamsJson(model: LazyModel, config_path: Path) -> Params:
 
         n_experts = None
         n_experts_used = None
+        f_rope_freq_base = None
 
         # hack to determine LLaMA v1 vs v2 vs CodeLlama
         if config.get("moe"):
@@ -281,6 +282,8 @@ def loadOriginalParamsJson(model: LazyModel, config_path: Path) -> Params:
             n_ff = model["layers.0.feed_forward.experts.0.w1.weight"].shape[0]
             n_experts = config["moe"]["num_experts"]
             n_experts_used = config["moe"]["num_experts_per_tok"]
+            f_rope_freq_base = 1e6
+
 
         return Params(
             n_vocab = model["tok_embeddings.weight"].shape[0],
@@ -293,7 +296,7 @@ def loadOriginalParamsJson(model: LazyModel, config_path: Path) -> Params:
             n_experts = n_experts,
             n_experts_used = n_experts_used,
             f_norm_eps = config["norm_eps"],
-            f_rope_freq_base = config.get("rope_theta"),
+            f_rope_freq_base = config.get("rope_theta", f_rope_freq_base),
         )
 
     @staticmethod
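
In effect, Mixtral-style MoE checkpoints (whose params.json typically omits rope_theta) now fall back to a rope frequency base of 1e6 instead of None, while an explicit rope_theta still takes precedence. A minimal sketch of the fallback logic introduced here; pick_rope_freq_base is a hypothetical helper and the config dicts are illustrative, not real checkpoint contents:

    def pick_rope_freq_base(config: dict):
        # Hypothetical helper mirroring the logic in loadOriginalParamsJson
        # after this commit.
        f_rope_freq_base = None
        if config.get("moe"):
            # Mixtral-style MoE checkpoints use a 1e6 rope frequency base.
            f_rope_freq_base = 1e6
        # An explicit rope_theta in params.json still wins over the fallback.
        return config.get("rope_theta", f_rope_freq_base)

    pick_rope_freq_base({"moe": {"num_experts": 8}})  # 1000000.0 (Mixtral fallback)
    pick_rope_freq_base({"rope_theta": 10000.0})      # 10000.0 (explicit value wins)
    pick_rope_freq_base({})                           # None (non-MoE, no rope_theta)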
