
Commit 6a419f4

convert : support safetensors format
1 parent f1cbfab commit 6a419f4

File tree

convert.py
gguf-py/gguf/tensor_mapping.py

2 files changed: +20 −6

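For context: safetensors is the Hugging Face checkpoint format that stores raw tensor data behind a small JSON header instead of a pickled PyTorch state dict, so tensors can be enumerated and loaded lazily. A minimal sketch of walking one shard with the safetensors library (the shard file name is illustrative, not taken from this commit):

from safetensors import safe_open

# Open one shard lazily; framework="pt" yields torch tensors.
# The path below is a hypothetical example shard name.
with safe_open("model-00001-of-00002.safetensors", framework="pt", device="cpu") as f:
    for name in f.keys():
        t = f.get_tensor(name)  # loaded on demand, not all at once
        print(name, tuple(t.shape), t.dtype)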

Diff for: convert.py

+12 −2
@@ -42,6 +42,7 @@
 ARCH = gguf.MODEL_ARCH.LLAMA
 
 DEFAULT_CONCURRENCY = 8
+
 #
 # data types
 #
@@ -235,6 +236,13 @@ def loadHFTransformerJson(model: LazyModel, config_path: Path) -> Params:
        raise Exception("failed to guess 'n_ctx'. This model is unknown or unsupported.\n"
                        "Suggestion: provide 'config.json' of the model in the same directory containing model files.")
 
+    n_experts      = None
+    n_experts_used = None
+
+    if "num_local_experts" in config:
+        n_experts      = config["num_local_experts"]
+        n_experts_used = config["num_experts_per_tok"]
+
     return Params(
         n_vocab           = config["vocab_size"],
         n_embd            = config["hidden_size"],
@@ -243,6 +251,8 @@ def loadHFTransformerJson(model: LazyModel, config_path: Path) -> Params:
         n_ff              = config["intermediate_size"],
         n_head            = (n_head := config["num_attention_heads"]),
         n_head_kv         = config.get("num_key_value_heads", n_head),
+        n_experts         = n_experts,
+        n_experts_used    = n_experts_used,
         f_norm_eps        = config["rms_norm_eps"],
         f_rope_freq_base  = config.get("rope_theta"),
         rope_scaling_type = rope_scaling_type,
@@ -257,7 +267,7 @@ def loadHFTransformerJson(model: LazyModel, config_path: Path) -> Params:
 def loadOriginalParamsJson(model: LazyModel, config_path: Path) -> Params:
     config = json.load(open(config_path))
 
-    n_experts = None
+    n_experts      = None
     n_experts_used = None
     f_rope_freq_base = None
 
@@ -280,7 +290,7 @@ def loadOriginalParamsJson(model: LazyModel, config_path: Path) -> Params:
 
     if config.get("moe"):
         n_ff = model["layers.0.feed_forward.experts.0.w1.weight"].shape[0]
-        n_experts = config["moe"]["num_experts"]
+        n_experts      = config["moe"]["num_experts"]
         n_experts_used = config["moe"]["num_experts_per_tok"]
         f_rope_freq_base = 1e6
 
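The loadHFTransformerJson change above detects mixture-of-experts checkpoints from the HF config.json. A standalone sketch of the same detection logic, assuming a Mixtral-style config (the helper name is ours, not the converter's; the example values are illustrative):

import json
from pathlib import Path

def detect_moe_params(config_path: Path) -> tuple[int | None, int | None]:
    # Returns (n_experts, n_experts_used); both None for dense models.
    config = json.loads(config_path.read_text())
    n_experts = None
    n_experts_used = None
    if "num_local_experts" in config:                   # Mixtral-style HF config
        n_experts      = config["num_local_experts"]    # e.g. 8 for Mixtral 8x7B
        n_experts_used = config["num_experts_per_tok"]  # e.g. 2
    return n_experts, n_experts_used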

Diff for: gguf-py/gguf/tensor_mapping.py

+8 −4
@@ -150,7 +150,8 @@ class TensorNameMap:
         ),
 
         MODEL_TENSOR.FFN_GATE_INP: (
-            "layers.{bid}.feed_forward.gate", # mixtral
+            "layers.{bid}.feed_forward.gate",           # mixtral
+            "model.layers.{bid}.block_sparse_moe.gate", # mixtral
         ),
 
         # Feed-forward up
@@ -169,7 +170,8 @@ class TensorNameMap:
         ),
 
         MODEL_TENSOR.FFN_UP_EXP: (
-            "layers.{bid}.feed_forward.experts.{xid}.w3", # mixtral
+            "layers.{bid}.feed_forward.experts.{xid}.w3",           # mixtral
+            "model.layers.{bid}.block_sparse_moe.experts.{xid}.w3", # mixtral
         ),
 
         # Feed-forward gate
@@ -180,7 +182,8 @@ class TensorNameMap:
         ),
 
         MODEL_TENSOR.FFN_GATE_EXP: (
-            "layers.{bid}.feed_forward.experts.{xid}.w1", # mixtral
+            "layers.{bid}.feed_forward.experts.{xid}.w1",           # mixtral
+            "model.layers.{bid}.block_sparse_moe.experts.{xid}.w1", # mixtral
         ),
 
         # Feed-forward down
@@ -198,7 +201,8 @@ class TensorNameMap:
         ),
 
         MODEL_TENSOR.FFN_DOWN_EXP: (
-            "layers.{bid}.feed_forward.experts.{xid}.w2", # mixtral
+            "layers.{bid}.feed_forward.experts.{xid}.w2",           # mixtral
+            "model.layers.{bid}.block_sparse_moe.experts.{xid}.w2", # mixtral
         ),
 
         MODEL_TENSOR.ATTN_Q_NORM: (
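Each tuple above lists the HF-side name templates for one GGUF tensor; TensorNameMap substitutes the block index for {bid} and the expert index for {xid}, so the original pth names and the new safetensors names resolve to the same target. A simplified sketch of that expansion (the GGUF-side name format is assumed for illustration, not taken from this commit):

# Both Mixtral naming schemes for the experts' w3 (up) projection:
FFN_UP_EXP_TEMPLATES = (
    "layers.{bid}.feed_forward.experts.{xid}.w3",            # pth
    "model.layers.{bid}.block_sparse_moe.experts.{xid}.w3",  # safetensors
)

def build_mapping(n_blocks: int, n_experts: int) -> dict[str, str]:
    # Expand templates into a flat source-name -> target-name lookup table.
    mapping: dict[str, str] = {}
    for bid in range(n_blocks):
        for xid in range(n_experts):
            target = f"blk.{bid}.ffn_up.{xid}"  # assumed GGUF-side format
            for tmpl in FFN_UP_EXP_TEMPLATES:
                mapping[tmpl.format(bid=bid, xid=xid)] = target
    return mapping

# Both naming schemes now resolve to the same GGUF tensor:
m = build_mapping(n_blocks=32, n_experts=8)
assert m["layers.3.feed_forward.experts.1.w3"] == m["model.layers.3.block_sparse_moe.experts.1.w3"]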
