42
42
ARCH = gguf .MODEL_ARCH .LLAMA
43
43
44
44
DEFAULT_CONCURRENCY = 8
45
+
45
46
#
46
47
# data types
47
48
#
@@ -235,6 +236,13 @@ def loadHFTransformerJson(model: LazyModel, config_path: Path) -> Params:
235
236
raise Exception ("failed to guess 'n_ctx'. This model is unknown or unsupported.\n "
236
237
"Suggestion: provide 'config.json' of the model in the same directory containing model files." )
237
238
239
+ n_experts = None
240
+ n_experts_used = None
241
+
242
+ if "num_local_experts" in config :
243
+ n_experts = config ["num_local_experts" ]
244
+ n_experts_used = config ["num_experts_per_tok" ]
245
+
238
246
return Params (
239
247
n_vocab = config ["vocab_size" ],
240
248
n_embd = config ["hidden_size" ],
@@ -243,6 +251,8 @@ def loadHFTransformerJson(model: LazyModel, config_path: Path) -> Params:
243
251
n_ff = config ["intermediate_size" ],
244
252
n_head = (n_head := config ["num_attention_heads" ]),
245
253
n_head_kv = config .get ("num_key_value_heads" , n_head ),
254
+ n_experts = n_experts ,
255
+ n_experts_used = n_experts_used ,
246
256
f_norm_eps = config ["rms_norm_eps" ],
247
257
f_rope_freq_base = config .get ("rope_theta" ),
248
258
rope_scaling_type = rope_scaling_type ,
@@ -257,7 +267,7 @@ def loadHFTransformerJson(model: LazyModel, config_path: Path) -> Params:
257
267
def loadOriginalParamsJson (model : LazyModel , config_path : Path ) -> Params :
258
268
config = json .load (open (config_path ))
259
269
260
- n_experts = None
270
+ n_experts = None
261
271
n_experts_used = None
262
272
f_rope_freq_base = None
263
273
@@ -280,7 +290,7 @@ def loadOriginalParamsJson(model: LazyModel, config_path: Path) -> Params:
280
290
281
291
if config .get ("moe" ):
282
292
n_ff = model ["layers.0.feed_forward.experts.0.w1.weight" ].shape [0 ]
283
- n_experts = config ["moe" ]["num_experts" ]
293
+ n_experts = config ["moe" ]["num_experts" ]
284
294
n_experts_used = config ["moe" ]["num_experts_per_tok" ]
285
295
f_rope_freq_base = 1e6
286
296
0 commit comments