
Commit 06bb53a

Authored Apr 11, 2025

llama-model : add Glm4Model implementation for GLM-4-0414 (#12867)

* GLM-4-0414
* use original one
* Using with tensor map
* fix bug
* change order
* change order
* format with flake8

1 parent: 0c50923

9 files changed: +273 -4 lines

Diff for: README.md

+1

@@ -97,6 +97,7 @@ Instructions for adding support for new models: [HOWTO-add-model.md](docs/develo
 - [x] [Flan T5](https://huggingface.co/models?search=flan-t5)
 - [x] [Open Elm models](https://huggingface.co/collections/apple/openelm-instruct-models-6619ad295d7ae9f868b759ca)
 - [x] [ChatGLM3-6b](https://huggingface.co/THUDM/chatglm3-6b) + [ChatGLM4-9b](https://huggingface.co/THUDM/glm-4-9b) + [GLMEdge-1.5b](https://huggingface.co/THUDM/glm-edge-1.5b-chat) + [GLMEdge-4b](https://huggingface.co/THUDM/glm-edge-4b-chat)
+- [x] [GLM-4-0414](https://huggingface.co/collections/THUDM/glm-4-0414-67f3cbcb34dd9d252707cb2e)
 - [x] [SmolLM](https://huggingface.co/collections/HuggingFaceTB/smollm-6695016cad7167254ce15966)
 - [x] [EXAONE-3.0-7.8B-Instruct](https://huggingface.co/LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct)
 - [x] [FalconMamba Models](https://huggingface.co/collections/tiiuae/falconmamba-7b-66b9a580324dd1598b0f6d4a)

Diff for: convert_hf_to_gguf.py

+19 -1

@@ -735,6 +735,9 @@ def get_vocab_base_pre(self, tokenizer) -> str:
         if chkhsh == "d353350c764d8c3b39c763113960e4fb4919bea5fbf208a0e3b22e8469dc7406":
             # ref: https://huggingface.co/meta-llama/Llama-4-Scout-17B-16E-Instruct
             res = "llama4"
+        if chkhsh == "a1336059768a55c99a734006ffb02203cd450fed003e9a71886c88acf24fdbc2":
+            # ref: https://huggingface.co/THUDM/glm-4-9b-hf
+            res = "glm4"

         if res is None:
             logger.warning("\n")

@@ -4897,6 +4900,22 @@ def prepare_tensors(self):
         self.gguf_writer.add_max_alibi_bias(self.max_alibi_bias)


+@Model.register("Glm4ForCausalLM")
+class Glm4Model(Model):
+    model_arch = gguf.MODEL_ARCH.GLM4
+
+    def set_vocab(self):
+        self._set_vocab_gpt2()
+
+    def set_gguf_parameters(self):
+        super().set_gguf_parameters()
+        if self.hparams.get("rope_scaling") is not None and "factor" in self.hparams["rope_scaling"]:
+            if self.hparams["rope_scaling"].get("type") == "yarn":
+                self.gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.YARN)
+                self.gguf_writer.add_rope_scaling_factor(self.hparams["rope_scaling"]["factor"])
+                self.gguf_writer.add_rope_scaling_orig_ctx_len(self.hparams["rope_scaling"]["original_max_position_embeddings"])
+
+
 @Model.register("GlmForCausalLM", "ChatGLMModel", "ChatGLMForConditionalGeneration")
 class ChatGLMModel(Model):
     model_arch = gguf.MODEL_ARCH.CHATGLM

@@ -5588,7 +5607,6 @@ def main() -> None:
     with torch.inference_mode():
         output_type = ftype_map[args.outtype]
         model_architecture = hparams["architectures"][0]
-
         try:
             model_class = Model.from_model_architecture(model_architecture)
         except NotImplementedError:

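The converter change is self-contained: `Glm4Model` reuses the GPT-2-style BPE vocab path and only emits extra GGUF metadata when the HF config carries a YARN `rope_scaling` block. A minimal sketch of that branch in isolation (the `hparams` values below are hypothetical, not taken from a real GLM-4-0414 checkpoint):

# Minimal sketch of Glm4Model.set_gguf_parameters()'s rope-scaling branch.
# The hparams dict is a hypothetical excerpt of a config.json; a real
# checkpoint may omit rope_scaling entirely, in which case nothing extra
# is written and the defaults from super().set_gguf_parameters() apply.
hparams = {
    "rope_scaling": {
        "type": "yarn",
        "factor": 4.0,                              # illustrative value
        "original_max_position_embeddings": 32768,  # illustrative value
    }
}

rope_scaling = hparams.get("rope_scaling")
if rope_scaling is not None and "factor" in rope_scaling:
    if rope_scaling.get("type") == "yarn":
        # The real code calls gguf_writer.add_rope_scaling_type/factor/orig_ctx_len;
        # printing stands in for those writer calls here.
        print("rope_scaling.type             = yarn")
        print("rope_scaling.factor           =", rope_scaling["factor"])
        print("rope_scaling.original_max_ctx =", rope_scaling["original_max_position_embeddings"])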
Diff for: convert_hf_to_gguf_update.py

+1

@@ -114,6 +114,7 @@ class TOKENIZER_TYPE(IntEnum):
     {"name": "trillion", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/trillionlabs/Trillion-7B-preview", },
     {"name": "bailingmoe", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/inclusionAI/Ling-lite", },
     {"name": "llama4", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/meta-llama/Llama-4-Scout-17B-16E-Instruct", },
+    {"name": "glm4", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/THUDM/glm-4-9b-hf", },
 ]

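For context on where the `glm4` hash in `get_vocab_base_pre` comes from: `convert_hf_to_gguf_update.py` downloads each listed tokenizer, encodes a fixed probe text, and hashes the result, so any tokenizer with the same vocab and merges produces the same fingerprint. A rough sketch of the idea (the probe string and exact hash construction here are simplified assumptions, not copied from the script):

# Rough sketch of a tokenizer fingerprint in the spirit of chkhsh.
# Both the probe text and the hashing details are illustrative assumptions;
# the real update script uses a long multilingual/edge-case probe string.
import hashlib

from transformers import AutoTokenizer  # needs network access or a local cache

def tokenizer_fingerprint(repo: str) -> str:
    probe = "Hello, y'all! 你好 \n\t 3.14"  # hypothetical probe text
    ids = AutoTokenizer.from_pretrained(repo).encode(probe)
    # Identical vocab + merges -> identical token ids -> identical hash.
    return hashlib.sha256(str(ids).encode()).hexdigest()

# tokenizer_fingerprint("THUDM/glm-4-9b-hf") would yield the hash matched as "glm4".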
Diff for: gguf-py/gguf/constants.py

+19

@@ -280,6 +280,7 @@ class MODEL_ARCH(IntEnum):
     DEEPSEEK = auto()
     DEEPSEEK2 = auto()
     CHATGLM = auto()
+    GLM4 = auto()
     BITNET = auto()
     T5 = auto()
     T5ENCODER = auto()

@@ -487,6 +488,7 @@
     MODEL_ARCH.DEEPSEEK: "deepseek",
     MODEL_ARCH.DEEPSEEK2: "deepseek2",
     MODEL_ARCH.CHATGLM: "chatglm",
+    MODEL_ARCH.GLM4: "glm4",
     MODEL_ARCH.BITNET: "bitnet",
     MODEL_ARCH.T5: "t5",
     MODEL_ARCH.T5ENCODER: "t5encoder",

@@ -1561,6 +1563,23 @@
         MODEL_TENSOR.FFN_DOWN,
         MODEL_TENSOR.FFN_UP,
     ],
+    MODEL_ARCH.GLM4 : [
+        MODEL_TENSOR.TOKEN_EMBD,
+        MODEL_TENSOR.ROPE_FREQS,
+        MODEL_TENSOR.OUTPUT_NORM,
+        MODEL_TENSOR.OUTPUT,
+        MODEL_TENSOR.ATTN_NORM,
+        MODEL_TENSOR.ATTN_QKV,
+        MODEL_TENSOR.ATTN_Q,
+        MODEL_TENSOR.ATTN_K,
+        MODEL_TENSOR.ATTN_V,
+        MODEL_TENSOR.ATTN_OUT,
+        MODEL_TENSOR.FFN_NORM,
+        MODEL_TENSOR.FFN_DOWN,
+        MODEL_TENSOR.FFN_UP,
+        MODEL_TENSOR.ATTN_POST_NORM,
+        MODEL_TENSOR.FFN_POST_NORM,
+    ],
     MODEL_ARCH.BITNET: [
         MODEL_TENSOR.ATTN_Q,
         MODEL_TENSOR.ATTN_K,

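The GLM4 tensor list carries both the fused `ATTN_QKV` and the split `ATTN_Q`/`ATTN_K`/`ATTN_V` entries, plus the Gemma-2-style post-attention and post-FFN norms. To see which per-layer GGUF names this implies, the list can be expanded through `gguf.TENSOR_NAMES` (a sketch, assuming gguf-py from this repo is on the import path, e.g. via `pip install -e ./gguf-py`):

# Sketch: enumerate the GGUF tensor names implied for one GLM4 block.
# Assumes gguf-py from this repo is importable; treat the output as
# illustrative since it depends on gguf.TENSOR_NAMES.
import gguf

bid = 0  # block (layer) index
for tensor in gguf.MODEL_ARCH_TENSORS[gguf.MODEL_ARCH.GLM4]:
    fmt = gguf.TENSOR_NAMES[tensor]  # e.g. "blk.{bid}.attn_norm"
    print(fmt.format(bid=bid))       # -> "blk.0.attn_norm", ...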
Diff for: gguf-py/gguf/tensor_mapping.py

+5 -3

@@ -13,7 +13,7 @@ class TensorNameMap:
             "transformer.wte", # gpt2 gpt-j mpt refact qwen dbrx jais exaone
             "transformer.word_embeddings", # falcon
             "word_embeddings", # bloom
-            "model.embed_tokens", # llama-hf nemotron olmoe olmo2 rwkv6qwen2
+            "model.embed_tokens", # llama-hf nemotron olmoe olmo2 rwkv6qwen2 glm4-0414
             "tok_embeddings", # llama-pth
             "embeddings.word_embeddings", # bert nomic-bert
             "language_model.embedding.word_embeddings", # persimmon

@@ -241,7 +241,8 @@
         ),

         MODEL_TENSOR.ATTN_POST_NORM: (
-            "model.layers.{bid}.post_attention_layernorm", # gemma2 olmo2
+            "model.layers.{bid}.post_attention_layernorm", # gemma2 olmo2 # ge
+            "model.layers.{bid}.post_self_attn_layernorm", # glm-4-0414
         ),

         # Rotary embeddings

@@ -278,6 +279,7 @@
         # Post feed-forward norm
         MODEL_TENSOR.FFN_POST_NORM: (
             "model.layers.{bid}.post_feedforward_layernorm", # gemma2 olmo2
+            "model.layers.{bid}.post_mlp_layernorm", # glm-4-0414
         ),

         MODEL_TENSOR.FFN_GATE_INP: (

@@ -316,7 +318,7 @@
             "h.{bid}.mlp.c_fc", # gpt2
             "transformer.h.{bid}.mlp.fc1", # phi2
             "model.layers.{bid}.mlp.fc1", # phi2
-            "model.layers.{bid}.mlp.gate_up_proj", # phi3
+            "model.layers.{bid}.mlp.gate_up_proj", # phi3 glm-4-0414
             "model.layers.layers.{bid}.mlp.up_proj", # plamo
             "model.layers.{bid}.feed_forward.w3", # internlm2
             "encoder.layers.{bid}.mlp.fc11", # nomic-bert

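Together with the constants above, these mappings are what let the converter translate GLM-4-0414's HF tensor names into GGUF names. A small round-trip check (a sketch, again assuming gguf-py is importable; `get_tensor_name_map` and `TensorNameMap.get_name` are its existing helpers):

# Sketch: resolve the new GLM-4-0414 HF tensor names to GGUF names.
import gguf

tmap = gguf.get_tensor_name_map(gguf.MODEL_ARCH.GLM4, n_blocks=1)
for hf_name in (
    "model.layers.0.post_self_attn_layernorm",  # expected: blk.0.post_attention_norm
    "model.layers.0.post_mlp_layernorm",        # expected: blk.0.post_ffw_norm
    "model.layers.0.mlp.gate_up_proj",          # expected: blk.0.ffn_up
):
    # try_suffixes handles the ".weight" suffix real checkpoints carry
    mapped = tmap.get_name(hf_name + ".weight", try_suffixes=(".weight", ".bias"))
    print(hf_name, "->", mapped)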
Diff for: src/llama-arch.cpp

+20

@@ -54,6 +54,7 @@ static const std::map<llm_arch, const char *> LLM_ARCH_NAMES = {
     { LLM_ARCH_DEEPSEEK, "deepseek" },
     { LLM_ARCH_DEEPSEEK2, "deepseek2" },
     { LLM_ARCH_CHATGLM, "chatglm" },
+    { LLM_ARCH_GLM4, "glm4" },
     { LLM_ARCH_BITNET, "bitnet" },
     { LLM_ARCH_T5, "t5" },
     { LLM_ARCH_T5ENCODER, "t5encoder" },

@@ -1152,6 +1153,25 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_N
             { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
         },
     },
+    {
+        LLM_ARCH_GLM4,
+        {
+            { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
+            { LLM_TENSOR_ROPE_FREQS, "rope_freqs" },
+            { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
+            { LLM_TENSOR_OUTPUT, "output" },
+            { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
+            { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
+            { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
+            { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
+            { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
+            { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
+            { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
+            { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
+            { LLM_TENSOR_ATTN_POST_NORM, "blk.%d.post_attention_norm" },
+            { LLM_TENSOR_FFN_POST_NORM, "blk.%d.post_ffw_norm" },
+        },
+    },
     {
         LLM_ARCH_BITNET,
         {

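One thing this table makes easy to get wrong: the C++ `%d` templates must stay byte-for-byte in sync with gguf-py's `{bid}` templates, since the Python side writes the names and the C++ side loads them. A quick cross-check sketch (the `cpp_names` entries are hand-copied from this diff, not parsed out of llama-arch.cpp):

# Sketch: check the two new C++ name templates against gguf-py's TENSOR_NAMES.
import gguf

cpp_names = {
    gguf.MODEL_TENSOR.ATTN_POST_NORM: "blk.%d.post_attention_norm",
    gguf.MODEL_TENSOR.FFN_POST_NORM:  "blk.%d.post_ffw_norm",
}
for tensor, cpp_fmt in cpp_names.items():
    py_fmt = gguf.TENSOR_NAMES[tensor]  # uses "{bid}" placeholders
    # Expanding both templates for the same block index must agree.
    assert cpp_fmt % 7 == py_fmt.format(bid=7), (cpp_fmt, py_fmt)
print("C++ and Python tensor-name templates agree")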
Diff for: src/llama-arch.h

+3

@@ -58,6 +58,7 @@ enum llm_arch {
     LLM_ARCH_DEEPSEEK,
     LLM_ARCH_DEEPSEEK2,
     LLM_ARCH_CHATGLM,
+    LLM_ARCH_GLM4,
     LLM_ARCH_BITNET,
     LLM_ARCH_T5,
     LLM_ARCH_T5ENCODER,

@@ -256,6 +257,8 @@ enum llm_tensor {
     LLM_TENSOR_ATTN_Q_NORM,
     LLM_TENSOR_ATTN_K_NORM,
     LLM_TENSOR_LAYER_OUT_NORM,
+    LLM_TENSOR_POST_ATTN_NORM,
+    LLM_TENSOR_POST_MLP_NORM,
     LLM_TENSOR_SSM_IN,
     LLM_TENSOR_SSM_CONV1D,
     LLM_TENSOR_SSM_X,
