1
1
# Adapted from
2
- # https://github.com/THUDM/GLM-4
3
- """Inference-only ChatGLM model compatible with THUDM weights."""
2
+ # https://github.com/THUDM/CogAgent
3
+ """Inference-only CogAgent model compatible with THUDM weights."""
4
4
from argparse import Namespace
5
5
from array import array
6
6
from typing import (Dict , Iterable , List , Mapping , Optional , Set , Tuple ,
@@ -201,7 +201,6 @@ def input_processor_for_glmv(ctx: InputContext, inputs: DecoderOnlyInputs):
201
201
202
202
new_input_ids = []
203
203
final_processed_position = 0
204
- final_processed_position = 0
205
204
206
205
for boi_position , eoi_position in zip (boi_positions , eoi_positions ):
207
206
assert boi_position < eoi_position
@@ -275,12 +274,15 @@ def __init__(
275
274
# https://huggingface.co/THUDM/chatglm3-6b-32k/blob/e210410255278dd9d74463cf396ba559c0ef801c/modeling_chatglm.py#L141
276
275
rope_ratio = getattr (config , "rope_ratio" , 1.0 )
277
276
max_positions = getattr (config , "seq_length" , 8192 )
277
+ # NOTE: THUDM/cogagent-9b-20241220 uses original_rope=False,
278
+ # which is equivalent to is_neox_style=True
279
+ is_neox_style = not config .original_rope
278
280
self .rotary_emb = get_rope (
279
281
self .head_dim ,
280
282
rotary_dim = self .head_dim // 2 ,
281
283
max_position = max_positions ,
282
284
base = 10000 * rope_ratio ,
283
- is_neox_style = False ,
285
+ is_neox_style = is_neox_style ,
284
286
)
285
287
self .attn = Attention (self .num_heads ,
286
288
self .head_dim ,
@@ -779,4 +781,4 @@ def __new__(
779
781
return ChatGLMV (vllm_config = vllm_config , prefix = prefix )
780
782
# Initialize LLM
781
783
else :
782
- return ChatGLM (vllm_config = vllm_config , prefix = prefix )
784
+ return ChatGLM (vllm_config = vllm_config , prefix = prefix )
0 commit comments