diff --git a/vllm/model_executor/models/telechat2.py b/vllm/model_executor/models/telechat2.py
index 28c37bb9661..02ca7fe08e5 100644
--- a/vllm/model_executor/models/telechat2.py
+++ b/vllm/model_executor/models/telechat2.py
@@ -31,19 +31,6 @@ class TeleChat2Model(LlamaModel):
 
-    hf_to_vllm_mapper = WeightsMapper(
-        orig_to_new_prefix={
-            "transformer.": "model.",
-        },
-        orig_to_new_substr={
-            ".h.": ".layers.",
-            ".self_attention.": ".self_attn.",
-            ".word_embeddings.": ".embed_tokens.",
-            ".dense.": ".o_proj.",
-            ".ln_f.": ".norm.",
-        },
-    )
-
     def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
         # 1. Initialize the LlamaModel with bias
         vllm_config.model_config.hf_config.bias = True
@@ -118,6 +105,19 @@ def load_weights(self, weights: Iterable[Tuple[str,
 
 class TeleChat2ForCausalLM(LlamaForCausalLM):
 
+    hf_to_vllm_mapper = WeightsMapper(
+        orig_to_new_prefix={
+            "transformer.": "model.",
+        },
+        orig_to_new_substr={
+            ".h.": ".layers.",
+            ".self_attention.": ".self_attn.",
+            ".word_embeddings.": ".embed_tokens.",
+            ".dense.": ".o_proj.",
+            ".ln_f.": ".norm.",
+        },
+    )
+
     def _init_model(self, vllm_config: VllmConfig, prefix: str = ""):
         return TeleChat2Model(vllm_config=vllm_config, prefix=prefix)
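The substantive change above is that the `WeightsMapper`, which rewrites HuggingFace TeleChat2 checkpoint parameter names into the Llama-style names used inside vLLM, moves from `TeleChat2Model` to the top-level `TeleChat2ForCausalLM`; the mapping rules themselves are unchanged. A minimal, standalone sketch of the renaming those rules encode follows; the `map_name` helper and the rule ordering (prefix before substring) are illustrative assumptions, not vLLM's actual `WeightsMapper` implementation.

```python
# Illustrative sketch only: re-implements the renaming encoded by the
# hf_to_vllm_mapper rules in the diff above (not vLLM's WeightsMapper code).
ORIG_TO_NEW_PREFIX = {"transformer.": "model."}
ORIG_TO_NEW_SUBSTR = {
    ".h.": ".layers.",
    ".self_attention.": ".self_attn.",
    ".word_embeddings.": ".embed_tokens.",
    ".dense.": ".o_proj.",
    ".ln_f.": ".norm.",
}


def map_name(name: str) -> str:
    # Assumed ordering: prefix rules first, then substring rules; for these
    # particular rules the result is the same either way.
    for old, new in ORIG_TO_NEW_PREFIX.items():
        if name.startswith(old):
            name = new + name[len(old):]
    for old, new in ORIG_TO_NEW_SUBSTR.items():
        name = name.replace(old, new)
    return name


# TeleChat2 attention output projection -> Llama-style o_proj.
assert map_name("transformer.h.0.self_attention.dense.weight") == \
    "model.layers.0.self_attn.o_proj.weight"
# Final layer norm and word embeddings.
assert map_name("transformer.ln_f.weight") == "model.norm.weight"
assert map_name("transformer.word_embeddings.weight") == "model.embed_tokens.weight"
```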