diff --git a/vllm/model_executor/models/telechat2.py b/vllm/model_executor/models/telechat2.py
index 28c37bb9661..02ca7fe08e5 100644
--- a/vllm/model_executor/models/telechat2.py
+++ b/vllm/model_executor/models/telechat2.py
@@ -31,19 +31,6 @@ class TeleChat2Model(LlamaModel):
 
-    hf_to_vllm_mapper = WeightsMapper(
-        orig_to_new_prefix={
-            "transformer.": "model.",
-        },
-        orig_to_new_substr={
-            ".h.": ".layers.",
-            ".self_attention.": ".self_attn.",
-            ".word_embeddings.": ".embed_tokens.",
-            ".dense.": ".o_proj.",
-            ".ln_f.": ".norm.",
-        },
-    )
-
     def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
         # 1. Initialize the LlamaModel with bias
         vllm_config.model_config.hf_config.bias = True
@@ -118,6 +105,19 @@ def load_weights(self, weights: Iterable[Tuple[str,
 
 class TeleChat2ForCausalLM(LlamaForCausalLM):
 
+    hf_to_vllm_mapper = WeightsMapper(
+        orig_to_new_prefix={
+            "transformer.": "model.",
+        },
+        orig_to_new_substr={
+            ".h.": ".layers.",
+            ".self_attention.": ".self_attn.",
+            ".word_embeddings.": ".embed_tokens.",
+            ".dense.": ".o_proj.",
+            ".ln_f.": ".norm.",
+        },
+    )
+
     def _init_model(self, vllm_config: VllmConfig, prefix: str = ""):
         return TeleChat2Model(vllm_config=vllm_config, prefix=prefix)
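The substantive change above is that the `WeightsMapper`, which rewrites HuggingFace TeleChat2 checkpoint parameter names into the Llama-style names used inside vLLM, moves from `TeleChat2Model` to the top-level `TeleChat2ForCausalLM`; the mapping rules themselves are unchanged. A minimal, standalone sketch of the renaming those rules encode follows; the `map_name` helper and the rule ordering (prefix before substring) are illustrative assumptions, not vLLM's actual `WeightsMapper` implementation.

```python
# Illustrative sketch only: re-implements the renaming encoded by the
# hf_to_vllm_mapper rules in the diff above (not vLLM's WeightsMapper code).
ORIG_TO_NEW_PREFIX = {"transformer.": "model."}
ORIG_TO_NEW_SUBSTR = {
    ".h.": ".layers.",
    ".self_attention.": ".self_attn.",
    ".word_embeddings.": ".embed_tokens.",
    ".dense.": ".o_proj.",
    ".ln_f.": ".norm.",
}


def map_name(name: str) -> str:
    # Assumed ordering: prefix rules first, then substring rules; for these
    # particular rules the result is the same either way.
    for old, new in ORIG_TO_NEW_PREFIX.items():
        if name.startswith(old):
            name = new + name[len(old):]
    for old, new in ORIG_TO_NEW_SUBSTR.items():
        name = name.replace(old, new)
    return name


# TeleChat2 attention output projection -> Llama-style o_proj.
assert map_name("transformer.h.0.self_attention.dense.weight") == \
    "model.layers.0.self_attn.o_proj.weight"
# Final layer norm and word embeddings.
assert map_name("transformer.ln_f.weight") == "model.norm.weight"
assert map_name("transformer.word_embeddings.weight") == "model.embed_tokens.weight"
```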