@@ -617,6 +617,9 @@ def get_vocab_base_pre(self, tokenizer) -> str:
         if chkhsh == "4e2b24cc4770243d65a2c9ec19770a72f08cffc161adbb73fcbb6b7dd45a0aae":
             # ref: https://huggingface.co/LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct
             res = "exaone"
+        if chkhsh == "60824e3c0d9401f89943cbb2fff727f0e2d4c545ba4df2d6e4f09a6db0f5b450":
+            # ref: https://huggingface.co/facebook/chameleon-7b
+            res = "chameleon"

         if res is None:
             logger.warning("\n")
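Note on the hunk above: `get_vocab_base_pre()` identifies which pre-tokenizer a model uses by hashing the token IDs its tokenizer produces for a fixed probe string, and the new branch maps the hash obtained from facebook/chameleon-7b to the `"chameleon"` pre-tokenizer type. A minimal sketch of that detection idea follows; the probe string here is a placeholder (the real script hashes its own, much longer `chktxt`, and these branches are normally regenerated by `convert_hf_to_gguf_update.py`), so only the overall flow is meant to carry over.

```python
# Sketch of the hash-based pre-tokenizer detection, assuming a placeholder
# probe string; the chkhsh values in convert_hf_to_gguf.py are computed from
# the script's own chktxt, so this exact hash only matches with that probe.
from hashlib import sha256
from transformers import AutoTokenizer

def detect_pre_tokenizer(model_dir: str, probe: str) -> str | None:
    tokenizer = AutoTokenizer.from_pretrained(model_dir)
    chkhsh = sha256(str(tokenizer.encode(probe)).encode()).hexdigest()
    if chkhsh == "60824e3c0d9401f89943cbb2fff727f0e2d4c545ba4df2d6e4f09a6db0f5b450":
        return "chameleon"  # ref: https://huggingface.co/facebook/chameleon-7b
    return None  # unknown hash: the converter logs a warning and aborts
```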
@@ -3872,6 +3875,48 @@ def prepare_tensors(self):

         super().prepare_tensors()

+
+@Model.register("ChameleonForCausalLM")
+class ChameleonModel(Model):
+    model_arch = gguf.MODEL_ARCH.CHAMELEON
+
+    def set_gguf_parameters(self):
+        super().set_gguf_parameters()
+        self.gguf_writer.add_swin_norm(self.hparams.get("swin_norm", False))
+
+    def set_vocab(self):
+        self._set_vocab_gpt2()
+
+    def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
+        # ignore image tokenizer for now
+        # TODO: remove this once image support is implemented for Chameleon
+        if name.startswith("model.vqmodel"):
+            return []
+
+        n_head = self.hparams["num_attention_heads"]
+        n_kv_head = self.hparams.get("num_key_value_heads")
+        hidden_dim = self.hparams.get("hidden_size")
+
+        if name.endswith(("q_proj.weight", "q_proj.bias")):
+            data_torch = LlamaModel.permute(data_torch, n_head, n_head)
+        if name.endswith(("k_proj.weight", "k_proj.bias")):
+            data_torch = LlamaModel.permute(data_torch, n_head, n_kv_head)
+        if name.endswith(("q_norm.weight", "q_norm.bias")):
+            data_torch = ChameleonModel._reverse_hf_permute(data_torch, n_head, hidden_dim)
+        if name.endswith(("k_norm.weight", "k_norm.bias")):
+            data_torch = ChameleonModel._reverse_hf_permute(data_torch, n_kv_head, hidden_dim)
+
+        return [(self.map_tensor_name(name), data_torch)]
+
+    # see: https://github.com/huggingface/transformers/blob/72fb02c47dbbe1999ae105319f24631cad6e2e00/src/transformers/models/chameleon/convert_chameleon_weights_to_hf.py#L176-L203
+    @staticmethod
+    def _reverse_hf_permute(data_torch, n_heads, hidden_dim):
+        head_dim = hidden_dim // n_heads
+        data_torch = data_torch[0].view(2, head_dim // 2).t().reshape(1, -1)
+        data_torch = data_torch.repeat_interleave(n_heads, 0)
+        return data_torch
+
+
 ###### CONVERSION LOGIC ######


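As the referenced comment indicates, the Hugging Face Chameleon conversion script reorders the per-head `q_norm`/`k_norm` weights; `_reverse_hf_permute` re-interleaves the two halves of one head's norm weight and repeats the result across heads. A self-contained check of that re-interleaving, using a copy of the helper and a dummy weight with illustrative sizes (n_heads=4, head_dim=8 are assumptions; the real shapes come from the checkpoint, with hidden_dim = n_heads * head_dim):

```python
# Standalone check of the re-interleaving done by _reverse_hf_permute on a
# dummy q_norm weight; sizes are illustrative, not taken from the model.
import torch

def reverse_hf_permute(data_torch: torch.Tensor, n_heads: int, hidden_dim: int) -> torch.Tensor:
    # copy of the helper from the diff above
    head_dim = hidden_dim // n_heads
    out = data_torch[0].view(2, head_dim // 2).t().reshape(1, -1)
    return out.repeat_interleave(n_heads, 0)

n_heads, head_dim = 4, 8
# per-head norm weight stored with the two rotary halves stacked: [0..3 | 4..7]
hf_norm = torch.arange(head_dim, dtype=torch.float32).repeat(n_heads, 1)
ggml_norm = reverse_hf_permute(hf_norm, n_heads, n_heads * head_dim)
print(ggml_norm.shape)  # torch.Size([4, 8])
print(ggml_norm[0])     # tensor([0., 4., 1., 5., 2., 6., 3., 7.]) -> halves interleaved
```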