@@ -184,6 +184,8 @@ def from_model_architecture(model_architecture):
             return MixtralModel
         if model_architecture == "PhiForCausalLM":
             return Phi2Model
+        if model_architecture == "PlamoForCausalLM":
+            return PlamoModel
         return Model

     def _is_model_safetensors(self) -> bool:
@@ -225,6 +227,8 @@ def _get_model_architecture(self) -> gguf.MODEL_ARCH:
             return gguf.MODEL_ARCH.LLAMA
         if arch == "PhiForCausalLM":
            return gguf.MODEL_ARCH.PHI2
+        if arch == "PlamoForCausalLM":
+            return gguf.MODEL_ARCH.PLAMO

         raise NotImplementedError(f'Architecture "{arch}" not supported!')

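Not part of the patch: both lookups above key off the same architectures[0] entry in the checkpoint's config.json. A rough sketch of how they fit together, assuming the Model base class from convert-hf-to-gguf.py is in scope; the helper name resolve_converter is illustrative, not from the patch:

# Illustrative sketch, not from the patch: the HF config's "architectures" entry
# drives both dispatch points added above.
import json
from pathlib import Path

def resolve_converter(dir_model: Path):
    hparams = json.loads((dir_model / "config.json").read_text(encoding="utf-8"))
    arch = hparams["architectures"][0]           # "PlamoForCausalLM" for PLaMo-13B

    model_class = Model.from_model_architecture(arch)   # first hunk -> PlamoModel
    # On the instance, _get_model_architecture() maps the same string to
    # gguf.MODEL_ARCH.PLAMO (second hunk), selecting the PLAMO tensor name map.
    return model_class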
@@ -1002,11 +1006,91 @@ def set_gguf_parameters(self):
         self.gguf_writer.add_add_bos_token(False)


+class PlamoModel(Model):
+    def set_vocab(self):
+        self._set_vocab_sentencepiece()
+
+    def set_gguf_parameters(self):
+        hparams = self.hparams
+        block_count = hparams["num_hidden_layers"]
+
+        self.gguf_writer.add_name("PLaMo")
+        self.gguf_writer.add_context_length(4096)  # not in config.json
+        self.gguf_writer.add_embedding_length(hparams["hidden_size"])
+        self.gguf_writer.add_feed_forward_length(hparams["intermediate_size"])
+        self.gguf_writer.add_block_count(block_count)
+        self.gguf_writer.add_head_count(hparams["num_attention_heads"])
+        self.gguf_writer.add_head_count_kv(5)  # hparams["num_key_value_heads"] is wrong
+        self.gguf_writer.add_layer_norm_rms_eps(hparams["rms_norm_eps"])
+
+    def shuffle_attn_q_weight(self, data_torch):
+        assert data_torch.size() == (5120, 5120)
+        data_torch = data_torch.reshape(8, 5, 128, 5120)
+        data_torch = torch.permute(data_torch, (1, 0, 2, 3))
+        data_torch = torch.reshape(data_torch, (5120, 5120))
+        return data_torch
+
+    def shuffle_attn_output_weight(self, data_torch):
+        assert data_torch.size() == (5120, 5120)
+        data_torch = data_torch.reshape(5120, 8, 5, 128)
+        data_torch = torch.permute(data_torch, (0, 2, 1, 3))
+        data_torch = torch.reshape(data_torch, (5120, 5120))
+        return data_torch
+
+    def write_tensors(self):
+        block_count = self.hparams.get("num_layers", self.hparams.get("num_hidden_layers"))
+        tensor_map = gguf.get_tensor_name_map(self.model_arch, block_count)
+
+        for name, data_torch in self.get_tensors():
+            if "self_attn.rotary_emb.inv_freq" in name:
+                continue
+
+            # map tensor names
+            new_name = tensor_map.get_name(name, try_suffixes=(".weight", ".bias"))
+            if new_name is None:
+                print(f"Can not map tensor {name!r}")
+                sys.exit()
+
+            # shuffle for broadcasting of gqa in ggml_mul_mat
+            if new_name.endswith("attn_q.weight"):
+                data_torch = self.shuffle_attn_q_weight(data_torch)
+            elif new_name.endswith("attn_output.weight"):
+                data_torch = self.shuffle_attn_output_weight(data_torch)
+
+            old_dtype = data_torch.dtype
+
+            # convert any unsupported data types to float32
+            if data_torch.dtype not in (torch.float16, torch.float32):
+                data_torch = data_torch.to(torch.float32)
+
+            data = data_torch.squeeze().numpy()
+
+            n_dims = len(data.shape)
+            data_dtype = data.dtype
+
+            # if f32 desired, convert any float16 to float32
+            if self.ftype == 0 and data_dtype == np.float16:
+                data = data.astype(np.float32)
+
+            # TODO: Why can't we use these float16 as-is? There should be no reason to store float16 as float32
+            if self.ftype == 1 and data_dtype == np.float16 and n_dims == 1:
+                data = data.astype(np.float32)
+
+            # if f16 desired, convert any float32 2-dim weight tensors to float16
+            if self.ftype == 1 and data_dtype == np.float32 and name.endswith(".weight") and n_dims == 2:
+                data = data.astype(np.float16)
+
+            print(f"{new_name}, n_dims = {n_dims}, {old_dtype} --> {data.dtype}")
+
+            self.gguf_writer.add_tensor(new_name, data)
+
+
 ###### CONVERSION LOGIC ######


 def parse_args() -> argparse.Namespace:
-    parser = argparse.ArgumentParser(description="Convert a huggingface model to a GGML compatible file")
+    parser = argparse.ArgumentParser(
+        description="Convert a huggingface model to a GGML compatible file")
     parser.add_argument(
         "--vocab-only", action="store_true",
         help="extract only the vocab",
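For reference, the dtype handling at the tail of write_tensors follows the ftype convention used by the other converters in this file (ftype 0 writes f32, ftype 1 writes f16). Restated as a standalone helper, purely as a paraphrase of the patch above; the function name choose_dtype is mine:

# Paraphrase of the dtype policy in PlamoModel.write_tensors (not from the patch).
import numpy as np

def choose_dtype(data: np.ndarray, name: str, ftype: int) -> np.ndarray:
    n_dims = data.ndim
    if ftype == 0 and data.dtype == np.float16:
        return data.astype(np.float32)   # f32 output: widen stored f16 tensors
    if ftype == 1 and data.dtype == np.float16 and n_dims == 1:
        return data.astype(np.float32)   # 1-D tensors are stored as f32 even for f16 output
    if ftype == 1 and data.dtype == np.float32 and name.endswith(".weight") and n_dims == 2:
        return data.astype(np.float16)   # 2-D weight matrices are narrowed to f16
    return data                          # everything else keeps its dtype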