+import json
import os
import re
import struct

class UnquantizedDataType:
    name: str

-DT_F16 = UnquantizedDataType('F16')
-DT_F32 = UnquantizedDataType('F32')
+
+DT_F16 = UnquantizedDataType("F16")
+DT_F32 = UnquantizedDataType("F32")
+


@dataclass(frozen=True)
class QuantizedDataType:
    groupsize: int
    have_addends: bool
    have_g_idx: bool

+
DataType = UnquantizedDataType

DATA_TYPE_TO_FTYPE: dict[DataType, int] = {
@@ -35,17 +39,28 @@ class QuantizedDataType:
    DT_F32: np.dtype(np.float32),
}

-NUMPY_TYPE_TO_DATA_TYPE: dict[np.dtype[Any], DataType] = {dtype: data_type for (data_type, dtype) in DATA_TYPE_TO_NUMPY.items()}
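+# inverse of DATA_TYPE_TO_NUMPY: look up the DataType for a given numpy dtype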
+NUMPY_TYPE_TO_DATA_TYPE: dict[np.dtype[Any], DataType] = {
+    dtype: data_type for (data_type, dtype) in DATA_TYPE_TO_NUMPY.items()
+}


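# maps HuggingFace PEFT sub-layer names to the corresponding ggml tensor names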
HF_SUBLAYER_TO_GGML = {
    "self_attn.q_proj": "attention.wq.weight",
    "self_attn.k_proj": "attention.wk.weight",
    "self_attn.v_proj": "attention.wv.weight",
    "self_attn.o_proj": "attention.wo.weight",
+    # "embed_tokens.weight": "tok_embeddings.weight",
+    # "norm.weight": "norm.weight",
+    # "lm_head.weight": "output.weight",
+    # "mlp.gate_proj": "feed_forward.w1.weight",
+    # "mlp.down_proj": "feed_forward.w2.weight",
+    # "mlp.up_proj": "feed_forward.w3.weight",
+    # "input_layernorm": "attention_norm.weight",
+    # "post_attention_layernorm": "ffn_norm.weight",
}

+
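# translate a PEFT LoRA tensor name ("...layers.N.<sub_layer>.lora_A|B.weight") to its ggml name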
def translate_tensor_name(t):
-    match = re.match(r'.*layers\.(\d+)\.(\w+\.\w+)\.lora_(A|B)\.weight', t)
+    match = re.match(r".*layers\.(\d+)\.(\w+\.\w+)\.lora_(A|B)\.weight", t)
    if match:
        nn = match.group(1)
        sub_layer = match.group(2)
@@ -54,50 +69,83 @@ def translate_tensor_name(t):
        sub_layer_renamed = HF_SUBLAYER_TO_GGML.get(sub_layer)
        if sub_layer_renamed is None:
            print(f"Error: unrecognized sub-layer {sub_layer} in tensor {t}")
-            exit(1)
+            sys.exit(1)

        output_string = f"layers.{nn}.{HF_SUBLAYER_TO_GGML[sub_layer]}.lora{lora_type}"
        return output_string
    else:
        print(f"Error: unrecognized tensor {t}")
-        exit(1)
+        sys.exit(1)
+

-def write_file_header(fout):
-    fout.write(b"ggla"[::-1])  # magic (ggml lora)
-    fout.write(struct.pack("i", 1))  # file version
+def write_file_header(fout, params):
+    fout.write(b"ggla"[::-1])  # magic (ggml lora)
+    fout.write(struct.pack("i", 1))  # file version
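+    # LoRA hyperparameters from adapter_config.json: rank (r) and lora_alpha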
+    fout.write(struct.pack("ii", params["r"], params["lora_alpha"]))


def write_tensor_header(fout, name: str, shape: Sequence[int], data_type: np.dtype[Any]) -> None:
-    sname = name.encode('utf-8')
-    fout.write(struct.pack("iii", len(shape), len(sname), DATA_TYPE_TO_FTYPE[NUMPY_TYPE_TO_DATA_TYPE[data_type]]))
+    sname = name.encode("utf-8")
+    fout.write(
+        struct.pack(
+            "iii",
+            len(shape),
+            len(sname),
+            DATA_TYPE_TO_FTYPE[NUMPY_TYPE_TO_DATA_TYPE[data_type]],
+        )
+    )
    fout.write(struct.pack("i" * len(shape), *shape[::-1]))
    fout.write(sname)
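    # advance to the next 32-byte boundary so the tensor data is 32-byte aligned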
    fout.seek((fout.tell() + 31) & -32)
-

-if len(sys.argv) < 2:
-    print(f"Usage: python {sys.argv[0]} adapter_model.bin [ggml_adapter_model.bin]")
+
+if len(sys.argv) != 2:
+    print(f"Usage: python {sys.argv[0]} <path>")
+    print(
+        "Path must contain HuggingFace PEFT LoRA files 'adapter_config.json' and 'adapter_model.bin'"
+    )
+    sys.exit(1)
+
+input_json = os.path.join(sys.argv[1], "adapter_config.json")
+input_model = os.path.join(sys.argv[1], "adapter_model.bin")
+output_path = os.path.join(sys.argv[1], "ggml-adapter-model.bin")
+
+model = torch.load(input_model, map_location="cpu")
+
+with open(input_json, "r") as f:
+    params = json.load(f)
+
+if params["peft_type"] != "LORA":
+    print(f"Error: unsupported adapter type {params['peft_type']}, expected LORA")
    sys.exit(1)

-input_path = sys.argv[1]
-if len(sys.argv) > 2:
-    output_path = sys.argv[2]
-else:
-    output_filename = f"ggml_{os.path.basename(input_path)}"
-    output_path = os.path.join(os.path.dirname(input_path), output_filename)
+if params["fan_in_fan_out"]:
+    print("Error: param fan_in_fan_out is not supported")
+    sys.exit(1)

-model = torch.load(input_path, map_location="cpu")
+# TODO: these seem to be layers that have been trained but without lora.
+# doesn't seem widely used but eventually should be supported
+if params["modules_to_save"] is not None and len(params["modules_to_save"]) > 0:
+    print("Error: param modules_to_save is not supported")
+    sys.exit(1)
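# write the ggml lora file: the file header followed by a (header, data) record per tensor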
with open(output_path, "wb") as fout:
-    write_file_header(fout)
+    fout.truncate()
+
+    write_file_header(fout, params)
    for k, v in model.items():
        # since ggml doesn't always support other types for the second operand,
        # the tensors are always converted and exported as f32
-        t = v.float().numpy()
+        if v.dtype != torch.float16 and v.dtype != torch.float32:
+            v = v.float()
+
+        t = v.numpy()
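        # lora_A matrices are written transposed; lora_B tensors are written as-is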
        if "lora_A" in k:
            t = t.T
-        print(f"{k} => {translate_tensor_name(k)} {t.shape} {t.dtype} {t.nbytes/1024/1024:.2f}MB")
+        print(
+            f"{k} => {translate_tensor_name(k)} {t.shape} {t.dtype} {t.nbytes/1024/1024:.2f}MB"
+        )
        write_tensor_header(fout, translate_tensor_name(k), t.shape, t.dtype)
        t.tofile(fout)

-print(f"Converted {input_path} to {output_path}")
+print(f"Converted {input_json} and {input_model} to {output_path}")