14
14
sys .path .insert (1 , str (Path (__file__ ).parent / 'gguf-py' ))
15
15
import gguf
16
16
17
+
17
18
class GGMLFormat(IntEnum):
    """Container formats for legacy GGML model files, oldest to newest.

    The magic/version read from the file header determines which of these
    the input file uses; later code branches on it (e.g. GGML files carry
    no vocab scores, GGJT adds a format version).
    """
    GGML = 0  # original unversioned format
    GGMF = 1  # adds a format magic/version field
    GGJT = 2  # adds tensor alignment/padding; versioned (v1/v2/v3)
21
22
23
+
22
24
class GGMLFType (IntEnum ):
23
25
ALL_F32 = 0
24
26
MOSTLY_F16 = 1
@@ -38,6 +40,7 @@ class GGMLFType(IntEnum):
38
40
MOSTLY_Q5_K_M = 17
39
41
MOSTLY_Q6_K = 18
40
42
43
+
41
44
class Hyperparameters :
42
45
def __init__ (self ):
43
46
self .n_vocab = self .n_embd = self .n_mult = self .n_head = 0
@@ -69,6 +72,7 @@ def load(self, data, offset):
69
72
def __str__ (self ):
70
73
return f'<Hyperparameters: n_vocab={ self .n_vocab } , n_embd={ self .n_embd } , n_mult={ self .n_mult } , n_head={ self .n_head } , n_layer={ self .n_layer } , n_rot={ self .n_rot } , n_ff={ self .n_ff } , ftype={ self .ftype .name } >'
71
74
75
+
72
76
class Vocab :
73
77
def __init__ (self , load_scores = True ):
74
78
self .items = []
@@ -90,6 +94,7 @@ def load(self, data, offset, n_vocab):
90
94
self .items .append ((item_text , item_score ))
91
95
return offset - orig_offset
92
96
97
+
93
98
class Tensor :
94
99
def __init__ (self , use_padding = True ):
95
100
self .name = None
@@ -123,6 +128,7 @@ def load(self, data, offset):
123
128
# print(n_dims, name_len, dtype, self.dims, self.name, pad)
124
129
return offset - orig_offset
125
130
131
+
126
132
class GGMLModel :
127
133
def __init__ (self ):
128
134
self .hyperparameters = None
@@ -159,8 +165,8 @@ def validate_conversion(self, ftype):
159
165
if ftype not in (GGMLFType .ALL_F32 , GGMLFType .MOSTLY_F16 ):
160
166
err = 'Quantizations changed in GGJTv2. Can only convert unquantized GGML files older than GGJTv2.'
161
167
elif (self .file_format == GGMLFormat .GGJT and self .format_version == 2 ):
162
- if ftype in ( GGMLFType .MOSTLY_Q4_0 , GGMLFType .MOSTLY_Q4_1 ,
163
- GGMLFType .MOSTLY_Q4_1_SOME_F16 , GGMLFType .MOSTLY_Q8_0 ):
168
+ if ftype in (GGMLFType .MOSTLY_Q4_0 , GGMLFType .MOSTLY_Q4_1 ,
169
+ GGMLFType .MOSTLY_Q4_1_SOME_F16 , GGMLFType .MOSTLY_Q8_0 ):
164
170
err = 'Q4 and Q8 quantizations changed in GGJTv3.'
165
171
if len (err ) > 0 :
166
172
raise ValueError (f'{ err } Sorry, your { self .file_format .name } v{ self .format_version } file of type { ftype .name } is not eligible for conversion.' )
@@ -187,6 +193,7 @@ def load(self, data, offset):
187
193
hp .set_n_ff (self )
188
194
return offset
189
195
196
+
190
197
class GGMLToGGUF :
191
198
def __init__ (self , ggml_model , data , cfg , params_override = None , vocab_override = None , special_vocab = None ):
192
199
hp = ggml_model .hyperparameters
@@ -217,7 +224,7 @@ def save(self):
217
224
gguf_writer = gguf .GGUFWriter (
218
225
self .cfg .output ,
219
226
gguf .MODEL_ARCH_NAMES [gguf .MODEL_ARCH .LLAMA ],
220
- use_temp_file = False )
227
+ use_temp_file = False )
221
228
self .add_params (gguf_writer )
222
229
self .add_vocab (gguf_writer )
223
230
if self .special_vocab is not None :
@@ -341,7 +348,8 @@ def add_tensors(self, gguf_writer):
341
348
mapped_name ,
342
349
data [tensor .start_offset :tensor .start_offset + tensor .len_bytes ],
343
350
raw_shape = tempdims ,
344
- raw_dtype = tensor .dtype )
351
+ raw_dtype = tensor .dtype )
352
+
345
353
346
354
def handle_metadata (cfg , hp ):
347
355
import convert
@@ -365,38 +373,40 @@ def handle_metadata(cfg, hp):
365
373
raise ValueError ('Unable to load metadata' )
366
374
vocab = convert .load_vocab (
367
375
cfg .vocab_dir if cfg .vocab_dir is not None else cfg .model_metadata_dir ,
368
- cfg .vocabtype )
376
+ cfg .vocabtype )
369
377
# FIXME: Respect cfg.vocab_dir?
370
378
svocab = gguf .SpecialVocab (cfg .model_metadata_dir ,
371
- load_merges = cfg .vocabtype == 'bpe' ,
372
- n_vocab = vocab .vocab_size )
379
+ load_merges = cfg .vocabtype == 'bpe' ,
380
+ n_vocab = vocab .vocab_size )
373
381
convert .check_vocab_size (params , vocab )
374
382
return (params , vocab , svocab )
375
383
384
+
376
385
def handle_args():
    """Parse command-line arguments for the GGML-to-GGUF conversion script.

    Returns:
        argparse.Namespace with: input (Path, required), output (Path,
        required), name, desc, gqa (int, default 1), eps (str, default
        '5.0e-06'), context_length (int, default 2048),
        model_metadata_dir (Path or None), vocab_dir (Path or None),
        vocabtype ('spm' or 'bpe', default 'spm').
    """
    parser = argparse.ArgumentParser(description='Convert GGML models to GGUF')
    parser.add_argument('--input', '-i', type=Path, required=True,
                        help='Input GGMLv3 filename')
    parser.add_argument('--output', '-o', type=Path, required=True,
                        help='Output GGUF filename')
    parser.add_argument('--name',
                        help='Set model name')
    parser.add_argument('--desc',
                        help='Set model description')
    parser.add_argument('--gqa', type=int, default=1,
                        help='grouped-query attention factor (use 8 for LLaMA2 70B)')
    # NOTE: eps is kept as a string here; presumably parsed to float downstream.
    parser.add_argument('--eps', default='5.0e-06',
                        help='RMS norm eps: Use 1e-6 for LLaMA1 and OpenLLaMA, use 1e-5 for LLaMA2')
    parser.add_argument('--context-length', '-c', type=int, default=2048,
                        help='Default max context length: LLaMA1 is typically 2048, LLaMA2 is typically 4096')
    parser.add_argument('--model-metadata-dir', '-m', type=Path,
                        help='Load HuggingFace/.pth vocab and metadata from the specified directory')
    parser.add_argument("--vocab-dir", type=Path,
                        help="directory containing tokenizer.model, if separate from model file - only meaningful with --model-metadata-dir")
    parser.add_argument("--vocabtype", choices=["spm", "bpe"], default="spm",
                        help="vocab format - only meaningful with --model-metadata-dir and/or --vocab-dir (default: spm)")
    return parser.parse_args()
399
408
409
+
400
410
def main ():
401
411
cfg = handle_args ()
402
412
print (f'* Using config: { cfg } ' )
@@ -406,7 +416,7 @@ def main():
406
416
data = np .memmap (cfg .input , mode = 'r' )
407
417
model = GGMLModel ()
408
418
print ('* Scanning GGML input file' )
409
- offset = model .load (data , 0 )
419
+ offset = model .load (data , 0 ) # noqa
410
420
print (f'* GGML model hyperparameters: { model .hyperparameters } ' )
411
421
vocab_override = None
412
422
params_override = None
@@ -421,12 +431,15 @@ def main():
421
431
print ('\n === WARNING === Special tokens may not be converted correctly. Use --model-metadata-dir if possible === WARNING ===\n ' )
422
432
if model .file_format == GGMLFormat .GGML :
423
433
print ('! This is a very old GGML file that does not contain vocab scores. Strongly recommend using model metadata!' )
424
- converter = GGMLToGGUF (model , data , cfg ,
434
+ converter = GGMLToGGUF (
435
+ model , data , cfg ,
425
436
params_override = params_override ,
426
437
vocab_override = vocab_override ,
427
- special_vocab = special_vocab )
438
+ special_vocab = special_vocab
439
+ )
428
440
converter .save ()
429
441
print (f'* Successful completion. Output saved to: { cfg .output } ' )
430
442
443
+
431
444
if __name__ == '__main__' :
432
445
main ()
0 commit comments