@@ -60,6 +60,7 @@ class Model:
60
60
tensor_map : gguf .TensorNameMap
61
61
tensor_names : set [str ] | None
62
62
fname_out : Path
63
+ fname_default : Path
63
64
gguf_writer : gguf .GGUFWriter
64
65
65
66
# subclasses should define this!
@@ -93,8 +94,27 @@ def __init__(self, dir_model: Path, ftype: gguf.LlamaFileType, fname_out: Path,
93
94
self .ftype = gguf .LlamaFileType .MOSTLY_BF16
94
95
ftype_up : str = self .ftype .name .partition ("_" )[2 ].upper ()
95
96
ftype_lw : str = ftype_up .lower ()
97
+
98
+ # Generate default filename based on model specification and available metadata
99
+ version_string = None # TODO: Add metadata support
100
+ expert_count = self .hparams ["num_local_experts" ] if "num_local_experts" in self .hparams else None
101
+ encodingScheme = {
102
+ gguf .LlamaFileType .ALL_F32 : "F32" ,
103
+ gguf .LlamaFileType .MOSTLY_F16 : "F16" ,
104
+ gguf .LlamaFileType .MOSTLY_BF16 : "BF16" ,
105
+ gguf .LlamaFileType .MOSTLY_Q8_0 : "Q8_0" ,
106
+ }[self .ftype ]
107
+ self .fname_default = f"{ gguf .naming_convention (dir_model .name , version_string , expert_count , self .parameter_count (), encodingScheme )} "
108
+
109
+ # Filename Output
110
+ if fname_out is not None :
111
+ # custom defined filename and path was provided
112
+ self .fname_out = fname_out
113
+ else :
114
+ # output in the same directory as the model by default
115
+ self .fname_out = dir_model .parent / self .fname_default
116
+
96
117
# allow templating the file name with the output ftype, useful with the "auto" ftype
97
- self .fname_out = fname_out .parent / fname_out .name .format (ftype_lw , outtype = ftype_lw , ftype = ftype_lw , OUTTYPE = ftype_up , FTYPE = ftype_up )
98
118
self .gguf_writer = gguf .GGUFWriter (self .fname_out , gguf .MODEL_ARCH_NAMES [self .model_arch ], endianess = self .endianess , use_temp_file = self .use_temp_file )
99
119
100
120
@classmethod
@@ -240,6 +260,25 @@ def extra_f16_tensors(self, name: str, new_name: str, bid: int | None, n_dims: i
240
260
241
261
return False
242
262
263
def parameter_count(self):
    """Return the total number of parameters (weights) in the model.

    Iterates over every tensor yielded by ``self.get_tensors()`` and sums
    their element counts. Auxiliary tensors that are not real model
    weights — attention bias/masked-bias buffers and rotary inverse
    frequencies — are skipped, mirroring the filtering done elsewhere in
    the conversion pipeline.

    Returns:
        int: total element count across all counted tensors.
    """
    import math  # local import: product of tensor dimensions below

    total_model_parameters = 0
    for name, data_torch in self.get_tensors():
        # Derived/auxiliary buffers, not trainable weights — don't count them.
        if name.endswith((".attention.masked_bias", ".attention.bias", ".rotary_emb.inv_freq")):
            continue

        # A tensor's element count is the product of its dimensions.
        # math.prod(()) == 1, so a 0-dim (scalar) tensor counts as one
        # parameter, matching the original manual accumulator.
        total_model_parameters += math.prod(data_torch.shape)

    return total_model_parameters
281
+
243
282
def write_tensors (self ):
244
283
max_name_len = max (len (s ) for _ , s in self .tensor_map .mapping .values ()) + len (".weight," )
245
284
@@ -2551,14 +2590,24 @@ def parse_args() -> argparse.Namespace:
2551
2590
"--verbose" , action = "store_true" ,
2552
2591
help = "increase output verbosity" ,
2553
2592
)
2593
+ parser .add_argument (
2594
+ "--get-outfile" , action = "store_true" ,
2595
+ help = "get calculated default outfile name"
2596
+ )
2554
2597
2555
2598
return parser .parse_args ()
2556
2599
2557
2600
2558
2601
def main () -> None :
2559
2602
args = parse_args ()
2560
2603
2561
- logging .basicConfig (level = logging .DEBUG if args .verbose else logging .INFO )
2604
+ if args .verbose :
2605
+ logging .basicConfig (level = logging .DEBUG )
2606
+ elif args .get_outfile :
2607
+ # Avoid printing anything besides the dump output
2608
+ logging .basicConfig (level = logging .WARNING )
2609
+ else :
2610
+ logging .basicConfig (level = logging .INFO )
2562
2611
2563
2612
dir_model = args .model
2564
2613
@@ -2587,19 +2636,19 @@ def main() -> None:
2587
2636
"auto" : gguf .LlamaFileType .GUESSED ,
2588
2637
}
2589
2638
2590
- if args .outfile is not None :
2591
- fname_out = args .outfile
2592
- else :
2593
- # output in the same directory as the model by default
2594
- fname_out = dir_model / 'ggml-model-{ftype}.gguf'
2595
-
2596
2639
logger .info (f"Loading model: { dir_model .name } " )
2597
2640
2598
2641
hparams = Model .load_hparams (dir_model )
2599
2642
2600
2643
with torch .inference_mode ():
2601
- model_class = Model .from_model_architecture (hparams ["architectures" ][0 ])
2602
- model_instance = model_class (dir_model , ftype_map [args .outtype ], fname_out , args .bigendian , args .use_temp_file , args .no_lazy )
2644
+ encodingScheme = ftype_map [args .outtype ]
2645
+ model_architecture = hparams ["architectures" ][0 ]
2646
+ model_class = Model .from_model_architecture (model_architecture )
2647
+ model_instance = model_class (dir_model , encodingScheme , args .outfile , args .bigendian , args .use_temp_file , args .no_lazy )
2648
+
2649
+ if args .get_outfile :
2650
+ print (f"{ model_instance .fname_default } " ) # noqa: NP100
2651
+ return
2603
2652
2604
2653
logger .info ("Set model parameters" )
2605
2654
model_instance .set_gguf_parameters ()
0 commit comments