@@ -133,15 +133,19 @@ def __init__(self, dir_model: Path, ftype: gguf.LlamaFileType, fname_out: Path,
                self.ftype = gguf.LlamaFileType.MOSTLY_BF16

        # Generate default filename based on model specification and available metadata
-        version_string = None  # TODO: Add metadata support
+        def get_model_name(metadata, dir_model):
+            if metadata is not None and metadata.name is not None:
+                return metadata.name
+            elif dir_model is not None and dir_model.name is not None:
+                return dir_model.name
+            return None
+        def extract_encoding_scheme(ftype):
+            # Extracts and converts the encoding scheme from the given file type name.
+            # e.g. 'gguf.LlamaFileType.ALL_F32' --> 'F32'
+            return ftype.name.partition("_")[2].upper()
+        model_name = get_model_name(metadata, dir_model)
        expert_count = self.hparams["num_local_experts"] if "num_local_experts" in self.hparams else None
-        encodingScheme = {
-            gguf.LlamaFileType.ALL_F32:     "F32",
-            gguf.LlamaFileType.MOSTLY_F16:  "F16",
-            gguf.LlamaFileType.MOSTLY_BF16: "BF16",
-            gguf.LlamaFileType.MOSTLY_Q8_0: "Q8_0",
-        }[self.ftype]
-        self.fname_default = f"{gguf.naming_convention(dir_model.name, version_string, expert_count, self.parameter_count(), encodingScheme)}"
+        self.fname_default = f"{gguf.naming_convention(model_name, self.metadata.version, expert_count, self.parameter_count(), extract_encoding_scheme(self.ftype))}"

        # Filename Output
        if fname_out is not None:
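Quick sanity check of the partition-based extraction added above, since it replaces the hard-coded lookup dict: the `LlamaFileType` enum below is a minimal hypothetical stand-in for `gguf.LlamaFileType`, and only `extract_encoding_scheme` itself comes from this diff.

```python
from enum import Enum

# Hypothetical stand-in for gguf.LlamaFileType, just enough to exercise the logic.
class LlamaFileType(Enum):
    ALL_F32     = 0
    MOSTLY_F16  = 1
    MOSTLY_Q8_0 = 7
    MOSTLY_BF16 = 30

def extract_encoding_scheme(ftype):
    # Everything after the first underscore, upper-cased: MOSTLY_BF16 -> BF16
    return ftype.name.partition("_")[2].upper()

for ft in LlamaFileType:
    print(ft.name, "->", extract_encoding_scheme(ft))
# ALL_F32 -> F32, MOSTLY_F16 -> F16, MOSTLY_Q8_0 -> Q8_0, MOSTLY_BF16 -> BF16
```

Deriving the string from the enum name means newly added file types need no table update, though any future member whose suffix is not a valid encoding label would pass through unchecked.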
@@ -151,7 +155,7 @@ def __init__(self, dir_model: Path, ftype: gguf.LlamaFileType, fname_out: Path,
            # output in the same directory as the model by default
            self.fname_out = dir_model.parent / self.fname_default

-        # allow templating the file name with the output ftype, useful with the "auto" ftype
+        # Configure GGUF Writer
        self.gguf_writer = gguf.GGUFWriter(self.fname_out, gguf.MODEL_ARCH_NAMES[self.model_arch], endianess=self.endianess, use_temp_file=self.use_temp_file)

    @classmethod
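For reference, the name-resolution order introduced in the first hunk is: metadata override first, then the model directory name. A runnable sketch, where `SimpleNamespace` stands in for the real metadata object and the names and paths are made up:

```python
from pathlib import Path
from types import SimpleNamespace

def get_model_name(metadata, dir_model):
    # Prefer an explicit metadata name, then fall back to the model directory's name.
    if metadata is not None and metadata.name is not None:
        return metadata.name
    elif dir_model is not None and dir_model.name is not None:
        return dir_model.name
    return None

meta = SimpleNamespace(name="TinyLlama-1.1B")          # stand-in for the metadata object
print(get_model_name(meta, Path("models/tinyllama")))  # TinyLlama-1.1B (metadata wins)
print(get_model_name(None, Path("models/tinyllama")))  # tinyllama (directory fallback)
print(get_model_name(None, None))                      # None
```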
@@ -324,6 +328,9 @@ def extra_f16_tensors(self, name: str, new_name: str, bid: int | None, n_dims: i
        return False

    def parameter_count(self):
+        # TODO: Ensure parameter count is accurate throughout various model types
+        # May currently overestimate parameter count in Mamba models because
+        # output weights are tied with token embeddings.
        total_model_parameters = 0
        for name, data_torch in self.get_tensors():
            # Got A Tensor
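The Mamba TODO above is easy to reproduce with a toy version of this loop. A minimal sketch, assuming the elided body sums element counts (e.g. via `numel()`) over everything `get_tensors()` yields: when the output projection shares storage with the token embedding, those weights are counted twice.

```python
import torch

embed = torch.zeros(32000, 4096)
tensors = [("token_embd.weight", embed),
           ("output.weight", embed)]  # tied: same storage as the embedding

total_model_parameters = 0
for name, data_torch in tensors:
    total_model_parameters += data_torch.numel()  # elements in this tensor

print(total_model_parameters)  # 262144000, twice the 131072000 unique parameters
```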