Skip to content

Commit 438e88d

Browse files
committed
convert-hf-to-gguf.py: use metadata override info to calculate the default filename
1 parent c704442 commit 438e88d

File tree

2 files changed

+19
-9
lines changed

2 files changed

+19
-9
lines changed

convert-hf-to-gguf.py

+16-9
Original file line numberDiff line numberDiff line change
@@ -133,15 +133,19 @@ def __init__(self, dir_model: Path, ftype: gguf.LlamaFileType, fname_out: Path,
133133
self.ftype = gguf.LlamaFileType.MOSTLY_BF16
134134

135135
# Generate default filename based on model specification and available metadata
136-
version_string = None # TODO: Add metadata support
136+
def get_model_name(metadata, dir_model):
137+
if metadata is not None and metadata.name is not None:
138+
return metadata.name
139+
elif dir_model is not None and dir_model.name is not None:
140+
return dir_model.name
141+
return None
142+
def extract_encoding_scheme(ftype):
143+
# Extracts and converts the encoding scheme from the given file type name.
144+
# e.g. 'gguf.LlamaFileType.ALL_F32' --> 'F32'
145+
return ftype.name.partition("_")[2].upper()
146+
model_name = get_model_name(metadata.name, dir_model)
137147
expert_count = self.hparams["num_local_experts"] if "num_local_experts" in self.hparams else None
138-
encodingScheme = {
139-
gguf.LlamaFileType.ALL_F32 : "F32",
140-
gguf.LlamaFileType.MOSTLY_F16 : "F16",
141-
gguf.LlamaFileType.MOSTLY_BF16 : "BF16",
142-
gguf.LlamaFileType.MOSTLY_Q8_0 : "Q8_0",
143-
}[self.ftype]
144-
self.fname_default = f"{gguf.naming_convention(dir_model.name, version_string, expert_count, self.parameter_count(), encodingScheme)}"
148+
self.fname_default = f"{gguf.naming_convention(model_name, self.metadata.version, expert_count, self.parameter_count(), extract_encoding_scheme(self.ftype))}"
145149

146150
# Filename Output
147151
if fname_out is not None:
@@ -151,7 +155,7 @@ def __init__(self, dir_model: Path, ftype: gguf.LlamaFileType, fname_out: Path,
151155
# output in the same directory as the model by default
152156
self.fname_out = dir_model.parent / self.fname_default
153157

154-
# allow templating the file name with the output ftype, useful with the "auto" ftype
158+
# Configure GGUF Writer
155159
self.gguf_writer = gguf.GGUFWriter(self.fname_out, gguf.MODEL_ARCH_NAMES[self.model_arch], endianess=self.endianess, use_temp_file=self.use_temp_file)
156160

157161
@classmethod
@@ -324,6 +328,9 @@ def extra_f16_tensors(self, name: str, new_name: str, bid: int | None, n_dims: i
324328
return False
325329

326330
def parameter_count(self):
331+
# TODO: Ensure the parameter count is accurate across various model types
332+
# May currently overestimate parameter count in Mamba model because
333+
# output weights are tied to the token embeddings.
327334
total_model_parameters = 0
328335
for name, data_torch in self.get_tensors():
329336
# Got A Tensor

convert.py

+3
Original file line numberDiff line numberDiff line change
@@ -1319,6 +1319,9 @@ def pick_output_type(model: LazyModel, output_type_str: str | None) -> GGMLFileT
13191319

13201320

13211321
def model_parameter_count(model: LazyModel) -> int:
1322+
# TODO: Ensure the parameter count is accurate across various model types
1323+
# May currently overestimate parameter count in Mamba model because
1324+
# output weights are tied to the token embeddings.
13221325
total_model_parameters = 0
13231326
for name, lazy_tensor in model.items():
13241327
# Got A Tensor

0 commit comments

Comments
 (0)