Skip to content

Commit 438e88d

Browse files
committed
convert-hf-to-gguf.py: use metadata override info to calculate the default filename
1 parent c704442 commit 438e88d

File tree

2 files changed

+19
-9
lines changed

2 files changed

+19
-9
lines changed

convert-hf-to-gguf.py

+16-9
Original file line numberDiff line numberDiff line change
@@ -133,15 +133,19 @@ def __init__(self, dir_model: Path, ftype: gguf.LlamaFileType, fname_out: Path,
133133
self.ftype = gguf.LlamaFileType.MOSTLY_BF16
134134

135135
# Generate default filename based on model specification and available metadata
136-
version_string = None # TODO: Add metadata support
136+
def get_model_name(metadata, dir_model):
137+
if metadata is not None and metadata.name is not None:
138+
return metadata.name
139+
elif dir_model is not None and dir_model.name is not None:
140+
return dir_model.name
141+
return None
142+
def extract_encoding_scheme(ftype):
143+
# Extracts and converts the encoding scheme from the given file type name.
144+
# e.g. 'gguf.LlamaFileType.ALL_F32' --> 'F32'
145+
return ftype.name.partition("_")[2].upper()
146+
model_name = get_model_name(metadata.name, dir_model)
137147
expert_count = self.hparams["num_local_experts"] if "num_local_experts" in self.hparams else None
138-
encodingScheme = {
139-
gguf.LlamaFileType.ALL_F32 : "F32",
140-
gguf.LlamaFileType.MOSTLY_F16 : "F16",
141-
gguf.LlamaFileType.MOSTLY_BF16 : "BF16",
142-
gguf.LlamaFileType.MOSTLY_Q8_0 : "Q8_0",
143-
}[self.ftype]
144-
self.fname_default = f"{gguf.naming_convention(dir_model.name, version_string, expert_count, self.parameter_count(), encodingScheme)}"
148+
self.fname_default = f"{gguf.naming_convention(model_name, self.metadata.version, expert_count, self.parameter_count(), extract_encoding_scheme(self.ftype))}"
145149

146150
# Filename Output
147151
if fname_out is not None:
@@ -151,7 +155,7 @@ def __init__(self, dir_model: Path, ftype: gguf.LlamaFileType, fname_out: Path,
151155
# output in the same directory as the model by default
152156
self.fname_out = dir_model.parent / self.fname_default
153157

154-
# allow templating the file name with the output ftype, useful with the "auto" ftype
158+
# Configure GGUF Writer
155159
self.gguf_writer = gguf.GGUFWriter(self.fname_out, gguf.MODEL_ARCH_NAMES[self.model_arch], endianess=self.endianess, use_temp_file=self.use_temp_file)
156160

157161
@classmethod
@@ -324,6 +328,9 @@ def extra_f16_tensors(self, name: str, new_name: str, bid: int | None, n_dims: i
324328
return False
325329

326330
def parameter_count(self):
331+
# TODO: Ensure the parameter count is accurate across various model types
332+
# May currently overestimate parameter count in Mamba model because
333+
# output weights are tied to the token embeddings.
327334
total_model_parameters = 0
328335
for name, data_torch in self.get_tensors():
329336
# Got A Tensor

convert.py

+3
Original file line numberDiff line numberDiff line change
@@ -1319,6 +1319,9 @@ def pick_output_type(model: LazyModel, output_type_str: str | None) -> GGMLFileT
13191319

13201320

13211321
def model_parameter_count(model: LazyModel) -> int:
1322+
# TODO: Ensure the parameter count is accurate across various model types
1323+
# May currently overestimate parameter count in Mamba model because
1324+
# output weights are tied to the token embeddings.
13221325
total_model_parameters = 0
13231326
for name, lazy_tensor in model.items():
13241327
# Got A Tensor

0 commit comments

Comments
 (0)