Skip to content

Commit 672a6f1

Browse files
mofosyne, compilade, and ngxson
authored
convert-*.py: GGUF Naming Convention Refactor and Metadata Override Refactor (#7499)
Main thing is that the default output filename will take this form {name}{parameters}{finetune}{version}{encoding}{kind} In addition this add and remove some entries in the KV store and adds a metadata class with automatic heuristics capability to derive some values based on model card content * No Change: - Internal GGUF Spec - `general.architecture` - `general.quantization_version` - `general.alignment` - `general.file_type` - General Model Details - `general.name` - `general.author` - `general.version` - `general.description` - Licensing details - `general.license` - Typically represents the converted GGUF repo (Unless made from scratch) - `general.url` - Model Source during conversion - `general.source.url` * Removed: - Model Source during conversion - `general.source.huggingface.repository` * Added: - General Model Details - `general.organization` - `general.finetune` - `general.basename` - `general.quantized_by` - `general.size_label` - Licensing details - `general.license.name` - `general.license.link` - Typically represents the converted GGUF repo (Unless made from scratch) - `general.doi` - `general.uuid` - `general.repo_url` - Model Source during conversion - `general.source.doi` - `general.source.uuid` - `general.source.repo_url` - Base Model Source - `general.base_model.count` - `general.base_model.{id}.name` - `general.base_model.{id}.author` - `general.base_model.{id}.version` - `general.base_model.{id}.organization` - `general.base_model.{id}.url` (Model Website/Paper) - `general.base_model.{id}.doi` - `general.base_model.{id}.uuid` - `general.base_model.{id}.repo_url` (Model Source Repository (git/svn/etc...)) - Array based KV stores - `general.tags` - `general.languages` - `general.datasets` --------- Co-authored-by: compilade <[email protected]> Co-authored-by: Xuan Son Nguyen <[email protected]>
1 parent 3807c3d commit 672a6f1

13 files changed

+1177
-231
lines changed

convert_hf_to_gguf.py

+112-75
Large diffs are not rendered by default.

convert_lora_to_gguf.py

+19-10
Original file line numberDiff line numberDiff line change
@@ -251,6 +251,10 @@ def parse_args() -> argparse.Namespace:
251251
"--verbose", action="store_true",
252252
help="increase output verbosity",
253253
)
254+
parser.add_argument(
255+
"--dry-run", action="store_true",
256+
help="only print out what will be done, without writing any new files",
257+
)
254258
parser.add_argument(
255259
"--base", type=Path, required=True,
256260
help="directory containing base model file",
@@ -300,6 +304,12 @@ def parse_args() -> argparse.Namespace:
300304
# load base model
301305
logger.info(f"Loading base model: {dir_base_model.name}")
302306
hparams = Model.load_hparams(dir_base_model)
307+
308+
with open(lora_config, "r") as f:
309+
lparams: dict[str, Any] = json.load(f)
310+
311+
alpha: float = lparams["lora_alpha"]
312+
303313
with torch.inference_mode():
304314
try:
305315
model_class = Model.from_model_architecture(hparams["architectures"][0])
@@ -310,6 +320,14 @@ def parse_args() -> argparse.Namespace:
310320
class LoraModel(model_class):
311321
model_arch = model_class.model_arch
312322

323+
def set_type(self):
324+
self.gguf_writer.add_type(gguf.GGUFType.ADAPTER)
325+
self.gguf_writer.add_string(gguf.Keys.Adapter.TYPE, "lora")
326+
327+
def set_gguf_parameters(self):
328+
self.gguf_writer.add_float32(gguf.Keys.Adapter.LORA_ALPHA, float(alpha))
329+
super().set_gguf_parameters()
330+
313331
def get_tensors(self) -> Iterator[tuple[str, Tensor]]:
314332
tensor_map: dict[str, PartialLoraTensor] = {}
315333

@@ -357,18 +375,9 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter
357375
is_big_endian=args.bigendian,
358376
use_temp_file=False,
359377
eager=args.no_lazy,
360-
model_name=None,
378+
dry_run=args.dry_run,
361379
)
362380

363-
with open(lora_config, "r") as f:
364-
lparams: dict[str, Any] = json.load(f)
365-
366-
alpha = lparams["lora_alpha"]
367-
368-
model_instance.gguf_writer.add_string(gguf.Keys.General.TYPE, gguf.GGUFType.ADAPTER)
369-
model_instance.gguf_writer.add_string(gguf.Keys.Adapter.TYPE, "lora")
370-
model_instance.gguf_writer.add_float32(gguf.Keys.Adapter.LORA_ALPHA, float(alpha))
371-
model_instance.gguf_writer.add_quantization_version(gguf.GGML_QUANT_VERSION)
372381
logger.info("Exporting model...")
373382
model_instance.write()
374383
logger.info(f"Model successfully exported to {model_instance.fname_out}")

examples/convert_legacy_llama.py

+128-107
Large diffs are not rendered by default.

gguf-py/README.md

+8
Original file line numberDiff line numberDiff line change
@@ -78,5 +78,13 @@ python -m build
7878
python -m twine upload dist/*
7979
```
8080

81+
## Run Unit Tests
82+
83+
From root of this repository you can run this command to run all the unit tests
84+
85+
```bash
86+
python -m unittest discover ./gguf-py -v
87+
```
88+
8189
## TODO
8290
- [ ] Include conversion scripts as command line entry points in this package.

gguf-py/gguf/__init__.py

+2
Original file line numberDiff line numberDiff line change
@@ -5,3 +5,5 @@
55
from .quants import *
66
from .tensor_mapping import *
77
from .vocab import *
8+
from .utility import *
9+
from .metadata import *

gguf-py/gguf/constants.py

+54-14
Original file line numberDiff line numberDiff line change
@@ -19,19 +19,60 @@
1919

2020
class Keys:
2121
class General:
22-
TYPE = "general.type"
23-
ARCHITECTURE = "general.architecture"
24-
QUANTIZATION_VERSION = "general.quantization_version"
25-
ALIGNMENT = "general.alignment"
26-
NAME = "general.name"
27-
AUTHOR = "general.author"
28-
VERSION = "general.version"
29-
URL = "general.url"
30-
DESCRIPTION = "general.description"
31-
LICENSE = "general.license"
32-
SOURCE_URL = "general.source.url"
33-
SOURCE_HF_REPO = "general.source.huggingface.repository"
34-
FILE_TYPE = "general.file_type"
22+
TYPE = "general.type"
23+
ARCHITECTURE = "general.architecture"
24+
QUANTIZATION_VERSION = "general.quantization_version"
25+
ALIGNMENT = "general.alignment"
26+
FILE_TYPE = "general.file_type"
27+
28+
# Authorship Metadata
29+
NAME = "general.name"
30+
AUTHOR = "general.author"
31+
VERSION = "general.version"
32+
ORGANIZATION = "general.organization"
33+
34+
FINETUNE = "general.finetune"
35+
BASENAME = "general.basename"
36+
37+
DESCRIPTION = "general.description"
38+
QUANTIZED_BY = "general.quantized_by"
39+
40+
SIZE_LABEL = "general.size_label"
41+
42+
# Licensing details
43+
LICENSE = "general.license"
44+
LICENSE_NAME = "general.license.name"
45+
LICENSE_LINK = "general.license.link"
46+
47+
# Typically represents the converted GGUF repo (Unless native)
48+
URL = "general.url" # Model Website/Paper
49+
DOI = "general.doi"
50+
UUID = "general.uuid"
51+
REPO_URL = "general.repo_url" # Model Source Repository (git/svn/etc...)
52+
53+
# Model Source during conversion
54+
SOURCE_URL = "general.source.url" # Model Website/Paper
55+
SOURCE_DOI = "general.source.doi"
56+
SOURCE_UUID = "general.source.uuid"
57+
SOURCE_REPO_URL = "general.source.repo_url" # Model Source Repository (git/svn/etc...)
58+
59+
# Base Model Source. There can be more than one source if it's a merged
60+
# model like with 'Mistral-7B-Merge-14-v0.1'. This will assist in
61+
# tracing lineage of models as they are finetuned or merged over time.
62+
BASE_MODEL_COUNT = "general.base_model.count"
63+
BASE_MODEL_NAME = "general.base_model.{id}.name"
64+
BASE_MODEL_AUTHOR = "general.base_model.{id}.author"
65+
BASE_MODEL_VERSION = "general.base_model.{id}.version"
66+
BASE_MODEL_ORGANIZATION = "general.base_model.{id}.organization"
67+
BASE_MODEL_URL = "general.base_model.{id}.url" # Model Website/Paper
68+
BASE_MODEL_DOI = "general.base_model.{id}.doi"
69+
BASE_MODEL_UUID = "general.base_model.{id}.uuid"
70+
BASE_MODEL_REPO_URL = "general.base_model.{id}.repo_url" # Model Source Repository (git/svn/etc...)
71+
72+
# Array based KV stores
73+
TAGS = "general.tags"
74+
LANGUAGES = "general.languages"
75+
DATASETS = "general.datasets"
3576

3677
class LLM:
3778
VOCAB_SIZE = "{arch}.vocab_size"
@@ -1233,7 +1274,6 @@ def get_type(val: Any) -> GGUFValueType:
12331274
KEY_GENERAL_DESCRIPTION = Keys.General.DESCRIPTION
12341275
KEY_GENERAL_LICENSE = Keys.General.LICENSE
12351276
KEY_GENERAL_SOURCE_URL = Keys.General.SOURCE_URL
1236-
KEY_GENERAL_SOURCE_HF_REPO = Keys.General.SOURCE_HF_REPO
12371277
KEY_GENERAL_FILE_TYPE = Keys.General.FILE_TYPE
12381278

12391279
# LLM

gguf-py/gguf/gguf_writer.py

+140-18
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
import tempfile
88
from dataclasses import dataclass
99
from enum import Enum, auto
10+
from math import prod
1011
from pathlib import Path
1112
from io import BufferedWriter
1213
from typing import IO, Any, Sequence, Mapping
@@ -106,6 +107,53 @@ def __init__(
106107

107108
self.add_architecture()
108109

110+
def get_total_parameter_count(self) -> tuple[int, int, int, int]:
111+
total_params = 0
112+
shared_params = 0
113+
expert_params = 0
114+
115+
expert_sum = 0
116+
n_expert_tensors = 0
117+
118+
last_lora_a: tuple[str, TensorInfo] | None = None
119+
120+
for tensors in self.tensors:
121+
for name, info in tensors.items():
122+
123+
shape = info.shape
124+
125+
if name.endswith(".lora_a"):
126+
last_lora_a = (name, info)
127+
continue
128+
elif name.endswith(".lora_b"):
129+
if last_lora_a is None or last_lora_a[0] != name[:-1] + "a":
130+
# Bail when the LoRA pair can't be found trivially
131+
logger.warning("can't measure LoRA size correctly, tensor order is unusual")
132+
return 0, 0, 0, 0
133+
else:
134+
shape = (*shape[:-1], last_lora_a[1].shape[-1])
135+
136+
size = prod(shape)
137+
138+
if "_exps." in name:
139+
expert_params += (size // shape[-3])
140+
expert_sum += shape[-3]
141+
n_expert_tensors += 1
142+
else:
143+
shared_params += size
144+
145+
total_params += size
146+
147+
# Hopefully this should work even for variable-expert-count models
148+
expert_count = (expert_sum // n_expert_tensors) if n_expert_tensors > 0 else 0
149+
150+
# Negate the total to signal it's likely not exact
151+
if last_lora_a is not None:
152+
total_params = -total_params
153+
154+
# NOTE: keep the output in the same order as accepted by 'size_label' in gguf-py/gguf/utility.py
155+
return total_params, shared_params, expert_params, expert_count
156+
109157
def format_shard_names(self, path: Path) -> list[Path]:
110158
if len(self.tensors) == 1:
111159
return [path]
@@ -115,6 +163,7 @@ def open_output_file(self, path: Path | None = None) -> None:
115163
if self.state is WriterState.EMPTY and self.fout is not None and (path is None or path == self.path):
116164
# allow calling this multiple times as long as the path is the same
117165
return
166+
118167
if self.state is not WriterState.NO_FILE:
119168
raise ValueError(f'Expected output file to be not yet opened, got {self.state}')
120169

@@ -136,6 +185,8 @@ def print_plan(self) -> list[Path]:
136185

137186
if self.dry_run:
138187
logger.info("Dry run, not writing files")
188+
for name in filenames:
189+
print(name) # noqa: NP100
139190
exit()
140191

141192
return filenames
@@ -430,43 +481,114 @@ def add_type(self, type_name: str) -> None:
430481
def add_architecture(self) -> None:
431482
self.add_string(Keys.General.ARCHITECTURE, self.arch)
432483

484+
def add_quantization_version(self, quantization_version: int) -> None:
485+
self.add_uint32(Keys.General.QUANTIZATION_VERSION, quantization_version)
486+
487+
def add_custom_alignment(self, alignment: int) -> None:
488+
self.data_alignment = alignment
489+
self.add_uint32(Keys.General.ALIGNMENT, alignment)
490+
491+
def add_file_type(self, ftype: int) -> None:
492+
self.add_uint32(Keys.General.FILE_TYPE, ftype)
493+
494+
def add_name(self, name: str) -> None:
495+
self.add_string(Keys.General.NAME, name)
496+
433497
def add_author(self, author: str) -> None:
434498
self.add_string(Keys.General.AUTHOR, author)
435499

436500
def add_version(self, version: str) -> None:
437501
self.add_string(Keys.General.VERSION, version)
438502

439-
def add_tensor_data_layout(self, layout: str) -> None:
440-
self.add_string(Keys.LLM.TENSOR_DATA_LAYOUT.format(arch=self.arch), layout)
503+
def add_organization(self, organization: str) -> None:
504+
self.add_string(Keys.General.ORGANIZATION, organization)
441505

442-
def add_url(self, url: str) -> None:
443-
self.add_string(Keys.General.URL, url)
506+
def add_finetune(self, finetune: str) -> None:
507+
self.add_string(Keys.General.FINETUNE, finetune)
508+
509+
def add_basename(self, basename: str) -> None:
510+
self.add_string(Keys.General.BASENAME, basename)
444511

445512
def add_description(self, description: str) -> None:
446513
self.add_string(Keys.General.DESCRIPTION, description)
447514

448-
def add_licence(self, licence: str) -> None:
449-
self.add_string(Keys.General.LICENSE, licence)
515+
def add_quantized_by(self, quantized: str) -> None:
516+
self.add_string(Keys.General.QUANTIZED_BY, quantized)
517+
518+
def add_size_label(self, size_label: str) -> None:
519+
self.add_string(Keys.General.SIZE_LABEL, size_label)
520+
521+
def add_license(self, license: str) -> None:
522+
self.add_string(Keys.General.LICENSE, license)
523+
524+
def add_license_name(self, license: str) -> None:
525+
self.add_string(Keys.General.LICENSE_NAME, license)
526+
527+
def add_license_link(self, license: str) -> None:
528+
self.add_string(Keys.General.LICENSE_LINK, license)
529+
530+
def add_url(self, url: str) -> None:
531+
self.add_string(Keys.General.URL, url)
532+
533+
def add_doi(self, doi: str) -> None:
534+
self.add_string(Keys.General.DOI, doi)
535+
536+
def add_uuid(self, uuid: str) -> None:
537+
self.add_string(Keys.General.UUID, uuid)
538+
539+
def add_repo_url(self, repo_url: str) -> None:
540+
self.add_string(Keys.General.REPO_URL, repo_url)
450541

451542
def add_source_url(self, url: str) -> None:
452543
self.add_string(Keys.General.SOURCE_URL, url)
453544

454-
def add_source_hf_repo(self, repo: str) -> None:
455-
self.add_string(Keys.General.SOURCE_HF_REPO, repo)
545+
def add_source_doi(self, doi: str) -> None:
546+
self.add_string(Keys.General.SOURCE_DOI, doi)
456547

457-
def add_file_type(self, ftype: int) -> None:
458-
self.add_uint32(Keys.General.FILE_TYPE, ftype)
548+
def add_source_uuid(self, uuid: str) -> None:
549+
self.add_string(Keys.General.SOURCE_UUID, uuid)
459550

460-
def add_name(self, name: str) -> None:
461-
self.add_string(Keys.General.NAME, name)
551+
def add_source_repo_url(self, repo_url: str) -> None:
552+
self.add_string(Keys.General.SOURCE_REPO_URL, repo_url)
462553

463-
def add_quantization_version(self, quantization_version: int) -> None:
464-
self.add_uint32(
465-
Keys.General.QUANTIZATION_VERSION, quantization_version)
554+
def add_base_model_count(self, source_count: int) -> None:
555+
self.add_uint32(Keys.General.BASE_MODEL_COUNT, source_count)
466556

467-
def add_custom_alignment(self, alignment: int) -> None:
468-
self.data_alignment = alignment
469-
self.add_uint32(Keys.General.ALIGNMENT, alignment)
557+
def add_base_model_name(self, source_id: int, name: str) -> None:
558+
self.add_string(Keys.General.BASE_MODEL_NAME.format(id=source_id), name)
559+
560+
def add_base_model_author(self, source_id: int, author: str) -> None:
561+
self.add_string(Keys.General.BASE_MODEL_AUTHOR.format(id=source_id), author)
562+
563+
def add_base_model_version(self, source_id: int, version: str) -> None:
564+
self.add_string(Keys.General.BASE_MODEL_VERSION.format(id=source_id), version)
565+
566+
def add_base_model_organization(self, source_id: int, organization: str) -> None:
567+
self.add_string(Keys.General.BASE_MODEL_ORGANIZATION.format(id=source_id), organization)
568+
569+
def add_base_model_url(self, source_id: int, url: str) -> None:
570+
self.add_string(Keys.General.BASE_MODEL_URL.format(id=source_id), url)
571+
572+
def add_base_model_doi(self, source_id: int, doi: str) -> None:
573+
self.add_string(Keys.General.BASE_MODEL_DOI.format(id=source_id), doi)
574+
575+
def add_base_model_uuid(self, source_id: int, uuid: str) -> None:
576+
self.add_string(Keys.General.BASE_MODEL_UUID.format(id=source_id), uuid)
577+
578+
def add_base_model_repo_url(self, source_id: int, repo_url: str) -> None:
579+
self.add_string(Keys.General.BASE_MODEL_REPO_URL.format(id=source_id), repo_url)
580+
581+
def add_tags(self, tags: Sequence[str]) -> None:
582+
self.add_array(Keys.General.TAGS, tags)
583+
584+
def add_languages(self, languages: Sequence[str]) -> None:
585+
self.add_array(Keys.General.LANGUAGES, languages)
586+
587+
def add_datasets(self, datasets: Sequence[str]) -> None:
588+
self.add_array(Keys.General.DATASETS, datasets)
589+
590+
def add_tensor_data_layout(self, layout: str) -> None:
591+
self.add_string(Keys.LLM.TENSOR_DATA_LAYOUT.format(arch=self.arch), layout)
470592

471593
def add_vocab_size(self, size: int) -> None:
472594
self.add_uint32(Keys.LLM.VOCAB_SIZE.format(arch=self.arch), size)

0 commit comments

Comments (0)