Commit 65dfe23

convert-hf-to-gguf.py: add --get-outfile command and refactor
1 parent 9b82476 commit 65dfe23

File tree: 4 files changed (+114, -52 lines)

convert-hf-to-gguf.py (+59, -10)
@@ -60,6 +60,7 @@ class Model:
     tensor_map: gguf.TensorNameMap
     tensor_names: set[str] | None
     fname_out: Path
+    fname_default: Path
     gguf_writer: gguf.GGUFWriter

     # subclasses should define this!
@@ -93,8 +94,27 @@ def __init__(self, dir_model: Path, ftype: gguf.LlamaFileType, fname_out: Path,
             self.ftype = gguf.LlamaFileType.MOSTLY_BF16
         ftype_up: str = self.ftype.name.partition("_")[2].upper()
         ftype_lw: str = ftype_up.lower()
+
+        # Generate default filename based on model specification and available metadata
+        version_string = None # TODO: Add metadata support
+        expert_count = self.hparams["num_local_experts"] if "num_local_experts" in self.hparams else None
+        encodingScheme = {
+            gguf.LlamaFileType.ALL_F32    : "F32",
+            gguf.LlamaFileType.MOSTLY_F16 : "F16",
+            gguf.LlamaFileType.MOSTLY_BF16: "BF16",
+            gguf.LlamaFileType.MOSTLY_Q8_0: "Q8_0",
+        }[self.ftype]
+        self.fname_default = f"{gguf.naming_convention(dir_model.name, version_string, expert_count, self.parameter_count(), encodingScheme)}"
+
+        # Filename Output
+        if fname_out is not None:
+            # custom defined filename and path was provided
+            self.fname_out = fname_out
+        else:
+            # output in the same directory as the model by default
+            self.fname_out = dir_model.parent / self.fname_default
+
         # allow templating the file name with the output ftype, useful with the "auto" ftype
-        self.fname_out = fname_out.parent / fname_out.name.format(ftype_lw, outtype=ftype_lw, ftype=ftype_lw, OUTTYPE=ftype_up, FTYPE=ftype_up)
         self.gguf_writer = gguf.GGUFWriter(self.fname_out, gguf.MODEL_ARCH_NAMES[self.model_arch], endianess=self.endianess, use_temp_file=self.use_temp_file)

     @classmethod
@@ -240,6 +260,25 @@ def extra_f16_tensors(self, name: str, new_name: str, bid: int | None, n_dims: i

         return False

+    def parameter_count(self):
+        total_model_parameters = 0
+        for name, data_torch in self.get_tensors():
+            # Got A Tensor
+
+            # We don't need these
+            if name.endswith((".attention.masked_bias", ".attention.bias", ".rotary_emb.inv_freq")):
+                continue
+
+            # Calculate Tensor Volume
+            sum_weights_in_tensor = 1
+            for dim in data_torch.shape:
+                sum_weights_in_tensor *= dim
+
+            # Add Tensor Volume To Running Count
+            total_model_parameters += sum_weights_in_tensor
+
+        return total_model_parameters
+
     def write_tensors(self):
         max_name_len = max(len(s) for _, s in self.tensor_map.mapping.values()) + len(".weight,")

@@ -2551,14 +2590,24 @@ def parse_args() -> argparse.Namespace:
         "--verbose", action="store_true",
         help="increase output verbosity",
     )
+    parser.add_argument(
+        "--get-outfile", action="store_true",
+        help="get calculated default outfile name"
+    )

     return parser.parse_args()


 def main() -> None:
     args = parse_args()

-    logging.basicConfig(level=logging.DEBUG if args.verbose else logging.INFO)
+    if args.verbose:
+        logging.basicConfig(level=logging.DEBUG)
+    elif args.get_outfile:
+        # Avoid printing anything besides the dump output
+        logging.basicConfig(level=logging.WARNING)
+    else:
+        logging.basicConfig(level=logging.INFO)

     dir_model = args.model

@@ -2587,19 +2636,19 @@ def main() -> None:
         "auto": gguf.LlamaFileType.GUESSED,
     }

-    if args.outfile is not None:
-        fname_out = args.outfile
-    else:
-        # output in the same directory as the model by default
-        fname_out = dir_model / 'ggml-model-{ftype}.gguf'
-
     logger.info(f"Loading model: {dir_model.name}")

     hparams = Model.load_hparams(dir_model)

     with torch.inference_mode():
-        model_class = Model.from_model_architecture(hparams["architectures"][0])
-        model_instance = model_class(dir_model, ftype_map[args.outtype], fname_out, args.bigendian, args.use_temp_file, args.no_lazy)
+        encodingScheme = ftype_map[args.outtype]
+        model_architecture = hparams["architectures"][0]
+        model_class = Model.from_model_architecture(model_architecture)
+        model_instance = model_class(dir_model, encodingScheme, args.outfile, args.bigendian, args.use_temp_file, args.no_lazy)
+
+        if args.get_outfile:
+            print(f"{model_instance.fname_default}") # noqa: NP100
+            return

         logger.info("Set model parameters")
         model_instance.set_gguf_parameters()
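
Note: with this change, a command along the lines of "python convert-hf-to-gguf.py --get-outfile <model_dir>" (invocation illustrative) prints only the computed default filename, since the --get-outfile path raises the log level to WARNING before the print. A minimal sketch of the naming logic the constructor now delegates to, assuming the gguf-py package from this commit is importable; the model name and parameter count below are made up:

    import gguf

    # Hypothetical ~7B model: the directory name stands in for the model name,
    # no version metadata, no experts
    params_count = 7_241_732_096
    print(gguf.naming_convention("Mistral-7B-v0.1", None, None, params_count, "F16"))
    # -> Mistral-7B-v0.1-7B-F16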

convert.py (+19, -42)
@@ -1320,35 +1320,17 @@ def pick_output_type(model: LazyModel, output_type_str: str | None) -> GGMLFileT

 def model_parameter_count(model: LazyModel) -> int:
     total_model_parameters = 0
-    for i, (name, lazy_tensor) in enumerate(model.items()):
+    for name, lazy_tensor in model.items():
+        # Got A Tensor
         sum_weights_in_tensor = 1
+        # Tensor Volume
         for dim in lazy_tensor.shape:
             sum_weights_in_tensor *= dim
+        # Add Tensor Volume To Running Count
         total_model_parameters += sum_weights_in_tensor
     return total_model_parameters


-def model_parameter_count_rounded_notation(model_params_count: int) -> str:
-    if model_params_count > 1e12 :
-        # Trillions Of Parameters
-        scaled_model_params = model_params_count * 1e-12
-        scale_suffix = "T"
-    elif model_params_count > 1e9 :
-        # Billions Of Parameters
-        scaled_model_params = model_params_count * 1e-9
-        scale_suffix = "B"
-    elif model_params_count > 1e6 :
-        # Millions Of Parameters
-        scaled_model_params = model_params_count * 1e-6
-        scale_suffix = "M"
-    else:
-        # Thousands Of Parameters
-        scaled_model_params = model_params_count * 1e-3
-        scale_suffix = "K"
-
-    return f"{round(scaled_model_params)}{scale_suffix}"
-
-
 def convert_to_output_type(model: LazyModel, output_type: GGMLFileType) -> LazyModel:
     return {name: tensor.astype(output_type.type_for_tensor(name, tensor))
             for (name, tensor) in model.items()}
@@ -1529,29 +1511,24 @@ def load_vocab(self, vocab_types: list[str] | None, model_parent_path: Path) ->


 def default_convention_outfile(file_type: GGMLFileType, params: Params, model_params_count: int, metadata: Metadata) -> str:
-    quantization = {
-        GGMLFileType.AllF32: "F32",
-        GGMLFileType.MostlyF16: "F16",
-        GGMLFileType.MostlyQ8_0: "Q8_0",
-    }[file_type]
-
-    parameters = model_parameter_count_rounded_notation(model_params_count)

-    expert_count = ""
-    if params.n_experts is not None:
-        expert_count = f"{params.n_experts}x"
-
-    version = ""
-    if metadata is not None and metadata.version is not None:
-        version = f"-{metadata.version}"
-
-    name = "ggml-model"
+    name = None
     if metadata is not None and metadata.name is not None:
         name = metadata.name
     elif params.path_model is not None:
         name = params.path_model.name

-    return f"{name}{version}-{expert_count}{parameters}-{quantization}"
+    version = metadata.version if metadata is not None and metadata.version is not None else None
+
+    expert_count = params.n_experts if params.n_experts is not None else None
+
+    encodingScheme = {
+        GGMLFileType.AllF32: "F32",
+        GGMLFileType.MostlyF16: "F16",
+        GGMLFileType.MostlyQ8_0: "Q8_0",
+    }[file_type]
+
+    return gguf.naming_convention(name, version, expert_count, model_params_count, encodingScheme)


 def default_outfile(model_paths: list[Path], file_type: GGMLFileType, params: Params, model_params_count: int, metadata: Metadata) -> Path:
@@ -1612,9 +1589,9 @@ def main(args_in: list[str] | None = None) -> None:
     if args.get_outfile:
         model_plus = load_some_model(args.model)
         params = Params.load(model_plus)
-        model = convert_model_names(model_plus.model, params, args.skip_unknown)
+        model = convert_model_names(model_plus.model, params, args.skip_unknown)
         model_params_count = model_parameter_count(model_plus.model)
-        ftype = pick_output_type(model, args.outtype)
+        ftype = pick_output_type(model, args.outtype)
         print(f"{default_convention_outfile(ftype, params, model_params_count, metadata)}") # noqa: NP100
         return

@@ -1632,7 +1609,7 @@ def main(args_in: list[str] | None = None) -> None:
         model_plus = ModelPlus(model = {}, paths = [args.model / 'dummy'], format = 'none', vocab = None)

     model_params_count = model_parameter_count(model_plus.model)
-    logger.info(f"model parameters count : {model_params_count} ({model_parameter_count_rounded_notation(model_params_count)})")
+    logger.info(f"model parameters count : {model_params_count} ({gguf.model_parameter_count_rounded_notation(model_params_count)})")

     if args.dump:
         do_dump_model(model_plus)
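
convert.py now derives its default filename through the same shared gguf.naming_convention helper, so models with experts pick up the "Nx" chunk consistently across both converters. A hedged example with made-up values (none of them come from this diff):

    import gguf

    # Hypothetical 8-expert model with version metadata;
    # 46_702_792_704 parameters round to "47B"
    print(gguf.naming_convention("mixtral", "v0.1", 8, 46_702_792_704, "Q8_0"))
    # -> mixtral-v0.1-8x47B-Q8_0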

gguf-py/gguf/__init__.py (+1)
@@ -5,3 +5,4 @@
 from .quants import *
 from .tensor_mapping import *
 from .vocab import *
+from .utility import *

gguf-py/gguf/utility.py (new file, +35)
@@ -0,0 +1,35 @@
+from __future__ import annotations
+
+
+def model_parameter_count_rounded_notation(model_params_count: int) -> str:
+    if model_params_count > 1e15:
+        # Quadrillions Of Parameters
+        scaled_model_params = model_params_count * 1e-15
+        scale_suffix = "Q"
+    elif model_params_count > 1e12:
+        # Trillions Of Parameters
+        scaled_model_params = model_params_count * 1e-12
+        scale_suffix = "T"
+    elif model_params_count > 1e9:
+        # Billions Of Parameters
+        scaled_model_params = model_params_count * 1e-9
+        scale_suffix = "B"
+    elif model_params_count > 1e6:
+        # Millions Of Parameters
+        scaled_model_params = model_params_count * 1e-6
+        scale_suffix = "M"
+    else:
+        # Thousands Of Parameters
+        scaled_model_params = model_params_count * 1e-3
+        scale_suffix = "K"
+    return f"{round(scaled_model_params)}{scale_suffix}"
+
+
+def naming_convention(model_name: str, version_string: str, expert_count_int: int, model_params_count: int, encodingScheme: str) -> str:
+    # Reference: https://github.com/ggerganov/ggml/blob/master/docs/gguf.md#gguf-naming-convention
+    name = model_name.strip().replace(' ', '-') if model_name is not None else "ggml-model"
+    version = f"-{version_string}" if version_string is not None else ""
+    expert_count_chunk = f"{expert_count_int}x" if expert_count_int is not None else ""
+    parameters = model_parameter_count_rounded_notation(model_params_count)
+    encodingScheme = encodingScheme.upper()
+    return f"{name}{version}-{expert_count_chunk}{parameters}-{encodingScheme}"
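
A quick sanity check of the two helpers above, as a sketch; it assumes gguf-py is importable and the parameter counts are made up:

    from gguf.utility import model_parameter_count_rounded_notation, naming_convention

    print(model_parameter_count_rounded_notation(6_738_415_616))  # 7B (6.74e9 rounds up)
    print(model_parameter_count_rounded_notation(124_439_808))    # 124M
    # Spaces in the name become dashes and the encoding scheme is upper-cased
    print(naming_convention("my model", None, None, 124_439_808, "f16"))
    # -> my-model-124M-F16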
