
Commit 6566d17

pcuenca authored and cebtenzzre committed
convert : allow conversion of Mistral HF models (ggml-org#6144)
* Allow conversion of Mistral HF models
* Homogenize Llama, Mistral, Mixtral under the same entry.
* Fix tokenizer, permute tensors
* Use sentencepiece tokenizer, or fall back to hfft.
* convert-hf : small fix for mypy
* convert-hf : fix duplicated block_count
* convert-hf : add vocab size to metadata

Co-authored-by: Jared Van Bortel <[email protected]>
1 parent 11f290f commit 6566d17
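
The tokenizer bullets above are the behavioral core of the change: the converter now prefers the SentencePiece `tokenizer.model` shipped with most Llama/Mistral checkpoints and, if that lookup fails with `FileNotFoundError` (typically because the HF export only contains `tokenizer.json`), falls back to the HF fast tokenizer ("hfft") path backed by `LlamaHfVocab`. A minimal sketch of that selection order, using `AutoTokenizer` as an illustrative stand-in for the `LlamaHfVocab` fallback (the converter itself does not return an `AutoTokenizer`):

```python
from pathlib import Path

from sentencepiece import SentencePieceProcessor
from transformers import AutoTokenizer


def load_tokenizer(model_dir: Path):
    """Prefer SentencePiece, fall back to the HF fast tokenizer ("hfft").

    Sketch only: AutoTokenizer stands in for the LlamaHfVocab path that
    convert-hf-to-gguf.py actually uses for the fallback.
    """
    sp_path = model_dir / "tokenizer.model"
    if sp_path.is_file():
        sp = SentencePieceProcessor()
        sp.Load(str(sp_path))  # same file _set_vocab_sentencepiece reads
        return "sentencepiece", sp
    # No tokenizer.model in the checkout: rely on tokenizer.json instead.
    return "hfft", AutoTokenizer.from_pretrained(model_dir, use_fast=True)


# Hypothetical local checkout path, for illustration:
# kind, tok = load_tokenizer(Path("models/Mistral-7B-v0.1"))
```

In the patch itself this is just a `try`/`except FileNotFoundError` around `self._set_vocab_sentencepiece()`, with `self._set_vocab_llama_hf()` as the fallback, as the diff below shows.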

File tree

1 file changed: +64 -4 lines changed

convert-hf-to-gguf.py (+64 -4)
```diff
@@ -23,7 +23,7 @@
 sys.path.insert(1, str(Path(__file__).parent / 'gguf-py'))
 import gguf
 
-from convert import LlamaHfVocab
+from convert import LlamaHfVocab, permute
 
 
 ###### MODEL DEFINITIONS ######
```
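
For context, the newly imported `permute` reorders the rows of the Q/K projection matrices: the HF Llama/Mistral layout stores each head's two rotary halves as contiguous blocks, while llama.cpp's GGUF Llama graph expects the original interleaved pair layout. A self-contained sketch of that reshaping, mirroring the helper `convert.py` provided at the time (treat the exact signature as an assumption):

```python
import numpy as np


def permute(weights: np.ndarray, n_head: int, n_head_kv: int | None = None) -> np.ndarray:
    """Reorder q_proj/k_proj rows from the HF rotary layout (halves grouped
    per head) to the interleaved layout expected by the GGUF Llama graph."""
    if n_head_kv is not None and n_head != n_head_kv:
        n_head = n_head_kv  # k_proj has n_kv_head heads under grouped-query attention
    return (
        weights.reshape(n_head, 2, weights.shape[0] // n_head // 2, *weights.shape[1:])
        .swapaxes(1, 2)
        .reshape(weights.shape)
    )


# Toy smoke test: 2 heads, head_dim 4, "hidden size" 3 (shapes only, not real weights)
w = np.arange(8 * 3, dtype=np.float32).reshape(8, 3)
assert permute(w, n_head=2).shape == w.shape
```

In the second hunk below, `q_proj.weight` is permuted with `(n_head, n_head)` and `k_proj.weight` with `(n_head, n_kv_head)`, which is what makes Mistral's grouped-query attention come out right.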
```diff
@@ -1052,12 +1052,72 @@ def set_gguf_parameters(self):
         self.gguf_writer.add_layer_norm_eps(self.find_hparam(["layer_norm_eps", "norm_eps"]))
 
 
-@Model.register("MixtralForCausalLM")
-class MixtralModel(Model):
+@Model.register("LlamaForCausalLM", "MistralForCausalLM", "MixtralForCausalLM")
+class LlamaModel(Model):
     model_arch = gguf.MODEL_ARCH.LLAMA
 
     def set_vocab(self):
-        self._set_vocab_sentencepiece()
+        try:
+            self._set_vocab_sentencepiece()
+        except FileNotFoundError:
+            self._set_vocab_llama_hf()
+
+    def set_gguf_parameters(self):
+        super().set_gguf_parameters()
+        hparams = self.hparams
+        self.gguf_writer.add_vocab_size(hparams["vocab_size"])
+        self.gguf_writer.add_rope_dimension_count(hparams["hidden_size"] // hparams["num_attention_heads"])
+
+    # Same as super class, but permuting q_proj, k_proj
+    def write_tensors(self):
+        block_count = self.hparams.get("n_layers", self.hparams.get("num_hidden_layers", self.hparams.get("n_layer")))
+        tensor_map = gguf.get_tensor_name_map(self.model_arch, block_count)
+        n_head = self.hparams.get("num_attention_heads")
+        n_kv_head = self.hparams.get("num_key_value_heads")
+        for name, data_torch in self.get_tensors():
+            # we don't need these
+            if name.endswith((".attention.masked_bias", ".attention.bias", ".attention.rotary_emb.inv_freq")):
+                continue
+
+            old_dtype = data_torch.dtype
+
+            # convert any unsupported data types to float32
+            if data_torch.dtype not in (torch.float16, torch.float32):
+                data_torch = data_torch.to(torch.float32)
+
+            data = data_torch.numpy()
+
+            if name.endswith("q_proj.weight"):
+                data = permute(data, n_head, n_head)
+            if name.endswith("k_proj.weight"):
+                data = permute(data, n_head, n_kv_head)
+
+            data = data.squeeze()
+
+            # map tensor names
+            new_name = tensor_map.get_name(name, try_suffixes=(".weight", ".bias"))
+            if new_name is None:
+                print(f"Can not map tensor {name!r}")
+                sys.exit()
+
+            n_dims = len(data.shape)
+            data_dtype = data.dtype
+
+            # if f32 desired, convert any float16 to float32
+            if self.ftype == 0 and data_dtype == np.float16:
+                data = data.astype(np.float32)
+
+            # TODO: Why cant we use these float16 as-is? There should be not reason to store float16 as float32
+            if self.ftype == 1 and data_dtype == np.float16 and n_dims == 1:
+                data = data.astype(np.float32)
+
+            # if f16 desired, convert any float32 2-dim weight tensors to float16
+            if self.ftype == 1 and data_dtype == np.float32 and name.endswith(".weight") and n_dims == 2:
+                data = data.astype(np.float16)
+
+            print(f"{new_name}, n_dims = {n_dims}, {old_dtype} --> {data.dtype}")
+
+            self.gguf_writer.add_tensor(new_name, data)
 
 
 @Model.register("GrokForCausalLM")
```
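
Two details in the new `LlamaModel` are easy to sanity-check by hand. `set_gguf_parameters` now writes `vocab_size` and a RoPE dimension count of `hidden_size // num_attention_heads`; with Mistral-7B-style hyperparameters (hidden_size 4096, 32 attention heads, vocab 32000, values assumed here for illustration) that comes out to 128. And `write_tensors` applies the converter's usual dtype policy: in f32 mode (ftype 0) everything is stored as float32, while in f16 mode (ftype 1) only 2-D `.weight` tensors are downcast to float16 and 1-D tensors such as norms stay float32. A small sketch of both rules:

```python
import numpy as np

# Assumed Mistral-7B-style hyperparameters, for illustration only
hparams = {"hidden_size": 4096, "num_attention_heads": 32, "vocab_size": 32000}

rope_dim = hparams["hidden_size"] // hparams["num_attention_heads"]
print(rope_dim)  # 128 -> value passed to gguf_writer.add_rope_dimension_count()


def output_dtype(ftype: int, data: np.ndarray, name: str) -> np.dtype:
    """Mirror the dtype decisions in write_tensors (ftype 0 = f32, 1 = f16)."""
    if ftype == 0 and data.dtype == np.float16:
        return np.dtype(np.float32)  # f32 output: upcast everything to float32
    if ftype == 1 and data.dtype == np.float16 and data.ndim == 1:
        return np.dtype(np.float32)  # 1-D tensors stay float32 even in f16 mode
    if ftype == 1 and data.dtype == np.float32 and name.endswith(".weight") and data.ndim == 2:
        return np.dtype(np.float16)  # large 2-D weight matrices get downcast
    return data.dtype  # everything else is written unchanged


w = np.zeros((4096, 4096), dtype=np.float32)
norm = np.zeros(4096, dtype=np.float16)
print(output_dtype(1, w, "model.layers.0.self_attn.q_proj.weight"))    # float16
print(output_dtype(1, norm, "model.layers.0.input_layernorm.weight"))  # float32
```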

0 commit comments
