 sys.path.insert(1, str(Path(__file__).parent / 'gguf-py'))
 import gguf

-from convert import LlamaHfVocab
+from convert import LlamaHfVocab, permute


 ###### MODEL DEFINITIONS ######
@@ -1052,12 +1052,72 @@ def set_gguf_parameters(self):
         self.gguf_writer.add_layer_norm_eps(self.find_hparam(["layer_norm_eps", "norm_eps"]))


-@Model.register("MixtralForCausalLM")
-class MixtralModel(Model):
+@Model.register("LlamaForCausalLM", "MistralForCausalLM", "MixtralForCausalLM")
+class LlamaModel(Model):
     model_arch = gguf.MODEL_ARCH.LLAMA

     def set_vocab(self):
-        self._set_vocab_sentencepiece()
+        try:
+            self._set_vocab_sentencepiece()
+        except FileNotFoundError:
+            self._set_vocab_llama_hf()
+
+    def set_gguf_parameters(self):
+        super().set_gguf_parameters()
+        hparams = self.hparams
+        self.gguf_writer.add_vocab_size(hparams["vocab_size"])
+        self.gguf_writer.add_rope_dimension_count(hparams["hidden_size"] // hparams["num_attention_heads"])
+
+    # Same as super class, but permuting q_proj, k_proj
+    def write_tensors(self):
+        block_count = self.hparams.get("n_layers", self.hparams.get("num_hidden_layers", self.hparams.get("n_layer")))
+        tensor_map = gguf.get_tensor_name_map(self.model_arch, block_count)
+        n_head = self.hparams.get("num_attention_heads")
+        n_kv_head = self.hparams.get("num_key_value_heads")
+        for name, data_torch in self.get_tensors():
+            # we don't need these
+            if name.endswith((".attention.masked_bias", ".attention.bias", ".attention.rotary_emb.inv_freq")):
+                continue
+
+            old_dtype = data_torch.dtype
+
+            # convert any unsupported data types to float32
+            if data_torch.dtype not in (torch.float16, torch.float32):
+                data_torch = data_torch.to(torch.float32)
+
+            data = data_torch.numpy()
+
+            if name.endswith("q_proj.weight"):
+                data = permute(data, n_head, n_head)
+            if name.endswith("k_proj.weight"):
+                data = permute(data, n_head, n_kv_head)
+
+            data = data.squeeze()
+
+            # map tensor names
+            new_name = tensor_map.get_name(name, try_suffixes=(".weight", ".bias"))
+            if new_name is None:
+                print(f"Can not map tensor {name!r}")
+                sys.exit()
+
+            n_dims = len(data.shape)
+            data_dtype = data.dtype
+
+            # if f32 desired, convert any float16 to float32
+            if self.ftype == 0 and data_dtype == np.float16:
+                data = data.astype(np.float32)
+
+            # TODO: Why can't we use these float16 as-is? There should be no reason to store float16 as float32
+            if self.ftype == 1 and data_dtype == np.float16 and n_dims == 1:
+                data = data.astype(np.float32)
+
+            # if f16 desired, convert any float32 2-dim weight tensors to float16
+            if self.ftype == 1 and data_dtype == np.float32 and name.endswith(".weight") and n_dims == 2:
+                data = data.astype(np.float16)
+
+            print(f"{new_name}, n_dims = {n_dims}, {old_dtype} --> {data.dtype}")
+
+            self.gguf_writer.add_tensor(new_name, data)


 @Model.register("GrokForCausalLM")
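
Note for readers: write_tensors() above relies on the permute helper that the first hunk now imports from convert. Its body is not part of this diff; the snippet below is a minimal numpy sketch of the kind of head-wise row reordering such a helper performs (the signature and the exact reshape are assumptions, not taken from this change), included only so the q_proj/k_proj calls above can be read in context.

import numpy as np

def permute(weights: np.ndarray, n_head: int, n_head_kv: int | None) -> np.ndarray:
    # Assumed sketch of the helper imported from convert at the top of this diff.
    # With grouped-query attention the k_proj weight only carries n_head_kv heads,
    # so the reordering is applied per KV head rather than per query head.
    if n_head_kv is not None and n_head != n_head_kv:
        n_head = n_head_kv
    # View each head's rows as (2, head_dim // 2), swap those two axes, and
    # flatten back to the original shape: this converts between the half-split
    # and interleaved RoPE row orderings of the projection weights.
    return (weights.reshape(n_head, 2, weights.shape[0] // n_head // 2, *weights.shape[1:])
                   .swapaxes(1, 2)
                   .reshape(weights.shape))

A sketch like this is consistent with how the diff calls it: permute(data, n_head, n_head) for q_proj.weight and permute(data, n_head, n_kv_head) for k_proj.weight.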