Commit fabf30b

llama : remove Persimmon (#7408)
* llama : remove Persimmon
* requirements : remove
1 parent 20385ce commit fabf30b

7 files changed: +0 −485 lines

README.md

Lines changed: 0 additions & 1 deletion

@@ -107,7 +107,6 @@ Typically finetunes of the base models below are supported as well.
 - [X] [Aquila 1 & 2](https://huggingface.co/models?search=BAAI/Aquila)
 - [X] [Starcoder models](https://github.com/ggerganov/llama.cpp/pull/3187)
 - [X] [Refact](https://huggingface.co/smallcloudai/Refact-1_6B-fim)
-- [X] [Persimmon 8B](https://github.com/ggerganov/llama.cpp/pull/3410)
 - [X] [MPT](https://github.com/ggerganov/llama.cpp/pull/3417)
 - [X] [Bloom](https://github.com/ggerganov/llama.cpp/pull/3553)
 - [x] [Yi models](https://huggingface.co/models?search=01-ai/Yi)

convert-hf-to-gguf.py

Lines changed: 0 additions & 39 deletions

@@ -1148,45 +1148,6 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter
         return tensors


-@Model.register("PersimmonForCausalLM")
-class PersimmonModel(Model):
-    model_arch = gguf.MODEL_ARCH.PERSIMMON
-
-    def set_gguf_parameters(self):
-        block_count = self.hparams.get("num_layers", self.hparams.get("num_hidden_layers"))
-        head_count = self.hparams["num_attention_heads"]
-        head_count_kv = head_count
-        hidden_size = self.hparams["hidden_size"]
-
-        self.gguf_writer.add_name('persimmon-8b-chat')
-        self.gguf_writer.add_context_length(self.hparams["max_position_embeddings"])
-        self.gguf_writer.add_embedding_length(hidden_size)
-        self.gguf_writer.add_block_count(block_count)
-        self.gguf_writer.add_feed_forward_length(self.hparams["intermediate_size"])
-
-        # NOTE: not sure about this change - why does the model not have a rope dimension count when it is smaller
-        # than the head size?
-        # ref: https://github.com/ggerganov/llama.cpp/pull/4889
-        # self.gguf_writer.add_rope_dimension_count(hidden_size // head_count)
-        self.gguf_writer.add_rope_dimension_count(hidden_size // head_count // 2)
-
-        self.gguf_writer.add_head_count(head_count)
-        self.gguf_writer.add_head_count_kv(head_count_kv)
-        self.gguf_writer.add_rope_freq_base(self.hparams["rope_theta"])
-        self.gguf_writer.add_layer_norm_eps(self.hparams["layer_norm_eps"])
-
-    def set_vocab(self):
-        self._set_vocab_sentencepiece()
-        # self.gguf_writer.add_bos_token_id(71013)
-        # self.gguf_writer.add_eos_token_id(71013)
-
-    def extra_f32_tensors(self, name: str, new_name: str, bid: int | None, n_dims: int) -> bool:
-        del name, new_name, bid, n_dims  # unused
-
-        # TODO: FP16 conversion produces garbage outputs. (Q8_0 does not, so..?)
-        return True
-
-
 @Model.register("StableLmForCausalLM", "StableLMEpochForCausalLM", "LlavaStableLMEpochForCausalLM")
 class StableLMModel(Model):
     model_arch = gguf.MODEL_ARCH.STABLELM
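For readers unfamiliar with the converter, each supported architecture is a Model subclass registered under its Hugging Face class name via @Model.register, which is why deleting PersimmonModel above drops Persimmon from convert-hf-to-gguf.py. Below is a minimal, self-contained sketch of that registry pattern; the toy Model base class and its methods are illustrative only, not the actual llama.cpp implementation.

# Toy sketch of the @Model.register pattern used by convert-hf-to-gguf.py.
# The real Model base class also handles tensor mapping, vocab, and GGUF
# writing; this version only shows how an HF architecture name is tied to
# a converter class, and what "unsupported architecture" looks like.

class Model:
    _registry: dict[str, type["Model"]] = {}

    def __init__(self, hparams: dict):
        self.hparams = hparams

    @classmethod
    def register(cls, *names: str):
        # Decorator: map one or more HF architecture names to a converter class.
        def wrapper(model_cls: type["Model"]) -> type["Model"]:
            for name in names:
                cls._registry[name] = model_cls
            return model_cls
        return wrapper

    @classmethod
    def from_architecture(cls, name: str, hparams: dict) -> "Model":
        # A missing key is what an unsupported (removed) architecture looks like.
        return cls._registry[name](hparams)

    def set_gguf_parameters(self) -> None:
        raise NotImplementedError


@Model.register("StableLmForCausalLM")  # still supported upstream
class StableLMModel(Model):
    def set_gguf_parameters(self) -> None:
        print("blocks:", self.hparams["num_hidden_layers"])


if __name__ == "__main__":
    m = Model.from_architecture("StableLmForCausalLM", {"num_hidden_layers": 32})
    m.set_gguf_parameters()
    # Model.from_architecture("PersimmonForCausalLM", {})  # would raise KeyError here

After this commit, a request to convert a PersimmonForCausalLM checkpoint falls through to the converter's unsupported-architecture handling, roughly analogous to the KeyError in the sketch.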

convert-persimmon-to-gguf.py

Lines changed: 0 additions & 143 deletions
This file was deleted.

gguf-py/gguf/constants.py

Lines changed: 0 additions & 19 deletions

@@ -115,7 +115,6 @@ class MODEL_ARCH(IntEnum):
     GPTNEOX = auto()
     MPT = auto()
     STARCODER = auto()
-    PERSIMMON = auto()
     REFACT = auto()
     BERT = auto()
     NOMIC_BERT = auto()

@@ -193,7 +192,6 @@ class MODEL_TENSOR(IntEnum):
     MODEL_ARCH.GPTNEOX: "gptneox",
     MODEL_ARCH.MPT: "mpt",
     MODEL_ARCH.STARCODER: "starcoder",
-    MODEL_ARCH.PERSIMMON: "persimmon",
     MODEL_ARCH.REFACT: "refact",
     MODEL_ARCH.BERT: "bert",
     MODEL_ARCH.NOMIC_BERT: "nomic-bert",

@@ -426,20 +424,6 @@ class MODEL_TENSOR(IntEnum):
         MODEL_TENSOR.FFN_DOWN,
         MODEL_TENSOR.FFN_UP,
     ],
-    MODEL_ARCH.PERSIMMON: [
-        MODEL_TENSOR.TOKEN_EMBD,
-        MODEL_TENSOR.OUTPUT,
-        MODEL_TENSOR.OUTPUT_NORM,
-        MODEL_TENSOR.ATTN_NORM,
-        MODEL_TENSOR.ATTN_QKV,
-        MODEL_TENSOR.ATTN_OUT,
-        MODEL_TENSOR.FFN_NORM,
-        MODEL_TENSOR.FFN_DOWN,
-        MODEL_TENSOR.FFN_UP,
-        MODEL_TENSOR.ATTN_Q_NORM,
-        MODEL_TENSOR.ATTN_K_NORM,
-        MODEL_TENSOR.ATTN_ROT_EMBD,
-    ],
     MODEL_ARCH.REFACT: [
         MODEL_TENSOR.TOKEN_EMBD,
         MODEL_TENSOR.OUTPUT_NORM,

@@ -756,9 +740,6 @@ class MODEL_TENSOR(IntEnum):
         MODEL_TENSOR.ROPE_FREQS,
         MODEL_TENSOR.ATTN_ROT_EMBD,
     ],
-    MODEL_ARCH.PERSIMMON: [
-        MODEL_TENSOR.ROPE_FREQS,
-    ],
     MODEL_ARCH.QWEN: [
         MODEL_TENSOR.ROPE_FREQS,
         MODEL_TENSOR.ATTN_ROT_EMBD,
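The constants file is the other half of the removal: an architecture occupies an enum member, an arch-name entry, and a per-architecture tensor list, and the diff above deletes all of Persimmon's rows. The following is a toy sketch of how those tables fit together, with simplified names and contents inferred from the diff rather than copied from gguf-py.

# Simplified sketch (not the real gguf-py/gguf/constants.py) of the three
# tables an architecture occupies: the MODEL_ARCH enum, the arch-name map,
# and the tensor list. Removing an architecture means deleting its row from
# each table, exactly as the Persimmon diff above does.

from enum import IntEnum, auto


class MODEL_ARCH(IntEnum):
    GPTNEOX = auto()
    STARCODER = auto()
    REFACT = auto()  # PERSIMMON used to sit between STARCODER and REFACT


class MODEL_TENSOR(IntEnum):
    TOKEN_EMBD = auto()
    OUTPUT_NORM = auto()
    ATTN_QKV = auto()


# Architecture name strings written into GGUF metadata.
ARCH_NAMES: dict[MODEL_ARCH, str] = {
    MODEL_ARCH.GPTNEOX: "gptneox",
    MODEL_ARCH.STARCODER: "starcoder",
    MODEL_ARCH.REFACT: "refact",
}

# Which tensors each architecture is expected to provide.
ARCH_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
    MODEL_ARCH.REFACT: [
        MODEL_TENSOR.TOKEN_EMBD,
        MODEL_TENSOR.OUTPUT_NORM,
        MODEL_TENSOR.ATTN_QKV,
    ],
}

if __name__ == "__main__":
    arch = MODEL_ARCH.REFACT
    print(ARCH_NAMES[arch], [t.name for t in ARCH_TENSORS[arch]])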
