
Commit 893c091: add plamo-13b implementation
1 parent: abb29f1

File tree: 2 files changed (+531, -25 lines)


gguf-py/gguf/gguf.py (+51, -25)
@@ -88,6 +88,7 @@ class MODEL_ARCH(IntEnum):
     PERSIMMON : int = auto()
     REFACT    : int = auto()
     BERT      : int = auto()
+    PLAMO     : int = auto()


 class MODEL_TENSOR(IntEnum):
@@ -125,6 +126,7 @@ class MODEL_TENSOR(IntEnum):
     MODEL_ARCH.PERSIMMON: "persimmon",
     MODEL_ARCH.REFACT:    "refact",
     MODEL_ARCH.BERT:      "bert",
+    MODEL_ARCH.PLAMO:     "plamo",
 }

 TENSOR_NAMES: dict[MODEL_TENSOR, str] = {
@@ -282,6 +284,21 @@ class MODEL_TENSOR(IntEnum):
         MODEL_TENSOR.FFN_DOWN,
         MODEL_TENSOR.FFN_UP,
     ],
+    MODEL_ARCH.PLAMO: [
+        MODEL_TENSOR.TOKEN_EMBD,
+        MODEL_TENSOR.OUTPUT_NORM,
+        MODEL_TENSOR.OUTPUT,
+        MODEL_TENSOR.ROPE_FREQS,
+        MODEL_TENSOR.ATTN_NORM,
+        MODEL_TENSOR.ATTN_Q,
+        MODEL_TENSOR.ATTN_K,
+        MODEL_TENSOR.ATTN_V,
+        MODEL_TENSOR.ATTN_OUT,
+        MODEL_TENSOR.ATTN_ROT_EMBD,
+        MODEL_TENSOR.FFN_GATE,
+        MODEL_TENSOR.FFN_DOWN,
+        MODEL_TENSOR.FFN_UP,
+    ],
     MODEL_ARCH.GPT2: [
         # TODO
     ],
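For orientation, the block registered above is the inventory of GGUF tensor types a converted PLaMo model is expected to carry. Below is a minimal sketch, not part of this commit, assuming the per-block name templates follow the usual TENSOR_NAMES pattern (e.g. "blk.{bid}.attn_q" for MODEL_TENSOR.ATTN_Q); it shows how the inventory expands into concrete names for one transformer block. TOKEN_EMBD, OUTPUT_NORM, OUTPUT and ROPE_FREQS are per-model rather than per-block, so they are left out.

# Illustrative sketch only: these "blk.{bid}.*" templates are assumed to mirror
# TENSOR_NAMES in this file; they are not part of the hunk above.
ASSUMED_BLOCK_TEMPLATES = [
    "blk.{bid}.attn_norm",
    "blk.{bid}.attn_q",
    "blk.{bid}.attn_k",
    "blk.{bid}.attn_v",
    "blk.{bid}.attn_output",
    "blk.{bid}.attn_rot_embd",
    "blk.{bid}.ffn_gate",
    "blk.{bid}.ffn_down",
    "blk.{bid}.ffn_up",
]

def plamo_block_tensor_names(bid: int) -> list[str]:
    # Expand the templates for a single transformer block index.
    return [t.format(bid=bid) for t in ASSUMED_BLOCK_TEMPLATES]

print(plamo_block_tensor_names(0))  # ['blk.0.attn_norm', 'blk.0.attn_q', ...]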
@@ -366,6 +383,7 @@ class TensorNameMap:
             "layers.{bid}.attention_norm",                          # llama-pth
             "encoder.layer.{bid}.attention.output.LayerNorm",       # bert
             "language_model.encoder.layers.{bid}.input_layernorm",  # persimmon
+            "model.layers.layers.{bid}.norm",                       # plamo
         ),

         # Attention norm 2
@@ -384,45 +402,50 @@ class TensorNameMap:

         # Attention query
         MODEL_TENSOR.ATTN_Q: (
-            "model.layers.{bid}.self_attn.q_proj",      # llama-hf
-            "layers.{bid}.attention.wq",                # llama-pth
-            "encoder.layer.{bid}.attention.self.query", # bert
-            "transformer.h.{bid}.attn.q_proj",          # gpt-j
+            "model.layers.{bid}.self_attn.q_proj",        # llama-hf
+            "layers.{bid}.attention.wq",                  # llama-pth
+            "encoder.layer.{bid}.attention.self.query",   # bert
+            "transformer.h.{bid}.attn.q_proj",            # gpt-j
+            "model.layers.layers.{bid}.self_attn.q_proj", # plamo
         ),

         # Attention key
         MODEL_TENSOR.ATTN_K: (
-            "model.layers.{bid}.self_attn.k_proj",    # llama-hf
-            "layers.{bid}.attention.wk",              # llama-pth
-            "encoder.layer.{bid}.attention.self.key", # bert
-            "transformer.h.{bid}.attn.k_proj",        # gpt-j
+            "model.layers.{bid}.self_attn.k_proj",        # llama-hf
+            "layers.{bid}.attention.wk",                  # llama-pth
+            "encoder.layer.{bid}.attention.self.key",     # bert
+            "transformer.h.{bid}.attn.k_proj",            # gpt-j
+            "model.layers.layers.{bid}.self_attn.k_proj", # plamo
         ),

         # Attention value
         MODEL_TENSOR.ATTN_V: (
-            "model.layers.{bid}.self_attn.v_proj",      # llama-hf
-            "layers.{bid}.attention.wv",                # llama-pth
-            "encoder.layer.{bid}.attention.self.value", # bert
-            "transformer.h.{bid}.attn.v_proj",          # gpt-j
+            "model.layers.{bid}.self_attn.v_proj",        # llama-hf
+            "layers.{bid}.attention.wv",                  # llama-pth
+            "encoder.layer.{bid}.attention.self.value",   # bert
+            "transformer.h.{bid}.attn.v_proj",            # gpt-j
+            "model.layers.layers.{bid}.self_attn.v_proj", # plamo
         ),

         # Attention output
         MODEL_TENSOR.ATTN_OUT: (
-            "gpt_neox.layers.{bid}.attention.dense",                    # gptneox
-            "transformer.h.{bid}.attn.c_proj",                          # gpt2 refact
-            "transformer.blocks.{bid}.attn.out_proj",                   # mpt
-            "transformer.h.{bid}.self_attention.dense",                 # falcon
-            "model.layers.{bid}.self_attn.o_proj",                      # llama-hf
-            "layers.{bid}.attention.wo",                                # llama-pth
-            "encoder.layer.{bid}.attention.output.dense",               # bert
-            "transformer.h.{bid}.attn.out_proj",                        # gpt-j
-            "language_model.encoder.layers.{bid}.self_attention.dense"  # persimmon
+            "gpt_neox.layers.{bid}.attention.dense",                     # gptneox
+            "transformer.h.{bid}.attn.c_proj",                           # gpt2 refact
+            "transformer.blocks.{bid}.attn.out_proj",                    # mpt
+            "transformer.h.{bid}.self_attention.dense",                  # falcon
+            "model.layers.{bid}.self_attn.o_proj",                       # llama-hf
+            "layers.{bid}.attention.wo",                                 # llama-pth
+            "encoder.layer.{bid}.attention.output.dense",                # bert
+            "transformer.h.{bid}.attn.out_proj",                         # gpt-j
+            "language_model.encoder.layers.{bid}.self_attention.dense",  # persimmon
+            "model.layers.layers.{bid}.self_attn.o_proj",                # plamo
         ),

         # Rotary embeddings
         MODEL_TENSOR.ATTN_ROT_EMBD: (
-            "model.layers.{bid}.self_attn.rotary_emb.inv_freq",  # llama-hf
-            "layers.{bid}.attention.inner_attention.rope.freqs", # llama-pth
+            "model.layers.{bid}.self_attn.rotary_emb.inv_freq",        # llama-hf
+            "layers.{bid}.attention.inner_attention.rope.freqs",       # llama-pth
+            "model.layers.layers.{bid}.self_attn.rotary_emb.inv_freq", # plamo
         ),

         # Feed-forward norm
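The plamo entries above all use PLaMo's doubled prefix, model.layers.layers.{bid}, where LLaMA-style checkpoints use model.layers.{bid}. The sketch below is not the TensorNameMap implementation (which, as far as the surrounding file suggests, pre-expands the templates into a per-block lookup table); it only illustrates how one of these {bid} templates can be matched against a PLaMo checkpoint tensor name.

import re

# Hypothetical helper, not the TensorNameMap API: resolve a PLaMo checkpoint
# name to a GGUF-style block tensor name using one {bid} template per type.
PLAMO_TEMPLATES = {
    "attn_q":      "model.layers.layers.{bid}.self_attn.q_proj",
    "attn_k":      "model.layers.layers.{bid}.self_attn.k_proj",
    "attn_v":      "model.layers.layers.{bid}.self_attn.v_proj",
    "attn_output": "model.layers.layers.{bid}.self_attn.o_proj",
}

def resolve_plamo_name(hf_name: str) -> str | None:
    for gguf_suffix, template in PLAMO_TEMPLATES.items():
        # Turn the {bid} placeholder into a capture group for the block index.
        pattern = re.escape(template).replace(re.escape("{bid}"), r"(\d+)")
        m = re.fullmatch(pattern + r"(?:\.weight)?", hf_name)
        if m:
            return f"blk.{m.group(1)}.{gguf_suffix}"
    return None

print(resolve_plamo_name("model.layers.layers.5.self_attn.q_proj.weight"))  # blk.5.attn_q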
@@ -447,12 +470,14 @@ class TensorNameMap:
             "encoder.layer.{bid}.intermediate.dense",                # bert
             "transformer.h.{bid}.mlp.fc_in",                         # gpt-j
             "language_model.encoder.layers.{bid}.mlp.dense_h_to_4h", # persimmon
+            "model.layers.layers.{bid}.mlp.up_proj",                 # plamo
         ),

         # Feed-forward gate
         MODEL_TENSOR.FFN_GATE: (
-            "model.layers.{bid}.mlp.gate_proj", # llama-hf refact
-            "layers.{bid}.feed_forward.w1",     # llama-pth
+            "model.layers.{bid}.mlp.gate_proj",        # llama-hf refact
+            "layers.{bid}.feed_forward.w1",            # llama-pth
+            "model.layers.layers.{bid}.mlp.gate_proj", # plamo
         ),

         # Feed-forward down
@@ -466,6 +491,7 @@ class TensorNameMap:
             "encoder.layer.{bid}.output.dense",                      # bert
             "transformer.h.{bid}.mlp.fc_out",                        # gpt-j
             "language_model.encoder.layers.{bid}.mlp.dense_4h_to_h", # persimmon
+            "model.layers.layers.{bid}.mlp.down_proj",               # plamo
         ),

         MODEL_TENSOR.ATTN_Q_NORM: (
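Taken together, these mappings are what a PLaMo conversion script would lean on when renaming checkpoint tensors. A hedged usage sketch follows; the helper names (get_tensor_name_map, TensorNameMap.get_name, the try_suffixes parameter) are assumed from the gguf-py conversion scripts of this period and may differ at this exact revision.

import gguf

# Hypothetical values: in a real converter the block count is read from the
# model's config.json rather than hard-coded.
block_count = 40
tensor_map = gguf.get_tensor_name_map(gguf.MODEL_ARCH.PLAMO, block_count)

hf_name = "model.layers.layers.0.self_attn.o_proj.weight"
new_name = tensor_map.get_name(hf_name, try_suffixes=(".weight", ".bias"))
if new_name is None:
    raise ValueError(f"cannot map tensor {hf_name!r}")
print(new_name)  # expected to be roughly "blk.0.attn_output.weight"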
