
Commit 30ce1ed

Merge pull request #135 from ggerganov/master
Add support for ArcticForCausalLM (ggml-org#7020)
2 parents 0df0aa8 + fbca2f2

File tree: 4 files changed (+456, −43 lines)


convert-hf-to-gguf.py (+151)
@@ -2466,6 +2466,157 @@ def set_vocab(self, *args, **kwargs):
         self.gguf_writer.add_add_eos_token(True)
 
 
+@Model.register("ArcticForCausalLM")
+class ArcticModel(Model):
+    model_arch = gguf.MODEL_ARCH.ARCTIC
+
+    def set_vocab(self):
+        # The reason for using a custom implementation here is that the
+        # snowflake-arctic-instruct model redefined tokens 31998 and 31999 from
+        # tokenizer.model and used them as BOS and EOS instead of adding new tokens.
+        from sentencepiece import SentencePieceProcessor
+
+        tokenizer_path = self.dir_model / 'tokenizer.model'
+
+        if not tokenizer_path.is_file():
+            logger.error(f'Error: Missing {tokenizer_path}')
+            sys.exit(1)
+
+        # Read the whole vocabulary from the tokenizer.model file
+        tokenizer = SentencePieceProcessor()
+        tokenizer.LoadFromFile(str(tokenizer_path))
+
+        vocab_size = self.hparams.get('vocab_size', tokenizer.vocab_size())
+
+        tokens: list[bytes] = [f"[PAD{i}]".encode("utf-8") for i in range(vocab_size)]
+        scores: list[float] = [-10000.0] * vocab_size
+        toktypes: list[int] = [SentencePieceTokenTypes.UNKNOWN] * vocab_size
+
+        for token_id in range(tokenizer.vocab_size()):
+
+            piece = tokenizer.IdToPiece(token_id)
+            text = piece.encode("utf-8")
+            score = tokenizer.GetScore(token_id)
+
+            toktype = SentencePieceTokenTypes.NORMAL
+            if tokenizer.IsUnknown(token_id):
+                toktype = SentencePieceTokenTypes.UNKNOWN
+            elif tokenizer.IsControl(token_id):
+                toktype = SentencePieceTokenTypes.CONTROL
+            elif tokenizer.IsUnused(token_id):
+                toktype = SentencePieceTokenTypes.UNUSED
+            elif tokenizer.IsByte(token_id):
+                toktype = SentencePieceTokenTypes.BYTE
+
+            tokens[token_id] = text
+            scores[token_id] = score
+            toktypes[token_id] = toktype
+
+        # Use the added_tokens_decoder field from tokenizer_config.json as the source
+        # of information about added/redefined tokens and modify them accordingly.
+        tokenizer_config_file = self.dir_model / 'tokenizer_config.json'
+        if tokenizer_config_file.is_file():
+            with open(tokenizer_config_file, "r", encoding="utf-8") as f:
+                tokenizer_config_json = json.load(f)
+
+                if "added_tokens_decoder" in tokenizer_config_json:
+                    added_tokens_decoder = tokenizer_config_json["added_tokens_decoder"]
+                    for token_id, token_json in added_tokens_decoder.items():
+                        token_id = int(token_id)
+                        if (token_id >= vocab_size):
+                            logger.debug(f'ignore token {token_id}: id is out of range, max={vocab_size - 1}')
+                            continue
+
+                        token_content = token_json["content"]
+                        token_type = SentencePieceTokenTypes.USER_DEFINED
+                        token_score = -10000.0
+
+                        # Map unk_token to UNKNOWN, other special tokens to CONTROL
+                        # Set the score to 0.0 as in the original tokenizer.model
+                        if ("special" in token_json) and token_json["special"]:
+                            if token_content == tokenizer_config_json["unk_token"]:
+                                token_type = SentencePieceTokenTypes.UNKNOWN
+                            else:
+                                token_type = SentencePieceTokenTypes.CONTROL
+                                token_score = 0.0
+
+                        logger.info(f"Setting added token {token_id} to '{token_content}' (type: {token_type}, score: {token_score:.2f})")
+                        tokens[token_id] = token_content.encode("utf-8")
+                        toktypes[token_id] = token_type
+                        scores[token_id] = token_score
+
+        self.gguf_writer.add_tokenizer_model("llama")
+        self.gguf_writer.add_tokenizer_pre("default")
+        self.gguf_writer.add_token_list(tokens)
+        self.gguf_writer.add_token_scores(scores)
+        self.gguf_writer.add_token_types(toktypes)
+
+        special_vocab = gguf.SpecialVocab(self.dir_model, n_vocab=len(tokens))
+        special_vocab.add_to_gguf(self.gguf_writer)
+
+    def set_gguf_parameters(self):
+        super().set_gguf_parameters()
+        hparams = self.hparams
+        self.gguf_writer.add_vocab_size(hparams["vocab_size"])
+        self.gguf_writer.add_rope_dimension_count(hparams["hidden_size"] // hparams["num_attention_heads"])
+
+    _experts: list[dict[str, Tensor]] | None = None
+
+    def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
+        n_head = self.hparams["num_attention_heads"]
+        n_kv_head = self.hparams.get("num_key_value_heads")
+
+        if name.endswith("q_proj.weight"):
+            data_torch = LlamaModel.permute(data_torch, n_head, n_head)
+        if name.endswith("k_proj.weight"):
+            data_torch = LlamaModel.permute(data_torch, n_head, n_kv_head)
+
+        # process the experts separately
+        if name.find("block_sparse_moe.experts") != -1:
+            n_experts = self.hparams["num_local_experts"]
+
+            assert bid is not None
+
+            if self._experts is None:
+                self._experts = [{} for _ in range(self.block_count)]
+
+            self._experts[bid][name] = data_torch
+
+            if len(self._experts[bid]) >= n_experts * 3:
+                tensors: list[tuple[str, Tensor]] = []
+
+                # merge the experts into a single 3d tensor
+                for wid in ["w1", "w2", "w3"]:
+                    datas: list[Tensor] = []
+
+                    for xid in range(n_experts):
+                        ename = f"model.layers.{bid}.block_sparse_moe.experts.{xid}.{wid}.weight"
+                        datas.append(self._experts[bid][ename])
+                        del self._experts[bid][ename]
+
+                    data_torch = torch.stack(datas, dim=0)
+
+                    merged_name = f"layers.{bid}.feed_forward.experts.{wid}.weight"
+
+                    new_name = self.map_tensor_name(merged_name)
+
+                    tensors.append((new_name, data_torch))
+                return tensors
+            else:
+                return []
+
+        return [(self.map_tensor_name(name), data_torch)]
+
+    def write_tensors(self):
+        super().write_tensors()
+
+        if self._experts is not None:
+            # flatten `list[dict[str, Tensor]]` into `list[str]`
+            experts = [k for d in self._experts for k in d.keys()]
+            if len(experts) > 0:
+                raise ValueError(f"Unprocessed experts: {experts}")
+
+
 ###### CONVERSION LOGIC ######
 
 
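Note on the expert-merging step above: modify_tensors buffers each layer's per-expert w1/w2/w3 matrices and only emits output once all 3 * n_experts tensors for that layer have arrived, stacking them into a single 3D tensor. A minimal self-contained sketch of that stacking step, with a toy expert count and toy shapes in place of Arctic's real dimensions:

# Sketch of the merge performed in ArcticModel.modify_tensors.
# The expert count and shapes below are illustrative placeholders.
import torch

n_experts = 4            # Arctic itself uses 128 local experts
ffn_dim, hidden = 8, 16
bid, wid = 0, "w1"

# Pretend these were collected one-by-one from the checkpoint shards,
# keyed by their HF tensor names, as modify_tensors does
experts = {
    f"model.layers.{bid}.block_sparse_moe.experts.{xid}.{wid}.weight": torch.randn(ffn_dim, hidden)
    for xid in range(n_experts)
}

# Stack in expert order along a new leading dimension
datas = [experts[f"model.layers.{bid}.block_sparse_moe.experts.{xid}.{wid}.weight"]
         for xid in range(n_experts)]
merged = torch.stack(datas, dim=0)
assert merged.shape == (n_experts, ffn_dim, hidden)

Deleting each buffered entry right after stacking keeps peak memory at roughly one layer's worth of expert weights, which is why write_tensors can assert at the end that the buffer is empty.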
gguf-py/gguf/constants.py (+25)
@@ -139,6 +139,7 @@ class MODEL_ARCH(IntEnum):
     COMMAND_R = auto()
     DBRX = auto()
     OLMO = auto()
+    ARCTIC = auto()
 
 
 class MODEL_TENSOR(IntEnum):
@@ -167,6 +168,7 @@ class MODEL_TENSOR(IntEnum):
     FFN_DOWN = auto()
     FFN_UP = auto()
     FFN_ACT = auto()
+    FFN_NORM_EXP = auto()
     FFN_GATE_EXP = auto()
     FFN_DOWN_EXP = auto()
     FFN_UP_EXP = auto()
@@ -218,6 +220,7 @@ class MODEL_TENSOR(IntEnum):
     MODEL_ARCH.COMMAND_R: "command-r",
     MODEL_ARCH.DBRX: "dbrx",
     MODEL_ARCH.OLMO: "olmo",
+    MODEL_ARCH.ARCTIC: "arctic",
 }
 
 TENSOR_NAMES: dict[MODEL_TENSOR, str] = {
@@ -251,6 +254,7 @@ class MODEL_TENSOR(IntEnum):
     MODEL_TENSOR.FFN_DOWN_SHEXP: "blk.{bid}.ffn_down_shexp",
     MODEL_TENSOR.FFN_UP_SHEXP: "blk.{bid}.ffn_up_shexp",
     MODEL_TENSOR.FFN_ACT: "blk.{bid}.ffn",
+    MODEL_TENSOR.FFN_NORM_EXP: "blk.{bid}.ffn_norm_exps",
     MODEL_TENSOR.FFN_GATE_EXP: "blk.{bid}.ffn_gate_exps",
     MODEL_TENSOR.FFN_DOWN_EXP: "blk.{bid}.ffn_down_exps",
     MODEL_TENSOR.FFN_UP_EXP: "blk.{bid}.ffn_up_exps",
@@ -732,6 +736,27 @@ class MODEL_TENSOR(IntEnum):
         MODEL_TENSOR.FFN_DOWN,
         MODEL_TENSOR.FFN_UP,
     ],
+    MODEL_ARCH.ARCTIC: [
+        MODEL_TENSOR.TOKEN_EMBD,
+        MODEL_TENSOR.OUTPUT_NORM,
+        MODEL_TENSOR.OUTPUT,
+        MODEL_TENSOR.ROPE_FREQS,
+        MODEL_TENSOR.ATTN_NORM,
+        MODEL_TENSOR.ATTN_Q,
+        MODEL_TENSOR.ATTN_K,
+        MODEL_TENSOR.ATTN_V,
+        MODEL_TENSOR.ATTN_OUT,
+        MODEL_TENSOR.ATTN_ROT_EMBD,
+        MODEL_TENSOR.FFN_GATE_INP,
+        MODEL_TENSOR.FFN_NORM,
+        MODEL_TENSOR.FFN_GATE,
+        MODEL_TENSOR.FFN_DOWN,
+        MODEL_TENSOR.FFN_UP,
+        MODEL_TENSOR.FFN_NORM_EXP,
+        MODEL_TENSOR.FFN_GATE_EXP,
+        MODEL_TENSOR.FFN_DOWN_EXP,
+        MODEL_TENSOR.FFN_UP_EXP,
+    ],
     # TODO
 }
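Each TENSOR_NAMES entry here is a name template expanded once per block by the tensor-name mapping. A small illustration of how a consumer might query the new arctic registration, assuming the gguf-py package from this commit is importable (the attribute names are the ones defined in constants.py):

# Sketch: inspect the arctic registration added above.
import gguf

arch = gguf.MODEL_ARCH.ARCTIC
print(gguf.MODEL_ARCH_NAMES[arch])                                    # arctic
print(gguf.MODEL_TENSOR.FFN_NORM_EXP in gguf.MODEL_TENSORS[arch])     # True
print(gguf.TENSOR_NAMES[gguf.MODEL_TENSOR.FFN_NORM_EXP].format(bid=0))  # blk.0.ffn_norm_exps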

gguf-py/gguf/tensor_mapping.py (+18, −1)
@@ -244,6 +244,7 @@ class TensorNameMap:
            "encoder.layers.{bid}.mlp.fc11",           # nomic-bert
            "model.layers.{bid}.mlp.c_fc",             # starcoder2
            "encoder.layer.{bid}.mlp.gated_layers_v",  # jina-bert-v2
+           "model.layers.{bid}.residual_mlp.w3",      # arctic
        ),
 
        MODEL_TENSOR.FFN_UP_EXP: (
@@ -272,6 +273,7 @@ class TensorNameMap:
            "encoder.layers.{bid}.mlp.fc12",           # nomic-bert
            "encoder.layer.{bid}.mlp.gated_layers_w",  # jina-bert-v2
            "transformer.h.{bid}.mlp.linear_1",        # refact
+           "model.layers.{bid}.residual_mlp.w1",      # arctic
        ),
 
        MODEL_TENSOR.FFN_GATE_EXP: (
@@ -306,6 +308,7 @@ class TensorNameMap:
            "encoder.layers.{bid}.mlp.fc2",   # nomic-bert
            "model.layers.{bid}.mlp.c_proj",  # starcoder2
            "encoder.layer.{bid}.mlp.wo",     # jina-bert-v2
+           "model.layers.{bid}.residual_mlp.w2",  # arctic
        ),
 
        MODEL_TENSOR.FFN_DOWN_EXP: (
@@ -382,6 +385,18 @@
        ),
    }
 
+   # architecture-specific block mappings
+   arch_block_mappings_cfg: dict[MODEL_ARCH, dict[MODEL_TENSOR, tuple[str, ...]]] = {
+       MODEL_ARCH.ARCTIC: {
+           MODEL_TENSOR.FFN_NORM: (
+               "model.layers.{bid}.residual_layernorm",
+           ),
+           MODEL_TENSOR.FFN_NORM_EXP: (
+               "model.layers.{bid}.post_attention_layernorm",
+           ),
+       },
+   }
+
    mapping: dict[str, tuple[MODEL_TENSOR, str]]
 
    def __init__(self, arch: MODEL_ARCH, n_blocks: int):
@@ -393,12 +408,14 @@ def __init__(self, arch: MODEL_ARCH, n_blocks: int):
            self.mapping[tensor_name] = (tensor, tensor_name)
            for key in keys:
                self.mapping[key] = (tensor, tensor_name)
+       if arch in self.arch_block_mappings_cfg:
+           self.block_mappings_cfg.update(self.arch_block_mappings_cfg[arch])
        for bid in range(n_blocks):
            for tensor, keys in self.block_mappings_cfg.items():
                if tensor not in MODEL_TENSORS[arch]:
                    continue
                # TODO: make this configurable
-               n_experts = 60
+               n_experts = 128
                for xid in range(n_experts):
                    tensor_name = TENSOR_NAMES[tensor].format(bid = bid, xid = xid)
                    self.mapping[tensor_name] = (tensor, tensor_name)
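With the architecture-specific table merged into block_mappings_cfg at construction time, Arctic's residual_layernorm resolves to the generic ffn_norm slot while post_attention_layernorm lands in the new ffn_norm_exps slot. A hedged usage sketch, assuming the gguf-py package from this commit; get_tensor_name_map and TensorNameMap.get_name are the entry points the convert script itself uses, while the layer count of 35 is just an illustrative value for Arctic:

# Sketch: resolve Arctic HF tensor names through the updated TensorNameMap.
import gguf

tmap = gguf.get_tensor_name_map(gguf.MODEL_ARCH.ARCTIC, n_blocks=35)

# arctic-specific override: residual_layernorm -> ffn_norm
print(tmap.get_name("model.layers.0.residual_layernorm.weight", try_suffixes=(".weight",)))
# -> blk.0.ffn_norm.weight

# new per-expert norm slot: post_attention_layernorm -> ffn_norm_exps
print(tmap.get_name("model.layers.0.post_attention_layernorm.weight", try_suffixes=(".weight",)))
# -> blk.0.ffn_norm_exps.weight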
