
Commit 811f653

py : cosmetics

Parent: 49c25cc
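In short: the return annotations on the vocab token generators (bpe_tokens, sentencepiece_tokens, added_tokens, all_tokens) are widened from Iterable[Tuple[bytes, float]] to Iterable[Tuple[bytes, float, gguf.TokenType]], matching the three-element tuples these generators already yield; a blank line is added after the added_tokens fallback in the SentencePiece vocab's __init__; and the model name written by add_meta_arch is capitalized from "llama" to "LLaMA".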

File tree

convert.py

1 file changed: +8 −7 lines


convert.py (+8 −7)
@@ -255,7 +255,7 @@ def __init__(self, fname_tokenizer: Path, fname_added_tokens: Optional[Path]) ->
         self.fname_tokenizer = fname_tokenizer
         self.fname_added_tokens = fname_added_tokens
 
-    def bpe_tokens(self) -> Iterable[Tuple[bytes, float]]:
+    def bpe_tokens(self) -> Iterable[Tuple[bytes, float, gguf.TokenType]]:
         tokenizer = self.bpe_tokenizer
         from transformers.models.gpt2 import tokenization_gpt2
         byte_encoder = tokenization_gpt2.bytes_to_unicode()
@@ -265,12 +265,12 @@ def bpe_tokens(self) -> Iterable[Tuple[bytes, float]]:
             score: float = -i
             yield text, score, gguf.TokenType.USER_DEFINED
 
-    def added_tokens(self) -> Iterable[Tuple[bytes, float]]:
+    def added_tokens(self) -> Iterable[Tuple[bytes, float, gguf.TokenType]]:
         for text in self.added_tokens_list:
             score = -1000.0
             yield text.encode("utf-8"), score, gguf.TokenType.USER_DEFINED
 
-    def all_tokens(self) -> Iterable[Tuple[bytes, float]]:
+    def all_tokens(self) -> Iterable[Tuple[bytes, float, gguf.TokenType]]:
         yield from self.bpe_tokens()
         yield from self.added_tokens()
 
@@ -286,6 +286,7 @@ def __init__(self, fname_tokenizer: Path, fname_added_tokens: Optional[Path]) ->
             added_tokens = json.load(open(fname_added_tokens, encoding="utf-8"))
         else:
             added_tokens = {}
+
         vocab_size: int = self.sentencepiece_tokenizer.vocab_size()
         expected_ids = list(range(vocab_size, vocab_size + len(added_tokens)))
         actual_ids = sorted(added_tokens.values())
@@ -299,7 +300,7 @@ def __init__(self, fname_tokenizer: Path, fname_added_tokens: Optional[Path]) ->
         self.fname_tokenizer = fname_tokenizer
         self.fname_added_tokens = fname_added_tokens
 
-    def sentencepiece_tokens(self) -> Iterable[Tuple[bytes, float]]:
+    def sentencepiece_tokens(self) -> Iterable[Tuple[bytes, float, gguf.TokenType]]:
         tokenizer = self.sentencepiece_tokenizer
         for i in range(tokenizer.vocab_size()):
             piece = tokenizer.id_to_piece(i)
@@ -323,12 +324,12 @@ def sentencepiece_tokens(self) -> Iterable[Tuple[bytes, float]]:
 
             yield text, score, toktype
 
-    def added_tokens(self) -> Iterable[Tuple[bytes, float]]:
+    def added_tokens(self) -> Iterable[Tuple[bytes, float, gguf.TokenType]]:
         for text in self.added_tokens_list:
             score = -1000.0
             yield text.encode("utf-8"), score, gguf.TokenType.USER_DEFINED
 
-    def all_tokens(self) -> Iterable[Tuple[bytes, float]]:
+    def all_tokens(self) -> Iterable[Tuple[bytes, float, gguf.TokenType]]:
         yield from self.sentencepiece_tokens()
         yield from self.added_tokens()
 
@@ -727,7 +728,7 @@ def __init__(self, fname_out: Path) -> None:
         self.gguf = gguf.GGUFWriter(fname_out, gguf.MODEL_ARCH_NAMES[ARCH])
 
     def add_meta_arch(self, params: Params) -> None:
-        self.gguf.add_name                ("llama")
+        self.gguf.add_name                ("LLaMA")
         self.gguf.add_context_length      (params.n_ctx)
         self.gguf.add_embedding_length    (params.n_embd)
         self.gguf.add_block_count         (params.n_layer)
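For context, a minimal sketch of the pattern the widened annotations describe. SimpleVocab below is a hypothetical stand-in for the Vocab classes in convert.py, and the sketch assumes the gguf package bundled with llama.cpp is importable:

from typing import Iterable, List, Tuple

import gguf  # provides gguf.TokenType and gguf.TokenType.USER_DEFINED


class SimpleVocab:
    # Hypothetical stand-in for the vocab classes in convert.py.
    def __init__(self, tokens: List[str]) -> None:
        self.added_tokens_list = tokens

    def added_tokens(self) -> Iterable[Tuple[bytes, float, gguf.TokenType]]:
        # Each item is a (text, score, token type) triple; the generators in
        # convert.py already yielded three values, so only the annotations
        # needed widening.
        for text in self.added_tokens_list:
            score = -1000.0
            yield text.encode("utf-8"), score, gguf.TokenType.USER_DEFINED


# Callers unpack all three fields:
for text, score, toktype in SimpleVocab(["<pad>"]).added_tokens():
    print(text, score, toktype)

The runtime behavior is unchanged; the annotations simply catch up with the third element that the yield statements were already producing.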
