Skip to content

Commit 074ded4

Browse files
committed
convert-hf : fix type of tokens after #3252
1 parent 9699157 commit 074ded4

File tree

1 file changed

+4
-6
lines changed

1 file changed

+4
-6
lines changed

convert-hf-to-gguf.py

+4 -6
Original file line number | Diff line number | Diff line change
@@ -230,7 +230,7 @@ def _get_part_names(self):
230230
def _set_vocab_gpt2(self):
231231
dir_model = self.dir_model
232232
hparams = self.hparams
233-
tokens: list[bytearray] = []
233+
tokens: list[str] = []
234234
toktypes: list[int] = []
235235

236236
from transformers import AutoTokenizer
@@ -243,8 +243,7 @@ def _set_vocab_gpt2(self):
243243

244244
for i in range(vocab_size):
245245
if i not in reverse_vocab:
246-
pad_token = f"[PAD{i}]".encode('utf-8')
247-
tokens.append(bytearray(pad_token))
246+
tokens.append(f"[PAD{i}]")
248247
toktypes.append(gguf.TokenType.USER_DEFINED)
249248
elif reverse_vocab[i] in added_vocab:
250249
tokens.append(reverse_vocab[i])
@@ -266,7 +265,7 @@ def _set_vocab_gpt2(self):
266265
def _set_vocab_qwen(self):
267266
dir_model = self.dir_model
268267
hparams = self.hparams
269-
tokens: list[bytearray] = []
268+
tokens: list[str] = []
270269
toktypes: list[int] = []
271270

272271
from transformers import AutoTokenizer
@@ -291,8 +290,7 @@ def _set_vocab_qwen(self):
291290

292291
for i in range(vocab_size):
293292
if i not in reverse_vocab:
294-
pad_token = f"[PAD{i}]".encode("utf-8")
295-
tokens.append(bytearray(pad_token))
293+
tokens.append(f"[PAD{i}]")
296294
toktypes.append(gguf.TokenType.USER_DEFINED)
297295
elif reverse_vocab[i] in added_vocab:
298296
tokens.append(reverse_vocab[i])

0 commit comments

Comments
 (0)