@@ -230,7 +230,7 @@ def _get_part_names(self):
     def _set_vocab_gpt2(self):
         dir_model = self.dir_model
         hparams = self.hparams
-        tokens: list[bytearray] = []
+        tokens: list[str] = []
         toktypes: list[int] = []
 
         from transformers import AutoTokenizer
@@ -243,8 +243,7 @@ def _set_vocab_gpt2(self):
 
         for i in range(vocab_size):
             if i not in reverse_vocab:
-                pad_token = f"[PAD{i}]".encode('utf-8')
-                tokens.append(bytearray(pad_token))
+                tokens.append(f"[PAD{i}]")
                 toktypes.append(gguf.TokenType.USER_DEFINED)
             elif reverse_vocab[i] in added_vocab:
                 tokens.append(reverse_vocab[i])
@@ -266,7 +265,7 @@ def _set_vocab_gpt2(self):
     def _set_vocab_qwen(self):
         dir_model = self.dir_model
         hparams = self.hparams
-        tokens: list[bytearray] = []
+        tokens: list[str] = []
         toktypes: list[int] = []
 
         from transformers import AutoTokenizer
@@ -291,8 +290,7 @@ def _set_vocab_qwen(self):
 
         for i in range(vocab_size):
            if i not in reverse_vocab:
-                pad_token = f"[PAD{i}]".encode("utf-8")
-                tokens.append(bytearray(pad_token))
+                tokens.append(f"[PAD{i}]")
                 toktypes.append(gguf.TokenType.USER_DEFINED)
             elif reverse_vocab[i] in added_vocab:
                 tokens.append(reverse_vocab[i])
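For context, both vocab setters fill any ids missing from the tokenizer's reverse mapping with `[PAD{i}]` placeholders, which after this change stay plain `str` instead of being round-tripped through `.encode()` and `bytearray`. Below is a minimal, self-contained sketch of that loop: the toy `reverse_vocab`, `added_vocab`, and `vocab_size` values are illustrative, and the CONTROL/NORMAL toktype branches are assumed from the surrounding script rather than shown in these hunks.

```python
import gguf

# Illustrative stand-ins; the real script derives these from a
# Hugging Face AutoTokenizer loaded from dir_model.
reverse_vocab = {0: "<s>", 1: "</s>", 3: "hello"}  # note the gap at id 2
added_vocab = {"</s>"}
vocab_size = 4

tokens: list[str] = []
toktypes: list[int] = []

for i in range(vocab_size):
    if i not in reverse_vocab:
        # Missing ids become plain-string placeholders; no
        # .encode()/bytearray round-trip is needed anymore.
        tokens.append(f"[PAD{i}]")
        toktypes.append(gguf.TokenType.USER_DEFINED)
    elif reverse_vocab[i] in added_vocab:
        # Assumed branch: added tokens are typically marked CONTROL.
        tokens.append(reverse_vocab[i])
        toktypes.append(gguf.TokenType.CONTROL)
    else:
        tokens.append(reverse_vocab[i])
        toktypes.append(gguf.TokenType.NORMAL)

print(tokens)  # ['<s>', '</s>', '[PAD2]', 'hello']
```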