You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
Improve handling of special tokens in GGML to GGUF converter (#2725)
* Improve UNK, BOS, EOS token handling when converting without metadata.
* Allow importing as a module.
* Remove some obsolete code and minor cleanups.
* Set default UNK token mapping from -1 to 0 in llama.cpp
* Try to handle overflow due to buggy Windows Python with a better error message
for (idx, (vbytes, score, ttype)) in enumerate(vo.all_tokens()):
221
+
tokens.append(vbytes)
222
+
scores.append(score)
223
+
toktypes.append(ttype)
227
224
assert len(tokens) == hp.n_vocab, f'Override vocab has a different number of items than hyperparameters - override = {len(tokens)} but n_vocab={hp.n_vocab}'
228
225
gguf_writer.add_token_list(tokens)
229
226
gguf_writer.add_token_scores(scores)
230
227
if len(toktypes) > 0:
231
228
gguf_writer.add_token_types(toktypes)
232
229
return
233
230
print(f'* Adding {hp.n_vocab} vocab item(s)')
231
+
assert len(self.model.vocab.items) >= 3, 'Cannot handle unexpectedly short model vocab'
234
232
for (tokid, (vbytes, vscore)) in enumerate(self.model.vocab.items):
print(f'!!! Caught overflow loading tensors. The most likely issue is running on Windows but not in WSL. Try running in WSL if possible.', file=sys.stderr)
334
+
raise
319
335
print(f'* GGML model hyperparameters: {model.hyperparameters}')
320
336
vocab_override=None
321
337
params_override=None
@@ -330,4 +346,5 @@ def main():
330
346
converter.save()
331
347
print(f'* Successful completion. Output saved to: {cfg.output}')
0 commit comments