File tree 1 file changed +8
-2
lines changed
1 file changed +8
-2
lines changed Original file line number Diff line number Diff line change 11
11
from pathlib import Path
12
12
from typing import TYPE_CHECKING , Any
13
13
import itertools
14
- import gguf
15
14
import numpy as np
16
15
import torch
17
16
from sentencepiece import SentencePieceProcessor # type: ignore[import]
18
17
18
+ if 'NO_LOCAL_GGUF' not in os .environ :
19
+ sys .path .insert (1 , str (Path (__file__ ).parent / 'gguf-py' / 'gguf' ))
20
+ import gguf
21
+
19
22
20
23
if TYPE_CHECKING :
21
24
from typing import TypeAlias
@@ -174,8 +177,11 @@ def parse_args() -> argparse.Namespace:
174
177
print ("gguf: get sentencepiece tokenizer vocab, scores and token types" )
175
178
176
179
tokenizer = SentencePieceProcessor (str (tokenizer_model_file ))
180
+ vocab_size = hparams .get ('vocab_size' )
181
+ if vocab_size is None :
182
+ vocab_size = tokenizer .vocab_size ()
177
183
178
- for i in range (tokenizer . vocab_size () ):
184
+ for i in range (vocab_size ):
179
185
text : bytes
180
186
score : float
181
187
You can’t perform that action at this time.
0 commit comments