File tree 2 files changed +7
-1
lines changed
2 files changed +7
-1
lines changed Original file line number Diff line number Diff line change @@ -116,15 +116,21 @@ struct llama_context_params llama_context_params_from_gpt_params(const gpt_param
116
116
// Vocab utils
117
117
//
118
118
119
+ // tokenizes a string into a vector of tokens
120
+ // should work similar to Python's `tokenizer.encode`
119
121
std::vector<llama_token> llama_tokenize (
120
122
struct llama_context * ctx,
121
123
const std::string & text,
122
124
bool add_bos);
123
125
126
+ // tokenizes a token into a piece
127
+ // should work similar to Python's `tokenizer.id_to_piece`
124
128
std::string llama_token_to_piece (
125
129
const struct llama_context * ctx,
126
130
llama_token token);
127
131
132
+ // detokenizes a vector of tokens into a string
133
+ // should work similar to Python's `tokenizer.decode`
128
134
// removes the leading space from the first non-BOS token
129
135
std::string llama_detokenize (
130
136
llama_context * ctx,
Original file line number Diff line number Diff line change @@ -384,7 +384,7 @@ extern "C" {
384
384
// Token Id -> Piece.
385
385
// Uses the vocabulary in the provided context.
386
386
// Does not write null terminator to the buffer.
387
- // Use code is responsible to remove the leading whitespace of the first non-BOS token.
387
+ // User code is responsible for removing the leading whitespace of the first non-BOS token when decoding multiple tokens.
388
388
LLAMA_API int llama_token_to_piece (
389
389
const struct llama_context * ctx,
390
390
llama_token token,
You can’t perform that action at this time.
0 commit comments