Skip to content

Commit 1e7a033

Browse files
committed
common : add comments
1 parent 9668aa1 commit 1e7a033

File tree

2 files changed

+7
-1
lines changed

2 files changed

+7
-1
lines changed

common/common.h

+6
Original file line number | Diff line number | Diff line change
@@ -116,15 +116,21 @@ struct llama_context_params llama_context_params_from_gpt_params(const gpt_param
116116
// Vocab utils
117117
//
118118

119+
// tokenizes a string into a vector of tokens
120+
// should work similarly to Python's `tokenizer.encode`
119121
std::vector<llama_token> llama_tokenize(
120122
struct llama_context * ctx,
121123
const std::string & text,
122124
bool add_bos);
123125

126+
// converts a token id into its piece (string)
127+
// should work similarly to Python's `tokenizer.id_to_piece`
124128
std::string llama_token_to_piece(
125129
const struct llama_context * ctx,
126130
llama_token token);
127131

132+
// detokenizes a vector of tokens into a string
133+
// should work similarly to Python's `tokenizer.decode`
128134
// removes the leading space from the first non-BOS token
129135
std::string llama_detokenize(
130136
llama_context * ctx,

llama.h

+1-1
Original file line number | Diff line number | Diff line change
@@ -384,7 +384,7 @@ extern "C" {
384384
// Token Id -> Piece.
385385
// Uses the vocabulary in the provided context.
386386
// Does not write null terminator to the buffer.
387-
// Use code is responsible to remove the leading whitespace of the first non-BOS token.
387+
// User code is responsible for removing the leading whitespace of the first non-BOS token when decoding multiple tokens.
388388
LLAMA_API int llama_token_to_piece(
389389
const struct llama_context * ctx,
390390
llama_token token,

0 commit comments

Comments
 (0)