Skip to content

Commit 3e6e7a6

Browse files
authored
tokenize : escape the prompt (#11058)
* tokenize : escape the prompt
* tokenize : update help
1 parent ae2f606 commit 3e6e7a6

File tree

1 file changed

+10
-0
lines changed

1 file changed

+10
-0
lines changed

examples/tokenize/tokenize.cpp

+10
Original file line number | Diff line number | Diff line change
@@ -31,6 +31,7 @@ static void print_usage_information(const char * argv0) {
3131
printf(" -p PROMPT, --prompt PROMPT read prompt from the argument.\n");
3232
printf(" --stdin read prompt from standard input.\n");
3333
printf(" --no-bos do not ever add a BOS token to the prompt, even if normally the model uses a BOS token.\n");
34+
printf(" --no-escape do not escape input (such as \\n, \\t, etc.).\n");
3435
printf(" --no-parse-special do not parse control tokens.\n");
3536
printf(" --log-disable disable logs. Makes stderr quiet when loading the model.\n");
3637
printf(" --show-count print the total number of tokens.\n");
@@ -198,6 +199,7 @@ int main(int raw_argc, char ** raw_argv) {
198199
// variables where to put any arguments we see.
199200
bool printing_ids = false;
200201
bool no_bos = false;
202+
bool no_escape = false;
201203
bool no_parse_special = false;
202204
bool disable_logging = false;
203205
bool show_token_count = false;
@@ -233,6 +235,9 @@ int main(int raw_argc, char ** raw_argv) {
233235
else if (arg == "--no-bos") {
234236
no_bos = true;
235237
}
238+
else if (arg == "--no-escape") {
239+
no_escape = true;
240+
}
236241
else if (arg == "--no-parse-special") {
237242
no_parse_special = true;
238243
}
@@ -363,6 +368,11 @@ int main(int raw_argc, char ** raw_argv) {
363368
const bool model_wants_add_bos = llama_add_bos_token(model);
364369
const bool add_bos = model_wants_add_bos && !no_bos;
365370
const bool parse_special = !no_parse_special;
371+
const bool escape = !no_escape;
372+
373+
if (escape) {
374+
string_process_escapes(prompt);
375+
}
366376

367377
std::vector<llama_token> tokens;
368378
tokens = common_tokenize(model, prompt, add_bos, parse_special);

0 commit comments

Comments
 (0)