Skip to content

Commit 8070def

Browse files
author
Aragula
committed
Revert "main : add --in-prefix-bos to prefix BOS to user inputs; keep EOS (ggml-org#2304)"
This reverts commit 0c06204.
1 parent 3323112 commit 8070def

File tree

3 files changed

+17
-34
lines changed

3 files changed

+17
-34
lines changed

examples/common.cpp

-3
Original file line number | Diff line number | Diff line change
@@ -445,8 +445,6 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) {
445445
exit(0);
446446
} else if (arg == "--random-prompt") {
447447
params.random_prompt = true;
448-
} else if (arg == "--in-prefix-bos") {
449-
params.input_prefix_bos = true;
450448
} else if (arg == "--in-prefix") {
451449
if (++i >= argc) {
452450
invalid_param = true;
@@ -532,7 +530,6 @@ void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) {
532530
fprintf(stdout, " not supported with --interactive or other interactive options\n");
533531
fprintf(stdout, " --prompt-cache-ro if specified, uses the prompt cache but does not update it.\n");
534532
fprintf(stdout, " --random-prompt start with a randomized prompt.\n");
535-
fprintf(stdout, " --in-prefix-bos prefix BOS to user inputs, preceding the `--in-prefix` string\n");
536533
fprintf(stdout, " --in-prefix STRING string to prefix user inputs with (default: empty)\n");
537534
fprintf(stdout, " --in-suffix STRING string to suffix after user inputs with (default: empty)\n");
538535
fprintf(stdout, " -f FNAME, --file FNAME\n");

examples/common.h

-1
Original file line number | Diff line number | Diff line change
@@ -82,7 +82,6 @@ struct gpt_params {
8282
bool multiline_input = false; // reverse the usage of `\`
8383
bool simple_io = false; // improves compatibility with subprocesses and limited consoles
8484

85-
bool input_prefix_bos = false; // prefix BOS to user inputs, preceding input_prefix
8685
bool instruct = false; // instruction mode (used for Alpaca models)
8786
bool penalize_nl = true; // consider newlines as a repeatable token
8887
bool perplexity = false; // compute perplexity over the prompt

examples/main/main.cpp

+17-30
Original file line number | Diff line number | Diff line change
@@ -322,10 +322,6 @@ int main(int argc, char ** argv) {
322322
}
323323
}
324324

325-
if (params.input_prefix_bos) {
326-
fprintf(stderr, "Input prefix with BOS\n");
327-
}
328-
329325
if (!params.input_prefix.empty()) {
330326
fprintf(stderr, "Input prefix: '%s'\n", params.input_prefix.c_str());
331327
}
@@ -634,6 +630,16 @@ int main(int argc, char ** argv) {
634630
last_n_tokens.push_back(id);
635631
}
636632

633+
// replace end of text token with newline token when in interactive mode
634+
if (id == llama_token_eos() && params.interactive && !params.instruct) {
635+
id = llama_token_newline.front();
636+
if (params.antiprompt.size() != 0) {
637+
// tokenize and inject first reverse prompt
638+
const auto first_antiprompt = ::llama_tokenize(ctx, params.antiprompt.front(), false);
639+
embd_inp.insert(embd_inp.end(), first_antiprompt.begin(), first_antiprompt.end());
640+
}
641+
}
642+
637643
// add it to the context
638644
embd.push_back(id);
639645

@@ -699,34 +705,11 @@ int main(int argc, char ** argv) {
699705
}
700706
}
701707

702-
// deal with end of text token in interactive mode
703-
if (last_n_tokens.back() == llama_token_eos()) {
704-
if (params.interactive) {
705-
if (params.antiprompt.size() != 0) {
706-
// tokenize and inject first reverse prompt
707-
const auto first_antiprompt = ::llama_tokenize(ctx, params.antiprompt.front(), false);
708-
embd_inp.insert(embd_inp.end(), first_antiprompt.begin(), first_antiprompt.end());
709-
is_antiprompt = true;
710-
}
711-
712-
is_interacting = true;
713-
printf("\n");
714-
console::set_display(console::user_input);
715-
fflush(stdout);
716-
} else if (params.instruct) {
717-
is_interacting = true;
718-
}
719-
}
720-
721708
if (n_past > 0 && is_interacting) {
722709
if (params.instruct) {
723710
printf("\n> ");
724711
}
725712

726-
if (params.input_prefix_bos) {
727-
embd_inp.push_back(llama_token_bos());
728-
}
729-
730713
std::string buffer;
731714
if (!params.input_prefix.empty()) {
732715
buffer += params.input_prefix;
@@ -790,9 +773,13 @@ int main(int argc, char ** argv) {
790773
}
791774

792775
// end of text token
793-
if (!embd.empty() && embd.back() == llama_token_eos() && !(params.instruct || params.interactive)) {
794-
fprintf(stderr, " [end of text]\n");
795-
break;
776+
if (!embd.empty() && embd.back() == llama_token_eos()) {
777+
if (params.instruct) {
778+
is_interacting = true;
779+
} else {
780+
fprintf(stderr, " [end of text]\n");
781+
break;
782+
}
796783
}
797784

798785
// In interactive mode, respect the maximum number of tokens and drop back to user input when reached.

0 commit comments

Comments
 (0)