Revert "main : add --in-prefix-bos to prefix BOS to user inputs; keep EOS (ggml-org#2304)"

Aragula · Aragula · commit 8070def979f8 · 2023-08-04T14:11:09.000-07:00
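This reverts commit 0c06204.

It removes the `--in-prefix-bos` option (and `gpt_params::input_prefix_bos`) and restores the previous
end-of-text handling in `examples/main`: in interactive, non-instruct mode an EOS token is replaced with a
newline token (injecting the first reverse prompt, if any); in instruct mode EOS returns control to the user;
otherwise generation stops with " [end of text]".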
diff --git a/examples/common.cpp b/examples/common.cpp
@@ -445,8 +445,6 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) {
             exit(0);
         } else if (arg == "--random-prompt") {
             params.random_prompt = true;
-        } else if (arg == "--in-prefix-bos") {
-            params.input_prefix_bos = true;
         } else if (arg == "--in-prefix") {
             if (++i >= argc) {
                 invalid_param = true;
@@ -532,7 +530,6 @@ void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) {
     fprintf(stdout, "                        not supported with --interactive or other interactive options\n");
     fprintf(stdout, "  --prompt-cache-ro     if specified, uses the prompt cache but does not update it.\n");
     fprintf(stdout, "  --random-prompt       start with a randomized prompt.\n");
-    fprintf(stdout, "  --in-prefix-bos       prefix BOS to user inputs, preceding the `--in-prefix` string\n");
     fprintf(stdout, "  --in-prefix STRING    string to prefix user inputs with (default: empty)\n");
     fprintf(stdout, "  --in-suffix STRING    string to suffix after user inputs with (default: empty)\n");
     fprintf(stdout, "  -f FNAME, --file FNAME\n");
diff --git a/examples/common.h b/examples/common.h
@@ -82,7 +82,6 @@ struct gpt_params {
     bool multiline_input   = false; // reverse the usage of `\`
     bool simple_io         = false; // improves compatibility with subprocesses and limited consoles
 
-    bool input_prefix_bos  = false; // prefix BOS to user inputs, preceding input_prefix
     bool instruct          = false; // instruction mode (used for Alpaca models)
     bool penalize_nl       = true;  // consider newlines as a repeatable token
     bool perplexity        = false; // compute perplexity over the prompt
diff --git a/examples/main/main.cpp b/examples/main/main.cpp
@@ -322,10 +322,6 @@ int main(int argc, char ** argv) {
             }
         }
 
-        if (params.input_prefix_bos) {
-            fprintf(stderr, "Input prefix with BOS\n");
-        }
-
         if (!params.input_prefix.empty()) {
             fprintf(stderr, "Input prefix: '%s'\n", params.input_prefix.c_str());
         }
@@ -634,6 +630,16 @@ int main(int argc, char ** argv) {
                 last_n_tokens.push_back(id);
             }
 
+            // replace end of text token with newline token when in interactive mode
+            if (id == llama_token_eos() && params.interactive && !params.instruct) {
+                id = llama_token_newline.front();
+                if (params.antiprompt.size() != 0) {
+                    // tokenize and inject first reverse prompt
+                    const auto first_antiprompt = ::llama_tokenize(ctx, params.antiprompt.front(), false);
+                    embd_inp.insert(embd_inp.end(), first_antiprompt.begin(), first_antiprompt.end());
+                }
+            }
+
             // add it to the context
             embd.push_back(id);
 
@@ -699,34 +705,11 @@ int main(int argc, char ** argv) {
                 }
             }
 
-            // deal with end of text token in interactive mode
-            if (last_n_tokens.back() == llama_token_eos()) {
-                if (params.interactive) {
-                    if (params.antiprompt.size() != 0) {
-                        // tokenize and inject first reverse prompt
-                        const auto first_antiprompt = ::llama_tokenize(ctx, params.antiprompt.front(), false);
-                        embd_inp.insert(embd_inp.end(), first_antiprompt.begin(), first_antiprompt.end());
-                        is_antiprompt = true;
-                    }
-
-                    is_interacting = true;
-                    printf("\n");
-                    console::set_display(console::user_input);
-                    fflush(stdout);
-                } else if (params.instruct) {
-                    is_interacting = true;
-                }
-            }
-
             if (n_past > 0 && is_interacting) {
                 if (params.instruct) {
                     printf("\n> ");
                 }
 
-                if (params.input_prefix_bos) {
-                    embd_inp.push_back(llama_token_bos());
-                }
-
                 std::string buffer;
                 if (!params.input_prefix.empty()) {
                     buffer += params.input_prefix;
@@ -790,9 +773,13 @@ int main(int argc, char ** argv) {
         }
 
         // end of text token
-        if (!embd.empty() && embd.back() == llama_token_eos() && !(params.instruct || params.interactive)) {
-            fprintf(stderr, " [end of text]\n");
-            break;
+        if (!embd.empty() && embd.back() == llama_token_eos()) {
+            if (params.instruct) {
+                is_interacting = true;
+            } else {
+                fprintf(stderr, " [end of text]\n");
+                break;
+            }
         }
 
         // In interactive mode, respect the maximum number of tokens and drop back to user input when reached.