main : inject reverse prompt after EOS + update examples/chat.sh

ggerganov · ggerganov · commit e3da126f2a86 · 2023-08-21T16:41:27.000+03:00
diff --git a/examples/chat.sh b/examples/chat.sh
@@ -11,6 +11,8 @@ cd ..
 #
 #   "--keep 48" is based on the contents of prompts/chat-with-bob.txt
 #
-./main -m ./models/7B/ggml-model-q4_0.bin -c 512 -b 1024 -n 256 --keep 48 \
-    --repeat_penalty 1.0 --color -i \
-    -r "User:" -f prompts/chat-with-bob.txt
+./main -m ./models/7B/ggml-model-q4_0.bin -c 512 -b 1024 -n -1 --keep 48 \
+    --repeat_penalty 1.0 --color \
+    -i --interactive-first \
+    -r "User:" --in-prefix " " \
+    -f prompts/chat-with-bob.txt
diff --git a/examples/main/main.cpp b/examples/main/main.cpp
@@ -634,9 +634,14 @@ int main(int argc, char ** argv) {
                     llama_grammar_accept_token(ctx, grammar, id);
                 }
 
-                // replace end of text token with newline token when in interactive mode
+                // replace end of text token with newline token and inject reverse prompt when in interactive mode
                 if (id == llama_token_eos() && params.interactive && !params.instruct && !params.input_prefix_bos) {
                     id = llama_token_nl();
+                    if (params.antiprompt.size() != 0) {
+                        // tokenize and inject first reverse prompt
+                        const auto first_antiprompt = ::llama_tokenize(ctx, params.antiprompt.front(), false);
+                        embd_inp.insert(embd_inp.end(), first_antiprompt.begin(), first_antiprompt.end());
+                    }
                 }
 
                 last_n_tokens.erase(last_n_tokens.begin());

Original file line number	Diff line number	Diff line change
`@@ -11,6 +11,8 @@ cd ..`
`11`	`11`	`#`
`12`	`12`	`# "--keep 48" is based on the contents of prompts/chat-with-bob.txt`
`13`	`13`	`#`
`14`		`-./main -m ./models/7B/ggml-model-q4_0.bin -c 512 -b 1024 -n 256 --keep 48 \`
`15`		`- --repeat_penalty 1.0 --color -i \`
`16`		`- -r "User:" -f prompts/chat-with-bob.txt`
	`14`	`+./main -m ./models/7B/ggml-model-q4_0.bin -c 512 -b 1024 -n -1 --keep 48 \`
	`15`	`+ --repeat_penalty 1.0 --color \`
	`16`	`+ -i --interactive-first \`
	`17`	`+ -r "User:" --in-prefix " " \`
	`18`	`+ -f prompts/chat-with-bob.txt`
Original file line number	Diff line number	Diff line change
`@@ -634,9 +634,14 @@ int main(int argc, char ** argv) {`
`634`	`634`	`llama_grammar_accept_token(ctx, grammar, id);`
`635`	`635`	`}`
`636`	`636`
`637`		`- // replace end of text token with newline token when in interactive mode`
	`637`	`+ // replace end of text token with newline token and inject reverse prompt when in interactive mode`
`638`	`638`	`if (id == llama_token_eos() && params.interactive && !params.instruct && !params.input_prefix_bos) {`
`639`	`639`	`id = llama_token_nl();`
	`640`	`+ if (params.antiprompt.size() != 0) {`
	`641`	`+ // tokenize and inject first reverse prompt`
	`642`	`+ const auto first_antiprompt = ::llama_tokenize(ctx, params.antiprompt.front(), false);`
	`643`	`+ embd_inp.insert(embd_inp.end(), first_antiprompt.begin(), first_antiprompt.end());`
	`644`	`+ }`
`640`	`645`	`}`
`641`	`646`
`642`	`647`	`last_n_tokens.erase(last_n_tokens.begin());`