File tree 2 files changed +11
-4
lines changed
2 files changed +11
-4
lines changed Original file line number Diff line number Diff line change 11
11
#
12
12
# "--keep 48" is based on the contents of prompts/chat-with-bob.txt
13
13
#
14
- ./main -m ./models/7B/ggml-model-q4_0.bin -c 512 -b 1024 -n 256 --keep 48 \
15
- --repeat_penalty 1.0 --color -i \
16
- -r " User:" -f prompts/chat-with-bob.txt
14
+ ./main -m ./models/7B/ggml-model-q4_0.bin -c 512 -b 1024 -n -1 --keep 48 \
15
+ --repeat_penalty 1.0 --color \
16
+ -i --interactive-first \
17
+ -r " User:" --in-prefix " " \
18
+ -f prompts/chat-with-bob.txt
Original file line number Diff line number Diff line change @@ -634,9 +634,14 @@ int main(int argc, char ** argv) {
634
634
llama_grammar_accept_token (ctx, grammar, id);
635
635
}
636
636
637
- // replace end of text token with newline token when in interactive mode
637
+ // in interactive (non-instruct) mode, replace the end-of-text token with a newline token and, if any reverse prompt is set, append the first one's tokens to embd_inp
638
638
if (id == llama_token_eos () && params.interactive && !params.instruct && !params.input_prefix_bos ) {
639
639
id = llama_token_nl ();
640
+ if (params.antiprompt .size () != 0 ) {
641
+ // tokenize and inject first reverse prompt
642
+ const auto first_antiprompt = ::llama_tokenize (ctx, params.antiprompt .front (), false );
643
+ embd_inp.insert (embd_inp.end (), first_antiprompt.begin (), first_antiprompt.end ());
644
+ }
640
645
}
641
646
642
647
last_n_tokens.erase (last_n_tokens.begin ());
You can’t perform that action at this time.
0 commit comments