@@ -322,10 +322,6 @@ int main(int argc, char ** argv) {
322
322
}
323
323
}
324
324
325
- if (params.input_prefix_bos ) {
326
- fprintf (stderr, " Input prefix with BOS\n " );
327
- }
328
-
329
325
if (!params.input_prefix .empty ()) {
330
326
fprintf (stderr, " Input prefix: '%s'\n " , params.input_prefix .c_str ());
331
327
}
@@ -634,6 +630,16 @@ int main(int argc, char ** argv) {
634
630
last_n_tokens.push_back (id);
635
631
}
636
632
633
+ // replace end of text token with newline token when in interactive mode
634
+ if (id == llama_token_eos () && params.interactive && !params.instruct ) {
635
+ id = llama_token_newline.front ();
636
+ if (params.antiprompt .size () != 0 ) {
637
+ // tokenize and inject first reverse prompt
638
+ const auto first_antiprompt = ::llama_tokenize (ctx, params.antiprompt .front (), false );
639
+ embd_inp.insert (embd_inp.end (), first_antiprompt.begin (), first_antiprompt.end ());
640
+ }
641
+ }
642
+
637
643
// add it to the context
638
644
embd.push_back (id);
639
645
@@ -699,34 +705,11 @@ int main(int argc, char ** argv) {
699
705
}
700
706
}
701
707
702
- // deal with end of text token in interactive mode
703
- if (last_n_tokens.back () == llama_token_eos ()) {
704
- if (params.interactive ) {
705
- if (params.antiprompt .size () != 0 ) {
706
- // tokenize and inject first reverse prompt
707
- const auto first_antiprompt = ::llama_tokenize (ctx, params.antiprompt .front (), false );
708
- embd_inp.insert (embd_inp.end (), first_antiprompt.begin (), first_antiprompt.end ());
709
- is_antiprompt = true ;
710
- }
711
-
712
- is_interacting = true ;
713
- printf (" \n " );
714
- console::set_display (console::user_input);
715
- fflush (stdout);
716
- } else if (params.instruct ) {
717
- is_interacting = true ;
718
- }
719
- }
720
-
721
708
if (n_past > 0 && is_interacting) {
722
709
if (params.instruct ) {
723
710
printf (" \n > " );
724
711
}
725
712
726
- if (params.input_prefix_bos ) {
727
- embd_inp.push_back (llama_token_bos ());
728
- }
729
-
730
713
std::string buffer;
731
714
if (!params.input_prefix .empty ()) {
732
715
buffer += params.input_prefix ;
@@ -790,9 +773,13 @@ int main(int argc, char ** argv) {
790
773
}
791
774
792
775
// end of text token
793
- if (!embd.empty () && embd.back () == llama_token_eos () && !(params.instruct || params.interactive )) {
794
- fprintf (stderr, " [end of text]\n " );
795
- break ;
776
+ if (!embd.empty () && embd.back () == llama_token_eos ()) {
777
+ if (params.instruct ) {
778
+ is_interacting = true ;
779
+ } else {
780
+ fprintf (stderr, " [end of text]\n " );
781
+ break ;
782
+ }
796
783
}
797
784
798
785
// In interactive mode, respect the maximum number of tokens and drop back to user input when reached.
0 commit comments