@@ -829,6 +829,29 @@ void sigint_handler(int signo) {
 }
 #endif
 
+
+std::string escapeString(std::string stdstr) {
+    const char * str = stdstr.c_str();
+    std::string escapedStr;
+    for (const char * c = str; *c != '\0'; ++c) {
+        switch (*c) {
+            case '\a': escapedStr += "\\a"; break;
+            case '\b': escapedStr += "\\b"; break;
+            case '\f': escapedStr += "\\f"; break;
+            case '\n': escapedStr += "\\n"; break;
+            case '\r': escapedStr += "\\r"; break;
+            case '\t': escapedStr += "\\t"; break;
+            case '\v': escapedStr += "\\v"; break;
+            case '\\': escapedStr += "\\\\"; break;
+            case '\"': escapedStr += "\\\""; break;
+            case '\'': escapedStr += "\\\'"; break;
+            default:   escapedStr += *c; break;
+        }
+    }
+    // std::cout << "test string" << escapedStr << std::endl;
+    return escapedStr;
+}
+
 int llama_main(
     gpt_params params,
     llama_vocab vocab,
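
The escapeString helper added above underpins the whole protocol: it flattens control characters and quotes into backslash sequences so that every protocol message occupies exactly one line of output. A minimal sketch of its effect; the sample inputs and the surrounding main() are illustrative, not part of the patch:

    // Illustrative check of escapeString's behavior -- not from the patch.
    #include <cassert>
    #include <string>

    int main() {
        // '\t' and '\n' come back as the two-character sequences "\t" and "\n",
        // so multi-line text can travel on a single protocol line.
        assert(escapeString("a\tb\n") == "a\\tb\\n");
        // Double quotes are escaped, which matters for KV reverse_prompt="..." lines.
        assert(escapeString("say \"hi\"") == "say \\\"hi\\\"");
        return 0;
    }
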
@@ -842,8 +865,12 @@ int llama_main(
     if (params.seed < 0) {
         params.seed = time(NULL);
     }
-
-    fprintf(errstream, "%s: seed = %d\n", __func__, params.seed);
+    if (params.protocol_mode) {
+        fprintf(outstream, "%s", "HELO\n");
+        fprintf(outstream, "KV seed=%d\n", params.seed);
+    } else {
+        fprintf(errstream, "%s: seed = %d\n", __func__, params.seed);
+    }
 
     std::mt19937 rng(params.seed);
     if (params.random_prompt) {
@@ -891,13 +918,24 @@ int llama_main(
         params.interactive = true;
     }
 
-    fprintf(errstream, "\n");
-    fprintf(errstream, "%s: prompt: '%s'\n", __func__, params.prompt.c_str());
-    fprintf(errstream, "%s: number of tokens in prompt = %zu\n", __func__, embd_inp.size());
+    if (params.protocol_mode) {
+        fprintf(outstream, "PROMPT %s\n", escapeString(params.prompt).c_str());
+        fprintf(outstream, "KV prompt_tokens=%zu\n", embd_inp.size());
+    } else {
+        fprintf(errstream, "\n");
+        fprintf(errstream, "%s: prompt: '%s'\n", __func__, params.prompt.c_str());
+        fprintf(errstream, "%s: number of tokens in prompt = %zu\n", __func__, embd_inp.size());
+    }
     for (int i = 0; i < (int) embd_inp.size(); i++) {
-        fprintf(errstream, "%6d -> '%s'\n", embd_inp[i], vocab.id_to_token.at(embd_inp[i]).c_str());
+        if (params.protocol_mode) {
+            fprintf(outstream, "DEBUG %d -> '%s'\n", embd_inp[i], escapeString(vocab.id_to_token.at(embd_inp[i])).c_str());
+        } else {
+            fprintf(errstream, "%6d -> '%s'\n", embd_inp[i], vocab.id_to_token.at(embd_inp[i]).c_str());
+        }
+    }
+    if (!params.protocol_mode) {
+        fprintf(errstream, "\n");
     }
-    fprintf(errstream, "\n");
     if (params.interactive) {
 #if defined (__unix__) || (defined (__APPLE__) && defined (__MACH__))
         struct sigaction sigint_action;
@@ -909,16 +947,32 @@ int llama_main(
         signal(SIGINT, sigint_handler);
 #endif
 
-        fprintf(errstream, "%s: interactive mode on.\n", __func__);
+        if (params.protocol_mode) {
+            fprintf(outstream, "KV interactive_mode=true\n");
+        } else {
+            fprintf(errstream, "%s: interactive mode on.\n", __func__);
+        }
 
         if (params.antiprompt.size()) {
             for (auto antiprompt : params.antiprompt) {
-                fprintf(errstream, "Reverse prompt: '%s'\n", antiprompt.c_str());
+                if (params.protocol_mode) {
+                    fprintf(outstream, "KV reverse_prompt=\"%s\"\n", escapeString(antiprompt).c_str());
+                } else {
+                    fprintf(errstream, "Reverse prompt: '%s'\n", antiprompt.c_str());
+                }
             }
         }
     }
-    fprintf(errstream, "sampling parameters: temp = %f, top_k = %d, top_p = %f, repeat_last_n = %i, repeat_penalty = %f\n", params.temp, params.top_k, params.top_p, params.repeat_last_n, params.repeat_penalty);
-    fprintf(errstream, "\n\n");
+    if (params.protocol_mode) {
+        fprintf(errstream, "KV temp=%f\n", params.temp);
+        fprintf(errstream, "KV top_k=%d\n", params.top_k);
+        fprintf(errstream, "KV top_p=%f\n", params.top_p);
+        fprintf(errstream, "KV repeat_last_n=%i\n", params.repeat_last_n);
+        fprintf(errstream, "KV repeat_penalty=%f\n", params.repeat_penalty);
+    } else {
+        fprintf(errstream, "sampling parameters: temp = %f, top_k = %d, top_p = %f, repeat_last_n = %i, repeat_penalty = %f\n", params.temp, params.top_k, params.top_p, params.repeat_last_n, params.repeat_penalty);
+        fprintf(errstream, "\n\n");
+    }
 
     std::vector<llama_vocab::id> embd;
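
Taken together, the hunks so far define a line-oriented startup handshake on outstream. For a hypothetical protocol_mode run with a short prompt and one reverse prompt, the stream might begin as follows (all values illustrative; note that the sampling-parameter KV lines above are written to errstream in this patch, so they do not appear here):

    HELO
    KV seed=1679000000
    PROMPT Hello world
    KV prompt_tokens=3
    KV interactive_mode=true
    KV reverse_prompt="User:"
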
@@ -927,12 +981,14 @@ int llama_main(
     std::fill(last_n_tokens.begin(), last_n_tokens.end(), 0);
 
     if (params.interactive) {
-        fprintf(errstream, "== Running in interactive mode. ==\n"
+        if (!params.protocol_mode) {
+            fprintf(errstream, "== Running in interactive mode. ==\n"
 #if defined (__unix__) || (defined (__APPLE__) && defined (__MACH__)) || defined (_WIN32)
-                " - Press Ctrl+C to interject at any time.\n"
+                    " - Press Ctrl+C to interject at any time.\n"
 #endif
-                " - Press Return to return control to LLaMa.\n"
-                " - If you want to submit another line, end your input in '\\'.\n\n");
+                    " - Press Return to return control to LLaMa.\n"
+                    " - If you want to submit another line, end your input in '\\'.\n\n");
+        }
         is_interacting = true;
     }
@@ -955,12 +1011,19 @@ int llama_main(
     }
 
     while (remaining_tokens > 0 || params.interactive) {
+        if (params.protocol_mode && !params.interactive) {
+            fprintf(outstream, "KV remaining_tokens=%d\n", remaining_tokens);
+        }
         // predict
         if (embd.size() > 0) {
             const int64_t t_start_us = ggml_time_us();
 
             if (!llama_eval(model, params.n_threads, n_past, embd, logits, mem_per_token)) {
-                fprintf(errstream, "Failed to predict\n");
+                if (params.protocol_mode) {
+                    fprintf(outstream, "FATAL Error: Failed to predict\n");
+                } else {
+                    fprintf(errstream, "Failed to predict\n");
+                }
                 return 1;
             }
@@ -1020,8 +1083,16 @@ int llama_main(
 
         // display text
         if (!input_noecho) {
+            if (params.protocol_mode) {
+                fprintf(outstream, "OUTPUT ");
+            }
             for (auto id : embd) {
-                fprintf(outstream, "%s", vocab.id_to_token[id].c_str());
+                fprintf(outstream, "%s", params.protocol_mode ?
+                    escapeString(vocab.id_to_token[id]).c_str() :
+                    vocab.id_to_token[id].c_str());
+            }
+            if (params.protocol_mode) {
+                fprintf(outstream, "\n");
             }
             fflush(outstream);
         }
@@ -1047,11 +1118,17 @@ int llama_main(
             }
         }
         if (is_interacting) {
+            if (params.protocol_mode) {
+                fprintf(outstream, "KV awaiting_prompt=true\n");
+                fflush(outstream);
+            }
             if (params.instruct) {
                 input_consumed = embd_inp.size();
                 embd_inp.insert(embd_inp.end(), inp_pfx.begin(), inp_pfx.end());
 
-                fprintf(outstream, "\n> ");
+                if (!params.protocol_mode) {
+                    fprintf(outstream, "\n> ");
+                }
             }
 
             // currently being interactive
@@ -1068,6 +1145,7 @@ int llama_main(
                 }
                 buffer += line + '\n'; // Append the line to the result
             } while (another_line);
+            fprintf(outstream, "PROMPT %s\n", escapeString(line).c_str());
             if (params.use_color) fprintf(outstream, ANSI_COLOR_RESET);
 
             std::vector<llama_vocab::id> line_inp = ::llama_tokenize(vocab, buffer, false);
@@ -1080,6 +1158,10 @@ int llama_main(
                 remaining_tokens -= line_inp.size();
 
                 input_noecho = true; // do not echo this again
+                if (params.protocol_mode) {
+                    fprintf(outstream, "KV awaiting_prompt=false\n");
+                    fflush(outstream);
+                }
             }
             is_interacting = false;
         }
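
In interactive runs, these awaiting_prompt markers bracket each turn so a supervising process knows when the model expects input. A plausible exchange, with illustrative user text (the PROMPT line echoes the submitted input after escapeString):

    KV awaiting_prompt=true
    PROMPT tell me a joke\n
    KV awaiting_prompt=false
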
@@ -1089,7 +1171,13 @@ int llama_main(
             if (params.interactive) {
                 is_interacting = true;
             } else {
-                fprintf(errstream, "[end of text]\n");
+                if (params.protocol_mode) {
+                    fprintf(outstream, "END_OF_TEXT\n");
+                    fflush(outstream);
+                } else {
+                    fprintf(errstream, "[end of text]\n");
+                    fflush(errstream);
+                }
                 break;
             }
         }
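
For completeness, here is a minimal sketch of a supervising process consuming this stream. Only the verbs (HELO, KV, PROMPT, DEBUG, OUTPUT, FATAL, END_OF_TEXT) come from the patch; the parsing code itself is illustrative and assumes the child's outstream is piped to this program's stdin:

    // Minimal protocol reader -- illustrative, not part of the patch.
    #include <iostream>
    #include <sstream>
    #include <string>

    int main() {
        std::string line;
        while (std::getline(std::cin, line)) {    // one protocol message per line
            std::istringstream iss(line);
            std::string verb;
            iss >> verb;                          // first token is the message type
            std::string rest;
            std::getline(iss, rest);              // remainder is the payload (still escaped)
            if (!rest.empty() && rest.front() == ' ') rest.erase(0, 1);
            if (verb == "OUTPUT") {
                std::cout << "generated: " << rest << '\n';
            } else if (verb == "KV") {
                std::cout << "state: " << rest << '\n';   // key=value pair
            } else if (verb == "FATAL" || verb == "END_OF_TEXT") {
                break;                            // the stream is finished either way
            }
            // HELO, PROMPT, and DEBUG lines are informational and ignored here.
        }
        return 0;
    }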