Skip to content

Commit fa84c4b

Browse files
authored
Fix issue where interactive mode crashes when input exceeds ctx size (ggml-org#1789)
* Fix issue where interactive mode in the main example crashes when input exceeds ctx size
* Ensure the context size is at least 8 tokens in the main example.

Closes ggml-org#1768
1 parent 12b063f commit fa84c4b

File tree

3 files changed

+21
-1
lines changed

3 files changed

+21
-1
lines changed

examples/common.cpp

+3
Original file line numberDiff line numberDiff line change
@@ -632,6 +632,9 @@ void console_set_color(console_state & con_st, console_color_t color) {
632632
case CONSOLE_COLOR_USER_INPUT:
633633
fprintf(con_st.out, ANSI_BOLD ANSI_COLOR_GREEN);
634634
break;
635+
case CONSOLE_COLOR_ERROR:
636+
fprintf(con_st.out, ANSI_BOLD ANSI_COLOR_RED);
637+
break;
635638
}
636639
con_st.color = color;
637640
fflush(con_st.out);

examples/common.h

+2-1
Original file line numberDiff line numberDiff line change
@@ -112,7 +112,8 @@ struct llama_context * llama_init_from_gpt_params(const gpt_params & params);
112112
enum console_color_t {
113113
CONSOLE_COLOR_DEFAULT=0,
114114
CONSOLE_COLOR_PROMPT,
115-
CONSOLE_COLOR_USER_INPUT
115+
CONSOLE_COLOR_USER_INPUT,
116+
CONSOLE_COLOR_ERROR
116117
};
117118

118119
struct console_state {

examples/main/main.cpp

+16
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,9 @@ int main(int argc, char ** argv) {
8181
if (params.n_ctx > 2048) {
8282
fprintf(stderr, "%s: warning: model does not support context sizes greater than 2048 tokens (%d specified);"
8383
"expect poor results\n", __func__, params.n_ctx);
84+
} else if (params.n_ctx < 8) {
85+
fprintf(stderr, "%s: warning: minimum context size is 8, using minimum size.\n", __func__);
86+
params.n_ctx = 8;
8487
}
8588

8689
fprintf(stderr, "%s: build = %d (%s)\n", __func__, BUILD_NUMBER, BUILD_COMMIT);
@@ -331,6 +334,19 @@ int main(int argc, char ** argv) {
331334
while ((n_remain != 0 && !is_antiprompt) || params.interactive) {
332335
// predict
333336
if (embd.size() > 0) {
337+
// Note: n_ctx - 4 here is to match the logic for commandline prompt handling via
338+
// --prompt or --file which uses the same value.
339+
auto max_embd_size = n_ctx - 4;
340+
// Ensure the input doesn't exceed the context size by truncating embd if necessary.
341+
if ((int)embd.size() > max_embd_size) {
342+
auto skipped_tokens = embd.size() - max_embd_size;
343+
console_set_color(con_st, CONSOLE_COLOR_ERROR);
344+
printf("<<input too long: skipped %ld token%s>>", skipped_tokens, skipped_tokens != 1 ? "s" : "");
345+
console_set_color(con_st, CONSOLE_COLOR_DEFAULT);
346+
fflush(stdout);
347+
embd.resize(max_embd_size);
348+
}
349+
334350
// infinite text generation via context swapping
335351
// if we run out of context:
336352
// - take the n_keep first tokens from the original prompt (via n_past)

0 commit comments

Comments
 (0)