From 1133eea479b82d753ef756d879b5c9d6d2bfcb15 Mon Sep 17 00:00:00 2001 From: Matthew McAllister Date: Sun, 12 Mar 2023 21:49:17 -0700 Subject: [PATCH] Truncate prompt if longer than context minus n_predict --- main.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/main.cpp b/main.cpp index f02b5ddbde94d..10fa5011391af 100644 --- a/main.cpp +++ b/main.cpp @@ -782,8 +782,10 @@ int main(int argc, char ** argv) { // tokenize the prompt std::vector embd_inp = ::llama_tokenize(vocab, params.prompt, true); - - params.n_predict = std::min(params.n_predict, model.hparams.n_ctx - (int) embd_inp.size()); + if (embd_inp.size() + params.n_predict > model.hparams.n_ctx) { + int offset = embd_inp.size() - model.hparams.n_ctx + params.n_predict; + embd_inp = std::vector(embd_inp.begin() + offset, embd_inp.end()); + } printf("\n"); printf("%s: prompt: '%s'\n", __func__, params.prompt.c_str());