Skip to content

Commit 043e7c4

Browse files
committed
initial commit
1 parent ce60541 commit 043e7c4

File tree

1 file changed

+5
-0
lines changed

1 file changed

+5
-0
lines changed

src/deepsparse/transformers/pipelines/text_generation.py

+5
Original file line number | Diff line number | Diff line change
@@ -834,6 +834,11 @@ def engine_forward(
834 834   generated_tokens.append(token)
835 835   generated_logits.append(logits)
836 836
    837 + if session.total_num_processed_tokens >= session.capacity:
    838 +     # if the kv cache is full, stop generation
    839 +     finished_reason.append(FinishReason.CAPACITY)
    840 +     break
    841 +
837 842   if (
838 843       token == self.tokenizer.eos_token_id
839 844       and not self.force_max_tokens

0 commit comments

Comments
 (0)