We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 9313039, commit 404466b. Copy full SHA for 404466b
vllm/worker/model_runner.py
@@ -455,7 +455,6 @@ def __init__(self,
455
self.enable_prompt_adapter = (self.runner.prompt_adapter_config
456
is not None)
457
self.multi_modal_input_mapper = self.runner.multi_modal_input_mapper
458
- self.decode_only = True
459
460
# Attention metadata inputs.
461
if self.attn_backend is not None:
@@ -477,6 +476,10 @@ def prepare(self,
477
476
finished_requests_ids: Optional[List[str]] = None) -> None:
478
self.finished_requests_ids = finished_requests_ids
479
+ # if the current batch is decode-only.
480
+ # will be set to False if there is any non-decode request.
481
+ self.decode_only = True
482
+
483
# Intermediate data (data in CPU before going to GPU) for
484
# the current sequence group.
485
self.inter_data_list: List[
0 commit comments