Skip to content

Commit 75c53e3

Browse files
youkaichaoGWS0428
authored and committed
[perf] fix perf regression from vllm-project#12253 (vllm-project#12380)
Signed-off-by: youkaichao <[email protected]>
1 parent ad381fa commit 75c53e3

File tree

1 file changed

+4
-1
lines changed

1 file changed

+4
-1
lines changed

vllm/worker/model_runner.py

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -455,7 +455,6 @@ def __init__(self,
455455
self.enable_prompt_adapter = (self.runner.prompt_adapter_config
456456
is not None)
457457
self.multi_modal_input_mapper = self.runner.multi_modal_input_mapper
458-
self.decode_only = True
459458

460459
# Attention metadata inputs.
461460
if self.attn_backend is not None:
@@ -477,6 +476,10 @@ def prepare(self,
477476
finished_requests_ids: Optional[List[str]] = None) -> None:
478477
self.finished_requests_ids = finished_requests_ids
479478

479+
# if the current batch is decode-only.
480+
# will be set to False if there is any non-decode request.
481+
self.decode_only = True
482+
480483
# Intermediate data (data in CPU before going to GPU) for
481484
# the current sequence group.
482485
self.inter_data_list: List[

0 commit comments

Comments
 (0)