Skip to content

Commit e69a3d6

Browse files
lulmer authored and bnellnm committed
[Bug] [V1] Try fetching stop_reason from EngineOutput before checking the request (vllm-project#13108)
Signed-off-by: Louis Ulmer <[email protected]>
1 parent 6de9144 commit e69a3d6

File tree

1 file changed

+6
-4
lines changed

1 file changed

+6
-4
lines changed

vllm/v1/engine/output_processor.py

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
import asyncio
44
from dataclasses import dataclass
5-
from typing import Dict, List, Optional
5+
from typing import Dict, List, Optional, Union
66

77
from vllm.outputs import RequestOutput
88
from vllm.sampling_params import RequestOutputKind
@@ -164,6 +164,7 @@ def process_outputs(
164164

165165
new_token_ids = engine_core_output.new_token_ids
166166
finish_reason = engine_core_output.finish_reason
167+
stop_reason = engine_core_output.stop_reason
167168

168169
# TODO(andy): prompt logprobs + chunked prefill can
169170
# result in engine core returning an output for a
@@ -181,9 +182,10 @@ def process_outputs(
181182

182183
# 2) Detokenize the token ids into text and check for stop
183184
# strings.
184-
stop_reason = req_state.detokenizer.update(new_token_ids)
185-
if stop_reason:
185+
stop_string = req_state.detokenizer.update(new_token_ids)
186+
if stop_string and finish_reason != FinishReason.STOP:
186187
finish_reason = FinishReason.STOP
188+
stop_reason = stop_string
187189

188190
# 3) Compute sample and prompt logprobs for request,
189191
# if required.
@@ -250,7 +252,7 @@ def _make_request_output(
250252
request_state: RequestState,
251253
new_token_ids: List[int],
252254
finish_reason: Optional[FinishReason],
253-
stop_reason: Optional[str],
255+
stop_reason: Union[int, str, None],
254256
) -> Optional[RequestOutput]:
255257

256258
finished = finish_reason is not None

0 commit comments

Comments (0)