Commit 3bbaacb

[Bugfix][Frontend] Eliminate regex based check in reasoning full generator (#14821)
Signed-off-by: Ce Gao <[email protected]>
1 parent: a10314c

2 files changed: +89 -18 lines changed

tests/reasoning/test_deepseekr1_reasoning_parser.py

Lines changed: 64 additions & 0 deletions
@@ -90,6 +90,40 @@ def deepseek_r1_qwen_tokenizer():
     "content": "This is the rest",
     "is_reasoning_end": True,
 }
+THINK_NO_END = {
+    "output": "<think>This is a reasoning section",
+    "reasoning_content": "This is a reasoning section",
+    "content": None,
+    "is_reasoning_end": False,
+}
+EMPTY = {
+    "output": "",
+    "reasoning_content": "",
+    "content": None,
+    "is_reasoning_end": False,
+}
+EMPTY_STREAMING = {
+    "output": "",
+    "reasoning_content": None,
+    "content": None,
+    "is_reasoning_end": False,
+}
+NEW_LINE = {
+    "output": "\n<think>This is a reasoning section</think>\nThis is the rest",
+    "reasoning_content": "This is a reasoning section",
+    "content": "\nThis is the rest",
+    "is_reasoning_end": True,
+}
+# Streaming cannot handle new lines at the beginning of the output
+# because we need to support <think>...</think> and </think>...
+# We cannot know if the text before <think> is reasoning content
+# or not.
+NEW_LINE_STREAMING = {
+    "output": "\n<think>This is a reasoning section</think>\nThis is the rest",
+    "reasoning_content": "\nThis is a reasoning section",
+    "content": "\nThis is the rest",
+    "is_reasoning_end": True,
+}

 TEST_CASES = [
     pytest.param(
@@ -182,6 +216,36 @@ def deepseek_r1_qwen_tokenizer():
         SHORTEST_REASONING_WITH_THINK,
         id="shortest_with_think_streaming",
     ),
+    pytest.param(
+        False,
+        THINK_NO_END,
+        id="think_no_end",
+    ),
+    pytest.param(
+        True,
+        THINK_NO_END,
+        id="think_no_end_streaming",
+    ),
+    pytest.param(
+        False,
+        EMPTY,
+        id="empty",
+    ),
+    pytest.param(
+        True,
+        EMPTY_STREAMING,
+        id="empty_streaming",
+    ),
+    pytest.param(
+        False,
+        NEW_LINE,
+        id="new_line",
+    ),
+    pytest.param(
+        True,
+        NEW_LINE_STREAMING,
+        id="new_line_streaming",
+    ),
 ]
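Taken together, the new fixtures pin down the non-streaming contract: text before </think> is reasoning (with or without a leading <think>), text after it is content, and content is None rather than "" when generation stops at the think boundary. Below is a minimal standalone sketch of that path, mirroring the partition logic in the parser diff that follows; the hardcoded <think>/</think> strings stand in for the parser's start/end token attributes.

    from typing import Optional

    START, END = "<think>", "</think>"

    def extract(output: str) -> tuple[Optional[str], Optional[str]]:
        # Strip the start token if present; DeepSeek R1 may omit it entirely.
        _, sep, rest = output.partition(START)
        output = rest if sep else output
        # No end token: the whole output is reasoning content.
        if END not in output:
            return output, None
        reasoning, _, content = output.partition(END)
        # Stopping right after </think> yields None, not the empty string.
        return reasoning, content or None

    assert extract("<think>abc</think>xyz") == ("abc", "xyz")
    assert extract("<think>abc") == ("abc", None)            # THINK_NO_END
    assert extract("") == ("", None)                         # EMPTY
    assert extract("\n<think>a</think>\nb") == ("a", "\nb")  # NEW_LINE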

vllm/reasoning/deepseek_r1_reasoning_parser.py

Lines changed: 25 additions & 18 deletions
@@ -1,6 +1,5 @@
 # SPDX-License-Identifier: Apache-2.0

-import re
 from collections.abc import Sequence
 from typing import Optional, Union

@@ -32,9 +31,6 @@ class DeepSeekR1ReasoningParser(ReasoningParser):
     def __init__(self, tokenizer: PreTrainedTokenizerBase):
         super().__init__(tokenizer)

-        self.reasoning_regex = re.compile(
-            rf"{self.start_token}(.*?){self.end_token}", re.DOTALL)
-
        if not self.model_tokenizer:
            raise ValueError(
                "The model tokenizer must be passed to the ReasoningParser "
@@ -143,23 +139,34 @@ def extract_reasoning_content_streaming(
     def extract_reasoning_content(
             self, model_output: str, request: ChatCompletionRequest
     ) -> tuple[Optional[str], Optional[str]]:
+        """
+        Extract reasoning content from the model output.
+
+        For text <think>abc</think>xyz:
+        - 'abc' goes to reasoning_content
+        - 'xyz' goes to content
+
+        Returns:
+            tuple[Optional[str], Optional[str]]: reasoning content and content
+        """
+
+        # If the start token is present in the model output, remove it
+        # before extracting the reasoning content.
+        model_output_parts = model_output.partition(self.start_token)
+        model_output = model_output_parts[2] if model_output_parts[
+            1] else model_output_parts[0]
+
         # DeepSeek R1 doesn't generate <think> now.
         # Thus we assume the reasoning content is always at the start.
         # Ref https://huggingface.co/deepseek-ai/DeepSeek-R1/commit/8a58a132790c9935686eb97f042afa8013451c9f
         if self.end_token not in model_output:
             return model_output, None
         else:
-            # Add a start token if it's missing to keep compatibility.
-            if self.start_token not in model_output:
-                model_output = f"{self.start_token}{model_output}"
-            # Use a regex to find the reasoning content
-            reasoning_content = self.reasoning_regex.findall(model_output)[0]
-
-            end_index = len(
-                f"{self.start_token}{reasoning_content}{self.end_token}")
-            final_output = model_output[end_index:]
-
-            if len(final_output) == 0:
-                return reasoning_content, None
-
-            return reasoning_content, final_output
+            reasoning_content, _, content = model_output.partition(
+                self.end_token)
+            # partition would return the whole string as reasoning_content
+            # if the end token were missing, but that cannot happen here:
+            # its presence was checked above.
+            # If generation stops right after end-of-think, return null content.
+            final_content = content or None
+            return reasoning_content, final_content
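For context on the bug being fixed: the removed end_index arithmetic assumed the <think> match began at offset 0, so any text ahead of it shifted the final slice. A small repro of that failure mode against the old logic, using the NEW_LINE fixture's leading newline:

    import re

    # The regex the parser used to compile, with the tokens inlined.
    reasoning_regex = re.compile(r"<think>(.*?)</think>", re.DOTALL)

    out = "\n<think>abc</think>xyz"
    reasoning = reasoning_regex.findall(out)[0]     # 'abc'
    end_index = len(f"<think>{reasoning}</think>")  # 18, but the match ends at 19
    print(repr(out[end_index:]))                    # '>xyz', not 'xyz'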
