Skip to content

[Misc] Optimize the Qwen3_ReasoningParser extract_reasoning_content #17515

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
May 1, 2025
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 27 additions & 26 deletions vllm/reasoning/qwen3_reasoning_parser.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
# SPDX-License-Identifier: Apache-2.0

import re
from collections.abc import Sequence
from typing import Optional, Union

Expand Down Expand Up @@ -31,9 +30,6 @@ def __init__(self, tokenizer: PreTrainedTokenizerBase):
self.think_start_token = "<think>"
self.think_end_token = "</think>"

self.reasoning_regex = re.compile(
rf"{self.think_start_token}(.*?){self.think_end_token}", re.DOTALL)

if not self.model_tokenizer:
raise ValueError(
"The model tokenizer must be passed to the ReasoningParser "
Expand Down Expand Up @@ -121,29 +117,34 @@ def extract_reasoning_content_streaming(
def extract_reasoning_content(
        self, model_output: str, request: "ChatCompletionRequest"
) -> tuple[Optional[str], Optional[str]]:
    """
    Extract reasoning content from the model output.

    For text ``<think>abc</think>xyz``:
    - 'abc' goes to reasoning_content
    - 'xyz' goes to content

    Args:
        model_output: Full decoded text produced by the model.
        request: The originating chat-completion request (unused here,
            kept for interface compatibility with other parsers).

    Returns:
        tuple[Optional[str], Optional[str]]: reasoning content and content.
        Either element is ``None`` when the corresponding section is absent
        or empty.
    """
    # If <think> is present, drop it and everything before it; otherwise
    # process the whole output unchanged. str.partition does this in a
    # single pass without a regex.
    before, sep, after = model_output.partition(self.think_start_token)
    model_output = after if sep else before

    # Without a closing </think> there is no reasoning section: return
    # the (possibly trimmed) output as plain content.
    if self.think_end_token not in model_output:
        return None, model_output

    # Split on the first </think>: left side is the reasoning trace,
    # right side is the user-visible content.
    reasoning_content, _, content = model_output.partition(
        self.think_end_token)

    # Normalize empty trailing content to None so callers can distinguish
    # "no content" from an empty string.
    final_content = content or None
    return reasoning_content, final_content