@@ -67,6 +67,8 @@ def extract_reasoning_content_streaming(
         ]):
             return None
 
+        # Check if <think> is present in previous or delta.
+        # Keep compatibility with models that don't generate <think> tokens.
         if self.think_start_token_id in previous_token_ids:
             if self.think_end_token_id in delta_token_ids:
                 # <think> in previous, </think> in delta,
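The streaming dispatch above keys entirely on whether the <think>/</think> token ids appear in previous_token_ids or delta_token_ids. A minimal standalone sketch of that branch selection, using made-up token ids (100/101) in place of the tokenizer's real ones:

    from typing import Sequence

    THINK_START_ID = 100  # hypothetical id for <think>
    THINK_END_ID = 101    # hypothetical id for </think>

    def classify_delta(previous_token_ids: Sequence[int],
                       delta_token_ids: Sequence[int]) -> str:
        # A delta that is only the special token itself carries no visible text.
        if len(delta_token_ids) == 1 and delta_token_ids[0] in (
                THINK_START_ID, THINK_END_ID):
            return "skip"
        if THINK_START_ID in previous_token_ids:
            return "reasoning opened in an earlier chunk"
        if THINK_START_ID in delta_token_ids:
            return "reasoning opens in this chunk"
        return "no <think> seen yet"

    print(classify_delta([5, THINK_START_ID, 7], [8, 9]))
    # -> reasoning opened in an earlier chunk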
@@ -85,7 +87,6 @@ def extract_reasoning_content_streaming(
                 # reasoning content continues
                 return DeltaMessage(reasoning_content=delta_text)
         elif self.think_start_token_id in delta_token_ids:
-            logger.info(delta_text)
             if self.think_end_token_id in delta_token_ids:
                 # <think> in delta, </think> in delta, extract reasoning content
                 start_index = delta_text.find(self.think_start_token)
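The branch touched above handles a delta that contains both tags at once; the reasoning text is sliced out of delta_text with str.find and the tag lengths. A small illustration of that slicing on a plain string, assuming the literal "<think>"/"</think>" tags the parser uses:

    think_start, think_end = "<think>", "</think>"
    delta_text = "<think>2 + 2 is 4</think>The answer is 4."

    start_index = delta_text.find(think_start)
    end_index = delta_text.find(think_end)
    reasoning = delta_text[start_index + len(think_start):end_index]
    content = delta_text[end_index + len(think_end):]

    print(reasoning)  # 2 + 2 is 4
    print(content)    # The answer is 4.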
@@ -101,35 +102,46 @@ def extract_reasoning_content_streaming(
                 # reasoning content continues
                 return DeltaMessage(reasoning_content=delta_text)
         else:
-            # No <think> in previous or delta, reasoning content continues.
-            return DeltaMessage(content=delta_text)
+            # No <think> in previous or delta, also need to check for </think>.
+            # Because the model may have generated </think> without <think>
+            # Ref https://huggingface.co/deepseek-ai/DeepSeek-R1/commit/8a58a132790c9935686eb97f042afa8013451c9f
+            if self.think_end_token_id in delta_token_ids:
+                # </think> in delta with more tokens,
+                # extract reasoning content and content
+                end_index = delta_text.find(self.think_end_token)
+                reasoning_content = delta_text[:end_index]
+                content = delta_text[end_index + len(self.think_end_token):]
+                return DeltaMessage(reasoning_content=reasoning_content,
+                                    content=content if content else None)
+            elif self.think_end_token_id in previous_token_ids:
+                # </think> in previous, thinking content ends
+                return DeltaMessage(content=delta_text)
+            else:
+                # no </think> in previous or delta, reasoning content continues
+                return DeltaMessage(reasoning_content=delta_text)
 
     def extract_reasoning_content(
             self, model_output: str, request: ChatCompletionRequest
     ) -> Tuple[Optional[str], Optional[str]]:
 
-        # Check if the model output contains the <think> tokens.
-        if (self.think_start_token not in model_output
-                or self.think_end_token not in model_output):
+        # DeepSeek R1 doesn't generate <think> now.
+        # Thus we assume the reasoning content is always at the start.
+        # Ref https://huggingface.co/deepseek-ai/DeepSeek-R1/commit/8a58a132790c9935686eb97f042afa8013451c9f
+        if self.think_end_token not in model_output:
             return None, model_output
         else:
+            # Add a start token if it's missing to keep compatibility.
+            if self.think_start_token not in model_output:
+                model_output = f"{self.think_start_token}{model_output}"
             # Use a regex to find the reasoning content
             reasoning_content = self.reasoning_regex.findall(model_output)[0]
 
-            # Remove the reasoning content from the model output
-            # Although deepseek's <think> token is always at the
-            # beginning of the line, we cannot guarantee that the
-            # other models will follow this convention.
-            # Therefore, we need to add :start_index.
-            start_index = model_output.find(self.think_start_token)
-            if start_index != -1:
-                end_index = start_index + len(
-                    f"{self.think_start_token}{reasoning_content}{self.think_end_token}"
-                )
-                model_output = model_output[:start_index] + \
-                    model_output[end_index:]
-
-            if len(model_output) == 0:
-                return reasoning_content, None
-
-            return reasoning_content, model_output
+            end_index = len(
+                f"{self.think_start_token}{reasoning_content}{self.think_end_token}"
+            )
+            final_output = model_output[end_index:]
+
+            if len(final_output) == 0:
+                return reasoning_content, None
+
+            return reasoning_content, final_output
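The non-streaming path now assumes R1-style output where the reasoning starts immediately and only </think> is emitted. A self-contained sketch of the same prepend-and-split behaviour, assuming the parser's regex is the usual <think>(.*?)</think> pattern with re.DOTALL:

    import re

    think_start, think_end = "<think>", "</think>"
    reasoning_regex = re.compile(
        rf"{re.escape(think_start)}(.*?){re.escape(think_end)}", re.DOTALL)

    def split_reasoning(model_output: str):
        # No </think> at all: treat everything as plain content.
        if think_end not in model_output:
            return None, model_output
        # Prepend <think> if the model skipped it (current DeepSeek-R1 behaviour).
        if think_start not in model_output:
            model_output = f"{think_start}{model_output}"
        reasoning = reasoning_regex.findall(model_output)[0]
        final = model_output[len(f"{think_start}{reasoning}{think_end}"):]
        return reasoning, (final or None)

    print(split_reasoning("The user asks for 2 + 2.</think>4"))
    # -> ('The user asks for 2 + 2.', '4')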