
Commit f0b2f56

liuyanyi authored and shreyankg committed

[Doc] Update reasoning with stream example to use OpenAI library (vllm-project#14077)

Signed-off-by: liuyanyi <[email protected]>

1 parent 929da81 commit f0b2f56

2 files changed: +82 -57 lines changed

docs/source/features/reasoning_outputs.md

Lines changed: 49 additions & 1 deletion
@@ -78,7 +78,55 @@ Streaming chat completions are also supported for reasoning models. The `reasoni
 }
 ```
 
-Please note that it is not compatible with the OpenAI Python client library. You can use the `requests` library to make streaming requests. You could checkout the [example](https://github.com/vllm-project/vllm/blob/main/examples/online_serving/openai_chat_completion_with_reasoning_streaming.py).
+The OpenAI Python client library does not officially support the `reasoning_content` attribute for streaming output, but the client does support extra attributes in the response. You can use `hasattr` to check whether the `reasoning_content` attribute is present in the response. For example:
+
+```python
+from openai import OpenAI
+
+# Modify OpenAI's API key and API base to use vLLM's API server.
+openai_api_key = "EMPTY"
+openai_api_base = "http://localhost:8000/v1"
+
+client = OpenAI(
+    api_key=openai_api_key,
+    base_url=openai_api_base,
+)
+
+models = client.models.list()
+model = models.data[0].id
+
+messages = [{"role": "user", "content": "9.11 and 9.8, which is greater?"}]
+stream = client.chat.completions.create(model=model,
+                                        messages=messages,
+                                        stream=True)
+
+print("client: Start streaming chat completions...")
+printed_reasoning_content = False
+printed_content = False
+
+for chunk in stream:
+    reasoning_content = None
+    content = None
+    # Check whether the delta carries reasoning_content or content
+    if hasattr(chunk.choices[0].delta, "reasoning_content"):
+        reasoning_content = chunk.choices[0].delta.reasoning_content
+    elif hasattr(chunk.choices[0].delta, "content"):
+        content = chunk.choices[0].delta.content
+
+    if reasoning_content is not None:
+        if not printed_reasoning_content:
+            printed_reasoning_content = True
+            print("reasoning_content:", end="", flush=True)
+        print(reasoning_content, end="", flush=True)
+    elif content is not None:
+        if not printed_content:
+            printed_content = True
+            print("\ncontent:", end="", flush=True)
+        # Extract and print the content
+        print(content, end="", flush=True)
+```
+
+Remember to check whether `reasoning_content` exists in the response before accessing it. You can check out the full [example](https://github.com/vllm-project/vllm/blob/main/examples/online_serving/openai_chat_completion_with_reasoning_streaming.py).
 
 ## Structured output
 
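The added example hedges each field access with `hasattr` branching. An equivalent, more compact check uses `getattr` with a `None` default; the sketch below is illustrative only, not part of this commit, and assumes `chunk` is a `ChatCompletionChunk` from the stream above:

```python
# Sketch only (not part of this commit): collapse the hasattr branching
# into getattr calls with a None default.
delta = chunk.choices[0].delta
reasoning_content = getattr(delta, "reasoning_content", None)
content = getattr(delta, "content", None)
```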
examples/online_serving/openai_chat_completion_with_reasoning_streaming.py

Lines changed: 33 additions & 56 deletions
@@ -19,73 +19,50 @@
 where you want to display chat completions to the user as they are generated
 by the model.
 
-Here we do not use the OpenAI Python client library, because it does not support
-`reasoning_content` fields in the response.
+Remember to check whether content and reasoning_content exist in
+`ChatCompletionChunk`; content may not exist, leading to errors if you access it.
 """
 
-import json
-
-import requests
+from openai import OpenAI
 
 # Modify OpenAI's API key and API base to use vLLM's API server.
 openai_api_key = "EMPTY"
 openai_api_base = "http://localhost:8000/v1"
 
-models = requests.get(
-    f"{openai_api_base}/models",
-    headers={
-        "Authorization": f"Bearer {openai_api_key}"
-    },
-).json()
-model = models["data"][0]["id"]
+client = OpenAI(
+    api_key=openai_api_key,
+    base_url=openai_api_base,
+)
 
-# Streaming chat completions
-messages = [{"role": "user", "content": "9.11 and 9.8, which is greater?"}]
+models = client.models.list()
+model = models.data[0].id
 
-response = requests.post(
-    f"{openai_api_base}/chat/completions",
-    headers={"Authorization": f"Bearer {openai_api_key}"},
-    json={
-        "model": model,
-        "messages": messages,
-        "stream": True
-    },
-)
+messages = [{"role": "user", "content": "9.11 and 9.8, which is greater?"}]
+stream = client.chat.completions.create(model=model,
+                                        messages=messages,
+                                        stream=True)
 
 print("client: Start streaming chat completions...")
 printed_reasoning_content = False
 printed_content = False
-# Make the streaming request
-if response.status_code == 200:
-    # Process the streaming response
-    for line in response.iter_lines():
-        if line:  # Filter out keep-alive new lines
-            # Decode the line and parse the JSON
-            decoded_line = line.decode("utf-8")
-            if decoded_line.startswith("data:"):
-                data = decoded_line[5:].strip()  # Remove "data:" prefix
-                if data == "[DONE]":  # End of stream
-                    print("\nclient: Stream completed.")
-                    break
-                try:
-                    # Parse the JSON data
-                    chunk = json.loads(data)
-                    reasoning_content = chunk["choices"][0]["delta"].get(
-                        "reasoning_content", "")
-                    content = chunk["choices"][0]["delta"].get("content", "")
 
-                    if reasoning_content:
-                        if not printed_reasoning_content:
-                            printed_reasoning_content = True
-                            print("reasoning_content:", end="", flush=True)
-                        print(reasoning_content, end="", flush=True)
-                    elif content:
-                        if not printed_content:
-                            printed_content = True
-                            print("\ncontent:", end="", flush=True)
-                        # Extract and print the content
-                        print(content, end="", flush=True)
-                except json.JSONDecodeError:
-                    print("Error decoding JSON:", decoded_line)
-else:
-    print(f"Error: {response.status_code} - {response.text}")
+for chunk in stream:
+    reasoning_content = None
+    content = None
+    # Check whether the delta carries reasoning_content or content
+    if hasattr(chunk.choices[0].delta, "reasoning_content"):
+        reasoning_content = chunk.choices[0].delta.reasoning_content
+    elif hasattr(chunk.choices[0].delta, "content"):
+        content = chunk.choices[0].delta.content
+
+    if reasoning_content is not None:
+        if not printed_reasoning_content:
+            printed_reasoning_content = True
+            print("reasoning_content:", end="", flush=True)
+        print(reasoning_content, end="", flush=True)
+    elif content is not None:
+        if not printed_content:
+            printed_content = True
+            print("\ncontent:", end="", flush=True)
+        # Extract and print the content
+        print(content, end="", flush=True)
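For comparison, the non-streaming case reads the same field off the completed message. A minimal sketch, not part of this commit, reusing the `client`, `model`, and `messages` from the example above; since `reasoning_content` is a vLLM extension rather than a declared OpenAI client field, it is read defensively with `getattr`:

```python
# Sketch only (not part of this commit): non-streaming counterpart.
response = client.chat.completions.create(model=model, messages=messages)
message = response.choices[0].message
# reasoning_content is a vLLM extension, so read it defensively.
print("reasoning_content:", getattr(message, "reasoning_content", None))
print("content:", message.content)
```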
