Skip to content

Commit 3f50523

Browse files
authored
[Doc] Add stream flag for chat completion example (#18524)
Signed-off-by: calvin chen <[email protected]>
1 parent 4e04ece commit 3f50523

File tree

1 file changed

+21
-3
lines changed

1 file changed

+21
-3
lines changed

examples/online_serving/openai_chat_completion_client.py

Lines changed: 21 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,9 @@
33
NOTE: start a supported chat completion model server with `vllm serve`, e.g.
44
vllm serve meta-llama/Llama-2-7b-chat-hf
55
"""
6+
7+
import argparse
8+
69
from openai import OpenAI
710

811
# Modify OpenAI's API key and API base to use vLLM's API server.
@@ -24,7 +27,15 @@
2427
}]
2528

2629

27-
def main():
30+
def parse_args():
    """Build the command-line parser and return the parsed arguments.

    Supported flags:
        --stream: ask the server for a streaming chat-completion response.
    """
    cli = argparse.ArgumentParser(description="Client for vLLM API server")
    cli.add_argument(
        "--stream",
        action="store_true",
        help="Enable streaming response",
    )
    return cli.parse_args()
36+
37+
38+
def main(args):
2839
client = OpenAI(
2940
# defaults to os.environ.get("OPENAI_API_KEY")
3041
api_key=openai_api_key,
@@ -34,16 +45,23 @@ def main():
3445
models = client.models.list()
3546
model = models.data[0].id
3647

48+
# Chat Completion API
3749
chat_completion = client.chat.completions.create(
3850
messages=messages,
3951
model=model,
52+
stream=args.stream,
4053
)
4154

4255
print("-" * 50)
4356
print("Chat completion results:")
44-
print(chat_completion)
57+
if args.stream:
58+
for c in chat_completion:
59+
print(c)
60+
else:
61+
print(chat_completion)
4562
print("-" * 50)
4663

4764

4865
if __name__ == "__main__":
    # Parse CLI flags (e.g. --stream) and hand them to the client entry point.
    main(parse_args())

0 commit comments

Comments (0)