Skip to content

Commit 1ce7b8b

Browse files
committed
add stream flag for chat completion example
Signed-off-by: calvin chen <[email protected]>
1 parent 5179777 commit 1ce7b8b

File tree

1 file changed

+19
-3
lines changed

1 file changed

+19
-3
lines changed

examples/online_serving/openai_chat_completion_client.py

Lines changed: 19 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,9 @@
33
NOTE: start a supported chat completion model server with `vllm serve`, e.g.
44
vllm serve meta-llama/Llama-2-7b-chat-hf
55
"""
6+
7+
import argparse
8+
69
from openai import OpenAI
710

811
# Modify OpenAI's API key and API base to use vLLM's API server.
@@ -23,8 +26,14 @@
2326
"content": "Where was it played?"
2427
}]
2528

29+
def parse_args():
    """Parse command-line options for this example client.

    Returns:
        argparse.Namespace with a boolean ``stream`` attribute
        (True when ``--stream`` is given on the command line).
    """
    arg_parser = argparse.ArgumentParser(
        description="Client for vLLM API server")
    # store_true makes --stream a flag: absent -> False, present -> True.
    arg_parser.add_argument(
        "--stream",
        action="store_true",
        help="Enable streaming response",
    )
    return arg_parser.parse_args()
2635

27-
def main():
36+
def main(args):
2837
client = OpenAI(
2938
# defaults to os.environ.get("OPENAI_API_KEY")
3039
api_key=openai_api_key,
@@ -34,16 +43,23 @@ def main():
3443
models = client.models.list()
3544
model = models.data[0].id
3645

46+
# Chat Completion API
3747
chat_completion = client.chat.completions.create(
3848
messages=messages,
3949
model=model,
50+
stream=args.stream,
4051
)
4152

4253
print("-" * 50)
4354
print("Chat completion results:")
44-
print(chat_completion)
55+
if args.stream:
56+
for c in chat_completion:
57+
print(c)
58+
else:
59+
print(chat_completion)
4560
print("-" * 50)
4661

4762

4863
# Script entry point: parse CLI flags, then run the example client.
if __name__ == "__main__":
    main(parse_args())

0 commit comments

Comments
 (0)