1 file changed: +19 -3 lines changed
Original file line number | Diff line number | Diff line change
3
3
NOTE: start a supported chat completion model server with `vllm serve`, e.g.
4
4
vllm serve meta-llama/Llama-2-7b-chat-hf
5
5
"""
6
+
7
+ import argparse
8
+
6
9
from openai import OpenAI
7
10
8
11
# Modify OpenAI's API key and API base to use vLLM's API server.
23
26
"content" : "Where was it played?"
24
27
}]
25
28
29
def parse_args(argv=None):
    """Parse command-line options for the chat completion client example.

    Args:
        argv: Optional list of argument strings to parse. When ``None``
            (the default) argparse falls back to ``sys.argv[1:]``, so a
            plain ``parse_args()`` call behaves as before.

    Returns:
        The ``argparse.Namespace`` produced by the parser. Its ``stream``
        attribute is ``True`` only when ``--stream`` was supplied.
    """
    parser = argparse.ArgumentParser(description="Client for vLLM API server")
    # store_true makes --stream a plain on/off flag that defaults to False.
    parser.add_argument("--stream",
                        action="store_true",
                        help="Enable streaming response")
    return parser.parse_args(argv)
26
35
27
- def main ():
36
+ def main (args ):
28
37
client = OpenAI (
29
38
# defaults to os.environ.get("OPENAI_API_KEY")
30
39
api_key = openai_api_key ,
@@ -34,16 +43,23 @@ def main():
34
43
models = client .models .list ()
35
44
model = models .data [0 ].id
36
45
46
+ # Chat Completion API
37
47
chat_completion = client .chat .completions .create (
38
48
messages = messages ,
39
49
model = model ,
50
+ stream = args .stream ,
40
51
)
41
52
42
53
print ("-" * 50 )
43
54
print ("Chat completion results:" )
44
- print (chat_completion )
55
+ if args .stream :
56
+ for c in chat_completion :
57
+ print (c )
58
+ else :
59
+ print (chat_completion )
45
60
print ("-" * 50 )
46
61
47
62
48
63
if __name__ == "__main__":
    # Parse the CLI flags first so the --stream choice is passed into main().
    args = parse_args()
    main(args)
You can’t perform that action at this time.
0 commit comments