File tree 1 file changed +21
-3
lines changed 1 file changed +21
-3
lines changed Original file line number Diff line number Diff line change 3
3
NOTE: start a supported chat completion model server with `vllm serve`, e.g.
4
4
vllm serve meta-llama/Llama-2-7b-chat-hf
5
5
"""
6
+
7
+ import argparse
8
+
6
9
from openai import OpenAI
7
10
8
11
# Modify OpenAI's API key and API base to use vLLM's API server.
24
27
}]
25
28
26
29
27
- def main ():
30
def parse_args(argv=None):
    """Parse command-line options for the vLLM chat completion client.

    Args:
        argv: Optional list of argument strings to parse instead of the
            process command line. When None (the default), argparse reads
            ``sys.argv[1:]``, which matches the original behavior.

    Returns:
        argparse.Namespace with a boolean ``stream`` attribute; it is True
        only when ``--stream`` was passed.
    """
    parser = argparse.ArgumentParser(description="Client for vLLM API server")
    # store_true makes the flag default to False when it is omitted.
    parser.add_argument("--stream",
                        action="store_true",
                        help="Enable streaming response")
    return parser.parse_args(argv)
36
+
37
+
38
+ def main (args ):
28
39
client = OpenAI (
29
40
# defaults to os.environ.get("OPENAI_API_KEY")
30
41
api_key = openai_api_key ,
@@ -34,16 +45,23 @@ def main():
34
45
models = client .models .list ()
35
46
model = models .data [0 ].id
36
47
48
+ # Chat Completion API
37
49
chat_completion = client .chat .completions .create (
38
50
messages = messages ,
39
51
model = model ,
52
+ stream = args .stream ,
40
53
)
41
54
42
55
print ("-" * 50 )
43
56
print ("Chat completion results:" )
44
- print (chat_completion )
57
+ if args .stream :
58
+ for c in chat_completion :
59
+ print (c )
60
+ else :
61
+ print (chat_completion )
45
62
print ("-" * 50 )
46
63
47
64
48
65
if __name__ == "__main__":
    # Read the CLI flags and pass them straight to the client entry point.
    main(parse_args())
You can’t perform that action at this time.
0 commit comments