Commit d46d490

[Frontend] Move CLI code into vllm.cmd package (#12971)

1 parent 04f50ad · commit d46d490
9 files changed (+348, -205 lines)

docs/source/design/arch_overview.md (+1, -1)

@@ -66,7 +66,7 @@ This server can be started using the `vllm serve` command.
 vllm serve <model>
 ```

-The code for the `vllm` CLI can be found in <gh-file:vllm/scripts.py>.
+The code for the `vllm` CLI can be found in <gh-file:vllm/entrypoints/cli/main.py>.

 Sometimes you may see the API server entrypoint used directly instead of via the
 `vllm` CLI command. For example:
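For reference, the "direct" invocation that this docs paragraph goes on to illustrate runs the API server module itself (its `__main__` block is updated at the end of this commit). A plausible form of that command, shown here purely for illustration:

```
python -m vllm.entrypoints.openai.api_server --model <model>
```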

setup.py (+1, -1)

@@ -689,7 +689,7 @@ def _read_requirements(filename: str) -> List[str]:
     package_data=package_data,
     entry_points={
         "console_scripts": [
-            "vllm=vllm.scripts:main",
+            "vllm=vllm.entrypoints.cli.main:main",
         ],
     },
 )
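This `console_scripts` entry is what points the installed `vllm` executable at the relocated entrypoint. As a rough sketch (assuming vLLM is installed in the environment), running `vllm ...` from the shell amounts to:

```python
# Roughly what the generated `vllm` console script does: import the new
# CLI entrypoint and call it. main() parses sys.argv[1:] itself.
from vllm.entrypoints.cli.main import main

main()
```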

vllm/entrypoints/cli/__init__.py (new file)

Whitespace-only changes.

vllm/entrypoints/cli/main.py (new file, +79 lines)

# SPDX-License-Identifier: Apache-2.0

# The CLI entrypoint to vLLM.
import os
import signal
import sys

import vllm.entrypoints.cli.openai
import vllm.entrypoints.cli.serve
import vllm.version
from vllm.logger import init_logger
from vllm.utils import FlexibleArgumentParser

logger = init_logger(__name__)

CMD_MODULES = [
    vllm.entrypoints.cli.openai,
    vllm.entrypoints.cli.serve,
]


def register_signal_handlers():

    def signal_handler(sig, frame):
        sys.exit(0)

    signal.signal(signal.SIGINT, signal_handler)
    signal.signal(signal.SIGTSTP, signal_handler)


def env_setup():
    # The safest multiprocessing method is `spawn`, as the default `fork` method
    # is not compatible with some accelerators. The default method will be
    # changing in future versions of Python, so we should use it explicitly when
    # possible.
    #
    # We only set it here in the CLI entrypoint, because changing to `spawn`
    # could break some existing code using vLLM as a library. `spawn` will cause
    # unexpected behavior if the code is not protected by
    # `if __name__ == "__main__":`.
    #
    # References:
    # - https://docs.python.org/3/library/multiprocessing.html#contexts-and-start-methods
    # - https://pytorch.org/docs/stable/notes/multiprocessing.html#cuda-in-multiprocessing
    # - https://pytorch.org/docs/stable/multiprocessing.html#sharing-cuda-tensors
    # - https://docs.habana.ai/en/latest/PyTorch/Getting_Started_with_PyTorch_and_Gaudi/Getting_Started_with_PyTorch.html?highlight=multiprocessing#torch-multiprocessing-for-dataloaders
    if "VLLM_WORKER_MULTIPROC_METHOD" not in os.environ:
        logger.debug("Setting VLLM_WORKER_MULTIPROC_METHOD to 'spawn'")
        os.environ["VLLM_WORKER_MULTIPROC_METHOD"] = "spawn"


def main():
    env_setup()

    parser = FlexibleArgumentParser(description="vLLM CLI")
    parser.add_argument('-v',
                        '--version',
                        action='version',
                        version=vllm.version.__version__)
    subparsers = parser.add_subparsers(required=False, dest="subparser")
    cmds = {}
    for cmd_module in CMD_MODULES:
        new_cmds = cmd_module.cmd_init()
        for cmd in new_cmds:
            cmd.subparser_init(subparsers).set_defaults(
                dispatch_function=cmd.cmd)
            cmds[cmd.name] = cmd
    args = parser.parse_args()
    if args.subparser in cmds:
        cmds[args.subparser].validate(args)

    if hasattr(args, "dispatch_function"):
        args.dispatch_function(args)
    else:
        parser.print_help()


if __name__ == "__main__":
    main()
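
One behavior worth calling out in `env_setup()`: it only sets the multiprocessing method when the user has not already chosen one. A minimal sketch of that precedence (assumes vLLM and its dependencies are installed; the import pulls in the rest of the package):

```python
import os

# A value set by the user (or their launcher) before the CLI runs...
os.environ["VLLM_WORKER_MULTIPROC_METHOD"] = "fork"

from vllm.entrypoints.cli.main import env_setup

env_setup()
# ...is left untouched; only an unset variable is defaulted to "spawn".
assert os.environ["VLLM_WORKER_MULTIPROC_METHOD"] == "fork"
```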

vllm/entrypoints/cli/openai.py (new file, +172 lines)

# SPDX-License-Identifier: Apache-2.0
# Commands that act as an interactive OpenAI API client

import argparse
import os
import signal
import sys
from typing import List, Optional, Tuple

from openai import OpenAI
from openai.types.chat import ChatCompletionMessageParam

from vllm.entrypoints.cli.types import CLISubcommand
from vllm.utils import FlexibleArgumentParser


def _register_signal_handlers():

    def signal_handler(sig, frame):
        sys.exit(0)

    signal.signal(signal.SIGINT, signal_handler)
    signal.signal(signal.SIGTSTP, signal_handler)


def _interactive_cli(args: argparse.Namespace) -> Tuple[str, OpenAI]:
    _register_signal_handlers()

    base_url = args.url
    api_key = args.api_key or os.environ.get("OPENAI_API_KEY", "EMPTY")
    openai_client = OpenAI(api_key=api_key, base_url=base_url)

    if args.model_name:
        model_name = args.model_name
    else:
        available_models = openai_client.models.list()
        model_name = available_models.data[0].id

    print(f"Using model: {model_name}")

    return model_name, openai_client


def chat(system_prompt: Optional[str], model_name: str,
         client: OpenAI) -> None:
    conversation: List[ChatCompletionMessageParam] = []
    if system_prompt is not None:
        conversation.append({"role": "system", "content": system_prompt})

    print("Please enter a message for the chat model:")
    while True:
        try:
            input_message = input("> ")
        except EOFError:
            return
        conversation.append({"role": "user", "content": input_message})

        chat_completion = client.chat.completions.create(model=model_name,
                                                         messages=conversation)

        response_message = chat_completion.choices[0].message
        output = response_message.content

        conversation.append(response_message)  # type: ignore
        print(output)


def _add_query_options(
        parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
    parser.add_argument(
        "--url",
        type=str,
        default="http://localhost:8000/v1",
        help="url of the running OpenAI-Compatible RESTful API server")
    parser.add_argument(
        "--model-name",
        type=str,
        default=None,
        help=("The model name used in prompt completion, default to "
              "the first model in list models API call."))
    parser.add_argument(
        "--api-key",
        type=str,
        default=None,
        help=(
            "API key for OpenAI services. If provided, this api key "
            "will overwrite the api key obtained through environment variables."
        ))
    return parser


class ChatCommand(CLISubcommand):
    """The `chat` subcommand for the vLLM CLI. """

    def __init__(self):
        self.name = "chat"
        super().__init__()

    @staticmethod
    def cmd(args: argparse.Namespace) -> None:
        model_name, client = _interactive_cli(args)
        system_prompt = args.system_prompt
        conversation: List[ChatCompletionMessageParam] = []
        if system_prompt is not None:
            conversation.append({"role": "system", "content": system_prompt})

        print("Please enter a message for the chat model:")
        while True:
            try:
                input_message = input("> ")
            except EOFError:
                return
            conversation.append({"role": "user", "content": input_message})

            chat_completion = client.chat.completions.create(
                model=model_name, messages=conversation)

            response_message = chat_completion.choices[0].message
            output = response_message.content

            conversation.append(response_message)  # type: ignore
            print(output)

    def subparser_init(
            self,
            subparsers: argparse._SubParsersAction) -> FlexibleArgumentParser:
        chat_parser = subparsers.add_parser(
            "chat",
            help="Generate chat completions via the running API server",
            usage="vllm chat [options]")
        _add_query_options(chat_parser)
        chat_parser.add_argument(
            "--system-prompt",
            type=str,
            default=None,
            help=("The system prompt to be added to the chat template, "
                  "used for models that support system prompts."))
        return chat_parser


class CompleteCommand(CLISubcommand):
    """The `complete` subcommand for the vLLM CLI. """

    def __init__(self):
        self.name = "complete"
        super().__init__()

    @staticmethod
    def cmd(args: argparse.Namespace) -> None:
        model_name, client = _interactive_cli(args)
        print("Please enter prompt to complete:")
        while True:
            input_prompt = input("> ")
            completion = client.completions.create(model=model_name,
                                                   prompt=input_prompt)
            output = completion.choices[0].text
            print(output)

    def subparser_init(
            self,
            subparsers: argparse._SubParsersAction) -> FlexibleArgumentParser:
        complete_parser = subparsers.add_parser(
            "complete",
            help=("Generate text completions based on the given prompt "
                  "via the running API server"),
            usage="vllm complete [options]")
        _add_query_options(complete_parser)
        return complete_parser


def cmd_init() -> List[CLISubcommand]:
    return [ChatCommand(), CompleteCommand()]
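
Once a server is up (for example via `vllm serve <model>`), these client subcommands are used interactively from the shell. A usage sketch based on the flags defined above (values are placeholders):

```
vllm chat --url http://localhost:8000/v1 --system-prompt "You are a helpful assistant."
vllm complete --model-name <model>
```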

vllm/entrypoints/cli/serve.py (new file, +63 lines)

# SPDX-License-Identifier: Apache-2.0

import argparse
from typing import List

import uvloop

from vllm.engine.arg_utils import EngineArgs
from vllm.entrypoints.cli.types import CLISubcommand
from vllm.entrypoints.openai.api_server import run_server
from vllm.entrypoints.openai.cli_args import (make_arg_parser,
                                              validate_parsed_serve_args)
from vllm.utils import FlexibleArgumentParser


class ServeSubcommand(CLISubcommand):
    """The `serve` subcommand for the vLLM CLI. """

    def __init__(self):
        self.name = "serve"
        super().__init__()

    @staticmethod
    def cmd(args: argparse.Namespace) -> None:
        # The default value of `--model`
        if args.model != EngineArgs.model:
            raise ValueError(
                "With `vllm serve`, you should provide the model as a "
                "positional argument instead of via the `--model` option.")

        # EngineArgs expects the model name to be passed as --model.
        args.model = args.model_tag

        uvloop.run(run_server(args))

    def validate(self, args: argparse.Namespace) -> None:
        validate_parsed_serve_args(args)

    def subparser_init(
            self,
            subparsers: argparse._SubParsersAction) -> FlexibleArgumentParser:
        serve_parser = subparsers.add_parser(
            "serve",
            help="Start the vLLM OpenAI Compatible API server",
            usage="vllm serve <model_tag> [options]")
        serve_parser.add_argument("model_tag",
                                  type=str,
                                  help="The model tag to serve")
        serve_parser.add_argument(
            "--config",
            type=str,
            default='',
            required=False,
            help="Read CLI options from a config file."
            "Must be a YAML with the following options:"
            "https://docs.vllm.ai/en/latest/serving/openai_compatible_server.html#cli-reference"
        )

        return make_arg_parser(serve_parser)


def cmd_init() -> List[CLISubcommand]:
    return [ServeSubcommand()]
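
Note that the positional `model_tag` replaces `--model`, which `cmd()` explicitly rejects. A usage sketch (the model tag and config path are placeholders; per the help text the config must be a YAML file of CLI options):

```
vllm serve <model_tag> --config <options.yaml>
```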

vllm/entrypoints/cli/types.py (new file, +24 lines)

# SPDX-License-Identifier: Apache-2.0

import argparse

from vllm.utils import FlexibleArgumentParser


class CLISubcommand:
    """Base class for CLI argument handlers."""

    name: str

    @staticmethod
    def cmd(args: argparse.Namespace) -> None:
        raise NotImplementedError("Subclasses should implement this method")

    def validate(self, args: argparse.Namespace) -> None:
        # No validation by default
        pass

    def subparser_init(
            self,
            subparsers: argparse._SubParsersAction) -> FlexibleArgumentParser:
        raise NotImplementedError("Subclasses should implement this method")
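
This small interface, together with the per-module `cmd_init()` convention used by `openai.py` and `serve.py`, is the whole contract that `main.py` relies on. A minimal sketch of a hypothetical extra command module (the module name, `PingCommand`, and its `--message` flag are all illustrative and not part of this commit):

```python
# SPDX-License-Identifier: Apache-2.0
# Hypothetical vllm/entrypoints/cli/ping.py -- illustrative only.
import argparse
from typing import List

from vllm.entrypoints.cli.types import CLISubcommand
from vllm.utils import FlexibleArgumentParser


class PingCommand(CLISubcommand):
    """A toy `ping` subcommand used to illustrate the plugin surface."""

    def __init__(self):
        self.name = "ping"
        super().__init__()

    @staticmethod
    def cmd(args: argparse.Namespace) -> None:
        # main() dispatches here via the `dispatch_function` default it sets.
        print(f"pong: {args.message}")

    def subparser_init(
            self,
            subparsers: argparse._SubParsersAction) -> FlexibleArgumentParser:
        ping_parser = subparsers.add_parser("ping",
                                            help="Print a test message",
                                            usage="vllm ping [options]")
        ping_parser.add_argument("--message", type=str, default="ok")
        return ping_parser


def cmd_init() -> List[CLISubcommand]:
    # main.py calls this for every module listed in CMD_MODULES.
    return [PingCommand()]
```

To wire it in, the module would be imported in `vllm/entrypoints/cli/main.py` and appended to `CMD_MODULES`; `main()` would then register its subparser and dispatch to `PingCommand.cmd` like any other subcommand.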

vllm/entrypoints/openai/api_server.py (+2, -1)

@@ -901,7 +901,8 @@ def signal_handler(*_) -> None:

 if __name__ == "__main__":
     # NOTE(simon):
-    # This section should be in sync with vllm/scripts.py for CLI entrypoints.
+    # This section should be in sync with vllm/entrypoints/cli/main.py for CLI
+    # entrypoints.
     parser = FlexibleArgumentParser(
         description="vLLM OpenAI-Compatible RESTful API server.")
     parser = make_arg_parser(parser)
