
Commit 51bd1a9

ruisearch42 authored and lk-chen committed
[Misc] Add example to run DeepSeek with Ray Serve LLM (vllm-project#17134)
Signed-off-by: Rui Qiao <[email protected]>
1 parent 596e20e commit 51bd1a9

File tree

1 file changed: 44 additions, 0 deletions
@@ -0,0 +1,44 @@
# SPDX-License-Identifier: Apache-2.0
"""
Example to deploy DeepSeek R1 or V3 with Ray Serve LLM.
See Ray Serve LLM documentation at:
https://docs.ray.io/en/latest/serve/llm/serving-llms.html

Run `python3 ray_serve_deepseek.py` to deploy the model.
"""

from ray import serve
from ray.serve.llm import LLMConfig, LLMRouter, LLMServer

llm_config = LLMConfig(
    model_loading_config=dict(
        model_id="deepseek",
        # Change to model download path
        model_source="/path/to/the/model",
    ),
    deployment_config=dict(
        autoscaling_config=dict(
            min_replicas=1,
            max_replicas=1,
        )
    ),
    # Change to the accelerator type of the node
    accelerator_type="H100",
    runtime_env=dict(env_vars=dict(VLLM_USE_V1="1")),
    # Customize engine arguments as needed (e.g. vLLM engine kwargs)
    engine_kwargs=dict(
        tensor_parallel_size=8,
        pipeline_parallel_size=2,
        gpu_memory_utilization=0.92,
        dtype="auto",
        max_num_seqs=40,
        max_model_len=16384,
        enable_chunked_prefill=True,
        enable_prefix_caching=True,
        trust_remote_code=True,
    ),
)

# Deploy the application
deployment = LLMServer.as_deployment(
    llm_config.get_serve_options(name_prefix="vLLM:")
).bind(llm_config)
llm_app = LLMRouter.as_deployment().bind([deployment])
serve.run(llm_app)
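
A note on the configuration above: with tensor_parallel_size=8 and pipeline_parallel_size=2, the engine shards the model across 8 × 2 = 16 GPUs (e.g. two 8×H100 nodes), and Ray Serve places the workers across the cluster. If you would rather pull weights from Hugging Face than point at a pre-downloaded path, model_source can typically be set to a repo ID instead; a minimal sketch, assuming the deepseek-ai/DeepSeek-R1 repository is the checkpoint you intend to serve:

# Sketch only: model_source set to a Hugging Face repo ID instead of a
# local path (assumes deepseek-ai/DeepSeek-R1; adjust to your checkpoint).
model_loading_config = dict(
    model_id="deepseek",
    model_source="deepseek-ai/DeepSeek-R1",
)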
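Once serve.run(llm_app) is live, the LLMRouter serves an OpenAI-compatible HTTP API. A minimal client-side check, assuming Ray Serve's default address http://localhost:8000 and the model_id "deepseek" configured above (the API key is required by the client library but not validated here):

# Minimal sketch: query the deployment through the OpenAI-compatible route.
# Assumes Ray Serve's default HTTP address, http://localhost:8000.
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8000/v1", api_key="not-needed")
response = client.chat.completions.create(
    model="deepseek",
    messages=[{"role": "user", "content": "Explain tensor parallelism in one sentence."}],
)
print(response.choices[0].message.content)

The same endpoint also accepts plain HTTP requests (e.g. curl) against /v1/chat/completions.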

Comments (0)