Skip to content

Commit 1e358ff

Browse files
authored
add ROCm dockerfile (opendatahub-io#205)
- remove build steps/dependencies - allow for installing pre-built flash-attention/vllm wheels - default ROCM_VERSION to 6.3.4, allowing override with env vars - cleanup rocm docker bake, defaults - amdsmi: use setup.py to build - add amdsmi bind mount - remove flashinfer from rocm target - bump vllm-tgis-adapter to 0.7.0 - Dockerfile*.ubi: bump ubi base
2 parents 8fcb848 + ae418bd commit 1e358ff

File tree

2 files changed

+210
-6
lines changed

2 files changed

+210
-6
lines changed

Dockerfile.rocm.ubi

Lines changed: 173 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,173 @@
1+
## Global Args ##################################################################
2+
ARG BASE_UBI_IMAGE_TAG=9.5-1742914212
3+
ARG PYTHON_VERSION=3.12
4+
ARG VLLM_TGIS_ADAPTER_VERSION=0.7.0
5+
6+
FROM registry.access.redhat.com/ubi9/ubi-minimal:${BASE_UBI_IMAGE_TAG} AS base
7+
8+
ARG PYTHON_VERSION
9+
10+
ENV VIRTUAL_ENV=/opt/vllm
11+
ENV PATH="$VIRTUAL_ENV/bin:$PATH"
12+
13+
RUN --mount=type=cache,target=/root/.cache/pip \
14+
microdnf -y update && \
15+
microdnf install -y --setopt=install_weak_deps=0 --nodocs \
16+
python${PYTHON_VERSION}-devel \
17+
python${PYTHON_VERSION}-pip \
18+
python${PYTHON_VERSION}-wheel && \
19+
python${PYTHON_VERSION} -m venv $VIRTUAL_ENV && \
20+
pip install -U pip wheel setuptools uv && \
21+
microdnf clean all
22+

FROM base AS rocm_base
ARG ROCM_VERSION=6.3.4
ARG PYTHON_VERSION
ARG BASE_UBI_IMAGE_TAG

# Configure the AMD dnf repositories for the requested ROCm version.
# ${BASE_UBI_IMAGE_TAG/-*/} strips the build suffix ("9.5-174..." -> "9.5");
# this is a bash parameter expansion and relies on /bin/sh being bash on UBI.
RUN printf "[amdgpu]\n\
name=amdgpu\n\
baseurl=https://repo.radeon.com/amdgpu/${ROCM_VERSION}/rhel/${BASE_UBI_IMAGE_TAG/-*/}/main/x86_64/\n\
enabled=1\n\
priority=50\n\
gpgcheck=1\n\
gpgkey=https://repo.radeon.com/rocm/rocm.gpg.key\n\
[ROCm-${ROCM_VERSION}]\n\
name=ROCm${ROCM_VERSION}\n\
baseurl=https://repo.radeon.com/rocm/rhel9/${ROCM_VERSION}/main\n\
enabled=1\n\
priority=50\n\
gpgcheck=1\n\
gpgkey=https://repo.radeon.com/rocm/rocm.gpg.key" > /etc/yum.repos.d/amdgpu.repo


# Install the pinned ROCm nightly torch/torchvision wheels. "version" is the
# major.minor part of ROCM_VERSION (6.3.4 -> 6.3), matching the PyTorch
# nightly index naming. Note the space before the continuation after the
# torch spec: without it the two wheel specs would merge into one argument.
RUN --mount=type=cache,target=/root/.cache/uv \
    export version="$(awk -F. '{print $1"."$2}' <<< $ROCM_VERSION)" && \
    uv pip install --pre \
        --index-url "https://download.pytorch.org/whl/nightly/rocm${version}" \
        torch==2.7.0.dev20250308+rocm${version} \
        torchvision==0.22.0.dev20250308+rocm${version} && \
    # Install libdrm-amdgpu to avoid errors when retrieving device information (amdgpu.ids: No such file or directory)
    microdnf install -y --nodocs libdrm-amdgpu && \
    microdnf clean all

# Make the native libraries bundled inside the Python wheels resolvable by
# the dynamic linker at runtime.
ENV LD_LIBRARY_PATH="$VIRTUAL_ENV/lib/python${PYTHON_VERSION}/site-packages/numpy.libs:$LD_LIBRARY_PATH"
ENV LD_LIBRARY_PATH="$VIRTUAL_ENV/lib/python${PYTHON_VERSION}/site-packages/pillow.libs:$LD_LIBRARY_PATH"
ENV LD_LIBRARY_PATH="$VIRTUAL_ENV/lib/python${PYTHON_VERSION}/site-packages/triton/backends/amd/lib:$LD_LIBRARY_PATH"
ENV LD_LIBRARY_PATH="$VIRTUAL_ENV/lib/python${PYTHON_VERSION}/site-packages/torch/lib:$LD_LIBRARY_PATH"

# Persist the library search path system-wide so it also applies to
# processes that do not inherit LD_LIBRARY_PATH.
RUN echo $LD_LIBRARY_PATH | tr : \\n >> /etc/ld.so.conf.d/torch-venv.conf && \
    ldconfig

# amd-smi-lib ships the amdsmi Python sources under /opt/rocm/share/amd_smi;
# this stage builds them into a wheel so the runtime stages can install
# amdsmi without pulling in the full ROCm package set.
FROM rocm_base AS build_amdsmi

RUN microdnf -y install \
        amd-smi-lib && \
    microdnf clean all

WORKDIR /opt/rocm/share/amd_smi

RUN python setup.py bdist_wheel --dist-dir=/dist/

#################### libsodium Build IMAGE ####################
FROM rocm_base AS libsodium-builder

RUN microdnf install -y --nodocs gcc gzip tar \
    && microdnf clean all

WORKDIR /usr/src/libsodium

ARG LIBSODIUM_VERSION=1.0.20
# -f makes curl fail on HTTP errors instead of saving an error page as the tarball.
RUN curl -fLO https://github.com/jedisct1/libsodium/releases/download/${LIBSODIUM_VERSION}-RELEASE/libsodium-${LIBSODIUM_VERSION}.tar.gz \
    && tar -xzvf libsodium*.tar.gz \
    && rm -f libsodium*.tar.gz \
    && mv libsodium*/* ./

# Hardened build flags mirroring RHEL defaults; "make check" runs the
# upstream test suite before the artifacts are consumed by later stages.
RUN CFLAGS="-O3 -Wall -Werror=format-security -Wno-unused-function -Wp,-D_GLIBCXX_ASSERTIONS -fstack-protector-strong -fstack-clash-protection -fcf-protection" \
    ./configure \
        --prefix="/usr/" \
        --libdir=/usr/lib64 && \
    make -j $(nproc) && \
    make check

##################################################################################################

FROM rocm_base AS vllm-openai
# Wheel-selection strategies consumed by payload/run.sh (pre-built vs local).
ARG FLASH_ATTENTION_WHEEL_STRATEGY
ARG VLLM_WHEEL_STRATEGY

WORKDIR /workspace

ENV VIRTUAL_ENV=/opt/vllm
ENV PATH=$VIRTUAL_ENV/bin:$PATH

# Required for triton
RUN microdnf install -y --setopt=install_weak_deps=0 --nodocs gcc rsync && \
    microdnf clean all

# Install libsodium for Tensorizer encryption
RUN --mount=type=bind,from=libsodium-builder,src=/usr/src/libsodium,target=/usr/src/libsodium \
    make -C /usr/src/libsodium install

# payload/run.sh installs the amdsmi wheel (bind-mounted from build_amdsmi)
# plus the pre-built flash-attention/vllm wheels into the venv.
RUN --mount=type=bind,from=build_amdsmi,src=/dist,target=/install/amdsmi/ \
    --mount=type=cache,target=/root/.cache/uv \
    --mount=type=bind,src=payload,target=/workspace/payload \
    ./payload/run.sh

ENV HF_HUB_OFFLINE=1 \
    HOME=/home/vllm \
    # Allow requested max length to exceed what is extracted from the
    # config.json
    # see: https://github.com/vllm-project/vllm/pull/7080
    VLLM_ALLOW_LONG_MAX_MODEL_LEN=1 \
    VLLM_USAGE_SOURCE=production-docker-image \
    VLLM_WORKER_MULTIPROC_METHOD=fork \
    VLLM_NO_USAGE_STATS=1 \
    # Silences the HF Tokenizers warning
    TOKENIZERS_PARALLELISM=false \
    RAY_EXPERIMENTAL_NOSET_ROCR_VISIBLE_DEVICES=1 \
    VLLM_USE_TRITON_FLASH_ATTN=0 \
    HIP_FORCE_DEV_KERNARG=1 \
    # Cache dirs under /tmp so an arbitrary-UID (OpenShift) user can write them.
    OUTLINES_CACHE_DIR=/tmp/outlines \
    NUMBA_CACHE_DIR=/tmp/numba \
    TRITON_CACHE_DIR=/tmp/triton

# setup non-root user for OpenShift (gid 0 + g+rwx lets arbitrary UIDs write)
RUN umask 002 && \
    useradd --uid 2000 --gid 0 vllm && \
    mkdir -p /licenses /home/vllm && \
    chmod g+rwx /home/vllm

COPY LICENSE /licenses/vllm.md
COPY examples/*.jinja /app/data/template/

USER 2000
WORKDIR /home/vllm

ENTRYPOINT ["python3", "-m", "vllm.entrypoints.openai.api_server"]


FROM vllm-openai AS vllm-grpc-adapter
ARG VLLM_TGIS_ADAPTER_VERSION

# Temporarily become root so payload/run.sh can install into the venv.
USER root

RUN --mount=type=bind,from=build_amdsmi,src=/dist,target=/install/amdsmi/ \
    --mount=type=cache,target=/root/.cache/uv \
    --mount=type=bind,src=payload,target=/workspace/payload \
    cd /workspace && \
    env HOME=/root VLLM_TGIS_ADAPTER_VERSION=${VLLM_TGIS_ADAPTER_VERSION} \
        ./payload/run.sh

ENV GRPC_PORT=8033 \
    PORT=8000 \
    # As an optimization, vLLM disables logprobs when using spec decoding by
    # default, but this would be unexpected to users of a hosted model that
    # happens to have spec decoding
    # see: https://github.com/vllm-project/vllm/pull/6485
    DISABLE_LOGPROBS_DURING_SPEC_DECODING=false

# Drop back to the non-root user created in the vllm-openai stage.
USER 2000
ENTRYPOINT ["python3", "-m", "vllm_tgis_adapter", "--uvicorn-log-level=warning"]

docker-bake.hcl

Lines changed: 37 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2,20 +2,33 @@ variable "REPOSITORY" {
22
default = "quay.io/vllm/vllm"
33
}
44

5-
# GITHUB_* variables are only available in github actions
5+
# GITHUB_* variables are set as env vars in github actions
66
variable "GITHUB_SHA" {}
77
variable "GITHUB_REPOSITORY" {}
88
variable "GITHUB_RUN_ID" {}
99

10-
variable "VLLM_VERSION" {} # set by github actions or manually?
10+
variable "VLLM_VERSION" {}
11+
12+
variable "PYTHON_VERSION" {
13+
default = "3.12"
14+
}
15+
16+
variable "ROCM_VERSION" {
17+
default = "6.3.4"
18+
}
19+
20+
variable "VLLM_TGIS_ADAPTER_VERSION" {
21+
default = "0.7.0"
22+
}
23+
1124

1225
target "docker-metadata-action" {} // populated by gha docker/metadata-action
1326

1427
target "_common" {
1528
context = "."
1629

1730
args = {
18-
BASE_UBI_IMAGE_TAG = "9.5-1736404155"
31+
BASE_UBI_IMAGE_TAG = "9.5-1742914212"
1932
PYTHON_VERSION = "3.12"
2033
}
2134

@@ -34,6 +47,7 @@ target "_common" {
3447
group "default" {
3548
targets = [
3649
"cuda",
50+
"rocm",
3751
]
3852
}
3953

@@ -42,11 +56,10 @@ target "cuda" {
4256
dockerfile = "Dockerfile.ubi"
4357

4458
args = {
45-
BASE_UBI_IMAGE_TAG = "9.5-1739420147"
46-
PYTHON_VERSION = "3.12"
59+
PYTHON_VERSION = "${PYTHON_VERSION}"
4760
# CUDA_VERSION = "12.4" # TODO: the dockerfile cannot consume the cuda version
4861
LIBSODIUM_VERSION = "1.0.20"
49-
VLLM_TGIS_ADAPTER_VERSION = "0.7.0"
62+
VLLM_TGIS_ADAPTER_VERSION = "${VLLM_TGIS_ADAPTER_VERSION}"
5063

5164
FLASHINFER_VERSION = "https://github.com/flashinfer-ai/flashinfer/releases/download/v0.2.1.post2/flashinfer_python-0.2.1.post2+cu124torch2.6-cp38-abi3-linux_x86_64.whl"
5265
}
@@ -57,3 +70,21 @@ target "cuda" {
5770
"${REPOSITORY}:${formatdate("YYYY-MM-DD-hh-mm", timestamp())}"
5871
]
5972
}
73+
74+
target "rocm" {
75+
inherits = ["_common"]
76+
dockerfile = "Dockerfile.rocm.ubi"
77+
78+
args = {
79+
PYTHON_VERSION = "${PYTHON_VERSION}"
80+
ROCM_VERSION = "${ROCM_VERSION}"
81+
LIBSODIUM_VERSION = "1.0.20"
82+
VLLM_TGIS_ADAPTER_VERSION = "${VLLM_TGIS_ADAPTER_VERSION}"
83+
}
84+
85+
tags = [
86+
"${REPOSITORY}:${replace(VLLM_VERSION, "+", "_")}", # vllm_version might contain local version specifiers (+) which are not valid tags
87+
"${REPOSITORY}:${GITHUB_SHA}",
88+
"${REPOSITORY}:${formatdate("YYYY-MM-DD-hh-mm", timestamp())}"
89+
]
90+
}

0 commit comments

Comments
 (0)