## Global Args ##################################################################
- ARG BASE_UBI_IMAGE_TAG=9.5-1741850109
+ ARG BASE_UBI_IMAGE_TAG=9.5-1742914212
ARG PYTHON_VERSION=3.12
- # Default ROCm ARCHes to build vLLM for.
- ARG PYTORCH_ROCM_ARCH="gfx908;gfx90a;gfx942;gfx1100"
- ARG MAX_JOBS=12
+ ARG VLLM_TGIS_ADAPTER_VERSION=0.7.0


FROM registry.access.redhat.com/ubi9/ubi-minimal:${BASE_UBI_IMAGE_TAG} AS base
@@ -44,8 +42,7 @@ gpgcheck=1\n\
gpgkey=https://repo.radeon.com/rocm/rocm.gpg.key" > /etc/yum.repos.d/amdgpu.repo


- RUN --mount=type=cache,target=/root/.cache/pip \
-     --mount=type=cache,target=/root/.cache/uv \
+ RUN --mount=type=cache,target=/root/.cache/uv \
    export version="$(awk -F. '{print $1"."$2}' <<< $ROCM_VERSION)" && \
    uv pip install --pre \
        --index-url "https://download.pytorch.org/whl/nightly/rocm${version}" \
@@ -64,101 +61,15 @@ ENV LD_LIBRARY_PATH="$VIRTUAL_ENV/lib/python${PYTHON_VERSION}/site-packages/torc
RUN echo $LD_LIBRARY_PATH | tr : \\n >> /etc/ld.so.conf.d/torch-venv.conf && \
    ldconfig

- FROM rocm_base as rocm_devel
-
- ENV CCACHE_DIR=/root/.cache/ccache
-
- RUN rpm -ivh https://dl.fedoraproject.org/pub/epel/epel-release-latest-9.noarch.rpm && \
-     rpm -ql epel-release && \
-     microdnf -y update && \
-     microdnf --nodocs -y install \
-         ccache \
-         git \
-         # packages required to build vllm
-         amd-smi-lib \
-         hipblas-devel \
-         hipblaslt-devel \
-         hipcc \
-         hipcub-devel \
-         hipfft-devel \
-         hiprand-devel \
-         hipsolver-devel \
-         hipsparse-devel \
-         hsa-rocr-devel \
-         miopen-hip-devel \
-         rccl-devel \
-         rocblas-devel \
-         rocm-device-libs \
-         rocprim-devel \
-         rocrand-devel \
-         rocthrust-devel \
-         # end packages required to build vllm
-         wget \
-         which && \
-     microdnf clean all
-
- WORKDIR /workspace
-
- ENV LLVM_SYMBOLIZER_PATH=/opt/rocm/llvm/bin/llvm-symbolizer
- ENV PATH=$PATH:/opt/rocm/bin
- ENV CPLUS_INCLUDE_PATH=$VIRTUAL_ENV/lib/python${PYTHON_VERSION}/site-packages/torch/include:/opt/rocm/include
-
-
- FROM rocm_devel AS build_amdsmi
-
- # Build AMD SMI wheel
- RUN cd /opt/rocm/share/amd_smi && \
-     python3 -m pip wheel . --wheel-dir=/install
-
- ##################################################################################################
-
- FROM rocm_devel AS build_flashattention
+ FROM rocm_base as build_amdsmi

- ARG FA_GFX_ARCHS="gfx90a;gfx942"
-
- # the FA_BRANCH commit belongs to the ROCm/flash-attention fork, `main_perf` branch
- ARG FA_BRANCH="3cea2fb"
- ARG MAX_JOBS
- ENV MAX_JOBS=${MAX_JOBS}
-
- RUN --mount=type=cache,target=/root/.cache/uv \
-     --mount=type=cache,target=/workspace/build \
-     mkdir -p /libs && \
-     cd /libs && \
-     git clone https://github.com/ROCm/flash-attention.git && \
-     cd flash-attention && \
-     git checkout ${FA_BRANCH} && \
-     git submodule update --init && \
-     uv pip install cmake ninja packaging && \
-     env \
-         GPU_ARCHS="${FA_GFX_ARCHS}" \
-         python3 setup.py bdist_wheel --dist-dir=/install
-
- ##################################################################################################
-
- FROM rocm_devel AS build_vllm
- ARG PYTORCH_ROCM_ARCH
- ARG PYTHON_VERSION
- ARG MAX_JOBS
- ENV MAX_JOBS=${MAX_JOBS}
- ENV PYTORCH_ROCM_ARCH=${PYTORCH_ROCM_ARCH}
-
- COPY . .
+ RUN microdnf -y install \
+     amd-smi-lib && \
+     microdnf clean all

- ENV VLLM_TARGET_DEVICE="rocm"
- ENV MAX_JOBS=${MAX_JOBS}
- # Make sure punica kernels are built (for LoRA)
- ENV VLLM_INSTALL_PUNICA_KERNELS=1
+ WORKDIR /opt/rocm/share/amd_smi

- RUN --mount=type=cache,target=/root/.cache/ccache \
-     --mount=type=cache,target=/root/.cache/pip \
-     --mount=type=cache,target=/root/.cache/uv \
-     uv pip install -v -U \
-         ninja setuptools-scm>=8 "cmake>=3.26" packaging && \
-     env CFLAGS="-march=haswell" \
-         CXXFLAGS="$CFLAGS $CXXFLAGS" \
-         CMAKE_BUILD_TYPE=Release \
-         python3 setup.py bdist_wheel --dist-dir=dist
+ RUN python setup.py bdist_wheel --dist-dir=/dist/

#################### libsodium Build IMAGE ####################
FROM rocm_base as libsodium-builder
@@ -184,7 +95,8 @@ RUN CFLAGS="-O3 -Wall -Werror=format-security -Wno-unused-function -Wp,-D_GLIBCX
##################################################################################################

FROM rocm_base AS vllm-openai
- ARG MAX_JOBS
+ ARG FLASH_ATTENTION_WHEEL_STRATEGY
+ ARG VLLM_WHEEL_STRATEGY

WORKDIR /workspace

@@ -197,21 +109,12 @@ RUN microdnf install -y --setopt=install_weak_deps=0 --nodocs gcc rsync && \

# Install libsodium for Tensorizer encryption
RUN --mount=type=bind,from=libsodium-builder,src=/usr/src/libsodium,target=/usr/src/libsodium \
-     cd /usr/src/libsodium \
-     && make install
+     make -C /usr/src/libsodium install

- RUN --mount=type=bind,from=build_amdsmi,src=/install,target=/install/amdsmi/ \
-     --mount=type=bind,from=build_flashattention,src=/install,target=/install/flashattention \
-     --mount=type=bind,from=build_vllm,src=/workspace/dist,target=/install/vllm/ \
-     --mount=type=cache,target=/root/.cache/pip \
+ RUN --mount=type=bind,from=build_amdsmi,src=/dist,target=/install/amdsmi/ \
    --mount=type=cache,target=/root/.cache/uv \
-     export version="$(awk -F. '{print $1"."$2}' <<< $ROCM_VERSION)" && \
-     uv pip install \
-         --index-strategy=unsafe-best-match \
-         --extra-index-url "https://download.pytorch.org/whl/nightly/rocm${version}" \
-         /install/amdsmi/*.whl\
-         /install/flashattention/*.whl\
-         /install/vllm/*.whl
+     --mount=type=bind,src=payload,target=/workspace/payload \
+     ./payload/run.sh

ENV HF_HUB_OFFLINE=1 \
    HOME=/home/vllm \
@@ -247,13 +150,16 @@ ENTRYPOINT ["python3", "-m", "vllm.entrypoints.openai.api_server"]

FROM vllm-openai as vllm-grpc-adapter
+ ARG VLLM_TGIS_ADAPTER_VERSION

USER root

- RUN --mount=type=cache,target=/root/.cache/pip \
+ RUN --mount=type=bind,from=build_amdsmi,src=/dist,target=/install/amdsmi/ \
    --mount=type=cache,target=/root/.cache/uv \
-     --mount=type=bind,from=build_vllm,src=/workspace/dist,target=/install/vllm/ \
-     HOME=/root uv pip install /install/vllm/*.whl vllm-tgis-adapter==0.6.3
+     --mount=type=bind,src=payload,target=/workspace/payload \
+     cd /workspace && \
+     env HOME=/root VLLM_TGIS_ADAPTER_VERSION=${VLLM_TGIS_ADAPTER_VERSION} \
+     ./payload/run.sh

ENV GRPC_PORT=8033 \
    PORT=8000 \