Commit ae418bd

Dockerfile.rocm.ubi: improvements
- remove build steps/dependencies
- allow for installing pre-built flash-attention/vllm wheels
- default ROCM_VERSION to 6.3.4, allowing override with env vars
- cleanup rocm docker bake, defaults
- amdsmi: use setup.py to build
- add amdsmi bind mount
- remove flashinfer from rocm target
- bump vllm-tgis-adapter to 0.7.0
- Dockerfile*.ubi: bump ubi base

1 parent bf36270 commit ae418bd
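
The ROCM_VERSION, PYTHON_VERSION and VLLM_TGIS_ADAPTER_VERSION defaults introduced here live in docker-bake.hcl (see the second diff below), so they can be overridden from the environment at build time. A minimal sketch of such an invocation, not part of this commit: the version numbers and GITHUB_*/VLLM_VERSION values are placeholders, and it assumes the standard docker buildx bake behaviour of reading same-named environment variables.

    # placeholder values for the bake variables that have no defaults
    export VLLM_VERSION=0.8.1 GITHUB_SHA=ae418bd GITHUB_REPOSITORY=example/vllm GITHUB_RUN_ID=0

    # build the rocm target with its declared defaults (ROCM_VERSION=6.3.4, Python 3.12)
    docker buildx bake rocm

    # override the ROCm version without editing the bake file
    ROCM_VERSION=6.3.3 docker buildx bake rocm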

File tree

2 files changed: +58 -121 lines changed

  Dockerfile.rocm.ubi
  docker-bake.hcl

Dockerfile.rocm.ubi

Lines changed: 21 additions & 115 deletions
@@ -1,9 +1,7 @@
 ## Global Args ##################################################################
-ARG BASE_UBI_IMAGE_TAG=9.5-1741850109
+ARG BASE_UBI_IMAGE_TAG=9.5-1742914212
 ARG PYTHON_VERSION=3.12
-# Default ROCm ARCHes to build vLLM for.
-ARG PYTORCH_ROCM_ARCH="gfx908;gfx90a;gfx942;gfx1100"
-ARG MAX_JOBS=12
+ARG VLLM_TGIS_ADAPTER_VERSION=0.7.0
 
 FROM registry.access.redhat.com/ubi9/ubi-minimal:${BASE_UBI_IMAGE_TAG} AS base
 
@@ -44,8 +42,7 @@ gpgcheck=1\n\
 gpgkey=https://repo.radeon.com/rocm/rocm.gpg.key" > /etc/yum.repos.d/amdgpu.repo
 
 
-RUN --mount=type=cache,target=/root/.cache/pip \
-    --mount=type=cache,target=/root/.cache/uv \
+RUN --mount=type=cache,target=/root/.cache/uv \
     export version="$(awk -F. '{print $1"."$2}' <<< $ROCM_VERSION)" && \
     uv pip install --pre \
         --index-url "https://download.pytorch.org/whl/nightly/rocm${version}" \
@@ -64,101 +61,15 @@ ENV LD_LIBRARY_PATH="$VIRTUAL_ENV/lib/python${PYTHON_VERSION}/site-packages/torc
 RUN echo $LD_LIBRARY_PATH | tr : \\n >> /etc/ld.so.conf.d/torch-venv.conf && \
     ldconfig
 
-FROM rocm_base as rocm_devel
-
-ENV CCACHE_DIR=/root/.cache/ccache
-
-RUN rpm -ivh https://dl.fedoraproject.org/pub/epel/epel-release-latest-9.noarch.rpm && \
-    rpm -ql epel-release && \
-    microdnf -y update && \
-    microdnf --nodocs -y install \
-        ccache \
-        git \
-        # packages required to build vllm
-        amd-smi-lib \
-        hipblas-devel \
-        hipblaslt-devel \
-        hipcc \
-        hipcub-devel \
-        hipfft-devel \
-        hiprand-devel \
-        hipsolver-devel \
-        hipsparse-devel \
-        hsa-rocr-devel \
-        miopen-hip-devel \
-        rccl-devel \
-        rocblas-devel \
-        rocm-device-libs \
-        rocprim-devel \
-        rocrand-devel \
-        rocthrust-devel \
-        # end packages required to build vllm
-        wget \
-        which && \
-    microdnf clean all
-
-WORKDIR /workspace
-
-ENV LLVM_SYMBOLIZER_PATH=/opt/rocm/llvm/bin/llvm-symbolizer
-ENV PATH=$PATH:/opt/rocm/bin
-ENV CPLUS_INCLUDE_PATH=$VIRTUAL_ENV/lib/python${PYTHON_VERSION}/site-packages/torch/include:/opt/rocm/include
-
-
-FROM rocm_devel AS build_amdsmi
-
-# Build AMD SMI wheel
-RUN cd /opt/rocm/share/amd_smi && \
-    python3 -m pip wheel . --wheel-dir=/install
-
-##################################################################################################
-
-FROM rocm_devel AS build_flashattention
+FROM rocm_base as build_amdsmi
 
-ARG FA_GFX_ARCHS="gfx90a;gfx942"
-
-# the FA_BRANCH commit belongs to the ROCm/flash-attention fork, `main_perf` branch
-ARG FA_BRANCH="3cea2fb"
-ARG MAX_JOBS
-ENV MAX_JOBS=${MAX_JOBS}
-
-RUN --mount=type=cache,target=/root/.cache/uv \
-    --mount=type=cache,target=/workspace/build \
-    mkdir -p /libs && \
-    cd /libs && \
-    git clone https://github.com/ROCm/flash-attention.git && \
-    cd flash-attention && \
-    git checkout ${FA_BRANCH} && \
-    git submodule update --init && \
-    uv pip install cmake ninja packaging && \
-    env \
-        GPU_ARCHS="${FA_GFX_ARCHS}" \
-        python3 setup.py bdist_wheel --dist-dir=/install
-
-##################################################################################################
-
-FROM rocm_devel AS build_vllm
-ARG PYTORCH_ROCM_ARCH
-ARG PYTHON_VERSION
-ARG MAX_JOBS
-ENV MAX_JOBS=${MAX_JOBS}
-ENV PYTORCH_ROCM_ARCH=${PYTORCH_ROCM_ARCH}
-
-COPY . .
+RUN microdnf -y install \
+    amd-smi-lib && \
+    microdnf clean all
 
-ENV VLLM_TARGET_DEVICE="rocm"
-ENV MAX_JOBS=${MAX_JOBS}
-# Make sure punica kernels are built (for LoRA)
-ENV VLLM_INSTALL_PUNICA_KERNELS=1
+WORKDIR /opt/rocm/share/amd_smi
 
-RUN --mount=type=cache,target=/root/.cache/ccache \
-    --mount=type=cache,target=/root/.cache/pip \
-    --mount=type=cache,target=/root/.cache/uv \
-    uv pip install -v -U \
-        ninja setuptools-scm>=8 "cmake>=3.26" packaging && \
-    env CFLAGS="-march=haswell" \
-        CXXFLAGS="$CFLAGS $CXXFLAGS" \
-        CMAKE_BUILD_TYPE=Release \
-        python3 setup.py bdist_wheel --dist-dir=dist
+RUN python setup.py bdist_wheel --dist-dir=/dist/
 
 #################### libsodium Build IMAGE ####################
 FROM rocm_base as libsodium-builder
@@ -184,7 +95,8 @@ RUN CFLAGS="-O3 -Wall -Werror=format-security -Wno-unused-function -Wp,-D_GLIBCX
 ##################################################################################################
 
 FROM rocm_base AS vllm-openai
-ARG MAX_JOBS
+ARG FLASH_ATTENTION_WHEEL_STRATEGY
+ARG VLLM_WHEEL_STRATEGY
 
 WORKDIR /workspace
 
@@ -197,21 +109,12 @@ RUN microdnf install -y --setopt=install_weak_deps=0 --nodocs gcc rsync && \
 
 # Install libsodium for Tensorizer encryption
 RUN --mount=type=bind,from=libsodium-builder,src=/usr/src/libsodium,target=/usr/src/libsodium \
-    cd /usr/src/libsodium \
-    && make install
+    make -C /usr/src/libsodium install
 
-RUN --mount=type=bind,from=build_amdsmi,src=/install,target=/install/amdsmi/ \
-    --mount=type=bind,from=build_flashattention,src=/install,target=/install/flashattention \
-    --mount=type=bind,from=build_vllm,src=/workspace/dist,target=/install/vllm/ \
-    --mount=type=cache,target=/root/.cache/pip \
+RUN --mount=type=bind,from=build_amdsmi,src=/dist,target=/install/amdsmi/ \
     --mount=type=cache,target=/root/.cache/uv \
-    export version="$(awk -F. '{print $1"."$2}' <<< $ROCM_VERSION)" && \
-    uv pip install \
-        --index-strategy=unsafe-best-match \
-        --extra-index-url "https://download.pytorch.org/whl/nightly/rocm${version}" \
-        /install/amdsmi/*.whl\
-        /install/flashattention/*.whl\
-        /install/vllm/*.whl
+    --mount=type=bind,src=payload,target=/workspace/payload \
+    ./payload/run.sh
 
 ENV HF_HUB_OFFLINE=1 \
     HOME=/home/vllm \
@@ -247,13 +150,16 @@ ENTRYPOINT ["python3", "-m", "vllm.entrypoints.openai.api_server"]
 
 
 FROM vllm-openai as vllm-grpc-adapter
+ARG VLLM_TGIS_ADAPTER_VERSION
 
 USER root
 
-RUN --mount=type=cache,target=/root/.cache/pip \
+RUN --mount=type=bind,from=build_amdsmi,src=/dist,target=/install/amdsmi/ \
     --mount=type=cache,target=/root/.cache/uv \
-    --mount=type=bind,from=build_vllm,src=/workspace/dist,target=/install/vllm/ \
-    HOME=/root uv pip install /install/vllm/*.whl vllm-tgis-adapter==0.6.3
+    --mount=type=bind,src=payload,target=/workspace/payload \
+    cd /workspace && \
+    env HOME=/root VLLM_TGIS_ADAPTER_VERSION=${VLLM_TGIS_ADAPTER_VERSION} \
+    ./payload/run.sh
 
 ENV GRPC_PORT=8033 \
     PORT=8000 \
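
Note that both the vllm-openai and vllm-grpc-adapter stages now bind-mount a payload/ directory and delegate wheel installation to ./payload/run.sh, which is not part of this diff. Purely to illustrate what such a script could do under the FLASH_ATTENTION_WHEEL_STRATEGY / VLLM_WHEEL_STRATEGY build args, here is a hypothetical sketch; all paths, strategy values and wheel locations in it are assumptions, not the contents of the real script:

    #!/bin/bash
    # Hypothetical payload/run.sh -- NOT the script shipped with this commit.
    set -euo pipefail

    wheels=(/install/amdsmi/*.whl)   # amdsmi wheel bind-mounted from the build_amdsmi stage

    # "prebuilt" and the payload/wheels/ location are invented values for this sketch
    if [ "${FLASH_ATTENTION_WHEEL_STRATEGY:-prebuilt}" = "prebuilt" ]; then
        wheels+=(payload/wheels/flash_attn-*.whl)
    fi
    if [ "${VLLM_WHEEL_STRATEGY:-prebuilt}" = "prebuilt" ]; then
        wheels+=(payload/wheels/vllm-*.whl)
    fi

    # the vllm-grpc-adapter stage exports VLLM_TGIS_ADAPTER_VERSION before calling the script
    if [ -n "${VLLM_TGIS_ADAPTER_VERSION:-}" ]; then
        wheels+=("vllm-tgis-adapter==${VLLM_TGIS_ADAPTER_VERSION}")
    fi

    uv pip install "${wheels[@]}"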

docker-bake.hcl

Lines changed: 37 additions & 6 deletions
@@ -2,20 +2,33 @@ variable "REPOSITORY" {
   default = "quay.io/vllm/vllm"
 }
 
-# GITHUB_* variables are only available in github actions
+# GITHUB_* variables are set as env vars in github actions
 variable "GITHUB_SHA" {}
 variable "GITHUB_REPOSITORY" {}
 variable "GITHUB_RUN_ID" {}
 
-variable "VLLM_VERSION" {} # set by github actions or manually?
+variable "VLLM_VERSION" {}
+
+variable "PYTHON_VERSION" {
+  default = "3.12"
+}
+
+variable "ROCM_VERSION" {
+  default = "6.3.4"
+}
+
+variable "VLLM_TGIS_ADAPTER_VERSION" {
+  default = "0.7.0"
+}
+
 
 target "docker-metadata-action" {} // populated by gha docker/metadata-action
 
 target "_common" {
   context = "."
 
   args = {
-    BASE_UBI_IMAGE_TAG = "9.5-1736404155"
+    BASE_UBI_IMAGE_TAG = "9.5-1742914212"
     PYTHON_VERSION = "3.12"
   }
 
@@ -34,6 +47,7 @@ target "_common" {
 group "default" {
   targets = [
     "cuda",
+    "rocm",
   ]
 }
 
@@ -42,11 +56,10 @@ target "cuda" {
   dockerfile = "Dockerfile.ubi"
 
   args = {
-    BASE_UBI_IMAGE_TAG = "9.5-1739420147"
-    PYTHON_VERSION = "3.12"
+    PYTHON_VERSION = "${PYTHON_VERSION}"
     # CUDA_VERSION = "12.4" # TODO: the dockerfile cannot consume the cuda version
     LIBSODIUM_VERSION = "1.0.20"
-    VLLM_TGIS_ADAPTER_VERSION = "0.7.0"
+    VLLM_TGIS_ADAPTER_VERSION = "${VLLM_TGIS_ADAPTER_VERSION}"
 
     FLASHINFER_VERSION = "https://github.com/flashinfer-ai/flashinfer/releases/download/v0.2.1.post2/flashinfer_python-0.2.1.post2+cu124torch2.6-cp38-abi3-linux_x86_64.whl"
   }
@@ -57,3 +70,21 @@ target "cuda" {
     "${REPOSITORY}:${formatdate("YYYY-MM-DD-hh-mm", timestamp())}"
   ]
 }
+
+target "rocm" {
+  inherits = ["_common"]
+  dockerfile = "Dockerfile.rocm.ubi"
+
+  args = {
+    PYTHON_VERSION = "${PYTHON_VERSION}"
+    ROCM_VERSION = "${ROCM_VERSION}"
+    LIBSODIUM_VERSION = "1.0.20"
+    VLLM_TGIS_ADAPTER_VERSION = "${VLLM_TGIS_ADAPTER_VERSION}"
+  }
+
+  tags = [
+    "${REPOSITORY}:${replace(VLLM_VERSION, "+", "_")}", # vllm_version might contain local version specifiers (+) which are not valid tags
+    "${REPOSITORY}:${GITHUB_SHA}",
+    "${REPOSITORY}:${formatdate("YYYY-MM-DD-hh-mm", timestamp())}"
+  ]
+}
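
One way to sanity-check the new target without building it is docker buildx bake --print, which dumps the resolved build definition; useful for confirming that the ROCM_VERSION and VLLM_TGIS_ADAPTER_VERSION defaults land in the args block. The env values below are placeholders for the variables that have no defaults:

    VLLM_VERSION=0.8.1 GITHUB_SHA=ae418bd GITHUB_REPOSITORY=example/vllm GITHUB_RUN_ID=0 \
        docker buildx bake --print rocm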
