|
## Global build arguments ######################################################
# Declared ahead of the first FROM. A stage that needs one of these values
# re-declares it with a bare `ARG <name>` to bring the default into scope.

# Tag of the ubi9/ubi-minimal base image.
ARG BASE_UBI_IMAGE_TAG=9.5-1742914212
# Python minor version; used to pick the pythonX.Y-* packages and interpreter.
ARG PYTHON_VERSION=3.12
# Handed to payload/run.sh when building the gRPC adapter stage.
ARG VLLM_TGIS_ADAPTER_VERSION=0.7.0
| 5 | + |
# Base stage: UBI minimal plus a Python virtual environment at /opt/vllm.
FROM registry.access.redhat.com/ubi9/ubi-minimal:${BASE_UBI_IMAGE_TAG} AS base

# Bring the global default into this stage's scope.
ARG PYTHON_VERSION

# Put the virtual environment's bin directory first on PATH so that the bare
# `pip` call below resolves to the venv's copy.
ENV VIRTUAL_ENV=/opt/vllm
ENV PATH="${VIRTUAL_ENV}/bin:${PATH}"

# One layer: update OS packages, install the selected Python, create the venv,
# bootstrap the packaging tools into it, then drop the microdnf metadata.
# pip's download cache is kept in a cache mount rather than in the layer.
RUN --mount=type=cache,target=/root/.cache/pip \
    microdnf -y update \
    && microdnf install -y --setopt=install_weak_deps=0 --nodocs \
        "python${PYTHON_VERSION}-devel" \
        "python${PYTHON_VERSION}-pip" \
        "python${PYTHON_VERSION}-wheel" \
    && "python${PYTHON_VERSION}" -m venv "${VIRTUAL_ENV}" \
    && pip install -U pip wheel setuptools uv \
    && microdnf clean all
| 22 | + |
| 23 | + |
# ROCm base stage: registers the AMD package repositories, installs the ROCm
# nightly builds of torch/torchvision into the venv, and makes the shared
# libraries bundled inside the venv visible to the dynamic linker.
FROM base AS rocm_base
ARG ROCM_VERSION=6.3.4
ARG PYTHON_VERSION
ARG BASE_UBI_IMAGE_TAG

# Write the amdgpu and ROCm repo definitions. The RHEL release used in the
# amdgpu URL is the UBI tag with everything from the first '-' removed
# (e.g. 9.5-1742914212 -> 9.5); `%%-*` is the POSIX form of that trim.
RUN printf "[amdgpu]\n\
name=amdgpu\n\
baseurl=https://repo.radeon.com/amdgpu/${ROCM_VERSION}/rhel/${BASE_UBI_IMAGE_TAG%%-*}/main/x86_64/\n\
enabled=1\n\
priority=50\n\
gpgcheck=1\n\
gpgkey=https://repo.radeon.com/rocm/rocm.gpg.key\n\
[ROCm-${ROCM_VERSION}]\n\
name=ROCm${ROCM_VERSION}\n\
baseurl=https://repo.radeon.com/rocm/rhel9/${ROCM_VERSION}/main\n\
enabled=1\n\
priority=50\n\
gpgcheck=1\n\
gpgkey=https://repo.radeon.com/rocm/rocm.gpg.key" > /etc/yum.repos.d/amdgpu.repo

# The PyTorch index and local version tags use only MAJOR.MINOR of the ROCm
# version (6.3.4 -> 6.3).
# libdrm-amdgpu is installed to avoid errors when retrieving device information
# (amdgpu.ids: No such file or directory).
RUN --mount=type=cache,target=/root/.cache/uv \
    export version="$(echo "${ROCM_VERSION}" | awk -F. '{print $1"."$2}')" && \
    uv pip install --pre \
        --index-url "https://download.pytorch.org/whl/nightly/rocm${version}" \
        "torch==2.7.0.dev20250308+rocm${version}" \
        "torchvision==0.22.0.dev20250308+rocm${version}" && \
    microdnf install -y --nodocs libdrm-amdgpu && \
    microdnf clean all

# Prepend the library directories shipped inside the venv's site-packages.
# The first assignment appends the previous value only when one exists:
# expanding an unset LD_LIBRARY_PATH would leave a trailing ':' and the dynamic
# linker treats an empty entry as the current working directory.
ENV LD_LIBRARY_PATH="$VIRTUAL_ENV/lib/python${PYTHON_VERSION}/site-packages/numpy.libs${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}"
ENV LD_LIBRARY_PATH="$VIRTUAL_ENV/lib/python${PYTHON_VERSION}/site-packages/pillow.libs:$LD_LIBRARY_PATH"
ENV LD_LIBRARY_PATH="$VIRTUAL_ENV/lib/python${PYTHON_VERSION}/site-packages/triton/backends/amd/lib:$LD_LIBRARY_PATH"
ENV LD_LIBRARY_PATH="$VIRTUAL_ENV/lib/python${PYTHON_VERSION}/site-packages/torch/lib:$LD_LIBRARY_PATH"

# Record the same directories, one per line, in an ld.so.conf.d drop-in and
# rebuild the linker cache.
RUN echo "$LD_LIBRARY_PATH" | tr ':' '\n' >> /etc/ld.so.conf.d/torch-venv.conf && \
    ldconfig
| 63 | + |
# Builder stage for the amdsmi Python wheel. The wheel is written to /dist/,
# which later stages bind-mount from this stage.
FROM rocm_base AS build_amdsmi

RUN microdnf -y install \
        amd-smi-lib && \
    microdnf clean all

# NOTE(review): this directory is presumably provided by the amd-smi-lib
# package installed above -- confirm against the package contents.
WORKDIR /opt/rocm/share/amd_smi

RUN python setup.py bdist_wheel --dist-dir=/dist/
| 73 | + |
#################### libsodium Build IMAGE ####################
# Downloads, compiles and self-tests libsodium in /usr/src/libsodium. Nothing is
# installed here; the build tree is left in place for a later `make install`.
FROM rocm_base AS libsodium-builder

RUN microdnf install -y --nodocs gcc gzip tar \
    && microdnf clean all

WORKDIR /usr/src/libsodium

ARG LIBSODIUM_VERSION=1.0.20
# --fail makes curl exit non-zero on an HTTP error instead of saving the error
# page as the tarball. The extracted top-level directory's contents are moved
# into the WORKDIR so that configure/make run from /usr/src/libsodium.
RUN curl -fLO https://github.com/jedisct1/libsodium/releases/download/${LIBSODIUM_VERSION}-RELEASE/libsodium-${LIBSODIUM_VERSION}.tar.gz \
    && tar -xzvf libsodium*.tar.gz \
    && rm -f libsodium*.tar.gz \
    && mv libsodium*/* ./

# Configure for a /usr prefix with libraries under /usr/lib64, build in
# parallel, and run the upstream test suite as part of the image build.
RUN CFLAGS="-O3 -Wall -Werror=format-security -Wno-unused-function -Wp,-D_GLIBCXX_ASSERTIONS -fstack-protector-strong -fstack-clash-protection -fcf-protection" \
    ./configure \
        --prefix="/usr/" \
        --libdir=/usr/lib64 && \
    make -j $(nproc) && \
    make check
| 94 | + |
| 95 | +################################################################################################## |
| 96 | + |
# Runtime image serving the vLLM OpenAI-compatible API server.
FROM rocm_base AS vllm-openai

# Build-time switches; not referenced directly in this stage's instructions.
# NOTE(review): presumably read from the environment by payload/run.sh -- confirm.
ARG FLASH_ATTENTION_WHEEL_STRATEGY
ARG VLLM_WHEEL_STRATEGY

WORKDIR /workspace

# Same values the base stage already sets; restated here.
ENV VIRTUAL_ENV=/opt/vllm
ENV PATH=${VIRTUAL_ENV}/bin:${PATH}

# Required for triton
RUN microdnf install -y --setopt=install_weak_deps=0 --nodocs \
        gcc \
        rsync \
    && microdnf clean all

# Install libsodium for Tensorizer encryption. The already-built source tree is
# bind-mounted from the libsodium-builder stage, so only `make install` runs here.
RUN --mount=type=bind,from=libsodium-builder,src=/usr/src/libsodium,target=/usr/src/libsodium \
    make -C /usr/src/libsodium install

# Run the install script from the build context's payload/ directory, with the
# amdsmi wheel directory mounted at /install/amdsmi/ and a uv cache mount.
RUN --mount=type=bind,from=build_amdsmi,src=/dist,target=/install/amdsmi/ \
    --mount=type=cache,target=/root/.cache/uv \
    --mount=type=bind,src=payload,target=/workspace/payload \
    ./payload/run.sh

# Runtime environment.
#  - VLLM_ALLOW_LONG_MAX_MODEL_LEN: allow the requested max length to exceed
#    what is extracted from config.json
#    (see: https://github.com/vllm-project/vllm/pull/7080).
#  - TOKENIZERS_PARALLELISM=false: silences the HF Tokenizers warning.
ENV HF_HUB_OFFLINE=1 \
    HOME=/home/vllm \
    VLLM_ALLOW_LONG_MAX_MODEL_LEN=1 \
    VLLM_USAGE_SOURCE=production-docker-image \
    VLLM_WORKER_MULTIPROC_METHOD=fork \
    VLLM_NO_USAGE_STATS=1 \
    TOKENIZERS_PARALLELISM=false \
    RAY_EXPERIMENTAL_NOSET_ROCR_VISIBLE_DEVICES=1 \
    VLLM_USE_TRITON_FLASH_ATTN=0 \
    HIP_FORCE_DEV_KERNARG=1 \
    OUTLINES_CACHE_DIR=/tmp/outlines \
    NUMBA_CACHE_DIR=/tmp/numba \
    TRITON_CACHE_DIR=/tmp/triton

# Non-root user for OpenShift: uid 2000 in the root group (gid 0), with a
# group-writable home directory.
RUN umask 002 \
    && useradd --uid 2000 --gid 0 vllm \
    && mkdir -p /licenses /home/vllm \
    && chmod g+rwx /home/vllm

COPY LICENSE /licenses/vllm.md
COPY examples/*.jinja /app/data/template/

USER 2000
WORKDIR /home/vllm

ENTRYPOINT ["python3", "-m", "vllm.entrypoints.openai.api_server"]
| 150 | + |
| 151 | + |
# Extends the OpenAI server image with the vllm-tgis-adapter entrypoint.
FROM vllm-openai AS vllm-grpc-adapter
ARG VLLM_TGIS_ADAPTER_VERSION

# The parent stage ends as uid 2000; switch back to root for the install step.
USER root

# Re-run the payload script with VLLM_TGIS_ADAPTER_VERSION set in its
# environment. The inherited working directory is /home/vllm, so change to
# /workspace where the payload is mounted, and point HOME at /root for this
# command only (the image-level HOME is /home/vllm).
RUN --mount=type=bind,from=build_amdsmi,src=/dist,target=/install/amdsmi/ \
    --mount=type=cache,target=/root/.cache/uv \
    --mount=type=bind,src=payload,target=/workspace/payload \
    cd /workspace && \
    env HOME=/root VLLM_TGIS_ADAPTER_VERSION="${VLLM_TGIS_ADAPTER_VERSION}" \
        ./payload/run.sh

# DISABLE_LOGPROBS_DURING_SPEC_DECODING: as an optimization, vLLM disables
# logprobs when using spec decoding by default, but this would be unexpected to
# users of a hosted model that happens to have spec decoding
# (see: https://github.com/vllm-project/vllm/pull/6485).
ENV GRPC_PORT=8033 \
    PORT=8000 \
    DISABLE_LOGPROBS_DURING_SPEC_DECODING=false

USER 2000
ENTRYPOINT ["python3", "-m", "vllm_tgis_adapter", "--uvicorn-log-level=warning"]
0 commit comments