Update PyTorch to 2.7.0 #16859
Changes from 33 commits
Dockerfile

```diff
@@ -5,11 +5,11 @@
 # docs/source/contributing/dockerfile/dockerfile.md and
 # docs/source/assets/contributing/dockerfile-stages-dependency.png

-ARG CUDA_VERSION=12.4.1
+ARG CUDA_VERSION=12.8.1
 #################### BASE BUILD IMAGE ####################
 # prepare basic build environment
 FROM nvidia/cuda:${CUDA_VERSION}-devel-ubuntu20.04 AS base
-ARG CUDA_VERSION=12.4.1
+ARG CUDA_VERSION=12.8.1
 ARG PYTHON_VERSION=3.12
 ARG TARGETPLATFORM
 ENV DEBIAN_FRONTEND=noninteractive
```
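Note on the repeated `ARG CUDA_VERSION`: build args declared before a `FROM` are only in scope for the `FROM` lines themselves, so each stage that uses `${CUDA_VERSION}` after its `FROM` must redeclare the arg. That is why the version bump from 12.4.1 to 12.8.1 has to be applied in several places throughout the file.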
```diff
@@ -34,6 +34,7 @@ RUN --mount=type=cache,target=/root/.cache/uv \
 # This timeout (in seconds) is necessary when installing some dependencies via uv since it's likely to time out
 # Reference: https://github.com/astral-sh/uv/pull/1694
 ENV UV_HTTP_TIMEOUT=500
+ENV UV_INDEX_STRATEGY="unsafe-best-match"

 # Upgrade to GCC 10 to avoid https://gcc.gnu.org/bugzilla/show_bug.cgi?id=92519
 # as it was causing spam when compiling the CUTLASS kernels
```
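For context on the new `UV_INDEX_STRATEGY` value: by default uv resolves each package from the first index that offers it, so CUDA-specific torch builds that live only on the PyTorch index can conflict with what PyPI serves. `unsafe-best-match` tells uv to consider candidates from all configured indexes and pick the best matching version. A minimal sketch of the same behavior outside Docker (the torch version here is illustrative):

```sh
# Let uv compare candidates across PyPI and the PyTorch cu128 index
# instead of stopping at the first index that carries "torch".
export UV_INDEX_STRATEGY="unsafe-best-match"
uv pip install torch==2.7.0 \
    --extra-index-url https://download.pytorch.org/whl/cu128
```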
```diff
@@ -77,6 +78,13 @@ ENV TORCH_CUDA_ARCH_LIST=${torch_cuda_arch_list}
 # Override the arch list for flash-attn to reduce the binary size
 ARG vllm_fa_cmake_gpu_arches='80-real;90-real'
 ENV VLLM_FA_CMAKE_GPU_ARCHES=${vllm_fa_cmake_gpu_arches}
+
+# TESTING: install xformers from source until it's updated to work
+# with 2.7.0. Also pin it to the current main commit to avoid the issue
+# https://github.com/facebookresearch/xformers/issues/1229 with CUDA 12.8
+RUN --mount=type=cache,target=/root/.cache/uv \
+    MAX_JOBS=16 TORCH_CUDA_ARCH_LIST='7.5 8.0 8.6 8.9 9.0+PTX' \
+    uv pip install --system --no-build-isolation "git+https://github.com/facebookresearch/xformers@f2de641ef670510cadab099ce6954031f52f191c"
 #################### BASE BUILD IMAGE ####################

 #################### WHEEL BUILD IMAGE ####################
```
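If it is useful to confirm which xformers actually landed in the build stage, `python -m xformers.info` prints the version, the torch it was built against, and the available kernels; a sketch (the image name is a placeholder):

```sh
# Inspect the xformers source build inside the image; the version string
# of a source build typically embeds the commit it was built from.
docker run --rm <vllm-build-image> python -m xformers.info
```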
```diff
@@ -89,6 +97,7 @@ COPY requirements/build.txt requirements/build.txt
 # This timeout (in seconds) is necessary when installing some dependencies via uv since it's likely to time out
 # Reference: https://github.com/astral-sh/uv/pull/1694
 ENV UV_HTTP_TIMEOUT=500
+ENV UV_INDEX_STRATEGY="unsafe-best-match"

 RUN --mount=type=cache,target=/root/.cache/uv \
     uv pip install --system -r requirements/build.txt
```
```diff
@@ -158,13 +167,15 @@ FROM base as dev
 # This timeout (in seconds) is necessary when installing some dependencies via uv since it's likely to time out
 # Reference: https://github.com/astral-sh/uv/pull/1694
 ENV UV_HTTP_TIMEOUT=500
+ENV UV_INDEX_STRATEGY="unsafe-best-match"
+
+# Workaround for #17068
+RUN --mount=type=cache,target=/root/.cache/uv \
+    uv pip install --system --no-build-isolation "git+https://github.com/state-spaces/mamba@v2.2.4"

 COPY requirements/lint.txt requirements/lint.txt
 COPY requirements/test.txt requirements/test.txt
 COPY requirements/dev.txt requirements/dev.txt
-# Workaround for #17068
-RUN --mount=type=cache,target=/root/.cache/uv \
-    uv pip install --system mamba-ssm==2.2.4 --no-build-isolation
 RUN --mount=type=cache,target=/root/.cache/uv \
     uv pip install --system -r requirements/dev.txt
 #################### DEV IMAGE ####################
```
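A quick sanity check for the #17068 workaround in the dev stage might look like the following (a sketch; it just confirms the pinned git tag imports and reports the same version as the previously pinned PyPI release):

```sh
# mamba_ssm exposes __version__; the v2.2.4 tag should report 2.2.4.
python -c "import mamba_ssm; print(mamba_ssm.__version__)"
```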
```diff
@@ -173,7 +184,7 @@ RUN --mount=type=cache,target=/root/.cache/uv \
 # image with vLLM installed
 # TODO: Restore to base image after FlashInfer AOT wheel fixed
 FROM nvidia/cuda:${CUDA_VERSION}-devel-ubuntu22.04 AS vllm-base
-ARG CUDA_VERSION=12.4.1
+ARG CUDA_VERSION=12.8.1
 ARG PYTHON_VERSION=3.12
 WORKDIR /vllm-workspace
 ENV DEBIAN_FRONTEND=noninteractive
```
```diff
@@ -203,6 +214,7 @@ RUN --mount=type=cache,target=/root/.cache/uv \
 # This timeout (in seconds) is necessary when installing some dependencies via uv since it's likely to time out
 # Reference: https://github.com/astral-sh/uv/pull/1694
 ENV UV_HTTP_TIMEOUT=500
+ENV UV_INDEX_STRATEGY="unsafe-best-match"

 # Workaround for https://github.com/openai/triton/issues/2507 and
 # https://github.com/pytorch/pytorch/issues/107960 -- hopefully
```
```diff
@@ -223,7 +235,15 @@ RUN --mount=type=cache,target=/root/.cache/uv \
 # Install vllm wheel first, so that torch etc will be installed.
 RUN --mount=type=bind,from=build,src=/workspace/dist,target=/vllm-workspace/dist \
     --mount=type=cache,target=/root/.cache/uv \
-    uv pip install --system dist/*.whl --verbose
+    uv pip install --system dist/*.whl --verbose \
+        --extra-index-url https://download.pytorch.org/whl/cu128
+
+# TESTING: install xformers from source until it's updated to work
+# with 2.7.0. Also pin it to the current main commit to avoid the issue
+# https://github.com/facebookresearch/xformers/issues/1229 with CUDA 12.8
+RUN --mount=type=cache,target=/root/.cache/uv \
+    MAX_JOBS=16 TORCH_CUDA_ARCH_LIST='7.5 8.0 8.6 8.9 9.0+PTX' \
+    uv pip install --system --no-build-isolation "git+https://github.com/facebookresearch/xformers@f2de641ef670510cadab099ce6954031f52f191c"

 # If we need to build FlashInfer wheel before its release:
 # $ export FLASHINFER_ENABLE_AOT=1
```

Review thread on the xformers source build:

> If the intention is that we are just going to wait until xformers releases a CUDA 12.8-compatible wheel, then ignore the above.

> IMO we can just build the package from source for CI, then switch to the official xformers package once it's ready.

> How long does it take to compile xformers? If it's too long, I don't want to slow down our CI for this.

> Let me dig out the number for this once the build finishes. Without caching it is significant from what I see locally, but let's see what it takes to build once this is cached.
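On the build-time concern in the thread above: the `--mount=type=cache,target=/root/.cache/uv` mounts keep uv's cache, including wheels it built from source, on the BuildKit host between builds, so a re-executed xformers step can reuse the previously compiled wheel for the same pinned commit rather than recompiling it. A hedged sketch of checking this locally (the Dockerfile path and image tag are assumptions):

```sh
# The second run re-executes the RUN step only if earlier layers changed;
# even then, uv should find the xformers wheel for this exact commit in
# the persisted cache mount instead of rebuilding it.
DOCKER_BUILDKIT=1 docker build -f docker/Dockerfile -t vllm:pt27-test .
```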
```diff
@@ -240,12 +260,18 @@ RUN --mount=type=bind,from=build,src=/workspace/dist,target=/vllm-workspace/dist \
 RUN --mount=type=cache,target=/root/.cache/uv \
     . /etc/environment && \
     if [ "$TARGETPLATFORM" != "linux/arm64" ]; then \
-        uv pip install --system https://github.com/flashinfer-ai/flashinfer/releases/download/v0.2.1.post2/flashinfer_python-0.2.1.post2+cu124torch2.6-cp38-abi3-linux_x86_64.whl ; \
+        # TESTING: install FlashInfer from source to test 2.7.0 final RC
+        FLASHINFER_ENABLE_AOT=1 TORCH_CUDA_ARCH_LIST='7.5 8.0 8.6 8.9 9.0+PTX' \
+        uv pip install --system --no-build-isolation "git+https://github.com/flashinfer-ai/[email protected]" ; \
     fi
 COPY examples examples
 COPY benchmarks benchmarks
 COPY ./vllm/collect_env.py .
+
+RUN --mount=type=cache,target=/root/.cache/uv \
+    . /etc/environment && \
+    uv pip list

 # Although we build Flashinfer with AOT mode, there's still
 # some issues w.r.t. JIT compilation. Therefore we need to
 # install build dependencies for JIT compilation.
```

Review comment on the FlashInfer pin:

> I would imagine FlashInfer has a new release by now.
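For background on `FLASHINFER_ENABLE_AOT=1`: it makes the FlashInfer build precompile its CUDA kernels ahead of time for the arches in `TORCH_CUDA_ARCH_LIST` instead of JIT-compiling them on first use; the comment at the end of the hunk notes that JIT build dependencies are still needed as a fallback. A minimal post-build check (a sketch, not from the PR; a failed AOT build would typically surface as an import error):

```sh
# Confirm FlashInfer imports cleanly and report its version.
python -c "import flashinfer; print(flashinfer.__version__)"
```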
```diff
@@ -266,11 +292,13 @@ ADD . /vllm-workspace/
 # This timeout (in seconds) is necessary when installing some dependencies via uv since it's likely to time out
 # Reference: https://github.com/astral-sh/uv/pull/1694
 ENV UV_HTTP_TIMEOUT=500
+ENV UV_INDEX_STRATEGY="unsafe-best-match"

-# install development dependencies (for testing)
+# Workaround for #17068
 RUN --mount=type=cache,target=/root/.cache/uv \
-    uv pip install --system mamba-ssm==2.2.4 --no-build-isolation
+    uv pip install --system --no-build-isolation "git+https://github.com/state-spaces/mamba@v2.2.4"

+# install development dependencies (for testing)
 RUN --mount=type=cache,target=/root/.cache/uv \
     uv pip install --system -r requirements/dev.txt
```
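Once the final image is built, the added `uv pip list` layer already logs the resolved package set during the build; for an interactive check that the intended stack landed, something like this works (the image name is a placeholder):

```sh
# Expect a 2.7.0 torch built against CUDA 12.8.
docker run --rm <vllm-image> \
    python -c "import torch; print(torch.__version__, torch.version.cuda)"
```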
values.yaml (Helm chart)
```diff
@@ -8,7 +8,7 @@ image:
 # -- Image tag
 tag: "latest"
 # -- Container launch command
-command: ["vllm", "serve", "/data/", "--served-model-name", "opt-125m", "--dtype", "bfloat16", "--host", "0.0.0.0", "--port", "8000"]
+command: ["vllm", "serve", "/data/", "--served-model-name", "opt-125m", "--dtype", "float32", "--block-size", "16", "--host", "0.0.0.0", "--port", "8000"]

 # -- Container port
 containerPort: 8000
```

Review thread on the dtype change:

> This needs to be reverted, I assume.

> This is a curious issue that I have seen on CI, where the CPU build fails on that dtype. I looked around and found a similar issue which suggests switching to float32.

> Here is the full server log when serving …
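If the dtype change does need to be reverted in the chart, the CPU-friendly settings can still be exercised without editing the file by overriding the command at deploy time; a hypothetical invocation (release name and chart path are placeholders, and `--set-json` requires Helm 3.10+):

```sh
# Override the container launch command for a CPU test deployment
# without changing the committed values.yaml.
helm upgrade --install vllm ./chart-helm \
  --set-json 'command=["vllm","serve","/data/","--served-model-name","opt-125m","--dtype","float32","--block-size","16","--host","0.0.0.0","--port","8000"]'
```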