Skip to content

Commit 091699e

Browse files
committed
Build pytorch 2.3.1
This (properly) builds pytorch 2.3.1, including mkl support.
1 parent fbf4c0c commit 091699e

File tree

5 files changed

+235
-149
lines changed

5 files changed

+235
-149
lines changed
Lines changed: 209 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,209 @@
1+
# Build stage; the final stage copies the build results out of it by name.
FROM debian:bookworm AS bookworm_cuda

###
#
# Define environment

WORKDIR /workspace

###
#
# Set global environment variables

ENV PYTORCH_VERSION="v2.3.1" \
    CUDA_VERSION="12.5.1_555.42.06" \
    PATH="$PATH:/usr/local/cuda/bin" \
    DEBIAN_FRONTEND="noninteractive" \
    NVIDIA_DRIVER_CAPABILITIES="compute,utility" \
    NVIDIA_VISIBLE_DEVICES="all" \
    VENV_PATH="/workspace/v"

# Kept in a separate instruction: ${VENV_PATH} is only available for
# substitution after the instruction that defines it has completed.
ENV PYTHON_VENV="${VENV_PATH}/bin/python" \
    PIP_BIN="${VENV_PATH}/bin/pip"
23+
24+
###
#
# Workaround gcc-12 issue:
# https://github.com/pytorch/pytorch/issues/77939#issuecomment-1526844015
#
# The same warning-suppression flags are exported for both the C and the C++
# compiler.

ENV CFLAGS='-Wno-maybe-uninitialized -Wno-uninitialized -Wno-free-nonheap-object -Wno-dev' \
    CXXFLAGS='-Wno-maybe-uninitialized -Wno-uninitialized -Wno-free-nonheap-object -Wno-dev'
31+
32+
33+
###
#
# Set pytorch specific build environment variables

# Build type, parallelism and the version stamped into the build.
ENV REL_WITH_DEB_INFO="ON" \
    MAX_JOBS="32" \
    PYTORCH_BUILD_VERSION="2.3.1" \
    PYTORCH_BUILD_NUMBER="1"

# Feature toggles.
ENV USE_CUDA="ON" \
    USE_CUDNN=1 \
    USE_CUSPARSELT=1 \
    USE_FBGEMM="ON" \
    USE_KINETO="ON" \
    USE_NUMPY="ON" \
    USE_NNPACK="ON" \
    USE_DISTRIBUTED="ON" \
    USE_TENSORPIPE="ON" \
    USE_GLOO="ON" \
    USE_MPI="ON" \
    USE_SYSTEM_NCCL="OFF" \
    USE_OPENMP="ON" \
    USE_FLASH_ATTENTION="ON" \
    USE_MEM_EFF_ATTENTION="ON" \
    USE_MIMALLOC="ON" \
    USE_NCCL="ON"

# Target GPU architectures and the locations of the CUDA toolkit, cuSPARSELt
# and cuDNN files.
ENV TORCH_CUDA_ARCH_LIST="8.0;8.6;8.9;9.0" \
    CUDA_PATH="/usr/local/cuda" \
    CUDA_HOME="/usr/local/cuda" \
    CUDA_TOOLKIT_ROOT_DIR="/usr/local/cuda" \
    CUDA_NVCC_EXECUTABLE="/usr/local/cuda/bin/nvcc" \
    CUDA_INCLUDE_DIRS="/usr/local/cuda/include" \
    CUSPARSELT_LIBRARY_PATH="/usr/lib/x86_64-linux-gnu" \
    CUSPARSE_INCLUDE_PATH="/usr/include/x86_64-linux-gnu" \
    CUDNN_LIBRARY_PATH="/usr/lib/x86_64-linux-gnu" \
    CUDNN_INCLUDE_PATH="/usr/include/x86_64-linux-gnu"

# Currently disabled settings, kept for reference.
#ENV USE_UCC="ON"
#ENV ATEN_THREADING="NATIVE"
#ENV USE_SYSTEM_LIBS ON
73+
74+
###
#
# Install toolchain and system dependencies
#
# The package index refresh and the installs share one layer, so a cached
# (stale) index layer can never be combined with a changed package list.
# apt-get is used because the apt front end does not offer a stable CLI for
# scripted use.

RUN apt-get update \
 && apt-get -y install \
      build-essential \
      ca-certificates \
      cmake \
      curl \
      git \
      gpg \
      libfftw3-dev \
      libgmp3-dev \
      libmpfr-dev \
      libnuma-dev \
      libssl-dev \
      libucx-dev \
      libzstd-dev \
      ninja-build \
      python3 \
      python3-full \
      python3-pip \
      python3-venv \
      swig \
      zstd

# Currently disabled packages, kept for reference.
#RUN apt-get -y install ccache
#RUN apt-get -y install libmagma-dev

###
#
# Not sure if or why these are needed

RUN apt-get update \
 && apt-get -y install \
      libjpeg-dev \
      libpng-dev
110+
111+
# ccache integration is currently disabled; only its cache directory is
# created below.
#RUN /usr/sbin/update-ccache-symlinks
#RUN ccache --set-config=cache_dir=/opt/ccache

###
#
# Setup build environment and clone pytorch

RUN mkdir -p \
      /opt/ccache \
      /workspace/build \
      /workspace/${PYTORCH_VERSION} \
      /workspace/tmp \
      /workspace/added \
      /workspace/uncompressed \
      /workspace/target \
      /workspace/patches

# Shallow clone of the release tag, submodules included, into the "build"
# directory relative to the current WORKDIR.
RUN git clone \
      --branch "${PYTORCH_VERSION}" \
      --depth 1 \
      --recurse-submodules \
      --shallow-submodules \
      --jobs ${MAX_JOBS} \
      "https://github.com/pytorch/pytorch" \
      build
129+
130+
###
#
# Apply local patches to the pytorch checkout

# NOTE(review): the COPY source is resolved inside the build context, not on
# the host filesystem root - confirm the patch is shipped at this path.
COPY /workspace/patches/pytorch-compute-86-override.patch /workspace/patches

# GNU patch treats a single positional argument as the file to be patched and
# reads the diff from stdin, so the diff has to be passed with --input.
# The path is absolute because --directory switches into "build" before the
# input file is opened.
# NOTE(review): if the patch contains a submodule (gitlink) hunk, plain
# patch(1) may refuse it - verify the patch applies cleanly.
RUN patch --directory build -p1 --input /workspace/patches/pytorch-compute-86-override.patch
136+
137+
###
#
# Install NVIDIA CUDA SDK

# --fail stops the build on an HTTP error instead of saving the error page
# under the .deb file name.
RUN curl --fail --location --remote-name https://developer.download.nvidia.com/compute/cuda/repos/debian12/x86_64/cuda-keyring_1.1-1_all.deb \
 && dpkg -i cuda-keyring_1.1-1_all.deb

# Each index refresh shares a layer with the installs that depend on it, so a
# cached index is never reused for a changed package list.
RUN apt-get update \
 && apt-get -y install software-properties-common \
 && add-apt-repository contrib

RUN apt-get update \
 && apt-get -y install \
      cuda-toolkit-12-5 \
      cudnn \
      libcusparselt-dev
150+
151+
152+
###
#
# Install Intel MKL BLAS

# The key is downloaded to a file first: in a "curl | gpg" pipeline a failed
# download goes unnoticed because only the exit status of gpg is checked.
RUN curl --fail --location --output /tmp/intel-sw-products.pub "https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB" \
 && gpg --output /usr/share/keyrings/oneapi-archive-keyring.gpg --dearmor /tmp/intel-sw-products.pub \
 && rm /tmp/intel-sw-products.pub
RUN echo "deb [signed-by=/usr/share/keyrings/oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main" > /etc/apt/sources.list.d/oneAPI.list

# Index refresh and install share one layer so the oneAPI index is never stale.
RUN apt-get update \
 && apt-get install -y \
      intel-oneapi-mkl \
      intel-oneapi-mkl-devel

# Each variable below that references an earlier one is set in its own
# instruction, because substitution only sees values from previous
# instructions.
ENV MKL_VERSION="2024.2"
ENV MKL_ROOT="/opt/intel/oneapi/mkl/${MKL_VERSION}/lib/intel64"
ENV MKL_MODEL="ilp64"
ENV MKL_LIBRARIES="-Wl,--start-group;${MKL_ROOT}/libmkl_intel_${MKL_MODEL}.a;${MKL_ROOT}/libmkl_gnu_thread.a;${MKL_ROOT}/libmkl_core.a;-Wl,--end-group"
ENV CUDA_ARCHS="80;86;89;90"
ENV BLA_VENDOR=Intel10_64ilp
# BLA_STATIC is the spelling documented by CMake's FindBLAS module; the former
# BLA_STATIS name is not a variable that module knows.
ENV BLA_STATIC=True
170+
171+
172+
###
#
# Install Python virtual environment

RUN python3 -m venv ${VENV_PATH}

# Build helpers installed into the venv.
RUN ${PIP_BIN} install \
      six \
      numpy \
      swig \
      build \
      wheel \
      pyyaml \
      cmake \
      ninja

# Requirements listed by the cloned pytorch checkout.
RUN ${PIP_BIN} install -r /workspace/build/requirements.txt
186+
187+
188+
###
#
# Hardcode the cuda library path for the system loader
#
# The path uses the /usr/local/cuda prefix that CUDA_HOME, CUDA_PATH and
# CUDA_TOOLKIT_ROOT_DIR point at; the former /opt/nvidia/cuda prefix is not
# referenced anywhere else in this file.

RUN echo "/usr/local/cuda/lib64" > /etc/ld.so.conf.d/cuda.conf \
 && ldconfig -v
194+
195+
196+
###
#
# Build pytorch
#
# Runs the "build" front end from the venv inside the checkout, producing both
# an sdist and a wheel. --no-isolation makes it use the packages already
# installed in the venv.

WORKDIR /workspace/build
RUN ${PYTHON_VENV} -m build --sdist --wheel --no-isolation

###
#
# Produce a clean image of build results for output from buildx
#
# The final stage starts empty and receives only the dist directory of the
# build stage.

FROM scratch
COPY --from=bookworm_cuda /workspace/build/dist /

platform/Dockerfiles/pytorch/build.sh

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
###
#
# Build pytorch and output the build results to "${PWD}/target"

# Directory that receives the exported files of the final build stage.
target_dir="${PWD}/target"

mkdir -p "${target_dir}"
docker buildx build \
    --progress plain \
    --output type=local,dest="${target_dir}" \
    -t pytorch:v2.3.1 \
    .
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
diff --git a/aten/src/ATen/native/cuda/Blas.cpp b/aten/src/ATen/native/cuda/Blas.cpp
2+
index 8d3b3dbea7..5f04c0cdd1 100644
3+
--- a/aten/src/ATen/native/cuda/Blas.cpp
4+
+++ b/aten/src/ATen/native/cuda/Blas.cpp
5+
@@ -820,7 +820,7 @@ static bool _scaled_mm_allowed_device() {
6+
}
7+
return false;
8+
#else
9+
- return dprops->major >= 9 || (dprops->major == 8 && dprops->minor == 9);
10+
+ return dprops->major >= 9 || (dprops->major == 8 && dprops->minor >= 0);
11+
#endif
12+
}
13+
14+
diff --git a/third_party/cutlass b/third_party/cutlass
15+
index bbe579a9e3..56b46e2d13 160000
16+
--- a/third_party/cutlass
17+
+++ b/third_party/cutlass
18+
@@ -1 +1 @@
19+
-Subproject commit bbe579a9e3beb6ea6626d9227ec32d0dae119a49
20+
+Subproject commit 56b46e2d13875b46b8f6a03f9f5ac91e2bfdc01a

platform/packaging/build/pytorch/Dockerfile

Lines changed: 0 additions & 143 deletions
This file was deleted.

platform/packaging/build/pytorch/build.sh

Lines changed: 0 additions & 6 deletions
This file was deleted.

0 commit comments

Comments
 (0)