Skip to content

Commit 753df25

Browse files
(cmake) Fix cuda arch selection (#1091)
* (cmake) Fix generation of targets for nvcc
* Typo
* (ci) linux + CUDA workflow: make sure we specify target architectures
* fix
* fix one more time
* (cmake) Default in CMAKE_CUDA_ARCHITECTURES_ALL when cmake<3.23, make sure we build only selected cubins and only ptx for latest capability
* Fix static lookup for CMAKE_CUDA_ARCHITECTURES_ALL on cmake<3.23
* Remove debug setting
* clarification
1 parent 433275e commit 753df25

File tree

2 files changed

+39
-7
lines changed

2 files changed

+39
-7
lines changed

.github/workflows/python-package.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -125,7 +125,7 @@ jobs:
125125
docker run --platform linux/$build_arch -i -w /src -v $PWD:/src $image sh -c \
126126
"apt-get update \
127127
&& DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends cmake \
128-
&& cmake -DCOMPUTE_BACKEND=cuda -DNO_CUBLASLT=${NO_CUBLASLT} . \
128+
&& cmake -DCOMPUTE_BACKEND=cuda -DCOMPUTE_CAPABILITY=\"50;52;60;61;70;75;80;86;89;90\" -DNO_CUBLASLT=${NO_CUBLASLT} . \
129129
&& cmake --build ."
130130
else
131131
cmake -G Ninja -DCOMPUTE_BACKEND=cuda -DNO_CUBLASLT=${NO_CUBLASLT} -DCMAKE_BUILD_TYPE=Release -S .

CMakeLists.txt

Lines changed: 38 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ endif()
3333

3434
set(BNB_OUTPUT_NAME "bitsandbytes")
3535

36-
message(STATUS "Building with backend ${COMPUTE_BACKEND}")
36+
message(STATUS "Configuring ${PROJECT_NAME} (Backend: ${COMPUTE_BACKEND})")
3737

3838
if(${COMPUTE_BACKEND} STREQUAL "cuda")
3939
if(APPLE)
@@ -82,6 +82,31 @@ if(BUILD_CUDA)
8282
message(FATAL_ERROR "CUDA Version > 12 is not supported")
8383
endif()
8484

85+
# CMake < 3.23.0 does not define CMAKE_CUDA_ARCHITECTURES_ALL.
86+
if(CMAKE_VERSION VERSION_LESS "3.23.0")
87+
message(STATUS "CMake < 3.23.0; determining CUDA architectures supported...")
88+
89+
# CUDA 11.x and 12.x both support at least these architectures.
90+
set(CMAKE_CUDA_ARCHITECTURES_ALL 50 52 53 60 61 62 70 72 75 80)
91+
set(CMAKE_CUDA_ARCHITECTURES_ALL_MAJOR 50 60 70 80)
92+
93+
# CUDA 11.1 adds Ampere support for GA102-GA107.
94+
if (CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL "11.1")
95+
list(APPEND CMAKE_CUDA_ARCHITECTURES_ALL 86)
96+
endif()
97+
98+
# CUDA 11.4 adds Ampere support for GA10B.
99+
if (CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL "11.4")
100+
list(APPEND CMAKE_CUDA_ARCHITECTURES_ALL 87)
101+
endif()
102+
103+
# CUDA 11.8 adds support for Ada and Hopper.
104+
if (CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL "11.8")
105+
list(APPEND CMAKE_CUDA_ARCHITECTURES_ALL 89 90)
106+
list(APPEND CMAKE_CUDA_ARCHITECTURES_ALL_MAJOR 90)
107+
endif()
108+
endif()
109+
85110
string(APPEND CMAKE_CUDA_FLAGS " --use_fast_math")
86111
if(PTXAS_VERBOSE)
87112
# Verbose? Outputs register usage information, and other things...
@@ -103,10 +128,18 @@ if(BUILD_CUDA)
103128
message(STATUS "CUDA Capabilities Available: ${POSSIBLE_CAPABILITIES}")
104129
message(STATUS "CUDA Capabilities Selected: ${COMPUTE_CAPABILITY}")
105130

106-
foreach(capability ${COMPUTE_CAPABILITY})
107-
string(APPEND CMAKE_CUDA_FLAGS " -gencode arch=compute_${capability},code=sm_${capability}")
108-
endforeach()
109-
131+
# Append the "-real" suffix so that every selected architecture except the highest is built as a native cubin only (no PTX).
132+
# The highest selected architecture is left unsuffixed so that its PTX is built in addition to its cubin.
133+
# This behavior of adding a PTX (virtual) target for the highest architecture
134+
# is similar to how the "all" and "all-major" options would behave in CMake >= 3.23.
135+
# TODO: Consider bumping CMake requirement and using CMAKE_CUDA_ARCHITECTURES=[all | native] by default
136+
list(REMOVE_DUPLICATES COMPUTE_CAPABILITY)
137+
list(SORT COMPUTE_CAPABILITY COMPARE NATURAL)
138+
list(POP_BACK COMPUTE_CAPABILITY _LATEST_CAPABILITY)
139+
list(TRANSFORM COMPUTE_CAPABILITY APPEND "-real" OUTPUT_VARIABLE CMAKE_CUDA_ARCHITECTURES)
140+
list(APPEND CMAKE_CUDA_ARCHITECTURES ${_LATEST_CAPABILITY})
141+
142+
message(STATUS "CUDA Targets: ${CMAKE_CUDA_ARCHITECTURES}")
110143
message(STATUS "CUDA NVCC Flags: ${CMAKE_CUDA_FLAGS}")
111144

112145
list(APPEND SRC_FILES ${CUDA_FILES})
@@ -149,7 +182,6 @@ endif()
149182
# Weird MSVC hacks
150183
if(MSVC)
151184
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /arch:AVX2 /fp:fast")
152-
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /arch:AVX2 /fp:fast")
153185
endif()
154186

155187
set_source_files_properties(${CPP_FILES} PROPERTIES LANGUAGE CXX)

0 commit comments

Comments
 (0)