@@ -33,7 +33,7 @@ endif()
33
33
34
34
set (BNB_OUTPUT_NAME "bitsandbytes" )
35
35
36
- message (STATUS "Building with backend ${COMPUTE_BACKEND} " )
36
+ message (STATUS "Configuring ${PROJECT_NAME} (Backend: ${COMPUTE_BACKEND} ) " )
37
37
38
38
if (${COMPUTE_BACKEND} STREQUAL "cuda" )
39
39
if (APPLE )
@@ -82,6 +82,31 @@ if(BUILD_CUDA)
82
82
message (FATAL_ERROR "CUDA Version > 12 is not supported" )
83
83
endif ()
84
84
85
# CMAKE_CUDA_ARCHITECTURES_ALL and CMAKE_CUDA_ARCHITECTURES_ALL_MAJOR are not
# defined by CMake releases older than 3.23.0. On those releases, populate both
# lists here from the version of the CUDA compiler in use.
if(CMAKE_VERSION VERSION_LESS "3.23.0")
    message(STATUS "CMake < 3.23.0; determining CUDA architectures supported...")

    # Baseline: CUDA 11.x and 12.x both support these at a minimum.
    set(CMAKE_CUDA_ARCHITECTURES_ALL_MAJOR 50 60 70 80)
    set(CMAKE_CUDA_ARCHITECTURES_ALL 50 52 53 60 61 62 70 72 75 80)

    # Newer toolkits extend the baseline. Each guard below is true when the
    # CUDA compiler version is at least the version named in its comment.

    # CUDA 11.1: Ampere GA102-GA107.
    if(NOT CMAKE_CUDA_COMPILER_VERSION VERSION_LESS "11.1")
        list(APPEND CMAKE_CUDA_ARCHITECTURES_ALL 86)
    endif()

    # CUDA 11.4: Ampere GA10B.
    if(NOT CMAKE_CUDA_COMPILER_VERSION VERSION_LESS "11.4")
        list(APPEND CMAKE_CUDA_ARCHITECTURES_ALL 87)
    endif()

    # CUDA 11.8: Ada and Hopper. 90 is also recorded as a new major architecture.
    if(NOT CMAKE_CUDA_COMPILER_VERSION VERSION_LESS "11.8")
        list(APPEND CMAKE_CUDA_ARCHITECTURES_ALL 89 90)
        list(APPEND CMAKE_CUDA_ARCHITECTURES_ALL_MAJOR 90)
    endif()
endif()
109
+
85
110
string (APPEND CMAKE_CUDA_FLAGS " --use_fast_math" )
86
111
if (PTXAS_VERBOSE)
87
112
# Verbose? Outputs register usage information, and other things...
@@ -103,10 +128,18 @@ if(BUILD_CUDA)
103
128
message (STATUS "CUDA Capabilities Available: ${POSSIBLE_CAPABILITIES} " )
104
129
message (STATUS "CUDA Capabilities Selected: ${COMPUTE_CAPABILITY} " )
105
130
106
- foreach (capability ${COMPUTE_CAPABILITY} )
107
- string (APPEND CMAKE_CUDA_FLAGS " -gencode arch=compute_${capability} ,code=sm_${capability} " )
108
- endforeach ()
109
-
131
# Translate the selected compute capabilities into CMAKE_CUDA_ARCHITECTURES.
#
# Use the "real" option to build native cubin for all selections: every
# capability except the highest is given the "-real" suffix. The highest
# capability is appended without a suffix so that PTX (virtual) code is built
# for the latest version as well. This behavior of adding a PTX target for the
# highest architecture is similar to how the "all" and "all-major" options
# would behave in CMake >= 3.23.
#
# NOTE: COMPUTE_CAPABILITY is modified in place. It ends up de-duplicated and
# sorted, with its highest entry moved into _LATEST_CAPABILITY.
# TODO: Consider bumping CMake requirement and using CMAKE_CUDA_ARCHITECTURES=[all | native] by default

# Fail early with a clear message. Without this guard an empty selection would
# silently produce an empty CMAKE_CUDA_ARCHITECTURES.
if("${COMPUTE_CAPABILITY}" STREQUAL "")
    message(FATAL_ERROR "COMPUTE_CAPABILITY is empty; at least one CUDA compute capability must be selected.")
endif()

list(REMOVE_DUPLICATES COMPUTE_CAPABILITY)
# NATURAL comparison orders entries by numeric value rather than as plain
# strings, so the last element is the highest capability.
list(SORT COMPUTE_CAPABILITY COMPARE NATURAL)
list(POP_BACK COMPUTE_CAPABILITY _LATEST_CAPABILITY)
list(TRANSFORM COMPUTE_CAPABILITY APPEND "-real" OUTPUT_VARIABLE CMAKE_CUDA_ARCHITECTURES)
list(APPEND CMAKE_CUDA_ARCHITECTURES ${_LATEST_CAPABILITY})

message(STATUS "CUDA Targets: ${CMAKE_CUDA_ARCHITECTURES} ")
110
143
message (STATUS "CUDA NVCC Flags: ${CMAKE_CUDA_FLAGS} " )
111
144
112
145
list (APPEND SRC_FILES ${CUDA_FILES} )
@@ -149,7 +182,6 @@ endif()
149
182
# MSVC-only compiler flags: append /arch:AVX2 and /fp:fast to the global
# CMAKE_CXX_FLAGS so they apply to C++ sources in every build configuration.
150
183
if (MSVC )
151
184
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /arch:AVX2 /fp:fast" )
152
- set (CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /arch:AVX2 /fp:fast" )
153
185
endif ()
154
186
155
187
set_source_files_properties (${CPP_FILES} PROPERTIES LANGUAGE CXX)
0 commit comments