Skip to content

Commit 36bacf7

Browse files
refactor: Clean up CMakeLists.txt (NVIDIA#3479)
Signed-off-by: Yuan Tong <[email protected]>
1 parent 93bb99c commit 36bacf7

File tree

15 files changed

+541
-415
lines changed

15 files changed

+541
-415
lines changed

cpp/CMakeLists.txt

+26-296
Original file line numberDiff line numberDiff line change
@@ -16,13 +16,13 @@
1616
#
1717

1818
cmake_minimum_required(VERSION 3.27 FATAL_ERROR)
19+
list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake/modules")
1920
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
2021

21-
include(CheckLanguage)
22-
include(cmake/modules/set_ifndef.cmake)
23-
include(cmake/modules/find_library_create_target.cmake)
24-
include(cmake/modules/resolve_dirs.cmake)
25-
include(cmake/modules/parse_make_options.cmake)
22+
include(resolve_dirs)
23+
include(parse_make_options)
24+
include(cuda_configuration)
25+
include(sanitizers)
2626

2727
project(tensorrt_llm LANGUAGES CXX)
2828

@@ -44,9 +44,6 @@ option(ENABLE_MULTI_DEVICE
4444
option(ENABLE_UCX "Enable building with UCX (Uniform Communication X) support"
4545
ON)
4646

47-
# Always use static NVRTC for IP protection reasons.
48-
set(USE_SHARED_NVRTC OFF)
49-
5047
if(NVTX_DISABLE)
5148
add_compile_definitions("NVTX_DISABLE")
5249
message(STATUS "NVTX is disabled")
@@ -143,158 +140,21 @@ configure_file(
143140
cmake/templates/version.h
144141
${CMAKE_CURRENT_SOURCE_DIR}/include/tensorrt_llm/executor/version.h)
145142

146-
# Determine CUDA version before enabling the language extension
147-
# check_language(CUDA) clears CMAKE_CUDA_HOST_COMPILER if CMAKE_CUDA_COMPILER is
148-
# not set
149-
if(NOT CMAKE_CUDA_COMPILER AND CMAKE_CUDA_HOST_COMPILER)
150-
set(CMAKE_CUDA_HOST_COMPILER_BACKUP ${CMAKE_CUDA_HOST_COMPILER})
151-
endif()
152-
check_language(CUDA)
153-
if(CMAKE_CUDA_HOST_COMPILER_BACKUP)
154-
set(CMAKE_CUDA_HOST_COMPILER ${CMAKE_CUDA_HOST_COMPILER_BACKUP})
155-
check_language(CUDA)
156-
endif()
157-
if(CMAKE_CUDA_COMPILER)
158-
message(STATUS "CUDA compiler: ${CMAKE_CUDA_COMPILER}")
159-
if(NOT WIN32) # Linux
160-
execute_process(
161-
COMMAND
162-
"bash" "-c"
163-
"${CMAKE_CUDA_COMPILER} --version | egrep -o 'V[0-9]+.[0-9]+.[0-9]+' | cut -c2-"
164-
RESULT_VARIABLE _BASH_SUCCESS
165-
OUTPUT_VARIABLE CMAKE_CUDA_COMPILER_VERSION
166-
OUTPUT_STRIP_TRAILING_WHITESPACE)
167-
168-
if(NOT _BASH_SUCCESS EQUAL 0)
169-
message(FATAL_ERROR "Failed to determine CUDA version")
170-
endif()
171-
172-
else() # Windows
173-
execute_process(
174-
COMMAND ${CMAKE_CUDA_COMPILER} --version
175-
OUTPUT_VARIABLE versionString
176-
RESULT_VARIABLE versionResult)
177-
178-
if(versionResult EQUAL 0 AND versionString MATCHES
179-
"V[0-9]+\\.[0-9]+\\.[0-9]+")
180-
string(REGEX REPLACE "V" "" version ${CMAKE_MATCH_0})
181-
set(CMAKE_CUDA_COMPILER_VERSION "${version}")
182-
else()
183-
message(FATAL_ERROR "Failed to determine CUDA version")
184-
endif()
185-
endif()
186-
else()
187-
message(FATAL_ERROR "No CUDA compiler found")
188-
endif()
189-
190-
set(CUDA_REQUIRED_VERSION "11.2")
191-
if(CMAKE_CUDA_COMPILER_VERSION VERSION_LESS CUDA_REQUIRED_VERSION)
192-
message(
193-
FATAL_ERROR
194-
"CUDA version ${CMAKE_CUDA_COMPILER_VERSION} must be at least ${CUDA_REQUIRED_VERSION}"
195-
)
196-
endif()
197-
198-
# cmake-format: off
199-
# Initialize and normalize CMAKE_CUDA_ARCHITECTURES before enabling CUDA.
200-
# Special values:
201-
# * `native` is resolved to HIGHEST available architecture.
202-
# * Fallback to `all` if detection failed.
203-
# * `all`/unset is resolved to a set of architectures we optimized for and compiler supports.
204-
# * `all-major` is unsupported.
205-
# Numerical architectures:
206-
# * PTX is never included in result binary.
207-
# * `*-virtual` architectures are therefore rejected.
208-
# * `-real` suffix is automatically added to exclude PTX.
209-
# * Always use accelerated (`-a` suffix) target for supported architectures.
210-
# cmake-format: on
211-
212-
if(CMAKE_CUDA_ARCHITECTURES STREQUAL "native")
213-
# Detect highest available compute capability
214-
set(OUTPUTFILE ${PROJECT_BINARY_DIR}/detect_cuda_arch)
215-
set(CUDAFILE ${CMAKE_SOURCE_DIR}/cmake/utils/detect_cuda_arch.cu)
216-
execute_process(COMMAND ${CMAKE_CUDA_COMPILER} -lcuda ${CUDAFILE} -o
217-
${OUTPUTFILE})
218-
message(VERBOSE "Detecting native CUDA compute capability")
219-
execute_process(
220-
COMMAND ${OUTPUTFILE}
221-
RESULT_VARIABLE CUDA_RETURN_CODE
222-
OUTPUT_VARIABLE CUDA_ARCH_OUTPUT)
223-
if(NOT ${CUDA_RETURN_CODE} EQUAL 0)
224-
message(WARNING "Detecting native CUDA compute capability - fail")
225-
message(
226-
WARNING
227-
"CUDA compute capability detection failed, compiling for all optimized architectures"
228-
)
229-
unset(CMAKE_CUDA_ARCHITECTURES)
230-
else()
231-
message(STATUS "Detecting native CUDA compute capability - done")
232-
set(CMAKE_CUDA_ARCHITECTURES "${CUDA_ARCH_OUTPUT}")
233-
endif()
234-
elseif(CMAKE_CUDA_ARCHITECTURES STREQUAL "all")
235-
unset(CMAKE_CUDA_ARCHITECTURES)
236-
message(
237-
STATUS
238-
"Setting CMAKE_CUDA_ARCHITECTURES to all enables all architectures TensorRT-LLM optimized for, "
239-
"not all architectures CUDA compiler supports.")
240-
elseif(CMAKE_CUDA_ARCHITECTURES STREQUAL "all-major")
241-
message(
242-
FATAL_ERROR
243-
"Setting CMAKE_CUDA_ARCHITECTURES to all-major does not make sense for TensorRT-LLM. "
244-
"Please enable all architectures you intend to run on, so we can enable optimized kernels for them."
245-
)
246-
else()
247-
unset(CMAKE_CUDA_ARCHITECTURES_CLEAN)
248-
foreach(CUDA_ARCH IN LISTS CMAKE_CUDA_ARCHITECTURES)
249-
if(CUDA_ARCH MATCHES "^([1-9])([0-9])+a?-virtual$")
250-
message(FATAL_ERROR "Including PTX in compiled binary is unsupported.")
251-
elseif(CUDA_ARCH MATCHES "^(([1-9])([0-9])+)a?(-real)?$")
252-
list(APPEND CMAKE_CUDA_ARCHITECTURES_CLEAN ${CMAKE_MATCH_1})
253-
else()
254-
message(FATAL_ERROR "Unrecognized CUDA architecture: ${CUDA_ARCH}")
255-
endif()
256-
endforeach()
257-
list(REMOVE_DUPLICATES CMAKE_CUDA_ARCHITECTURES_CLEAN)
258-
set(CMAKE_CUDA_ARCHITECTURES ${CMAKE_CUDA_ARCHITECTURES_CLEAN})
259-
endif()
260-
261-
if(NOT DEFINED CMAKE_CUDA_ARCHITECTURES)
262-
set(CMAKE_CUDA_ARCHITECTURES "80" "86")
263-
if(CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL "11.8")
264-
list(APPEND CMAKE_CUDA_ARCHITECTURES "89" "90")
265-
endif()
266-
if(CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL "12.7")
267-
list(APPEND CMAKE_CUDA_ARCHITECTURES "100" "120")
268-
endif()
269-
endif()
270-
271-
# CMAKE_CUDA_ARCHITECTURES_ORIG contains all architectures enabled, without
272-
# automatically added -real or -a suffix.
273-
set(CMAKE_CUDA_ARCHITECTURES_ORIG "${CMAKE_CUDA_ARCHITECTURES}")
274-
message(STATUS "GPU architectures: ${CMAKE_CUDA_ARCHITECTURES_ORIG}")
275-
276-
set(ARCHITECTURES_WITH_KERNELS "80" "86" "89" "90" "100" "120")
277-
foreach(CUDA_ARCH IN LISTS ARCHITECTURES_WITH_KERNELS)
278-
if(NOT "${CUDA_ARCH}" IN_LIST CMAKE_CUDA_ARCHITECTURES_ORIG)
279-
add_definitions("-DEXCLUDE_SM_${CUDA_ARCH}")
280-
message(STATUS "Excluding SM ${CUDA_ARCH}")
281-
endif()
282-
endforeach()
283-
284-
set(ARCHITECTURES_WITH_ACCEL "90" "100" "101" "120")
285-
unset(CMAKE_CUDA_ARCHITECTURES_NORMALIZED)
286-
foreach(CUDA_ARCH IN LISTS CMAKE_CUDA_ARCHITECTURES)
287-
if("${CUDA_ARCH}" IN_LIST ARCHITECTURES_WITH_ACCEL)
288-
list(APPEND CMAKE_CUDA_ARCHITECTURES_NORMALIZED "${CUDA_ARCH}a-real")
289-
else()
290-
list(APPEND CMAKE_CUDA_ARCHITECTURES_NORMALIZED "${CUDA_ARCH}-real")
291-
endif()
292-
endforeach()
293-
set(CMAKE_CUDA_ARCHITECTURES ${CMAKE_CUDA_ARCHITECTURES_NORMALIZED})
143+
setup_cuda_compiler()
144+
setup_cuda_architectures()
294145

295146
enable_language(C CXX CUDA)
296147

297-
find_package(CUDAToolkit REQUIRED)
148+
find_package(CUDAToolkit 11.2 REQUIRED COMPONENTS cudart_static cuda_driver
149+
cublas cublasLt curand nvml)
150+
151+
set(CUBLAS_LIB CUDA::cublas)
152+
set(CUBLASLT_LIB CUDA::cublasLt)
153+
set(CURAND_LIB CUDA::curand)
154+
set(CUDA_DRV_LIB CUDA::cuda_driver)
155+
set(CUDA_NVML_LIB CUDA::nvml)
156+
set(CUDA_RT_LIB CUDA::cudart_static)
157+
set(CMAKE_CUDA_RUNTIME_LIBRARY Static)
298158

299159
resolve_dirs(CUDAToolkit_INCLUDE_DIRS "${CUDAToolkit_INCLUDE_DIRS}")
300160

@@ -307,57 +167,18 @@ message(STATUS " include path: ${CUDAToolkit_INCLUDE_DIRS}")
307167
# pick up on the includes
308168
set(CMAKE_CUDA_USE_RESPONSE_FILE_FOR_INCLUDES 0)
309169

310-
if(USE_SHARED_NVRTC)
311-
if(WIN32)
312-
message(FATAL_ERROR "Cannot use NVRTC shared library on Windows.")
313-
else()
314-
find_library(
315-
NVRTC_LIB nvrtc
316-
HINTS ${CUDAToolkit_LIBRARY_DIR}
317-
PATH_SUFFIXES lib64 lib lib/x64)
318-
find_library(
319-
NVRTC_BUILTINS_LIB nvrtc-builtins
320-
HINTS ${CUDAToolkit_LIBRARY_DIR}
321-
PATH_SUFFIXES lib64 lib lib/x64)
322-
endif()
323-
else()
324-
if(WIN32)
325-
find_library(
326-
NVRTC_LIB nvrtc
327-
HINTS ${CUDAToolkit_LIBRARY_DIR}
328-
PATH_SUFFIXES lib64 lib lib/x64)
329-
else()
330-
find_library(
331-
NVRTC_LIB nvrtc_static
332-
HINTS ${CUDAToolkit_LIBRARY_DIR}
333-
PATH_SUFFIXES lib64 lib lib/x64)
334-
find_library(
335-
NVRTC_BUILTINS_LIB nvrtc-builtins_static
336-
HINTS ${CUDAToolkit_LIBRARY_DIR}
337-
PATH_SUFFIXES lib64 lib lib/x64)
338-
find_library(
339-
NVPTXCOMPILER_LIB nvptxcompiler_static
340-
HINTS ${CUDAToolkit_LIBRARY_DIR}
341-
PATH_SUFFIXES lib64 lib lib/x64)
342-
endif()
343-
endif()
344-
345-
set(CUBLAS_LIB CUDA::cublas)
346-
set(CUBLASLT_LIB CUDA::cublasLt)
347-
set(CUDA_DRV_LIB CUDA::cuda_driver)
348-
set(CUDA_NVML_LIB CUDA::nvml)
349-
set(CUDA_RT_LIB CUDA::cudart_static)
350-
set(CMAKE_CUDA_RUNTIME_LIBRARY Static)
351-
352170
find_library(RT_LIB rt)
353171

354172
if(ENABLE_MULTI_DEVICE)
355173
# NCCL dependencies
356-
set_ifndef(NCCL_LIB_DIR /usr/lib/${CMAKE_SYSTEM_PROCESSOR}-linux-gnu/)
357-
set_ifndef(NCCL_INCLUDE_DIR /usr/include/)
358-
find_library(NCCL_LIB nccl HINTS ${NCCL_LIB_DIR})
174+
find_package(NCCL 2 REQUIRED)
175+
set(NCCL_LIB NCCL::nccl)
359176
endif()
360177

178+
# TRT dependencies
179+
find_package(TensorRT 10 REQUIRED COMPONENTS OnnxParser)
180+
set(TRT_LIB TensorRT::NvInfer)
181+
361182
get_filename_component(TRT_LLM_ROOT_DIR ${CMAKE_CURRENT_SOURCE_DIR} PATH)
362183

363184
set(3RDPARTY_DIR ${TRT_LLM_ROOT_DIR}/3rdparty)
@@ -368,27 +189,13 @@ include_directories(
368189
SYSTEM
369190
${CUDAToolkit_INCLUDE_DIRS}
370191
${CUDNN_ROOT_DIR}/include
371-
${NCCL_INCLUDE_DIR}
192+
$<TARGET_PROPERTY:TensorRT::NvInfer,INTERFACE_INCLUDE_DIRECTORIES>
372193
${3RDPARTY_DIR}/cutlass/include
373194
${3RDPARTY_DIR}/cutlass/tools/util/include
374195
${3RDPARTY_DIR}/NVTX/include
375196
${3RDPARTY_DIR}/json/include
376197
${3RDPARTY_DIR}/pybind11/include)
377198

378-
# TRT dependencies
379-
set_ifndef(TRT_LIB_DIR ${CMAKE_BINARY_DIR})
380-
set_ifndef(TRT_INCLUDE_DIR /usr/include/${CMAKE_SYSTEM_PROCESSOR}-linux-gnu)
381-
set(TRT_LIB nvinfer)
382-
383-
# On Windows major version is appended to nvinfer libs.
384-
if(WIN32)
385-
set(TRT_LIB_NAME nvinfer_10)
386-
else()
387-
set(TRT_LIB_NAME nvinfer)
388-
endif()
389-
390-
find_library_create_target(${TRT_LIB} ${TRT_LIB_NAME} SHARED ${TRT_LIB_DIR})
391-
392199
if(${CUDAToolkit_VERSION} VERSION_GREATER_EQUAL "11")
393200
add_definitions("-DENABLE_BF16")
394201
message(
@@ -503,61 +310,7 @@ if((WIN32))
503310
endif()
504311
endif()
505312

506-
if(SANITIZE)
507-
if(WIN32)
508-
message(FATAL_ERROR "Sanitizing support is unimplemented on Windows.")
509-
endif()
510-
511-
macro(add_clang_rt_lib lib_name)
512-
if(CMAKE_CXX_COMPILER_ID MATCHES "Clang")
513-
execute_process(
514-
COMMAND
515-
${CMAKE_CXX_COMPILER}
516-
"-print-file-name=libclang_rt.${lib_name}-${CMAKE_SYSTEM_PROCESSOR}.so"
517-
OUTPUT_VARIABLE CLANG_SAN_LIBRARY_PATH OUTPUT_STRIP_TRAILING_WHITESPACE)
518-
link_libraries(${CLANG_SAN_LIBRARY_PATH})
519-
endif()
520-
endmacro()
521-
522-
string(TOLOWER ${SANITIZE} SANITIZE)
523-
524-
if("undefined" IN_LIST SANITIZE)
525-
message(STATUS "Enabling extra sub-sanitizers for UBSan")
526-
list(APPEND SANITIZE "float-divide-by-zero")
527-
528-
if(CMAKE_CXX_COMPILER_ID MATCHES "Clang")
529-
list(APPEND SANITIZE "unsigned-integer-overflow" "implicit-conversion"
530-
"local-bounds")
531-
endif()
532-
add_clang_rt_lib("ubsan_standalone")
533-
add_compile_definitions("SANITIZE_UNDEFINED")
534-
endif()
535-
536-
if("address" IN_LIST SANITIZE)
537-
message(STATUS "Enabling extra sub-sanitizers for ASan")
538-
list(APPEND SANITIZE "pointer-compare" "pointer-subtract")
539-
add_compile_options("-fno-omit-frame-pointer;-fno-optimize-sibling-calls")
540-
541-
if(CMAKE_CXX_COMPILER_ID MATCHES "Clang")
542-
add_compile_options("-fsanitize-address-use-after-return=always")
543-
add_link_options("-fsanitize-address-use-after-return=always")
544-
endif()
545-
add_clang_rt_lib("asan")
546-
endif()
547-
548-
if("thread" IN_LIST SANITIZE)
549-
add_compile_options("-ftls-model=local-dynamic")
550-
add_clang_rt_lib("tsan")
551-
endif()
552-
553-
list(REMOVE_DUPLICATES SANITIZE)
554-
message(STATUS "Enabled sanitizers: ${SANITIZE}")
555-
556-
foreach(SANITIZER IN LISTS SANITIZE)
557-
add_compile_options("-fsanitize=${SANITIZER}")
558-
add_link_options("-fsanitize=${SANITIZER}")
559-
endforeach()
560-
endif()
313+
setup_sanitizers()
561314

562315
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --expt-extended-lambda")
563316
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --expt-relaxed-constexpr")
@@ -694,32 +447,9 @@ else()
694447
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DENABLE_UCX=0")
695448
endif()
696449

697-
file(STRINGS "${TRT_INCLUDE_DIR}/NvInferVersion.h" VERSION_STRINGS
698-
REGEX "#define NV_TENSORRT_.*")
699-
foreach(TYPE MAJOR MINOR PATCH BUILD)
700-
string(REGEX MATCH "NV_TENSORRT_${TYPE} [0-9]+" TRT_TYPE_STRING
701-
${VERSION_STRINGS})
702-
string(REGEX MATCH "[0-9]+" TRT_${TYPE} ${TRT_TYPE_STRING})
703-
endforeach(TYPE)
704-
705-
set(TRT_VERSION
706-
"${TRT_MAJOR}.${TRT_MINOR}.${TRT_PATCH}"
707-
CACHE STRING "TensorRT project version")
708-
set(TRT_SOVERSION
709-
"${TRT_MAJOR}"
710-
CACHE STRING "TensorRT library so version")
711-
message(
712-
STATUS
713-
"Building for TensorRT version: ${TRT_VERSION}, library version: ${TRT_SOVERSION}"
714-
)
715-
716-
if(${TRT_MAJOR} LESS 10)
717-
message(FATAL_ERROR "TensorRT version must be at least 10.0")
718-
endif()
719-
720450
list(APPEND COMMON_HEADER_DIRS)
721451
include_directories(${COMMON_HEADER_DIRS})
722-
include_directories(SYSTEM ${TORCH_INCLUDE_DIRS} ${TRT_INCLUDE_DIR})
452+
include_directories(SYSTEM ${TORCH_INCLUDE_DIRS})
723453

724454
add_subdirectory(tensorrt_llm)
725455

0 commit comments

Comments
 (0)