16
16
#
17
17
18
18
cmake_minimum_required (VERSION 3.27 FATAL_ERROR)
19
+ list (APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR} /cmake/modules" )
19
20
set (CMAKE_EXPORT_COMPILE_COMMANDS ON )
20
21
21
- include (CheckLanguage)
22
- include (cmake/modules/set_ifndef.cmake)
23
- include (cmake/modules/find_library_create_target.cmake)
24
- include (cmake/modules/resolve_dirs.cmake)
25
- include (cmake/modules/parse_make_options.cmake)
22
+ include (resolve_dirs)
23
+ include (parse_make_options)
24
+ include (cuda_configuration)
25
+ include (sanitizers)
26
26
27
27
project (tensorrt_llm LANGUAGES CXX)
28
28
@@ -44,9 +44,6 @@ option(ENABLE_MULTI_DEVICE
44
44
option (ENABLE_UCX "Enable building with UCX (Uniform Communication X) support"
45
45
ON )
46
46
47
- # Always use static NVRTC for IP protection reasons.
48
- set (USE_SHARED_NVRTC OFF )
49
-
50
47
if (NVTX_DISABLE)
51
48
add_compile_definitions ("NVTX_DISABLE" )
52
49
message (STATUS "NVTX is disabled" )
@@ -143,158 +140,21 @@ configure_file(
143
140
cmake/templates/version .h
144
141
${CMAKE_CURRENT_SOURCE_DIR} /include /tensorrt_llm/executor/version .h)
145
142
146
- # Determine CUDA version before enabling the language extension
147
- # check_language(CUDA) clears CMAKE_CUDA_HOST_COMPILER if CMAKE_CUDA_COMPILER is
148
- # not set
149
- if (NOT CMAKE_CUDA_COMPILER AND CMAKE_CUDA_HOST_COMPILER)
150
- set (CMAKE_CUDA_HOST_COMPILER_BACKUP ${CMAKE_CUDA_HOST_COMPILER} )
151
- endif ()
152
- check_language(CUDA)
153
- if (CMAKE_CUDA_HOST_COMPILER_BACKUP)
154
- set (CMAKE_CUDA_HOST_COMPILER ${CMAKE_CUDA_HOST_COMPILER_BACKUP} )
155
- check_language(CUDA)
156
- endif ()
157
- if (CMAKE_CUDA_COMPILER)
158
- message (STATUS "CUDA compiler: ${CMAKE_CUDA_COMPILER} " )
159
- if (NOT WIN32 ) # Linux
160
- execute_process (
161
- COMMAND
162
- "bash" "-c"
163
- "${CMAKE_CUDA_COMPILER} --version | egrep -o 'V[0-9]+.[0-9]+.[0-9]+' | cut -c2-"
164
- RESULT_VARIABLE _BASH_SUCCESS
165
- OUTPUT_VARIABLE CMAKE_CUDA_COMPILER_VERSION
166
- OUTPUT_STRIP_TRAILING_WHITESPACE)
167
-
168
- if (NOT _BASH_SUCCESS EQUAL 0)
169
- message (FATAL_ERROR "Failed to determine CUDA version" )
170
- endif ()
171
-
172
- else () # Windows
173
- execute_process (
174
- COMMAND ${CMAKE_CUDA_COMPILER} --version
175
- OUTPUT_VARIABLE versionString
176
- RESULT_VARIABLE versionResult)
177
-
178
- if (versionResult EQUAL 0 AND versionString MATCHES
179
- "V[0-9]+\\ .[0-9]+\\ .[0-9]+" )
180
- string (REGEX REPLACE "V" "" version ${CMAKE_MATCH_0} )
181
- set (CMAKE_CUDA_COMPILER_VERSION "${version} " )
182
- else ()
183
- message (FATAL_ERROR "Failed to determine CUDA version" )
184
- endif ()
185
- endif ()
186
- else ()
187
- message (FATAL_ERROR "No CUDA compiler found" )
188
- endif ()
189
-
190
- set (CUDA_REQUIRED_VERSION "11.2" )
191
- if (CMAKE_CUDA_COMPILER_VERSION VERSION_LESS CUDA_REQUIRED_VERSION)
192
- message (
193
- FATAL_ERROR
194
- "CUDA version ${CMAKE_CUDA_COMPILER_VERSION} must be at least ${CUDA_REQUIRED_VERSION} "
195
- )
196
- endif ()
197
-
198
- # cmake-format: off
199
- # Initialize and normalize CMAKE_CUDA_ARCHITECTURES before enabling CUDA.
200
- # Special values:
201
- # * `native` is resolved to HIGHEST available architecture.
202
- # * Fallback to `all` if detection failed.
203
- # * `all`/unset is resolved to a set of architectures we optimized for and compiler supports.
204
- # * `all-major` is unsupported.
205
- # Numerical architectures:
206
- # * PTX is never included in result binary.
207
- # * `*-virtual` architectures are therefore rejected.
208
- # * `-real` suffix is automatically added to exclude PTX.
209
- # * Always use accelerated (`-a` suffix) target for supported architectures.
210
- # cmake-format: on
211
-
212
- if (CMAKE_CUDA_ARCHITECTURES STREQUAL "native" )
213
- # Detect highest available compute capability
214
- set (OUTPUTFILE ${PROJECT_BINARY_DIR} /detect_cuda_arch)
215
- set (CUDAFILE ${CMAKE_SOURCE_DIR} /cmake/utils/detect_cuda_arch.cu)
216
- execute_process (COMMAND ${CMAKE_CUDA_COMPILER} -lcuda ${CUDAFILE} -o
217
- ${OUTPUTFILE} )
218
- message (VERBOSE "Detecting native CUDA compute capability" )
219
- execute_process (
220
- COMMAND ${OUTPUTFILE}
221
- RESULT_VARIABLE CUDA_RETURN_CODE
222
- OUTPUT_VARIABLE CUDA_ARCH_OUTPUT)
223
- if (NOT ${CUDA_RETURN_CODE} EQUAL 0)
224
- message (WARNING "Detecting native CUDA compute capability - fail" )
225
- message (
226
- WARNING
227
- "CUDA compute capability detection failed, compiling for all optimized architectures"
228
- )
229
- unset (CMAKE_CUDA_ARCHITECTURES)
230
- else ()
231
- message (STATUS "Detecting native CUDA compute capability - done" )
232
- set (CMAKE_CUDA_ARCHITECTURES "${CUDA_ARCH_OUTPUT} " )
233
- endif ()
234
- elseif (CMAKE_CUDA_ARCHITECTURES STREQUAL "all" )
235
- unset (CMAKE_CUDA_ARCHITECTURES)
236
- message (
237
- STATUS
238
- "Setting CMAKE_CUDA_ARCHITECTURES to all enables all architectures TensorRT-LLM optimized for, "
239
- "not all architectures CUDA compiler supports." )
240
- elseif (CMAKE_CUDA_ARCHITECTURES STREQUAL "all-major" )
241
- message (
242
- FATAL_ERROR
243
- "Setting CMAKE_CUDA_ARCHITECTURES to all-major does not make sense for TensorRT-LLM. "
244
- "Please enable all architectures you intend to run on, so we can enable optimized kernels for them."
245
- )
246
- else ()
247
- unset (CMAKE_CUDA_ARCHITECTURES_CLEAN)
248
- foreach (CUDA_ARCH IN LISTS CMAKE_CUDA_ARCHITECTURES)
249
- if (CUDA_ARCH MATCHES "^([1-9])([0-9])+a?-virtual$" )
250
- message (FATAL_ERROR "Including PTX in compiled binary is unsupported." )
251
- elseif (CUDA_ARCH MATCHES "^(([1-9])([0-9])+)a?(-real)?$" )
252
- list (APPEND CMAKE_CUDA_ARCHITECTURES_CLEAN ${CMAKE_MATCH_1} )
253
- else ()
254
- message (FATAL_ERROR "Unrecognized CUDA architecture: ${CUDA_ARCH} " )
255
- endif ()
256
- endforeach ()
257
- list (REMOVE_DUPLICATES CMAKE_CUDA_ARCHITECTURES_CLEAN)
258
- set (CMAKE_CUDA_ARCHITECTURES ${CMAKE_CUDA_ARCHITECTURES_CLEAN} )
259
- endif ()
260
-
261
- if (NOT DEFINED CMAKE_CUDA_ARCHITECTURES)
262
- set (CMAKE_CUDA_ARCHITECTURES "80" "86" )
263
- if (CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL "11.8" )
264
- list (APPEND CMAKE_CUDA_ARCHITECTURES "89" "90" )
265
- endif ()
266
- if (CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL "12.7" )
267
- list (APPEND CMAKE_CUDA_ARCHITECTURES "100" "120" )
268
- endif ()
269
- endif ()
270
-
271
- # CMAKE_CUDA_ARCHITECTURES_ORIG contains all architectures enabled, without
272
- # automatically added -real or -a suffix.
273
- set (CMAKE_CUDA_ARCHITECTURES_ORIG "${CMAKE_CUDA_ARCHITECTURES} " )
274
- message (STATUS "GPU architectures: ${CMAKE_CUDA_ARCHITECTURES_ORIG} " )
275
-
276
- set (ARCHITECTURES_WITH_KERNELS "80" "86" "89" "90" "100" "120" )
277
- foreach (CUDA_ARCH IN LISTS ARCHITECTURES_WITH_KERNELS)
278
- if (NOT "${CUDA_ARCH} " IN_LIST CMAKE_CUDA_ARCHITECTURES_ORIG)
279
- add_definitions ("-DEXCLUDE_SM_${CUDA_ARCH} " )
280
- message (STATUS "Excluding SM ${CUDA_ARCH} " )
281
- endif ()
282
- endforeach ()
283
-
284
- set (ARCHITECTURES_WITH_ACCEL "90" "100" "101" "120" )
285
- unset (CMAKE_CUDA_ARCHITECTURES_NORMALIZED)
286
- foreach (CUDA_ARCH IN LISTS CMAKE_CUDA_ARCHITECTURES)
287
- if ("${CUDA_ARCH} " IN_LIST ARCHITECTURES_WITH_ACCEL)
288
- list (APPEND CMAKE_CUDA_ARCHITECTURES_NORMALIZED "${CUDA_ARCH} a-real" )
289
- else ()
290
- list (APPEND CMAKE_CUDA_ARCHITECTURES_NORMALIZED "${CUDA_ARCH} -real" )
291
- endif ()
292
- endforeach ()
293
- set (CMAKE_CUDA_ARCHITECTURES ${CMAKE_CUDA_ARCHITECTURES_NORMALIZED} )
143
+ setup_cuda_compiler()
144
+ setup_cuda_architectures()
294
145
295
146
enable_language (C CXX CUDA)
296
147
297
- find_package (CUDAToolkit REQUIRED)
148
+ find_package (CUDAToolkit 11.2 REQUIRED COMPONENTS cudart_static cuda_driver
149
+ cublas cublasLt curand nvml)
150
+
151
+ set (CUBLAS_LIB CUDA::cublas)
152
+ set (CUBLASLT_LIB CUDA::cublasLt)
153
+ set (CURAND_LIB CUDA::curand)
154
+ set (CUDA_DRV_LIB CUDA::cuda_driver)
155
+ set (CUDA_NVML_LIB CUDA::nvml)
156
+ set (CUDA_RT_LIB CUDA::cudart_static)
157
+ set (CMAKE_CUDA_RUNTIME_LIBRARY Static )
298
158
299
159
resolve_dirs(CUDAToolkit_INCLUDE_DIRS "${CUDAToolkit_INCLUDE_DIRS} " )
300
160
@@ -307,57 +167,18 @@ message(STATUS " include path: ${CUDAToolkit_INCLUDE_DIRS}")
307
167
# pick up on the includes
308
168
set (CMAKE_CUDA_USE_RESPONSE_FILE_FOR_INCLUDES 0)
309
169
310
- if (USE_SHARED_NVRTC)
311
- if (WIN32 )
312
- message (FATAL_ERROR "Cannot use NVRTC shared library on Windows." )
313
- else ()
314
- find_library (
315
- NVRTC_LIB nvrtc
316
- HINTS ${CUDAToolkit_LIBRARY_DIR}
317
- PATH_SUFFIXES lib64 lib lib/x64)
318
- find_library (
319
- NVRTC_BUILTINS_LIB nvrtc-builtins
320
- HINTS ${CUDAToolkit_LIBRARY_DIR}
321
- PATH_SUFFIXES lib64 lib lib/x64)
322
- endif ()
323
- else ()
324
- if (WIN32 )
325
- find_library (
326
- NVRTC_LIB nvrtc
327
- HINTS ${CUDAToolkit_LIBRARY_DIR}
328
- PATH_SUFFIXES lib64 lib lib/x64)
329
- else ()
330
- find_library (
331
- NVRTC_LIB nvrtc_static
332
- HINTS ${CUDAToolkit_LIBRARY_DIR}
333
- PATH_SUFFIXES lib64 lib lib/x64)
334
- find_library (
335
- NVRTC_BUILTINS_LIB nvrtc-builtins_static
336
- HINTS ${CUDAToolkit_LIBRARY_DIR}
337
- PATH_SUFFIXES lib64 lib lib/x64)
338
- find_library (
339
- NVPTXCOMPILER_LIB nvptxcompiler_static
340
- HINTS ${CUDAToolkit_LIBRARY_DIR}
341
- PATH_SUFFIXES lib64 lib lib/x64)
342
- endif ()
343
- endif ()
344
-
345
- set (CUBLAS_LIB CUDA::cublas)
346
- set (CUBLASLT_LIB CUDA::cublasLt)
347
- set (CUDA_DRV_LIB CUDA::cuda_driver)
348
- set (CUDA_NVML_LIB CUDA::nvml)
349
- set (CUDA_RT_LIB CUDA::cudart_static)
350
- set (CMAKE_CUDA_RUNTIME_LIBRARY Static )
351
-
352
170
find_library (RT_LIB rt)
353
171
354
172
if (ENABLE_MULTI_DEVICE)
355
173
# NCCL dependencies
356
- set_ifndef(NCCL_LIB_DIR /usr/lib/${CMAKE_SYSTEM_PROCESSOR} -linux-gnu/)
357
- set_ifndef(NCCL_INCLUDE_DIR /usr/include /)
358
- find_library (NCCL_LIB nccl HINTS ${NCCL_LIB_DIR} )
174
+ find_package (NCCL 2 REQUIRED)
175
+ set (NCCL_LIB NCCL::nccl)
359
176
endif ()
360
177
178
+ # TRT dependencies
179
+ find_package (TensorRT 10 REQUIRED COMPONENTS OnnxParser)
180
+ set (TRT_LIB TensorRT::NvInfer)
181
+
361
182
get_filename_component (TRT_LLM_ROOT_DIR ${CMAKE_CURRENT_SOURCE_DIR} PATH )
362
183
363
184
set (3RDPARTY_DIR ${TRT_LLM_ROOT_DIR} /3rdparty)
@@ -368,27 +189,13 @@ include_directories(
368
189
SYSTEM
369
190
${CUDAToolkit_INCLUDE_DIRS}
370
191
${CUDNN_ROOT_DIR} /include
371
- ${NCCL_INCLUDE_DIR}
192
+ $<TARGET_PROPERTY:TensorRT::NvInfer, INTERFACE_INCLUDE_DIRECTORIES >
372
193
${3RDPARTY_DIR} /cutlass/include
373
194
${3RDPARTY_DIR} /cutlass/tools/util/include
374
195
${3RDPARTY_DIR} /NVTX/include
375
196
${3RDPARTY_DIR} /json/include
376
197
${3RDPARTY_DIR} /pybind11/include )
377
198
378
- # TRT dependencies
379
- set_ifndef(TRT_LIB_DIR ${CMAKE_BINARY_DIR} )
380
- set_ifndef(TRT_INCLUDE_DIR /usr/include /${CMAKE_SYSTEM_PROCESSOR} -linux-gnu)
381
- set (TRT_LIB nvinfer)
382
-
383
- # On Windows major version is appended to nvinfer libs.
384
- if (WIN32 )
385
- set (TRT_LIB_NAME nvinfer_10)
386
- else ()
387
- set (TRT_LIB_NAME nvinfer)
388
- endif ()
389
-
390
- find_library_create_target(${TRT_LIB} ${TRT_LIB_NAME} SHARED ${TRT_LIB_DIR} )
391
-
392
199
if (${CUDAToolkit_VERSION} VERSION_GREATER_EQUAL "11" )
393
200
add_definitions ("-DENABLE_BF16" )
394
201
message (
@@ -503,61 +310,7 @@ if((WIN32))
503
310
endif ()
504
311
endif ()
505
312
506
- if (SANITIZE)
507
- if (WIN32 )
508
- message (FATAL_ERROR "Sanitizing support is unimplemented on Windows." )
509
- endif ()
510
-
511
- macro (add_clang_rt_lib lib_name)
512
- if (CMAKE_CXX_COMPILER_ID MATCHES "Clang" )
513
- execute_process (
514
- COMMAND
515
- ${CMAKE_CXX_COMPILER}
516
- "-print-file-name=libclang_rt.${lib_name} -${CMAKE_SYSTEM_PROCESSOR} .so"
517
- OUTPUT_VARIABLE CLANG_SAN_LIBRARY_PATH OUTPUT_STRIP_TRAILING_WHITESPACE)
518
- link_libraries (${CLANG_SAN_LIBRARY_PATH} )
519
- endif ()
520
- endmacro ()
521
-
522
- string (TOLOWER ${SANITIZE} SANITIZE)
523
-
524
- if ("undefined" IN_LIST SANITIZE)
525
- message (STATUS "Enabling extra sub-sanitizers for UBSan" )
526
- list (APPEND SANITIZE "float-divide-by-zero" )
527
-
528
- if (CMAKE_CXX_COMPILER_ID MATCHES "Clang" )
529
- list (APPEND SANITIZE "unsigned-integer-overflow" "implicit-conversion"
530
- "local-bounds" )
531
- endif ()
532
- add_clang_rt_lib("ubsan_standalone" )
533
- add_compile_definitions ("SANITIZE_UNDEFINED" )
534
- endif ()
535
-
536
- if ("address" IN_LIST SANITIZE)
537
- message (STATUS "Enabling extra sub-sanitizers for ASan" )
538
- list (APPEND SANITIZE "pointer-compare" "pointer-subtract" )
539
- add_compile_options ("-fno-omit-frame-pointer;-fno-optimize-sibling-calls" )
540
-
541
- if (CMAKE_CXX_COMPILER_ID MATCHES "Clang" )
542
- add_compile_options ("-fsanitize-address-use-after-return=always" )
543
- add_link_options ("-fsanitize-address-use-after-return=always" )
544
- endif ()
545
- add_clang_rt_lib("asan" )
546
- endif ()
547
-
548
- if ("thread" IN_LIST SANITIZE)
549
- add_compile_options ("-ftls-model=local-dynamic" )
550
- add_clang_rt_lib("tsan" )
551
- endif ()
552
-
553
- list (REMOVE_DUPLICATES SANITIZE)
554
- message (STATUS "Enabled sanitizers: ${SANITIZE} " )
555
-
556
- foreach (SANITIZER IN LISTS SANITIZE)
557
- add_compile_options ("-fsanitize=${SANITIZER} " )
558
- add_link_options ("-fsanitize=${SANITIZER} " )
559
- endforeach ()
560
- endif ()
313
+ setup_sanitizers()
561
314
562
315
set (CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --expt-extended-lambda" )
563
316
set (CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --expt-relaxed-constexpr" )
@@ -694,32 +447,9 @@ else()
694
447
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DENABLE_UCX=0" )
695
448
endif ()
696
449
697
- file (STRINGS "${TRT_INCLUDE_DIR} /NvInferVersion.h" VERSION_STRINGS
698
- REGEX "#define NV_TENSORRT_.*" )
699
- foreach (TYPE MAJOR MINOR PATCH BUILD )
700
- string (REGEX MATCH "NV_TENSORRT_${TYPE} [0-9]+" TRT_TYPE_STRING
701
- ${VERSION_STRINGS} )
702
- string (REGEX MATCH "[0-9]+" TRT_${TYPE} ${TRT_TYPE_STRING} )
703
- endforeach (TYPE )
704
-
705
- set (TRT_VERSION
706
- "${TRT_MAJOR} .${TRT_MINOR} .${TRT_PATCH} "
707
- CACHE STRING "TensorRT project version" )
708
- set (TRT_SOVERSION
709
- "${TRT_MAJOR} "
710
- CACHE STRING "TensorRT library so version" )
711
- message (
712
- STATUS
713
- "Building for TensorRT version: ${TRT_VERSION} , library version: ${TRT_SOVERSION} "
714
- )
715
-
716
- if (${TRT_MAJOR} LESS 10)
717
- message (FATAL_ERROR "TensorRT version must be at least 10.0" )
718
- endif ()
719
-
720
450
list (APPEND COMMON_HEADER_DIRS)
721
451
include_directories (${COMMON_HEADER_DIRS} )
722
- include_directories (SYSTEM ${TORCH_INCLUDE_DIRS} ${TRT_INCLUDE_DIR} )
452
+ include_directories (SYSTEM ${TORCH_INCLUDE_DIRS} )
723
453
724
454
add_subdirectory (tensorrt_llm)
725
455
0 commit comments