Skip to content

Commit 8c6d69d

Browse files
authored
TensorRT 10.9 OSS Release. (#4381)
Signed-off-by: Leo Dong <[email protected]>
1 parent 64e56ab commit 8c6d69d

File tree

474 files changed

+12214
-6837
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

474 files changed

+12214
-6837
lines changed

.gitignore

-1
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@ build/
33
/demo/BERT/engines
44
/demo/BERT/squad/*.json
55
/docker/jetpack_files/*
6-
*.nvmk
76
*.sln
87
*.vcxproj
98
externals/

CHANGELOG.md

+320-187
Large diffs are not rendered by default.

CMakeLists.txt

+11-25
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
#
2-
# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2+
# SPDX-FileCopyrightText: Copyright (c) 1993-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
33
# SPDX-License-Identifier: Apache-2.0
44
#
55
# Licensed under the Apache License, Version 2.0 (the "License");
@@ -176,43 +176,29 @@ set(CUDA_LIBRARIES ${CUDART_LIB})
176176
if (DEFINED GPU_ARCHS)
177177
message(STATUS "GPU_ARCHS defined as ${GPU_ARCHS}. Generating CUDA code for SM ${GPU_ARCHS}")
178178
separate_arguments(GPU_ARCHS)
179+
foreach(SM IN LISTS GPU_ARCHS)
180+
# Append the value of the loop variable, not the literal token "SM".
list(APPEND CMAKE_CUDA_ARCHITECTURES ${SM})
181+
endforeach()
179182
else()
180-
list(APPEND GPU_ARCHS
181-
75
182-
)
183-
184-
find_file(IS_L4T_NATIVE nv_tegra_release PATHS /env/)
185-
set (IS_L4T_CROSS "False")
186-
if (DEFINED ENV{IS_L4T_CROSS})
187-
set(IS_L4T_CROSS $ENV{IS_L4T_CROSS})
183+
list(APPEND CMAKE_CUDA_ARCHITECTURES 72 75 80 86 87 89 90)
184+
185+
if(CUDA_VERSION VERSION_GREATER_EQUAL 12.8)
186+
list(APPEND CMAKE_CUDA_ARCHITECTURES 100 120)
188187
endif()
189188

190-
if (IS_L4T_NATIVE OR ${IS_L4T_CROSS} STREQUAL "True")
191-
# Only Orin (SM87) supported
192-
list(APPEND GPU_ARCHS 87)
193-
endif()
194-
195-
if (CUDA_VERSION VERSION_GREATER_EQUAL 11.0)
196-
# Ampere GPU (SM80) support is only available in CUDA versions > 11.0
197-
list(APPEND GPU_ARCHS 80)
198-
endif()
199-
if (CUDA_VERSION VERSION_GREATER_EQUAL 11.1)
200-
list(APPEND GPU_ARCHS 86)
201-
endif()
202-
203-
message(STATUS "GPU_ARCHS is not defined. Generating CUDA code for default SMs: ${GPU_ARCHS}")
189+
message(STATUS "GPU_ARCHS is not defined. Generating CUDA code for default SMs: ${CMAKE_CUDA_ARCHITECTURES}")
204190
endif()
205191
set(BERT_GENCODES)
206192
# Generate SASS for each architecture
207-
foreach(arch ${GPU_ARCHS})
193+
foreach(arch ${CMAKE_CUDA_ARCHITECTURES})
208194
if (${arch} GREATER_EQUAL 75)
209195
set(BERT_GENCODES "${BERT_GENCODES} -gencode arch=compute_${arch},code=sm_${arch}")
210196
endif()
211197
set(GENCODES "${GENCODES} -gencode arch=compute_${arch},code=sm_${arch}")
212198
endforeach()
213199

214200
# Generate PTX for the last architecture in the list.
215-
list(GET GPU_ARCHS -1 LATEST_SM)
201+
list(GET CMAKE_CUDA_ARCHITECTURES -1 LATEST_SM)
216202
set(GENCODES "${GENCODES} -gencode arch=compute_${LATEST_SM},code=compute_${LATEST_SM}")
217203
if (${LATEST_SM} GREATER_EQUAL 75)
218204
set(BERT_GENCODES "${BERT_GENCODES} -gencode arch=compute_${LATEST_SM},code=compute_${LATEST_SM}")

README.md

+168-147
Large diffs are not rendered by default.

VERSION

+1-1
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
10.8.0.43
1+
10.9.0.34

cmake/toolchains/cmake_aarch64_cross.toolchain

+2
Original file line numberDiff line numberDiff line change
@@ -53,3 +53,5 @@ set(CMAKE_CUDA_COMPILER_FORCED TRUE)
5353
set(CUDA_LIBS -L${CUDA_ROOT}/lib)
5454

5555
set(ADDITIONAL_PLATFORM_LIB_FLAGS ${CUDA_LIBS} -lcublas -lcudart -lstdc++ -lm)
56+
57+
link_directories(${CUDA_ROOT}/lib)

demo/BERT/README.md

+345-313
Large diffs are not rendered by default.

demo/DeBERTa/README.md

+3-3
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,7 @@ Note that the performance gap between BERT's self-attention and DeBERTa's disent
7575
## Environment Setup
7676
It is recommended to use docker for reproducing the following steps. Follow the setup steps in TensorRT OSS [README](https://github.com/NVIDIA/TensorRT#setting-up-the-build-environment) to build and launch the container and build OSS:
7777

78-
**Example: Ubuntu 20.04 on x86-64 with cuda-12.5 (default)**
78+
**Example: Ubuntu 20.04 on x86-64 with cuda-12.8 (default)**
7979
```bash
8080
# Download this TensorRT OSS repo
8181
git clone -b main https://github.com/nvidia/TensorRT TensorRT
@@ -84,10 +84,10 @@ git submodule update --init --recursive
8484

8585
## at root of TensorRT OSS
8686
# build container
87-
./docker/build.sh --file docker/ubuntu-20.04.Dockerfile --tag tensorrt-ubuntu20.04-cuda12.5
87+
./docker/build.sh --file docker/ubuntu-20.04.Dockerfile --tag tensorrt-ubuntu20.04-cuda12.8
8888

8989
# launch container
90-
./docker/launch.sh --tag tensorrt-ubuntu20.04-cuda12.5 --gpus all
90+
./docker/launch.sh --tag tensorrt-ubuntu20.04-cuda12.8 --gpus all
9191

9292
## now inside container
9393
# build OSS (only required for pre-8.4.3 TensorRT versions)

demo/Diffusion/.gitignore

+5-3
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
11
__pycache__/
2-
onnx/*.onnx
3-
engine/*.plan
4-
output/*.png
2+
onnx/
3+
engine/
4+
output/
5+
pytorch_model/
6+
artifacts_cache/

demo/Diffusion/README.md

+40-53
Large diffs are not rendered by default.

demo/Diffusion/demo_controlnet.py

+53-19
Original file line numberDiff line numberDiff line change
@@ -22,12 +22,14 @@
2222
from cuda import cudart
2323
from PIL import Image
2424

25-
from stable_diffusion_pipeline import StableDiffusionPipeline
26-
from utilities import PIPELINE_TYPE, TRT_LOGGER, add_arguments, download_image, process_pipeline_args
25+
from demo_diffusion import dd_argparse
26+
from demo_diffusion import image as image_module
27+
from demo_diffusion import pipeline as pipeline_module
28+
2729

2830
def parseArgs():
2931
parser = argparse.ArgumentParser(description="Options for Stable Diffusion ControlNet Demo", conflict_handler='resolve')
30-
parser = add_arguments(parser)
32+
parser = dd_argparse.add_arguments(parser)
3133
parser.add_argument('--scheduler', type=str, default="UniPC", choices=["DDIM", "DPM", "EulerA", "LMSD", "PNDM", "UniPC"], help="Scheduler for diffusion process")
3234
parser.add_argument('--input-image', nargs = '+', type=str, default=[], help="Path to the input image/images already prepared for ControlNet modality. For example: canny edged image for canny ControlNet, not just regular rgb image")
3335
parser.add_argument('--controlnet-type', nargs='+', type=str, default=["canny"], help="Controlnet type, can be `None`, `str` or `str` list from ['canny', 'depth', 'hed', 'mlsd', 'normal', 'openpose', 'scribble', 'seg']")
@@ -41,15 +43,15 @@ def parseArgs():
4143
# Controlnet configuration
4244
if not isinstance(args.controlnet_type, list):
4345
raise ValueError(f"`--controlnet-type` must be of type `str` or `str` list, but is {type(args.controlnet_type)}")
44-
46+
4547
# Controlnet configuration
4648
if not isinstance(args.controlnet_scale, list):
4749
# Report the offending type when --controlnet-scale is not a list.
raise ValueError(f"`--controlnet-scale` must be of type `float` or `float` list, but is {type(args.controlnet_scale)}")
48-
50+
4951
# Check number of ControlNets to ControlNet scales
5052
if len(args.controlnet_type) != len(args.controlnet_scale):
5153
raise ValueError(f"Numbers of ControlNets {len(args.controlnet_type)} should be equal to number of ControlNet scales {len(args.controlnet_scale)}.")
52-
54+
5355
# Convert controlnet scales to tensor
5456
controlnet_scale = torch.FloatTensor(args.controlnet_scale)
5557

@@ -61,48 +63,80 @@ def parseArgs():
6163
else:
6264
for controlnet in args.controlnet_type:
6365
if controlnet == "canny":
64-
canny_image = download_image("https://hf.co/datasets/huggingface/documentation-images/resolve/main/diffusers/input_image_vermeer.png")
65-
canny_image = controlnet_aux.CannyDetector()(canny_image)
66+
if args.version == "xl-1.0":
67+
canny_image = image_module.download_image(
68+
"https://huggingface.co/diffusers/controlnet-canny-sdxl-1.0/resolve/main/out_bird.png"
69+
)
70+
# "out_bird.png" has 5 images combined in a row. We pick the first image which is the input image.
71+
canny_image = canny_image.crop((0, 0, canny_image.width / 5, canny_image.height))
72+
elif args.version == "1.5":
73+
canny_image = image_module.download_image(
74+
"https://hf.co/datasets/huggingface/documentation-images/resolve/main/diffusers/input_image_vermeer.png"
75+
)
76+
canny_image = controlnet_aux.CannyDetector()(canny_image)
77+
else:
78+
raise ValueError(
79+
f"This demo supports ControlNets for v1.5 and SDXL base pipelines only. Version provided: {args.version}"
80+
)
6681
input_images.append(canny_image.resize((args.height, args.width)))
6782
elif controlnet == "normal":
68-
normal_image = download_image("https://huggingface.co/lllyasviel/sd-controlnet-normal/resolve/main/images/toy.png")
83+
normal_image = image_module.download_image(
84+
"https://huggingface.co/lllyasviel/sd-controlnet-normal/resolve/main/images/toy.png"
85+
)
6986
normal_image = controlnet_aux.NormalBaeDetector.from_pretrained("lllyasviel/Annotators")(normal_image)
7087
input_images.append(normal_image.resize((args.height, args.width)))
7188
elif controlnet == "depth":
72-
depth_image = download_image("https://huggingface.co/lllyasviel/sd-controlnet-depth/resolve/main/images/stormtrooper.png")
89+
depth_image = image_module.download_image(
90+
"https://huggingface.co/lllyasviel/sd-controlnet-depth/resolve/main/images/stormtrooper.png"
91+
)
7392
depth_image = controlnet_aux.LeresDetector.from_pretrained("lllyasviel/Annotators")(depth_image)
7493
input_images.append(depth_image.resize((args.height, args.width)))
7594
elif controlnet == "hed":
76-
hed_image = download_image("https://huggingface.co/lllyasviel/sd-controlnet-hed/resolve/main/images/man.png")
95+
hed_image = image_module.download_image(
96+
"https://huggingface.co/lllyasviel/sd-controlnet-hed/resolve/main/images/man.png"
97+
)
7798
hed_image = controlnet_aux.HEDdetector.from_pretrained("lllyasviel/Annotators")(hed_image)
7899
input_images.append(hed_image.resize((args.height, args.width)))
79100
elif controlnet == "mlsd":
80-
mlsd_image = download_image("https://huggingface.co/lllyasviel/sd-controlnet-mlsd/resolve/main/images/room.png")
101+
mlsd_image = image_module.download_image(
102+
"https://huggingface.co/lllyasviel/sd-controlnet-mlsd/resolve/main/images/room.png"
103+
)
81104
mlsd_image = controlnet_aux.MLSDdetector.from_pretrained("lllyasviel/Annotators")(mlsd_image)
82105
input_images.append(mlsd_image.resize((args.height, args.width)))
83106
elif controlnet == "openpose":
84-
openpose_image = download_image("https://huggingface.co/lllyasviel/sd-controlnet-openpose/resolve/main/images/pose.png")
107+
openpose_image = image_module.download_image(
108+
"https://huggingface.co/lllyasviel/sd-controlnet-openpose/resolve/main/images/pose.png"
109+
)
85110
openpose_image = controlnet_aux.OpenposeDetector.from_pretrained("lllyasviel/Annotators")(openpose_image)
86111
input_images.append(openpose_image.resize((args.height, args.width)))
87112
elif controlnet == "scribble":
88-
scribble_image = download_image("https://huggingface.co/lllyasviel/sd-controlnet-scribble/resolve/main/images/bag.png")
113+
scribble_image = image_module.download_image(
114+
"https://huggingface.co/lllyasviel/sd-controlnet-scribble/resolve/main/images/bag.png"
115+
)
89116
scribble_image = controlnet_aux.HEDdetector.from_pretrained("lllyasviel/Annotators")(scribble_image, scribble=True)
90117
input_images.append(scribble_image.resize((args.height, args.width)))
91118
elif controlnet == "seg":
92-
seg_image = download_image("https://huggingface.co/lllyasviel/sd-controlnet-seg/resolve/main/images/house.png")
119+
seg_image = image_module.download_image(
120+
"https://huggingface.co/lllyasviel/sd-controlnet-seg/resolve/main/images/house.png"
121+
)
93122
seg_image = controlnet_aux.SamDetector.from_pretrained("ybelkada/segment-anything", subfolder="checkpoints")(seg_image)
94123
input_images.append(seg_image.resize((args.height, args.width)))
95124
else:
96125
# No default conditioning image is wired up for this ControlNet type; fail loudly.
raise ValueError(f"You should implement the conditional image of this controlnet: {controlnet}")
97126
assert len(input_images) > 0
98127

99-
kwargs_init_pipeline, kwargs_load_engine, args_run_demo = process_pipeline_args(args)
128+
kwargs_init_pipeline, kwargs_load_engine, args_run_demo = dd_argparse.process_pipeline_args(args)
100129

101130
# Initialize demo
102-
demo = StableDiffusionPipeline(
103-
pipeline_type=PIPELINE_TYPE.CONTROLNET,
131+
demo = pipeline_module.StableDiffusionPipeline(
132+
pipeline_type=(
133+
pipeline_module.PIPELINE_TYPE.CONTROLNET
134+
if args.version != "xl-1.0"
135+
else pipeline_module.PIPELINE_TYPE.XL_CONTROLNET
136+
),
104137
controlnets=args.controlnet_type,
105-
**kwargs_init_pipeline)
138+
**kwargs_init_pipeline,
139+
)
106140

107141
# Load TensorRT engines and pytorch modules
108142
demo.loadEngines(
File renamed without changes.

0 commit comments

Comments
 (0)