From f2777a33f1d1ab21890bfe004acef5f5173cb53a Mon Sep 17 00:00:00 2001 From: "Zewen (Evan) Li" Date: Thu, 25 Apr 2024 15:05:24 -0700 Subject: [PATCH 1/4] upgrade to trt-10-GA --- .github/scripts/install-torch-tensorrt.sh | 6 +++--- .github/workflows/build-test.yml | 16 +++++++-------- README.md | 2 +- WORKSPACE | 20 +++++-------------- packaging/pre_build_script.sh | 6 +++--- py/requirements.txt | 2 +- .../WORKSPACE.x86_64.release.rhel.tmpl | 8 ++++---- 7 files changed, 25 insertions(+), 35 deletions(-) diff --git a/.github/scripts/install-torch-tensorrt.sh b/.github/scripts/install-torch-tensorrt.sh index b2b19b139d..a7fa682139 100644 --- a/.github/scripts/install-torch-tensorrt.sh +++ b/.github/scripts/install-torch-tensorrt.sh @@ -7,9 +7,9 @@ ${CONDA_RUN} python -m pip install pyyaml mpmath==1.3.0 export TRT_VERSION=$(${CONDA_RUN} python -c "import versions; versions.tensorrt_version()") # Install TensorRT manually -wget -q -P /opt/torch-tensorrt-builds/ https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.0.0/TensorRT-10.0.0.6.Linux.x86_64-gnu.cuda-12.4.tar.gz -tar -xzf /opt/torch-tensorrt-builds/TensorRT-10.0.0.6.Linux.x86_64-gnu.cuda-12.4.tar.gz -C /opt/torch-tensorrt-builds/ -python -m pip install /opt/torch-tensorrt-builds/TensorRT-10.0.0.6/python/tensorrt-10.0.0b6-cp${PYTHON_VERSION//./}-none-linux_x86_64.whl +wget -q -P /opt/torch-tensorrt-builds/ https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.0.1/tars/TensorRT-10.0.1.6.Linux.x86_64-gnu.cuda-12.4.tar.gz +tar -xzf /opt/torch-tensorrt-builds/TensorRT-10.0.1.6.Linux.x86_64-gnu.cuda-12.4.tar.gz -C /opt/torch-tensorrt-builds/ +python -m pip install /opt/torch-tensorrt-builds/TensorRT-10.0.1.6/python/tensorrt-10.0.1-cp${PYTHON_VERSION//./}-none-linux_x86_64.whl # Install Torch-TensorRT ${CONDA_RUN} python -m pip install /opt/torch-tensorrt-builds/torch_tensorrt*+${CU_VERSION}*.whl diff --git a/.github/workflows/build-test.yml b/.github/workflows/build-test.yml index f4d39bd056..784eeceb6f 100644 --- a/.github/workflows/build-test.yml +++ b/.github/workflows/build-test.yml @@ -21,7 +21,7 @@ jobs: os: linux test-infra-repository: pytorch/test-infra test-infra-ref: main - channel: test + channel: release with-rocm: false with-cpu: false @@ -78,7 +78,7 @@ jobs: script: | export USE_HOST_DEPS=1 export LD_LIBRARY_PATH=/usr/lib64:$LD_LIBRARY_PATH - export LD_LIBRARY_PATH=/opt/torch-tensorrt-builds/TensorRT-10.0.0.6/lib:$LD_LIBRARY_PATH + export LD_LIBRARY_PATH=/opt/torch-tensorrt-builds/TensorRT-10.0.1.6/lib:$LD_LIBRARY_PATH pushd . cd tests/modules # Don't use requirements.txt here as it contains tensorrt and torch which should have been installed by now. @@ -115,7 +115,7 @@ jobs: pre-script: ${{ matrix.pre-script }} script: | export USE_HOST_DEPS=1 - export LD_LIBRARY_PATH=/opt/torch-tensorrt-builds/TensorRT-10.0.0.6/lib:$LD_LIBRARY_PATH + export LD_LIBRARY_PATH=/opt/torch-tensorrt-builds/TensorRT-10.0.1.6/lib:$LD_LIBRARY_PATH pushd . cd tests/py/dynamo ${CONDA_RUN} python -m pip install --pre pytest-xdist timm transformers parameterized expecttest==0.1.6 --use-deprecated=legacy-resolver @@ -144,7 +144,7 @@ jobs: pre-script: ${{ matrix.pre-script }} script: | export USE_HOST_DEPS=1 - export LD_LIBRARY_PATH=/opt/torch-tensorrt-builds/TensorRT-10.0.0.6/lib:$LD_LIBRARY_PATH + export LD_LIBRARY_PATH=/opt/torch-tensorrt-builds/TensorRT-10.0.1.6/lib:$LD_LIBRARY_PATH pushd . cd tests/py/dynamo ${CONDA_RUN} python -m pip install --pre pytest timm transformers parameterized expecttest==0.1.6 --use-deprecated=legacy-resolver @@ -174,7 +174,7 @@ jobs: pre-script: ${{ matrix.pre-script }} script: | export USE_HOST_DEPS=1 - export LD_LIBRARY_PATH=/opt/torch-tensorrt-builds/TensorRT-10.0.0.6/lib:$LD_LIBRARY_PATH + export LD_LIBRARY_PATH=/opt/torch-tensorrt-builds/TensorRT-10.0.1.6/lib:$LD_LIBRARY_PATH pushd . cd tests/py/dynamo ${CONDA_RUN} python -m pip install --pre pytest timm transformers parameterized expecttest==0.1.6 --use-deprecated=legacy-resolver @@ -203,7 +203,7 @@ jobs: pre-script: ${{ matrix.pre-script }} script: | export USE_HOST_DEPS=1 - export LD_LIBRARY_PATH=/opt/torch-tensorrt-builds/TensorRT-10.0.0.6/lib:$LD_LIBRARY_PATH + export LD_LIBRARY_PATH=/opt/torch-tensorrt-builds/TensorRT-10.0.1.6/lib:$LD_LIBRARY_PATH pushd . cd tests/py/dynamo ${CONDA_RUN} python -m pip install --pre pytest-xdist timm transformers parameterized expecttest==0.1.6 --use-deprecated=legacy-resolver @@ -234,7 +234,7 @@ jobs: pre-script: ${{ matrix.pre-script }} script: | export USE_HOST_DEPS=1 - export LD_LIBRARY_PATH=/opt/torch-tensorrt-builds/TensorRT-10.0.0.6/lib:$LD_LIBRARY_PATH + export LD_LIBRARY_PATH=/opt/torch-tensorrt-builds/TensorRT-10.0.1.6/lib:$LD_LIBRARY_PATH pushd . cd tests/py/dynamo ${CONDA_RUN} python -m pip install --pre pytest-xdist timm transformers parameterized expecttest==0.1.6 --use-deprecated=legacy-resolver @@ -264,7 +264,7 @@ jobs: pre-script: ${{ matrix.pre-script }} script: | export USE_HOST_DEPS=1 - export LD_LIBRARY_PATH=/opt/torch-tensorrt-builds/TensorRT-10.0.0.6/lib:$LD_LIBRARY_PATH + export LD_LIBRARY_PATH=/opt/torch-tensorrt-builds/TensorRT-10.0.1.6/lib:$LD_LIBRARY_PATH pushd . cd tests/py/core ${CONDA_RUN} python -m pip install --pre pytest-xdist timm transformers parameterized expecttest==0.1.6 --use-deprecated=legacy-resolver diff --git a/README.md b/README.md index eecae762cf..248a58292a 100644 --- a/README.md +++ b/README.md @@ -119,7 +119,7 @@ These are the following dependencies used to verify the testcases. Torch-TensorR - Libtorch 2.3.0 (built with CUDA 12.1) - CUDA 12.1 - cuDNN 8.9.5 -- TensorRT 10.0.0.6 +- TensorRT 10.0.1.6 ## Prebuilt Binaries and Wheel files diff --git a/WORKSPACE b/WORKSPACE index edc5c9a050..66971f77bc 100644 --- a/WORKSPACE +++ b/WORKSPACE @@ -54,37 +54,27 @@ http_archive( name = "libtorch", build_file = "@//third_party/libtorch:BUILD", strip_prefix = "libtorch", - urls = ["https://download.pytorch.org/libtorch/test/cu121/libtorch-cxx11-abi-shared-with-deps-2.3.0%2Bcu121.zip"], + urls = ["https://download.pytorch.org/libtorch/cu121/libtorch-cxx11-abi-shared-with-deps-2.3.0%2Bcu121.zip"], ) http_archive( name = "libtorch_pre_cxx11_abi", build_file = "@//third_party/libtorch:BUILD", strip_prefix = "libtorch", - urls = ["https://download.pytorch.org/libtorch/test/cu121/libtorch-shared-with-deps-2.3.0%2Bcu121.zip"], + urls = ["https://download.pytorch.org/libtorch/cu121/libtorch-shared-with-deps-2.3.0%2Bcu121.zip"], ) # Download these tarballs manually from the NVIDIA website # Either place them in the distdir directory in third_party and use the --distdir flag # or modify the urls to "file:////.tar.gz -http_archive( - name = "cudnn", - build_file = "@//third_party/cudnn/archive:BUILD", - sha256 = "2a2eb89a2ab51071151c6082f1e816c702167a711a9372f9f73a7b5c4b06e01a", - strip_prefix = "cudnn-linux-x86_64-8.9.5.30_cuda12-archive", - urls = [ - "https://developer.nvidia.com/downloads/compute/cudnn/secure/8.9.5/local_installers/12.x/cudnn-linux-x86_64-8.9.5.30_cuda12-archive.tar.xz", - ], -) - http_archive( name = "tensorrt", build_file = "@//third_party/tensorrt/archive:BUILD", - sha256 = "0f8157a5fc5329943b338b893591373350afa90ca81239cdadd7580cd1eba254", - strip_prefix = "TensorRT-8.6.1.6", + sha256 = "a5cd2863793d69187ce4c73b2fffc1f470ff28cfd91e3640017e53b8916453d5", + strip_prefix = "TensorRT-10.0.1.6", urls = [ - "https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/secure/8.6.1/tars/TensorRT-8.6.1.6.Linux.x86_64-gnu.cuda-12.0.tar.gz", + "https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.0.1/tars/TensorRT-10.0.1.6.Linux.x86_64-gnu.cuda-12.4.tar.gz", ], ) diff --git a/packaging/pre_build_script.sh b/packaging/pre_build_script.sh index 8f5d1d8acc..349b273a52 100755 --- a/packaging/pre_build_script.sh +++ b/packaging/pre_build_script.sh @@ -4,9 +4,9 @@ python3 -m pip install pyyaml yum install -y ninja-build gettext TRT_VERSION=$(python3 -c "import versions; versions.tensorrt_version()") -wget -q -P /opt/torch-tensorrt-builds/ https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.0.0/TensorRT-10.0.0.6.Linux.x86_64-gnu.cuda-12.4.tar.gz -tar -xzf /opt/torch-tensorrt-builds/TensorRT-10.0.0.6.Linux.x86_64-gnu.cuda-12.4.tar.gz -C /opt/torch-tensorrt-builds/ -export LD_LIBRARY_PATH=/opt/torch-tensorrt-builds/TensorRT-10.0.0.6/lib:$LD_LIBRARY_PATH +wget -q -P /opt/torch-tensorrt-builds/ https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.0.1/tars/TensorRT-10.0.1.6.Linux.x86_64-gnu.cuda-12.4.tar.gz +tar -xzf /opt/torch-tensorrt-builds/TensorRT-10.0.1.6.Linux.x86_64-gnu.cuda-12.4.tar.gz -C /opt/torch-tensorrt-builds/ +export LD_LIBRARY_PATH=/opt/torch-tensorrt-builds/TensorRT-10.0.1.6/lib:$LD_LIBRARY_PATH wget https://github.com/bazelbuild/bazelisk/releases/download/v1.17.0/bazelisk-linux-amd64 \ && mv bazelisk-linux-amd64 /usr/bin/bazel \ && chmod +x /usr/bin/bazel diff --git a/py/requirements.txt b/py/requirements.txt index 291f7b7457..5ed9d809f6 100644 --- a/py/requirements.txt +++ b/py/requirements.txt @@ -1,7 +1,7 @@ numpy packaging pybind11==2.6.2 ---extra-index-url https://download.pytorch.org/whl/test/cu121 +--extra-index-url https://download.pytorch.org/whl/cu121 torch==2.3.0 torchvision==0.18.0 --extra-index-url https://pypi.ngc.nvidia.com diff --git a/toolchains/ci_workspaces/WORKSPACE.x86_64.release.rhel.tmpl b/toolchains/ci_workspaces/WORKSPACE.x86_64.release.rhel.tmpl index cad54b1707..7098165c33 100644 --- a/toolchains/ci_workspaces/WORKSPACE.x86_64.release.rhel.tmpl +++ b/toolchains/ci_workspaces/WORKSPACE.x86_64.release.rhel.tmpl @@ -58,21 +58,21 @@ http_archive( name = "libtorch", build_file = "@//third_party/libtorch:BUILD", strip_prefix = "libtorch", - urls = ["https://download.pytorch.org/libtorch/test/cu121/libtorch-cxx11-abi-shared-with-deps-2.3.0%2Bcu121.zip"], + urls = ["https://download.pytorch.org/libtorch/cu121/libtorch-cxx11-abi-shared-with-deps-2.3.0%2Bcu121.zip"], ) http_archive( name = "libtorch_pre_cxx11_abi", build_file = "@//third_party/libtorch:BUILD", strip_prefix = "libtorch", - urls = ["https://download.pytorch.org/libtorch/test/cu121/libtorch-shared-with-deps-2.3.0%2Bcu121.zip"], + urls = ["https://download.pytorch.org/libtorch/cu121/libtorch-shared-with-deps-2.3.0%2Bcu121.zip"], ) http_archive( name = "tensorrt", - urls = ["file:////opt/torch-tensorrt-builds/TensorRT-10.0.0.6.Linux.x86_64-gnu.cuda-12.4.tar.gz",], + urls = ["file:////opt/torch-tensorrt-builds/TensorRT-10.0.1.6.Linux.x86_64-gnu.cuda-12.4.tar.gz",], build_file = "@//third_party/tensorrt/archive:BUILD", - strip_prefix = "TensorRT-10.0.0.6" + strip_prefix = "TensorRT-10.0.1.6" ) # ######################################################################### From 926149a07ebbc77694d90cce3ce8a42b4eb3ceca Mon Sep 17 00:00:00 2001 From: "Zewen (Evan) Li" Date: Thu, 25 Apr 2024 17:24:28 -0700 Subject: [PATCH 2/4] fix bug: Command '['ninja', '-v']' returned non-zero exit status 1. --- py/torch_tensorrt/csrc/torch_tensorrt_py.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/py/torch_tensorrt/csrc/torch_tensorrt_py.cpp b/py/torch_tensorrt/csrc/torch_tensorrt_py.cpp index 81814486f6..e9ad8b159c 100644 --- a/py/torch_tensorrt/csrc/torch_tensorrt_py.cpp +++ b/py/torch_tensorrt/csrc/torch_tensorrt_py.cpp @@ -2,7 +2,7 @@ #include "pybind11/stl.h" #include "ATen/core/jit_type.h" -#include "NvInferRuntimeBase.h" +#include "NvInferRuntime.h" #include "Python.h" #include "core/compiler.h" #include "core/conversion/conversion.h" From 756757f9ce646b88b784fb2a9baa632a0dc4611b Mon Sep 17 00:00:00 2001 From: "Zewen (Evan) Li" Date: Fri, 26 Apr 2024 11:47:15 -0700 Subject: [PATCH 3/4] remove cudnn dependency --- README.md | 1 - WORKSPACE | 6 ------ dev_dep_versions.yml | 3 +-- py/requirements.txt | 2 +- 4 files changed, 2 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index 248a58292a..dd77385ef5 100644 --- a/README.md +++ b/README.md @@ -118,7 +118,6 @@ These are the following dependencies used to verify the testcases. Torch-TensorR - Bazel 5.2.0 - Libtorch 2.3.0 (built with CUDA 12.1) - CUDA 12.1 -- cuDNN 8.9.5 - TensorRT 10.0.1.6 ## Prebuilt Binaries and Wheel files diff --git a/WORKSPACE b/WORKSPACE index 66971f77bc..f3b522352a 100644 --- a/WORKSPACE +++ b/WORKSPACE @@ -101,12 +101,6 @@ http_archive( # build_file = "third_party/libtorch/BUILD" #) -#new_local_repository( -# name = "cudnn", -# path = "/usr/", -# build_file = "@//third_party/cudnn/local:BUILD" -#) - #new_local_repository( # name = "tensorrt", # path = "/usr/", diff --git a/dev_dep_versions.yml b/dev_dep_versions.yml index 4bbfe9d188..c74d162146 100644 --- a/dev_dep_versions.yml +++ b/dev_dep_versions.yml @@ -1,4 +1,3 @@ __version__: "2.3.0" __cuda_version__: "12.1" -__cudnn_version__: "8.9" -__tensorrt_version__: "10.0.0.6" +__tensorrt_version__: "10.0.1.6" diff --git a/py/requirements.txt b/py/requirements.txt index 5ed9d809f6..58fffa6728 100644 --- a/py/requirements.txt +++ b/py/requirements.txt @@ -6,4 +6,4 @@ torch==2.3.0 torchvision==0.18.0 --extra-index-url https://pypi.ngc.nvidia.com pyyaml -tensorrt +tensorrt==10.0.1.6 \ No newline at end of file From 9e06604f297695bd759be6449fc067dddec04ed9 Mon Sep 17 00:00:00 2001 From: "Zewen (Evan) Li" Date: Fri, 26 Apr 2024 12:18:57 -0700 Subject: [PATCH 4/4] recover dev_dep_versions.yml --- dev_dep_versions.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/dev_dep_versions.yml b/dev_dep_versions.yml index c74d162146..2688c478f2 100644 --- a/dev_dep_versions.yml +++ b/dev_dep_versions.yml @@ -1,3 +1,4 @@ __version__: "2.3.0" __cuda_version__: "12.1" +__cudnn_version__: "8.9" __tensorrt_version__: "10.0.1.6"