chore: reenable py313 #3455

Draft · wants to merge 20 commits into base: main

3 changes: 2 additions & 1 deletion .github/scripts/filter-matrix.py
@@ -3,8 +3,9 @@
 import argparse
 import json
 import sys
+from typing import List
 
-disabled_python_versions = "3.13"
+disabled_python_versions: List[str] = []
 
 
 def main(args: list[str]) -> None:
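
The old value was a single version string; typing it as an explicit (now empty) list re-enables 3.13 and documents that several versions can be disabled at once. A minimal sketch of how such a list is typically applied (the real `main()` is collapsed in this diff, so `filter_matrix` below is illustrative, including the `python_version` key):

```python
# Illustrative sketch only: the actual filtering logic is collapsed in this diff.
import json
from typing import List

disabled_python_versions: List[str] = []  # empty list: nothing is filtered out

def filter_matrix(matrix_json: str) -> str:
    matrix = json.loads(matrix_json)
    matrix["include"] = [
        entry
        for entry in matrix["include"]
        if entry.get("python_version") not in disabled_python_versions
    ]
    return json.dumps(matrix)
```
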
8 changes: 8 additions & 0 deletions .github/scripts/generate-tensorrt-test-matrix.py
@@ -28,6 +28,10 @@
 # please update the future tensorRT version you want to test here
 TENSORRT_VERSIONS_DICT = {
     "windows": {
+        "10.3.0": {
+            "urls": "https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.3.0/zip/TensorRT-10.3.0.26.Windows.win10.cuda-12.5.zip",
+            "strip_prefix": "TensorRT-10.3.0.26",
+        },
         "10.7.0": {
             "urls": "https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.7.0/zip/TensorRT-10.7.0.23.Windows.win10.cuda-12.6.zip",
             "strip_prefix": "TensorRT-10.7.0.23",
@@ -42,6 +46,10 @@
         },
     },
     "linux": {
+        "10.3.0": {
+            "urls": "https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.3.0/tars/TensorRT-10.3.0.26.Linux.x86_64-gnu.cuda-12.5.tar.gz",
+            "strip_prefix": "TensorRT-10.3.0.26",
+        },
         "10.7.0": {
             "urls": "https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.7.0/tars/TensorRT-10.7.0.23.Linux.x86_64-gnu.cuda-12.6.tar.gz",
             "strip_prefix": "TensorRT-10.7.0.23",
16 changes: 6 additions & 10 deletions .github/scripts/generate_binary_build_matrix.py
@@ -18,15 +18,16 @@
 import sys
 from typing import Any, Callable, Dict, List, Optional, Tuple
 
+PYTHON_VERSIONS_FOR_PR_BUILD = ["3.11"]
 PYTHON_ARCHES_DICT = {
-    "nightly": ["3.9", "3.10", "3.11", "3.12"],
-    "test": ["3.9", "3.10", "3.11", "3.12"],
-    "release": ["3.9", "3.10", "3.11", "3.12"],
+    "nightly": ["3.9", "3.10", "3.11", "3.12", "3.13"],
+    "test": ["3.9", "3.10", "3.11", "3.12", "3.13"],
+    "release": ["3.9", "3.10", "3.11", "3.12", "3.13"],
 }
 CUDA_ARCHES_DICT = {
     "nightly": ["11.8", "12.6", "12.8"],
     "test": ["11.8", "12.6", "12.8"],
-    "release": ["11.8", "12.6", "12.8"],
+    "release": ["11.8", "12.4", "12.6"],
 }
 ROCM_ARCHES_DICT = {
     "nightly": ["6.1", "6.2"],
@@ -422,11 +423,6 @@ def generate_wheels_matrix(
     # Define default python version
     python_versions = list(PYTHON_ARCHES)
 
-    # If the list of python versions is set explicitly by the caller, stick with it instead
-    # of trying to add more versions behind the scene
-    if channel == NIGHTLY and (os in (LINUX, MACOS_ARM64, LINUX_AARCH64)):
-        python_versions += ["3.13"]
-
     if os == LINUX:
         # NOTE: We only build manywheel packages for linux
         package_type = "manywheel"
@@ -456,7 +452,7 @@ def generate_wheels_matrix(
         arches += [XPU]
 
     if limit_pr_builds:
-        python_versions = [python_versions[0]]
+        python_versions = PYTHON_VERSIONS_FOR_PR_BUILD
 
     global WHEEL_CONTAINER_IMAGES
 
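
Behavioral note on the last hunk: PR builds previously took `python_versions[0]` ("3.9" with the list order above), while the named constant now pins them to 3.11 explicitly. A self-contained illustration:

```python
# Values copied from the diff above; the control flow is simplified and is not
# the library's actual generate_wheels_matrix.
PYTHON_ARCHES = ["3.9", "3.10", "3.11", "3.12", "3.13"]
PYTHON_VERSIONS_FOR_PR_BUILD = ["3.11"]
limit_pr_builds = True

python_versions = list(PYTHON_ARCHES)
if limit_pr_builds:
    # Old behavior: python_versions = [python_versions[0]]  -> ["3.9"]
    python_versions = PYTHON_VERSIONS_FOR_PR_BUILD  # -> ["3.11"]
print(python_versions)
```
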
1 change: 0 additions & 1 deletion .github/workflows/build-test-linux.yml
@@ -23,7 +23,6 @@ jobs:
       test-infra-ref: main
       with-rocm: false
       with-cpu: false
-      python-versions: '["3.11", "3.12", "3.10", "3.9"]'
 
   filter-matrix:
     needs: [generate-matrix]
1 change: 0 additions & 1 deletion .github/workflows/build-test-windows.yml
@@ -23,7 +23,6 @@ jobs:
       test-infra-ref: main
       with-rocm: false
       with-cpu: false
-      python-versions: '["3.11", "3.12", "3.10", "3.9"]'
 
   substitute-runner:
     needs: generate-matrix
10 changes: 7 additions & 3 deletions py/torch_tensorrt/dynamo/_refit.py
@@ -109,7 +109,9 @@ def construct_refit_mapping(
 
 
 def construct_refit_mapping_from_weight_name_map(
-    weight_name_map: dict[Any, Any], state_dict: dict[Any, Any]
+    weight_name_map: dict[Any, Any],
+    state_dict: dict[Any, Any],
+    settings: CompilationSettings,
 ) -> dict[Any, Any]:
     engine_weight_map = {}
     for engine_weight_name, (sd_weight_name, np_weight_type) in weight_name_map.items():
@@ -120,7 +122,9 @@ def construct_refit_mapping_from_weight_name_map(
             # If weights is not in sd, we can leave it unchanged
             continue
         else:
-            engine_weight_map[engine_weight_name] = state_dict[sd_weight_name]
+            engine_weight_map[engine_weight_name] = state_dict[sd_weight_name].to(
+                to_torch_device(settings.device)
+            )
 
         engine_weight_map[engine_weight_name] = (
             engine_weight_map[engine_weight_name]
@@ -163,7 +167,7 @@ def _refit_single_trt_engine_with_gm(
         "constant_mapping", {}
     )  # type: ignore
     mapping = construct_refit_mapping_from_weight_name_map(
-        weight_name_map, new_gm.state_dict()
+        weight_name_map, new_gm.state_dict(), settings
     )
     constant_mapping_with_type = {}
 
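
Taken together, these hunks thread `CompilationSettings` into the mapping step so every refitted weight is first moved onto the compilation device. A minimal sketch of the resulting behavior, with the torch_tensorrt-specific helpers reduced to a plain `device` argument (standing in for `to_torch_device(settings.device)`):

```python
from typing import Any

import torch

def build_engine_weight_map(
    weight_name_map: dict[str, Any],
    state_dict: dict[str, torch.Tensor],
    device: torch.device,
) -> dict[str, torch.Tensor]:
    engine_weight_map: dict[str, torch.Tensor] = {}
    for engine_name, (sd_name, _np_weight_type) in weight_name_map.items():
        if sd_name not in state_dict:
            continue  # weights absent from the new state_dict stay unchanged
        # Key change in this PR: move the weight to the compilation device, so
        # refit also works when the refitting GraphModule lives on CPU.
        engine_weight_map[engine_name] = state_dict[sd_name].to(device)
    return engine_weight_map
```
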
15 changes: 5 additions & 10 deletions py/torch_tensorrt/dynamo/conversion/_TRTInterpreter.py
@@ -44,7 +44,7 @@
     get_trt_tensor,
     to_torch,
 )
-from torch_tensorrt.dynamo.utils import DYNAMIC_DIM, get_model_device, to_torch_device
+from torch_tensorrt.dynamo.utils import DYNAMIC_DIM, to_torch_device
 from torch_tensorrt.fx.observer import Observer
 from torch_tensorrt.logging import TRT_LOGGER
 
@@ -491,15 +491,10 @@ def _save_weight_mapping(self) -> None:
         _LOGGER.info("Building weight name mapping...")
         # Stage 1: Name mapping
         torch_device = to_torch_device(self.compilation_settings.device)
-        gm_is_on_cuda = get_model_device(self.module).type == "cuda"
-        if not gm_is_on_cuda:
-            # If the model original position is on CPU, move it GPU
-            sd = {
-                k: v.reshape(-1).to(torch_device)
-                for k, v in self.module.state_dict().items()
-            }
-        else:
-            sd = {k: v.reshape(-1) for k, v in self.module.state_dict().items()}
+        sd = {
+            k: v.reshape(-1).to(torch_device)
+            for k, v in self.module.state_dict().items()
+        }
         weight_name_map: dict[str, Any] = {}
         np_map = {}
         constant_mapping = {}
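
Collapsing the CPU/CUDA branches is safe because `Tensor.to()` returns the tensor itself when dtype and device already match, so GPU-resident models pay no extra copy. A quick check (assumes a CUDA device is available):

```python
import torch

t = torch.ones(4, device="cuda")
# No-op: .to() returns the same object when the tensor is already on the target device.
assert t.to(torch.device("cuda")) is t
```
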
2 changes: 1 addition & 1 deletion pyproject.toml
@@ -1,6 +1,6 @@
 [build-system]
 requires = [
-    "setuptools>=68.0.0",
+    "setuptools>=77.0.0",
     "packaging>=23.1",
     "wheel>=0.40.0",
     "ninja>=1.11.0",
2 changes: 1 addition & 1 deletion setup.py
@@ -18,12 +18,12 @@
 import torch
 import yaml
 from setuptools import Extension, find_namespace_packages, setup
+from setuptools.command.bdist_wheel import bdist_wheel
 from setuptools.command.build_ext import build_ext
 from setuptools.command.develop import develop
 from setuptools.command.editable_wheel import editable_wheel
 from setuptools.command.install import install
 from torch.utils.cpp_extension import IS_WINDOWS, BuildExtension, CUDAExtension
-from wheel.bdist_wheel import bdist_wheel
 
 __version__: str = "0.0.0"
 __cuda_version__: str = "0.0"
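
This tracks the upstream move of `bdist_wheel` into setuptools (vendored since roughly setuptools 70.1, with the standalone `wheel.bdist_wheel` path deprecated); the `setuptools>=77.0.0` pin in pyproject.toml guarantees the new import exists. If older environments ever had to be tolerated, a compatibility shim could look like this (a sketch, not part of this PR):

```python
try:
    # Present in setuptools >= 70.1; guaranteed here by the ">=77.0.0" pin.
    from setuptools.command.bdist_wheel import bdist_wheel
except ImportError:
    # Legacy fallback for older setuptools with the standalone wheel package.
    from wheel.bdist_wheel import bdist_wheel
```
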
3 changes: 2 additions & 1 deletion tests/modules/custom_models.py
@@ -3,7 +3,6 @@
 import torch
 import torch.nn as nn
 import torch.nn.functional as F
-from transformers import BertConfig, BertModel, BertTokenizer
 
 
 # Sample Pool Model (for testing plugin serialization)
@@ -165,6 +164,8 @@ def forward(self, z: List[torch.Tensor]):
 
 
 def BertModule():
+    from transformers import BertConfig, BertModel, BertTokenizer
+
     enc = BertTokenizer.from_pretrained("google-bert/bert-base-uncased")
     text = "[CLS] Who was Jim Henson ? [SEP] Jim Henson was a puppeteer [SEP]"
     tokenized_text = enc.tokenize(text)
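
Moving the transformers import inside `BertModule()` is the usual lazy-import pattern for optional heavy dependencies: importing `custom_models` no longer requires transformers at all, and only the BERT path pays the import cost. The shape of the pattern, sketched generically (`build_optional_model` is illustrative, not from this repo):

```python
def build_optional_model():
    # Imported lazily so that importing this module never requires transformers.
    try:
        from transformers import BertModel
    except ImportError as exc:
        raise RuntimeError("transformers must be installed to build this model") from exc
    return BertModel.from_pretrained("bert-base-uncased")
```
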
3 changes: 0 additions & 3 deletions tests/modules/hub.py
@@ -4,10 +4,7 @@
 import custom_models as cm
 import timm
 import torch
-import torch.nn as nn
-import torch.nn.functional as F
 import torchvision.models as models
-from transformers import BertConfig, BertModel, BertTokenizer
 
 torch.hub._validate_not_a_forked_repo = lambda a, b, c: True
 
14 changes: 13 additions & 1 deletion tests/py/dynamo/models/test_model_refit.py
@@ -1,3 +1,4 @@
+import importlib
 import os
 import tempfile
 import unittest
@@ -21,7 +22,6 @@
     pre_export_lowering,
 )
 from torch_tensorrt.logging import TRT_LOGGER
-from transformers import BertModel
 
 assertions = unittest.TestCase()
 
@@ -238,8 +238,14 @@ def test_refit_one_engine_with_wrong_weightmap():
     not torch_trt.ENABLED_FEATURES.torch_tensorrt_runtime,
     "TorchScript Frontend is not available",
 )
+@unittest.skipIf(
+    not importlib.util.find_spec("transformers"),
+    "transformers is required to run this test",
+)
 @pytest.mark.unit
 def test_refit_one_engine_bert_with_weightmap():
+    from transformers import BertModel
+
     inputs = [
         torch.randint(0, 2, (1, 14), dtype=torch.int32).to("cuda"),
     ]
@@ -506,8 +512,14 @@ def test_refit_one_engine_without_weightmap():
     not torch_trt.ENABLED_FEATURES.torch_tensorrt_runtime,
     "TorchScript Frontend is not available",
 )
+@unittest.skipIf(
+    not importlib.util.find_spec("transformers"),
+    "transformers is required to run this test",
+)
 @pytest.mark.unit
 def test_refit_one_engine_bert_without_weightmap():
+    from transformers import BertModel
+
     inputs = [
         torch.randint(0, 2, (1, 14), dtype=torch.int32).to("cuda"),
     ]
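
`importlib.util.find_spec` locates a package without importing it, so the skip decision is cheap and test collection never crashes on the missing dependency (`find_spec` returns `None` when the package is absent, which is why the `not ...` guard reads naturally). A self-contained version of the guard used above:

```python
import importlib.util
import unittest

@unittest.skipIf(
    importlib.util.find_spec("transformers") is None,
    "transformers is required to run this test",
)
class BertGuardExample(unittest.TestCase):
    def test_lazy_import(self):
        # Safe: the decorator guarantees transformers is importable here.
        from transformers import BertModel

        self.assertTrue(hasattr(BertModel, "from_pretrained"))

if __name__ == "__main__":
    unittest.main()
```
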
13 changes: 9 additions & 4 deletions tests/py/dynamo/models/test_models.py
@@ -1,5 +1,5 @@
 # type: ignore
-
+import importlib
 import unittest
 
 import pytest
@@ -8,7 +8,6 @@
 import torch_tensorrt as torchtrt
 import torchvision.models as models
 from torch_tensorrt.dynamo.utils import COSINE_THRESHOLD, cosine_similarity
-from transformers import BertModel
 
 assertions = unittest.TestCase()
 
@@ -109,10 +108,16 @@ def test_efficientnet_b0(ir):
 
 
 @pytest.mark.unit
+@unittest.skipIf(
+    not importlib.util.find_spec("transformers"),
+    "transformers is required to run this test",
+)
 def test_bert_base_uncased(ir):
+    from transformers import BertModel
+
     model = BertModel.from_pretrained("bert-base-uncased").cuda().eval()
-    input = torch.randint(0, 1, (1, 14), dtype=torch.int32).to("cuda")
-    input2 = torch.randint(0, 1, (1, 14), dtype=torch.int32).to("cuda")
+    input = torch.randint(0, 2, (1, 14), dtype=torch.int32).to("cuda")
+    input2 = torch.randint(0, 2, (1, 14), dtype=torch.int32).to("cuda")
 
     compile_spec = {
         "inputs": [
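
The randint change here (and in the next file) is a genuine bug fix rather than churn: `torch.randint`'s upper bound is exclusive, so `randint(0, 1, ...)` always produced all-zero "token ids". Quick demonstration:

```python
import torch

zeros = torch.randint(0, 1, (1, 14), dtype=torch.int32)  # upper bound exclusive: all zeros
mixed = torch.randint(0, 2, (1, 14), dtype=torch.int32)  # draws from {0, 1}
assert int(zeros.sum()) == 0
```
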
11 changes: 8 additions & 3 deletions tests/py/dynamo/models/test_models_export.py
@@ -10,7 +10,6 @@
 import torch_tensorrt as torchtrt
 import torchvision.models as models
 from torch_tensorrt.dynamo.utils import COSINE_THRESHOLD, cosine_similarity
-from transformers import BertModel
 
 from packaging.version import Version
 
@@ -114,12 +113,18 @@ def test_efficientnet_b0(ir):
 
 
 @pytest.mark.unit
+@unittest.skipIf(
+    not importlib.util.find_spec("transformers"),
+    "transformers is required to run this test",
+)
 def test_bert_base_uncased(ir):
+    from transformers import BertModel
+
     model = (
         BertModel.from_pretrained("bert-base-uncased", return_dict=False).cuda().eval()
     )
-    input = torch.randint(0, 1, (1, 14), dtype=torch.int32).to("cuda")
-    input2 = torch.randint(0, 1, (1, 14), dtype=torch.int32).to("cuda")
+    input = torch.randint(0, 2, (1, 14), dtype=torch.int32).to("cuda")
+    input2 = torch.randint(0, 2, (1, 14), dtype=torch.int32).to("cuda")
 
     compile_spec = {
         "inputs": [
4 changes: 2 additions & 2 deletions tests/py/requirements.txt
@@ -8,6 +8,6 @@ pytest>=8.2.1
 pytest-xdist>=3.6.1
 pyyaml
 timm>=1.0.3
-transformers==4.40.2
-nvidia-modelopt[deploy,hf,torch]~=0.17.0
+transformers==4.49.0
+nvidia-modelopt[deploy,hf,torch]~=0.17.0; python_version < "3.13"
 --extra-index-url https://pypi.nvidia.com
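
The suffix on the nvidia-modelopt line is a PEP 508 environment marker: pip evaluates it against the running interpreter and skips the requirement entirely on Python 3.13, presumably because compatible modelopt builds are not yet available there. The same marker can be evaluated programmatically with the `packaging` library (already a build requirement in pyproject.toml):

```python
from packaging.markers import Marker

marker = Marker('python_version < "3.13"')
# True on CPython 3.9-3.12, False on 3.13+; pip installs the requirement
# only when this evaluates to True.
print(marker.evaluate())
```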