Commit 9c69a24

ONNX-TensorRT 10.7-GA Release (#1004)
Signed-off-by: Kevin Chen <[email protected]>
1 parent 4442153 commit 9c69a24

12 files changed, +224 -198 lines

CMakeLists.txt (+1 -1)

```diff
@@ -28,7 +28,7 @@ add_definitions("-DSOURCE_LENGTH=${SOURCE_LENGTH}")
 # Version information
 #--------------------------------------------------
 set(ONNX2TRT_MAJOR 10)
-set(ONNX2TRT_MINOR 6)
+set(ONNX2TRT_MINOR 7)
 set(ONNX2TRT_PATCH 0)
 set(ONNX2TRT_VERSION "${ONNX2TRT_MAJOR}.${ONNX2TRT_MINOR}.${ONNX2TRT_PATCH}" CACHE STRING "ONNX2TRT version")
```

ImporterContext.hpp (+7)

```diff
@@ -378,6 +378,13 @@ class ImporterContext
     {
         return mBaseNameScopeStack.size();
     }
+
+    // Returns if the underlying network was created with the kSTRONGLY_TYPED flag.
+    bool const isStronglyTyped()
+    {
+        assert(mNetwork != nullptr);
+        return mNetwork->getFlag(nvinfer1::NetworkDefinitionCreationFlag::kSTRONGLY_TYPED);
+    }
 };

 typedef std::vector<TensorOrWeights> NodeOutputs;
```
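
For context, `isStronglyTyped()` reads back the creation flags of the network being populated. A minimal sketch of how an application creates a network that this accessor would report as strongly typed, assuming TensorRT 10 headers and a caller-supplied `builder` (this example is not part of the commit):

```cpp
#include <cstdint>

#include "NvInfer.h"

nvinfer1::INetworkDefinition* makeStronglyTypedNetwork(nvinfer1::IBuilder& builder)
{
    // kSTRONGLY_TYPED is a bit position; createNetworkV2 takes the bit mask.
    uint32_t const flags
        = 1U << static_cast<uint32_t>(nvinfer1::NetworkDefinitionCreationFlag::kSTRONGLY_TYPED);
    nvinfer1::INetworkDefinition* network = builder.createNetworkV2(flags);
    // The accessor added above queries this same flag back via
    // network->getFlag(nvinfer1::NetworkDefinitionCreationFlag::kSTRONGLY_TYPED).
    return network;
}
```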

ModelImporter.cpp (+37 -20)

```diff
@@ -108,6 +108,18 @@ static std::string makeErrorExplanation(std::exception const& e, std::string con
     return result.str();
 }
 
+bool isNodeInPluginRegistry(ImporterContext* ctx, ::ONNX_NAMESPACE::NodeProto const& node)
+{
+    OnnxAttrs attrs(node, ctx);
+    std::string const pluginVersion{attrs.get<std::string>("plugin_version", "1")};
+    std::string const pluginNamespace{attrs.get<std::string>("plugin_namespace", "")};
+    LOG_INFO("Checking if node can be treated as plugin: " << node.op_type() << ", plugin_version: " << pluginVersion
+                                                           << ", plugin_namespace: " << pluginNamespace);
+    nvinfer1::IPluginCreatorInterface* creator
+        = importPluginCreator(ctx, node.op_type(), pluginVersion, pluginNamespace);
+    return creator;
+}
+
 void parseNode(
     ImporterContext* ctx, ::ONNX_NAMESPACE::NodeProto const& node, size_t const nodeIdx, bool deserializingINetwork)
 {
@@ -158,8 +170,17 @@ void parseNode(
     }
     else if (ctx->localFunctions().count(nodeType))
     {
-        LOG_INFO("Found regisitered local function: " << nodeType << ". Importing as a local function.");
-        importFunc = &opImporters.at("LocalFunctionImporter");
+        // Let plugin take precedence over local function. So first check if this can be dispatched to a plugin.
+        if (isNodeInPluginRegistry(ctx, node))
+        {
+            LOG_INFO("Found registered plugin: " << nodeType << ". Importing local function as a plugin.");
+            importFunc = &opImporters.at("FallbackPluginImporter");
+        }
+        else
+        {
+            LOG_INFO("Found registered local function: " << nodeType << ". Importing as a local function.");
+            importFunc = &opImporters.at("LocalFunctionImporter");
+        }
     }
     else
     {
@@ -295,8 +316,17 @@ void parseNodeStaticCheck(
     }
     else if (ctx->localFunctions().count(nodeType))
    {
-        LOG_INFO("Found regisitered local function: " << nodeType << ". Checking as a local function.");
-        checkerFunc = &opCheckers.at("LocalFunctionImporter");
+        // Let plugin take precedence over local function. So first check if this can be dispatched to a plugin.
+        if (isNodeInPluginRegistry(ctx, node))
+        {
+            LOG_INFO("Found registered plugin: " << nodeType << ". Importing local function as a plugin.");
+            checkerFunc = &opCheckers.at("FallbackPluginImporter");
+        }
+        else
+        {
+            LOG_INFO("Found registered local function: " << nodeType << ". Importing as a local function.");
+            checkerFunc = &opCheckers.at("LocalFunctionImporter");
+        }
     }
     else
     {
@@ -447,17 +477,6 @@ void importLocalFunctions(ImporterContext* ctx, ::ONNX_NAMESPACE::ModelProto con
     }
 }
 
-// Internal helper function used for ONNXRT-TRT EP to filter out DDS nodes
-bool isDDSOp(char const* op_name)
-{
-    auto is = [op_name](char const* name) { return std::strcmp(op_name, name) == 0; };
-    if (is("NonMaxSuppression") || is("NonZero") || is("RoiAlign"))
-    {
-        return true;
-    }
-    return false;
-}
-
 std::pair<bool, ModelImporter::SubGraphSupportVector_t> ModelImporter::doSupportsModel(
     void const* serialized_onnx_model, size_t serialized_onnx_model_size, char const* model_path)
 {
@@ -524,13 +543,11 @@ std::pair<bool, ModelImporter::SubGraphSupportVector_t> ModelImporter::doSupport
     {
         ::ONNX_NAMESPACE::NodeProto const& node = model.graph().node(node_idx);
         // Add the node to the subgraph if:
-        // 1. It is not a node that requires DDS
-        // 2. It is not directly connected to an unsupported input
-        // 3. The importer function did not throw an assertion
-        bool unsupportedDDS = isDDSOp(node.op_type().c_str());
+        // 1. It is not directly connected to an unsupported input
+        // 2. The importer function did not throw an assertion
         bool unsupportedInput = (input_node.empty()) ? false : checkForInput(node);
         bool unsuccessfulParse = node_idx == error_node;
-        if (!unsupportedDDS && !unsupportedInput && !unsuccessfulParse)
+        if (!unsupportedInput && !unsuccessfulParse)
         {
             if (newSubGraph)
             {
```
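
To check from application code whether a custom op would be dispatched to a plugin the way `isNodeInPluginRegistry` does, one can query the global plugin registry with the same defaults the parser uses (`plugin_version` "1", empty `plugin_namespace`). A hedged sketch, not part of this commit, assuming TensorRT 10's `IPluginRegistry::getCreator` and a hypothetical op type `"MyCustomOp"`:

```cpp
#include <iostream>

#include "NvInferRuntime.h"

int main()
{
    // The parser defaults to plugin_version "1" and an empty plugin_namespace
    // when the node carries no plugin_version / plugin_namespace attributes.
    nvinfer1::IPluginRegistry* registry = getPluginRegistry();
    nvinfer1::IPluginCreatorInterface* creator
        = registry->getCreator("MyCustomOp", "1", ""); // "MyCustomOp" is hypothetical
    std::cout << (creator != nullptr ? "dispatches to a plugin"
                                     : "falls back to the local function, if registered")
              << std::endl;
    return 0;
}
```

Registering a creator for the op type before parsing (e.g. via `IPluginRegistry::registerCreator`) is what flips the dispatch above from `LocalFunctionImporter` to `FallbackPluginImporter`.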

NvOnnxParser.h (+3)

```diff
@@ -175,6 +175,9 @@ class IParserError
 //!
 //! \brief an object for parsing ONNX models into a TensorRT network definition
 //!
+//! \warning If the ONNX model has a graph output with the same name as a graph input,
+//! the output will be renamed by prepending "__".
+//!
 //! \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI.
 //!
 class IParser
```
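
Because the rename happens inside the parser, robust code reads output names back from the parsed network instead of hard-coding names from the ONNX file. A small sketch under that assumption (not part of this commit):

```cpp
#include <cstdint>
#include <iostream>

#include "NvInfer.h"

void printOutputNames(nvinfer1::INetworkDefinition& network)
{
    for (int32_t i = 0; i < network.getNbOutputs(); ++i)
    {
        // An ONNX output "x" colliding with an input "x" shows up here as "__x".
        std::cout << "output " << i << ": " << network.getOutput(i)->getName() << "\n";
    }
}
```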

README.md (+4 -4)

```diff
@@ -16,7 +16,7 @@ For press and other inquiries, please contact Hector Marinez at hmarinez@nvidia.
 
 ## Supported TensorRT Versions
 
-Development on the this branch is for the latest version of [TensorRT 10.6](https://developer.nvidia.com/nvidia-tensorrt-download) with full-dimensions and dynamic shape support.
+Development on the this branch is for the latest version of [TensorRT 10.7](https://developer.nvidia.com/nvidia-tensorrt-download) with full-dimensions and dynamic shape support.
 
 For previous versions of TensorRT, refer to their respective branches.
 
@@ -29,8 +29,8 @@ Current supported ONNX operators are found in the [operator support matrix](docs
 ### Dependencies
 
 - [Protobuf >= 3.0.x](https://github.com/google/protobuf/releases)
-- [TensorRT 10.6](https://developer.nvidia.com/tensorrt)
-- [TensorRT 10.6 open source libaries] (https://github.com/NVIDIA/TensorRT/)
+- [TensorRT 10.7](https://developer.nvidia.com/tensorrt)
+- [TensorRT 10.7 open source libaries] (https://github.com/NVIDIA/TensorRT/)
 
 ### Building
 
@@ -82,7 +82,7 @@ Refer to the link or run `polygraphy run -h` for more information on CLI options
 
 Python bindings for the ONNX-TensorRT parser are packaged in the shipped `.whl` files.
 
-TensorRT 10.6 supports ONNX release 1.17.0. Install it with:
+TensorRT 10.7 supports ONNX release 1.17.0. Install it with:
 
     python3 -m pip install onnx==1.17.0
 
```
RNNHelpers.cpp (+7 -7)

```diff
@@ -24,7 +24,7 @@ nvinfer1::ITensor* addRNNInput(ImporterContext* ctx, const ::ONNX_NAMESPACE::Nod
 
     if (direction == "forward")
     {
-        iterationInput = unsqueezeTensor(ctx, node, *N_CHECK(loop->addIterator(*input)->getOutput(0)), std::vector<int>{0});
+        iterationInput = unsqueezeTensor(ctx, *N_CHECK(loop->addIterator(*input)->getOutput(0)), std::vector<int>{0});
 
         if (isRagged)
         {
@@ -38,7 +38,7 @@ nvinfer1::ITensor* addRNNInput(ImporterContext* ctx, const ::ONNX_NAMESPACE::Nod
         nvinfer1::IIteratorLayer* reverseIterator = N_CHECK(loop->addIterator(*input));
         reverseIterator->setReverse(true);
         auto reverseIteratorOutput = N_CHECK(reverseIterator->getOutput(0));
-        iterationInput = unsqueezeTensor(ctx, node, *reverseIteratorOutput, std::vector<int>{0});
+        iterationInput = unsqueezeTensor(ctx, *reverseIteratorOutput, std::vector<int>{0});
         if (isRagged)
         {
             nvinfer1::ITensor* seqLens = &convertToTensor(inputs.at(sequenceLenIndex), ctx);
@@ -52,8 +52,8 @@ nvinfer1::ITensor* addRNNInput(ImporterContext* ctx, const ::ONNX_NAMESPACE::Nod
         nvinfer1::IIteratorLayer* reverse = N_CHECK(loop->addIterator(*input));
         reverse->setReverse(true);
 
-        auto forwardInput = unsqueezeTensor(ctx, node, *N_CHECK(forward->getOutput(0)), std::vector<int>{0});
-        auto reverseInput = unsqueezeTensor(ctx, node, *N_CHECK(reverse->getOutput(0)), std::vector<int>{0});
+        auto forwardInput = unsqueezeTensor(ctx, *N_CHECK(forward->getOutput(0)), std::vector<int>{0});
+        auto reverseInput = unsqueezeTensor(ctx, *N_CHECK(reverse->getOutput(0)), std::vector<int>{0});
         if (isRagged)
         {
             nvinfer1::ITensor* seqLens = &convertToTensor(inputs.at(sequenceLenIndex), ctx);
@@ -165,16 +165,16 @@ nvinfer1::ITensor* getRaggedMask(ImporterContext* ctx, const ::ONNX_NAMESPACE::N
     nvinfer1::ITensor* seqMask;
     if (reverse)
     {
-        counter
-            = getElementWiseResult(ctx, *unsqueezeTensor(ctx, node, *maxLen, {0}), *counter, nvinfer1::ElementWiseOperation::kSUB);
+        counter = getElementWiseResult(
+            ctx, *unsqueezeTensor(ctx, *maxLen, {0}), *counter, nvinfer1::ElementWiseOperation::kSUB);
         seqMask = getElementWiseResult(ctx, *seqLens, *counter, nvinfer1::ElementWiseOperation::kLESS);
         seqMask = getUnaryResult(ctx, *seqMask, nvinfer1::UnaryOperation::kNOT);
     }
     else
     {
         seqMask = getElementWiseResult(ctx, *counter, *seqLens, nvinfer1::ElementWiseOperation::kLESS);
     }
-    return unsqueezeTensor(ctx, node, *seqMask, std::vector<int>{0, 2});
+    return unsqueezeTensor(ctx, *seqMask, std::vector<int>{0, 2});
 }
 
 } // namespace onnx2trt
```

docs/Changelog.md (+7)

```diff
@@ -2,6 +2,13 @@
 
 # ONNX-TensorRT Changelog
 
+# TensorRT 10.7 GA Release - 2024-12-3
+For more details, see the 10.7 GA release notes
+
+- Now prioritizes using plugins over local functions when a corresponding plugin is available in the registry
+- Added dynamic axes support for `Squeeze` and `Unsqueeze` operations
+- Added support for parsing mixed-precision `BatchNormalization` nodes in strongly-typed mode
+
 # TensorRT 10.6 GA Release - 2024-11-1
 For more details, see the 10.6 GA release notes
 
```
docs/operators.md (+3 -3)

```diff
@@ -2,7 +2,7 @@
 
 # Supported ONNX Operators
 
-TensorRT 10.6 supports operators in the inclusive range of opset 9 to opset 22. Latest information of ONNX operators can be found [here](https://github.com/onnx/onnx/blob/main/docs/Operators.md). More details and limitations are documented in the chart below.
+TensorRT 10.7 supports operators in the inclusive range of opset 9 to opset 22. Latest information of ONNX operators can be found [here](https://github.com/onnx/onnx/blob/main/docs/Operators.md). More details and limitations are documented in the chart below.
 
 TensorRT supports the following ONNX data types: DOUBLE, FLOAT32, FLOAT16, BFLOAT16, INT32, INT64, FP8, INT8, INT4, UINT8, and BOOL
 
@@ -193,7 +193,7 @@ TensorRT supports the following ONNX data types: DOUBLE, FLOAT32, FLOAT16, BFLOA
 | Split | Y | FP32, FP16, BF16, INT32, INT64, BOOL | |
 | SplitToSequence | N |
 | Sqrt | Y | FP32, FP16, BF16 |
-| Squeeze | Y | FP32, FP16, BF16, INT32, INT64, BOOL | `axes` must be an initializer |
+| Squeeze | Y | FP32, FP16, BF16, INT32, INT64, BOOL | `axes` must be resolvable to a constant. |
 | StringConcat | N |
 | StringNormalizer | N |
 | StringSplit | N |
@@ -208,7 +208,7 @@ TensorRT supports the following ONNX data types: DOUBLE, FLOAT32, FLOAT16, BFLOA
 | Transpose | Y | FP32, FP16, BF16, INT32, INT64, BOOL |
 | Trilu | Y | FP32, FP16, BF16, INT32, INT64, BOOL |
 | Unique | N |
-| Unsqueeze | Y | FP32, FP16, BF16, INT32, INT64, BOOL | `axes` must be a constant tensor |
+| Unsqueeze | Y | FP32, FP16, BF16, INT32, INT64, BOOL | `axes` must be resolvable to a constant. |
 | Upsample | Y | FP32, FP16, BF16 |
 | Where | Y | FP32, FP16, BF16, INT32, INT64, BOOL |
 | Xor | Y | BOOL
```
