Commit 9c69a24

ONNX-TensorRT 10.7-GA Release (#1004)
Signed-off-by: Kevin Chen <[email protected]>
1 parent 4442153 commit 9c69a24

12 files changed, +224 -198 lines

CMakeLists.txt (+1 -1)

```diff
@@ -28,7 +28,7 @@ add_definitions("-DSOURCE_LENGTH=${SOURCE_LENGTH}")
 # Version information
 #--------------------------------------------------
 set(ONNX2TRT_MAJOR 10)
-set(ONNX2TRT_MINOR 6)
+set(ONNX2TRT_MINOR 7)
 set(ONNX2TRT_PATCH 0)
 set(ONNX2TRT_VERSION "${ONNX2TRT_MAJOR}.${ONNX2TRT_MINOR}.${ONNX2TRT_PATCH}" CACHE STRING "ONNX2TRT version")
```

ImporterContext.hpp (+7)

```diff
@@ -378,6 +378,13 @@ class ImporterContext
     {
         return mBaseNameScopeStack.size();
     }
+
+    // Returns if the underlying network was created with the kSTRONGLY_TYPED flag.
+    bool const isStronglyTyped()
+    {
+        assert(mNetwork != nullptr);
+        return mNetwork->getFlag(nvinfer1::NetworkDefinitionCreationFlag::kSTRONGLY_TYPED);
+    }
 };

 typedef std::vector<TensorOrWeights> NodeOutputs;
```
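
For context, `isStronglyTyped()` reads back the creation flags of the network being populated. A minimal sketch of how an application creates a network that this accessor would report as strongly typed, assuming TensorRT 10 headers and a caller-supplied `builder` (this example is not part of the commit):

```cpp
#include <cstdint>

#include "NvInfer.h"

nvinfer1::INetworkDefinition* makeStronglyTypedNetwork(nvinfer1::IBuilder& builder)
{
    // kSTRONGLY_TYPED is a bit position; createNetworkV2 takes the bit mask.
    uint32_t const flags
        = 1U << static_cast<uint32_t>(nvinfer1::NetworkDefinitionCreationFlag::kSTRONGLY_TYPED);
    nvinfer1::INetworkDefinition* network = builder.createNetworkV2(flags);
    // The accessor added above queries this same flag back via
    // network->getFlag(nvinfer1::NetworkDefinitionCreationFlag::kSTRONGLY_TYPED).
    return network;
}
```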

ModelImporter.cpp (+37 -20)

```diff
@@ -108,6 +108,18 @@ static std::string makeErrorExplanation(std::exception const& e, std::string con
     return result.str();
 }
 
+bool isNodeInPluginRegistry(ImporterContext* ctx, ::ONNX_NAMESPACE::NodeProto const& node)
+{
+    OnnxAttrs attrs(node, ctx);
+    std::string const pluginVersion{attrs.get<std::string>("plugin_version", "1")};
+    std::string const pluginNamespace{attrs.get<std::string>("plugin_namespace", "")};
+    LOG_INFO("Checking if node can be treated as plugin: " << node.op_type() << ", plugin_version: " << pluginVersion
+                                                           << ", plugin_namespace: " << pluginNamespace);
+    nvinfer1::IPluginCreatorInterface* creator
+        = importPluginCreator(ctx, node.op_type(), pluginVersion, pluginNamespace);
+    return creator;
+}
+
 void parseNode(
     ImporterContext* ctx, ::ONNX_NAMESPACE::NodeProto const& node, size_t const nodeIdx, bool deserializingINetwork)
 {
@@ -158,8 +170,17 @@ void parseNode(
     }
     else if (ctx->localFunctions().count(nodeType))
     {
-        LOG_INFO("Found regisitered local function: " << nodeType << ". Importing as a local function.");
-        importFunc = &opImporters.at("LocalFunctionImporter");
+        // Let plugin take precedence over local function. So first check if this can be dispatched to a plugin.
+        if (isNodeInPluginRegistry(ctx, node))
+        {
+            LOG_INFO("Found registered plugin: " << nodeType << ". Importing local function as a plugin.");
+            importFunc = &opImporters.at("FallbackPluginImporter");
+        }
+        else
+        {
+            LOG_INFO("Found registered local function: " << nodeType << ". Importing as a local function.");
+            importFunc = &opImporters.at("LocalFunctionImporter");
+        }
     }
     else
     {
@@ -295,8 +316,17 @@ void parseNodeStaticCheck(
     }
     else if (ctx->localFunctions().count(nodeType))
    {
-        LOG_INFO("Found regisitered local function: " << nodeType << ". Checking as a local function.");
-        checkerFunc = &opCheckers.at("LocalFunctionImporter");
+        // Let plugin take precedence over local function. So first check if this can be dispatched to a plugin.
+        if (isNodeInPluginRegistry(ctx, node))
+        {
+            LOG_INFO("Found registered plugin: " << nodeType << ". Importing local function as a plugin.");
+            checkerFunc = &opCheckers.at("FallbackPluginImporter");
+        }
+        else
+        {
+            LOG_INFO("Found registered local function: " << nodeType << ". Importing as a local function.");
+            checkerFunc = &opCheckers.at("LocalFunctionImporter");
+        }
     }
     else
     {
@@ -447,17 +477,6 @@ void importLocalFunctions(ImporterContext* ctx, ::ONNX_NAMESPACE::ModelProto con
     }
 }
 
-// Internal helper function used for ONNXRT-TRT EP to filter out DDS nodes
-bool isDDSOp(char const* op_name)
-{
-    auto is = [op_name](char const* name) { return std::strcmp(op_name, name) == 0; };
-    if (is("NonMaxSuppression") || is("NonZero") || is("RoiAlign"))
-    {
-        return true;
-    }
-    return false;
-}
-
 std::pair<bool, ModelImporter::SubGraphSupportVector_t> ModelImporter::doSupportsModel(
     void const* serialized_onnx_model, size_t serialized_onnx_model_size, char const* model_path)
 {
@@ -524,13 +543,11 @@ std::pair<bool, ModelImporter::SubGraphSupportVector_t> ModelImporter::doSupport
     {
         ::ONNX_NAMESPACE::NodeProto const& node = model.graph().node(node_idx);
         // Add the node to the subgraph if:
-        // 1. It is not a node that requires DDS
-        // 2. It is not directly connected to an unsupported input
-        // 3. The importer function did not throw an assertion
-        bool unsupportedDDS = isDDSOp(node.op_type().c_str());
+        // 1. It is not directly connected to an unsupported input
+        // 2. The importer function did not throw an assertion
         bool unsupportedInput = (input_node.empty()) ? false : checkForInput(node);
         bool unsuccessfulParse = node_idx == error_node;
-        if (!unsupportedDDS && !unsupportedInput && !unsuccessfulParse)
+        if (!unsupportedInput && !unsuccessfulParse)
         {
             if (newSubGraph)
             {
```
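
To check from application code whether a custom op would be dispatched to a plugin the way `isNodeInPluginRegistry` does, one can query the global plugin registry with the same defaults the parser uses (`plugin_version` "1", empty `plugin_namespace`). A hedged sketch, not part of this commit, assuming TensorRT 10's `IPluginRegistry::getCreator` and a hypothetical op type `"MyCustomOp"`:

```cpp
#include <iostream>

#include "NvInferRuntime.h"

int main()
{
    // The parser defaults to plugin_version "1" and an empty plugin_namespace
    // when the node carries no plugin_version / plugin_namespace attributes.
    nvinfer1::IPluginRegistry* registry = getPluginRegistry();
    nvinfer1::IPluginCreatorInterface* creator
        = registry->getCreator("MyCustomOp", "1", ""); // "MyCustomOp" is hypothetical
    std::cout << (creator != nullptr ? "dispatches to a plugin"
                                     : "falls back to the local function, if registered")
              << std::endl;
    return 0;
}
```

Registering a creator for the op type before parsing (e.g. via `IPluginRegistry::registerCreator`) is what flips the dispatch above from `LocalFunctionImporter` to `FallbackPluginImporter`.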

NvOnnxParser.h (+3)

```diff
@@ -175,6 +175,9 @@ class IParserError
 //!
 //! \brief an object for parsing ONNX models into a TensorRT network definition
 //!
+//! \warning If the ONNX model has a graph output with the same name as a graph input,
+//! the output will be renamed by prepending "__".
+//!
 //! \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI.
 //!
 class IParser
```
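
Because the rename happens inside the parser, robust code reads output names back from the parsed network instead of hard-coding names from the ONNX file. A small sketch under that assumption (not part of this commit):

```cpp
#include <cstdint>
#include <iostream>

#include "NvInfer.h"

void printOutputNames(nvinfer1::INetworkDefinition& network)
{
    for (int32_t i = 0; i < network.getNbOutputs(); ++i)
    {
        // An ONNX output "x" colliding with an input "x" shows up here as "__x".
        std::cout << "output " << i << ": " << network.getOutput(i)->getName() << "\n";
    }
}
```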

README.md (+4 -4)

```diff
@@ -16,7 +16,7 @@ For press and other inquiries, please contact Hector Marinez at hmarinez@nvidia.
 
 ## Supported TensorRT Versions
 
-Development on the this branch is for the latest version of [TensorRT 10.6](https://developer.nvidia.com/nvidia-tensorrt-download) with full-dimensions and dynamic shape support.
+Development on the this branch is for the latest version of [TensorRT 10.7](https://developer.nvidia.com/nvidia-tensorrt-download) with full-dimensions and dynamic shape support.
 
 For previous versions of TensorRT, refer to their respective branches.
 
@@ -29,8 +29,8 @@ Current supported ONNX operators are found in the [operator support matrix](docs
 ### Dependencies
 
 - [Protobuf >= 3.0.x](https://github.com/google/protobuf/releases)
-- [TensorRT 10.6](https://developer.nvidia.com/tensorrt)
-- [TensorRT 10.6 open source libaries] (https://github.com/NVIDIA/TensorRT/)
+- [TensorRT 10.7](https://developer.nvidia.com/tensorrt)
+- [TensorRT 10.7 open source libaries] (https://github.com/NVIDIA/TensorRT/)
 
 ### Building
 
@@ -82,7 +82,7 @@ Refer to the link or run `polygraphy run -h` for more information on CLI options
 
 Python bindings for the ONNX-TensorRT parser are packaged in the shipped `.whl` files.
 
-TensorRT 10.6 supports ONNX release 1.17.0. Install it with:
+TensorRT 10.7 supports ONNX release 1.17.0. Install it with:
 
     python3 -m pip install onnx==1.17.0
 
```
RNNHelpers.cpp (+7 -7)

```diff
@@ -24,7 +24,7 @@ nvinfer1::ITensor* addRNNInput(ImporterContext* ctx, const ::ONNX_NAMESPACE::Nod
 
     if (direction == "forward")
     {
-        iterationInput = unsqueezeTensor(ctx, node, *N_CHECK(loop->addIterator(*input)->getOutput(0)), std::vector<int>{0});
+        iterationInput = unsqueezeTensor(ctx, *N_CHECK(loop->addIterator(*input)->getOutput(0)), std::vector<int>{0});
 
         if (isRagged)
         {
@@ -38,7 +38,7 @@ nvinfer1::ITensor* addRNNInput(ImporterContext* ctx, const ::ONNX_NAMESPACE::Nod
         nvinfer1::IIteratorLayer* reverseIterator = N_CHECK(loop->addIterator(*input));
         reverseIterator->setReverse(true);
         auto reverseIteratorOutput = N_CHECK(reverseIterator->getOutput(0));
-        iterationInput = unsqueezeTensor(ctx, node, *reverseIteratorOutput, std::vector<int>{0});
+        iterationInput = unsqueezeTensor(ctx, *reverseIteratorOutput, std::vector<int>{0});
         if (isRagged)
         {
             nvinfer1::ITensor* seqLens = &convertToTensor(inputs.at(sequenceLenIndex), ctx);
@@ -52,8 +52,8 @@ nvinfer1::ITensor* addRNNInput(ImporterContext* ctx, const ::ONNX_NAMESPACE::Nod
         nvinfer1::IIteratorLayer* reverse = N_CHECK(loop->addIterator(*input));
         reverse->setReverse(true);
 
-        auto forwardInput = unsqueezeTensor(ctx, node, *N_CHECK(forward->getOutput(0)), std::vector<int>{0});
-        auto reverseInput = unsqueezeTensor(ctx, node, *N_CHECK(reverse->getOutput(0)), std::vector<int>{0});
+        auto forwardInput = unsqueezeTensor(ctx, *N_CHECK(forward->getOutput(0)), std::vector<int>{0});
+        auto reverseInput = unsqueezeTensor(ctx, *N_CHECK(reverse->getOutput(0)), std::vector<int>{0});
         if (isRagged)
         {
             nvinfer1::ITensor* seqLens = &convertToTensor(inputs.at(sequenceLenIndex), ctx);
@@ -165,16 +165,16 @@ nvinfer1::ITensor* getRaggedMask(ImporterContext* ctx, const ::ONNX_NAMESPACE::N
     nvinfer1::ITensor* seqMask;
     if (reverse)
     {
-        counter
-            = getElementWiseResult(ctx, *unsqueezeTensor(ctx, node, *maxLen, {0}), *counter, nvinfer1::ElementWiseOperation::kSUB);
+        counter = getElementWiseResult(
+            ctx, *unsqueezeTensor(ctx, *maxLen, {0}), *counter, nvinfer1::ElementWiseOperation::kSUB);
         seqMask = getElementWiseResult(ctx, *seqLens, *counter, nvinfer1::ElementWiseOperation::kLESS);
         seqMask = getUnaryResult(ctx, *seqMask, nvinfer1::UnaryOperation::kNOT);
     }
     else
     {
         seqMask = getElementWiseResult(ctx, *counter, *seqLens, nvinfer1::ElementWiseOperation::kLESS);
     }
-    return unsqueezeTensor(ctx, node, *seqMask, std::vector<int>{0, 2});
+    return unsqueezeTensor(ctx, *seqMask, std::vector<int>{0, 2});
 }
 
 } // namespace onnx2trt
```

docs/Changelog.md (+7)

```diff
@@ -2,6 +2,13 @@
 
 # ONNX-TensorRT Changelog
 
+# TensorRT 10.7 GA Release - 2024-12-3
+For more details, see the 10.7 GA release notes
+
+- Now prioritizes using plugins over local functions when a corresponding plugin is available in the registry
+- Added dynamic axes support for `Squeeze` and `Unsqueeze` operations
+- Added support for parsing mixed-precision `BatchNormalization` nodes in strongly-typed mode
+
 # TensorRT 10.6 GA Release - 2024-11-1
 For more details, see the 10.6 GA release notes
 
```
docs/operators.md (+3 -3)

```diff
@@ -2,7 +2,7 @@
 
 # Supported ONNX Operators
 
-TensorRT 10.6 supports operators in the inclusive range of opset 9 to opset 22. Latest information of ONNX operators can be found [here](https://github.com/onnx/onnx/blob/main/docs/Operators.md). More details and limitations are documented in the chart below.
+TensorRT 10.7 supports operators in the inclusive range of opset 9 to opset 22. Latest information of ONNX operators can be found [here](https://github.com/onnx/onnx/blob/main/docs/Operators.md). More details and limitations are documented in the chart below.
 
 TensorRT supports the following ONNX data types: DOUBLE, FLOAT32, FLOAT16, BFLOAT16, INT32, INT64, FP8, INT8, INT4, UINT8, and BOOL
 
@@ -193,7 +193,7 @@ TensorRT supports the following ONNX data types: DOUBLE, FLOAT32, FLOAT16, BFLOA
 | Split | Y | FP32, FP16, BF16, INT32, INT64, BOOL | |
 | SplitToSequence | N |
 | Sqrt | Y | FP32, FP16, BF16 |
-| Squeeze | Y | FP32, FP16, BF16, INT32, INT64, BOOL | `axes` must be an initializer |
+| Squeeze | Y | FP32, FP16, BF16, INT32, INT64, BOOL | `axes` must be resolvable to a constant. |
 | StringConcat | N |
 | StringNormalizer | N |
 | StringSplit | N |
@@ -208,7 +208,7 @@ TensorRT supports the following ONNX data types: DOUBLE, FLOAT32, FLOAT16, BFLOA
 | Transpose | Y | FP32, FP16, BF16, INT32, INT64, BOOL |
 | Trilu | Y | FP32, FP16, BF16, INT32, INT64, BOOL |
 | Unique | N |
-| Unsqueeze | Y | FP32, FP16, BF16, INT32, INT64, BOOL | `axes` must be a constant tensor |
+| Unsqueeze | Y | FP32, FP16, BF16, INT32, INT64, BOOL | `axes` must be resolvable to a constant. |
 | Upsample | Y | FP32, FP16, BF16 |
 | Where | Y | FP32, FP16, BF16, INT32, INT64, BOOL |
 | Xor | Y | BOOL
```
