Skip to content

Commit 5374283

Browse files
pranavm-nvidia and kevinch-nv
authored and committed
Fixes batchnorm importer in non-4D/5D cases (#569)
Signed-off-by: pranavm <[email protected]>
Signed-off-by: Kevin Chen <[email protected]>
1 parent 96064cb commit 5374283

File tree

3 files changed

+42
-90
lines changed

3 files changed

+42
-90
lines changed

builtin_op_importers.cpp

Lines changed: 11 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -186,8 +186,9 @@ NodeImportResult batchnormFallback(
186186
nvinfer1::ITensor* mean = &convertToTensor(inputs.at(3), ctx);
187187
nvinfer1::ITensor* variance = &convertToTensor(inputs.at(4), ctx);
188188

189-
const bool hasCDimension = rank > 1;
190-
if (hasCDimension)
189+
// Reshape batchnorm weights from [C] to [N, C, ...]
190+
const bool needsExpandDims = rank > 1;
191+
if (needsExpandDims)
191192
{
192193
std::vector<int> axes(rank - 1);
193194
axes[0] = 0;
@@ -223,7 +224,7 @@ NodeImportResult batchnormFallback(
223224
->getOutput(0),
224225
*bias, eOp::kSUM);
225226

226-
ctx->registerLayer(layer, node.name());
227+
ctx->registerLayer(layer, getNodeName(node));
227228

228229
RETURN_FIRST_OUTPUT(layer);
229230
}
@@ -254,25 +255,13 @@ DEFINE_BUILTIN_OP_IMPORTER(BatchNormalization)
254255
OnnxAttrs attrs(node, ctx);
255256
float eps = attrs.get<float>("epsilon", 1e-5f);
256257

257-
nvinfer1::Dims dims = tensorPtr->getDimensions();
258-
259-
bool needToExpandDims = (dims.nbDims == 3);
260-
if (needToExpandDims)
261-
{
262-
// Expand spatial dims from 1D to 2D
263-
std::vector<int> axes{3};
264-
tensorPtr = unsqueezeTensor(ctx, node, *tensorPtr, axes);
265-
ASSERT(tensorPtr, ErrorCode::kUNSUPPORTED_NODE);
266-
dims = tensorPtr->getDimensions();
267-
}
268-
269258
// Number of channels is equal to the length of scale_weights.
270259
int nchan = scale_weights.shape.d[0];
271260
nvinfer1::Dims weights_shape{1, {nchan}};
272-
ASSERT(scale_weights.shape == weights_shape, ErrorCode::kINVALID_NODE);
273-
ASSERT(bias_weights.shape == weights_shape, ErrorCode::kINVALID_NODE);
274-
ASSERT(mean_weights.shape == weights_shape, ErrorCode::kINVALID_NODE);
275-
ASSERT(variance_weights.shape == weights_shape, ErrorCode::kINVALID_NODE);
261+
ASSERT((scale_weights.shape == weights_shape) && "The shape of input scale must be (C)", ErrorCode::kINVALID_NODE);
262+
ASSERT((bias_weights.shape == weights_shape) && "The shape of input bias must be (C)", ErrorCode::kINVALID_NODE);
263+
ASSERT((mean_weights.shape == weights_shape) && "The shape of input mean must be (C)", ErrorCode::kINVALID_NODE);
264+
ASSERT((variance_weights.shape == weights_shape) && "The shape of input var must be (C)", ErrorCode::kINVALID_NODE);
276265
auto combined_scale_weights = ctx->createTempWeights(scale_weights.type, scale_weights.shape);
277266
auto combined_bias_weights = ctx->createTempWeights(bias_weights.type, bias_weights.shape);
278267
size_t nweight = nchan;
@@ -289,23 +278,9 @@ DEFINE_BUILTIN_OP_IMPORTER(BatchNormalization)
289278
combined_bias_ref = bias - mean * combined_scale_ref;
290279
}
291280

292-
// If dimensions were not expanded return the output of the scale operation
293-
if (!needToExpandDims)
294-
{
295-
return scaleHelper(
296-
ctx, node, *tensorPtr, nvinfer1::ScaleMode::kCHANNEL, combined_bias_weights, combined_scale_weights, {}, bias_weights.getName(), scale_weights.getName());
297-
}
298-
else
299-
{
300-
auto scaledResult = scaleHelper(
301-
ctx, node, *tensorPtr, nvinfer1::ScaleMode::kCHANNEL, combined_bias_weights, combined_scale_weights, {}, bias_weights.getName(), scale_weights.getName());
302-
// Squeeze spatial dims back to 1D
303-
tensorPtr = &convertToTensor(scaledResult.value().at(0), ctx);
304-
std::vector<int> axes{3};
305-
tensorPtr = squeezeTensor(ctx, node, *tensorPtr, axes);
306-
ASSERT(tensorPtr, ErrorCode::kUNSUPPORTED_NODE);
307-
return {{tensorPtr}};
308-
}
281+
return scaleHelper(ctx, node, *tensorPtr, nvinfer1::ScaleMode::kCHANNEL, combined_bias_weights,
282+
combined_scale_weights, ShapedWeights::empty(scale_weights.type), bias_weights.getName(),
283+
scale_weights.getName());
309284
}
310285

311286
DEFINE_BUILTIN_OP_IMPORTER(Cast)

onnx2trt_utils.cpp

Lines changed: 28 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -347,13 +347,13 @@ bool convertOnnxWeights(
347347
{
348348
continue;
349349
}
350-
else
350+
else
351351
{
352352
LOG_ERROR("Key value of: " << keyName << " was not expected!");
353353
return false;
354354
}
355355
}
356-
356+
357357
// Buffer to hold the data read from the file
358358
std::vector<char> dataBuf;
359359
// Will update dataBuf and nbytes by reference.
@@ -1315,7 +1315,7 @@ nvinfer1::Dims insertDimension(const nvinfer1::Dims& dims, const int axis, const
13151315
bool parseExternalWeights(IImporterContext* ctx, std::string file, std::string path, int offset, int length,
13161316
std::vector<char>& weightsBuf, size_t& size)
13171317
{
1318-
// The weight paths in the ONNX model are relative paths to the main ONNX file.
1318+
// The weight paths in the ONNX model are relative paths to the main ONNX file.
13191319
#ifdef _MSC_VER
13201320
size_t slash = path.rfind("\\");
13211321
#else
@@ -1486,71 +1486,47 @@ nvinfer1::ITensor* reshapeTensor(IImporterContext* ctx, nvinfer1::ITensor& tenso
14861486
return layer->getOutput(0);
14871487
}
14881488

1489-
NodeImportResult scaleHelper(IImporterContext* ctx, const ::ONNX_NAMESPACE::NodeProto& node, nvinfer1::ITensor& tensor_, nvinfer1::ScaleMode mode,
1490-
nvinfer1::Weights shift, nvinfer1::Weights scale, nvinfer1::Weights power, std::string shiftName, std::string scaleName)
1489+
NodeImportResult scaleHelper(IImporterContext* ctx, const ::ONNX_NAMESPACE::NodeProto& node, nvinfer1::ITensor& tensor_,
1490+
nvinfer1::ScaleMode mode, const nvinfer1::Weights& shift, const nvinfer1::Weights& scale,
1491+
const nvinfer1::Weights& power, const char* shiftName, const char* scaleName)
14911492
{
1492-
nvinfer1::ITensor* tensor_ptr = &tensor_;
1493-
nvinfer1::Dims dims = tensor_ptr->getDimensions();
1493+
nvinfer1::ITensor* tensorPtr = &tensor_;
1494+
const ShapeTensor origShape = shapeOf(*tensorPtr);
14941495

14951496
// TensorRT scale layers support 4D(NCHW) or 5D(NCDHW) input.
1496-
// For input other than 4D or 5D will be expanded to 4D.
1497-
int expectedNbDims = 4;
1498-
bool needToExpandDims = (dims.nbDims != 4 && dims.nbDims != 5);
1499-
nvinfer1::Dims orig_shape = dims;
1500-
if (needToExpandDims)
1497+
// For input other than 4D or 5D will be expanded or squeezed to 4D.
1498+
bool needToReshape = (origShape.size() != 4 && origShape.size() != 5);
1499+
if (needToReshape)
15011500
{
1502-
// Expand or squash dims to 4D
1503-
nvinfer1::Dims new_shape = dims;
1504-
while (new_shape.nbDims < expectedNbDims)
1501+
if (origShape.size() < 4)
15051502
{
1506-
new_shape.d[new_shape.nbDims++] = 1;
1503+
std::vector<int> expandAxes(4 - origShape.size());
1504+
std::iota(expandAxes.begin(), expandAxes.end(), origShape.size());
1505+
tensorPtr = unsqueezeTensor(ctx, node, *tensorPtr, expandAxes);
15071506
}
1508-
while (new_shape.nbDims > expectedNbDims)
1507+
else
15091508
{
1510-
new_shape.d[3] *= new_shape.d[--new_shape.nbDims];
1509+
// Collapse trailing dimensions if origShape.size() > 5
1510+
const ShapeTensor collapsedDim = product(ctx, origShape, 3, origShape.size(), 1);
1511+
const ShapeTensor collapsedShape = concat(ctx, gather(ctx, origShape, iotaShapeVector(3)), collapsedDim);
1512+
tensorPtr = &reshape(ctx, *tensorPtr, collapsedShape);
15111513
}
1512-
tensor_ptr = reshapeTensor(ctx, *tensor_ptr, new_shape);
1513-
ASSERT(tensor_ptr, ErrorCode::kUNSUPPORTED_NODE);
1514-
dims = tensor_ptr->getDimensions();
15151514
}
15161515

1517-
ASSERT(dims.nbDims == 4 || dims.nbDims == 5, ErrorCode::kUNSUPPORTED_NODE);
1518-
1519-
// Fill in dtype for any unused (dummy) weights
1520-
nvinfer1::DataType* dtype_ptr = nullptr;
1521-
if (shift.count)
1522-
{
1523-
dtype_ptr = &shift.type;
1524-
}
1525-
if (scale.count)
1526-
{
1527-
ASSERT(!dtype_ptr || *dtype_ptr == scale.type, ErrorCode::kUNSUPPORTED_NODE);
1528-
dtype_ptr = &scale.type;
1529-
}
1530-
if (power.count)
1531-
{
1532-
ASSERT(!dtype_ptr || *dtype_ptr == power.type, ErrorCode::kUNSUPPORTED_NODE);
1533-
dtype_ptr = &power.type;
1534-
}
1535-
ASSERT(dtype_ptr, ErrorCode::kINTERNAL_ERROR);
1536-
shift.type = *dtype_ptr;
1537-
scale.type = *dtype_ptr;
1538-
power.type = *dtype_ptr;
1539-
auto* layer = ctx->network()->addScaleNd(*tensor_ptr, mode, shift, scale, power, 1);
1540-
ASSERT(layer, ErrorCode::kUNSUPPORTED_NODE);
1516+
auto* layer = ctx->network()->addScaleNd(*tensorPtr, mode, shift, scale, power, 1);
1517+
ASSERT(layer && "Failed to add a Scale layer.", ErrorCode::kUNSUPPORTED_NODE);
15411518
// Register layer name, and shift and scale weight names for the refit map.
15421519
ctx->registerLayer(layer, getNodeName(node));
1543-
ctx->insertRefitMap(shiftName, getNodeName(node), nvinfer1::WeightsRole::kSHIFT);
1544-
ctx->insertRefitMap(scaleName, getNodeName(node), nvinfer1::WeightsRole::kSCALE);
1545-
tensor_ptr = layer->getOutput(0);
15461520

1547-
if (needToExpandDims)
1521+
tensorPtr = layer->getOutput(0);
1522+
1523+
if (needToReshape)
15481524
{
1549-
tensor_ptr = reshapeTensor(ctx, *tensor_ptr, orig_shape);
1550-
ASSERT(tensor_ptr, ErrorCode::kUNSUPPORTED_NODE);
1525+
tensorPtr = &reshape(ctx, *tensorPtr, origShape);
1526+
ASSERT(tensorPtr && "Failed to reshape tensor.", ErrorCode::kUNSUPPORTED_NODE);
15511527
}
15521528

1553-
return {{tensor_ptr}};
1529+
return {{tensorPtr}};
15541530
}
15551531

15561532
void setAttr(

onnx2trt_utils.hpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -286,8 +286,9 @@ NodeImportResult reduceTensor(IImporterContext* ctx, ::ONNX_NAMESPACE::NodeProto
286286
nvinfer1::ITensor* reshapeTensor(IImporterContext* ctx, nvinfer1::ITensor& tensor, nvinfer1::Dims shape);
287287

288288
// Helper function to map attributes to a TRT scale layer
289-
NodeImportResult scaleHelper(IImporterContext* ctx, const ::ONNX_NAMESPACE::NodeProto& node, nvinfer1::ITensor& tensor_, nvinfer1::ScaleMode mode,
290-
nvinfer1::Weights shift, nvinfer1::Weights scale, nvinfer1::Weights power, std::string shiftName, std::string scaleName);
289+
NodeImportResult scaleHelper(IImporterContext* ctx, const ::ONNX_NAMESPACE::NodeProto& node, nvinfer1::ITensor& tensor_,
290+
nvinfer1::ScaleMode mode, const nvinfer1::Weights& shift, const nvinfer1::Weights& scale,
291+
const nvinfer1::Weights& power, const char* shiftName, const char* scaleName);
291292

292293
// Helper function to set an ONNX attribute
293294
void setAttr(

0 commit comments

Comments (0)