
Commit c76a28a

feat: Enable TRT 8.0 QAT functionality in TRTorch
Signed-off-by: Dheeraj Peri <[email protected]>
Parent: 5708634

7 files changed (+81 −20 lines)


Diff for: core/conversion/conversionctx/ConversionCtx.cpp (+2 −1)

@@ -72,7 +72,8 @@ ConversionCtx::ConversionCtx(BuilderSettings build_settings)
       input_type = nvinfer1::DataType::kFLOAT;
       // TRTORCH_CHECK(
       //     settings.calibrator != nullptr,
-      //     "Requested inference in INT8 but no calibrator provided, set the ptq_calibrator field in the CompileSpec struct with your calibrator");
+      //     "Requested inference in INT8 but no calibrator provided, set the ptq_calibrator field in the CompileSpec
+      //     struct with your calibrator");
       // cfg->setInt8Calibrator(settings.calibrator);
       break;
     case nvinfer1::DataType::kFLOAT:
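
The calibrator check and the setInt8Calibrator call stay commented out because a TRT 8.0 QAT network carries its own quantization scales as explicit Quantize/Dequantize ops, so INT8 no longer requires a PTQ calibrator. A minimal sketch of the builder-side difference (standard TensorRT 8 API; not code from this commit):

// Enabling INT8 for a QAT network: scales come from IQuantizeLayer /
// IDequantizeLayer nodes already present in the graph, so no calibrator
// is attached.
nvinfer1::IBuilderConfig* cfg = builder->createBuilderConfig();
cfg->setFlag(nvinfer1::BuilderFlag::kINT8);
// The PTQ path would instead call cfg->setInt8Calibrator(calibrator) here.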

Diff for: core/conversion/converters/impl/conv_deconv.cpp (+11 −9)

@@ -45,21 +45,23 @@ bool add_conv_deconv(ConversionCtx* ctx, const torch::jit::Node* n, args& args)
   if (args[2].IValue()->isTensor()) {
     bias = Weights(ctx, args[2].unwrapToTensor());
   } else {
-    bias = Weights(); //nvinfer1::Weights{nvinfer1::DataType::kFLOAT, nullptr, 0};
+    bias = Weights(); // nvinfer1::Weights{nvinfer1::DataType::kFLOAT, nullptr, 0};
   }

   // Handle case when weights of conv/deconv is an ITensor. This case happens for QAT networks where
   // conv_weights -> Quantize -> Dequantize -> new_conv_weights -> conv <- input
   // new_conv_weights will be an ITensor because it is an output of Dequantize layer defined in impl/quantization.cpp
-  if (args[1].isITensor()){
+  if (args[1].isITensor()) {
     // Get the kernel tensor
     auto kernel = args[1].ITensor();
     auto kernel_dims = kernel->getDimensions();

     // Make a new Dims with only the spatial dimensions.
     nvinfer1::Dims filter_dim;
     int64_t nbSpatialDims = in->getDimensions().nbDims - 2;
-    TRTORCH_CHECK(nbSpatialDims = kernel_dims.nbDims - 2, "Number of input spatial dimensions should match the kernel spatial dimensions");
+    TRTORCH_CHECK(
+        nbSpatialDims == kernel_dims.nbDims - 2,
+        "Number of input spatial dimensions should match the kernel spatial dimensions");
     filter_dim.nbDims = nbSpatialDims;
     filter_dim.d[0] = kernel_dims.d[2];
     filter_dim.d[1] = kernel_dims.d[3];
@@ -68,9 +70,9 @@ bool add_conv_deconv(ConversionCtx* ctx, const torch::jit::Node* n, args& args)
     auto kernel_weights = nvinfer1::Weights{nvinfer1::DataType::kFLOAT, nullptr, 0};

     nvinfer1::ILayer* layer = nullptr;
-    if (transposed){
-      nvinfer1::IDeconvolutionLayer* deconvLayer
-        = ctx->net->addDeconvolutionNd(*in, kernel_dims.d[0], filter_dim, kernel_weights, bias.data);
+    if (transposed) {
+      nvinfer1::IDeconvolutionLayer* deconvLayer =
+          ctx->net->addDeconvolutionNd(*in, kernel_dims.d[0], filter_dim, kernel_weights, bias.data);
       deconvLayer->setStrideNd(stride);
       deconvLayer->setDilationNd(dilation);
       deconvLayer->setNbGroups(groups);
@@ -79,9 +81,9 @@ bool add_conv_deconv(ConversionCtx* ctx, const torch::jit::Node* n, args& args)
       deconvLayer->setInput(1, *kernel);
       TRTORCH_CHECK(deconvLayer, "Unable to create deconv layer with non-const weights from node: " << *n);
       layer = deconvLayer;
-    } else{
-      nvinfer1::IConvolutionLayer* convLayer
-        = ctx->net->addConvolutionNd(*in, kernel_dims.d[0], filter_dim, kernel_weights, bias.data);
+    } else {
+      nvinfer1::IConvolutionLayer* convLayer =
+          ctx->net->addConvolutionNd(*in, kernel_dims.d[0], filter_dim, kernel_weights, bias.data);
       convLayer->setStrideNd(stride);
       convLayer->setPaddingMode(nvinfer1::PaddingMode::kCAFFE_ROUND_DOWN);
       convLayer->setPaddingNd(padding);
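
As the in-diff comment describes, in a QAT graph the conv kernel reaches the converter as an ITensor (the output of a Dequantize layer) rather than as constant weights. TensorRT's addConvolutionNd still requires a Weights argument, hence the empty dummy kernel_weights followed by setInput(1, ...) to wire in the real kernel. A self-contained sketch of that idiom (assumed TensorRT 8 API usage with illustrative names; not code from this commit):

#include <NvInfer.h>

// Build a conv whose kernel comes from a Q/DQ pair, mirroring the QAT pattern
// conv_weights -> Quantize -> Dequantize -> conv.
nvinfer1::ILayer* add_qat_conv(
    nvinfer1::INetworkDefinition* net,
    nvinfer1::ITensor& input,
    nvinfer1::ITensor& fp32_kernel, // weight constant, still FP32
    nvinfer1::ITensor& scale, // per-tensor quantization scale
    int nbOutputMaps,
    nvinfer1::Dims filter_dim) {
  auto q = net->addQuantize(fp32_kernel, scale); // FP32 -> INT8
  auto dq = net->addDequantize(*q->getOutput(0), scale); // INT8 -> FP32 (fake-quantized)
  auto empty = nvinfer1::Weights{nvinfer1::DataType::kFLOAT, nullptr, 0};
  auto conv = net->addConvolutionNd(input, nbOutputMaps, filter_dim, empty, empty);
  conv->setInput(1, *dq->getOutput(0)); // kernel supplied as a tensor, not Weights
  return conv;
}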

Diff for: core/conversion/converters/impl/linear.cpp (+5 −4)

@@ -42,16 +42,17 @@ auto linear_registrations TRTORCH_UNUSED = RegisterNodeConversionPatterns().patt

     // Get the bias
     Weights bias;
-    if(!args[2].IValue()->isNone()){
+    if (!args[2].IValue()->isNone()) {
       bias = Weights(ctx, args[2].IValue()->toTensor());
-    }else {
+    } else {
       bias = Weights();
     }

     // Handle case when weights of conv/deconv is an ITensor. This case happens for QAT networks where
     // conv_weights -> Quantize -> Dequantize -> new_conv_weights -> conv <- input
-    // new_conv_weights will be an ITensor because it is an output of Dequantize layer defined in impl/quantization.cpp
-    if(args[1].isITensor()){
+    // new_conv_weights will be an ITensor because it is an output of Dequantize layer defined in
+    // impl/quantization.cpp
+    if (args[1].isITensor()) {
       auto kernel_tensor = args[1].ITensor();
       auto kernel_dims = args[1].ITensor()->getDimensions();
       // Initialize a dummy constant kernel to pass it to INetwork->addConvolutionNd/addDeconvolutionNd API.

Diff for: core/conversion/converters/impl/matrix_multiply.cpp (+55)

@@ -1,3 +1,4 @@
+#include <torch/torch.h>
 #include "core/conversion/converters/converter_util.h"
 #include "core/conversion/converters/converters.h"
 #include "core/util/prelude.h"
@@ -72,6 +73,60 @@ auto mm_registrations TRTORCH_UNUSED =

               LOG_DEBUG("Output tensor shape: " << out_tensor->getDimensions());
               return true;
+            }})
+        .pattern(
+            {"aten::addmm(Tensor self, Tensor mat1, Tensor mat2, *, Scalar beta=1, Scalar alpha=1) -> (Tensor)",
+             [](ConversionCtx* ctx, const torch::jit::Node* n, args& args) -> bool {
+               auto self = args[0].ITensorOrFreeze(ctx);
+               auto mat1 = args[1].ITensorOrFreeze(ctx);
+               auto mat2 = args[2].ITensorOrFreeze(ctx);
+               auto beta = args[3].unwrapToScalar().to<float>();
+               auto betaTensor = tensor_to_const(ctx, torch::tensor({beta}));
+               auto alpha = args[4].unwrapToScalar().to<float>();
+               auto alphaTensor = tensor_to_const(ctx, torch::tensor({alpha}));
+
+               // Ensure mat1 and mat2 have the same nbDims by expanding the dimensions (from 0 axis) if
+               // necessary.
+               if (mat1->getDimensions().nbDims < mat2->getDimensions().nbDims) {
+                 mat1 = addPadding(ctx, n, mat1, mat2->getDimensions().nbDims, false, false);
+               } else {
+                 mat2 = addPadding(ctx, n, mat2, mat1->getDimensions().nbDims, false, false);
+               }
+
+               // Transpose mat2 by reversing its axes (the aten::t evaluator forwards ITensors untransposed).
+               auto mat2_dims = mat2->getDimensions();
+               nvinfer1::Permutation transpose_order;
+               for (int i = 0; i < mat2_dims.nbDims; i++) {
+                 transpose_order.order[i] = mat2_dims.nbDims - 1 - i;
+               }
+               auto shuffle_layer = ctx->net->addShuffle(*mat2);
+               shuffle_layer->setFirstTranspose(transpose_order);
+               mat2 = shuffle_layer->getOutput(0);
+
+               auto mm_layer = ctx->net->addMatrixMultiply(
+                   *mat1, nvinfer1::MatrixOperation::kNONE, *mat2, nvinfer1::MatrixOperation::kNONE);
+               TRTORCH_CHECK(mm_layer, "Unable to create matrix multiplication layer in node: " << *n);
+               auto mm_scale_layer = add_elementwise(
+                   ctx,
+                   nvinfer1::ElementWiseOperation::kPROD,
+                   mm_layer->getOutput(0),
+                   alphaTensor,
+                   util::node_info(n) + "_alphaScale");
+               TRTORCH_CHECK(mm_scale_layer, "Unable to create alpha scaling layer in node: " << *n);
+               auto beta_scale_layer = add_elementwise(
+                   ctx, nvinfer1::ElementWiseOperation::kPROD, self, betaTensor, util::node_info(n) + "_betaScale");
+               TRTORCH_CHECK(beta_scale_layer, "Unable to create beta scaling layer in node: " << *n);
+               auto add_mm_layer = add_elementwise(
+                   ctx,
+                   nvinfer1::ElementWiseOperation::kSUM,
+                   beta_scale_layer->getOutput(0),
+                   mm_scale_layer->getOutput(0),
+                   util::node_info(n));
+               TRTORCH_CHECK(add_mm_layer, "Unable to create addmm layer in node: " << *n);
+
+               auto out_tensor = ctx->AssociateValueAndTensor(n->outputs()[0], add_mm_layer->getOutput(0));
+
+               LOG_DEBUG("[AddMM layer] Output tensor shape: " << out_tensor->getDimensions());
+               return true;
             }});
 } // namespace
 } // namespace impl
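
The new converter decomposes addmm as out = β·self + α·(mat1 × mat2ᵀ). The mat2 transpose is not part of aten::addmm itself; it appears to compensate for the aten::t evaluator change below, which forwards ITensor weights without transposing them. A hypothetical libtorch check of the underlying identity (not part of the commit):

#include <iostream>
#include <torch/torch.h>

// Verify that beta * self + alpha * (mat1 @ mat2) matches aten::addmm.
// Here `w` plays the role of a linear weight that an upstream aten::t would
// normally transpose; the converter performs that transpose itself.
int main() {
  auto self = torch::randn({2, 4});
  auto mat1 = torch::randn({2, 3});
  auto w = torch::randn({4, 3});
  float beta = 0.5f, alpha = 2.0f;
  auto expected = torch::addmm(self, mat1, w.t(), beta, alpha);
  auto decomposed = beta * self + alpha * torch::matmul(mat1, w.t());
  std::cout << torch::allclose(expected, decomposed) << std::endl; // prints 1
}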

Diff for: core/conversion/evaluators/aten.cpp (+6 −1)

@@ -430,9 +430,14 @@ auto aten_registrations TRTORCH_UNUSED =
         .evaluator({c10::Symbol::fromQualString("aten::t"),
                     [](const torch::jit::Node* n, kwargs& args) -> c10::optional<torch::jit::IValue> {
                       auto tensor_var = args.at(n->input(0));
-                      if (tensor_var.IValue()->isTensor()) {
+                      if (tensor_var.isIValue() && tensor_var.IValue()->isTensor()) {
                         auto tensor = tensor_var.unwrapToTensor();
                         return tensor.t();
+                      } else if (tensor_var.isITensor()) {
+                        auto tensor_holder = TensorContainer();
+                        tensor_holder.hold_tensor(tensor_var.ITensor());
+                        auto ival = c10::IValue(std::move(c10::make_intrusive<TensorContainer>(tensor_holder)));
+                        return ival;
                       } else {
                         TRTORCH_THROW_ERROR("Unimplemented data type for aten::t evaluator: ITensor");
                         return {};
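
For an ITensor input, the evaluator cannot eagerly transpose, so it wraps the ITensor in a TensorContainer and returns it as-is inside an IValue; the actual transpose is left to downstream converters (the new aten::addmm converter above transposes mat2 for exactly this reason). A sketch of the consuming side (assumed, mirroring how TRTorch unwraps TensorContainer elsewhere in the codebase):

// Recover the ITensor held inside an IValue produced by the evaluator.
nvinfer1::ITensor* unwrap_itensor(const torch::jit::IValue& ival) {
  return ival.toCustomClass<TensorContainer>()->tensor();
}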

Diff for: core/lowering/lowering.cpp (+2 −2)

@@ -63,9 +63,9 @@ torch::jit::Module LowerModule(const torch::jit::script::Module& mod) {
 std::pair<std::shared_ptr<torch::jit::Graph>, std::vector<torch::jit::IValue>> Lower(
     const torch::jit::script::Module& mod,
     std::string method_name) {
-  auto lowered_mod = LowerModule(mod);
+  auto lowered_mod = mod; // LowerModule(mod);
   auto g = lowered_mod.get_method(method_name).graph();
-  LOG_GRAPH(*g);
+  LOG_INFO(*g);

   // Go through TRTorch Lowering to reformat graph to be conversion friendly
   // and also segment for accelerators and executors (TRT-DLA, TRT-GPU, PYT)
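
Note: LowerModule (which, among other things, freezes the module) is bypassed here, presumably so that freezing does not constant-fold away the Quantize/Dequantize nodes the QAT converters depend on, and the graph dump is promoted from LOG_GRAPH to LOG_INFO for easier inspection while this path is brought up.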

Diff for: core/plugins/impl/interpolate_plugin.cpp (−3)

@@ -105,7 +105,6 @@ std::vector<int64_t> InterpolatePlugin::getOutputSize() {
   return size_;
 }

-
 int InterpolatePlugin::getNbOutputs() const noexcept {
   if (mode_ == "adaptive_max_pool2d") {
     return 2;
@@ -170,7 +169,6 @@ nvinfer1::DataType InterpolatePlugin::getOutputDataType(int index, const nvinfer
   return nvinfer1::DataType::kFLOAT;
 }

-
 int InterpolatePlugin::initialize() noexcept {
   return 0;
 }
@@ -208,7 +206,6 @@ bool InterpolatePlugin::supportsFormatCombination(
     const nvinfer1::PluginTensorDesc* inOut,
     int nbInputs,
     int nbOutputs) noexcept {
-
   TRTORCH_ASSERT(nbInputs == 1, "Expected a single tensor as input to interpolate plugin");

   if (mode_ == "adaptive_max_pool2d") {
