
Commit 362c932

Merge pull request #156 from NVIDIA/prelu
Implements prelu and a broadcasting checker
2 parents fe06d09 + c066581 commit 362c932
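
For context: PReLU (aten::prelu) is a leaky-ReLU variant whose negative-side slope is a learned parameter, optionally one slope per channel. A minimal scalar sketch of the behavior the new converter maps onto TensorRT's parametric ReLU layer (illustration only, not code from this commit):

// Scalar PReLU reference: identity for non-negative inputs, scaled negatives otherwise.
inline float prelu_ref(float x, float slope) {
  return x >= 0.0f ? x : slope * x;
}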

5 files changed, +154 -26 lines changed

Diff for: core/conversion/converters/converters.h

+4-1
@@ -55,7 +55,10 @@ struct Weights {
 
 inline nvinfer1::ITensor* tensor_to_const(ConversionCtx* ctx, at::Tensor t) {
   auto t_weights = Weights(ctx, t);
-  return ctx->net->addConstant(t_weights.shape, t_weights.data)->getOutput(0);
+  auto const_layer = ctx->net->addConstant(t_weights.shape, t_weights.data);
+  TRTORCH_CHECK(const_layer, "Unable to freeze tensor");
+  const_layer->setName("[Freeze Tensor]");
+  return const_layer->getOutput(0);
 }
 
 } // namespace converters
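
The tensor_to_const change guards against addConstant returning a null layer and gives the frozen constant a readable name for debugging. A hedged usage sketch, not standalone code: ctx, args, and in are assumed to come from a converter body, as in the PReLU converter below.

// Inside a converter lambda (sketch):
auto slopes = args[1].unwrapToTensor();          // at::Tensor holding the learned slopes
auto slope_trt = tensor_to_const(ctx, slopes);   // now fails loudly via TRTORCH_CHECK instead of dereferencing null
auto prelu_layer = ctx->net->addParametricReLU(*in, *slope_trt);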

Diff for: core/conversion/converters/impl/activation.cpp

+37
@@ -79,6 +79,43 @@ auto acthardtanh TRTORCH_UNUSED = RegisterNodeConversionPatterns()
     new_layer->setName(util::node_info(n).c_str());
     auto out_tensor = ctx->AssociateValueAndTensor(n->outputs()[0], new_layer->getOutput(0));
 
+    LOG_DEBUG("Output shape: " << out_tensor->getDimensions());
+    return true;
+  }
+}).pattern({
+  "aten::prelu(Tensor self, Tensor weight) -> (Tensor)",
+  [](ConversionCtx* ctx, const torch::jit::Node* n, args& args) -> bool {
+    auto in = args[0].ITensor();
+    auto slopes = args[1].unwrapToTensor();
+
+    bool to_reshape = false;
+    auto original_shape = in->getDimensions();
+    if (slopes.numel() != 1 && !util::broadcastable(in->getDimensions(), util::toDims(slopes.sizes()), /*multidirectional=*/false)) {
+      if (util::volume(in->getDimensions()) == util::volume(util::toDims(slopes.sizes()))) {
+        to_reshape = true;
+        LOG_DEBUG("Input shape is not broadcastable inserting shuffle layers to reshape to " << util::toDims(slopes.sizes()));
+        auto in_shuffle = ctx->net->addShuffle(*in);
+        TRTORCH_CHECK(in_shuffle, "Unable to create resize layer for aten::prelu input");
+        in_shuffle->setReshapeDimensions(util::toDims(slopes.sizes()));
+        in_shuffle->setName(std::string("[Reshape in to " + util::toStr(util::toDims(slopes.sizes())) + " for broadcasting]").c_str());
+        in = in_shuffle->getOutput(0);
+      }
+    }
+
+    auto slope_tensor = tensor_to_const(ctx, slopes);
+    auto new_layer = ctx->net->addParametricReLU(*in, *slope_tensor);
+    new_layer->setName(util::node_info(n).c_str());
+    auto out_tensor = new_layer->getOutput(0);
+
+    if (to_reshape) {
+      auto out_shuffle = ctx->net->addShuffle(*out_tensor);
+      TRTORCH_CHECK(out_shuffle, "Unable to create resize layer for aten::prelu output");
+      out_shuffle->setReshapeDimensions(original_shape);
+      out_shuffle->setName((std::string("[Reshape back to ") + util::toStr(original_shape) + std::string("]")).c_str());
+      out_tensor = out_shuffle->getOutput(0);
+    }
+
+    out_tensor = ctx->AssociateValueAndTensor(n->outputs()[0], out_tensor);
     LOG_DEBUG("Output shape: " << out_tensor->getDimensions());
     return true;
   }
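
The converter wires aten::prelu to TensorRT's addParametricReLU. When the slope tensor has more than one element and is not unidirectionally broadcastable against the input, but both shapes hold the same number of elements (the multi-channel test below hits this with input {1, 10, 1, 1} and slopes {10}), shuffle layers reshape the input to the slope shape and restore the original shape afterwards. A minimal libtorch sketch of why that round trip preserves PyTorch's per-channel semantics in this case (illustration only, CPU, shapes borrowed from the test):

#include <torch/torch.h>
#include <iostream>

int main() {
  auto in = at::randn({1, 10, 1, 1});
  auto slope = at::randn({10});

  // What the inserted shuffle layers emulate: reshape to the slope shape,
  // apply an elementwise PReLU, then reshape back to the original shape.
  auto flat = in.reshape({10});
  auto y = at::where(flat >= 0, flat, flat * slope);
  auto out = y.reshape({1, 10, 1, 1});

  // Matches PyTorch's per-channel prelu for this shape combination.
  std::cout << at::allclose(out, at::prelu(in, slope)) << std::endl;  // expected: 1
}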

Diff for: core/util/trt_util.cpp

+65-25
@@ -6,6 +6,59 @@ namespace trtorch {
 namespace core {
 namespace util {
 
+bool broadcastable(nvinfer1::Dims a, nvinfer1::Dims b, bool multidirectional) {
+  if (a == b) {
+    return true;
+  }
+
+  if (multidirectional) {
+    nvinfer1::Dims a_dims_eq;
+    nvinfer1::Dims b_dims_eq;
+    if (a.nbDims > b.nbDims) {
+      a_dims_eq = a;
+      b_dims_eq = toDimsPad(toVec(b), a.nbDims);
+    } else if (a.nbDims < b.nbDims) {
+      a_dims_eq = toDimsPad(toVec(a), b.nbDims);
+      b_dims_eq = b;
+    } else {
+      a_dims_eq = a;
+      b_dims_eq = b;
+    }
+
+    bool broadcastable = true;
+    for (int i = 0; i < a_dims_eq.nbDims; i++) {
+      if (b_dims_eq.d[i] == a_dims_eq.d[i] || (b_dims_eq.d[i] == 1 || a_dims_eq.d[i] == 1)) {
+        continue;
+      } else {
+        broadcastable = false;
+        break;
+      }
+    }
+    return broadcastable;
+  } else {
+    nvinfer1::Dims b_dims_eq;
+    if (a.nbDims > b.nbDims) {
+      b_dims_eq = toDimsPad(toVec(b), a.nbDims);
+    } else if (a.nbDims < b.nbDims) {
+      return false;
+    } else {
+      b_dims_eq = b;
+    }
+
+    bool broadcastable = true;
+    for (int i = 0; i < a.nbDims; i++) {
+      if (b_dims_eq.d[i] == a.d[i] || b_dims_eq.d[i] == 1) {
+        continue;
+      } else {
+        broadcastable = false;
+        break;
+      }
+    }
+    return broadcastable;
+  }
+}
+
+
 int64_t volume(const nvinfer1::Dims& d) {
   return std::accumulate(d.d, d.d + d.nbDims, 1, std::multiplies<int64_t>());
 }
@@ -16,10 +69,7 @@ nvinfer1::Dims toDimsPad(c10::IntArrayRef l, uint64_t pad_to) {
     return toDims(l);
   }
 
-  if (pad_to > nvinfer1::Dims::MAX_DIMS) {
-    //TODO: Handle this with exceptions or whatever
-    LOG_INTERNAL_ERROR("The list requested to be converted to nvinfer1::Dims exceeds the max number of dimensions for TensorRT");
-  }
+  TRTORCH_CHECK(pad_to <= nvinfer1::Dims::MAX_DIMS, "The list requested to be converted to nvinfer1::Dims exceeds the max number of dimensions for TensorRT");
 
   nvinfer1::Dims dims;
   dims.nbDims = pad_to;
@@ -34,10 +84,8 @@ nvinfer1::Dims toDimsPad(c10::IntArrayRef l, uint64_t pad_to) {
 }
 
 nvinfer1::Dims toDims(c10::IntArrayRef l) {
-  if (l.size() > nvinfer1::Dims::MAX_DIMS) {
-    //TODO: Handle this with exceptions or whatever
-    LOG_INTERNAL_ERROR("The list requested to be converted to nvinfer1::Dims exceeds the max number of dimensions for TensorRT");
-  }
+  TRTORCH_CHECK(l.size() <= nvinfer1::Dims::MAX_DIMS, "The list requested to be converted to nvinfer1::Dims exceeds the max number of dimensions for TensorRT");
+
   nvinfer1::Dims dims;
   dims.nbDims = l.size();
   for (size_t i = 0; i < l.size(); i++) {
@@ -47,10 +95,8 @@ nvinfer1::Dims toDims(c10::IntArrayRef l) {
 }
 
 nvinfer1::Dims toDims(c10::List<int64_t> l) {
-  if (l.size() > nvinfer1::Dims::MAX_DIMS) {
-    //TODO: Handle this with exceptions or whatever
-    LOG_INTERNAL_ERROR("The list requested to be converted to nvinfer1::Dims exceeds the max number of dimensions for TensorRT");
-  }
+  TRTORCH_CHECK(l.size() <= nvinfer1::Dims::MAX_DIMS, "The list requested to be converted to nvinfer1::Dims exceeds the max number of dimensions for TensorRT");
+
   nvinfer1::Dims dims;
   dims.nbDims = l.size();
   for (size_t i = 0; i < l.size(); i++) {
@@ -65,10 +111,8 @@ nvinfer1::Dims toDimsPad(c10::List<int64_t> l, uint64_t pad_to) {
     return toDims(l);
   }
 
-  if (pad_to > nvinfer1::Dims::MAX_DIMS) {
-    //TODO: Handle this with exceptions or whatever
-    LOG_INTERNAL_ERROR("The list requested to be converted to nvinfer1::Dims exceeds the max number of dimensions for TensorRT");
-  }
+  TRTORCH_CHECK(pad_to <= nvinfer1::Dims::MAX_DIMS, "The list requested to be converted to nvinfer1::Dims exceeds the max number of dimensions for TensorRT");
+
 
   nvinfer1::Dims dims;
   dims.nbDims = pad_to;
@@ -109,7 +153,7 @@ nvinfer1::Dims unpadDims(const nvinfer1::Dims& d) {
 nvinfer1::Dims unsqueezeDims(const nvinfer1::Dims& d, int pos) {
   // acceptable range for pos is [0, d.nbDims]
   TRTORCH_ASSERT(pos >= 0 && pos <= d.nbDims, "ERROR: Index to unsqueeze is out of bounds.");
-
+
   nvinfer1::Dims dims;
 
   int i = 0;
@@ -148,10 +192,8 @@ std::string toStr(nvinfer1::Dims d) {
 
 
 nvinfer1::DimsHW toDimsHW(c10::List<int64_t> l) {
-  if (l.size() != 2) {
-    //TODO: Handle this with exceptions or whatever
-    LOG_INTERNAL_ERROR("The list requested to be converted to nvinfer1::DimsHW is not 2");
-  }
+  TRTORCH_CHECK(l.size() == 2, "The list requested to be converted to nvinfer1::DimsHW is not 2");
+
   nvinfer1::DimsHW dims;
   dims.nbDims = l.size();
   for (size_t i = 0; i < l.size(); i++) {
@@ -161,10 +203,8 @@ nvinfer1::DimsHW toDimsHW(c10::List<int64_t> l) {
 }
 
 nvinfer1::DimsHW toDimsHW(c10::IntArrayRef l) {
-  if (l.size() != 2) {
-    //TODO: Handle this with exceptions or whatever
-    LOG_INTERNAL_ERROR("The list requested to be converted to nvinfer1::DimsHW is not 2");
-  }
+  TRTORCH_CHECK(l.size() == 2, "The list requested to be converted to nvinfer1::DimsHW is not 2");
+
   nvinfer1::DimsHW dims;
   dims.nbDims = l.size();
   for (size_t i = 0; i < l.size(); i++) {
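
The new util::broadcastable helper implements two flavors of the usual shape-broadcasting rule over nvinfer1::Dims: the default multidirectional check lets either shape contribute a 1 in any dimension, while the stricter unidirectional check (the one the PReLU converter opts into) only lets b be stretched and rejects b outright if it has more dimensions than a. A self-contained sketch of the same rule over std::vector, compilable without TensorRT; the leading-1 padding for shorter shapes is an assumption about toDimsPad, not something shown in this diff:

#include <cstdint>
#include <iostream>
#include <vector>

// Same decision logic as util::broadcastable, restated over plain vectors.
bool broadcastable_sketch(std::vector<int64_t> a, std::vector<int64_t> b, bool multidirectional = true) {
  if (!multidirectional && b.size() > a.size()) {
    return false;  // unidirectional: b may not out-rank a
  }
  // Pad the shorter shape with leading 1s so the ranks match (assumed padding convention).
  while (a.size() < b.size()) a.insert(a.begin(), 1);
  while (b.size() < a.size()) b.insert(b.begin(), 1);

  for (size_t i = 0; i < a.size(); i++) {
    bool ok = multidirectional ? (a[i] == b[i] || a[i] == 1 || b[i] == 1)
                               : (a[i] == b[i] || b[i] == 1);
    if (!ok) return false;
  }
  return true;
}

int main() {
  std::cout << broadcastable_sketch({1, 10, 1, 1}, {1, 1, 5, 5}) << "\n";         // 1: either side may be 1
  std::cout << broadcastable_sketch({1, 10, 1, 1}, {1, 1, 5, 5}, false) << "\n";  // 0: only b may be stretched
  std::cout << broadcastable_sketch({1, 10, 1, 1}, {10}, false) << "\n";          // 0: the case that triggers the PReLU reshape path
}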

Diff for: core/util/trt_util.h

+1
@@ -77,6 +77,7 @@ namespace util {
 
 int64_t volume(const nvinfer1::Dims& d);
 
+bool broadcastable(nvinfer1::Dims a, nvinfer1::Dims b, bool multidirectional=true);
 nvinfer1::Dims toDimsPad(c10::IntArrayRef l, uint64_t pad_to);
 nvinfer1::Dims toDimsPad(c10::List<int64_t> l, uint64_t pad_to);
 nvinfer1::Dims unpadDims(const nvinfer1::Dims& d);
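
The declaration defaults multidirectional to true, so callers get the permissive check unless they explicitly ask for the strict one, as the PReLU converter does. Calling convention, with hypothetical Dims variables for illustration:

// in_dims and slope_dims are placeholder nvinfer1::Dims values, not names from this commit.
util::broadcastable(in_dims, slope_dims);                              // multidirectional (default)
util::broadcastable(in_dims, slope_dims, /*multidirectional=*/false);  // strict check used for aten::prelu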

Diff for: tests/core/converters/test_activation.cpp

+47
@@ -109,3 +109,50 @@ TEST(Converters, ATenHardTanhCustomRangeConvertsCorrectly) {
   ASSERT_TRUE(trtorch::tests::util::almostEqual(jit_results[0], trt_results[0], 2e-6));
 }
 
+TEST(Converters, ATenPReLUConvertsCorrectly) {
+  const auto graph = R"IR(
+    graph(%0 : Tensor,
+          %1 : Float(1)):
+      %3 : Tensor = aten::prelu(%0, %1)
+      return (%3))IR";
+
+  auto g = std::make_shared<torch::jit::Graph>();
+  torch::jit::parseIR(graph, &*g);
+
+  auto in = at::randint(-5, 5, {5}, {at::kCUDA});
+  auto slope = at::randint(-5, 5, {1}, {at::kCUDA});
+
+  auto params = trtorch::core::conversion::get_named_params(g->inputs(), {slope});
+  auto jit_results = trtorch::tests::util::RunGraph(g, params, {in});
+
+  in = at::clone(in);
+  params = trtorch::core::conversion::get_named_params(g->inputs(), {slope});
+  auto trt_results = trtorch::tests::util::RunGraphEngine(g, params, {in});
+
+  ASSERT_TRUE(trtorch::tests::util::almostEqual(jit_results[0], trt_results[0], 2e-6));
+}
+
+TEST(Converters, ATenPReLUMultiChannelConvertsCorrectly) {
+  const auto graph = R"IR(
+    graph(%0 : Tensor,
+          %1 : Float(10)):
+      %3 : Tensor = aten::prelu(%0, %1)
+      return (%3))IR";
+
+  auto g = std::make_shared<torch::jit::Graph>();
+  torch::jit::parseIR(graph, &*g);
+
+  auto in = at::randint(-5, 5, {1, 10, 1, 1}, {at::kCUDA});
+  auto slope = at::randint(-5, 5, {10}, {at::kCUDA});
+
+  auto params = trtorch::core::conversion::get_named_params(g->inputs(), {slope});
+  auto jit_results = trtorch::tests::util::RunGraph(g, params, {in});
+
+  in = at::clone(in);
+  params = trtorch::core::conversion::get_named_params(g->inputs(), {slope});
+  auto trt_results = trtorch::tests::util::RunGraphEngine(g, params, {in});
+
+  ASSERT_TRUE(trtorch::tests::util::almostEqual(jit_results[0], trt_results[0], 2e-6));
+}
+
+
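
Both tests compare eager execution of the TorchScript graph (RunGraph) against the compiled TensorRT engine (RunGraphEngine) with a 2e-6 tolerance. The eager baseline boils down to ATen's prelu; a minimal libtorch sketch of that reference computation using the first test's shapes, on CPU for illustration (not how the test harness is implemented):

#include <torch/torch.h>
#include <iostream>

int main() {
  // Shapes from ATenPReLUConvertsCorrectly; values cast to float explicitly.
  auto in = at::randint(-5, 5, {5}).to(at::kFloat);
  auto slope = at::randint(-5, 5, {1}).to(at::kFloat);

  // With a single-element weight the slope is shared across the whole tensor;
  // this is the reference result the TensorRT engine must reproduce.
  auto out = at::prelu(in, slope);
  std::cout << out << std::endl;
}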
