
Commit cef0ce2

chore: Rebase with master and fix merge conflicts
Signed-off-by: Dheeraj Peri <[email protected]>
2 parents 86b2f2a + a029c2a

File tree: 146 files changed (+6792, -1458 lines)


.github/code-owners.yml

Lines changed: 0 additions & 12 deletions
@@ -9,7 +9,6 @@
 
 "component: build system":
 - "narendasan"
-- "andi4191"
 
 "component: conversion":
 - "narendasan"
@@ -29,7 +28,6 @@
 - "peri044"
 
 "component: execution":
-- "andi4191"
 - "narendasan"
 
 "component: lowering":
@@ -48,15 +46,12 @@
 - "peri044"
 
 "component: runtime":
-- "andi4191"
 - "narendasan"
 
 "component: tests":
-- "andi4191"
 - "narendasan"
 
 "component: torchtrtc":
-- "andi4191"
 - "narendasan"
 
 "component: dependencies":
@@ -74,24 +69,20 @@
 - "tanayvarshney"
 
 "infrastructre":
-- "andi4191"
 - "narendasan"
 
 "component: packaging":
 - "narendasan"
-- "andi4191"
 - "peri044"
 
 "channel: NGC":
-- "andi4191"
 - "peri044"
 
 "channel: linux-x86":
 - "narendasan"
 - "peri044"
 
 "channel: linux-sbsa":
-- "andi4191"
 - "bowang007"
 
 "channel: windows":
@@ -102,16 +93,13 @@
 - "bowang007"
 
 "component: tooling":
-- "andi4191"
 - "narendasan"
 
 "performance":
-- "andi4191"
 - "peri044"
 - "bowang007"
 
 "channel: docker":
-- "andi4191"
 - "narendasan"
 
 "ux":

README.md

Lines changed: 4 additions & 0 deletions
@@ -122,6 +122,10 @@ These are the following dependencies used to verify the testcases. Torch-TensorR
 
 Releases: https://github.com/pytorch/TensorRT/releases
 
+```
+pip install torch-tensorrt==1.2.0 --find-links https://github.com/pytorch/TensorRT/releases/expanded_assets/v1.2.0
+```
+
 ## Compiling Torch-TensorRT
 
 ### Installing Dependencies
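As a quick sanity check after installing the wheel from the command added above (a minimal sketch; assumes a CUDA-enabled environment with a matching torch build already present):

```python
# Minimal post-install check: both packages import and report their versions.
import torch
import torch_tensorrt

print(torch.__version__)
print(torch_tensorrt.__version__)
```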

core/conversion/converters/impl/einsum.cpp

Lines changed: 7 additions & 0 deletions
@@ -18,6 +18,13 @@ auto einsum_registrations TORCHTRT_UNUSED = RegisterNodeConversionPatterns().pat
       auto equation = args[0].unwrapToString();
       auto in = args[1].IValue()->toListRef();
 
+      TORCHTRT_CHECK(
+          in.size() <= 2,
+          "TensorRT currently supports up to 2 input tensors "
+              << "to einsum but operation had " << in.size()
+              << " input tensors, please specify torch_executed_ops=[\"aten::einsum\"] "
+              << "at compilation time to avoid this error.");
+
       std::vector<nvinfer1::ITensor*> tensors;
 
       // Populate vector of ITensor pointers
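The check added above rejects `aten::einsum` nodes with more than two operands at conversion time. A minimal sketch of the fallback it suggests, keeping the op in PyTorch via `torch_executed_ops` (the module, shapes, and compilation settings here are illustrative assumptions, not part of this commit):

```python
import torch
import torch_tensorrt

class ThreeOperandEinsum(torch.nn.Module):
    def forward(self, a, b, c):
        # Three operands: more than the TensorRT einsum converter supports.
        return torch.einsum("ij,jk,kl->il", a, b, c)

model = torch.jit.script(ThreeOperandEinsum()).eval().cuda()
trt_module = torch_tensorrt.compile(
    model,
    inputs=[torch_tensorrt.Input((4, 4))] * 3,
    # Leave aten::einsum to PyTorch so conversion does not hit the new check.
    torch_executed_ops=["aten::einsum"],
)
```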

core/conversion/converters/impl/normalize.cpp

Lines changed: 112 additions & 17 deletions
@@ -53,23 +53,118 @@ void create_plugin(
   LOG_DEBUG("Normalize layer output tensor shape: " << layer_output->getDimensions());
 }
 
-auto normalize_registrations TORCHTRT_UNUSED = RegisterNodeConversionPatterns().pattern(
-    {"aten::norm.ScalarOpt_dim(Tensor self, Scalar? p, int[1] dim, bool keepdim=False) -> (Tensor)",
-     [](ConversionCtx* ctx, const torch::jit::Node* n, args& args) -> bool {
-       auto in = args[0].ITensor();
-       auto in_shape = util::toVec(in->getDimensions());
-       auto order = args[1].unwrapToScalar().to<int32_t>();
-       auto axes_values = args[2].unwrapToIntList().vec();
-       std::vector<int32_t> axes(axes_values.begin(), axes_values.end());
-       auto keep_dims = (int32_t)args[3].unwrapToBool();
-       LOG_DEBUG("Order of normalize_plugin: " << order);
-       LOG_DEBUG("Axis: " << axes);
-       LOG_DEBUG("keep_dims: " << keep_dims);
-       create_plugin(ctx, n, in, order, axes, keep_dims, "NormalizePluginTorchTRT");
-       return true;
-     }
-
-    });
+int32_t axes_mask_from_axes_values(
+    const torch::jit::Node* n,
+    int32_t nb_dims,
+    const std::vector<int64_t>& axes_values) {
+  int32_t axes_mask = 0;
+  for (size_t i = 0UL; i < axes_values.size(); ++i) {
+    auto axis = axes_values[i];
+    if (axis < 0) {
+      axis += nb_dims;
+    }
+    TORCHTRT_CHECK(
+        axis < nb_dims, util::node_info(n) << " axis " << i << " with value: " << axis << " exceeds input rank");
+    axes_mask += 1 << axis;
+  }
+  return axes_mask;
+}
+
+nvinfer1::ITensor* frobenius_norm(
+    ConversionCtx* ctx,
+    const torch::jit::Node* n,
+    nvinfer1::ITensor* self,
+    int32_t axes_mask,
+    bool keep_dims) {
+  auto squared_layer =
+      add_elementwise(ctx, nvinfer1::ElementWiseOperation::kPROD, self, self, util::node_info(n) + "_squared");
+  TORCHTRT_CHECK(squared_layer, "Unabled to create square layer from node: " << *n);
+  auto squared_output = squared_layer->getOutput(0);
+
+  auto sum_layer = ctx->net->addReduce(*squared_output, nvinfer1::ReduceOperation::kSUM, axes_mask, keep_dims);
+  TORCHTRT_CHECK(sum_layer, "Unable to create sum layer from node: " << *n);
+  sum_layer->setName((util::node_info(n) + "_sum").c_str());
+  auto sum_output = sum_layer->getOutput(0);
+  LOG_DEBUG("SUM SHAPE: " << sum_output->getDimensions());
+
+  auto sqrt_layer = ctx->net->addUnary(*sum_output, nvinfer1::UnaryOperation::kSQRT);
+  TORCHTRT_CHECK(sqrt_layer, "Unable to create sqrt layer from node: " << *n);
+  sqrt_layer->setName((util::node_info(n) + "_sqrt").c_str());
+  auto sqrt_output = sqrt_layer->getOutput(0);
+  return sqrt_output;
+}
+
+auto normalize_registrations TORCHTRT_UNUSED =
+    RegisterNodeConversionPatterns()
+        .pattern(
+            {"aten::norm.ScalarOpt_dim(Tensor self, Scalar? p, int[1] dim, bool keepdim=False) -> (Tensor)",
+             [](ConversionCtx* ctx, const torch::jit::Node* n, args& args) -> bool {
+               auto in = args[0].ITensorOrFreeze(ctx);
+               auto in_shape = util::toVec(in->getDimensions());
+               auto order = args[1].unwrapToScalar().to<int32_t>();
+               auto axes_values = args[2].unwrapToIntList().vec();
+               std::vector<int32_t> axes(axes_values.begin(), axes_values.end());
+               auto keep_dims = (int32_t)args[3].unwrapToBool();
+               LOG_DEBUG("Order of normalize_plugin: " << order);
+               LOG_DEBUG("Axis: " << axes);
+               LOG_DEBUG("keep_dims: " << keep_dims);
+               create_plugin(ctx, n, in, order, axes, keep_dims, "NormalizePluginTorchTRT");
+               return true;
+             }
+
+            })
+        .pattern(
+            {"aten::frobenius_norm.dim(Tensor self, int[1] dim, bool keepdim=False) -> (Tensor)",
+             [](ConversionCtx* ctx, const torch::jit::Node* n, args& args) -> bool {
+               auto self = args[0].ITensorOrFreeze(ctx);
+               auto axes_values = args[1].unwrapToIntList().vec();
+               auto keep_dims = args[2].unwrapToBool();
+
+               auto axes_mask = axes_mask_from_axes_values(n, self->getDimensions().nbDims, axes_values);
+
+               auto norm = frobenius_norm(ctx, n, self, axes_mask, keep_dims);
+               auto out = ctx->AssociateValueAndTensor(n->outputs()[0], norm);
+               LOG_DEBUG("Output tensor shape: " << out->getDimensions());
+               return true;
+             }})
+        .pattern(
+            {"aten::linalg_norm(Tensor self, Scalar? ord=None, int[1]? dim=None, bool keepdim=False, *, int? dtype=None) -> (Tensor)",
+             [](ConversionCtx* ctx, const torch::jit::Node* n, args& args) -> bool {
+               // https://pytorch.org/docs/stable/generated/torch.linalg.norm.html
+               auto self = args[0].ITensorOrFreeze(ctx);
+               TORCHTRT_CHECK(
+                   args[1].IValue()->isNone(),
+                   "aten::linalg_norm converter does not yet support non-None 'ord' arguments. Add aten::linalg_norm to torch_executed_ops to force it to fallback.");
+               auto keep_dims = args[3].unwrapToBool();
+               auto self_nb_dims = self->getDimensions().nbDims;
+
+               if (!args.back().IValue()->isNone()) {
+                 // If specified, the input tensor is cast to dtype before performing the operation, and the returned
+                 // tensor’s type will be dtype
+                 auto dtype = args.back().unwrapToScalar().to<int64_t>();
+                 auto trt_dtype = util::ScalarTypeToTRTDataType(static_cast<at::ScalarType>(dtype));
+                 self = castITensor(ctx, self, trt_dtype);
+               }
+
+               int32_t axes_mask = 0;
+               if (args[2].IValue()->isNone()) {
+                 // If dim= None and ord= None, self will be flattened to 1D and the 2-norm of the resulting vector will
+                 // be computed.
+                 axes_mask = 1;
+                 keep_dims = true; // the single output dim is always preserved
+                 auto flatten_layer = ctx->net->addShuffle(*self);
+                 TORCHTRT_CHECK(flatten_layer, "Unable to create shuffle layer from node: " << *n);
+                 flatten_layer->setReshapeDimensions(util::toDims(std::vector<int64_t>({-1})));
+                 flatten_layer->setName((util::node_info(n) + "_flatten").c_str());
+                 self = flatten_layer->getOutput(0);
+               } else {
+                 axes_mask = axes_mask_from_axes_values(n, self_nb_dims, args[2].unwrapToIntList().vec());
+               }
+               auto norm = frobenius_norm(ctx, n, self, axes_mask, keep_dims);
+               auto out = ctx->AssociateValueAndTensor(n->outputs()[0], norm);
+               LOG_DEBUG("Output tensor shape: " << out->getDimensions());
+               return true;
+             }});
 
 } // namespace
 } // namespace impl
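For orientation, `axes_mask_from_axes_values` folds negative dims and sets one bit per reduction axis, and both new patterns reduce to `sqrt(sum(x*x))` over those axes. A rough Python sketch of the same bookkeeping and of the `aten::linalg_norm` cases the converter accepts (`ord=None` only; names and shapes are illustrative):

```python
import torch

def axes_mask(nb_dims: int, axes) -> int:
    # Mirrors the converter helper: fold negative axes, then set one bit per axis.
    mask = 0
    for axis in axes:
        if axis < 0:
            axis += nb_dims
        assert axis < nb_dims, f"axis {axis} exceeds input rank {nb_dims}"
        mask += 1 << axis
    return mask

x = torch.randn(2, 3, 4)
print(axes_mask(x.dim(), [-1, 0]))       # bits 2 and 0 set -> 5

# frobenius_norm / linalg_norm lower to sqrt(sum(x * x, dims)):
print(torch.linalg.norm(x, dim=(1, 2)))  # ord=None with explicit dims
print(torch.linalg.norm(x))              # ord=None, dim=None: flatten, then 2-norm
```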

core/conversion/converters/impl/select.cpp

Lines changed: 39 additions & 8 deletions
@@ -17,17 +17,23 @@ namespace {
 
 bool add_split(ConversionCtx* ctx, const torch::jit::Node* n, args& args, bool split_list, bool unbind) {
   auto in = args[0].ITensor();
-  auto numOutputs = 1, numRemainder = 0, axis = 0;
+  auto numOutputs = 1, numRemainder = 0;
   std::vector<int64_t> sizes;
 
+  // Precompute axis along which to apply split, ensuring negative dimensions are re-indexed
+  auto maxDim = static_cast<int64_t>(in->getDimensions().nbDims);
+  auto input_axis = unbind ? args[1].unwrapToInt() : args[2].unwrapToInt();
+  auto axis = input_axis < 0 ? input_axis + maxDim : input_axis;
+
+  // Ensure input axis is valid for input tensor
+  TORCHTRT_CHECK(
+      (axis >= 0) && (axis < maxDim),
+      "Expected input axis to fall in range [-" << maxDim << ", " << (maxDim - 1) << "], got " << input_axis);
+
   if (unbind) {
-    axis = args[1].unwrapToInt();
-    auto maxDim = static_cast<int64_t>(in->getDimensions().nbDims);
-    axis = axis < 0 ? axis + maxDim : axis;
     numOutputs = in->getDimensions().d[axis];
     sizes.insert(sizes.end(), numOutputs, 1);
   } else {
-    axis = args[2].unwrapToInt();
     auto inDimSize = in->getDimensions().d[axis];
     if (split_list) {
       sizes = args[1].unwrapToIntList().vec();
@@ -274,7 +280,8 @@ auto select_registrations TORCHTRT_UNUSED =
         .pattern(
             {"aten::index.Tensor(Tensor self, Tensor?[] indices) -> (Tensor)",
              [](ConversionCtx* ctx, const torch::jit::Node* n, args& args) -> bool {
-               // refer to https://github.com/pytorch/pytorch/blob/master/torch/onnx/symbolic_opset9.py#L4627
+               // refer to
+               // https://github.com/pytorch/pytorch/blob/974ad8fa6cc63b89234beb5ebff54c2d42711932/torch/onnx/symbolic_opset9.py#L4627
               auto in = args[0].ITensorOrFreeze(ctx);
               auto ts = args[1].IValue()->toListRef();
 
@@ -655,8 +662,15 @@ auto select_registrations TORCHTRT_UNUSED =
              auto self = args[0].ITensorOrFreeze(ctx);
              auto mask = args[1].ITensorOrFreeze(ctx);
              mask = addPadding(ctx, n, mask, self->getDimensions().nbDims, false, true);
-              auto val = args[2].unwrapToScalar().to<float>();
-              auto val_t = tensor_to_const(ctx, torch::full(util::toVec(self->getDimensions()), val));
+              auto val = args[2].unwrapToScalar();
+
+              // Tensor type to use for initializing constant tensor used in Select
+              // value should inherit its type from self
+              auto val_t_dtype = util::TRTDataTypeToScalarType(self->getType());
+
+              // Initialize contant tensor for fill with the inherited data type
+              auto val_t = tensor_to_const(
+                  ctx, torch::full(util::toVec(self->getDimensions()), val, {torch::dtype(val_t_dtype)}));
 
              TORCHTRT_CHECK(
                  util::broadcastable(self->getDimensions(), mask->getDimensions(), /*multidirectional=*/false),
@@ -714,6 +728,23 @@ auto select_registrations TORCHTRT_UNUSED =
 
              layer->setName(util::node_info(n).c_str());
 
+              auto out_tensor = ctx->AssociateValueAndTensor(n->outputs()[0], layer->getOutput(0));
+              LOG_DEBUG("Output shape: " << out_tensor->getDimensions());
+              return true;
+            }})
+        .pattern(
+            {"aten::where.self(Tensor condition, Tensor self, Tensor other) -> (Tensor)",
+             [](ConversionCtx* ctx, const torch::jit::Node* n, args& args) -> bool {
+               auto condition = args[0].ITensorOrFreeze(ctx);
+               auto x = args[1].ITensorOrFreeze(ctx);
+               auto y = args[2].ITensorOrFreeze(ctx);
+
+               auto layer = ctx->net->addSelect(*condition, *x, *y);
+
+               TORCHTRT_CHECK(layer, "Unable to create select layer for aten::where.self");
+
+               layer->setName(util::node_info(n).c_str());
+
              auto out_tensor = ctx->AssociateValueAndTensor(n->outputs()[0], layer->getOutput(0));
              LOG_DEBUG("Output shape: " << out_tensor->getDimensions());
              return true;
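Taken together, the select.cpp changes normalize negative split/unbind dimensions up front, make `masked_fill`'s fill constant inherit the dtype of `self`, and add an `aten::where.self` converter backed by a TensorRT select layer. A short sketch of the PyTorch-level ops these converters are fed (illustrative shapes only):

```python
import torch

x = torch.randn(2, 3, 4)
cond = x > 0

# Negative split dimension, now re-indexed and range-checked in add_split
chunks = torch.split(x, 2, dim=-1)

# masked_fill: the fill constant is created with x's dtype (e.g. half stays half)
filled = x.masked_fill(cond, 5)

# aten::where.self, mapped onto a TensorRT select layer
selected = torch.where(cond, x, torch.zeros_like(x))
```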

core/lowering/lowering.cpp

Lines changed: 3 additions & 0 deletions
@@ -9,6 +9,7 @@
 #include "torch/csrc/jit/passes/lower_graph.h"
 #include "torch/csrc/jit/passes/lower_tuples.h"
 #include "torch/csrc/jit/passes/peephole.h"
+#include "torch/csrc/jit/passes/remove_exceptions.h"
 #include "torch/csrc/jit/passes/remove_mutation.h"
 
 #include "core/lowering/lowering.h"
@@ -33,6 +34,7 @@ void LowerGraph(std::shared_ptr<torch::jit::Graph>& g, LowerInfo lower_info) {
   torch::jit::InlineFunctionalGraphs(g);
   torch::jit::PeepholeOptimize(g, false);
   torch::jit::FuseLinear(g);
+  torch::jit::EliminateExceptions(g);
   if (!lower_info.disable_cse) {
     torch::jit::EliminateCommonSubexpression(g);
   }
@@ -60,6 +62,7 @@ void LowerGraph(std::shared_ptr<torch::jit::Graph>& g, LowerInfo lower_info) {
   passes::UnpackAddMM(g);
   // passes::UnpackBatchNorm(g);
   passes::UnpackLogSoftmax(g);
+  passes::UnpackRsqrt(g);
   passes::UnpackStd(g);
   passes::UnpackVar(g);
   passes::RemoveNOPs(g);
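Two things change in the lowering pipeline: `torch::jit::EliminateExceptions` now runs once in `LowerGraph` (and is removed from the exception-elimination pass below), and a new `UnpackRsqrt` pass rewrites `aten::rsqrt` in terms of already-supported ops. The identity the unpacking relies on is simply the reciprocal square root; a quick Python check of that equivalence (the actual rewritten graph is defined in unpack_rsqrt.cpp, which is not shown in this view):

```python
import torch

x = torch.rand(4, 4) + 0.1  # keep values positive so the sqrt is well defined
assert torch.allclose(torch.rsqrt(x), 1.0 / torch.sqrt(x))
```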

core/lowering/passes/BUILD

Lines changed: 1 addition & 0 deletions
@@ -33,6 +33,7 @@ cc_library(
         "unpack_hardsigmoid.cpp",
         "unpack_hardswish.cpp",
         "unpack_log_softmax.cpp",
+        "unpack_rsqrt.cpp",
         "unpack_std.cpp",
         "unpack_var.cpp",
         "view_to_reshape.cpp",

core/lowering/passes/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
@@ -20,6 +20,7 @@ target_sources(${lib_name}
     "${CMAKE_CURRENT_SOURCE_DIR}/unpack_hardsigmoid.cpp"
     "${CMAKE_CURRENT_SOURCE_DIR}/unpack_hardswish.cpp"
     "${CMAKE_CURRENT_SOURCE_DIR}/unpack_log_softmax.cpp"
+    "${CMAKE_CURRENT_SOURCE_DIR}/unpack_rsqrt.cpp"
     "${CMAKE_CURRENT_SOURCE_DIR}/unpack_std.cpp"
     "${CMAKE_CURRENT_SOURCE_DIR}/unpack_var.cpp"
     "${CMAKE_CURRENT_SOURCE_DIR}/view_to_reshape.cpp"

core/lowering/passes/exception_elimination.cpp

Lines changed: 0 additions & 2 deletions
@@ -4,7 +4,6 @@
 #include "torch/csrc/jit/passes/dead_code_elimination.h"
 #include "torch/csrc/jit/passes/guard_elimination.h"
 #include "torch/csrc/jit/passes/peephole.h"
-#include "torch/csrc/jit/passes/remove_exceptions.h"
 #include "torch/csrc/jit/runtime/graph_executor.h"
 
 #include "core/util/prelude.h"
@@ -22,7 +21,6 @@ struct ExceptionOrPassPatternElimination {
 
   void run() {
     findExceptionOrPassNodes(graph_->block());
-    torch::jit::EliminateExceptions(graph_);
     torch::jit::EliminateDeadCode(graph_);
     LOG_GRAPH("Post exeception or pass elimination: " << *graph_);
   }

core/lowering/passes/passes.h

Lines changed: 1 addition & 0 deletions
@@ -33,6 +33,7 @@ void RemoveUnnecessaryCasts(std::shared_ptr<torch::jit::Graph>& graph);
 void UnpackAddMM(std::shared_ptr<torch::jit::Graph>& graph);
 void UnpackBatchNorm(std::shared_ptr<torch::jit::Graph>& graph);
 void UnpackLogSoftmax(std::shared_ptr<torch::jit::Graph>& graph);
+void UnpackRsqrt(std::shared_ptr<torch::jit::Graph>& graph);
 void UnpackStd(std::shared_ptr<torch::jit::Graph>& graph);
 void UnpackVar(std::shared_ptr<torch::jit::Graph>& graph);
 void AliasOperators(std::shared_ptr<torch::jit::Graph>& graph);
