
Commit 5cff257

chore: Apply linting
Signed-off-by: Naren Dasan <[email protected]>
Signed-off-by: Naren Dasan <[email protected]>
1 parent 9403f88 commit 5cff257

24 files changed: +389 -394 lines changed

core/compiler.cpp (+65 -56)

@@ -308,70 +308,78 @@ void MapInputsAndDetermineDTypes(
     std::shared_ptr<torch::jit::Graph>& g,
     ir::StaticParams& static_params,
     ir::CollectionTypeMap& first_use_type_map) {
-  cfg.convert_info.collection_input_spec_map = std::move(ir::associate_specs_with_collection_inputs(g, cfg.graph_inputs, static_params));
+  cfg.convert_info.collection_input_spec_map =
+      std::move(ir::associate_specs_with_collection_inputs(g, cfg.graph_inputs, static_params));
 
-  auto collection_inputs = ir::get_collection_inputs(g, static_params);
-  LOG_DEBUG("In MapInputsAndDetermineDTypes, the g->inputs() size is " << g->inputs().size() << ", CollectionInputSpecMap size is" << collection_inputs.size());
+  auto collection_inputs = ir::get_collection_inputs(g, static_params);
+  LOG_DEBUG(
+      "In MapInputsAndDetermineDTypes, the g->inputs() size is "
+      << g->inputs().size() << ", CollectionInputSpecMap size is" << collection_inputs.size());
 
-  for (auto in : collection_inputs) {
-    std::vector<ir::Input>& spec = cfg.convert_info.collection_input_spec_map.find(in)->second;
-    std::vector<c10::optional<at::ScalarType>> est_type_opt;
+  for (auto in : collection_inputs) {
+    std::vector<ir::Input>& spec = cfg.convert_info.collection_input_spec_map.find(in)->second;
+    std::vector<c10::optional<at::ScalarType>> est_type_opt;
 
-    auto est_it = first_use_type_map.find(in);
-    if (est_it != first_use_type_map.end()) {
-      est_type_opt = first_use_type_map.find(in)->second;
-    }
-    // traverse elements in est_type_out and spec
-    for (size_t i = 0; i < est_type_opt.size(); i++) {
-      if (est_type_opt[i] && !spec[i].dtype_is_user_defined) {
-        // If we can calculate the type from the graph and the type was not defined by the user then use the calculated
-        // type
-        LOG_INFO(
-            "Since input type is not explicitly defined, infering using first tensor calculation\n Inferred input "
-            << in->debugName() << " has type " << est_type_opt[i].value());
-        spec[i].dtype = util::ScalarTypeToTRTDataType(est_type_opt[i].value());
-      } else if (!est_type_opt[i] && !spec[i].dtype_is_user_defined) {
-        // If we cannot calculate the type and the user did not define the type, then default to FP32
-        LOG_WARNING(
-            "Cannot infer input type from calcuations in graph for input "
-            << in->debugName() << ". Assuming it is Float32. If not, specify input type explicity");
-        spec[i].dtype = nvinfer1::DataType::kFLOAT;
-      } else if (spec[i].dtype_is_user_defined && cfg.partition_info.enabled) {
-        if (!est_type_opt[i]) {
-          LOG_INFO("Cannot infer input tensor dtype in graph, compiler is going to use the user setting");
+    auto est_it = first_use_type_map.find(in);
+    if (est_it != first_use_type_map.end()) {
+      est_type_opt = first_use_type_map.find(in)->second;
+    }
+    // traverse elements in est_type_out and spec
+    for (size_t i = 0; i < est_type_opt.size(); i++) {
+      if (est_type_opt[i] && !spec[i].dtype_is_user_defined) {
+        // If we can calculate the type from the graph and the type was not defined by the user then use the calculated
+        // type
+        LOG_INFO(
+            "Since input type is not explicitly defined, infering using first tensor calculation\n Inferred input "
+            << in->debugName() << " has type " << est_type_opt[i].value());
+        spec[i].dtype = util::ScalarTypeToTRTDataType(est_type_opt[i].value());
+      } else if (!est_type_opt[i] && !spec[i].dtype_is_user_defined) {
+        // If we cannot calculate the type and the user did not define the type, then default to FP32
+        LOG_WARNING(
+            "Cannot infer input type from calcuations in graph for input "
+            << in->debugName() << ". Assuming it is Float32. If not, specify input type explicity");
+        spec[i].dtype = nvinfer1::DataType::kFLOAT;
+      } else if (spec[i].dtype_is_user_defined && cfg.partition_info.enabled) {
+        if (!est_type_opt[i]) {
+          LOG_INFO("Cannot infer input tensor dtype in graph, compiler is going to use the user setting");
+          std::stringstream ss;
+          ss << "For input " << in->debugName() << ", found user specified input dtype as ";
+          ss << cfg.convert_info.collection_input_spec_map.find(in)->second[i].dtype;
+          ss << ". The compiler is going to use the user setting "
+             << cfg.convert_info.collection_input_spec_map.find(in)->second[i].dtype;
+          auto warn_str = ss.str();
+          LOG_WARNING(warn_str);
+          // Overwrite type map with user settings
+          first_use_type_map[in][i] = {
+              util::TRTDataTypeToScalarType(cfg.convert_info.collection_input_spec_map.find(in)->second[i].dtype)};
+
+        } else {
+          if (util::TRTDataTypeToScalarType(cfg.convert_info.collection_input_spec_map.find(in)->second[i].dtype) !=
+              est_type_opt[i].value()) {
             std::stringstream ss;
             ss << "For input " << in->debugName() << ", found user specified input dtype as ";
             ss << cfg.convert_info.collection_input_spec_map.find(in)->second[i].dtype;
-            ss << ". The compiler is going to use the user setting " << cfg.convert_info.collection_input_spec_map.find(in)->second[i].dtype;
+            ss << ", however when inspecting the graph, the input type expected was inferred to be ";
+            ss << est_type_opt[i].value() << std::endl;
+            ss << "The compiler is going to use the user setting "
+               << cfg.convert_info.collection_input_spec_map.find(in)->second[i].dtype;
+            ss << "\nThis conflict may cause an error at runtime due to partial compilation being enabled and therefore\n";
+            ss << "compatibility with PyTorch's data type convention is required.\n";
+            ss << "If you do indeed see errors at runtime either:\n";
+            ss << "- Remove the dtype spec for " << in->debugName() << std::endl;
+            ss << "- Disable partial compilation by setting require_full_compilation to True";
             auto warn_str = ss.str();
             LOG_WARNING(warn_str);
             // Overwrite type map with user settings
-            first_use_type_map[in][i] = {util::TRTDataTypeToScalarType(cfg.convert_info.collection_input_spec_map.find(in)->second[i].dtype)};
-
-        } else {
-          if (util::TRTDataTypeToScalarType(cfg.convert_info.collection_input_spec_map.find(in)->second[i].dtype) != est_type_opt[i].value()) {
-            std::stringstream ss;
-            ss << "For input " << in->debugName() << ", found user specified input dtype as ";
-            ss << cfg.convert_info.collection_input_spec_map.find(in)->second[i].dtype;
-            ss << ", however when inspecting the graph, the input type expected was inferred to be ";
-            ss << est_type_opt[i].value() << std::endl;
-            ss << "The compiler is going to use the user setting " << cfg.convert_info.collection_input_spec_map.find(in)->second[i].dtype;
-            ss << "\nThis conflict may cause an error at runtime due to partial compilation being enabled and therefore\n";
-            ss << "compatibility with PyTorch's data type convention is required.\n";
-            ss << "If you do indeed see errors at runtime either:\n";
-            ss << "- Remove the dtype spec for " << in->debugName() << std::endl;
-            ss << "- Disable partial compilation by setting require_full_compilation to True";
-            auto warn_str = ss.str();
-            LOG_WARNING(warn_str);
-            // Overwrite type map with user settings
-            first_use_type_map[in][i] = {util::TRTDataTypeToScalarType(cfg.convert_info.collection_input_spec_map.find(in)->second[i].dtype)};
-          }
+            first_use_type_map[in][i] = {
+                util::TRTDataTypeToScalarType(cfg.convert_info.collection_input_spec_map.find(in)->second[i].dtype)};
           }
-      } else {
-        // The user defined the type so no changes are necessary
         }
+      } else {
+        // The user defined the type so no changes are necessary
       }
     }
+  }
   // }
 }
 
@@ -425,12 +433,13 @@ torch::jit::Module CompileGraph(const torch::jit::Module& mod, CompileSpec cfg)
 
   if (cfg.partition_info.enabled &&
       (!(cfg.lower_info.forced_fallback_modules.size() == 0 &&
-         cfg.partition_info.forced_fallback_operators.size() == 0 && isBlockConvertible)
-        || outputIsCollection)) {
-
+         cfg.partition_info.forced_fallback_operators.size() == 0 && isBlockConvertible) ||
+        outputIsCollection)) {
     std::unordered_map<torch::jit::Node*, int> fallback_nodes;
-    auto collection_input_ivalues_map = partitioning::generateRandomInputs(cfg.convert_info.collection_input_spec_map, first_use_types);
-    auto graph_and_mapping = ConstructFallbackGraph(new_mod, g->block(), collection_input_ivalues_map, cfg, static_params, fallback_nodes);
+    auto collection_input_ivalues_map =
+        partitioning::generateRandomInputs(cfg.convert_info.collection_input_spec_map, first_use_types);
+    auto graph_and_mapping = ConstructFallbackGraph(
+        new_mod, g->block(), collection_input_ivalues_map, cfg, static_params, fallback_nodes);
     new_g = graph_and_mapping.first;
     // renaming the input name of graph after fallback to ensure pytorch deserialize it correctly
     for (size_t i = 0; i < new_g->inputs().size(); ++i) {

core/conversion/conversion.cpp (+7 -8)

@@ -135,12 +135,10 @@ void AddLayer(ConversionCtx* ctx, const torch::jit::Node* n) {
               << "please report this error to https://www.github.com/NVIDIA/Torch-TensorRT/issues");
 }
 
-void AddInputs(
-    ConversionCtx* ctx,
-    c10::ArrayRef<const torch::jit::Value*> inputs,
-    ConversionInfo& conversion_info) {
+void AddInputs(ConversionCtx* ctx, c10::ArrayRef<const torch::jit::Value*> inputs, ConversionInfo& conversion_info) {
   std::unordered_map<const torch::jit::Value*, ir::Input>& input_specs = conversion_info.inputs;
-  std::unordered_map<const torch::jit::Value*, std::vector<ir::Input>> collection_input_spec = conversion_info.collection_input_spec_map;
+  std::unordered_map<const torch::jit::Value*, std::vector<ir::Input>> collection_input_spec =
+      conversion_info.collection_input_spec_map;
 
   std::vector<const torch::jit::Value*> input_tensors;
   for (auto in : inputs) {
@@ -173,7 +171,7 @@ void AddInputs(
         "Cannot find an input spec associated with input: " << in->debugName());
     ir::Input spec;
     if (input_specs.find(in) != input_specs.end()) {
-       spec = input_specs.find(in)->second;
+      spec = input_specs.find(in)->second;
     } else {
       spec = collection_input_spec.find(in)->second[0]; // assume input is tensor
     }
@@ -559,8 +557,9 @@ std::set<std::string> ConvertableOpsInBlock(const torch::jit::Block* b) {
 }
 
 bool OutputIsCollection(const torch::jit::Block* b) {
-  for (auto out: b->outputs()) {
-    if(out->type()->kind() == torch::jit::TypeKind::TupleType || out->type()->kind() == torch::jit::TypeKind::ListType) {
+  for (auto out : b->outputs()) {
+    if (out->type()->kind() == torch::jit::TypeKind::TupleType ||
+        out->type()->kind() == torch::jit::TypeKind::ListType) {
       return true;
     }
   }

core/conversion/conversionctx/ConversionCtx.cpp (+4 -4)

@@ -107,7 +107,7 @@ ConversionCtx::ConversionCtx(BuilderSettings build_settings)
   }
 
   cfg->setAvgTimingIterations(settings.num_avg_timing_iters);
-  if (settings.workspace_size != 0){
+  if (settings.workspace_size != 0) {
     cfg->setMemoryPoolLimit(nvinfer1::MemoryPoolType::kWORKSPACE, settings.workspace_size);
   }
 
@@ -124,13 +124,13 @@ ConversionCtx::ConversionCtx(BuilderSettings build_settings)
         settings.enabled_precisions.find(nvinfer1::DataType::kFLOAT) == settings.enabled_precisions.end(),
         "DLA supports only fp16 or int8 precision");
     cfg->setDLACore(settings.device.dla_core);
-    if (settings.dla_sram_size != 1048576){
+    if (settings.dla_sram_size != 1048576) {
       cfg->setMemoryPoolLimit(nvinfer1::MemoryPoolType::kDLA_MANAGED_SRAM, settings.dla_sram_size);
     }
-    if (settings.dla_local_dram_size != 1073741824){
+    if (settings.dla_local_dram_size != 1073741824) {
      cfg->setMemoryPoolLimit(nvinfer1::MemoryPoolType::kDLA_LOCAL_DRAM, settings.dla_local_dram_size);
     }
-    if (settings.dla_global_dram_size != 536870912){
+    if (settings.dla_global_dram_size != 536870912) {
      cfg->setMemoryPoolLimit(nvinfer1::MemoryPoolType::kDLA_GLOBAL_DRAM, settings.dla_global_dram_size);
     }
   }

core/conversion/converters/converter_util.cpp (+12 -9)

@@ -207,13 +207,13 @@ nvinfer1::ITensor* clamp(
     nvinfer1::ITensor* lower_bound,
     nvinfer1::ITensor* upper_bound,
     std::string const& name) {
-
   auto max_layer = add_elementwise(ctx, nvinfer1::ElementWiseOperation::kMAX, x, lower_bound, "max layer for " + name);
   TORCHTRT_CHECK(max_layer, "Unable to create max layer for clamp");
   LOG_DEBUG(ctx->logger, "Create " << max_layer->getName() << " for clamp");
   auto max_itensor = max_layer->getOutput(0);
 
-  auto min_layer = add_elementwise(ctx, nvinfer1::ElementWiseOperation::kMIN, max_itensor, upper_bound, "min layer for " + name);
+  auto min_layer =
+      add_elementwise(ctx, nvinfer1::ElementWiseOperation::kMIN, max_itensor, upper_bound, "min layer for " + name);
   TORCHTRT_CHECK(min_layer, "Unable to create min layer for clamp");
   LOG_DEBUG(ctx->logger, "Create " << min_layer->getName() << " for clamp");
   auto min_itensor = min_layer->getOutput(0);
@@ -227,13 +227,13 @@ nvinfer1::ITensor* clamp_to_input_dim(
     nvinfer1::ITensor* input_dim,
     int nbdims,
     std::string const& name) {
-
   auto zero = torch::zeros({nbdims}).to(torch::kI32);
   auto zero_itensor = tensor_to_const(ctx, zero);
   auto one = torch::ones({nbdims}).to(torch::kI32);
   auto one_itensor = tensor_to_const(ctx, one);
 
-  auto upper_bound_layer = add_elementwise(ctx, nvinfer1::ElementWiseOperation::kSUB, input_dim, one_itensor, "sub layer for " + name);
+  auto upper_bound_layer =
+      add_elementwise(ctx, nvinfer1::ElementWiseOperation::kSUB, input_dim, one_itensor, "sub layer for " + name);
   TORCHTRT_CHECK(upper_bound_layer, "Unable to create sub layer for clamp to inputDim");
   LOG_DEBUG(ctx->logger, "Create " << upper_bound_layer->getName() << " for clamp to inputDim");
   auto upper_bound = upper_bound_layer->getOutput(0);
@@ -243,7 +243,8 @@ nvinfer1::ITensor* clamp_to_input_dim(
   LOG_DEBUG(ctx->logger, "Create " << max_layer->getName() << " for clamp to inputDim");
   auto max_itensor = max_layer->getOutput(0);
 
-  auto min_layer = add_elementwise(ctx, nvinfer1::ElementWiseOperation::kMIN, max_itensor, upper_bound, "min layer for " + name);
+  auto min_layer =
+      add_elementwise(ctx, nvinfer1::ElementWiseOperation::kMIN, max_itensor, upper_bound, "min layer for " + name);
   TORCHTRT_CHECK(min_layer, "Unable to create min_layer for clamp to inputDim");
   LOG_DEBUG(ctx->logger, "Create " << min_layer->getName() << " for clamp to inputDim");
   auto min_itensor = min_layer->getOutput(0);
@@ -257,7 +258,6 @@ nvinfer1::ITensor* normalize_indices(
     nvinfer1::ITensor* indices,
     int nbdims,
     std::string const& name) {
-
   auto zero = torch::zeros({nbdims}).to(torch::kI32);
   auto neg = -torch::ones({nbdims}).to(torch::kI32);
   auto zero_itensor = tensor_to_const(ctx, zero);
@@ -307,17 +307,20 @@ nvinfer1::ITensor* get_slice_size(
   at::Tensor one_tensor = torch::ones({nbdims}).to(torch::kI32);
   auto one_itensor = tensor_to_const(ctx, one_tensor);
 
-  auto sub_layer = add_elementwise(ctx, nvinfer1::ElementWiseOperation::kSUB, end, start, "get_slice_size sub layer for " + name);
+  auto sub_layer =
+      add_elementwise(ctx, nvinfer1::ElementWiseOperation::kSUB, end, start, "get_slice_size sub layer for " + name);
   TORCHTRT_CHECK(sub_layer, "Unable to create sub layer in calculate_output_size");
   LOG_DEBUG(ctx->logger, "Create " << sub_layer->getName() << " for calculate_output_size");
   auto sub_itensor = sub_layer->getOutput(0);
 
-  auto div_layer = add_elementwise(ctx, nvinfer1::ElementWiseOperation::kDIV, sub_itensor, stride, "get_slice_size div layer for " + name);
+  auto div_layer = add_elementwise(
+      ctx, nvinfer1::ElementWiseOperation::kDIV, sub_itensor, stride, "get_slice_size div layer for " + name);
   TORCHTRT_CHECK(div_layer, "Unable to create div layer in calculate_output_size");
   LOG_DEBUG(ctx->logger, "Create " << div_layer->getName() << " for calculate_output_size");
   auto div_itensor = div_layer->getOutput(0);
 
-  auto add_layer = add_elementwise(ctx, nvinfer1::ElementWiseOperation::kSUM, div_itensor, one_itensor, "get_slice_size sum layer for " + name);
+  auto add_layer = add_elementwise(
+      ctx, nvinfer1::ElementWiseOperation::kSUM, div_itensor, one_itensor, "get_slice_size sum layer for " + name);
   TORCHTRT_CHECK(add_layer, "Unable to create add layer in calculate_output_size");
   LOG_DEBUG(ctx->logger, "Create " << add_layer->getName() << " for calculate_output_size");
   auto size_itensor = add_layer->getOutput(0);

core/conversion/converters/converter_util.h (+1 -1)

@@ -1,8 +1,8 @@
 #pragma once
 
+#include <limits>
 #include <map>
 #include <string>
-#include <limits>
 
 #include "core/conversion/conversionctx/ConversionCtx.h"
 #include "core/conversion/converters/Weights.h"
