From b8fa228777f196143220e180266b3a302763fc0b Mon Sep 17 00:00:00 2001 From: Naren Dasan Date: Wed, 30 Sep 2020 16:42:05 -0700 Subject: [PATCH 1/6] refactor!: Renaming extra info to compile spec to be more consistent with other backends and between APIs in TRTorch BREAKING CHANGE: This changes the top level api for setting the specification for compilation, a simple find and replace should allow users to port forward Signed-off-by: Naren Dasan Signed-off-by: Naren Dasan --- .bazelrc | 1 + README.md | 4 +- core/compiler.cpp | 12 ++-- core/compiler.h | 8 +-- .../conversionctx/ConversionCtx.cpp | 2 +- cpp/api/BUILD | 2 +- cpp/api/README.md | 16 ++--- cpp/api/include/trtorch/ptq.h | 4 +- cpp/api/include/trtorch/trtorch.h | 16 ++--- .../src/{extra_info.cpp => compile_spec.cpp} | 40 +++++------ cpp/api/src/trtorch.cpp | 12 ++-- cpp/benchmark/main.cpp | 10 +-- cpp/ptq/README.md | 12 ++-- cpp/ptq/main.cpp | 14 ++-- cpp/trtorchc/main.cpp | 22 +++--- cpp/trtorchexec/main.cpp | 8 +-- docsrc/tutorials/getting_started.rst | 10 +-- docsrc/tutorials/ptq.rst | 12 ++-- py/BUILD | 2 +- .../{_extra_info.py => _compile_spec.py} | 70 +++++++++---------- py/trtorch/_compiler.py | 18 ++--- py/trtorch/csrc/trtorch_py.cpp | 48 +++++++------ tests/accuracy/test_fp16_accuracy.cpp | 6 +- tests/accuracy/test_fp32_accuracy.cpp | 6 +- tests/accuracy/test_int8_accuracy.cpp | 12 ++-- tests/modules/test_serialization.cpp | 6 +- tests/py/test_api.py | 8 +-- 27 files changed, 194 insertions(+), 187 deletions(-) rename cpp/api/src/{extra_info.cpp => compile_spec.cpp} (73%) rename py/trtorch/{_extra_info.py => _compile_spec.py} (64%) diff --git a/.bazelrc b/.bazelrc index 4a2a7423df..0a89848888 100644 --- a/.bazelrc +++ b/.bazelrc @@ -29,6 +29,7 @@ build --cxxopt='-std=c++14' build:python --cxxopt="-D_GLIBCXX_USE_CXX11_ABI=0" build:python --linkopt="-D_GLIBCXX_USE_CXX11_ABI=0" build:python --define=abi=pre_cxx11_abi +build:python --define=target_lang=python build:pre_cxx11_abi --cxxopt="-D_GLIBCXX_USE_CXX11_ABI=0" build:pre_cxx11_abi --linkopt="-D_GLIBCXX_USE_CXX11_ABI=0" diff --git a/README.md b/README.md index c9c6c90ccf..61f71fa493 100644 --- a/README.md +++ b/README.md @@ -18,7 +18,7 @@ More Information / System Architecture: #include "trtorch/trtorch.h" ... 
-auto compile_settings = trtorch::ExtraInfo(dims); +auto compile_settings = trtorch::CompileSpec(dims); // FP16 execution compile_settings.op_precision = torch::kFloat; // Compile module @@ -54,7 +54,7 @@ torch.jit.save(trt_ts_module, "trt_torchscript_module.ts") ``` > Notes on running in lower precisions: -> - Set precision with extra_info.op_precision +> - Set precision with compile_spec.op_precision > - The module should be left in FP32 before compilation (FP16 can support half tensor models) > - In FP16 only input tensors should be converted to FP16, other precisions use FP32 diff --git a/core/compiler.cpp b/core/compiler.cpp index d45dcf4a38..099651a1fa 100644 --- a/core/compiler.cpp +++ b/core/compiler.cpp @@ -20,7 +20,7 @@ #include "core/lowering/lowering.h" #include "core/conversion/conversion.h" -#include "core/execution/execution.h" +#include "core/runtime/runtime.h" namespace trtorch { namespace core { @@ -42,7 +42,7 @@ c10::FunctionSchema GenerateGraphSchema(torch::jit::script::Module mod, std::str void AddEngineToGraph(torch::jit::script::Module mod, std::shared_ptr& g, std::string& serialized_engine) { - auto engine_ptr = c10::make_intrusive(mod._ivalue()->name(), serialized_engine); + auto engine_ptr = c10::make_intrusive(mod._ivalue()->name(), serialized_engine); // Get required metadata about the engine out auto num_io = engine_ptr->num_io; auto name = engine_ptr->name; @@ -50,7 +50,7 @@ void AddEngineToGraph(torch::jit::script::Module mod, std::shared_ptr>(), + c10::getCustomClassType>(), c10::IValue(std::move(engine_ptr)), false ); @@ -125,7 +125,7 @@ bool CheckMethodOperatorSupport(const torch::jit::script::Module& mod, std::string ConvertGraphToTRTEngine(const torch::jit::script::Module& mod, std::string method_name, - ExtraInfo cfg) { + CompileSpec cfg) { // Go through Lowering to simplify graph and extract weight parameters auto graph_and_parameters = lowering::Lower(mod, method_name); @@ -137,12 +137,12 @@ std::string ConvertGraphToTRTEngine(const torch::jit::script::Module& mod, LOG_INFO(*g << "(CompileGraph)\n"); - auto engine = ConvertBlockToEngine(g->block(), convert_cfg, named_params); + auto engine = conversion::ConvertBlockToEngine(g->block(), convert_cfg, named_params); return std::move(engine); } torch::jit::script::Module CompileGraph(const torch::jit::script::Module& mod, - ExtraInfo cfg) { + CompileSpec cfg) { // TODO: Should be doing a functional transform but need PR #31978 // [jit] More robust mangling //torch::jit::script::Module new_mod = mod.clone(); diff --git a/core/compiler.h b/core/compiler.h index f9ff400159..281973d4d6 100644 --- a/core/compiler.h +++ b/core/compiler.h @@ -7,8 +7,8 @@ namespace trtorch { namespace core { -struct ExtraInfo { - ExtraInfo(std::vector input_ranges) +struct CompileSpec { + CompileSpec(std::vector input_ranges) : convert_info(std::move(input_ranges)) {} conversion::ConversionInfo convert_info; }; @@ -16,9 +16,9 @@ struct ExtraInfo { bool CheckMethodOperatorSupport(const torch::jit::script::Module& mod, std::string method_name); std::string ConvertGraphToTRTEngine(const torch::jit::script::Module& mod, - std::string method_name, ExtraInfo cfg); + std::string method_name, CompileSpec cfg); -torch::jit::script::Module CompileGraph(const torch::jit::script::Module& module, ExtraInfo cfg); +torch::jit::script::Module CompileGraph(const torch::jit::script::Module& module, CompileSpec cfg); } // namespace core } // namespace trtorch diff --git a/core/conversion/conversionctx/ConversionCtx.cpp 
b/core/conversion/conversionctx/ConversionCtx.cpp index 2993ee593e..3280464635 100644 --- a/core/conversion/conversionctx/ConversionCtx.cpp +++ b/core/conversion/conversionctx/ConversionCtx.cpp @@ -55,7 +55,7 @@ ConversionCtx::ConversionCtx(BuilderSettings build_settings) cfg->setFlag(nvinfer1::BuilderFlag::kFP16); } input_type = nvinfer1::DataType::kFLOAT; - TRTORCH_CHECK(settings.calibrator != nullptr, "Requested inference in INT8 but no calibrator provided, set the ptq_calibrator field in the ExtraInfo struct with your calibrator"); + TRTORCH_CHECK(settings.calibrator != nullptr, "Requested inference in INT8 but no calibrator provided, set the ptq_calibrator field in the CompileSpec struct with your calibrator"); cfg->setInt8Calibrator(settings.calibrator); break; case nvinfer1::DataType::kFLOAT: diff --git a/cpp/api/BUILD b/cpp/api/BUILD index d396d1690a..18ce5b8118 100644 --- a/cpp/api/BUILD +++ b/cpp/api/BUILD @@ -9,7 +9,7 @@ cc_library( "include/trtorch/ptq.h" ], srcs = [ - "src/extra_info.cpp", + "src/compile_spec.cpp", "src/logging.cpp", "src/trtorch.cpp", "src/ptq.cpp" diff --git a/cpp/api/README.md b/cpp/api/README.md index ab1bf03cfe..4bdbae379b 100644 --- a/cpp/api/README.md +++ b/cpp/api/README.md @@ -31,7 +31,7 @@ namespace trtorch { * Settings data structure for TRTorch compilation * */ -struct TRTORCH_API ExtraInfo { +struct TRTORCH_API CompileSpec { /** * @brief A struct to hold an input range (used by TensorRT Optimization profile) * @@ -132,10 +132,10 @@ struct TRTORCH_API ExtraInfo { kSAFE_DLA, }; - ExtraInfo(std::vector input_ranges) + CompileSpec(std::vector input_ranges) : input_ranges(std::move(input_ranges)) {} - ExtraInfo(std::vector> fixed_sizes); - ExtraInfo(std::vector> fixed_sizes); + CompileSpec(std::vector> fixed_sizes); + CompileSpec(std::vector> fixed_sizes); // Defaults should reflect TensorRT defaults for BuilderConfig @@ -236,27 +236,27 @@ TRTORCH_API bool CheckMethodOperatorSupport(const torch::jit::script::Module& mo * @brief Compile a TorchScript module for NVIDIA GPUs using TensorRT * * @param module: torch::jit::script::Module - Existing TorchScript module - * @param info: trtorch::ExtraInfo - Compilation settings + * @param info: trtorch::CompileSpec - Compilation settings * * Takes a existing TorchScript module and a set of settings to configure the compiler * and will convert methods to JIT Graphs which call equivalent TensorRT engines * * Converts specifically the forward method of a TorchScript Module */ -TRTORCH_API torch::jit::script::Module CompileGraph(const torch::jit::script::Module& module, ExtraInfo info); +TRTORCH_API torch::jit::script::Module CompileGraph(const torch::jit::script::Module& module, CompileSpec info); /** * @brief Compile a TorchScript method for NVIDIA GPUs using TensorRT * * @param module: torch::jit::script::Module - Existing TorchScript module * @param method_name: std::string - Name of method to compile - * @param info: trtorch::ExtraInfo - Compilation settings + * @param info: trtorch::CompileSpec - Compilation settings * * Takes a existing TorchScript module and a set of settings to configure the compiler * and will convert selected method to a serialized TensorRT engine which can be run with * TensorRT */ -TRTORCH_API std::string ConvertGraphToTRTEngine(const torch::jit::script::Module& module, std::string method_name, ExtraInfo info); +TRTORCH_API std::string ConvertGraphToTRTEngine(const torch::jit::script::Module& module, std::string method_name, CompileSpec info); namespace ptq { /** diff --git 
a/cpp/api/include/trtorch/ptq.h b/cpp/api/include/trtorch/ptq.h index 05f3583947..4932218405 100644 --- a/cpp/api/include/trtorch/ptq.h +++ b/cpp/api/include/trtorch/ptq.h @@ -145,7 +145,7 @@ class Int8Calibrator : Algorithm { /** * @brief operator to cast to nvinfer1::IInt8Calibrator* * - * Convience function to convert to a IInt8Calibrator* to easily be assigned to the ptq_calibrator field in ExtraInfo + * Convience function to convert to a IInt8Calibrator* to easily be assigned to the ptq_calibrator field in CompileSpec * * @return nvinfer1::IInt8Calibrator* */ @@ -259,7 +259,7 @@ class Int8CacheCalibrator : Algorithm { /** * @brief operator to cast to nvinfer1::IInt8Calibrator* * - * Convience function to convert to a IInt8Calibrator* to easily be assigned to the ptq_calibrator field in ExtraInfo + * Convience function to convert to a IInt8Calibrator* to easily be assigned to the ptq_calibrator field in CompileSpec * * @return nvinfer1::IInt8Calibrator* */ diff --git a/cpp/api/include/trtorch/trtorch.h b/cpp/api/include/trtorch/trtorch.h index 8e2757ad3b..cf8bd9e329 100644 --- a/cpp/api/include/trtorch/trtorch.h +++ b/cpp/api/include/trtorch/trtorch.h @@ -39,7 +39,7 @@ namespace trtorch { * Settings data structure for TRTorch compilation * */ -struct TRTORCH_API ExtraInfo { +struct TRTORCH_API CompileSpec { /** * @brief A struct to hold an input range (used by TensorRT Optimization profile) * @@ -256,7 +256,7 @@ struct TRTORCH_API ExtraInfo { * * @param input_ranges */ - ExtraInfo(std::vector input_ranges) + CompileSpec(std::vector input_ranges) : input_ranges(std::move(input_ranges)) {} /** * @brief Construct a new Extra Info object @@ -265,14 +265,14 @@ struct TRTORCH_API ExtraInfo { * * @param fixed_sizes */ - ExtraInfo(std::vector> fixed_sizes); + CompileSpec(std::vector> fixed_sizes); /** * @brief Construct a new Extra Info object * Convienence constructor to set fixed input size from c10::ArrayRef's (the output of tensor.sizes()) describing size of input tensors. * Each entry in the vector represents a input and should be provided in call order. 
* @param fixed_sizes */ - ExtraInfo(std::vector> fixed_sizes); + CompileSpec(std::vector> fixed_sizes); // Defaults should reflect TensorRT defaults for BuilderConfig @@ -379,7 +379,7 @@ TRTORCH_API bool CheckMethodOperatorSupport(const torch::jit::Module& module, st * @brief Compile a TorchScript module for NVIDIA GPUs using TensorRT * * @param module: torch::jit::Module - Existing TorchScript module - * @param info: trtorch::ExtraInfo - Compilation settings + * @param info: trtorch::CompileSpec - Compilation settings * * Takes a existing TorchScript module and a set of settings to configure the compiler * and will convert methods to JIT Graphs which call equivalent TensorRT engines @@ -388,14 +388,14 @@ TRTORCH_API bool CheckMethodOperatorSupport(const torch::jit::Module& module, st * * @return: A new module trageting a TensorRT engine */ -TRTORCH_API torch::jit::Module CompileGraph(const torch::jit::Module& module, ExtraInfo info); +TRTORCH_API torch::jit::Module CompileGraph(const torch::jit::Module& module, CompileSpec info); /** * @brief Compile a TorchScript method for NVIDIA GPUs using TensorRT * * @param module: torch::jit::Module - Existing TorchScript module * @param method_name: std::string - Name of method to compile - * @param info: trtorch::ExtraInfo - Compilation settings + * @param info: trtorch::CompileSpec - Compilation settings * * Takes a existing TorchScript module and a set of settings to configure the compiler * and will convert selected method to a serialized TensorRT engine which can be run with @@ -403,5 +403,5 @@ TRTORCH_API torch::jit::Module CompileGraph(const torch::jit::Module& module, Ex * * @return: std::string: Serialized TensorRT engine equivilant to the method graph */ -TRTORCH_API std::string ConvertGraphToTRTEngine(const torch::jit::Module& module, std::string method_name, ExtraInfo info); +TRTORCH_API std::string ConvertGraphToTRTEngine(const torch::jit::Module& module, std::string method_name, CompileSpec info); } // namespace trtorch diff --git a/cpp/api/src/extra_info.cpp b/cpp/api/src/compile_spec.cpp similarity index 73% rename from cpp/api/src/extra_info.cpp rename to cpp/api/src/compile_spec.cpp index 5bc12fa204..bfec3e7ba7 100644 --- a/cpp/api/src/extra_info.cpp +++ b/cpp/api/src/compile_spec.cpp @@ -6,7 +6,7 @@ #include "trtorch/trtorch.h" namespace trtorch { -ExtraInfo::DataType::DataType(c10::ScalarType t) { +CompileSpec::DataType::DataType(c10::ScalarType t) { TRTORCH_CHECK(t == at::kHalf || t == at::kFloat || t == at::kChar, "Data type is unsupported"); switch (t) { case at::kHalf: @@ -21,52 +21,52 @@ ExtraInfo::DataType::DataType(c10::ScalarType t) { } } -ExtraInfo::DeviceType::DeviceType(c10::DeviceType t) { +CompileSpec::DeviceType::DeviceType(c10::DeviceType t) { TRTORCH_CHECK(t == at::kCUDA, "Device type when specified using torch device enum must be torch::kCUDA"); value = DeviceType::kGPU; } -ExtraInfo::InputRange::InputRange(std::vector opt) { +CompileSpec::InputRange::InputRange(std::vector opt) { this->opt = opt; this->min = opt; this->max = opt; } -ExtraInfo::InputRange::InputRange(c10::IntArrayRef opt) { +CompileSpec::InputRange::InputRange(c10::IntArrayRef opt) { this->opt = core::util::toVec(opt); this->min = core::util::toVec(opt); this->max = core::util::toVec(opt); } -ExtraInfo::InputRange::InputRange(std::vector min, std::vector opt, std::vector max) { +CompileSpec::InputRange::InputRange(std::vector min, std::vector opt, std::vector max) { this->opt = opt; this->min = min; this->max = max; } 
-ExtraInfo::InputRange::InputRange(c10::IntArrayRef min, c10::IntArrayRef opt, c10::IntArrayRef max) { +CompileSpec::InputRange::InputRange(c10::IntArrayRef min, c10::IntArrayRef opt, c10::IntArrayRef max) { this->opt = core::util::toVec(opt); this->min = core::util::toVec(min); this->max = core::util::toVec(max); } -ExtraInfo::ExtraInfo(std::vector> fixed_sizes) { +CompileSpec::CompileSpec(std::vector> fixed_sizes) { for (auto in : fixed_sizes) { input_ranges.push_back(InputRange(in)); } } -ExtraInfo::ExtraInfo(std::vector> fixed_sizes) { +CompileSpec::CompileSpec(std::vector> fixed_sizes) { for (auto in : fixed_sizes) { input_ranges.push_back(InputRange(in)); } } -core::conversion::InputRange to_internal_input_range(ExtraInfo::InputRange i) { +core::conversion::InputRange to_internal_input_range(CompileSpec::InputRange i) { return core::conversion::InputRange(i.min, i.opt, i.max); } -std::vector to_vec_internal_input_ranges(std::vector external) { +std::vector to_vec_internal_input_ranges(std::vector external) { std::vector internal; for (auto range : external) { internal.push_back(to_internal_input_range(range)); @@ -74,17 +74,17 @@ std::vector to_vec_internal_input_ranges(std::vect return internal; } -core::ExtraInfo to_internal_extra_info(ExtraInfo external) { - core::ExtraInfo internal(to_vec_internal_input_ranges(external.input_ranges)); +core::CompileSpec to_internal_compile_spec(CompileSpec external) { + core::CompileSpec internal(to_vec_internal_input_ranges(external.input_ranges)); switch(external.op_precision) { - case ExtraInfo::DataType::kChar: + case CompileSpec::DataType::kChar: internal.convert_info.engine_settings.op_precision = nvinfer1::DataType::kINT8; break; - case ExtraInfo::DataType::kHalf: + case CompileSpec::DataType::kHalf: internal.convert_info.engine_settings.op_precision = nvinfer1::DataType::kHALF; break; - case ExtraInfo::DataType::kFloat: + case CompileSpec::DataType::kFloat: default: internal.convert_info.engine_settings.op_precision = nvinfer1::DataType::kFLOAT; } @@ -96,22 +96,22 @@ core::ExtraInfo to_internal_extra_info(ExtraInfo external) { internal.convert_info.engine_settings.max_batch_size = external.max_batch_size; switch(external.device) { - case ExtraInfo::DeviceType::kDLA: + case CompileSpec::DeviceType::kDLA: internal.convert_info.engine_settings.device = nvinfer1::DeviceType::kDLA; break; - case ExtraInfo::DeviceType::kGPU: + case CompileSpec::DeviceType::kGPU: default: internal.convert_info.engine_settings.device = nvinfer1::DeviceType::kGPU; } switch(external.capability) { - case ExtraInfo::EngineCapability::kSAFE_GPU: + case CompileSpec::EngineCapability::kSAFE_GPU: internal.convert_info.engine_settings.capability = nvinfer1::EngineCapability::kSAFE_GPU; break; - case ExtraInfo::EngineCapability::kSAFE_DLA: + case CompileSpec::EngineCapability::kSAFE_DLA: internal.convert_info.engine_settings.capability = nvinfer1::EngineCapability::kSAFE_DLA; break; - case ExtraInfo::EngineCapability::kDEFAULT: + case CompileSpec::EngineCapability::kDEFAULT: default: internal.convert_info.engine_settings.capability = nvinfer1::EngineCapability::kDEFAULT; diff --git a/cpp/api/src/trtorch.cpp b/cpp/api/src/trtorch.cpp index e6a1940db1..742b4111a9 100644 --- a/cpp/api/src/trtorch.cpp +++ b/cpp/api/src/trtorch.cpp @@ -7,8 +7,8 @@ namespace trtorch { -// Defined in extra_info.cpp -core::ExtraInfo to_internal_extra_info(ExtraInfo external); +// Defined in compile_spec.cpp +core::CompileSpec to_internal_compile_spec(CompileSpec external); bool 
CheckMethodOperatorSupport(const torch::jit::script::Module& module, std::string method_name) { @@ -16,18 +16,18 @@ bool CheckMethodOperatorSupport(const torch::jit::script::Module& module, } std::string ConvertGraphToTRTEngine(const torch::jit::script::Module& module, - std::string method_name, ExtraInfo info) { + std::string method_name, CompileSpec info) { LOG_DEBUG(get_build_info()); // Want to export a much simpler (non TRT header dependent) API so doing the // type conversion here - return std::move(core::ConvertGraphToTRTEngine(module, method_name, to_internal_extra_info(info))); + return std::move(core::ConvertGraphToTRTEngine(module, method_name, to_internal_compile_spec(info))); } -torch::jit::script::Module CompileGraph(const torch::jit::script::Module& module, ExtraInfo info) { +torch::jit::script::Module CompileGraph(const torch::jit::script::Module& module, CompileSpec info) { LOG_DEBUG(get_build_info()); // Want to export a much simpler (non TRT header dependent) API so doing the // type conversion here - return core::CompileGraph(module, to_internal_extra_info(info)); + return core::CompileGraph(module, to_internal_compile_spec(info)); } std::string get_build_info() { diff --git a/cpp/benchmark/main.cpp b/cpp/benchmark/main.cpp index e73f1da4e8..48566b60c6 100644 --- a/cpp/benchmark/main.cpp +++ b/cpp/benchmark/main.cpp @@ -121,18 +121,18 @@ int main(int argc, const char* argv[]) { at::globalContext().setBenchmarkCuDNN(true); #ifdef TRT - auto extra_info = trtorch::ExtraInfo(dims); - extra_info.workspace_size = 1 << 20; + auto compile_spec = trtorch::CompileSpec(dims); + compile_spec.workspace_size = 1 << 20; #ifdef HALF - extra_info.op_precision = torch::kF16; + compile_spec.op_precision = torch::kF16; #endif - auto trt_mod = trtorch::CompileGraph(mod, extra_info); + auto trt_mod = trtorch::CompileGraph(mod, compile_spec); #ifdef SAVE_ENGINE std::cout << "Compiling graph to save as TRT engine (/tmp/engine_converted_from_jit.trt)" << std::endl; - auto engine = trtorch::ConvertGraphToTRTEngine(mod, "forward", extra_info); + auto engine = trtorch::ConvertGraphToTRTEngine(mod, "forward", compile_spec); std::ofstream out("/tmp/engine_converted_from_jit.trt"); out << engine; out.close(); diff --git a/cpp/ptq/README.md b/cpp/ptq/README.md index 70eb990fb3..ceffb6dcec 100644 --- a/cpp/ptq/README.md +++ b/cpp/ptq/README.md @@ -92,20 +92,20 @@ The calibrator factories create a calibrator that inherits from a `nvinfer1::IIn auto calibrator = trtorch::ptq::make_int8_calibrator(std::move(calibration_dataloader), calibration_cache_file, true); ``` -Then all thats required to setup the module for INT8 calibration is to set the following compile settings in the `trtorch::ExtraInfo` struct and compiling the module: +Then all thats required to setup the module for INT8 calibration is to set the following compile settings in the `trtorch::CompileSpec` struct and compiling the module: ```C++ std::vector> input_shape = {{32, 3, 32, 32}}; /// Configure settings for compilation - auto extra_info = trtorch::ExtraInfo({input_shape}); + auto compile_spec = trtorch::CompileSpec({input_shape}); /// Set operating precision to INT8 - extra_info.op_precision = torch::kI8; + compile_spec.op_precision = torch::kI8; /// Use the TensorRT Entropy Calibrator - extra_info.ptq_calibrator = calibrator; + compile_spec.ptq_calibrator = calibrator; /// Set a larger workspace (you may get better performace from doing so) - extra_info.workspace_size = 1 << 28; + compile_spec.workspace_size = 1 << 28; - auto trt_mod = 
trtorch::CompileGraph(mod, extra_info); + auto trt_mod = trtorch::CompileGraph(mod, compile_spec); ``` If you have an existing Calibrator implementation for TensorRT you may directly set the `ptq_calibrator` field with a pointer to your calibrator and it will work as well. diff --git a/cpp/ptq/main.cpp b/cpp/ptq/main.cpp index 241261dfba..340ec9cd66 100644 --- a/cpp/ptq/main.cpp +++ b/cpp/ptq/main.cpp @@ -50,28 +50,28 @@ torch::jit::Module compile_int8_model(const std::string& data_dir, torch::jit::M std::vector> input_shape = {{32, 3, 32, 32}}; /// Configure settings for compilation - auto extra_info = trtorch::ExtraInfo({input_shape}); + auto compile_spec = trtorch::CompileSpec({input_shape}); /// Set operating precision to INT8 - extra_info.op_precision = torch::kI8; + compile_spec.op_precision = torch::kI8; /// Use the TensorRT Entropy Calibrator - extra_info.ptq_calibrator = calibrator; + compile_spec.ptq_calibrator = calibrator; /// Set max batch size for the engine - extra_info.max_batch_size = 32; + compile_spec.max_batch_size = 32; /// Set a larger workspace - extra_info.workspace_size = 1 << 28; + compile_spec.workspace_size = 1 << 28; mod.eval(); #ifdef SAVE_ENGINE std::cout << "Compiling graph to save as TRT engine (/tmp/engine_converted_from_jit.trt)" << std::endl; - auto engine = trtorch::ConvertGraphToTRTEngine(mod, "forward", extra_info); + auto engine = trtorch::ConvertGraphToTRTEngine(mod, "forward", compile_spec); std::ofstream out("/tmp/engine_converted_from_jit.trt"); out << engine; out.close(); #endif std::cout << "Compiling and quantizing module" << std::endl; - auto trt_mod = trtorch::CompileGraph(mod, extra_info); + auto trt_mod = trtorch::CompileGraph(mod, compile_spec); return std::move(trt_mod); } diff --git a/cpp/trtorchc/main.cpp b/cpp/trtorchc/main.cpp index b37e2a53e9..0e3aaf61d8 100644 --- a/cpp/trtorchc/main.cpp +++ b/cpp/trtorchc/main.cpp @@ -66,7 +66,7 @@ std::vector parseSingleDim(std::string shape_str) { return {}; } -trtorch::ExtraInfo::InputRange parseDynamicDim(std::string shape_str) { +trtorch::CompileSpec::InputRange parseDynamicDim(std::string shape_str) { shape_str = shape_str.substr(1, shape_str.size() - 2); std::vector> shape; std::stringstream ss; @@ -89,7 +89,7 @@ trtorch::ExtraInfo::InputRange parseDynamicDim(std::string shape_str) { exit(1); } - return trtorch::ExtraInfo::InputRange(shape[0], shape[1], shape[2]); + return trtorch::CompileSpec::InputRange(shape[0], shape[1], shape[2]); } std::string get_cwd() { @@ -190,10 +190,10 @@ int main(int argc, char** argv) { } - std::vector ranges; + std::vector ranges; for (const auto shapes : args::get(input_shapes)) { if (shapes.rfind("(", 0) == 0) { - ranges.push_back(trtorch::ExtraInfo::InputRange(parseSingleDim(shapes))); + ranges.push_back(trtorch::CompileSpec::InputRange(parseSingleDim(shapes))); } else if (shapes.rfind("[", 0) == 0) { ranges.push_back(parseDynamicDim(shapes)); } else { @@ -203,7 +203,7 @@ int main(int argc, char** argv) { } } - auto compile_settings = trtorch::ExtraInfo(ranges); + auto compile_settings = trtorch::CompileSpec(ranges); if (build_debuggable_engine) { compile_settings.debug = true; @@ -251,9 +251,9 @@ int main(int argc, char** argv) { auto device = args::get(device_type); std::transform(device.begin(), device.end(), device.begin(), [](unsigned char c){ return std::tolower(c); }); if (device == "gpu") { - compile_settings.device = trtorch::ExtraInfo::DeviceType::kGPU; + compile_settings.device = trtorch::CompileSpec::DeviceType::kGPU; } else if (device == "dla") 
{ - compile_settings.device = trtorch::ExtraInfo::DeviceType::kDLA; + compile_settings.device = trtorch::CompileSpec::DeviceType::kDLA; } else { trtorch::logging::log(trtorch::logging::Level::kERROR, "Invalid device type, options are [ gpu | dla ]"); std::cerr << parser; @@ -265,11 +265,11 @@ int main(int argc, char** argv) { auto capability = args::get(engine_capability); std::transform(capability.begin(), capability.end(), capability.begin(), [](unsigned char c){ return std::tolower(c); }); if (capability == "default") { - compile_settings.capability = trtorch::ExtraInfo::EngineCapability::kDEFAULT; + compile_settings.capability = trtorch::CompileSpec::EngineCapability::kDEFAULT; } else if (capability == "safe_gpu") { - compile_settings.capability = trtorch::ExtraInfo::EngineCapability::kSAFE_GPU; + compile_settings.capability = trtorch::CompileSpec::EngineCapability::kSAFE_GPU; } else if (capability == "safe_dla") { - compile_settings.capability = trtorch::ExtraInfo::EngineCapability::kSAFE_DLA; + compile_settings.capability = trtorch::CompileSpec::EngineCapability::kSAFE_DLA; } else { trtorch::logging::log(trtorch::logging::Level::kERROR, "Invalid engine capability, options are [ default | safe_gpu | safe_dla ]"); std::cerr << parser; @@ -320,7 +320,7 @@ int main(int argc, char** argv) { } else { auto trt_mod = trtorch::CompileGraph(mod, compile_settings); - if (compile_settings.op_precision == trtorch::ExtraInfo::DataType::kFloat) { + if (compile_settings.op_precision == trtorch::CompileSpec::DataType::kFloat) { double threshold_val = 2e-5; if (threshold) { threshold_val = args::get(threshold); diff --git a/cpp/trtorchexec/main.cpp b/cpp/trtorchexec/main.cpp index 8b3e114e62..1dcc74e91b 100644 --- a/cpp/trtorchexec/main.cpp +++ b/cpp/trtorchexec/main.cpp @@ -56,8 +56,8 @@ int main(int argc, const char* argv[]) { dims.push_back(v); } - auto extra_info = trtorch::ExtraInfo(dims); - extra_info.workspace_size = 1 << 24; + auto compile_spec = trtorch::CompileSpec(dims); + compile_spec.workspace_size = 1 << 24; std::cout << "Checking operator support" << std::endl; if (!trtorch::CheckMethodOperatorSupport(mod, "forward")) { @@ -66,7 +66,7 @@ int main(int argc, const char* argv[]) { } std::cout << "Compiling graph to save as TRT engine (/tmp/engine_converted_from_jit.trt)" << std::endl; - auto engine = trtorch::ConvertGraphToTRTEngine(mod, "forward", extra_info); + auto engine = trtorch::ConvertGraphToTRTEngine(mod, "forward", compile_spec); std::ofstream out("/tmp/engine_converted_from_jit.trt"); out << engine; out.close(); @@ -89,7 +89,7 @@ int main(int argc, const char* argv[]) { } std::cout << "Compiling graph as module" << std::endl; - auto trt_mod = trtorch::CompileGraph(mod, extra_info); + auto trt_mod = trtorch::CompileGraph(mod, compile_spec); std::cout << "Running TRT module" << std::endl; torch::jit::IValue trt_results_ivalues = trt_mod.forward(trt_inputs_ivalues); std::vector trt_results; diff --git a/docsrc/tutorials/getting_started.rst b/docsrc/tutorials/getting_started.rst index 05c4e9efba..a1978927b1 100644 --- a/docsrc/tutorials/getting_started.rst +++ b/docsrc/tutorials/getting_started.rst @@ -305,7 +305,7 @@ With out module loaded, we can feed it into the TRTorch compiler. When we do so mod.eval(); auto in = torch::randn({1, 1, 32, 32}, {torch::kCUDA}); - auto trt_mod = trtorch::CompileGraph(mod, std::vector{{in.sizes()}}); + auto trt_mod = trtorch::CompileGraph(mod, std::vector{{in.sizes()}}); auto out = trt_mod.forward({in}); Thats it! 
Now the graph runs primarily not with the JIT compiler but using TensorRT (though we execute the graph using the JIT runtime). @@ -322,8 +322,8 @@ We can also set settings like operating precision to run in FP16. mod.eval(); auto in = torch::randn({1, 1, 32, 32}, {torch::kCUDA}).to(torch::kHALF); - auto input_sizes = std::vector({in.sizes()}); - trtorch::ExtraInfo info(input_sizes); + auto input_sizes = std::vector({in.sizes()}); + trtorch::CompileSpec info(input_sizes); info.op_precision = torch::kHALF; auto trt_mod = trtorch::CompileGraph(mod, info); auto out = trt_mod.forward({in}); @@ -370,8 +370,8 @@ If you want to save the engine produced by TRTorch to use in a TensorRT applicat mod.eval(); auto in = torch::randn({1, 1, 32, 32}, {torch::kCUDA}).to(torch::kHALF); - auto input_sizes = std::vector({in.sizes()}); - trtorch::ExtraInfo info(input_sizes); + auto input_sizes = std::vector({in.sizes()}); + trtorch::CompileSpec info(input_sizes); info.op_precision = torch::kHALF; auto trt_mod = trtorch::ConvertGraphToTRTEngine(mod, "forward", info); std::ofstream out("/tmp/engine_converted_from_jit.trt"); diff --git a/docsrc/tutorials/ptq.rst b/docsrc/tutorials/ptq.rst index fb12e46ef4..28d60acec3 100644 --- a/docsrc/tutorials/ptq.rst +++ b/docsrc/tutorials/ptq.rst @@ -115,21 +115,21 @@ defines the calibration algorithm used when calibrating. You can explicitly make // MinMax Calibrator is geared more towards NLP tasks auto calibrator = trtorch::ptq::make_int8_calibrator(std::move(calibration_dataloader), calibration_cache_file, true); -Then all thats required to setup the module for INT8 calibration is to set the following compile settings in the `trtorch::ExtraInfo` struct and compiling the module: +Then all thats required to setup the module for INT8 calibration is to set the following compile settings in the `trtorch::CompileSpec` struct and compiling the module: .. code-block:: c++ std::vector> input_shape = {{32, 3, 32, 32}}; /// Configure settings for compilation - auto extra_info = trtorch::ExtraInfo({input_shape}); + auto compile_spec = trtorch::CompileSpec({input_shape}); /// Set operating precision to INT8 - extra_info.op_precision = torch::kI8; + compile_spec.op_precision = torch::kI8; /// Use the TensorRT Entropy Calibrator - extra_info.ptq_calibrator = calibrator; + compile_spec.ptq_calibrator = calibrator; /// Set a larger workspace (you may get better performace from doing so) - extra_info.workspace_size = 1 << 28; + compile_spec.workspace_size = 1 << 28; - auto trt_mod = trtorch::CompileGraph(mod, extra_info); + auto trt_mod = trtorch::CompileGraph(mod, compile_spec); If you have an existing Calibrator implementation for TensorRT you may directly set the ``ptq_calibrator`` field with a pointer to your calibrator and it will work as well. 
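As the commit message notes, porting across this breaking change is essentially a find-and-replace of the type name. A minimal C++ sketch of the before/after (the module path and input shape here are illustrative assumptions, not part of the patch; the fields shown mirror the PTQ and README examples above):

```c++
#include "torch/script.h"
#include "trtorch/trtorch.h"

// Hypothetical module path and input shape, purely for illustration.
auto mod = torch::jit::load("model.ts");
mod.eval();
std::vector<std::vector<int64_t>> dims = {{1, 3, 224, 224}};

// Old API (pre-patch):  auto spec = trtorch::ExtraInfo(dims);
// New API (this patch): same fields, renamed type.
auto spec = trtorch::CompileSpec(dims);
spec.op_precision = torch::kHALF;   // run the engine in FP16
spec.workspace_size = 1 << 20;      // TensorRT workspace budget

auto trt_mod = trtorch::CompileGraph(mod, spec);
// In FP16, inputs should also be converted to half precision.
auto out = trt_mod.forward({torch::randn({1, 3, 224, 224}, {torch::kCUDA}).to(torch::kHALF)});
```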
diff --git a/py/BUILD b/py/BUILD index a2eb3c004b..be5b2d7047 100644 --- a/py/BUILD +++ b/py/BUILD @@ -9,7 +9,7 @@ py_library( "trtorch/__init__.py", "trtorch/_version.py", "trtorch/_compiler.py", - "trtorch/_extra_info.py", + "trtorch/_compile_spec.py", "trtorch/_types.py", "trtorch/logging.py" ], diff --git a/py/trtorch/_extra_info.py b/py/trtorch/_compile_spec.py similarity index 64% rename from py/trtorch/_extra_info.py rename to py/trtorch/_compile_spec.py index 5247b91a0a..aa060bd085 100644 --- a/py/trtorch/_extra_info.py +++ b/py/trtorch/_compile_spec.py @@ -84,53 +84,53 @@ def _parse_device_type(device: Any) -> _types.DeviceType: else: raise TypeError("Device specification must be of type torch.device or trtorch.DeviceType, but got: " + str(type(device))) -def _parse_extra_info(extra_info: Dict[str, Any]) -> trtorch._C.ExtraInfo: - info = trtorch._C.ExtraInfo() - if "input_shapes" not in extra_info: +def _parse_compile_spec(compile_spec: Dict[str, Any]) -> trtorch._C.CompileSpec: + info = trtorch._C.CompileSpec() + if "input_shapes" not in compile_spec: raise KeyError("Input shapes for inputs are required as a List, provided as either a static sizes or a range of three sizes (min, opt, max) as Dict") - info.input_ranges = _parse_input_ranges(extra_info["input_shapes"]) + info.input_ranges = _parse_input_ranges(compile_spec["input_shapes"]) - if "op_precision" in extra_info: - info.op_precision = _parse_op_precision(extra_info["op_precision"]) + if "op_precision" in compile_spec: + info.op_precision = _parse_op_precision(compile_spec["op_precision"]) - if "refit" in extra_info: - assert isinstance(extra_info["refit"], bool) - info.refit = extra_info["refit"] + if "refit" in compile_spec: + assert isinstance(compile_spec["refit"], bool) + info.refit = compile_spec["refit"] - if "debug" in extra_info: - assert isinstance(extra_info["debug"], bool) - info.debug = extra_info["debug"] + if "debug" in compile_spec: + assert isinstance(compile_spec["debug"], bool) + info.debug = compile_spec["debug"] - if "strict_types" in extra_info: - assert isinstance(extra_info["strict_types"], bool) - info.strict_types = extra_info["strict_types"] + if "strict_types" in compile_spec: + assert isinstance(compile_spec["strict_types"], bool) + info.strict_types = compile_spec["strict_types"] - if "allow_gpu_fallback" in extra_info: - assert isinstance(extra_info["allow_gpu_fallback"], bool) - info.allow_gpu_fallback = extra_info["allow_gpu_fallback"] + if "allow_gpu_fallback" in compile_spec: + assert isinstance(compile_spec["allow_gpu_fallback"], bool) + info.allow_gpu_fallback = compile_spec["allow_gpu_fallback"] - if "device" in extra_info: - info.device = _parse_device_type(extra_info["device"]) + if "device" in compile_spec: + info.device = _parse_device_type(compile_spec["device"]) - if "capability" in extra_info: - assert isinstance(extra_info["capability"], type.EngineCapability) - info.capability = extra_info["capability"] + if "capability" in compile_spec: + assert isinstance(compile_spec["capability"], type.EngineCapability) + info.capability = compile_spec["capability"] - if "num_min_timing_iters" in extra_info: - assert type(extra_info["num_min_timing_iters"]) is int - info.num_min_timing_iters = extra_info["num_min_timing_iters"] + if "num_min_timing_iters" in compile_spec: + assert type(compile_spec["num_min_timing_iters"]) is int + info.num_min_timing_iters = compile_spec["num_min_timing_iters"] - if "num_avg_timing_iters" in extra_info: - assert type(extra_info["num_avg_timing_iters"]) 
is int - info.num_avg_timing_iters = extra_info["num_avg_timing_iters"] + if "num_avg_timing_iters" in compile_spec: + assert type(compile_spec["num_avg_timing_iters"]) is int + info.num_avg_timing_iters = compile_spec["num_avg_timing_iters"] - if "workspace_size" in extra_info: - assert type(extra_info["workspace_size"]) is int - info.workspace_size = extra_info["workspace_size"] + if "workspace_size" in compile_spec: + assert type(compile_spec["workspace_size"]) is int + info.workspace_size = compile_spec["workspace_size"] - if "max_batch_size" in extra_info: - assert type(extra_info["max_batch_size"]) is int - info.max_batch_size = extra_info["max_batch_size"] + if "max_batch_size" in compile_spec: + assert type(compile_spec["max_batch_size"]) is int + info.max_batch_size = compile_spec["max_batch_size"] return info \ No newline at end of file diff --git a/py/trtorch/_compiler.py b/py/trtorch/_compiler.py index 1627e5a05f..1c35dbe4a1 100644 --- a/py/trtorch/_compiler.py +++ b/py/trtorch/_compiler.py @@ -3,12 +3,12 @@ from torch import nn import trtorch._C -from trtorch._extra_info import _parse_extra_info +from trtorch._compile_spec import _parse_compile_spec from trtorch._version import __version__ from types import FunctionType -def compile(module: torch.jit.ScriptModule, extra_info: Any) -> torch.jit.ScriptModule: +def compile(module: torch.jit.ScriptModule, compile_spec: Any) -> torch.jit.ScriptModule: """Compile a TorchScript module for NVIDIA GPUs using TensorRT Takes a existing TorchScript module and a set of settings to configure the compiler @@ -19,13 +19,13 @@ def compile(module: torch.jit.ScriptModule, extra_info: Any) -> torch.jit.Script Args: module (torch.jit.ScriptModule): Source module, a result of tracing or scripting a PyTorch ``torch.nn.Module`` - extra_info (dict): Compilation settings including operating precision, target device, etc. + compile_spec (dict): Compilation settings including operating precision, target device, etc. One key is required which is ``input_shapes``, describing the input sizes or ranges for inputs to the graph. All other keys are optional .. code-block:: py - ExtraInfo = { + compile_spec = { "input_shapes": [ (1, 3, 224, 224), # Static input shape for input #1 { @@ -58,11 +58,11 @@ def compile(module: torch.jit.ScriptModule, extra_info: Any) -> torch.jit.Script if isinstance(module, torch.jit.ScriptFunction): raise TypeError("torch.jit.ScriptFunction currently is not directly supported, wrap the function in a module to compile") - compiled_cpp_mod = trtorch._C.compile_graph(module._c, _parse_extra_info(extra_info)) + compiled_cpp_mod = trtorch._C.compile_graph(module._c, _parse_compile_spec(compile_spec)) compiled_module = torch.jit._recursive.wrap_cpp_module(compiled_cpp_mod) return compiled_module -def convert_method_to_trt_engine(module: torch.jit.ScriptModule, method_name: str, extra_info: Any) -> str: +def convert_method_to_trt_engine(module: torch.jit.ScriptModule, method_name: str, compile_spec: Any) -> str: """Convert a TorchScript module method to a serialized TensorRT engine Converts a specified method of a module to a serialized TensorRT engine given a dictionary of conversion settings @@ -71,13 +71,13 @@ def convert_method_to_trt_engine(module: torch.jit.ScriptModule, method_name: st module (torch.jit.ScriptModule): Source module, a result of tracing or scripting a PyTorch ``torch.nn.Module`` method_name (str): Name of method to convert - extra_info (dict): Compilation settings including operating precision, target device, etc. 
+ compile_spec (dict): Compilation settings including operating precision, target device, etc. One key is required which is ``input_shapes``, describing the input sizes or ranges for inputs to the graph. All other keys are optional .. code-block:: py - ExtraInfo = { + CompileSpec = { "input_shapes": [ (1, 3, 224, 224), # Static input shape for input #1 { @@ -109,7 +109,7 @@ def convert_method_to_trt_engine(module: torch.jit.ScriptModule, method_name: st if isinstance(module, torch.jit.ScriptFunction): raise TypeError("torch.jit.ScriptFunctions currently are not directly supported, wrap the function in a module to compile") - return trtorch._C.convert_graph_to_trt_engine(module._c, method_name, _parse_extra_info(extra_info)) + return trtorch._C.convert_graph_to_trt_engine(module._c, method_name, _parse_compile_spec(compile_spec)) def check_method_op_support(module: torch.jit.ScriptModule, method_name: str) -> bool: """Checks to see if a method is fully supported by TRTorch diff --git a/py/trtorch/csrc/trtorch_py.cpp b/py/trtorch/csrc/trtorch_py.cpp index 765f75d56a..da6d2b2688 100644 --- a/py/trtorch/csrc/trtorch_py.cpp +++ b/py/trtorch/csrc/trtorch_py.cpp @@ -1,5 +1,7 @@ #include "pybind11/pybind11.h" #include "pybind11/stl.h" +//TODO: Remove when we have access to PyTorch to_backend autoregistration +#include "core/backend.h" #include "core/compiler.h" #include "core/conversion/conversion.h" #include "torch/torch.h" @@ -73,13 +75,13 @@ nvinfer1::EngineCapability toTRTEngineCapability(EngineCapability value) { } } -struct ExtraInfo { +struct CompileSpec { - core::ExtraInfo toInternalExtraInfo() { + core::CompileSpec toInternalCompileSpec() { for (auto i : input_ranges) { internal_input_ranges.push_back(i.toInternalInputRange()); } - auto info = core::ExtraInfo(internal_input_ranges); + auto info = core::CompileSpec(internal_input_ranges); info.convert_info.engine_settings.op_precision = toTRTDataType(op_precision); info.convert_info.engine_settings.refit = refit; info.convert_info.engine_settings.debug = debug; @@ -109,15 +111,15 @@ struct ExtraInfo { uint64_t max_batch_size = 0; }; -torch::jit::Module CompileGraph(const torch::jit::Module& mod, ExtraInfo& info) { +torch::jit::Module CompileGraph(const torch::jit::Module& mod, CompileSpec& info) { py::gil_scoped_acquire gil; - auto trt_mod = core::CompileGraph(mod, info.toInternalExtraInfo()); + auto trt_mod = core::CompileGraph(mod, info.toInternalCompileSpec()); return trt_mod; } -py::bytes ConvertGraphToTRTEngine(const torch::jit::Module& mod, const std::string& method_name, ExtraInfo& info) { +py::bytes ConvertGraphToTRTEngine(const torch::jit::Module& mod, const std::string& method_name, CompileSpec& info) { py::gil_scoped_acquire gil; - auto trt_engine = core::ConvertGraphToTRTEngine(mod, method_name, info.toInternalExtraInfo()); + auto trt_engine = core::ConvertGraphToTRTEngine(mod, method_name, info.toInternalCompileSpec()); return py::bytes(trt_engine); } @@ -189,20 +191,20 @@ PYBIND11_MODULE(_C, m) { .value("safe_dla", EngineCapability::kSAFE_DLA, "Use safety DLA kernels only") .value("default", EngineCapability::kDEFAULT, "Use default behavior"); - py::class_(m, "ExtraInfo") + py::class_(m, "CompileSpec") .def(py::init<>()) - .def_readwrite("input_ranges", &ExtraInfo::input_ranges) - .def_readwrite("op_precision", &ExtraInfo::op_precision) - .def_readwrite("refit", &ExtraInfo::refit) - .def_readwrite("debug", &ExtraInfo::debug) - .def_readwrite("strict_types", &ExtraInfo::strict_types) - .def_readwrite("allow_gpu_fallback", 
&ExtraInfo::allow_gpu_fallback) - .def_readwrite("device", &ExtraInfo::device) - .def_readwrite("capability", &ExtraInfo::capability) - .def_readwrite("num_min_timing_iters", &ExtraInfo::num_min_timing_iters) - .def_readwrite("num_avg_timing_iters", &ExtraInfo::num_avg_timing_iters) - .def_readwrite("workspace_size", &ExtraInfo::workspace_size) - .def_readwrite("max_batch_size", &ExtraInfo::max_batch_size); + .def_readwrite("input_ranges", &CompileSpec::input_ranges) + .def_readwrite("op_precision", &CompileSpec::op_precision) + .def_readwrite("refit", &CompileSpec::refit) + .def_readwrite("debug", &CompileSpec::debug) + .def_readwrite("strict_types", &CompileSpec::strict_types) + .def_readwrite("allow_gpu_fallback", &CompileSpec::allow_gpu_fallback) + .def_readwrite("device", &CompileSpec::device) + .def_readwrite("capability", &CompileSpec::capability) + .def_readwrite("num_min_timing_iters", &CompileSpec::num_min_timing_iters) + .def_readwrite("num_avg_timing_iters", &CompileSpec::num_avg_timing_iters) + .def_readwrite("workspace_size", &CompileSpec::workspace_size) + .def_readwrite("max_batch_size", &CompileSpec::max_batch_size); m.doc() = "TRTorch Internal C Bindings: Ahead of Time compilation for PyTorch JIT. A tool to convert PyTorch JIT to TensorRT"; m.def("compile_graph", &trtorch::pyapi::CompileGraph, "Ingest a PyTorch JIT module and convert supported subgraphs to TensorRT engines, returns a JIT module with the engines embedded"); @@ -225,7 +227,11 @@ PYBIND11_MODULE(_C, m) { .value("INFO", core::util::logging::LogLevel::kINFO) .value("DEBUG", core::util::logging::LogLevel::kDEBUG) .export_values(); + + //TODO: Remove when we have access to PyTorch autoregistration + //m.def("to_tensorrt", backend::GetTensorRTBackend().generateToBackendFn()); } -} // namespace py + +} // namespace pyapi } // namespace trtorch diff --git a/tests/accuracy/test_fp16_accuracy.cpp b/tests/accuracy/test_fp16_accuracy.cpp index 6de40a6c31..b19c01cb38 100644 --- a/tests/accuracy/test_fp16_accuracy.cpp +++ b/tests/accuracy/test_fp16_accuracy.cpp @@ -27,10 +27,10 @@ TEST_P(AccuracyTests, FP16AccuracyIsClose) { torch::Tensor jit_accuracy = (jit_correct / jit_total) * 100; std::vector> input_shape = {{32, 3, 32, 32}}; - auto extra_info = trtorch::ExtraInfo({input_shape}); - extra_info.op_precision = torch::kF16; + auto compile_spec = trtorch::CompileSpec({input_shape}); + compile_spec.op_precision = torch::kF16; - auto trt_mod = trtorch::CompileGraph(mod, extra_info); + auto trt_mod = trtorch::CompileGraph(mod, compile_spec); torch::Tensor trt_correct = torch::zeros({1}, {torch::kCUDA}), trt_total = torch::zeros({1}, {torch::kCUDA}); for (auto batch : *eval_dataloader) { diff --git a/tests/accuracy/test_fp32_accuracy.cpp b/tests/accuracy/test_fp32_accuracy.cpp index d3d8bddb96..11ed944077 100644 --- a/tests/accuracy/test_fp32_accuracy.cpp +++ b/tests/accuracy/test_fp32_accuracy.cpp @@ -27,10 +27,10 @@ TEST_P(AccuracyTests, FP16AccuracyIsClose) { torch::Tensor jit_accuracy = (jit_correct / jit_total) * 100; std::vector> input_shape = {{32, 3, 32, 32}}; - auto extra_info = trtorch::ExtraInfo({input_shape}); - extra_info.op_precision = torch::kF32; + auto compile_spec = trtorch::CompileSpec({input_shape}); + compile_spec.op_precision = torch::kF32; - auto trt_mod = trtorch::CompileGraph(mod, extra_info); + auto trt_mod = trtorch::CompileGraph(mod, compile_spec); torch::Tensor trt_correct = torch::zeros({1}, {torch::kCUDA}), trt_total = torch::zeros({1}, {torch::kCUDA}); for (auto batch : *eval_dataloader) { 
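On the Python side the argument simply becomes `compile_spec`; a short sketch of a user-facing call under the renamed keys (the serialized model file is a hypothetical stand-in, and the shape matches the accuracy tests above):

```python
import torch
import trtorch

# Hypothetical serialized TorchScript module, purely for illustration.
scripted = torch.jit.load("model.ts").eval().cuda()

trt_mod = trtorch.compile(scripted, {
    "input_shapes": [(32, 3, 32, 32)],   # one static input, as in the tests
    "op_precision": torch.half,          # engine runs in FP16
    "workspace_size": 1 << 20,
})

# Inputs are converted to FP16 to match the operating precision.
out = trt_mod(torch.randn(32, 3, 32, 32).half().cuda())
```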
diff --git a/tests/accuracy/test_int8_accuracy.cpp b/tests/accuracy/test_int8_accuracy.cpp index aa4824948a..db5b259657 100644 --- a/tests/accuracy/test_int8_accuracy.cpp +++ b/tests/accuracy/test_int8_accuracy.cpp @@ -20,15 +20,15 @@ TEST_P(AccuracyTests, FP16AccuracyIsClose) { std::vector> input_shape = {{32, 3, 32, 32}}; // Configure settings for compilation - auto extra_info = trtorch::ExtraInfo({input_shape}); + auto compile_spec = trtorch::CompileSpec({input_shape}); // Set operating precision to INT8 - extra_info.op_precision = torch::kI8; + compile_spec.op_precision = torch::kI8; // Use the TensorRT Entropy Calibrator - extra_info.ptq_calibrator = calibrator; + compile_spec.ptq_calibrator = calibrator; // Set max batch size for the engine - extra_info.max_batch_size = 32; + compile_spec.max_batch_size = 32; // Set a larger workspace - extra_info.workspace_size = 1 << 28; + compile_spec.workspace_size = 1 << 28; mod.eval(); @@ -57,7 +57,7 @@ TEST_P(AccuracyTests, FP16AccuracyIsClose) { torch::Tensor jit_accuracy = (jit_correct / jit_total) * 100; // Compile Graph - auto trt_mod = trtorch::CompileGraph(mod, extra_info); + auto trt_mod = trtorch::CompileGraph(mod, compile_spec); // Check the INT8 accuracy in TRT torch::Tensor trt_correct = torch::zeros({1}, {torch::kCUDA}), trt_total = torch::zeros({1}, {torch::kCUDA}); diff --git a/tests/modules/test_serialization.cpp b/tests/modules/test_serialization.cpp index a7fcea5558..bb6b984f4f 100644 --- a/tests/modules/test_serialization.cpp +++ b/tests/modules/test_serialization.cpp @@ -1,7 +1,7 @@ #include "module_test.h" -std::vector toInputRangesDynamic(std::vector> opts) { - std::vector a; +std::vector toInputRangesDynamic(std::vector> opts) { + std::vector a; for (auto opt : opts) { std::vector min_range(opt); @@ -12,7 +12,7 @@ std::vector toInputRangesDynamic(std::vector Date: Wed, 21 Oct 2020 15:18:28 -0700 Subject: [PATCH 2/6] refactor(//core/runtime): Renaming execution -> runtime Signed-off-by: Naren Dasan Signed-off-by: Naren Dasan --- BUILD | 2 +- core/BUILD | 15 ++++++++++++--- core/{execution => runtime}/BUILD | 8 ++++---- core/{execution => runtime}/TRTEngine.cpp | 9 +++++---- core/{execution => runtime}/register_trt_op.cpp | 4 ++-- core/{execution/execution.h => runtime/runtime.h} | 4 ++-- docsrc/contributors/phases.rst | 9 ++++----- .../contributors/{execution.rst => runtime.rst} | 4 ++-- docsrc/index.rst | 4 ---- tests/util/run_graph_engine.cpp | 6 +++--- 10 files changed, 35 insertions(+), 30 deletions(-) rename core/{execution => runtime}/BUILD (84%) rename core/{execution => runtime}/TRTEngine.cpp (96%) rename core/{execution => runtime}/register_trt_op.cpp (97%) rename core/{execution/execution.h => runtime/runtime.h} (95%) rename docsrc/contributors/{execution.rst => runtime.rst} (96%) diff --git a/BUILD b/BUILD index d1db473c18..baa54db29a 100644 --- a/BUILD +++ b/BUILD @@ -19,7 +19,7 @@ pkg_tar( "//core/conversion/tensorcontainer:include", "//core/conversion/evaluators:include", "//core/conversion/converters/impl/plugins:include", - "//core/execution:include", + "//core/runtime:include", "//core/lowering:include", "//core/lowering/passes:include", "//core/util:include", diff --git a/core/BUILD b/core/BUILD index 8d2beae23c..6bf5a057a1 100644 --- a/core/BUILD +++ b/core/BUILD @@ -7,6 +7,13 @@ config_setting( } ) +config_setting( + name = "python_core", + values = { + "define": "target_lang=python" + } +) + cc_library( name = "core", hdrs = [ @@ -17,7 +24,7 @@ cc_library( ], deps = [ "//core/conversion", - 
"//core/execution", + "//core/runtime", "//core/lowering", "//core/util/logging", "@tensorrt//:nvinfer" @@ -28,11 +35,13 @@ cc_library( alwayslink=True, ) - load("@rules_pkg//:pkg.bzl", "pkg_tar") pkg_tar( name = "include", package_dir = "core/", - srcs = ["compiler.h"], + srcs = [ + "backend.h", + "compiler.h", + ], ) diff --git a/core/execution/BUILD b/core/runtime/BUILD similarity index 84% rename from core/execution/BUILD rename to core/runtime/BUILD index 1741249624..3e7e6d8a57 100644 --- a/core/execution/BUILD +++ b/core/runtime/BUILD @@ -8,9 +8,9 @@ config_setting( ) cc_library( - name = "execution", + name = "runtime", hdrs = [ - "execution.h", + "runtime.h", ], srcs = [ "TRTEngine.cpp", @@ -30,6 +30,6 @@ load("@rules_pkg//:pkg.bzl", "pkg_tar") pkg_tar( name = "include", - package_dir = "core/execution/", - srcs = ["execution.h"], + package_dir = "core/runtime/", + srcs = ["runtime.h"], ) diff --git a/core/execution/TRTEngine.cpp b/core/runtime/TRTEngine.cpp similarity index 96% rename from core/execution/TRTEngine.cpp rename to core/runtime/TRTEngine.cpp index ca4b5f10e6..f149d0a638 100644 --- a/core/execution/TRTEngine.cpp +++ b/core/runtime/TRTEngine.cpp @@ -4,11 +4,11 @@ #include "torch/csrc/jit/frontend/function_schema_parser.h" #include "core/util/prelude.h" -#include "core/execution/execution.h" +#include "core/runtime/runtime.h" namespace trtorch { namespace core { -namespace execution { +namespace runtime { std::string slugify(std::string s) { std::replace(s.begin(), s.end(), '.', '_'); @@ -81,6 +81,7 @@ TRTEngine::~TRTEngine() { // return c10::List(output_vec); // } +namespace { static auto TRTORCH_UNUSED TRTEngineTSRegistrtion = torch::class_("tensorrt", "Engine") .def(torch::init()) // TODO: .def("__call__", &TRTEngine::Run) @@ -94,7 +95,7 @@ static auto TRTORCH_UNUSED TRTEngineTSRegistrtion = torch::class_("te return c10::make_intrusive(std::move(seralized_engine)); } ); - -} // namespace execution +} // namespace +} // namespace runtime } // namespace core } // namespace trtorch diff --git a/core/execution/register_trt_op.cpp b/core/runtime/register_trt_op.cpp similarity index 97% rename from core/execution/register_trt_op.cpp rename to core/runtime/register_trt_op.cpp index f16f106350..75d34c701e 100644 --- a/core/execution/register_trt_op.cpp +++ b/core/runtime/register_trt_op.cpp @@ -4,11 +4,11 @@ #include "torch/csrc/jit/runtime/custom_operator.h" #include "core/util/prelude.h" -#include "core/execution/execution.h" +#include "core/runtime/runtime.h" namespace trtorch { namespace core { -namespace execution { +namespace runtime { std::vector execute_engine(std::vector inputs, c10::intrusive_ptr compiled_engine) { LOG_DEBUG("Attempting to run engine (ID: " << compiled_engine->name << ")"); diff --git a/core/execution/execution.h b/core/runtime/runtime.h similarity index 95% rename from core/execution/execution.h rename to core/runtime/runtime.h index 3f61160e06..ef7670412d 100644 --- a/core/execution/execution.h +++ b/core/runtime/runtime.h @@ -8,7 +8,7 @@ namespace trtorch { namespace core { -namespace execution { +namespace runtime { using EngineID = int64_t; @@ -35,6 +35,6 @@ struct TRTEngine : torch::CustomClassHolder { std::vector execute_engine(std::vector inputs, c10::intrusive_ptr compiled_engine); -} // namespace execution +} // namespace runtime } // namespace core } // namespace trtorch diff --git a/docsrc/contributors/phases.rst b/docsrc/contributors/phases.rst index c78f50efe2..09f7f7d690 100644 --- a/docsrc/contributors/phases.rst +++ 
b/docsrc/contributors/phases.rst @@ -27,12 +27,11 @@ The conversion phase is made up of three main components, a context to manage co a evaluator library which will execute operations that can be resolved at compile time and a converter library which maps an op from JIT to TensorRT. -Execution -^^^^^^^^^^^ +Compilation and Runtime +^^^^^^^^^^^^^^^^^^^^^^^^ :ref:`execution` -The execution phase constructs a TorchScript program to run the converted TensorRT engine. It +The final compilation phase constructs a TorchScript program to run the converted TensorRT engine. It takes a serialized engine and instantiates it within a engine manager, then the compiler will build out a JIT graph that references this engine and wraps it in a module to return to the user. -When the user executes the module, the JIT program will look up the engine and pass the inputs -to it, then return the results. \ No newline at end of file +When the user executes the module, the JIT program run in the JIT runtime extended by TRTorch with the data providied from the user. \ No newline at end of file diff --git a/docsrc/contributors/execution.rst b/docsrc/contributors/runtime.rst similarity index 96% rename from docsrc/contributors/execution.rst rename to docsrc/contributors/runtime.rst index 0e08650fb8..ba64a9066d 100644 --- a/docsrc/contributors/execution.rst +++ b/docsrc/contributors/runtime.rst @@ -1,9 +1,9 @@ .. _execution: -Execution Phase +Runtime Phase ================ -The execution phase is responsible for constructing self standing TorchScript graphs with embedded TensorRT engines and serving as the runtime +The Runtime phase is responsible for constructing self standing TorchScript graphs with embedded TensorRT engines and serving as the runtime when these engines are called. The main interface accepts a serialized TensorRT engine. The execution phase will deserialize and wrap this engine in a class which maintains a execution context for each engine and some metadata about its inputs and outputs and is compatable with the TorchScript interpreter so that diff --git a/docsrc/index.rst b/docsrc/index.rst index f65a6a62be..2db21d6e2d 100644 --- a/docsrc/index.rst +++ b/docsrc/index.rst @@ -38,10 +38,6 @@ Getting Started tutorials/trtorchc _notebooks/lenet -Notebooks ------------- -* :ref:`lenet` - .. toctree:: :caption: Notebooks :maxdepth: 1 diff --git a/tests/util/run_graph_engine.cpp b/tests/util/run_graph_engine.cpp index 49259bfd51..c052812e3c 100644 --- a/tests/util/run_graph_engine.cpp +++ b/tests/util/run_graph_engine.cpp @@ -5,7 +5,7 @@ #include "torch/csrc/jit/ir/irparser.h" #include "torch/custom_class.h" #include "core/conversion/conversion.h" -#include "core/execution/execution.h" +#include "core/runtime/runtime.h" #include "cuda_runtime_api.h" #include @@ -43,8 +43,8 @@ std::vector toInputRangesDynamic(std::vector RunEngine(std::string& eng, std::vector inputs) { LOG_DEBUG("Running TRT version"); - auto engine_ptr = c10::make_intrusive("test_engine", eng); - auto outputs = trtorch::core::execution::execute_engine(inputs, engine_ptr); + auto engine_ptr = c10::make_intrusive("test_engine", eng); + auto outputs = trtorch::core::runtime::execute_engine(inputs, engine_ptr); return outputs; } From 59113cfa52a46a5ea0d889bc88e467d0d3350f71 Mon Sep 17 00:00:00 2001 From: Naren Dasan Date: Wed, 21 Oct 2020 15:22:54 -0700 Subject: [PATCH 3/6] feat(//py): Initial compiliant implementation of the to_backend api for PyTorch Users can now use a direct PyTorch integration by just importing the trtorch package. 
The only difference between torch._C._jit_to_tensorrt and trtorch.compile is that you need to use the trtorch.TensorRTCompileSpec constructor to build a wrapper around your spec dictionary Signed-off-by: Naren Dasan Signed-off-by: Naren Dasan --- py/setup.py | 8 +- py/trtorch/__init__.py | 1 + py/trtorch/_compile_spec.py | 93 ++++++++++-- py/trtorch/_compiler.py | 4 +- py/trtorch/csrc/register_tensorrt_classes.cpp | 47 ++++++ py/trtorch/csrc/tensorrt_backend.cpp | 86 +++++++++++ py/trtorch/csrc/tensorrt_backend.h | 19 +++ py/trtorch/csrc/tensorrt_classes.cpp | 143 ++++++++++++++++++ py/trtorch/csrc/tensorrt_classes.h | 101 +++++++++++++ py/trtorch/csrc/trtorch_py.cpp | 106 +------------ tests/BUILD | 3 +- tests/py/BUILD | 16 +- tests/py/model_test_case.py | 19 +++ tests/py/test_api.py | 16 +- tests/py/test_to_backend_api.py | 44 ++++++ 15 files changed, 573 insertions(+), 133 deletions(-) create mode 100644 py/trtorch/csrc/register_tensorrt_classes.cpp create mode 100644 py/trtorch/csrc/tensorrt_backend.cpp create mode 100644 py/trtorch/csrc/tensorrt_backend.h create mode 100644 py/trtorch/csrc/tensorrt_classes.cpp create mode 100644 py/trtorch/csrc/tensorrt_classes.h create mode 100644 tests/py/model_test_case.py create mode 100644 tests/py/test_to_backend_api.py diff --git a/py/setup.py b/py/setup.py index 53f85dada1..01dfdfdfb7 100644 --- a/py/setup.py +++ b/py/setup.py @@ -156,7 +156,12 @@ def run(self): ext_modules = [ cpp_extension.CUDAExtension('trtorch._C', - ['trtorch/csrc/trtorch_py.cpp'], + [ + 'trtorch/csrc/trtorch_py.cpp', + 'trtorch/csrc/tensorrt_backend.cpp', + 'trtorch/csrc/tensorrt_classes.cpp', + 'trtorch/csrc/register_tensorrt_classes.cpp', + ], library_dirs=[ (dir_path + '/trtorch/lib/'), "/opt/conda/lib/python3.6/config-3.6m-x86_64-linux-gnu" @@ -165,6 +170,7 @@ def run(self): "trtorch" ], include_dirs=[ + dir_path + "trtorch/csrc", dir_path + "/../", dir_path + "/../bazel-TRTorch/external/tensorrt/include", ], diff --git a/py/trtorch/__init__.py b/py/trtorch/__init__.py index 88e1ca6db9..772b6ff08f 100644 --- a/py/trtorch/__init__.py +++ b/py/trtorch/__init__.py @@ -9,6 +9,7 @@ from trtorch._version import __version__ from trtorch._compiler import * +from trtorch._compile_spec import TensorRTCompileSpec from trtorch._types import * from trtorch import logging diff --git a/py/trtorch/_compile_spec.py b/py/trtorch/_compile_spec.py index aa060bd085..6f0ff49d4a 100644 --- a/py/trtorch/_compile_spec.py +++ b/py/trtorch/_compile_spec.py @@ -73,16 +73,21 @@ def _parse_op_precision(precision: Any) -> _types.dtype: def _parse_device_type(device: Any) -> _types.DeviceType: if isinstance(device, torch.device): - if torch.device.type == 'cuda': + if device.type == 'cuda': return _types.DeviceType.gpu else: - raise TypeError("Valid device choices are GPU (and DLA if on Jetson platforms) however got device type" + str(device.type)) - + ValueError("Got a device type other than GPU or DLA (type: " + str(device.type) + ")") elif isinstance(device, _types.DeviceType): return device - + elif isinstance(device, str): + if device == "gpu" or device == "GPU": + return _types.DeviceType.gpu + elif device == "dla" or device == "DLA": + return _types.DeviceType.dla + else: + ValueError("Got a device type other than GPU or DLA (type: " + str(device) + ")") else: - raise TypeError("Device specification must be of type torch.device or trtorch.DeviceType, but got: " + str(type(device))) + raise TypeError("Device specification must be of type torch.device, string or trtorch.DeviceType, but got: " 
+ str(type(device))) def _parse_compile_spec(compile_spec: Dict[str, Any]) -> trtorch._C.CompileSpec: info = trtorch._C.CompileSpec() @@ -110,11 +115,11 @@ def _parse_compile_spec(compile_spec: Dict[str, Any]) -> trtorch._C.CompileSpec: assert isinstance(compile_spec["allow_gpu_fallback"], bool) info.allow_gpu_fallback = compile_spec["allow_gpu_fallback"] - if "device" in compile_spec: - info.device = _parse_device_type(compile_spec["device"]) + if "device_type" in compile_spec: + info.device = _parse_device_type(compile_spec["device_type"]) if "capability" in compile_spec: - assert isinstance(compile_spec["capability"], type.EngineCapability) + assert isinstance(compile_spec["capability"], _types.EngineCapability) info.capability = compile_spec["capability"] if "num_min_timing_iters" in compile_spec: @@ -133,4 +138,74 @@ def _parse_compile_spec(compile_spec: Dict[str, Any]) -> trtorch._C.CompileSpec: assert type(compile_spec["max_batch_size"]) is int info.max_batch_size = compile_spec["max_batch_size"] - return info \ No newline at end of file + return info + +def TensorRTCompileSpec(compile_spec: Dict[str, Any]): + """ + Utility to create a formated spec dictionary for using the PyTorch TensorRT backend + + Args: + compile_spec (dict): Compilation settings including operating precision, target device, etc. + One key is required which is ``input_shapes``, describing the input sizes or ranges for inputs + to the graph. All other keys are optional. Entries for each method to be compiled. + + .. code-block:: py + + CompileSpec = { + "forward" : trtorch.TensorRTCompileSpec({ + "input_shapes": [ + (1, 3, 224, 224), # Static input shape for input #1 + { + "min": (1, 3, 224, 224), + "opt": (1, 3, 512, 512), + "max": (1, 3, 1024, 1024) + } # Dynamic input shape for input #2 + ], + "op_precision": torch.half, # Operating precision set to FP16 + "refit": false, # enable refit + "debug": false, # enable debuggable engine + "strict_types": false, # kernels should strictly run in operating precision + "allow_gpu_fallback": false, # (DLA only) Allow layers unsupported on DLA to run on GPU + "device": torch.device("cuda"), # Type of device to run engine on (for DLA use trtorch.DeviceType.DLA) + "capability": trtorch.EngineCapability.DEFAULT, # Restrict kernel selection to safe gpu kernels or safe dla kernels + "num_min_timing_iters": 2, # Number of minimization timing iterations used to select kernels + "num_avg_timing_iters": 1, # Number of averaging timing iterations used to select kernels + "workspace_size": 0, # Maximum size of workspace given to TensorRT + "max_batch_size": 0, # Maximum batch size (must be >= 1 to be set, 0 means not set) + }) + } + + Input Sizes can be specified as torch sizes, tuples or lists. Op precisions can be specified using + torch datatypes or trtorch datatypes and you can use either torch devices or the trtorch device type enum + to select device type. 
+ + Returns: + torch.classes.tensorrt.CompileSpec: List of methods and formated spec objects to be provided to ``torch._C._jit_to_tensorrt`` + """ + + parsed_spec = _parse_compile_spec(compile_spec) + + backend_spec = torch.classes.tensorrt.CompileSpec() + + for i in parsed_spec.input_ranges: + ir = torch.classes.tensorrt.InputRange() + ir.set_min(i.min) + ir.set_opt(i.opt) + ir.set_max(i.max) + backend_spec.append_input_range(ir) + + backend_spec.set_op_precision(int(parsed_spec.op_precision)) + backend_spec.set_refit(parsed_spec.refit) + backend_spec.set_debug(parsed_spec.debug) + backend_spec.set_refit(parsed_spec.refit) + backend_spec.set_strict_types(parsed_spec.strict_types) + backend_spec.set_allow_gpu_fallback(parsed_spec.allow_gpu_fallback) + backend_spec.set_device(int(parsed_spec.device)) + backend_spec.set_capability(int(parsed_spec.capability)) + backend_spec.set_num_min_timing_iters(parsed_spec.num_min_timing_iters) + backend_spec.set_num_avg_timing_iters(parsed_spec.num_avg_timing_iters) + backend_spec.set_workspace_size(parsed_spec.workspace_size) + backend_spec.set_max_batch_size(parsed_spec.max_batch_size) + + return backend_spec + diff --git a/py/trtorch/_compiler.py b/py/trtorch/_compiler.py index 1c35dbe4a1..443db12a7b 100644 --- a/py/trtorch/_compiler.py +++ b/py/trtorch/_compiler.py @@ -39,7 +39,7 @@ def compile(module: torch.jit.ScriptModule, compile_spec: Any) -> torch.jit.Scri "debug": false, # enable debuggable engine "strict_types": false, # kernels should strictly run in operating precision "allow_gpu_fallback": false, # (DLA only) Allow layers unsupported on DLA to run on GPU - "device": torch.device("cuda"), # Type of device to run engine on (for DLA use trtorch.DeviceType.DLA) + "device_type": torch.device("cuda"), # Type of device to run engine on (for DLA use trtorch.DeviceType.DLA) "capability": trtorch.EngineCapability.DEFAULT, # Restrict kernel selection to safe gpu kernels or safe dla kernels "num_min_timing_iters": 2, # Number of minimization timing iterations used to select kernels "num_avg_timing_iters": 1, # Number of averaging timing iterations used to select kernels @@ -91,7 +91,7 @@ def convert_method_to_trt_engine(module: torch.jit.ScriptModule, method_name: st "debug": false, # enable debuggable engine "strict_types": false, # kernels should strictly run in operating precision "allow_gpu_fallback": false, # (DLA only) Allow layers unsupported on DLA to run on GPU - "device": torch.device("cuda"), # Type of device to run engine on (for DLA use trtorch.DeviceType.DLA) + "device_type": torch.device("cuda"), # Type of device to run engine on (for DLA use trtorch.DeviceType.DLA) "capability": trtorch.EngineCapability.DEFAULT, # Restrict kernel selection to safe gpu kernels or safe dla kernels "num_min_timing_iters": 2, # Number of minimization timing iterations used to select kernels "num_avg_timing_iters": 1, # Number of averaging timing iterations used to select kernels diff --git a/py/trtorch/csrc/register_tensorrt_classes.cpp b/py/trtorch/csrc/register_tensorrt_classes.cpp new file mode 100644 index 0000000000..7d66ca6580 --- /dev/null +++ b/py/trtorch/csrc/register_tensorrt_classes.cpp @@ -0,0 +1,47 @@ +#include "tensorrt_classes.h" + +namespace trtorch { +namespace backend { +namespace { + void RegisterTRTCompileSpec() { + #define ADD_FIELD_GET_SET_REGISTRATION(registry, class_name, field_name) \ + (registry).def("set_"#field_name, &class_name::set_##field_name); \ + (registry).def("get_"#field_name, &class_name::get_##field_name); + + static 
auto TRTORCH_UNUSED TRTInputRangeTSRegistrtion = torch::class_("tensorrt", "InputRange") + .def(torch::init<>()); + + ADD_FIELD_GET_SET_REGISTRATION(TRTInputRangeTSRegistrtion, trtorch::pyapi::InputRange, min); + ADD_FIELD_GET_SET_REGISTRATION(TRTInputRangeTSRegistrtion, trtorch::pyapi::InputRange, opt); + ADD_FIELD_GET_SET_REGISTRATION(TRTInputRangeTSRegistrtion, trtorch::pyapi::InputRange, max); + + static auto TRTORCH_UNUSED TRTCompileSpecTSRegistrtion = torch::class_("tensorrt", "CompileSpec") + .def(torch::init<>()) + .def("append_input_range", &trtorch::pyapi::CompileSpec::appendInputRange) + .def("__str__", &trtorch::pyapi::CompileSpec::stringify); + + ADD_FIELD_GET_SET_REGISTRATION(TRTCompileSpecTSRegistrtion, trtorch::pyapi::CompileSpec, op_precision); + ADD_FIELD_GET_SET_REGISTRATION(TRTCompileSpecTSRegistrtion, trtorch::pyapi::CompileSpec, refit); + ADD_FIELD_GET_SET_REGISTRATION(TRTCompileSpecTSRegistrtion, trtorch::pyapi::CompileSpec, debug); + ADD_FIELD_GET_SET_REGISTRATION(TRTCompileSpecTSRegistrtion, trtorch::pyapi::CompileSpec, strict_types); + ADD_FIELD_GET_SET_REGISTRATION(TRTCompileSpecTSRegistrtion, trtorch::pyapi::CompileSpec, allow_gpu_fallback); + ADD_FIELD_GET_SET_REGISTRATION(TRTCompileSpecTSRegistrtion, trtorch::pyapi::CompileSpec, device); + ADD_FIELD_GET_SET_REGISTRATION(TRTCompileSpecTSRegistrtion, trtorch::pyapi::CompileSpec, capability); + ADD_FIELD_GET_SET_REGISTRATION(TRTCompileSpecTSRegistrtion, trtorch::pyapi::CompileSpec, num_min_timing_iters); + ADD_FIELD_GET_SET_REGISTRATION(TRTCompileSpecTSRegistrtion, trtorch::pyapi::CompileSpec, num_avg_timing_iters); + ADD_FIELD_GET_SET_REGISTRATION(TRTCompileSpecTSRegistrtion, trtorch::pyapi::CompileSpec, workspace_size); + ADD_FIELD_GET_SET_REGISTRATION(TRTCompileSpecTSRegistrtion, trtorch::pyapi::CompileSpec, max_batch_size); + } + +struct TRTTSRegistrations { + TRTTSRegistrations() { + RegisterTRTCompileSpec(); + } +}; + +static TRTTSRegistrations register_trt_classes = TRTTSRegistrations(); +} +} // namespace backend +} // namespace trtorch + + diff --git a/py/trtorch/csrc/tensorrt_backend.cpp b/py/trtorch/csrc/tensorrt_backend.cpp new file mode 100644 index 0000000000..1d679450c6 --- /dev/null +++ b/py/trtorch/csrc/tensorrt_backend.cpp @@ -0,0 +1,86 @@ +#include "torch/csrc/jit/passes/lower_graph.h" + +#include "tensorrt_backend.h" +#include "tensorrt_classes.h" + +#include "core/compiler.h" +#include "core/lowering/lowering.h" +#include "core/runtime/runtime.h" + +namespace trtorch { +namespace backend { + +c10::IValue TensorRTBackend::preprocess(c10::IValue mod, c10::impl::GenericDict method_compile_spec) { + auto mod_ = mod.toModule(); + LOG_DEBUG("Placing module in eval mode if not already"); + mod_.eval(); + mod_ = core::lowering::LowerModule(mod_); + + auto spec = + c10::impl::toTypedDict(method_compile_spec); + + for (auto it = spec.begin(), end = spec.end(); it != end; ++it) { + TRTORCH_CHECK(core::CheckMethodOperatorSupport(mod.toModule(), it->key()), + "Method " << it->key() << "cannot be compiled by TRTorch"); + } + + for (auto it = spec.begin(), end = spec.end(); it != end; ++it) { + const auto& method_name = it->key(); + auto method = mod_.get_method(method_name); + auto graph = method.graph(); + core::lowering::LowerGraph(graph); + } + + return mod_._ivalue(); +} + +c10::impl::GenericDict TensorRTBackend::compile(c10::IValue processed_mod, c10::impl::GenericDict method_compile_spec) { + auto mod = processed_mod.toModule(); + auto spec = + c10::impl::toTypedDict(method_compile_spec); + + auto 
handles = c10::impl::GenericDict(c10::StringType::get(), c10::getCustomClassType>()); + + for (auto it = spec.begin(), end = spec.end(); it != end; ++it) { + const auto& method_name = it->key(); + auto method = mod.get_method(method_name); + auto g = method.graph(); + + auto raw_spec = it->value().toGenericDict().at(it->key()).toCustomClass(); + LOG_DEBUG(raw_spec->stringify()); + auto cfg = raw_spec->toInternalCompileSpec(); + auto convert_cfg = std::move(cfg.convert_info); + auto graph_and_ivalues = torch::jit::LowerGraph(*g, mod._ivalue()); + + g = graph_and_ivalues.first; + auto params = graph_and_ivalues.second; + auto named_params = core::conversion::get_named_params(g->inputs(), params); + + auto serialized_engine = core::conversion::ConvertBlockToEngine(g->block(), convert_cfg, named_params); + auto engine_handle = c10::make_intrusive(it->key(), serialized_engine); + handles.insert(method.name(), at::IValue(engine_handle)); + } + + return c10::impl::toGenericDict(handles); +} + + +c10::impl::GenericList TensorRTBackend::execute(c10::IValue handle, c10::impl::GenericList inputs) { + TRTORCH_ASSERT(inputs.size() > 0, "Trying to execute on empty list of arguments"); + auto engine = handle.toCustomClass(); + std::vector in_vec; + for (size_t i = 0, e = inputs.size(); i < e; ++i) { + c10::IValue val = inputs[i]; + TRTORCH_CHECK(val.isTensor(), "TensorRT currently only accepts Tensors as inputs"); + in_vec.push_back(val.toTensor()); + } + auto outputs = core::runtime::execute_engine(in_vec, engine); + return c10::impl::toList(c10::List(outputs)); +} + +namespace { +static auto reg = torch::jit::backend("tensorrt"); +} + +} // namespace backend +} // namespace trtorch \ No newline at end of file diff --git a/py/trtorch/csrc/tensorrt_backend.h b/py/trtorch/csrc/tensorrt_backend.h new file mode 100644 index 0000000000..6150604b3e --- /dev/null +++ b/py/trtorch/csrc/tensorrt_backend.h @@ -0,0 +1,19 @@ +#pragma once +#include "torch/csrc/jit/api/module.h" +#include "torch/csrc/jit/backends/backend.h" + +namespace trtorch { +namespace backend { + +class TensorRTBackend: public torch::jit::PyTorchBackendInterface { + public: + explicit TensorRTBackend() {} + virtual ~TensorRTBackend() = default; + + c10::IValue preprocess(c10::IValue mod, c10::impl::GenericDict method_compile_spec) override; + c10::impl::GenericDict compile(c10::IValue processed_mod, c10::impl::GenericDict method_compile_spec) override; + c10::impl::GenericList execute(c10::IValue handle, c10::impl::GenericList inputs) override; +}; + +} // namespace backend +} // namespace trtorch \ No newline at end of file diff --git a/py/trtorch/csrc/tensorrt_classes.cpp b/py/trtorch/csrc/tensorrt_classes.cpp new file mode 100644 index 0000000000..43e63d553b --- /dev/null +++ b/py/trtorch/csrc/tensorrt_classes.cpp @@ -0,0 +1,143 @@ + +#include "tensorrt_classes.h" + +namespace trtorch { +namespace pyapi { + +std::string to_str(InputRange& value) { + auto vec_to_str = [](std::vector shape) -> std::string { + std::stringstream ss; + ss << '['; + for(auto i : shape) { + ss << i << ','; + } + ss << ']'; + return ss.str(); + }; + + std::stringstream ss; + ss << " {" << std::endl; + ss << " min: " << vec_to_str(value.min) << ',' << std::endl; + ss << " opt: " << vec_to_str(value.opt) << ',' << std::endl; + ss << " max: " << vec_to_str(value.max) << ',' << std::endl; + ss << " }" << std::endl; + return ss.str(); +} + +std::string to_str(DataType value) { + switch (value) { + case DataType::kHalf: + return "Half"; + case DataType::kChar: + return 
"Int8"; + case DataType::kFloat: + default: + return "Float"; + } +} + +nvinfer1::DataType toTRTDataType(DataType value) { + switch (value) { + case DataType::kChar: + return nvinfer1::DataType::kINT8; + case DataType::kHalf: + return nvinfer1::DataType::kHALF; + case DataType::kFloat: + default: + return nvinfer1::DataType::kFLOAT; + } +} + +std::string to_str(DeviceType value) { + switch (value) { + case DeviceType::kDLA: + return "DLA"; + case DeviceType::kGPU: + default: + return "GPU"; + } +} + +nvinfer1::DeviceType toTRTDeviceType(DeviceType value) { + switch (value) { + case DeviceType::kDLA: + return nvinfer1::DeviceType::kDLA; + case DeviceType::kGPU: + default: + return nvinfer1::DeviceType::kGPU; + } +} + +std::string to_str(EngineCapability value) { + switch (value) { + case EngineCapability::kSAFE_GPU: + return "Safe GPU"; + case EngineCapability::kSAFE_DLA: + return "Safe DLA"; + case EngineCapability::kDEFAULT: + default: + return "Default"; + } +} + +nvinfer1::EngineCapability toTRTEngineCapability(EngineCapability value) { + switch (value) { + case EngineCapability::kSAFE_DLA: + return nvinfer1::EngineCapability::kSAFE_DLA; + case EngineCapability::kSAFE_GPU: + return nvinfer1::EngineCapability::kSAFE_GPU; + case EngineCapability::kDEFAULT: + default: + return nvinfer1::EngineCapability::kDEFAULT; + } +} + +core::CompileSpec CompileSpec::toInternalCompileSpec() { + std::vector internal_input_ranges; + for (auto i : input_ranges) { + internal_input_ranges.push_back(i.toInternalInputRange()); + } + auto info = core::CompileSpec(internal_input_ranges); + info.convert_info.engine_settings.op_precision = toTRTDataType(op_precision); + info.convert_info.engine_settings.refit = refit; + info.convert_info.engine_settings.debug = debug; + info.convert_info.engine_settings.strict_types = strict_types; + info.convert_info.engine_settings.allow_gpu_fallback = allow_gpu_fallback; + info.convert_info.engine_settings.device = toTRTDeviceType(device); + info.convert_info.engine_settings.capability = toTRTEngineCapability(capability); + TRTORCH_CHECK(num_min_timing_iters >= 0, "num_min_timing_iters must be 0 or greater"); + info.convert_info.engine_settings.num_min_timing_iters = num_min_timing_iters; + TRTORCH_CHECK(num_avg_timing_iters >= 0, "num_avg_timing_iters must be 0 or greater"); + info.convert_info.engine_settings.num_avg_timing_iters = num_avg_timing_iters; + TRTORCH_CHECK(workspace_size >= 0, "workspace_size must be 0 or greater"); + info.convert_info.engine_settings.workspace_size = workspace_size; + TRTORCH_CHECK(max_batch_size >= 0, "max_batch_size must be 0 or greater"); + info.convert_info.engine_settings.max_batch_size = max_batch_size; + return info; +} + +std::string CompileSpec::stringify() { + std::stringstream ss; + ss << "TensorRT Compile Spec: {" << std::endl; + ss << " \"Input Shapes\": [" << std::endl; + for (auto i : input_ranges) { + ss << to_str(i); + } + ss << " ]" << std::endl; + ss << " \"Op Precision\": " << to_str(op_precision) << std::endl; + ss << " \"Refit\": " << refit << std::endl; + ss << " \"Debug\": " << debug << std::endl; + ss << " \"Strict Types\": " << strict_types << std::endl; + ss << " \"Allow GPU Fallback\": " << allow_gpu_fallback << std::endl; + ss << " \"Device\": " << to_str(capability) << std::endl; + ss << " \"Engine Capability\": " << to_str(capability) << std::endl; + ss << " \"Num Min Timing Iters\": " << num_min_timing_iters << std::endl; + ss << " \"Num Avg Timing Iters\": " << num_avg_timing_iters << std::endl; + ss << " 
\"Workspace Size\": " << workspace_size << std::endl; + ss << " \"Max Batch Size\": " << max_batch_size << std::endl; + ss << "}"; + return ss.str(); +} + +} // namespace pyapi +} // namespace trtorch \ No newline at end of file diff --git a/py/trtorch/csrc/tensorrt_classes.h b/py/trtorch/csrc/tensorrt_classes.h new file mode 100644 index 0000000000..e98a093358 --- /dev/null +++ b/py/trtorch/csrc/tensorrt_classes.h @@ -0,0 +1,101 @@ +#pragma once + +#include "core/compiler.h" +#include "core/conversion/conversion.h" +#include "torch/torch.h" +#include "torch/script.h" +#include "torch/custom_class.h" + +namespace trtorch { +namespace pyapi { + +#define ADD_FIELD_GET_SET(field_name, type) \ + void set_##field_name(type val) {field_name = val;} \ + type get_##field_name() {return field_name;} + +struct InputRange : torch::CustomClassHolder { + std::vector min; + std::vector opt; + std::vector max; + + core::conversion::InputRange toInternalInputRange() { + return core::conversion::InputRange(min, opt, max); + } + + ADD_FIELD_GET_SET(min, std::vector); + ADD_FIELD_GET_SET(opt, std::vector); + ADD_FIELD_GET_SET(max, std::vector); +}; + +std::string to_str(InputRange& value); + + +enum class DataType : int8_t { + kFloat, + kHalf, + kChar, +}; + +std::string to_str(DataType value); +nvinfer1::DataType toTRTDataType(DataType value); + +enum DeviceType : int8_t { + kGPU, + kDLA, +}; + +std::string to_str(DeviceType value); +nvinfer1::DeviceType toTRTDeviceType(DeviceType value); + +enum class EngineCapability : int8_t { + kDEFAULT, + kSAFE_GPU, + kSAFE_DLA, +}; + +std::string to_str(EngineCapability value); +nvinfer1::EngineCapability toTRTEngineCapability(EngineCapability value); + +// TODO: Make this error message more informative +#define ADD_ENUM_GET_SET(field_name, type, max_val) \ + void set_##field_name(int64_t val) { \ + TRTORCH_CHECK(val < max_val, "Invalid enum value for field"); \ + field_name = static_cast(val); \ + } \ + int64_t get_##field_name() {return static_cast(field_name);} + +struct CompileSpec : torch::CustomClassHolder { + core::CompileSpec toInternalCompileSpec(); + std::string stringify(); + void appendInputRange(const c10::intrusive_ptr& ir) { + input_ranges.push_back(*ir); + } + + ADD_ENUM_GET_SET(op_precision, DataType, 3); + ADD_FIELD_GET_SET(refit, bool); + ADD_FIELD_GET_SET(debug, bool); + ADD_FIELD_GET_SET(strict_types, bool); + ADD_FIELD_GET_SET(allow_gpu_fallback, bool); + ADD_ENUM_GET_SET(device, DeviceType, 2); + ADD_ENUM_GET_SET(capability, EngineCapability, 3); + ADD_FIELD_GET_SET(num_min_timing_iters, int64_t); + ADD_FIELD_GET_SET(num_avg_timing_iters, int64_t); + ADD_FIELD_GET_SET(workspace_size, int64_t); + ADD_FIELD_GET_SET(max_batch_size, int64_t); + + std::vector input_ranges; + DataType op_precision = DataType::kFloat; + bool refit = false; + bool debug = false; + bool strict_types = false; + bool allow_gpu_fallback = true; + DeviceType device = DeviceType::kGPU; + EngineCapability capability = EngineCapability::kDEFAULT; + int64_t num_min_timing_iters = 2; + int64_t num_avg_timing_iters = 1; + int64_t workspace_size = 0; + int64_t max_batch_size = 0; +}; + +} // namespace pyapi +} // namespace trtorch \ No newline at end of file diff --git a/py/trtorch/csrc/trtorch_py.cpp b/py/trtorch/csrc/trtorch_py.cpp index da6d2b2688..4f9363542d 100644 --- a/py/trtorch/csrc/trtorch_py.cpp +++ b/py/trtorch/csrc/trtorch_py.cpp @@ -1,11 +1,12 @@ #include "pybind11/pybind11.h" #include "pybind11/stl.h" -//TODO: Remove when we have access to PyTorch to_backend 
autoregistration -#include "core/backend.h" + +#include "tensorrt_classes.h" #include "core/compiler.h" #include "core/conversion/conversion.h" #include "torch/torch.h" #include "torch/script.h" +#include "torch/custom_class.h" #include "torch/csrc/jit/python/pybind_utils.h" #include "Python.h" @@ -14,103 +15,6 @@ namespace py = pybind11; namespace trtorch { namespace pyapi { -struct InputRange { - std::vector min; - std::vector opt; - std::vector max; - - core::conversion::InputRange toInternalInputRange() { - return core::conversion::InputRange(min, opt, max); - } -}; - -enum class DataType : int8_t { - kFloat, - kHalf, - kChar, -}; - -nvinfer1::DataType toTRTDataType(DataType value) { - switch (value) { - case DataType::kChar: - return nvinfer1::DataType::kINT8; - case DataType::kHalf: - return nvinfer1::DataType::kHALF; - case DataType::kFloat: - default: - return nvinfer1::DataType::kFLOAT; - } -} - -enum DeviceType : int8_t { - kGPU, - kDLA, -}; - -nvinfer1::DeviceType toTRTDeviceType(DeviceType value) { - switch (value) { - case DeviceType::kDLA: - return nvinfer1::DeviceType::kDLA; - case DeviceType::kGPU: - default: - return nvinfer1::DeviceType::kGPU; - } -} - -enum class EngineCapability : int8_t { - kDEFAULT, - kSAFE_GPU, - kSAFE_DLA, -}; - -nvinfer1::EngineCapability toTRTEngineCapability(EngineCapability value) { - switch (value) { - case EngineCapability::kSAFE_DLA: - return nvinfer1::EngineCapability::kSAFE_DLA; - case EngineCapability::kSAFE_GPU: - return nvinfer1::EngineCapability::kSAFE_GPU; - case EngineCapability::kDEFAULT: - default: - return nvinfer1::EngineCapability::kDEFAULT; - } -} - -struct CompileSpec { - - core::CompileSpec toInternalCompileSpec() { - for (auto i : input_ranges) { - internal_input_ranges.push_back(i.toInternalInputRange()); - } - auto info = core::CompileSpec(internal_input_ranges); - info.convert_info.engine_settings.op_precision = toTRTDataType(op_precision); - info.convert_info.engine_settings.refit = refit; - info.convert_info.engine_settings.debug = debug; - info.convert_info.engine_settings.strict_types = strict_types; - info.convert_info.engine_settings.allow_gpu_fallback = allow_gpu_fallback; - info.convert_info.engine_settings.device = toTRTDeviceType(device); - info.convert_info.engine_settings.capability = toTRTEngineCapability(capability); - info.convert_info.engine_settings.num_min_timing_iters = num_min_timing_iters; - info.convert_info.engine_settings.num_avg_timing_iters = num_avg_timing_iters; - info.convert_info.engine_settings.workspace_size = workspace_size; - info.convert_info.engine_settings.max_batch_size = max_batch_size; - return info; - } - - std::vector input_ranges; - std::vector internal_input_ranges; - DataType op_precision = DataType::kFloat; - bool refit = false; - bool debug = false; - bool strict_types = false; - bool allow_gpu_fallback = true; - DeviceType device = DeviceType::kGPU; - EngineCapability capability = EngineCapability::kDEFAULT; - uint64_t num_min_timing_iters = 2; - uint64_t num_avg_timing_iters = 1; - uint64_t workspace_size = 0; - uint64_t max_batch_size = 0; -}; - torch::jit::Module CompileGraph(const torch::jit::Module& mod, CompileSpec& info) { py::gil_scoped_acquire gil; auto trt_mod = core::CompileGraph(mod, info.toInternalCompileSpec()); @@ -227,11 +131,7 @@ PYBIND11_MODULE(_C, m) { .value("INFO", core::util::logging::LogLevel::kINFO) .value("DEBUG", core::util::logging::LogLevel::kDEBUG) .export_values(); - - //TODO: Remove when we have access to PyTorch autoregistration - 
//m.def("to_tensorrt", backend::GetTensorRTBackend().generateToBackendFn()); } - } // namespace pyapi } // namespace trtorch diff --git a/tests/BUILD b/tests/BUILD index f784798a57..81a43aecbc 100644 --- a/tests/BUILD +++ b/tests/BUILD @@ -17,6 +17,7 @@ test_suite( test_suite( name = "python_api_tests", tests = [ - "//tests/py:test_api" + "//tests/py:test_api", + "//tests/py:test_to_backend_api" ] ) \ No newline at end of file diff --git a/tests/py/BUILD b/tests/py/BUILD index 054e1cbbb3..0d643d65d8 100644 --- a/tests/py/BUILD +++ b/tests/py/BUILD @@ -5,9 +5,21 @@ load("@py_test_deps//:requirements.bzl", "requirement") py_test( name = "test_api", srcs = [ - "test_api.py" + "test_api.py", + "model_test_case.py" ], deps = [ requirement("torchvision") ] -) \ No newline at end of file +) + +py_test( + name = "test_to_backend_api", + srcs = [ + "test_to_backend_api.py", + "model_test_case.py" + ], + deps = [ + requirement("torchvision") + ] +) diff --git a/tests/py/model_test_case.py b/tests/py/model_test_case.py new file mode 100644 index 0000000000..3730f6507b --- /dev/null +++ b/tests/py/model_test_case.py @@ -0,0 +1,19 @@ +import unittest +import trtorch +import torch +import torchvision.models as models + +class ModelTestCase(unittest.TestCase): + def __init__(self, methodName='runTest', model=None): + super(ModelTestCase, self).__init__(methodName) + self.model = model + self.model.eval().to("cuda") + + @staticmethod + def parametrize(testcase_class, model=None): + testloader = unittest.TestLoader() + testnames = testloader.getTestCaseNames(testcase_class) + suite = unittest.TestSuite() + for name in testnames: + suite.addTest(testcase_class(name, model=model)) + return suite \ No newline at end of file diff --git a/tests/py/test_api.py b/tests/py/test_api.py index e0cd113db6..2d9d2d1e56 100644 --- a/tests/py/test_api.py +++ b/tests/py/test_api.py @@ -3,21 +3,7 @@ import torch import torchvision.models as models - -class ModelTestCase(unittest.TestCase): - def __init__(self, methodName='runTest', model=None): - super(ModelTestCase, self).__init__(methodName) - self.model = model - self.model.eval().to("cuda") - - @staticmethod - def parametrize(testcase_class, model=None): - testloader = unittest.TestLoader() - testnames = testloader.getTestCaseNames(testcase_class) - suite = unittest.TestSuite() - for name in testnames: - suite.addTest(testcase_class(name, model=model)) - return suite +from model_test_case import ModelTestCase class TestCompile(ModelTestCase): def setUp(self): diff --git a/tests/py/test_to_backend_api.py b/tests/py/test_to_backend_api.py new file mode 100644 index 0000000000..e643aa6ce2 --- /dev/null +++ b/tests/py/test_to_backend_api.py @@ -0,0 +1,44 @@ +import unittest +import trtorch +import torch +import torchvision.models as models + +from model_test_case import ModelTestCase + +class TestToBackendLowering(ModelTestCase): + def setUp(self): + self.input = torch.randn((1, 3, 300, 300)).to("cuda") + self.scripted_model = torch.jit.script(self.model) + self.spec = { + "forward": trtorch.TensorRTCompileSpec({ + "input_shapes": [[1, 3, 300, 300]], + "op_precision": torch.float, + "refit": False, + "debug": False, + "strict_types": False, + "allow_gpu_fallback": True, + "device_type": "gpu", + "capability": trtorch.EngineCapability.default, + "num_min_timing_iters": 2, + "num_avg_timing_iters": 1, + "max_batch_size": 0, + }) + } + + def test_to_backend_lowering(self): + trt_mod = torch._C._jit_to_tensorrt(self.scripted_model._c, {"forward": self.spec}) + same = 
(trt_mod.forward(self.input) - self.scripted_model(self.input)).abs().max() + self.assertTrue(same < 2e-3) + +def test_suite(): + suite = unittest.TestSuite() + suite.addTest(TestToBackendLowering.parametrize(TestToBackendLowering, model=models.mobilenet_v2(pretrained=True))) + + return suite + +suite = test_suite() + +runner = unittest.TextTestRunner() +result = runner.run(suite) + +exit(int(not result.wasSuccessful())) \ No newline at end of file From a720f918aafe19d09461a53ce18bad07ff612eb3 Mon Sep 17 00:00:00 2001 From: Naren Dasan Date: Wed, 21 Oct 2020 15:33:52 -0700 Subject: [PATCH 4/6] refactor: A couple more renames Signed-off-by: Naren Dasan Signed-off-by: Naren Dasan --- .github/pr-labels.yml | 6 +++--- core/runtime/register_trt_op.cpp | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/pr-labels.yml b/.github/pr-labels.yml index 32730e9d48..b71be34bd5 100644 --- a/.github/pr-labels.yml +++ b/.github/pr-labels.yml @@ -16,8 +16,8 @@ "component: evaluators": - core/conversion/evaluators/**/* -"component: execution": - - core/execution/**/* +"component: runtime": + - core/runtime/**/* "component: lowering": - core/lowering/**/* @@ -32,4 +32,4 @@ "documentation": - docs/**/* - docsrc/**/* - + diff --git a/core/runtime/register_trt_op.cpp b/core/runtime/register_trt_op.cpp index 75d34c701e..22e412dc42 100644 --- a/core/runtime/register_trt_op.cpp +++ b/core/runtime/register_trt_op.cpp @@ -30,7 +30,7 @@ std::vector execute_engine(std::vector inputs, c10::intr gpu_handles.push_back(contig_inputs.back().data_ptr()); } - TRTORCH_CHECK(compiled_engine->exec_ctx->allInputDimensionsSpecified(), "Not enough inputs provided (execution.RunCudaEngine)"); + TRTORCH_CHECK(compiled_engine->exec_ctx->allInputDimensionsSpecified(), "Not enough inputs provided (runtime.RunCudaEngine)"); std::vector outputs(compiled_engine->num_io.second); for (size_t o = inputs.size(); o < (compiled_engine->num_io.first + compiled_engine->num_io.second); o++) { @@ -53,6 +53,6 @@ TORCH_LIBRARY(tensorrt, m) { m.def("execute_engine", execute_engine); } -} // namespace execution +} // namespace runtime } // namespace core } // namespace trtorch From 7e4b07c715181c1868122e42c28fc6f56fa261f0 Mon Sep 17 00:00:00 2001 From: Naren Dasan Date: Wed, 21 Oct 2020 16:14:32 -0700 Subject: [PATCH 5/6] docs: New documentation on the to_backend api integration Signed-off-by: Naren Dasan Signed-off-by: Naren Dasan --- docsrc/index.rst | 2 + docsrc/py_api/trtorch.rst | 4 +- docsrc/tutorials/use_from_pytorch.rst | 62 +++++++++++++++++++++++++++ 3 files changed, 67 insertions(+), 1 deletion(-) create mode 100644 docsrc/tutorials/use_from_pytorch.rst diff --git a/docsrc/index.rst b/docsrc/index.rst index 2db21d6e2d..f322d14114 100644 --- a/docsrc/index.rst +++ b/docsrc/index.rst @@ -25,6 +25,7 @@ Getting Started * :ref:`getting_started` * :ref:`ptq` * :ref:`trtorchc` +* :ref:`use_from_pytorch` .. toctree:: @@ -36,6 +37,7 @@ Getting Started tutorials/getting_started tutorials/ptq tutorials/trtorchc + tutorials/use_from_pytorch _notebooks/lenet .. toctree:: diff --git a/docsrc/py_api/trtorch.rst b/docsrc/py_api/trtorch.rst index 3f34ca9617..d7376cb2f0 100644 --- a/docsrc/py_api/trtorch.rst +++ b/docsrc/py_api/trtorch.rst @@ -17,9 +17,11 @@ Functions .. autofunction:: check_method_op_support +.. autofunction:: get_build_info + .. autofunction:: dump_build_info -.. autofunction:: get_build_info +.. 
autofunction:: TensorRTCompileSpec Enums ------- diff --git a/docsrc/tutorials/use_from_pytorch.rst b/docsrc/tutorials/use_from_pytorch.rst new file mode 100644 index 0000000000..322efd29a9 --- /dev/null +++ b/docsrc/tutorials/use_from_pytorch.rst @@ -0,0 +1,62 @@ +.. _use_from_pytorch: + +Using TRTorch Directly From PyTorch +==================================== + +Starting in TRTorch 0.1.0, you will now be able to directly access TensorRT from PyTorch APIs. The process to use this feature +is very similar to the compilation workflow described in :ref:`getting_started` + +Start by loading ``trtorch`` into your application. + +.. code-block:: python + + import torch + import trtorch + + +Then given a TorchScript module, you can lower it to TensorRT using the ``torch._C._jit_to_tensorrt`` API. + +.. code-block:: python + + import torchvision.models as models + + model = models.mobilenet_v2(pretrained=True) + script_model = torch.jit.script(model) + +Unlike the ``compile`` API in TRTorch which assumes you are trying to compile the ``forward`` function of a module +or the ``convert_method_to_trt_engine`` which converts a specified function to a TensorRT engine, the backend API +will take a dictionary which maps names of functions to compile to Compilation Spec objects which wrap the same +sort of dictionary you would provide to ``compile``. For more information on the compile spec dictionary take a look +at the documentation for the TRTorch ``TensorRTCompileSpec`` API. + +.. code-block:: python + + spec = { + "forward": trtorch.TensorRTCompileSpec({ + "input_shapes": [[1, 3, 300, 300]], + "op_precision": torch.half, + "refit": False, + "debug": False, + "strict_types": False, + "allow_gpu_fallback": True, + "device_type": "gpu", + "capability": trtorch.EngineCapability.default, + "num_min_timing_iters": 2, + "num_avg_timing_iters": 1, + "max_batch_size": 0, + }) + } + +Now to compile with TRTorch, provide the target module objects and the spec dictionary to ``torch._C._jit_to_tensorrt`` + +.. code-block:: python + + trt_model = torch._C._jit_to_tensorrt(script_model._c, spec) + +To run explicitly call the function of the method you want to run (vs. how you can just call on the module itself in standard PyTorch) + +.. 
code-block:: python + + input = torch.randn((1, 3, 300, 300).to("cuda").to(torch.half) + print(trt_model.forward(input)) + From d150930181dbf9e5a4cbb9c8f74401806c32533d Mon Sep 17 00:00:00 2001 From: Naren Dasan Date: Thu, 22 Oct 2020 11:00:08 -0700 Subject: [PATCH 6/6] docs(//py): Clarify docstrings in python package Signed-off-by: Naren Dasan Signed-off-by: Naren Dasan --- py/trtorch/_compile_spec.py | 8 ++++---- py/trtorch/_compiler.py | 10 +++++----- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/py/trtorch/_compile_spec.py b/py/trtorch/_compile_spec.py index 6f0ff49d4a..221c0c2e5d 100644 --- a/py/trtorch/_compile_spec.py +++ b/py/trtorch/_compile_spec.py @@ -162,10 +162,10 @@ def TensorRTCompileSpec(compile_spec: Dict[str, Any]): } # Dynamic input shape for input #2 ], "op_precision": torch.half, # Operating precision set to FP16 - "refit": false, # enable refit - "debug": false, # enable debuggable engine - "strict_types": false, # kernels should strictly run in operating precision - "allow_gpu_fallback": false, # (DLA only) Allow layers unsupported on DLA to run on GPU + "refit": False, # enable refit + "debug": False, # enable debuggable engine + "strict_types": False, # kernels should strictly run in operating precision + "allow_gpu_fallback": True, # (DLA only) Allow layers unsupported on DLA to run on GPU "device": torch.device("cuda"), # Type of device to run engine on (for DLA use trtorch.DeviceType.DLA) "capability": trtorch.EngineCapability.DEFAULT, # Restrict kernel selection to safe gpu kernels or safe dla kernels "num_min_timing_iters": 2, # Number of minimization timing iterations used to select kernels diff --git a/py/trtorch/_compiler.py b/py/trtorch/_compiler.py index 443db12a7b..cfd9fd39a3 100644 --- a/py/trtorch/_compiler.py +++ b/py/trtorch/_compiler.py @@ -38,7 +38,7 @@ def compile(module: torch.jit.ScriptModule, compile_spec: Any) -> torch.jit.Scri "refit": false, # enable refit "debug": false, # enable debuggable engine "strict_types": false, # kernels should strictly run in operating precision - "allow_gpu_fallback": false, # (DLA only) Allow layers unsupported on DLA to run on GPU + "allow_gpu_fallback": true, # (DLA only) Allow layers unsupported on DLA to run on GPU "device_type": torch.device("cuda"), # Type of device to run engine on (for DLA use trtorch.DeviceType.DLA) "capability": trtorch.EngineCapability.DEFAULT, # Restrict kernel selection to safe gpu kernels or safe dla kernels "num_min_timing_iters": 2, # Number of minimization timing iterations used to select kernels @@ -87,10 +87,10 @@ def convert_method_to_trt_engine(module: torch.jit.ScriptModule, method_name: st } # Dynamic input shape for input #2 ], "op_precision": torch.half, # Operating precision set to FP16 - "refit": false, # enable refit - "debug": false, # enable debuggable engine - "strict_types": false, # kernels should strictly run in operating precision - "allow_gpu_fallback": false, # (DLA only) Allow layers unsupported on DLA to run on GPU + "refit": False, # enable refit + "debug": False, # enable debuggable engine + "strict_types": False, # kernels should strictly run in operating precision + "allow_gpu_fallback": True, # (DLA only) Allow layers unsupported on DLA to run on GPU "device_type": torch.device("cuda"), # Type of device to run engine on (for DLA use trtorch.DeviceType.DLA) "capability": trtorch.EngineCapability.DEFAULT, # Restrict kernel selection to safe gpu kernels or safe dla kernels "num_min_timing_iters": 2, # Number of minimization timing 
iterations used to select kernels
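
---

For reference, the end-to-end to_backend workflow enabled by this patch series looks roughly like the sketch below. It is assembled from the API surface shown in the diffs above (``trtorch.TensorRTCompileSpec`` and ``torch._C._jit_to_tensorrt``); the MobileNetV2 model, the 1x3x300x300 input shape, and the FP16 settings are illustrative placeholders, not part of the patch itself.

import torch
import torchvision.models as models
import trtorch

# Script the model as usual; the backend API operates on the underlying _c module.
model = models.mobilenet_v2(pretrained=True).eval().to("cuda")
script_model = torch.jit.script(model)

# One TensorRTCompileSpec per method to be compiled (here just "forward").
spec = {
    "forward": trtorch.TensorRTCompileSpec({
        "input_shapes": [[1, 3, 300, 300]],
        "op_precision": torch.half,         # build an FP16 engine
        "refit": False,
        "debug": False,
        "strict_types": False,
        "allow_gpu_fallback": True,
        "device_type": "gpu",               # string, torch.device, or trtorch.DeviceType
        "capability": trtorch.EngineCapability.default,
        "num_min_timing_iters": 2,
        "num_avg_timing_iters": 1,
        "max_batch_size": 0,
    })
}

# Lower to TensorRT through the PyTorch backend API (instead of trtorch.compile).
trt_model = torch._C._jit_to_tensorrt(script_model._c, spec)

# Call the compiled method explicitly; inputs should match the engine precision.
x = torch.randn((1, 3, 300, 300)).to("cuda").to(torch.half)
print(trt_model.forward(x))

The main design difference from ``trtorch.compile`` is visible here: rather than assuming ``forward``, the backend API takes a dictionary mapping method names to wrapped spec objects, so each compiled method carries its own settings.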