
Commit 547f554

feat(trtorchc): Adding new support for dtypes and formats in trtorchc

Signed-off-by: Naren Dasan <[email protected]>
Signed-off-by: Naren Dasan <[email protected]>

1 parent: d6161e3

5 files changed (+276, -79 lines)

core/ir/Input.cpp (-4)

@@ -128,8 +128,6 @@ Input::Input(std::vector<int64_t> shape, nvinfer1::DataType dtype, nvinfer1::Ten
   max = util::toDims(shape);
   input_shape = util::toDims(shape);
   input_is_dynamic = false;
-  format = nvinfer1::TensorFormat::kLINEAR;
-  dtype = dtype;

   TRTORCH_CHECK(valid_input_dtype(dtype), "Unsupported input data type: " << dtype);
   this->dtype = dtype;
@@ -156,8 +154,6 @@ Input::Input(std::vector<int64_t> min_shape, std::vector<int64_t> opt_shape, std
   min = util::toDims(min_shape);
   opt = util::toDims(opt_shape);
   max = util::toDims(max_shape);
-  format = nvinfer1::TensorFormat::kLINEAR;
-  dtype = nvinfer1::DataType::kFLOAT;

   std::vector<int64_t> dyn_shape;
   for (size_t i = 0; i < opt_shape.size(); i++) {
cpp/api/include/trtorch/trtorch.h (+11, -2)

@@ -9,6 +9,7 @@
 #pragma once

 #include <cuda_runtime.h>
+#include <iostream>
 #include <memory>
 #include <string>
 #include <vector>
@@ -66,10 +67,12 @@ struct TRTORCH_API CompileSpec {
       kHalf,
       /// INT8
       kChar,
-      /// INT32
-      kInt32,
+      /// INT
+      kInt,
       /// Bool
       kBool,
+      /// Sentinel value
+      kUnknown
     };

     /**
@@ -139,6 +142,7 @@ struct TRTORCH_API CompileSpec {
     }

    private:
+    friend std::ostream& operator<<(std::ostream& os, const DataType& dtype);
     Value value;
   };

@@ -278,6 +282,8 @@ struct TRTORCH_API CompileSpec {
       kContiguous,
       /// Channel Last / NHWC
       kChannelsLast,
+      /// Sentinel value
+      kUnknown,
     };

     /**
@@ -346,7 +352,9 @@ struct TRTORCH_API CompileSpec {
       return value != other;
     }

+
    private:
+    friend std::ostream& operator<<(std::ostream& os, const TensorFormat& format);
     Value value;
   };

@@ -472,6 +480,7 @@ struct TRTORCH_API CompileSpec {

     bool get_explicit_set_dtype() {return explicit_set_dtype;}
    private:
+    friend std::ostream& operator<<(std::ostream& os, const Input& input);
     bool input_is_dynamic;
     bool explicit_set_dtype;
   };

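The header changes above add a `kUnknown` sentinel to both `DataType` and `TensorFormat`. As a rough illustration (not part of this commit), a sentinel like this lets calling code tell "never set by the user" apart from a real value. The helper name `resolve_dtype` and the FP32 fallback below are hypothetical, and the sketch assumes the wrapper classes are implicitly constructible from and comparable with their `Value` enumerators, as the comparison operators in the header suggest.

```
#include "trtorch/trtorch.h"

// Hypothetical helper (not in this commit): fall back to FP32 only when the
// dtype was left at the new kUnknown sentinel, i.e. never set explicitly.
trtorch::CompileSpec::DataType resolve_dtype(trtorch::CompileSpec::DataType requested) {
  if (requested == trtorch::CompileSpec::DataType::kUnknown) {
    return trtorch::CompileSpec::DataType::kFloat;  // assumed default, mirroring PyTorch's float32
  }
  return requested;  // the user picked a dtype explicitly; keep it
}
```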
cpp/api/src/compile_spec.cpp (+64, -4)

@@ -9,13 +9,74 @@

 namespace trtorch {

+std::ostream& operator<<(std::ostream& os, const CompileSpec::DataType& dtype) {
+  switch (dtype) {
+    case CompileSpec::DataType::kChar:
+      os << "char";
+      break;
+    case CompileSpec::DataType::kHalf:
+      os << "half";
+      break;
+    case CompileSpec::DataType::kInt:
+      os << "int";
+      break;
+    case CompileSpec::DataType::kBool:
+      os << "bool";
+      break;
+    case CompileSpec::DataType::kFloat:
+      os << "float";
+      break;
+    case CompileSpec::DataType::kUnknown:
+    default:
+      os << "unknown";
+      break;
+  }
+  return os;
+}
+
+std::ostream& operator<<(std::ostream& os, const CompileSpec::TensorFormat& format) {
+  switch (format) {
+    case CompileSpec::TensorFormat::kChannelsLast:
+      os << "channels last";
+      break;
+    case CompileSpec::TensorFormat::kContiguous:
+      os << "contiguous";
+      break;
+    case CompileSpec::TensorFormat::kUnknown:
+    default:
+      os << "unknown";
+      break;
+  }
+  return os;
+}
+
+std::ostream& operator<<(std::ostream& os, const CompileSpec::Input& input) {
+  auto vec_to_str = [](std::vector<int64_t> shape) -> std::string {
+    std::stringstream ss;
+    ss << '[';
+    for (auto i : shape) {
+      ss << i << ',';
+    }
+    ss << ']';
+    return ss.str();
+  };
+
+  if (!input.input_is_dynamic) {
+    os << "Input(shape: " << vec_to_str(input.shape) << ", dtype: " << input.dtype << ", format: " << input.format << ')';
+  } else {
+    os << "Input(shape: " << vec_to_str(input.shape) << ", min: " << vec_to_str(input.min_shape) << ", opt: " << vec_to_str(input.opt_shape) << ", max: " << vec_to_str(input.max_shape) << ", dtype: " << input.dtype << ", format: " << input.format << ')';
+  }
+  return os;
+}
+
+
 nvinfer1::DataType toTRTDataType(CompileSpec::DataType value) {
   switch (value) {
     case CompileSpec::DataType::kChar:
       return nvinfer1::DataType::kINT8;
     case CompileSpec::DataType::kHalf:
       return nvinfer1::DataType::kHALF;
-    case CompileSpec::DataType::kInt32:
+    case CompileSpec::DataType::kInt:
       return nvinfer1::DataType::kINT32;
     case CompileSpec::DataType::kBool:
       return nvinfer1::DataType::kBOOL;
@@ -47,7 +108,7 @@ CompileSpec::DataType::DataType(c10::ScalarType t) {
       value = DataType::kChar;
       break;
     case at::kInt:
-      value = DataType::kInt32;
+      value = DataType::kInt;
       break;
     case at::kBool:
       value = DataType::kBool;
@@ -250,7 +311,6 @@ core::CompileSpec to_internal_compile_spec(CompileSpec external) {
   /* We want default behavior for types to match PyTorch, so in the case the user did not explicitly set the dtype for
     inputs they will follow PyTorch convetions */
   for (size_t i = 0; i < external.inputs.size(); i++) {
-    std::cout << "EXPLICIT " << external.inputs[i].get_explicit_set_dtype() << std::endl;
     if (!external.inputs[i].get_explicit_set_dtype()) {
       auto& precisions = internal.convert_info.engine_settings.enabled_precisions;
       auto& internal_ins = internal.convert_info.inputs;
@@ -261,9 +321,9 @@ core::CompileSpec to_internal_compile_spec(CompileSpec external) {
       } else {
         internal_ins[i].dtype = nvinfer1::DataType::kFLOAT;
       }
-      std::cout << "internal type: " << internal_ins[i].dtype;
     }
   }
+
   internal.convert_info.engine_settings.disable_tf32 = external.disable_tf32;
   internal.convert_info.engine_settings.refit = external.refit;
   internal.convert_info.engine_settings.debug = external.debug;

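A minimal sketch of how the stream operators added in this file could be exercised. It is illustrative only, and assumes the enum wrappers are implicitly constructible from their `Value` enumerators; the printed strings follow the switch statements in the diff above.

```
#include <iostream>

#include "trtorch/trtorch.h"

int main() {
  // Exercise the new operator<< overloads for the enum wrappers.
  trtorch::CompileSpec::DataType dtype = trtorch::CompileSpec::DataType::kHalf;
  trtorch::CompileSpec::TensorFormat format = trtorch::CompileSpec::TensorFormat::kChannelsLast;

  // Per the switch statements above, this should print:
  // "dtype: half, format: channels last"
  std::cout << "dtype: " << dtype << ", format: " << format << std::endl;
  return 0;
}
```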
cpp/trtorchc/README.md (+21, -11)

@@ -14,7 +14,7 @@ to standard TorchScript. Load with `torch.jit.load()` and run like you would run

 ```
 trtorchc [input_file_path] [output_file_path]
-    [input_shapes...] {OPTIONS}
+    [input_specs...] {OPTIONS}

 TRTorch is a compiler for TorchScript, it will compile and optimize
 TorchScript programs to run on NVIDIA GPUs using TensorRT
@@ -28,24 +28,29 @@ trtorchc [input_file_path] [output_file_path]
       -w, --warnings                    Disables warnings generated during
                                         compilation onto the console (warnings
                                         are on by default)
-      --info                            Dumps info messages generated during
+      --i, --info                       Dumps info messages generated during
                                         compilation onto the console
       --build-debuggable-engine         Creates a debuggable engine
       --use-strict-types                Restrict operating type to only use set
-                                        default operation precision
-                                        (op_precision)
+                                        operation precision
       --allow-gpu-fallback              (Only used when targeting DLA
                                         (device-type)) Lets engine run layers on
                                         GPU if they are not supported on DLA
-      -p[precision],
-      --default-op-precision=[precision]
-                                        Default operating precision for the
-                                        engine (Int8 requires a
+      --disable-tf32                    Prevent Float32 layers from using the
+                                        TF32 data format
+      -p[precision...],
+      --enabled-precison=[precision...] (Repeatable) Enabling an operating
+                                        precision for kernels to use when
+                                        building the engine (Int8 requires a
                                         calibration-cache argument) [ float |
                                         float32 | f32 | half | float16 | f16 |
                                         int8 | i8 ] (default: float)
       -d[type], --device-type=[type]    The type of device the engine should be
                                         built for [ gpu | dla ] (default: gpu)
+      --gpu-id=[gpu_id]                 GPU id if running on multi-GPU platform
+                                        (defaults to 0)
+      --dla-core=[dla_core]             DLACore id if running on available DLA
+                                        (defaults to 0)
       --engine-capability=[capability]  The type of device the engine should be
                                         built for [ default | safe_gpu |
                                         safe_dla ]
@@ -72,16 +77,21 @@ trtorchc [input_file_path] [output_file_path]
       input_file_path                   Path to input TorchScript file
       output_file_path                  Path for compiled TorchScript (or
                                         TensorRT engine) file
-      input_shapes...                   Sizes for inputs to engine, can either
+      input_specs...                    Specs for inputs to engine, can either
                                         be a single size or a range defined by
                                         Min, Optimal, Max sizes, e.g.
                                         "(N,..,C,H,W)"
-                                        "[(MIN_N,..,MIN_C,MIN_H,MIN_W);(OPT_N,..,OPT_C,OPT_H,OPT_W);(MAX_N,..,MAX_C,MAX_H,MAX_W)]"
+                                        "[(MIN_N,..,MIN_C,MIN_H,MIN_W);(OPT_N,..,OPT_C,OPT_H,OPT_W);(MAX_N,..,MAX_C,MAX_H,MAX_W)]".
+                                        Data Type and format can be specified by
+                                        adding an "@" followed by dtype and "%"
+                                        followed by format to the end of the
+                                        shape spec. e.g. "(3, 3, 32,
+                                        32)@f16%NHWC"
       "--" can be used to terminate flag options and force all following
       arguments to be treated as positional options
 ```

 e.g.
 ```
-trtorchc tests/modules/ssd_traced.jit.pt ssd_trt.ts "[(1,3,300,300); (1,3,512,512); (1, 3, 1024, 1024)]" -p f16
+trtorchc tests/modules/ssd_traced.jit.pt ssd_trt.ts "[(1,3,300,300); (1,3,512,512); (1, 3, 1024, 1024)]@fp16%contiguous" -p f16
 ```

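For reference, the new `@dtype%format` suffix described in the help text above also composes with a single static shape spec. The command below is an illustrative invocation only (the module and output paths are hypothetical), built from the syntax documented in the README diff:

```
trtorchc model.jit.pt model_trt.ts "(1,3,32,32)@f16%NHWC" -p f16
```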