# C++ API

Targets in this module create the user-facing C++ library for the TRTorch core.

## Building libtrtorch.so

### Debug build
``` shell
bazel build //cpp/api:libtrtorch.so --compilation_mode=dbg
```

### Release build

``` shell
bazel build //cpp/api:libtrtorch.so --cxxopt="-DNDEBUG"
```

> Temporary, will get real documentation soon
``` c++
namespace trtorch {
/**
 * Settings data structure for TRTorch compilation
 *
 */
struct TRTORCH_API ExtraInfo {
  /**
   * @brief A struct to hold an input range (used by TensorRT Optimization profile)
   *
   * This struct can either hold a single vector representing an input shape, signifying a
   * static input shape, or a set of three input shapes representing the min, optimal and max
   * input shapes allowed for the engine.
   */
  struct TRTORCH_API InputRange {
    std::vector<int64_t> min;
    std::vector<int64_t> opt;
    std::vector<int64_t> max;
    // ... (constructors elided)
  };

  /**
   * Supported Data Types that can be used with TensorRT engines
   *
   * This class is compatible with c10::DataTypes (but will check for TRT support)
   * so there should not be a reason that you need to use this type explicitly.
   */
  class DataType {
  public:
    // ...
    /**
     * ...
     * ex. trtorch::DataType type = DataType::kFloat;
     */
    enum Value : int8_t {
      /// FP32
      kFloat,
      /// FP16
      kHalf,
      /// INT8
      kChar,
    };

    DataType() = default;
    constexpr DataType(Value t) : value(t) {}
    DataType(c10::ScalarType t);
    // ...
  };

  /**
   * ...
   *
   * This class is compatible with c10::DeviceTypes (but will check for TRT support)
   * but the only applicable value is at::kCUDA, which maps to DeviceType::kGPU
   *
   * To use the DeviceType class itself, interface using the enum vs. normal instantiation
   *
   * ex. trtorch::DeviceType type = DeviceType::kGPU;
   */
  // ... (DeviceType class declaration elided)
  };

  /**
   * Enum for selecting engine capability
   */
  enum class EngineCapability : int8_t {
    kDEFAULT,
    // ...
  };

  ExtraInfo(std::vector<InputRange> input_ranges)
    : input_ranges(std::move(input_ranges)) {}
  ExtraInfo(std::vector<std::vector<int64_t>> fixed_sizes);
  ExtraInfo(std::vector<c10::ArrayRef<int64_t>> fixed_sizes);

  // Defaults should reflect TensorRT defaults for BuilderConfig

  /**
   * Sizes for inputs to engine, can either be a single size or a range
   * defined by Min, Optimal, Max sizes
   *
   * Order should match call order
   */
  std::vector<InputRange> input_ranges;

  /**
   * Default operating precision for the engine
   */
  DataType op_precision = DataType::kFloat;

  /**
   * Build a refittable engine
   */
  bool refit = false;

  // ...

  /**
   * Restrict operating types to only use the default operation precision (op_precision)
   */
  bool strict_types = false;

  /**
   * (Only used when targeting DLA (device))
   * Lets engine run layers on GPU if they are not supported on DLA
   */
  bool allow_gpu_fallback = true;

  // ...

  /**
   * Maximum size of workspace given to TensorRT
   */
  uint64_t workspace_size = 0;

  /**
   * Maximum batch size (must be >= 1 to be set, 0 means not set)
   */
  uint64_t max_batch_size = 0;

  /**
   * Calibration dataloaders for each input for post training quantization
   */
  nvinfer1::IInt8Calibrator* ptq_calibrator = nullptr;
};

/**
 * ...
 */
TRTORCH_API std::string get_build_info();

/**
 * Dump the version information for TRTorch including base libtorch and TensorRT versions
 * to stdout
 */
TRTORCH_API void dump_build_info();

/**
 * @brief Check to see if a module is fully supported by the compiler
 *
 * @param module: torch::jit::script::Module - Existing TorchScript module
 * @param method_name: std::string - Name of method to compile
 *
 * Takes a module and a method name and checks if the method graph contains purely
 * convertible operators
 *
 * Will print out a list of unsupported operators if the graph is unsupported
 */
TRTORCH_API bool CheckMethodOperatorSupport(const torch::jit::script::Module& module, std::string method_name);

/**
 * @brief Compile a TorchScript module for NVIDIA GPUs using TensorRT
 *
 * @param module: torch::jit::script::Module - Existing TorchScript module
 * @param info: trtorch::ExtraInfo - Compilation settings
 *
 * Takes an existing TorchScript module and a set of settings to configure the compiler
 * and will convert methods to JIT Graphs which call equivalent TensorRT engines
 *
 * Converts specifically the forward method of a TorchScript Module
 */
TRTORCH_API torch::jit::script::Module CompileGraph(const torch::jit::script::Module& module, ExtraInfo info);

/**
 * @brief Compile a TorchScript method for NVIDIA GPUs using TensorRT
 *
 * @param module: torch::jit::script::Module - Existing TorchScript module
 * @param method_name: std::string - Name of method to compile
 * @param info: trtorch::ExtraInfo - Compilation settings
 *
 * Takes an existing TorchScript module and a set of settings to configure the compiler
 * and will convert the selected method to a serialized TensorRT engine which can be run with
 * TensorRT
 */
TRTORCH_API std::string ConvertGraphToTRTEngine(const torch::jit::script::Module& module, std::string method_name, ExtraInfo info);

namespace ptq {
/**
 * @brief A factory to build a post training quantization calibrator from a torch dataloader
 *
 * Creates a calibrator to use for post training quantization.
 * If there are multiple inputs, the dataset should produce an example which is a vector (or similar container) of tensors rather than a single tensor
 *
 * By default the returned calibrator uses the TensorRT Entropy v2 algorithm to perform calibration. This is recommended for feed forward networks.
 * You can override the algorithm selection (such as to use the MinMax Calibrator recommended for NLP tasks) by calling make_int8_calibrator with
 * the calibrator class as a template parameter.
 *
 * e.g. trtorch::ptq::make_int8_calibrator<nvinfer1::IInt8MinMaxCalibrator>(std::move(calibration_dataloader), calibration_cache_file, use_cache);
 */
template<typename Algorithm = nvinfer1::IInt8EntropyCalibrator2, typename DataLoader>
TRTORCH_API inline Int8Calibrator<Algorithm, DataLoader> make_int8_calibrator(DataLoader dataloader, const std::string& cache_file_path, bool use_cache) {
  return Int8Calibrator<Algorithm, DataLoader>(std::move(dataloader), cache_file_path, use_cache);
}

/**
 * @brief A factory to build a post training quantization calibrator that only uses a previously generated calibration cache
 *
 * Creates a calibrator to use for post training quantization which reads from a previously created calibration cache. You can therefore
 * have a calibration cache generating program that requires a dataloader and a dataset, save the cache, and use it later in a different
 * program without the dataset dependency. However, the network should be recalibrated if its structure changes or the input data set
 * changes, and it is the responsibility of the application to ensure this.
 *
 * By default the returned calibrator uses the TensorRT Entropy v2 algorithm to perform calibration. This is recommended for feed forward networks.
 * You can override the algorithm selection (such as to use the MinMax Calibrator recommended for NLP tasks) by calling make_int8_cache_calibrator with
 * the calibrator class as a template parameter.
 *
 * e.g. trtorch::ptq::make_int8_cache_calibrator<nvinfer1::IInt8MinMaxCalibrator>(calibration_cache_file);
 */
template<typename Algorithm = nvinfer1::IInt8EntropyCalibrator2>
TRTORCH_API inline Int8CacheCalibrator<Algorithm> make_int8_cache_calibrator(const std::string& cache_file_path) {
  return Int8CacheCalibrator<Algorithm>(cache_file_path);
}
} // namespace ptq
} // namespace trtorch
```
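
For orientation while the real documentation is pending, here is a minimal usage sketch of the compile path above. The include path `trtorch/trtorch.h`, the module file `model.ts`, and the input shape are illustrative assumptions, not part of the listing.

``` c++
// Minimal sketch (assumptions noted above): compile the forward method of a
// TorchScript module with TensorRT and run it on CUDA.
#include <torch/script.h>
#include "trtorch/trtorch.h"  // assumed install location of the header above

int main() {
  // Load an existing TorchScript module (path is illustrative)
  auto mod = torch::jit::load("model.ts");

  // Optionally verify that every operator in forward() is convertible first
  if (!trtorch::CheckMethodOperatorSupport(mod, "forward")) {
    return 1;
  }

  // One input with a fixed (static) shape, via the fixed_sizes constructor
  std::vector<std::vector<int64_t>> fixed_sizes = {{1, 3, 224, 224}};
  trtorch::ExtraInfo info(fixed_sizes);
  info.op_precision = trtorch::ExtraInfo::DataType::kHalf;  // run the engine in FP16
  info.workspace_size = 1 << 20;                            // 1 MiB TensorRT workspace

  // Compile forward() to a TensorRT-backed module and run it
  auto trt_mod = trtorch::CompileGraph(mod, info);
  auto out = trt_mod.forward({torch::randn({1, 3, 224, 224}).to(torch::kCUDA)});
  return 0;
}
```

If you only need the serialized engine rather than a wrapped module, `ConvertGraphToTRTEngine` takes the same settings plus a method name and returns the engine as a string.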
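And a similarly hedged sketch of the `ptq` factories feeding INT8 calibration. The dataset construction, cache path, and `trtorch/ptq.h` include are assumptions for illustration; any `torch::data` dataloader yielding batches that match the input shape should work, and the conversion of the returned calibrator to `nvinfer1::IInt8Calibrator*` is assumed from the `ptq_calibrator` field above.

``` c++
// Sketch: INT8 post training quantization (names and paths are illustrative).
#include <torch/script.h>
#include <torch/torch.h>
#include "trtorch/trtorch.h"
#include "trtorch/ptq.h"  // assumed header providing the ptq factories above

torch::jit::script::Module compile_int8(torch::jit::script::Module mod) {
  // Stand-in calibration data: 64 random images; substitute a real dataset.
  auto dataset = torch::data::datasets::TensorDataset(torch::randn({64, 3, 224, 224}))
                     .map(torch::data::transforms::Stack<torch::data::TensorExample>());
  auto dataloader = torch::data::make_data_loader(
      std::move(dataset), torch::data::DataLoaderOptions().batch_size(1));

  // Entropy-v2 calibration by default; pass e.g. nvinfer1::IInt8MinMaxCalibrator
  // as the template argument to switch algorithms.
  auto calibrator = trtorch::ptq::make_int8_calibrator(
      std::move(dataloader), "/tmp/int8_calibration.cache", /*use_cache=*/false);

  std::vector<std::vector<int64_t>> fixed_sizes = {{1, 3, 224, 224}};
  trtorch::ExtraInfo info(fixed_sizes);
  info.op_precision = trtorch::ExtraInfo::DataType::kChar;  // INT8
  info.max_batch_size = 1;
  info.ptq_calibrator = calibrator;  // assumes the calibrator converts to nvinfer1::IInt8Calibrator*

  return trtorch::CompileGraph(mod, info);
}
```

`make_int8_cache_calibrator` follows the same pattern but takes only the cache file path, for runs that reuse a previously generated calibration cache.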