From b8fa228777f196143220e180266b3a302763fc0b Mon Sep 17 00:00:00 2001 From: Naren Dasan Date: Wed, 30 Sep 2020 16:42:05 -0700 Subject: [PATCH 1/6] refactor!: Renaming extra info to compile spec to be more consistent with other backends and between APIs in TRTorch BREAKING CHANGE: This changes the top level api for setting the specification for compilation, a simple find and replace should allow users to port forward Signed-off-by: Naren Dasan Signed-off-by: Naren Dasan --- .bazelrc | 1 + README.md | 4 +- core/compiler.cpp | 12 ++-- core/compiler.h | 8 +-- .../conversionctx/ConversionCtx.cpp | 2 +- cpp/api/BUILD | 2 +- cpp/api/README.md | 16 ++--- cpp/api/include/trtorch/ptq.h | 4 +- cpp/api/include/trtorch/trtorch.h | 16 ++--- .../src/{extra_info.cpp => compile_spec.cpp} | 40 +++++------ cpp/api/src/trtorch.cpp | 12 ++-- cpp/benchmark/main.cpp | 10 +-- cpp/ptq/README.md | 12 ++-- cpp/ptq/main.cpp | 14 ++-- cpp/trtorchc/main.cpp | 22 +++--- cpp/trtorchexec/main.cpp | 8 +-- docsrc/tutorials/getting_started.rst | 10 +-- docsrc/tutorials/ptq.rst | 12 ++-- py/BUILD | 2 +- .../{_extra_info.py => _compile_spec.py} | 70 +++++++++---------- py/trtorch/_compiler.py | 18 ++--- py/trtorch/csrc/trtorch_py.cpp | 48 +++++++------ tests/accuracy/test_fp16_accuracy.cpp | 6 +- tests/accuracy/test_fp32_accuracy.cpp | 6 +- tests/accuracy/test_int8_accuracy.cpp | 12 ++-- tests/modules/test_serialization.cpp | 6 +- tests/py/test_api.py | 8 +-- 27 files changed, 194 insertions(+), 187 deletions(-) rename cpp/api/src/{extra_info.cpp => compile_spec.cpp} (73%) rename py/trtorch/{_extra_info.py => _compile_spec.py} (64%) diff --git a/.bazelrc b/.bazelrc index 4a2a7423df..0a89848888 100644 --- a/.bazelrc +++ b/.bazelrc @@ -29,6 +29,7 @@ build --cxxopt='-std=c++14' build:python --cxxopt="-D_GLIBCXX_USE_CXX11_ABI=0" build:python --linkopt="-D_GLIBCXX_USE_CXX11_ABI=0" build:python --define=abi=pre_cxx11_abi +build:python --define=target_lang=python build:pre_cxx11_abi --cxxopt="-D_GLIBCXX_USE_CXX11_ABI=0" build:pre_cxx11_abi --linkopt="-D_GLIBCXX_USE_CXX11_ABI=0" diff --git a/README.md b/README.md index c9c6c90ccf..61f71fa493 100644 --- a/README.md +++ b/README.md @@ -18,7 +18,7 @@ More Information / System Architecture: #include "trtorch/trtorch.h" ... 
-auto compile_settings = trtorch::ExtraInfo(dims); +auto compile_settings = trtorch::CompileSpec(dims); // FP16 execution compile_settings.op_precision = torch::kFloat; // Compile module @@ -54,7 +54,7 @@ torch.jit.save(trt_ts_module, "trt_torchscript_module.ts") ``` > Notes on running in lower precisions: -> - Set precision with extra_info.op_precision +> - Set precision with compile_spec.op_precision > - The module should be left in FP32 before compilation (FP16 can support half tensor models) > - In FP16 only input tensors should be converted to FP16, other precisions use FP32 diff --git a/core/compiler.cpp b/core/compiler.cpp index d45dcf4a38..099651a1fa 100644 --- a/core/compiler.cpp +++ b/core/compiler.cpp @@ -20,7 +20,7 @@ #include "core/lowering/lowering.h" #include "core/conversion/conversion.h" -#include "core/execution/execution.h" +#include "core/runtime/runtime.h" namespace trtorch { namespace core { @@ -42,7 +42,7 @@ c10::FunctionSchema GenerateGraphSchema(torch::jit::script::Module mod, std::str void AddEngineToGraph(torch::jit::script::Module mod, std::shared_ptr& g, std::string& serialized_engine) { - auto engine_ptr = c10::make_intrusive(mod._ivalue()->name(), serialized_engine); + auto engine_ptr = c10::make_intrusive(mod._ivalue()->name(), serialized_engine); // Get required metadata about the engine out auto num_io = engine_ptr->num_io; auto name = engine_ptr->name; @@ -50,7 +50,7 @@ void AddEngineToGraph(torch::jit::script::Module mod, std::shared_ptr>(), + c10::getCustomClassType>(), c10::IValue(std::move(engine_ptr)), false ); @@ -125,7 +125,7 @@ bool CheckMethodOperatorSupport(const torch::jit::script::Module& mod, std::string ConvertGraphToTRTEngine(const torch::jit::script::Module& mod, std::string method_name, - ExtraInfo cfg) { + CompileSpec cfg) { // Go through Lowering to simplify graph and extract weight parameters auto graph_and_parameters = lowering::Lower(mod, method_name); @@ -137,12 +137,12 @@ std::string ConvertGraphToTRTEngine(const torch::jit::script::Module& mod, LOG_INFO(*g << "(CompileGraph)\n"); - auto engine = ConvertBlockToEngine(g->block(), convert_cfg, named_params); + auto engine = conversion::ConvertBlockToEngine(g->block(), convert_cfg, named_params); return std::move(engine); } torch::jit::script::Module CompileGraph(const torch::jit::script::Module& mod, - ExtraInfo cfg) { + CompileSpec cfg) { // TODO: Should be doing a functional transform but need PR #31978 // [jit] More robust mangling //torch::jit::script::Module new_mod = mod.clone(); diff --git a/core/compiler.h b/core/compiler.h index f9ff400159..281973d4d6 100644 --- a/core/compiler.h +++ b/core/compiler.h @@ -7,8 +7,8 @@ namespace trtorch { namespace core { -struct ExtraInfo { - ExtraInfo(std::vector input_ranges) +struct CompileSpec { + CompileSpec(std::vector input_ranges) : convert_info(std::move(input_ranges)) {} conversion::ConversionInfo convert_info; }; @@ -16,9 +16,9 @@ struct ExtraInfo { bool CheckMethodOperatorSupport(const torch::jit::script::Module& mod, std::string method_name); std::string ConvertGraphToTRTEngine(const torch::jit::script::Module& mod, - std::string method_name, ExtraInfo cfg); + std::string method_name, CompileSpec cfg); -torch::jit::script::Module CompileGraph(const torch::jit::script::Module& module, ExtraInfo cfg); +torch::jit::script::Module CompileGraph(const torch::jit::script::Module& module, CompileSpec cfg); } // namespace core } // namespace trtorch diff --git a/core/conversion/conversionctx/ConversionCtx.cpp 
b/core/conversion/conversionctx/ConversionCtx.cpp index 2993ee593e..3280464635 100644 --- a/core/conversion/conversionctx/ConversionCtx.cpp +++ b/core/conversion/conversionctx/ConversionCtx.cpp @@ -55,7 +55,7 @@ ConversionCtx::ConversionCtx(BuilderSettings build_settings) cfg->setFlag(nvinfer1::BuilderFlag::kFP16); } input_type = nvinfer1::DataType::kFLOAT; - TRTORCH_CHECK(settings.calibrator != nullptr, "Requested inference in INT8 but no calibrator provided, set the ptq_calibrator field in the ExtraInfo struct with your calibrator"); + TRTORCH_CHECK(settings.calibrator != nullptr, "Requested inference in INT8 but no calibrator provided, set the ptq_calibrator field in the CompileSpec struct with your calibrator"); cfg->setInt8Calibrator(settings.calibrator); break; case nvinfer1::DataType::kFLOAT: diff --git a/cpp/api/BUILD b/cpp/api/BUILD index d396d1690a..18ce5b8118 100644 --- a/cpp/api/BUILD +++ b/cpp/api/BUILD @@ -9,7 +9,7 @@ cc_library( "include/trtorch/ptq.h" ], srcs = [ - "src/extra_info.cpp", + "src/compile_spec.cpp", "src/logging.cpp", "src/trtorch.cpp", "src/ptq.cpp" diff --git a/cpp/api/README.md b/cpp/api/README.md index ab1bf03cfe..4bdbae379b 100644 --- a/cpp/api/README.md +++ b/cpp/api/README.md @@ -31,7 +31,7 @@ namespace trtorch { * Settings data structure for TRTorch compilation * */ -struct TRTORCH_API ExtraInfo { +struct TRTORCH_API CompileSpec { /** * @brief A struct to hold an input range (used by TensorRT Optimization profile) * @@ -132,10 +132,10 @@ struct TRTORCH_API ExtraInfo { kSAFE_DLA, }; - ExtraInfo(std::vector input_ranges) + CompileSpec(std::vector input_ranges) : input_ranges(std::move(input_ranges)) {} - ExtraInfo(std::vector> fixed_sizes); - ExtraInfo(std::vector> fixed_sizes); + CompileSpec(std::vector> fixed_sizes); + CompileSpec(std::vector> fixed_sizes); // Defaults should reflect TensorRT defaults for BuilderConfig @@ -236,27 +236,27 @@ TRTORCH_API bool CheckMethodOperatorSupport(const torch::jit::script::Module& mo * @brief Compile a TorchScript module for NVIDIA GPUs using TensorRT * * @param module: torch::jit::script::Module - Existing TorchScript module - * @param info: trtorch::ExtraInfo - Compilation settings + * @param info: trtorch::CompileSpec - Compilation settings * * Takes a existing TorchScript module and a set of settings to configure the compiler * and will convert methods to JIT Graphs which call equivalent TensorRT engines * * Converts specifically the forward method of a TorchScript Module */ -TRTORCH_API torch::jit::script::Module CompileGraph(const torch::jit::script::Module& module, ExtraInfo info); +TRTORCH_API torch::jit::script::Module CompileGraph(const torch::jit::script::Module& module, CompileSpec info); /** * @brief Compile a TorchScript method for NVIDIA GPUs using TensorRT * * @param module: torch::jit::script::Module - Existing TorchScript module * @param method_name: std::string - Name of method to compile - * @param info: trtorch::ExtraInfo - Compilation settings + * @param info: trtorch::CompileSpec - Compilation settings * * Takes a existing TorchScript module and a set of settings to configure the compiler * and will convert selected method to a serialized TensorRT engine which can be run with * TensorRT */ -TRTORCH_API std::string ConvertGraphToTRTEngine(const torch::jit::script::Module& module, std::string method_name, ExtraInfo info); +TRTORCH_API std::string ConvertGraphToTRTEngine(const torch::jit::script::Module& module, std::string method_name, CompileSpec info); namespace ptq { /** diff --git 
a/cpp/api/include/trtorch/ptq.h b/cpp/api/include/trtorch/ptq.h index 05f3583947..4932218405 100644 --- a/cpp/api/include/trtorch/ptq.h +++ b/cpp/api/include/trtorch/ptq.h @@ -145,7 +145,7 @@ class Int8Calibrator : Algorithm { /** * @brief operator to cast to nvinfer1::IInt8Calibrator* * - * Convience function to convert to a IInt8Calibrator* to easily be assigned to the ptq_calibrator field in ExtraInfo + * Convience function to convert to a IInt8Calibrator* to easily be assigned to the ptq_calibrator field in CompileSpec * * @return nvinfer1::IInt8Calibrator* */ @@ -259,7 +259,7 @@ class Int8CacheCalibrator : Algorithm { /** * @brief operator to cast to nvinfer1::IInt8Calibrator* * - * Convience function to convert to a IInt8Calibrator* to easily be assigned to the ptq_calibrator field in ExtraInfo + * Convience function to convert to a IInt8Calibrator* to easily be assigned to the ptq_calibrator field in CompileSpec * * @return nvinfer1::IInt8Calibrator* */ diff --git a/cpp/api/include/trtorch/trtorch.h b/cpp/api/include/trtorch/trtorch.h index 8e2757ad3b..cf8bd9e329 100644 --- a/cpp/api/include/trtorch/trtorch.h +++ b/cpp/api/include/trtorch/trtorch.h @@ -39,7 +39,7 @@ namespace trtorch { * Settings data structure for TRTorch compilation * */ -struct TRTORCH_API ExtraInfo { +struct TRTORCH_API CompileSpec { /** * @brief A struct to hold an input range (used by TensorRT Optimization profile) * @@ -256,7 +256,7 @@ struct TRTORCH_API ExtraInfo { * * @param input_ranges */ - ExtraInfo(std::vector input_ranges) + CompileSpec(std::vector input_ranges) : input_ranges(std::move(input_ranges)) {} /** * @brief Construct a new Extra Info object @@ -265,14 +265,14 @@ struct TRTORCH_API ExtraInfo { * * @param fixed_sizes */ - ExtraInfo(std::vector> fixed_sizes); + CompileSpec(std::vector> fixed_sizes); /** * @brief Construct a new Extra Info object * Convienence constructor to set fixed input size from c10::ArrayRef's (the output of tensor.sizes()) describing size of input tensors. * Each entry in the vector represents a input and should be provided in call order. 
* @param fixed_sizes */ - ExtraInfo(std::vector> fixed_sizes); + CompileSpec(std::vector> fixed_sizes); // Defaults should reflect TensorRT defaults for BuilderConfig @@ -379,7 +379,7 @@ TRTORCH_API bool CheckMethodOperatorSupport(const torch::jit::Module& module, st * @brief Compile a TorchScript module for NVIDIA GPUs using TensorRT * * @param module: torch::jit::Module - Existing TorchScript module - * @param info: trtorch::ExtraInfo - Compilation settings + * @param info: trtorch::CompileSpec - Compilation settings * * Takes a existing TorchScript module and a set of settings to configure the compiler * and will convert methods to JIT Graphs which call equivalent TensorRT engines @@ -388,14 +388,14 @@ TRTORCH_API bool CheckMethodOperatorSupport(const torch::jit::Module& module, st * * @return: A new module trageting a TensorRT engine */ -TRTORCH_API torch::jit::Module CompileGraph(const torch::jit::Module& module, ExtraInfo info); +TRTORCH_API torch::jit::Module CompileGraph(const torch::jit::Module& module, CompileSpec info); /** * @brief Compile a TorchScript method for NVIDIA GPUs using TensorRT * * @param module: torch::jit::Module - Existing TorchScript module * @param method_name: std::string - Name of method to compile - * @param info: trtorch::ExtraInfo - Compilation settings + * @param info: trtorch::CompileSpec - Compilation settings * * Takes a existing TorchScript module and a set of settings to configure the compiler * and will convert selected method to a serialized TensorRT engine which can be run with @@ -403,5 +403,5 @@ TRTORCH_API torch::jit::Module CompileGraph(const torch::jit::Module& module, Ex * * @return: std::string: Serialized TensorRT engine equivilant to the method graph */ -TRTORCH_API std::string ConvertGraphToTRTEngine(const torch::jit::Module& module, std::string method_name, ExtraInfo info); +TRTORCH_API std::string ConvertGraphToTRTEngine(const torch::jit::Module& module, std::string method_name, CompileSpec info); } // namespace trtorch diff --git a/cpp/api/src/extra_info.cpp b/cpp/api/src/compile_spec.cpp similarity index 73% rename from cpp/api/src/extra_info.cpp rename to cpp/api/src/compile_spec.cpp index 5bc12fa204..bfec3e7ba7 100644 --- a/cpp/api/src/extra_info.cpp +++ b/cpp/api/src/compile_spec.cpp @@ -6,7 +6,7 @@ #include "trtorch/trtorch.h" namespace trtorch { -ExtraInfo::DataType::DataType(c10::ScalarType t) { +CompileSpec::DataType::DataType(c10::ScalarType t) { TRTORCH_CHECK(t == at::kHalf || t == at::kFloat || t == at::kChar, "Data type is unsupported"); switch (t) { case at::kHalf: @@ -21,52 +21,52 @@ ExtraInfo::DataType::DataType(c10::ScalarType t) { } } -ExtraInfo::DeviceType::DeviceType(c10::DeviceType t) { +CompileSpec::DeviceType::DeviceType(c10::DeviceType t) { TRTORCH_CHECK(t == at::kCUDA, "Device type when specified using torch device enum must be torch::kCUDA"); value = DeviceType::kGPU; } -ExtraInfo::InputRange::InputRange(std::vector opt) { +CompileSpec::InputRange::InputRange(std::vector opt) { this->opt = opt; this->min = opt; this->max = opt; } -ExtraInfo::InputRange::InputRange(c10::IntArrayRef opt) { +CompileSpec::InputRange::InputRange(c10::IntArrayRef opt) { this->opt = core::util::toVec(opt); this->min = core::util::toVec(opt); this->max = core::util::toVec(opt); } -ExtraInfo::InputRange::InputRange(std::vector min, std::vector opt, std::vector max) { +CompileSpec::InputRange::InputRange(std::vector min, std::vector opt, std::vector max) { this->opt = opt; this->min = min; this->max = max; } 
-ExtraInfo::InputRange::InputRange(c10::IntArrayRef min, c10::IntArrayRef opt, c10::IntArrayRef max) { +CompileSpec::InputRange::InputRange(c10::IntArrayRef min, c10::IntArrayRef opt, c10::IntArrayRef max) { this->opt = core::util::toVec(opt); this->min = core::util::toVec(min); this->max = core::util::toVec(max); } -ExtraInfo::ExtraInfo(std::vector> fixed_sizes) { +CompileSpec::CompileSpec(std::vector> fixed_sizes) { for (auto in : fixed_sizes) { input_ranges.push_back(InputRange(in)); } } -ExtraInfo::ExtraInfo(std::vector> fixed_sizes) { +CompileSpec::CompileSpec(std::vector> fixed_sizes) { for (auto in : fixed_sizes) { input_ranges.push_back(InputRange(in)); } } -core::conversion::InputRange to_internal_input_range(ExtraInfo::InputRange i) { +core::conversion::InputRange to_internal_input_range(CompileSpec::InputRange i) { return core::conversion::InputRange(i.min, i.opt, i.max); } -std::vector to_vec_internal_input_ranges(std::vector external) { +std::vector to_vec_internal_input_ranges(std::vector external) { std::vector internal; for (auto range : external) { internal.push_back(to_internal_input_range(range)); @@ -74,17 +74,17 @@ std::vector to_vec_internal_input_ranges(std::vect return internal; } -core::ExtraInfo to_internal_extra_info(ExtraInfo external) { - core::ExtraInfo internal(to_vec_internal_input_ranges(external.input_ranges)); +core::CompileSpec to_internal_compile_spec(CompileSpec external) { + core::CompileSpec internal(to_vec_internal_input_ranges(external.input_ranges)); switch(external.op_precision) { - case ExtraInfo::DataType::kChar: + case CompileSpec::DataType::kChar: internal.convert_info.engine_settings.op_precision = nvinfer1::DataType::kINT8; break; - case ExtraInfo::DataType::kHalf: + case CompileSpec::DataType::kHalf: internal.convert_info.engine_settings.op_precision = nvinfer1::DataType::kHALF; break; - case ExtraInfo::DataType::kFloat: + case CompileSpec::DataType::kFloat: default: internal.convert_info.engine_settings.op_precision = nvinfer1::DataType::kFLOAT; } @@ -96,22 +96,22 @@ core::ExtraInfo to_internal_extra_info(ExtraInfo external) { internal.convert_info.engine_settings.max_batch_size = external.max_batch_size; switch(external.device) { - case ExtraInfo::DeviceType::kDLA: + case CompileSpec::DeviceType::kDLA: internal.convert_info.engine_settings.device = nvinfer1::DeviceType::kDLA; break; - case ExtraInfo::DeviceType::kGPU: + case CompileSpec::DeviceType::kGPU: default: internal.convert_info.engine_settings.device = nvinfer1::DeviceType::kGPU; } switch(external.capability) { - case ExtraInfo::EngineCapability::kSAFE_GPU: + case CompileSpec::EngineCapability::kSAFE_GPU: internal.convert_info.engine_settings.capability = nvinfer1::EngineCapability::kSAFE_GPU; break; - case ExtraInfo::EngineCapability::kSAFE_DLA: + case CompileSpec::EngineCapability::kSAFE_DLA: internal.convert_info.engine_settings.capability = nvinfer1::EngineCapability::kSAFE_DLA; break; - case ExtraInfo::EngineCapability::kDEFAULT: + case CompileSpec::EngineCapability::kDEFAULT: default: internal.convert_info.engine_settings.capability = nvinfer1::EngineCapability::kDEFAULT; diff --git a/cpp/api/src/trtorch.cpp b/cpp/api/src/trtorch.cpp index e6a1940db1..742b4111a9 100644 --- a/cpp/api/src/trtorch.cpp +++ b/cpp/api/src/trtorch.cpp @@ -7,8 +7,8 @@ namespace trtorch { -// Defined in extra_info.cpp -core::ExtraInfo to_internal_extra_info(ExtraInfo external); +// Defined in compile_spec.cpp +core::CompileSpec to_internal_compile_spec(CompileSpec external); bool 
CheckMethodOperatorSupport(const torch::jit::script::Module& module, std::string method_name) { @@ -16,18 +16,18 @@ bool CheckMethodOperatorSupport(const torch::jit::script::Module& module, } std::string ConvertGraphToTRTEngine(const torch::jit::script::Module& module, - std::string method_name, ExtraInfo info) { + std::string method_name, CompileSpec info) { LOG_DEBUG(get_build_info()); // Want to export a much simpler (non TRT header dependent) API so doing the // type conversion here - return std::move(core::ConvertGraphToTRTEngine(module, method_name, to_internal_extra_info(info))); + return std::move(core::ConvertGraphToTRTEngine(module, method_name, to_internal_compile_spec(info))); } -torch::jit::script::Module CompileGraph(const torch::jit::script::Module& module, ExtraInfo info) { +torch::jit::script::Module CompileGraph(const torch::jit::script::Module& module, CompileSpec info) { LOG_DEBUG(get_build_info()); // Want to export a much simpler (non TRT header dependent) API so doing the // type conversion here - return core::CompileGraph(module, to_internal_extra_info(info)); + return core::CompileGraph(module, to_internal_compile_spec(info)); } std::string get_build_info() { diff --git a/cpp/benchmark/main.cpp b/cpp/benchmark/main.cpp index e73f1da4e8..48566b60c6 100644 --- a/cpp/benchmark/main.cpp +++ b/cpp/benchmark/main.cpp @@ -121,18 +121,18 @@ int main(int argc, const char* argv[]) { at::globalContext().setBenchmarkCuDNN(true); #ifdef TRT - auto extra_info = trtorch::ExtraInfo(dims); - extra_info.workspace_size = 1 << 20; + auto compile_spec = trtorch::CompileSpec(dims); + compile_spec.workspace_size = 1 << 20; #ifdef HALF - extra_info.op_precision = torch::kF16; + compile_spec.op_precision = torch::kF16; #endif - auto trt_mod = trtorch::CompileGraph(mod, extra_info); + auto trt_mod = trtorch::CompileGraph(mod, compile_spec); #ifdef SAVE_ENGINE std::cout << "Compiling graph to save as TRT engine (/tmp/engine_converted_from_jit.trt)" << std::endl; - auto engine = trtorch::ConvertGraphToTRTEngine(mod, "forward", extra_info); + auto engine = trtorch::ConvertGraphToTRTEngine(mod, "forward", compile_spec); std::ofstream out("/tmp/engine_converted_from_jit.trt"); out << engine; out.close(); diff --git a/cpp/ptq/README.md b/cpp/ptq/README.md index 70eb990fb3..ceffb6dcec 100644 --- a/cpp/ptq/README.md +++ b/cpp/ptq/README.md @@ -92,20 +92,20 @@ The calibrator factories create a calibrator that inherits from a `nvinfer1::IIn auto calibrator = trtorch::ptq::make_int8_calibrator(std::move(calibration_dataloader), calibration_cache_file, true); ``` -Then all thats required to setup the module for INT8 calibration is to set the following compile settings in the `trtorch::ExtraInfo` struct and compiling the module: +Then all thats required to setup the module for INT8 calibration is to set the following compile settings in the `trtorch::CompileSpec` struct and compiling the module: ```C++ std::vector> input_shape = {{32, 3, 32, 32}}; /// Configure settings for compilation - auto extra_info = trtorch::ExtraInfo({input_shape}); + auto compile_spec = trtorch::CompileSpec({input_shape}); /// Set operating precision to INT8 - extra_info.op_precision = torch::kI8; + compile_spec.op_precision = torch::kI8; /// Use the TensorRT Entropy Calibrator - extra_info.ptq_calibrator = calibrator; + compile_spec.ptq_calibrator = calibrator; /// Set a larger workspace (you may get better performace from doing so) - extra_info.workspace_size = 1 << 28; + compile_spec.workspace_size = 1 << 28; - auto trt_mod = 
trtorch::CompileGraph(mod, extra_info); + auto trt_mod = trtorch::CompileGraph(mod, compile_spec); ``` If you have an existing Calibrator implementation for TensorRT you may directly set the `ptq_calibrator` field with a pointer to your calibrator and it will work as well. diff --git a/cpp/ptq/main.cpp b/cpp/ptq/main.cpp index 241261dfba..340ec9cd66 100644 --- a/cpp/ptq/main.cpp +++ b/cpp/ptq/main.cpp @@ -50,28 +50,28 @@ torch::jit::Module compile_int8_model(const std::string& data_dir, torch::jit::M std::vector> input_shape = {{32, 3, 32, 32}}; /// Configure settings for compilation - auto extra_info = trtorch::ExtraInfo({input_shape}); + auto compile_spec = trtorch::CompileSpec({input_shape}); /// Set operating precision to INT8 - extra_info.op_precision = torch::kI8; + compile_spec.op_precision = torch::kI8; /// Use the TensorRT Entropy Calibrator - extra_info.ptq_calibrator = calibrator; + compile_spec.ptq_calibrator = calibrator; /// Set max batch size for the engine - extra_info.max_batch_size = 32; + compile_spec.max_batch_size = 32; /// Set a larger workspace - extra_info.workspace_size = 1 << 28; + compile_spec.workspace_size = 1 << 28; mod.eval(); #ifdef SAVE_ENGINE std::cout << "Compiling graph to save as TRT engine (/tmp/engine_converted_from_jit.trt)" << std::endl; - auto engine = trtorch::ConvertGraphToTRTEngine(mod, "forward", extra_info); + auto engine = trtorch::ConvertGraphToTRTEngine(mod, "forward", compile_spec); std::ofstream out("/tmp/engine_converted_from_jit.trt"); out << engine; out.close(); #endif std::cout << "Compiling and quantizing module" << std::endl; - auto trt_mod = trtorch::CompileGraph(mod, extra_info); + auto trt_mod = trtorch::CompileGraph(mod, compile_spec); return std::move(trt_mod); } diff --git a/cpp/trtorchc/main.cpp b/cpp/trtorchc/main.cpp index b37e2a53e9..0e3aaf61d8 100644 --- a/cpp/trtorchc/main.cpp +++ b/cpp/trtorchc/main.cpp @@ -66,7 +66,7 @@ std::vector parseSingleDim(std::string shape_str) { return {}; } -trtorch::ExtraInfo::InputRange parseDynamicDim(std::string shape_str) { +trtorch::CompileSpec::InputRange parseDynamicDim(std::string shape_str) { shape_str = shape_str.substr(1, shape_str.size() - 2); std::vector> shape; std::stringstream ss; @@ -89,7 +89,7 @@ trtorch::ExtraInfo::InputRange parseDynamicDim(std::string shape_str) { exit(1); } - return trtorch::ExtraInfo::InputRange(shape[0], shape[1], shape[2]); + return trtorch::CompileSpec::InputRange(shape[0], shape[1], shape[2]); } std::string get_cwd() { @@ -190,10 +190,10 @@ int main(int argc, char** argv) { } - std::vector ranges; + std::vector ranges; for (const auto shapes : args::get(input_shapes)) { if (shapes.rfind("(", 0) == 0) { - ranges.push_back(trtorch::ExtraInfo::InputRange(parseSingleDim(shapes))); + ranges.push_back(trtorch::CompileSpec::InputRange(parseSingleDim(shapes))); } else if (shapes.rfind("[", 0) == 0) { ranges.push_back(parseDynamicDim(shapes)); } else { @@ -203,7 +203,7 @@ int main(int argc, char** argv) { } } - auto compile_settings = trtorch::ExtraInfo(ranges); + auto compile_settings = trtorch::CompileSpec(ranges); if (build_debuggable_engine) { compile_settings.debug = true; @@ -251,9 +251,9 @@ int main(int argc, char** argv) { auto device = args::get(device_type); std::transform(device.begin(), device.end(), device.begin(), [](unsigned char c){ return std::tolower(c); }); if (device == "gpu") { - compile_settings.device = trtorch::ExtraInfo::DeviceType::kGPU; + compile_settings.device = trtorch::CompileSpec::DeviceType::kGPU; } else if (device == "dla") 
{ - compile_settings.device = trtorch::ExtraInfo::DeviceType::kDLA; + compile_settings.device = trtorch::CompileSpec::DeviceType::kDLA; } else { trtorch::logging::log(trtorch::logging::Level::kERROR, "Invalid device type, options are [ gpu | dla ]"); std::cerr << parser; @@ -265,11 +265,11 @@ int main(int argc, char** argv) { auto capability = args::get(engine_capability); std::transform(capability.begin(), capability.end(), capability.begin(), [](unsigned char c){ return std::tolower(c); }); if (capability == "default") { - compile_settings.capability = trtorch::ExtraInfo::EngineCapability::kDEFAULT; + compile_settings.capability = trtorch::CompileSpec::EngineCapability::kDEFAULT; } else if (capability == "safe_gpu") { - compile_settings.capability = trtorch::ExtraInfo::EngineCapability::kSAFE_GPU; + compile_settings.capability = trtorch::CompileSpec::EngineCapability::kSAFE_GPU; } else if (capability == "safe_dla") { - compile_settings.capability = trtorch::ExtraInfo::EngineCapability::kSAFE_DLA; + compile_settings.capability = trtorch::CompileSpec::EngineCapability::kSAFE_DLA; } else { trtorch::logging::log(trtorch::logging::Level::kERROR, "Invalid engine capability, options are [ default | safe_gpu | safe_dla ]"); std::cerr << parser; @@ -320,7 +320,7 @@ int main(int argc, char** argv) { } else { auto trt_mod = trtorch::CompileGraph(mod, compile_settings); - if (compile_settings.op_precision == trtorch::ExtraInfo::DataType::kFloat) { + if (compile_settings.op_precision == trtorch::CompileSpec::DataType::kFloat) { double threshold_val = 2e-5; if (threshold) { threshold_val = args::get(threshold); diff --git a/cpp/trtorchexec/main.cpp b/cpp/trtorchexec/main.cpp index 8b3e114e62..1dcc74e91b 100644 --- a/cpp/trtorchexec/main.cpp +++ b/cpp/trtorchexec/main.cpp @@ -56,8 +56,8 @@ int main(int argc, const char* argv[]) { dims.push_back(v); } - auto extra_info = trtorch::ExtraInfo(dims); - extra_info.workspace_size = 1 << 24; + auto compile_spec = trtorch::CompileSpec(dims); + compile_spec.workspace_size = 1 << 24; std::cout << "Checking operator support" << std::endl; if (!trtorch::CheckMethodOperatorSupport(mod, "forward")) { @@ -66,7 +66,7 @@ int main(int argc, const char* argv[]) { } std::cout << "Compiling graph to save as TRT engine (/tmp/engine_converted_from_jit.trt)" << std::endl; - auto engine = trtorch::ConvertGraphToTRTEngine(mod, "forward", extra_info); + auto engine = trtorch::ConvertGraphToTRTEngine(mod, "forward", compile_spec); std::ofstream out("/tmp/engine_converted_from_jit.trt"); out << engine; out.close(); @@ -89,7 +89,7 @@ int main(int argc, const char* argv[]) { } std::cout << "Compiling graph as module" << std::endl; - auto trt_mod = trtorch::CompileGraph(mod, extra_info); + auto trt_mod = trtorch::CompileGraph(mod, compile_spec); std::cout << "Running TRT module" << std::endl; torch::jit::IValue trt_results_ivalues = trt_mod.forward(trt_inputs_ivalues); std::vector trt_results; diff --git a/docsrc/tutorials/getting_started.rst b/docsrc/tutorials/getting_started.rst index 05c4e9efba..a1978927b1 100644 --- a/docsrc/tutorials/getting_started.rst +++ b/docsrc/tutorials/getting_started.rst @@ -305,7 +305,7 @@ With out module loaded, we can feed it into the TRTorch compiler. When we do so mod.eval(); auto in = torch::randn({1, 1, 32, 32}, {torch::kCUDA}); - auto trt_mod = trtorch::CompileGraph(mod, std::vector{{in.sizes()}}); + auto trt_mod = trtorch::CompileGraph(mod, std::vector{{in.sizes()}}); auto out = trt_mod.forward({in}); Thats it! 
Now the graph runs primarily not with the JIT compiler but using TensorRT (though we execute the graph using the JIT runtime). @@ -322,8 +322,8 @@ We can also set settings like operating precision to run in FP16. mod.eval(); auto in = torch::randn({1, 1, 32, 32}, {torch::kCUDA}).to(torch::kHALF); - auto input_sizes = std::vector({in.sizes()}); - trtorch::ExtraInfo info(input_sizes); + auto input_sizes = std::vector({in.sizes()}); + trtorch::CompileSpec info(input_sizes); info.op_precision = torch::kHALF; auto trt_mod = trtorch::CompileGraph(mod, info); auto out = trt_mod.forward({in}); @@ -370,8 +370,8 @@ If you want to save the engine produced by TRTorch to use in a TensorRT applicat mod.eval(); auto in = torch::randn({1, 1, 32, 32}, {torch::kCUDA}).to(torch::kHALF); - auto input_sizes = std::vector({in.sizes()}); - trtorch::ExtraInfo info(input_sizes); + auto input_sizes = std::vector({in.sizes()}); + trtorch::CompileSpec info(input_sizes); info.op_precision = torch::kHALF; auto trt_mod = trtorch::ConvertGraphToTRTEngine(mod, "forward", info); std::ofstream out("/tmp/engine_converted_from_jit.trt"); diff --git a/docsrc/tutorials/ptq.rst b/docsrc/tutorials/ptq.rst index fb12e46ef4..28d60acec3 100644 --- a/docsrc/tutorials/ptq.rst +++ b/docsrc/tutorials/ptq.rst @@ -115,21 +115,21 @@ defines the calibration algorithm used when calibrating. You can explicitly make // MinMax Calibrator is geared more towards NLP tasks auto calibrator = trtorch::ptq::make_int8_calibrator(std::move(calibration_dataloader), calibration_cache_file, true); -Then all thats required to setup the module for INT8 calibration is to set the following compile settings in the `trtorch::ExtraInfo` struct and compiling the module: +Then all thats required to setup the module for INT8 calibration is to set the following compile settings in the `trtorch::CompileSpec` struct and compiling the module: .. code-block:: c++ std::vector> input_shape = {{32, 3, 32, 32}}; /// Configure settings for compilation - auto extra_info = trtorch::ExtraInfo({input_shape}); + auto compile_spec = trtorch::CompileSpec({input_shape}); /// Set operating precision to INT8 - extra_info.op_precision = torch::kI8; + compile_spec.op_precision = torch::kI8; /// Use the TensorRT Entropy Calibrator - extra_info.ptq_calibrator = calibrator; + compile_spec.ptq_calibrator = calibrator; /// Set a larger workspace (you may get better performace from doing so) - extra_info.workspace_size = 1 << 28; + compile_spec.workspace_size = 1 << 28; - auto trt_mod = trtorch::CompileGraph(mod, extra_info); + auto trt_mod = trtorch::CompileGraph(mod, compile_spec); If you have an existing Calibrator implementation for TensorRT you may directly set the ``ptq_calibrator`` field with a pointer to your calibrator and it will work as well. 
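As the commit message notes, porting across this breaking change is essentially a find-and-replace of the type name. A minimal C++ sketch of the before/after (the module path and input shape here are illustrative assumptions, not part of the patch; the fields shown mirror the PTQ and README examples above):

```c++
#include "torch/script.h"
#include "trtorch/trtorch.h"

// Hypothetical module path and input shape, purely for illustration.
auto mod = torch::jit::load("model.ts");
mod.eval();
std::vector<std::vector<int64_t>> dims = {{1, 3, 224, 224}};

// Old API (pre-patch):  auto spec = trtorch::ExtraInfo(dims);
// New API (this patch): same fields, renamed type.
auto spec = trtorch::CompileSpec(dims);
spec.op_precision = torch::kHALF;   // run the engine in FP16
spec.workspace_size = 1 << 20;      // TensorRT workspace budget

auto trt_mod = trtorch::CompileGraph(mod, spec);
// In FP16, inputs should also be converted to half precision.
auto out = trt_mod.forward({torch::randn({1, 3, 224, 224}, {torch::kCUDA}).to(torch::kHALF)});
```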
diff --git a/py/BUILD b/py/BUILD index a2eb3c004b..be5b2d7047 100644 --- a/py/BUILD +++ b/py/BUILD @@ -9,7 +9,7 @@ py_library( "trtorch/__init__.py", "trtorch/_version.py", "trtorch/_compiler.py", - "trtorch/_extra_info.py", + "trtorch/_compile_spec.py", "trtorch/_types.py", "trtorch/logging.py" ], diff --git a/py/trtorch/_extra_info.py b/py/trtorch/_compile_spec.py similarity index 64% rename from py/trtorch/_extra_info.py rename to py/trtorch/_compile_spec.py index 5247b91a0a..aa060bd085 100644 --- a/py/trtorch/_extra_info.py +++ b/py/trtorch/_compile_spec.py @@ -84,53 +84,53 @@ def _parse_device_type(device: Any) -> _types.DeviceType: else: raise TypeError("Device specification must be of type torch.device or trtorch.DeviceType, but got: " + str(type(device))) -def _parse_extra_info(extra_info: Dict[str, Any]) -> trtorch._C.ExtraInfo: - info = trtorch._C.ExtraInfo() - if "input_shapes" not in extra_info: +def _parse_compile_spec(compile_spec: Dict[str, Any]) -> trtorch._C.CompileSpec: + info = trtorch._C.CompileSpec() + if "input_shapes" not in compile_spec: raise KeyError("Input shapes for inputs are required as a List, provided as either a static sizes or a range of three sizes (min, opt, max) as Dict") - info.input_ranges = _parse_input_ranges(extra_info["input_shapes"]) + info.input_ranges = _parse_input_ranges(compile_spec["input_shapes"]) - if "op_precision" in extra_info: - info.op_precision = _parse_op_precision(extra_info["op_precision"]) + if "op_precision" in compile_spec: + info.op_precision = _parse_op_precision(compile_spec["op_precision"]) - if "refit" in extra_info: - assert isinstance(extra_info["refit"], bool) - info.refit = extra_info["refit"] + if "refit" in compile_spec: + assert isinstance(compile_spec["refit"], bool) + info.refit = compile_spec["refit"] - if "debug" in extra_info: - assert isinstance(extra_info["debug"], bool) - info.debug = extra_info["debug"] + if "debug" in compile_spec: + assert isinstance(compile_spec["debug"], bool) + info.debug = compile_spec["debug"] - if "strict_types" in extra_info: - assert isinstance(extra_info["strict_types"], bool) - info.strict_types = extra_info["strict_types"] + if "strict_types" in compile_spec: + assert isinstance(compile_spec["strict_types"], bool) + info.strict_types = compile_spec["strict_types"] - if "allow_gpu_fallback" in extra_info: - assert isinstance(extra_info["allow_gpu_fallback"], bool) - info.allow_gpu_fallback = extra_info["allow_gpu_fallback"] + if "allow_gpu_fallback" in compile_spec: + assert isinstance(compile_spec["allow_gpu_fallback"], bool) + info.allow_gpu_fallback = compile_spec["allow_gpu_fallback"] - if "device" in extra_info: - info.device = _parse_device_type(extra_info["device"]) + if "device" in compile_spec: + info.device = _parse_device_type(compile_spec["device"]) - if "capability" in extra_info: - assert isinstance(extra_info["capability"], type.EngineCapability) - info.capability = extra_info["capability"] + if "capability" in compile_spec: + assert isinstance(compile_spec["capability"], type.EngineCapability) + info.capability = compile_spec["capability"] - if "num_min_timing_iters" in extra_info: - assert type(extra_info["num_min_timing_iters"]) is int - info.num_min_timing_iters = extra_info["num_min_timing_iters"] + if "num_min_timing_iters" in compile_spec: + assert type(compile_spec["num_min_timing_iters"]) is int + info.num_min_timing_iters = compile_spec["num_min_timing_iters"] - if "num_avg_timing_iters" in extra_info: - assert type(extra_info["num_avg_timing_iters"]) 
is int - info.num_avg_timing_iters = extra_info["num_avg_timing_iters"] + if "num_avg_timing_iters" in compile_spec: + assert type(compile_spec["num_avg_timing_iters"]) is int + info.num_avg_timing_iters = compile_spec["num_avg_timing_iters"] - if "workspace_size" in extra_info: - assert type(extra_info["workspace_size"]) is int - info.workspace_size = extra_info["workspace_size"] + if "workspace_size" in compile_spec: + assert type(compile_spec["workspace_size"]) is int + info.workspace_size = compile_spec["workspace_size"] - if "max_batch_size" in extra_info: - assert type(extra_info["max_batch_size"]) is int - info.max_batch_size = extra_info["max_batch_size"] + if "max_batch_size" in compile_spec: + assert type(compile_spec["max_batch_size"]) is int + info.max_batch_size = compile_spec["max_batch_size"] return info \ No newline at end of file diff --git a/py/trtorch/_compiler.py b/py/trtorch/_compiler.py index 1627e5a05f..1c35dbe4a1 100644 --- a/py/trtorch/_compiler.py +++ b/py/trtorch/_compiler.py @@ -3,12 +3,12 @@ from torch import nn import trtorch._C -from trtorch._extra_info import _parse_extra_info +from trtorch._compile_spec import _parse_compile_spec from trtorch._version import __version__ from types import FunctionType -def compile(module: torch.jit.ScriptModule, extra_info: Any) -> torch.jit.ScriptModule: +def compile(module: torch.jit.ScriptModule, compile_spec: Any) -> torch.jit.ScriptModule: """Compile a TorchScript module for NVIDIA GPUs using TensorRT Takes a existing TorchScript module and a set of settings to configure the compiler @@ -19,13 +19,13 @@ def compile(module: torch.jit.ScriptModule, extra_info: Any) -> torch.jit.Script Args: module (torch.jit.ScriptModule): Source module, a result of tracing or scripting a PyTorch ``torch.nn.Module`` - extra_info (dict): Compilation settings including operating precision, target device, etc. + compile_spec (dict): Compilation settings including operating precision, target device, etc. One key is required which is ``input_shapes``, describing the input sizes or ranges for inputs to the graph. All other keys are optional .. code-block:: py - ExtraInfo = { + compile_spec = { "input_shapes": [ (1, 3, 224, 224), # Static input shape for input #1 { @@ -58,11 +58,11 @@ def compile(module: torch.jit.ScriptModule, extra_info: Any) -> torch.jit.Script if isinstance(module, torch.jit.ScriptFunction): raise TypeError("torch.jit.ScriptFunction currently is not directly supported, wrap the function in a module to compile") - compiled_cpp_mod = trtorch._C.compile_graph(module._c, _parse_extra_info(extra_info)) + compiled_cpp_mod = trtorch._C.compile_graph(module._c, _parse_compile_spec(compile_spec)) compiled_module = torch.jit._recursive.wrap_cpp_module(compiled_cpp_mod) return compiled_module -def convert_method_to_trt_engine(module: torch.jit.ScriptModule, method_name: str, extra_info: Any) -> str: +def convert_method_to_trt_engine(module: torch.jit.ScriptModule, method_name: str, compile_spec: Any) -> str: """Convert a TorchScript module method to a serialized TensorRT engine Converts a specified method of a module to a serialized TensorRT engine given a dictionary of conversion settings @@ -71,13 +71,13 @@ def convert_method_to_trt_engine(module: torch.jit.ScriptModule, method_name: st module (torch.jit.ScriptModule): Source module, a result of tracing or scripting a PyTorch ``torch.nn.Module`` method_name (str): Name of method to convert - extra_info (dict): Compilation settings including operating precision, target device, etc. 
+ compile_spec (dict): Compilation settings including operating precision, target device, etc. One key is required which is ``input_shapes``, describing the input sizes or ranges for inputs to the graph. All other keys are optional .. code-block:: py - ExtraInfo = { + CompileSpec = { "input_shapes": [ (1, 3, 224, 224), # Static input shape for input #1 { @@ -109,7 +109,7 @@ def convert_method_to_trt_engine(module: torch.jit.ScriptModule, method_name: st if isinstance(module, torch.jit.ScriptFunction): raise TypeError("torch.jit.ScriptFunctions currently are not directly supported, wrap the function in a module to compile") - return trtorch._C.convert_graph_to_trt_engine(module._c, method_name, _parse_extra_info(extra_info)) + return trtorch._C.convert_graph_to_trt_engine(module._c, method_name, _parse_compile_spec(compile_spec)) def check_method_op_support(module: torch.jit.ScriptModule, method_name: str) -> bool: """Checks to see if a method is fully supported by TRTorch diff --git a/py/trtorch/csrc/trtorch_py.cpp b/py/trtorch/csrc/trtorch_py.cpp index 765f75d56a..da6d2b2688 100644 --- a/py/trtorch/csrc/trtorch_py.cpp +++ b/py/trtorch/csrc/trtorch_py.cpp @@ -1,5 +1,7 @@ #include "pybind11/pybind11.h" #include "pybind11/stl.h" +//TODO: Remove when we have access to PyTorch to_backend autoregistration +#include "core/backend.h" #include "core/compiler.h" #include "core/conversion/conversion.h" #include "torch/torch.h" @@ -73,13 +75,13 @@ nvinfer1::EngineCapability toTRTEngineCapability(EngineCapability value) { } } -struct ExtraInfo { +struct CompileSpec { - core::ExtraInfo toInternalExtraInfo() { + core::CompileSpec toInternalCompileSpec() { for (auto i : input_ranges) { internal_input_ranges.push_back(i.toInternalInputRange()); } - auto info = core::ExtraInfo(internal_input_ranges); + auto info = core::CompileSpec(internal_input_ranges); info.convert_info.engine_settings.op_precision = toTRTDataType(op_precision); info.convert_info.engine_settings.refit = refit; info.convert_info.engine_settings.debug = debug; @@ -109,15 +111,15 @@ struct ExtraInfo { uint64_t max_batch_size = 0; }; -torch::jit::Module CompileGraph(const torch::jit::Module& mod, ExtraInfo& info) { +torch::jit::Module CompileGraph(const torch::jit::Module& mod, CompileSpec& info) { py::gil_scoped_acquire gil; - auto trt_mod = core::CompileGraph(mod, info.toInternalExtraInfo()); + auto trt_mod = core::CompileGraph(mod, info.toInternalCompileSpec()); return trt_mod; } -py::bytes ConvertGraphToTRTEngine(const torch::jit::Module& mod, const std::string& method_name, ExtraInfo& info) { +py::bytes ConvertGraphToTRTEngine(const torch::jit::Module& mod, const std::string& method_name, CompileSpec& info) { py::gil_scoped_acquire gil; - auto trt_engine = core::ConvertGraphToTRTEngine(mod, method_name, info.toInternalExtraInfo()); + auto trt_engine = core::ConvertGraphToTRTEngine(mod, method_name, info.toInternalCompileSpec()); return py::bytes(trt_engine); } @@ -189,20 +191,20 @@ PYBIND11_MODULE(_C, m) { .value("safe_dla", EngineCapability::kSAFE_DLA, "Use safety DLA kernels only") .value("default", EngineCapability::kDEFAULT, "Use default behavior"); - py::class_(m, "ExtraInfo") + py::class_(m, "CompileSpec") .def(py::init<>()) - .def_readwrite("input_ranges", &ExtraInfo::input_ranges) - .def_readwrite("op_precision", &ExtraInfo::op_precision) - .def_readwrite("refit", &ExtraInfo::refit) - .def_readwrite("debug", &ExtraInfo::debug) - .def_readwrite("strict_types", &ExtraInfo::strict_types) - .def_readwrite("allow_gpu_fallback", 
&ExtraInfo::allow_gpu_fallback) - .def_readwrite("device", &ExtraInfo::device) - .def_readwrite("capability", &ExtraInfo::capability) - .def_readwrite("num_min_timing_iters", &ExtraInfo::num_min_timing_iters) - .def_readwrite("num_avg_timing_iters", &ExtraInfo::num_avg_timing_iters) - .def_readwrite("workspace_size", &ExtraInfo::workspace_size) - .def_readwrite("max_batch_size", &ExtraInfo::max_batch_size); + .def_readwrite("input_ranges", &CompileSpec::input_ranges) + .def_readwrite("op_precision", &CompileSpec::op_precision) + .def_readwrite("refit", &CompileSpec::refit) + .def_readwrite("debug", &CompileSpec::debug) + .def_readwrite("strict_types", &CompileSpec::strict_types) + .def_readwrite("allow_gpu_fallback", &CompileSpec::allow_gpu_fallback) + .def_readwrite("device", &CompileSpec::device) + .def_readwrite("capability", &CompileSpec::capability) + .def_readwrite("num_min_timing_iters", &CompileSpec::num_min_timing_iters) + .def_readwrite("num_avg_timing_iters", &CompileSpec::num_avg_timing_iters) + .def_readwrite("workspace_size", &CompileSpec::workspace_size) + .def_readwrite("max_batch_size", &CompileSpec::max_batch_size); m.doc() = "TRTorch Internal C Bindings: Ahead of Time compilation for PyTorch JIT. A tool to convert PyTorch JIT to TensorRT"; m.def("compile_graph", &trtorch::pyapi::CompileGraph, "Ingest a PyTorch JIT module and convert supported subgraphs to TensorRT engines, returns a JIT module with the engines embedded"); @@ -225,7 +227,11 @@ PYBIND11_MODULE(_C, m) { .value("INFO", core::util::logging::LogLevel::kINFO) .value("DEBUG", core::util::logging::LogLevel::kDEBUG) .export_values(); + + //TODO: Remove when we have access to PyTorch autoregistration + //m.def("to_tensorrt", backend::GetTensorRTBackend().generateToBackendFn()); } -} // namespace py + +} // namespace pyapi } // namespace trtorch diff --git a/tests/accuracy/test_fp16_accuracy.cpp b/tests/accuracy/test_fp16_accuracy.cpp index 6de40a6c31..b19c01cb38 100644 --- a/tests/accuracy/test_fp16_accuracy.cpp +++ b/tests/accuracy/test_fp16_accuracy.cpp @@ -27,10 +27,10 @@ TEST_P(AccuracyTests, FP16AccuracyIsClose) { torch::Tensor jit_accuracy = (jit_correct / jit_total) * 100; std::vector> input_shape = {{32, 3, 32, 32}}; - auto extra_info = trtorch::ExtraInfo({input_shape}); - extra_info.op_precision = torch::kF16; + auto compile_spec = trtorch::CompileSpec({input_shape}); + compile_spec.op_precision = torch::kF16; - auto trt_mod = trtorch::CompileGraph(mod, extra_info); + auto trt_mod = trtorch::CompileGraph(mod, compile_spec); torch::Tensor trt_correct = torch::zeros({1}, {torch::kCUDA}), trt_total = torch::zeros({1}, {torch::kCUDA}); for (auto batch : *eval_dataloader) { diff --git a/tests/accuracy/test_fp32_accuracy.cpp b/tests/accuracy/test_fp32_accuracy.cpp index d3d8bddb96..11ed944077 100644 --- a/tests/accuracy/test_fp32_accuracy.cpp +++ b/tests/accuracy/test_fp32_accuracy.cpp @@ -27,10 +27,10 @@ TEST_P(AccuracyTests, FP16AccuracyIsClose) { torch::Tensor jit_accuracy = (jit_correct / jit_total) * 100; std::vector> input_shape = {{32, 3, 32, 32}}; - auto extra_info = trtorch::ExtraInfo({input_shape}); - extra_info.op_precision = torch::kF32; + auto compile_spec = trtorch::CompileSpec({input_shape}); + compile_spec.op_precision = torch::kF32; - auto trt_mod = trtorch::CompileGraph(mod, extra_info); + auto trt_mod = trtorch::CompileGraph(mod, compile_spec); torch::Tensor trt_correct = torch::zeros({1}, {torch::kCUDA}), trt_total = torch::zeros({1}, {torch::kCUDA}); for (auto batch : *eval_dataloader) { 
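On the Python side the argument simply becomes `compile_spec`; a short sketch of a user-facing call under the renamed keys (the serialized model file is a hypothetical stand-in, and the shape matches the accuracy tests above):

```python
import torch
import trtorch

# Hypothetical serialized TorchScript module, purely for illustration.
scripted = torch.jit.load("model.ts").eval().cuda()

trt_mod = trtorch.compile(scripted, {
    "input_shapes": [(32, 3, 32, 32)],   # one static input, as in the tests
    "op_precision": torch.half,          # engine runs in FP16
    "workspace_size": 1 << 20,
})

# Inputs are converted to FP16 to match the operating precision.
out = trt_mod(torch.randn(32, 3, 32, 32).half().cuda())
```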
diff --git a/tests/accuracy/test_int8_accuracy.cpp b/tests/accuracy/test_int8_accuracy.cpp index aa4824948a..db5b259657 100644 --- a/tests/accuracy/test_int8_accuracy.cpp +++ b/tests/accuracy/test_int8_accuracy.cpp @@ -20,15 +20,15 @@ TEST_P(AccuracyTests, FP16AccuracyIsClose) { std::vector> input_shape = {{32, 3, 32, 32}}; // Configure settings for compilation - auto extra_info = trtorch::ExtraInfo({input_shape}); + auto compile_spec = trtorch::CompileSpec({input_shape}); // Set operating precision to INT8 - extra_info.op_precision = torch::kI8; + compile_spec.op_precision = torch::kI8; // Use the TensorRT Entropy Calibrator - extra_info.ptq_calibrator = calibrator; + compile_spec.ptq_calibrator = calibrator; // Set max batch size for the engine - extra_info.max_batch_size = 32; + compile_spec.max_batch_size = 32; // Set a larger workspace - extra_info.workspace_size = 1 << 28; + compile_spec.workspace_size = 1 << 28; mod.eval(); @@ -57,7 +57,7 @@ TEST_P(AccuracyTests, FP16AccuracyIsClose) { torch::Tensor jit_accuracy = (jit_correct / jit_total) * 100; // Compile Graph - auto trt_mod = trtorch::CompileGraph(mod, extra_info); + auto trt_mod = trtorch::CompileGraph(mod, compile_spec); // Check the INT8 accuracy in TRT torch::Tensor trt_correct = torch::zeros({1}, {torch::kCUDA}), trt_total = torch::zeros({1}, {torch::kCUDA}); diff --git a/tests/modules/test_serialization.cpp b/tests/modules/test_serialization.cpp index a7fcea5558..bb6b984f4f 100644 --- a/tests/modules/test_serialization.cpp +++ b/tests/modules/test_serialization.cpp @@ -1,7 +1,7 @@ #include "module_test.h" -std::vector toInputRangesDynamic(std::vector> opts) { - std::vector a; +std::vector toInputRangesDynamic(std::vector> opts) { + std::vector a; for (auto opt : opts) { std::vector min_range(opt); @@ -12,7 +12,7 @@ std::vector toInputRangesDynamic(std::vector Date: Wed, 21 Oct 2020 15:18:28 -0700 Subject: [PATCH 2/6] refactor(//core/runtime): Renaming execution -> runtime Signed-off-by: Naren Dasan Signed-off-by: Naren Dasan --- BUILD | 2 +- core/BUILD | 15 ++++++++++++--- core/{execution => runtime}/BUILD | 8 ++++---- core/{execution => runtime}/TRTEngine.cpp | 9 +++++---- core/{execution => runtime}/register_trt_op.cpp | 4 ++-- core/{execution/execution.h => runtime/runtime.h} | 4 ++-- docsrc/contributors/phases.rst | 9 ++++----- .../contributors/{execution.rst => runtime.rst} | 4 ++-- docsrc/index.rst | 4 ---- tests/util/run_graph_engine.cpp | 6 +++--- 10 files changed, 35 insertions(+), 30 deletions(-) rename core/{execution => runtime}/BUILD (84%) rename core/{execution => runtime}/TRTEngine.cpp (96%) rename core/{execution => runtime}/register_trt_op.cpp (97%) rename core/{execution/execution.h => runtime/runtime.h} (95%) rename docsrc/contributors/{execution.rst => runtime.rst} (96%) diff --git a/BUILD b/BUILD index d1db473c18..baa54db29a 100644 --- a/BUILD +++ b/BUILD @@ -19,7 +19,7 @@ pkg_tar( "//core/conversion/tensorcontainer:include", "//core/conversion/evaluators:include", "//core/conversion/converters/impl/plugins:include", - "//core/execution:include", + "//core/runtime:include", "//core/lowering:include", "//core/lowering/passes:include", "//core/util:include", diff --git a/core/BUILD b/core/BUILD index 8d2beae23c..6bf5a057a1 100644 --- a/core/BUILD +++ b/core/BUILD @@ -7,6 +7,13 @@ config_setting( } ) +config_setting( + name = "python_core", + values = { + "define": "target_lang=python" + } +) + cc_library( name = "core", hdrs = [ @@ -17,7 +24,7 @@ cc_library( ], deps = [ "//core/conversion", - 
"//core/execution", + "//core/runtime", "//core/lowering", "//core/util/logging", "@tensorrt//:nvinfer" @@ -28,11 +35,13 @@ cc_library( alwayslink=True, ) - load("@rules_pkg//:pkg.bzl", "pkg_tar") pkg_tar( name = "include", package_dir = "core/", - srcs = ["compiler.h"], + srcs = [ + "backend.h", + "compiler.h", + ], ) diff --git a/core/execution/BUILD b/core/runtime/BUILD similarity index 84% rename from core/execution/BUILD rename to core/runtime/BUILD index 1741249624..3e7e6d8a57 100644 --- a/core/execution/BUILD +++ b/core/runtime/BUILD @@ -8,9 +8,9 @@ config_setting( ) cc_library( - name = "execution", + name = "runtime", hdrs = [ - "execution.h", + "runtime.h", ], srcs = [ "TRTEngine.cpp", @@ -30,6 +30,6 @@ load("@rules_pkg//:pkg.bzl", "pkg_tar") pkg_tar( name = "include", - package_dir = "core/execution/", - srcs = ["execution.h"], + package_dir = "core/runtime/", + srcs = ["runtime.h"], ) diff --git a/core/execution/TRTEngine.cpp b/core/runtime/TRTEngine.cpp similarity index 96% rename from core/execution/TRTEngine.cpp rename to core/runtime/TRTEngine.cpp index ca4b5f10e6..f149d0a638 100644 --- a/core/execution/TRTEngine.cpp +++ b/core/runtime/TRTEngine.cpp @@ -4,11 +4,11 @@ #include "torch/csrc/jit/frontend/function_schema_parser.h" #include "core/util/prelude.h" -#include "core/execution/execution.h" +#include "core/runtime/runtime.h" namespace trtorch { namespace core { -namespace execution { +namespace runtime { std::string slugify(std::string s) { std::replace(s.begin(), s.end(), '.', '_'); @@ -81,6 +81,7 @@ TRTEngine::~TRTEngine() { // return c10::List(output_vec); // } +namespace { static auto TRTORCH_UNUSED TRTEngineTSRegistrtion = torch::class_("tensorrt", "Engine") .def(torch::init()) // TODO: .def("__call__", &TRTEngine::Run) @@ -94,7 +95,7 @@ static auto TRTORCH_UNUSED TRTEngineTSRegistrtion = torch::class_("te return c10::make_intrusive(std::move(seralized_engine)); } ); - -} // namespace execution +} // namespace +} // namespace runtime } // namespace core } // namespace trtorch diff --git a/core/execution/register_trt_op.cpp b/core/runtime/register_trt_op.cpp similarity index 97% rename from core/execution/register_trt_op.cpp rename to core/runtime/register_trt_op.cpp index f16f106350..75d34c701e 100644 --- a/core/execution/register_trt_op.cpp +++ b/core/runtime/register_trt_op.cpp @@ -4,11 +4,11 @@ #include "torch/csrc/jit/runtime/custom_operator.h" #include "core/util/prelude.h" -#include "core/execution/execution.h" +#include "core/runtime/runtime.h" namespace trtorch { namespace core { -namespace execution { +namespace runtime { std::vector execute_engine(std::vector inputs, c10::intrusive_ptr compiled_engine) { LOG_DEBUG("Attempting to run engine (ID: " << compiled_engine->name << ")"); diff --git a/core/execution/execution.h b/core/runtime/runtime.h similarity index 95% rename from core/execution/execution.h rename to core/runtime/runtime.h index 3f61160e06..ef7670412d 100644 --- a/core/execution/execution.h +++ b/core/runtime/runtime.h @@ -8,7 +8,7 @@ namespace trtorch { namespace core { -namespace execution { +namespace runtime { using EngineID = int64_t; @@ -35,6 +35,6 @@ struct TRTEngine : torch::CustomClassHolder { std::vector execute_engine(std::vector inputs, c10::intrusive_ptr compiled_engine); -} // namespace execution +} // namespace runtime } // namespace core } // namespace trtorch diff --git a/docsrc/contributors/phases.rst b/docsrc/contributors/phases.rst index c78f50efe2..09f7f7d690 100644 --- a/docsrc/contributors/phases.rst +++ 
b/docsrc/contributors/phases.rst @@ -27,12 +27,11 @@ The conversion phase is made up of three main components, a context to manage co a evaluator library which will execute operations that can be resolved at compile time and a converter library which maps an op from JIT to TensorRT. -Execution -^^^^^^^^^^^ +Compilation and Runtime +^^^^^^^^^^^^^^^^^^^^^^^^ :ref:`execution` -The execution phase constructs a TorchScript program to run the converted TensorRT engine. It +The final compilation phase constructs a TorchScript program to run the converted TensorRT engine. It takes a serialized engine and instantiates it within a engine manager, then the compiler will build out a JIT graph that references this engine and wraps it in a module to return to the user. -When the user executes the module, the JIT program will look up the engine and pass the inputs -to it, then return the results. \ No newline at end of file +When the user executes the module, the JIT program run in the JIT runtime extended by TRTorch with the data providied from the user. \ No newline at end of file diff --git a/docsrc/contributors/execution.rst b/docsrc/contributors/runtime.rst similarity index 96% rename from docsrc/contributors/execution.rst rename to docsrc/contributors/runtime.rst index 0e08650fb8..ba64a9066d 100644 --- a/docsrc/contributors/execution.rst +++ b/docsrc/contributors/runtime.rst @@ -1,9 +1,9 @@ .. _execution: -Execution Phase +Runtime Phase ================ -The execution phase is responsible for constructing self standing TorchScript graphs with embedded TensorRT engines and serving as the runtime +The Runtime phase is responsible for constructing self standing TorchScript graphs with embedded TensorRT engines and serving as the runtime when these engines are called. The main interface accepts a serialized TensorRT engine. The execution phase will deserialize and wrap this engine in a class which maintains a execution context for each engine and some metadata about its inputs and outputs and is compatable with the TorchScript interpreter so that diff --git a/docsrc/index.rst b/docsrc/index.rst index f65a6a62be..2db21d6e2d 100644 --- a/docsrc/index.rst +++ b/docsrc/index.rst @@ -38,10 +38,6 @@ Getting Started tutorials/trtorchc _notebooks/lenet -Notebooks ------------- -* :ref:`lenet` - .. toctree:: :caption: Notebooks :maxdepth: 1 diff --git a/tests/util/run_graph_engine.cpp b/tests/util/run_graph_engine.cpp index 49259bfd51..c052812e3c 100644 --- a/tests/util/run_graph_engine.cpp +++ b/tests/util/run_graph_engine.cpp @@ -5,7 +5,7 @@ #include "torch/csrc/jit/ir/irparser.h" #include "torch/custom_class.h" #include "core/conversion/conversion.h" -#include "core/execution/execution.h" +#include "core/runtime/runtime.h" #include "cuda_runtime_api.h" #include @@ -43,8 +43,8 @@ std::vector toInputRangesDynamic(std::vector RunEngine(std::string& eng, std::vector inputs) { LOG_DEBUG("Running TRT version"); - auto engine_ptr = c10::make_intrusive("test_engine", eng); - auto outputs = trtorch::core::execution::execute_engine(inputs, engine_ptr); + auto engine_ptr = c10::make_intrusive("test_engine", eng); + auto outputs = trtorch::core::runtime::execute_engine(inputs, engine_ptr); return outputs; } From 59113cfa52a46a5ea0d889bc88e467d0d3350f71 Mon Sep 17 00:00:00 2001 From: Naren Dasan Date: Wed, 21 Oct 2020 15:22:54 -0700 Subject: [PATCH 3/6] feat(//py): Initial compiliant implementation of the to_backend api for PyTorch Users can now use a direct PyTorch integration by just importing the trtorch package. 
The only difference between torch._C._jit_to_tensorrt and trtorch.compile is that you need to use the trtorch.TensorRTCompileSpec constructor to build a wrapper around your spec dictionary Signed-off-by: Naren Dasan Signed-off-by: Naren Dasan --- py/setup.py | 8 +- py/trtorch/__init__.py | 1 + py/trtorch/_compile_spec.py | 93 ++++++++++-- py/trtorch/_compiler.py | 4 +- py/trtorch/csrc/register_tensorrt_classes.cpp | 47 ++++++ py/trtorch/csrc/tensorrt_backend.cpp | 86 +++++++++++ py/trtorch/csrc/tensorrt_backend.h | 19 +++ py/trtorch/csrc/tensorrt_classes.cpp | 143 ++++++++++++++++++ py/trtorch/csrc/tensorrt_classes.h | 101 +++++++++++++ py/trtorch/csrc/trtorch_py.cpp | 106 +------------ tests/BUILD | 3 +- tests/py/BUILD | 16 +- tests/py/model_test_case.py | 19 +++ tests/py/test_api.py | 16 +- tests/py/test_to_backend_api.py | 44 ++++++ 15 files changed, 573 insertions(+), 133 deletions(-) create mode 100644 py/trtorch/csrc/register_tensorrt_classes.cpp create mode 100644 py/trtorch/csrc/tensorrt_backend.cpp create mode 100644 py/trtorch/csrc/tensorrt_backend.h create mode 100644 py/trtorch/csrc/tensorrt_classes.cpp create mode 100644 py/trtorch/csrc/tensorrt_classes.h create mode 100644 tests/py/model_test_case.py create mode 100644 tests/py/test_to_backend_api.py diff --git a/py/setup.py b/py/setup.py index 53f85dada1..01dfdfdfb7 100644 --- a/py/setup.py +++ b/py/setup.py @@ -156,7 +156,12 @@ def run(self): ext_modules = [ cpp_extension.CUDAExtension('trtorch._C', - ['trtorch/csrc/trtorch_py.cpp'], + [ + 'trtorch/csrc/trtorch_py.cpp', + 'trtorch/csrc/tensorrt_backend.cpp', + 'trtorch/csrc/tensorrt_classes.cpp', + 'trtorch/csrc/register_tensorrt_classes.cpp', + ], library_dirs=[ (dir_path + '/trtorch/lib/'), "/opt/conda/lib/python3.6/config-3.6m-x86_64-linux-gnu" @@ -165,6 +170,7 @@ def run(self): "trtorch" ], include_dirs=[ + dir_path + "trtorch/csrc", dir_path + "/../", dir_path + "/../bazel-TRTorch/external/tensorrt/include", ], diff --git a/py/trtorch/__init__.py b/py/trtorch/__init__.py index 88e1ca6db9..772b6ff08f 100644 --- a/py/trtorch/__init__.py +++ b/py/trtorch/__init__.py @@ -9,6 +9,7 @@ from trtorch._version import __version__ from trtorch._compiler import * +from trtorch._compile_spec import TensorRTCompileSpec from trtorch._types import * from trtorch import logging diff --git a/py/trtorch/_compile_spec.py b/py/trtorch/_compile_spec.py index aa060bd085..6f0ff49d4a 100644 --- a/py/trtorch/_compile_spec.py +++ b/py/trtorch/_compile_spec.py @@ -73,16 +73,21 @@ def _parse_op_precision(precision: Any) -> _types.dtype: def _parse_device_type(device: Any) -> _types.DeviceType: if isinstance(device, torch.device): - if torch.device.type == 'cuda': + if device.type == 'cuda': return _types.DeviceType.gpu else: - raise TypeError("Valid device choices are GPU (and DLA if on Jetson platforms) however got device type" + str(device.type)) - + ValueError("Got a device type other than GPU or DLA (type: " + str(device.type) + ")") elif isinstance(device, _types.DeviceType): return device - + elif isinstance(device, str): + if device == "gpu" or device == "GPU": + return _types.DeviceType.gpu + elif device == "dla" or device == "DLA": + return _types.DeviceType.dla + else: + ValueError("Got a device type other than GPU or DLA (type: " + str(device) + ")") else: - raise TypeError("Device specification must be of type torch.device or trtorch.DeviceType, but got: " + str(type(device))) + raise TypeError("Device specification must be of type torch.device, string or trtorch.DeviceType, but got: " 
+ str(type(device))) def _parse_compile_spec(compile_spec: Dict[str, Any]) -> trtorch._C.CompileSpec: info = trtorch._C.CompileSpec() @@ -110,11 +115,11 @@ def _parse_compile_spec(compile_spec: Dict[str, Any]) -> trtorch._C.CompileSpec: assert isinstance(compile_spec["allow_gpu_fallback"], bool) info.allow_gpu_fallback = compile_spec["allow_gpu_fallback"] - if "device" in compile_spec: - info.device = _parse_device_type(compile_spec["device"]) + if "device_type" in compile_spec: + info.device = _parse_device_type(compile_spec["device_type"]) if "capability" in compile_spec: - assert isinstance(compile_spec["capability"], type.EngineCapability) + assert isinstance(compile_spec["capability"], _types.EngineCapability) info.capability = compile_spec["capability"] if "num_min_timing_iters" in compile_spec: @@ -133,4 +138,74 @@ def _parse_compile_spec(compile_spec: Dict[str, Any]) -> trtorch._C.CompileSpec: assert type(compile_spec["max_batch_size"]) is int info.max_batch_size = compile_spec["max_batch_size"] - return info \ No newline at end of file + return info + +def TensorRTCompileSpec(compile_spec: Dict[str, Any]): + """ + Utility to create a formated spec dictionary for using the PyTorch TensorRT backend + + Args: + compile_spec (dict): Compilation settings including operating precision, target device, etc. + One key is required which is ``input_shapes``, describing the input sizes or ranges for inputs + to the graph. All other keys are optional. Entries for each method to be compiled. + + .. code-block:: py + + CompileSpec = { + "forward" : trtorch.TensorRTCompileSpec({ + "input_shapes": [ + (1, 3, 224, 224), # Static input shape for input #1 + { + "min": (1, 3, 224, 224), + "opt": (1, 3, 512, 512), + "max": (1, 3, 1024, 1024) + } # Dynamic input shape for input #2 + ], + "op_precision": torch.half, # Operating precision set to FP16 + "refit": false, # enable refit + "debug": false, # enable debuggable engine + "strict_types": false, # kernels should strictly run in operating precision + "allow_gpu_fallback": false, # (DLA only) Allow layers unsupported on DLA to run on GPU + "device": torch.device("cuda"), # Type of device to run engine on (for DLA use trtorch.DeviceType.DLA) + "capability": trtorch.EngineCapability.DEFAULT, # Restrict kernel selection to safe gpu kernels or safe dla kernels + "num_min_timing_iters": 2, # Number of minimization timing iterations used to select kernels + "num_avg_timing_iters": 1, # Number of averaging timing iterations used to select kernels + "workspace_size": 0, # Maximum size of workspace given to TensorRT + "max_batch_size": 0, # Maximum batch size (must be >= 1 to be set, 0 means not set) + }) + } + + Input Sizes can be specified as torch sizes, tuples or lists. Op precisions can be specified using + torch datatypes or trtorch datatypes and you can use either torch devices or the trtorch device type enum + to select device type. 
+ + Returns: + torch.classes.tensorrt.CompileSpec: List of methods and formated spec objects to be provided to ``torch._C._jit_to_tensorrt`` + """ + + parsed_spec = _parse_compile_spec(compile_spec) + + backend_spec = torch.classes.tensorrt.CompileSpec() + + for i in parsed_spec.input_ranges: + ir = torch.classes.tensorrt.InputRange() + ir.set_min(i.min) + ir.set_opt(i.opt) + ir.set_max(i.max) + backend_spec.append_input_range(ir) + + backend_spec.set_op_precision(int(parsed_spec.op_precision)) + backend_spec.set_refit(parsed_spec.refit) + backend_spec.set_debug(parsed_spec.debug) + backend_spec.set_refit(parsed_spec.refit) + backend_spec.set_strict_types(parsed_spec.strict_types) + backend_spec.set_allow_gpu_fallback(parsed_spec.allow_gpu_fallback) + backend_spec.set_device(int(parsed_spec.device)) + backend_spec.set_capability(int(parsed_spec.capability)) + backend_spec.set_num_min_timing_iters(parsed_spec.num_min_timing_iters) + backend_spec.set_num_avg_timing_iters(parsed_spec.num_avg_timing_iters) + backend_spec.set_workspace_size(parsed_spec.workspace_size) + backend_spec.set_max_batch_size(parsed_spec.max_batch_size) + + return backend_spec + diff --git a/py/trtorch/_compiler.py b/py/trtorch/_compiler.py index 1c35dbe4a1..443db12a7b 100644 --- a/py/trtorch/_compiler.py +++ b/py/trtorch/_compiler.py @@ -39,7 +39,7 @@ def compile(module: torch.jit.ScriptModule, compile_spec: Any) -> torch.jit.Scri "debug": false, # enable debuggable engine "strict_types": false, # kernels should strictly run in operating precision "allow_gpu_fallback": false, # (DLA only) Allow layers unsupported on DLA to run on GPU - "device": torch.device("cuda"), # Type of device to run engine on (for DLA use trtorch.DeviceType.DLA) + "device_type": torch.device("cuda"), # Type of device to run engine on (for DLA use trtorch.DeviceType.DLA) "capability": trtorch.EngineCapability.DEFAULT, # Restrict kernel selection to safe gpu kernels or safe dla kernels "num_min_timing_iters": 2, # Number of minimization timing iterations used to select kernels "num_avg_timing_iters": 1, # Number of averaging timing iterations used to select kernels @@ -91,7 +91,7 @@ def convert_method_to_trt_engine(module: torch.jit.ScriptModule, method_name: st "debug": false, # enable debuggable engine "strict_types": false, # kernels should strictly run in operating precision "allow_gpu_fallback": false, # (DLA only) Allow layers unsupported on DLA to run on GPU - "device": torch.device("cuda"), # Type of device to run engine on (for DLA use trtorch.DeviceType.DLA) + "device_type": torch.device("cuda"), # Type of device to run engine on (for DLA use trtorch.DeviceType.DLA) "capability": trtorch.EngineCapability.DEFAULT, # Restrict kernel selection to safe gpu kernels or safe dla kernels "num_min_timing_iters": 2, # Number of minimization timing iterations used to select kernels "num_avg_timing_iters": 1, # Number of averaging timing iterations used to select kernels diff --git a/py/trtorch/csrc/register_tensorrt_classes.cpp b/py/trtorch/csrc/register_tensorrt_classes.cpp new file mode 100644 index 0000000000..7d66ca6580 --- /dev/null +++ b/py/trtorch/csrc/register_tensorrt_classes.cpp @@ -0,0 +1,47 @@ +#include "tensorrt_classes.h" + +namespace trtorch { +namespace backend { +namespace { + void RegisterTRTCompileSpec() { + #define ADD_FIELD_GET_SET_REGISTRATION(registry, class_name, field_name) \ + (registry).def("set_"#field_name, &class_name::set_##field_name); \ + (registry).def("get_"#field_name, &class_name::get_##field_name); + + static 
auto TRTORCH_UNUSED TRTInputRangeTSRegistrtion = torch::class_("tensorrt", "InputRange") + .def(torch::init<>()); + + ADD_FIELD_GET_SET_REGISTRATION(TRTInputRangeTSRegistrtion, trtorch::pyapi::InputRange, min); + ADD_FIELD_GET_SET_REGISTRATION(TRTInputRangeTSRegistrtion, trtorch::pyapi::InputRange, opt); + ADD_FIELD_GET_SET_REGISTRATION(TRTInputRangeTSRegistrtion, trtorch::pyapi::InputRange, max); + + static auto TRTORCH_UNUSED TRTCompileSpecTSRegistrtion = torch::class_("tensorrt", "CompileSpec") + .def(torch::init<>()) + .def("append_input_range", &trtorch::pyapi::CompileSpec::appendInputRange) + .def("__str__", &trtorch::pyapi::CompileSpec::stringify); + + ADD_FIELD_GET_SET_REGISTRATION(TRTCompileSpecTSRegistrtion, trtorch::pyapi::CompileSpec, op_precision); + ADD_FIELD_GET_SET_REGISTRATION(TRTCompileSpecTSRegistrtion, trtorch::pyapi::CompileSpec, refit); + ADD_FIELD_GET_SET_REGISTRATION(TRTCompileSpecTSRegistrtion, trtorch::pyapi::CompileSpec, debug); + ADD_FIELD_GET_SET_REGISTRATION(TRTCompileSpecTSRegistrtion, trtorch::pyapi::CompileSpec, strict_types); + ADD_FIELD_GET_SET_REGISTRATION(TRTCompileSpecTSRegistrtion, trtorch::pyapi::CompileSpec, allow_gpu_fallback); + ADD_FIELD_GET_SET_REGISTRATION(TRTCompileSpecTSRegistrtion, trtorch::pyapi::CompileSpec, device); + ADD_FIELD_GET_SET_REGISTRATION(TRTCompileSpecTSRegistrtion, trtorch::pyapi::CompileSpec, capability); + ADD_FIELD_GET_SET_REGISTRATION(TRTCompileSpecTSRegistrtion, trtorch::pyapi::CompileSpec, num_min_timing_iters); + ADD_FIELD_GET_SET_REGISTRATION(TRTCompileSpecTSRegistrtion, trtorch::pyapi::CompileSpec, num_avg_timing_iters); + ADD_FIELD_GET_SET_REGISTRATION(TRTCompileSpecTSRegistrtion, trtorch::pyapi::CompileSpec, workspace_size); + ADD_FIELD_GET_SET_REGISTRATION(TRTCompileSpecTSRegistrtion, trtorch::pyapi::CompileSpec, max_batch_size); + } + +struct TRTTSRegistrations { + TRTTSRegistrations() { + RegisterTRTCompileSpec(); + } +}; + +static TRTTSRegistrations register_trt_classes = TRTTSRegistrations(); +} +} // namespace backend +} // namespace trtorch + + diff --git a/py/trtorch/csrc/tensorrt_backend.cpp b/py/trtorch/csrc/tensorrt_backend.cpp new file mode 100644 index 0000000000..1d679450c6 --- /dev/null +++ b/py/trtorch/csrc/tensorrt_backend.cpp @@ -0,0 +1,86 @@ +#include "torch/csrc/jit/passes/lower_graph.h" + +#include "tensorrt_backend.h" +#include "tensorrt_classes.h" + +#include "core/compiler.h" +#include "core/lowering/lowering.h" +#include "core/runtime/runtime.h" + +namespace trtorch { +namespace backend { + +c10::IValue TensorRTBackend::preprocess(c10::IValue mod, c10::impl::GenericDict method_compile_spec) { + auto mod_ = mod.toModule(); + LOG_DEBUG("Placing module in eval mode if not already"); + mod_.eval(); + mod_ = core::lowering::LowerModule(mod_); + + auto spec = + c10::impl::toTypedDict(method_compile_spec); + + for (auto it = spec.begin(), end = spec.end(); it != end; ++it) { + TRTORCH_CHECK(core::CheckMethodOperatorSupport(mod.toModule(), it->key()), + "Method " << it->key() << "cannot be compiled by TRTorch"); + } + + for (auto it = spec.begin(), end = spec.end(); it != end; ++it) { + const auto& method_name = it->key(); + auto method = mod_.get_method(method_name); + auto graph = method.graph(); + core::lowering::LowerGraph(graph); + } + + return mod_._ivalue(); +} + +c10::impl::GenericDict TensorRTBackend::compile(c10::IValue processed_mod, c10::impl::GenericDict method_compile_spec) { + auto mod = processed_mod.toModule(); + auto spec = + c10::impl::toTypedDict(method_compile_spec); + + auto 
handles = c10::impl::GenericDict(c10::StringType::get(), c10::getCustomClassType>()); + + for (auto it = spec.begin(), end = spec.end(); it != end; ++it) { + const auto& method_name = it->key(); + auto method = mod.get_method(method_name); + auto g = method.graph(); + + auto raw_spec = it->value().toGenericDict().at(it->key()).toCustomClass(); + LOG_DEBUG(raw_spec->stringify()); + auto cfg = raw_spec->toInternalCompileSpec(); + auto convert_cfg = std::move(cfg.convert_info); + auto graph_and_ivalues = torch::jit::LowerGraph(*g, mod._ivalue()); + + g = graph_and_ivalues.first; + auto params = graph_and_ivalues.second; + auto named_params = core::conversion::get_named_params(g->inputs(), params); + + auto serialized_engine = core::conversion::ConvertBlockToEngine(g->block(), convert_cfg, named_params); + auto engine_handle = c10::make_intrusive(it->key(), serialized_engine); + handles.insert(method.name(), at::IValue(engine_handle)); + } + + return c10::impl::toGenericDict(handles); +} + + +c10::impl::GenericList TensorRTBackend::execute(c10::IValue handle, c10::impl::GenericList inputs) { + TRTORCH_ASSERT(inputs.size() > 0, "Trying to execute on empty list of arguments"); + auto engine = handle.toCustomClass(); + std::vector in_vec; + for (size_t i = 0, e = inputs.size(); i < e; ++i) { + c10::IValue val = inputs[i]; + TRTORCH_CHECK(val.isTensor(), "TensorRT currently only accepts Tensors as inputs"); + in_vec.push_back(val.toTensor()); + } + auto outputs = core::runtime::execute_engine(in_vec, engine); + return c10::impl::toList(c10::List(outputs)); +} + +namespace { +static auto reg = torch::jit::backend("tensorrt"); +} + +} // namespace backend +} // namespace trtorch \ No newline at end of file diff --git a/py/trtorch/csrc/tensorrt_backend.h b/py/trtorch/csrc/tensorrt_backend.h new file mode 100644 index 0000000000..6150604b3e --- /dev/null +++ b/py/trtorch/csrc/tensorrt_backend.h @@ -0,0 +1,19 @@ +#pragma once +#include "torch/csrc/jit/api/module.h" +#include "torch/csrc/jit/backends/backend.h" + +namespace trtorch { +namespace backend { + +class TensorRTBackend: public torch::jit::PyTorchBackendInterface { + public: + explicit TensorRTBackend() {} + virtual ~TensorRTBackend() = default; + + c10::IValue preprocess(c10::IValue mod, c10::impl::GenericDict method_compile_spec) override; + c10::impl::GenericDict compile(c10::IValue processed_mod, c10::impl::GenericDict method_compile_spec) override; + c10::impl::GenericList execute(c10::IValue handle, c10::impl::GenericList inputs) override; +}; + +} // namespace backend +} // namespace trtorch \ No newline at end of file diff --git a/py/trtorch/csrc/tensorrt_classes.cpp b/py/trtorch/csrc/tensorrt_classes.cpp new file mode 100644 index 0000000000..43e63d553b --- /dev/null +++ b/py/trtorch/csrc/tensorrt_classes.cpp @@ -0,0 +1,143 @@ + +#include "tensorrt_classes.h" + +namespace trtorch { +namespace pyapi { + +std::string to_str(InputRange& value) { + auto vec_to_str = [](std::vector shape) -> std::string { + std::stringstream ss; + ss << '['; + for(auto i : shape) { + ss << i << ','; + } + ss << ']'; + return ss.str(); + }; + + std::stringstream ss; + ss << " {" << std::endl; + ss << " min: " << vec_to_str(value.min) << ',' << std::endl; + ss << " opt: " << vec_to_str(value.opt) << ',' << std::endl; + ss << " max: " << vec_to_str(value.max) << ',' << std::endl; + ss << " }" << std::endl; + return ss.str(); +} + +std::string to_str(DataType value) { + switch (value) { + case DataType::kHalf: + return "Half"; + case DataType::kChar: + return 
"Int8"; + case DataType::kFloat: + default: + return "Float"; + } +} + +nvinfer1::DataType toTRTDataType(DataType value) { + switch (value) { + case DataType::kChar: + return nvinfer1::DataType::kINT8; + case DataType::kHalf: + return nvinfer1::DataType::kHALF; + case DataType::kFloat: + default: + return nvinfer1::DataType::kFLOAT; + } +} + +std::string to_str(DeviceType value) { + switch (value) { + case DeviceType::kDLA: + return "DLA"; + case DeviceType::kGPU: + default: + return "GPU"; + } +} + +nvinfer1::DeviceType toTRTDeviceType(DeviceType value) { + switch (value) { + case DeviceType::kDLA: + return nvinfer1::DeviceType::kDLA; + case DeviceType::kGPU: + default: + return nvinfer1::DeviceType::kGPU; + } +} + +std::string to_str(EngineCapability value) { + switch (value) { + case EngineCapability::kSAFE_GPU: + return "Safe GPU"; + case EngineCapability::kSAFE_DLA: + return "Safe DLA"; + case EngineCapability::kDEFAULT: + default: + return "Default"; + } +} + +nvinfer1::EngineCapability toTRTEngineCapability(EngineCapability value) { + switch (value) { + case EngineCapability::kSAFE_DLA: + return nvinfer1::EngineCapability::kSAFE_DLA; + case EngineCapability::kSAFE_GPU: + return nvinfer1::EngineCapability::kSAFE_GPU; + case EngineCapability::kDEFAULT: + default: + return nvinfer1::EngineCapability::kDEFAULT; + } +} + +core::CompileSpec CompileSpec::toInternalCompileSpec() { + std::vector internal_input_ranges; + for (auto i : input_ranges) { + internal_input_ranges.push_back(i.toInternalInputRange()); + } + auto info = core::CompileSpec(internal_input_ranges); + info.convert_info.engine_settings.op_precision = toTRTDataType(op_precision); + info.convert_info.engine_settings.refit = refit; + info.convert_info.engine_settings.debug = debug; + info.convert_info.engine_settings.strict_types = strict_types; + info.convert_info.engine_settings.allow_gpu_fallback = allow_gpu_fallback; + info.convert_info.engine_settings.device = toTRTDeviceType(device); + info.convert_info.engine_settings.capability = toTRTEngineCapability(capability); + TRTORCH_CHECK(num_min_timing_iters >= 0, "num_min_timing_iters must be 0 or greater"); + info.convert_info.engine_settings.num_min_timing_iters = num_min_timing_iters; + TRTORCH_CHECK(num_avg_timing_iters >= 0, "num_avg_timing_iters must be 0 or greater"); + info.convert_info.engine_settings.num_avg_timing_iters = num_avg_timing_iters; + TRTORCH_CHECK(workspace_size >= 0, "workspace_size must be 0 or greater"); + info.convert_info.engine_settings.workspace_size = workspace_size; + TRTORCH_CHECK(max_batch_size >= 0, "max_batch_size must be 0 or greater"); + info.convert_info.engine_settings.max_batch_size = max_batch_size; + return info; +} + +std::string CompileSpec::stringify() { + std::stringstream ss; + ss << "TensorRT Compile Spec: {" << std::endl; + ss << " \"Input Shapes\": [" << std::endl; + for (auto i : input_ranges) { + ss << to_str(i); + } + ss << " ]" << std::endl; + ss << " \"Op Precision\": " << to_str(op_precision) << std::endl; + ss << " \"Refit\": " << refit << std::endl; + ss << " \"Debug\": " << debug << std::endl; + ss << " \"Strict Types\": " << strict_types << std::endl; + ss << " \"Allow GPU Fallback\": " << allow_gpu_fallback << std::endl; + ss << " \"Device\": " << to_str(capability) << std::endl; + ss << " \"Engine Capability\": " << to_str(capability) << std::endl; + ss << " \"Num Min Timing Iters\": " << num_min_timing_iters << std::endl; + ss << " \"Num Avg Timing Iters\": " << num_avg_timing_iters << std::endl; + ss << " 
\"Workspace Size\": " << workspace_size << std::endl; + ss << " \"Max Batch Size\": " << max_batch_size << std::endl; + ss << "}"; + return ss.str(); +} + +} // namespace pyapi +} // namespace trtorch \ No newline at end of file diff --git a/py/trtorch/csrc/tensorrt_classes.h b/py/trtorch/csrc/tensorrt_classes.h new file mode 100644 index 0000000000..e98a093358 --- /dev/null +++ b/py/trtorch/csrc/tensorrt_classes.h @@ -0,0 +1,101 @@ +#pragma once + +#include "core/compiler.h" +#include "core/conversion/conversion.h" +#include "torch/torch.h" +#include "torch/script.h" +#include "torch/custom_class.h" + +namespace trtorch { +namespace pyapi { + +#define ADD_FIELD_GET_SET(field_name, type) \ + void set_##field_name(type val) {field_name = val;} \ + type get_##field_name() {return field_name;} + +struct InputRange : torch::CustomClassHolder { + std::vector min; + std::vector opt; + std::vector max; + + core::conversion::InputRange toInternalInputRange() { + return core::conversion::InputRange(min, opt, max); + } + + ADD_FIELD_GET_SET(min, std::vector); + ADD_FIELD_GET_SET(opt, std::vector); + ADD_FIELD_GET_SET(max, std::vector); +}; + +std::string to_str(InputRange& value); + + +enum class DataType : int8_t { + kFloat, + kHalf, + kChar, +}; + +std::string to_str(DataType value); +nvinfer1::DataType toTRTDataType(DataType value); + +enum DeviceType : int8_t { + kGPU, + kDLA, +}; + +std::string to_str(DeviceType value); +nvinfer1::DeviceType toTRTDeviceType(DeviceType value); + +enum class EngineCapability : int8_t { + kDEFAULT, + kSAFE_GPU, + kSAFE_DLA, +}; + +std::string to_str(EngineCapability value); +nvinfer1::EngineCapability toTRTEngineCapability(EngineCapability value); + +// TODO: Make this error message more informative +#define ADD_ENUM_GET_SET(field_name, type, max_val) \ + void set_##field_name(int64_t val) { \ + TRTORCH_CHECK(val < max_val, "Invalid enum value for field"); \ + field_name = static_cast(val); \ + } \ + int64_t get_##field_name() {return static_cast(field_name);} + +struct CompileSpec : torch::CustomClassHolder { + core::CompileSpec toInternalCompileSpec(); + std::string stringify(); + void appendInputRange(const c10::intrusive_ptr& ir) { + input_ranges.push_back(*ir); + } + + ADD_ENUM_GET_SET(op_precision, DataType, 3); + ADD_FIELD_GET_SET(refit, bool); + ADD_FIELD_GET_SET(debug, bool); + ADD_FIELD_GET_SET(strict_types, bool); + ADD_FIELD_GET_SET(allow_gpu_fallback, bool); + ADD_ENUM_GET_SET(device, DeviceType, 2); + ADD_ENUM_GET_SET(capability, EngineCapability, 3); + ADD_FIELD_GET_SET(num_min_timing_iters, int64_t); + ADD_FIELD_GET_SET(num_avg_timing_iters, int64_t); + ADD_FIELD_GET_SET(workspace_size, int64_t); + ADD_FIELD_GET_SET(max_batch_size, int64_t); + + std::vector input_ranges; + DataType op_precision = DataType::kFloat; + bool refit = false; + bool debug = false; + bool strict_types = false; + bool allow_gpu_fallback = true; + DeviceType device = DeviceType::kGPU; + EngineCapability capability = EngineCapability::kDEFAULT; + int64_t num_min_timing_iters = 2; + int64_t num_avg_timing_iters = 1; + int64_t workspace_size = 0; + int64_t max_batch_size = 0; +}; + +} // namespace pyapi +} // namespace trtorch \ No newline at end of file diff --git a/py/trtorch/csrc/trtorch_py.cpp b/py/trtorch/csrc/trtorch_py.cpp index da6d2b2688..4f9363542d 100644 --- a/py/trtorch/csrc/trtorch_py.cpp +++ b/py/trtorch/csrc/trtorch_py.cpp @@ -1,11 +1,12 @@ #include "pybind11/pybind11.h" #include "pybind11/stl.h" -//TODO: Remove when we have access to PyTorch to_backend 
autoregistration -#include "core/backend.h" + +#include "tensorrt_classes.h" #include "core/compiler.h" #include "core/conversion/conversion.h" #include "torch/torch.h" #include "torch/script.h" +#include "torch/custom_class.h" #include "torch/csrc/jit/python/pybind_utils.h" #include "Python.h" @@ -14,103 +15,6 @@ namespace py = pybind11; namespace trtorch { namespace pyapi { -struct InputRange { - std::vector min; - std::vector opt; - std::vector max; - - core::conversion::InputRange toInternalInputRange() { - return core::conversion::InputRange(min, opt, max); - } -}; - -enum class DataType : int8_t { - kFloat, - kHalf, - kChar, -}; - -nvinfer1::DataType toTRTDataType(DataType value) { - switch (value) { - case DataType::kChar: - return nvinfer1::DataType::kINT8; - case DataType::kHalf: - return nvinfer1::DataType::kHALF; - case DataType::kFloat: - default: - return nvinfer1::DataType::kFLOAT; - } -} - -enum DeviceType : int8_t { - kGPU, - kDLA, -}; - -nvinfer1::DeviceType toTRTDeviceType(DeviceType value) { - switch (value) { - case DeviceType::kDLA: - return nvinfer1::DeviceType::kDLA; - case DeviceType::kGPU: - default: - return nvinfer1::DeviceType::kGPU; - } -} - -enum class EngineCapability : int8_t { - kDEFAULT, - kSAFE_GPU, - kSAFE_DLA, -}; - -nvinfer1::EngineCapability toTRTEngineCapability(EngineCapability value) { - switch (value) { - case EngineCapability::kSAFE_DLA: - return nvinfer1::EngineCapability::kSAFE_DLA; - case EngineCapability::kSAFE_GPU: - return nvinfer1::EngineCapability::kSAFE_GPU; - case EngineCapability::kDEFAULT: - default: - return nvinfer1::EngineCapability::kDEFAULT; - } -} - -struct CompileSpec { - - core::CompileSpec toInternalCompileSpec() { - for (auto i : input_ranges) { - internal_input_ranges.push_back(i.toInternalInputRange()); - } - auto info = core::CompileSpec(internal_input_ranges); - info.convert_info.engine_settings.op_precision = toTRTDataType(op_precision); - info.convert_info.engine_settings.refit = refit; - info.convert_info.engine_settings.debug = debug; - info.convert_info.engine_settings.strict_types = strict_types; - info.convert_info.engine_settings.allow_gpu_fallback = allow_gpu_fallback; - info.convert_info.engine_settings.device = toTRTDeviceType(device); - info.convert_info.engine_settings.capability = toTRTEngineCapability(capability); - info.convert_info.engine_settings.num_min_timing_iters = num_min_timing_iters; - info.convert_info.engine_settings.num_avg_timing_iters = num_avg_timing_iters; - info.convert_info.engine_settings.workspace_size = workspace_size; - info.convert_info.engine_settings.max_batch_size = max_batch_size; - return info; - } - - std::vector input_ranges; - std::vector internal_input_ranges; - DataType op_precision = DataType::kFloat; - bool refit = false; - bool debug = false; - bool strict_types = false; - bool allow_gpu_fallback = true; - DeviceType device = DeviceType::kGPU; - EngineCapability capability = EngineCapability::kDEFAULT; - uint64_t num_min_timing_iters = 2; - uint64_t num_avg_timing_iters = 1; - uint64_t workspace_size = 0; - uint64_t max_batch_size = 0; -}; - torch::jit::Module CompileGraph(const torch::jit::Module& mod, CompileSpec& info) { py::gil_scoped_acquire gil; auto trt_mod = core::CompileGraph(mod, info.toInternalCompileSpec()); @@ -227,11 +131,7 @@ PYBIND11_MODULE(_C, m) { .value("INFO", core::util::logging::LogLevel::kINFO) .value("DEBUG", core::util::logging::LogLevel::kDEBUG) .export_values(); - - //TODO: Remove when we have access to PyTorch autoregistration - 
//m.def("to_tensorrt", backend::GetTensorRTBackend().generateToBackendFn()); } - } // namespace pyapi } // namespace trtorch diff --git a/tests/BUILD b/tests/BUILD index f784798a57..81a43aecbc 100644 --- a/tests/BUILD +++ b/tests/BUILD @@ -17,6 +17,7 @@ test_suite( test_suite( name = "python_api_tests", tests = [ - "//tests/py:test_api" + "//tests/py:test_api", + "//tests/py:test_to_backend_api" ] ) \ No newline at end of file diff --git a/tests/py/BUILD b/tests/py/BUILD index 054e1cbbb3..0d643d65d8 100644 --- a/tests/py/BUILD +++ b/tests/py/BUILD @@ -5,9 +5,21 @@ load("@py_test_deps//:requirements.bzl", "requirement") py_test( name = "test_api", srcs = [ - "test_api.py" + "test_api.py", + "model_test_case.py" ], deps = [ requirement("torchvision") ] -) \ No newline at end of file +) + +py_test( + name = "test_to_backend_api", + srcs = [ + "test_to_backend_api.py", + "model_test_case.py" + ], + deps = [ + requirement("torchvision") + ] +) diff --git a/tests/py/model_test_case.py b/tests/py/model_test_case.py new file mode 100644 index 0000000000..3730f6507b --- /dev/null +++ b/tests/py/model_test_case.py @@ -0,0 +1,19 @@ +import unittest +import trtorch +import torch +import torchvision.models as models + +class ModelTestCase(unittest.TestCase): + def __init__(self, methodName='runTest', model=None): + super(ModelTestCase, self).__init__(methodName) + self.model = model + self.model.eval().to("cuda") + + @staticmethod + def parametrize(testcase_class, model=None): + testloader = unittest.TestLoader() + testnames = testloader.getTestCaseNames(testcase_class) + suite = unittest.TestSuite() + for name in testnames: + suite.addTest(testcase_class(name, model=model)) + return suite \ No newline at end of file diff --git a/tests/py/test_api.py b/tests/py/test_api.py index e0cd113db6..2d9d2d1e56 100644 --- a/tests/py/test_api.py +++ b/tests/py/test_api.py @@ -3,21 +3,7 @@ import torch import torchvision.models as models - -class ModelTestCase(unittest.TestCase): - def __init__(self, methodName='runTest', model=None): - super(ModelTestCase, self).__init__(methodName) - self.model = model - self.model.eval().to("cuda") - - @staticmethod - def parametrize(testcase_class, model=None): - testloader = unittest.TestLoader() - testnames = testloader.getTestCaseNames(testcase_class) - suite = unittest.TestSuite() - for name in testnames: - suite.addTest(testcase_class(name, model=model)) - return suite +from model_test_case import ModelTestCase class TestCompile(ModelTestCase): def setUp(self): diff --git a/tests/py/test_to_backend_api.py b/tests/py/test_to_backend_api.py new file mode 100644 index 0000000000..e643aa6ce2 --- /dev/null +++ b/tests/py/test_to_backend_api.py @@ -0,0 +1,44 @@ +import unittest +import trtorch +import torch +import torchvision.models as models + +from model_test_case import ModelTestCase + +class TestToBackendLowering(ModelTestCase): + def setUp(self): + self.input = torch.randn((1, 3, 300, 300)).to("cuda") + self.scripted_model = torch.jit.script(self.model) + self.spec = { + "forward": trtorch.TensorRTCompileSpec({ + "input_shapes": [[1, 3, 300, 300]], + "op_precision": torch.float, + "refit": False, + "debug": False, + "strict_types": False, + "allow_gpu_fallback": True, + "device_type": "gpu", + "capability": trtorch.EngineCapability.default, + "num_min_timing_iters": 2, + "num_avg_timing_iters": 1, + "max_batch_size": 0, + }) + } + + def test_to_backend_lowering(self): + trt_mod = torch._C._jit_to_tensorrt(self.scripted_model._c, {"forward": self.spec}) + same = 
(trt_mod.forward(self.input) - self.scripted_model(self.input)).abs().max() + self.assertTrue(same < 2e-3) + +def test_suite(): + suite = unittest.TestSuite() + suite.addTest(TestToBackendLowering.parametrize(TestToBackendLowering, model=models.mobilenet_v2(pretrained=True))) + + return suite + +suite = test_suite() + +runner = unittest.TextTestRunner() +result = runner.run(suite) + +exit(int(not result.wasSuccessful())) \ No newline at end of file From a720f918aafe19d09461a53ce18bad07ff612eb3 Mon Sep 17 00:00:00 2001 From: Naren Dasan Date: Wed, 21 Oct 2020 15:33:52 -0700 Subject: [PATCH 4/6] refactor: A couple more renames Signed-off-by: Naren Dasan Signed-off-by: Naren Dasan --- .github/pr-labels.yml | 6 +++--- core/runtime/register_trt_op.cpp | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/pr-labels.yml b/.github/pr-labels.yml index 32730e9d48..b71be34bd5 100644 --- a/.github/pr-labels.yml +++ b/.github/pr-labels.yml @@ -16,8 +16,8 @@ "component: evaluators": - core/conversion/evaluators/**/* -"component: execution": - - core/execution/**/* +"component: runtime": + - core/runtime/**/* "component: lowering": - core/lowering/**/* @@ -32,4 +32,4 @@ "documentation": - docs/**/* - docsrc/**/* - + diff --git a/core/runtime/register_trt_op.cpp b/core/runtime/register_trt_op.cpp index 75d34c701e..22e412dc42 100644 --- a/core/runtime/register_trt_op.cpp +++ b/core/runtime/register_trt_op.cpp @@ -30,7 +30,7 @@ std::vector execute_engine(std::vector inputs, c10::intr gpu_handles.push_back(contig_inputs.back().data_ptr()); } - TRTORCH_CHECK(compiled_engine->exec_ctx->allInputDimensionsSpecified(), "Not enough inputs provided (execution.RunCudaEngine)"); + TRTORCH_CHECK(compiled_engine->exec_ctx->allInputDimensionsSpecified(), "Not enough inputs provided (runtime.RunCudaEngine)"); std::vector outputs(compiled_engine->num_io.second); for (size_t o = inputs.size(); o < (compiled_engine->num_io.first + compiled_engine->num_io.second); o++) { @@ -53,6 +53,6 @@ TORCH_LIBRARY(tensorrt, m) { m.def("execute_engine", execute_engine); } -} // namespace execution +} // namespace runtime } // namespace core } // namespace trtorch From 7e4b07c715181c1868122e42c28fc6f56fa261f0 Mon Sep 17 00:00:00 2001 From: Naren Dasan Date: Wed, 21 Oct 2020 16:14:32 -0700 Subject: [PATCH 5/6] docs: New documentation on the to_backend api integration Signed-off-by: Naren Dasan Signed-off-by: Naren Dasan --- docsrc/index.rst | 2 + docsrc/py_api/trtorch.rst | 4 +- docsrc/tutorials/use_from_pytorch.rst | 62 +++++++++++++++++++++++++++ 3 files changed, 67 insertions(+), 1 deletion(-) create mode 100644 docsrc/tutorials/use_from_pytorch.rst diff --git a/docsrc/index.rst b/docsrc/index.rst index 2db21d6e2d..f322d14114 100644 --- a/docsrc/index.rst +++ b/docsrc/index.rst @@ -25,6 +25,7 @@ Getting Started * :ref:`getting_started` * :ref:`ptq` * :ref:`trtorchc` +* :ref:`use_from_pytorch` .. toctree:: @@ -36,6 +37,7 @@ Getting Started tutorials/getting_started tutorials/ptq tutorials/trtorchc + tutorials/use_from_pytorch _notebooks/lenet .. toctree:: diff --git a/docsrc/py_api/trtorch.rst b/docsrc/py_api/trtorch.rst index 3f34ca9617..d7376cb2f0 100644 --- a/docsrc/py_api/trtorch.rst +++ b/docsrc/py_api/trtorch.rst @@ -17,9 +17,11 @@ Functions .. autofunction:: check_method_op_support +.. autofunction:: get_build_info + .. autofunction:: dump_build_info -.. autofunction:: get_build_info +.. 
autofunction:: TensorRTCompileSpec Enums ------- diff --git a/docsrc/tutorials/use_from_pytorch.rst b/docsrc/tutorials/use_from_pytorch.rst new file mode 100644 index 0000000000..322efd29a9 --- /dev/null +++ b/docsrc/tutorials/use_from_pytorch.rst @@ -0,0 +1,62 @@ +.. _use_from_pytorch: + +Using TRTorch Directly From PyTorch +==================================== + +Starting in TRTorch 0.1.0, you will now be able to directly access TensorRT from PyTorch APIs. The process to use this feature +is very similar to the compilation workflow described in :ref:`getting_started` + +Start by loading ``trtorch`` into your application. + +.. code-block:: python + + import torch + import trtorch + + +Then given a TorchScript module, you can lower it to TensorRT using the ``torch._C._jit_to_tensorrt`` API. + +.. code-block:: python + + import torchvision.models as models + + model = models.mobilenet_v2(pretrained=True) + script_model = torch.jit.script(model) + +Unlike the ``compile`` API in TRTorch which assumes you are trying to compile the ``forward`` function of a module +or the ``convert_method_to_trt_engine`` which converts a specified function to a TensorRT engine, the backend API +will take a dictionary which maps names of functions to compile to Compilation Spec objects which wrap the same +sort of dictionary you would provide to ``compile``. For more information on the compile spec dictionary take a look +at the documentation for the TRTorch ``TensorRTCompileSpec`` API. + +.. code-block:: python + + spec = { + "forward": trtorch.TensorRTCompileSpec({ + "input_shapes": [[1, 3, 300, 300]], + "op_precision": torch.half, + "refit": False, + "debug": False, + "strict_types": False, + "allow_gpu_fallback": True, + "device_type": "gpu", + "capability": trtorch.EngineCapability.default, + "num_min_timing_iters": 2, + "num_avg_timing_iters": 1, + "max_batch_size": 0, + }) + } + +Now to compile with TRTorch, provide the target module objects and the spec dictionary to ``torch._C._jit_to_tensorrt`` + +.. code-block:: python + + trt_model = torch._C._jit_to_tensorrt(script_model._c, spec) + +To run explicitly call the function of the method you want to run (vs. how you can just call on the module itself in standard PyTorch) + +.. 
code-block:: python + + input = torch.randn((1, 3, 300, 300).to("cuda").to(torch.half) + print(trt_model.forward(input)) + From d150930181dbf9e5a4cbb9c8f74401806c32533d Mon Sep 17 00:00:00 2001 From: Naren Dasan Date: Thu, 22 Oct 2020 11:00:08 -0700 Subject: [PATCH 6/6] docs(//py): Clarify docstrings in python package Signed-off-by: Naren Dasan Signed-off-by: Naren Dasan --- py/trtorch/_compile_spec.py | 8 ++++---- py/trtorch/_compiler.py | 10 +++++----- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/py/trtorch/_compile_spec.py b/py/trtorch/_compile_spec.py index 6f0ff49d4a..221c0c2e5d 100644 --- a/py/trtorch/_compile_spec.py +++ b/py/trtorch/_compile_spec.py @@ -162,10 +162,10 @@ def TensorRTCompileSpec(compile_spec: Dict[str, Any]): } # Dynamic input shape for input #2 ], "op_precision": torch.half, # Operating precision set to FP16 - "refit": false, # enable refit - "debug": false, # enable debuggable engine - "strict_types": false, # kernels should strictly run in operating precision - "allow_gpu_fallback": false, # (DLA only) Allow layers unsupported on DLA to run on GPU + "refit": False, # enable refit + "debug": False, # enable debuggable engine + "strict_types": False, # kernels should strictly run in operating precision + "allow_gpu_fallback": True, # (DLA only) Allow layers unsupported on DLA to run on GPU "device": torch.device("cuda"), # Type of device to run engine on (for DLA use trtorch.DeviceType.DLA) "capability": trtorch.EngineCapability.DEFAULT, # Restrict kernel selection to safe gpu kernels or safe dla kernels "num_min_timing_iters": 2, # Number of minimization timing iterations used to select kernels diff --git a/py/trtorch/_compiler.py b/py/trtorch/_compiler.py index 443db12a7b..cfd9fd39a3 100644 --- a/py/trtorch/_compiler.py +++ b/py/trtorch/_compiler.py @@ -38,7 +38,7 @@ def compile(module: torch.jit.ScriptModule, compile_spec: Any) -> torch.jit.Scri "refit": false, # enable refit "debug": false, # enable debuggable engine "strict_types": false, # kernels should strictly run in operating precision - "allow_gpu_fallback": false, # (DLA only) Allow layers unsupported on DLA to run on GPU + "allow_gpu_fallback": true, # (DLA only) Allow layers unsupported on DLA to run on GPU "device_type": torch.device("cuda"), # Type of device to run engine on (for DLA use trtorch.DeviceType.DLA) "capability": trtorch.EngineCapability.DEFAULT, # Restrict kernel selection to safe gpu kernels or safe dla kernels "num_min_timing_iters": 2, # Number of minimization timing iterations used to select kernels @@ -87,10 +87,10 @@ def convert_method_to_trt_engine(module: torch.jit.ScriptModule, method_name: st } # Dynamic input shape for input #2 ], "op_precision": torch.half, # Operating precision set to FP16 - "refit": false, # enable refit - "debug": false, # enable debuggable engine - "strict_types": false, # kernels should strictly run in operating precision - "allow_gpu_fallback": false, # (DLA only) Allow layers unsupported on DLA to run on GPU + "refit": False, # enable refit + "debug": False, # enable debuggable engine + "strict_types": False, # kernels should strictly run in operating precision + "allow_gpu_fallback": True, # (DLA only) Allow layers unsupported on DLA to run on GPU "device_type": torch.device("cuda"), # Type of device to run engine on (for DLA use trtorch.DeviceType.DLA) "capability": trtorch.EngineCapability.DEFAULT, # Restrict kernel selection to safe gpu kernels or safe dla kernels "num_min_timing_iters": 2, # Number of minimization timing 
iterations used to select kernels
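
---

For reference, the end-to-end to_backend workflow enabled by this patch series looks roughly like the sketch below. It is assembled from the API surface shown in the diffs above (``trtorch.TensorRTCompileSpec`` and ``torch._C._jit_to_tensorrt``); the MobileNetV2 model, the 1x3x300x300 input shape, and the FP16 settings are illustrative placeholders, not part of the patch itself.

import torch
import torchvision.models as models
import trtorch

# Script the model as usual; the backend API operates on the underlying _c module.
model = models.mobilenet_v2(pretrained=True).eval().to("cuda")
script_model = torch.jit.script(model)

# One TensorRTCompileSpec per method to be compiled (here just "forward").
spec = {
    "forward": trtorch.TensorRTCompileSpec({
        "input_shapes": [[1, 3, 300, 300]],
        "op_precision": torch.half,         # build an FP16 engine
        "refit": False,
        "debug": False,
        "strict_types": False,
        "allow_gpu_fallback": True,
        "device_type": "gpu",               # string, torch.device, or trtorch.DeviceType
        "capability": trtorch.EngineCapability.default,
        "num_min_timing_iters": 2,
        "num_avg_timing_iters": 1,
        "max_batch_size": 0,
    })
}

# Lower to TensorRT through the PyTorch backend API (instead of trtorch.compile).
trt_model = torch._C._jit_to_tensorrt(script_model._c, spec)

# Call the compiled method explicitly; inputs should match the engine precision.
x = torch.randn((1, 3, 300, 300)).to("cuda").to(torch.half)
print(trt_model.forward(x))

The main design difference from ``trtorch.compile`` is visible here: rather than assuming ``forward``, the backend API takes a dictionary mapping method names to wrapped spec objects, so each compiled method carries its own settings.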