# C++ API

Targets in this module create the user-facing C++ library for the TRTorch core.

## Building libtrtorch.so

### Debug build
``` shell
bazel build //cpp/api:libtrtorch.so --compilation_mode=dbg
```

### Release build

``` shell
bazel build //cpp/api:libtrtorch.so --cxxopt="-DNDEBUG"
```

> Temporary, will get real documentation soon
``` c++
namespace trtorch {
/**
 * Settings data structure for TRTorch compilation
 *
 */
struct TRTORCH_API ExtraInfo {
  /**
   * @brief A struct to hold an input range (used by TensorRT Optimization profile)
   *
   * This struct can either hold a single vector representing an input shape, signifying a
   * static input shape, or a set of three input shapes representing the min, optimal and max
   * input shapes allowed for the engine.
   */
  struct TRTORCH_API InputRange {
    std::vector<int64_t> min;
    std::vector<int64_t> opt;
    std::vector<int64_t> max;
    // ... (constructors elided)
  };

  /**
   * Supported Data Types that can be used with TensorRT engines
   *
   * This class is compatible with c10::DataTypes (but will check for TRT support)
   * so there should not be a reason that you need to use this type explicitly.
   */
  class DataType {
  public:
    // ...
    /**
     * ...
     * ex. trtorch::DataType type = DataType::kFloat;
     */
    enum Value : int8_t {
      /// FP32
      kFloat,
      /// FP16
      kHalf,
      /// INT8
      kChar,
    };

    DataType() = default;
    constexpr DataType(Value t) : value(t) {}
    DataType(c10::ScalarType t);
    // ...
  };

  /**
   * ...
   *
   * This class is compatible with c10::DeviceTypes (but will check for TRT support)
   * but the only applicable value is at::kCUDA, which maps to DeviceType::kGPU
   *
   * To use the DeviceType class itself, interface using the enum vs. normal instantiation
   *
   * ex. trtorch::DeviceType type = DeviceType::kGPU;
   */
  // ... (DeviceType class declaration elided)
  };

  /**
   * Enum for selecting engine capability
   */
  enum class EngineCapability : int8_t {
    kDEFAULT,
    // ...
  };

  ExtraInfo(std::vector<InputRange> input_ranges)
    : input_ranges(std::move(input_ranges)) {}
  ExtraInfo(std::vector<std::vector<int64_t>> fixed_sizes);
  ExtraInfo(std::vector<c10::ArrayRef<int64_t>> fixed_sizes);

  // Defaults should reflect TensorRT defaults for BuilderConfig

  /**
   * Sizes for inputs to engine, can either be a single size or a range
   * defined by Min, Optimal, Max sizes
   *
   * Order should match call order
   */
  std::vector<InputRange> input_ranges;

  /**
   * Default operating precision for the engine
   */
  DataType op_precision = DataType::kFloat;

  /**
   * Build a refittable engine
   */
  bool refit = false;

  // ...

  /**
   * Restrict operating types to only use the default operation precision (op_precision)
   */
  bool strict_types = false;

  /**
   * (Only used when targeting DLA (device))
   * Lets engine run layers on GPU if they are not supported on DLA
   */
  bool allow_gpu_fallback = true;

  // ...

  /**
   * Maximum size of workspace given to TensorRT
   */
  uint64_t workspace_size = 0;

  /**
   * Maximum batch size (must be >= 1 to be set, 0 means not set)
   */
  uint64_t max_batch_size = 0;

  /**
   * Calibration dataloaders for each input for post training quantization
   */
  nvinfer1::IInt8Calibrator* ptq_calibrator = nullptr;
};

/**
 * ...
 */
TRTORCH_API std::string get_build_info();

/**
 * Dump the version information for TRTorch including base libtorch and TensorRT versions
 * to stdout
 */
TRTORCH_API void dump_build_info();

/**
 * @brief Check to see if a module is fully supported by the compiler
 *
 * @param module: torch::jit::script::Module - Existing TorchScript module
 * @param method_name: std::string - Name of method to compile
 *
 * Takes a module and a method name and checks if the method graph contains purely
 * convertible operators
 *
 * Will print out a list of unsupported operators if the graph is unsupported
 */
TRTORCH_API bool CheckMethodOperatorSupport(const torch::jit::script::Module& module, std::string method_name);

/**
 * @brief Compile a TorchScript module for NVIDIA GPUs using TensorRT
 *
 * @param module: torch::jit::script::Module - Existing TorchScript module
 * @param info: trtorch::ExtraInfo - Compilation settings
 *
 * Takes an existing TorchScript module and a set of settings to configure the compiler
 * and will convert methods to JIT Graphs which call equivalent TensorRT engines
 *
 * Converts specifically the forward method of a TorchScript Module
 */
TRTORCH_API torch::jit::script::Module CompileGraph(const torch::jit::script::Module& module, ExtraInfo info);

/**
 * @brief Compile a TorchScript method for NVIDIA GPUs using TensorRT
 *
 * @param module: torch::jit::script::Module - Existing TorchScript module
 * @param method_name: std::string - Name of method to compile
 * @param info: trtorch::ExtraInfo - Compilation settings
 *
 * Takes an existing TorchScript module and a set of settings to configure the compiler
 * and will convert the selected method to a serialized TensorRT engine which can be run with
 * TensorRT
 */
TRTORCH_API std::string ConvertGraphToTRTEngine(const torch::jit::script::Module& module, std::string method_name, ExtraInfo info);

namespace ptq {
/**
 * @brief A factory to build a post training quantization calibrator from a torch dataloader
 *
 * Creates a calibrator to use for post training quantization.
 * If there are multiple inputs, the dataset should produce an example which is a vector (or similar container) of tensors rather than a single tensor
 *
 * By default the returned calibrator uses the TensorRT Entropy v2 algorithm to perform calibration. This is recommended for feed forward networks.
 * You can override the algorithm selection (such as to use the MinMax Calibrator recommended for NLP tasks) by calling make_int8_calibrator with
 * the calibrator class as a template parameter.
 *
 * e.g. trtorch::ptq::make_int8_calibrator<nvinfer1::IInt8MinMaxCalibrator>(std::move(calibration_dataloader), calibration_cache_file, use_cache);
 */
template<typename Algorithm = nvinfer1::IInt8EntropyCalibrator2, typename DataLoader>
TRTORCH_API inline Int8Calibrator<Algorithm, DataLoader> make_int8_calibrator(DataLoader dataloader, const std::string& cache_file_path, bool use_cache) {
  return Int8Calibrator<Algorithm, DataLoader>(std::move(dataloader), cache_file_path, use_cache);
}

/**
 * @brief A factory to build a post training quantization calibrator that only uses a previously generated calibration cache
 *
 * Creates a calibrator to use for post training quantization which reads from a previously created calibration cache. You can therefore
 * have a calibration cache generating program that requires a dataloader and a dataset, save the cache, and use it later in a different
 * program without the dataset dependency. However, the network should be recalibrated if its structure changes or the input data set
 * changes, and it is the responsibility of the application to ensure this.
 *
 * By default the returned calibrator uses the TensorRT Entropy v2 algorithm to perform calibration. This is recommended for feed forward networks.
 * You can override the algorithm selection (such as to use the MinMax Calibrator recommended for NLP tasks) by calling make_int8_cache_calibrator with
 * the calibrator class as a template parameter.
 *
 * e.g. trtorch::ptq::make_int8_cache_calibrator<nvinfer1::IInt8MinMaxCalibrator>(calibration_cache_file);
 */
template<typename Algorithm = nvinfer1::IInt8EntropyCalibrator2>
TRTORCH_API inline Int8CacheCalibrator<Algorithm> make_int8_cache_calibrator(const std::string& cache_file_path) {
  return Int8CacheCalibrator<Algorithm>(cache_file_path);
}
} // namespace ptq
} // namespace trtorch
```
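
For orientation while the real documentation is pending, here is a minimal usage sketch of the compile path above. The include path `trtorch/trtorch.h`, the module file `model.ts`, and the input shape are illustrative assumptions, not part of the listing.

``` c++
// Minimal sketch (assumptions noted above): compile the forward method of a
// TorchScript module with TensorRT and run it on CUDA.
#include <torch/script.h>
#include "trtorch/trtorch.h"  // assumed install location of the header above

int main() {
  // Load an existing TorchScript module (path is illustrative)
  auto mod = torch::jit::load("model.ts");

  // Optionally verify that every operator in forward() is convertible first
  if (!trtorch::CheckMethodOperatorSupport(mod, "forward")) {
    return 1;
  }

  // One input with a fixed (static) shape, via the fixed_sizes constructor
  std::vector<std::vector<int64_t>> fixed_sizes = {{1, 3, 224, 224}};
  trtorch::ExtraInfo info(fixed_sizes);
  info.op_precision = trtorch::ExtraInfo::DataType::kHalf;  // run the engine in FP16
  info.workspace_size = 1 << 20;                            // 1 MiB TensorRT workspace

  // Compile forward() to a TensorRT-backed module and run it
  auto trt_mod = trtorch::CompileGraph(mod, info);
  auto out = trt_mod.forward({torch::randn({1, 3, 224, 224}).to(torch::kCUDA)});
  return 0;
}
```

If you only need the serialized engine rather than a wrapped module, `ConvertGraphToTRTEngine` takes the same settings plus a method name and returns the engine as a string.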
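And a similarly hedged sketch of the `ptq` factories feeding INT8 calibration. The dataset construction, cache path, and `trtorch/ptq.h` include are assumptions for illustration; any `torch::data` dataloader yielding batches that match the input shape should work, and the conversion of the returned calibrator to `nvinfer1::IInt8Calibrator*` is assumed from the `ptq_calibrator` field above.

``` c++
// Sketch: INT8 post training quantization (names and paths are illustrative).
#include <torch/script.h>
#include <torch/torch.h>
#include "trtorch/trtorch.h"
#include "trtorch/ptq.h"  // assumed header providing the ptq factories above

torch::jit::script::Module compile_int8(torch::jit::script::Module mod) {
  // Stand-in calibration data: 64 random images; substitute a real dataset.
  auto dataset = torch::data::datasets::TensorDataset(torch::randn({64, 3, 224, 224}))
                     .map(torch::data::transforms::Stack<torch::data::TensorExample>());
  auto dataloader = torch::data::make_data_loader(
      std::move(dataset), torch::data::DataLoaderOptions().batch_size(1));

  // Entropy-v2 calibration by default; pass e.g. nvinfer1::IInt8MinMaxCalibrator
  // as the template argument to switch algorithms.
  auto calibrator = trtorch::ptq::make_int8_calibrator(
      std::move(dataloader), "/tmp/int8_calibration.cache", /*use_cache=*/false);

  std::vector<std::vector<int64_t>> fixed_sizes = {{1, 3, 224, 224}};
  trtorch::ExtraInfo info(fixed_sizes);
  info.op_precision = trtorch::ExtraInfo::DataType::kChar;  // INT8
  info.max_batch_size = 1;
  info.ptq_calibrator = calibrator;  // assumes the calibrator converts to nvinfer1::IInt8Calibrator*

  return trtorch::CompileGraph(mod, info);
}
```

`make_int8_cache_calibrator` follows the same pattern but takes only the cache file path, for runs that reuse a previously generated calibration cache.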