@@ -78,6 +78,10 @@ def run_torch_tensorrt(model, input_tensors, params, precision):
78
78
"inputs" : input_tensors ,
79
79
"enabled_precisions" : {precision_to_dtype (precision )}
80
80
}
81
+
82
+ if precision == 'int8' :
83
+ compile_settings .update ({"calib" : params .get ('calibration_cache' )})
84
+
81
85
82
86
model = torchtrt .compile (model , ** compile_settings )
83
87
@@ -166,26 +170,35 @@ def run_tensorrt(model, input_tensors, params, precision, is_trt_engine=False):
166
170
k += 1
167
171
168
172
timings = []
169
- with torch .no_grad ():
170
- with engine .create_execution_context () as context :
171
- for i in range (WARMUP_ITER ):
172
- context .execute_async (batch_size , bindings , torch .cuda .current_stream ().cuda_stream )
173
- torch .cuda .synchronize ()
174
-
175
- for i in range (iters ):
176
- start_time = timeit .default_timer ()
177
- context .execute_async (batch_size , bindings , torch .cuda .current_stream ().cuda_stream )
178
- torch .cuda .synchronize ()
179
- end_time = timeit .default_timer ()
180
- meas_time = end_time - start_time
181
- timings .append (meas_time )
182
- print ("Iterations {}: {:.6f} s" .format (i , end_time - start_time ))
173
+ with engine .create_execution_context () as context :
174
+ for i in range (WARMUP_ITER ):
175
+ context .execute_async (batch_size , bindings , torch .cuda .current_stream ().cuda_stream )
176
+ torch .cuda .synchronize ()
177
+
178
+ for i in range (iters ):
179
+ start_time = timeit .default_timer ()
180
+ context .execute_async (batch_size , bindings , torch .cuda .current_stream ().cuda_stream )
181
+ torch .cuda .synchronize ()
182
+ end_time = timeit .default_timer ()
183
+ meas_time = end_time - start_time
184
+ timings .append (meas_time )
185
+ print ("Iterations {}: {:.6f} s" .format (i , end_time - start_time ))
183
186
184
187
printStats ("TensorRT" , timings , precision )
185
188
186
189
# Dispatches an inference run to each backend listed in the configuration
187
190
def run (model , input_tensors , params , precision , is_trt_engine = False ):
188
191
for backend in params .get ('backend' ):
192
+
193
+ if precision == 'int8' :
194
+ if backend == 'all' or backend == 'torch' :
195
+ print ("int8 precision is not supported for torch runtime in this script yet" )
196
+ return False
197
+
198
+ if backend == 'all' or backend == 'torch_tensorrt' or params .get ('calibration_cache' , None ) == None :
199
+ print ("int8 precision expects calibration cache file for inference" )
200
+ return False
201
+
189
202
if backend == 'all' :
190
203
run_torch (model , input_tensors , params , precision )
191
204
run_torch_tensorrt (model , input_tensors , params , precision )
@@ -280,20 +293,25 @@ def load_model(params):
280
293
# Create random input tensors with the shapes specified in the configuration
281
294
torch .manual_seed (12345 )
282
295
283
- num_input = params .get ('input' ).get ('num_of_input ' )
296
+ num_input = params .get ('input' ).get ('num_inputs ' )
284
297
for precision in params .get ('runtime' ).get ('precision' , 'fp32' ):
285
298
input_tensors = []
286
- num_input = params .get ('input' ).get ('num_of_input ' , 1 )
299
+ num_input = params .get ('input' ).get ('num_inputs ' , 1 )
287
300
for i in range (num_input ):
288
301
inp_tensor = params .get ('input' ).get ('input' + str (i ))
289
302
input_tensors .append (torch .randint (0 , 2 , tuple (d for d in inp_tensor ), dtype = precision_to_dtype (precision )).cuda ())
290
303
304
+ if is_trt_engine :
305
+ print ("Warning, TensorRT engine file is configured. Please make sure the precision matches with the TRT engine for reliable results" )
306
+
291
307
if not is_trt_engine and precision == "fp16" or precision == "half" :
292
308
# If the model is a serialized TensorRT engine, calling model.half() fails
293
309
model = model .half ()
294
310
295
311
# Run inference
296
- run (model , input_tensors , params , precision , is_trt_engine )
312
+ status = run (model , input_tensors , params , precision , is_trt_engine )
313
+ if status == False :
314
+ continue
297
315
298
316
# Generate report
299
317
print ('Model Summary:' )
0 commit comments