feat: Add functionality to performance tooling #1451

Merged
merged 1 commit on Nov 21, 2022
8 changes: 8 additions & 0 deletions tools/perf/benchmark.sh
@@ -62,3 +62,11 @@ do
   --truncate \
   --report "bert_base_perf_bs${bs}.txt"
 done
+
+# Collect and concatenate all results
+echo "Concatenating all results"
+(echo "Output of All Model Runs"; echo) >> all_outputs.txt
+
+for i in *_bs*.txt;
+do (echo "$i"; cat "$i"; echo; echo) >> all_outputs.txt;
+done
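For readers more at home in Python, here is a minimal sketch of the same aggregation step, assuming the per-run reports (`*_bs*.txt`) sit in the current directory; the file names and the `all_outputs.txt` output path mirror the shell loop above.

    from pathlib import Path

    # Concatenate every per-run report into a single summary file,
    # mirroring the shell loop in benchmark.sh.
    with open("all_outputs.txt", "a") as out:
        out.write("Output of All Model Runs\n\n")
        for report in sorted(Path(".").glob("*_bs*.txt")):
            out.write(f"{report.name}\n")   # header: the report file name
            out.write(report.read_text())   # the report body
            out.write("\n\n")               # blank lines between reports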
14 changes: 11 additions & 3 deletions tools/perf/perf_run.py
@@ -2,6 +2,7 @@
 from __future__ import absolute_import
 from __future__ import division

+import time
 import timeit
 import numpy as np
 import torch.backends.cudnn as cudnn
@@ -103,7 +104,10 @@ def run_torch_tensorrt(
     if precision == "int8":
         compile_settings.update({"calib": params.get("calibration_cache")})

+    start_compile = time.time_ns()
     model = torchtrt.compile(model, **compile_settings)
+    end_compile = time.time_ns()
+    compile_time_ms = (end_compile - start_compile) / 1e6

     iters = params.get("iterations", 20)
     # Warm up
@@ -123,7 +127,7 @@ def run_torch_tensorrt(
         meas_time = end_time - start_time
         timings.append(meas_time)

-    recordStats("Torch-TensorRT", timings, precision, batch_size)
+    recordStats("Torch-TensorRT", timings, precision, batch_size, compile_time_ms)


 # Runs inference using FX2TRT backend
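The timing pattern added here is plain wall-clock measurement around the compile call. A self-contained sketch of the same idea follows; `slow_compile` is a hypothetical stand-in for `torchtrt.compile` (or fx2trt's `compile`), only there to have something to time.

    import time

    def slow_compile(model):
        # Hypothetical stand-in for an expensive compilation step.
        time.sleep(0.25)
        return model

    start_compile = time.time_ns()   # nanosecond wall clock before compilation
    model = slow_compile(object())
    end_compile = time.time_ns()     # ... and after
    compile_time_ms = (end_compile - start_compile) / 1e6  # ns -> ms
    print(f"compile took {compile_time_ms:.1f} ms")        # ~250 ms here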
@@ -136,13 +140,16 @@ def run_fx2trt(model, input_tensors, params, precision, batch_size):
         model.half()
         input_tensors = [tensor.half() for tensor in input_tensors]
     # Run lowering eager mode benchmark
+    start_compile = time.time_ns()
     model = compile(
         model,
         input_tensors,
         max_batch_size=batch_size,
         lower_precision=precision,
         verbose_log=False,
     )
+    end_compile = time.time_ns()
+    compile_time_ms = (end_compile - start_compile) / 1e6

     iters = params.get("iterations", 20)
     # Warm up
@@ -162,7 +169,7 @@ def run_fx2trt(model, input_tensors, params, precision, batch_size):
         meas_time = end_time - start_time
         timings.append(meas_time)

-    recordStats("FX-TensorRT", timings, precision, batch_size)
+    recordStats("FX-TensorRT", timings, precision, batch_size, compile_time_ms)


 def torch_dtype_from_trt(dtype):
@@ -331,7 +338,7 @@ def run(


 # Generate report
-def recordStats(backend, timings, precision, batch_size=1):
+def recordStats(backend, timings, precision, batch_size=1, compile_time_ms=None):
     times = np.array(timings)
     steps = len(times)
     speeds = batch_size / times
@@ -350,6 +357,7 @@ def recordStats(backend, timings, precision, batch_size=1):
         "Mean(FPS)": speed_mean,
         "Median-Latency(ms)": time_med * 1000,
         "Mean-Latency(ms)": time_mean * 1000,
+        "Compile Time(ms)": compile_time_ms,
     }
     results.append(stats)

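Because the new keyword argument defaults to `None`, existing callers that never time compilation keep working unchanged, and backends that skip compilation simply report `Compile Time(ms): None`. A trimmed-down sketch of the resulting stats entry, with illustrative numbers and a `record_stats` helper that is a simplified stand-in for the real `recordStats`:

    import numpy as np

    def record_stats(backend, timings, precision, batch_size=1, compile_time_ms=None):
        # Simplified recordStats: derive throughput and latency from the
        # raw per-iteration timings (seconds).
        times = np.array(timings)
        return {
            "Backend": backend,
            "Precision": precision,
            "Batch size": batch_size,
            "Mean(FPS)": float(np.mean(batch_size / times)),
            "Mean-Latency(ms)": float(np.mean(times)) * 1000,
            "Compile Time(ms)": compile_time_ms,  # None when compilation was not timed
        }

    # Illustrative numbers: three 10 ms iterations and a 1.2 s compile.
    print(record_stats("Torch-TensorRT", [0.010, 0.010, 0.010], "fp16",
                       batch_size=8, compile_time_ms=1200.0))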