import tvm
import numpy as np
import argparse
import os
import time

def run_model(model_dir, input_fp, output_fp, warmup_trials, run_trials, cuda, try_randin):
    # import compiled graph
    print("=> [TVM on TX2] using model files in {}".format(model_dir))
    assert(os.path.isdir(model_dir))

    print("=> [TVM on TX2] loading model lib and ptx")
    loaded_lib = tvm.module.load(os.path.join(model_dir, "deploy_lib.o"))
    if cuda:
        dev_lib = tvm.module.load(os.path.join(model_dir, "deploy_cuda.ptx"))
        loaded_lib.import_module(dev_lib)

    print("=> [TVM on TX2] loading model graph and params")
    loaded_graph = open(os.path.join(model_dir, "deploy_graph.json")).read()
    loaded_params = bytearray(open(os.path.join(model_dir, "deploy_param.params"), "rb").read())

    print("=> [TVM on TX2] creating TVM runtime module")
    fcreate = tvm.get_global_func("tvm.graph_runtime.create")
    ctx = tvm.gpu(0) if cuda else tvm.cpu(0)
    gmodule = fcreate(loaded_graph, loaded_lib, ctx.device_type, ctx.device_id)
    set_input, get_output, run = gmodule["set_input"], gmodule["get_output"], gmodule["run"]

    print("=> [TVM on TX2] feeding inputs and params into TVM module")
    rgb_np = np.load(input_fp)  # HWC
    x = np.zeros([1, 3, 224, 224])  # NCHW
    x[0, :, :, :] = np.transpose(rgb_np, (2, 0, 1))
    set_input('0', tvm.nd.array(x.astype('float32')))
    gmodule["load_params"](loaded_params)

    print("=> [TVM on TX2] running TVM module, saving output")
    run()  # not gmodule.run()
    out_shape = (1, 1, 224, 224)
    out = tvm.nd.empty(out_shape, "float32")
    get_output(0, out)
    np.save(output_fp, out.asnumpy())

    print("=> [TVM on TX2] benchmarking: {} warmup, {} run trials".format(warmup_trials, run_trials))
    # run model several times as a warmup
    for i in range(warmup_trials):
        run()
    ctx.sync()

    # profile runtime using TVM time evaluator
    ftimer = gmodule.time_evaluator("run", ctx, number=1, repeat=run_trials)
    profile_result = ftimer()
    profiled_runtime = profile_result[0]

    print("=> [TVM on TX2] profiled runtime (in ms): {:.5f}".format(1000*profiled_runtime))

    # try randomizing input
    if try_randin:
        randin_runtime = 0
        for i in range(run_trials):
            x = np.random.randn(1, 3, 224, 224)
            set_input('0', tvm.nd.array(x.astype('float32')))
            randin_ftimer = gmodule.time_evaluator("run", ctx, number=1, repeat=1)
            randin_profile_result = randin_ftimer()
            randin_runtime += randin_profile_result[0]
        randomized_input_runtime = randin_runtime/run_trials
        print("=> [TVM on TX2] with randomized input on every run, profiled runtime (in ms): {:.5f}".format(1000*randomized_input_runtime))

def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--model-dir', type=str, required=True,
                        help='path to folder with TVM-compiled model files (required)')
    parser.add_argument('--input-fp', type=str, default='data/rgb.npy',
                        help='numpy file containing input rgb data (default: data/rgb.npy)')
    parser.add_argument('--output-fp', type=str, default='data/pred.npy',
                        help='numpy file to store output prediction data (default: data/pred.npy)')

    parser.add_argument('--warmup', type=int, default=10,
                        help='number of inference warmup trials (default: 10)')
    parser.add_argument('--run', type=int, default=100,
                        help='number of inference run trials (default: 100)')
    parser.add_argument('--cuda', action='store_true', default=False,
                        help='run with CUDA (default: False)')

    parser.add_argument('--randin', action='store_true', default=False,
                        help='profile runtime while randomizing input on every run (default: False)')

    args = parser.parse_args()
    run_model(args.model_dir, args.input_fp, args.output_fp, args.warmup, args.run, args.cuda, try_randin=args.randin)

if __name__ == '__main__':
    main()
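# Example invocation (a sketch only; the script name "tx2_run_tvm.py" and the
# model directory path are placeholders, not taken from this file):
#
#   python3 tx2_run_tvm.py --model-dir <tvm_compiled_model_dir> --warmup 10 --run 100 --cuda
#
# With the flags defined as store_true above, passing --cuda runs the compiled
# model on the GPU; omitting it runs on the CPU.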