From 70fda8e57fbddc6b310725aaf9ae02f367fffbc4 Mon Sep 17 00:00:00 2001
From: Jerry Zhang
Date: Mon, 19 Aug 2024 19:22:15 -0700
Subject: [PATCH] Small fix for micro benchmark code

Summary:
There seem to be some problems running benchmark_aq.py:
```
torch._dynamo.exc.Unsupported: torch.* op returned non-Tensor int call_function
```
when we run the benchmark with multiple shapes. The problem goes away if we reset the dynamo caches before each benchmark run.

Test Plan:
python benchmarks/benchmark_aq.py

Reviewers:

Subscribers:

Tasks:

Tags:
---
 benchmarks/benchmark_aq.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/benchmarks/benchmark_aq.py b/benchmarks/benchmark_aq.py
index 174038d206..ebf9e1e738 100644
--- a/benchmarks/benchmark_aq.py
+++ b/benchmarks/benchmark_aq.py
@@ -17,6 +17,7 @@
     _replace_with_custom_fn_if_matches_filter,
 )
 import copy
+from torchao.utils import unwrap_tensor_subclass
 
 def _int8wo_api(mod, **kwargs):
     if TORCH_VERSION_AT_LEAST_2_4:
@@ -133,15 +134,17 @@ def _bench_quantized_tensor_subclass_perf(api, ref_api, M, N, K, kwargs=None):
     WARMUP = 20
     RUNS = 100
 
+    torch._dynamo.reset()
     m_ref = torch.compile(m_ref, mode='max-autotune', fullgraph=True)
     benchmark_model(m_ref, WARMUP, example_inputs)
     ref_elapsed_time = benchmark_model(m_ref, RUNS, example_inputs)
 
+    torch._dynamo.reset()
     m = torch.compile(m, mode='max-autotune', fullgraph=True)
     benchmark_model(m, WARMUP, example_inputs)
     elapsed_time = benchmark_model(m, RUNS, example_inputs)
-
+    torch._dynamo.reset()
     m_bf16 = torch.compile(m_bf16, mode='max-autotune', fullgraph=True)
     benchmark_model(m_bf16, WARMUP, example_inputs)
     bf16_elapsed_time = benchmark_model(m_bf16, RUNS, example_inputs)
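
For context, below is a minimal sketch (not part of the patch) of the pattern the fix relies on: calling `torch._dynamo.reset()` before each `torch.compile` call when benchmarking several shapes in one process. The `benchmark_model` helper and the shape list here are hypothetical stand-ins for torchao's actual benchmark utilities.

```python
import time
import torch
import torch.nn as nn

# Hypothetical stand-in for torchao's benchmark_model utility:
# runs `model` on `example_inputs` `runs` times, returns average ms/iter.
def benchmark_model(model, runs, example_inputs):
    start = time.perf_counter()
    for _ in range(runs):
        model(*example_inputs)
    return (time.perf_counter() - start) * 1000 / runs

WARMUP, RUNS = 20, 100

# Illustrative shapes only; the real benchmark sweeps its own (M, N, K) list.
for M, K, N in [(1, 1024, 1024), (16, 2048, 2048)]:
    m = nn.Linear(K, N)
    example_inputs = (torch.randn(M, K),)

    # Reset dynamo caches before each compile, as the patch does, so state
    # left over from a previous shape does not cause spurious
    # "torch.* op returned non-Tensor int" failures under fullgraph=True.
    torch._dynamo.reset()
    m = torch.compile(m, mode="max-autotune", fullgraph=True)
    benchmark_model(m, WARMUP, example_inputs)           # warm-up
    elapsed = benchmark_model(m, RUNS, example_inputs)   # timed runs
    print(f"M={M} K={K} N={N}: {elapsed:.3f} ms/iter")
```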