From 70fda8e57fbddc6b310725aaf9ae02f367fffbc4 Mon Sep 17 00:00:00 2001
From: Jerry Zhang
Date: Mon, 19 Aug 2024 19:22:15 -0700
Subject: [PATCH] Small fix for micro benchmark code

Summary:
There seem to be some problems running benchmark_aq.py:
```
torch._dynamo.exc.Unsupported: torch.* op returned non-Tensor int call_function
```
when we run the benchmark with multiple shapes. The problem goes away if we reset the dynamo caches before each benchmark run.

Test Plan:
python benchmarks/benchmark_aq.py

Reviewers:

Subscribers:

Tasks:

Tags:
---
 benchmarks/benchmark_aq.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/benchmarks/benchmark_aq.py b/benchmarks/benchmark_aq.py
index 174038d206..ebf9e1e738 100644
--- a/benchmarks/benchmark_aq.py
+++ b/benchmarks/benchmark_aq.py
@@ -17,6 +17,7 @@
     _replace_with_custom_fn_if_matches_filter,
 )
 import copy
+from torchao.utils import unwrap_tensor_subclass
 
 def _int8wo_api(mod, **kwargs):
     if TORCH_VERSION_AT_LEAST_2_4:
@@ -133,15 +134,17 @@ def _bench_quantized_tensor_subclass_perf(api, ref_api, M, N, K, kwargs=None):
     WARMUP = 20
     RUNS = 100
 
+    torch._dynamo.reset()
     m_ref = torch.compile(m_ref, mode='max-autotune', fullgraph=True)
     benchmark_model(m_ref, WARMUP, example_inputs)
     ref_elapsed_time = benchmark_model(m_ref, RUNS, example_inputs)
 
+    torch._dynamo.reset()
     m = torch.compile(m, mode='max-autotune', fullgraph=True)
     benchmark_model(m, WARMUP, example_inputs)
     elapsed_time = benchmark_model(m, RUNS, example_inputs)
-
+    torch._dynamo.reset()
     m_bf16 = torch.compile(m_bf16, mode='max-autotune', fullgraph=True)
     benchmark_model(m_bf16, WARMUP, example_inputs)
     bf16_elapsed_time = benchmark_model(m_bf16, RUNS, example_inputs)
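
For context, below is a minimal sketch (not part of the patch) of the pattern the fix relies on: calling `torch._dynamo.reset()` before each `torch.compile` call when benchmarking several shapes in one process. The `benchmark_model` helper and the shape list here are hypothetical stand-ins for torchao's actual benchmark utilities.

```python
import time
import torch
import torch.nn as nn

# Hypothetical stand-in for torchao's benchmark_model utility:
# runs `model` on `example_inputs` `runs` times, returns average ms/iter.
def benchmark_model(model, runs, example_inputs):
    start = time.perf_counter()
    for _ in range(runs):
        model(*example_inputs)
    return (time.perf_counter() - start) * 1000 / runs

WARMUP, RUNS = 20, 100

# Illustrative shapes only; the real benchmark sweeps its own (M, N, K) list.
for M, K, N in [(1, 1024, 1024), (16, 2048, 2048)]:
    m = nn.Linear(K, N)
    example_inputs = (torch.randn(M, K),)

    # Reset dynamo caches before each compile, as the patch does, so state
    # left over from a previous shape does not cause spurious
    # "torch.* op returned non-Tensor int" failures under fullgraph=True.
    torch._dynamo.reset()
    m = torch.compile(m, mode="max-autotune", fullgraph=True)
    benchmark_model(m, WARMUP, example_inputs)           # warm-up
    elapsed = benchmark_model(m, RUNS, example_inputs)   # timed runs
    print(f"M={M} K={K} N={N}: {elapsed:.3f} ms/iter")
```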