Commit c1f130a

Authored by peri044 and gs-olive
feat: Transition export workflows to use torch._export APIs (#2195)
Signed-off-by: Dheeraj Peri <[email protected]>
Co-authored-by: gs-olive <[email protected]>
1 parent: 43eb4bb

6 files changed: +93 additions, −194 deletions
+20 −146

```diff
@@ -1,160 +1,34 @@
 from __future__ import annotations
 
-import copy
 import logging
-import sys
-from contextlib import contextmanager
-from typing import Any, Callable, Dict, Generator, List, Optional, Tuple, Union
+import unittest.mock
+from typing import Any, Tuple
 
 import torch
-import torch._dynamo as torchdynamo
-from torch.fx.passes.infra.pass_base import PassResult
-from torch_tensorrt.dynamo.utils import req_torch_version
-from torch_tensorrt.fx.passes.lower_basic_pass_aten import (
-    compose_bmm,
-    compose_chunk,
-    compose_getitem_slice,
-    remove_ops,
-    replace_aten_op_with_indices,
-    replace_aten_reshape_alias_with_replace,
-    replace_builtin_ops,
-    replace_inplace_ops,
-    replace_native_layernorm_with_layernorm,
-    replace_transpose_mm_op_with_linear,
-    run_const_fold,
-)
-from typing_extensions import TypeAlias
-
-Value: TypeAlias = Union[Tuple["Value", ...], List["Value"], Dict[str, "Value"]]
+from torch._export import export
+from torch_tensorrt.dynamo.backend.backends import constant_fold
+from torch_tensorrt.dynamo.lowering import get_decompositions
+from torch_tensorrt.dynamo.utils import set_log_level
 
 logger = logging.getLogger(__name__)
 
 
-class DynamoConfig:
-    """
-    Manage Exir-specific configurations of Dynamo.
-    """
-
-    def __init__(
-        self,
-        capture_scalar_outputs: bool = True,
-        guard_nn_modules: bool = True,
-        dynamic_shapes: bool = True,
-        specialize_int: bool = True,
-        verbose: bool = True,
-    ) -> None:
-        self.capture_scalar_outputs = capture_scalar_outputs
-        self.guard_nn_modules = guard_nn_modules
-        self.dynamic_shapes = dynamic_shapes
-        self.specialize_int = specialize_int
-        self.verbose = verbose
-
-    def activate(self) -> None:
-        torchdynamo.config.capture_scalar_outputs = self.capture_scalar_outputs
-        torchdynamo.config.guard_nn_modules = self.guard_nn_modules
-        torchdynamo.config.dynamic_shapes = self.dynamic_shapes
-        torchdynamo.config.specialize_int = self.specialize_int
-        torchdynamo.config.verbose = self.verbose
-
-    def deactivate(self) -> None:
-        torchdynamo.config.capture_scalar_outputs = True
-        torchdynamo.config.guard_nn_modules = True
-        torchdynamo.config.dynamic_shapes = True
-        torchdynamo.config.specialize_int = True
-        torchdynamo.config.verbose = True
-
-
-@contextmanager
-def using_config(config: DynamoConfig) -> Generator[DynamoConfig, None, None]:
-    config.activate()
-    try:
-        yield config
-    finally:
-        config.deactivate()
-
-
-@contextmanager
-def setting_python_recursive_limit(limit: int = 10000) -> Generator[None, None, None]:
-    """
-    Temporarily increase the python interpreter stack recursion limit.
-    This is mostly used for pickling large scale modules.
-    """
-    default = sys.getrecursionlimit()
-    if limit > default:
-        sys.setrecursionlimit(limit)
-    try:
-        yield
-    finally:
-        sys.setrecursionlimit(default)
-
-
-@req_torch_version("2.dev")
-def dynamo_trace(
-    f: Callable[..., Value],
-    # pyre-ignore
-    args: Tuple[Any, ...],
-    aten_graph: bool,
-    tracing_mode: str = "real",
-    dynamo_config: Optional[DynamoConfig] = None,
-) -> Any:  # Tuple[torch.fx.GraphModule, Set[_guards.Guard]]:
-    """
-    TODO: Once we fully migrate to torchdynamo frontend, we will remove
-    this config option alltogether. For now, it helps with quick
-    experiments with playing around with TorchDynamo
-    """
-    if dynamo_config is None:
-        dynamo_config = DynamoConfig()
-    with using_config(dynamo_config), setting_python_recursive_limit(2000):
-        torchdynamo.reset()
-        try:
-            return torchdynamo.export(
-                f,
-                *copy.deepcopy(args),
-                aten_graph=aten_graph,
-                tracing_mode=tracing_mode,
-            )
-        except torchdynamo.exc.Unsupported as exc:
-            raise RuntimeError(
-                "The user code is using a feature we don't support. "
-                "Please try torchdynamo.explain() to get possible the reasons",
-            ) from exc
-        except Exception as exc:
-            raise RuntimeError(
-                "torchdynamo internal error occured. Please see above stacktrace"
-            ) from exc
-
-
-@req_torch_version("2.dev")
 def trace(
     model: torch.nn.Module | torch.fx.GraphModule,
     inputs: Tuple[Any, ...],
     **kwargs: Any,
 ) -> torch.fx.GraphModule:
-    """
-    Optimized trace with necessary passes which re-compose some ops or replace some ops
-    These passes should be general and functional purpose
-    """
-    passes_list = [
-        compose_bmm,
-        compose_chunk,
-        compose_getitem_slice,
-        replace_aten_reshape_alias_with_replace,
-        replace_aten_op_with_indices,
-        replace_transpose_mm_op_with_linear,  # after compose_bmm
-        replace_native_layernorm_with_layernorm,
-        remove_ops,
-        replace_builtin_ops,  # after replace_native_layernorm_with_layernorm
-        replace_inplace_ops,  # remove it once functionalization is enabled
-    ]
-
-    fx_module, __package__ = dynamo_trace(model, inputs, True, "symbolic")
-
-    for passes in passes_list:
-        pr: PassResult = passes(fx_module)
-        fx_module = pr.graph_module
-
-    fx_module(*inputs)
-
-    fx_module = run_const_fold(fx_module)
-    logger.info("Post export graph : %s\n", fx_module.graph)
-    return fx_module
+    # Set log level at the top of compilation (torch_tensorrt.dynamo)
+    if "debug" in kwargs and kwargs["debug"]:
+        set_log_level(logger.parent, logging.DEBUG)
+
+    experimental_decompositions = kwargs.get(
+        "enable_experimental_decompositions", False
+    )
+    with unittest.mock.patch(
+        "torch._export.DECOMP_TABLE", get_decompositions(experimental_decompositions)
+    ):
+        graph_module = export(model, tuple(inputs)).module()
+        constant_fold(graph_module)
+        logger.debug("Post export graph: " + str(graph_module.graph))
+        return graph_module
```
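The rewritten `trace` drops the Dynamo-specific config plumbing and the FX pass list in favor of a single `torch._export.export` call, with `torch._export.DECOMP_TABLE` patched so Torch-TensorRT's decompositions run during export and the result constant-folded afterwards. As a rough usage sketch of the new entry point (the module path `torch_tensorrt.dynamo.aten_tracer` and the ResNet model are assumptions; the changed file's name is not captured above):

```python
import torch
import torchvision.models as models

# Module path assumed; the changed file's name is not shown in this page.
from torch_tensorrt.dynamo.aten_tracer import trace

# Placeholder model/inputs for illustration only
model = models.resnet18(pretrained=True).eval().cuda()
inputs = [torch.randn((1, 3, 224, 224)).cuda()]

# Exports via torch._export.export() under the patched decomposition
# table, then constant-folds the resulting GraphModule.
graph_module = trace(
    model,
    inputs,
    debug=True,  # raises the torch_tensorrt.dynamo log level to DEBUG
    enable_experimental_decompositions=False,
)
print(graph_module.graph)
```

Patching `DECOMP_TABLE` through `unittest.mock.patch` keeps the override scoped to the `with` block, so the global table is restored once export returns.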

py/torch_tensorrt/dynamo/backend/backends.py

+8 −11

```diff
@@ -7,20 +7,20 @@
 import torch
 import torch._dynamo as td
 import torch.utils._pytree as pytree
-import torch_tensorrt
 from torch._dynamo.utils import detect_fake_mode
 from torch._functorch.aot_autograd import _aot_export_function
 from torch._ops import OpOverload
+from torch_tensorrt._utils import sanitized_torch_version
 from torch_tensorrt.dynamo import CompilationSettings
 from torch_tensorrt.dynamo.compile import compile_module
 from torch_tensorrt.dynamo.lowering._decompositions import get_decompositions
 from torch_tensorrt.dynamo.lowering._pre_aot_lowering import pre_aot_substitutions
-from torch_tensorrt.dynamo.utils import parse_dynamo_kwargs
+from torch_tensorrt.dynamo.utils import parse_dynamo_kwargs, set_log_level
 
 from packaging import version
 
 # Modify import location of utilities based on Torch version
-if version.parse(torch_tensorrt.sanitized_torch_version()) < version.parse("2.1.1"):
+if version.parse(sanitized_torch_version()) < version.parse("2.1.1"):
     from torch._inductor.freezing import ConstantFolder, replace_node_with_constant
 else:
     from torch._inductor.constant_folding import (
@@ -38,14 +38,11 @@ def torch_tensorrt_backend(
 ) -> torch.nn.Module:
     # Set log level at the top of compilation (torch_tensorrt.dynamo)
     if (
-        (
-            "options" in kwargs
-            and "debug" in kwargs["options"]
-            and kwargs["options"]["debug"]
-        )
-        or ("debug" in kwargs and kwargs["debug"])
-    ) and logger.parent:
-        logger.parent.setLevel(logging.DEBUG)
+        "options" in kwargs
+        and "debug" in kwargs["options"]
+        and kwargs["options"]["debug"]
+    ) or ("debug" in kwargs and kwargs["debug"]):
+        set_log_level(logger.parent, logging.DEBUG)
 
     DEFAULT_BACKEND = aot_torch_tensorrt_aten_backend
```
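The simplified condition accepts the debug flag either nested in the torch.compile `options` dict or as a top-level kwarg, and delegates the `logger.parent` None-check to the new `set_log_level` helper. A minimal sketch of how the flag reaches `torch_tensorrt_backend` (the Linear model is a placeholder, not from this commit):

```python
import torch
import torch_tensorrt  # noqa: F401  # importing registers the "torch_tensorrt" backend

model = torch.nn.Linear(8, 8).eval().cuda()  # placeholder model
x = torch.randn((4, 8)).cuda()

# "debug" arrives in the backend's kwargs under "options", matching the
# first branch of the rewritten condition above.
compiled = torch.compile(model, backend="torch_tensorrt", options={"debug": True})
compiled(x)
```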

py/torch_tensorrt/dynamo/compile.py

+2 −2

```diff
@@ -33,6 +33,7 @@
 )
 from torch_tensorrt.dynamo.utils import (
     prepare_inputs,
+    set_log_level,
     to_torch_device,
     to_torch_tensorrt_device,
 )
@@ -72,8 +73,7 @@ def compile(
     **kwargs: Any,
 ) -> torch.fx.GraphModule:
     if debug:
-        if logger.parent:
-            logger.parent.setLevel(logging.DEBUG)
+        set_log_level(logger.parent, logging.DEBUG)
 
     enabled_precisions = set(enabled_precisions)
 
```
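The same helper replaces the inline `logger.parent` guard at the dynamo compile entry point. A hedged usage sketch with a placeholder module (the model and input shapes are assumptions):

```python
import torch
import torch_tensorrt

model = torch.nn.Linear(8, 8).eval().cuda()  # placeholder model

# debug=True now routes through set_log_level() rather than touching
# logger.parent directly at the call site.
trt_module = torch_tensorrt.compile(
    model,
    ir="dynamo",
    inputs=[torch_tensorrt.Input((4, 8))],
    debug=True,
)
```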

py/torch_tensorrt/dynamo/utils.py

+10 −0

```diff
@@ -63,6 +63,16 @@ def cosine_similarity(gt_tensor: torch.Tensor, pred_tensor: torch.Tensor) -> flo
     return res
 
 
+def set_log_level(parent_logger: Any, level: Any) -> None:
+    """
+    Sets the log level to the user provided level.
+    This is used to set debug logging at a global level
+    at entry points of tracing, dynamo and torch_compile compilation.
+    """
+    if parent_logger:
+        parent_logger.setLevel(level)
+
+
 def prepare_inputs(
     inputs: Input | torch.Tensor | Sequence[Any] | Dict[Any, Any],
     device: torch.device = torch.device("cuda"),
```
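`set_log_level` centralizes the `if logger.parent:` guards that this commit removes from the individual entry points. A minimal sketch of the helper in isolation:

```python
import logging

from torch_tensorrt.dynamo.utils import set_log_level

# Typical call: raise the parent (torch_tensorrt) logger to DEBUG so every
# child logger in the package inherits the level.
pkg_logger = logging.getLogger("torch_tensorrt.dynamo")
set_log_level(pkg_logger.parent, logging.DEBUG)

# The truthiness check inside the helper makes a missing parent a no-op,
# replacing the repeated `if logger.parent:` guards at call sites.
set_log_level(None, logging.DEBUG)
```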

tests/py/dynamo/models/test_models.py

+0 −15

```diff
@@ -40,9 +40,6 @@ def test_resnet18(ir):
     # Clean up model env
     torch._dynamo.reset()
 
-    with torch.no_grad():
-        torch.cuda.empty_cache()
-
 
 @pytest.mark.unit
 def test_mobilenet_v2(ir):
@@ -74,9 +71,6 @@ def test_mobilenet_v2(ir):
     # Clean up model env
     torch._dynamo.reset()
 
-    with torch.no_grad():
-        torch.cuda.empty_cache()
-
 
 @pytest.mark.unit
 def test_efficientnet_b0(ir):
@@ -108,9 +102,6 @@ def test_efficientnet_b0(ir):
     # Clean up model env
     torch._dynamo.reset()
 
-    with torch.no_grad():
-        torch.cuda.empty_cache()
-
 
 @pytest.mark.unit
 def test_bert_base_uncased(ir):
@@ -155,9 +146,6 @@ def test_bert_base_uncased(ir):
     # Clean up model env
     torch._dynamo.reset()
 
-    with torch.no_grad():
-        torch.cuda.empty_cache()
-
 
 @pytest.mark.unit
 def test_resnet18_half(ir):
@@ -187,6 +175,3 @@ def test_resnet18_half(ir):
 
     # Clean up model env
     torch._dynamo.reset()
-
-    with torch.no_grad():
-        torch.cuda.empty_cache()
```
