
Commit cd1db55

yushangdi authored and pytorchmergebot committed
Fix tensor_constant name collision in aot_export_module (pytorch#151123)
Summary:

When we have an exported program that looks like this:

```
ExportedProgram:
    class GraphModule(torch.nn.Module):
        def forward(self, b__tensor_constant0: "f32[1]", ..., c_lifted_tensor_0: "i64[925]", ..., tupleized_input_0_0: "f32[10, 2139]"):
            clone: "i64[925]" = torch.ops.aten.clone.default(c_lifted_tensor_0);  c_lifted_tensor_0 = None
            index_select: "f32[10, 925]" = torch.ops.aten.index_select.default(tupleized_input_0_0, 1, clone);  clone = None
```

the graph produced by `aot_export_module` can have a name collision. Notice that the `_tensor_constant0` used to build the index below is different from the `_tensor_constant0` on the input module:

```
def forward(self):
    arg9_1: "f32[10, 2139]"
    # this should be int64; it conflicts with the original _tensor_constant0
    # (a clone was taken of this constant before lifting)
    _tensor_constant0: "f32[1]" = self._tensor_constant0
    index: "f32[10, 925]" = torch.ops.aten.index.Tensor(arg9_1, [None, _tensor_constant0]);  _tensor_constant0 = None
```

This caused the `tensors used as indices must be long, int...` AOTI error on the PT2I dashboard, because `clone` is later used as an index. The error happens because a new `_tensor_constant0` is created [here](https://github.com/pytorch/pytorch/blob/main/torch/fx/_symbolic_trace.py#L403-L412), and in `_unlift_graph` that new `_tensor_constant0` overrides the original `_tensor_constant0` on the input Module. (The `arg` for `clone` is created in `create_proxy` in `proxy.py`.)

To fix this, we run a graph pass before unlifting the graph inputs to avoid the name collision.

Test Plan:

```
buck run fbcode//mode/dev-nosan //caffe2/test/inductor:torchbind -- -r aot_compile_constant_folding
buck2 run mode/dev-nosan caffe2/test/inductor:test_aot_inductor -- -r aoti_constant_tensor_name_collision
```

Differential Revision: D72761937

Pull Request resolved: pytorch#151123

Approved by: https://github.com/tugsbayasgalan, https://github.com/jingsh
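For context on where the generated names come from, here is a minimal sketch using plain `torch.fx.symbolic_trace` (not code from this commit; `aot_export_module` names lifted constants the same way via `Tracer.create_arg`):

```python
import torch
import torch.fx

class Demo(torch.nn.Module):
    def forward(self, x):
        # each inline constant is lifted onto the traced module as a
        # fresh "_tensor_constant{i}" attribute during symbolic tracing
        return x + torch.tensor([1.0]) + torch.tensor([2.0])

gm = torch.fx.symbolic_trace(Demo())
print([n.target for n in gm.graph.nodes if n.op == "get_attr"])
# ['_tensor_constant0', '_tensor_constant1']
```

If the module the graph is later unlifted onto already owns a different buffer named `_tensor_constant0`, the generated attribute silently overrides it; that is the collision this commit resolves.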
1 parent bf92c98 · commit cd1db55

2 files changed: +108 −4 lines

test/inductor/test_aot_inductor.py (+42)
```diff
@@ -1200,6 +1200,48 @@ def forward(self, x):
         example_inputs = (torch.ones(4, 4, device=self.device),)
         self.check_model(Foo(self.device), example_inputs)
 
+    def test_aoti_constant_tensor_name_collision(self):
+        class SubModule(torch.nn.Module):
+            def __init__(self, device):
+                super().__init__()
+                self.register_buffer(
+                    "_tensor_constant1",
+                    torch.ones(1, device=device, dtype=torch.float32),
+                    persistent=True,
+                )
+
+            def forward(self, x):
+                return self.linear(x)
+
+        class Foo(torch.nn.Module):
+            def __init__(self, user_float_feature_idx, device):
+                super().__init__()
+                self.user_float_feature_idx = user_float_feature_idx
+                self.register_buffer(
+                    "_tensor_constant0",
+                    torch.ones(1, device=device, dtype=torch.float32),
+                    persistent=True,
+                )
+                self.sub_mod = SubModule(device)
+
+            def forward(self, x):
+                return (
+                    torch.index_select(
+                        x, 1, torch.tensor(self.user_float_feature_idx, device=x.device)
+                    ),
+                    self._tensor_constant0,
+                    self.sub_mod._tensor_constant1,
+                )
+
+        example_inputs = (torch.ones(4, 4, device=self.device),)
+        user_float_feature_idx = [1]
+        # we have to run run_decompositions() first to trigger the name collision
+        ep = torch.export.export(
+            Foo(user_float_feature_idx, self.device), example_inputs, strict=False
+        ).run_decompositions()
+        gm = ep.module()
+        self.check_model(gm, example_inputs)
+
     def test_large_grid(self):
         if self.device != GPU_TYPE:
             raise unittest.SkipTest("requires GPU")
```
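Besides the buck targets in the Test Plan, the new test can presumably also be run directly from a source checkout with PyTorch's standard test runner (the invocation below is an assumption, adjust to your setup):

```
python test/inductor/test_aot_inductor.py -k aoti_constant_tensor_name_collision
```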

torch/_inductor/compile_fx.py (+66 −4)
```diff
@@ -16,6 +16,7 @@
 from contextlib import AbstractContextManager
 from inspect import currentframe
 from itertools import count
+from operator import attrgetter
 from typing import Any, Callable, Optional, TYPE_CHECKING, TypeVar, Union
 from typing_extensions import Never, override, ParamSpec, Protocol, TypedDict, Unpack
 from unittest import mock
```
```diff
@@ -81,6 +82,7 @@
     should_use_remote_fx_graph_cache,
     tensor_is_aligned,
 )
+from torch._library.fake_class_registry import FakeScriptObject
 from torch._logging import trace_structured
 from torch._utils_internal import compile_time_strobelight_meta
 from torch.fx import GraphModule
```
```diff
@@ -246,11 +248,62 @@ def _warn_tf32_disabled() -> None:
     )
 
 
+def _resolve_name_collision(mod: GraphModule, gm: GraphModule) -> None:
+    """
+    In aot_export_module (make_fx), we create get_attr nodes with name prefix
+    "_tensor_constant" and "_torchbind_obj". See Tracer.create_arg() in
+    torch/fx/_symbolic_trace.py
+
+    However, this might result in a name collision if the original mod already
+    has a different buffer with the same name.
+
+    We resolve this potential name collision here by changing the target name
+    with a new number postfix.
+    """
+
+    def find_smallest_i(graph: fx.Graph, prefix: str) -> int:
+        i = 0
+        for node in graph.nodes:
+            if node.op == "get_attr" and node.target.startswith(prefix):
+                i = max(i, int(node.target.split(prefix)[-1]))
+        return i + 1
+
+    for node in gm.graph.nodes:
+        if node.op == "get_attr":
+            target_name = node.target
+            if not target_name.startswith(
+                "_tensor_constant"
+            ) and not target_name.startswith("_torchbind_obj"):
+                continue
+
+            if not hasattr(mod, target_name):
+                continue
+            gm_target = attrgetter(target_name)(gm)
+            model_target = attrgetter(target_name)(mod)
+            if (
+                torch.equal(gm_target, model_target)
+                and gm_target.dtype == model_target.dtype
+            ):
+                continue
+
+            prefix = (
+                "_tensor_constant"
+                if target_name.startswith("_tensor_constant")
+                else "_torchbind_obj"
+            )
+            new_id = find_smallest_i(gm.graph, prefix)
+            new_target_name = f"{prefix}{new_id}"
+            node.target = new_target_name
+            setattr(gm, new_target_name, gm_target)
+
+
 def _unlift_graph(
     mod: GraphModule, gm: GraphModule, graph_signature: GraphSignature
 ) -> GraphModule:
     from torch.export.unflatten import _assign_attr, _AttrKind
 
+    _resolve_name_collision(mod, gm)
+
     state_dict: dict[str, Union[torch.nn.parameter.Parameter, torch.Tensor]] = {}
     for name, param in mod.named_parameters(remove_duplicate=False):
         state_dict[name] = param
```
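For intuition, here is a small standalone sketch of the rename that `_resolve_name_collision` performs on a colliding node (a hypothetical setup, not code from this commit):

```python
import torch
from torch.fx import symbolic_trace

class Sub(torch.nn.Module):
    def forward(self, x):
        return x + torch.tensor([1.0])  # lifted as self._tensor_constant0

gm = symbolic_trace(Sub())
node = next(n for n in gm.graph.nodes if n.op == "get_attr")

# Suppose the module we unlift onto already owns a *different*
# _tensor_constant0. The pass re-registers the traced value under a
# fresh name and points the node at it, leaving the original intact:
new_name = "_tensor_constant1"  # the suffix find_smallest_i would pick
setattr(gm, new_name, getattr(gm, node.target))
node.target = new_name
gm.recompile()
print(gm.code)  # the generated forward now reads self._tensor_constant1
```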
```diff
@@ -1138,13 +1191,14 @@ def log_graph_runnable() -> str:
     if aot_mode and config.aot_inductor.use_runtime_constant_folding:
         # torchbind objects have names that start with _torchbind_obj
         # See caffe2/torch/fx/_symbolic_trace.py?lines=406
-        # We don't use node.meta["val"] because we don't typically
-        # attach meta["val"] for get_attr nodes.
         const_gm, const_output_index = split_const_gm(
             gm,
             skip_folding_node_fn=lambda node: node.op == "get_attr"
             and isinstance(node.target, str)
-            and node.target.startswith("_torchbind_obj"),
+            and (
+                node.target.startswith("_torchbind_obj")
+                or isinstance(node.meta.get("val", None), FakeScriptObject)
+            ),
         )
 
         const_graph = GraphLowering(
```
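Restated outside the lambda, the extended skip condition is roughly the following sketch: torchbind objects are now recognized either by the `_torchbind_obj` name prefix or by the type of their `meta["val"]`, which the hunk below starts populating for `get_attr` nodes.

```python
from torch._library.fake_class_registry import FakeScriptObject

def skip_folding_torchbind(node) -> bool:
    # keep torchbind objects out of runtime constant folding
    return (
        node.op == "get_attr"
        and isinstance(node.target, str)
        and (
            node.target.startswith("_torchbind_obj")
            or isinstance(node.meta.get("val", None), FakeScriptObject)
        )
    )
```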
```diff
@@ -2161,11 +2215,19 @@ def bw_compiler(
             # this will go away.
             for node in gm.graph.nodes:
                 if node.op == "get_attr" and "val" not in node.meta:
-                    target = getattr(gm, node.target)
+                    target = attrgetter(node.target)(gm)
                     if isinstance(target, torch.Tensor):
                         node.meta["val"] = fake_mode.from_tensor(
                             target, static_shapes=True
                         )
+                    elif isinstance(target, torch.ScriptObject):
+                        node.meta["val"] = (
+                            torch._library.fake_class_registry.maybe_to_fake_obj(
+                                fake_mode, target
+                            )
+                        )
+                    elif isinstance(target, FakeScriptObject):
+                        node.meta["val"] = target
 
             unlifted_gm = _unlift_graph(model_, gm, graph_signature)
             if "dynamo_flat_name_to_original_fqn" in model_.meta:
```
