update SqueezeInt4LinearInputs to process relu/gelu inputs too

nathanaelsee · facebook-github-bot · commit 90c24a8f0387 · 2025-02-20T12:29:30.000-08:00
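Summary: Rename the SqueezeInt4LinearInputs pass to SqueezeUnsqueezeInputs and extend it so that relu and gelu ops, in addition to linear_weight_int4, are wrapped with squeeze/unsqueeze view ops. FuseViewCopyTransform now also removes view_copy nodes whose target shape equals their input shape.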

Differential Revision: D69673068
diff --git a/backends/transforms/fuse_view_copy.py b/backends/transforms/fuse_view_copy.py
@@ -39,8 +39,23 @@ def merge_view_copy_chains(graph: torch.fx.Graph) -> torch.fx.Graph:
     graph.eliminate_dead_code()
     return graph
 
+def remove_noop_view_copy(graph: torch.fx.Graph) -> torch.fx.Graph:
+    """
+    Remove view_copy nodes whose target shape equals the input shape; return the graph.
+    """
+    ops = exir_ops.edge
+    view_op = ops.aten.view_copy.default
+    for node in graph.nodes:
+        if node.op == "call_function" and node.target == view_op:
+            input_shape = list(node.args[0].meta["val"].shape)
+            target_shape = list(node.args[1])  # may be a tuple; list != tuple in Python
+            if input_shape == target_shape:
+                node.replace_all_uses_with(node.args[0])  # orphaned view is swept below
+    graph.eliminate_dead_code()
+    return graph
 
 class FuseViewCopyTransform(ExportPass):
     def call(self, graph_module: torch.fx.GraphModule) -> PassResult:
         graph_module.graph = merge_view_copy_chains(graph_module.graph)
+        graph_module.graph = remove_noop_view_copy(graph_module.graph)
         return PassResult(graph_module, True)
diff --git a/backends/vulkan/_passes/TARGETS b/backends/vulkan/_passes/TARGETS
@@ -31,9 +31,9 @@ runtime.python_library(
 )
 
 runtime.python_library(
-    name = "squeeze_int4_linear_inputs",
+    name = "squeeze_unsqueeze_inputs",
     srcs = [
-        "squeeze_int4_linear_inputs.py",
+        "squeeze_unsqueeze_inputs.py",
     ],
     visibility = [
         "//executorch/backends/...",
@@ -114,7 +114,7 @@ runtime.python_library(
         ":remove_asserts",
         ":remove_local_scalar_dense",
         ":remove_redundant_ops",
-        ":squeeze_int4_linear_inputs",
+        ":squeeze_unsqueeze_inputs",
         ":tag_memory_meta_pass",
     ]
 )
diff --git a/backends/vulkan/_passes/__init__.py b/backends/vulkan/_passes/__init__.py
@@ -20,8 +20,8 @@
 from executorch.backends.vulkan._passes.remove_redundant_ops import (
     RemoveRedundantOpsTransform,
 )
-from executorch.backends.vulkan._passes.squeeze_int4_linear_inputs import (
-    SqueezeInt4LinearInputs,
+from executorch.backends.vulkan._passes.squeeze_unsqueeze_inputs import (
+    SqueezeUnsqueezeInputs,
 )
 from executorch.backends.vulkan._passes.tag_memory_meta_pass import TagMemoryMetaPass
 
@@ -32,6 +32,6 @@
     "RemoveAssertsTransform",
     "RemoveLocalScalarDenseOpsTransform",
     "RemoveRedundantOpsTransform",
-    "SqueezeInt4LinearInputs",
+    "SqueezeUnsqueezeInputs",
     "TagMemoryMetaPass",
 ]
diff --git a/backends/vulkan/_passes/squeeze_unsqueeze_inputs.py b/backends/vulkan/_passes/squeeze_unsqueeze_inputs.py
@@ -14,8 +14,13 @@
 
 from torch.fx.node import Argument
 
+class SqueezeUnsqueezeInputs(ExportPass):
+    _squeezable_ops = [
+        exir_ops.edge.et_vk.linear_weight_int4.default,
+        exir_ops.edge.aten.relu.default,
+        exir_ops.edge.aten.gelu.default,
+    ]
 
-class SqueezeInt4LinearInputs(ExportPass):
     def call_operator(
         self,
         op,  # pyre-ignore
@@ -26,7 +31,7 @@ def call_operator(
         def _squeezable(shape: List[int]) -> bool:
             return len(shape) > 2 and 1 in shape
 
-        if op != exir_ops.edge.et_vk.linear_weight_int4.default:
+        if op not in self._squeezable_ops:
             return super().call_operator(op, args, kwargs, meta)
 
         # pyre-ignore[16]: `None` has no attribute `node`
diff --git a/backends/vulkan/vulkan_preprocess.py b/backends/vulkan/vulkan_preprocess.py
@@ -26,7 +26,7 @@
     insert_prepack_nodes,
     RemoveLocalScalarDenseOpsTransform,
     RemoveRedundantOpsTransform,
-    SqueezeInt4LinearInputs,
+    SqueezeUnsqueezeInputs,
     TagMemoryMetaPass,
 )
 
@@ -153,7 +153,7 @@ def preprocess(  # noqa: C901
                 RemoveRedundantOpsTransform(),
                 AddmmToLinearTransform(),
                 FuseDequantLinearPass(),
-                SqueezeInt4LinearInputs(),
+                SqueezeUnsqueezeInputs(),
                 FuseViewCopyTransform(),
                 ViewCopyToSqueezeUnsqueezePass(),
                 FuseBatchNormWithConvPass(program),

Original file line number	Diff line number	Diff line change
`@@ -31,9 +31,9 @@ runtime.python_library(`
`31`	`31`	`)`
`32`	`32`
`33`	`33`	`runtime.python_library(`
`34`		`- name = "squeeze_int4_linear_inputs",`
	`34`	`+ name = "squeeze_unsqueeze_inputs",`
`35`	`35`	`srcs = [`
`36`		`- "squeeze_int4_linear_inputs.py",`
	`36`	`+ "squeeze_unsqueeze_inputs.py",`
`37`	`37`	`],`
`38`	`38`	`visibility = [`
`39`	`39`	`"//executorch/backends/...",`
`@@ -114,7 +114,7 @@ runtime.python_library(`
`114`	`114`	`":remove_asserts",`
`115`	`115`	`":remove_local_scalar_dense",`
`116`	`116`	`":remove_redundant_ops",`
`117`		`- ":squeeze_int4_linear_inputs",`
	`117`	`+ ":squeeze_unsqueeze_inputs",`
`118`	`118`	`":tag_memory_meta_pass",`
`119`	`119`	`]`
`120`	`120`	`)`
Original file line number	Diff line number	Diff line change
`@@ -20,8 +20,8 @@`
`20`	`20`	`from executorch.backends.vulkan._passes.remove_redundant_ops import (`
`21`	`21`	`RemoveRedundantOpsTransform,`
`22`	`22`	`)`
`23`		`-from executorch.backends.vulkan._passes.squeeze_int4_linear_inputs import (`
`24`		`- SqueezeInt4LinearInputs,`
	`23`	`+from executorch.backends.vulkan._passes.squeeze_unsqueeze_inputs import (`
	`24`	`+ SqueezeUnsqueezeInputs,`
`25`	`25`	`)`
`26`	`26`	`from executorch.backends.vulkan._passes.tag_memory_meta_pass import TagMemoryMetaPass`
`27`	`27`
`@@ -32,6 +32,6 @@`
`32`	`32`	`"RemoveAssertsTransform",`
`33`	`33`	`"RemoveLocalScalarDenseOpsTransform",`
`34`	`34`	`"RemoveRedundantOpsTransform",`
`35`		`- "SqueezeInt4LinearInputs",`
	`35`	`+ "SqueezeUnsqueezeInputs",`
`36`	`36`	`"TagMemoryMetaPass",`
`37`	`37`	`]`