Arm backend: enable dim_order (#7831)

oscarandersson8218 · web-flow · commit 135e875f2f0c · 2025-01-23T14:49:44.000+01:00
Add support for to_dim_order_copy

With _skip_dim_order=True removed from the EdgeCompileConfig, _to_copy
nodes are converted into _to_dim_order_copy nodes. This commit extends
our existing _to_copy logic so that it also covers _to_dim_order_copy.

Signed-off-by: Oscar Andersson <oscar.andersson@arm.com>
diff --git a/backends/arm/operator_support/to_copy_support.py b/backends/arm/operator_support/to_copy_support.py
@@ -1,4 +1,4 @@
-# Copyright 2024 Arm Limited and/or its affiliates.
+# Copyright 2024-2025 Arm Limited and/or its affiliates.
 #
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
@@ -22,7 +22,10 @@
 
 @register_tosa_support_check
 class ToCopySupported(SupportedTOSAOperatorCheck):
-    targets = [exir_ops.edge.aten._to_copy.default]
+    targets = [
+        exir_ops.edge.aten._to_copy.default,
+        exir_ops.edge.dim_order_ops._to_dim_order_copy.default,
+    ]
 
     tosa_specs = [
         TosaSpecification.create_from_string("TOSA-0.80+BI"),
@@ -110,7 +113,7 @@ def is_node_supported(self, node: fx.Node, tosa_spec: TosaSpecification) -> bool
             )
             return False
 
-        # Check memory format
+        # Check memory format (to_copy)
         if "memory_format" in node.kwargs:
             if node.kwargs["memory_format"] in (torch.preserve_format,):
                 logger.info(
@@ -119,4 +122,14 @@ def is_node_supported(self, node: fx.Node, tosa_spec: TosaSpecification) -> bool
                 )
                 return False
 
+        # Check dim_order (to_dim_order_copy)
+        if "dim_order" in node.kwargs:
+            dim_order = node.kwargs["dim_order"]
+            if dim_order != list(range(len(dim_order))):
+                logger.info(
+                    f"Argument {dim_order=} is not supported for "
+                    f"{node.target.name()} right now."  # pyre-ignore[16]
+                )
+                return False
+
         return True
diff --git a/backends/arm/operators/__init__.py b/backends/arm/operators/__init__.py
@@ -35,6 +35,7 @@
     op_table,
     op_tanh,
     op_to_copy,
+    op_to_dim_order_copy,
     op_transpose,
     op_upsample_nearest2d,
     op_view,
diff --git a/backends/arm/operators/op_to_dim_order_copy.py b/backends/arm/operators/op_to_dim_order_copy.py
@@ -0,0 +1,40 @@
+# Copyright 2025 Arm Limited and/or its affiliates.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+# pyre-unsafe
+from typing import List
+
+import serializer.tosa_serializer as ts
+import torch
+import tosa.Op as TosaOp
+
+from executorch.backends.arm.operators.node_visitor import (
+    NodeVisitor,
+    register_node_visitor,
+)
+from executorch.backends.arm.tosa_mapping import TosaArg
+
+
+@register_node_visitor
+class ToDimOrderCopyVisitor(NodeVisitor):
+    """
+    Implement the type cast functionality of _to_dim_order_copy.
+
+    Other features like setting of the dim_order or moving a tensor to a
+    different device are not supported.
+
+    Also note that the node should not be quantized.
+    """
+
+    target = "dim_order_ops._to_dim_order_copy.default"
+
+    def define_node(
+        self,
+        node: torch.fx.Node,
+        tosa_graph: ts.TosaSerializer,
+        inputs: List[TosaArg],
+        output: TosaArg,
+    ) -> None:
+        tosa_graph.addOperator(TosaOp.Op().CAST, [inputs[0].name], [output.name])
diff --git a/backends/arm/test/models/test_mobilenet_v2_arm.py b/backends/arm/test/models/test_mobilenet_v2_arm.py
@@ -14,7 +14,6 @@
 from executorch.backends.arm.test import common, conftest
 
 from executorch.backends.arm.test.tester.arm_tester import ArmTester
-from executorch.exir import EdgeCompileConfig
 from torchvision import models, transforms
 from torchvision.models.mobilenetv2 import MobileNet_V2_Weights
 
@@ -47,10 +46,6 @@ class TestMobileNetV2(unittest.TestCase):
         "executorch_exir_dialects_edge__ops_aten__native_batch_norm_legit_no_training_default",
     }
 
-    _edge_compile_config: EdgeCompileConfig = EdgeCompileConfig(
-        _skip_dim_order=True,  # TODO(T182928844): Delegate dim order op to backend.
-    )
-
     def test_mv2_tosa_MI(self):
         (
             ArmTester(
@@ -59,7 +54,7 @@ def test_mv2_tosa_MI(self):
                 compile_spec=common.get_tosa_compile_spec("TOSA-0.80+MI"),
             )
             .export()
-            .to_edge_transform_and_lower(edge_compile_config=self._edge_compile_config)
+            .to_edge_transform_and_lower()
             .to_executorch()
             .run_method_and_compare_outputs(inputs=self.model_inputs)
         )
@@ -73,7 +68,7 @@ def test_mv2_tosa_BI(self):
             )
             .quantize()
             .export()
-            .to_edge_transform_and_lower(edge_compile_config=self._edge_compile_config)
+            .to_edge_transform_and_lower()
             .to_executorch()
             # atol=1.0 is a defensive upper limit
             # TODO MLETROCH-72
@@ -92,7 +87,7 @@ def test_mv2_u55_BI(self):
             )
             .quantize()
             .export()
-            .to_edge_transform_and_lower(edge_compile_config=self._edge_compile_config)
+            .to_edge_transform_and_lower()
             .to_executorch()
             .serialize()
         )
@@ -112,7 +107,7 @@ def test_mv2_u85_BI(self):
             )
             .quantize()
             .export()
-            .to_edge_transform_and_lower(edge_compile_config=self._edge_compile_config)
+            .to_edge_transform_and_lower()
             .to_executorch()
             .serialize()
         )
diff --git a/backends/arm/test/ops/test_add.py b/backends/arm/test/ops/test_add.py
@@ -13,7 +13,6 @@
 import torch
 from executorch.backends.arm.test import common, conftest
 from executorch.backends.arm.test.tester.arm_tester import ArmTester
-from executorch.exir import EdgeCompileConfig
 from executorch.exir.backend.compile_spec_schema import CompileSpec
 from parameterized import parameterized
 
@@ -51,10 +50,6 @@ def __init__(self):
         def forward(self, x, y):
             return x + y
 
-    _edge_compile_config: EdgeCompileConfig = EdgeCompileConfig(
-        _skip_dim_order=True,  # TODO(T182928844): Delegate dim order op to backend.
-    )
-
     def _test_add_tosa_MI_pipeline(
         self, module: torch.nn.Module, test_data: Tuple[torch.Tensor]
     ):
@@ -67,7 +62,7 @@ def _test_add_tosa_MI_pipeline(
             .export()
             .check_count({"torch.ops.aten.add.Tensor": 1})
             .check_not(["torch.ops.quantized_decomposed"])
-            .to_edge(config=self._edge_compile_config)
+            .to_edge()
             .partition()
             .check_count({"torch.ops.higher_order.executorch_call_delegate": 1})
             .to_executorch()
@@ -87,7 +82,7 @@ def _test_add_tosa_BI_pipeline(
             .export()
             .check_count({"torch.ops.aten.add.Tensor": 1})
             .check(["torch.ops.quantized_decomposed"])
-            .to_edge(config=self._edge_compile_config)
+            .to_edge()
             .partition()
             .check_count({"torch.ops.higher_order.executorch_call_delegate": 1})
             .to_executorch()
diff --git a/backends/arm/test/ops/test_linear.py b/backends/arm/test/ops/test_linear.py
@@ -16,7 +16,6 @@
 from executorch.backends.arm.test import common, conftest
 
 from executorch.backends.arm.test.tester.arm_tester import ArmTester
-from executorch.exir import EdgeCompileConfig
 from executorch.exir.backend.compile_spec_schema import CompileSpec
 from parameterized import parameterized
 
@@ -108,10 +107,6 @@
 class TestLinear(unittest.TestCase):
     """tests the linear operation y = Ax + b"""
 
-    _edge_compile_config: EdgeCompileConfig = EdgeCompileConfig(
-        _skip_dim_order=True,  # TODO(T182928844): Delegate dim order op to backend.
-    )
-
     class Linear(torch.nn.Module):
         def __init__(
             self,
@@ -143,7 +138,7 @@ def _test_linear_tosa_MI_pipeline(
             .export()
             .check_count({"torch.ops.aten.linear.default": 1})
             .check_not(["torch.ops.quantized_decomposed"])
-            .to_edge_transform_and_lower(edge_compile_config=self._edge_compile_config)
+            .to_edge_transform_and_lower()
             .check_count({"torch.ops.higher_order.executorch_call_delegate": 1})
             .to_executorch()
             .run_method_and_compare_outputs(inputs=test_data)
@@ -164,7 +159,7 @@ def _test_linear_tosa_BI_pipeline(
             .export()
             .check_count({"torch.ops.aten.linear.default": 1})
             .check(["torch.ops.quantized_decomposed"])
-            .to_edge_transform_and_lower(edge_compile_config=self._edge_compile_config)
+            .to_edge_transform_and_lower()
             .check_count({"torch.ops.higher_order.executorch_call_delegate": 1})
             .to_executorch()
             .run_method_and_compare_outputs(inputs=test_data, qtol=1)
@@ -186,7 +181,7 @@ def _test_linear_tosa_ethosu_BI_pipeline(
             .export()
             .check_count({"torch.ops.aten.linear.default": 1})
             .check(["torch.ops.quantized_decomposed"])
-            .to_edge_transform_and_lower(edge_compile_config=self._edge_compile_config)
+            .to_edge_transform_and_lower()
             .check_count({"torch.ops.higher_order.executorch_call_delegate": 1})
             .to_executorch()
             .serialize()
diff --git a/backends/arm/test/ops/test_maximum.py b/backends/arm/test/ops/test_maximum.py
@@ -12,7 +12,6 @@
 import torch
 from executorch.backends.arm.test import common, conftest
 from executorch.backends.arm.test.tester.arm_tester import ArmTester
-from executorch.exir import EdgeCompileConfig
 from executorch.exir.backend.compile_spec_schema import CompileSpec
 from parameterized import parameterized
 
@@ -38,10 +37,6 @@ def __init__(self):
         def forward(self, x, y):
             return torch.maximum(x, y)
 
-    _edge_compile_config: EdgeCompileConfig = EdgeCompileConfig(
-        _skip_dim_order=True,  # TODO(T182928844): Delegate dim order op to backend.
-    )
-
     def _test_maximum_tosa_MI_pipeline(
         self, module: torch.nn.Module, test_data: Tuple[torch.Tensor]
     ):
@@ -54,7 +49,7 @@ def _test_maximum_tosa_MI_pipeline(
             .export()
             .check_count({"torch.ops.aten.maximum.default": 1})
             .check_not(["torch.ops.quantized_decomposed"])
-            .to_edge(config=self._edge_compile_config)
+            .to_edge()
             .partition()
             .check_count({"torch.ops.higher_order.executorch_call_delegate": 1})
             .to_executorch()
@@ -74,7 +69,7 @@ def _test_maximum_tosa_BI_pipeline(
             .export()
             .check_count({"torch.ops.aten.maximum.default": 1})
             .check(["torch.ops.quantized_decomposed"])
-            .to_edge(config=self._edge_compile_config)
+            .to_edge()
             .partition()
             .check_count({"torch.ops.higher_order.executorch_call_delegate": 1})
             .to_executorch()
diff --git a/backends/arm/test/ops/test_minimum.py b/backends/arm/test/ops/test_minimum.py
@@ -12,7 +12,6 @@
 import torch
 from executorch.backends.arm.test import common, conftest
 from executorch.backends.arm.test.tester.arm_tester import ArmTester
-from executorch.exir import EdgeCompileConfig
 from executorch.exir.backend.compile_spec_schema import CompileSpec
 from parameterized import parameterized
 
@@ -38,10 +37,6 @@ def __init__(self):
         def forward(self, x, y):
             return torch.minimum(x, y)
 
-    _edge_compile_config: EdgeCompileConfig = EdgeCompileConfig(
-        _skip_dim_order=True,  # TODO(T182928844): Delegate dim order op to backend.
-    )
-
     def _test_minimum_tosa_MI_pipeline(
         self, module: torch.nn.Module, test_data: Tuple[torch.Tensor]
     ):
@@ -54,7 +49,7 @@ def _test_minimum_tosa_MI_pipeline(
             .export()
             .check_count({"torch.ops.aten.minimum.default": 1})
             .check_not(["torch.ops.quantized_decomposed"])
-            .to_edge(config=self._edge_compile_config)
+            .to_edge()
             .partition()
             .check_count({"torch.ops.higher_order.executorch_call_delegate": 1})
             .to_executorch()
@@ -74,7 +69,7 @@ def _test_minimum_tosa_BI_pipeline(
             .export()
             .check_count({"torch.ops.aten.minimum.default": 1})
             .check(["torch.ops.quantized_decomposed"])
-            .to_edge(config=self._edge_compile_config)
+            .to_edge()
             .partition()
             .check_count({"torch.ops.higher_order.executorch_call_delegate": 1})
             .to_executorch()
diff --git a/backends/arm/test/ops/test_sum.py b/backends/arm/test/ops/test_sum.py
@@ -11,7 +11,6 @@
 import torch
 from executorch.backends.arm.test import common
 from executorch.backends.arm.test.tester.arm_tester import ArmTester
-from executorch.exir import EdgeCompileConfig
 from executorch.exir.backend.compile_spec_schema import CompileSpec
 from parameterized import parameterized
 
@@ -47,10 +46,6 @@ class Sum(torch.nn.Module):
         def forward(self, x: torch.Tensor, dim: int, keepdim: bool):
             return x.sum(dim=dim, keepdim=keepdim)
 
-    _edge_compile_config: EdgeCompileConfig = EdgeCompileConfig(
-        _skip_dim_order=True,  # TODO(T182928844): Delegate dim order op to backend.
-    )
-
     def _test_sum_tosa_MI_pipeline(
         self, module: torch.nn.Module, test_data: tuple[exampledata_t]
     ):
@@ -63,7 +58,7 @@ def _test_sum_tosa_MI_pipeline(
             .export()
             .check_count({"torch.ops.aten.sum.dim_IntList": 1})
             .check_not(["torch.ops.quantized_decomposed"])
-            .to_edge(config=self._edge_compile_config)
+            .to_edge()
             .partition()
             .check_count({"torch.ops.higher_order.executorch_call_delegate": 1})
             .to_executorch()
@@ -83,7 +78,7 @@ def _test_sum_tosa_BI_pipeline(
             .export()
             .check_count({"torch.ops.aten.sum.dim_IntList": 1})
             .check(["torch.ops.quantized_decomposed"])
-            .to_edge(config=self._edge_compile_config)
+            .to_edge()
             .partition()
             .check_count({"torch.ops.higher_order.executorch_call_delegate": 1})
             .to_executorch()
diff --git a/backends/arm/test/tester/arm_tester.py b/backends/arm/test/tester/arm_tester.py
@@ -227,8 +227,6 @@ def to_edge(
             if config is not None:
                 to_edge_stage.edge_compile_conf = config
 
-        # TODO(T182928844): Delegate dim order op to backend.
-        to_edge_stage.edge_compile_conf._skip_dim_order = True
         return super().to_edge(to_edge_stage)
 
     def partition(self, partition_stage: Optional[Partition] = None):
@@ -254,7 +252,6 @@ def to_edge_transform_and_lower(
                 to_edge_and_lower_stage.partitioners = partitioners
             if edge_compile_config is not None:
                 to_edge_and_lower_stage.edge_compile_conf = edge_compile_config
-        to_edge_and_lower_stage.edge_compile_conf._skip_dim_order = True
         return super().to_edge_transform_and_lower(to_edge_and_lower_stage)
 
     def to_executorch(self, to_executorch_stage: Optional[ToExecutorch] | None = None):
diff --git a/examples/arm/aot_arm_compiler.py b/examples/arm/aot_arm_compiler.py
@@ -527,7 +527,6 @@ def get_args():
             partitioner=[ArmPartitioner(compile_spec)],
             compile_config=EdgeCompileConfig(
                 _check_ir_validity=False,
-                _skip_dim_order=True,
             ),
         )
 
@@ -553,7 +552,6 @@ def get_args():
             exported_program,
             compile_config=EdgeCompileConfig(
                 _check_ir_validity=False,
-                _skip_dim_order=True,
             ),
         )
 

Original file line number	Diff line number	Diff line change
`@@ -14,7 +14,6 @@`
`14`	`14`	`from executorch.backends.arm.test import common, conftest`
`15`	`15`
`16`	`16`	`from executorch.backends.arm.test.tester.arm_tester import ArmTester`
`17`		`-from executorch.exir import EdgeCompileConfig`
`18`	`17`	`from torchvision import models, transforms`
`19`	`18`	`from torchvision.models.mobilenetv2 import MobileNet_V2_Weights`
`20`	`19`
`@@ -47,10 +46,6 @@ class TestMobileNetV2(unittest.TestCase):`
`47`	`46`	`"executorch_exir_dialects_edge__ops_aten__native_batch_norm_legit_no_training_default",`
`48`	`47`	`}`
`49`	`48`
`50`		`- _edge_compile_config: EdgeCompileConfig = EdgeCompileConfig(`
`51`		`- _skip_dim_order=True, # TODO(T182928844): Delegate dim order op to backend.`
`52`		`- )`
`53`		`-`
`54`	`49`	`def test_mv2_tosa_MI(self):`
`55`	`50`	`(`
`56`	`51`	`ArmTester(`
`@@ -59,7 +54,7 @@ def test_mv2_tosa_MI(self):`
`59`	`54`	`compile_spec=common.get_tosa_compile_spec("TOSA-0.80+MI"),`
`60`	`55`	`)`
`61`	`56`	`.export()`
`62`		`- .to_edge_transform_and_lower(edge_compile_config=self._edge_compile_config)`
	`57`	`+ .to_edge_transform_and_lower()`
`63`	`58`	`.to_executorch()`
`64`	`59`	`.run_method_and_compare_outputs(inputs=self.model_inputs)`
`65`	`60`	`)`
`@@ -73,7 +68,7 @@ def test_mv2_tosa_BI(self):`
`73`	`68`	`)`
`74`	`69`	`.quantize()`
`75`	`70`	`.export()`
`76`		`- .to_edge_transform_and_lower(edge_compile_config=self._edge_compile_config)`
	`71`	`+ .to_edge_transform_and_lower()`
`77`	`72`	`.to_executorch()`
`78`	`73`	`# atol=1.0 is a defensive upper limit`
`79`	`74`	`# TODO MLETROCH-72`
`@@ -92,7 +87,7 @@ def test_mv2_u55_BI(self):`
`92`	`87`	`)`
`93`	`88`	`.quantize()`
`94`	`89`	`.export()`
`95`		`- .to_edge_transform_and_lower(edge_compile_config=self._edge_compile_config)`
	`90`	`+ .to_edge_transform_and_lower()`
`96`	`91`	`.to_executorch()`
`97`	`92`	`.serialize()`
`98`	`93`	`)`
`@@ -112,7 +107,7 @@ def test_mv2_u85_BI(self):`
`112`	`107`	`)`
`113`	`108`	`.quantize()`
`114`	`109`	`.export()`
`115`		`- .to_edge_transform_and_lower(edge_compile_config=self._edge_compile_config)`
	`110`	`+ .to_edge_transform_and_lower()`
`116`	`111`	`.to_executorch()`
`117`	`112`	`.serialize()`
`118`	`113`	`)`
Original file line number	Diff line number	Diff line change
`@@ -527,7 +527,6 @@ def get_args():`
`527`	`527`	`partitioner=[ArmPartitioner(compile_spec)],`
`528`	`528`	`compile_config=EdgeCompileConfig(`
`529`	`529`	`_check_ir_validity=False,`
`530`		`- _skip_dim_order=True,`
`531`	`530`	`),`
`532`	`531`	`)`
`533`	`532`
`@@ -553,7 +552,6 @@ def get_args():`
`553`	`552`	`exported_program,`
`554`	`553`	`compile_config=EdgeCompileConfig(`
`555`	`554`	`_check_ir_validity=False,`
`556`		`- _skip_dim_order=True,`
`557`	`555`	`),`
`558`	`556`	`)`
`559`	`557`