Skip to content

Arm backend: enable dim_order #7831

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jan 23, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 16 additions & 3 deletions backends/arm/operator_support/to_copy_support.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright 2024 Arm Limited and/or its affiliates.
# Copyright 2024-2025 Arm Limited and/or its affiliates.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
Expand All @@ -22,7 +22,10 @@

@register_tosa_support_check
class ToCopySupported(SupportedTOSAOperatorCheck):
targets = [exir_ops.edge.aten._to_copy.default]
targets = [
exir_ops.edge.aten._to_copy.default,
Copy link
Contributor

@digantdesai digantdesai Jan 22, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I guess we don't lower empty?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not as of today.

exir_ops.edge.dim_order_ops._to_dim_order_copy.default,
]

tosa_specs = [
TosaSpecification.create_from_string("TOSA-0.80+BI"),
Expand Down Expand Up @@ -110,7 +113,7 @@ def is_node_supported(self, node: fx.Node, tosa_spec: TosaSpecification) -> bool
)
return False

# Check memory format
# Check memory format (to_copy)
if "memory_format" in node.kwargs:
if node.kwargs["memory_format"] in (torch.preserve_format,):
logger.info(
Expand All @@ -119,4 +122,14 @@ def is_node_supported(self, node: fx.Node, tosa_spec: TosaSpecification) -> bool
)
return False

# Check dim_order (to_dim_order_copy)
if "dim_order" in node.kwargs:
dim_order = node.kwargs["dim_order"]
if dim_order != list(range(len(dim_order))):
logger.info(
f"Argument {dim_order=} is not supported for "
f"{node.target.name()} right now." # pyre-ignore[16]
)
return False

return True
1 change: 1 addition & 0 deletions backends/arm/operators/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
op_table,
op_tanh,
op_to_copy,
op_to_dim_order_copy,
op_transpose,
op_unsqueeze,
op_upsample_nearest2d,
Expand Down
40 changes: 40 additions & 0 deletions backends/arm/operators/op_to_dim_order_copy.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
# Copyright 2025 Arm Limited and/or its affiliates.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

# pyre-unsafe
from typing import List

import serializer.tosa_serializer as ts
import torch
import tosa.Op as TosaOp

from executorch.backends.arm.operators.node_visitor import (
NodeVisitor,
register_node_visitor,
)
from executorch.backends.arm.tosa_mapping import TosaArg


@register_node_visitor
class ToDimOrderCopyVisitor(NodeVisitor):
    """Serialize ``dim_order_ops._to_dim_order_copy.default`` as a TOSA CAST.

    Only the dtype-conversion aspect of ``_to_dim_order_copy`` is realized
    here. Changing the actual dim_order of a tensor, or moving it to a
    different device, is not supported by this visitor.

    Note: the node is expected to be unquantized when it reaches this point.
    """

    target = "dim_order_ops._to_dim_order_copy.default"

    def define_node(
        self,
        node: torch.fx.Node,
        tosa_graph: ts.TosaSerializer,
        inputs: List[TosaArg],
        output: TosaArg,
    ) -> None:
        # A pure dtype conversion maps one-to-one onto a single TOSA CAST op
        # from the (single) input tensor to the output tensor.
        src_name = inputs[0].name
        tosa_graph.addOperator(TosaOp.Op().CAST, [src_name], [output.name])
13 changes: 4 additions & 9 deletions backends/arm/test/models/test_mobilenet_v2_arm.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@
from executorch.backends.arm.test import common, conftest

from executorch.backends.arm.test.tester.arm_tester import ArmTester
from executorch.exir import EdgeCompileConfig
from torchvision import models, transforms
from torchvision.models.mobilenetv2 import MobileNet_V2_Weights

Expand Down Expand Up @@ -47,10 +46,6 @@ class TestMobileNetV2(unittest.TestCase):
"executorch_exir_dialects_edge__ops_aten__native_batch_norm_legit_no_training_default",
}

_edge_compile_config: EdgeCompileConfig = EdgeCompileConfig(
_skip_dim_order=True, # TODO(T182928844): Delegate dim order op to backend.
)

def test_mv2_tosa_MI(self):
(
ArmTester(
Expand All @@ -59,7 +54,7 @@ def test_mv2_tosa_MI(self):
compile_spec=common.get_tosa_compile_spec("TOSA-0.80+MI"),
)
.export()
.to_edge_transform_and_lower(edge_compile_config=self._edge_compile_config)
.to_edge_transform_and_lower()
.to_executorch()
.run_method_and_compare_outputs(inputs=self.model_inputs)
)
Expand All @@ -73,7 +68,7 @@ def test_mv2_tosa_BI(self):
)
.quantize()
.export()
.to_edge_transform_and_lower(edge_compile_config=self._edge_compile_config)
.to_edge_transform_and_lower()
.to_executorch()
# atol=1.0 is a defensive upper limit
# TODO MLETROCH-72
Expand All @@ -92,7 +87,7 @@ def test_mv2_u55_BI(self):
)
.quantize()
.export()
.to_edge_transform_and_lower(edge_compile_config=self._edge_compile_config)
.to_edge_transform_and_lower()
.to_executorch()
.serialize()
)
Expand All @@ -112,7 +107,7 @@ def test_mv2_u85_BI(self):
)
.quantize()
.export()
.to_edge_transform_and_lower(edge_compile_config=self._edge_compile_config)
.to_edge_transform_and_lower()
.to_executorch()
.serialize()
)
Expand Down
9 changes: 2 additions & 7 deletions backends/arm/test/ops/test_add.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@
import torch
from executorch.backends.arm.test import common, conftest
from executorch.backends.arm.test.tester.arm_tester import ArmTester
from executorch.exir import EdgeCompileConfig
from executorch.exir.backend.compile_spec_schema import CompileSpec
from parameterized import parameterized

Expand Down Expand Up @@ -51,10 +50,6 @@ def __init__(self):
def forward(self, x, y):
return x + y

_edge_compile_config: EdgeCompileConfig = EdgeCompileConfig(
_skip_dim_order=True, # TODO(T182928844): Delegate dim order op to backend.
)

def _test_add_tosa_MI_pipeline(
self, module: torch.nn.Module, test_data: Tuple[torch.Tensor]
):
Expand All @@ -67,7 +62,7 @@ def _test_add_tosa_MI_pipeline(
.export()
.check_count({"torch.ops.aten.add.Tensor": 1})
.check_not(["torch.ops.quantized_decomposed"])
.to_edge(config=self._edge_compile_config)
.to_edge()
.partition()
.check_count({"torch.ops.higher_order.executorch_call_delegate": 1})
.to_executorch()
Expand All @@ -87,7 +82,7 @@ def _test_add_tosa_BI_pipeline(
.export()
.check_count({"torch.ops.aten.add.Tensor": 1})
.check(["torch.ops.quantized_decomposed"])
.to_edge(config=self._edge_compile_config)
.to_edge()
.partition()
.check_count({"torch.ops.higher_order.executorch_call_delegate": 1})
.to_executorch()
Expand Down
11 changes: 3 additions & 8 deletions backends/arm/test/ops/test_linear.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@
from executorch.backends.arm.test import common, conftest

from executorch.backends.arm.test.tester.arm_tester import ArmTester
from executorch.exir import EdgeCompileConfig
from executorch.exir.backend.compile_spec_schema import CompileSpec
from parameterized import parameterized

Expand Down Expand Up @@ -108,10 +107,6 @@
class TestLinear(unittest.TestCase):
"""tests the linear operation y = Ax + b"""

_edge_compile_config: EdgeCompileConfig = EdgeCompileConfig(
_skip_dim_order=True, # TODO(T182928844): Delegate dim order op to backend.
)

class Linear(torch.nn.Module):
def __init__(
self,
Expand Down Expand Up @@ -143,7 +138,7 @@ def _test_linear_tosa_MI_pipeline(
.export()
.check_count({"torch.ops.aten.linear.default": 1})
.check_not(["torch.ops.quantized_decomposed"])
.to_edge_transform_and_lower(edge_compile_config=self._edge_compile_config)
.to_edge_transform_and_lower()
.check_count({"torch.ops.higher_order.executorch_call_delegate": 1})
.to_executorch()
.run_method_and_compare_outputs(inputs=test_data)
Expand All @@ -164,7 +159,7 @@ def _test_linear_tosa_BI_pipeline(
.export()
.check_count({"torch.ops.aten.linear.default": 1})
.check(["torch.ops.quantized_decomposed"])
.to_edge_transform_and_lower(edge_compile_config=self._edge_compile_config)
.to_edge_transform_and_lower()
.check_count({"torch.ops.higher_order.executorch_call_delegate": 1})
.to_executorch()
.run_method_and_compare_outputs(inputs=test_data, qtol=1)
Expand All @@ -186,7 +181,7 @@ def _test_linear_tosa_ethosu_BI_pipeline(
.export()
.check_count({"torch.ops.aten.linear.default": 1})
.check(["torch.ops.quantized_decomposed"])
.to_edge_transform_and_lower(edge_compile_config=self._edge_compile_config)
.to_edge_transform_and_lower()
.check_count({"torch.ops.higher_order.executorch_call_delegate": 1})
.to_executorch()
.serialize()
Expand Down
9 changes: 2 additions & 7 deletions backends/arm/test/ops/test_maximum.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
import torch
from executorch.backends.arm.test import common, conftest
from executorch.backends.arm.test.tester.arm_tester import ArmTester
from executorch.exir import EdgeCompileConfig
from executorch.exir.backend.compile_spec_schema import CompileSpec
from parameterized import parameterized

Expand All @@ -38,10 +37,6 @@ def __init__(self):
def forward(self, x, y):
return torch.maximum(x, y)

_edge_compile_config: EdgeCompileConfig = EdgeCompileConfig(
_skip_dim_order=True, # TODO(T182928844): Delegate dim order op to backend.
)

def _test_maximum_tosa_MI_pipeline(
self, module: torch.nn.Module, test_data: Tuple[torch.Tensor]
):
Expand All @@ -54,7 +49,7 @@ def _test_maximum_tosa_MI_pipeline(
.export()
.check_count({"torch.ops.aten.maximum.default": 1})
.check_not(["torch.ops.quantized_decomposed"])
.to_edge(config=self._edge_compile_config)
.to_edge()
.partition()
.check_count({"torch.ops.higher_order.executorch_call_delegate": 1})
.to_executorch()
Expand All @@ -74,7 +69,7 @@ def _test_maximum_tosa_BI_pipeline(
.export()
.check_count({"torch.ops.aten.maximum.default": 1})
.check(["torch.ops.quantized_decomposed"])
.to_edge(config=self._edge_compile_config)
.to_edge()
.partition()
.check_count({"torch.ops.higher_order.executorch_call_delegate": 1})
.to_executorch()
Expand Down
9 changes: 2 additions & 7 deletions backends/arm/test/ops/test_minimum.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
import torch
from executorch.backends.arm.test import common, conftest
from executorch.backends.arm.test.tester.arm_tester import ArmTester
from executorch.exir import EdgeCompileConfig
from executorch.exir.backend.compile_spec_schema import CompileSpec
from parameterized import parameterized

Expand All @@ -38,10 +37,6 @@ def __init__(self):
def forward(self, x, y):
return torch.minimum(x, y)

_edge_compile_config: EdgeCompileConfig = EdgeCompileConfig(
_skip_dim_order=True, # TODO(T182928844): Delegate dim order op to backend.
)

def _test_minimum_tosa_MI_pipeline(
self, module: torch.nn.Module, test_data: Tuple[torch.Tensor]
):
Expand All @@ -54,7 +49,7 @@ def _test_minimum_tosa_MI_pipeline(
.export()
.check_count({"torch.ops.aten.minimum.default": 1})
.check_not(["torch.ops.quantized_decomposed"])
.to_edge(config=self._edge_compile_config)
.to_edge()
.partition()
.check_count({"torch.ops.higher_order.executorch_call_delegate": 1})
.to_executorch()
Expand All @@ -74,7 +69,7 @@ def _test_minimum_tosa_BI_pipeline(
.export()
.check_count({"torch.ops.aten.minimum.default": 1})
.check(["torch.ops.quantized_decomposed"])
.to_edge(config=self._edge_compile_config)
.to_edge()
.partition()
.check_count({"torch.ops.higher_order.executorch_call_delegate": 1})
.to_executorch()
Expand Down
9 changes: 2 additions & 7 deletions backends/arm/test/ops/test_sum.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@
import torch
from executorch.backends.arm.test import common
from executorch.backends.arm.test.tester.arm_tester import ArmTester
from executorch.exir import EdgeCompileConfig
from executorch.exir.backend.compile_spec_schema import CompileSpec
from parameterized import parameterized

Expand Down Expand Up @@ -47,10 +46,6 @@ class Sum(torch.nn.Module):
def forward(self, x: torch.Tensor, dim: int, keepdim: bool):
return x.sum(dim=dim, keepdim=keepdim)

_edge_compile_config: EdgeCompileConfig = EdgeCompileConfig(
_skip_dim_order=True, # TODO(T182928844): Delegate dim order op to backend.
)

def _test_sum_tosa_MI_pipeline(
self, module: torch.nn.Module, test_data: tuple[exampledata_t]
):
Expand All @@ -63,7 +58,7 @@ def _test_sum_tosa_MI_pipeline(
.export()
.check_count({"torch.ops.aten.sum.dim_IntList": 1})
.check_not(["torch.ops.quantized_decomposed"])
.to_edge(config=self._edge_compile_config)
.to_edge()
.partition()
.check_count({"torch.ops.higher_order.executorch_call_delegate": 1})
.to_executorch()
Expand All @@ -83,7 +78,7 @@ def _test_sum_tosa_BI_pipeline(
.export()
.check_count({"torch.ops.aten.sum.dim_IntList": 1})
.check(["torch.ops.quantized_decomposed"])
.to_edge(config=self._edge_compile_config)
.to_edge()
.partition()
.check_count({"torch.ops.higher_order.executorch_call_delegate": 1})
.to_executorch()
Expand Down
3 changes: 0 additions & 3 deletions backends/arm/test/tester/arm_tester.py
Original file line number Diff line number Diff line change
Expand Up @@ -227,8 +227,6 @@ def to_edge(
if config is not None:
to_edge_stage.edge_compile_conf = config

# TODO(T182928844): Delegate dim order op to backend.
to_edge_stage.edge_compile_conf._skip_dim_order = True
return super().to_edge(to_edge_stage)

def partition(self, partition_stage: Optional[Partition] = None):
Expand All @@ -254,7 +252,6 @@ def to_edge_transform_and_lower(
to_edge_and_lower_stage.partitioners = partitioners
if edge_compile_config is not None:
to_edge_and_lower_stage.edge_compile_conf = edge_compile_config
to_edge_and_lower_stage.edge_compile_conf._skip_dim_order = True
return super().to_edge_transform_and_lower(to_edge_and_lower_stage)

def to_executorch(self, to_executorch_stage: Optional[ToExecutorch] | None = None):
Expand Down
2 changes: 0 additions & 2 deletions examples/arm/aot_arm_compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -527,7 +527,6 @@ def get_args():
partitioner=[ArmPartitioner(compile_spec)],
compile_config=EdgeCompileConfig(
_check_ir_validity=False,
_skip_dim_order=True,
),
)

Expand All @@ -553,7 +552,6 @@ def get_args():
exported_program,
compile_config=EdgeCompileConfig(
_check_ir_validity=False,
_skip_dim_order=True,
),
)

Expand Down
Loading