
Commit 2d86424

Add complex dtype support to mul
Differential Revision: D73877440
Pull Request resolved: #10560
1 parent b8b43f6 commit 2d86424
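
For context: complex multiplication follows (a+bi)(c+di) = (ac-bd) + (ad+bc)i, which is exactly what the kernels below now apply elementwise. A minimal standalone C++ check of that arithmetic (using std::complex in place of ExecuTorch tensor types), matching the first element pair of the new test:

#include <complex>
#include <iostream>

int main() {
  // First element pair from the new test: (1+2i) * (2+3i).
  std::complex<float> a(1, 2), b(2, 3);
  std::cout << a * b << "\n";  // (1*2 - 2*3) + (1*3 + 2*2)i = (-4,7)
}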

File tree

3 files changed (+146, -45 lines)


kernels/optimized/cpu/op_mul.cpp (+74, -32)
@@ -120,21 +120,47 @@ Tensor& opt_mul_out(
         out,
         "Failed to resize output tensor.");

-    ET_SWITCH_REALB_TYPES(out_type, ctx, "mul.out", CTYPE, [&]() {
-      using Vec = executorch::vec::Vectorized<CTYPE>;
-      executorch::vec::map2<CTYPE>(
-          [](Vec x, Vec y) { return x * y; },
-          out.mutable_data_ptr<CTYPE>(),
-          a.const_data_ptr<CTYPE>(),
-          b.const_data_ptr<CTYPE>(),
-          out.numel());
-    });
+    if (executorch::runtime::isComplexType(out_type)) {
+      ET_KERNEL_CHECK(
+          ctx, a_type == b_type && a_type == out_type, InvalidArgument, out);
+
+      ET_SWITCH_COMPLEXH_TYPES(out_type, ctx, "mul.out", CTYPE, [&]() {
+        using Vec = executorch::vec::Vectorized<CTYPE>;
+        executorch::vec::map2<CTYPE>(
+            [](Vec x, Vec y) { return x * y; },
+            out.mutable_data_ptr<CTYPE>(),
+            a.const_data_ptr<CTYPE>(),
+            b.const_data_ptr<CTYPE>(),
+            out.numel());
+      });
+    } else {
+      ET_SWITCH_REALB_TYPES(out_type, ctx, "mul.out", CTYPE, [&]() {
+        using Vec = executorch::vec::Vectorized<CTYPE>;
+        executorch::vec::map2<CTYPE>(
+            [](Vec x, Vec y) { return x * y; },
+            out.mutable_data_ptr<CTYPE>(),
+            a.const_data_ptr<CTYPE>(),
+            b.const_data_ptr<CTYPE>(),
+            out.numel());
+      });
+    }
   } else if (selected_optimized_path != ElementwiseOptimizedPath::kNone) {
-    ET_SWITCH_REALB_TYPES(out_type, ctx, "mul.out", CTYPE, [&]() {
-      auto mul_lambda = [](auto x, auto y) { return x * y; };
-      return torch::executor::handle_broadcast_elementwise<CTYPE>(
-          ctx, mul_lambda, a, b, out, selected_optimized_path);
-    });
+    if (executorch::runtime::isComplexType(out_type)) {
+      ET_KERNEL_CHECK(
+          ctx, a_type == b_type && a_type == out_type, InvalidArgument, out);
+
+      ET_SWITCH_COMPLEXH_TYPES(out_type, ctx, "mul.out", CTYPE, [&]() {
+        auto mul_lambda = [](auto x, auto y) { return x * y; };
+        return torch::executor::handle_broadcast_elementwise<CTYPE>(
+            ctx, mul_lambda, a, b, out, selected_optimized_path);
+      });
+    } else {
+      ET_SWITCH_REALB_TYPES(out_type, ctx, "mul.out", CTYPE, [&]() {
+        auto mul_lambda = [](auto x, auto y) { return x * y; };
+        return torch::executor::handle_broadcast_elementwise<CTYPE>(
+            ctx, mul_lambda, a, b, out, selected_optimized_path);
+      });
+    }
   } else {
     ScalarType common_type =
         promoteTypes(a_type, b_type, /*half_to_float*/ true);
@@ -146,26 +172,42 @@ Tensor& opt_mul_out(
         InvalidArgument,
         out);

-    ET_SWITCH_REALHBBF16_TYPES(a_type, ctx, "mul.out", CTYPE_A, [&]() {
-      ET_SWITCH_REALHBBF16_TYPES(b_type, ctx, "mul.out", CTYPE_B, [&]() {
-        using CTYPE_IN = typename torch::executor::
-            promote_types<CTYPE_A, CTYPE_B, /*half_to_float*/ true>::type;
-        ET_DCHECK(CppTypeToScalarType<CTYPE_IN>::value == common_type);
-        ET_SWITCH_REALHBBF16_TYPES(out_type, ctx, "mul.out", CTYPE_OUT, [&]() {
-          apply_binary_elementwise_fn<CTYPE_A, CTYPE_B, CTYPE_OUT>(
-              [](const CTYPE_A val_a, const CTYPE_B val_b) {
-                CTYPE_IN a_casted = static_cast<CTYPE_IN>(val_a);
-                CTYPE_IN b_casted = static_cast<CTYPE_IN>(val_b);
-                CTYPE_IN value = a_casted * b_casted;
-
-                return static_cast<CTYPE_OUT>(value);
-              },
-              a,
-              b,
-              out);
+    if (executorch::runtime::isComplexType(a_type) ||
+        executorch::runtime::isComplexType(b_type) ||
+        executorch::runtime::isComplexType(out_type)) {
+      ET_KERNEL_CHECK(
+          ctx, a_type == b_type && a_type == out_type, InvalidArgument, out);
+
+      ET_SWITCH_COMPLEXH_TYPES(out_type, ctx, "mul.out", CTYPE, [&]() {
+        apply_binary_elementwise_fn<CTYPE, CTYPE, CTYPE>(
+            [](const CTYPE val_a, const CTYPE val_b) { return val_a * val_b; },
+            a,
+            b,
+            out);
+      });
+    } else {
+      ET_SWITCH_REALHBBF16_TYPES(a_type, ctx, "mul.out", CTYPE_A, [&]() {
+        ET_SWITCH_REALHBBF16_TYPES(b_type, ctx, "mul.out", CTYPE_B, [&]() {
+          using CTYPE_IN = typename torch::executor::
+              promote_types<CTYPE_A, CTYPE_B, /*half_to_float*/ true>::type;
+          ET_DCHECK(CppTypeToScalarType<CTYPE_IN>::value == common_type);
+          ET_SWITCH_REALHBBF16_TYPES(
+              out_type, ctx, "mul.out", CTYPE_OUT, [&]() {
+                apply_binary_elementwise_fn<CTYPE_A, CTYPE_B, CTYPE_OUT>(
+                    [](const CTYPE_A val_a, const CTYPE_B val_b) {
+                      CTYPE_IN a_casted = static_cast<CTYPE_IN>(val_a);
+                      CTYPE_IN b_casted = static_cast<CTYPE_IN>(val_b);
+                      CTYPE_IN value = a_casted * b_casted;
+
+                      return static_cast<CTYPE_OUT>(value);
+                    },
+                    a,
+                    b,
+                    out);
+              });
         });
       });
-    });
+    }
   }

   return out;
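
All three branches above funnel through ET_SWITCH_*_TYPES macros, which turn the runtime ScalarType into a concrete element type for the lambda body. As a rough mental model only (a hypothetical standalone sketch, not ExecuTorch's actual macro expansion; the enum and function names here are invented):

#include <complex>
#include <iostream>

// Hypothetical stand-in for the runtime dtype tags used by the kernel.
enum class ScalarType { ComplexFloat, ComplexDouble };

// Rough model of an ET_SWITCH_COMPLEX*_TYPES dispatch: map the runtime dtype
// to a concrete C++ element type and invoke the kernel body with that type.
template <typename Fn>
void switch_complex_types(ScalarType t, Fn&& fn) {
  switch (t) {
    case ScalarType::ComplexFloat:
      fn(std::complex<float>{});
      break;
    case ScalarType::ComplexDouble:
      fn(std::complex<double>{});
      break;
  }
}

int main() {
  switch_complex_types(ScalarType::ComplexFloat, [](auto tag) {
    using CTYPE = decltype(tag);  // plays the role of CTYPE in the kernel
    CTYPE a(1, 2), b(2, 3);
    std::cout << a * b << "\n";   // prints (-4,7)
  });
}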

kernels/portable/cpu/op_mul.cpp (+30, -13)
@@ -47,25 +47,42 @@ Tensor& mul_out(
   ET_KERNEL_CHECK(
       ctx,
       (executorch::runtime::isRealType(compute_type) ||
+       executorch::runtime::isComplexType(compute_type) ||
        compute_type == ScalarType::Bool),
       InvalidArgument,
       out);

-  ET_SWITCH_REALB_TYPES(compute_type, ctx, op_name, CTYPE_COMPUTE, [&]() {
-    utils::apply_bitensor_elementwise_fn<
-        CTYPE_COMPUTE,
-        op_name,
-        utils::SupportedTensorDtypes::REALHBBF16>(
-        [](const CTYPE_COMPUTE val_a, const CTYPE_COMPUTE val_b) {
-          return val_a * val_b;
-        },
+  if (executorch::runtime::isComplexType(compute_type)) {
+    ET_KERNEL_CHECK(
         ctx,
-        a,
-        utils::SupportedTensorDtypes::REALHBBF16,
-        b,
-        utils::SupportedTensorDtypes::REALHBBF16,
+        a.scalar_type() == b.scalar_type() &&
+            a.scalar_type() == out.scalar_type(),
+        InvalidArgument,
         out);
-  });
+    ET_SWITCH_COMPLEXH_TYPES(out.scalar_type(), ctx, "mul.out", CTYPE, [&]() {
+      apply_binary_elementwise_fn<CTYPE, CTYPE, CTYPE>(
+          [](const CTYPE val_a, const CTYPE val_b) { return val_a * val_b; },
+          a,
+          b,
+          out);
+    });
+  } else {
+    ET_SWITCH_REALB_TYPES(compute_type, ctx, op_name, CTYPE_COMPUTE, [&]() {
+      utils::apply_bitensor_elementwise_fn<
+          CTYPE_COMPUTE,
+          op_name,
+          utils::SupportedTensorDtypes::REALHBBF16>(
+          [](const CTYPE_COMPUTE val_a, const CTYPE_COMPUTE val_b) {
+            return val_a * val_b;
+          },
+          ctx,
+          a,
+          utils::SupportedTensorDtypes::REALHBBF16,
+          b,
+          utils::SupportedTensorDtypes::REALHBBF16,
+          out);
+    });
+  }

   return out;
 }
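
In the complex branch, apply_binary_elementwise_fn is instantiated with one CTYPE for both inputs and the output, since complex dtypes are required to match rather than promote. Setting aside broadcasting and dtype conversion (which the real helpers handle), the core computation reduces to a plain elementwise loop; a minimal sketch under those assumptions, with a hypothetical function name:

#include <complex>
#include <cstddef>

// Simplified model of the elementwise apply used by the complex branch:
// same dtype for a, b, and out; contiguous, same-shape data; no broadcast.
template <typename CTYPE, typename Op>
void apply_binary_elementwise(const CTYPE* a, const CTYPE* b, CTYPE* out,
                              std::size_t numel, Op op) {
  for (std::size_t i = 0; i < numel; ++i) {
    out[i] = op(a[i], b[i]);  // e.g. complex multiply
  }
}

int main() {
  std::complex<float> a[] = {{1, 2}, {3, 4}};
  std::complex<float> b[] = {{2, 3}, {4, 5}};
  std::complex<float> out[2];
  apply_binary_elementwise(a, b, out, 2,
                           [](auto x, auto y) { return x * y; });
  // out[0] == (-4,7) and out[1] == (-8,31), matching the values in the tests.
}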

kernels/test/op_mul_test.cpp (+42)
@@ -322,6 +322,38 @@ class OpMulOutTest : public OperatorTest {
     EXPECT_TENSOR_CLOSE(op_mul_out(a, b, out), expected);
     EXPECT_TENSOR_CLOSE(op_mul_out(b, a, out), expected);
   }
+
+  template <typename CTYPE, ScalarType DTYPE>
+  void test_complex_dtype() {
+    TensorFactory<DTYPE> tf;
+    const std::vector<int32_t> sizes = {2, 2};
+
+    // Create complex tensors with real and imaginary parts
+    Tensor x =
+        tf.make(sizes, {CTYPE(1, 2), CTYPE(3, 4), CTYPE(5, 6), CTYPE(7, 8)});
+
+    Tensor y =
+        tf.make(sizes, {CTYPE(2, 3), CTYPE(4, 5), CTYPE(6, 7), CTYPE(8, 9)});
+
+    // Expected result: (a+bi) * (c+di) = (ac-bd) + (ad+bc)i
+    // (1+2i) * (2+3i) = (1*2-2*3) + (1*3+2*2)i = -4 + 7i
+    // (3+4i) * (4+5i) = (3*4-4*5) + (3*5+4*4)i = -8 + 31i
+    // (5+6i) * (6+7i) = (5*6-6*7) + (5*7+6*6)i = -12 + 71i
+    // (7+8i) * (8+9i) = (7*8-8*9) + (7*9+8*8)i = -16 + 127i
+    Tensor expected = tf.make(
+        sizes, {CTYPE(-4, 7), CTYPE(-8, 31), CTYPE(-12, 71), CTYPE(-16, 127)});
+
+    Tensor out = tf.make(
+        {2, 2},
+        {
+            CTYPE(0, 0),
+            CTYPE(0, 0),
+            CTYPE(0, 0),
+            CTYPE(0, 0),
+        });
+    op_mul_out(x, y, out);
+    EXPECT_TENSOR_CLOSE(out, expected);
+  }
 };

 class OpMulScalarOutTest : public OperatorTest {
@@ -472,6 +504,16 @@ TEST_F(OpMulOutTest, BothScalarInputBroadcastTest) {
   test_both_scalar_input_broadcast<ScalarType::BFloat16>();
 }

+TEST_F(OpMulOutTest, AllComplexDtypesSupported) {
+#define TEST_ENTRY(ctype, dtype) test_complex_dtype<ctype, ScalarType::dtype>();
+  if (torch::executor::testing::SupportedFeatures::get()->is_aten) {
+    ET_FORALL_COMPLEX_TYPES(TEST_ENTRY);
+  } else {
+    ET_FORALL_COMPLEXH_TYPES(TEST_ENTRY);
+  }
+#undef TEST_ENTRY
+}
+
 TEST_F(OpMulOutTest, MismatchedOutputShapesDies) {
   if (SupportedFeatures::get()->is_aten) {
     GTEST_SKIP() << "ATen currently supports mismatched shapes";