Skip to content

Commit f8f5dc4

Browse files
alanwaketan authored and ManfeiBai committed
Code-gen addcdiv again (#4447)
Summary: This pull request redoes the addcdiv and addcmul code-gen, and adds a test case to verify that we reuse the DataCache for scalars. This needs pytorch/pytorch#92066 to function. Test Plan: PJRT_DEVICE=CPU python test/test_operations.py -v -k test_cached_addcdiv. Fixes #4213.
1 parent e99c295 commit f8f5dc4

7 files changed

+70
-58
lines changed

test/test_operations.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1893,6 +1893,23 @@ def test_sigmoid_bounds(self):
18931893
assert torch.all(lower_bound >= 0.0)
18941894
assert torch.all(upper_bound <= 1.0)
18951895

1896+
def test_cached_addcdiv(self):
1897+
xla_device = xm.xla_device()
1898+
met.clear_all()
1899+
1900+
t1 = torch.randn(1, 3).to(xla_device)
1901+
t2 = torch.randn(1, 3).to(xla_device)
1902+
t3 = torch.randn(1, 3).to(xla_device)
1903+
t1.addcdiv_(t2, t3, value=0.1)
1904+
xm.mark_step()
1905+
self.assertEqual(met.metric_data("TransferToServerTime")[0], 4)
1906+
1907+
# The following two scalars shouldn't trigger TransferToServerTime.
1908+
t1.addcdiv_(t2, t3, value=0.1)
1909+
t1.addcdiv_(t2, t3, value=0.1)
1910+
xm.mark_step()
1911+
self.assertEqual(met.metric_data("TransferToServerTime")[0], 4)
1912+
18961913

18971914
class MNISTComparator(nn.Module):
18981915

torch_xla/csrc/aten_xla_type.cpp

Lines changed: 0 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -596,37 +596,6 @@ at::Tensor XLANativeFunctions::add(const at::Tensor& self,
596596
});
597597
}
598598

599-
at::Tensor XLANativeFunctions::addcdiv(const at::Tensor& self,
600-
const at::Tensor& tensor1,
601-
const at::Tensor& tensor2,
602-
const at::Scalar& value) {
603-
TORCH_LAZY_FN_COUNTER("xla::");
604-
return bridge::AtenFromXlaTensor(tensor_methods::addcdiv(
605-
bridge::GetXlaTensor(self), value, bridge::GetXlaTensor(tensor1),
606-
bridge::GetXlaTensor(tensor2)));
607-
}
608-
609-
at::Tensor& XLANativeFunctions::addcdiv_(at::Tensor& self,
610-
const at::Tensor& tensor1,
611-
const at::Tensor& tensor2,
612-
const at::Scalar& value) {
613-
TORCH_LAZY_FN_COUNTER("xla::");
614-
XLATensorPtr self_tensor = bridge::GetXlaTensor(self);
615-
tensor_methods::addcdiv_(self_tensor, value, bridge::GetXlaTensor(tensor1),
616-
bridge::GetXlaTensor(tensor2));
617-
return self;
618-
}
619-
620-
at::Tensor XLANativeFunctions::addcmul(const at::Tensor& self,
621-
const at::Tensor& tensor1,
622-
const at::Tensor& tensor2,
623-
const at::Scalar& value) {
624-
TORCH_LAZY_FN_COUNTER("xla::");
625-
return bridge::AtenFromXlaTensor(tensor_methods::addcmul(
626-
bridge::GetXlaTensor(self), value, bridge::GetXlaTensor(tensor1),
627-
bridge::GetXlaTensor(tensor2)));
628-
}
629-
630599
at::Tensor XLANativeFunctions::addmm(const at::Tensor& self,
631600
const at::Tensor& mat1,
632601
const at::Tensor& mat2,

torch_xla/csrc/ops/ops_lower_fn.cpp

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,22 @@ torch_xla::XlaOpVector AdaptiveAvgPool3dBackward::Lower(
5656
return ReturnOp(xla_output, loctx);
5757
}
5858

59+
torch_xla::XlaOpVector Addcdiv::Lower(LoweringContext* loctx) const {
60+
xla::XlaOp xla_input = loctx->GetOutputOp(operand(0));
61+
xla::XlaOp xla_t1 = loctx->GetOutputOp(operand(1));
62+
xla::XlaOp xla_t2 = loctx->GetOutputOp(operand(2));
63+
xla::XlaOp xla_val = loctx->GetOutputOp(operand(3));
64+
return ReturnOp(BuildAddcdiv(xla_input, xla_t1, xla_t2, xla_val), loctx);
65+
}
66+
67+
torch_xla::XlaOpVector Addcmul::Lower(LoweringContext* loctx) const {
68+
xla::XlaOp xla_input = loctx->GetOutputOp(operand(0));
69+
xla::XlaOp xla_t1 = loctx->GetOutputOp(operand(1));
70+
xla::XlaOp xla_t2 = loctx->GetOutputOp(operand(2));
71+
xla::XlaOp xla_val = loctx->GetOutputOp(operand(3));
72+
return ReturnOp(BuildAddcmul(xla_input, xla_t1, xla_t2, xla_val), loctx);
73+
}
74+
5975
torch_xla::XlaOpVector All::Lower(LoweringContext* loctx) const {
6076
xla::XlaOp input = loctx->GetOutputOp(operand(0));
6177
std::vector<int64_t> dimensions =

torch_xla/csrc/ops/ops_xla_shape_fn.cpp

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,31 @@ xla::Shape AdaptiveAvgPool3dBackwardOutputShape(
9797
lower_for_shape_fn);
9898
}
9999

100+
xla::Shape AddcdivOutputShape(const torch::lazy::Value& input,
101+
const torch::lazy::Value& t1,
102+
const torch::lazy::Value& t2,
103+
const torch::lazy::Value& value) {
104+
auto shape_fn = [](absl::Span<const xla::XlaOp> operands) -> xla::XlaOp {
105+
return BuildAddcdiv(operands[0], operands[1], operands[2], operands[3]);
106+
};
107+
return InferOutputShape({GetXlaShape(input), GetXlaShape(t1), GetXlaShape(t2),
108+
GetXlaShape(value)},
109+
shape_fn);
110+
}
111+
112+
xla::Shape AddcmulOutputShape(const torch::lazy::Value& input,
113+
const torch::lazy::Value& t1,
114+
const torch::lazy::Value& t2,
115+
const torch::lazy::Value& value) {
116+
auto shape_fn = [](absl::Span<const xla::XlaOp> operands) -> xla::XlaOp {
117+
return BuildAddcmul(operands[0], operands[1], operands[2], operands[3]);
118+
};
119+
120+
return InferOutputShape({GetXlaShape(input), GetXlaShape(t1), GetXlaShape(t2),
121+
GetXlaShape(value)},
122+
shape_fn);
123+
}
124+
100125
xla::Shape AllOutputShape(const torch::lazy::Value& input) {
101126
std::vector<int64_t> dimensions =
102127
torch::lazy::Iota<int64_t>(GetXlaShape(input).rank());

torch_xla/csrc/ops/ops_xla_shape_fn.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,16 @@ xla::Shape AdaptiveAvgPool3dOutputShape(const torch::lazy::Value& input,
2121
xla::Shape AdaptiveAvgPool3dBackwardOutputShape(
2222
const torch::lazy::Value& grad_output, const torch::lazy::Value& input);
2323

24+
xla::Shape AddcdivOutputShape(const torch::lazy::Value& input,
25+
const torch::lazy::Value& t1,
26+
const torch::lazy::Value& t2,
27+
const torch::lazy::Value& value);
28+
29+
xla::Shape AddcmulOutputShape(const torch::lazy::Value& input,
30+
const torch::lazy::Value& t1,
31+
const torch::lazy::Value& t2,
32+
const torch::lazy::Value& value);
33+
2434
xla::Shape AllOutputShape(const torch::lazy::Value& input);
2535

2636
xla::Shape AllDimOutputShape(const torch::lazy::Value& input, const int64_t dim,

torch_xla/csrc/tensor_methods.cpp

Lines changed: 0 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -671,30 +671,6 @@ XLATensorPtr add(const XLATensorPtr& input, const at::Scalar& other,
671671
logical_element_type);
672672
}
673673

674-
XLATensorPtr addcdiv(const XLATensorPtr& input, const at::Scalar& value,
675-
const XLATensorPtr& tensor1, const XLATensorPtr& tensor2) {
676-
torch::lazy::Value constant = XLAGraphExecutor::Get()->GetIrValueForScalar(
677-
value, tensor1->shape().get().element_type(), input->GetDevice());
678-
torch::lazy::Value div = tensor1->GetIrValue() / tensor2->GetIrValue();
679-
return input->CreateFrom(input->GetIrValue() + div * constant);
680-
}
681-
682-
void addcdiv_(XLATensorPtr& input, const at::Scalar& value,
683-
const XLATensorPtr& tensor1, const XLATensorPtr& tensor2) {
684-
torch::lazy::Value constant = XLAGraphExecutor::Get()->GetIrValueForScalar(
685-
value, tensor1->shape().get().element_type(), input->GetDevice());
686-
torch::lazy::Value div = tensor1->GetIrValue() / tensor2->GetIrValue();
687-
input->SetInPlaceIrValue(input->GetIrValue() + div * constant);
688-
}
689-
690-
XLATensorPtr addcmul(const XLATensorPtr& input, const at::Scalar& value,
691-
const XLATensorPtr& tensor1, const XLATensorPtr& tensor2) {
692-
torch::lazy::Value constant = XLAGraphExecutor::Get()->GetIrValueForScalar(
693-
value, tensor1->shape().get().element_type(), input->GetDevice());
694-
torch::lazy::Value mul = tensor1->GetIrValue() * tensor2->GetIrValue();
695-
return input->CreateFrom(input->GetIrValue() + mul * constant);
696-
}
697-
698674
XLATensorPtr addmm(const XLATensorPtr& input, const XLATensorPtr& weight,
699675
const XLATensorPtr& bias) {
700676
return input->CreateFrom(AddMatMulOp(

xla_native_functions.yaml

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,8 @@ full_codegen:
66
- abs
77
- all
88
- all.dim
9+
- addcdiv
10+
- addcmul
911
- amax
1012
- amin
1113
- any
@@ -123,9 +125,6 @@ supported:
123125
- adaptive_max_pool2d_backward
124126
- add.Scalar
125127
- add.Tensor
126-
- addcdiv
127-
- addcdiv_
128-
- addcmul
129128
- addmm
130129
- alias
131130
- arange.start_out

0 commit comments

Comments (0)