[DAGCombine][ARM] x ==/!= c -> (x - c) ==/!= 0 iff '-c' can be folded into the x node.

LebedevRI · LebedevRI · commit 4334892e7b07 · 2019-10-22T22:56:35.000+03:00
Summary: This fold, helps recover from the rest of the D62266 ARM regressions. https://rise4fun.com/Alive/TvpC Note that while the fold is quite flexible, i've restricted it to the single interesting pattern at the moment. Reviewers: efriedma, craig.topper, spatel, RKSimon, deadalnix Reviewed By: deadalnix Subscribers: javed.absar, kristof.beyls, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D62450
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -4268,6 +4268,13 @@ class TargetLowering : public TargetLoweringBase {
   SDValue buildSREMEqFold(EVT SETCCVT, SDValue REMNode, SDValue CompTargetNode,
                           ISD::CondCode Cond, DAGCombinerInfo &DCI,
                           const SDLoc &DL) const;
+
+  /// x ==/!= c  ->  (x - c) ==/!= 0  iff '-c' folds into x; else null SDValue.
+  SDValue optimizeSetCCToComparisonWithZero(EVT SCCVT, SDValue N0,
+                                            ConstantSDNode *N1C,
+                                            ISD::CondCode Cond,
+                                            DAGCombinerInfo &DCI,
+                                            const SDLoc &DL) const;
 };
 
 /// Given an LLVM IR type and return type attributes, compute the return value
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -3045,6 +3045,62 @@ SDValue TargetLowering::foldSetCCWithBinOp(EVT VT, SDValue N0, SDValue N1,
   return DAG.getSetCC(DL, VT, X, YShl1, Cond);
 }
 
+/// x ==/!= c  ->  (x - c) ==/!= 0  iff '-c' can be folded into the x node.
+/// Currently x (\p N0) must be a single-use ADDCARRY/SUBCARRY with a zero
+/// constant (or splat) as operand 1, and c (\p N1C, assumed non-null) must be
+/// non-zero and non-opaque. Returns the new setcc, or null SDValue otherwise.
+SDValue TargetLowering::optimizeSetCCToComparisonWithZero(
+    EVT SCCVT, SDValue N0, ConstantSDNode *N1C, ISD::CondCode Cond,
+    DAGCombinerInfo &DCI, const SDLoc &DL) const {
+  assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
+         "Only for equality-comparisons.");
+
+  // 'c' must be non-zero (otherwise we would rebuild the same comparison) and
+  // non-opaque (so that it can be constant-folded below); x must have a single
+  // use, to avoid creating an extra node.
+  if (N1C->isNullValue() || N1C->isOpaque() || !N0.hasOneUse())
+    return SDValue();
+
+  // Will we be able to fold the '-c' into 'x' node?
+  bool IsAdd;
+  switch (N0.getOpcode()) {
+  default:
+    return SDValue(); // Don't know about that node.
+  case ISD::ADD:
+  case ISD::SUB:
+    return SDValue(); // Let's not touch these.
+  case ISD::ADDCARRY:
+    IsAdd = true;
+    break;
+  case ISD::SUBCARRY:
+    IsAdd = false;
+    break;
+  }
+
+  // Second operand must be a non-opaque constant. Let's be even more specific
+  // for now and require zero; relaxing that needs a precise cost-model.
+  ConstantSDNode *N01C = isConstOrConstSplat(N0.getOperand(1));
+  if (!N01C || N01C->isOpaque() || !N01C->isNullValue())
+    return SDValue();
+
+  SelectionDAG &DAG = DCI.DAG;
+  EVT OpVT = N0.getValueType();
+
+  // (y + N01C) - N1C = y + (N01C - N1C)
+  // (y - N01C) - N1C = y - (N01C + N1C)
+  SDValue NewC = DAG.FoldConstantArithmetic(IsAdd ? ISD::SUB : ISD::ADD, DL,
+                                            OpVT, N01C, N1C);
+  assert(NewC && "Constant-folding failed!");
+
+  // Rebuild the x node with the adjusted constant; other operands are kept.
+  SmallVector<SDValue, 3> N0Ops(N0->ops().begin(), N0->ops().end());
+  N0Ops[1] = NewC;
+  N0 = DAG.getNode(N0.getOpcode(), DL, N0->getVTList(), N0Ops);
+
+  SDValue Zero = DAG.getConstant(0, DL, OpVT);
+  return DAG.getSetCC(DL, SCCVT, N0, Zero, Cond);
+}
+
 /// Try to simplify a setcc built with the specified operands and cc. If it is
 /// unable to simplify it, return a null SDValue.
 SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
@@ -3578,6 +3634,11 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
           return CC;
     }
 
+    if (Cond == ISD::SETEQ || Cond == ISD::SETNE)
+      if (SDValue CC =
+              optimizeSetCCToComparisonWithZero(VT, N0, N1C, Cond, DCI, dl))
+        return CC;
+
     // If we have "setcc X, C0", check to see if we can shrink the immediate
     // by changing cc.
     // TODO: Support this for vectors after legalize ops.
diff --git a/llvm/test/CodeGen/ARM/addsubcarry-promotion.ll b/llvm/test/CodeGen/ARM/addsubcarry-promotion.ll
@@ -14,61 +14,35 @@ define void @fn1(i32 %a, i32 %b, i32 %c) local_unnamed_addr #0 {
 ; ARM-NEXT:    adds r0, r1, r0
 ; ARM-NEXT:    movw r1, #65535
 ; ARM-NEXT:    sxth r2, r2
-; ARM-NEXT:    adc r0, r2, #0
-; ARM-NEXT:    uxth r0, r0
-; ARM-NEXT:    cmp r0, r1
+; ARM-NEXT:    adc r0, r2, #1
+; ARM-NEXT:    tst r0, r1
 ; ARM-NEXT:    bxeq lr
 ; ARM-NEXT:  .LBB0_1: @ %for.cond
 ; ARM-NEXT:    @ =>This Inner Loop Header: Depth=1
 ; ARM-NEXT:    b .LBB0_1
 ;
-; THUMBV6M-LABEL: fn1:
-; THUMBV6M:       @ %bb.0: @ %entry
-; THUMBV6M-NEXT:    rsbs r2, r2, #0
-; THUMBV6M-NEXT:    sxth r2, r2
-; THUMBV6M-NEXT:    movs r3, #0
-; THUMBV6M-NEXT:    adds r0, r1, r0
-; THUMBV6M-NEXT:    adcs r3, r2
-; THUMBV6M-NEXT:    uxth r0, r3
-; THUMBV6M-NEXT:    ldr r1, .LCPI0_0
-; THUMBV6M-NEXT:    cmp r0, r1
-; THUMBV6M-NEXT:    beq .LBB0_2
-; THUMBV6M-NEXT:  .LBB0_1: @ %for.cond
-; THUMBV6M-NEXT:    @ =>This Inner Loop Header: Depth=1
-; THUMBV6M-NEXT:    b .LBB0_1
-; THUMBV6M-NEXT:  .LBB0_2: @ %if.end
-; THUMBV6M-NEXT:    bx lr
-; THUMBV6M-NEXT:    .p2align 2
-; THUMBV6M-NEXT:  @ %bb.3:
-; THUMBV6M-NEXT:  .LCPI0_0:
-; THUMBV6M-NEXT:    .long 65535 @ 0xffff
-;
-; THUMBV8M-BASE-LABEL: fn1:
-; THUMBV8M-BASE:       @ %bb.0: @ %entry
-; THUMBV8M-BASE-NEXT:    rsbs r2, r2, #0
-; THUMBV8M-BASE-NEXT:    sxth r2, r2
-; THUMBV8M-BASE-NEXT:    movs r3, #0
-; THUMBV8M-BASE-NEXT:    adds r0, r1, r0
-; THUMBV8M-BASE-NEXT:    adcs r3, r2
-; THUMBV8M-BASE-NEXT:    uxth r0, r3
-; THUMBV8M-BASE-NEXT:    movw r1, #65535
-; THUMBV8M-BASE-NEXT:    cmp r0, r1
-; THUMBV8M-BASE-NEXT:    beq .LBB0_2
-; THUMBV8M-BASE-NEXT:  .LBB0_1: @ %for.cond
-; THUMBV8M-BASE-NEXT:    @ =>This Inner Loop Header: Depth=1
-; THUMBV8M-BASE-NEXT:    b .LBB0_1
-; THUMBV8M-BASE-NEXT:  .LBB0_2: @ %if.end
-; THUMBV8M-BASE-NEXT:    bx lr
+; THUMB1-LABEL: fn1:
+; THUMB1:       @ %bb.0: @ %entry
+; THUMB1-NEXT:    rsbs r2, r2, #0
+; THUMB1-NEXT:    sxth r2, r2
+; THUMB1-NEXT:    movs r3, #1
+; THUMB1-NEXT:    adds r0, r1, r0
+; THUMB1-NEXT:    adcs r3, r2
+; THUMB1-NEXT:    lsls r0, r3, #16
+; THUMB1-NEXT:    beq .LBB0_2
+; THUMB1-NEXT:  .LBB0_1: @ %for.cond
+; THUMB1-NEXT:    @ =>This Inner Loop Header: Depth=1
+; THUMB1-NEXT:    b .LBB0_1
+; THUMB1-NEXT:  .LBB0_2: @ %if.end
+; THUMB1-NEXT:    bx lr
 ;
 ; THUMB-LABEL: fn1:
 ; THUMB:       @ %bb.0: @ %entry
 ; THUMB-NEXT:    rsbs r2, r2, #0
 ; THUMB-NEXT:    adds r0, r0, r1
-; THUMB-NEXT:    movw r1, #65535
 ; THUMB-NEXT:    sxth r2, r2
-; THUMB-NEXT:    adc r0, r2, #0
-; THUMB-NEXT:    uxth r0, r0
-; THUMB-NEXT:    cmp r0, r1
+; THUMB-NEXT:    adc r0, r2, #1
+; THUMB-NEXT:    lsls r0, r0, #16
 ; THUMB-NEXT:    it eq
 ; THUMB-NEXT:    bxeq lr
 ; THUMB-NEXT:  .LBB0_1: @ %for.cond