Skip to content

Commit a756a6b

Browse files
authored
[TargetLowering][RISCV] Introduce shouldFoldSelectWithSingleBitTest and implement it for RISC-V. (#72978)
DAGCombiner folds (select_cc seteq (and x, y), 0, 0, A) to (and (sra (shl x)) A) where y has a single bit set. Previously, DAGCombiner relied on `shouldAvoidTransformToShift` to decide when to do the combine, but `shouldAvoidTransformToShift` is only about shift cost. This patch introduces a specific hook to decide when to do the combine, and the RISC-V implementation disables the combine when Zicond or XVentanaCondOps is enabled and AndMask <= 1024.
1 parent 60fa8cf commit a756a6b

File tree

6 files changed

+243
-67
lines changed

6 files changed

+243
-67
lines changed

llvm/include/llvm/CodeGen/TargetLowering.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3259,6 +3259,14 @@ class TargetLoweringBase {
32593259
return false;
32603260
}
32613261

3262+
// Should we fold (select_cc seteq (and x, y), 0, 0, A) -> (and (sra (shl x))
3263+
// A) where y has a single bit set?
3264+
virtual bool shouldFoldSelectWithSingleBitTest(EVT VT,
3265+
const APInt &AndMask) const {
3266+
unsigned ShCt = AndMask.getBitWidth() - 1;
3267+
return !shouldAvoidTransformToShift(VT, ShCt);
3268+
}
3269+
32623270
/// Does this target require the clearing of high-order bits in a register
32633271
/// passed to the fp16 to fp conversion library function.
32643272
virtual bool shouldKeepZExtForFP16Conv() const { return false; }

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27252,8 +27252,8 @@ SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
2725227252
if (ConstAndRHS && ConstAndRHS->getAPIntValue().popcount() == 1) {
2725327253
// Shift the tested bit over the sign bit.
2725427254
const APInt &AndMask = ConstAndRHS->getAPIntValue();
27255-
unsigned ShCt = AndMask.getBitWidth() - 1;
27256-
if (!TLI.shouldAvoidTransformToShift(VT, ShCt)) {
27255+
if (TLI.shouldFoldSelectWithSingleBitTest(VT, AndMask)) {
27256+
unsigned ShCt = AndMask.getBitWidth() - 1;
2725727257
SDValue ShlAmt =
2725827258
DAG.getConstant(AndMask.countl_zero(), SDLoc(AndLHS),
2725927259
getShiftAmountTy(AndLHS.getValueType()));

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19695,6 +19695,13 @@ RISCVTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
1969519695
return SDValue();
1969619696
return TargetLowering::buildSDIVPow2WithCMov(N, Divisor, DAG, Created);
1969719697
}
19698+
19699+
bool RISCVTargetLowering::shouldFoldSelectWithSingleBitTest(
19700+
EVT VT, const APInt &AndMask) const {
19701+
if (Subtarget.hasStdExtZicond() || Subtarget.hasVendorXVentanaCondOps())
19702+
return AndMask.ugt(1024);
19703+
return TargetLowering::shouldFoldSelectWithSingleBitTest(VT, AndMask);
19704+
}
1969819705
namespace llvm::RISCVVIntrinsicsTable {
1969919706

1970019707
#define GET_RISCVVIntrinsicsTable_IMPL

llvm/lib/Target/RISCV/RISCVISelLowering.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -959,6 +959,9 @@ class RISCVTargetLowering : public TargetLowering {
959959

960960
SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
961961
SmallVectorImpl<SDNode *> &Created) const override;
962+
963+
bool shouldFoldSelectWithSingleBitTest(EVT VT,
964+
const APInt &AndMask) const override;
962965
};
963966

964967
namespace RISCV {

llvm/test/CodeGen/RISCV/condops.ll

Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3533,3 +3533,103 @@ define signext i16 @numsignbits(i16 signext %0, i16 signext %1, i16 signext %2,
35333533
}
35343534

35353535
declare void @bat(i16 signext)
3536+
3537+
define i64 @single_bit(i64 %x) {
3538+
; RV32I-LABEL: single_bit:
3539+
; RV32I: # %bb.0: # %entry
3540+
; RV32I-NEXT: slli a2, a0, 21
3541+
; RV32I-NEXT: srai a2, a2, 31
3542+
; RV32I-NEXT: and a0, a2, a0
3543+
; RV32I-NEXT: and a1, a2, a1
3544+
; RV32I-NEXT: ret
3545+
;
3546+
; RV64I-LABEL: single_bit:
3547+
; RV64I: # %bb.0: # %entry
3548+
; RV64I-NEXT: slli a1, a0, 53
3549+
; RV64I-NEXT: srai a1, a1, 63
3550+
; RV64I-NEXT: and a0, a1, a0
3551+
; RV64I-NEXT: ret
3552+
;
3553+
; RV64XVENTANACONDOPS-LABEL: single_bit:
3554+
; RV64XVENTANACONDOPS: # %bb.0: # %entry
3555+
; RV64XVENTANACONDOPS-NEXT: andi a1, a0, 1024
3556+
; RV64XVENTANACONDOPS-NEXT: vt.maskc a0, a0, a1
3557+
; RV64XVENTANACONDOPS-NEXT: ret
3558+
;
3559+
; RV64XTHEADCONDMOV-LABEL: single_bit:
3560+
; RV64XTHEADCONDMOV: # %bb.0: # %entry
3561+
; RV64XTHEADCONDMOV-NEXT: slli a1, a0, 53
3562+
; RV64XTHEADCONDMOV-NEXT: srai a1, a1, 63
3563+
; RV64XTHEADCONDMOV-NEXT: and a0, a1, a0
3564+
; RV64XTHEADCONDMOV-NEXT: ret
3565+
;
3566+
; RV32ZICOND-LABEL: single_bit:
3567+
; RV32ZICOND: # %bb.0: # %entry
3568+
; RV32ZICOND-NEXT: andi a2, a0, 1024
3569+
; RV32ZICOND-NEXT: czero.eqz a0, a0, a2
3570+
; RV32ZICOND-NEXT: czero.eqz a1, a1, a2
3571+
; RV32ZICOND-NEXT: ret
3572+
;
3573+
; RV64ZICOND-LABEL: single_bit:
3574+
; RV64ZICOND: # %bb.0: # %entry
3575+
; RV64ZICOND-NEXT: andi a1, a0, 1024
3576+
; RV64ZICOND-NEXT: czero.eqz a0, a0, a1
3577+
; RV64ZICOND-NEXT: ret
3578+
entry:
3579+
%and = and i64 %x, 1024
3580+
%tobool.not = icmp eq i64 %and, 0
3581+
%cond = select i1 %tobool.not, i64 0, i64 %x
3582+
ret i64 %cond
3583+
}
3584+
3585+
; Test to fold select with single bit check to (and (sra (shl x))).
3586+
define i64 @single_bit2(i64 %x) {
3587+
; RV32I-LABEL: single_bit2:
3588+
; RV32I: # %bb.0: # %entry
3589+
; RV32I-NEXT: slli a2, a0, 20
3590+
; RV32I-NEXT: srai a2, a2, 31
3591+
; RV32I-NEXT: and a0, a2, a0
3592+
; RV32I-NEXT: and a1, a2, a1
3593+
; RV32I-NEXT: ret
3594+
;
3595+
; RV64I-LABEL: single_bit2:
3596+
; RV64I: # %bb.0: # %entry
3597+
; RV64I-NEXT: slli a1, a0, 52
3598+
; RV64I-NEXT: srai a1, a1, 63
3599+
; RV64I-NEXT: and a0, a1, a0
3600+
; RV64I-NEXT: ret
3601+
;
3602+
; RV64XVENTANACONDOPS-LABEL: single_bit2:
3603+
; RV64XVENTANACONDOPS: # %bb.0: # %entry
3604+
; RV64XVENTANACONDOPS-NEXT: slli a1, a0, 52
3605+
; RV64XVENTANACONDOPS-NEXT: srai a1, a1, 63
3606+
; RV64XVENTANACONDOPS-NEXT: and a0, a1, a0
3607+
; RV64XVENTANACONDOPS-NEXT: ret
3608+
;
3609+
; RV64XTHEADCONDMOV-LABEL: single_bit2:
3610+
; RV64XTHEADCONDMOV: # %bb.0: # %entry
3611+
; RV64XTHEADCONDMOV-NEXT: slli a1, a0, 52
3612+
; RV64XTHEADCONDMOV-NEXT: srai a1, a1, 63
3613+
; RV64XTHEADCONDMOV-NEXT: and a0, a1, a0
3614+
; RV64XTHEADCONDMOV-NEXT: ret
3615+
;
3616+
; RV32ZICOND-LABEL: single_bit2:
3617+
; RV32ZICOND: # %bb.0: # %entry
3618+
; RV32ZICOND-NEXT: slli a2, a0, 20
3619+
; RV32ZICOND-NEXT: srai a2, a2, 31
3620+
; RV32ZICOND-NEXT: and a0, a2, a0
3621+
; RV32ZICOND-NEXT: and a1, a2, a1
3622+
; RV32ZICOND-NEXT: ret
3623+
;
3624+
; RV64ZICOND-LABEL: single_bit2:
3625+
; RV64ZICOND: # %bb.0: # %entry
3626+
; RV64ZICOND-NEXT: slli a1, a0, 52
3627+
; RV64ZICOND-NEXT: srai a1, a1, 63
3628+
; RV64ZICOND-NEXT: and a0, a1, a0
3629+
; RV64ZICOND-NEXT: ret
3630+
entry:
3631+
%and = and i64 %x, 2048
3632+
%tobool.not = icmp eq i64 %and, 0
3633+
%cond = select i1 %tobool.not, i64 0, i64 %x
3634+
ret i64 %cond
3635+
}

llvm/test/CodeGen/RISCV/select.ll

Lines changed: 123 additions & 65 deletions
Original file line numberDiff line numberDiff line change
@@ -1,26 +1,42 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2-
; RUN: llc -mtriple=riscv32 -mattr=+m -verify-machineinstrs < %s | FileCheck --check-prefixes=CHECK,CHECK32,RV32IM %s
3-
; RUN: llc -mtriple=riscv64 -mattr=+m -verify-machineinstrs < %s | FileCheck --check-prefixes=CHECK,CHECK64,RV64IM %s
4-
; RUN: llc -mtriple=riscv64 -mattr=+m,+xventanacondops -verify-machineinstrs < %s | FileCheck --check-prefixes=CHECK,CHECK64,RV64IMXVTCONDOPS %s
5-
; RUN: llc -mtriple=riscv32 -mattr=+m,+experimental-zicond -verify-machineinstrs < %s | FileCheck --check-prefixes=CHECK,CHECK32,CHECKZICOND,RV32IMZICOND %s
6-
; RUN: llc -mtriple=riscv64 -mattr=+m,+experimental-zicond -verify-machineinstrs < %s | FileCheck --check-prefixes=CHECK,CHECK64,CHECKZICOND,RV64IMZICOND %s
2+
; RUN: llc -mtriple=riscv32 -mattr=+m -verify-machineinstrs < %s | FileCheck --check-prefixes=CHECK,RV32IM %s
3+
; RUN: llc -mtriple=riscv64 -mattr=+m -verify-machineinstrs < %s | FileCheck --check-prefixes=CHECK,RV64IM %s
4+
; RUN: llc -mtriple=riscv64 -mattr=+m,+xventanacondops -verify-machineinstrs < %s | FileCheck --check-prefixes=CHECK,RV64IMXVTCONDOPS %s
5+
; RUN: llc -mtriple=riscv32 -mattr=+m,+experimental-zicond -verify-machineinstrs < %s | FileCheck --check-prefixes=CHECK,CHECKZICOND,RV32IMZICOND %s
6+
; RUN: llc -mtriple=riscv64 -mattr=+m,+experimental-zicond -verify-machineinstrs < %s | FileCheck --check-prefixes=CHECK,CHECKZICOND,RV64IMZICOND %s
77

88
define i16 @select_xor_1(i16 %A, i8 %cond) {
9-
; CHECK32-LABEL: select_xor_1:
10-
; CHECK32: # %bb.0: # %entry
11-
; CHECK32-NEXT: slli a1, a1, 31
12-
; CHECK32-NEXT: srai a1, a1, 31
13-
; CHECK32-NEXT: andi a1, a1, 43
14-
; CHECK32-NEXT: xor a0, a0, a1
15-
; CHECK32-NEXT: ret
16-
;
17-
; CHECK64-LABEL: select_xor_1:
18-
; CHECK64: # %bb.0: # %entry
19-
; CHECK64-NEXT: slli a1, a1, 63
20-
; CHECK64-NEXT: srai a1, a1, 63
21-
; CHECK64-NEXT: andi a1, a1, 43
22-
; CHECK64-NEXT: xor a0, a0, a1
23-
; CHECK64-NEXT: ret
9+
; RV32IM-LABEL: select_xor_1:
10+
; RV32IM: # %bb.0: # %entry
11+
; RV32IM-NEXT: slli a1, a1, 31
12+
; RV32IM-NEXT: srai a1, a1, 31
13+
; RV32IM-NEXT: andi a1, a1, 43
14+
; RV32IM-NEXT: xor a0, a0, a1
15+
; RV32IM-NEXT: ret
16+
;
17+
; RV64IM-LABEL: select_xor_1:
18+
; RV64IM: # %bb.0: # %entry
19+
; RV64IM-NEXT: slli a1, a1, 63
20+
; RV64IM-NEXT: srai a1, a1, 63
21+
; RV64IM-NEXT: andi a1, a1, 43
22+
; RV64IM-NEXT: xor a0, a0, a1
23+
; RV64IM-NEXT: ret
24+
;
25+
; RV64IMXVTCONDOPS-LABEL: select_xor_1:
26+
; RV64IMXVTCONDOPS: # %bb.0: # %entry
27+
; RV64IMXVTCONDOPS-NEXT: andi a1, a1, 1
28+
; RV64IMXVTCONDOPS-NEXT: li a2, 43
29+
; RV64IMXVTCONDOPS-NEXT: vt.maskc a1, a2, a1
30+
; RV64IMXVTCONDOPS-NEXT: xor a0, a0, a1
31+
; RV64IMXVTCONDOPS-NEXT: ret
32+
;
33+
; CHECKZICOND-LABEL: select_xor_1:
34+
; CHECKZICOND: # %bb.0: # %entry
35+
; CHECKZICOND-NEXT: andi a1, a1, 1
36+
; CHECKZICOND-NEXT: li a2, 43
37+
; CHECKZICOND-NEXT: czero.eqz a1, a2, a1
38+
; CHECKZICOND-NEXT: xor a0, a0, a1
39+
; CHECKZICOND-NEXT: ret
2440
entry:
2541
%and = and i8 %cond, 1
2642
%cmp10 = icmp eq i8 %and, 0
@@ -72,21 +88,35 @@ entry:
7288
}
7389

7490
define i32 @select_xor_2(i32 %A, i32 %B, i8 %cond) {
75-
; CHECK32-LABEL: select_xor_2:
76-
; CHECK32: # %bb.0: # %entry
77-
; CHECK32-NEXT: slli a2, a2, 31
78-
; CHECK32-NEXT: srai a2, a2, 31
79-
; CHECK32-NEXT: and a1, a2, a1
80-
; CHECK32-NEXT: xor a0, a0, a1
81-
; CHECK32-NEXT: ret
82-
;
83-
; CHECK64-LABEL: select_xor_2:
84-
; CHECK64: # %bb.0: # %entry
85-
; CHECK64-NEXT: slli a2, a2, 63
86-
; CHECK64-NEXT: srai a2, a2, 63
87-
; CHECK64-NEXT: and a1, a2, a1
88-
; CHECK64-NEXT: xor a0, a0, a1
89-
; CHECK64-NEXT: ret
91+
; RV32IM-LABEL: select_xor_2:
92+
; RV32IM: # %bb.0: # %entry
93+
; RV32IM-NEXT: slli a2, a2, 31
94+
; RV32IM-NEXT: srai a2, a2, 31
95+
; RV32IM-NEXT: and a1, a2, a1
96+
; RV32IM-NEXT: xor a0, a0, a1
97+
; RV32IM-NEXT: ret
98+
;
99+
; RV64IM-LABEL: select_xor_2:
100+
; RV64IM: # %bb.0: # %entry
101+
; RV64IM-NEXT: slli a2, a2, 63
102+
; RV64IM-NEXT: srai a2, a2, 63
103+
; RV64IM-NEXT: and a1, a2, a1
104+
; RV64IM-NEXT: xor a0, a0, a1
105+
; RV64IM-NEXT: ret
106+
;
107+
; RV64IMXVTCONDOPS-LABEL: select_xor_2:
108+
; RV64IMXVTCONDOPS: # %bb.0: # %entry
109+
; RV64IMXVTCONDOPS-NEXT: andi a2, a2, 1
110+
; RV64IMXVTCONDOPS-NEXT: vt.maskc a1, a1, a2
111+
; RV64IMXVTCONDOPS-NEXT: xor a0, a0, a1
112+
; RV64IMXVTCONDOPS-NEXT: ret
113+
;
114+
; CHECKZICOND-LABEL: select_xor_2:
115+
; CHECKZICOND: # %bb.0: # %entry
116+
; CHECKZICOND-NEXT: andi a2, a2, 1
117+
; CHECKZICOND-NEXT: czero.eqz a1, a1, a2
118+
; CHECKZICOND-NEXT: xor a0, a0, a1
119+
; CHECKZICOND-NEXT: ret
90120
entry:
91121
%and = and i8 %cond, 1
92122
%cmp10 = icmp eq i8 %and, 0
@@ -296,21 +326,35 @@ entry:
296326
}
297327

298328
define i32 @select_or(i32 %A, i32 %B, i8 %cond) {
299-
; CHECK32-LABEL: select_or:
300-
; CHECK32: # %bb.0: # %entry
301-
; CHECK32-NEXT: slli a2, a2, 31
302-
; CHECK32-NEXT: srai a2, a2, 31
303-
; CHECK32-NEXT: and a1, a2, a1
304-
; CHECK32-NEXT: or a0, a0, a1
305-
; CHECK32-NEXT: ret
306-
;
307-
; CHECK64-LABEL: select_or:
308-
; CHECK64: # %bb.0: # %entry
309-
; CHECK64-NEXT: slli a2, a2, 63
310-
; CHECK64-NEXT: srai a2, a2, 63
311-
; CHECK64-NEXT: and a1, a2, a1
312-
; CHECK64-NEXT: or a0, a0, a1
313-
; CHECK64-NEXT: ret
329+
; RV32IM-LABEL: select_or:
330+
; RV32IM: # %bb.0: # %entry
331+
; RV32IM-NEXT: slli a2, a2, 31
332+
; RV32IM-NEXT: srai a2, a2, 31
333+
; RV32IM-NEXT: and a1, a2, a1
334+
; RV32IM-NEXT: or a0, a0, a1
335+
; RV32IM-NEXT: ret
336+
;
337+
; RV64IM-LABEL: select_or:
338+
; RV64IM: # %bb.0: # %entry
339+
; RV64IM-NEXT: slli a2, a2, 63
340+
; RV64IM-NEXT: srai a2, a2, 63
341+
; RV64IM-NEXT: and a1, a2, a1
342+
; RV64IM-NEXT: or a0, a0, a1
343+
; RV64IM-NEXT: ret
344+
;
345+
; RV64IMXVTCONDOPS-LABEL: select_or:
346+
; RV64IMXVTCONDOPS: # %bb.0: # %entry
347+
; RV64IMXVTCONDOPS-NEXT: andi a2, a2, 1
348+
; RV64IMXVTCONDOPS-NEXT: vt.maskc a1, a1, a2
349+
; RV64IMXVTCONDOPS-NEXT: or a0, a0, a1
350+
; RV64IMXVTCONDOPS-NEXT: ret
351+
;
352+
; CHECKZICOND-LABEL: select_or:
353+
; CHECKZICOND: # %bb.0: # %entry
354+
; CHECKZICOND-NEXT: andi a2, a2, 1
355+
; CHECKZICOND-NEXT: czero.eqz a1, a1, a2
356+
; CHECKZICOND-NEXT: or a0, a0, a1
357+
; CHECKZICOND-NEXT: ret
314358
entry:
315359
%and = and i8 %cond, 1
316360
%cmp10 = icmp eq i8 %and, 0
@@ -360,21 +404,35 @@ entry:
360404
}
361405

362406
define i32 @select_or_1(i32 %A, i32 %B, i32 %cond) {
363-
; CHECK32-LABEL: select_or_1:
364-
; CHECK32: # %bb.0: # %entry
365-
; CHECK32-NEXT: slli a2, a2, 31
366-
; CHECK32-NEXT: srai a2, a2, 31
367-
; CHECK32-NEXT: and a1, a2, a1
368-
; CHECK32-NEXT: or a0, a0, a1
369-
; CHECK32-NEXT: ret
370-
;
371-
; CHECK64-LABEL: select_or_1:
372-
; CHECK64: # %bb.0: # %entry
373-
; CHECK64-NEXT: slli a2, a2, 63
374-
; CHECK64-NEXT: srai a2, a2, 63
375-
; CHECK64-NEXT: and a1, a2, a1
376-
; CHECK64-NEXT: or a0, a0, a1
377-
; CHECK64-NEXT: ret
407+
; RV32IM-LABEL: select_or_1:
408+
; RV32IM: # %bb.0: # %entry
409+
; RV32IM-NEXT: slli a2, a2, 31
410+
; RV32IM-NEXT: srai a2, a2, 31
411+
; RV32IM-NEXT: and a1, a2, a1
412+
; RV32IM-NEXT: or a0, a0, a1
413+
; RV32IM-NEXT: ret
414+
;
415+
; RV64IM-LABEL: select_or_1:
416+
; RV64IM: # %bb.0: # %entry
417+
; RV64IM-NEXT: slli a2, a2, 63
418+
; RV64IM-NEXT: srai a2, a2, 63
419+
; RV64IM-NEXT: and a1, a2, a1
420+
; RV64IM-NEXT: or a0, a0, a1
421+
; RV64IM-NEXT: ret
422+
;
423+
; RV64IMXVTCONDOPS-LABEL: select_or_1:
424+
; RV64IMXVTCONDOPS: # %bb.0: # %entry
425+
; RV64IMXVTCONDOPS-NEXT: andi a2, a2, 1
426+
; RV64IMXVTCONDOPS-NEXT: vt.maskc a1, a1, a2
427+
; RV64IMXVTCONDOPS-NEXT: or a0, a0, a1
428+
; RV64IMXVTCONDOPS-NEXT: ret
429+
;
430+
; CHECKZICOND-LABEL: select_or_1:
431+
; CHECKZICOND: # %bb.0: # %entry
432+
; CHECKZICOND-NEXT: andi a2, a2, 1
433+
; CHECKZICOND-NEXT: czero.eqz a1, a1, a2
434+
; CHECKZICOND-NEXT: or a0, a0, a1
435+
; CHECKZICOND-NEXT: ret
378436
entry:
379437
%and = and i32 %cond, 1
380438
%cmp10 = icmp eq i32 %and, 0

0 commit comments

Comments
 (0)