Skip to content

Commit 3c0990c

Browse files
authored
[RISCV] Generalize the (ADD (SLLI X, 32), X) special case in constant materialization. (#66931)
We don't have to limit ourselves to a shift amount of 32. We can support other shift amounts that make the upper 32 bits line up.
1 parent 451255b commit 3c0990c

File tree

6 files changed

+126
-128
lines changed

6 files changed

+126
-128
lines changed

llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.cpp

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -438,6 +438,39 @@ InstSeq generateInstSeq(int64_t Val, const FeatureBitset &ActiveFeatures) {
438438
return Res;
439439
}
440440

441+
// Try to express Val as X + (X << ShiftAmt), where X is the sign-extended low
// 32 bits of Val. On success the instruction sequence that materializes X is
// returned, ShiftAmt receives the SLLI amount and AddOpc the opcode (ADD or
// ADD_UW) used to combine X with its shifted copy. An empty sequence means no
// such decomposition was found; ShiftAmt and AddOpc must not be relied on in
// that case (they are left untouched on the LoVal == 0 path and hold
// speculative values on the final fall-through path).
// Callers must only pass a Val that differs from its sign-extended low 32
// bits, otherwise the assert on Tmp below fires.
InstSeq generateTwoRegInstSeq(int64_t Val, const FeatureBitset &ActiveFeatures,
442+
unsigned &ShiftAmt, unsigned &AddOpc) {
443+
int64_t LoVal = SignExtend64<32>(Val);
444+
// With X == 0 there is nothing to shift and add, so give up immediately.
if (LoVal == 0)
445+
return RISCVMatInt::InstSeq();
446+
447+
// Subtract the LoVal to emulate the effect of the final ADD.
448+
// Val and LoVal share the same low 32 bits, so the low 32 bits of Tmp are
// always zero.
uint64_t Tmp = (uint64_t)Val - (uint64_t)LoVal;
449+
assert(Tmp != 0);
450+
451+
// Use trailing zero counts to figure how far we need to shift LoVal to line
452+
// up with the remaining constant.
453+
// TODO: This algorithm assumes all non-zero bits in the low 32 bits of the
454+
// final constant come from LoVal.
455+
unsigned TzLo = llvm::countr_zero((uint64_t)LoVal);
456+
unsigned TzHi = llvm::countr_zero(Tmp);
457+
// LoVal is a non-zero sign-extended 32-bit value, so its lowest set bit is
// below bit 32; Tmp is non-zero with its low 32 bits clear, so its lowest set
// bit is at or above bit 32. Hence ShiftAmt below lies in [1, 63].
assert(TzLo < 32 && TzHi >= 32);
458+
ShiftAmt = TzHi - TzLo;
459+
AddOpc = RISCV::ADD;
460+
461+
// The candidate shift only aligns the lowest set bits; accept it only if the
// shifted X reproduces the whole remainder.
if (Tmp == ((uint64_t)LoVal << ShiftAmt))
462+
return RISCVMatInt::generateInstSeq(LoVal, ActiveFeatures);
463+
464+
// If we have Zba, we can use (ADD_UW X, (SLLI X, 32)).
465+
// Reaching this point with identical 32-bit halves means bit 31 of Val is
// set (otherwise the plain ADD check above would have matched with a shift of
// 32). ADD_UW zero-extends the low 32 bits of its first operand per the Zba
// specification, which avoids the sign-extension bits of X disturbing the
// upper half.
if (ActiveFeatures[RISCV::FeatureStdExtZba] && Lo_32(Val) == Hi_32(Val)) {
466+
ShiftAmt = 32;
467+
AddOpc = RISCV::ADD_UW;
468+
return RISCVMatInt::generateInstSeq(LoVal, ActiveFeatures);
469+
}
470+
471+
// No two-register decomposition found.
return RISCVMatInt::InstSeq();
472+
}
473+
441474
int getIntMatCost(const APInt &Val, unsigned Size,
442475
const FeatureBitset &ActiveFeatures, bool CompressionCost) {
443476
bool IsRV64 = ActiveFeatures[RISCV::Feature64Bit];

llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,14 @@ using InstSeq = SmallVector<Inst, 8>;
4848
// instruction selection.
4949
InstSeq generateInstSeq(int64_t Val, const FeatureBitset &ActiveFeatures);
5050

51+
// Helper to generate an instruction sequence that can materialize the given
52+
// immediate value into a register using an additional temporary register. This
53+
// handles cases where the constant can be generated by (ADD (SLLI X, C), X) or
54+
// (ADD_UW X, (SLLI X, 32)). The sequence to generate X is returned. ShiftAmt
55+
// provides the SLLI shift amount and AddOpc indicates ADD or ADD_UW.
56+
InstSeq generateTwoRegInstSeq(int64_t Val, const FeatureBitset &ActiveFeatures,
57+
unsigned &ShiftAmt, unsigned &AddOpc);
58+
5159
// Helper to estimate the number of instructions required to materialise the
5260
// given immediate value into a register. This estimate does not account for
5361
// `Val` possibly fitting into an immediate, and so may over-estimate.

llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp

Lines changed: 12 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -195,29 +195,23 @@ static SDValue selectImm(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT,
195195
RISCVMatInt::InstSeq Seq =
196196
RISCVMatInt::generateInstSeq(Imm, Subtarget.getFeatureBits());
197197

198-
// See if we can create this constant as (ADD (SLLI X, 32), X) where X is at
198+
// See if we can create this constant as (ADD (SLLI X, C), X) where X is at
199199
// worst an LUI+ADDIW. This will require an extra register, but avoids a
200200
// constant pool.
201201
// If we have Zba we can use (ADD_UW X, (SLLI X, 32)) to handle cases where
202202
// low and high 32 bits are the same and bit 31 and 63 are set.
203203
if (Seq.size() > 3) {
204-
int64_t LoVal = SignExtend64<32>(Imm);
205-
int64_t HiVal = SignExtend64<32>(((uint64_t)Imm - (uint64_t)LoVal) >> 32);
206-
if (LoVal == HiVal ||
207-
(Subtarget.hasStdExtZba() && Lo_32(Imm) == Hi_32(Imm))) {
208-
RISCVMatInt::InstSeq SeqLo =
209-
RISCVMatInt::generateInstSeq(LoVal, Subtarget.getFeatureBits());
210-
if ((SeqLo.size() + 2) < Seq.size()) {
211-
SDValue Lo = selectImmSeq(CurDAG, DL, VT, SeqLo);
212-
213-
SDValue SLLI = SDValue(
214-
CurDAG->getMachineNode(RISCV::SLLI, DL, VT, Lo,
215-
CurDAG->getTargetConstant(32, DL, VT)),
216-
0);
217-
// Prefer ADD when possible.
218-
unsigned AddOpc = (LoVal == HiVal) ? RISCV::ADD : RISCV::ADD_UW;
219-
return SDValue(CurDAG->getMachineNode(AddOpc, DL, VT, Lo, SLLI), 0);
220-
}
204+
unsigned ShiftAmt, AddOpc;
205+
RISCVMatInt::InstSeq SeqLo = RISCVMatInt::generateTwoRegInstSeq(
206+
Imm, Subtarget.getFeatureBits(), ShiftAmt, AddOpc);
207+
if (!SeqLo.empty() && (SeqLo.size() + 2) < Seq.size()) {
208+
SDValue Lo = selectImmSeq(CurDAG, DL, VT, SeqLo);
209+
210+
SDValue SLLI = SDValue(
211+
CurDAG->getMachineNode(RISCV::SLLI, DL, VT, Lo,
212+
CurDAG->getTargetConstant(ShiftAmt, DL, VT)),
213+
0);
214+
return SDValue(CurDAG->getMachineNode(AddOpc, DL, VT, Lo, SLLI), 0);
221215
}
222216
}
223217

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 11 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -4972,24 +4972,22 @@ static SDValue lowerConstant(SDValue Op, SelectionDAG &DAG,
49724972
if (Seq.size() <= Subtarget.getMaxBuildIntsCost())
49734973
return Op;
49744974

4975-
// Special case. See if we can build the constant as (ADD (SLLI X, 32), X) do
4975+
// Optimizations below are disabled for opt size. If we're optimizing for
4976+
// size, use a constant pool.
4977+
if (DAG.shouldOptForSize())
4978+
return SDValue();
4979+
4980+
// Special case. See if we can build the constant as (ADD (SLLI X, C), X); do
49764981
// that if it will avoid a constant pool.
49774982
// It will require an extra temporary register though.
49784983
// If we have Zba we can use (ADD_UW X, (SLLI X, 32)) to handle cases where
49794984
// low and high 32 bits are the same and bit 31 and 63 are set.
4980-
if (!DAG.shouldOptForSize()) {
4981-
int64_t LoVal = SignExtend64<32>(Imm);
4982-
int64_t HiVal = SignExtend64<32>(((uint64_t)Imm - (uint64_t)LoVal) >> 32);
4983-
if (LoVal == HiVal ||
4984-
(Subtarget.hasStdExtZba() && Lo_32(Imm) == Hi_32(Imm))) {
4985-
RISCVMatInt::InstSeq SeqLo =
4986-
RISCVMatInt::generateInstSeq(LoVal, Subtarget.getFeatureBits());
4987-
if ((SeqLo.size() + 2) <= Subtarget.getMaxBuildIntsCost())
4988-
return Op;
4989-
}
4990-
}
4985+
unsigned ShiftAmt, AddOpc;
4986+
RISCVMatInt::InstSeq SeqLo = RISCVMatInt::generateTwoRegInstSeq(
4987+
Imm, Subtarget.getFeatureBits(), ShiftAmt, AddOpc);
4988+
if (!SeqLo.empty() && (SeqLo.size() + 2) <= Subtarget.getMaxBuildIntsCost())
4989+
return Op;
49914990

4992-
// Expand to a constant pool using the default expansion code.
49934991
return SDValue();
49944992
}
49954993

llvm/test/CodeGen/RISCV/imm.ll

Lines changed: 58 additions & 92 deletions
Original file line numberDiff line numberDiff line change
@@ -1157,60 +1157,44 @@ define i64 @imm_2reg_1() nounwind {
11571157
; RV32I-NEXT: lui a1, 983040
11581158
; RV32I-NEXT: ret
11591159
;
1160-
; RV64-NOPOOL-LABEL: imm_2reg_1:
1161-
; RV64-NOPOOL: # %bb.0:
1162-
; RV64-NOPOOL-NEXT: lui a0, 1048430
1163-
; RV64-NOPOOL-NEXT: addiw a0, a0, 1493
1164-
; RV64-NOPOOL-NEXT: slli a0, a0, 13
1165-
; RV64-NOPOOL-NEXT: addi a0, a0, -1921
1166-
; RV64-NOPOOL-NEXT: srli a0, a0, 4
1167-
; RV64-NOPOOL-NEXT: not a0, a0
1168-
; RV64-NOPOOL-NEXT: ret
1169-
;
1170-
; RV64I-POOL-LABEL: imm_2reg_1:
1171-
; RV64I-POOL: # %bb.0:
1172-
; RV64I-POOL-NEXT: lui a0, %hi(.LCPI27_0)
1173-
; RV64I-POOL-NEXT: ld a0, %lo(.LCPI27_0)(a0)
1174-
; RV64I-POOL-NEXT: ret
1160+
; RV64I-LABEL: imm_2reg_1:
1161+
; RV64I: # %bb.0:
1162+
; RV64I-NEXT: lui a0, 74565
1163+
; RV64I-NEXT: addiw a0, a0, 1656
1164+
; RV64I-NEXT: slli a1, a0, 57
1165+
; RV64I-NEXT: add a0, a0, a1
1166+
; RV64I-NEXT: ret
11751167
;
11761168
; RV64IZBA-LABEL: imm_2reg_1:
11771169
; RV64IZBA: # %bb.0:
1178-
; RV64IZBA-NEXT: lui a0, 1048430
1179-
; RV64IZBA-NEXT: addiw a0, a0, 1493
1180-
; RV64IZBA-NEXT: slli a0, a0, 13
1181-
; RV64IZBA-NEXT: addi a0, a0, -1921
1182-
; RV64IZBA-NEXT: srli a0, a0, 4
1183-
; RV64IZBA-NEXT: not a0, a0
1170+
; RV64IZBA-NEXT: lui a0, 74565
1171+
; RV64IZBA-NEXT: addiw a0, a0, 1656
1172+
; RV64IZBA-NEXT: slli a1, a0, 57
1173+
; RV64IZBA-NEXT: add a0, a0, a1
11841174
; RV64IZBA-NEXT: ret
11851175
;
11861176
; RV64IZBB-LABEL: imm_2reg_1:
11871177
; RV64IZBB: # %bb.0:
1188-
; RV64IZBB-NEXT: lui a0, 1048430
1189-
; RV64IZBB-NEXT: addiw a0, a0, 1493
1190-
; RV64IZBB-NEXT: slli a0, a0, 13
1191-
; RV64IZBB-NEXT: addi a0, a0, -1921
1192-
; RV64IZBB-NEXT: srli a0, a0, 4
1193-
; RV64IZBB-NEXT: not a0, a0
1178+
; RV64IZBB-NEXT: lui a0, 74565
1179+
; RV64IZBB-NEXT: addiw a0, a0, 1656
1180+
; RV64IZBB-NEXT: slli a1, a0, 57
1181+
; RV64IZBB-NEXT: add a0, a0, a1
11941182
; RV64IZBB-NEXT: ret
11951183
;
11961184
; RV64IZBS-LABEL: imm_2reg_1:
11971185
; RV64IZBS: # %bb.0:
1198-
; RV64IZBS-NEXT: lui a0, 1048430
1199-
; RV64IZBS-NEXT: addiw a0, a0, 1493
1200-
; RV64IZBS-NEXT: slli a0, a0, 13
1201-
; RV64IZBS-NEXT: addi a0, a0, -1921
1202-
; RV64IZBS-NEXT: srli a0, a0, 4
1203-
; RV64IZBS-NEXT: not a0, a0
1186+
; RV64IZBS-NEXT: lui a0, 74565
1187+
; RV64IZBS-NEXT: addiw a0, a0, 1656
1188+
; RV64IZBS-NEXT: slli a1, a0, 57
1189+
; RV64IZBS-NEXT: add a0, a0, a1
12041190
; RV64IZBS-NEXT: ret
12051191
;
12061192
; RV64IXTHEADBB-LABEL: imm_2reg_1:
12071193
; RV64IXTHEADBB: # %bb.0:
1208-
; RV64IXTHEADBB-NEXT: lui a0, 1048430
1209-
; RV64IXTHEADBB-NEXT: addiw a0, a0, 1493
1210-
; RV64IXTHEADBB-NEXT: slli a0, a0, 13
1211-
; RV64IXTHEADBB-NEXT: addi a0, a0, -1921
1212-
; RV64IXTHEADBB-NEXT: srli a0, a0, 4
1213-
; RV64IXTHEADBB-NEXT: not a0, a0
1194+
; RV64IXTHEADBB-NEXT: lui a0, 74565
1195+
; RV64IXTHEADBB-NEXT: addiw a0, a0, 1656
1196+
; RV64IXTHEADBB-NEXT: slli a1, a0, 57
1197+
; RV64IXTHEADBB-NEXT: add a0, a0, a1
12141198
; RV64IXTHEADBB-NEXT: ret
12151199
ret i64 -1152921504301427080 ; 0xF000_0000_1234_5678
12161200
}
@@ -1703,40 +1687,28 @@ define i64 @imm_neg_9223372034778874949() {
17031687
; RV32I-NEXT: lui a1, 524288
17041688
; RV32I-NEXT: ret
17051689
;
1706-
; RV64-NOPOOL-LABEL: imm_neg_9223372034778874949:
1707-
; RV64-NOPOOL: # %bb.0:
1708-
; RV64-NOPOOL-NEXT: lui a0, 1048329
1709-
; RV64-NOPOOL-NEXT: addiw a0, a0, -1911
1710-
; RV64-NOPOOL-NEXT: slli a0, a0, 12
1711-
; RV64-NOPOOL-NEXT: addi a0, a0, -1911
1712-
; RV64-NOPOOL-NEXT: srli a0, a0, 1
1713-
; RV64-NOPOOL-NEXT: not a0, a0
1714-
; RV64-NOPOOL-NEXT: ret
1715-
;
1716-
; RV64I-POOL-LABEL: imm_neg_9223372034778874949:
1717-
; RV64I-POOL: # %bb.0:
1718-
; RV64I-POOL-NEXT: lui a0, %hi(.LCPI38_0)
1719-
; RV64I-POOL-NEXT: ld a0, %lo(.LCPI38_0)(a0)
1720-
; RV64I-POOL-NEXT: ret
1690+
; RV64I-LABEL: imm_neg_9223372034778874949:
1691+
; RV64I: # %bb.0:
1692+
; RV64I-NEXT: lui a0, 506812
1693+
; RV64I-NEXT: addiw a0, a0, -1093
1694+
; RV64I-NEXT: slli a1, a0, 63
1695+
; RV64I-NEXT: add a0, a0, a1
1696+
; RV64I-NEXT: ret
17211697
;
17221698
; RV64IZBA-LABEL: imm_neg_9223372034778874949:
17231699
; RV64IZBA: # %bb.0:
1724-
; RV64IZBA-NEXT: lui a0, 1048329
1725-
; RV64IZBA-NEXT: addiw a0, a0, -1911
1726-
; RV64IZBA-NEXT: slli a0, a0, 12
1727-
; RV64IZBA-NEXT: addi a0, a0, -1911
1728-
; RV64IZBA-NEXT: srli a0, a0, 1
1729-
; RV64IZBA-NEXT: not a0, a0
1700+
; RV64IZBA-NEXT: lui a0, 506812
1701+
; RV64IZBA-NEXT: addiw a0, a0, -1093
1702+
; RV64IZBA-NEXT: slli a1, a0, 63
1703+
; RV64IZBA-NEXT: add a0, a0, a1
17301704
; RV64IZBA-NEXT: ret
17311705
;
17321706
; RV64IZBB-LABEL: imm_neg_9223372034778874949:
17331707
; RV64IZBB: # %bb.0:
1734-
; RV64IZBB-NEXT: lui a0, 1048329
1735-
; RV64IZBB-NEXT: addiw a0, a0, -1911
1736-
; RV64IZBB-NEXT: slli a0, a0, 12
1737-
; RV64IZBB-NEXT: addi a0, a0, -1911
1738-
; RV64IZBB-NEXT: srli a0, a0, 1
1739-
; RV64IZBB-NEXT: not a0, a0
1708+
; RV64IZBB-NEXT: lui a0, 506812
1709+
; RV64IZBB-NEXT: addiw a0, a0, -1093
1710+
; RV64IZBB-NEXT: slli a1, a0, 63
1711+
; RV64IZBB-NEXT: add a0, a0, a1
17401712
; RV64IZBB-NEXT: ret
17411713
;
17421714
; RV64IZBS-LABEL: imm_neg_9223372034778874949:
@@ -1748,12 +1720,10 @@ define i64 @imm_neg_9223372034778874949() {
17481720
;
17491721
; RV64IXTHEADBB-LABEL: imm_neg_9223372034778874949:
17501722
; RV64IXTHEADBB: # %bb.0:
1751-
; RV64IXTHEADBB-NEXT: lui a0, 1048329
1752-
; RV64IXTHEADBB-NEXT: addiw a0, a0, -1911
1753-
; RV64IXTHEADBB-NEXT: slli a0, a0, 12
1754-
; RV64IXTHEADBB-NEXT: addi a0, a0, -1911
1755-
; RV64IXTHEADBB-NEXT: srli a0, a0, 1
1756-
; RV64IXTHEADBB-NEXT: not a0, a0
1723+
; RV64IXTHEADBB-NEXT: lui a0, 506812
1724+
; RV64IXTHEADBB-NEXT: addiw a0, a0, -1093
1725+
; RV64IXTHEADBB-NEXT: slli a1, a0, 63
1726+
; RV64IXTHEADBB-NEXT: add a0, a0, a1
17571727
; RV64IXTHEADBB-NEXT: ret
17581728
ret i64 -9223372034778874949 ; 0x800000007bbbbbbb
17591729
}
@@ -1932,29 +1902,26 @@ define i64 @imm_9223372034904144827() {
19321902
;
19331903
; RV64I-LABEL: imm_9223372034904144827:
19341904
; RV64I: # %bb.0:
1935-
; RV64I-NEXT: lui a0, 1048343
1936-
; RV64I-NEXT: addiw a0, a0, 1911
1937-
; RV64I-NEXT: slli a0, a0, 12
1938-
; RV64I-NEXT: addi a0, a0, 1911
1939-
; RV64I-NEXT: srli a0, a0, 1
1905+
; RV64I-NEXT: lui a0, 572348
1906+
; RV64I-NEXT: addiw a0, a0, -1093
1907+
; RV64I-NEXT: slli a1, a0, 63
1908+
; RV64I-NEXT: add a0, a0, a1
19401909
; RV64I-NEXT: ret
19411910
;
19421911
; RV64IZBA-LABEL: imm_9223372034904144827:
19431912
; RV64IZBA: # %bb.0:
1944-
; RV64IZBA-NEXT: lui a0, 1048343
1945-
; RV64IZBA-NEXT: addiw a0, a0, 1911
1946-
; RV64IZBA-NEXT: slli a0, a0, 12
1947-
; RV64IZBA-NEXT: addi a0, a0, 1911
1948-
; RV64IZBA-NEXT: srli a0, a0, 1
1913+
; RV64IZBA-NEXT: lui a0, 572348
1914+
; RV64IZBA-NEXT: addiw a0, a0, -1093
1915+
; RV64IZBA-NEXT: slli a1, a0, 63
1916+
; RV64IZBA-NEXT: add a0, a0, a1
19491917
; RV64IZBA-NEXT: ret
19501918
;
19511919
; RV64IZBB-LABEL: imm_9223372034904144827:
19521920
; RV64IZBB: # %bb.0:
1953-
; RV64IZBB-NEXT: lui a0, 1048343
1954-
; RV64IZBB-NEXT: addiw a0, a0, 1911
1955-
; RV64IZBB-NEXT: slli a0, a0, 12
1956-
; RV64IZBB-NEXT: addi a0, a0, 1911
1957-
; RV64IZBB-NEXT: srli a0, a0, 1
1921+
; RV64IZBB-NEXT: lui a0, 572348
1922+
; RV64IZBB-NEXT: addiw a0, a0, -1093
1923+
; RV64IZBB-NEXT: slli a1, a0, 63
1924+
; RV64IZBB-NEXT: add a0, a0, a1
19581925
; RV64IZBB-NEXT: ret
19591926
;
19601927
; RV64IZBS-LABEL: imm_9223372034904144827:
@@ -1966,11 +1933,10 @@ define i64 @imm_9223372034904144827() {
19661933
;
19671934
; RV64IXTHEADBB-LABEL: imm_9223372034904144827:
19681935
; RV64IXTHEADBB: # %bb.0:
1969-
; RV64IXTHEADBB-NEXT: lui a0, 1048343
1970-
; RV64IXTHEADBB-NEXT: addiw a0, a0, 1911
1971-
; RV64IXTHEADBB-NEXT: slli a0, a0, 12
1972-
; RV64IXTHEADBB-NEXT: addi a0, a0, 1911
1973-
; RV64IXTHEADBB-NEXT: srli a0, a0, 1
1936+
; RV64IXTHEADBB-NEXT: lui a0, 572348
1937+
; RV64IXTHEADBB-NEXT: addiw a0, a0, -1093
1938+
; RV64IXTHEADBB-NEXT: slli a1, a0, 63
1939+
; RV64IXTHEADBB-NEXT: add a0, a0, a1
19741940
; RV64IXTHEADBB-NEXT: ret
19751941
ret i64 9223372034904144827 ; 0x7fffffff8bbbbbbb
19761942
}

llvm/test/CodeGen/RISCV/rv64zbb-intrinsic.ll

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -61,11 +61,10 @@ define i64 @orcb64_knownbits(i64 %a) nounwind {
6161
; RV64ZBB-NEXT: lui a1, 65535
6262
; RV64ZBB-NEXT: slli a1, a1, 12
6363
; RV64ZBB-NEXT: and a0, a0, a1
64-
; RV64ZBB-NEXT: lui a1, 131073
65-
; RV64ZBB-NEXT: slli a1, a1, 13
66-
; RV64ZBB-NEXT: addi a1, a1, 1
67-
; RV64ZBB-NEXT: slli a1, a1, 20
68-
; RV64ZBB-NEXT: addi a1, a1, 8
64+
; RV64ZBB-NEXT: lui a1, 256
65+
; RV64ZBB-NEXT: addiw a1, a1, 8
66+
; RV64ZBB-NEXT: slli a2, a1, 42
67+
; RV64ZBB-NEXT: add a1, a1, a2
6968
; RV64ZBB-NEXT: or a0, a0, a1
7069
; RV64ZBB-NEXT: orc.b a0, a0
7170
; RV64ZBB-NEXT: ret

0 commit comments

Comments
 (0)