Skip to content

Commit c1511a6

Browse files
authored
[AMDGPU] Folding imm offset in more cases for scratch access (#70634)
For scratch load/store, the hardware only accepts non-negative values in the SGPR/VGPR operands. Besides the cases that can be proven from known bits, we can also prove that the value in `base` is non-negative in two more cases: 1.) When the ADD used for the address calculation has the NoUnsignedWrap flag. 2.) When the immediate offset is already negative.
1 parent c6d6a57 commit c1511a6

13 files changed

+826
-839
lines changed

llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp

Lines changed: 69 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1152,13 +1152,64 @@ bool AMDGPUDAGToDAGISel::isDSOffset2Legal(SDValue Base, unsigned Offset0,
11521152
return CurDAG->SignBitIsZero(Base);
11531153
}
11541154

1155-
bool AMDGPUDAGToDAGISel::isFlatScratchBaseLegal(SDValue Base,
1156-
uint64_t FlatVariant) const {
1157-
if (FlatVariant != SIInstrFlags::FlatScratch)
1155+
// Return whether the operation has NoUnsignedWrap property.
1156+
static bool isNoUnsignedWrap(SDValue Addr) {
1157+
return (Addr.getOpcode() == ISD::ADD &&
1158+
Addr->getFlags().hasNoUnsignedWrap()) ||
1159+
Addr->getOpcode() == ISD::OR;
1160+
}
1161+
1162+
// Check that the base address of a flat scratch load/store in the form of `base +
1163+
// offset` is legal to be put in SGPR/VGPR (i.e. unsigned per hardware
1164+
// requirement). We always treat the first operand of Addr as the base address.
1165+
bool AMDGPUDAGToDAGISel::isFlatScratchBaseLegal(SDValue Addr) const {
1166+
if (isNoUnsignedWrap(Addr))
11581167
return true;
1159-
// When value in 32-bit Base can be negative calculate scratch offset using
1160-
// 32-bit add instruction, otherwise use Base(unsigned) + offset.
1161-
return CurDAG->SignBitIsZero(Base);
1168+
1169+
auto LHS = Addr.getOperand(0);
1170+
auto RHS = Addr.getOperand(1);
1171+
1172+
// If the immediate offset is negative and greater than -0x40000000, the base
1173+
// address cannot also be negative. If the base is also negative, the sum
1174+
// would be either negative or much larger than the valid range of scratch
1175+
// memory a thread can access.
1176+
ConstantSDNode *ImmOp = nullptr;
1177+
if (Addr.getOpcode() == ISD::ADD && (ImmOp = dyn_cast<ConstantSDNode>(RHS))) {
1178+
if (ImmOp->getSExtValue() < 0 && ImmOp->getSExtValue() > -0x40000000)
1179+
return true;
1180+
}
1181+
1182+
return CurDAG->SignBitIsZero(LHS);
1183+
}
1184+
1185+
// Check address value in SGPR/VGPR are legal for flat scratch in the form
1186+
// of: SGPR + VGPR.
1187+
bool AMDGPUDAGToDAGISel::isFlatScratchBaseLegalSV(SDValue Addr) const {
1188+
if (isNoUnsignedWrap(Addr))
1189+
return true;
1190+
1191+
auto LHS = Addr.getOperand(0);
1192+
auto RHS = Addr.getOperand(1);
1193+
return CurDAG->SignBitIsZero(RHS) && CurDAG->SignBitIsZero(LHS);
1194+
}
1195+
1196+
// Check address value in SGPR/VGPR are legal for flat scratch in the form
1197+
// of: SGPR + VGPR + Imm.
1198+
bool AMDGPUDAGToDAGISel::isFlatScratchBaseLegalSVImm(SDValue Addr) const {
1199+
auto Base = Addr.getOperand(0);
1200+
auto *RHSImm = cast<ConstantSDNode>(Addr.getOperand(1));
1201+
// If the immediate offset is negative and within certain range, the base
1202+
// address cannot also be negative. If the base is also negative, the sum
1203+
// would be either negative or much larger than the valid range of scratch
1204+
// memory a thread can access.
1205+
if (isNoUnsignedWrap(Base) &&
1206+
(isNoUnsignedWrap(Addr) ||
1207+
(RHSImm->getSExtValue() < 0 && RHSImm->getSExtValue() > -0x40000000)))
1208+
return true;
1209+
1210+
auto LHS = Base.getOperand(0);
1211+
auto RHS = Base.getOperand(1);
1212+
return CurDAG->SignBitIsZero(RHS) && CurDAG->SignBitIsZero(LHS);
11621213
}
11631214

11641215
// TODO: If offset is too big, put low 16-bit into offset.
@@ -1555,7 +1606,8 @@ bool AMDGPUDAGToDAGISel::SelectFlatOffsetImpl(SDNode *N, SDValue Addr,
15551606
if (Subtarget->hasFlatInstOffsets() && !CanHaveFlatSegmentOffsetBug) {
15561607
SDValue N0, N1;
15571608
if (isBaseWithConstantOffset64(Addr, N0, N1) &&
1558-
isFlatScratchBaseLegal(N0, FlatVariant)) {
1609+
(FlatVariant != SIInstrFlags::FlatScratch ||
1610+
isFlatScratchBaseLegal(Addr))) {
15591611
int64_t COffsetVal = cast<ConstantSDNode>(N1)->getSExtValue();
15601612

15611613
const SIInstrInfo *TII = Subtarget->getInstrInfo();
@@ -1787,8 +1839,7 @@ bool AMDGPUDAGToDAGISel::SelectScratchSAddr(SDNode *Parent, SDValue Addr,
17871839

17881840
int64_t COffsetVal = 0;
17891841

1790-
if (CurDAG->isBaseWithConstantOffset(Addr) &&
1791-
isFlatScratchBaseLegal(Addr.getOperand(0))) {
1842+
if (CurDAG->isBaseWithConstantOffset(Addr) && isFlatScratchBaseLegal(Addr)) {
17921843
COffsetVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
17931844
SAddr = Addr.getOperand(0);
17941845
} else {
@@ -1845,6 +1896,7 @@ bool AMDGPUDAGToDAGISel::SelectScratchSVAddr(SDNode *N, SDValue Addr,
18451896
int64_t ImmOffset = 0;
18461897

18471898
SDValue LHS, RHS;
1899+
SDValue OrigAddr = Addr;
18481900
if (isBaseWithConstantOffset64(Addr, LHS, RHS)) {
18491901
int64_t COffsetVal = cast<ConstantSDNode>(RHS)->getSExtValue();
18501902
const SIInstrInfo *TII = Subtarget->getInstrInfo();
@@ -1866,7 +1918,7 @@ bool AMDGPUDAGToDAGISel::SelectScratchSVAddr(SDNode *N, SDValue Addr,
18661918
CurDAG->getTargetConstant(RemainderOffset, SDLoc(), MVT::i32));
18671919
VAddr = SDValue(VMov, 0);
18681920
SAddr = LHS;
1869-
if (!isFlatScratchBaseLegal(SAddr) || !isFlatScratchBaseLegal(VAddr))
1921+
if (!isFlatScratchBaseLegal(Addr))
18701922
return false;
18711923
if (checkFlatScratchSVSSwizzleBug(VAddr, SAddr, SplitImmOffset))
18721924
return false;
@@ -1892,8 +1944,13 @@ bool AMDGPUDAGToDAGISel::SelectScratchSVAddr(SDNode *N, SDValue Addr,
18921944
return false;
18931945
}
18941946

1895-
if (!isFlatScratchBaseLegal(SAddr) || !isFlatScratchBaseLegal(VAddr))
1896-
return false;
1947+
if (OrigAddr != Addr) {
1948+
if (!isFlatScratchBaseLegalSVImm(OrigAddr))
1949+
return false;
1950+
} else {
1951+
if (!isFlatScratchBaseLegalSV(OrigAddr))
1952+
return false;
1953+
}
18971954

18981955
if (checkFlatScratchSVSSwizzleBug(VAddr, SAddr, ImmOffset))
18991956
return false;

llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -154,8 +154,10 @@ class AMDGPUDAGToDAGISel : public SelectionDAGISel {
154154
bool isDSOffsetLegal(SDValue Base, unsigned Offset) const;
155155
bool isDSOffset2Legal(SDValue Base, unsigned Offset0, unsigned Offset1,
156156
unsigned Size) const;
157-
bool isFlatScratchBaseLegal(
158-
SDValue Base, uint64_t FlatVariant = SIInstrFlags::FlatScratch) const;
157+
158+
bool isFlatScratchBaseLegal(SDValue Addr) const;
159+
bool isFlatScratchBaseLegalSV(SDValue Addr) const;
160+
bool isFlatScratchBaseLegalSVImm(SDValue Addr) const;
159161

160162
bool SelectDS1Addr1Offset(SDValue Ptr, SDValue &Base, SDValue &Offset) const;
161163
bool SelectDS64Bit4ByteAligned(SDValue Ptr, SDValue &Base, SDValue &Offset0,

llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp

Lines changed: 82 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -4103,7 +4103,9 @@ AMDGPUInstructionSelector::selectFlatOffsetImpl(MachineOperand &Root,
41034103
int64_t ConstOffset;
41044104
std::tie(PtrBase, ConstOffset) =
41054105
getPtrBaseWithConstantOffset(Root.getReg(), *MRI);
4106-
if (ConstOffset == 0 || !isFlatScratchBaseLegal(PtrBase, FlatVariant))
4106+
4107+
if (ConstOffset == 0 || (FlatVariant == SIInstrFlags::FlatScratch &&
4108+
!isFlatScratchBaseLegal(Root.getReg())))
41074109
return Default;
41084110

41094111
unsigned AddrSpace = (*MI->memoperands_begin())->getAddrSpace();
@@ -4266,7 +4268,7 @@ AMDGPUInstructionSelector::selectScratchSAddr(MachineOperand &Root) const {
42664268
// possible.
42674269
std::tie(PtrBase, ConstOffset) = getPtrBaseWithConstantOffset(Addr, *MRI);
42684270

4269-
if (ConstOffset != 0 && isFlatScratchBaseLegal(PtrBase) &&
4271+
if (ConstOffset != 0 && isFlatScratchBaseLegal(Addr) &&
42704272
TII.isLegalFLATOffset(ConstOffset, AMDGPUAS::PRIVATE_ADDRESS,
42714273
SIInstrFlags::FlatScratch)) {
42724274
Addr = PtrBase;
@@ -4343,6 +4345,7 @@ AMDGPUInstructionSelector::selectScratchSVAddr(MachineOperand &Root) const {
43434345
// possible.
43444346
std::tie(PtrBase, ConstOffset) = getPtrBaseWithConstantOffset(Addr, *MRI);
43454347

4348+
Register OrigAddr = Addr;
43464349
if (ConstOffset != 0 &&
43474350
TII.isLegalFLATOffset(ConstOffset, AMDGPUAS::PRIVATE_ADDRESS, true)) {
43484351
Addr = PtrBase;
@@ -4360,8 +4363,13 @@ AMDGPUInstructionSelector::selectScratchSVAddr(MachineOperand &Root) const {
43604363
Register LHS = AddrDef->MI->getOperand(1).getReg();
43614364
auto LHSDef = getDefSrcRegIgnoringCopies(LHS, *MRI);
43624365

4363-
if (!isFlatScratchBaseLegal(LHS) || !isFlatScratchBaseLegal(RHS))
4364-
return std::nullopt;
4366+
if (OrigAddr != Addr) {
4367+
if (!isFlatScratchBaseLegalSVImm(OrigAddr))
4368+
return std::nullopt;
4369+
} else {
4370+
if (!isFlatScratchBaseLegalSV(OrigAddr))
4371+
return std::nullopt;
4372+
}
43654373

43664374
if (checkFlatScratchSVSSwizzleBug(RHS, LHS, ImmOffset))
43674375
return std::nullopt;
@@ -4494,14 +4502,78 @@ bool AMDGPUInstructionSelector::isDSOffset2Legal(Register Base, int64_t Offset0,
44944502
return KB->signBitIsZero(Base);
44954503
}
44964504

4497-
bool AMDGPUInstructionSelector::isFlatScratchBaseLegal(
4498-
Register Base, uint64_t FlatVariant) const {
4499-
if (FlatVariant != SIInstrFlags::FlatScratch)
4505+
// Return whether the operation has NoUnsignedWrap property.
4506+
bool isNoUnsignedWrap(MachineInstr *Addr) {
4507+
return Addr->getOpcode() == TargetOpcode::G_OR ||
4508+
(Addr->getOpcode() == TargetOpcode::G_PTR_ADD &&
4509+
Addr->getFlag(MachineInstr::NoUWrap));
4510+
};
4511+
4512+
// Check that the base address of a flat scratch load/store in the form of `base +
4513+
// offset` is legal to be put in SGPR/VGPR (i.e. unsigned per hardware
4514+
// requirement). We always treat the first source operand as the base address.
4515+
bool AMDGPUInstructionSelector::isFlatScratchBaseLegal(Register Addr) const {
4516+
MachineInstr *AddrMI = getDefIgnoringCopies(Addr, *MRI);
4517+
4518+
if (isNoUnsignedWrap(AddrMI))
45004519
return true;
45014520

4502-
// When value in 32-bit Base can be negative calculate scratch offset using
4503-
// 32-bit add instruction, otherwise use Base(unsigned) + offset.
4504-
return KB->signBitIsZero(Base);
4521+
Register LHS = AddrMI->getOperand(1).getReg();
4522+
Register RHS = AddrMI->getOperand(2).getReg();
4523+
4524+
if (AddrMI->getOpcode() == TargetOpcode::G_PTR_ADD) {
4525+
std::optional<ValueAndVReg> RhsValReg =
4526+
getIConstantVRegValWithLookThrough(RHS, *MRI);
4527+
// If the immediate offset is negative and greater than -0x40000000, the base
4528+
// address cannot also be negative. If the base is also negative, the sum
4529+
// would be either negative or much larger than the valid range of scratch
4530+
// memory a thread can access.
4531+
if (RhsValReg && RhsValReg->Value.getSExtValue() < 0 &&
4532+
RhsValReg->Value.getSExtValue() > -0x40000000)
4533+
return true;
4534+
}
4535+
4536+
return KB->signBitIsZero(LHS);
4537+
}
4538+
4539+
// Check address value in SGPR/VGPR are legal for flat scratch in the form
4540+
// of: SGPR + VGPR.
4541+
bool AMDGPUInstructionSelector::isFlatScratchBaseLegalSV(Register Addr) const {
4542+
MachineInstr *AddrMI = getDefIgnoringCopies(Addr, *MRI);
4543+
4544+
if (isNoUnsignedWrap(AddrMI))
4545+
return true;
4546+
4547+
Register LHS = AddrMI->getOperand(1).getReg();
4548+
Register RHS = AddrMI->getOperand(2).getReg();
4549+
return KB->signBitIsZero(RHS) && KB->signBitIsZero(LHS);
4550+
}
4551+
4552+
// Check address value in SGPR/VGPR are legal for flat scratch in the form
4553+
// of: SGPR + VGPR + Imm.
4554+
bool AMDGPUInstructionSelector::isFlatScratchBaseLegalSVImm(
4555+
Register Addr) const {
4556+
MachineInstr *AddrMI = getDefIgnoringCopies(Addr, *MRI);
4557+
Register Base = AddrMI->getOperand(1).getReg();
4558+
std::optional<DefinitionAndSourceRegister> BaseDef =
4559+
getDefSrcRegIgnoringCopies(Base, *MRI);
4560+
std::optional<ValueAndVReg> RHSOffset =
4561+
getIConstantVRegValWithLookThrough(AddrMI->getOperand(2).getReg(), *MRI);
4562+
assert(RHSOffset);
4563+
4564+
// If the immediate offset is negative and within certain range, the base
4565+
// address cannot also be negative. If the base is also negative, the sum
4566+
// would be either negative or much larger than the valid range of scratch
4567+
// memory a thread can access.
4568+
if (isNoUnsignedWrap(BaseDef->MI) &&
4569+
(isNoUnsignedWrap(AddrMI) ||
4570+
(RHSOffset->Value.getSExtValue() < 0 &&
4571+
RHSOffset->Value.getSExtValue() > -0x40000000)))
4572+
return true;
4573+
4574+
Register LHS = BaseDef->MI->getOperand(1).getReg();
4575+
Register RHS = BaseDef->MI->getOperand(2).getReg();
4576+
return KB->signBitIsZero(RHS) && KB->signBitIsZero(LHS);
45054577
}
45064578

45074579
bool AMDGPUInstructionSelector::isUnneededShiftMask(const MachineInstr &MI,

llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -243,8 +243,9 @@ class AMDGPUInstructionSelector final : public InstructionSelector {
243243
bool isDSOffsetLegal(Register Base, int64_t Offset) const;
244244
bool isDSOffset2Legal(Register Base, int64_t Offset0, int64_t Offset1,
245245
unsigned Size) const;
246-
bool isFlatScratchBaseLegal(
247-
Register Base, uint64_t FlatVariant = SIInstrFlags::FlatScratch) const;
246+
bool isFlatScratchBaseLegal(Register Addr) const;
247+
bool isFlatScratchBaseLegalSV(Register Addr) const;
248+
bool isFlatScratchBaseLegalSVImm(Register Addr) const;
248249

249250
std::pair<Register, unsigned>
250251
selectDS1Addr1OffsetImpl(MachineOperand &Root) const;

0 commit comments

Comments
 (0)