@@ -4103,7 +4103,9 @@ AMDGPUInstructionSelector::selectFlatOffsetImpl(MachineOperand &Root,
4103
4103
int64_t ConstOffset;
4104
4104
std::tie (PtrBase, ConstOffset) =
4105
4105
getPtrBaseWithConstantOffset (Root.getReg (), *MRI);
4106
- if (ConstOffset == 0 || !isFlatScratchBaseLegal (PtrBase, FlatVariant))
4106
+
4107
+ if (ConstOffset == 0 || (FlatVariant == SIInstrFlags::FlatScratch &&
4108
+ !isFlatScratchBaseLegal (Root.getReg ())))
4107
4109
return Default;
4108
4110
4109
4111
unsigned AddrSpace = (*MI->memoperands_begin ())->getAddrSpace ();
@@ -4266,7 +4268,7 @@ AMDGPUInstructionSelector::selectScratchSAddr(MachineOperand &Root) const {
4266
4268
// possible.
4267
4269
std::tie (PtrBase, ConstOffset) = getPtrBaseWithConstantOffset (Addr, *MRI);
4268
4270
4269
- if (ConstOffset != 0 && isFlatScratchBaseLegal (PtrBase ) &&
4271
+ if (ConstOffset != 0 && isFlatScratchBaseLegal (Addr ) &&
4270
4272
TII.isLegalFLATOffset (ConstOffset, AMDGPUAS::PRIVATE_ADDRESS,
4271
4273
SIInstrFlags::FlatScratch)) {
4272
4274
Addr = PtrBase;
@@ -4343,6 +4345,7 @@ AMDGPUInstructionSelector::selectScratchSVAddr(MachineOperand &Root) const {
4343
4345
// possible.
4344
4346
std::tie (PtrBase, ConstOffset) = getPtrBaseWithConstantOffset (Addr, *MRI);
4345
4347
4348
+ Register OrigAddr = Addr;
4346
4349
if (ConstOffset != 0 &&
4347
4350
TII.isLegalFLATOffset (ConstOffset, AMDGPUAS::PRIVATE_ADDRESS, true )) {
4348
4351
Addr = PtrBase;
@@ -4360,8 +4363,13 @@ AMDGPUInstructionSelector::selectScratchSVAddr(MachineOperand &Root) const {
4360
4363
Register LHS = AddrDef->MI ->getOperand (1 ).getReg ();
4361
4364
auto LHSDef = getDefSrcRegIgnoringCopies (LHS, *MRI);
4362
4365
4363
- if (!isFlatScratchBaseLegal (LHS) || !isFlatScratchBaseLegal (RHS))
4364
- return std::nullopt;
4366
+ if (OrigAddr != Addr) {
4367
+ if (!isFlatScratchBaseLegalSVImm (OrigAddr))
4368
+ return std::nullopt;
4369
+ } else {
4370
+ if (!isFlatScratchBaseLegalSV (OrigAddr))
4371
+ return std::nullopt;
4372
+ }
4365
4373
4366
4374
if (checkFlatScratchSVSSwizzleBug (RHS, LHS, ImmOffset))
4367
4375
return std::nullopt;
@@ -4494,14 +4502,78 @@ bool AMDGPUInstructionSelector::isDSOffset2Legal(Register Base, int64_t Offset0,
4494
4502
return KB->signBitIsZero (Base);
4495
4503
}
4496
4504
4497
- bool AMDGPUInstructionSelector::isFlatScratchBaseLegal (
4498
- Register Base, uint64_t FlatVariant) const {
4499
- if (FlatVariant != SIInstrFlags::FlatScratch)
4505
+ // Return whether the operation has NoUnsignedWrap property.
4506
+ bool isNoUnsignedWrap (MachineInstr *Addr) {
4507
+ return Addr->getOpcode () == TargetOpcode::G_OR ||
4508
+ (Addr->getOpcode () == TargetOpcode::G_PTR_ADD &&
4509
+ Addr->getFlag (MachineInstr::NoUWrap));
4510
+ };
4511
+
4512
+ // Check that the base address of flat scratch load/store in the form of `base +
4513
+ // offset` is legal to be put in SGPR/VGPR (i.e. unsigned per hardware
4514
+ // requirement). We always treat the first operand as the base address here.
4515
+ bool AMDGPUInstructionSelector::isFlatScratchBaseLegal (Register Addr) const {
4516
+ MachineInstr *AddrMI = getDefIgnoringCopies (Addr, *MRI);
4517
+
4518
+ if (isNoUnsignedWrap (AddrMI))
4500
4519
return true ;
4501
4520
4502
- // When value in 32-bit Base can be negative calculate scratch offset using
4503
- // 32-bit add instruction, otherwise use Base(unsigned) + offset.
4504
- return KB->signBitIsZero (Base);
4521
+ Register LHS = AddrMI->getOperand (1 ).getReg ();
4522
+ Register RHS = AddrMI->getOperand (2 ).getReg ();
4523
+
4524
+ if (AddrMI->getOpcode () == TargetOpcode::G_PTR_ADD) {
4525
+ std::optional<ValueAndVReg> RhsValReg =
4526
+ getIConstantVRegValWithLookThrough (RHS, *MRI);
4527
+ // If the immediate offset is negative and within certain range, the base
4528
+ // address cannot also be negative. If the base is also negative, the sum
4529
+ // would be either negative or much larger than the valid range of scratch
4530
+ // memory a thread can access.
4531
+ if (RhsValReg && RhsValReg->Value .getSExtValue () < 0 &&
4532
+ RhsValReg->Value .getSExtValue () > -0x40000000 )
4533
+ return true ;
4534
+ }
4535
+
4536
+ return KB->signBitIsZero (LHS);
4537
+ }
4538
+
4539
+ // Check address value in SGPR/VGPR are legal for flat scratch in the form
4540
+ // of: SGPR + VGPR.
4541
+ bool AMDGPUInstructionSelector::isFlatScratchBaseLegalSV (Register Addr) const {
4542
+ MachineInstr *AddrMI = getDefIgnoringCopies (Addr, *MRI);
4543
+
4544
+ if (isNoUnsignedWrap (AddrMI))
4545
+ return true ;
4546
+
4547
+ Register LHS = AddrMI->getOperand (1 ).getReg ();
4548
+ Register RHS = AddrMI->getOperand (2 ).getReg ();
4549
+ return KB->signBitIsZero (RHS) && KB->signBitIsZero (LHS);
4550
+ }
4551
+
4552
+ // Check address value in SGPR/VGPR are legal for flat scratch in the form
4553
+ // of: SGPR + VGPR + Imm.
4554
+ bool AMDGPUInstructionSelector::isFlatScratchBaseLegalSVImm (
4555
+ Register Addr) const {
4556
+ MachineInstr *AddrMI = getDefIgnoringCopies (Addr, *MRI);
4557
+ Register Base = AddrMI->getOperand (1 ).getReg ();
4558
+ std::optional<DefinitionAndSourceRegister> BaseDef =
4559
+ getDefSrcRegIgnoringCopies (Base, *MRI);
4560
+ std::optional<ValueAndVReg> RHSOffset =
4561
+ getIConstantVRegValWithLookThrough (AddrMI->getOperand (2 ).getReg (), *MRI);
4562
+ assert (RHSOffset);
4563
+
4564
+ // If the immediate offset is negative and within certain range, the base
4565
+ // address cannot also be negative. If the base is also negative, the sum
4566
+ // would be either negative or much larger than the valid range of scratch
4567
+ // memory a thread can access.
4568
+ if (isNoUnsignedWrap (BaseDef->MI ) &&
4569
+ (isNoUnsignedWrap (AddrMI) ||
4570
+ (RHSOffset->Value .getSExtValue () < 0 &&
4571
+ RHSOffset->Value .getSExtValue () > -0x40000000 )))
4572
+ return true ;
4573
+
4574
+ Register LHS = BaseDef->MI ->getOperand (1 ).getReg ();
4575
+ Register RHS = BaseDef->MI ->getOperand (2 ).getReg ();
4576
+ return KB->signBitIsZero (RHS) && KB->signBitIsZero (LHS);
4505
4577
}
4506
4578
4507
4579
bool AMDGPUInstructionSelector::isUnneededShiftMask (const MachineInstr &MI,
0 commit comments