Skip to content

Commit 90681d3

Browse files
authored
AMDGPU: Return legal addressmode correctly for flat scratch (#71494)
1 parent e31a758 commit 90681d3

File tree

3 files changed

+28
-24
lines changed

3 files changed

+28
-24
lines changed

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 24 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1322,37 +1322,37 @@ bool SITargetLowering::getAddrModeArguments(IntrinsicInst *II,
13221322
}
13231323
}
13241324

1325-
bool SITargetLowering::isLegalFlatAddressingMode(const AddrMode &AM) const {
1325+
bool SITargetLowering::isLegalFlatAddressingMode(const AddrMode &AM,
1326+
unsigned AddrSpace,
1327+
uint64_t FlatVariant) const {
13261328
if (!Subtarget->hasFlatInstOffsets()) {
13271329
// Flat instructions do not have offsets, and only have the register
13281330
// address.
13291331
return AM.BaseOffs == 0 && AM.Scale == 0;
13301332
}
13311333

13321334
return AM.Scale == 0 &&
1333-
(AM.BaseOffs == 0 ||
1334-
Subtarget->getInstrInfo()->isLegalFLATOffset(
1335-
AM.BaseOffs, AMDGPUAS::FLAT_ADDRESS, SIInstrFlags::FLAT));
1335+
(AM.BaseOffs == 0 || Subtarget->getInstrInfo()->isLegalFLATOffset(
1336+
AM.BaseOffs, AddrSpace, FlatVariant));
13361337
}
13371338

13381339
bool SITargetLowering::isLegalGlobalAddressingMode(const AddrMode &AM) const {
13391340
if (Subtarget->hasFlatGlobalInsts())
1340-
return AM.Scale == 0 &&
1341-
(AM.BaseOffs == 0 || Subtarget->getInstrInfo()->isLegalFLATOffset(
1342-
AM.BaseOffs, AMDGPUAS::GLOBAL_ADDRESS,
1343-
SIInstrFlags::FlatGlobal));
1341+
return isLegalFlatAddressingMode(AM, AMDGPUAS::GLOBAL_ADDRESS,
1342+
SIInstrFlags::FlatGlobal);
13441343

13451344
if (!Subtarget->hasAddr64() || Subtarget->useFlatForGlobal()) {
1346-
// Assume that we will use FLAT for all global memory accesses
1347-
// on VI.
1348-
// FIXME: This assumption is currently wrong. On VI we still use
1349-
// MUBUF instructions for the r + i addressing mode. As currently
1350-
// implemented, the MUBUF instructions only work on buffer < 4GB.
1351-
// It may be possible to support > 4GB buffers with MUBUF instructions,
1352-
// by setting the stride value in the resource descriptor which would
1353-
// increase the size limit to (stride * 4GB). However, this is risky,
1354-
// because it has never been validated.
1355-
return isLegalFlatAddressingMode(AM);
1345+
// Assume that we will use FLAT for all global memory accesses
1346+
// on VI.
1347+
// FIXME: This assumption is currently wrong. On VI we still use
1348+
// MUBUF instructions for the r + i addressing mode. As currently
1349+
// implemented, the MUBUF instructions only work on buffer < 4GB.
1350+
// It may be possible to support > 4GB buffers with MUBUF instructions,
1351+
// by setting the stride value in the resource descriptor which would
1352+
// increase the size limit to (stride * 4GB). However, this is risky,
1353+
// because it has never been validated.
1354+
return isLegalFlatAddressingMode(AM, AMDGPUAS::FLAT_ADDRESS,
1355+
SIInstrFlags::FLAT);
13561356
}
13571357

13581358
return isLegalMUBUFAddressingMode(AM);
@@ -1449,7 +1449,10 @@ bool SITargetLowering::isLegalAddressingMode(const DataLayout &DL,
14491449
}
14501450

14511451
if (AS == AMDGPUAS::PRIVATE_ADDRESS)
1452-
return isLegalMUBUFAddressingMode(AM);
1452+
return Subtarget->enableFlatScratch()
1453+
? isLegalFlatAddressingMode(AM, AMDGPUAS::PRIVATE_ADDRESS,
1454+
SIInstrFlags::FlatScratch)
1455+
: isLegalMUBUFAddressingMode(AM);
14531456

14541457
if (AS == AMDGPUAS::LOCAL_ADDRESS ||
14551458
(AS == AMDGPUAS::REGION_ADDRESS && Subtarget->hasGDS())) {
@@ -1475,7 +1478,8 @@ bool SITargetLowering::isLegalAddressingMode(const DataLayout &DL,
14751478
// computation. We don't have instructions that compute pointers with any
14761479
// addressing modes, so treat them as having no offset like flat
14771480
// instructions.
1478-
return isLegalFlatAddressingMode(AM);
1481+
return isLegalFlatAddressingMode(AM, AMDGPUAS::FLAT_ADDRESS,
1482+
SIInstrFlags::FLAT);
14791483
}
14801484

14811485
// Assume a user alias of global for unknown address spaces.

llvm/lib/Target/AMDGPU/SIISelLowering.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -221,7 +221,8 @@ class SITargetLowering final : public AMDGPUTargetLowering {
221221
SDValue performClampCombine(SDNode *N, DAGCombinerInfo &DCI) const;
222222
SDValue performRcpCombine(SDNode *N, DAGCombinerInfo &DCI) const;
223223

224-
bool isLegalFlatAddressingMode(const AddrMode &AM) const;
224+
bool isLegalFlatAddressingMode(const AddrMode &AM, unsigned AddrSpace,
225+
uint64_t FlatVariant) const;
225226
bool isLegalMUBUFAddressingMode(const AddrMode &AM) const;
226227

227228
unsigned isCFIntrinsic(const SDNode *Intr) const;

llvm/test/CodeGen/AMDGPU/scratch-pointer-sink.ll

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,16 +6,15 @@ define amdgpu_gfx i32 @sink_scratch_pointer(ptr addrspace(5) %stack, i32 inreg %
66
; GCN-LABEL: sink_scratch_pointer:
77
; GCN: ; %bb.0:
88
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9-
; GCN-NEXT: v_add_nc_u32_e32 v0, -4, v0
109
; GCN-NEXT: s_cmp_lg_u32 s4, 0
1110
; GCN-NEXT: s_cbranch_scc0 .LBB0_2
1211
; GCN-NEXT: ; %bb.1: ; %bb2
13-
; GCN-NEXT: scratch_load_b32 v0, v0, off
12+
; GCN-NEXT: scratch_load_b32 v0, v0, off offset:-4
1413
; GCN-NEXT: s_waitcnt vmcnt(0)
1514
; GCN-NEXT: s_setpc_b64 s[30:31]
1615
; GCN-NEXT: .LBB0_2: ; %bb1
1716
; GCN-NEXT: v_mov_b32_e32 v1, 1
18-
; GCN-NEXT: scratch_store_b32 v0, v1, off
17+
; GCN-NEXT: scratch_store_b32 v0, v1, off offset:-4
1918
; GCN-NEXT: v_mov_b32_e32 v0, 0
2019
; GCN-NEXT: s_setpc_b64 s[30:31]
2120
;

0 commit comments

Comments
 (0)