AMDGPU: Return legal addressmode correctly for flat scratch #71494

Merged 1 commit on Dec 5, 2023
44 changes: 24 additions & 20 deletions llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -1322,37 +1322,37 @@ bool SITargetLowering::getAddrModeArguments(IntrinsicInst *II,
}
}

-bool SITargetLowering::isLegalFlatAddressingMode(const AddrMode &AM) const {
+bool SITargetLowering::isLegalFlatAddressingMode(const AddrMode &AM,
+                                                 unsigned AddrSpace,
+                                                 uint64_t FlatVariant) const {
if (!Subtarget->hasFlatInstOffsets()) {
// Flat instructions do not have offsets, and only have the register
// address.
return AM.BaseOffs == 0 && AM.Scale == 0;
}

return AM.Scale == 0 &&
-(AM.BaseOffs == 0 ||
-Subtarget->getInstrInfo()->isLegalFLATOffset(
-AM.BaseOffs, AMDGPUAS::FLAT_ADDRESS, SIInstrFlags::FLAT));
+(AM.BaseOffs == 0 || Subtarget->getInstrInfo()->isLegalFLATOffset(
+AM.BaseOffs, AddrSpace, FlatVariant));
}

bool SITargetLowering::isLegalGlobalAddressingMode(const AddrMode &AM) const {
if (Subtarget->hasFlatGlobalInsts())
-return AM.Scale == 0 &&
-(AM.BaseOffs == 0 || Subtarget->getInstrInfo()->isLegalFLATOffset(
-AM.BaseOffs, AMDGPUAS::GLOBAL_ADDRESS,
-SIInstrFlags::FlatGlobal));
+return isLegalFlatAddressingMode(AM, AMDGPUAS::GLOBAL_ADDRESS,
+SIInstrFlags::FlatGlobal);

if (!Subtarget->hasAddr64() || Subtarget->useFlatForGlobal()) {
-// Assume the we will use FLAT for all global memory accesses
-// on VI.
-// FIXME: This assumption is currently wrong. On VI we still use
-// MUBUF instructions for the r + i addressing mode. As currently
-// implemented, the MUBUF instructions only work on buffer < 4GB.
-// It may be possible to support > 4GB buffers with MUBUF instructions,
-// by setting the stride value in the resource descriptor which would
-// increase the size limit to (stride * 4GB). However, this is risky,
-// because it has never been validated.
-return isLegalFlatAddressingMode(AM);
+// Assume the we will use FLAT for all global memory accesses
+// on VI.
+// FIXME: This assumption is currently wrong. On VI we still use
+// MUBUF instructions for the r + i addressing mode. As currently
+// implemented, the MUBUF instructions only work on buffer < 4GB.
+// It may be possible to support > 4GB buffers with MUBUF instructions,
+// by setting the stride value in the resource descriptor which would
+// increase the size limit to (stride * 4GB). However, this is risky,
+// because it has never been validated.
+return isLegalFlatAddressingMode(AM, AMDGPUAS::FLAT_ADDRESS,
+SIInstrFlags::FLAT);
}

return isLegalMUBUFAddressingMode(AM);
@@ -1449,7 +1449,10 @@ bool SITargetLowering::isLegalAddressingMode(const DataLayout &DL,
}

if (AS == AMDGPUAS::PRIVATE_ADDRESS)
-return isLegalMUBUFAddressingMode(AM);
+return Subtarget->enableFlatScratch()
+? isLegalFlatAddressingMode(AM, AMDGPUAS::PRIVATE_ADDRESS,
+SIInstrFlags::FlatScratch)
+: isLegalMUBUFAddressingMode(AM);

if (AS == AMDGPUAS::LOCAL_ADDRESS ||
(AS == AMDGPUAS::REGION_ADDRESS && Subtarget->hasGDS())) {
@@ -1475,7 +1478,8 @@ bool SITargetLowering::isLegalAddressingMode(const DataLayout &DL,
// computation. We don't have instructions that compute pointers with any
// addressing modes, so treat them as having no offset like flat
// instructions.
-return isLegalFlatAddressingMode(AM);
+return isLegalFlatAddressingMode(AM, AMDGPUAS::FLAT_ADDRESS,
+SIInstrFlags::FLAT);
}

// Assume a user alias of global for unknown address spaces.
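For context, the sketch below models the legality check after this refactor. All types and function names here are simplified stand-ins, not the real LLVM classes, and the 13-bit signed offset range is an example only; in the actual code the bound comes from SIInstrInfo::isLegalFLATOffset and depends on the subtarget and flat variant.

// Standalone sketch (assumption: simplified types, illustrative offset range).
// It models how the refactored check dispatches on the flat variant instead
// of hard-coding FLAT, which is what lets scratch offsets be reported legal.
#include <cstdint>
#include <iostream>

enum class FlatVariant { Flat, Global, Scratch };

struct AddrMode {
  int64_t BaseOffs = 0; // constant offset added to the base register
  int64_t Scale = 0;    // scaled index factor (flat instructions have none)
};

// Stand-in for SIInstrInfo::isLegalFLATOffset: each flat variant has its own
// signed immediate range; a symmetric 13-bit range is used purely as an example.
bool isLegalFlatOffset(int64_t Offset, FlatVariant /*FV*/) {
  return Offset >= -4096 && Offset < 4096;
}

// Mirrors the refactored isLegalFlatAddressingMode: callers now state which
// flat variant (flat, global, or scratch) the offset must be legal for.
bool isLegalFlatAddressingMode(const AddrMode &AM, FlatVariant FV) {
  return AM.Scale == 0 &&
         (AM.BaseOffs == 0 || isLegalFlatOffset(AM.BaseOffs, FV));
}

int main() {
  AddrMode AM;
  AM.BaseOffs = -4; // the offset exercised by scratch-pointer-sink.ll
  // With this patch, a private (scratch) access is validated as FlatScratch
  // when flat scratch is enabled, so the offset is reported legal and can be
  // folded into scratch_load/scratch_store instead of a separate add.
  std::cout << isLegalFlatAddressingMode(AM, FlatVariant::Scratch) << '\n';
  return 0;
}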
3 changes: 2 additions & 1 deletion llvm/lib/Target/AMDGPU/SIISelLowering.h
@@ -221,7 +221,8 @@ class SITargetLowering final : public AMDGPUTargetLowering {
SDValue performClampCombine(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue performRcpCombine(SDNode *N, DAGCombinerInfo &DCI) const;

-bool isLegalFlatAddressingMode(const AddrMode &AM) const;
+bool isLegalFlatAddressingMode(const AddrMode &AM, unsigned AddrSpace,
+uint64_t FlatVariant) const;
bool isLegalMUBUFAddressingMode(const AddrMode &AM) const;

unsigned isCFIntrinsic(const SDNode *Intr) const;
5 changes: 2 additions & 3 deletions llvm/test/CodeGen/AMDGPU/scratch-pointer-sink.ll
@@ -6,16 +6,15 @@ define amdgpu_gfx i32 @sink_scratch_pointer(ptr addrspace(5) %stack, i32 inreg %
; GCN-LABEL: sink_scratch_pointer:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT: v_add_nc_u32_e32 v0, -4, v0
; GCN-NEXT: s_cmp_lg_u32 s4, 0
; GCN-NEXT: s_cbranch_scc0 .LBB0_2
; GCN-NEXT: ; %bb.1: ; %bb2
-; GCN-NEXT: scratch_load_b32 v0, v0, off
+; GCN-NEXT: scratch_load_b32 v0, v0, off offset:-4
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64 s[30:31]
; GCN-NEXT: .LBB0_2: ; %bb1
; GCN-NEXT: v_mov_b32_e32 v1, 1
-; GCN-NEXT: scratch_store_b32 v0, v1, off
+; GCN-NEXT: scratch_store_b32 v0, v1, off offset:-4
; GCN-NEXT: v_mov_b32_e32 v0, 0
; GCN-NEXT: s_setpc_b64 s[30:31]
;
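The updated CHECK lines show the codegen effect of the change: the constant -4 is no longer materialized with a separate v_add_nc_u32_e32 before the branch, and is instead folded into the scratch_load_b32 / scratch_store_b32 instructions as offset:-4.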