Skip to content

Commit 952e890

Browse files
authored
[AMDGPU] Support dynamic alloca (llvm#1174)
2 parents b9da5c2 + a4db099 commit 952e890

File tree

107 files changed

+24242
-2089
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

107 files changed

+24242
-2089
lines changed

llvm/include/llvm/IR/IntrinsicsAMDGPU.td

+35
Original file line numberDiff line numberDiff line change
@@ -1409,6 +1409,23 @@ class AMDGPUStructBufferLoad<LLVMType data_ty = llvm_any_ty> : DefaultAttrsIntri
14091409
def int_amdgcn_struct_buffer_load_format : AMDGPUStructBufferLoad;
14101410
def int_amdgcn_struct_buffer_load : AMDGPUStructBufferLoad;
14111411

1412+
class AMDGPUStructAtomicBufferLoad<LLVMType data_ty = llvm_any_ty> : Intrinsic <
1413+
[data_ty],
1414+
[llvm_v4i32_ty, // rsrc(SGPR)
1415+
llvm_i32_ty, // vindex(VGPR)
1416+
llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling)
1417+
llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling)
1418+
llvm_i32_ty], // auxiliary/cachepolicy(imm):
1419+
// bit 0 = glc, bit 1 = slc, bit 2 = dlc (gfx10/gfx11),
1420+
// bit 3 = swz, bit 4 = scc (gfx90a)
1421+
// gfx940: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1
1422+
// gfx12+: bits [0-2] = th, bits [3-4] = scope,
1423+
// bit 6 = swz
1424+
// all: volatile op (bit 31, stripped at lowering)
1425+
[ImmArg<ArgIndex<4>>, IntrWillReturn, IntrNoCallback, IntrNoFree], "", [SDNPMemOperand]>,
1426+
AMDGPURsrcIntrinsic<0>;
1427+
def int_amdgcn_struct_atomic_buffer_load : AMDGPUStructAtomicBufferLoad;
1428+
14121429
class AMDGPUStructPtrBufferLoad<LLVMType data_ty = llvm_any_ty> : DefaultAttrsIntrinsic <
14131430
[data_ty],
14141431
[AMDGPUBufferRsrcTy, // rsrc(SGPR)
@@ -1428,6 +1445,24 @@ class AMDGPUStructPtrBufferLoad<LLVMType data_ty = llvm_any_ty> : DefaultAttrsIn
14281445
def int_amdgcn_struct_ptr_buffer_load_format : AMDGPUStructPtrBufferLoad;
14291446
def int_amdgcn_struct_ptr_buffer_load : AMDGPUStructPtrBufferLoad;
14301447

1448+
class AMDGPUStructPtrAtomicBufferLoad<LLVMType data_ty = llvm_any_ty> : Intrinsic <
1449+
[data_ty],
1450+
[AMDGPUBufferRsrcTy, // rsrc(SGPR)
1451+
llvm_i32_ty, // vindex(VGPR)
1452+
llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling)
1453+
llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling)
1454+
llvm_i32_ty], // auxiliary/cachepolicy(imm):
1455+
// bit 0 = glc, bit 1 = slc, bit 2 = dlc (gfx10/gfx11),
1456+
// bit 3 = swz, bit 4 = scc (gfx90a)
1457+
// gfx940: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1
1458+
// gfx12+: bits [0-2] = th, bits [3-4] = scope,
1459+
// bit 6 = swz
1460+
// all: volatile op (bit 31, stripped at lowering)
1461+
[IntrArgMemOnly, NoCapture<ArgIndex<0>>,
1462+
ImmArg<ArgIndex<4>>, IntrWillReturn, IntrNoCallback, IntrNoFree], "", [SDNPMemOperand]>,
1463+
AMDGPURsrcIntrinsic<0>;
1464+
def int_amdgcn_struct_ptr_atomic_buffer_load : AMDGPUStructPtrAtomicBufferLoad;
1465+
14311466
class AMDGPURawBufferStore<LLVMType data_ty = llvm_any_ty> : DefaultAttrsIntrinsic <
14321467
[],
14331468
[data_ty, // vdata(VGPR)

llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp

+44-19
Original file line numberDiff line numberDiff line change
@@ -492,6 +492,8 @@ static bool loadStoreBitcastWorkaround(const LLT Ty) {
492492
return false;
493493

494494
const unsigned Size = Ty.getSizeInBits();
495+
if (Ty.isPointerVector())
496+
return true;
495497
if (Size <= 64)
496498
return false;
497499
// Address space 8 pointers get their own workaround.
@@ -500,9 +502,6 @@ static bool loadStoreBitcastWorkaround(const LLT Ty) {
500502
if (!Ty.isVector())
501503
return true;
502504

503-
if (Ty.isPointerVector())
504-
return true;
505-
506505
unsigned EltSize = Ty.getScalarSizeInBits();
507506
return EltSize != 32 && EltSize != 64;
508507
}
@@ -5809,8 +5808,9 @@ Register AMDGPULegalizerInfo::handleD16VData(MachineIRBuilder &B,
58095808
return Reg;
58105809
}
58115810

5812-
Register AMDGPULegalizerInfo::fixStoreSourceType(
5813-
MachineIRBuilder &B, Register VData, bool IsFormat) const {
5811+
Register AMDGPULegalizerInfo::fixStoreSourceType(MachineIRBuilder &B,
5812+
Register VData, LLT MemTy,
5813+
bool IsFormat) const {
58145814
MachineRegisterInfo *MRI = B.getMRI();
58155815
LLT Ty = MRI->getType(VData);
58165816

@@ -5820,6 +5820,10 @@ Register AMDGPULegalizerInfo::fixStoreSourceType(
58205820
if (hasBufferRsrcWorkaround(Ty))
58215821
return castBufferRsrcToV4I32(VData, B);
58225822

5823+
if (shouldBitcastLoadStoreType(ST, Ty, MemTy)) {
5824+
Ty = getBitcastRegisterType(Ty);
5825+
VData = B.buildBitcast(Ty, VData).getReg(0);
5826+
}
58235827
// Fixup illegal register types for i8 stores.
58245828
if (Ty == LLT::scalar(8) || Ty == S16) {
58255829
Register AnyExt = B.buildAnyExt(LLT::scalar(32), VData).getReg(0);
@@ -5837,22 +5841,26 @@ Register AMDGPULegalizerInfo::fixStoreSourceType(
58375841
}
58385842

58395843
bool AMDGPULegalizerInfo::legalizeBufferStore(MachineInstr &MI,
5840-
MachineRegisterInfo &MRI,
5841-
MachineIRBuilder &B,
5844+
LegalizerHelper &Helper,
58425845
bool IsTyped,
58435846
bool IsFormat) const {
5847+
MachineIRBuilder &B = Helper.MIRBuilder;
5848+
MachineRegisterInfo &MRI = *B.getMRI();
5849+
58445850
Register VData = MI.getOperand(1).getReg();
58455851
LLT Ty = MRI.getType(VData);
58465852
LLT EltTy = Ty.getScalarType();
58475853
const bool IsD16 = IsFormat && (EltTy.getSizeInBits() == 16);
58485854
const LLT S32 = LLT::scalar(32);
58495855

5850-
VData = fixStoreSourceType(B, VData, IsFormat);
5851-
castBufferRsrcArgToV4I32(MI, B, 2);
5852-
Register RSrc = MI.getOperand(2).getReg();
5853-
58545856
MachineMemOperand *MMO = *MI.memoperands_begin();
58555857
const int MemSize = MMO->getSize().getValue();
5858+
LLT MemTy = MMO->getMemoryType();
5859+
5860+
VData = fixStoreSourceType(B, VData, MemTy, IsFormat);
5861+
5862+
castBufferRsrcArgToV4I32(MI, B, 2);
5863+
Register RSrc = MI.getOperand(2).getReg();
58565864

58575865
unsigned ImmOffset;
58585866

@@ -5945,10 +5953,13 @@ static void buildBufferLoad(unsigned Opc, Register LoadDstReg, Register RSrc,
59455953
}
59465954

59475955
bool AMDGPULegalizerInfo::legalizeBufferLoad(MachineInstr &MI,
5948-
MachineRegisterInfo &MRI,
5949-
MachineIRBuilder &B,
5956+
LegalizerHelper &Helper,
59505957
bool IsFormat,
59515958
bool IsTyped) const {
5959+
MachineIRBuilder &B = Helper.MIRBuilder;
5960+
MachineRegisterInfo &MRI = *B.getMRI();
5961+
GISelChangeObserver &Observer = Helper.Observer;
5962+
59525963
// FIXME: Verifier should enforce 1 MMO for these intrinsics.
59535964
MachineMemOperand *MMO = *MI.memoperands_begin();
59545965
const LLT MemTy = MMO->getMemoryType();
@@ -5997,9 +6008,21 @@ bool AMDGPULegalizerInfo::legalizeBufferLoad(MachineInstr &MI,
59976008
// Make addrspace 8 pointers loads into 4xs32 loads here, so the rest of the
59986009
// logic doesn't have to handle that case.
59996010
if (hasBufferRsrcWorkaround(Ty)) {
6011+
Observer.changingInstr(MI);
60006012
Ty = castBufferRsrcFromV4I32(MI, B, MRI, 0);
6013+
Observer.changedInstr(MI);
60016014
Dst = MI.getOperand(0).getReg();
6015+
B.setInsertPt(B.getMBB(), MI);
60026016
}
6017+
if (shouldBitcastLoadStoreType(ST, Ty, MemTy)) {
6018+
Ty = getBitcastRegisterType(Ty);
6019+
Observer.changingInstr(MI);
6020+
Helper.bitcastDst(MI, Ty, 0);
6021+
Observer.changedInstr(MI);
6022+
Dst = MI.getOperand(0).getReg();
6023+
B.setInsertPt(B.getMBB(), MI);
6024+
}
6025+
60036026
LLT EltTy = Ty.getScalarType();
60046027
const bool IsD16 = IsFormat && (EltTy.getSizeInBits() == 16);
60056028
const bool Unpacked = ST.hasUnpackedD16VMem();
@@ -7367,34 +7390,36 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
73677390
case Intrinsic::amdgcn_raw_ptr_buffer_store:
73687391
case Intrinsic::amdgcn_struct_buffer_store:
73697392
case Intrinsic::amdgcn_struct_ptr_buffer_store:
7370-
return legalizeBufferStore(MI, MRI, B, false, false);
7393+
return legalizeBufferStore(MI, Helper, false, false);
73717394
case Intrinsic::amdgcn_raw_buffer_store_format:
73727395
case Intrinsic::amdgcn_raw_ptr_buffer_store_format:
73737396
case Intrinsic::amdgcn_struct_buffer_store_format:
73747397
case Intrinsic::amdgcn_struct_ptr_buffer_store_format:
7375-
return legalizeBufferStore(MI, MRI, B, false, true);
7398+
return legalizeBufferStore(MI, Helper, false, true);
73767399
case Intrinsic::amdgcn_raw_tbuffer_store:
73777400
case Intrinsic::amdgcn_raw_ptr_tbuffer_store:
73787401
case Intrinsic::amdgcn_struct_tbuffer_store:
73797402
case Intrinsic::amdgcn_struct_ptr_tbuffer_store:
7380-
return legalizeBufferStore(MI, MRI, B, true, true);
7403+
return legalizeBufferStore(MI, Helper, true, true);
73817404
case Intrinsic::amdgcn_raw_buffer_load:
73827405
case Intrinsic::amdgcn_raw_ptr_buffer_load:
73837406
case Intrinsic::amdgcn_raw_atomic_buffer_load:
73847407
case Intrinsic::amdgcn_raw_ptr_atomic_buffer_load:
73857408
case Intrinsic::amdgcn_struct_buffer_load:
73867409
case Intrinsic::amdgcn_struct_ptr_buffer_load:
7387-
return legalizeBufferLoad(MI, MRI, B, false, false);
7410+
case Intrinsic::amdgcn_struct_atomic_buffer_load:
7411+
case Intrinsic::amdgcn_struct_ptr_atomic_buffer_load:
7412+
return legalizeBufferLoad(MI, Helper, false, false);
73887413
case Intrinsic::amdgcn_raw_buffer_load_format:
73897414
case Intrinsic::amdgcn_raw_ptr_buffer_load_format:
73907415
case Intrinsic::amdgcn_struct_buffer_load_format:
73917416
case Intrinsic::amdgcn_struct_ptr_buffer_load_format:
7392-
return legalizeBufferLoad(MI, MRI, B, true, false);
7417+
return legalizeBufferLoad(MI, Helper, true, false);
73937418
case Intrinsic::amdgcn_raw_tbuffer_load:
73947419
case Intrinsic::amdgcn_raw_ptr_tbuffer_load:
73957420
case Intrinsic::amdgcn_struct_tbuffer_load:
73967421
case Intrinsic::amdgcn_struct_ptr_tbuffer_load:
7397-
return legalizeBufferLoad(MI, MRI, B, true, true);
7422+
return legalizeBufferLoad(MI, Helper, true, true);
73987423
case Intrinsic::amdgcn_raw_buffer_atomic_swap:
73997424
case Intrinsic::amdgcn_raw_ptr_buffer_atomic_swap:
74007425
case Intrinsic::amdgcn_struct_buffer_atomic_swap:

llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h

+5-7
Original file line numberDiff line numberDiff line change
@@ -195,15 +195,13 @@ class AMDGPULegalizerInfo final : public LegalizerInfo {
195195

196196
Register handleD16VData(MachineIRBuilder &B, MachineRegisterInfo &MRI,
197197
Register Reg, bool ImageStore = false) const;
198-
Register fixStoreSourceType(MachineIRBuilder &B, Register VData,
198+
Register fixStoreSourceType(MachineIRBuilder &B, Register VData, LLT MemTy,
199199
bool IsFormat) const;
200200

201-
bool legalizeBufferStore(MachineInstr &MI, MachineRegisterInfo &MRI,
202-
MachineIRBuilder &B, bool IsTyped,
203-
bool IsFormat) const;
204-
bool legalizeBufferLoad(MachineInstr &MI, MachineRegisterInfo &MRI,
205-
MachineIRBuilder &B, bool IsFormat,
206-
bool IsTyped) const;
201+
bool legalizeBufferStore(MachineInstr &MI, LegalizerHelper &Helper,
202+
bool IsTyped, bool IsFormat) const;
203+
bool legalizeBufferLoad(MachineInstr &MI, LegalizerHelper &Helper,
204+
bool IsFormat, bool IsTyped) const;
207205
bool legalizeBufferAtomic(MachineInstr &MI, MachineIRBuilder &B,
208206
Intrinsic::ID IID) const;
209207

0 commit comments

Comments
 (0)