Skip to content

Commit 41d53a2

Browse files
jayfoad
authored and vikramRH committed
[AMDGPU] Build lane intrinsics in a mangling-agnostic way. NFC. (llvm#91583)
Use the form of CreateIntrinsic that takes an explicit return type and works out the mangling based on that and the types of the arguments. The advantage is that this still works if intrinsics are changed to have type mangling, e.g. if readlane/readfirstlane/writelane are changed to work on any type. Change-Id: I95b72b04c5788d9f29bb24c4c1f035fa5401042a
1 parent aebe131 commit 41d53a2

File tree

1 file changed

+11
-11
lines changed

1 file changed

+11
-11
lines changed

llvm/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -493,8 +493,8 @@ Value *AMDGPUAtomicOptimizerImpl::buildScan(IRBuilder<> &B,
493493
if (!ST->isWave32()) {
494494
// Combine lane 31 into lanes 32..63.
495495
V = B.CreateBitCast(V, IntNTy);
496-
Value *const Lane31 = B.CreateIntrinsic(Intrinsic::amdgcn_readlane, {},
497-
{V, B.getInt32(31)});
496+
Value *const Lane31 = B.CreateIntrinsic(
497+
V->getType(), Intrinsic::amdgcn_readlane, {V, B.getInt32(31)});
498498

499499
Value *UpdateDPPCall = B.CreateCall(
500500
UpdateDPP, {Identity, Lane31, B.getInt32(DPP::QUAD_PERM_ID),
@@ -598,16 +598,16 @@ std::pair<Value *, Value *> AMDGPUAtomicOptimizerImpl::buildScanIteratively(
598598

599599
// Get the value required for atomic operation
600600
V = B.CreateBitCast(V, IntNTy);
601-
Value *LaneValue =
602-
B.CreateIntrinsic(Intrinsic::amdgcn_readlane, {}, {V, LaneIdxInt});
601+
Value *LaneValue = B.CreateIntrinsic(V->getType(), Intrinsic::amdgcn_readlane,
602+
{V, LaneIdxInt});
603603
LaneValue = B.CreateBitCast(LaneValue, Ty);
604604

605605
// Perform writelane if intermediate scan results are required later in the
606606
// kernel computations
607607
Value *OldValue = nullptr;
608608
if (NeedResult) {
609609
OldValue =
610-
B.CreateIntrinsic(Intrinsic::amdgcn_writelane, {},
610+
B.CreateIntrinsic(IntNTy, Intrinsic::amdgcn_writelane,
611611
{B.CreateBitCast(Accumulator, IntNTy), LaneIdxInt,
612612
B.CreateBitCast(OldValuePhi, IntNTy)});
613613
OldValue = B.CreateBitCast(OldValue, Ty);
@@ -789,7 +789,7 @@ void AMDGPUAtomicOptimizerImpl::optimizeAtomic(Instruction &I,
789789
Value *const LastLaneIdx = B.getInt32(ST->getWavefrontSize() - 1);
790790
assert(TyBitWidth == 32);
791791
NewV = B.CreateBitCast(NewV, IntNTy);
792-
NewV = B.CreateIntrinsic(Intrinsic::amdgcn_readlane, {},
792+
NewV = B.CreateIntrinsic(IntNTy, Intrinsic::amdgcn_readlane,
793793
{NewV, LastLaneIdx});
794794
NewV = B.CreateBitCast(NewV, Ty);
795795
}
@@ -936,10 +936,10 @@ void AMDGPUAtomicOptimizerImpl::optimizeAtomic(Instruction &I,
936936
Value *const ExtractLo = B.CreateTrunc(CastedPhi, Int32Ty);
937937
Value *const ExtractHi =
938938
B.CreateTrunc(B.CreateLShr(CastedPhi, 32), Int32Ty);
939-
CallInst *const ReadFirstLaneLo =
940-
B.CreateIntrinsic(Intrinsic::amdgcn_readfirstlane, {}, ExtractLo);
941-
CallInst *const ReadFirstLaneHi =
942-
B.CreateIntrinsic(Intrinsic::amdgcn_readfirstlane, {}, ExtractHi);
939+
CallInst *const ReadFirstLaneLo = B.CreateIntrinsic(
940+
Int32Ty, Intrinsic::amdgcn_readfirstlane, ExtractLo);
941+
CallInst *const ReadFirstLaneHi = B.CreateIntrinsic(
942+
Int32Ty, Intrinsic::amdgcn_readfirstlane, ExtractHi);
943943
Value *const PartialInsert = B.CreateInsertElement(
944944
PoisonValue::get(VecTy), ReadFirstLaneLo, B.getInt32(0));
945945
Value *const Insert =
@@ -948,7 +948,7 @@ void AMDGPUAtomicOptimizerImpl::optimizeAtomic(Instruction &I,
948948
} else if (TyBitWidth == 32) {
949949
Value *CastedPhi = B.CreateBitCast(PHI, IntNTy);
950950
BroadcastI =
951-
B.CreateIntrinsic(Intrinsic::amdgcn_readfirstlane, {}, CastedPhi);
951+
B.CreateIntrinsic(IntNTy, Intrinsic::amdgcn_readfirstlane, CastedPhi);
952952
BroadcastI = B.CreateBitCast(BroadcastI, Ty);
953953

954954
} else {

0 commit comments

Comments (0)