@@ -493,8 +493,8 @@ Value *AMDGPUAtomicOptimizerImpl::buildScan(IRBuilder<> &B,
   if (!ST->isWave32()) {
     // Combine lane 31 into lanes 32..63.
     V = B.CreateBitCast(V, IntNTy);
-    Value *const Lane31 = B.CreateIntrinsic(Intrinsic::amdgcn_readlane, {},
-                                            {V, B.getInt32(31)});
+    Value *const Lane31 = B.CreateIntrinsic(
+        V->getType(), Intrinsic::amdgcn_readlane, {V, B.getInt32(31)});
 
     Value *UpdateDPPCall = B.CreateCall(
         UpdateDPP, {Identity, Lane31, B.getInt32(DPP::QUAD_PERM_ID),
@@ -598,16 +598,16 @@ std::pair<Value *, Value *> AMDGPUAtomicOptimizerImpl::buildScanIteratively(
 
   // Get the value required for atomic operation
   V = B.CreateBitCast(V, IntNTy);
-  Value *LaneValue =
-      B.CreateIntrinsic(Intrinsic::amdgcn_readlane, {}, {V, LaneIdxInt});
+  Value *LaneValue = B.CreateIntrinsic(V->getType(), Intrinsic::amdgcn_readlane,
+                                       {V, LaneIdxInt});
   LaneValue = B.CreateBitCast(LaneValue, Ty);
 
   // Perform writelane if intermediate scan results are required later in the
   // kernel computations
   Value *OldValue = nullptr;
   if (NeedResult) {
     OldValue =
-        B.CreateIntrinsic(Intrinsic::amdgcn_writelane, {},
+        B.CreateIntrinsic(IntNTy, Intrinsic::amdgcn_writelane,
                           {B.CreateBitCast(Accumulator, IntNTy), LaneIdxInt,
                            B.CreateBitCast(OldValuePhi, IntNTy)});
     OldValue = B.CreateBitCast(OldValue, Ty);
@@ -789,7 +789,7 @@ void AMDGPUAtomicOptimizerImpl::optimizeAtomic(Instruction &I,
       Value *const LastLaneIdx = B.getInt32(ST->getWavefrontSize() - 1);
       assert(TyBitWidth == 32);
       NewV = B.CreateBitCast(NewV, IntNTy);
-      NewV = B.CreateIntrinsic(Intrinsic::amdgcn_readlane, {},
+      NewV = B.CreateIntrinsic(IntNTy, Intrinsic::amdgcn_readlane,
                                {NewV, LastLaneIdx});
       NewV = B.CreateBitCast(NewV, Ty);
     }
@@ -936,10 +936,10 @@ void AMDGPUAtomicOptimizerImpl::optimizeAtomic(Instruction &I,
       Value *const ExtractLo = B.CreateTrunc(CastedPhi, Int32Ty);
       Value *const ExtractHi =
           B.CreateTrunc(B.CreateLShr(CastedPhi, 32), Int32Ty);
-      CallInst *const ReadFirstLaneLo =
-          B.CreateIntrinsic(Intrinsic::amdgcn_readfirstlane, {}, ExtractLo);
-      CallInst *const ReadFirstLaneHi =
-          B.CreateIntrinsic(Intrinsic::amdgcn_readfirstlane, {}, ExtractHi);
+      CallInst *const ReadFirstLaneLo = B.CreateIntrinsic(
+          Int32Ty, Intrinsic::amdgcn_readfirstlane, ExtractLo);
+      CallInst *const ReadFirstLaneHi = B.CreateIntrinsic(
+          Int32Ty, Intrinsic::amdgcn_readfirstlane, ExtractHi);
       Value *const PartialInsert = B.CreateInsertElement(
           PoisonValue::get(VecTy), ReadFirstLaneLo, B.getInt32(0));
       Value *const Insert =
@@ -948,7 +948,7 @@ void AMDGPUAtomicOptimizerImpl::optimizeAtomic(Instruction &I,
     } else if (TyBitWidth == 32) {
       Value *CastedPhi = B.CreateBitCast(PHI, IntNTy);
       BroadcastI =
-          B.CreateIntrinsic(Intrinsic::amdgcn_readfirstlane, {}, CastedPhi);
+          B.CreateIntrinsic(IntNTy, Intrinsic::amdgcn_readfirstlane, CastedPhi);
       BroadcastI = B.CreateBitCast(BroadcastI, Ty);
 
     } else {
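
For context on the repeated pattern above (not part of the patch itself): every changed call site switches from the IRBuilder::CreateIntrinsic overload that takes a list of overload types to the overload that takes the result type explicitly, which lets the builder resolve the signature of the readlane/writelane/readfirstlane intrinsics from the value type being passed. Below is a minimal sketch of the two call styles, assuming a recent LLVM build with the AMDGPU intrinsic headers available; the demo function name readlane_demo is invented for illustration.

// Sketch only: builds i32 @readlane_demo(i32 %v) and emits an
// amdgcn.readlane call via the explicit-return-type overload of
// IRBuilder::CreateIntrinsic, mirroring the updated call sites above.
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

int main() {
  LLVMContext Ctx;
  Module M("readlane_demo", Ctx);
  IRBuilder<> B(Ctx);

  FunctionType *FTy =
      FunctionType::get(B.getInt32Ty(), {B.getInt32Ty()}, /*isVarArg=*/false);
  Function *F =
      Function::Create(FTy, Function::ExternalLinkage, "readlane_demo", M);
  B.SetInsertPoint(BasicBlock::Create(Ctx, "entry", F));

  Value *V = F->getArg(0);
  // Old style, as in the removed lines: overload types passed as a list.
  //   B.CreateIntrinsic(Intrinsic::amdgcn_readlane, {}, {V, B.getInt32(0)});
  // New style, as in the added lines: the result type is passed explicitly,
  // so the builder can pick the intrinsic signature from the value type.
  Value *Lane0 = B.CreateIntrinsic(V->getType(), Intrinsic::amdgcn_readlane,
                                   {V, B.getInt32(0)});
  B.CreateRet(Lane0);

  M.print(outs(), nullptr);
  return 0;
}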