@@ -492,6 +492,8 @@ static bool loadStoreBitcastWorkaround(const LLT Ty) {
492
492
return false ;
493
493
494
494
const unsigned Size = Ty.getSizeInBits ();
495
+ if (Ty.isPointerVector ())
496
+ return true ;
495
497
if (Size <= 64 )
496
498
return false ;
497
499
// Address space 8 pointers get their own workaround.
@@ -500,9 +502,6 @@ static bool loadStoreBitcastWorkaround(const LLT Ty) {
500
502
if (!Ty.isVector ())
501
503
return true ;
502
504
503
- if (Ty.isPointerVector ())
504
- return true ;
505
-
506
505
unsigned EltSize = Ty.getScalarSizeInBits ();
507
506
return EltSize != 32 && EltSize != 64 ;
508
507
}
@@ -5809,8 +5808,9 @@ Register AMDGPULegalizerInfo::handleD16VData(MachineIRBuilder &B,
5809
5808
return Reg;
5810
5809
}
5811
5810
5812
- Register AMDGPULegalizerInfo::fixStoreSourceType (
5813
- MachineIRBuilder &B, Register VData, bool IsFormat) const {
5811
+ Register AMDGPULegalizerInfo::fixStoreSourceType (MachineIRBuilder &B,
5812
+ Register VData, LLT MemTy,
5813
+ bool IsFormat) const {
5814
5814
MachineRegisterInfo *MRI = B.getMRI ();
5815
5815
LLT Ty = MRI->getType (VData);
5816
5816
@@ -5820,6 +5820,10 @@ Register AMDGPULegalizerInfo::fixStoreSourceType(
5820
5820
if (hasBufferRsrcWorkaround (Ty))
5821
5821
return castBufferRsrcToV4I32 (VData, B);
5822
5822
5823
+ if (shouldBitcastLoadStoreType (ST, Ty, MemTy)) {
5824
+ Ty = getBitcastRegisterType (Ty);
5825
+ VData = B.buildBitcast (Ty, VData).getReg (0 );
5826
+ }
5823
5827
// Fixup illegal register types for i8 stores.
5824
5828
if (Ty == LLT::scalar (8 ) || Ty == S16) {
5825
5829
Register AnyExt = B.buildAnyExt (LLT::scalar (32 ), VData).getReg (0 );
@@ -5837,22 +5841,26 @@ Register AMDGPULegalizerInfo::fixStoreSourceType(
5837
5841
}
5838
5842
5839
5843
bool AMDGPULegalizerInfo::legalizeBufferStore (MachineInstr &MI,
5840
- MachineRegisterInfo &MRI,
5841
- MachineIRBuilder &B,
5844
+ LegalizerHelper &Helper,
5842
5845
bool IsTyped,
5843
5846
bool IsFormat) const {
5847
+ MachineIRBuilder &B = Helper.MIRBuilder ;
5848
+ MachineRegisterInfo &MRI = *B.getMRI ();
5849
+
5844
5850
Register VData = MI.getOperand (1 ).getReg ();
5845
5851
LLT Ty = MRI.getType (VData);
5846
5852
LLT EltTy = Ty.getScalarType ();
5847
5853
const bool IsD16 = IsFormat && (EltTy.getSizeInBits () == 16 );
5848
5854
const LLT S32 = LLT::scalar (32 );
5849
5855
5850
- VData = fixStoreSourceType (B, VData, IsFormat);
5851
- castBufferRsrcArgToV4I32 (MI, B, 2 );
5852
- Register RSrc = MI.getOperand (2 ).getReg ();
5853
-
5854
5856
MachineMemOperand *MMO = *MI.memoperands_begin ();
5855
5857
const int MemSize = MMO->getSize ().getValue ();
5858
+ LLT MemTy = MMO->getMemoryType ();
5859
+
5860
+ VData = fixStoreSourceType (B, VData, MemTy, IsFormat);
5861
+
5862
+ castBufferRsrcArgToV4I32 (MI, B, 2 );
5863
+ Register RSrc = MI.getOperand (2 ).getReg ();
5856
5864
5857
5865
unsigned ImmOffset;
5858
5866
@@ -5945,10 +5953,13 @@ static void buildBufferLoad(unsigned Opc, Register LoadDstReg, Register RSrc,
5945
5953
}
5946
5954
5947
5955
bool AMDGPULegalizerInfo::legalizeBufferLoad (MachineInstr &MI,
5948
- MachineRegisterInfo &MRI,
5949
- MachineIRBuilder &B,
5956
+ LegalizerHelper &Helper,
5950
5957
bool IsFormat,
5951
5958
bool IsTyped) const {
5959
+ MachineIRBuilder &B = Helper.MIRBuilder ;
5960
+ MachineRegisterInfo &MRI = *B.getMRI ();
5961
+ GISelChangeObserver &Observer = Helper.Observer ;
5962
+
5952
5963
// FIXME: Verifier should enforce 1 MMO for these intrinsics.
5953
5964
MachineMemOperand *MMO = *MI.memoperands_begin ();
5954
5965
const LLT MemTy = MMO->getMemoryType ();
@@ -5997,9 +6008,21 @@ bool AMDGPULegalizerInfo::legalizeBufferLoad(MachineInstr &MI,
5997
6008
// Make addrspace 8 pointers loads into 4xs32 loads here, so the rest of the
5998
6009
// logic doesn't have to handle that case.
5999
6010
if (hasBufferRsrcWorkaround (Ty)) {
6011
+ Observer.changingInstr (MI);
6000
6012
Ty = castBufferRsrcFromV4I32 (MI, B, MRI, 0 );
6013
+ Observer.changedInstr (MI);
6001
6014
Dst = MI.getOperand (0 ).getReg ();
6015
+ B.setInsertPt (B.getMBB (), MI);
6002
6016
}
6017
+ if (shouldBitcastLoadStoreType (ST, Ty, MemTy)) {
6018
+ Ty = getBitcastRegisterType (Ty);
6019
+ Observer.changingInstr (MI);
6020
+ Helper.bitcastDst (MI, Ty, 0 );
6021
+ Observer.changedInstr (MI);
6022
+ Dst = MI.getOperand (0 ).getReg ();
6023
+ B.setInsertPt (B.getMBB (), MI);
6024
+ }
6025
+
6003
6026
LLT EltTy = Ty.getScalarType ();
6004
6027
const bool IsD16 = IsFormat && (EltTy.getSizeInBits () == 16 );
6005
6028
const bool Unpacked = ST.hasUnpackedD16VMem ();
@@ -7367,34 +7390,36 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
7367
7390
case Intrinsic::amdgcn_raw_ptr_buffer_store:
7368
7391
case Intrinsic::amdgcn_struct_buffer_store:
7369
7392
case Intrinsic::amdgcn_struct_ptr_buffer_store:
7370
- return legalizeBufferStore (MI, MRI, B , false , false );
7393
+ return legalizeBufferStore (MI, Helper , false , false );
7371
7394
case Intrinsic::amdgcn_raw_buffer_store_format:
7372
7395
case Intrinsic::amdgcn_raw_ptr_buffer_store_format:
7373
7396
case Intrinsic::amdgcn_struct_buffer_store_format:
7374
7397
case Intrinsic::amdgcn_struct_ptr_buffer_store_format:
7375
- return legalizeBufferStore (MI, MRI, B , false , true );
7398
+ return legalizeBufferStore (MI, Helper , false , true );
7376
7399
case Intrinsic::amdgcn_raw_tbuffer_store:
7377
7400
case Intrinsic::amdgcn_raw_ptr_tbuffer_store:
7378
7401
case Intrinsic::amdgcn_struct_tbuffer_store:
7379
7402
case Intrinsic::amdgcn_struct_ptr_tbuffer_store:
7380
- return legalizeBufferStore (MI, MRI, B , true , true );
7403
+ return legalizeBufferStore (MI, Helper , true , true );
7381
7404
case Intrinsic::amdgcn_raw_buffer_load:
7382
7405
case Intrinsic::amdgcn_raw_ptr_buffer_load:
7383
7406
case Intrinsic::amdgcn_raw_atomic_buffer_load:
7384
7407
case Intrinsic::amdgcn_raw_ptr_atomic_buffer_load:
7385
7408
case Intrinsic::amdgcn_struct_buffer_load:
7386
7409
case Intrinsic::amdgcn_struct_ptr_buffer_load:
7387
- return legalizeBufferLoad (MI, MRI, B, false , false );
7410
+ case Intrinsic::amdgcn_struct_atomic_buffer_load:
7411
+ case Intrinsic::amdgcn_struct_ptr_atomic_buffer_load:
7412
+ return legalizeBufferLoad (MI, Helper, false , false );
7388
7413
case Intrinsic::amdgcn_raw_buffer_load_format:
7389
7414
case Intrinsic::amdgcn_raw_ptr_buffer_load_format:
7390
7415
case Intrinsic::amdgcn_struct_buffer_load_format:
7391
7416
case Intrinsic::amdgcn_struct_ptr_buffer_load_format:
7392
- return legalizeBufferLoad (MI, MRI, B , true , false );
7417
+ return legalizeBufferLoad (MI, Helper , true , false );
7393
7418
case Intrinsic::amdgcn_raw_tbuffer_load:
7394
7419
case Intrinsic::amdgcn_raw_ptr_tbuffer_load:
7395
7420
case Intrinsic::amdgcn_struct_tbuffer_load:
7396
7421
case Intrinsic::amdgcn_struct_ptr_tbuffer_load:
7397
- return legalizeBufferLoad (MI, MRI, B , true , true );
7422
+ return legalizeBufferLoad (MI, Helper , true , true );
7398
7423
case Intrinsic::amdgcn_raw_buffer_atomic_swap:
7399
7424
case Intrinsic::amdgcn_raw_ptr_buffer_atomic_swap:
7400
7425
case Intrinsic::amdgcn_struct_buffer_atomic_swap:
0 commit comments