@@ -3544,7 +3544,6 @@ bool SIInstrInfo::foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
      Opc == AMDGPU::V_MAD_F16_e64 || Opc == AMDGPU::V_MAC_F16_e64 ||
      Opc == AMDGPU::V_FMA_F32_e64 || Opc == AMDGPU::V_FMAC_F32_e64 ||
      Opc == AMDGPU::V_FMA_F16_e64 || Opc == AMDGPU::V_FMAC_F16_e64 ||
-     Opc == AMDGPU::V_FMAC_F16_t16_e64 ||
      Opc == AMDGPU::V_FMAC_F16_fake16_e64) {
    // Don't fold if we are using source or output modifiers. The new VOP2
    // instructions don't have them.
@@ -3565,7 +3564,6 @@ bool SIInstrInfo::foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
  bool IsFMA =
      Opc == AMDGPU::V_FMA_F32_e64 || Opc == AMDGPU::V_FMAC_F32_e64 ||
      Opc == AMDGPU::V_FMA_F16_e64 || Opc == AMDGPU::V_FMAC_F16_e64 ||
-     Opc == AMDGPU::V_FMAC_F16_t16_e64 ||
      Opc == AMDGPU::V_FMAC_F16_fake16_e64;
  MachineOperand *Src1 = getNamedOperand(UseMI, AMDGPU::OpName::src1);
  MachineOperand *Src2 = getNamedOperand(UseMI, AMDGPU::OpName::src2);
@@ -3599,19 +3597,16 @@ bool SIInstrInfo::foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,

    unsigned NewOpc =
        IsFMA ? (IsF32 ? AMDGPU::V_FMAMK_F32
-               : ST.hasTrue16BitInsts() ? ST.useRealTrue16Insts()
-                                              ? AMDGPU::V_FMAMK_F16_t16
-                                              : AMDGPU::V_FMAMK_F16_fake16
+               : ST.hasTrue16BitInsts() ? AMDGPU::V_FMAMK_F16_fake16
                                         : AMDGPU::V_FMAMK_F16)
              : (IsF32 ? AMDGPU::V_MADMK_F32 : AMDGPU::V_MADMK_F16);
    if (pseudoToMCOpcode(NewOpc) == -1)
      return false;

-   // V_FMAMK_F16_t16 takes VGPR_16_Lo128 operands while V_FMAMK_F16_fake16
-   // takes VGPR_32_Lo128 operands, so the rewrite would also require
-   // restricting their register classes. For now just bail out.
-   if (NewOpc == AMDGPU::V_FMAMK_F16_t16 ||
-       NewOpc == AMDGPU::V_FMAMK_F16_fake16)
+   // V_FMAMK_F16_fake16 takes VGPR_32_Lo128 operands, so the rewrite
+   // would also require restricting their register classes. For now
+   // just bail out.
+   if (NewOpc == AMDGPU::V_FMAMK_F16_fake16)
      return false;

    const int64_t Imm = getImmFor(RegSrc == Src1 ? *Src0 : *Src1);
@@ -3626,7 +3621,7 @@ bool SIInstrInfo::foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
    Src0->setIsKill(RegSrc->isKill());

    if (Opc == AMDGPU::V_MAC_F32_e64 || Opc == AMDGPU::V_MAC_F16_e64 ||
-       Opc == AMDGPU::V_FMAC_F32_e64 || Opc == AMDGPU::V_FMAC_F16_t16_e64 ||
+       Opc == AMDGPU::V_FMAC_F32_e64 ||
        Opc == AMDGPU::V_FMAC_F16_fake16_e64 || Opc == AMDGPU::V_FMAC_F16_e64)
      UseMI.untieRegOperand(
          AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2));
@@ -3681,26 +3676,23 @@ bool SIInstrInfo::foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,

    unsigned NewOpc =
        IsFMA ? (IsF32 ? AMDGPU::V_FMAAK_F32
-               : ST.hasTrue16BitInsts() ? ST.useRealTrue16Insts()
-                                              ? AMDGPU::V_FMAAK_F16_t16
-                                              : AMDGPU::V_FMAAK_F16_fake16
+               : ST.hasTrue16BitInsts() ? AMDGPU::V_FMAAK_F16_fake16
                                         : AMDGPU::V_FMAAK_F16)
              : (IsF32 ? AMDGPU::V_MADAK_F32 : AMDGPU::V_MADAK_F16);
    if (pseudoToMCOpcode(NewOpc) == -1)
      return false;

-   // V_FMAAK_F16_t16 takes VGPR_16_Lo128 operands while V_FMAAK_F16_fake16
-   // takes VGPR_32_Lo128 operands, so the rewrite would also require
-   // restricting their register classes. For now just bail out.
-   if (NewOpc == AMDGPU::V_FMAAK_F16_t16 ||
-       NewOpc == AMDGPU::V_FMAAK_F16_fake16)
+   // V_FMAAK_F16_fake16 takes VGPR_32_Lo128 operands, so the rewrite
+   // would also require restricting their register classes. For now
+   // just bail out.
+   if (NewOpc == AMDGPU::V_FMAAK_F16_fake16)
      return false;

    // FIXME: This would be a lot easier if we could return a new instruction
    // instead of having to modify in place.

    if (Opc == AMDGPU::V_MAC_F32_e64 || Opc == AMDGPU::V_MAC_F16_e64 ||
-       Opc == AMDGPU::V_FMAC_F32_e64 || Opc == AMDGPU::V_FMAC_F16_t16_e64 ||
+       Opc == AMDGPU::V_FMAC_F32_e64 ||
        Opc == AMDGPU::V_FMAC_F16_fake16_e64 || Opc == AMDGPU::V_FMAC_F16_e64)
      UseMI.untieRegOperand(
          AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2));
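
For context on the rewrites in the two hunks above: V_FMAMK and V_FMAAK are the VOP2 forms that carry an inline literal K, with FMAMK folding the constant into the multiplicand and FMAAK folding it into the addend. A minimal scalar sketch of those semantics, assuming the standard ISA definitions (the helper names are illustrative, not LLVM or ISA symbols):

#include <cassert>

// Illustrative scalar models of the literal-K VOP2 forms this fold targets.
static float fmamk(float S0, float K, float S1) { return S0 * K + S1; } // V_FMAMK_*
static float fmaak(float S0, float S1, float K) { return S0 * S1 + K; } // V_FMAAK_*

int main() {
  // Which form applies depends on where the immediate sat: a constant in
  // src0/src1 selects the FMAMK shape, a constant in src2 selects FMAAK.
  float A = 2.0f, B = 3.0f, K = 0.5f;
  assert(fmamk(A, K, B) == A * K + B); // fma(a, K, b) -> fmamk a, K, b
  assert(fmaak(A, B, K) == A * B + K); // fma(a, b, K) -> fmaak a, b, K
  return 0;
}

The untieRegOperand calls exist because MAC/FMAC tie src2 to the destination; that tie has to be dropped before the operands can be rewritten into the untied VOP2 literal forms.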
@@ -3887,11 +3879,8 @@ static unsigned getNewFMAInst(const GCNSubtarget &ST, unsigned Opc) {
    return AMDGPU::V_FMA_LEGACY_F32_e64;
  case AMDGPU::V_FMAC_F16_e32:
  case AMDGPU::V_FMAC_F16_e64:
- case AMDGPU::V_FMAC_F16_t16_e64:
  case AMDGPU::V_FMAC_F16_fake16_e64:
-   return ST.hasTrue16BitInsts() ? ST.useRealTrue16Insts()
-                                       ? AMDGPU::V_FMA_F16_gfx9_t16_e64
-                                       : AMDGPU::V_FMA_F16_gfx9_fake16_e64
+   return ST.hasTrue16BitInsts() ? AMDGPU::V_FMA_F16_gfx9_fake16_e64
                                  : AMDGPU::V_FMA_F16_gfx9_e64;
  case AMDGPU::V_FMAC_F32_e32:
  case AMDGPU::V_FMAC_F32_e64:
@@ -3957,22 +3946,19 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI,
    return MIB;
  }

- assert(Opc != AMDGPU::V_FMAC_F16_t16_e32 &&
-        Opc != AMDGPU::V_FMAC_F16_fake16_e32 &&
-        "V_FMAC_F16_t16/fake16_e32 is not supported and not expected to be "
-        "present "
-        "pre-RA");
+ assert(
+     Opc != AMDGPU::V_FMAC_F16_fake16_e32 &&
+     "V_FMAC_F16_fake16_e32 is not supported and not expected to be present "
+     "pre-RA");

  // Handle MAC/FMAC.
  bool IsF16 = Opc == AMDGPU::V_MAC_F16_e32 || Opc == AMDGPU::V_MAC_F16_e64 ||
               Opc == AMDGPU::V_FMAC_F16_e32 || Opc == AMDGPU::V_FMAC_F16_e64 ||
-              Opc == AMDGPU::V_FMAC_F16_t16_e64 ||
               Opc == AMDGPU::V_FMAC_F16_fake16_e64;
  bool IsFMA = Opc == AMDGPU::V_FMAC_F32_e32 || Opc == AMDGPU::V_FMAC_F32_e64 ||
               Opc == AMDGPU::V_FMAC_LEGACY_F32_e32 ||
               Opc == AMDGPU::V_FMAC_LEGACY_F32_e64 ||
               Opc == AMDGPU::V_FMAC_F16_e32 || Opc == AMDGPU::V_FMAC_F16_e64 ||
-              Opc == AMDGPU::V_FMAC_F16_t16_e64 ||
               Opc == AMDGPU::V_FMAC_F16_fake16_e64 ||
               Opc == AMDGPU::V_FMAC_F64_e32 || Opc == AMDGPU::V_FMAC_F64_e64;
  bool IsF64 = Opc == AMDGPU::V_FMAC_F64_e32 || Opc == AMDGPU::V_FMAC_F64_e64;
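
As background for this assert and the IsF16/IsFMA classification: V_FMAC is the two-address form whose destination is tied to src2, computing d = s0 * s1 + d, while the V_FMA opcode returned by getNewFMAInst takes the addend as an explicit third source. convertToThreeAddress rewrites the former into the latter so the destination is no longer read. A scalar sketch of the equivalence (helper names are illustrative, not LLVM symbols):

#include <cassert>
#include <cmath>

// Two-address FMAC reads and writes its destination: d = s0 * s1 + d.
static float fmac(float D, float S0, float S1) { return std::fma(S0, S1, D); }

// Three-address FMA takes the addend explicitly: d = s0 * s1 + s2.
static float fma3(float S0, float S1, float S2) { return std::fma(S0, S1, S2); }

int main() {
  float D = 4.0f, A = 2.0f, B = 3.0f;
  // The conversion passes the old destination value as the new src2.
  assert(fmac(D, A, B) == fma3(A, B, D));
  return 0;
}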
@@ -3987,7 +3973,6 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI,
    return nullptr;
  case AMDGPU::V_MAC_F16_e64:
  case AMDGPU::V_FMAC_F16_e64:
- case AMDGPU::V_FMAC_F16_t16_e64:
  case AMDGPU::V_FMAC_F16_fake16_e64:
  case AMDGPU::V_MAC_F32_e64:
  case AMDGPU::V_MAC_LEGACY_F32_e64:
@@ -4073,11 +4058,8 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI,
  int64_t Imm;
  if (!Src0Literal && getFoldableImm(Src2, Imm, &DefMI)) {
    unsigned NewOpc =
-       IsFMA ? (IsF16 ? (ST.hasTrue16BitInsts()
-                             ? ST.useRealTrue16Insts()
-                                   ? AMDGPU::V_FMAAK_F16_t16
-                                   : AMDGPU::V_FMAAK_F16_fake16
-                             : AMDGPU::V_FMAAK_F16)
+       IsFMA ? (IsF16 ? (ST.hasTrue16BitInsts() ? AMDGPU::V_FMAAK_F16_fake16
+                                                : AMDGPU::V_FMAAK_F16)
                       : AMDGPU::V_FMAAK_F32)
              : (IsF16 ? AMDGPU::V_MADAK_F16 : AMDGPU::V_MADAK_F32);
    if (pseudoToMCOpcode(NewOpc) != -1) {
@@ -4094,14 +4076,11 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI,
      return MIB;
    }
  }
- unsigned NewOpc = IsFMA
-                       ? (IsF16 ? (ST.hasTrue16BitInsts()
-                                       ? ST.useRealTrue16Insts()
-                                             ? AMDGPU::V_FMAMK_F16_t16
-                                             : AMDGPU::V_FMAMK_F16_fake16
-                                       : AMDGPU::V_FMAMK_F16)
-                                : AMDGPU::V_FMAMK_F32)
-                       : (IsF16 ? AMDGPU::V_MADMK_F16 : AMDGPU::V_MADMK_F32);
+ unsigned NewOpc =
+     IsFMA ? (IsF16 ? (ST.hasTrue16BitInsts() ? AMDGPU::V_FMAMK_F16_fake16
+                                              : AMDGPU::V_FMAMK_F16)
+                    : AMDGPU::V_FMAMK_F32)
+           : (IsF16 ? AMDGPU::V_MADMK_F16 : AMDGPU::V_MADMK_F32);
  if (!Src0Literal && getFoldableImm(Src1, Imm, &DefMI)) {
    if (pseudoToMCOpcode(NewOpc) != -1) {
      MIB = BuildMI(MBB, MI, MI.getDebugLoc(), get(NewOpc))
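
Condensed, these two hunks select the literal-carrying opcode symmetrically: a foldable immediate in src2 yields the *AAK form (d = s0 * s1 + K), one in src1 yields the *MK form (d = s0 * K + s1), and with this change the F16 flavor is always fake16 on subtargets with true16 instructions. A compact sketch of that decision, using stand-in enumerators rather than the real AMDGPU opcode values:

// Stand-in tags; the real code uses AMDGPU::* opcode enumerators.
enum Opcode {
  FMAAK_F32, FMAAK_F16, FMAAK_F16_fake16,
  FMAMK_F32, FMAMK_F16, FMAMK_F16_fake16
};

// Sketch of the post-change opcode choice (FMA path only; MAD is analogous).
static Opcode pickLiteralFMA(bool ImmInSrc2, bool IsF16, bool HasTrue16) {
  if (ImmInSrc2) // d = s0 * s1 + K
    return IsF16 ? (HasTrue16 ? FMAAK_F16_fake16 : FMAAK_F16) : FMAAK_F32;
  // Immediate folded from src1: d = s0 * K + s1
  return IsF16 ? (HasTrue16 ? FMAMK_F16_fake16 : FMAMK_F16) : FMAMK_F32;
}

int main() {
  return pickLiteralFMA(/*ImmInSrc2=*/true, /*IsF16=*/true,
                        /*HasTrue16=*/true) == FMAAK_F16_fake16 ? 0 : 1;
}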
@@ -4547,7 +4526,6 @@ bool SIInstrInfo::canShrink(const MachineInstr &MI,
  case AMDGPU::V_MAC_F32_e64:
  case AMDGPU::V_MAC_LEGACY_F32_e64:
  case AMDGPU::V_FMAC_F16_e64:
- case AMDGPU::V_FMAC_F16_t16_e64:
  case AMDGPU::V_FMAC_F16_fake16_e64:
  case AMDGPU::V_FMAC_F32_e64:
  case AMDGPU::V_FMAC_F64_e64:
@@ -5604,9 +5582,7 @@ unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) const {
  case AMDGPU::S_MUL_F16: return AMDGPU::V_MUL_F16_fake16_e64;
  case AMDGPU::S_CVT_PK_RTZ_F16_F32: return AMDGPU::V_CVT_PKRTZ_F16_F32_e64;
  case AMDGPU::S_FMAC_F32: return AMDGPU::V_FMAC_F32_e64;
- case AMDGPU::S_FMAC_F16:
-   return ST.useRealTrue16Insts() ? AMDGPU::V_FMAC_F16_t16_e64
-                                  : AMDGPU::V_FMAC_F16_fake16_e64;
+ case AMDGPU::S_FMAC_F16: return AMDGPU::V_FMAC_F16_fake16_e64;
  case AMDGPU::S_FMAMK_F32: return AMDGPU::V_FMAMK_F32;
  case AMDGPU::S_FMAAK_F32: return AMDGPU::V_FMAAK_F32;
  case AMDGPU::S_CMP_LT_F32: return AMDGPU::V_CMP_LT_F32_e64;
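
The getVALUOp change follows the same theme: when moveToVALU legalizes scalar float ops, S_FMAC_F16 now maps unconditionally to the fake16 VALU form instead of branching on useRealTrue16Insts(). A reduced model of the mapping shape, with stand-in enumerators and only the rows visible in this hunk:

// Stand-in tags; the real table switches over AMDGPU::* opcodes.
enum Opcode { S_FMAC_F32, S_FMAC_F16, V_FMAC_F32_e64,
              V_FMAC_F16_fake16_e64, INVALID };

// Reduced sketch of the SALU->VALU opcode mapping after this change.
static Opcode getVALUOpFor(Opcode SOpc) {
  switch (SOpc) {
  case S_FMAC_F32: return V_FMAC_F32_e64;
  case S_FMAC_F16: return V_FMAC_F16_fake16_e64; // t16 branch removed
  default:         return INVALID;
  }
}

int main() { return getVALUOpFor(S_FMAC_F16) == V_FMAC_F16_fake16_e64 ? 0 : 1; }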