Skip to content

Commit cf1165c

Browse files
authored
Revert "[AMDGPU][True16][CodeGen] true16 codegen pattern for fma (#12… (#127175)
Reverting this patch since it raises a buildbot failure. This reverts commit 2a7487c.
1 parent c08b80e commit cf1165c

10 files changed

+244
-871
lines changed

llvm/lib/Target/AMDGPU/SIFoldOperands.cpp

-2
Original file line number | Diff line number | Diff line change
@@ -198,8 +198,6 @@ static unsigned macToMad(unsigned Opc) {
198198
return AMDGPU::V_FMA_F32_e64;
199199
case AMDGPU::V_FMAC_F16_e64:
200200
return AMDGPU::V_FMA_F16_gfx9_e64;
201-
case AMDGPU::V_FMAC_F16_t16_e64:
202-
return AMDGPU::V_FMA_F16_gfx9_t16_e64;
203201
case AMDGPU::V_FMAC_F16_fake16_e64:
204202
return AMDGPU::V_FMA_F16_gfx9_fake16_e64;
205203
case AMDGPU::V_FMAC_LEGACY_F32_e64:

llvm/lib/Target/AMDGPU/SIInstrInfo.cpp

+25-49
Original file line number | Diff line number | Diff line change
@@ -3544,7 +3544,6 @@ bool SIInstrInfo::foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
35443544
Opc == AMDGPU::V_MAD_F16_e64 || Opc == AMDGPU::V_MAC_F16_e64 ||
35453545
Opc == AMDGPU::V_FMA_F32_e64 || Opc == AMDGPU::V_FMAC_F32_e64 ||
35463546
Opc == AMDGPU::V_FMA_F16_e64 || Opc == AMDGPU::V_FMAC_F16_e64 ||
3547-
Opc == AMDGPU::V_FMAC_F16_t16_e64 ||
35483547
Opc == AMDGPU::V_FMAC_F16_fake16_e64) {
35493548
// Don't fold if we are using source or output modifiers. The new VOP2
35503549
// instructions don't have them.
@@ -3565,7 +3564,6 @@ bool SIInstrInfo::foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
35653564
bool IsFMA =
35663565
Opc == AMDGPU::V_FMA_F32_e64 || Opc == AMDGPU::V_FMAC_F32_e64 ||
35673566
Opc == AMDGPU::V_FMA_F16_e64 || Opc == AMDGPU::V_FMAC_F16_e64 ||
3568-
Opc == AMDGPU::V_FMAC_F16_t16_e64 ||
35693567
Opc == AMDGPU::V_FMAC_F16_fake16_e64;
35703568
MachineOperand *Src1 = getNamedOperand(UseMI, AMDGPU::OpName::src1);
35713569
MachineOperand *Src2 = getNamedOperand(UseMI, AMDGPU::OpName::src2);
@@ -3599,19 +3597,16 @@ bool SIInstrInfo::foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
35993597

36003598
unsigned NewOpc =
36013599
IsFMA ? (IsF32 ? AMDGPU::V_FMAMK_F32
3602-
: ST.hasTrue16BitInsts() ? ST.useRealTrue16Insts()
3603-
? AMDGPU::V_FMAMK_F16_t16
3604-
: AMDGPU::V_FMAMK_F16_fake16
3600+
: ST.hasTrue16BitInsts() ? AMDGPU::V_FMAMK_F16_fake16
36053601
: AMDGPU::V_FMAMK_F16)
36063602
: (IsF32 ? AMDGPU::V_MADMK_F32 : AMDGPU::V_MADMK_F16);
36073603
if (pseudoToMCOpcode(NewOpc) == -1)
36083604
return false;
36093605

3610-
// V_FMAMK_F16_t16 takes VGPR_16_Lo128 operands while V_FMAMK_F16_fake16
3611-
// takes VGPR_32_Lo128 operands, so the rewrite would also require
3612-
// restricting their register classes. For now just bail out.
3613-
if (NewOpc == AMDGPU::V_FMAMK_F16_t16 ||
3614-
NewOpc == AMDGPU::V_FMAMK_F16_fake16)
3606+
// V_FMAMK_F16_fake16 takes VGPR_32_Lo128 operands, so the rewrite
3607+
// would also require restricting their register classes. For now
3608+
// just bail out.
3609+
if (NewOpc == AMDGPU::V_FMAMK_F16_fake16)
36153610
return false;
36163611

36173612
const int64_t Imm = getImmFor(RegSrc == Src1 ? *Src0 : *Src1);
@@ -3626,7 +3621,7 @@ bool SIInstrInfo::foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
36263621
Src0->setIsKill(RegSrc->isKill());
36273622

36283623
if (Opc == AMDGPU::V_MAC_F32_e64 || Opc == AMDGPU::V_MAC_F16_e64 ||
3629-
Opc == AMDGPU::V_FMAC_F32_e64 || Opc == AMDGPU::V_FMAC_F16_t16_e64 ||
3624+
Opc == AMDGPU::V_FMAC_F32_e64 ||
36303625
Opc == AMDGPU::V_FMAC_F16_fake16_e64 || Opc == AMDGPU::V_FMAC_F16_e64)
36313626
UseMI.untieRegOperand(
36323627
AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2));
@@ -3681,26 +3676,23 @@ bool SIInstrInfo::foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
36813676

36823677
unsigned NewOpc =
36833678
IsFMA ? (IsF32 ? AMDGPU::V_FMAAK_F32
3684-
: ST.hasTrue16BitInsts() ? ST.useRealTrue16Insts()
3685-
? AMDGPU::V_FMAAK_F16_t16
3686-
: AMDGPU::V_FMAAK_F16_fake16
3679+
: ST.hasTrue16BitInsts() ? AMDGPU::V_FMAAK_F16_fake16
36873680
: AMDGPU::V_FMAAK_F16)
36883681
: (IsF32 ? AMDGPU::V_MADAK_F32 : AMDGPU::V_MADAK_F16);
36893682
if (pseudoToMCOpcode(NewOpc) == -1)
36903683
return false;
36913684

3692-
// V_FMAAK_F16_t16 takes VGPR_16_Lo128 operands while V_FMAAK_F16_fake16
3693-
// takes VGPR_32_Lo128 operands, so the rewrite would also require
3694-
// restricting their register classes. For now just bail out.
3695-
if (NewOpc == AMDGPU::V_FMAAK_F16_t16 ||
3696-
NewOpc == AMDGPU::V_FMAAK_F16_fake16)
3685+
// V_FMAAK_F16_fake16 takes VGPR_32_Lo128 operands, so the rewrite
3686+
// would also require restricting their register classes. For now
3687+
// just bail out.
3688+
if (NewOpc == AMDGPU::V_FMAAK_F16_fake16)
36973689
return false;
36983690

36993691
// FIXME: This would be a lot easier if we could return a new instruction
37003692
// instead of having to modify in place.
37013693

37023694
if (Opc == AMDGPU::V_MAC_F32_e64 || Opc == AMDGPU::V_MAC_F16_e64 ||
3703-
Opc == AMDGPU::V_FMAC_F32_e64 || Opc == AMDGPU::V_FMAC_F16_t16_e64 ||
3695+
Opc == AMDGPU::V_FMAC_F32_e64 ||
37043696
Opc == AMDGPU::V_FMAC_F16_fake16_e64 || Opc == AMDGPU::V_FMAC_F16_e64)
37053697
UseMI.untieRegOperand(
37063698
AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2));
@@ -3887,11 +3879,8 @@ static unsigned getNewFMAInst(const GCNSubtarget &ST, unsigned Opc) {
38873879
return AMDGPU::V_FMA_LEGACY_F32_e64;
38883880
case AMDGPU::V_FMAC_F16_e32:
38893881
case AMDGPU::V_FMAC_F16_e64:
3890-
case AMDGPU::V_FMAC_F16_t16_e64:
38913882
case AMDGPU::V_FMAC_F16_fake16_e64:
3892-
return ST.hasTrue16BitInsts() ? ST.useRealTrue16Insts()
3893-
? AMDGPU::V_FMA_F16_gfx9_t16_e64
3894-
: AMDGPU::V_FMA_F16_gfx9_fake16_e64
3883+
return ST.hasTrue16BitInsts() ? AMDGPU::V_FMA_F16_gfx9_fake16_e64
38953884
: AMDGPU::V_FMA_F16_gfx9_e64;
38963885
case AMDGPU::V_FMAC_F32_e32:
38973886
case AMDGPU::V_FMAC_F32_e64:
@@ -3957,22 +3946,19 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI,
39573946
return MIB;
39583947
}
39593948

3960-
assert(Opc != AMDGPU::V_FMAC_F16_t16_e32 &&
3961-
Opc != AMDGPU::V_FMAC_F16_fake16_e32 &&
3962-
"V_FMAC_F16_t16/fake16_e32 is not supported and not expected to be "
3963-
"present "
3964-
"pre-RA");
3949+
assert(
3950+
Opc != AMDGPU::V_FMAC_F16_fake16_e32 &&
3951+
"V_FMAC_F16_fake16_e32 is not supported and not expected to be present "
3952+
"pre-RA");
39653953

39663954
// Handle MAC/FMAC.
39673955
bool IsF16 = Opc == AMDGPU::V_MAC_F16_e32 || Opc == AMDGPU::V_MAC_F16_e64 ||
39683956
Opc == AMDGPU::V_FMAC_F16_e32 || Opc == AMDGPU::V_FMAC_F16_e64 ||
3969-
Opc == AMDGPU::V_FMAC_F16_t16_e64 ||
39703957
Opc == AMDGPU::V_FMAC_F16_fake16_e64;
39713958
bool IsFMA = Opc == AMDGPU::V_FMAC_F32_e32 || Opc == AMDGPU::V_FMAC_F32_e64 ||
39723959
Opc == AMDGPU::V_FMAC_LEGACY_F32_e32 ||
39733960
Opc == AMDGPU::V_FMAC_LEGACY_F32_e64 ||
39743961
Opc == AMDGPU::V_FMAC_F16_e32 || Opc == AMDGPU::V_FMAC_F16_e64 ||
3975-
Opc == AMDGPU::V_FMAC_F16_t16_e64 ||
39763962
Opc == AMDGPU::V_FMAC_F16_fake16_e64 ||
39773963
Opc == AMDGPU::V_FMAC_F64_e32 || Opc == AMDGPU::V_FMAC_F64_e64;
39783964
bool IsF64 = Opc == AMDGPU::V_FMAC_F64_e32 || Opc == AMDGPU::V_FMAC_F64_e64;
@@ -3987,7 +3973,6 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI,
39873973
return nullptr;
39883974
case AMDGPU::V_MAC_F16_e64:
39893975
case AMDGPU::V_FMAC_F16_e64:
3990-
case AMDGPU::V_FMAC_F16_t16_e64:
39913976
case AMDGPU::V_FMAC_F16_fake16_e64:
39923977
case AMDGPU::V_MAC_F32_e64:
39933978
case AMDGPU::V_MAC_LEGACY_F32_e64:
@@ -4073,11 +4058,8 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI,
40734058
int64_t Imm;
40744059
if (!Src0Literal && getFoldableImm(Src2, Imm, &DefMI)) {
40754060
unsigned NewOpc =
4076-
IsFMA ? (IsF16 ? (ST.hasTrue16BitInsts()
4077-
? ST.useRealTrue16Insts()
4078-
? AMDGPU::V_FMAAK_F16_t16
4079-
: AMDGPU::V_FMAAK_F16_fake16
4080-
: AMDGPU::V_FMAAK_F16)
4061+
IsFMA ? (IsF16 ? (ST.hasTrue16BitInsts() ? AMDGPU::V_FMAAK_F16_fake16
4062+
: AMDGPU::V_FMAAK_F16)
40814063
: AMDGPU::V_FMAAK_F32)
40824064
: (IsF16 ? AMDGPU::V_MADAK_F16 : AMDGPU::V_MADAK_F32);
40834065
if (pseudoToMCOpcode(NewOpc) != -1) {
@@ -4094,14 +4076,11 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI,
40944076
return MIB;
40954077
}
40964078
}
4097-
unsigned NewOpc = IsFMA
4098-
? (IsF16 ? (ST.hasTrue16BitInsts()
4099-
? ST.useRealTrue16Insts()
4100-
? AMDGPU::V_FMAMK_F16_t16
4101-
: AMDGPU::V_FMAMK_F16_fake16
4102-
: AMDGPU::V_FMAMK_F16)
4103-
: AMDGPU::V_FMAMK_F32)
4104-
: (IsF16 ? AMDGPU::V_MADMK_F16 : AMDGPU::V_MADMK_F32);
4079+
unsigned NewOpc =
4080+
IsFMA ? (IsF16 ? (ST.hasTrue16BitInsts() ? AMDGPU::V_FMAMK_F16_fake16
4081+
: AMDGPU::V_FMAMK_F16)
4082+
: AMDGPU::V_FMAMK_F32)
4083+
: (IsF16 ? AMDGPU::V_MADMK_F16 : AMDGPU::V_MADMK_F32);
41054084
if (!Src0Literal && getFoldableImm(Src1, Imm, &DefMI)) {
41064085
if (pseudoToMCOpcode(NewOpc) != -1) {
41074086
MIB = BuildMI(MBB, MI, MI.getDebugLoc(), get(NewOpc))
@@ -4547,7 +4526,6 @@ bool SIInstrInfo::canShrink(const MachineInstr &MI,
45474526
case AMDGPU::V_MAC_F32_e64:
45484527
case AMDGPU::V_MAC_LEGACY_F32_e64:
45494528
case AMDGPU::V_FMAC_F16_e64:
4550-
case AMDGPU::V_FMAC_F16_t16_e64:
45514529
case AMDGPU::V_FMAC_F16_fake16_e64:
45524530
case AMDGPU::V_FMAC_F32_e64:
45534531
case AMDGPU::V_FMAC_F64_e64:
@@ -5604,9 +5582,7 @@ unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) const {
56045582
case AMDGPU::S_MUL_F16: return AMDGPU::V_MUL_F16_fake16_e64;
56055583
case AMDGPU::S_CVT_PK_RTZ_F16_F32: return AMDGPU::V_CVT_PKRTZ_F16_F32_e64;
56065584
case AMDGPU::S_FMAC_F32: return AMDGPU::V_FMAC_F32_e64;
5607-
case AMDGPU::S_FMAC_F16:
5608-
return ST.useRealTrue16Insts() ? AMDGPU::V_FMAC_F16_t16_e64
5609-
: AMDGPU::V_FMAC_F16_fake16_e64;
5585+
case AMDGPU::S_FMAC_F16: return AMDGPU::V_FMAC_F16_fake16_e64;
56105586
case AMDGPU::S_FMAMK_F32: return AMDGPU::V_FMAMK_F32;
56115587
case AMDGPU::S_FMAAK_F32: return AMDGPU::V_FMAAK_F32;
56125588
case AMDGPU::S_CMP_LT_F32: return AMDGPU::V_CMP_LT_F32_e64;

llvm/lib/Target/AMDGPU/SIInstructions.td

-8
Original file line number | Diff line number | Diff line change
@@ -3287,14 +3287,6 @@ def : GCNPat <
32873287
(V_FMAC_F16_e64 SRCMODS.NONE, $src0, SRCMODS.NONE, $src1,
32883288
SRCMODS.NONE, $src2)
32893289
>;
3290-
let True16Predicate = UseRealTrue16Insts in
3291-
def : GCNPat <
3292-
(fma (f16 (VOP3NoMods f16:$src0)),
3293-
(f16 (VOP3NoMods f16:$src1)),
3294-
(f16 (VOP3NoMods f16:$src2))),
3295-
(V_FMAC_F16_t16_e64 SRCMODS.NONE, $src0, SRCMODS.NONE, $src1,
3296-
SRCMODS.NONE, $src2)
3297-
>;
32983290
let True16Predicate = UseFakeTrue16Insts in
32993291
def : GCNPat <
33003292
(fma (f16 (VOP3NoMods f16:$src0)),

llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp

+4-13
Original file line number | Diff line number | Diff line change
@@ -455,13 +455,9 @@ void SIShrinkInstructions::shrinkMadFma(MachineInstr &MI) const {
455455
break;
456456
case AMDGPU::V_FMA_F16_e64:
457457
case AMDGPU::V_FMA_F16_gfx9_e64:
458-
NewOpcode = AMDGPU::V_FMAAK_F16;
459-
break;
460-
case AMDGPU::V_FMA_F16_gfx9_t16_e64:
461-
NewOpcode = AMDGPU::V_FMAAK_F16_t16;
462-
break;
463458
case AMDGPU::V_FMA_F16_gfx9_fake16_e64:
464-
NewOpcode = AMDGPU::V_FMAAK_F16_fake16;
459+
NewOpcode = ST->hasTrue16BitInsts() ? AMDGPU::V_FMAAK_F16_fake16
460+
: AMDGPU::V_FMAAK_F16;
465461
break;
466462
}
467463
}
@@ -489,13 +485,9 @@ void SIShrinkInstructions::shrinkMadFma(MachineInstr &MI) const {
489485
break;
490486
case AMDGPU::V_FMA_F16_e64:
491487
case AMDGPU::V_FMA_F16_gfx9_e64:
492-
NewOpcode = AMDGPU::V_FMAMK_F16;
493-
break;
494-
case AMDGPU::V_FMA_F16_gfx9_t16_e64:
495-
NewOpcode = AMDGPU::V_FMAMK_F16_t16;
496-
break;
497488
case AMDGPU::V_FMA_F16_gfx9_fake16_e64:
498-
NewOpcode = AMDGPU::V_FMAMK_F16_fake16;
489+
NewOpcode = ST->hasTrue16BitInsts() ? AMDGPU::V_FMAMK_F16_fake16
490+
: AMDGPU::V_FMAMK_F16;
499491
break;
500492
}
501493
}
@@ -967,7 +959,6 @@ bool SIShrinkInstructions::run(MachineFunction &MF) {
967959
MI.getOpcode() == AMDGPU::V_MAD_F16_e64 ||
968960
MI.getOpcode() == AMDGPU::V_FMA_F16_e64 ||
969961
MI.getOpcode() == AMDGPU::V_FMA_F16_gfx9_e64 ||
970-
MI.getOpcode() == AMDGPU::V_FMA_F16_gfx9_t16_e64 ||
971962
MI.getOpcode() == AMDGPU::V_FMA_F16_gfx9_fake16_e64) {
972963
shrinkMadFma(MI);
973964
continue;

llvm/test/CodeGen/AMDGPU/GlobalISel/fma.ll

+21-47
Original file line number | Diff line number | Diff line change
@@ -3,8 +3,7 @@
33
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=fiji < %s | FileCheck -check-prefix=GFX8 %s
44
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s
55
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s
6-
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX11,GFX11-TRUE16 %s
7-
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX11,GFX11-FAKE16 %s
6+
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefix=GFX11 %s
87

98
define float @v_fma_f32(float %x, float %y, float %z) {
109
; GFX6-LABEL: v_fma_f32:
@@ -108,18 +107,11 @@ define half @v_fma_f16(half %x, half %y, half %z) {
108107
; GFX10-NEXT: v_fma_f16 v0, v0, v1, v2
109108
; GFX10-NEXT: s_setpc_b64 s[30:31]
110109
;
111-
; GFX11-TRUE16-LABEL: v_fma_f16:
112-
; GFX11-TRUE16: ; %bb.0:
113-
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
114-
; GFX11-TRUE16-NEXT: v_fmac_f16_e32 v2.l, v0.l, v1.l
115-
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v0, v2
116-
; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
117-
;
118-
; GFX11-FAKE16-LABEL: v_fma_f16:
119-
; GFX11-FAKE16: ; %bb.0:
120-
; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
121-
; GFX11-FAKE16-NEXT: v_fma_f16 v0, v0, v1, v2
122-
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
110+
; GFX11-LABEL: v_fma_f16:
111+
; GFX11: ; %bb.0:
112+
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
113+
; GFX11-NEXT: v_fma_f16 v0, v0, v1, v2
114+
; GFX11-NEXT: s_setpc_b64 s[30:31]
123115
%fma = call half @llvm.fma.f16(half %x, half %y, half %z)
124116
ret half %fma
125117
}
@@ -153,17 +145,11 @@ define half @v_fma_f16_fneg_lhs(half %x, half %y, half %z) {
153145
; GFX10-NEXT: v_fma_f16 v0, -v0, v1, v2
154146
; GFX10-NEXT: s_setpc_b64 s[30:31]
155147
;
156-
; GFX11-TRUE16-LABEL: v_fma_f16_fneg_lhs:
157-
; GFX11-TRUE16: ; %bb.0:
158-
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
159-
; GFX11-TRUE16-NEXT: v_fma_f16 v0.l, -v0.l, v1.l, v2.l
160-
; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
161-
;
162-
; GFX11-FAKE16-LABEL: v_fma_f16_fneg_lhs:
163-
; GFX11-FAKE16: ; %bb.0:
164-
; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
165-
; GFX11-FAKE16-NEXT: v_fma_f16 v0, -v0, v1, v2
166-
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
148+
; GFX11-LABEL: v_fma_f16_fneg_lhs:
149+
; GFX11: ; %bb.0:
150+
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
151+
; GFX11-NEXT: v_fma_f16 v0, -v0, v1, v2
152+
; GFX11-NEXT: s_setpc_b64 s[30:31]
167153
%neg.x = fneg half %x
168154
%fma = call half @llvm.fma.f16(half %neg.x, half %y, half %z)
169155
ret half %fma
@@ -198,17 +184,11 @@ define half @v_fma_f16_fneg_rhs(half %x, half %y, half %z) {
198184
; GFX10-NEXT: v_fma_f16 v0, v0, -v1, v2
199185
; GFX10-NEXT: s_setpc_b64 s[30:31]
200186
;
201-
; GFX11-TRUE16-LABEL: v_fma_f16_fneg_rhs:
202-
; GFX11-TRUE16: ; %bb.0:
203-
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
204-
; GFX11-TRUE16-NEXT: v_fma_f16 v0.l, v0.l, -v1.l, v2.l
205-
; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
206-
;
207-
; GFX11-FAKE16-LABEL: v_fma_f16_fneg_rhs:
208-
; GFX11-FAKE16: ; %bb.0:
209-
; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
210-
; GFX11-FAKE16-NEXT: v_fma_f16 v0, v0, -v1, v2
211-
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
187+
; GFX11-LABEL: v_fma_f16_fneg_rhs:
188+
; GFX11: ; %bb.0:
189+
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
190+
; GFX11-NEXT: v_fma_f16 v0, v0, -v1, v2
191+
; GFX11-NEXT: s_setpc_b64 s[30:31]
212192
%neg.y = fneg half %y
213193
%fma = call half @llvm.fma.f16(half %x, half %neg.y, half %z)
214194
ret half %fma
@@ -243,17 +223,11 @@ define half @v_fma_f16_fneg_add(half %x, half %y, half %z) {
243223
; GFX10-NEXT: v_fma_f16 v0, v0, v1, -v2
244224
; GFX10-NEXT: s_setpc_b64 s[30:31]
245225
;
246-
; GFX11-TRUE16-LABEL: v_fma_f16_fneg_add:
247-
; GFX11-TRUE16: ; %bb.0:
248-
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
249-
; GFX11-TRUE16-NEXT: v_fma_f16 v0.l, v0.l, v1.l, -v2.l
250-
; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
251-
;
252-
; GFX11-FAKE16-LABEL: v_fma_f16_fneg_add:
253-
; GFX11-FAKE16: ; %bb.0:
254-
; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
255-
; GFX11-FAKE16-NEXT: v_fma_f16 v0, v0, v1, -v2
256-
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
226+
; GFX11-LABEL: v_fma_f16_fneg_add:
227+
; GFX11: ; %bb.0:
228+
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
229+
; GFX11-NEXT: v_fma_f16 v0, v0, v1, -v2
230+
; GFX11-NEXT: s_setpc_b64 s[30:31]
257231
%neg.z = fneg half %z
258232
%fma = call half @llvm.fma.f16(half %x, half %y, half %neg.z)
259233
ret half %fma

llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-f16.mir

+1-2
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,5 @@
11
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 3
2-
# FIXME-TRUE16. reenable after fix-sgpr-copies is fixed for true16 flow
3-
# XUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -run-pass=si-fix-sgpr-copies -verify-machineinstrs -o - %s | FileCheck --check-prefixes=GCN,REAL16 %s
2+
# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -run-pass=si-fix-sgpr-copies -verify-machineinstrs -o - %s | FileCheck --check-prefixes=GCN,REAL16 %s
43
# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -run-pass=si-fix-sgpr-copies -verify-machineinstrs -o - %s | FileCheck --check-prefixes=GCN,FAKE16 %s
54

65
---

0 commit comments

Comments
 (0)