Skip to content

Commit 530f0e6

Browse files
authored
[AMDGPU] Replace isInlinableLiteral16 with specific version (#81345)
1 parent 6e36ceb commit 530f0e6

11 files changed

+230
-119
lines changed

llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp

Lines changed: 43 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -2006,8 +2006,12 @@ static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) {
20062006
return isInlinableIntLiteral(Val);
20072007
}
20082008

2009-
// f16/v2f16 operands work correctly for all values.
2010-
return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi);
2009+
if (VT.getScalarType() == MVT::f16)
2010+
return AMDGPU::isInlinableLiteralFP16(Val, HasInv2Pi);
2011+
2012+
assert(VT.getScalarType() == MVT::bf16);
2013+
2014+
return AMDGPU::isInlinableLiteralBF16(Val, HasInv2Pi);
20112015
}
20122016

20132017
bool AMDGPUOperand::isInlinableImm(MVT type) const {
@@ -2375,15 +2379,26 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo
23752379
return;
23762380

23772381
case AMDGPU::OPERAND_REG_IMM_INT16:
2378-
case AMDGPU::OPERAND_REG_IMM_FP16:
2379-
case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
23802382
case AMDGPU::OPERAND_REG_INLINE_C_INT16:
2381-
case AMDGPU::OPERAND_REG_INLINE_C_FP16:
23822383
case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
2384+
if (isSafeTruncation(Val, 16) &&
2385+
AMDGPU::isInlinableIntLiteral(static_cast<int16_t>(Val))) {
2386+
Inst.addOperand(MCOperand::createImm(Val));
2387+
setImmKindConst();
2388+
return;
2389+
}
2390+
2391+
Inst.addOperand(MCOperand::createImm(Val & 0xffff));
2392+
setImmKindLiteral();
2393+
return;
2394+
2395+
case AMDGPU::OPERAND_REG_INLINE_C_FP16:
2396+
case AMDGPU::OPERAND_REG_IMM_FP16:
2397+
case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
23832398
case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
23842399
if (isSafeTruncation(Val, 16) &&
2385-
AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
2386-
AsmParser->hasInv2PiInlineImm())) {
2400+
AMDGPU::isInlinableLiteralFP16(static_cast<int16_t>(Val),
2401+
AsmParser->hasInv2PiInlineImm())) {
23872402
Inst.addOperand(MCOperand::createImm(Val));
23882403
setImmKindConst();
23892404
return;
@@ -2410,12 +2425,17 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo
24102425
return;
24112426

24122427
case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
2428+
case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: {
2429+
assert(isSafeTruncation(Val, 16));
2430+
assert(AMDGPU::isInlinableIntLiteral(static_cast<int16_t>(Val)));
2431+
Inst.addOperand(MCOperand::createImm(Val));
2432+
return;
2433+
}
24132434
case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
2414-
case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
24152435
case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: {
24162436
assert(isSafeTruncation(Val, 16));
2417-
assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
2418-
AsmParser->hasInv2PiInlineImm()));
2437+
assert(AMDGPU::isInlinableLiteralFP16(static_cast<int16_t>(Val),
2438+
AsmParser->hasInv2PiInlineImm()));
24192439

24202440
Inst.addOperand(MCOperand::createImm(Val));
24212441
return;
@@ -3559,7 +3579,19 @@ bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
35593579
OperandType == AMDGPU::OPERAND_REG_IMM_V2BF16)
35603580
return AMDGPU::isInlinableLiteralV2BF16(Val);
35613581

3562-
return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
3582+
if (OperandType == AMDGPU::OPERAND_REG_IMM_FP16 ||
3583+
OperandType == AMDGPU::OPERAND_REG_INLINE_C_FP16 ||
3584+
OperandType == AMDGPU::OPERAND_REG_INLINE_AC_FP16 ||
3585+
OperandType == AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED)
3586+
return AMDGPU::isInlinableLiteralFP16(Val, hasInv2PiInlineImm());
3587+
3588+
if (OperandType == AMDGPU::OPERAND_REG_IMM_BF16 ||
3589+
OperandType == AMDGPU::OPERAND_REG_INLINE_C_BF16 ||
3590+
OperandType == AMDGPU::OPERAND_REG_INLINE_AC_BF16 ||
3591+
OperandType == AMDGPU::OPERAND_REG_IMM_BF16_DEFERRED)
3592+
return AMDGPU::isInlinableLiteralBF16(Val, hasInv2PiInlineImm());
3593+
3594+
llvm_unreachable("invalid operand type");
35633595
}
35643596
default:
35653597
llvm_unreachable("invalid operand size");

llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp

Lines changed: 8 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -460,10 +460,8 @@ void AMDGPUInstPrinter::printImmediateInt16(uint32_t Imm,
460460
}
461461
}
462462

463-
// This must accept a 32-bit immediate value to correctly handle packed 16-bit
464-
// operations.
465-
static bool printImmediateFloat16(uint32_t Imm, const MCSubtargetInfo &STI,
466-
raw_ostream &O) {
463+
static bool printImmediateFP16(uint32_t Imm, const MCSubtargetInfo &STI,
464+
raw_ostream &O) {
467465
if (Imm == 0x3C00)
468466
O << "1.0";
469467
else if (Imm == 0xBC00)
@@ -529,17 +527,17 @@ void AMDGPUInstPrinter::printImmediateBF16(uint32_t Imm,
529527
O << formatHex(static_cast<uint64_t>(Imm));
530528
}
531529

532-
void AMDGPUInstPrinter::printImmediate16(uint32_t Imm,
533-
const MCSubtargetInfo &STI,
534-
raw_ostream &O) {
530+
void AMDGPUInstPrinter::printImmediateF16(uint32_t Imm,
531+
const MCSubtargetInfo &STI,
532+
raw_ostream &O) {
535533
int16_t SImm = static_cast<int16_t>(Imm);
536534
if (isInlinableIntLiteral(SImm)) {
537535
O << SImm;
538536
return;
539537
}
540538

541539
uint16_t HImm = static_cast<uint16_t>(Imm);
542-
if (printImmediateFloat16(HImm, STI, O))
540+
if (printImmediateFP16(HImm, STI, O))
543541
return;
544542

545543
uint64_t Imm16 = static_cast<uint16_t>(Imm);
@@ -566,7 +564,7 @@ void AMDGPUInstPrinter::printImmediateV216(uint32_t Imm, uint8_t OpType,
566564
case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
567565
case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
568566
if (isUInt<16>(Imm) &&
569-
printImmediateFloat16(static_cast<uint16_t>(Imm), STI, O))
567+
printImmediateFP16(static_cast<uint16_t>(Imm), STI, O))
570568
return;
571569
break;
572570
case AMDGPU::OPERAND_REG_IMM_V2BF16:
@@ -845,7 +843,7 @@ void AMDGPUInstPrinter::printRegularOperand(const MCInst *MI, unsigned OpNo,
845843
case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
846844
case AMDGPU::OPERAND_REG_IMM_FP16:
847845
case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
848-
printImmediate16(Op.getImm(), STI, O);
846+
printImmediateF16(Op.getImm(), STI, O);
849847
break;
850848
case AMDGPU::OPERAND_REG_INLINE_C_BF16:
851849
case AMDGPU::OPERAND_REG_INLINE_AC_BF16:

llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -86,10 +86,10 @@ class AMDGPUInstPrinter : public MCInstPrinter {
8686
raw_ostream &O);
8787
void printImmediateInt16(uint32_t Imm, const MCSubtargetInfo &STI,
8888
raw_ostream &O);
89-
void printImmediate16(uint32_t Imm, const MCSubtargetInfo &STI,
90-
raw_ostream &O);
9189
void printImmediateBF16(uint32_t Imm, const MCSubtargetInfo &STI,
9290
raw_ostream &O);
91+
void printImmediateF16(uint32_t Imm, const MCSubtargetInfo &STI,
92+
raw_ostream &O);
9393
void printImmediateV216(uint32_t Imm, uint8_t OpType,
9494
const MCSubtargetInfo &STI, raw_ostream &O);
9595
bool printImmediateFloat32(uint32_t Imm, const MCSubtargetInfo &STI,

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 22 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -15425,16 +15425,32 @@ bool SITargetLowering::checkAsmConstraintVal(SDValue Op, StringRef Constraint,
1542515425
llvm_unreachable("Invalid asm constraint");
1542615426
}
1542715427

15428-
bool SITargetLowering::checkAsmConstraintValA(SDValue Op,
15429-
uint64_t Val,
15428+
// Decides whether the constant Val is accepted for operand Op by checking it
// against the AMDGPU inline-literal predicates.
//
// Op      - operand whose scalar bit width (and, for 16 bits, simple value
//           type) selects which predicate is applied.
// Val     - constant bits to test.
// MaxSize - upper bound on the bit width considered; the effective size is
//           min(scalar size of Op, MaxSize).
//
// Returns true only when the selected predicate accepts Val; any size other
// than 16, 32 or 64 yields false.
//
// This span is a rendered diff: gutter lines ending in '-' precede lines of
// the pre-change body, gutter lines ending in '+' precede the lines that
// replace them.
bool SITargetLowering::checkAsmConstraintValA(SDValue Op, uint64_t Val,
1543015429
unsigned MaxSize) const {
1543115430
unsigned Size = std::min<unsigned>(Op.getScalarValueSizeInBits(), MaxSize);
1543215431
bool HasInv2Pi = Subtarget->hasInv2PiInlineImm();
15433-
// Pre-change condition: one type-agnostic 16-bit check shared by all
// 16-bit operands.
if ((Size == 16 && AMDGPU::isInlinableLiteral16(Val, HasInv2Pi)) ||
15434-
(Size == 32 && AMDGPU::isInlinableLiteral32(Val, HasInv2Pi)) ||
15435-
(Size == 64 && AMDGPU::isInlinableLiteral64(Val, HasInv2Pi))) {
15432+
// Post-change: a 16-bit scalar size is resolved entirely inside this
// branch. Every switch arm returns, so control never reaches the 32/64-bit
// test below when Size == 16.
if (Size == 16) {
15433+
MVT VT = Op.getSimpleValueType();
15434+
switch (VT.SimpleTy) {
15435+
// Any type not listed below is rejected.
default:
15436+
return false;
15437+
// Scalar 16-bit types use the per-type predicate.
// NOTE(review): Val is uint64_t and is implicitly converted to the
// helper's parameter type at each call; confirm callers pass values that
// survive that conversion.
case MVT::i16:
15438+
return AMDGPU::isInlinableLiteralI16(Val, HasInv2Pi);
15439+
case MVT::f16:
15440+
return AMDGPU::isInlinableLiteralFP16(Val, HasInv2Pi);
15441+
case MVT::bf16:
15442+
return AMDGPU::isInlinableLiteralBF16(Val, HasInv2Pi);
15443+
// Two-element 16-bit vectors are accepted when an inline encoding exists
// for the value; these helpers do not take HasInv2Pi.
case MVT::v2i16:
15444+
return AMDGPU::getInlineEncodingV2I16(Val).has_value();
15445+
case MVT::v2f16:
15446+
return AMDGPU::getInlineEncodingV2F16(Val).has_value();
15447+
case MVT::v2bf16:
15448+
return AMDGPU::getInlineEncodingV2BF16(Val).has_value();
15449+
}
15450+
}
15451+
// 32- and 64-bit sizes keep using the width-only predicates.
if ((Size == 32 && AMDGPU::isInlinableLiteral32(Val, HasInv2Pi)) ||
15452+
(Size == 64 && AMDGPU::isInlinableLiteral64(Val, HasInv2Pi)))
1543615453
return true;
15437-
}
1543815454
return false;
1543915455
}
1544015456

llvm/lib/Target/AMDGPU/SIInstrInfo.cpp

Lines changed: 22 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4121,13 +4121,32 @@ bool SIInstrInfo::isInlineConstant(const APInt &Imm) const {
41214121
ST.hasInv2PiInlineImm());
41224122
case 16:
41234123
return ST.has16BitInsts() &&
4124-
AMDGPU::isInlinableLiteral16(Imm.getSExtValue(),
4125-
ST.hasInv2PiInlineImm());
4124+
AMDGPU::isInlinableLiteralI16(Imm.getSExtValue(),
4125+
ST.hasInv2PiInlineImm());
41264126
default:
41274127
llvm_unreachable("invalid bitwidth");
41284128
}
41294129
}
41304130

4131+
// Reports whether the floating-point constant Imm is an inline constant,
// choosing the check from Imm's float semantics rather than from its bit
// width alone.
//
// Imm - the constant to test; its bit pattern is obtained with
//       bitcastToAPInt().
//
// Returns the result of the semantics-specific predicate. Semantics other
// than single, double, bfloat and half hit llvm_unreachable.
bool SIInstrInfo::isInlineConstant(const APFloat &Imm) const {
4132+
APInt IntImm = Imm.bitcastToAPInt();
4133+
// Sign-extended bit pattern, passed to the 16-bit predicates below.
int64_t IntImmVal = IntImm.getSExtValue();
4134+
bool HasInv2Pi = ST.hasInv2PiInlineImm();
4135+
switch (APFloat::SemanticsToEnum(Imm.getSemantics())) {
4136+
default:
4137+
llvm_unreachable("invalid fltSemantics");
4138+
// Single and double precision defer to the APInt overload with the raw bits.
case APFloatBase::S_IEEEsingle:
4139+
case APFloatBase::S_IEEEdouble:
4140+
return isInlineConstant(IntImm);
4141+
// The two 16-bit formats are distinguished here and both additionally
// require the subtarget to report has16BitInsts().
case APFloatBase::S_BFloat:
4142+
return ST.has16BitInsts() &&
4143+
AMDGPU::isInlinableLiteralBF16(IntImmVal, HasInv2Pi);
4144+
case APFloatBase::S_IEEEhalf:
4145+
return ST.has16BitInsts() &&
4146+
AMDGPU::isInlinableLiteralFP16(IntImmVal, HasInv2Pi);
4147+
}
4148+
}
4149+
41314150
bool SIInstrInfo::isInlineConstant(const MachineOperand &MO,
41324151
uint8_t OperandType) const {
41334152
assert(!MO.isReg() && "isInlineConstant called on register operand!");
@@ -4200,7 +4219,7 @@ bool SIInstrInfo::isInlineConstant(const MachineOperand &MO,
42004219
// constants in these cases
42014220
int16_t Trunc = static_cast<int16_t>(Imm);
42024221
return ST.has16BitInsts() &&
4203-
AMDGPU::isInlinableLiteral16(Trunc, ST.hasInv2PiInlineImm());
4222+
AMDGPU::isInlinableLiteralFP16(Trunc, ST.hasInv2PiInlineImm());
42044223
}
42054224

42064225
return false;

llvm/lib/Target/AMDGPU/SIInstrInfo.h

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -984,9 +984,7 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {
984984

985985
bool isInlineConstant(const APInt &Imm) const;
986986

987-
bool isInlineConstant(const APFloat &Imm) const {
988-
return isInlineConstant(Imm.bitcastToAPInt());
989-
}
987+
bool isInlineConstant(const APFloat &Imm) const;
990988

991989
// Returns true if this non-register operand definitely does not need to be
992990
// encoded as a 32-bit literal. Note that this function handles all kinds of

llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2647,13 +2647,19 @@ bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi) {
26472647
Val == 0x3E22; // 1.0 / (2.0 * pi)
26482648
}
26492649

2650-
bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi) {
2650+
// Reports whether the 16-bit integer Literal is accepted as an inline
// constant.
//
// Literal   - the 16-bit value to test.
// HasInv2Pi - when false the function rejects every value, including ones
//             isInlinableIntLiteral would accept.
//
// Returns true if HasInv2Pi is set and Literal either satisfies
// isInlinableIntLiteral or equals 0x3e22f983 converted to int16_t.
bool isInlinableLiteralI16(int16_t Literal, bool HasInv2Pi) {
26512651
// Checked first, so nothing is inlinable without HasInv2Pi.
if (!HasInv2Pi)
26522652
return false;
2653-
26542653
if (isInlinableIntLiteral(Literal))
26552654
return true;
2655+
// NOTE(review): 0x3e22f983 does not fit in int16_t, so the cast narrows the
// constant before the comparison; confirm that comparing against the
// narrowed value is what is intended here.
return Literal == static_cast<int16_t>(0x3e22f983);
2656+
}
26562657

2658+
bool isInlinableLiteralFP16(int16_t Literal, bool HasInv2Pi) {
2659+
if (!HasInv2Pi)
2660+
return false;
2661+
if (isInlinableIntLiteral(Literal))
2662+
return true;
26572663
uint16_t Val = static_cast<uint16_t>(Literal);
26582664
return Val == 0x3C00 || // 1.0
26592665
Val == 0xBC00 || // -1.0

llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1397,7 +1397,13 @@ LLVM_READNONE
13971397
bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi);
13981398

13991399
LLVM_READNONE
1400-
bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi);
1400+
bool isInlinableLiteralFP16(int16_t Literal, bool HasInv2Pi);
1401+
1402+
LLVM_READNONE
1403+
bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi);
1404+
1405+
LLVM_READNONE
1406+
bool isInlinableLiteralI16(int16_t Literal, bool HasInv2Pi);
14011407

14021408
LLVM_READNONE
14031409
std::optional<unsigned> getInlineEncodingV2I16(uint32_t Literal);

llvm/test/CodeGen/AMDGPU/immv216.ll

Lines changed: 21 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -577,40 +577,40 @@ define amdgpu_kernel void @add_inline_imm_64_v2f16(ptr addrspace(1) %out, <2 x h
577577
}
578578

579579
; GCN-LABEL: {{^}}mul_inline_imm_0.5_v2i16:
580-
; GFX9: s_mov_b32 [[K:s[0-9]+]], 0x38003800
581-
; GFX9: v_pk_mul_lo_u16 v0, v0, [[K]]
580+
; GFX9: s_movk_i32 [[K:s[0-9]+]], 0x3800
581+
; GFX9: v_pk_mul_lo_u16 v0, v0, [[K]] op_sel_hi:[1,0]
582582

583-
; GFX10: v_pk_mul_lo_u16 v0, 0x38003800, v0 ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xff,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x00,0x38,0x00,0x38]
583+
; GFX10: v_pk_mul_lo_u16 v0, 0x3800, v0 op_sel_hi:[0,1] ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xff,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x00,0x38,0x00,0x00]
584584
define <2 x i16> @mul_inline_imm_0.5_v2i16(<2 x i16> %x) {
585585
%y = mul <2 x i16> %x, bitcast (<2 x half> <half 0.5, half 0.5> to <2 x i16>)
586586
ret <2 x i16> %y
587587
}
588588

589589
; GCN-LABEL: {{^}}mul_inline_imm_neg_0.5_v2i16:
590-
; GFX9: s_mov_b32 [[K:s[0-9]+]], 0xb800b800
591-
; GFX9: v_pk_mul_lo_u16 v0, v0, [[K]]
590+
; GFX9: s_movk_i32 [[K:s[0-9]+]], 0xb800
591+
; GFX9: v_pk_mul_lo_u16 v0, v0, [[K]] op_sel_hi:[1,0]
592592

593-
; GFX10: v_pk_mul_lo_u16 v0, 0xb800b800, v0 ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xff,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x00,0xb8,0x00,0xb8]
593+
; GFX10: v_pk_mul_lo_u16 v0, 0xffffb800, v0 op_sel_hi:[0,1] ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xff,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x00,0xb8,0xff,0xff]
594594
define <2 x i16> @mul_inline_imm_neg_0.5_v2i16(<2 x i16> %x) {
595595
%y = mul <2 x i16> %x, bitcast (<2 x half> <half -0.5, half -0.5> to <2 x i16>)
596596
ret <2 x i16> %y
597597
}
598598

599599
; GCN-LABEL: {{^}}mul_inline_imm_1.0_v2i16:
600-
; GFX9: s_mov_b32 [[K:s[0-9]+]], 0x3c003c00
601-
; GFX9: v_pk_mul_lo_u16 v0, v0, [[K]]
600+
; GFX9: s_movk_i32 [[K:s[0-9]+]], 0x3c00
601+
; GFX9: v_pk_mul_lo_u16 v0, v0, [[K]] op_sel_hi:[1,0]
602602

603-
; GFX10: v_pk_mul_lo_u16 v0, 0x3c003c00, v0 ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xff,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x00,0x3c,0x00,0x3c]
603+
; GFX10: v_pk_mul_lo_u16 v0, 0x3c00, v0 op_sel_hi:[0,1] ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xff,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x00,0x3c,0x00,0x00]
604604
define <2 x i16> @mul_inline_imm_1.0_v2i16(<2 x i16> %x) {
605605
%y = mul <2 x i16> %x, bitcast (<2 x half> <half 1.0, half 1.0> to <2 x i16>)
606606
ret <2 x i16> %y
607607
}
608608

609609
; GCN-LABEL: {{^}}mul_inline_imm_neg_1.0_v2i16:
610-
; GFX9: s_mov_b32 [[K:s[0-9]+]], 0xbc00bc00
611-
; GFX9: v_pk_mul_lo_u16 v0, v0, [[K]]
610+
; GFX9: s_movk_i32 [[K:s[0-9]+]], 0xbc00
611+
; GFX9: v_pk_mul_lo_u16 v0, v0, [[K]] op_sel_hi:[1,0]
612612

613-
; GFX10: v_pk_mul_lo_u16 v0, 0xbc00bc00, v0 ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xff,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x00,0xbc,0x00,0xbc]
613+
; GFX10: v_pk_mul_lo_u16 v0, 0xffffbc00, v0 op_sel_hi:[0,1] ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xff,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x00,0xbc,0xff,0xff]
614614
define <2 x i16> @mul_inline_imm_neg_1.0_v2i16(<2 x i16> %x) {
615615
%y = mul <2 x i16> %x, bitcast (<2 x half> <half -1.0, half -1.0> to <2 x i16>)
616616
ret <2 x i16> %y
@@ -635,31 +635,31 @@ define <2 x i16> @shl_inline_imm_neg_2.0_v2i16(<2 x i16> %x) {
635635
}
636636

637637
; GCN-LABEL: {{^}}mul_inline_imm_4.0_v2i16:
638-
; GFX9: s_mov_b32 [[K:s[0-9]+]], 0x44004400
639-
; GFX9: v_pk_mul_lo_u16 v0, v0, [[K]]
638+
; GFX9: s_movk_i32 [[K:s[0-9]+]], 0x4400
639+
; GFX9: v_pk_mul_lo_u16 v0, v0, [[K]] op_sel_hi:[1,0]
640640

641-
; GFX10: v_pk_mul_lo_u16 v0, 0x44004400, v0 ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xff,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x00,0x44,0x00,0x44]
641+
; GFX10: v_pk_mul_lo_u16 v0, 0x4400, v0 op_sel_hi:[0,1] ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xff,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x00,0x44,0x00,0x00]
642642
define <2 x i16> @mul_inline_imm_4.0_v2i16(<2 x i16> %x) {
643643
%y = mul <2 x i16> %x, bitcast (<2 x half> <half 4.0, half 4.0> to <2 x i16>)
644644
ret <2 x i16> %y
645645

646646
}
647647

648648
; GCN-LABEL: {{^}}mul_inline_imm_neg_4.0_v2i16:
649-
; GFX9: s_mov_b32 [[K:s[0-9]+]], 0xc400c400
650-
; GFX9: v_pk_mul_lo_u16 v0, v0, [[K]]
649+
; GFX9: s_movk_i32 [[K:s[0-9]+]], 0xc400
650+
; GFX9: v_pk_mul_lo_u16 v0, v0, [[K]] op_sel_hi:[1,0]
651651

652-
; GFX10: v_pk_mul_lo_u16 v0, 0xc400c400, v0 ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xff,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x00,0xc4,0x00,0xc4]
652+
; GFX10: v_pk_mul_lo_u16 v0, 0xffffc400, v0 op_sel_hi:[0,1] ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xff,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x00,0xc4,0xff,0xff]
653653
define <2 x i16> @mul_inline_imm_neg_4.0_v2i16(<2 x i16> %x) {
654654
%y = mul <2 x i16> %x, bitcast (<2 x half> <half -4.0, half -4.0> to <2 x i16>)
655655
ret <2 x i16> %y
656656
}
657657

658658
; GCN-LABEL: {{^}}mul_inline_imm_inv2pi_v2i16:
659-
; GFX9: s_mov_b32 [[K:s[0-9]+]], 0x31183118
660-
; GFX9: v_pk_mul_lo_u16 v0, v0, [[K]]
659+
; GFX9: s_movk_i32 [[K:s[0-9]+]], 0x3118
660+
; GFX9: v_pk_mul_lo_u16 v0, v0, [[K]] op_sel_hi:[1,0]
661661

662-
; GFX10: v_pk_mul_lo_u16 v0, 0x31183118, v0 ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xff,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x18,0x31,0x18,0x31]
662+
; GFX10: v_pk_mul_lo_u16 v0, 0x3118, v0 op_sel_hi:[0,1] ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xff,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x18,0x31,0x00,0x00]
663663
define <2 x i16> @mul_inline_imm_inv2pi_v2i16(<2 x i16> %x) {
664664
%y = mul <2 x i16> %x, bitcast (<2 x half> <half 0xH3118, half 0xH3118> to <2 x i16>)
665665
ret <2 x i16> %y

0 commit comments

Comments
 (0)