Skip to content

Commit 5400f6c

Browse files
committed
[WIP][AMDGPU] Split isInlinableLiteral16 into three and call the specific version if possible
The current implementation of `isInlinableLiteral16` assumes that a 16-bit inlinable literal is either an i16 or an fp16. This is not always true because of bf16, and we cannot tell fp16 and bf16 apart just by looking at the value. This patch splits `isInlinableLiteral16` into three versions, for i16, fp16, and bf16 respectively, and calls the corresponding version where the operand type is known. This patch is based on #81282. Current status: only two uses of the original `isInlinableLiteral16` remain. To remove them, we need to add an extra argument indicating the type of the operand that the immediate corresponds to, which will also require changing the function signatures of the two callers.
1 parent 7c206c7 commit 5400f6c

11 files changed

+220
-117
lines changed

llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp

Lines changed: 37 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1982,8 +1982,12 @@ static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) {
19821982
return isInlinableIntLiteral(Val);
19831983
}
19841984

1985-
// f16/v2f16 operands work correctly for all values.
1986-
return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi);
1985+
if (VT.getScalarType() == MVT::f16)
1986+
return AMDGPU::isInlinableLiteralFP16(Val, HasInv2Pi);
1987+
1988+
assert(VT.getScalarType() == MVT::bf16);
1989+
1990+
return AMDGPU::isInlinableLiteralBF16(Val, HasInv2Pi);
19871991
}
19881992

19891993
bool AMDGPUOperand::isInlinableImm(MVT type) const {
@@ -2351,15 +2355,26 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo
23512355
return;
23522356

23532357
case AMDGPU::OPERAND_REG_IMM_INT16:
2354-
case AMDGPU::OPERAND_REG_IMM_FP16:
2355-
case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
23562358
case AMDGPU::OPERAND_REG_INLINE_C_INT16:
2357-
case AMDGPU::OPERAND_REG_INLINE_C_FP16:
23582359
case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
2360+
if (isSafeTruncation(Val, 16) &&
2361+
AMDGPU::isInlinableIntLiteral(static_cast<int16_t>(Val))) {
2362+
Inst.addOperand(MCOperand::createImm(Val));
2363+
setImmKindConst();
2364+
return;
2365+
}
2366+
2367+
Inst.addOperand(MCOperand::createImm(Val & 0xffff));
2368+
setImmKindLiteral();
2369+
return;
2370+
2371+
case AMDGPU::OPERAND_REG_INLINE_C_FP16:
2372+
case AMDGPU::OPERAND_REG_IMM_FP16:
2373+
case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
23592374
case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
23602375
if (isSafeTruncation(Val, 16) &&
2361-
AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
2362-
AsmParser->hasInv2PiInlineImm())) {
2376+
AMDGPU::isInlinableLiteralFP16(static_cast<int16_t>(Val),
2377+
AsmParser->hasInv2PiInlineImm())) {
23632378
Inst.addOperand(MCOperand::createImm(Val));
23642379
setImmKindConst();
23652380
return;
@@ -2386,12 +2401,17 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo
23862401
return;
23872402

23882403
case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
2404+
case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: {
2405+
assert(isSafeTruncation(Val, 16));
2406+
assert(AMDGPU::isInlinableIntLiteral(static_cast<int16_t>(Val)));
2407+
Inst.addOperand(MCOperand::createImm(Val));
2408+
return;
2409+
}
23892410
case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
2390-
case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
23912411
case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: {
23922412
assert(isSafeTruncation(Val, 16));
2393-
assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
2394-
AsmParser->hasInv2PiInlineImm()));
2413+
assert(AMDGPU::isInlinableLiteralFP16(static_cast<int16_t>(Val),
2414+
AsmParser->hasInv2PiInlineImm()));
23952415

23962416
Inst.addOperand(MCOperand::createImm(Val));
23972417
return;
@@ -3535,7 +3555,13 @@ bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
35353555
OperandType == AMDGPU::OPERAND_REG_IMM_V2BF16)
35363556
return AMDGPU::isInlinableLiteralV2BF16(Val);
35373557

3538-
return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
3558+
if (OperandType == AMDGPU::OPERAND_REG_IMM_FP16 ||
3559+
OperandType == AMDGPU::OPERAND_REG_INLINE_C_FP16 ||
3560+
OperandType == AMDGPU::OPERAND_REG_INLINE_AC_FP16 ||
3561+
OperandType == AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED)
3562+
return AMDGPU::isInlinableLiteralFP16(Val, hasInv2PiInlineImm());
3563+
3564+
llvm_unreachable("invalid operand type");
35393565
}
35403566
default:
35413567
llvm_unreachable("invalid operand size");

llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -462,8 +462,8 @@ void AMDGPUInstPrinter::printImmediateInt16(uint32_t Imm,
462462

463463
// This must accept a 32-bit immediate value to correctly handle packed 16-bit
464464
// operations.
465-
static bool printImmediateFloat16(uint32_t Imm, const MCSubtargetInfo &STI,
466-
raw_ostream &O) {
465+
static bool printImmediateFP16(uint32_t Imm, const MCSubtargetInfo &STI,
466+
raw_ostream &O) {
467467
if (Imm == 0x3C00)
468468
O << "1.0";
469469
else if (Imm == 0xBC00)
@@ -529,17 +529,17 @@ void AMDGPUInstPrinter::printImmediateBF16(uint32_t Imm,
529529
O << formatHex(static_cast<uint64_t>(Imm));
530530
}
531531

532-
void AMDGPUInstPrinter::printImmediate16(uint32_t Imm,
533-
const MCSubtargetInfo &STI,
534-
raw_ostream &O) {
532+
void AMDGPUInstPrinter::printImmediateF16(uint32_t Imm,
533+
const MCSubtargetInfo &STI,
534+
raw_ostream &O) {
535535
int16_t SImm = static_cast<int16_t>(Imm);
536536
if (isInlinableIntLiteral(SImm)) {
537537
O << SImm;
538538
return;
539539
}
540540

541541
uint16_t HImm = static_cast<uint16_t>(Imm);
542-
if (printImmediateFloat16(HImm, STI, O))
542+
if (printImmediateFP16(HImm, STI, O))
543543
return;
544544

545545
uint64_t Imm16 = static_cast<uint16_t>(Imm);
@@ -566,7 +566,7 @@ void AMDGPUInstPrinter::printImmediateV216(uint32_t Imm, uint8_t OpType,
566566
case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
567567
case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
568568
if (isUInt<16>(Imm) &&
569-
printImmediateFloat16(static_cast<uint16_t>(Imm), STI, O))
569+
printImmediateFP16(static_cast<uint16_t>(Imm), STI, O))
570570
return;
571571
break;
572572
case AMDGPU::OPERAND_REG_IMM_V2BF16:
@@ -845,7 +845,7 @@ void AMDGPUInstPrinter::printRegularOperand(const MCInst *MI, unsigned OpNo,
845845
case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
846846
case AMDGPU::OPERAND_REG_IMM_FP16:
847847
case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
848-
printImmediate16(Op.getImm(), STI, O);
848+
printImmediateF16(Op.getImm(), STI, O);
849849
break;
850850
case AMDGPU::OPERAND_REG_INLINE_C_BF16:
851851
case AMDGPU::OPERAND_REG_INLINE_AC_BF16:

llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -86,10 +86,10 @@ class AMDGPUInstPrinter : public MCInstPrinter {
8686
raw_ostream &O);
8787
void printImmediateInt16(uint32_t Imm, const MCSubtargetInfo &STI,
8888
raw_ostream &O);
89-
void printImmediate16(uint32_t Imm, const MCSubtargetInfo &STI,
90-
raw_ostream &O);
9189
void printImmediateBF16(uint32_t Imm, const MCSubtargetInfo &STI,
9290
raw_ostream &O);
91+
void printImmediateF16(uint32_t Imm, const MCSubtargetInfo &STI,
92+
raw_ostream &O);
9393
void printImmediateV216(uint32_t Imm, uint8_t OpType,
9494
const MCSubtargetInfo &STI, raw_ostream &O);
9595
bool printImmediateFloat32(uint32_t Imm, const MCSubtargetInfo &STI,

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 18 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -15374,16 +15374,28 @@ bool SITargetLowering::checkAsmConstraintVal(SDValue Op, StringRef Constraint,
1537415374
llvm_unreachable("Invalid asm constraint");
1537515375
}
1537615376

15377-
bool SITargetLowering::checkAsmConstraintValA(SDValue Op,
15378-
uint64_t Val,
15377+
bool SITargetLowering::checkAsmConstraintValA(SDValue Op, uint64_t Val,
1537915378
unsigned MaxSize) const {
1538015379
unsigned Size = std::min<unsigned>(Op.getScalarValueSizeInBits(), MaxSize);
1538115380
bool HasInv2Pi = Subtarget->hasInv2PiInlineImm();
15382-
if ((Size == 16 && AMDGPU::isInlinableLiteral16(Val, HasInv2Pi)) ||
15383-
(Size == 32 && AMDGPU::isInlinableLiteral32(Val, HasInv2Pi)) ||
15384-
(Size == 64 && AMDGPU::isInlinableLiteral64(Val, HasInv2Pi))) {
15385-
return true;
15381+
if (Size == 16) {
15382+
MVT VT = Op.getSimpleValueType();
15383+
if (VT == MVT::i16 && AMDGPU::isInlinableLiteralI16(Val, HasInv2Pi))
15384+
return true;
15385+
if (VT == MVT::f16 && AMDGPU::isInlinableLiteralFP16(Val, HasInv2Pi))
15386+
return true;
15387+
if (VT == MVT::bf16 && AMDGPU::isInlinableLiteralBF16(Val, HasInv2Pi))
15388+
return true;
15389+
if (VT == MVT::v2i16 && AMDGPU::getInlineEncodingV2I16(Val).has_value())
15390+
return true;
15391+
if (VT == MVT::v2f16 && AMDGPU::getInlineEncodingV2F16(Val).has_value())
15392+
return true;
15393+
if (VT == MVT::v2bf16 && AMDGPU::getInlineEncodingV2BF16(Val).has_value())
15394+
return true;
1538615395
}
15396+
if ((Size == 32 && AMDGPU::isInlinableLiteral32(Val, HasInv2Pi)) ||
15397+
(Size == 64 && AMDGPU::isInlinableLiteral64(Val, HasInv2Pi)))
15398+
return true;
1538715399
return false;
1538815400
}
1538915401

llvm/lib/Target/AMDGPU/SIInstrInfo.cpp

Lines changed: 22 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4121,8 +4121,27 @@ bool SIInstrInfo::isInlineConstant(const APInt &Imm) const {
41214121
ST.hasInv2PiInlineImm());
41224122
case 16:
41234123
return ST.has16BitInsts() &&
4124-
AMDGPU::isInlinableLiteral16(Imm.getSExtValue(),
4125-
ST.hasInv2PiInlineImm());
4124+
AMDGPU::isInlinableLiteralI16(Imm.getSExtValue(),
4125+
ST.hasInv2PiInlineImm());
4126+
default:
4127+
llvm_unreachable("invalid bitwidth");
4128+
}
4129+
}
4130+
4131+
bool SIInstrInfo::isInlineConstant(const APFloat &Imm) const {
4132+
APInt IntImm = Imm.bitcastToAPInt();
4133+
bool HasInv2Pi = ST.hasInv2PiInlineImm();
4134+
switch (IntImm.getBitWidth()) {
4135+
case 32:
4136+
case 64:
4137+
return isInlineConstant(IntImm);
4138+
case 16:
4139+
if (Imm.isIEEE())
4140+
return ST.has16BitInsts() &&
4141+
AMDGPU::isInlinableLiteralFP16(IntImm.getSExtValue(), HasInv2Pi);
4142+
else
4143+
return ST.has16BitInsts() &&
4144+
AMDGPU::isInlinableLiteralBF16(IntImm.getSExtValue(), HasInv2Pi);
41264145
default:
41274146
llvm_unreachable("invalid bitwidth");
41284147
}
@@ -4200,7 +4219,7 @@ bool SIInstrInfo::isInlineConstant(const MachineOperand &MO,
42004219
// constants in these cases
42014220
int16_t Trunc = static_cast<int16_t>(Imm);
42024221
return ST.has16BitInsts() &&
4203-
AMDGPU::isInlinableLiteral16(Trunc, ST.hasInv2PiInlineImm());
4222+
AMDGPU::isInlinableLiteralFP16(Trunc, ST.hasInv2PiInlineImm());
42044223
}
42054224

42064225
return false;

llvm/lib/Target/AMDGPU/SIInstrInfo.h

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -982,9 +982,7 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {
982982

983983
bool isInlineConstant(const APInt &Imm) const;
984984

985-
bool isInlineConstant(const APFloat &Imm) const {
986-
return isInlineConstant(Imm.bitcastToAPInt());
987-
}
985+
bool isInlineConstant(const APFloat &Imm) const;
988986

989987
// Returns true if this non-register operand definitely does not need to be
990988
// encoded as a 32-bit literal. Note that this function handles all kinds of

llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2657,13 +2657,19 @@ bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi) {
26572657
Val == 0x3E22; // 1.0 / (2.0 * pi)
26582658
}
26592659

2660-
bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi) {
2660+
bool isInlinableLiteralI16(int16_t Literal, bool HasInv2Pi) {
26612661
if (!HasInv2Pi)
26622662
return false;
2663-
26642663
if (isInlinableIntLiteral(Literal))
26652664
return true;
2665+
return Literal == static_cast<int16_t>(0x3e22f983);
2666+
}
26662667

2668+
bool isInlinableLiteralFP16(int16_t Literal, bool HasInv2Pi) {
2669+
if (!HasInv2Pi)
2670+
return false;
2671+
if (isInlinableIntLiteral(Literal))
2672+
return true;
26672673
uint16_t Val = static_cast<uint16_t>(Literal);
26682674
return Val == 0x3C00 || // 1.0
26692675
Val == 0xBC00 || // -1.0

llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1414,7 +1414,13 @@ LLVM_READNONE
14141414
bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi);
14151415

14161416
LLVM_READNONE
1417-
bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi);
1417+
bool isInlinableLiteralFP16(int16_t Literal, bool HasInv2Pi);
1418+
1419+
LLVM_READNONE
1420+
bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi);
1421+
1422+
LLVM_READNONE
1423+
bool isInlinableLiteralI16(int16_t Literal, bool HasInv2Pi);
14181424

14191425
LLVM_READNONE
14201426
std::optional<unsigned> getInlineEncodingV2I16(uint32_t Literal);

llvm/test/CodeGen/AMDGPU/immv216.ll

Lines changed: 21 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -577,40 +577,40 @@ define amdgpu_kernel void @add_inline_imm_64_v2f16(ptr addrspace(1) %out, <2 x h
577577
}
578578

579579
; GCN-LABEL: {{^}}mul_inline_imm_0.5_v2i16:
580-
; GFX9: s_mov_b32 [[K:s[0-9]+]], 0x38003800
581-
; GFX9: v_pk_mul_lo_u16 v0, v0, [[K]]
580+
; GFX9: s_movk_i32 [[K:s[0-9]+]], 0x3800
581+
; GFX9: v_pk_mul_lo_u16 v0, v0, [[K]] op_sel_hi:[1,0]
582582

583-
; GFX10: v_pk_mul_lo_u16 v0, 0x38003800, v0 ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xff,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x00,0x38,0x00,0x38]
583+
; GFX10: v_pk_mul_lo_u16 v0, 0x3800, v0 op_sel_hi:[0,1] ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xff,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x00,0x38,0x00,0x00]
584584
define <2 x i16> @mul_inline_imm_0.5_v2i16(<2 x i16> %x) {
585585
%y = mul <2 x i16> %x, bitcast (<2 x half> <half 0.5, half 0.5> to <2 x i16>)
586586
ret <2 x i16> %y
587587
}
588588

589589
; GCN-LABEL: {{^}}mul_inline_imm_neg_0.5_v2i16:
590-
; GFX9: s_mov_b32 [[K:s[0-9]+]], 0xb800b800
591-
; GFX9: v_pk_mul_lo_u16 v0, v0, [[K]]
590+
; GFX9: s_movk_i32 [[K:s[0-9]+]], 0xb800
591+
; GFX9: v_pk_mul_lo_u16 v0, v0, [[K]] op_sel_hi:[1,0]
592592

593-
; GFX10: v_pk_mul_lo_u16 v0, 0xb800b800, v0 ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xff,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x00,0xb8,0x00,0xb8]
593+
; GFX10: v_pk_mul_lo_u16 v0, 0xffffb800, v0 op_sel_hi:[0,1] ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xff,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x00,0xb8,0xff,0xff]
594594
define <2 x i16> @mul_inline_imm_neg_0.5_v2i16(<2 x i16> %x) {
595595
%y = mul <2 x i16> %x, bitcast (<2 x half> <half -0.5, half -0.5> to <2 x i16>)
596596
ret <2 x i16> %y
597597
}
598598

599599
; GCN-LABEL: {{^}}mul_inline_imm_1.0_v2i16:
600-
; GFX9: s_mov_b32 [[K:s[0-9]+]], 0x3c003c00
601-
; GFX9: v_pk_mul_lo_u16 v0, v0, [[K]]
600+
; GFX9: s_movk_i32 [[K:s[0-9]+]], 0x3c00
601+
; GFX9: v_pk_mul_lo_u16 v0, v0, [[K]] op_sel_hi:[1,0]
602602

603-
; GFX10: v_pk_mul_lo_u16 v0, 0x3c003c00, v0 ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xff,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x00,0x3c,0x00,0x3c]
603+
; GFX10: v_pk_mul_lo_u16 v0, 0x3c00, v0 op_sel_hi:[0,1] ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xff,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x00,0x3c,0x00,0x00]
604604
define <2 x i16> @mul_inline_imm_1.0_v2i16(<2 x i16> %x) {
605605
%y = mul <2 x i16> %x, bitcast (<2 x half> <half 1.0, half 1.0> to <2 x i16>)
606606
ret <2 x i16> %y
607607
}
608608

609609
; GCN-LABEL: {{^}}mul_inline_imm_neg_1.0_v2i16:
610-
; GFX9: s_mov_b32 [[K:s[0-9]+]], 0xbc00bc00
611-
; GFX9: v_pk_mul_lo_u16 v0, v0, [[K]]
610+
; GFX9: s_movk_i32 [[K:s[0-9]+]], 0xbc00
611+
; GFX9: v_pk_mul_lo_u16 v0, v0, [[K]] op_sel_hi:[1,0]
612612

613-
; GFX10: v_pk_mul_lo_u16 v0, 0xbc00bc00, v0 ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xff,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x00,0xbc,0x00,0xbc]
613+
; GFX10: v_pk_mul_lo_u16 v0, 0xffffbc00, v0 op_sel_hi:[0,1] ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xff,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x00,0xbc,0xff,0xff]
614614
define <2 x i16> @mul_inline_imm_neg_1.0_v2i16(<2 x i16> %x) {
615615
%y = mul <2 x i16> %x, bitcast (<2 x half> <half -1.0, half -1.0> to <2 x i16>)
616616
ret <2 x i16> %y
@@ -635,31 +635,31 @@ define <2 x i16> @shl_inline_imm_neg_2.0_v2i16(<2 x i16> %x) {
635635
}
636636

637637
; GCN-LABEL: {{^}}mul_inline_imm_4.0_v2i16:
638-
; GFX9: s_mov_b32 [[K:s[0-9]+]], 0x44004400
639-
; GFX9: v_pk_mul_lo_u16 v0, v0, [[K]]
638+
; GFX9: s_movk_i32 [[K:s[0-9]+]], 0x4400
639+
; GFX9: v_pk_mul_lo_u16 v0, v0, [[K]] op_sel_hi:[1,0]
640640

641-
; GFX10: v_pk_mul_lo_u16 v0, 0x44004400, v0 ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xff,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x00,0x44,0x00,0x44]
641+
; GFX10: v_pk_mul_lo_u16 v0, 0x4400, v0 op_sel_hi:[0,1] ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xff,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x00,0x44,0x00,0x00]
642642
define <2 x i16> @mul_inline_imm_4.0_v2i16(<2 x i16> %x) {
643643
%y = mul <2 x i16> %x, bitcast (<2 x half> <half 4.0, half 4.0> to <2 x i16>)
644644
ret <2 x i16> %y
645645

646646
}
647647

648648
; GCN-LABEL: {{^}}mul_inline_imm_neg_4.0_v2i16:
649-
; GFX9: s_mov_b32 [[K:s[0-9]+]], 0xc400c400
650-
; GFX9: v_pk_mul_lo_u16 v0, v0, [[K]]
649+
; GFX9: s_movk_i32 [[K:s[0-9]+]], 0xc400
650+
; GFX9: v_pk_mul_lo_u16 v0, v0, [[K]] op_sel_hi:[1,0]
651651

652-
; GFX10: v_pk_mul_lo_u16 v0, 0xc400c400, v0 ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xff,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x00,0xc4,0x00,0xc4]
652+
; GFX10: v_pk_mul_lo_u16 v0, 0xffffc400, v0 op_sel_hi:[0,1] ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xff,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x00,0xc4,0xff,0xff]
653653
define <2 x i16> @mul_inline_imm_neg_4.0_v2i16(<2 x i16> %x) {
654654
%y = mul <2 x i16> %x, bitcast (<2 x half> <half -4.0, half -4.0> to <2 x i16>)
655655
ret <2 x i16> %y
656656
}
657657

658658
; GCN-LABEL: {{^}}mul_inline_imm_inv2pi_v2i16:
659-
; GFX9: s_mov_b32 [[K:s[0-9]+]], 0x31183118
660-
; GFX9: v_pk_mul_lo_u16 v0, v0, [[K]]
659+
; GFX9: s_movk_i32 [[K:s[0-9]+]], 0x3118
660+
; GFX9: v_pk_mul_lo_u16 v0, v0, [[K]] op_sel_hi:[1,0]
661661

662-
; GFX10: v_pk_mul_lo_u16 v0, 0x31183118, v0 ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xff,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x18,0x31,0x18,0x31]
662+
; GFX10: v_pk_mul_lo_u16 v0, 0x3118, v0 op_sel_hi:[0,1] ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xff,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x18,0x31,0x00,0x00]
663663
define <2 x i16> @mul_inline_imm_inv2pi_v2i16(<2 x i16> %x) {
664664
%y = mul <2 x i16> %x, bitcast (<2 x half> <half 0xH3118, half 0xH3118> to <2 x i16>)
665665
ret <2 x i16> %y

0 commit comments

Comments
 (0)