Skip to content

Commit 8c050e8

Browse files
committed
[WIP][AMDGPU] Split isInlinableLiteral16 into three and call the specific version if possible
The current implementation of `isInlinableLiteral16` assumes that a 16-bit inlinable literal is either an i16 or an fp16. This is not always true because of bf16. However, we can't tell fp16 and bf16 apart just by looking at the value. This patch splits `isInlinableLiteral16` into three versions, for i16, fp16, and bf16 respectively, and calls the corresponding version where the operand type is known. This patch is based on #81282. Currently, only two uses of the original `isInlinableLiteral16` remain. For those, we need to add an extra argument to indicate the type of the operand the immediate corresponds to. This will also require changing the function signatures of the two callers.
1 parent 4bc3b35 commit 8c050e8

11 files changed

+238
-117
lines changed

llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp

Lines changed: 37 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1982,8 +1982,12 @@ static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) {
19821982
return isInlinableIntLiteral(Val);
19831983
}
19841984

1985-
// f16/v2f16 operands work correctly for all values.
1986-
return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi);
1985+
if (VT.getScalarType() == MVT::f16)
1986+
return AMDGPU::isInlinableLiteralFP16(Val, HasInv2Pi);
1987+
1988+
assert(VT.getScalarType() == MVT::bf16);
1989+
1990+
return AMDGPU::isInlinableLiteralBF16(Val, HasInv2Pi);
19871991
}
19881992

19891993
bool AMDGPUOperand::isInlinableImm(MVT type) const {
@@ -2351,15 +2355,26 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo
23512355
return;
23522356

23532357
case AMDGPU::OPERAND_REG_IMM_INT16:
2354-
case AMDGPU::OPERAND_REG_IMM_FP16:
2355-
case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
23562358
case AMDGPU::OPERAND_REG_INLINE_C_INT16:
2357-
case AMDGPU::OPERAND_REG_INLINE_C_FP16:
23582359
case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
2360+
if (isSafeTruncation(Val, 16) &&
2361+
AMDGPU::isInlinableIntLiteral(static_cast<int16_t>(Val))) {
2362+
Inst.addOperand(MCOperand::createImm(Val));
2363+
setImmKindConst();
2364+
return;
2365+
}
2366+
2367+
Inst.addOperand(MCOperand::createImm(Val & 0xffff));
2368+
setImmKindLiteral();
2369+
return;
2370+
2371+
case AMDGPU::OPERAND_REG_INLINE_C_FP16:
2372+
case AMDGPU::OPERAND_REG_IMM_FP16:
2373+
case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
23592374
case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
23602375
if (isSafeTruncation(Val, 16) &&
2361-
AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
2362-
AsmParser->hasInv2PiInlineImm())) {
2376+
AMDGPU::isInlinableLiteralFP16(static_cast<int16_t>(Val),
2377+
AsmParser->hasInv2PiInlineImm())) {
23632378
Inst.addOperand(MCOperand::createImm(Val));
23642379
setImmKindConst();
23652380
return;
@@ -2386,12 +2401,17 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo
23862401
return;
23872402

23882403
case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
2404+
case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: {
2405+
assert(isSafeTruncation(Val, 16));
2406+
assert(AMDGPU::isInlinableIntLiteral(static_cast<int16_t>(Val)));
2407+
Inst.addOperand(MCOperand::createImm(Val));
2408+
return;
2409+
}
23892410
case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
2390-
case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
23912411
case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: {
23922412
assert(isSafeTruncation(Val, 16));
2393-
assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
2394-
AsmParser->hasInv2PiInlineImm()));
2413+
assert(AMDGPU::isInlinableLiteralFP16(static_cast<int16_t>(Val),
2414+
AsmParser->hasInv2PiInlineImm()));
23952415

23962416
Inst.addOperand(MCOperand::createImm(Val));
23972417
return;
@@ -3535,7 +3555,13 @@ bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
35353555
OperandType == AMDGPU::OPERAND_REG_IMM_V2BF16)
35363556
return AMDGPU::isInlinableLiteralV2BF16(Val);
35373557

3538-
return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
3558+
if (OperandType == AMDGPU::OPERAND_REG_IMM_FP16 ||
3559+
OperandType == AMDGPU::OPERAND_REG_INLINE_C_FP16 ||
3560+
OperandType == AMDGPU::OPERAND_REG_INLINE_AC_FP16 ||
3561+
OperandType == AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED)
3562+
return AMDGPU::isInlinableLiteralFP16(Val, hasInv2PiInlineImm());
3563+
3564+
llvm_unreachable("invalid operand type");
35393565
}
35403566
default:
35413567
llvm_unreachable("invalid operand size");

llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp

Lines changed: 16 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -462,8 +462,8 @@ void AMDGPUInstPrinter::printImmediateInt16(uint32_t Imm,
462462

463463
// This must accept a 32-bit immediate value to correctly handle packed 16-bit
464464
// operations.
465-
static bool printImmediateFloat16(uint32_t Imm, const MCSubtargetInfo &STI,
466-
raw_ostream &O) {
465+
static bool printImmediateFP16(uint32_t Imm, const MCSubtargetInfo &STI,
466+
raw_ostream &O) {
467467
if (Imm == 0x3C00)
468468
O << "1.0";
469469
else if (Imm == 0xBC00)
@@ -529,7 +529,7 @@ void AMDGPUInstPrinter::printImmediateBF16(uint32_t Imm,
529529
O << formatHex(static_cast<uint64_t>(Imm));
530530
}
531531

532-
void AMDGPUInstPrinter::printImmediate16(uint32_t Imm,
532+
void AMDGPUInstPrinter::printImmediate16(uint32_t Imm, uint8_t OpType,
533533
const MCSubtargetInfo &STI,
534534
raw_ostream &O) {
535535
int16_t SImm = static_cast<int16_t>(Imm);
@@ -539,8 +539,17 @@ void AMDGPUInstPrinter::printImmediate16(uint32_t Imm,
539539
}
540540

541541
uint16_t HImm = static_cast<uint16_t>(Imm);
542-
if (printImmediateFloat16(HImm, STI, O))
543-
return;
542+
switch (OpType) {
543+
case AMDGPU::OPERAND_REG_IMM_FP16:
544+
case AMDGPU::OPERAND_REG_INLINE_C_FP16:
545+
case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
546+
case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
547+
if (printImmediateFP16(HImm, STI, O))
548+
return;
549+
break;
550+
default:
551+
llvm_unreachable("bad operand type");
552+
}
544553

545554
uint64_t Imm16 = static_cast<uint16_t>(Imm);
546555
O << formatHex(Imm16);
@@ -566,7 +575,7 @@ void AMDGPUInstPrinter::printImmediateV216(uint32_t Imm, uint8_t OpType,
566575
case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
567576
case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
568577
if (isUInt<16>(Imm) &&
569-
printImmediateFloat16(static_cast<uint16_t>(Imm), STI, O))
578+
printImmediateFP16(static_cast<uint16_t>(Imm), STI, O))
570579
return;
571580
break;
572581
case AMDGPU::OPERAND_REG_IMM_V2BF16:
@@ -845,7 +854,7 @@ void AMDGPUInstPrinter::printRegularOperand(const MCInst *MI, unsigned OpNo,
845854
case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
846855
case AMDGPU::OPERAND_REG_IMM_FP16:
847856
case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
848-
printImmediate16(Op.getImm(), STI, O);
857+
printImmediate16(Op.getImm(), OpTy, STI, O);
849858
break;
850859
case AMDGPU::OPERAND_REG_INLINE_C_BF16:
851860
case AMDGPU::OPERAND_REG_INLINE_AC_BF16:

llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -86,10 +86,10 @@ class AMDGPUInstPrinter : public MCInstPrinter {
8686
raw_ostream &O);
8787
void printImmediateInt16(uint32_t Imm, const MCSubtargetInfo &STI,
8888
raw_ostream &O);
89-
void printImmediate16(uint32_t Imm, const MCSubtargetInfo &STI,
90-
raw_ostream &O);
9189
void printImmediateBF16(uint32_t Imm, const MCSubtargetInfo &STI,
9290
raw_ostream &O);
91+
void printImmediate16(uint32_t Imm, uint8_t OpType,
92+
const MCSubtargetInfo &STI, raw_ostream &O);
9393
void printImmediateV216(uint32_t Imm, uint8_t OpType,
9494
const MCSubtargetInfo &STI, raw_ostream &O);
9595
bool printImmediateFloat32(uint32_t Imm, const MCSubtargetInfo &STI,

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 18 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -15374,16 +15374,28 @@ bool SITargetLowering::checkAsmConstraintVal(SDValue Op, StringRef Constraint,
1537415374
llvm_unreachable("Invalid asm constraint");
1537515375
}
1537615376

15377-
bool SITargetLowering::checkAsmConstraintValA(SDValue Op,
15378-
uint64_t Val,
15377+
bool SITargetLowering::checkAsmConstraintValA(SDValue Op, uint64_t Val,
1537915378
unsigned MaxSize) const {
1538015379
unsigned Size = std::min<unsigned>(Op.getScalarValueSizeInBits(), MaxSize);
1538115380
bool HasInv2Pi = Subtarget->hasInv2PiInlineImm();
15382-
if ((Size == 16 && AMDGPU::isInlinableLiteral16(Val, HasInv2Pi)) ||
15383-
(Size == 32 && AMDGPU::isInlinableLiteral32(Val, HasInv2Pi)) ||
15384-
(Size == 64 && AMDGPU::isInlinableLiteral64(Val, HasInv2Pi))) {
15385-
return true;
15381+
if (Size == 16) {
15382+
MVT VT = Op.getSimpleValueType();
15383+
if (VT == MVT::i16 && AMDGPU::isInlinableLiteralI16(Val, HasInv2Pi))
15384+
return true;
15385+
if (VT == MVT::f16 && AMDGPU::isInlinableLiteralFP16(Val, HasInv2Pi))
15386+
return true;
15387+
if (VT == MVT::bf16 && AMDGPU::isInlinableLiteralBF16(Val, HasInv2Pi))
15388+
return true;
15389+
if (VT == MVT::v2i16 && AMDGPU::getInlineEncodingV2I16(Val).has_value())
15390+
return true;
15391+
if (VT == MVT::v2f16 && AMDGPU::getInlineEncodingV2F16(Val).has_value())
15392+
return true;
15393+
if (VT == MVT::v2bf16 && AMDGPU::getInlineEncodingV2BF16(Val).has_value())
15394+
return true;
1538615395
}
15396+
if ((Size == 32 && AMDGPU::isInlinableLiteral32(Val, HasInv2Pi)) ||
15397+
(Size == 64 && AMDGPU::isInlinableLiteral64(Val, HasInv2Pi)))
15398+
return true;
1538715399
return false;
1538815400
}
1538915401

llvm/lib/Target/AMDGPU/SIInstrInfo.cpp

Lines changed: 22 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4121,8 +4121,27 @@ bool SIInstrInfo::isInlineConstant(const APInt &Imm) const {
41214121
ST.hasInv2PiInlineImm());
41224122
case 16:
41234123
return ST.has16BitInsts() &&
4124-
AMDGPU::isInlinableLiteral16(Imm.getSExtValue(),
4125-
ST.hasInv2PiInlineImm());
4124+
AMDGPU::isInlinableLiteralI16(Imm.getSExtValue(),
4125+
ST.hasInv2PiInlineImm());
4126+
default:
4127+
llvm_unreachable("invalid bitwidth");
4128+
}
4129+
}
4130+
4131+
bool SIInstrInfo::isInlineConstant(const APFloat &Imm) const {
4132+
APInt IntImm = Imm.bitcastToAPInt();
4133+
bool HasInv2Pi = ST.hasInv2PiInlineImm();
4134+
switch (IntImm.getBitWidth()) {
4135+
case 32:
4136+
case 64:
4137+
return isInlineConstant(IntImm);
4138+
case 16:
4139+
if (Imm.isIEEE())
4140+
return ST.has16BitInsts() &&
4141+
AMDGPU::isInlinableLiteralFP16(IntImm.getSExtValue(), HasInv2Pi);
4142+
else
4143+
return ST.has16BitInsts() &&
4144+
AMDGPU::isInlinableLiteralBF16(IntImm.getSExtValue(), HasInv2Pi);
41264145
default:
41274146
llvm_unreachable("invalid bitwidth");
41284147
}
@@ -4200,7 +4219,7 @@ bool SIInstrInfo::isInlineConstant(const MachineOperand &MO,
42004219
// constants in these cases
42014220
int16_t Trunc = static_cast<int16_t>(Imm);
42024221
return ST.has16BitInsts() &&
4203-
AMDGPU::isInlinableLiteral16(Trunc, ST.hasInv2PiInlineImm());
4222+
AMDGPU::isInlinableLiteralFP16(Trunc, ST.hasInv2PiInlineImm());
42044223
}
42054224

42064225
return false;

llvm/lib/Target/AMDGPU/SIInstrInfo.h

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -982,9 +982,7 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {
982982

983983
bool isInlineConstant(const APInt &Imm) const;
984984

985-
bool isInlineConstant(const APFloat &Imm) const {
986-
return isInlineConstant(Imm.bitcastToAPInt());
987-
}
985+
bool isInlineConstant(const APFloat &Imm) const;
988986

989987
// Returns true if this non-register operand definitely does not need to be
990988
// encoded as a 32-bit literal. Note that this function handles all kinds of

llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp

Lines changed: 18 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2655,13 +2655,28 @@ bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi) {
26552655
Val == 0x3E22; // 1.0 / (2.0 * pi)
26562656
}
26572657

2658-
bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi) {
2658+
bool isInlinableLiteralI16(int16_t Literal, bool HasInv2Pi) {
2659+
if (!HasInv2Pi)
2660+
return false;
2661+
if (isInlinableIntLiteral(Literal))
2662+
return true;
2663+
return (Literal == static_cast<int16_t>(llvm::bit_cast<uint32_t>(0.0f))) ||
2664+
(Literal == static_cast<int16_t>(llvm::bit_cast<uint32_t>(1.0f))) ||
2665+
(Literal == static_cast<int16_t>(llvm::bit_cast<uint32_t>(-1.0f))) ||
2666+
(Literal == static_cast<int16_t>(llvm::bit_cast<uint32_t>(0.5f))) ||
2667+
(Literal == static_cast<int16_t>(llvm::bit_cast<uint32_t>(-0.5f))) ||
2668+
(Literal == static_cast<int16_t>(llvm::bit_cast<uint32_t>(2.0f))) ||
2669+
(Literal == static_cast<int16_t>(llvm::bit_cast<uint32_t>(-2.0f))) ||
2670+
(Literal == static_cast<int16_t>(llvm::bit_cast<uint32_t>(4.0f))) ||
2671+
(Literal == static_cast<int16_t>(llvm::bit_cast<uint32_t>(-4.0f))) ||
2672+
(Literal == static_cast<int16_t>(0x3e22f983));
2673+
}
2674+
2675+
bool isInlinableLiteralFP16(int16_t Literal, bool HasInv2Pi) {
26592676
if (!HasInv2Pi)
26602677
return false;
2661-
26622678
if (isInlinableIntLiteral(Literal))
26632679
return true;
2664-
26652680
uint16_t Val = static_cast<uint16_t>(Literal);
26662681
return Val == 0x3C00 || // 1.0
26672682
Val == 0xBC00 || // -1.0

llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1410,7 +1410,13 @@ LLVM_READNONE
14101410
bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi);
14111411

14121412
LLVM_READNONE
1413-
bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi);
1413+
bool isInlinableLiteralFP16(int16_t Literal, bool HasInv2Pi);
1414+
1415+
LLVM_READNONE
1416+
bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi);
1417+
1418+
LLVM_READNONE
1419+
bool isInlinableLiteralI16(int16_t Literal, bool HasInv2Pi);
14141420

14151421
LLVM_READNONE
14161422
std::optional<unsigned> getInlineEncodingV2I16(uint32_t Literal);

llvm/test/CodeGen/AMDGPU/immv216.ll

Lines changed: 21 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -577,40 +577,40 @@ define amdgpu_kernel void @add_inline_imm_64_v2f16(ptr addrspace(1) %out, <2 x h
577577
}
578578

579579
; GCN-LABEL: {{^}}mul_inline_imm_0.5_v2i16:
580-
; GFX9: s_mov_b32 [[K:s[0-9]+]], 0x38003800
581-
; GFX9: v_pk_mul_lo_u16 v0, v0, [[K]]
580+
; GFX9: s_movk_i32 [[K:s[0-9]+]], 0x3800
581+
; GFX9: v_pk_mul_lo_u16 v0, v0, [[K]] op_sel_hi:[1,0]
582582

583-
; GFX10: v_pk_mul_lo_u16 v0, 0x38003800, v0 ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xff,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x00,0x38,0x00,0x38]
583+
; GFX10: v_pk_mul_lo_u16 v0, 0x3800, v0 op_sel_hi:[0,1] ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xff,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x00,0x38,0x00,0x00]
584584
define <2 x i16> @mul_inline_imm_0.5_v2i16(<2 x i16> %x) {
585585
%y = mul <2 x i16> %x, bitcast (<2 x half> <half 0.5, half 0.5> to <2 x i16>)
586586
ret <2 x i16> %y
587587
}
588588

589589
; GCN-LABEL: {{^}}mul_inline_imm_neg_0.5_v2i16:
590-
; GFX9: s_mov_b32 [[K:s[0-9]+]], 0xb800b800
591-
; GFX9: v_pk_mul_lo_u16 v0, v0, [[K]]
590+
; GFX9: s_movk_i32 [[K:s[0-9]+]], 0xb800
591+
; GFX9: v_pk_mul_lo_u16 v0, v0, [[K]] op_sel_hi:[1,0]
592592

593-
; GFX10: v_pk_mul_lo_u16 v0, 0xb800b800, v0 ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xff,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x00,0xb8,0x00,0xb8]
593+
; GFX10: v_pk_mul_lo_u16 v0, 0xffffb800, v0 op_sel_hi:[0,1] ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xff,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x00,0xb8,0xff,0xff]
594594
define <2 x i16> @mul_inline_imm_neg_0.5_v2i16(<2 x i16> %x) {
595595
%y = mul <2 x i16> %x, bitcast (<2 x half> <half -0.5, half -0.5> to <2 x i16>)
596596
ret <2 x i16> %y
597597
}
598598

599599
; GCN-LABEL: {{^}}mul_inline_imm_1.0_v2i16:
600-
; GFX9: s_mov_b32 [[K:s[0-9]+]], 0x3c003c00
601-
; GFX9: v_pk_mul_lo_u16 v0, v0, [[K]]
600+
; GFX9: s_movk_i32 [[K:s[0-9]+]], 0x3c00
601+
; GFX9: v_pk_mul_lo_u16 v0, v0, [[K]] op_sel_hi:[1,0]
602602

603-
; GFX10: v_pk_mul_lo_u16 v0, 0x3c003c00, v0 ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xff,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x00,0x3c,0x00,0x3c]
603+
; GFX10: v_pk_mul_lo_u16 v0, 0x3c00, v0 op_sel_hi:[0,1] ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xff,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x00,0x3c,0x00,0x00]
604604
define <2 x i16> @mul_inline_imm_1.0_v2i16(<2 x i16> %x) {
605605
%y = mul <2 x i16> %x, bitcast (<2 x half> <half 1.0, half 1.0> to <2 x i16>)
606606
ret <2 x i16> %y
607607
}
608608

609609
; GCN-LABEL: {{^}}mul_inline_imm_neg_1.0_v2i16:
610-
; GFX9: s_mov_b32 [[K:s[0-9]+]], 0xbc00bc00
611-
; GFX9: v_pk_mul_lo_u16 v0, v0, [[K]]
610+
; GFX9: s_movk_i32 [[K:s[0-9]+]], 0xbc00
611+
; GFX9: v_pk_mul_lo_u16 v0, v0, [[K]] op_sel_hi:[1,0]
612612

613-
; GFX10: v_pk_mul_lo_u16 v0, 0xbc00bc00, v0 ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xff,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x00,0xbc,0x00,0xbc]
613+
; GFX10: v_pk_mul_lo_u16 v0, 0xffffbc00, v0 op_sel_hi:[0,1] ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xff,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x00,0xbc,0xff,0xff]
614614
define <2 x i16> @mul_inline_imm_neg_1.0_v2i16(<2 x i16> %x) {
615615
%y = mul <2 x i16> %x, bitcast (<2 x half> <half -1.0, half -1.0> to <2 x i16>)
616616
ret <2 x i16> %y
@@ -635,31 +635,31 @@ define <2 x i16> @shl_inline_imm_neg_2.0_v2i16(<2 x i16> %x) {
635635
}
636636

637637
; GCN-LABEL: {{^}}mul_inline_imm_4.0_v2i16:
638-
; GFX9: s_mov_b32 [[K:s[0-9]+]], 0x44004400
639-
; GFX9: v_pk_mul_lo_u16 v0, v0, [[K]]
638+
; GFX9: s_movk_i32 [[K:s[0-9]+]], 0x4400
639+
; GFX9: v_pk_mul_lo_u16 v0, v0, [[K]] op_sel_hi:[1,0]
640640

641-
; GFX10: v_pk_mul_lo_u16 v0, 0x44004400, v0 ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xff,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x00,0x44,0x00,0x44]
641+
; GFX10: v_pk_mul_lo_u16 v0, 0x4400, v0 op_sel_hi:[0,1] ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xff,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x00,0x44,0x00,0x00]
642642
define <2 x i16> @mul_inline_imm_4.0_v2i16(<2 x i16> %x) {
643643
%y = mul <2 x i16> %x, bitcast (<2 x half> <half 4.0, half 4.0> to <2 x i16>)
644644
ret <2 x i16> %y
645645

646646
}
647647

648648
; GCN-LABEL: {{^}}mul_inline_imm_neg_4.0_v2i16:
649-
; GFX9: s_mov_b32 [[K:s[0-9]+]], 0xc400c400
650-
; GFX9: v_pk_mul_lo_u16 v0, v0, [[K]]
649+
; GFX9: s_movk_i32 [[K:s[0-9]+]], 0xc400
650+
; GFX9: v_pk_mul_lo_u16 v0, v0, [[K]] op_sel_hi:[1,0]
651651

652-
; GFX10: v_pk_mul_lo_u16 v0, 0xc400c400, v0 ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xff,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x00,0xc4,0x00,0xc4]
652+
; GFX10: v_pk_mul_lo_u16 v0, 0xffffc400, v0 op_sel_hi:[0,1] ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xff,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x00,0xc4,0xff,0xff]
653653
define <2 x i16> @mul_inline_imm_neg_4.0_v2i16(<2 x i16> %x) {
654654
%y = mul <2 x i16> %x, bitcast (<2 x half> <half -4.0, half -4.0> to <2 x i16>)
655655
ret <2 x i16> %y
656656
}
657657

658658
; GCN-LABEL: {{^}}mul_inline_imm_inv2pi_v2i16:
659-
; GFX9: s_mov_b32 [[K:s[0-9]+]], 0x31183118
660-
; GFX9: v_pk_mul_lo_u16 v0, v0, [[K]]
659+
; GFX9: s_movk_i32 [[K:s[0-9]+]], 0x3118
660+
; GFX9: v_pk_mul_lo_u16 v0, v0, [[K]] op_sel_hi:[1,0]
661661

662-
; GFX10: v_pk_mul_lo_u16 v0, 0x31183118, v0 ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xff,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x18,0x31,0x18,0x31]
662+
; GFX10: v_pk_mul_lo_u16 v0, 0x3118, v0 op_sel_hi:[0,1] ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xff,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x18,0x31,0x00,0x00]
663663
define <2 x i16> @mul_inline_imm_inv2pi_v2i16(<2 x i16> %x) {
664664
%y = mul <2 x i16> %x, bitcast (<2 x half> <half 0xH3118, half 0xH3118> to <2 x i16>)
665665
ret <2 x i16> %y

0 commit comments

Comments
 (0)