Skip to content

Commit e9c1dbb

Browse files
committed
Revert "[AMDGPU] Replace isInlinableLiteral16 with specific version (#81345)"
This reverts commit 530f0e6 because it breaks downstream.
1 parent c4979c9 commit e9c1dbb

11 files changed, with 119 additions (+119) and 230 deletions (-230).

llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp

Lines changed: 11 additions & 43 deletions
Original file line number | Diff line number | Diff line change
@@ -2006,12 +2006,8 @@ static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) {
20062006
return isInlinableIntLiteral(Val);
20072007
}
20082008

2009-
if (VT.getScalarType() == MVT::f16)
2010-
return AMDGPU::isInlinableLiteralFP16(Val, HasInv2Pi);
2011-
2012-
assert(VT.getScalarType() == MVT::bf16);
2013-
2014-
return AMDGPU::isInlinableLiteralBF16(Val, HasInv2Pi);
2009+
// f16/v2f16 operands work correctly for all values.
2010+
return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi);
20152011
}
20162012

20172013
bool AMDGPUOperand::isInlinableImm(MVT type) const {
@@ -2379,26 +2375,15 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo
23792375
return;
23802376

23812377
case AMDGPU::OPERAND_REG_IMM_INT16:
2382-
case AMDGPU::OPERAND_REG_INLINE_C_INT16:
2383-
case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
2384-
if (isSafeTruncation(Val, 16) &&
2385-
AMDGPU::isInlinableIntLiteral(static_cast<int16_t>(Val))) {
2386-
Inst.addOperand(MCOperand::createImm(Val));
2387-
setImmKindConst();
2388-
return;
2389-
}
2390-
2391-
Inst.addOperand(MCOperand::createImm(Val & 0xffff));
2392-
setImmKindLiteral();
2393-
return;
2394-
2395-
case AMDGPU::OPERAND_REG_INLINE_C_FP16:
23962378
case AMDGPU::OPERAND_REG_IMM_FP16:
23972379
case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
2380+
case AMDGPU::OPERAND_REG_INLINE_C_INT16:
2381+
case AMDGPU::OPERAND_REG_INLINE_C_FP16:
2382+
case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
23982383
case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
23992384
if (isSafeTruncation(Val, 16) &&
2400-
AMDGPU::isInlinableLiteralFP16(static_cast<int16_t>(Val),
2401-
AsmParser->hasInv2PiInlineImm())) {
2385+
AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
2386+
AsmParser->hasInv2PiInlineImm())) {
24022387
Inst.addOperand(MCOperand::createImm(Val));
24032388
setImmKindConst();
24042389
return;
@@ -2425,17 +2410,12 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo
24252410
return;
24262411

24272412
case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
2428-
case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: {
2429-
assert(isSafeTruncation(Val, 16));
2430-
assert(AMDGPU::isInlinableIntLiteral(static_cast<int16_t>(Val)));
2431-
Inst.addOperand(MCOperand::createImm(Val));
2432-
return;
2433-
}
24342413
case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
2414+
case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
24352415
case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: {
24362416
assert(isSafeTruncation(Val, 16));
2437-
assert(AMDGPU::isInlinableLiteralFP16(static_cast<int16_t>(Val),
2438-
AsmParser->hasInv2PiInlineImm()));
2417+
assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
2418+
AsmParser->hasInv2PiInlineImm()));
24392419

24402420
Inst.addOperand(MCOperand::createImm(Val));
24412421
return;
@@ -3579,19 +3559,7 @@ bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
35793559
OperandType == AMDGPU::OPERAND_REG_IMM_V2BF16)
35803560
return AMDGPU::isInlinableLiteralV2BF16(Val);
35813561

3582-
if (OperandType == AMDGPU::OPERAND_REG_IMM_FP16 ||
3583-
OperandType == AMDGPU::OPERAND_REG_INLINE_C_FP16 ||
3584-
OperandType == AMDGPU::OPERAND_REG_INLINE_AC_FP16 ||
3585-
OperandType == AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED)
3586-
return AMDGPU::isInlinableLiteralFP16(Val, hasInv2PiInlineImm());
3587-
3588-
if (OperandType == AMDGPU::OPERAND_REG_IMM_BF16 ||
3589-
OperandType == AMDGPU::OPERAND_REG_INLINE_C_BF16 ||
3590-
OperandType == AMDGPU::OPERAND_REG_INLINE_AC_BF16 ||
3591-
OperandType == AMDGPU::OPERAND_REG_IMM_BF16_DEFERRED)
3592-
return AMDGPU::isInlinableLiteralBF16(Val, hasInv2PiInlineImm());
3593-
3594-
llvm_unreachable("invalid operand type");
3562+
return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
35953563
}
35963564
default:
35973565
llvm_unreachable("invalid operand size");

llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -460,8 +460,10 @@ void AMDGPUInstPrinter::printImmediateInt16(uint32_t Imm,
460460
}
461461
}
462462

463-
static bool printImmediateFP16(uint32_t Imm, const MCSubtargetInfo &STI,
464-
raw_ostream &O) {
463+
// This must accept a 32-bit immediate value to correctly handle packed 16-bit
464+
// operations.
465+
static bool printImmediateFloat16(uint32_t Imm, const MCSubtargetInfo &STI,
466+
raw_ostream &O) {
465467
if (Imm == 0x3C00)
466468
O << "1.0";
467469
else if (Imm == 0xBC00)
@@ -527,17 +529,17 @@ void AMDGPUInstPrinter::printImmediateBF16(uint32_t Imm,
527529
O << formatHex(static_cast<uint64_t>(Imm));
528530
}
529531

530-
void AMDGPUInstPrinter::printImmediateF16(uint32_t Imm,
531-
const MCSubtargetInfo &STI,
532-
raw_ostream &O) {
532+
void AMDGPUInstPrinter::printImmediate16(uint32_t Imm,
533+
const MCSubtargetInfo &STI,
534+
raw_ostream &O) {
533535
int16_t SImm = static_cast<int16_t>(Imm);
534536
if (isInlinableIntLiteral(SImm)) {
535537
O << SImm;
536538
return;
537539
}
538540

539541
uint16_t HImm = static_cast<uint16_t>(Imm);
540-
if (printImmediateFP16(HImm, STI, O))
542+
if (printImmediateFloat16(HImm, STI, O))
541543
return;
542544

543545
uint64_t Imm16 = static_cast<uint16_t>(Imm);
@@ -564,7 +566,7 @@ void AMDGPUInstPrinter::printImmediateV216(uint32_t Imm, uint8_t OpType,
564566
case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
565567
case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
566568
if (isUInt<16>(Imm) &&
567-
printImmediateFP16(static_cast<uint16_t>(Imm), STI, O))
569+
printImmediateFloat16(static_cast<uint16_t>(Imm), STI, O))
568570
return;
569571
break;
570572
case AMDGPU::OPERAND_REG_IMM_V2BF16:
@@ -843,7 +845,7 @@ void AMDGPUInstPrinter::printRegularOperand(const MCInst *MI, unsigned OpNo,
843845
case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
844846
case AMDGPU::OPERAND_REG_IMM_FP16:
845847
case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
846-
printImmediateF16(Op.getImm(), STI, O);
848+
printImmediate16(Op.getImm(), STI, O);
847849
break;
848850
case AMDGPU::OPERAND_REG_INLINE_C_BF16:
849851
case AMDGPU::OPERAND_REG_INLINE_AC_BF16:

llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -86,10 +86,10 @@ class AMDGPUInstPrinter : public MCInstPrinter {
8686
raw_ostream &O);
8787
void printImmediateInt16(uint32_t Imm, const MCSubtargetInfo &STI,
8888
raw_ostream &O);
89+
void printImmediate16(uint32_t Imm, const MCSubtargetInfo &STI,
90+
raw_ostream &O);
8991
void printImmediateBF16(uint32_t Imm, const MCSubtargetInfo &STI,
9092
raw_ostream &O);
91-
void printImmediateF16(uint32_t Imm, const MCSubtargetInfo &STI,
92-
raw_ostream &O);
9393
void printImmediateV216(uint32_t Imm, uint8_t OpType,
9494
const MCSubtargetInfo &STI, raw_ostream &O);
9595
bool printImmediateFloat32(uint32_t Imm, const MCSubtargetInfo &STI,

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 6 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -15423,32 +15423,16 @@ bool SITargetLowering::checkAsmConstraintVal(SDValue Op, StringRef Constraint,
1542315423
llvm_unreachable("Invalid asm constraint");
1542415424
}
1542515425

15426-
bool SITargetLowering::checkAsmConstraintValA(SDValue Op, uint64_t Val,
15426+
bool SITargetLowering::checkAsmConstraintValA(SDValue Op,
15427+
uint64_t Val,
1542715428
unsigned MaxSize) const {
1542815429
unsigned Size = std::min<unsigned>(Op.getScalarValueSizeInBits(), MaxSize);
1542915430
bool HasInv2Pi = Subtarget->hasInv2PiInlineImm();
15430-
if (Size == 16) {
15431-
MVT VT = Op.getSimpleValueType();
15432-
switch (VT.SimpleTy) {
15433-
default:
15434-
return false;
15435-
case MVT::i16:
15436-
return AMDGPU::isInlinableLiteralI16(Val, HasInv2Pi);
15437-
case MVT::f16:
15438-
return AMDGPU::isInlinableLiteralFP16(Val, HasInv2Pi);
15439-
case MVT::bf16:
15440-
return AMDGPU::isInlinableLiteralBF16(Val, HasInv2Pi);
15441-
case MVT::v2i16:
15442-
return AMDGPU::getInlineEncodingV2I16(Val).has_value();
15443-
case MVT::v2f16:
15444-
return AMDGPU::getInlineEncodingV2F16(Val).has_value();
15445-
case MVT::v2bf16:
15446-
return AMDGPU::getInlineEncodingV2BF16(Val).has_value();
15447-
}
15448-
}
15449-
if ((Size == 32 && AMDGPU::isInlinableLiteral32(Val, HasInv2Pi)) ||
15450-
(Size == 64 && AMDGPU::isInlinableLiteral64(Val, HasInv2Pi)))
15431+
if ((Size == 16 && AMDGPU::isInlinableLiteral16(Val, HasInv2Pi)) ||
15432+
(Size == 32 && AMDGPU::isInlinableLiteral32(Val, HasInv2Pi)) ||
15433+
(Size == 64 && AMDGPU::isInlinableLiteral64(Val, HasInv2Pi))) {
1545115434
return true;
15435+
}
1545215436
return false;
1545315437
}
1545415438

llvm/lib/Target/AMDGPU/SIInstrInfo.cpp

Lines changed: 3 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -4121,32 +4121,13 @@ bool SIInstrInfo::isInlineConstant(const APInt &Imm) const {
41214121
ST.hasInv2PiInlineImm());
41224122
case 16:
41234123
return ST.has16BitInsts() &&
4124-
AMDGPU::isInlinableLiteralI16(Imm.getSExtValue(),
4125-
ST.hasInv2PiInlineImm());
4124+
AMDGPU::isInlinableLiteral16(Imm.getSExtValue(),
4125+
ST.hasInv2PiInlineImm());
41264126
default:
41274127
llvm_unreachable("invalid bitwidth");
41284128
}
41294129
}
41304130

4131-
bool SIInstrInfo::isInlineConstant(const APFloat &Imm) const {
4132-
APInt IntImm = Imm.bitcastToAPInt();
4133-
int64_t IntImmVal = IntImm.getSExtValue();
4134-
bool HasInv2Pi = ST.hasInv2PiInlineImm();
4135-
switch (APFloat::SemanticsToEnum(Imm.getSemantics())) {
4136-
default:
4137-
llvm_unreachable("invalid fltSemantics");
4138-
case APFloatBase::S_IEEEsingle:
4139-
case APFloatBase::S_IEEEdouble:
4140-
return isInlineConstant(IntImm);
4141-
case APFloatBase::S_BFloat:
4142-
return ST.has16BitInsts() &&
4143-
AMDGPU::isInlinableLiteralBF16(IntImmVal, HasInv2Pi);
4144-
case APFloatBase::S_IEEEhalf:
4145-
return ST.has16BitInsts() &&
4146-
AMDGPU::isInlinableLiteralFP16(IntImmVal, HasInv2Pi);
4147-
}
4148-
}
4149-
41504131
bool SIInstrInfo::isInlineConstant(const MachineOperand &MO,
41514132
uint8_t OperandType) const {
41524133
assert(!MO.isReg() && "isInlineConstant called on register operand!");
@@ -4219,7 +4200,7 @@ bool SIInstrInfo::isInlineConstant(const MachineOperand &MO,
42194200
// constants in these cases
42204201
int16_t Trunc = static_cast<int16_t>(Imm);
42214202
return ST.has16BitInsts() &&
4222-
AMDGPU::isInlinableLiteralFP16(Trunc, ST.hasInv2PiInlineImm());
4203+
AMDGPU::isInlinableLiteral16(Trunc, ST.hasInv2PiInlineImm());
42234204
}
42244205

42254206
return false;

llvm/lib/Target/AMDGPU/SIInstrInfo.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -984,7 +984,9 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {
984984

985985
bool isInlineConstant(const APInt &Imm) const;
986986

987-
bool isInlineConstant(const APFloat &Imm) const;
987+
bool isInlineConstant(const APFloat &Imm) const {
988+
return isInlineConstant(Imm.bitcastToAPInt());
989+
}
988990

989991
// Returns true if this non-register operand definitely does not need to be
990992
// encoded as a 32-bit literal. Note that this function handles all kinds of

llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp

Lines changed: 2 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2647,19 +2647,13 @@ bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi) {
26472647
Val == 0x3E22; // 1.0 / (2.0 * pi)
26482648
}
26492649

2650-
bool isInlinableLiteralI16(int16_t Literal, bool HasInv2Pi) {
2650+
bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi) {
26512651
if (!HasInv2Pi)
26522652
return false;
2653-
if (isInlinableIntLiteral(Literal))
2654-
return true;
2655-
return Literal == static_cast<int16_t>(0x3e22f983);
2656-
}
26572653

2658-
bool isInlinableLiteralFP16(int16_t Literal, bool HasInv2Pi) {
2659-
if (!HasInv2Pi)
2660-
return false;
26612654
if (isInlinableIntLiteral(Literal))
26622655
return true;
2656+
26632657
uint16_t Val = static_cast<uint16_t>(Literal);
26642658
return Val == 0x3C00 || // 1.0
26652659
Val == 0xBC00 || // -1.0

llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1397,13 +1397,7 @@ LLVM_READNONE
13971397
bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi);
13981398

13991399
LLVM_READNONE
1400-
bool isInlinableLiteralFP16(int16_t Literal, bool HasInv2Pi);
1401-
1402-
LLVM_READNONE
1403-
bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi);
1404-
1405-
LLVM_READNONE
1406-
bool isInlinableLiteralI16(int16_t Literal, bool HasInv2Pi);
1400+
bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi);
14071401

14081402
LLVM_READNONE
14091403
std::optional<unsigned> getInlineEncodingV2I16(uint32_t Literal);

llvm/test/CodeGen/AMDGPU/immv216.ll

Lines changed: 21 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -577,40 +577,40 @@ define amdgpu_kernel void @add_inline_imm_64_v2f16(ptr addrspace(1) %out, <2 x h
577577
}
578578

579579
; GCN-LABEL: {{^}}mul_inline_imm_0.5_v2i16:
580-
; GFX9: s_movk_i32 [[K:s[0-9]+]], 0x3800
581-
; GFX9: v_pk_mul_lo_u16 v0, v0, [[K]] op_sel_hi:[1,0]
580+
; GFX9: s_mov_b32 [[K:s[0-9]+]], 0x38003800
581+
; GFX9: v_pk_mul_lo_u16 v0, v0, [[K]]
582582

583-
; GFX10: v_pk_mul_lo_u16 v0, 0x3800, v0 op_sel_hi:[0,1] ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xff,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x00,0x38,0x00,0x00]
583+
; GFX10: v_pk_mul_lo_u16 v0, 0x38003800, v0 ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xff,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x00,0x38,0x00,0x38]
584584
define <2 x i16> @mul_inline_imm_0.5_v2i16(<2 x i16> %x) {
585585
%y = mul <2 x i16> %x, bitcast (<2 x half> <half 0.5, half 0.5> to <2 x i16>)
586586
ret <2 x i16> %y
587587
}
588588

589589
; GCN-LABEL: {{^}}mul_inline_imm_neg_0.5_v2i16:
590-
; GFX9: s_movk_i32 [[K:s[0-9]+]], 0xb800
591-
; GFX9: v_pk_mul_lo_u16 v0, v0, [[K]] op_sel_hi:[1,0]
590+
; GFX9: s_mov_b32 [[K:s[0-9]+]], 0xb800b800
591+
; GFX9: v_pk_mul_lo_u16 v0, v0, [[K]]
592592

593-
; GFX10: v_pk_mul_lo_u16 v0, 0xffffb800, v0 op_sel_hi:[0,1] ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xff,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x00,0xb8,0xff,0xff]
593+
; GFX10: v_pk_mul_lo_u16 v0, 0xb800b800, v0 ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xff,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x00,0xb8,0x00,0xb8]
594594
define <2 x i16> @mul_inline_imm_neg_0.5_v2i16(<2 x i16> %x) {
595595
%y = mul <2 x i16> %x, bitcast (<2 x half> <half -0.5, half -0.5> to <2 x i16>)
596596
ret <2 x i16> %y
597597
}
598598

599599
; GCN-LABEL: {{^}}mul_inline_imm_1.0_v2i16:
600-
; GFX9: s_movk_i32 [[K:s[0-9]+]], 0x3c00
601-
; GFX9: v_pk_mul_lo_u16 v0, v0, [[K]] op_sel_hi:[1,0]
600+
; GFX9: s_mov_b32 [[K:s[0-9]+]], 0x3c003c00
601+
; GFX9: v_pk_mul_lo_u16 v0, v0, [[K]]
602602

603-
; GFX10: v_pk_mul_lo_u16 v0, 0x3c00, v0 op_sel_hi:[0,1] ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xff,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x00,0x3c,0x00,0x00]
603+
; GFX10: v_pk_mul_lo_u16 v0, 0x3c003c00, v0 ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xff,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x00,0x3c,0x00,0x3c]
604604
define <2 x i16> @mul_inline_imm_1.0_v2i16(<2 x i16> %x) {
605605
%y = mul <2 x i16> %x, bitcast (<2 x half> <half 1.0, half 1.0> to <2 x i16>)
606606
ret <2 x i16> %y
607607
}
608608

609609
; GCN-LABEL: {{^}}mul_inline_imm_neg_1.0_v2i16:
610-
; GFX9: s_movk_i32 [[K:s[0-9]+]], 0xbc00
611-
; GFX9: v_pk_mul_lo_u16 v0, v0, [[K]] op_sel_hi:[1,0]
610+
; GFX9: s_mov_b32 [[K:s[0-9]+]], 0xbc00bc00
611+
; GFX9: v_pk_mul_lo_u16 v0, v0, [[K]]
612612

613-
; GFX10: v_pk_mul_lo_u16 v0, 0xffffbc00, v0 op_sel_hi:[0,1] ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xff,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x00,0xbc,0xff,0xff]
613+
; GFX10: v_pk_mul_lo_u16 v0, 0xbc00bc00, v0 ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xff,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x00,0xbc,0x00,0xbc]
614614
define <2 x i16> @mul_inline_imm_neg_1.0_v2i16(<2 x i16> %x) {
615615
%y = mul <2 x i16> %x, bitcast (<2 x half> <half -1.0, half -1.0> to <2 x i16>)
616616
ret <2 x i16> %y
@@ -635,31 +635,31 @@ define <2 x i16> @shl_inline_imm_neg_2.0_v2i16(<2 x i16> %x) {
635635
}
636636

637637
; GCN-LABEL: {{^}}mul_inline_imm_4.0_v2i16:
638-
; GFX9: s_movk_i32 [[K:s[0-9]+]], 0x4400
639-
; GFX9: v_pk_mul_lo_u16 v0, v0, [[K]] op_sel_hi:[1,0]
638+
; GFX9: s_mov_b32 [[K:s[0-9]+]], 0x44004400
639+
; GFX9: v_pk_mul_lo_u16 v0, v0, [[K]]
640640

641-
; GFX10: v_pk_mul_lo_u16 v0, 0x4400, v0 op_sel_hi:[0,1] ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xff,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x00,0x44,0x00,0x00]
641+
; GFX10: v_pk_mul_lo_u16 v0, 0x44004400, v0 ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xff,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x00,0x44,0x00,0x44]
642642
define <2 x i16> @mul_inline_imm_4.0_v2i16(<2 x i16> %x) {
643643
%y = mul <2 x i16> %x, bitcast (<2 x half> <half 4.0, half 4.0> to <2 x i16>)
644644
ret <2 x i16> %y
645645

646646
}
647647

648648
; GCN-LABEL: {{^}}mul_inline_imm_neg_4.0_v2i16:
649-
; GFX9: s_movk_i32 [[K:s[0-9]+]], 0xc400
650-
; GFX9: v_pk_mul_lo_u16 v0, v0, [[K]] op_sel_hi:[1,0]
649+
; GFX9: s_mov_b32 [[K:s[0-9]+]], 0xc400c400
650+
; GFX9: v_pk_mul_lo_u16 v0, v0, [[K]]
651651

652-
; GFX10: v_pk_mul_lo_u16 v0, 0xffffc400, v0 op_sel_hi:[0,1] ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xff,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x00,0xc4,0xff,0xff]
652+
; GFX10: v_pk_mul_lo_u16 v0, 0xc400c400, v0 ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xff,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x00,0xc4,0x00,0xc4]
653653
define <2 x i16> @mul_inline_imm_neg_4.0_v2i16(<2 x i16> %x) {
654654
%y = mul <2 x i16> %x, bitcast (<2 x half> <half -4.0, half -4.0> to <2 x i16>)
655655
ret <2 x i16> %y
656656
}
657657

658658
; GCN-LABEL: {{^}}mul_inline_imm_inv2pi_v2i16:
659-
; GFX9: s_movk_i32 [[K:s[0-9]+]], 0x3118
660-
; GFX9: v_pk_mul_lo_u16 v0, v0, [[K]] op_sel_hi:[1,0]
659+
; GFX9: s_mov_b32 [[K:s[0-9]+]], 0x31183118
660+
; GFX9: v_pk_mul_lo_u16 v0, v0, [[K]]
661661

662-
; GFX10: v_pk_mul_lo_u16 v0, 0x3118, v0 op_sel_hi:[0,1] ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xff,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x18,0x31,0x00,0x00]
662+
; GFX10: v_pk_mul_lo_u16 v0, 0x31183118, v0 ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xff,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x18,0x31,0x18,0x31]
663663
define <2 x i16> @mul_inline_imm_inv2pi_v2i16(<2 x i16> %x) {
664664
%y = mul <2 x i16> %x, bitcast (<2 x half> <half 0xH3118, half 0xH3118> to <2 x i16>)
665665
ret <2 x i16> %y

0 commit comments

Comments
 (0)