Skip to content

Commit 032763d

Browse files
committed
[WIP][AMDGPU] Split isInlinableLiteral16 into three and call the specific version if possible
The current implementation of `isInlinableLiteral16` assumes that a 16-bit inlinable literal is either an i16 or an fp16. This is not always true because of bf16, and we can't tell fp16 and bf16 apart just by looking at the value. This patch splits `isInlinableLiteral16` into three versions (for i16, fp16, and bf16 respectively) and calls the corresponding version wherever the operand type is known. This patch is based on #81282. Current status: only two uses of the original `isInlinableLiteral16` remain. Removing them requires adding an extra argument that indicates the type of the operand the immediate corresponds to, which in turn requires changing the function signatures of the two callers.
1 parent 7c3ad9e commit 032763d

11 files changed

+237
-110
lines changed

llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp

Lines changed: 37 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1979,8 +1979,12 @@ static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) {
19791979
return isInlinableIntLiteral(Val);
19801980
}
19811981

1982-
// f16/v2f16 operands work correctly for all values.
1983-
return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi);
1982+
if (VT.getScalarType() == MVT::f16)
1983+
return AMDGPU::isInlinableLiteralFP16(Val, HasInv2Pi);
1984+
1985+
assert(VT.getScalarType() == MVT::bf16);
1986+
1987+
return AMDGPU::isInlinableLiteralBF16(Val, HasInv2Pi);
19841988
}
19851989

19861990
bool AMDGPUOperand::isInlinableImm(MVT type) const {
@@ -2337,15 +2341,26 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo
23372341
return;
23382342

23392343
case AMDGPU::OPERAND_REG_IMM_INT16:
2340-
case AMDGPU::OPERAND_REG_IMM_FP16:
2341-
case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
23422344
case AMDGPU::OPERAND_REG_INLINE_C_INT16:
2343-
case AMDGPU::OPERAND_REG_INLINE_C_FP16:
23442345
case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
2346+
if (isSafeTruncation(Val, 16) &&
2347+
AMDGPU::isInlinableIntLiteral(static_cast<int16_t>(Val))) {
2348+
Inst.addOperand(MCOperand::createImm(Val));
2349+
setImmKindConst();
2350+
return;
2351+
}
2352+
2353+
Inst.addOperand(MCOperand::createImm(Val & 0xffff));
2354+
setImmKindLiteral();
2355+
return;
2356+
2357+
case AMDGPU::OPERAND_REG_INLINE_C_FP16:
2358+
case AMDGPU::OPERAND_REG_IMM_FP16:
2359+
case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
23452360
case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
23462361
if (isSafeTruncation(Val, 16) &&
2347-
AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
2348-
AsmParser->hasInv2PiInlineImm())) {
2362+
AMDGPU::isInlinableLiteralFP16(static_cast<int16_t>(Val),
2363+
AsmParser->hasInv2PiInlineImm())) {
23492364
Inst.addOperand(MCOperand::createImm(Val));
23502365
setImmKindConst();
23512366
return;
@@ -2372,12 +2387,17 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo
23722387
return;
23732388

23742389
case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
2390+
case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: {
2391+
assert(isSafeTruncation(Val, 16));
2392+
assert(AMDGPU::isInlinableIntLiteral(static_cast<int16_t>(Val)));
2393+
Inst.addOperand(MCOperand::createImm(Val));
2394+
return;
2395+
}
23752396
case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
2376-
case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
23772397
case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: {
23782398
assert(isSafeTruncation(Val, 16));
2379-
assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
2380-
AsmParser->hasInv2PiInlineImm()));
2399+
assert(AMDGPU::isInlinableLiteralFP16(static_cast<int16_t>(Val),
2400+
AsmParser->hasInv2PiInlineImm()));
23812401

23822402
Inst.addOperand(MCOperand::createImm(Val));
23832403
return;
@@ -3521,7 +3541,13 @@ bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
35213541
OperandType == AMDGPU::OPERAND_REG_IMM_V2BF16)
35223542
return AMDGPU::isInlinableLiteralV2BF16(Val);
35233543

3524-
return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
3544+
if (OperandType == AMDGPU::OPERAND_REG_IMM_FP16 ||
3545+
OperandType == AMDGPU::OPERAND_REG_INLINE_C_FP16 ||
3546+
OperandType == AMDGPU::OPERAND_REG_INLINE_AC_FP16 ||
3547+
OperandType == AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED)
3548+
return AMDGPU::isInlinableLiteralFP16(Val, hasInv2PiInlineImm());
3549+
3550+
llvm_unreachable("invalid operand type");
35253551
}
35263552
default:
35273553
llvm_unreachable("invalid operand size");

llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp

Lines changed: 16 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -462,8 +462,8 @@ void AMDGPUInstPrinter::printImmediateInt16(uint32_t Imm,
462462

463463
// This must accept a 32-bit immediate value to correctly handle packed 16-bit
464464
// operations.
465-
static bool printImmediateFloat16(uint32_t Imm, const MCSubtargetInfo &STI,
466-
raw_ostream &O) {
465+
static bool printImmediateFP16(uint32_t Imm, const MCSubtargetInfo &STI,
466+
raw_ostream &O) {
467467
if (Imm == 0x3C00)
468468
O << "1.0";
469469
else if (Imm == 0xBC00)
@@ -529,7 +529,7 @@ void AMDGPUInstPrinter::printImmediateBF16(uint32_t Imm,
529529
O << formatHex(static_cast<uint64_t>(Imm));
530530
}
531531

532-
void AMDGPUInstPrinter::printImmediate16(uint32_t Imm,
532+
void AMDGPUInstPrinter::printImmediate16(uint32_t Imm, uint8_t OpType,
533533
const MCSubtargetInfo &STI,
534534
raw_ostream &O) {
535535
int16_t SImm = static_cast<int16_t>(Imm);
@@ -539,8 +539,17 @@ void AMDGPUInstPrinter::printImmediate16(uint32_t Imm,
539539
}
540540

541541
uint16_t HImm = static_cast<uint16_t>(Imm);
542-
if (printImmediateFloat16(HImm, STI, O))
543-
return;
542+
switch (OpType) {
543+
case AMDGPU::OPERAND_REG_IMM_FP16:
544+
case AMDGPU::OPERAND_REG_INLINE_C_FP16:
545+
case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
546+
case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
547+
if (printImmediateFP16(HImm, STI, O))
548+
return;
549+
break;
550+
default:
551+
llvm_unreachable("bad operand type");
552+
}
544553

545554
uint64_t Imm16 = static_cast<uint16_t>(Imm);
546555
O << formatHex(Imm16);
@@ -566,7 +575,7 @@ void AMDGPUInstPrinter::printImmediateV216(uint32_t Imm, uint8_t OpType,
566575
case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
567576
case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
568577
if (isUInt<16>(Imm) &&
569-
printImmediateFloat16(static_cast<uint16_t>(Imm), STI, O))
578+
printImmediateFP16(static_cast<uint16_t>(Imm), STI, O))
570579
return;
571580
break;
572581
case AMDGPU::OPERAND_REG_IMM_V2BF16:
@@ -845,7 +854,7 @@ void AMDGPUInstPrinter::printRegularOperand(const MCInst *MI, unsigned OpNo,
845854
case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
846855
case AMDGPU::OPERAND_REG_IMM_FP16:
847856
case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
848-
printImmediate16(Op.getImm(), STI, O);
857+
printImmediate16(Op.getImm(), OpTy, STI, O);
849858
break;
850859
case AMDGPU::OPERAND_REG_INLINE_C_BF16:
851860
case AMDGPU::OPERAND_REG_INLINE_AC_BF16:

llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -86,10 +86,10 @@ class AMDGPUInstPrinter : public MCInstPrinter {
8686
raw_ostream &O);
8787
void printImmediateInt16(uint32_t Imm, const MCSubtargetInfo &STI,
8888
raw_ostream &O);
89-
void printImmediate16(uint32_t Imm, const MCSubtargetInfo &STI,
90-
raw_ostream &O);
9189
void printImmediateBF16(uint32_t Imm, const MCSubtargetInfo &STI,
9290
raw_ostream &O);
91+
void printImmediate16(uint32_t Imm, uint8_t OpType,
92+
const MCSubtargetInfo &STI, raw_ostream &O);
9393
void printImmediateV216(uint32_t Imm, uint8_t OpType,
9494
const MCSubtargetInfo &STI, raw_ostream &O);
9595
bool printImmediateFloat32(uint32_t Imm, const MCSubtargetInfo &STI,

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -15389,16 +15389,22 @@ bool SITargetLowering::checkAsmConstraintVal(SDValue Op, StringRef Constraint,
1538915389
llvm_unreachable("Invalid asm constraint");
1539015390
}
1539115391

15392-
bool SITargetLowering::checkAsmConstraintValA(SDValue Op,
15393-
uint64_t Val,
15392+
bool SITargetLowering::checkAsmConstraintValA(SDValue Op, uint64_t Val,
1539415393
unsigned MaxSize) const {
1539515394
unsigned Size = std::min<unsigned>(Op.getScalarValueSizeInBits(), MaxSize);
1539615395
bool HasInv2Pi = Subtarget->hasInv2PiInlineImm();
15397-
if ((Size == 16 && AMDGPU::isInlinableLiteral16(Val, HasInv2Pi)) ||
15398-
(Size == 32 && AMDGPU::isInlinableLiteral32(Val, HasInv2Pi)) ||
15399-
(Size == 64 && AMDGPU::isInlinableLiteral64(Val, HasInv2Pi))) {
15400-
return true;
15396+
if (Size == 16) {
15397+
MVT VT = Op.getSimpleValueType();
15398+
if (VT == MVT::i16 && AMDGPU::isInlinableLiteralI16(Val, HasInv2Pi))
15399+
return true;
15400+
if (VT == MVT::f16 && AMDGPU::isInlinableLiteralFP16(Val, HasInv2Pi))
15401+
return true;
15402+
if (VT == MVT::bf16 && AMDGPU::isInlinableLiteralBF16(Val, HasInv2Pi))
15403+
return true;
1540115404
}
15405+
if ((Size == 32 && AMDGPU::isInlinableLiteral32(Val, HasInv2Pi)) ||
15406+
(Size == 64 && AMDGPU::isInlinableLiteral64(Val, HasInv2Pi)))
15407+
return true;
1540215408
return false;
1540315409
}
1540415410

llvm/lib/Target/AMDGPU/SIInstrInfo.cpp

Lines changed: 22 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4121,8 +4121,27 @@ bool SIInstrInfo::isInlineConstant(const APInt &Imm) const {
41214121
ST.hasInv2PiInlineImm());
41224122
case 16:
41234123
return ST.has16BitInsts() &&
4124-
AMDGPU::isInlinableLiteral16(Imm.getSExtValue(),
4125-
ST.hasInv2PiInlineImm());
4124+
AMDGPU::isInlinableLiteralI16(Imm.getSExtValue(),
4125+
ST.hasInv2PiInlineImm());
4126+
default:
4127+
llvm_unreachable("invalid bitwidth");
4128+
}
4129+
}
4130+
4131+
bool SIInstrInfo::isInlineConstant(const APFloat &Imm) const {
4132+
APInt IntImm = Imm.bitcastToAPInt();
4133+
bool HasInv2Pi = ST.hasInv2PiInlineImm();
4134+
switch (IntImm.getBitWidth()) {
4135+
case 32:
4136+
case 64:
4137+
return isInlineConstant(IntImm);
4138+
case 16:
4139+
if (Imm.isIEEE())
4140+
return ST.has16BitInsts() &&
4141+
AMDGPU::isInlinableLiteralFP16(IntImm.getSExtValue(), HasInv2Pi);
4142+
else
4143+
return ST.has16BitInsts() &&
4144+
AMDGPU::isInlinableLiteralBF16(IntImm.getSExtValue(), HasInv2Pi);
41264145
default:
41274146
llvm_unreachable("invalid bitwidth");
41284147
}
@@ -4200,7 +4219,7 @@ bool SIInstrInfo::isInlineConstant(const MachineOperand &MO,
42004219
// constants in these cases
42014220
int16_t Trunc = static_cast<int16_t>(Imm);
42024221
return ST.has16BitInsts() &&
4203-
AMDGPU::isInlinableLiteral16(Trunc, ST.hasInv2PiInlineImm());
4222+
AMDGPU::isInlinableLiteralFP16(Trunc, ST.hasInv2PiInlineImm());
42044223
}
42054224

42064225
return false;

llvm/lib/Target/AMDGPU/SIInstrInfo.h

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -982,9 +982,7 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {
982982

983983
bool isInlineConstant(const APInt &Imm) const;
984984

985-
bool isInlineConstant(const APFloat &Imm) const {
986-
return isInlineConstant(Imm.bitcastToAPInt());
987-
}
985+
bool isInlineConstant(const APFloat &Imm) const;
988986

989987
// Returns true if this non-register operand definitely does not need to be
990988
// encoded as a 32-bit literal. Note that this function handles all kinds of

llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp

Lines changed: 18 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2669,13 +2669,28 @@ bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi) {
26692669
Val == 0x3E22; // 1.0 / (2.0 * pi)
26702670
}
26712671

2672-
bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi) {
2672+
// Reports whether the 16-bit integer value `Literal` is treated as an inline
// constant.
//
// Returns false outright when `HasInv2Pi` is false. Otherwise returns true if
// `isInlinableIntLiteral(Literal)` accepts the value, or if `Literal` equals
// one of the constants listed below after each has been narrowed to int16_t.
bool isInlinableLiteralI16(int16_t Literal, bool HasInv2Pi) {
2673+
// This guard runs before every other check, including the integer-literal
// test below.
if (!HasInv2Pi)
2674+
return false;
2675+
if (isInlinableIntLiteral(Literal))
2676+
return true;
2677+
// NOTE(review): every operand below is a 32-bit pattern narrowed with
// static_cast<int16_t>, so only its low 16 bits take part in the comparison.
// Assuming IEEE-754 binary32 floats, the low halves of 0.0, +/-0.5, +/-1.0,
// +/-2.0 and +/-4.0 all appear to be zero, which would make those
// comparisons equivalent to `Literal == 0` -- confirm this is intended.
return (Literal == static_cast<int16_t>(llvm::bit_cast<uint32_t>(0.0f))) ||
2678+
(Literal == static_cast<int16_t>(llvm::bit_cast<uint32_t>(1.0f))) ||
2679+
(Literal == static_cast<int16_t>(llvm::bit_cast<uint32_t>(-1.0f))) ||
2680+
(Literal == static_cast<int16_t>(llvm::bit_cast<uint32_t>(0.5f))) ||
2681+
(Literal == static_cast<int16_t>(llvm::bit_cast<uint32_t>(-0.5f))) ||
2682+
(Literal == static_cast<int16_t>(llvm::bit_cast<uint32_t>(2.0f))) ||
2683+
(Literal == static_cast<int16_t>(llvm::bit_cast<uint32_t>(-2.0f))) ||
2684+
(Literal == static_cast<int16_t>(llvm::bit_cast<uint32_t>(4.0f))) ||
2685+
(Literal == static_cast<int16_t>(llvm::bit_cast<uint32_t>(-4.0f))) ||
2686+
// NOTE(review): 0x3e22f983 is presumably the f32 encoding of 1/(2*pi);
// after narrowing only 0xf983 is compared -- verify against the intended
// i16 inline-constant semantics.
(Literal == static_cast<int16_t>(0x3e22f983));
2687+
}
2688+
2689+
bool isInlinableLiteralFP16(int16_t Literal, bool HasInv2Pi) {
26732690
if (!HasInv2Pi)
26742691
return false;
2675-
26762692
if (isInlinableIntLiteral(Literal))
26772693
return true;
2678-
26792694
uint16_t Val = static_cast<uint16_t>(Literal);
26802695
return Val == 0x3C00 || // 1.0
26812696
Val == 0xBC00 || // -1.0

llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1384,7 +1384,13 @@ LLVM_READNONE
13841384
bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi);
13851385

13861386
LLVM_READNONE
1387-
bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi);
1387+
bool isInlinableLiteralFP16(int16_t Literal, bool HasInv2Pi);
1388+
1389+
LLVM_READNONE
1390+
bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi);
1391+
1392+
LLVM_READNONE
1393+
bool isInlinableLiteralI16(int16_t Literal, bool HasInv2Pi);
13881394

13891395
LLVM_READNONE
13901396
std::optional<unsigned> getInlineEncodingV2I16(uint32_t Literal);

llvm/test/CodeGen/AMDGPU/immv216.ll

Lines changed: 21 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -577,40 +577,40 @@ define amdgpu_kernel void @add_inline_imm_64_v2f16(ptr addrspace(1) %out, <2 x h
577577
}
578578

579579
; GCN-LABEL: {{^}}mul_inline_imm_0.5_v2i16:
580-
; GFX9: s_mov_b32 [[K:s[0-9]+]], 0x38003800
581-
; GFX9: v_pk_mul_lo_u16 v0, v0, [[K]]
580+
; GFX9: s_movk_i32 [[K:s[0-9]+]], 0x3800
581+
; GFX9: v_pk_mul_lo_u16 v0, v0, [[K]] op_sel_hi:[1,0]
582582

583-
; GFX10: v_pk_mul_lo_u16 v0, 0x38003800, v0 ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xff,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x00,0x38,0x00,0x38]
583+
; GFX10: v_pk_mul_lo_u16 v0, 0x3800, v0 op_sel_hi:[0,1] ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xff,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x00,0x38,0x00,0x00]
584584
define <2 x i16> @mul_inline_imm_0.5_v2i16(<2 x i16> %x) {
585585
%y = mul <2 x i16> %x, bitcast (<2 x half> <half 0.5, half 0.5> to <2 x i16>)
586586
ret <2 x i16> %y
587587
}
588588

589589
; GCN-LABEL: {{^}}mul_inline_imm_neg_0.5_v2i16:
590-
; GFX9: s_mov_b32 [[K:s[0-9]+]], 0xb800b800
591-
; GFX9: v_pk_mul_lo_u16 v0, v0, [[K]]
590+
; GFX9: s_movk_i32 [[K:s[0-9]+]], 0xb800
591+
; GFX9: v_pk_mul_lo_u16 v0, v0, [[K]] op_sel_hi:[1,0]
592592

593-
; GFX10: v_pk_mul_lo_u16 v0, 0xb800b800, v0 ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xff,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x00,0xb8,0x00,0xb8]
593+
; GFX10: v_pk_mul_lo_u16 v0, 0xffffb800, v0 op_sel_hi:[0,1] ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xff,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x00,0xb8,0xff,0xff]
594594
define <2 x i16> @mul_inline_imm_neg_0.5_v2i16(<2 x i16> %x) {
595595
%y = mul <2 x i16> %x, bitcast (<2 x half> <half -0.5, half -0.5> to <2 x i16>)
596596
ret <2 x i16> %y
597597
}
598598

599599
; GCN-LABEL: {{^}}mul_inline_imm_1.0_v2i16:
600-
; GFX9: s_mov_b32 [[K:s[0-9]+]], 0x3c003c00
601-
; GFX9: v_pk_mul_lo_u16 v0, v0, [[K]]
600+
; GFX9: s_movk_i32 [[K:s[0-9]+]], 0x3c00
601+
; GFX9: v_pk_mul_lo_u16 v0, v0, [[K]] op_sel_hi:[1,0]
602602

603-
; GFX10: v_pk_mul_lo_u16 v0, 0x3c003c00, v0 ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xff,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x00,0x3c,0x00,0x3c]
603+
; GFX10: v_pk_mul_lo_u16 v0, 0x3c00, v0 op_sel_hi:[0,1] ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xff,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x00,0x3c,0x00,0x00]
604604
define <2 x i16> @mul_inline_imm_1.0_v2i16(<2 x i16> %x) {
605605
%y = mul <2 x i16> %x, bitcast (<2 x half> <half 1.0, half 1.0> to <2 x i16>)
606606
ret <2 x i16> %y
607607
}
608608

609609
; GCN-LABEL: {{^}}mul_inline_imm_neg_1.0_v2i16:
610-
; GFX9: s_mov_b32 [[K:s[0-9]+]], 0xbc00bc00
611-
; GFX9: v_pk_mul_lo_u16 v0, v0, [[K]]
610+
; GFX9: s_movk_i32 [[K:s[0-9]+]], 0xbc00
611+
; GFX9: v_pk_mul_lo_u16 v0, v0, [[K]] op_sel_hi:[1,0]
612612

613-
; GFX10: v_pk_mul_lo_u16 v0, 0xbc00bc00, v0 ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xff,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x00,0xbc,0x00,0xbc]
613+
; GFX10: v_pk_mul_lo_u16 v0, 0xffffbc00, v0 op_sel_hi:[0,1] ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xff,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x00,0xbc,0xff,0xff]
614614
define <2 x i16> @mul_inline_imm_neg_1.0_v2i16(<2 x i16> %x) {
615615
%y = mul <2 x i16> %x, bitcast (<2 x half> <half -1.0, half -1.0> to <2 x i16>)
616616
ret <2 x i16> %y
@@ -635,31 +635,31 @@ define <2 x i16> @shl_inline_imm_neg_2.0_v2i16(<2 x i16> %x) {
635635
}
636636

637637
; GCN-LABEL: {{^}}mul_inline_imm_4.0_v2i16:
638-
; GFX9: s_mov_b32 [[K:s[0-9]+]], 0x44004400
639-
; GFX9: v_pk_mul_lo_u16 v0, v0, [[K]]
638+
; GFX9: s_movk_i32 [[K:s[0-9]+]], 0x4400
639+
; GFX9: v_pk_mul_lo_u16 v0, v0, [[K]] op_sel_hi:[1,0]
640640

641-
; GFX10: v_pk_mul_lo_u16 v0, 0x44004400, v0 ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xff,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x00,0x44,0x00,0x44]
641+
; GFX10: v_pk_mul_lo_u16 v0, 0x4400, v0 op_sel_hi:[0,1] ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xff,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x00,0x44,0x00,0x00]
642642
define <2 x i16> @mul_inline_imm_4.0_v2i16(<2 x i16> %x) {
643643
%y = mul <2 x i16> %x, bitcast (<2 x half> <half 4.0, half 4.0> to <2 x i16>)
644644
ret <2 x i16> %y
645645

646646
}
647647

648648
; GCN-LABEL: {{^}}mul_inline_imm_neg_4.0_v2i16:
649-
; GFX9: s_mov_b32 [[K:s[0-9]+]], 0xc400c400
650-
; GFX9: v_pk_mul_lo_u16 v0, v0, [[K]]
649+
; GFX9: s_movk_i32 [[K:s[0-9]+]], 0xc400
650+
; GFX9: v_pk_mul_lo_u16 v0, v0, [[K]] op_sel_hi:[1,0]
651651

652-
; GFX10: v_pk_mul_lo_u16 v0, 0xc400c400, v0 ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xff,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x00,0xc4,0x00,0xc4]
652+
; GFX10: v_pk_mul_lo_u16 v0, 0xffffc400, v0 op_sel_hi:[0,1] ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xff,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x00,0xc4,0xff,0xff]
653653
define <2 x i16> @mul_inline_imm_neg_4.0_v2i16(<2 x i16> %x) {
654654
%y = mul <2 x i16> %x, bitcast (<2 x half> <half -4.0, half -4.0> to <2 x i16>)
655655
ret <2 x i16> %y
656656
}
657657

658658
; GCN-LABEL: {{^}}mul_inline_imm_inv2pi_v2i16:
659-
; GFX9: s_mov_b32 [[K:s[0-9]+]], 0x31183118
660-
; GFX9: v_pk_mul_lo_u16 v0, v0, [[K]]
659+
; GFX9: s_movk_i32 [[K:s[0-9]+]], 0x3118
660+
; GFX9: v_pk_mul_lo_u16 v0, v0, [[K]] op_sel_hi:[1,0]
661661

662-
; GFX10: v_pk_mul_lo_u16 v0, 0x31183118, v0 ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xff,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x18,0x31,0x18,0x31]
662+
; GFX10: v_pk_mul_lo_u16 v0, 0x3118, v0 op_sel_hi:[0,1] ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xff,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x18,0x31,0x00,0x00]
663663
define <2 x i16> @mul_inline_imm_inv2pi_v2i16(<2 x i16> %x) {
664664
%y = mul <2 x i16> %x, bitcast (<2 x half> <half 0xH3118, half 0xH3118> to <2 x i16>)
665665
ret <2 x i16> %y

0 commit comments

Comments
 (0)