Skip to content

Commit e47996c

Browse files
committed
[WIP][AMDGPU] Split isInlinableLiteral16 into three and call the specific version if possible
The current implementation of `isInlinableLiteral16` assumes that a 16-bit inlinable literal is either an i16 or an fp16. This is not always true because of bf16, and we cannot tell fp16 and bf16 apart just by looking at the value. This patch splits `isInlinableLiteral16` into three versions, for i16, fp16, and bf16 respectively, and calls the corresponding version wherever the operand type is known. This patch is based on llvm#81282. Current status: only two uses of the original `isInlinableLiteral16` remain. For those, we need to add an extra argument indicating the type of the operand the immediate corresponds to, which will also require changing the function signatures of the two callers.
1 parent f14cb53 commit e47996c

File tree

6 files changed

+94
-21
lines changed

6 files changed

+94
-21
lines changed

llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp

Lines changed: 36 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1927,8 +1927,12 @@ static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) {
19271927
return isInlinableIntLiteral(Val);
19281928
}
19291929

1930-
// f16/v2f16 operands work correctly for all values.
1931-
return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi);
1930+
if (VT.getScalarType() == MVT::f16)
1931+
return AMDGPU::isInlinableLiteralFP16(Val, HasInv2Pi);
1932+
1933+
assert(VT.getScalarType() == MVT::bf16);
1934+
1935+
return AMDGPU::isInlinableLiteralBF16(Val, HasInv2Pi);
19321936
}
19331937

19341938
bool AMDGPUOperand::isInlinableImm(MVT type) const {
@@ -2277,15 +2281,26 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo
22772281
return;
22782282

22792283
case AMDGPU::OPERAND_REG_IMM_INT16:
2280-
case AMDGPU::OPERAND_REG_IMM_FP16:
2281-
case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
22822284
case AMDGPU::OPERAND_REG_INLINE_C_INT16:
2283-
case AMDGPU::OPERAND_REG_INLINE_C_FP16:
22842285
case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
2286+
if (isSafeTruncation(Val, 16) &&
2287+
AMDGPU::isInlinableIntLiteral(static_cast<int16_t>(Val))) {
2288+
Inst.addOperand(MCOperand::createImm(Val));
2289+
setImmKindConst();
2290+
return;
2291+
}
2292+
2293+
Inst.addOperand(MCOperand::createImm(Val & 0xffff));
2294+
setImmKindLiteral();
2295+
return;
2296+
2297+
case AMDGPU::OPERAND_REG_INLINE_C_FP16:
2298+
case AMDGPU::OPERAND_REG_IMM_FP16:
2299+
case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
22852300
case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
22862301
if (isSafeTruncation(Val, 16) &&
2287-
AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
2288-
AsmParser->hasInv2PiInlineImm())) {
2302+
AMDGPU::isInlinableLiteralFP16(static_cast<int16_t>(Val),
2303+
AsmParser->hasInv2PiInlineImm())) {
22892304
Inst.addOperand(MCOperand::createImm(Val));
22902305
setImmKindConst();
22912306
return;
@@ -2296,12 +2311,17 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo
22962311
return;
22972312

22982313
case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
2314+
case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: {
2315+
assert(isSafeTruncation(Val, 16));
2316+
assert(AMDGPU::isInlinableIntLiteral(static_cast<int16_t>(Val)));
2317+
Inst.addOperand(MCOperand::createImm(Val));
2318+
return;
2319+
}
22992320
case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
2300-
case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
23012321
case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: {
23022322
assert(isSafeTruncation(Val, 16));
2303-
assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
2304-
AsmParser->hasInv2PiInlineImm()));
2323+
assert(AMDGPU::isInlinableLiteralFP16(static_cast<int16_t>(Val),
2324+
AsmParser->hasInv2PiInlineImm()));
23052325

23062326
Inst.addOperand(MCOperand::createImm(Val));
23072327
return;
@@ -3434,7 +3454,12 @@ bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
34343454
OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16)
34353455
return AMDGPU::isInlinableLiteralV2F16(Val);
34363456

3437-
return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
3457+
if (OperandType == AMDGPU::OPERAND_REG_IMM_FP16 ||
3458+
OperandType == AMDGPU::OPERAND_REG_INLINE_C_FP16 ||
3459+
OperandType == AMDGPU::OPERAND_REG_INLINE_AC_FP16)
3460+
return AMDGPU::isInlinableLiteralFP16(Val, hasInv2PiInlineImm());
3461+
3462+
llvm_unreachable("invalid operand type");
34383463
}
34393464
default:
34403465
llvm_unreachable("invalid operand size");

llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp

Lines changed: 15 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -462,8 +462,8 @@ void AMDGPUInstPrinter::printImmediateInt16(uint32_t Imm,
462462

463463
// This must accept a 32-bit immediate value to correctly handle packed 16-bit
464464
// operations.
465-
static bool printImmediateFloat16(uint32_t Imm, const MCSubtargetInfo &STI,
466-
raw_ostream &O) {
465+
static bool printImmediateFP16(uint32_t Imm, const MCSubtargetInfo &STI,
466+
raw_ostream &O) {
467467
if (Imm == 0x3C00)
468468
O << "1.0";
469469
else if (Imm == 0xBC00)
@@ -488,7 +488,7 @@ static bool printImmediateFloat16(uint32_t Imm, const MCSubtargetInfo &STI,
488488
return true;
489489
}
490490

491-
void AMDGPUInstPrinter::printImmediate16(uint32_t Imm,
491+
void AMDGPUInstPrinter::printImmediate16(uint32_t Imm, uint8_t OpType,
492492
const MCSubtargetInfo &STI,
493493
raw_ostream &O) {
494494
int16_t SImm = static_cast<int16_t>(Imm);
@@ -498,8 +498,16 @@ void AMDGPUInstPrinter::printImmediate16(uint32_t Imm,
498498
}
499499

500500
uint16_t HImm = static_cast<uint16_t>(Imm);
501-
if (printImmediateFloat16(HImm, STI, O))
502-
return;
501+
switch (OpType) {
502+
case AMDGPU::OPERAND_REG_IMM_FP16:
503+
case AMDGPU::OPERAND_REG_INLINE_C_FP16:
504+
case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
505+
if (printImmediateFP16(HImm, STI, O))
506+
return;
507+
break;
508+
default:
509+
llvm_unreachable("bad operand type");
510+
}
503511

504512
uint64_t Imm16 = static_cast<uint16_t>(Imm);
505513
O << formatHex(Imm16);
@@ -525,7 +533,7 @@ void AMDGPUInstPrinter::printImmediateV216(uint32_t Imm, uint8_t OpType,
525533
case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
526534
case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
527535
if (isUInt<16>(Imm) &&
528-
printImmediateFloat16(static_cast<uint16_t>(Imm), STI, O))
536+
printImmediateFP16(static_cast<uint16_t>(Imm), STI, O))
529537
return;
530538
break;
531539
default:
@@ -796,7 +804,7 @@ void AMDGPUInstPrinter::printRegularOperand(const MCInst *MI, unsigned OpNo,
796804
case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
797805
case AMDGPU::OPERAND_REG_IMM_FP16:
798806
case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
799-
printImmediate16(Op.getImm(), STI, O);
807+
printImmediate16(Op.getImm(), OpTy, STI, O);
800808
break;
801809
case AMDGPU::OPERAND_REG_IMM_V2INT16:
802810
case AMDGPU::OPERAND_REG_IMM_V2FP16:

llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -86,8 +86,8 @@ class AMDGPUInstPrinter : public MCInstPrinter {
8686
raw_ostream &O);
8787
void printImmediateInt16(uint32_t Imm, const MCSubtargetInfo &STI,
8888
raw_ostream &O);
89-
void printImmediate16(uint32_t Imm, const MCSubtargetInfo &STI,
90-
raw_ostream &O);
89+
void printImmediate16(uint32_t Imm, uint8_t OpType,
90+
const MCSubtargetInfo &STI, raw_ostream &O);
9191
void printImmediateV216(uint32_t Imm, uint8_t OpType,
9292
const MCSubtargetInfo &STI, raw_ostream &O);
9393
bool printImmediateFloat32(uint32_t Imm, const MCSubtargetInfo &STI,

llvm/lib/Target/AMDGPU/SIInstrInfo.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4196,7 +4196,7 @@ bool SIInstrInfo::isInlineConstant(const MachineOperand &MO,
41964196
// constants in these cases
41974197
int16_t Trunc = static_cast<int16_t>(Imm);
41984198
return ST.has16BitInsts() &&
4199-
AMDGPU::isInlinableLiteral16(Trunc, ST.hasInv2PiInlineImm());
4199+
AMDGPU::isInlinableLiteralFP16(Trunc, ST.hasInv2PiInlineImm());
42004200
}
42014201

42024202
return false;

llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2655,6 +2655,40 @@ bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi) {
26552655
Val == 0x3118; // 1/2pi
26562656
}
26572657

2658+
bool isInlinableLiteralFP16(int16_t Literal, bool HasInv2Pi) {
2659+
if (!HasInv2Pi)
2660+
return false;
2661+
if (isInlinableIntLiteral(Literal))
2662+
return true;
2663+
uint16_t Val = static_cast<uint16_t>(Literal);
2664+
return Val == 0x3C00 || // 1.0
2665+
Val == 0xBC00 || // -1.0
2666+
Val == 0x3800 || // 0.5
2667+
Val == 0xB800 || // -0.5
2668+
Val == 0x4000 || // 2.0
2669+
Val == 0xC000 || // -2.0
2670+
Val == 0x4400 || // 4.0
2671+
Val == 0xC400 || // -4.0
2672+
Val == 0x3118; // 1/2pi
2673+
}
2674+
2675+
bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi) {
2676+
if (!HasInv2Pi)
2677+
return false;
2678+
if (isInlinableIntLiteral(Literal))
2679+
return true;
2680+
uint16_t Val = static_cast<uint16_t>(Literal);
2681+
return Val == 0x3F00 || // 0.5
2682+
Val == 0xBF00 || // -0.5
2683+
Val == 0x3F80 || // 1.0
2684+
Val == 0xBF80 || // -1.0
2685+
Val == 0x4000 || // 2.0
2686+
Val == 0xC000 || // -2.0
2687+
Val == 0x4080 || // 4.0
2688+
Val == 0xC080 || // -4.0
2689+
Val == 0x3E22; // 1.0 / (2.0 * pi)
2690+
}
2691+
26582692
std::optional<unsigned> getInlineEncodingV216(bool IsFloat, uint32_t Literal) {
26592693
// Unfortunately, the Instruction Set Architecture Reference Guide is
26602694
// misleading about how the inline operands work for (packed) 16-bit

llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1359,6 +1359,12 @@ bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi);
13591359
LLVM_READNONE
13601360
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi);
13611361

1362+
LLVM_READNONE
1363+
bool isInlinableLiteralFP16(int16_t Literal, bool HasInv2Pi);
1364+
1365+
LLVM_READNONE
1366+
bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi);
1367+
13621368
LLVM_READNONE
13631369
bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi);
13641370

0 commit comments

Comments
 (0)