@@ -475,6 +475,8 @@ class AMDGPUOperand : public MCParsedAsmOperand {
 
   bool isSSrcF64() const { return isSCSrc_b64() || isLiteralImm(MVT::f64); }
 
+  bool isSSrc_bf16() const { return isSCSrcB16() || isLiteralImm(MVT::bf16); }
+
   bool isSSrc_f16() const { return isSCSrcB16() || isLiteralImm(MVT::f16); }
 
   bool isSSrcV2F16() const {
@@ -541,22 +543,40 @@ class AMDGPUOperand : public MCParsedAsmOperand {
     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
   }
 
+  bool isVCSrcTBF16() const {
+    return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::bf16);
+  }
+
   bool isVCSrcTF16() const {
     return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::f16);
   }
 
+  bool isVCSrcTBF16_Lo128() const {
+    return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::bf16);
+  }
+
   bool isVCSrcTF16_Lo128() const {
     return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::f16);
   }
 
+  bool isVCSrcFake16BF16_Lo128() const {
+    return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::bf16);
+  }
+
   bool isVCSrcFake16F16_Lo128() const {
     return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::f16);
   }
 
+  bool isVCSrc_bf16() const {
+    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::bf16);
+  }
+
   bool isVCSrc_f16() const {
     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
   }
 
+  bool isVCSrc_v2bf16() const { return isVCSrc_bf16(); }
+
   bool isVCSrc_v2f16() const { return isVCSrc_f16(); }
 
   bool isVSrc_b32() const {
@@ -597,18 +617,34 @@ class AMDGPUOperand : public MCParsedAsmOperand {
 
   bool isVSrc_f64() const { return isVCSrcF64() || isLiteralImm(MVT::f64); }
 
+  bool isVSrcT_bf16() const { return isVCSrcTBF16() || isLiteralImm(MVT::bf16); }
+
   bool isVSrcT_f16() const { return isVCSrcTF16() || isLiteralImm(MVT::f16); }
 
+  bool isVSrcT_bf16_Lo128() const {
+    return isVCSrcTBF16_Lo128() || isLiteralImm(MVT::bf16);
+  }
+
   bool isVSrcT_f16_Lo128() const {
     return isVCSrcTF16_Lo128() || isLiteralImm(MVT::f16);
   }
 
+  bool isVSrcFake16_bf16_Lo128() const {
+    return isVCSrcFake16BF16_Lo128() || isLiteralImm(MVT::bf16);
+  }
+
   bool isVSrcFake16_f16_Lo128() const {
     return isVCSrcFake16F16_Lo128() || isLiteralImm(MVT::f16);
   }
 
+  bool isVSrc_bf16() const { return isVCSrc_bf16() || isLiteralImm(MVT::bf16); }
+
   bool isVSrc_f16() const { return isVCSrc_f16() || isLiteralImm(MVT::f16); }
 
+  bool isVSrc_v2bf16() const {
+    return isVSrc_bf16() || isLiteralImm(MVT::v2bf16);
+  }
+
   bool isVSrc_v2f16() const { return isVSrc_f16() || isLiteralImm(MVT::v2f16); }
 
   bool isVISrcB32() const {
@@ -635,6 +671,10 @@ class AMDGPUOperand : public MCParsedAsmOperand {
     return isVISrcF16() || isVISrcB32();
   }
 
+  bool isVISrc_64_bf16() const {
+    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::bf16);
+  }
+
   bool isVISrc_64_f16() const {
     return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f16);
   }
@@ -803,6 +843,10 @@ class AMDGPUOperand : public MCParsedAsmOperand {
     return isAISrc_128F16() || isAISrc_128_b32();
   }
 
+  bool isVISrc_128_bf16() const {
+    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::bf16);
+  }
+
   bool isVISrc_128_f16() const {
     return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16);
   }
@@ -1890,6 +1934,14 @@ static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
   case AMDGPU::OPERAND_REG_IMM_V2FP16:
   case AMDGPU::OPERAND_KIMM16:
     return &APFloat::IEEEhalf();
+  case AMDGPU::OPERAND_REG_IMM_BF16:
+  case AMDGPU::OPERAND_REG_IMM_BF16_DEFERRED:
+  case AMDGPU::OPERAND_REG_INLINE_C_BF16:
+  case AMDGPU::OPERAND_REG_INLINE_C_V2BF16:
+  case AMDGPU::OPERAND_REG_INLINE_AC_BF16:
+  case AMDGPU::OPERAND_REG_INLINE_AC_V2BF16:
+  case AMDGPU::OPERAND_REG_IMM_V2BF16:
+    return &APFloat::BFloat();
   default:
     llvm_unreachable("unsupported fp type");
   }
@@ -2186,17 +2238,24 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo
   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
   case AMDGPU::OPERAND_REG_IMM_INT16:
+  case AMDGPU::OPERAND_REG_IMM_BF16:
   case AMDGPU::OPERAND_REG_IMM_FP16:
+  case AMDGPU::OPERAND_REG_IMM_BF16_DEFERRED:
   case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
+  case AMDGPU::OPERAND_REG_INLINE_C_BF16:
   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
+  case AMDGPU::OPERAND_REG_INLINE_C_V2BF16:
   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
   case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
+  case AMDGPU::OPERAND_REG_INLINE_AC_BF16:
   case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
+  case AMDGPU::OPERAND_REG_INLINE_AC_V2BF16:
   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
   case AMDGPU::OPERAND_REG_IMM_V2INT16:
+  case AMDGPU::OPERAND_REG_IMM_V2BF16:
   case AMDGPU::OPERAND_REG_IMM_V2FP16:
   case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
   case AMDGPU::OPERAND_REG_IMM_V2FP32:
@@ -2240,6 +2299,7 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo
   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
   case AMDGPU::OPERAND_REG_IMM_V2INT16:
+  case AMDGPU::OPERAND_REG_IMM_V2BF16:
   case AMDGPU::OPERAND_REG_IMM_V2FP16:
   case AMDGPU::OPERAND_REG_IMM_V2FP32:
   case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
@@ -2295,6 +2355,22 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo
     setImmKindLiteral();
     return;
 
+  case AMDGPU::OPERAND_REG_IMM_BF16:
+  case AMDGPU::OPERAND_REG_IMM_BF16_DEFERRED:
+  case AMDGPU::OPERAND_REG_INLINE_C_BF16:
+  case AMDGPU::OPERAND_REG_INLINE_AC_BF16:
+    if (isSafeTruncation(Val, 16) &&
+        AMDGPU::isInlinableLiteralBF16(static_cast<int16_t>(Val),
+                                       AsmParser->hasInv2PiInlineImm())) {
+      Inst.addOperand(MCOperand::createImm(Val));
+      setImmKindConst();
+      return;
+    }
+
+    Inst.addOperand(MCOperand::createImm(Val & 0xffff));
+    setImmKindLiteral();
+    return;
+
   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
@@ -2306,6 +2382,17 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo
     Inst.addOperand(MCOperand::createImm(Val));
     return;
   }
+
+  case AMDGPU::OPERAND_REG_INLINE_C_V2BF16:
+  case AMDGPU::OPERAND_REG_INLINE_AC_V2BF16: {
+    assert(isSafeTruncation(Val, 16));
+    assert(AMDGPU::isInlinableLiteralBF16(static_cast<int16_t>(Val),
+                                          AsmParser->hasInv2PiInlineImm()));
+
+    Inst.addOperand(MCOperand::createImm(Val));
+    return;
+  }
+
   case AMDGPU::OPERAND_KIMM32:
     Inst.addOperand(MCOperand::createImm(Literal.getLoBits(32).getZExtValue()));
     setImmKindMandatoryLiteral();
@@ -3429,6 +3516,11 @@ bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
         OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16)
       return AMDGPU::isInlinableLiteralV2F16(Val);
 
+    if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2BF16 ||
+        OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2BF16 ||
+        OperandType == AMDGPU::OPERAND_REG_IMM_V2BF16)
+      return AMDGPU::isInlinableLiteralV2BF16(Val);
+
     return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
   }
   default: