Skip to content

[AMDGPU] Fix subtarget predicates for some V_MAD, V_FMA and V_DOT instructions. #71194

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Nov 7, 2023
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 16 additions & 23 deletions llvm/lib/Target/AMDGPU/VOP3PInstructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -360,7 +360,7 @@ class UDot2Pat<Instruction Inst> : GCNPat <
(and i32:$src1, (i32 65535)))
),
(Inst (i32 8), $src0, (i32 8), $src1, (i32 8), $src2, (i1 0))> {
let SubtargetPredicate = !cast<VOP_Pseudo>(Inst).SubtargetPredicate;
let Predicates = !cast<VOP_Pseudo>(Inst).Predicates;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As a follow up maybe change the template argument to VOP_Pseudo Inst to avoid the cast?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

}

class SDot2Pat<Instruction Inst> : GCNPat <
Expand All @@ -369,40 +369,35 @@ class SDot2Pat<Instruction Inst> : GCNPat <
(AMDGPUmul_i24_oneuse (sext_inreg i32:$src0, i16),
(sext_inreg i32:$src1, i16))),
(Inst (i32 8), $src0, (i32 8), $src1, (i32 8), $src2, (i1 0))> {
let SubtargetPredicate = !cast<VOP_Pseudo>(Inst).SubtargetPredicate;
let Predicates = !cast<VOP_Pseudo>(Inst).Predicates;
}

let IsDOT = 1 in {
let SubtargetPredicate = HasDot2Insts in {

let OtherPredicates = [HasDot2Insts] in {
defm V_DOT2_I32_I16 : VOP3PInst<"v_dot2_i32_i16",
VOP3P_Profile<VOP_I32_V2I16_V2I16_I32>, int_amdgcn_sdot2, 1>;
defm V_DOT2_U32_U16 : VOP3PInst<"v_dot2_u32_u16",
VOP3P_Profile<VOP_I32_V2I16_V2I16_I32>, int_amdgcn_udot2, 1>;
} // End OtherPredicates = [HasDot2Insts]

} // End SubtargetPredicate = HasDot2Insts

let SubtargetPredicate = HasDot10Insts in
let OtherPredicates = [HasDot10Insts] in
defm V_DOT2_F32_F16 : VOP3PInst<"v_dot2_f32_f16",
VOP3P_Profile<VOP_F32_V2F16_V2F16_F32, VOP3_REGULAR, /*HasDPP*/ 1>,
AMDGPUfdot2, 1/*ExplicitClamp*/>;

let SubtargetPredicate = HasDot7Insts in {
let OtherPredicates = [HasDot7Insts] in {
defm V_DOT4_U32_U8 : VOP3PInst<"v_dot4_u32_u8",
VOP3P_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED>, int_amdgcn_udot4, 1>;
defm V_DOT8_U32_U4 : VOP3PInst<"v_dot8_u32_u4",
VOP3P_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED>, int_amdgcn_udot8, 1>;
} // End OtherPredicates = [HasDot7Insts]

} // End SubtargetPredicate = HasDot7Insts

let SubtargetPredicate = HasDot1Insts in {

let OtherPredicates = [HasDot1Insts] in {
defm V_DOT4_I32_I8 : VOP3PInst<"v_dot4_i32_i8",
VOP3P_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED>, int_amdgcn_sdot4, 1>;
defm V_DOT8_I32_I4 : VOP3PInst<"v_dot8_i32_i4",
VOP3P_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED>, int_amdgcn_sdot8, 1>;

} // End SubtargetPredicate = HasDot1Insts
} // End OtherPredicates = [HasDot1Insts]

def DOT2_BF16_Profile
: VOP3P_Profile<VOP_F32_V2I16_V2I16_F32, VOP3_REGULAR, /*HasDPP*/ 1> {
Expand Down Expand Up @@ -456,14 +451,14 @@ def : UDot2Pat<V_DOT2_U32_U16>;
def : SDot2Pat<V_DOT2_I32_I16>;

foreach Type = ["U", "I"] in
let SubtargetPredicate = !cast<VOP_Pseudo>("V_DOT4_"#Type#"32_"#Type#8).SubtargetPredicate in
let Predicates = !cast<VOP_Pseudo>("V_DOT4_"#Type#"32_"#Type#8).Predicates in
def : GCNPat <
!cast<dag>(!foldl((i32 i32:$src2), [0, 1, 2, 3], lhs, y,
(add_oneuse lhs, (!cast<PatFrag>("Mul"#Type#"_Elt"#y) i32:$src0, i32:$src1)))),
(!cast<VOP3P_Pseudo>("V_DOT4_"#Type#"32_"#Type#8) (i32 8), $src0, (i32 8), $src1, (i32 8), $src2, (i1 0))>;

foreach Type = ["U", "I"] in
let SubtargetPredicate = !cast<VOP_Pseudo>("V_DOT8_"#Type#"32_"#Type#4).SubtargetPredicate in
let Predicates = !cast<VOP_Pseudo>("V_DOT8_"#Type#"32_"#Type#4).Predicates in
def : GCNPat <
!cast<dag>(!foldl((add_oneuse i32:$src2, (!cast<PatFrag>("Mul"#Type#"0_4bit") i32:$src0, i32:$src1)),
[1, 2, 3, 4, 5, 6, 7], lhs, y,
Expand All @@ -473,7 +468,7 @@ foreach Type = ["U", "I"] in
// Different variants of dot8 code-gen dag patterns are not generated through table-gen due to a huge increase
// in the compile time. Directly handle the pattern generated by the FE here.
foreach Type = ["U", "I"] in
let SubtargetPredicate = !cast<VOP_Pseudo>("V_DOT8_"#Type#"32_"#Type#4).SubtargetPredicate in
let Predicates = !cast<VOP_Pseudo>("V_DOT8_"#Type#"32_"#Type#4).Predicates in
def : GCNPat <
!cast<dag>(!foldl((add_oneuse i32:$src2, (!cast<PatFrag>("Mul"#Type#"0_4bit") i32:$src0, i32:$src1)),
[7, 1, 2, 3, 4, 5, 6], lhs, y,
Expand Down Expand Up @@ -1128,25 +1123,22 @@ defm V_PK_ADD_F16 : VOP3P_Real_vi <0x0f>;
defm V_PK_MUL_F16 : VOP3P_Real_vi <0x10>;
defm V_PK_MIN_F16 : VOP3P_Real_vi <0x11>;
defm V_PK_MAX_F16 : VOP3P_Real_vi <0x12>;
} // End SubtargetPredicate = isGFX8GFX9

let SubtargetPredicate = HasMadMixInsts in {
let OtherPredicates = [HasMadMixInsts] in {
defm V_MAD_MIX_F32 : VOP3P_Real_vi <0x20>;
defm V_MAD_MIXLO_F16 : VOP3P_Real_vi <0x21>;
defm V_MAD_MIXHI_F16 : VOP3P_Real_vi <0x22>;
}

let SubtargetPredicate = HasFmaMixInsts in {
let DecoderNamespace = "GFX9_DL" in {
let OtherPredicates = [HasFmaMixInsts],
DecoderNamespace = "GFX9_DL" in {
// The mad_mix instructions were renamed and their behaviors changed,
// but the opcode stayed the same so we need to put these in a
// different DecoderNamespace to avoid the ambiguity.
defm V_FMA_MIX_F32 : VOP3P_Real_vi <0x20>;
defm V_FMA_MIXLO_F16 : VOP3P_Real_vi <0x21>;
defm V_FMA_MIXHI_F16 : VOP3P_Real_vi <0x22>;
}
}


defm V_DOT2_I32_I16 : VOP3P_Real_vi <0x26>;
defm V_DOT2_U32_U16 : VOP3P_Real_vi <0x27>;
Expand All @@ -1157,6 +1149,7 @@ defm V_DOT8_U32_U4 : VOP3P_Real_vi <0x2b>;

defm V_DOT4_I32_I8 : VOP3P_Real_vi <0x28>;
defm V_DOT8_I32_I4 : VOP3P_Real_vi <0x2a>;
} // End SubtargetPredicate = isGFX8GFX9

let OtherPredicates = [HasMAIInsts] in {

Expand Down