Skip to content

Commit 8104287

Browse files
committed
different true16/fake16 patterns
1 parent 5b41b6d commit 8104287

File tree

2 files changed

+1121
-25
lines changed

2 files changed

+1121
-25
lines changed

llvm/lib/Target/AMDGPU/SIInstructions.td

Lines changed: 36 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -2163,23 +2163,44 @@ def : GCNPat <
21632163
(S_MOV_B32 $ga)
21642164
>;
21652165

2166-
def : GCNPat <
2167-
(VGPRImm<(i16 imm)>:$imm),
2168-
(V_MOV_B32_e32 imm:$imm)
2169-
>;
2166+
foreach pred = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in {
2167+
let True16Predicate = pred in {
2168+
def : GCNPat <
2169+
(VGPRImm<(i16 imm)>:$imm),
2170+
(V_MOV_B32_e32 imm:$imm)
2171+
>;
2172+
}
21702173

2171-
// FIXME: Workaround for ordering issue with peephole optimizer where
2172-
// a register class copy interferes with immediate folding. Should
2173-
// use s_mov_b32, which can be shrunk to s_movk_i32
2174-
def : GCNPat <
2175-
(VGPRImm<(f16 fpimm)>:$imm),
2176-
(V_MOV_B32_e32 (f16 (bitcast_fpimm_to_i32 $imm)))
2177-
>;
2174+
// FIXME: Workaround for ordering issue with peephole optimizer where
2175+
// a register class copy interferes with immediate folding. Should
2176+
// use s_mov_b32, which can be shrunk to s_movk_i32
2177+
def : GCNPat <
2178+
(VGPRImm<(f16 fpimm)>:$imm),
2179+
(V_MOV_B32_e32 (f16 (bitcast_fpimm_to_i32 $imm)))
2180+
>;
21782181

2179-
def : GCNPat <
2180-
(VGPRImm<(bf16 fpimm)>:$imm),
2181-
(V_MOV_B32_e32 (bf16 (bitcast_fpimm_to_i32 $imm)))
2182-
>;
2182+
def : GCNPat <
2183+
(VGPRImm<(bf16 fpimm)>:$imm),
2184+
(V_MOV_B32_e32 (bf16 (bitcast_fpimm_to_i32 $imm)))
2185+
>;
2186+
}
2187+
2188+
let True16Predicate = UseRealTrue16Insts in {
2189+
def : GCNPat <
2190+
(VGPRImm<(i16 imm)>:$imm),
2191+
(V_MOV_B16_t16_e64 0, imm:$imm, 0)
2192+
>;
2193+
2194+
def : GCNPat <
2195+
(VGPRImm<(f16 fpimm)>:$imm),
2196+
(V_MOV_B16_t16_e64 0, $imm, 0)
2197+
>;
2198+
2199+
def : GCNPat <
2200+
(VGPRImm<(bf16 fpimm)>:$imm),
2201+
(V_MOV_B16_t16_e64 0, $imm, 0)
2202+
>;
2203+
}
21832204

21842205
// V_MOV_B64_PSEUDO and S_MOV_B64_IMM_PSEUDO can be used with any 64-bit
21852206
// immediate and will be expanded as needed, but we will only use these patterns

0 commit comments

Comments
 (0)