@@ -2163,23 +2163,44 @@ def : GCNPat <
   (S_MOV_B32 $ga)
 >;
 
-def : GCNPat <
-  (VGPRImm<(i16 imm)>:$imm),
-  (V_MOV_B32_e32 imm:$imm)
->;
+foreach pred = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in {
+let True16Predicate = pred in {
+  def : GCNPat <
+    (VGPRImm<(i16 imm)>:$imm),
+    (V_MOV_B32_e32 imm:$imm)
+  >;
 
-// FIXME: Workaround for ordering issue with peephole optimizer where
-// a register class copy interferes with immediate folding. Should
-// use s_mov_b32, which can be shrunk to s_movk_i32
-def : GCNPat <
-  (VGPRImm<(f16 fpimm)>:$imm),
-  (V_MOV_B32_e32 (f16 (bitcast_fpimm_to_i32 $imm)))
->;
+  // FIXME: Workaround for ordering issue with peephole optimizer where
+  // a register class copy interferes with immediate folding. Should
+  // use s_mov_b32, which can be shrunk to s_movk_i32
+  def : GCNPat <
+    (VGPRImm<(f16 fpimm)>:$imm),
+    (V_MOV_B32_e32 (f16 (bitcast_fpimm_to_i32 $imm)))
+  >;
 
-def : GCNPat <
-  (VGPRImm<(bf16 fpimm)>:$imm),
-  (V_MOV_B32_e32 (bf16 (bitcast_fpimm_to_i32 $imm)))
->;
+  def : GCNPat <
+    (VGPRImm<(bf16 fpimm)>:$imm),
+    (V_MOV_B32_e32 (bf16 (bitcast_fpimm_to_i32 $imm)))
+  >;
+}
+}
+
+let True16Predicate = UseRealTrue16Insts in {
+  def : GCNPat <
+    (VGPRImm<(i16 imm)>:$imm),
+    (V_MOV_B16_t16_e64 0, imm:$imm, 0)
+  >;
+
+  def : GCNPat <
+    (VGPRImm<(f16 fpimm)>:$imm),
+    (V_MOV_B16_t16_e64 0, $imm, 0)
+  >;
+
+  def : GCNPat <
+    (VGPRImm<(bf16 fpimm)>:$imm),
+    (V_MOV_B16_t16_e64 0, $imm, 0)
+  >;
+}
 
 // V_MOV_B64_PSEUDO and S_MOV_B64_IMM_PSEUDO can be used with any 64-bit
 // immediate and wil be expanded as needed, but we will only use these patterns