Skip to content

Commit d2906c0

Browse files
committed
[AMDGPU] Extend llvm.amdgcn.update.dpp intrinsic to support f64 (llvm#91190)
Follow-up patch to llvm#89217, made before we change the atomic optimizer. Change-Id: I3857ef9314db77354875a3f3f2e1eb7f5fe8067a
1 parent 29cbe78 commit d2906c0

File tree

5 files changed

+457
-17
lines changed

5 files changed

+457
-17
lines changed

llvm/lib/Target/AMDGPU/SIInstructions.td

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -3358,13 +3358,15 @@ def : GCNPat <
33583358
(as_i1timm $bound_ctrl))
33593359
>;
33603360

3361+
foreach vt = Reg64Types.types in {
33613362
def : GCNPat <
3362-
(i64 (int_amdgcn_update_dpp i64:$old, i64:$src, timm:$dpp_ctrl, timm:$row_mask,
3363+
(vt (int_amdgcn_update_dpp vt:$old, vt:$src, timm:$dpp_ctrl, timm:$row_mask,
33633364
timm:$bank_mask, timm:$bound_ctrl)),
33643365
(V_MOV_B64_DPP_PSEUDO VReg_64_Align2:$old, VReg_64_Align2:$src, (as_i32timm $dpp_ctrl),
33653366
(as_i32timm $row_mask), (as_i32timm $bank_mask),
33663367
(as_i1timm $bound_ctrl))
33673368
>;
3369+
}
33683370

33693371
//===----------------------------------------------------------------------===//
33703372
// Fract Patterns

llvm/lib/Target/AMDGPU/SIRegisterInfo.td

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -586,6 +586,7 @@ class RegisterTypes<list<ValueType> reg_types> {
586586

587587
def Reg16Types : RegisterTypes<[i16, f16, bf16]>;
588588
def Reg32Types : RegisterTypes<[i32, f32, v2i16, v2f16, v2bf16, p2, p3, p5, p6]>;
589+
def Reg64Types : RegisterTypes<[i64, f64, v2i32, v2f32, p0]>;
589590

590591
let HasVGPR = 1 in {
591592
// VOP3 and VINTERP can access 256 lo and 256 hi registers.

llvm/lib/Target/AMDGPU/VOP1Instructions.td

Lines changed: 3 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -1356,19 +1356,16 @@ def : GCNPat <
13561356
(as_i1timm $bound_ctrl))
13571357
>;
13581358

1359-
class UpdateDPPPat<ValueType vt> : GCNPat <
1359+
foreach vt = Reg32Types.types in {
1360+
def : GCNPat <
13601361
(vt (int_amdgcn_update_dpp vt:$old, vt:$src, timm:$dpp_ctrl,
13611362
timm:$row_mask, timm:$bank_mask,
13621363
timm:$bound_ctrl)),
13631364
(V_MOV_B32_dpp VGPR_32:$old, VGPR_32:$src, (as_i32timm $dpp_ctrl),
13641365
(as_i32timm $row_mask), (as_i32timm $bank_mask),
13651366
(as_i1timm $bound_ctrl))
13661367
>;
1367-
1368-
def : UpdateDPPPat<i32>;
1369-
def : UpdateDPPPat<f32>;
1370-
def : UpdateDPPPat<v2i16>;
1371-
def : UpdateDPPPat<v2f16>;
1368+
}
13721369

13731370
} // End OtherPredicates = [isGFX8Plus]
13741371

0 commit comments

Comments
 (0)