Skip to content

Commit 94c3d18

Browse files
committed
[MLIR][AMDGPU] Add amdgpu.sched_barrier
This commit adds the `sched_barrier` operation to the AMDGPU dialect; it lowers to `rocdl.sched.barrier`.
1 parent bd04ac0 commit 94c3d18

File tree

4 files changed

+103
-2
lines changed

4 files changed

+103
-2
lines changed

mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -433,6 +433,47 @@ def AMDGPU_LDSBarrierOp : AMDGPU_Op<"lds_barrier"> {
433433
let assemblyFormat = "attr-dict";
434434
}
435435

436+
// Option flags for scheduling barriers. `allow_none` is 0 and every other case
// is a distinct single bit, so several options can be OR-ed into one integer.
def AMDGPU_SchedBarrierOpOpt : I32EnumAttr<"sched_barrier_opt_enum",
437+
"The possible options for scheduling barriers",
438+
[
439+
I32EnumAttrCase<"allow_none", 0x0000>,
440+
I32EnumAttrCase<"allow_non_mem_non_sideffect", 0x0001>,
441+
I32EnumAttrCase<"allow_valu", 0x0002>,
442+
I32EnumAttrCase<"allow_salu", 0x0004>,
443+
I32EnumAttrCase<"allow_mfma_wmma", 0x0008>,
444+
I32EnumAttrCase<"allow_all_vmem", 0x0010>,
445+
I32EnumAttrCase<"allow_vmem_read", 0x0020>,
446+
I32EnumAttrCase<"allow_vmem_write", 0x0040>,
447+
I32EnumAttrCase<"allow_all_ds", 0x0080>,
448+
I32EnumAttrCase<"allow_ds_read", 0x0100>,
449+
I32EnumAttrCase<"allow_ds_write", 0x0200>,
450+
I32EnumAttrCase<"allow_transcendental", 0x0400>
451+
]> {
452+
let genSpecializedAttr = 0;
453+
let cppNamespace = "::mlir::amdgpu";
454+
}
455+
456+
// Dialect attribute wrapping the enum above, with mnemonic `sched_barrier_opt`.
def AMDGPU_SchedBarrierOpOptAttr : EnumAttr<AMDGPU_Dialect, AMDGPU_SchedBarrierOpOpt,
457+
"sched_barrier_opt">;
458+
459+
// Array attribute whose elements are `sched_barrier_opt` attributes.
def AMDGPU_SchedBarrierOpOptArrayAttr : TypedArrayAttrBase<AMDGPU_SchedBarrierOpOptAttr,
460+
"sched_barrier_opt array attribute">;
461+
462+
// `amdgpu.sched_barrier`: takes a single array attribute `opts` of
// `sched_barrier_opt` values and has no operands or results declared here.
// Textual form is the `opts` array followed by the attribute dictionary.
def AMDGPU_SchedBarrierOp :
463+
AMDGPU_Op<"sched_barrier">,
464+
Arguments<(ins AMDGPU_SchedBarrierOpOptArrayAttr:$opts)>
465+
{
466+
let summary = "Barrier that limits the backend scheduler of instruction movement";
467+
let description = [{
468+
`amdgpu.sched_barrier` serves as a barrier that could be
469+
configured to restrict movements of instructions through it as
470+
defined by sched_barrier_opts.
471+
}];
472+
let assemblyFormat = [{
473+
$opts attr-dict
474+
}];
475+
}
476+
436477
def AMDGPU_MFMAPermB : I32EnumAttr<"MFMAPermB",
437478
"The possible permutations of the lanes storing B available in an MFMA",
438479
[

mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp

Lines changed: 22 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -321,6 +321,26 @@ struct LDSBarrierOpLowering : public ConvertOpToLLVMPattern<LDSBarrierOp> {
321321
return success();
322322
}
323323
};
324+
325+
/// Lowers `amdgpu.sched_barrier` to `ROCDL::SchedBarrier`, folding the op's
/// array of option attributes into a single integer mask.
struct SchedBarrierOpLowering : public ConvertOpToLLVMPattern<SchedBarrierOp> {
326+
SchedBarrierOpLowering(LLVMTypeConverter &converter, Chipset chipset)
327+
: ConvertOpToLLVMPattern<SchedBarrierOp>(converter), chipset(chipset) {}
328+
329+
// Stored from the constructor; not read by matchAndRewrite below.
Chipset chipset;
330+
331+
LogicalResult
332+
matchAndRewrite(SchedBarrierOp op, SchedBarrierOp::Adaptor adaptor,
333+
ConversionPatternRewriter &rewriter) const override {
334+
// Starts at 0, so an empty `opts` array yields a mask of 0.
uint32_t combinedOpt = 0;
335+
// OR the integer value of every option attribute into the mask.
for (Attribute opt : op.getOpts()) {
336+
combinedOpt |=
337+
(uint32_t)cast<amdgpu::sched_barrier_opt_enumAttr>(opt).getValue();
338+
}
339+
// Replace the op with the ROCDL barrier carrying the combined mask.
rewriter.replaceOpWithNewOp<ROCDL::SchedBarrier>(op, combinedOpt);
340+
return success();
341+
}
342+
};
343+
324344
} // namespace
325345

326346
/// If `input` is a vector of bytes, concatenate those bytes in little-endian
@@ -879,8 +899,8 @@ void mlir::populateAMDGPUToROCDLConversionPatterns(LLVMTypeConverter &converter,
879899
ROCDL::RawPtrBufferAtomicUminOp>,
880900
RawBufferOpLowering<RawBufferAtomicCmpswapOp,
881901
ROCDL::RawPtrBufferAtomicCmpSwap>,
882-
LDSBarrierOpLowering, MFMAOpLowering, WMMAOpLowering,
883-
ExtPackedFp8OpLowering, PackedTrunc2xFp8OpLowering,
902+
LDSBarrierOpLowering, SchedBarrierOpLowering, MFMAOpLowering,
903+
WMMAOpLowering, ExtPackedFp8OpLowering, PackedTrunc2xFp8OpLowering,
884904
PackedStochRoundFp8OpLowering>(converter, chipset);
885905
}
886906

mlir/test/Conversion/AMDGPUToROCDL/amdgpu-to-rocdl.mlir

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -226,3 +226,34 @@ func.func @lds_barrier() {
226226
amdgpu.lds_barrier
227227
func.return
228228
}
229+
230+
// CHECK-LABEL: func @sched_barrier
231+
func.func @sched_barrier() {
232+
// Each option lowers to its enum bit value as the barrier mask.
// CHECK: rocdl.sched.barrier 0
233+
amdgpu.sched_barrier [#amdgpu<sched_barrier_opt allow_none>]
234+
// CHECK: rocdl.sched.barrier 1
235+
amdgpu.sched_barrier [#amdgpu<sched_barrier_opt allow_non_mem_non_sideffect>]
236+
// CHECK: rocdl.sched.barrier 2
237+
amdgpu.sched_barrier [#amdgpu<sched_barrier_opt allow_valu>]
238+
// CHECK: rocdl.sched.barrier 4
239+
amdgpu.sched_barrier [#amdgpu<sched_barrier_opt allow_salu>]
240+
// CHECK: rocdl.sched.barrier 8
241+
amdgpu.sched_barrier [#amdgpu<sched_barrier_opt allow_mfma_wmma>]
242+
// CHECK: rocdl.sched.barrier 16
243+
amdgpu.sched_barrier [#amdgpu<sched_barrier_opt allow_all_vmem>]
244+
// CHECK: rocdl.sched.barrier 32
245+
amdgpu.sched_barrier [#amdgpu<sched_barrier_opt allow_vmem_read>]
246+
// CHECK: rocdl.sched.barrier 64
247+
amdgpu.sched_barrier [#amdgpu<sched_barrier_opt allow_vmem_write>]
248+
// CHECK: rocdl.sched.barrier 128
249+
amdgpu.sched_barrier [#amdgpu<sched_barrier_opt allow_all_ds>]
250+
// CHECK: rocdl.sched.barrier 256
251+
amdgpu.sched_barrier [#amdgpu<sched_barrier_opt allow_ds_read>]
252+
// CHECK: rocdl.sched.barrier 512
253+
amdgpu.sched_barrier [#amdgpu<sched_barrier_opt allow_ds_write>]
254+
// CHECK: rocdl.sched.barrier 1024
255+
amdgpu.sched_barrier [#amdgpu<sched_barrier_opt allow_transcendental>]
256+
// Multiple options are OR-ed together: allow_valu (2) | allow_all_vmem (16) = 18.
// CHECK: rocdl.sched.barrier 18
257+
amdgpu.sched_barrier [#amdgpu<sched_barrier_opt allow_valu>, #amdgpu<sched_barrier_opt allow_all_vmem>]
258+
func.return
259+
}

mlir/test/Dialect/AMDGPU/ops.mlir

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,15 @@ func.func @lds_barrier() {
109109
func.return
110110
}
111111

112+
// CHECK-LABEL: func @sched_barrier
113+
// Each CHECK line matches the op text written on the following line, for a
// single-option and a two-option `opts` array.
func.func @sched_barrier() {
114+
// CHECK: amdgpu.sched_barrier [#amdgpu<sched_barrier_opt allow_none>]
115+
amdgpu.sched_barrier [#amdgpu<sched_barrier_opt allow_none>]
116+
// CHECK: amdgpu.sched_barrier [#amdgpu<sched_barrier_opt allow_valu>, #amdgpu<sched_barrier_opt allow_all_vmem>]
117+
amdgpu.sched_barrier [#amdgpu<sched_barrier_opt allow_valu>, #amdgpu<sched_barrier_opt allow_all_vmem>]
118+
func.return
119+
}
120+
112121
// CHECK-LABEL: func @mfma
113122
func.func @mfma(%arg0 : f32, %arg1 : vector<32xf32>) -> vector<32xf32> {
114123
// CHECK: amdgpu.mfma

0 commit comments

Comments
 (0)