-
Notifications
You must be signed in to change notification settings - Fork 13.6k
[MLIR][AMDGPU] Add amdgpu.sched_barrier #98911
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-mlir-gpu @llvm/pr-subscribers-mlir-amdgpu Author: Manupa Karunaratne (manupak) Changes: This commit adds sched_barrier operator to AMDGPU dialect that lowers to rocdl.sched.barrier. Full diff: https://github.com/llvm/llvm-project/pull/98911.diff 4 Files Affected:
diff --git a/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td b/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td
index 3f27e1541cf38..347ca6611571c 100644
--- a/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td
+++ b/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td
@@ -433,6 +433,47 @@ def AMDGPU_LDSBarrierOp : AMDGPU_Op<"lds_barrier"> {
let assemblyFormat = "attr-dict";
}
+// Options accepted by `amdgpu.sched_barrier`. `allow_none` is 0 and every
+// other case has exactly one distinct bit set (0x0001 .. 0x0400), so several
+// options can be OR-ed together into a single 32-bit mask.
+// NOTE(review): `allow_non_mem_non_sideffect` looks like a misspelling of
+// "side_effect"; renaming it would change the assembly syntax, so confirm
+// before touching it.
+def AMDGPU_SchedBarrierOpOpt : I32EnumAttr<"sched_barrier_opt_enum",
+ "The possible options for scheduling barriers",
+ [
+ I32EnumAttrCase<"allow_none", 0x0000>,
+ I32EnumAttrCase<"allow_non_mem_non_sideffect", 0x0001>,
+ I32EnumAttrCase<"allow_valu", 0x0002>,
+ I32EnumAttrCase<"allow_salu", 0x0004>,
+ I32EnumAttrCase<"allow_mfma_wmma", 0x0008>,
+ I32EnumAttrCase<"allow_all_vmem", 0x0010>,
+ I32EnumAttrCase<"allow_vmem_read", 0x0020>,
+ I32EnumAttrCase<"allow_vmem_write", 0x0040>,
+ I32EnumAttrCase<"allow_all_ds", 0x0080>,
+ I32EnumAttrCase<"allow_ds_read", 0x0100>,
+ I32EnumAttrCase<"allow_ds_write", 0x0200>,
+ I32EnumAttrCase<"allow_transcendental", 0x0400>
+ ]> {
+ // Do not generate an attribute class from this enum directly; the dialect
+ // attribute is declared separately through `EnumAttr`.
+ let genSpecializedAttr = 0;
+ let cppNamespace = "::mlir::amdgpu";
+}
+
+// AMDGPU dialect attribute wrapping a single `sched_barrier_opt_enum` value,
+// with mnemonic `sched_barrier_opt`.
+def AMDGPU_SchedBarrierOpOptAttr : EnumAttr<AMDGPU_Dialect, AMDGPU_SchedBarrierOpOpt,
+ "sched_barrier_opt">;
+
+// Array attribute whose elements are `sched_barrier_opt` attributes.
+def AMDGPU_SchedBarrierOpOptArrayAttr : TypedArrayAttrBase<AMDGPU_SchedBarrierOpOptAttr,
+ "sched_barrier_opt array attribute">;
+
+// `amdgpu.sched_barrier`: an op with no operands or results that carries an
+// array of scheduling options in `$opts`. Its textual form is the option
+// array followed by the attribute dictionary.
+// NOTE(review): the description below refers to "sched_barrier_opts", while
+// the argument is named `$opts` and the attribute mnemonic is
+// `sched_barrier_opt` -- confirm which name the text should use.
+def AMDGPU_SchedBarrierOp :
+ AMDGPU_Op<"sched_barrier">,
+ Arguments<(ins AMDGPU_SchedBarrierOpOptArrayAttr:$opts)>
+ {
+ let summary = "Barrier that limits the backend scheduler of instruction movement";
+ let description = [{
+ `amdgpu.sched_barrier` serves as a barrier that could be
+ configured to restrict movements of instructions through it as
+ defined by sched_barrier_opts.
+ }];
+ let assemblyFormat = [{
+ $opts attr-dict
+ }];
+}
+
def AMDGPU_MFMAPermB : I32EnumAttr<"MFMAPermB",
"The possible permutations of the lanes storing B available in an MFMA",
[
diff --git a/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp b/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp
index 033e66c6118f3..2b165ac3e9653 100644
--- a/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp
+++ b/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp
@@ -321,6 +321,26 @@ struct LDSBarrierOpLowering : public ConvertOpToLLVMPattern<LDSBarrierOp> {
return success();
}
};
+
+/// Rewrites `amdgpu.sched_barrier` into `ROCDL::SchedBarrier`, folding the
+/// op's option array into a single 32-bit mask.
+struct SchedBarrierOpLowering : public ConvertOpToLLVMPattern<SchedBarrierOp> {
+  Chipset chipset;
+
+  SchedBarrierOpLowering(LLVMTypeConverter &converter, Chipset chipset)
+      : ConvertOpToLLVMPattern<SchedBarrierOp>(converter), chipset(chipset) {}
+
+  LogicalResult
+  matchAndRewrite(SchedBarrierOp op, SchedBarrierOp::Adaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
+    // OR the numeric value of every listed option into one mask; an empty
+    // list leaves the mask at 0.
+    uint32_t mask = 0;
+    for (Attribute entry : op.getOpts()) {
+      auto optAttr = cast<amdgpu::sched_barrier_opt_enumAttr>(entry);
+      mask |= static_cast<uint32_t>(optAttr.getValue());
+    }
+    rewriter.replaceOpWithNewOp<ROCDL::SchedBarrier>(op, mask);
+    return success();
+  }
+};
+
} // namespace
/// If `input` is a vector of bytes, concatenate those bytes in little-endian
@@ -879,8 +899,8 @@ void mlir::populateAMDGPUToROCDLConversionPatterns(LLVMTypeConverter &converter,
ROCDL::RawPtrBufferAtomicUminOp>,
RawBufferOpLowering<RawBufferAtomicCmpswapOp,
ROCDL::RawPtrBufferAtomicCmpSwap>,
- LDSBarrierOpLowering, MFMAOpLowering, WMMAOpLowering,
- ExtPackedFp8OpLowering, PackedTrunc2xFp8OpLowering,
+ LDSBarrierOpLowering, SchedBarrierOpLowering, MFMAOpLowering,
+ WMMAOpLowering, ExtPackedFp8OpLowering, PackedTrunc2xFp8OpLowering,
PackedStochRoundFp8OpLowering>(converter, chipset);
}
diff --git a/mlir/test/Conversion/AMDGPUToROCDL/amdgpu-to-rocdl.mlir b/mlir/test/Conversion/AMDGPUToROCDL/amdgpu-to-rocdl.mlir
index bb1cedaa276b3..c9ae42fb81a6f 100644
--- a/mlir/test/Conversion/AMDGPUToROCDL/amdgpu-to-rocdl.mlir
+++ b/mlir/test/Conversion/AMDGPUToROCDL/amdgpu-to-rocdl.mlir
@@ -226,3 +226,34 @@ func.func @lds_barrier() {
amdgpu.lds_barrier
func.return
}
+
+// CHECK-LABEL: func @sched_barrier
+func.func @sched_barrier() {
+ // Each single option must lower to rocdl.sched.barrier carrying that
+ // option's numeric value; the last case verifies that several options are
+ // OR-ed together (2 | 16 = 18).
+ // CHECK: rocdl.sched.barrier 0
+ amdgpu.sched_barrier [#amdgpu<sched_barrier_opt allow_none>]
+ // CHECK: rocdl.sched.barrier 1
+ amdgpu.sched_barrier [#amdgpu<sched_barrier_opt allow_non_mem_non_sideffect>]
+ // CHECK: rocdl.sched.barrier 2
+ amdgpu.sched_barrier [#amdgpu<sched_barrier_opt allow_valu>]
+ // CHECK: rocdl.sched.barrier 4
+ amdgpu.sched_barrier [#amdgpu<sched_barrier_opt allow_salu>]
+ // CHECK: rocdl.sched.barrier 8
+ amdgpu.sched_barrier [#amdgpu<sched_barrier_opt allow_mfma_wmma>]
+ // CHECK: rocdl.sched.barrier 16
+ amdgpu.sched_barrier [#amdgpu<sched_barrier_opt allow_all_vmem>]
+ // CHECK: rocdl.sched.barrier 32
+ amdgpu.sched_barrier [#amdgpu<sched_barrier_opt allow_vmem_read>]
+ // CHECK: rocdl.sched.barrier 64
+ amdgpu.sched_barrier [#amdgpu<sched_barrier_opt allow_vmem_write>]
+ // CHECK: rocdl.sched.barrier 128
+ amdgpu.sched_barrier [#amdgpu<sched_barrier_opt allow_all_ds>]
+ // CHECK: rocdl.sched.barrier 256
+ amdgpu.sched_barrier [#amdgpu<sched_barrier_opt allow_ds_read>]
+ // CHECK: rocdl.sched.barrier 512
+ amdgpu.sched_barrier [#amdgpu<sched_barrier_opt allow_ds_write>]
+ // CHECK: rocdl.sched.barrier 1024
+ amdgpu.sched_barrier [#amdgpu<sched_barrier_opt allow_transcendental>]
+ // CHECK: rocdl.sched.barrier 18
+ amdgpu.sched_barrier [#amdgpu<sched_barrier_opt allow_valu>, #amdgpu<sched_barrier_opt allow_all_vmem>]
+ func.return
+}
diff --git a/mlir/test/Dialect/AMDGPU/ops.mlir b/mlir/test/Dialect/AMDGPU/ops.mlir
index 744a096d757e0..114829c90e302 100644
--- a/mlir/test/Dialect/AMDGPU/ops.mlir
+++ b/mlir/test/Dialect/AMDGPU/ops.mlir
@@ -109,6 +109,15 @@ func.func @lds_barrier() {
func.return
}
+// CHECK-LABEL: func @sched_barrier
+func.func @sched_barrier() {
+ // Covers a single-option list and a two-option list; the expected output
+ // for each op is the same text as its input.
+ // CHECK: amdgpu.sched_barrier [#amdgpu<sched_barrier_opt allow_none>]
+ amdgpu.sched_barrier [#amdgpu<sched_barrier_opt allow_none>]
+ // CHECK: amdgpu.sched_barrier [#amdgpu<sched_barrier_opt allow_valu>, #amdgpu<sched_barrier_opt allow_all_vmem>]
+ amdgpu.sched_barrier [#amdgpu<sched_barrier_opt allow_valu>, #amdgpu<sched_barrier_opt allow_all_vmem>]
+ func.return
+}
+
// CHECK-LABEL: func @mfma
func.func @mfma(%arg0 : f32, %arg1 : vector<32xf32>) -> vector<32xf32> {
// CHECK: amdgpu.mfma
|
94c3d18
to
20af1ec
Compare
cc: @krzysz00 |
20af1ec
to
88ab79f
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Overall seems fine, but you don't need the `<>`s in the syntax
@antiagainst when you have some time, I'd appreciate another look here if there is more to be addressed |
This commit adds sched_barrier operator to AMDGPU dialect that lowers to rocdl.sched.barrier.
88ab79f
to
374407a
Compare
Thanks @antiagainst @krzysz00 for reviews. |
This commit adds sched_barrier operator
to AMDGPU dialect that lowers to rocdl.sched.barrier.