Skip to content

Commit 00e7a02

Browse files
authored
[ScheduleDAG] Allow disabling the SchedModel / Itineraries during Scheduling (llvm#138057)
This provides the `disable-schedmodel-in-sched-mi` flag. When it is used, the SchedModel / Itineraries are disabled during scheduling, which has the effect of not using any latency / hardware resource information for scheduling decisions. We already have the `schedmodel` flag, but it disables the `SchedModel` for all passes. The new flag allows disabling the model only for scheduling while preserving the behavior of other passes (e.g. MachineLICM). This is conceptually similar to other flags like `enable-aa-sched-mi`.
1 parent 9281947 commit 00e7a02

File tree

5 files changed

+77
-9
lines changed

5 files changed

+77
-9
lines changed

llvm/include/llvm/CodeGen/TargetSchedule.h

+12-1
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,16 @@ class TargetSchedModel {
4545

4646
unsigned computeInstrLatency(const MCSchedClassDesc &SCDesc) const;
4747

48+
// EnableSchedModel and EnableSchedItins are used to control whether or not to
49+
// use the Target's {SchedMachineModel, InstrItins} for hardware info based
50+
// Scheduling decisions. If both are enabled, as is the default, preference
51+
// will be given to one based on the API implementation. By disabling one, we
52+
// can force preference of the other. By disabling both, we will throw away
53+
// any target-specific hardware details for scheduling decisions, and fall
54+
// back on things that provide generic info such as defaultDefLatency.
55+
bool EnableSchedModel = true;
56+
bool EnableSchedItins = true;
57+
4858
public:
4959
TargetSchedModel() : SchedModel(MCSchedModel::Default) {}
5060

@@ -53,7 +63,8 @@ class TargetSchedModel {
5363
/// The machine model API keeps a copy of the top-level MCSchedModel table
5464
/// indices and may query TargetSubtargetInfo and TargetInstrInfo to resolve
5565
/// dynamic properties.
56-
void init(const TargetSubtargetInfo *TSInfo);
66+
void init(const TargetSubtargetInfo *TSInfo, bool EnableSModel = true,
67+
bool EnableSItins = true);
5768

5869
/// Return the MCSchedClassDesc for this instruction.
5970
const MCSchedClassDesc *resolveSchedClass(const MachineInstr *MI) const;

llvm/lib/CodeGen/ScheduleDAGInstrs.cpp

+9-1
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,14 @@ static cl::opt<bool>
6969
static cl::opt<bool> UseTBAA("use-tbaa-in-sched-mi", cl::Hidden,
7070
cl::init(true), cl::desc("Enable use of TBAA during MI DAG construction"));
7171

72+
static cl::opt<bool>
73+
EnableSchedModel("schedmodel", cl::Hidden, cl::init(true),
74+
cl::desc("Use TargetSchedModel for latency lookup"));
75+
76+
static cl::opt<bool>
77+
EnableSchedItins("scheditins", cl::Hidden, cl::init(true),
78+
cl::desc("Use InstrItineraryData for latency lookup"));
79+
7280
// Note: the two options below might be used in tuning compile time vs
7381
// output quality. Setting HugeRegion so large that it will never be
7482
// reached means best-effort, but may be slow.
@@ -121,7 +129,7 @@ ScheduleDAGInstrs::ScheduleDAGInstrs(MachineFunction &mf,
121129
DbgValues.clear();
122130

123131
const TargetSubtargetInfo &ST = mf.getSubtarget();
124-
SchedModel.init(&ST);
132+
SchedModel.init(&ST, EnableSchedModel, EnableSchedItins);
125133
}
126134

127135
/// If this machine instr has memory reference information and it can be

llvm/lib/CodeGen/TargetSchedule.cpp

+5-7
Original file line numberDiff line numberDiff line change
@@ -29,12 +29,6 @@
2929

3030
using namespace llvm;
3131

32-
static cl::opt<bool> EnableSchedModel("schedmodel", cl::Hidden, cl::init(true),
33-
cl::desc("Use TargetSchedModel for latency lookup"));
34-
35-
static cl::opt<bool> EnableSchedItins("scheditins", cl::Hidden, cl::init(true),
36-
cl::desc("Use InstrItineraryData for latency lookup"));
37-
3832
static cl::opt<bool> ForceEnableIntervals(
3933
"sched-model-force-enable-intervals", cl::Hidden, cl::init(false),
4034
cl::desc("Force the use of resource intervals in the schedule model"));
@@ -47,12 +41,16 @@ bool TargetSchedModel::hasInstrItineraries() const {
4741
return EnableSchedItins && !InstrItins.isEmpty();
4842
}
4943

50-
void TargetSchedModel::init(const TargetSubtargetInfo *TSInfo) {
44+
void TargetSchedModel::init(const TargetSubtargetInfo *TSInfo,
45+
bool EnableSModel, bool EnableSItins) {
5146
STI = TSInfo;
5247
SchedModel = TSInfo->getSchedModel();
5348
TII = TSInfo->getInstrInfo();
5449
STI->initInstrItins(InstrItins);
5550

51+
EnableSchedModel = EnableSModel;
52+
EnableSchedItins = EnableSItins;
53+
5654
unsigned NumRes = SchedModel.getNumProcResourceKinds();
5755
ResourceFactors.resize(NumRes);
5856
ResourceLCM = SchedModel.IssueWidth;

llvm/test/CodeGen/AMDGPU/mai-hazards-gfx942.mir

+1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
# RUN: llc -mtriple=amdgcn -mcpu=gfx942 -verify-machineinstrs -run-pass post-RA-hazard-rec %s -o - | FileCheck -check-prefixes=GCN,GFX942 %s
22
# RUN: llc -mtriple=amdgcn -mcpu=gfx950 -verify-machineinstrs -run-pass post-RA-hazard-rec %s -o - | FileCheck -check-prefixes=GCN,GFX950 %s
3+
# RUN: llc -mtriple=amdgcn -mcpu=gfx950 -verify-machineinstrs -run-pass post-RA-hazard-rec --schedmodel=0 %s -o - | FileCheck -check-prefixes=GCN,GFX950 %s
34

45
# GCN-LABEL: name: valu_write_vgpr_sgemm_mfma_read
56
# GCN: V_MOV_B32
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
2+
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -misched-cluster=false --misched-prera-direction=topdown -run-pass=machine-scheduler --schedmodel=1 -o - %s | FileCheck -check-prefix=GCN %s
3+
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -misched-cluster=false --misched-prera-direction=topdown -run-pass=machine-scheduler --schedmodel=0 -o - %s | FileCheck -check-prefix=GCN-NO-SCHEDMODEL %s
4+
5+
---
6+
name: sched_group_barrier_1_VMEM_READ_1_VALU_5_MFMA_1_VMEM_READ_3_VALU_2_VMEM_WRITE
7+
tracksRegLiveness: true
8+
body: |
9+
bb.0:
10+
11+
; GCN-LABEL: name: sched_group_barrier_1_VMEM_READ_1_VALU_5_MFMA_1_VMEM_READ_3_VALU_2_VMEM_WRITE
12+
; GCN: [[DEF:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
13+
; GCN-NEXT: [[DEF1:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
14+
; GCN-NEXT: early-clobber %2:vreg_512_align2 = contract V_MFMA_F32_32X32X16_FP8_FP8_vgprcd_e64 [[DEF]].sub0_sub1, [[DEF1]].sub0_sub1, 0, 0, 0, 0, implicit $mode, implicit $exec
15+
; GCN-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
16+
; GCN-NEXT: dead [[DS_READ_U16_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_U16_gfx9 [[DEF2]], 0, 0, implicit $exec
17+
; GCN-NEXT: [[DEF3:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
18+
; GCN-NEXT: dead [[DS_READ_U16_gfx9_1:%[0-9]+]]:vgpr_32 = DS_READ_U16_gfx9 [[DEF3]], 0, 0, implicit $exec
19+
; GCN-NEXT: [[DEF4:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
20+
; GCN-NEXT: dead [[DS_READ_U16_gfx9_2:%[0-9]+]]:vgpr_32 = DS_READ_U16_gfx9 [[DEF4]], 0, 0, implicit $exec
21+
; GCN-NEXT: [[V_MUL_LO_U32_e64_:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 %2.sub0, %2.sub1, implicit $exec
22+
; GCN-NEXT: early-clobber %3:vreg_512_align2 = contract V_MFMA_F32_32X32X16_FP8_FP8_vgprcd_e64 [[DEF]].sub0_sub1, [[DEF1]].sub0_sub1, 0, 0, 0, 0, implicit $mode, implicit $exec
23+
; GCN-NEXT: S_ENDPGM 0, implicit %2, implicit %3, implicit [[V_MUL_LO_U32_e64_]]
24+
;
25+
; GCN-NO-SCHEDMODEL-LABEL: name: sched_group_barrier_1_VMEM_READ_1_VALU_5_MFMA_1_VMEM_READ_3_VALU_2_VMEM_WRITE
26+
; GCN-NO-SCHEDMODEL: [[DEF:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
27+
; GCN-NO-SCHEDMODEL-NEXT: [[DEF1:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
28+
; GCN-NO-SCHEDMODEL-NEXT: early-clobber %2:vreg_512_align2 = contract V_MFMA_F32_32X32X16_FP8_FP8_vgprcd_e64 [[DEF]].sub0_sub1, [[DEF1]].sub0_sub1, 0, 0, 0, 0, implicit $mode, implicit $exec
29+
; GCN-NO-SCHEDMODEL-NEXT: early-clobber %3:vreg_512_align2 = contract V_MFMA_F32_32X32X16_FP8_FP8_vgprcd_e64 [[DEF]].sub0_sub1, [[DEF1]].sub0_sub1, 0, 0, 0, 0, implicit $mode, implicit $exec
30+
; GCN-NO-SCHEDMODEL-NEXT: [[V_MUL_LO_U32_e64_:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 %2.sub0, %2.sub1, implicit $exec
31+
; GCN-NO-SCHEDMODEL-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
32+
; GCN-NO-SCHEDMODEL-NEXT: dead [[DS_READ_U16_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_U16_gfx9 [[DEF2]], 0, 0, implicit $exec
33+
; GCN-NO-SCHEDMODEL-NEXT: [[DEF3:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
34+
; GCN-NO-SCHEDMODEL-NEXT: dead [[DS_READ_U16_gfx9_1:%[0-9]+]]:vgpr_32 = DS_READ_U16_gfx9 [[DEF3]], 0, 0, implicit $exec
35+
; GCN-NO-SCHEDMODEL-NEXT: [[DEF4:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
36+
; GCN-NO-SCHEDMODEL-NEXT: dead [[DS_READ_U16_gfx9_2:%[0-9]+]]:vgpr_32 = DS_READ_U16_gfx9 [[DEF4]], 0, 0, implicit $exec
37+
; GCN-NO-SCHEDMODEL-NEXT: S_ENDPGM 0, implicit %2, implicit %3, implicit [[V_MUL_LO_U32_e64_]]
38+
%0:vreg_128_align2 = IMPLICIT_DEF
39+
%1:vreg_128_align2 = IMPLICIT_DEF
40+
%2:vreg_512_align2 = contract V_MFMA_F32_32X32X16_FP8_FP8_vgprcd_e64 %0.sub0_sub1:vreg_128_align2, %1.sub0_sub1:vreg_128_align2, 0, 0, 0, 0, implicit $mode, implicit $exec
41+
%3:vreg_512_align2 = contract V_MFMA_F32_32X32X16_FP8_FP8_vgprcd_e64 %0.sub0_sub1:vreg_128_align2, %1.sub0_sub1:vreg_128_align2, 0, 0, 0, 0, implicit $mode, implicit $exec
42+
%4:vgpr_32 = nsw V_MUL_LO_U32_e64 %2.sub0, %2.sub1, implicit $exec
43+
%5:vgpr_32 = IMPLICIT_DEF
44+
%6:vgpr_32 = DS_READ_U16_gfx9 %5, 0, 0, implicit $exec
45+
%7:vgpr_32 = IMPLICIT_DEF
46+
%8:vgpr_32 = DS_READ_U16_gfx9 %7, 0, 0, implicit $exec
47+
%9:vgpr_32 = IMPLICIT_DEF
48+
%10:vgpr_32 = DS_READ_U16_gfx9 %9, 0, 0, implicit $exec
49+
S_ENDPGM 0, implicit %2, implicit %3, implicit %4
50+
...

0 commit comments

Comments
 (0)