Skip to content

Commit f321456

Browse files
authored
[ScheduleDAG] Dirty height/depth in addPred/removePred even for latency zero (#102915)
A long time ago (back in 2009) there was a commit 52d4d82 that changed the scheduler to not dirty height/depth when adding or removing SUnit predecessors when the latency on the edge was zero. That commit message is claiming that the depth or height isn't affected when the latency is zero. As a matter of fact, the depth/height can change even with a zero latency on the edge. If for example adding a new SUnit A, with zero latency, but as a predecessor to a SUnit B, then both height of A and depth of B should be marked as dirty. If for example B has a greater height than A, then the height of A needs to be adjusted even if the latency is zero. I think this has been wrong for many years. Downstream we have had commit 52d4d82 reverted since back in 2016. There is no motivating lit test for 52d4d82 (only an incomplete C level reproducer in #3613). After commit 13d04fa there finally appeared an upstream lit test that shows that we get better code if marking height/depth as dirty (llvm/test/CodeGen/AArch64/abds.ll).
1 parent 21de049 commit f321456

File tree

2 files changed

+25
-29
lines changed

2 files changed

+25
-29
lines changed

llvm/lib/CodeGen/ScheduleDAG.cpp

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -125,6 +125,9 @@ bool SUnit::addPred(const SDep &D, bool Required) {
125125
}
126126
}
127127
PredDep.setLatency(D.getLatency());
128+
// Changing latency, dirty the involved SUnits.
129+
this->setDepthDirty();
130+
D.getSUnit()->setHeightDirty();
128131
}
129132
return false;
130133
}
@@ -164,10 +167,8 @@ bool SUnit::addPred(const SDep &D, bool Required) {
164167
}
165168
Preds.push_back(D);
166169
N->Succs.push_back(P);
167-
if (P.getLatency() != 0) {
168-
this->setDepthDirty();
169-
N->setHeightDirty();
170-
}
170+
this->setDepthDirty();
171+
N->setHeightDirty();
171172
return true;
172173
}
173174

@@ -209,10 +210,8 @@ void SUnit::removePred(const SDep &D) {
209210
}
210211
N->Succs.erase(Succ);
211212
Preds.erase(I);
212-
if (P.getLatency() != 0) {
213-
this->setDepthDirty();
214-
N->setHeightDirty();
215-
}
213+
this->setDepthDirty();
214+
N->setHeightDirty();
216215
}
217216

218217
void SUnit::setDepthDirty() {

llvm/test/CodeGen/AArch64/abds.ll

Lines changed: 18 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -180,14 +180,13 @@ define i64 @abd_ext_i64_undef(i64 %a, i64 %b) nounwind {
180180
define i128 @abd_ext_i128(i128 %a, i128 %b) nounwind {
181181
; CHECK-LABEL: abd_ext_i128:
182182
; CHECK: // %bb.0:
183-
; CHECK-NEXT: cmp x2, x0
184-
; CHECK-NEXT: sbc x8, x3, x1
185-
; CHECK-NEXT: subs x9, x0, x2
186-
; CHECK-NEXT: sbc x10, x1, x3
187-
; CHECK-NEXT: subs x11, x2, x0
183+
; CHECK-NEXT: subs x8, x0, x2
184+
; CHECK-NEXT: sbc x9, x1, x3
185+
; CHECK-NEXT: subs x10, x2, x0
186+
; CHECK-NEXT: sbc x11, x3, x1
188187
; CHECK-NEXT: sbcs xzr, x3, x1
189-
; CHECK-NEXT: csel x0, x9, x11, lt
190-
; CHECK-NEXT: csel x1, x10, x8, lt
188+
; CHECK-NEXT: csel x0, x8, x10, lt
189+
; CHECK-NEXT: csel x1, x9, x11, lt
191190
; CHECK-NEXT: ret
192191
%aext = sext i128 %a to i256
193192
%bext = sext i128 %b to i256
@@ -200,14 +199,13 @@ define i128 @abd_ext_i128(i128 %a, i128 %b) nounwind {
200199
define i128 @abd_ext_i128_undef(i128 %a, i128 %b) nounwind {
201200
; CHECK-LABEL: abd_ext_i128_undef:
202201
; CHECK: // %bb.0:
203-
; CHECK-NEXT: cmp x2, x0
204-
; CHECK-NEXT: sbc x8, x3, x1
205-
; CHECK-NEXT: subs x9, x0, x2
206-
; CHECK-NEXT: sbc x10, x1, x3
207-
; CHECK-NEXT: subs x11, x2, x0
202+
; CHECK-NEXT: subs x8, x0, x2
203+
; CHECK-NEXT: sbc x9, x1, x3
204+
; CHECK-NEXT: subs x10, x2, x0
205+
; CHECK-NEXT: sbc x11, x3, x1
208206
; CHECK-NEXT: sbcs xzr, x3, x1
209-
; CHECK-NEXT: csel x0, x9, x11, lt
210-
; CHECK-NEXT: csel x1, x10, x8, lt
207+
; CHECK-NEXT: csel x0, x8, x10, lt
208+
; CHECK-NEXT: csel x1, x9, x11, lt
211209
; CHECK-NEXT: ret
212210
%aext = sext i128 %a to i256
213211
%bext = sext i128 %b to i256
@@ -278,14 +276,13 @@ define i64 @abd_minmax_i64(i64 %a, i64 %b) nounwind {
278276
define i128 @abd_minmax_i128(i128 %a, i128 %b) nounwind {
279277
; CHECK-LABEL: abd_minmax_i128:
280278
; CHECK: // %bb.0:
281-
; CHECK-NEXT: cmp x2, x0
282-
; CHECK-NEXT: sbc x8, x3, x1
283-
; CHECK-NEXT: subs x9, x0, x2
284-
; CHECK-NEXT: sbc x10, x1, x3
285-
; CHECK-NEXT: subs x11, x2, x0
279+
; CHECK-NEXT: subs x8, x0, x2
280+
; CHECK-NEXT: sbc x9, x1, x3
281+
; CHECK-NEXT: subs x10, x2, x0
282+
; CHECK-NEXT: sbc x11, x3, x1
286283
; CHECK-NEXT: sbcs xzr, x3, x1
287-
; CHECK-NEXT: csel x0, x9, x11, lt
288-
; CHECK-NEXT: csel x1, x10, x8, lt
284+
; CHECK-NEXT: csel x0, x8, x10, lt
285+
; CHECK-NEXT: csel x1, x9, x11, lt
289286
; CHECK-NEXT: ret
290287
%min = call i128 @llvm.smin.i128(i128 %a, i128 %b)
291288
%max = call i128 @llvm.smax.i128(i128 %a, i128 %b)

0 commit comments

Comments
 (0)