Skip to content

Commit 2e5b820

Browse files
committed
[sil-loop-unroll] Do not unroll loops whose bodies contain calls to big functions
In most cases it is more profitable to inline the big callee than to unroll the loop, because unrolling would create many calls that cannot be optimized further due to the increased size of the function containing the loop.
1 parent db5c967 commit 2e5b820

File tree

2 files changed

+84
-0
lines changed

2 files changed

+84
-0
lines changed

lib/SILOptimizer/LoopTransforms/LoopUnroll.cpp

+13
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
#include "swift/SILOptimizer/Analysis/LoopAnalysis.h"
2020
#include "swift/SILOptimizer/PassManager/Passes.h"
2121
#include "swift/SILOptimizer/PassManager/Transforms.h"
22+
#include "swift/SILOptimizer/Utils/PerformanceInlinerUtils.h"
2223
#include "swift/SILOptimizer/Utils/SILInliner.h"
2324
#include "swift/SILOptimizer/Utils/SILSSAUpdater.h"
2425

@@ -185,12 +186,24 @@ static bool canAndShouldUnrollLoop(SILLoop *Loop, uint64_t TripCount) {
185186

186187
// We can unroll a loop if we can duplicate the instructions it holds.
187188
uint64_t Cost = 0;
189+
// Average number of instructions per basic block.
190+
// It is used to estimate the cost of the callee
191+
// inside a loop.
192+
const uint64_t InsnsPerBB = 4;
188193
for (auto *BB : Loop->getBlocks()) {
189194
for (auto &Inst : *BB) {
190195
if (!Loop->canDuplicate(&Inst))
191196
return false;
192197
if (instructionInlineCost(Inst) != InlineCost::Free)
193198
++Cost;
199+
if (auto AI = FullApplySite::isa(&Inst)) {
200+
auto Callee = AI.getCalleeFunction();
201+
if (Callee && getEligibleFunction(AI, InlineSelection::Everything)) {
202+
// If the callee is rather big and potentially inlineable, it may be better
203+
// not to unroll, so that the body of the callee can be inlined later.
204+
Cost += Callee->size() * InsnsPerBB;
205+
}
206+
}
194207
if (Cost * TripCount > SILLoopUnrollThreshold)
195208
return false;
196209
}

test/SILOptimizer/loop_unroll.sil

+71
Original file line numberDiff line numberDiff line change
@@ -210,3 +210,74 @@ bb3:
210210
%8 = tuple()
211211
return %8 : $()
212212
}
213+
214+
sil @big_func: $@convention(thin) () -> Builtin.Int64 {
215+
bb0:
216+
%x0 = integer_literal $Builtin.Int64, 1
217+
%overflow_check = integer_literal $Builtin.Int1, 0
218+
%sum1 = builtin "sadd_with_overflow_Int64"(%x0 : $Builtin.Int64, %x0 : $Builtin.Int64, %overflow_check : $Builtin.Int1) : $(Builtin.Int64, Builtin.Int1)
219+
%x1 = tuple_extract %sum1 : $(Builtin.Int64, Builtin.Int1), 0
220+
br bb1
221+
222+
bb1:
223+
%sum2 = builtin "sadd_with_overflow_Int64"(%x1 : $Builtin.Int64, %x1 : $Builtin.Int64, %overflow_check : $Builtin.Int1) : $(Builtin.Int64, Builtin.Int1)
224+
%x2 = tuple_extract %sum2 : $(Builtin.Int64, Builtin.Int1), 0
225+
br bb2
226+
227+
bb2:
228+
%sum3 = builtin "sadd_with_overflow_Int64"(%x2 : $Builtin.Int64, %x2 : $Builtin.Int64, %overflow_check : $Builtin.Int1) : $(Builtin.Int64, Builtin.Int1)
229+
%x3 = tuple_extract %sum3 : $(Builtin.Int64, Builtin.Int1), 0
230+
br bb3
231+
232+
bb3:
233+
%sum4 = builtin "sadd_with_overflow_Int64"(%x3 : $Builtin.Int64, %x3 : $Builtin.Int64, %overflow_check : $Builtin.Int1) : $(Builtin.Int64, Builtin.Int1)
234+
%x4 = tuple_extract %sum4 : $(Builtin.Int64, Builtin.Int1), 0
235+
br bb4
236+
237+
bb4:
238+
%sum5 = builtin "sadd_with_overflow_Int64"(%x4 : $Builtin.Int64, %x4 : $Builtin.Int64, %overflow_check : $Builtin.Int1) : $(Builtin.Int64, Builtin.Int1)
239+
%x5 = tuple_extract %sum5 : $(Builtin.Int64, Builtin.Int1), 0
240+
br bb5
241+
242+
bb5:
243+
%sum6 = builtin "sadd_with_overflow_Int64"(%x5 : $Builtin.Int64, %x5 : $Builtin.Int64, %overflow_check : $Builtin.Int1) : $(Builtin.Int64, Builtin.Int1)
244+
%x6 = tuple_extract %sum6 : $(Builtin.Int64, Builtin.Int1), 0
245+
br bb6
246+
247+
bb6:
248+
return %x6 : $Builtin.Int64
249+
}
250+
251+
// Check that the compiler does not unroll loops containing calls
252+
// of big inlineable functions.
253+
//
254+
// CHECK-LABEL: sil @unroll_with_apply
255+
// CHECK: apply
256+
// CHECK: // end sil function 'unroll_with_apply'
257+
sil @unroll_with_apply : $@convention(thin) () -> () {
258+
bb0:
259+
%0 = integer_literal $Builtin.Int64, 0
260+
%1 = integer_literal $Builtin.Int64, 1
261+
%2 = integer_literal $Builtin.Int64, 20
262+
%3 = integer_literal $Builtin.Int1, 1
263+
%f = function_ref @big_func: $@convention(thin) () -> Builtin.Int64
264+
br bb1(%0 : $Builtin.Int64)
265+
266+
bb1(%4 : $Builtin.Int64):
267+
%r = apply %f() : $@convention(thin) () -> Builtin.Int64
268+
br bb2
269+
270+
bb2:
271+
%5 = builtin "sadd_with_overflow_Int64"(%4 : $Builtin.Int64, %1 : $Builtin.Int64, %3 : $Builtin.Int1) : $(Builtin.Int64, Builtin.Int1)
272+
%6 = tuple_extract %5 : $(Builtin.Int64, Builtin.Int1), 0
273+
%7 = builtin "cmp_eq_Int64"(%6 : $Builtin.Int64, %2 : $Builtin.Int64) : $Builtin.Int1
274+
cond_br %7, bb4, bb3
275+
276+
bb3:
277+
br bb1(%6 : $Builtin.Int64)
278+
279+
bb4:
280+
%8 = tuple()
281+
return %8 : $()
282+
}
283+

0 commit comments

Comments
 (0)