Skip to content

Commit 9edef03

Browse files
authored
Merge pull request #11561 from swiftix/optimizer-improvements
A set of small optimizer improvements in preparation for the early SIL module serialization
2 parents 7e9dcae + 7bda199 commit 9edef03

File tree

8 files changed

+196
-19
lines changed

8 files changed

+196
-19
lines changed

Diff for: lib/SILOptimizer/LoopTransforms/LoopUnroll.cpp

+13
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
#include "swift/SILOptimizer/Analysis/LoopAnalysis.h"
2020
#include "swift/SILOptimizer/PassManager/Passes.h"
2121
#include "swift/SILOptimizer/PassManager/Transforms.h"
22+
#include "swift/SILOptimizer/Utils/PerformanceInlinerUtils.h"
2223
#include "swift/SILOptimizer/Utils/SILInliner.h"
2324
#include "swift/SILOptimizer/Utils/SILSSAUpdater.h"
2425

@@ -185,12 +186,24 @@ static bool canAndShouldUnrollLoop(SILLoop *Loop, uint64_t TripCount) {
185186

186187
// We can unroll a loop if we can duplicate the instructions it holds.
187188
uint64_t Cost = 0;
189+
// Average number of instructions per basic block.
190+
// It is used to estimate the cost of the callee
191+
// inside a loop.
192+
const uint64_t InsnsPerBB = 4;
188193
for (auto *BB : Loop->getBlocks()) {
189194
for (auto &Inst : *BB) {
190195
if (!Loop->canDuplicate(&Inst))
191196
return false;
192197
if (instructionInlineCost(Inst) != InlineCost::Free)
193198
++Cost;
199+
if (auto AI = FullApplySite::isa(&Inst)) {
200+
auto Callee = AI.getCalleeFunction();
201+
if (Callee && getEligibleFunction(AI, InlineSelection::Everything)) {
202+
// If callee is rather big and potentially inlineable, it may be better
203+
// not to unroll, so that the body of the callee can be inlined later.
204+
Cost += Callee->size() * InsnsPerBB;
205+
}
206+
}
194207
if (Cost * TripCount > SILLoopUnrollThreshold)
195208
return false;
196209
}

Diff for: lib/SILOptimizer/Mandatory/ConstantPropagation.cpp

+5
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,11 @@ constantFoldBinaryWithOverflow(BuiltinInst *BI, llvm::Intrinsic::ID ID,
8080
// If we can statically determine that the operation overflows,
8181
// warn about it if warnings are not disabled by ResultsInError being null.
8282
if (ResultsInError.hasValue() && Overflow && ReportOverflow) {
83+
if (BI->getFunction()->isSpecialization()) {
84+
// Do not report any constant propagation issues in specializations,
85+
// because they may not be present in the original function.
86+
return nullptr;
87+
}
8388
// Try to infer the type of the constant expression that the user operates
8489
// on. If the intrinsic was lowered from a call to a function that takes
8590
// two arguments of the same type, use the type of the LHS argument.

Diff for: lib/SILOptimizer/PassManager/PassPipeline.cpp

+8
Original file line numberDiff line numberDiff line change
@@ -233,6 +233,9 @@ void addSSAPasses(SILPassPipelinePlan &P, OptimizationLevelKind OpLevel) {
233233
// current function (after optimizing any new callees).
234234
P.addDevirtualizer();
235235
P.addGenericSpecializer();
236+
// Run devirtualizer after the specializer, because many
237+
// class_method/witness_method instructions may use concrete types now.
238+
P.addDevirtualizer();
236239

237240
switch (OpLevel) {
238241
case OptimizationLevelKind::HighLevel:
@@ -287,6 +290,11 @@ void addSSAPasses(SILPassPipelinePlan &P, OptimizationLevelKind OpLevel) {
287290
} else
288291
P.addEarlyCodeMotion();
289292

293+
P.addRetainSinking();
294+
// Retain sinking does not sink all retains in one round.
295+
// Let it run one more time, because it can be beneficial.
296+
// FIXME: Improve the RetainSinking pass to sink more/all
297+
// retains in one go.
290298
P.addRetainSinking();
291299
P.addReleaseHoisting();
292300
P.addARCSequenceOpts();

Diff for: lib/SILOptimizer/Transforms/PerformanceInliner.cpp

+8-3
Original file line numberDiff line numberDiff line change
@@ -194,11 +194,16 @@ bool SILPerformanceInliner::isProfitableToInline(FullApplySite AI,
194194
BaseBenefit = BaseBenefit / 2;
195195
}
196196

197+
// It is always OK to inline a simple call.
198+
// TODO: Maybe also consider the size of the callee?
199+
if (isPureCall(AI, SEA))
200+
return true;
201+
197202
// Bail out if this generic call can be optimized by means of
198203
// the generic specialization, because we prefer generic specialization
199204
// to inlining of generics.
200205
if (IsGeneric && canSpecializeGeneric(AI, Callee, AI.getSubstitutions())) {
201-
return isPureCall(AI, SEA);
206+
return false;
202207
}
203208

204209
SILLoopInfo *LI = LA->get(Callee);
@@ -338,7 +343,7 @@ bool SILPerformanceInliner::isProfitableToInline(FullApplySite AI,
338343
// Only inline trivial functions into thunks (which will not increase the
339344
// code size).
340345
if (CalleeCost > TrivialFunctionThreshold) {
341-
return isPureCall(AI, SEA);
346+
return false;
342347
}
343348

344349
DEBUG(
@@ -359,7 +364,7 @@ bool SILPerformanceInliner::isProfitableToInline(FullApplySite AI,
359364

360365
// This is the final inlining decision.
361366
if (CalleeCost > Benefit) {
362-
return isPureCall(AI, SEA);
367+
return false;
363368
}
364369

365370
NumCallerBlocks += Callee->size();

Diff for: lib/SILOptimizer/Utils/PerformanceInlinerUtils.cpp

+4
Original file line numberDiff line numberDiff line change
@@ -756,6 +756,10 @@ static bool isConstantValue(SILValue V) {
756756
}
757757
return true;
758758
}
759+
if (auto *MT = dyn_cast<MetatypeInst>(V)) {
760+
if (!MT->getType().hasArchetype())
761+
return true;
762+
}
759763
return false;
760764
}
761765

Diff for: test/SILOptimizer/inline_heuristics.sil

+15-16
Original file line numberDiff line numberDiff line change
@@ -223,26 +223,25 @@ bb0(%0 : $Int32):
223223
// CHECK-LOG-LABEL: Inline into caller: testCondBr
224224
// CHECK-LOG-NEXT: decision {{.*}}, b=50,
225225

226-
sil @testCondBr : $@convention(thin) () -> () {
227-
bb0:
228-
229-
%0 = function_ref @condBrCallee : $@convention(thin) (Int32) -> Int32
226+
sil @testCondBr : $@convention(thin) (Int32) -> () {
227+
bb0(%a : $Int32):
228+
%0 = function_ref @condBrCallee : $@convention(thin) (Int32, Int32) -> Int32
230229
%1 = integer_literal $Builtin.Int32, 27
231230
%2 = struct $Int32 (%1 : $Builtin.Int32)
232-
%3 = apply %0(%2) : $@convention(thin) (Int32) -> Int32
231+
%3 = apply %0(%2, %a) : $@convention(thin) (Int32, Int32) -> Int32
233232
%4 = tuple ()
234233
return %4 : $()
235234
}
236235

237-
sil @condBrCallee : $@convention(thin) (Int32) -> Int32 {
238-
bb0(%0 : $Int32):
236+
sil @condBrCallee : $@convention(thin) (Int32, Int32) -> Int32 {
237+
bb0(%0 : $Int32, %r : $Int32):
239238
%1 = integer_literal $Builtin.Int32, 27
240239
%2 = struct_extract %0 : $Int32, #Int32._value
241240
%3 = builtin "cmp_eq_Word"(%2 : $Builtin.Int32, %1 : $Builtin.Int32) : $Builtin.Int1
242241
cond_br %3, bb1, bb2
243242

244243
bb1:
245-
br bb3(%0 : $Int32)
244+
br bb3(%r : $Int32)
246245

247246
bb2:
248247
// increase the scope length
@@ -263,24 +262,24 @@ bb3(%8 : $Int32):
263262
// CHECK: return %{{.*}} : $()
264263

265264
// CHECK-LOG-LABEL: Inline into caller: testSwitchValue
266-
// CHECK-LOG-NEXT: decision {{.*}}, b=50,
265+
// CHECK-LOG-NEXT: decision {{.*}}, b=40,
267266

268-
sil @testSwitchValue : $@convention(thin) () -> () {
269-
bb0:
267+
sil @testSwitchValue : $@convention(thin) (Int32) -> () {
268+
bb0(%a : $Int32):
270269

271-
%0 = function_ref @switchValueCallee : $@convention(thin) (Int32) -> Int32
270+
%0 = function_ref @switchValueCallee : $@convention(thin) (Int32, Int32) -> Int32
272271
%1 = integer_literal $Builtin.Int32, 28
273272
%2 = struct $Int32 (%1 : $Builtin.Int32)
274-
%3 = apply %0(%2) : $@convention(thin) (Int32) -> Int32
273+
%3 = apply %0(%2, %a) : $@convention(thin) (Int32, Int32) -> Int32
275274
%4 = tuple ()
276275
return %4 : $()
277276
}
278277

279-
sil @switchValueCallee : $@convention(thin) (Int32) -> Int32 {
280-
bb0(%0 : $Int32):
278+
sil @switchValueCallee : $@convention(thin) (Int32, Int32) -> Int32 {
279+
bb0(%0 : $Int32, %r : $Int32):
281280
%1 = integer_literal $Builtin.Int32, 27
282281
%2 = integer_literal $Builtin.Int32, 28
283-
%3 = struct_extract %0 : $Int32, #Int32._value
282+
%3 = struct_extract %r : $Int32, #Int32._value
284283
switch_value %3 : $Builtin.Int32, case %1 : bb1, case %2 : bb2, default bb3
285284

286285
bb1:

Diff for: test/SILOptimizer/loop_unroll.sil

+71
Original file line numberDiff line numberDiff line change
@@ -210,3 +210,74 @@ bb3:
210210
%8 = tuple()
211211
return %8 : $()
212212
}
213+
214+
sil @big_func: $@convention(thin) () -> Builtin.Int64 {
215+
bb0:
216+
%x0 = integer_literal $Builtin.Int64, 1
217+
%overflow_check = integer_literal $Builtin.Int1, 0
218+
%sum1 = builtin "sadd_with_overflow_Int64"(%x0 : $Builtin.Int64, %x0 : $Builtin.Int64, %overflow_check : $Builtin.Int1) : $(Builtin.Int64, Builtin.Int1)
219+
%x1 = tuple_extract %sum1 : $(Builtin.Int64, Builtin.Int1), 0
220+
br bb1
221+
222+
bb1:
223+
%sum2 = builtin "sadd_with_overflow_Int64"(%x1 : $Builtin.Int64, %x1 : $Builtin.Int64, %overflow_check : $Builtin.Int1) : $(Builtin.Int64, Builtin.Int1)
224+
%x2 = tuple_extract %sum2 : $(Builtin.Int64, Builtin.Int1), 0
225+
br bb2
226+
227+
bb2:
228+
%sum3 = builtin "sadd_with_overflow_Int64"(%x2 : $Builtin.Int64, %x2 : $Builtin.Int64, %overflow_check : $Builtin.Int1) : $(Builtin.Int64, Builtin.Int1)
229+
%x3 = tuple_extract %sum3 : $(Builtin.Int64, Builtin.Int1), 0
230+
br bb3
231+
232+
bb3:
233+
%sum4 = builtin "sadd_with_overflow_Int64"(%x3 : $Builtin.Int64, %x3 : $Builtin.Int64, %overflow_check : $Builtin.Int1) : $(Builtin.Int64, Builtin.Int1)
234+
%x4 = tuple_extract %sum4 : $(Builtin.Int64, Builtin.Int1), 0
235+
br bb4
236+
237+
bb4:
238+
%sum5 = builtin "sadd_with_overflow_Int64"(%x4 : $Builtin.Int64, %x4 : $Builtin.Int64, %overflow_check : $Builtin.Int1) : $(Builtin.Int64, Builtin.Int1)
239+
%x5 = tuple_extract %sum5 : $(Builtin.Int64, Builtin.Int1), 0
240+
br bb5
241+
242+
bb5:
243+
%sum6 = builtin "sadd_with_overflow_Int64"(%x5 : $Builtin.Int64, %x5 : $Builtin.Int64, %overflow_check : $Builtin.Int1) : $(Builtin.Int64, Builtin.Int1)
244+
%x6 = tuple_extract %sum6 : $(Builtin.Int64, Builtin.Int1), 0
245+
br bb6
246+
247+
bb6:
248+
return %x6 : $Builtin.Int64
249+
}
250+
251+
// Check that the compiler does not unroll loops containing calls
252+
// of big inlineable functions.
253+
//
254+
// CHECK-LABEL: sil @unroll_with_apply
255+
// CHECK: apply
256+
// CHECK: // end sil function 'unroll_with_apply'
257+
sil @unroll_with_apply : $@convention(thin) () -> () {
258+
bb0:
259+
%0 = integer_literal $Builtin.Int64, 0
260+
%1 = integer_literal $Builtin.Int64, 1
261+
%2 = integer_literal $Builtin.Int64, 20
262+
%3 = integer_literal $Builtin.Int1, 1
263+
%f = function_ref @big_func: $@convention(thin) () -> Builtin.Int64
264+
br bb1(%0 : $Builtin.Int64)
265+
266+
bb1(%4 : $Builtin.Int64):
267+
%r = apply %f() : $@convention(thin) () -> Builtin.Int64
268+
br bb2
269+
270+
bb2:
271+
%5 = builtin "sadd_with_overflow_Int64"(%4 : $Builtin.Int64, %1 : $Builtin.Int64, %3 : $Builtin.Int1) : $(Builtin.Int64, Builtin.Int1)
272+
%6 = tuple_extract %5 : $(Builtin.Int64, Builtin.Int1), 0
273+
%7 = builtin "cmp_eq_Int64"(%6 : $Builtin.Int64, %2 : $Builtin.Int64) : $Builtin.Int1
274+
cond_br %7, bb4, bb3
275+
276+
bb3:
277+
br bb1(%6 : $Builtin.Int64)
278+
279+
bb4:
280+
%8 = tuple()
281+
return %8 : $()
282+
}
283+

Diff for: test/SILOptimizer/retain_release_code_motion.sil

+72
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
// RUN: %target-sil-opt -assume-parsing-unqualified-ownership-sil -enable-sil-verify-all -retain-sinking -late-release-hoisting %s | %FileCheck %s
2+
// RUN: %target-sil-opt -assume-parsing-unqualified-ownership-sil -enable-sil-verify-all -retain-sinking -retain-sinking -late-release-hoisting %s | %FileCheck --check-prefix=CHECK-MULTIPLE-RS-ROUNDS %s
23

34
import Builtin
45
import Swift
@@ -59,6 +60,8 @@ enum Optional<T> {
5960
case some(T)
6061
}
6162

63+
sil @createS : $@convention(thin) () -> @owned S
64+
6265
sil @use_C2 : $@convention(thin) (C2) -> ()
6366
sil @user : $@convention(thin) (Builtin.NativeObject) -> ()
6467
sil @user_int : $@convention(thin) (Int) -> ()
@@ -531,3 +534,72 @@ bb3:
531534
%1 = tuple()
532535
return %1 : $()
533536
}
537+
538+
/// Check that retain sinking needs multiple passes to sink 2 retains.
539+
540+
/// One round of retain-sinking can sink only one of retains.
541+
/// CHECK-LABEL: sil @checkRetainSinkingMultipleRounds
542+
/// CHECK: bb9:
543+
/// CHECK-NEXT: retain_value %2 : $S
544+
/// CHECK-NEXT: release_value %2 : $S
545+
/// CHECK-NEXT: release_value %2 : $S
546+
/// In the ideal world, we should see a third retain_value here.
547+
/// But it would require another round of retain sinking.
548+
/// CHECK-NEXT: br bb5
549+
550+
551+
/// Two rounds of retain-sinking can sink only two retains.
552+
/// CHECK-MULTIPLE-RS-ROUNDS-LABEL: sil @checkRetainSinkingMultipleRounds
553+
/// CHECK-MULTIPLE-RS-ROUNDS: bb9:
554+
/// CHECK-MULTIPLE-RS-ROUNDS-NEXT: retain_value %2 : $S
555+
/// CHECK-MULTIPLE-RS-ROUNDS-NEXT: retain_value %2 : $S
556+
/// CHECK-MULTIPLE-RS-ROUNDS-NEXT: release_value %2 : $S
557+
/// CHECK-MULTIPLE-RS-ROUNDS-NEXT: release_value %2 : $S
558+
/// CHECK-MULTIPLE-RS-ROUNDS-NEXT: br bb5
559+
560+
sil @checkRetainSinkingMultipleRounds : $@convention(thin) (Int) -> () {
561+
bb0(%0 : $Int):
562+
%1 = function_ref @createS : $@convention(thin) () -> @owned S
563+
%2 = apply %1() : $@convention(thin) () -> @owned S
564+
br bb2
565+
566+
bb1:
567+
cond_br undef, bb10, bb11
568+
569+
bb2:
570+
cond_br undef, bb4, bb6
571+
572+
bb3:
573+
br bb2
574+
575+
bb4:
576+
retain_value %2 : $S
577+
br bb5
578+
579+
bb5:
580+
release_value %2 : $S
581+
cond_br undef, bb1, bb3
582+
583+
bb6:
584+
retain_value %2 : $S
585+
retain_value %2 : $S
586+
br bb7
587+
588+
bb7:
589+
cond_br undef, bb9, bb8
590+
591+
bb8:
592+
br bb7
593+
594+
bb9:
595+
release_value %2 : $S
596+
br bb5
597+
598+
bb10:
599+
unreachable
600+
601+
bb11:
602+
release_value %2 : $S
603+
%26 = tuple ()
604+
return %26 : $()
605+
}

0 commit comments

Comments
 (0)