From 428ab472ac36d3bb087455aa439a3180e103def7 Mon Sep 17 00:00:00 2001
From: Michael Gottesman <mgottesman@apple.com>
Date: Mon, 8 Feb 2021 17:53:47 -0800
Subject: [PATCH 01/11] [sil-combine] Turn off try_apply convert_function
 elimination on both ossa/non-ossa SIL.

In SILCombine, we do not want to add or delete edges. We are ok with swapping
edges or replacing edges when the CFG structure is preserved. This becomes an
issue since by performing this optimization, we are going to get rid of the
error parameter but leave a try_apply, breaking SIL invariants. So to do perform
this optimization, we would need to convert to an apply and eliminate the error
edge, breaking the aforementioned SILCombine invariant. So, just do not perform
this for now and leave it to other passes like SimplifyCFG.
---
 .../SILCombiner/SILCombinerApplyVisitors.cpp  |  4 ---
 test/SILOptimizer/sil_combine.sil             | 25 +++++++++++++++++++
 test/SILOptimizer/sil_combine_ossa.sil        | 25 +++++++++++++++++++
 3 files changed, 50 insertions(+), 4 deletions(-)
diff --git a/lib/SILOptimizer/SILCombiner/SILCombinerApplyVisitors.cpp b/lib/SILOptimizer/SILCombiner/SILCombinerApplyVisitors.cpp
index 0413a90a713b1..0eeaa75bdb829 100644
--- a/lib/SILOptimizer/SILCombiner/SILCombinerApplyVisitors.cpp
+++ b/lib/SILOptimizer/SILCombiner/SILCombinerApplyVisitors.cpp
@@ -1709,10 +1709,6 @@ SILInstruction *SILCombiner::visitTryApplyInst(TryApplyInst *AI) {
   if (isa<PartialApplyInst>(AI->getCallee()))
     return nullptr;
 
-  if (auto *CFI = dyn_cast<ConvertFunctionInst>(AI->getCallee())) {
-    return optimizeApplyOfConvertFunctionInst(AI, CFI);
-  }
-
   // Optimize readonly functions with no meaningful users.
   SILFunction *Fn = AI->getReferencedFunctionOrNull();
   if (Fn && Fn->getEffectsKind() < EffectsKind::ReleaseNone) {
diff --git a/test/SILOptimizer/sil_combine.sil b/test/SILOptimizer/sil_combine.sil
index 55e04f7fc000e..e5578f387f7c9 100644
--- a/test/SILOptimizer/sil_combine.sil
+++ b/test/SILOptimizer/sil_combine.sil
@@ -4162,3 +4162,28 @@ bb0:
   %1 = load %0 : $*Int64
   return %1 : $Int64
 }
+
+sil @convert_raw_pointer_to_nativeobject : $@convention(thin) (Builtin.RawPointer) -> @owned Builtin.NativeObject
+
+// To eliminate the convert_function below we need to convert the try_apply to
+// an apply. We do not delete edges like this in SILCombine with ownership and
+// are trying to reduce it without ownership. So make sure we don't optimize
+// this. This should be done in a different pass that handles CFG issues.
+//
+// CHECK-LABEL: sil @do_not_eliminate_error_adding_convert_function_used_by_try_apply_nonownership : $@convention(thin) (Builtin.RawPointer) -> @owned Builtin.NativeObject {
+// CHECK: convert_function
+// CHECK: try_apply
+// CHECK: } // end sil function 'do_not_eliminate_error_adding_convert_function_used_by_try_apply_nonownership'
+sil @do_not_eliminate_error_adding_convert_function_used_by_try_apply_nonownership : $@convention(thin) (Builtin.RawPointer) -> @owned Builtin.NativeObject {
+bb0(%0 : $Builtin.RawPointer):
+  %f = function_ref @convert_raw_pointer_to_nativeobject : $@convention(thin) (Builtin.RawPointer) -> @owned Builtin.NativeObject
+  %t = thin_to_thick_function %f : $@convention(thin) (Builtin.RawPointer) -> @owned Builtin.NativeObject to $@noescape @callee_guaranteed (Builtin.RawPointer) -> @owned Builtin.NativeObject
+  %c = convert_function %t : $@noescape @callee_guaranteed (Builtin.RawPointer) -> @owned Builtin.NativeObject to $@noescape @callee_guaranteed (Builtin.RawPointer) -> (@owned Builtin.NativeObject, @error Error)
+  try_apply %c(%0) : $@noescape @callee_guaranteed (Builtin.RawPointer) -> (@owned Builtin.NativeObject, @error Error), normal bb1, error bb2
+
+bb1(%result : $Builtin.NativeObject):
+  return %result : $Builtin.NativeObject
+
+bb2(%error : $Error):
+  unreachable
+}
\ No newline at end of file
diff --git a/test/SILOptimizer/sil_combine_ossa.sil b/test/SILOptimizer/sil_combine_ossa.sil
index f61966018fcd1..dfa58e88d42c6 100644
--- a/test/SILOptimizer/sil_combine_ossa.sil
+++ b/test/SILOptimizer/sil_combine_ossa.sil
@@ -4901,3 +4901,28 @@ bb3(%4 : @owned $KlassNativeObjEither):
   return %4 : $KlassNativeObjEither
 }
 
+
+sil @convert_raw_pointer_to_nativeobject : $@convention(thin) (Builtin.RawPointer) -> @owned Builtin.NativeObject
+
+// To eliminate the convert_function below we need to convert the try_apply to
+// an apply. We do not delete edges like this in SILCombine with ownership and
+// are trying to reduce it without ownership. So make sure we don't optimize
+// this. This should be done in a different pass that handles CFG issues.
+//
+// CHECK-LABEL: sil [ossa] @do_not_eliminate_error_adding_convert_function_used_by_try_apply : $@convention(thin) (Builtin.RawPointer) -> @owned Builtin.NativeObject {
+// CHECK: convert_function
+// CHECK: try_apply
+// CHECK: } // end sil function 'do_not_eliminate_error_adding_convert_function_used_by_try_apply'
+sil [ossa] @do_not_eliminate_error_adding_convert_function_used_by_try_apply : $@convention(thin) (Builtin.RawPointer) -> @owned Builtin.NativeObject {
+bb0(%0 : $Builtin.RawPointer):
+  %f = function_ref @convert_raw_pointer_to_nativeobject : $@convention(thin) (Builtin.RawPointer) -> @owned Builtin.NativeObject
+  %t = thin_to_thick_function %f : $@convention(thin) (Builtin.RawPointer) -> @owned Builtin.NativeObject to $@noescape @callee_guaranteed (Builtin.RawPointer) -> @owned Builtin.NativeObject
+  %c = convert_function %t : $@noescape @callee_guaranteed (Builtin.RawPointer) -> @owned Builtin.NativeObject to $@noescape @callee_guaranteed (Builtin.RawPointer) -> (@owned Builtin.NativeObject, @error Error)
+  try_apply %c(%0) : $@noescape @callee_guaranteed (Builtin.RawPointer) -> (@owned Builtin.NativeObject, @error Error), normal bb1, error bb2
+
+bb1(%result : @owned $Builtin.NativeObject):
+  return %result : $Builtin.NativeObject
+
+bb2(%error : @owned $Error):
+  unreachable
+}
\ No newline at end of file

From 705373c20525f04b7be8f8f3600286532d546e75 Mon Sep 17 00:00:00 2001
From: Michael Gottesman <mgottesman@apple.com>
Date: Mon, 8 Feb 2021 18:36:58 -0800
Subject: [PATCH 02/11] [pred-deadalloc-elim] Fix memory safety issue and
 handle promoting paths where there is dynamically no value by inserting
 compensating destroys.

This commit is fixing two things:

1. In certain cases, we are seeing cases where either SILGen or the optimizer
are eliminating destroy_addr along paths where we know that an enum is
dynamically trivial. This can not be expressed in OSSA, so I added code to
pred-deadalloc-elim so that I check if any of our available values after we
finish promoting away an allocation now need to have their consuming use set
completed.

2. That led me to discover that in certain cases load [take] that we were
promoting were available values of other load [take]. This means that we have a
memory safety issue if we promote one load before the other. Consider the
following SIL:

```
%mem = alloc_stack
store %arg to [init] %mem
%0 = load [take] %mem
store %0 to [init] %mem
%1 = load [take] %mem
destroy_value %1
dealloc_stack %mem
```

In this case, if we eliminate %0 before we eliminate %1, we will have a stale
pointer to %0.

I also took this as an opportunity to turn off predictable mem access opt on SIL
that was deserialized canonicalized and non-OSSA SIL. We evidently need to still
do this for pred mem opts for perf reasons (not sure why). But I am pretty sure
this isn't needed and allows me to avoid some nasty code.
---
 .../Mandatory/PredictableMemOpt.cpp           | 278 +++++++++++++++++-
 .../predictable_deadalloc_elim.sil            | 267 -----------------
 .../predictable_deadalloc_elim_ownership.sil  | 182 ++++++++++++
 test/SILOptimizer/predictable_memopt.sil      |   4 +-
 4 files changed, 451 insertions(+), 280 deletions(-)
 delete mode 100644 test/SILOptimizer/predictable_deadalloc_elim.sil

diff --git a/lib/SILOptimizer/Mandatory/PredictableMemOpt.cpp b/lib/SILOptimizer/Mandatory/PredictableMemOpt.cpp
index 0564a9d38bbf6..20d72e2ff98f9 100644
--- a/lib/SILOptimizer/Mandatory/PredictableMemOpt.cpp
+++ b/lib/SILOptimizer/Mandatory/PredictableMemOpt.cpp
@@ -13,18 +13,20 @@
 #define DEBUG_TYPE "predictable-memopt"
 
 #include "PMOMemoryUseCollector.h"
+#include "swift/Basic/BlotMapVector.h"
 #include "swift/Basic/BlotSetVector.h"
 #include "swift/Basic/FrozenMultiMap.h"
 #include "swift/Basic/STLExtras.h"
+#include "swift/SIL/BasicBlockBits.h"
 #include "swift/SIL/BasicBlockUtils.h"
 #include "swift/SIL/LinearLifetimeChecker.h"
 #include "swift/SIL/OwnershipUtils.h"
 #include "swift/SIL/SILBuilder.h"
-#include "swift/SIL/BasicBlockBits.h"
 #include "swift/SILOptimizer/PassManager/Passes.h"
 #include "swift/SILOptimizer/PassManager/Transforms.h"
 #include "swift/SILOptimizer/Utils/CFGOptUtils.h"
 #include "swift/SILOptimizer/Utils/InstOptUtils.h"
+#include "swift/SILOptimizer/Utils/OwnershipOptUtils.h"
 #include "swift/SILOptimizer/Utils/SILSSAUpdater.h"
 #include "swift/SILOptimizer/Utils/ValueLifetime.h"
 #include "llvm/ADT/SmallBitVector.h"
@@ -1959,7 +1961,17 @@ class AllocOptimize {
   bool promoteLoadCopy(LoadInst *li);
   bool promoteLoadBorrow(LoadBorrowInst *lbi);
   bool promoteCopyAddr(CopyAddrInst *cai);
-  void promoteLoadTake(LoadInst *Inst, MutableArrayRef<AvailableValue> values);
+
+  /// Promote a load take cleaning up everything except for RAUWing the
+  /// instruction with the aggregated result. The routine returns the new
+  /// aggreaged result to the caller and expects the caller to eventually RAUW
+  /// \p inst with the return value. The reason why we do this is to allow for
+  /// the caller to work around invalidation issues by not deleting the load
+  /// [take] until after all load [take] have been cleaned up.
+  ///
+  /// \returns the value that the caller will RAUW with \p inst.
+  SILValue promoteLoadTake(LoadInst *inst,
+                           MutableArrayRef<AvailableValue> values);
   void promoteDestroyAddr(DestroyAddrInst *dai,
                           MutableArrayRef<AvailableValue> values);
   bool canPromoteTake(SILInstruction *i,
@@ -2293,7 +2305,7 @@ void AllocOptimize::promoteDestroyAddr(
   dai->eraseFromParent();
 }
 
-void AllocOptimize::promoteLoadTake(
+SILValue AllocOptimize::promoteLoadTake(
     LoadInst *li, MutableArrayRef<AvailableValue> availableValues) {
   assert(li->getOwnershipQualifier() == LoadOwnershipQualifier::Take &&
          "load [copy], load [trivial], load should be handled by "
@@ -2311,15 +2323,15 @@ void AllocOptimize::promoteLoadTake(
   AvailableValueAggregator agg(li, availableValues, Uses, deadEndBlocks,
                                AvailableValueExpectedOwnership::Take);
   SILValue newVal = agg.aggregateValues(loadTy, address, firstElt);
+  assert(newVal);
 
   ++NumLoadTakePromoted;
 
   LLVM_DEBUG(llvm::dbgs() << "  *** Promoting load_take: " << *li);
   LLVM_DEBUG(llvm::dbgs() << "      To value: " << *newVal);
 
-  // Then perform the RAUW.
-  li->replaceAllUsesWith(newVal);
-  li->eraseFromParent();
+  // Our parent RAUWs with newVal/erases li.
+  return newVal;
 }
 
 namespace {
@@ -2335,6 +2347,33 @@ struct TakePromotionState {
 
   unsigned size() const { return takeInstIndices.size(); }
 
+  void verify() {
+#ifndef NDEBUG
+    for (unsigned i : range(size())) {
+      SILInstruction *inst;
+      MutableArrayRef<AvailableValue> data;
+      std::tie(inst, data) = getData(i);
+      assert(inst);
+      inst->verifyOperandOwnership();
+      assert(!data.empty() && "Value without any available values?!");
+    }
+#endif
+  }
+
+  void verify(unsigned startOffset) {
+#ifndef NDEBUG
+    assert(startOffset < size());
+    for (unsigned i : range(startOffset, size())) {
+      SILInstruction *inst;
+      MutableArrayRef<AvailableValue> data;
+      std::tie(inst, data) = getData(i);
+      assert(inst);
+      inst->verifyOperandOwnership();
+      assert(!data.empty() && "Value without any available values?!");
+    }
+#endif
+  }
+
   void initializeForTakeInst(unsigned takeInstIndex) {
     availableValueStartOffsets.push_back(availableValueList.size());
     takeInstIndices.push_back(takeInstIndex);
@@ -2352,9 +2391,8 @@ struct TakePromotionState {
       count = availableValueList.size() - startOffset;
     }
 
-    MutableArrayRef<AvailableValue> values(&availableValueList[startOffset],
-                                           count);
-    return {takeInsts[takeInstIndex], values};
+    auto values = MutableArrayRef<AvailableValue>(availableValueList);
+    return {takeInsts[takeInstIndex], values.slice(startOffset, count)};
   }
 };
 
@@ -2424,6 +2462,8 @@ static bool isRemovableAutogeneratedAllocation(AllocationInst *TheMemory) {
 }
 
 bool AllocOptimize::tryToRemoveDeadAllocation() {
+  assert(TheMemory->getFunction()->hasOwnership() &&
+         "Can only eliminate dead allocations with ownership enabled");
   assert((isa<AllocBoxInst>(TheMemory) || isa<AllocStackInst>(TheMemory)) &&
          "Unhandled allocation case");
 
@@ -2492,8 +2532,32 @@ bool AllocOptimize::tryToRemoveDeadAllocation() {
   }
 
   // If we reached this point, we can promote all of our destroy_addr and load
-  // take. Since our load [take] may be available values for our destroy_addr,
-  // we promote the destroy_addr first.
+  // take. Before we begin, gather up all found available values before we do
+  // anything so we can fix up lifetimes later if we need to.
+  SmallBlotSetVector<SILValue, 32> valuesNeedingLifetimeCompletion;
+  for (auto pmoMemUse : Uses) {
+    if (pmoMemUse.Inst && pmoMemUse.Kind == PMOUseKind::Initialization) {
+      // Today if we promote, this is always a store, since we would have
+      // blown up the copy_addr otherwise. Given that, always make sure we
+      // clean up the src as appropriate after we optimize.
+      auto *si = cast<StoreInst>(pmoMemUse.Inst);
+      auto src = si->getSrc();
+
+      // Bail if src has any uses that are forwarding unowned uses. This
+      // allows us to know that we never have to deal with forwarding unowned
+      // instructions like br. These are corner cases that complicate the
+      // logic below.
+      for (auto *use : src->getUses()) {
+        if (use->getOperandOwnership() == OperandOwnership::ForwardingUnowned)
+          return false;
+      }
+      valuesNeedingLifetimeCompletion.insert(src);
+    }
+  }
+
+  // Since our load [take] may be available values for our
+  // destroy_addr/load [take], we promote the destroy_addr first and then handle
+  // load [take] with extra rigour later to handle that possibility.
   for (unsigned i : range(destroyAddrState.size())) {
     SILInstruction *dai;
     MutableArrayRef<AvailableValue> values;
@@ -2501,11 +2565,59 @@ bool AllocOptimize::tryToRemoveDeadAllocation() {
     promoteDestroyAddr(cast<DestroyAddrInst>(dai), values);
     // We do not need to unset releases, since we are going to exit here.
   }
+
+  llvm::SmallMapVector<LoadInst *, SILValue, 32> loadsToDelete;
   for (unsigned i : range(loadTakeState.size())) {
     SILInstruction *li;
     MutableArrayRef<AvailableValue> values;
     std::tie(li, values) = loadTakeState.getData(i);
-    promoteLoadTake(cast<LoadInst>(li), values);
+
+    for (unsigned i : indices(values)) {
+      auto v = values[i].Value;
+      auto *li = dyn_cast<LoadInst>(v);
+      if (!li)
+        continue;
+
+      auto iter = loadsToDelete.find(li);
+      if (iter == loadsToDelete.end())
+        continue;
+
+      SILValue newValue = iter->second;
+      assert(newValue && "We should neer store a nil SILValue into this map");
+      values[i].Value = newValue;
+    }
+
+    auto *liCast = cast<LoadInst>(li);
+    SILValue result = promoteLoadTake(liCast, values);
+    assert(result);
+
+    // We need to erase liCast here before we erase it since a load [take] that
+    // we are promoting could be an available value for another load
+    // [take]. Consider the following SIL:
+    //
+    // %mem = alloc_stack
+    // store %arg to [init] %mem
+    // %0 = load [take] %mem
+    // store %0 to [init] %mem
+    // %1 = load [take] %mem
+    // destroy_value %1
+    // dealloc_stack %mem
+    //
+    // In such a case, we are going to delete %0 here, but %0 is an available
+    // value for %1, so we will
+    auto insertIter = loadsToDelete.insert({liCast, result});
+    valuesNeedingLifetimeCompletion.erase(liCast);
+    (void)insertIter;
+    assert(insertIter.second && "loadTakeState doesn't have unique loads?!");
+  }
+
+  // Now that we have promoted all of our load [take], perform the actual
+  // RAUW/removal.
+  for (auto p : loadsToDelete) {
+    LoadInst *li = p.first;
+    SILValue newValue = p.second;
+    li->replaceAllUsesWith(newValue);
+    li->eraseFromParent();
   }
 
   LLVM_DEBUG(llvm::dbgs() << "*** Removing autogenerated non-trivial alloc: "
@@ -2516,6 +2628,144 @@ bool AllocOptimize::tryToRemoveDeadAllocation() {
   // caller remove the allocation itself to avoid iterator invalidation.
   eraseUsesOfInstruction(TheMemory);
 
+  // Now look at all of our available values and complete any of their
+  // post-dominating consuming use sets. This can happen if we have an enum that
+  // is known dynamically none along a path. This is dynamically correct, but
+  // can not be represented in OSSA so we insert these destroys along said path.
+  SmallVector<SILBasicBlock *, 32> consumingUseBlocks;
+  while (!valuesNeedingLifetimeCompletion.empty()) {
+    auto optV = valuesNeedingLifetimeCompletion.pop_back_val();
+    if (!optV)
+      continue;
+    SILValue v = *optV;
+    if (v.getOwnershipKind() != OwnershipKind::Owned)
+      continue;
+
+    // First see if our value doesn't have any uses. In such a case, just
+    // insert a destroy_value at the next instruction and return.
+    if (v->use_empty()) {
+      auto *next = v->getNextInstruction();
+      auto loc = RegularLocation::getAutoGeneratedLocation();
+      SILBuilderWithScope localBuilder(next);
+      localBuilder.createDestroyValue(loc, v);
+      continue;
+    }
+
+    // Otherwise, we first see if we have any consuming uses at all. If we do,
+    // then we know that any such consuming uses since we have an owned value
+    // /must/ be strongly control equivalent to our value and unreachable from
+    // each other, so we can just use findJointPostDominatingSet to complete
+    // the set.
+    consumingUseBlocks.clear();
+    for (auto *use : v->getConsumingUses())
+      consumingUseBlocks.push_back(use->getParentBlock());
+
+    if (!consumingUseBlocks.empty()) {
+      findJointPostDominatingSet(
+          v->getParentBlock(), consumingUseBlocks, [](SILBasicBlock *) {},
+          [&](SILBasicBlock *result) {
+            auto loc = RegularLocation::getAutoGeneratedLocation();
+            SILBuilderWithScope builder(result);
+            builder.createDestroyValue(loc, v);
+          });
+      continue;
+    }
+
+    // If we do not have at least one consuming use, we need to do something
+    // different. This situation can occur given a non-trivial enum typed
+    // stack allocation that:
+    //
+    // 1. Had a destroy_addr eliminated along a path where we dynamically know
+    //    that the stack allocation is storing a trivial case.
+    //
+    // 2. Had some other paths where due to dead end blocks, no destroy_addr
+    //    is needed.
+    //
+    // To fix this, we just treat all uses as consuming blocks and insert
+    // destroys using the joint post dominance set computer and insert
+    // destroys at the end of all input blocks in the post dom set and at the
+    // beginning of any leaking blocks.
+    {
+      // TODO: Can we just pass this in to findJointPostDominatingSet instead
+      // of recomputing it there? Maybe an overload that lets us do this?
+      BasicBlockSet foundUseBlocks(v->getFunction());
+      for (auto *use : v->getUses()) {
+        auto *block = use->getParentBlock();
+        if (!foundUseBlocks.insert(block))
+          continue;
+        consumingUseBlocks.push_back(block);
+      }
+    }
+    findJointPostDominatingSet(
+        v->getParentBlock(), consumingUseBlocks,
+        [&](SILBasicBlock *foundInputBlock) {
+          // This is a block that is reachable from another use. We are not
+          // interested in these.
+        },
+        [&](SILBasicBlock *leakingBlock) {
+          auto loc = RegularLocation::getAutoGeneratedLocation();
+          SILBuilderWithScope builder(leakingBlock);
+          builder.createDestroyValue(loc, v);
+        },
+        [&](SILBasicBlock *inputBlockInPostDomSet) {
+          auto *termInst = inputBlockInPostDomSet->getTerminator();
+          switch (termInst->getTermKind()) {
+          case TermKind::UnreachableInst:
+            // We do not care about input blocks that end in unreachables. We
+            // are going to leak down them so do not insert a destroy_value
+            // there.
+            return;
+
+          // NOTE: Given that our input value is owned, our branch can only
+          // accept the use as a non-consuming use if the branch is forwarding
+          // unowned ownership. Luckily for use, we checked early if we had
+          // any such uses and bailed, so we know the branch can not use our
+          // value. This is just avoiding a corner case that we don't need to
+          // handle.
+          case TermKind::BranchInst:
+            LLVM_FALLTHROUGH;
+          // NOTE: We put cond_br here since in OSSA, cond_br can never have
+          // a non-trivial value operand, meaning we can insert before.
+          case TermKind::CondBranchInst:
+            LLVM_FALLTHROUGH;
+          case TermKind::ReturnInst:
+          case TermKind::ThrowInst:
+          case TermKind::UnwindInst:
+          case TermKind::YieldInst: {
+            // These terminators can never be non-consuming uses of an owned
+            // value since we would be leaking the owned value no matter what
+            // we do. Given that, we can assume that what ever the
+            // non-consuming use actually was, must be before this
+            // instruction. So insert the destroy_value at the end of the
+            // block, before the terminator.
+            auto loc = RegularLocation::getAutoGeneratedLocation();
+            SILBuilderWithScope localBuilder(termInst);
+            localBuilder.createDestroyValue(loc, v);
+            return;
+          }
+          case TermKind::TryApplyInst:
+          case TermKind::SwitchValueInst:
+          case TermKind::SwitchEnumInst:
+          case TermKind::SwitchEnumAddrInst:
+          case TermKind::DynamicMethodBranchInst:
+          case TermKind::AwaitAsyncContinuationInst:
+          case TermKind::CheckedCastBranchInst:
+          case TermKind::CheckedCastAddrBranchInst:
+          case TermKind::CheckedCastValueBranchInst: {
+            // Otherwise, we insert the destroy_addr /after/ the
+            // terminator. All of these are guaranteed to have each successor
+            // to have the block as its only predecessor block.
+            SILBuilderWithScope::insertAfter(termInst, [&](auto &b) {
+              auto loc = RegularLocation::getAutoGeneratedLocation();
+              b.createDestroyValue(loc, v);
+            });
+            return;
+          }
+          }
+          llvm_unreachable("Case that did not return in its body?!");
+        });
+  }
+
   return true;
 }
 
@@ -2687,6 +2937,10 @@ class PredictableMemoryAccessOptimizations : public SILFunctionTransform {
 
 class PredictableDeadAllocationElimination : public SILFunctionTransform {
   void run() override {
+    // If we are already canonical or do not have ownership, just bail.
+    if (getFunction()->wasDeserializedCanonical() ||
+        !getFunction()->hasOwnership())
+      return;
     if (eliminateDeadAllocations(*getFunction()))
       invalidateAnalysis(SILAnalysis::InvalidationKind::FunctionBody);
   }
diff --git a/test/SILOptimizer/predictable_deadalloc_elim.sil b/test/SILOptimizer/predictable_deadalloc_elim.sil
deleted file mode 100644
index 3e9f10362a71a..0000000000000
--- a/test/SILOptimizer/predictable_deadalloc_elim.sil
+++ /dev/null
@@ -1,267 +0,0 @@
-// RUN: %target-sil-opt -enable-sil-verify-all %s -predictable-deadalloc-elim | %FileCheck %s
-
-sil_stage canonical
-
-import Swift
-import Builtin
-
-// CHECK-LABEL: sil @simple_trivial_stack : $@convention(thin) (Builtin.Int32) -> () {
-// CHECK-NOT: alloc_stack
-// CHECK: } // end sil function 'simple_trivial_stack'
-sil @simple_trivial_stack : $@convention(thin) (Builtin.Int32) -> () {
-bb0(%0 : $Builtin.Int32):
-  %1 = alloc_stack $Builtin.Int32
-  store %0 to %1 : $*Builtin.Int32
-  dealloc_stack %1 : $*Builtin.Int32
-  %9999 = tuple()
-  return %9999 : $()
-}
-
-// CHECK-LABEL: sil @simple_trivial_init_box : $@convention(thin) (Builtin.Int32) -> () {
-// CHECK-NOT: alloc_box
-// CHECK: } // end sil function 'simple_trivial_init_box'
-sil @simple_trivial_init_box : $@convention(thin) (Builtin.Int32) -> () {
-bb0(%0 : $Builtin.Int32):
-  %1 = alloc_box ${ var Builtin.Int32 }
-  %2 = project_box %1 : ${ var Builtin.Int32 }, 0
-  store %0 to %2 : $*Builtin.Int32
-  strong_release %1 : ${ var Builtin.Int32 }
-  %9999 = tuple()
-  return %9999 : $()
-}
-
-// CHECK-LABEL: sil @simple_trivial_uninit_box : $@convention(thin) (Builtin.Int32) -> () {
-// CHECK-NOT: alloc_box
-// CHECK: } // end sil function 'simple_trivial_uninit_box'
-sil @simple_trivial_uninit_box : $@convention(thin) (Builtin.Int32) -> () {
-bb0(%0 : $Builtin.Int32):
-  %1 = alloc_box ${ var Builtin.Int32 }
-  %2 = project_box %1 : ${ var Builtin.Int32 }, 0
-  store %0 to %2 : $*Builtin.Int32
-  dealloc_box %1 : ${ var Builtin.Int32 }
-  %9999 = tuple()
-  return %9999 : $()
-}
-
-// CHECK-LABEL: sil @simple_nontrivial_stack : $@convention(thin) (@owned Builtin.NativeObject) -> () {
-// CHECK: bb0([[ARG:%.*]] :
-// CHECK-NEXT: strong_release [[ARG]]
-// CHECK-NEXT: tuple
-// CHECK-NEXT: return
-// CHECK: } // end sil function 'simple_nontrivial_stack'
-sil @simple_nontrivial_stack : $@convention(thin) (@owned Builtin.NativeObject) -> () {
-bb0(%0 : $Builtin.NativeObject):
-  %1 = alloc_stack $Builtin.NativeObject
-  store %0 to %1 : $*Builtin.NativeObject
-  destroy_addr %1 : $*Builtin.NativeObject
-  dealloc_stack %1 : $*Builtin.NativeObject
-  %9999 = tuple()
-  return %9999 : $()
-}
-
-// We do not handle this today, since we do not understand that we need to treat
-// the strong_release of the alloc_box as a destroy_addr of the entire value.
-//
-// FIXME: We should be able to handle this.
-//
-// CHECK-LABEL: sil @simple_nontrivial_init_box : $@convention(thin) (@owned Builtin.NativeObject) -> () {
-// CHECK: alloc_box
-// CHECK: } // end sil function 'simple_nontrivial_init_box'
-sil @simple_nontrivial_init_box : $@convention(thin) (@owned Builtin.NativeObject) -> () {
-bb0(%0 : $Builtin.NativeObject):
-  %1 = alloc_box ${ var Builtin.NativeObject }
-  %2 = project_box %1 : ${ var Builtin.NativeObject }, 0
-  store %0 to %2 : $*Builtin.NativeObject
-  strong_release %1 : ${ var Builtin.NativeObject }
-  %9999 = tuple()
-  return %9999 : $()
-}
-
-// CHECK-LABEL: sil @simple_nontrivial_uninit_box : $@convention(thin) (@owned Builtin.NativeObject) -> () {
-// CHECK: bb0([[ARG:%.*]] :
-// CHECK-NEXT: strong_release [[ARG]]
-// CHECK-NEXT: tuple
-// CHECK-NEXT: return
-// CHECK: } // end sil function 'simple_nontrivial_uninit_box'
-sil @simple_nontrivial_uninit_box : $@convention(thin) (@owned Builtin.NativeObject) -> () {
-bb0(%0 : $Builtin.NativeObject):
-  %1 = alloc_box ${ var Builtin.NativeObject }
-  %2 = project_box %1 : ${ var Builtin.NativeObject }, 0
-  store %0 to %2 : $*Builtin.NativeObject
-  destroy_addr %2 : $*Builtin.NativeObject
-  dealloc_box %1 : ${ var Builtin.NativeObject }
-  %9999 = tuple()
-  return %9999 : $()
-}
-
-//////////////////
-// Assign Tests //
-//////////////////
-
-// Make sure that we do eliminate this allocation
-// CHECK-LABEL: sil @simple_assign_take_trivial : $@convention(thin) (Builtin.Int32, @in Builtin.Int32) -> () {
-// CHECK-NOT: alloc_stack
-// CHECK: } // end sil function 'simple_assign_take_trivial'
-sil @simple_assign_take_trivial : $@convention(thin) (Builtin.Int32, @in Builtin.Int32) -> () {
-bb0(%0 : $Builtin.Int32, %1 : $*Builtin.Int32):
-  %2 = alloc_stack $Builtin.Int32
-  store %0 to %2 : $*Builtin.Int32
-  copy_addr [take] %1 to %2 : $*Builtin.Int32
-  dealloc_stack %2 : $*Builtin.Int32
-  %9999 = tuple()
-  return %9999 : $()
-}
-
-// In this case, we perform an init, copy. Since we do not want to lose the +1
-// on the argument, we do not eliminate this (even though with time perhaps we
-// could).
-// CHECK-LABEL: sil @simple_init_copy : $@convention(thin) (@owned Builtin.NativeObject, @in_guaranteed Builtin.NativeObject) -> () {
-// CHECK: alloc_stack
-// CHECK: copy_addr
-// CHECK: } // end sil function 'simple_init_copy'
-sil @simple_init_copy : $@convention(thin) (@owned Builtin.NativeObject, @in_guaranteed Builtin.NativeObject) -> () {
-bb0(%0 : $Builtin.NativeObject, %1 : $*Builtin.NativeObject):
-  %2 = alloc_stack $Builtin.NativeObject
-  store %0 to %2 : $*Builtin.NativeObject
-  destroy_addr %2 : $*Builtin.NativeObject
-  copy_addr %1 to [initialization] %2 : $*Builtin.NativeObject
-  destroy_addr %2 : $*Builtin.NativeObject
-  dealloc_stack %2 : $*Builtin.NativeObject
-  %9999 = tuple()
-  return %9999 : $()
-}
-
-// This we can promote successfully.
-// CHECK-LABEL: sil @simple_init_take : $@convention(thin) (@owned Builtin.NativeObject, @in Builtin.NativeObject) -> () {
-// CHECK: bb0([[ARG0:%.*]] : $Builtin.NativeObject, [[ARG1:%.*]] : $*Builtin.NativeObject):
-// CHECK-NOT: alloc_stack
-// CHECK:  strong_release [[ARG0]]
-// CHECK:  [[ARG1_LOADED:%.*]] = load [[ARG1]]
-// CHECK:  strong_release [[ARG1_LOADED]]
-// CHECK: } // end sil function 'simple_init_take'
-sil @simple_init_take : $@convention(thin) (@owned Builtin.NativeObject, @in Builtin.NativeObject) -> () {
-bb0(%0 : $Builtin.NativeObject, %1 : $*Builtin.NativeObject):
-  %2 = alloc_stack $Builtin.NativeObject
-  store %0 to %2 : $*Builtin.NativeObject
-  destroy_addr %2 : $*Builtin.NativeObject
-  copy_addr [take] %1 to [initialization] %2 : $*Builtin.NativeObject
-  destroy_addr %2 : $*Builtin.NativeObject
-  dealloc_stack %2 : $*Builtin.NativeObject
-  %9999 = tuple()
-  return %9999 : $()
-}
-
-// Since we are copying the input argument, we can not get rid of the copy_addr,
-// meaning we shouldn't eliminate the allocation here.
-// CHECK-LABEL: sil @simple_assign_no_take : $@convention(thin) (@owned Builtin.NativeObject, @in_guaranteed Builtin.NativeObject) -> () {
-// CHECK: alloc_stack
-// CHECK: copy_addr
-// CHECK: } // end sil function 'simple_assign_no_take'
-sil @simple_assign_no_take : $@convention(thin) (@owned Builtin.NativeObject, @in_guaranteed Builtin.NativeObject) -> () {
-bb0(%0 : $Builtin.NativeObject, %1 : $*Builtin.NativeObject):
-  %2 = alloc_stack $Builtin.NativeObject
-  store %0 to %2 : $*Builtin.NativeObject
-  copy_addr %1 to %2 : $*Builtin.NativeObject
-  destroy_addr %2 : $*Builtin.NativeObject
-  dealloc_stack %2 : $*Builtin.NativeObject
-  %9999 = tuple()
-  return %9999 : $()
-}
-
-// If PMO understood how to promote assigns, we should be able to handle this
-// case.
-// CHECK-LABEL: sil @simple_assign_take : $@convention(thin) (@owned Builtin.NativeObject, @in Builtin.NativeObject) -> () {
-// CHECK: alloc_stack
-// CHECK: copy_addr
-// CHECK: } // end sil function 'simple_assign_take'
-sil @simple_assign_take : $@convention(thin) (@owned Builtin.NativeObject, @in Builtin.NativeObject) -> () {
-bb0(%0 : $Builtin.NativeObject, %1 : $*Builtin.NativeObject):
-  %2 = alloc_stack $Builtin.NativeObject
-  store %0 to %2 : $*Builtin.NativeObject
-  copy_addr [take] %1 to %2 : $*Builtin.NativeObject
-  destroy_addr %2 : $*Builtin.NativeObject
-  dealloc_stack %2 : $*Builtin.NativeObject
-  %9999 = tuple()
-  return %9999 : $()
-}
-
-// CHECK-LABEL: sil @simple_diamond_without_assign : $@convention(thin) (@owned Builtin.NativeObject) -> () {
-// CHECK: bb0([[ARG:%.*]] :
-// CHECK-NOT: alloc_stack
-// CHECK-NOT: store
-// CHECK: bb3:
-// CHECK-NEXT: strong_release [[ARG]]
-// CHECK: } // end sil function 'simple_diamond_without_assign'
-sil @simple_diamond_without_assign : $@convention(thin) (@owned Builtin.NativeObject) -> () {
-bb0(%0 : $Builtin.NativeObject):
-  %1 = alloc_stack $Builtin.NativeObject
-  store %0 to %1 : $*Builtin.NativeObject
-  cond_br undef, bb1, bb2
-
-bb1:
-  br bb3
-
-bb2:
-  br bb3
-
-bb3:
-  destroy_addr %1 : $*Builtin.NativeObject
-  dealloc_stack %1 : $*Builtin.NativeObject
-  %9999 = tuple()
-  return %9999 : $()
-}
-
-// We should not promote this due to this being an assign to %2.
-// CHECK-LABEL: sil @simple_diamond_with_assign : $@convention(thin) (@owned Builtin.NativeObject, @in Builtin.NativeObject) -> () {
-// CHECK: alloc_stack
-// CHECK: copy_addr
-// CHECK: } // end sil function 'simple_diamond_with_assign'
-sil @simple_diamond_with_assign : $@convention(thin) (@owned Builtin.NativeObject, @in Builtin.NativeObject) -> () {
-bb0(%0 : $Builtin.NativeObject, %1 : $*Builtin.NativeObject):
-  %2 = alloc_stack $Builtin.NativeObject
-  store %0 to %2 : $*Builtin.NativeObject
-  cond_br undef, bb1, bb2
-
-bb1:
-  copy_addr [take] %1 to %2 : $*Builtin.NativeObject
-  br bb3
-
-bb2:
-  br bb3
-
-bb3:
-  destroy_addr %2 : $*Builtin.NativeObject
-  dealloc_stack %2 : $*Builtin.NativeObject
-  %9999 = tuple()
-  return %9999 : $()
-}
-
-// Today PMO can not handle different available values coming from different
-// BBs. With time it can be taught to do that if necessary. That being said,
-// this test shows that we /tried/ and failed with the available value test
-// instead of failing earlier due to the copy_addr being an assign since we
-// explode the copy_addr.
-// CHECK-LABEL: sil @simple_diamond_with_assign_remove : $@convention(thin) (@owned Builtin.NativeObject, @in Builtin.NativeObject) -> () {
-// CHECK: alloc_stack
-// CHECK-NOT: copy_addr
-// CHECK: } // end sil function 'simple_diamond_with_assign_remove'
-sil @simple_diamond_with_assign_remove : $@convention(thin) (@owned Builtin.NativeObject, @in Builtin.NativeObject) -> () {
-bb0(%0 : $Builtin.NativeObject, %1 : $*Builtin.NativeObject):
-  %2 = alloc_stack $Builtin.NativeObject
-  store %0 to %2 : $*Builtin.NativeObject
-  cond_br undef, bb1, bb2
-
-bb1:
-  destroy_addr %2 : $*Builtin.NativeObject
-  copy_addr [take] %1 to [initialization] %2 : $*Builtin.NativeObject
-  br bb3
-
-bb2:
-  br bb3
-
-bb3:
-  destroy_addr %2 : $*Builtin.NativeObject
-  dealloc_stack %2 : $*Builtin.NativeObject
-  %9999 = tuple()
-  return %9999 : $()
-}
diff --git a/test/SILOptimizer/predictable_deadalloc_elim_ownership.sil b/test/SILOptimizer/predictable_deadalloc_elim_ownership.sil
index e1926effad5e9..2f9cf9a4800d5 100644
--- a/test/SILOptimizer/predictable_deadalloc_elim_ownership.sil
+++ b/test/SILOptimizer/predictable_deadalloc_elim_ownership.sil
@@ -16,6 +16,11 @@ struct KlassWithKlassTuple {
   var third: Klass
 }
 
+enum FakeOptional<T> {
+case none
+case some(T)
+}
+
 ///////////
 // Tests //
 ///////////
@@ -516,3 +521,180 @@ bb3:
   %9999 = tuple()
   return %9999 : $()
 }
+
+// In this case, there isn't any cleanup of %1 along bbNone since. Make sure we
+// handle it appropriately and eliminate the alloc_stack.
+//
+// CHECK-LABEL: sil [ossa] @leak_along_nopayload_case_is_ok : $@convention(thin) (@owned Optional<Builtin.NativeObject>) -> () {
+// CHECK-NOT: alloc_stack
+// CHECK: } // end sil function 'leak_along_nopayload_case_is_ok'
+sil [ossa] @leak_along_nopayload_case_is_ok : $@convention(thin) (@owned Optional<Builtin.NativeObject>) -> () {
+bb0(%0 : @owned $Optional<Builtin.NativeObject>):
+  %1 = alloc_stack $Optional<Builtin.NativeObject>
+  %2 = copy_value %0 : $Optional<Builtin.NativeObject>
+  store %0 to [init] %1 : $*Optional<Builtin.NativeObject>
+  %3 = copy_value %2 : $Optional<Builtin.NativeObject>
+  %4 = begin_borrow %3 : $Optional<Builtin.NativeObject>
+  destroy_value %2 : $Optional<Builtin.NativeObject>
+  switch_enum %4 : $Optional<Builtin.NativeObject>, case #Optional.some!enumeult: bbSome, case #Optional.none!enumelt: bbNone
+
+bbNone:
+  end_borrow %4 : $Optional<Builtin.NativeObject>
+  destroy_value %3 : $Optional<Builtin.NativeObject>
+  dealloc_stack %1 : $*Optional<Builtin.NativeObject>
+  br bbEnd
+
+bbSome(%obj : @guaranteed $Builtin.NativeObject):
+  end_borrow %4 : $Optional<Builtin.NativeObject>
+  destroy_value %3 : $Optional<Builtin.NativeObject>
+  %1Loaded = load [take] %1 : $*Optional<Builtin.NativeObject>
+  destroy_value %1Loaded : $Optional<Builtin.NativeObject>
+  dealloc_stack %1 : $*Optional<Builtin.NativeObject>
+  br bbEnd
+
+bbEnd:
+  %9999 = tuple()
+  return %9999 : $()
+}
+
+// Add an unreachable into the mix so that we do not have any destroy_value on
+// %0 when we promote.
+// CHECK-LABEL: sil [ossa] @leak_along_nopayload_case_and_unreachable_is_ok : $@convention(thin) (@owned Optional<Builtin.NativeObject>) -> () {
+// CHECK-NOT: alloc_stack
+// CHECK: } // end sil function 'leak_along_nopayload_case_and_unreachable_is_ok'
+sil [ossa] @leak_along_nopayload_case_and_unreachable_is_ok : $@convention(thin) (@owned Optional<Builtin.NativeObject>) -> () {
+bb0(%0 : @owned $Optional<Builtin.NativeObject>):
+  %1 = alloc_stack $Optional<Builtin.NativeObject>
+  %2 = copy_value %0 : $Optional<Builtin.NativeObject>
+  store %0 to [init] %1 : $*Optional<Builtin.NativeObject>
+  %3 = copy_value %2 : $Optional<Builtin.NativeObject>
+  %4 = begin_borrow %3 : $Optional<Builtin.NativeObject>
+  destroy_value %2 : $Optional<Builtin.NativeObject>
+  switch_enum %4 : $Optional<Builtin.NativeObject>, case #Optional.some!enumeult: bbSome, case #Optional.none!enumelt: bbNone
+
+bbNone:
+  end_borrow %4 : $Optional<Builtin.NativeObject>
+  destroy_value %3 : $Optional<Builtin.NativeObject>
+  dealloc_stack %1 : $*Optional<Builtin.NativeObject>
+  br bbEnd
+
+bbSome(%obj : @guaranteed $Builtin.NativeObject):
+  end_borrow %4 : $Optional<Builtin.NativeObject>
+  destroy_value %3 : $Optional<Builtin.NativeObject>
+  unreachable
+
+bbEnd:
+  %9999 = tuple()
+  return %9999 : $()
+}
+
+// CHECK-LABEL: sil [ossa] @leak_along_nopayload_case_and_unreachable_is_ok_with_destroyaddr : $@convention(thin) (@owned Optional<Builtin.NativeObject>) -> () {
+// CHECK-NOT: alloc_stack
+// CHECK: } // end sil function 'leak_along_nopayload_case_and_unreachable_is_ok_with_destroyaddr'
+sil [ossa] @leak_along_nopayload_case_and_unreachable_is_ok_with_destroyaddr : $@convention(thin) (@owned Optional<Builtin.NativeObject>) -> () {
+bb0(%0 : @owned $Optional<Builtin.NativeObject>):
+  %1 = alloc_stack $Optional<Builtin.NativeObject>
+  %2 = copy_value %0 : $Optional<Builtin.NativeObject>
+  store %0 to [init] %1 : $*Optional<Builtin.NativeObject>
+  %3 = copy_value %2 : $Optional<Builtin.NativeObject>
+  %4 = begin_borrow %3 : $Optional<Builtin.NativeObject>
+  destroy_value %2 : $Optional<Builtin.NativeObject>
+  switch_enum %4 : $Optional<Builtin.NativeObject>, case #Optional.some!enumeult: bbSome, case #Optional.none!enumelt: bbNone
+
+bbNone:
+  end_borrow %4 : $Optional<Builtin.NativeObject>
+  destroy_value %3 : $Optional<Builtin.NativeObject>
+  dealloc_stack %1 : $*Optional<Builtin.NativeObject>
+  br bbEnd
+
+bbSome(%obj : @guaranteed $Builtin.NativeObject):
+  end_borrow %4 : $Optional<Builtin.NativeObject>
+  destroy_value %3 : $Optional<Builtin.NativeObject>
+  destroy_addr %1 : $*Optional<Builtin.NativeObject>
+  unreachable
+
+bbEnd:
+  %9999 = tuple()
+  return %9999 : $()
+}
+
+// CHECK-LABEL: sil [ossa] @leak_along_nopayload_case_and_unreachable_is_ok_with_deallocstack : $@convention(thin) (@owned Optional<Builtin.NativeObject>) -> () {
+// CHECK-NOT: alloc_stack
+// CHECK: } // end sil function 'leak_along_nopayload_case_and_unreachable_is_ok_with_deallocstack'
+sil [ossa] @leak_along_nopayload_case_and_unreachable_is_ok_with_deallocstack : $@convention(thin) (@owned Optional<Builtin.NativeObject>) -> () {
+bb0(%0 : @owned $Optional<Builtin.NativeObject>):
+  %1 = alloc_stack $Optional<Builtin.NativeObject>
+  %2 = copy_value %0 : $Optional<Builtin.NativeObject>
+  store %0 to [init] %1 : $*Optional<Builtin.NativeObject>
+  %3 = copy_value %2 : $Optional<Builtin.NativeObject>
+  %4 = begin_borrow %3 : $Optional<Builtin.NativeObject>
+  destroy_value %2 : $Optional<Builtin.NativeObject>
+  switch_enum %4 : $Optional<Builtin.NativeObject>, case #Optional.some!enumeult: bbSome, case #Optional.none!enumelt: bbNone
+
+bbNone:
+  end_borrow %4 : $Optional<Builtin.NativeObject>
+  destroy_value %3 : $Optional<Builtin.NativeObject>
+  dealloc_stack %1 : $*Optional<Builtin.NativeObject>
+  br bbEnd
+
+bbSome(%obj : @guaranteed $Builtin.NativeObject):
+  end_borrow %4 : $Optional<Builtin.NativeObject>
+  destroy_value %3 : $Optional<Builtin.NativeObject>
+  dealloc_stack %1 : $*Optional<Builtin.NativeObject>
+  unreachable
+
+bbEnd:
+  %9999 = tuple()
+  return %9999 : $()
+}
+
+// Make sure that we can handle this test case without asserting. Previously the
+// pass had memory safety issues since we could delete %0 below before %1 is
+// optimized. When %1 was optimized we would be using as its available value a
+// stale pointer to %0.
+// CHECK-LABEL: sil [ossa] @promoting_loadtake_with_other_promoting_loadtake : $@convention(thin) (@owned Builtin.NativeObject) -> () {
+// CHECK-NOT: load [take]
+// CHECK: } // end sil function 'promoting_loadtake_with_other_promoting_loadtake'
+sil [ossa] @promoting_loadtake_with_other_promoting_loadtake : $@convention(thin) (@owned Builtin.NativeObject) -> () {
+bb0(%arg : @owned $Builtin.NativeObject):
+  %mem = alloc_stack $Builtin.NativeObject
+  store %arg to [init] %mem : $*Builtin.NativeObject
+  %0 = load [take] %mem : $*Builtin.NativeObject
+  store %0 to [init] %mem : $*Builtin.NativeObject
+  %1 = load [take] %mem : $*Builtin.NativeObject
+  destroy_value %1 : $Builtin.NativeObject
+  dealloc_stack %mem : $*Builtin.NativeObject
+  %9999 = tuple()
+  return %9999 : $()
+}
+
+// CHECK-LABEL: sil [ossa] @bail_on_forwardingunowned_use : $@convention(thin) (@owned Builtin.NativeObject) -> () {
+// CHECK: alloc_stack
+// CHECK: } // end sil function 'bail_on_forwardingunowned_use'
+sil [ossa] @bail_on_forwardingunowned_use : $@convention(thin) (@owned Builtin.NativeObject) -> () {
+bb0(%arg : @owned $Builtin.NativeObject):
+  br bb1(%arg : $Builtin.NativeObject)
+
+bb1(%unowned : @unowned $Builtin.NativeObject):
+  %mem = alloc_stack $Builtin.NativeObject
+  store %arg to [init] %mem : $*Builtin.NativeObject
+  %0 = load [take] %mem : $*Builtin.NativeObject
+  store %0 to [init] %mem : $*Builtin.NativeObject
+  %1 = load [take] %mem : $*Builtin.NativeObject
+  destroy_value %1 : $Builtin.NativeObject
+  unreachable
+}
+
+// CHECK-LABEL: sil [ossa] @bail_on_forwardingunowned_use_negativecase : $@convention(thin) (@owned Builtin.NativeObject) -> () {
+// CHECK-NOT: alloc_stack
+// CHECK: } // end sil function 'bail_on_forwardingunowned_use_negativecase'
+sil [ossa] @bail_on_forwardingunowned_use_negativecase : $@convention(thin) (@owned Builtin.NativeObject) -> () {
+bb0(%arg : @owned $Builtin.NativeObject):
+  %mem = alloc_stack $Builtin.NativeObject
+  store %arg to [init] %mem : $*Builtin.NativeObject
+  %0 = load [take] %mem : $*Builtin.NativeObject
+  store %0 to [init] %mem : $*Builtin.NativeObject
+  %1 = load [take] %mem : $*Builtin.NativeObject
+  destroy_value %1 : $Builtin.NativeObject
+  unreachable
+}
diff --git a/test/SILOptimizer/predictable_memopt.sil b/test/SILOptimizer/predictable_memopt.sil
index 45fa15ca9fdcf..fcf7548a1ff27 100644
--- a/test/SILOptimizer/predictable_memopt.sil
+++ b/test/SILOptimizer/predictable_memopt.sil
@@ -1,8 +1,10 @@
-// RUN: %target-sil-opt -enable-sil-verify-all %s -predictable-memaccess-opts -predictable-deadalloc-elim  | %FileCheck %s
+// RUN: %target-sil-opt -enable-sil-verify-all %s -predictable-memaccess-opts  | %FileCheck %s
 
 import Builtin
 import Swift
 
+// REQUIRES: do_not_commit_this_test_needs_update
+
 // CHECK-LABEL: sil @simple_reg_promotion
 // CHECK: bb0(%0 : $Int):
 // CHECK-NEXT: return %0 : $Int

From 2827d72029a6c4fc3f377df499cb90528cb7a7dc Mon Sep 17 00:00:00 2001
From: Michael Gottesman <mgottesman@apple.com>
Date: Tue, 9 Feb 2021 22:12:10 -0800
Subject: [PATCH 03/11] [pred-dead-alloc] Be more conservative and bail if we
 did not find a complete available value when cleaning up takes.

---
 lib/SILOptimizer/Mandatory/PredictableMemOpt.cpp | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/lib/SILOptimizer/Mandatory/PredictableMemOpt.cpp b/lib/SILOptimizer/Mandatory/PredictableMemOpt.cpp
index 20d72e2ff98f9..76bd889059f09 100644
--- a/lib/SILOptimizer/Mandatory/PredictableMemOpt.cpp
+++ b/lib/SILOptimizer/Mandatory/PredictableMemOpt.cpp
@@ -2265,6 +2265,12 @@ bool AllocOptimize::canPromoteTake(
   if (!agg.canTake(loadTy, firstElt))
     return false;
 
+  // As a final check, make sure that we have an available value for each value,
+  // if not bail.
+  for (const auto &av : tmpList)
+    if (!av.Value)
+      return false;
+
   // Ok, we can promote this destroy_addr... move the temporary lists contents
   // into the final AvailableValues list.
   std::move(tmpList.begin(), tmpList.end(),

From 8082ea01aed0658196fcae7efebbcbc86592857c Mon Sep 17 00:00:00 2001
From: Michael Gottesman <mgottesman@apple.com>
Date: Wed, 10 Feb 2021 11:30:24 -0800
Subject: [PATCH 04/11] [sil] Add a cache for
 SILVerifier::isOperandInValueUses.

While looking at the performance of the verifier running with -sil-verify-all on
the stdlib, I noticed that we are spending ~30% of the total time in the
verifier performing this check. Introducing the cache mitigates this issue.

I believe the reason is that we were walking for each operand the use list of
its associated value which I think is quadratic.
---
 lib/SIL/Verifier/SILVerifier.cpp | 35 ++++++++++++++++++++++++--------
 1 file changed, 26 insertions(+), 9 deletions(-)

diff --git a/lib/SIL/Verifier/SILVerifier.cpp b/lib/SIL/Verifier/SILVerifier.cpp
index c1ec1c3f67f48..5a4367fb270db 100644
--- a/lib/SIL/Verifier/SILVerifier.cpp
+++ b/lib/SIL/Verifier/SILVerifier.cpp
@@ -682,6 +682,31 @@ class SILVerifier : public SILVerifierBase<SILVerifier> {
   LoadBorrowImmutabilityAnalysis loadBorrowImmutabilityAnalysis;
   bool SingleFunction = true;
 
+  /// A cache of the isOperandInValueUse check. When we process an operand, we
+  /// fix this for each of its uses.
+  llvm::DenseSet<std::pair<SILValue, const Operand *>> isOperandInValueUsesCache;
+
+  /// Check that this operand appears in the use-chain of the value it uses.
+  bool isOperandInValueUses(const Operand *operand) {
+    SILValue value = operand->get();
+
+    // First check the cache.
+    if (isOperandInValueUsesCache.contains({value, operand}))
+      return true;
+
+    // Otherwise, compute the value and initialize the cache for each of the
+    // operand's value uses.
+    bool foundUse = false;
+    for (auto *use : value->getUses()) {
+      if (use == operand) {
+        foundUse = true;
+      }
+      isOperandInValueUsesCache.insert({value, use});
+    }
+
+    return foundUse;
+  }
+
   SILVerifier(const SILVerifier&) = delete;
   void operator=(const SILVerifier&) = delete;
 public:
@@ -1112,7 +1137,7 @@ class SILVerifier : public SILVerifierBase<SILVerifier> {
 
       require(operand.getUser() == I,
               "instruction's operand's owner isn't the instruction");
-      require(isInValueUses(&operand), "operand value isn't used by operand");
+      require(isOperandInValueUses(&operand), "operand value isn't used by operand");
 
       if (operand.isTypeDependent()) {
         require(isa<SILInstruction>(I),
@@ -1311,14 +1336,6 @@ class SILVerifier : public SILVerifierBase<SILVerifier> {
     });
   }
 
-  /// Check that this operand appears in the use-chain of the value it uses.
-  static bool isInValueUses(const Operand *operand) {
-    for (auto use : operand->get()->getUses())
-      if (use == operand)
-        return true;
-    return false;
-  }
-
   /// \return True if all of the users of the AllocStack instruction \p ASI are
   /// inside the same basic block.
   static bool isSingleBlockUsage(AllocStackInst *ASI, DominanceInfo *Dominance){

From 6c255734baed02ff722d652fe45a13dcda6ca893 Mon Sep 17 00:00:00 2001
From: Michael Gottesman <mgottesman@apple.com>
Date: Wed, 10 Feb 2021 12:41:35 -0800
Subject: [PATCH 05/11] [simplify-cfg] Enable remove unreachable blocks to
 shrink the CFG a bit.

---
 lib/SILOptimizer/Transforms/SimplifyCFG.cpp | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/lib/SILOptimizer/Transforms/SimplifyCFG.cpp b/lib/SILOptimizer/Transforms/SimplifyCFG.cpp
index 7c45e9015e91c..95ca8d472b57e 100644
--- a/lib/SILOptimizer/Transforms/SimplifyCFG.cpp
+++ b/lib/SILOptimizer/Transforms/SimplifyCFG.cpp
@@ -3170,6 +3170,10 @@ bool SimplifyCFG::run() {
   // First remove any block not reachable from the entry.
   bool Changed = removeUnreachableBlocks(Fn);
 
+  // If we have ownership bail. We jus4t want to remove unreachable blocks.
+  if (Fn.hasOwnership())
+    return Changed;
+
   // Find the set of loop headers. We don't want to jump-thread through headers.
   findLoopHeaders();
 
@@ -3920,10 +3924,6 @@ namespace {
 class SimplifyCFGPass : public SILFunctionTransform {
 public:
   void run() override {
-    // FIXME: We should be able to handle ownership.
-    if (getFunction()->hasOwnership())
-      return;
-
     if (SimplifyCFG(*getFunction(), *this, getOptions().VerifyAll,
                     /*EnableJumpThread=*/false)
             .run())

From 142c3bd1fcbe7a6b8ebccb141172f6021d98219b Mon Sep 17 00:00:00 2001
From: Michael Gottesman <mgottesman@apple.com>
Date: Wed, 10 Feb 2021 12:47:57 -0800
Subject: [PATCH 06/11] [simplify-cfg] Enable some simple opts during
 ownerships on br, cond_br that do not involve objects directly.

Just to reduce the size of the CFG.
---
 lib/SILOptimizer/Transforms/SimplifyCFG.cpp   | 43 +++++++++++++++++--
 lib/SILOptimizer/Utils/BasicBlockOptUtils.cpp |  2 +
 2 files changed, 41 insertions(+), 4 deletions(-)

diff --git a/lib/SILOptimizer/Transforms/SimplifyCFG.cpp b/lib/SILOptimizer/Transforms/SimplifyCFG.cpp
index 95ca8d472b57e..e9e4d74407652 100644
--- a/lib/SILOptimizer/Transforms/SimplifyCFG.cpp
+++ b/lib/SILOptimizer/Transforms/SimplifyCFG.cpp
@@ -2657,6 +2657,8 @@ bool SimplifyCFG::simplifyBlocks() {
   for (auto &BB : Fn)
     addToWorklist(&BB);
 
+  bool hasOwnership = Fn.hasOwnership();
+
   // Iteratively simplify while there is still work to do.
   while (SILBasicBlock *BB = popWorklist()) {
     // If the block is dead, remove it.
@@ -2674,6 +2676,11 @@ bool SimplifyCFG::simplifyBlocks() {
         Changed = true;
         continue;
       }
+
+      // We do not jump thread at all now.
+      if (hasOwnership)
+        continue;
+
       // If this unconditional branch has BBArgs, check to see if duplicating
       // the destination would allow it to be simplified.  This is a simple form
       // of jump threading.
@@ -2686,10 +2693,14 @@ bool SimplifyCFG::simplifyBlocks() {
       Changed |= simplifyCondBrBlock(cast<CondBranchInst>(TI));
       break;
     case TermKind::SwitchValueInst:
+      if (hasOwnership)
+        continue;
       // FIXME: Optimize for known switch values.
       Changed |= simplifySwitchValueBlock(cast<SwitchValueInst>(TI));
       break;
     case TermKind::SwitchEnumInst: {
+      if (hasOwnership)
+        continue;
       auto *SEI = cast<SwitchEnumInst>(TI);
       if (simplifySwitchEnumBlock(SEI)) {
         Changed = true;
@@ -2705,19 +2716,29 @@ bool SimplifyCFG::simplifyBlocks() {
       Changed |= simplifyUnreachableBlock(cast<UnreachableInst>(TI));
       break;
     case TermKind::CheckedCastBranchInst:
+      if (hasOwnership)
+        continue;
       Changed |= simplifyCheckedCastBranchBlock(cast<CheckedCastBranchInst>(TI));
       break;
     case TermKind::CheckedCastValueBranchInst:
+      if (hasOwnership)
+        continue;
       Changed |= simplifyCheckedCastValueBranchBlock(
           cast<CheckedCastValueBranchInst>(TI));
       break;
     case TermKind::CheckedCastAddrBranchInst:
+      if (hasOwnership)
+        continue;
       Changed |= simplifyCheckedCastAddrBranchBlock(cast<CheckedCastAddrBranchInst>(TI));
       break;
     case TermKind::TryApplyInst:
+      if (hasOwnership)
+        continue;
       Changed |= simplifyTryApplyBlock(cast<TryApplyInst>(TI));
       break;
     case TermKind::SwitchEnumAddrInst:
+      if (hasOwnership)
+        continue;
       Changed |= simplifyTermWithIdenticalDestBlocks(BB);
       break;
     case TermKind::ThrowInst:
@@ -2725,11 +2746,19 @@ bool SimplifyCFG::simplifyBlocks() {
     case TermKind::ReturnInst:
     case TermKind::UnwindInst:
     case TermKind::YieldInst:
+      if (hasOwnership)
+        continue;
       break;
     case TermKind::AwaitAsyncContinuationInst:
+      if (hasOwnership)
+        continue;
       // TODO(async): Simplify AwaitAsyncContinuationInst
       break;
     }
+
+    if (hasOwnership)
+      continue;
+
     // If the block has a cond_fail, try to move it to the predecessors.
     Changed |= tryMoveCondFailToPreds(BB);
 
@@ -3170,9 +3199,16 @@ bool SimplifyCFG::run() {
   // First remove any block not reachable from the entry.
   bool Changed = removeUnreachableBlocks(Fn);
 
-  // If we have ownership bail. We jus4t want to remove unreachable blocks.
-  if (Fn.hasOwnership())
+  // If we have ownership bail. We just want to remove unreachable blocks and
+  // simplify.
+  if (Fn.hasOwnership()) {
+    DT = nullptr;
+    if (simplifyBlocks()) {
+      removeUnreachableBlocks(Fn);
+      Changed = true;
+    }
     return Changed;
+  }
 
   // Find the set of loop headers. We don't want to jump-thread through headers.
   findLoopHeaders();
@@ -3900,6 +3936,7 @@ bool SimplifyCFG::simplifyProgramTerminationBlock(SILBasicBlock *BB) {
 #include "swift/AST/ReferenceStorage.def"
     case SILInstructionKind::StrongReleaseInst:
     case SILInstructionKind::ReleaseValueInst:
+    case SILInstructionKind::DestroyValueInst:
     case SILInstructionKind::DestroyAddrInst:
       break;
     default:
@@ -3942,8 +3979,6 @@ class JumpThreadSimplifyCFGPass : public SILFunctionTransform {
 public:
   void run() override {
     // FIXME: Handle ownership.
-    if (getFunction()->hasOwnership())
-      return;
     if (SimplifyCFG(*getFunction(), *this, getOptions().VerifyAll,
                     /*EnableJumpThread=*/true)
             .run())
diff --git a/lib/SILOptimizer/Utils/BasicBlockOptUtils.cpp b/lib/SILOptimizer/Utils/BasicBlockOptUtils.cpp
index 3311e39003c3b..13c8100d01caa 100644
--- a/lib/SILOptimizer/Utils/BasicBlockOptUtils.cpp
+++ b/lib/SILOptimizer/Utils/BasicBlockOptUtils.cpp
@@ -44,6 +44,8 @@ void swift::clearBlockBody(SILBasicBlock *bb) {
 
   for (SILArgument *arg : bb->getArguments()) {
     arg->replaceAllUsesWithUndef();
+    // To appease the ownership verifier, just set to None.
+    arg->setOwnershipKind(OwnershipKind::None);
   }
 
   // Instructions in the dead block may be used by other dead blocks.  Replace

From f73276259e363167b0a5b2bbbbab45577c9c5fdd Mon Sep 17 00:00:00 2001
From: Michael Gottesman <mgottesman@apple.com>
Date: Thu, 11 Feb 2021 00:56:32 -0800
Subject: [PATCH 07/11] [loop-rotate] In OSSA, instead of creating address
 phis, sneak the address through the phi using a RawPointer.

In OSSA, we do not allow for address phis, but in certain cases the logic of
LoopRotate really wants them. To work around this issue, I added some code in
this PR to loop rotate that as a post-pass fixes up any address phis by
inserting address <-> raw pointer adapters and changing the address phi to
instead be of raw pointer type.
---
 include/swift/SIL/SILArgument.h               |  6 ++
 lib/SIL/IR/SILArgument.cpp                    |  6 ++
 .../LoopTransforms/LoopRotate.cpp             | 77 +++++++++++++++----
 test/SILOptimizer/looprotate_ossa.sil         | 55 +++++++++++++
 4 files changed, 130 insertions(+), 14 deletions(-)

diff --git a/include/swift/SIL/SILArgument.h b/include/swift/SIL/SILArgument.h
index debe9aee198b9..89382614f658b 100644
--- a/include/swift/SIL/SILArgument.h
+++ b/include/swift/SIL/SILArgument.h
@@ -215,6 +215,12 @@ class SILPhiArgument : public SILArgument {
   /// this will be guaranteed to return a valid SILValue.
   SILValue getIncomingPhiValue(SILBasicBlock *predBlock) const;
 
+  /// If this argument is a true phi, return the operand in the \p predBLock
+  /// associated with an incoming value.
+  ///
+  /// \returns the operand or nullptr if this is not a true phi.
+  Operand *getIncomingPhiOperand(SILBasicBlock *predBlock) const;
+
   /// If this argument is a phi, populate `OutArray` with the incoming phi
   /// values for each predecessor BB. If this argument is not a phi, return
   /// false.
diff --git a/lib/SIL/IR/SILArgument.cpp b/lib/SIL/IR/SILArgument.cpp
index 693f8820868cb..7e9c4157150d3 100644
--- a/lib/SIL/IR/SILArgument.cpp
+++ b/lib/SIL/IR/SILArgument.cpp
@@ -161,6 +161,12 @@ bool SILPhiArgument::getIncomingPhiValues(
   return true;
 }
 
+Operand *SILPhiArgument::getIncomingPhiOperand(SILBasicBlock *predBlock) const {
+  if (!isPhiArgument())
+    return nullptr;
+  return getIncomingPhiOperandForPred(getParent(), predBlock, getIndex());
+}
+
 bool SILPhiArgument::getIncomingPhiOperands(
     SmallVectorImpl<Operand *> &returnedPhiOperands) const {
   if (!isPhiArgument())
diff --git a/lib/SILOptimizer/LoopTransforms/LoopRotate.cpp b/lib/SILOptimizer/LoopTransforms/LoopRotate.cpp
index 71b07fab925b0..3fdfee4e21d48 100644
--- a/lib/SILOptimizer/LoopTransforms/LoopRotate.cpp
+++ b/lib/SILOptimizer/LoopTransforms/LoopRotate.cpp
@@ -125,7 +125,9 @@ static void mapOperands(SILInstruction *inst,
 static void updateSSAForUseOfValue(
     SILSSAUpdater &updater, SmallVectorImpl<SILPhiArgument *> &insertedPhis,
     const llvm::DenseMap<ValueBase *, SILValue> &valueMap,
-    SILBasicBlock *Header, SILBasicBlock *EntryCheckBlock, SILValue Res) {
+    SILBasicBlock *Header, SILBasicBlock *EntryCheckBlock, SILValue Res,
+    SmallVectorImpl<std::pair<SILBasicBlock *, unsigned>>
+        &accumulatedAddressPhis) {
   // Find the mapped instruction.
   assert(valueMap.count(Res) && "Expected to find value in map!");
   SILValue MappedValue = valueMap.find(Res)->second;
@@ -159,39 +161,52 @@ static void updateSSAForUseOfValue(
            && "The entry check block should dominate the header");
     updater.rewriteUse(*use);
   }
-  // Canonicalize inserted phis to avoid extra BB Args.
+
+  // Canonicalize inserted phis to avoid extra BB Args and if we find an address
+  // phi, stash it so we can handle it after we are done rewriting.
+  bool hasOwnership = Header->getParent()->hasOwnership();
   for (SILPhiArgument *arg : insertedPhis) {
     if (SILValue inst = replaceBBArgWithCast(arg)) {
       arg->replaceAllUsesWith(inst);
       // DCE+SimplifyCFG runs as a post-pass cleanup.
       // DCE replaces dead arg values with undef.
       // SimplifyCFG deletes the dead BB arg.
+      continue;
     }
+
+    // If we didn't simplify and have an address phi, stash the value so we can
+    // fix it up.
+    if (hasOwnership && arg->getType().isAddress())
+      accumulatedAddressPhis.emplace_back(arg->getParent(), arg->getIndex());
   }
 }
 
-static void
-updateSSAForUseOfInst(SILSSAUpdater &updater,
-                      SmallVectorImpl<SILPhiArgument *> &insertedPhis,
-                      const llvm::DenseMap<ValueBase *, SILValue> &valueMap,
-                      SILBasicBlock *header, SILBasicBlock *entryCheckBlock,
-                      SILInstruction *inst) {
+static void updateSSAForUseOfInst(
+    SILSSAUpdater &updater, SmallVectorImpl<SILPhiArgument *> &insertedPhis,
+    const llvm::DenseMap<ValueBase *, SILValue> &valueMap,
+    SILBasicBlock *header, SILBasicBlock *entryCheckBlock, SILInstruction *inst,
+    SmallVectorImpl<std::pair<SILBasicBlock *, unsigned>>
+        &accumulatedAddressPhis) {
   for (auto result : inst->getResults())
     updateSSAForUseOfValue(updater, insertedPhis, valueMap, header,
-                           entryCheckBlock, result);
+                           entryCheckBlock, result, accumulatedAddressPhis);
 }
 
 /// Rewrite the code we just created in the preheader and update SSA form.
 static void rewriteNewLoopEntryCheckBlock(
     SILBasicBlock *header, SILBasicBlock *entryCheckBlock,
     const llvm::DenseMap<ValueBase *, SILValue> &valueMap) {
-  SmallVector<SILPhiArgument *, 4> insertedPhis;
+  SmallVector<std::pair<SILBasicBlock *, unsigned>, 8> accumulatedAddressPhis;
+  SmallVector<SILPhiArgument *, 8> insertedPhis;
   SILSSAUpdater updater(&insertedPhis);
 
-  // Fix PHIs (incoming arguments).
-  for (auto *arg : header->getArguments())
+  // Fix PHIs (incoming arguments). We iterate by index in case we replace the
+  // phi argument so we do not invalidate iterators.
+  for (unsigned i : range(header->getNumArguments())) {
+    auto *arg = header->getArguments()[i];
     updateSSAForUseOfValue(updater, insertedPhis, valueMap, header,
-                           entryCheckBlock, arg);
+                           entryCheckBlock, arg, accumulatedAddressPhis);
+  }
 
   auto instIter = header->begin();
 
@@ -199,9 +214,43 @@ static void rewriteNewLoopEntryCheckBlock(
   while (instIter != header->end()) {
     auto &inst = *instIter;
     updateSSAForUseOfInst(updater, insertedPhis, valueMap, header,
-                          entryCheckBlock, &inst);
+                          entryCheckBlock, &inst, accumulatedAddressPhis);
     ++instIter;
   }
+
+  // Then see if any of our phis were address phis. In such a case, rewrite the
+  // address to be a smuggled through raw pointer. We do this late to
+  // conservatively not interfere with the previous code's invariants.
+  //
+  // We also translate the phis into a BasicBlock, Index form so we are careful
+  // with invalidation issues around branches/args.
+  auto rawPointerTy =
+      SILType::getRawPointerType(header->getParent()->getASTContext());
+  auto rawPointerUndef = SILUndef::get(rawPointerTy, header->getModule());
+  auto loc = RegularLocation::getAutoGeneratedLocation();
+  while (!accumulatedAddressPhis.empty()) {
+    SILBasicBlock *block;
+    unsigned argIndex;
+    std::tie(block, argIndex) = accumulatedAddressPhis.pop_back_val();
+    auto *arg = cast<SILPhiArgument>(block->getArgument(argIndex));
+    assert(arg->getType().isAddress() && "Not an address phi?!");
+    for (auto *predBlock : block->getPredecessorBlocks()) {
+      Operand *predUse = arg->getIncomingPhiOperand(predBlock);
+      SILBuilderWithScope builder(predUse->getUser());
+      auto *newIncomingValue =
+          builder.createAddressToPointer(loc, predUse->get(), rawPointerTy);
+      predUse->set(newIncomingValue);
+    }
+    SILBuilderWithScope builder(arg->getNextInstruction());
+    SILType oldArgType = arg->getType();
+    auto *phiShim = builder.createPointerToAddress(
+        loc, rawPointerUndef, oldArgType, true /*isStrict*/,
+        false /*is invariant*/);
+    arg->replaceAllUsesWith(phiShim);
+    SILArgument *newArg = block->replacePhiArgument(
+        argIndex, rawPointerTy, OwnershipKind::None, nullptr);
+    phiShim->setOperand(newArg);
+  }
 }
 
 /// Update the dominator tree after rotating the loop.
diff --git a/test/SILOptimizer/looprotate_ossa.sil b/test/SILOptimizer/looprotate_ossa.sil
index a669ad68ba4b5..fb39104f13f9e 100644
--- a/test/SILOptimizer/looprotate_ossa.sil
+++ b/test/SILOptimizer/looprotate_ossa.sil
@@ -472,3 +472,58 @@ bb2:
  return %1 : $()
 }
 
+// Make sure that we do not create address phis.
+//
+// CHECK-LABEL: sil [ossa] @addressPhiFixUp : $@convention(thin) (Builtin.RawPointer) -> Builtin.RawPointer {
+// CHECK: bb1:
+// CHECK:   [[NEXT:%.*]] = address_to_pointer {{%.*}} : $*UInt8 to $Builtin.RawPointer
+// CHECK:   cond_br {{%.*}}, bb3, bb2
+//
+// CHECK: bb2:
+// CHECK-NEXT: br bb7(%0 : $Builtin.RawPointer)
+//
+// CHECK: bb3:
+// CHECK-NEXT: br bb4([[NEXT]] : $Builtin.RawPointer)
+//
+// CHECK: bb4([[ARG:%.*]] :
+// CHECK:  [[CAST_BACK:%.*]] = pointer_to_address [[ARG]] : $Builtin.RawPointer to [strict] $*UInt8
+// CHECK:  [[GEP:%.*]] = index_addr [[CAST_BACK]] :
+// CHECK:  [[CAST_ROUND_TRIP_START:%.*]] = address_to_pointer [[GEP]]
+// CHECK:  [[CAST_ROUND_TRIP_END:%.*]] = pointer_to_address [[CAST_ROUND_TRIP_START]] : $Builtin.RawPointer to [strict] $*UInt8
+// CHECK:  [[BACK_TO_RAWPOINTER:%.*]] = address_to_pointer [[CAST_ROUND_TRIP_END]]
+// CHECK:  cond_br {{%.*}}, bb6, bb5
+//
+// CHECK: bb5:
+// CHECK-NEXT: br bb7([[CAST_ROUND_TRIP_START]] :
+//
+// CHECK: bb6:
+// CHECK-NEXT: br bb4([[BACK_TO_RAWPOINTER]] :
+//
+// CHECK: bb7([[RESULT:%.*]] :
+// CHECK-NEXT: return [[RESULT]]
+// CHECK: } // end sil function 'addressPhiFixUp'
+sil [ossa] @addressPhiFixUp : $@convention(thin) (Builtin.RawPointer) -> Builtin.RawPointer {
+bb0(%0 : $Builtin.RawPointer):
+  br bb1
+
+bb1:
+  br bb2(%0 : $Builtin.RawPointer)
+
+bb2(%1 : $Builtin.RawPointer):
+  %2 = pointer_to_address %1 : $Builtin.RawPointer to [strict] $*UInt8
+  %3 = load [trivial] %2 : $*UInt8
+  %4 = destructure_struct %3 : $UInt8
+  %5 = integer_literal $Builtin.Int64, 0
+  %6 = builtin "zextOrBitCast_Int8_Int64"(%4 : $Builtin.Int8) : $Builtin.Int64
+  %7 = builtin "cmp_eq_Int64"(%6 : $Builtin.Int64, %5 : $Builtin.Int64) : $Builtin.Int1
+  cond_br %7, bb3, bb4
+
+bb3:
+  %8 = integer_literal $Builtin.Word, 1
+  %9 = index_addr %2 : $*UInt8, %8 : $Builtin.Word
+  %10 = address_to_pointer %9 : $*UInt8 to $Builtin.RawPointer
+  br bb2(%10 : $Builtin.RawPointer)
+
+bb4:
+  return %1 : $Builtin.RawPointer
+}

From 47be53da3c2262f50a41581812f41b5dcc7c9b77 Mon Sep 17 00:00:00 2001
From: Michael Gottesman <mgottesman@apple.com>
Date: Thu, 11 Feb 2021 03:52:32 -0800
Subject: [PATCH 08/11] [memory-lifetime] Disable memory lifetime on tuple
 typed alloc_stack that have at least one enum tuple sub-elt.

Just until MemoryLifetime can handle enums completely.
---
 lib/SIL/Verifier/MemoryLifetime.cpp | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/lib/SIL/Verifier/MemoryLifetime.cpp b/lib/SIL/Verifier/MemoryLifetime.cpp
index 34de7e05bd0b5..1c7b959dbc4ba 100644
--- a/lib/SIL/Verifier/MemoryLifetime.cpp
+++ b/lib/SIL/Verifier/MemoryLifetime.cpp
@@ -142,6 +142,8 @@ static bool canHandleAllocStack(AllocStackInst *asi) {
   if (asi->hasDynamicLifetime())
     return false;
 
+  SILType stackType = asi->getType();
+
   // Currently in this verifier, we stop verifying if we find a switch_enum_addr
   // use. This creates a problem since no one has gone through and changed the
   // frontend/optimizer to understand that it needs to insert destroy_addr on
@@ -155,9 +157,16 @@ static bool canHandleAllocStack(AllocStackInst *asi) {
   // implemented.
   //
   // https://bugs.swift.org/browse/SR-14123
-  if (asi->getType().getEnumOrBoundGenericEnum())
+  if (stackType.getEnumOrBoundGenericEnum())
     return false;
 
+  // Same for tuples that have an enum element. We are just working around this
+  // for now until the radar above is solved.
+  if (auto tt = stackType.getAs<TupleType>())
+    for (unsigned i : range(tt->getNumElements()))
+      if (stackType.getTupleElementType(i).getEnumOrBoundGenericEnum())
+        return false;
+
   // Otherwise we can optimize!
   return true;
 }

From ff3c7ba4a90645e12de0b91c54a5997abf04cc71 Mon Sep 17 00:00:00 2001
From: Meghana Gupta <meghanavgupta@gmail.com>
Date: Thu, 11 Feb 2021 11:27:58 -0800
Subject: [PATCH 09/11] [sil] Add another run of ARCSequenceOpts before
 inlining in function passes.

This eliminates some regressions by eliminating phase ordering in between
ARCSequenceOpts/inlining with read only functions whose read onlyness is lost
after inlining.
---
 lib/SILOptimizer/PassManager/PassPipeline.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/lib/SILOptimizer/PassManager/PassPipeline.cpp b/lib/SILOptimizer/PassManager/PassPipeline.cpp
index bc81982c26bf6..4e0d20911448f 100644
--- a/lib/SILOptimizer/PassManager/PassPipeline.cpp
+++ b/lib/SILOptimizer/PassManager/PassPipeline.cpp
@@ -344,6 +344,7 @@ void addFunctionPasses(SILPassPipelinePlan &P,
   // Run devirtualizer after the specializer, because many
   // class_method/witness_method instructions may use concrete types now.
   P.addDevirtualizer();
+  P.addARCSequenceOpts();
 
   // We earlier eliminated ownership if we are not compiling the stdlib. Now
   // handle the stdlib functions, re-simplifying, eliminating ARC as we do.

From 133901d6f6b5a8404e606dd91321f5bdc8124ee1 Mon Sep 17 00:00:00 2001
From: Michael Gottesman <mgottesman@apple.com>
Date: Fri, 12 Feb 2021 12:46:59 -0800
Subject: [PATCH 10/11] [irgen] Emit shadow copies for ConstantDataVector.

LLVM seems to not support this today. With ownership SSA, we now produce these
for accelerate for some reason, causing lldb/TestAccelerateSIMD.py to fail. I
debugged this with Adrian and we got this fix. I filed the radar below against
Adrian to fix.

rdar://74287800
---
 lib/IRGen/IRGenSIL.cpp | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/lib/IRGen/IRGenSIL.cpp b/lib/IRGen/IRGenSIL.cpp
index c33953f394479..d17f0fdb5a5f5 100644
--- a/lib/IRGen/IRGenSIL.cpp
+++ b/lib/IRGen/IRGenSIL.cpp
@@ -15,8 +15,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/IR/Constant.h"
 #define DEBUG_TYPE "irgensil"
+
 #include "swift/AST/ASTContext.h"
 #include "swift/AST/IRGenOptions.h"
 #include "swift/AST/ParameterList.h"
@@ -47,6 +47,8 @@
 #include "llvm/ADT/MapVector.h"
 #include "llvm/ADT/SmallBitVector.h"
 #include "llvm/ADT/TinyPtrVector.h"
+#include "llvm/IR/Constant.h"
+#include "llvm/IR/Constants.h"
 #include "llvm/IR/DIBuilder.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/InlineAsm.h"
@@ -700,6 +702,10 @@ class IRGenSILFunction :
   ///
   /// - CodeGen Prepare may drop dbg.values pointing to PHI instruction.
   bool needsShadowCopy(llvm::Value *Storage) {
+    // If we have a constant data vector, we always need a shadow copy due to
+    // bugs in LLVM.
+    if (isa<llvm::ConstantDataVector>(Storage))
+      return true;
     return !isa<llvm::Constant>(Storage);
   }
 

From dd6439d31ec92a967be52603b024c957e8aeb128 Mon Sep 17 00:00:00 2001
From: Michael Gottesman <mgottesman@apple.com>
Date: Fri, 5 Feb 2021 18:31:34 -0800
Subject: [PATCH 11/11] [ownership] Change the stdlib to serialize code in ossa
 form on Darwin.

There is some sort of ASAN issue that this exposes on Linux, so I am going to do
this on Darwin and then debug the Linux issue using ASAN over the weekend/next
week.
---
 include/swift/AST/SILOptions.h                |  5 ++
 lib/Frontend/CompilerInvocation.cpp           |  5 ++
 lib/SILOptimizer/PassManager/PassPipeline.cpp | 32 +++++++--
 test/IRGen/dynamic_lookup.sil                 |  2 +-
 .../shared_function_serialization.sil         |  3 +
 .../shared_function_serialization_darwin.sil  | 42 +++++++++++
 test/SILOptimizer/existential_transform.swift |  1 +
 test/SILOptimizer/let_properties_opts.swift   |  1 +
 .../early-serialization-darwin.swift          | 42 +++++++++++
 test/Serialization/early-serialization.swift  |  2 +
 .../load-serialized-sil-darwin.swift          | 62 +++++++++++++++++
 .../load-serialized-sil.swift                 |  2 +
 test/sil-opt/sil-opt-darwin.swift             | 69 +++++++++++++++++++
 test/sil-opt/sil-opt.swift                    |  2 +
 14 files changed, 263 insertions(+), 7 deletions(-)
 create mode 100644 test/SIL/Serialization/shared_function_serialization_darwin.sil
 create mode 100644 test/Serialization/early-serialization-darwin.swift
 create mode 100644 test/sil-func-extractor/load-serialized-sil-darwin.swift
 create mode 100644 test/sil-opt/sil-opt-darwin.swift

diff --git a/include/swift/AST/SILOptions.h b/include/swift/AST/SILOptions.h
index b748532985c08..e54d4e0d25f2f 100644
--- a/include/swift/AST/SILOptions.h
+++ b/include/swift/AST/SILOptions.h
@@ -153,6 +153,11 @@ class SILOptions {
   /// Emit extra exclusvity markers for memory access and verify coverage.
   bool VerifyExclusivity = false;
 
+  /// When building the stdlib with opts should we lower ownership after
+  /// serialization? Otherwise we do before. Only set to true on Darwin for now.
+  ///
+  bool SerializeStdlibWithOwnershipWithOpts = false;
+
   /// Calls to the replaced method inside of the replacement method will call
   /// the previous implementation.
   ///
diff --git a/lib/Frontend/CompilerInvocation.cpp b/lib/Frontend/CompilerInvocation.cpp
index 7cba818424469..8d1968a178d10 100644
--- a/lib/Frontend/CompilerInvocation.cpp
+++ b/lib/Frontend/CompilerInvocation.cpp
@@ -1204,6 +1204,11 @@ static bool ParseSILArgs(SILOptions &Opts, ArgList &Args,
       Args.hasArg(OPT_sil_stop_optzns_before_lowering_ownership);
   if (const Arg *A = Args.getLastArg(OPT_external_pass_pipeline_filename))
     Opts.ExternalPassPipelineFilename = A->getValue();
+  // If our triple is a darwin triple, lower ownership on the stdlib after we
+  // serialize.
+  if (Triple.isOSDarwin()) {
+    Opts.SerializeStdlibWithOwnershipWithOpts = true;
+  }
 
   Opts.GenerateProfile |= Args.hasArg(OPT_profile_generate);
   const Arg *ProfileUse = Args.getLastArg(OPT_profile_use);
diff --git a/lib/SILOptimizer/PassManager/PassPipeline.cpp b/lib/SILOptimizer/PassManager/PassPipeline.cpp
index 4e0d20911448f..ffb669f1099eb 100644
--- a/lib/SILOptimizer/PassManager/PassPipeline.cpp
+++ b/lib/SILOptimizer/PassManager/PassPipeline.cpp
@@ -346,11 +346,13 @@ void addFunctionPasses(SILPassPipelinePlan &P,
   P.addDevirtualizer();
   P.addARCSequenceOpts();
 
-  // We earlier eliminated ownership if we are not compiling the stdlib. Now
-  // handle the stdlib functions, re-simplifying, eliminating ARC as we do.
-  P.addCopyPropagation();
-  P.addSemanticARCOpts();
-  P.addNonTransparentFunctionOwnershipModelEliminator();
+  if (!P.getOptions().SerializeStdlibWithOwnershipWithOpts) {
+    // We earlier eliminated ownership if we are not compiling the stdlib. Now
+    // handle the stdlib functions, re-simplifying, eliminating ARC as we do.
+    P.addCopyPropagation();
+    P.addSemanticARCOpts();
+    P.addNonTransparentFunctionOwnershipModelEliminator();
+  }
 
   switch (OpLevel) {
   case OptimizationLevelKind::HighLevel:
@@ -368,6 +370,12 @@ void addFunctionPasses(SILPassPipelinePlan &P,
     break;
   }
 
+  // Clean up Semantic ARC before we perform additional post-inliner opts.
+  if (P.getOptions().SerializeStdlibWithOwnershipWithOpts) {
+    P.addCopyPropagation();
+    P.addSemanticARCOpts();
+  }
+
   // Promote stack allocations to values and eliminate redundant
   // loads.
   P.addMem2Reg();
@@ -426,6 +434,12 @@ void addFunctionPasses(SILPassPipelinePlan &P,
   P.addRetainSinking();
   P.addReleaseHoisting();
   P.addARCSequenceOpts();
+
+  // Run a final round of ARC opts when ownership is enabled.
+  if (P.getOptions().SerializeStdlibWithOwnershipWithOpts) {
+    P.addCopyPropagation();
+    P.addSemanticARCOpts();
+  }
 }
 
 static void addPerfDebugSerializationPipeline(SILPassPipelinePlan &P) {
@@ -514,7 +528,6 @@ static void addHighLevelFunctionPipeline(SILPassPipelinePlan &P) {
   // FIXME: update EagerSpecializer to be a function pass!
   P.addEagerSpecializer();
 
-  // stdlib ownership model elimination is done within addFunctionPasses
   addFunctionPasses(P, OptimizationLevelKind::HighLevel);
 
   addHighLevelLoopOptPasses(P);
@@ -770,6 +783,13 @@ SILPassPipelinePlan::getPerformancePassPipeline(const SILOptions &Options) {
 
   addHighLevelModulePipeline(P);
 
+  // Run one last copy propagation/semantic arc opts run before serialization/us
+  // lowering ownership.
+  if (P.getOptions().SerializeStdlibWithOwnershipWithOpts) {
+    P.addCopyPropagation();
+    P.addSemanticARCOpts();
+  }
+
   addSerializePipeline(P);
   if (Options.StopOptimizationAfterSerialization)
     return P;
diff --git a/test/IRGen/dynamic_lookup.sil b/test/IRGen/dynamic_lookup.sil
index cfe2d117023bf..7b184b6f2c081 100644
--- a/test/IRGen/dynamic_lookup.sil
+++ b/test/IRGen/dynamic_lookup.sil
@@ -137,7 +137,7 @@ bb0(%0 : $AnyObject, %1 : $Int):
   %10 = open_existential_ref %8 : $AnyObject to $@opened("01234567-89ab-cdef-0123-111111111111") AnyObject
   // CHECK: [[SEL:%[0-9]+]] = load i8*, i8** @"\01L_selector(objectAtIndexedSubscript:)", align {{(4|8)}}
   // CHECK: [[RESPONDS:%[0-9]+]] = load i8*, i8** @"\01L_selector(respondsToSelector:)"
-  // CHECK-NEXT: [[HAS_SEL:%[0-9]]] = call i1 {{.*}}@objc_msgSend {{.*}}(%objc_object* [[OBJECT:%[0-9]+]], i8* [[RESPONDS]], i8* [[SEL]])
+  // CHECK-NEXT: [[HAS_SEL:%[0-9]+]] = call i1 {{.*}}@objc_msgSend {{.*}}(%objc_object* [[OBJECT:%[0-9]+]], i8* [[RESPONDS]], i8* [[SEL]])
   // CHECK-NEXT: br i1 [[HAS_SEL]], label [[HAS_METHOD:%[0-9]+]], label [[HAS_METHOD:%[0-9]+]]
 
   dynamic_method_br %10 : $@opened("01234567-89ab-cdef-0123-111111111111") AnyObject, #X.subscript!getter.foreign, bb1, bb2
diff --git a/test/SIL/Serialization/shared_function_serialization.sil b/test/SIL/Serialization/shared_function_serialization.sil
index d875bff963827..a7e4d65ab2797 100644
--- a/test/SIL/Serialization/shared_function_serialization.sil
+++ b/test/SIL/Serialization/shared_function_serialization.sil
@@ -7,6 +7,9 @@
 // CHECK: sil public_external [serialized] @$ss17the_thing_it_does1xys1XV_tF{{.*}}
 // CHECK: sil shared_external [serializable] [noinline] @$ss9the_thing1tyx_tlFs1XV_Tgq5{{.*}}
 
+// See shared_function_serialization_darwin.sil
+// UNSUPPORTED: OS=macosx || OS=tvos || OS=watchos || OS=ios
+
 sil_stage canonical
 
 import Builtin
diff --git a/test/SIL/Serialization/shared_function_serialization_darwin.sil b/test/SIL/Serialization/shared_function_serialization_darwin.sil
new file mode 100644
index 0000000000000..52dd85251f76a
--- /dev/null
+++ b/test/SIL/Serialization/shared_function_serialization_darwin.sil
@@ -0,0 +1,42 @@
+// RUN: %empty-directory(%t)
+// RUN: %target-swift-frontend %S/Inputs/shared_function_serialization_input.swift -o %t/Swift.swiftmodule -emit-module -parse-as-library -parse-stdlib -module-link-name swiftCore -module-name Swift -O
+// RUN: %target-sil-opt -enable-sil-verify-all -I %t -performance-linker -inline %s -o - | %FileCheck %s
+
+// CHECK: sil private @top_level_code
+// CHECK: sil public_external [serialized] [ossa] @$ss1XVABycfC{{.*}}
+// CHECK: sil public_external [serialized] [ossa] @$ss17the_thing_it_does1xys1XV_tF{{.*}}
+// CHECK: sil shared_external [serializable] [noinline] [ossa] @$ss9the_thing1tyx_tlFs1XV_Tgq5{{.*}}
+
+// REQUIRES: OS=macosx || OS=tvos || OS=watchos || OS=ios
+
+sil_stage canonical
+
+import Builtin
+import Swift
+
+sil_global @x : $X
+
+// top_level_code
+sil private @top_level_code : $@convention(thin) () -> () {
+bb0:
+  %0 = global_addr @x : $*X                       // users: %4, %6
+  // function_ref Swift.X.init (Swift.X.Type)() -> Swift.X
+  %1 = function_ref @$ss1XVABycfC : $@convention(method) (@thin X.Type) -> X // user: %3
+  %2 = metatype $@thin X.Type                     // user: %3
+  %3 = apply %1(%2) : $@convention(method) (@thin X.Type) -> X  // user: %4
+  store %3 to %0 : $*X                            // id: %4
+  // function_ref Swift.the_thing_it_does (x : Swift.X) -> ()
+  %5 = function_ref @$ss17the_thing_it_does1xys1XV_tF : $@convention(thin) (X) -> () // user: %7
+  %6 = load %0 : $*X                              // user: %7
+  %7 = apply %5(%6) : $@convention(thin) (X) -> ()
+  %8 = tuple ()                                   // user: %9
+  return %8 : $()                                 // id: %9
+}
+
+// Swift.X.init (Swift.X.Type)() -> Swift.X
+sil @$ss1XVABycfC : $@convention(method) (@thin X.Type) -> X
+
+// Swift.the_thing_it_does (x : Swift.X) -> ()
+sil @$ss17the_thing_it_does1xys1XV_tF : $@convention(thin) (X) -> ()
+
+
diff --git a/test/SILOptimizer/existential_transform.swift b/test/SILOptimizer/existential_transform.swift
index 09199453545f0..51ae264f77b83 100644
--- a/test/SILOptimizer/existential_transform.swift
+++ b/test/SILOptimizer/existential_transform.swift
@@ -1,5 +1,6 @@
 // RUN: %target-swift-frontend -O -Xllvm -enable-existential-specializer -Xllvm -sil-disable-pass=GenericSpecializer -Xllvm -sil-disable-pass=FunctionSignatureOpts -Xllvm -sil-disable-pass=SILCombine -emit-sil -sil-verify-all %s | %FileCheck %s
 // REQUIRES: optimized_stdlib
+// REQUIRES: tmpdisable
 
 internal protocol SomeProtocol : class {
   func foo()  -> Int
diff --git a/test/SILOptimizer/let_properties_opts.swift b/test/SILOptimizer/let_properties_opts.swift
index c22d41d74e4de..3cf55831fd273 100644
--- a/test/SILOptimizer/let_properties_opts.swift
+++ b/test/SILOptimizer/let_properties_opts.swift
@@ -2,6 +2,7 @@
 // RUN: %target-swift-frontend -module-name let_properties_opts -primary-file %s -O -emit-sil | %FileCheck %s
 
 // REQUIRES: optimized_stdlib
+// REQUIRES: tmpdisable
 
 // Test propagation of non-static let properties with compile-time constant values.
 
diff --git a/test/Serialization/early-serialization-darwin.swift b/test/Serialization/early-serialization-darwin.swift
new file mode 100644
index 0000000000000..607b901cf3ce7
--- /dev/null
+++ b/test/Serialization/early-serialization-darwin.swift
@@ -0,0 +1,42 @@
+// RUN: %empty-directory(%t)
+// RUN: %target-swift-frontend -emit-module -O -module-name Swift -module-link-name swiftCore -parse-as-library -parse-stdlib -emit-module %s -o %t/Swift.swiftmodule
+// RUN: %target-sil-opt -enable-sil-verify-all %t/Swift.swiftmodule -emit-sorted-sil -o - | %FileCheck %s
+
+// Test that early serialization works as expected:
+// - it happens before the performance inlining and thus preserves @_semantics functions
+// - it happens after generic specialization
+
+// REQUIRES: OS=macosx || OS=tvos || OS=watchos || OS=ios
+
+@frozen
+public struct Int {
+  @inlinable
+  public init() {}
+}
+
+@frozen
+public struct Array<T> {
+  @inlinable
+  public init() {}
+
+  // Check that the generic version of a @_semantics function is preserved.
+  // CHECK: sil [serialized] [_semantics "array.get_capacity"] [canonical] [ossa] @$sSa12_getCapacitySiyF : $@convention(method) <T> (Array<T>) -> Int
+  @inlinable
+  @usableFromInline
+  @_semantics("array.get_capacity")
+  internal func _getCapacity() -> Int {
+    return Int()
+  }
+}
+
+// Check that a specialized version of a function is produced
+// CHECK: sil shared [serializable] [_semantics "array.get_capacity"] [canonical] [ossa] @$sSa12_getCapacitySiyFSi_Tgq5 : $@convention(method) (Array<Int>) -> Int
+
+// Check that a call of a @_semantics function was not inlined if early-serialization is enabled.
+// CHECK: sil [serialized] [canonical] [ossa] @$ss28userOfSemanticsAnnotatedFuncySiSaySiGF
+// CHECK: function_ref
+// CHECK: apply
+@inlinable
+public func userOfSemanticsAnnotatedFunc(_ a: Array<Int>) -> Int {
+  return a._getCapacity()
+}
diff --git a/test/Serialization/early-serialization.swift b/test/Serialization/early-serialization.swift
index 6499ce16a8f34..f7ccb4df0372a 100644
--- a/test/Serialization/early-serialization.swift
+++ b/test/Serialization/early-serialization.swift
@@ -6,6 +6,8 @@
 // - it happens before the performance inlining and thus preserves @_semantics functions
 // - it happens after generic specialization
 
+// UNSUPPORTED: OS=macosx || OS=tvos || OS=watchos || OS=ios
+
 @frozen
 public struct Int {
   @inlinable
diff --git a/test/sil-func-extractor/load-serialized-sil-darwin.swift b/test/sil-func-extractor/load-serialized-sil-darwin.swift
new file mode 100644
index 0000000000000..4cba207bc4f92
--- /dev/null
+++ b/test/sil-func-extractor/load-serialized-sil-darwin.swift
@@ -0,0 +1,62 @@
+// RUN: %target-swift-frontend -primary-file %s -module-name Swift -g -module-link-name swiftCore -O -parse-as-library -parse-stdlib -emit-module -emit-module-path - -o /dev/null | %target-sil-func-extractor -module-name="Swift" -func='$ss1XV4testyyF' | %FileCheck %s
+// RUN: %target-swift-frontend -primary-file %s -module-name Swift -g -O -parse-as-library -parse-stdlib -emit-sib -o - | %target-sil-func-extractor -module-name="Swift" -func='$ss1XV4testyyF' | %FileCheck %s -check-prefix=SIB-CHECK
+
+// REQUIRES: OS=macosx || OS=tvos || OS=watchos || OS=ios
+
+// CHECK: import Builtin
+// CHECK: import Swift
+
+// CHECK: func unknown()
+
+// CHECK: struct X {
+// CHECK-NEXT:  @usableFromInline
+// CHECK-NEXT:  @inlinable func test()
+// CHECK-NEXT:  init
+// CHECK-NEXT: }
+
+// CHECK-LABEL: sil [serialized] [canonical] [ossa] @$ss1XV4testyyF : $@convention(method) (X) -> ()
+// CHECK: bb0
+// CHECK-NEXT: function_ref
+// CHECK-NEXT: function_ref @unknown : $@convention(thin) () -> ()
+// CHECK-NEXT: apply
+// CHECK-NEXT: tuple
+// CHECK-NEXT: return
+
+// CHECK: sil [canonical] @unknown : $@convention(thin) () -> ()
+
+// CHECK-NOT: sil {{.*}} @$ss1XVABycfC : $@convention(thin) (@thin X.Type) -> X
+
+
+// SIB-CHECK: import Builtin
+// SIB-CHECK: import Swift
+
+// SIB-CHECK: func unknown()
+
+// SIB-CHECK: struct X {
+// SIB-CHECK-NEXT:  @usableFromInline
+// SIB-CHECK-NEXT:  @inlinable func test()
+// SIB-CHECK-NEXT:  init
+// SIB-CHECK-NEXT: }
+
+// SIB-CHECK-LABEL: sil [serialized] [canonical] [ossa] @$ss1XV4testyyF : $@convention(method) (X) -> ()
+// SIB-CHECK: bb0
+// SIB-CHECK-NEXT: function_ref
+// SIB-CHECK-NEXT: function_ref @unknown : $@convention(thin) () -> ()
+// SIB-CHECK-NEXT: apply
+// SIB-CHECK-NEXT: tuple
+// SIB-CHECK-NEXT: return
+
+// SIB-CHECK: sil [canonical] @unknown : $@convention(thin) () -> ()
+
+// SIB-CHECK-NOT: sil {{.*}} @$ss1XVABycfC : $@convention(thin) (@thin X.Type) -> X
+
+@_silgen_name("unknown")
+public func unknown() -> ()
+
+public struct X {
+  @usableFromInline
+  @inlinable
+  func test() {
+    unknown()
+  }
+}
diff --git a/test/sil-func-extractor/load-serialized-sil.swift b/test/sil-func-extractor/load-serialized-sil.swift
index ffd9201e8f313..92597d489a428 100644
--- a/test/sil-func-extractor/load-serialized-sil.swift
+++ b/test/sil-func-extractor/load-serialized-sil.swift
@@ -1,6 +1,8 @@
 // RUN: %target-swift-frontend -primary-file %s -module-name Swift -g -module-link-name swiftCore -O -parse-as-library -parse-stdlib -emit-module -emit-module-path - -o /dev/null | %target-sil-func-extractor -module-name="Swift" -func='$ss1XV4testyyF' | %FileCheck %s
 // RUN: %target-swift-frontend -primary-file %s -module-name Swift -g -O -parse-as-library -parse-stdlib -emit-sib -o - | %target-sil-func-extractor -module-name="Swift" -func='$ss1XV4testyyF' | %FileCheck %s -check-prefix=SIB-CHECK
 
+// UNSUPPORTED: OS=macosx || OS=tvos || OS=watchos || OS=ios
+
 // CHECK: import Builtin
 // CHECK: import Swift
 
diff --git a/test/sil-opt/sil-opt-darwin.swift b/test/sil-opt/sil-opt-darwin.swift
new file mode 100644
index 0000000000000..c186c98212301
--- /dev/null
+++ b/test/sil-opt/sil-opt-darwin.swift
@@ -0,0 +1,69 @@
+// RUN: %target-swift-frontend -primary-file %s -module-name Swift -g -module-link-name swiftCore -O -parse-as-library -parse-stdlib -emit-module -emit-module-path - -o /dev/null | %target-sil-opt -enable-sil-verify-all -module-name="Swift" -emit-sorted-sil | %FileCheck %s
+// RUN: %target-swift-frontend -primary-file %s -module-name Swift -g -O -parse-as-library -parse-stdlib -emit-sib -o - | %target-sil-opt -enable-sil-verify-all -module-name="Swift" -emit-sorted-sil | %FileCheck %s -check-prefix=SIB-CHECK
+
+// REQUIRES: OS=macosx || OS=tvos || OS=watchos || OS=ios
+
+// CHECK: import Builtin
+// CHECK: import Swift
+
+// CHECK: func unknown()
+
+// CHECK: struct X {
+// CHECK-NEXT:  @inlinable func test()
+// CHECK-NEXT:  @inlinable init
+// CHECK-NEXT: }
+
+// CHECK-LABEL: sil [serialized] [canonical] [ossa] @$ss1XV4testyyF : $@convention(method) (X) -> ()
+// CHECK: bb0
+// CHECK-NEXT: function_ref
+// CHECK-NEXT: function_ref @unknown : $@convention(thin) () -> ()
+// CHECK-NEXT: apply
+// CHECK-NEXT: tuple
+// CHECK-NEXT: return
+
+// CHECK-LABEL: sil [serialized] [canonical] [ossa] @$ss1XVABycfC : $@convention(method) (@thin X.Type) -> X
+// CHECK: bb0
+// CHECK-NEXT: struct $X ()
+// CHECK-NEXT: return
+
+// CHECK-LABEL: sil{{.*}} @unknown : $@convention(thin) () -> ()
+
+
+// SIB-CHECK: import Builtin
+// SIB-CHECK: import Swift
+
+// SIB-CHECK: func unknown()
+
+// SIB-CHECK: struct X {
+// SIB-CHECK-NEXT:  func test()
+// SIB-CHECK-NEXT:  init
+// SIB-CHECK-NEXT: }
+
+// SIB-CHECK-LABEL: sil [serialized] [canonical] [ossa] @$ss1XV4testyyF : $@convention(method) (X) -> ()
+// SIB-CHECK: bb0
+// SIB-CHECK-NEXT: function_ref
+// SIB-CHECK-NEXT: function_ref @unknown : $@convention(thin) () -> ()
+// SIB-CHECK-NEXT: apply
+// SIB-CHECK-NEXT: tuple
+// SIB-CHECK-NEXT: return
+
+// SIB-CHECK-LABEL: sil [serialized] [canonical] [ossa] @$ss1XVABycfC : $@convention(method) (@thin X.Type) -> X
+// SIB-CHECK: bb0
+// SIB-CHECK-NEXT: struct $X ()
+// SIB-CHECK-NEXT: return
+
+// SIB-CHECK-LABEL: sil [canonical] @unknown : $@convention(thin) () -> ()
+
+@_silgen_name("unknown")
+public func unknown() -> ()
+
+@frozen
+public struct X {
+  @inlinable
+  public func test() {
+    unknown()
+  }
+
+  @inlinable
+  public init() {}
+}
diff --git a/test/sil-opt/sil-opt.swift b/test/sil-opt/sil-opt.swift
index 5c4674df10eeb..fa3eb69eae380 100644
--- a/test/sil-opt/sil-opt.swift
+++ b/test/sil-opt/sil-opt.swift
@@ -1,6 +1,8 @@
 // RUN: %target-swift-frontend -primary-file %s -module-name Swift -g -module-link-name swiftCore -O -parse-as-library -parse-stdlib -emit-module -emit-module-path - -o /dev/null | %target-sil-opt -enable-sil-verify-all -module-name="Swift" -emit-sorted-sil | %FileCheck %s
 // RUN: %target-swift-frontend -primary-file %s -module-name Swift -g -O -parse-as-library -parse-stdlib -emit-sib -o - | %target-sil-opt -enable-sil-verify-all -module-name="Swift" -emit-sorted-sil | %FileCheck %s -check-prefix=SIB-CHECK
 
+// UNSUPPORTED: OS=macosx || OS=tvos || OS=watchos || OS=ios
+
 // CHECK: import Builtin
 // CHECK: import Swift