-
Notifications
You must be signed in to change notification settings - Fork 10.5k
[Autodiff] Memory leaks found under certain conditions. #67323
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Comments
I have another version of this reproducer which highlights four memory leaks instead of two: import _Differentiation; import Foundation
/// Empty differentiable struct with no stored properties.
public struct B: Differentiable {}
// Reverse-differentiable method: rebinds `nt` and `f` to same-named locals and packs them into an `N`. `zs` is never read.
extension B {@differentiable(reverse) func uu(nt: A<SIMD8<Float>>, f: A<Float>, zs: [SZ]) -> N {let nt = nt; let f = f; return N(nt: nt, f: f)}}
// Reverse-differentiable mutating method with an empty body; the argument `r` is ignored.
extension B {@differentiable(reverse) mutating func a(_ r: C) {}}
/// Empty differentiable struct; used as the type of the `r` parameter of `j`, `b` and `d`.
struct S: Differentiable{}
/// Differentiable struct holding an array of `Float` (`f`) and an array of `SIMD8<Float>` (`nt`).
struct C: Differentiable {var f: Array<Float>; var nt: Array<SIMD8<Float>>}
// Parameterless initializer that sets both arrays to empty.
extension C {init() {self.f = []; self.nt = []}}
/// Differentiable struct pairing a non-differentiated `[Double]` (`i`) with a differentiable `B` (`h`).
struct W: Differentiable {
// Excluded from differentiation via `@noDerivative`.
@noDerivative var i: [Double]
// The only stored property not marked `@noDerivative`.
var h: B
// Reverse-differentiable initializer that assigns both properties; `i` defaults to an empty array.
@differentiable(reverse) init(i: [Double] = [], h: B) {self.i = i; self.h = h}
}
/// Differentiable struct holding an `A<SIMD8<Float>>` (`nt`) and an `A<Float>` (`f`); it is the return type of `B.uu(nt:f:zs:)`.
struct N: Differentiable {var nt: A<SIMD8<Float>>; var f: A<Float>}
/// Differentiable struct whose two stored properties are both marked `@noDerivative`; it is the result type of `j`.
struct O: Differentiable {
// Excluded from differentiation via `@noDerivative`.
@noDerivative public var i: [Double]
// Also excluded from differentiation, unlike `W.h`.
@noDerivative public var h: B
// Reverse-differentiable initializer that assigns both properties; no defaults.
@differentiable(reverse) public init(i: [Double], h: B) {self.i = i; self.h = h}
}
/// Generic differentiable wrapper around an array of `T`, with a hand-written `TangentVector`
/// and custom reverse-mode derivatives for its initializer and for the `_r` accessor.
public struct A<T>: Differentiable where T: Differentiable, T: AdditiveArithmetic {
/// Tangent type of `A`: an array of element tangents (`_b`) plus a single element tangent (`_a`).
public struct TangentVector: Differentiable, AdditiveArithmetic {
// The tangent vector is its own tangent vector type.
public typealias TangentVector = A.TangentVector
public var _b: [T.TangentVector]
public var _a: T.TangentVector
// Memberwise initializer.
public init(_b: [T.TangentVector], _a: T.TangentVector) {self._b = _b; self._a = _a}
}
// Backing storage for the wrapped values.
@usableFromInline var _v: [T]
// Stores `values` in `_v`; the `_a` argument (default `.zero`) is accepted but not stored.
@inlinable @differentiable(reverse) public init(_ values: [T], _a: T = .zero) {self._v = values}
// Read-only accessor returning the backing array `_v`.
@inlinable @differentiable(reverse) public var _r: [T] { return _v }
// Custom VJP for `init(_:_a:)`: returns the constructed value and a pullback mapping a tangent `v`
// to (`Array<T>.TangentVector(v._b)`, `v._a`).
@inlinable @derivative(of: init(_:_a:)) static func _vjpInit(_ values: [T], _a: T = .zero) -> (value: A, pullback: (TangentVector) -> (Array<T>.TangentVector, T.TangentVector)) {return (A(values, _a: _a), { v in return (Array<T>.TangentVector(v._b), v._a)})}
// Custom VJP for `_r`: returns `_v` and a pullback (a nested function, not a closure literal) that
// builds a `TangentVector` from `v.base` with a zero `_a` component.
@inlinable @derivative(of: _r) func vjpArray() -> (value: [T], pullback: (Array<T>.TangentVector) -> TangentVector) {func pullback(v: Array<T>.TangentVector) -> TangentVector {return TangentVector(_b: v.base, _a: T.TangentVector.zero)}; return (_v, pullback)}
// Empty body: `offset` is ignored and `self` is left unchanged.
public mutating func move(by offset: TangentVector) {}
}
/// `+`, `-` and `zero` for `A.TangentVector`, the members required by its `AdditiveArithmetic` conformance.
public extension A.TangentVector { // Not mathematically correct, of course, but simplified to this to demonstrate the memory leak(s).
// Returns `lhs` unchanged; `rhs` is ignored.
@inlinable static func + (lhs: Self, rhs: Self) -> Self {return lhs}
// Returns `lhs` unchanged; `rhs` is ignored.
@inlinable static func - (lhs: Self, rhs: Self) -> Self {return lhs}
// Zero tangent: an empty `_b` array and a `.zero` `_a`.
@inlinable static var zero: Self { Self(_b: [], _a: .zero) }
}
/// Empty differentiable struct; used as the element type of the `zs` parameter of `B.uu(nt:f:zs:)`.
public struct SZ: Differentiable {}
/// Builds a `C` by passing single-element, all-zero `A` values through `h.uu(nt:f:zs:)`.
///
/// - Parameter h: The `B` value whose `uu` method is called.
/// - Returns: A `C` built from the `_r` arrays of the `nt` and `f` fields that `uu` returns.
func g(h: B) -> C {
// One-element wrappers: a zero SIMD8 vector and a zero scalar.
var nt = A([SIMD8<Float>(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0)])
var f = A([Float(0.0)])
// `zs` is passed as an empty array.
let x = h.uu(nt: nt, f: f,zs: [SZ]())
// Overwrite the locals with the fields of the returned `N`.
nt = x.nt
f = x.f
return C(f: f._r,nt: nt._r)
}
/// Applies the reverse-differentiable function `f` to `x` and returns the result.
func o<T, R>(_ x: T, _ f: @differentiable(reverse) (T) -> R) -> R {f(x)}
/// Wraps the escaping, reverse-differentiable `f` in a closure that forwards its argument to `o(x, f)`.
func p<T, R>(_ f: @escaping @differentiable(reverse) (T) -> R) -> @differentiable(reverse) (T) -> R {{ x in o(x, f) }}
/// Reverse-differentiable function under test: sends `h` through the nested function `q`
/// (wrapped by `p`) and repackages the resulting `W` as an `O`.
///
/// - Parameters:
///   - h: Value wrapped in a `W` (with the default empty `i`) and passed to the wrapped function.
///   - r: Captured by the nested function `q` and forwarded to `d(h:r:)`.
/// - Returns: An `O` built from the `i` and `h` fields of the resulting `W`.
@differentiable(reverse) func j(h: B, r: S) -> O {
// Nested differentiable function: copies `l.h`, mutates the copy via `d`, and returns a copy of `l` with the updated `h`.
@differentiable(reverse) func q(_ l: W) -> W {var h = l.h; d(h: &h, r: r); var n = l; n.h = h; return n}
let rg = p(q)
for _ in 0 ..< 1 {} // Removing this empty for-loop causes all the memory leaks to vanish.
// Note: the local constant `W` has the same name as the struct `W` used in the initializer call on this line.
let W = rg(W(h: h))
return O(i: W.i, h: W.h)
}
/// Evaluates `j` at (`h`, `r`) via `valueWithPullback(at:_:of:)` and returns the value together with its pullback.
///
/// - Returns: The `O` value and a pullback from `O.TangentVector` to (`B.TangentVector`, `S.TangentVector`).
func b(h: B, r: S) -> (value: O, pullback: (O.TangentVector) -> (B.TangentVector, S.TangentVector))
{
let s = valueWithPullback(at: h, r, of: j)
// Re-label the result as the named tuple declared in the signature.
return (value: s.value, pullback: s.pullback)
}
/// Reverse-differentiable function that calls `h.a(_:)` with `g(h: h)`; `r` is not used in the body.
@differentiable(reverse) func d(h: inout B, r: S) {h.a(g(h: h))}
_ = b(h: B(), r: S()) The output from
One of the leaks will vanish if the conformance to I've made an attempt to make a 1:1 mapping with respect to the variable names in both reproducers, however, there may be variables present in one reproducer and not the other. For example, The 2023-07-10a toolchain was used for this. |
Specifying a concrete type for |
Posting my findings, so far, on this issue. I have been using the following, further reduced, reproducer which I believe captures the kind of memory leaks that we are seeing in the original reproducer.
The
Findings/Observations/Story so far...
|
Thanks for the reduction, this is very helpful! Where we're releasing the pullback that is generated in |
Continuing from last comment.
|
Here's the SIL for // m()
sil hidden @$s4main1myyF : $@convention(thin) () -> () {
bb0:
%0 = alloc_stack $Float // users: %35, %31
%1 = float_literal $Builtin.FPIEEE32, 0x3F800000 // 1 // user: %2
%2 = struct $Float (%1 : $Builtin.FPIEEE32) // user: %4
%3 = alloc_stack $Float // users: %4, %32, %31
store %2 to %3 : $*Float // id: %4
// function_ref f(x:)
%5 = function_ref @$s4main1f1xS2f_tF : $@convention(thin) (Float) -> Float // user: %6
%6 = thin_to_thick_function %5 : $@convention(thin) (Float) -> Float to $@callee_guaranteed (Float) -> Float // user: %11
%7 = differentiability_witness_function [jvp] [reverse] [parameters 0] [results 0] @$s4main1f1xS2f_tF : $@convention(thin) (Float) -> Float // user: %8
%8 = thin_to_thick_function %7 : $@convention(thin) (Float) -> (Float, @owned @callee_guaranteed (Float) -> Float) to $@callee_guaranteed (Float) -> (Float, @owned @callee_guaranteed (Float) -> Float) // user: %11
%9 = differentiability_witness_function [vjp] [reverse] [parameters 0] [results 0] @$s4main1f1xS2f_tF : $@convention(thin) (Float) -> Float // user: %10
%10 = thin_to_thick_function %9 : $@convention(thin) (Float) -> (Float, @owned @callee_guaranteed (Float) -> Float) to $@callee_guaranteed (Float) -> (Float, @owned @callee_guaranteed (Float) -> Float) // user: %11
%11 = differentiable_function [parameters 0] [results 0] %6 : $@callee_guaranteed (Float) -> Float with_derivative {%8 : $@callee_guaranteed (Float) -> (Float, @owned @callee_guaranteed (Float) -> Float), %10 : $@callee_guaranteed (Float) -> (Float, @owned @callee_guaranteed (Float) -> Float)} // users: %37, %13, %12
retain_value %11 : $@differentiable(reverse) @callee_guaranteed (Float) -> Float // id: %12
%13 = convert_escape_to_noescape %11 : $@differentiable(reverse) @callee_guaranteed (Float) -> Float to $@differentiable(reverse) @noescape @callee_guaranteed (Float) -> Float // users: %14, %19, %24
%14 = differentiable_function_extract [original] %13 : $@differentiable(reverse) @noescape @callee_guaranteed (Float) -> Float // user: %16
// function_ref thunk for @callee_guaranteed (@unowned Float) -> (@unowned Float)
%15 = function_ref @$sS2fIgyd_S2fIegnr_TR : $@convention(thin) (@in_guaranteed Float, @guaranteed @noescape @callee_guaranteed (Float) -> Float) -> @out Float // user: %16
%16 = partial_apply [callee_guaranteed] %15(%14) : $@convention(thin) (@in_guaranteed Float, @guaranteed @noescape @callee_guaranteed (Float) -> Float) -> @out Float // user: %17
%17 = convert_function %16 : $@callee_guaranteed (@in_guaranteed Float) -> @out Float to $@callee_guaranteed @substituted <τ_0_0, τ_0_1> (@in_guaranteed τ_0_0) -> @out τ_0_1 for <Float, Float> // users: %38, %18
%18 = convert_escape_to_noescape %17 : $@callee_guaranteed @substituted <τ_0_0, τ_0_1> (@in_guaranteed τ_0_0) -> @out τ_0_1 for <Float, Float> to $@noescape @callee_guaranteed @substituted <τ_0_0, τ_0_1> (@in_guaranteed τ_0_0) -> @out τ_0_1 for <Float, Float> // user: %29
%19 = differentiable_function_extract [jvp] %13 : $@differentiable(reverse) @noescape @callee_guaranteed (Float) -> Float // user: %21
// function_ref thunk for @callee_guaranteed (@unowned Float) -> (@unowned Float, @owned @escaping @callee_guaranteed (@unowned Float) -> (@unowned Float))
%20 = function_ref @$sS4fIegyd_Igydo_S2fxq_r0_lyS2fIsegnr_Iegnro_TR : $@convention(thin) (@in_guaranteed Float, @guaranteed @noescape @callee_guaranteed (Float) -> (Float, @owned @callee_guaranteed (Float) -> Float)) -> (@out Float, @owned @callee_guaranteed @substituted <τ_0_0, τ_0_1> (@in_guaranteed τ_0_0) -> @out τ_0_1 for <Float, Float>) // user: %21
%21 = partial_apply [callee_guaranteed] %20(%19) : $@convention(thin) (@in_guaranteed Float, @guaranteed @noescape @callee_guaranteed (Float) -> (Float, @owned @callee_guaranteed (Float) -> Float)) -> (@out Float, @owned @callee_guaranteed @substituted <τ_0_0, τ_0_1> (@in_guaranteed τ_0_0) -> @out τ_0_1 for <Float, Float>) // user: %22
%22 = convert_function %21 : $@callee_guaranteed (@in_guaranteed Float) -> (@out Float, @owned @callee_guaranteed @substituted <τ_0_0, τ_0_1> (@in_guaranteed τ_0_0) -> @out τ_0_1 for <Float, Float>) to $@callee_guaranteed @substituted <τ_0_0, τ_0_1, τ_0_2, τ_0_3> (@in_guaranteed τ_0_0) -> (@out τ_0_1, @owned @callee_guaranteed @substituted <τ_0_0, τ_0_1> (@in_guaranteed τ_0_0) -> @out τ_0_1 for <τ_0_2, τ_0_3>) for <Float, Float, Float, Float> // users: %39, %23
%23 = convert_escape_to_noescape %22 : $@callee_guaranteed @substituted <τ_0_0, τ_0_1, τ_0_2, τ_0_3> (@in_guaranteed τ_0_0) -> (@out τ_0_1, @owned @callee_guaranteed @substituted <τ_0_0, τ_0_1> (@in_guaranteed τ_0_0) -> @out τ_0_1 for <τ_0_2, τ_0_3>) for <Float, Float, Float, Float> to $@noescape @callee_guaranteed @substituted <τ_0_0, τ_0_1, τ_0_2, τ_0_3> (@in_guaranteed τ_0_0) -> (@out τ_0_1, @owned @callee_guaranteed @substituted <τ_0_0, τ_0_1> (@in_guaranteed τ_0_0) -> @out τ_0_1 for <τ_0_2, τ_0_3>) for <Float, Float, Float, Float> // user: %29
%24 = differentiable_function_extract [vjp] %13 : $@differentiable(reverse) @noescape @callee_guaranteed (Float) -> Float // user: %26
// function_ref thunk for @callee_guaranteed (@unowned Float) -> (@unowned Float, @owned @escaping @callee_guaranteed (@unowned Float) -> (@unowned Float))
%25 = function_ref @$sS4fIegyd_Igydo_S2fxq_r0_lyS2fIsegnr_Iegnro_TR : $@convention(thin) (@in_guaranteed Float, @guaranteed @noescape @callee_guaranteed (Float) -> (Float, @owned @callee_guaranteed (Float) -> Float)) -> (@out Float, @owned @callee_guaranteed @substituted <τ_0_0, τ_0_1> (@in_guaranteed τ_0_0) -> @out τ_0_1 for <Float, Float>) // user: %26
%26 = partial_apply [callee_guaranteed] %25(%24) : $@convention(thin) (@in_guaranteed Float, @guaranteed @noescape @callee_guaranteed (Float) -> (Float, @owned @callee_guaranteed (Float) -> Float)) -> (@out Float, @owned @callee_guaranteed @substituted <τ_0_0, τ_0_1> (@in_guaranteed τ_0_0) -> @out τ_0_1 for <Float, Float>) // user: %27
%27 = convert_function %26 : $@callee_guaranteed (@in_guaranteed Float) -> (@out Float, @owned @callee_guaranteed @substituted <τ_0_0, τ_0_1> (@in_guaranteed τ_0_0) -> @out τ_0_1 for <Float, Float>) to $@callee_guaranteed @substituted <τ_0_0, τ_0_1, τ_0_2, τ_0_3> (@in_guaranteed τ_0_0) -> (@out τ_0_1, @owned @callee_guaranteed @substituted <τ_0_0, τ_0_1> (@in_guaranteed τ_0_0) -> @out τ_0_1 for <τ_0_2, τ_0_3>) for <Float, Float, Float, Float> // users: %40, %28
%28 = convert_escape_to_noescape %27 : $@callee_guaranteed @substituted <τ_0_0, τ_0_1, τ_0_2, τ_0_3> (@in_guaranteed τ_0_0) -> (@out τ_0_1, @owned @callee_guaranteed @substituted <τ_0_0, τ_0_1> (@in_guaranteed τ_0_0) -> @out τ_0_1 for <τ_0_2, τ_0_3>) for <Float, Float, Float, Float> to $@noescape @callee_guaranteed @substituted <τ_0_0, τ_0_1, τ_0_2, τ_0_3> (@in_guaranteed τ_0_0) -> (@out τ_0_1, @owned @callee_guaranteed @substituted <τ_0_0, τ_0_1> (@in_guaranteed τ_0_0) -> @out τ_0_1 for <τ_0_2, τ_0_3>) for <Float, Float, Float, Float> // user: %29
%29 = differentiable_function [parameters 0] [results 0] %18 : $@noescape @callee_guaranteed @substituted <τ_0_0, τ_0_1> (@in_guaranteed τ_0_0) -> @out τ_0_1 for <Float, Float> with_derivative {%23 : $@noescape @callee_guaranteed @substituted <τ_0_0, τ_0_1, τ_0_2, τ_0_3> (@in_guaranteed τ_0_0) -> (@out τ_0_1, @owned @callee_guaranteed @substituted <τ_0_0, τ_0_1> (@in_guaranteed τ_0_0) -> @out τ_0_1 for <τ_0_2, τ_0_3>) for <Float, Float, Float, Float>, %28 : $@noescape @callee_guaranteed @substituted <τ_0_0, τ_0_1, τ_0_2, τ_0_3> (@in_guaranteed τ_0_0) -> (@out τ_0_1, @owned @callee_guaranteed @substituted <τ_0_0, τ_0_1> (@in_guaranteed τ_0_0) -> @out τ_0_1 for <τ_0_2, τ_0_3>) for <Float, Float, Float, Float>} // user: %31
// function_ref valueWithPullback<A, B>(at:of:)
%30 = function_ref @$s16_Differentiation17valueWithPullback2at2ofq_0B0_13TangentVectorQzAFQy_c8pullbacktx_q_xYjrXEtAA14DifferentiableRzAaJR_r0_lF : $@convention(thin) <τ_0_0, τ_0_1 where τ_0_0 : Differentiable, τ_0_1 : Differentiable> (@in_guaranteed τ_0_0, @guaranteed @differentiable(reverse) @noescape @callee_guaranteed @substituted <τ_0_0, τ_0_1> (@in_guaranteed τ_0_0) -> @out τ_0_1 for <τ_0_0, τ_0_1>) -> (@out τ_0_1, @owned @callee_guaranteed @substituted <τ_0_0, τ_0_1> (@in_guaranteed τ_0_0) -> @out τ_0_1 for <τ_0_1.TangentVector, τ_0_0.TangentVector>) // user: %31
%31 = apply %30<Float, Float>(%0, %3, %29) : $@convention(thin) <τ_0_0, τ_0_1 where τ_0_0 : Differentiable, τ_0_1 : Differentiable> (@in_guaranteed τ_0_0, @guaranteed @differentiable(reverse) @noescape @callee_guaranteed @substituted <τ_0_0, τ_0_1> (@in_guaranteed τ_0_0) -> @out τ_0_1 for <τ_0_0, τ_0_1>) -> (@out τ_0_1, @owned @callee_guaranteed @substituted <τ_0_0, τ_0_1> (@in_guaranteed τ_0_0) -> @out τ_0_1 for <τ_0_1.TangentVector, τ_0_0.TangentVector>) // user: %33
dealloc_stack %3 : $*Float // id: %32
%33 = convert_function %31 : $@callee_guaranteed @substituted <τ_0_0, τ_0_1> (@in_guaranteed τ_0_0) -> @out τ_0_1 for <Float, Float> to $@callee_guaranteed (@in_guaranteed Float) -> @out Float // user: %34
strong_release %33 : $@callee_guaranteed (@in_guaranteed Float) -> @out Float // id: %34
dealloc_stack %0 : $*Float // id: %35
%36 = tuple () // user: %41
release_value %11 : $@differentiable(reverse) @callee_guaranteed (Float) -> Float // id: %37
strong_release %17 : $@callee_guaranteed @substituted <τ_0_0, τ_0_1> (@in_guaranteed τ_0_0) -> @out τ_0_1 for <Float, Float> // id: %38
strong_release %22 : $@callee_guaranteed @substituted <τ_0_0, τ_0_1, τ_0_2, τ_0_3> (@in_guaranteed τ_0_0) -> (@out τ_0_1, @owned @callee_guaranteed @substituted <τ_0_0, τ_0_1> (@in_guaranteed τ_0_0) -> @out τ_0_1 for <τ_0_2, τ_0_3>) for <Float, Float, Float, Float> // id: %39
strong_release %27 : $@callee_guaranteed @substituted <τ_0_0, τ_0_1, τ_0_2, τ_0_3> (@in_guaranteed τ_0_0) -> (@out τ_0_1, @owned @callee_guaranteed @substituted <τ_0_0, τ_0_1> (@in_guaranteed τ_0_0) -> @out τ_0_1 for <τ_0_2, τ_0_3>) for <Float, Float, Float, Float> // id: %40
return %36 : $() // id: %41
} // end sil function '$s4main1myyF' |
This looks probable. Let me check what is going on |
Quick update! In order to validate my theory I modified the compiler to simply not store callee differentials in the loop context objects allocated on the heap and the memory leaks seem to go away. Obviously this isn't a fix, but I think it's a strong enough indicator of our theory. |
Thanks @jkshtj for nailing this down. This is optimized code for // m()
sil hidden [noinline] @$s4leak1myyF : $@convention(thin) () -> () {
bb0:
// function_ref f(x:)
%0 = function_ref @$s4leak1f1xS2f_tF : $@convention(thin) (Float) -> Float // users: %6, %1
%1 = thin_to_thick_function %0 : $@convention(thin) (Float) -> Float to $@callee_guaranteed (Float) -> Float // users: %39, %10
// function_ref forward-mode derivative of f(x:)
%2 = function_ref @$s4leak1f1xS2f_tFTJfSpSUr : $@convention(thin) (Float) -> (Float, @owned @callee_guaranteed (Float) -> Float) // users: %7, %3
%3 = thin_to_thick_function %2 : $@convention(thin) (Float) -> (Float, @owned @callee_guaranteed (Float) -> Float) to $@callee_guaranteed (Float) -> (Float, @owned @callee_guaranteed (Float) -> Float) // users: %40, %11
// function_ref reverse-mode derivative of f(x:)
%4 = function_ref @$s4leak1f1xS2f_tFTJrSpSUr : $@convention(thin) (Float) -> (Float, @owned @callee_guaranteed (Float) -> Float) // users: %8, %5
%5 = thin_to_thick_function %4 : $@convention(thin) (Float) -> (Float, @owned @callee_guaranteed (Float) -> Float) to $@callee_guaranteed (Float) -> (Float, @owned @callee_guaranteed (Float) -> Float) // users: %41, %12
%6 = thin_to_thick_function %0 : $@convention(thin) (Float) -> Float to $@noescape @callee_guaranteed (Float) -> Float // user: %42
%7 = thin_to_thick_function %2 : $@convention(thin) (Float) -> (Float, @owned @callee_guaranteed (Float) -> Float) to $@noescape @callee_guaranteed (Float) -> (Float, @owned @callee_guaranteed (Float) -> Float) // user: %43
%8 = thin_to_thick_function %4 : $@convention(thin) (Float) -> (Float, @owned @callee_guaranteed (Float) -> Float) to $@noescape @callee_guaranteed (Float) -> (Float, @owned @callee_guaranteed (Float) -> Float) // user: %44
%9 = integer_literal $Builtin.Word, 24 // user: %13
strong_retain %1 : $@callee_guaranteed (Float) -> Float // id: %10
strong_retain %3 : $@callee_guaranteed (Float) -> (Float, @owned @callee_guaranteed (Float) -> Float) // id: %11
strong_retain %5 : $@callee_guaranteed (Float) -> (Float, @owned @callee_guaranteed (Float) -> Float) // id: %12
%13 = builtin "autoDiffCreateLinearMapContext"(%9 : $Builtin.Word) : $Builtin.NativeObject // users: %38, %35, %28, %23, %18
%14 = tuple () // users: %45, %15
%15 = enum $_AD__$s4leak1f1xS2f_tF_bb1__Pred__src_0_wrt_0, #_AD__$s4leak1f1xS2f_tF_bb1__Pred__src_0_wrt_0.bb0!enumelt, %14 : $() // user: %17
%16 = integer_literal $Builtin.Word, 8 // users: %28, %23, %18
%17 = tuple $(predecessor: _AD__$s4leak1f1xS2f_tF_bb1__Pred__src_0_wrt_0) (%15) // user: %20
%18 = builtin "autoDiffAllocateSubcontext"(%13 : $Builtin.NativeObject, %16 : $Builtin.Word) : $Builtin.RawPointer // users: %21, %19
%19 = pointer_to_address %18 : $Builtin.RawPointer to [strict] $*(predecessor: _AD__$s4leak1f1xS2f_tF_bb1__Pred__src_0_wrt_0) // user: %20
store %17 to %19 : $*(predecessor: _AD__$s4leak1f1xS2f_tF_bb1__Pred__src_0_wrt_0) // id: %20
%21 = enum $_AD__$s4leak1f1xS2f_tF_bb2__Pred__src_0_wrt_0, #_AD__$s4leak1f1xS2f_tF_bb2__Pred__src_0_wrt_0.bb1!enumelt, %18 : $Builtin.RawPointer // user: %22
%22 = tuple $(predecessor: _AD__$s4leak1f1xS2f_tF_bb2__Pred__src_0_wrt_0) (%21) // user: %25
%23 = builtin "autoDiffAllocateSubcontext"(%13 : $Builtin.NativeObject, %16 : $Builtin.Word) : $Builtin.RawPointer // users: %26, %24
%24 = pointer_to_address %23 : $Builtin.RawPointer to [strict] $*(predecessor: _AD__$s4leak1f1xS2f_tF_bb2__Pred__src_0_wrt_0) // user: %25
store %22 to %24 : $*(predecessor: _AD__$s4leak1f1xS2f_tF_bb2__Pred__src_0_wrt_0) // id: %25
%26 = enum $_AD__$s4leak1f1xS2f_tF_bb1__Pred__src_0_wrt_0, #_AD__$s4leak1f1xS2f_tF_bb1__Pred__src_0_wrt_0.bb2!enumelt, %23 : $Builtin.RawPointer // user: %27
%27 = tuple $(predecessor: _AD__$s4leak1f1xS2f_tF_bb1__Pred__src_0_wrt_0) (%26) // user: %30
%28 = builtin "autoDiffAllocateSubcontext"(%13 : $Builtin.NativeObject, %16 : $Builtin.Word) : $Builtin.RawPointer // users: %31, %29
%29 = pointer_to_address %28 : $Builtin.RawPointer to [strict] $*(predecessor: _AD__$s4leak1f1xS2f_tF_bb1__Pred__src_0_wrt_0) // user: %30
store %27 to %29 : $*(predecessor: _AD__$s4leak1f1xS2f_tF_bb1__Pred__src_0_wrt_0) // id: %30
%31 = enum $_AD__$s4leak1f1xS2f_tF_bb3__Pred__src_0_wrt_0, #_AD__$s4leak1f1xS2f_tF_bb3__Pred__src_0_wrt_0.bb1!enumelt, %28 : $Builtin.RawPointer // user: %34
// function_ref specialized autodiff subset parameters thunk for pullback from @escaping @callee_guaranteed (@unowned Float) -> (@unowned Float, @unowned Float)
%32 = function_ref @$sS3fIegydd_TJSpSSUpSUUUrSUUP067$sSf16_DifferentiationE7_vjpAdd3lhs3rhsSf5value_Sf_SftSfc8pullbackth1_i5FZSf_I6SfcfU_Tf3npf_n : $@convention(thin) (Float) -> Float // user: %33
%33 = thin_to_thick_function %32 : $@convention(thin) (Float) -> Float to $@callee_guaranteed (Float) -> Float // user: %34
%34 = tuple $(predecessor: _AD__$s4leak1f1xS2f_tF_bb3__Pred__src_0_wrt_0, @callee_guaranteed (Float) -> Float) (%31, %33) // user: %37
%35 = builtin "autoDiffProjectTopLevelSubcontext"(%13 : $Builtin.NativeObject) : $Builtin.RawPointer // user: %36
%36 = pointer_to_address %35 : $Builtin.RawPointer to [strict] $*(predecessor: _AD__$s4leak1f1xS2f_tF_bb3__Pred__src_0_wrt_0, @callee_guaranteed (Float) -> Float) // user: %37
store %34 to %36 : $*(predecessor: _AD__$s4leak1f1xS2f_tF_bb3__Pred__src_0_wrt_0, @callee_guaranteed (Float) -> Float) // id: %37
strong_release %13 : $Builtin.NativeObject // id: %38
strong_release %1 : $@callee_guaranteed (Float) -> Float // id: %39
strong_release %3 : $@callee_guaranteed (Float) -> (Float, @owned @callee_guaranteed (Float) -> Float) // id: %40
strong_release %5 : $@callee_guaranteed (Float) -> (Float, @owned @callee_guaranteed (Float) -> Float) // id: %41
strong_release %6 : $@noescape @callee_guaranteed (Float) -> Float // id: %42
strong_release %7 : $@noescape @callee_guaranteed (Float) -> (Float, @owned @callee_guaranteed (Float) -> Float) // id: %43
strong_release %8 : $@noescape @callee_guaranteed (Float) -> (Float, @owned @callee_guaranteed (Float) -> Float) // id: %44
return %14 : $() // id: %45
} // end sil function '$s4leak1myyF' Note the following: %33 = thin_to_thick_function %32 : $@convention(thin) (Float) -> Float to $@callee_guaranteed (Float) -> Float // user: %34
%34 = tuple $(predecessor: _AD__$s4leak1f1xS2f_tF_bb3__Pred__src_0_wrt_0, @callee_guaranteed (Float) -> Float) (%31, %33) // user: %37
%35 = builtin "autoDiffProjectTopLevelSubcontext"(%13 : $Builtin.NativeObject) : $Builtin.RawPointer // user: %36
%36 = pointer_to_address %35 : $Builtin.RawPointer to [strict] $*(predecessor: _AD__$s4leak1f1xS2f_tF_bb3__Pred__src_0_wrt_0, @callee_guaranteed (Float) -> Float) // user: %37
store %34 to %36 : $*(predecessor: _AD__$s4leak1f1xS2f_tF_bb3__Pred__src_0_wrt_0, @callee_guaranteed (Float) -> Float) // id: %37 Here we're essentially capturing |
Yup, that's pretty much the code I removed in my testing, which made the leaks go away. |
Note that in optimized code due to inlining, etc. the context finally got optimized out by LLVM passes. |
Here is unoptimized code: // function_ref autodiff subset parameters thunk for reverse-mode derivative from static Float.+ infix(_:_:)
%63 = function_ref @$sSf1poiyS2f_SftFZS3fXMtS3fIegyd_IetMyyydo_TJSrSSUpSrSUUP : $@convention(method) (Float, Float, @thin Float.Type) -> (Float, @owned @callee_guaranteed (Float) -> Float) // user: %64
%64 = differentiable_function [parameters 0] [results 0] %59 : $@convention(method) (Float, Float, @thin Float.Type) -> Float with_derivative {%61 : $@convention(method) (Float, Float, @thin Float.Type) -> (Float
%65 = differentiable_function_extract [vjp] %64 : $@differentiable(reverse) @convention(method) (Float, @noDerivative Float, @noDerivative @thin Float.Type) -> Float // user: %66
%66 = apply %65(%0, %5, %58) : $@convention(method) (Float, Float, @thin Float.Type) -> (Float, @owned @callee_guaranteed (Float) -> Float) // users: %68, %67
%67 = tuple_extract %66 : $(Float, @callee_guaranteed (Float) -> Float), 0 // user: %75
%68 = tuple_extract %66 : $(Float, @callee_guaranteed (Float) -> Float), 1 // user: %70
// function_ref pullback of f(x:)
%69 = function_ref @$s4leak1f1xS2f_tFTJpSpSr : $@convention(thin) (Float, @guaranteed Builtin.NativeObject) -> Float // user: %74
%70 = tuple $(predecessor: _AD__$s4leak1f1xS2f_tF_bb3__Pred__src_0_wrt_0, @callee_guaranteed (Float) -> Float) (%56, %68) // user: %73
%71 = builtin "autoDiffProjectTopLevelSubcontext"(%2 : $Builtin.NativeObject) : $Builtin.RawPointer // user: %72
%72 = pointer_to_address %71 : $Builtin.RawPointer to [strict] $*(predecessor: _AD__$s4leak1f1xS2f_tF_bb3__Pred__src_0_wrt_0, @callee_guaranteed (Float) -> Float) // user: %73
store %70 to %72 : $*(predecessor: _AD__$s4leak1f1xS2f_tF_bb3__Pred__src_0_wrt_0, @callee_guaranteed (Float) -> Float) // id: %73
%74 = partial_apply [callee_guaranteed] %69(%2) : $@convention(thin) (Float, @guaranteed Builtin.NativeObject) -> Float // user: %75
%75 = tuple (%67 : $Float, %74 : $@callee_guaranteed (Float) -> Float) // user: %76
return %75 : $(Float, @callee_guaranteed (Float) -> Float) // id: %76
} // end sil function '$s4leak1f1xS2f_tFTJrSpSr' We're capturing |
Tagging @rxwei Ok, so here is some thinking out loud. We are using a heap-allocated context as a tape (in the classical AD sense). In loop-less code this tape is implicit via the chain of closures. The main problem is that the loop context is type-less: we are only using it as a memory allocator, and while it frees the allocated memory, it does not release the heap objects stored there, and it has no way of doing so: there is no type information. In the context we're storing pullback tuples. Each tuple encodes the control flow (predecessor BB), if available, as well as the pullbacks of nested calls inside this BB. Note that both components of the tuple are optional: there might be no predecessors for the entry BB, and there might be no pullback closures if there are no calls in the corresponding BB. And it's the captured pullbacks that we need to release. One possible quick band-aid solution is to box the captured tuples. This way the loop context will store boxes; the drawback is that an additional indirection would be necessary. However, boxes always use native reference counting, so we can simply iterate over the heap objects in the destructor, releasing them. |
Thanks for capturing the discussion so far Anton! The box solution makes sense to me. Another possible solution might be to do a little book-keeping in the linear map context object and store a map, let's call it ST2Addr, of SILTypes to Addresses (representing the nested pullbacks) . Then during the destruction/deinitialization of the linear map context, call This might provide us advantages in the way of keeping the common case (where the generated pullbacks are actually called) fast, as it is right now. It could also potentially let us get away without changing any of the core derivative generation code. |
Sorry I haven't read it in detail as I'm on vacation. I talked to @BradLarson recently about transitioning from the bump pointer allocator to |
I am currently checking this as an alternative to the solution above. |
@rxwei So, I do not see how The problem here is not that the memory allocated by linear map context is not freed, it is freed. The problem is that the Let me show the example. In the case above we are allocating 24 bytes from linear map context. These 24 bytes are used to store This context pointer is retained but never released. The bump pointer allocator happily frees these 24 bytes of memory, however, as it does not know what we stored into the allocated slab, it has no way to release the context and we're leaking it. In order to correctly release the objects captured by linear map context we need to know the types of all pullback tuples stored there. |
So, here is the proposal. Let's make everything typed. We have 3 functions / builtins: AutoDiffLinearMapContext *swift_autoDiffCreateLinearMapContext(size_t);
void *swift_autoDiffProjectTopLevelSubcontext(AutoDiffLinearMapContext *);
void *swift_autoDiffAllocateSubcontext(AutoDiffLinearMapContext *, size_t); They correspond to the following builtins: autoDiffCreateLinearMapContext: (Builtin.Word) -> Builtin.NativeObject
autoDiffProjectTopLevelSubcontext: (Builtin.NativeObject) -> Builtin.RawPointer
autoDiffAllocateSubcontext: (Builtin.NativeObject, Builtin.Word) -> Builtin.RawPointer Instead of the amount of memory to be allocated, let us pass the types there. So, the Swift builtins will become: autoDiffCreateLinearMapContext: (T.Type) -> Builtin.NativeObject
autoDiffProjectTopLevelSubcontext: (Builtin.NativeObject) -> Builtin.RawPointer
autoDiffAllocateSubcontext: (Builtin.NativeObject, T.Type) -> Builtin.RawPointer and the corresponding runtime functions will be: AutoDiffLinearMapContext *swift_autoDiffCreateLinearMapContext(const Metadata*);
void *swift_autoDiffProjectTopLevelSubcontext(AutoDiffLinearMapContext *);
void *swift_autoDiffAllocateSubcontext(AutoDiffLinearMapContext *, const Metadata*); The semantics would be as follows:
@jkshtj this is for you :) |
I think afterwards we can proceed with possible optimizations (early deallocation, in-place boxes, etc). But for now, let us get rid of leaks first :) |
Description
The following code was found to leak memory upon the process exiting.
Steps to reproduce
First, compile the following code in Debug mode (in a single file):
Next, run `leaks`, or another memory leak-checking tool. The command line for `leaks` to generate the following output is: `leaks --atExit -- ./executableName`.
Note that Xcode's memory leak checker may give inconsistent results from run to run with this code snippet.
The stack trace of the first leak is as follows:
The second one looks like this:
Expected behavior
No leaks should be detected, and the program should exit with an exit code of 0.
Environment
Additional context
- Removing the `for` loop in line 40, and running its contents once, will cause the leaks to vanish.
- Removing `f` from lines 3, 8 and 32 will cause one, but not both, of the leaks to vanish.
- Removing `AdditiveArithmetic` in line 11 will crash the compiler, with the following assertion failing: `Assertion failed: (!ActiveDiagnostic && "Already have an active diagnostic")`
The text was updated successfully, but these errors were encountered: