From af573bfc75e84432c25c14f2be8af602c4f5ea13 Mon Sep 17 00:00:00 2001 From: Alex Hoppen Date: Wed, 29 Aug 2018 15:45:17 -0700 Subject: [PATCH 1/3] Make RawSyntax a class --- Sources/SwiftSyntax/RawSyntax.swift | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/Sources/SwiftSyntax/RawSyntax.swift b/Sources/SwiftSyntax/RawSyntax.swift index bab3d5ba94b..0ce14c36147 100644 --- a/Sources/SwiftSyntax/RawSyntax.swift +++ b/Sources/SwiftSyntax/RawSyntax.swift @@ -85,7 +85,7 @@ fileprivate enum RawSyntaxData { /// Represents the raw tree structure underlying the syntax tree. These nodes /// have no notion of identity and only provide structure to the tree. They /// are immutable and can be freely shared between syntax nodes. -struct RawSyntax { +final class RawSyntax { fileprivate let data: RawSyntaxData let presence: SourcePresence @@ -124,6 +124,13 @@ struct RawSyntax { } }).value } + + /// Creates a copy of `other`. + init(_ other: RawSyntax) { + self.data = other.data + self.presence = other.presence + self.id = other.id + } init(kind: SyntaxKind, layout: [RawSyntax?], presence: SourcePresence, id: SyntaxNodeId? = nil) { @@ -336,7 +343,7 @@ extension RawSyntax { extension RawSyntax: Codable { /// Creates a RawSyntax from the provided Foundation Decoder. - init(from decoder: Decoder) throws { + required convenience init(from decoder: Decoder) throws { let container = try decoder.container(keyedBy: CodingKeys.self) let id = try container.decodeIfPresent(SyntaxNodeId.self, forKey: .id) let omitted = try container.decodeIfPresent(Bool.self, forKey: .omitted) ?? false @@ -352,7 +359,7 @@ extension RawSyntax: Codable { guard let lookupNode = lookupFunc(id) else { throw IncrementalDecodingError.nodeLookupFailed(id) } - self = lookupNode + self.init(lookupNode) return } From 226a9780830074f730e7aaa966d968e2a1135de8 Mon Sep 17 00:00:00 2001 From: Alex Hoppen Date: Wed, 29 Aug 2018 15:47:41 -0700 Subject: [PATCH 2/3] [swiftSyntax] Store weak references to RawSyntax nodes in the nodeLookupTable This way we don't continue to retain RawSyntax nodes that are no longer needed for incremental transfer. --- Sources/SwiftSyntax/RawSyntax.swift | 2 +- Sources/SwiftSyntax/SwiftSyntax.swift | 35 +++++++++++++++++++++++---- 2 files changed, 31 insertions(+), 6 deletions(-) diff --git a/Sources/SwiftSyntax/RawSyntax.swift b/Sources/SwiftSyntax/RawSyntax.swift index 0ce14c36147..182b91e91ed 100644 --- a/Sources/SwiftSyntax/RawSyntax.swift +++ b/Sources/SwiftSyntax/RawSyntax.swift @@ -343,7 +343,7 @@ extension RawSyntax { extension RawSyntax: Codable { /// Creates a RawSyntax from the provided Foundation Decoder. - required convenience init(from decoder: Decoder) throws { + convenience init(from decoder: Decoder) throws { let container = try decoder.container(keyedBy: CodingKeys.self) let id = try container.decodeIfPresent(SyntaxNodeId.self, forKey: .id) let omitted = try container.decodeIfPresent(Bool.self, forKey: .omitted) ?? false diff --git a/Sources/SwiftSyntax/SwiftSyntax.swift b/Sources/SwiftSyntax/SwiftSyntax.swift index df96532e98e..eca5916b8c8 100644 --- a/Sources/SwiftSyntax/SwiftSyntax.swift +++ b/Sources/SwiftSyntax/SwiftSyntax.swift @@ -46,6 +46,14 @@ public enum SerializationFormat { case byteTree } +fileprivate struct WeakReference { + weak private(set) var value: T? + + init(_ value: T) { + self.value = value + } +} + /// Deserializes the syntax tree from its serialized form to an object tree in /// Swift. To deserialize incrementally transferred syntax trees, the same /// instance of the deserializer must be used for all subsequent @@ -56,7 +64,14 @@ public final class SyntaxTreeDeserializer { /// Syntax nodes that have already been parsed and are able to be reused if /// they were omitted in an incremental syntax tree transfer - private var nodeLookupTable: [SyntaxNodeId: RawSyntax] = [:] + private var nodeLookupTable: [SyntaxNodeId: WeakReference] = [:] + + /// Keep a strong reference to the syntax tree that contains the nodes in the + /// `nodeLookupTable`. Because `nodeLookupTable` only holds a weak reference + /// to the RawSyntax nodes, all retired `RawSyntax` nodes will be deallocated + /// once we set a new tree. The weak references in `nodeLookupTable` will then + /// become `nil` but will also never be accessed again. + private var nodeLookupTree: RawSyntax? = nil /// The IDs of the nodes that were reused as part of incremental syntax /// parsing during the last deserialization @@ -70,7 +85,9 @@ public final class SyntaxTreeDeserializer { let decoder = JSONDecoder() decoder.userInfo[.rawSyntaxDecodedCallback] = self.addToLookupTable decoder.userInfo[.omittedNodeLookupFunction] = self.lookupNode - return try decoder.decode(RawSyntax.self, from: data) + let tree = try decoder.decode(RawSyntax.self, from: data) + self.nodeLookupTree = tree + return tree } /// Deserialize the given data as a ByteTree encoded syntax tree @@ -78,11 +95,13 @@ public final class SyntaxTreeDeserializer { var userInfo: [ByteTreeUserInfoKey: Any] = [:] userInfo[.rawSyntaxDecodedCallback] = self.addToLookupTable userInfo[.omittedNodeLookupFunction] = self.lookupNode - return try ByteTreeReader.read(RawSyntax.self, from: data, + let tree = try ByteTreeReader.read(RawSyntax.self, from: data, userInfo: &userInfo) { (version: ByteTreeProtocolVersion) in return version.major == 1 } + self.nodeLookupTree = tree + return tree } /// Decode a serialized form of SourceFileSyntax to a syntax tree. @@ -112,11 +131,17 @@ public final class SyntaxTreeDeserializer { private func lookupNode(id: SyntaxNodeId) -> RawSyntax? { reusedNodeIds.insert(id) - return nodeLookupTable[id] + guard let weakRef = nodeLookupTable[id] else { + return nil + } + guard let value = weakRef.value else { + fatalError("Trying to retrieve a node that has since been deallocated") + } + return value } private func addToLookupTable(_ node: RawSyntax) { - nodeLookupTable[node.id] = node + nodeLookupTable[node.id] = WeakReference(node) } } From 336d1d597c31161922b11934ad36c11bc5f117ab Mon Sep 17 00:00:00 2001 From: Rintaro Ishizaki Date: Fri, 14 Sep 2018 10:47:29 +0900 Subject: [PATCH 3/3] [SwiftSyntax] Use custom hash table for node lookup table Use Set like custom hash table (WeakLookupTable) that holds weak references for 'RawSyntax' nodes. This hash table doesn't holds ids separately, instead, use 'T.id' via Identifiable protocol. So freeing 'RawSyntax' turns the bucket in the table into just 'nil', so it's reusable for referencing another object. --- Sources/SwiftSyntax/SwiftSyntax.swift | 26 +- Sources/SwiftSyntax/WeakLookupTable.swift | 228 ++++++++++++++++++ Tests/LinuxMain.swift | 28 ++- .../SwiftSyntaxTest/WeakLookupTableTest.swift | 113 +++++++++ 4 files changed, 364 insertions(+), 31 deletions(-) create mode 100644 Sources/SwiftSyntax/WeakLookupTable.swift create mode 100644 Tests/SwiftSyntaxTest/WeakLookupTableTest.swift diff --git a/Sources/SwiftSyntax/SwiftSyntax.swift b/Sources/SwiftSyntax/SwiftSyntax.swift index eca5916b8c8..3551d52194b 100644 --- a/Sources/SwiftSyntax/SwiftSyntax.swift +++ b/Sources/SwiftSyntax/SwiftSyntax.swift @@ -46,31 +46,23 @@ public enum SerializationFormat { case byteTree } -fileprivate struct WeakReference { - weak private(set) var value: T? - - init(_ value: T) { - self.value = value - } -} +// For WeakLookupTable. +extension RawSyntax : Identifiable {} /// Deserializes the syntax tree from its serialized form to an object tree in /// Swift. To deserialize incrementally transferred syntax trees, the same /// instance of the deserializer must be used for all subsequent /// deserializations. public final class SyntaxTreeDeserializer { - // FIXME: This lookup table just accumulates nodes, we should invalidate nodes - // that are no longer used at some point and remove them from the table - /// Syntax nodes that have already been parsed and are able to be reused if /// they were omitted in an incremental syntax tree transfer - private var nodeLookupTable: [SyntaxNodeId: WeakReference] = [:] + private var nodeLookupTable: WeakLookupTable = .init() /// Keep a strong reference to the syntax tree that contains the nodes in the /// `nodeLookupTable`. Because `nodeLookupTable` only holds a weak reference /// to the RawSyntax nodes, all retired `RawSyntax` nodes will be deallocated /// once we set a new tree. The weak references in `nodeLookupTable` will then - /// become `nil` but will also never be accessed again. + /// become `nil` and the slot will be reused to refer another node. private var nodeLookupTree: RawSyntax? = nil /// The IDs of the nodes that were reused as part of incremental syntax @@ -131,17 +123,11 @@ public final class SyntaxTreeDeserializer { private func lookupNode(id: SyntaxNodeId) -> RawSyntax? { reusedNodeIds.insert(id) - guard let weakRef = nodeLookupTable[id] else { - return nil - } - guard let value = weakRef.value else { - fatalError("Trying to retrieve a node that has since been deallocated") - } - return value + return nodeLookupTable[id] } private func addToLookupTable(_ node: RawSyntax) { - nodeLookupTable[node.id] = WeakReference(node) + nodeLookupTable.insert(node) } } diff --git a/Sources/SwiftSyntax/WeakLookupTable.swift b/Sources/SwiftSyntax/WeakLookupTable.swift new file mode 100644 index 00000000000..0d1ea3dea74 --- /dev/null +++ b/Sources/SwiftSyntax/WeakLookupTable.swift @@ -0,0 +1,228 @@ +//===----------- WeakLookupTable.swift - Swift Syntax Library -------------===// +// +// This source file is part of the Swift.org open source project +// +// Copyright (c) 2018 Apple Inc. and the Swift project authors +// Licensed under Apache License v2.0 with Runtime Library Exception +// +// See https://swift.org/LICENSE.txt for license information +// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors +// +//===----------------------------------------------------------------------===// +// This file provides lookup table with weak reference. +//===----------------------------------------------------------------------===// + +/// Protocol for self-identifiable object +protocol Identifiable { + associatedtype Identifier : Hashable + var id: Identifier { get } +} + +private struct WeakReference: ExpressibleByNilLiteral { + weak var value: T? + init(_ value: T?) { + self.value = value + } + init(nilLiteral: ()) { + self.value = nil + } +} + +/// 'Set' like container providing lookup table for objects with identifier. +/// This doesn't take ownership of the objects. Instead, the objects are +/// held by weak references. +/// +/// References are stored in a hash table with simple open adressing. Because +/// of weak reference, unlike normal open addressing, erased bucket are simply +/// turned into 'nil'. +class WeakLookupTable { + + /// Storage for the hash table. + private var buckets: UnsafeMutablePointer> + private var bucketCount: Int + + /// Estimated count of inserted values. This is greater than or equal to + /// the number of actually occupied buckets. + /// i.e. estimatedCount >= _countOccupiedBuckets() + private var estimatedCount: Int + + init(capacity: Int = 0) { + bucketCount = WeakLookupTable._bucketCount(for: capacity) + buckets = .allocate(capacity: bucketCount) + buckets.initialize(repeating: nil, count: bucketCount) + estimatedCount = 0 + } + + deinit { + buckets.deinitialize(count: bucketCount) + buckets.deallocate() + } + + /// Constant max load factor for hash table. + private static var maxLoadFactor: Double { + @inline(__always) get { + return 0.75 + } + } + + /// Minimal number of bucket count enough to hold specified capacity of + /// objects with taking max load factor into account. + private static func _minimalBucketCount(for capacity: Int) -> Int { + return Int((Double(capacity) / maxLoadFactor).rounded(.up)) + } + + /// Number of bucket count to allocate to hold specified number of objects. + /// This is the next power of 2 greater than or equal to + /// '_minimalBucketCount(for: capacity)' + private static func _bucketCount(for capacity: Int, + from current: Int = 2) -> Int { + // Bucket count must always be power of 2. + precondition((current & (current - 1)) == 0) + // Minimum is 2 to guarantee at least 1 hole. + precondition(current >= 2) + + let minimalBucketCount = _minimalBucketCount(for: capacity) + + // Make sure it's representable. If 'minimalBucketCount' here is over + // 0x4000_..., the bucket count must be 0x8000_... thus overflows. + precondition(minimalBucketCount <= (Int.max >> 1) + 1) + + var bucketCount = current + while bucketCount < minimalBucketCount { + // '&*=' for performance. Guaranteed by above 'precondition()'. + bucketCount &*= 2 + } + return bucketCount + } + + private var _bucketMask: Int { + @inline(__always) get { + // '&-' for performance. We know 'bucketCount >= 2'. + return bucketCount &- 1 + } + } + + @inline(__always) + private func _idealBucket(for id: Element.Identifier) -> Int { + return id.hashValue & _bucketMask + } + + /// Finds the bucket where the object with the specified id should be stored + /// to. + private + func _findHole(_ id: Element.Identifier) -> (pos: Int, alreadyExists: Bool) { + var bucket = _idealBucket(for: id) + + // Starting from the ideal bucket for the id, search an available bucket, + // or the bucket holding the id. + while true { + guard let obj = buckets[bucket].value else { + return (bucket, false) + } + if obj.id == id { + return (bucket, true) + } + // '&+' for performance. 'bucketCount' is 0x4000_... or below. + bucket = (bucket &+ 1) & _bucketMask + } + } + + /// Reserves enough space to store the specified number of elements. Returns + /// true if resizing happened. + func reserveCapacity(_ requiredCapacity: Int) -> Bool { + let requiredBucketCount = WeakLookupTable + ._bucketCount(for: requiredCapacity, from: bucketCount) + if (bucketCount >= requiredBucketCount) { + return false + } + + // Slow path. Resizing. + let oldBuckets = buckets + let oldBucketRange = buckets ..< buckets.advanced(by: bucketCount) + + bucketCount = requiredBucketCount + buckets = .allocate(capacity: requiredBucketCount) + buckets.initialize(repeating: nil, count: requiredBucketCount) + + // Move all nodes from the old buffer. + for oldBucket in oldBucketRange { + if let id = oldBucket.pointee.value?.id { + let newBucket = buckets.advanced(by: _findHole(id).pos) + newBucket.moveAssign(from: oldBucket, count: 1) + } else { + oldBucket.deinitialize(count: 1) + } + } + + oldBuckets.deallocate() + + return true + } + + /// Count the actual number of occupied buckets. + @inline(__always) + private func _countOccupiedBuckets() -> Int { + var count = 0 + for i in 0 ..< bucketCount where buckets[i].value != nil { + // '&+=' for performance. 'bucketCount' is 0x4000_... or below. + count &+= 1 + } + return count + } + + /// Reserves enough space to store a single new object. Returns true if + /// resizing happened. + private func _ensurePlusOneCapacity() -> Bool { + // '&+' for performance. 'estimatedCount' is always less than 'bucketCount' + // which is 0x4000_... or below. + if bucketCount >= WeakLookupTable + ._minimalBucketCount(for: estimatedCount &+ 1) { + return false + } + + // Slow path. + estimatedCount = _countOccupiedBuckets() + // '&+' for performance. We know 'estimatedCount' derived by + // '_countOccupiedBuckets()' is equal to or less than previous + // 'estimatedCount'. + return reserveCapacity(estimatedCount &+ 1) + } + + /// Inserts the given object into the table. + @discardableResult + func insert(_ obj: Element) -> Bool { + var (pos, alreadyExists) = _findHole(obj.id) + if alreadyExists { + return false + } + + if /*resized=*/_ensurePlusOneCapacity() { + pos = _findHole(obj.id).pos + } + buckets[pos].value = obj + // '&+=' for performance. '_ensurePlusOneCapacity()' ensures it's safe. + estimatedCount &+= 1 + return true + } + + /// Get a object with specified id. Returns 'nil' if the object hasn't been + /// insert()-ed or it's already been freed. + subscript(id: Element.Identifier) -> Element? { + // Since we don't fill the bucket when the object is freed (because we don't + // know), we can't stop iteration at a hole. So in the worst case (i.e. if + // the object doesn't exist in the table), full linear search is needed. + // However, since we assume the object exists and hasn't been freed yet, + // we expect it's stored near the 'idealBucket' anyway. + let idealBucket = _idealBucket(for: id) + var bucket = idealBucket + repeat { + if let obj = buckets[bucket].value, obj.id == id { + return obj + } + // '&+' for performance. 'bucketCount' is 0x4000_... or below. + bucket = (bucket &+ 1) & _bucketMask + } while bucket != idealBucket + + return nil + } +} diff --git a/Tests/LinuxMain.swift b/Tests/LinuxMain.swift index 2385dc58522..eee4c1b2a4f 100644 --- a/Tests/LinuxMain.swift +++ b/Tests/LinuxMain.swift @@ -1,14 +1,20 @@ import XCTest import SwiftSyntaxTest -XCTMain([ - testCase(AbsolutePositionTestCase.allTests), - testCase(DecodeSyntaxTestCase.allTests), - testCase(DiagnosticTestCase.allTests), - testCase(LazyCachingTestCase.allTests), - testCase(ParseFileTestCase.allTests), - testCase(SyntaxChildrenAPITestCase.allTests), - testCase(SyntaxCollectionsAPITestCase.allTests), - testCase(SyntaxFactoryAPITestCase.allTests), - testCase(SyntaxVisitorTestCase.allTests), -]) +XCTMain({ () -> [XCTestCaseEntry] in + var testCases: [XCTestCaseEntry] = [ + testCase(AbsolutePositionTestCase.allTests), + testCase(DecodeSyntaxTestCase.allTests), + testCase(DiagnosticTestCase.allTests), + testCase(LazyCachingTestCase.allTests), + testCase(ParseFileTestCase.allTests), + testCase(SyntaxChildrenAPITestCase.allTests), + testCase(SyntaxCollectionsAPITestCase.allTests), + testCase(SyntaxFactoryAPITestCase.allTests), + testCase(SyntaxVisitorTestCase.allTests), + ] +#if DEBUG + testCases.append(testCase(WeakLookupTableTestCase.allTests)) +#endif + return testCases +}()) diff --git a/Tests/SwiftSyntaxTest/WeakLookupTableTest.swift b/Tests/SwiftSyntaxTest/WeakLookupTableTest.swift new file mode 100644 index 00000000000..1c8152f6d57 --- /dev/null +++ b/Tests/SwiftSyntaxTest/WeakLookupTableTest.swift @@ -0,0 +1,113 @@ +#if DEBUG // @testable import + +import XCTest +@testable import SwiftSyntax + +final class MinimalNode : Identifiable { + let id: String + let children: [MinimalNode?] + init(id: String, children: [MinimalNode?]) { + self.id = id + self.children = children + } +} + +func createNode( + _ table: WeakLookupTable, + _ id: String, + _ children: [MinimalNode?] = [] +) -> MinimalNode { + let node = MinimalNode(id: id, children: children) + XCTAssertTrue(table.insert(node)) + return node +} + +public class WeakLookupTableTestCase: XCTestCase { + + public static let allTests = [ + ("testBasic", testBasic), + ] + + func testBasic() { + let table = WeakLookupTable() + var tree: MinimalNode? = nil + + // ------------------------------------------------------------------------ + // Initial. + tree = createNode(table, "root1", [ + createNode(table, "1", [ + createNode(table, "1-1"), + createNode(table, "1-2"), + createNode(table, "1-3"), + ]), + createNode(table, "2", [ + createNode(table, "2-1"), + nil, + createNode(table, "2-2"), + ]), + createNode(table, "3"), + ]) + + XCTAssertNil(table["notexist"]) + + XCTAssertNotNil(table["root1"]) + XCTAssertNotNil(table["1"]) + XCTAssertNotNil(table["1-1"]) + XCTAssertNotNil(table["1-2"]) + XCTAssertNotNil(table["1-3"]) + XCTAssertNotNil(table["2"]) + XCTAssertNotNil(table["2-1"]) + XCTAssertNotNil(table["2-2"]) + XCTAssertNotNil(table["3"]) + + // ------------------------------------------------------------------------ + // Partially replaced. + tree = createNode(table, "root2", [ + table["1"], + createNode(table, "4", [ + table["2-1"], + nil, + createNode(table, "4-3") + ]), + table["3"], + ]) + + // Reused nodes. + XCTAssertNotNil(table["1"]) + XCTAssertNotNil(table["1-1"]) + XCTAssertNotNil(table["1-2"]) + XCTAssertNotNil(table["1-3"]) + XCTAssertNotNil(table["2-1"]) + XCTAssertNotNil(table["3"]) + + // Newly added nodes. + XCTAssertNotNil(table["root2"]) + XCTAssertNotNil(table["4"]) + XCTAssertNotNil(table["4-3"]) + + // Destroyed nodes. + XCTAssertNil(table["root1"]) + XCTAssertNil(table["2"]) + XCTAssertNil(table["2-2"]) + + // ------------------------------------------------------------------------ + // Completely replaced. + tree = createNode(table, "empty") + + XCTAssertNotNil(table["empty"]) + + XCTAssertNil(table["root2"]) + XCTAssertNil(table["1"]) + XCTAssertNil(table["1-1"]) + XCTAssertNil(table["1-2"]) + XCTAssertNil(table["1-3"]) + XCTAssertNil(table["2-1"]) + XCTAssertNil(table["3"]) + XCTAssertNil(table["4"]) + XCTAssertNil(table["4-3"]) + + _ = tree // Prevents 'never read' warning + } +} + +#endif // DEBUG