Skip to content

Commit 336d1d5

Browse files
committed
[SwiftSyntax] Use custom hash table for node lookup table
Use a Set<T>-like custom hash table (WeakLookupTable) that holds weak references to 'RawSyntax' nodes. This hash table doesn't hold ids separately; instead, it uses 'T.id' via the Identifiable protocol. Freeing a 'RawSyntax' node therefore turns its bucket in the table into just 'nil', so the bucket is reusable for referencing another object.
1 parent 226a978 commit 336d1d5

File tree

4 files changed

+364
-31
lines changed

4 files changed

+364
-31
lines changed

Sources/SwiftSyntax/SwiftSyntax.swift

Lines changed: 6 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -46,31 +46,23 @@ public enum SerializationFormat {
4646
case byteTree
4747
}
4848

49-
fileprivate struct WeakReference<T: AnyObject> {
50-
weak private(set) var value: T?
51-
52-
init(_ value: T) {
53-
self.value = value
54-
}
55-
}
49+
// For WeakLookupTable.
50+
extension RawSyntax : Identifiable {}
5651

5752
/// Deserializes the syntax tree from its serialized form to an object tree in
5853
/// Swift. To deserialize incrementally transferred syntax trees, the same
5954
/// instance of the deserializer must be used for all subsequent
6055
/// deserializations.
6156
public final class SyntaxTreeDeserializer {
62-
// FIXME: This lookup table just accumulates nodes, we should invalidate nodes
63-
// that are no longer used at some point and remove them from the table
64-
6557
/// Syntax nodes that have already been parsed and are able to be reused if
6658
/// they were omitted in an incremental syntax tree transfer
67-
private var nodeLookupTable: [SyntaxNodeId: WeakReference<RawSyntax>] = [:]
59+
private var nodeLookupTable: WeakLookupTable<RawSyntax> = .init()
6860

6961
/// Keep a strong reference to the syntax tree that contains the nodes in the
7062
/// `nodeLookupTable`. Because `nodeLookupTable` only holds a weak reference
7163
/// to the RawSyntax nodes, all retired `RawSyntax` nodes will be deallocated
7264
/// once we set a new tree. The weak references in `nodeLookupTable` will then
73-
/// become `nil` but will also never be accessed again.
65+
/// become `nil` and the slot will be reused to refer to another node.
7466
private var nodeLookupTree: RawSyntax? = nil
7567

7668
/// The IDs of the nodes that were reused as part of incremental syntax
@@ -131,17 +123,11 @@ public final class SyntaxTreeDeserializer {
131123

132124
private func lookupNode(id: SyntaxNodeId) -> RawSyntax? {
133125
reusedNodeIds.insert(id)
134-
guard let weakRef = nodeLookupTable[id] else {
135-
return nil
136-
}
137-
guard let value = weakRef.value else {
138-
fatalError("Trying to retrieve a node that has since been deallocated")
139-
}
140-
return value
126+
return nodeLookupTable[id]
141127
}
142128

143129
private func addToLookupTable(_ node: RawSyntax) {
144-
nodeLookupTable[node.id] = WeakReference(node)
130+
nodeLookupTable.insert(node)
145131
}
146132
}
147133

Lines changed: 228 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,228 @@
1+
//===----------- WeakLookupTable.swift - Swift Syntax Library -------------===//
2+
//
3+
// This source file is part of the Swift.org open source project
4+
//
5+
// Copyright (c) 2018 Apple Inc. and the Swift project authors
6+
// Licensed under Apache License v2.0 with Runtime Library Exception
7+
//
8+
// See https://swift.org/LICENSE.txt for license information
9+
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
10+
//
11+
//===----------------------------------------------------------------------===//
12+
// This file provides a lookup table with weak references.
13+
//===----------------------------------------------------------------------===//
14+
15+
/// Protocol for self-identifiable object
16+
protocol Identifiable {
17+
associatedtype Identifier : Hashable
18+
var id: Identifier { get }
19+
}
20+
21+
private struct WeakReference<T: AnyObject>: ExpressibleByNilLiteral {
22+
weak var value: T?
23+
init(_ value: T?) {
24+
self.value = value
25+
}
26+
init(nilLiteral: ()) {
27+
self.value = nil
28+
}
29+
}
30+
31+
/// 'Set' like container providing lookup table for objects with identifier.
32+
/// This doesn't take ownership of the objects. Instead, the objects are
33+
/// held by weak references.
34+
///
35+
/// References are stored in a hash table with simple open addressing. Because
36+
/// of weak references, unlike normal open addressing, erased buckets are simply
37+
/// turned into 'nil'.
38+
class WeakLookupTable<Element: Identifiable & AnyObject> {
39+
40+
/// Storage for the hash table.
41+
private var buckets: UnsafeMutablePointer<WeakReference<Element>>
42+
private var bucketCount: Int
43+
44+
/// Estimated count of inserted values. This is greater than or equal to
45+
/// the number of actually occupied buckets.
46+
/// i.e. estimatedCount >= _countOccupiedBuckets()
47+
private var estimatedCount: Int
48+
49+
init(capacity: Int = 0) {
50+
bucketCount = WeakLookupTable<Element>._bucketCount(for: capacity)
51+
buckets = .allocate(capacity: bucketCount)
52+
buckets.initialize(repeating: nil, count: bucketCount)
53+
estimatedCount = 0
54+
}
55+
56+
deinit {
57+
buckets.deinitialize(count: bucketCount)
58+
buckets.deallocate()
59+
}
60+
61+
/// Constant max load factor for hash table.
62+
private static var maxLoadFactor: Double {
63+
@inline(__always) get {
64+
return 0.75
65+
}
66+
}
67+
68+
/// Minimal bucket count sufficient to hold the specified capacity of
69+
/// objects, taking the max load factor into account.
70+
private static func _minimalBucketCount(for capacity: Int) -> Int {
71+
return Int((Double(capacity) / maxLoadFactor).rounded(.up))
72+
}
73+
74+
/// Number of buckets to allocate to hold the specified number of objects.
75+
/// This is the next power of 2 greater than or equal to
76+
/// '_minimalBucketCount(for: capacity)'
77+
private static func _bucketCount(for capacity: Int,
78+
from current: Int = 2) -> Int {
79+
// Bucket count must always be power of 2.
80+
precondition((current & (current - 1)) == 0)
81+
// Minimum is 2 to guarantee at least 1 hole.
82+
precondition(current >= 2)
83+
84+
let minimalBucketCount = _minimalBucketCount(for: capacity)
85+
86+
// Make sure it's representable. If 'minimalBucketCount' here is over
87+
// 0x4000_..., the bucket count must be 0x8000_... thus overflows.
88+
precondition(minimalBucketCount <= (Int.max >> 1) + 1)
89+
90+
var bucketCount = current
91+
while bucketCount < minimalBucketCount {
92+
// '&*=' for performance. Guaranteed by above 'precondition()'.
93+
bucketCount &*= 2
94+
}
95+
return bucketCount
96+
}
97+
98+
private var _bucketMask: Int {
99+
@inline(__always) get {
100+
// '&-' for performance. We know 'bucketCount >= 2'.
101+
return bucketCount &- 1
102+
}
103+
}
104+
105+
@inline(__always)
106+
private func _idealBucket(for id: Element.Identifier) -> Int {
107+
return id.hashValue & _bucketMask
108+
}
109+
110+
/// Finds the bucket where the object with the specified id should be stored
111+
/// to.
112+
private
113+
func _findHole(_ id: Element.Identifier) -> (pos: Int, alreadyExists: Bool) {
114+
var bucket = _idealBucket(for: id)
115+
116+
// Starting from the ideal bucket for the id, search for an available bucket,
117+
// or the bucket holding the id.
118+
while true {
119+
guard let obj = buckets[bucket].value else {
120+
return (bucket, false)
121+
}
122+
if obj.id == id {
123+
return (bucket, true)
124+
}
125+
// '&+' for performance. 'bucketCount' is 0x4000_... or below.
126+
bucket = (bucket &+ 1) & _bucketMask
127+
}
128+
}
129+
130+
/// Reserves enough space to store the specified number of elements. Returns
131+
/// true if resizing happened.
132+
func reserveCapacity(_ requiredCapacity: Int) -> Bool {
133+
let requiredBucketCount = WeakLookupTable<Element>
134+
._bucketCount(for: requiredCapacity, from: bucketCount)
135+
if (bucketCount >= requiredBucketCount) {
136+
return false
137+
}
138+
139+
// Slow path. Resizing.
140+
let oldBuckets = buckets
141+
let oldBucketRange = buckets ..< buckets.advanced(by: bucketCount)
142+
143+
bucketCount = requiredBucketCount
144+
buckets = .allocate(capacity: requiredBucketCount)
145+
buckets.initialize(repeating: nil, count: requiredBucketCount)
146+
147+
// Move all nodes from the old buffer.
148+
for oldBucket in oldBucketRange {
149+
if let id = oldBucket.pointee.value?.id {
150+
let newBucket = buckets.advanced(by: _findHole(id).pos)
151+
newBucket.moveAssign(from: oldBucket, count: 1)
152+
} else {
153+
oldBucket.deinitialize(count: 1)
154+
}
155+
}
156+
157+
oldBuckets.deallocate()
158+
159+
return true
160+
}
161+
162+
/// Count the actual number of occupied buckets.
163+
@inline(__always)
164+
private func _countOccupiedBuckets() -> Int {
165+
var count = 0
166+
for i in 0 ..< bucketCount where buckets[i].value != nil {
167+
// '&+=' for performance. 'bucketCount' is 0x4000_... or below.
168+
count &+= 1
169+
}
170+
return count
171+
}
172+
173+
/// Reserves enough space to store a single new object. Returns true if
174+
/// resizing happened.
175+
private func _ensurePlusOneCapacity() -> Bool {
176+
// '&+' for performance. 'estimatedCount' is always less than 'bucketCount'
177+
// which is 0x4000_... or below.
178+
if bucketCount >= WeakLookupTable<Element>
179+
._minimalBucketCount(for: estimatedCount &+ 1) {
180+
return false
181+
}
182+
183+
// Slow path.
184+
estimatedCount = _countOccupiedBuckets()
185+
// '&+' for performance. We know 'estimatedCount' derived by
186+
// '_countOccupiedBuckets()' is equal to or less than previous
187+
// 'estimatedCount'.
188+
return reserveCapacity(estimatedCount &+ 1)
189+
}
190+
191+
/// Inserts the given object into the table.
192+
@discardableResult
193+
func insert(_ obj: Element) -> Bool {
194+
var (pos, alreadyExists) = _findHole(obj.id)
195+
if alreadyExists {
196+
return false
197+
}
198+
199+
if /*resized=*/_ensurePlusOneCapacity() {
200+
pos = _findHole(obj.id).pos
201+
}
202+
buckets[pos].value = obj
203+
// '&+=' for performance. '_ensurePlusOneCapacity()' ensures it's safe.
204+
estimatedCount &+= 1
205+
return true
206+
}
207+
208+
/// Get a object with specified id. Returns 'nil' if the object hasn't been
209+
/// insert()-ed or it's already been freed.
210+
subscript(id: Element.Identifier) -> Element? {
211+
// Since we don't fill the bucket when the object is freed (because we don't
212+
// know), we can't stop iteration at a hole. So in the worst case (i.e. if
213+
// the object doesn't exist in the table), full linear search is needed.
214+
// However, since we assume the object exists and hasn't been freed yet,
215+
// we expect it's stored near the 'idealBucket' anyway.
216+
let idealBucket = _idealBucket(for: id)
217+
var bucket = idealBucket
218+
repeat {
219+
if let obj = buckets[bucket].value, obj.id == id {
220+
return obj
221+
}
222+
// '&+' for performance. 'bucketCount' is 0x4000_... or below.
223+
bucket = (bucket &+ 1) & _bucketMask
224+
} while bucket != idealBucket
225+
226+
return nil
227+
}
228+
}

Tests/LinuxMain.swift

Lines changed: 17 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,20 @@
11
import XCTest
22
import SwiftSyntaxTest
33

4-
XCTMain([
5-
testCase(AbsolutePositionTestCase.allTests),
6-
testCase(DecodeSyntaxTestCase.allTests),
7-
testCase(DiagnosticTestCase.allTests),
8-
testCase(LazyCachingTestCase.allTests),
9-
testCase(ParseFileTestCase.allTests),
10-
testCase(SyntaxChildrenAPITestCase.allTests),
11-
testCase(SyntaxCollectionsAPITestCase.allTests),
12-
testCase(SyntaxFactoryAPITestCase.allTests),
13-
testCase(SyntaxVisitorTestCase.allTests),
14-
])
4+
XCTMain({ () -> [XCTestCaseEntry] in
5+
var testCases: [XCTestCaseEntry] = [
6+
testCase(AbsolutePositionTestCase.allTests),
7+
testCase(DecodeSyntaxTestCase.allTests),
8+
testCase(DiagnosticTestCase.allTests),
9+
testCase(LazyCachingTestCase.allTests),
10+
testCase(ParseFileTestCase.allTests),
11+
testCase(SyntaxChildrenAPITestCase.allTests),
12+
testCase(SyntaxCollectionsAPITestCase.allTests),
13+
testCase(SyntaxFactoryAPITestCase.allTests),
14+
testCase(SyntaxVisitorTestCase.allTests),
15+
]
16+
#if DEBUG
17+
testCases.append(testCase(WeakLookupTableTestCase.allTests))
18+
#endif
19+
return testCases
20+
}())

0 commit comments

Comments
 (0)