Skip to content

Commit 9185049

Browse files
committed
[WIP] Use custom hash table for node lookup table
1 parent 7948be8 commit 9185049

File tree

2 files changed

+205
-16
lines changed

2 files changed

+205
-16
lines changed
Lines changed: 200 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,200 @@
1+
//===----------- CacheLookupTable.swift - Swift Syntax Library -----------===//
2+
//
3+
// This source file is part of the Swift.org open source project
4+
//
5+
// Copyright (c) 2014 - 2018 Apple Inc. and the Swift project authors
6+
// Licensed under Apache License v2.0 with Runtime Library Exception
7+
//
8+
// See https://swift.org/LICENSE.txt for license information
9+
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
10+
//
11+
//===----------------------------------------------------------------------===//
12+
// This file provides a lookup table for cached objects.
13+
//===----------------------------------------------------------------------===//
14+
15+
/// A type whose instances carry their own identifier.
///
/// Conforming types expose a hashable `id`.
protocol Identifiable {
  /// The type of the identifier; it must be hashable.
  associatedtype Identifier: Hashable

  /// The identifier of this instance.
  var id: Identifier { get }
}
20+
21+
/// A box that refers to an object without keeping it alive.
///
/// It can be written as `nil`, which makes it usable as the repeated element
/// of an array of empty references.
private struct WeakReference<T: AnyObject>: ExpressibleByNilLiteral {
  /// The referenced object. Because the property is `weak`, it reads as `nil`
  /// once the object has been deallocated.
  weak var value: T?

  /// Creates a reference to `value` (or an empty reference for `nil`).
  init(_ value: T?) {
    self.value = value
  }

  /// Creates an empty reference from the `nil` literal.
  init(nilLiteral: ()) {
    self.init(nil)
  }
}
30+
31+
/// A `Set`-like container providing a lookup table for objects that carry
/// their own identifier.
///
/// The table does not take ownership of the objects; they are held through
/// weak references.
///
/// References are stored in a hash table with simple open addressing (the
/// probe advances one bucket at a time, wrapping around). Because the
/// references are weak, a bucket whose object has been freed simply reads as
/// `nil`; nothing is notified and no tombstone is written.
struct CacheLookupTable<T: Identifiable & AnyObject> {

  private typealias Buffer = [WeakReference<T>]

  /// Storage for the hash table. Its count is always a power of 2, and at
  /// least 2.
  private var buckets: Buffer

  /// Estimated count of inserted values. This is greater than or equal to
  /// the number of the actually occupied buckets,
  /// i.e. `estimatedCount >= _countOccupiedBuckets()`.
  private var estimatedCount: Int

  /// Creates an empty table with room for at least `capacity` values.
  init(capacity: Int = 0) {
    buckets = .init(repeating: nil,
                    count: CacheLookupTable._bucketCount(for: capacity))
    estimatedCount = 0
  }

  /// Constant max load factor for the hash table.
  ///
  /// This is a computed property because generic types cannot declare static
  /// stored properties.
  private static var maxLoadFactor: Double {
    @inline(__always) get {
      return 0.75
    }
  }

  /// Minimal number of buckets needed to hold `capacity` values with the max
  /// load factor taken into account.
  private static func _minimalBucketCount(for capacity: Int) -> Int {
    return Int((Double(capacity) / maxLoadFactor).rounded(.up))
  }

  /// Number of buckets to allocate for holding the specified number of
  /// values: the smallest power of 2 that is greater than or equal to both
  /// `current` and `_minimalBucketCount(for: capacity)`.
  ///
  /// - Parameters:
  ///   - capacity: Number of values the table must be able to hold.
  ///   - current: Bucket count to start doubling from; must be a power of 2
  ///     and at least 2.
  private static func _bucketCount(for capacity: Int,
                                   from current: Int = 2) -> Int {
    // The largest power of 2 representable in `Int`.
    let maxBucketCount = (Int.max >> 1) + 1

    // Make sure it's representable.
    precondition(capacity <= maxBucketCount)
    // Bucket count must always be power of 2.
    precondition((current & (current - 1)) == 0)
    // Minimum is 2 to guarantee at least 1 hole.
    precondition(current >= 2)

    let minimalBucketCount = _minimalBucketCount(for: capacity)
    // The doubling below targets `minimalBucketCount`, which is larger than
    // `capacity`. Without this check the wrapping multiplication could
    // overflow to a negative value and then to 0, and the loop would never
    // terminate.
    precondition(minimalBucketCount <= maxBucketCount)

    var bucketCount = current
    while bucketCount < minimalBucketCount {
      bucketCount &*= 2
    }
    return bucketCount
  }

  /// Mask that maps any integer onto a valid bucket index. Valid because the
  /// bucket count is a power of 2.
  private var _bucketMask: Int {
    @inline(__always) get {
      return buckets.count &- 1
    }
  }

  /// The bucket at which probing for `id` starts.
  @inline(__always)
  private func _idealBucket(for id: T.Identifier) -> Int {
    return id.hashValue & _bucketMask
  }

  /// Finds the slot where the value with the specified id should be stored.
  ///
  /// Starting from the ideal bucket for the id, probes forward until it
  /// reaches either an empty bucket (`found == false`) or a bucket holding a
  /// value with the same id (`found == true`).
  ///
  /// The loop relies on the table always containing at least one empty
  /// bucket. Note that probing stops at the first empty bucket, including one
  /// that is empty only because its object was freed, so a live value stored
  /// beyond such a bucket is not seen here.
  private func _findHole(_ id: T.Identifier) -> (pos: Int, found: Bool) {
    var bucket = _idealBucket(for: id)

    while true {
      guard let value = buckets[bucket].value else {
        return (bucket, false)
      }
      if value.id == id {
        return (bucket, true)
      }
      bucket = (bucket &+ 1) & _bucketMask
    }
  }

  /// Reserves enough space to store the specified number of elements.
  ///
  /// - Returns: `true` if resizing happened.
  @discardableResult
  mutating func reserveCapacity(_ requiredCapacity: Int) -> Bool {
    let bucketCount = CacheLookupTable._bucketCount(for: requiredCapacity,
                                                    from: buckets.count)
    guard buckets.count < bucketCount else {
      return false
    }

    // Slow path. Resizing.
    let oldBuckets = buckets
    buckets = .init(repeating: nil, count: bucketCount)

    // Re-insert every value that is still alive into the new buffer. Freed
    // entries are dropped.
    for oldBucket in oldBuckets {
      guard let liveValue = oldBucket.value else {
        continue
      }
      let pos = _findHole(liveValue.id).pos
      buckets[pos].value = liveValue
    }
    return true
  }

  /// Counts the actual number of occupied buckets, i.e. buckets whose object
  /// is still alive.
  @inline(__always)
  private func _countOccupiedBuckets() -> Int {
    var count = 0
    for bucket in buckets where bucket.value != nil {
      count &+= 1
    }
    return count
  }

  /// Reserves enough space to store a single new value.
  ///
  /// - Returns: `true` if resizing happened.
  mutating private func _ensurePlusOneCapacity() -> Bool {
    let needed =
      CacheLookupTable._minimalBucketCount(for: estimatedCount &+ 1)
    if buckets.count >= needed {
      return false
    }

    // Slow path. Re-count the actual number of occupied buckets (the estimate
    // may include objects that have been freed since), then reserve the
    // capacity.
    estimatedCount = _countOccupiedBuckets()
    return reserveCapacity(estimatedCount &+ 1)
  }

  /// Inserts the given object into the table.
  ///
  /// - Returns: `false` if probing found a value with the same id (the table
  ///   is left unchanged); `true` if the object was stored.
  @discardableResult
  mutating func insert(_ obj: T) -> Bool {
    let probe = _findHole(obj.id)
    guard !probe.found else {
      return false
    }

    // Resizing rehashes everything, so the slot has to be looked up again.
    let resized = _ensurePlusOneCapacity()
    let pos = resized ? _findHole(obj.id).pos : probe.pos

    buckets[pos].value = obj
    estimatedCount &+= 1
    return true
  }

  /// Gets the value with the specified id. Returns `nil` if the value hasn't
  /// been `insert()`-ed or it has already been freed.
  public subscript(id: T.Identifier) -> T? {
    // Since we don't fill the bucket when the value is freed (because we don't
    // know), we can't stop iterating at a hole. So in the worst case (i.e. if
    // the object is not contained in this table), a full linear search is
    // required. However, since we assume the value exists and hasn't been
    // freed yet, we expect it's stored near the 'idealBucket' anyway.
    let idealBucket = _idealBucket(for: id)
    var bucket = idealBucket
    repeat {
      // Load the weak reference once so the object that was compared is the
      // object that is returned.
      if let value = buckets[bucket].value, value.id == id {
        return value
      }
      bucket = (bucket &+ 1) & _bucketMask
    } while bucket != idealBucket

    return nil
  }
}

Sources/SwiftSyntax/SwiftSyntax.swift

Lines changed: 5 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -46,13 +46,8 @@ public enum SerializationFormat {
4646
case byteTree
4747
}
4848

49-
fileprivate struct WeakReference<T: AnyObject> {
50-
weak private(set) var value: T?
51-
52-
init(_ value: T) {
53-
self.value = value
54-
}
55-
}
49+
// For CacheLookupTable.
50+
extension RawSyntax : Identifiable {}
5651

5752
/// Deserializes the syntax tree from its serialized form to an object tree in
5853
/// Swift. To deserialize incrementally transferred syntax trees, the same
@@ -64,7 +59,7 @@ public final class SyntaxTreeDeserializer {
6459

6560
/// Syntax nodes that have already been parsed and are able to be reused if
6661
/// they were omitted in an incremental syntax tree transfer
67-
private var nodeLookupTable: [SyntaxNodeId: WeakReference<RawSyntax>] = [:]
62+
private var nodeLookupTable: CacheLookupTable<RawSyntax> = .init()
6863

6964
/// Keep a strong reference to the syntax tree that contains the nodes in the
7065
/// `nodeLookupTable`. Because `nodeLookupTable` only holds a weak reference
@@ -131,17 +126,11 @@ public final class SyntaxTreeDeserializer {
131126

132127
private func lookupNode(id: SyntaxNodeId) -> RawSyntax? {
133128
reusedNodeIds.insert(id)
134-
guard let weakRef = nodeLookupTable[id] else {
135-
return nil
136-
}
137-
guard let value = weakRef.value else {
138-
fatalError("Trying to retrieve a node that has since been deallocated")
139-
}
140-
return value
129+
return nodeLookupTable[id]
141130
}
142131

143132
private func addToLookupTable(_ node: RawSyntax) {
144-
nodeLookupTable[node.id] = WeakReference(node)
133+
nodeLookupTable.insert(node)
145134
}
146135
}
147136

0 commit comments

Comments
 (0)