Skip to content

Commit a997013

Browse files
authored
Merge pull request swiftlang#11 from rintaro/lookuptable-custom
Use custom hash table as node lookup table
2 parents bbdb60c + 336d1d5 commit a997013

File tree

5 files changed

+386
-21
lines changed

5 files changed

+386
-21
lines changed

Diff for: Sources/SwiftSyntax/RawSyntax.swift

+10-3
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,7 @@ fileprivate enum RawSyntaxData {
8585
/// Represents the raw tree structure underlying the syntax tree. These nodes
8686
/// have no notion of identity and only provide structure to the tree. They
8787
/// are immutable and can be freely shared between syntax nodes.
88-
struct RawSyntax {
88+
final class RawSyntax {
8989
fileprivate let data: RawSyntaxData
9090
let presence: SourcePresence
9191

@@ -124,6 +124,13 @@ struct RawSyntax {
124124
}
125125
}).value
126126
}
127+
128+
/// Creates a copy of `other`.
129+
init(_ other: RawSyntax) {
130+
self.data = other.data
131+
self.presence = other.presence
132+
self.id = other.id
133+
}
127134

128135
init(kind: SyntaxKind, layout: [RawSyntax?], presence: SourcePresence,
129136
id: SyntaxNodeId? = nil) {
@@ -336,7 +343,7 @@ extension RawSyntax {
336343

337344
extension RawSyntax: Codable {
338345
/// Creates a RawSyntax from the provided Foundation Decoder.
339-
init(from decoder: Decoder) throws {
346+
convenience init(from decoder: Decoder) throws {
340347
let container = try decoder.container(keyedBy: CodingKeys.self)
341348
let id = try container.decodeIfPresent(SyntaxNodeId.self, forKey: .id)
342349
let omitted = try container.decodeIfPresent(Bool.self, forKey: .omitted) ?? false
@@ -352,7 +359,7 @@ extension RawSyntax: Codable {
352359
guard let lookupNode = lookupFunc(id) else {
353360
throw IncrementalDecodingError.nodeLookupFailed(id)
354361
}
355-
self = lookupNode
362+
self.init(lookupNode)
356363
return
357364
}
358365

Diff for: Sources/SwiftSyntax/SwiftSyntax.swift

+18-7
Original file line numberDiff line numberDiff line change
@@ -46,17 +46,24 @@ public enum SerializationFormat {
4646
case byteTree
4747
}
4848

49+
// For WeakLookupTable.
50+
extension RawSyntax : Identifiable {}
51+
4952
/// Deserializes the syntax tree from its serialized form to an object tree in
5053
/// Swift. To deserialize incrementally transferred syntax trees, the same
5154
/// instance of the deserializer must be used for all subsequent
5255
/// deserializations.
5356
public final class SyntaxTreeDeserializer {
54-
// FIXME: This lookup table just accumulates nodes, we should invalidate nodes
55-
// that are no longer used at some point and remove them from the table
56-
5757
/// Syntax nodes that have already been parsed and are able to be reused if
5858
/// they were omitted in an incremental syntax tree transfer
59-
private var nodeLookupTable: [SyntaxNodeId: RawSyntax] = [:]
59+
private var nodeLookupTable: WeakLookupTable<RawSyntax> = .init()
60+
61+
/// Keep a strong reference to the syntax tree that contains the nodes in the
62+
/// `nodeLookupTable`. Because `nodeLookupTable` only holds a weak reference
63+
/// to the RawSyntax nodes, all retired `RawSyntax` nodes will be deallocated
64+
/// once we set a new tree. The weak references in `nodeLookupTable` will then
65+
/// become `nil` and the slot will be reused to refer to another node.
66+
private var nodeLookupTree: RawSyntax? = nil
6067

6168
/// The IDs of the nodes that were reused as part of incremental syntax
6269
/// parsing during the last deserialization
@@ -70,19 +77,23 @@ public final class SyntaxTreeDeserializer {
7077
let decoder = JSONDecoder()
7178
decoder.userInfo[.rawSyntaxDecodedCallback] = self.addToLookupTable
7279
decoder.userInfo[.omittedNodeLookupFunction] = self.lookupNode
73-
return try decoder.decode(RawSyntax.self, from: data)
80+
let tree = try decoder.decode(RawSyntax.self, from: data)
81+
self.nodeLookupTree = tree
82+
return tree
7483
}
7584

7685
/// Deserialize the given data as a ByteTree encoded syntax tree
7786
private func deserializeByteTree(_ data: Data) throws -> RawSyntax {
7887
var userInfo: [ByteTreeUserInfoKey: Any] = [:]
7988
userInfo[.rawSyntaxDecodedCallback] = self.addToLookupTable
8089
userInfo[.omittedNodeLookupFunction] = self.lookupNode
81-
return try ByteTreeReader.read(RawSyntax.self, from: data,
90+
let tree = try ByteTreeReader.read(RawSyntax.self, from: data,
8291
userInfo: &userInfo) {
8392
(version: ByteTreeProtocolVersion) in
8493
return version.major == 1
8594
}
95+
self.nodeLookupTree = tree
96+
return tree
8697
}
8798

8899
/// Decode a serialized form of SourceFileSyntax to a syntax tree.
@@ -116,7 +127,7 @@ public final class SyntaxTreeDeserializer {
116127
}
117128

118129
private func addToLookupTable(_ node: RawSyntax) {
119-
nodeLookupTable[node.id] = node
130+
nodeLookupTable.insert(node)
120131
}
121132
}
122133

Diff for: Sources/SwiftSyntax/WeakLookupTable.swift

+228
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,228 @@
1+
//===----------- WeakLookupTable.swift - Swift Syntax Library -------------===//
2+
//
3+
// This source file is part of the Swift.org open source project
4+
//
5+
// Copyright (c) 2018 Apple Inc. and the Swift project authors
6+
// Licensed under Apache License v2.0 with Runtime Library Exception
7+
//
8+
// See https://swift.org/LICENSE.txt for license information
9+
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
10+
//
11+
//===----------------------------------------------------------------------===//
12+
// This file provides a lookup table that holds its elements by weak reference.
13+
//===----------------------------------------------------------------------===//
14+
15+
/// Protocol for objects that carry their own identifier.
16+
protocol Identifiable {
17+
associatedtype Identifier : Hashable
18+
var id: Identifier { get }
19+
}
20+
21+
private struct WeakReference<T: AnyObject>: ExpressibleByNilLiteral {
22+
weak var value: T?
23+
init(_ value: T?) {
24+
self.value = value
25+
}
26+
init(nilLiteral: ()) {
27+
self.value = nil
28+
}
29+
}
30+
31+
/// 'Set' like container providing lookup table for objects with identifier.
32+
/// This doesn't take ownership of the objects. Instead, the objects are
33+
/// held by weak references.
34+
///
35+
/// References are stored in a hash table with simple open addressing. Because
36+
/// the references are weak, unlike normal open addressing, erased buckets are
37+
/// simply turned into 'nil'.
38+
class WeakLookupTable<Element: Identifiable & AnyObject> {
39+
40+
/// Storage for the hash table.
41+
private var buckets: UnsafeMutablePointer<WeakReference<Element>>
42+
private var bucketCount: Int
43+
44+
/// Estimated count of inserted values. This is greater than or equal to
45+
/// the number of actually occupied buckets.
46+
/// i.e. estimatedCount >= _countOccupiedBuckets()
47+
private var estimatedCount: Int
48+
49+
init(capacity: Int = 0) {
50+
bucketCount = WeakLookupTable<Element>._bucketCount(for: capacity)
51+
buckets = .allocate(capacity: bucketCount)
52+
buckets.initialize(repeating: nil, count: bucketCount)
53+
estimatedCount = 0
54+
}
55+
56+
deinit {
57+
buckets.deinitialize(count: bucketCount)
58+
buckets.deallocate()
59+
}
60+
61+
/// Constant max load factor for hash table.
62+
private static var maxLoadFactor: Double {
63+
@inline(__always) get {
64+
return 0.75
65+
}
66+
}
67+
68+
/// Minimal number of buckets needed to hold the specified capacity of
69+
/// objects, taking the max load factor into account.
70+
private static func _minimalBucketCount(for capacity: Int) -> Int {
71+
return Int((Double(capacity) / maxLoadFactor).rounded(.up))
72+
}
73+
74+
/// Number of buckets to allocate to hold the specified number of objects.
75+
/// This is the smallest power of 2 that is at least 'current' and at least
76+
/// '_minimalBucketCount(for: capacity)'.
77+
private static func _bucketCount(for capacity: Int,
78+
from current: Int = 2) -> Int {
79+
// Bucket count must always be power of 2.
80+
precondition((current & (current - 1)) == 0)
81+
// Minimum is 2 to guarantee at least 1 hole.
82+
precondition(current >= 2)
83+
84+
let minimalBucketCount = _minimalBucketCount(for: capacity)
85+
86+
// Make sure it's representable. If 'minimalBucketCount' here is over
87+
// 0x4000_..., the bucket count must be 0x8000_... thus overflows.
88+
precondition(minimalBucketCount <= (Int.max >> 1) + 1)
89+
90+
var bucketCount = current
91+
while bucketCount < minimalBucketCount {
92+
// '&*=' for performance. Guaranteed by above 'precondition()'.
93+
bucketCount &*= 2
94+
}
95+
return bucketCount
96+
}
97+
98+
private var _bucketMask: Int {
99+
@inline(__always) get {
100+
// '&-' for performance. We know 'bucketCount >= 2'.
101+
return bucketCount &- 1
102+
}
103+
}
104+
105+
@inline(__always)
106+
private func _idealBucket(for id: Element.Identifier) -> Int {
107+
return id.hashValue & _bucketMask
108+
}
109+
110+
/// Finds the bucket in which the object with the specified id is stored, or
111+
/// the first empty bucket where it should be stored.
112+
private
113+
func _findHole(_ id: Element.Identifier) -> (pos: Int, alreadyExists: Bool) {
114+
var bucket = _idealBucket(for: id)
115+
116+
// Starting from the ideal bucket for the id, search an available bucket,
117+
// or the bucket holding the id.
118+
while true {
119+
guard let obj = buckets[bucket].value else {
120+
return (bucket, false)
121+
}
122+
if obj.id == id {
123+
return (bucket, true)
124+
}
125+
// '&+' for performance. 'bucketCount' is 0x4000_... or below.
126+
bucket = (bucket &+ 1) & _bucketMask
127+
}
128+
}
129+
130+
/// Reserves enough space to store the specified number of elements. Returns
131+
/// true if resizing happened.
132+
func reserveCapacity(_ requiredCapacity: Int) -> Bool {
133+
let requiredBucketCount = WeakLookupTable<Element>
134+
._bucketCount(for: requiredCapacity, from: bucketCount)
135+
if (bucketCount >= requiredBucketCount) {
136+
return false
137+
}
138+
139+
// Slow path. Resizing.
140+
let oldBuckets = buckets
141+
let oldBucketRange = buckets ..< buckets.advanced(by: bucketCount)
142+
143+
bucketCount = requiredBucketCount
144+
buckets = .allocate(capacity: requiredBucketCount)
145+
buckets.initialize(repeating: nil, count: requiredBucketCount)
146+
147+
// Move all nodes from the old buffer.
148+
for oldBucket in oldBucketRange {
149+
if let id = oldBucket.pointee.value?.id {
150+
let newBucket = buckets.advanced(by: _findHole(id).pos)
151+
newBucket.moveAssign(from: oldBucket, count: 1)
152+
} else {
153+
oldBucket.deinitialize(count: 1)
154+
}
155+
}
156+
157+
oldBuckets.deallocate()
158+
159+
return true
160+
}
161+
162+
/// Count the actual number of occupied buckets.
163+
@inline(__always)
164+
private func _countOccupiedBuckets() -> Int {
165+
var count = 0
166+
for i in 0 ..< bucketCount where buckets[i].value != nil {
167+
// '&+=' for performance. 'bucketCount' is 0x4000_... or below.
168+
count &+= 1
169+
}
170+
return count
171+
}
172+
173+
/// Reserves enough space to store a single new object. Returns true if
174+
/// resizing happened.
175+
private func _ensurePlusOneCapacity() -> Bool {
176+
// '&+' for performance. 'estimatedCount' is always less than 'bucketCount'
177+
// which is 0x4000_... or below.
178+
if bucketCount >= WeakLookupTable<Element>
179+
._minimalBucketCount(for: estimatedCount &+ 1) {
180+
return false
181+
}
182+
183+
// Slow path.
184+
estimatedCount = _countOccupiedBuckets()
185+
// '&+' for performance. We know 'estimatedCount' derived by
186+
// '_countOccupiedBuckets()' is equal to or less than previous
187+
// 'estimatedCount'.
188+
return reserveCapacity(estimatedCount &+ 1)
189+
}
190+
191+
/// Inserts the given object into the table.
192+
@discardableResult
193+
func insert(_ obj: Element) -> Bool {
194+
var (pos, alreadyExists) = _findHole(obj.id)
195+
if alreadyExists {
196+
return false
197+
}
198+
199+
if /*resized=*/_ensurePlusOneCapacity() {
200+
pos = _findHole(obj.id).pos
201+
}
202+
buckets[pos].value = obj
203+
// '&+=' for performance. '_ensurePlusOneCapacity()' ensures it's safe.
204+
estimatedCount &+= 1
205+
return true
206+
}
207+
208+
/// Gets the object with the specified id. Returns 'nil' if the object hasn't
209+
/// been insert()-ed or has already been freed.
210+
subscript(id: Element.Identifier) -> Element? {
211+
// Since we don't fill the bucket when the object is freed (because we don't
212+
// know), we can't stop iteration at a hole. So in the worst case (i.e. if
213+
// the object doesn't exist in the table), full linear search is needed.
214+
// However, since we assume the object exists and hasn't been freed yet,
215+
// we expect it's stored near the 'idealBucket' anyway.
216+
let idealBucket = _idealBucket(for: id)
217+
var bucket = idealBucket
218+
repeat {
219+
if let obj = buckets[bucket].value, obj.id == id {
220+
return obj
221+
}
222+
// '&+' for performance. 'bucketCount' is 0x4000_... or below.
223+
bucket = (bucket &+ 1) & _bucketMask
224+
} while bucket != idealBucket
225+
226+
return nil
227+
}
228+
}

Diff for: Tests/LinuxMain.swift

+17-11
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,20 @@
11
import XCTest
22
import SwiftSyntaxTest
33

4-
XCTMain([
5-
testCase(AbsolutePositionTestCase.allTests),
6-
testCase(DecodeSyntaxTestCase.allTests),
7-
testCase(DiagnosticTestCase.allTests),
8-
testCase(LazyCachingTestCase.allTests),
9-
testCase(ParseFileTestCase.allTests),
10-
testCase(SyntaxChildrenAPITestCase.allTests),
11-
testCase(SyntaxCollectionsAPITestCase.allTests),
12-
testCase(SyntaxFactoryAPITestCase.allTests),
13-
testCase(SyntaxVisitorTestCase.allTests),
14-
])
4+
XCTMain({ () -> [XCTestCaseEntry] in
5+
var testCases: [XCTestCaseEntry] = [
6+
testCase(AbsolutePositionTestCase.allTests),
7+
testCase(DecodeSyntaxTestCase.allTests),
8+
testCase(DiagnosticTestCase.allTests),
9+
testCase(LazyCachingTestCase.allTests),
10+
testCase(ParseFileTestCase.allTests),
11+
testCase(SyntaxChildrenAPITestCase.allTests),
12+
testCase(SyntaxCollectionsAPITestCase.allTests),
13+
testCase(SyntaxFactoryAPITestCase.allTests),
14+
testCase(SyntaxVisitorTestCase.allTests),
15+
]
16+
#if DEBUG
17+
testCases.append(testCase(WeakLookupTableTestCase.allTests))
18+
#endif
19+
return testCases
20+
}())

0 commit comments

Comments
 (0)