|
| 1 | +/* |
| 2 | + This source file is part of the Swift.org open source project |
| 3 | + |
| 4 | + Copyright (c) 2020 Apple Inc. and the Swift project authors |
| 5 | + Licensed under Apache License v2.0 with Runtime Library Exception |
| 6 | + |
| 7 | + See http://swift.org/LICENSE.txt for license information |
| 8 | + See http://swift.org/CONTRIBUTORS.txt for Swift project authors |
| 9 | + */ |
| 10 | + |
| 11 | +import TSCBasic |
| 12 | + |
| 13 | +import PackageModel |
| 14 | + |
/// A case-insensitive trie that maps words to the documents they are found in.
///
/// Words are lowercased on insertion and on lookup. Structural mutations
/// (`insert` and `remove`) are serialized by a trie-level lock so that a path
/// under construction cannot be pruned before its document is attached.
/// Lookups do not take that lock; they work on the per-node snapshots returned
/// by `TrieNode.children` and `TrieNode.documents`.
final class Trie<Document: Hashable> {
    private typealias Node = TrieNode<Character, Document>

    /// The value-less root node; every word is a path starting here.
    private let root: Node

    /// Serializes `insert` and `remove`. The per-node locks only protect a single
    /// node's state, which is not enough for these multi-node operations.
    private let lock = Lock()

    init() {
        self.root = Node()
    }

    /// Inserts a word and its document to the trie.
    ///
    /// - Parameters:
    ///   - word: The word to index; it is lowercased first. Empty words are ignored.
    ///   - document: The document in which `word` can be found.
    func insert(word: String, foundIn document: Document) {
        guard !word.isEmpty else { return }

        // Hold the trie lock for the whole operation: the nodes created below are
        // leaves without documents until the final `add(document:)`, which is exactly
        // what `remove(document:)` prunes.
        self.lock.withLock {
            var currentNode = self.root
            for character in word.lowercased() {
                // `add(value:)` returns the existing child when there is one, so no
                // separate lookup (and dictionary copy) is needed.
                currentNode = currentNode.add(value: character)
            }
            currentNode.add(document: document)
        }
    }

    /// Removes word occurrences found in the given document.
    ///
    /// Paths that no longer lead to any word are deleted along the way.
    ///
    /// - Parameter document: The document whose occurrences should be removed.
    func remove(document: Document) {
        func removeInSubTrie(root: Node, document: Document) {
            // Removing a document the node does not reference is a no-op.
            root.remove(document: document)

            for (value, child) in root.children {
                // Clean up the sub-trie first so that emptied descendants are gone
                // before deciding whether `child` itself is still needed.
                removeInSubTrie(root: child, document: document)

                // If a child node doesn't have children (i.e., there are no words under it),
                // and itself is not a word, delete it since its path has become a dead end.
                if child.isLeaf, !child.isTerminating {
                    root.remove(value: value)
                }
            }
        }

        self.lock.withLock {
            removeInSubTrie(root: self.root, document: document)
        }
    }

    /// Checks if the trie contains the exact word or words with matching prefix.
    ///
    /// - Parameters:
    ///   - word: The word (or prefix) to look for; matching is case-insensitive.
    ///   - prefixMatch: When `true`, a path for `word` is enough; otherwise the
    ///     path must end in a node that has documents.
    /// - Returns: `true` on a match. An empty `word` never matches.
    func contains(word: String, prefixMatch: Bool = false) -> Bool {
        guard let node = self.findLastNodeOf(word: word) else {
            return false
        }
        return prefixMatch || node.isTerminating
    }

    /// Finds the word in this trie and returns its documents.
    ///
    /// - Parameter word: The exact word to look for; matching is case-insensitive.
    /// - Returns: The non-empty set of documents in which the word is found.
    /// - Throws: `NotFoundError` if the word is not in the trie.
    func find(word: String) throws -> Set<Document> {
        // Take a single snapshot of the documents: checking `isTerminating` and then
        // reading `documents` separately could observe a removal in between and
        // return an empty set as if it were a hit.
        guard let documents = self.findLastNodeOf(word: word)?.documents, !documents.isEmpty else {
            throw NotFoundError(word)
        }
        return documents
    }

    /// Finds words with matching prefix in this trie and returns their documents.
    ///
    /// - Parameter prefix: The prefix to look for; matching is case-insensitive.
    /// - Returns: The matching words (lowercased) mapped to their documents.
    /// - Throws: `NotFoundError` if there is no path for `prefix` (including when it is empty).
    func findWithPrefix(_ prefix: String) throws -> [String: Set<Document>] {
        guard let prefixNode = self.findLastNodeOf(word: prefix) else {
            throw NotFoundError(prefix)
        }

        /// Records `word` if `subTrieRoot` has documents, then descends into its children.
        func collectWords(under subTrieRoot: Node, spelling word: String, into words: inout [String: Set<Document>]) {
            // One snapshot, so a word is never reported with an empty document set.
            let documents = subTrieRoot.documents
            if !documents.isEmpty {
                words[word] = documents
            }

            // Each child is stored under the character it represents, so the key
            // extends the word. Lowercasing matches how keys were built before;
            // it should be a no-op because `insert` stores lowercased characters.
            for (character, child) in subTrieRoot.children {
                collectWords(under: child, spelling: word + character.lowercased(), into: &words)
            }
        }

        var words = [String: Set<Document>]()
        collectWords(under: prefixNode, spelling: prefix.lowercased(), into: &words)
        return words
    }

    /// Finds the last node in the path of the given word if it exists in this trie.
    ///
    /// - Parameter word: The word to follow; it is lowercased first.
    /// - Returns: The node reached after consuming every character, or `nil` if
    ///   `word` is empty or the path breaks off.
    private func findLastNodeOf(word: String) -> Node? {
        guard !word.isEmpty else { return nil }

        var currentNode = self.root
        // Traverse down the trie as far as we can
        for character in word.lowercased() {
            guard let child = currentNode.children[character] else {
                return nil
            }
            currentNode = child
        }
        return currentNode
    }
}
| 141 | + |
/// A node of a trie: stores one value, its child nodes and the documents in which
/// the word spelled by the path to this node can be found.
///
/// Children and documents are each guarded by their own lock.
private final class TrieNode<T: Hashable, Document: Hashable> {
    /// The value (i.e., character) held by this node. The root has none.
    let value: T?

    /// The node this one hangs off, held weakly. `nil` for the root.
    private weak var parent: TrieNode?

    /// Child nodes keyed by the value each of them stores. Guarded by `childrenLock`.
    private var _children: [T: TrieNode] = [:]
    private let childrenLock = Lock()

    /// Documents containing the word formed by the path to this node; empty when the
    /// path is not a word. Guarded by `documentsLock`.
    private var _documents: Set<Document> = []
    private let documentsLock = Lock()

    init(value: T? = nil, parent: TrieNode? = nil) {
        self.value = value
        self.parent = parent
    }

    /// `true` when this node has no children.
    var isLeaf: Bool {
        return self.childrenLock.withLock { self._children.isEmpty }
    }

    /// `true` when at least one document is attached, i.e. the path to this node is a word.
    var isTerminating: Bool {
        return self.documentsLock.withLock { !self._documents.isEmpty }
    }

    /// A snapshot of this node's children.
    var children: [T: TrieNode] {
        return self.childrenLock.withLock { self._children }
    }

    /// A snapshot of this node's documents.
    var documents: Set<Document> {
        return self.documentsLock.withLock { self._documents }
    }

    /// Adds a subpath under this node.
    ///
    /// - Parameter value: The value of the child to add.
    /// - Returns: The child stored under `value`; an already existing child is reused.
    func add(value: T) -> TrieNode {
        return self.childrenLock.withLock {
            guard let known = self._children[value] else {
                let created = TrieNode(value: value, parent: self)
                self._children[value] = created
                return created
            }
            return known
        }
    }

    /// Removes a subpath from this node.
    ///
    /// - Parameter value: The value of the child to drop; unknown values are ignored.
    func remove(value: T) {
        self.childrenLock.withLock {
            // Assigning `nil` through the subscript deletes the entry.
            self._children[value] = nil
        }
    }

    /// Adds a document in which the word formed by path leading to this node can be found.
    func add(document: Document) {
        self.documentsLock.withLock {
            _ = self._documents.insert(document)
        }
    }

    /// Removes a referenced document.
    func remove(document: Document) {
        self.documentsLock.withLock {
            _ = self._documents.remove(document)
        }
    }
}
0 commit comments