Skip to content

Commit 4418183

Browse files
authored
Fix firstRange(of:) search (#656)
Calls to `ranges(of:)` and `firstRange(of:)` with a string parameter actually use two different string searching algorithms. `ranges(of:)` uses the "z-searcher" algorithm, while `firstRange(of:)` uses a two-way search. Since it's better to align on a single path for these searches, the z-searcher has lower requirements, and the two-way search implementation has a correctness bug, this change removes the two-way search algorithm and uses z-search for `firstRange(of:)`. The correctness bug in `firstRange(of:)` appears only when searching for the second (or later) occurrence of a substring, which you have to be fairly deliberate about. In the example below, the substring at offsets `7..<12` is missed: let text = "ADACBADADACBADACB" // ===== -----===== let pattern = "ADACB" let firstRange = text.firstRange(of: pattern)! // firstRange ~= 0..<5 let secondRange = text[firstRange.upperBound...].firstRange(of: pattern)! // secondRange ~= 12..<17 This change also removes some unrelated, unused code in Split.swift, in addition to removing an (unused) usage of `TwoWaySearcher`. rdar://92794248
1 parent 58626cc commit 4418183

File tree

5 files changed

+11
-265
lines changed

5 files changed

+11
-265
lines changed

Sources/_StringProcessing/Algorithms/Algorithms/FirstRange.swift

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -50,11 +50,8 @@ extension BidirectionalCollection where Element: Comparable {
5050
public func firstRange<C: Collection>(
5151
of other: C
5252
) -> Range<Index>? where C.Element == Element {
53-
let searcher = PatternOrEmpty(
54-
searcher: TwoWaySearcher<SubSequence>(pattern: Array(other)))
55-
let slice = self[...]
56-
var state = searcher.state(for: slice, in: startIndex..<endIndex)
57-
return searcher.search(slice, &state)
53+
let searcher = ZSearcher<SubSequence>(pattern: Array(other), by: ==)
54+
return searcher.search(self[...], in: startIndex..<endIndex)
5855
}
5956
}
6057

Sources/_StringProcessing/Algorithms/Algorithms/Split.swift

Lines changed: 0 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -122,31 +122,6 @@ extension Collection {
122122
}
123123
}
124124

125-
// MARK: Predicate algorithms
126-
127-
extension Collection {
128-
// TODO: Non-escaping and throwing
129-
func split(
130-
whereSeparator predicate: @escaping (Element) -> Bool,
131-
maxSplits: Int,
132-
omittingEmptySubsequences: Bool
133-
) -> SplitCollection<PredicateConsumer<Self>> {
134-
split(by: PredicateConsumer(predicate: predicate), maxSplits: maxSplits, omittingEmptySubsequences: omittingEmptySubsequences)
135-
}
136-
}
137-
138-
// MARK: Single element algorithms
139-
140-
extension Collection where Element: Equatable {
141-
func split(
142-
by separator: Element,
143-
maxSplits: Int,
144-
omittingEmptySubsequences: Bool
145-
) -> SplitCollection<PredicateConsumer<Self>> {
146-
split(whereSeparator: { $0 == separator }, maxSplits: maxSplits, omittingEmptySubsequences: omittingEmptySubsequences)
147-
}
148-
}
149-
150125
// MARK: Fixed pattern algorithms
151126

152127
extension Collection where Element: Equatable {
@@ -180,41 +155,6 @@ extension Collection where Element: Equatable {
180155
}
181156
}
182157

183-
extension BidirectionalCollection where Element: Equatable {
184-
// FIXME
185-
// public func splitFromBack<S: Sequence>(
186-
// separator: S
187-
// ) -> ReversedSplitCollection<ZSearcher<SubSequence>>
188-
// where S.Element == Element
189-
// {
190-
// splitFromBack(separator: ZSearcher(pattern: Array(separator), by: ==))
191-
// }
192-
}
193-
194-
extension BidirectionalCollection where Element: Comparable {
195-
func split<C: Collection>(
196-
by separator: C,
197-
maxSplits: Int,
198-
omittingEmptySubsequences: Bool
199-
) -> SplitCollection<PatternOrEmpty<TwoWaySearcher<Self>>>
200-
where C.Element == Element
201-
{
202-
split(
203-
by: PatternOrEmpty(searcher: TwoWaySearcher(pattern: Array(separator))),
204-
maxSplits: maxSplits, omittingEmptySubsequences: omittingEmptySubsequences)
205-
}
206-
207-
// FIXME
208-
// public func splitFromBack<S: Sequence>(
209-
// separator: S
210-
// ) -> ReversedSplitCollection<PatternOrEmpty<TwoWaySearcher<SubSequence>>>
211-
// where S.Element == Element
212-
// {
213-
// splitFromBack(separator: PatternOrEmpty(
214-
// searcher: TwoWaySearcher(pattern: Array(separator))))
215-
// }
216-
}
217-
218158
// String split overload breakers
219159
//
220160
// These are underscored and marked as SPI so that the *actual* public overloads

Sources/_StringProcessing/Algorithms/Searchers/TwoWaySearcher.swift

Lines changed: 0 additions & 197 deletions
This file was deleted.

Sources/_StringProcessing/CMakeLists.txt

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,6 @@ add_library(_StringProcessing
2323
Algorithms/Searchers/NaivePatternSearcher.swift
2424
Algorithms/Searchers/PatternOrEmpty.swift
2525
Algorithms/Searchers/PredicateSearcher.swift
26-
Algorithms/Searchers/TwoWaySearcher.swift
2726
Algorithms/Searchers/ZSearcher.swift
2827
Engine/Backtracking.swift
2928
Engine/Consume.swift

Tests/RegexTests/AlgorithmsTests.swift

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -165,8 +165,12 @@ class AlgorithmTests: XCTestCase {
165165
let actualCol: [Range<Int>] = input.ranges(of: pattern)[...].map(input.offsets(of:))
166166
XCTAssertEqual(actualCol, expected, file: file, line: line)
167167

168-
let firstRange = input.firstRange(of: pattern).map(input.offsets(of:))
169-
XCTAssertEqual(firstRange, expected.first, file: file, line: line)
168+
let firstRange = input.firstRange(of: pattern)
169+
XCTAssertEqual(firstRange.map(input.offsets(of:)), expected.first, file: file, line: line)
170+
if let upperBound = firstRange?.upperBound, !pattern.isEmpty {
171+
let secondRange = input[upperBound...].firstRange(of: pattern).map(input.offsets(of:))
172+
XCTAssertEqual(secondRange, expected.dropFirst().first, file: file, line: line)
173+
}
170174
}
171175

172176
expectRanges("", "", [0..<0])
@@ -176,6 +180,9 @@ class AlgorithmTests: XCTestCase {
176180
expectRanges("abcde", "bcd", [1..<4])
177181
expectRanges("ababacabababa", "abababa", [6..<13])
178182
expectRanges("ababacabababa", "aba", [0..<3, 6..<9, 10..<13])
183+
184+
// Test for rdar://92794248
185+
expectRanges("ADACBADADACBADACB", "ADACB", [0..<5, 7..<12, 12..<17])
179186
}
180187

181188
// rdar://105154010

0 commit comments

Comments
 (0)