Skip to content

Commit 7cadebe

Browse files
committed
unify code paths
1 parent ca66442 commit 7cadebe

File tree

5 files changed

+182
-227
lines changed

5 files changed

+182
-227
lines changed

Documentation/ProgrammersManual.md

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8,12 +8,12 @@ In the engine nomenclature, a quick-check results in a yes/no/maybe while a thor
88

99
The nature of quick checks and fast paths is that they bifurcate testing coverage. One easy way to prevent this in simple cases is to assert that a definite quick result matches the thorough result.
1010

11-
One example of this pattern is matching against a builtin character class. The engine has a `_doMatchBuiltinCC`
11+
One example of this pattern is matching against a builtin character class. The engine has a `_matchBuiltinCC` function, sketched below:
1212

1313
```swift
14-
func _doMatchBuiltinCC(...) -> Input.Index? {
15-
// Calls _quickMatchBuiltinCC, if that gives a definite result
16-
// asserts that it is the same as the result of
14+
func _matchBuiltinCC(...) -> Input.Index? {
15+
// Calls _quickMatchBuiltinCC, if that gives a definite result
16+
// asserts that it is the same as the result of
1717
// _thoroughMatchBuiltinCC and returns it. Otherwise returns the
1818
// result of _thoroughMatchBuiltinCC
1919
}

Sources/_StringProcessing/Engine/MEBuiltins.swift

Lines changed: 161 additions & 137 deletions
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,10 @@ extension Processor {
1515
isStrictASCII: Bool,
1616
isScalarSemantics: Bool
1717
) -> Bool {
18-
guard let next = _doMatchBuiltinCC(
18+
guard let next = _matchBuiltinCC(
1919
cc,
20+
in: input,
21+
at: currentPosition,
2022
isInverted: isInverted,
2123
isStrictASCII: isStrictASCII,
2224
isScalarSemantics: isScalarSemantics
@@ -28,140 +30,6 @@ extension Processor {
2830
return true
2931
}
3032

31-
// Mentioned in ProgrammersManual.md, update docs if redesigned
32-
func _doMatchBuiltinCC(
33-
_ cc: _CharacterClassModel.Representation,
34-
isInverted: Bool,
35-
isStrictASCII: Bool,
36-
isScalarSemantics: Bool
37-
) -> Input.Index? {
38-
if case .definite(let result) = _quickMatchBuiltinCC(
39-
cc,
40-
isInverted: isInverted,
41-
isStrictASCII: isStrictASCII,
42-
isScalarSemantics: isScalarSemantics
43-
) {
44-
assert(result == _thoroughMatchBuiltinCC(
45-
cc,
46-
isInverted: isInverted,
47-
isStrictASCII: isStrictASCII,
48-
isScalarSemantics: isScalarSemantics))
49-
return result
50-
}
51-
return _thoroughMatchBuiltinCC(
52-
cc,
53-
isInverted: isInverted,
54-
isStrictASCII: isStrictASCII,
55-
isScalarSemantics: isScalarSemantics)
56-
}
57-
58-
// Mentioned in ProgrammersManual.md, update docs if redesigned
59-
@inline(__always)
60-
func _quickMatchBuiltinCC(
61-
_ cc: _CharacterClassModel.Representation,
62-
isInverted: Bool,
63-
isStrictASCII: Bool,
64-
isScalarSemantics: Bool
65-
) -> QuickResult<Input.Index?> {
66-
guard let (next, result) = input._quickMatch(
67-
cc, at: currentPosition, isScalarSemantics: isScalarSemantics
68-
) else {
69-
return .unknown
70-
}
71-
return .definite(result == isInverted ? nil : next)
72-
}
73-
74-
// Mentioned in ProgrammersManual.md, update docs if redesigned
75-
@inline(never)
76-
func _thoroughMatchBuiltinCC(
77-
_ cc: _CharacterClassModel.Representation,
78-
isInverted: Bool,
79-
isStrictASCII: Bool,
80-
isScalarSemantics: Bool
81-
) -> Input.Index? {
82-
guard let char = load(), let scalar = loadScalar() else {
83-
return nil
84-
}
85-
86-
let asciiCheck = !isStrictASCII
87-
|| (scalar.isASCII && isScalarSemantics)
88-
|| char.isASCII
89-
90-
var matched: Bool
91-
var next: Input.Index
92-
switch (isScalarSemantics, cc) {
93-
case (_, .anyGrapheme):
94-
next = input.index(after: currentPosition)
95-
case (_, .anyScalar):
96-
next = input.unicodeScalars.index(after: currentPosition)
97-
case (true, _):
98-
next = input.unicodeScalars.index(after: currentPosition)
99-
case (false, _):
100-
next = input.index(after: currentPosition)
101-
}
102-
103-
switch cc {
104-
case .any, .anyGrapheme:
105-
matched = true
106-
case .anyScalar:
107-
if isScalarSemantics {
108-
matched = true
109-
} else {
110-
matched = input.isOnGraphemeClusterBoundary(next)
111-
}
112-
case .digit:
113-
if isScalarSemantics {
114-
matched = scalar.properties.numericType != nil && asciiCheck
115-
} else {
116-
matched = char.isNumber && asciiCheck
117-
}
118-
case .horizontalWhitespace:
119-
if isScalarSemantics {
120-
matched = scalar.isHorizontalWhitespace && asciiCheck
121-
} else {
122-
matched = char._isHorizontalWhitespace && asciiCheck
123-
}
124-
case .verticalWhitespace:
125-
if isScalarSemantics {
126-
matched = scalar.isNewline && asciiCheck
127-
} else {
128-
matched = char._isNewline && asciiCheck
129-
}
130-
case .newlineSequence:
131-
if isScalarSemantics {
132-
matched = scalar.isNewline && asciiCheck
133-
if matched && scalar == "\r"
134-
&& next != input.endIndex && input.unicodeScalars[next] == "\n" {
135-
// Match a full CR-LF sequence even in scalar semantics
136-
input.unicodeScalars.formIndex(after: &next)
137-
}
138-
} else {
139-
matched = char._isNewline && asciiCheck
140-
}
141-
case .whitespace:
142-
if isScalarSemantics {
143-
matched = scalar.properties.isWhitespace && asciiCheck
144-
} else {
145-
matched = char.isWhitespace && asciiCheck
146-
}
147-
case .word:
148-
if isScalarSemantics {
149-
matched = scalar.properties.isAlphabetic && asciiCheck
150-
} else {
151-
matched = char.isWordCharacter && asciiCheck
152-
}
153-
}
154-
155-
if isInverted {
156-
matched.toggle()
157-
}
158-
159-
guard matched else {
160-
return nil
161-
}
162-
return next
163-
}
164-
16533
func isAtStartOfLine(_ payload: AssertionPayload) -> Bool {
16634
if currentPosition == subjectBounds.lowerBound { return true }
16735
switch payload.semanticLevel {
@@ -171,7 +39,7 @@ extension Processor {
17139
return input.unicodeScalars[input.unicodeScalars.index(before: currentPosition)].isNewline
17240
}
17341
}
174-
42+
17543
func isAtEndOfLine(_ payload: AssertionPayload) -> Bool {
17644
if currentPosition == subjectBounds.upperBound { return true }
17745
switch payload.semanticLevel {
@@ -214,7 +82,7 @@ extension Processor {
21482
return isAtStartOfLine(payload)
21583
case .endOfLine:
21684
return isAtEndOfLine(payload)
217-
85+
21886
case .caretAnchor:
21987
if payload.anchorsMatchNewlines {
22088
return isAtStartOfLine(payload)
@@ -247,3 +115,159 @@ extension Processor {
247115
}
248116
}
249117
}
118+
119+
// MARK: Built-in character class matching
120+
121+
// Mentioned in ProgrammersManual.md, update docs if redesigned
122+
@_effects(releasenone)
123+
func _matchBuiltinCC(
124+
_ cc: _CharacterClassModel.Representation,
125+
in input: String,
126+
at currentPosition: String.Index,
127+
isInverted: Bool,
128+
isStrictASCII: Bool,
129+
isScalarSemantics: Bool
130+
) -> String.Index? {
131+
guard currentPosition < input.endIndex else {
132+
return nil
133+
}
134+
if case .definite(let result) = _quickMatchBuiltinCC(
135+
cc,
136+
in: input,
137+
at: currentPosition,
138+
isInverted: isInverted,
139+
isStrictASCII: isStrictASCII,
140+
isScalarSemantics: isScalarSemantics
141+
) {
142+
assert(result == _thoroughMatchBuiltinCC(
143+
cc,
144+
in: input,
145+
at: currentPosition,
146+
isInverted: isInverted,
147+
isStrictASCII: isStrictASCII,
148+
isScalarSemantics: isScalarSemantics))
149+
return result
150+
}
151+
return _thoroughMatchBuiltinCC(
152+
cc,
153+
in: input,
154+
at: currentPosition,
155+
isInverted: isInverted,
156+
isStrictASCII: isStrictASCII,
157+
isScalarSemantics: isScalarSemantics)
158+
}
159+
160+
// Mentioned in ProgrammersManual.md, update docs if redesigned
161+
@_effects(releasenone)
162+
@inline(__always)
163+
func _quickMatchBuiltinCC(
164+
_ cc: _CharacterClassModel.Representation,
165+
in input: String,
166+
at currentPosition: String.Index,
167+
isInverted: Bool,
168+
isStrictASCII: Bool,
169+
isScalarSemantics: Bool
170+
) -> QuickResult<String.Index?> {
171+
assert(currentPosition < input.endIndex)
172+
guard let (next, result) = input._quickMatch(
173+
cc, at: currentPosition, isScalarSemantics: isScalarSemantics
174+
) else {
175+
return .unknown
176+
}
177+
return .definite(result == isInverted ? nil : next)
178+
}
179+
180+
// Mentioned in ProgrammersManual.md, update docs if redesigned
181+
@_effects(releasenone)
182+
@inline(never)
183+
func _thoroughMatchBuiltinCC(
184+
_ cc: _CharacterClassModel.Representation,
185+
in input: String,
186+
at currentPosition: String.Index,
187+
isInverted: Bool,
188+
isStrictASCII: Bool,
189+
isScalarSemantics: Bool
190+
) -> String.Index? {
191+
assert(currentPosition < input.endIndex)
192+
let char = input[currentPosition]
193+
let scalar = input.unicodeScalars[currentPosition]
194+
195+
let asciiCheck = !isStrictASCII
196+
|| (scalar.isASCII && isScalarSemantics)
197+
|| char.isASCII
198+
199+
var matched: Bool
200+
var next: String.Index
201+
switch (isScalarSemantics, cc) {
202+
case (_, .anyGrapheme):
203+
next = input.index(after: currentPosition)
204+
case (_, .anyScalar):
205+
next = input.unicodeScalars.index(after: currentPosition)
206+
case (true, _):
207+
next = input.unicodeScalars.index(after: currentPosition)
208+
case (false, _):
209+
next = input.index(after: currentPosition)
210+
}
211+
212+
switch cc {
213+
case .any, .anyGrapheme:
214+
matched = true
215+
case .anyScalar:
216+
if isScalarSemantics {
217+
matched = true
218+
} else {
219+
matched = input.isOnGraphemeClusterBoundary(next)
220+
}
221+
case .digit:
222+
if isScalarSemantics {
223+
matched = scalar.properties.numericType != nil && asciiCheck
224+
} else {
225+
matched = char.isNumber && asciiCheck
226+
}
227+
case .horizontalWhitespace:
228+
if isScalarSemantics {
229+
matched = scalar.isHorizontalWhitespace && asciiCheck
230+
} else {
231+
matched = char._isHorizontalWhitespace && asciiCheck
232+
}
233+
case .verticalWhitespace:
234+
if isScalarSemantics {
235+
matched = scalar.isNewline && asciiCheck
236+
} else {
237+
matched = char._isNewline && asciiCheck
238+
}
239+
case .newlineSequence:
240+
if isScalarSemantics {
241+
matched = scalar.isNewline && asciiCheck
242+
if matched && scalar == "\r"
243+
&& next != input.endIndex && input.unicodeScalars[next] == "\n" {
244+
// Match a full CR-LF sequence even in scalar semantics
245+
input.unicodeScalars.formIndex(after: &next)
246+
}
247+
} else {
248+
matched = char._isNewline && asciiCheck
249+
}
250+
case .whitespace:
251+
if isScalarSemantics {
252+
matched = scalar.properties.isWhitespace && asciiCheck
253+
} else {
254+
matched = char.isWhitespace && asciiCheck
255+
}
256+
case .word:
257+
if isScalarSemantics {
258+
matched = scalar.properties.isAlphabetic && asciiCheck
259+
} else {
260+
matched = char.isWordCharacter && asciiCheck
261+
}
262+
}
263+
264+
if isInverted {
265+
matched.toggle()
266+
}
267+
268+
guard matched else {
269+
return nil
270+
}
271+
return next
272+
}
273+

Sources/_StringProcessing/Engine/MEQuantify.swift

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,10 @@ extension Processor {
99
UnicodeScalar.init(_value: UInt32(payload.asciiChar)), true)
1010
case .builtin:
1111
// We only emit .quantify if it consumes a single character
12-
next = _doMatchBuiltinCC(
12+
next = _matchBuiltinCC(
1313
payload.builtin,
14+
in: input,
15+
at: currentPosition,
1416
isInverted: payload.builtinIsInverted,
1517
isStrictASCII: payload.builtinIsStrict,
1618
isScalarSemantics: false)

Sources/_StringProcessing/StringExtras.swift

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,9 +63,11 @@ extension String {
6363
guard next == endIndex || _isSub300StartingByte(utf8[next]) else {
6464
return nil
6565
}
66+
assert(self[idx] == "\r\n")
6667
return (first: base, next: next, crLF: true)
6768
}
6869

70+
assert(self[idx].isASCII && self[idx] != "\r\n")
6971
return (first: base, next: next, crLF: false)
7072
}
7173

0 commit comments

Comments
 (0)