@@ -15,8 +15,10 @@ extension Processor {
15
15
isStrictASCII: Bool ,
16
16
isScalarSemantics: Bool
17
17
) -> Bool {
18
- guard let next = _doMatchBuiltinCC (
18
+ guard let next = _matchBuiltinCC (
19
19
cc,
20
+ in: input,
21
+ at: currentPosition,
20
22
isInverted: isInverted,
21
23
isStrictASCII: isStrictASCII,
22
24
isScalarSemantics: isScalarSemantics
@@ -28,140 +30,6 @@ extension Processor {
28
30
return true
29
31
}
30
32
31
- // Mentioned in ProgrammersManual.md, update docs if redesigned
32
- func _doMatchBuiltinCC(
33
- _ cc: _CharacterClassModel . Representation ,
34
- isInverted: Bool ,
35
- isStrictASCII: Bool ,
36
- isScalarSemantics: Bool
37
- ) -> Input . Index ? {
38
- if case . definite( let result) = _quickMatchBuiltinCC (
39
- cc,
40
- isInverted: isInverted,
41
- isStrictASCII: isStrictASCII,
42
- isScalarSemantics: isScalarSemantics
43
- ) {
44
- assert ( result == _thoroughMatchBuiltinCC (
45
- cc,
46
- isInverted: isInverted,
47
- isStrictASCII: isStrictASCII,
48
- isScalarSemantics: isScalarSemantics) )
49
- return result
50
- }
51
- return _thoroughMatchBuiltinCC (
52
- cc,
53
- isInverted: isInverted,
54
- isStrictASCII: isStrictASCII,
55
- isScalarSemantics: isScalarSemantics)
56
- }
57
-
58
- // Mentioned in ProgrammersManual.md, update docs if redesigned
59
- @inline ( __always)
60
- func _quickMatchBuiltinCC(
61
- _ cc: _CharacterClassModel . Representation ,
62
- isInverted: Bool ,
63
- isStrictASCII: Bool ,
64
- isScalarSemantics: Bool
65
- ) -> QuickResult < Input . Index ? > {
66
- guard let ( next, result) = input. _quickMatch (
67
- cc, at: currentPosition, isScalarSemantics: isScalarSemantics
68
- ) else {
69
- return . unknown
70
- }
71
- return . definite( result == isInverted ? nil : next)
72
- }
73
-
74
- // Mentioned in ProgrammersManual.md, update docs if redesigned
75
- @inline ( never)
76
- func _thoroughMatchBuiltinCC(
77
- _ cc: _CharacterClassModel . Representation ,
78
- isInverted: Bool ,
79
- isStrictASCII: Bool ,
80
- isScalarSemantics: Bool
81
- ) -> Input . Index ? {
82
- guard let char = load ( ) , let scalar = loadScalar ( ) else {
83
- return nil
84
- }
85
-
86
- let asciiCheck = !isStrictASCII
87
- || ( scalar. isASCII && isScalarSemantics)
88
- || char. isASCII
89
-
90
- var matched : Bool
91
- var next : Input . Index
92
- switch ( isScalarSemantics, cc) {
93
- case ( _, . anyGrapheme) :
94
- next = input. index ( after: currentPosition)
95
- case ( _, . anyScalar) :
96
- next = input. unicodeScalars. index ( after: currentPosition)
97
- case ( true , _) :
98
- next = input. unicodeScalars. index ( after: currentPosition)
99
- case ( false , _) :
100
- next = input. index ( after: currentPosition)
101
- }
102
-
103
- switch cc {
104
- case . any, . anyGrapheme:
105
- matched = true
106
- case . anyScalar:
107
- if isScalarSemantics {
108
- matched = true
109
- } else {
110
- matched = input. isOnGraphemeClusterBoundary ( next)
111
- }
112
- case . digit:
113
- if isScalarSemantics {
114
- matched = scalar. properties. numericType != nil && asciiCheck
115
- } else {
116
- matched = char. isNumber && asciiCheck
117
- }
118
- case . horizontalWhitespace:
119
- if isScalarSemantics {
120
- matched = scalar. isHorizontalWhitespace && asciiCheck
121
- } else {
122
- matched = char. _isHorizontalWhitespace && asciiCheck
123
- }
124
- case . verticalWhitespace:
125
- if isScalarSemantics {
126
- matched = scalar. isNewline && asciiCheck
127
- } else {
128
- matched = char. _isNewline && asciiCheck
129
- }
130
- case . newlineSequence:
131
- if isScalarSemantics {
132
- matched = scalar. isNewline && asciiCheck
133
- if matched && scalar == " \r "
134
- && next != input. endIndex && input. unicodeScalars [ next] == " \n " {
135
- // Match a full CR-LF sequence even in scalar semantics
136
- input. unicodeScalars. formIndex ( after: & next)
137
- }
138
- } else {
139
- matched = char. _isNewline && asciiCheck
140
- }
141
- case . whitespace:
142
- if isScalarSemantics {
143
- matched = scalar. properties. isWhitespace && asciiCheck
144
- } else {
145
- matched = char. isWhitespace && asciiCheck
146
- }
147
- case . word:
148
- if isScalarSemantics {
149
- matched = scalar. properties. isAlphabetic && asciiCheck
150
- } else {
151
- matched = char. isWordCharacter && asciiCheck
152
- }
153
- }
154
-
155
- if isInverted {
156
- matched. toggle ( )
157
- }
158
-
159
- guard matched else {
160
- return nil
161
- }
162
- return next
163
- }
164
-
165
33
func isAtStartOfLine( _ payload: AssertionPayload ) -> Bool {
166
34
if currentPosition == subjectBounds. lowerBound { return true }
167
35
switch payload. semanticLevel {
@@ -171,7 +39,7 @@ extension Processor {
171
39
return input. unicodeScalars [ input. unicodeScalars. index ( before: currentPosition) ] . isNewline
172
40
}
173
41
}
174
-
42
+
175
43
func isAtEndOfLine( _ payload: AssertionPayload ) -> Bool {
176
44
if currentPosition == subjectBounds. upperBound { return true }
177
45
switch payload. semanticLevel {
@@ -214,7 +82,7 @@ extension Processor {
214
82
return isAtStartOfLine ( payload)
215
83
case . endOfLine:
216
84
return isAtEndOfLine ( payload)
217
-
85
+
218
86
case . caretAnchor:
219
87
if payload. anchorsMatchNewlines {
220
88
return isAtStartOfLine ( payload)
@@ -247,3 +115,159 @@ extension Processor {
247
115
}
248
116
}
249
117
}
118
+
119
+ // MARK: Built-in character class matching
120
+
121
// Mentioned in ProgrammersManual.md, update docs if redesigned
/// Matches the built-in character class `cc` against `input` at
/// `currentPosition`.
///
/// The quick matcher is consulted first; when it cannot give a definite
/// answer the thorough matcher decides. In builds where `assert` is active,
/// a definite quick answer is cross-checked against the thorough matcher.
///
/// - Returns: The index produced by the matcher on success, or `nil` when
///   `currentPosition` is not before `input.endIndex` or the class does not
///   match.
@_effects(releasenone)
func _matchBuiltinCC(
  _ cc: _CharacterClassModel.Representation,
  in input: String,
  at currentPosition: String.Index,
  isInverted: Bool,
  isStrictASCII: Bool,
  isScalarSemantics: Bool
) -> String.Index? {
  // Nothing left to consume: no built-in class can match.
  if !(currentPosition < input.endIndex) {
    return nil
  }

  let quickAnswer = _quickMatchBuiltinCC(
    cc,
    in: input,
    at: currentPosition,
    isInverted: isInverted,
    isStrictASCII: isStrictASCII,
    isScalarSemantics: isScalarSemantics
  )

  guard case .definite(let fastResult) = quickAnswer else {
    // The fast path could not decide; defer to the full implementation.
    return _thoroughMatchBuiltinCC(
      cc,
      in: input,
      at: currentPosition,
      isInverted: isInverted,
      isStrictASCII: isStrictASCII,
      isScalarSemantics: isScalarSemantics)
  }

  // Both paths must agree whenever the fast path claims a definite answer.
  assert(fastResult == _thoroughMatchBuiltinCC(
    cc,
    in: input,
    at: currentPosition,
    isInverted: isInverted,
    isStrictASCII: isStrictASCII,
    isScalarSemantics: isScalarSemantics))
  return fastResult
}
159
+
160
// Mentioned in ProgrammersManual.md, update docs if redesigned
/// Fast-path attempt at matching the built-in character class `cc`.
///
/// Delegates to `input._quickMatch`. When that helper yields no answer the
/// result is `.unknown`; otherwise the answer is combined with `isInverted`
/// and wrapped in `.definite`.
///
/// - Precondition: `currentPosition < input.endIndex` (checked by `assert`).
/// - Returns: `.definite(nil)` when the quick result equals `isInverted`,
///   `.definite(next)` otherwise, or `.unknown` if the quick matcher
///   declined.
@_effects(releasenone)
@inline(__always)
func _quickMatchBuiltinCC(
  _ cc: _CharacterClassModel.Representation,
  in input: String,
  at currentPosition: String.Index,
  isInverted: Bool,
  isStrictASCII: Bool,
  isScalarSemantics: Bool
) -> QuickResult<String.Index?> {
  assert(currentPosition < input.endIndex)

  if let (end, didMatch) = input._quickMatch(
    cc, at: currentPosition, isScalarSemantics: isScalarSemantics
  ) {
    // A raw result equal to `isInverted` means the (possibly inverted)
    // class fails here.
    let outcome: String.Index? = (didMatch == isInverted) ? nil : end
    return .definite(outcome)
  }
  return .unknown
}
179
+
180
// Mentioned in ProgrammersManual.md, update docs if redesigned
/// Full (slow-path) matcher for the built-in character class `cc`.
///
/// Reads both the `Character` and the `Unicode.Scalar` at `currentPosition`,
/// decides how far a match would advance, tests class membership under the
/// requested semantics, and finally applies `isInverted`.
///
/// - Precondition: `currentPosition < input.endIndex` (checked by `assert`).
/// - Returns: The index following the matched element, or `nil` when the
///   (possibly inverted) class does not match.
@_effects(releasenone)
@inline(never)
func _thoroughMatchBuiltinCC(
  _ cc: _CharacterClassModel.Representation,
  in input: String,
  at currentPosition: String.Index,
  isInverted: Bool,
  isStrictASCII: Bool,
  isScalarSemantics: Bool
) -> String.Index? {
  assert(currentPosition < input.endIndex)
  let character = input[currentPosition]
  let scalar = input.unicodeScalars[currentPosition]

  // Outside strict-ASCII mode every element passes. In strict mode the
  // scalar (under scalar semantics) or the whole character must be ASCII.
  let passesASCII: Bool
  if isStrictASCII {
    passesASCII = (scalar.isASCII && isScalarSemantics) || character.isASCII
  } else {
    passesASCII = true
  }

  // `.anyGrapheme` always advances by a character and `.anyScalar` always
  // by a scalar; every other class follows the semantic level.
  let stepsByScalar: Bool
  if case .anyGrapheme = cc {
    stepsByScalar = false
  } else if case .anyScalar = cc {
    stepsByScalar = true
  } else {
    stepsByScalar = isScalarSemantics
  }
  var end = stepsByScalar
    ? input.unicodeScalars.index(after: currentPosition)
    : input.index(after: currentPosition)

  let isMember: Bool
  switch cc {
  case .any, .anyGrapheme:
    isMember = true

  case .anyScalar:
    // Under grapheme semantics a single scalar only counts if consuming it
    // lands on a grapheme-cluster boundary.
    isMember = isScalarSemantics || input.isOnGraphemeClusterBoundary(end)

  case .digit:
    isMember = (isScalarSemantics
      ? scalar.properties.numericType != nil
      : character.isNumber) && passesASCII

  case .horizontalWhitespace:
    isMember = (isScalarSemantics
      ? scalar.isHorizontalWhitespace
      : character._isHorizontalWhitespace) && passesASCII

  case .verticalWhitespace:
    isMember = (isScalarSemantics
      ? scalar.isNewline
      : character._isNewline) && passesASCII

  case .newlineSequence:
    if isScalarSemantics {
      isMember = scalar.isNewline && passesASCII
      // Match a full CR-LF sequence even in scalar semantics
      if isMember,
         scalar == "\r",
         end != input.endIndex,
         input.unicodeScalars[end] == "\n" {
        end = input.unicodeScalars.index(after: end)
      }
    } else {
      isMember = character._isNewline && passesASCII
    }

  case .whitespace:
    isMember = (isScalarSemantics
      ? scalar.properties.isWhitespace
      : character.isWhitespace) && passesASCII

  case .word:
    isMember = (isScalarSemantics
      ? scalar.properties.isAlphabetic
      : character.isWordCharacter) && passesASCII
  }

  // Inversion flips the membership result; succeed only when the flipped
  // result is true.
  return (isMember != isInverted) ? end : nil
}
273
+
0 commit comments