@@ -59,10 +59,14 @@ fileprivate extension Compiler.ByteCodeGen {
59
59
emitAny ( )
60
60
61
61
case let . char( c) :
62
- try emitCharacter ( c)
62
+ emitCharacter ( c)
63
63
64
64
case let . scalar( s) :
65
- try emitScalar ( s)
65
+ if options. semanticLevel == . graphemeCluster {
66
+ emitCharacter ( Character ( s) )
67
+ } else {
68
+ emitMatchScalar ( s)
69
+ }
66
70
67
71
case let . assertion( kind) :
68
72
try emitAssertion ( kind. ast)
@@ -88,6 +92,34 @@ fileprivate extension Compiler.ByteCodeGen {
88
92
}
89
93
}
90
94
95
/// Emits the instructions that match the quoted literal `s` verbatim.
///
/// - Outside grapheme-cluster semantics, every Unicode scalar of `s` is
///   emitted in order through `emitMatchScalar`.
/// - In grapheme-cluster semantics with optimizations enabled, a non-empty
///   all-ASCII literal is emitted scalar by scalar, requesting a boundary
///   check only on the final scalar.
/// - Otherwise each `Character` of `s` is emitted through `emitCharacter`.
///
/// An empty literal emits no instructions.
///
/// - Parameter s: The literal text to match.
mutating func emitQuotedLiteral(_ s: String) {
  guard options.semanticLevel == .graphemeCluster else {
    // Walking the string's scalar view visits the same scalars, in the same
    // order, as walking each character's scalars in turn.
    for scalar in s.unicodeScalars {
      emitMatchScalar(scalar)
    }
    return
  }

  // Fast path for eliding boundary checks for an all ascii quoted literal.
  // `allSatisfy` is vacuously true for an empty literal, so the last index is
  // bound optionally instead of force-unwrapped: an empty literal falls
  // through to the (empty) per-character loop below rather than trapping.
  if optimizationsEnabled, s.allSatisfy(\.isASCII),
     let lastIdx = s.unicodeScalars.indices.last {
    for idx in s.unicodeScalars.indices {
      // Only the final scalar of the literal requests a boundary check.
      let boundaryCheck = idx == lastIdx
      let scalar = s.unicodeScalars[idx]
      if options.isCaseInsensitive && scalar.properties.isCased {
        builder.buildMatchScalarCaseInsensitive(scalar, boundaryCheck: boundaryCheck)
      } else {
        builder.buildMatchScalar(scalar, boundaryCheck: boundaryCheck)
      }
    }
    return
  }

  for c in s { emitCharacter(c) }
}
122
+
91
123
mutating func emitBackreference(
92
124
_ ref: AST . Reference
93
125
) throws {
@@ -245,41 +277,47 @@ fileprivate extension Compiler.ByteCodeGen {
245
277
}
246
278
}
247
279
248
/// Emits an instruction that matches the single Unicode scalar `s`.
///
/// Only valid in Unicode-scalar semantic mode (checked with `assert`).
/// The case-insensitive instruction is chosen when case-insensitive matching
/// is enabled and `s` is a cased scalar; every other scalar gets the exact
/// match instruction. Both are built with `boundaryCheck: false`.
///
/// - Parameter s: The scalar to match.
mutating func emitMatchScalar(_ s: UnicodeScalar) {
  assert(options.semanticLevel == .unicodeScalar)

  // Uncased scalars take the exact-match instruction even when
  // case-insensitive matching is enabled.
  let ignoresCase = options.isCaseInsensitive && s.properties.isCased
  guard ignoresCase else {
    builder.buildMatchScalar(s, boundaryCheck: false)
    return
  }
  builder.buildMatchScalarCaseInsensitive(s, boundaryCheck: false)
}
261
288
262
/// Emits the instructions that match the character `c`.
///
/// - In Unicode-scalar mode, each scalar of `c` is emitted through
///   `emitMatchScalar`.
/// - Otherwise, when optimizations are enabled and `c` is ASCII, scalar match
///   instructions are emitted with a boundary check on the final scalar only.
/// - In every remaining case a whole-character match is built, case
///   insensitive when that option is on and `c` is cased.
///
/// - Parameter c: The character to match.
mutating func emitCharacter(_ c: Character) {
  // Unicode scalar mode matches the specific scalars that comprise a character
  guard options.semanticLevel != .unicodeScalar else {
    for scalar in c.unicodeScalars {
      emitMatchScalar(scalar)
    }
    return
  }

  let usesASCIIFastPath = optimizationsEnabled && c.isASCII
  let ignoresCase = options.isCaseInsensitive && c.isCased

  guard usesASCIIFastPath else {
    // General path: a single whole-character match instruction.
    builder.buildMatch(c, isCaseInsensitive: ignoresCase)
    return
  }

  if ignoresCase {
    // c.isCased ensures that c is not CR-LF,
    // so we know that c is a single scalar
    assert(c.unicodeScalars.count == 1)
    builder.buildMatchScalarCaseInsensitive(
      c.unicodeScalars.last!,
      boundaryCheck: true)
    return
  }

  // Case-sensitive ASCII: emit every scalar, requesting the boundary check
  // only on the last one.
  let scalars = c.unicodeScalars
  let finalOffset = scalars.count - 1
  for (offset, scalar) in scalars.enumerated() {
    builder.buildMatchScalar(scalar, boundaryCheck: offset == finalOffset)
  }
}
284
322
285
323
mutating func emitAny( ) {
@@ -717,11 +755,12 @@ fileprivate extension Compiler.ByteCodeGen {
717
755
_ ccc: DSLTree . CustomCharacterClass
718
756
) throws {
719
757
if let asciiBitset = ccc. asAsciiBitset ( options) ,
720
- options. semanticLevel == . graphemeCluster,
721
758
optimizationsEnabled {
722
- // future work: add a bit to .matchBitset to consume either a character
723
- // or a scalar so we can have this optimization in scalar mode
724
- builder. buildMatchAsciiBitset ( asciiBitset)
759
+ if options. semanticLevel == . unicodeScalar {
760
+ builder. buildScalarMatchAsciiBitset ( asciiBitset)
761
+ } else {
762
+ builder. buildMatchAsciiBitset ( asciiBitset)
763
+ }
725
764
} else {
726
765
let consumer = try ccc. generateConsumer ( options)
727
766
builder. buildConsume ( by: consumer)
@@ -798,45 +837,7 @@ fileprivate extension Compiler.ByteCodeGen {
798
837
try emitAtom ( a)
799
838
800
839
case let . quotedLiteral( s) :
801
- if options. semanticLevel == . graphemeCluster {
802
- if options. isCaseInsensitive {
803
- // TODO: buildCaseInsensitiveMatchSequence(c) or alternative
804
- builder. buildConsume { input, bounds in
805
- var iterator = s. makeIterator ( )
806
- var currentIndex = bounds. lowerBound
807
- while let ch = iterator. next ( ) {
808
- guard currentIndex < bounds. upperBound,
809
- ch. lowercased ( ) == input [ currentIndex] . lowercased ( )
810
- else { return nil }
811
- input. formIndex ( after: & currentIndex)
812
- }
813
- return currentIndex
814
- }
815
- } else {
816
- builder. buildMatchSequence ( s)
817
- }
818
- } else {
819
- builder. buildConsume {
820
- [ caseInsensitive = options. isCaseInsensitive] input, bounds in
821
- // TODO: Case folding
822
- var iterator = s. unicodeScalars. makeIterator ( )
823
- var currentIndex = bounds. lowerBound
824
- while let scalar = iterator. next ( ) {
825
- guard currentIndex < bounds. upperBound else { return nil }
826
- if caseInsensitive {
827
- if scalar. properties. lowercaseMapping != input. unicodeScalars [ currentIndex] . properties. lowercaseMapping {
828
- return nil
829
- }
830
- } else {
831
- if scalar != input. unicodeScalars [ currentIndex] {
832
- return nil
833
- }
834
- }
835
- input. unicodeScalars. formIndex ( after: & currentIndex)
836
- }
837
- return currentIndex
838
- }
839
- }
840
+ emitQuotedLiteral ( s)
840
841
841
842
case let . convertedRegexLiteral( n, _) :
842
843
return try emitNode ( n)
0 commit comments