@@ -6,7 +6,7 @@ import type {
6
6
CharacterClass ,
7
7
CharacterClassElement ,
8
8
CharacterSet ,
9
- Element ,
9
+ ExpressionCharacterClass ,
10
10
Group ,
11
11
LookaroundAssertion ,
12
12
Node ,
@@ -17,12 +17,13 @@ import { createRule, defineRegexpVisitor } from "../utils"
17
17
import type { CharSet } from "refa"
18
18
import type { FirstConsumedChar , ReadonlyFlags } from "regexp-ast-analysis"
19
19
import {
20
- toCharSet ,
21
20
getFirstConsumedChar ,
22
21
getMatchingDirection ,
22
+ toUnicodeSet ,
23
23
} from "regexp-ast-analysis"
24
24
import type { Position , SourceLocation } from "estree"
25
25
import { assertNever } from "../utils/util"
26
+ import { RESERVED_DOUBLE_PUNCTUATOR_CHARS } from "../utils/unicode-set"
26
27
27
28
/**
28
29
* Find the first index of an element that satisfies the given condition.
@@ -59,7 +60,12 @@ type RawAlternative = RawCharAlternative | RawNonCharAlternative
59
60
// A raw alternative that categorizeRawAlts classified as a single character-like element.
interface RawCharAlternative {
60
61
readonly isCharacter : true
61
62
readonly alternative : Alternative
62
- readonly element : Character | CharacterSet | CharacterClass
63
// Characters matched by element: categorizeRawAlts stores the chars of
// toUnicodeSet(element, flags) here, and only when that set has an empty accept part.
+ readonly char : CharSet
64
// The one element of the alternative; this change adds ExpressionCharacterClass to the union.
+ readonly element :
65
+ | Character
66
+ | CharacterSet
67
+ | CharacterClass
68
+ | ExpressionCharacterClass
63
69
}
64
70
interface RawNonCharAlternative {
65
71
readonly isCharacter : false
@@ -88,52 +94,54 @@ function elementsToCharacterClass(elements: CharElementArray): string {
88
94
// Its ONLY job is to generate a valid character class from the given elements.
89
95
// Optimizations can be done by another rule.
90
96
91
- let result = "["
97
+ const parts : string [ ] = [ ]
92
98
93
- elements . forEach ( ( e , i ) => {
99
+ elements . forEach ( ( e ) => {
94
100
switch ( e . type ) {
95
101
case "Character" :
96
102
if ( e . raw === "-" ) {
97
- if ( i === 0 || i === elements . length - 1 ) {
98
- result += "-"
99
- } else {
100
- result += "\\-"
101
- }
102
- } else if ( e . raw === "^" ) {
103
- if ( i === 0 ) {
104
- result += "\\^"
105
- } else {
106
- result += "^"
107
- }
103
+ parts . push ( "\\-" )
108
104
} else if ( e . raw === "]" ) {
109
- result += "\\]"
105
+ parts . push ( "\\]" )
110
106
} else {
111
- result += e . raw
107
+ parts . push ( e . raw )
112
108
}
113
109
break
114
110
115
111
case "CharacterClassRange" :
116
- if ( e . min . raw === "^" && i === 0 ) {
117
- result += `\\^-${ e . max . raw } `
118
- } else {
119
- result += `${ e . min . raw } -${ e . max . raw } `
120
- }
121
- break
122
-
123
112
case "CharacterSet" :
124
- result += e . raw
113
+ case "CharacterClass" :
114
+ case "ClassStringDisjunction" :
115
+ case "ExpressionCharacterClass" :
116
+ parts . push ( e . raw )
125
117
break
126
118
127
119
default :
128
- // FIXME: TS Error
129
- // @ts -expect-error -- FIXME
130
120
throw assertNever ( e )
131
121
}
132
122
} )
133
123
134
- result += "]"
124
+ if ( parts . length > 0 && parts [ 0 ] . startsWith ( "^" ) ) {
125
+ parts [ 0 ] = `\\${ parts [ 0 ] } `
126
+ }
127
+
128
+ // escape double punctuators for v flag
129
+ for ( let i = 1 ; i < parts . length ; i ++ ) {
130
+ const prev = parts [ i - 1 ]
131
+ const curr = parts [ i ]
132
+
133
+ const pChar = prev . slice ( - 1 )
134
+ const cChar = curr [ 0 ]
135
+ if (
136
+ RESERVED_DOUBLE_PUNCTUATOR_CHARS . has ( cChar ) &&
137
+ cChar === pChar &&
138
+ ! prev . endsWith ( `\\${ pChar } ` )
139
+ ) {
140
+ parts [ i - 1 ] = `${ prev . slice ( 0 , - 1 ) } \\${ pChar } `
141
+ }
142
+ }
135
143
136
- return result
144
+ return `[ ${ parts . join ( "" ) } ]`
137
145
}
138
146
139
147
/**
@@ -144,21 +152,23 @@ function categorizeRawAlts(
144
152
alternatives : readonly Alternative [ ] ,
145
153
flags : ReadonlyFlags ,
146
154
) : RawAlternative [ ] {
147
- return alternatives . map < RawAlternative > ( ( alternative ) => {
155
+ return alternatives . map ( ( alternative ) : RawAlternative => {
148
156
if ( alternative . elements . length === 1 ) {
149
157
const element = alternative . elements [ 0 ]
150
158
if (
151
159
element . type === "Character" ||
152
160
element . type === "CharacterClass" ||
153
- element . type === "CharacterSet"
161
+ element . type === "CharacterSet" ||
162
+ element . type === "ExpressionCharacterClass"
154
163
) {
155
- return {
156
- isCharacter : true ,
157
- alternative,
158
- element,
159
- // FIXME: TS Error
160
- // @ts -expect-error -- FIXME
161
- char : toCharSet ( element , flags ) ,
164
+ const set = toUnicodeSet ( element , flags )
165
+ if ( set . accept . isEmpty ) {
166
+ return {
167
+ isCharacter : true ,
168
+ alternative,
169
+ char : set . chars ,
170
+ element,
171
+ }
162
172
}
163
173
}
164
174
}
@@ -189,23 +199,36 @@ function containsCharacterClass(alts: readonly RawAlternative[]): boolean {
189
199
*
190
200
* The returned array may be empty.
191
201
*/
192
// Converts the element of a character alternative into character class elements,
// or null when that is not possible (a dot, or a negated class without the v flag).
- function toCharacterClassElement ( element : Element ) : CharElementArray | null {
193
- if ( element . type === "CharacterSet" ) {
194
- // normal dot is not possible (it technically is but it's complicated)
195
- if ( element . kind === "any" ) {
196
- return null
197
- }
198
- return [ element ]
199
- } else if ( element . type === "CharacterClass" ) {
200
- if ( element . negate ) {
201
- // we can't (easily) combine negated character classes
202
- return null
203
- }
204
- return element . elements
205
- } else if ( element . type === "Character" ) {
206
- return [ element ]
202
// New version: the parameter is narrowed to the element union of RawCharAlternative,
// so the switch below can be checked for exhaustiveness.
+ function toCharacterClassElement (
203
+ element : RawCharAlternative [ "element" ] ,
204
+ ) : CharElementArray | null {
205
+ switch ( element . type ) {
206
+ case "Character" :
207
+ return [ element ]
208
+
209
+ case "CharacterSet" :
210
+ if ( element . kind === "any" ) {
211
+ // normal dot is not possible (it technically is but it's complicated)
212
+ return null
213
+ }
214
+ return [ element ]
215
+
216
+ case "CharacterClass" :
217
+ if ( element . negate ) {
218
// A negated class flagged as unicodeSets (v flag) is kept whole as a single element.
+ if ( element . unicodeSets ) {
219
+ return [ element ]
220
+ }
221
+ // we can't (easily) combine negated character classes without the v flag
222
+ return null
223
+ }
224
// Non-negated class: hand back its member elements rather than the class itself.
+ return element . elements
225
+
226
// Expression classes are returned whole, wrapped in a one-element array.
+ case "ExpressionCharacterClass" :
227
+ return [ element ]
228
+
229
// Exhaustiveness guard: every member of the element union is handled above.
+ default :
230
+ return assertNever ( element )
207
231
}
208
- return null
209
232
}
210
233
211
234
/**
@@ -215,16 +238,14 @@ function parseRawAlts(
215
238
alternatives : readonly RawAlternative [ ] ,
216
239
flags : ReadonlyFlags ,
217
240
) : ParsedAlternative [ ] {
218
- return alternatives . map < ParsedAlternative > ( ( a ) => {
241
+ return alternatives . map ( ( a ) : ParsedAlternative => {
219
242
if ( a . isCharacter ) {
220
243
const elements = toCharacterClassElement ( a . element )
221
244
if ( elements ) {
222
245
return {
223
246
isCharacter : true ,
224
247
elements,
225
- // FIXME: TS Error
226
- // @ts -expect-error -- FIXME
227
- char : toCharSet ( a . element , flags ) ,
248
+ char : a . char ,
228
249
raw : a . alternative . raw ,
229
250
}
230
251
}
@@ -349,21 +370,14 @@ function findNonDisjointAlt(
349
370
/**
350
371
* Returns whether the given alternatives can accept any character.
351
372
*/
352
- function totalIsAll (
353
- alternatives : readonly RawAlternative [ ] ,
354
- { flags } : RegExpContext ,
355
- ) : boolean {
373
+ function totalIsAll ( alternatives : readonly RawAlternative [ ] ) : boolean {
356
374
let total : CharSet | undefined = undefined
357
375
for ( const a of alternatives ) {
358
376
if ( a . isCharacter ) {
359
377
if ( total === undefined ) {
360
- // FIXME: TS Error
361
- // @ts -expect-error -- FIXME
362
- total = toCharSet ( a . element , flags )
378
+ total = a . char
363
379
} else {
364
- // FIXME: TS Error
365
- // @ts -expect-error -- FIXME
366
- total = total . union ( toCharSet ( a . element , flags ) )
380
+ total = total . union ( a . char )
367
381
}
368
382
}
369
383
}
@@ -506,10 +520,7 @@ export default createRule("prefer-character-class", {
506
520
return
507
521
}
508
522
509
- if (
510
- alts . every ( ( a ) => a . isCharacter ) &&
511
- totalIsAll ( alts , regexpContext )
512
- ) {
523
+ if ( alts . every ( ( a ) => a . isCharacter ) && totalIsAll ( alts ) ) {
513
524
// This is the special case where:
514
525
// 1) all alternatives are characters,
515
526
// 2) there are at least 2 alternatives, and
@@ -538,7 +549,7 @@ export default createRule("prefer-character-class", {
538
549
if (
539
550
characterAltsCount >= minCharacterAlternatives ||
540
551
containsCharacterClass ( alts ) ||
541
- totalIsAll ( alts , regexpContext ) ||
552
+ totalIsAll ( alts ) ||
542
553
findNonDisjointAlt ( parsedAlts )
543
554
) {
544
555
optimizeCharacterAlts ( parsedAlts )
0 commit comments