@@ -2,24 +2,34 @@ import type { RegExpVisitor } from "@eslint-community/regexpp/visitor"
2
2
import type {
3
3
CharacterClass ,
4
4
CharacterClassElement ,
5
+ ClassRangesCharacterClassElement ,
5
6
UnicodePropertyCharacterSet ,
7
+ UnicodeSetsCharacterClassElement ,
6
8
} from "@eslint-community/regexpp/ast"
7
9
import type { RegExpContext } from "../utils"
8
- import {
9
- CP_DIGIT_ZERO ,
10
- CP_SPACE ,
11
- createRule ,
12
- defineRegexpVisitor ,
13
- } from "../utils"
10
+ import { createRule , defineRegexpVisitor } from "../utils"
14
11
import { mention } from "../utils/mention"
12
+ import type { ReadonlyFlags } from "regexp-ast-analysis"
13
+ import { toUnicodeSet } from "regexp-ast-analysis"
14
+ import type { ReadonlyWord } from "refa"
15
+ import { getLexicographicallySmallest } from "../utils/lexicographically-smallest"
15
16
16
/**
 * The kinds a character class element can be classified as for ordering.
 * These are also the values accepted in the rule's `order` option.
 */
type CharacterClassElementKind =
    | "\\w"
    | "\\d"
    | "\\s"
    | "\\p"
    // Fallback kind for elements that match none of the other kinds.
    | "*"
    // Class string disjunctions (`ClassStringDisjunction` nodes).
    | "\\q"
    // Nested classes (`CharacterClass` / `ExpressionCharacterClass` nodes).
    | "[]"
17
25
/**
 * The default ordering of element kinds.
 *
 * A kind at a lower index sorts before a kind at a higher index. Besides
 * being the default, the positions in this list are also used as a
 * tie-breaker when two kinds have the same configured order.
 */
const DEFAULT_ORDER: CharacterClassElementKind[] = [
    "\\s",
    "\\w",
    "\\d",
    "\\p",
    "*",
    "\\q",
    "[]",
]
24
34
25
35
/**
@@ -37,9 +47,46 @@ function getCharacterClassElementKind(
37
47
? "\\s"
38
48
: "\\p"
39
49
}
50
+ if ( node . type === "ClassStringDisjunction" ) {
51
+ return "\\q"
52
+ }
53
+ if (
54
+ node . type === "CharacterClass" ||
55
+ node . type === "ExpressionCharacterClass"
56
+ ) {
57
+ return "[]"
58
+ }
40
59
return "*"
41
60
}
42
61
62
/**
 * Returns the lexicographically smallest word accepted by the given element.
 *
 * If the element is a negated character set, the negation is ignored and the
 * word is computed from the non-negated set instead.
 *
 * @param node - The character class element to inspect.
 * @param flags - The flags of the regular expression containing the element.
 * @returns The smallest accepted word, or an empty word if
 * `getLexicographicallySmallest` does not produce one.
 */
function getLexicographicallySmallestFromElement(
    node: CharacterClassElement,
    flags: ReadonlyFlags,
): ReadonlyWord {
    const us =
        node.type === "CharacterSet" && node.negate
            ? // Use a shallow copy so the original AST node is not mutated.
              toUnicodeSet({ ...node, negate: false }, flags)
            : toUnicodeSet(node, flags)
    return getLexicographicallySmallest(us) || []
}
76
+
77
/**
 * Compares two words element by element, in ascending numeric order.
 *
 * The first position at which the words differ decides the result. If one
 * word is a prefix of the other, the shorter word sorts first.
 *
 * @param a - The first word.
 * @param b - The second word.
 * @returns A negative number if `a` sorts before `b`, a positive number if
 * `a` sorts after `b`, and `0` if both words are equal.
 */
function compareWords(a: ReadonlyWord, b: ReadonlyWord): number {
    const l = Math.min(a.length, b.length)
    for (let i = 0; i < l; i++) {
        const aI = a[i]
        const bI = b[i]
        if (aI !== bI) return aI - bI
    }
    // All shared positions are equal: the shorter word sorts first.
    return a.length - b.length
}
89
+
43
90
export default createRule ( "sort-character-class-elements" , {
44
91
meta : {
45
92
docs : {
@@ -54,7 +101,17 @@ export default createRule("sort-character-class-elements", {
54
101
properties : {
55
102
order : {
56
103
type : "array" ,
57
- items : { enum : [ "\\w" , "\\d" , "\\s" , "\\p" , "*" ] } ,
104
+ items : {
105
+ enum : [
106
+ "\\s" ,
107
+ "\\w" ,
108
+ "\\d" ,
109
+ "\\p" ,
110
+ "*" ,
111
+ "\\q" ,
112
+ "[]" ,
113
+ ] ,
114
+ } ,
58
115
} ,
59
116
} ,
60
117
additionalProperties : false ,
@@ -73,6 +130,8 @@ export default createRule("sort-character-class-elements", {
73
130
"\\d" ?: number
74
131
"\\s" ?: number
75
132
"\\p" ?: number
133
+ "\\q" ?: number
134
+ "[]" ?: number
76
135
} = { "*" : Infinity }
77
136
78
137
; (
@@ -84,6 +143,7 @@ export default createRule("sort-character-class-elements", {
84
143
85
144
function createVisitor ( {
86
145
node,
146
+ flags,
87
147
getRegexpLocation,
88
148
patternSource,
89
149
} : RegExpContext ) : RegExpVisitor . Handlers {
@@ -93,10 +153,10 @@ export default createRule("sort-character-class-elements", {
93
153
for ( const next of ccNode . elements ) {
94
154
if ( prevList . length ) {
95
155
const prev = prevList [ 0 ]
96
- if ( ! isValidOrder ( prev , next ) ) {
156
+ if ( ! isValidOrder ( prev , next , flags ) ) {
97
157
let moveTarget = prev
98
158
for ( const p of prevList ) {
99
- if ( isValidOrder ( p , next ) ) {
159
+ if ( isValidOrder ( p , next , flags ) ) {
100
160
break
101
161
} else {
102
162
moveTarget = p
@@ -144,6 +204,7 @@ export default createRule("sort-character-class-elements", {
144
204
function isValidOrder (
145
205
prev : CharacterClassElement ,
146
206
next : CharacterClassElement ,
207
+ flags : ReadonlyFlags ,
147
208
) {
148
209
const prevKind = getCharacterClassElementKind ( prev )
149
210
const nextKind = getCharacterClassElementKind ( next )
@@ -154,41 +215,33 @@ export default createRule("sort-character-class-elements", {
154
215
} else if ( prevOrder > nextOrder ) {
155
216
return false
156
217
}
157
- if ( prev . type === "CharacterSet" && prev . kind === "property" ) {
158
- if ( next . type === "CharacterSet" ) {
159
- if ( next . kind === "property" ) {
160
- return isValidOrderForUnicodePropertyCharacterSet (
161
- prev ,
162
- next ,
163
- )
164
- }
165
- // e.g. /[\p{ASCII}\d]/
166
- return false
167
- }
168
- // e.g. /[\p{ASCII}a]/
218
+
219
+ const prevOrderShortCircuit = DEFAULT_ORDER . indexOf ( prevKind )
220
+ const nextOrderShortCircuit = DEFAULT_ORDER . indexOf ( nextKind )
221
+ if ( prevOrderShortCircuit < nextOrderShortCircuit ) {
169
222
return true
170
- } else if (
223
+ } else if ( prevOrderShortCircuit > nextOrderShortCircuit ) {
224
+ return false
225
+ }
226
+
227
+ if (
228
+ prev . type === "CharacterSet" &&
229
+ prev . kind === "property" &&
171
230
next . type === "CharacterSet" &&
172
231
next . kind === "property"
173
232
) {
174
- if ( prev . type === "CharacterSet" ) {
175
- // e.g. /[\d\p{ASCII}]/
176
- return true
177
- }
178
- // e.g. /[a\p{ASCII}]/
179
- return false
233
+ return isValidOrderForUnicodePropertyCharacterSet ( prev , next )
180
234
}
181
- if ( prev . type === "CharacterSet" && next . type === "CharacterSet" ) {
182
- if ( prev . kind === "word" && next . kind === "digit" ) {
183
- return true
184
- }
185
- if ( prev . kind === "digit" && next . kind === "word" ) {
186
- return false
187
- }
188
- }
189
- const prevCP = getTargetCodePoint ( prev )
190
- const nextCP = getTargetCodePoint ( next )
191
- if ( prevCP <= nextCP ) {
235
+
236
+ const prevWord = getLexicographicallySmallestFromElement (
237
+ prev ,
238
+ flags ,
239
+ )
240
+ const nextWord = getLexicographicallySmallestFromElement (
241
+ next ,
242
+ flags ,
243
+ )
244
+ if ( compareWords ( prevWord , nextWord ) <= 0 ) {
192
245
return true
193
246
}
194
247
return false
@@ -218,29 +271,6 @@ export default createRule("sort-character-class-elements", {
218
271
return true
219
272
}
220
273
221
- /**
222
- * Gets the target code point for a given element.
223
- */
224
- function getTargetCodePoint (
225
- node : Exclude < CharacterClassElement , UnicodePropertyCharacterSet > ,
226
- ) {
227
- if ( node . type === "CharacterSet" ) {
228
- if ( node . kind === "digit" || node . kind === "word" ) {
229
- return CP_DIGIT_ZERO
230
- }
231
- if ( node . kind === "space" ) {
232
- return CP_SPACE
233
- }
234
- return Infinity
235
- }
236
- if ( node . type === "CharacterClassRange" ) {
237
- return node . min . value
238
- }
239
- // FIXME: TS Error
240
- // @ts -expect-error -- FIXME
241
- return node . value
242
- }
243
-
244
274
return defineRegexpVisitor ( context , {
245
275
createVisitor,
246
276
} )
@@ -254,9 +284,11 @@ function escapeRaw(node: CharacterClassElement, target: CharacterClassElement) {
254
284
let raw = node . raw
255
285
if ( raw . startsWith ( "-" ) ) {
256
286
const parent = target . parent as CharacterClass
257
- // FIXME: TS Error
258
- // @ts -expect-error -- FIXME
259
- const prev = parent . elements [ parent . elements . indexOf ( target ) - 1 ]
287
+ const elements : (
288
+ | UnicodeSetsCharacterClassElement
289
+ | ClassRangesCharacterClassElement
290
+ ) [ ] = parent . elements
291
+ const prev = elements [ elements . indexOf ( target ) - 1 ]
260
292
if (
261
293
prev &&
262
294
( prev . type === "Character" || prev . type === "CharacterSet" )
0 commit comments