Skip to content

Commit 081abef

Browse files
authored
Add support for v flag to regexp/sort-character-class-elements rule (#588)
1 parent eaed9d1 commit 081abef

File tree

4 files changed

+173
-68
lines changed

4 files changed

+173
-68
lines changed

.changeset/green-peaches-explode.md

+5
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
---
2+
"eslint-plugin-regexp": minor
3+
---
4+
5+
Add support for v flag to `regexp/sort-character-class-elements` rule

docs/rules/sort-character-class-elements.md

+3-1
Original file line numberDiff line numberDiff line change
@@ -44,12 +44,14 @@ var foo = /[b-fa]/
4444
"\\d", // \d or \D
4545
"\\p", // \p{...} or \P{...}
4646
"*", // Others (A character or range of characters or an element you did not specify.)
47+
"\\q", // \q{...}
48+
"[]", // Nesting character class, or character class expression
4749
]
4850
}]
4951
}
5052
```
5153

52-
- `"order"` ... An array of your preferred order. The default is `["\\s", "\\w", "\\d", "\\p", "*",]`.
54+
- `"order"` ... An array of your preferred order. The default is `["\\s", "\\w", "\\d", "\\p", "*", "\\q", "[]"]`.
5355

5456
## :rocket: Version
5557

lib/rules/sort-character-class-elements.ts

+98-66
Original file line numberDiff line numberDiff line change
@@ -2,24 +2,34 @@ import type { RegExpVisitor } from "@eslint-community/regexpp/visitor"
22
import type {
33
CharacterClass,
44
CharacterClassElement,
5+
ClassRangesCharacterClassElement,
56
UnicodePropertyCharacterSet,
7+
UnicodeSetsCharacterClassElement,
68
} from "@eslint-community/regexpp/ast"
79
import type { RegExpContext } from "../utils"
8-
import {
9-
CP_DIGIT_ZERO,
10-
CP_SPACE,
11-
createRule,
12-
defineRegexpVisitor,
13-
} from "../utils"
10+
import { createRule, defineRegexpVisitor } from "../utils"
1411
import { mention } from "../utils/mention"
12+
import type { ReadonlyFlags } from "regexp-ast-analysis"
13+
import { toUnicodeSet } from "regexp-ast-analysis"
14+
import type { ReadonlyWord } from "refa"
15+
import { getLexicographicallySmallest } from "../utils/lexicographically-smallest"
1516

16-
type CharacterClassElementKind = "\\w" | "\\d" | "\\s" | "\\p" | "*"
17+
type CharacterClassElementKind =
18+
| "\\w"
19+
| "\\d"
20+
| "\\s"
21+
| "\\p"
22+
| "*"
23+
| "\\q"
24+
| "[]"
1725
const DEFAULT_ORDER: CharacterClassElementKind[] = [
1826
"\\s",
1927
"\\w",
2028
"\\d",
2129
"\\p",
2230
"*",
31+
"\\q",
32+
"[]",
2333
]
2434

2535
/**
@@ -37,9 +47,46 @@ function getCharacterClassElementKind(
3747
? "\\s"
3848
: "\\p"
3949
}
50+
if (node.type === "ClassStringDisjunction") {
51+
return "\\q"
52+
}
53+
if (
54+
node.type === "CharacterClass" ||
55+
node.type === "ExpressionCharacterClass"
56+
) {
57+
return "[]"
58+
}
4059
return "*"
4160
}
4261

62+
/**
63+
* Return the lexicographically smallest string accepted by the given element.
64+
* If the element is a negated character set (e.g. `\D`), its non-negated form is used for the calculation.
65+
*/
66+
function getLexicographicallySmallestFromElement(
67+
node: CharacterClassElement,
68+
flags: ReadonlyFlags,
69+
): ReadonlyWord {
70+
const us =
71+
node.type === "CharacterSet" && node.negate
72+
? toUnicodeSet({ ...node, negate: false }, flags)
73+
: toUnicodeSet(node, flags)
74+
return getLexicographicallySmallest(us) || []
75+
}
76+
77+
/**
78+
* Compare two words element by element in numeric order; when one word is a prefix of the other, the shorter word sorts first.
79+
*/
80+
function compareWords(a: ReadonlyWord, b: ReadonlyWord): number {
81+
const l = Math.min(a.length, b.length)
82+
for (let i = 0; i < l; i++) {
83+
const aI = a[i]
84+
const bI = b[i]
85+
if (aI !== bI) return aI - bI
86+
}
87+
return a.length - b.length
88+
}
89+
4390
export default createRule("sort-character-class-elements", {
4491
meta: {
4592
docs: {
@@ -54,7 +101,17 @@ export default createRule("sort-character-class-elements", {
54101
properties: {
55102
order: {
56103
type: "array",
57-
items: { enum: ["\\w", "\\d", "\\s", "\\p", "*"] },
104+
items: {
105+
enum: [
106+
"\\s",
107+
"\\w",
108+
"\\d",
109+
"\\p",
110+
"*",
111+
"\\q",
112+
"[]",
113+
],
114+
},
58115
},
59116
},
60117
additionalProperties: false,
@@ -73,6 +130,8 @@ export default createRule("sort-character-class-elements", {
73130
"\\d"?: number
74131
"\\s"?: number
75132
"\\p"?: number
133+
"\\q"?: number
134+
"[]"?: number
76135
} = { "*": Infinity }
77136

78137
;(
@@ -84,6 +143,7 @@ export default createRule("sort-character-class-elements", {
84143

85144
function createVisitor({
86145
node,
146+
flags,
87147
getRegexpLocation,
88148
patternSource,
89149
}: RegExpContext): RegExpVisitor.Handlers {
@@ -93,10 +153,10 @@ export default createRule("sort-character-class-elements", {
93153
for (const next of ccNode.elements) {
94154
if (prevList.length) {
95155
const prev = prevList[0]
96-
if (!isValidOrder(prev, next)) {
156+
if (!isValidOrder(prev, next, flags)) {
97157
let moveTarget = prev
98158
for (const p of prevList) {
99-
if (isValidOrder(p, next)) {
159+
if (isValidOrder(p, next, flags)) {
100160
break
101161
} else {
102162
moveTarget = p
@@ -144,6 +204,7 @@ export default createRule("sort-character-class-elements", {
144204
function isValidOrder(
145205
prev: CharacterClassElement,
146206
next: CharacterClassElement,
207+
flags: ReadonlyFlags,
147208
) {
148209
const prevKind = getCharacterClassElementKind(prev)
149210
const nextKind = getCharacterClassElementKind(next)
@@ -154,41 +215,33 @@ export default createRule("sort-character-class-elements", {
154215
} else if (prevOrder > nextOrder) {
155216
return false
156217
}
157-
if (prev.type === "CharacterSet" && prev.kind === "property") {
158-
if (next.type === "CharacterSet") {
159-
if (next.kind === "property") {
160-
return isValidOrderForUnicodePropertyCharacterSet(
161-
prev,
162-
next,
163-
)
164-
}
165-
// e.g. /[\p{ASCII}\d]/
166-
return false
167-
}
168-
// e.g. /[\p{ASCII}a]/
218+
219+
const prevOrderShortCircuit = DEFAULT_ORDER.indexOf(prevKind)
220+
const nextOrderShortCircuit = DEFAULT_ORDER.indexOf(nextKind)
221+
if (prevOrderShortCircuit < nextOrderShortCircuit) {
169222
return true
170-
} else if (
223+
} else if (prevOrderShortCircuit > nextOrderShortCircuit) {
224+
return false
225+
}
226+
227+
if (
228+
prev.type === "CharacterSet" &&
229+
prev.kind === "property" &&
171230
next.type === "CharacterSet" &&
172231
next.kind === "property"
173232
) {
174-
if (prev.type === "CharacterSet") {
175-
// e.g. /[\d\p{ASCII}]/
176-
return true
177-
}
178-
// e.g. /[a\p{ASCII}]/
179-
return false
233+
return isValidOrderForUnicodePropertyCharacterSet(prev, next)
180234
}
181-
if (prev.type === "CharacterSet" && next.type === "CharacterSet") {
182-
if (prev.kind === "word" && next.kind === "digit") {
183-
return true
184-
}
185-
if (prev.kind === "digit" && next.kind === "word") {
186-
return false
187-
}
188-
}
189-
const prevCP = getTargetCodePoint(prev)
190-
const nextCP = getTargetCodePoint(next)
191-
if (prevCP <= nextCP) {
235+
236+
const prevWord = getLexicographicallySmallestFromElement(
237+
prev,
238+
flags,
239+
)
240+
const nextWord = getLexicographicallySmallestFromElement(
241+
next,
242+
flags,
243+
)
244+
if (compareWords(prevWord, nextWord) <= 0) {
192245
return true
193246
}
194247
return false
@@ -218,29 +271,6 @@ export default createRule("sort-character-class-elements", {
218271
return true
219272
}
220273

221-
/**
222-
* Gets the target code point for a given element.
223-
*/
224-
function getTargetCodePoint(
225-
node: Exclude<CharacterClassElement, UnicodePropertyCharacterSet>,
226-
) {
227-
if (node.type === "CharacterSet") {
228-
if (node.kind === "digit" || node.kind === "word") {
229-
return CP_DIGIT_ZERO
230-
}
231-
if (node.kind === "space") {
232-
return CP_SPACE
233-
}
234-
return Infinity
235-
}
236-
if (node.type === "CharacterClassRange") {
237-
return node.min.value
238-
}
239-
// FIXME: TS Error
240-
// @ts-expect-error -- FIXME
241-
return node.value
242-
}
243-
244274
return defineRegexpVisitor(context, {
245275
createVisitor,
246276
})
@@ -254,9 +284,11 @@ function escapeRaw(node: CharacterClassElement, target: CharacterClassElement) {
254284
let raw = node.raw
255285
if (raw.startsWith("-")) {
256286
const parent = target.parent as CharacterClass
257-
// FIXME: TS Error
258-
// @ts-expect-error -- FIXME
259-
const prev = parent.elements[parent.elements.indexOf(target) - 1]
287+
const elements: (
288+
| UnicodeSetsCharacterClassElement
289+
| ClassRangesCharacterClassElement
290+
)[] = parent.elements
291+
const prev = elements[elements.indexOf(target) - 1]
260292
if (
261293
prev &&
262294
(prev.type === "Character" || prev.type === "CharacterSet")

tests/lib/rules/sort-character-class-elements.ts

+67-1
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ import rule from "../../../lib/rules/sort-character-class-elements"
33

44
const tester = new RuleTester({
55
parserOptions: {
6-
ecmaVersion: 2020,
6+
ecmaVersion: "latest",
77
sourceType: "module",
88
},
99
})
@@ -38,6 +38,9 @@ tester.run("sort-character-class-elements", rule as any, {
3838
code: String.raw`/[\p{ASCII}a]/u`,
3939
options: [{ order: [] }],
4040
},
41+
String.raw`/[a\q{a}[a][a--b]]/v`,
42+
String.raw`/[\q{a}\q{b}\q{c}]/v`,
43+
String.raw`/[\q{aa}\q{ab}\q{ac}]/v`,
4144
],
4245
invalid: [
4346
{
@@ -230,5 +233,68 @@ tester.run("sort-character-class-elements", rule as any, {
230233
"Expected character class elements to be in ascending order. '\t' should be before ' '.",
231234
],
232235
},
236+
{
237+
code: String.raw`/[[a--b][a]\q{a}a]/v`,
238+
output: String.raw`/[\q{a}[a--b][a]a]/v`,
239+
errors: [
240+
"Expected character class elements to be in ascending order. '\\q{a}' should be before '[a--b]'.",
241+
"Expected character class elements to be in ascending order. 'a' should be before '[a--b]'.",
242+
],
243+
},
244+
{
245+
code: String.raw`/[\q{a}[a--b][a]a]/v`,
246+
output: String.raw`/[a\q{a}[a--b][a]]/v`,
247+
errors: [
248+
"Expected character class elements to be in ascending order. 'a' should be before '\\q{a}'.",
249+
],
250+
},
251+
{
252+
code: String.raw`/[[b--c][a]]/v`,
253+
output: String.raw`/[[a][b--c]]/v`,
254+
errors: [
255+
"Expected character class elements to be in ascending order. '[a]' should be before '[b--c]'.",
256+
],
257+
},
258+
{
259+
code: String.raw`/[[a]\q{a}]/v; /[\q{a}a]/v; /[[b-c]\q{a}]/v; /[[b-c][a]]/v;`,
260+
output: String.raw`/[\q{a}[a]]/v; /[a\q{a}]/v; /[\q{a}[b-c]]/v; /[[a][b-c]]/v;`,
261+
options: [{ order: [] }],
262+
errors: [
263+
"Expected character class elements to be in ascending order. '\\q{a}' should be before '[a]'.",
264+
"Expected character class elements to be in ascending order. 'a' should be before '\\q{a}'.",
265+
"Expected character class elements to be in ascending order. '\\q{a}' should be before '[b-c]'.",
266+
"Expected character class elements to be in ascending order. '[a]' should be before '[b-c]'.",
267+
],
268+
},
269+
{
270+
code: String.raw`/[\q{c}\q{b}\q{a}]/v`,
271+
output: String.raw`/[\q{b}\q{c}\q{a}]/v`,
272+
errors: [
273+
"Expected character class elements to be in ascending order. '\\q{b}' should be before '\\q{c}'.",
274+
"Expected character class elements to be in ascending order. '\\q{a}' should be before '\\q{c}'.",
275+
],
276+
},
277+
{
278+
code: String.raw`/[\q{b}\q{c}\q{a}]/v`,
279+
output: String.raw`/[\q{a}\q{b}\q{c}]/v`,
280+
errors: [
281+
"Expected character class elements to be in ascending order. '\\q{a}' should be before '\\q{b}'.",
282+
],
283+
},
284+
{
285+
code: String.raw`/[\q{ac}\q{ab}\q{aa}]/v`,
286+
output: String.raw`/[\q{ab}\q{ac}\q{aa}]/v`,
287+
errors: [
288+
"Expected character class elements to be in ascending order. '\\q{ab}' should be before '\\q{ac}'.",
289+
"Expected character class elements to be in ascending order. '\\q{aa}' should be before '\\q{ac}'.",
290+
],
291+
},
292+
{
293+
code: String.raw`/[\q{ab}\q{ac}\q{aa}]/v`,
294+
output: String.raw`/[\q{aa}\q{ab}\q{ac}]/v`,
295+
errors: [
296+
"Expected character class elements to be in ascending order. '\\q{aa}' should be before '\\q{ab}'.",
297+
],
298+
},
233299
],
234300
})

0 commit comments

Comments
 (0)