Skip to content

Commit 81f0153

Browse files
Add support for v flag to regexp/prefer-character-class (#619)
* Add support for `v` flag to `regexp/prefer-character-class`
* Create clean-kids-mate.md
1 parent fbf590d commit 81f0153

7 files changed

+133
-152
lines changed

.changeset/clean-kids-mate.md

+5
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,5 @@
1+
---
2+
"eslint-plugin-regexp": minor
3+
---
4+
5+
Add support for `v` flag to `regexp/prefer-character-class`

lib/rules/no-useless-character-class.ts

+3-11
Original file line number | Diff line number | Diff line change
@@ -7,17 +7,13 @@ import type {
77
ExpressionCharacterClass,
88
UnicodeSetsCharacterClass,
99
} from "@eslint-community/regexpp/ast"
10+
import { RESERVED_DOUBLE_PUNCTUATOR_CHARS } from "../utils/unicode-set"
1011

1112
const ESCAPES_OUTSIDE_CHARACTER_CLASS = new Set("$()*+./?[{|")
1213
const ESCAPES_OUTSIDE_CHARACTER_CLASS_WITH_U = new Set([
1314
...ESCAPES_OUTSIDE_CHARACTER_CLASS,
1415
"}",
1516
])
16-
// A single character set of ClassSetReservedDoublePunctuator.
17-
// && !! ## $$ %% ** ++ ,, .. :: ;; << == >> ?? @@ ^^ `` ~~ are ClassSetReservedDoublePunctuator
18-
const REGEX_CLASS_SET_RESERVED_DOUBLE_PUNCTUATOR = new Set(
19-
"!#$%&*+,.:;<=>?@^`~",
20-
)
2117

2218
export default createRule("no-useless-character-class", {
2319
meta: {
@@ -217,9 +213,7 @@ export default createRule("no-useless-character-class", {
217213

218214
// Avoid [A&&[&]] => [A&&&]
219215
if (
220-
REGEX_CLASS_SET_RESERVED_DOUBLE_PUNCTUATOR.has(
221-
char,
222-
) &&
216+
RESERVED_DOUBLE_PUNCTUATOR_CHARS.has(char) &&
223217
// The previous character is the same
224218
pattern[ccNode.start - 1] === char
225219
) {
@@ -263,9 +257,7 @@ export default createRule("no-useless-character-class", {
263257

264258
// Avoid [A[&]&B] => [A&&B]
265259
return (
266-
REGEX_CLASS_SET_RESERVED_DOUBLE_PUNCTUATOR.has(
267-
char,
268-
) &&
260+
RESERVED_DOUBLE_PUNCTUATOR_CHARS.has(char) &&
269261
// The next character is the same
270262
pattern[ccNode.end] === char
271263
)

lib/rules/no-useless-escape.ts

+2-37
Original file line number | Diff line number | Diff line change
@@ -25,20 +25,8 @@ import {
2525
CP_PIPE,
2626
CP_MINUS,
2727
canUnwrapped,
28-
CP_HASH,
29-
CP_PERCENT,
30-
CP_BAN,
31-
CP_AMP,
32-
CP_COMMA,
33-
CP_COLON,
34-
CP_SEMI,
35-
CP_LT,
36-
CP_EQ,
37-
CP_GT,
38-
CP_AT,
39-
CP_TILDE,
40-
CP_BACKTICK,
4128
} from "../utils"
29+
import { RESERVED_DOUBLE_PUNCTUATOR_CP } from "../utils/unicode-set"
4230

4331
const REGEX_CHAR_CLASS_ESCAPES = new Set([
4432
CP_BACK_SLASH, // \\
@@ -80,29 +68,6 @@ const POTENTIAL_ESCAPE_SEQUENCE_FOR_CHAR_CLASS = new Set([
8068
...POTENTIAL_ESCAPE_SEQUENCE,
8169
"q",
8270
])
83-
// A single character set of ClassSetReservedDoublePunctuator.
84-
// && !! ## $$ %% ** ++ ,, .. :: ;; << == >> ?? @@ ^^ `` ~~ are ClassSetReservedDoublePunctuator
85-
const REGEX_CLASS_SET_RESERVED_DOUBLE_PUNCTUATOR = new Set([
86-
CP_BAN, // !
87-
CP_HASH, // #
88-
CP_DOLLAR, // $
89-
CP_PERCENT, // %
90-
CP_AMP, // &
91-
CP_STAR, // *
92-
CP_PLUS, // +
93-
CP_COMMA, // ,
94-
CP_DOT, // .
95-
CP_COLON, // :
96-
CP_SEMI, // ;
97-
CP_LT, // <
98-
CP_EQ, // =
99-
CP_GT, // >
100-
CP_QUESTION, // ?
101-
CP_AT, // @
102-
CP_CARET, // ^
103-
CP_BACKTICK, // `
104-
CP_TILDE, // ~
105-
])
10671

10772
export default createRule("no-useless-escape", {
10873
meta: {
@@ -186,7 +151,7 @@ export default createRule("no-useless-escape", {
186151
}
187152
if (flags.unicodeSets) {
188153
if (
189-
REGEX_CLASS_SET_RESERVED_DOUBLE_PUNCTUATOR.has(
154+
RESERVED_DOUBLE_PUNCTUATOR_CP.has(
190155
cNode.value,
191156
)
192157
) {

lib/rules/prefer-character-class.ts

+85-74
Original file line number | Diff line number | Diff line change
@@ -6,7 +6,7 @@ import type {
66
CharacterClass,
77
CharacterClassElement,
88
CharacterSet,
9-
Element,
9+
ExpressionCharacterClass,
1010
Group,
1111
LookaroundAssertion,
1212
Node,
@@ -17,12 +17,13 @@ import { createRule, defineRegexpVisitor } from "../utils"
1717
import type { CharSet } from "refa"
1818
import type { FirstConsumedChar, ReadonlyFlags } from "regexp-ast-analysis"
1919
import {
20-
toCharSet,
2120
getFirstConsumedChar,
2221
getMatchingDirection,
22+
toUnicodeSet,
2323
} from "regexp-ast-analysis"
2424
import type { Position, SourceLocation } from "estree"
2525
import { assertNever } from "../utils/util"
26+
import { RESERVED_DOUBLE_PUNCTUATOR_CHARS } from "../utils/unicode-set"
2627

2728
/**
2829
* Find the first index of an element that satisfies the given condition.
@@ -59,7 +60,12 @@ type RawAlternative = RawCharAlternative | RawNonCharAlternative
5960
interface RawCharAlternative {
6061
readonly isCharacter: true
6162
readonly alternative: Alternative
62-
readonly element: Character | CharacterSet | CharacterClass
63+
readonly char: CharSet
64+
readonly element:
65+
| Character
66+
| CharacterSet
67+
| CharacterClass
68+
| ExpressionCharacterClass
6369
}
6470
interface RawNonCharAlternative {
6571
readonly isCharacter: false
@@ -88,52 +94,54 @@ function elementsToCharacterClass(elements: CharElementArray): string {
8894
// Its ONLY job is to generate a valid character class from the given elements.
8995
// Optimizations can be done by another rule.
9096

91-
let result = "["
97+
const parts: string[] = []
9298

93-
elements.forEach((e, i) => {
99+
elements.forEach((e) => {
94100
switch (e.type) {
95101
case "Character":
96102
if (e.raw === "-") {
97-
if (i === 0 || i === elements.length - 1) {
98-
result += "-"
99-
} else {
100-
result += "\\-"
101-
}
102-
} else if (e.raw === "^") {
103-
if (i === 0) {
104-
result += "\\^"
105-
} else {
106-
result += "^"
107-
}
103+
parts.push("\\-")
108104
} else if (e.raw === "]") {
109-
result += "\\]"
105+
parts.push("\\]")
110106
} else {
111-
result += e.raw
107+
parts.push(e.raw)
112108
}
113109
break
114110

115111
case "CharacterClassRange":
116-
if (e.min.raw === "^" && i === 0) {
117-
result += `\\^-${e.max.raw}`
118-
} else {
119-
result += `${e.min.raw}-${e.max.raw}`
120-
}
121-
break
122-
123112
case "CharacterSet":
124-
result += e.raw
113+
case "CharacterClass":
114+
case "ClassStringDisjunction":
115+
case "ExpressionCharacterClass":
116+
parts.push(e.raw)
125117
break
126118

127119
default:
128-
// FIXME: TS Error
129-
// @ts-expect-error -- FIXME
130120
throw assertNever(e)
131121
}
132122
})
133123

134-
result += "]"
124+
if (parts.length > 0 && parts[0].startsWith("^")) {
125+
parts[0] = `\\${parts[0]}`
126+
}
127+
128+
// escape double punctuators for v flag
129+
for (let i = 1; i < parts.length; i++) {
130+
const prev = parts[i - 1]
131+
const curr = parts[i]
132+
133+
const pChar = prev.slice(-1)
134+
const cChar = curr[0]
135+
if (
136+
RESERVED_DOUBLE_PUNCTUATOR_CHARS.has(cChar) &&
137+
cChar === pChar &&
138+
!prev.endsWith(`\\${pChar}`)
139+
) {
140+
parts[i - 1] = `${prev.slice(0, -1)}\\${pChar}`
141+
}
142+
}
135143

136-
return result
144+
return `[${parts.join("")}]`
137145
}
138146

139147
/**
@@ -144,21 +152,23 @@ function categorizeRawAlts(
144152
alternatives: readonly Alternative[],
145153
flags: ReadonlyFlags,
146154
): RawAlternative[] {
147-
return alternatives.map<RawAlternative>((alternative) => {
155+
return alternatives.map((alternative): RawAlternative => {
148156
if (alternative.elements.length === 1) {
149157
const element = alternative.elements[0]
150158
if (
151159
element.type === "Character" ||
152160
element.type === "CharacterClass" ||
153-
element.type === "CharacterSet"
161+
element.type === "CharacterSet" ||
162+
element.type === "ExpressionCharacterClass"
154163
) {
155-
return {
156-
isCharacter: true,
157-
alternative,
158-
element,
159-
// FIXME: TS Error
160-
// @ts-expect-error -- FIXME
161-
char: toCharSet(element, flags),
164+
const set = toUnicodeSet(element, flags)
165+
if (set.accept.isEmpty) {
166+
return {
167+
isCharacter: true,
168+
alternative,
169+
char: set.chars,
170+
element,
171+
}
162172
}
163173
}
164174
}
@@ -189,23 +199,36 @@ function containsCharacterClass(alts: readonly RawAlternative[]): boolean {
189199
*
190200
* The returned array may be empty.
191201
*/
192-
function toCharacterClassElement(element: Element): CharElementArray | null {
193-
if (element.type === "CharacterSet") {
194-
// normal dot is not possible (it technically is but it's complicated)
195-
if (element.kind === "any") {
196-
return null
197-
}
198-
return [element]
199-
} else if (element.type === "CharacterClass") {
200-
if (element.negate) {
201-
// we can't (easily) combine negated character classes
202-
return null
203-
}
204-
return element.elements
205-
} else if (element.type === "Character") {
206-
return [element]
202+
function toCharacterClassElement(
203+
element: RawCharAlternative["element"],
204+
): CharElementArray | null {
205+
switch (element.type) {
206+
case "Character":
207+
return [element]
208+
209+
case "CharacterSet":
210+
if (element.kind === "any") {
211+
// normal dot is not possible (it technically is but it's complicated)
212+
return null
213+
}
214+
return [element]
215+
216+
case "CharacterClass":
217+
if (element.negate) {
218+
if (element.unicodeSets) {
219+
return [element]
220+
}
221+
// we can't (easily) combine negated character classes without the v flag
222+
return null
223+
}
224+
return element.elements
225+
226+
case "ExpressionCharacterClass":
227+
return [element]
228+
229+
default:
230+
return assertNever(element)
207231
}
208-
return null
209232
}
210233

211234
/**
@@ -215,16 +238,14 @@ function parseRawAlts(
215238
alternatives: readonly RawAlternative[],
216239
flags: ReadonlyFlags,
217240
): ParsedAlternative[] {
218-
return alternatives.map<ParsedAlternative>((a) => {
241+
return alternatives.map((a): ParsedAlternative => {
219242
if (a.isCharacter) {
220243
const elements = toCharacterClassElement(a.element)
221244
if (elements) {
222245
return {
223246
isCharacter: true,
224247
elements,
225-
// FIXME: TS Error
226-
// @ts-expect-error -- FIXME
227-
char: toCharSet(a.element, flags),
248+
char: a.char,
228249
raw: a.alternative.raw,
229250
}
230251
}
@@ -349,21 +370,14 @@ function findNonDisjointAlt(
349370
/**
350371
* Returns whether the given alternative can accept any character.
351372
*/
352-
function totalIsAll(
353-
alternatives: readonly RawAlternative[],
354-
{ flags }: RegExpContext,
355-
): boolean {
373+
function totalIsAll(alternatives: readonly RawAlternative[]): boolean {
356374
let total: CharSet | undefined = undefined
357375
for (const a of alternatives) {
358376
if (a.isCharacter) {
359377
if (total === undefined) {
360-
// FIXME: TS Error
361-
// @ts-expect-error -- FIXME
362-
total = toCharSet(a.element, flags)
378+
total = a.char
363379
} else {
364-
// FIXME: TS Error
365-
// @ts-expect-error -- FIXME
366-
total = total.union(toCharSet(a.element, flags))
380+
total = total.union(a.char)
367381
}
368382
}
369383
}
@@ -506,10 +520,7 @@ export default createRule("prefer-character-class", {
506520
return
507521
}
508522

509-
if (
510-
alts.every((a) => a.isCharacter) &&
511-
totalIsAll(alts, regexpContext)
512-
) {
523+
if (alts.every((a) => a.isCharacter) && totalIsAll(alts)) {
513524
// This is the special case where:
514525
// 1) all alternatives are characters,
515526
// 2) there are at least 2 alternatives, and
@@ -538,7 +549,7 @@ export default createRule("prefer-character-class", {
538549
if (
539550
characterAltsCount >= minCharacterAlternatives ||
540551
containsCharacterClass(alts) ||
541-
totalIsAll(alts, regexpContext) ||
552+
totalIsAll(alts) ||
542553
findNonDisjointAlt(parsedAlts)
543554
) {
544555
optimizeCharacterAlts(parsedAlts)

0 commit comments

Comments
 (0)