-
Notifications
You must be signed in to change notification settings - Fork 11
/
Copy pathprefer-predefined-assertion.ts
226 lines (207 loc) · 8.36 KB
/
prefer-predefined-assertion.ts
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
import type {
CharacterClass,
CharacterSet,
ExpressionCharacterClass,
LookaroundAssertion,
} from "@eslint-community/regexpp/ast"
import type { RegExpVisitor } from "@eslint-community/regexpp/visitor"
import {
Chars,
getFirstCharAfter,
getMatchingDirectionFromAssertionKind,
invertMatchingDirection,
toUnicodeSet,
} from "regexp-ast-analysis"
import type { RegExpContext } from "../utils"
import { createRule, defineRegexpVisitor } from "../utils"
/**
 * Extracts the sole element of a lookaround when that element is a character
 * set, a character class, or an expression character class.
 *
 * @param lookaround The lookaround assertion to inspect.
 * @returns The single element, or `null` if the lookaround does not have
 * exactly one alternative, that alternative does not have exactly one
 * element, or the element is of any other type.
 */
function getCharacters(
    lookaround: LookaroundAssertion,
): CharacterSet | CharacterClass | ExpressionCharacterClass | null {
    const { alternatives } = lookaround
    if (alternatives.length !== 1) {
        return null
    }

    const { elements } = alternatives[0]
    if (elements.length !== 1) {
        return null
    }

    const only = elements[0]
    switch (only.type) {
        case "CharacterSet":
        case "CharacterClass":
        case "ExpressionCharacterClass":
            return only
        default:
            return null
    }
}
export default createRule("prefer-predefined-assertion", {
    meta: {
        docs: {
            description:
                "prefer predefined assertion over equivalent lookarounds",
            category: "Best Practices",
            recommended: true,
        },
        fixable: "code",
        schema: [],
        messages: {
            replace:
                "This lookaround assertion can be replaced with {{kind}} ('{{expr}}').",
        },
        type: "suggestion", // "problem",
    },
    create(context) {
        /**
         * Builds the regexp visitor that reports lookarounds which can be
         * written as `\b`, `\B`, `^`, or `$`.
         */
        const createVisitor = (
            regexpContext: RegExpContext,
        ): RegExpVisitor.Handlers => {
            const { node, flags, getRegexpLocation, fixReplaceNode } =
                regexpContext

            const word = Chars.word(flags)
            const nonWord = word.negate()

            /**
             * Reports the given lookaround with the "replace" message and
             * attaches a fix that swaps it for `replacement`.
             */
            function reportReplacement(
                aNode: LookaroundAssertion,
                kind: string,
                replacement: string,
            ): void {
                context.report({
                    node,
                    loc: getRegexpLocation(aNode),
                    messageId: "replace",
                    data: { kind, expr: replacement },
                    fix: fixReplaceNode(aNode, replacement),
                })
            }

            // /\b/ == /(?<!\w)(?=\w)|(?<=\w)(?!\w)/
            // /\B/ == /(?<=\w)(?=\w)|(?<!\w)(?!\w)/

            /**
             * Attempts to turn a lookaround around `\w` / `\W` into a word
             * boundary assertion (`\b` or `\B`).
             *
             * @param aNode The lookaround whose content is `\w` or `\W`.
             * @param wordNegated `true` if the content is `\W`, `false` if
             * it is `\w`.
             */
            function replaceWordAssertion(
                aNode: LookaroundAssertion,
                wordNegated: boolean,
            ): void {
                const direction = getMatchingDirectionFromAssertionKind(
                    aNode.kind,
                )

                // A lookaround over \W has to be rewritten as the opposite
                // lookaround over \w. That is only valid if the pattern is
                // known to consume at least one more character in the
                // direction of the lookaround:
                //
                //   (?=\W)    == (?!\w|$)
                //   (?=\W).+  == (?!\w).+            ; the `$` branch always rejects
                //   (?!\W).+  == (?=\w|$).+ == (?=\w).+
                //
                // If the next character may be the edge of the input, the
                // `$` branch cannot be eliminated and we give up.
                if (
                    wordNegated &&
                    getFirstCharAfter(aNode, direction, flags).edge
                ) {
                    return
                }

                // Whether the lookaround is now equivalent to
                // (?!\w) / (?<!\w) as opposed to (?=\w) / (?<=\w).
                const lookaroundNegated = wordNegated
                    ? !aNode.negate
                    : aNode.negate

                // The branch elimination needs the character on the other
                // side of the assertion as well.
                const neighbour = getFirstCharAfter(
                    aNode,
                    invertMatchingDirection(direction),
                    flags,
                )
                if (neighbour.edge) {
                    return
                }

                let otherNegated: boolean
                if (neighbour.char.isSubsetOf(word)) {
                    // the neighbouring character behaves like \w
                    otherNegated = false
                } else if (neighbour.char.isSubsetOf(nonWord)) {
                    // the neighbouring character behaves like \W
                    otherNegated = true
                } else {
                    // neither purely \w nor purely \W: nothing can be concluded
                    return
                }

                // Same "wordness" on both sides means no boundary (\B),
                // different "wordness" means a boundary (\b).
                const isBoundary = lookaroundNegated !== otherNegated
                reportReplacement(
                    aNode,
                    isBoundary
                        ? "a word boundary assertion"
                        : "a negated word boundary assertion",
                    isBoundary ? "\\b" : "\\B",
                )
            }

            /**
             * Attempts to turn a negated lookaround into an edge assertion
             * (`$` for lookaheads, `^` for lookbehinds).
             *
             * @param aNode The lookaround to replace.
             * @param lineAssertion Whether the equivalent edge assertion is
             * a line assertion; a report is only made when this matches the
             * `multiline` flag.
             */
            function replaceEdgeAssertion(
                aNode: LookaroundAssertion,
                lineAssertion: boolean,
            ): void {
                if (!aNode.negate || flags.multiline !== lineAssertion) {
                    return
                }

                reportReplacement(
                    aNode,
                    "an edge assertion",
                    aNode.kind === "lookahead" ? "$" : "^",
                )
            }

            return {
                onAssertionEnter(aNode) {
                    if (
                        aNode.kind !== "lookahead" &&
                        aNode.kind !== "lookbehind"
                    ) {
                        // predefined assertions are not handled by this rule
                        return
                    }

                    const chars = getCharacters(aNode)
                    if (chars === null) {
                        return
                    }

                    // Fast paths for `\w` / `\W` and `.`
                    if (chars.type === "CharacterSet") {
                        switch (chars.kind) {
                            case "word":
                                replaceWordAssertion(aNode, chars.negate)
                                return
                            case "any":
                                replaceEdgeAssertion(aNode, !flags.dotAll)
                                return
                            default:
                                break
                        }
                    }

                    const unicodeSet = toUnicodeSet(chars, flags)
                    if (!unicodeSet.accept.isEmpty) {
                        // the set contains strings, so no predefined
                        // assertion can be equivalent
                        return
                    }

                    const charSet = unicodeSet.chars
                    if (charSet.isAll) {
                        replaceEdgeAssertion(aNode, false)
                        return
                    }
                    if (charSet.equals(word)) {
                        replaceWordAssertion(aNode, false)
                        return
                    }
                    if (charSet.equals(nonWord)) {
                        replaceWordAssertion(aNode, true)
                    }
                },
            }
        }

        return defineRegexpVisitor(context, {
            createVisitor,
        })
    },
})