Skip to content

Add support for v flag to regexp/no-dupe-characters-character-class rule #608

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Sep 21, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .changeset/friendly-walls-reply.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
"eslint-plugin-regexp": minor
---

Add support for v flag to `regexp/no-dupe-characters-character-class` rule
108 changes: 63 additions & 45 deletions lib/rules/no-dupe-characters-character-class.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@ import type {
CharacterClassRange,
CharacterSet,
AnyCharacterSet,
UnicodeSetsCharacterClass,
ClassStringDisjunction,
ExpressionCharacterClass,
} from "@eslint-community/regexpp/ast"
import type { RegExpContext } from "../utils"
import {
Expand All @@ -20,8 +23,9 @@ import {
import type { CharRange, CharSet } from "refa"
import { JS } from "refa"
import type { ReadonlyFlags } from "regexp-ast-analysis"
import { toCharSet } from "regexp-ast-analysis"
import { toCharSet, toUnicodeSet } from "regexp-ast-analysis"
import { mentionChar } from "../utils/mention"
import { assertNever } from "../utils/util"

interface Grouping {
duplicates: {
Expand All @@ -30,7 +34,13 @@ interface Grouping {
}[]
characters: Character[]
characterRanges: CharacterClassRange[]
characterSets: (EscapeCharacterSet | UnicodePropertyCharacterSet)[]
characterSetAndClasses: (
| EscapeCharacterSet
| UnicodePropertyCharacterSet
| UnicodeSetsCharacterClass
| ClassStringDisjunction
| ExpressionCharacterClass
)[]
}

/**
Expand All @@ -44,9 +54,13 @@ function groupElements(
const duplicates: Grouping["duplicates"] = []
const characters = new Map<number, Character>()
const characterRanges = new Map<string, CharacterClassRange>()
const characterSets = new Map<
const characterSetAndClasses = new Map<
string,
EscapeCharacterSet | UnicodePropertyCharacterSet
| EscapeCharacterSet
| UnicodePropertyCharacterSet
| UnicodeSetsCharacterClass
| ClassStringDisjunction
| ExpressionCharacterClass
>()

/**
Expand All @@ -68,27 +82,32 @@ function groupElements(
}

for (const e of elements) {
// FIXME: TS Error
// @ts-expect-error -- FIXME
const charSet = toCharSet(e, flags)

if (e.type === "Character") {
const charSet = toCharSet(e, flags)
const key = charSet.ranges[0].min
addToGroup(characters, key, e)
} else if (e.type === "CharacterClassRange") {
const charSet = toCharSet(e, flags)
const key = buildRangeKey(charSet)
addToGroup(characterRanges, key, e)
} else if (e.type === "CharacterSet") {
} else if (
e.type === "CharacterSet" ||
e.type === "CharacterClass" ||
e.type === "ClassStringDisjunction" ||
e.type === "ExpressionCharacterClass"
) {
const key = e.raw
addToGroup(characterSets, key, e)
addToGroup(characterSetAndClasses, key, e)
} else {
assertNever(e)
}
}

return {
duplicates,
characters: [...characters.values()],
characterRanges: [...characterRanges.values()],
characterSets: [...characterSets.values()],
characterSetAndClasses: [...characterSetAndClasses.values()],
}

function buildRangeKey(rangeCharSet: CharSet) {
Expand Down Expand Up @@ -197,7 +216,10 @@ export default createRule("no-dupe-characters-character-class", {
subsetElement: CharacterClassElement,
element:
| Exclude<CharacterSet, AnyCharacterSet>
| CharacterClassRange,
| CharacterClassRange
| UnicodeSetsCharacterClass
| ClassStringDisjunction
| ExpressionCharacterClass,
) {
const { node, getRegexpLocation } = regexpContext

Expand Down Expand Up @@ -254,9 +276,12 @@ export default createRule("no-dupe-characters-character-class", {
duplicates,
characters,
characterRanges,
characterSets,
characterSetAndClasses,
} = groupElements(ccNode.elements, flags)
const rangesAndSets = [...characterRanges, ...characterSets]
const elementsOtherThanCharacter = [
...characterRanges,
...characterSetAndClasses,
]

// keep track of all reported subset elements
const subsets = new Set<CharacterClassElement>()
Expand All @@ -269,10 +294,10 @@ export default createRule("no-dupe-characters-character-class", {

// report characters that are already matched by some range or set
for (const char of characters) {
for (const other of rangesAndSets) {
// FIXME: TS Error
// @ts-expect-error -- FIXME
if (toCharSet(other, flags).has(char.value)) {
for (const other of elementsOtherThanCharacter) {
if (
toUnicodeSet(other, flags).chars.has(char.value)
) {
reportSubset(regexpContext, char, other)
subsets.add(char)
break
Expand All @@ -281,19 +306,15 @@ export default createRule("no-dupe-characters-character-class", {
}

// report character ranges and sets that are already matched by some range or set
for (const element of rangesAndSets) {
for (const other of rangesAndSets) {
for (const element of elementsOtherThanCharacter) {
for (const other of elementsOtherThanCharacter) {
if (element === other || subsets.has(other)) {
continue
}

if (
// FIXME: TS Error
// @ts-expect-error -- FIXME
toCharSet(element, flags).isSubsetOf(
// FIXME: TS Error
// @ts-expect-error -- FIXME
toCharSet(other, flags),
toUnicodeSet(element, flags).isSubsetOf(
toUnicodeSet(other, flags),
)
) {
reportSubset(regexpContext, element, other)
Expand All @@ -305,34 +326,28 @@ export default createRule("no-dupe-characters-character-class", {

// character ranges and sets might be a subset of a combination of other elements
// e.g. `b-d` is a subset of `a-cd-f`
const characterTotal = toCharSet(
const characterTotal = toUnicodeSet(
characters.filter((c) => !subsets.has(c)),
flags,
)
for (const element of rangesAndSets) {
for (const element of elementsOtherThanCharacter) {
if (subsets.has(element)) {
continue
}

const totalOthers = characterTotal.union(
...rangesAndSets
...elementsOtherThanCharacter
.filter((e) => !subsets.has(e) && e !== element)
// FIXME: TS Error
// @ts-expect-error -- FIXME
.map((e) => toCharSet(e, flags)),
.map((e) => toUnicodeSet(e, flags)),
)

// FIXME: TS Error
// @ts-expect-error -- FIXME
const elementCharSet = toCharSet(element, flags)
const elementCharSet = toUnicodeSet(element, flags)
if (elementCharSet.isSubsetOf(totalOthers)) {
const superSetElements = ccNode.elements
.filter((e) => !subsets.has(e) && e !== element)
.filter(
(e) =>
// FIXME: TS Error
// @ts-expect-error -- FIXME
!toCharSet(e, flags).isDisjointWith(
!toUnicodeSet(e, flags).isDisjointWith(
elementCharSet,
),
)
Expand All @@ -354,18 +369,20 @@ export default createRule("no-dupe-characters-character-class", {
continue
}

for (let j = i + 1; j < rangesAndSets.length; j++) {
const other = rangesAndSets[j]
for (
let j = i + 1;
j < elementsOtherThanCharacter.length;
j++
) {
const other = elementsOtherThanCharacter[j]
if (range === other || subsets.has(other)) {
continue
}

const intersection = toCharSet(
const intersection = toUnicodeSet(
range,
flags,
// FIXME: TS Error
// @ts-expect-error -- FIXME
).intersect(toCharSet(other, flags))
).intersect(toUnicodeSet(other, flags))
if (intersection.isEmpty) {
continue
}
Expand All @@ -375,7 +392,8 @@ export default createRule("no-dupe-characters-character-class", {
// character range.
// there is no point in reporting overlaps that can't be fixed.
const interestingRanges =
intersection.ranges.filter(
// Range and string never intersect, so we can only check `chars`.
intersection.chars.ranges.filter(
(r) =>
inRange(r, range.min.value) ||
inRange(r, range.max.value),
Expand Down
23 changes: 22 additions & 1 deletion tests/lib/rules/no-dupe-characters-character-class.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ import rule from "../../../lib/rules/no-dupe-characters-character-class"

const tester = new RuleTester({
parserOptions: {
ecmaVersion: 2020,
ecmaVersion: "latest",
sourceType: "module",
},
})
Expand All @@ -21,6 +21,7 @@ tester.run("no-dupe-characters-character-class", rule as any, {
"/[\\w\\p{L}]/u",
"/\\p{ASCII}abc/u",
String.raw`/[\u1fff-\u2020\s]/`,
String.raw`/[\q{a}\q{ab}\q{abc}[\w--[ab]][\w&&b]]/v`,
// error
"var r = new RegExp('[\\\\wA-Za-z0-9_][invalid');",
],
Expand Down Expand Up @@ -685,5 +686,25 @@ tester.run("no-dupe-characters-character-class", rule as any, {
output: null,
errors: 1,
},
// v flags
{
code: String.raw`/[\q{a}aa-c[\w--b][\w&&a]]/v`,
output: String.raw`/[aa-c[\w--b]]/v`,
errors: [
"'\\q{a}' is already included in 'a-c' (U+0061 - U+0063).",
"'a' (U+0061) is already included in 'a-c' (U+0061 - U+0063).",
"Unexpected overlap of 'a-c' (U+0061 - U+0063) and '[\\w--b]' was found '[ac]'.",
"'[\\w&&a]' is already included in 'a-c' (U+0061 - U+0063).",
],
},
{
code: String.raw`/[\q{abc}\q{abc|ab}[\q{abc}--b][\q{abc}&&\q{abc|ab}]]/v`,
output: String.raw`/[\q{abc|ab}[\q{abc}&&\q{abc|ab}]]/v`,
errors: [
"'\\q{abc}' is already included in '\\q{abc|ab}'.",
"'[\\q{abc}--b]' is already included in '\\q{abc|ab}'.",
"'[\\q{abc}&&\\q{abc|ab}]' is already included in '\\q{abc|ab}'.",
],
},
],
})