Skip to content

Add support for v flag to regexp/sort-character-class-elements rule #588

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 9 commits into from
Sep 24, 2023
5 changes: 5 additions & 0 deletions .changeset/green-peaches-explode.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
"eslint-plugin-regexp": minor
---

Add support for v flag to `regexp/sort-character-class-elements` rule
4 changes: 3 additions & 1 deletion docs/rules/sort-character-class-elements.md
Original file line number Diff line number Diff line change
Expand Up @@ -44,12 +44,14 @@ var foo = /[b-fa]/
"\\d", // \d or \D
"\\p", // \p{...} or \P{...}
"*", // Others (A character or range of characters or an element you did not specify.)
"\\q", // \q{...}
"[]", // Nesting character class, or character class expression
]
}]
}
```

- `"order"` ... An array of your preferred order. The default is `["\\s", "\\w", "\\d", "\\p", "*",]`.
- `"order"` ... An array of your preferred order. The default is `["\\s", "\\w", "\\d", "\\p", "*", "\\q", "[]"]`.

## :rocket: Version

Expand Down
164 changes: 98 additions & 66 deletions lib/rules/sort-character-class-elements.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,24 +2,34 @@ import type { RegExpVisitor } from "@eslint-community/regexpp/visitor"
import type {
CharacterClass,
CharacterClassElement,
ClassRangesCharacterClassElement,
UnicodePropertyCharacterSet,
UnicodeSetsCharacterClassElement,
} from "@eslint-community/regexpp/ast"
import type { RegExpContext } from "../utils"
import {
CP_DIGIT_ZERO,
CP_SPACE,
createRule,
defineRegexpVisitor,
} from "../utils"
import { createRule, defineRegexpVisitor } from "../utils"
import { mention } from "../utils/mention"
import type { ReadonlyFlags } from "regexp-ast-analysis"
import { toUnicodeSet } from "regexp-ast-analysis"
import type { ReadonlyWord } from "refa"
import { getLexicographicallySmallest } from "../utils/lexicographically-smallest"

type CharacterClassElementKind = "\\w" | "\\d" | "\\s" | "\\p" | "*"
type CharacterClassElementKind =
| "\\w"
| "\\d"
| "\\s"
| "\\p"
| "*"
| "\\q"
| "[]"
// Built-in ordering of character class element kinds, listed from the kind
// that should come first to the kind that should come last. This is the
// sequence documented as the default value of the rule's `order` option.
const DEFAULT_ORDER: CharacterClassElementKind[] = [
"\\s",
"\\w",
"\\d",
"\\p",
"*",
"\\q",
"[]",
]

/**
Expand All @@ -37,9 +47,46 @@ function getCharacterClassElementKind(
? "\\s"
: "\\p"
}
if (node.type === "ClassStringDisjunction") {
return "\\q"
}
if (
node.type === "CharacterClass" ||
node.type === "ExpressionCharacterClass"
) {
return "[]"
}
return "*"
}

/**
 * Computes the lexicographically smallest word matched by a character class
 * element, falling back to the empty word when none is reported.
 *
 * A negated character set is evaluated as if it were not negated, so that an
 * escape and its negated form yield the same word.
 */
function getLexicographicallySmallestFromElement(
    node: CharacterClassElement,
    flags: ReadonlyFlags,
): ReadonlyWord {
    if (node.type === "CharacterSet" && node.negate) {
        // Evaluate the positive counterpart of the negated set.
        const positiveSet = toUnicodeSet({ ...node, negate: false }, flags)
        return getLexicographicallySmallest(positiveSet) || []
    }

    const elementSet = toUnicodeSet(node, flags)
    return getLexicographicallySmallest(elementSet) || []
}

/**
 * Lexicographically compares two words element by element, using the numeric
 * value of each element.
 *
 * @returns A negative number if `a` sorts before `b`, a positive number if it
 * sorts after, and `0` if both words are equal. When one word is a prefix of
 * the other, the shorter word sorts first.
 */
function compareWords(a: ReadonlyWord, b: ReadonlyWord): number {
    const sharedLength = Math.min(a.length, b.length)

    // Skip over the common prefix of both words.
    let pos = 0
    while (pos < sharedLength && a[pos] === b[pos]) {
        pos++
    }

    // Either an element differs inside the shared range, or the lengths decide.
    return pos < sharedLength ? a[pos] - b[pos] : a.length - b.length
}

export default createRule("sort-character-class-elements", {
meta: {
docs: {
Expand All @@ -54,7 +101,17 @@ export default createRule("sort-character-class-elements", {
properties: {
order: {
type: "array",
items: { enum: ["\\w", "\\d", "\\s", "\\p", "*"] },
items: {
enum: [
"\\s",
"\\w",
"\\d",
"\\p",
"*",
"\\q",
"[]",
],
},
},
},
additionalProperties: false,
Expand All @@ -73,6 +130,8 @@ export default createRule("sort-character-class-elements", {
"\\d"?: number
"\\s"?: number
"\\p"?: number
"\\q"?: number
"[]"?: number
} = { "*": Infinity }

;(
Expand All @@ -84,6 +143,7 @@ export default createRule("sort-character-class-elements", {

function createVisitor({
node,
flags,
getRegexpLocation,
patternSource,
}: RegExpContext): RegExpVisitor.Handlers {
Expand All @@ -93,10 +153,10 @@ export default createRule("sort-character-class-elements", {
for (const next of ccNode.elements) {
if (prevList.length) {
const prev = prevList[0]
if (!isValidOrder(prev, next)) {
if (!isValidOrder(prev, next, flags)) {
let moveTarget = prev
for (const p of prevList) {
if (isValidOrder(p, next)) {
if (isValidOrder(p, next, flags)) {
break
} else {
moveTarget = p
Expand Down Expand Up @@ -144,6 +204,7 @@ export default createRule("sort-character-class-elements", {
function isValidOrder(
prev: CharacterClassElement,
next: CharacterClassElement,
flags: ReadonlyFlags,
) {
const prevKind = getCharacterClassElementKind(prev)
const nextKind = getCharacterClassElementKind(next)
Expand All @@ -154,41 +215,33 @@ export default createRule("sort-character-class-elements", {
} else if (prevOrder > nextOrder) {
return false
}
if (prev.type === "CharacterSet" && prev.kind === "property") {
if (next.type === "CharacterSet") {
if (next.kind === "property") {
return isValidOrderForUnicodePropertyCharacterSet(
prev,
next,
)
}
// e.g. /[\p{ASCII}\d]/
return false
}
// e.g. /[\p{ASCII}a]/

const prevOrderShortCircuit = DEFAULT_ORDER.indexOf(prevKind)
const nextOrderShortCircuit = DEFAULT_ORDER.indexOf(nextKind)
if (prevOrderShortCircuit < nextOrderShortCircuit) {
return true
} else if (
} else if (prevOrderShortCircuit > nextOrderShortCircuit) {
return false
}

if (
prev.type === "CharacterSet" &&
prev.kind === "property" &&
next.type === "CharacterSet" &&
next.kind === "property"
) {
if (prev.type === "CharacterSet") {
// e.g. /[\d\p{ASCII}]/
return true
}
// e.g. /[a\p{ASCII}]/
return false
return isValidOrderForUnicodePropertyCharacterSet(prev, next)
}
if (prev.type === "CharacterSet" && next.type === "CharacterSet") {
if (prev.kind === "word" && next.kind === "digit") {
return true
}
if (prev.kind === "digit" && next.kind === "word") {
return false
}
}
const prevCP = getTargetCodePoint(prev)
const nextCP = getTargetCodePoint(next)
if (prevCP <= nextCP) {

const prevWord = getLexicographicallySmallestFromElement(
prev,
flags,
)
const nextWord = getLexicographicallySmallestFromElement(
next,
flags,
)
if (compareWords(prevWord, nextWord) <= 0) {
return true
}
return false
Expand Down Expand Up @@ -218,29 +271,6 @@ export default createRule("sort-character-class-elements", {
return true
}

/**
* Gets the target code point for a given element.
*/
function getTargetCodePoint(
node: Exclude<CharacterClassElement, UnicodePropertyCharacterSet>,
) {
if (node.type === "CharacterSet") {
if (node.kind === "digit" || node.kind === "word") {
return CP_DIGIT_ZERO
}
if (node.kind === "space") {
return CP_SPACE
}
return Infinity
}
if (node.type === "CharacterClassRange") {
return node.min.value
}
// FIXME: TS Error
// @ts-expect-error -- FIXME
return node.value
}

return defineRegexpVisitor(context, {
createVisitor,
})
Expand All @@ -254,9 +284,11 @@ function escapeRaw(node: CharacterClassElement, target: CharacterClassElement) {
let raw = node.raw
if (raw.startsWith("-")) {
const parent = target.parent as CharacterClass
// FIXME: TS Error
// @ts-expect-error -- FIXME
const prev = parent.elements[parent.elements.indexOf(target) - 1]
const elements: (
| UnicodeSetsCharacterClassElement
| ClassRangesCharacterClassElement
)[] = parent.elements
const prev = elements[elements.indexOf(target) - 1]
if (
prev &&
(prev.type === "Character" || prev.type === "CharacterSet")
Expand Down
68 changes: 67 additions & 1 deletion tests/lib/rules/sort-character-class-elements.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ import rule from "../../../lib/rules/sort-character-class-elements"

const tester = new RuleTester({
parserOptions: {
ecmaVersion: 2020,
ecmaVersion: "latest",
sourceType: "module",
},
})
Expand Down Expand Up @@ -38,6 +38,9 @@ tester.run("sort-character-class-elements", rule as any, {
code: String.raw`/[\p{ASCII}a]/u`,
options: [{ order: [] }],
},
String.raw`/[a\q{a}[a][a--b]]/v`,
String.raw`/[\q{a}\q{b}\q{c}]/v`,
String.raw`/[\q{aa}\q{ab}\q{ac}]/v`,
],
invalid: [
{
Expand Down Expand Up @@ -230,5 +233,68 @@ tester.run("sort-character-class-elements", rule as any, {
"Expected character class elements to be in ascending order. '\t' should be before ' '.",
],
},
{
code: String.raw`/[[a--b][a]\q{a}a]/v`,
output: String.raw`/[\q{a}[a--b][a]a]/v`,
errors: [
"Expected character class elements to be in ascending order. '\\q{a}' should be before '[a--b]'.",
"Expected character class elements to be in ascending order. 'a' should be before '[a--b]'.",
],
},
{
code: String.raw`/[\q{a}[a--b][a]a]/v`,
output: String.raw`/[a\q{a}[a--b][a]]/v`,
errors: [
"Expected character class elements to be in ascending order. 'a' should be before '\\q{a}'.",
],
},
{
code: String.raw`/[[b--c][a]]/v`,
output: String.raw`/[[a][b--c]]/v`,
errors: [
"Expected character class elements to be in ascending order. '[a]' should be before '[b--c]'.",
],
},
{
code: String.raw`/[[a]\q{a}]/v; /[\q{a}a]/v; /[[b-c]\q{a}]/v; /[[b-c][a]]/v;`,
output: String.raw`/[\q{a}[a]]/v; /[a\q{a}]/v; /[\q{a}[b-c]]/v; /[[a][b-c]]/v;`,
options: [{ order: [] }],
errors: [
"Expected character class elements to be in ascending order. '\\q{a}' should be before '[a]'.",
"Expected character class elements to be in ascending order. 'a' should be before '\\q{a}'.",
"Expected character class elements to be in ascending order. '\\q{a}' should be before '[b-c]'.",
"Expected character class elements to be in ascending order. '[a]' should be before '[b-c]'.",
],
},
{
code: String.raw`/[\q{c}\q{b}\q{a}]/v`,
output: String.raw`/[\q{b}\q{c}\q{a}]/v`,
errors: [
"Expected character class elements to be in ascending order. '\\q{b}' should be before '\\q{c}'.",
"Expected character class elements to be in ascending order. '\\q{a}' should be before '\\q{c}'.",
],
},
{
code: String.raw`/[\q{b}\q{c}\q{a}]/v`,
output: String.raw`/[\q{a}\q{b}\q{c}]/v`,
errors: [
"Expected character class elements to be in ascending order. '\\q{a}' should be before '\\q{b}'.",
],
},
{
code: String.raw`/[\q{ac}\q{ab}\q{aa}]/v`,
output: String.raw`/[\q{ab}\q{ac}\q{aa}]/v`,
errors: [
"Expected character class elements to be in ascending order. '\\q{ab}' should be before '\\q{ac}'.",
"Expected character class elements to be in ascending order. '\\q{aa}' should be before '\\q{ac}'.",
],
},
{
code: String.raw`/[\q{ab}\q{ac}\q{aa}]/v`,
output: String.raw`/[\q{aa}\q{ab}\q{ac}]/v`,
errors: [
"Expected character class elements to be in ascending order. '\\q{aa}' should be before '\\q{ab}'.",
],
},
],
})