diff --git a/.changeset/green-peaches-explode.md b/.changeset/green-peaches-explode.md new file mode 100644 index 000000000..095910fe8 --- /dev/null +++ b/.changeset/green-peaches-explode.md @@ -0,0 +1,5 @@ +--- +"eslint-plugin-regexp": minor +--- + +Add support for the `v` flag to the `regexp/sort-character-class-elements` rule diff --git a/docs/rules/sort-character-class-elements.md b/docs/rules/sort-character-class-elements.md index 8942ef890..fa487edbe 100644 --- a/docs/rules/sort-character-class-elements.md +++ b/docs/rules/sort-character-class-elements.md @@ -44,12 +44,14 @@ var foo = /[b-fa]/ "\\d", // \d or \D "\\p", // \p{...} or \P{...} "*", // Others (A character or range of characters or an element you did not specify.) + "\\q", // \q{...} + "[]", // Nested character class or character class expression ] }] } ``` -- `"order"` ... An array of your preferred order. The default is `["\\s", "\\w", "\\d", "\\p", "*",]`. +- `"order"` ... An array of your preferred order. The default is `["\\s", "\\w", "\\d", "\\p", "*", "\\q", "[]"]`. 
## :rocket: Version diff --git a/lib/rules/sort-character-class-elements.ts b/lib/rules/sort-character-class-elements.ts index ae2b5155e..d29c68a88 100644 --- a/lib/rules/sort-character-class-elements.ts +++ b/lib/rules/sort-character-class-elements.ts @@ -2,24 +2,34 @@ import type { RegExpVisitor } from "@eslint-community/regexpp/visitor" import type { CharacterClass, CharacterClassElement, + ClassRangesCharacterClassElement, UnicodePropertyCharacterSet, + UnicodeSetsCharacterClassElement, } from "@eslint-community/regexpp/ast" import type { RegExpContext } from "../utils" -import { - CP_DIGIT_ZERO, - CP_SPACE, - createRule, - defineRegexpVisitor, -} from "../utils" +import { createRule, defineRegexpVisitor } from "../utils" import { mention } from "../utils/mention" +import type { ReadonlyFlags } from "regexp-ast-analysis" +import { toUnicodeSet } from "regexp-ast-analysis" +import type { ReadonlyWord } from "refa" +import { getLexicographicallySmallest } from "../utils/lexicographically-smallest" -type CharacterClassElementKind = "\\w" | "\\d" | "\\s" | "\\p" | "*" +type CharacterClassElementKind = + | "\\w" + | "\\d" + | "\\s" + | "\\p" + | "*" + | "\\q" + | "[]" const DEFAULT_ORDER: CharacterClassElementKind[] = [ "\\s", "\\w", "\\d", "\\p", "*", + "\\q", + "[]", ] /** @@ -37,9 +47,46 @@ function getCharacterClassElementKind( ? "\\s" : "\\p" } + if (node.type === "ClassStringDisjunction") { + return "\\q" + } + if ( + node.type === "CharacterClass" || + node.type === "ExpressionCharacterClass" + ) { + return "[]" + } return "*" } +/** + * Return the lexicographically smallest word accepted by the given element. + * For a negated character set (e.g. `\D`), the non-negated set is used instead. + */ +function getLexicographicallySmallestFromElement( + node: CharacterClassElement, + flags: ReadonlyFlags, +): ReadonlyWord { + const us = + node.type === "CharacterSet" && node.negate + ? 
toUnicodeSet({ ...node, negate: false }, flags) + : toUnicodeSet(node, flags) + return getLexicographicallySmallest(us) || [] +} + +/** + * Compare two words lexicographically: element by element by numeric value, then by length. + */ +function compareWords(a: ReadonlyWord, b: ReadonlyWord): number { + const l = Math.min(a.length, b.length) + for (let i = 0; i < l; i++) { + const aI = a[i] + const bI = b[i] + if (aI !== bI) return aI - bI + } + return a.length - b.length +} + export default createRule("sort-character-class-elements", { meta: { docs: { @@ -54,7 +101,17 @@ export default createRule("sort-character-class-elements", { properties: { order: { type: "array", - items: { enum: ["\\w", "\\d", "\\s", "\\p", "*"] }, + items: { + enum: [ + "\\s", + "\\w", + "\\d", + "\\p", + "*", + "\\q", + "[]", + ], + }, }, }, additionalProperties: false, @@ -73,6 +130,8 @@ export default createRule("sort-character-class-elements", { "\\d"?: number "\\s"?: number "\\p"?: number + "\\q"?: number + "[]"?: number } = { "*": Infinity } ;( @@ -84,6 +143,7 @@ export default createRule("sort-character-class-elements", { function createVisitor({ node, + flags, getRegexpLocation, patternSource, }: RegExpContext): RegExpVisitor.Handlers { @@ -93,10 +153,10 @@ export default createRule("sort-character-class-elements", { for (const next of ccNode.elements) { if (prevList.length) { const prev = prevList[0] - if (!isValidOrder(prev, next)) { + if (!isValidOrder(prev, next, flags)) { let moveTarget = prev for (const p of prevList) { - if (isValidOrder(p, next)) { + if (isValidOrder(p, next, flags)) { break } else { moveTarget = p @@ -144,6 +204,7 @@ export default createRule("sort-character-class-elements", { function isValidOrder( prev: CharacterClassElement, next: CharacterClassElement, + flags: ReadonlyFlags, ) { const prevKind = getCharacterClassElementKind(prev) const nextKind = getCharacterClassElementKind(next) @@ -154,41 +215,33 @@ export default createRule("sort-character-class-elements", { } else if (prevOrder > 
nextOrder) { return false } - if (prev.type === "CharacterSet" && prev.kind === "property") { - if (next.type === "CharacterSet") { - if (next.kind === "property") { - return isValidOrderForUnicodePropertyCharacterSet( - prev, - next, - ) - } - // e.g. /[\p{ASCII}\d]/ - return false - } - // e.g. /[\p{ASCII}a]/ + + const prevOrderShortCircuit = DEFAULT_ORDER.indexOf(prevKind) + const nextOrderShortCircuit = DEFAULT_ORDER.indexOf(nextKind) + if (prevOrderShortCircuit < nextOrderShortCircuit) { return true - } else if ( + } else if (prevOrderShortCircuit > nextOrderShortCircuit) { + return false + } + + if ( + prev.type === "CharacterSet" && + prev.kind === "property" && next.type === "CharacterSet" && next.kind === "property" ) { - if (prev.type === "CharacterSet") { - // e.g. /[\d\p{ASCII}]/ - return true - } - // e.g. /[a\p{ASCII}]/ - return false + return isValidOrderForUnicodePropertyCharacterSet(prev, next) } - if (prev.type === "CharacterSet" && next.type === "CharacterSet") { - if (prev.kind === "word" && next.kind === "digit") { - return true - } - if (prev.kind === "digit" && next.kind === "word") { - return false - } - } - const prevCP = getTargetCodePoint(prev) - const nextCP = getTargetCodePoint(next) - if (prevCP <= nextCP) { + + const prevWord = getLexicographicallySmallestFromElement( + prev, + flags, + ) + const nextWord = getLexicographicallySmallestFromElement( + next, + flags, + ) + if (compareWords(prevWord, nextWord) <= 0) { return true } return false @@ -218,29 +271,6 @@ export default createRule("sort-character-class-elements", { return true } - /** - * Gets the target code point for a given element. 
- */ - function getTargetCodePoint( - node: Exclude, - ) { - if (node.type === "CharacterSet") { - if (node.kind === "digit" || node.kind === "word") { - return CP_DIGIT_ZERO - } - if (node.kind === "space") { - return CP_SPACE - } - return Infinity - } - if (node.type === "CharacterClassRange") { - return node.min.value - } - // FIXME: TS Error - // @ts-expect-error -- FIXME - return node.value - } - return defineRegexpVisitor(context, { createVisitor, }) @@ -254,9 +284,11 @@ function escapeRaw(node: CharacterClassElement, target: CharacterClassElement) { let raw = node.raw if (raw.startsWith("-")) { const parent = target.parent as CharacterClass - // FIXME: TS Error - // @ts-expect-error -- FIXME - const prev = parent.elements[parent.elements.indexOf(target) - 1] + const elements: ( + | UnicodeSetsCharacterClassElement + | ClassRangesCharacterClassElement + )[] = parent.elements + const prev = elements[elements.indexOf(target) - 1] if ( prev && (prev.type === "Character" || prev.type === "CharacterSet") diff --git a/tests/lib/rules/sort-character-class-elements.ts b/tests/lib/rules/sort-character-class-elements.ts index 1ef46818b..5c672faa1 100644 --- a/tests/lib/rules/sort-character-class-elements.ts +++ b/tests/lib/rules/sort-character-class-elements.ts @@ -3,7 +3,7 @@ import rule from "../../../lib/rules/sort-character-class-elements" const tester = new RuleTester({ parserOptions: { - ecmaVersion: 2020, + ecmaVersion: "latest", sourceType: "module", }, }) @@ -38,6 +38,9 @@ tester.run("sort-character-class-elements", rule as any, { code: String.raw`/[\p{ASCII}a]/u`, options: [{ order: [] }], }, + String.raw`/[a\q{a}[a][a--b]]/v`, + String.raw`/[\q{a}\q{b}\q{c}]/v`, + String.raw`/[\q{aa}\q{ab}\q{ac}]/v`, ], invalid: [ { @@ -230,5 +233,68 @@ tester.run("sort-character-class-elements", rule as any, { "Expected character class elements to be in ascending order. 
'\t' should be before ' '.", ], }, + { + code: String.raw`/[[a--b][a]\q{a}a]/v`, + output: String.raw`/[\q{a}[a--b][a]a]/v`, + errors: [ + "Expected character class elements to be in ascending order. '\\q{a}' should be before '[a--b]'.", + "Expected character class elements to be in ascending order. 'a' should be before '[a--b]'.", + ], + }, + { + code: String.raw`/[\q{a}[a--b][a]a]/v`, + output: String.raw`/[a\q{a}[a--b][a]]/v`, + errors: [ + "Expected character class elements to be in ascending order. 'a' should be before '\\q{a}'.", + ], + }, + { + code: String.raw`/[[b--c][a]]/v`, + output: String.raw`/[[a][b--c]]/v`, + errors: [ + "Expected character class elements to be in ascending order. '[a]' should be before '[b--c]'.", + ], + }, + { + code: String.raw`/[[a]\q{a}]/v; /[\q{a}a]/v; /[[b-c]\q{a}]/v; /[[b-c][a]]/v;`, + output: String.raw`/[\q{a}[a]]/v; /[a\q{a}]/v; /[\q{a}[b-c]]/v; /[[a][b-c]]/v;`, + options: [{ order: [] }], + errors: [ + "Expected character class elements to be in ascending order. '\\q{a}' should be before '[a]'.", + "Expected character class elements to be in ascending order. 'a' should be before '\\q{a}'.", + "Expected character class elements to be in ascending order. '\\q{a}' should be before '[b-c]'.", + "Expected character class elements to be in ascending order. '[a]' should be before '[b-c]'.", + ], + }, + { + code: String.raw`/[\q{c}\q{b}\q{a}]/v`, + output: String.raw`/[\q{b}\q{c}\q{a}]/v`, + errors: [ + "Expected character class elements to be in ascending order. '\\q{b}' should be before '\\q{c}'.", + "Expected character class elements to be in ascending order. '\\q{a}' should be before '\\q{c}'.", + ], + }, + { + code: String.raw`/[\q{b}\q{c}\q{a}]/v`, + output: String.raw`/[\q{a}\q{b}\q{c}]/v`, + errors: [ + "Expected character class elements to be in ascending order. 
'\\q{a}' should be before '\\q{b}'.", + ], + }, + { + code: String.raw`/[\q{ac}\q{ab}\q{aa}]/v`, + output: String.raw`/[\q{ab}\q{ac}\q{aa}]/v`, + errors: [ + "Expected character class elements to be in ascending order. '\\q{ab}' should be before '\\q{ac}'.", + "Expected character class elements to be in ascending order. '\\q{aa}' should be before '\\q{ac}'.", + ], + }, + { + code: String.raw`/[\q{ab}\q{ac}\q{aa}]/v`, + output: String.raw`/[\q{aa}\q{ab}\q{ac}]/v`, + errors: [ + "Expected character class elements to be in ascending order. '\\q{aa}' should be before '\\q{ab}'.", + ], + }, ], })