From 6c4bd4f84d802109b5e5f8735c9ef69f6069a4b8 Mon Sep 17 00:00:00 2001 From: ota-meshi Date: Sat, 9 Sep 2023 23:32:58 +0900 Subject: [PATCH 1/7] Add support for v flag to `regexp/sort-character-class-elements` rule --- docs/rules/sort-character-class-elements.md | 4 +- lib/rules/sort-character-class-elements.ts | 179 +++++++++++------- .../rules/sort-character-class-elements.ts | 57 +++++- 3 files changed, 172 insertions(+), 68 deletions(-) diff --git a/docs/rules/sort-character-class-elements.md b/docs/rules/sort-character-class-elements.md index 8942ef890..cf0ae9395 100644 --- a/docs/rules/sort-character-class-elements.md +++ b/docs/rules/sort-character-class-elements.md @@ -43,13 +43,15 @@ var foo = /[b-fa]/ "\\w", // \w or \W "\\d", // \d or \D "\\p", // \p{...} or \P{...} + "\\q", // \q{...} + "[]", // Nesting character class, or character class expression "*", // Others (A character or range of characters or an element you did not specify.) ] }] } ``` -- `"order"` ... An array of your preferred order. The default is `["\\s", "\\w", "\\d", "\\p", "*",]`. +- `"order"` ... An array of your preferred order. The default is `["\\s", "\\w", "\\d", "\\p", "\\q", "[]", "*"]`. 
## :rocket: Version diff --git a/lib/rules/sort-character-class-elements.ts b/lib/rules/sort-character-class-elements.ts index 11b423fec..b6974945f 100644 --- a/lib/rules/sort-character-class-elements.ts +++ b/lib/rules/sort-character-class-elements.ts @@ -2,23 +2,32 @@ import type { RegExpVisitor } from "@eslint-community/regexpp/visitor" import type { CharacterClass, CharacterClassElement, + ClassRangesCharacterClassElement, UnicodePropertyCharacterSet, + UnicodeSetsCharacterClassElement, } from "@eslint-community/regexpp/ast" import type { RegExpContext } from "../utils" -import { - CP_DIGIT_ZERO, - CP_SPACE, - createRule, - defineRegexpVisitor, -} from "../utils" +import { createRule, defineRegexpVisitor } from "../utils" import { mention } from "../utils/mention" +import type { ReadonlyFlags } from "regexp-ast-analysis" +import { toUnicodeSet } from "regexp-ast-analysis" +import type { ReadonlyWord, ReadonlyWordSet, Word } from "refa" -type CharacterClassElementKind = "\\w" | "\\d" | "\\s" | "\\p" | "*" +type CharacterClassElementKind = + | "\\w" + | "\\d" + | "\\s" + | "\\p" + | "\\q" + | "[]" + | "*" const DEFAULT_ORDER: CharacterClassElementKind[] = [ "\\s", "\\w", "\\d", "\\p", + "\\q", + "[]", "*", ] @@ -37,9 +46,53 @@ function getCharacterClassElementKind( ? "\\s" : "\\p" } + if (node.type === "ClassStringDisjunction") { + return "\\q" + } + if ( + node.type === "CharacterClass" || + node.type === "ExpressionCharacterClass" + ) { + return "[]" + } return "*" } +/** + * Return the lexicographically smallest string accepted by the given element. + * If the character set is negated, its non-negated form is used for the calculation. + */ +function getLexicographicallySmallestFromElement( + node: CharacterClassElement, + flags: ReadonlyFlags, +): Word { + const us = + node.type === "CharacterSet" && node.negate + ? toUnicodeSet({ ...node, negate: false }, flags) + : toUnicodeSet(node, flags) + const wordSets: ReadonlyWordSet[] = [ + ...(us.chars.isEmpty ? 
[] : [[us.chars]]), + ...(us.accept.isEmpty ? [] : us.accept.wordSets), + ] + const minimumWords: Word[] = wordSets.map((wordSet) => + wordSet.filter((cs) => !cs.isEmpty).map((c) => c.ranges[0].min), + ) + return minimumWords.sort(compareWords).shift() || [] +} + +/** + * Compare two strings of char sets by byte order. + */ +function compareWords(a: ReadonlyWord, b: ReadonlyWord): number { + const l = Math.min(a.length, b.length) + for (let i = 0; i < l; i++) { + const aI = a[i] + const bI = b[i] + if (aI !== bI) return aI - bI + } + return a.length - b.length +} + export default createRule("sort-character-class-elements", { meta: { docs: { @@ -54,7 +107,17 @@ export default createRule("sort-character-class-elements", { properties: { order: { type: "array", - items: { enum: ["\\w", "\\d", "\\s", "\\p", "*"] }, + items: { + enum: [ + "\\w", + "\\d", + "\\s", + "\\p", + "\\q", + "[]", + "*", + ], + }, }, }, additionalProperties: false, @@ -73,6 +136,8 @@ export default createRule("sort-character-class-elements", { "\\d"?: number "\\s"?: number "\\p"?: number + "\\q"?: number + "[]"?: number } = { "*": Infinity } ;( @@ -87,6 +152,7 @@ export default createRule("sort-character-class-elements", { */ function createVisitor({ node, + flags, getRegexpLocation, patternSource, }: RegExpContext): RegExpVisitor.Handlers { @@ -96,10 +162,10 @@ export default createRule("sort-character-class-elements", { for (const next of ccNode.elements) { if (prevList.length) { const prev = prevList[0] - if (!isValidOrder(prev, next)) { + if (!isValidOrder(prev, next, flags)) { let moveTarget = prev for (const p of prevList) { - if (isValidOrder(p, next)) { + if (isValidOrder(p, next, flags)) { break } else { moveTarget = p @@ -147,6 +213,7 @@ export default createRule("sort-character-class-elements", { function isValidOrder( prev: CharacterClassElement, next: CharacterClassElement, + flags: ReadonlyFlags, ) { const prevKind = getCharacterClassElementKind(prev) const nextKind = 
getCharacterClassElementKind(next) @@ -157,41 +224,42 @@ export default createRule("sort-character-class-elements", { } else if (prevOrder > nextOrder) { return false } - if (prev.type === "CharacterSet" && prev.kind === "property") { - if (next.type === "CharacterSet") { - if (next.kind === "property") { - return isValidOrderForUnicodePropertyCharacterSet( - prev, - next, - ) - } - // e.g. /[\p{ASCII}\d]/ - return false - } - // e.g. /[\p{ASCII}a]/ + + const orderOfShortCircuit = { + "\\s": 1, + "\\w": 2, + "\\d": 3, + "\\p": 4, + "*": 5, + "\\q": 5, + "[]": 5, + } + const prevOrderS = orderOfShortCircuit[prevKind] + const nextOrderS = orderOfShortCircuit[nextKind] + if (prevOrderS < nextOrderS) { return true - } else if ( + } else if (prevOrderS > nextOrderS) { + return false + } + + if ( + prev.type === "CharacterSet" && + prev.kind === "property" && next.type === "CharacterSet" && next.kind === "property" ) { - if (prev.type === "CharacterSet") { - // e.g. /[\d\p{ASCII}]/ - return true - } - // e.g. /[a\p{ASCII}]/ - return false + return isValidOrderForUnicodePropertyCharacterSet(prev, next) } - if (prev.type === "CharacterSet" && next.type === "CharacterSet") { - if (prev.kind === "word" && next.kind === "digit") { - return true - } - if (prev.kind === "digit" && next.kind === "word") { - return false - } - } - const prevCP = getTargetCodePoint(prev) - const nextCP = getTargetCodePoint(next) - if (prevCP <= nextCP) { + + const prevWord = getLexicographicallySmallestFromElement( + prev, + flags, + ) + const nextWord = getLexicographicallySmallestFromElement( + next, + flags, + ) + if (compareWords(prevWord, nextWord) <= 0) { return true } return false @@ -221,29 +289,6 @@ export default createRule("sort-character-class-elements", { return true } - /** - * Gets the target code point for a given element. 
- */ - function getTargetCodePoint( - node: Exclude, - ) { - if (node.type === "CharacterSet") { - if (node.kind === "digit" || node.kind === "word") { - return CP_DIGIT_ZERO - } - if (node.kind === "space") { - return CP_SPACE - } - return Infinity - } - if (node.type === "CharacterClassRange") { - return node.min.value - } - // FIXME: TS Error - // @ts-expect-error -- FIXME - return node.value - } - return defineRegexpVisitor(context, { createVisitor, }) @@ -257,9 +302,11 @@ function escapeRaw(node: CharacterClassElement, target: CharacterClassElement) { let raw = node.raw if (raw.startsWith("-")) { const parent = target.parent as CharacterClass - // FIXME: TS Error - // @ts-expect-error -- FIXME - const prev = parent.elements[parent.elements.indexOf(target) - 1] + const elements: ( + | UnicodeSetsCharacterClassElement + | ClassRangesCharacterClassElement + )[] = parent.elements + const prev = elements[elements.indexOf(target) - 1] if ( prev && (prev.type === "Character" || prev.type === "CharacterSet") diff --git a/tests/lib/rules/sort-character-class-elements.ts b/tests/lib/rules/sort-character-class-elements.ts index 1ef46818b..706488f25 100644 --- a/tests/lib/rules/sort-character-class-elements.ts +++ b/tests/lib/rules/sort-character-class-elements.ts @@ -3,7 +3,7 @@ import rule from "../../../lib/rules/sort-character-class-elements" const tester = new RuleTester({ parserOptions: { - ecmaVersion: 2020, + ecmaVersion: "latest", sourceType: "module", }, }) @@ -38,6 +38,9 @@ tester.run("sort-character-class-elements", rule as any, { code: String.raw`/[\p{ASCII}a]/u`, options: [{ order: [] }], }, + String.raw`/[\q{a}[a][a--b]a]/v`, + String.raw`/[\q{a}\q{b}\q{c}]/v`, + String.raw`/[\q{aa}\q{ab}\q{ac}]/v`, ], invalid: [ { @@ -230,5 +233,57 @@ tester.run("sort-character-class-elements", rule as any, { "Expected character class elements to be in ascending order. 
'\t' should be before ' '.", ], }, + { + code: String.raw`/[a[a]\q{a}[a--b]]/v`, + output: String.raw`/[[a]a\q{a}[a--b]]/v`, + errors: [ + "Expected character class elements to be in ascending order. '[a]' should be before 'a'.", + "Expected character class elements to be in ascending order. '\\q{a}' should be before 'a'.", + ], + }, + { + code: String.raw`/[[a]a\q{a}[a--b]]/v`, + output: String.raw`/[\q{a}[a]a[a--b]]/v`, + errors: [ + "Expected character class elements to be in ascending order. '\\q{a}' should be before '[a]'.", + ], + }, + { + code: String.raw`/[\q{a}[a]a[a--b]]/v`, + output: String.raw`/[\q{a}[a][a--b]a]/v`, + errors: [ + "Expected character class elements to be in ascending order. '[a--b]' should be before 'a'.", + ], + }, + { + code: String.raw`/[\q{c}\q{b}\q{a}]/v`, + output: String.raw`/[\q{b}\q{c}\q{a}]/v`, + errors: [ + "Expected character class elements to be in ascending order. '\\q{b}' should be before '\\q{c}'.", + "Expected character class elements to be in ascending order. '\\q{a}' should be before '\\q{c}'.", + ], + }, + { + code: String.raw`/[\q{b}\q{c}\q{a}]/v`, + output: String.raw`/[\q{a}\q{b}\q{c}]/v`, + errors: [ + "Expected character class elements to be in ascending order. '\\q{a}' should be before '\\q{b}'.", + ], + }, + { + code: String.raw`/[\q{ac}\q{ab}\q{aa}]/v`, + output: String.raw`/[\q{ab}\q{ac}\q{aa}]/v`, + errors: [ + "Expected character class elements to be in ascending order. '\\q{ab}' should be before '\\q{ac}'.", + "Expected character class elements to be in ascending order. '\\q{aa}' should be before '\\q{ac}'.", + ], + }, + { + code: String.raw`/[\q{ab}\q{ac}\q{aa}]/v`, + output: String.raw`/[\q{aa}\q{ab}\q{ac}]/v`, + errors: [ + "Expected character class elements to be in ascending order. 
'\\q{aa}' should be before '\\q{ab}'.", + ], + }, ], }) From 7b57a5558f5bd837f9918c10ffc4d2f0ec209fe6 Mon Sep 17 00:00:00 2001 From: Yosuke Ota Date: Sat, 9 Sep 2023 23:33:33 +0900 Subject: [PATCH 2/7] Create green-peaches-explode.md --- .changeset/green-peaches-explode.md | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 .changeset/green-peaches-explode.md diff --git a/.changeset/green-peaches-explode.md b/.changeset/green-peaches-explode.md new file mode 100644 index 000000000..095910fe8 --- /dev/null +++ b/.changeset/green-peaches-explode.md @@ -0,0 +1,5 @@ +--- +"eslint-plugin-regexp": minor +--- + +Add support for v flag to `regexp/sort-character-class-elements` rule From d31527f5f5035f71ddaea61f84bd21405579d9a1 Mon Sep 17 00:00:00 2001 From: ota-meshi Date: Sun, 10 Sep 2023 10:49:46 +0900 Subject: [PATCH 3/7] fix: change default order --- docs/rules/sort-character-class-elements.md | 4 +-- lib/rules/sort-character-class-elements.ts | 25 +++++--------- .../rules/sort-character-class-elements.ts | 33 ++++++++++++------- 3 files changed, 32 insertions(+), 30 deletions(-) diff --git a/docs/rules/sort-character-class-elements.md b/docs/rules/sort-character-class-elements.md index cf0ae9395..fa487edbe 100644 --- a/docs/rules/sort-character-class-elements.md +++ b/docs/rules/sort-character-class-elements.md @@ -43,15 +43,15 @@ var foo = /[b-fa]/ "\\w", // \w or \W "\\d", // \d or \D "\\p", // \p{...} or \P{...} + "*", // Others (A character or range of characters or an element you did not specify.) "\\q", // \q{...} "[]", // Nesting character class, or character class expression - "*", // Others (A character or range of characters or an element you did not specify.) ] }] } ``` -- `"order"` ... An array of your preferred order. The default is `["\\s", "\\w", "\\d", "\\p", "\\q", "[]", "*"]`. +- `"order"` ... An array of your preferred order. The default is `["\\s", "\\w", "\\d", "\\p", "*", "\\q", "[]"]`. 
## :rocket: Version diff --git a/lib/rules/sort-character-class-elements.ts b/lib/rules/sort-character-class-elements.ts index b6974945f..465912b5c 100644 --- a/lib/rules/sort-character-class-elements.ts +++ b/lib/rules/sort-character-class-elements.ts @@ -18,17 +18,17 @@ type CharacterClassElementKind = | "\\d" | "\\s" | "\\p" + | "*" | "\\q" | "[]" - | "*" const DEFAULT_ORDER: CharacterClassElementKind[] = [ "\\s", "\\w", "\\d", "\\p", + "*", "\\q", "[]", - "*", ] /** @@ -109,13 +109,13 @@ export default createRule("sort-character-class-elements", { type: "array", items: { enum: [ + "\\s", "\\w", "\\d", - "\\s", "\\p", + "*", "\\q", "[]", - "*", ], }, }, @@ -225,20 +225,11 @@ export default createRule("sort-character-class-elements", { return false } - const orderOfShortCircuit = { - "\\s": 1, - "\\w": 2, - "\\d": 3, - "\\p": 4, - "*": 5, - "\\q": 5, - "[]": 5, - } - const prevOrderS = orderOfShortCircuit[prevKind] - const nextOrderS = orderOfShortCircuit[nextKind] - if (prevOrderS < nextOrderS) { + const prevOrderShortCircuit = DEFAULT_ORDER.indexOf(prevKind) + const nextOrderShortCircuit = DEFAULT_ORDER.indexOf(nextKind) + if (prevOrderShortCircuit < nextOrderShortCircuit) { return true - } else if (prevOrderS > nextOrderS) { + } else if (prevOrderShortCircuit > nextOrderShortCircuit) { return false } diff --git a/tests/lib/rules/sort-character-class-elements.ts b/tests/lib/rules/sort-character-class-elements.ts index 706488f25..5c672faa1 100644 --- a/tests/lib/rules/sort-character-class-elements.ts +++ b/tests/lib/rules/sort-character-class-elements.ts @@ -38,7 +38,7 @@ tester.run("sort-character-class-elements", rule as any, { code: String.raw`/[\p{ASCII}a]/u`, options: [{ order: [] }], }, - String.raw`/[\q{a}[a][a--b]a]/v`, + String.raw`/[a\q{a}[a][a--b]]/v`, String.raw`/[\q{a}\q{b}\q{c}]/v`, String.raw`/[\q{aa}\q{ab}\q{ac}]/v`, ], @@ -234,25 +234,36 @@ tester.run("sort-character-class-elements", rule as any, { ], }, { - code: 
String.raw`/[a[a]\q{a}[a--b]]/v`, - output: String.raw`/[[a]a\q{a}[a--b]]/v`, + code: String.raw`/[[a--b][a]\q{a}a]/v`, + output: String.raw`/[\q{a}[a--b][a]a]/v`, errors: [ - "Expected character class elements to be in ascending order. '[a]' should be before 'a'.", - "Expected character class elements to be in ascending order. '\\q{a}' should be before 'a'.", + "Expected character class elements to be in ascending order. '\\q{a}' should be before '[a--b]'.", + "Expected character class elements to be in ascending order. 'a' should be before '[a--b]'.", ], }, { - code: String.raw`/[[a]a\q{a}[a--b]]/v`, - output: String.raw`/[\q{a}[a]a[a--b]]/v`, + code: String.raw`/[\q{a}[a--b][a]a]/v`, + output: String.raw`/[a\q{a}[a--b][a]]/v`, errors: [ - "Expected character class elements to be in ascending order. '\\q{a}' should be before '[a]'.", + "Expected character class elements to be in ascending order. 'a' should be before '\\q{a}'.", ], }, { - code: String.raw`/[\q{a}[a]a[a--b]]/v`, - output: String.raw`/[\q{a}[a][a--b]a]/v`, + code: String.raw`/[[b--c][a]]/v`, + output: String.raw`/[[a][b--c]]/v`, errors: [ - "Expected character class elements to be in ascending order. '[a--b]' should be before 'a'.", + "Expected character class elements to be in ascending order. '[a]' should be before '[b--c]'.", + ], + }, + { + code: String.raw`/[[a]\q{a}]/v; /[\q{a}a]/v; /[[b-c]\q{a}]/v; /[[b-c][a]]/v;`, + output: String.raw`/[\q{a}[a]]/v; /[a\q{a}]/v; /[\q{a}[b-c]]/v; /[[a][b-c]]/v;`, + options: [{ order: [] }], + errors: [ + "Expected character class elements to be in ascending order. '\\q{a}' should be before '[a]'.", + "Expected character class elements to be in ascending order. 'a' should be before '\\q{a}'.", + "Expected character class elements to be in ascending order. '\\q{a}' should be before '[b-c]'.", + "Expected character class elements to be in ascending order. 
'[a]' should be before '[b-c]'.", ], }, { From 2c1d00b920b154c84db5b1759043f00598782944 Mon Sep 17 00:00:00 2001 From: Yosuke Ota Date: Sun, 10 Sep 2023 10:50:27 +0900 Subject: [PATCH 4/7] Update lib/rules/sort-character-class-elements.ts Co-authored-by: Michael Schmidt --- lib/rules/sort-character-class-elements.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/rules/sort-character-class-elements.ts b/lib/rules/sort-character-class-elements.ts index 465912b5c..d8172203b 100644 --- a/lib/rules/sort-character-class-elements.ts +++ b/lib/rules/sort-character-class-elements.ts @@ -72,7 +72,7 @@ function getLexicographicallySmallestFromElement( : toUnicodeSet(node, flags) const wordSets: ReadonlyWordSet[] = [ ...(us.chars.isEmpty ? [] : [[us.chars]]), - ...(us.accept.isEmpty ? [] : us.accept.wordSets), + ...us.accept.wordSets, ] const minimumWords: Word[] = wordSets.map((wordSet) => wordSet.filter((cs) => !cs.isEmpty).map((c) => c.ranges[0].min), From e997b3f3fb0e4c37ef2e375d64f16ae35a5c3bde Mon Sep 17 00:00:00 2001 From: ota-meshi Date: Sun, 10 Sep 2023 14:13:37 +0900 Subject: [PATCH 5/7] refactor wordSets to words --- lib/rules/sort-character-class-elements.ts | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/lib/rules/sort-character-class-elements.ts b/lib/rules/sort-character-class-elements.ts index d8172203b..a88355f5b 100644 --- a/lib/rules/sort-character-class-elements.ts +++ b/lib/rules/sort-character-class-elements.ts @@ -11,7 +11,7 @@ import { createRule, defineRegexpVisitor } from "../utils" import { mention } from "../utils/mention" import type { ReadonlyFlags } from "regexp-ast-analysis" import { toUnicodeSet } from "regexp-ast-analysis" -import type { ReadonlyWord, ReadonlyWordSet, Word } from "refa" +import type { ReadonlyWord } from "refa" type CharacterClassElementKind = | "\\w" @@ -65,18 +65,15 @@ function getCharacterClassElementKind( function getLexicographicallySmallestFromElement( node: 
CharacterClassElement, flags: ReadonlyFlags, -): Word { +): ReadonlyWord { const us = node.type === "CharacterSet" && node.negate ? toUnicodeSet({ ...node, negate: false }, flags) : toUnicodeSet(node, flags) - const wordSets: ReadonlyWordSet[] = [ - ...(us.chars.isEmpty ? [] : [[us.chars]]), - ...us.accept.wordSets, + const minimumWords: ReadonlyWord[] = [ + ...(us.chars.isEmpty ? [] : [[us.chars.ranges[0].min]]), + ...us.accept.words, ] - const minimumWords: Word[] = wordSets.map((wordSet) => - wordSet.filter((cs) => !cs.isEmpty).map((c) => c.ranges[0].min), - ) return minimumWords.sort(compareWords).shift() || [] } From 572d35fd5c087c045786f4193d1268ba6111ee08 Mon Sep 17 00:00:00 2001 From: Michael Schmidt Date: Fri, 15 Sep 2023 01:24:35 +0200 Subject: [PATCH 6/7] Fix `getLexicographicallySmallest` (#604) --- lib/rules/sort-character-class-elements.ts | 7 +- lib/utils/lexicographically-smallest.ts | 88 ++++++++++++++++++++++ 2 files changed, 90 insertions(+), 5 deletions(-) create mode 100644 lib/utils/lexicographically-smallest.ts diff --git a/lib/rules/sort-character-class-elements.ts b/lib/rules/sort-character-class-elements.ts index 56ff56d85..d29c68a88 100644 --- a/lib/rules/sort-character-class-elements.ts +++ b/lib/rules/sort-character-class-elements.ts @@ -12,6 +12,7 @@ import { mention } from "../utils/mention" import type { ReadonlyFlags } from "regexp-ast-analysis" import { toUnicodeSet } from "regexp-ast-analysis" import type { ReadonlyWord } from "refa" +import { getLexicographicallySmallest } from "../utils/lexicographically-smallest" type CharacterClassElementKind = | "\\w" @@ -70,11 +71,7 @@ function getLexicographicallySmallestFromElement( node.type === "CharacterSet" && node.negate ? toUnicodeSet({ ...node, negate: false }, flags) : toUnicodeSet(node, flags) - const minimumWords: ReadonlyWord[] = [ - ...(us.chars.isEmpty ? 
[] : [[us.chars.ranges[0].min]]), - ...us.accept.words, - ] - return minimumWords.sort(compareWords).shift() || [] + return getLexicographicallySmallest(us) || [] } /** diff --git a/lib/utils/lexicographically-smallest.ts b/lib/utils/lexicographically-smallest.ts new file mode 100644 index 000000000..b6fe97e46 --- /dev/null +++ b/lib/utils/lexicographically-smallest.ts @@ -0,0 +1,88 @@ +import type { Word } from "refa" +import type { JS } from "refa" + +function findMin<T>( + array: readonly T[], + compare: (a: T, b: T) => number, +): T | undefined { + if (array.length === 0) { + return undefined + } + + let min = array[0] + for (let i = 1; i < array.length; i++) { + const item = array[i] + if (compare(item, min) < 0) { + min = item + } + } + return min +} + +function compareWords(a: Word, b: Word): number { + const l = Math.min(a.length, b.length) + for (let i = 0; i < l; i++) { + const diff = a[i] - b[i] + if (diff !== 0) { + return diff + } + } + return a.length - b.length +} + +/** + * Returns the lexicographically smallest word in the given set or `undefined` if the set is empty. + */ +export function getLexicographicallySmallest( + set: JS.UnicodeSet, +): Word | undefined { + if (set.accept.isEmpty) { + return set.chars.isEmpty ? undefined : [set.chars.ranges[0].min] + } + + const words = set.accept.wordSets.map( + (w): Word => w.map((c) => c.ranges[0].min), + ) + return findMin(words, compareWords) +} + +/** + * Returns the lexicographically smallest word in the concatenation of the given sets or `undefined` if any of the sets is empty. 
+ */ +export function getLexicographicallySmallestInConcatenation( + elements: readonly JS.UnicodeSet[], +): Word | undefined { + if (elements.length === 1) { + return getLexicographicallySmallest(elements[0]) + } + + let smallest: Word = [] + for (let i = elements.length - 1; i >= 0; i--) { + const set = elements[i] + if (set.isEmpty) { + return undefined + } else if (set.accept.isEmpty) { + smallest.unshift(set.chars.ranges[0].min) + } else { + let words = set.accept.wordSets.map( + (w): Word => w.map((c) => c.ranges[0].min), + ) + // we only have to consider the lexicographically smallest words with unique length + const seenLengths = new Set() + words = words.sort(compareWords).filter((w) => { + if (seenLengths.has(w.length)) { + return false + } + seenLengths.add(w.length) + return true + }) + + smallest = findMin( + // eslint-disable-next-line no-loop-func -- x + words.map((w): Word => [...w, ...smallest]), + compareWords, + )! + } + } + return smallest +} From 07765421a9a2e37d90c04a29cdf4fa5e282d3f39 Mon Sep 17 00:00:00 2001 From: Yosuke Ota Date: Fri, 22 Sep 2023 10:49:37 +0900 Subject: [PATCH 7/7] Update lexicographically-smallest.ts --- lib/utils/lexicographically-smallest.ts | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/lib/utils/lexicographically-smallest.ts b/lib/utils/lexicographically-smallest.ts index b6fe97e46..8d47bedb2 100644 --- a/lib/utils/lexicographically-smallest.ts +++ b/lib/utils/lexicographically-smallest.ts @@ -64,9 +64,10 @@ export function getLexicographicallySmallestInConcatenation( } else if (set.accept.isEmpty) { smallest.unshift(set.chars.ranges[0].min) } else { - let words = set.accept.wordSets.map( - (w): Word => w.map((c) => c.ranges[0].min), - ) + let words = [ + ...(set.chars.isEmpty ? 
[] : [[set.chars]]), + ...set.accept.wordSets, + ].map((w): Word => w.map((c) => c.ranges[0].min)) // we only have to consider the lexicographically smallest words with unique length const seenLengths = new Set() words = words.sort(compareWords).filter((w) => {