|
| 1 | +import { CharSet, JS } from "refa"; |
| 2 | +import { CharacterClass, CharacterClassElement, CharacterSet, Flags } from "regexpp/ast"; |
| 3 | +import { Simple, assertNever } from "./util"; |
| 4 | + |
/**
 * Builds a `CharSet` from either a list of character class elements / character sets
 * or from a whole character class.
 *
 * For a list, every entry is translated into the input form `JS.createCharSet` accepts:
 * a single character becomes its code point value, a range becomes a `{ min, max }` pair,
 * and a character set is passed through unchanged.
 *
 * For a character class, its elements are converted first and the resulting set is negated
 * when the class itself is negated.
 *
 * @param elements The elements (or the character class) to convert.
 * @param flags The regex flags forwarded to `JS.createCharSet`.
 * @returns The character set described by `elements`.
 */
export function toCharSet(
	elements: (Simple<CharacterClassElement> | Simple<CharacterSet>)[] | CharacterClass,
	flags: Partial<Flags>
): CharSet {
	if (!Array.isArray(elements)) {
		// A character class: convert its contents, then apply the class-level negation.
		const inner = toCharSet(elements.elements, flags);
		return elements.negate ? inner.negate() : inner;
	}

	return JS.createCharSet(
		elements.map(item => {
			if (item.type === "Character") {
				return item.value;
			}
			if (item.type === "CharacterClassRange") {
				return { min: item.min.value, max: item.max.value };
			}
			if (item.type === "CharacterSet") {
				return item;
			}
			// Every known element type is handled above.
			throw assertNever(item);
		}),
		flags
	);
}
| 33 | + |
// Shared empty sets, created once: one with maximum 0xffff and one with maximum 0x10ffff.
const EMPTY_UTF16_CHARSET = CharSet.empty(0xffff);
const EMPTY_UNICODE_CHARSET = CharSet.empty(0x10ffff);
/**
 * Picks the shared empty character set that fits the given flags.
 *
 * @param flags The regex flags; only `unicode` is inspected.
 * @returns The set with maximum 0x10ffff when `unicode` is truthy, otherwise the one with maximum 0xffff.
 */
export function emptyCharSet(flags: Partial<Flags>): CharSet {
	return flags.unicode ? EMPTY_UNICODE_CHARSET : EMPTY_UTF16_CHARSET;
}
// Shared full sets, created once: one with maximum 0xffff and one with maximum 0x10ffff.
const ALL_UTF16_CHARSET = CharSet.all(0xffff);
const ALL_UNICODE_CHARSET = CharSet.all(0x10ffff);
/**
 * Picks the shared full character set that fits the given flags.
 *
 * @param flags The regex flags; only `unicode` is inspected.
 * @returns The set with maximum 0x10ffff when `unicode` is truthy, otherwise the one with maximum 0xffff.
 */
export function allCharSet(flags: Partial<Flags>): CharSet {
	return flags.unicode ? ALL_UNICODE_CHARSET : ALL_UTF16_CHARSET;
}
// Each set is the negation of what an "any" character set (`.`) yields when `dotAll` is not set,
// i.e. exactly the characters `.` leaves out — the line terminators, going by the constant names.
const LINE_TERMINATOR_UTF16_CHARSET = JS.createCharSet([{ kind: "any" }], { unicode: false }).negate();
const LINE_TERMINATOR_UNICODE_CHARSET = JS.createCharSet([{ kind: "any" }], { unicode: true }).negate();
/**
 * Picks the shared line terminator character set that fits the given flags.
 *
 * @param flags The regex flags; only `unicode` is inspected.
 * @returns The set built with `unicode: true` when `unicode` is truthy, otherwise the one built with `unicode: false`.
 */
export function lineTerminatorCharSet(flags: Partial<Flags>): CharSet {
	return flags.unicode ? LINE_TERMINATOR_UNICODE_CHARSET : LINE_TERMINATOR_UTF16_CHARSET;
}
| 67 | + |
/**
 * Tells whether the given character class or character set accepts every character.
 *
 * - A property character set is converted to a `CharSet` and checked for being full.
 * - An "any" character set (`.`) accepts everything only when the `dotAll` flag is set.
 * - Every other kind of character set is reported as not matching all characters.
 * - A character class accepts everything when it is negated and its elements form an empty set,
 *   or when it is not negated and its elements form the full set.
 *
 * @param char The character class or character set to inspect.
 * @param flags The regex flags used to interpret `char`.
 * @returns `true` if `char` matches all characters, `false` otherwise.
 */
export function isMatchAll(char: CharacterClass | CharacterSet, flags: Partial<Flags>): boolean {
	if (char.type === "CharacterSet") {
		switch (char.kind) {
			case "property":
				return JS.createCharSet([char], flags).isAll;
			case "any":
				return Boolean(flags.dotAll);
			default:
				return false;
		}
	}

	// Shortcut: a negated class without elements needs no set to be built.
	if (char.negate && char.elements.length === 0) {
		return true;
	}

	const members = toCharSet(char.elements, flags);
	return char.negate ? members.isEmpty : members.isAll;
}
| 92 | + |
/**
 * Tells whether the given character class or character set accepts no character at all.
 *
 * - A property character set is converted to a `CharSet` and checked for being empty.
 * - Every other kind of character set is reported as matching at least something.
 * - A character class accepts nothing when it is not negated and its elements form an empty set,
 *   or when it is negated and its elements form the full set.
 *
 * @param char The character class or character set to inspect.
 * @param flags The regex flags used to interpret `char`.
 * @returns `true` if `char` matches no characters, `false` otherwise.
 */
export function isMatchNone(char: CharacterClass | CharacterSet, flags: Partial<Flags>): boolean {
	if (char.type === "CharacterSet") {
		return char.kind === "property" ? JS.createCharSet([char], flags).isEmpty : false;
	}

	// Shortcut: a non-negated class without elements needs no set to be built.
	if (!char.negate && char.elements.length === 0) {
		return true;
	}

	const members = toCharSet(char.elements, flags);
	return char.negate ? members.isAll : members.isEmpty;
}
0 commit comments