|
| 1 | +import regexpTree from 'regexp-tree' |
| 2 | +import type { Expression, Char, ClassRange } from 'regexp-tree/ast' |
| 3 | + |
/**
 * Recursively renders a regexp-tree AST node as the source text of the
 * equivalent helper expression(s) that `convert` places inside
 * `createRegExp(...)`.
 *
 * The result is a plain string of generated code: either a single expression
 * (`digit`, `oneOrMore(...)`, `exactly('a').at.lineEnd()`) or, for an
 * `Alternative`, several expressions joined by `', '`.
 *
 * @param node - AST node to render; `null` (an empty body/group) renders as `''`.
 * @returns Generated source text for `node`.
 * @throws Error for constructs the converter does not support yet (complex
 *   character classes, lazy quantifiers, numbered back-references, unsupported
 *   meta or unknown characters).
 * @throws TypeError for assertion or quantifier kinds it does not recognise.
 */
function build(node: Expression | null): string {
  if (node === null) return ''

  switch (node.type) {
    case 'CharacterClass': {
      // Runs of plain characters arrive here pre-merged into one quoted string.
      const exprs = combineContinuousSimpleChars(node.expressions)

      // TODO: hard coded cases, need to be improved for multi char class
      if (exprs.length === 1) {
        const first = exprs[0]
        if (typeof first === 'string') {
          // Only literal characters: [abc] / [^abc]
          return node.negative ? `charNotIn(${first})` : `charIn(${first})`
        } else if (first.type === 'Char' && first.kind === 'meta' && node.negative) {
          // Negated single meta char: [^\t], [^\n], [^\r]. Anything else falls
          // through to the "unsupported" error below.
          if (first.value === '\\t') return `not.tab`
          if (first.value === '\\n') return `not.linefeed`
          if (first.value === '\\r') return `not.carriageReturn`
        } else {
          // A single range: only [A-Z] and [a-z] have dedicated helpers.
          const range = normalizeClassRange(first)
          if (range === 'A-Z') {
            return node.negative ? `not.letter.uppercase` : `letter.uppercase`
          } else if (range === 'a-z') {
            return node.negative ? `not.letter.lowercase` : `letter.lowercase`
          }
        }
      } else if (exprs.length === 2) {
        // Two ranges: [A-Za-z] in either order.
        if (typeof exprs[0] !== 'string' && typeof exprs[1] !== 'string') {
          const range1 = normalizeClassRange(exprs[0])
          const range2 = normalizeClassRange(exprs[1])
          if ((range1 === 'A-Z' && range2 === 'a-z') || (range1 === 'a-z' && range2 === 'A-Z')) {
            return node.negative ? `not.letter` : `letter`
          }
        }
      }

      throw new Error('Unsupported for Complex charactor class')
    }

    case 'Disjunction':
      return chain(build(node.left), `or(${build(node.right)})`)

    case 'Assertion':
      // Stand-alone assertions have nothing to attach to, so the positional
      // ones hang off an empty `exactly('')` (what `chain('')` produces).
      switch (node.kind) {
        case '\\b':
          return 'wordBoundary'

        case '\\B':
          return 'not.wordBoundary'

        case '^':
          return chain('', 'at.lineStart()')

        case '$':
          return chain('', 'at.lineEnd()')

        case 'Lookbehind':
          return chain('', `${node.negative ? 'notAfter' : 'after'}(${build(node.assertion)})`)

        case 'Lookahead':
          return chain('', `${node.negative ? 'notBefore' : 'before'}(${build(node.assertion)})`)

        /* v8 ignore next 2 */
        default:
          throw new TypeError(`Unknown Assertion kind: ${String((node as { kind: unknown }).kind)}`)
      }

    case 'Char':
      if (node.kind === 'meta') {
        // `value` holds the regex source spelling, i.e. a backslash followed
        // by a letter, hence the doubled backslashes in the labels.
        switch (node.value) {
          case '.':
            return 'char'

          case '\\w':
            return 'wordChar'
          case '\\d':
            return 'digit'
          case '\\s':
            return 'whitespace'
          case '\\t':
            return 'tab'
          case '\\n':
            return 'linefeed'
          case '\\r':
            return 'carriageReturn'

          case '\\W':
            return 'not.wordChar'
          case '\\D':
            return 'not.digit'
          case '\\S':
            return 'not.whitespace'

          // Known meta characters without a helper yet; listed for
          // documentation, handled like any other unsupported value.
          case '\\f':
          case '\\v':
          default:
            throw new Error(`Unsupported Meta Char: ${node.value}`)
        }
      } else {
        const char = getChar(node)
        if (char === null) throw new Error(`Unknown Char: ${node.value}`)
        return `'${char}'`
      }

    case 'Repetition': {
      const quantifier = node.quantifier
      const expr = build(node.expression)

      // TODO: support lazy quantifier
      const lazy = !quantifier.greedy
      if (lazy) throw new Error('Unsupported for lazy quantifier')

      switch (quantifier.kind) {
        case '+':
          return `oneOrMore(${expr})`
        case '?':
          return `maybe(${expr})`
        case '*':
          return chain(expr, 'times.any()')
        case 'Range':
          // {1}
          if (quantifier.from === quantifier.to) {
            return chain(expr, `times(${quantifier.from})`)
          }
          // {1,} (no upper bound present on the node)
          else if (quantifier.to === undefined) {
            return chain(expr, `times.atLeast(${quantifier.from})`)
          }
          // {0,3}
          else if (quantifier.from === 0) {
            return chain(expr, `times.atMost(${quantifier.to})`)
          }
          // {1,3}
          return chain(expr, `times.between(${quantifier.from}, ${quantifier.to})`)

        /* v8 ignore next 2 */
        default:
          // Fail loudly instead of silently emitting an empty expression.
          throw new TypeError(
            `Unknown Quantifier kind: ${String((quantifier as { kind: unknown }).kind)}`
          )
      }
    }

    case 'Alternative': {
      const alts = combineContinuousSimpleChars(node.expressions)
      const exprs: string[] = []

      for (let i = 0; i < alts.length; i++) {
        const alt = alts[i]

        // Already-rendered quoted literal.
        if (typeof alt === 'string') {
          exprs.push(alt)
          continue
        }

        if (alt.type === 'Assertion') {
          // Positional assertions are emitted as a helper chained onto a
          // neighbouring expression: `^` / look-behind consume the NEXT item
          // (note the `++i`), `$` / look-ahead re-wrap the PREVIOUS one.
          // `\b` / `\B` match no case here and fall through to `build(alt)`.
          switch (alt.kind) {
            case '^': {
              const next = alts[++i]
              if (next === undefined)
                throw new Error(`Unexpected assertion: ${JSON.stringify(alt)}`)
              exprs.push(chain(next, 'at.lineStart()'))
              continue
            }

            case '$': {
              const prev = exprs.pop()
              if (prev === undefined)
                throw new Error(`Unexpected assertion: ${JSON.stringify(alt)}`)
              exprs.push(chain(prev, 'at.lineEnd()'))
              continue
            }

            case 'Lookbehind': {
              const next = alts[++i]
              if (next === undefined)
                throw new Error(`Unexpected assertion: ${JSON.stringify(alt)}`)
              const helper = alt.negative ? 'notAfter' : 'after'
              exprs.push(chain(next, `${helper}(${build(alt.assertion)})`))
              continue
            }

            case 'Lookahead': {
              const prev = exprs.pop()
              if (prev === undefined)
                throw new Error(`Unexpected assertion: ${JSON.stringify(alt)}`)
              const helper = alt.negative ? 'notBefore' : 'before'
              exprs.push(chain(prev, `${helper}(${build(alt.assertion)})`))
              continue
            }
          }
        }

        // TODO: currently not support backreference for cross group
        if (alt.type === 'Backreference') {
          if (alt.kind !== 'name') throw new Error(`Unsupport for number reference`)

          // Everything rendered so far is folded into one `exactly(...)` that
          // the reference is chained onto. With nothing before it, fall back
          // to an empty literal rather than emitting an argument-less call.
          const preceding = exprs.length > 0 ? exprs.join(', ') : "''"
          const ref = chain(`exactly(${preceding})`, `and.referenceTo('${alt.reference}')`)
          exprs.length = 0
          exprs.push(ref)
          continue
        }

        exprs.push(build(alt))
      }

      return exprs.join(', ')
    }

    case 'Group':
      if (node.capturing)
        return chain(build(node.expression), node.name ? `as('${node.name}')` : 'grouped()')
      else return chain(build(node.expression))

    /* v8 ignore next 2 */
    case 'Backreference':
      return chain('', `and.referenceTo('${node.reference}')`)
  }
}
| 217 | + |
/**
 * Renders a character-class range as `"<from>-<to>"` using the `value` of its
 * two endpoint nodes, e.g. `"a-z"`.
 *
 * @param node - A member of a character class.
 * @returns The range text, or `undefined` when `node` is not a `ClassRange`.
 */
function normalizeClassRange(node: Char | ClassRange): string | undefined {
  if (node.type !== 'ClassRange') return undefined

  const { from, to } = node
  return `${from.value}-${to.value}`
}
| 221 | + |
/**
 * Collapses every run of adjacent literal characters into one single-quoted
 * string and leaves all other nodes in place, preserving order.
 *
 * A node counts as a literal character when it is a `Char` for which
 * `getChar` returns a non-null string.
 *
 * @param expressions - Sibling nodes of a character class or an alternative.
 * @returns A new array mixing quoted literal strings (`'abc'`) and the
 *   original non-literal nodes.
 */
function combineContinuousSimpleChars<T extends (Char | ClassRange) | Expression>(
  expressions: T[]
): (T | string)[] {
  const combined: Array<T | string> = []
  let pending = ''

  // Emits the literal run collected so far, if any, as one quoted string.
  const flushPending = (): void => {
    if (!pending) return
    combined.push(`'${pending}'`)
    pending = ''
  }

  expressions.forEach(expr => {
    const char = expr.type === 'Char' ? getChar(expr) : null
    if (char === null) {
      // A non-literal node ends the current run.
      flushPending()
      combined.push(expr)
    } else {
      pending += char
    }
  })

  // Emit a run that reaches the end of the list.
  flushPending()

  return combined
}
| 250 | + |
/**
 * Returns the literal character a `Char` node stands for, escaped so it can
 * be embedded in a single-quoted string of generated source.
 *
 * @param char - Character node to inspect.
 * @returns The escaped character for `simple` nodes (from `value`) and for
 *   `oct` / `decimal` / `hex` / `unicode` nodes that carry a string `symbol`;
 *   `null` for every other node.
 */
function getChar(char: Char): string | null {
  // The character ends up between single quotes in generated code, so the
  // quote itself, the backslash and the line terminators (which may not
  // appear raw inside a quoted literal) must be written as escape sequences.
  function escapeSimpleChar(char: string): string {
    switch (char) {
      case '\\':
        return '\\\\'
      case "'":
        return "\\'"
      case '\n':
        return '\\n'
      case '\r':
        return '\\r'
      case '\u2028':
        return '\\u2028'
      case '\u2029':
        return '\\u2029'
      default:
        return char
    }
  }

  switch (char.kind) {
    case 'simple':
      return escapeSimpleChar(char.value)

    case 'oct':
    case 'decimal':
    case 'hex':
    case 'unicode': {
      // `symbol` is read defensively: it is not assumed to be declared on the
      // `Char` type, and a missing or non-string value is treated as "no
      // literal" rather than stringified.
      const { symbol } = char as Char & { symbol?: unknown }
      return typeof symbol === 'string' ? escapeSimpleChar(symbol) : null
    }

    default:
      return null
  }
}
| 270 | + |
/**
 * Reports whether generated source `expr` contains a comma outside of any
 * single-quoted string and outside of any parentheses, i.e. whether it is a
 * list of several expressions rather than a single one.
 */
function hasTopLevelComma(expr: string): boolean {
  let depth = 0
  let quoted = false

  for (let i = 0; i < expr.length; i++) {
    const ch = expr[i]
    if (quoted) {
      if (ch === '\\') i++ // skip the escaped character
      else if (ch === "'") quoted = false
    } else if (ch === "'") {
      quoted = true
    } else if (ch === '(') {
      depth++
    } else if (ch === ')') {
      depth--
    } else if (ch === ',' && depth === 0) {
      return true
    }
  }

  return false
}

/**
 * Appends `.helper` to an expression, first wrapping it in `exactly(...)`
 * whenever it could not carry a method call on its own.
 *
 * Wrapping rules:
 * - string input that is empty or quote-delimited at both ends is always
 *   wrapped (`''` becomes `exactly('')`), with or without a helper;
 * - when a helper is given, a top-level comma-separated list is wrapped too,
 *   so the helper applies to the whole sequence instead of its last item,
 *   and the same rules apply to the text rendered from an AST node;
 * - without a helper, text rendered from an AST node is returned untouched.
 *
 * @param expr - Already generated source text, or an AST node rendered via `build`.
 * @param helper - Method-call chain to append, without the leading dot.
 * @returns The resulting source text.
 */
function chain(expr: Expression | string, helper?: string): string {
  const fromNode = typeof expr !== 'string'
  const source = typeof expr === 'string' ? expr : build(expr)

  const bareLiteral = source === '' || (source.startsWith("'") && source.endsWith("'"))

  let needsWrap: boolean
  if (helper) needsWrap = bareLiteral || hasTopLevelComma(source)
  else needsWrap = !fromNode && bareLiteral

  let target = source
  if (needsWrap) target = source === '' ? "exactly('')" : `exactly(${source})`

  return helper ? `${target}.${helper}` : target
}
| 281 | + |
/** Identifier emitted (unquoted) for each RegExp flag character the converter knows. */
const READABLE_FLAG_NAMES: ReadonlyMap<string, string> = new Map([
  ['d', 'withIndices'],
  ['i', 'caseInsensitive'],
  ['g', 'global'],
  ['m', 'multiline'],
  ['s', 'dotAll'],
  ['u', 'unicode'],
  ['y', 'sticky'],
])

/**
 * Renders a RegExp flag string as the source text of an array literal.
 *
 * Known flags become their readable identifier; any other flag character is
 * kept as a single-quoted string so it is not lost.
 *
 * @param flags - Flag characters, e.g. `"gi"`.
 * @returns `''` when there are no flags, otherwise e.g. `"[global, caseInsensitive]"`.
 */
function buildFlags(flags: string): string {
  if (!flags) return ''

  const readableFlags = flags
    .split('')
    .map(flag => READABLE_FLAG_NAMES.get(flag) ?? `'${flag}'`)

  return `[${readableFlags.join(', ')}]`
}
| 301 | + |
/**
 * Converts a regular expression into generated source text: the pattern
 * rendered by `build`, followed by the flag array from `buildFlags` when the
 * expression has flags.
 *
 * @param regex - Regular expression to convert.
 * @param options - `argsOnly: true` returns just the argument list; by
 *   default the arguments are wrapped as `createRegExp(<args>)`.
 * @returns The generated source text.
 * @throws TypeError when the parser's root node is not of type `RegExp`.
 */
export function convert(regex: RegExp, { argsOnly = false } = {}): string {
  const ast = regexpTree.parse(regex)

  if (ast.type !== 'RegExp') {
    throw new TypeError(`Unexpected RegExp AST: ${ast.type}`)
  }

  const flags = buildFlags(ast.flags)
  const parts = [build(ast.body)]
  if (flags) parts.push(flags)
  const args = parts.join(', ')

  if (argsOnly) return args
  return `createRegExp(${args})`
}
0 commit comments