Skip to content

Commit 7ef5cb4

Browse files
Julian Rosse, lydell
Julian Rosse
authored and committed
Keep unicode code point escapes as is when possible (#4520)
1 parent 07ae1ed commit 7ef5cb4

File tree

4 files changed

+37
-23
lines changed

4 files changed

+37
-23
lines changed

lib/coffeescript/lexer.js

+20-11
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/lexer.coffee

+12-7
Original file line number | Diff line number | Diff line change
@@ -12,7 +12,7 @@
1212
{Rewriter, INVERSES} = require './rewriter'
1313

1414
# Import the helpers we need.
15-
{count, starts, compact, repeat, invertLiterate,
15+
{count, starts, compact, repeat, invertLiterate, merge,
1616
locationDataToString, throwSyntaxError} = require './helpers'
1717

1818
# The Lexer Class
@@ -330,7 +330,6 @@ exports.Lexer = class Lexer
330330
when match = REGEX.exec @chunk
331331
[regex, body, closed] = match
332332
@validateEscapes body, isRegex: yes, offsetInChunk: 1
333-
body = @formatRegex body, delimiter: '/'
334333
index = regex.length
335334
prev = @prev()
336335
if prev
@@ -349,13 +348,17 @@ exports.Lexer = class Lexer
349348
when not VALID_FLAGS.test flags
350349
@error "invalid regular expression flags #{flags}", offset: index, length: flags.length
351350
when regex or tokens.length is 1
352-
body ?= @formatHeregex tokens[0][1]
351+
if body
352+
body = @formatRegex body, { flags, delimiter: '/' }
353+
else
354+
body = @formatHeregex tokens[0][1], { flags }
353355
@token 'REGEX', "#{@makeDelimitedLiteral body, delimiter: '/'}#{flags}", 0, end, origin
354356
else
355357
@token 'REGEX_START', '(', 0, 0, origin
356358
@token 'IDENTIFIER', 'RegExp', 0, 0
357359
@token 'CALL_START', '(', 0, 0
358-
@mergeInterpolationTokens tokens, {delimiter: '"', double: yes}, @formatHeregex
360+
@mergeInterpolationTokens tokens, {delimiter: '"', double: yes}, (str) =>
361+
@formatHeregex str, { flags }
359362
if flags
360363
@token ',', ',', index - 1, 0
361364
@token 'STRING', '"' + flags + '"', index - 1, flags.length
@@ -792,8 +795,8 @@ exports.Lexer = class Lexer
792795
formatString: (str, options) ->
793796
@replaceUnicodeCodePointEscapes str.replace(STRING_OMIT, '$1'), options
794797

795-
formatHeregex: (str) ->
796-
@formatRegex str.replace(HEREGEX_OMIT, '$1$2'), delimiter: '///'
798+
formatHeregex: (str, options) ->
799+
@formatRegex str.replace(HEREGEX_OMIT, '$1$2'), merge(options, delimiter: '///')
797800

798801
formatRegex: (str, options) ->
799802
@replaceUnicodeCodePointEscapes str, options
@@ -808,8 +811,9 @@ exports.Lexer = class Lexer
808811
low = (codePoint - 0x10000) % 0x400 + 0xDC00
809812
"#{toUnicodeEscape(high)}#{toUnicodeEscape(low)}"
810813

811-
# Replace \u{...} with \uxxxx[\uxxxx] in strings and regexes
814+
# Replace \u{...} with \uxxxx[\uxxxx] in regexes without `u` flag
812815
replaceUnicodeCodePointEscapes: (str, options) ->
816+
shouldReplace = options.flags? and 'u' not in options.flags
813817
str.replace UNICODE_CODE_POINT_ESCAPE, (match, escapedBackslash, codePointHex, offset) =>
814818
return escapedBackslash if escapedBackslash
815819

@@ -818,6 +822,7 @@ exports.Lexer = class Lexer
818822
@error "unicode code point escapes greater than \\u{10ffff} are not allowed",
819823
offset: offset + options.delimiter.length
820824
length: codePointHex.length + 4
825+
return match unless shouldReplace
821826

822827
@unicodeCodePointToUnicodeEscapes codePointDecimal
823828

test/regexps.coffee

+2-2
Original file line number | Diff line number | Diff line change
@@ -305,12 +305,12 @@ test "#4248: Unicode code point escapes", ->
305305
ok ///a\u{000001ab}c///.test 'a\u{1ab}c'
306306
ok /a\u{12345}c/.test 'a\ud808\udf45c'
307307

308-
# rewrite code point escapes
308+
# rewrite code point escapes unless u flag is set
309309
input = """
310310
/\\u{bcdef}\\u{abc}/u
311311
"""
312312
output = """
313-
/\\udab3\\uddef\\u0abc/u;
313+
/\\u{bcdef}\\u{abc}/u;
314314
"""
315315
eq toJS(input), output
316316

test/strings.coffee

+3-3
Original file line number | Diff line number | Diff line change
@@ -420,19 +420,19 @@ test "#4248: Unicode code point escapes", ->
420420
eq '\udab3\uddefc', """\u{bcdef}#{ 'c' }"""
421421
eq '\\u{123456}', "#{'\\'}#{'u{123456}'}"
422422

423-
# rewrite code point escapes
423+
# don't rewrite code point escapes
424424
input = """
425425
'\\u{bcdef}\\u{abc}'
426426
"""
427427
output = """
428-
'\\udab3\\uddef\\u0abc';
428+
'\\u{bcdef}\\u{abc}';
429429
"""
430430
eq toJS(input), output
431431

432432
input = """
433433
"#{ 'a' }\\u{bcdef}"
434434
"""
435435
output = """
436-
"a\\udab3\\uddef";
436+
"a\\u{bcdef}";
437437
"""
438438
eq toJS(input), output

0 commit comments

Comments
 (0)