@@ -261,14 +261,14 @@ exports.Lexer = class Lexer
261
261
indent = attempt if indent is null or 0 < attempt .length < indent .length
262
262
indentRegex = /// \n #{ indent} /// g if indent
263
263
@ mergeInterpolationTokens tokens, {delimiter}, (value , i ) =>
264
- value = @ formatString value
264
+ value = @ formatString value, delimiter : quote
265
265
value = value .replace indentRegex, ' \n ' if indentRegex
266
266
value = value .replace LEADING_BLANK_LINE, ' ' if i is 0
267
267
value = value .replace TRAILING_BLANK_LINE, ' ' if i is $
268
268
value
269
269
else
270
270
@ mergeInterpolationTokens tokens, {delimiter}, (value , i ) =>
271
- value = @ formatString value
271
+ value = @ formatString value, delimiter : quote
272
272
value = value .replace SIMPLE_STRING_OMIT, (match , offset ) ->
273
273
if (i is 0 and offset is 0 ) or
274
274
(i is $ and offset + match .length is value .length )
@@ -318,6 +318,7 @@ exports.Lexer = class Lexer
318
318
when match = REGEX .exec @chunk
319
319
[regex , body , closed ] = match
320
320
@ validateEscapes body, isRegex : yes , offsetInChunk : 1
321
+ body = @ formatRegex body, delimiter : ' /'
321
322
index = regex .length
322
323
[... , prev ] = @tokens
323
324
if prev
@@ -632,7 +633,7 @@ exports.Lexer = class Lexer
632
633
tokensToPush = value
633
634
when ' NEOSTRING'
634
635
# Convert 'NEOSTRING' into 'STRING'.
635
- converted = fn token[1 ], i
636
+ converted = fn . call this , token[1 ], i
636
637
# Optimize out empty strings. We ensure that the tokens stream always
637
638
# starts with a string token, though, to make sure that the result
638
639
# really is a string.
@@ -762,11 +763,37 @@ exports.Lexer = class Lexer
762
763
' **' , ' SHIFT' , ' RELATION' , ' COMPARE' , ' &' , ' ^' , ' |' , ' &&' , ' ||' ,
763
764
' BIN?' , ' THROW' , ' EXTENDS' ]
764
765
765
- formatString : (str ) ->
766
- str .replace STRING_OMIT, ' $1'
766
+ formatString : (str , options ) ->
767
+ @ replaceUnicodeCodePointEscapes str .replace ( STRING_OMIT, ' $1' ), options
767
768
768
769
formatHeregex : (str ) ->
769
- str .replace HEREGEX_OMIT, ' $1$2'
770
+ @ formatRegex str .replace (HEREGEX_OMIT, ' $1$2' ), delimiter : ' ///'
771
+
772
+ formatRegex : (str , options ) ->
773
+ @ replaceUnicodeCodePointEscapes str, options
774
+
775
+ unicodeCodePointToUnicodeEscapes : (codePoint ) ->
776
+ toUnicodeEscape = (val ) ->
777
+ str = val .toString 16
778
+ " \\ u#{ repeat ' 0' , 4 - str .length }#{ str} "
779
+ return toUnicodeEscape (codePoint) if codePoint < 0x10000
780
+ # surrogate pair
781
+ high = Math .floor ((codePoint - 0x10000 ) / 0x400 ) + 0xD800
782
+ low = (codePoint - 0x10000 ) % 0x400 + 0xDC00
783
+ " #{ toUnicodeEscape (high)}#{ toUnicodeEscape (low)} "
784
+
785
+ # Replace \u{...} with \uxxxx[\uxxxx] in strings and regexes
786
+ replaceUnicodeCodePointEscapes : (str , options ) ->
787
+ str .replace UNICODE_CODE_POINT_ESCAPE, (match , escapedBackslash , codePointHex , offset ) =>
788
+ return escapedBackslash if escapedBackslash
789
+
790
+ codePointDecimal = parseInt codePointHex, 16
791
+ if codePointDecimal > 0x10ffff
792
+ @ error " unicode code point escapes greater than \\ u{10ffff} are not allowed" ,
793
+ offset : offset + options .delimiter .length
794
+ length : codePointHex .length + 4
795
+
796
+ @ unicodeCodePointToUnicodeEscapes codePointDecimal
770
797
771
798
# Validates escapes in strings and regexes.
772
799
validateEscapes : (str , options = {}) ->
@@ -777,13 +804,13 @@ exports.Lexer = class Lexer
777
804
STRING_INVALID_ESCAPE
778
805
match = invalidEscapeRegex .exec str
779
806
return unless match
780
- [[], before , octal , hex , unicode ] = match
807
+ [[], before , octal , hex , unicodeCodePoint , unicode ] = match
781
808
message =
782
809
if octal
783
810
" octal escape sequences are not allowed"
784
811
else
785
812
" invalid escape sequence"
786
- invalidEscape = " \\ #{ octal or hex or unicode} "
813
+ invalidEscape = " \\ #{ octal or hex or unicodeCodePoint or unicode} "
787
814
@ error " #{ message} #{ invalidEscape} " ,
788
815
offset : (options .offsetInChunk ? 0 ) + match .index + before .length
789
816
length : invalidEscape .length
@@ -970,7 +997,7 @@ REGEX = /// ^
970
997
///
971
998
972
999
REGEX_FLAGS = / ^ \w * /
973
- VALID_FLAGS = / ^ (?!. * (. ). * \1 )[imgy ] * $ /
1000
+ VALID_FLAGS = / ^ (?!. * (. ). * \1 )[imguy ] * $ /
974
1001
975
1002
HEREGEX = /// ^ (?: [^ \\ /#] | \\ [\s\S ] | /(?!//) | \# (?!\{ ) )* ///
976
1003
@@ -994,18 +1021,26 @@ STRING_INVALID_ESCAPE = ///
994
1021
\\ (
995
1022
? : (0[0-7 ] | [1-7 ] ) # octal escape
996
1023
| (x(?![\d a-fA-F ] {2} ). {0,2} ) # hex escape
997
- | (u(?![\d a-fA-F ] {4} ). {0,4} ) # unicode escape
1024
+ | (u\{ (?![\d a-fA-F ] {1,} \} )[^ }] * \} ? ) # unicode code point escape
1025
+ | (u(?!\{ | [\d a-fA-F ] {4} ). {0,4} ) # unicode escape
998
1026
)
999
1027
///
1000
1028
REGEX_INVALID_ESCAPE = ///
1001
1029
( (?:^ | [^ \\ ] ) (?:\\\\ )* ) # make sure the escape isn’t escaped
1002
1030
\\ (
1003
1031
? : (0[0-7 ] ) # octal escape
1004
1032
| (x(?![\d a-fA-F ] {2} ). {0,2} ) # hex escape
1005
- | (u(?![\d a-fA-F ] {4} ). {0,4} ) # unicode escape
1033
+ | (u\{ (?![\d a-fA-F ] {1,} \} )[^ }] * \} ? ) # unicode code point escape
1034
+ | (u(?!\{ | [\d a-fA-F ] {4} ). {0,4} ) # unicode escape
1006
1035
)
1007
1036
///
1008
1037
1038
+ UNICODE_CODE_POINT_ESCAPE = ///
1039
+ ( \\\\ ) # make sure the escape isn’t escaped
1040
+ |
1041
+ \\ u\{ ( [\d a-fA-F ] + ) \}
1042
+ ///g
1043
+
1009
1044
LEADING_BLANK_LINE = /^ [^ \n \S ] * \n /
1010
1045
TRAILING_BLANK_LINE = /\n [^ \n \S ] * $ /
1011
1046
0 commit comments