@@ -14,6 +14,7 @@ module ts {
14
14
getTokenPos ( ) : number ;
15
15
getTokenText ( ) : string ;
16
16
getTokenValue ( ) : string ;
17
+ hasExtendedUnicodeEscape ( ) : boolean ;
17
18
hasPrecedingLineBreak ( ) : boolean ;
18
19
isIdentifier ( ) : boolean ;
19
20
isReservedWord ( ) : boolean ;
@@ -556,6 +557,7 @@ module ts {
556
557
var token : SyntaxKind ;
557
558
var tokenValue : string ;
558
559
var precedingLineBreak : boolean ;
560
+ var hasExtendedUnicodeEscape : boolean ;
559
561
var tokenIsUnterminated : boolean ;
560
562
561
563
function error ( message : DiagnosticMessage , length ?: number ) : void {
@@ -606,11 +608,27 @@ module ts {
606
608
}
607
609
return + ( text . substring ( start , pos ) ) ;
608
610
}
611
+
612
/**
 * Reads exactly `count` hexadecimal digits from the text at the current
 * scan position by delegating to scanHexDigits with open-ended scanning
 * switched off.
 *
 * @param count the number of hexadecimal digits that must be present.
 * @returns the numeric value of the digits, or -1 when fewer than
 *          `count` hexadecimal digits are available.
 */
function scanExactNumberOfHexDigits(count: number): number {
    var scannedValue = scanHexDigits(/*minCount*/ count, /*scanAsManyAsPossible*/ false);
    return scannedValue;
}
619
+
620
/**
 * Reads every consecutive hexadecimal digit found at the current scan
 * position by delegating to scanHexDigits with open-ended scanning
 * switched on.
 *
 * @param count the smallest number of hexadecimal digits that must be present.
 * @returns the numeric value of all digits read, or -1 when fewer than
 *          `count` hexadecimal digits are available.
 */
function scanMinimumNumberOfHexDigits(count: number): number {
    var scannedValue = scanHexDigits(/*minCount*/ count, /*scanAsManyAsPossible*/ true);
    return scannedValue;
}
609
627
610
- function scanHexDigits ( count : number , mustMatchCount ? : boolean ) : number {
628
+ function scanHexDigits ( minCount : number , scanAsManyAsPossible : boolean ) : number {
611
629
var digits = 0 ;
612
630
var value = 0 ;
613
- while ( digits < count || ! mustMatchCount ) {
631
+ while ( digits < minCount || scanAsManyAsPossible ) {
614
632
var ch = text . charCodeAt ( pos ) ;
615
633
if ( ch >= CharacterCodes . _0 && ch <= CharacterCodes . _9 ) {
616
634
value = value * 16 + ch - CharacterCodes . _0 ;
@@ -627,7 +645,7 @@ module ts {
627
645
pos ++ ;
628
646
digits ++ ;
629
647
}
630
- if ( digits < count ) {
648
+ if ( digits < minCount ) {
631
649
value = - 1 ;
632
650
}
633
651
return value ;
@@ -764,16 +782,20 @@ module ts {
764
782
return "\'" ;
765
783
case CharacterCodes . doubleQuote :
766
784
return "\"" ;
767
- case CharacterCodes . x :
768
785
case CharacterCodes . u :
769
- var ch = scanHexDigits ( ch === CharacterCodes . x ? 2 : 4 , /*mustMatchCount*/ true ) ;
770
- if ( ch >= 0 ) {
771
- return String . fromCharCode ( ch ) ;
772
- }
773
- else {
774
- error ( Diagnostics . Hexadecimal_digit_expected ) ;
775
- return ""
786
+ // '\u{DDDDDDDD}'
787
+ if ( pos < len && text . charCodeAt ( pos ) === CharacterCodes . openBrace ) {
788
+ hasExtendedUnicodeEscape = true ;
789
+ pos ++ ;
790
+ return scanExtendedUnicodeEscape ( ) ;
776
791
}
792
+
793
+ // '\uDDDD'
794
+ return scanHexadecimalEscape ( /*numDigits*/ 4 )
795
+
796
+ case CharacterCodes . x :
797
+ // '\xDD'
798
+ return scanHexadecimalEscape ( /*numDigits*/ 2 )
777
799
778
800
// when encountering a LineContinuation (i.e. a backslash and a line terminator sequence),
779
801
// the line terminator is interpreted to be "the empty code unit sequence".
@@ -790,14 +812,74 @@ module ts {
790
812
return String . fromCharCode ( ch ) ;
791
813
}
792
814
}
815
+
816
/**
 * Scans the digits of a fixed-width hexadecimal escape (the callers pass
 * 2 for '\xDD' and 4 for '\uDDDD') and converts them to a string.
 *
 * @param numDigits the exact number of hexadecimal digits the escape requires.
 * @returns the single-code-unit string for the scanned value, or "" after
 *          reporting Hexadecimal_digit_expected when the digits are missing.
 */
function scanHexadecimalEscape(numDigits: number): string {
    var codeUnit = scanExactNumberOfHexDigits(numDigits);
    var scannedAllDigits = codeUnit >= 0;

    if (!scannedAllDigits) {
        // A negative result signals that too few hex digits were available.
        error(Diagnostics.Hexadecimal_digit_expected);
    }

    return scannedAllDigits ? String.fromCharCode(codeUnit) : "";
}
827
+
828
/**
 * Scans the remainder of an extended Unicode escape of the form '\u{D...}'.
 * The caller has already consumed the opening '{', so scanning starts at
 * the first hexadecimal digit.
 *
 * Errors are reported (and "" is returned) when:
 *   - no hexadecimal digit follows the '{',
 *   - the scanned value is greater than 0x10FFFF,
 *   - the text ends before a closing '}' is seen, or
 *   - the character after the digits is not '}'.
 * The digit check and the terminator check are independent, so a single
 * malformed escape may produce two diagnostics.
 *
 * @returns the UTF-16 encoded string for the escaped code point, or ""
 *          when the escape is invalid.
 */
function scanExtendedUnicodeEscape(): string {
    var escapedValue = scanMinimumNumberOfHexDigits(1);
    var isInvalidExtendedEscape = false;

    // Validate the scanned code point value.
    if (escapedValue < 0) {
        error(Diagnostics.Hexadecimal_digit_expected);
        isInvalidExtendedEscape = true;
    }
    else if (escapedValue > 0x10FFFF) {
        error(Diagnostics.An_extended_Unicode_escape_value_must_be_between_0x0_and_0x10FFFF_inclusive);
        isInvalidExtendedEscape = true;
    }

    // Validate the terminator.
    if (pos >= len) {
        error(Diagnostics.Unexpected_end_of_text);
        isInvalidExtendedEscape = true;
    }
    else if (text.charCodeAt(pos) === CharacterCodes.closeBrace) {
        // Only swallow the following character if it is a '}'; anything else
        // is left in place for the scanner to process normally.
        pos++;
    }
    else {
        error(Diagnostics.Unterminated_Unicode_escape_sequence);
        isInvalidExtendedEscape = true;
    }

    if (isInvalidExtendedEscape) {
        return "";
    }

    return utf16EncodeAsString(escapedValue);
}
861
+
862
/**
 * Encodes a Unicode code point as a string of one or two UTF-16 code units.
 * Derived from the 10.1.1 UTF16Encoding of the ES6 Spec.
 *
 * @param codePoint a code point asserted to lie in [0x0, 0x10FFFF].
 * @returns a one-code-unit string for values up to 0xFFFF, otherwise the
 *          two-code-unit surrogate pair for the code point.
 */
function utf16EncodeAsString(codePoint: number): string {
    Debug.assert(0x0 <= codePoint && codePoint <= 0x10FFFF);

    if (codePoint > 0xFFFF) {
        // Distance of the code point above the first supplementary value (0x10000).
        var supplementaryOffset = codePoint - 0x10000;
        // The quotient by 0x400 selects the lead surrogate, the remainder the trail surrogate.
        var leadSurrogate = Math.floor(supplementaryOffset / 0x400) + 0xD800;
        var trailSurrogate = (supplementaryOffset % 0x400) + 0xDC00;

        return String.fromCharCode(leadSurrogate, trailSurrogate);
    }

    return String.fromCharCode(codePoint);
}
793
875
794
876
// Current character is known to be a backslash. Check for Unicode escape of the form '\uXXXX'
795
877
// and return code point value if valid Unicode escape is found. Otherwise return -1.
796
878
function peekUnicodeEscape ( ) : number {
797
879
if ( pos + 5 < len && text . charCodeAt ( pos + 1 ) === CharacterCodes . u ) {
798
880
var start = pos ;
799
881
pos += 2 ;
800
- var value = scanHexDigits ( 4 , /*mustMatchCount*/ true ) ;
882
+ var value = scanExactNumberOfHexDigits ( 4 ) ;
801
883
pos = start ;
802
884
return value ;
803
885
}
@@ -869,6 +951,7 @@ module ts {
869
951
870
952
function scan ( ) : SyntaxKind {
871
953
startPos = pos ;
954
+ hasExtendedUnicodeEscape = false ;
872
955
precedingLineBreak = false ;
873
956
tokenIsUnterminated = false ;
874
957
while ( true ) {
@@ -1034,7 +1117,7 @@ module ts {
1034
1117
case CharacterCodes . _0 :
1035
1118
if ( pos + 2 < len && ( text . charCodeAt ( pos + 1 ) === CharacterCodes . X || text . charCodeAt ( pos + 1 ) === CharacterCodes . x ) ) {
1036
1119
pos += 2 ;
1037
- var value = scanHexDigits ( 1 , /*mustMatchCount*/ false ) ;
1120
+ var value = scanMinimumNumberOfHexDigits ( 1 ) ;
1038
1121
if ( value < 0 ) {
1039
1122
error ( Diagnostics . Hexadecimal_digit_expected ) ;
1040
1123
value = 0 ;
@@ -1336,6 +1419,7 @@ module ts {
1336
1419
getTokenPos : ( ) => tokenPos ,
1337
1420
getTokenText : ( ) => text . substring ( tokenPos , pos ) ,
1338
1421
getTokenValue : ( ) => tokenValue ,
1422
+ hasExtendedUnicodeEscape : ( ) => hasExtendedUnicodeEscape ,
1339
1423
hasPrecedingLineBreak : ( ) => precedingLineBreak ,
1340
1424
isIdentifier : ( ) => token === SyntaxKind . Identifier || token > SyntaxKind . LastReservedWord ,
1341
1425
isReservedWord : ( ) => token >= SyntaxKind . FirstReservedWord && token <= SyntaxKind . LastReservedWord ,
0 commit comments