@@ -68,7 +68,7 @@ export class Lexer {
68
68
token = token . next ;
69
69
} else {
70
70
// Read the next token and form a link in the token linked-list.
71
- const nextToken = readNextToken ( this , token ) ;
71
+ const nextToken = readNextToken ( this , token . end ) ;
72
72
// @ts-expect-error next is only mutable during parsing.
73
73
token . next = nextToken ;
74
74
// @ts-expect-error prev is only mutable during parsing.
@@ -161,10 +161,10 @@ function createToken(
161
161
* punctuators immediately or calls the appropriate helper function for more
162
162
* complicated tokens.
163
163
*/
164
- function readNextToken ( lexer : Lexer , prev : Token ) : Token {
164
+ function readNextToken ( lexer : Lexer , start : number ) : Token {
165
165
const body = lexer . source . body ;
166
166
const bodyLength = body . length ;
167
- let position = prev . end ;
167
+ let position = start ;
168
168
169
169
while ( position < bodyLength ) {
170
170
const code = body . charCodeAt ( position ) ;
@@ -185,22 +185,22 @@ function readNextToken(lexer: Lexer, prev: Token): Token {
185
185
// - "Space (U+0020)"
186
186
//
187
187
// Comma :: ,
188
- case 0x0009 : // \t
189
- case 0x0020 : // <space>
190
- case 0x002c : // ,
191
- case 0xfeff : // <BOM>
188
+ case 0xfeff : // <BOM>
189
+ case 0x0009 : // \t
190
+ case 0x0020 : // <space>
191
+ case 0x002c : // ,
192
192
++ position ;
193
193
continue ;
194
194
// LineTerminator ::
195
195
// - "New Line (U+000A)"
196
196
// - "Carriage Return (U+000D)" [lookahead != "New Line (U+000A)"]
197
197
// - "Carriage Return (U+000D)" "New Line (U+000A)"
198
- case 0x000a : // \n
198
+ case 0x000a : // \n
199
199
++ position ;
200
200
++ lexer . line ;
201
201
lexer . lineStart = position ;
202
202
continue ;
203
- case 0x000d : // \r
203
+ case 0x000d : // \r
204
204
if ( body . charCodeAt ( position + 1 ) === 0x000a ) {
205
205
position += 2 ;
206
206
} else {
@@ -210,7 +210,7 @@ function readNextToken(lexer: Lexer, prev: Token): Token {
210
210
lexer . lineStart = position ;
211
211
continue ;
212
212
// Comment
213
- case 0x0023 : // #
213
+ case 0x0023 : // #
214
214
return readComment ( lexer , position ) ;
215
215
// Token ::
216
216
// - Punctuator
@@ -220,42 +220,42 @@ function readNextToken(lexer: Lexer, prev: Token): Token {
220
220
// - StringValue
221
221
//
222
222
// Punctuator :: one of ! $ & ( ) ... : = @ [ ] { | }
223
- case 0x0021 : // !
223
+ case 0x0021 : // !
224
224
return createToken ( lexer , TokenKind . BANG , position , position + 1 ) ;
225
- case 0x0024 : // $
225
+ case 0x0024 : // $
226
226
return createToken ( lexer , TokenKind . DOLLAR , position , position + 1 ) ;
227
- case 0x0026 : // &
227
+ case 0x0026 : // &
228
228
return createToken ( lexer , TokenKind . AMP , position , position + 1 ) ;
229
- case 0x0028 : // (
229
+ case 0x0028 : // (
230
230
return createToken ( lexer , TokenKind . PAREN_L , position , position + 1 ) ;
231
- case 0x0029 : // )
231
+ case 0x0029 : // )
232
232
return createToken ( lexer , TokenKind . PAREN_R , position , position + 1 ) ;
233
- case 0x002e : // .
233
+ case 0x002e : // .
234
234
if (
235
235
body . charCodeAt ( position + 1 ) === 0x002e &&
236
236
body . charCodeAt ( position + 2 ) === 0x002e
237
237
) {
238
238
return createToken ( lexer , TokenKind . SPREAD , position , position + 3 ) ;
239
239
}
240
240
break ;
241
- case 0x003a : // :
241
+ case 0x003a : // :
242
242
return createToken ( lexer , TokenKind . COLON , position , position + 1 ) ;
243
- case 0x003d : // =
243
+ case 0x003d : // =
244
244
return createToken ( lexer , TokenKind . EQUALS , position , position + 1 ) ;
245
- case 0x0040 : // @
245
+ case 0x0040 : // @
246
246
return createToken ( lexer , TokenKind . AT , position , position + 1 ) ;
247
- case 0x005b : // [
247
+ case 0x005b : // [
248
248
return createToken ( lexer , TokenKind . BRACKET_L , position , position + 1 ) ;
249
- case 0x005d : // ]
249
+ case 0x005d : // ]
250
250
return createToken ( lexer , TokenKind . BRACKET_R , position , position + 1 ) ;
251
- case 0x007b : // {
251
+ case 0x007b : // {
252
252
return createToken ( lexer , TokenKind . BRACE_L , position , position + 1 ) ;
253
- case 0x007c : // |
253
+ case 0x007c : // |
254
254
return createToken ( lexer , TokenKind . PIPE , position , position + 1 ) ;
255
- case 0x007d : // }
255
+ case 0x007d : // }
256
256
return createToken ( lexer , TokenKind . BRACE_R , position , position + 1 ) ;
257
257
// StringValue
258
- case 0x0022 : // "
258
+ case 0x0022 : // "
259
259
if (
260
260
body . charCodeAt ( position + 1 ) === 0x0022 &&
261
261
body . charCodeAt ( position + 2 ) === 0x0022
@@ -265,9 +265,8 @@ function readNextToken(lexer: Lexer, prev: Token): Token {
265
265
return readString ( lexer , position ) ;
266
266
}
267
267
268
- // IntValue | FloatValue
269
- // 0-9 | -
270
- if ( ( code >= 0x0030 && code <= 0x0039 ) || code === 0x002d ) {
268
+ // IntValue | FloatValue (Digit | -)
269
+ if ( isDigit ( code ) || code === 0x002d ) {
271
270
return readNumber ( lexer , position , code ) ;
272
271
}
273
272
@@ -305,7 +304,7 @@ function readComment(lexer: Lexer, start: number): Token {
305
304
while ( position < bodyLength ) {
306
305
const code = body . charCodeAt ( position ) ;
307
306
308
- // LineTerminator (\n or \r)
307
+ // LineTerminator (\n | \r)
309
308
if ( code === 0x000a || code === 0x000d ) {
310
309
break ;
311
310
}
@@ -331,9 +330,6 @@ function readComment(lexer: Lexer, start: number): Token {
331
330
* Reads a number token from the source file, either a FloatValue or an IntValue
332
331
* depending on whether a FractionalPart or ExponentPart is encountered.
333
332
*
334
- * Digit :: one of
335
- * - `0` `1` `2` `3` `4` `5` `6` `7` `8` `9`
336
- *
337
333
* IntValue :: IntegerPart [lookahead != {Digit, `.`, NameStart}]
338
334
*
339
335
* IntegerPart ::
@@ -371,8 +367,7 @@ function readNumber(lexer: Lexer, start: number, firstCode: number): Token {
371
367
// Zero (0)
372
368
if ( code === 0x0030 ) {
373
369
code = body . charCodeAt ( ++ position ) ;
374
- // Digit (0-9)
375
- if ( code >= 0x0030 && code <= 0x0039 ) {
370
+ if ( isDigit ( code ) ) {
376
371
throw syntaxError (
377
372
lexer . source ,
378
373
position ,
@@ -434,25 +429,26 @@ function readNumber(lexer: Lexer, start: number, firstCode: number): Token {
434
429
* Returns the new position in the source after reading one or more digits.
435
430
*/
436
431
function readDigits ( lexer : Lexer , start : number , firstCode : number ) : number {
// Guard first: firstCode must be a digit, otherwise a syntax error is reported at `start`.
432
+ if ( ! isDigit ( firstCode ) ) {
433
+ throw syntaxError (
434
+ lexer . source ,
435
+ start ,
436
+ `Invalid number, expected digit but got: ${ printCodePointAt (
437
+ lexer ,
438
+ start ,
439
+ ) } .`,
440
+ ) ;
441
+ }
442
+
437
443
const body = lexer . source . body ;
438
444
let position = start ;
439
445
let code = firstCode ;
440
446
441
- // 0 - 9
442
- if ( code >= 0x0030 && code <= 0x0039 ) {
443
- do {
444
- code = body . charCodeAt ( ++ position ) ;
445
- } while ( code >= 0x0030 && code <= 0x0039 ) ; // 0 - 9
446
- return position ;
447
- }
448
- throw syntaxError (
449
- lexer . source ,
450
- position ,
451
- `Invalid number, expected digit but got: ${ printCodePointAt (
452
- lexer ,
453
- position ,
454
- ) } .`,
455
- ) ;
// firstCode has already been checked by the guard, so advance past it before testing the next code.
447
+ do {
448
+ code = body . charCodeAt ( ++ position ) ;
449
+ } while ( isDigit ( code ) ) ;
450
+
451
+ return position ;
456
452
}
457
453
458
454
/**
@@ -500,7 +496,7 @@ function readString(lexer: Lexer, start: number): Token {
500
496
continue ;
501
497
}
502
498
503
- // LineTerminator (\n or \r)
499
+ // LineTerminator (\n | \r)
504
500
if ( code === 0x000a || code === 0x000d ) {
505
501
break ;
506
502
}
@@ -545,40 +541,39 @@ function readEscapedUnicode(lexer: Lexer, position: number): EscapeSequence {
545
541
}
546
542
547
543
/**
548
- * Reads four hexadecimal chars and returns the integer that 16bit hexadecimal
549
- * string represents. For example, "000f" will return 15, and "dead" will
550
- * return 57005.
544
+ * Reads four hexadecimal characters and returns the non-negative integer that 16bit
545
+ * hexadecimal string represents. For example, "000f" will return 15, and "dead"
546
+ * will return 57005.
551
547
*
552
548
* Returns a negative number if any char was not a valid hexadecimal digit.
553
- *
554
- * This is implemented by noting that hexValue() returns -1 on error,
555
- * which means the result of ORing the hexValue() will also be negative.
556
549
*/
557
550
function read16BitHexCode ( body : string , position : number ) : number {
551
+ // readHexDigit() returns -1 on error, and -1 stays negative after the left shift.
552
+ // ORing a negative 32-bit integer with any other value always produces a negative value.
558
553
return (
559
- ( hexValue ( body . charCodeAt ( position ) ) << 12 ) |
560
- ( hexValue ( body . charCodeAt ( position + 1 ) ) << 8 ) |
561
- ( hexValue ( body . charCodeAt ( position + 2 ) ) << 4 ) |
562
- hexValue ( body . charCodeAt ( position + 3 ) )
554
+ ( readHexDigit ( body . charCodeAt ( position ) ) << 12 ) |
555
+ ( readHexDigit ( body . charCodeAt ( position + 1 ) ) << 8 ) |
556
+ ( readHexDigit ( body . charCodeAt ( position + 2 ) ) << 4 ) |
557
+ readHexDigit ( body . charCodeAt ( position + 3 ) )
563
558
) ;
564
559
}
565
560
566
561
/**
567
- * Converts a hex character to its integer value.
562
+ * Reads a hexadecimal character and returns its non-negative integer value (0-15).
568
563
*
569
564
* '0' becomes 0, '9' becomes 9
570
565
* 'A' becomes 10, 'F' becomes 15
571
566
* 'a' becomes 10, 'f' becomes 15
572
567
*
573
- * Any other input returns -1 .
568
+ * Returns -1 if the provided character code was not a valid hexadecimal digit.
574
569
*/
575
- function hexValue ( code : number ) : number {
576
- return code >= 0x0030 && code <= 0x0039
577
- ? code - 0x0030 // 0-9
578
- : code >= 0x0041 && code <= 0x0046
579
- ? code - 0x0037 // A-F
580
- : code >= 0x0061 && code <= 0x0066
581
- ? code - 0x0057 // a-f
570
+ function readHexDigit ( code : number ) : number {
571
+ return code >= 0x0030 && code <= 0x0039 // 0-9
572
+ ? code - 0x0030
573
+ : code >= 0x0041 && code <= 0x0046 // A-F
574
+ ? code - 0x0037 // 'A' (0x0041) - 0x0037 = 10
575
+ : code >= 0x0061 && code <= 0x0066 // a-f
576
+ ? code - 0x0057 // 'a' (0x0061) - 0x0057 = 10
582
577
: - 1 ;
583
578
}
584
579
@@ -718,15 +713,6 @@ function readBlockString(lexer: Lexer, start: number): Token {
718
713
* - Letter
719
714
* - Digit
720
715
* - `_`
721
- *
722
- * Letter :: one of
723
- * - `A` `B` `C` `D` `E` `F` `G` `H` `I` `J` `K` `L` `M`
724
- * - `N` `O` `P` `Q` `R` `S` `T` `U` `V` `W` `X` `Y` `Z`
725
- * - `a` `b` `c` `d` `e` `f` `g` `h` `i` `j` `k` `l` `m`
726
- * - `n` `o` `p` `q` `r` `s` `t` `u` `v` `w` `x` `y` `z`
727
- *
728
- * Digit :: one of
729
- * - `0` `1` `2` `3` `4` `5` `6` `7` `8` `9`
730
716
*/
731
717
function readName ( lexer : Lexer , start : number ) : Token {
732
718
const body = lexer . source . body ;
@@ -736,17 +722,13 @@ function readName(lexer: Lexer, start: number): Token {
736
722
while ( position < bodyLength ) {
737
723
const code = body . charCodeAt ( position ) ;
738
724
// NameContinue
739
- if (
740
- ( code >= 0x0061 && code <= 0x007a ) || // a-z
741
- ( code >= 0x0041 && code <= 0x005a ) || // A-Z
742
- ( code >= 0x0030 && code <= 0x0039 ) || // 0-9
743
- code === 0x005f // _
744
- ) {
725
+ if ( isLetter ( code ) || isDigit ( code ) || code === 0x005f ) {
745
726
++ position ;
746
727
} else {
747
728
break ;
748
729
}
749
730
}
731
+
750
732
return createToken (
751
733
lexer ,
752
734
TokenKind . NAME ,
@@ -756,11 +738,28 @@ function readName(lexer: Lexer, start: number): Token {
756
738
) ;
757
739
}
758
740
759
- // a-z | A-Z | _
760
741
function isNameStart ( code : number ) : boolean {
742
+ return isLetter ( code ) || code === 0x005f ; // Letter | `_`
743
+ }
744
+
745
+ /**
746
+ * Digit :: one of
747
+ * - `0` `1` `2` `3` `4` `5` `6` `7` `8` `9`
748
+ */
749
+ function isDigit ( code : number ) : boolean {
750
+ return code >= 0x0030 && code <= 0x0039 ; // 0-9
751
+ }
752
+
753
+ /**
754
+ * Letter :: one of
755
+ * - `A` `B` `C` `D` `E` `F` `G` `H` `I` `J` `K` `L` `M`
756
+ * - `N` `O` `P` `Q` `R` `S` `T` `U` `V` `W` `X` `Y` `Z`
757
+ * - `a` `b` `c` `d` `e` `f` `g` `h` `i` `j` `k` `l` `m`
758
+ * - `n` `o` `p` `q` `r` `s` `t` `u` `v` `w` `x` `y` `z`
759
+ */
760
+ function isLetter ( code : number ) : boolean {
761
761
return (
762
- ( code >= 0x0061 && code <= 0x007a ) ||
763
- ( code >= 0x0041 && code <= 0x005a ) ||
764
- code === 0x005f
762
+ ( code >= 0x0061 && code <= 0x007a ) || // a-z
763
+ ( code >= 0x0041 && code <= 0x005a ) // A-Z
765
764
) ;
766
765
}
0 commit comments