Skip to content

Commit 0d6a096

Browse files
Code review
Co-authored-by: Ivan Goncharov <[email protected]>
1 parent 2f893d6 commit 0d6a096

File tree

2 files changed

+88
-89
lines changed

2 files changed

+88
-89
lines changed

src/language/lexer.ts

+87-88
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,7 @@ export class Lexer {
6868
token = token.next;
6969
} else {
7070
// Read the next token and form a link in the token linked-list.
71-
const nextToken = readNextToken(this, token);
71+
const nextToken = readNextToken(this, token.end);
7272
// @ts-expect-error next is only mutable during parsing.
7373
token.next = nextToken;
7474
// @ts-expect-error prev is only mutable during parsing.
@@ -161,10 +161,10 @@ function createToken(
161161
* punctuators immediately or calls the appropriate helper function for more
162162
* complicated tokens.
163163
*/
164-
function readNextToken(lexer: Lexer, prev: Token): Token {
164+
function readNextToken(lexer: Lexer, start: number): Token {
165165
const body = lexer.source.body;
166166
const bodyLength = body.length;
167-
let position = prev.end;
167+
let position = start;
168168

169169
while (position < bodyLength) {
170170
const code = body.charCodeAt(position);
@@ -185,22 +185,22 @@ function readNextToken(lexer: Lexer, prev: Token): Token {
185185
// - "Space (U+0020)"
186186
//
187187
// Comma :: ,
188-
case 0x0009: // \t
189-
case 0x0020: // <space>
190-
case 0x002c: // ,
191-
case 0xfeff: // <BOM>
188+
case 0xfeff: // <BOM>
189+
case 0x0009: // \t
190+
case 0x0020: // <space>
191+
case 0x002c: // ,
192192
++position;
193193
continue;
194194
// LineTerminator ::
195195
// - "New Line (U+000A)"
196196
// - "Carriage Return (U+000D)" [lookahead != "New Line (U+000A)"]
197197
// - "Carriage Return (U+000D)" "New Line (U+000A)"
198-
case 0x000a: // \n
198+
case 0x000a: // \n
199199
++position;
200200
++lexer.line;
201201
lexer.lineStart = position;
202202
continue;
203-
case 0x000d: // \r
203+
case 0x000d: // \r
204204
if (body.charCodeAt(position + 1) === 0x000a) {
205205
position += 2;
206206
} else {
@@ -210,7 +210,7 @@ function readNextToken(lexer: Lexer, prev: Token): Token {
210210
lexer.lineStart = position;
211211
continue;
212212
// Comment
213-
case 0x0023: // #
213+
case 0x0023: // #
214214
return readComment(lexer, position);
215215
// Token ::
216216
// - Punctuator
@@ -220,42 +220,42 @@ function readNextToken(lexer: Lexer, prev: Token): Token {
220220
// - StringValue
221221
//
222222
// Punctuator :: one of ! $ & ( ) ... : = @ [ ] { | }
223-
case 0x0021: // !
223+
case 0x0021: // !
224224
return createToken(lexer, TokenKind.BANG, position, position + 1);
225-
case 0x0024: // $
225+
case 0x0024: // $
226226
return createToken(lexer, TokenKind.DOLLAR, position, position + 1);
227-
case 0x0026: // &
227+
case 0x0026: // &
228228
return createToken(lexer, TokenKind.AMP, position, position + 1);
229-
case 0x0028: // (
229+
case 0x0028: // (
230230
return createToken(lexer, TokenKind.PAREN_L, position, position + 1);
231-
case 0x0029: // )
231+
case 0x0029: // )
232232
return createToken(lexer, TokenKind.PAREN_R, position, position + 1);
233-
case 0x002e: // .
233+
case 0x002e: // .
234234
if (
235235
body.charCodeAt(position + 1) === 0x002e &&
236236
body.charCodeAt(position + 2) === 0x002e
237237
) {
238238
return createToken(lexer, TokenKind.SPREAD, position, position + 3);
239239
}
240240
break;
241-
case 0x003a: // :
241+
case 0x003a: // :
242242
return createToken(lexer, TokenKind.COLON, position, position + 1);
243-
case 0x003d: // =
243+
case 0x003d: // =
244244
return createToken(lexer, TokenKind.EQUALS, position, position + 1);
245-
case 0x0040: // @
245+
case 0x0040: // @
246246
return createToken(lexer, TokenKind.AT, position, position + 1);
247-
case 0x005b: // [
247+
case 0x005b: // [
248248
return createToken(lexer, TokenKind.BRACKET_L, position, position + 1);
249-
case 0x005d: // ]
249+
case 0x005d: // ]
250250
return createToken(lexer, TokenKind.BRACKET_R, position, position + 1);
251-
case 0x007b: // {
251+
case 0x007b: // {
252252
return createToken(lexer, TokenKind.BRACE_L, position, position + 1);
253-
case 0x007c: // |
253+
case 0x007c: // |
254254
return createToken(lexer, TokenKind.PIPE, position, position + 1);
255-
case 0x007d: // }
255+
case 0x007d: // }
256256
return createToken(lexer, TokenKind.BRACE_R, position, position + 1);
257257
// StringValue
258-
case 0x0022: // "
258+
case 0x0022: // "
259259
if (
260260
body.charCodeAt(position + 1) === 0x0022 &&
261261
body.charCodeAt(position + 2) === 0x0022
@@ -265,9 +265,8 @@ function readNextToken(lexer: Lexer, prev: Token): Token {
265265
return readString(lexer, position);
266266
}
267267

268-
// IntValue | FloatValue
269-
// 0-9 | -
270-
if ((code >= 0x0030 && code <= 0x0039) || code === 0x002d) {
268+
// IntValue | FloatValue (Digit | -)
269+
if (isDigit(code) || code === 0x002d) {
271270
return readNumber(lexer, position, code);
272271
}
273272

@@ -305,7 +304,7 @@ function readComment(lexer: Lexer, start: number): Token {
305304
while (position < bodyLength) {
306305
const code = body.charCodeAt(position);
307306

308-
// LineTerminator (\n or \r)
307+
// LineTerminator (\n | \r)
309308
if (code === 0x000a || code === 0x000d) {
310309
break;
311310
}
@@ -331,9 +330,6 @@ function readComment(lexer: Lexer, start: number): Token {
331330
* Reads a number token from the source file, either a FloatValue or an IntValue
332331
* depending on whether a FractionalPart or ExponentPart is encountered.
333332
*
334-
* Digit :: one of
335-
* - `0` `1` `2` `3` `4` `5` `6` `7` `8` `9`
336-
*
337333
* IntValue :: IntegerPart [lookahead != {Digit, `.`, NameStart}]
338334
*
339335
* IntegerPart ::
@@ -371,8 +367,7 @@ function readNumber(lexer: Lexer, start: number, firstCode: number): Token {
371367
// Zero (0)
372368
if (code === 0x0030) {
373369
code = body.charCodeAt(++position);
374-
// Digit (0-9)
375-
if (code >= 0x0030 && code <= 0x0039) {
370+
if (isDigit(code)) {
376371
throw syntaxError(
377372
lexer.source,
378373
position,
@@ -434,25 +429,26 @@ function readNumber(lexer: Lexer, start: number, firstCode: number): Token {
434429
* Returns the new position in the source after reading one or more digits.
435430
*/
436431
function readDigits(lexer: Lexer, start: number, firstCode: number): number {
432+
if (!isDigit(firstCode)) {
433+
throw syntaxError(
434+
lexer.source,
435+
start,
436+
`Invalid number, expected digit but got: ${printCodePointAt(
437+
lexer,
438+
start,
439+
)}.`,
440+
);
441+
}
442+
437443
const body = lexer.source.body;
438444
let position = start;
439445
let code = firstCode;
440446

441-
// 0 - 9
442-
if (code >= 0x0030 && code <= 0x0039) {
443-
do {
444-
code = body.charCodeAt(++position);
445-
} while (code >= 0x0030 && code <= 0x0039); // 0 - 9
446-
return position;
447-
}
448-
throw syntaxError(
449-
lexer.source,
450-
position,
451-
`Invalid number, expected digit but got: ${printCodePointAt(
452-
lexer,
453-
position,
454-
)}.`,
455-
);
447+
do {
448+
code = body.charCodeAt(++position);
449+
} while (isDigit(code));
450+
451+
return position;
456452
}
457453

458454
/**
@@ -500,7 +496,7 @@ function readString(lexer: Lexer, start: number): Token {
500496
continue;
501497
}
502498

503-
// LineTerminator (\n or \r)
499+
// LineTerminator (\n | \r)
504500
if (code === 0x000a || code === 0x000d) {
505501
break;
506502
}
@@ -545,40 +541,39 @@ function readEscapedUnicode(lexer: Lexer, position: number): EscapeSequence {
545541
}
546542

547543
/**
548-
* Reads four hexadecimal chars and returns the integer that 16bit hexadecimal
549-
* string represents. For example, "000f" will return 15, and "dead" will
550-
* return 57005.
544+
* Reads four hexadecimal characters and returns the non-negative integer that
545+
* 16-bit hexadecimal string represents. For example, "000f" will return 15, and
546+
* "dead" will return 57005.
551547
*
552548
* Returns a negative number if any char was not a valid hexadecimal digit.
553-
*
554-
* This is implemented by noting that hexValue() returns -1 on error,
555-
* which means the result of ORing the hexValue() will also be negative.
556549
*/
557550
function read16BitHexCode(body: string, position: number): number {
551+
// readHexDigit() returns -1 on error. ORing a negative value with any other
552+
// value always produces a negative value.
558553
return (
559-
(hexValue(body.charCodeAt(position)) << 12) |
560-
(hexValue(body.charCodeAt(position + 1)) << 8) |
561-
(hexValue(body.charCodeAt(position + 2)) << 4) |
562-
hexValue(body.charCodeAt(position + 3))
554+
(readHexDigit(body.charCodeAt(position)) << 12) |
555+
(readHexDigit(body.charCodeAt(position + 1)) << 8) |
556+
(readHexDigit(body.charCodeAt(position + 2)) << 4) |
557+
readHexDigit(body.charCodeAt(position + 3))
563558
);
564559
}
565560

566561
/**
567-
* Converts a hex character to its integer value.
562+
* Reads a hexadecimal character and returns its integer value (0-15).
568563
*
569564
* '0' becomes 0, '9' becomes 9
570565
* 'A' becomes 10, 'F' becomes 15
571566
* 'a' becomes 10, 'f' becomes 15
572567
*
573-
* Any other input returns -1.
568+
* Returns -1 if the provided character code was not a valid hexadecimal digit.
574569
*/
575-
function hexValue(code: number): number {
576-
return code >= 0x0030 && code <= 0x0039
577-
? code - 0x0030 // 0-9
578-
: code >= 0x0041 && code <= 0x0046
579-
? code - 0x0037 // A-F
580-
: code >= 0x0061 && code <= 0x0066
581-
? code - 0x0057 // a-f
570+
function readHexDigit(code: number): number {
571+
return code >= 0x0030 && code <= 0x0039 // 0-9
572+
? code - 0x0030
573+
: code >= 0x0041 && code <= 0x0046 // A-F
574+
? code - 0x0037
575+
: code >= 0x0061 && code <= 0x0066 // a-f
576+
? code - 0x0057
582577
: -1;
583578
}
584579

@@ -718,15 +713,6 @@ function readBlockString(lexer: Lexer, start: number): Token {
718713
* - Letter
719714
* - Digit
720715
* - `_`
721-
*
722-
* Letter :: one of
723-
* - `A` `B` `C` `D` `E` `F` `G` `H` `I` `J` `K` `L` `M`
724-
* - `N` `O` `P` `Q` `R` `S` `T` `U` `V` `W` `X` `Y` `Z`
725-
* - `a` `b` `c` `d` `e` `f` `g` `h` `i` `j` `k` `l` `m`
726-
* - `n` `o` `p` `q` `r` `s` `t` `u` `v` `w` `x` `y` `z`
727-
*
728-
* Digit :: one of
729-
* - `0` `1` `2` `3` `4` `5` `6` `7` `8` `9`
730716
*/
731717
function readName(lexer: Lexer, start: number): Token {
732718
const body = lexer.source.body;
@@ -736,17 +722,13 @@ function readName(lexer: Lexer, start: number): Token {
736722
while (position < bodyLength) {
737723
const code = body.charCodeAt(position);
738724
// NameContinue
739-
if (
740-
(code >= 0x0061 && code <= 0x007a) || // a-z
741-
(code >= 0x0041 && code <= 0x005a) || // A-Z
742-
(code >= 0x0030 && code <= 0x0039) || // 0-9
743-
code === 0x005f // _
744-
) {
725+
if (isLetter(code) || isDigit(code) || code === 0x005f) {
745726
++position;
746727
} else {
747728
break;
748729
}
749730
}
731+
750732
return createToken(
751733
lexer,
752734
TokenKind.NAME,
@@ -756,11 +738,28 @@ function readName(lexer: Lexer, start: number): Token {
756738
);
757739
}
758740

759-
// a-z | A-Z | _
760741
function isNameStart(code: number): boolean {
742+
return isLetter(code) || code === 0x005f;
743+
}
744+
745+
/**
746+
* Digit :: one of
747+
* - `0` `1` `2` `3` `4` `5` `6` `7` `8` `9`
748+
*/
749+
function isDigit(code: number): boolean {
750+
return code >= 0x0030 && code <= 0x0039;
751+
}
752+
753+
/**
754+
* Letter :: one of
755+
* - `A` `B` `C` `D` `E` `F` `G` `H` `I` `J` `K` `L` `M`
756+
* - `N` `O` `P` `Q` `R` `S` `T` `U` `V` `W` `X` `Y` `Z`
757+
* - `a` `b` `c` `d` `e` `f` `g` `h` `i` `j` `k` `l` `m`
758+
* - `n` `o` `p` `q` `r` `s` `t` `u` `v` `w` `x` `y` `z`
759+
*/
760+
function isLetter(code: number): boolean {
761761
return (
762-
(code >= 0x0061 && code <= 0x007a) ||
763-
(code >= 0x0041 && code <= 0x005a) ||
764-
code === 0x005f
762+
(code >= 0x0061 && code <= 0x007a) || // a-z
763+
(code >= 0x0041 && code <= 0x005a) // A-Z
765764
);
766765
}

src/language/parser.ts

+1-1
Original file line numberDiff line numberDiff line change
@@ -1411,7 +1411,7 @@ export class Parser {
14111411
* If the next token is a given keyword, advance the lexer.
14121412
* Otherwise, do not change the parser state and throw an error.
14131413
*/
1414-
expectKeyword(value: string) {
1414+
expectKeyword(value: string): void {
14151415
const token = this._lexer.token;
14161416
if (token.kind === TokenKind.NAME && token.value === value) {
14171417
this._lexer.advance();

0 commit comments

Comments
 (0)