microsoft · andrewbranch · Jan 29, 2021 · Sep 25, 2020 · Jan 27, 2021 · Jan 27, 2021
diff --git a/src/compiler/parser.ts b/src/compiler/parser.ts
@@ -1637,8 +1637,8 @@ namespace ts {
         // with magic property names like '__proto__'. The 'identifiers' object is used to share a single string instance for
         // each identifier in order to reduce memory consumption.
         function createIdentifier(isIdentifier: boolean, diagnosticMessage?: DiagnosticMessage, privateIdentifierDiagnosticMessage?: DiagnosticMessage): Identifier {
-            identifierCount++;
             if (isIdentifier) {
+                identifierCount++;
                 const pos = getNodePos();
                 // Store original token kind if it is not just an Identifier so we can report appropriate error later in type checker
                 const originalKeywordKind = token();
@@ -1652,6 +1652,12 @@ namespace ts {
                 return createIdentifier(/*isIdentifier*/ true);
             }
 
+            if (token() === SyntaxKind.Unknown && scanner.tryScan(() => scanner.reScanInvalidIdentifier() === SyntaxKind.Identifier)) {
+                // Scanner has already recorded an 'Invalid character' error, so no need to add another from the parser.
+                return createIdentifier(/*isIdentifier*/ true);
+            }
+
+            identifierCount++;
             // Only for end of file because the error gets reported incorrectly on embedded script tags.
             const reportAtCurrentPosition = token() === SyntaxKind.EndOfFileToken;
 

diff --git a/src/compiler/scanner.ts b/src/compiler/scanner.ts
@@ -43,6 +43,7 @@ namespace ts {
         reScanJsxToken(): JsxTokenSyntaxKind;
         reScanLessThanToken(): SyntaxKind;
         reScanQuestionToken(): SyntaxKind;
+        reScanInvalidIdentifier(): SyntaxKind;
         scanJsxToken(): JsxTokenSyntaxKind;
         scanJsDocToken(): JSDocSyntaxKind;
         scan(): SyntaxKind;
@@ -966,6 +967,7 @@ namespace ts {
             reScanJsxToken,
             reScanLessThanToken,
             reScanQuestionToken,
+            reScanInvalidIdentifier,
             scanJsxToken,
             scanJsDocToken,
             scan,
@@ -2041,14 +2043,9 @@ namespace ts {
                         }
                         return token = SyntaxKind.PrivateIdentifier;
                     default:
-                        if (isIdentifierStart(ch, languageVersion)) {
-                            pos += charSize(ch);
-                            while (pos < end && isIdentifierPart(ch = codePointAt(text, pos), languageVersion)) pos += charSize(ch);
-                            tokenValue = text.substring(tokenPos, pos);
-                            if (ch === CharacterCodes.backslash) {
-                                tokenValue += scanIdentifierParts();
-                            }
-                            return token = getIdentifierToken();
+                        const identifierKind = scanIdentifier(ch, languageVersion);
+                        if (identifierKind) {
+                            return token = identifierKind;
                         }
                         else if (isWhiteSpaceSingleLine(ch)) {
                             pos += charSize(ch);
@@ -2066,6 +2063,32 @@ namespace ts {
             }
         }
 
+        function reScanInvalidIdentifier(): SyntaxKind { // Re-scans the current `Unknown` token, retrying it as an identifier under ESNext rules.
+            Debug.assert(token === SyntaxKind.Unknown, "'reScanInvalidIdentifier' should only be called when the current token is 'SyntaxKind.Unknown'.");
+            pos = tokenPos = startPos; // Rewind so the scan below restarts from `startPos`.
+            tokenFlags = 0;
+            const ch = codePointAt(text, pos);
+            const identifierKind = scanIdentifier(ch, ScriptTarget.ESNext); // Always ESNext here, not the scanner's configured language version.
+            if (identifierKind) {
+                return token = identifierKind;
+            }
+            pos += charSize(ch); // Not an identifier under ESNext either: step past the offending character.
+            return token; // Still `SyntaxKind.Unknown`
+        }
+
+        function scanIdentifier(startCharacter: number, languageVersion: ScriptTarget) { // NOTE: the `languageVersion` parameter shadows the outer variable of the same name that scan() passes in.
+            let ch = startCharacter;
+            if (isIdentifierStart(ch, languageVersion)) {
+                pos += charSize(ch);
+                while (pos < end && isIdentifierPart(ch = codePointAt(text, pos), languageVersion)) pos += charSize(ch); // Advance over every following identifier-part character.
+                tokenValue = text.substring(tokenPos, pos);
+                if (ch === CharacterCodes.backslash) { // Loop stopped on a backslash: append whatever scanIdentifierParts() returns (presumably the escaped remainder; confirm).
+                    tokenValue += scanIdentifierParts();
+                }
+                return getIdentifierToken();
+            }
+        } // Falls off the end (implicitly returns undefined) when `startCharacter` cannot start an identifier; both callers test the result for truthiness.
+
         function reScanGreaterToken(): SyntaxKind {
             if (token === SyntaxKind.GreaterThanToken) {
                 if (text.charCodeAt(pos) === CharacterCodes.greaterThan) {

diff --git a/src/harness/fourslashImpl.ts b/src/harness/fourslashImpl.ts
@@ -1453,9 +1453,9 @@ namespace FourSlash {
         }
 
         public baselineRename(marker: string, options: FourSlashInterface.RenameOptions) {
-            const position = this.getMarkerByName(marker).position;
+            const { fileName, position } = this.getMarkerByName(marker);
             const locations = this.languageService.findRenameLocations(
-                this.activeFile.fileName,
+                fileName,
                 position,
                 options.findInStrings ?? false,
                 options.findInComments ?? false,

diff --git a/tests/baselines/reference/api/tsserverlibrary.d.ts b/tests/baselines/reference/api/tsserverlibrary.d.ts
@@ -3970,6 +3970,7 @@ declare namespace ts {
         reScanJsxToken(): JsxTokenSyntaxKind;
         reScanLessThanToken(): SyntaxKind;
         reScanQuestionToken(): SyntaxKind;
+        reScanInvalidIdentifier(): SyntaxKind;
         scanJsxToken(): JsxTokenSyntaxKind;
         scanJsDocToken(): JSDocSyntaxKind;
         scan(): SyntaxKind;

diff --git a/tests/baselines/reference/api/typescript.d.ts b/tests/baselines/reference/api/typescript.d.ts
@@ -3970,6 +3970,7 @@ declare namespace ts {
         reScanJsxToken(): JsxTokenSyntaxKind;
         reScanLessThanToken(): SyntaxKind;
         reScanQuestionToken(): SyntaxKind;
+        reScanInvalidIdentifier(): SyntaxKind;
         scanJsxToken(): JsxTokenSyntaxKind;
         scanJsDocToken(): JSDocSyntaxKind;
         scan(): SyntaxKind;

diff --git a/tests/baselines/reference/processInvalidSyntax1.baseline b/tests/baselines/reference/processInvalidSyntax1.baseline
@@ -0,0 +1,21 @@
+/*====== /tests/cases/fourslash/decl.js ======*/
+
+var RENAME = {};
+
+/*====== /tests/cases/fourslash/unicode1.js ======*/
+
+RENAME.𝒜 ;
+
+/*====== /tests/cases/fourslash/unicode2.js ======*/
+
+RENAME.¬ ;
+
+/*====== /tests/cases/fourslash/unicode3.js ======*/
+
+RENAME¬
+
+/*====== /tests/cases/fourslash/forof.js ======*/
+
+for (RENAME.prop of arr) {
+
+}
diff --git a/tests/cases/fourslash/processInvalidSyntax1.ts b/tests/cases/fourslash/processInvalidSyntax1.ts
@@ -0,0 +1,25 @@
+/// <reference path="fourslash.ts" />
+
+// @allowJs: true
+
+// Test validates that language service getChildren() doesn't
+// crash due to the invalid identifiers in unicode1.js, unicode2.js and unicode3.js.
+
+// @Filename: decl.js
+//// var obj = {};
+
+// @Filename: unicode1.js
+//// obj.𝒜 ;
+
+// @Filename: unicode2.js
+//// obj.¬ ;
+
+// @Filename: unicode3.js
+//// obj¬
+
+// @Filename: forof.js
+//// for (obj/**/.prop of arr) {
+//// 
+//// }
+
+verify.baselineRename("", {});