Skip to content

Commit efed5f0

Browse files
Merge pull request #2026 from Microsoft/lexicalTemplateStringsOnMaster
Lexical colorization/classification for template strings.
2 parents b1dc910 + a1b90f0 commit efed5f0

11 files changed

+459
-38
lines changed

src/services/services.ts

+123-10
Original file line numberDiff line numberDiff line change
@@ -1143,6 +1143,9 @@ module ts {
11431143
InMultiLineCommentTrivia,
11441144
InSingleQuoteStringLiteral,
11451145
InDoubleQuoteStringLiteral,
1146+
InTemplateHeadOrNoSubstitutionTemplate,
1147+
InTemplateMiddleOrTail,
1148+
InTemplateSubstitutionPosition,
11461149
}
11471150

11481151
export enum TokenClass {
@@ -1168,7 +1171,26 @@ module ts {
11681171
}
11691172

11701173
export interface Classifier {
1171-
getClassificationsForLine(text: string, lexState: EndOfLineState, classifyKeywordsInGenerics?: boolean): ClassificationResult;
1174+
/**
1175+
* Gives lexical classifications of tokens on a line without any syntactic context.
1176+
* For instance, a token consisting of the text 'string' can be either an identifier
1177+
* named 'string' or the keyword 'string'; however, because this classifier is not context-aware,
1178+
* it relies on certain heuristics to give acceptable results. For classifications where
1179+
* speed trumps accuracy, this function is preferable; however, for true accuracy, the
1180+
* syntactic classifier is ideal. In fact, in certain editing scenarios, combining the
1181+
* lexical, syntactic, and semantic classifiers may yield the best user experience.
1182+
*
1183+
* @param text The text of a line to classify.
1184+
* @param lexState The state of the lexical classifier at the end of the previous line.
1185+
* @param syntacticClassifierAbsent Whether the client is *not* using a syntactic classifier.
1186+
* If there is no syntactic classifier (syntacticClassifierAbsent=true),
1187+
* certain heuristics may be used in its place; however, if there is a
1188+
* syntactic classifier (syntacticClassifierAbsent=false), certain
1189+
* classifications which may be incorrectly categorized will be given
1190+
* back as Identifiers in order to allow the syntactic classifier to
1191+
* subsume the classification.
1192+
*/
1193+
getClassificationsForLine(text: string, lexState: EndOfLineState, syntacticClassifierAbsent: boolean): ClassificationResult;
11721194
}
11731195

11741196
/**
@@ -5617,6 +5639,28 @@ module ts {
56175639
noRegexTable[SyntaxKind.TrueKeyword] = true;
56185640
noRegexTable[SyntaxKind.FalseKeyword] = true;
56195641

5642+
// Just a stack of TemplateHeads and OpenCurlyBraces, used to perform rudimentary (inexact)
5643+
// classification on template strings. Because of the context free nature of templates,
5644+
// the only precise way to classify a template portion would be by propagating the stack across
5645+
// lines, just as we do with the end-of-line state. However, this is a burden for implementers,
5646+
// and the behavior is entirely subsumed by the syntactic classifier anyway, so we instead
5647+
// flatten any nesting when the template stack is non-empty and encode it in the end-of-line state.
5648+
// Situations in which this fails are
5649+
// 1) When template strings are nested across different lines:
5650+
// `hello ${ `world
5651+
// ` }`
5652+
//
5653+
// Where on the second line, you will get the closing of a template,
5654+
// a closing curly, and a new template.
5655+
//
5656+
// 2) When substitution expressions have curly braces and the curly brace falls on the next line:
5657+
// `hello ${ () => {
5658+
// return "world" } } `
5659+
//
5660+
// Where on the second line, you will get the 'return' keyword,
5661+
// a string literal, and a template end consisting of '} } `'.
5662+
var templateStack: SyntaxKind[] = [];
5663+
56205664
function isAccessibilityModifier(kind: SyntaxKind) {
56215665
switch (kind) {
56225666
case SyntaxKind.PublicKeyword:
@@ -5650,13 +5694,19 @@ module ts {
56505694
// if there are more cases we want the classifier to be better at.
56515695
return true;
56525696
}
5653-
5654-
// 'classifyKeywordsInGenerics' should be 'true' when a syntactic classifier is not present.
5655-
function getClassificationsForLine(text: string, lexState: EndOfLineState, classifyKeywordsInGenerics?: boolean): ClassificationResult {
5697+
5698+
// If there is a syntactic classifier ('syntacticClassifierAbsent' is false),
5699+
// we will be more conservative in order to avoid conflicting with the syntactic classifier.
5700+
function getClassificationsForLine(text: string, lexState: EndOfLineState, syntacticClassifierAbsent?: boolean): ClassificationResult {
56565701
var offset = 0;
56575702
var token = SyntaxKind.Unknown;
56585703
var lastNonTriviaToken = SyntaxKind.Unknown;
56595704

5705+
// Empty out the template stack for reuse.
5706+
while (templateStack.length > 0) {
5707+
templateStack.pop();
5708+
}
5709+
56605710
// If we're in a string literal, then prepend: "\
56615711
// (and a newline). That way when we lex we'll think we're still in a string literal.
56625712
//
@@ -5675,6 +5725,17 @@ module ts {
56755725
text = "/*\n" + text;
56765726
offset = 3;
56775727
break;
5728+
case EndOfLineState.InTemplateHeadOrNoSubstitutionTemplate:
5729+
text = "`\n" + text;
5730+
offset = 2;
5731+
break;
5732+
case EndOfLineState.InTemplateMiddleOrTail:
5733+
text = "}\n" + text;
5734+
offset = 2;
5735+
// fallthrough
5736+
case EndOfLineState.InTemplateSubstitutionPosition:
5737+
templateStack.push(SyntaxKind.TemplateHead);
5738+
break;
56785739
}
56795740

56805741
scanner.setText(text);
@@ -5739,12 +5800,45 @@ module ts {
57395800
token === SyntaxKind.StringKeyword ||
57405801
token === SyntaxKind.NumberKeyword ||
57415802
token === SyntaxKind.BooleanKeyword) {
5742-
if (angleBracketStack > 0 && !classifyKeywordsInGenerics) {
5743-
// If it looks like we're could be in something generic, don't classify this
5744-
// as a keyword. We may just get overwritten by the syntactic classifier,
5745-
// causing a noisy experience for the user.
5746-
token = SyntaxKind.Identifier;
5747-
}
5803+
if (angleBracketStack > 0 && !syntacticClassifierAbsent) {
5804+
// If it looks like we could be in something generic, don't classify this
5805+
// as a keyword. We may just get overwritten by the syntactic classifier,
5806+
// causing a noisy experience for the user.
5807+
token = SyntaxKind.Identifier;
5808+
}
5809+
}
5810+
else if (token === SyntaxKind.TemplateHead) {
5811+
templateStack.push(token);
5812+
}
5813+
else if (token === SyntaxKind.OpenBraceToken) {
5814+
// If we don't have anything on the template stack,
5815+
// then we aren't trying to keep track of a previously scanned template head.
5816+
if (templateStack.length > 0) {
5817+
templateStack.push(token);
5818+
}
5819+
}
5820+
else if (token === SyntaxKind.CloseBraceToken) {
5821+
// If we don't have anything on the template stack,
5822+
// then we aren't trying to keep track of a previously scanned template head.
5823+
if (templateStack.length > 0) {
5824+
var lastTemplateStackToken = lastOrUndefined(templateStack);
5825+
5826+
if (lastTemplateStackToken === SyntaxKind.TemplateHead) {
5827+
token = scanner.reScanTemplateToken();
5828+
5829+
// Only pop on a TemplateTail; a TemplateMiddle indicates there is more for us.
5830+
if (token === SyntaxKind.TemplateTail) {
5831+
templateStack.pop();
5832+
}
5833+
else {
5834+
Debug.assert(token === SyntaxKind.TemplateMiddle, "Should have been a template middle. Was " + token);
5835+
}
5836+
}
5837+
else {
5838+
Debug.assert(lastTemplateStackToken === SyntaxKind.OpenBraceToken, "Should have been an open brace. Was: " + token);
5839+
templateStack.pop();
5840+
}
5841+
}
57485842
}
57495843

57505844
lastNonTriviaToken = token;
@@ -5789,6 +5883,22 @@ module ts {
57895883
result.finalLexState = EndOfLineState.InMultiLineCommentTrivia;
57905884
}
57915885
}
5886+
else if (isTemplateLiteralKind(token)) {
5887+
if (scanner.isUnterminated()) {
5888+
if (token === SyntaxKind.TemplateTail) {
5889+
result.finalLexState = EndOfLineState.InTemplateMiddleOrTail;
5890+
}
5891+
else if (token === SyntaxKind.NoSubstitutionTemplateLiteral) {
5892+
result.finalLexState = EndOfLineState.InTemplateHeadOrNoSubstitutionTemplate;
5893+
}
5894+
else {
5895+
Debug.fail("Only 'NoSubstitutionTemplateLiteral's and 'TemplateTail's can be unterminated; got SyntaxKind #" + token);
5896+
}
5897+
}
5898+
}
5899+
else if (templateStack.length > 0 && lastOrUndefined(templateStack) === SyntaxKind.TemplateHead) {
5900+
result.finalLexState = EndOfLineState.InTemplateSubstitutionPosition;
5901+
}
57925902
}
57935903
}
57945904

@@ -5892,6 +6002,9 @@ module ts {
58926002
return TokenClass.Whitespace;
58936003
case SyntaxKind.Identifier:
58946004
default:
6005+
if (isTemplateLiteralKind(token)) {
6006+
return TokenClass.StringLiteral;
6007+
}
58956008
return TokenClass.Identifier;
58966009
}
58976010
}

src/services/shims.ts

+1-1
Original file line numberDiff line numberDiff line change
@@ -165,7 +165,7 @@ module ts {
165165
}
166166

167167
export interface ClassifierShim extends Shim {
168-
getClassificationsForLine(text: string, lexState: EndOfLineState, classifyKeywordsInGenerics?: boolean): string;
168+
getClassificationsForLine(text: string, lexState: EndOfLineState, syntacticClassifierAbsent?: boolean): string;
169169
}
170170

171171
export interface CoreServicesShim extends Shim {

tests/baselines/reference/APISample_compile.js

+23-1
Original file line numberDiff line numberDiff line change
@@ -1741,6 +1741,9 @@ declare module "typescript" {
17411741
InMultiLineCommentTrivia = 1,
17421742
InSingleQuoteStringLiteral = 2,
17431743
InDoubleQuoteStringLiteral = 3,
1744+
InTemplateHeadOrNoSubstitutionTemplate = 4,
1745+
InTemplateMiddleOrTail = 5,
1746+
InTemplateSubstitutionPosition = 6,
17441747
}
17451748
enum TokenClass {
17461749
Punctuation = 0,
@@ -1762,7 +1765,26 @@ declare module "typescript" {
17621765
classification: TokenClass;
17631766
}
17641767
interface Classifier {
1765-
getClassificationsForLine(text: string, lexState: EndOfLineState, classifyKeywordsInGenerics?: boolean): ClassificationResult;
1768+
/**
1769+
* Gives lexical classifications of tokens on a line without any syntactic context.
1770+
* For instance, a token consisting of the text 'string' can be either an identifier
1771+
* named 'string' or the keyword 'string', however, because this classifier is not aware,
1772+
* it relies on certain heuristics to give acceptable results. For classifications where
1773+
* speed trumps accuracy, this function is preferable; however, for true accuracy, the
1774+
* syntactic classifier is ideal. In fact, in certain editing scenarios, combining the
1775+
* lexical, syntactic, and semantic classifiers may issue the best user experience.
1776+
*
1777+
* @param text The text of a line to classify.
1778+
* @param lexState The state of the lexical classifier at the end of the previous line.
1779+
* @param syntacticClassifierAbsent Whether the client is *not* using a syntactic classifier.
1780+
* If there is no syntactic classifier (syntacticClassifierAbsent=true),
1781+
* certain heuristics may be used in its place; however, if there is a
1782+
* syntactic classifier (syntacticClassifierAbsent=false), certain
1783+
* classifications which may be incorrectly categorized will be given
1784+
* back as Identifiers in order to allow the syntactic classifier to
1785+
* subsume the classification.
1786+
*/
1787+
getClassificationsForLine(text: string, lexState: EndOfLineState, syntacticClassifierAbsent: boolean): ClassificationResult;
17661788
}
17671789
/**
17681790
* The document registry represents a store of SourceFile objects that can be shared between

tests/baselines/reference/APISample_compile.types

+31-3
Original file line numberDiff line numberDiff line change
@@ -5539,6 +5539,15 @@ declare module "typescript" {
55395539

55405540
InDoubleQuoteStringLiteral = 3,
55415541
>InDoubleQuoteStringLiteral : EndOfLineState
5542+
5543+
InTemplateHeadOrNoSubstitutionTemplate = 4,
5544+
>InTemplateHeadOrNoSubstitutionTemplate : EndOfLineState
5545+
5546+
InTemplateMiddleOrTail = 5,
5547+
>InTemplateMiddleOrTail : EndOfLineState
5548+
5549+
InTemplateSubstitutionPosition = 6,
5550+
>InTemplateSubstitutionPosition : EndOfLineState
55425551
}
55435552
enum TokenClass {
55445553
>TokenClass : TokenClass
@@ -5594,12 +5603,31 @@ declare module "typescript" {
55945603
interface Classifier {
55955604
>Classifier : Classifier
55965605

5597-
getClassificationsForLine(text: string, lexState: EndOfLineState, classifyKeywordsInGenerics?: boolean): ClassificationResult;
5598-
>getClassificationsForLine : (text: string, lexState: EndOfLineState, classifyKeywordsInGenerics?: boolean) => ClassificationResult
5606+
/**
5607+
* Gives lexical classifications of tokens on a line without any syntactic context.
5608+
* For instance, a token consisting of the text 'string' can be either an identifier
5609+
* named 'string' or the keyword 'string', however, because this classifier is not aware,
5610+
* it relies on certain heuristics to give acceptable results. For classifications where
5611+
* speed trumps accuracy, this function is preferable; however, for true accuracy, the
5612+
* syntactic classifier is ideal. In fact, in certain editing scenarios, combining the
5613+
* lexical, syntactic, and semantic classifiers may issue the best user experience.
5614+
*
5615+
* @param text The text of a line to classify.
5616+
* @param lexState The state of the lexical classifier at the end of the previous line.
5617+
* @param syntacticClassifierAbsent Whether the client is *not* using a syntactic classifier.
5618+
* If there is no syntactic classifier (syntacticClassifierAbsent=true),
5619+
* certain heuristics may be used in its place; however, if there is a
5620+
* syntactic classifier (syntacticClassifierAbsent=false), certain
5621+
* classifications which may be incorrectly categorized will be given
5622+
* back as Identifiers in order to allow the syntactic classifier to
5623+
* subsume the classification.
5624+
*/
5625+
getClassificationsForLine(text: string, lexState: EndOfLineState, syntacticClassifierAbsent: boolean): ClassificationResult;
5626+
>getClassificationsForLine : (text: string, lexState: EndOfLineState, syntacticClassifierAbsent: boolean) => ClassificationResult
55995627
>text : string
56005628
>lexState : EndOfLineState
56015629
>EndOfLineState : EndOfLineState
5602-
>classifyKeywordsInGenerics : boolean
5630+
>syntacticClassifierAbsent : boolean
56035631
>ClassificationResult : ClassificationResult
56045632
}
56055633
/**

tests/baselines/reference/APISample_linter.js

+23-1
Original file line numberDiff line numberDiff line change
@@ -1772,6 +1772,9 @@ declare module "typescript" {
17721772
InMultiLineCommentTrivia = 1,
17731773
InSingleQuoteStringLiteral = 2,
17741774
InDoubleQuoteStringLiteral = 3,
1775+
InTemplateHeadOrNoSubstitutionTemplate = 4,
1776+
InTemplateMiddleOrTail = 5,
1777+
InTemplateSubstitutionPosition = 6,
17751778
}
17761779
enum TokenClass {
17771780
Punctuation = 0,
@@ -1793,7 +1796,26 @@ declare module "typescript" {
17931796
classification: TokenClass;
17941797
}
17951798
interface Classifier {
1796-
getClassificationsForLine(text: string, lexState: EndOfLineState, classifyKeywordsInGenerics?: boolean): ClassificationResult;
1799+
/**
1800+
* Gives lexical classifications of tokens on a line without any syntactic context.
1801+
* For instance, a token consisting of the text 'string' can be either an identifier
1802+
* named 'string' or the keyword 'string', however, because this classifier is not aware,
1803+
* it relies on certain heuristics to give acceptable results. For classifications where
1804+
* speed trumps accuracy, this function is preferable; however, for true accuracy, the
1805+
* syntactic classifier is ideal. In fact, in certain editing scenarios, combining the
1806+
* lexical, syntactic, and semantic classifiers may issue the best user experience.
1807+
*
1808+
* @param text The text of a line to classify.
1809+
* @param lexState The state of the lexical classifier at the end of the previous line.
1810+
* @param syntacticClassifierAbsent Whether the client is *not* using a syntactic classifier.
1811+
* If there is no syntactic classifier (syntacticClassifierAbsent=true),
1812+
* certain heuristics may be used in its place; however, if there is a
1813+
* syntactic classifier (syntacticClassifierAbsent=false), certain
1814+
* classifications which may be incorrectly categorized will be given
1815+
* back as Identifiers in order to allow the syntactic classifier to
1816+
* subsume the classification.
1817+
*/
1818+
getClassificationsForLine(text: string, lexState: EndOfLineState, syntacticClassifierAbsent: boolean): ClassificationResult;
17971819
}
17981820
/**
17991821
* The document registry represents a store of SourceFile objects that can be shared between

0 commit comments

Comments
 (0)