Skip to content

Commit 69342d5

Browse files
authored
Add support for BlockStrings (#131)
1 parent 8e0ba08 commit 69342d5

File tree

5 files changed

+310
-4
lines changed

5 files changed

+310
-4
lines changed

src/Directory.Build.props

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
<Project>
22

33
<PropertyGroup>
4-
<VersionPrefix>7.1.0-preview</VersionPrefix>
4+
<VersionPrefix>7.2.0-preview</VersionPrefix>
55
<LangVersion>8.0</LangVersion>
66
<GenerateAssemblyInfo>true</GenerateAssemblyInfo>
77
<TreatWarningsAsErrors>true</TreatWarningsAsErrors>

src/GraphQLParser.Tests/LexerTests.cs

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -974,6 +974,47 @@ public void Lex_WhiteSpaceStringToken_HasStringKind()
974974
token.Kind.ShouldBe(TokenKind.STRING);
975975
}
976976

977+
/// <summary>
/// Lexes <paramref name="input"/> wrapped in triple quotes and checks that the resulting
/// <c>STRING</c> token carries <paramref name="expected"/> as its value: line endings
/// normalized to LF, blank leading/trailing lines dropped, and the common indentation of
/// all lines after the first removed.
/// </summary>
/// <param name="input">Raw block string content, without the surrounding triple quotes.</param>
/// <param name="expected">The value the lexer is expected to produce for that content.</param>
[Theory]
[InlineData("test", "test")]
[InlineData("te\\\"\"\"st", "te\"\"\"st")]
[InlineData("\ntest", "test")]
[InlineData("\r\ntest", "test")]
[InlineData(" \ntest", "test")]
[InlineData("\t\ntest", "test")]
[InlineData("\n\ntest", "test")]
[InlineData("test\nline2", "test\nline2")]
[InlineData("test\rline2", "test\nline2")]
[InlineData("test\r\nline2", "test\nline2")]
[InlineData("test\r\r\nline2", "test\n\nline2")]
[InlineData("test\r\n\nline2", "test\n\nline2")]
[InlineData("test\n", "test")]
[InlineData("test\n ", "test")]
[InlineData("test\n\t", "test")]
[InlineData("test\n\n", "test")]
[InlineData("test\n line2", "test\nline2")]
[InlineData("test\n\t\tline2", "test\nline2")]
[InlineData("test\n \tline2", "test\nline2")]
[InlineData(" test\nline2", " test\nline2")]
[InlineData(" test\n line2", " test\nline2")]
[InlineData("\n test\n line2", "test\nline2")]
// The rows below rely on lines with *different* indentation depths, so the exact number of
// spaces matters: the smallest indent among the non-blank lines after the first is removed
// from every line after the first, and deeper lines keep the remainder.
[InlineData("  test\n line2\n\t\tline3\n  line4", "  test\nline2\n\tline3\n line4")]
[InlineData("  test\n  Hello,\n\n    world!\n ", "  test\nHello,\n\n  world!")]
[InlineData("  \n  Hello,\r\n\n    world!\n ", "Hello,\n\n  world!")]
[InlineData("  \n  Hello,\r\n\n    wor___ld!\n ", "Hello,\n\n  wor___ld!")]
[InlineData("\r\n    Hello,\r\n      World!\r\n\r\n    Yours,\r\n      GraphQL.\r\n  ", "Hello,\n  World!\n\nYours,\n  GraphQL.")]
// Backslash sequences other than \""" are not escapes inside a block string and pass through verbatim.
[InlineData("Test \\n escaping", "Test \\n escaping")]
[InlineData("Test \\u1234 escaping", "Test \\u1234 escaping")]
[InlineData("Test \\ escaping", "Test \\ escaping")]
public void Lex_BlockString_Tests(string input, string expected)
{
    // "___" stands in for a 9000-character run so that one case exceeds the lexer's
    // 4096-character stack buffer and exercises its StringBuilder fallback.
    input = input.Replace("___", new string('_', 9000));
    expected = expected.Replace("___", new string('_', 9000));
    input = "\"\"\"" + input + "\"\"\"";

    var actual = input.Lex();

    actual.Kind.ShouldBe(TokenKind.STRING);
    actual.Value.ToString().ShouldBe(expected);
}
1017+
9771018
private static Token GetATPunctuationTokenLexer()
9781019
{
9791020
return "@".Lex();

src/GraphQLParser.Tests/Validation/LexerValidationTests.cs

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -364,6 +364,20 @@ public void Lex_UnescapedControlChar_ThrowsExceptionWithCorrectMessage()
364364
exception.Column.ShouldBe(21);
365365
}
366366

367+
/// <summary>
/// A raw control character (here U+0007) inside a block string must be rejected with a
/// syntax error that points at the offending character.
/// </summary>
[Fact]
public void Lex_UnescapedControlChar_Blockstring_ThrowsExceptionWithCorrectMessage()
{
    const string source = "\"\"\"contains unescaped \u0007 control char";
    const string expectedDescription = "Invalid character within BlockString: \\u0007.";
    // NOTE(review): the caret line's leading padding may have been collapsed when this text
    // was extracted - confirm against the actual formatter output.
    const string expectedMessage =
        "Syntax Error GraphQL (1:23) Invalid character within BlockString: \\u0007.\n" +
        "1: \"\"\"contains unescaped \\u0007 control char\n" +
        " ^\n";

    var error = Should.Throw<GraphQLSyntaxErrorException>(() => source.Lex());

    error.Message.ShouldBe(expectedMessage);
    error.Description.ShouldBe(expectedDescription);
    error.Line.ShouldBe(1);
    error.Column.ShouldBe(23);
}
380+
367381
[Fact]
368382
public void Lex_UnterminatedString_ThrowsExceptionWithCorrectMessage()
369383
{
@@ -391,5 +405,33 @@ public void Lex_UnterminatedStringWithText_ThrowsExceptionWithCorrectMessage()
391405
exception.Line.ShouldBe(1);
392406
exception.Column.ShouldBe(14);
393407
}
408+
409+
/// <summary>
/// A source consisting only of the opening triple quote must be reported as an
/// unterminated string located just past the end of the input.
/// </summary>
[Fact]
public void Lex_UnterminatedBlockString_ThrowsExceptionWithCorrectMessage()
{
    const string source = "\"\"\"";
    // NOTE(review): the caret line's leading padding may have been collapsed when this text
    // was extracted - confirm against the actual formatter output.
    const string expectedMessage =
        "Syntax Error GraphQL (1:4) Unterminated string.\n" +
        "1: \"\"\"\n" +
        " ^\n";

    var error = Should.Throw<GraphQLSyntaxErrorException>(() => source.Lex());

    error.Message.ShouldBe(expectedMessage);
    error.Description.ShouldBe("Unterminated string.");
    error.Line.ShouldBe(1);
    error.Column.ShouldBe(4);
}
422+
423+
/// <summary>
/// A block string whose content is followed by only two closing quotes is still
/// unterminated; the error must be located just past the end of the input.
/// </summary>
[Fact]
public void Lex_UnterminatedBlockStringWithText_ThrowsExceptionWithCorrectMessage()
{
    const string source = "\"\"\"no end triple-quote\"\"";
    // NOTE(review): the caret line's leading padding may have been collapsed when this text
    // was extracted - confirm against the actual formatter output.
    const string expectedMessage =
        "Syntax Error GraphQL (1:25) Unterminated string.\n" +
        "1: \"\"\"no end triple-quote\"\"\n" +
        " ^\n";

    var error = Should.Throw<GraphQLSyntaxErrorException>(() => source.Lex());

    error.Message.ShouldBe(expectedMessage);
    error.Description.ShouldBe("Unterminated string.");
    error.Line.ShouldBe(1);
    error.Column.ShouldBe(25);
}
394436
}
395437
}

src/GraphQLParser/LexerContext.cs

Lines changed: 212 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,16 @@ public Token GetToken()
4747
return ReadNumber();
4848

4949
if (code == '"')
50-
return ReadString();
50+
{
51+
if (_currentIndex + 2 < _source.Length && _source.Span[_currentIndex + 1] == '"' && _source.Span[_currentIndex + 2] == '"')
52+
{
53+
return ReadBlockString();
54+
}
55+
else
56+
{
57+
return ReadString();
58+
}
59+
}
5160

5261
return Throw_From_GetToken2(code);
5362
}
@@ -172,6 +181,203 @@ private Token ReadComment()
172181
);
173182
}
174183

184+
/// <summary>
/// Reads a block string (text delimited by <c>"""</c>) beginning at the current position,
/// which must be the first of the three opening quotes, and returns it as a
/// <see cref="TokenKind.STRING"/> token.
/// </summary>
/// <remarks>
/// The raw content is first collected with CR, LF and CRLF all normalized to LF and with
/// <c>\"""</c> turned into a literal <c>"""</c>; every other backslash is kept verbatim.
/// The collected text is then passed to <c>ProcessBuffer</c>, which strips blank leading and
/// trailing lines and the indentation shared by all lines after the first.
/// A control character other than TAB, LF or CR is reported through
/// <see cref="Throw_From_ReadBlockString1"/>; reaching the end of the source without a closing
/// <c>"""</c> is reported through <see cref="Throw_From_ReadString2"/>.
/// </remarks>
/// <returns>The STRING token holding the processed block string value.</returns>
private Token ReadBlockString()
{
    // The token starts at the first opening quote (same convention as ReadString),
    // so remember that position before stepping over the other two quotes.
    int start = _currentIndex;
    _currentIndex += 2;
    char code = NextCode();

    // Content is gathered on the stack; anything beyond 4096 chars spills into a StringBuilder.
    Span<char> buffer = stackalloc char[4096];
    StringBuilder? sb = null;

    int index = 0;
    bool escape = false; //when the last character was \
    bool lastWasCr = false;
    // Number of quotes still to be copied literally after an escaped \""" was recognized.
    // Without this, the 2nd/3rd quote of the escape could be mistaken for the start of the
    // closing delimiter when further quotes follow immediately (e.g. \"""""").
    int escapedQuotesRemaining = 0;

    while (_currentIndex < _source.Length)
    {
        if (code < 0x0020 && code != 0x0009 && code != 0x000A && code != 0x000D)
        {
            Throw_From_ReadBlockString1(code);
        }

        if (escapedQuotesRemaining > 0)
        {
            //second or third quote of an escaped \""" - always literal, never a terminator
            escapedQuotesRemaining--;
        }
        //check for """
        else if (code == '"' && _currentIndex + 2 < _source.Length && _source.Span[_currentIndex + 1] == '"' && _source.Span[_currentIndex + 2] == '"')
        {
            //if last character was \ then go ahead and write out the """, skipping the \
            if (escape)
            {
                escape = false;
                escapedQuotesRemaining = 2;
            }
            else
            {
                //end of blockstring
                break;
            }
        }
        else if (escape)
        {
            //last character was \ so write the \ and then retry this character with escaped = false
            code = '\\';
            _currentIndex--;
            escape = false;
        }
        else if (code == '\\')
        {
            //this character is a \ so don't write anything yet, but check the next character
            escape = true;
            code = NextCode();
            lastWasCr = false;
            continue;
        }
        else
        {
            escape = false;
        }

        // The LF of a CRLF pair is dropped because the CR was already written as LF.
        if (!(lastWasCr && code == '\n'))
        {
            //write code
            if (index < buffer.Length)
            {
                buffer[index++] = code == '\r' ? '\n' : code;
            }
            else // fallback to StringBuilder in case of buffer overflow
            {
                if (sb == null)
                    sb = new StringBuilder(buffer.Length * 2);

                // flush the full stack buffer, then the current character, and start refilling
                for (int i = 0; i < buffer.Length; ++i)
                    sb.Append(buffer[i]);

                sb.Append(code == '\r' ? '\n' : code);
                index = 0;
            }
        }

        lastWasCr = code == '\r';

        code = NextCode();
    }

    // The loop ends either on the closing """ (break) or by running off the end of the source.
    if (_currentIndex >= _source.Length)
    {
        Throw_From_ReadString2();
    }
    // Move from the first to the last closing quote.
    _currentIndex += 2;

    if (sb != null)
    {
        for (int i = 0; i < index; ++i)
            sb.Append(buffer[i]);
    }

    //at this point, if sb != null, then sb has the whole string, otherwise buffer (of length index) has the whole string
    //also, all line termination combinations have been replaced with LF

    ROM value;
    if (sb != null)
    {
        var chars = new char[sb.Length];
        sb.CopyTo(0, chars, 0, sb.Length);
        value = ProcessBuffer(chars);
    }
    else
    {
        value = ProcessBuffer(buffer.Slice(0, index));
    }

    return new Token
    (
        TokenKind.STRING,
        value,
        start,
        _currentIndex + 1
    );

    // Produces the final block string value from LF-normalized raw content: drops blank
    // leading and trailing lines and removes the common indentation from every line but the first.
    static ROM ProcessBuffer(Span<char> buffer)
    {
        //scan string to determine maximum valid commonIndent value,
        //number of initial blank lines, and number of trailing blank lines
        int commonIndent = int.MaxValue;
        int initialBlankLines = 1;
        int skipLinesAfter; //skip all text after line ###, as determined by the number of trailing blank lines
        {
            int trailingBlankLines = 0;
            int line = 0;
            int whitespace = 0;
            bool allWhitespace = true;
            bool reachedCharacter = false;
            for (int index = 0; index < buffer.Length; index++)
            {
                char code = buffer[index];
                if (code == '\n')
                {
                    if (allWhitespace)
                        trailingBlankLines += 1;
                    // the first line never contributes to the common indentation
                    if (line != 0 && !allWhitespace && whitespace < commonIndent)
                        commonIndent = whitespace;
                    line++;
                    whitespace = 0;
                    allWhitespace = true;
                    if (!reachedCharacter)
                        initialBlankLines++;
                }
                else if (code == ' ' || code == '\t')
                {
                    // only leading whitespace counts towards the indentation of a line
                    if (allWhitespace)
                        whitespace++;
                }
                else
                {
                    allWhitespace = false;
                    if (!reachedCharacter)
                        initialBlankLines--;
                    reachedCharacter = true;
                    // a non-blank line resets the run of trailing blank lines
                    trailingBlankLines = 0;
                }
            }
            // account for the last line, which has no terminating LF
            if (allWhitespace)
                trailingBlankLines += 1;
            if (line != 0 && !allWhitespace && whitespace < commonIndent)
                commonIndent = whitespace;
            if (commonIndent == int.MaxValue)
                commonIndent = 0;
            int lines = line + 1;
            skipLinesAfter = lines - trailingBlankLines;
        }

        //step through the input, skipping the initial blank lines and the trailing blank lines,
        //and skipping the initial blank characters from the start of each line
        Span<char> output = buffer.Length <= 4096 ? stackalloc char[buffer.Length] : new char[buffer.Length];
        int outputIndex = 0;
        {
            int line = 0;
            int col = 0;
            for (int index = 0; index < buffer.Length; index++)
            {
                char code = buffer[index];
                if (code == '\n')
                {
                    if (++line >= skipLinesAfter)
                        break;
                    col = 0;
                    if (line > initialBlankLines)
                        output[outputIndex++] = code;
                }
                else
                {
                    // line 0 is copied as-is; later lines lose their first commonIndent characters
                    if (line >= initialBlankLines && (line == 0 || col++ >= commonIndent))
                        output[outputIndex++] = code;
                }
            }
        }

        //return the string value from the output buffer
        return output.Slice(0, outputIndex).ToString();
    }
}
380+
175381
private Token ReadString()
176382
{
177383
int start = _currentIndex;
@@ -245,6 +451,11 @@ private void Throw_From_ReadString2()
245451
throw new GraphQLSyntaxErrorException("Unterminated string.", _source, _currentIndex);
246452
}
247453

454+
/// <summary>
/// Throws a <see cref="GraphQLSyntaxErrorException"/> for an invalid character found inside a
/// block string, located at the current lexer position.
/// </summary>
/// <param name="code">The offending character; it is reported as a <c>\uXXXX</c> escape.</param>
private void Throw_From_ReadBlockString1(char code)
{
    // X4 = four hexadecimal digits, matching \u escape notation. A decimal format would
    // misreport any code above 9 (e.g. U+001F would be shown as \u0031).
    throw new GraphQLSyntaxErrorException($"Invalid character within BlockString: \\u{(int)code:X4}.", _source, _currentIndex);
}
458+
248459
// sets escaped only to true
249460
private char ReadCharacterFromString(char currentCharacter, ref bool escaped)
250461
{

src/GraphQLParser/TokenKind.cs

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -93,9 +93,21 @@ public enum TokenKind
9393
FLOAT = 17,
9494

9595
/// <summary>
96+
/// A string value, encoded as either a 'string' or 'block string'
97+
/// <br/><br/>
9698
/// Strings are sequences of characters wrapped in double‐quotes ("). (ex. "Hello World").
9799
/// White space and other otherwise‐ignored characters are significant within a string value.
98-
/// </summary>
100+
/// <br/><br/>
101+
/// Block strings are sequences of characters wrapped in triple‐quotes ("""). White space, line terminators,
102+
/// quote, and backslash characters may all be used unescaped to enable verbatim text.
103+
/// Since block strings represent freeform text often used in indented positions, the string value semantics
104+
/// of a block string excludes uniform indentation and blank initial and trailing lines.
105+
/// Triple-quotes (""") may be escaped as \""" within the block string. No other escape sequences may be used
106+
/// within a block string.
107+
/// </summary>
108+
/// <remarks>
109+
/// Within a block string, line termination sequences (LF, CR, or CRLF) are always replaced with a line-feed (LF) character.
110+
/// </remarks>
99111
STRING = 18,
100112

101113
/// <summary>
@@ -115,6 +127,6 @@ public enum TokenKind
115127
/// <summary>
116128
/// &amp;
117129
/// </summary>
118-
AMPERSAND = 21
130+
AMPERSAND = 21,
119131
}
120132
}

0 commit comments

Comments
 (0)