Skip to content

Add support for BlockStrings #131

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Jul 6, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/Directory.Build.props
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
<Project>

<PropertyGroup>
<VersionPrefix>7.1.0-preview</VersionPrefix>
<VersionPrefix>7.2.0-preview</VersionPrefix>
<LangVersion>8.0</LangVersion>
<GenerateAssemblyInfo>true</GenerateAssemblyInfo>
<TreatWarningsAsErrors>true</TreatWarningsAsErrors>
Expand Down
41 changes: 41 additions & 0 deletions src/GraphQLParser.Tests/LexerTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -974,6 +974,47 @@ public void Lex_WhiteSpaceStringToken_HasStringKind()
token.Kind.ShouldBe(TokenKind.STRING);
}

[Theory]
[InlineData("test", "test")]
[InlineData("te\\\"\"\"st", "te\"\"\"st")]
[InlineData("\ntest", "test")]
[InlineData("\r\ntest", "test")]
[InlineData(" \ntest", "test")]
[InlineData("\t\ntest", "test")]
[InlineData("\n\ntest", "test")]
[InlineData("test\nline2", "test\nline2")]
[InlineData("test\rline2", "test\nline2")]
[InlineData("test\r\nline2", "test\nline2")]
[InlineData("test\r\r\nline2", "test\n\nline2")]
[InlineData("test\r\n\nline2", "test\n\nline2")]
[InlineData("test\n", "test")]
[InlineData("test\n ", "test")]
[InlineData("test\n\t", "test")]
[InlineData("test\n\n", "test")]
[InlineData("test\n line2", "test\nline2")]
[InlineData("test\n\t\tline2", "test\nline2")]
[InlineData("test\n \tline2", "test\nline2")]
[InlineData(" test\nline2", " test\nline2")]
[InlineData(" test\n line2", " test\nline2")]
[InlineData("\n test\n line2", "test\nline2")]
[InlineData(" test\n line2\n\t\tline3\n line4", " test\nline2\n\tline3\n line4")]
[InlineData(" test\n Hello,\n\n world!\n ", " test\nHello,\n\n world!")]
[InlineData(" \n Hello,\r\n\n world!\n ", "Hello,\n\n world!")]
[InlineData(" \n Hello,\r\n\n wor___ld!\n ", "Hello,\n\n wor___ld!")]
[InlineData("\r\n Hello,\r\n World!\r\n\r\n Yours,\r\n GraphQL.\r\n ", "Hello,\n World!\n\nYours,\n GraphQL.")]
[InlineData("Test \\n escaping", "Test \\n escaping")]
[InlineData("Test \\u1234 escaping", "Test \\u1234 escaping")]
[InlineData("Test \\ escaping", "Test \\ escaping")]
public void Lex_BlockString_Tests(string input, string expected)
{
    // "___" inside a test case is a placeholder that is expanded to a run of
    // 9000 underscores, so a single inline case can describe a very long string.
    string filler = new string('_', 9000);

    // Wrap the raw content in triple quotes to form the block string literal to lex.
    string source = "\"\"\"" + input.Replace("___", filler) + "\"\"\"";
    string expectedValue = expected.Replace("___", filler);

    var token = source.Lex();

    token.Kind.ShouldBe(TokenKind.STRING);
    token.Value.ToString().ShouldBe(expectedValue);
}

private static Token GetATPunctuationTokenLexer()
{
return "@".Lex();
Expand Down
42 changes: 42 additions & 0 deletions src/GraphQLParser.Tests/Validation/LexerValidationTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -364,6 +364,20 @@ public void Lex_UnescapedControlChar_ThrowsExceptionWithCorrectMessage()
exception.Column.ShouldBe(21);
}

[Fact]
public void Lex_UnescapedControlChar_Blockstring_ThrowsExceptionWithCorrectMessage()
{
    // A block string containing a raw BEL (U+0007) control character and no closing quotes.
    const string query = "\"\"\"contains unescaped \u0007 control char";

    var thrown = Should.Throw<GraphQLSyntaxErrorException>(() => query.Lex());

    // The error is expected to point at the control character itself.
    thrown.Line.ShouldBe(1);
    thrown.Column.ShouldBe(23);
    thrown.Description.ShouldBe("Invalid character within BlockString: \\u0007.");
    thrown.Message.ShouldBe(
        "Syntax Error GraphQL (1:23) Invalid character within BlockString: \\u0007.\n" +
        "1: \"\"\"contains unescaped \\u0007 control char\n" +
        " ^\n");
}

[Fact]
public void Lex_UnterminatedString_ThrowsExceptionWithCorrectMessage()
{
Expand Down Expand Up @@ -391,5 +405,33 @@ public void Lex_UnterminatedStringWithText_ThrowsExceptionWithCorrectMessage()
exception.Line.ShouldBe(1);
exception.Column.ShouldBe(14);
}

[Fact]
public void Lex_UnterminatedBlockString_ThrowsExceptionWithCorrectMessage()
{
    // Only the opening triple quote is present; there is no content and no terminator.
    const string query = "\"\"\"";

    var thrown = Should.Throw<GraphQLSyntaxErrorException>(() => query.Lex());

    // The error is expected just past the end of the input.
    thrown.Line.ShouldBe(1);
    thrown.Column.ShouldBe(4);
    thrown.Description.ShouldBe("Unterminated string.");
    thrown.Message.ShouldBe(
        "Syntax Error GraphQL (1:4) Unterminated string.\n" +
        "1: \"\"\"\n" +
        " ^\n");
}

[Fact]
public void Lex_UnterminatedBlockStringWithText_ThrowsExceptionWithCorrectMessage()
{
    // The content is followed by only two quotes, so the block string is never closed.
    const string query = "\"\"\"no end triple-quote\"\"";

    var thrown = Should.Throw<GraphQLSyntaxErrorException>(() => query.Lex());

    // The error is expected just past the end of the input.
    thrown.Line.ShouldBe(1);
    thrown.Column.ShouldBe(25);
    thrown.Description.ShouldBe("Unterminated string.");
    thrown.Message.ShouldBe(
        "Syntax Error GraphQL (1:25) Unterminated string.\n" +
        "1: \"\"\"no end triple-quote\"\"\n" +
        " ^\n");
}
}
}
213 changes: 212 additions & 1 deletion src/GraphQLParser/LexerContext.cs
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,16 @@ public Token GetToken()
return ReadNumber();

if (code == '"')
return ReadString();
{
if (_currentIndex + 2 < _source.Length && _source.Span[_currentIndex + 1] == '"' && _source.Span[_currentIndex + 2] == '"')
{
return ReadBlockString();
}
else
{
return ReadString();
}
}

return Throw_From_GetToken2(code);
}
Expand Down Expand Up @@ -172,6 +181,203 @@ private Token ReadComment()
);
}

/// <summary>
/// Reads a block string (text wrapped in triple quotes) starting at the current position
/// and returns it as a <see cref="TokenKind.STRING"/> token.
/// </summary>
/// <remarks>
/// The raw content is first collected with every line terminator (LF, CR or CRLF) normalised
/// to LF and with the escape sequence <c>\"""</c> replaced by <c>"""</c>; a backslash followed
/// by anything else is kept verbatim. The collected text is then post-processed to drop blank
/// leading/trailing lines and the common indentation. Throws a syntax error for control
/// characters other than TAB, LF and CR, and for a missing closing triple quote.
/// </remarks>
private Token ReadBlockString()
{
    // The token starts at the first quote of the opening """ (ReadString likewise captures
    // the index of its opening quote before advancing).
    int start = _currentIndex;

    // Move to the third opening quote. NextCode() is assumed to advance _currentIndex by one
    // and return the character at the new position, so `code` becomes the first content character.
    _currentIndex += 2;
    char code = NextCode();

    Span<char> buffer = stackalloc char[4096];
    StringBuilder? sb = null;

    int index = 0;
    bool escape = false; //when the last character was \
    bool lastWasCr = false;

    while (_currentIndex < _source.Length)
    {
        if (code < 0x0020 && code != 0x0009 && code != 0x000A && code != 0x000D)
        {
            Throw_From_ReadBlockString1(code);
        }

        //check for """
        if (code == '"' && _currentIndex + 2 < _source.Length && _source.Span[_currentIndex + 1] == '"' && _source.Span[_currentIndex + 2] == '"')
        {
            if (!escape)
            {
                //end of blockstring
                break;
            }

            //last character was \ so this is the escape sequence \""" : write all three quotes
            //and consume them together, so that the 2nd and 3rd quote cannot be combined with
            //a following quote and be mistaken for the closing """
            escape = false;
            for (int i = 0; i < 3; ++i)
                Append(buffer, ref index, ref sb, '"');

            _currentIndex += 2; //now positioned on the third escaped quote
            lastWasCr = false;
            code = NextCode();
            continue;
        }
        else if (escape)
        {
            //last character was \ so write the \ and then retry this character with escaped = false
            code = '\\';
            _currentIndex--;
            escape = false;
        }
        else if (code == '\\')
        {
            //this character is a \ so don't write anything yet, but check the next character
            escape = true;
            code = NextCode();
            lastWasCr = false;
            continue;
        }
        else
        {
            escape = false;
        }

        //the LF of a CRLF pair is skipped because the CR was already written as LF
        if (!(lastWasCr && code == '\n'))
        {
            Append(buffer, ref index, ref sb, code == '\r' ? '\n' : code);
        }

        lastWasCr = code == '\r';

        code = NextCode();
    }

    //the loop only ends without reaching the end of the source when the closing """ was found
    if (_currentIndex >= _source.Length)
    {
        Throw_From_ReadString2();
    }

    //move from the first closing quote to the last closing quote
    _currentIndex += 2;

    if (sb != null)
    {
        for (int i = 0; i < index; ++i)
            sb.Append(buffer[i]);
    }

    //at this point, if sb != null, then sb has the whole string, otherwise buffer (of length index) has the whole string
    //also, all line termination combinations have been replaced with LF

    ROM value;
    if (sb != null)
    {
        var chars = new char[sb.Length];
        sb.CopyTo(0, chars, 0, sb.Length);
        value = ProcessBuffer(chars);
    }
    else
    {
        value = ProcessBuffer(buffer.Slice(0, index));
    }

    return new Token
    (
        TokenKind.STRING,
        value,
        start,
        _currentIndex + 1
    );

    //writes a single character to the stack buffer; once the buffer is full its content is
    //flushed to a lazily created StringBuilder and the buffer is reused from the beginning
    static void Append(Span<char> target, ref int count, ref StringBuilder? overflow, char ch)
    {
        if (count < target.Length)
        {
            target[count++] = ch;
            return;
        }

        // fallback to StringBuilder in case of buffer overflow
        overflow ??= new StringBuilder(target.Length * 2);

        for (int i = 0; i < target.Length; ++i)
            overflow.Append(target[i]);

        overflow.Append(ch);
        count = 0;
    }

    //strips blank initial/trailing lines and the common indentation from the LF-normalised text
    static ROM ProcessBuffer(Span<char> buffer)
    {
        //scan string to determine maximum valid commonIndent value,
        //number of initial blank lines, and number of trailing blank lines
        int commonIndent = int.MaxValue;
        int initialBlankLines = 1;
        int skipLinesAfter; //skip all text after line ###, as determined by the number of trailing blank lines
        {
            int trailingBlankLines = 0;
            int line = 0;
            int whitespace = 0;
            bool allWhitespace = true;
            bool reachedCharacter = false;
            for (int index = 0; index < buffer.Length; index++)
            {
                char code = buffer[index];
                if (code == '\n')
                {
                    if (allWhitespace)
                        trailingBlankLines += 1;
                    //the first line never contributes to the common indentation
                    if (line != 0 && !allWhitespace && whitespace < commonIndent)
                        commonIndent = whitespace;
                    line++;
                    whitespace = 0;
                    allWhitespace = true;
                    if (!reachedCharacter)
                        initialBlankLines++;
                }
                else if (code == ' ' || code == '\t')
                {
                    //only leading whitespace of a line counts towards its indentation
                    if (allWhitespace)
                        whitespace++;
                }
                else
                {
                    allWhitespace = false;
                    if (!reachedCharacter)
                        initialBlankLines--;
                    reachedCharacter = true;
                    trailingBlankLines = 0;
                }
            }
            //account for the last line, which is not terminated by LF
            if (allWhitespace)
                trailingBlankLines += 1;
            if (line != 0 && !allWhitespace && whitespace < commonIndent)
                commonIndent = whitespace;
            if (commonIndent == int.MaxValue)
                commonIndent = 0;
            int lines = line + 1;
            skipLinesAfter = lines - trailingBlankLines;
        }

        //step through the input, skipping the initial blank lines and the trailing blank lines,
        //and skipping the initial blank characters from the start of each line
        Span<char> output = buffer.Length <= 4096 ? stackalloc char[buffer.Length] : new char[buffer.Length];
        int outputIndex = 0;
        {
            int line = 0;
            int col = 0;
            for (int index = 0; index < buffer.Length; index++)
            {
                char code = buffer[index];
                if (code == '\n')
                {
                    if (++line >= skipLinesAfter)
                        break;
                    col = 0;
                    if (line > initialBlankLines)
                        output[outputIndex++] = code;
                }
                else
                {
                    //the first line keeps its indentation; other lines lose the first commonIndent characters
                    if (line >= initialBlankLines && (line == 0 || col++ >= commonIndent))
                        output[outputIndex++] = code;
                }
            }
        }

        //return the string value from the output buffer
        return output.Slice(0, outputIndex).ToString();
    }
}

private Token ReadString()
{
int start = _currentIndex;
Expand Down Expand Up @@ -245,6 +451,11 @@ private void Throw_From_ReadString2()
throw new GraphQLSyntaxErrorException("Unterminated string.", _source, _currentIndex);
}

/// <summary>
/// Throws a <see cref="GraphQLSyntaxErrorException"/> reporting an invalid character found
/// inside a block string at the current position.
/// </summary>
/// <param name="code">The offending character; it is shown as a <c>\uXXXX</c> escape in the message.</param>
private void Throw_From_ReadBlockString1(char code)
{
    // \u escapes are hexadecimal, so the code point is formatted with X4 (not D4, which would
    // print e.g. U+000B as "\u0011" and thereby name a different character).
    throw new GraphQLSyntaxErrorException($"Invalid character within BlockString: \\u{(int)code:X4}.", _source, _currentIndex);
}

// sets escaped only to true
private char ReadCharacterFromString(char currentCharacter, ref bool escaped)
{
Expand Down
16 changes: 14 additions & 2 deletions src/GraphQLParser/TokenKind.cs
Original file line number Diff line number Diff line change
Expand Up @@ -93,9 +93,21 @@ public enum TokenKind
FLOAT = 17,

/// <summary>
/// A string value, encoded as either a 'string' or 'block string'
/// <br/><br/>
/// Strings are sequences of characters wrapped in double‐quotes ("). (ex. "Hello World").
/// White space and other otherwise‐ignored characters are significant within a string value.
/// <br/><br/>
/// Block strings are sequences of characters wrapped in triple‐quotes ("""). White space, line terminators,
/// quote, and backslash characters may all be used unescaped to enable verbatim text.
/// Since block strings represent freeform text often used in indented positions, the string value semantics
/// of a block string excludes uniform indentation and blank initial and trailing lines.
/// Triple-quotes (""") may be escaped as \""" within the block string. No other escape sequences may be used
/// within a block string.
/// </summary>
/// <remarks>
/// Within a block string, line termination sequences (LF, CR, or CRLF) are always replaced with a line-feed (LF) character.
/// </remarks>
STRING = 18,

/// <summary>
Expand All @@ -115,6 +127,6 @@ public enum TokenKind
/// <summary>
/// &amp;
/// </summary>
AMPERSAND = 21
AMPERSAND = 21,
}
}