From b33378ddedcfc8eca3bbaa1d75f537611ea12655 Mon Sep 17 00:00:00 2001 From: Lee Byron Date: Wed, 21 Jun 2017 16:56:29 -0700 Subject: [PATCH 1/3] RFC: Multi-line String This RFC adds a new form of `StringValue`, the multi-line string, similar to that found in Python and Scala. A multi-line string starts and ends with a triple-quote: ``` """This is a triple-quoted string and it can contain multiple lines""" ``` Multi-line strings are useful for typing literal bodies of text where new lines should be interpreted literally. In fact, the only escape sequence used is `\"""` and `\` is otherwise allowed unescaped. This is beneficial when writing documentation within strings which may reference the back-slash often: ``` """ In a multi-line string \n and C:\\ are unescaped. """ ``` The primary value of multi-line strings is to write long-form input directly in query text, in tools like GraphiQL, and as a prerequisite to another pending RFC to allow docstring style documentation in the Schema Definition Language. 
--- src/language/__tests__/kitchen-sink.graphql | 2 +- src/language/__tests__/lexer-test.js | 100 ++++++++++++++++++++ src/language/__tests__/parser-test.js | 16 ++++ src/language/__tests__/printer-test.js | 2 +- src/language/__tests__/visitor-test.js | 6 ++ src/language/ast.js | 2 + src/language/lexer.js | 88 +++++++++++++++-- src/language/parser.js | 2 + src/language/printer.js | 5 +- 9 files changed, 212 insertions(+), 11 deletions(-) diff --git a/src/language/__tests__/kitchen-sink.graphql b/src/language/__tests__/kitchen-sink.graphql index ff4a05c444..9cfb217e10 100644 --- a/src/language/__tests__/kitchen-sink.graphql +++ b/src/language/__tests__/kitchen-sink.graphql @@ -46,7 +46,7 @@ subscription StoryLikeSubscription($input: StoryLikeSubscribeInput) { } fragment frag on Friend { - foo(size: $size, bar: $b, obj: {key: "value"}) + foo(size: $size, bar: $b, obj: {key: "value", multiLine: """string"""}) } { diff --git a/src/language/__tests__/lexer-test.js b/src/language/__tests__/lexer-test.js index fea56f199a..57d38ebbb5 100644 --- a/src/language/__tests__/lexer-test.js +++ b/src/language/__tests__/lexer-test.js @@ -289,6 +289,106 @@ describe('Lexer', () => { ); }); + it('lexes multi-line strings', () => { + + expect( + lexOne('"""simple"""') + ).to.containSubset({ + kind: TokenKind.MULTI_LINE_STRING, + start: 0, + end: 12, + value: 'simple' + }); + + expect( + lexOne('""" white space """') + ).to.containSubset({ + kind: TokenKind.MULTI_LINE_STRING, + start: 0, + end: 19, + value: ' white space ' + }); + + expect( + lexOne('"""contains " quote"""') + ).to.containSubset({ + kind: TokenKind.MULTI_LINE_STRING, + start: 0, + end: 22, + value: 'contains " quote' + }); + + expect( + lexOne('"""contains \\""" triplequote"""') + ).to.containSubset({ + kind: TokenKind.MULTI_LINE_STRING, + start: 0, + end: 31, + value: 'contains """ triplequote' + }); + + expect( + lexOne('"""multi\nline"""') + ).to.containSubset({ + kind: TokenKind.MULTI_LINE_STRING, + start: 0, + end: 
16, + value: 'multi\nline' + }); + + expect( + lexOne('"""multi\rline"""') + ).to.containSubset({ + kind: TokenKind.MULTI_LINE_STRING, + start: 0, + end: 16, + value: 'multi\rline' + }); + + expect( + lexOne('"""unescaped \\n\\r\\b\\t\\f\\u1234"""') + ).to.containSubset({ + kind: TokenKind.MULTI_LINE_STRING, + start: 0, + end: 32, + value: 'unescaped \\n\\r\\b\\t\\f\\u1234' + }); + + expect( + lexOne('"""slashes \\\\ \\/"""') + ).to.containSubset({ + kind: TokenKind.MULTI_LINE_STRING, + start: 0, + end: 19, + value: 'slashes \\\\ \\/' + }); + + }); + + it('lex reports useful multi-line string errors', () => { + + expect( + () => lexOne('"""') + ).to.throw('Syntax Error GraphQL request (1:4) Unterminated string.'); + + expect( + () => lexOne('"""no end quote') + ).to.throw('Syntax Error GraphQL request (1:16) Unterminated string.'); + + expect( + () => lexOne('"""contains unescaped \u0007 control char"""') + ).to.throw( + 'Syntax Error GraphQL request (1:23) Invalid character within String: "\\u0007".' + ); + + expect( + () => lexOne('"""null-byte is not \u0000 end of file"""') + ).to.throw( + 'Syntax Error GraphQL request (1:21) Invalid character within String: "\\u0000".' 
+ ); + + }); + it('lexes numbers', () => { expect( diff --git a/src/language/__tests__/parser-test.js b/src/language/__tests__/parser-test.js index c06bfeb46e..806640d498 100644 --- a/src/language/__tests__/parser-test.js +++ b/src/language/__tests__/parser-test.js @@ -326,6 +326,22 @@ describe('Parser', () => { }); }); + it('parses multi-line strings', () => { + expect(parseValue('["""long""" "short"]')).to.containSubset({ + kind: Kind.LIST, + loc: { start: 0, end: 20 }, + values: [ + { kind: Kind.STRING, + loc: { start: 1, end: 11}, + value: 'long', + multiLine: true }, + { kind: Kind.STRING, + loc: { start: 12, end: 19}, + value: 'short', + multiLine: false } ] + }); + }); + }); describe('parseType', () => { diff --git a/src/language/__tests__/printer-test.js b/src/language/__tests__/printer-test.js index 4648e36b29..d06744c87c 100644 --- a/src/language/__tests__/printer-test.js +++ b/src/language/__tests__/printer-test.js @@ -127,7 +127,7 @@ describe('Printer', () => { } fragment frag on Friend { - foo(size: $size, bar: $b, obj: {key: "value"}) + foo(size: $size, bar: $b, obj: {key: "value", multiLine: """string"""}) } { diff --git a/src/language/__tests__/visitor-test.js b/src/language/__tests__/visitor-test.js index 76515880b5..e7a1d0511f 100644 --- a/src/language/__tests__/visitor-test.js +++ b/src/language/__tests__/visitor-test.js @@ -590,6 +590,12 @@ describe('Visitor', () => { [ 'enter', 'StringValue', 'value', 'ObjectField' ], [ 'leave', 'StringValue', 'value', 'ObjectField' ], [ 'leave', 'ObjectField', 0, undefined ], + [ 'enter', 'ObjectField', 1, undefined ], + [ 'enter', 'Name', 'name', 'ObjectField' ], + [ 'leave', 'Name', 'name', 'ObjectField' ], + [ 'enter', 'StringValue', 'value', 'ObjectField' ], + [ 'leave', 'StringValue', 'value', 'ObjectField' ], + [ 'leave', 'ObjectField', 1, undefined ], [ 'leave', 'ObjectValue', 'value', 'Argument' ], [ 'leave', 'Argument', 2, undefined ], [ 'leave', 'Field', 0, undefined ], diff --git 
a/src/language/ast.js b/src/language/ast.js index 5ad2e95341..2bca68a00e 100644 --- a/src/language/ast.js +++ b/src/language/ast.js @@ -66,6 +66,7 @@ type TokenKind = '' | 'Int' | 'Float' | 'String' + | 'MultiLineString' | 'Comment'; /** @@ -288,6 +289,7 @@ export type StringValueNode = { kind: 'StringValue'; loc?: Location; value: string; + multiLine?: boolean; }; export type BooleanValueNode = { diff --git a/src/language/lexer.js b/src/language/lexer.js index 04a7bbc276..322081931b 100644 --- a/src/language/lexer.js +++ b/src/language/lexer.js @@ -100,6 +100,7 @@ const NAME = 'Name'; const INT = 'Int'; const FLOAT = 'Float'; const STRING = 'String'; +const MULTI_LINE_STRING = 'MultiLineString'; const COMMENT = 'Comment'; /** @@ -126,6 +127,7 @@ export const TokenKind = { INT, FLOAT, STRING, + MULTI_LINE_STRING, COMMENT }; @@ -269,7 +271,12 @@ function readToken(lexer: Lexer<*>, prev: Token): Token { case 53: case 54: case 55: case 56: case 57: return readNumber(source, position, code, line, col, prev); // " - case 34: return readString(source, position, line, col, prev); + case 34: + if (charCodeAt.call(body, position + 1) === 34 && + charCodeAt.call(body, position + 2) === 34) { + return readMultiLineString(source, position, line, col, prev); + } + return readString(source, position, line, col, prev); } throw syntaxError( @@ -452,10 +459,14 @@ function readString(source, start, line, col, prev): Token { position < body.length && (code = charCodeAt.call(body, position)) !== null && // not LineTerminator - code !== 0x000A && code !== 0x000D && - // not Quote (") - code !== 34 + code !== 0x000A && code !== 0x000D ) { + // Closing Quote (") + if (code === 34) { + value += slice.call(body, chunkStart, position); + return new Tok(STRING, start, position + 1, line, col, prev, value); + } + // SourceCharacter if (code < 0x0020 && code !== 0x0009) { throw syntaxError( @@ -508,12 +519,73 @@ function readString(source, start, line, col, prev): Token { } } - if (code !== 
34) { // quote (") - throw syntaxError(source, position, 'Unterminated string.'); + throw syntaxError(source, position, 'Unterminated string.'); +} + +/** + * Reads a multi-line string token from the source file. + * + * """("?"?(\\"""|\\(?!=""")|[^"\\]))*""" + */ +function readMultiLineString(source, start, line, col, prev): Token { + const body = source.body; + let position = start + 3; + let chunkStart = position; + let code = 0; + let value = ''; + + while ( + position < body.length && + (code = charCodeAt.call(body, position)) !== null + ) { + // Closing Triple-Quote (""") + if ( + code === 34 && + charCodeAt.call(body, position + 1) === 34 && + charCodeAt.call(body, position + 2) === 34 + ) { + value += slice.call(body, chunkStart, position); + return new Tok( + MULTI_LINE_STRING, + start, + position + 3, + line, + col, + prev, + value + ); + } + + // SourceCharacter + if ( + code < 0x0020 && + code !== 0x0009 && + code !== 0x000A && + code !== 0x000D + ) { + throw syntaxError( + source, + position, + `Invalid character within String: ${printCharCode(code)}.` + ); + } + + // Escape Triple-Quote (\""") + if ( + code === 92 && + charCodeAt.call(body, position + 1) === 34 && + charCodeAt.call(body, position + 2) === 34 && + charCodeAt.call(body, position + 3) === 34 + ) { + value += slice.call(body, chunkStart, position) + '"""'; + position += 4; + chunkStart = position; + } else { + ++position; + } } - value += slice.call(body, chunkStart, position); - return new Tok(STRING, start, position + 1, line, col, prev, value); + throw syntaxError(source, position, 'Unterminated string.'); } /** diff --git a/src/language/parser.js b/src/language/parser.js index 5defac882f..4e2685a1ae 100644 --- a/src/language/parser.js +++ b/src/language/parser.js @@ -543,10 +543,12 @@ function parseValueLiteral(lexer: Lexer<*>, isConst: boolean): ValueNode { loc: loc(lexer, token) }; case TokenKind.STRING: + case TokenKind.MULTI_LINE_STRING: lexer.advance(); return { kind: (STRING: 
'StringValue'), value: ((token.value: any): string), + multiLine: token.kind === TokenKind.MULTI_LINE_STRING, loc: loc(lexer, token) }; case TokenKind.NAME: diff --git a/src/language/printer.js b/src/language/printer.js index 37ec2d0b70..46e2f2699a 100644 --- a/src/language/printer.js +++ b/src/language/printer.js @@ -72,7 +72,10 @@ const printDocASTReducer = { IntValue: ({ value }) => value, FloatValue: ({ value }) => value, - StringValue: ({ value }) => JSON.stringify(value), + StringValue: ({ value, multiLine }) => + multiLine ? + `"""${value.replace(/"""/g, '\\"""')}"""` : + JSON.stringify(value), BooleanValue: ({ value }) => JSON.stringify(value), NullValue: () => 'null', EnumValue: ({ value }) => value, From 7c00820104d1ac206f0c50f4100b18206bd1ed4c Mon Sep 17 00:00:00 2001 From: Lee Byron Date: Thu, 22 Jun 2017 00:07:55 -0700 Subject: [PATCH 2/3] Add RemoveIndentation() to the lexer for multi-line strings. --- src/jsutils/dedent.js | 13 +----- src/jsutils/removeIndentation.js | 66 ++++++++++++++++++++++++++++ src/language/__tests__/lexer-test.js | 27 +++++++++--- src/language/lexer.js | 9 ++-- src/language/printer.js | 19 ++++++-- 5 files changed, 110 insertions(+), 24 deletions(-) create mode 100644 src/jsutils/removeIndentation.js diff --git a/src/jsutils/dedent.js b/src/jsutils/dedent.js index cbaa02c01a..1203d040fe 100644 --- a/src/jsutils/dedent.js +++ b/src/jsutils/dedent.js @@ -7,16 +7,7 @@ * @flow */ - /** - * fixes identation by removing leading spaces from each line - */ -function fixIdent(str: string): string { - const indent = /^\n?( *)/.exec(str)[1]; // figure out ident - return str - .replace(RegExp('^' + indent, 'mg'), '') // remove ident - .replace(/^\n*/m, '') // remove leading newline - .replace(/ *$/, ''); // remove trailing spaces -} +import removeIndentation from './removeIndentation'; /** * An ES6 string tag that fixes identation. 
Also removes leading newlines @@ -45,5 +36,5 @@ export default function dedent( } } - return fixIdent(res); + return removeIndentation(res) + '\n'; } diff --git a/src/jsutils/removeIndentation.js b/src/jsutils/removeIndentation.js new file mode 100644 index 0000000000..4006128e28 --- /dev/null +++ b/src/jsutils/removeIndentation.js @@ -0,0 +1,66 @@ +/* @flow */ +/** + * Copyright (c) 2017, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. An additional grant + * of patent rights can be found in the PATENTS file in the same directory. + */ + +/** + * Removes leading identation from each line in a multi-line string. + * + * This implements RemoveIndentation() algorithm in the GraphQL spec. + * + * Note: this is similar to Python's docstring "trim" operation. + * https://www.python.org/dev/peps/pep-0257/#handling-docstring-indentation + */ +export default function removeIndentation(rawString: string): string { + // Expand a multi-line string into independent lines. + const lines = rawString.split(/\r\n|[\n\r]/g); + + // Determine minimum indentation, not including the first line. + let minIndent; + for (let i = 1; i < lines.length; i++) { + const line = lines[i]; + const lineIndent = leadingWhitespace(line); + if ( + lineIndent < line.length && + (minIndent === undefined || lineIndent < minIndent) + ) { + minIndent = lineIndent; + if (minIndent === 0) { + break; + } + } + } + + // Remove indentation, not including the first line. + if (minIndent) { + for (let i = 1; i < lines.length; i++) { + lines[i] = lines[i].slice(minIndent); + } + } + + // Remove leading and trailing empty lines. + while (lines.length > 0 && lines[0].length === 0) { + lines.shift(); + } + while (lines.length > 0 && lines[lines.length - 1].length === 0) { + lines.pop(); + } + + // Return a multi-line string joined with U+000A. 
+ return lines.join('\n'); +} + +function leadingWhitespace(str) { + let i = 0; + for (; i < str.length; i++) { + if (str[i] !== ' ' && str[i] !== '\t') { + break; + } + } + return i; +} diff --git a/src/language/__tests__/lexer-test.js b/src/language/__tests__/lexer-test.js index 57d38ebbb5..c38802f0f5 100644 --- a/src/language/__tests__/lexer-test.js +++ b/src/language/__tests__/lexer-test.js @@ -301,11 +301,11 @@ describe('Lexer', () => { }); expect( - lexOne('""" white space """') + lexOne('" white space "') ).to.containSubset({ - kind: TokenKind.MULTI_LINE_STRING, + kind: TokenKind.STRING, start: 0, - end: 19, + end: 15, value: ' white space ' }); @@ -337,12 +337,12 @@ describe('Lexer', () => { }); expect( - lexOne('"""multi\rline"""') + lexOne('"""multi\rline\r\nnormalized"""') ).to.containSubset({ kind: TokenKind.MULTI_LINE_STRING, start: 0, - end: 16, - value: 'multi\rline' + end: 28, + value: 'multi\nline\nnormalized' }); expect( @@ -363,6 +363,21 @@ describe('Lexer', () => { value: 'slashes \\\\ \\/' }); + expect( + lexOne(`""" + + spans + multiple + lines + + """`) + ).to.containSubset({ + kind: TokenKind.MULTI_LINE_STRING, + start: 0, + end: 68, + value: 'spans\n multiple\n lines' + }); + }); it('lex reports useful multi-line string errors', () => { diff --git a/src/language/lexer.js b/src/language/lexer.js index 322081931b..d841758c5e 100644 --- a/src/language/lexer.js +++ b/src/language/lexer.js @@ -10,6 +10,7 @@ import type { Token } from './ast'; import type { Source } from './source'; import { syntaxError } from '../error'; +import removeIndentation from '../jsutils/removeIndentation'; /** * Given a Source object, this returns a Lexer for that source. 
@@ -532,7 +533,7 @@ function readMultiLineString(source, start, line, col, prev): Token { let position = start + 3; let chunkStart = position; let code = 0; - let value = ''; + let rawValue = ''; while ( position < body.length && @@ -544,7 +545,7 @@ function readMultiLineString(source, start, line, col, prev): Token { charCodeAt.call(body, position + 1) === 34 && charCodeAt.call(body, position + 2) === 34 ) { - value += slice.call(body, chunkStart, position); + rawValue += slice.call(body, chunkStart, position); return new Tok( MULTI_LINE_STRING, start, @@ -552,7 +553,7 @@ function readMultiLineString(source, start, line, col, prev): Token { line, col, prev, - value + removeIndentation(rawValue) ); } @@ -577,7 +578,7 @@ function readMultiLineString(source, start, line, col, prev): Token { charCodeAt.call(body, position + 2) === 34 && charCodeAt.call(body, position + 3) === 34 ) { - value += slice.call(body, chunkStart, position) + '"""'; + rawValue += slice.call(body, chunkStart, position) + '"""'; position += 4; chunkStart = position; } else { diff --git a/src/language/printer.js b/src/language/printer.js index 46e2f2699a..a7b497000f 100644 --- a/src/language/printer.js +++ b/src/language/printer.js @@ -73,9 +73,7 @@ const printDocASTReducer = { IntValue: ({ value }) => value, FloatValue: ({ value }) => value, StringValue: ({ value, multiLine }) => - multiLine ? - `"""${value.replace(/"""/g, '\\"""')}"""` : - JSON.stringify(value), + multiLine ? 
printMultiLineString(value) : JSON.stringify(value), BooleanValue: ({ value }) => JSON.stringify(value), NullValue: () => 'null', EnumValue: ({ value }) => value, @@ -204,3 +202,18 @@ function wrap(start, maybeString, end) { function indent(maybeString) { return maybeString && maybeString.replace(/\n/g, '\n '); } + +function printMultiLineString(value) { + const hasLineBreak = value.indexOf('\n') !== -1; + const hasLeadingSpace = value[0] === ' ' || value[0] === '\t'; + let printed = '"""'; + if (hasLineBreak && !hasLeadingSpace) { + printed += '\n'; + } + printed += value.replace(/"""/g, '\\"""'); + if (hasLineBreak) { + printed += '\n'; + } + printed += '"""'; + return printed; +} From 36ec0e9d34666362ff0e2b2b18edeb98e3c9abee Mon Sep 17 00:00:00 2001 From: Lee Byron Date: Wed, 29 Nov 2017 20:51:18 -0800 Subject: [PATCH 3/3] blockStringValue --- src/jsutils/dedent.js | 13 ++- src/jsutils/removeIndentation.js | 66 ----------- .../__tests__/blockStringValue-test.js | 110 ++++++++++++++++++ src/language/__tests__/kitchen-sink.graphql | 6 +- src/language/__tests__/lexer-test.js | 20 ++-- src/language/__tests__/parser-test.js | 6 +- src/language/__tests__/printer-test.js | 4 +- src/language/ast.js | 4 +- src/language/blockStringValue.js | 64 ++++++++++ src/language/lexer.js | 16 +-- src/language/parser.js | 4 +- src/language/printer.js | 21 +--- 12 files changed, 222 insertions(+), 112 deletions(-) delete mode 100644 src/jsutils/removeIndentation.js create mode 100644 src/language/__tests__/blockStringValue-test.js create mode 100644 src/language/blockStringValue.js diff --git a/src/jsutils/dedent.js b/src/jsutils/dedent.js index 1203d040fe..cbaa02c01a 100644 --- a/src/jsutils/dedent.js +++ b/src/jsutils/dedent.js @@ -7,7 +7,16 @@ * @flow */ -import removeIndentation from './removeIndentation'; + /** + * fixes identation by removing leading spaces from each line + */ +function fixIdent(str: string): string { + const indent = /^\n?( *)/.exec(str)[1]; // figure out ident 
+ return str + .replace(RegExp('^' + indent, 'mg'), '') // remove ident + .replace(/^\n*/m, '') // remove leading newline + .replace(/ *$/, ''); // remove trailing spaces +} /** * An ES6 string tag that fixes identation. Also removes leading newlines @@ -36,5 +45,5 @@ export default function dedent( } } - return removeIndentation(res) + '\n'; + return fixIdent(res); } diff --git a/src/jsutils/removeIndentation.js b/src/jsutils/removeIndentation.js deleted file mode 100644 index 4006128e28..0000000000 --- a/src/jsutils/removeIndentation.js +++ /dev/null @@ -1,66 +0,0 @@ -/* @flow */ -/** - * Copyright (c) 2017, Facebook, Inc. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. An additional grant - * of patent rights can be found in the PATENTS file in the same directory. - */ - -/** - * Removes leading identation from each line in a multi-line string. - * - * This implements RemoveIndentation() algorithm in the GraphQL spec. - * - * Note: this is similar to Python's docstring "trim" operation. - * https://www.python.org/dev/peps/pep-0257/#handling-docstring-indentation - */ -export default function removeIndentation(rawString: string): string { - // Expand a multi-line string into independent lines. - const lines = rawString.split(/\r\n|[\n\r]/g); - - // Determine minimum indentation, not including the first line. - let minIndent; - for (let i = 1; i < lines.length; i++) { - const line = lines[i]; - const lineIndent = leadingWhitespace(line); - if ( - lineIndent < line.length && - (minIndent === undefined || lineIndent < minIndent) - ) { - minIndent = lineIndent; - if (minIndent === 0) { - break; - } - } - } - - // Remove indentation, not including the first line. - if (minIndent) { - for (let i = 1; i < lines.length; i++) { - lines[i] = lines[i].slice(minIndent); - } - } - - // Remove leading and trailing empty lines. 
- while (lines.length > 0 && lines[0].length === 0) { - lines.shift(); - } - while (lines.length > 0 && lines[lines.length - 1].length === 0) { - lines.pop(); - } - - // Return a multi-line string joined with U+000A. - return lines.join('\n'); -} - -function leadingWhitespace(str) { - let i = 0; - for (; i < str.length; i++) { - if (str[i] !== ' ' && str[i] !== '\t') { - break; - } - } - return i; -} diff --git a/src/language/__tests__/blockStringValue-test.js b/src/language/__tests__/blockStringValue-test.js new file mode 100644 index 0000000000..7dc639eddb --- /dev/null +++ b/src/language/__tests__/blockStringValue-test.js @@ -0,0 +1,110 @@ +/** + * Copyright (c) 2015-present, Facebook, Inc. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + */ + +import { expect } from 'chai'; +import { describe, it } from 'mocha'; +import blockStringValue from '../blockStringValue'; + +describe('blockStringValue', () => { + + it('removes uniform indentation from a string', () => { + const rawValue = [ + '', + ' Hello,', + ' World!', + '', + ' Yours,', + ' GraphQL.', + ].join('\n'); + expect(blockStringValue(rawValue)).to.equal([ + 'Hello,', + ' World!', + '', + 'Yours,', + ' GraphQL.', + ].join('\n')); + }); + + it('removes empty leading and trailing lines', () => { + const rawValue = [ + '', + '', + ' Hello,', + ' World!', + '', + ' Yours,', + ' GraphQL.', + '', + '', + ].join('\n'); + expect(blockStringValue(rawValue)).to.equal([ + 'Hello,', + ' World!', + '', + 'Yours,', + ' GraphQL.', + ].join('\n')); + }); + + it('removes blank leading and trailing lines', () => { + const rawValue = [ + ' ', + ' ', + ' Hello,', + ' World!', + '', + ' Yours,', + ' GraphQL.', + ' ', + ' ', + ].join('\n'); + expect(blockStringValue(rawValue)).to.equal([ + 'Hello,', + ' World!', + '', + 'Yours,', + ' GraphQL.', + ].join('\n')); + }); + + it('retains indentation from first line', () => { + const rawValue = [ + ' 
Hello,', + ' World!', + '', + ' Yours,', + ' GraphQL.', + ].join('\n'); + expect(blockStringValue(rawValue)).to.equal([ + ' Hello,', + ' World!', + '', + 'Yours,', + ' GraphQL.', + ].join('\n')); + }); + + it('does not alter trailing spaces', () => { + const rawValue = [ + ' ', + ' Hello, ', + ' World! ', + ' ', + ' Yours, ', + ' GraphQL. ', + ' ', + ].join('\n'); + expect(blockStringValue(rawValue)).to.equal([ + 'Hello, ', + ' World! ', + ' ', + 'Yours, ', + ' GraphQL. ', + ].join('\n')); + }); + +}); diff --git a/src/language/__tests__/kitchen-sink.graphql b/src/language/__tests__/kitchen-sink.graphql index 9cfb217e10..6fcf394bf3 100644 --- a/src/language/__tests__/kitchen-sink.graphql +++ b/src/language/__tests__/kitchen-sink.graphql @@ -46,7 +46,11 @@ subscription StoryLikeSubscription($input: StoryLikeSubscribeInput) { } fragment frag on Friend { - foo(size: $size, bar: $b, obj: {key: "value", multiLine: """string"""}) + foo(size: $size, bar: $b, obj: {key: "value", block: """ + + block string uses \""" + + """}) } { diff --git a/src/language/__tests__/lexer-test.js b/src/language/__tests__/lexer-test.js index c38802f0f5..a0ad472525 100644 --- a/src/language/__tests__/lexer-test.js +++ b/src/language/__tests__/lexer-test.js @@ -289,12 +289,12 @@ describe('Lexer', () => { ); }); - it('lexes multi-line strings', () => { + it('lexes block strings', () => { expect( lexOne('"""simple"""') ).to.containSubset({ - kind: TokenKind.MULTI_LINE_STRING, + kind: TokenKind.BLOCK_STRING, start: 0, end: 12, value: 'simple' @@ -312,7 +312,7 @@ describe('Lexer', () => { expect( lexOne('"""contains " quote"""') ).to.containSubset({ - kind: TokenKind.MULTI_LINE_STRING, + kind: TokenKind.BLOCK_STRING, start: 0, end: 22, value: 'contains " quote' @@ -321,7 +321,7 @@ describe('Lexer', () => { expect( lexOne('"""contains \\""" triplequote"""') ).to.containSubset({ - kind: TokenKind.MULTI_LINE_STRING, + kind: TokenKind.BLOCK_STRING, start: 0, end: 31, value: 'contains """ triplequote' 
@@ -330,7 +330,7 @@ describe('Lexer', () => { expect( lexOne('"""multi\nline"""') ).to.containSubset({ - kind: TokenKind.MULTI_LINE_STRING, + kind: TokenKind.BLOCK_STRING, start: 0, end: 16, value: 'multi\nline' @@ -339,7 +339,7 @@ describe('Lexer', () => { expect( lexOne('"""multi\rline\r\nnormalized"""') ).to.containSubset({ - kind: TokenKind.MULTI_LINE_STRING, + kind: TokenKind.BLOCK_STRING, start: 0, end: 28, value: 'multi\nline\nnormalized' @@ -348,7 +348,7 @@ describe('Lexer', () => { expect( lexOne('"""unescaped \\n\\r\\b\\t\\f\\u1234"""') ).to.containSubset({ - kind: TokenKind.MULTI_LINE_STRING, + kind: TokenKind.BLOCK_STRING, start: 0, end: 32, value: 'unescaped \\n\\r\\b\\t\\f\\u1234' @@ -357,7 +357,7 @@ describe('Lexer', () => { expect( lexOne('"""slashes \\\\ \\/"""') ).to.containSubset({ - kind: TokenKind.MULTI_LINE_STRING, + kind: TokenKind.BLOCK_STRING, start: 0, end: 19, value: 'slashes \\\\ \\/' @@ -372,7 +372,7 @@ describe('Lexer', () => { """`) ).to.containSubset({ - kind: TokenKind.MULTI_LINE_STRING, + kind: TokenKind.BLOCK_STRING, start: 0, end: 68, value: 'spans\n multiple\n lines' @@ -380,7 +380,7 @@ describe('Lexer', () => { }); - it('lex reports useful multi-line string errors', () => { + it('lex reports useful block string errors', () => { expect( () => lexOne('"""') diff --git a/src/language/__tests__/parser-test.js b/src/language/__tests__/parser-test.js index 806640d498..190f070fc7 100644 --- a/src/language/__tests__/parser-test.js +++ b/src/language/__tests__/parser-test.js @@ -326,7 +326,7 @@ describe('Parser', () => { }); }); - it('parses multi-line strings', () => { + it('parses block strings', () => { expect(parseValue('["""long""" "short"]')).to.containSubset({ kind: Kind.LIST, loc: { start: 0, end: 20 }, @@ -334,11 +334,11 @@ describe('Parser', () => { { kind: Kind.STRING, loc: { start: 1, end: 11}, value: 'long', - multiLine: true }, + block: true }, { kind: Kind.STRING, loc: { start: 12, end: 19}, value: 'short', - multiLine: 
false } ] + block: false } ] }); }); diff --git a/src/language/__tests__/printer-test.js b/src/language/__tests__/printer-test.js index d06744c87c..bf30328879 100644 --- a/src/language/__tests__/printer-test.js +++ b/src/language/__tests__/printer-test.js @@ -127,7 +127,9 @@ describe('Printer', () => { } fragment frag on Friend { - foo(size: $size, bar: $b, obj: {key: "value", multiLine: """string"""}) + foo(size: $size, bar: $b, obj: {key: "value", block: """ + block string uses \""" + """}) } { diff --git a/src/language/ast.js b/src/language/ast.js index 2bca68a00e..8da495fbad 100644 --- a/src/language/ast.js +++ b/src/language/ast.js @@ -66,7 +66,7 @@ type TokenKind = '' | 'Int' | 'Float' | 'String' - | 'MultiLineString' + | 'BlockString' | 'Comment'; /** @@ -289,7 +289,7 @@ export type StringValueNode = { kind: 'StringValue'; loc?: Location; value: string; - multiLine?: boolean; + block?: boolean; }; export type BooleanValueNode = { diff --git a/src/language/blockStringValue.js b/src/language/blockStringValue.js new file mode 100644 index 0000000000..0167cf7876 --- /dev/null +++ b/src/language/blockStringValue.js @@ -0,0 +1,64 @@ +/** + * Copyright (c) 2015-present, Facebook, Inc. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + * + * @flow + */ + +/** + * Produces the value of a block string from its parsed raw value, similar to + * Coffeescript's block string, Python's docstring trim or Ruby's strip_heredoc. + * + * This implements the GraphQL spec's BlockStringValue() static algorithm. + */ +export default function blockStringValue(rawString: string): string { + // Expand a block string's raw value into independent lines. + const lines = rawString.split(/\r\n|[\n\r]/g); + + // Remove common indentation from all lines but first. 
+ let commonIndent = null; + for (let i = 1; i < lines.length; i++) { + const line = lines[i]; + const indent = leadingWhitespace(line); + if ( + indent < line.length && + (commonIndent === null || indent < commonIndent) + ) { + commonIndent = indent; + if (commonIndent === 0) { + break; + } + } + } + + if (commonIndent) { + for (let i = 1; i < lines.length; i++) { + lines[i] = lines[i].slice(commonIndent); + } + } + + // Remove leading and trailing blank lines. + while (lines.length > 0 && isBlank(lines[0])) { + lines.shift(); + } + while (lines.length > 0 && isBlank(lines[lines.length - 1])) { + lines.pop(); + } + + // Return a string of the lines joined with U+000A. + return lines.join('\n'); +} + +function leadingWhitespace(str) { + let i = 0; + while (i < str.length && (str[i] === ' ' || str[i] === '\t')) { + i++; + } + return i; +} + +function isBlank(str) { + return leadingWhitespace(str) === str.length; +} diff --git a/src/language/lexer.js b/src/language/lexer.js index d841758c5e..2199c09e94 100644 --- a/src/language/lexer.js +++ b/src/language/lexer.js @@ -10,7 +10,7 @@ import type { Token } from './ast'; import type { Source } from './source'; import { syntaxError } from '../error'; -import removeIndentation from '../jsutils/removeIndentation'; +import blockStringValue from './blockStringValue'; /** * Given a Source object, this returns a Lexer for that source. 
@@ -101,7 +101,7 @@ const NAME = 'Name'; const INT = 'Int'; const FLOAT = 'Float'; const STRING = 'String'; -const MULTI_LINE_STRING = 'MultiLineString'; +const BLOCK_STRING = 'BlockString'; const COMMENT = 'Comment'; /** @@ -128,7 +128,7 @@ export const TokenKind = { INT, FLOAT, STRING, - MULTI_LINE_STRING, + BLOCK_STRING, COMMENT }; @@ -275,7 +275,7 @@ function readToken(lexer: Lexer<*>, prev: Token): Token { case 34: if (charCodeAt.call(body, position + 1) === 34 && charCodeAt.call(body, position + 2) === 34) { - return readMultiLineString(source, position, line, col, prev); + return readBlockString(source, position, line, col, prev); } return readString(source, position, line, col, prev); } @@ -524,11 +524,11 @@ function readString(source, start, line, col, prev): Token { } /** - * Reads a multi-line string token from the source file. + * Reads a block string token from the source file. * * """("?"?(\\"""|\\(?!=""")|[^"\\]))*""" */ -function readMultiLineString(source, start, line, col, prev): Token { +function readBlockString(source, start, line, col, prev): Token { const body = source.body; let position = start + 3; let chunkStart = position; @@ -547,13 +547,13 @@ function readMultiLineString(source, start, line, col, prev): Token { ) { rawValue += slice.call(body, chunkStart, position); return new Tok( - MULTI_LINE_STRING, + BLOCK_STRING, start, position + 3, line, col, prev, - removeIndentation(rawValue) + blockStringValue(rawValue) ); } diff --git a/src/language/parser.js b/src/language/parser.js index 4e2685a1ae..a0cbee1437 100644 --- a/src/language/parser.js +++ b/src/language/parser.js @@ -543,12 +543,12 @@ function parseValueLiteral(lexer: Lexer<*>, isConst: boolean): ValueNode { loc: loc(lexer, token) }; case TokenKind.STRING: - case TokenKind.MULTI_LINE_STRING: + case TokenKind.BLOCK_STRING: lexer.advance(); return { kind: (STRING: 'StringValue'), value: ((token.value: any): string), - multiLine: token.kind === TokenKind.MULTI_LINE_STRING, + block: 
token.kind === TokenKind.BLOCK_STRING, loc: loc(lexer, token) }; case TokenKind.NAME: diff --git a/src/language/printer.js b/src/language/printer.js index a7b497000f..a611b73c28 100644 --- a/src/language/printer.js +++ b/src/language/printer.js @@ -72,8 +72,10 @@ const printDocASTReducer = { IntValue: ({ value }) => value, FloatValue: ({ value }) => value, - StringValue: ({ value, multiLine }) => - multiLine ? printMultiLineString(value) : JSON.stringify(value), + StringValue: ({ value, block: isBlockString }) => + isBlockString ? + `"""\n${value.replace(/"""/g, '\\"""')}\n"""` : + JSON.stringify(value), BooleanValue: ({ value }) => JSON.stringify(value), NullValue: () => 'null', EnumValue: ({ value }) => value, @@ -202,18 +204,3 @@ function wrap(start, maybeString, end) { function indent(maybeString) { return maybeString && maybeString.replace(/\n/g, '\n '); } - -function printMultiLineString(value) { - const hasLineBreak = value.indexOf('\n') !== -1; - const hasLeadingSpace = value[0] === ' ' || value[0] === '\t'; - let printed = '"""'; - if (hasLineBreak && !hasLeadingSpace) { - printed += '\n'; - } - printed += value.replace(/"""/g, '\\"""'); - if (hasLineBreak) { - printed += '\n'; - } - printed += '"""'; - return printed; -}