Add RemoveIndentation() to the lexer for multi-line strings.

leebyron · leebyron · commit ce2d30ad6493 · 2017-06-22T00:23:12.000-07:00
diff --git a/src/jsutils/dedent.js b/src/jsutils/dedent.js
@@ -8,16 +8,7 @@
  *  of patent rights can be found in the PATENTS file in the same directory.
  */
 
- /**
-  * fixes identation by removing leading spaces from each line
-  */
-function fixIdent(str: string): string {
-  const indent = /^\n?( *)/.exec(str)[1]; // figure out ident
-  return str
-    .replace(RegExp('^' + indent, 'mg'), '') // remove ident
-    .replace(/^\n*/m, '') //  remove leading newline
-    .replace(/ *$/, ''); // remove trailing spaces
-}
+import removeIndentation from './removeIndentation';
 
 /**
  * An ES6 string tag that fixes identation. Also removes leading newlines
@@ -46,5 +37,5 @@ export default function dedent(
     }
   }
 
-  return fixIdent(res);
+  return removeIndentation(res) + '\n';
 }
diff --git a/src/jsutils/removeIndentation.js b/src/jsutils/removeIndentation.js
@@ -0,0 +1,66 @@
+/* @flow */
+/**
+ *  Copyright (c) 2017, Facebook, Inc.
+ *  All rights reserved.
+ *
+ *  This source code is licensed under the BSD-style license found in the
+ *  LICENSE file in the root directory of this source tree. An additional grant
+ *  of patent rights can be found in the PATENTS file in the same directory.
+ */
+
+/**
+ * Removes leading indentation from each line in a multi-line string.
+ * The first line is never dedented and does not affect the common indent.
+ * This implements RemoveIndentation() algorithm in the GraphQL spec.
+ * Leading/trailing zero-length lines are dropped; output is joined by "\n".
+ * Note: this is similar to Python's docstring "trim" operation.
+ * https://www.python.org/dev/peps/pep-0257/#handling-docstring-indentation
+ */
+export default function removeIndentation(rawString: string): string {
+  // Split on any line terminator: \r\n, \n, or a lone \r.
+  const lines = rawString.split(/\r\n|[\n\r]/g);
+
+  // Smallest indent over lines 1..n that contain a non-space/tab character.
+  let minIndent;
+  for (let i = 1; i < lines.length; i++) {
+    const line = lines[i];
+    const lineIndent = leadingWhitespace(line);
+    if (
+      lineIndent < line.length &&
+      (minIndent === undefined || lineIndent < minIndent)
+    ) {
+      minIndent = lineIndent;
+      if (minIndent === 0) {
+        break; // the indent cannot get any smaller, stop scanning
+      }
+    }
+  }
+
+  // Strip that indent from every line but the first (skipped if 0/undefined).
+  if (minIndent) {
+    for (let i = 1; i < lines.length; i++) {
+      lines[i] = lines[i].slice(minIndent);
+    }
+  }
+
+  // Trim outer lines that are zero-length; a spaces-only first line is kept.
+  while (lines.length > 0 && lines[0].length === 0) {
+    lines.shift();
+  }
+  while (lines.length > 0 && lines[lines.length - 1].length === 0) {
+    lines.pop();
+  }
+
+  // Rejoin with U+000A regardless of which terminators the input used.
+  return lines.join('\n');
+}
+
+function leadingWhitespace(str) { // length of the leading space/tab run
+  let i = 0;
+  for (; i < str.length; i++) {
+    if (str[i] !== ' ' && str[i] !== '\t') {
+      break; // first character that is neither a space nor a tab
+    }
+  }
+  return i; // equals str.length when str is empty or only spaces/tabs
+}
diff --git a/src/language/__tests__/lexer-test.js b/src/language/__tests__/lexer-test.js
@@ -270,11 +270,11 @@ describe('Lexer', () => {
     });
 
     expect(
-      lexOne('""" white space """')
+      lexOne('" white space "')
     ).to.containSubset({
-      kind: TokenKind.MULTI_LINE_STRING,
+      kind: TokenKind.STRING,
       start: 0,
-      end: 19,
+      end: 15,
       value: ' white space '
     });
 
@@ -332,6 +332,21 @@ describe('Lexer', () => {
       value: 'slashes \\\\ \\/'
     });
 
+    expect(
+      lexOne(`"""
+
+        spans
+          multiple
+            lines
+
+        """`)
+    ).to.containSubset({
+      kind: TokenKind.MULTI_LINE_STRING,
+      start: 0,
+      end: 68,
+      value: 'spans\n  multiple\n    lines'
+    });
+
   });
 
   it('lex reports useful multi-line string errors', () => {
diff --git a/src/language/lexer.js b/src/language/lexer.js
@@ -11,6 +11,7 @@
 import type { Token } from './ast';
 import type { Source } from './source';
 import { syntaxError } from '../error';
+import removeIndentation from '../jsutils/removeIndentation';
 
 /**
  * Given a Source object, this returns a Lexer for that source.
@@ -533,7 +534,7 @@ function readMultiLineString(source, start, line, col, prev): Token {
   let position = start + 3;
   let chunkStart = position;
   let code = 0;
-  let value = '';
+  let rawValue = '';
 
   while (
     position < body.length &&
@@ -545,15 +546,15 @@ function readMultiLineString(source, start, line, col, prev): Token {
       charCodeAt.call(body, position + 1) === 34 &&
       charCodeAt.call(body, position + 2) === 34
     ) {
-      value += slice.call(body, chunkStart, position);
+      rawValue += slice.call(body, chunkStart, position);
       return new Tok(
         MULTI_LINE_STRING,
         start,
         position + 3,
         line,
         col,
         prev,
-        value
+        removeIndentation(rawValue)
       );
     }
 
@@ -578,7 +579,7 @@ function readMultiLineString(source, start, line, col, prev): Token {
       charCodeAt.call(body, position + 2) === 34 &&
       charCodeAt.call(body, position + 3) === 34
     ) {
-      value += slice.call(body, chunkStart, position) + '"""';
+      rawValue += slice.call(body, chunkStart, position) + '"""';
       position += 4;
       chunkStart = position;
     } else {