From ce971b766f4170a51a3dfbab90a4808d98fa0b88 Mon Sep 17 00:00:00 2001
From: Alan Pierce <alangpierce@gmail.com>
Date: Thu, 11 Aug 2016 06:28:17 -0700
Subject: [PATCH 1/2] Change OUTDENT tokens to be positioned at the end of the
 previous token

This commit adds another post-processing step after normal lexing that sets the
locationData on all OUTDENT tokens to be at the last character of the previous
token. This does feel like a little bit of a hack. Ideally the location data
would be set correctly in the first place and not in a post-processing step, but
I tried that and some temporary intermediate tokens were causing problems, so I
decided to set the location data once those intermediate tokens were removed.
Also, having this as a separate processing step makes it more robust and
isolated.

This fixes the problem in https://github.com/decaffeinate/decaffeinate/issues/371 .
In that issue, the CoffeeScript tokens had three OUTDENT tokens in a row, and
the last two overlapped with the `]`. Since at least one of those OUTDENT tokens
was considered part of the function body, the function expression had an ending
position just after the end of the `]`.

OUTDENT tokens are sort of a weird case in the lexer anyway, since they often
don't correspond to an actual location in the source code. It seems like the
code in `lexer.coffee` makes an attempt at finding a good place for them, but in
some cases, it has a bad result. This seems hard to avoid in the general case.
For example, in this code:
```coffee
[->
  a]
```
There must be an OUTDENT between the `a` and the `]`, but CoffeeScript tokens
have an inclusive start and end, so they must always be at least one character
wide (I think). In this case, the lexer was choosing the `]` as the location,
and the parser ended up generating correct location data, I believe because
it ignores the outermost INDENT and OUTDENT tokens. However, with multiple
OUTDENT tokens in a row, the parser ends up producing location data that is
wrong.

It seems to me like there isn't a solid answer to "what location do OUTDENT
tokens have", since it hasn't mattered much, but for this commit, I'm defining
it: they always have the location of the last character of the previous token.
This should hopefully be fairly safe because tokens are still in the same order
relative to each other. Also, it's worth noting that this makes the start
location for OUTDENT tokens awkward. However, OUTDENT tokens are always used to
mark the end of something, so their `last_line` and `last_column` values are
always what matter when determining AST node bounds, so it is most important for
those to be correct.
---
 lib/coffee-script/rewriter.js | 18 ++++++++++++++++++
 src/rewriter.coffee           | 15 +++++++++++++++
 test/location.coffee          | 18 ++++++++++++++++++
 3 files changed, 51 insertions(+)

diff --git a/lib/coffee-script/rewriter.js b/lib/coffee-script/rewriter.js
index 2ee88b9890..8439ac2597 100644
--- a/lib/coffee-script/rewriter.js
+++ b/lib/coffee-script/rewriter.js
@@ -26,6 +26,7 @@
       this.tagPostfixConditionals();
       this.addImplicitBracesAndParens();
       this.addLocationDataToGeneratedTokens();
+      this.fixOutdentLocationData();
       return this.tokens;
     };
 
@@ -372,6 +373,23 @@
       });
     };
 
+    Rewriter.prototype.fixOutdentLocationData = function() {
+      return this.scanTokens(function(token, i, tokens) {
+        var prevLocationData;
+        if (token[0] !== 'OUTDENT') {
+          return 1;
+        }
+        prevLocationData = tokens[i - 1][2];
+        token[2] = {
+          first_line: prevLocationData.last_line,
+          first_column: prevLocationData.last_column,
+          last_line: prevLocationData.last_line,
+          last_column: prevLocationData.last_column
+        };
+        return 1;
+      });
+    };
+
     Rewriter.prototype.normalizeLines = function() {
       var action, condition, indent, outdent, starter;
       starter = indent = outdent = null;
diff --git a/src/rewriter.coffee b/src/rewriter.coffee
index 5c4d44ea06..eec724bef8 100644
--- a/src/rewriter.coffee
+++ b/src/rewriter.coffee
@@ -33,6 +33,7 @@ class exports.Rewriter
     @tagPostfixConditionals()
     @addImplicitBracesAndParens()
     @addLocationDataToGeneratedTokens()
+    @fixOutdentLocationData()
     @tokens
 
   # Rewrite the token stream, looking one token ahead and behind.
@@ -368,6 +369,20 @@ class exports.Rewriter
         last_column:  column
       return 1
 
+  # OUTDENT tokens should always be positioned at the last character of the
+  # previous token, so that AST nodes ending in an OUTDENT token end up with a
+  # location corresponding to the last "real" token under the node.
+  fixOutdentLocationData: ->
+    @scanTokens (token, i, tokens) ->
+      return 1 unless token[0] is 'OUTDENT'
+      prevLocationData = tokens[i - 1][2]
+      token[2] =
+        first_line:   prevLocationData.last_line
+        first_column: prevLocationData.last_column
+        last_line:    prevLocationData.last_line
+        last_column:  prevLocationData.last_column
+      return 1
+
   # Because our grammar is LALR(1), it can't handle some single-line
   # expressions that lack ending delimiters. The **Rewriter** adds the implicit
   # blocks, so it doesn't need to. To keep the grammar clean and tidy, trailing
diff --git a/test/location.coffee b/test/location.coffee
index 35b9584c51..1782946d82 100644
--- a/test/location.coffee
+++ b/test/location.coffee
@@ -469,6 +469,24 @@ test "Verify tokens have locations that are in order", ->
         ok token[2].first_column >= lastToken[2].last_column
     lastToken = token
 
+test "Verify OUTDENT tokens are located at the end of the previous token", ->
+  source = '''
+    SomeArr = [ ->
+      if something
+        lol =
+          count: 500
+    ]
+  '''
+  tokens = CoffeeScript.tokens source
+  [..., number, curly, outdent1, outdent2, outdent3, bracket, terminator] = tokens
+  eq number[0], 'NUMBER'
+  for outdent in [outdent1, outdent2, outdent3]
+    eq outdent[0], 'OUTDENT'
+    eq outdent[2].first_line, number[2].last_line
+    eq outdent[2].first_column, number[2].last_column
+    eq outdent[2].last_line, number[2].last_line
+    eq outdent[2].last_column, number[2].last_column
+
 test "Verify all tokens get a location", ->
   doesNotThrow ->
     tokens = CoffeeScript.tokens testScript

From 88693e420df9ff9cc5c681d6b467d56190c86df5 Mon Sep 17 00:00:00 2001
From: Alan Pierce <alangpierce@gmail.com>
Date: Sun, 2 Oct 2016 19:45:53 -0700
Subject: [PATCH 2/2] Fix location data for implicit CALL_END tokens

Fixes https://github.com/decaffeinate/decaffeinate/issues/446

In addition to OUTDENT tokens, CALL_END tokens can also be virtual tokens
without a real location, and sometimes they end up with a location that's
incorrect.
---
 lib/coffee-script/rewriter.js |  2 +-
 src/rewriter.coffee           |  3 ++-
 test/location.coffee          | 41 +++++++++++++++++++++++++++++++++++
 3 files changed, 44 insertions(+), 2 deletions(-)

diff --git a/lib/coffee-script/rewriter.js b/lib/coffee-script/rewriter.js
index 8439ac2597..41bcd4c240 100644
--- a/lib/coffee-script/rewriter.js
+++ b/lib/coffee-script/rewriter.js
@@ -376,7 +376,7 @@
     Rewriter.prototype.fixOutdentLocationData = function() {
       return this.scanTokens(function(token, i, tokens) {
         var prevLocationData;
-        if (token[0] !== 'OUTDENT') {
+        if (!(token[0] === 'OUTDENT' || (token.generated && token[0] === 'CALL_END'))) {
           return 1;
         }
         prevLocationData = tokens[i - 1][2];
diff --git a/src/rewriter.coffee b/src/rewriter.coffee
index eec724bef8..79d8eb710b 100644
--- a/src/rewriter.coffee
+++ b/src/rewriter.coffee
@@ -374,7 +374,8 @@ class exports.Rewriter
   # location corresponding to the last "real" token under the node.
   fixOutdentLocationData: ->
     @scanTokens (token, i, tokens) ->
-      return 1 unless token[0] is 'OUTDENT'
+      return 1 unless token[0] is 'OUTDENT' or
+        (token.generated and token[0] is 'CALL_END')
       prevLocationData = tokens[i - 1][2]
       token[2] =
         first_line:   prevLocationData.last_line
diff --git a/test/location.coffee b/test/location.coffee
index 1782946d82..a2f855a7ba 100644
--- a/test/location.coffee
+++ b/test/location.coffee
@@ -487,6 +487,47 @@ test "Verify OUTDENT tokens are located at the end of the previous token", ->
     eq outdent[2].last_line, number[2].last_line
     eq outdent[2].last_column, number[2].last_column
 
+test "Verify OUTDENT and CALL_END tokens are located at the end of the previous token", ->
+  source = '''
+    a = b {
+      c: ->
+        d e,
+          if f
+            g {},
+              if h
+                i {}
+    }
+  '''
+  tokens = CoffeeScript.tokens source
+  [..., closeCurly1, callEnd1, outdent1, outdent2, callEnd2, outdent3, outdent4,
+    callEnd3, outdent5, outdent6, closeCurly2, callEnd4, terminator] = tokens
+  eq closeCurly1[0], '}'
+  assertAtCloseCurly = (token) ->
+    eq token[2].first_line, closeCurly1[2].last_line
+    eq token[2].first_column, closeCurly1[2].last_column
+    eq token[2].last_line, closeCurly1[2].last_line
+    eq token[2].last_column, closeCurly1[2].last_column
+
+  for token in [outdent1, outdent2, outdent3, outdent4, outdent5, outdent6]
+    eq token[0], 'OUTDENT'
+    assertAtCloseCurly(token)
+  for token in [callEnd1, callEnd2, callEnd3]
+    eq token[0], 'CALL_END'
+    assertAtCloseCurly(token)
+
+test "Verify real CALL_END tokens have the right position", ->
+  source = '''
+    a()
+  '''
+  tokens = CoffeeScript.tokens source
+  [identifier, callStart, callEnd, terminator] = tokens
+  startIndex = identifier[2].first_column
+  eq identifier[2].last_column, startIndex
+  eq callStart[2].first_column, startIndex + 1
+  eq callStart[2].last_column, startIndex + 1
+  eq callEnd[2].first_column, startIndex + 2
+  eq callEnd[2].last_column, startIndex + 2
+
 test "Verify all tokens get a location", ->
   doesNotThrow ->
     tokens = CoffeeScript.tokens testScript