Skip to content

Commit 8fd6258

Browse files
committed
Fix #3410, #3182: Allow regex to start with space or =
A regex may not follow a specific set of tokens. These were already listed in the `NOT_REGEX` and `NOT_SPACED_REGEX` arrays. (However, I've refactored them to be more correct and to add a few missing tokens.) In all other cases (except after a spaced callable) a slash is the start of a regex, and that regex may now start with a space or an equals sign. It’s really that simple! A slash after a spaced callable is the only ambiguous case. We cannot know if that's division or function application with a regex as the argument. The spacing determines which is which: Space on both sides: - `a / b/i` -> `a / b / i` - `a /= b/i` -> `a /= b / i` No spaces: - `a/b/i` -> `a / b / i` - `a/=b/i` -> `a /= b / i` Space on the right side: - `a/ b/i` -> `a / b / i` - `a/= b/i` -> `a /= b / i` Space on the left side: - `a /b/i` -> `a(/b/i)` - `a /=b/i` -> `a(/=b/i)` The last case used to compile to `a /= b / i`, but that has been changed to be consistent with the `/` operator. The last case really looks like a regex, so it should be parsed as one. Moreover, you may now also space the `/` and `/=` operators with whitespace characters other than a space (such as tabs and non-breaking spaces) for consistency. Lastly, unclosed regexes are now reported as such, instead of generating some other confusing error message. It should perhaps also be noted that apart from escaping (such as `a /\ b/`) you may now also use parentheses to disambiguate division and regex: `a (/ b/)`. See #3182 (comment).
1 parent e769423 commit 8fd6258

File tree

4 files changed

+256
-35
lines changed

4 files changed

+256
-35
lines changed

lib/coffee-script/lexer.js

+20-11
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/lexer.coffee

+19-18
Original file line numberDiff line numberDiff line change
@@ -258,10 +258,15 @@ exports.Lexer = class Lexer
258258
when @chunk[...3] is '///'
259259
{tokens, index} = @matchWithInterpolations @chunk[3..], HEREGEX, '///', 3
260260
when match = REGEX.exec @chunk
261-
[regex] = match
261+
[regex, closed] = match
262262
index = regex.length
263263
prev = last @tokens
264-
return 0 if prev and (prev[0] in (if prev.spaced then NOT_REGEX else NOT_SPACED_REGEX))
264+
if prev
265+
if prev.spaced and prev[0] in CALLABLE
266+
return 0 if not closed or POSSIBLY_DIVISION.test regex
267+
else if prev[0] in NOT_REGEX
268+
return 0
269+
@error 'missing / (unclosed regex)' unless closed
265270
else
266271
return 0
267272

@@ -776,13 +781,13 @@ HEREDOC_INDENT = /\n+([^\n\S]*)(?=\S)/g
776781

777782
# Regex-matching-regexes.
778783
REGEX = /// ^
779-
/ (?! [\s=] ) ( # disallow leading whitespace or equals sign
784+
/ (?!/) (
780785
?: [^ [ / \n \\ ] # every other thing
781786
| \\. # anything (but newlines) escaped
782787
| \[ # character class
783788
(?: \\. | [^ \] \n \\ ] )*
784789
]
785-
)+ /
790+
)* (/)?
786791
///
787792

788793
REGEX_FLAGS = /^\w*/
@@ -798,6 +803,8 @@ HEREGEX_OMIT = ///
798803
799804
REGEX_ILLEGAL = /// ^ ( / | /{3}\s*) (\*) ///
800805
806+
POSSIBLY_DIVISION = /// ^ /=?\s ///
807+
801808
# Other regexes.
802809
MULTILINER = /\n/g
803810
@@ -841,23 +848,17 @@ RELATION = ['IN', 'OF', 'INSTANCEOF']
841848
# Boolean tokens.
842849
BOOL = ['TRUE', 'FALSE']
843850
844-
# Tokens which a regular expression will never immediately follow, but which
845-
# a division operator might.
846-
#
847-
# See: http://www.mozilla.org/js/language/js20-2002-04/rationale/syntax.html#regular-expressions
848-
#
849-
# Our list is shorter, due to sans-parentheses method calls.
850-
NOT_REGEX = ['NUMBER', 'REGEX', 'BOOL', 'NULL', 'UNDEFINED', '++', '--']
851-
852-
# If the previous token is not spaced, there are more preceding tokens that
853-
# force a division parse:
854-
NOT_SPACED_REGEX = NOT_REGEX.concat ')', '}', 'THIS', 'IDENTIFIER', 'STRING', ']'
855-
856851
# Tokens which could legitimately be invoked or indexed. An opening
857852
# parentheses or bracket following these tokens will be recorded as the start
858853
# of a function invocation or indexing operation.
859-
CALLABLE = ['IDENTIFIER', 'STRING', 'REGEX', ')', ']', '}', '?', '::', '@', 'THIS', 'SUPER']
860-
INDEXABLE = CALLABLE.concat 'NUMBER', 'BOOL', 'NULL', 'UNDEFINED'
854+
CALLABLE = ['IDENTIFIER', ')', ']', '?', '@', 'THIS', 'SUPER']
855+
INDEXABLE = CALLABLE.concat ['NUMBER', 'STRING', 'REGEX', 'BOOL', 'NULL', 'UNDEFINED', '}', '::']
856+
857+
# Tokens which a regular expression will never immediately follow (except spaced
858+
# CALLABLEs in some cases), but which a division operator can.
859+
#
860+
# See: http://www-archive.mozilla.org/js/language/js20-2002-04/rationale/syntax.html#regular-expressions
861+
NOT_REGEX = INDEXABLE.concat ['++', '--']
861862
862863
# Tokens that, when immediately preceding a `WHEN`, indicate that the `WHEN`
863864
# occurs at the start of a line. We disambiguate these from trailing whens to

test/error_messages.coffee

+26
Original file line numberDiff line numberDiff line change
@@ -405,3 +405,29 @@ test "missing `)`, `}`, `]`", ->
405405
foo#{ bar "#{1}"
406406
^
407407
'''
408+
409+
test "unclosed regexes", ->
410+
assertErrorFormat '''
411+
/
412+
''', '''
413+
[stdin]:1:1: error: missing / (unclosed regex)
414+
/
415+
^
416+
'''
417+
assertErrorFormat '''
418+
# Note the double escaping; this would be `/a\/` real code.
419+
/a\\/
420+
''', '''
421+
[stdin]:2:1: error: missing / (unclosed regex)
422+
/a\\/
423+
^
424+
'''
425+
assertErrorFormat '''
426+
/// ^
427+
a #{""" ""#{if /[/].test "|" then 1 else 0}"" """}
428+
///
429+
''', '''
430+
[stdin]:2:18: error: missing / (unclosed regex)
431+
a #{""" ""#{if /[/].test "|" then 1 else 0}"" """}
432+
^
433+
'''

test/regexps.coffee

+191-6
Original file line numberDiff line numberDiff line change
@@ -13,16 +13,34 @@ test "basic regular expression literals", ->
1313
ok 'a'.match /a/g
1414

1515
test "division is not confused for a regular expression", ->
16+
# Any spacing around the slash is allowed when it cannot be a regex.
1617
eq 2, 4 / 2 / 1
17-
18-
a = 4
18+
eq 2, 4/2/1
19+
eq 2, 4/ 2 / 1
20+
eq 2, 4 /2 / 1
21+
eq 2, 4 / 2/ 1
22+
eq 2, 4 / 2 /1
23+
eq 2, 4 /2/ 1
24+
25+
a = (regex) -> regex.test 'a b c'
26+
a.valueOf = -> 4
1927
b = 2
2028
g = 1
21-
eq 2, a / b/g
2229

23-
a = 10
24-
b = a /= 4 / 2
25-
eq a, 5
30+
eq 2, a / b/g
31+
eq 2, a/ b/g
32+
eq 2, a / b/ g
33+
eq 2, a / b/g # Tabs.
34+
eq 2, a / b/g # Non-breaking spaces.
35+
eq true, a /b/g
36+
# Use parentheses to disambiguate.
37+
eq true, a(/ b/g)
38+
eq true, a(/ b/)
39+
eq true, a (/ b/)
40+
# Escape to disambiguate.
41+
eq true, a /\ b/g
42+
eq false, a /\ b/g
43+
eq true, a /\ b/
2644

2745
obj = method: -> 2
2846
two = 2
@@ -32,6 +50,173 @@ test "division is not confused for a regular expression", ->
3250
eq 2, (4)/2/i
3351
eq 1, i/i/i
3452

53+
a = ''
54+
a += ' ' until / /.test a
55+
eq a, ' '
56+
57+
a = if /=/.test '=' then yes else no
58+
eq a, yes
59+
60+
a = if !/=/.test '=' then yes else no
61+
eq a, no
62+
63+
#3182:
64+
match = 'foo=bar'.match /=/
65+
eq match[0], '='
66+
67+
#3410:
68+
ok ' '.match(/ /)[0] is ' '
69+
70+
71+
test "division vs regex after a callable token", ->
72+
b = 2
73+
g = 1
74+
r = (r) -> r.test 'b'
75+
76+
a = 4
77+
eq 2, a / b/g
78+
eq 2, a/b/g
79+
eq 2, a/ b/g
80+
eq true, r /b/g
81+
eq 2, (1 + 3) / b/g
82+
eq 2, (1 + 3)/b/g
83+
eq 2, (1 + 3)/ b/g
84+
eq true, (r) /b/g
85+
eq 2, [4][0] / b/g
86+
eq 2, [4][0]/b/g
87+
eq 2, [4][0]/ b/g
88+
eq true, [r][0] /b/g
89+
eq 0.5, 4? / b/g
90+
eq 0.5, 4?/b/g
91+
eq 0.5, 4?/ b/g
92+
eq true, r? /b/g
93+
(->
94+
eq 2, @ / b/g
95+
eq 2, @/b/g
96+
eq 2, @/ b/g
97+
).call 4
98+
(->
99+
eq true, @ /b/g
100+
).call r
101+
(->
102+
eq 2, this / b/g
103+
eq 2, this/b/g
104+
eq 2, this/ b/g
105+
).call 4
106+
(->
107+
eq true, this /b/g
108+
).call r
109+
class A
110+
p: (regex) -> if regex then r regex else 4
111+
class B extends A
112+
p: ->
113+
eq 2, super / b/g
114+
eq 2, super/b/g
115+
eq 2, super/ b/g
116+
eq true, super /b/g
117+
new B().p()
118+
119+
test "always division and never regex after some tokens", ->
120+
b = 2
121+
g = 1
122+
123+
eq 2, 4 / b/g
124+
eq 2, 4/b/g
125+
eq 2, 4/ b/g
126+
eq 2, 4 /b/g
127+
eq 2, "4" / b/g
128+
eq 2, "4"/b/g
129+
eq 2, "4"/ b/g
130+
eq 2, "4" /b/g
131+
ok isNaN /a/ / b/g
132+
ok isNaN /a/i / b/g
133+
ok isNaN /a//b/g
134+
ok isNaN /a/i/b/g
135+
ok isNaN /a// b/g
136+
ok isNaN /a/i/ b/g
137+
ok isNaN /a/ /b/g
138+
ok isNaN /a/i /b/g
139+
eq 0.5, true / b/g
140+
eq 0.5, true/b/g
141+
eq 0.5, true/ b/g
142+
eq 0.5, true /b/g
143+
eq 0, false / b/g
144+
eq 0, false/b/g
145+
eq 0, false/ b/g
146+
eq 0, false /b/g
147+
eq 0, null / b/g
148+
eq 0, null/b/g
149+
eq 0, null/ b/g
150+
eq 0, null /b/g
151+
ok isNaN undefined / b/g
152+
ok isNaN undefined/b/g
153+
ok isNaN undefined/ b/g
154+
ok isNaN undefined /b/g
155+
ok isNaN {a: 4} / b/g
156+
ok isNaN {a: 4}/b/g
157+
ok isNaN {a: 4}/ b/g
158+
ok isNaN {a: 4} /b/g
159+
o = prototype: 4
160+
eq 2, o:: / b/g
161+
eq 2, o::/b/g
162+
eq 2, o::/ b/g
163+
eq 2, o:: /b/g
164+
i = 4
165+
eq 2.0, i++ / b/g
166+
eq 2.5, i++/b/g
167+
eq 3.0, i++/ b/g
168+
eq 3.5, i++ /b/g
169+
eq 4.0, i-- / b/g
170+
eq 3.5, i--/b/g
171+
eq 3.0, i--/ b/g
172+
eq 2.5, i-- /b/g
173+
174+
test "compound division vs regex", ->
175+
c = 4
176+
i = 2
177+
178+
a = 10
179+
b = a /= c / i
180+
eq a, 5
181+
182+
a = 10
183+
b = a /= c /i
184+
eq a, 5
185+
186+
a = 10
187+
b = a /= c /i # Tabs.
188+
eq a, 5
189+
190+
a = 10
191+
b =/= c /i # Non-breaking spaces.
192+
eq a, 5
193+
194+
a = 10
195+
b = a/= c /i
196+
eq a, 5
197+
198+
a = 10
199+
b = a/=c/i
200+
eq a, 5
201+
202+
a = (regex) -> regex.test '=C '
203+
b = a /=c /i
204+
eq b, true
205+
206+
a = (regex) -> regex.test '= C '
207+
# Use parentheses to disambiguate.
208+
b = a(/= c /i)
209+
eq b, true
210+
b = a(/= c /)
211+
eq b, false
212+
b = a (/= c /)
213+
eq b, false
214+
# Escape to disambiguate.
215+
b = a /\= c /i
216+
eq b, true
217+
b = a /\= c /
218+
eq b, false
219+
35220
test "#764: regular expressions should be indexable", ->
36221
eq /0/['source'], ///#{0}///['source']
37222

0 commit comments

Comments
 (0)