Skip to content

Commit 6ecaf12

Browse files
committed
Fix unhandled cases for exotic idents
Duplicates rescript-lang#6658, but with much smaller changes As follows @cristianoc review, I tried to reduce possible affected surface to only exotic uident.
1 parent 0b0b5c2 commit 6ecaf12

12 files changed

+164
-106
lines changed

jscomp/ext/ext_ident.ml

+21
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,27 @@ let [@inline] no_escape (c : char) =
132132
| '0' .. '9' | '_' | '$' -> true
133133
| _ -> false
134134

135+
let is_uident name =
136+
let len = String.length name in
137+
if len > 0 then
138+
match name.[0] with
139+
| 'A' .. 'Z' -> true
140+
| _ -> false
141+
else false
142+
143+
let is_exotic name =
144+
let len = String.length name in
145+
len >= 3
146+
&& name.[0] = '\\'
147+
&& name.[1] = '\"'
148+
&& name.[len - 1] = '\"'
149+
150+
let unwrap_exotic name =
151+
if is_exotic name then
152+
let len = String.length name in
153+
String.sub name 2 (len - 3)
154+
else name
155+
135156
exception Not_normal_letter of int
136157
let name_mangle name =
137158
let len = String.length name in

jscomp/ext/ext_ident.mli

+4
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,11 @@ val create_tmp : ?name:string -> unit -> Ident.t
4848

4949
val make_unused : unit -> Ident.t
5050

51+
val is_uident : string -> bool
5152

53+
val is_exotic : string -> bool
54+
55+
val unwrap_exotic : string -> string
5256

5357
(**
5458
Invariant: if name is not converted, the reference should be equal

jscomp/syntax/src/res_ast_debugger.ml

+1-1
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ module SexpAst = struct
5454
| [] -> [Sexp.list []]
5555
| items -> List.map f items
5656

57-
let string txt = Sexp.atom ("\"" ^ txt ^ "\"")
57+
let string txt = Sexp.atom ("\"" ^ Ext_ident.unwrap_exotic txt ^ "\"")
5858

5959
let char c = Sexp.atom ("'" ^ Char.escaped c ^ "'")
6060

jscomp/syntax/src/res_outcome_printer.ml

+11-71
Original file line numberDiff line numberDiff line change
@@ -8,70 +8,7 @@
88
* In general it represent messages to show results or errors to the user. *)
99

1010
module Doc = Res_doc
11-
module Token = Res_token
12-
13-
let rec unsafe_for_all_range s ~start ~finish p =
14-
start > finish
15-
|| p (String.unsafe_get s start)
16-
&& unsafe_for_all_range s ~start:(start + 1) ~finish p
17-
18-
let for_all_from s start p =
19-
let len = String.length s in
20-
unsafe_for_all_range s ~start ~finish:(len - 1) p
21-
22-
(* See https://github.com/rescript-lang/rescript-compiler/blob/726cfa534314b586e5b5734471bc2023ad99ebd9/jscomp/ext/ext_string.ml#L510 *)
23-
let isValidNumericPolyvarNumber (x : string) =
24-
let len = String.length x in
25-
len > 0
26-
&&
27-
let a = Char.code (String.unsafe_get x 0) in
28-
a <= 57
29-
&&
30-
if len > 1 then
31-
a > 48
32-
&& for_all_from x 1 (function
33-
| '0' .. '9' -> true
34-
| _ -> false)
35-
else a >= 48
36-
37-
type identifierStyle = ExoticIdent | NormalIdent
38-
39-
let classifyIdentContent ~allowUident txt =
40-
let len = String.length txt in
41-
let rec go i =
42-
if i == len then NormalIdent
43-
else
44-
let c = String.unsafe_get txt i in
45-
if
46-
i == 0
47-
&& not
48-
((allowUident && c >= 'A' && c <= 'Z')
49-
|| (c >= 'a' && c <= 'z')
50-
|| c = '_')
51-
then ExoticIdent
52-
else if
53-
not
54-
((c >= 'a' && c <= 'z')
55-
|| (c >= 'A' && c <= 'Z')
56-
|| c = '\'' || c = '_'
57-
|| (c >= '0' && c <= '9'))
58-
then ExoticIdent
59-
else go (i + 1)
60-
in
61-
if Token.isKeywordTxt txt then ExoticIdent else go 0
62-
63-
let printIdentLike ~allowUident txt =
64-
match classifyIdentContent ~allowUident txt with
65-
| ExoticIdent -> Doc.concat [Doc.text "\\\""; Doc.text txt; Doc.text "\""]
66-
| NormalIdent -> Doc.text txt
67-
68-
let printPolyVarIdent txt =
69-
(* numeric poly-vars don't need quotes: #644 *)
70-
if isValidNumericPolyvarNumber txt then Doc.text txt
71-
else
72-
match classifyIdentContent ~allowUident:true txt with
73-
| ExoticIdent -> Doc.concat [Doc.text "\""; Doc.text txt; Doc.text "\""]
74-
| NormalIdent -> Doc.text txt
11+
module Printer = Res_printer
7512

7613
(* ReScript doesn't have parenthesized identifiers.
7714
* We don't support custom operators. *)
@@ -119,7 +56,7 @@ let escapeStringContents s =
11956

12057
let rec printOutIdentDoc ?(allowUident = true) (ident : Outcometree.out_ident) =
12158
match ident with
122-
| Oide_ident s -> printIdentLike ~allowUident s
59+
| Oide_ident s -> Printer.printIdentLike ~allowUident s
12360
| Oide_dot (ident, s) ->
12461
Doc.concat [printOutIdentDoc ident; Doc.dot; Doc.text s]
12562
| Oide_apply (call, arg) ->
@@ -189,7 +126,8 @@ let rec printOutTypeDoc (outType : Outcometree.out_type) =
189126
Doc.space;
190127
Doc.join ~sep:Doc.space
191128
(List.map
192-
(fun lbl -> printIdentLike ~allowUident:true lbl)
129+
(fun lbl ->
130+
Printer.printIdentLike ~allowUident:true lbl)
193131
tags);
194132
]));
195133
Doc.softLine;
@@ -400,7 +338,7 @@ and printOutVariant variant =
400338
(Doc.concat
401339
[
402340
Doc.text "#";
403-
printPolyVarIdent name;
341+
Printer.printPolyVarIdent name;
404342
(match types with
405343
| [] -> Doc.nil
406344
| types ->
@@ -530,7 +468,7 @@ and printRecordDeclRowDoc (name, mut, opt, arg) =
530468
(Doc.concat
531469
[
532470
(if mut then Doc.text "mutable " else Doc.nil);
533-
printIdentLike ~allowUident:false name;
471+
Printer.printIdentLike ~allowUident:false name;
534472
(if opt then Doc.text "?" else Doc.nil);
535473
Doc.text ": ";
536474
printOutTypeDoc arg;
@@ -733,7 +671,9 @@ let rec printOutSigItemDoc ?(printNameAsIs = false)
733671
attrs;
734672
kw;
735673
(if printNameAsIs then Doc.text outTypeDecl.otype_name
736-
else printIdentLike ~allowUident:false outTypeDecl.otype_name);
674+
else
675+
Printer.printIdentLike ~allowUident:false
676+
outTypeDecl.otype_name);
737677
typeParams;
738678
kind;
739679
]);
@@ -865,7 +805,7 @@ and printOutExtensionConstructorDoc
865805
(Doc.concat
866806
[
867807
Doc.text "type ";
868-
printIdentLike ~allowUident:false outExt.oext_type_name;
808+
Printer.printIdentLike ~allowUident:false outExt.oext_type_name;
869809
typeParams;
870810
Doc.text " += ";
871811
Doc.line;
@@ -904,7 +844,7 @@ and printOutTypeExtensionDoc (typeExtension : Outcometree.out_type_extension) =
904844
(Doc.concat
905845
[
906846
Doc.text "type ";
907-
printIdentLike ~allowUident:false typeExtension.otyext_name;
847+
Printer.printIdentLike ~allowUident:false typeExtension.otyext_name;
908848
typeParams;
909849
Doc.text " += ";
910850
(if typeExtension.otyext_private = Asttypes.Private then

jscomp/syntax/src/res_printer.ml

+2
Original file line numberDiff line numberDiff line change
@@ -399,6 +399,7 @@ let classifyIdentContent ?(allowUident = false) ?(allowHyphen = false) txt =
399399
loop 0
400400

401401
let printIdentLike ?allowUident ?allowHyphen txt =
402+
let txt = Ext_ident.unwrap_exotic txt in
402403
match classifyIdentContent ?allowUident ?allowHyphen txt with
403404
| ExoticIdent -> Doc.concat [Doc.text "\\\""; Doc.text txt; Doc.text "\""]
404405
| NormalIdent -> Doc.text txt
@@ -432,6 +433,7 @@ let printPolyVarIdent txt =
432433
(* numeric poly-vars don't need quotes: #644 *)
433434
if isValidNumericPolyvarNumber txt then Doc.text txt
434435
else
436+
let txt = Ext_ident.unwrap_exotic txt in
435437
match classifyIdentContent ~allowUident:true txt with
436438
| ExoticIdent -> Doc.concat [Doc.text "\""; Doc.text txt; Doc.text "\""]
437439
| NormalIdent -> (

jscomp/syntax/src/res_printer.mli

+5
Original file line numberDiff line numberDiff line change
@@ -22,4 +22,9 @@ val printImplementation :
2222
val printInterface :
2323
width:int -> Parsetree.signature -> comments:Res_comment.t list -> string
2424

25+
val printIdentLike :
26+
?allowUident:bool -> ?allowHyphen:bool -> string -> Res_doc.t
27+
28+
val printPolyVarIdent : string -> Res_doc.t
29+
2530
val polyVarIdentToString : string -> string [@@live]

jscomp/syntax/src/res_scanner.ml

+19-10
Original file line numberDiff line numberDiff line change
@@ -283,10 +283,10 @@ let scanNumber scanner =
283283
else Token.Int {i = literal; suffix}
284284

285285
let scanExoticIdentifier scanner =
286-
(* TODO: are we disregarding the current char...? Should be a quote *)
287-
next scanner;
288-
let buffer = Buffer.create 20 in
289286
let startPos = position scanner in
287+
let startOff = scanner.offset in
288+
289+
next2 scanner;
290290

291291
let rec scan () =
292292
match scanner.ch with
@@ -301,14 +301,25 @@ let scanExoticIdentifier scanner =
301301
let endPos = position scanner in
302302
scanner.err ~startPos ~endPos
303303
(Diagnostics.message "Did you forget a \" here?")
304-
| ch ->
305-
Buffer.add_char buffer ch;
304+
| _ ->
306305
next scanner;
307306
scan ()
308307
in
309308
scan ();
310-
(* TODO: do we really need to create a new buffer instead of substring once? *)
311-
Token.Lident (Buffer.contents buffer)
309+
310+
let ident =
311+
(String.sub [@doesNotRaise]) scanner.src startOff (scanner.offset - startOff)
312+
in
313+
let name = Ext_ident.unwrap_exotic ident in
314+
let _ =
315+
if name = String.empty then
316+
let endPos = position scanner in
317+
scanner.err ~startPos ~endPos
318+
(Diagnostics.message "A quoted identifier can't be empty string.")
319+
in
320+
if Ext_ident.is_uident name then Token.Lident ident
321+
(* Exotic ident with uppercase letter should be encoded to avoid confusing in OCaml parsetree *)
322+
else Token.Lident name
312323

313324
let scanStringEscapeSequence ~startPos scanner =
314325
let scan ~n ~base ~max =
@@ -746,9 +757,7 @@ let rec scan scanner =
746757
| _ ->
747758
next scanner;
748759
Token.Colon)
749-
| '\\' ->
750-
next scanner;
751-
scanExoticIdentifier scanner
760+
| '\\' -> scanExoticIdentifier scanner
752761
| '/' -> (
753762
match peek scanner with
754763
| '/' ->
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,9 @@
1+
type \""
2+
3+
type \"" = int
4+
5+
let \""
6+
17
let \"a
28
b
39
c" = 1
Original file line numberDiff line numberDiff line change
@@ -1,47 +1,97 @@
11

22
Syntax error!
3-
tests/parsing/errors/scanner/exoticIdent.res:1:7
3+
tests/parsing/errors/scanner/exoticIdent.res:1:6-8
44

5-
1 │ let \"a
6-
2 │ b
7-
3 │ c" = 1
5+
1 │ type \""
6+
2 │
7+
3 │ type \"" = int
88

9-
A quoted identifier can't contain line breaks.
9+
A quoted identifier can't be empty string.
1010

1111

1212
Syntax error!
13-
tests/parsing/errors/scanner/exoticIdent.res:2:1
13+
tests/parsing/errors/scanner/exoticIdent.res:3:6-8
1414

15-
1 │ let \"a
16-
2 │ b
17-
3 │ c" = 1
15+
1 │ type \""
16+
2 │
17+
3 │ type \"" = int
1818
4 │
19+
5 │ let \""
1920

20-
Did you forget a `=` here?
21+
A quoted identifier can't be empty string.
2122

2223

2324
Syntax error!
24-
tests/parsing/errors/scanner/exoticIdent.res:3:2-4:0
25+
tests/parsing/errors/scanner/exoticIdent.res:5:5-7
2526

26-
1 │ let \"a
27-
2 │ b
28-
3 │ c" = 1
27+
3 │ type \"" = int
2928
4 │
29+
5 │ let \""
30+
6 │
31+
7 │ let \"a
3032

31-
This string is missing a double quote at the end
33+
A quoted identifier can't be empty string.
3234

3335

3436
Syntax error!
35-
tests/parsing/errors/scanner/exoticIdent.res:3:2-4:0
37+
tests/parsing/errors/scanner/exoticIdent.res:5:8-7:3
3638

37-
1 │ let \"a
38-
2 │ b
39-
3 │ c" = 1
39+
3 │ type \"" = int
4040
4 │
41+
5 │ let \""
42+
6 │
43+
7 │ let \"a
44+
8 │ b
45+
9 │ c" = 1
46+
47+
Did you forget a `=` here?
48+
49+
50+
Syntax error!
51+
tests/parsing/errors/scanner/exoticIdent.res:7:5-7
52+
53+
5 │ let \""
54+
6 │
55+
7 │ let \"a
56+
8 │ b
57+
9 │ c" = 1
58+
59+
A quoted identifier can't contain line breaks.
60+
61+
62+
Syntax error!
63+
tests/parsing/errors/scanner/exoticIdent.res:8:1
64+
65+
6 │
66+
7 │ let \"a
67+
8 │ b
68+
9 │ c" = 1
69+
10 │
70+
71+
Did you forget a `=` here?
72+
73+
74+
Syntax error!
75+
tests/parsing/errors/scanner/exoticIdent.res:9:2-10:0
76+
77+
7 │ let \"a
78+
8 │ b
79+
9 │ c" = 1
80+
10 │
81+
82+
This string is missing a double quote at the end
83+
84+
85+
Syntax error!
86+
tests/parsing/errors/scanner/exoticIdent.res:9:2-10:0
87+
88+
7 │ let \"a
89+
8 │ b
90+
9 │ c" = 1
91+
10 │
4192

4293
consecutive statements on a line must be separated by ';' or a newline
4394

44-
let a = b
45-
;;c
46-
;;{js| = 1
47-
|js}
95+
type nonrec
96+
type nonrec = int
97+
let Fatal error: exception Invalid_argument("index out of bounds")

0 commit comments

Comments
 (0)