Skip to content

Unboxed float type parsing in layouts_alpha #1467

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 8 commits into from
Jun 9, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14,249 changes: 7,814 additions & 6,435 deletions ocaml/boot/menhir/parser.ml

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions ocaml/boot/menhir/parser.mli
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@ type token =
| INCLUDE
| IN
| IF
| HASH_SUFFIX
| HASHOP of (string)
| HASH
| GREATERRBRACKET
Expand Down
Binary file modified ocaml/boot/ocamlc
Binary file not shown.
Binary file modified ocaml/boot/ocamllex
Binary file not shown.
43 changes: 34 additions & 9 deletions ocaml/parsing/jane_syntax_parsing.ml
Original file line number Diff line number Diff line change
Expand Up @@ -334,7 +334,10 @@ module Error = struct
| Malformed_embedding of
Embedding_syntax.t * Embedded_name.t * malformed_embedding
| Unknown_extension of Embedding_syntax.t * Erasability.t * string
| Disabled_extension : _ Language_extension.t -> error
| Disabled_extension :
{ ext : _ Language_extension.t
; maturity : Language_extension.maturity option
} -> error
| Wrong_syntactic_category of Feature.t * string
| Misnamed_embedding of
Misnamed_embedding_error.t * string * Embedding_syntax.t
Expand All @@ -347,9 +350,16 @@ end

open Error

let assert_extension_enabled ~loc ext setting =
let assert_extension_enabled
(type a) ~loc (ext : a Language_extension.t) (setting : a)
=
if not (Language_extension.is_at_least ext setting) then
raise (Error(loc, Disabled_extension ext))
let maturity : Language_extension.maturity option =
match ext with
| Layouts -> Some (setting : Language_extension.maturity)
| _ -> None
in
raise (Error(loc, Disabled_extension { ext; maturity }))
;;

let report_error ~loc = function
Expand All @@ -371,11 +381,25 @@ let report_error ~loc = function
name
Embedded_name.pp_a_term (what, embedded_name)
(Embedding_syntax.name what)
| Disabled_extension ext ->
Location.errorf
~loc
"The extension \"%s\" is disabled and cannot be used"
(Language_extension.to_string ext)
| Disabled_extension { ext; maturity } -> begin
(* CR layouts: The [maturity] special case is a bit ad-hoc, but the
layouts error message would be much worse without it. It also
would be nice to mention the language construct in the error message.
*)
match maturity with
| None ->
Location.errorf
~loc
"The extension \"%s\" is disabled and cannot be used"
(Language_extension.to_string ext)
| Some maturity ->
Location.errorf
~loc
"This construct requires the %s version of the extension \"%s\", \
which is disabled and cannot be used"
(Language_extension.maturity_to_string maturity)
(Language_extension.to_string ext)
end
| Wrong_syntactic_category(feat, cat) ->
Location.errorf
~loc
Expand Down Expand Up @@ -837,7 +861,8 @@ module AST = struct
raise_error (Wrong_syntactic_category(feat, AST.plural))
end
| Error err -> raise_error begin match err with
| Disabled_extension ext -> Disabled_extension ext
| Disabled_extension ext ->
Disabled_extension { ext; maturity = None }
| Unknown_extension name ->
Unknown_extension (AST.embedding_syntax, erasability, name)
end
Expand Down
129 changes: 129 additions & 0 deletions ocaml/parsing/lexer.mll
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,103 @@ let is_in_string = ref false
let in_string () = !is_in_string
let print_warnings = ref true

type deferred_token =
{ token : token
; start_pos : Lexing.position
; end_pos : Lexing.position
}

(* This queue will only ever have 0 or 1 elements in it. We use it
instead of an [option ref] for its convenient interface.
*)
let deferred_tokens : deferred_token Queue.t = Queue.create ()

(* Effectively splits the text in the lexer's current "window" (defined below)
into two halves. The current call to the lexer will return the first half of
the text in the window, and the next call to the lexer will return the second
half (of length [len]) of the text in the window.

"window" refers to the text matched by a production of the lexer. It spans
from [lexer.lex_start_p] to [lexer.lex_curr_p].

The function accomplishes this splitting by doing two things:
- It sets the current window of the lexbuf to only account for the
first half of the text. (The first half is of length: |text|-len.)
- It enqueues a token into [deferred_tokens] such that, the next time the
lexer is called, it will return the specified [token] *and* set the window
of the lexbuf to account for the second half of the text. (The second half
is of length: |text|.)

This business with setting the window of the lexbuf is only so that error
messages point at the right place in the program text.
*)
let enqueue_token_from_end_of_lexbuf_window (lexbuf : Lexing.lexbuf) token ~len =
let suffix_end = lexbuf.lex_curr_p in
let suffix_start =
{ suffix_end with pos_cnum = suffix_end.pos_cnum - len }
in
lexbuf.lex_curr_p <- suffix_start;
Queue.add
{ token; start_pos = suffix_start; end_pos = suffix_end }
deferred_tokens

(* Note [Lexing hack for float#]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
This note describes a non-backward-compatible Jane Street--internal change to
the lexer.

We want the lexer to lex [float#] differently than [float #]. [float#] is the
new syntax for the unboxed float type. It veers close to the syntax for the
type of all objects belonging to a class [c], which is [#c]. The way we
navigate this veering is by producing the following tokens for these source
program examples, where LIDENT(s) is an LIDENT with text [s].

float#c ==> LIDENT(float) HASH_SUFFIX LIDENT(c)
float# c ==> LIDENT(float) HASH_SUFFIX LIDENT(c)
float # c ==> LIDENT(float) HASH LIDENT(c)
float #c ==> LIDENT(float) HASH LIDENT(c)

(A) The parser interprets [LIDENT(float) HASH_SUFFIX LIDENT(c)] as
"the type constructor [c] applied to the unboxed float type."
(B) The parser interprets [LIDENT(float) HASH LIDENT(c)] as
"the type constructor [#c] applied to the usual boxed float type."

This is not a backward-compatible change. In upstream ocaml, the lexer
produces [LIDENT(float) HASH LIDENT(c)] for all the above source programs.

But, this isn't problematic: everybody puts a space before '#c' to mean (B).
No existing code writes things like [float#c] or indeed [float# c].

We accomplish this hack by setting some global mutable state upon seeing
an identifier immediately followed by a hash. When that state is set, we
will produce [HASH_SUFFIX] the next time the lexer is called. This is
done in [enqueue_hash_suffix_from_end_of_lexbuf_window].

Note [Lexing hack for hash operators]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
To complicate the above story, we don't want to treat the # in the
below program as HASH_SUFFIX:

x#~#y

We instead want:

x#~#y ==> LIDENT(x) HASHOP(#~#) LIDENT(y)

This is to allow for infix hash operators. We add an additional hack, in
the style of Note [Lexing hack for float#], where the lexer consumes [x#~#]
all at once, but produces LIDENT(x) from the current call to the lexer and
HASHOP(#~#) from the next call to the lexer. This is done in
[enqueue_hashop_from_end_of_lexbuf_window].
*)

let enqueue_hash_suffix_from_end_of_lexbuf_window lexbuf =
enqueue_token_from_end_of_lexbuf_window lexbuf HASH_SUFFIX ~len:1

let enqueue_hashop_from_end_of_lexbuf_window lexbuf ~hashop =
enqueue_token_from_end_of_lexbuf_window lexbuf (HASHOP hashop)
~len:(String.length hashop)

(* Escaped chars are interpreted in strings unless they are in comments. *)
let store_escaped_char lexbuf c =
if in_comment () then store_lexeme lexbuf else store_string_char c
Expand Down Expand Up @@ -419,8 +516,33 @@ rule token = parse
| "?" (lowercase_latin1 identchar_latin1 * as name) ':'
{ warn_latin1 lexbuf;
OPTLABEL name }
(* Lowercase identifiers are split into 3 cases, and the order matters
(longest to shortest).
*)
| (lowercase identchar * as name) ('#' symbolchar_or_hash+ as hashop)
(* See Note [Lexing hack for hash operators] *)
{ enqueue_hashop_from_end_of_lexbuf_window lexbuf ~hashop;
lookup_keyword name }
| (lowercase identchar * as name) '#'
(* See Note [Lexing hack for float#] *)
{ enqueue_hash_suffix_from_end_of_lexbuf_window lexbuf;
lookup_keyword name }
| lowercase identchar * as name
{ lookup_keyword name }
(* Lowercase latin1 identifiers are split into 3 cases, and the order matters
(longest to shortest).
*)
| (lowercase_latin1 identchar_latin1 * as name)
('#' symbolchar_or_hash+ as hashop)
(* See Note [Lexing hack for hash operators] *)
{ warn_latin1 lexbuf;
enqueue_hashop_from_end_of_lexbuf_window lexbuf ~hashop;
LIDENT name }
| (lowercase_latin1 identchar_latin1 * as name) '#'
(* See Note [Lexing hack for float#] *)
{ warn_latin1 lexbuf;
enqueue_hash_suffix_from_end_of_lexbuf_window lexbuf;
LIDENT name }
| lowercase_latin1 identchar_latin1 * as name
{ warn_latin1 lexbuf; LIDENT name }
| uppercase identchar * as name
Expand Down Expand Up @@ -775,6 +897,13 @@ and skip_hash_bang = parse
| "" { () }

{
let token lexbuf =
match Queue.take_opt deferred_tokens with
| None -> token lexbuf
| Some { token; start_pos; end_pos } ->
lexbuf.lex_start_p <- start_pos;
lexbuf.lex_curr_p <- end_pos;
token

let token_with_comments lexbuf =
match !preprocessor with
Expand Down
49 changes: 38 additions & 11 deletions ocaml/parsing/parser.mly
Original file line number Diff line number Diff line change
Expand Up @@ -812,6 +812,16 @@ let unboxed_float sloc sign (f, m) =
assert_unboxed_literals ~loc:(make_loc sloc);
Pconst_float (with_sign sign f, m)

(* Unboxed float type *)

let assert_unboxed_float_type ~loc =
Language_extension.(
Jane_syntax_parsing.assert_extension_enabled ~loc Layouts Alpha)

let unboxed_float_type sloc tys =
assert_unboxed_float_type ~loc:(make_loc sloc);
Ptyp_constr (mkloc (Lident "float#") (make_loc sloc), tys)

(* Jane syntax *)

let mkexp_jane_syntax
Expand Down Expand Up @@ -946,6 +956,7 @@ let mkpat_jane_syntax
%token SEMI ";"
%token SEMISEMI ";;"
%token HASH "#"
%token HASH_SUFFIX "# "
%token <string> HASHOP "##" (* just an example *)
%token SIG "sig"
%token STAR "*"
Expand Down Expand Up @@ -1030,7 +1041,7 @@ The precedences must be listed from low to high.
%nonassoc prec_constant_constructor /* cf. simple_expr (C versus C x) */
%nonassoc prec_constr_appl /* above AS BAR COLONCOLON COMMA */
%nonassoc below_HASH
%nonassoc HASH /* simple_expr/toplevel_directive */
%nonassoc HASH HASH_SUFFIX /* simple_expr/toplevel_directive */
%left HASHOP
%nonassoc below_DOT
%nonassoc DOT DOTOP
Expand Down Expand Up @@ -2743,6 +2754,11 @@ comprehension_clause:
{ $1 }
;

%inline hash:
| HASH { () }
| HASH_SUFFIX { () }
;

%inline simple_expr_:
| mkrhs(val_longident)
{ Pexp_ident ($1) }
Expand Down Expand Up @@ -2771,7 +2787,7 @@ comprehension_clause:
Pexp_open(od, mkexp ~loc:$sloc (Pexp_override $4)) }
| mod_longident DOT LBRACELESS object_expr_content error
{ unclosed "{<" $loc($3) ">}" $loc($5) }
| simple_expr HASH mkrhs(label)
| simple_expr hash mkrhs(label)
{ Pexp_send($1, $3) }
| simple_expr op(HASHOP) simple_expr
{ mkinfix $1 $2 $3 }
Expand Down Expand Up @@ -3170,7 +3186,7 @@ simple_pattern_not_ident:
{ Ppat_construct($1, None) }
| name_tag
{ Ppat_variant($1, None) }
| HASH mkrhs(type_longident)
| hash mkrhs(type_longident)
{ Ppat_type ($2) }
| mkrhs(mod_longident) DOT simple_delimited_pattern
{ Ppat_open($1, $3) }
Expand Down Expand Up @@ -3836,7 +3852,18 @@ atomic_type:
{ Ptyp_any }
| tys = actual_type_parameters
tid = mkrhs(type_longident)
{ Ptyp_constr(tid, tys) }
HASH_SUFFIX
{ match tid.txt with
| Lident "float" ->
let ident_start = fst $loc(tid) in
let hash_end = snd $loc($3) in
unboxed_float_type (ident_start, hash_end) tys
| _ ->
not_expecting $sloc "Unboxed type other than float#"
}
| tys = actual_type_parameters
tid = mkrhs(type_longident)
{ Ptyp_constr(tid, tys) } %prec below_HASH
| LESS meth_list GREATER
{ let (f, c) = $2 in Ptyp_object (f, c) }
| LESS GREATER
Expand Down Expand Up @@ -3972,19 +3999,19 @@ constant:
| FLOAT { let (f, m) = $1 in Pconst_float (f, m) }
(* The unboxed literals have to be composed of multiple lexemes so we can
handle line number directives properly *)
| HASH INT { unboxed_int $sloc $loc($2) Positive $2 }
| HASH FLOAT { unboxed_float $sloc Positive $2 }
| hash INT { unboxed_int $sloc $loc($2) Positive $2 }
| hash FLOAT { unboxed_float $sloc Positive $2 }
;
signed_constant:
constant { $1 }
| MINUS INT { let (n, m) = $2 in Pconst_integer("-" ^ n, m) }
| MINUS FLOAT { let (f, m) = $2 in Pconst_float("-" ^ f, m) }
| MINUS HASH INT { unboxed_int $sloc $loc($3) Negative $3 }
| MINUS HASH FLOAT { unboxed_float $sloc Negative $3 }
| MINUS hash INT { unboxed_int $sloc $loc($3) Negative $3 }
| MINUS hash FLOAT { unboxed_float $sloc Negative $3 }
| PLUS INT { let (n, m) = $2 in Pconst_integer (n, m) }
| PLUS FLOAT { let (f, m) = $2 in Pconst_float(f, m) }
| PLUS HASH INT { unboxed_int $sloc $loc($3) Positive $3 }
| PLUS HASH FLOAT { unboxed_float $sloc Negative $3 }
| PLUS hash INT { unboxed_int $sloc $loc($3) Positive $3 }
| PLUS hash FLOAT { unboxed_float $sloc Negative $3 }
;

/* Identifiers and long identifiers */
Expand Down Expand Up @@ -4112,7 +4139,7 @@ any_longident:
/* Toplevel directives */

toplevel_directive:
HASH dir = mkrhs(ident)
hash dir = mkrhs(ident)
arg = ioption(mk_directive_arg(toplevel_directive_argument))
{ mk_directive ~loc:$sloc dir arg }
;
Expand Down
2 changes: 1 addition & 1 deletion ocaml/parsing/pprintast.ml
Original file line number Diff line number Diff line change
Expand Up @@ -413,7 +413,7 @@ and core_type1 ctxt f x =
(list core_field_type ~sep:";") l
field_var o (* Cf #7200 *)
| Ptyp_class (li, l) -> (*FIXME*)
pp f "@[<hov2>%a#%a@]"
pp f "@[<hov2>%a@;#%a@]"
(list (core_type ctxt) ~sep:"," ~first:"(" ~last:")") l
longident_loc li
| Ptyp_package (lid, cstrs) ->
Expand Down
12 changes: 12 additions & 0 deletions ocaml/testsuite/tests/typing-layouts/parsing.compilers.reference
Original file line number Diff line number Diff line change
Expand Up @@ -11,4 +11,16 @@ Line 2, characters 11-15:
2 | type ('a : valu) t0 = 'a list;;
^^^^
Error: Syntax error: layout expected.
Line 2, characters 9-15:
2 | type t = float#;;
^^^^^^
Error: This construct requires the alpha version of the extension "layouts", which is disabled and cannot be used
Line 2, characters 9-13:
2 | type t = int#;;
^^^^
Error: Syntax error: Unboxed type other than float# not expected.
Line 2, characters 9-17:
2 | type t = Float.t#;;
^^^^^^^^
Error: Syntax error: Unboxed type other than float# not expected.

6 changes: 6 additions & 0 deletions ocaml/testsuite/tests/typing-layouts/parsing.ml
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,9 @@ type ('a : immediate) t0 = 'a list;;
type ('a : void) t0 = 'a list;;

type ('a : valu) t0 = 'a list;;

type t = float#;;

type t = int#;;

type t = Float.t#;;
Loading