Skip to content

inline fast path of caml_applyN #934

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
Jan 18, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
62 changes: 50 additions & 12 deletions backend/cmm_helpers.ml
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,14 @@ let bind_nonvar name arg fn =
let id = V.create_local name in
Clet (VP.create id, arg, fn (Cvar id))

let bind_list name args fn =
let rec aux bound_args = function
| [] -> fn bound_args
| arg :: args ->
bind name arg (fun bound_arg -> aux (bound_arg :: bound_args) args)
in
aux [] (List.rev args)

let caml_black = Nativeint.shift_left (Nativeint.of_int 3) 8

let caml_local = Nativeint.shift_left (Nativeint.of_int 2) 8
Expand Down Expand Up @@ -2059,18 +2067,53 @@ let ptr_offset ptr offset dbg =
let direct_apply lbl args (pos, _mode) dbg =
Cop (Capply (typ_val, pos), Cconst_symbol (lbl, dbg) :: args, dbg)

let call_caml_apply ty mut clos args pos mode dbg =
let arity = List.length args in
let really_call_caml_apply clos args =
let cargs =
(Cconst_symbol (apply_function_sym arity mode, dbg) :: args) @ [clos]
in
Cop (Capply (ty, pos), cargs, dbg)
in
if !Flambda_backend_flags.caml_apply_inline_fast_path
then
(* Generate the following expression:
* (if (= clos.arity N)
* (app clos.direct a1 ... aN clos)
* (app caml_applyN a1 .. aN clos)
*)
(* CR-someday gyorsh: in the [else] case above, call another version of
caml_applyN that has only the cold path. *)
bind_list "arg" args (fun args ->
bind "fun" clos (fun clos ->
Cifthenelse
( Cop
( Ccmpi Ceq,
[ Cop
( Casr,
[ get_field_gen mut clos 1 dbg;
Cconst_int (pos_arity_in_closinfo, dbg) ],
dbg );
Cconst_int (arity, dbg) ],
dbg ),
dbg,
Cop
( Capply (ty, pos),
(get_field_gen mut clos 2 dbg :: args) @ [clos],
dbg ),
dbg,
really_call_caml_apply clos args,
dbg,
Vval Pgenval )))
else really_call_caml_apply clos args

let generic_apply mut clos args (pos, mode) dbg =
match args with
| [arg] ->
bind "fun" clos (fun clos ->
Cop
(Capply (typ_val, pos), [get_field_gen mut clos 0 dbg; arg; clos], dbg))
| _ ->
let arity = List.length args in
let cargs =
(Cconst_symbol (apply_function_sym arity mode, dbg) :: args) @ [clos]
in
Cop (Capply (typ_val, pos), cargs, dbg)
| _ -> call_caml_apply typ_val mut clos args pos mode dbg

let send kind met obj args akind dbg =
let call_met obj args clos =
Expand Down Expand Up @@ -4075,12 +4118,7 @@ let indirect_call ~dbg ty pos alloc_mode f args =
( Capply (ty, pos),
[load ~dbg Word_int Asttypes.Mutable ~addr:(Cvar v); arg; Cvar v],
dbg ))
| args ->
let arity = List.length args in
let l =
(Cconst_symbol (apply_function_sym arity alloc_mode, dbg) :: args) @ [f]
in
Cop (Capply (ty, pos), l, dbg)
| args -> call_caml_apply ty Asttypes.Mutable f args pos alloc_mode dbg

let indirect_full_call ~dbg ty pos alloc_mode f = function
(* the single-argument case is already optimized by indirect_call *)
Expand Down
11 changes: 11 additions & 0 deletions driver/flambda_backend_args.ml
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,9 @@ let mk_no_long_frames f =
let mk_debug_long_frames_threshold f =
"-debug-long-frames-threshold", Arg.Int f, "n debug only: set long frames threshold"

let mk_caml_apply_inline_fast_path f =
"-caml-apply-inline-fast-path", Arg.Unit f, " Inline the fast path of caml_applyN"

let mk_dump_inlining_paths f =
"-dump-inlining-paths", Arg.Unit f, " Dump inlining paths when dumping flambda2 terms"

Expand Down Expand Up @@ -461,6 +464,7 @@ module type Flambda_backend_options = sig
val no_long_frames : unit -> unit
val long_frames_threshold : int -> unit

val caml_apply_inline_fast_path : unit -> unit
val internal_assembler : unit -> unit

val flambda2_join_points : unit -> unit
Expand Down Expand Up @@ -539,6 +543,8 @@ struct
mk_no_long_frames F.no_long_frames;
mk_debug_long_frames_threshold F.long_frames_threshold;

mk_caml_apply_inline_fast_path F.caml_apply_inline_fast_path;

mk_internal_assembler F.internal_assembler;

mk_flambda2_join_points F.flambda2_join_points;
Expand Down Expand Up @@ -652,6 +658,9 @@ module Flambda_backend_options_impl = struct
let no_long_frames = clear' Flambda_backend_flags.allow_long_frames
let long_frames_threshold n = set_long_frames_threshold n

let caml_apply_inline_fast_path =
set' Flambda_backend_flags.caml_apply_inline_fast_path

let internal_assembler = set' Flambda_backend_flags.internal_assembler

let flambda2_join_points = set Flambda2.join_points
Expand Down Expand Up @@ -860,6 +869,8 @@ module Extra_params = struct
(Printf.sprintf "Expected integer between 0 and %d"
Flambda_backend_flags.max_long_frames_threshold))
end
| "caml-apply-inline-fast-path" ->
set' Flambda_backend_flags.caml_apply_inline_fast_path
| "dasm-comments" -> set' Flambda_backend_flags.dasm_comments
| "dno-asm-comments" -> clear' Flambda_backend_flags.dasm_comments
| "gupstream-dwarf" -> set' Debugging.restrict_to_upstream_dwarf
Expand Down
1 change: 1 addition & 0 deletions driver/flambda_backend_args.mli
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ module type Flambda_backend_options = sig
val no_long_frames : unit -> unit
val long_frames_threshold : int -> unit

val caml_apply_inline_fast_path : unit -> unit
val internal_assembler : unit -> unit

val flambda2_join_points : unit -> unit
Expand Down
2 changes: 2 additions & 0 deletions driver/flambda_backend_flags.ml
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,8 @@ let allow_long_frames = ref true (* -no-long-frames *)
let max_long_frames_threshold = 0x7FFF
let long_frames_threshold = ref max_long_frames_threshold (* -debug-long-frames-threshold n *)

let caml_apply_inline_fast_path = ref false (* -caml-apply-inline-fast-path *)

type function_result_types = Never | Functors_only | All_functions
type opt_level = Oclassic | O2 | O3
type 'a or_default = Set of 'a | Default
Expand Down
1 change: 1 addition & 0 deletions driver/flambda_backend_flags.mli
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ val disable_poll_insertion : bool ref
val allow_long_frames : bool ref
val max_long_frames_threshold : int
val long_frames_threshold : int ref
val caml_apply_inline_fast_path : bool ref

type function_result_types = Never | Functors_only | All_functions
type opt_level = Oclassic | O2 | O3
Expand Down
2 changes: 1 addition & 1 deletion ocaml/dune
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
;; CR gyorsh: it is not clear what the ":standard" flags are, and they
;; may change depending on the version of dune.
;; Consider hard-coded flags, such as -O3.
(:standard -alloc-check)))
(:standard -alloc-check -caml-apply-inline-fast-path)))
(boot
(flags
(:standard -warn-error +A))))
Expand Down