Skip to content

Commit bba0b9f

Browse files
authored
inline fast path of caml_applyN (#934)
* Add compilation flag -caml-apply-inline-fast-path * Use -caml-apply-inline-fast-path for main build and libraries * Inline fast path of caml_apply * Bind args * Refactor and apply the same transformation to Flamba 2. * Pass [ty] argument to [call_caml_apply] * Add a CR about a version of caml_applyN that has only cold path and update a comment.
1 parent b3d1a3d commit bba0b9f

File tree

6 files changed

+66
-13
lines changed

6 files changed

+66
-13
lines changed

backend/cmm_helpers.ml

Lines changed: 50 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,14 @@ let bind_nonvar name arg fn =
3939
let id = V.create_local name in
4040
Clet (VP.create id, arg, fn (Cvar id))
4141

42+
let bind_list name args fn =
43+
let rec aux bound_args = function
44+
| [] -> fn bound_args
45+
| arg :: args ->
46+
bind name arg (fun bound_arg -> aux (bound_arg :: bound_args) args)
47+
in
48+
aux [] (List.rev args)
49+
4250
let caml_black = Nativeint.shift_left (Nativeint.of_int 3) 8
4351

4452
let caml_local = Nativeint.shift_left (Nativeint.of_int 2) 8
@@ -2073,18 +2081,53 @@ let ptr_offset ptr offset dbg =
20732081
let direct_apply lbl args (pos, _mode) dbg =
20742082
Cop (Capply (typ_val, pos), Cconst_symbol (lbl, dbg) :: args, dbg)
20752083

2084+
let call_caml_apply ty mut clos args pos mode dbg =
2085+
let arity = List.length args in
2086+
let really_call_caml_apply clos args =
2087+
let cargs =
2088+
(Cconst_symbol (apply_function_sym arity mode, dbg) :: args) @ [clos]
2089+
in
2090+
Cop (Capply (ty, pos), cargs, dbg)
2091+
in
2092+
if !Flambda_backend_flags.caml_apply_inline_fast_path
2093+
then
2094+
(* Generate the following expression:
2095+
* (if (= clos.arity N)
2096+
* (app clos.direct a1 ... aN clos)
2097+
* (app caml_applyN a1 .. aN clos)
2098+
*)
2099+
(* CR-someday gyorsh: in the [else] case above, call another version of
2100+
caml_applyN that has only the cold path. *)
2101+
bind_list "arg" args (fun args ->
2102+
bind "fun" clos (fun clos ->
2103+
Cifthenelse
2104+
( Cop
2105+
( Ccmpi Ceq,
2106+
[ Cop
2107+
( Casr,
2108+
[ get_field_gen mut clos 1 dbg;
2109+
Cconst_int (pos_arity_in_closinfo, dbg) ],
2110+
dbg );
2111+
Cconst_int (arity, dbg) ],
2112+
dbg ),
2113+
dbg,
2114+
Cop
2115+
( Capply (ty, pos),
2116+
(get_field_gen mut clos 2 dbg :: args) @ [clos],
2117+
dbg ),
2118+
dbg,
2119+
really_call_caml_apply clos args,
2120+
dbg,
2121+
Vval Pgenval )))
2122+
else really_call_caml_apply clos args
2123+
20762124
let generic_apply mut clos args (pos, mode) dbg =
20772125
match args with
20782126
| [arg] ->
20792127
bind "fun" clos (fun clos ->
20802128
Cop
20812129
(Capply (typ_val, pos), [get_field_gen mut clos 0 dbg; arg; clos], dbg))
2082-
| _ ->
2083-
let arity = List.length args in
2084-
let cargs =
2085-
(Cconst_symbol (apply_function_sym arity mode, dbg) :: args) @ [clos]
2086-
in
2087-
Cop (Capply (typ_val, pos), cargs, dbg)
2130+
| _ -> call_caml_apply typ_val mut clos args pos mode dbg
20882131

20892132
let send kind met obj args akind dbg =
20902133
let call_met obj args clos =
@@ -3724,12 +3767,7 @@ let indirect_call ~dbg ty pos alloc_mode f args =
37243767
( Capply (ty, pos),
37253768
[load ~dbg Word_int Asttypes.Mutable ~addr:(Cvar v); arg; Cvar v],
37263769
dbg ))
3727-
| args ->
3728-
let arity = List.length args in
3729-
let l =
3730-
(Cconst_symbol (apply_function_sym arity alloc_mode, dbg) :: args) @ [f]
3731-
in
3732-
Cop (Capply (ty, pos), l, dbg)
3770+
| args -> call_caml_apply ty Asttypes.Mutable f args pos alloc_mode dbg
37333771

37343772
let indirect_full_call ~dbg ty pos alloc_mode f = function
37353773
(* the single-argument case is already optimized by indirect_call *)

driver/flambda_backend_args.ml

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,9 @@ let mk_no_long_frames f =
7474
let mk_debug_long_frames_threshold f =
7575
"-debug-long-frames-threshold", Arg.Int f, "n debug only: set long frames threshold"
7676

77+
let mk_caml_apply_inline_fast_path f =
78+
"-caml-apply-inline-fast-path", Arg.Unit f, " Inline the fast path of caml_applyN"
79+
7780
let mk_dump_inlining_paths f =
7881
"-dump-inlining-paths", Arg.Unit f, " Dump inlining paths when dumping flambda2 terms"
7982

@@ -476,6 +479,7 @@ module type Flambda_backend_options = sig
476479
val no_long_frames : unit -> unit
477480
val long_frames_threshold : int -> unit
478481

482+
val caml_apply_inline_fast_path : unit -> unit
479483
val internal_assembler : unit -> unit
480484

481485
val flambda2_join_points : unit -> unit
@@ -557,6 +561,8 @@ struct
557561
mk_no_long_frames F.no_long_frames;
558562
mk_debug_long_frames_threshold F.long_frames_threshold;
559563

564+
mk_caml_apply_inline_fast_path F.caml_apply_inline_fast_path;
565+
560566
mk_internal_assembler F.internal_assembler;
561567

562568
mk_flambda2_join_points F.flambda2_join_points;
@@ -674,6 +680,9 @@ module Flambda_backend_options_impl = struct
674680
let no_long_frames = clear' Flambda_backend_flags.allow_long_frames
675681
let long_frames_threshold n = set_long_frames_threshold n
676682

683+
let caml_apply_inline_fast_path =
684+
set' Flambda_backend_flags.caml_apply_inline_fast_path
685+
677686
let internal_assembler = set' Flambda_backend_flags.internal_assembler
678687

679688
let flambda2_join_points = set Flambda2.join_points
@@ -882,6 +891,8 @@ module Extra_params = struct
882891
(Printf.sprintf "Expected integer between 0 and %d"
883892
Flambda_backend_flags.max_long_frames_threshold))
884893
end
894+
| "caml-apply-inline-fast-path" ->
895+
set' Flambda_backend_flags.caml_apply_inline_fast_path
885896
| "dasm-comments" -> set' Flambda_backend_flags.dasm_comments
886897
| "gupstream-dwarf" -> set' Debugging.restrict_to_upstream_dwarf
887898
| "gstartup" -> set' Debugging.dwarf_for_startup_file

driver/flambda_backend_args.mli

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ module type Flambda_backend_options = sig
4343
val no_long_frames : unit -> unit
4444
val long_frames_threshold : int -> unit
4545

46+
val caml_apply_inline_fast_path : unit -> unit
4647
val internal_assembler : unit -> unit
4748

4849
val flambda2_join_points : unit -> unit

driver/flambda_backend_flags.ml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,8 @@ let allow_long_frames = ref true (* -no-long-frames *)
3535
let max_long_frames_threshold = 0x7FFF
3636
let long_frames_threshold = ref max_long_frames_threshold (* -debug-long-frames-threshold n *)
3737

38+
let caml_apply_inline_fast_path = ref false (* -caml-apply-inline-fast-path *)
39+
3840
type function_result_types = Never | Functors_only | All_functions
3941
type opt_level = Oclassic | O2 | O3
4042
type 'a or_default = Set of 'a | Default

driver/flambda_backend_flags.mli

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ val disable_poll_insertion : bool ref
3232
val allow_long_frames : bool ref
3333
val max_long_frames_threshold : int
3434
val long_frames_threshold : int ref
35+
val caml_apply_inline_fast_path : bool ref
3536

3637
type function_result_types = Never | Functors_only | All_functions
3738
type opt_level = Oclassic | O2 | O3

ocaml/dune

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@
2525
;; CR gyorsh: it is not clear what the ":standard" flags are, and they
2626
;; may change depending on the version of dune.
2727
;; Consider hard-coded flags, such as -O3.
28-
(:standard -alloc-check)))
28+
(:standard -alloc-check -caml-apply-inline-fast-path)))
2929
(boot
3030
(flags
3131
(:standard -warn-error +A))))

0 commit comments

Comments
 (0)