Skip to content

Runtime 5 forward & backward porting #2027

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 24 commits into from
Nov 20, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
eeda5a0
Merge branch 'main' of github.com:ocaml-flambda/flambda-backend
TheNumbat Nov 15, 2023
d183a5c
Resolve sigprocmask cr
TheNumbat Nov 15, 2023
57b05ce
Backport amd64 backend changes
TheNumbat Nov 15, 2023
e942279
Fix systhreads5 install
TheNumbat Nov 15, 2023
956b3fc
Forward port ft relative retaddr
TheNumbat Nov 15, 2023
80dc60c
Backport cmm changes
TheNumbat Nov 15, 2023
293a970
Ignore nnp link config in 5
TheNumbat Nov 15, 2023
4062f94
Fix ocamltest build with 5 runtime
TheNumbat Nov 15, 2023
3478d80
Forward port unboxed int64 in closure
TheNumbat Nov 15, 2023
f81cd59
Forward port is_last closinfo flag
TheNumbat Nov 15, 2023
9a83d7c
Backport lazy implementation
TheNumbat Nov 15, 2023
33dd9e7
Forward port fl2 root scanning fix
TheNumbat Nov 15, 2023
3884dee
Make atomics compatible with 4 and 5
TheNumbat Nov 15, 2023
3526336
Backport marshal change
TheNumbat Nov 15, 2023
f63f8ce
Implement more of the domain API using DLS
TheNumbat Nov 15, 2023
6de61de
Backport filename change
TheNumbat Nov 15, 2023
0caec88
Backport format changes
TheNumbat Nov 15, 2023
cebaf44
Ignore backport gc change
TheNumbat Nov 15, 2023
4727471
Backport hashtbl change
TheNumbat Nov 15, 2023
c1f5a22
Add stubs that runtime5 needs for linking with stdlib
TheNumbat Nov 15, 2023
e668db3
CR for SIMD register save
TheNumbat Nov 15, 2023
697c1eb
Address code review comments
mshinwell Nov 17, 2023
f97a2a1
Merge remote-tracking branch 'flambda-backend/main' into runtime5-fea…
mshinwell Nov 17, 2023
cafebf0
Don't align for runtime5
TheNumbat Nov 17, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
124 changes: 59 additions & 65 deletions backend/amd64/emit.mlp
Original file line number Diff line number Diff line change
Expand Up @@ -74,13 +74,13 @@ let cfi_endproc () =
let cfi_adjust_cfa_offset n =
if Config.asm_cfi_supported then D.cfi_adjust_cfa_offset n

let _cfi_remember_state () =
let cfi_remember_state () =
if Config.asm_cfi_supported then D.cfi_remember_state ()

let _cfi_restore_state () =
let cfi_restore_state () =
if Config.asm_cfi_supported then D.cfi_restore_state ()

let _cfi_def_cfa_register reg =
let cfi_def_cfa_register reg =
if Config.asm_cfi_supported then D.cfi_def_cfa_register reg

let emit_debug_info ?discriminator dbg =
Expand All @@ -94,7 +94,7 @@ let emit_debug_info_linear i =

let fp = Config.with_frame_pointers

let _stack_threshold_size = Config.stack_threshold * 8 (* bytes *)
let stack_threshold_size = Config.stack_threshold * 8 (* bytes *)

(* Tradeoff between code size and code speed *)

Expand Down Expand Up @@ -1269,25 +1269,19 @@ let emit_instr fallthrough i =
end
end
end
| Lop(Iextcall { func; alloc; stack_ofs
(* BACKPORT BEGIN *)
= _
(* BACKPORT END *)
}) ->
| Lop(Iextcall { func; alloc; stack_ofs }) ->
add_used_symbol func;
(* BEGIN BACKPORT
if stack_ofs > 0 then begin
if Config.runtime5 && stack_ofs > 0 then begin
I.mov rsp r13;
I.lea (mem64 QWORD stack_ofs RSP) r12;
load_symbol_addr func rax;
emit_call "caml_c_call_stack_args";
record_frame env i.live (Dbg_other i.dbg);
end else *) if alloc then begin
load_symbol_addr (Cmm.global_symbol func) rax;
emit_call (Cmm.global_symbol "caml_c_call_stack_args");
record_frame i.live (Dbg_other i.dbg);
end else if alloc then begin
load_symbol_addr (Cmm.global_symbol func) rax;
emit_call (Cmm.global_symbol "caml_c_call");
record_frame i.live (Dbg_other i.dbg);
(* BEGIN BACKPORT *)
if system <> S_win64 then begin
if not Config.runtime5 && system <> S_win64 then begin

(* In amd64.S, "caml_c_call" tail-calls the C function (in order to
produce nicer backtraces), so we need to restore r15 manually after
Expand All @@ -1299,24 +1293,21 @@ let emit_instr fallthrough i =

I.mov (domain_field Domainstate.Domain_young_ptr) r15
end
(* END BACKPORT *)
end else begin
(* BEGIN BACKPORT
I.mov rsp rbx;
cfi_remember_state ();
cfi_def_cfa_register "rbx";
(* NB: gdb has asserts on contiguous stacks that mean it
will not unwind through this unless we were to tag this
calling frame with cfi_signal_frame in it's definition. *)
I.mov (domain_field Domainstate.Domain_c_stack) rsp; *)
(* END BACKPORT *)
emit_call (Cmm.global_symbol func)
(* BEGIN BACKPORT
;
I.mov rbx rsp;
cfi_restore_state ();
*)
(* END BACKPORT *)
if Config.runtime5 then begin
I.mov rsp rbx;
cfi_remember_state ();
cfi_def_cfa_register "rbx";
(* NB: gdb has asserts on contiguous stacks that mean it
will not unwind through this unless we were to tag this
calling frame with cfi_signal_frame in it's definition. *)
I.mov (domain_field Domainstate.Domain_c_stack) rsp;
end;
emit_call (Cmm.global_symbol func);
if Config.runtime5 then begin
I.mov rbx rsp;
cfi_restore_state ();
end;
end
| Lop(Istackoffset n) ->
emit_stack_offset n
Expand Down Expand Up @@ -1690,8 +1681,9 @@ let emit_instr fallthrough i =
I.set (cond (Iunsigned Cne)) (res8 i 0);
I.movzx (res8 i 0) (res i 0)
| Lop (Idls_get) ->
Misc.fatal_error "Dls is currently not supported";
(* I.mov (domain_field Domainstate.Domain_dls_root) (res i 0) *)
if Config.runtime5
then I.mov (domain_field Domainstate.Domain_dls_root) (res i 0)
else Misc.fatal_error "Dls is not supported in runtime4.";
| Lreloadretaddr ->
()
| Lreturn ->
Expand Down Expand Up @@ -1775,16 +1767,12 @@ let emit_instr fallthrough i =
| Lraise k ->
begin match k with
| Lambda.Raise_regular ->
(* BACKPORT BEGIN *)
I.mov (int 0) (domain_field Domainstate.Domain_backtrace_pos);
(* BACKPORT END *)
emit_call (Cmm.global_symbol "caml_raise_exn");
record_frame Reg.Set.empty (Dbg_raise i.dbg)
| Lambda.Raise_reraise ->
(* BACKPORT BEGIN *)
(* emit_call (Cmm.global_symbol "caml_reraise_exn"); *)
emit_call (Cmm.global_symbol "caml_raise_exn");
(* BACKPORT END *)
emit_call (Cmm.global_symbol
(if Config.runtime5 then "caml_reraise_exn" else "caml_raise_exn"));
record_frame Reg.Set.empty (Dbg_raise i.dbg)
| Lambda.Raise_notrace ->
I.mov (domain_field Domainstate.Domain_exn_handler) rsp;
Expand Down Expand Up @@ -1853,33 +1841,40 @@ let fundecl fundecl =
D.label (label_name (emit_symbol fundecl.fun_name));
emit_debug_info fundecl.fun_dbg;
cfi_startproc ();
(* BACKPORT BEGIN *)
(* if !Clflags.runtime_variant = "d" then
emit_call (Cmm.global_symbol "caml_assert_stack_invariants");
let { max_frame_size; contains_nontail_calls} =
preproc_stack_check
~fun_body:fundecl.fun_body ~frame_size:(frame_size ()) ~trap_size:16
let handle_overflow_and_max_frame_size =
(* CR mshinwell: this should be conditionalized on a specific
"stack checks enabled" config option, so we can backport to 4.x *)
if not Config.runtime5 then None
else (
if !Clflags.runtime_variant = "d" then
emit_call (Cmm.global_symbol "caml_assert_stack_invariants");
let { max_frame_size; contains_nontail_calls} =
preproc_stack_check
~fun_body:fundecl.fun_body ~frame_size:(frame_size ()) ~trap_size:16
in
let handle_overflow =
if contains_nontail_calls || max_frame_size >= stack_threshold_size then begin
let overflow = new_label () and ret = new_label () in
let threshold_offset = Domainstate.stack_ctx_words * 8 + stack_threshold_size in
I.lea (mem64 NONE (-(max_frame_size + threshold_offset)) RSP) r10;
I.cmp (domain_field Domainstate.Domain_current_stack) r10;
I.jb (label overflow);
def_label ret;
Some (overflow, ret)
end else None
in
match handle_overflow with
| None -> None
| Some handle_overflow -> Some (handle_overflow, max_frame_size)
)
in
let handle_overflow =
if contains_nontail_calls || max_frame_size >= stack_threshold_size then begin
let overflow = new_label () and ret = new_label () in
let threshold_offset = Domainstate.stack_ctx_words * 8 + stack_threshold_size in
I.lea (mem64 NONE (-(max_frame_size + threshold_offset)) RSP) r10;
I.cmp (domain_field Domainstate.Domain_current_stack) r10;
I.jb (label overflow);
def_label ret;
Some (overflow, ret)
end else None
in*)
(* BACKPORT END *)
emit_all true fundecl.fun_body;
List.iter emit_call_gc !call_gc_sites;
List.iter emit_local_realloc !local_realloc_sites;
emit_call_safety_errors ();
(* BACKPORT BEGIN *)
(*begin match handle_overflow with
begin match handle_overflow_and_max_frame_size with
| None -> ()
| Some (overflow,ret) -> begin
| Some ((overflow,ret), max_frame_size) -> begin
def_label overflow;
(* Pass the desired frame size on the stack, since all of the
argument-passing registers may be in use.
Expand All @@ -1892,8 +1887,7 @@ let fundecl fundecl =
cfi_adjust_cfa_offset (-8);
I.jmp (label ret)
end
end;*)
(* BACKPORT END *)
end;
if !frame_required then begin
let n = frame_size() - 8 - (if fp then 8 else 0) in
if n <> 0
Expand Down
15 changes: 9 additions & 6 deletions backend/amd64/proc.ml
Original file line number Diff line number Diff line change
Expand Up @@ -309,10 +309,7 @@ let win64_float_external_arguments =
let win64_loc_external_arguments arg =
let loc = Array.make (Array.length arg) Reg.dummy in
let reg = ref 0
(* BACKPORT BEGIN *)
(* and ofs = ref 0 in *)
and ofs = ref 32 in
(* BACKPORT END *)
and ofs = ref (if Config.runtime5 then 0 else 32) in
for i = 0 to Array.length arg - 1 do
match arg.(i) with
| Val | Int | Addr as ty ->
Expand Down Expand Up @@ -374,10 +371,16 @@ let domainstate_ptr_dwarf_register_number = 14

(* Registers destroyed by operations *)

let int_regs_destroyed_at_c_call_win64 =
if Config.runtime5 then [|0;1;4;5;6;7;10;11;12|] else [|0;4;5;6;7;10;11|]

let int_regs_destroyed_at_c_call =
if Config.runtime5 then [|0;1;2;3;4;5;6;7;10;11|] else [|0;2;3;4;5;6;7;10;11|]

let destroyed_at_c_call_win64 =
(* Win64: rbx, rbp, rsi, rdi, r12-r15, xmm6-xmm15 preserved *)
let basic_regs = Array.append
(Array.map (phys_reg Int) [|0;4;5;6;7;10;11|] )
(Array.map (phys_reg Int) int_regs_destroyed_at_c_call_win64)
(Array.sub hard_float_reg 0 6)
in
fun () -> if simd_regalloc_disabled ()
Expand All @@ -387,7 +390,7 @@ let destroyed_at_c_call_win64 =
let destroyed_at_c_call_unix =
(* Unix: rbx, rbp, r12-r15 preserved *)
let basic_regs = Array.append
(Array.map (phys_reg Int) [|0;2;3;4;5;6;7;10;11|])
(Array.map (phys_reg Int) int_regs_destroyed_at_c_call)
hard_float_reg
in
fun () -> if simd_regalloc_disabled ()
Expand Down
6 changes: 5 additions & 1 deletion backend/asmlink.ml
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,11 @@ let add_ccobjs origin l =
end

let runtime_lib () =
let libname = "libasmrun" ^ !Clflags.runtime_variant ^ ext_lib in
let variant =
if Config.runtime5 && !Clflags.runtime_variant = "nnp" then ""
else !Clflags.runtime_variant
in
let libname = "libasmrun" ^ variant ^ ext_lib in
try
if !Clflags.nopervasives || not !Clflags.with_runtime then []
else [ Load_path.find libname ]
Expand Down
21 changes: 9 additions & 12 deletions backend/cmm_helpers.ml
Original file line number Diff line number Diff line change
Expand Up @@ -826,9 +826,7 @@ let get_header ptr dbg =
data race on headers. This saves performance with ThreadSanitizer
instrumentation by avoiding to instrument header loads. *)
Cop
( (* BACKPORT BEGIN mk_load_immut Word_int, *)
mk_load_mut Word_int,
(* BACKPORT END *)
( (if Config.runtime5 then mk_load_immut Word_int else mk_load_mut Word_int),
[Cop (Cadda, [ptr; Cconst_int (-size_int, dbg)], dbg)],
dbg )

Expand All @@ -850,9 +848,9 @@ let get_tag ptr dbg =
(* If byte loads are efficient *)
(* Same comment as [get_header] above *)
Cop
( (* BACKPORT BEGIN mk_load_immut Byte_unsigned, *)
mk_load_mut Byte_unsigned,
(* BACKPORT END *)
( (if Config.runtime5
then mk_load_immut Byte_unsigned
else mk_load_mut Byte_unsigned),
[Cop (Cadda, [ptr; Cconst_int (tag_offset, dbg)], dbg)],
dbg )

Expand Down Expand Up @@ -1235,9 +1233,9 @@ let make_alloc_generic ~mode set_fn dbg tag wordsize args =
Cop
( Cextcall
{ func =
(* BACKPORT BEGIN "caml_alloc_shr_check_gc" *)
"caml_alloc"
(* BACKPORT END *);
(if Config.runtime5
then "caml_alloc_shr_check_gc"
else "caml_alloc");
ty = typ_val;
alloc = true;
builtin = false;
Expand Down Expand Up @@ -3124,10 +3122,9 @@ let assignment_kind (ptr : Lambda.immediate_or_pointer)
| Assignment Modify_maybe_stack, Pointer ->
assert Config.stack_allocation;
Caml_modify_local
(* BACKPORT BEGIN | Heap_initialization, Pointer | Root_initialization,
Pointer -> Caml_initialize *)
| Heap_initialization, Pointer -> Caml_initialize
| Root_initialization, Pointer -> Simple Initialization (* BACKPORT END *)
| Root_initialization, Pointer ->
if Config.runtime5 then Caml_initialize else Simple Initialization
| Assignment _, Immediate -> Simple Assignment
| Heap_initialization, Immediate | Root_initialization, Immediate ->
Simple Initialization
Expand Down
31 changes: 16 additions & 15 deletions backend/cmmgen.ml
Original file line number Diff line number Diff line change
Expand Up @@ -155,21 +155,22 @@ let mut_from_env env ptr =
else Asttypes.Mutable
| _ -> Asttypes.Mutable

(* BACKPORT
(* Minimum of two [mutable_flag] values, assuming [Immutable < Mutable]. *)
let min_mut x y =
match x,y with
| Immutable,_ | _,Immutable -> Immutable
| Mutable,Mutable -> Mutable
*)

(* BACKPORT BEGIN
let get_field env mut ptr n dbg =
let mut = min_mut mut (mut_from_env env ptr) in
*)
let get_field env layout ptr n dbg =
let mut = mut_from_env env ptr in
(* BACKPORT END *)
| Asttypes.Immutable, _
| _, Asttypes.Immutable -> Asttypes.Immutable
| Asttypes.Mutable, Asttypes.Mutable -> Asttypes.Mutable

let mut_from_lambda = function
| Lambda.Immutable -> Asttypes.Immutable
| Lambda.Immutable_unique -> Asttypes.Immutable
| Lambda.Mutable -> Asttypes.Mutable

let get_field env mut layout ptr n dbg =
let mut = if Config.runtime5
then min_mut (mut_from_lambda mut) (mut_from_env env ptr)
else mut_from_env env ptr in
let memory_chunk =
match layout with
| Pvalue Pintval | Punboxed_int _ -> Word_int
Expand Down Expand Up @@ -1002,13 +1003,13 @@ and transl_prim_1 env p arg dbg =
Popaque ->
opaque (transl env arg) dbg
(* Heap operations *)
| Pfield (n, layout, _, _) ->
get_field env layout (transl env arg) n dbg
| Pfield (n, layout, _, mut) ->
get_field env mut layout (transl env arg) n dbg
| Pfloatfield (n,mode) ->
let ptr = transl env arg in
box_float dbg mode (floatfield n ptr dbg)
| Pufloatfield n ->
get_field env Punboxed_float (transl env arg) n dbg
get_field env Mutable Punboxed_float (transl env arg) n dbg
| Pint_as_pointer _ ->
int_as_pointer (transl env arg) dbg
(* Exceptions *)
Expand Down
6 changes: 2 additions & 4 deletions backend/emitaux.ml
Original file line number Diff line number Diff line change
Expand Up @@ -534,8 +534,7 @@ let report_error ppf = function
name Debuginfo.print_compact dbg


(* BACKPORT BEGIN *)
(*type preproc_stack_check_result =
type preproc_stack_check_result =
{ max_frame_size : int;
contains_nontail_calls : bool }

Expand All @@ -562,5 +561,4 @@ let preproc_stack_check ~fun_body ~frame_size ~trap_size =
| Lentertrap | Lraise _ ->
loop i.next fs max_fs nontail_flag
in
loop fun_body frame_size frame_size false*)
(* BACKPORT END *)
loop fun_body frame_size frame_size false
6 changes: 2 additions & 4 deletions backend/emitaux.mli
Original file line number Diff line number Diff line change
Expand Up @@ -128,14 +128,12 @@ end
exception Error of error
val report_error: Format.formatter -> error -> unit

(* BACKPORT BEGIN *)
(*type preproc_stack_check_result =
type preproc_stack_check_result =
{ max_frame_size : int;
contains_nontail_calls : bool }

val preproc_stack_check:
fun_body:Linear.instruction ->
frame_size:int ->
trap_size:int ->
preproc_stack_check_result*)
(* BACKPORT END *)
preproc_stack_check_result
Loading