diff --git a/backend/amd64/emit.mlp b/backend/amd64/emit.mlp index d671d568de1..33576066aae 100644 --- a/backend/amd64/emit.mlp +++ b/backend/amd64/emit.mlp @@ -74,13 +74,13 @@ let cfi_endproc () = let cfi_adjust_cfa_offset n = if Config.asm_cfi_supported then D.cfi_adjust_cfa_offset n -let _cfi_remember_state () = +let cfi_remember_state () = if Config.asm_cfi_supported then D.cfi_remember_state () -let _cfi_restore_state () = +let cfi_restore_state () = if Config.asm_cfi_supported then D.cfi_restore_state () -let _cfi_def_cfa_register reg = +let cfi_def_cfa_register reg = if Config.asm_cfi_supported then D.cfi_def_cfa_register reg let emit_debug_info ?discriminator dbg = @@ -94,7 +94,7 @@ let emit_debug_info_linear i = let fp = Config.with_frame_pointers -let _stack_threshold_size = Config.stack_threshold * 8 (* bytes *) +let stack_threshold_size = Config.stack_threshold * 8 (* bytes *) (* Tradeoff between code size and code speed *) @@ -1269,25 +1269,19 @@ let emit_instr fallthrough i = end end end - | Lop(Iextcall { func; alloc; stack_ofs - (* BACKPORT BEGIN *) - = _ - (* BACKPORT END *) - }) -> + | Lop(Iextcall { func; alloc; stack_ofs }) -> add_used_symbol func; -(* BEGIN BACKPORT - if stack_ofs > 0 then begin + if Config.runtime5 && stack_ofs > 0 then begin I.mov rsp r13; I.lea (mem64 QWORD stack_ofs RSP) r12; - load_symbol_addr func rax; - emit_call "caml_c_call_stack_args"; - record_frame env i.live (Dbg_other i.dbg); - end else *) if alloc then begin + load_symbol_addr (Cmm.global_symbol func) rax; + emit_call (Cmm.global_symbol "caml_c_call_stack_args"); + record_frame i.live (Dbg_other i.dbg); + end else if alloc then begin load_symbol_addr (Cmm.global_symbol func) rax; emit_call (Cmm.global_symbol "caml_c_call"); record_frame i.live (Dbg_other i.dbg); -(* BEGIN BACKPORT *) - if system <> S_win64 then begin + if not Config.runtime5 && system <> S_win64 then begin (* In amd64.S, "caml_c_call" tail-calls the C function (in order to produce nicer 
backtraces), so we need to restore r15 manually after @@ -1299,24 +1293,21 @@ let emit_instr fallthrough i = I.mov (domain_field Domainstate.Domain_young_ptr) r15 end -(* END BACKPORT *) end else begin -(* BEGIN BACKPORT - I.mov rsp rbx; - cfi_remember_state (); - cfi_def_cfa_register "rbx"; - (* NB: gdb has asserts on contiguous stacks that mean it - will not unwind through this unless we were to tag this - calling frame with cfi_signal_frame in it's definition. *) - I.mov (domain_field Domainstate.Domain_c_stack) rsp; *) -(* END BACKPORT *) - emit_call (Cmm.global_symbol func) -(* BEGIN BACKPORT - ; - I.mov rbx rsp; - cfi_restore_state (); -*) -(* END BACKPORT *) + if Config.runtime5 then begin + I.mov rsp rbx; + cfi_remember_state (); + cfi_def_cfa_register "rbx"; + (* NB: gdb has asserts on contiguous stacks that mean it + will not unwind through this unless we were to tag this + calling frame with cfi_signal_frame in it's definition. *) + I.mov (domain_field Domainstate.Domain_c_stack) rsp; + end; + emit_call (Cmm.global_symbol func); + if Config.runtime5 then begin + I.mov rbx rsp; + cfi_restore_state (); + end; end | Lop(Istackoffset n) -> emit_stack_offset n @@ -1690,8 +1681,9 @@ let emit_instr fallthrough i = I.set (cond (Iunsigned Cne)) (res8 i 0); I.movzx (res8 i 0) (res i 0) | Lop (Idls_get) -> - Misc.fatal_error "Dls is currently not supported"; - (* I.mov (domain_field Domainstate.Domain_dls_root) (res i 0) *) + if Config.runtime5 + then I.mov (domain_field Domainstate.Domain_dls_root) (res i 0) + else Misc.fatal_error "Dls is not supported in runtime4."; | Lreloadretaddr -> () | Lreturn -> @@ -1775,16 +1767,12 @@ let emit_instr fallthrough i = | Lraise k -> begin match k with | Lambda.Raise_regular -> - (* BACKPORT BEGIN *) I.mov (int 0) (domain_field Domainstate.Domain_backtrace_pos); - (* BACKPORT END *) emit_call (Cmm.global_symbol "caml_raise_exn"); record_frame Reg.Set.empty (Dbg_raise i.dbg) | Lambda.Raise_reraise -> - (* BACKPORT BEGIN *) - (* 
emit_call (Cmm.global_symbol "caml_reraise_exn"); *) - emit_call (Cmm.global_symbol "caml_raise_exn"); - (* BACKPORT END *) + emit_call (Cmm.global_symbol + (if Config.runtime5 then "caml_reraise_exn" else "caml_raise_exn")); record_frame Reg.Set.empty (Dbg_raise i.dbg) | Lambda.Raise_notrace -> I.mov (domain_field Domainstate.Domain_exn_handler) rsp; @@ -1853,33 +1841,40 @@ let fundecl fundecl = D.label (label_name (emit_symbol fundecl.fun_name)); emit_debug_info fundecl.fun_dbg; cfi_startproc (); - (* BACKPORT BEGIN *) - (* if !Clflags.runtime_variant = "d" then - emit_call (Cmm.global_symbol "caml_assert_stack_invariants"); - let { max_frame_size; contains_nontail_calls} = - preproc_stack_check - ~fun_body:fundecl.fun_body ~frame_size:(frame_size ()) ~trap_size:16 + let handle_overflow_and_max_frame_size = + (* CR mshinwell: this should be conditionalized on a specific + "stack checks enabled" config option, so we can backport to 4.x *) + if not Config.runtime5 then None + else ( + if !Clflags.runtime_variant = "d" then + emit_call (Cmm.global_symbol "caml_assert_stack_invariants"); + let { max_frame_size; contains_nontail_calls} = + preproc_stack_check + ~fun_body:fundecl.fun_body ~frame_size:(frame_size ()) ~trap_size:16 + in + let handle_overflow = + if contains_nontail_calls || max_frame_size >= stack_threshold_size then begin + let overflow = new_label () and ret = new_label () in + let threshold_offset = Domainstate.stack_ctx_words * 8 + stack_threshold_size in + I.lea (mem64 NONE (-(max_frame_size + threshold_offset)) RSP) r10; + I.cmp (domain_field Domainstate.Domain_current_stack) r10; + I.jb (label overflow); + def_label ret; + Some (overflow, ret) + end else None + in + match handle_overflow with + | None -> None + | Some handle_overflow -> Some (handle_overflow, max_frame_size) + ) in - let handle_overflow = - if contains_nontail_calls || max_frame_size >= stack_threshold_size then begin - let overflow = new_label () and ret = new_label () in - let 
threshold_offset = Domainstate.stack_ctx_words * 8 + stack_threshold_size in - I.lea (mem64 NONE (-(max_frame_size + threshold_offset)) RSP) r10; - I.cmp (domain_field Domainstate.Domain_current_stack) r10; - I.jb (label overflow); - def_label ret; - Some (overflow, ret) - end else None - in*) - (* BACKPORT END *) emit_all true fundecl.fun_body; List.iter emit_call_gc !call_gc_sites; List.iter emit_local_realloc !local_realloc_sites; emit_call_safety_errors (); - (* BACKPORT BEGIN *) - (*begin match handle_overflow with + begin match handle_overflow_and_max_frame_size with | None -> () - | Some (overflow,ret) -> begin + | Some ((overflow,ret), max_frame_size) -> begin def_label overflow; (* Pass the desired frame size on the stack, since all of the argument-passing registers may be in use. @@ -1892,8 +1887,7 @@ let fundecl fundecl = cfi_adjust_cfa_offset (-8); I.jmp (label ret) end - end;*) - (* BACKPORT END *) + end; if !frame_required then begin let n = frame_size() - 8 - (if fp then 8 else 0) in if n <> 0 diff --git a/backend/amd64/proc.ml b/backend/amd64/proc.ml index a45d9cf84c2..dcdb59206fa 100644 --- a/backend/amd64/proc.ml +++ b/backend/amd64/proc.ml @@ -309,10 +309,7 @@ let win64_float_external_arguments = let win64_loc_external_arguments arg = let loc = Array.make (Array.length arg) Reg.dummy in let reg = ref 0 - (* BACKPORT BEGIN *) - (* and ofs = ref 0 in *) - and ofs = ref 32 in - (* BACKPORT END *) + and ofs = ref (if Config.runtime5 then 0 else 32) in for i = 0 to Array.length arg - 1 do match arg.(i) with | Val | Int | Addr as ty -> @@ -374,10 +371,16 @@ let domainstate_ptr_dwarf_register_number = 14 (* Registers destroyed by operations *) +let int_regs_destroyed_at_c_call_win64 = + if Config.runtime5 then [|0;1;4;5;6;7;10;11;12|] else [|0;4;5;6;7;10;11|] + +let int_regs_destroyed_at_c_call = + if Config.runtime5 then [|0;1;2;3;4;5;6;7;10;11|] else [|0;2;3;4;5;6;7;10;11|] + let destroyed_at_c_call_win64 = (* Win64: rbx, rbp, rsi, rdi, r12-r15, 
xmm6-xmm15 preserved *) let basic_regs = Array.append - (Array.map (phys_reg Int) [|0;4;5;6;7;10;11|] ) + (Array.map (phys_reg Int) int_regs_destroyed_at_c_call_win64) (Array.sub hard_float_reg 0 6) in fun () -> if simd_regalloc_disabled () @@ -387,7 +390,7 @@ let destroyed_at_c_call_win64 = let destroyed_at_c_call_unix = (* Unix: rbx, rbp, r12-r15 preserved *) let basic_regs = Array.append - (Array.map (phys_reg Int) [|0;2;3;4;5;6;7;10;11|]) + (Array.map (phys_reg Int) int_regs_destroyed_at_c_call) hard_float_reg in fun () -> if simd_regalloc_disabled () diff --git a/backend/asmlink.ml b/backend/asmlink.ml index 5965701e8da..7ef1ead3301 100644 --- a/backend/asmlink.ml +++ b/backend/asmlink.ml @@ -141,7 +141,11 @@ let add_ccobjs origin l = end let runtime_lib () = - let libname = "libasmrun" ^ !Clflags.runtime_variant ^ ext_lib in + let variant = + if Config.runtime5 && !Clflags.runtime_variant = "nnp" then "" + else !Clflags.runtime_variant + in + let libname = "libasmrun" ^ variant ^ ext_lib in try if !Clflags.nopervasives || not !Clflags.with_runtime then [] else [ Load_path.find libname ] diff --git a/backend/cmm_helpers.ml b/backend/cmm_helpers.ml index 0b4751ddf8b..b5b83381826 100644 --- a/backend/cmm_helpers.ml +++ b/backend/cmm_helpers.ml @@ -826,9 +826,7 @@ let get_header ptr dbg = data race on headers. This saves performance with ThreadSanitizer instrumentation by avoiding to instrument header loads. 
*) Cop - ( (* BACKPORT BEGIN mk_load_immut Word_int, *) - mk_load_mut Word_int, - (* BACKPORT END *) + ( (if Config.runtime5 then mk_load_immut Word_int else mk_load_mut Word_int), [Cop (Cadda, [ptr; Cconst_int (-size_int, dbg)], dbg)], dbg ) @@ -850,9 +848,9 @@ let get_tag ptr dbg = (* If byte loads are efficient *) (* Same comment as [get_header] above *) Cop - ( (* BACKPORT BEGIN mk_load_immut Byte_unsigned, *) - mk_load_mut Byte_unsigned, - (* BACKPORT END *) + ( (if Config.runtime5 + then mk_load_immut Byte_unsigned + else mk_load_mut Byte_unsigned), [Cop (Cadda, [ptr; Cconst_int (tag_offset, dbg)], dbg)], dbg ) @@ -1235,9 +1233,9 @@ let make_alloc_generic ~mode set_fn dbg tag wordsize args = Cop ( Cextcall { func = - (* BACKPORT BEGIN "caml_alloc_shr_check_gc" *) - "caml_alloc" - (* BACKPORT END *); + (if Config.runtime5 + then "caml_alloc_shr_check_gc" + else "caml_alloc"); ty = typ_val; alloc = true; builtin = false; @@ -3124,10 +3122,9 @@ let assignment_kind (ptr : Lambda.immediate_or_pointer) | Assignment Modify_maybe_stack, Pointer -> assert Config.stack_allocation; Caml_modify_local - (* BACKPORT BEGIN | Heap_initialization, Pointer | Root_initialization, - Pointer -> Caml_initialize *) | Heap_initialization, Pointer -> Caml_initialize - | Root_initialization, Pointer -> Simple Initialization (* BACKPORT END *) + | Root_initialization, Pointer -> + if Config.runtime5 then Caml_initialize else Simple Initialization | Assignment _, Immediate -> Simple Assignment | Heap_initialization, Immediate | Root_initialization, Immediate -> Simple Initialization diff --git a/backend/cmmgen.ml b/backend/cmmgen.ml index 9d051b12138..54f71397162 100644 --- a/backend/cmmgen.ml +++ b/backend/cmmgen.ml @@ -155,21 +155,22 @@ let mut_from_env env ptr = else Asttypes.Mutable | _ -> Asttypes.Mutable -(* BACKPORT (* Minimum of two [mutable_flag] values, assuming [Immutable < Mutable]. 
*) let min_mut x y = match x,y with - | Immutable,_ | _,Immutable -> Immutable - | Mutable,Mutable -> Mutable -*) - -(* BACKPORT BEGIN -let get_field env mut ptr n dbg = - let mut = min_mut mut (mut_from_env env ptr) in -*) - let get_field env layout ptr n dbg = - let mut = mut_from_env env ptr in -(* BACKPORT END *) + | Asttypes.Immutable, _ + | _, Asttypes.Immutable -> Asttypes.Immutable + | Asttypes.Mutable, Asttypes.Mutable -> Asttypes.Mutable + +let mut_from_lambda = function + | Lambda.Immutable -> Asttypes.Immutable + | Lambda.Immutable_unique -> Asttypes.Immutable + | Lambda.Mutable -> Asttypes.Mutable + +let get_field env mut layout ptr n dbg = + let mut = if Config.runtime5 + then min_mut (mut_from_lambda mut) (mut_from_env env ptr) + else mut_from_env env ptr in let memory_chunk = match layout with | Pvalue Pintval | Punboxed_int _ -> Word_int @@ -1002,13 +1003,13 @@ and transl_prim_1 env p arg dbg = Popaque -> opaque (transl env arg) dbg (* Heap operations *) - | Pfield (n, layout, _, _) -> - get_field env layout (transl env arg) n dbg + | Pfield (n, layout, _, mut) -> + get_field env mut layout (transl env arg) n dbg | Pfloatfield (n,mode) -> let ptr = transl env arg in box_float dbg mode (floatfield n ptr dbg) | Pufloatfield n -> - get_field env Punboxed_float (transl env arg) n dbg + get_field env Mutable Punboxed_float (transl env arg) n dbg | Pint_as_pointer _ -> int_as_pointer (transl env arg) dbg (* Exceptions *) diff --git a/backend/emitaux.ml b/backend/emitaux.ml index 44c76c5927d..c3b54b6b35c 100644 --- a/backend/emitaux.ml +++ b/backend/emitaux.ml @@ -534,8 +534,7 @@ let report_error ppf = function name Debuginfo.print_compact dbg -(* BACKPORT BEGIN *) -(*type preproc_stack_check_result = +type preproc_stack_check_result = { max_frame_size : int; contains_nontail_calls : bool } @@ -562,5 +561,4 @@ let preproc_stack_check ~fun_body ~frame_size ~trap_size = | Lentertrap | Lraise _ -> loop i.next fs max_fs nontail_flag in - loop fun_body 
frame_size frame_size false*) -(* BACKPORT END *) + loop fun_body frame_size frame_size false diff --git a/backend/emitaux.mli b/backend/emitaux.mli index 9d8a25d0144..c13594904cc 100644 --- a/backend/emitaux.mli +++ b/backend/emitaux.mli @@ -128,8 +128,7 @@ end exception Error of error val report_error: Format.formatter -> error -> unit -(* BACKPORT BEGIN *) -(*type preproc_stack_check_result = +type preproc_stack_check_result = { max_frame_size : int; contains_nontail_calls : bool } @@ -137,5 +136,4 @@ val preproc_stack_check: fun_body:Linear.instruction -> frame_size:int -> trap_size:int -> - preproc_stack_check_result*) -(* BACKPORT END *) + preproc_stack_check_result diff --git a/middle_end/flambda2/to_cmm/to_cmm_shared.ml b/middle_end/flambda2/to_cmm/to_cmm_shared.ml index 78315af5f63..77b4cf24592 100644 --- a/middle_end/flambda2/to_cmm/to_cmm_shared.ml +++ b/middle_end/flambda2/to_cmm/to_cmm_shared.ml @@ -248,12 +248,27 @@ let invalid res ~message = in call_expr, res -let make_update env res dbg kind ~symbol var ~index ~prev_updates = +let make_update env res dbg (kind : Cmm.memory_chunk) ~symbol var ~index + ~prev_updates = let To_cmm_env.{ env; res; expr = { cmm; free_vars; effs } } = To_cmm_env.inline_variable env res var in - let addr = field_address symbol index dbg in - let cmm = store ~dbg kind Initialization ~addr ~new_value:cmm in + let cmm = + if Config.runtime5 + then + let imm_or_ptr : Lambda.immediate_or_pointer = + match kind with + | Word_val -> Pointer + | Byte_unsigned | Byte_signed | Sixteen_unsigned | Sixteen_signed + | Thirtytwo_unsigned | Thirtytwo_signed | Word_int | Single | Double + | Onetwentyeight_unaligned | Onetwentyeight_aligned -> + Immediate + in + Cmm_helpers.setfield index imm_or_ptr Root_initialization symbol cmm dbg + else + let addr = field_address symbol index dbg in + store ~dbg kind Initialization ~addr ~new_value:cmm + in let update = match prev_updates with | None -> To_cmm_env.{ cmm; free_vars; effs } diff --git 
a/ocaml/asmcomp/amd64/emit.mlp b/ocaml/asmcomp/amd64/emit.mlp index 17f725916b9..75c20890c81 100644 --- a/ocaml/asmcomp/amd64/emit.mlp +++ b/ocaml/asmcomp/amd64/emit.mlp @@ -62,7 +62,6 @@ let cfi_endproc () = let cfi_adjust_cfa_offset n = if Config.asm_cfi_supported then D.cfi_adjust_cfa_offset n -(* BACKPORT let cfi_remember_state () = if Config.asm_cfi_supported then D.cfi_remember_state () @@ -71,7 +70,6 @@ let cfi_restore_state () = let cfi_def_cfa_register reg = if Config.asm_cfi_supported then D.cfi_def_cfa_register reg -*) let emit_debug_info dbg = emit_debug_info_gen dbg D.file D.loc @@ -90,13 +88,7 @@ let frame_size env = (* includes return address *) + 8 + (if fp then 8 else 0)) in -(* BACKPORT BEGIN *) - Misc.align -(* BACKPORT END *) - sz -(* BACKPORT BEGIN *) - 16 -(* BACKPORT END *) + if Config.runtime5 then sz else Misc.align sz 16 end else env.stack_offset + 8 @@ -676,25 +668,19 @@ let emit_instr env fallthrough i = end end end - | Lop(Iextcall { func; alloc; stack_ofs - (* BACKPORT BEGIN *) - = _ - (* BACKPORT END *) - }) -> + | Lop(Iextcall { func; alloc; stack_ofs }) -> add_used_symbol func; -(* BEGIN BACKPORT - if stack_ofs > 0 then begin + if Config.runtime5 && stack_ofs > 0 then begin I.mov rsp r13; I.lea (mem64 QWORD stack_ofs RSP) r12; load_symbol_addr func rax; emit_call "caml_c_call_stack_args"; record_frame env i.live (Dbg_other i.dbg); - end else *) if alloc then begin + end else if alloc then begin load_symbol_addr func rax; emit_call "caml_c_call"; record_frame env i.live (Dbg_other i.dbg); -(* BEGIN BACKPORT *) - if system <> S_win64 then begin + if not Config.runtime5 && system <> S_win64 then begin (* In amd64.S, "caml_c_call" tail-calls the C function (in order to produce nicer backtraces), so we need to restore r15 manually after @@ -706,24 +692,21 @@ let emit_instr env fallthrough i = I.mov (domain_field Domainstate.Domain_young_ptr) r15 end -(* END BACKPORT *) end else begin -(* BEGIN BACKPORT - I.mov rsp rbx; - 
cfi_remember_state (); - cfi_def_cfa_register "rbx"; - (* NB: gdb has asserts on contiguous stacks that mean it - will not unwind through this unless we were to tag this - calling frame with cfi_signal_frame in it's definition. *) - I.mov (domain_field Domainstate.Domain_c_stack) rsp; *) -(* END BACKPORT *) - emit_call func -(* BEGIN BACKPORT - ; - I.mov rbx rsp; - cfi_restore_state (); -*) -(* END BACKPORT *) + if Config.runtime5 then begin + I.mov rsp rbx; + cfi_remember_state (); + cfi_def_cfa_register "rbx"; + (* NB: gdb has asserts on contiguous stacks that mean it + will not unwind through this unless we were to tag this + calling frame with cfi_signal_frame in it's definition. *) + I.mov (domain_field Domainstate.Domain_c_stack) rsp; + end; + emit_call func; + if Config.runtime5 then begin + I.mov rbx rsp; + cfi_restore_state (); + end; end | Lop(Istackoffset n) -> emit_stack_offset env n @@ -947,12 +930,10 @@ let emit_instr env fallthrough i = I.cmp (int 0) (res16 i 0); I.set (cond (Iunsigned Cne)) (res8 i 0); I.movzx (res8 i 0) (res i 0) -(* BACKPORT BEGIN | Lop (Idls_get) -> - I.mov (domain_field Domainstate.Domain_dls_root) (res i 0) -*) - | Lop (Idls_get) -> Misc.fatal_error "Idls_get not implemented" -(* BACKPORT END *) + if Config.runtime5 + then I.mov (domain_field Domainstate.Domain_dls_root) (res i 0) + else Misc.fatal_error "Idls_get not implemented in runtime4." 
| Lreloadretaddr -> () | Lreturn -> @@ -1046,43 +1027,6 @@ let emit_instr env fallthrough i = let delta = 16 * delta_traps in cfi_adjust_cfa_offset delta; env.stack_offset <- env.stack_offset + delta -(* BACKPORT BEGIN - (exception handling) - | Lpushtrap { lbl_handler; } -> - let load_label_addr s arg = - if !Clflags.pic_code then - I.lea (mem64_rip NONE (emit_label s)) arg - else - I.mov (sym (emit_label s)) arg - in - load_label_addr lbl_handler r11; - I.push r11; - cfi_adjust_cfa_offset 8; - I.push (domain_field Domainstate.Domain_exn_handler); - cfi_adjust_cfa_offset 8; - I.mov rsp (domain_field Domainstate.Domain_exn_handler); - env.stack_offset <- env.stack_offset + 16; - | Lpoptrap -> - I.pop (domain_field Domainstate.Domain_exn_handler); - cfi_adjust_cfa_offset (-8); - I.add (int 8) rsp; - cfi_adjust_cfa_offset (-8); - env.stack_offset <- env.stack_offset - 16 - | Lraise k -> - begin match k with - | Lambda.Raise_regular -> - emit_call "caml_raise_exn"; - record_frame env Reg.Set.empty (Dbg_raise i.dbg) - | Lambda.Raise_reraise -> - emit_call "caml_reraise_exn"; - record_frame env Reg.Set.empty (Dbg_raise i.dbg) - | Lambda.Raise_notrace -> - I.mov (domain_field Domainstate.Domain_exn_handler) rsp; - I.pop (domain_field Domainstate.Domain_exn_handler); - I.pop r11; - I.jmp r11 - end -*) | Lpushtrap { lbl_handler; } -> let load_label_addr s arg = if !Clflags.pic_code then @@ -1118,7 +1062,6 @@ let emit_instr env fallthrough i = I.pop r11; I.jmp r11 end -(* BACKPORT END *) let rec emit_all env fallthrough i = match i.desc with diff --git a/ocaml/asmcomp/amd64/proc.ml b/ocaml/asmcomp/amd64/proc.ml index ba15caa0db0..5cdf07f08b3 100644 --- a/ocaml/asmcomp/amd64/proc.ml +++ b/ocaml/asmcomp/amd64/proc.ml @@ -238,11 +238,7 @@ let win64_float_external_arguments = let win64_loc_external_arguments arg = let loc = Array.make (Array.length arg) Reg.dummy in let reg = ref 0 -(* BACKPORT BEGIN - and ofs = ref 0 in -*) - and ofs = ref 32 in -(* BACKPORT END *) + and ofs = 
ref (if Config.runtime5 then 0 else 32) in for i = 0 to Array.length arg - 1 do match arg.(i) with | Val | Int | Addr as ty -> @@ -293,8 +289,7 @@ let stack_ptr_dwarf_register_number = 7 (* Registers destroyed by operations *) -(* BACKPORT BEGIN -let destroyed_at_c_call = +let destroyed_at_c_call5 = (* C calling conventions preserve rbx, but it is clobbered by the code sequence used for C calls in emit.mlp, so it is marked as destroyed. *) @@ -309,8 +304,8 @@ let destroyed_at_c_call = [0;1;2;3;4;5;6;7;10;11; 100;101;102;103;104;105;106;107; 108;109;110;111;112;113;114;115]) -*) -let destroyed_at_c_call = + +let destroyed_at_c_call4 = if win64 then (* Win64: rbx, rbp, rsi, rdi, r12-r15, xmm6-xmm15 preserved *) Array.of_list(List.map phys_reg @@ -322,7 +317,9 @@ let destroyed_at_c_call = [0;2;3;4;5;6;7;10;11; 100;101;102;103;104;105;106;107; 108;109;110;111;112;113;114;115]) -(* BACKPORT END *) + +let destroyed_at_c_call = + if Config.runtime5 then destroyed_at_c_call5 else destroyed_at_c_call4 let destroyed_at_alloc_or_poll = if X86_proc.use_plt then diff --git a/ocaml/asmcomp/asmlink.ml b/ocaml/asmcomp/asmlink.ml index 18472c5dfc0..41931bdcfcd 100644 --- a/ocaml/asmcomp/asmlink.ml +++ b/ocaml/asmcomp/asmlink.ml @@ -127,7 +127,11 @@ let add_ccobjs origin l = end let runtime_lib () = - let libname = "libasmrun" ^ !Clflags.runtime_variant ^ ext_lib in + let variant = + if Config.runtime5 && !Clflags.runtime_variant = "nnp" then "" + else !Clflags.runtime_variant + in + let libname = "libasmrun" ^ variant ^ ext_lib in try if !Clflags.nopervasives || not !Clflags.with_runtime then [] else [ Load_path.find libname ] diff --git a/ocaml/asmcomp/cmm_helpers.ml b/ocaml/asmcomp/cmm_helpers.ml index ea618503db4..17fd586b6f2 100644 --- a/ocaml/asmcomp/cmm_helpers.ml +++ b/ocaml/asmcomp/cmm_helpers.ml @@ -707,12 +707,9 @@ let get_header ptr dbg = loads can be marked as [Immutable], since the runtime should ensure that there is no data race on headers. 
This saves performance with ThreadSanitizer instrumentation by avoiding to instrument header loads. *) - Cop( -(* BACKPORT BEGIN - mk_load_immut Word_int, -*) - mk_load_mut Word_int, -(* BACKPORT END *) + Cop((if Config.runtime5 + then mk_load_immut Word_int + else mk_load_mut Word_int), [Cop(Cadda, [ptr; Cconst_int(-size_int, dbg)], dbg)], dbg) let get_header_masked ptr dbg = @@ -730,12 +727,9 @@ let get_tag ptr dbg = Cop(Cand, [get_header ptr dbg; Cconst_int (255, dbg)], dbg) else (* If byte loads are efficient *) (* Same comment as [get_header] above *) - Cop( -(* BACKPORT BEGIN - mk_load_immut Byte_unsigned, -*) - mk_load_mut Byte_unsigned, -(* BACKPORT END *) + Cop((if Config.runtime5 + then mk_load_immut Byte_unsigned + else mk_load_mut Byte_unsigned), [Cop(Cadda, [ptr; Cconst_int(tag_offset, dbg)], dbg)], dbg) let get_size ptr dbg = @@ -1012,11 +1006,9 @@ let make_alloc_generic ~mode set_fn dbg tag wordsize args = | e1::el -> Csequence(set_fn (Cvar id) (Cconst_int (idx, dbg)) e1 dbg, fill_fields (idx + 2) el) in Clet(VP.create id, -(* BACKPORT BEGIN - Cop(Cextcall("caml_alloc_shr_check_gc", typ_val, [], true), -*) - Cop(Cextcall("caml_alloc", typ_val, [], true), -(* BACKPORT END *) + Cop(Cextcall((if Config.runtime5 + then "caml_alloc_shr_check_gc" + else "caml_alloc"), typ_val, [], true), [Cconst_int (wordsize, dbg); Cconst_int (tag, dbg)], dbg), fill_fields 1 args) end @@ -2554,13 +2546,9 @@ let assignment_kind | Assignment Modify_maybe_stack, Pointer -> assert Config.stack_allocation; Caml_modify_local -(* BACKPORT BEGIN - | Heap_initialization, Pointer - | Root_initialization, Pointer -> Caml_initialize -*) | Heap_initialization, Pointer -> Caml_initialize - | Root_initialization, Pointer -> Simple Initialization -(* BACKPORT END *) + | Root_initialization, Pointer -> + if Config.runtime5 then Caml_initialize else Simple Initialization | (Assignment _), Immediate -> Simple Assignment | Heap_initialization, Immediate | Root_initialization, Immediate -> 
Simple Initialization diff --git a/ocaml/asmcomp/cmmgen.ml b/ocaml/asmcomp/cmmgen.ml index 5be04115dfe..c6ea3ee3c17 100644 --- a/ocaml/asmcomp/cmmgen.ml +++ b/ocaml/asmcomp/cmmgen.ml @@ -116,21 +116,22 @@ let mut_from_env env ptr = else Asttypes.Mutable | _ -> Asttypes.Mutable -(* BACKPORT (* Minimum of two [mutable_flag] values, assuming [Immutable < Mutable]. *) let min_mut x y = match x,y with - | Immutable,_ | _,Immutable -> Immutable - | Mutable,Mutable -> Mutable -*) - -(* BACKPORT BEGIN -let get_field env mut ptr n dbg = - let mut = min_mut mut (mut_from_env env ptr) in -*) -let get_field env layout ptr n dbg = - let mut = mut_from_env env ptr in -(* BACKPORT END *) + | Asttypes.Immutable, _ + | _, Asttypes.Immutable -> Asttypes.Immutable + | Asttypes.Mutable, Asttypes.Mutable -> Asttypes.Mutable + +let mut_from_lambda = function + | Lambda.Immutable -> Asttypes.Immutable + | Lambda.Immutable_unique -> Asttypes.Immutable + | Lambda.Mutable -> Asttypes.Mutable + +let get_field env mut layout ptr n dbg = + let mut = if Config.runtime5 + then min_mut (mut_from_lambda mut) (mut_from_env env ptr) + else mut_from_env env ptr in let memory_chunk = match layout with | Pvalue Pintval | Punboxed_int _ -> Word_int @@ -921,13 +922,13 @@ and transl_prim_1 env p arg dbg = Popaque -> opaque (transl env arg) dbg (* Heap operations *) - | Pfield (n, layout, _, _) -> - get_field env layout (transl env arg) n dbg + | Pfield (n, layout, _, mut) -> + get_field env mut layout (transl env arg) n dbg | Pfloatfield (n,mode) -> let ptr = transl env arg in box_float dbg mode (floatfield n ptr dbg) | Pufloatfield n -> - get_field env Punboxed_float (transl env arg) n dbg + get_field env Mutable Punboxed_float (transl env arg) n dbg | Pint_as_pointer _ -> int_as_pointer (transl env arg) dbg (* Exceptions *) diff --git a/ocaml/lambda/matching.ml b/ocaml/lambda/matching.ml index 2dc0afc2c74..62229ebd596 100644 --- a/ocaml/lambda/matching.ml +++ b/ocaml/lambda/matching.ml @@ -1925,274 
+1925,281 @@ let get_mod_field modname field = | path, _ -> transl_value_path Loc_unknown env path )) -(* BACKPORT BEGIN - This is the OCaml 5 lazy implementation merged into ocaml-jst. +(* This is the OCaml 5 lazy implementation merged into ocaml-jst. *) +module Lazy5 = struct -let code_force_lazy_block = get_mod_field "CamlinternalLazy" "force_lazy_block" + (* CR ocaml 5 runtime: redefine target tags in their own file (see PR#1857) *) + let forcing_tag = 244 -let code_force_lazy = get_mod_field "CamlinternalLazy" "force_gen" + let code_force_lazy_block = get_mod_field "CamlinternalLazy" "force_lazy_block" -(* inline_lazy_force inlines the beginning of the code of Lazy.force. When - the value argument is tagged as: - - forward, take field 0 - - lazy || forcing, call the primitive that forces - - anything else, return it + let code_force_lazy = get_mod_field "CamlinternalLazy" "force_gen" - Using Lswitch below relies on the fact that the GC does not shortcut - Forward(val_out_of_heap). -*) + (* inline_lazy_force inlines the beginning of the code of Lazy.force. When + the value argument is tagged as: + - forward, take field 0 + - lazy || forcing, call the primitive that forces + - anything else, return it -let call_force_lazy_block ?(inlined = Default_inlined) varg loc ~pos = - (* The argument is wrapped with [Popaque] to prevent the rest of the compiler - from making any assumptions on its contents (see comments on - [CamlinternalLazy.force_gen], and discussions on PRs #9998 and #10909). - Alternatively, [ap_inlined] could be set to [Never_inline] to achieve a - similar result. 
*) - let force_fun = Lazy.force code_force_lazy_block in - Lapply - { ap_tailcall = Default_tailcall; - ap_loc = loc; - ap_func = force_fun; - ap_args = [ Lprim (Popaque Lambda.layout_lazy, [ varg ], loc) ]; - ap_result_layout = Lambda.layout_lazy_contents; - ap_region_close = pos; - ap_mode = alloc_heap; - ap_inlined = inlined; - ap_specialised = Default_specialise; - ap_probe = None; - } - -let lazy_forward_field = Lambda.Pfield (0, Pointer, Reads_vary) - -let inline_lazy_force_cond arg pos loc = - let idarg = Ident.create_local "lzarg" in - let varg = Lvar idarg in - let tag = Ident.create_local "tag" in - let test_tag t = - Lprim(Pintcomp Ceq, [Lvar tag; Lconst(Const_base(Const_int t))], loc) - in - Llet - ( Strict, - Lambda.layout_lazy, - idarg, - arg, - Llet - ( Alias, - Lambda.layout_int, - tag, - Lprim (Pccall prim_obj_tag, [ varg ], loc), - Lifthenelse - ( (* if (tag == Obj.forward_tag) then varg.(0) else ... *) - test_tag Obj.forward_tag, - Lprim (lazy_forward_field, [ varg ], loc), - Lifthenelse - ( - (* ... if tag == Obj.lazy_tag || tag == Obj.forcing_tag then - Lazy.force varg - else ... *) - Lprim (Psequor, - [test_tag Obj.lazy_tag; test_tag Obj.forcing_tag], loc), - (* nroberts: We probably don't need [Never_inlined] anymore - now that [ap_args] is opaque. *) - call_force_lazy_block ~inlined:Never_inlined varg loc ~pos, - (* ... 
arg *) - varg, Lambda.layout_lazy_contents), Lambda.layout_lazy_contents) ) ) - -let inline_lazy_force_switch arg pos loc = - let idarg = Ident.create_local "lzarg" in - let varg = Lvar idarg in - Llet - ( Strict, - Lambda.layout_lazy, - idarg, - arg, - Lifthenelse - ( Lprim (Pisint { variant_only = false }, [ varg ], loc), - varg, - Lswitch - ( Lprim (Pccall prim_obj_tag, [ varg ], loc), - { sw_numblocks = 0; - sw_blocks = []; - sw_numconsts = 256; - (* PR#6033 - tag ranges from 0 to 255 *) - sw_consts = - [ (Obj.forward_tag, Lprim (Pfield(0, Pointer, Reads_vary), - [ varg ], loc)); - (Obj.lazy_tag, call_force_lazy_block varg loc ~pos); - (Obj.forcing_tag, call_force_lazy_block varg loc ~pos) - ]; - sw_failaction = Some varg - }, - loc, Lambda.layout_lazy_contents), Lambda.layout_lazy_contents) ) - -let inline_lazy_force arg pos loc = - if !Clflags.afl_instrument then - (* Disable inlining optimisation if AFL instrumentation active, - so that the GC forwarding optimisation is not visible in the - instrumentation output. - (see https://github.com/stedolan/crowbar/issues/14) *) + Using Lswitch below relies on the fact that the GC does not shortcut + Forward(val_out_of_heap). + *) + + let call_force_lazy_block ?(inlined = Default_inlined) varg loc ~pos = + (* The argument is wrapped with [Popaque] to prevent the rest of the compiler + from making any assumptions on its contents (see comments on + [CamlinternalLazy.force_gen], and discussions on PRs #9998 and #10909). + Alternatively, [ap_inlined] could be set to [Never_inline] to achieve a + similar result. 
*) + let force_fun = Lazy.force code_force_lazy_block in Lapply { ap_tailcall = Default_tailcall; ap_loc = loc; - ap_func = Lazy.force code_force_lazy; - ap_args = [ Lconst (Const_base (Const_int 0)); arg ]; + ap_func = force_fun; + ap_args = [ Lprim (Popaque Lambda.layout_lazy, [ varg ], loc) ]; ap_result_layout = Lambda.layout_lazy_contents; ap_region_close = pos; ap_mode = alloc_heap; - (* nroberts: To make sure this wasn't inlined: - - Upstream changed [code_force_lazy] to a non-inlineable - function when compiling with AFL support. - - We just changed this to Never_inlined. - - If these two approaches are solving the same problem, we should - just converge to one. - *) - ap_inlined = Never_inlined; + ap_inlined = inlined; ap_specialised = Default_specialise; - ap_probe=None; + ap_probe = None; } - else if !Clflags.native_code && not (Clflags.is_flambda2 ()) then - (* CR vlaviron: Find a way for Flambda 2 to avoid both the call to - caml_obj_tag and the switch on arbitrary tags *) - (* Lswitch generates compact and efficient native code *) - inline_lazy_force_switch arg pos loc - else - (* generating bytecode: Lswitch would generate too many rather big - tables (~ 250 elts); conditionals are better *) - inline_lazy_force_cond arg pos loc -*) -(* BACKPORT END *) + let lazy_forward_field = Lambda.Pfield (0, Pointer, Reads_vary) + + let inline_lazy_force_cond arg pos loc = + let idarg = Ident.create_local "lzarg" in + let varg = Lvar idarg in + let tag = Ident.create_local "tag" in + let test_tag t = + Lprim(Pintcomp Ceq, [Lvar tag; Lconst(Const_base(Const_int t))], loc) + in + Llet + ( Strict, + Lambda.layout_lazy, + idarg, + arg, + Llet + ( Alias, + Lambda.layout_int, + tag, + Lprim (Pccall prim_obj_tag, [ varg ], loc), + Lifthenelse + ( (* if (tag == Obj.forward_tag) then varg.(0) else ... *) + test_tag Obj.forward_tag, + Lprim (lazy_forward_field, [ varg ], loc), + Lifthenelse + ( + (* ... 
if tag == Obj.lazy_tag || tag == forcing_tag then + Lazy.force varg + else ... *) + Lprim (Psequor, + [test_tag Obj.lazy_tag; test_tag forcing_tag], loc), + (* nroberts: We probably don't need [Never_inlined] anymore + now that [ap_args] is opaque. *) + call_force_lazy_block ~inlined:Never_inlined varg loc ~pos, + (* ... arg *) + varg, Lambda.layout_lazy_contents), Lambda.layout_lazy_contents) ) ) + + let inline_lazy_force_switch arg pos loc = + let idarg = Ident.create_local "lzarg" in + let varg = Lvar idarg in + Llet + ( Strict, + Lambda.layout_lazy, + idarg, + arg, + Lifthenelse + ( Lprim (Pisint { variant_only = false }, [ varg ], loc), + varg, + Lswitch + ( Lprim (Pccall prim_obj_tag, [ varg ], loc), + { sw_numblocks = 0; + sw_blocks = []; + sw_numconsts = 256; + (* PR#6033 - tag ranges from 0 to 255 *) + sw_consts = + [ (Obj.forward_tag, Lprim (Pfield(0, Pointer, Reads_vary), + [ varg ], loc)); + (Obj.lazy_tag, call_force_lazy_block varg loc ~pos); + (forcing_tag, call_force_lazy_block varg loc ~pos) + ]; + sw_failaction = Some varg + }, + loc, Lambda.layout_lazy_contents), Lambda.layout_lazy_contents) ) + + let inline_lazy_force arg pos loc = + if !Clflags.afl_instrument then + (* Disable inlining optimisation if AFL instrumentation active, + so that the GC forwarding optimisation is not visible in the + instrumentation output. + (see https://github.com/stedolan/crowbar/issues/14) *) + Lapply + { ap_tailcall = Default_tailcall; + ap_loc = loc; + ap_func = Lazy.force code_force_lazy; + ap_args = [ Lconst (Const_base (Const_int 0)); arg ]; + ap_result_layout = Lambda.layout_lazy_contents; + ap_region_close = pos; + ap_mode = alloc_heap; + (* nroberts: To make sure this wasn't inlined: + - Upstream changed [code_force_lazy] to a non-inlineable + function when compiling with AFL support. + - We just changed this to Never_inlined. + + If these two approaches are solving the same problem, we should + just converge to one. 
+ *) + ap_inlined = Never_inlined; + ap_specialised = Default_specialise; + ap_probe=None; + } + else if !Clflags.native_code && not (Clflags.is_flambda2 ()) then + (* CR vlaviron: Find a way for Flambda 2 to avoid both the call to + caml_obj_tag and the switch on arbitrary tags *) + (* Lswitch generates compact and efficient native code *) + inline_lazy_force_switch arg pos loc + else + (* generating bytecode: Lswitch would generate too many rather big + tables (~ 250 elts); conditionals are better *) + inline_lazy_force_cond arg pos loc +end + +(* CR ocaml 5 runtime: delete the old implementation *) (* This is the OCaml 4 implementation of lazy with a tweak to the Pfield occurrence in lazy_forward_field (to add "Pointer"). *) +module Lazy4 = struct -let code_force_lazy_block = get_mod_field "CamlinternalLazy" "force_lazy_block" + let code_force_lazy_block = get_mod_field "CamlinternalLazy" "force_lazy_block" -let code_force_lazy = get_mod_field "CamlinternalLazy" "force" + let code_force_lazy = get_mod_field "CamlinternalLazy" "force" -(* inline_lazy_force inlines the beginning of the code of Lazy.force. When - the value argument is tagged as: - - forward, take field 0 - - lazy, call the primitive that forces (without testing again the tag) - - anything else, return it - - Using Lswitch below relies on the fact that the GC does not shortcut - Forward(val_out_of_heap). -*) + (* inline_lazy_force inlines the beginning of the code of Lazy.force. When + the value argument is tagged as: + - forward, take field 0 + - lazy, call the primitive that forces (without testing again the tag) + - anything else, return it -let lazy_forward_field = Lambda.Pfield (0, Pointer, Reads_vary) + Using Lswitch below relies on the fact that the GC does not shortcut + Forward(val_out_of_heap). 
+ *) -let inline_lazy_force_cond arg pos loc = - let idarg = Ident.create_local "lzarg" in - let varg = Lvar idarg in - let tag = Ident.create_local "tag" in - let tag_var = Lvar tag in - let force_fun = Lazy.force code_force_lazy_block in - Llet - ( Strict, - Lambda.layout_lazy, - idarg, - arg, - Llet - ( Alias, - Lambda.layout_int, - tag, - Lprim (Pccall prim_obj_tag, [ varg ], loc), - Lifthenelse - (* if (tag == Obj.forward_tag) then varg.(0) else ... *) - ( Lprim - ( Pintcomp Ceq, - [ tag_var; Lconst (Const_base (Const_int Obj.forward_tag)) ], - loc ), - Lprim (lazy_forward_field, [ varg ], loc), - Lifthenelse - (* if (tag == Obj.lazy_tag) then Lazy.force varg else ... *) - ( Lprim - ( Pintcomp Ceq, - [ tag_var; Lconst (Const_base (Const_int Obj.lazy_tag)) ], - loc ), - Lapply - { ap_tailcall = Default_tailcall; - ap_loc = loc; - ap_func = force_fun; - ap_args = [ varg ]; - ap_result_layout = Lambda.layout_lazy_contents; - ap_region_close = pos; - ap_mode = alloc_heap; - ap_inlined = Never_inlined; - ap_specialised = Default_specialise; - ap_probe=None - }, - (* ... 
arg *) - varg, Lambda.layout_lazy_contents), Lambda.layout_lazy_contents) ) ) - -let inline_lazy_force_switch arg pos loc = - let idarg = Ident.create_local "lzarg" in - let varg = Lvar idarg in - let force_fun = Lazy.force code_force_lazy_block in - Llet - ( Strict, - Lambda.layout_lazy, - idarg, - arg, - Lifthenelse - ( Lprim (Pisint { variant_only = false }, [ varg ], loc), - varg, - Lswitch - ( varg, - { sw_numconsts = 0; - sw_consts = []; - sw_numblocks = 256; - (* PR#6033 - tag ranges from 0 to 255 *) - sw_blocks = - [ ( Obj.forward_tag, - Lprim (lazy_forward_field, [ varg ], loc) ); - ( Obj.lazy_tag, - Lapply - { ap_tailcall = Default_tailcall; - ap_loc = loc; - ap_func = force_fun; - ap_args = [ varg ]; - ap_result_layout = Lambda.layout_lazy_contents; - ap_region_close = pos; - ap_mode = alloc_heap; - ap_inlined = Default_inlined; - ap_specialised = Default_specialise; - ap_probe=None; - } ) - ]; - sw_failaction = Some varg - }, - loc, Lambda.layout_lazy_contents), Lambda.layout_lazy_contents) ) - -let inline_lazy_force arg pos loc = - if !Clflags.afl_instrument then - (* Disable inlining optimisation if AFL instrumentation active, - so that the GC forwarding optimisation is not visible in the - instrumentation output. 
- (see https://github.com/stedolan/crowbar/issues/14) *) - Lapply - { ap_tailcall = Default_tailcall; - ap_loc = loc; - ap_func = Lazy.force code_force_lazy; - ap_args = [ arg ]; - ap_result_layout = Lambda.layout_lazy_contents; - ap_region_close = pos; - ap_mode = alloc_heap; - ap_inlined = Never_inlined; - ap_specialised = Default_specialise; - ap_probe=None; - } - else if !Clflags.native_code && not (Clflags.is_flambda2 ()) then - (* CR vlaviron: Find a way for Flambda 2 to avoid both the call to - caml_obj_tag and the switch on arbitrary tags *) - (* Lswitch generates compact and efficient native code *) - inline_lazy_force_switch arg pos loc - else - (* generating bytecode: Lswitch would generate too many rather big - tables (~ 250 elts); conditionals are better *) - inline_lazy_force_cond arg pos loc + let lazy_forward_field = Lambda.Pfield (0, Pointer, Reads_vary) + + let inline_lazy_force_cond arg pos loc = + let idarg = Ident.create_local "lzarg" in + let varg = Lvar idarg in + let tag = Ident.create_local "tag" in + let tag_var = Lvar tag in + let force_fun = Lazy.force code_force_lazy_block in + Llet + ( Strict, + Lambda.layout_lazy, + idarg, + arg, + Llet + ( Alias, + Lambda.layout_int, + tag, + Lprim (Pccall prim_obj_tag, [ varg ], loc), + Lifthenelse + (* if (tag == Obj.forward_tag) then varg.(0) else ... *) + ( Lprim + ( Pintcomp Ceq, + [ tag_var; Lconst (Const_base (Const_int Obj.forward_tag)) ], + loc ), + Lprim (lazy_forward_field, [ varg ], loc), + Lifthenelse + (* if (tag == Obj.lazy_tag) then Lazy.force varg else ... *) + ( Lprim + ( Pintcomp Ceq, + [ tag_var; Lconst (Const_base (Const_int Obj.lazy_tag)) ], + loc ), + Lapply + { ap_tailcall = Default_tailcall; + ap_loc = loc; + ap_func = force_fun; + ap_args = [ varg ]; + ap_result_layout = Lambda.layout_lazy_contents; + ap_region_close = pos; + ap_mode = alloc_heap; + ap_inlined = Never_inlined; + ap_specialised = Default_specialise; + ap_probe=None + }, + (* ... 
arg *) + varg, Lambda.layout_lazy_contents), Lambda.layout_lazy_contents) ) ) + + let inline_lazy_force_switch arg pos loc = + let idarg = Ident.create_local "lzarg" in + let varg = Lvar idarg in + let force_fun = Lazy.force code_force_lazy_block in + Llet + ( Strict, + Lambda.layout_lazy, + idarg, + arg, + Lifthenelse + ( Lprim (Pisint { variant_only = false }, [ varg ], loc), + varg, + Lswitch + ( varg, + { sw_numconsts = 0; + sw_consts = []; + sw_numblocks = 256; + (* PR#6033 - tag ranges from 0 to 255 *) + sw_blocks = + [ ( Obj.forward_tag, + Lprim (lazy_forward_field, [ varg ], loc) ); + ( Obj.lazy_tag, + Lapply + { ap_tailcall = Default_tailcall; + ap_loc = loc; + ap_func = force_fun; + ap_args = [ varg ]; + ap_result_layout = Lambda.layout_lazy_contents; + ap_region_close = pos; + ap_mode = alloc_heap; + ap_inlined = Default_inlined; + ap_specialised = Default_specialise; + ap_probe=None; + } ) + ]; + sw_failaction = Some varg + }, + loc, Lambda.layout_lazy_contents), Lambda.layout_lazy_contents) ) + + let inline_lazy_force arg pos loc = + if !Clflags.afl_instrument then + (* Disable inlining optimisation if AFL instrumentation active, + so that the GC forwarding optimisation is not visible in the + instrumentation output. 
+ (see https://github.com/stedolan/crowbar/issues/14) *) + Lapply + { ap_tailcall = Default_tailcall; + ap_loc = loc; + ap_func = Lazy.force code_force_lazy; + ap_args = [ arg ]; + ap_result_layout = Lambda.layout_lazy_contents; + ap_region_close = pos; + ap_mode = alloc_heap; + ap_inlined = Never_inlined; + ap_specialised = Default_specialise; + ap_probe=None; + } + else if !Clflags.native_code && not (Clflags.is_flambda2 ()) then + (* CR vlaviron: Find a way for Flambda 2 to avoid both the call to + caml_obj_tag and the switch on arbitrary tags *) + (* Lswitch generates compact and efficient native code *) + inline_lazy_force_switch arg pos loc + else + (* generating bytecode: Lswitch would generate too many rather big + tables (~ 250 elts); conditionals are better *) + inline_lazy_force_cond arg pos loc +end +let inline_lazy_force = + if Config.runtime5 then Lazy5.inline_lazy_force else Lazy4.inline_lazy_force (* End of lazy implementations. *) diff --git a/ocaml/ocamltest/dune b/ocaml/ocamltest/dune index d586c0ee523..7018d36af35 100644 --- a/ocaml/ocamltest/dune +++ b/ocaml/ocamltest/dune @@ -50,7 +50,7 @@ (rule (targets empty.ml) (deps - (source_tree ../runtime4/caml)) + (source_tree ../%{env:RUNTIME_DIR=runtime-dir-env-var-not-set}/caml)) (action (write-file %{targets} "(* hack *)"))) @@ -58,7 +58,7 @@ (name main) (modes byte) (flags - (:standard -principal -cclib "-I../runtime4")) + (:standard -principal -cclib "-I../%{env:RUNTIME_DIR=runtime-dir-env-var-not-set}")) (modules options main) (libraries ocamltest_core_and_plugin)) diff --git a/ocaml/ocamltest/ocaml_directories.ml b/ocaml/ocamltest/ocaml_directories.ml index e00e32eecf0..2db89eaea21 100644 --- a/ocaml/ocamltest/ocaml_directories.ml +++ b/ocaml/ocamltest/ocaml_directories.ml @@ -30,7 +30,8 @@ let toplevel = Filename.make_path [srcdir; "toplevel"] let runtime = - Filename.make_path [srcdir; "runtime4"] + let suffix = if Config.runtime5 then "" else "4" in + Filename.make_path [srcdir; "runtime" ^ 
suffix] let tools = Filename.make_path [srcdir; "tools"] diff --git a/ocaml/ocamltest/ocaml_files.ml b/ocaml/ocamltest/ocaml_files.ml index 7accd1633ed..7c657c7d6a0 100644 --- a/ocaml/ocamltest/ocaml_files.ml +++ b/ocaml/ocamltest/ocaml_files.ml @@ -34,7 +34,9 @@ let ocamlrun = | Debug -> "ocamlrund" | Instrumented -> "ocamlruni" in let ocamlrunfile = Filename.mkexe runtime in - Filename.make_path [Ocaml_directories.srcdir; "runtime4"; ocamlrunfile] + let suffix = if Config.runtime5 then "" else "4" in + Filename.make_path [Ocaml_directories.srcdir; "runtime" ^ suffix; + ocamlrunfile] let ocamlc = Filename.make_path [Ocaml_directories.srcdir; Filename.mkexe "ocamlc"] diff --git a/ocaml/otherlibs/systhreads/dune b/ocaml/otherlibs/systhreads/dune index 68028fa05a6..3738b645d5f 100644 --- a/ocaml/otherlibs/systhreads/dune +++ b/ocaml/otherlibs/systhreads/dune @@ -17,17 +17,7 @@ (install (files - (byte/threads.cma as threads/threads.cma) - (native/threadsnat.cmxa as threads/threads.cmxa) - (native/threadsnat.a as threads/threads.a) - (byte/libthreads_stubs.a as libthreads_stubs.a) - (byte/dllthreads_stubs.so as stublibs/dllthreads_stubs.so) - (native/libthreadsnat_stubs.a as libthreadsnat_stubs.a) - (native/libthreadsnat_stubs.a as libthreadsnat_stubs_native.a) ; for special_dune compat (thread.mli as threads/thread.mli) - (threads.h as caml/threads.h) - (native/.threadsnat.objs/native/thread.cmx as threads/thread.cmx) - (byte/.threads.objs/byte/thread.cmi as threads/thread.cmi) - (byte/.threads.objs/byte/thread.cmti as threads/thread.cmti)) + (threads.h as caml/threads.h)) (section lib) (package ocaml)) diff --git a/ocaml/otherlibs/unix/signals.c b/ocaml/otherlibs/unix/signals.c index 38fc5c22112..f44fa3fca9a 100644 --- a/ocaml/otherlibs/unix/signals.c +++ b/ocaml/otherlibs/unix/signals.c @@ -75,11 +75,9 @@ CAMLprim value caml_unix_sigprocmask(value vaction, value vset) decode_sigset(vset, &set); caml_enter_blocking_section(); #ifdef CAML_RUNTIME_5 - // CR ocaml 5 
runtime: the upstream 5.0 unix lib uses sigprocmask here, - // which seems wrong? Previously, there was a global caml_sigmask_hook wrapper - // that got installed as sigprocmask or pthread_sigmask based on whether - // systhreads was enabled. The 5 runtime is now multithreaded, so always - // links pthread, so should always use pthread_sigmask. + // Differs from upstream at the point we branched, but this PR + // changes the behaviour to what we have here: + // https://github.com/ocaml/ocaml/pull/12743 retcode = pthread_sigmask(how, &set, &oldset); #else retcode = caml_sigmask_hook(how, &set, &oldset); diff --git a/ocaml/runtime/alloc.c b/ocaml/runtime/alloc.c index 37e285ac107..c401851fd0f 100644 --- a/ocaml/runtime/alloc.c +++ b/ocaml/runtime/alloc.c @@ -312,7 +312,7 @@ CAMLprim value caml_alloc_dummy_infix(value vsize, value voffset) block contains no pointers into the heap. However, the block cannot be marshaled or hashed, because not all closinfo fields and infix header fields are correctly initialized. */ - Closinfo_val(v) = Make_closinfo(0, wosize); + Closinfo_val(v) = Make_closinfo(0, wosize, 1); if (offset > 0) { v += Bsize_wsize(offset); (((header_t *) (v)) [-1]) = Make_header(offset, Infix_tag, 0); diff --git a/ocaml/runtime/amd64.S b/ocaml/runtime/amd64.S index 8dc9f81ec68..9a9326d7d2d 100644 --- a/ocaml/runtime/amd64.S +++ b/ocaml/runtime/amd64.S @@ -1054,14 +1054,14 @@ G(caml_system__code_end): .globl G(caml_system.frametable) .align EIGHT_ALIGN G(caml_system.frametable): - .quad 2 /* two descriptors */ - .quad LBL(108) /* return address into callback */ - .value -1 /* negative frame size => use callback link */ - .value 0 /* no roots here */ + .quad 2 /* two descriptors */ + .4byte LBL(108) - . 
/* return address into callback */ + .value -1 /* negative frame size => use callback link */ + .value 0 /* no roots here */ .align EIGHT_ALIGN - .quad LBL(frame_runstack) /* return address into fiber_val_handler */ - .value -1 /* negative frame size => use callback link */ - .value 0 /* no roots here */ + .4byte LBL(frame_runstack) - . /* return address into fiber_val_handler */ + .value -1 /* negative frame size => use callback link */ + .value 0 /* no roots here */ #if defined(SYS_macosx) .literal16 diff --git a/ocaml/runtime/amd64nt.asm b/ocaml/runtime/amd64nt.asm index 3aca2fca77a..4a82484c101 100644 --- a/ocaml/runtime/amd64nt.asm +++ b/ocaml/runtime/amd64nt.asm @@ -429,10 +429,10 @@ caml_system__code_end: .DATA PUBLIC caml_system.frametable caml_system.frametable LABEL QWORD - QWORD 1 ; one descriptor - QWORD L107 ; return address into callback - WORD -1 ; negative frame size => use callback link - WORD 0 ; no roots here + QWORD 1 ; one descriptor + DWORD L107 - THIS BYTE ; return address into callback + WORD -1 ; negative frame size => use callback link + WORD 0 ; no roots here ALIGN 8 PUBLIC caml_negf_mask diff --git a/ocaml/runtime/arm64.S b/ocaml/runtime/arm64.S index 361d56fb943..fc7e303f4ff 100644 --- a/ocaml/runtime/arm64.S +++ b/ocaml/runtime/arm64.S @@ -923,14 +923,14 @@ G(caml_system__code_end): /* GC roots for callback */ OBJECT(caml_system.frametable) - .quad 2 /* two descriptors */ - .quad L(caml_retaddr) /* return address into callback */ - .short -1 /* negative frame size => use callback link */ - .short 0 /* no roots */ + .quad 2 /* two descriptors */ + .4byte L(caml_retaddr) - . /* return address into callback */ + .short -1 /* negative frame size => use callback link */ + .short 0 /* no roots */ .align 3 - .quad L(frame_runstack) /* return address into fiber handler */ - .short -1 /* negative frame size => use callback link */ - .short 0 /* no roots here */ + .4byte L(frame_runstack) - . 
/* return address into fiber handler */ + .short -1 /* negative frame size => use callback link */ + .short 0 /* no roots here */ .align 3 END_OBJECT(caml_system.frametable) diff --git a/ocaml/runtime/array.c b/ocaml/runtime/array.c index 5a850b49442..7423467070e 100644 --- a/ocaml/runtime/array.c +++ b/ocaml/runtime/array.c @@ -593,3 +593,39 @@ CAMLprim value caml_array_fill(value array, } return Val_unit; } + +/* Linker compatibility with stdlib externals + CR ocaml 5 runtime: implement locals + CR ocaml 5 runtime: implement iarrays */ + +CAMLprim value caml_array_concat_local(value al) +{ + caml_failwith("Called caml_array_concat_local in runtime5: not implemented."); +} + +CAMLprim value caml_array_sub_local(value al, value a, value b) +{ + caml_failwith("Called caml_array_sub_local in runtime5: not implemented."); +} + +CAMLprim value caml_make_local_vect(value i, value a) +{ + caml_failwith("Called caml_make_local_vect in runtime5: not implemented."); +} + +CAMLprim value caml_array_append_local(value a1, value a2) +{ + caml_failwith("Called caml_array_append_local in runtime5: not implemented."); +} + +CAMLprim value caml_iarray_of_array(value a) +{ + return a; +} + +extern value caml_obj_dup(value); + +CAMLprim value caml_array_of_iarray(value a) +{ + return caml_obj_dup(a); +} diff --git a/ocaml/runtime/caml/frame_descriptors.h b/ocaml/runtime/caml/frame_descriptors.h index 71142a55504..b0d4f65d444 100644 --- a/ocaml/runtime/caml/frame_descriptors.h +++ b/ocaml/runtime/caml/frame_descriptors.h @@ -60,7 +60,7 @@ #define FRAME_RETURN_TO_C 0xFFFF typedef struct { - uintnat retaddr; + int32_t retaddr_rel; /* offset of return address from &retaddr_rel */ uint16_t frame_data; /* frame size and various flags */ uint16_t num_live; uint16_t live_ofs[1 /* num_live */]; @@ -105,6 +105,10 @@ Caml_inline bool frame_has_debug(frame_descr *d) { #define Hash_retaddr(addr, mask) \ (((uintnat)(addr) >> 3) & (mask)) +#define Retaddr_frame(d) \
((uintnat)&(d)->retaddr_rel + \ + (uintnat)(intnat)((d)->retaddr_rel)) + void caml_init_frame_descriptors(void); void caml_register_frametables(void **tables, int ntables); diff --git a/ocaml/runtime/caml/intext.h b/ocaml/runtime/caml/intext.h index 53138aea660..d10ea0e1fb8 100644 --- a/ocaml/runtime/caml/intext.h +++ b/ocaml/runtime/caml/intext.h @@ -98,6 +98,7 @@ #define OLD_CODE_CUSTOM 0x12 // no longer supported #define CODE_CUSTOM_LEN 0x18 #define CODE_CUSTOM_FIXED 0x19 +#define CODE_UNBOXED_INT64 0x1a #if ARCH_FLOAT_ENDIANNESS == 0x76543210 #define CODE_DOUBLE_NATIVE CODE_DOUBLE_BIG diff --git a/ocaml/runtime/caml/mlvalues.h b/ocaml/runtime/caml/mlvalues.h index da3a46da4f9..12861136318 100644 --- a/ocaml/runtime/caml/mlvalues.h +++ b/ocaml/runtime/caml/mlvalues.h @@ -276,21 +276,29 @@ Caml_inline void* Ptr_val(value val) #define Code_val(val) (((code_t *) (val)) [0]) /* Also an l-value. */ #define Closinfo_val(val) Field((val), 1) /* Arity and start env */ /* In the closure info field, the top 8 bits are the arity (signed). + The next least significant bit is set iff the current closure is the + last one to occur in the block. (This is used in the compactor.) The low bit is set to one, to look like an integer. - The remaining bits are the field number for the first word of the - environment, or, in other words, the offset (in words) from the closure - to the environment part. */ + The remaining bits are the field number for the first word of the scannable + part of the environment, or, in other words, the offset (in words) from the + closure to the scannable part of the environment. + The non-scannable part of the environment lives between the end of the + last closure and the start of the scannable environment within the block. 
*/ /* CR ncourant: it might be cleaner to use a packed struct here */ #ifdef ARCH_SIXTYFOUR #define Arity_closinfo(info) ((intnat)(info) >> 56) -#define Start_env_closinfo(info) (((uintnat)(info) << 8) >> 9) -#define Make_closinfo(arity,delta) \ - (((uintnat)(arity) << 56) + ((uintnat)(delta) << 1) + 1) +#define Start_env_closinfo(info) (((uintnat)(info) << 9) >> 10) +#define Is_last_closinfo(info) (((uintnat)(info) << 8) >> 63) +#define Make_closinfo(arity,delta,is_last) \ + (((uintnat)(arity) << 56) + ((uintnat)(is_last) << 55) \ + + ((uintnat)(delta) << 1) + 1) #else #define Arity_closinfo(info) ((intnat)(info) >> 24) -#define Start_env_closinfo(info) (((uintnat)(info) << 8) >> 9) -#define Make_closinfo(arity,delta) \ - (((uintnat)(arity) << 24) + ((uintnat)(delta) << 1) + 1) +#define Start_env_closinfo(info) (((uintnat)(info) << 9) >> 10) +#define Is_last_closinfo(info) (((uintnat)(info) << 8) >> 31) +#define Make_closinfo(arity,delta,is_last) \ + (((uintnat)(arity) << 24) + ((uintnat)(is_last) << 23) \ + + ((uintnat)(delta) << 1) + 1) #endif /* This tag is used (with Forcing_tag & Forward_tag) to implement lazy values. diff --git a/ocaml/runtime/caml/stack.h b/ocaml/runtime/caml/stack.h index d595abd0daf..8cb955916a5 100644 --- a/ocaml/runtime/caml/stack.h +++ b/ocaml/runtime/caml/stack.h @@ -47,6 +47,7 @@ /* Size of the gc_regs structure, in words. See amd64.S and amd64/proc.ml for the indices */ #define Wosize_gc_regs (13 /* int regs */ + 16 /* float regs */) +/* CR ocaml 5 runtime (mshinwell): does Wosize_gc_regs need updating for SIMD? 
*/ #define Saved_return_address(sp) *((intnat *)((sp) - 8)) #ifdef WITH_FRAME_POINTERS #define Pop_frame_pointer(sp) (sp) += sizeof(value) diff --git a/ocaml/runtime/extern.c b/ocaml/runtime/extern.c index 465c8319cd5..5043de4007f 100644 --- a/ocaml/runtime/extern.c +++ b/ocaml/runtime/extern.c @@ -594,6 +594,15 @@ Caml_inline void extern_int(struct caml_extern_state* s, intnat n) } } +Caml_inline void extern_unboxed_int(struct caml_extern_state* s, intnat n) +{ + if (s->extern_flags & COMPAT_32) + extern_failwith(s, + "output_value: cannot marshal unboxed values on 32 bit"); + + writecode64(s, CODE_UNBOXED_INT64, n); +} + /* Marshaling references to previously-marshaled blocks */ Caml_inline void extern_shared_reference(struct caml_extern_state* s, @@ -750,7 +759,7 @@ static void extern_code_pointer(struct caml_extern_state* s, char * codeptr) } } -/* Marshaling the non-environment part of closures */ +/* Marshaling the non-scanned-environment part of closures */ Caml_inline mlsize_t extern_closure_up_to_env(struct caml_extern_state* s, value v) @@ -772,8 +781,12 @@ Caml_inline mlsize_t extern_closure_up_to_env(struct caml_extern_state* s, if (Arity_closinfo(info) != 0 && Arity_closinfo(info) != 1) { extern_code_pointer(s, (char *) Field(v, i++)); } - } while (i < startenv); - CAMLassert(i == startenv); + } while (!Is_last_closinfo(info)); + CAMLassert(i <= startenv); + /* The non-scanned part of the environment */ + while (i < startenv) { + extern_unboxed_int(s, Field(v, i++)); + } return startenv; } diff --git a/ocaml/runtime/frame_descriptors.c b/ocaml/runtime/frame_descriptors.c index 1c2169c0f44..315cbbbe2ab 100644 --- a/ocaml/runtime/frame_descriptors.c +++ b/ocaml/runtime/frame_descriptors.c @@ -35,7 +35,7 @@ extern intnat * caml_frametable[]; static frame_descr * next_frame_descr(frame_descr * d) { unsigned char num_allocs = 0, *p; - CAMLassert(d->retaddr >= 4096); + CAMLassert(Retaddr_frame(d) >= 4096); if (!frame_return_to_C(d)) { /* Skip to end of 
live_ofs */ p = (unsigned char*)&d->live_ofs[d->num_live]; @@ -95,7 +95,7 @@ static void fill_hashtable( intnat len = *tbl; frame_descr * d = (frame_descr *)(tbl + 1); for (intnat j = 0; j < len; j++) { - uintnat h = Hash_retaddr(d->retaddr, table->mask); + uintnat h = Hash_retaddr(Retaddr_frame(d), table->mask); while (table->descriptors[h] != NULL) { h = (h+1) & table->mask; } @@ -224,7 +224,7 @@ frame_descr* caml_find_frame_descr(caml_frame_descrs fds, uintnat pc) while (1) { d = fds.descriptors[h]; if (d == 0) return NULL; /* can happen if some code compiled without -g */ - if (d->retaddr == pc) break; + if (Retaddr_frame(d) == pc) break; h = (h+1) & fds.mask; } return d; diff --git a/ocaml/runtime/globroots.c b/ocaml/runtime/globroots.c index e55e23eface..17a1eb99b0a 100644 --- a/ocaml/runtime/globroots.c +++ b/ocaml/runtime/globroots.c @@ -187,11 +187,39 @@ void caml_register_dyn_globals(void **globals, int nglobals) { caml_plat_unlock(&roots_mutex); } +/* Logic to determine at which index within a global root to start + scanning. [*glob_block] and [*start] may be updated by this function. */ +static void compute_index_for_global_root_scan(value* glob_block, int* start) +{ + *start = 0; + + CAMLassert (Is_block(*glob_block)); + + if (Tag_val(*glob_block) < No_scan_tag) { + /* Note: if a [Closure_tag] block is registered as a global root + (possibly containing one or more [Infix_tag] blocks), then only one + out of the combined set of the [Closure_tag] and [Infix_tag] blocks + may be registered as a global root. Multiple registrations can cause + the compactor to traverse the same fields of a block twice, which can + cause a failure. */ + if (Tag_val(*glob_block) == Infix_tag) + *glob_block -= Infix_offset_val(*glob_block); + if (Tag_val(*glob_block) == Closure_tag) + *start = Start_env_closinfo(Closinfo_val(*glob_block)); + } + else { + /* Set the index such that none of the block's fields will be scanned. 
*/ + *start = Wosize_val(*glob_block); + } +} + static void scan_native_globals(scanning_action f, void* fdata) { int i, j; static link* dyn_globals; value* glob; + value glob_block; + int start; link* lnk; caml_plat_lock(&roots_mutex); @@ -201,8 +229,10 @@ static void scan_native_globals(scanning_action f, void* fdata) /* The global roots */ for (i = 0; caml_globals[i] != 0; i++) { for(glob = caml_globals[i]; *glob != 0; glob++) { - for (j = 0; j < Wosize_val(*glob); j++){ - f(fdata, Field(*glob, j), &Field(*glob, j)); + glob_block = *glob; + compute_index_for_global_root_scan(&glob_block, &start); + for (j = start; j < Wosize_val(glob_block); j++) { + f(fdata, Field(glob_block, j), &Field(glob_block, j)); } } } @@ -210,8 +240,10 @@ iter_list(dyn_globals, lnk) { for(glob = (value *) lnk->data; *glob != 0; glob++) { - for (j = 0; j < Wosize_val(*glob); j++){ - f(fdata, Field(*glob, j), &Field(*glob, j)); + glob_block = *glob; + compute_index_for_global_root_scan(&glob_block, &start); + for (j = start; j < Wosize_val(glob_block); j++) { + f(fdata, Field(glob_block, j), &Field(glob_block, j)); } } } diff --git a/ocaml/runtime/intern.c b/ocaml/runtime/intern.c index ad28f61c5e4..87d5d346e97 100644 --- a/ocaml/runtime/intern.c +++ b/ocaml/runtime/intern.c @@ -523,6 +523,15 @@ static void intern_rec(struct caml_intern_state* s, intern_cleanup(s); caml_failwith("input_value: integer too large"); break; +#endif + case CODE_UNBOXED_INT64: +#ifdef ARCH_SIXTYFOUR + v = (intnat) (read64u(s)); + break; +#else + intern_cleanup(s); + caml_failwith("input_value: CODE_UNBOXED_INT64 not supported on 32 bit"); + break; +#endif case CODE_SHARED8: ofs = read8u(s); diff --git a/ocaml/runtime/interp.c b/ocaml/runtime/interp.c index 92f6dd7d3eb..f140132da08 100644 --- a/ocaml/runtime/interp.c +++ b/ocaml/runtime/interp.c @@ -300,7 +300,7 @@ value caml_interprete(code_t prog, asize_t 
prog_size) raise_unhandled_effect_closure = caml_alloc_small (2, Closure_tag); Code_val(raise_unhandled_effect_closure) = (code_t)raise_unhandled_effect_code; - Closinfo_val(raise_unhandled_effect_closure) = Make_closinfo(0, 2); + Closinfo_val(raise_unhandled_effect_closure) = Make_closinfo(0, 2, 1); raise_unhandled_effect = raise_unhandled_effect_closure; caml_register_generational_global_root(&raise_unhandled_effect); caml_global_data = Val_unit; @@ -624,7 +624,7 @@ value caml_interprete(code_t prog, asize_t prog_size) Field(accu, 2) = env; for (i = 0; i < num_args; i++) Field(accu, i + 3) = sp[i]; Code_val(accu) = pc - 3; /* Point to the preceding RESTART instr. */ - Closinfo_val(accu) = Make_closinfo(0, 2); + Closinfo_val(accu) = Make_closinfo(0, 2, 1); sp += num_args; goto do_return; } @@ -648,7 +648,7 @@ value caml_interprete(code_t prog, asize_t prog_size) /* The code pointer is not in the heap, so no need to go through caml_initialize. */ Code_val(accu) = pc + *pc; - Closinfo_val(accu) = Make_closinfo(0, 2); + Closinfo_val(accu) = Make_closinfo(0, 2, 1); pc++; sp += nvars; Next; @@ -680,13 +680,14 @@ value caml_interprete(code_t prog, asize_t prog_size) *--sp = accu; p = &Field(accu, 0); *p++ = (value) (pc + pc[0]); - *p++ = Make_closinfo(0, envofs); + *p++ = Make_closinfo(0, envofs, nfuncs < 2); for (i = 1; i < nfuncs; i++) { *p++ = Make_header(i * 3, Infix_tag, 0); /* color irrelevant */ *--sp = (value) p; *p++ = (value) (pc + pc[i]); envofs -= 3; - *p++ = Make_closinfo(0, envofs); + CAMLassert(i <= nfuncs - 1); + *p++ = Make_closinfo(0, envofs, i == nfuncs - 1); } pc += nfuncs; Next; diff --git a/ocaml/runtime/memory.c b/ocaml/runtime/memory.c index 83cd540ecf5..2d56089ad38 100644 --- a/ocaml/runtime/memory.c +++ b/ocaml/runtime/memory.c @@ -244,6 +244,13 @@ CAMLexport int caml_atomic_cas_field ( } } +CAMLprim value caml_atomic_make(value v) +{ + CAMLparam1(v); + value ref = caml_alloc_small(1, 0); + Field(ref, 0) = v; + CAMLreturn(ref); +} CAMLprim 
value caml_atomic_load (value ref) { diff --git a/ocaml/runtime/meta.c b/ocaml/runtime/meta.c index 70bf854da78..3602919a065 100644 --- a/ocaml/runtime/meta.c +++ b/ocaml/runtime/meta.c @@ -121,7 +121,7 @@ CAMLprim value caml_reify_bytecode(value ls_prog, clos = caml_alloc_small (2, Closure_tag); Code_val(clos) = (code_t) prog; - Closinfo_val(clos) = Make_closinfo(0, 2); + Closinfo_val(clos) = Make_closinfo(0, 2, 1); bytecode = caml_alloc_small (2, Abstract_tag); Bytecode_val(bytecode)->prog = prog; Bytecode_val(bytecode)->len = len; diff --git a/ocaml/runtime/obj.c b/ocaml/runtime/obj.c index dd91d9426ff..bbd78ee750d 100644 --- a/ocaml/runtime/obj.c +++ b/ocaml/runtime/obj.c @@ -108,7 +108,7 @@ CAMLprim value caml_obj_block(value tag, value size) /* Closinfo_val is the second field, so we need size at least 2 */ if (sz < 2) caml_invalid_argument ("Obj.new_block"); res = caml_alloc(sz, tg); - Closinfo_val(res) = Make_closinfo(0, 2); /* does not allocate */ + Closinfo_val(res) = Make_closinfo(0, 2, 1); /* does not allocate */ break; } case String_tag: { diff --git a/ocaml/runtime/power.S b/ocaml/runtime/power.S index 68486cb7dc8..30dbee60bcc 100644 --- a/ocaml/runtime/power.S +++ b/ocaml/runtime/power.S @@ -649,7 +649,7 @@ caml_system__code_end: .type caml_system.frametable, @object caml_system.frametable: datag 1 /* one descriptor */ - datag .L105 + 4 /* return address into callback */ + .long .L105 + 4 - . 
/* return address into callback */ .short -1 /* negative size count => use callback link */ .short 0 /* no roots here */ diff --git a/ocaml/runtime/riscv.S b/ocaml/runtime/riscv.S index c02e437adda..01f4ee74b5c 100644 --- a/ocaml/runtime/riscv.S +++ b/ocaml/runtime/riscv.S @@ -834,13 +834,13 @@ caml_system__code_end: /* GC roots for callback */ OBJECT(caml_system.frametable) - .quad 2 /* two descriptors */ - .quad L(caml_retaddr) /* return address into callback */ - .short -1 /* negative frame size => use callback link */ - .short 0 /* no roots */ + .quad 2 /* two descriptors */ + .4byte L(caml_retaddr) - . /* return address into callback */ + .short -1 /* negative frame size => use callback link */ + .short 0 /* no roots */ .align 3 - .quad L(frame_runstack) /* return address into fiber handler */ - .short -1 /* negative frame size => use callback link */ - .short 0 /* no roots */ + .4byte L(frame_runstack) - . /* return address into fiber handler */ + .short -1 /* negative frame size => use callback link */ + .short 0 /* no roots */ .align 3 END_OBJECT(caml_system.frametable) diff --git a/ocaml/runtime/runtime_events.c b/ocaml/runtime/runtime_events.c index 1e5e141c0f1..1939e598850 100644 --- a/ocaml/runtime/runtime_events.c +++ b/ocaml/runtime/runtime_events.c @@ -832,3 +832,15 @@ CAMLprim value caml_runtime_events_user_resolve( CAMLdrop; return (value) Val_none; } + +/* Linker compatibility with unused 4 stdlib externals */ + +CAMLprim value caml_eventlog_resume(value v) +{ + caml_failwith("Called caml_eventlog_resume in runtime5: not supported."); +} + +CAMLprim value caml_eventlog_pause(value v) +{ + caml_failwith("Called caml_eventlog_pause in runtime5: not supported."); +} diff --git a/ocaml/runtime4/caml/domain_state.tbl b/ocaml/runtime4/caml/domain_state.tbl index 107d48bb71f..34b46ba8513 100644 --- a/ocaml/runtime4/caml/domain_state.tbl +++ b/ocaml/runtime4/caml/domain_state.tbl @@ -104,5 +104,10 @@ DOMAIN_STATE(void*, checking_pointer_pc) /* See 
major_gc.c */ #endif +DOMAIN_STATE(void*, dls_root) +DOMAIN_STATE(void*, c_stack) +DOMAIN_STATE(void*, current_stack) +/* Unused: compatibility with runtime5 */ + DOMAIN_STATE(extra_params_area, extra_params) /* This member must occur last, because it is an array, not a scalar */ diff --git a/ocaml/runtime4/misc.c b/ocaml/runtime4/misc.c index e2c0e9548ab..3da6ef99ed4 100644 --- a/ocaml/runtime4/misc.c +++ b/ocaml/runtime4/misc.c @@ -28,7 +28,9 @@ __declspec(noreturn) void __cdecl abort(void); #include #include #include +#include "caml/alloc.h" #include "caml/config.h" +#include "caml/fail.h" #include "caml/misc.h" #include "caml/memory.h" #include "caml/osdeps.h" @@ -225,3 +227,64 @@ void caml_flambda2_invalid (value message) fprintf (stderr, "Consider using [Sys.opaque_identity].\n"); abort (); } + +/* Fake atomic operations - runtime4 is single threaded, but we need to + provide these symbols for compatibility with the 5 Stdlib updates. */ + +CAMLprim value caml_atomic_make(value v) +{ + CAMLparam1(v); + value ref = caml_alloc_small(1, 0); + Field(ref, 0) = v; + CAMLreturn(ref); +} + +CAMLprim value caml_atomic_load(value ref) +{ + return Field(ref, 0); +} + +CAMLprim value caml_atomic_cas(value ref, value oldv, value newv) +{ + value* p = Op_val(ref); + if (*p == oldv) { + caml_modify(p, newv); + return Val_int(1); + } else { + return Val_int(0); + } +} + +CAMLprim value caml_atomic_exchange(value ref, value v) +{ + value ret = Field(ref, 0); + caml_modify(Op_val(ref), v); + return ret; +} + +CAMLprim value caml_atomic_fetch_add(value ref, value incr) +{ + value ret; + value* p = Op_val(ref); + CAMLassert(Is_long(*p)); + ret = *p; + *p = Val_long(Long_val(ret) + Long_val(incr)); + return ret; +} + +/* Fake lazy operations - stdlib compatiblity with the 5 lazy implementation. 
*/ + +CAMLprim value caml_lazy_update_to_forward(value v) +{ + caml_failwith("Called caml_lazy_update_to_forward in runtime4: not supported."); +} + +CAMLprim value caml_lazy_reset_to_lazy(value v) +{ + caml_failwith("Called caml_lazy_reset_to_lazy in runtime4: not supported."); +} + +CAMLprim value caml_lazy_update_to_forcing(value v) +{ + caml_failwith("Called caml_lazy_update_to_forcing in runtime4: not supported."); +} diff --git a/ocaml/stdlib/atomic.ml b/ocaml/stdlib/atomic.ml index aa057e3985f..e4c5c36ece9 100644 --- a/ocaml/stdlib/atomic.ml +++ b/ocaml/stdlib/atomic.ml @@ -12,61 +12,24 @@ (* *) (**************************************************************************) -(* BACKPORT BEGIN type !'a t +(* BACKPORT BEGIN switch to primtiives external make : 'a -> 'a t = "%makemutable" external get : 'a t -> 'a = "%atomic_load" external exchange : 'a t -> 'a -> 'a = "%atomic_exchange" external compare_and_set : 'a t -> 'a -> 'a -> bool = "%atomic_cas" external fetch_and_add : int t -> int -> int = "%atomic_fetch_add" *) -external ( == ) : 'a -> 'a -> bool = "%eq" -external ( + ) : int -> int -> int = "%addint" +external make : 'a -> 'a t = "caml_atomic_make" +external get : 'a t -> 'a = "caml_atomic_load" +external exchange : 'a t -> 'a -> 'a = "caml_atomic_exchange" +external compare_and_set : 'a t -> 'a -> 'a -> bool = "caml_atomic_cas" +external fetch_and_add : int t -> int -> int = "caml_atomic_fetch_add" (* BACKPORT END *) -external ignore : 'a -> unit = "%ignore" - -(* BACKPORT BEGIN *) -(* We are not reusing ('a ref) directly to make it easier to reason - about atomicity if we wish to: even in a sequential implementation, - signals and other asynchronous callbacks might break atomicity. *) -type 'a t = {mutable v: 'a} - -let make v = {v} -let get r = r.v -let set r v = r.v <- v - -(* The following functions are set to never be inlined: Flambda is - allowed to move surrounding code inside the critical section, - including allocations. 
*) - -let[@inline never] exchange r v = - (* BEGIN ATOMIC *) - let cur = r.v in - r.v <- v; - (* END ATOMIC *) - cur -let[@inline never] compare_and_set r seen v = - (* BEGIN ATOMIC *) - let cur = r.v in - if cur == seen then ( - r.v <- v; - (* END ATOMIC *) - true - ) else - false - -let[@inline never] fetch_and_add r n = - (* BEGIN ATOMIC *) - let cur = r.v in - r.v <- (cur + n); - (* END ATOMIC *) - cur +external ignore : 'a -> unit = "%ignore" -(* BACKPORT END *) -(* BACKPORT let set r x = ignore (exchange r x) -*) let incr r = ignore (fetch_and_add r 1) let decr r = ignore (fetch_and_add r (-1)) diff --git a/ocaml/stdlib/camlinternalLazy.ml b/ocaml/stdlib/camlinternalLazy.ml index 6cf7163b22a..a0b50049278 100644 --- a/ocaml/stdlib/camlinternalLazy.ml +++ b/ocaml/stdlib/camlinternalLazy.ml @@ -26,145 +26,164 @@ type 'a t = 'a lazy_t exception Undefined -(* CR ocaml 5 runtime: - BACKPORT BEGIN - -(* [update_to_forcing blk] tries to update a [blk] with [lazy_tag] to - [forcing_tag] using compare-and-swap (CAS), taking care to handle concurrent - marking of the header word by a concurrent GC thread. Returns [0] if the - CAS is successful. If the CAS fails, then the tag was observed to be - something other than [lazy_tag] due to a concurrent mutator. In this case, - the function returns [1]. *) -external update_to_forcing : Obj.t -> int = - "caml_lazy_update_to_forcing" [@@noalloc] - -(* [reset_to_lazy blk] expects [blk] to be a lazy object with [Obj.forcing_tag] - and updates the tag to [Obj.lazy_tag], taking care to handle concurrent - marking of this object's header by a concurrent GC thread. *) -external reset_to_lazy : Obj.t -> unit = "caml_lazy_reset_to_lazy" [@@noalloc] - -(* [update_to_forward blk] expects [blk] to be a lazy object with - [Obj.forcing_tag] and updates the tag to [Obj.forward_tag], taking care to - handle concurrent marking of this object's header by a concurrent GC thread. 
- *) -external update_to_forward : Obj.t -> unit = - "caml_lazy_update_to_forward" [@@noalloc] - -(* Assumes [blk] is a block with tag forcing *) -let do_force_block blk = - let b = Obj.repr blk in - let closure = (Obj.obj (Obj.field b 0) : unit -> 'arg) in - Obj.set_field b 0 (Obj.repr ()); (* Release the closure *) - try +module Lazy5 = struct + (* [update_to_forcing blk] tries to update a [blk] with [lazy_tag] to + [forcing_tag] using compare-and-swap (CAS), taking care to handle concurrent + marking of the header word by a concurrent GC thread. Returns [0] if the + CAS is successful. If the CAS fails, then the tag was observed to be + something other than [lazy_tag] due to a concurrent mutator. In this case, + the function returns [1]. *) + external update_to_forcing : Obj.t -> int = + "caml_lazy_update_to_forcing" [@@noalloc] + + (* [reset_to_lazy blk] expects [blk] to be a lazy object with [Obj.forcing_tag] + and updates the tag to [Obj.lazy_tag], taking care to handle concurrent + marking of this object's header by a concurrent GC thread. *) + external reset_to_lazy : Obj.t -> unit = "caml_lazy_reset_to_lazy" [@@noalloc] + + (* [update_to_forward blk] expects [blk] to be a lazy object with + [Obj.forcing_tag] and updates the tag to [Obj.forward_tag], taking care to + handle concurrent marking of this object's header by a concurrent GC thread. 
+ *) + external update_to_forward : Obj.t -> unit = + "caml_lazy_update_to_forward" [@@noalloc] + + (* Assumes [blk] is a block with tag forcing *) + let do_force_block blk = + let b = Obj.repr blk in + let closure = (Obj.obj (Obj.field b 0) : unit -> 'arg) in + Obj.set_field b 0 (Obj.repr ()); (* Release the closure *) + try + let result = closure () in + Obj.set_field b 0 (Obj.repr result); + update_to_forward b; + result + with e -> + Obj.set_field b 0 (Obj.repr (fun () -> raise e)); + reset_to_lazy b; + raise e + + (* Assumes [blk] is a block with tag forcing *) + let do_force_val_block blk = + let b = Obj.repr blk in + let closure = (Obj.obj (Obj.field b 0) : unit -> 'arg) in + Obj.set_field b 0 (Obj.repr ()); (* Release the closure *) let result = closure () in Obj.set_field b 0 (Obj.repr result); update_to_forward b; result - with e -> - Obj.set_field b 0 (Obj.repr (fun () -> raise e)); - reset_to_lazy b; - raise e - -(* Assumes [blk] is a block with tag forcing *) -let do_force_val_block blk = - let b = Obj.repr blk in - let closure = (Obj.obj (Obj.field b 0) : unit -> 'arg) in - Obj.set_field b 0 (Obj.repr ()); (* Release the closure *) - let result = closure () in - Obj.set_field b 0 (Obj.repr result); - update_to_forward b; - result - -(* Called by [force_gen] *) -let force_gen_lazy_block ~only_val (blk : 'arg lazy_t) = - (* We expect the tag to be [lazy_tag], but may be other tags due to - concurrent forcing of lazy values. *) - match update_to_forcing (Obj.repr blk) with - | 0 when only_val -> do_force_val_block blk - | 0 -> do_force_block blk - | _ -> raise Undefined - -(* used in the %lazy_force primitive *) -let force_lazy_block blk = force_gen_lazy_block ~only_val:false blk - -(* [force_gen ~only_val:false] is not used, since [Lazy.force] is - declared as a primitive whose code inlines the tag tests of its - argument, except when afl instrumentation is turned on. 
*) -let force_gen ~only_val (lzv : 'arg lazy_t) = - (* Using [Sys.opaque_identity] prevents two potential problems: - - If the value is known to have Forward_tag, then it could have been - shortcut during GC, so that information must be forgotten (see GPR#713 - and issue #7301). This is not an issue here at the moment since - [Obj.tag] is not simplified by the compiler, and GPR#713 also - ensures that no value will be known to have Forward_tag. - - If the value is known to be immutable, then if the compiler - cannot prove that the last branch is not taken it will issue a - warning 59 (modification of an immutable value) *) - let lzv = Sys.opaque_identity lzv in - let x = Obj.repr lzv in - (* START no safe points. If a GC occurs here, then the object [x] may be - short-circuited, and getting the first field of [x] would get us the wrong - value. Luckily, the compiler does not insert GC safe points at this place, - so it is ok. *) - let t = Obj.tag x in - if t = Obj.forward_tag then - (Obj.obj (Obj.field x 0) : 'arg) - (* END no safe points *) - else if t = Obj.forcing_tag then raise Undefined - else if t <> Obj.lazy_tag then (Obj.obj x : 'arg) - else force_gen_lazy_block ~only_val lzv -*) -let raise_undefined = Obj.repr (fun () -> raise Undefined) - -external make_forward : Obj.t -> Obj.t -> unit = "caml_obj_make_forward" - -(* Assume [blk] is a block with tag lazy *) -let force_lazy_block (blk : 'arg lazy_t) = - let closure = (Obj.obj (Obj.field (Obj.repr blk) 0) : unit -> 'arg) in - Obj.set_field (Obj.repr blk) 0 raise_undefined; - try + + (* Called by [force_gen] *) + let force_gen_lazy_block ~only_val (blk : 'arg lazy_t) = + (* We expect the tag to be [lazy_tag], but may be other tags due to + concurrent forcing of lazy values. 
*) + match update_to_forcing (Obj.repr blk) with + | 0 when only_val -> do_force_val_block blk + | 0 -> do_force_block blk + | _ -> raise Undefined + + (* used in the %lazy_force primitive *) + let force_lazy_block blk = force_gen_lazy_block ~only_val:false blk + + (* [force_gen ~only_val:false] is not used, since [Lazy.force] is + declared as a primitive whose code inlines the tag tests of its + argument, except when afl instrumentation is turned on. *) + let force_gen ~only_val (lzv : 'arg lazy_t) = + (* Using [Sys.opaque_identity] prevents two potential problems: + - If the value is known to have Forward_tag, then it could have been + shortcut during GC, so that information must be forgotten (see GPR#713 + and issue #7301). This is not an issue here at the moment since + [Obj.tag] is not simplified by the compiler, and GPR#713 also + ensures that no value will be known to have Forward_tag. + - If the value is known to be immutable, then if the compiler + cannot prove that the last branch is not taken it will issue a + warning 59 (modification of an immutable value) *) + let lzv = Sys.opaque_identity lzv in + let x = Obj.repr lzv in + (* START no safe points. If a GC occurs here, then the object [x] may be + short-circuited, and getting the first field of [x] would get us the wrong + value. Luckily, the compiler does not insert GC safe points at this place, + so it is ok. 
*) + let t = Obj.tag x in + if t = Obj.forward_tag then + (Obj.obj (Obj.field x 0) : 'arg) + (* END no safe points *) + else if t = Obj.forcing_tag then raise Undefined + else if t <> Obj.lazy_tag then (Obj.obj x : 'arg) + else force_gen_lazy_block ~only_val lzv +end + +(* CR ocaml 5 runtime: delete the old implementation *) +module Lazy4 = struct + let raise_undefined = Obj.repr (fun () -> raise Undefined) + + external make_forward : Obj.t -> Obj.t -> unit = "caml_obj_make_forward" + + (* Assume [blk] is a block with tag lazy *) + let force_lazy_block (blk : 'arg lazy_t) = + let closure = (Obj.obj (Obj.field (Obj.repr blk) 0) : unit -> 'arg) in + Obj.set_field (Obj.repr blk) 0 raise_undefined; + try + let result = closure () in + make_forward (Obj.repr blk) (Obj.repr result); + result + with e -> + Obj.set_field (Obj.repr blk) 0 (Obj.repr (fun () -> raise e)); + raise e + + + (* Assume [blk] is a block with tag lazy *) + let force_val_lazy_block (blk : 'arg lazy_t) = + let closure = (Obj.obj (Obj.field (Obj.repr blk) 0) : unit -> 'arg) in + Obj.set_field (Obj.repr blk) 0 raise_undefined; let result = closure () in make_forward (Obj.repr blk) (Obj.repr result); result - with e -> - Obj.set_field (Obj.repr blk) 0 (Obj.repr (fun () -> raise e)); - raise e - - -(* Assume [blk] is a block with tag lazy *) -let force_val_lazy_block (blk : 'arg lazy_t) = - let closure = (Obj.obj (Obj.field (Obj.repr blk) 0) : unit -> 'arg) in - Obj.set_field (Obj.repr blk) 0 raise_undefined; - let result = closure () in - make_forward (Obj.repr blk) (Obj.repr result); - result - - -(* [force] is not used, since [Lazy.force] is declared as a primitive - whose code inlines the tag tests of its argument, except when afl - instrumentation is turned on. 
*) - -let force (lzv : 'arg lazy_t) = - (* Using [Sys.opaque_identity] prevents two potential problems: - - If the value is known to have Forward_tag, then its tag could have - changed during GC, so that information must be forgotten (see GPR#713 - and issue #7301) - - If the value is known to be immutable, then if the compiler - cannot prove that the last branch is not taken it will issue a - warning 59 (modification of an immutable value) *) - let lzv = Sys.opaque_identity lzv in - let x = Obj.repr lzv in - let t = Obj.tag x in - if t = Obj.forward_tag then (Obj.obj (Obj.field x 0) : 'arg) else - if t <> Obj.lazy_tag then (Obj.obj x : 'arg) - else force_lazy_block lzv - - -let force_val (lzv : 'arg lazy_t) = - let x = Obj.repr lzv in - let t = Obj.tag x in - if t = Obj.forward_tag then (Obj.obj (Obj.field x 0) : 'arg) else - if t <> Obj.lazy_tag then (Obj.obj x : 'arg) - else force_val_lazy_block lzv - -(* BACKPORT END *) + + + (* [force] is not used, since [Lazy.force] is declared as a primitive + whose code inlines the tag tests of its argument, except when afl + instrumentation is turned on. 
*) + + let force (lzv : 'arg lazy_t) = + (* Using [Sys.opaque_identity] prevents two potential problems: + - If the value is known to have Forward_tag, then its tag could have + changed during GC, so that information must be forgotten (see GPR#713 + and issue #7301) + - If the value is known to be immutable, then if the compiler + cannot prove that the last branch is not taken it will issue a + warning 59 (modification of an immutable value) *) + let lzv = Sys.opaque_identity lzv in + let x = Obj.repr lzv in + let t = Obj.tag x in + if t = Obj.forward_tag then (Obj.obj (Obj.field x 0) : 'arg) else + if t <> Obj.lazy_tag then (Obj.obj x : 'arg) + else force_lazy_block lzv + + + let force_val (lzv : 'arg lazy_t) = + let x = Obj.repr lzv in + let t = Obj.tag x in + if t = Obj.forward_tag then (Obj.obj (Obj.field x 0) : 'arg) else + if t <> Obj.lazy_tag then (Obj.obj x : 'arg) + else force_val_lazy_block lzv +end + +external runtime5 : unit -> bool = "%runtime5" +let runtime5 = runtime5 () + +let force_lazy_block = + if runtime5 then Lazy5.force_lazy_block else Lazy4.force_lazy_block + +let force_gen ~only_val l = + if runtime5 then Lazy5.force_gen ~only_val l + else if only_val then Lazy4.force_val l + else Lazy4.force l + +let force l = + if runtime5 then Lazy5.force_gen ~only_val:false l + else Lazy4.force l + +let force_val l = + if runtime5 then Lazy5.force_gen ~only_val:true l + else Lazy4.force_val l diff --git a/ocaml/stdlib/camlinternalLazy.mli b/ocaml/stdlib/camlinternalLazy.mli index 126328f698e..bd6fe9e6dc4 100644 --- a/ocaml/stdlib/camlinternalLazy.mli +++ b/ocaml/stdlib/camlinternalLazy.mli @@ -26,14 +26,10 @@ exception Undefined val force_lazy_block : 'a lazy_t -> 'a -(* CR ocaml 5 runtime: - BACKPORT BEGIN *) -val force_val_lazy_block : 'a lazy_t -> 'a - +(* CR ocaml 5 runtime: delete these runtime4 functions *) +(* BACKPORT BEGIN *) val force : 'a lazy_t -> 'a val force_val : 'a lazy_t -> 'a (* BACKPORT END *) -(* CR ocaml 5 runtime: add this back in 
val force_gen : only_val:bool -> 'a lazy_t -> 'a -*) diff --git a/ocaml/stdlib/domain.ml b/ocaml/stdlib/domain.ml index af58ba96ee7..b6e33e84196 100644 --- a/ocaml/stdlib/domain.ml +++ b/ocaml/stdlib/domain.ml @@ -72,6 +72,41 @@ module DLS = struct else Obj.magic v end +(******** Callbacks **********) + +(* first spawn, domain startup and at exit functionality *) +let first_domain_spawned = Atomic.make false + +let first_spawn_function = ref (fun () -> ()) + +let before_first_spawn f = + if Atomic.get first_domain_spawned then + raise (Invalid_argument "first domain already spawned") + else begin + let old_f = !first_spawn_function in + let new_f () = old_f (); f () in + first_spawn_function := new_f + end + +let at_exit_key = DLS.new_key (fun () -> (fun () -> ())) + +let at_exit f = + let old_exit : unit -> unit = DLS.get at_exit_key in + let new_exit () = + (* The domain termination callbacks ([at_exit]) are run in + last-in-first-out (LIFO) order in order to be symmetric with the domain + creation callbacks ([at_each_spawn]) which run in first-in-fisrt-out + (FIFO) order. 
*) + f (); old_exit () + in + DLS.set at_exit_key new_exit + +let do_at_exit () = + let f : unit -> unit = DLS.get at_exit_key in + f () + +let _ = Stdlib.do_domain_local_at_exit := do_at_exit + (* CR ocaml 5 runtime: domains not supported on 4.x module Raw = struct @@ -197,21 +232,7 @@ let self () = Raw.self () let is_main_domain () = (self () :> int) = 0 -(******** Callbacks **********) - -(* first spawn, domain startup and at exit functionality *) -let first_domain_spawned = Atomic.make false - -let first_spawn_function = ref (fun () -> ()) - -let before_first_spawn f = - if Atomic.get first_domain_spawned then - raise (Invalid_argument "first domain already spawned") - else begin - let old_f = !first_spawn_function in - let new_f () = old_f (); f () in - first_spawn_function := new_f - end +(******* Creation and Termination ********) let do_before_first_spawn () = if not (Atomic.get first_domain_spawned) then begin @@ -221,27 +242,6 @@ let do_before_first_spawn () = first_spawn_function := (fun () -> ()) end -let at_exit_key = DLS.new_key (fun () -> (fun () -> ())) - -let at_exit f = - let old_exit : unit -> unit = DLS.get at_exit_key in - let new_exit () = - (* The domain termination callbacks ([at_exit]) are run in - last-in-first-out (LIFO) order in order to be symmetric with the domain - creation callbacks ([at_each_spawn]) which run in first-in-fisrt-out - (FIFO) order. 
*) - f (); old_exit () - in - DLS.set at_exit_key new_exit - -let do_at_exit () = - let f : unit -> unit = DLS.get at_exit_key in - f () - -let _ = Stdlib.do_domain_local_at_exit := do_at_exit - -(******* Creation and Termination ********) - let spawn f = do_before_first_spawn (); let pk = DLS.get_initial_keys () in diff --git a/ocaml/stdlib/domain.mli b/ocaml/stdlib/domain.mli index 08d5b5cce29..387661efeb0 100644 --- a/ocaml/stdlib/domain.mli +++ b/ocaml/stdlib/domain.mli @@ -54,6 +54,20 @@ val get_id : 'a t -> id val self : unit -> id (** [self ()] is the identifier of the currently running domain *) +val cpu_relax : unit -> unit +(** If busy-waiting, calling cpu_relax () between iterations + will improve performance on some CPU architectures *) + +val is_main_domain : unit -> bool +(** [is_main_domain ()] returns true if called from the initial domain. *) + +val recommended_domain_count : unit -> int +(** The recommended maximum number of domains which should be running + simultaneously (including domains already running). + + The value returned is at least [1]. *) +*) + val before_first_spawn : (unit -> unit) -> unit (** [before_first_spawn f] registers [f] to be called before the first domain is spawned by the program. The functions registered with @@ -81,20 +95,6 @@ let temp_file_key = Domain.DLS.new_key (fun _ -> to close it, thus guaranteeing the descriptor is not leaked in case the current domain exits. *) -val cpu_relax : unit -> unit -(** If busy-waiting, calling cpu_relax () between iterations - will improve performance on some CPU architectures *) - -val is_main_domain : unit -> bool -(** [is_main_domain ()] returns true if called from the initial domain. *) - -val recommended_domain_count : unit -> int -(** The recommended maximum number of domains which should be running - simultaneously (including domains already running). - - The value returned is at least [1]. 
*) -*) - module DLS : sig (** Domain-local Storage *) diff --git a/ocaml/stdlib/filename.ml b/ocaml/stdlib/filename.ml index 0933485f76a..32fcf7955db 100644 --- a/ocaml/stdlib/filename.ml +++ b/ocaml/stdlib/filename.ml @@ -333,28 +333,10 @@ let remove_extension name = external open_desc: string -> open_flag list -> int -> int = "caml_sys_open" external close_desc: int -> unit = "caml_sys_close" -(* CR ocaml 5 runtime: - BACKPORT BEGIN -let prng_key = - Domain.DLS.new_key Random.State.make_self_init -*) -let prng_key = ref (lazy(Random.State.make_self_init ())) -(* BACKPORT END *) - -(* BACKPORT BEGIN *) -module Domain = struct - module DLS = struct - let new_key ~split_from_parent:_ f = - ref (f ()) - - let get = (!) - let set = (:=) - end -end -(* BACKPORT END *) +let prng_key = Domain.DLS.new_key Random.State.make_self_init let temp_file_name temp_dir prefix suffix = - let random_state = Lazy.force (Domain.DLS.get prng_key) in + let random_state = Domain.DLS.get prng_key in let rnd = (Random.State.bits random_state) land 0xFFFFFF in concat temp_dir (Printf.sprintf "%s%06x%s" prefix rnd suffix) diff --git a/ocaml/stdlib/format.ml b/ocaml/stdlib/format.ml index f930f2b20fb..388a3767da4 100644 --- a/ocaml/stdlib/format.ml +++ b/ocaml/stdlib/format.ml @@ -1027,8 +1027,6 @@ and err_formatter = formatter_of_out_channel Stdlib.stderr and str_formatter = formatter_of_buffer stdbuf (* Initialise domain local state *) -(* CR ocaml 5 runtime: - BACKPORT BEGIN module DLS = Domain.DLS let stdbuf_key = DLS.new_key pp_make_buffer @@ -1075,16 +1073,6 @@ let err_formatter_key = DLS.new_key (fun () -> Domain.at_exit (pp_print_flush ppf); ppf) let _ = DLS.set err_formatter_key err_formatter -*) -let std_formatter_key = std_formatter -let err_formatter_key = err_formatter -let str_formatter_key = str_formatter -let stdbuf_key = stdbuf - -module DLS = struct - let get = Fun.id -end -(* BACKPORT END *) let get_std_formatter () = DLS.get std_formatter_key let get_err_formatter () = 
DLS.get err_formatter_key @@ -1107,8 +1095,6 @@ let flush_str_formatter () = let str_formatter = DLS.get str_formatter_key in flush_buffer_formatter stdbuf str_formatter -(* CR ocaml 5 runtime: - BACKPORT let make_synchronized_formatter output flush = DLS.new_key (fun () -> let buf = Buffer.create pp_buffer_size in @@ -1122,7 +1108,6 @@ let make_synchronized_formatter output flush = let synchronized_formatter_of_out_channel oc = make_synchronized_formatter (output_substring oc) (fun () -> flush oc) -*) (* Symbolic pretty-printing @@ -1493,8 +1478,6 @@ let flush_standard_formatters () = let () = at_exit flush_standard_formatters -(* CR ocaml 5 runtime: - BACKPORT let () = Domain.before_first_spawn (fun () -> flush_standard_formatters (); let fs = pp_get_formatter_out_functions std_formatter () in @@ -1507,4 +1490,3 @@ let () = Domain.before_first_spawn (fun () -> {fs with out_string = buffered_out_string err_buf_key; out_flush = buffered_out_flush Stdlib.stderr err_buf_key}; ) -*) diff --git a/ocaml/stdlib/format.mli b/ocaml/stdlib/format.mli index f884482ba02..a2dfbffd54b 100644 --- a/ocaml/stdlib/format.mli +++ b/ocaml/stdlib/format.mli @@ -969,8 +969,6 @@ val formatter_of_out_channel : out_channel -> formatter to the corresponding output channel [oc]. *) -(* CR ocaml 5 runtime: - BACKPORT val synchronized_formatter_of_out_channel : out_channel -> formatter Domain.DLS.key [@@alert unstable][@@alert "-unstable"] @@ -982,8 +980,6 @@ val synchronized_formatter_of_out_channel : domains will be interleaved with each other at points where the formatter is flushed, such as with {!print_flush}. *) -*) - val std_formatter : formatter (** The initial domain's standard formatter to write to standard output. @@ -1055,8 +1051,6 @@ val make_formatter : returns a formatter to the {!Stdlib.out_channel} [oc]. 
*) -(* CR ocaml 5 runtime: - BACKPORT val make_synchronized_formatter : (string -> int -> int -> unit) -> (unit -> unit) -> formatter Domain.DLS.key [@@alert unstable][@@alert "-unstable"] @@ -1069,7 +1063,6 @@ val make_synchronized_formatter : is flushed, such as with {!print_flush}. @since 5.0 *) -*) val formatter_of_out_functions : formatter_out_functions -> formatter diff --git a/ocaml/stdlib/gc.ml b/ocaml/stdlib/gc.ml index 92856778fa7..ac81c1736a4 100644 --- a/ocaml/stdlib/gc.ml +++ b/ocaml/stdlib/gc.ml @@ -135,11 +135,7 @@ let delete_alarm a = Atomic.set a false module Memprof = struct -(* BACKPORT BEGIN - type t -*) type t = unit -(* BACKPORT END *) type allocation_source = Normal | Marshal | Custom type allocation = { n_samples : int; @@ -174,8 +170,4 @@ module Memprof = c_start sampling_rate callstack_size tracker external stop : unit -> unit = "caml_memprof_stop" - -(* BACKPORT - external discard : t -> unit = "caml_memprof_discard" -*) end diff --git a/ocaml/stdlib/gc.mli b/ocaml/stdlib/gc.mli index c6652d02a48..59d531ae4b5 100644 --- a/ocaml/stdlib/gc.mli +++ b/ocaml/stdlib/gc.mli @@ -463,11 +463,8 @@ external eventlog_resume : unit -> unit = "caml_eventlog_resume" notice. 
*) module Memprof : sig -(* BACKPORT BEGIN - type t -*) type t = unit -(* BACKPORT END *) + (** the type of a profile *) type allocation_source = Normal | Marshal | Custom diff --git a/ocaml/stdlib/hashtbl.ml b/ocaml/stdlib/hashtbl.ml index cc8b5a7f882..9cb81a6e157 100644 --- a/ocaml/stdlib/hashtbl.ml +++ b/ocaml/stdlib/hashtbl.ml @@ -62,12 +62,7 @@ let randomized = Atomic.make randomized_default let randomize () = Atomic.set randomized true let is_randomized () = Atomic.get randomized -(* CR ocaml 5 runtime: - BACKPORT BEGIN let prng_key = Domain.DLS.new_key Random.State.make_self_init -*) -let prng = lazy (Random.State.make_self_init()) -(* BACKPORT END *) (* Functions which appear before the functorial interface must either be independent of the hash function or take it as a parameter (see #2202 and @@ -83,12 +78,7 @@ let rec power_2_above x n = let create ?(random = Atomic.get randomized) initial_size = let s = power_2_above 16 initial_size in let seed = -(* CR ocaml 5 runtime: - BACKPORT BEGIN if random then Random.State.bits (Domain.DLS.get prng_key) else 0 -*) - if random then Random.State.bits (Lazy.force prng) else 0 -(* BACKPORT END *) in { initial_size = s; size = 0; seed = seed; data = Array.make s Empty } @@ -636,12 +626,7 @@ let of_seq i = let rebuild ?(random = Atomic.get randomized) h = let s = power_2_above 16 (Array.length h.data) in let seed = -(* CR ocaml 5 runtime: - BACKPORT BEGIN if random then Random.State.bits (Domain.DLS.get prng_key) -*) - if random then Random.State.bits (Lazy.force prng) -(* BACKPORT END *) else if Obj.size (Obj.repr h) >= 4 then h.seed else 0 in let h' = { diff --git a/ocaml/stdlib/lazy.ml b/ocaml/stdlib/lazy.ml index 94c44bbfabd..eed45ddd98b 100644 --- a/ocaml/stdlib/lazy.ml +++ b/ocaml/stdlib/lazy.ml @@ -57,31 +57,37 @@ type 'a t = 'a CamlinternalLazy.t exception Undefined = CamlinternalLazy.Undefined external make_forward : 'a -> 'a lazy_t = "caml_lazy_make_forward" external force : 'a t -> 'a = "%lazy_force" 
+external runtime5 : unit -> bool = "%runtime5" +let runtime5 = runtime5 () -(* CR ocaml 5 runtime: - BACKPORT BEGIN let force_val l = CamlinternalLazy.force_gen ~only_val:true l -*) -let force_val = CamlinternalLazy.force_val -(* BACKPORT END *) let from_fun (f : unit -> 'arg) = let x = Obj.new_block Obj.lazy_tag 1 in Obj.set_field x 0 (Obj.repr f); (Obj.obj x : 'arg t) -let from_val (v : 'arg) = +let from_val4 (v : 'arg) = + let t = Obj.tag (Obj.repr v) in + if t = Obj.forward_tag || t = Obj.lazy_tag + || t = Obj.double_tag then begin + make_forward v + end else begin + (Obj.magic v : 'arg t) + end + +let from_val5 (v : 'arg) = let t = Obj.tag (Obj.repr v) in if t = Obj.forward_tag || t = Obj.lazy_tag -(* BACKPORT BEGIN - || t = Obj.forcing_tag *) -(* BACKPORT END *) + || t = Obj.forcing_tag || t = Obj.double_tag then begin make_forward v end else begin (Obj.magic v : 'arg t) end +let from_val = if runtime5 then from_val5 else from_val4 + let is_val (l : 'arg t) = Obj.tag (Obj.repr l) <> Obj.lazy_tag let map f x = diff --git a/ocaml/stdlib/marshal.ml b/ocaml/stdlib/marshal.ml index 98085840564..c9acded4a7c 100644 --- a/ocaml/stdlib/marshal.ml +++ b/ocaml/stdlib/marshal.ml @@ -50,11 +50,11 @@ external from_channel: in_channel -> 'a = "caml_input_value" external from_bytes_unsafe: bytes -> int -> 'a = "caml_input_value_from_bytes" external data_size_unsafe: bytes -> int -> int = "caml_marshal_data_size" -(* BACKPORT BEGIN -let header_size = 16 -*) -let header_size = 20 -(* BACKPORT END *) +external runtime5 : unit -> bool = "%runtime5" +let runtime5 = runtime5 () + +let header_size = if runtime5 then 16 else 20 + let data_size buff ofs = if ofs < 0 || ofs > Bytes.length buff - header_size then invalid_arg "Marshal.data_size" diff --git a/ocaml/stdlib/stdlib.ml b/ocaml/stdlib/stdlib.ml index 03b795f284e..b5cef7b61e7 100644 --- a/ocaml/stdlib/stdlib.ml +++ b/ocaml/stdlib/stdlib.ml @@ -556,26 +556,17 @@ let ( ^^ ) (Format (fmt1, str1)) (Format (fmt2, str2)) = external 
sys_exit : int -> 'a = "caml_sys_exit" (* for at_exit *) -(* BACKPORT BEGIN type 'a atomic_t +(* BACKPORT BEGIN external atomic_make : 'a -> 'a atomic_t = "%makemutable" external atomic_get : 'a atomic_t -> 'a = "%atomic_load" external atomic_compare_and_set : 'a atomic_t -> 'a -> 'a -> bool = "%atomic_cas" *) -type 'a t = {mutable v: 'a} - -let atomic_make v = {v} -let atomic_get r = r.v -let[@inline never] atomic_compare_and_set r seen v = - (* BEGIN ATOMIC *) - let cur = r.v in - if cur == seen then ( - r.v <- v; - (* END ATOMIC *) - true - ) else - false +external atomic_make : 'a -> 'a atomic_t = "caml_atomic_make" +external atomic_get : 'a atomic_t -> 'a = "caml_atomic_load" +external atomic_compare_and_set : 'a atomic_t -> 'a -> 'a -> bool + = "caml_atomic_cas" (* BACKPORT END *) let exit_function = atomic_make flush_all diff --git a/ocaml/testsuite/tests/backtrace/lazy.flambda.reference b/ocaml/testsuite/tests/backtrace/lazy.flambda.reference index 31c18daa32c..8b8ecc7a994 100644 --- a/ocaml/testsuite/tests/backtrace/lazy.flambda.reference +++ b/ocaml/testsuite/tests/backtrace/lazy.flambda.reference @@ -1,17 +1,17 @@ Uncaught exception Not_found Raised at Lazy.l1 in file "lazy.ml", line 3, characters 28-45 -Called from CamlinternalLazy.force_lazy_block in file "camlinternalLazy.ml", line 125, characters 17-27 -Re-raised at CamlinternalLazy.force_lazy_block in file "camlinternalLazy.ml", line 130, characters 4-11 +Called from CamlinternalLazy.Lazy4.force_lazy_block in file "camlinternalLazy.ml", line 126, characters 19-29 +Re-raised at CamlinternalLazy.Lazy4.force_lazy_block in file "camlinternalLazy.ml", line 131, characters 6-13 Called from Lazy.test1 in file "lazy.ml", line 6, characters 11-24 Called from Lazy.run in file "lazy.ml", line 15, characters 4-11 Uncaught exception Not_found Raised at Lazy.l1 in file "lazy.ml", line 3, characters 28-45 -Called from CamlinternalLazy.force_lazy_block in file "camlinternalLazy.ml", line 125, characters 17-27 
-Re-raised at CamlinternalLazy.force_lazy_block in file "camlinternalLazy.ml", line 130, characters 4-11 +Called from CamlinternalLazy.Lazy4.force_lazy_block in file "camlinternalLazy.ml", line 126, characters 19-29 +Re-raised at CamlinternalLazy.Lazy4.force_lazy_block in file "camlinternalLazy.ml", line 131, characters 6-13 Called from Lazy.test1 in file "lazy.ml", line 6, characters 11-24 Called from Lazy.run in file "lazy.ml", line 15, characters 4-11 Re-raised at Lazy.l2 in file "lazy.ml", line 8, characters 28-45 -Called from CamlinternalLazy.force_lazy_block in file "camlinternalLazy.ml", line 125, characters 17-27 -Re-raised at CamlinternalLazy.force_lazy_block in file "camlinternalLazy.ml", line 130, characters 4-11 +Called from CamlinternalLazy.Lazy4.force_lazy_block in file "camlinternalLazy.ml", line 126, characters 19-29 +Re-raised at CamlinternalLazy.Lazy4.force_lazy_block in file "camlinternalLazy.ml", line 131, characters 6-13 Called from Lazy.test2 in file "lazy.ml", line 11, characters 6-15 Called from Lazy.run in file "lazy.ml", line 15, characters 4-11 diff --git a/ocaml/testsuite/tests/backtrace/lazy.reference b/ocaml/testsuite/tests/backtrace/lazy.reference index 31c18daa32c..8b8ecc7a994 100644 --- a/ocaml/testsuite/tests/backtrace/lazy.reference +++ b/ocaml/testsuite/tests/backtrace/lazy.reference @@ -1,17 +1,17 @@ Uncaught exception Not_found Raised at Lazy.l1 in file "lazy.ml", line 3, characters 28-45 -Called from CamlinternalLazy.force_lazy_block in file "camlinternalLazy.ml", line 125, characters 17-27 -Re-raised at CamlinternalLazy.force_lazy_block in file "camlinternalLazy.ml", line 130, characters 4-11 +Called from CamlinternalLazy.Lazy4.force_lazy_block in file "camlinternalLazy.ml", line 126, characters 19-29 +Re-raised at CamlinternalLazy.Lazy4.force_lazy_block in file "camlinternalLazy.ml", line 131, characters 6-13 Called from Lazy.test1 in file "lazy.ml", line 6, characters 11-24 Called from Lazy.run in file "lazy.ml", line 15, 
characters 4-11 Uncaught exception Not_found Raised at Lazy.l1 in file "lazy.ml", line 3, characters 28-45 -Called from CamlinternalLazy.force_lazy_block in file "camlinternalLazy.ml", line 125, characters 17-27 -Re-raised at CamlinternalLazy.force_lazy_block in file "camlinternalLazy.ml", line 130, characters 4-11 +Called from CamlinternalLazy.Lazy4.force_lazy_block in file "camlinternalLazy.ml", line 126, characters 19-29 +Re-raised at CamlinternalLazy.Lazy4.force_lazy_block in file "camlinternalLazy.ml", line 131, characters 6-13 Called from Lazy.test1 in file "lazy.ml", line 6, characters 11-24 Called from Lazy.run in file "lazy.ml", line 15, characters 4-11 Re-raised at Lazy.l2 in file "lazy.ml", line 8, characters 28-45 -Called from CamlinternalLazy.force_lazy_block in file "camlinternalLazy.ml", line 125, characters 17-27 -Re-raised at CamlinternalLazy.force_lazy_block in file "camlinternalLazy.ml", line 130, characters 4-11 +Called from CamlinternalLazy.Lazy4.force_lazy_block in file "camlinternalLazy.ml", line 126, characters 19-29 +Re-raised at CamlinternalLazy.Lazy4.force_lazy_block in file "camlinternalLazy.ml", line 131, characters 6-13 Called from Lazy.test2 in file "lazy.ml", line 11, characters 6-15 Called from Lazy.run in file "lazy.ml", line 15, characters 4-11 diff --git a/ocaml/testsuite/tests/backtrace/names.reference b/ocaml/testsuite/tests/backtrace/names.reference index 999503a807f..cfb1d0db093 100644 --- a/ocaml/testsuite/tests/backtrace/names.reference +++ b/ocaml/testsuite/tests/backtrace/names.reference @@ -1,8 +1,8 @@ Raised at Names.bang in file "names.ml", line 9, characters 29-39 Called from Names.nontailcall in file "names.ml", line 106, characters 2-6 Called from Names.lazy_ in file "names.ml", line 101, characters 41-45 -Called from CamlinternalLazy.force_lazy_block in file "camlinternalLazy.ml", line 125, characters 17-27 -Re-raised at CamlinternalLazy.force_lazy_block in file "camlinternalLazy.ml", line 130, characters 4-11 
+Called from CamlinternalLazy.Lazy4.force_lazy_block in file "camlinternalLazy.ml", line 126, characters 19-29 +Re-raised at CamlinternalLazy.Lazy4.force_lazy_block in file "camlinternalLazy.ml", line 131, characters 6-13 Called from Names.inline_object.object#othermeth in file "names.ml", line 96, characters 6-10 Called from Names.inline_object.object#meth in file "names.ml", line 94, characters 6-26 Called from Names.klass2#othermeth.(fun) in file "names.ml", line 88, characters 18-22