diff --git a/backend/CSEgen.ml b/backend/CSEgen.ml index dbcdf936eff..42d5ab934d6 100644 --- a/backend/CSEgen.ml +++ b/backend/CSEgen.ml @@ -249,6 +249,7 @@ method class_of_operation op = | Ispecific _ -> Op_other | Iname_for_debugger _ -> Op_pure | Iprobe_is_enabled _ -> Op_other + | Ibeginregion | Iendregion -> Op_other (* Operations that are so cheap that it isn't worth factoring them. *) diff --git a/backend/afl_instrument.ml b/backend/afl_instrument.ml index 27a46192d18..699db17276a 100644 --- a/backend/afl_instrument.ml +++ b/backend/afl_instrument.ml @@ -88,6 +88,8 @@ and instrument = function in Ccatch (isrec, cases, instrument body) | Cexit (ex, args, traps) -> Cexit (ex, List.map instrument args, traps) + | Cregion e -> Cregion (instrument e) + | Ctail e -> Ctail (instrument e) (* these are base cases and have no logging *) | Cconst_int _ | Cconst_natint _ | Cconst_float _ diff --git a/backend/amd64/CSE.ml b/backend/amd64/CSE.ml index e52fdc915d9..4908cc76486 100644 --- a/backend/amd64/CSE.ml +++ b/backend/amd64/CSE.ml @@ -47,6 +47,7 @@ method! class_of_operation op = | Istackoffset _ | Iload _ | Istore _ | Ialloc _ | Iintop _ | Iintop_imm _ | Iname_for_debugger _ | Iprobe _ | Iprobe_is_enabled _ | Iopaque + | Ibeginregion | Iendregion -> super#class_of_operation op end diff --git a/backend/amd64/emit.mlp b/backend/amd64/emit.mlp index 482f1ae0ee6..f97aacca11d 100644 --- a/backend/amd64/emit.mlp +++ b/backend/amd64/emit.mlp @@ -305,6 +305,19 @@ let emit_call_gc gc = def_label gc.gc_frame; I.jmp (label gc.gc_return_lbl) +(* Record calls to local stack reallocation *) + +type local_realloc_call = + { lr_lbl: label; + lr_return_lbl: label; } + +let local_realloc_sites = ref ([] : local_realloc_call list) + +let emit_local_realloc lr = + def_label lr.lr_lbl; + emit_call "caml_call_local_realloc"; + I.jmp (label lr.lr_return_lbl) + (* Record calls to caml_ml_array_bound_error. In -g mode we maintain one call to caml_ml_array_bound_error per bound check site. Without -g, we can share @@ -760,7 +773,7 @@ let emit_instr fallthrough i = | Double -> I.movsd (arg i 0) (addressing addr REAL8 i 1) end - | Lop(Ialloc { bytes = n; dbginfo }) -> + | Lop(Ialloc { bytes = n; dbginfo; mode = Alloc_heap }) -> assert (n <= (Config.max_young_wosize + 1) * Arch.size_addr); if !fastcode_flag then begin I.sub (int n) r15; @@ -790,6 +803,21 @@ let emit_instr fallthrough i = def_label label; I.lea (mem64 NONE 8 R15) (res i 0) end + | Lop(Ialloc { bytes = n; dbginfo=_; mode = Alloc_local }) -> + let r = res i 0 in + I.mov (domain_field Domainstate.Domain_local_sp) r; + I.sub (int n) r; + I.mov r (domain_field Domainstate.Domain_local_sp); + I.cmp (domain_field Domainstate.Domain_local_limit) r; + let lbl_call = new_label () in + I.j L (label lbl_call); + let lbl_after_alloc = new_label () in + def_label lbl_after_alloc; + I.add (domain_field Domainstate.Domain_local_top) r; + I.add (int 8) r; + local_realloc_sites := + { lr_lbl = lbl_call; + lr_return_lbl = lbl_after_alloc } :: !local_realloc_sites | Lop(Iintop(Icomp cmp)) -> I.cmp (arg i 1) (arg i 0); I.set (cond cmp) al; @@ -963,6 +991,10 @@ let emit_instr fallthrough i = | High -> T0 in I.prefetch is_write locality (addressing addr QWORD i 0) + | Lop(Ibeginregion) -> + I.mov (domain_field Domainstate.Domain_local_sp) (res i 0) + | Lop(Iendregion) -> + I.mov (arg i 0) (domain_field Domainstate.Domain_local_sp) | Lop (Iname_for_debugger _) -> () | Lop (Iprobe _) -> let probe_label = new_label () in @@ -1149,6 +1181,7 @@ let fundecl fundecl = tailrec_entry_point := fundecl.fun_tailrec_entry_point_label; stack_offset := 0; call_gc_sites := []; + local_realloc_sites := []; bound_error_sites := []; bound_error_call := 0; for i = 0 to Proc.num_register_classes - 1 do @@ -1172,6 +1205,7 @@ let fundecl fundecl = cfi_startproc (); emit_all true fundecl.fun_body; List.iter emit_call_gc !call_gc_sites; + List.iter emit_local_realloc !local_realloc_sites; emit_call_bound_errors (); if !frame_required then begin let n = frame_size() - 8 - (if fp then 8 else 0) in diff --git a/backend/amd64/proc.ml b/backend/amd64/proc.ml index 22bd59fb1a5..a593171eaa5 100644 --- a/backend/amd64/proc.ml +++ b/backend/amd64/proc.ml @@ -337,6 +337,7 @@ let destroyed_at_oper = function | Iname_for_debugger _ | Iprobe _| Iprobe_is_enabled _ | Iopaque) | Iend | Ireturn _ | Iifthenelse (_, _, _) | Icatch (_, _, _, _) | Iexit _ | Iraise _ + | Iop(Ibeginregion | Iendregion) -> if fp then (* prevent any use of the frame pointer ! *) @@ -362,6 +363,7 @@ let safe_register_pressure = function | Istackoffset _ | Iload (_, _) | Istore (_, _, _) | Iintop _ | Iintop_imm (_, _) | Ispecific _ | Iname_for_debugger _ | Iprobe _ | Iprobe_is_enabled _ | Iopaque + | Ibeginregion | Iendregion -> if fp then 10 else 11 let max_register_pressure = @@ -401,6 +403,7 @@ let max_register_pressure = | Ioffset_loc (_, _) | Ifloatarithmem (_, _) | Ibswap _ | Ifloatsqrtf _ | Isqrtf) | Iname_for_debugger _ | Iprobe _ | Iprobe_is_enabled _ | Iopaque + | Ibeginregion | Iendregion -> consumes ~int:0 ~float:0 (* Pure operations (without any side effect besides updating their result @@ -410,6 +413,7 @@ let op_is_pure = function | Icall_ind | Icall_imm _ | Itailcall_ind | Itailcall_imm _ | Iextcall _ | Istackoffset _ | Istore _ | Ialloc _ | Iintop(Icheckbound) | Iintop_imm(Icheckbound, _) | Iopaque -> false + | Ibeginregion | Iendregion -> false | Ispecific(Ipause) | Ispecific(Iprefetch _) -> false | Ispecific(Ilea _ | Isextend32 | Izextend32 | Ifloat_iround | Ifloat_round _ @@ -451,7 +455,7 @@ let init () = let operation_supported = function | Cpopcnt -> !popcnt_support | Cprefetch _ - | Capply _ | Cextcall _ | Cload _ | Calloc | Cstore _ + | Capply _ | Cextcall _ | Cload _ | Calloc _ | Cstore _ | Caddi | Csubi | Cmuli | Cmulhi _ | Cdivi | Cmodi | Cand | Cor | Cxor | Clsl | Clsr | Casr | Cclz _ | Cctz _ diff --git a/backend/amd64/reload.ml b/backend/amd64/reload.ml index c124c0ab335..d6b96fdbe48 100644 --- a/backend/amd64/reload.ml +++ b/backend/amd64/reload.ml @@ -158,6 +158,7 @@ method! reload_operation op arg res = | Itailcall_ind|Itailcall_imm _|Iextcall _|Istackoffset _|Iload (_, _) | Istore (_, _, _)|Ialloc _|Iname_for_debugger _|Iprobe _|Iprobe_is_enabled _ | Iopaque + | Ibeginregion | Iendregion -> (* Other operations: all args and results in registers *) super#reload_operation op arg res diff --git a/backend/amd64/selection.ml b/backend/amd64/selection.ml index fe9316e2aaa..2cf492eea73 100644 --- a/backend/amd64/selection.ml +++ b/backend/amd64/selection.ml @@ -155,6 +155,7 @@ let pseudoregs_for_operation op arg res = | Iconst_symbol _|Icall_ind|Icall_imm _|Itailcall_ind|Itailcall_imm _ | Iextcall _|Istackoffset _|Iload (_, _)|Istore (_, _, _)|Ialloc _ | Iname_for_debugger _|Iprobe _|Iprobe_is_enabled _ | Iopaque + | Ibeginregion | Iendregion -> raise Use_default let select_locality (l : Cmm.prefetch_temporal_locality_hint) @@ -242,6 +243,7 @@ method! select_store is_assign addr exp = | Cassign (_, _) | Ctuple _ | Cop (_, _, _) | Csequence (_, _) | Cifthenelse (_, _, _, _, _, _) | Cswitch (_, _, _, _) | Ccatch (_, _, _) | Cexit (_, _, _) | Ctrywith (_, _, _, _, _) + | Cregion _ | Ctail _ -> super#select_store is_assign addr exp diff --git a/backend/arm64/emit.mlp b/backend/arm64/emit.mlp index 210a69f75de..2895eac0f74 100644 --- a/backend/arm64/emit.mlp +++ b/backend/arm64/emit.mlp @@ -511,6 +511,8 @@ module BR = Branch_relaxation.Make (struct | 16 | 24 | 32 -> 1 | _ -> 1 + num_instructions_for_intconst (Nativeint.of_int num_bytes) end + | Lop (Ibeginregion | Iendregion) -> + Misc.fatal_error "Local allocations not supported on this architecture" | Lop (Iintop (Icomp _)) -> 2 | Lop (Icompf _) -> 2 | Lop (Iintop_imm (Icomp _, _)) -> 2 @@ -787,10 +789,12 @@ let emit_instr i = | Word_int | Word_val | Double -> ` str {emit_reg src}, {emit_addressing addr base}\n` end - | Lop(Ialloc { bytes = n; dbginfo }) -> + | Lop(Ialloc { bytes = n; dbginfo; mode = Alloc_heap }) -> assembly_code_for_allocation i ~n ~far:false ~dbginfo | Lop(Ispecific (Ifar_alloc { bytes = n; dbginfo })) -> assembly_code_for_allocation i ~n ~far:true ~dbginfo + | Lop(Ialloc { mode = Alloc_local } | Ibeginregion | Iendregion) -> + Misc.fatal_error "Local allocations not supported on this architecture" | Lop(Iintop_imm(Iadd, n)) -> emit_addimm i.res.(0) i.arg.(0) n | Lop(Iintop_imm(Isub, n)) -> diff --git a/backend/arm64/proc.ml b/backend/arm64/proc.ml index 0a093c56f4b..1328c8a0a2d 100644 --- a/backend/arm64/proc.ml +++ b/backend/arm64/proc.ml @@ -288,6 +288,7 @@ let op_is_pure = function | Icall_ind | Icall_imm _ | Itailcall_ind | Itailcall_imm _ | Iextcall _ | Istackoffset _ | Istore _ | Ialloc _ | Iintop(Icheckbound) | Iintop_imm(Icheckbound, _) | Iopaque + | Ibeginregion | Iendregion | Ispecific(Ishiftcheckbound _) -> false | _ -> true @@ -314,7 +315,7 @@ let operation_supported = function | Cclz _ | Cctz _ | Cpopcnt | Cprefetch _ -> false (* Not implemented *) - | Capply _ | Cextcall _ | Cload _ | Calloc | Cstore _ + | Capply _ | Cextcall _ | Cload _ | Calloc _ | Cstore _ | Caddi | Csubi | Cmuli | Cmulhi _ | Cdivi | Cmodi | Cand | Cor | Cxor | Clsl | Clsr | Casr | Ccmpi _ | Caddv | Cadda | Ccmpa _ diff --git a/backend/cfg/cfg.ml b/backend/cfg/cfg.ml index b665f4646d9..73bb2706e82 100644 --- a/backend/cfg/cfg.ml +++ b/backend/cfg/cfg.ml @@ -221,6 +221,8 @@ let dump_op ppf = function Format.fprintf ppf "probe %s %s" name handler_code_sym | Probe_is_enabled { name } -> Format.fprintf ppf "probe_is_enabled %s" name | Opaque -> Format.fprintf ppf "opaque" + | Begin_region -> Format.fprintf ppf "beginregion" + | End_region -> Format.fprintf ppf "endregion" | Name_for_debugger _ -> Format.fprintf ppf "name_for_debugger" let dump_call ppf = function @@ -319,6 +321,8 @@ let can_raise_operation : operation -> bool = function | Specific _ -> false (* CR xclerc for xclerc: double check *) | Opaque -> false | Name_for_debugger _ -> false + | Begin_region -> false + | End_region -> false let can_raise_basic : basic -> bool = function | Op op -> can_raise_operation op diff --git a/backend/cfg/cfg_equivalence.ml b/backend/cfg/cfg_equivalence.ml index 050a1dba4db..ed6f4fbac18 100644 --- a/backend/cfg/cfg_equivalence.ml +++ b/backend/cfg/cfg_equivalence.ml @@ -250,6 +250,8 @@ let check_operation : location -> Cfg.operation -> Cfg.operation -> unit = when Arch.equal_specific_operation expected_spec result_spec -> () | Opaque, Opaque -> () + | Begin_region, Begin_region -> () + | End_region, End_region -> () | ( Name_for_debugger { ident = left_ident; which_parameter = left_which_parameter; @@ -276,9 +278,16 @@ let check_prim_call_operation : match expected, result with | External expected, External result -> check_external_call_operation location expected result - | ( Alloc { bytes = expected_bytes; dbginfo = _expected_dbginfo }, - Alloc { bytes = result_bytes; dbginfo = _result_dbginfo } ) - when Int.equal expected_bytes result_bytes -> + | ( Alloc + { bytes = expected_bytes; + dbginfo = _expected_dbginfo; + mode = expected_mode + }, + Alloc + { bytes = result_bytes; dbginfo = _result_dbginfo; mode = result_mode } + ) + when Int.equal expected_bytes result_bytes + && Lambda.eq_mode expected_mode result_mode -> (* CR xclerc for xclerc: also check debug info *) () | ( Checkbound { immediate = expected_immediate }, diff --git a/backend/cfg/cfg_intf.ml b/backend/cfg/cfg_intf.ml index a4b67cdf418..0db4a185e77 100644 --- a/backend/cfg/cfg_intf.ml +++ b/backend/cfg/cfg_intf.ml @@ -49,7 +49,8 @@ module S = struct | External of external_call_operation | Alloc of { bytes : int; - dbginfo : Debuginfo.alloc_dbginfo + dbginfo : Debuginfo.alloc_dbginfo; + mode : Lambda.alloc_mode } | Checkbound of { immediate : int option } @@ -80,6 +81,8 @@ module S = struct } | Probe_is_enabled of { name : string } | Opaque + | Begin_region + | End_region | Specific of Arch.specific_operation | Name_for_debugger of { ident : Ident.t; diff --git a/backend/cfg/cfg_to_linear.ml b/backend/cfg/cfg_to_linear.ml index 970e6c45479..031f766c980 100644 --- a/backend/cfg/cfg_to_linear.ml +++ b/backend/cfg/cfg_to_linear.ml @@ -51,7 +51,8 @@ let from_basic (basic : Cfg.basic) : L.instruction_desc = | Call (P (Checkbound { immediate = None })) -> Lop (Iintop Icheckbound) | Call (P (Checkbound { immediate = Some i })) -> Lop (Iintop_imm (Icheckbound, i)) - | Call (P (Alloc { bytes; dbginfo })) -> Lop (Ialloc { bytes; dbginfo }) + | Call (P (Alloc { bytes; dbginfo; mode })) -> + Lop (Ialloc { bytes; dbginfo; mode }) | Op op -> let op : Mach.operation = match op with @@ -79,6 +80,8 @@ let from_basic (basic : Cfg.basic) : L.instruction_desc = | Probe_is_enabled { name } -> Iprobe_is_enabled { name } | Opaque -> Iopaque | Specific op -> Ispecific op + | Begin_region -> Ibeginregion + | End_region -> Iendregion | Name_for_debugger { ident; which_parameter; provenance; is_assignment } -> Iname_for_debugger { ident; which_parameter; provenance; is_assignment } diff --git a/backend/cfg/cfgize.ml b/backend/cfg/cfgize.ml index b8e590644f4..e6428cabc09 100644 --- a/backend/cfg/cfgize.ml +++ b/backend/cfg/cfgize.ml @@ -158,7 +158,8 @@ let basic_or_terminator_of_operation : | Istackoffset ofs -> Basic (Op (Stackoffset ofs)) | Iload (mem, mode) -> Basic (Op (Load (mem, mode))) | Istore (mem, mode, assignment) -> Basic (Op (Store (mem, mode, assignment))) - | Ialloc { bytes; dbginfo } -> Basic (Call (P (Alloc { bytes; dbginfo }))) + | Ialloc { bytes; dbginfo; mode } -> + Basic (Call (P (Alloc { bytes; dbginfo; mode }))) | Iintop Icheckbound -> Basic (Call (P (Checkbound { immediate = None }))) | Iintop_imm (Icheckbound, i) -> Basic (Call (P (Checkbound { immediate = Some i }))) @@ -189,6 +190,8 @@ let basic_or_terminator_of_operation : | Iprobe { name; handler_code_sym } -> Basic (Op (Probe { name; handler_code_sym })) | Iprobe_is_enabled { name } -> Basic (Op (Probe_is_enabled { name })) + | Ibeginregion -> Basic (Op Begin_region) + | Iendregion -> Basic (Op End_region) let float_test_of_float_comparison : Cmm.float_comparison -> @@ -316,7 +319,8 @@ let is_noop_move (instr : Cfg.basic Cfg.instruction) : bool = ( Const_int _ | Const_float _ | Const_symbol _ | Stackoffset _ | Load _ | Store _ | Intop _ | Intop_imm _ | Negf | Absf | Addf | Subf | Mulf | Divf | Compf _ | Floatofint | Intoffloat | Probe _ | Opaque - | Probe_is_enabled _ | Specific _ | Name_for_debugger _ ) + | Probe_is_enabled _ | Specific _ | Name_for_debugger _ | Begin_region + | End_region ) | Call _ | Reloadretaddr | Pushtrap _ | Poptrap | Prologue -> false diff --git a/backend/cfg/linear_to_cfg.ml b/backend/cfg/linear_to_cfg.ml index fec1d584b19..36e2bfb6e10 100644 --- a/backend/cfg/linear_to_cfg.ml +++ b/backend/cfg/linear_to_cfg.ml @@ -393,7 +393,7 @@ let to_basic (mop : Mach.operation) : C.basic = | Ipopcnt | Iclz _ | Ictz _ | Ilsl | Ilsr | Iasr | Icomp _ ) as op), i ) -> Op (Intop_imm (op, i)) - | Ialloc { bytes; dbginfo } -> Call (P (Alloc { bytes; dbginfo })) + | Ialloc { bytes; dbginfo; mode } -> Call (P (Alloc { bytes; dbginfo; mode })) | Iprobe { name; handler_code_sym } -> Op (Probe { name; handler_code_sym }) | Iprobe_is_enabled { name } -> Op (Probe_is_enabled { name }) | Istackoffset i -> Op (Stackoffset i) @@ -415,6 +415,8 @@ let to_basic (mop : Mach.operation) : C.basic = | Ifloatofint -> Op Floatofint | Iintoffloat -> Op Intoffloat | Iopaque -> Op Opaque + | Ibeginregion -> Op Begin_region + | Iendregion -> Op End_region | Ispecific op -> Op (Specific op) | Iname_for_debugger { ident; which_parameter; provenance; is_assignment } -> Op (Name_for_debugger { ident; which_parameter; provenance; is_assignment }) @@ -611,8 +613,8 @@ let rec create_blocks (t : t) (i : L.instruction) (block : C.basic_block) | Istore (_, _, _) | Ialloc _ | Iintop _ | Iintop_imm (_, _) - | Iopaque | Iprobe _ | Iprobe_is_enabled _ | Ispecific _ - | Iname_for_debugger _ -> + | Iopaque | Iprobe _ | Iprobe_is_enabled _ | Ispecific _ | Ibeginregion + | Iendregion | Iname_for_debugger _ -> let desc = to_basic mop in block.body <- create_instruction t desc i ~trap_depth :: block.body; if Mach.operation_can_raise mop diff --git a/backend/cmm.ml b/backend/cmm.ml index 7983c9c2489..7dfb005d57b 100644 --- a/backend/cmm.ml +++ b/backend/cmm.ml @@ -167,7 +167,7 @@ type memory_chunk = | Double and operation = - Capply of machtype + Capply of machtype * Lambda.region_close | Cextcall of { func: string; ty: machtype; @@ -179,7 +179,7 @@ and operation = coeffects: coeffects; } | Cload of memory_chunk * Asttypes.mutable_flag - | Calloc + | Calloc of Lambda.alloc_mode | Cstore of memory_chunk * Lambda.initialization_or_assignment | Caddi | Csubi | Cmuli | Cmulhi of { signed: bool } | Cdivi | Cmodi | Cand | Cor | Cxor | Clsl | Clsr | Casr @@ -227,6 +227,8 @@ type expression = | Cexit of exit_label * expression list * trap_action list | Ctrywith of expression * trywith_kind * Backend_var.With_provenance.t * expression * Debuginfo.t + | Cregion of expression + | Ctail of expression type codegen_option = | Reduce_code_size @@ -286,6 +288,12 @@ let iter_shallow_tail f = function f e1; f e2; true + | Cregion e -> + f e; + true + | Ctail e -> + f e; + true | Cexit _ | Cop (Craise _, _, _) -> true | Cconst_int _ @@ -298,30 +306,34 @@ let iter_shallow_tail f = function | Cop _ -> false -let rec map_tail f = function +let map_shallow_tail f = function | Clet(id, exp, body) -> - Clet(id, exp, map_tail f body) + Clet(id, exp, f body) | Clet_mut(id, kind, exp, body) -> - Clet_mut(id, kind, exp, map_tail f body) + Clet_mut(id, kind, exp, f body) | Cphantom_let(id, exp, body) -> - Cphantom_let (id, exp, map_tail f body) + Cphantom_let (id, exp, f body) | Cifthenelse(cond, ifso_dbg, ifso, ifnot_dbg, ifnot, dbg) -> Cifthenelse ( cond, - ifso_dbg, map_tail f ifso, - ifnot_dbg, map_tail f ifnot, + ifso_dbg, f ifso, + ifnot_dbg, f ifnot, dbg ) | Csequence(e1, e2) -> - Csequence(e1, map_tail f e2) + Csequence(e1, f e2) | Cswitch(e, tbl, el, dbg') -> - Cswitch(e, tbl, Array.map (fun (e, dbg) -> map_tail f e, dbg) el, dbg') + Cswitch(e, tbl, Array.map (fun (e, dbg) -> f e, dbg) el, dbg') | Ccatch(rec_flag, handlers, body) -> - let map_h (n, ids, handler, dbg) = (n, ids, map_tail f handler, dbg) in - Ccatch(rec_flag, List.map map_h handlers, map_tail f body) + let map_h (n, ids, handler, dbg) = (n, ids, f handler, dbg) in + Ccatch(rec_flag, List.map map_h handlers, f body) | Ctrywith(e1, kind, id, e2, dbg) -> - Ctrywith(map_tail f e1, kind, id, map_tail f e2, dbg) + Ctrywith(f e1, kind, id, f e2, dbg) + | Cregion e -> + Cregion(f e) + | Ctail e -> + Ctail(f e) | Cexit _ | Cop (Craise _, _, _) as cmm -> cmm | Cconst_int _ @@ -331,8 +343,59 @@ let rec map_tail f = function | Cvar _ | Cassign _ | Ctuple _ - | Cop _ as c -> - f c + | Cop _ as cmm -> cmm + +let map_tail f = + let rec loop = function + | Cconst_int _ + | Cconst_natint _ + | Cconst_float _ + | Cconst_symbol _ + | Cvar _ + | Cassign _ + | Ctuple _ + | Cop _ as c -> + f c + | cmm -> map_shallow_tail loop cmm + in + loop + +let iter_shallow f = function + | Clet (_id, e1, e2) -> + f e1; f e2 + | Clet_mut (_id, _kind, e1, e2) -> + f e1; f e2 + | Cphantom_let (_id, _de, e) -> + f e + | Cassign (_id, e) -> + f e + | Ctuple el -> + List.iter f el + | Cop (_op, el, _dbg) -> + List.iter f el + | Csequence (e1, e2) -> + f e1; f e2 + | Cifthenelse(cond, _ifso_dbg, ifso, _ifnot_dbg, ifnot, _dbg) -> + f cond; f ifso; f ifnot + | Cswitch (_e, _ia, ea, _dbg) -> + Array.iter (fun (e, _) -> f e) ea + | Ccatch (_rf, hl, body) -> + let iter_h (_n, _ids, handler, _dbg) = f handler in + List.iter iter_h hl; f body + | Cexit (_n, el, _traps) -> + List.iter f el + | Ctrywith (e1, _kind, _id, e2, _dbg) -> + f e1; f e2 + | Cregion e -> + f e + | Ctail e -> + f e + | Cconst_int _ + | Cconst_natint _ + | Cconst_float _ + | Cconst_symbol _ + | Cvar _ -> + () let map_shallow f = function | Clet (id, e1, e2) -> @@ -360,6 +423,10 @@ let map_shallow f = function Cexit (n, List.map f el, traps) | Ctrywith (e1, kind, id, e2, dbg) -> Ctrywith (f e1, kind, id, f e2, dbg) + | Cregion e -> + Cregion (f e) + | Ctail e -> + Ctail (f e) | Cconst_int _ | Cconst_natint _ | Cconst_float _ diff --git a/backend/cmm.mli b/backend/cmm.mli index 9575062c214..f66fdb7af20 100644 --- a/backend/cmm.mli +++ b/backend/cmm.mli @@ -166,7 +166,7 @@ type memory_chunk = see PR#10433 *) and operation = - Capply of machtype + Capply of machtype * Lambda.region_close | Cextcall of { func: string; ty: machtype; @@ -181,7 +181,7 @@ and operation = The [exttype list] describes the unboxing types of the arguments. An empty list means "all arguments are machine words [XInt]". *) | Cload of memory_chunk * Asttypes.mutable_flag - | Calloc + | Calloc of Lambda.alloc_mode | Cstore of memory_chunk * Lambda.initialization_or_assignment | Caddi | Csubi | Cmuli | Cmulhi of { signed: bool } | Cdivi | Cmodi | Cand | Cor | Cxor | Clsl | Clsr | Casr @@ -236,6 +236,8 @@ and expression = | Cexit of exit_label * expression list * trap_action list | Ctrywith of expression * trywith_kind * Backend_var.With_provenance.t * expression * Debuginfo.t + | Cregion of expression + | Ctail of expression type codegen_option = | Reduce_code_size @@ -283,11 +285,18 @@ val iter_shallow_tail: (expression -> unit) -> expression -> bool considered to be in tail position (because their result become the final result for the expression). *) +val map_shallow_tail: (expression -> expression) -> expression -> expression + (** Apply the transformation to those immediate sub-expressions of an + expression that are in tail position, using the same definition of "tail" + as [iter_shallow_tail] *) + val map_tail: (expression -> expression) -> expression -> expression (** Apply the transformation to an expression, trying to push it - to all inner sub-expressions that can produce the final result. - Same disclaimer as for [iter_shallow_tail] about the notion - of "tail" sub-expression. *) + to all inner sub-expressions that can produce the final result, + by recursively applying map_shallow_tail *) + +val iter_shallow: (expression -> unit) -> expression -> unit + (** Apply the callback to each immediate sub-expression. *) val map_shallow: (expression -> expression) -> expression -> expression (** Apply the transformation to each immediate sub-expression. *) diff --git a/backend/cmm_helpers.ml b/backend/cmm_helpers.ml index 469a59732e4..bd747c19820 100644 --- a/backend/cmm_helpers.ml +++ b/backend/cmm_helpers.ml @@ -38,6 +38,7 @@ let bind_nonvar name arg fn = | _ -> let id = V.create_local name in Clet(VP.create id, arg, fn (Cvar id)) let caml_black = Nativeint.shift_left (Nativeint.of_int 3) 8 +let caml_local = Nativeint.shift_left (Nativeint.of_int 2) 8 (* cf. runtime/caml/gc.h *) (* Block headers. Meaning of the tag field: see stdlib/obj.ml *) @@ -51,10 +52,13 @@ let block_header tag sz = in no-naked-pointers mode. See [caml_darken] and the code below that emits structured constants and static module definitions. *) let black_block_header tag sz = Nativeint.logor (block_header tag sz) caml_black +let local_block_header tag sz = Nativeint.logor (block_header tag sz) caml_local let white_closure_header sz = block_header Obj.closure_tag sz let black_closure_header sz = black_block_header Obj.closure_tag sz +let local_closure_header sz = local_block_header Obj.closure_tag sz let infix_header ofs = block_header Obj.infix_tag ofs let float_header = block_header Obj.double_tag (size_float / size_addr) +let float_local_header = local_block_header Obj.double_tag (size_float / size_addr) let floatarray_header len = (* Zero-sized float arrays have tag zero for consistency with [caml_alloc_float_array]. *) @@ -66,6 +70,9 @@ let string_header len = let boxedint32_header = block_header Obj.custom_tag 2 let boxedint64_header = block_header Obj.custom_tag (1 + 8 / size_addr) let boxedintnat_header = block_header Obj.custom_tag 2 +let boxedint32_local_header = local_block_header Obj.custom_tag 2 +let boxedint64_local_header = local_block_header Obj.custom_tag (1 + 8 / size_addr) +let boxedintnat_local_header = local_block_header Obj.custom_tag 2 let caml_nativeint_ops = "caml_nativeint_ops" let caml_int32_ops = "caml_int32_ops" let caml_int64_ops = "caml_int64_ops" @@ -74,21 +81,41 @@ let pos_arity_in_closinfo = 8 * size_addr - 8 (* arity = the top 8 bits of the closinfo word *) let closure_info ~arity ~startenv = + let arity = + match arity with + | Lambda.Tupled, n -> -n + | Lambda.Curried, n -> n + in assert (-128 <= arity && arity <= 127); assert (0 <= startenv && startenv < 1 lsl (pos_arity_in_closinfo - 1)); Nativeint.(add (shift_left (of_int arity) pos_arity_in_closinfo) (add (shift_left (of_int startenv) 1) 1n)) -let alloc_float_header dbg = Cconst_natint (float_header, dbg) +let alloc_float_header mode dbg = + match mode with + | Lambda.Alloc_heap -> Cconst_natint (float_header, dbg) + | Lambda.Alloc_local -> Cconst_natint (float_local_header, dbg) let alloc_floatarray_header len dbg = Cconst_natint (floatarray_header len, dbg) -let alloc_closure_header sz dbg = Cconst_natint (white_closure_header sz, dbg) +let alloc_closure_header ~mode sz dbg = + match (mode : Lambda.alloc_mode) with + | Alloc_heap -> Cconst_natint (white_closure_header sz, dbg) + | Alloc_local -> Cconst_natint (local_closure_header sz, dbg) let alloc_infix_header ofs dbg = Cconst_natint (infix_header ofs, dbg) let alloc_closure_info ~arity ~startenv dbg = Cconst_natint (closure_info ~arity ~startenv, dbg) -let alloc_boxedint32_header dbg = Cconst_natint (boxedint32_header, dbg) -let alloc_boxedint64_header dbg = Cconst_natint (boxedint64_header, dbg) -let alloc_boxedintnat_header dbg = Cconst_natint (boxedintnat_header, dbg) +let alloc_boxedint32_header mode dbg = + match mode with + | Lambda.Alloc_heap -> Cconst_natint (boxedint32_header, dbg) + | Lambda.Alloc_local -> Cconst_natint (boxedint32_local_header, dbg) +let alloc_boxedint64_header mode dbg = + match mode with + | Lambda.Alloc_heap -> Cconst_natint (boxedint64_header, dbg) + | Lambda.Alloc_local -> Cconst_natint (boxedint64_local_header, dbg) +let alloc_boxedintnat_header mode dbg = + match mode with + | Lambda.Alloc_heap -> Cconst_natint (boxedintnat_header, dbg) + | Lambda.Alloc_local -> Cconst_natint (boxedintnat_local_header, dbg) (* Integers *) @@ -579,12 +606,12 @@ let test_bool dbg cmm = (* Float *) -let box_float dbg c = Cop(Calloc, [alloc_float_header dbg; c], dbg) +let box_float dbg m c = Cop(Calloc m, [alloc_float_header m dbg; c], dbg) let unbox_float dbg = map_tail (function - | Cop(Calloc, [Cconst_natint (hdr, _); c], _) + | Cop(Calloc _, [Cconst_natint (hdr, _); c], _) when Nativeint.equal hdr float_header -> c | Cconst_symbol (s, _dbg) as cmm -> @@ -600,7 +627,7 @@ let unbox_float dbg = (* Complex *) let box_complex dbg c_re c_im = - Cop(Calloc, [alloc_floatarray_header 2 dbg; c_re; c_im], dbg) + Cop(Calloc Alloc_heap, [alloc_floatarray_header 2 dbg; c_re; c_im], dbg) let complex_re c dbg = Cop(Cload (Double, Immutable), [c], dbg) let complex_im c dbg = Cop(Cload (Double, Immutable), @@ -632,10 +659,10 @@ let rec remove_unit = function Ctrywith(remove_unit body, kind, exn, remove_unit handler, dbg) | Clet(id, c1, c2) -> Clet(id, c1, remove_unit c2) - | Cop(Capply _mty, args, dbg) -> - Cop(Capply typ_void, args, dbg) + | Cop(Capply(_mty, pos), args, dbg) -> + Cop(Capply(typ_void, pos), args, dbg) | Cop(Cextcall c, args, dbg) -> - Cop(Cextcall {c with ty = typ_void; }, args, dbg) + Cop(Cextcall {c with ty = typ_void }, args, dbg) | Cexit (_,_,_) as c -> c | Ctuple [] as c -> c | c -> Csequence(c, Ctuple []) @@ -754,8 +781,9 @@ let unboxed_float_array_ref arr ofs dbg = Cop(Cload (Double, Mutable), [array_indexing log2_size_float arr ofs dbg], dbg) let float_array_ref arr ofs dbg = - box_float dbg (unboxed_float_array_ref arr ofs dbg) + box_float dbg Alloc_heap (unboxed_float_array_ref arr ofs dbg) +(* TODO support mutation of local arrays *) let addr_array_set arr ofs newval dbg = Cop(Cextcall { func = "caml_modify"; ty = typ_void; alloc = false; builtin = false; @@ -764,6 +792,7 @@ let addr_array_set arr ofs newval dbg = coeffects = Has_coeffects; ty_args = []}, [array_indexing log2_size_addr arr ofs dbg; newval], dbg) + let addr_array_initialize arr ofs newval dbg = Cop(Cextcall { func = "caml_initialize"; builtin = false; @@ -772,6 +801,7 @@ let addr_array_initialize arr ofs newval dbg = coeffects = Has_coeffects; ty = typ_void; alloc = false; ty_args = []}, [array_indexing log2_size_addr arr ofs dbg; newval], dbg) + let int_array_set arr ofs newval dbg = Cop(Cstore (Word_int, Lambda.Assignment), [array_indexing log2_size_addr arr ofs dbg; newval], dbg) @@ -831,22 +861,31 @@ let lookup_label obj lab dbg = let table = Cop (Cload (Word_val, Mutable), [obj], dbg) in addr_array_ref table lab dbg) -let call_cached_method obj tag cache pos args dbg = +let send_function_name n (mode : Lambda.alloc_mode) = + let suff = match mode with Alloc_heap -> "" | Alloc_local -> "L" in + "caml_send" ^ Int.to_string n ^ suff + +let call_cached_method obj tag cache pos args (apos,mode) dbg = let arity = List.length args in let cache = array_indexing log2_size_addr cache pos dbg in - Compilenv.need_send_fun arity; - Cop(Capply typ_val, - Cconst_symbol("caml_send" ^ Int.to_string arity, dbg) :: + Compilenv.need_send_fun arity mode; + Cop(Capply(typ_val, apos), + Cconst_symbol(send_function_name arity mode, dbg) :: obj :: tag :: cache :: args, dbg) (* Allocation *) -let make_alloc_generic set_fn dbg tag wordsize args = +let make_alloc_generic ~mode set_fn dbg tag wordsize args = (* allocs of size 0 must be statically allocated else the Gc will bug *) assert (List.compare_length_with args 0 > 0); - if wordsize <= Config.max_young_wosize then - Cop(Calloc, Cconst_natint(block_header tag wordsize, dbg) :: args, dbg) + if mode = Lambda.Alloc_local || wordsize <= Config.max_young_wosize then + let hdr = + match mode with + | Lambda.Alloc_local -> local_block_header tag wordsize + | Lambda.Alloc_heap -> block_header tag wordsize + in + Cop(Calloc mode, Cconst_natint(hdr, dbg) :: args, dbg) else begin let id = V.create_local "*alloc*" in let rec fill_fields idx = function @@ -864,7 +903,7 @@ let make_alloc_generic set_fn dbg tag wordsize args = fill_fields 1 args) end -let make_alloc dbg tag args = +let make_alloc ?(mode=Lambda.Alloc_heap) dbg tag args = let addr_array_init arr ofs newval dbg = Cop(Cextcall { func = "caml_initialize"; ty = typ_void; alloc = false; builtin = false; @@ -874,10 +913,10 @@ let make_alloc dbg tag args = ty_args = [] }, [array_indexing log2_size_addr arr ofs dbg; newval], dbg) in - make_alloc_generic addr_array_init dbg tag (List.length args) args + make_alloc_generic ~mode addr_array_init dbg tag (List.length args) args -let make_float_alloc dbg tag args = - make_alloc_generic float_array_set dbg tag +let make_float_alloc ?(mode=Lambda.Alloc_heap) dbg tag args = + make_alloc_generic ~mode float_array_set dbg tag (List.length args * size_float / size_addr) args (* Bounds checking *) @@ -890,14 +929,23 @@ let make_checkbound dbg = function Cop(Ccheckbound, args, dbg) (* Record application and currying functions *) - -let apply_function_sym n = - Compilenv.need_apply_fun n; "caml_apply" ^ Int.to_string n -let curry_function_sym n = - Compilenv.need_curry_fun n; - if n >= 0 - then "caml_curry" ^ Int.to_string n - else "caml_tuplify" ^ Int.to_string (-n) +let apply_function_name (n, (mode : Lambda.alloc_mode)) = + let suff = match mode with Alloc_heap -> "" | Alloc_local -> "L" in + "caml_apply" ^ Int.to_string n ^ suff +let apply_function_sym n mode = + assert (n > 0); + Compilenv.need_apply_fun n mode; + apply_function_name (n, mode) +let curry_function_sym ar = + Compilenv.need_curry_fun ar; + match ar with + | Lambda.Curried, n -> + assert (n > 0); + let nlocal = 0 in (* temporary *) + "caml_curry" ^ Int.to_string n ^ + (if nlocal > 0 then "L" ^ Int.to_string nlocal else "") + | Lambda.Tupled, n -> + "caml_tuplify" ^ Int.to_string n (* Big arrays *) @@ -1091,13 +1139,13 @@ let operations_boxed_int (bi : Primitive.boxed_integer) = | Pint32 -> caml_int32_ops | Pint64 -> caml_int64_ops -let alloc_header_boxed_int (bi : Primitive.boxed_integer) = +let alloc_header_boxed_int (bi : Primitive.boxed_integer) mode dbg = match bi with - Pnativeint -> alloc_boxedintnat_header - | Pint32 -> alloc_boxedint32_header - | Pint64 -> alloc_boxedint64_header + Pnativeint -> alloc_boxedintnat_header mode dbg + | Pint32 -> alloc_boxedint32_header mode dbg + | Pint64 -> alloc_boxedint64_header mode dbg -let box_int_gen dbg (bi : Primitive.boxed_integer) arg = +let box_int_gen dbg (bi : Primitive.boxed_integer) mode arg = let arg' = if bi = Primitive.Pint32 && size_int = 8 then if big_endian @@ -1105,9 +1153,10 @@ let box_int_gen dbg (bi : Primitive.boxed_integer) arg = else sign_extend_32 dbg arg else arg in - Cop(Calloc, [alloc_header_boxed_int bi dbg; - Cconst_symbol(operations_boxed_int bi, dbg); - arg'], dbg) + Cop(Calloc mode, + [alloc_header_boxed_int bi mode dbg; + Cconst_symbol(operations_boxed_int bi, dbg); + arg'], dbg) let split_int64_for_32bit_target arg dbg = bind "split_int64" arg (fun arg -> @@ -1141,20 +1190,20 @@ let unbox_int dbg bi = in map_tail (function - | Cop(Calloc, + | Cop(Calloc _, [hdr; ops; Cop(Clsl, [contents; Cconst_int (32, _)], _dbg')], _dbg) when bi = Primitive.Pint32 && size_int = 8 && big_endian && alloc_matches_boxed_int bi ~hdr ~ops -> (* Force sign-extension of low 32 bits *) sign_extend_32 dbg contents - | Cop(Calloc, + | Cop(Calloc _, [hdr; ops; contents], _dbg) when bi = Primitive.Pint32 && size_int = 8 && not big_endian && alloc_matches_boxed_int bi ~hdr ~ops -> (* Force sign-extension of low 32 bits *) sign_extend_32 dbg contents - | Cop(Calloc, [hdr; ops; contents], _dbg) + | Cop(Calloc _, [hdr; ops; contents], _dbg) when alloc_matches_boxed_int bi ~hdr ~ops -> contents | Cconst_symbol (s, _dbg) as cmm -> @@ -1432,11 +1481,11 @@ let unaligned_load size ptr idx dbg = | Thirty_two -> unaligned_load_32 ptr idx dbg | Sixty_four -> unaligned_load_64 ptr idx dbg -let box_sized size dbg exp = +let box_sized size mode dbg exp = match (size : Clambda_primitives.memory_access_size) with | Sixteen -> tag_int exp dbg - | Thirty_two -> box_int_gen dbg Pint32 exp - | Sixty_four -> box_int_gen dbg Pint64 exp + | Thirty_two -> box_int_gen dbg Pint32 mode exp + | Sixty_four -> box_int_gen dbg Pint64 mode exp (* Simplification of some primitives into C calls *) @@ -1773,27 +1822,27 @@ let ptr_offset ptr offset dbg = then ptr else Cop(Caddv, [ptr; Cconst_int(offset * size_addr, dbg)], dbg) -let direct_apply lbl args dbg = - Cop(Capply typ_val, Cconst_symbol (lbl, dbg) :: args, dbg) +let direct_apply lbl args (pos, _mode) dbg = + Cop(Capply(typ_val, pos), Cconst_symbol (lbl, dbg) :: args, dbg) -let generic_apply mut clos args dbg = +let generic_apply mut clos args (pos, mode) dbg = match args with | [arg] -> bind "fun" clos (fun clos -> - Cop(Capply typ_val, [get_field_gen mut clos 0 dbg; arg; clos], + Cop(Capply(typ_val, pos), [get_field_gen mut clos 0 dbg; arg; clos], dbg)) | _ -> let arity = List.length args in let cargs = - Cconst_symbol(apply_function_sym arity, dbg) :: args @ [clos] + Cconst_symbol(apply_function_sym arity mode, dbg) :: args @ [clos] in - Cop(Capply typ_val, cargs, dbg) + Cop(Capply(typ_val, pos), cargs, dbg) -let send kind met obj args dbg = +let send kind met obj args akind dbg = let call_met obj args clos = (* met is never a simple expression, so it never gets turned into an Immutable load *) - generic_apply Asttypes.Mutable clos (obj :: args) dbg + generic_apply Asttypes.Mutable clos (obj :: args) akind dbg in bind "obj" obj (fun obj -> match (kind : Lambda.meth_kind), args with @@ -1801,7 +1850,7 @@ let send kind met obj args dbg = bind "met" (lookup_label obj met dbg) (call_met obj args) | Cached, cache :: pos :: args -> - call_cached_method obj met cache pos args dbg + call_cached_method obj met cache pos args akind dbg | _ -> bind "met" (lookup_tag obj met dbg) (call_met obj args)) @@ -1867,6 +1916,52 @@ let cache_public_method meths tag cache dbg = Csequence(Cop (Cstore (Word_int, Assignment), [cache; Cvar tagged], dbg), Cvar tagged))))) +let has_local_allocs e = + let rec loop = function + | Cregion e -> + (* Local allocations within a nested region do not affect this region, + except inside a Ctail block *) + loop_until_tail e + | Cop (Calloc Alloc_local, _, _) + | Cop ((Cextcall _ | Capply _), _, _) -> + raise Exit + | e -> + iter_shallow loop e + and loop_until_tail = function + | Ctail e -> loop e + | Cregion _ -> () + | e -> ignore (iter_shallow_tail loop_until_tail e) + in + match loop e with + | () -> false + | exception Exit -> true + +let remove_region_tail e = + let rec has_tail = function + | Ctail _ + | Cop(Capply(_, Rc_close_at_apply), _, _) -> raise Exit + | Cregion _ -> () + | e -> ignore (iter_shallow_tail has_tail e) + in + let rec remove_tail = function + | Ctail e -> e + | Cop(Capply(mach, Rc_close_at_apply), args, dbg) -> + Cop(Capply(mach, Rc_normal), args, dbg) + | Cregion _ as e -> e + | e -> + map_shallow_tail remove_tail e + in + match has_tail e with + | () -> e + | exception Exit -> remove_tail e + +let region e = + (* [Cregion e] is equivalent to [e] if [e] contains no local allocs *) + if has_local_allocs e then + Cregion e + else + remove_region_tail e + (* CR mshinwell: These will be filled in by later pull requests. *) let placeholder_dbg () = Debuginfo.none let placeholder_fun_dbg ~human_name:_ = Debuginfo.none @@ -1882,14 +1977,14 @@ let placeholder_fun_dbg ~human_name:_ = Debuginfo.none (app closN-1.code aN closN-1)))) *) -let apply_function_body arity = +let apply_function_body (arity, (mode : Lambda.alloc_mode)) = let dbg = placeholder_dbg in let arg = Array.make arity (V.create_local "arg") in for i = 1 to arity - 1 do arg.(i) <- V.create_local "arg" done; let clos = V.create_local "clos" in let rec app_fun clos n = if n = arity-1 then - Cop(Capply typ_val, + Cop(Capply(typ_val, Rc_normal), [get_field_gen Asttypes.Mutable (Cvar clos) 0 (dbg ()); Cvar arg.(n); Cvar clos], @@ -1897,7 +1992,7 @@ let apply_function_body arity = else begin let newclos = V.create_local "clos" in Clet(VP.create newclos, - Cop(Capply typ_val, + Cop(Capply(typ_val, Rc_normal), [get_field_gen Asttypes.Mutable (Cvar clos) 0 (dbg ()); Cvar arg.(n); Cvar clos], dbg ()), app_fun newclos (n+1)) @@ -1912,18 +2007,20 @@ let apply_function_body arity = Cconst_int(pos_arity_in_closinfo, dbg())], dbg()); Cconst_int(arity, dbg())], dbg()), dbg (), - Cop(Capply typ_val, + Cop(Capply(typ_val, Rc_normal), get_field_gen Asttypes.Mutable (Cvar clos) 2 (dbg ()) :: List.map (fun s -> Cvar s) all_args, dbg ()), dbg (), - app_fun clos 0, + (match mode with + | Alloc_heap -> Cregion (app_fun clos 0) + | Alloc_local -> app_fun clos 0), dbg ())) -let send_function arity = +let send_function (arity, mode) = let dbg = placeholder_dbg in let cconst_int i = Cconst_int (i, dbg ()) in - let (args, clos', body) = apply_function_body (1+arity) in + let (args, clos', body) = apply_function_body (1+arity, mode) in let cache = V.create_local "cache" and obj = List.hd args and tag = V.create_local "tag" in @@ -1957,7 +2054,7 @@ let send_function arity = in let body = Clet(VP.create clos', clos, body) in let cache = cache in - let fun_name = "caml_send" ^ Int.to_string arity in + let fun_name = send_function_name arity mode in let fun_args = [obj, typ_val; tag, typ_int; cache, typ_val] @ List.map (fun id -> (id, typ_val)) (List.tl args) in @@ -1973,7 +2070,7 @@ let send_function arity = let apply_function arity = let (args, clos, body) = apply_function_body arity in let all_args = args @ [clos] in - let fun_name = "caml_apply" ^ Int.to_string arity in + let fun_name = apply_function_name arity in let fun_dbg = placeholder_fun_dbg ~human_name:fun_name in Cfunction {fun_name; @@ -2003,7 +2100,7 @@ let tuplify_function arity = {fun_name; fun_args = [VP.create arg, typ_val; VP.create clos, typ_val]; fun_body = - Cop(Capply typ_val, + Cop(Capply(typ_val, Rc_normal), get_field_gen Asttypes.Mutable (Cvar clos) 2 (dbg ()) :: access_components 0 @ [Cvar clos], (dbg ())); @@ -2040,13 +2137,13 @@ let tuplify_function arity = *) let max_arity_optimized = 15 -let final_curry_function arity = +let final_curry_function ~nlocal ~arity = let dbg = placeholder_dbg in let last_arg = V.create_local "arg" in let last_clos = V.create_local "clos" in let rec curry_fun args clos n = if n = 0 then - Cop(Capply typ_val, + Cop(Capply(typ_val, Rc_normal), get_field_gen Asttypes.Mutable (Cvar clos) 2 (dbg ()) :: args @ [Cvar last_arg; Cvar clos], dbg ()) @@ -2069,7 +2166,9 @@ let final_curry_function arity = newclos (n-1)) end in let fun_name = - "caml_curry" ^ Int.to_string arity ^ "_" ^ Int.to_string (arity-1) + "caml_curry" ^ Int.to_string arity + ^ (if nlocal > 0 then "L" ^ Int.to_string nlocal else "") + ^ "_" ^ Int.to_string (arity-1) in let fun_dbg = placeholder_fun_dbg ~human_name:fun_name in Cfunction @@ -2080,34 +2179,38 @@ let final_curry_function arity = fun_dbg; } -let rec intermediate_curry_functions arity num = +let rec intermediate_curry_functions ~nlocal ~arity num = let dbg = placeholder_dbg in if num = arity - 1 then - [final_curry_function arity] + [final_curry_function ~nlocal ~arity] else begin - let name1 = "caml_curry" ^ Int.to_string arity in + let name1 = "caml_curry" ^ Int.to_string arity + ^ (if nlocal > 0 then "L" ^ Int.to_string nlocal else "") in let name2 = if num = 0 then name1 else name1 ^ "_" ^ Int.to_string num in let arg = V.create_local "arg" and clos = V.create_local "clos" in let fun_dbg = placeholder_fun_dbg ~human_name:name2 in + let mode : Lambda.alloc_mode = + if num >= arity - nlocal then Alloc_local else Alloc_heap in + let curried n : Clambda.arity = (Curried, n) in Cfunction {fun_name = name2; fun_args = [VP.create arg, typ_val; VP.create clos, typ_val]; fun_body = if arity - num > 2 && arity <= max_arity_optimized then - Cop(Calloc, - [alloc_closure_header 5 (dbg ()); + Cop(Calloc mode, + [alloc_closure_header ~mode 5 (dbg ()); Cconst_symbol(name1 ^ "_" ^ Int.to_string (num+1), dbg ()); - alloc_closure_info ~arity:(arity - num - 1) + alloc_closure_info ~arity:(curried (arity - num - 1)) ~startenv:3 (dbg ()); Cconst_symbol(name1 ^ "_" ^ Int.to_string (num+1) ^ "_app", dbg ()); Cvar arg; Cvar clos], dbg ()) else - Cop(Calloc, - [alloc_closure_header 4 (dbg ()); + Cop(Calloc mode, + [alloc_closure_header ~mode 4 (dbg ()); Cconst_symbol(name1 ^ "_" ^ Int.to_string (num+1), dbg ()); - alloc_closure_info ~arity:1 ~startenv:2 (dbg ()); + alloc_closure_info ~arity:(curried 1) ~startenv:2 (dbg ()); Cvar arg; Cvar clos], dbg ()); fun_codegen_options = []; @@ -2124,7 +2227,7 @@ let rec intermediate_curry_functions arity num = let direct_args = iter (num+2) in let rec iter i args clos = if i = 0 then - Cop(Capply typ_val, + Cop(Capply(typ_val, Rc_normal), (get_field_gen Asttypes.Mutable (Cvar clos) 2 (dbg ())) :: args @ [Cvar clos], dbg ()) @@ -2153,21 +2256,24 @@ let rec intermediate_curry_functions arity num = fun_dbg; } in - cf :: intermediate_curry_functions arity (num+1) + cf :: intermediate_curry_functions ~nlocal ~arity (num+1) else - intermediate_curry_functions arity (num+1)) + intermediate_curry_functions ~nlocal ~arity (num+1)) end -let curry_function arity = - assert(arity <> 0); - (* Functions with arity = 0 does not have a curry_function *) - if arity > 0 - then intermediate_curry_functions arity 0 - else [tuplify_function (-arity)] +let curry_function = function + | Lambda.Tupled, n -> + assert (n > 0); [tuplify_function n] + | Lambda.Curried, n -> + assert (n > 0); + intermediate_curry_functions ~nlocal:0 ~arity:n 0 -module Int = Numbers.Int +module ApplyFnSet = + Set.Make (struct type t = int * Lambda.alloc_mode let compare = compare end) +module AritySet = + Set.Make (struct type t = Clambda.arity let compare = compare end) -let default_apply = Int.Set.add 2 (Int.Set.add 3 Int.Set.empty) +let default_apply = ApplyFnSet.of_list [2,Alloc_heap; 3,Alloc_heap] (* These apply funs are always present in the main program because the run-time system needs them (cf. runtime/.S) . *) @@ -2175,15 +2281,15 @@ let generic_functions shared units = let (apply,send,curry) = List.fold_left (fun (apply,send,curry) (ui : Cmx_format.unit_infos) -> - List.fold_right Int.Set.add ui.ui_apply_fun apply, - List.fold_right Int.Set.add ui.ui_send_fun send, - List.fold_right Int.Set.add ui.ui_curry_fun curry) - (Int.Set.empty,Int.Set.empty,Int.Set.empty) + List.fold_right ApplyFnSet.add ui.ui_apply_fun apply, + List.fold_right ApplyFnSet.add ui.ui_send_fun send, + List.fold_right AritySet.add ui.ui_curry_fun curry) + (ApplyFnSet.empty,ApplyFnSet.empty,AritySet.empty) units in - let apply = if shared then apply else Int.Set.union apply default_apply in - let accu = Int.Set.fold (fun n accu -> apply_function n :: accu) apply [] in - let accu = Int.Set.fold (fun n accu -> send_function n :: accu) send accu in - Int.Set.fold (fun n accu -> curry_function n @ accu) curry accu + let apply = if shared then apply else ApplyFnSet.union apply default_apply in + let accu = ApplyFnSet.fold (fun nr accu -> apply_function nr :: accu) apply [] in + let accu = ApplyFnSet.fold (fun nr accu -> send_function nr :: accu) send accu in + AritySet.fold (fun arity accu -> curry_function arity @ accu) curry accu (* Primitives *) @@ -2321,16 +2427,17 @@ type binary_primitive = expression -> expression -> Debuginfo.t -> expression (* Helper for compilation of initialization and assignment operations *) -type assignment_kind = Caml_modify | Caml_initialize | Simple +type assignment_kind = Caml_modify | Caml_modify_local | Simple let assignment_kind (ptr: Lambda.immediate_or_pointer) (init: Lambda.initialization_or_assignment) = match init, ptr with | Assignment, Pointer -> Caml_modify - | Heap_initialization, Pointer -> Caml_initialize - | Assignment, Immediate - | Heap_initialization, Immediate + | Local_assignment, Pointer -> Caml_modify_local + | Heap_initialization, _ -> + Misc.fatal_error "Cmm_helpers: Lambda.Heap_initialization unsupported" + | (Assignment | Local_assignment), Immediate | Root_initialization, (Immediate | Pointer) -> Simple let setfield n ptr init arg1 arg2 dbg = @@ -2346,17 +2453,17 @@ let setfield n ptr init arg1 arg2 dbg = [field_address arg1 n dbg; arg2], dbg)) - | Caml_initialize -> - return_unit dbg (Cop(Cextcall { func = "caml_initialize"; - ty = typ_void; alloc = false; - builtin = false; - returns = true; - effects = Arbitrary_effects; - coeffects = Has_coeffects; - ty_args = [] }, - [field_address arg1 n dbg; - arg2], - dbg)) + | Caml_modify_local -> + return_unit dbg + (Cop(Cextcall{ func = "caml_modify_local"; + ty = typ_void; alloc = false; + builtin = false; + returns = true; + effects = Arbitrary_effects; + coeffects = Has_coeffects; + ty_args = [] }, + [arg1; Cconst_int (n,dbg); arg2], + dbg)) | Simple -> return_unit dbg (set_field arg1 n arg2 init dbg) @@ -2443,16 +2550,16 @@ let stringref_safe arg1 arg2 dbg = Cop(Cload (Byte_unsigned, Mutable), [add_int str idx dbg], dbg))))) dbg -let string_load size unsafe arg1 arg2 dbg = - box_sized size dbg +let string_load size unsafe mode arg1 arg2 dbg = + box_sized size mode dbg (bind "index" (untag_int arg2 dbg) (fun idx -> bind "str" arg1 (fun str -> check_bound unsafe size dbg (string_length str dbg) idx (unaligned_load size str idx dbg)))) -let bigstring_load size unsafe arg1 arg2 dbg = - box_sized size dbg +let bigstring_load size unsafe mode arg1 arg2 dbg = + box_sized size mode dbg (bind "index" (untag_int arg2 dbg) (fun idx -> bind "ba" arg1 (fun ba -> bind "ba_data" @@ -2525,7 +2632,7 @@ let arrayref_safe kind arg1 arg2 dbg = (get_header_without_profinfo arr dbg) dbg; idx], int_array_ref arr idx dbg))) | Pfloatarray -> - box_float dbg ( + box_float dbg Alloc_heap ( bind "index" arg2 (fun idx -> bind "arr" arg1 (fun arr -> Csequence( @@ -2542,8 +2649,10 @@ let setfield_computed ptr init arg1 arg2 arg3 dbg = match assignment_kind ptr init with | Caml_modify -> return_unit dbg (addr_array_set arg1 arg2 arg3 dbg) - | Caml_initialize -> - return_unit dbg (addr_array_initialize arg1 arg2 arg3 dbg) + | Caml_modify_local -> + (* TODO: support this, if there are any uses. + (Currently, setfield_computed is only used by classes) *) + Misc.fatal_error "setfield_computed: local" | Simple -> return_unit dbg (int_array_set arg1 arg2 arg3 dbg) @@ -3056,7 +3165,7 @@ let entry_point namelist = List.fold_right (fun name next -> let entry_sym = Compilenv.make_symbol ~unitname:name (Some "entry") in - Csequence(Cop(Capply typ_void, + Csequence(Cop(Capply(typ_void, Rc_normal), [cconst_symbol entry_sym], dbg ()), Csequence(incr_global_inited (), next))) namelist (cconst_int 1) in @@ -3164,7 +3273,7 @@ let fundecls_size fundecls = (fun (f : Clambda.ufunction) -> let indirect_call_code_pointer_size = match f.arity with - | 0 | 1 -> 0 + | (0 | 1) -> 0 (* arity 1 does not need an indirect call handler. arity 0 cannot be indirect called *) | _ -> 1 @@ -3185,6 +3294,12 @@ let emit_constant_closure ((_, global_symb) as symb) fundecls clos_vars cont = else [] in + let fnarity (fn : Clambda.ufunction) = + if fn.arity >= 0 then + Lambda.Curried, fn.arity + else + Lambda.Tupled, -fn.arity + in match (fundecls : Clambda.ufunction list) with [] -> (* This should probably not happen: dead code has normally been @@ -3197,30 +3312,32 @@ let emit_constant_closure ((_, global_symb) as symb) fundecls clos_vars cont = let rec emit_others pos = function [] -> clos_vars @ cont | (f2 : Clambda.ufunction) :: rem -> - if f2.arity = 1 || f2.arity = 0 then + match (Lambda.Curried, f2.arity) with + | Curried, (0|1) as arity -> Cint(infix_header pos) :: (closure_symbol f2) @ Csymbol_address f2.label :: - Cint(closure_info ~arity:f2.arity ~startenv:(startenv - pos)) :: + Cint(closure_info ~arity ~startenv:(startenv - pos)) :: emit_others (pos + 3) rem - else + | arity -> Cint(infix_header pos) :: (closure_symbol f2) @ - Csymbol_address(curry_function_sym f2.arity) :: - Cint(closure_info ~arity:f2.arity ~startenv:(startenv - pos)) :: + Csymbol_address(curry_function_sym (fnarity f2)) :: + Cint(closure_info ~arity ~startenv:(startenv - pos)) :: Csymbol_address f2.label :: emit_others (pos + 4) rem in Cint(black_closure_header (fundecls_size fundecls + List.length clos_vars)) :: cdefine_symbol symb @ (closure_symbol f1) @ - if f1.arity = 1 || f1.arity = 0 then + match fnarity f1 with + | Curried, (0|1) as arity -> Csymbol_address f1.label :: - Cint(closure_info ~arity:f1.arity ~startenv) :: + Cint(closure_info ~arity ~startenv) :: emit_others 3 remainder - else - Csymbol_address(curry_function_sym f1.arity) :: - Cint(closure_info ~arity:f1.arity ~startenv) :: + | arity -> + Csymbol_address(curry_function_sym arity) :: + Cint(closure_info ~arity ~startenv) :: Csymbol_address f1.label :: emit_others 4 remainder diff --git a/backend/cmm_helpers.mli b/backend/cmm_helpers.mli index ff5daea5313..ef8dd5f04e2 100644 --- a/backend/cmm_helpers.mli +++ b/backend/cmm_helpers.mli @@ -53,30 +53,19 @@ val infix_header : int -> nativeint (** Header for a boxed float value *) val float_header : nativeint -(** Header for an unboxed float array of the given size *) -val floatarray_header : int -> nativeint - -(** Header for a string (or bytes) of the given length *) -val string_header : int -> nativeint - (** Boxed integer headers *) val boxedint32_header : nativeint val boxedint64_header : nativeint val boxedintnat_header : nativeint (** Closure info for a closure of given arity and distance to environment *) -val closure_info : arity:int -> startenv:int -> nativeint +val closure_info : arity:Clambda.arity -> startenv:int -> nativeint (** Wrappers *) -val alloc_float_header : Debuginfo.t -> expression -val alloc_floatarray_header : int -> Debuginfo.t -> expression -val alloc_closure_header : int -> Debuginfo.t -> expression val alloc_infix_header : int -> Debuginfo.t -> expression val alloc_closure_info : - arity:int -> startenv:int -> Debuginfo.t -> expression -val alloc_boxedint32_header : Debuginfo.t -> expression -val alloc_boxedint64_header : Debuginfo.t -> expression -val alloc_boxedintnat_header : Debuginfo.t -> expression + arity:(Lambda.function_kind * int) -> startenv:int -> + Debuginfo.t -> expression (** Integers *) @@ -183,7 +172,7 @@ val raise_symbol : Debuginfo.t -> string -> expression val test_bool : Debuginfo.t -> expression -> expression (** Float boxing and unboxing *) -val box_float : Debuginfo.t -> expression -> expression +val box_float : Debuginfo.t -> Lambda.alloc_mode -> expression -> expression val unbox_float : Debuginfo.t -> expression -> expression (** Complex number creation and access *) @@ -314,15 +303,17 @@ val lookup_label : expression -> expression -> Debuginfo.t -> expression - args : the additional arguments to the method call *) val call_cached_method : expression -> expression -> expression -> expression -> expression list -> - Debuginfo.t -> expression + Clambda.apply_kind -> Debuginfo.t -> expression (** Allocations *) (** Allocate a block of regular values with the given tag *) -val make_alloc : Debuginfo.t -> int -> expression list -> expression +val make_alloc : + ?mode:Lambda.alloc_mode -> Debuginfo.t -> int -> expression list -> expression (** Allocate a block of unboxed floats with the given tag *) -val make_float_alloc : Debuginfo.t -> int -> expression list -> expression +val make_float_alloc : + ?mode:Lambda.alloc_mode -> Debuginfo.t -> int -> expression list -> expression (** Bounds checking *) @@ -345,12 +336,11 @@ val opaque : expression -> Debuginfo.t -> expression (** Get the symbol for the generic application with [n] arguments, and ensure its presence in the set of defined symbols *) -val apply_function_sym : int -> string +val apply_function_sym : int -> Lambda.alloc_mode -> string -(** If [n] is positive, get the symbol for the generic currying wrapper with - [n] arguments, and ensure its presence in the set of defined symbols. - Otherwise, do the same for the generic tuple wrapper with [-n] arguments. *) -val curry_function_sym : int -> string +(** Get the symbol for the generic currying or tuplifying wrapper with + [n] arguments, and ensure its presence in the set of defined symbols. *) +val curry_function_sym : Clambda.arity -> string (** Bigarrays *) @@ -402,7 +392,8 @@ val caml_int64_ops : string (** Box a given integer, without sharing of constants *) val box_int_gen : - Debuginfo.t -> Primitive.boxed_integer -> expression -> expression + Debuginfo.t -> Primitive.boxed_integer -> Lambda.alloc_mode -> + expression -> expression (** Unbox a given boxed integer *) val unbox_int : @@ -436,7 +427,7 @@ val unaligned_load : (** [box_sized size dbg exp] *) val box_sized : - Clambda_primitives.memory_access_size -> + Clambda_primitives.memory_access_size -> Lambda.alloc_mode -> Debuginfo.t -> expression -> expression (** Primitives *) @@ -478,8 +469,6 @@ val bswap16 : unary_primitive type binary_primitive = expression -> expression -> Debuginfo.t -> expression -type assignment_kind = Caml_modify | Caml_initialize | Simple - (** [setfield offset value_is_ptr init ptr value dbg] *) val setfield : int -> Lambda.immediate_or_pointer -> Lambda.initialization_or_assignment -> @@ -512,9 +501,11 @@ val stringref_safe : binary_primitive (** Load by chunk from string/bytes, bigstring. Args: string, index *) val string_load : - Clambda_primitives.memory_access_size -> Lambda.is_safe -> binary_primitive + Clambda_primitives.memory_access_size -> Lambda.is_safe -> + Lambda.alloc_mode -> binary_primitive val bigstring_load : - Clambda_primitives.memory_access_size -> Lambda.is_safe -> binary_primitive + Clambda_primitives.memory_access_size -> Lambda.is_safe -> + Lambda.alloc_mode -> binary_primitive (** Arrays *) @@ -584,7 +575,9 @@ val strmatch_compile : val ptr_offset : expression -> int -> Debuginfo.t -> expression (** Direct application of a function via a symbol *) -val direct_apply : string -> expression list -> Debuginfo.t -> expression +val direct_apply : + string -> expression list -> Clambda.apply_kind + -> Debuginfo.t -> expression (** Generic application of a function to one or several arguments. The mutable_flag argument annotates the loading of the code pointer @@ -592,8 +585,8 @@ val direct_apply : string -> expression list -> Debuginfo.t -> expression default, with a special case when the load is from (the first function of) the currently defined closure. *) val generic_apply : - Asttypes.mutable_flag -> - expression -> expression list -> Debuginfo.t -> expression + Asttypes.mutable_flag -> expression -> expression list + -> Clambda.apply_kind -> Debuginfo.t -> expression (** Method call : [send kind met obj args dbg] - [met] is a method identifier, which can be a hashed variant or an index @@ -603,8 +596,11 @@ val generic_apply : of any way for the frontend to generate any arguments other than the cache and cache position) *) val send : - Lambda.meth_kind -> expression -> expression -> expression list -> - Debuginfo.t -> expression + Lambda.meth_kind -> expression -> expression -> expression list + -> Clambda.apply_kind -> Debuginfo.t -> expression + +(** Construct [Cregion e], eliding some useless regions *) +val region : expression -> expression (** [cextcall prim args dbg type_of_result] returns Cextcall operation that corresponds to [prim]. If [prim] is a C builtin supported on the diff --git a/backend/cmm_invariants.ml b/backend/cmm_invariants.ml index 54c29e6ef03..7c8d8add478 100644 --- a/backend/cmm_invariants.ml +++ b/backend/cmm_invariants.ml @@ -176,6 +176,8 @@ let rec check env (expr : Cmm.expression) = not reported as an error. *) check env body; check env handler + | Cregion e -> check env e + | Ctail e -> check env e let run ppf (fundecl : Cmm.fundecl) = let env = Env.init () in diff --git a/backend/cmmgen.ml b/backend/cmmgen.ml index 5ead254d5fe..84184f44a4a 100644 --- a/backend/cmmgen.ml +++ b/backend/cmmgen.ml @@ -36,8 +36,8 @@ open Cmm_helpers (* Environments used for translation to Cmm. *) type boxed_number = - | Boxed_float of Debuginfo.t - | Boxed_integer of boxed_integer * Debuginfo.t + | Boxed_float of alloc_mode * Debuginfo.t + | Boxed_integer of boxed_integer * alloc_mode * Debuginfo.t type env = { unboxed_ids : (V.t * boxed_number) V.tbl; @@ -252,7 +252,7 @@ let box_int_constant sym bi n = let n = Int64.of_nativeint n in emit_int64_constant (sym, Local) n [] -let box_int dbg bi arg = +let box_int dbg bi mode arg = match arg with | Cconst_int (n, _) -> let sym = Compilenv.new_const_symbol () in @@ -265,13 +265,13 @@ let box_int dbg bi arg = Cmmgen_state.add_data_items data_items; Cconst_symbol (sym, dbg) | _ -> - box_int_gen dbg bi arg + box_int_gen dbg bi mode arg (* Boxed numbers *) let typ_of_boxed_number = function | Boxed_float _ -> Cmm.typ_float - | Boxed_integer (Pint64, _) when size_int = 4 -> [|Int;Int|] + | Boxed_integer (Pint64, _,_) when size_int = 4 -> [|Int;Int|] | Boxed_integer _ -> Cmm.typ_int let equal_unboxed_integer ui1 ui2 = @@ -284,24 +284,24 @@ let equal_unboxed_integer ui1 ui2 = let equal_boxed_number bn1 bn2 = match bn1, bn2 with | Boxed_float _, Boxed_float _ -> true - | Boxed_integer(ui1, _), Boxed_integer(ui2, _) -> - equal_unboxed_integer ui1 ui2 + | Boxed_integer(ui1, m, _), Boxed_integer(ui2, m', _) -> + equal_unboxed_integer ui1 ui2 && Lambda.eq_mode m m' | _, _ -> false let box_number bn arg = match bn with - | Boxed_float dbg -> box_float dbg arg - | Boxed_integer (bi, dbg) -> box_int dbg bi arg + | Boxed_float (m, dbg) -> box_float dbg m arg + | Boxed_integer (bi, m, dbg) -> box_int dbg bi m arg (* Returns the unboxed representation of a boxed float or integer. For Pint32 on 64-bit archs, the high 32 bits of the result are undefined. *) let unbox_number dbg bn arg = match bn with - | Boxed_float dbg -> + | Boxed_float (_, dbg) -> unbox_float dbg arg - | Boxed_integer (Pint32, _) -> + | Boxed_integer (Pint32, _, _) -> low_32 dbg (unbox_int dbg Pint32 arg) - | Boxed_integer (bi, _) -> + | Boxed_integer (bi, _, _) -> unbox_int dbg bi arg (* Auxiliary functions for optimizing "let" of boxed numbers (floats and @@ -339,36 +339,36 @@ let is_unboxed_number_cmm ~strict cmm = r := join_unboxed_number_kind ~strict !r k in let rec aux = function - | Cop(Calloc, [Cconst_natint (hdr, _); _], dbg) + | Cop(Calloc mode, [Cconst_natint (hdr, _); _], dbg) when Nativeint.equal hdr float_header -> - notify (Boxed (Boxed_float dbg, false)) - | Cop(Calloc, [Cconst_natint (hdr, _); Cconst_symbol (ops, _); _], dbg) -> + notify (Boxed (Boxed_float (mode,dbg), false)) + | Cop(Calloc mode, [Cconst_natint (hdr, _); Cconst_symbol (ops, _); _], dbg) -> if Nativeint.equal hdr boxedintnat_header && String.equal ops caml_nativeint_ops then - notify (Boxed (Boxed_integer (Pnativeint, dbg), false)) + notify (Boxed (Boxed_integer (Pnativeint, mode, dbg), false)) else if Nativeint.equal hdr boxedint32_header && String.equal ops caml_int32_ops then - notify (Boxed (Boxed_integer (Pint32, dbg), false)) + notify (Boxed (Boxed_integer (Pint32, mode, dbg), false)) else if Nativeint.equal hdr boxedint64_header && String.equal ops caml_int64_ops then - notify (Boxed (Boxed_integer (Pint64, dbg), false)) + notify (Boxed (Boxed_integer (Pint64, mode, dbg), false)) else notify No_unboxing | Cconst_symbol (s, _) -> begin match Cmmgen_state.structured_constant_of_sym s with | Some (Uconst_float _) -> - notify (Boxed (Boxed_float Debuginfo.none, true)) + notify (Boxed (Boxed_float (Alloc_heap, Debuginfo.none), true)) | Some (Uconst_nativeint _) -> - notify (Boxed (Boxed_integer (Pnativeint, Debuginfo.none), true)) + notify (Boxed (Boxed_integer (Pnativeint, Alloc_heap, Debuginfo.none), true)) | Some (Uconst_int32 _) -> - notify (Boxed (Boxed_integer (Pint32, Debuginfo.none), true)) + notify (Boxed (Boxed_integer (Pint32, Alloc_heap, Debuginfo.none), true)) | Some (Uconst_int64 _) -> - notify (Boxed (Boxed_integer (Pint64, Debuginfo.none), true)) + notify (Boxed (Boxed_integer (Pint64, Alloc_heap, Debuginfo.none), true)) | _ -> notify No_unboxing end @@ -408,15 +408,22 @@ let rec transl env e = | f :: rem -> Cmmgen_state.add_function f; let dbg = f.dbg in + let arity = + if f.arity >= 0 then + Curried, f.arity + else + Tupled, -f.arity + in let without_header = - if f.arity = 1 || f.arity = 0 then + match arity with + | Curried, (1|0) -> Cconst_symbol (f.label, dbg) :: - alloc_closure_info ~arity:f.arity + alloc_closure_info ~arity ~startenv:(startenv - pos) dbg :: transl_fundecls (pos + 3) rem - else - Cconst_symbol (curry_function_sym f.arity, dbg) :: - alloc_closure_info ~arity:f.arity + | arity -> + Cconst_symbol (curry_function_sym arity, dbg) :: + alloc_closure_info ~arity ~startenv:(startenv - pos) dbg :: Cconst_symbol (f.label, dbg) :: transl_fundecls (pos + 4) rem @@ -442,16 +449,16 @@ let rec transl env e = (Cop(Cprobe { name; handler_code_sym; }, args, dbg)) | Udirect_apply(lbl, args, None, dbg) -> let args = List.map (transl env) args in - direct_apply lbl args dbg + direct_apply lbl args (Rc_normal, Alloc_heap) dbg | Ugeneric_apply(clos, args, dbg) -> let clos = transl env clos in let args = List.map (transl env) args in - generic_apply (mut_from_env env clos) clos args dbg + generic_apply (mut_from_env env clos) clos args (Rc_normal, Alloc_heap) dbg | Usend(kind, met, obj, args, dbg) -> let met = transl env met in let obj = transl env obj in let args = List.map (transl env) args in - send kind met obj args dbg + send kind met obj args (Rc_normal, Alloc_heap) dbg | Ulet(str, kind, id, exp, body) -> transl_let env str kind id exp body | Uphantom_let (var, defining_expr, body) -> @@ -489,7 +496,7 @@ let rec transl env e = | (Pmakeblock _, []) -> assert false | (Pmakeblock(tag, _mut, _kind), args) -> - make_alloc dbg tag (List.map (transl env) args) + make_alloc ~mode:Alloc_heap dbg tag (List.map (transl env) args) | (Pccall prim, args) -> transl_ccall env prim args dbg | (Pduparray (kind, _), [Uprim (Pmakearray (kind', _), args, _dbg)]) -> @@ -504,7 +511,7 @@ let rec transl env e = state of [Translcore], we will in fact only get here with [Pfloatarray]s. *) assert (kind = kind'); - transl_make_array dbg env kind args + transl_make_array dbg env kind Alloc_heap args | (Pduparray _, [arg]) -> let prim_obj_dup = Primitive.simple ~name:"caml_obj_dup" ~arity:1 ~alloc:true @@ -512,17 +519,19 @@ let rec transl env e = transl_ccall env prim_obj_dup [arg] dbg | (Pmakearray _, []) -> Misc.fatal_error "Pmakearray is not allowed for an empty array" - | (Pmakearray (kind, _), args) -> transl_make_array dbg env kind args + | (Pmakearray (kind, _), args) -> + transl_make_array dbg env kind Alloc_heap args | (Pbigarrayref(unsafe, _num_dims, elt_kind, layout), arg1 :: argl) -> let elt = bigarray_get unsafe elt_kind layout (transl env arg1) (List.map (transl env) argl) dbg in begin match elt_kind with - Pbigarray_float32 | Pbigarray_float64 -> box_float dbg elt + (* TODO: local allocation of bigarray elements *) + Pbigarray_float32 | Pbigarray_float64 -> box_float dbg Alloc_heap elt | Pbigarray_complex32 | Pbigarray_complex64 -> elt - | Pbigarray_int32 -> box_int dbg Pint32 elt - | Pbigarray_int64 -> box_int dbg Pint64 elt - | Pbigarray_native_int -> box_int dbg Pnativeint elt + | Pbigarray_int32 -> box_int dbg Pint32 Alloc_heap elt + | Pbigarray_int64 -> box_int dbg Pint64 Alloc_heap elt + | Pbigarray_native_int -> box_int dbg Pnativeint Alloc_heap elt | Pbigarray_caml_int -> tag_int elt dbg | Pbigarray_sint8 | Pbigarray_uint8 | Pbigarray_sint16 | Pbigarray_uint16 -> tag_int elt dbg @@ -583,7 +592,7 @@ let rec transl env e = | Pcompare_ints | Pcompare_floats | Pcompare_bints _ | Poffsetref _ | Pfloatcomp _ | Parraylength _ | Parrayrefu _ | Parraysetu _ | Parrayrefs _ | Parraysets _ - | Pbintofint _ | Pintofbint _ | Pcvtbint (_, _) | Pnegbint _ + | Pbintofint _ | Pintofbint _ | Pcvtbint _ | Pnegbint _ | Paddbint _ | Psubbint _ | Pmulbint _ | Pdivbint _ | Pmodbint _ | Pandbint _ | Porbint _ | Pxorbint _ | Plslbint _ | Plsrbint _ | Pasrbint _ | Pbintcomp (_, _) | Pstring_load _ | Pbytes_load _ @@ -761,20 +770,25 @@ and transl_catch env nfail ids body handler dbg = in ccatch (new_nfail, ids, body, transl new_env handler, dbg) -and transl_make_array dbg env kind args = +and transl_make_array dbg env kind mode args = match kind with | Pgenarray -> - Cop(Cextcall { func = "caml_make_array"; + let func = + match (mode : Lambda.alloc_mode) with + | Alloc_heap -> "caml_make_array" + | Alloc_local -> "caml_make_array_local" + in + Cop(Cextcall { func; builtin = false; returns = true; effects = Arbitrary_effects; coeffects = Has_coeffects; ty = typ_val; alloc = true; ty_args = []}, - [make_alloc dbg 0 (List.map (transl env) args)], dbg) + [make_alloc ~mode dbg 0 (List.map (transl env) args)], dbg) | Paddrarray | Pintarray -> - make_alloc dbg 0 (List.map (transl env) args) + make_alloc ~mode dbg 0 (List.map (transl env) args) | Pfloatarray -> - make_float_alloc dbg Obj.double_array_tag + make_float_alloc ~mode dbg Obj.double_array_tag (List.map (transl_unbox_float dbg env) args) and transl_ccall env prim args dbg = @@ -810,10 +824,11 @@ and transl_ccall env prim args dbg = let typ_res, wrap_result = match prim.prim_native_repr_res with | Same_as_ocaml_repr -> (typ_val, fun x -> x) - | Unboxed_float -> (typ_float, box_float dbg) + (* TODO: Allow Alloc_local on suitably typed C stubs *) + | Unboxed_float -> (typ_float, box_float dbg Alloc_heap) | Unboxed_integer Pint64 when size_int = 4 -> - ([|Int; Int|], box_int dbg Pint64) - | Unboxed_integer bi -> (typ_int, box_int dbg bi) + ([|Int; Int|], box_int dbg Pint64 Alloc_heap) + | Unboxed_integer bi -> (typ_int, box_int dbg bi Alloc_heap) | Untagged_int -> (typ_int, (fun i -> tag_int i dbg)) in let typ_args, args = transl_args prim.prim_native_repr_args args in @@ -830,7 +845,7 @@ and transl_prim_1 env p arg dbg = get_field env (transl env arg) n dbg | Pfloatfield n -> let ptr = transl env arg in - box_float dbg (floatfield n ptr dbg) + box_float dbg Alloc_heap (floatfield n ptr dbg) | Pint_as_pointer -> int_as_pointer (transl env arg) dbg (* Exceptions *) @@ -845,13 +860,13 @@ and transl_prim_1 env p arg dbg = offsetref n (transl env arg) dbg (* Floating-point operations *) | Pfloatofint -> - box_float dbg (Cop(Cfloatofint, [untag_int(transl env arg) dbg], dbg)) + box_float dbg Alloc_heap (Cop(Cfloatofint, [untag_int(transl env arg) dbg], dbg)) | Pintoffloat -> tag_int(Cop(Cintoffloat, [transl_unbox_float dbg env arg], dbg)) dbg | Pnegfloat -> - box_float dbg (Cop(Cnegf, [transl_unbox_float dbg env arg], dbg)) + box_float dbg Alloc_heap (Cop(Cnegf, [transl_unbox_float dbg env arg], dbg)) | Pabsfloat -> - box_float dbg (Cop(Cabsf, [transl_unbox_float dbg env arg], dbg)) + box_float dbg Alloc_heap (Cop(Cabsf, [transl_unbox_float dbg env arg], dbg)) (* String operations *) | Pstringlength | Pbyteslength -> tag_int(string_length (transl env arg) dbg) dbg @@ -869,17 +884,17 @@ and transl_prim_1 env p arg dbg = tag_int(Cop(Cand, [transl env arg; Cconst_int (1, dbg)], dbg)) dbg (* Boxed integers *) | Pbintofint bi -> - box_int dbg bi (untag_int (transl env arg) dbg) + box_int dbg bi Alloc_heap (untag_int (transl env arg) dbg) | Pintofbint bi -> tag_int (transl_unbox_int dbg env bi arg) dbg | Pcvtbint(bi1, bi2) -> - box_int dbg bi2 (transl_unbox_int dbg env bi1 arg) + box_int dbg bi2 Alloc_heap (transl_unbox_int dbg env bi1 arg) | Pnegbint bi -> - box_int dbg bi + box_int dbg bi Alloc_heap (Cop(Csubi, [Cconst_int (0, dbg); transl_unbox_int dbg env bi arg], dbg)) | Pbbswap bi -> - box_int dbg bi (bbswap bi (transl_unbox_int dbg env bi arg) dbg) + box_int dbg bi Alloc_heap (bbswap bi (transl_unbox_int dbg env bi arg) dbg) | Pbswap16 -> tag_int (bswap16 (ignore_high_bit_int (untag_int (transl env arg) dbg)) dbg) dbg @@ -974,22 +989,22 @@ and transl_prim_2 env p arg1 arg2 dbg = transl_isout (transl env arg1) (transl env arg2) dbg (* Float operations *) | Paddfloat -> - box_float dbg (Cop(Caddf, + box_float dbg Alloc_heap (Cop(Caddf, [transl_unbox_float dbg env arg1; transl_unbox_float dbg env arg2], dbg)) | Psubfloat -> - box_float dbg (Cop(Csubf, + box_float dbg Alloc_heap (Cop(Csubf, [transl_unbox_float dbg env arg1; transl_unbox_float dbg env arg2], dbg)) | Pmulfloat -> - box_float dbg (Cop(Cmulf, + box_float dbg Alloc_heap (Cop(Cmulf, [transl_unbox_float dbg env arg1; transl_unbox_float dbg env arg2], dbg)) | Pdivfloat -> - box_float dbg (Cop(Cdivf, + box_float dbg Alloc_heap (Cop(Cdivf, [transl_unbox_float dbg env arg1; transl_unbox_float dbg env arg2], dbg)) @@ -1005,9 +1020,9 @@ and transl_prim_2 env p arg1 arg2 dbg = | Pstringrefs | Pbytesrefs -> stringref_safe (transl env arg1) (transl env arg2) dbg | Pstring_load(size, unsafe) | Pbytes_load(size, unsafe) -> - string_load size unsafe (transl env arg1) (transl env arg2) dbg + string_load size unsafe Alloc_heap (transl env arg1) (transl env arg2) dbg | Pbigstring_load(size, unsafe) -> - bigstring_load size unsafe (transl env arg1) (transl env arg2) dbg + bigstring_load size unsafe Alloc_heap (transl env arg1) (transl env arg2) dbg (* Array operations *) | Parrayrefu kind -> @@ -1017,50 +1032,50 @@ and transl_prim_2 env p arg1 arg2 dbg = (* Boxed integers *) | Paddbint bi -> - box_int dbg bi (add_int + box_int dbg bi Alloc_heap (add_int (transl_unbox_int_low dbg env bi arg1) (transl_unbox_int_low dbg env bi arg2) dbg) | Psubbint bi -> - box_int dbg bi (sub_int + box_int dbg bi Alloc_heap (sub_int (transl_unbox_int_low dbg env bi arg1) (transl_unbox_int_low dbg env bi arg2) dbg) | Pmulbint bi -> - box_int dbg bi (mul_int + box_int dbg bi Alloc_heap (mul_int (transl_unbox_int_low dbg env bi arg1) (transl_unbox_int_low dbg env bi arg2) dbg) | Pdivbint { size = bi; is_safe } -> - box_int dbg bi (safe_div_bi is_safe + box_int dbg bi Alloc_heap (safe_div_bi is_safe (transl_unbox_int dbg env bi arg1) (transl_unbox_int dbg env bi arg2) bi dbg) | Pmodbint { size = bi; is_safe } -> - box_int dbg bi (safe_mod_bi is_safe + box_int dbg bi Alloc_heap (safe_mod_bi is_safe (transl_unbox_int dbg env bi arg1) (transl_unbox_int dbg env bi arg2) bi dbg) | Pandbint bi -> - box_int dbg bi (and_int + box_int dbg bi Alloc_heap (and_int (transl_unbox_int_low dbg env bi arg1) (transl_unbox_int_low dbg env bi arg2) dbg) | Porbint bi -> - box_int dbg bi (or_int + box_int dbg bi Alloc_heap (or_int (transl_unbox_int_low dbg env bi arg1) (transl_unbox_int_low dbg env bi arg2) dbg) | Pxorbint bi -> - box_int dbg bi (xor_int + box_int dbg bi Alloc_heap (xor_int (transl_unbox_int_low dbg env bi arg1) (transl_unbox_int_low dbg env bi arg2) dbg) | Plslbint bi -> - box_int dbg bi (lsl_int + box_int dbg bi Alloc_heap (lsl_int (transl_unbox_int_low dbg env bi arg1) (untag_int(transl env arg2) dbg) dbg) | Plsrbint bi -> - box_int dbg bi (lsr_int + box_int dbg bi Alloc_heap (lsr_int (make_unsigned_int bi (transl_unbox_int dbg env bi arg1) dbg) (untag_int(transl env arg2) dbg) dbg) | Pasrbint bi -> - box_int dbg bi (asr_int + box_int dbg bi Alloc_heap (asr_int (transl_unbox_int dbg env bi arg1) (untag_int(transl env arg2) dbg) dbg) | Pbintcomp(bi, cmp) -> @@ -1070,7 +1085,8 @@ and transl_prim_2 env p arg1 arg2 dbg = | Pnot | Pnegint | Pintoffloat | Pfloatofint | Pnegfloat | Pabsfloat | Pstringlength | Pbyteslength | Pbytessetu | Pbytessets | Pisint | Pbswap16 | Pint_as_pointer | Popaque | Pread_symbol _ - | Pmakeblock (_, _, _) | Pfield _ | Psetfield_computed (_, _) | Pfloatfield _ + | Pmakeblock (_, _, _) | Pfield _ | Psetfield_computed (_, _) + | Pfloatfield _ | Pduprecord (_, _) | Pccall _ | Praise _ | Poffsetint _ | Poffsetref _ | Pmakearray (_, _) | Pduparray (_, _) | Parraylength _ | Parraysetu _ | Parraysets _ | Pbintofint _ | Pintofbint _ | Pcvtbint (_, _) @@ -1124,13 +1140,14 @@ and transl_prim_3 env p arg1 arg2 arg3 dbg = | Pintoffloat | Pfloatofint | Pnegfloat | Pabsfloat | Paddfloat | Psubfloat | Pmulfloat | Pdivfloat | Pstringlength | Pstringrefu | Pstringrefs | Pbyteslength | Pbytesrefu | Pbytesrefs | Pisint | Pisout - | Pbswap16 | Pint_as_pointer | Popaque | Pread_symbol _ | Pmakeblock (_, _, _) + | Pbswap16 | Pint_as_pointer | Popaque | Pread_symbol _ + | Pmakeblock (_, _, _) | Pfield _ | Psetfield (_, _, _) | Pfloatfield _ | Psetfloatfield (_, _) | Pduprecord (_, _) | Pccall _ | Praise _ | Pdivint _ | Pmodint _ | Pintcomp _ | Pcompare_ints | Pcompare_floats | Pcompare_bints _ | Poffsetint _ | Poffsetref _ | Pfloatcomp _ | Pmakearray (_, _) | Pduparray (_, _) | Parraylength _ | Parrayrefu _ | Parrayrefs _ - | Pbintofint _ | Pintofbint _ | Pcvtbint (_, _) | Pnegbint _ | Paddbint _ + | Pbintofint _ | Pintofbint _ | Pcvtbint _ | Pnegbint _ | Paddbint _ | Psubbint _ | Pmulbint _ | Pdivbint _ | Pmodbint _ | Pandbint _ | Porbint _ | Pxorbint _ | Plslbint _ | Plsrbint _ | Pasrbint _ | Pbintcomp (_, _) | Pbigarrayref (_, _, _, _) | Pbigarrayset (_, _, _, _) | Pbigarraydim _ @@ -1169,9 +1186,9 @@ and transl_let env str kind id exp body = used in loops and we really want to avoid repeated boxing. *) match str, kind with | Mutable, Pfloatval -> - Boxed (Boxed_float dbg, false) + Boxed (Boxed_float (Alloc_heap, dbg), false) | Mutable, Pboxedintval bi -> - Boxed (Boxed_integer (bi, dbg), false) + Boxed (Boxed_integer (bi, Alloc_heap, dbg), false) | _, (Pfloatval | Pboxedintval _) -> (* It would be safe to always unbox in this case, but we do it only if this indeed allows us to get rid of diff --git a/backend/comballoc.ml b/backend/comballoc.ml index 6040f651a2f..2eaaed34d93 100644 --- a/backend/comballoc.ml +++ b/backend/comballoc.ml @@ -20,7 +20,8 @@ open Mach type pending_alloc = { reg: Reg.t; (* register holding the result of the last allocation *) dbginfos: Debuginfo.alloc_dbginfo; (* debug info for each pending alloc *) - totalsz: int } (* amount to be allocated in this block *) + totalsz: int; (* amount to be allocated in this block *) + mode: Lambda.alloc_mode } (* heap or stack allocation *) type allocation_state = No_alloc @@ -30,16 +31,19 @@ let rec combine i allocstate = match i.desc with Iend | Ireturn _ | Iexit _ | Iraise _ -> (i, allocstate) - | Iop(Ialloc { bytes = sz; dbginfo; _ }) -> + | Iop(Ialloc { bytes = sz; dbginfo; mode }) -> assert (List.length dbginfo = 1); begin match allocstate with - | Pending_alloc {reg; dbginfos; totalsz} - when totalsz + sz <= (Config.max_young_wosize + 1) * Arch.size_addr -> + | Pending_alloc {reg; dbginfos; totalsz; mode = prev_mode} + when (mode = prev_mode) && + ((totalsz + sz <= (Config.max_young_wosize + 1) * Arch.size_addr) + || mode = Lambda.Alloc_local) -> let (next, state) = combine i.next (Pending_alloc { reg = i.res.(0); dbginfos = dbginfo @ dbginfos; - totalsz = totalsz + sz }) in + totalsz = totalsz + sz; + mode }) in (instr_cons_debug (Iop(Iintop_imm(Iadd, -sz))) [| reg |] i.res i.dbg next, state) @@ -48,18 +52,21 @@ let rec combine i allocstate = combine i.next (Pending_alloc { reg = i.res.(0); dbginfos = dbginfo; - totalsz = sz }) in + totalsz = sz; + mode }) in let totalsz, dbginfo = match state with | No_alloc -> assert false - | Pending_alloc { totalsz; dbginfos; _ } -> totalsz, dbginfos in + | Pending_alloc { totalsz; dbginfos; mode = m; _ } -> + assert (Lambda.eq_mode m mode); + totalsz, dbginfos in let next = let offset = totalsz - sz in if offset = 0 then next else instr_cons_debug (Iop(Iintop_imm(Iadd, offset))) i.res i.res i.dbg next in - (instr_cons_debug (Iop(Ialloc {bytes = totalsz; dbginfo; })) + (instr_cons_debug (Iop(Ialloc {bytes = totalsz; dbginfo; mode})) i.arg i.res i.dbg next, allocstate) end | Iop(Icall_ind | Icall_imm _ | Iextcall _ | diff --git a/backend/mach.ml b/backend/mach.ml index 311c4573295..012ff3a7e48 100644 --- a/backend/mach.ml +++ b/backend/mach.ml @@ -61,7 +61,8 @@ type operation = | Istackoffset of int | Iload of Cmm.memory_chunk * Arch.addressing_mode | Istore of Cmm.memory_chunk * Arch.addressing_mode * bool - | Ialloc of { bytes : int; dbginfo : Debuginfo.alloc_dbginfo; } + | Ialloc of { bytes : int; dbginfo : Debuginfo.alloc_dbginfo; + mode : Lambda.alloc_mode } | Iintop of integer_operation | Iintop_imm of integer_operation * int | Icompf of float_comparison @@ -73,6 +74,7 @@ type operation = provenance : unit option; is_assignment : bool; } | Iprobe of { name: string; handler_code_sym: string; } | Iprobe_is_enabled of { name: string } + | Ibeginregion | Iendregion type instruction = { desc: instruction_desc; @@ -173,7 +175,8 @@ let rec instr_iter f i = | Icompf _ | Ifloatofint | Iintoffloat | Ispecific _ | Iname_for_debugger _ | Iprobe _ | Iprobe_is_enabled _ - | Iopaque) -> + | Iopaque + | Ibeginregion | Iendregion) -> instr_iter f i.next let operation_can_raise op = diff --git a/backend/mach.mli b/backend/mach.mli index c621d2a5006..8d53d449cfd 100644 --- a/backend/mach.mli +++ b/backend/mach.mli @@ -65,7 +65,8 @@ type operation = | Iload of Cmm.memory_chunk * Arch.addressing_mode | Istore of Cmm.memory_chunk * Arch.addressing_mode * bool (* false = initialization, true = assignment *) - | Ialloc of { bytes : int; dbginfo : Debuginfo.alloc_dbginfo; } + | Ialloc of { bytes : int; dbginfo : Debuginfo.alloc_dbginfo; + mode: Lambda.alloc_mode } | Iintop of integer_operation | Iintop_imm of integer_operation * int | Icompf of float_comparison @@ -83,6 +84,7 @@ type operation = identifier is forgotten. *) | Iprobe of { name: string; handler_code_sym: string; } | Iprobe_is_enabled of { name: string } + | Ibeginregion | Iendregion type instruction = { desc: instruction_desc; diff --git a/backend/printcmm.ml b/backend/printcmm.ml index 59f32c560c0..4317ac27179 100644 --- a/backend/printcmm.ml +++ b/backend/printcmm.ml @@ -151,18 +151,20 @@ let trywith_kind ppf kind = | Delayed i -> fprintf ppf "" i let operation d = function - | Capply _ty -> "app" ^ location d + | Capply(_ty, _) -> "app" ^ location d | Cextcall { func = lbl; _ } -> Printf.sprintf "extcall \"%s\"%s" lbl (location d) | Cload (c, Asttypes.Immutable) -> Printf.sprintf "load %s" (chunk c) | Cload (c, Asttypes.Mutable) -> Printf.sprintf "load_mut %s" (chunk c) - | Calloc -> "alloc" ^ location d + | Calloc Alloc_heap -> "alloc" ^ location d + | Calloc Alloc_local -> "alloc_local" ^ location d | Cstore (c, init) -> let init = match init with | Lambda.Heap_initialization -> "(heap-init)" | Lambda.Root_initialization -> "(root-init)" | Lambda.Assignment -> "" + | Local_assignment -> "(local)" in Printf.sprintf "store %s%s" (chunk c) init | Caddi -> "+" @@ -265,7 +267,7 @@ let rec expr ppf = function fprintf ppf "@[<2>(%s" (operation dbg op); List.iter (fun e -> fprintf ppf "@ %a" expr e) el; begin match op with - | Capply mty -> fprintf ppf "@ %a" machtype mty + | Capply(mty, _) -> fprintf ppf "@ %a" machtype mty | Cextcall { ty; ty_args; alloc = _; func = _; returns; } -> let ty = if returns then Some ty else None in fprintf ppf "@ %a" extcall_signature (ty, ty_args) @@ -323,6 +325,10 @@ let rec expr ppf = function trywith_kind kind sequence e1 VP.print id; with_location_mapping ~label:"Ctrywith" ~dbg ppf (fun () -> fprintf ppf "%a)@]" sequence e2); + | Cregion e -> + fprintf ppf "@[<2>(region@ %a)@]" sequence e + | Ctail e -> + fprintf ppf "@[<2>(tail@ %a)@]" sequence e and sequence ppf = function | Csequence(e1, e2) -> fprintf ppf "%a@ %a" sequence e1 sequence e2 diff --git a/backend/printmach.ml b/backend/printmach.ml index dc5327d5fe2..4f8221ea5a7 100644 --- a/backend/printmach.ml +++ b/backend/printmach.ml @@ -145,8 +145,10 @@ let operation op arg ppf res = (Array.sub arg 1 (Array.length arg - 1)) reg arg.(0) (if is_assign then "(assign)" else "(init)") - | Ialloc { bytes = n; } -> + | Ialloc { bytes = n; mode = Alloc_heap } -> fprintf ppf "alloc %i" n; + | Ialloc { bytes = n; mode = Alloc_local } -> + fprintf ppf "alloc_local %i" n; | Iintop(op) -> if is_unary_op op then begin assert (Array.length arg = 1); @@ -173,6 +175,8 @@ let operation op arg ppf res = | None -> "" | Some index -> sprintf "[P%d]" index) reg arg.(0) + | Ibeginregion -> fprintf ppf "beginregion" + | Iendregion -> fprintf ppf "endregion %a" reg arg.(0) | Ispecific op -> Arch.print_specific_operation reg op ppf arg | Iprobe {name;handler_code_sym} -> diff --git a/backend/selectgen.ml b/backend/selectgen.ml index 0319d283128..57f3f645b7d 100644 --- a/backend/selectgen.ml +++ b/backend/selectgen.ml @@ -28,14 +28,18 @@ type trap_stack_info = | Unreachable | Reachable of trap_stack +type region_stack = Reg.t array list + type environment = { vars : (Reg.t array * Backend_var.Provenance.t option * Asttypes.mutable_flag) V.Map.t; - static_exceptions : (Reg.t array list * trap_stack_info ref) Int.Map.t; + static_exceptions : (Reg.t array list * region_stack * trap_stack_info ref) Int.Map.t; (** Which registers must be populated when jumping to the given handler. *) trap_stack : trap_stack; + regions : region_stack; + region_tail : bool; } let env_add ?(mut=Asttypes.Immutable) var regs env = @@ -45,7 +49,7 @@ let env_add ?(mut=Asttypes.Immutable) var regs env = let env_add_static_exception id v env = let r = ref Unreachable in - { env with static_exceptions = Int.Map.add id (v, r) env.static_exceptions }, r + { env with static_exceptions = Int.Map.add id (v, env.regions, r) env.static_exceptions }, r let env_find id env = let regs, _provenance, _mut = V.Map.find id env.vars in @@ -113,7 +117,7 @@ let set_traps_for_raise env = | Generic_trap _ -> () | Specific_trap (lbl, _) -> begin match env_find_static_exception lbl env with - | (_, traps_ref) -> set_traps lbl traps_ref ts [Pop] + | (_, _, traps_ref) -> set_traps lbl traps_ref ts [Pop] | exception Not_found -> Misc.fatal_errorf "Trap %d not registered in env" lbl end @@ -134,12 +138,28 @@ let env_empty = { vars = V.Map.empty; static_exceptions = Int.Map.empty; trap_stack = Uncaught; + regions = []; + region_tail = false; } +(* Assuming [rs] is equal to or a suffix of [env.regions], + return the last region in [env.regions] but not [rs] + (or None if they are equal) *) +let env_close_regions env rs = + let rec aux v es rs = + match es, rs with + | [], [] -> v + | (r :: _), (r' :: _) when r == r' -> v + | [], _::_ -> + Misc.fatal_error "Selectgen.env_close_regions: not a suffix" + | r :: es, rs -> aux (Some r) es rs + in + aux None env.regions rs + (* Infer the type of the result of an operation *) let oper_result_type = function - Capply ty -> ty + | Capply(ty, _) -> ty | Cextcall { ty; ty_args = _; alloc = _; func = _; } -> ty | Cload (c, _) -> begin match c with @@ -147,7 +167,7 @@ let oper_result_type = function | Single | Double -> typ_float | _ -> typ_int end - | Calloc -> typ_val + | Calloc _ -> typ_val | Cstore (_c, _) -> typ_void | Cprefetch _ -> typ_void | Caddi | Csubi | Cmuli | Cmulhi _ | Cdivi | Cmodi | @@ -417,7 +437,7 @@ method is_simple_expr = function | Cextcall { effects = No_effects; coeffects = No_coeffects; } -> List.for_all self#is_simple_expr args (* The following may have side effects *) - | Capply _ | Cextcall _ | Calloc | Cstore _ + | Capply _ | Cextcall _ | Calloc _ | Cstore _ | Craise _ | Ccheckbound | Cprobe _ | Cprobe_is_enabled _ | Copaque -> false | Cprefetch _ -> false (* avoid reordering *) @@ -429,7 +449,7 @@ method is_simple_expr = function | Ccmpf _ -> List.for_all self#is_simple_expr args end | Cassign _ | Cifthenelse _ | Cswitch _ | Ccatch _ | Cexit _ - | Ctrywith _ -> false + | Ctrywith _ | Cregion _ | Ctail _ -> false (* Analyses the effects and coeffects of an expression. This is used across a whole list of expressions with a view to determining which expressions @@ -463,7 +483,8 @@ method effects_of exp = | Cextcall { effects = e; coeffects = ce; } -> EC.create (select_effects e) (select_coeffects ce) | Capply _ | Cprobe _ | Copaque -> EC.arbitrary - | Calloc -> EC.none + | Calloc Alloc_heap -> EC.none + | Calloc Alloc_local -> EC.coeffect_only Coeffect.Arbitrary | Cstore _ -> EC.effect_only Effect.Arbitrary | Cprefetch _ -> EC.arbitrary | Craise _ | Ccheckbound -> EC.effect_only Effect.Raise @@ -477,7 +498,8 @@ method effects_of exp = EC.none in EC.join from_op (EC.join_list_map args self#effects_of) - | Cassign _ | Cswitch _ | Ccatch _ | Cexit _ | Ctrywith _ -> + | Cassign _ | Cswitch _ | Ccatch _ | Cexit _ | Ctrywith _ + | Cregion _ | Ctail _ -> EC.arbitrary (* Says whether an integer constant is a suitable immediate argument for @@ -555,7 +577,7 @@ method select_operation op args _dbg = match init with | Lambda.Root_initialization -> false | Lambda.Heap_initialization -> false - | Lambda.Assignment -> true + | Lambda.Assignment | Lambda.Local_assignment -> true in if chunk = Word_int || chunk = Word_val then begin let (op, newarg2) = self#select_store is_assign addr arg2 in @@ -564,7 +586,7 @@ method select_operation op args _dbg = (Istore(chunk, addr, is_assign), [arg2; eloc]) (* Inversion addr/datum in Istore *) end - | (Calloc, _) -> (Ialloc {bytes = 0; dbginfo = []}), args + | (Calloc mode, _) -> (Ialloc {bytes = 0; dbginfo = []; mode}), args | (Caddi, _) -> self#select_arith_comm Iadd args | (Csubi, _) -> self#select_arith Isub args | (Cmuli, _) -> self#select_arith_comm Imul args @@ -713,6 +735,9 @@ method insert_op env op rs rd = at the end of the self sequence *) method emit_expr (env:environment) exp = + (* Environment used in recursive calls not in tail position *) + let env' = + if env.region_tail then {env with region_tail=false} else env in match exp with Cconst_int (n, _dbg) -> let r = self#regs_for typ_int in @@ -740,12 +765,12 @@ method emit_expr (env:environment) exp = Misc.fatal_error("Selection.emit_expr: unbound var " ^ V.unique_name v) end | Clet(v, e1, e2) -> - begin match self#emit_expr env e1 with + begin match self#emit_expr env' e1 with None -> None | Some r1 -> self#emit_expr (self#bind_let env v r1) e2 end | Clet_mut(v, k, e1, e2) -> - begin match self#emit_expr env e1 with + begin match self#emit_expr env' e1 with None -> None | Some r1 -> self#emit_expr (self#bind_let_mut env v k r1) e2 end @@ -757,7 +782,7 @@ method emit_expr (env:environment) exp = env_find_mut v env with Not_found -> Misc.fatal_error ("Selection.emit_expr: unbound var " ^ V.name v) in - begin match self#emit_expr env e1 with + begin match self#emit_expr env' e1 with None -> None | Some r1 -> self#insert_moves env r1 rv; Some [||] @@ -765,13 +790,13 @@ method emit_expr (env:environment) exp = | Ctuple [] -> Some [||] | Ctuple exp_list -> - begin match self#emit_parts_list env exp_list with + begin match self#emit_parts_list env' exp_list with None -> None | Some(simple_list, ext_env) -> Some(self#emit_tuple ext_env simple_list) end | Cop(Craise k, [arg], dbg) -> - begin match self#emit_expr env arg with + begin match self#emit_expr env' arg with None -> None | Some r1 -> let rd = [|Proc.loc_exn_bucket|] in @@ -788,7 +813,7 @@ method emit_expr (env:environment) exp = Some (self#insert_op_debug env Iopaque dbg rs rs) end | Cop(op, args, dbg) -> - begin match self#emit_parts_list env args with + begin match self#emit_parts_list env' args with None -> None | Some(simple_args, env) -> let ty = oper_result_type op in @@ -826,13 +851,15 @@ method emit_expr (env:environment) exp = self#insert_move_results env loc_res rd stack_ofs; set_traps_for_raise env; if returns then Some rd else None - | Ialloc { bytes = _; } -> + | Ialloc { bytes = _; mode } -> let rd = self#regs_for typ_val in let bytes = size_expr env (Ctuple new_args) in assert (bytes mod Arch.size_addr = 0); let alloc_words = bytes / Arch.size_addr in let op = - Ialloc { bytes; dbginfo = [{alloc_words; alloc_dbg = dbg}] } + Ialloc { bytes; + dbginfo = [{alloc_words; alloc_dbg = dbg}]; + mode } in self#insert_debug env (Iop op) dbg [||] rd; self#emit_stores env new_args rd; @@ -850,13 +877,13 @@ method emit_expr (env:environment) exp = Some (self#insert_op_debug env op dbg r1 rd) end | Csequence(e1, e2) -> - begin match self#emit_expr env e1 with + begin match self#emit_expr env' e1 with None -> None | Some _ -> self#emit_expr env e2 end | Cifthenelse(econd, _ifso_dbg, eif, _ifnot_dbg, eelse, _dbg) -> let (cond, earg) = self#select_condition econd in - begin match self#emit_expr env earg with + begin match self#emit_expr env' earg with None -> None | Some rarg -> let (rif, sif) = self#emit_sequence env eif in @@ -867,7 +894,7 @@ method emit_expr (env:environment) exp = r end | Cswitch(esel, index, ecases, _dbg) -> - begin match self#emit_expr env esel with + begin match self#emit_expr env' esel with None -> None | Some rsel -> let rscases = @@ -892,6 +919,9 @@ method emit_expr (env:environment) exp = (nfail, ids, rs, e2, dbg)) handlers in + let env = + (* Disable region-fusion on loops *) + match rec_flag with Recursive -> env' | Nonrecursive -> env in let env, handlers_map = (* Since the handlers may be recursive, and called from the body, the same environment is used for translating both the handlers and @@ -957,13 +987,13 @@ method emit_expr (env:environment) exp = [||] [||]; r | Cexit (lbl,args,traps) -> - begin match self#emit_parts_list env args with + begin match self#emit_parts_list env' args with None -> None | Some (simple_list, ext_env) -> begin match lbl with | Lbl nfail -> let src = self#emit_tuple ext_env simple_list in - let dest_args, trap_stack = + let dest_args, dest_regions, trap_stack = try env_find_static_exception nfail env with Not_found -> Misc.fatal_error ("Selection.emit_expr: unbound label "^ @@ -976,6 +1006,10 @@ method emit_expr (env:environment) exp = Array.iter (fun reg -> assert(reg.typ <> Addr)) src; self#insert_moves env src tmp_regs ; self#insert_moves env tmp_regs (Array.concat dest_args) ; + begin match env_close_regions env dest_regions with + | None -> () + | Some regs -> self#insert env (Iop Iendregion) regs [||] + end; self#insert env (Iexit (nfail, traps)) [||] [||]; set_traps nfail trap_stack env.trap_stack traps; None @@ -992,6 +1026,10 @@ method emit_expr (env:environment) exp = end end | Ctrywith(e1, kind, v, e2, _dbg) -> + (* This region is used only to clean up local allocations in the + exceptional path. It need not be ended in the non-exception case. *) + let reg = self#regs_for typ_int in + self#insert env (Iop Ibeginregion) [| |] reg; let env_body = env_enter_trywith env kind in let (r1, s1) = self#emit_sequence env_body e1 in let rv = self#regs_for typ_val in @@ -1002,18 +1040,18 @@ method emit_expr (env:environment) exp = (Itrywith(s1#extract, kind, (env_handler.trap_stack, instr_cons (Iop Imove) [|Proc.loc_exn_bucket|] rv - (s2#extract)))) + (instr_cons (Iop Iendregion) reg [| |] s2#extract)))) [||] [||]; r in let env = env_add v rv env in - match kind with + begin match kind with | Regular -> with_handler env e2 | Delayed lbl -> begin match env_find_static_exception lbl env_body with - | (_, { contents = Reachable ts; }) -> + | (_, _, { contents = Reachable ts; }) -> with_handler (env_set_trap_stack env ts) e2 - | (_, { contents = Unreachable; }) -> + | (_, _, { contents = Unreachable; }) -> let unreachable = Cmm.(Cop ((Cload (Word_int, Mutable)), [Cconst_int (0, Debuginfo.none)], @@ -1025,6 +1063,20 @@ method emit_expr (env:environment) exp = | exception Not_found -> Misc.fatal_errorf "Selection.emit_expr: Unbound handler %d" lbl end + end + | Cregion e -> + let reg = self#regs_for typ_int in + self#insert env (Iop Ibeginregion) [| |] reg; + let env = { env with regions = reg::env.regions; region_tail = true } in + begin match self#emit_expr env e with + None -> None + | Some _ as res -> + self#insert env (Iop Iendregion) reg [| |]; + res + end + | Ctail e -> + assert env.region_tail; + self#emit_expr env e method private emit_sequence (env:environment) exp = let s = {< instr_seq = dummy_instr >} in @@ -1199,39 +1251,52 @@ method emit_stores env data regs_addr = (* Same, but in tail position *) -method private emit_return (env:environment) exp (traps:trap_action list) = - match self#emit_expr env exp with + +method private insert_return (env:environment) r (traps:trap_action list) = + match r with None -> () | Some r -> let loc = Proc.loc_results (Reg.typv r) in + if env.region_tail then + self#insert env (Iop Iendregion) (List.hd env.regions) [||]; self#insert_moves env r loc; self#insert env (Ireturn traps) loc [||] +method private emit_return (env:environment) exp traps = + self#insert_return env (self#emit_expr env exp) traps + method emit_tail (env:environment) exp = + let env' = + if env.region_tail then {env with region_tail=false} else env in match exp with Clet(v, e1, e2) -> - begin match self#emit_expr env e1 with + begin match self#emit_expr env' e1 with None -> () | Some r1 -> self#emit_tail (self#bind_let env v r1) e2 end | Clet_mut (v, k, e1, e2) -> - begin match self#emit_expr env e1 with + begin match self#emit_expr env' e1 with None -> () | Some r1 -> self#emit_tail (self#bind_let_mut env v k r1) e2 end | Cphantom_let (_var, _defining_expr, body) -> self#emit_tail env body - | Cop((Capply ty) as op, args, dbg) -> - begin match self#emit_parts_list env args with + | Cop((Capply(ty, pos)) as op, args, dbg) -> + let tail = (pos = Lambda.Rc_close_at_apply) in + let endregion = env.region_tail in + begin match self#emit_parts_list env' args with None -> () | Some(simple_args, env) -> let (new_op, new_args) = self#select_operation op simple_args dbg in match new_op with Icall_ind -> let r1 = self#emit_tuple env new_args in + if endregion && tail then + self#insert env (Iop Iendregion) (List.hd env.regions) [||]; + let endregion = endregion && not tail in let rarg = Array.sub r1 1 (Array.length r1 - 1) in let (loc_arg, stack_ofs) = Proc.loc_arguments (Reg.typv rarg) in - if stack_ofs = 0 && trap_stack_is_empty env then begin + if stack_ofs = 0 && trap_stack_is_empty env && not endregion then begin let call = Iop (Itailcall_ind) in self#insert_moves env rarg loc_arg; self#insert_debug env call dbg @@ -1243,17 +1308,26 @@ method emit_tail (env:environment) exp = self#insert_debug env (Iop new_op) dbg (Array.append [|r1.(0)|] loc_arg) loc_res; set_traps_for_raise env; - self#insert env (Iop(Istackoffset(-stack_ofs))) [||] [||]; + if not endregion then begin + self#insert env (Iop(Istackoffset(-stack_ofs))) [||] [||] + end else begin + self#insert_move_results env loc_res rd stack_ofs; + self#insert env (Iop Iendregion) (List.hd env.regions) [||]; + self#insert_moves env rd loc_res + end; self#insert env (Ireturn (pop_all_traps env)) loc_res [||] end | Icall_imm { func; } -> let r1 = self#emit_tuple env new_args in + if endregion && tail then + self#insert env (Iop Iendregion) (List.hd env.regions) [||]; + let endregion = endregion && not tail in let (loc_arg, stack_ofs) = Proc.loc_arguments (Reg.typv r1) in - if stack_ofs = 0 && trap_stack_is_empty env then begin + if stack_ofs = 0 && trap_stack_is_empty env && not endregion then begin let call = Iop (Itailcall_imm { func; }) in self#insert_moves env r1 loc_arg; self#insert_debug env call dbg loc_arg [||]; - end else if func = !current_function_name && trap_stack_is_empty env then begin + end else if func = !current_function_name && trap_stack_is_empty env && not endregion then begin let call = Iop (Itailcall_imm { func; }) in let loc_arg' = Proc.loc_parameters (Reg.typv r1) in self#insert_moves env r1 loc_arg'; @@ -1264,19 +1338,25 @@ method emit_tail (env:environment) exp = self#insert_move_args env r1 loc_arg stack_ofs; self#insert_debug env (Iop new_op) dbg loc_arg loc_res; set_traps_for_raise env; - self#insert env (Iop(Istackoffset(-stack_ofs))) [||] [||]; + if not endregion then begin + self#insert env (Iop(Istackoffset(-stack_ofs))) [||] [||] + end else begin + self#insert_move_results env loc_res rd stack_ofs; + self#insert env (Iop Iendregion) (List.hd env.regions) [||]; + self#insert_moves env rd loc_res + end; self#insert env (Ireturn (pop_all_traps env)) loc_res [||] end | _ -> Misc.fatal_error "Selection.emit_tail" end | Csequence(e1, e2) -> - begin match self#emit_expr env e1 with + begin match self#emit_expr env' e1 with None -> () | Some _ -> self#emit_tail env e2 end | Cifthenelse(econd, _ifso_dbg, eif, _ifnot_dbg, eelse, _dbg) -> let (cond, earg) = self#select_condition econd in - begin match self#emit_expr env earg with + begin match self#emit_expr env' earg with None -> () | Some rarg -> self#insert env @@ -1285,7 +1365,7 @@ method emit_tail (env:environment) exp = rarg [||] end | Cswitch(esel, index, ecases, _dbg) -> - begin match self#emit_expr env esel with + begin match self#emit_expr env' esel with None -> () | Some rsel -> let cases = @@ -1306,6 +1386,9 @@ method emit_tail (env:environment) exp = ids in (nfail, ids, rs, e2, dbg)) handlers in + let env = + (* Disable region-fusion on loops *) + match rec_flag with Recursive -> env' | Nonrecursive -> env in let env, handlers_map = List.fold_left (fun (env, map) (nfail, ids, rs, e2, dbg) -> let env, r = env_add_static_exception nfail rs env in @@ -1349,6 +1432,10 @@ method emit_tail (env:environment) exp = self#insert env (Icatch(rec_flag, env.trap_stack, new_handlers, s_body)) [||] [||] | Ctrywith(e1, kind, v, e2, _dbg) -> + (* This region is used only to clean up local allocations in the + exceptional path. It need not be ended in the non-exception case. *) + let reg = self#regs_for typ_int in + self#insert env (Iop Ibeginregion) [| |] reg; let env_body = env_enter_trywith env kind in let s1 = self#emit_tail_sequence env_body e1 in let rv = self#regs_for typ_val in @@ -1357,7 +1444,8 @@ method emit_tail (env:environment) exp = self#insert env (Itrywith(s1, kind, (env_handler.trap_stack, - instr_cons (Iop Imove) [|Proc.loc_exn_bucket|] rv s2))) + instr_cons (Iop Imove) [|Proc.loc_exn_bucket|] rv + (instr_cons (Iop Iendregion) reg [| |] s2)))) [||] [||] in let env = env_add v rv env in @@ -1365,9 +1453,9 @@ method emit_tail (env:environment) exp = | Regular -> with_handler env e2 | Delayed lbl -> begin match env_find_static_exception lbl env_body with - | (_, { contents = Reachable ts; }) -> + | (_, _, { contents = Reachable ts; }) -> with_handler (env_set_trap_stack env ts) e2 - | (_, { contents = Unreachable; }) -> + | (_, _, { contents = Unreachable; }) -> let unreachable = Cmm.(Cop ((Cload (Word_int, Mutable)), [Cconst_int (0, Debuginfo.none)], @@ -1380,6 +1468,20 @@ method emit_tail (env:environment) exp = Misc.fatal_errorf "Selection.emit_expr: Unbound handler %d" lbl end end + | Cregion e -> + if env.region_tail then + self#emit_return env exp (pop_all_traps env) + else begin + let reg = self#regs_for typ_int in + self#insert env (Iop Ibeginregion) [| |] reg; + let env' = { env with regions = reg::env.regions; region_tail = true } in + self#emit_tail env' e + end + | Ctail e -> + assert env.region_tail; + self#insert env' (Iop Iendregion) (List.hd env.regions) [| |]; + self#emit_tail { env with regions = List.tl env.regions; + region_tail = false } e | Cop _ | Cconst_int _ | Cconst_natint _ | Cconst_float _ | Cconst_symbol _ | Cvar _ diff --git a/file_formats/cmx_format.mli b/file_formats/cmx_format.mli index 7e9550a7943..137109b3f43 100644 --- a/file_formats/cmx_format.mli +++ b/file_formats/cmx_format.mli @@ -36,15 +36,16 @@ type export_info = | Flambda1 of Export_info.t | Flambda2 of Flambda2_cmx.Flambda_cmx_format.t option +type apply_fn := int * Lambda.alloc_mode type unit_infos = { mutable ui_name: modname; (* Name of unit implemented *) mutable ui_symbol: string; (* Prefix for symbols *) mutable ui_defines: string list; (* Unit and sub-units implemented *) mutable ui_imports_cmi: crcs; (* Interfaces imported *) mutable ui_imports_cmx: crcs; (* Infos imported *) - mutable ui_curry_fun: int list; (* Currying functions needed *) - mutable ui_apply_fun: int list; (* Apply functions needed *) - mutable ui_send_fun: int list; (* Send functions needed *) + mutable ui_curry_fun: Clambda.arity list; (* Currying functions needed *) + mutable ui_apply_fun: apply_fn list; (* Apply functions needed *) + mutable ui_send_fun: apply_fn list; (* Send functions needed *) mutable ui_export_info: export_info; mutable ui_force_link: bool } (* Always linked *) diff --git a/middle_end/clambda.ml b/middle_end/clambda.ml index c8ea80fd2d4..283f8d3a8fa 100644 --- a/middle_end/clambda.ml +++ b/middle_end/clambda.ml @@ -20,6 +20,8 @@ open Asttypes open Lambda type function_label = string +type arity = Lambda.function_kind * int +type apply_kind = Lambda.region_close * Lambda.alloc_mode type ustructured_constant = | Uconst_float of float diff --git a/middle_end/clambda.mli b/middle_end/clambda.mli index f4fad913706..83795ba6372 100644 --- a/middle_end/clambda.mli +++ b/middle_end/clambda.mli @@ -20,6 +20,8 @@ open Asttypes open Lambda type function_label = string +type arity = Lambda.function_kind * int +type apply_kind = Lambda.region_close * Lambda.alloc_mode type ustructured_constant = | Uconst_float of float diff --git a/middle_end/closure/closure.ml b/middle_end/closure/closure.ml index 12fda0945bc..9d1ea4ccb5d 100644 --- a/middle_end/closure/closure.ml +++ b/middle_end/closure/closure.ml @@ -121,7 +121,7 @@ let prim_size prim args = | Psetfield(_f, isptr, init) -> begin match init with | Root_initialization -> 1 (* never causes a write barrier hit *) - | Assignment | Heap_initialization -> + | Assignment | Local_assignment | Heap_initialization -> match isptr with | Pointer -> 4 | Immediate -> 1 diff --git a/middle_end/compilenv.ml b/middle_end/compilenv.ml index 4695c2d9bd0..c2af7450065 100644 --- a/middle_end/compilenv.ml +++ b/middle_end/compilenv.ml @@ -343,18 +343,18 @@ let approx_env () = !merged_environment (* Record that a currying function or application function is needed *) -let need_curry_fun n = - if not (List.mem n current_unit.ui_curry_fun) then - current_unit.ui_curry_fun <- n :: current_unit.ui_curry_fun +let need_curry_fun arity = + if not (List.mem arity current_unit.ui_curry_fun) then + current_unit.ui_curry_fun <- arity :: current_unit.ui_curry_fun -let need_apply_fun n = +let need_apply_fun n mode = assert(n > 0); - if not (List.mem n current_unit.ui_apply_fun) then - current_unit.ui_apply_fun <- n :: current_unit.ui_apply_fun + if not (List.mem (n,mode) current_unit.ui_apply_fun) then + current_unit.ui_apply_fun <- (n,mode) :: current_unit.ui_apply_fun -let need_send_fun n = - if not (List.mem n current_unit.ui_send_fun) then - current_unit.ui_send_fun <- n :: current_unit.ui_send_fun +let need_send_fun n mode = + if not (List.mem (n,mode) current_unit.ui_send_fun) then + current_unit.ui_send_fun <- (n,mode) :: current_unit.ui_send_fun (* Write the description of the current unit *) diff --git a/middle_end/compilenv.mli b/middle_end/compilenv.mli index 76165a64305..fb93afd906b 100644 --- a/middle_end/compilenv.mli +++ b/middle_end/compilenv.mli @@ -104,9 +104,9 @@ val get_global_info' : Ident.t -> Cmx_format.export_info option val flambda2_set_export_info : Flambda2_cmx.Flambda_cmx_format.t -> unit (* Set the export information for the current unit (Flambda 2 only). *) -val need_curry_fun: int -> unit -val need_apply_fun: int -> unit -val need_send_fun: int -> unit +val need_curry_fun: Clambda.arity -> unit +val need_apply_fun: int -> Lambda.alloc_mode -> unit +val need_send_fun: int -> Lambda.alloc_mode -> unit (* Record the need of a currying (resp. application, message sending) function with the given arity *) diff --git a/middle_end/flambda/inlining_cost.ml b/middle_end/flambda/inlining_cost.ml index b05b6fe45e9..78ac3b07b26 100644 --- a/middle_end/flambda/inlining_cost.ml +++ b/middle_end/flambda/inlining_cost.ml @@ -26,7 +26,7 @@ let prim_size (prim : Clambda_primitives.primitive) args = | Psetfield (_, isptr, init) -> begin match init with | Root_initialization -> 1 (* never causes a write barrier hit *) - | Assignment | Heap_initialization -> + | Assignment | Local_assignment | Heap_initialization -> match isptr with | Pointer -> 4 | Immediate -> 1 diff --git a/middle_end/flambda2/from_lambda/lambda_conversions.ml b/middle_end/flambda2/from_lambda/lambda_conversions.ml index 31ef7f23ab0..feceec8c2c0 100644 --- a/middle_end/flambda2/from_lambda/lambda_conversions.ml +++ b/middle_end/flambda2/from_lambda/lambda_conversions.ml @@ -185,6 +185,7 @@ let convert_init_or_assign (i_or_a : L.initialization_or_assignment) : P.Init_or_assign.t = match i_or_a with | Assignment -> Assignment + | Local_assignment -> assert false (* temporary *) | Heap_initialization -> Initialization | Root_initialization -> Misc.fatal_error "[Root_initialization] should not appear in Flambda input" diff --git a/middle_end/flambda2/to_cmm/to_cmm.ml b/middle_end/flambda2/to_cmm/to_cmm.ml index 8c274f386f5..98b19a6978e 100644 --- a/middle_end/flambda2/to_cmm/to_cmm.ml +++ b/middle_end/flambda2/to_cmm/to_cmm.ml @@ -1038,7 +1038,7 @@ and apply_call env e = let meth, env, _ = simple env f in let kind = meth_kind kind in let args, env, _ = arg_list env args in - C.send kind meth obj args dbg, env, effs + C.send kind meth obj args (Rc_normal, Alloc_heap) dbg, env, effs (* function calls that have an exn continuation with extra arguments must be wrapped with assignments for the mutable variables used to pass the extra @@ -1456,7 +1456,7 @@ and fill_slot decls startenv elts env acc offset slot = | Env_var v -> let field, env, eff = simple env (Var_within_closure.Map.find v elts) in field :: acc, offset + 1, env, eff - | Closure (c : Closure_id.t) -> + | Closure (c : Closure_id.t) -> ( let code_id = Closure_id.Map.find c decls in (* CR-someday mshinwell: We should probably use the code's [dbg], but it would be tricky to get hold of, and this is very unlikely to make any @@ -1465,22 +1465,25 @@ and fill_slot decls startenv elts env acc offset slot = let code_symbol = Code_id.code_symbol code_id in let code_name = Linkage_name.to_string (Symbol.linkage_name code_symbol) in let arity = Env.get_func_decl_params_arity env code_id in + let arity = + if arity >= 0 then Lambda.Curried, arity else Lambda.Tupled, -arity + in let closure_info = C.closure_info ~arity ~startenv:(startenv - offset) in (* We build here the **reverse** list of fields for the closure *) - if arity = 1 || arity = 0 - then + match arity with + | Curried, (1 | 0) -> let acc = C.nativeint ~dbg closure_info :: C.symbol ~dbg code_name :: acc in acc, offset + 2, env, Ece.pure - else + | arity -> let acc = C.symbol ~dbg code_name :: C.nativeint ~dbg closure_info :: C.symbol ~dbg (C.curry_function_sym arity) :: acc in - acc, offset + 3, env, Ece.pure + acc, offset + 3, env, Ece.pure) and fill_up_to j acc i = if i > j then Misc.fatal_errorf "Problem while filling up a closure in to_cmm"; diff --git a/middle_end/flambda2/to_cmm/to_cmm_helper.ml b/middle_end/flambda2/to_cmm/to_cmm_helper.ml index 1f0824408a3..0ff040e6dd6 100644 --- a/middle_end/flambda2/to_cmm/to_cmm_helper.ml +++ b/middle_end/flambda2/to_cmm/to_cmm_helper.ml @@ -134,11 +134,11 @@ let unbox_number ?(dbg = Debuginfo.none) kind arg = let box_number ?(dbg = Debuginfo.none) kind arg = match (kind : Flambda_kind.Boxable_number.t) with - | Naked_float -> box_float dbg arg + | Naked_float -> box_float dbg Alloc_heap arg | Untagged_immediate -> tag_int arg dbg | _ -> let primitive_kind = primitive_boxed_int_of_boxable_number kind in - box_int_gen dbg primitive_kind arg + box_int_gen dbg primitive_kind Alloc_heap arg let box_int64 ?dbg arg = box_number ?dbg Flambda_kind.Boxable_number.Naked_int64 arg @@ -537,7 +537,7 @@ let ccatch ~rec_flag ~handlers ~body = (* Function calls *) let direct_call ?(dbg = Debuginfo.none) ty f_code_sym args = - Cmm.Cop (Cmm.Capply ty, f_code_sym :: args, dbg) + Cmm.Cop (Cmm.Capply (ty, Rc_normal), f_code_sym :: args, dbg) let indirect_call ?(dbg = Debuginfo.none) ty f = function | [arg] -> @@ -546,13 +546,13 @@ let indirect_call ?(dbg = Debuginfo.none) ty f = function let v' = Backend_var.With_provenance.create v in letin v' f @@ Cmm.Cop - ( Cmm.Capply ty, + ( Cmm.Capply (ty, Rc_normal), [load Cmm.Word_int Asttypes.Mutable (var v); arg; var v], dbg ) | args -> let arity = List.length args in - let l = (symbol (apply_function_sym arity) :: args) @ [f] in - Cmm.Cop (Cmm.Capply ty, l, dbg) + let l = (symbol (apply_function_sym arity Alloc_heap) :: args) @ [f] in + Cmm.Cop (Cmm.Capply (ty, Rc_normal), l, dbg) let indirect_full_call ?(dbg = Debuginfo.none) ty f = function (* the single-argument case is already optimized by indirect_call *) @@ -565,7 +565,8 @@ let indirect_full_call ?(dbg = Debuginfo.none) ty f = function let fun_ptr = load Cmm.Word_int Asttypes.Mutable @@ field_address (var v) 2 dbg in - letin v' f @@ Cmm.Cop (Cmm.Capply ty, (fun_ptr :: args) @ [var v], dbg) + letin v' f + @@ Cmm.Cop (Cmm.Capply (ty, Rc_normal), (fun_ptr :: args) @ [var v], dbg) (* Cmm phrases *) diff --git a/middle_end/flambda2/to_cmm/to_cmm_static.ml b/middle_end/flambda2/to_cmm/to_cmm_static.ml index 29f87045c6d..83259030bfa 100644 --- a/middle_end/flambda2/to_cmm/to_cmm_static.ml +++ b/middle_end/flambda2/to_cmm/to_cmm_static.ml @@ -206,7 +206,7 @@ and fill_static_slot s symbs decls startenv elts env acc offset updates slot = env, [C.cint 1n], updates in env, List.rev fields @ acc, offset + 1, updates - | Closure c -> + | Closure c -> ( let code_id = Closure_id.Map.find c decls in let symb = Closure_id.Map.find c symbs in let external_name = symbol symb in @@ -214,19 +214,22 @@ and fill_static_slot s symbs decls startenv elts env acc offset updates slot = let code_name = Linkage_name.to_string (Symbol.linkage_name code_symbol) in let acc = List.rev (C.define_symbol ~global:true external_name) @ acc in let arity = Env.get_func_decl_params_arity env code_id in + let arity = + if arity >= 0 then Lambda.Curried, arity else Lambda.Tupled, -arity + in let closure_info = C.closure_info ~arity ~startenv:(startenv - offset) in (* We build here the **reverse** list of fields for the closure *) - if arity = 1 || arity = 0 - then + match arity with + | Curried, (1 | 0) -> let acc = C.cint closure_info :: C.symbol_address code_name :: acc in env, acc, offset + 2, updates - else + | arity -> let acc = C.symbol_address code_name :: C.cint closure_info :: C.symbol_address (C.curry_function_sym arity) :: acc in - env, acc, offset + 3, updates + env, acc, offset + 3, updates) and fill_static_up_to j acc i = if i = j then acc else fill_static_up_to j (C.cint 1n :: acc) (i + 1) diff --git a/middle_end/printclambda_primitives.ml b/middle_end/printclambda_primitives.ml index 6e0b126e05e..10bbc748b25 100644 --- a/middle_end/printclambda_primitives.ml +++ b/middle_end/printclambda_primitives.ml @@ -75,6 +75,7 @@ let primitive ppf (prim:Clambda_primitives.primitive) = | Heap_initialization -> "(heap-init)" | Root_initialization -> "(root-init)" | Assignment -> "" + | Local_assignment -> "(local)" in fprintf ppf "setfield_%s%s %i" instr init n | Psetfield_computed (ptr, init) -> @@ -88,6 +89,7 @@ let primitive ppf (prim:Clambda_primitives.primitive) = | Heap_initialization -> "(heap-init)" | Root_initialization -> "(root-init)" | Assignment -> "" + | Local_assignment -> "(local)" in fprintf ppf "setfield_%s%s_computed" instr init | Pfloatfield n -> fprintf ppf "floatfield %i" n @@ -97,6 +99,7 @@ let primitive ppf (prim:Clambda_primitives.primitive) = | Heap_initialization -> "(heap-init)" | Root_initialization -> "(root-init)" | Assignment -> "" + | Local_assignment -> "(local)" in fprintf ppf "setfloatfield%s %i" init n | Pduprecord (rep, size) -> diff --git a/ocaml/asmcomp/cmm_helpers.ml b/ocaml/asmcomp/cmm_helpers.ml index b0b4ac6bae4..50abf44d096 100644 --- a/ocaml/asmcomp/cmm_helpers.ml +++ b/ocaml/asmcomp/cmm_helpers.ml @@ -2180,6 +2180,7 @@ let assignment_kind (ptr: Lambda.immediate_or_pointer) (init: Lambda.initialization_or_assignment) = match init, ptr with + | Local_assignment, _ -> assert false (* temporary *) | Assignment, Pointer -> Caml_modify | Heap_initialization, Pointer -> Caml_initialize | Assignment, Immediate diff --git a/ocaml/asmcomp/printcmm.ml b/ocaml/asmcomp/printcmm.ml index 3062ef10530..58b6f49fe8a 100644 --- a/ocaml/asmcomp/printcmm.ml +++ b/ocaml/asmcomp/printcmm.ml @@ -126,6 +126,7 @@ let operation d = function | Lambda.Heap_initialization -> "(heap-init)" | Lambda.Root_initialization -> "(root-init)" | Lambda.Assignment -> "" + | Lambda.Local_assignment -> "(local)" in Printf.sprintf "store %s%s" (chunk c) init | Caddi -> "+" diff --git a/ocaml/asmcomp/selectgen.ml b/ocaml/asmcomp/selectgen.ml index af11cd5f25d..0bc64bbba7e 100644 --- a/ocaml/asmcomp/selectgen.ml +++ b/ocaml/asmcomp/selectgen.ml @@ -456,7 +456,7 @@ method select_operation op args _dbg = match init with | Lambda.Root_initialization -> false | Lambda.Heap_initialization -> false - | Lambda.Assignment -> true + | Lambda.Assignment | Lambda.Local_assignment -> true in if chunk = Word_int || chunk = Word_val then begin let (op, newarg2) = self#select_store is_assign addr arg2 in diff --git a/ocaml/lambda/lambda.ml b/ocaml/lambda/lambda.ml index df625ec2629..b42cbf06f7e 100644 --- a/ocaml/lambda/lambda.ml +++ b/ocaml/lambda/lambda.ml @@ -34,6 +34,7 @@ type immediate_or_pointer = type initialization_or_assignment = | Assignment + | Local_assignment | Heap_initialization | Root_initialization @@ -45,6 +46,14 @@ type field_read_semantics = | Reads_agree | Reads_vary +type alloc_mode = + | Alloc_heap + | Alloc_local + +type region_close = + | Rc_close_at_apply + | Rc_normal + type primitive = | Pidentity | Pbytes_to_string @@ -1005,3 +1014,21 @@ let mod_field ?(read_semantics=Reads_agree) pos = let mod_setfield pos = Psetfield (pos, Pointer, Root_initialization) + +let join_mode a b = + match a, b with + | Alloc_local, _ | _, Alloc_local -> Alloc_local + | Alloc_heap, Alloc_heap -> Alloc_heap + +let sub_mode a b = + match a, b with + | Alloc_heap, _ -> true + | _, Alloc_local -> true + | Alloc_local, Alloc_heap -> false + +let eq_mode a b = + match a, b with + | Alloc_heap, Alloc_heap -> true + | Alloc_local, Alloc_local -> true + | Alloc_heap, Alloc_local -> false + | Alloc_local, Alloc_heap -> false diff --git a/ocaml/lambda/lambda.mli b/ocaml/lambda/lambda.mli index 75e9e408166..6f3e353f12e 100644 --- a/ocaml/lambda/lambda.mli +++ b/ocaml/lambda/lambda.mli @@ -36,6 +36,7 @@ type immediate_or_pointer = type initialization_or_assignment = | Assignment + | Local_assignment (* mutations of blocks that may be locally allocated *) (* Initialization of in heap values, like [caml_initialize] C primitive. The field should not have been read before and initialization should happen only once. *) @@ -52,6 +53,14 @@ type field_read_semantics = | Reads_agree | Reads_vary +type alloc_mode = + | Alloc_heap + | Alloc_local + +type region_close = + | Rc_close_at_apply + | Rc_normal + type primitive = | Pidentity | Pbytes_to_string @@ -461,6 +470,10 @@ val max_arity : unit -> int This is unlimited ([max_int]) for bytecode, but limited (currently to 126) for native code. *) +val join_mode : alloc_mode -> alloc_mode -> alloc_mode +val sub_mode : alloc_mode -> alloc_mode -> bool +val eq_mode : alloc_mode -> alloc_mode -> bool + (***********************) (* For static failures *) (***********************) diff --git a/ocaml/lambda/printlambda.ml b/ocaml/lambda/printlambda.ml index 423858776b5..fa600d949d9 100644 --- a/ocaml/lambda/printlambda.ml +++ b/ocaml/lambda/printlambda.ml @@ -217,6 +217,7 @@ let primitive ppf = function | Heap_initialization -> "(heap-init)" | Root_initialization -> "(root-init)" | Assignment -> "" + | Local_assignment -> "(local)" in fprintf ppf "setfield_%s%s %i" instr init n | Psetfield_computed (ptr, init) -> @@ -230,6 +231,7 @@ let primitive ppf = function | Heap_initialization -> "(heap-init)" | Root_initialization -> "(root-init)" | Assignment -> "" + | Local_assignment -> "(local)" in fprintf ppf "setfield_%s%s_computed" instr init | Pfloatfield (n, sem) -> @@ -240,6 +242,7 @@ let primitive ppf = function | Heap_initialization -> "(heap-init)" | Root_initialization -> "(root-init)" | Assignment -> "" + | Local_assignment -> "(local)" in fprintf ppf "setfloatfield%s %i" init n | Pduprecord (rep, size) -> fprintf ppf "duprecord %a %i" record_rep rep size diff --git a/ocaml/middle_end/closure/closure.ml b/ocaml/middle_end/closure/closure.ml index 9287b00dd2d..47341f5c7ba 100644 --- a/ocaml/middle_end/closure/closure.ml +++ b/ocaml/middle_end/closure/closure.ml @@ -121,7 +121,7 @@ let prim_size prim args = | Psetfield(_f, isptr, init) -> begin match init with | Root_initialization -> 1 (* never causes a write barrier hit *) - | Assignment | Heap_initialization -> + | Assignment | Local_assignment | Heap_initialization -> match isptr with | Pointer -> 4 | Immediate -> 1 diff --git a/ocaml/middle_end/flambda/inlining_cost.ml b/ocaml/middle_end/flambda/inlining_cost.ml index b05b6fe45e9..78ac3b07b26 100644 --- a/ocaml/middle_end/flambda/inlining_cost.ml +++ b/ocaml/middle_end/flambda/inlining_cost.ml @@ -26,7 +26,7 @@ let prim_size (prim : Clambda_primitives.primitive) args = | Psetfield (_, isptr, init) -> begin match init with | Root_initialization -> 1 (* never causes a write barrier hit *) - | Assignment | Heap_initialization -> + | Assignment | Local_assignment | Heap_initialization -> match isptr with | Pointer -> 4 | Immediate -> 1 diff --git a/ocaml/middle_end/printclambda_primitives.ml b/ocaml/middle_end/printclambda_primitives.ml index 6e0b126e05e..10bbc748b25 100644 --- a/ocaml/middle_end/printclambda_primitives.ml +++ b/ocaml/middle_end/printclambda_primitives.ml @@ -75,6 +75,7 @@ let primitive ppf (prim:Clambda_primitives.primitive) = | Heap_initialization -> "(heap-init)" | Root_initialization -> "(root-init)" | Assignment -> "" + | Local_assignment -> "(local)" in fprintf ppf "setfield_%s%s %i" instr init n | Psetfield_computed (ptr, init) -> @@ -88,6 +89,7 @@ let primitive ppf (prim:Clambda_primitives.primitive) = | Heap_initialization -> "(heap-init)" | Root_initialization -> "(root-init)" | Assignment -> "" + | Local_assignment -> "(local)" in fprintf ppf "setfield_%s%s_computed" instr init | Pfloatfield n -> fprintf ppf "floatfield %i" n @@ -97,6 +99,7 @@ let primitive ppf (prim:Clambda_primitives.primitive) = | Heap_initialization -> "(heap-init)" | Root_initialization -> "(root-init)" | Assignment -> "" + | Local_assignment -> "(local)" in fprintf ppf "setfloatfield%s %i" init n | Pduprecord (rep, size) -> diff --git a/ocaml/runtime/caml/domain_state.tbl b/ocaml/runtime/caml/domain_state.tbl index f094d37f7e2..62a136688b8 100644 --- a/ocaml/runtime/caml/domain_state.tbl +++ b/ocaml/runtime/caml/domain_state.tbl @@ -36,6 +36,11 @@ DOMAIN_STATE(struct caml_ephe_ref_table*, ephe_ref_table) DOMAIN_STATE(struct caml_custom_table*, custom_table) /* See minor_gc.c */ +DOMAIN_STATE(struct caml_local_arenas*, local_arenas) +DOMAIN_STATE(intnat, local_sp) +DOMAIN_STATE(void*, local_top) +DOMAIN_STATE(intnat, local_limit) + DOMAIN_STATE(struct mark_stack*, mark_stack) /* See major_gc.c */ diff --git a/ocaml/runtime/domain.c b/ocaml/runtime/domain.c index d4d8de53fcf..7ab701436e7 100644 --- a/ocaml/runtime/domain.c +++ b/ocaml/runtime/domain.c @@ -56,6 +56,11 @@ void caml_init_domain () Caml_state->external_raise = NULL; Caml_state->exn_bucket = Val_unit; + Caml_state->local_arenas = NULL; + Caml_state->local_sp = 0; + Caml_state->local_top = NULL; + Caml_state->local_limit = 0; + Caml_state->top_of_stack = NULL; Caml_state->bottom_of_stack = NULL; /* no stack initially */ Caml_state->last_return_address = 1; /* not in OCaml code initially */ diff --git a/testsuite/tools/parsecmm.mly b/testsuite/tools/parsecmm.mly index 9b3e29b59b5..1604e636240 100644 --- a/testsuite/tools/parsecmm.mly +++ b/testsuite/tools/parsecmm.mly @@ -219,7 +219,8 @@ expr: | LPAREN LETMUT letmutdef sequence RPAREN { make_letmutdef $3 $4 } | LPAREN ASSIGN IDENT expr RPAREN { Cassign(find_ident $3, $4) } | LPAREN APPLY location expr exprlist machtype RPAREN - { Cop(Capply $6, $4 :: List.rev $5, debuginfo ?loc:$3 ()) } + { Cop(Capply ($6, Lambda.Rc_normal), + $4 :: List.rev $5, debuginfo ?loc:$3 ()) } | LPAREN EXTCALL STRING exprlist machtype RPAREN {Cop(Cextcall {func=$3; ty=$5; alloc=false; builtin=false; @@ -228,7 +229,7 @@ expr: coeffects=Has_coeffects; ty_args=[];}, List.rev $4, debuginfo ())} - | LPAREN ALLOC exprlist RPAREN { Cop(Calloc, List.rev $3, debuginfo ()) } + | LPAREN ALLOC exprlist RPAREN { Cop(Calloc Lambda.Alloc_heap, List.rev $3, debuginfo ()) } | LPAREN SUBF expr RPAREN { Cop(Cnegf, [$3], debuginfo ()) } | LPAREN SUBF expr expr RPAREN { Cop(Csubf, [$3; $4], debuginfo ()) } | LPAREN unaryop expr RPAREN { Cop($2, [$3], debuginfo ()) } diff --git a/tools/objinfo.ml b/tools/objinfo.ml index 7514ecefb3c..dfa7bd66d93 100644 --- a/tools/objinfo.ml +++ b/tools/objinfo.ml @@ -174,11 +174,16 @@ let print_cmx_infos (ui, crc) = flush stdout; Format.printf "%a\n%!" Flambda2_cmx.Flambda_cmx_format.print cmx end; - let pr_funs _ fns = - List.iter (fun arity -> printf " %d" arity) fns in - printf "Currying functions:%a\n" pr_funs ui.ui_curry_fun; - printf "Apply functions:%a\n" pr_funs ui.ui_apply_fun; - printf "Send functions:%a\n" pr_funs ui.ui_send_fun; + let pr_afuns _ fns = + let mode = function Lambda.Alloc_heap -> "" | Lambda.Alloc_local -> "L" in + List.iter (fun (arity,m) -> printf " %d%s" arity (mode m)) fns in + let pr_cfuns _ fns = + List.iter (function + | (Lambda.Curried, a) -> printf " %dL" a + | (Lambda.Tupled, a) -> printf " -%d" a) fns in + printf "Currying functions:%a\n" pr_cfuns ui.ui_curry_fun; + printf "Apply functions:%a\n" pr_afuns ui.ui_apply_fun; + printf "Send functions:%a\n" pr_afuns ui.ui_send_fun; printf "Force link: %s\n" (if ui.ui_force_link then "YES" else "no") let print_cmxa_infos (lib : Cmx_format.library_infos) =