diff --git a/backend/cmm_helpers.ml b/backend/cmm_helpers.ml index 421dbbb292c..d53b75a915f 100644 --- a/backend/cmm_helpers.ml +++ b/backend/cmm_helpers.ml @@ -49,7 +49,8 @@ let bind_list name args fn = let caml_black = Nativeint.shift_left (Nativeint.of_int 3) 8 -let caml_local = Nativeint.shift_left (Nativeint.of_int 2) 8 +let caml_local = + Nativeint.shift_left (Nativeint.of_int (if Config.runtime5 then 3 else 2)) 8 (* cf. runtime/caml/gc.h *) (* Loads *) diff --git a/ocaml/asmcomp/cmm_helpers.ml b/ocaml/asmcomp/cmm_helpers.ml index 17fd586b6f2..1a052af2394 100644 --- a/ocaml/asmcomp/cmm_helpers.ml +++ b/ocaml/asmcomp/cmm_helpers.ml @@ -38,7 +38,9 @@ let bind_nonvar name arg fn = | _ -> let id = V.create_local name in Clet(VP.create id, arg, fn (Cvar id)) let caml_black = Nativeint.shift_left (Nativeint.of_int 3) 8 -let caml_local = Nativeint.shift_left (Nativeint.of_int 2) 8 +let caml_local = + Nativeint.shift_left (Nativeint.of_int (if Config.runtime5 then 3 else 2)) 8 +(* cf. runtime/caml/gc.h *) (* cf. runtime/caml/gc.h *) (* Loads *) diff --git a/ocaml/configure b/ocaml/configure index f86d51bf86e..25bb4cd5703 100755 --- a/ocaml/configure +++ b/ocaml/configure @@ -19290,8 +19290,6 @@ esac if [ x"$enable_runtime5" = x"yes" ]; then runtime_suffix= - # CR ocaml 5 runtime: forward port locals - enable_stack_allocation="no" else runtime_suffix=4 fi diff --git a/ocaml/configure.ac b/ocaml/configure.ac index c9fde7bd8e8..d816c5ff3ec 100644 --- a/ocaml/configure.ac +++ b/ocaml/configure.ac @@ -2120,8 +2120,6 @@ AS_CASE([$host], if [[ x"$enable_runtime5" = x"yes" ]]; then runtime_suffix= - # CR ocaml 5 runtime: forward port locals - enable_stack_allocation="no" else runtime_suffix=4 fi diff --git a/ocaml/otherlibs/systhreads/st_stubs.c b/ocaml/otherlibs/systhreads/st_stubs.c index fe1df205eca..e936af5ca6d 100644 --- a/ocaml/otherlibs/systhreads/st_stubs.c +++ b/ocaml/otherlibs/systhreads/st_stubs.c @@ -80,6 +80,7 @@ struct caml_thread_struct { /* Note: we do not save Caml_state->stack_cache, because it can safely be shared between all threads on the same domain. */ struct caml__roots_block *local_roots; /* saved value of local_roots */ + struct caml_local_arenas *local_arenas; int backtrace_pos; /* saved value of Caml_state->backtrace_pos */ backtrace_slot * backtrace_buffer; /* saved value of Caml_state->backtrace_buffer */ @@ -173,7 +174,8 @@ static void caml_thread_scan_roots( if (th != Active_thread) { if (th->current_stack != NULL) caml_do_local_roots(action, fflags, fdata, - th->local_roots, th->current_stack, th->gc_regs); + th->local_roots, th->current_stack, th->gc_regs, + th->local_arenas); } th = th->next; } while (th != Active_thread); @@ -196,6 +198,7 @@ static void save_runtime_state(void) this_thread->gc_regs_buckets = Caml_state->gc_regs_buckets; this_thread->exn_handler = Caml_state->exn_handler; this_thread->local_roots = Caml_state->local_roots; + this_thread->local_arenas = caml_get_local_arenas(Caml_state); this_thread->backtrace_pos = Caml_state->backtrace_pos; this_thread->backtrace_buffer = Caml_state->backtrace_buffer; this_thread->backtrace_last_exn = Caml_state->backtrace_last_exn; @@ -216,6 +219,7 @@ static void restore_runtime_state(caml_thread_t th) Caml_state->gc_regs_buckets = th->gc_regs_buckets; Caml_state->exn_handler = th->exn_handler; Caml_state->local_roots = th->local_roots; + caml_set_local_arenas(Caml_state, th->local_arenas); Caml_state->backtrace_pos = th->backtrace_pos; Caml_state->backtrace_buffer = th->backtrace_buffer; Caml_state->backtrace_last_exn = th->backtrace_last_exn; @@ -283,6 +287,7 @@ static caml_thread_t caml_thread_new_info(void) } th->c_stack = NULL; th->local_roots = NULL; + th->local_arenas = NULL; th->backtrace_pos = 0; th->backtrace_buffer = NULL; th->backtrace_last_exn = Val_unit; @@ -319,6 +324,8 @@ void caml_thread_free_info(caml_thread_t th) caml_free_stack(th->current_stack); caml_free_backtrace_buffer(th->backtrace_buffer); + // CR sdolan: free local arenas + /* Remark: we could share gc_regs_buckets between threads on a same domain, but this might break the invariant that it is always non-empty at the point where we switch from OCaml to C, so we diff --git a/ocaml/runtime/alloc.c b/ocaml/runtime/alloc.c index c401851fd0f..c8b9bba073d 100644 --- a/ocaml/runtime/alloc.c +++ b/ocaml/runtime/alloc.c @@ -193,6 +193,20 @@ CAMLexport value caml_alloc_string (mlsize_t len) return result; } +/* [len] is a number of bytes (chars) */ +CAMLexport value caml_alloc_local_string (mlsize_t len) +{ + mlsize_t offset_index; + mlsize_t wosize = (len + sizeof (value)) / sizeof (value); + value result; + + result = caml_alloc_local(wosize, String_tag); + Field (result, wosize - 1) = 0; + offset_index = Bsize_wsize (wosize) - 1; + Byte (result, offset_index) = offset_index - len; + return result; +} + /* [len] is a number of bytes (chars) */ CAMLexport value caml_alloc_initialized_string (mlsize_t len, const char *p) { diff --git a/ocaml/runtime/amd64.S b/ocaml/runtime/amd64.S index 0d4259411c1..afde04f5602 100644 --- a/ocaml/runtime/amd64.S +++ b/ocaml/runtime/amd64.S @@ -581,6 +581,20 @@ CFI_STARTPROC CFI_ENDPROC ENDFUNCTION(G(caml_allocN)) +FUNCTION(G(caml_call_local_realloc)) +CFI_STARTPROC + CFI_SIGNAL_FRAME + ENTER_FUNCTION + SAVE_ALL_REGS + SWITCH_OCAML_TO_C + C_call (GCALL(caml_local_realloc)) + SWITCH_C_TO_OCAML + RESTORE_ALL_REGS + LEAVE_FUNCTION + ret +CFI_ENDPROC +ENDFUNCTION(G(caml_call_local_realloc)) + /******************************************************************************/ /* Call a C function from OCaml */ /******************************************************************************/ diff --git a/ocaml/runtime/array.c b/ocaml/runtime/array.c index ed3b7d5f434..d265f6a99e3 100644 --- a/ocaml/runtime/array.c +++ b/ocaml/runtime/array.c @@ -197,8 +197,14 @@ CAMLprim value caml_floatarray_create(value len) return caml_process_pending_actions_with_root(result); } +CAMLprim value caml_floatarray_create_local(value len) +{ + mlsize_t wosize = Long_val(len) * Double_wosize; + return caml_alloc_local (wosize, Double_array_tag); +} + /* [len] is a [value] representing number of words or floats */ -CAMLprim value caml_make_vect(value len, value init) +static value make_vect_gen(value len, value init, int local) { CAMLparam2 (len, init); CAMLlocal1 (res); @@ -215,18 +221,22 @@ CAMLprim value caml_make_vect(value len, value init) d = Double_val(init); wsize = size * Double_wosize; if (wsize > Max_wosize) caml_invalid_argument("Array.make"); - res = caml_alloc(wsize, Double_array_tag); + res = local ? + caml_alloc_local(wsize, Double_array_tag) : + caml_alloc(wsize, Double_array_tag); for (i = 0; i < size; i++) { Store_double_flat_field(res, i, d); } #endif } else { - if (size <= Max_young_wosize) { + if (size > Max_wosize) caml_invalid_argument("Array.make"); + else if (local) { + res = caml_alloc_local(size, 0); + for (i = 0; i < size; i++) Field(res, i) = init; + } else if (size <= Max_young_wosize) { res = caml_alloc_small(size, 0); for (i = 0; i < size; i++) Field(res, i) = init; - } - else if (size > Max_wosize) caml_invalid_argument("Array.make"); - else { + } else { if (Is_block(init) && Is_young(init)) { /* We don't want to create so many major-to-minor references, so [init] is moved to the major heap by doing a minor GC. */ @@ -241,10 +251,21 @@ CAMLprim value caml_make_vect(value len, value init) } } /* Give the GC a chance to run, and run memprof callbacks */ - caml_process_pending_actions (); + if (!local) caml_process_pending_actions (); CAMLreturn (res); } + +CAMLprim value caml_make_vect(value len, value init) +{ + return make_vect_gen(len, init, 0); +} + +CAMLprim value caml_make_local_vect(value len, value init) +{ + return make_vect_gen(len, init, 1); +} + /* [len] is a [value] representing number of floats */ /* [ int -> float array ] */ CAMLprim value caml_make_float_vect(value len) @@ -274,7 +295,7 @@ CAMLprim value caml_make_float_vect(value len) boxed floats and returns the corresponding flat-allocated [float array]. In all other cases, it just returns its argument unchanged. */ -CAMLprim value caml_make_array(value init) +static value make_array_gen(value init, int local) { #ifdef FLAT_FLOAT_ARRAY CAMLparam1 (init); @@ -291,7 +312,9 @@ CAMLprim value caml_make_array(value init) CAMLreturn (init); } else { wsize = size * Double_wosize; - if (wsize <= Max_young_wosize) { + if (local) { + res = caml_alloc_local(wsize, Double_array_tag); + } else if (wsize <= Max_young_wosize) { res = caml_alloc_small(wsize, Double_array_tag); } else { res = caml_alloc_shr(wsize, Double_array_tag); @@ -301,7 +324,8 @@ CAMLprim value caml_make_array(value init) Store_double_flat_field(res, i, d); } /* run memprof callbacks */ - caml_process_pending_actions(); + if (!local) + caml_process_pending_actions(); CAMLreturn (res); } } @@ -310,6 +334,16 @@ CAMLprim value caml_make_array(value init) #endif } +CAMLprim value caml_make_array(value init) +{ + return make_array_gen(init, 0); +} + +CAMLprim value caml_make_array_local(value init) +{ + return make_array_gen(init, 1); +} + /* Blitting */ /* [wo_memmove] copies [nvals] values from [src] to [dst]. If there is a single @@ -370,8 +404,8 @@ CAMLprim value caml_array_blit(value a1, value ofs1, value a2, value ofs2, return caml_floatarray_blit(a1, ofs1, a2, ofs2, n); #endif CAMLassert (Tag_val(a2) != Double_array_tag); - if (Is_young(a2)) { - /* Arrays of values, destination is in young generation. + if (Is_young(a2) || caml_is_local(a2)) { + /* Arrays of values, destination is local or in young generation. Here too we can do a direct copy since this cannot create old-to-young pointers, nor mess up with the incremental major GC. Again, wo_memmove takes care of overlap. */ @@ -410,7 +444,8 @@ CAMLprim value caml_array_blit(value a1, value ofs1, value a2, value ofs2, static value caml_array_gather(intnat num_arrays, value arrays[/*num_arrays*/], intnat offsets[/*num_arrays*/], - intnat lengths[/*num_arrays*/]) + intnat lengths[/*num_arrays*/], + int local) { CAMLparamN(arrays, num_arrays); value res; /* no need to register it as a root */ @@ -439,7 +474,9 @@ static value caml_array_gather(intnat num_arrays, /* This is an array of floats. We can use memcpy directly. */ if (size > Max_wosize/Double_wosize) caml_invalid_argument("Array.concat"); wsize = size * Double_wosize; - res = caml_alloc(wsize, Double_array_tag); + res = local ? + caml_alloc_local(wsize, Double_array_tag) : + caml_alloc(wsize, Double_array_tag); for (i = 0, pos = 0; i < num_arrays; i++) { /* [res] is freshly allocated, and no other domain has a reference to it. Hence, a plain [memcpy] is sufficient. */ @@ -451,10 +488,15 @@ static value caml_array_gather(intnat num_arrays, CAMLassert(pos == size); } #endif - else if (size <= Max_young_wosize) { - /* Array of values, small enough to fit in young generation. + else if (size > Max_wosize) { + /* Array of values, too big. */ + caml_invalid_argument("Array.concat"); + } else if (size <= Max_young_wosize || local) { + /* Array of values, local or small enough to fit in young generation. We can use memcpy directly. */ - res = caml_alloc_small(size, 0); + res = local ? + caml_alloc_local(size, 0) : + caml_alloc_small(size, 0); for (i = 0, pos = 0; i < num_arrays; i++) { /* [res] is freshly allocated, and no other domain has a reference to it. Hence, a plain [memcpy] is sufficient. */ @@ -464,10 +506,6 @@ static value caml_array_gather(intnat num_arrays, pos += lengths[i]; } CAMLassert(pos == size); - } - else if (size > Max_wosize) { - /* Array of values, too big. */ - caml_invalid_argument("Array.concat"); } else { /* Array of values, must be allocated in old generation and filled using caml_initialize. */ @@ -494,7 +532,15 @@ CAMLprim value caml_array_sub(value a, value ofs, value len) value arrays[1] = { a }; intnat offsets[1] = { Long_val(ofs) }; intnat lengths[1] = { Long_val(len) }; - return caml_array_gather(1, arrays, offsets, lengths); + return caml_array_gather(1, arrays, offsets, lengths, 0); +} + +CAMLprim value caml_array_sub_local(value a, value ofs, value len) +{ + value arrays[1] = { a }; + intnat offsets[1] = { Long_val(ofs) }; + intnat lengths[1] = { Long_val(len) }; + return caml_array_gather(1, arrays, offsets, lengths, 1); } CAMLprim value caml_array_append(value a1, value a2) @@ -502,10 +548,18 @@ CAMLprim value caml_array_append(value a1, value a2) value arrays[2] = { a1, a2 }; intnat offsets[2] = { 0, 0 }; intnat lengths[2] = { caml_array_length(a1), caml_array_length(a2) }; - return caml_array_gather(2, arrays, offsets, lengths); + return caml_array_gather(2, arrays, offsets, lengths, 0); } -CAMLprim value caml_array_concat(value al) +CAMLprim value caml_array_append_local(value a1, value a2) +{ + value arrays[2] = { a1, a2 }; + intnat offsets[2] = { 0, 0 }; + intnat lengths[2] = { caml_array_length(a1), caml_array_length(a2) }; + return caml_array_gather(2, arrays, offsets, lengths, 1); +} + +static value array_concat_gen(value al, int local) { #define STATIC_SIZE 16 value static_arrays[STATIC_SIZE], * arrays; @@ -542,7 +596,7 @@ CAMLprim value caml_array_concat(value al) lengths[i] = caml_array_length(Field(l, 0)); } /* Do the concatenation */ - res = caml_array_gather(n, arrays, offsets, lengths); + res = caml_array_gather(n, arrays, offsets, lengths, local); /* Free the extra storage if needed */ if (n > STATIC_SIZE) { caml_stat_free(arrays); @@ -552,6 +606,16 @@ CAMLprim value caml_array_concat(value al) return res; } +CAMLprim value caml_array_concat(value al) +{ + return array_concat_gen(al, 0); +} + +CAMLprim value caml_array_concat_local(value al) +{ + return array_concat_gen(al, 1); +} + CAMLprim value caml_array_fill(value array, value v_ofs, value v_len, @@ -574,7 +638,7 @@ CAMLprim value caml_array_fill(value array, } #endif fp = &Field(array, ofs); - if (Is_young(array)) { + if (Is_young(array) || caml_is_local(array)) { for (; len > 0; len--, fp++) *fp = val; } else { int is_val_young_block = Is_block(val) && Is_young(val); @@ -595,39 +659,8 @@ CAMLprim value caml_array_fill(value array, } /* Linker compatibility with stdlib externals - CR ocaml 5 runtime: implement locals CR ocaml 5 runtime: implement iarrays */ -CAMLprim value caml_array_concat_local(value al) -{ - /* CR ocaml 5 runtime: replace with proper locals implementation */ - return caml_array_concat(al); -} - -CAMLprim value caml_array_sub_local(value al, value a, value b) -{ - /* CR ocaml 5 runtime: replace with proper locals implementation */ - return caml_array_sub(al, a, b); -} - -CAMLprim value caml_make_local_vect(value i, value a) -{ - /* CR ocaml 5 runtime: replace with proper locals implementation */ - return caml_make_vect(i, a); -} - -CAMLprim value caml_array_append_local(value a1, value a2) -{ - /* CR ocaml 5 runtime: replace with proper locals implementation */ - return caml_array_append(a1, a2); -} - -CAMLprim value caml_floatarray_create_local(value len) -{ - /* CR ocaml 5 runtime: replace with proper locals implementation */ - return caml_floatarray_create(len); -} - CAMLprim value caml_iarray_of_array(value a) { return a; diff --git a/ocaml/runtime/caml/alloc.h b/ocaml/runtime/caml/alloc.h index 11339faf91f..64cb48fb16e 100644 --- a/ocaml/runtime/caml/alloc.h +++ b/ocaml/runtime/caml/alloc.h @@ -48,6 +48,7 @@ CAMLextern value caml_alloc_tuple (mlsize_t); CAMLextern value caml_alloc_float_array (mlsize_t len); CAMLextern value caml_alloc_string (mlsize_t len); /* len in bytes (chars) */ CAMLextern value caml_alloc_initialized_string (mlsize_t len, const char *); +CAMLextern value caml_alloc_local_string (mlsize_t len); CAMLextern value caml_copy_string (char const *); CAMLextern value caml_copy_string_array (char const * const*); CAMLextern value caml_copy_double (double); diff --git a/ocaml/runtime/caml/fiber.h b/ocaml/runtime/caml/fiber.h index 43038530242..dd642675ccf 100644 --- a/ocaml/runtime/caml/fiber.h +++ b/ocaml/runtime/caml/fiber.h @@ -247,7 +247,8 @@ CAMLextern struct stack_info* caml_alloc_main_stack (uintnat init_wsize); void caml_scan_stack( scanning_action f, scanning_action_flags fflags, void* fdata, - struct stack_info* stack, value* v_gc_regs); + struct stack_info* stack, value* v_gc_regs, + struct caml_local_arenas* locals); struct stack_info* caml_alloc_stack_noexc(mlsize_t wosize, value hval, value hexn, value heff, int64_t id); diff --git a/ocaml/runtime/caml/gc.h b/ocaml/runtime/caml/gc.h index 663b89e8368..1af26350a81 100644 --- a/ocaml/runtime/caml/gc.h +++ b/ocaml/runtime/caml/gc.h @@ -32,4 +32,36 @@ #define Make_header(wosize, tag, color) \ Make_header_with_reserved(wosize, tag, color, 0) +#ifdef CAML_INTERNALS + + +#define Init_local_arena_bsize 4096 + +/* We allow the local stack to quadruple 19 times, which is virtually infinite. + Hardware limit will probably hit first (either out of address space on 32bit + systems, or out of physical memory on 64bit) + + 19 is the biggest number without triggering some compiler errors about + integer overflow during shifting; I don't know if overflow would actually + happen if I make the number bigger, but 19 corresponds to 1024TB and should + be sufficient for a very long time. */ +#define Max_local_arenas 19 + +struct caml_local_arena { + char* base; + uintnat length; + void* alloc_block; +}; +typedef struct caml_local_arenas { + int count; + intnat saved_sp; + intnat next_length; + struct caml_local_arena arenas[Max_local_arenas]; +} caml_local_arenas; + +/* Neither a valid header nor value */ +#define Local_uninit_hd Make_header(0, 0x42, NOT_MARKABLE) + +#endif /* CAML_INTERNALS */ + #endif /* CAML_GC_H */ diff --git a/ocaml/runtime/caml/m.h.in b/ocaml/runtime/caml/m.h.in index f38404676e0..c8338b4d591 100644 --- a/ocaml/runtime/caml/m.h.in +++ b/ocaml/runtime/caml/m.h.in @@ -97,3 +97,5 @@ #undef SUPPORTS_TREE_VECTORIZE #undef USE_MMAP_MAP_STACK + +#undef STACK_ALLOCATION diff --git a/ocaml/runtime/caml/memory.h b/ocaml/runtime/caml/memory.h index 92afc722239..1c9cc8a8f0d 100644 --- a/ocaml/runtime/caml/memory.h +++ b/ocaml/runtime/caml/memory.h @@ -36,11 +36,13 @@ extern "C" { CAMLextern value caml_alloc_shr (mlsize_t wosize, tag_t); CAMLextern value caml_alloc_shr_noexc(mlsize_t wosize, tag_t); CAMLextern value caml_alloc_shr_reserved (mlsize_t, tag_t, reserved_t); +CAMLextern value caml_alloc_local(mlsize_t, tag_t); CAMLextern void caml_adjust_gc_speed (mlsize_t, mlsize_t); CAMLextern void caml_alloc_dependent_memory (mlsize_t bsz); CAMLextern void caml_free_dependent_memory (mlsize_t bsz); CAMLextern void caml_modify (volatile value *, value); +CAMLextern void caml_modify_local (value obj, intnat i, value val); CAMLextern void caml_initialize (volatile value *, value); CAMLextern int caml_atomic_cas_field (value, intnat, value, value); CAMLextern value caml_check_urgent_gc (value); @@ -164,6 +166,7 @@ CAMLextern caml_stat_string caml_stat_strconcat(int n, ...); CAMLextern wchar_t* caml_stat_wcsconcat(int n, ...); #endif +CAMLextern int caml_is_local(value); /* void caml_shrink_heap (char *); Only used in compact.c */ @@ -217,6 +220,9 @@ enum caml_alloc_small_flags { #define Alloc_small(result, wosize, tag, GC) \ Alloc_small_with_reserved(result, wosize, tag, GC, (uintnat)0) +CAMLextern caml_local_arenas* caml_get_local_arenas(caml_domain_state*); +CAMLextern void caml_set_local_arenas(caml_domain_state*, caml_local_arenas* s); + #endif /* CAML_INTERNALS */ struct caml__roots_block { diff --git a/ocaml/runtime/caml/misc.h b/ocaml/runtime/caml/misc.h index a9790d8a77f..2cc363e2140 100644 --- a/ocaml/runtime/caml/misc.h +++ b/ocaml/runtime/caml/misc.h @@ -531,9 +531,11 @@ int caml_runtime_warnings_active(void); #define Debug_free_shrink Debug_tag (0x03) #define Debug_free_truncate Debug_tag (0x04) /* obsolete */ #define Debug_free_unused Debug_tag (0x05) +#define Debug_free_local Debug_tag (0x06) #define Debug_uninit_minor Debug_tag (0x10) #define Debug_uninit_major Debug_tag (0x11) #define Debug_uninit_align Debug_tag (0x15) +#define Debug_uninit_local Debug_tag (0x16) #define Debug_filler_align Debug_tag (0x85) #define Debug_pool_magic Debug_tag (0x99) diff --git a/ocaml/runtime/caml/roots.h b/ocaml/runtime/caml/roots.h index 5435a145592..dac89c76cb9 100644 --- a/ocaml/runtime/caml/roots.h +++ b/ocaml/runtime/caml/roots.h @@ -42,7 +42,8 @@ CAMLextern void caml_do_local_roots( void* data, struct caml__roots_block* local_roots, struct stack_info *current_stack, - value * v_gc_regs); + value * v_gc_regs, + struct caml_local_arenas* locals); #endif /* CAML_INTERNALS */ diff --git a/ocaml/runtime/domain.c b/ocaml/runtime/domain.c index 115b72571c3..256764c6ed8 100644 --- a/ocaml/runtime/domain.c +++ b/ocaml/runtime/domain.c @@ -665,6 +665,11 @@ static void domain_create(uintnat initial_minor_heap_wsize) { domain_state->backtrace_active = 0; caml_register_generational_global_root(&domain_state->backtrace_last_exn); + domain_state->local_arenas = NULL; + domain_state->local_sp = 0; + domain_state->local_top = NULL; + domain_state->local_limit = 0; + domain_state->compare_unordered = 0; domain_state->oo_next_id_local = 0; @@ -1865,6 +1870,8 @@ static void domain_terminate (void) caml_free_backtrace_buffer(domain_state->backtrace_buffer); caml_free_gc_regs_buckets(domain_state->gc_regs_buckets); + // CR sdolan: free locals stack + /* signal the domain termination to the backup thread NB: for a program with no additional domains, the backup thread will not have been started */ diff --git a/ocaml/runtime/fiber.c b/ocaml/runtime/fiber.c index ff4f34eb132..7b9e7e3eb64 100644 --- a/ocaml/runtime/fiber.c +++ b/ocaml/runtime/fiber.c @@ -32,6 +32,7 @@ #include "caml/major_gc.h" #include "caml/memory.h" #include "caml/startup_aux.h" +#include "caml/shared_heap.h" #ifdef NATIVE_CODE #include "caml/stack.h" #include "caml/frame_descriptors.h" @@ -222,9 +223,164 @@ void caml_get_stack_sp_pc (struct stack_info* stack, *sp = p + sizeof(value); } + +static const header_t Hd_high_bit = + ((uintnat)-1) << (sizeof(uintnat) * 8 - 1); + + +/* Returns the arena number of a block, + or -1 if it is not in any local arena */ +static int get_local_ix(caml_local_arenas* loc, value v) +{ + int i; + CAMLassert(Is_block(v)); + /* Search local arenas, starting from the largest (last) */ + for (i = 0; i < loc->count; i++) { + struct caml_local_arena arena = loc->arenas[i]; + if (arena.base <= (char*)v && (char*)v < arena.base + arena.length) + return i; + } + return -1; +} + + +/* If it visits an unmarked local block, + returns the index of the containing arena + Otherwise returns -1. */ +static int visit(scanning_action f, void* fdata, + struct caml_local_arenas* locals, + value* p) +{ + value v = *p, vblock = v; + header_t hd; + int ix; + if (!Is_block(v)) + return -1; + + if (Is_young(v)) { + f(fdata, v, p); + return -1; + } + + /* major or local or external */ + + hd = Hd_val(vblock); + if (Tag_hd(hd) == Infix_tag) { + vblock -= Infix_offset_val(v); + hd = Hd_val(vblock); + } + + if (Color_hd(hd) != NOT_MARKABLE) { + /* Major heap */ + f(fdata, v, p); + return -1; + } else { + /* Local or external */ + if (hd & Hd_high_bit) + /* Local, marked */ + return -1; + + if (locals == NULL) + /* external */ + return -1; + + ix = get_local_ix(locals, vblock); + + if (ix != -1) { + /* Mark this unmarked local */ + *Hp_val(vblock) = hd | Hd_high_bit; + } + + return ix; + } +} + +static void scan_local_allocations(scanning_action f, void* fdata, + caml_local_arenas* loc) +{ + int arena_ix; + intnat sp; + struct caml_local_arena arena; + + if (loc == NULL) return; + CAMLassert(loc->count > 0); + sp = loc->saved_sp; + arena_ix = loc->count - 1; + arena = loc->arenas[arena_ix]; +#ifdef DEBUG + { header_t* hp; + for (hp = (header_t*)arena.base; + hp < (header_t*)(arena.base + arena.length + sp); + hp++) { + *hp = Debug_free_local; + } + } +#endif + + while (sp < 0) { + header_t* hp = (header_t*)(arena.base + arena.length + sp), hd = *hp; + intnat i; + + if (hd == Local_uninit_hd) { + CAMLassert(arena_ix > 0); + arena = loc->arenas[--arena_ix]; +#ifdef DEBUG + for (hp = (header_t*)arena.base; + hp < (header_t*)(arena.base + arena.length + sp); + hp++) { + *hp = Debug_free_local; + } +#endif + continue; + } + CAMLassert(Color_hd(hd) == NOT_MARKABLE); + if (!(hd & Hd_high_bit)) { + /* Local allocation, not marked */ +#ifdef DEBUG + for (i = 0; i < Wosize_hd(hd); i++) + Field(Val_hp(hp), i) = Debug_free_local; +#endif + sp += Bhsize_hd(hd); + continue; + } + /* reset mark */ + hd &= ~Hd_high_bit; + *hp = hd; + CAMLassert(Tag_hd(hd) != Infix_tag); /* start of object, no infix */ + CAMLassert(Tag_hd(hd) != Cont_tag); /* no local continuations */ + if (Tag_hd(hd) >= No_scan_tag) { + sp += Bhsize_hd(hd); + continue; + } + i = 0; + if (Tag_hd(hd) == Closure_tag) + i = Start_env_closinfo(Closinfo_val(Val_hp(hp))); + for (; i < Wosize_hd(hd); i++) { + value *p = Op_val(Val_hp(hp)) + i; + int marked_ix = visit(f, fdata, loc, p); + if (marked_ix != -1) { + struct caml_local_arena a = loc->arenas[marked_ix]; + intnat newsp = (char*)*p - (a.base + a.length); + if (sp <= newsp) { + /* forwards pointer, common case */ + CAMLassert(marked_ix <= arena_ix); + } else { + /* If backwards pointers are ever supported (e.g. local recursive + values), then this should reset sp and iterate to a fixpoint */ + CAMLassert(marked_ix >= arena_ix); + caml_fatal_error("backwards local pointer"); + } + } + } + sp += Bhsize_hd(hd); + } +} + + Caml_inline void scan_stack_frames( scanning_action f, scanning_action_flags fflags, void* fdata, - struct stack_info* stack, value* gc_regs) + struct stack_info* stack, value* gc_regs, + struct caml_local_arenas* locals) { char * sp; uintnat retaddr; @@ -259,7 +415,7 @@ Caml_inline void scan_stack_frames( } else { root = (value *)(sp + ofs); } - f (fdata, *root, root); + visit (f, fdata, locals, root); } } else { uint16_t *p; @@ -271,7 +427,7 @@ Caml_inline void scan_stack_frames( } else { root = (value *)(sp + ofs); } - f (fdata, *root, root); + visit (f, fdata, locals, root); } } /* Move to next frame */ @@ -291,10 +447,11 @@ Caml_inline void scan_stack_frames( void caml_scan_stack( scanning_action f, scanning_action_flags fflags, void* fdata, - struct stack_info* stack, value* gc_regs) + struct stack_info* stack, value* gc_regs, + struct caml_local_arenas* locals) { while (stack != NULL) { - scan_stack_frames(f, fflags, fdata, stack, gc_regs); + scan_stack_frames(f, fflags, fdata, stack, gc_regs, locals); f(fdata, Stack_handle_value(stack), &Stack_handle_value(stack)); f(fdata, Stack_handle_exception(stack), &Stack_handle_exception(stack)); @@ -377,7 +534,8 @@ CAMLprim value caml_ensure_stack_capacity(value required_space) void caml_scan_stack( scanning_action f, scanning_action_flags fflags, void* fdata, - struct stack_info* stack, value* v_gc_regs) + struct stack_info* stack, value* v_gc_regs, + struct caml_local_arenas* unused) { value *low, *high, *sp; @@ -406,6 +564,40 @@ void caml_scan_stack( #endif /* end BYTE_CODE */ +CAMLexport void caml_do_local_roots ( + scanning_action f, scanning_action_flags fflags, void* fdata, + struct caml__roots_block *local_roots, + struct stack_info *current_stack, + value * v_gc_regs, + struct caml_local_arenas* locals) +{ + struct caml__roots_block *lr; + int i, j; + value* sp; + + for (lr = local_roots; lr != NULL; lr = lr->next) { + for (i = 0; i < lr->ntables; i++){ + for (j = 0; j < lr->nitems; j++){ + sp = &(lr->tables[i][j]); + if (*sp != 0) { +#ifdef NATIVE_CODE + visit (f, fdata, locals, sp); +#else + f (fdata, *sp, sp); +#endif + } + } + } + } + caml_scan_stack(f, fflags, fdata, current_stack, v_gc_regs, locals); +#ifdef NATIVE_CODE + scan_local_allocations(f, fdata, locals); +#else + CAMLassert(locals == NULL); +#endif +} + + /* Stack management. diff --git a/ocaml/runtime/major_gc.c b/ocaml/runtime/major_gc.c index 5c099105536..2c39b12c0d8 100644 --- a/ocaml/runtime/major_gc.c +++ b/ocaml/runtime/major_gc.c @@ -1041,7 +1041,7 @@ void caml_darken_cont(value cont) value stk = Field(cont, 0); if (Ptr_val(stk) != NULL) caml_scan_stack(&caml_darken, darken_scanning_flags, Caml_state, - Ptr_val(stk), 0); + Ptr_val(stk), 0, NULL); atomic_store_release(Hp_atomic_val(cont), With_status_hd(hd, caml_global_heap_state.MARKED)); } diff --git a/ocaml/runtime/memory.c b/ocaml/runtime/memory.c index a057ab46d6d..51df092adc3 100644 --- a/ocaml/runtime/memory.c +++ b/ocaml/runtime/memory.c @@ -333,6 +333,146 @@ CAMLexport void caml_set_fields (value obj, value v) } } +CAMLexport int caml_is_local (value v) +{ + int i; + struct caml_local_arenas* loc = Caml_state->local_arenas; + if (!Is_block(v)) return 0; + if (Color_hd(Hd_val(v)) != NOT_MARKABLE) return 0; + if (loc == NULL) return 0; + + /* Search local arenas, starting from the largest (last) */ + for (i = 0; i < loc->count; i++) { + struct caml_local_arena arena = loc->arenas[i]; + if (arena.base <= (char*)v && (char*)v < arena.base + arena.length) + return 1; + } + + return 0; +} + +/* This version of [caml_modify] may additionally be used to mutate + locally-allocated objects. (This version is used by mutations + generated from OCaml code when the value being modified may be + locally allocated) */ +CAMLexport void caml_modify_local (value obj, intnat i, value val) +{ + if (Color_hd(Hd_val(obj)) == NOT_MARKABLE) { + /* This function should not be used on external values */ + CAMLassert(caml_is_local(obj)); + Field(obj, i) = val; + } else { + caml_modify(&Field(obj, i), val); + } +} + +CAMLexport caml_local_arenas* caml_get_local_arenas(caml_domain_state* dom) +{ + caml_local_arenas* s = dom->local_arenas; + if (s != NULL) + s->saved_sp = dom->local_sp; + return s; +} + +CAMLexport void caml_set_local_arenas(caml_domain_state* dom, caml_local_arenas* s) +{ + dom->local_arenas = s; + if (s != NULL) { + struct caml_local_arena a = s->arenas[s->count - 1]; + dom->local_sp = s->saved_sp; + dom->local_top = (void*)(a.base + a.length); + dom->local_limit = - a.length; + } else { + dom->local_sp = 0; + dom->local_top = NULL; + dom->local_limit = 0; + } +} + +void caml_local_realloc(void) +{ + caml_local_arenas* s = caml_get_local_arenas(Caml_state); + intnat i; + char* arena; + caml_stat_block block; + if (s == NULL) { + s = caml_stat_alloc(sizeof(*s)); + s->count = 0; + s->next_length = 0; + s->saved_sp = Caml_state->local_sp; + } + if (s->count == Max_local_arenas) + caml_fatal_error("Local allocation stack overflow - exceeded Max_local_arenas"); + + do { + if (s->next_length == 0) { + s->next_length = Init_local_arena_bsize; + } else { + /* overflow check */ + CAML_STATIC_ASSERT(((intnat)Init_local_arena_bsize << (2*Max_local_arenas)) > 0); + s->next_length *= 4; + } + /* may need to loop, if a very large allocation was requested */ + } while (s->saved_sp + s->next_length < 0); + + arena = caml_stat_alloc_aligned_noexc(s->next_length, 0, &block); + if (arena == NULL) + caml_fatal_error("Local allocation stack overflow - out of memory"); +#ifdef DEBUG + for (i = 0; i < s->next_length; i += sizeof(value)) { + *((header_t*)(arena + i)) = Debug_uninit_local; + } +#endif + for (i = s->saved_sp; i < 0; i += sizeof(value)) { + *((header_t*)(arena + s->next_length + i)) = Local_uninit_hd; + } + caml_gc_message(0x08, + "Growing local stack to %"ARCH_INTNAT_PRINTF_FORMAT"d kB\n", + s->next_length / 1024); + s->count++; + s->arenas[s->count-1].length = s->next_length; + s->arenas[s->count-1].base = arena; + s->arenas[s->count-1].alloc_block = block; + caml_set_local_arenas(Caml_state, s); + CAMLassert(Caml_state->local_limit <= Caml_state->local_sp); +} + +CAMLexport value caml_alloc_local(mlsize_t wosize, tag_t tag) +{ +#if defined(NATIVE_CODE) && defined(STACK_ALLOCATION) + intnat sp = Caml_state->local_sp; + header_t* hp; + sp -= Bhsize_wosize(wosize); + Caml_state->local_sp = sp; + if (sp < Caml_state->local_limit) + caml_local_realloc(); + hp = (header_t*)((char*)Caml_state->local_top + sp); + *hp = Make_header(wosize, tag, NOT_MARKABLE); + return Val_hp(hp); +#else + if (wosize <= Max_young_wosize) { + return caml_alloc_small(wosize, tag); + } else { + /* The return value is initialised directly using Field. + This is invalid if it may create major -> minor pointers. + So, perform a minor GC to prevent this. (See caml_make_vect) */ + caml_minor_collection(); + return caml_alloc_shr(wosize, tag); + } +#endif +} + +CAMLprim value caml_local_stack_offset(value blk) +{ +#ifdef NATIVE_CODE + intnat sp = Caml_state->local_sp; + return Val_long(-sp); +#else + return Val_long(0); +#endif +} + + Caml_inline value alloc_shr(mlsize_t wosize, tag_t tag, reserved_t reserved, int noexc) { @@ -380,25 +520,6 @@ CAMLexport value caml_alloc_shr_noexc(mlsize_t wosize, tag_t tag) { return alloc_shr(wosize, tag, 0, 1); } -CAMLprim value caml_local_stack_offset(value blk) -{ - /* CR ocaml 5 runtime: implement properly for locals */ - return Val_long(0); -} - -CAMLprim value caml_obj_is_stack(value blk) -{ - /* CR ocaml 5 runtime: implement properly for locals */ - return Val_false; -} - -extern value caml_create_bytes(value); -CAMLprim value caml_create_local_bytes(value len) -{ - /* CR ocaml 5 runtime: implement properly for locals */ - return caml_create_bytes(len); -} - /* Global memory pool. The pool is structured as a ring of blocks, where each block's header diff --git a/ocaml/runtime/minor_gc.c b/ocaml/runtime/minor_gc.c index be686af4ca0..986af933299 100644 --- a/ocaml/runtime/minor_gc.c +++ b/ocaml/runtime/minor_gc.c @@ -277,7 +277,7 @@ static void oldify_one (void* st_v, value v, volatile value *p) Field(result, 0) = Val_ptr(stk); if (stk != NULL) { caml_scan_stack(&oldify_one, oldify_scanning_flags, st, - stk, 0); + stk, 0, NULL); } } else @@ -619,7 +619,8 @@ void caml_empty_minor_heap_promote(caml_domain_state* domain, CAML_EV_BEGIN(EV_MINOR_LOCAL_ROOTS); caml_do_local_roots( &oldify_one, oldify_scanning_flags, &st, - domain->local_roots, domain->current_stack, domain->gc_regs); + domain->local_roots, domain->current_stack, domain->gc_regs, + caml_get_local_arenas(domain)); scan_roots_hook = atomic_load(&caml_scan_roots_hook); if (scan_roots_hook != NULL) diff --git a/ocaml/runtime/obj.c b/ocaml/runtime/obj.c index bbd78ee750d..84737749798 100644 --- a/ocaml/runtime/obj.c +++ b/ocaml/runtime/obj.c @@ -258,6 +258,11 @@ CAMLprim value caml_lazy_update_to_forcing (value v) } } +CAMLprim value caml_obj_is_stack (value v) +{ + return Val_int(caml_is_local(v)); +} + /* For mlvalues.h and camlinternalOO.ml See also GETPUBMET in interp.c */ diff --git a/ocaml/runtime/roots.c b/ocaml/runtime/roots.c index 42dd222149c..3d98194bcb6 100644 --- a/ocaml/runtime/roots.c +++ b/ocaml/runtime/roots.c @@ -39,32 +39,10 @@ void caml_do_roots ( { scan_roots_hook hook; caml_do_local_roots(f, fflags, fdata, - d->local_roots, d->current_stack, d->gc_regs); + d->local_roots, d->current_stack, d->gc_regs, + caml_get_local_arenas(d)); hook = atomic_load(&caml_scan_roots_hook); if (hook != NULL) (*hook)(f, fflags, fdata, d); caml_final_do_roots(f, fflags, fdata, d, do_final_val); } - -CAMLexport void caml_do_local_roots ( - scanning_action f, scanning_action_flags fflags, void* fdata, - struct caml__roots_block *local_roots, - struct stack_info *current_stack, - value * v_gc_regs) -{ - struct caml__roots_block *lr; - int i, j; - value* sp; - - for (lr = local_roots; lr != NULL; lr = lr->next) { - for (i = 0; i < lr->ntables; i++){ - for (j = 0; j < lr->nitems; j++){ - sp = &(lr->tables[i][j]); - if (*sp != 0) { - f (fdata, *sp, sp); - } - } - } - } - caml_scan_stack(f, fflags, fdata, current_stack, v_gc_regs); -} diff --git a/ocaml/runtime/shared_heap.c b/ocaml/runtime/shared_heap.c index 50613446843..3220f5a79d6 100644 --- a/ocaml/runtime/shared_heap.c +++ b/ocaml/runtime/shared_heap.c @@ -760,7 +760,7 @@ static void verify_object(struct heap_verify_state* st, value v) { if (Tag_val(v) == Cont_tag) { struct stack_info* stk = Ptr_val(Field(v, 0)); if (stk != NULL) - caml_scan_stack(verify_push, verify_scanning_flags, st, stk, 0); + caml_scan_stack(verify_push, verify_scanning_flags, st, stk, 0, NULL); } else if (Tag_val(v) < No_scan_tag) { int i = 0; if (Tag_val(v) == Closure_tag) { diff --git a/ocaml/runtime/str.c b/ocaml/runtime/str.c index 32ca54c7a58..aa5b650ccf3 100644 --- a/ocaml/runtime/str.c +++ b/ocaml/runtime/str.c @@ -78,6 +78,14 @@ CAMLprim value caml_create_bytes(value len) return caml_alloc_string(size); } +CAMLprim value caml_create_local_bytes(value len) +{ + mlsize_t size = Long_val(len); + if (size > Bsize_wsize (Max_wosize) - 1){ + caml_invalid_argument("Bytes.create"); + } + return caml_alloc_local_string(size); +} CAMLprim value caml_string_get(value str, value index) diff --git a/ocaml/testsuite/tests/lib-obj/get_header.byte.local.reference b/ocaml/testsuite/tests/lib-obj/get_header.byte.local.reference deleted file mode 100644 index 1627d1560fc..00000000000 --- a/ocaml/testsuite/tests/lib-obj/get_header.byte.local.reference +++ /dev/null @@ -1,3 +0,0 @@ -None false -Some(wosize=1,color=0,tag=252) false -Some(wosize=1,color=0,tag=0) false diff --git a/ocaml/testsuite/tests/lib-obj/get_header.byte.reference b/ocaml/testsuite/tests/lib-obj/get_header.byte.reference index 1627d1560fc..5bef041564f 100644 --- a/ocaml/testsuite/tests/lib-obj/get_header.byte.reference +++ b/ocaml/testsuite/tests/lib-obj/get_header.byte.reference @@ -1,3 +1,2 @@ -None false -Some(wosize=1,color=0,tag=252) false -Some(wosize=1,color=0,tag=0) false +None +Some(wosize=1,color=0,tag=252) diff --git a/ocaml/testsuite/tests/lib-obj/get_header.ml b/ocaml/testsuite/tests/lib-obj/get_header.ml index d2e40e9f3d3..c97557fac6a 100644 --- a/ocaml/testsuite/tests/lib-obj/get_header.ml +++ b/ocaml/testsuite/tests/lib-obj/get_header.ml @@ -1,16 +1,15 @@ (* TEST - * stack-allocation - ** native - reference = "${test_source_directory}/get_header.opt.local.reference" - ** bytecode - reference = "${test_source_directory}/get_header.byte.local.reference" - * no-stack-allocation - ** native - reference = "${test_source_directory}/get_header.opt.reference" - ** bytecode - reference = "${test_source_directory}/get_header.byte.reference" + * native + reference = "${test_source_directory}/get_header.opt.reference" + * bytecode + reference = "${test_source_directory}/get_header.byte.reference" *) +(* We're likely to remove %get_header in favour of calls to + caml_obj_is_stack under runtime5 (since testing a block's colour isn't + sufficient to check for local allocations) so this doesn't check for local + allocations any more. *) + external repr : ('a[@local_opt]) -> (Obj.t[@local_opt]) = "%identity" external get_header_unsafe : (Obj.t[@local_opt]) -> nativeint = "%get_header" external is_int : (Obj.t[@local_opt]) -> bool = "%obj_is_int" @@ -55,31 +54,15 @@ let print_maybe_header ppf header = | None -> Format.fprintf ppf "None" | Some header -> Format.fprintf ppf "Some(%a)" print_header header -let is_local repr = - match get_header_parsed repr with - | None -> false - | Some {color; _} -> color = 2 - (* immediate *) let () = let x = 42 in let rp = repr x in - Format.printf "%a %a\n" print_maybe_header (get_header_parsed rp) - Format.pp_print_bool (is_local rp) + Format.printf "%a\n" print_maybe_header (get_header_parsed rp) (* global*) let () = let s = "hello" in let _r = ref s in let rp = repr s in - Format.printf "%a %a\n" print_maybe_header (get_header_parsed rp) - Format.pp_print_bool (is_local rp) - -(* local *) -let foo x = - let local_ s = ref x in - let rp = repr s in - Format.printf "%a %a\n" print_maybe_header (get_header_parsed rp) - Format.pp_print_bool (is_local rp) - -let () = foo 42 + Format.printf "%a\n" print_maybe_header (get_header_parsed rp) diff --git a/ocaml/testsuite/tests/lib-obj/get_header.opt.local.reference b/ocaml/testsuite/tests/lib-obj/get_header.opt.local.reference deleted file mode 100644 index 4eee2c72cf2..00000000000 --- a/ocaml/testsuite/tests/lib-obj/get_header.opt.local.reference +++ /dev/null @@ -1,3 +0,0 @@ -None false -Some(wosize=1,color=3,tag=252) false -Some(wosize=1,color=2,tag=0) true diff --git a/ocaml/testsuite/tests/lib-obj/get_header.opt.reference b/ocaml/testsuite/tests/lib-obj/get_header.opt.reference index ba64d682deb..186569a327c 100644 --- a/ocaml/testsuite/tests/lib-obj/get_header.opt.reference +++ b/ocaml/testsuite/tests/lib-obj/get_header.opt.reference @@ -1,3 +1,2 @@ -None false -Some(wosize=1,color=3,tag=252) false -Some(wosize=1,color=0,tag=0) false +None +Some(wosize=1,color=3,tag=252)