Skip to content

Commit b0a712c

Browse files
authored
flambda-backend: Statmemprof support for runtime5 (#2461)
* Import new Gc.Memprof API * Import new statmemprof testsuite * Runtime4 support for new statmemprof testsuite * Port 12383: backtrace abstractions * Import runtime5 statmemprof implementation Ports PRs #12382, #12817, #12824, #12923, #13068 * Avoid allocating unnecessarily during bytecode callbacks * Minor statmemprof testsuite robustness fixes
1 parent 5d4ae73 commit b0a712c

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

54 files changed

+3824
-1639
lines changed

otherlibs/runtime_events/runtime_events.ml

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,9 +41,12 @@ type runtime_phase =
4141
| EV_MAJOR
4242
| EV_MAJOR_SWEEP
4343
| EV_MAJOR_MARK_ROOTS
44+
| EV_MAJOR_MEMPROF_ROOTS
4445
| EV_MAJOR_MARK
4546
| EV_MINOR
4647
| EV_MINOR_LOCAL_ROOTS
48+
| EV_MINOR_MEMPROF_ROOTS
49+
| EV_MINOR_MEMPROF_CLEAN
4750
| EV_MINOR_FINALIZED
4851
| EV_EXPLICIT_GC_MAJOR_SLICE
4952
| EV_FINALISE_UPDATE_FIRST
@@ -66,6 +69,7 @@ type runtime_phase =
6669
| EV_STW_HANDLER
6770
| EV_STW_LEADER
6871
| EV_MAJOR_FINISH_SWEEPING
72+
| EV_MAJOR_MEMPROF_CLEAN
6973
| EV_MINOR_FINALIZERS_ADMIN
7074
| EV_MINOR_REMEMBERED_SET
7175
| EV_MINOR_REMEMBERED_SET_PROMOTE
@@ -126,9 +130,12 @@ let runtime_phase_name phase =
126130
| EV_MAJOR -> "major"
127131
| EV_MAJOR_SWEEP -> "major_sweep"
128132
| EV_MAJOR_MARK_ROOTS -> "major_mark_roots"
133+
| EV_MAJOR_MEMPROF_ROOTS -> "major_memprof_roots"
129134
| EV_MAJOR_MARK -> "major_mark"
130135
| EV_MINOR -> "minor"
131136
| EV_MINOR_LOCAL_ROOTS -> "minor_local_roots"
137+
| EV_MINOR_MEMPROF_ROOTS -> "minor_memprof_roots"
138+
| EV_MINOR_MEMPROF_CLEAN -> "minor_memprof_clean"
132139
| EV_MINOR_FINALIZED -> "minor_finalized"
133140
| EV_EXPLICIT_GC_MAJOR_SLICE -> "explicit_gc_major_slice"
134141
| EV_FINALISE_UPDATE_FIRST -> "finalise_update_first"
@@ -150,6 +157,7 @@ let runtime_phase_name phase =
150157
| EV_STW_HANDLER -> "stw_handler"
151158
| EV_STW_LEADER -> "stw_leader"
152159
| EV_MAJOR_FINISH_SWEEPING -> "major_finish_sweeping"
160+
| EV_MAJOR_MEMPROF_CLEAN -> "major_memprof_clean"
153161
| EV_MINOR_FINALIZERS_ADMIN -> "minor_finalizers_admin"
154162
| EV_MINOR_REMEMBERED_SET -> "minor_remembered_set"
155163
| EV_MINOR_REMEMBERED_SET_PROMOTE -> "minor_remembered_set_promote"

otherlibs/runtime_events/runtime_events.mli

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -99,9 +99,12 @@ type runtime_phase =
9999
| EV_MAJOR
100100
| EV_MAJOR_SWEEP
101101
| EV_MAJOR_MARK_ROOTS
102+
| EV_MAJOR_MEMPROF_ROOTS
102103
| EV_MAJOR_MARK
103104
| EV_MINOR
104105
| EV_MINOR_LOCAL_ROOTS
106+
| EV_MINOR_MEMPROF_ROOTS
107+
| EV_MINOR_MEMPROF_CLEAN
105108
| EV_MINOR_FINALIZED
106109
| EV_EXPLICIT_GC_MAJOR_SLICE
107110
| EV_FINALISE_UPDATE_FIRST
@@ -124,6 +127,7 @@ type runtime_phase =
124127
| EV_STW_HANDLER
125128
| EV_STW_LEADER
126129
| EV_MAJOR_FINISH_SWEEPING
130+
| EV_MAJOR_MEMPROF_CLEAN
127131
| EV_MINOR_FINALIZERS_ADMIN
128132
| EV_MINOR_REMEMBERED_SET
129133
| EV_MINOR_REMEMBERED_SET_PROMOTE

otherlibs/systhreads/st_stubs.c

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,8 @@ struct caml_thread_struct {
106106
value * gc_regs_buckets; /* saved value of Caml_state->gc_regs_buckets */
107107
void * exn_handler; /* saved value of Caml_state->exn_handler */
108108
char * async_exn_handler; /* saved value of Caml_state->async_exn_handler */
109+
memprof_thread_t memprof; /* memprof's internal thread data structure */
110+
109111
#ifndef NATIVE_CODE
110112
intnat trap_sp_off; /* saved value of Caml_state->trap_sp_off */
111113
intnat trap_barrier_off; /* saved value of Caml_state->trap_barrier_off */
@@ -294,6 +296,7 @@ static void restore_runtime_state(caml_thread_t th)
294296
Caml_state->external_raise = th->external_raise;
295297
Caml_state->external_raise_async = th->external_raise_async;
296298
#endif
299+
caml_memprof_enter_thread(th->memprof);
297300
}
298301

299302
CAMLexport void caml_thread_restore_runtime_state(void)
@@ -392,6 +395,7 @@ static caml_thread_t caml_thread_new_info(void)
392395
th->external_raise_async = NULL;
393396
#endif
394397

398+
th->memprof = caml_memprof_new_thread(domain_state);
395399
return th;
396400
}
397401

@@ -470,6 +474,7 @@ static void caml_thread_reinitialize(void)
470474
th = Active_thread->next;
471475
while (th != Active_thread) {
472476
next = th->next;
477+
caml_memprof_delete_thread(th->memprof);
473478
caml_thread_free_info(th);
474479
th = next;
475480
}
@@ -557,11 +562,12 @@ static void caml_thread_domain_initialize_hook(void)
557562
new_thread->next = new_thread;
558563
new_thread->prev = new_thread;
559564
new_thread->backtrace_last_exn = Val_unit;
565+
new_thread->memprof = caml_memprof_main_thread(Caml_state);
560566

561567
st_tls_set(caml_thread_key, new_thread);
562568

563569
Active_thread = new_thread;
564-
570+
caml_memprof_enter_thread(new_thread->memprof);
565571
}
566572

567573
CAMLprim value caml_thread_yield(value unit);
@@ -608,7 +614,6 @@ CAMLprim value caml_thread_initialize(value unit)
608614
caml_domain_external_interrupt_hook = caml_thread_interrupt_hook;
609615
caml_domain_initialize_hook = caml_thread_domain_initialize_hook;
610616
caml_domain_stop_hook = caml_thread_domain_stop_hook;
611-
612617
caml_atfork_hook = caml_thread_reinitialize;
613618

614619
return Val_unit;
@@ -641,6 +646,10 @@ static void caml_thread_stop(void)
641646
always one more thread in the chain at this point in time. */
642647
CAMLassert(Active_thread->next != Active_thread);
643648

649+
/* Tell memprof that this thread is terminating */
650+
caml_memprof_delete_thread(Active_thread->memprof);
651+
652+
/* Signal that the thread has terminated */
644653
caml_threadstatus_terminate(Terminated(Active_thread->descr));
645654

646655
/* The following also sets Active_thread to a sane value in case the

runtime/backtrace_byt.c

Lines changed: 82 additions & 65 deletions
Original file line numberDiff line numberDiff line change
@@ -312,107 +312,124 @@ code_t caml_next_frame_pointer(value* stack_high, value ** sp,
312312
return NULL;
313313
}
314314

315-
/* Stores upto [max_frames_value] frames of the current call stack to
316-
return to the user. This is used not in an exception-raising context, but
317-
only when the user requests to save the trace (hopefully less often).
318-
Instead of using a bounded buffer as [Caml_state->stash_backtrace], we first
319-
traverse the stack to compute the right size, then allocate space for the
320-
trace. */
321-
322-
static void get_callstack(value* sp, intnat trap_spoff,
323-
struct stack_info* stack,
324-
intnat max_frames,
325-
code_t** trace, intnat* trace_size)
315+
/* minimum size to allocate a backtrace (in slots) */
316+
#define MIN_BACKTRACE_SIZE 16
317+
318+
/* Stores up to [max_slots] backtrace slots of the current call stack
319+
to return to the user in [*backtrace_p] (with the allocated size in
320+
[*alloc_size_p]). Returns the number of frames stored. Instead of
321+
using a bounded buffer as [Caml_state->stash_backtrace], we
322+
dynamically grow the allocated space as required. */
323+
324+
static size_t get_callstack(value* sp, intnat trap_spoff,
325+
struct stack_info* stack,
326+
intnat max_slots,
327+
backtrace_slot **backtrace_p,
328+
size_t *alloc_size_p)
326329
{
330+
backtrace_slot *backtrace = *backtrace_p;
331+
size_t alloc_size = *alloc_size_p;
327332
struct stack_info* parent = Stack_parent(stack);
328333
value *stack_high = Stack_high(stack);
329-
value* saved_sp = sp;
330-
intnat saved_trap_spoff = trap_spoff;
331-
332334
CAMLnoalloc;
333335

334-
/* first compute the size of the trace */
335-
{
336-
*trace_size = 0;
337-
while (*trace_size < max_frames) {
338-
code_t p = caml_next_frame_pointer(stack_high, &sp, &trap_spoff);
339-
if (p == NULL) {
340-
if (parent == NULL) break;
341-
sp = parent->sp;
342-
trap_spoff = Long_val(sp[0]);
343-
stack_high = Stack_high(parent);
344-
parent = Stack_parent(parent);
345-
} else {
346-
++*trace_size;
336+
size_t slots = 0;
337+
while (slots < max_slots) {
338+
code_t p = caml_next_frame_pointer(stack_high, &sp, &trap_spoff);
339+
if (!p) {
340+
if (!parent) break;
341+
sp = parent->sp;
342+
trap_spoff = Long_val(sp[0]);
343+
stack_high = Stack_high(parent);
344+
parent = Stack_parent(parent);
345+
} else {
346+
if (slots == alloc_size) {
347+
size_t new_size = alloc_size ? alloc_size * 2 : MIN_BACKTRACE_SIZE;
348+
backtrace = caml_stat_resize_noexc(backtrace,
349+
sizeof(backtrace_slot) * new_size);
350+
351+
if (!backtrace) { /* allocation failed */
352+
*backtrace_p = NULL;
353+
*alloc_size_p = 0;
354+
return 0;
355+
}
356+
alloc_size = new_size;
347357
}
348-
}
349-
}
350-
351-
*trace = caml_stat_alloc(sizeof(code_t*) * *trace_size);
352-
353-
sp = saved_sp;
354-
parent = Stack_parent(stack);
355-
stack_high = Stack_high(stack);
356-
trap_spoff = saved_trap_spoff;
357358

358-
/* then collect the trace */
359-
{
360-
uintnat trace_pos = 0;
361-
362-
while (trace_pos < *trace_size) {
363-
code_t p = caml_next_frame_pointer(stack_high, &sp, &trap_spoff);
364-
if (p == NULL) {
365-
sp = parent->sp;
366-
trap_spoff = Long_val(sp[0]);
367-
stack_high = Stack_high(parent);
368-
parent = Stack_parent(parent);
369-
} else {
370-
(*trace)[trace_pos] = p;
371-
++trace_pos;
372-
}
359+
backtrace[slots++] = p;
373360
}
374361
}
362+
*backtrace_p = backtrace;
363+
*alloc_size_p = alloc_size;
364+
return slots;
375365
}
376366

377-
static value alloc_callstack(code_t* trace, intnat trace_len)
367+
static value alloc_callstack(backtrace_slot *trace, size_t slots)
378368
{
379369
CAMLparam0();
380370
CAMLlocal1(callstack);
381371
int i;
382-
callstack = caml_alloc(trace_len, 0);
383-
for (i = 0; i < trace_len; i++)
372+
callstack = caml_alloc(slots, 0);
373+
for (i = 0; i < slots; i++)
384374
Store_field(callstack, i, Val_backtrace_slot(trace[i]));
385375
caml_stat_free(trace);
386376
CAMLreturn(callstack);
387377
}
388378

379+
/* Obtain up to [max_slots] of the callstack of the current domain,
380+
* including parent fibers. The callstack is written into [*buffer_p],
381+
* current size [*alloc_size_p], which should be reallocated (on the C
382+
* heap) if required. Returns the number of slots obtained.
383+
*
384+
* [alloc_idx] is ignored, and must be negative (this interface is
385+
* also used by the native-code runtime, in which [alloc_idx] is
386+
* meaningful).
387+
*/
388+
389+
size_t caml_get_callstack(size_t max_slots,
390+
backtrace_slot **buffer_p,
391+
size_t *alloc_size_p,
392+
ssize_t alloc_idx)
393+
{
394+
CAMLassert(alloc_idx < 1); /* allocation indexes not used in bytecode */
395+
return get_callstack(Caml_state->current_stack->sp,
396+
Caml_state->trap_sp_off,
397+
Caml_state->current_stack,
398+
max_slots,
399+
buffer_p, alloc_size_p);
400+
}
401+
389402
CAMLprim value caml_get_current_callstack (value max_frames_value)
390403
{
391-
code_t* trace;
392-
intnat trace_len;
393-
get_callstack(Caml_state->current_stack->sp, Caml_state->trap_sp_off,
394-
Caml_state->current_stack, Long_val(max_frames_value),
395-
&trace, &trace_len);
396-
return alloc_callstack(trace, trace_len);
404+
backtrace_slot *backtrace = NULL;
405+
size_t trace_size = 0;
406+
size_t slots = get_callstack(Caml_state->current_stack->sp,
407+
Caml_state->trap_sp_off,
408+
Caml_state->current_stack,
409+
Long_val(max_frames_value),
410+
&backtrace, &trace_size);
411+
return alloc_callstack(backtrace, slots);
397412
}
398413

399414
CAMLprim value caml_get_continuation_callstack (value cont, value max_frames)
400415
{
401-
code_t* trace;
402-
intnat trace_len;
416+
backtrace_slot *backtrace = NULL;
417+
size_t trace_size = 0;
418+
size_t slots;
403419
struct stack_info *stack;
404420
value *sp;
405421

406422
stack = Ptr_val(caml_continuation_use(cont));
407423
{
408424
CAMLnoalloc; /* GC must not see the stack outside the cont */
409425
sp = stack->sp;
410-
get_callstack(sp, Long_val(sp[0]), stack, Long_val(max_frames),
411-
&trace, &trace_len);
426+
slots = get_callstack(sp, Long_val(sp[0]),
427+
stack, Long_val(max_frames),
428+
&backtrace, &trace_size);
412429
caml_continuation_replace(cont, stack);
413430
}
414431

415-
return alloc_callstack(trace, trace_len);
432+
return alloc_callstack(backtrace, slots);
416433
}
417434

418435

0 commit comments

Comments
 (0)