Skip to content

Commit c79579e

Browse files
lukemaurerapilatjsmshinwell
authored
flambda-backend: Retainer profiling for 5.x runtime (#2000)
* Add [Obj.uniquely_reachable_words] Cherry-picked from #1705 * Fixed ISO C99 warning introduced in #1705 Cherry-picked from #1787 * Fix compilation after cherry pick * Add `volatile` keyword to helper function Avoid warnings when passing the result of `Field` to a helper that takes a pointer. For future-proofing, added a comment warning that one shouldn't read this to mean the function is multicore-safe. * Add CR --------- Co-authored-by: apilatjs <[email protected]> Co-authored-by: Mark Shinwell <[email protected]>
1 parent 86c6eb1 commit c79579e

File tree

1 file changed

+210
-40
lines changed

1 file changed

+210
-40
lines changed

runtime/extern.c

Lines changed: 210 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -323,9 +323,9 @@ static void extern_resize_position_table(struct caml_extern_state *s)
323323

324324
/* Determine whether the given object [obj] is in the hash table.
325325
If so, set [*pos_out] to its position in the output and return 1.
326-
If not, set [*h_out] to the hash value appropriate for
327-
[extern_record_location] and return 0. */
328-
326+
If not, return 0.
327+
Either way, set [*h_out] to the hash value appropriate for
328+
[extern_record_location]. */
329329
Caml_inline int extern_lookup_position(struct caml_extern_state *s, value obj,
330330
uintnat * pos_out, uintnat * h_out)
331331
{
@@ -336,29 +336,46 @@ Caml_inline int extern_lookup_position(struct caml_extern_state *s, value obj,
336336
return 0;
337337
}
338338
if (s->pos_table.entries[h].obj == obj) {
339+
*h_out = h;
339340
*pos_out = s->pos_table.entries[h].pos;
340341
return 1;
341342
}
342343
h = (h + 1) & s->pos_table.mask;
343344
}
344345
}
345346

346-
/* Record the output position for the given object [obj]. */
347+
/* Record the given object [obj] in the hashmap, associated to the specified data [data]. */
347348
/* The [h] parameter is the index in the hash table where the object
348349
must be inserted. It was determined during lookup. */
349-
350-
static void extern_record_location(struct caml_extern_state* s,
351-
value obj, uintnat h)
350+
static void extern_record_location_with_data(struct caml_extern_state* s,
351+
value obj, uintnat h, uintnat data)
352352
{
353353
if (s->extern_flags & NO_SHARING) return;
354354
bitvect_set(s->pos_table.present, h);
355355
s->pos_table.entries[h].obj = obj;
356-
s->pos_table.entries[h].pos = s->obj_counter;
356+
s->pos_table.entries[h].pos = data;
357357
s->obj_counter++;
358358
if (s->obj_counter >= s->pos_table.threshold)
359359
extern_resize_position_table(s);
360360
}
361361

362+
/* Record the output position for the given object [obj]. */
363+
/* The [h] parameter is the index in the hash table where the object
364+
must be inserted. It was determined during lookup. */
365+
static void extern_record_location(struct caml_extern_state* s,
366+
value obj, uintnat h)
367+
{
368+
extern_record_location_with_data(s, obj, h, s->obj_counter);
369+
}
370+
371+
/* Update the data associated with the given object [obj]. */
372+
static void extern_update_location_with_data(struct caml_extern_state* s,
373+
uintnat h, uintnat data)
374+
{
375+
if (s->extern_flags & NO_SHARING) return;
376+
s->pos_table.entries[h].pos = data;
377+
}
378+
362379
/* To buffer the output */
363380

364381
static void init_extern_output(struct caml_extern_state* s)
@@ -1334,56 +1351,144 @@ CAMLexport void caml_serialize_block_float_8(void * data, intnat len)
13341351
#endif
13351352
}
13361353

1337-
CAMLprim value caml_obj_reachable_words(value v)
1338-
{
1339-
intnat size;
1340-
struct extern_item * sp;
1341-
uintnat h = 0;
1342-
uintnat pos = 0;
1343-
struct caml_extern_state *s = get_extern_state ();
1354+
enum reachable_words_node_state {
1355+
/* This node is reachable from at least two distinct roots, so it doesn't
1356+
* have a unique owner and will be ignored in all future traversals. */
1357+
Shared = -1,
1358+
/* This node is one of the roots and has not been visited yet (i.e. the computation
1359+
* starting at that root still hasn't ran */
1360+
RootUnprocessed = -2,
1361+
/* This node is one of the roots and the computation for that root has already ran */
1362+
RootProcessed = -3,
1363+
/* Sentinel value for a state that should never be observed */
1364+
Invalid = -4,
1365+
/* States that are non-negative integers indicate that a node has only been visited
1366+
* starting from a single root. The state is then equal to the identifier of the
1367+
* root that we reached it from */
1368+
};
13441369

1345-
s->obj_counter = 0;
1346-
s->extern_flags = 0;
1347-
extern_init_position_table(s);
1370+
/* CR ocaml 5 runtime (mshinwell): think about what to do here */
1371+
/* Not multicore-safe (the [volatile] just lets us use this with the [Field] macro) */
1372+
static void add_to_long_value(volatile value *v, intnat x) {
1373+
*v = Val_long(Long_val(*v) + x);
1374+
}
1375+
1376+
/* Performs traversal through the OCaml object reachability graph to deterime
1377+
how much memory an object has access to.
1378+
1379+
Assumes that the position_table has already been initialized using
1380+
[reachable_words_init]. We can run this function multiple times
1381+
without clearing the position table to share data between runs starting
1382+
from different roots. Identifiers must be positive integers.
1383+
1384+
For each value node visited, we record its traversal status in the [pos] field
1385+
of its entry in [position_table.entries]. The statuses are described in detail
1386+
in the [reachable_words_node_state] enum.
1387+
1388+
Returns the total size of elements marked, that is ones that are reachable
1389+
from the current root and can be reached by at most one root from the ones
1390+
that already ran.
1391+
1392+
[shared_size] is incremented by the total size of elements that were newly
1393+
marked [Shared], that is ones that we just found out are reachable from at least
1394+
two roots.
1395+
1396+
If [sizes_by_root_id] is not [Val_unit], we expect it to be an OCaml array
1397+
with length equal to the number of roots. Then during the traversal we will
1398+
update the number of words uniquely reachable from each root.
1399+
That is, when we visit a node for the first time, we add its size to the
1400+
corresponding root identifier, and when we visit it for the second time, we
1401+
undo this addition. */
1402+
intnat reachable_words_once(struct caml_extern_state *s,
1403+
value root, intnat identifier, value sizes_by_root_id,
1404+
intnat *shared_size) {
1405+
struct extern_item * sp;
1406+
intnat size;
1407+
uintnat mark = Invalid, new_mark;
1408+
value v = root;
1409+
uintnat h;
1410+
int previously_marked, should_traverse;
13481411
sp = s->extern_stack;
13491412
size = 0;
13501413

1414+
CAMLassert(identifier >= 0);
1415+
13511416
/* In Multicore OCaml, we don't distinguish between major heap blocks and
13521417
* out-of-heap blocks, so we end up counting out-of-heap blocks too. */
13531418
while (1) {
13541419
if (Is_long(v)) {
13551420
/* Tagged integers contribute 0 to the size, nothing to do */
1356-
} else if (extern_lookup_position(s, v, &pos, &h)) {
1357-
/* Already seen and counted, nothing to do */
13581421
} else {
13591422
header_t hd = Hd_val(v);
13601423
tag_t tag = Tag_hd(hd);
13611424
mlsize_t sz = Wosize_hd(hd);
1425+
intnat sz_with_header = 1 + sz;
13621426
/* Infix pointer: go back to containing closure */
13631427
if (tag == Infix_tag) {
13641428
v = v - Infix_offset_hd(hd);
13651429
continue;
13661430
}
1367-
/* Remember that we've visited this block */
1368-
extern_record_location(s, v, h);
1369-
/* The block contributes to the total size */
1370-
size += 1 + sz; /* header word included */
1371-
if (tag < No_scan_tag) {
1372-
/* i is the position of the first field to traverse recursively */
1373-
uintnat i =
1374-
tag == Closure_tag ? Start_env_closinfo(Closinfo_val(v)) : 0;
1375-
if (i < sz) {
1376-
if (i < sz - 1) {
1377-
/* Remember that we need to count fields i + 1 ... sz - 1 */
1378-
sp++;
1379-
if (sp >= s->extern_stack_limit)
1380-
sp = extern_resize_stack(s, sp);
1381-
sp->v = &Field(v, i + 1);
1382-
sp->count = sz - i - 1;
1431+
1432+
previously_marked = extern_lookup_position(s, v, &mark, &h);
1433+
if (!previously_marked) {
1434+
/* All roots must have been marked by [reachable_words_mark_root] before
1435+
* calling this function so we can safely assign new_mark to
1436+
* identifier */
1437+
CAMLassert(v != root);
1438+
should_traverse = 1;
1439+
new_mark = identifier;
1440+
} else if (mark == RootUnprocessed && v == root) {
1441+
should_traverse = 1;
1442+
new_mark = RootProcessed;
1443+
} else if (mark == Shared || mark == RootUnprocessed || mark == RootProcessed) {
1444+
should_traverse = 0;
1445+
} else if (mark == identifier) {
1446+
should_traverse = 0;
1447+
} else {
1448+
CAMLassert(mark != Invalid);
1449+
/* mark is some other root's identifier */
1450+
should_traverse = 1;
1451+
new_mark = Shared;
1452+
}
1453+
1454+
if (should_traverse) {
1455+
if (!previously_marked) {
1456+
extern_record_location_with_data(s, v, h, new_mark);
1457+
} else {
1458+
extern_update_location_with_data(s, h, new_mark);
1459+
}
1460+
1461+
/* The block contributes to the total size */
1462+
size += sz_with_header; /* header word included */
1463+
if (sizes_by_root_id != Val_unit) {
1464+
if (new_mark == Shared) {
1465+
/* mark is identifier of some other root that we counted this node
1466+
* as contributing to. Since it is evidently not uniquely reachable, we
1467+
* undo this contribution */
1468+
add_to_long_value(&Field(sizes_by_root_id, mark), -sz_with_header);
1469+
*shared_size += sz_with_header;
1470+
} else {
1471+
CAMLassert(new_mark == identifier || (v == root && new_mark == RootProcessed));
1472+
add_to_long_value(&Field(sizes_by_root_id, identifier), sz_with_header);
1473+
}
1474+
}
1475+
if (tag < No_scan_tag) {
1476+
/* i is the position of the first field to traverse recursively */
1477+
uintnat i =
1478+
tag == Closure_tag ? Start_env_closinfo(Closinfo_val(v)) : 0;
1479+
if (i < sz) {
1480+
if (i < sz - 1) {
1481+
/* Remember that we need to count fields i + 1 ... sz - 1 */
1482+
sp++;
1483+
if (sp >= s->extern_stack_limit)
1484+
sp = extern_resize_stack(s, sp);
1485+
sp->v = &Field(v, i + 1);
1486+
sp->count = sz - i - 1;
1487+
}
1488+
/* Continue with field i */
1489+
v = Field(v, i);
1490+
continue;
13831491
}
1384-
/* Continue with field i */
1385-
v = Field(v, i);
1386-
continue;
13871492
}
13881493
}
13891494
}
@@ -1392,7 +1497,72 @@ CAMLprim value caml_obj_reachable_words(value v)
13921497
v = *((sp->v)++);
13931498
if (--(sp->count) == 0) sp--;
13941499
}
1500+
1501+
return size;
1502+
}
1503+
1504+
struct caml_extern_state* reachable_words_init(void)
1505+
{
1506+
struct caml_extern_state *s = get_extern_state ();
1507+
s->obj_counter = 0;
1508+
s->extern_flags = 0;
1509+
extern_init_position_table(s);
1510+
return s;
1511+
}
1512+
1513+
void reachable_words_mark_root(struct caml_extern_state *s, value v)
1514+
{
1515+
uintnat h, mark;
1516+
extern_lookup_position(s, v, &mark, &h);
1517+
extern_record_location_with_data(s, v, h, RootUnprocessed);
1518+
}
1519+
1520+
void reachable_words_cleanup(struct caml_extern_state *s)
1521+
{
13951522
extern_free_stack(s);
13961523
extern_free_position_table(s);
1397-
return Val_long(size);
1524+
}
1525+
1526+
CAMLprim value caml_obj_reachable_words(value v)
1527+
{
1528+
struct caml_extern_state *s;
1529+
CAMLparam1(v);
1530+
CAMLlocal1(size);
1531+
1532+
intnat shared_size = 0;
1533+
1534+
s = reachable_words_init();
1535+
reachable_words_mark_root(s, v);
1536+
size = Val_long(reachable_words_once(s, v, 0, Val_unit, &shared_size));
1537+
reachable_words_cleanup(s);
1538+
1539+
CAMLreturn(size);
1540+
}
1541+
1542+
CAMLprim value caml_obj_uniquely_reachable_words(value v)
1543+
{
1544+
struct caml_extern_state *s;
1545+
CAMLparam1(v);
1546+
CAMLlocal2(sizes_by_root_id, ret);
1547+
1548+
intnat length, shared_size;
1549+
1550+
length = Wosize_val(v);
1551+
sizes_by_root_id = caml_alloc(length, 0);
1552+
shared_size = 0;
1553+
1554+
s = reachable_words_init();
1555+
for (intnat i = 0; i < length; i++) {
1556+
reachable_words_mark_root(s, Field(v, i));
1557+
Field(sizes_by_root_id, i) = Val_int(0);
1558+
}
1559+
for (intnat i = 0; i < length; i++) {
1560+
reachable_words_once(s, Field(v, i), i, sizes_by_root_id, &shared_size);
1561+
}
1562+
reachable_words_cleanup(s);
1563+
1564+
ret = caml_alloc_small(2, 0);
1565+
Field(ret, 0) = sizes_by_root_id;
1566+
Field(ret, 1) = Val_long(shared_size);
1567+
CAMLreturn(ret);
13981568
}

0 commit comments

Comments
 (0)