Skip to content

Commit 963bfbc

Browse files
authored
flambda-backend: Add [Obj.uniquely_reachable_words] (#1705)
* Add [Obj.uniqely_reachable_words] This function takes in a list of objects and for each one computes the number of words of memory that can be reachabed from that object, but no others in the list. * Fix implementation of [Obj.reachable_words] The previous changes to [caml_obj_reachable_words_once] were not compatible with the current usage in [Obj.reachable_words] but were overlooked. * Fix incorrect use of hashtable when adding roots Previously, we simply hashed the value instead of using [extern_lookup_position]. However, this doesn't appropriately guard against collisions and can cause our state to be inconsistent. Test case which gave incorrect, non-determinstic input before the fix ``` let data = List.init 10 (fun _ -> List.init 1 (fun i -> i)) let direct = List.map Obj.repr data @ [ Obj.repr data ] let print_s xs = List.map string_of_int xs |> String.concat "," |> print_endline let () = print_s (Obj.uniquely_reachable_words direct) let () = print_s (List.map Obj.reachable_words direct) ``` * Address code review - Move array iteration into C - Give meaningful names to constants in traversal code - Other minor changes * Make [tests/lib-obj/uniquely_reachable_words.ml] more robust Instead of relying on an exact number of words used which can differ slightly with the backend due to different optimizations, we deduce what objects are reachable bases on the sum of their sizes. * Optimize algorithm to only require a single iteration through the roots * Clean up code following review suggestions * Code style improvements * Additionally return the total size of shared memory This enables easier evaluation of the retainer profiling functionality as we can quantify how much memory we were able to assign uniquely to a root.
1 parent 4cd24bd commit 963bfbc

File tree

4 files changed

+313
-38
lines changed

4 files changed

+313
-38
lines changed

runtime/extern.c

Lines changed: 199 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -261,9 +261,9 @@ static void extern_resize_position_table(void)
261261

262262
/* Determine whether the given object [obj] is in the hash table.
263263
If so, set [*pos_out] to its position in the output and return 1.
264-
If not, set [*h_out] to the hash value appropriate for
265-
[extern_record_location] and return 0. */
266-
264+
If not, return 0.
265+
Either way, set [*h_out] to the hash value appropriate for
266+
[extern_record_location]. */
267267
Caml_inline int extern_lookup_position(value obj,
268268
uintnat * pos_out, uintnat * h_out)
269269
{
@@ -274,27 +274,42 @@ Caml_inline int extern_lookup_position(value obj,
274274
return 0;
275275
}
276276
if (pos_table.entries[h].obj == obj) {
277+
*h_out = h;
277278
*pos_out = pos_table.entries[h].pos;
278279
return 1;
279280
}
280281
h = (h + 1) & pos_table.mask;
281282
}
282283
}
283284

284-
/* Record the output position for the given object [obj]. */
285+
/* Record the given object [obj] in the hashmap, associated to the specified data [data]. */
285286
/* The [h] parameter is the index in the hash table where the object
286287
must be inserted. It was determined during lookup. */
287-
288-
static void extern_record_location(value obj, uintnat h)
288+
static void extern_record_location_with_data(value obj, uintnat h, uintnat data)
289289
{
290290
if (extern_flags & NO_SHARING) return;
291291
bitvect_set(pos_table.present, h);
292292
pos_table.entries[h].obj = obj;
293-
pos_table.entries[h].pos = obj_counter;
293+
pos_table.entries[h].pos = data;
294294
obj_counter++;
295295
if (obj_counter >= pos_table.threshold) extern_resize_position_table();
296296
}
297297

298+
/* Record the output position for the given object [obj]. */
299+
/* The [h] parameter is the index in the hash table where the object
300+
must be inserted. It was determined during lookup. */
301+
static void extern_record_location(value obj, uintnat h)
302+
{
303+
extern_record_location_with_data(obj, h, obj_counter);
304+
}
305+
306+
/* Update the data associated with the given object [obj]. */
307+
static void extern_update_location_with_data(uintnat h, uintnat data)
308+
{
309+
if (extern_flags & NO_SHARING) return;
310+
pos_table.entries[h].pos = data;
311+
}
312+
298313
/* To buffer the output */
299314

300315
static char * extern_userprovided_output;
@@ -1136,18 +1151,64 @@ CAMLexport void caml_serialize_block_float_8(void * data, intnat len)
11361151
#endif
11371152
}
11381153

1139-
CAMLprim value caml_obj_reachable_words(value v)
1140-
{
1141-
intnat size;
1142-
struct extern_item * sp;
1143-
uintnat h = 0;
1144-
uintnat pos;
1154+
enum reachable_words_node_state {
1155+
/* This node is reachable from at least two distinct roots, so it doesn't
1156+
* have a unique owner and will be ignored in all future traversals. */
1157+
Shared = -1,
1158+
/* This node is one of the roots and has not been visited yet (i.e. the computation
1159+
* starting at that root still hasn't ran */
1160+
RootUnprocessed = -2,
1161+
/* This node is one of the roots and the computation for that root has already ran */
1162+
RootProcessed = -3,
1163+
/* Sentinel value for a state that should never be observed */
1164+
Invalid = -4,
1165+
/* States that are non-negative integers indicate that a node has only been visited
1166+
* starting from a single root. The state is then equal to the identifier of the
1167+
* root that we reached it from */
1168+
};
11451169

1146-
obj_counter = 0;
1147-
extern_flags = 0;
1148-
extern_init_position_table();
1170+
static void add_to_long_value(value *v, intnat x) {
1171+
*v = Val_long(Long_val(*v) + x);
1172+
}
1173+
1174+
/* Performs traversal through the OCaml object reachability graph to deterime
1175+
how much memory an object has access to.
1176+
1177+
Assumes that the position_table has already been initialized using
1178+
[reachable_words_init]. We can run this function multiple times
1179+
without clearing the position table to share data between runs starting
1180+
from different roots. Identifiers must be positive integers.
1181+
1182+
For each value node visited, we record its traversal status in the [pos] field
1183+
of its entry in [position_table.entries]. The statuses are described in detail
1184+
in the [reachable_words_node_state] enum.
1185+
1186+
Returns the total size of elements marked, that is ones that are reachable
1187+
from the current root and can be reached by at most one root from the ones
1188+
that already ran.
1189+
1190+
[shared_size] is incremented by the total size of elements that were newly
1191+
marked [Shared], that is ones that we just found out are reachable from at least
1192+
two roots.
1193+
1194+
If [sizes_by_root_id] is not [Val_unit], we expect it to be an OCaml array
1195+
with length equal to the number of roots. Then during the traversal we will
1196+
update the number of words uniquely reachable from each root.
1197+
That is, when we visit a node for the first time, we add its size to the
1198+
corresponding root identifier, and when we visit it for the second time, we
1199+
undo this addition. */
1200+
intnat reachable_words_once(value root, intnat identifier, value sizes_by_root_id,
1201+
intnat *shared_size) {
1202+
CAMLassert(identifier >= 0);
1203+
struct extern_item * sp;
1204+
intnat size;
1205+
uintnat mark = Invalid, new_mark;
1206+
value v = root;
1207+
uintnat h;
1208+
int previously_marked, should_traverse;
11491209
sp = extern_stack;
11501210
size = 0;
1211+
11511212
while (1) {
11521213
if (Is_long(v)) {
11531214
/* Tagged integers contribute 0 to the size, nothing to do */
@@ -1157,36 +1218,76 @@ CAMLprim value caml_obj_reachable_words(value v)
11571218
between major heap blocks and out-of-heap blocks,
11581219
and the test above is always false,
11591220
so we end up counting out-of-heap blocks too. */
1160-
} else if (extern_lookup_position(v, &pos, &h)) {
1161-
/* Already seen and counted, nothing to do */
11621221
} else {
11631222
header_t hd = Hd_val(v);
11641223
tag_t tag = Tag_hd(hd);
11651224
mlsize_t sz = Wosize_hd(hd);
1225+
intnat sz_with_header = 1 + sz;
11661226
/* Infix pointer: go back to containing closure */
11671227
if (tag == Infix_tag) {
11681228
v = v - Infix_offset_hd(hd);
11691229
continue;
11701230
}
1171-
/* Remember that we've visited this block */
1172-
extern_record_location(v, h);
1173-
/* The block contributes to the total size */
1174-
size += 1 + sz; /* header word included */
1175-
if (tag < No_scan_tag) {
1176-
/* i is the position of the first field to traverse recursively */
1177-
uintnat i =
1178-
tag == Closure_tag ? Start_env_closinfo(Closinfo_val(v)) : 0;
1179-
if (i < sz) {
1180-
if (i < sz - 1) {
1181-
/* Remember that we need to count fields i + 1 ... sz - 1 */
1182-
sp++;
1183-
if (sp >= extern_stack_limit) sp = extern_resize_stack(sp);
1184-
sp->v = &Field(v, i + 1);
1185-
sp->count = sz - i - 1;
1231+
1232+
previously_marked = extern_lookup_position(v, &mark, &h);
1233+
if (!previously_marked) {
1234+
/* All roots must have been marked by [reachable_words_mark_root] before
1235+
* calling this function so we can safely assign new_mark to
1236+
* identifier */
1237+
CAMLassert(v != root);
1238+
should_traverse = 1;
1239+
new_mark = identifier;
1240+
} else if (mark == RootUnprocessed && v == root) {
1241+
should_traverse = 1;
1242+
new_mark = RootProcessed;
1243+
} else if (mark == Shared || mark == RootUnprocessed || mark == RootProcessed) {
1244+
should_traverse = 0;
1245+
} else if (mark == identifier) {
1246+
should_traverse = 0;
1247+
} else {
1248+
CAMLassert(mark != Invalid);
1249+
/* mark is some other root's identifier */
1250+
should_traverse = 1;
1251+
new_mark = Shared;
1252+
}
1253+
1254+
if (should_traverse) {
1255+
if (!previously_marked) {
1256+
extern_record_location_with_data(v, h, new_mark);
1257+
} else {
1258+
extern_update_location_with_data(h, new_mark);
1259+
}
1260+
1261+
/* The block contributes to the total size */
1262+
size += sz_with_header; /* header word included */
1263+
if (sizes_by_root_id != Val_unit) {
1264+
if (new_mark == Shared) {
1265+
/* mark is identifier of some other root that we counted this node
1266+
* as contributing to. Since it is evidently not uniquely reachable, we
1267+
* undo this contribution */
1268+
add_to_long_value(&Field(sizes_by_root_id, mark), -sz_with_header);
1269+
*shared_size += sz_with_header;
1270+
} else {
1271+
CAMLassert(new_mark == identifier || (v == root && new_mark == RootProcessed));
1272+
add_to_long_value(&Field(sizes_by_root_id, identifier), sz_with_header);
1273+
}
1274+
}
1275+
if (tag < No_scan_tag) {
1276+
/* i is the position of the first field to traverse recursively */
1277+
uintnat i =
1278+
tag == Closure_tag ? Start_env_closinfo(Closinfo_val(v)) : 0;
1279+
if (i < sz) {
1280+
if (i < sz - 1) {
1281+
/* Remember that we need to count fields i + 1 ... sz - 1 */
1282+
sp++;
1283+
if (sp >= extern_stack_limit) sp = extern_resize_stack(sp);
1284+
sp->v = &Field(v, i + 1);
1285+
sp->count = sz - i - 1;
1286+
}
1287+
/* Continue with field i */
1288+
v = Field(v, i);
1289+
continue;
11861290
}
1187-
/* Continue with field i */
1188-
v = Field(v, i);
1189-
continue;
11901291
}
11911292
}
11921293
}
@@ -1195,7 +1296,68 @@ CAMLprim value caml_obj_reachable_words(value v)
11951296
v = *((sp->v)++);
11961297
if (--(sp->count) == 0) sp--;
11971298
}
1299+
1300+
return size;
1301+
}
1302+
1303+
void reachable_words_init()
1304+
{
1305+
obj_counter = 0;
1306+
extern_flags = 0;
1307+
extern_init_position_table();
1308+
}
1309+
1310+
void reachable_words_mark_root(value v)
1311+
{
1312+
uintnat h, mark;
1313+
extern_lookup_position(v, &mark, &h);
1314+
extern_record_location_with_data(v, h, RootUnprocessed);
1315+
}
1316+
1317+
void reachable_words_cleanup()
1318+
{
11981319
extern_free_stack();
11991320
extern_free_position_table();
1200-
return Val_long(size);
1321+
}
1322+
1323+
CAMLprim value caml_obj_reachable_words(value v)
1324+
{
1325+
CAMLparam1(v);
1326+
CAMLlocal1(size);
1327+
1328+
intnat shared_size = 0;
1329+
1330+
reachable_words_init();
1331+
reachable_words_mark_root(v);
1332+
size = Val_long(reachable_words_once(v, 0, Val_unit, &shared_size));
1333+
reachable_words_cleanup();
1334+
1335+
CAMLreturn(size);
1336+
}
1337+
1338+
CAMLprim value caml_obj_uniquely_reachable_words(value v)
1339+
{
1340+
CAMLparam1(v);
1341+
CAMLlocal2(sizes_by_root_id, ret);
1342+
1343+
intnat length, shared_size;
1344+
1345+
length = Wosize_val(v);
1346+
sizes_by_root_id = caml_alloc(length, 0);
1347+
shared_size = 0;
1348+
1349+
reachable_words_init();
1350+
for (intnat i = 0; i < length; i++) {
1351+
reachable_words_mark_root(Field(v, i));
1352+
Field(sizes_by_root_id, i) = Val_int(0);
1353+
}
1354+
for (intnat i = 0; i < length; i++) {
1355+
reachable_words_once(Field(v, i), i, sizes_by_root_id, &shared_size);
1356+
}
1357+
reachable_words_cleanup();
1358+
1359+
ret = caml_alloc_small(2, 0);
1360+
Field(ret, 0) = sizes_by_root_id;
1361+
Field(ret, 1) = Val_long(shared_size);
1362+
CAMLreturn(ret);
12011363
}

stdlib/obj.ml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ external tag : t -> int = "caml_obj_tag" [@@noalloc]
3737
external size : t -> int = "%obj_size"
3838
let [@inline always] size t = size (Sys.opaque_identity t)
3939
external reachable_words : t -> int = "caml_obj_reachable_words"
40+
external uniquely_reachable_words : t array -> int array * int = "caml_obj_uniquely_reachable_words"
4041
external field : t -> int -> t = "%obj_field"
4142
let [@inline always] field t index = field (Sys.opaque_identity t) index
4243
external set_field : t -> int -> t -> unit = "%obj_set_field"

stdlib/obj.mli

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ val is_block : t -> bool
3232
external is_int : t -> bool = "%obj_is_int"
3333
external tag : t -> int = "caml_obj_tag" [@@noalloc]
3434
val size : t -> int
35-
external reachable_words : t -> int = "caml_obj_reachable_words"
35+
val reachable_words : t -> int
3636
(**
3737
Computes the total size (in words, including the headers) of all
3838
heap blocks accessible from the argument. Statically
@@ -42,6 +42,16 @@ external reachable_words : t -> int = "caml_obj_reachable_words"
4242
@since 4.04
4343
*)
4444

45+
val uniquely_reachable_words : t array -> int array * int
46+
(** For each element of the array, computes the total size (as defined
47+
above by [reachable_words]) of all heap blocks accessible from the
48+
argument but excluding all blocks accessible from any other arguments.
49+
50+
Also returns a single number denoting the total memory reachable from
51+
at least two of the roots. We make no attempt to classify which two
52+
(or more) roots are responsible for this memory.
53+
*)
54+
4555
val field : t -> int -> t
4656

4757
(** When using flambda:

0 commit comments

Comments
 (0)