Skip to content

Commit ecbe37a

Browse files
authored
flambda-backend: Implement mixed blocks in runtime 4 (#2422)
* First attempt, buggy * 'Fix bug' (rectify my understanding by imposing my will on NO_PROFINFO * Reenable and run tests * Add unimplemented function for large-enough mixed blocks * Fix bug (missed 'signpost', in the idiom of the PR description) * Fix upstream build * review: Bosize_val * Fix accounting: don't claim that you scanned the flat suffix * Fix never-ending comment
1 parent c578c62 commit ecbe37a

30 files changed

+273
-114
lines changed

runtime4/alloc.c

Lines changed: 70 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -32,43 +32,70 @@
3232
#define Setup_for_gc
3333
#define Restore_after_gc
3434

35-
CAMLexport value caml_alloc (mlsize_t wosize, tag_t tag)
35+
CAMLexport value caml_alloc_with_reserved (mlsize_t wosize, tag_t tag,
36+
reserved_t reserved)
3637
{
38+
3739
value result;
3840
mlsize_t i;
3941

42+
// Optimization: for mixed blocks, don't fill in non-scannable fields
43+
mlsize_t scannable_wosize =
44+
Is_mixed_block_reserved(reserved)
45+
? Mixed_block_scannable_wosize_reserved(reserved)
46+
: wosize;
47+
4048
CAMLassert (tag < 256);
4149
CAMLassert (tag != Infix_tag);
4250
if (wosize <= Max_young_wosize){
4351
if (wosize == 0){
4452
result = Atom (tag);
4553
}else{
46-
Alloc_small (result, wosize, tag);
54+
Alloc_small_with_reserved (result, wosize, tag, reserved);
4755
if (tag < No_scan_tag){
48-
for (i = 0; i < wosize; i++) Field (result, i) = Val_unit;
56+
for (i = 0; i < scannable_wosize; i++) Field (result, i) = Val_unit;
4957
}
5058
}
5159
}else{
52-
result = caml_alloc_shr (wosize, tag);
60+
result = caml_alloc_shr_reserved (wosize, tag, reserved);
5361
if (tag < No_scan_tag){
54-
for (i = 0; i < wosize; i++) Field (result, i) = Val_unit;
62+
for (i = 0; i < scannable_wosize; i++) Field (result, i) = Val_unit;
5563
}
5664
result = caml_check_urgent_gc (result);
5765
}
5866
return result;
5967
}
6068

61-
CAMLexport value caml_alloc_small (mlsize_t wosize, tag_t tag)
69+
CAMLexport value caml_alloc (mlsize_t wosize, tag_t tag) {
70+
return caml_alloc_with_reserved (wosize, tag, 0);
71+
}
72+
73+
CAMLexport value caml_alloc_mixed (mlsize_t wosize, tag_t tag,
74+
mlsize_t scannable_prefix) {
75+
reserved_t reserved =
76+
Reserved_mixed_block_scannable_wosize(scannable_prefix);
77+
return caml_alloc_with_reserved (wosize, tag, reserved);
78+
}
79+
80+
CAMLexport value caml_alloc_small_with_reserved (mlsize_t wosize, tag_t tag,
81+
reserved_t reserved)
6282
{
6383
value result;
6484

6585
CAMLassert (wosize > 0);
6686
CAMLassert (wosize <= Max_young_wosize);
6787
CAMLassert (tag < 256);
68-
Alloc_small (result, wosize, tag);
88+
CAMLassert (tag != Infix_tag);
89+
Alloc_small_with_reserved (result, wosize, tag, reserved);
6990
return result;
7091
}
7192

93+
CAMLexport value caml_alloc_small (mlsize_t wosize, tag_t tag)
94+
{
95+
return caml_alloc_small_with_reserved(wosize, tag, 0);
96+
}
97+
98+
7299
/* [n] is a number of words (fields) */
73100
CAMLexport value caml_alloc_tuple(mlsize_t n)
74101
{
@@ -220,6 +247,28 @@ CAMLprim value caml_alloc_dummy_float (value size)
220247
return caml_alloc (wosize, 0);
221248
}
222249

250+
/* [size] is a [value] representing the number of fields.
251+
[scannable_size] is a [value] representing the length of the prefix of
252+
fields that contains pointer values.
253+
*/
254+
CAMLprim value caml_alloc_dummy_mixed (value size, value scannable_size)
255+
{
256+
mlsize_t wosize = Long_val(size);
257+
mlsize_t scannable_wosize = Long_val(scannable_size);
258+
#ifdef NATIVECODE
259+
/* The below code runs for bytecode and native code, and critically assumes
260+
that a double record field can be stored in one word. That's true both for
261+
32-bit and 64-bit bytecode (as a double record field in a mixed record is
262+
always boxed), and for 64-bit native code (as the double record field is
263+
stored flat, taking up 1 word).
264+
*/
265+
CAML_STATIC_ASSERT(Double_wosize == 1);
266+
#endif
267+
reserved_t reserved =
268+
Reserved_mixed_block_scannable_wosize(scannable_wosize);
269+
return caml_alloc_with_reserved (wosize, 0, reserved);
270+
}
271+
223272
CAMLprim value caml_alloc_dummy_infix(value vsize, value voffset)
224273
{
225274
mlsize_t wosize = Long_val(vsize), offset = Long_val(voffset);
@@ -270,14 +319,25 @@ CAMLprim value caml_update_dummy(value dummy, value newval)
270319
} else {
271320
CAMLassert (tag < No_scan_tag);
272321
CAMLassert (Tag_val(dummy) != Infix_tag);
322+
CAMLassert (Reserved_val(dummy) == Reserved_val(newval));
273323
Tag_val(dummy) = tag;
274324
size = Wosize_val(newval);
275325
CAMLassert (size == Wosize_val(dummy));
326+
mlsize_t scannable_size = Scannable_wosize_val(newval);
327+
CAMLassert (scannable_size == Scannable_wosize_val(dummy));
276328
/* See comment above why this is safe even if [tag == Closure_tag]
277-
and some of the "values" being copied are actually code pointers. */
278-
for (i = 0; i < size; i++){
329+
and some of the "values" being copied are actually code pointers.
330+
331+
This reasoning does not apply to arbitrary flat fields, which might have
332+
the same shape as pointers into the minor heap, so we need to handle the
333+
non-scannable suffix of mixed blocks specially.
334+
*/
335+
for (i = 0; i < scannable_size; i++){
279336
caml_modify (&Field(dummy, i), Field(newval, i));
280337
}
338+
for (i = scannable_size; i < size; i++) {
339+
Field(dummy, i) = Field(newval, i);
340+
}
281341
}
282342
return Val_unit;
283343
}
@@ -293,4 +353,4 @@ CAMLexport value caml_alloc_some(value v)
293353
CAMLprim value caml_atomic_make_contended(value v)
294354
{
295355
caml_fatal_error("Atomic.make_contended is not supported by runtime4");
296-
}
356+
}

runtime4/array.c

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -133,7 +133,11 @@ CAMLprim value caml_floatarray_get(value array, value index)
133133
double d;
134134
value res;
135135

136-
CAMLassert (Tag_val(array) == Double_array_tag);
136+
// [caml_floatarray_get] may be called on a floatarray
137+
// or a mixed block.
138+
CAMLassert ( Tag_val(array) == Double_array_tag
139+
|| index > Scannable_wosize_val(array) );
140+
137141
if (idx < 0 || idx >= Wosize_val(array) / Double_wosize)
138142
caml_array_bound_error();
139143
d = Double_flat_field(array, idx);
@@ -153,7 +157,11 @@ CAMLprim value caml_floatarray_get_local(value array, value index)
153157
double d;
154158
value res;
155159

156-
CAMLassert (Tag_val(array) == Double_array_tag);
160+
// [caml_floatarray_get] may be called on a floatarray
161+
// or a mixed block.
162+
CAMLassert ( Tag_val(array) == Double_array_tag
163+
|| index > Scannable_wosize_val(array) );
164+
157165
if (idx < 0 || idx >= Wosize_val(array) / Double_wosize)
158166
caml_array_bound_error();
159167
d = Double_flat_field(array, idx);

runtime4/caml/alloc.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,8 +30,13 @@ extern "C" {
3030
/* It is guaranteed that these allocation functions will not trigger
3131
any OCaml callback such as finalizers or signal handlers. */
3232

33+
CAMLextern value caml_alloc_with_reserved (mlsize_t, tag_t, reserved_t);
3334
CAMLextern value caml_alloc (mlsize_t wosize, tag_t);
35+
CAMLextern value caml_alloc_mixed (mlsize_t wosize, tag_t,
36+
mlsize_t scannable_wosize);
3437
CAMLextern value caml_alloc_small (mlsize_t wosize, tag_t);
38+
CAMLextern value caml_alloc_small_with_reserved (mlsize_t wosize, tag_t,
39+
reserved_t);
3540
CAMLextern value caml_alloc_tuple (mlsize_t wosize);
3641
CAMLextern value caml_alloc_float_array (mlsize_t len);
3742
CAMLextern value caml_alloc_string (mlsize_t len); /* len in bytes (chars) */

runtime4/caml/memory.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,11 @@ CAMLextern value caml_alloc_shr (mlsize_t wosize, tag_t);
4141
Equivalent to caml_alloc_shr unless WITH_PROFINFO is true */
4242
CAMLextern value caml_alloc_shr_with_profinfo (mlsize_t, tag_t, intnat);
4343

44+
/* The same as [caml_alloc_shr_with_profinfo], but named to match the runtime5
45+
naming convention of reserved bits.
46+
*/
47+
CAMLextern value caml_alloc_shr_reserved (mlsize_t, tag_t, reserved_t);
48+
4449
/* Variant of [caml_alloc_shr] where no memprof sampling is performed. */
4550
CAMLextern value caml_alloc_shr_no_track_noexc (mlsize_t, tag_t);
4651

@@ -240,6 +245,9 @@ extern void caml_alloc_small_dispatch (intnat wosize, int flags,
240245
#define Alloc_small_with_profinfo(result, wosize, tag, profinfo) \
241246
Alloc_small_aux(result, wosize, tag, profinfo, CAML_DO_TRACK)
242247

248+
#define Alloc_small_with_reserved(result, wosize, tag, reserved) \
249+
Alloc_small_with_profinfo(result, wosize, tag, reserved)
250+
243251
#define Alloc_small(result, wosize, tag) \
244252
Alloc_small_with_profinfo(result, wosize, tag, (uintnat) 0)
245253
#define Alloc_small_no_track(result, wosize, tag) \

runtime4/caml/mlvalues.h

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,11 @@ extern "C" {
6060
typedef intnat value;
6161
typedef uintnat header_t;
6262
typedef uintnat mlsize_t;
63+
typedef header_t reserved_t; /* Same role as reserved_t in runtime 5 (reserved
64+
header bits). The mechanism for reserving bits
65+
in runtime 4 is different than runtime 5: it's
66+
the WITH_PROFINFO and PROFINFO_WIDTH macros.
67+
*/
6368
typedef unsigned int tag_t; /* Actually, an unsigned char */
6469
typedef uintnat color_t;
6570
typedef uintnat mark_t;
@@ -135,6 +140,40 @@ originally built for Spacetime profiling, hence the odd name.
135140
#define Profinfo_hd(hd) NO_PROFINFO
136141
#endif /* WITH_PROFINFO */
137142

143+
/* Header bits reserved for mixed blocks */
144+
145+
#define Reserved_hd(hd) ((reserved_t)(Profinfo_hd(hd)))
146+
#define Reserved_val(val) ((reserved_t)(Profinfo_val(val)))
147+
148+
#define Scannable_wosize_val(val) (Scannable_wosize_hd (Hd_val (val)))
149+
150+
#define Is_mixed_block_reserved(res) (((reserved_t)(res)) > 0)
151+
#define Mixed_block_scannable_wosize_reserved(res) (((reserved_t)(res)) - 1)
152+
#define Reserved_mixed_block_scannable_wosize(sz) (((mlsize_t)(sz)) + 1)
153+
154+
/* The scannable size of a block is how many fields are values as opposed
155+
to flat floats/ints/etc. This is different than the (normal) size of a
156+
block for mixed blocks.
157+
158+
The runtime has several functions that traverse over the structure of
159+
an OCaml value. (e.g. polymorphic comparison, GC marking/sweeping)
160+
All of these traversals must be written to have one of the following
161+
properties:
162+
- it's known that the input can never be a mixed block,
163+
- it raises an exception on mixed blocks, or
164+
- it uses the scannable size (not the normal size) to figure out which
165+
fields to recursively descend into.
166+
167+
Otherwise, the traversal could attempt to recursively descend into
168+
a flat field, which could segfault (or worse).
169+
*/
170+
Caml_inline mlsize_t Scannable_wosize_hd(header_t hd) {
171+
reserved_t res = Reserved_hd(hd);
172+
return
173+
Is_mixed_block_reserved(res)
174+
? Mixed_block_scannable_wosize_reserved(res)
175+
: Wosize_hd(hd);
176+
}
138177

139178
#define Hd_val(val) (((header_t *) (val)) [-1]) /* Also an l-value. */
140179
#define Hd_op(op) (Hd_val (op)) /* Also an l-value. */

runtime4/compact.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -178,6 +178,7 @@ static void do_compaction (intnat new_allocation_policy)
178178

179179
while (Is_gray_hd (q)) q = * dptr (q);
180180
wosz = Wosize_hd (q);
181+
mlsize_t scannable_wosz = Scannable_wosize_hd (q);
181182
if (Is_white_hd (q)){
182183
t = Tag_hd (q);
183184
CAMLassert (t != Infix_tag);
@@ -188,7 +189,7 @@ static void do_compaction (intnat new_allocation_policy)
188189
}else{
189190
first_field = 0;
190191
}
191-
for (i = first_field; i < wosz; i++){
192+
for (i = first_field; i < scannable_wosz; i++){
192193
invert_pointer_at ((word *) &Field (v,i));
193194
}
194195
}

runtime4/compare.c

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -194,6 +194,11 @@ static intnat do_compare_val(struct compare_stack* stk,
194194
if (t1 != t2)
195195
return (intnat)t1 - (intnat)t2;
196196
}
197+
if ( Is_mixed_block_reserved(Reserved_val(v1))
198+
|| Is_mixed_block_reserved(Reserved_val(v2))) {
199+
compare_free_stack(stk);
200+
caml_invalid_argument("compare: mixed block value");
201+
}
197202
switch(t1) {
198203
case Forward_tag: {
199204
v1 = Forward_val (v1);

runtime4/extern.c

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -730,6 +730,11 @@ static void extern_rec(value v)
730730
header_t hd = Hd_val(v);
731731
tag_t tag = Tag_hd(hd);
732732
mlsize_t sz = Wosize_hd(hd);
733+
reserved_t reserved = Reserved_hd(hd);
734+
if (Is_mixed_block_reserved(reserved)) {
735+
extern_invalid_argument("output_value: mixed block");
736+
break;
737+
}
733738

734739
if (tag == Forward_tag) {
735740
value f = Forward_val (v);
@@ -1274,16 +1279,19 @@ intnat reachable_words_once(value root, intnat identifier, value sizes_by_root_i
12741279
}
12751280
}
12761281
if (tag < No_scan_tag) {
1277-
/* i is the position of the first field to traverse recursively */
1282+
/* i is the position of the first field to traverse recursively,
1283+
and j is the position of the last such field.
1284+
*/
12781285
uintnat i =
12791286
tag == Closure_tag ? Start_env_closinfo(Closinfo_val(v)) : 0;
1280-
if (i < sz) {
1281-
if (i < sz - 1) {
1282-
/* Remember that we need to count fields i + 1 ... sz - 1 */
1287+
uintnat j = Scannable_wosize_hd(hd);
1288+
if (i < j) {
1289+
if (i < j - 1) {
1290+
/* Remember that we need to count fields i + 1 ... j - 1 */
12831291
sp++;
12841292
if (sp >= extern_stack_limit) sp = extern_resize_stack(sp);
12851293
sp->v = &Field(v, i + 1);
1286-
sp->count = sz - i - 1;
1294+
sp->count = j - i - 1;
12871295
}
12881296
/* Continue with field i */
12891297
v = Field(v, i);

runtime4/gc_ctrl.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -116,7 +116,7 @@ static void check_block (header_t *hp)
116116
/* For closures, skip to the start of the scannable environment */
117117
if (tag == Closure_tag) start = Start_env_closinfo(Closinfo_val(v));
118118
else start = 0;
119-
for (i = start; i < Wosize_hp (hp); i++){
119+
for (i = start; i < Scannable_wosize_hd (Hd_hp (hp)); i++){
120120
f = Field (v, i);
121121
if (Is_block (f) && Is_in_heap (f)){
122122
check_head (f);

runtime4/hash.c

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -286,10 +286,17 @@ CAMLprim value caml_hash(value count, value limit, value seed, value obj)
286286
/* Mix in the tag and size, but do not count this towards [num] */
287287
h = caml_hash_mix_uint32(h, Whitehd_hd(Hd_val(v)));
288288
/* Copy fields into queue, not exceeding the total size [sz] */
289-
for (i = 0, len = Wosize_val(v); i < len; i++) {
289+
for (i = 0, len = Scannable_wosize_val(v); i < len; i++) {
290290
if (wr >= sz) break;
291291
queue[wr++] = Field(v, i);
292292
}
293+
294+
/* We don't attempt to hash the flat suffix of a mixed block.
295+
This is consistent with abstract blocks which, like mixed
296+
blocks, cause polymorphic comparison to raise and don't
297+
attempt to hash the non-scannable portion.
298+
*/
299+
293300
break;
294301
}
295302
}

0 commit comments

Comments
 (0)