Skip to content

Commit 0980058

Browse files
stedolan and xavierleroy
authored and committed
Do not cache young_limit in a processor register (upstream PR 9876) (#315)
On target architectures with 32 or more registers, a register was used to cache the value of the young_limit field of the domain state. This reduced the size and execution time of the code for inlined allocations. However, this usage is problematic with respect to polling for signals and to inter-domain communication in Multicore OCaml, because it is often not possible to change the value of the register when we change young_limit. So, the change to young_limit doesn't take effect immediately, only when the register is reloaded from young_limit. - Removes the caching of young_limit in a register from the ARM64, PowerPC and RISC-V ports. - Recycle the former "young limit" register, giving one more allocatable register. Now that we have an unused callee-save register on ARM64, PowerPC, and RISC-V, make it available for register allocation. - Assorted cleanups in runtime/*.S and in asmcomp/*/proc.ml. - ARM64: fix wrong register pressure limits for Iextcall. There are only 7 callee-save integer registers (x19 to x25), not 10. Co-authored-by: Xavier Leroy <[email protected]>
1 parent b003914 commit 0980058

File tree

17 files changed

+247
-252
lines changed

17 files changed

+247
-252
lines changed

backend/arm64/emit.mlp

Lines changed: 10 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -33,12 +33,11 @@ let fastcode_flag = ref true
3333

3434
(* Names for special regs *)
3535

36-
let reg_domain_state_ptr = phys_reg 22
37-
let reg_trap_ptr = phys_reg 23
38-
let reg_alloc_ptr = phys_reg 24
39-
let reg_alloc_limit = phys_reg 25
40-
let reg_tmp1 = phys_reg 26
41-
let reg_x8 = phys_reg 8
36+
let reg_domain_state_ptr = phys_reg 25 (* x28 *)
37+
let reg_trap_ptr = phys_reg 23 (* x26 *)
38+
let reg_alloc_ptr = phys_reg 24 (* x27 *)
39+
let reg_tmp1 = phys_reg 26 (* x16 *)
40+
let reg_x8 = phys_reg 8 (* x8 *)
4241

4342
(* Output a label *)
4443

@@ -504,10 +503,8 @@ module BR = Branch_relaxation.Make (struct
504503
| Lop (Iload (size, addr)) | Lop (Istore (size, addr, _)) ->
505504
let based = match addr with Iindexed _ -> 0 | Ibased _ -> 1 in
506505
based + begin match size with Single -> 2 | _ -> 1 end
507-
| Lop (Ialloc {bytes = num_bytes}) when !fastcode_flag ->
508-
if num_bytes <= 0xFFF then 4 else 5
509-
| Lop (Ispecific (Ifar_alloc {bytes = num_bytes})) when !fastcode_flag ->
510-
if num_bytes <= 0xFFF then 5 else 6
506+
| Lop (Ialloc _) when !fastcode_flag -> 5
507+
| Lop (Ispecific (Ifar_alloc _)) when !fastcode_flag -> 6
511508
| Lop (Ialloc { bytes = num_bytes; _ })
512509
| Lop (Ispecific (Ifar_alloc { bytes = num_bytes; _ })) ->
513510
begin match num_bytes with
@@ -597,8 +594,10 @@ let assembly_code_for_allocation i ~n ~far ~dbginfo =
597594
so it is reasonable to assume n < 0x1_000. This makes
598595
the generated code simpler. *)
599596
assert (16 <= n && n < 0x1_000 && n land 0x7 = 0);
597+
let offset = Domainstate.(idx_of_field Domain_young_limit) * 8 in
598+
` ldr {emit_reg reg_tmp1}, [{emit_reg reg_domain_state_ptr}, #{emit_int offset}]\n`;
600599
` sub {emit_reg reg_alloc_ptr}, {emit_reg reg_alloc_ptr}, #{emit_int n}\n`;
601-
` cmp {emit_reg reg_alloc_ptr}, {emit_reg reg_alloc_limit}\n`;
600+
` cmp {emit_reg reg_alloc_ptr}, {emit_reg reg_tmp1}\n`;
602601
if not far then begin
603602
` b.lo {emit_label lbl_call_gc}\n`
604603
end else begin

backend/arm64/proc.ml

Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -34,11 +34,10 @@ let word_addressed = false
3434
x0 - x15 general purpose (caller-save)
3535
x16, x17 temporaries (used by call veneers)
3636
x18 platform register (reserved)
37-
x19 - x24 general purpose (callee-save)
38-
x25 domain state pointer
37+
x19 - x25 general purpose (callee-save)
3938
x26 trap pointer
4039
x27 alloc pointer
41-
x28 alloc limit
40+
x28 domain state pointer
4241
x29 frame pointer
4342
x30 return address
4443
sp / xzr stack pointer / zero register
@@ -49,10 +48,11 @@ let word_addressed = false
4948
*)
5049

5150
let int_reg_name =
52-
[| "x0"; "x1"; "x2"; "x3"; "x4"; "x5"; "x6"; "x7";
53-
"x8"; "x9"; "x10"; "x11"; "x12"; "x13"; "x14"; "x15";
54-
"x19"; "x20"; "x21"; "x22"; "x23"; "x24";
55-
"x25"; "x26"; "x27"; "x28"; "x16"; "x17" |]
51+
[| "x0"; "x1"; "x2"; "x3"; "x4"; "x5"; "x6"; "x7"; (* 0 - 7 *)
52+
"x8"; "x9"; "x10"; "x11"; "x12"; "x13"; "x14"; "x15"; (* 8 - 15 *)
53+
"x19"; "x20"; "x21"; "x22"; "x23"; "x24"; "x25"; (* 16 - 22 *)
54+
"x26"; "x27"; "x28"; (* 23 - 25 *)
55+
"x16"; "x17" |] (* 26 - 27 *)
5656

5757
let float_reg_name =
5858
[| "d0"; "d1"; "d2"; "d3"; "d4"; "d5"; "d6"; "d7";
@@ -68,7 +68,7 @@ let register_class r =
6868
| Float -> 1
6969

7070
let num_available_registers =
71-
[| 22; 32 |] (* first 22 int regs allocatable; all float regs allocatable *)
71+
[| 23; 32 |] (* first 23 int regs allocatable; all float regs allocatable *)
7272

7373
let first_available_register =
7474
[| 0; 100 |]
@@ -270,16 +270,16 @@ let destroyed_at_reloadretaddr = [| |]
270270
(* Maximal register pressure *)
271271

272272
let safe_register_pressure = function
273-
| Iextcall _ -> 8
274-
| Ialloc _ -> 24
275-
| _ -> 25
273+
| Iextcall _ -> 7
274+
| Ialloc _ -> 22
275+
| _ -> 23
276276

277277
let max_register_pressure = function
278-
| Iextcall _ -> [| 10; 8 |]
279-
| Ialloc _ -> [| 24; 32 |]
278+
| Iextcall _ -> [| 7; 8 |] (* 7 integer callee-saves, 8 FP callee-saves *)
279+
| Ialloc _ -> [| 22; 32 |]
280280
| Iintoffloat | Ifloatofint
281-
| Iload(Single, _) | Istore(Single, _, _) -> [| 25; 31 |]
282-
| _ -> [| 25; 32 |]
281+
| Iload(Single, _) | Istore(Single, _, _) -> [| 23; 31 |]
282+
| _ -> [| 23; 32 |]
283283

284284
(* Pure operations (without any side effect besides updating their result
285285
registers). *)

backend/power/emit.mlp

Lines changed: 28 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -508,8 +508,8 @@ module BR = Branch_relaxation.Make (struct
508508
then load_store_size addr + 1
509509
else load_store_size addr
510510
| Lop(Istore(_chunk, addr, _)) -> load_store_size addr
511-
| Lop(Ialloc _) -> 4
512-
| Lop(Ispecific(Ialloc_far _)) -> 5
511+
| Lop(Ialloc _) -> 5
512+
| Lop(Ispecific(Ialloc_far _)) -> 6
513513
| Lop(Iintop Imod) -> 3
514514
| Lop(Iintop(Icomp _)) -> 4
515515
| Lop(Icompf _) -> 5
@@ -550,6 +550,26 @@ module BR = Branch_relaxation.Make (struct
550550
let relax_intop_imm_checkbound ~bound:_ = assert false
551551
end)
552552

553+
(* Assembly code for inlined allocation *)
554+
555+
let emit_alloc i bytes dbginfo far =
556+
if !call_gc_label = 0 then call_gc_label := new_label ();
557+
let offset = Domainstate.(idx_of_field Domain_young_limit) * 8 in
558+
` {emit_string lg} 0, {emit_int offset}(30)\n`;
559+
` addi 31, 31, {emit_int(-bytes)}\n`;
560+
` {emit_string cmplg} 31, 0\n`;
561+
if not far then begin
562+
` bltl {emit_label !call_gc_label}\n`;
563+
record_frame i.live (Dbg_alloc dbginfo);
564+
` addi {emit_reg i.res.(0)}, 31, {emit_int size_addr}\n`
565+
end else begin
566+
let lbl = new_label() in
567+
` bge {emit_label lbl}\n`;
568+
` bl {emit_label !call_gc_label}\n`;
569+
record_frame i.live (Dbg_alloc dbginfo);
570+
`{emit_label lbl}: addi {emit_reg i.res.(0)}, 31, {emit_int size_addr}\n`
571+
end
572+
553573
(* Output the assembly code for an instruction *)
554574

555575
let emit_instr i =
@@ -782,22 +802,10 @@ let emit_instr i =
782802
| Single -> "stfs"
783803
| Double -> "stfd" in
784804
emit_load_store storeinstr addr i.arg 1 i.arg.(0)
785-
| Lop(Ialloc { bytes = n; dbginfo }) ->
786-
if !call_gc_label = 0 then call_gc_label := new_label ();
787-
` addi 31, 31, {emit_int(-n)}\n`;
788-
` {emit_string cmplg} 31, 30\n`;
789-
` bltl {emit_label !call_gc_label}\n`;
790-
record_frame i.live (Dbg_alloc dbginfo);
791-
` addi {emit_reg i.res.(0)}, 31, {emit_int size_addr}\n`;
792-
| Lop(Ispecific(Ialloc_far { bytes = n; dbginfo })) ->
793-
if !call_gc_label = 0 then call_gc_label := new_label ();
794-
let lbl = new_label() in
795-
` addi 31, 31, {emit_int(-n)}\n`;
796-
` {emit_string cmplg} 31, 30\n`;
797-
` bge {emit_label lbl}\n`;
798-
` bl {emit_label !call_gc_label}\n`;
799-
record_frame i.live (Dbg_alloc dbginfo);
800-
`{emit_label lbl}: addi {emit_reg i.res.(0)}, 31, {emit_int size_addr}\n`
805+
| Lop(Ialloc { bytes; dbginfo }) ->
806+
emit_alloc i bytes dbginfo false
807+
| Lop(Ispecific(Ialloc_far { bytes; dbginfo })) ->
808+
emit_alloc i bytes dbginfo true
801809
| Lop(Iintop Isub) -> (* subfc has swapped arguments *)
802810
` subfc {emit_reg i.res.(0)}, {emit_reg i.arg.(1)}, {emit_reg i.arg.(0)}\n`
803811
| Lop(Iintop Imod) ->
@@ -1009,8 +1017,8 @@ let emit_instr i =
10091017
Domainstate.(idx_of_field Domain_backtrace_pos)
10101018
in
10111019
begin match abi with
1012-
| ELF32 -> ` stw 0, {emit_int (backtrace_pos * 8)}(28)\n`
1013-
| _ -> ` std 0, {emit_int (backtrace_pos * 8)}(28)\n`
1020+
| ELF32 -> ` stw 0, {emit_int (backtrace_pos * 8)}(30)\n`
1021+
| _ -> ` std 0, {emit_int (backtrace_pos * 8)}(30)\n`
10141022
end;
10151023
emit_call "caml_raise_exn";
10161024
record_frame Reg.Set.empty (Dbg_raise i.dbg);

backend/power/proc.ml

Lines changed: 9 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -35,10 +35,9 @@ let word_addressed = false
3535
3 - 10 function arguments and results
3636
11 - 12 temporaries
3737
13 pointer to small data area
38-
14 - 27 general purpose, preserved by C
39-
28 domain state pointer
38+
14 - 28 general purpose, preserved by C
4039
29 trap pointer
41-
30 allocation limit
40+
30 domain state pointer
4241
31 allocation pointer
4342
Floating-point register map:
4443
0 temporary
@@ -47,9 +46,9 @@ let word_addressed = false
4746
*)
4847

4948
let int_reg_name =
50-
[| "3"; "4"; "5"; "6"; "7"; "8"; "9"; "10";
51-
"14"; "15"; "16"; "17"; "18"; "19"; "20"; "21";
52-
"22"; "23"; "24"; "25"; "26"; "27" |]
49+
[| "3"; "4"; "5"; "6"; "7"; "8"; "9"; "10"; (* 0 - 7 *)
50+
"14"; "15"; "16"; "17"; "18"; "19"; "20"; "21"; (* 8 - 15 *)
51+
"22"; "23"; "24"; "25"; "26"; "27"; "28" |] (* 16 - 22 *)
5352

5453
let float_reg_name =
5554
[| "1"; "2"; "3"; "4"; "5"; "6"; "7"; "8";
@@ -64,7 +63,7 @@ let register_class r =
6463
| Val | Int | Addr -> 0
6564
| Float -> 1
6665

67-
let num_available_registers = [| 22; 31 |]
66+
let num_available_registers = [| 23; 31 |]
6867

6968
let first_available_register = [| 0; 100 |]
7069

@@ -76,7 +75,7 @@ let rotate_registers = true
7675
(* Representation of hard registers by pseudo-registers *)
7776

7877
let hard_int_reg =
79-
let v = Array.make 22 Reg.dummy in
78+
let v = Array.make 23 Reg.dummy in
8079
(* Initialise every slot of [v].  The bound is derived from the array
   length rather than hard-coded, so that enlarging the register file
   cannot leave a trailing entry set to [Reg.dummy]. *)
for i = 0 to Array.length v - 1 do v.(i) <- Reg.at_location Int (Reg i) done; v
8180

8281
let hard_float_reg =
@@ -315,11 +314,11 @@ let destroyed_at_reloadretaddr = [| phys_reg 11 |]
315314

316315
let safe_register_pressure = function
317316
Iextcall _ -> 14
318-
| _ -> 22
317+
| _ -> 23
319318

320319
let max_register_pressure = function
321320
Iextcall _ -> [| 14; 18 |]
322-
| _ -> [| 22; 30 |]
321+
| _ -> [| 23; 30 |]
323322

324323
(* Pure operations (without any side effect besides updating their result
325324
registers). *)

backend/riscv/emit.mlp

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -82,12 +82,11 @@ let rodata_space =
8282

8383
(* Names for special regs *)
8484

85-
let reg_tmp = phys_reg 22
85+
let reg_tmp = phys_reg 23
8686
let reg_t2 = phys_reg 16
87-
let reg_domain_state_ptr = phys_reg 23
87+
let reg_domain_state_ptr = phys_reg 26
8888
let reg_trap = phys_reg 24
8989
let reg_alloc_ptr = phys_reg 25
90-
let reg_alloc_lim = phys_reg 26
9190

9291
(* Output a pseudo-register *)
9392

@@ -392,13 +391,15 @@ let emit_instr i =
392391
let lbl_after_alloc = new_label () in
393392
let lbl_call_gc = new_label () in
394393
let n = -bytes in
394+
let offset = Domainstate.(idx_of_field Domain_young_limit) * 8 in
395395
if is_immediate n then
396396
` addi {emit_reg reg_alloc_ptr}, {emit_reg reg_alloc_ptr}, {emit_int n}\n`
397397
else begin
398398
` li {emit_reg reg_tmp}, {emit_int n}\n`;
399399
` add {emit_reg reg_alloc_ptr}, {emit_reg reg_alloc_ptr}, {emit_reg reg_tmp}\n`
400400
end;
401-
` bltu {emit_reg reg_alloc_ptr}, {emit_reg reg_alloc_lim}, {emit_label lbl_call_gc}\n`;
401+
` ld {emit_reg reg_tmp}, {emit_int offset}({emit_reg reg_domain_state_ptr})\n`;
402+
` bltu {emit_reg reg_alloc_ptr}, {emit_reg reg_tmp}, {emit_label lbl_call_gc}\n`;
402403
`{emit_label lbl_after_alloc}:\n`;
403404
` addi {emit_reg i.res.(0)}, {emit_reg reg_alloc_ptr}, {emit_int size_addr}\n`;
404405
call_gc_sites :=

backend/riscv/proc.ml

Lines changed: 21 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -37,12 +37,12 @@ let word_addressed = false
3737
a0-a7 0-7 arguments/results
3838
s2-s9 8-15 arguments/results (preserved by C)
3939
t2-t6 16-20 temporary
40-
t0 21 temporary
41-
t1 22 temporary (used by code generator)
42-
s0 23 domain pointer (preserved by C)
40+
s0 21 general purpose (preserved by C)
41+
t0 22 temporary
42+
t1 23 temporary (used by code generator)
4343
s1 24 trap pointer (preserved by C)
4444
s10 25 allocation pointer (preserved by C)
45-
s11 26 allocation limit (preserved by C)
45+
s11 26 domain pointer (preserved by C)
4646
4747
Floating-point register map
4848
---------------------------
@@ -66,11 +66,12 @@ let word_addressed = false
6666
*)
6767

6868
let int_reg_name =
69-
[| "a0"; "a1"; "a2"; "a3"; "a4"; "a5"; "a6"; "a7";
70-
"s2"; "s3"; "s4"; "s5"; "s6"; "s7"; "s8"; "s9";
71-
"t2"; "t3"; "t4"; "t5"; "t6";
72-
"t0"; "t1";
73-
"s0"; "s1"; "s10"; "s11" |]
69+
[| "a0"; "a1"; "a2"; "a3"; "a4"; "a5"; "a6"; "a7"; (* 0 - 7 *)
70+
"s2"; "s3"; "s4"; "s5"; "s6"; "s7"; "s8"; "s9"; (* 8 - 15 *)
71+
"t2"; "t3"; "t4"; "t5"; "t6"; (* 16 - 20 *)
72+
"s0"; (* 21 *)
73+
"t0"; "t1"; (* 22 - 23 *)
74+
"s1"; "s10"; "s11" |] (* 24 - 26 *)
7475

7576
let float_reg_name =
7677
[| "ft0"; "ft1"; "ft2"; "ft3"; "ft4"; "ft5"; "ft6"; "ft7";
@@ -86,7 +87,7 @@ let register_class r =
8687
| Val | Int | Addr -> 0
8788
| Float -> 1
8889

89-
let num_available_registers = [| 22; 32 |]
90+
let num_available_registers = [| 23; 32 |]
9091

9192
let first_available_register = [| 0; 100 |]
9293

@@ -235,21 +236,21 @@ let regs_are_volatile _ = false
235236
let destroyed_at_c_call =
236237
(* s0-s11 and fs0-fs11 are callee-save *)
237238
Array.of_list(List.map phys_reg
238-
[0; 1; 2; 3; 4; 5; 6; 7; 16; 17; 18; 19; 20; 21;
239+
[0; 1; 2; 3; 4; 5; 6; 7; 16; 17; 18; 19; 20; 22;
239240
100; 101; 102; 103; 104; 105; 106; 107; 110; 111; 112; 113; 114; 115; 116;
240241
117; 128; 129; 130; 131])
241242

242243
let destroyed_at_alloc =
243-
(* t0-t3 are used for PLT stubs *)
244-
if !Clflags.dlcode then Array.map phys_reg [|16; 17; 18; 19; 20; 21|]
244+
(* t0-t6 are used for PLT stubs *)
245+
if !Clflags.dlcode then Array.map phys_reg [|16; 17; 18; 19; 20; 22|]
245246
else [| |]
246247

247248
let destroyed_at_oper = function
248249
| Iop(Icall_ind | Icall_imm _ | Iextcall{alloc = true; _}) -> all_phys_regs
249250
| Iop(Iextcall{alloc = false; _}) -> destroyed_at_c_call
250251
| Iop(Ialloc _) -> destroyed_at_alloc
251252
| Iop(Istore(Single, _, _)) -> [| phys_reg 100 |]
252-
| Iswitch _ -> [| phys_reg 21 |]
253+
| Iswitch _ -> [| phys_reg 22 |] (* t0 *)
253254
| _ -> [||]
254255

255256
let destroyed_at_raise = all_phys_regs
@@ -259,12 +260,12 @@ let destroyed_at_reloadretaddr = [| |]
259260
(* Maximal register pressure *)
260261

261262
let safe_register_pressure = function
262-
| Iextcall _ -> 15
263-
| _ -> 22
263+
| Iextcall _ -> 9
264+
| _ -> 23
264265

265266
let max_register_pressure = function
266-
| Iextcall _ -> [| 15; 18 |]
267-
| _ -> [| 22; 30 |]
267+
| Iextcall _ -> [| 9; 12 |]
268+
| _ -> [| 23; 30 |]
268269

269270
(* Pure operations (without any side effect besides updating their result
270271
registers). *)
@@ -293,8 +294,9 @@ let int_dwarf_reg_numbers =
293294
[| 10; 11; 12; 13; 14; 15; 16; 17;
294295
18; 19; 20; 21; 22; 23; 24; 25;
295296
7; 28; 29; 30; 31;
297+
8;
296298
5; 6;
297-
8; 9; 26; 27;
299+
9; 26; 27;
298300
|]
299301

300302
let float_dwarf_reg_numbers =

0 commit comments

Comments
 (0)