From d01e6f4b336bd4d275c8d8ceba64015bb9a761d0 Mon Sep 17 00:00:00 2001 From: Xavier Leroy Date: Thu, 3 Sep 2020 17:31:15 +0200 Subject: [PATCH] Do not cache young_limit in a processor register (upstream PR 9876) On target architectures with 32 or more registers, a register was used to cache the value of the young_limit field of the domain state. This reduced the size and execution time of the code for inlined allocations. However, this usage is problematic with respect to polling for signals and to inter-domain communication in Multicore OCaml, because it is often not possible to change the value of the register when we change young_limit. So, the change to young_limit doesn't take effect immediately, only when the register is reloaded from young_limit. - Remove the caching of young_limit in a register from the ARM64, PowerPC and RISC-V ports. - Recycle the former "young limit" register, giving one more allocatable register. Now that we have an unused callee-save register on ARM64, PowerPC, and RISC-V, make it available for register allocation. - Assorted cleanups in runtime/*.S and in asmcomp/*/proc.ml. - ARM64: fix wrong register pressure limits for Iextcall. There are only 7 callee-save integer registers (x19 to x25), not 10. 
--- backend/arm64/emit.mlp | 21 ++++--- backend/arm64/proc.ml | 30 +++++----- backend/power/emit.mlp | 48 ++++++++------- backend/power/proc.ml | 19 +++--- backend/riscv/emit.mlp | 9 +-- backend/riscv/proc.ml | 40 +++++++------ ocaml/asmcomp/arm64/emit.mlp | 21 ++++--- ocaml/asmcomp/arm64/proc.ml | 30 +++++----- ocaml/asmcomp/power/emit.mlp | 48 ++++++++------- ocaml/asmcomp/power/proc.ml | 19 +++--- ocaml/asmcomp/riscv/emit.mlp | 9 +-- ocaml/asmcomp/riscv/proc.ml | 40 +++++++------ ocaml/runtime/arm64.S | 29 +++++---- ocaml/runtime/power.S | 15 ++--- ocaml/runtime/riscv.S | 108 ++++++++++++++++------------------ ocaml/runtime/signals_nat.c | 7 --- ocaml/runtime/signals_osdep.h | 6 -- 17 files changed, 247 insertions(+), 252 deletions(-) diff --git a/backend/arm64/emit.mlp b/backend/arm64/emit.mlp index d4b0680898b..210a69f75de 100644 --- a/backend/arm64/emit.mlp +++ b/backend/arm64/emit.mlp @@ -33,12 +33,11 @@ let fastcode_flag = ref true (* Names for special regs *) -let reg_domain_state_ptr = phys_reg 22 -let reg_trap_ptr = phys_reg 23 -let reg_alloc_ptr = phys_reg 24 -let reg_alloc_limit = phys_reg 25 -let reg_tmp1 = phys_reg 26 -let reg_x8 = phys_reg 8 +let reg_domain_state_ptr = phys_reg 25 (* x28 *) +let reg_trap_ptr = phys_reg 23 (* x26 *) +let reg_alloc_ptr = phys_reg 24 (* x27 *) +let reg_tmp1 = phys_reg 26 (* x16 *) +let reg_x8 = phys_reg 8 (* x8 *) (* Output a label *) @@ -504,10 +503,8 @@ module BR = Branch_relaxation.Make (struct | Lop (Iload (size, addr)) | Lop (Istore (size, addr, _)) -> let based = match addr with Iindexed _ -> 0 | Ibased _ -> 1 in based + begin match size with Single -> 2 | _ -> 1 end - | Lop (Ialloc {bytes = num_bytes}) when !fastcode_flag -> - if num_bytes <= 0xFFF then 4 else 5 - | Lop (Ispecific (Ifar_alloc {bytes = num_bytes})) when !fastcode_flag -> - if num_bytes <= 0xFFF then 5 else 6 + | Lop (Ialloc _) when !fastcode_flag -> 5 + | Lop (Ispecific (Ifar_alloc _)) when !fastcode_flag -> 6 | Lop (Ialloc { bytes = num_bytes; _ 
}) | Lop (Ispecific (Ifar_alloc { bytes = num_bytes; _ })) -> begin match num_bytes with @@ -597,8 +594,10 @@ let assembly_code_for_allocation i ~n ~far ~dbginfo = so it is reasonable to assume n < 0x1_000. This makes the generated code simpler. *) assert (16 <= n && n < 0x1_000 && n land 0x7 = 0); + let offset = Domainstate.(idx_of_field Domain_young_limit) * 8 in + ` ldr {emit_reg reg_tmp1}, [{emit_reg reg_domain_state_ptr}, #{emit_int offset}]\n`; ` sub {emit_reg reg_alloc_ptr}, {emit_reg reg_alloc_ptr}, #{emit_int n}\n`; - ` cmp {emit_reg reg_alloc_ptr}, {emit_reg reg_alloc_limit}\n`; + ` cmp {emit_reg reg_alloc_ptr}, {emit_reg reg_tmp1}\n`; if not far then begin ` b.lo {emit_label lbl_call_gc}\n` end else begin diff --git a/backend/arm64/proc.ml b/backend/arm64/proc.ml index 03500512b7d..0a093c56f4b 100644 --- a/backend/arm64/proc.ml +++ b/backend/arm64/proc.ml @@ -34,11 +34,10 @@ let word_addressed = false x0 - x15 general purpose (caller-save) x16, x17 temporaries (used by call veeners) x18 platform register (reserved) - x19 - x24 general purpose (callee-save) - x25 domain state pointer + x19 - x25 general purpose (callee-save) x26 trap pointer x27 alloc pointer - x28 alloc limit + x28 domain state pointer x29 frame pointer x30 return address sp / xzr stack pointer / zero register @@ -49,10 +48,11 @@ let word_addressed = false *) let int_reg_name = - [| "x0"; "x1"; "x2"; "x3"; "x4"; "x5"; "x6"; "x7"; - "x8"; "x9"; "x10"; "x11"; "x12"; "x13"; "x14"; "x15"; - "x19"; "x20"; "x21"; "x22"; "x23"; "x24"; - "x25"; "x26"; "x27"; "x28"; "x16"; "x17" |] + [| "x0"; "x1"; "x2"; "x3"; "x4"; "x5"; "x6"; "x7"; (* 0 - 7 *) + "x8"; "x9"; "x10"; "x11"; "x12"; "x13"; "x14"; "x15"; (* 8 - 15 *) + "x19"; "x20"; "x21"; "x22"; "x23"; "x24"; "x25"; (* 16 - 22 *) + "x26"; "x27"; "x28"; (* 23 - 25 *) + "x16"; "x17" |] (* 26 - 27 *) let float_reg_name = [| "d0"; "d1"; "d2"; "d3"; "d4"; "d5"; "d6"; "d7"; @@ -68,7 +68,7 @@ let register_class r = | Float -> 1 let num_available_registers 
= - [| 22; 32 |] (* first 22 int regs allocatable; all float regs allocatable *) + [| 23; 32 |] (* first 23 int regs allocatable; all float regs allocatable *) let first_available_register = [| 0; 100 |] @@ -270,16 +270,16 @@ let destroyed_at_reloadretaddr = [| |] (* Maximal register pressure *) let safe_register_pressure = function - | Iextcall _ -> 8 - | Ialloc _ -> 24 - | _ -> 25 + | Iextcall _ -> 7 + | Ialloc _ -> 22 + | _ -> 23 let max_register_pressure = function - | Iextcall _ -> [| 10; 8 |] - | Ialloc _ -> [| 24; 32 |] + | Iextcall _ -> [| 7; 8 |] (* 7 integer callee-saves, 8 FP callee-saves *) + | Ialloc _ -> [| 22; 32 |] | Iintoffloat | Ifloatofint - | Iload(Single, _) | Istore(Single, _, _) -> [| 25; 31 |] - | _ -> [| 25; 32 |] + | Iload(Single, _) | Istore(Single, _, _) -> [| 23; 31 |] + | _ -> [| 23; 32 |] (* Pure operations (without any side effect besides updating their result registers). *) diff --git a/backend/power/emit.mlp b/backend/power/emit.mlp index 40f9a104c48..d921b728d34 100644 --- a/backend/power/emit.mlp +++ b/backend/power/emit.mlp @@ -508,8 +508,8 @@ module BR = Branch_relaxation.Make (struct then load_store_size addr + 1 else load_store_size addr | Lop(Istore(_chunk, addr, _)) -> load_store_size addr - | Lop(Ialloc _) -> 4 - | Lop(Ispecific(Ialloc_far _)) -> 5 + | Lop(Ialloc _) -> 5 + | Lop(Ispecific(Ialloc_far _)) -> 6 | Lop(Iintop Imod) -> 3 | Lop(Iintop(Icomp _)) -> 4 | Lop(Icompf _) -> 5 @@ -550,6 +550,26 @@ module BR = Branch_relaxation.Make (struct let relax_intop_imm_checkbound ~bound:_ = assert false end) +(* Assembly code for inlined allocation *) + +let emit_alloc i bytes dbginfo far = + if !call_gc_label = 0 then call_gc_label := new_label (); + let offset = Domainstate.(idx_of_field Domain_young_limit) * 8 in + ` {emit_string lg} 0, {emit_int offset}(30)\n`; + ` addi 31, 31, {emit_int(-bytes)}\n`; + ` {emit_string cmplg} 31, 0\n`; + if not far then begin + ` bltl {emit_label !call_gc_label}\n`; + record_frame i.live 
(Dbg_alloc dbginfo); + ` addi {emit_reg i.res.(0)}, 31, {emit_int size_addr}\n` + end else begin + let lbl = new_label() in + ` bge {emit_label lbl}\n`; + ` bl {emit_label !call_gc_label}\n`; + record_frame i.live (Dbg_alloc dbginfo); + `{emit_label lbl}: addi {emit_reg i.res.(0)}, 31, {emit_int size_addr}\n` + end + (* Output the assembly code for an instruction *) let emit_instr i = @@ -782,22 +802,10 @@ let emit_instr i = | Single -> "stfs" | Double -> "stfd" in emit_load_store storeinstr addr i.arg 1 i.arg.(0) - | Lop(Ialloc { bytes = n; dbginfo }) -> - if !call_gc_label = 0 then call_gc_label := new_label (); - ` addi 31, 31, {emit_int(-n)}\n`; - ` {emit_string cmplg} 31, 30\n`; - ` bltl {emit_label !call_gc_label}\n`; - record_frame i.live (Dbg_alloc dbginfo); - ` addi {emit_reg i.res.(0)}, 31, {emit_int size_addr}\n`; - | Lop(Ispecific(Ialloc_far { bytes = n; dbginfo })) -> - if !call_gc_label = 0 then call_gc_label := new_label (); - let lbl = new_label() in - ` addi 31, 31, {emit_int(-n)}\n`; - ` {emit_string cmplg} 31, 30\n`; - ` bge {emit_label lbl}\n`; - ` bl {emit_label !call_gc_label}\n`; - record_frame i.live (Dbg_alloc dbginfo); - `{emit_label lbl}: addi {emit_reg i.res.(0)}, 31, {emit_int size_addr}\n` + | Lop(Ialloc { bytes; dbginfo }) -> + emit_alloc i bytes dbginfo false + | Lop(Ispecific(Ialloc_far { bytes; dbginfo })) -> + emit_alloc i bytes dbginfo true | Lop(Iintop Isub) -> (* subfc has swapped arguments *) ` subfc {emit_reg i.res.(0)}, {emit_reg i.arg.(1)}, {emit_reg i.arg.(0)}\n` | Lop(Iintop Imod) -> @@ -1009,8 +1017,8 @@ let emit_instr i = Domainstate.(idx_of_field Domain_backtrace_pos) in begin match abi with - | ELF32 -> ` stw 0, {emit_int (backtrace_pos * 8)}(28)\n` - | _ -> ` std 0, {emit_int (backtrace_pos * 8)}(28)\n` + | ELF32 -> ` stw 0, {emit_int (backtrace_pos * 8)}(30)\n` + | _ -> ` std 0, {emit_int (backtrace_pos * 8)}(30)\n` end; emit_call "caml_raise_exn"; record_frame Reg.Set.empty (Dbg_raise i.dbg); diff --git 
a/backend/power/proc.ml b/backend/power/proc.ml index 02e21b993cd..0de9daa9fed 100644 --- a/backend/power/proc.ml +++ b/backend/power/proc.ml @@ -35,10 +35,9 @@ let word_addressed = false 3 - 10 function arguments and results 11 - 12 temporaries 13 pointer to small data area - 14 - 27 general purpose, preserved by C - 28 domain state pointer + 14 - 28 general purpose, preserved by C 29 trap pointer - 30 allocation limit + 30 domain state pointer 31 allocation pointer Floating-point register map: 0 temporary @@ -47,9 +46,9 @@ let word_addressed = false *) let int_reg_name = - [| "3"; "4"; "5"; "6"; "7"; "8"; "9"; "10"; - "14"; "15"; "16"; "17"; "18"; "19"; "20"; "21"; - "22"; "23"; "24"; "25"; "26"; "27" |] + [| "3"; "4"; "5"; "6"; "7"; "8"; "9"; "10"; (* 0 - 7 *) + "14"; "15"; "16"; "17"; "18"; "19"; "20"; "21"; (* 8 - 15 *) + "22"; "23"; "24"; "25"; "26"; "27"; "28" |] (* 16 - 22 *) let float_reg_name = [| "1"; "2"; "3"; "4"; "5"; "6"; "7"; "8"; @@ -64,7 +63,7 @@ let register_class r = | Val | Int | Addr -> 0 | Float -> 1 -let num_available_registers = [| 22; 31 |] +let num_available_registers = [| 23; 31 |] let first_available_register = [| 0; 100 |] @@ -76,7 +75,7 @@ let rotate_registers = true (* Representation of hard registers by pseudo-registers *) let hard_int_reg = - let v = Array.make 22 Reg.dummy in - for i = 0 to 21 do v.(i) <- Reg.at_location Int (Reg i) done; v + let v = Array.make 23 Reg.dummy in + for i = 0 to 22 do v.(i) <- Reg.at_location Int (Reg i) done; v let hard_float_reg = @@ -315,11 +314,11 @@ let destroyed_at_reloadretaddr = [| phys_reg 11 |] let safe_register_pressure = function Iextcall _ -> 14 - | _ -> 22 + | _ -> 23 let max_register_pressure = function Iextcall _ -> [| 14; 18 |] - | _ -> [| 22; 30 |] + | _ -> [| 23; 30 |] (* Pure operations (without any side effect besides updating their result registers). 
*) diff --git a/backend/riscv/emit.mlp b/backend/riscv/emit.mlp index 809633a4cfb..1fd616c8cd6 100644 --- a/backend/riscv/emit.mlp +++ b/backend/riscv/emit.mlp @@ -82,12 +82,11 @@ let rodata_space = (* Names for special regs *) -let reg_tmp = phys_reg 22 +let reg_tmp = phys_reg 23 let reg_t2 = phys_reg 16 -let reg_domain_state_ptr = phys_reg 23 +let reg_domain_state_ptr = phys_reg 26 let reg_trap = phys_reg 24 let reg_alloc_ptr = phys_reg 25 -let reg_alloc_lim = phys_reg 26 (* Output a pseudo-register *) @@ -392,13 +391,15 @@ let emit_instr i = let lbl_after_alloc = new_label () in let lbl_call_gc = new_label () in let n = -bytes in + let offset = Domainstate.(idx_of_field Domain_young_limit) * 8 in if is_immediate n then ` addi {emit_reg reg_alloc_ptr}, {emit_reg reg_alloc_ptr}, {emit_int n}\n` else begin ` li {emit_reg reg_tmp}, {emit_int n}\n`; ` add {emit_reg reg_alloc_ptr}, {emit_reg reg_alloc_ptr}, {emit_reg reg_tmp}\n` end; - ` bltu {emit_reg reg_alloc_ptr}, {emit_reg reg_alloc_lim}, {emit_label lbl_call_gc}\n`; + ` ld {emit_reg reg_tmp}, {emit_int offset}({emit_reg reg_domain_state_ptr})\n`; + ` bltu {emit_reg reg_alloc_ptr}, {emit_reg reg_tmp}, {emit_label lbl_call_gc}\n`; `{emit_label lbl_after_alloc}:\n`; ` addi {emit_reg i.res.(0)}, {emit_reg reg_alloc_ptr}, {emit_int size_addr}\n`; call_gc_sites := diff --git a/backend/riscv/proc.ml b/backend/riscv/proc.ml index a9f970eb714..ce72ee4b864 100644 --- a/backend/riscv/proc.ml +++ b/backend/riscv/proc.ml @@ -37,12 +37,12 @@ let word_addressed = false a0-a7 0-7 arguments/results s2-s9 8-15 arguments/results (preserved by C) t2-t6 16-20 temporary - t0 21 temporary - t1 22 temporary (used by code generator) - s0 23 domain pointer (preserved by C) + s0 21 general purpose (preserved by C) + t0 22 temporary + t1 23 temporary (used by code generator) s1 24 trap pointer (preserved by C) s10 25 allocation pointer (preserved by C) - s11 26 allocation limit (preserved by C) + s11 26 domain pointer (preserved by C) 
Floating-point register map --------------------------- @@ -66,11 +66,12 @@ let word_addressed = false *) let int_reg_name = - [| "a0"; "a1"; "a2"; "a3"; "a4"; "a5"; "a6"; "a7"; - "s2"; "s3"; "s4"; "s5"; "s6"; "s7"; "s8"; "s9"; - "t2"; "t3"; "t4"; "t5"; "t6"; - "t0"; "t1"; - "s0"; "s1"; "s10"; "s11" |] + [| "a0"; "a1"; "a2"; "a3"; "a4"; "a5"; "a6"; "a7"; (* 0 - 7 *) + "s2"; "s3"; "s4"; "s5"; "s6"; "s7"; "s8"; "s9"; (* 8 - 15 *) + "t2"; "t3"; "t4"; "t5"; "t6"; (* 16 - 20 *) + "s0"; (* 21 *) + "t0"; "t1"; (* 22 - 23 *) + "s1"; "s10"; "s11" |] (* 24 - 26 *) let float_reg_name = [| "ft0"; "ft1"; "ft2"; "ft3"; "ft4"; "ft5"; "ft6"; "ft7"; @@ -86,7 +87,7 @@ let register_class r = | Val | Int | Addr -> 0 | Float -> 1 -let num_available_registers = [| 22; 32 |] +let num_available_registers = [| 23; 32 |] let first_available_register = [| 0; 100 |] @@ -235,13 +236,13 @@ let regs_are_volatile _ = false let destroyed_at_c_call = (* s0-s11 and fs0-fs11 are callee-save *) Array.of_list(List.map phys_reg - [0; 1; 2; 3; 4; 5; 6; 7; 16; 17; 18; 19; 20; 21; + [0; 1; 2; 3; 4; 5; 6; 7; 16; 17; 18; 19; 20; 22; 100; 101; 102; 103; 104; 105; 106; 107; 110; 111; 112; 113; 114; 115; 116; 117; 128; 129; 130; 131]) let destroyed_at_alloc = - (* t0-t3 are used for PLT stubs *) - if !Clflags.dlcode then Array.map phys_reg [|16; 17; 18; 19; 20; 21|] + (* t0-t6 are used for PLT stubs *) + if !Clflags.dlcode then Array.map phys_reg [|16; 17; 18; 19; 20; 22|] else [| |] let destroyed_at_oper = function @@ -249,7 +250,7 @@ let destroyed_at_oper = function | Iop(Iextcall{alloc = false; _}) -> destroyed_at_c_call | Iop(Ialloc _) -> destroyed_at_alloc | Iop(Istore(Single, _, _)) -> [| phys_reg 100 |] - | Iswitch _ -> [| phys_reg 21 |] + | Iswitch _ -> [| phys_reg 22 |] (* t0 *) | _ -> [||] let destroyed_at_raise = all_phys_regs @@ -259,12 +260,12 @@ let destroyed_at_reloadretaddr = [| |] (* Maximal register pressure *) let safe_register_pressure = function - | Iextcall _ -> 15 - | _ -> 22 + | Iextcall 
_ -> 9 + | _ -> 23 let max_register_pressure = function - | Iextcall _ -> [| 15; 18 |] - | _ -> [| 22; 30 |] + | Iextcall _ -> [| 9; 12 |] + | _ -> [| 23; 30 |] (* Pure operations (without any side effect besides updating their result registers). *) @@ -293,8 +294,9 @@ let int_dwarf_reg_numbers = [| 10; 11; 12; 13; 14; 15; 16; 17; 18; 19; 20; 21; 22; 23; 24; 25; 7; 28; 29; 30; 31; + 8; 5; 6; - 8; 9; 26; 27; + 9; 26; 27; |] let float_dwarf_reg_numbers = diff --git a/ocaml/asmcomp/arm64/emit.mlp b/ocaml/asmcomp/arm64/emit.mlp index 7d01a8b047d..23bec3fde56 100644 --- a/ocaml/asmcomp/arm64/emit.mlp +++ b/ocaml/asmcomp/arm64/emit.mlp @@ -33,12 +33,11 @@ let fastcode_flag = ref true (* Names for special regs *) -let reg_domain_state_ptr = phys_reg 22 -let reg_trap_ptr = phys_reg 23 -let reg_alloc_ptr = phys_reg 24 -let reg_alloc_limit = phys_reg 25 -let reg_tmp1 = phys_reg 26 -let reg_x8 = phys_reg 8 +let reg_domain_state_ptr = phys_reg 25 (* x28 *) +let reg_trap_ptr = phys_reg 23 (* x26 *) +let reg_alloc_ptr = phys_reg 24 (* x27 *) +let reg_tmp1 = phys_reg 26 (* x16 *) +let reg_x8 = phys_reg 8 (* x8 *) (* Output a label *) @@ -504,10 +503,8 @@ module BR = Branch_relaxation.Make (struct | Lop (Iload (size, addr)) | Lop (Istore (size, addr, _)) -> let based = match addr with Iindexed _ -> 0 | Ibased _ -> 1 in based + begin match size with Single -> 2 | _ -> 1 end - | Lop (Ialloc {bytes = num_bytes}) when !fastcode_flag -> - if num_bytes <= 0xFFF then 4 else 5 - | Lop (Ispecific (Ifar_alloc {bytes = num_bytes})) when !fastcode_flag -> - if num_bytes <= 0xFFF then 5 else 6 + | Lop (Ialloc _) when !fastcode_flag -> 5 + | Lop (Ispecific (Ifar_alloc _)) when !fastcode_flag -> 6 | Lop (Ialloc { bytes = num_bytes; _ }) | Lop (Ispecific (Ifar_alloc { bytes = num_bytes; _ })) -> begin match num_bytes with @@ -593,8 +590,10 @@ let assembly_code_for_allocation i ~n ~far ~dbginfo = so it is reasonable to assume n < 0x1_000. This makes the generated code simpler. 
*) assert (16 <= n && n < 0x1_000 && n land 0x7 = 0); + let offset = Domainstate.(idx_of_field Domain_young_limit) * 8 in + ` ldr {emit_reg reg_tmp1}, [{emit_reg reg_domain_state_ptr}, #{emit_int offset}]\n`; ` sub {emit_reg reg_alloc_ptr}, {emit_reg reg_alloc_ptr}, #{emit_int n}\n`; - ` cmp {emit_reg reg_alloc_ptr}, {emit_reg reg_alloc_limit}\n`; + ` cmp {emit_reg reg_alloc_ptr}, {emit_reg reg_tmp1}\n`; if not far then begin ` b.lo {emit_label lbl_call_gc}\n` end else begin diff --git a/ocaml/asmcomp/arm64/proc.ml b/ocaml/asmcomp/arm64/proc.ml index 7635181a0a6..ac849a46cbb 100644 --- a/ocaml/asmcomp/arm64/proc.ml +++ b/ocaml/asmcomp/arm64/proc.ml @@ -33,11 +33,10 @@ let word_addressed = false x0 - x15 general purpose (caller-save) x16, x17 temporaries (used by call veeners) x18 platform register (reserved) - x19 - x24 general purpose (callee-save) - x25 domain state pointer + x19 - x25 general purpose (callee-save) x26 trap pointer x27 alloc pointer - x28 alloc limit + x28 domain state pointer x29 frame pointer x30 return address sp / xzr stack pointer / zero register @@ -48,10 +47,11 @@ let word_addressed = false *) let int_reg_name = - [| "x0"; "x1"; "x2"; "x3"; "x4"; "x5"; "x6"; "x7"; - "x8"; "x9"; "x10"; "x11"; "x12"; "x13"; "x14"; "x15"; - "x19"; "x20"; "x21"; "x22"; "x23"; "x24"; - "x25"; "x26"; "x27"; "x28"; "x16"; "x17" |] + [| "x0"; "x1"; "x2"; "x3"; "x4"; "x5"; "x6"; "x7"; (* 0 - 7 *) + "x8"; "x9"; "x10"; "x11"; "x12"; "x13"; "x14"; "x15"; (* 8 - 15 *) + "x19"; "x20"; "x21"; "x22"; "x23"; "x24"; "x25"; (* 16 - 22 *) + "x26"; "x27"; "x28"; (* 23 - 25 *) + "x16"; "x17" |] (* 26 - 27 *) let float_reg_name = [| "d0"; "d1"; "d2"; "d3"; "d4"; "d5"; "d6"; "d7"; @@ -67,7 +67,7 @@ let register_class r = | Float -> 1 let num_available_registers = - [| 22; 32 |] (* first 22 int regs allocatable; all float regs allocatable *) + [| 23; 32 |] (* first 23 int regs allocatable; all float regs allocatable *) let first_available_register = [| 0; 100 |] @@ -269,16 +269,16 
@@ let destroyed_at_reloadretaddr = [| |] (* Maximal register pressure *) let safe_register_pressure = function - | Iextcall _ -> 8 - | Ialloc _ -> 24 - | _ -> 25 + | Iextcall _ -> 7 + | Ialloc _ -> 22 + | _ -> 23 let max_register_pressure = function - | Iextcall _ -> [| 10; 8 |] - | Ialloc _ -> [| 24; 32 |] + | Iextcall _ -> [| 7; 8 |] (* 7 integer callee-saves, 8 FP callee-saves *) + | Ialloc _ -> [| 22; 32 |] | Iintoffloat | Ifloatofint - | Iload(Single, _) | Istore(Single, _, _) -> [| 25; 31 |] - | _ -> [| 25; 32 |] + | Iload(Single, _) | Istore(Single, _, _) -> [| 23; 31 |] + | _ -> [| 23; 32 |] (* Pure operations (without any side effect besides updating their result registers). *) diff --git a/ocaml/asmcomp/power/emit.mlp b/ocaml/asmcomp/power/emit.mlp index 20d744a625d..681a7659b95 100644 --- a/ocaml/asmcomp/power/emit.mlp +++ b/ocaml/asmcomp/power/emit.mlp @@ -486,8 +486,8 @@ module BR = Branch_relaxation.Make (struct then load_store_size addr + 1 else load_store_size addr | Lop(Istore(_chunk, addr, _)) -> load_store_size addr - | Lop(Ialloc _) -> 4 - | Lop(Ispecific(Ialloc_far _)) -> 5 + | Lop(Ialloc _) -> 5 + | Lop(Ispecific(Ialloc_far _)) -> 6 | Lop(Iintop Imod) -> 3 | Lop(Iintop(Icomp _)) -> 4 | Lop(Iintop _) -> 1 @@ -524,6 +524,26 @@ module BR = Branch_relaxation.Make (struct let relax_intop_imm_checkbound ~bound:_ = assert false end) +(* Assembly code for inlined allocation *) + +let emit_alloc i bytes dbginfo far = + if !call_gc_label = 0 then call_gc_label := new_label (); + let offset = Domainstate.(idx_of_field Domain_young_limit) * 8 in + ` {emit_string lg} 0, {emit_int offset}(30)\n`; + ` addi 31, 31, {emit_int(-bytes)}\n`; + ` {emit_string cmplg} 31, 0\n`; + if not far then begin + ` bltl {emit_label !call_gc_label}\n`; + record_frame i.live (Dbg_alloc dbginfo); + ` addi {emit_reg i.res.(0)}, 31, {emit_int size_addr}\n` + end else begin + let lbl = new_label() in + ` bge {emit_label lbl}\n`; + ` bl {emit_label !call_gc_label}\n`; + 
record_frame i.live (Dbg_alloc dbginfo); + `{emit_label lbl}: addi {emit_reg i.res.(0)}, 31, {emit_int size_addr}\n` + end + (* Output the assembly code for an instruction *) let emit_instr i = @@ -754,22 +774,10 @@ let emit_instr i = | Single -> "stfs" | Double -> "stfd" in emit_load_store storeinstr addr i.arg 1 i.arg.(0) - | Lop(Ialloc { bytes = n; dbginfo }) -> - if !call_gc_label = 0 then call_gc_label := new_label (); - ` addi 31, 31, {emit_int(-n)}\n`; - ` {emit_string cmplg} 31, 30\n`; - ` bltl {emit_label !call_gc_label}\n`; - record_frame i.live (Dbg_alloc dbginfo); - ` addi {emit_reg i.res.(0)}, 31, {emit_int size_addr}\n`; - | Lop(Ispecific(Ialloc_far { bytes = n; dbginfo })) -> - if !call_gc_label = 0 then call_gc_label := new_label (); - let lbl = new_label() in - ` addi 31, 31, {emit_int(-n)}\n`; - ` {emit_string cmplg} 31, 30\n`; - ` bge {emit_label lbl}\n`; - ` bl {emit_label !call_gc_label}\n`; - record_frame i.live (Dbg_alloc dbginfo); - `{emit_label lbl}: addi {emit_reg i.res.(0)}, 31, {emit_int size_addr}\n` + | Lop(Ialloc { bytes; dbginfo }) -> + emit_alloc i bytes dbginfo false + | Lop(Ispecific(Ialloc_far { bytes; dbginfo })) -> + emit_alloc i bytes dbginfo true | Lop(Iintop Isub) -> (* subfc has swapped arguments *) ` subfc {emit_reg i.res.(0)}, {emit_reg i.arg.(1)}, {emit_reg i.arg.(0)}\n` | Lop(Iintop Imod) -> @@ -985,8 +993,8 @@ let emit_instr i = Domainstate.(idx_of_field Domain_backtrace_pos) in begin match abi with - | ELF32 -> ` stw 0, {emit_int (backtrace_pos * 8)}(28)\n` - | _ -> ` std 0, {emit_int (backtrace_pos * 8)}(28)\n` + | ELF32 -> ` stw 0, {emit_int (backtrace_pos * 8)}(30)\n` + | _ -> ` std 0, {emit_int (backtrace_pos * 8)}(30)\n` end; emit_call "caml_raise_exn"; record_frame Reg.Set.empty (Dbg_raise i.dbg); diff --git a/ocaml/asmcomp/power/proc.ml b/ocaml/asmcomp/power/proc.ml index eec140db38f..24f95ff62b8 100644 --- a/ocaml/asmcomp/power/proc.ml +++ b/ocaml/asmcomp/power/proc.ml @@ -34,10 +34,9 @@ let word_addressed = 
false 3 - 10 function arguments and results 11 - 12 temporaries 13 pointer to small data area - 14 - 27 general purpose, preserved by C - 28 domain state pointer + 14 - 28 general purpose, preserved by C 29 trap pointer - 30 allocation limit + 30 domain state pointer 31 allocation pointer Floating-point register map: 0 temporary @@ -46,9 +45,9 @@ let word_addressed = false *) let int_reg_name = - [| "3"; "4"; "5"; "6"; "7"; "8"; "9"; "10"; - "14"; "15"; "16"; "17"; "18"; "19"; "20"; "21"; - "22"; "23"; "24"; "25"; "26"; "27" |] + [| "3"; "4"; "5"; "6"; "7"; "8"; "9"; "10"; (* 0 - 7 *) + "14"; "15"; "16"; "17"; "18"; "19"; "20"; "21"; (* 8 - 15 *) + "22"; "23"; "24"; "25"; "26"; "27"; "28" |] (* 16 - 22 *) let float_reg_name = [| "1"; "2"; "3"; "4"; "5"; "6"; "7"; "8"; @@ -63,7 +62,7 @@ let register_class r = | Val | Int | Addr -> 0 | Float -> 1 -let num_available_registers = [| 22; 31 |] +let num_available_registers = [| 23; 31 |] let first_available_register = [| 0; 100 |] @@ -75,7 +74,7 @@ let rotate_registers = true (* Representation of hard registers by pseudo-registers *) let hard_int_reg = - let v = Array.make 22 Reg.dummy in - for i = 0 to 21 do v.(i) <- Reg.at_location Int (Reg i) done; v + let v = Array.make 23 Reg.dummy in + for i = 0 to 22 do v.(i) <- Reg.at_location Int (Reg i) done; v let hard_float_reg = @@ -314,11 +313,11 @@ let destroyed_at_reloadretaddr = [| phys_reg 11 |] let safe_register_pressure = function Iextcall _ -> 14 - | _ -> 22 + | _ -> 23 let max_register_pressure = function Iextcall _ -> [| 14; 18 |] - | _ -> [| 23; 30 |] + | _ -> [| 23; 30 |] (* Pure operations (without any side effect besides updating their result registers). 
*) diff --git a/ocaml/asmcomp/riscv/emit.mlp b/ocaml/asmcomp/riscv/emit.mlp index d8e694cf214..2f8582148d8 100644 --- a/ocaml/asmcomp/riscv/emit.mlp +++ b/ocaml/asmcomp/riscv/emit.mlp @@ -82,12 +82,11 @@ let rodata_space = (* Names for special regs *) -let reg_tmp = phys_reg 22 +let reg_tmp = phys_reg 23 let reg_t2 = phys_reg 16 -let reg_domain_state_ptr = phys_reg 23 +let reg_domain_state_ptr = phys_reg 26 let reg_trap = phys_reg 24 let reg_alloc_ptr = phys_reg 25 -let reg_alloc_lim = phys_reg 26 (* Output a pseudo-register *) @@ -374,13 +373,15 @@ let emit_instr i = let lbl_after_alloc = new_label () in let lbl_call_gc = new_label () in let n = -bytes in + let offset = Domainstate.(idx_of_field Domain_young_limit) * 8 in if is_immediate n then ` addi {emit_reg reg_alloc_ptr}, {emit_reg reg_alloc_ptr}, {emit_int n}\n` else begin ` li {emit_reg reg_tmp}, {emit_int n}\n`; ` add {emit_reg reg_alloc_ptr}, {emit_reg reg_alloc_ptr}, {emit_reg reg_tmp}\n` end; - ` bltu {emit_reg reg_alloc_ptr}, {emit_reg reg_alloc_lim}, {emit_label lbl_call_gc}\n`; + ` ld {emit_reg reg_tmp}, {emit_int offset}({emit_reg reg_domain_state_ptr})\n`; + ` bltu {emit_reg reg_alloc_ptr}, {emit_reg reg_tmp}, {emit_label lbl_call_gc}\n`; `{emit_label lbl_after_alloc}:\n`; ` addi {emit_reg i.res.(0)}, {emit_reg reg_alloc_ptr}, {emit_int size_addr}\n`; call_gc_sites := diff --git a/ocaml/asmcomp/riscv/proc.ml b/ocaml/asmcomp/riscv/proc.ml index 4e30e02bf03..1b460b8e0ab 100644 --- a/ocaml/asmcomp/riscv/proc.ml +++ b/ocaml/asmcomp/riscv/proc.ml @@ -36,12 +36,12 @@ let word_addressed = false a0-a7 0-7 arguments/results s2-s9 8-15 arguments/results (preserved by C) t2-t6 16-20 temporary - t0 21 temporary - t1 22 temporary (used by code generator) - s0 23 domain pointer (preserved by C) + s0 21 general purpose (preserved by C) + t0 22 temporary + t1 23 temporary (used by code generator) s1 24 trap pointer (preserved by C) s10 25 allocation pointer (preserved by C) - s11 26 allocation limit (preserved by 
C) + s11 26 domain pointer (preserved by C) Floating-point register map --------------------------- @@ -65,11 +65,12 @@ let word_addressed = false *) let int_reg_name = - [| "a0"; "a1"; "a2"; "a3"; "a4"; "a5"; "a6"; "a7"; - "s2"; "s3"; "s4"; "s5"; "s6"; "s7"; "s8"; "s9"; - "t2"; "t3"; "t4"; "t5"; "t6"; - "t0"; "t1"; - "s0"; "s1"; "s10"; "s11" |] + [| "a0"; "a1"; "a2"; "a3"; "a4"; "a5"; "a6"; "a7"; (* 0 - 7 *) + "s2"; "s3"; "s4"; "s5"; "s6"; "s7"; "s8"; "s9"; (* 8 - 15 *) + "t2"; "t3"; "t4"; "t5"; "t6"; (* 16 - 20 *) + "s0"; (* 21 *) + "t0"; "t1"; (* 22 - 23 *) + "s1"; "s10"; "s11" |] (* 24 - 26 *) let float_reg_name = [| "ft0"; "ft1"; "ft2"; "ft3"; "ft4"; "ft5"; "ft6"; "ft7"; @@ -85,7 +86,7 @@ let register_class r = | Val | Int | Addr -> 0 | Float -> 1 -let num_available_registers = [| 22; 32 |] +let num_available_registers = [| 23; 32 |] let first_available_register = [| 0; 100 |] @@ -234,13 +235,13 @@ let regs_are_volatile _ = false let destroyed_at_c_call = (* s0-s11 and fs0-fs11 are callee-save *) Array.of_list(List.map phys_reg - [0; 1; 2; 3; 4; 5; 6; 7; 16; 17; 18; 19; 20; 21; + [0; 1; 2; 3; 4; 5; 6; 7; 16; 17; 18; 19; 20; 22; 100; 101; 102; 103; 104; 105; 106; 107; 110; 111; 112; 113; 114; 115; 116; 117; 128; 129; 130; 131]) let destroyed_at_alloc = - (* t0-t3 are used for PLT stubs *) - if !Clflags.dlcode then Array.map phys_reg [|16; 17; 18; 19; 20; 21|] + (* t0-t6 are used for PLT stubs *) + if !Clflags.dlcode then Array.map phys_reg [|16; 17; 18; 19; 20; 22|] else [| |] let destroyed_at_oper = function @@ -248,7 +249,7 @@ let destroyed_at_oper = function | Iop(Iextcall{alloc = false; _}) -> destroyed_at_c_call | Iop(Ialloc _) -> destroyed_at_alloc | Iop(Istore(Single, _, _)) -> [| phys_reg 100 |] - | Iswitch _ -> [| phys_reg 21 |] + | Iswitch _ -> [| phys_reg 22 |] (* t0 *) | _ -> [||] let destroyed_at_raise = all_phys_regs @@ -258,12 +259,12 @@ let destroyed_at_reloadretaddr = [| |] (* Maximal register pressure *) let safe_register_pressure = function - 
| Iextcall _ -> 15 - | _ -> 22 + | Iextcall _ -> 9 + | _ -> 23 let max_register_pressure = function - | Iextcall _ -> [| 15; 18 |] - | _ -> [| 22; 30 |] + | Iextcall _ -> [| 9; 12 |] + | _ -> [| 23; 30 |] (* Pure operations (without any side effect besides updating their result registers). *) @@ -292,8 +293,9 @@ let int_dwarf_reg_numbers = [| 10; 11; 12; 13; 14; 15; 16; 17; 18; 19; 20; 21; 22; 23; 24; 25; 7; 28; 29; 30; 31; + 8; 5; 6; - 8; 9; 26; 27; + 9; 26; 27; |] let float_dwarf_reg_numbers = diff --git a/ocaml/runtime/arm64.S b/ocaml/runtime/arm64.S index 30092c8d584..e9d2c12b292 100644 --- a/ocaml/runtime/arm64.S +++ b/ocaml/runtime/arm64.S @@ -20,10 +20,9 @@ /* Special registers */ -#define DOMAIN_STATE_PTR x25 +#define DOMAIN_STATE_PTR x28 #define TRAP_PTR x26 #define ALLOC_PTR x27 -#define ALLOC_LIMIT x28 #define ADDITIONAL_ARG x8 #define TMP x16 #define TMP2 x17 @@ -64,7 +63,7 @@ #include "../runtime/caml/domain_state.tbl" #undef DOMAIN_STATE -#define Caml_state(var) [x25, 8*domain_field_caml_##var] +#define Caml_state(var) [DOMAIN_STATE_PTR, 8*domain_field_caml_##var] /* Globals and labels */ #if defined(SYS_macosx) @@ -233,9 +232,8 @@ L(caml_call_gc): ldp d26, d27, [sp, 352] ldp d28, d29, [sp, 368] ldp d30, d31, [sp, 384] - /* Reload new allocation pointer and allocation limit */ + /* Reload new allocation pointer */ ldr ALLOC_PTR, Caml_state(young_ptr) - ldr ALLOC_LIMIT, Caml_state(young_limit) /* Free stack space and return to caller */ ldp x29, x30, [sp], 400 ret @@ -244,8 +242,9 @@ L(caml_call_gc): FUNCTION(caml_alloc1) CFI_STARTPROC + ldr TMP, Caml_state(young_limit) sub ALLOC_PTR, ALLOC_PTR, #16 - cmp ALLOC_PTR, ALLOC_LIMIT + cmp ALLOC_PTR, TMP b.lo L(caml_call_gc) ret CFI_ENDPROC @@ -253,8 +252,9 @@ FUNCTION(caml_alloc1) FUNCTION(caml_alloc2) CFI_STARTPROC + ldr TMP, Caml_state(young_limit) sub ALLOC_PTR, ALLOC_PTR, #24 - cmp ALLOC_PTR, ALLOC_LIMIT + cmp ALLOC_PTR, TMP b.lo L(caml_call_gc) ret CFI_ENDPROC @@ -262,8 +262,9 @@ FUNCTION(caml_alloc2) 
FUNCTION(caml_alloc3) CFI_STARTPROC + ldr TMP, Caml_state(young_limit) sub ALLOC_PTR, ALLOC_PTR, #32 - cmp ALLOC_PTR, ALLOC_LIMIT + cmp ALLOC_PTR, TMP b.lo L(caml_call_gc) ret CFI_ENDPROC @@ -271,8 +272,9 @@ FUNCTION(caml_alloc3) FUNCTION(caml_allocN) CFI_STARTPROC + ldr TMP, Caml_state(young_limit) sub ALLOC_PTR, ALLOC_PTR, ADDITIONAL_ARG - cmp ALLOC_PTR, ALLOC_LIMIT + cmp ALLOC_PTR, TMP b.lo L(caml_call_gc) ret CFI_ENDPROC @@ -295,9 +297,8 @@ FUNCTION(caml_c_call) str TRAP_PTR, Caml_state(exception_pointer) /* Call the function */ blr ADDITIONAL_ARG - /* Reload alloc ptr and alloc limit */ + /* Reload alloc ptr */ ldr ALLOC_PTR, Caml_state(young_ptr) - ldr ALLOC_LIMIT, Caml_state(young_limit) /* Return */ ret x19 CFI_ENDPROC @@ -346,9 +347,8 @@ L(jump_to_caml): stp x8, x9, [sp, -16]! CFI_ADJUST(16) add TRAP_PTR, sp, #0 - /* Reload allocation pointers */ + /* Reload allocation pointer */ ldr ALLOC_PTR, Caml_state(young_ptr) - ldr ALLOC_LIMIT, Caml_state(young_limit) /* Call the OCaml code */ blr TMP2 L(caml_retaddr): @@ -431,10 +431,9 @@ FUNCTION(caml_raise_exception) mov DOMAIN_STATE_PTR, C_ARG_1 /* Load the exception bucket */ mov x0, C_ARG_2 - /* Reload trap ptr, alloc ptr and alloc limit */ + /* Reload trap ptr and alloc ptr */ ldr TRAP_PTR, Caml_state(exception_pointer) ldr ALLOC_PTR, Caml_state(young_ptr) - ldr ALLOC_LIMIT, Caml_state(young_limit) /* Test if backtrace is active */ ldr TMP, Caml_state(backtrace_active) cbnz TMP, 2f diff --git a/ocaml/runtime/power.S b/ocaml/runtime/power.S index 1933a10ed99..4ca1a145381 100644 --- a/ocaml/runtime/power.S +++ b/ocaml/runtime/power.S @@ -23,9 +23,8 @@ #define C_CALL_FUN 25 #define C_CALL_TOC 26 #define C_CALL_RET_ADDR 27 -#define DOMAIN_STATE_PTR 28 #define TRAP_PTR 29 -#define ALLOC_LIMIT 30 +#define DOMAIN_STATE_PTR 30 #define ALLOC_PTR 31 #if defined(MODEL_ppc64) || defined(MODEL_ppc64le) @@ -149,7 +148,7 @@ #include "../runtime/caml/domain_state.tbl" #undef DOMAIN_STATE -#define Caml_state(var) 
8*domain_field_caml_##var(28) +#define Caml_state(var) 8*domain_field_caml_##var(DOMAIN_STATE_PTR) #if defined(MODEL_ppc64) .section ".opd","aw" @@ -241,9 +240,8 @@ FUNCTION(caml_call_gc) #if defined(MODEL_ppc64) || defined(MODEL_ppc64le) nop #endif - /* Reload new allocation pointer and allocation limit */ + /* Reload new allocation pointer */ lg ALLOC_PTR, Caml_state(young_ptr) - lg ALLOC_LIMIT, Caml_state(young_limit) /* Restore all regs used by the code generator */ addi 11, 1, 8*32 + PARAM_SAVE_AREA + RESERVED_STACK - WORD lgu 3, WORD(11) @@ -349,9 +347,8 @@ FUNCTION(caml_c_call) #endif /* Restore return address (in 27, preserved by the C function) */ mtlr C_CALL_RET_ADDR - /* Reload allocation pointer and allocation limit*/ + /* Reload allocation pointer*/ lg ALLOC_PTR, Caml_state(young_ptr) - lg ALLOC_LIMIT, Caml_state(young_limit) /* Return to caller */ blr .cfi_endproc @@ -401,7 +398,6 @@ FUNCTION(caml_raise_exception) /* Reload OCaml global registers */ lg 1, Caml_state(exception_pointer) lg ALLOC_PTR, Caml_state(young_ptr) - lg ALLOC_LIMIT, Caml_state(young_limit) /* Pop trap frame */ lg 0, TRAP_HANDLER_OFFSET(1) mtctr 0 @@ -505,9 +501,8 @@ FUNCTION(caml_start_program) lg 11, Caml_state(exception_pointer) stg 11, TRAP_PREVIOUS_OFFSET(1) mr TRAP_PTR, 1 - /* Reload allocation pointers */ + /* Reload allocation pointer */ lg ALLOC_PTR, Caml_state(young_ptr) - lg ALLOC_LIMIT, Caml_state(young_limit) /* Call the OCaml code (address in r12) */ #if defined(MODEL_ppc) mtctr 12 diff --git a/ocaml/runtime/riscv.S b/ocaml/runtime/riscv.S index d3a5a794bd2..4e195f27a11 100644 --- a/ocaml/runtime/riscv.S +++ b/ocaml/runtime/riscv.S @@ -17,10 +17,9 @@ /* Must be preprocessed by cpp */ #define ARG_DOMAIN_STATE_PTR t0 -#define DOMAIN_STATE_PTR s0 +#define DOMAIN_STATE_PTR s11 #define TRAP_PTR s1 #define ALLOC_PTR s10 -#define ALLOC_LIMIT s11 #define TMP t1 #define ARG t2 @@ -34,7 +33,7 @@ #include "../runtime/caml/domain_state.tbl" #undef DOMAIN_STATE -#define 
Caml_state(var) (8*domain_field_caml_##var)(s0) +#define Caml_state(var) (8*domain_field_caml_##var)(DOMAIN_STATE_PTR) #define FUNCTION(name) \ .align 2; \ @@ -63,11 +62,11 @@ FUNCTION(caml_call_gc) /* Record lowest stack address */ STORE sp, Caml_state(bottom_of_stack) /* Set up stack space, saving return address */ - /* (1 reg for RA, 1 reg for FP, 22 allocatable int regs, + /* (1 reg for RA, 1 reg for FP, 23 allocatable int regs, 20 caller-save float regs) * 8 */ - addi sp, sp, -0x160 + /* + 1 for alignment */ + addi sp, sp, -0x170 STORE ra, 0x8(sp) - STORE s0, 0x0(sp) /* Save allocatable integer registers on the stack, in the order given in proc.ml */ STORE a0, 0x10(sp) @@ -91,29 +90,30 @@ FUNCTION(caml_call_gc) STORE t4, 0xa0(sp) STORE t5, 0xa8(sp) STORE t6, 0xb0(sp) - STORE t0, 0xb8(sp) + STORE s0, 0xb8(sp) + STORE t0, 0xc0(sp) /* Save caller-save floating-point registers on the stack (callee-saves are preserved by caml_garbage_collection) */ - fsd ft0, 0xc0(sp) - fsd ft1, 0xc8(sp) - fsd ft2, 0xd0(sp) - fsd ft3, 0xd8(sp) - fsd ft4, 0xe0(sp) - fsd ft5, 0xe8(sp) - fsd ft6, 0xf0(sp) - fsd ft7, 0xf8(sp) - fsd fa0, 0x100(sp) - fsd fa1, 0x108(sp) - fsd fa2, 0x110(sp) - fsd fa3, 0x118(sp) - fsd fa4, 0x120(sp) - fsd fa5, 0x128(sp) - fsd fa6, 0x130(sp) - fsd fa7, 0x138(sp) - fsd ft8, 0x140(sp) - fsd ft9, 0x148(sp) - fsd ft10, 0x150(sp) - fsd ft11, 0x158(sp) + fsd ft0, 0xd0(sp) + fsd ft1, 0xd8(sp) + fsd ft2, 0xe0(sp) + fsd ft3, 0xe8(sp) + fsd ft4, 0xf0(sp) + fsd ft5, 0xf8(sp) + fsd ft6, 0x100(sp) + fsd ft7, 0x108(sp) + fsd fa0, 0x110(sp) + fsd fa1, 0x118(sp) + fsd fa2, 0x120(sp) + fsd fa3, 0x128(sp) + fsd fa4, 0x130(sp) + fsd fa5, 0x138(sp) + fsd fa6, 0x140(sp) + fsd fa7, 0x148(sp) + fsd ft8, 0x150(sp) + fsd ft9, 0x158(sp) + fsd ft10, 0x160(sp) + fsd ft11, 0x168(sp) /* Store pointer to saved integer registers in caml_gc_regs */ addi TMP, sp, 0x10 STORE TMP, Caml_state(gc_regs) @@ -145,34 +145,33 @@ FUNCTION(caml_call_gc) LOAD t4, 0xa0(sp) LOAD t5, 0xa8(sp) LOAD t6, 
0xb0(sp) - LOAD t0, 0xb8(sp) - fld ft0, 0xc0(sp) - fld ft1, 0xc8(sp) - fld ft2, 0xd0(sp) - fld ft3, 0xd8(sp) - fld ft4, 0xe0(sp) - fld ft5, 0xe8(sp) - fld ft6, 0xf0(sp) - fld ft7, 0xf8(sp) - fld fa0, 0x100(sp) - fld fa1, 0x108(sp) - fld fa2, 0x110(sp) - fld fa3, 0x118(sp) - fld fa4, 0x120(sp) - fld fa5, 0x128(sp) - fld fa6, 0x130(sp) - fld fa7, 0x138(sp) - fld ft8, 0x140(sp) - fld ft9, 0x148(sp) - fld ft10, 0x150(sp) - fld ft11, 0x158(sp) - /* Reload new allocation pointer and allocation limit */ + LOAD s0, 0xb8(sp) + LOAD t0, 0xc0(sp) + fld ft0, 0xd0(sp) + fld ft1, 0xd8(sp) + fld ft2, 0xe0(sp) + fld ft3, 0xe8(sp) + fld ft4, 0xf0(sp) + fld ft5, 0xf8(sp) + fld ft6, 0x100(sp) + fld ft7, 0x108(sp) + fld fa0, 0x110(sp) + fld fa1, 0x118(sp) + fld fa2, 0x120(sp) + fld fa3, 0x128(sp) + fld fa4, 0x130(sp) + fld fa5, 0x138(sp) + fld fa6, 0x140(sp) + fld fa7, 0x148(sp) + fld ft8, 0x150(sp) + fld ft9, 0x158(sp) + fld ft10, 0x160(sp) + fld ft11, 0x168(sp) + /* Reload new allocation pointer */ LOAD ALLOC_PTR, Caml_state(young_ptr) - LOAD ALLOC_LIMIT, Caml_state(young_limit) /* Free stack space and return to caller */ LOAD ra, 0x8(sp) - LOAD s0, 0x0(sp) - addi sp, sp, 0x160 + addi sp, sp, 0x170 ret .size caml_call_gc, .-caml_call_gc @@ -190,9 +189,8 @@ FUNCTION(caml_c_call) STORE TRAP_PTR, Caml_state(exception_pointer) /* Call the function */ jalr ARG - /* Reload alloc ptr and alloc limit */ + /* Reload alloc ptr */ LOAD ALLOC_PTR, Caml_state(young_ptr) - LOAD ALLOC_LIMIT, Caml_state(young_limit) /* Return */ jr s2 .size caml_c_call, .-caml_c_call @@ -231,7 +229,6 @@ FUNCTION(caml_raise_exception) mv a0, a1 LOAD TRAP_PTR, Caml_state(exception_pointer) LOAD ALLOC_PTR, Caml_state(young_ptr) - LOAD ALLOC_LIMIT, Caml_state(young_limit) LOAD TMP, Caml_state(backtrace_active) bnez TMP, 2f 1: /* Cut stack at current trap handler */ @@ -304,7 +301,6 @@ FUNCTION(caml_start_program) STORE TMP, 8(sp) mv TRAP_PTR, sp LOAD ALLOC_PTR, Caml_state(young_ptr) - LOAD ALLOC_LIMIT, 
Caml_state(young_limit) STORE x0, Caml_state(last_return_address) jalr ARG .Lcaml_retaddr: /* pop trap frame, restoring caml_exception_pointer */ diff --git a/ocaml/runtime/signals_nat.c b/ocaml/runtime/signals_nat.c index 8b64ab45263..1be1b45d420 100644 --- a/ocaml/runtime/signals_nat.c +++ b/ocaml/runtime/signals_nat.c @@ -99,13 +99,6 @@ DECLARE_SIGNAL_HANDLER(handle_signal) #endif if (sig < 0 || sig >= NSIG) return; caml_record_signal(sig); - /* Some ports cache [Caml_state->young_limit] in a register. - Use the signal context to modify that register too, but only if - we are inside OCaml code (not inside C code). */ -#if defined(CONTEXT_PC) && defined(CONTEXT_YOUNG_LIMIT) - if (caml_find_code_fragment_by_pc((char *) CONTEXT_PC) != NULL) - CONTEXT_YOUNG_LIMIT = (context_reg) Caml_state->young_limit; -#endif errno = saved_errno; } diff --git a/ocaml/runtime/signals_osdep.h b/ocaml/runtime/signals_osdep.h index 5b23bbf93ae..1fd7101d5a1 100644 --- a/ocaml/runtime/signals_osdep.h +++ b/ocaml/runtime/signals_osdep.h @@ -315,7 +315,6 @@ #define CONTEXT_STATE (CONTEXT_MCONTEXT->CONTEXT_REG(ss)) #define CONTEXT_PC (CONTEXT_STATE.CONTEXT_REG(srr0)) #define CONTEXT_EXCEPTION_POINTER (CONTEXT_STATE.CONTEXT_REG(r29)) - #define CONTEXT_YOUNG_LIMIT (CONTEXT_STATE.CONTEXT_REG(r30)) #define CONTEXT_YOUNG_PTR (CONTEXT_STATE.CONTEXT_REG(r31)) #define CONTEXT_SP (CONTEXT_STATE.CONTEXT_REG(r1)) #define CONTEXT_FAULTING_ADDRESS ((char *) info->si_addr) @@ -334,7 +333,6 @@ typedef unsigned long context_reg; #define CONTEXT_PC (context->regs->nip) #define CONTEXT_EXCEPTION_POINTER (context->regs->gpr[29]) - #define CONTEXT_YOUNG_LIMIT (context->regs->gpr[30]) #define CONTEXT_YOUNG_PTR (context->regs->gpr[31]) #define CONTEXT_SP (context->regs->gpr[1]) @@ -352,7 +350,6 @@ typedef unsigned long context_reg; #define CONTEXT_PC (context->uc_mcontext.gp_regs[32]) #define CONTEXT_EXCEPTION_POINTER (context->uc_mcontext.gp_regs[29]) - #define CONTEXT_YOUNG_LIMIT 
(context->uc_mcontext.gp_regs[30]) #define CONTEXT_YOUNG_PTR (context->uc_mcontext.gp_regs[31]) #define CONTEXT_SP (context->uc_mcontext.gp_regs[1]) #define CONTEXT_FAULTING_ADDRESS ((char *) info->si_addr) @@ -372,7 +369,6 @@ typedef long context_reg; #define CONTEXT_PC (_UC_MACHINE_PC(context)) #define CONTEXT_EXCEPTION_POINTER (context->uc_mcontext.__gregs[_REG_R29]) - #define CONTEXT_YOUNG_LIMIT (context->uc_mcontext.__gregs[_REG_R30]) #define CONTEXT_YOUNG_PTR (context->uc_mcontext.__gregs[_REG_R31]) #define CONTEXT_SP (_UC_MACHINE_SP(context)) #define CONTEXT_FAULTING_ADDRESS ((char *) info->si_addr) @@ -393,7 +389,6 @@ typedef unsigned long context_reg; #define CONTEXT_PC (context->sc_frame.srr0) #define CONTEXT_EXCEPTION_POINTER (context->sc_frame.fixreg[29]) - #define CONTEXT_YOUNG_LIMIT (context->sc_frame.fixreg[30]) #define CONTEXT_YOUNG_PTR (context->sc_frame.fixreg[31]) #define CONTEXT_SP (context->sc_frame.fixreg[1]) @@ -410,7 +405,6 @@ typedef unsigned long context_reg; #define CONTEXT_PC (context->uc_mcontext.psw.addr) #define CONTEXT_EXCEPTION_POINTER (context->uc_mcontext.gregs[13]) - #define CONTEXT_YOUNG_LIMIT (context->uc_mcontext.gregs[10]) #define CONTEXT_YOUNG_PTR (context->uc_mcontext.gregs[11]) #define CONTEXT_SP (context->uc_mcontext.gregs[15]) #define CONTEXT_FAULTING_ADDRESS ((char *) info->si_addr)