Skip to content

Disable stack checks by default #2404

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 18 commits into from
May 9, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
70 changes: 36 additions & 34 deletions backend/amd64/emit.mlp
Original file line number Diff line number Diff line change
Expand Up @@ -474,31 +474,44 @@ let emit_call_safety_errors () =
type stack_realloc = {
sc_label : Label.t; (* Label of the reallocation code. *)
sc_return : Label.t; (* Label to return to after reallocation. *)
sc_max_frame_size_in_bytes : int; (* Size for reallocation. *)
sc_size_in_bytes : int; (* Size for reallocation. *)
}

let stack_realloc = ref (None : stack_realloc option)
let stack_realloc = ref ([] : stack_realloc list)

let clear_stack_realloc () =
stack_realloc := None
stack_realloc := []

let emit_stack_realloc () =
begin match !stack_realloc with
| None -> ()
| Some { sc_label; sc_return; sc_max_frame_size_in_bytes; } -> begin
def_label sc_label;
(* Pass the desired frame size on the stack, since all of the
argument-passing registers may be in use.
Also serves to align the stack properly before the call *)
I.push (int (Config.stack_threshold + sc_max_frame_size_in_bytes / 8));
cfi_adjust_cfa_offset 8;
(* measured in words *)
emit_call (Cmm.global_symbol "caml_call_realloc_stack");
I.add (int 8) rsp;
cfi_adjust_cfa_offset (-8);
I.jmp (label sc_return)
end
end
List.iter
(fun { sc_label; sc_return; sc_size_in_bytes; } ->
def_label sc_label;
(* Pass the desired frame size on the stack, since all of the
argument-passing registers may be in use.
Also serves to align the stack properly before the call *)
I.push (int (Config.stack_threshold + sc_size_in_bytes / 8));
cfi_adjust_cfa_offset 8;
(* measured in words *)
emit_call (Cmm.global_symbol "caml_call_realloc_stack");
I.add (int 8) rsp;
cfi_adjust_cfa_offset (-8);
I.jmp (label sc_return))
!stack_realloc

let emit_stack_check ~size_in_bytes ~save_registers =
let overflow = new_label () and ret = new_label () in
let threshold_offset = Domainstate.stack_ctx_words * 8 + Stack_check.stack_threshold_size in
if save_registers then I.push r10;
I.lea (mem64 NONE (-(size_in_bytes + threshold_offset)) RSP) r10;
I.cmp (domain_field Domainstate.Domain_current_stack) r10;
if save_registers then I.pop r10;
I.jb (label overflow);
def_label ret;
stack_realloc := {
sc_label = overflow;
sc_return = ret;
sc_size_in_bytes = size_in_bytes;
} :: !stack_realloc

(* Record jump tables *)
type jump_table =
Expand Down Expand Up @@ -1832,20 +1845,7 @@ let emit_instr ~first ~fallthrough i =
I.jmp r11
end
| Lstackcheck { max_frame_size_bytes; } ->
let save_registers = not first in
let overflow = new_label () and ret = new_label () in
let threshold_offset = Domainstate.stack_ctx_words * 8 + Stack_check.stack_threshold_size in
if save_registers then I.push r10;
I.lea (mem64 NONE (-(max_frame_size_bytes + threshold_offset)) RSP) r10;
I.cmp (domain_field Domainstate.Domain_current_stack) r10;
if save_registers then I.pop r10;
I.jb (label overflow);
def_label ret;
stack_realloc := Some {
sc_label = overflow;
sc_return = ret;
sc_max_frame_size_in_bytes = max_frame_size_bytes;
}
emit_stack_check ~size_in_bytes:max_frame_size_bytes ~save_registers:(not first)

let rec emit_all ~first ~fallthrough i =
match i.desc with
Expand Down Expand Up @@ -1909,7 +1909,7 @@ let fundecl fundecl =
D.label (label_name (emit_symbol fundecl.fun_name));
emit_debug_info fundecl.fun_dbg;
cfi_startproc ();
if Config.runtime5 && !Clflags.runtime_variant = "d" then begin
if Config.runtime5 && (not Config.no_stack_checks) && !Clflags.runtime_variant = "d" then begin
emit_call (Cmm.global_symbol "caml_assert_stack_invariants");
end;
emit_all ~first:true ~fallthrough:true fundecl.fun_body;
Expand Down Expand Up @@ -2168,6 +2168,8 @@ let emit_probe_handler_wrapper p =
let padding = if ((wrapper_frame_size k) mod 16) = 0 then 0 else 8 in
let n = k + padding in
(* Allocate stack space *)
if Config.runtime5 && (not Config.no_stack_checks) && (n >= Stack_check.stack_threshold_size) then
emit_stack_check ~size_in_bytes:n ~save_registers:true;
emit_stack_offset n;
(* Save all live hard registers *)
let offset = aux_offset + tmp_offset + loc_offset in
Expand Down
14 changes: 8 additions & 6 deletions backend/cfg/cfg_stack_checks.ml
Original file line number Diff line number Diff line change
Expand Up @@ -217,11 +217,13 @@ let cfg (cfg_with_layout : Cfg_with_layout.t) =
| false -> cfg_with_layout
| true ->
let cfg = Cfg_with_layout.cfg cfg_with_layout in
let { max_frame_size; blocks_needing_stack_checks; max_instr_id } =
build_cfg_info cfg
in
if not (Label.Set.is_empty blocks_needing_stack_checks)
(if not Config.no_stack_checks
then
insert_stack_checks cfg ~max_frame_size ~blocks_needing_stack_checks
~max_instr_id;
let { max_frame_size; blocks_needing_stack_checks; max_instr_id } =
build_cfg_info cfg
in
if not (Label.Set.is_empty blocks_needing_stack_checks)
then
insert_stack_checks cfg ~max_frame_size ~blocks_needing_stack_checks
~max_instr_id);
cfg_with_layout
44 changes: 24 additions & 20 deletions backend/emitaux.ml
Original file line number Diff line number Diff line change
Expand Up @@ -569,24 +569,28 @@ let preproc_stack_check ~fun_body ~frame_size ~trap_size =
loop fun_body frame_size frame_size false

let add_stack_checks_if_needed (fundecl : Linear.fundecl) ~stack_offset ~stack_threshold_size ~trap_size =
let frame_size =
Proc.frame_size ~stack_offset
~num_stack_slots:fundecl.fun_num_stack_slots
~contains_calls:fundecl.fun_contains_calls
in
let { max_frame_size; contains_nontail_calls } =
preproc_stack_check ~fun_body:fundecl.fun_body ~frame_size ~trap_size
in
let insert_stack_check =
contains_nontail_calls || max_frame_size >= stack_threshold_size
in
if insert_stack_check
then
let fun_body =
Linear.instr_cons
(Lstackcheck { max_frame_size_bytes = max_frame_size })
[||] [||] ~available_before:fundecl.fun_body.available_before
~available_across:fundecl.fun_body.available_across fundecl.fun_body
if Config.no_stack_checks then
fundecl
else begin
let frame_size =
Proc.frame_size ~stack_offset
~num_stack_slots:fundecl.fun_num_stack_slots
~contains_calls:fundecl.fun_contains_calls
in
let { max_frame_size; contains_nontail_calls } =
preproc_stack_check ~fun_body:fundecl.fun_body ~frame_size ~trap_size
in
{ fundecl with fun_body }
else fundecl
let insert_stack_check =
contains_nontail_calls || max_frame_size >= stack_threshold_size
in
if insert_stack_check
then
let fun_body =
Linear.instr_cons
(Lstackcheck { max_frame_size_bytes = max_frame_size })
[||] [||] ~available_before:fundecl.fun_body.available_before
~available_across:fundecl.fun_body.available_across fundecl.fun_body
in
{ fundecl with fun_body }
else fundecl
end
23 changes: 23 additions & 0 deletions ocaml/configure

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

12 changes: 12 additions & 0 deletions ocaml/configure.ac
Original file line number Diff line number Diff line change
Expand Up @@ -97,10 +97,15 @@ AS_IF([test x"$enable_runtime5" = xyes],
[runtime_suffix=],
[runtime_suffix=4])

AC_ARG_ENABLE([stack_checks],
[AS_HELP_STRING([--enable-stack_checks],
[Enable stack checks])])

## Output variables

AC_SUBST([enable_runtime5])
AC_SUBST([runtime_suffix])
AC_SUBST([enable_stack_checks])
AC_SUBST([CONFIGURE_ARGS])
AC_SUBST([native_compiler])
AC_SUBST([default_build_target])
Expand Down Expand Up @@ -1344,6 +1349,13 @@ AS_CASE([$host],
[has_native_backend=yes; arch=riscv; model=riscv64; system=linux]
)

# Disabling of stack checks is only supported on amd64.
AS_IF([test x"$enable_stack_checks" = xyes],
[AC_DEFINE([STACK_CHECKS_ENABLED])],
[AS_IF([test x"$arch" != xamd64],
[AC_DEFINE([STACK_CHECKS_ENABLED])],
[])])

native_cflags=''
native_cppflags="-DTARGET_${arch} -DMODEL_${model} -DSYS_${system}"

Expand Down
15 changes: 14 additions & 1 deletion ocaml/otherlibs/systhreads/st_stubs.c
Original file line number Diff line number Diff line change
Expand Up @@ -354,6 +354,19 @@ static void caml_thread_leave_blocking_section(void)
restore_runtime_state(th);
}

static int get_pthreads_stack_size(void)
{
pthread_attr_t attr;
size_t res =
// default value, retrieved from a recent system (May 2024)
8388608;
if (pthread_attr_init(&attr) == 0) {
pthread_attr_getstacksize(&attr, &res);
}
pthread_attr_destroy(&attr);
return res;
}

/* Create and setup a new thread info block.
This block has no associated thread descriptor and
is not inserted in the list of threads. */
Expand All @@ -362,7 +375,7 @@ static caml_thread_t caml_thread_new_info(void)
{
caml_thread_t th;
caml_domain_state *domain_state;
uintnat stack_wsize = caml_get_init_stack_wsize();
uintnat stack_wsize = caml_get_init_stack_wsize(get_pthreads_stack_size());

domain_state = Caml_state;
th = NULL;
Expand Down
36 changes: 26 additions & 10 deletions ocaml/runtime/amd64.S
Original file line number Diff line number Diff line change
Expand Up @@ -545,6 +545,28 @@ LBL(caml_call_gc):
CFI_ENDPROC
ENDFUNCTION(G(caml_call_gc))

FUNCTION(G(caml_raise_stack_overflow_nat))
CFI_STARTPROC
CFI_SIGNAL_FRAME
ENTER_FUNCTION
SAVE_ALL_REGS
movq %r15, Caml_state(gc_regs)
/* We assume that we are always on the OCaml stack at this point.
The SIGSEGV handler doesn't actually check that the faulting
address lies in OCaml code, so it seems theoretically possible
for the code here to be reached via a segfault from C, but
in practice this seems most unlikely. The current situation will
suffice as this patch is only a temporary measure in any case. */
SWITCH_OCAML_TO_C
C_call (GCALL(caml_raise_stack_overflow))
SWITCH_C_TO_OCAML
movq Caml_state(gc_regs), %r15
RESTORE_ALL_REGS
LEAVE_FUNCTION
ret
CFI_ENDPROC
ENDFUNCTION(G(caml_raise_stack_overflow_nat))

FUNCTION(G(caml_alloc1))
CFI_STARTPROC
ENTER_FUNCTION
Expand Down Expand Up @@ -800,16 +822,10 @@ LBL(117):
movq %rax, %r12 /* Save exception bucket */
movq Caml_state(c_stack), %rsp
movq %rax, C_ARG_1 /* arg 1: exception bucket */
#ifdef WITH_FRAME_POINTERS
movq 8(%r10), C_ARG_2 /* arg 2: pc of raise */
leaq 16(%r10), C_ARG_3 /* arg 3: sp at raise */
#else
movq (%r10), C_ARG_2 /* arg 2: pc of raise */
leaq 8(%r10), C_ARG_3 /* arg 3: sp at raise */
#endif
movq Caml_state(exn_handler), C_ARG_4
/* arg 4: sp of handler */
C_call (GCALL(caml_stash_backtrace))
movq %r10, C_ARG_2 /* arg 2: passed rsp */
movq Caml_state(exn_handler), C_ARG_3
/* arg 3: sp of handler */
C_call (GCALL(caml_stash_backtrace_wrapper))
movq %r12, %rax /* Recover exception bucket */
RESTORE_EXN_HANDLER_OCAML
ret
Expand Down
29 changes: 29 additions & 0 deletions ocaml/runtime/backtrace_nat.c
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#include "caml/alloc.h"
#include "caml/backtrace.h"
Expand Down Expand Up @@ -132,6 +133,34 @@ void caml_stash_backtrace(value exn, uintnat pc, char * sp, char* trapsp)
}
}

void caml_stash_backtrace_wrapper(value exn, char* rsp, char* trapsp)
{
#if defined(NATIVE_CODE) && !defined(STACK_CHECKS_ENABLED)
/* If we get an rsp that lies in the guard page, just do nothing - using rsp
* would trigger another segfault, and we are probably in the process of
* raising the exception from a segfault. In any case this behaviour seems
* consistent with runtime4, where no backtrace appears to be available at
* this point. */
struct stack_info *block = Caml_state->current_stack;
int page_size = getpagesize();
char* protected_low = Protected_stack_page(block, page_size);
char* protected_high = protected_low + page_size;
if ((rsp >= protected_low) && (rsp < protected_high)) {
return;
}
#endif
char* pc;
char* sp;
#ifdef WITH_FRAME_POINTERS
pc = rsp + 8;
sp = rsp + 16;
#else
pc = rsp;
sp = rsp + 8;
#endif
caml_stash_backtrace(exn, *((uintnat*) pc), sp, trapsp);
}

/* minimum size to allocate a backtrace (in slots) */
#define MIN_BACKTRACE_SIZE 16

Expand Down
Loading
Loading