Skip to content

Commit 096ffdc

Browse files
xclercmshinwell
andauthored
Disable stack checks by default (#2404)
* Disable stack checks by default. * Actually disable the checks. * Review * Make the build fail if stack checks are disabled but POSIX signals are not available. * Abort if trying to reallocate the stack when stack checks are disabled. * Emit stack checks for probes (#2539) * Force stack check to be enabled on arm64. * Make sure mmap and others are available when stack checks are disabled. * Correct arm64 check * Long line * Increase main stack size in case of getrlimit failure * Missing Wsize_bsize * Don't assume mmap returns a page-aligned block. * Ensure stack check are properly disabled on the non-CFG path. * Cosmetic changes; remove unnecessary label * Stack computations * Fix build. * Fix build. --------- Co-authored-by: Mark Shinwell <[email protected]>
1 parent db1fc1e commit 096ffdc

21 files changed

+395
-79
lines changed

backend/amd64/emit.mlp

Lines changed: 36 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -474,31 +474,44 @@ let emit_call_safety_errors () =
474474
type stack_realloc = {
475475
sc_label : Label.t; (* Label of the reallocation code. *)
476476
sc_return : Label.t; (* Label to return to after reallocation. *)
477-
sc_max_frame_size_in_bytes : int; (* Size for reallocation. *)
477+
sc_size_in_bytes : int; (* Size for reallocation. *)
478478
}
479479

480-
let stack_realloc = ref (None : stack_realloc option)
480+
let stack_realloc = ref ([] : stack_realloc list)
481481

482482
let clear_stack_realloc () =
483-
stack_realloc := None
483+
stack_realloc := []
484484

485485
let emit_stack_realloc () =
486-
begin match !stack_realloc with
487-
| None -> ()
488-
| Some { sc_label; sc_return; sc_max_frame_size_in_bytes; } -> begin
489-
def_label sc_label;
490-
(* Pass the desired frame size on the stack, since all of the
491-
argument-passing registers may be in use.
492-
Also serves to align the stack properly before the call *)
493-
I.push (int (Config.stack_threshold + sc_max_frame_size_in_bytes / 8));
494-
cfi_adjust_cfa_offset 8;
495-
(* measured in words *)
496-
emit_call (Cmm.global_symbol "caml_call_realloc_stack");
497-
I.add (int 8) rsp;
498-
cfi_adjust_cfa_offset (-8);
499-
I.jmp (label sc_return)
500-
end
501-
end
486+
List.iter
487+
(fun { sc_label; sc_return; sc_size_in_bytes; } ->
488+
def_label sc_label;
489+
(* Pass the desired frame size on the stack, since all of the
490+
argument-passing registers may be in use.
491+
Also serves to align the stack properly before the call *)
492+
I.push (int (Config.stack_threshold + sc_size_in_bytes / 8));
493+
cfi_adjust_cfa_offset 8;
494+
(* measured in words *)
495+
emit_call (Cmm.global_symbol "caml_call_realloc_stack");
496+
I.add (int 8) rsp;
497+
cfi_adjust_cfa_offset (-8);
498+
I.jmp (label sc_return))
499+
!stack_realloc
500+
501+
let emit_stack_check ~size_in_bytes ~save_registers =
502+
let overflow = new_label () and ret = new_label () in
503+
let threshold_offset = Domainstate.stack_ctx_words * 8 + Stack_check.stack_threshold_size in
504+
if save_registers then I.push r10;
505+
I.lea (mem64 NONE (-(size_in_bytes + threshold_offset)) RSP) r10;
506+
I.cmp (domain_field Domainstate.Domain_current_stack) r10;
507+
if save_registers then I.pop r10;
508+
I.jb (label overflow);
509+
def_label ret;
510+
stack_realloc := {
511+
sc_label = overflow;
512+
sc_return = ret;
513+
sc_size_in_bytes = size_in_bytes;
514+
} :: !stack_realloc
502515

503516
(* Record jump tables *)
504517
type jump_table =
@@ -1866,20 +1879,7 @@ let emit_instr ~first ~fallthrough i =
18661879
I.jmp r11
18671880
end
18681881
| Lstackcheck { max_frame_size_bytes; } ->
1869-
let save_registers = not first in
1870-
let overflow = new_label () and ret = new_label () in
1871-
let threshold_offset = Domainstate.stack_ctx_words * 8 + Stack_check.stack_threshold_size in
1872-
if save_registers then I.push r10;
1873-
I.lea (mem64 NONE (-(max_frame_size_bytes + threshold_offset)) RSP) r10;
1874-
I.cmp (domain_field Domainstate.Domain_current_stack) r10;
1875-
if save_registers then I.pop r10;
1876-
I.jb (label overflow);
1877-
def_label ret;
1878-
stack_realloc := Some {
1879-
sc_label = overflow;
1880-
sc_return = ret;
1881-
sc_max_frame_size_in_bytes = max_frame_size_bytes;
1882-
}
1882+
emit_stack_check ~size_in_bytes:max_frame_size_bytes ~save_registers:(not first)
18831883

18841884
let rec emit_all ~first ~fallthrough i =
18851885
match i.desc with
@@ -1943,7 +1943,7 @@ let fundecl fundecl =
19431943
D.label (label_name (emit_symbol fundecl.fun_name));
19441944
emit_debug_info fundecl.fun_dbg;
19451945
cfi_startproc ();
1946-
if Config.runtime5 && !Clflags.runtime_variant = "d" then begin
1946+
if Config.runtime5 && (not Config.no_stack_checks) && !Clflags.runtime_variant = "d" then begin
19471947
emit_call (Cmm.global_symbol "caml_assert_stack_invariants");
19481948
end;
19491949
emit_all ~first:true ~fallthrough:true fundecl.fun_body;
@@ -2209,6 +2209,8 @@ let emit_probe_handler_wrapper p =
22092209
let padding = if ((wrapper_frame_size k) mod 16) = 0 then 0 else 8 in
22102210
let n = k + padding in
22112211
(* Allocate stack space *)
2212+
if Config.runtime5 && (not Config.no_stack_checks) && (n >= Stack_check.stack_threshold_size) then
2213+
emit_stack_check ~size_in_bytes:n ~save_registers:true;
22122214
emit_stack_offset n;
22132215
(* Save all live hard registers *)
22142216
let offset = aux_offset + tmp_offset + loc_offset in

backend/cfg/cfg_stack_checks.ml

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -217,11 +217,13 @@ let cfg (cfg_with_layout : Cfg_with_layout.t) =
217217
| false -> cfg_with_layout
218218
| true ->
219219
let cfg = Cfg_with_layout.cfg cfg_with_layout in
220-
let { max_frame_size; blocks_needing_stack_checks; max_instr_id } =
221-
build_cfg_info cfg
222-
in
223-
if not (Label.Set.is_empty blocks_needing_stack_checks)
220+
(if not Config.no_stack_checks
224221
then
225-
insert_stack_checks cfg ~max_frame_size ~blocks_needing_stack_checks
226-
~max_instr_id;
222+
let { max_frame_size; blocks_needing_stack_checks; max_instr_id } =
223+
build_cfg_info cfg
224+
in
225+
if not (Label.Set.is_empty blocks_needing_stack_checks)
226+
then
227+
insert_stack_checks cfg ~max_frame_size ~blocks_needing_stack_checks
228+
~max_instr_id);
227229
cfg_with_layout

backend/emitaux.ml

Lines changed: 24 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -569,24 +569,28 @@ let preproc_stack_check ~fun_body ~frame_size ~trap_size =
569569
loop fun_body frame_size frame_size false
570570

571571
let add_stack_checks_if_needed (fundecl : Linear.fundecl) ~stack_offset ~stack_threshold_size ~trap_size =
572-
let frame_size =
573-
Proc.frame_size ~stack_offset
574-
~num_stack_slots:fundecl.fun_num_stack_slots
575-
~contains_calls:fundecl.fun_contains_calls
576-
in
577-
let { max_frame_size; contains_nontail_calls } =
578-
preproc_stack_check ~fun_body:fundecl.fun_body ~frame_size ~trap_size
579-
in
580-
let insert_stack_check =
581-
contains_nontail_calls || max_frame_size >= stack_threshold_size
582-
in
583-
if insert_stack_check
584-
then
585-
let fun_body =
586-
Linear.instr_cons
587-
(Lstackcheck { max_frame_size_bytes = max_frame_size })
588-
[||] [||] ~available_before:fundecl.fun_body.available_before
589-
~available_across:fundecl.fun_body.available_across fundecl.fun_body
572+
if Config.no_stack_checks then
573+
fundecl
574+
else begin
575+
let frame_size =
576+
Proc.frame_size ~stack_offset
577+
~num_stack_slots:fundecl.fun_num_stack_slots
578+
~contains_calls:fundecl.fun_contains_calls
579+
in
580+
let { max_frame_size; contains_nontail_calls } =
581+
preproc_stack_check ~fun_body:fundecl.fun_body ~frame_size ~trap_size
590582
in
591-
{ fundecl with fun_body }
592-
else fundecl
583+
let insert_stack_check =
584+
contains_nontail_calls || max_frame_size >= stack_threshold_size
585+
in
586+
if insert_stack_check
587+
then
588+
let fun_body =
589+
Linear.instr_cons
590+
(Lstackcheck { max_frame_size_bytes = max_frame_size })
591+
[||] [||] ~available_before:fundecl.fun_body.available_before
592+
~available_across:fundecl.fun_body.available_across fundecl.fun_body
593+
in
594+
{ fundecl with fun_body }
595+
else fundecl
596+
end

ocaml/configure

Lines changed: 23 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

ocaml/configure.ac

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -97,10 +97,15 @@ AS_IF([test x"$enable_runtime5" = xyes],
9797
[runtime_suffix=],
9898
[runtime_suffix=4])
9999

100+
AC_ARG_ENABLE([stack_checks],
101+
[AS_HELP_STRING([--enable-stack_checks],
102+
[Enable stack checks])])
103+
100104
## Output variables
101105

102106
AC_SUBST([enable_runtime5])
103107
AC_SUBST([runtime_suffix])
108+
AC_SUBST([enable_stack_checks])
104109
AC_SUBST([CONFIGURE_ARGS])
105110
AC_SUBST([native_compiler])
106111
AC_SUBST([default_build_target])
@@ -1344,6 +1349,13 @@ AS_CASE([$host],
13441349
[has_native_backend=yes; arch=riscv; model=riscv64; system=linux]
13451350
)
13461351

1352+
# Disabling of stack checks is only supported on amd64.
1353+
AS_IF([test x"$enable_stack_checks" = xyes],
1354+
[AC_DEFINE([STACK_CHECKS_ENABLED])],
1355+
[AS_IF([test x"$arch" != xamd64],
1356+
[AC_DEFINE([STACK_CHECKS_ENABLED])],
1357+
[])])
1358+
13471359
native_cflags=''
13481360
native_cppflags="-DTARGET_${arch} -DMODEL_${model} -DSYS_${system}"
13491361

ocaml/otherlibs/systhreads/st_stubs.c

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -354,6 +354,19 @@ static void caml_thread_leave_blocking_section(void)
354354
restore_runtime_state(th);
355355
}
356356

357+
static int get_pthreads_stack_size(void)
358+
{
359+
pthread_attr_t attr;
360+
size_t res =
361+
// default value, retrieved from a recent system (May 2024)
362+
8388608;
363+
if (pthread_attr_init(&attr) == 0) {
364+
pthread_attr_getstacksize(&attr, &res);
365+
}
366+
pthread_attr_destroy(&attr);
367+
return res;
368+
}
369+
357370
/* Create and setup a new thread info block.
358371
This block has no associated thread descriptor and
359372
is not inserted in the list of threads. */
@@ -362,7 +375,7 @@ static caml_thread_t caml_thread_new_info(void)
362375
{
363376
caml_thread_t th;
364377
caml_domain_state *domain_state;
365-
uintnat stack_wsize = caml_get_init_stack_wsize();
378+
uintnat stack_wsize = caml_get_init_stack_wsize(get_pthreads_stack_size());
366379

367380
domain_state = Caml_state;
368381
th = NULL;

ocaml/runtime/amd64.S

Lines changed: 26 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -545,6 +545,28 @@ LBL(caml_call_gc):
545545
CFI_ENDPROC
546546
ENDFUNCTION(G(caml_call_gc))
547547

548+
FUNCTION(G(caml_raise_stack_overflow_nat))
549+
CFI_STARTPROC
550+
CFI_SIGNAL_FRAME
551+
ENTER_FUNCTION
552+
SAVE_ALL_REGS
553+
movq %r15, Caml_state(gc_regs)
554+
/* We assume that we are always on the OCaml stack at this point.
555+
The SIGSEGV handler doesn't actually check that the faulting
556+
address lies in OCaml code, so it seems theoretically possible
557+
for the code here to be reached via a segfault from C, but
558+
in practice this seems most unlikely. The current situation will
559+
suffice as this patch is only a temporary measure in any case. */
560+
SWITCH_OCAML_TO_C
561+
C_call (GCALL(caml_raise_stack_overflow))
562+
SWITCH_C_TO_OCAML
563+
movq Caml_state(gc_regs), %r15
564+
RESTORE_ALL_REGS
565+
LEAVE_FUNCTION
566+
ret
567+
CFI_ENDPROC
568+
ENDFUNCTION(G(caml_raise_stack_overflow_nat))
569+
548570
FUNCTION(G(caml_alloc1))
549571
CFI_STARTPROC
550572
ENTER_FUNCTION
@@ -800,16 +822,10 @@ LBL(117):
800822
movq %rax, %r12 /* Save exception bucket */
801823
movq Caml_state(c_stack), %rsp
802824
movq %rax, C_ARG_1 /* arg 1: exception bucket */
803-
#ifdef WITH_FRAME_POINTERS
804-
movq 8(%r10), C_ARG_2 /* arg 2: pc of raise */
805-
leaq 16(%r10), C_ARG_3 /* arg 3: sp at raise */
806-
#else
807-
movq (%r10), C_ARG_2 /* arg 2: pc of raise */
808-
leaq 8(%r10), C_ARG_3 /* arg 3: sp at raise */
809-
#endif
810-
movq Caml_state(exn_handler), C_ARG_4
811-
/* arg 4: sp of handler */
812-
C_call (GCALL(caml_stash_backtrace))
825+
movq %r10, C_ARG_2 /* arg 2: passed rsp */
826+
movq Caml_state(exn_handler), C_ARG_3
827+
/* arg 3: sp of handler */
828+
C_call (GCALL(caml_stash_backtrace_wrapper))
813829
movq %r12, %rax /* Recover exception bucket */
814830
RESTORE_EXN_HANDLER_OCAML
815831
ret

ocaml/runtime/backtrace_nat.c

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
#include <stdio.h>
2121
#include <stdlib.h>
2222
#include <string.h>
23+
#include <unistd.h>
2324

2425
#include "caml/alloc.h"
2526
#include "caml/backtrace.h"
@@ -132,6 +133,34 @@ void caml_stash_backtrace(value exn, uintnat pc, char * sp, char* trapsp)
132133
}
133134
}
134135

136+
void caml_stash_backtrace_wrapper(value exn, char* rsp, char* trapsp)
137+
{
138+
#if defined(NATIVE_CODE) && !defined(STACK_CHECKS_ENABLED)
139+
/* If we get an rsp that lies in the guard page, just do nothing - using rsp
140+
* would trigger another segfault, and we are probably in the process of
141+
* raising the exception from a segfault. In any case this behaviour seems
142+
* consistent with runtime4, where no backtrace appears to be available at
143+
* this point. */
144+
struct stack_info *block = Caml_state->current_stack;
145+
int page_size = getpagesize();
146+
char* protected_low = Protected_stack_page(block, page_size);
147+
char* protected_high = protected_low + page_size;
148+
if ((rsp >= protected_low) && (rsp < protected_high)) {
149+
return;
150+
}
151+
#endif
152+
char* pc;
153+
char* sp;
154+
#ifdef WITH_FRAME_POINTERS
155+
pc = rsp + 8;
156+
sp = rsp + 16;
157+
#else
158+
pc = rsp;
159+
sp = rsp + 8;
160+
#endif
161+
caml_stash_backtrace(exn, *((uintnat*) pc), sp, trapsp);
162+
}
163+
135164
/* minimum size to allocate a backtrace (in slots) */
136165
#define MIN_BACKTRACE_SIZE 16
137166

0 commit comments

Comments
 (0)