Skip to content

Commit 24e132e

Browse files
committed
s390x: Add support for inline probestack
This implements the gen_inline_probestack callback in line with the implementation on other platforms, which allows detection of stack overflows with misconfigured hosts. Fixes: #9396
1 parent cef7774 commit 24e132e

File tree

8 files changed

+205
-19
lines changed

8 files changed

+205
-19
lines changed

cranelift/codegen/src/isa/s390x/abi.rs

Lines changed: 48 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -648,12 +648,56 @@ impl ABIMachineSpec for S390xMachineDeps {
648648
}
649649

650650
fn gen_inline_probestack(
651-
_insts: &mut SmallInstVec<Self::I>,
651+
insts: &mut SmallInstVec<Self::I>,
652652
_call_conv: isa::CallConv,
653-
_frame_size: u32,
654-
_guard_size: u32,
653+
frame_size: u32,
654+
guard_size: u32,
655655
) {
656-
unimplemented!("Inline stack probing is unimplemented on S390x");
656+
// Number of probes that we need to perform
657+
let probe_count = align_to(frame_size, guard_size) / guard_size;
658+
659+
// The stack probe loop currently takes 4 instructions and each unrolled
660+
// probe takes 2. Set this to 2 to keep the max size to 4 instructions.
661+
const PROBE_MAX_UNROLL: u32 = 2;
662+
663+
if probe_count <= PROBE_MAX_UNROLL {
664+
// Unrolled probe loop.
665+
for _ in 0..probe_count {
666+
insts.extend(Self::gen_sp_reg_adjust(-(guard_size as i32)));
667+
668+
insts.push(Inst::StoreImm8 {
669+
imm: 0,
670+
mem: MemArg::reg(stack_reg(), MemFlags::trusted()),
671+
});
672+
}
673+
} else {
674+
// Explicit probe loop.
675+
676+
// Load the number of probes into a register used as loop counter.
677+
// `gen_inline_probestack` is called after regalloc2, so we can
678+
// use the nonallocatable spilltmp register for this purpose.
679+
let probe_count_reg = writable_spilltmp_reg();
680+
if let Ok(probe_count) = i16::try_from(probe_count) {
681+
insts.push(Inst::Mov32SImm16 {
682+
rd: probe_count_reg,
683+
imm: probe_count,
684+
});
685+
} else {
686+
insts.push(Inst::Mov32Imm {
687+
rd: probe_count_reg,
688+
imm: probe_count,
689+
});
690+
}
691+
692+
// Emit probe loop. The guard size is assumed to fit in 16 bits.
693+
insts.push(Inst::StackProbeLoop {
694+
probe_count: probe_count_reg,
695+
guard_size: i16::try_from(guard_size).unwrap(),
696+
});
697+
}
698+
699+
// Restore the stack pointer to its original position.
700+
insts.extend(Self::gen_sp_reg_adjust((probe_count * guard_size) as i32));
657701
}
658702

659703
fn gen_clobber_save(

cranelift/codegen/src/isa/s390x/inst.isle

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -977,6 +977,11 @@
977977
(ridx Reg)
978978
(targets BoxVecMachLabel))
979979

980+
;; Stack probe loop sequence, as one compound instruction.
981+
(StackProbeLoop
982+
(probe_count WritableReg)
983+
(guard_size i16))
984+
980985
;; Load an inline symbol reference with relocation.
981986
(LoadSymbolReloc
982987
(rd WritableReg)

cranelift/codegen/src/isa/s390x/inst/emit.rs

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3383,6 +3383,36 @@ impl Inst {
33833383
start_off = sink.cur_offset();
33843384
}
33853385

3386+
Inst::StackProbeLoop {
3387+
probe_count,
3388+
guard_size,
3389+
} => {
3390+
// Emit the loop start label
3391+
let loop_start = sink.get_label();
3392+
sink.bind_label(loop_start, state.ctrl_plane_mut());
3393+
3394+
// aghi %r15, -GUARD_SIZE
3395+
let inst = Inst::AluRSImm16 {
3396+
alu_op: ALUOp::Add64,
3397+
rd: writable_stack_reg(),
3398+
ri: stack_reg(),
3399+
imm: -guard_size,
3400+
};
3401+
inst.emit(sink, emit_info, state);
3402+
3403+
// mvi 0(%r15), 0
3404+
let inst = Inst::StoreImm8 {
3405+
imm: 0,
3406+
mem: MemArg::reg(stack_reg(), MemFlags::trusted()),
3407+
};
3408+
inst.emit(sink, emit_info, state);
3409+
3410+
// brct PROBE_COUNT, LOOP_START
3411+
let opcode = 0xa76; // BRCT
3412+
sink.use_label_at_offset(sink.cur_offset(), loop_start, LabelUse::BranchRI);
3413+
put(sink, &enc_ri_b(opcode, probe_count.to_reg(), 0));
3414+
}
3415+
33863416
&Inst::Unwind { ref inst } => {
33873417
sink.add_unwind(inst.clone());
33883418
}

cranelift/codegen/src/isa/s390x/inst/emit_tests.rs

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7032,6 +7032,15 @@ fn test_s390x_binemit() {
70327032
"0: cr %r2, %r3 ; jgnh 1f ; cs %r4, %r5, 0(%r6) ; jglh 0b ; 1:",
70337033
));
70347034

7035+
insns.push((
7036+
Inst::StackProbeLoop {
7037+
probe_count: writable_gpr(1),
7038+
guard_size: 4096,
7039+
},
7040+
"A7FBF0009200F000A716FFFC",
7041+
"0: aghi %r15, -4096 ; mvi 0(%r15), 0 ; brct %r1, 0b",
7042+
));
7043+
70357044
insns.push((
70367045
Inst::FpuMove32 {
70377046
rd: writable_vr(8),

cranelift/codegen/src/isa/s390x/inst/mod.rs

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -226,6 +226,7 @@ impl Inst {
226226
| Inst::Debugtrap
227227
| Inst::Trap { .. }
228228
| Inst::JTSequence { .. }
229+
| Inst::StackProbeLoop { .. }
229230
| Inst::LoadSymbolReloc { .. }
230231
| Inst::LoadAddr { .. }
231232
| Inst::Loop { .. }
@@ -969,6 +970,9 @@ fn s390x_get_operands(inst: &mut Inst, collector: &mut DenyReuseVisitor<impl Ope
969970
collector.reg_def(rd);
970971
memarg_operands(mem, collector);
971972
}
973+
Inst::StackProbeLoop { probe_count, .. } => {
974+
collector.reg_early_def(probe_count);
975+
}
972976
Inst::Loop { body, .. } => {
973977
// `reuse_def` constraints can't be permitted in a Loop instruction because the operand
974978
// index will always be relative to the Loop instruction, not the individual
@@ -3286,6 +3290,14 @@ impl Inst {
32863290

32873291
format!("{mem_str}{op} {rd}, {mem}")
32883292
}
3293+
&Inst::StackProbeLoop {
3294+
probe_count,
3295+
guard_size,
3296+
} => {
3297+
let probe_count = pretty_print_reg(probe_count.to_reg());
3298+
let stack_reg = pretty_print_reg(stack_reg());
3299+
format!("0: aghi {stack_reg}, -{guard_size} ; mvi 0({stack_reg}), 0 ; brct {probe_count}, 0b")
3300+
}
32893301
&Inst::Loop { ref body, cond } => {
32903302
let body = body
32913303
.into_iter()
Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,96 @@
1+
test compile precise-output
2+
set enable_probestack=true
3+
set probestack_strategy=inline
4+
; This is the default and is equivalent to a page size of 4096
5+
set probestack_size_log2=12
6+
target s390x
7+
8+
9+
; If the stack size is just one page, we can avoid the stack probe entirely
10+
function %single_page() -> i64 tail {
11+
ss0 = explicit_slot 2048
12+
13+
block0:
14+
v1 = stack_addr.i64 ss0
15+
return v1
16+
}
17+
18+
; VCode:
19+
; aghi %r15, -2048
20+
; block0:
21+
; la %r2, 0(%r15)
22+
; aghi %r15, 2048
23+
; br %r14
24+
;
25+
; Disassembled:
26+
; block0: ; offset 0x0
27+
; aghi %r15, -0x800
28+
; block1: ; offset 0x4
29+
; la %r2, 0(%r15)
30+
; aghi %r15, 0x800
31+
; br %r14
32+
33+
function %unrolled() -> i64 tail {
34+
ss0 = explicit_slot 8192
35+
36+
block0:
37+
v1 = stack_addr.i64 ss0
38+
return v1
39+
}
40+
41+
; VCode:
42+
; aghi %r15, -4096
43+
; mvi 0(%r15), 0
44+
; aghi %r15, -4096
45+
; mvi 0(%r15), 0
46+
; aghi %r15, 8192
47+
; aghi %r15, -8192
48+
; block0:
49+
; la %r2, 0(%r15)
50+
; aghi %r15, 8192
51+
; br %r14
52+
;
53+
; Disassembled:
54+
; block0: ; offset 0x0
55+
; aghi %r15, -0x1000
56+
; mvi 0(%r15), 0
57+
; aghi %r15, -0x1000
58+
; mvi 0(%r15), 0
59+
; aghi %r15, 0x2000
60+
; aghi %r15, -0x2000
61+
; block1: ; offset 0x18
62+
; la %r2, 0(%r15)
63+
; aghi %r15, 0x2000
64+
; br %r14
65+
66+
function %large() -> i64 tail {
67+
ss0 = explicit_slot 100000
68+
69+
block0:
70+
v1 = stack_addr.i64 ss0
71+
return v1
72+
}
73+
74+
; VCode:
75+
; lhi %r1, 25
76+
; 0: aghi %r15, -4096 ; mvi 0(%r15), 0 ; brct %r1, 0b
77+
; agfi %r15, 102400
78+
; agfi %r15, -100000
79+
; block0:
80+
; la %r2, 0(%r15)
81+
; agfi %r15, 100000
82+
; br %r14
83+
;
84+
; Disassembled:
85+
; block0: ; offset 0x0
86+
; lhi %r1, 0x19
87+
; aghi %r15, -0x1000
88+
; mvi 0(%r15), 0
89+
; brct %r1, 4
90+
; agfi %r15, 0x19000
91+
; agfi %r15, -0x186a0
92+
; block1: ; offset 0x1c
93+
; la %r2, 0(%r15)
94+
; agfi %r15, 0x186a0
95+
; br %r14
96+

crates/wasmtime/src/config.rs

Lines changed: 4 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@ use core::str::FromStr;
88
use serde_derive::{Deserialize, Serialize};
99
#[cfg(any(feature = "cache", feature = "cranelift", feature = "winch"))]
1010
use std::path::Path;
11-
use target_lexicon::Architecture;
1211
use wasmparser::WasmFeatures;
1312
#[cfg(feature = "cache")]
1413
use wasmtime_cache::CacheConfig;
@@ -2068,16 +2067,14 @@ impl Config {
20682067

20692068
let target = self.compiler_target();
20702069

2071-
// On supported targets, we enable stack probing by default.
2070+
// We enable stack probing by default on all targets.
20722071
// This is required on Windows because of the way Windows
20732072
// commits its stacks, but it's also a good idea on other
20742073
// platforms to ensure guard pages are hit for large frame
20752074
// sizes.
2076-
if probestack_supported(target.architecture) {
2077-
self.compiler_config
2078-
.flags
2079-
.insert("enable_probestack".into());
2080-
}
2075+
self.compiler_config
2076+
.flags
2077+
.insert("enable_probestack".into());
20812078

20822079
if let Some(unwind_requested) = self.native_unwind_info {
20832080
if !self
@@ -3021,13 +3018,6 @@ impl PoolingAllocationConfig {
30213018
}
30223019
}
30233020

3024-
pub(crate) fn probestack_supported(arch: Architecture) -> bool {
3025-
matches!(
3026-
arch,
3027-
Architecture::X86_64 | Architecture::Aarch64(_) | Architecture::Riscv64(_)
3028-
)
3029-
}
3030-
30313021
#[cfg(feature = "std")]
30323022
fn detect_host_feature(feature: &str) -> Option<bool> {
30333023
#[cfg(target_arch = "aarch64")]

crates/wasmtime/src/engine.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -306,7 +306,7 @@ impl Engine {
306306
// runtime.
307307
"libcall_call_conv" => *value == FlagValue::Enum("isa_default".into()),
308308
"preserve_frame_pointers" => *value == FlagValue::Bool(true),
309-
"enable_probestack" => *value == FlagValue::Bool(crate::config::probestack_supported(target.architecture)),
309+
"enable_probestack" => *value == FlagValue::Bool(true),
310310
"probestack_strategy" => *value == FlagValue::Enum("inline".into()),
311311

312312
// Features wasmtime doesn't use should all be disabled, since

0 commit comments

Comments
 (0)