Skip to content

Commit c1464fa

Browse files
committed
Rewrite msvc backtrace support to be much faster on 64-bit platforms
Currently, capturing the stack backtrace is done on Windows by calling into `dbghelp!StackWalkEx` (or `dbghelp!StackWalk64` if the version of `dbghelp` we loaded is too old to contain that function). This is very convenient since `StackWalkEx` handles everything for us, but there are two issues with doing so:

1. `dbghelp` is not safe to use from multiple threads at the same time, so all calls into it must be serialized.
2. `StackWalkEx` returns inlined frames as if they were regular stack frames, which requires loading debug info just to walk the stack.

As a result, simply capturing a backtrace without resolving it is much more expensive on Windows than on *nix.

This change rewrites our Windows support to call `RtlVirtualUnwind` instead on platforms which support this API (`x86_64` and `aarch64`). This API walks the actual (i.e., not inlined) stack frames, so it does not require loading any debug info and is significantly faster. For platforms that do not support `RtlVirtualUnwind` (i.e., `i686`), we fall back to the current implementation which calls into `dbghelp`.

To recover the inlined frame information when we are asked to resolve symbols, we use `SymAddrIncludeInlineTrace` to load debug info and detect inlined frames, and then `SymQueryInlineTrace` to get the appropriate inline context to resolve them.

The result is significant performance improvements to backtrace capture and symbolizing on Windows!

Before:

```
> cargo +nightly bench
Running benches\benchmarks.rs
running 6 tests
test new ... bench: 658,652 ns/iter (+/- 30,741)
test new_unresolved ... bench: 343,240 ns/iter (+/- 13,108)
test new_unresolved_and_resolve_separate ... bench: 648,890 ns/iter (+/- 31,651)
test trace ... bench: 304,815 ns/iter (+/- 19,633)
test trace_and_resolve_callback ... bench: 463,645 ns/iter (+/- 12,893)
test trace_and_resolve_separate ... bench: 474,290 ns/iter (+/- 73,858)
test result: ok. 0 passed; 0 failed; 0 ignored; 6 measured; 0 filtered out; finished in 8.26s
```

After:

```
> cargo +nightly bench
Running benches\benchmarks.rs
running 6 tests
test new ... bench: 495,468 ns/iter (+/- 31,215)
test new_unresolved ... bench: 1,241 ns/iter (+/- 251)
test new_unresolved_and_resolve_separate ... bench: 436,730 ns/iter (+/- 32,482)
test trace ... bench: 850 ns/iter (+/- 162)
test trace_and_resolve_callback ... bench: 410,790 ns/iter (+/- 19,424)
test trace_and_resolve_separate ... bench: 408,090 ns/iter (+/- 29,324)
test result: ok. 0 passed; 0 failed; 0 ignored; 6 measured; 0 filtered out; finished in 7.02s
```

The changes to the symbolize step also allow us to report inlined frames when resolving from just an instruction address, which was not previously possible.
1 parent 99faef8 commit c1464fa

File tree

6 files changed

+270
-210
lines changed

6 files changed

+270
-210
lines changed

src/backtrace/dbghelp.rs

+136-154
Original file line numberDiff line numberDiff line change
@@ -1,30 +1,32 @@
11
//! Backtrace strategy for MSVC platforms.
22
//!
3-
//! This module contains the ability to generate a backtrace on MSVC using one
4-
//! of two possible methods. The `StackWalkEx` function is primarily used if
5-
//! possible, but not all systems have that. Failing that the `StackWalk64`
6-
//! function is used instead. Note that `StackWalkEx` is favored because it
7-
//! handles debuginfo internally and returns inline frame information.
3+
//! This module contains the ability to capture a backtrace on MSVC using one
4+
//! of three possible methods. For `x86_64` and `aarch64`, we use `RtlVirtualUnwind`
5+
//! to walk the stack one frame at a time. This function is much faster than using
6+
//! `dbghelp!StackWalk*` because it does not load debug info to report inlined frames.
7+
//! We still report inlined frames during symbolization by consulting the appropriate
8+
//! `dbghelp` functions.
9+
//!
10+
//! For all other platforms, primarily `i686`, the `StackWalkEx` function is used if
11+
//! possible, but not all systems have that. Failing that the `StackWalk64` function
12+
//! is used instead. Note that `StackWalkEx` is favored because it handles debuginfo
13+
//! internally and returns inline frame information.
814
//!
915
//! Note that all dbghelp support is loaded dynamically, see `src/dbghelp.rs`
1016
//! for more information about that.
1117
1218
#![allow(bad_style)]
1319

14-
use super::super::{dbghelp, windows::*};
20+
use super::super::windows::*;
1521
use core::ffi::c_void;
16-
use core::mem;
17-
18-
#[derive(Clone, Copy)]
19-
pub enum StackFrame {
20-
New(STACKFRAME_EX),
21-
Old(STACKFRAME64),
22-
}
2322

2423
#[derive(Clone, Copy)]
2524
pub struct Frame {
26-
pub(crate) stack_frame: StackFrame,
2725
base_address: *mut c_void,
26+
ip: *mut c_void,
27+
sp: *mut c_void,
28+
#[cfg(not(target_env = "gnu"))]
29+
inline_context: Option<DWORD>,
2830
}
2931

3032
// we're just sending around raw pointers and reading them, never interpreting
@@ -34,62 +36,108 @@ unsafe impl Sync for Frame {}
3436

3537
impl Frame {
3638
pub fn ip(&self) -> *mut c_void {
37-
self.addr_pc().Offset as *mut _
39+
self.ip
3840
}
3941

4042
pub fn sp(&self) -> *mut c_void {
41-
self.addr_stack().Offset as *mut _
43+
self.sp
4244
}
4345

4446
pub fn symbol_address(&self) -> *mut c_void {
45-
self.ip()
47+
self.ip
4648
}
4749

4850
pub fn module_base_address(&self) -> Option<*mut c_void> {
4951
Some(self.base_address)
5052
}
5153

52-
fn addr_pc(&self) -> &ADDRESS64 {
53-
match self.stack_frame {
54-
StackFrame::New(ref new) => &new.AddrPC,
55-
StackFrame::Old(ref old) => &old.AddrPC,
56-
}
54+
#[cfg(not(target_env = "gnu"))]
55+
pub fn inline_context(&self) -> Option<DWORD> {
56+
self.inline_context
5757
}
58+
}
5859

59-
fn addr_pc_mut(&mut self) -> &mut ADDRESS64 {
60-
match self.stack_frame {
61-
StackFrame::New(ref mut new) => &mut new.AddrPC,
62-
StackFrame::Old(ref mut old) => &mut old.AddrPC,
63-
}
60+
#[repr(C, align(16))] // required by `CONTEXT`, is a FIXME in winapi right now
61+
struct MyContext(CONTEXT);
62+
63+
#[cfg(target_arch = "x86_64")]
64+
impl MyContext {
65+
#[inline(always)]
66+
fn ip(&self) -> DWORD64 {
67+
self.0.Rip
6468
}
6569

66-
fn addr_frame_mut(&mut self) -> &mut ADDRESS64 {
67-
match self.stack_frame {
68-
StackFrame::New(ref mut new) => &mut new.AddrFrame,
69-
StackFrame::Old(ref mut old) => &mut old.AddrFrame,
70-
}
70+
#[inline(always)]
71+
fn sp(&self) -> DWORD64 {
72+
self.0.Rsp
7173
}
74+
}
7275

73-
fn addr_stack(&self) -> &ADDRESS64 {
74-
match self.stack_frame {
75-
StackFrame::New(ref new) => &new.AddrStack,
76-
StackFrame::Old(ref old) => &old.AddrStack,
77-
}
76+
#[cfg(target_arch = "aarch64")]
77+
impl MyContext {
78+
#[inline(always)]
79+
fn ip(&self) -> DWORD64 {
80+
self.0.Pc
7881
}
7982

80-
fn addr_stack_mut(&mut self) -> &mut ADDRESS64 {
81-
match self.stack_frame {
82-
StackFrame::New(ref mut new) => &mut new.AddrStack,
83-
StackFrame::Old(ref mut old) => &mut old.AddrStack,
84-
}
83+
#[inline(always)]
84+
fn sp(&self) -> DWORD64 {
85+
self.0.Sp
8586
}
8687
}
8788

88-
#[repr(C, align(16))] // required by `CONTEXT`, is a FIXME in winapi right now
89-
struct MyContext(CONTEXT);
89+
#[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))]
90+
#[inline(always)]
91+
pub unsafe fn trace(cb: &mut dyn FnMut(&super::Frame) -> bool) {
92+
use core::ptr;
93+
94+
let mut context = core::mem::zeroed::<MyContext>();
95+
RtlCaptureContext(&mut context.0);
96+
97+
// Call `RtlVirtualUnwind` to find the previous stack frame, walking until we hit ip = 0.
98+
while context.ip() != 0 {
99+
let mut base = 0;
100+
101+
let fn_entry = RtlLookupFunctionEntry(context.ip(), &mut base, ptr::null_mut());
102+
if fn_entry.is_null() {
103+
break;
104+
}
90105

106+
let frame = super::Frame {
107+
inner: Frame {
108+
base_address: fn_entry as *mut c_void,
109+
ip: context.ip() as *mut c_void,
110+
sp: context.sp() as *mut c_void,
111+
#[cfg(not(target_env = "gnu"))]
112+
inline_context: None,
113+
},
114+
};
115+
116+
if !cb(&frame) {
117+
break;
118+
}
119+
120+
let mut handler_data = 0usize;
121+
let mut establisher_frame = 0;
122+
123+
RtlVirtualUnwind(
124+
0,
125+
base,
126+
context.ip(),
127+
fn_entry,
128+
&mut context.0,
129+
&mut handler_data as *mut usize as *mut PVOID,
130+
&mut establisher_frame,
131+
ptr::null_mut(),
132+
);
133+
}
134+
}
135+
136+
#[cfg(not(any(target_arch = "x86_64", target_arch = "aarch64")))]
91137
#[inline(always)]
92138
pub unsafe fn trace(cb: &mut dyn FnMut(&super::Frame) -> bool) {
139+
use core::mem;
140+
93141
// Allocate necessary structures for doing the stack walk
94142
let process = GetCurrentProcess();
95143
let thread = GetCurrentThread();
@@ -98,105 +146,89 @@ pub unsafe fn trace(cb: &mut dyn FnMut(&super::Frame) -> bool) {
98146
RtlCaptureContext(&mut context.0);
99147

100148
// Ensure this process's symbols are initialized
101-
let dbghelp = match dbghelp::init() {
149+
let dbghelp = match super::super::dbghelp::init() {
102150
Ok(dbghelp) => dbghelp,
103151
Err(()) => return, // oh well...
104152
};
105153

106-
// On x86_64 and ARM64 we opt to not use the default `Sym*` functions from
107-
// dbghelp for getting the function table and module base. Instead we use
108-
// the `RtlLookupFunctionEntry` function in kernel32 which will account for
109-
// JIT compiler frames as well. These should be equivalent, but using
110-
// `Rtl*` allows us to backtrace through JIT frames.
111-
//
112-
// Note that `RtlLookupFunctionEntry` only works for in-process backtraces,
113-
// but that's all we support anyway, so it all lines up well.
114-
cfg_if::cfg_if! {
115-
if #[cfg(target_pointer_width = "64")] {
116-
use core::ptr;
117-
118-
unsafe extern "system" fn function_table_access(_process: HANDLE, addr: DWORD64) -> PVOID {
119-
let mut base = 0;
120-
RtlLookupFunctionEntry(addr, &mut base, ptr::null_mut()).cast()
121-
}
122-
123-
unsafe extern "system" fn get_module_base(_process: HANDLE, addr: DWORD64) -> DWORD64 {
124-
let mut base = 0;
125-
RtlLookupFunctionEntry(addr, &mut base, ptr::null_mut());
126-
base
127-
}
128-
} else {
129-
let function_table_access = dbghelp.SymFunctionTableAccess64();
130-
let get_module_base = dbghelp.SymGetModuleBase64();
131-
}
132-
}
154+
let function_table_access = dbghelp.SymFunctionTableAccess64();
155+
let get_module_base = dbghelp.SymGetModuleBase64();
133156

134157
let process_handle = GetCurrentProcess();
135158

136159
// Attempt to use `StackWalkEx` if we can, but fall back to `StackWalk64`
137160
// since it's in theory supported on more systems.
138161
match (*dbghelp.dbghelp()).StackWalkEx() {
139162
Some(StackWalkEx) => {
140-
let mut inner: STACKFRAME_EX = mem::zeroed();
141-
inner.StackFrameSize = mem::size_of::<STACKFRAME_EX>() as DWORD;
142-
let mut frame = super::Frame {
143-
inner: Frame {
144-
stack_frame: StackFrame::New(inner),
145-
base_address: 0 as _,
146-
},
147-
};
148-
let image = init_frame(&mut frame.inner, &context.0);
149-
let frame_ptr = match &mut frame.inner.stack_frame {
150-
StackFrame::New(ptr) => ptr as *mut STACKFRAME_EX,
151-
_ => unreachable!(),
152-
};
163+
let mut stack_frame_ex: STACKFRAME_EX = mem::zeroed();
164+
stack_frame_ex.StackFrameSize = mem::size_of::<STACKFRAME_EX>() as DWORD;
165+
stack_frame_ex.AddrPC.Offset = context.0.Eip as u64;
166+
stack_frame_ex.AddrPC.Mode = AddrModeFlat;
167+
stack_frame_ex.AddrStack.Offset = context.0.Esp as u64;
168+
stack_frame_ex.AddrStack.Mode = AddrModeFlat;
169+
stack_frame_ex.AddrFrame.Offset = context.0.Ebp as u64;
170+
stack_frame_ex.AddrFrame.Mode = AddrModeFlat;
153171

154172
while StackWalkEx(
155-
image as DWORD,
173+
IMAGE_FILE_MACHINE_I386 as DWORD,
156174
process,
157175
thread,
158-
frame_ptr,
159-
&mut context.0 as *mut CONTEXT as *mut _,
176+
&mut stack_frame_ex,
177+
&mut context.0 as *mut CONTEXT as PVOID,
160178
None,
161179
Some(function_table_access),
162180
Some(get_module_base),
163181
None,
164182
0,
165183
) == TRUE
166184
{
167-
frame.inner.base_address = get_module_base(process_handle, frame.ip() as _) as _;
185+
let frame = super::Frame {
186+
inner: Frame {
187+
base_address: get_module_base(process_handle, stack_frame_ex.AddrPC.Offset)
188+
as *mut c_void,
189+
ip: stack_frame_ex.AddrPC.Offset as *mut c_void,
190+
sp: stack_frame_ex.AddrStack.Offset as *mut c_void,
191+
#[cfg(not(target_env = "gnu"))]
192+
inline_context: Some(stack_frame_ex.InlineFrameContext),
193+
},
194+
};
168195

169196
if !cb(&frame) {
170197
break;
171198
}
172199
}
173200
}
174201
None => {
175-
let mut frame = super::Frame {
176-
inner: Frame {
177-
stack_frame: StackFrame::Old(mem::zeroed()),
178-
base_address: 0 as _,
179-
},
180-
};
181-
let image = init_frame(&mut frame.inner, &context.0);
182-
let frame_ptr = match &mut frame.inner.stack_frame {
183-
StackFrame::Old(ptr) => ptr as *mut STACKFRAME64,
184-
_ => unreachable!(),
185-
};
202+
let mut stack_frame64: STACKFRAME64 = mem::zeroed();
203+
stack_frame64.AddrPC.Offset = context.0.Eip as u64;
204+
stack_frame64.AddrPC.Mode = AddrModeFlat;
205+
stack_frame64.AddrStack.Offset = context.0.Esp as u64;
206+
stack_frame64.AddrStack.Mode = AddrModeFlat;
207+
stack_frame64.AddrFrame.Offset = context.0.Ebp as u64;
208+
stack_frame64.AddrFrame.Mode = AddrModeFlat;
186209

187210
while dbghelp.StackWalk64()(
188-
image as DWORD,
211+
IMAGE_FILE_MACHINE_I386 as DWORD,
189212
process,
190213
thread,
191-
frame_ptr,
192-
&mut context.0 as *mut CONTEXT as *mut _,
214+
&mut stack_frame64,
215+
&mut context.0 as *mut CONTEXT as PVOID,
193216
None,
194217
Some(function_table_access),
195218
Some(get_module_base),
196219
None,
197220
) == TRUE
198221
{
199-
frame.inner.base_address = get_module_base(process_handle, frame.ip() as _) as _;
222+
let frame = super::Frame {
223+
inner: Frame {
224+
base_address: get_module_base(process_handle, stack_frame64.AddrPC.Offset)
225+
as *mut c_void,
226+
ip: stack_frame64.AddrPC.Offset as *mut c_void,
227+
sp: stack_frame64.AddrStack.Offset as *mut c_void,
228+
#[cfg(not(target_env = "gnu"))]
229+
inline_context: None,
230+
},
231+
};
200232

201233
if !cb(&frame) {
202234
break;
@@ -205,53 +237,3 @@ pub unsafe fn trace(cb: &mut dyn FnMut(&super::Frame) -> bool) {
205237
}
206238
}
207239
}
208-
209-
#[cfg(target_arch = "x86_64")]
210-
fn init_frame(frame: &mut Frame, ctx: &CONTEXT) -> WORD {
211-
frame.addr_pc_mut().Offset = ctx.Rip as u64;
212-
frame.addr_pc_mut().Mode = AddrModeFlat;
213-
frame.addr_stack_mut().Offset = ctx.Rsp as u64;
214-
frame.addr_stack_mut().Mode = AddrModeFlat;
215-
frame.addr_frame_mut().Offset = ctx.Rbp as u64;
216-
frame.addr_frame_mut().Mode = AddrModeFlat;
217-
218-
IMAGE_FILE_MACHINE_AMD64
219-
}
220-
221-
#[cfg(target_arch = "x86")]
222-
fn init_frame(frame: &mut Frame, ctx: &CONTEXT) -> WORD {
223-
frame.addr_pc_mut().Offset = ctx.Eip as u64;
224-
frame.addr_pc_mut().Mode = AddrModeFlat;
225-
frame.addr_stack_mut().Offset = ctx.Esp as u64;
226-
frame.addr_stack_mut().Mode = AddrModeFlat;
227-
frame.addr_frame_mut().Offset = ctx.Ebp as u64;
228-
frame.addr_frame_mut().Mode = AddrModeFlat;
229-
230-
IMAGE_FILE_MACHINE_I386
231-
}
232-
233-
#[cfg(target_arch = "aarch64")]
234-
fn init_frame(frame: &mut Frame, ctx: &CONTEXT) -> WORD {
235-
frame.addr_pc_mut().Offset = ctx.Pc as u64;
236-
frame.addr_pc_mut().Mode = AddrModeFlat;
237-
frame.addr_stack_mut().Offset = ctx.Sp as u64;
238-
frame.addr_stack_mut().Mode = AddrModeFlat;
239-
unsafe {
240-
frame.addr_frame_mut().Offset = ctx.u.s().Fp as u64;
241-
}
242-
frame.addr_frame_mut().Mode = AddrModeFlat;
243-
IMAGE_FILE_MACHINE_ARM64
244-
}
245-
246-
#[cfg(target_arch = "arm")]
247-
fn init_frame(frame: &mut Frame, ctx: &CONTEXT) -> WORD {
248-
frame.addr_pc_mut().Offset = ctx.Pc as u64;
249-
frame.addr_pc_mut().Mode = AddrModeFlat;
250-
frame.addr_stack_mut().Offset = ctx.Sp as u64;
251-
frame.addr_stack_mut().Mode = AddrModeFlat;
252-
unsafe {
253-
frame.addr_frame_mut().Offset = ctx.R11 as u64;
254-
}
255-
frame.addr_frame_mut().Mode = AddrModeFlat;
256-
IMAGE_FILE_MACHINE_ARMNT
257-
}

src/backtrace/mod.rs

-2
Original file line numberDiff line numberDiff line change
@@ -153,8 +153,6 @@ cfg_if::cfg_if! {
153153
mod dbghelp;
154154
use self::dbghelp::trace as trace_imp;
155155
pub(crate) use self::dbghelp::Frame as FrameImp;
156-
#[cfg(target_env = "msvc")] // only used in dbghelp symbolize
157-
pub(crate) use self::dbghelp::StackFrame;
158156
} else {
159157
mod noop;
160158
use self::noop::trace as trace_imp;

0 commit comments

Comments
 (0)