Skip to content

libprobe: Add a probe! macro for static instrumentation #14031

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion mk/crates.mk
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@
TARGET_CRATES := libc std green rustuv native flate arena glob term semver \
uuid serialize sync getopts collections num test time rand \
workcache url log regex graphviz core
HOST_CRATES := syntax rustc rustdoc fourcc hexfloat regex_macros
HOST_CRATES := syntax rustc rustdoc fourcc hexfloat regex_macros probe
CRATES := $(TARGET_CRATES) $(HOST_CRATES)
TOOLS := compiletest rustdoc rustc

Expand Down Expand Up @@ -88,6 +88,7 @@ DEPS_workcache := std serialize collections log
DEPS_log := std sync
DEPS_regex := std collections
DEPS_regex_macros = syntax std regex
DEPS_probe = std

TOOL_DEPS_compiletest := test green rustuv getopts
TOOL_DEPS_rustdoc := rustdoc native
Expand Down
144 changes: 144 additions & 0 deletions src/libprobe/lib.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
// Copyright 2014 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.

//! This crate provides static instrumentation macros.
//!
//! With the `probe!` macro, programmers can place static instrumentation
//! points in their code to mark events of interest. These are compiled into
//! platform-specific implementations, e.g. SystemTap SDT on Linux. Probes are
//! designed to have negligible overhead during normal operation, so they can
//! be present in all builds, and only activated using those external tools.
//!
//! # Example
//!
//! This simple example instruments the beginning and end of the program, as well
//! as every iteration through the loop with arguments for the counter and
//! intermediate total.
//!
//! ```rust
//! #![feature(phase)]
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Wouldn't this have to opt-in to the asm feature as well?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You mentioned that before, but this seems to work fine as-is. I've tried it in a separate file, and rustdoc --test works too. Hopefully this is not just an accident, because it will be nicer if users don't need to know it's asm underneath.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, I forgot about #12122!

It's unclear how much of a bug that is, but this is an experimental crate so it's ok to be susceptible to breakage.

//! #[phase(syntax)]
//! extern crate probe;
//! fn main() {
//! probe!(foo, begin);
//! let mut total = 0;
//! for i in range(0, 100) {
//! total += i;
//! probe!(foo, loop, i, total);
//! }
//! assert_eq!(total, 4950);
//! probe!(foo, end);
//! }
//! ```
//!
//! ## Using probes with SystemTap
//!
//! For the program above, a SystemTap script could double-check the totals:
//!
//! ```notrust
//! global check
//!
//! probe process.provider("foo").mark("loop") {
//! check += $arg1;
//! if (check != $arg2)
//! printf("foo total is out of sync! (%d != %d)\n", check, $arg2);
//! }
//!
//! // .provider is optional
//! probe process.mark("begin"), process.mark("end") {
//! printf("%s:%s\n", $$provider, $$name);
//! }
//! ```
//!
//! Since this program behaves as expected, this script will not have any complaint.
//!
//! ```notrust
//! $ stap --dyninst foo.stp -c ./foo
//! foo:begin
//! foo:end
//! ```
//!
//! ## Using probes with GDB
//!
//! Starting in version 7.5, GDB can set breakpoints on probes and read arguments.
//!
//! ```notrust
//! (gdb) info probes
//! Provider Name Where Semaphore Object
//! foo begin 0x0000000000402e70 /tmp/foo
//! foo end 0x000000000040315c /tmp/foo
//! foo loop 0x0000000000402f25 /tmp/foo
//! (gdb) break -probe foo:loop
//! Breakpoint 1 at 0x402f25
//! (gdb) condition 1 $_probe_arg1 > 1000
//! (gdb) run
//! Starting program: /tmp/foo
//! [Thread debugging using libthread_db enabled]
//! Using host libthread_db library "/lib64/libthread_db.so.1".
//!
//! Breakpoint 1, 0x0000000000402f25 in main::hd67360886023c1c6faa::v0.0 ()
//! (gdb) print $_probe_arg0
//! $1 = 45
//! (gdb) print $_probe_arg1
//! $2 = 1035
//! ```

#![crate_id = "probe#0.11-pre"]
#![crate_type = "dylib"]
#![experimental]
#![license = "MIT/ASL2"]
#![doc(html_logo_url = "http://www.rust-lang.org/logos/rust-logo-128x128-blk-v2.png",
html_favicon_url = "http://www.rust-lang.org/favicon.ico",
html_root_url = "http://static.rust-lang.org/doc/master")]

#![feature(asm, macro_rules)]

mod platform;

/// Define a static probe point.
///
/// This annotates a code location with a name and arguments, and compiles
/// in metadata to let debugging tools locate it.
///
/// # Arguments
///
/// * `provider` - An identifier for naming probe groups.
///
/// * `name` - An identifier for this specific probe.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Being unfamiliar with these sorts of probes, is it required that these be an identifier? Could they be an arbitrary string?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

1) C uses identifiers too: STAP_PROBE(provider, name). 2) Is there a way to accept only strings, not other expressions? $name:ident accepts only identifiers, but $name:expr accepts too much.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The other option could be $name:expr with stringify!($name), but that may be too accepting still. If only identifiers are supported, that's not so bad!

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

On SystemTap's part, it's not strictly necessary to be an identifier. The C implementation uses them to name a variable for the semaphore, but that's an internal detail that could be totally different if/when we add semaphores here. And they are stringified in the final macro already, so it could conceivably let expr through if you really want.

I'm not sure if GDB is prepared for arbitrary strings in break -probe name though. At first glance, its line parser seems to be breaking it up on spaces, regardless of quoting. That might be fixable.

DTrace USDT uses them in a function name - see USDT Macros. I'm not sure how necessary it is to be named like that, because I don't know USDT internals.

So sticking to identifiers seems best. Even better would be just C-identifiers, no Unicode, but I don't know how to enforce that in the macro. Also note that it did allow a keyword loop in my example, which is fine by me, just surprising that :ident allowed it.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ok! Sounds like we should stick to identifiers for now. We could expand to expressions in general later if necessary.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

FYI, I just noted on reddit that Unicode does work. I had no idea that even C99 allows this! :)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not familiar with SystemTap, either. If I stumble upon a piece of code using probe!, I'd be a bit puzzled by the use of identifiers.

First, I will ask myself whether these identifiers make reference to a previously declared variable, and I'd scan the code to find them with no luck. Then, I'd wrongly assume this macro introduces a new variable or another item.

Would it be possible to use a more specific macro syntax instead of a comma-separated list of identifiers? This would hint to me that these identifiers are somehow interpreted specially by the macro.

fn main() {
    probe!(main@foo);
    let x = 42;
    probe!(show_x@foo: x);
    let y = Some(x);
    probe!(show_y@foo: match y {
        Some(n) => n,
        None    => -1
    });
}

fn main() {
    probe!(begin@foo);
    let mut total = 0;
    for i in range(0, 100) {
        total += i;
        probe!(loop@foo: i, total);
    }
    assert_eq!(total, 4950);
    probe!(end@foo);
}

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@Olivier-Renaud, that's a fair point about identifier confusion. These are totally outside the normal code namespace, and I like the idea of changing up the syntax to make it look special. (That was never an option in CPP macros.) The chosen syntax won't matter much to the actual implementation - just tweaking the macro patterns. Now we get to bikeshed on what might look better. :)

I don't really care for name@provider, just because that's inverted from the order that the existing tools refer to these fields. Flipping to provider@name could be better, except '@' has the wrong hierarchy implication when read like "at", and there's also the past association with '@' pointers. Maybe some other symbol wouldn't have as much baggage, like provider$name. (I don't think any QWERTY-keyboard symbols are left without any rust meaning at all...)

SystemTap is relatively verbose with process.provider("provider").mark("name"), so that's not helpful for macro inspiration. GDB refers to probes by provider:name. DTrace uses provider:module:function:name (module being the binary, or crate in rust terms), and you can omit fields to leave just provider:::name. So both GDB and DTrace resemble rust paths - maybe we should embrace that and go with provider::name? Or maybe that's confusing if it looks like a real path but isn't, so just settle on provider:name?

If the colon is the initial separator, I think it's fine to stick with commas for the optional arguments, probe!(foo:bar) and probe!(foo:baz, arg1, arg2,...). But since placing a probe is vaguely like a function call, maybe we could also use parentheses like probe!(foo:bar()) and probe!(foo:baz(arg1, arg2, ...)).

Opening up syntax leaves endless possibilities. Opinions and suggestions are welcome...

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

provider:name seems good to me, even better if you say that other tools use this syntax. I like the parenthesized form, too.

///
/// * `arg`... - Optional data to provide with the probe. Any expression which
/// can be cast `as i64` is allowed as an argument. The arguments might not
/// be evaluated at all when a debugger is not attached to the probe,
/// depending on the platform implementation, so don't rely on side effects.
///
/// # Example
///
/// ```
/// #![feature(phase)]
/// #[phase(syntax)]
/// extern crate probe;
/// fn main() {
/// probe!(foo, main);
///
/// let x = 42;
/// probe!(foo, show_x, x);
///
/// let y = Some(x);
/// probe!(foo, show_y, match y {
/// Some(n) => n,
/// None => -1
/// });
/// }
/// ```
#[macro_export]
macro_rules! probe(
// Single arm: a provider identifier, a probe-name identifier, then zero or
// more comma-separated argument expressions. Everything is forwarded
// unchanged to `platform_probe!`, which supplies the actual implementation.
($provider:ident, $name:ident $(, $arg:expr)*)
=> (platform_probe!($provider, $name $(, $arg)*));
)
14 changes: 14 additions & 0 deletions src/libprobe/platform/default.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
// Copyright 2014 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.

// Fallback `platform_probe!`: accepts a provider identifier, a probe-name
// identifier, and any number of argument expressions, and expands to nothing.
// Because the arguments do not appear in the expansion, they are never
// evaluated when this definition is in use.
#[macro_export]
macro_rules! platform_probe(
($provider:ident, $name:ident $(, $arg:expr)*) => ()
)
16 changes: 16 additions & 0 deletions src/libprobe/platform/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
// Copyright 2014 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.

// Select the backend module that provides `platform_probe!`.
//
// NOTE(review): this relies on the cfg rules of the compiler this was written
// for, where stacked `#[cfg]` attributes are alternatives (linux OR android)
// and a comma-separated list inside one `#[cfg(...)]` requires every entry
// (neither linux nor android) -- confirm before porting to another toolchain.
#[cfg(target_os = "linux")]
#[cfg(target_os = "android")]
mod systemtap;

// Every other target uses the `default` module instead.
#[cfg(not(target_os = "linux"), not(target_os = "android"))]
mod default;
194 changes: 194 additions & 0 deletions src/libprobe/platform/systemtap.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,194 @@
// Copyright 2014 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.

//! SystemTap static probes
//!
//! This is a mechanism for developers to provide static annotations for
//! meaningful points in code, with arguments that indicate some relevant
//! state. Such locations may be probed by SystemTap `process.mark("name")`,
//! and GDB can also locate them with `info probes` and `break -probe name`.
//!
//! The impact on code generation is designed to be minimal: just a single
//! `NOP` placeholder is added inline for instrumentation, and ELF notes
//! contain metadata to name the probe and describe the location of its
//! arguments.
//!
//! # Links:
//!
//! * https://sourceware.org/systemtap/man/stapprobes.3stap.html#lbAO (see `process.mark`)
//! * https://sourceware.org/systemtap/wiki/AddingUserSpaceProbingToApps
//! * https://sourceware.org/systemtap/wiki/UserSpaceProbeImplementation
//! * https://sourceware.org/gdb/onlinedocs/gdb/Static-Probe-Points.html

//
// DEVELOPER NOTES
//
// Arguments are currently type-casted as i64, because that directly maps to
// SystemTap's long, no matter the architecture. However, if we could figure
// out types here, they could be annotated more specifically, for example an
// argstr of "4@$0 -2@$1" indicates u32 and i16 respectively. Any pointer
// would be fine too, like *c_char, simply 4@ or 8@ for target_word_size.
//
// The macros in sdt.h don't know types either, so they split each argument
// into two asm inputs, roughly:
// asm("[...]"
// ".asciz \"%n[_SDT_S0]@%[_SDT_A0]\""
// "[...]"
// : :
// [_SDT_S0] "n" ((_SDT_ARGSIGNED (x) ? 1 : -1) * (int) sizeof (x)),
// [_SDT_A0] "nor" (x)
// );
// where _SDT_ARGSIGNED is a macro using gcc builtins, so it's still resolved at
// compile time, and %n makes it a raw literal rather than an asm number.
//
// This might be a possible direction for Rust SDT to follow. For LLVM
// InlineAsm, the string would look like "${0:n}@$1", but we need the size/sign
// for that first input, and that must be a numeric constant no matter what
// optimization level we're at.
//
// NB: If there were also a way to generate the positional "$0 $1 ..." indexes,
// then we could lose the manually-unrolled duplication below. For now, expand
// up to 12 args, the same limit as sys/sdt.h.
//
// FIXME semaphores - SDT can define a short* that debuggers will increment when
// they attach, and decrement on detach. Thus a probe_enabled!(provider,name)
// could return if that value != 0, to be used similarly to log_enabled!(). We
// could even be clever and skip argument evaluation altogether, the same way
// that log!() checks log_enabled!() first.
//

// SystemTap `platform_probe!`: one hand-unrolled arm per argument count, from
// zero up to twelve. Each arm hands `sdt_asm!` the provider, the name, an
// argument-descriptor string, and the argument expressions themselves.
//
// The descriptor string holds one `-8@$N` entry per argument, space separated,
// where N is the zero-based position of that argument among the asm inputs
// (so `$arg1` is described by `$0`). The zero-argument arm passes an empty
// string. An invocation with more than twelve arguments matches no arm.
#[macro_export]
macro_rules! platform_probe(
($provider:ident, $name:ident)
=> (sdt_asm!($provider, $name, ""));

($provider:ident, $name:ident, $arg1:expr)
=> (sdt_asm!($provider, $name,
"-8@$0",
$arg1));

($provider:ident, $name:ident, $arg1:expr, $arg2:expr)
=> (sdt_asm!($provider, $name,
"-8@$0 -8@$1",
$arg1, $arg2));

($provider:ident, $name:ident, $arg1:expr, $arg2:expr, $arg3:expr)
=> (sdt_asm!($provider, $name,
"-8@$0 -8@$1 -8@$2",
$arg1, $arg2, $arg3));

($provider:ident, $name:ident, $arg1:expr, $arg2:expr, $arg3:expr, $arg4:expr)
=> (sdt_asm!($provider, $name,
"-8@$0 -8@$1 -8@$2 -8@$3",
$arg1, $arg2, $arg3, $arg4));

($provider:ident, $name:ident, $arg1:expr, $arg2:expr, $arg3:expr, $arg4:expr, $arg5:expr)
=> (sdt_asm!($provider, $name,
"-8@$0 -8@$1 -8@$2 -8@$3 -8@$4",
$arg1, $arg2, $arg3, $arg4, $arg5));

($provider:ident, $name:ident, $arg1:expr, $arg2:expr, $arg3:expr, $arg4:expr, $arg5:expr,
$arg6:expr)
=> (sdt_asm!($provider, $name,
"-8@$0 -8@$1 -8@$2 -8@$3 -8@$4 -8@$5",
$arg1, $arg2, $arg3, $arg4, $arg5, $arg6));

($provider:ident, $name:ident, $arg1:expr, $arg2:expr, $arg3:expr, $arg4:expr, $arg5:expr,
$arg6:expr, $arg7:expr)
=> (sdt_asm!($provider, $name,
"-8@$0 -8@$1 -8@$2 -8@$3 -8@$4 -8@$5 -8@$6",
$arg1, $arg2, $arg3, $arg4, $arg5, $arg6, $arg7));

($provider:ident, $name:ident, $arg1:expr, $arg2:expr, $arg3:expr, $arg4:expr, $arg5:expr,
$arg6:expr, $arg7:expr, $arg8:expr)
=> (sdt_asm!($provider, $name,
"-8@$0 -8@$1 -8@$2 -8@$3 -8@$4 -8@$5 -8@$6 -8@$7",
$arg1, $arg2, $arg3, $arg4, $arg5, $arg6, $arg7, $arg8));

($provider:ident, $name:ident, $arg1:expr, $arg2:expr, $arg3:expr, $arg4:expr, $arg5:expr,
$arg6:expr, $arg7:expr, $arg8:expr, $arg9:expr)
=> (sdt_asm!($provider, $name,
"-8@$0 -8@$1 -8@$2 -8@$3 -8@$4 -8@$5 -8@$6 -8@$7 -8@$8",
$arg1, $arg2, $arg3, $arg4, $arg5, $arg6, $arg7, $arg8, $arg9));

($provider:ident, $name:ident, $arg1:expr, $arg2:expr, $arg3:expr, $arg4:expr, $arg5:expr,
$arg6:expr, $arg7:expr, $arg8:expr, $arg9:expr, $arg10:expr)
=> (sdt_asm!($provider, $name,
"-8@$0 -8@$1 -8@$2 -8@$3 -8@$4 -8@$5 -8@$6 -8@$7 -8@$8 -8@$9",
$arg1, $arg2, $arg3, $arg4, $arg5, $arg6, $arg7, $arg8, $arg9, $arg10));

($provider:ident, $name:ident, $arg1:expr, $arg2:expr, $arg3:expr, $arg4:expr, $arg5:expr,
$arg6:expr, $arg7:expr, $arg8:expr, $arg9:expr, $arg10:expr, $arg11:expr)
=> (sdt_asm!($provider, $name,
"-8@$0 -8@$1 -8@$2 -8@$3 -8@$4 -8@$5 -8@$6 -8@$7 -8@$8 -8@$9 -8@$10",
$arg1, $arg2, $arg3, $arg4, $arg5, $arg6, $arg7, $arg8, $arg9, $arg10, $arg11));

($provider:ident, $name:ident, $arg1:expr, $arg2:expr, $arg3:expr, $arg4:expr, $arg5:expr,
$arg6:expr, $arg7:expr, $arg8:expr, $arg9:expr, $arg10:expr, $arg11:expr, $arg12:expr)
=> (sdt_asm!($provider, $name,
"-8@$0 -8@$1 -8@$2 -8@$3 -8@$4 -8@$5 -8@$6 -8@$7 -8@$8 -8@$9 -8@$10 -8@$11",
$arg1, $arg2, $arg3, $arg4, $arg5, $arg6, $arg7, $arg8, $arg9, $arg10, $arg11,
$arg12));
)

// `sdt_asm!` for targets with a 32-bit word size: wraps the `_sdt_asm!`
// expansion in an `unsafe` block and passes `.4byte` as the assembler
// directive used for the address-sized fields of the probe note.
#[cfg(target_word_size = "32")]
#[macro_export]
macro_rules! sdt_asm(
($provider:ident, $name:ident, $argstr:tt $(, $arg:expr)*)
=> (unsafe {
_sdt_asm!(".4byte", $provider, $name, $argstr $(, $arg)*);
}))

// `sdt_asm!` for targets with a 64-bit word size: wraps the `_sdt_asm!`
// expansion in an `unsafe` block and passes `.8byte` as the assembler
// directive used for the address-sized fields of the probe note.
#[cfg(target_word_size = "64")]
#[macro_export]
macro_rules! sdt_asm(
($provider:ident, $name:ident, $argstr:tt $(, $arg:expr)*)
=> (unsafe {
_sdt_asm!(".8byte", $provider, $name, $argstr $(, $arg)*);
}))

// Since we can't #include <sys/sdt.h>, we have to reinvent it...
// but once you take out the C/C++ type handling, there's not a lot to it.
//
// Expands to one `asm!` whose template is pieced together with `concat!`:
//
// * `$addr` is the assembler directive (e.g. ".4byte" or ".8byte") used for
//   the three address-sized fields below.
// * `$provider` and `$name` are stringified and written as NUL-terminated
//   strings; `$argstr` is written as-is between quotes.
// * Each `$arg` is cast `as i64` and passed as an input operand with the
//   "nor" constraint; there are no outputs or clobbers, and the block is
//   marked "volatile".
//
// Layout of the emitted assembly:
//
// * Label 990 is a single `nop` placed inline at the probe site.
// * A `.note.stapsdt` section entry follows, starting with three 4-byte
//   words: the size of the owner string (991..992), the size of the payload
//   (993..994), and the constant 3 (the note type).
// * The owner string is "stapsdt".
// * The payload is the address of the `nop` (990b), the address of
//   `_.stapsdt.base`, a zero placeholder for the semaphore address, and then
//   the provider, name, and argument-descriptor strings.
// * `_.stapsdt.base` is defined as a weak, hidden, one-byte symbol in a
//   comdat `.stapsdt.base` section, guarded by `.ifndef` so that it is only
//   emitted when the symbol has not already been defined.
#[macro_export]
macro_rules! _sdt_asm(
($addr:tt, $provider:ident, $name:ident, $argstr:tt $(, $arg:expr)*) => (
asm!(concat!(r#"
990: nop
.pushsection .note.stapsdt,"?","note"
.balign 4
.4byte 992f-991f, 994f-993f, 3
991: .asciz "stapsdt"
992: .balign 4
993: "#, $addr, r#" 990b
"#, $addr, r#" _.stapsdt.base
"#, $addr, r#" 0 // FIXME set semaphore address
.asciz ""#, stringify!($provider), r#""
.asciz ""#, stringify!($name), r#""
.asciz ""#, $argstr, r#""
994: .balign 4
.popsection
.ifndef _.stapsdt.base
.pushsection .stapsdt.base,"aG","progbits",.stapsdt.base,comdat
.weak _.stapsdt.base
.hidden _.stapsdt.base
_.stapsdt.base: .space 1
.size _.stapsdt.base, 1
.popsection
.endif
"#
)
: // output operands
: // input operands
$("nor"(($arg) as i64)),*
: // clobbers
: // options
"volatile"
)
))