Skip to content

Commit 3b37dda

Browse files
authored
feat(console): add task scheduled times histogram (#409)
Display the scheduled time percentiles and sparklines for the histogram of scheduled times. The schduled time is the time between when a task is woken and when it is next polled. The scheduled time, which was already calculated, is now stored in a histogram and sent over the wire in together with the task details. This is used to draw percentiles and sparklines on the task details view, in the same way that is done for the poll times histogram. The refactoring done in #408 has been used to more easily display two sets of durations (percentiles and histogram where possible). ## PR Notes The PR depends on both #406, which adds initial support for recording the scheduled (wake-to-poll) time, and #408, which refactors the percentile and histogram widgets to make them easier to reuse. It shouldn't really be reviewed in depth until those two have been merged as it contains a lot of duplication and will need to be rebased. Here are some examples of the scheduled times durations on the task detail view: <img width="1037" alt="task detail view for the sender task in the long-scheduled example" src="https://user-images.githubusercontent.com/89589/232608774-d8ac48a7-3fe7-4742-a75b-e11bdb23abaa.png"> <img width="1043" alt="task detail view for the burn task in the app example" src="https://user-images.githubusercontent.com/89589/232608864-637f4f52-d4a6-468d-88fc-8fe1d53fdff9.png">
1 parent 36839de commit 3b37dda

File tree

12 files changed

+239
-58
lines changed

12 files changed

+239
-58
lines changed

console-api/proto/tasks.proto

+6
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,12 @@ message TaskDetails {
5959
// A histogram plus additional data.
6060
DurationHistogram histogram = 4;
6161
}
62+
63+
// A histogram of task scheduled durations.
64+
//
65+
// The scheduled duration is the time a task spends between being
66+
// woken and when it is next polled.
67+
DurationHistogram scheduled_times_histogram = 5;
6268
}
6369

6470
// Data recorded when a new task is spawned.

console-api/src/generated/rs.tokio.console.tasks.rs

+6
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,12 @@ pub struct TaskDetails {
4343
/// The timestamp for when the update to the task took place.
4444
#[prost(message, optional, tag="2")]
4545
pub now: ::core::option::Option<::prost_types::Timestamp>,
46+
/// A histogram of task scheduled durations.
47+
///
48+
/// The scheduled duration is the time a task spends between being
49+
/// woken and when it is next polled.
50+
#[prost(message, optional, tag="5")]
51+
pub scheduled_times_histogram: ::core::option::Option<DurationHistogram>,
4652
/// A histogram of task poll durations.
4753
///
4854
/// This is either:
+48
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
use std::time::Duration;
2+
3+
use console_subscriber::ConsoleLayer;
4+
use tokio::task::{self, yield_now};
5+
use tracing::info;
6+
7+
#[tokio::main(flavor = "multi_thread", worker_threads = 2)]
8+
async fn main() -> Result<(), Box<dyn std::error::Error>> {
9+
ConsoleLayer::builder()
10+
.with_default_env()
11+
.publish_interval(Duration::from_millis(100))
12+
.init();
13+
14+
let long_sleeps = task::Builder::new()
15+
.name("long-sleeps")
16+
.spawn(long_sleeps(5000))
17+
.unwrap();
18+
19+
let sleep_forever = task::Builder::new()
20+
.name("sleep-forever")
21+
.spawn(sleep_forever(5000))
22+
.unwrap();
23+
24+
match (long_sleeps.await, sleep_forever.await) {
25+
(Ok(_), Ok(_)) => info!("Success"),
26+
(_, _) => info!("Error awaiting tasks."),
27+
}
28+
29+
tokio::time::sleep(Duration::from_millis(200)).await;
30+
31+
Ok(())
32+
}
33+
34+
async fn long_sleeps(inc: u64) {
35+
let millis = inc;
36+
loop {
37+
std::thread::sleep(Duration::from_millis(millis));
38+
39+
yield_now().await;
40+
}
41+
}
42+
43+
async fn sleep_forever(inc: u64) {
44+
let millis = inc;
45+
loop {
46+
std::thread::sleep(Duration::from_millis(millis));
47+
}
48+
}

console-subscriber/src/aggregator/mod.rs

+2
Original file line numberDiff line numberDiff line change
@@ -327,6 +327,7 @@ impl Aggregator {
327327
task_id: Some(id.clone().into()),
328328
now,
329329
poll_times_histogram: Some(stats.poll_duration_histogram()),
330+
scheduled_times_histogram: Some(stats.scheduled_duration_histogram()),
330331
})
331332
{
332333
self.details_watchers
@@ -374,6 +375,7 @@ impl Aggregator {
374375
task_id: Some(id.clone().into()),
375376
now: Some(self.base_time.to_timestamp(Instant::now())),
376377
poll_times_histogram: Some(task_stats.poll_duration_histogram()),
378+
scheduled_times_histogram: Some(task_stats.scheduled_duration_histogram()),
377379
};
378380
watchers.retain(|watch| watch.update(&details));
379381
!watchers.is_empty()

console-subscriber/src/builder.rs

+24
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,12 @@ pub struct Builder {
5050
/// Any polls exceeding this duration will be clamped to this value. Higher
5151
/// values will result in more memory usage.
5252
pub(super) poll_duration_max: Duration,
53+
54+
/// The maximum value for the task scheduled duration histogram.
55+
///
56+
/// Any scheduled times exceeding this duration will be clamped to this
57+
/// value. Higher values will result in more memory usage.
58+
pub(super) scheduled_duration_max: Duration,
5359
}
5460

5561
impl Default for Builder {
@@ -60,6 +66,7 @@ impl Default for Builder {
6066
publish_interval: ConsoleLayer::DEFAULT_PUBLISH_INTERVAL,
6167
retention: ConsoleLayer::DEFAULT_RETENTION,
6268
poll_duration_max: ConsoleLayer::DEFAULT_POLL_DURATION_MAX,
69+
scheduled_duration_max: ConsoleLayer::DEFAULT_SCHEDULED_DURATION_MAX,
6370
server_addr: ServerAddr::Tcp(SocketAddr::new(Server::DEFAULT_IP, Server::DEFAULT_PORT)),
6471
recording_path: None,
6572
filter_env_var: "RUST_LOG".to_string(),
@@ -235,6 +242,23 @@ impl Builder {
235242
}
236243
}
237244

245+
/// Sets the maximum value for task scheduled duration histograms.
246+
///
247+
/// Any scheduled duration (the time from a task being woken until it is next
248+
/// polled) exceeding this value will be clamped down to this duration
249+
/// and recorded as an outlier.
250+
///
251+
/// By default, this is [one second]. Higher values will increase per-task
252+
/// memory usage.
253+
///
254+
/// [one second]: ConsoleLayer::DEFAULT_SCHEDULED_DURATION_MAX
255+
pub fn scheduled_duration_histogram_max(self, max: Duration) -> Self {
256+
Self {
257+
scheduled_duration_max: max,
258+
..self
259+
}
260+
}
261+
238262
/// Sets whether tasks, resources, and async ops from the console
239263
/// subscriber thread are recorded.
240264
///

console-subscriber/src/lib.rs

+21-1
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,11 @@ pub struct ConsoleLayer {
123123
///
124124
/// By default, this is one second.
125125
max_poll_duration_nanos: u64,
126+
127+
/// Maximum value for the scheduled time histogram.
128+
///
129+
/// By default, this is one second.
130+
max_scheduled_duration_nanos: u64,
126131
}
127132

128133
/// A gRPC [`Server`] that implements the [`tokio-console` wire format][wire].
@@ -273,6 +278,7 @@ impl ConsoleLayer {
273278
?config.recording_path,
274279
?config.filter_env_var,
275280
?config.poll_duration_max,
281+
?config.scheduled_duration_max,
276282
?base_time,
277283
"configured console subscriber"
278284
);
@@ -310,6 +316,7 @@ impl ConsoleLayer {
310316
recorder,
311317
base_time,
312318
max_poll_duration_nanos: config.poll_duration_max.as_nanos() as u64,
319+
max_scheduled_duration_nanos: config.scheduled_duration_max.as_nanos() as u64,
313320
};
314321
(layer, server)
315322
}
@@ -365,6 +372,15 @@ impl ConsoleLayer {
365372
/// See also [`Builder::poll_duration_histogram_max`].
366373
pub const DEFAULT_POLL_DURATION_MAX: Duration = Duration::from_secs(1);
367374

375+
/// The default maximum value for the task scheduled duration histogram.
376+
///
377+
/// Any scheduled duration (the time from a task being woken until it is next
378+
/// polled) exceeding this will be clamped to this value. By default, the
379+
/// maximum scheduled duration is one second.
380+
///
381+
/// See also [`Builder::scheduled_duration_histogram_max`].
382+
pub const DEFAULT_SCHEDULED_DURATION_MAX: Duration = Duration::from_secs(1);
383+
368384
fn is_spawn(&self, meta: &'static Metadata<'static>) -> bool {
369385
self.spawn_callsites.contains(meta)
370386
}
@@ -567,7 +583,11 @@ where
567583
fields: record::SerializeFields(fields.clone()),
568584
});
569585
if let Some(stats) = self.send_stats(&self.shared.dropped_tasks, move || {
570-
let stats = Arc::new(stats::TaskStats::new(self.max_poll_duration_nanos, at));
586+
let stats = Arc::new(stats::TaskStats::new(
587+
self.max_poll_duration_nanos,
588+
self.max_scheduled_duration_nanos,
589+
at,
590+
));
571591
let event = Event::Spawn {
572592
id: id.clone(),
573593
stats: stats.clone(),

console-subscriber/src/stats.rs

+30-12
Original file line numberDiff line numberDiff line change
@@ -117,7 +117,8 @@ struct PollTimestamps<H> {
117117
last_poll_ended: Option<Instant>,
118118
busy_time: Duration,
119119
scheduled_time: Duration,
120-
histogram: H,
120+
poll_histogram: H,
121+
scheduled_histogram: H,
121122
}
122123

123124
#[derive(Debug)]
@@ -128,8 +129,8 @@ struct Histogram {
128129
max_outlier: Option<u64>,
129130
}
130131

131-
trait RecordPoll {
132-
fn record_poll_duration(&mut self, duration: Duration);
132+
trait RecordDuration {
133+
fn record_duration(&mut self, duration: Duration);
133134
}
134135

135136
impl TimeAnchor {
@@ -153,15 +154,20 @@ impl TimeAnchor {
153154
}
154155

155156
impl TaskStats {
156-
pub(crate) fn new(poll_duration_max: u64, created_at: Instant) -> Self {
157+
pub(crate) fn new(
158+
poll_duration_max: u64,
159+
scheduled_duration_max: u64,
160+
created_at: Instant,
161+
) -> Self {
157162
Self {
158163
is_dirty: AtomicBool::new(true),
159164
is_dropped: AtomicBool::new(false),
160165
created_at,
161166
dropped_at: Mutex::new(None),
162167
poll_stats: PollStats {
163168
timestamps: Mutex::new(PollTimestamps {
164-
histogram: Histogram::new(poll_duration_max),
169+
poll_histogram: Histogram::new(poll_duration_max),
170+
scheduled_histogram: Histogram::new(scheduled_duration_max),
165171
first_poll: None,
166172
last_wake: None,
167173
last_poll_started: None,
@@ -240,10 +246,18 @@ impl TaskStats {
240246
}
241247

242248
pub(crate) fn poll_duration_histogram(&self) -> proto::tasks::task_details::PollTimesHistogram {
243-
let hist = self.poll_stats.timestamps.lock().histogram.to_proto();
249+
let hist = self.poll_stats.timestamps.lock().poll_histogram.to_proto();
244250
proto::tasks::task_details::PollTimesHistogram::Histogram(hist)
245251
}
246252

253+
pub(crate) fn scheduled_duration_histogram(&self) -> proto::tasks::DurationHistogram {
254+
self.poll_stats
255+
.timestamps
256+
.lock()
257+
.scheduled_histogram
258+
.to_proto()
259+
}
260+
247261
#[inline]
248262
fn make_dirty(&self) {
249263
self.is_dirty.swap(true, AcqRel);
@@ -475,7 +489,7 @@ impl ToProto for ResourceStats {
475489

476490
// === impl PollStats ===
477491

478-
impl<H: RecordPoll> PollStats<H> {
492+
impl<H: RecordDuration> PollStats<H> {
479493
fn wake(&self, at: Instant) {
480494
let mut timestamps = self.timestamps.lock();
481495
timestamps.last_wake = cmp::max(timestamps.last_wake, Some(at));
@@ -515,6 +529,10 @@ impl<H: RecordPoll> PollStats<H> {
515529
return;
516530
}
517531
};
532+
533+
// if we have a scheduled time histogram, add the timestamp
534+
timestamps.scheduled_histogram.record_duration(elapsed);
535+
518536
timestamps.scheduled_time += elapsed;
519537
}
520538

@@ -550,7 +568,7 @@ impl<H: RecordPoll> PollStats<H> {
550568
};
551569

552570
// if we have a poll time histogram, add the timestamp
553-
timestamps.histogram.record_poll_duration(elapsed);
571+
timestamps.poll_histogram.record_duration(elapsed);
554572

555573
timestamps.busy_time += elapsed;
556574
}
@@ -636,8 +654,8 @@ impl Histogram {
636654
}
637655
}
638656

639-
impl RecordPoll for Histogram {
640-
fn record_poll_duration(&mut self, duration: Duration) {
657+
impl RecordDuration for Histogram {
658+
fn record_duration(&mut self, duration: Duration) {
641659
let mut duration_ns = duration.as_nanos() as u64;
642660

643661
// clamp the duration to the histogram's max value
@@ -653,8 +671,8 @@ impl RecordPoll for Histogram {
653671
}
654672
}
655673

656-
impl RecordPoll for () {
657-
fn record_poll_duration(&mut self, _: Duration) {
674+
impl RecordDuration for () {
675+
fn record_duration(&mut self, _: Duration) {
658676
// do nothing
659677
}
660678
}

tokio-console/src/state/histogram.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ impl DurationHistogram {
3030
})
3131
}
3232

33-
fn from_proto(proto: &proto::DurationHistogram) -> Option<Self> {
33+
pub(crate) fn from_proto(proto: &proto::DurationHistogram) -> Option<Self> {
3434
let histogram = deserialize_histogram(&proto.raw_histogram[..])?;
3535
Some(Self {
3636
histogram,

tokio-console/src/state/mod.rs

+4
Original file line numberDiff line numberDiff line change
@@ -221,6 +221,10 @@ impl State {
221221
.poll_times_histogram
222222
.as_ref()
223223
.and_then(histogram::DurationHistogram::from_poll_durations),
224+
scheduled_times_histogram: update
225+
.scheduled_times_histogram
226+
.as_ref()
227+
.and_then(histogram::DurationHistogram::from_proto),
224228
};
225229

226230
*self.current_task_details.borrow_mut() = Some(details);

tokio-console/src/state/tasks.rs

+5
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ pub(crate) struct TasksState {
3232
pub(crate) struct Details {
3333
pub(crate) span_id: SpanId,
3434
pub(crate) poll_times_histogram: Option<DurationHistogram>,
35+
pub(crate) scheduled_times_histogram: Option<DurationHistogram>,
3536
}
3637

3738
#[derive(Debug, Copy, Clone)]
@@ -264,6 +265,10 @@ impl Details {
264265
pub(crate) fn poll_times_histogram(&self) -> Option<&DurationHistogram> {
265266
self.poll_times_histogram.as_ref()
266267
}
268+
269+
pub(crate) fn scheduled_times_histogram(&self) -> Option<&DurationHistogram> {
270+
self.scheduled_times_histogram.as_ref()
271+
}
267272
}
268273

269274
impl Task {

tokio-console/src/view/durations.rs

+15-1
Original file line numberDiff line numberDiff line change
@@ -36,14 +36,22 @@ pub(crate) struct Durations<'a> {
3636
percentiles_title: &'a str,
3737
/// Title for histogram sparkline block
3838
histogram_title: &'a str,
39+
/// Fixed width for percentiles block
40+
percentiles_width: u16,
3941
}
4042

4143
impl<'a> Widget for Durations<'a> {
4244
fn render(self, area: tui::layout::Rect, buf: &mut tui::buffer::Buffer) {
4345
// Only split the durations area in half if we're also drawing a
4446
// sparkline. We require UTF-8 to draw the sparkline and also enough width.
4547
let (percentiles_area, histogram_area) = if self.styles.utf8 {
46-
let percentiles_width = cmp::max(self.percentiles_title.len() as u16, 13_u16) + 2;
48+
let percentiles_width = match self.percentiles_width {
49+
// Fixed width
50+
width if width > 0 => width,
51+
// Long enough for the title or for a single line
52+
// like "p99: 544.77µs" (13) (and borders on the sides).
53+
_ => cmp::max(self.percentiles_title.len() as u16, 13_u16) + 2,
54+
};
4755

4856
// If there isn't enough width left after drawing the percentiles
4957
// then we won't draw the sparkline at all.
@@ -88,6 +96,7 @@ impl<'a> Durations<'a> {
8896
histogram: None,
8997
percentiles_title: "Percentiles",
9098
histogram_title: "Histogram",
99+
percentiles_width: 0,
91100
}
92101
}
93102

@@ -105,4 +114,9 @@ impl<'a> Durations<'a> {
105114
self.histogram_title = title;
106115
self
107116
}
117+
118+
pub(crate) fn percentiles_width(mut self, width: u16) -> Self {
119+
self.percentiles_width = width;
120+
self
121+
}
108122
}

0 commit comments

Comments
 (0)