Skip to content

Commit 725e8ec

Browse files
committed
workqueue: Add pwq->stats[] and a monitoring script
Currently, the only way to peer into workqueue operations is through tracing. While possible, it isn't easy or convenient to monitor per-workqueue behaviors over time this way. Let's add pwq->stats[] that track relevant events and a drgn monitoring script - tools/workqueue/wq_monitor.py. It's arguable whether this needs to be configurable. However, it currently only has several counters and the runtime overhead shouldn't be noticeable given that they're on pwq's which are per-cpu on per-cpu workqueues and per-numa-node on unbound ones. Let's keep it simple for the time being. v2: Patch reordered to earlier with fewer fields. Field will be added back gradually. Help message improved. Signed-off-by: Tejun Heo <[email protected]> Cc: Lai Jiangshan <[email protected]>
1 parent 854f5cc commit 725e8ec

File tree

3 files changed

+205
-1
lines changed

3 files changed

+205
-1
lines changed

Documentation/core-api/workqueue.rst

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -348,6 +348,37 @@ Guidelines
348348
level of locality in wq operations and work item execution.
349349

350350

351+
Monitoring
352+
==========
353+
354+
Use tools/workqueue/wq_monitor.py to monitor workqueue operations: ::
355+
356+
$ tools/workqueue/wq_monitor.py events
357+
total infl CMwake mayday rescued
358+
events 18545 0 5 - -
359+
events_highpri 8 0 0 - -
360+
events_long 3 0 0 - -
361+
events_unbound 38306 0 - - -
362+
events_freezable 0 0 0 - -
363+
events_power_efficient 29598 0 0 - -
364+
events_freezable_power_ 10 0 0 - -
365+
sock_diag_events 0 0 0 - -
366+
367+
total infl CMwake mayday rescued
368+
events 18548 0 5 - -
369+
events_highpri 8 0 0 - -
370+
events_long 3 0 0 - -
371+
events_unbound 38322 0 - - -
372+
events_freezable 0 0 0 - -
373+
events_power_efficient 29603 0 0 - -
374+
events_freezable_power_ 10 0 0 - -
375+
sock_diag_events 0 0 0 - -
376+
377+
...
378+
379+
See the command's help message for more info.
380+
381+
351382
Debugging
352383
=========
353384

@@ -387,6 +418,7 @@ the stack trace of the offending worker thread. ::
387418
The work item's function should be trivially visible in the stack
388419
trace.
389420

421+
390422
Non-reentrance Conditions
391423
=========================
392424

kernel/workqueue.c

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -199,6 +199,20 @@ struct worker_pool {
199199
struct rcu_head rcu;
200200
};
201201

202+
/*
203+
* Per-pool_workqueue statistics. These can be monitored using
204+
* tools/workqueue/wq_monitor.py.
205+
*/
206+
enum pool_workqueue_stats {
207+
PWQ_STAT_STARTED, /* work items started execution */
208+
PWQ_STAT_COMPLETED, /* work items completed execution */
209+
PWQ_STAT_CM_WAKEUP, /* concurrency-management worker wakeups */
210+
PWQ_STAT_MAYDAY, /* maydays to rescuer */
211+
PWQ_STAT_RESCUED, /* linked work items executed by rescuer */
212+
213+
PWQ_NR_STATS,
214+
};
215+
202216
/*
203217
* The per-pool workqueue. While queued, the lower WORK_STRUCT_FLAG_BITS
204218
* of work_struct->data are used for flags and the remaining high bits
@@ -236,6 +250,8 @@ struct pool_workqueue {
236250
struct list_head pwqs_node; /* WR: node on wq->pwqs */
237251
struct list_head mayday_node; /* MD: node on wq->maydays */
238252

253+
u64 stats[PWQ_NR_STATS];
254+
239255
/*
240256
* Release of unbound pwq is punted to system_wq. See put_pwq()
241257
* and pwq_unbound_release_workfn() for details. pool_workqueue
@@ -929,8 +945,10 @@ void wq_worker_sleeping(struct task_struct *task)
929945
}
930946

931947
pool->nr_running--;
932-
if (need_more_worker(pool))
948+
if (need_more_worker(pool)) {
949+
worker->current_pwq->stats[PWQ_STAT_CM_WAKEUP]++;
933950
wake_up_worker(pool);
951+
}
934952
raw_spin_unlock_irq(&pool->lock);
935953
}
936954

@@ -2165,6 +2183,7 @@ static void send_mayday(struct work_struct *work)
21652183
get_pwq(pwq);
21662184
list_add_tail(&pwq->mayday_node, &wq->maydays);
21672185
wake_up_process(wq->rescuer->task);
2186+
pwq->stats[PWQ_STAT_MAYDAY]++;
21682187
}
21692188
}
21702189

@@ -2403,13 +2422,15 @@ __acquires(&pool->lock)
24032422
* workqueues), so hiding them isn't a problem.
24042423
*/
24052424
lockdep_invariant_state(true);
2425+
pwq->stats[PWQ_STAT_STARTED]++;
24062426
trace_workqueue_execute_start(work);
24072427
worker->current_func(work);
24082428
/*
24092429
* While we must be careful to not use "work" after this, the trace
24102430
* point will only record its address.
24112431
*/
24122432
trace_workqueue_execute_end(work, worker->current_func);
2433+
pwq->stats[PWQ_STAT_COMPLETED]++;
24132434
lock_map_release(&lockdep_map);
24142435
lock_map_release(&pwq->wq->lockdep_map);
24152436

@@ -2653,6 +2674,7 @@ static int rescuer_thread(void *__rescuer)
26532674
if (first)
26542675
pool->watchdog_ts = jiffies;
26552676
move_linked_works(work, scheduled, &n);
2677+
pwq->stats[PWQ_STAT_RESCUED]++;
26562678
}
26572679
first = false;
26582680
}

tools/workqueue/wq_monitor.py

Lines changed: 150 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,150 @@
1+
#!/usr/bin/env drgn
2+
#
3+
# Copyright (C) 2023 Tejun Heo <[email protected]>
4+
# Copyright (C) 2023 Meta Platforms, Inc. and affiliates.
5+
6+
desc = """
7+
This is a drgn script to monitor workqueues. For more info on drgn, visit
8+
https://github.com/osandov/drgn.
9+
10+
total Total number of work items executed by the workqueue.
11+
12+
infl The number of currently in-flight work items.
13+
14+
CMwake The number of concurrency-management wake-ups while executing a
15+
work item of the workqueue.
16+
17+
mayday The number of times the rescuer was requested while waiting for
18+
new worker creation.
19+
20+
rescued The number of work items executed by the rescuer.
21+
"""
22+
23+
import sys
24+
import signal
25+
import os
26+
import re
27+
import time
28+
import json
29+
30+
import drgn
31+
from drgn.helpers.linux.list import list_for_each_entry,list_empty
32+
from drgn.helpers.linux.cpumask import for_each_possible_cpu
33+
34+
import argparse
35+
parser = argparse.ArgumentParser(description=desc,
36+
formatter_class=argparse.RawTextHelpFormatter)
37+
parser.add_argument('workqueue', metavar='REGEX', nargs='*',
38+
help='Target workqueue name patterns (all if empty)')
39+
parser.add_argument('-i', '--interval', metavar='SECS', type=float, default=1,
40+
help='Monitoring interval (0 to print once and exit)')
41+
parser.add_argument('-j', '--json', action='store_true',
42+
help='Output in json')
43+
args = parser.parse_args()
44+
45+
def err(s):
46+
print(s, file=sys.stderr, flush=True)
47+
sys.exit(1)
48+
49+
workqueues = prog['workqueues']
50+
51+
WQ_UNBOUND = prog['WQ_UNBOUND']
52+
WQ_MEM_RECLAIM = prog['WQ_MEM_RECLAIM']
53+
54+
PWQ_STAT_STARTED = prog['PWQ_STAT_STARTED'] # work items started execution
55+
PWQ_STAT_COMPLETED = prog['PWQ_STAT_COMPLETED'] # work items completed execution
56+
PWQ_STAT_CM_WAKEUP = prog['PWQ_STAT_CM_WAKEUP'] # concurrency-management worker wakeups
57+
PWQ_STAT_MAYDAY = prog['PWQ_STAT_MAYDAY'] # maydays to rescuer
58+
PWQ_STAT_RESCUED = prog['PWQ_STAT_RESCUED'] # linked work items executed by rescuer
59+
PWQ_NR_STATS = prog['PWQ_NR_STATS']
60+
61+
class WqStats:
62+
def __init__(self, wq):
63+
self.name = wq.name.string_().decode()
64+
self.unbound = wq.flags & WQ_UNBOUND != 0
65+
self.mem_reclaim = wq.flags & WQ_MEM_RECLAIM != 0
66+
self.stats = [0] * PWQ_NR_STATS
67+
for pwq in list_for_each_entry('struct pool_workqueue', wq.pwqs.address_of_(), 'pwqs_node'):
68+
for i in range(PWQ_NR_STATS):
69+
self.stats[i] += int(pwq.stats[i])
70+
71+
def dict(self, now):
72+
return { 'timestamp' : now,
73+
'name' : self.name,
74+
'unbound' : self.unbound,
75+
'mem_reclaim' : self.mem_reclaim,
76+
'started' : self.stats[PWQ_STAT_STARTED],
77+
'completed' : self.stats[PWQ_STAT_COMPLETED],
78+
'cm_wakeup' : self.stats[PWQ_STAT_CM_WAKEUP],
79+
'mayday' : self.stats[PWQ_STAT_MAYDAY],
80+
'rescued' : self.stats[PWQ_STAT_RESCUED], }
81+
82+
def table_header_str():
83+
return f'{"":>24} {"total":>8} {"infl":>5} {"CMwake":>7} {"mayday":>7} {"rescued":>7}'
84+
85+
def table_row_str(self):
86+
cm_wakeup = '-'
87+
mayday = '-'
88+
rescued = '-'
89+
90+
if not self.unbound:
91+
cm_wakeup = str(self.stats[PWQ_STAT_CM_WAKEUP])
92+
93+
if self.mem_reclaim:
94+
mayday = str(self.stats[PWQ_STAT_MAYDAY])
95+
rescued = str(self.stats[PWQ_STAT_RESCUED])
96+
97+
out = f'{self.name[-24:]:24} ' \
98+
f'{self.stats[PWQ_STAT_STARTED]:8} ' \
99+
f'{max(self.stats[PWQ_STAT_STARTED] - self.stats[PWQ_STAT_COMPLETED], 0):5} ' \
100+
f'{cm_wakeup:>7} ' \
101+
f'{mayday:>7} ' \
102+
f'{rescued:>7} '
103+
return out.rstrip(':')
104+
105+
exit_req = False
106+
107+
def sigint_handler(signr, frame):
108+
global exit_req
109+
exit_req = True
110+
111+
def main():
112+
# handle args
113+
table_fmt = not args.json
114+
interval = args.interval
115+
116+
re_str = None
117+
if args.workqueue:
118+
for r in args.workqueue:
119+
if re_str is None:
120+
re_str = r
121+
else:
122+
re_str += '|' + r
123+
124+
filter_re = re.compile(re_str) if re_str else None
125+
126+
# monitoring loop
127+
signal.signal(signal.SIGINT, sigint_handler)
128+
129+
while not exit_req:
130+
now = time.time()
131+
132+
if table_fmt:
133+
print()
134+
print(WqStats.table_header_str())
135+
136+
for wq in list_for_each_entry('struct workqueue_struct', workqueues.address_of_(), 'list'):
137+
stats = WqStats(wq)
138+
if filter_re and not filter_re.search(stats.name):
139+
continue
140+
if table_fmt:
141+
print(stats.table_row_str())
142+
else:
143+
print(stats.dict(now))
144+
145+
if interval == 0:
146+
break
147+
time.sleep(interval)
148+
149+
if __name__ == "__main__":
150+
main()

0 commit comments

Comments
 (0)