|
| 1 | +#!/usr/bin/env drgn |
| 2 | +# |
| 3 | +# Copyright (C) 2023 Tejun Heo <[email protected]> |
| 4 | +# Copyright (C) 2023 Meta Platforms, Inc. and affiliates. |
| 5 | + |
# Help text handed to argparse as the program description. The parser uses
# RawTextHelpFormatter, which prints this string verbatim, so the two-column
# layout of the field legend below is significant.
desc = """
This is a drgn script to monitor workqueues. For more info on drgn, visit
https://github.com/osandov/drgn.

  total    Total number of work items executed by the workqueue.

  infl     The number of currently in-flight work items.

  CMwake   The number of concurrency-management wake-ups while executing a
           work item of the workqueue.

  mayday   The number of times the rescuer was requested while waiting for
           new worker creation.

  rescued  The number of work items executed by the rescuer.
"""
| 22 | + |
| 23 | +import sys |
| 24 | +import signal |
| 25 | +import os |
| 26 | +import re |
| 27 | +import time |
| 28 | +import json |
| 29 | + |
| 30 | +import drgn |
| 31 | +from drgn.helpers.linux.list import list_for_each_entry,list_empty |
| 32 | +from drgn.helpers.linux.cpumask import for_each_possible_cpu |
| 33 | + |
| 34 | +import argparse |
def _interval_arg(value):
    """Convert the --interval argument to a float that time.sleep() accepts.

    Returns the interval in seconds. Raises argparse.ArgumentTypeError for
    non-numeric, negative, NaN or infinite input so that argparse reports a
    usage error up front instead of the monitoring loop failing later inside
    time.sleep().
    """
    try:
        secs = float(value)
    except ValueError:
        raise argparse.ArgumentTypeError(f'invalid interval: {value!r}') from None
    # The chained comparison is false for NaN as well as for negative and
    # infinite values.
    if not 0 <= secs < float('inf'):
        raise argparse.ArgumentTypeError(
            f'interval must be a finite, non-negative number: {value!r}')
    return secs


# Command line: optional workqueue name regexes, polling interval, and output
# format selection. Parsed at import time; the result is read by main().
parser = argparse.ArgumentParser(description=desc,
                                 formatter_class=argparse.RawTextHelpFormatter)
parser.add_argument('workqueue', metavar='REGEX', nargs='*',
                    help='Target workqueue name patterns (all if empty)')
parser.add_argument('-i', '--interval', metavar='SECS', type=_interval_arg,
                    default=1,
                    help='Monitoring interval (0 to print once and exit)')
parser.add_argument('-j', '--json', action='store_true',
                    help='Output in json')
args = parser.parse_args()
| 44 | + |
def err(s):
    """Write message *s* to stderr, flush it, and exit with status 1."""
    sys.stderr.write(f'{s}\n')
    sys.stderr.flush()
    raise SystemExit(1)
| 48 | + |
# Kernel symbols and constants looked up through `prog`.
# NOTE(review): `prog` is not defined in this file; presumably it is the
# program object the drgn runner injects into the script's globals - confirm.

# List head that main() walks to visit every workqueue_struct.
workqueues = prog['workqueues']

# Bits tested against workqueue_struct.flags in WqStats.
WQ_UNBOUND = prog['WQ_UNBOUND']
WQ_MEM_RECLAIM = prog['WQ_MEM_RECLAIM']

# Indices into pool_workqueue.stats[]; PWQ_NR_STATS is used as its length.
(
    PWQ_STAT_STARTED,       # work items started execution
    PWQ_STAT_COMPLETED,     # work items completed execution
    PWQ_STAT_CM_WAKEUP,     # concurrency-management worker wakeups
    PWQ_STAT_MAYDAY,        # maydays to rescuer
    PWQ_STAT_RESCUED,       # linked work items executed by rescuer
    PWQ_NR_STATS,
) = (
    prog[symbol]
    for symbol in (
        'PWQ_STAT_STARTED',
        'PWQ_STAT_COMPLETED',
        'PWQ_STAT_CM_WAKEUP',
        'PWQ_STAT_MAYDAY',
        'PWQ_STAT_RESCUED',
        'PWQ_NR_STATS',
    )
)
| 60 | + |
class WqStats:
    """Counters of one workqueue, summed over all of its pool_workqueues.

    Attributes:
        name: workqueue name decoded from ``wq.name``.
        unbound: true when WQ_UNBOUND is set in ``wq.flags``.
        mem_reclaim: true when WQ_MEM_RECLAIM is set in ``wq.flags``.
        stats: list of PWQ_NR_STATS integer totals, indexed by PWQ_STAT_*.
    """

    def __init__(self, wq):
        """Read the name, flags and per-pwq stats of workqueue object *wq*."""
        self.name = wq.name.string_().decode()
        # Parenthesised for readers used to C precedence; Python already
        # evaluates '&' before '!='.
        self.unbound = (wq.flags & WQ_UNBOUND) != 0
        self.mem_reclaim = (wq.flags & WQ_MEM_RECLAIM) != 0

        # Sum every stat slot across the pool_workqueues linked on wq.pwqs.
        self.stats = [0] * PWQ_NR_STATS
        for pwq in list_for_each_entry('struct pool_workqueue',
                                       wq.pwqs.address_of_(), 'pwqs_node'):
            for i in range(PWQ_NR_STATS):
                self.stats[i] += int(pwq.stats[i])

    def dict(self, now):
        """Return the snapshot as a plain dict, stamped with time *now*."""
        return {
            'timestamp': now,
            'name': self.name,
            'unbound': self.unbound,
            'mem_reclaim': self.mem_reclaim,
            'started': self.stats[PWQ_STAT_STARTED],
            'completed': self.stats[PWQ_STAT_COMPLETED],
            'cm_wakeup': self.stats[PWQ_STAT_CM_WAKEUP],
            'mayday': self.stats[PWQ_STAT_MAYDAY],
            'rescued': self.stats[PWQ_STAT_RESCUED],
        }

    @staticmethod
    def table_header_str():
        """Return the column header line matching table_row_str()."""
        # Declared static so it works both as WqStats.table_header_str() and
        # when called on an instance.
        return f'{"":>24} {"total":>8} {"infl":>5} {"CMwake":>7} {"mayday":>7} {"rescued":>7}'

    def table_row_str(self):
        """Return this workqueue's counters as one fixed-width table row.

        CMwake is shown only for workqueues without WQ_UNBOUND, and mayday /
        rescued only for workqueues with WQ_MEM_RECLAIM; otherwise the column
        holds '-'.
        """
        started = self.stats[PWQ_STAT_STARTED]
        completed = self.stats[PWQ_STAT_COMPLETED]
        # Clamped so a snapshot where completed exceeds started never shows
        # a negative in-flight count.
        inflight = max(started - completed, 0)

        cm_wakeup = '-' if self.unbound else str(self.stats[PWQ_STAT_CM_WAKEUP])
        if self.mem_reclaim:
            mayday = str(self.stats[PWQ_STAT_MAYDAY])
            rescued = str(self.stats[PWQ_STAT_RESCUED])
        else:
            mayday = rescued = '-'

        # Only the last 24 characters of the name are kept so the columns
        # stay aligned. The row keeps its trailing space, exactly as before.
        return (f'{self.name[-24:]:24} '
                f'{started:8} '
                f'{inflight:5} '
                f'{cm_wakeup:>7} '
                f'{mayday:>7} '
                f'{rescued:>7} ')
| 104 | + |
# Set by sigint_handler(); the monitoring loop in main() checks it at the top
# of every iteration.
exit_req = False


def sigint_handler(signr, frame):
    """Signal handler: request loop exit by setting the exit_req flag."""
    global exit_req
    exit_req = True
| 110 | + |
def main():
    """Print workqueue statistics periodically until interrupted.

    Reads the parsed command line from the module-level ``args``. Each pass
    walks the ``workqueues`` list, builds a WqStats for every workqueue,
    skips those whose name matches none of the requested patterns, and prints
    either a table row or one JSON object per workqueue. An interval of 0
    prints a single pass and returns.
    """
    # handle args
    table_fmt = not args.json
    interval = args.interval

    # Several patterns are OR-ed into a single regex; no pattern (or only
    # empty ones) disables filtering.
    pattern = '|'.join(args.workqueue)
    filter_re = re.compile(pattern) if pattern else None

    # monitoring loop
    # SIGINT only sets exit_req, so the loop finishes the current pass and
    # stops at the next check instead of dying with KeyboardInterrupt.
    signal.signal(signal.SIGINT, sigint_handler)

    while not exit_req:
        now = time.time()

        if table_fmt:
            print()
            print(WqStats.table_header_str())

        for wq in list_for_each_entry('struct workqueue_struct',
                                      workqueues.address_of_(), 'list'):
            stats = WqStats(wq)
            if filter_re and not filter_re.search(stats.name):
                continue
            if table_fmt:
                print(stats.table_row_str())
            else:
                # Emit real JSON; printing the dict itself would produce the
                # Python repr (single quotes, True/False), which JSON parsers
                # reject.
                print(json.dumps(stats.dict(now)))

        if interval == 0:
            break
        time.sleep(interval)


if __name__ == "__main__":
    main()
0 commit comments