Skip to content

Commit a08b072

Browse files
reivilibreMic92
authored andcommitted
Add logging to tasks managed by the task scheduler, showing CPU and database usage. (element-hq#17219)
The log format is the same as the request log format, except: - fields that are specific to HTTP requests have been removed - the task's params are included at the end of the log line. These log lines are emitted: - when the task function finishes — both completion and failure (and I suppose it is possible for a task to become schedulable again?) - every 5 minutes whilst it is running Closes element-hq#17217. --------- Signed-off-by: Olivier 'reivilibre <[email protected]>
1 parent 7d32075 commit a08b072

File tree

2 files changed

+68
-2
lines changed

2 files changed

+68
-2
lines changed

changelog.d/17219.feature

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Add logging to tasks managed by the task scheduler, showing CPU and database usage.

synapse/util/task_scheduler.py

Lines changed: 67 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,12 @@
2424

2525
from twisted.python.failure import Failure
2626

27-
from synapse.logging.context import nested_logging_context
27+
from synapse.logging.context import (
28+
ContextResourceUsage,
29+
LoggingContext,
30+
nested_logging_context,
31+
set_current_context,
32+
)
2833
from synapse.metrics import LaterGauge
2934
from synapse.metrics.background_process_metrics import (
3035
run_as_background_process,
@@ -81,6 +86,8 @@ class TaskScheduler:
8186
MAX_CONCURRENT_RUNNING_TASKS = 5
8287
# Time from the last task update after which we will log a warning
8388
LAST_UPDATE_BEFORE_WARNING_MS = 24 * 60 * 60 * 1000 # 24hrs
89+
# Report a running task's status and usage every so often.
90+
OCCASIONAL_REPORT_INTERVAL_MS = 5 * 60 * 1000 # 5 minutes
8491

8592
def __init__(self, hs: "HomeServer"):
8693
self._hs = hs
@@ -346,6 +353,33 @@ async def _clean_scheduled_tasks(self) -> None:
346353
assert task.id not in self._running_tasks
347354
await self._store.delete_scheduled_task(task.id)
348355

356+
@staticmethod
357+
def _log_task_usage(
358+
state: str, task: ScheduledTask, usage: ContextResourceUsage, active_time: float
359+
) -> None:
360+
"""
361+
Log a line describing the state and usage of a task.
362+
The log line is inspired by / a copy of the request log line format,
363+
but with irrelevant fields removed.
364+
365+
active_time: Time that the task has been running for, in seconds.
366+
"""
367+
368+
logger.info(
369+
"Task %s: %.3fsec (%.3fsec, %.3fsec) (%.3fsec/%.3fsec/%d)"
370+
" [%d dbevts] %r, %r",
371+
state,
372+
active_time,
373+
usage.ru_utime,
374+
usage.ru_stime,
375+
usage.db_sched_duration_sec,
376+
usage.db_txn_duration_sec,
377+
int(usage.db_txn_count),
378+
usage.evt_db_fetch_count,
379+
task.resource_id,
380+
task.params,
381+
)
382+
349383
async def _launch_task(self, task: ScheduledTask) -> None:
350384
"""Launch a scheduled task now.
351385
@@ -360,8 +394,32 @@ async def _launch_task(self, task: ScheduledTask) -> None:
360394
)
361395
function = self._actions[task.action]
362396

397+
def _occasional_report(
398+
task_log_context: LoggingContext, start_time: float
399+
) -> None:
400+
"""
401+
Helper to log a 'Task continuing' line every so often.
402+
"""
403+
404+
current_time = self._clock.time()
405+
calling_context = set_current_context(task_log_context)
406+
try:
407+
usage = task_log_context.get_resource_usage()
408+
TaskScheduler._log_task_usage(
409+
"continuing", task, usage, current_time - start_time
410+
)
411+
finally:
412+
set_current_context(calling_context)
413+
363414
async def wrapper() -> None:
364-
with nested_logging_context(task.id):
415+
with nested_logging_context(task.id) as log_context:
416+
start_time = self._clock.time()
417+
occasional_status_call = self._clock.looping_call(
418+
_occasional_report,
419+
TaskScheduler.OCCASIONAL_REPORT_INTERVAL_MS,
420+
log_context,
421+
start_time,
422+
)
365423
try:
366424
(status, result, error) = await function(task)
367425
except Exception:
@@ -383,6 +441,13 @@ async def wrapper() -> None:
383441
)
384442
self._running_tasks.remove(task.id)
385443

444+
current_time = self._clock.time()
445+
usage = log_context.get_resource_usage()
446+
TaskScheduler._log_task_usage(
447+
status.value, task, usage, current_time - start_time
448+
)
449+
occasional_status_call.stop()
450+
386451
# Try launch a new task since we've finished with this one.
387452
self._clock.call_later(0.1, self._launch_scheduled_tasks)
388453

0 commit comments

Comments
 (0)