Skip to content

Commit 5831342

Browse files
committed
Log task usage upon finish and every 5 minutes otherwise
1 parent 52af16c commit 5831342

File tree

1 file changed

+66
-2
lines changed

1 file changed

+66
-2
lines changed

synapse/util/task_scheduler.py

+66-2
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,12 @@
2424

2525
from twisted.python.failure import Failure
2626

27-
from synapse.logging.context import nested_logging_context
27+
from synapse.logging.context import (
28+
ContextResourceUsage,
29+
LoggingContext,
30+
nested_logging_context,
31+
set_current_context,
32+
)
2833
from synapse.metrics import LaterGauge
2934
from synapse.metrics.background_process_metrics import (
3035
run_as_background_process,
@@ -81,6 +86,8 @@ class TaskScheduler:
8186
MAX_CONCURRENT_RUNNING_TASKS = 5
8287
# Time from the last task update after which we will log a warning
8388
LAST_UPDATE_BEFORE_WARNING_MS = 24 * 60 * 60 * 1000 # 24hrs
89+
# Report a running task's status and usage every so often.
90+
OCCASIONAL_REPORT_INTERVAL_MS = 5 * 60 * 1000 # 5 minutes
8491

8592
def __init__(self, hs: "HomeServer"):
8693
self._hs = hs
@@ -346,6 +353,32 @@ async def _clean_scheduled_tasks(self) -> None:
346353
assert task.id not in self._running_tasks
347354
await self._store.delete_scheduled_task(task.id)
348355

356+
@staticmethod
357+
def _log_task_usage(
358+
state: str, task: ScheduledTask, usage: ContextResourceUsage, active_time: float
359+
) -> None:
360+
"""
361+
Log a line describing the state and usage of a task.
362+
The log line is inspired by / a copy of the request log line format,
363+
but with irrelevant fields removed.
364+
365+
active_time: Time that the task has been running for, in seconds.
366+
"""
367+
368+
logger.info(
369+
"Task %s: %.3fsec (%.3fsec, %.3fsec) (%.3fsec/%.3fsec/%d)"
370+
" [%d dbevts] %r",
371+
state,
372+
active_time,
373+
usage.ru_utime,
374+
usage.ru_stime,
375+
usage.db_sched_duration_sec,
376+
usage.db_txn_duration_sec,
377+
int(usage.db_txn_count),
378+
usage.evt_db_fetch_count,
379+
task.params,
380+
)
381+
349382
async def _launch_task(self, task: ScheduledTask) -> None:
350383
"""Launch a scheduled task now.
351384
@@ -360,8 +393,32 @@ async def _launch_task(self, task: ScheduledTask) -> None:
360393
)
361394
function = self._actions[task.action]
362395

396+
def _occasional_report(
397+
task_log_context: LoggingContext, start_time: int
398+
) -> None:
399+
"""
400+
Helper to log a 'Task continuing' line every so often.
401+
"""
402+
403+
current_time = int(self._clock.time())
404+
calling_context = set_current_context(task_log_context)
405+
try:
406+
usage = task_log_context.get_resource_usage()
407+
TaskScheduler._log_task_usage(
408+
"continuing", task, usage, (current_time - start_time) * 0.001
409+
)
410+
finally:
411+
set_current_context(calling_context)
412+
363413
async def wrapper() -> None:
364-
with nested_logging_context(task.id):
414+
with nested_logging_context(task.id) as log_context:
415+
start_time = int(self._clock.time())
416+
occasional_status_call = self._clock.looping_call(
417+
_occasional_report,
418+
TaskScheduler.OCCASIONAL_REPORT_INTERVAL_MS,
419+
log_context,
420+
start_time,
421+
)
365422
try:
366423
(status, result, error) = await function(task)
367424
except Exception:
@@ -383,6 +440,13 @@ async def wrapper() -> None:
383440
)
384441
self._running_tasks.remove(task.id)
385442

443+
current_time = int(self._clock.time())
444+
usage = log_context.get_resource_usage()
445+
TaskScheduler._log_task_usage(
446+
status.value, task, usage, (current_time - start_time) * 0.001
447+
)
448+
occasional_status_call.stop()
449+
386450
# Try launch a new task since we've finished with this one.
387451
self._clock.call_later(0.1, self._launch_scheduled_tasks)
388452

0 commit comments

Comments
 (0)