Skip to content
This repository was archived by the owner on Apr 26, 2024. It is now read-only.

Commit 7af07f9

Browse files
Instrument _check_sigs_and_hash_and_fetch to trace time spent in child concurrent calls (#13588)
Instrument `_check_sigs_and_hash_and_fetch` to trace time spent in child concurrent calls because I've see `_check_sigs_and_hash_and_fetch` take [10.41s to process 100 events](#13587) Fix #13587 Part of #13356
1 parent a25a370 commit 7af07f9

File tree

5 files changed

+47
-3
lines changed

5 files changed

+47
-3
lines changed

changelog.d/13588.misc

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Instrument `_check_sigs_and_hash_and_fetch` to trace time spent in child concurrent calls for understandable traces in Jaeger.

synapse/crypto/event_signing.py

+2
Original file line numberDiff line numberDiff line change
@@ -28,13 +28,15 @@
2828
from synapse.api.room_versions import RoomVersion
2929
from synapse.events import EventBase
3030
from synapse.events.utils import prune_event, prune_event_dict
31+
from synapse.logging.opentracing import trace
3132
from synapse.types import JsonDict
3233

3334
logger = logging.getLogger(__name__)
3435

3536
Hasher = Callable[[bytes], "hashlib._Hash"]
3637

3738

39+
@trace
3840
def check_event_content_hash(
3941
event: EventBase, hash_algorithm: Hasher = hashlib.sha256
4042
) -> bool:

synapse/events/spamcheck.py

+2
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@
3232

3333
import synapse
3434
from synapse.api.errors import Codes
35+
from synapse.logging.opentracing import trace
3536
from synapse.rest.media.v1._base import FileInfo
3637
from synapse.rest.media.v1.media_storage import ReadableFileWrapper
3738
from synapse.spam_checker_api import RegistrationBehaviour
@@ -378,6 +379,7 @@ def register_callbacks(
378379
if check_media_file_for_spam is not None:
379380
self._check_media_file_for_spam_callbacks.append(check_media_file_for_spam)
380381

382+
@trace
381383
async def check_event_for_spam(
382384
self, event: "synapse.events.EventBase"
383385
) -> Union[Tuple[Codes, JsonDict], str]:

synapse/federation/federation_base.py

+22
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
from synapse.events import EventBase, make_event_from_dict
2424
from synapse.events.utils import prune_event, validate_canonicaljson
2525
from synapse.http.servlet import assert_params_in_dict
26+
from synapse.logging.opentracing import log_kv, trace
2627
from synapse.types import JsonDict, get_domain_from_id
2728

2829
if TYPE_CHECKING:
@@ -55,6 +56,7 @@ def __init__(self, hs: "HomeServer"):
5556
self._clock = hs.get_clock()
5657
self._storage_controllers = hs.get_storage_controllers()
5758

59+
@trace
5860
async def _check_sigs_and_hash(
5961
self, room_version: RoomVersion, pdu: EventBase
6062
) -> EventBase:
@@ -97,17 +99,36 @@ async def _check_sigs_and_hash(
9799
"Event %s seems to have been redacted; using our redacted copy",
98100
pdu.event_id,
99101
)
102+
log_kv(
103+
{
104+
"message": "Event seems to have been redacted; using our redacted copy",
105+
"event_id": pdu.event_id,
106+
}
107+
)
100108
else:
101109
logger.warning(
102110
"Event %s content has been tampered, redacting",
103111
pdu.event_id,
104112
)
113+
log_kv(
114+
{
115+
"message": "Event content has been tampered, redacting",
116+
"event_id": pdu.event_id,
117+
}
118+
)
105119
return redacted_event
106120

107121
spam_check = await self.spam_checker.check_event_for_spam(pdu)
108122

109123
if spam_check != self.spam_checker.NOT_SPAM:
110124
logger.warning("Event contains spam, soft-failing %s", pdu.event_id)
125+
log_kv(
126+
{
127+
"message": "Event contains spam, redacting (to save disk space) "
128+
"as well as soft-failing (to stop using the event in prev_events)",
129+
"event_id": pdu.event_id,
130+
}
131+
)
111132
# we redact (to save disk space) as well as soft-failing (to stop
112133
# using the event in prev_events).
113134
redacted_event = prune_event(pdu)
@@ -117,6 +138,7 @@ async def _check_sigs_and_hash(
117138
return pdu
118139

119140

141+
@trace
120142
async def _check_sigs_on_pdu(
121143
keyring: Keyring, room_version: RoomVersion, pdu: EventBase
122144
) -> None:

synapse/federation/federation_client.py

+20-3
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@
6161
)
6262
from synapse.federation.transport.client import SendJoinResponse
6363
from synapse.http.types import QueryParams
64-
from synapse.logging.opentracing import SynapseTags, set_tag, tag_args, trace
64+
from synapse.logging.opentracing import SynapseTags, log_kv, set_tag, tag_args, trace
6565
from synapse.types import JsonDict, UserID, get_domain_from_id
6666
from synapse.util.async_helpers import concurrently_execute
6767
from synapse.util.caches.expiringcache import ExpiringCache
@@ -587,11 +587,15 @@ async def _check_sigs_and_hash_and_fetch(
587587
Returns:
588588
A list of PDUs that have valid signatures and hashes.
589589
"""
590+
set_tag(
591+
SynapseTags.RESULT_PREFIX + "pdus.length",
592+
str(len(pdus)),
593+
)
590594

591595
# We limit how many PDUs we check at once, as if we try to do hundreds
592596
# of thousands of PDUs at once we see large memory spikes.
593597

594-
valid_pdus = []
598+
valid_pdus: List[EventBase] = []
595599

596600
async def _execute(pdu: EventBase) -> None:
597601
valid_pdu = await self._check_sigs_and_hash_and_fetch_one(
@@ -607,6 +611,8 @@ async def _execute(pdu: EventBase) -> None:
607611

608612
return valid_pdus
609613

614+
@trace
615+
@tag_args
610616
async def _check_sigs_and_hash_and_fetch_one(
611617
self,
612618
pdu: EventBase,
@@ -639,16 +645,27 @@ async def _check_sigs_and_hash_and_fetch_one(
639645
except InvalidEventSignatureError as e:
640646
logger.warning(
641647
"Signature on retrieved event %s was invalid (%s). "
642-
"Checking local store/orgin server",
648+
"Checking local store/origin server",
643649
pdu.event_id,
644650
e,
645651
)
652+
log_kv(
653+
{
654+
"message": "Signature on retrieved event was invalid. "
655+
"Checking local store/origin server",
656+
"event_id": pdu.event_id,
657+
"InvalidEventSignatureError": e,
658+
}
659+
)
646660

647661
# Check local db.
648662
res = await self.store.get_event(
649663
pdu.event_id, allow_rejected=True, allow_none=True
650664
)
651665

666+
# If the PDU fails its signature check and we don't have it in our
667+
# database, we then request it from sender's server (if that is not the
668+
# same as `origin`).
652669
pdu_origin = get_domain_from_id(pdu.sender)
653670
if not res and pdu_origin != origin:
654671
try:

0 commit comments

Comments
 (0)