1
- # Copyright 2014-2021 The Matrix.org Foundation C.I.C.
1
+ # Copyright 2014-2022 The Matrix.org Foundation C.I.C.
2
2
# Copyright 2020 Sorunome
3
3
#
4
4
# Licensed under the Apache License, Version 2.0 (the "License");
15
15
16
16
"""Contains handlers for federation events."""
17
17
18
+ import enum
19
+ import itertools
18
20
import logging
21
+ from enum import Enum
19
22
from http import HTTPStatus
20
23
from typing import TYPE_CHECKING , Dict , Iterable , List , Optional , Tuple , Union
21
24
25
+ import attr
22
26
from signedjson .key import decode_verify_key_bytes
23
27
from signedjson .sign import verify_signed_json
24
28
from unpaddedbase64 import decode_base64
@@ -92,6 +96,24 @@ def get_domains_from_state(state: StateMap[EventBase]) -> List[Tuple[str, int]]:
92
96
return sorted (joined_domains .items (), key = lambda d : d [1 ])
93
97
94
98
99
+ class _BackfillPointType (Enum ):
100
+ # a regular backwards extremity (ie, an event which we don't yet have, but which
101
+ # is referred to by other events in the DAG)
102
+ BACKWARDS_EXTREMITY = enum .auto ()
103
+
104
+ # an MSC2716 "insertion event"
105
+ INSERTION_PONT = enum .auto ()
106
+
107
+
108
+ @attr .s (slots = True , auto_attribs = True , frozen = True )
109
+ class _BackfillPoint :
110
+ """A potential point we might backfill from"""
111
+
112
+ event_id : str
113
+ depth : int
114
+ type : _BackfillPointType
115
+
116
+
95
117
class FederationHandler :
96
118
"""Handles general incoming federation requests
97
119
@@ -157,89 +179,51 @@ async def maybe_backfill(
157
179
async def _maybe_backfill_inner (
158
180
self , room_id : str , current_depth : int , limit : int
159
181
) -> bool :
160
- oldest_events_with_depth = (
161
- await self .store .get_oldest_event_ids_with_depth_in_room (room_id )
162
- )
182
+ backwards_extremities = [
183
+ _BackfillPoint (event_id , depth , _BackfillPointType .BACKWARDS_EXTREMITY )
184
+ for event_id , depth in await self .store .get_oldest_event_ids_with_depth_in_room (
185
+ room_id
186
+ )
187
+ ]
163
188
164
- insertion_events_to_be_backfilled : Dict [ str , int ] = {}
189
+ insertion_events_to_be_backfilled : List [ _BackfillPoint ] = []
165
190
if self .hs .config .experimental .msc2716_enabled :
166
- insertion_events_to_be_backfilled = (
167
- await self .store .get_insertion_event_backward_extremities_in_room (
191
+ insertion_events_to_be_backfilled = [
192
+ _BackfillPoint (event_id , depth , _BackfillPointType .INSERTION_PONT )
193
+ for event_id , depth in await self .store .get_insertion_event_backward_extremities_in_room (
168
194
room_id
169
195
)
170
- )
196
+ ]
171
197
logger .debug (
172
- "_maybe_backfill_inner: extremities oldest_events_with_depth =%s insertion_events_to_be_backfilled=%s" ,
173
- oldest_events_with_depth ,
198
+ "_maybe_backfill_inner: backwards_extremities =%s insertion_events_to_be_backfilled=%s" ,
199
+ backwards_extremities ,
174
200
insertion_events_to_be_backfilled ,
175
201
)
176
202
177
- if not oldest_events_with_depth and not insertion_events_to_be_backfilled :
203
+ if not backwards_extremities and not insertion_events_to_be_backfilled :
178
204
logger .debug ("Not backfilling as no extremeties found." )
179
205
return False
180
206
181
- # We only want to paginate if we can actually see the events we'll get,
182
- # as otherwise we'll just spend a lot of resources to get redacted
183
- # events.
184
- #
185
- # We do this by filtering all the backwards extremities and seeing if
186
- # any remain. Given we don't have the extremity events themselves, we
187
- # need to actually check the events that reference them.
188
- #
189
- # *Note*: the spec wants us to keep backfilling until we reach the start
190
- # of the room in case we are allowed to see some of the history. However
191
- # in practice that causes more issues than its worth, as a) its
192
- # relatively rare for there to be any visible history and b) even when
193
- # there is its often sufficiently long ago that clients would stop
194
- # attempting to paginate before backfill reached the visible history.
195
- #
196
- # TODO: If we do do a backfill then we should filter the backwards
197
- # extremities to only include those that point to visible portions of
198
- # history.
199
- #
200
- # TODO: Correctly handle the case where we are allowed to see the
201
- # forward event but not the backward extremity, e.g. in the case of
202
- # initial join of the server where we are allowed to see the join
203
- # event but not anything before it. This would require looking at the
204
- # state *before* the event, ignoring the special casing certain event
205
- # types have.
206
-
207
- forward_event_ids = await self .store .get_successor_events (
208
- list (oldest_events_with_depth )
209
- )
210
-
211
- extremities_events = await self .store .get_events (
212
- forward_event_ids ,
213
- redact_behaviour = EventRedactBehaviour .AS_IS ,
214
- get_prev_content = False ,
207
+ # we now have a list of potential places to backpaginate from. We prefer to
208
+ # start with the most recent (ie, max depth), so let's sort the list.
209
+ sorted_backfill_points : List [_BackfillPoint ] = sorted (
210
+ itertools .chain (
211
+ backwards_extremities ,
212
+ insertion_events_to_be_backfilled ,
213
+ ),
214
+ key = lambda e : - int (e .depth ),
215
215
)
216
216
217
- # We set `check_history_visibility_only` as we might otherwise get false
218
- # positives from users having been erased.
219
- filtered_extremities = await filter_events_for_server (
220
- self .storage ,
221
- self .server_name ,
222
- list (extremities_events .values ()),
223
- redact = False ,
224
- check_history_visibility_only = True ,
225
- )
226
217
logger .debug (
227
- "_maybe_backfill_inner: filtered_extremities %s" , filtered_extremities
218
+ "_maybe_backfill_inner: room_id: %s: current_depth: %s, limit: %s, "
219
+ "backfill points (%d): %s" ,
220
+ room_id ,
221
+ current_depth ,
222
+ limit ,
223
+ len (sorted_backfill_points ),
224
+ sorted_backfill_points ,
228
225
)
229
226
230
- if not filtered_extremities and not insertion_events_to_be_backfilled :
231
- return False
232
-
233
- extremities = {
234
- ** oldest_events_with_depth ,
235
- # TODO: insertion_events_to_be_backfilled is currently skipping the filtered_extremities checks
236
- ** insertion_events_to_be_backfilled ,
237
- }
238
-
239
- # Check if we reached a point where we should start backfilling.
240
- sorted_extremeties_tuple = sorted (extremities .items (), key = lambda e : - int (e [1 ]))
241
- max_depth = sorted_extremeties_tuple [0 ][1 ]
242
-
243
227
# If we're approaching an extremity we trigger a backfill, otherwise we
244
228
# no-op.
245
229
#
@@ -249,6 +233,11 @@ async def _maybe_backfill_inner(
249
233
# chose more than one times the limit in case of failure, but choosing a
250
234
# much larger factor will result in triggering a backfill request much
251
235
# earlier than necessary.
236
+ #
237
+ # XXX: shouldn't we do this *after* the filter by depth below? Again, we don't
238
+ # care about events that have happened after our current position.
239
+ #
240
+ max_depth = sorted_backfill_points [0 ].depth
252
241
if current_depth - 2 * limit > max_depth :
253
242
logger .debug (
254
243
"Not backfilling as we don't need to. %d < %d - 2 * %d" ,
@@ -265,31 +254,98 @@ async def _maybe_backfill_inner(
265
254
# 2. we have likely previously tried and failed to backfill from that
266
255
# extremity, so to avoid getting "stuck" requesting the same
267
256
# backfill repeatedly we drop those extremities.
268
- filtered_sorted_extremeties_tuple = [
269
- t for t in sorted_extremeties_tuple if int (t [1 ]) <= current_depth
270
- ]
271
-
272
- logger .debug (
273
- "room_id: %s, backfill: current_depth: %s, limit: %s, max_depth: %s, extrems (%d): %s filtered_sorted_extremeties_tuple: %s" ,
274
- room_id ,
275
- current_depth ,
276
- limit ,
277
- max_depth ,
278
- len (sorted_extremeties_tuple ),
279
- sorted_extremeties_tuple ,
280
- filtered_sorted_extremeties_tuple ,
281
- )
282
-
257
+ #
283
258
# However, we need to check that the filtered extremities are non-empty.
284
259
# If they are empty then either we can a) bail or b) still attempt to
285
260
# backfill. We opt to try backfilling anyway just in case we do get
286
261
# relevant events.
287
- if filtered_sorted_extremeties_tuple :
288
- sorted_extremeties_tuple = filtered_sorted_extremeties_tuple
262
+ #
263
+ filtered_sorted_backfill_points = [
264
+ t for t in sorted_backfill_points if t .depth <= current_depth
265
+ ]
266
+ if filtered_sorted_backfill_points :
267
+ logger .debug (
268
+ "_maybe_backfill_inner: backfill points before current depth: %s" ,
269
+ filtered_sorted_backfill_points ,
270
+ )
271
+ sorted_backfill_points = filtered_sorted_backfill_points
272
+ else :
273
+ logger .debug (
274
+ "_maybe_backfill_inner: all backfill points are *after* current depth. Backfilling anyway."
275
+ )
276
+
277
+ # For performance's sake, we only want to paginate from a particular extremity
278
+ # if we can actually see the events we'll get. Otherwise, we'd just spend a lot
279
+ # of resources to get redacted events. We check each extremity in turn and
280
+ # ignore those which users on our server wouldn't be able to see.
281
+ #
282
+ # Additionally, we limit ourselves to backfilling from at most 5 extremities,
283
+ # for two reasons:
284
+ #
285
+ # - The check which determines if we can see an extremity's events can be
286
+ # expensive (we load the full state for the room at each of the backfill
287
+ # points, or (worse) their successors)
288
+ # - We want to avoid the server-server API request URI becoming too long.
289
+ #
290
+ # *Note*: the spec wants us to keep backfilling until we reach the start
291
+ # of the room in case we are allowed to see some of the history. However,
292
+ # in practice that causes more issues than its worth, as (a) it's
293
+ # relatively rare for there to be any visible history and (b) even when
294
+ # there is it's often sufficiently long ago that clients would stop
295
+ # attempting to paginate before backfill reached the visible history.
289
296
290
- # We don't want to specify too many extremities as it causes the backfill
291
- # request URI to be too long.
292
- extremities = dict (sorted_extremeties_tuple [:5 ])
297
+ extremities_to_request : List [str ] = []
298
+ for bp in sorted_backfill_points :
299
+ if len (extremities_to_request ) >= 5 :
300
+ break
301
+
302
+ # For regular backwards extremities, we don't have the extremity events
303
+ # themselves, so we need to actually check the events that reference them -
304
+ # their "successor" events.
305
+ #
306
+ # TODO: Correctly handle the case where we are allowed to see the
307
+ # successor event but not the backward extremity, e.g. in the case of
308
+ # initial join of the server where we are allowed to see the join
309
+ # event but not anything before it. This would require looking at the
310
+ # state *before* the event, ignoring the special casing certain event
311
+ # types have.
312
+ if bp .type == _BackfillPointType .INSERTION_PONT :
313
+ event_ids_to_check = [bp .event_id ]
314
+ else :
315
+ event_ids_to_check = await self .store .get_successor_events (bp .event_id )
316
+
317
+ events_to_check = await self .store .get_events_as_list (
318
+ event_ids_to_check ,
319
+ redact_behaviour = EventRedactBehaviour .AS_IS ,
320
+ get_prev_content = False ,
321
+ )
322
+
323
+ # We set `check_history_visibility_only` as we might otherwise get false
324
+ # positives from users having been erased.
325
+ filtered_extremities = await filter_events_for_server (
326
+ self .storage ,
327
+ self .server_name ,
328
+ events_to_check ,
329
+ redact = False ,
330
+ check_history_visibility_only = True ,
331
+ )
332
+ if filtered_extremities :
333
+ extremities_to_request .append (bp .event_id )
334
+ else :
335
+ logger .debug (
336
+ "_maybe_backfill_inner: skipping extremity %s as it would not be visible" ,
337
+ bp ,
338
+ )
339
+
340
+ if not extremities_to_request :
341
+ logger .debug (
342
+ "_maybe_backfill_inner: found no extremities which would be visible"
343
+ )
344
+ return False
345
+
346
+ logger .debug (
347
+ "_maybe_backfill_inner: extremities_to_request %s" , extremities_to_request
348
+ )
293
349
294
350
# Now we need to decide which hosts to hit first.
295
351
@@ -309,7 +365,7 @@ async def try_backfill(domains: List[str]) -> bool:
309
365
for dom in domains :
310
366
try :
311
367
await self ._federation_event_handler .backfill (
312
- dom , room_id , limit = 100 , extremities = extremities
368
+ dom , room_id , limit = 100 , extremities = extremities_to_request
313
369
)
314
370
# If this succeeded then we probably already have the
315
371
# appropriate stuff.
0 commit comments