Skip to content
This repository was archived by the owner on Apr 26, 2024. It is now read-only.

Commit 93c23e2

Browse files
committed
Rewrite filter_events_for_server to handle outliers
The intention here is to avoid doing state lookups for outliers in `/_matrix/federation/v1/event`. Unfortunately that's expanded into something of a rewrite of `filter_events_for_server`, which ended up trying to do that operation in a couple of places.
1 parent a7fb66e commit 93c23e2

File tree

2 files changed

+179
-104
lines changed

2 files changed

+179
-104
lines changed

synapse/visibility.py

+126-104
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
# Copyright 2014 - 2016 OpenMarket Ltd
2+
# Copyright (C) The Matrix.org Foundation C.I.C. 2022
23
#
34
# Licensed under the Apache License, Version 2.0 (the "License");
45
# you may not use this file except in compliance with the License.
@@ -12,7 +13,7 @@
1213
# See the License for the specific language governing permissions and
1314
# limitations under the License.
1415
import logging
15-
from typing import Dict, FrozenSet, List, Optional
16+
from typing import Collection, Dict, Final, FrozenSet, List, Optional
1617

1718
from synapse.api.constants import EventTypes, HistoryVisibility, Membership
1819
from synapse.events import EventBase
@@ -293,67 +294,28 @@ def is_sender_erased(event: EventBase, erased_senders: Dict[str, bool]) -> bool:
293294
return True
294295
return False
295296

296-
def check_event_is_visible(event: EventBase, state: StateMap[EventBase]) -> bool:
297-
history = state.get((EventTypes.RoomHistoryVisibility, ""), None)
298-
if history:
299-
visibility = history.content.get(
300-
"history_visibility", HistoryVisibility.SHARED
301-
)
302-
if visibility in [HistoryVisibility.INVITED, HistoryVisibility.JOINED]:
303-
# We now loop through all state events looking for
304-
# membership states for the requesting server to determine
305-
# if the server is either in the room or has been invited
306-
# into the room.
307-
for ev in state.values():
308-
if ev.type != EventTypes.Member:
309-
continue
310-
try:
311-
domain = get_domain_from_id(ev.state_key)
312-
except Exception:
313-
continue
314-
315-
if domain != server_name:
316-
continue
317-
318-
memtype = ev.membership
319-
if memtype == Membership.JOIN:
320-
return True
321-
elif memtype == Membership.INVITE:
322-
if visibility == HistoryVisibility.INVITED:
323-
return True
324-
else:
325-
# server has no users in the room: redact
326-
return False
327-
328-
return True
329-
330-
# Lets check to see if all the events have a history visibility
331-
# of "shared" or "world_readable". If that's the case then we don't
332-
# need to check membership (as we know the server is in the room).
333-
event_to_state_ids = await storage.state.get_state_ids_for_events(
334-
frozenset(e.event_id for e in events),
335-
state_filter=StateFilter.from_types(
336-
types=((EventTypes.RoomHistoryVisibility, ""),)
337-
),
338-
)
339-
340-
visibility_ids = set()
341-
for sids in event_to_state_ids.values():
342-
hist = sids.get((EventTypes.RoomHistoryVisibility, ""))
343-
if hist:
344-
visibility_ids.add(hist)
297+
def check_event_is_visible(
298+
visibility: str, memberships: StateMap[EventBase]
299+
) -> bool:
300+
if visibility not in (HistoryVisibility.INVITED, HistoryVisibility.JOINED):
301+
return True
345302

346-
# If we failed to find any history visibility events then the default
347-
# is "shared" visibility.
348-
if not visibility_ids:
349-
all_open = True
350-
else:
351-
event_map = await storage.main.get_events(visibility_ids)
352-
all_open = all(
353-
e.content.get("history_visibility")
354-
in (None, HistoryVisibility.SHARED, HistoryVisibility.WORLD_READABLE)
355-
for e in event_map.values()
356-
)
303+
# We now loop through all membership events looking for
304+
# membership states for the requesting server to determine
305+
# if the server is either in the room or has been invited
306+
# into the room.
307+
for ev in memberships.values():
308+
assert get_domain_from_id(ev.state_key) == server_name
309+
310+
memtype = ev.membership
311+
if memtype == Membership.JOIN:
312+
return True
313+
elif memtype == Membership.INVITE:
314+
if visibility == HistoryVisibility.INVITED:
315+
return True
316+
317+
# server has no users in the room: redact
318+
return False
357319

358320
if not check_history_visibility_only:
359321
erased_senders = await storage.main.are_users_erased(e.sender for e in events)
@@ -362,34 +324,104 @@ def check_event_is_visible(event: EventBase, state: StateMap[EventBase]) -> bool
362324
# to no users having been erased.
363325
erased_senders = {}
364326

365-
if all_open:
366-
# all the history_visibility state affecting these events is open, so
367-
# we don't need to filter by membership state. We *do* need to check
368-
# for user erasure, though.
369-
if erased_senders:
370-
to_return = []
371-
for e in events:
372-
if not is_sender_erased(e, erased_senders):
373-
to_return.append(e)
374-
elif redact:
375-
to_return.append(prune_event(e))
376-
377-
return to_return
378-
379-
# If there are no erased users then we can just return the given list
380-
# of events without having to copy it.
381-
return events
382-
383-
# Ok, so we're dealing with events that have non-trivial visibility
384-
# rules, so we need to also get the memberships of the room.
385-
386-
# first, for each event we're wanting to return, get the event_ids
387-
# of the history vis and membership state at those events.
327+
# Let's check to see if all the events have a history visibility
328+
# of "shared" or "world_readable". If that's the case then we don't
329+
# need to check membership (as we know the server is in the room).
330+
event_to_history_vis = await _event_to_history_vis(storage, events)
331+
332+
# for any with restricted vis, we also need the memberships
333+
event_to_memberships = await _event_to_memberships(
334+
storage,
335+
[
336+
e
337+
for e in events
338+
if event_to_history_vis[e.event_id]
339+
not in (HistoryVisibility.SHARED, HistoryVisibility.WORLD_READABLE)
340+
],
341+
server_name,
342+
)
343+
344+
to_return = []
345+
for e in events:
346+
erased = is_sender_erased(e, erased_senders)
347+
visible = check_event_is_visible(
348+
event_to_history_vis[e.event_id], event_to_memberships.get(e.event_id, {})
349+
)
350+
if visible and not erased:
351+
to_return.append(e)
352+
elif redact:
353+
to_return.append(prune_event(e))
354+
355+
return to_return
356+
357+
358+
_HISTORY_VIS_KEY: Final = (EventTypes.RoomHistoryVisibility, "")
359+
360+
361+
async def _event_to_history_vis(
362+
storage: Storage, events: Collection[EventBase]
363+
) -> Dict[str, str]:
364+
"""Get the history visibility at each of the given events
365+
366+
Returns a map from event id to history_visibility setting
367+
"""
368+
369+
# outliers get special treatment here. We don't have the state at that point in the
370+
# room (and attempting to look it up will raise an exception), so all we can really
371+
# do is assume that the requesting server is allowed to see the event. That's
372+
# equivalent to there not being a history_visibility event, so we just exclude
373+
# any outliers from the query.
374+
event_to_state_ids = await storage.state.get_state_ids_for_events(
375+
frozenset(e.event_id for e in events if not e.internal_metadata.is_outlier()),
376+
state_filter=StateFilter.from_types(types=(_HISTORY_VIS_KEY,)),
377+
)
378+
379+
visibility_ids = {
380+
vis_event_id
381+
for vis_event_id in (
382+
state_ids.get((EventTypes.RoomHistoryVisibility, ""))
383+
for state_ids in event_to_state_ids.values()
384+
)
385+
if vis_event_id
386+
}
387+
vis_events = await storage.main.get_events(visibility_ids)
388+
389+
result: Dict[str, str] = {}
390+
for event in events:
391+
vis = HistoryVisibility.SHARED
392+
state_ids = event_to_state_ids.get(event.event_id)
393+
394+
# if we didn't find any state for this event, it's an outlier, and we assume
395+
# it's open
396+
visibility_id = None
397+
if state_ids:
398+
visibility_id = state_ids.get(_HISTORY_VIS_KEY)
399+
400+
if visibility_id:
401+
vis_event = vis_events[visibility_id]
402+
vis = vis_event.content.get("history_visibility", HistoryVisibility.SHARED)
403+
assert isinstance(vis, str)
404+
405+
result[event.event_id] = vis
406+
return result
407+
408+
409+
async def _event_to_memberships(
410+
storage: Storage, events: Collection[EventBase], server_name: str
411+
) -> Dict[str, StateMap[EventBase]]:
412+
"""Get the remote membership list at each of the given events
413+
414+
Returns a map from event id to state map, which will contain only membership events
415+
for the given server.
416+
"""
417+
418+
if not events:
419+
return {}
420+
421+
# for each event, get the event_ids of the membership state at those events.
388422
event_to_state_ids = await storage.state.get_state_ids_for_events(
389423
frozenset(e.event_id for e in events),
390-
state_filter=StateFilter.from_types(
391-
types=((EventTypes.RoomHistoryVisibility, ""), (EventTypes.Member, None))
392-
),
424+
state_filter=StateFilter.from_types(types=((EventTypes.Member, None),)),
393425
)
394426

395427
# We only want to pull out member events that correspond to the
@@ -405,36 +437,26 @@ def check_event_is_visible(event: EventBase, state: StateMap[EventBase]) -> bool
405437
for key, event_id in key_to_eid.items()
406438
}
407439

408-
def include(typ, state_key):
409-
if typ != EventTypes.Member:
410-
return True
411-
440+
def include(state_key):
412441
# we avoid using get_domain_from_id here for efficiency.
413442
idx = state_key.find(":")
414443
if idx == -1:
415444
return False
416445
return state_key[idx + 1 :] == server_name
417446

418447
event_map = await storage.main.get_events(
419-
[e_id for e_id, key in event_id_to_state_key.items() if include(key[0], key[1])]
448+
[
449+
e_id
450+
for e_id, (_, state_key) in event_id_to_state_key.items()
451+
if include(state_key)
452+
]
420453
)
421454

422-
event_to_state = {
455+
return {
423456
e_id: {
424457
key: event_map[inner_e_id]
425458
for key, inner_e_id in key_to_eid.items()
426459
if inner_e_id in event_map
427460
}
428461
for e_id, key_to_eid in event_to_state_ids.items()
429462
}
430-
431-
to_return = []
432-
for e in events:
433-
erased = is_sender_erased(e, erased_senders)
434-
visible = check_event_is_visible(e, event_to_state[e.event_id])
435-
if visible and not erased:
436-
to_return.append(e)
437-
elif redact:
438-
to_return.append(prune_event(e))
439-
440-
return to_return

tests/test_visibility.py

+53
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717

1818
from synapse.api.room_versions import RoomVersions
1919
from synapse.events import EventBase, make_event_from_dict
20+
from synapse.events.snapshot import EventContext
2021
from synapse.types import JsonDict, create_requester
2122
from synapse.visibility import filter_events_for_client, filter_events_for_server
2223

@@ -73,6 +74,39 @@ def test_filtering(self) -> None:
7374
self.assertEqual(events_to_filter[i].event_id, filtered[i].event_id)
7475
self.assertEqual(filtered[i].content["a"], "b")
7576

77+
def test_filter_outlier(self) -> None:
78+
# outlier events must be returned, for the good of the collective federation
79+
self.get_success(self._inject_room_member("@resident:remote_hs"))
80+
self.get_success(self._inject_visibility("@resident:remote_hs", "joined"))
81+
82+
outlier = self.get_success(self._inject_outlier())
83+
self.assertEqual(
84+
self.get_success(
85+
filter_events_for_server(self.storage, "remote_hs", [outlier])
86+
),
87+
[outlier],
88+
)
89+
90+
# it should also work when there are other events in the list
91+
evt = self.get_success(self._inject_message("@unerased:local_hs"))
92+
93+
filtered = self.get_success(
94+
filter_events_for_server(self.storage, "remote_hs", [outlier, evt])
95+
)
96+
self.assertEqual(len(filtered), 2, f"expected 2 results, got: {filtered}")
97+
self.assertEqual(filtered[0], outlier)
98+
self.assertEqual(filtered[1].event_id, evt.event_id)
99+
self.assertEqual(filtered[1].content, evt.content)
100+
101+
# ... but other servers should only be able to see the outlier (the other should
102+
# be redacted)
103+
filtered = self.get_success(
104+
filter_events_for_server(self.storage, "other_server", [outlier, evt])
105+
)
106+
self.assertEqual(filtered[0], outlier)
107+
self.assertEqual(filtered[1].event_id, evt.event_id)
108+
self.assertNotIn("body", filtered[1].content)
109+
76110
def test_erased_user(self) -> None:
77111
# 4 message events, from erased and unerased users, with a membership
78112
# change in the middle of them.
@@ -187,6 +221,25 @@ def _inject_message(
187221
self.get_success(self.storage.persistence.persist_event(event, context))
188222
return event
189223

224+
def _inject_outlier(self) -> EventBase:
225+
builder = self.event_builder_factory.for_room_version(
226+
RoomVersions.V1,
227+
{
228+
"type": "m.room.member",
229+
"sender": "@test:user",
230+
"state_key": "@test:user",
231+
"room_id": TEST_ROOM_ID,
232+
"content": {"membership": "join"},
233+
},
234+
)
235+
236+
event = self.get_success(builder.build(prev_event_ids=[], auth_event_ids=[]))
237+
event.internal_metadata.outlier = True
238+
self.get_success(
239+
self.storage.persistence.persist_event(event, EventContext.for_outlier())
240+
)
241+
return event
242+
190243

191244
class FilterEventsForClientTestCase(unittest.FederatingHomeserverTestCase):
192245
def test_out_of_band_invite_rejection(self):

0 commit comments

Comments
 (0)