Skip to content
This repository was archived by the owner on Apr 26, 2024. It is now read-only.

Commit 41cf4c2

Browse files
authored
Fix non-strings in the event_search table (#12037)
Don't attempt to add non-string `value`s to `event_search` and add a background update to clear out bad rows from `event_search` when using sqlite. Signed-off-by: Sean Quah <[email protected]>
1 parent c56bfb0 commit 41cf4c2

File tree

5 files changed

+173
-11
lines changed

5 files changed

+173
-11
lines changed

Diff for: changelog.d/12037.bugfix

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Properly fix a long-standing bug where wrong data could be inserted in the `event_search` table when using sqlite. This could block running `synapse_port_db` with an "argument of type 'int' is not iterable" error. This bug was partially fixed by a change in Synapse 1.44.0.

Diff for: synapse/storage/databases/main/events.py

+9-9
Original file line numberDiff line numberDiff line change
@@ -1473,10 +1473,10 @@ def _store_rejected_events_txn(self, txn, events_and_contexts):
14731473

14741474
def _update_metadata_tables_txn(
14751475
self,
1476-
txn,
1476+
txn: LoggingTransaction,
14771477
*,
1478-
events_and_contexts,
1479-
all_events_and_contexts,
1478+
events_and_contexts: List[Tuple[EventBase, EventContext]],
1479+
all_events_and_contexts: List[Tuple[EventBase, EventContext]],
14801480
inhibit_local_membership_updates: bool = False,
14811481
):
14821482
"""Update all the miscellaneous tables for new events
@@ -1953,20 +1953,20 @@ def _handle_redaction(self, txn, redacted_event_id):
19531953
txn, table="event_relations", keyvalues={"event_id": redacted_event_id}
19541954
)
19551955

1956-
def _store_room_topic_txn(self, txn, event):
1957-
if hasattr(event, "content") and "topic" in event.content:
1956+
def _store_room_topic_txn(self, txn: LoggingTransaction, event: EventBase):
1957+
if isinstance(event.content.get("topic"), str):
19581958
self.store_event_search_txn(
19591959
txn, event, "content.topic", event.content["topic"]
19601960
)
19611961

1962-
def _store_room_name_txn(self, txn, event):
1963-
if hasattr(event, "content") and "name" in event.content:
1962+
def _store_room_name_txn(self, txn: LoggingTransaction, event: EventBase):
1963+
if isinstance(event.content.get("name"), str):
19641964
self.store_event_search_txn(
19651965
txn, event, "content.name", event.content["name"]
19661966
)
19671967

1968-
def _store_room_message_txn(self, txn, event):
1969-
if hasattr(event, "content") and "body" in event.content:
1968+
def _store_room_message_txn(self, txn: LoggingTransaction, event: EventBase):
1969+
if isinstance(event.content.get("body"), str):
19701970
self.store_event_search_txn(
19711971
txn, event, "content.body", event.content["body"]
19721972
)

Diff for: synapse/storage/databases/main/search.py

+26
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,7 @@ class SearchBackgroundUpdateStore(SearchWorkerStore):
115115
EVENT_SEARCH_ORDER_UPDATE_NAME = "event_search_order"
116116
EVENT_SEARCH_USE_GIST_POSTGRES_NAME = "event_search_postgres_gist"
117117
EVENT_SEARCH_USE_GIN_POSTGRES_NAME = "event_search_postgres_gin"
118+
EVENT_SEARCH_DELETE_NON_STRINGS = "event_search_sqlite_delete_non_strings"
118119

119120
def __init__(
120121
self,
@@ -147,6 +148,10 @@ def __init__(
147148
self.EVENT_SEARCH_USE_GIN_POSTGRES_NAME, self._background_reindex_gin_search
148149
)
149150

151+
self.db_pool.updates.register_background_update_handler(
152+
self.EVENT_SEARCH_DELETE_NON_STRINGS, self._background_delete_non_strings
153+
)
154+
150155
async def _background_reindex_search(self, progress, batch_size):
151156
# we work through the events table from highest stream id to lowest
152157
target_min_stream_id = progress["target_min_stream_id_inclusive"]
@@ -372,6 +377,27 @@ def reindex_search_txn(txn):
372377

373378
return num_rows
374379

380+
async def _background_delete_non_strings(
381+
self, progress: JsonDict, batch_size: int
382+
) -> int:
383+
"""Deletes rows with non-string `value`s from `event_search` if using sqlite.
384+
385+
Prior to Synapse 1.44.0, malformed events received over federation could cause integers
386+
to be inserted into the `event_search` table when using sqlite.
387+
"""
388+
389+
def delete_non_strings_txn(txn: LoggingTransaction) -> None:
390+
txn.execute("DELETE FROM event_search WHERE typeof(value) != 'text'")
391+
392+
await self.db_pool.runInteraction(
393+
self.EVENT_SEARCH_DELETE_NON_STRINGS, delete_non_strings_txn
394+
)
395+
396+
await self.db_pool.updates._end_background_update(
397+
self.EVENT_SEARCH_DELETE_NON_STRINGS
398+
)
399+
return 1
400+
375401

376402
class SearchStore(SearchBackgroundUpdateStore):
377403
def __init__(
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
/* Copyright 2022 The Matrix.org Foundation C.I.C
2+
*
3+
* Licensed under the Apache License, Version 2.0 (the "License");
4+
* you may not use this file except in compliance with the License.
5+
* You may obtain a copy of the License at
6+
*
7+
* http://www.apache.org/licenses/LICENSE-2.0
8+
*
9+
* Unless required by applicable law or agreed to in writing, software
10+
* distributed under the License is distributed on an "AS IS" BASIS,
11+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
* See the License for the specific language governing permissions and
13+
* limitations under the License.
14+
*/
15+
16+
17+
-- Delete rows with non-string `value`s from `event_search` if using sqlite.
18+
--
19+
-- Prior to Synapse 1.44.0, malformed events received over federation could
20+
-- cause integers to be inserted into the `event_search` table.
21+
INSERT INTO background_updates (ordering, update_name, progress_json) VALUES
22+
(6805, 'event_search_sqlite_delete_non_strings', '{}');

Diff for: tests/storage/test_room_search.py

+115-2
Original file line numberDiff line numberDiff line change
@@ -13,13 +13,16 @@
1313
# limitations under the License.
1414

1515
import synapse.rest.admin
16+
from synapse.api.constants import EventTypes
17+
from synapse.api.errors import StoreError
1618
from synapse.rest.client import login, room
1719
from synapse.storage.engines import PostgresEngine
1820

19-
from tests.unittest import HomeserverTestCase
21+
from tests.unittest import HomeserverTestCase, skip_unless
22+
from tests.utils import USE_POSTGRES_FOR_TESTS
2023

2124

22-
class NullByteInsertionTest(HomeserverTestCase):
25+
class EventSearchInsertionTest(HomeserverTestCase):
2326
servlets = [
2427
synapse.rest.admin.register_servlets_for_client_rest_resource,
2528
login.register_servlets,
@@ -72,3 +75,113 @@ def test_null_byte(self):
7275
)
7376
if isinstance(store.database_engine, PostgresEngine):
7477
self.assertIn("alice", result.get("highlights"))
78+
79+
def test_non_string(self):
80+
"""Test that non-string `value`s are not inserted into `event_search`.
81+
82+
This is particularly important when using sqlite, since a sqlite column can hold
83+
both strings and integers. When using Postgres, integers are automatically
84+
converted to strings.
85+
86+
Regression test for #11918.
87+
"""
88+
store = self.hs.get_datastores().main
89+
90+
# Register a user and create a room
91+
user_id = self.register_user("alice", "password")
92+
access_token = self.login("alice", "password")
93+
room_id = self.helper.create_room_as("alice", tok=access_token)
94+
room_version = self.get_success(store.get_room_version(room_id))
95+
96+
# Construct a message with a numeric body to be received over federation
97+
# The message can't be sent using the client API, since Synapse's event
98+
# validation will reject it.
99+
prev_event_ids = self.get_success(store.get_prev_events_for_room(room_id))
100+
prev_event = self.get_success(store.get_event(prev_event_ids[0]))
101+
prev_state_map = self.get_success(
102+
self.hs.get_storage().state.get_state_ids_for_event(prev_event_ids[0])
103+
)
104+
105+
event_dict = {
106+
"type": EventTypes.Message,
107+
"content": {"msgtype": "m.text", "body": 2},
108+
"room_id": room_id,
109+
"sender": user_id,
110+
"depth": prev_event.depth + 1,
111+
"prev_events": prev_event_ids,
112+
"origin_server_ts": self.clock.time_msec(),
113+
}
114+
builder = self.hs.get_event_builder_factory().for_room_version(
115+
room_version, event_dict
116+
)
117+
event = self.get_success(
118+
builder.build(
119+
prev_event_ids=prev_event_ids,
120+
auth_event_ids=self.hs.get_event_auth_handler().compute_auth_events(
121+
builder,
122+
prev_state_map,
123+
for_verification=False,
124+
),
125+
depth=event_dict["depth"],
126+
)
127+
)
128+
129+
# Receive the event
130+
self.get_success(
131+
self.hs.get_federation_event_handler().on_receive_pdu(
132+
self.hs.hostname, event
133+
)
134+
)
135+
136+
# The event should not have an entry in the `event_search` table
137+
f = self.get_failure(
138+
store.db_pool.simple_select_one_onecol(
139+
"event_search",
140+
{"room_id": room_id, "event_id": event.event_id},
141+
"event_id",
142+
),
143+
StoreError,
144+
)
145+
self.assertEqual(f.value.code, 404)
146+
147+
@skip_unless(not USE_POSTGRES_FOR_TESTS, "requires sqlite")
148+
def test_sqlite_non_string_deletion_background_update(self):
149+
"""Test the background update to delete bad rows from `event_search`."""
150+
store = self.hs.get_datastores().main
151+
152+
# Populate `event_search` with dummy data
153+
self.get_success(
154+
store.db_pool.simple_insert_many(
155+
"event_search",
156+
keys=["event_id", "room_id", "key", "value"],
157+
values=[
158+
("event1", "room_id", "content.body", "hi"),
159+
("event2", "room_id", "content.body", "2"),
160+
("event3", "room_id", "content.body", 3),
161+
],
162+
desc="populate_event_search",
163+
)
164+
)
165+
166+
# Run the background update
167+
store.db_pool.updates._all_done = False
168+
self.get_success(
169+
store.db_pool.simple_insert(
170+
"background_updates",
171+
{
172+
"update_name": "event_search_sqlite_delete_non_strings",
173+
"progress_json": "{}",
174+
},
175+
)
176+
)
177+
self.wait_for_background_updates()
178+
179+
# The non-string `value`s ought to be gone now.
180+
values = self.get_success(
181+
store.db_pool.simple_select_onecol(
182+
"event_search",
183+
{"room_id": "room_id"},
184+
"value",
185+
),
186+
)
187+
self.assertCountEqual(values, ["hi", "2"])

0 commit comments

Comments
 (0)