Skip to content
This repository was archived by the owner on Apr 26, 2024. It is now read-only.

Commit c7e9c1d

Browse files
authored
Speed up user directory rebuild for users some more... (#15665)
1 parent 1f55c04 commit c7e9c1d

File tree

2 files changed

+115
-76
lines changed

2 files changed

+115
-76
lines changed

changelog.d/15665.misc

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Speed up rebuilding of the user directory for local users.

synapse/storage/databases/main/user_directory.py

+114-76
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
import unicodedata
1818
from typing import (
1919
TYPE_CHECKING,
20+
Collection,
2021
Iterable,
2122
List,
2223
Mapping,
@@ -45,7 +46,7 @@
4546
if TYPE_CHECKING:
4647
from synapse.server import HomeServer
4748

48-
from synapse.api.constants import EventTypes, HistoryVisibility, JoinRules
49+
from synapse.api.constants import EventTypes, HistoryVisibility, JoinRules, UserTypes
4950
from synapse.storage.database import (
5051
DatabasePool,
5152
LoggingDatabaseConnection,
@@ -356,13 +357,30 @@ async def _populate_user_directory_process_users(
356357
Add all local users to the user directory.
357358
"""
358359

359-
def _get_next_batch(txn: LoggingTransaction) -> Optional[List[str]]:
360-
sql = "SELECT user_id FROM %s LIMIT %s" % (
361-
TEMP_TABLE + "_users",
362-
str(batch_size),
363-
)
364-
txn.execute(sql)
365-
user_result = cast(List[Tuple[str]], txn.fetchall())
360+
def _populate_user_directory_process_users_txn(
361+
txn: LoggingTransaction,
362+
) -> Optional[int]:
363+
if self.database_engine.supports_returning:
364+
# Note: we use an ORDER BY in the SELECT to force usage of an
365+
# index. Otherwise, postgres does a sequential scan that is
366+
# surprisingly slow (I think due to the fact it will read/skip
367+
# over lots of already deleted rows).
368+
sql = f"""
369+
DELETE FROM {TEMP_TABLE + "_users"}
370+
WHERE user_id IN (
371+
SELECT user_id FROM {TEMP_TABLE + "_users"} ORDER BY user_id LIMIT ?
372+
)
373+
RETURNING user_id
374+
"""
375+
txn.execute(sql, (batch_size,))
376+
user_result = cast(List[Tuple[str]], txn.fetchall())
377+
else:
378+
sql = "SELECT user_id FROM %s ORDER BY user_id LIMIT %s" % (
379+
TEMP_TABLE + "_users",
380+
str(batch_size),
381+
)
382+
txn.execute(sql)
383+
user_result = cast(List[Tuple[str]], txn.fetchall())
366384

367385
if not user_result:
368386
return None
@@ -378,85 +396,81 @@ def _get_next_batch(txn: LoggingTransaction) -> Optional[List[str]]:
378396
assert count_result is not None
379397
progress["remaining"] = count_result[0]
380398

381-
return users_to_work_on
382-
383-
users_to_work_on = await self.db_pool.runInteraction(
384-
"populate_user_directory_temp_read", _get_next_batch
385-
)
399+
if not users_to_work_on:
400+
return None
386401

387-
# No more users -- complete the transaction.
388-
if not users_to_work_on:
389-
await self.db_pool.updates._end_background_update(
390-
"populate_user_directory_process_users"
402+
logger.debug(
403+
"Processing the next %d users of %d remaining",
404+
len(users_to_work_on),
405+
progress["remaining"],
391406
)
392-
return 1
393-
394-
logger.debug(
395-
"Processing the next %d users of %d remaining"
396-
% (len(users_to_work_on), progress["remaining"])
397-
)
398407

399-
# First filter down to users we want to insert into the user directory.
400-
users_to_insert = [
401-
user_id
402-
for user_id in users_to_work_on
403-
if await self.should_include_local_user_in_dir(user_id)
404-
]
408+
# First filter down to users we want to insert into the user directory.
409+
users_to_insert = self._filter_local_users_for_dir_txn(
410+
txn, users_to_work_on
411+
)
405412

406-
# Next fetch their profiles. Note that the `user_id` here is the
407-
# *localpart*, and that not all users have profiles.
408-
profile_rows = await self.db_pool.simple_select_many_batch(
409-
table="profiles",
410-
column="user_id",
411-
iterable=[get_localpart_from_id(u) for u in users_to_insert],
412-
retcols=(
413-
"user_id",
414-
"displayname",
415-
"avatar_url",
416-
),
417-
keyvalues={},
418-
desc="populate_user_directory_process_users_get_profiles",
419-
)
420-
profiles = {
421-
f"@{row['user_id']}:{self.server_name}": _UserDirProfile(
422-
f"@{row['user_id']}:{self.server_name}",
423-
row["displayname"],
424-
row["avatar_url"],
413+
# Next fetch their profiles. Note that the `user_id` here is the
414+
# *localpart*, and that not all users have profiles.
415+
profile_rows = self.db_pool.simple_select_many_txn(
416+
txn,
417+
table="profiles",
418+
column="user_id",
419+
iterable=[get_localpart_from_id(u) for u in users_to_insert],
420+
retcols=(
421+
"user_id",
422+
"displayname",
423+
"avatar_url",
424+
),
425+
keyvalues={},
425426
)
426-
for row in profile_rows
427-
}
427+
profiles = {
428+
f"@{row['user_id']}:{self.server_name}": _UserDirProfile(
429+
f"@{row['user_id']}:{self.server_name}",
430+
row["displayname"],
431+
row["avatar_url"],
432+
)
433+
for row in profile_rows
434+
}
428435

429-
profiles_to_insert = [
430-
profiles.get(user_id) or _UserDirProfile(user_id)
431-
for user_id in users_to_insert
432-
]
436+
profiles_to_insert = [
437+
profiles.get(user_id) or _UserDirProfile(user_id)
438+
for user_id in users_to_insert
439+
]
440+
441+
# Actually insert the users with their profiles into the directory.
442+
self._update_profiles_in_user_dir_txn(txn, profiles_to_insert)
443+
444+
# We've finished processing the users. Delete it from the table, if
445+
# we haven't already.
446+
if not self.database_engine.supports_returning:
447+
self.db_pool.simple_delete_many_txn(
448+
txn,
449+
table=TEMP_TABLE + "_users",
450+
column="user_id",
451+
values=users_to_work_on,
452+
keyvalues={},
453+
)
433454

434-
# Actually insert the users with their profiles into the directory.
435-
await self.db_pool.runInteraction(
436-
"populate_user_directory_process_users_insertion",
437-
self._update_profiles_in_user_dir_txn,
438-
profiles_to_insert,
439-
)
455+
# Update the remaining counter.
456+
progress["remaining"] -= len(users_to_work_on)
457+
self.db_pool.updates._background_update_progress_txn(
458+
txn, "populate_user_directory_process_users", progress
459+
)
460+
return len(users_to_work_on)
440461

441-
# We've finished processing the users. Delete it from the table.
442-
await self.db_pool.simple_delete_many(
443-
table=TEMP_TABLE + "_users",
444-
column="user_id",
445-
iterable=users_to_work_on,
446-
keyvalues={},
447-
desc="populate_user_directory_process_users_delete",
462+
processed_count = await self.db_pool.runInteraction(
463+
"populate_user_directory_temp", _populate_user_directory_process_users_txn
448464
)
449465

450-
# Update the remaining counter.
451-
progress["remaining"] -= len(users_to_work_on)
452-
await self.db_pool.runInteraction(
453-
"populate_user_directory",
454-
self.db_pool.updates._background_update_progress_txn,
455-
"populate_user_directory_process_users",
456-
progress,
457-
)
466+
# No more users -- complete the transaction.
467+
if not processed_count:
468+
await self.db_pool.updates._end_background_update(
469+
"populate_user_directory_process_users"
470+
)
471+
return 1
458472

459-
return len(users_to_work_on)
473+
return processed_count
460474

461475
async def should_include_local_user_in_dir(self, user: str) -> bool:
462476
"""Certain classes of local user are omitted from the user directory.
@@ -494,6 +508,30 @@ async def should_include_local_user_in_dir(self, user: str) -> bool:
494508

495509
return True
496510

511+
def _filter_local_users_for_dir_txn(
512+
self, txn: LoggingTransaction, users: Collection[str]
513+
) -> Collection[str]:
514+
"""A batched version of `should_include_local_user_in_dir`"""
515+
users = [
516+
user
517+
for user in users
518+
if self.get_app_service_by_user_id(user) is None # type: ignore[attr-defined]
519+
and not self.get_if_app_services_interested_in_user(user) # type: ignore[attr-defined]
520+
]
521+
522+
rows = self.db_pool.simple_select_many_txn(
523+
txn,
524+
table="users",
525+
column="name",
526+
iterable=users,
527+
keyvalues={
528+
"deactivated": 0,
529+
},
530+
retcols=("name", "user_type"),
531+
)
532+
533+
return [row["name"] for row in rows if row["user_type"] != UserTypes.SUPPORT]
534+
497535
async def is_room_world_readable_or_publicly_joinable(self, room_id: str) -> bool:
498536
"""Check if the room is either world_readable or publically joinable"""
499537

0 commit comments

Comments
 (0)