17
17
import unicodedata
18
18
from typing import (
19
19
TYPE_CHECKING ,
20
+ Collection ,
20
21
Iterable ,
21
22
List ,
22
23
Mapping ,
45
46
if TYPE_CHECKING :
46
47
from synapse .server import HomeServer
47
48
48
- from synapse .api .constants import EventTypes , HistoryVisibility , JoinRules
49
+ from synapse .api .constants import EventTypes , HistoryVisibility , JoinRules , UserTypes
49
50
from synapse .storage .database import (
50
51
DatabasePool ,
51
52
LoggingDatabaseConnection ,
@@ -356,13 +357,30 @@ async def _populate_user_directory_process_users(
356
357
Add all local users to the user directory.
357
358
"""
358
359
359
- def _get_next_batch (txn : LoggingTransaction ) -> Optional [List [str ]]:
360
- sql = "SELECT user_id FROM %s LIMIT %s" % (
361
- TEMP_TABLE + "_users" ,
362
- str (batch_size ),
363
- )
364
- txn .execute (sql )
365
- user_result = cast (List [Tuple [str ]], txn .fetchall ())
360
+ def _populate_user_directory_process_users_txn (
361
+ txn : LoggingTransaction ,
362
+ ) -> Optional [int ]:
363
+ if self .database_engine .supports_returning :
364
+ # Note: we use an ORDER BY in the SELECT to force usage of an
365
+ # index. Otherwise, postgres does a sequential scan that is
366
+ # surprisingly slow (I think due to the fact it will read/skip
367
+ # over lots of already deleted rows).
368
+ sql = f"""
369
+ DELETE FROM { TEMP_TABLE + "_users" }
370
+ WHERE user_id IN (
371
+ SELECT user_id FROM { TEMP_TABLE + "_users" } ORDER BY user_id LIMIT ?
372
+ )
373
+ RETURNING user_id
374
+ """
375
+ txn .execute (sql , (batch_size ,))
376
+ user_result = cast (List [Tuple [str ]], txn .fetchall ())
377
+ else :
378
+ sql = "SELECT user_id FROM %s ORDER BY user_id LIMIT %s" % (
379
+ TEMP_TABLE + "_users" ,
380
+ str (batch_size ),
381
+ )
382
+ txn .execute (sql )
383
+ user_result = cast (List [Tuple [str ]], txn .fetchall ())
366
384
367
385
if not user_result :
368
386
return None
@@ -378,85 +396,81 @@ def _get_next_batch(txn: LoggingTransaction) -> Optional[List[str]]:
378
396
assert count_result is not None
379
397
progress ["remaining" ] = count_result [0 ]
380
398
381
- return users_to_work_on
382
-
383
- users_to_work_on = await self .db_pool .runInteraction (
384
- "populate_user_directory_temp_read" , _get_next_batch
385
- )
399
+ if not users_to_work_on :
400
+ return None
386
401
387
- # No more users -- complete the transaction.
388
- if not users_to_work_on :
389
- await self . db_pool . updates . _end_background_update (
390
- "populate_user_directory_process_users"
402
+ logger . debug (
403
+ "Processing the next %d users of %d remaining" ,
404
+ len ( users_to_work_on ),
405
+ progress [ "remaining" ],
391
406
)
392
- return 1
393
-
394
- logger .debug (
395
- "Processing the next %d users of %d remaining"
396
- % (len (users_to_work_on ), progress ["remaining" ])
397
- )
398
407
399
- # First filter down to users we want to insert into the user directory.
400
- users_to_insert = [
401
- user_id
402
- for user_id in users_to_work_on
403
- if await self .should_include_local_user_in_dir (user_id )
404
- ]
408
+ # First filter down to users we want to insert into the user directory.
409
+ users_to_insert = self ._filter_local_users_for_dir_txn (
410
+ txn , users_to_work_on
411
+ )
405
412
406
- # Next fetch their profiles. Note that the `user_id` here is the
407
- # *localpart*, and that not all users have profiles.
408
- profile_rows = await self .db_pool .simple_select_many_batch (
409
- table = "profiles" ,
410
- column = "user_id" ,
411
- iterable = [get_localpart_from_id (u ) for u in users_to_insert ],
412
- retcols = (
413
- "user_id" ,
414
- "displayname" ,
415
- "avatar_url" ,
416
- ),
417
- keyvalues = {},
418
- desc = "populate_user_directory_process_users_get_profiles" ,
419
- )
420
- profiles = {
421
- f"@{ row ['user_id' ]} :{ self .server_name } " : _UserDirProfile (
422
- f"@{ row ['user_id' ]} :{ self .server_name } " ,
423
- row ["displayname" ],
424
- row ["avatar_url" ],
413
+ # Next fetch their profiles. Note that the `user_id` here is the
414
+ # *localpart*, and that not all users have profiles.
415
+ profile_rows = self .db_pool .simple_select_many_txn (
416
+ txn ,
417
+ table = "profiles" ,
418
+ column = "user_id" ,
419
+ iterable = [get_localpart_from_id (u ) for u in users_to_insert ],
420
+ retcols = (
421
+ "user_id" ,
422
+ "displayname" ,
423
+ "avatar_url" ,
424
+ ),
425
+ keyvalues = {},
425
426
)
426
- for row in profile_rows
427
- }
427
+ profiles = {
428
+ f"@{ row ['user_id' ]} :{ self .server_name } " : _UserDirProfile (
429
+ f"@{ row ['user_id' ]} :{ self .server_name } " ,
430
+ row ["displayname" ],
431
+ row ["avatar_url" ],
432
+ )
433
+ for row in profile_rows
434
+ }
428
435
429
- profiles_to_insert = [
430
- profiles .get (user_id ) or _UserDirProfile (user_id )
431
- for user_id in users_to_insert
432
- ]
436
+ profiles_to_insert = [
437
+ profiles .get (user_id ) or _UserDirProfile (user_id )
438
+ for user_id in users_to_insert
439
+ ]
440
+
441
+ # Actually insert the users with their profiles into the directory.
442
+ self ._update_profiles_in_user_dir_txn (txn , profiles_to_insert )
443
+
444
+ # We've finished processing the users. Delete it from the table, if
445
+ # we haven't already.
446
+ if not self .database_engine .supports_returning :
447
+ self .db_pool .simple_delete_many_txn (
448
+ txn ,
449
+ table = TEMP_TABLE + "_users" ,
450
+ column = "user_id" ,
451
+ values = users_to_work_on ,
452
+ keyvalues = {},
453
+ )
433
454
434
- # Actually insert the users with their profiles into the directory .
435
- await self . db_pool . runInteraction (
436
- "populate_user_directory_process_users_insertion" ,
437
- self . _update_profiles_in_user_dir_txn ,
438
- profiles_to_insert ,
439
- )
455
+ # Update the remaining counter .
456
+ progress [ "remaining" ] -= len ( users_to_work_on )
457
+ self . db_pool . updates . _background_update_progress_txn (
458
+ txn , "populate_user_directory_process_users" , progress
459
+ )
460
+ return len ( users_to_work_on )
440
461
441
- # We've finished processing the users. Delete it from the table.
442
- await self .db_pool .simple_delete_many (
443
- table = TEMP_TABLE + "_users" ,
444
- column = "user_id" ,
445
- iterable = users_to_work_on ,
446
- keyvalues = {},
447
- desc = "populate_user_directory_process_users_delete" ,
462
+ processed_count = await self .db_pool .runInteraction (
463
+ "populate_user_directory_temp" , _populate_user_directory_process_users_txn
448
464
)
449
465
450
- # Update the remaining counter.
451
- progress ["remaining" ] -= len (users_to_work_on )
452
- await self .db_pool .runInteraction (
453
- "populate_user_directory" ,
454
- self .db_pool .updates ._background_update_progress_txn ,
455
- "populate_user_directory_process_users" ,
456
- progress ,
457
- )
466
+ # No more users -- complete the transaction.
467
+ if not processed_count :
468
+ await self .db_pool .updates ._end_background_update (
469
+ "populate_user_directory_process_users"
470
+ )
471
+ return 1
458
472
459
- return len ( users_to_work_on )
473
+ return processed_count
460
474
461
475
async def should_include_local_user_in_dir (self , user : str ) -> bool :
462
476
"""Certain classes of local user are omitted from the user directory.
@@ -494,6 +508,30 @@ async def should_include_local_user_in_dir(self, user: str) -> bool:
494
508
495
509
return True
496
510
511
+ def _filter_local_users_for_dir_txn (
512
+ self , txn : LoggingTransaction , users : Collection [str ]
513
+ ) -> Collection [str ]:
514
+ """A batched version of `should_include_local_user_in_dir`"""
515
+ users = [
516
+ user
517
+ for user in users
518
+ if self .get_app_service_by_user_id (user ) is None # type: ignore[attr-defined]
519
+ and not self .get_if_app_services_interested_in_user (user ) # type: ignore[attr-defined]
520
+ ]
521
+
522
+ rows = self .db_pool .simple_select_many_txn (
523
+ txn ,
524
+ table = "users" ,
525
+ column = "name" ,
526
+ iterable = users ,
527
+ keyvalues = {
528
+ "deactivated" : 0 ,
529
+ },
530
+ retcols = ("name" , "user_type" ),
531
+ )
532
+
533
+ return [row ["name" ] for row in rows if row ["user_type" ] != UserTypes .SUPPORT ]
534
+
497
535
async def is_room_world_readable_or_publicly_joinable (self , room_id : str ) -> bool :
498
536
"""Check if the room is either world_readable or publically joinable"""
499
537
0 commit comments