From 4e397c5cab365cbab8d022b3e0a611bdff01faca Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Fri, 19 Aug 2022 22:10:26 -0500 Subject: [PATCH 01/15] Optimize how we calculate likely_domains during backfill --- synapse/storage/databases/main/roommember.py | 1 + 1 file changed, 1 insertion(+) diff --git a/synapse/storage/databases/main/roommember.py b/synapse/storage/databases/main/roommember.py index 827c1f1efd3b..5f1f73c10667 100644 --- a/synapse/storage/databases/main/roommember.py +++ b/synapse/storage/databases/main/roommember.py @@ -1038,6 +1038,7 @@ async def get_current_hosts_in_room(self, room_id: str) -> Set[str]: # For PostgreSQL we can use a regex to pull out the domains from the # joined users in `current_state_events` via regex. + # TODO: Trying to remember what to do: group by host, order by depth def get_current_hosts_in_room_txn(txn: LoggingTransaction) -> Set[str]: sql = """ SELECT DISTINCT substring(state_key FROM '@[^:]*:(.*)$') From abd77f7c817fbe5aef9f988eac0a6d0a7d786ac1 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Sat, 20 Aug 2022 00:23:03 -0500 Subject: [PATCH 02/15] Update query to order servers by how long they have been in the room (by depth) --- synapse/storage/databases/main/roommember.py | 22 ++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/synapse/storage/databases/main/roommember.py b/synapse/storage/databases/main/roommember.py index 5f1f73c10667..f367b2c1a61e 100644 --- a/synapse/storage/databases/main/roommember.py +++ b/synapse/storage/databases/main/roommember.py @@ -1038,15 +1038,25 @@ async def get_current_hosts_in_room(self, room_id: str) -> Set[str]: # For PostgreSQL we can use a regex to pull out the domains from the # joined users in `current_state_events` via regex. - # TODO: Trying to remember what to do: group by host, order by depth def get_current_hosts_in_room_txn(txn: LoggingTransaction) -> Set[str]: sql = """ - SELECT DISTINCT substring(state_key FROM '@[^:]*:(.*)$') - FROM current_state_events + SELECT + /* Only use the row with the least depth from each domain group */ + min(e.depth) as min_depth, + /* Match the domain part of the MXID */ + substring(c.state_key FROM '@[^:]*:(.*)$') as domain + FROM current_state_events c + /* Get the depth of the event from the events table */ + INNER JOIN events AS e USING (event_id) WHERE - type = 'm.room.member' - AND membership = 'join' - AND room_id = ? + /* Find any join state events in the room */ + c.type = 'm.room.member' + AND c.membership = 'join' + AND c.room_id = ? + /* Group all state events from the same domain into their own buckets (groups) */ + GROUP BY domain + /* Sorted by lowest depth first */ + ORDER BY min_depth ASC; """ txn.execute(sql, (room_id,)) return {d for d, in txn} From d9cd3efb259daeba79deba7525573cd7a9af6a62 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Sat, 20 Aug 2022 00:24:20 -0500 Subject: [PATCH 03/15] No need to select depth, just sort by it --- synapse/storage/databases/main/roommember.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/synapse/storage/databases/main/roommember.py b/synapse/storage/databases/main/roommember.py index f367b2c1a61e..414b3a939ec5 100644 --- a/synapse/storage/databases/main/roommember.py +++ b/synapse/storage/databases/main/roommember.py @@ -1039,12 +1039,12 @@ async def get_current_hosts_in_room(self, room_id: str) -> Set[str]: # joined users in `current_state_events` via regex. def get_current_hosts_in_room_txn(txn: LoggingTransaction) -> Set[str]: + # Returns a list of servers currently joined in the room sorted by + # longest in the room first (aka. with the lowest depth). sql = """ SELECT - /* Only use the row with the least depth from each domain group */ - min(e.depth) as min_depth, /* Match the domain part of the MXID */ - substring(c.state_key FROM '@[^:]*:(.*)$') as domain + substring(c.state_key FROM '@[^:]*:(.*)$') as server_domain FROM current_state_events c /* Get the depth of the event from the events table */ INNER JOIN events AS e USING (event_id) @@ -1054,9 +1054,9 @@ def get_current_hosts_in_room_txn(txn: LoggingTransaction) -> Set[str]: AND c.membership = 'join' AND c.room_id = ? /* Group all state events from the same domain into their own buckets (groups) */ - GROUP BY domain + GROUP BY server_domain /* Sorted by lowest depth first */ - ORDER BY min_depth ASC; + ORDER BY min(e.depth) ASC; """ txn.execute(sql, (room_id,)) return {d for d, in txn} From a55f0941db367d24c999e814c2da1d7827b0fd69 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Sat, 20 Aug 2022 00:37:58 -0500 Subject: [PATCH 04/15] Add changelog --- changelog.d/13575.misc | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/13575.misc diff --git a/changelog.d/13575.misc b/changelog.d/13575.misc new file mode 100644 index 000000000000..384147261705 --- /dev/null +++ b/changelog.d/13575.misc @@ -0,0 +1 @@ +Optimize how Synapse calculates domains to fetch from during backfill. From 396ca2d885d8cfbabca985f19e92db4b98a0de73 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Sat, 20 Aug 2022 01:14:35 -0500 Subject: [PATCH 05/15] Replace usage of get_domains_from_state with get_current_hosts_in_room --- synapse/handlers/federation.py | 45 +++------------------------------- synapse/handlers/room.py | 10 ++------ 2 files changed, 6 insertions(+), 49 deletions(-) diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index a09eaa437946..3c9851594d63 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -99,37 +99,6 @@ ) -def get_domains_from_state(state: StateMap[EventBase]) -> List[Tuple[str, int]]: - """Get joined domains from state - - Args: - state: State map from type/state key to event. - - Returns: - Returns a list of servers with the lowest depth of their joins. - Sorted by lowest depth first. - """ - joined_users = [ - (state_key, int(event.depth)) - for (e_type, state_key), event in state.items() - if e_type == EventTypes.Member and event.membership == Membership.JOIN - ] - - joined_domains: Dict[str, int] = {} - for u, d in joined_users: - try: - dom = get_domain_from_id(u) - old_d = joined_domains.get(dom) - if old_d: - joined_domains[dom] = min(d, old_d) - else: - joined_domains[dom] = d - except Exception: - pass - - return sorted(joined_domains.items(), key=lambda d: d[1]) - - class _BackfillPointType(Enum): # a regular backwards extremity (ie, an event which we don't yet have, but which # is referred to by other events in the DAG) @@ -427,17 +396,11 @@ async def _maybe_backfill_inner( ) # Now we need to decide which hosts to hit first. - - # First we try hosts that are already in the room + # First we try hosts that are already in the room. # TODO: HEURISTIC ALERT. - - curr_state = await self._storage_controllers.state.get_current_state(room_id) - - curr_domains = get_domains_from_state(curr_state) - - likely_domains = [ - domain for domain, depth in curr_domains if domain != self.server_name - ] + likely_domains = ( + await self._storage_controllers.state.get_current_hosts_in_room(room_id) + ) async def try_backfill(domains: List[str]) -> bool: # TODO: Should we try multiple of these at a time? diff --git a/synapse/handlers/room.py b/synapse/handlers/room.py index 55395457c3d1..5948e77188d9 100644 --- a/synapse/handlers/room.py +++ b/synapse/handlers/room.py @@ -60,7 +60,6 @@ from synapse.events import EventBase from synapse.events.utils import copy_and_fixup_power_levels_contents from synapse.federation.federation_client import InvalidResponseError -from synapse.handlers.federation import get_domains_from_state from synapse.handlers.relations import BundledAggregations from synapse.module_api import NOT_SPAM from synapse.rest.admin._base import assert_user_is_admin @@ -1459,14 +1458,9 @@ async def get_event_for_timestamp( timestamp, ) - # Find other homeservers from the given state in the room - curr_state = await self._storage_controllers.state.get_current_state( - room_id + likely_domains = ( + await self._storage_controllers.state.get_current_hosts_in_room(room_id) ) - curr_domains = get_domains_from_state(curr_state) - likely_domains = [ - domain for domain, depth in curr_domains if domain != self.server_name - ] # Loop through each homeserver candidate until we get a succesful response for domain in likely_domains: From cf68485c93dd93b88262dce306c6d89af0a329f9 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Sat, 20 Aug 2022 01:22:07 -0500 Subject: [PATCH 06/15] Add some comments why least depth --- synapse/storage/databases/main/roommember.py | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/synapse/storage/databases/main/roommember.py b/synapse/storage/databases/main/roommember.py index 414b3a939ec5..4602e331eb6c 100644 --- a/synapse/storage/databases/main/roommember.py +++ b/synapse/storage/databases/main/roommember.py @@ -1019,7 +1019,17 @@ async def _check_host_room_membership( @cached(iterable=True, max_entries=10000) async def get_current_hosts_in_room(self, room_id: str) -> Set[str]: - """Get current hosts in room based on current state.""" + """ + Get current hosts in room based on current state. + + The heuristic of sorting by servers who have been in the room the + longest is good because they're most likely to have anything we ask + about. + + Returns: + Returns a list of servers sorted by longest in the room first. (aka. + sorted by join with the lowest depth first). + """ # First we check if we already have `get_users_in_room` in the cache, as # we can just calculate result from that @@ -1040,7 +1050,10 @@ async def get_current_hosts_in_room(self, room_id: str) -> Set[str]: def get_current_hosts_in_room_txn(txn: LoggingTransaction) -> Set[str]: # Returns a list of servers currently joined in the room sorted by - # longest in the room first (aka. with the lowest depth). + # longest in the room first (aka. with the lowest depth). The + # heuristic of sorting by servers who have been in the room the + # longest is good because they're most likely to have anything we + # ask about. sql = """ SELECT /* Match the domain part of the MXID */ From b90ab9879ae8f1bf2610f5a19af0d6dec7b77b76 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Sat, 20 Aug 2022 01:52:31 -0500 Subject: [PATCH 07/15] Fix lints --- synapse/handlers/federation.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index 3c9851594d63..cb010ae5da59 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -70,7 +70,7 @@ from synapse.storage.databases.main.events import PartialStateConflictError from synapse.storage.databases.main.events_worker import EventRedactBehaviour from synapse.storage.state import StateFilter -from synapse.types import JsonDict, StateMap, get_domain_from_id +from synapse.types import JsonDict, get_domain_from_id from synapse.util.async_helpers import Linearizer from synapse.util.retryutils import NotRetryingDestination from synapse.visibility import filter_events_for_server @@ -402,7 +402,7 @@ async def _maybe_backfill_inner( await self._storage_controllers.state.get_current_hosts_in_room(room_id) ) - async def try_backfill(domains: List[str]) -> bool: + async def try_backfill(domains: Collection[str]) -> bool: # TODO: Should we try multiple of these at a time? for dom in domains: try: From 6ba5ef318e94ceff0aa37a90d8f441a48735815d Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Sat, 20 Aug 2022 02:43:38 -0500 Subject: [PATCH 08/15] Skip asking our own server See https://github.com/matrix-org/synapse/pull/13575#discussion_r950663754 --- synapse/handlers/federation.py | 4 ++++ synapse/handlers/room.py | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index cb010ae5da59..ab1065113d69 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -405,6 +405,10 @@ async def _maybe_backfill_inner( async def try_backfill(domains: Collection[str]) -> bool: # TODO: Should we try multiple of these at a time? for dom in domains: + # We don't want to ask our own server for information we don't have + if dom != self.server_name: + continue + try: await self._federation_event_handler.backfill( dom, room_id, limit=100, extremities=extremities_to_request diff --git a/synapse/handlers/room.py b/synapse/handlers/room.py index 5948e77188d9..2497e0a6e0c3 100644 --- a/synapse/handlers/room.py +++ b/synapse/handlers/room.py @@ -1464,6 +1464,10 @@ async def get_event_for_timestamp( # Loop through each homeserver candidate until we get a succesful response for domain in likely_domains: + # We don't want to ask our own server for information we don't have + if domain != self.server_name: + continue + try: remote_response = await self.federation_client.timestamp_to_event( domain, room_id, timestamp, direction From 55330c81867e97e65358ebc77e4d5bbedb5158d0 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Sat, 20 Aug 2022 13:44:57 -0500 Subject: [PATCH 09/15] Fix logic direction --- synapse/handlers/federation.py | 2 +- synapse/handlers/room.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index ab1065113d69..16a898ea6865 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -406,7 +406,7 @@ async def try_backfill(domains: Collection[str]) -> bool: # TODO: Should we try multiple of these at a time? for dom in domains: # We don't want to ask our own server for information we don't have - if dom != self.server_name: + if dom == self.server_name: continue try: diff --git a/synapse/handlers/room.py b/synapse/handlers/room.py index 2497e0a6e0c3..3e2cc3c730d4 100644 --- a/synapse/handlers/room.py +++ b/synapse/handlers/room.py @@ -1465,7 +1465,7 @@ async def get_event_for_timestamp( # Loop through each homeserver candidate until we get a succesful response for domain in likely_domains: # We don't want to ask our own server for information we don't have - if domain != self.server_name: + if domain == self.server_name: continue try: From aafb356b385cc0c94e2590facaba35a926a918ae Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Mon, 22 Aug 2022 21:14:02 -0500 Subject: [PATCH 10/15] Sort get_users_in_room so that it matches get_current_hosts_in_room See https://github.com/matrix-org/synapse/pull/13575#discussion_r950892416 --- synapse/storage/databases/main/roommember.py | 28 +++++++++++++++++--- 1 file changed, 25 insertions(+), 3 deletions(-) diff --git a/synapse/storage/databases/main/roommember.py b/synapse/storage/databases/main/roommember.py index 4602e331eb6c..f284f2a7681e 100644 --- a/synapse/storage/databases/main/roommember.py +++ b/synapse/storage/databases/main/roommember.py @@ -187,27 +187,47 @@ def _check_safe_current_state_events_membership_updated_txn( @cached(max_entries=100000, iterable=True) async def get_users_in_room(self, room_id: str) -> List[str]: + """ + Returns a list of users in the room sorted by longest in the room first + (aka. with the lowest depth). This is done to match the sort in + `get_current_hosts_in_room()` and so we can re-use the cache but it's + not horrible to have here either. + """ + return await self.db_pool.runInteraction( "get_users_in_room", self.get_users_in_room_txn, room_id ) def get_users_in_room_txn(self, txn: LoggingTransaction, room_id: str) -> List[str]: + """ + Returns a list of users in the room sorted by longest in the room first + (aka. with the lowest depth). This is done to match the sort in + `get_current_hosts_in_room()` and so we can re-use the cache but it's + not horrible to have here either. + """ # If we can assume current_state_events.membership is up to date # then we can avoid a join, which is a Very Good Thing given how # frequently this function gets called. if self._current_state_events_membership_up_to_date: sql = """ - SELECT state_key FROM current_state_events + SELECT state_key FROM current_state_events as c + /* Get the depth of the event from the events table */ + INNER JOIN events AS e USING (event_id) WHERE type = 'm.room.member' AND room_id = ? AND membership = ? + /* Sorted by lowest depth first */ + ORDER BY e.depth ASC; """ else: sql = """ SELECT state_key FROM room_memberships as m - INNER JOIN current_state_events as c - ON m.event_id = c.event_id + INNER JOIN current_state_events as c USING (event_id) + /* Get the depth of the event from the events table */ + INNER JOIN events AS e USING (event_id) AND m.room_id = c.room_id AND m.user_id = c.state_key WHERE c.type = 'm.room.member' AND c.room_id = ? AND m.membership = ? + /* Sorted by lowest depth first */ + ORDER BY e.depth ASC; """ txn.execute(sql, (room_id, Membership.JOIN)) @@ -1037,6 +1057,8 @@ async def get_current_hosts_in_room(self, room_id: str) -> Set[str]: (room_id,), None, update_metrics=False ) if users is not None: + # Because `users` is sorted from lowest -> highest depth, the set of + # domains will also be sorted that way. return {get_domain_from_id(u) for u in users} if isinstance(self.database_engine, Sqlite3Engine): From f32739ce88ceb1b475d395428314ce39a67fae7c Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Mon, 22 Aug 2022 21:34:54 -0500 Subject: [PATCH 11/15] Fix ambiguous column references and join mistake --- synapse/storage/databases/main/roommember.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/synapse/storage/databases/main/roommember.py b/synapse/storage/databases/main/roommember.py index f284f2a7681e..419a78d31bd4 100644 --- a/synapse/storage/databases/main/roommember.py +++ b/synapse/storage/databases/main/roommember.py @@ -210,19 +210,20 @@ def get_users_in_room_txn(self, txn: LoggingTransaction, room_id: str) -> List[s # frequently this function gets called. if self._current_state_events_membership_up_to_date: sql = """ - SELECT state_key FROM current_state_events as c + SELECT c.state_key FROM current_state_events as c /* Get the depth of the event from the events table */ INNER JOIN events AS e USING (event_id) - WHERE type = 'm.room.member' AND room_id = ? AND membership = ? + WHERE c.type = 'm.room.member' AND c.room_id = ? AND membership = ? /* Sorted by lowest depth first */ ORDER BY e.depth ASC; """ else: sql = """ - SELECT state_key FROM room_memberships as m - INNER JOIN current_state_events as c USING (event_id) + SELECT c.state_key FROM room_memberships as m /* Get the depth of the event from the events table */ INNER JOIN events AS e USING (event_id) + INNER JOIN current_state_events as c + ON m.event_id = c.event_id AND m.room_id = c.room_id AND m.user_id = c.state_key WHERE c.type = 'm.room.member' AND c.room_id = ? AND m.membership = ? From 6700cf5e0fb1a910b007cebf08b11e82a86a3fee Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 23 Aug 2022 11:04:44 -0500 Subject: [PATCH 12/15] Use list for guaranteed sort order See https://github.com/matrix-org/synapse/pull/13575#discussion_r952748930 --- synapse/storage/controllers/state.py | 3 +-- synapse/storage/databases/main/roommember.py | 21 ++++++++++++++------ 2 files changed, 16 insertions(+), 8 deletions(-) diff --git a/synapse/storage/controllers/state.py b/synapse/storage/controllers/state.py index f9ffd0e29e32..ba5380ce3e91 100644 --- a/synapse/storage/controllers/state.py +++ b/synapse/storage/controllers/state.py @@ -23,7 +23,6 @@ List, Mapping, Optional, - Set, Tuple, ) @@ -520,7 +519,7 @@ async def get_current_state_event( ) return state_map.get(key) - async def get_current_hosts_in_room(self, room_id: str) -> Set[str]: + async def get_current_hosts_in_room(self, room_id: str) -> List[str]: """Get current hosts in room based on current state.""" await self._partial_state_room_tracker.await_full_state(room_id) diff --git a/synapse/storage/databases/main/roommember.py b/synapse/storage/databases/main/roommember.py index 419a78d31bd4..04ee7ab910d6 100644 --- a/synapse/storage/databases/main/roommember.py +++ b/synapse/storage/databases/main/roommember.py @@ -1039,7 +1039,7 @@ async def _check_host_room_membership( return True @cached(iterable=True, max_entries=10000) - async def get_current_hosts_in_room(self, room_id: str) -> Set[str]: + async def get_current_hosts_in_room(self, room_id: str) -> List[str]: """ Get current hosts in room based on current state. @@ -1057,21 +1057,30 @@ async def get_current_hosts_in_room(self, room_id: str) -> Set[str]: users = self.get_users_in_room.cache.get_immediate( (room_id,), None, update_metrics=False ) + domains: List[str] = [] if users is not None: - # Because `users` is sorted from lowest -> highest depth, the set of + # Because `users` is sorted from lowest -> highest depth, the list of # domains will also be sorted that way. - return {get_domain_from_id(u) for u in users} + for u in users: + domain = get_domain_from_id(u) + if domain not in domains: + domains.append(domain) + return domains if isinstance(self.database_engine, Sqlite3Engine): # If we're using SQLite then let's just always use # `get_users_in_room` rather than funky SQL. users = await self.get_users_in_room(room_id) - return {get_domain_from_id(u) for u in users} + for u in users: + domain = get_domain_from_id(u) + if domain not in domains: + domains.append(domain) + return domains # For PostgreSQL we can use a regex to pull out the domains from the # joined users in `current_state_events` via regex. - def get_current_hosts_in_room_txn(txn: LoggingTransaction) -> Set[str]: + def get_current_hosts_in_room_txn(txn: LoggingTransaction) -> List[str]: # Returns a list of servers currently joined in the room sorted by # longest in the room first (aka. with the lowest depth). The # heuristic of sorting by servers who have been in the room the @@ -1095,7 +1104,7 @@ def get_current_hosts_in_room_txn(txn: LoggingTransaction) -> Set[str]: ORDER BY min(e.depth) ASC; """ txn.execute(sql, (room_id,)) - return {d for d, in txn} + return [d for d, in txn] return await self.db_pool.runInteraction( "get_current_hosts_in_room", get_current_hosts_in_room_txn From 3fb2adec83926d168e14bdcfbfc87725c3209b3f Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 23 Aug 2022 11:21:58 -0500 Subject: [PATCH 13/15] Re-arrange to make the logic flow better --- synapse/storage/databases/main/roommember.py | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/synapse/storage/databases/main/roommember.py b/synapse/storage/databases/main/roommember.py index 04ee7ab910d6..a8249fd84e60 100644 --- a/synapse/storage/databases/main/roommember.py +++ b/synapse/storage/databases/main/roommember.py @@ -1057,20 +1057,15 @@ async def get_current_hosts_in_room(self, room_id: str) -> List[str]: users = self.get_users_in_room.cache.get_immediate( (room_id,), None, update_metrics=False ) - domains: List[str] = [] - if users is not None: - # Because `users` is sorted from lowest -> highest depth, the list of - # domains will also be sorted that way. - for u in users: - domain = get_domain_from_id(u) - if domain not in domains: - domains.append(domain) - return domains - - if isinstance(self.database_engine, Sqlite3Engine): + if users is None and isinstance(self.database_engine, Sqlite3Engine): # If we're using SQLite then let's just always use # `get_users_in_room` rather than funky SQL. users = await self.get_users_in_room(room_id) + + if users is not None: + # Because `users` is sorted from lowest -> highest depth, the list + # of domains will also be sorted that way. + domains: List[str] = [] for u in users: domain = get_domain_from_id(u) if domain not in domains: From 9a56cbea9c00ac4f4bccc59167bcdf8d5c8a502b Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 23 Aug 2022 11:57:25 -0500 Subject: [PATCH 14/15] Use a set for fast lookups See https://github.com/matrix-org/synapse/pull/13575#discussion_r952871092 --- synapse/storage/databases/main/roommember.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/synapse/storage/databases/main/roommember.py b/synapse/storage/databases/main/roommember.py index a8249fd84e60..be376770adca 100644 --- a/synapse/storage/databases/main/roommember.py +++ b/synapse/storage/databases/main/roommember.py @@ -1066,9 +1066,12 @@ async def get_current_hosts_in_room(self, room_id: str) -> List[str]: # Because `users` is sorted from lowest -> highest depth, the list # of domains will also be sorted that way. domains: List[str] = [] + # We use a `Set` just for fast lookups + domain_set: Set[str] = {} for u in users: domain = get_domain_from_id(u) - if domain not in domains: + if domain not in domain_set: + domain_set.add(domain) domains.append(domain) return domains From 42b04c422c4ad65f6d51b8eeb749fa2fd2bda55a Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 23 Aug 2022 17:49:34 -0500 Subject: [PATCH 15/15] Fix lint about initializing a set --- synapse/storage/databases/main/roommember.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/storage/databases/main/roommember.py b/synapse/storage/databases/main/roommember.py index be376770adca..240017774372 100644 --- a/synapse/storage/databases/main/roommember.py +++ b/synapse/storage/databases/main/roommember.py @@ -1067,7 +1067,7 @@ async def get_current_hosts_in_room(self, room_id: str) -> List[str]: # of domains will also be sorted that way. domains: List[str] = [] # We use a `Set` just for fast lookups - domain_set: Set[str] = {} + domain_set: Set[str] = set() for u in users: domain = get_domain_from_id(u) if domain not in domain_set: