Skip to content
This repository has been archived by the owner on Apr 26, 2024. It is now read-only.

Optimize how we calculate likely_domains during backfill #13575

Merged
merged 15 commits into from
Aug 30, 2022
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Replace usage of get_domains_from_state with get_current_hosts_in_room
MadLittleMods committed Aug 20, 2022
commit 396ca2d885d8cfbabca985f19e92db4b98a0de73
45 changes: 4 additions & 41 deletions synapse/handlers/federation.py
Original file line number Diff line number Diff line change
@@ -99,37 +99,6 @@
)


def get_domains_from_state(state: StateMap[EventBase]) -> List[Tuple[str, int]]:
Copy link
Contributor Author

@MadLittleMods MadLittleMods Aug 20, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Eliminating this option to get domains for a room in favor of get_current_hosts_in_room.

In a room like #matrixhq, getting all 80k state events in the room, then whittling it down in the app is not faster than a quick targeted database query or maybe even luckily hitting the cache.

"""Get joined domains from state

Args:
state: State map from type/state key to event.

Returns:
Returns a list of servers with the lowest depth of their joins.
Sorted by lowest depth first.
"""
joined_users = [
(state_key, int(event.depth))
for (e_type, state_key), event in state.items()
if e_type == EventTypes.Member and event.membership == Membership.JOIN
]

joined_domains: Dict[str, int] = {}
for u, d in joined_users:
try:
dom = get_domain_from_id(u)
old_d = joined_domains.get(dom)
if old_d:
joined_domains[dom] = min(d, old_d)
else:
joined_domains[dom] = d
except Exception:
pass

return sorted(joined_domains.items(), key=lambda d: d[1])


class _BackfillPointType(Enum):
# a regular backwards extremity (ie, an event which we don't yet have, but which
# is referred to by other events in the DAG)
@@ -427,17 +396,11 @@ async def _maybe_backfill_inner(
)

# Now we need to decide which hosts to hit first.

# First we try hosts that are already in the room
# First we try hosts that are already in the room.
# TODO: HEURISTIC ALERT.

curr_state = await self._storage_controllers.state.get_current_state(room_id)

curr_domains = get_domains_from_state(curr_state)

likely_domains = [
domain for domain, depth in curr_domains if domain != self.server_name
]
likely_domains = (
await self._storage_controllers.state.get_current_hosts_in_room(room_id)
)

async def try_backfill(domains: List[str]) -> bool:
# TODO: Should we try multiple of these at a time?
10 changes: 2 additions & 8 deletions synapse/handlers/room.py
Original file line number Diff line number Diff line change
@@ -60,7 +60,6 @@
from synapse.events import EventBase
from synapse.events.utils import copy_and_fixup_power_levels_contents
from synapse.federation.federation_client import InvalidResponseError
from synapse.handlers.federation import get_domains_from_state
from synapse.handlers.relations import BundledAggregations
from synapse.module_api import NOT_SPAM
from synapse.rest.admin._base import assert_user_is_admin
@@ -1459,14 +1458,9 @@ async def get_event_for_timestamp(
timestamp,
)

# Find other homeservers from the given state in the room
curr_state = await self._storage_controllers.state.get_current_state(
room_id
likely_domains = (
await self._storage_controllers.state.get_current_hosts_in_room(room_id)
)
curr_domains = get_domains_from_state(curr_state)
likely_domains = [
domain for domain, depth in curr_domains if domain != self.server_name
]

# Loop through each homeserver candidate until we get a succesful response
for domain in likely_domains: