Skip to content
This repository has been archived by the owner on Apr 26, 2024. It is now read-only.

Remove cached wrap on _get_joined_users_from_context method #13569

Merged
Merged
1 change: 1 addition & 0 deletions changelog.d/13569.removal
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Remove redundant `_get_joined_users_from_context` cache. Contributed by Nick @ Beeper (@fizzadar).
2 changes: 1 addition & 1 deletion synapse/state/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -232,7 +232,7 @@ async def get_current_user_ids_in_room(
logger.debug("calling resolve_state_groups from get_current_user_ids_in_room")
entry = await self.resolve_state_groups_for_events(room_id, latest_event_ids)
state = await entry.get_state(self._state_storage_controller, StateFilter.all())
return await self.store.get_joined_user_ids_from_state(room_id, state, entry)
return await self.store.get_joined_user_ids_from_state(room_id, state)

async def get_hosts_in_room_at_events(
self, room_id: str, event_ids: Collection[str]
Expand Down
132 changes: 43 additions & 89 deletions synapse/storage/databases/main/roommember.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@
import attr

from synapse.api.constants import EventTypes, Membership
from synapse.events import EventBase
from synapse.metrics import LaterGauge
from synapse.metrics.background_process_metrics import (
run_as_background_process,
Expand Down Expand Up @@ -861,97 +860,54 @@ async def get_mutual_rooms_between_users(

return shared_room_ids or frozenset()

async def get_joined_user_ids_from_state(
self, room_id: str, state: StateMap[str], state_entry: "_StateCacheEntry"
) -> Set[str]:
state_group: Union[object, int] = state_entry.state_group
if not state_group:
# If state_group is None it means it has yet to be assigned a
# state group, i.e. we need to make sure that calls with a state_group
# of None don't hit previous cached calls with a None state_group.
# To do this we set the state_group to a new object as object() != object()
state_group = object()
async def get_joined_users_from_state(
self, room_id: str, state: StateMap[str]
) -> Dict[str, ProfileInfo]:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It looks like the merge has gone awry here: we're now reverting get_joined_user_ids_from_state back to get_joined_users_from_state (cf #13573)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Another 🤦 for me today - 6404481 (+ 4e62c46).

"""
For a given set of state IDs, get a map of user ID to profile information
for users in the room.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It doesn't return profile information any more!


This method doesn't do any fetching of data itself and instead checks various
caches for the relevant data before passing any remaining missing event IDs to
`_get_joined_profiles_from_event_ids` which does the actual data fetching.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm somewhat confused by what this text means. Possibly not helped by the fact that _get_joined_profiles_from_event_ids no longer exists, but in any case, I think it's misleading:

This method doesn't do any fetching of data itself

... except that it does. Admittedly, under the hood, it uses _get_user_ids_from_membership_event_ids to do that work, but that's an implementation detail. It's like saying "This method doesn't do any fetching of data itself, instead it uses simple_select_many to do the lookup". It might be true in some sense, but it's just not helpful information.

I think what you mean is something like "This method checks the local event cache, before calling _get_user_ids_from_membership_event_ids for any uncached events." ?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

"""

assert state_group is not None
with Measure(self._clock, "get_joined_users_from_state"):
return await self._get_joined_user_ids_from_context(
room_id, state_group, state, context=state_entry
users_in_room = set()
member_event_ids = [
e_id
for key, e_id in current_state_ids.items()
if key[0] == EventTypes.Member
]

# We check if we have any of the member event ids in the event cache
# before we ask the DB

# We don't update the event cache hit ratio as it completely throws off
# the hit ratio counts. After all, we don't populate the cache if we
# miss it here
event_map = self._get_events_from_local_cache(
member_event_ids, update_metrics=False
)

@cached(num_args=2, iterable=True, max_entries=100000)
async def _get_joined_user_ids_from_context(
self,
room_id: str,
state_group: Union[object, int],
current_state_ids: StateMap[str],
event: Optional[EventBase] = None,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

context: this also seems to have been unused since #13267.

context: Optional["_StateCacheEntry"] = None,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

note to self/for posterity: context has always been not-None since #8070 removed the only call that doesn't provide it (https://github.com/matrix-org/synapse/pull/8070/files#diff-54aaace5758538a90a9ed3a020eb9b260139269ee33c10ee8b2caf850c8a3e1dL290)

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

also: #13267 changed things so that it was always a _StateCacheEntry. Previously (since time immemorial) it could have been either an EventContext or a _StateCacheEntry. Both classes have properties like delta_ids and prev_group, so this worked, though it's a miracle we never broke it.

) -> Set[str]:
# We don't use `state_group`, it's there so that we can cache based
# on it. However, it's important that it's never None, since two current_states
# with a state_group of None are likely to be different.
assert state_group is not None

users_in_room = set()
member_event_ids = [
e_id
for key, e_id in current_state_ids.items()
if key[0] == EventTypes.Member
]

if context is not None:
# If we have a context with a delta from a previous state group,
# check if we also have the result from the previous group in cache.
# If we do then we can reuse that result and simply update it with
# any membership changes in `delta_ids`
if context.prev_group and context.delta_ids:
prev_res = self._get_joined_user_ids_from_context.cache.get_immediate(
(room_id, context.prev_group), None
missing_member_event_ids = []
for event_id in member_event_ids:
ev_entry = event_map.get(event_id)
if ev_entry and not ev_entry.event.rejected_reason:
if ev_entry.event.membership == Membership.JOIN:
users_in_room.add(ev_entry.event.state_key)
else:
missing_member_event_ids.append(event_id)

if missing_member_event_ids:
event_to_memberships = await self._get_user_ids_from_membership_event_ids(
missing_member_event_ids
)
users_in_room.update(
user_id for user_id in event_to_memberships.values() if user_id
)
if prev_res and isinstance(prev_res, set):
users_in_room = prev_res
member_event_ids = [
e_id
for key, e_id in context.delta_ids.items()
if key[0] == EventTypes.Member
]
for etype, state_key in context.delta_ids:
if etype == EventTypes.Member:
users_in_room.discard(state_key)

# We check if we have any of the member event ids in the event cache
# before we ask the DB

# We don't update the event cache hit ratio as it completely throws off
# the hit ratio counts. After all, we don't populate the cache if we
# miss it here
event_map = self._get_events_from_local_cache(
member_event_ids, update_metrics=False
)

missing_member_event_ids = []
for event_id in member_event_ids:
ev_entry = event_map.get(event_id)
if ev_entry and not ev_entry.event.rejected_reason:
if ev_entry.event.membership == Membership.JOIN:
users_in_room.add(ev_entry.event.state_key)
else:
missing_member_event_ids.append(event_id)

if missing_member_event_ids:
event_to_memberships = await self._get_user_ids_from_membership_event_ids(
missing_member_event_ids
)
users_in_room.update(
user_id for user_id in event_to_memberships.values() if user_id
)

if event is not None and event.type == EventTypes.Member:
if event.membership == Membership.JOIN:
if event.event_id in member_event_ids:
users_in_room.add(event.state_key)

return users_in_room
return users_in_room

@cached(
max_entries=10000,
Expand Down Expand Up @@ -1150,9 +1106,7 @@ async def _get_joined_hosts(
else:
# The cache doesn't match the state group or prev state group,
# so we calculate the result from first principles.
joined_user_ids = await self.get_joined_user_ids_from_state(
room_id, state, state_entry
)
joined_user_ids = await self.get_joined_user_ids_from_state(room_id, state)

cache.hosts_to_joined_users = {}
for user_id in joined_user_ids:
Expand Down