Skip to content
This repository has been archived by the owner on Apr 26, 2024. It is now read-only.

Faster joins: add issue links to the TODOs #13004

Merged
Merged 3 commits on Jun 9, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions changelog.d/13004.misc
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Faster joins: add issue links to the TODO comments in the code.
13 changes: 12 additions & 1 deletion synapse/handlers/federation.py
Original file line number Diff line number Diff line change
Expand Up @@ -545,6 +545,7 @@ async def do_invite_join(
if ret.partial_state:
# TODO(faster_joins): roll this back if we don't manage to start the
# background resync (eg process_remote_join fails)
# https://github.com/matrix-org/synapse/issues/12998
await self.store.store_partial_state_room(room_id, ret.servers_in_room)

max_stream_id = await self._federation_event_handler.process_remote_join(
Expand Down Expand Up @@ -1506,14 +1507,17 @@ async def _sync_partial_state_room(
# TODO(faster_joins): do we need to lock to avoid races? What happens if other
# worker processes kick off a resync in parallel? Perhaps we should just elect
# a single worker to do the resync.
# https://github.com/matrix-org/synapse/issues/12994
#
# TODO(faster_joins): what happens if we leave the room during a resync? if we
# really leave, that might mean we have difficulty getting the room state over
# federation.
# https://github.com/matrix-org/synapse/issues/12802
#
# TODO(faster_joins): we need some way of prioritising which homeservers in
# `other_destinations` to try first, otherwise we'll spend ages trying dead
# homeservers for large rooms.
# https://github.com/matrix-org/synapse/issues/12999

if initial_destination is None and len(other_destinations) == 0:
raise ValueError(
Expand Down Expand Up @@ -1543,9 +1547,11 @@ async def _sync_partial_state_room(
# all the events are updated, so we can update current state and
# clear the lazy-loading flag.
logger.info("Updating current state for %s", room_id)
# TODO(faster_joins): support workers
# https://github.com/matrix-org/synapse/issues/12994
assert (
self._storage_controllers.persistence is not None
), "TODO(faster_joins): support for workers"
), "worker-mode deployments not currently supported here"
await self._storage_controllers.persistence.update_current_state(
room_id
)
Expand All @@ -1559,13 +1565,17 @@ async def _sync_partial_state_room(
)

# TODO(faster_joins) update room stats and user directory?
# https://github.com/matrix-org/synapse/issues/12814
# https://github.com/matrix-org/synapse/issues/12815
return

# we raced against more events arriving with partial state. Go round
# the loop again. We've already logged a warning, so no need for more.
# TODO(faster_joins): there is still a race here, whereby incoming events which raced
# with us will fail to be persisted after the call to `clear_partial_state_room` due to
# having partial state.
# https://github.com/matrix-org/synapse/issues/12988
#
continue

events = await self.store.get_events_as_list(
Expand All @@ -1588,6 +1598,7 @@ async def _sync_partial_state_room(
# indefinitely is also not the right thing to do if we can
# reach all homeservers and they all claim they don't have
# the state we want.
# https://github.com/matrix-org/synapse/issues/13000
logger.error(
"Failed to get state for %s at %s from %s because %s, "
"giving up!",
Expand Down
2 changes: 2 additions & 0 deletions synapse/handlers/federation_event.py
Original file line number Diff line number Diff line change
Expand Up @@ -532,6 +532,7 @@ async def update_state_for_partial_state_event(
#
# TODO(faster_joins): we probably need to be more intelligent, and
# exclude partial-state prev_events from consideration
# https://github.com/matrix-org/synapse/issues/13001
logger.warning(
"%s still has partial state: can't de-partial-state it yet",
event.event_id,
Expand Down Expand Up @@ -777,6 +778,7 @@ async def _process_pulled_event(
state_ids = await self._resolve_state_at_missing_prevs(origin, event)
# TODO(faster_joins): make sure that _resolve_state_at_missing_prevs does
# not return partial state
# https://github.com/matrix-org/synapse/issues/13002

await self._process_received_pdu(
origin, event, state_ids=state_ids, backfilled=backfilled
Expand Down
1 change: 1 addition & 0 deletions synapse/handlers/message.py
Original file line number Diff line number Diff line change
Expand Up @@ -1102,6 +1102,7 @@ async def create_new_client_event(
#
# TODO(faster_joins): figure out how this works, and make sure that the
# old state is complete.
# https://github.com/matrix-org/synapse/issues/13003
metadata = await self.store.get_metadata_for_events(state_event_ids)

state_map_for_event: MutableStateMap[str] = {}
Expand Down
5 changes: 4 additions & 1 deletion synapse/storage/controllers/persist_events.py
Original file line number Diff line number Diff line change
Expand Up @@ -388,10 +388,13 @@ async def update_current_state(self, room_id: str) -> None:

# TODO(faster_joins): get a real stream ordering, to make this work correctly
# across workers.
# https://github.com/matrix-org/synapse/issues/12994
#
# TODO(faster_joins): this can race against event persistence, in which case we
# will end up with incorrect state. Perhaps we should make this a job we
# farm out to the event persister, somehow.
# farm out to the event persister thread, somehow.
# https://github.com/matrix-org/synapse/issues/13007
#
stream_id = self.main_store.get_room_max_stream_ordering()
await self.persist_events_store.update_current_state(room_id, delta, stream_id)

Expand Down
3 changes: 3 additions & 0 deletions synapse/storage/controllers/state.py
Original file line number Diff line number Diff line change
Expand Up @@ -452,6 +452,9 @@ async def get_current_state_deltas(
up to date.
"""
# FIXME(faster_joins): what do we do here?
# https://github.com/matrix-org/synapse/issues/12814
# https://github.com/matrix-org/synapse/issues/12815
# https://github.com/matrix-org/synapse/issues/13008

return await self.stores.main.get_partial_current_state_deltas(
prev_stream_id, max_stream_id
Expand Down
2 changes: 2 additions & 0 deletions synapse/storage/databases/main/room.py
Original file line number Diff line number Diff line change
Expand Up @@ -1112,13 +1112,15 @@ async def clear_partial_state_room(self, room_id: str) -> bool:
# this can race with incoming events, so we watch out for FK errors.
# TODO(faster_joins): this still doesn't completely fix the race, since the persist process
# is not atomic. I fear we need an application-level lock.
# https://github.com/matrix-org/synapse/issues/12988
try:
await self.db_pool.runInteraction(
"clear_partial_state_room", self._clear_partial_state_room_txn, room_id
)
return True
except self.db_pool.engine.module.DatabaseError as e:
# TODO(faster_joins): how do we distinguish between FK errors and other errors?
# https://github.com/matrix-org/synapse/issues/12988
logger.warning(
"Exception while clearing lazy partial-state-room %s, retrying: %s",
room_id,
Expand Down
1 change: 1 addition & 0 deletions synapse/storage/databases/main/state.py
Original file line number Diff line number Diff line change
Expand Up @@ -435,6 +435,7 @@ def _update_state_for_partial_state_event_txn(
)

# TODO(faster_joins): need to do something about workers here
# https://github.com/matrix-org/synapse/issues/12994
txn.call_after(self.is_partial_state_event.invalidate, (event.event_id,))
txn.call_after(
self._get_state_group_for_event.prefill,
Expand Down
1 change: 1 addition & 0 deletions synapse/storage/state.py
Original file line number Diff line number Diff line change
Expand Up @@ -546,6 +546,7 @@ def must_await_full_state(self, is_mine_id: Callable[[str], bool]) -> bool:
# the sender of a piece of state wasn't actually in the room, then clearly that
# state shouldn't have been returned.
# We should at least add some tests around this to see what happens.
# https://github.com/matrix-org/synapse/issues/13006

# if we haven't requested membership events, then it depends on the value of
# 'include_others'
Expand Down