diff --git a/changelog.d/13004.misc b/changelog.d/13004.misc new file mode 100644 index 000000000000..d8e93d87af2a --- /dev/null +++ b/changelog.d/13004.misc @@ -0,0 +1 @@ +Faster joins: add issue links to the TODO comments in the code. diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index 6a143440d3f9..5e16139626b2 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -545,6 +545,7 @@ async def do_invite_join( if ret.partial_state: # TODO(faster_joins): roll this back if we don't manage to start the # background resync (eg process_remote_join fails) + # https://github.com/matrix-org/synapse/issues/12998 await self.store.store_partial_state_room(room_id, ret.servers_in_room) max_stream_id = await self._federation_event_handler.process_remote_join( @@ -1506,14 +1507,17 @@ async def _sync_partial_state_room( # TODO(faster_joins): do we need to lock to avoid races? What happens if other # worker processes kick off a resync in parallel? Perhaps we should just elect # a single worker to do the resync. + # https://github.com/matrix-org/synapse/issues/12994 # # TODO(faster_joins): what happens if we leave the room during a resync? if we # really leave, that might mean we have difficulty getting the room state over # federation. + # https://github.com/matrix-org/synapse/issues/12802 # # TODO(faster_joins): we need some way of prioritising which homeservers in # `other_destinations` to try first, otherwise we'll spend ages trying dead # homeservers for large rooms. + # https://github.com/matrix-org/synapse/issues/12999 if initial_destination is None and len(other_destinations) == 0: raise ValueError( @@ -1543,9 +1547,11 @@ async def _sync_partial_state_room( # all the events are updated, so we can update current state and # clear the lazy-loading flag. logger.info("Updating current state for %s", room_id) + # TODO(faster_joins): support workers + # https://github.com/matrix-org/synapse/issues/12994 assert ( self._storage_controllers.persistence is not None - ), "TODO(faster_joins): support for workers" + ), "worker-mode deployments not currently supported here" await self._storage_controllers.persistence.update_current_state( room_id ) @@ -1559,6 +1565,8 @@ async def _sync_partial_state_room( ) # TODO(faster_joins) update room stats and user directory? + # https://github.com/matrix-org/synapse/issues/12814 + # https://github.com/matrix-org/synapse/issues/12815 return # we raced against more events arriving with partial state. Go round @@ -1566,6 +1574,8 @@ async def _sync_partial_state_room( # TODO(faster_joins): there is still a race here, whereby incoming events which raced # with us will fail to be persisted after the call to `clear_partial_state_room` due to # having partial state. + # https://github.com/matrix-org/synapse/issues/12988 + # continue events = await self.store.get_events_as_list( @@ -1588,6 +1598,7 @@ async def _sync_partial_state_room( # indefinitely is also not the right thing to do if we can # reach all homeservers and they all claim they don't have # the state we want. + # https://github.com/matrix-org/synapse/issues/13000 logger.error( "Failed to get state for %s at %s from %s because %s, " "giving up!", diff --git a/synapse/handlers/federation_event.py b/synapse/handlers/federation_event.py index 87a0608359c8..9889d1cb443a 100644 --- a/synapse/handlers/federation_event.py +++ b/synapse/handlers/federation_event.py @@ -532,6 +532,7 @@ async def update_state_for_partial_state_event( # # TODO(faster_joins): we probably need to be more intelligent, and # exclude partial-state prev_events from consideration + # https://github.com/matrix-org/synapse/issues/13001 logger.warning( "%s still has partial state: can't de-partial-state it yet", event.event_id, @@ -777,6 +778,7 @@ async def _process_pulled_event( state_ids = await self._resolve_state_at_missing_prevs(origin, event) # TODO(faster_joins): make sure that _resolve_state_at_missing_prevs does # not return partial state + # https://github.com/matrix-org/synapse/issues/13002 await self._process_received_pdu( origin, event, state_ids=state_ids, backfilled=backfilled diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index f455158a2cf6..294217cc235b 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -1102,6 +1102,7 @@ async def create_new_client_event( # # TODO(faster_joins): figure out how this works, and make sure that the # old state is complete. + # https://github.com/matrix-org/synapse/issues/13003 metadata = await self.store.get_metadata_for_events(state_event_ids) state_map_for_event: MutableStateMap[str] = {} diff --git a/synapse/storage/controllers/persist_events.py b/synapse/storage/controllers/persist_events.py index 4caaa8180863..4bcb99d06eae 100644 --- a/synapse/storage/controllers/persist_events.py +++ b/synapse/storage/controllers/persist_events.py @@ -388,10 +388,13 @@ async def update_current_state(self, room_id: str) -> None: # TODO(faster_joins): get a real stream ordering, to make this work correctly # across workers. + # https://github.com/matrix-org/synapse/issues/12994 # # TODO(faster_joins): this can race against event persistence, in which case we # will end up with incorrect state. Perhaps we should make this a job we - # farm out to the event persister, somehow. + # farm out to the event persister thread, somehow. + # https://github.com/matrix-org/synapse/issues/13007 + # stream_id = self.main_store.get_room_max_stream_ordering() await self.persist_events_store.update_current_state(room_id, delta, stream_id) diff --git a/synapse/storage/controllers/state.py b/synapse/storage/controllers/state.py index 3b4cdb67eb27..d3a44bc876b9 100644 --- a/synapse/storage/controllers/state.py +++ b/synapse/storage/controllers/state.py @@ -452,6 +452,9 @@ async def get_current_state_deltas( up to date. """ # FIXME(faster_joins): what do we do here? + # https://github.com/matrix-org/synapse/issues/12814 + # https://github.com/matrix-org/synapse/issues/12815 + # https://github.com/matrix-org/synapse/issues/13008 return await self.stores.main.get_partial_current_state_deltas( prev_stream_id, max_stream_id diff --git a/synapse/storage/databases/main/room.py b/synapse/storage/databases/main/room.py index 68d4fc2e64cf..5760d3428e66 100644 --- a/synapse/storage/databases/main/room.py +++ b/synapse/storage/databases/main/room.py @@ -1112,6 +1112,7 @@ async def clear_partial_state_room(self, room_id: str) -> bool: # this can race with incoming events, so we watch out for FK errors. # TODO(faster_joins): this still doesn't completely fix the race, since the persist process # is not atomic. I fear we need an application-level lock. + # https://github.com/matrix-org/synapse/issues/12988 try: await self.db_pool.runInteraction( "clear_partial_state_room", self._clear_partial_state_room_txn, room_id @@ -1119,6 +1120,7 @@ async def clear_partial_state_room(self, room_id: str) -> bool: return True except self.db_pool.engine.module.DatabaseError as e: # TODO(faster_joins): how do we distinguish between FK errors and other errors? + # https://github.com/matrix-org/synapse/issues/12988 logger.warning( "Exception while clearing lazy partial-state-room %s, retrying: %s", room_id, diff --git a/synapse/storage/databases/main/state.py b/synapse/storage/databases/main/state.py index 5e6efbd0fc92..9674c4a7573b 100644 --- a/synapse/storage/databases/main/state.py +++ b/synapse/storage/databases/main/state.py @@ -435,6 +435,7 @@ def _update_state_for_partial_state_event_txn( ) # TODO(faster_joins): need to do something about workers here + # https://github.com/matrix-org/synapse/issues/12994 txn.call_after(self.is_partial_state_event.invalidate, (event.event_id,)) txn.call_after( self._get_state_group_for_event.prefill, diff --git a/synapse/storage/state.py b/synapse/storage/state.py index 96aaffb53c04..af3bab2c15d9 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -546,6 +546,7 @@ def must_await_full_state(self, is_mine_id: Callable[[str], bool]) -> bool: # the sender of a piece of state wasn't actually in the room, then clearly that # state shouldn't have been returned. # We should at least add some tests around this to see what happens. + # https://github.com/matrix-org/synapse/issues/13006 # if we haven't requested membership events, then it depends on the value of # 'include_others'