From c2f5f234f9d0ea6af4e6be99b4e42a6499d5a8c8 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 29 Apr 2022 12:19:47 +0100 Subject: [PATCH 1/3] Fix race when persisting an event and deleting a room --- synapse/storage/databases/main/events.py | 15 +++++++++++++++ synapse/storage/databases/main/purge_events.py | 8 ++++++-- 2 files changed, 21 insertions(+), 2 deletions(-) diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index 2a1e567ce08e..8fb1c72008f5 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -47,6 +47,7 @@ ) from synapse.storage.databases.main.events_worker import EventCacheEntry from synapse.storage.databases.main.search import SearchEntry +from synapse.storage.engines.postgres import PostgresEngine from synapse.storage.util.id_generators import AbstractStreamIdGenerator from synapse.storage.util.sequence import SequenceGenerator from synapse.types import StateMap, get_domain_from_id @@ -364,6 +365,20 @@ def _persist_events_txn( min_stream_order = events_and_contexts[0][0].internal_metadata.stream_ordering max_stream_order = events_and_contexts[-1][0].internal_metadata.stream_ordering + # We check that the room still exists for events we're trying to + # persist. This is to protect against races with deleting a room. + # + # Annoyingly SQLite doesn't support row level locking. + if isinstance(self.database_engine, PostgresEngine): + for room_id in set(e.room_id for e, _ in events_and_contexts): + txn.execute( + "SELECT room_version FROM rooms WHERE room_id = ? FOR SHARE", + (room_id,), + ) + row = txn.fetchone() + if row is None: + raise Exception(f"Room does not exist {room_id}") + # stream orderings should have been assigned by now assert min_stream_order assert max_stream_order diff --git a/synapse/storage/databases/main/purge_events.py b/synapse/storage/databases/main/purge_events.py index 2e3818e43244..bfc85b3add98 100644 --- a/synapse/storage/databases/main/purge_events.py +++ b/synapse/storage/databases/main/purge_events.py @@ -324,7 +324,12 @@ async def purge_room(self, room_id: str) -> List[int]: ) def _purge_room_txn(self, txn: LoggingTransaction, room_id: str) -> List[int]: - # First we fetch all the state groups that should be deleted, before + # We *immediately* delete the room from the rooms table. This ensures + # that we don't race when persisting events (as that transaction checks + # that the room exists). + txn.execute("DELETE FROM rooms WHERE room_id = ?", (room_id,)) + + # Next, we fetch all the state groups that should be deleted, before # we delete that information. txn.execute( """ @@ -403,7 +408,6 @@ def _purge_room_txn(self, txn: LoggingTransaction, room_id: str) -> List[int]: "room_stats_state", "room_stats_current", "room_stats_earliest_token", - "rooms", "stream_ordering_to_exterm", "users_in_public_rooms", "users_who_share_private_rooms", From 23942be14a7cb39a1cfc61e34645b36ca96b551a Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 29 Apr 2022 12:23:02 +0100 Subject: [PATCH 2/3] Newsfile --- changelog.d/12594.bugfix | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/12594.bugfix diff --git a/changelog.d/12594.bugfix b/changelog.d/12594.bugfix new file mode 100644 index 000000000000..7411d9c07934 --- /dev/null +++ b/changelog.d/12594.bugfix @@ -0,0 +1 @@ +Fix race when persisting an event and deleting a room that could lead to outbound federation breaking. From 5e85a2759dfa9a7958c7b26441f94f32306a3f47 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 3 May 2022 10:44:49 +0100 Subject: [PATCH 3/3] Update synapse/storage/databases/main/events.py --- synapse/storage/databases/main/events.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index 8fb1c72008f5..9a6c2fd47a55 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -370,7 +370,7 @@ def _persist_events_txn( # # Annoyingly SQLite doesn't support row level locking. if isinstance(self.database_engine, PostgresEngine): - for room_id in set(e.room_id for e, _ in events_and_contexts): + for room_id in {e.room_id for e, _ in events_and_contexts}: txn.execute( "SELECT room_version FROM rooms WHERE room_id = ? FOR SHARE", (room_id,),