Skip to content

Commit

Permalink
Add unique index right away for `sliding_sync_joined_rooms_to_recalcu…
Browse files Browse the repository at this point in the history
…late`

This makes it so we can always `upsert` to avoid duplicates. Otherwise,
I'm not sure how to avoid inserting duplicates in certain situations
(see the FIXME in the diff), which would cause problems down the line
when the unique index is added later.
  • Loading branch information
MadLittleMods committed Aug 28, 2024
1 parent 8468401 commit da463fb
Show file tree
Hide file tree
Showing 4 changed files with 21 additions and 78 deletions.
51 changes: 14 additions & 37 deletions synapse/storage/databases/main/events_bg_updates.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,9 +91,6 @@ class _BackgroundUpdates:
SLIDING_SYNC_PREFILL_JOINED_ROOMS_TO_RECALCULATE_TABLE_BG_UPDATE = (
"sliding_sync_prefill_joined_rooms_to_recalculate_table_bg_update"
)
SLIDING_SYNC_INDEX_JOINED_ROOMS_TO_RECALCULATE_TABLE_BG_UPDATE = (
"sliding_sync_index_joined_rooms_to_recalculate_table_bg_update"
)
SLIDING_SYNC_JOINED_ROOMS_BG_UPDATE = "sliding_sync_joined_rooms_bg_update"
SLIDING_SYNC_MEMBERSHIP_SNAPSHOTS_BG_UPDATE = (
"sliding_sync_membership_snapshots_bg_update"
Expand Down Expand Up @@ -318,13 +315,6 @@ def __init__(
_BackgroundUpdates.SLIDING_SYNC_PREFILL_JOINED_ROOMS_TO_RECALCULATE_TABLE_BG_UPDATE,
self._sliding_sync_prefill_joined_rooms_to_recalculate_table_bg_update,
)
self.db_pool.updates.register_background_index_update(
_BackgroundUpdates.SLIDING_SYNC_INDEX_JOINED_ROOMS_TO_RECALCULATE_TABLE_BG_UPDATE,
index_name="sliding_sync_joined_rooms_to_recalculate_room_id_idx",
table="sliding_sync_joined_rooms_to_recalculate",
columns=["room_id"],
unique=True,
)
# Add some background updates to populate the sliding sync tables
self.db_pool.updates.register_background_update_handler(
_BackgroundUpdates.SLIDING_SYNC_JOINED_ROOMS_BG_UPDATE,
Expand Down Expand Up @@ -1579,38 +1569,25 @@ async def _sliding_sync_prefill_joined_rooms_to_recalculate_table_bg_update(
"""
Prefill `sliding_sync_joined_rooms_to_recalculate` table with all rooms we know about already.
"""
initial_insert = progress.get("initial_insert", False)

def _txn(txn: LoggingTransaction) -> None:
# We do this as one big bulk insert. This has been tested on a bigger
# homeserver with ~10M rooms and took 11s. There is potential for this to
# starve disk usage while this goes on.
if initial_insert:
txn.execute(
"""
INSERT INTO sliding_sync_joined_rooms_to_recalculate
(room_id)
SELECT room_id FROM rooms;
""",
)
else:
# We can only upsert once the unique index has been added to the table
# (see
# `_BackgroundUpdates.SLIDING_SYNC_INDEX_JOINED_ROOMS_TO_RECALCULATE_TABLE_BG_UPDATE`)
#
# We upsert in case we have to run this multiple times.
#
# The `WHERE TRUE` clause is to avoid "Parsing Ambiguity"
txn.execute(
"""
INSERT INTO sliding_sync_joined_rooms_to_recalculate
(room_id)
SELECT room_id FROM rooms WHERE ?
ON CONFLICT (room_id)
DO NOTHING;
""",
(True,),
)
#
# We upsert in case we have to run this multiple times.
#
# The `WHERE TRUE` clause is to avoid the "Parsing Ambiguity" that SQLite
# documents for `INSERT ... SELECT ... ON CONFLICT` upserts.
txn.execute(
"""
INSERT INTO sliding_sync_joined_rooms_to_recalculate
(room_id)
SELECT room_id FROM rooms WHERE ?
ON CONFLICT (room_id)
DO NOTHING;
""",
(True,),
)

await self.db_pool.runInteraction(
"_sliding_sync_prefill_joined_rooms_to_recalculate_table_bg_update",
Expand Down
6 changes: 0 additions & 6 deletions synapse/storage/prepare_database.py
Original file line number Diff line number Diff line change
Expand Up @@ -679,10 +679,6 @@ def _resolve_stale_data_in_sliding_sync_joined_rooms_table(

# Update the `sliding_sync_joined_rooms_to_recalculate` table with the rooms
# that went stale and now need to be recalculated.
#
# FIXME: There is potentially a race where the unique index (added via
# `_BackgroundUpdates.SLIDING_SYNC_INDEX_JOINED_ROOMS_TO_RECALCULATE_TABLE_BG_UPDATE`)
# hasn't been added at this point so we won't be able to upsert
DatabasePool.simple_upsert_many_txn_native_upsert(
txn,
table="sliding_sync_joined_rooms_to_recalculate",
Expand All @@ -707,8 +703,6 @@ def _resolve_stale_data_in_sliding_sync_joined_rooms_table(
# we're already working on it
insertion_values={
"progress_json": "{}",
# Since we're going to upsert, we need to make sure the unique index is in place
"depends_on": _BackgroundUpdates.SLIDING_SYNC_INDEX_JOINED_ROOMS_TO_RECALCULATE_TABLE_BG_UPDATE,
},
)
depends_on = (
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,9 @@
-- during the background update to populate `sliding_sync_joined_rooms` which works but
-- it takes a lot of work for the database to grab `DISTINCT` room_ids given how many
-- state events there are for each room.
--
-- This table doesn't have any indexes at this point. We add the indexes in a separate
-- step to avoid the extra calculations during the bulk one-shot prefill insert.
CREATE TABLE IF NOT EXISTS sliding_sync_joined_rooms_to_recalculate(
room_id TEXT NOT NULL REFERENCES rooms(room_id)
room_id TEXT NOT NULL REFERENCES rooms(room_id),
PRIMARY KEY (room_id)
);

-- A table for storing room meta data (current state relevant to sliding sync) that the
Expand Down Expand Up @@ -143,18 +141,12 @@ CREATE UNIQUE INDEX IF NOT EXISTS sliding_sync_membership_snapshots_event_stream
--
-- 1. Add a background update to prefill `sliding_sync_joined_rooms_to_recalculate`.
-- We do a one-shot bulk insert from the `rooms` table to prefill.
-- 2. Add a background update to add indexes to the
-- `sliding_sync_joined_rooms_to_recalculate` table after the one-shot bulk insert.
-- We add the index in a separate step after to avoid the extra calculations during
-- the one-shot bulk insert.
-- 3. Add a background update to populate the new `sliding_sync_joined_rooms` table
-- 2. Add a background update to populate the new `sliding_sync_joined_rooms` table
--
INSERT INTO background_updates (ordering, update_name, progress_json) VALUES
(8701, 'sliding_sync_prefill_joined_rooms_to_recalculate_table_bg_update', '{ "initial_insert": true }');
INSERT INTO background_updates (ordering, update_name, progress_json, depends_on) VALUES
(8701, 'sliding_sync_index_joined_rooms_to_recalculate_table_bg_update', '{}', 'sliding_sync_prefill_joined_rooms_to_recalculate_table_bg_update');
(8701, 'sliding_sync_prefill_joined_rooms_to_recalculate_table_bg_update', '{}');
INSERT INTO background_updates (ordering, update_name, progress_json, depends_on) VALUES
(8701, 'sliding_sync_joined_rooms_bg_update', '{}', 'sliding_sync_index_joined_rooms_to_calculate_table_bg_update');
(8701, 'sliding_sync_joined_rooms_bg_update', '{}', 'sliding_sync_prefill_joined_rooms_to_recalculate_table_bg_update');

-- Add a background update to populate the new `sliding_sync_membership_snapshots` table
INSERT INTO background_updates (ordering, update_name, progress_json) VALUES
Expand Down
24 changes: 2 additions & 22 deletions tests/storage/test_sliding_sync_tables.py
Original file line number Diff line number Diff line change
Expand Up @@ -2669,23 +2669,13 @@ def test_joined_background_update_missing(self) -> None:
},
)
)
self.get_success(
self.store.db_pool.simple_insert(
"background_updates",
{
"update_name": _BackgroundUpdates.SLIDING_SYNC_INDEX_JOINED_ROOMS_TO_RECALCULATE_TABLE_BG_UPDATE,
"progress_json": "{}",
"depends_on": _BackgroundUpdates.SLIDING_SYNC_PREFILL_JOINED_ROOMS_TO_RECALCULATE_TABLE_BG_UPDATE,
},
)
)
self.get_success(
self.store.db_pool.simple_insert(
"background_updates",
{
"update_name": _BackgroundUpdates.SLIDING_SYNC_JOINED_ROOMS_BG_UPDATE,
"progress_json": "{}",
"depends_on": _BackgroundUpdates.SLIDING_SYNC_INDEX_JOINED_ROOMS_TO_RECALCULATE_TABLE_BG_UPDATE,
"depends_on": _BackgroundUpdates.SLIDING_SYNC_PREFILL_JOINED_ROOMS_TO_RECALCULATE_TABLE_BG_UPDATE,
},
)
)
Expand Down Expand Up @@ -2836,23 +2826,13 @@ def test_joined_background_update_partial(self) -> None:
},
)
)
self.get_success(
self.store.db_pool.simple_insert(
"background_updates",
{
"update_name": _BackgroundUpdates.SLIDING_SYNC_INDEX_JOINED_ROOMS_TO_RECALCULATE_TABLE_BG_UPDATE,
"progress_json": "{}",
"depends_on": _BackgroundUpdates.SLIDING_SYNC_PREFILL_JOINED_ROOMS_TO_RECALCULATE_TABLE_BG_UPDATE,
},
)
)
self.get_success(
self.store.db_pool.simple_insert(
"background_updates",
{
"update_name": _BackgroundUpdates.SLIDING_SYNC_JOINED_ROOMS_BG_UPDATE,
"progress_json": "{}",
"depends_on": _BackgroundUpdates.SLIDING_SYNC_INDEX_JOINED_ROOMS_TO_RECALCULATE_TABLE_BG_UPDATE,
"depends_on": _BackgroundUpdates.SLIDING_SYNC_PREFILL_JOINED_ROOMS_TO_RECALCULATE_TABLE_BG_UPDATE,
},
)
)
Expand Down

0 comments on commit da463fb

Please sign in to comment.