This repository has been archived by the owner on Apr 26, 2024. It is now read-only.

Use receipts event_stream_ordering column, remove joins to events #13918

Open · wants to merge 6 commits into base: develop

Changes from 5 commits
1 change: 1 addition & 0 deletions changelog.d/13918.misc
@@ -0,0 +1 @@
Use new receipts column to optimise receipt and push action SQL queries. Contributed by Nick @ Beeper (@fizzadar).
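For orientation, a simplified sketch (not the exact Synapse queries) of the pattern the diffs below apply: `receipts_linearized` now carries an `event_stream_ordering` column mirroring `events.stream_ordering`, so lookups can read the ordering straight off the receipt row instead of joining to `events`.

```python
# Illustrative only: the general shape of the optimisation in the diffs below.
# The old queries joined receipts to events just to recover the stream ordering;
# the new ones read the denormalised column directly.
OLD_SQL = """
    SELECT room_id, thread_id, MAX(stream_ordering)
    FROM receipts_linearized
    INNER JOIN events USING (room_id, event_id)
    WHERE user_id = ?
    GROUP BY room_id, thread_id
"""

NEW_SQL = """
    SELECT room_id, thread_id, MAX(event_stream_ordering)
    FROM receipts_linearized
    WHERE user_id = ?
    GROUP BY room_id, thread_id
"""
```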
12 changes: 5 additions & 7 deletions synapse/storage/databases/main/event_push_actions.py
@@ -1005,9 +1005,8 @@ def _get_receipts_by_room_txn(
)

sql = f"""
- SELECT room_id, thread_id, MAX(stream_ordering)
+ SELECT room_id, thread_id, MAX(event_stream_ordering)
FROM receipts_linearized
- INNER JOIN events USING (room_id, event_id)
WHERE {receipt_types_clause}
AND user_id = ?
GROUP BY room_id, thread_id
@@ -1486,11 +1485,10 @@ def _handle_new_receipts_for_notifs_txn(self, txn: LoggingTransaction) -> bool:
)

sql = """
- SELECT r.stream_id, r.room_id, r.user_id, r.thread_id, e.stream_ordering
- FROM receipts_linearized AS r
- INNER JOIN events AS e USING (event_id)
- WHERE ? < r.stream_id AND r.stream_id <= ? AND user_id LIKE ?
- ORDER BY r.stream_id ASC
+ SELECT stream_id, room_id, user_id, thread_id, event_stream_ordering
+ FROM receipts_linearized
+ WHERE ? < stream_id AND stream_id <= ? AND user_id LIKE ?
+ ORDER BY stream_id ASC
LIMIT ?
"""

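For context on how results of the receipt queries above are typically consumed, here is a rough, self-contained sketch with made-up names (not the actual `event_push_actions.py` code): the per-room/thread maximum receipt ordering is compared against each event's stream ordering to decide whether it still counts as unread.

```python
from typing import Dict, Iterable, Optional, Tuple

# One row per (room, thread): room_id, thread_id, MAX(event_stream_ordering).
Row = Tuple[str, Optional[str], Optional[int]]


def build_receipt_map(rows: Iterable[Row]) -> Dict[Tuple[str, Optional[str]], int]:
    """Collect the user's latest receipt ordering per (room, thread)."""
    receipts: Dict[Tuple[str, Optional[str]], int] = {}
    for room_id, thread_id, max_ordering in rows:
        # Receipts written before the new column was backfilled may be NULL.
        if max_ordering is not None:
            receipts[(room_id, thread_id)] = max_ordering
    return receipts


def is_unread(
    receipts: Dict[Tuple[str, Optional[str]], int],
    room_id: str,
    thread_id: Optional[str],
    event_ordering: int,
) -> bool:
    # An event is unread if it comes after the latest receipt in its
    # room/thread, or if the user has no receipt there at all.
    last_read = receipts.get((room_id, thread_id))
    return last_read is None or event_ordering > last_read
```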
30 changes: 20 additions & 10 deletions synapse/storage/databases/main/receipts.py
@@ -34,6 +34,7 @@
LoggingDatabaseConnection,
LoggingTransaction,
)
+ from synapse.storage.databases.main.stream import StreamWorkerStore
from synapse.storage.engines import PostgresEngine
from synapse.storage.engines._base import IsolationLevel
from synapse.storage.util.id_generators import (
@@ -52,7 +53,7 @@
logger = logging.getLogger(__name__)


- class ReceiptsWorkerStore(SQLBaseStore):
+ class ReceiptsWorkerStore(StreamWorkerStore, SQLBaseStore):
def __init__(
self,
database: DatabasePool,
@@ -144,14 +145,13 @@ def get_last_unthreaded_receipt_for_user_txn(
)

sql = f"""
- SELECT event_id, stream_ordering
+ SELECT event_id, event_stream_ordering
FROM receipts_linearized
- INNER JOIN events USING (room_id, event_id)
WHERE {clause}
AND user_id = ?
AND room_id = ?
AND thread_id IS NULL
- ORDER BY stream_ordering DESC
+ ORDER BY event_stream_ordering DESC
LIMIT 1
"""

@@ -625,23 +625,32 @@ def _insert_linearized_receipt_txn(
allow_none=True,
)

+ # Local user receipts must have an event_stream_ordering so that push action
+ # summarisation works correctly. This should always be the case because any
+ # local user should only receive events from this server. This exception
+ # protects against bad actors sending dodgy receipts.
+ if res is None and self.hs.is_mine_id(user_id):
+ raise ValueError(
+ "Local users cannot send receipts for unknown events, "
+ f"roomID={room_id}, eventID={event_id}",
+ )

stream_ordering = int(res["stream_ordering"]) if res else None
rx_ts = res["received_ts"] if res else 0

# We don't want to clobber receipts for more recent events, so we
# have to compare orderings of existing receipts
if stream_ordering is not None:
if thread_id is None:
- thread_clause = "r.thread_id IS NULL"
+ thread_clause = "thread_id IS NULL"
thread_args: Tuple[str, ...] = ()
else:
- thread_clause = "r.thread_id = ?"
+ thread_clause = "thread_id = ?"
thread_args = (thread_id,)

sql = f"""
- SELECT stream_ordering, event_id FROM events
- INNER JOIN receipts_linearized AS r USING (event_id, room_id)
- WHERE r.room_id = ? AND r.receipt_type = ? AND r.user_id = ? AND {thread_clause}
+ SELECT event_stream_ordering, event_id FROM receipts_linearized
+ WHERE room_id = ? AND receipt_type = ? AND user_id = ? AND {thread_clause}
"""
txn.execute(
sql,
@@ -654,7 +663,8 @@ def _insert_linearized_receipt_txn(
)

for so, eid in txn:
- if int(so) >= stream_ordering:
+ # Guard against old receipts with no `event_stream_ordering`
+ if so and int(so) >= stream_ordering:
logger.debug(
"Ignoring new receipt for %s in favour of existing "
"one for later event %s",
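The new `ValueError` guard above could be exercised with something like the following hypothetical test sketch (not part of this PR; it assumes the `get_failure` helper and the same `insert_receipt` arguments used by the existing tests):

```python
def test_local_receipt_for_unknown_event_is_rejected(self) -> None:
    # Hypothetical sketch: a local user's receipt for an event this server has
    # never seen should now fail with ValueError rather than being stored
    # without an event_stream_ordering.
    self.get_failure(
        self.hs.get_datastores().main.insert_receipt(
            room_id="!room:test",
            receipt_type="m.read",
            user_id=self.local_user,
            event_ids=["$does_not_exist:test"],
            thread_id=None,
            data={},
        ),
        ValueError,
    )
```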
14 changes: 10 additions & 4 deletions tests/handlers/test_appservice.py
@@ -588,7 +588,7 @@ def test_sending_read_receipt_batches_to_application_services(self) -> None:
],
ApplicationService.NS_ROOMS: [
{
- "regex": "!fakeroom_.*",
+ "regex": ".*",
"exclusive": True,
}
],
@@ -597,16 +597,22 @@ def test_sending_read_receipt_batches_to_application_services(self) -> None:

# Now, pretend that we receive a large burst of read receipts (300 total) that
# all come in at once.
- for i in range(300):
+ for _ in range(300):
+ room_id = self.helper.create_room_as(
+ self.local_user, tok=self.local_user_token
+ )
+ resp = self.helper.send(room_id, tok=self.local_user_token)
+ event_id = resp["event_id"]

Comment on lines 623 to +631

Contributor: Creating a room in between each read receipt does a lot of work, including creating new events. I'm not sure this test is still testing the intended scenario.

Contributor Author: Agreed, and it is also very slow; I'm not sure how to proceed here. The original test specified a unique room/event per receipt, so I was trying to replicate that...

self.get_success(
# Insert a fake read receipt into the database
self.hs.get_datastores().main.insert_receipt(
# We have to use unique room ID + user ID combinations here, as the db query
# is an upsert.
- room_id=f"!fakeroom_{i}:test",
+ room_id=room_id,
receipt_type="m.read",
user_id=self.local_user,
- event_ids=[f"$eventid_{i}"],
+ event_ids=[event_id],
thread_id=None,
data={},
)
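On the review thread above: one possible way to keep the original test's speed, sketched purely hypothetically (not something this PR does), would be to send the fake receipts as a remote user, since the new guard in `receipts.py` only rejects unknown events for local users. Whether that still exercises the application-service batching path the test is meant to cover would need checking.

```python
# Hypothetical alternative (not in this PR): a remote user's receipts for
# unknown events are still accepted (with a NULL event_stream_ordering), so the
# original fake room/event IDs could be kept and the 300 room creations avoided.
remote_user = "@someone:other.example"
for i in range(300):
    self.get_success(
        self.hs.get_datastores().main.insert_receipt(
            room_id=f"!fakeroom_{i}:test",
            receipt_type="m.read",
            user_id=remote_user,
            event_ids=[f"$eventid_{i}"],
            thread_id=None,
            data={},
        )
    )
```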
9 changes: 7 additions & 2 deletions tests/rest/client/test_receipts.py
@@ -14,7 +14,7 @@
from twisted.test.proto_helpers import MemoryReactor

import synapse.rest.admin
- from synapse.rest.client import login, receipts, register
+ from synapse.rest.client import login, receipts, register, room
from synapse.server import HomeServer
from synapse.util import Clock

@@ -26,6 +26,7 @@ class ReceiptsTestCase(unittest.HomeserverTestCase):
login.register_servlets,
register.register_servlets,
receipts.register_servlets,
+ room.register_servlets,
synapse.rest.admin.register_servlets,
]

@@ -34,9 +35,13 @@ def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
self.owner_tok = self.login("owner", "pass")

def test_send_receipt(self) -> None:
+ room_id = self.helper.create_room_as(self.owner, tok=self.owner_tok)
+ resp = self.helper.send(room_id, tok=self.owner_tok)
+ event_id = resp["event_id"]

channel = self.make_request(
"POST",
- "/rooms/!abc:beep/receipt/m.read/$def",
+ f"/rooms/!abc:beep/receipt/m.read/{event_id}",
content={},
access_token=self.owner_tok,
)