Skip to content
This repository has been archived by the owner on Apr 26, 2024. It is now read-only.

Refactor devices changed query to pull less from DB #5559

Merged
merged 7 commits into from
Jun 27, 2019
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions changelog.d/5559.misc
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Optimise devices changed query to not pull unnecessary rows from the database, reducing database load.
erikjohnston marked this conversation as resolved.
Show resolved Hide resolved
12 changes: 6 additions & 6 deletions synapse/handlers/device.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,9 +101,13 @@ def get_user_ids_changed(self, user_id, from_token):

room_ids = yield self.store.get_rooms_for_user(user_id)

# First we check if any devices have changed
# First we check if any devices have changed for users that we share
# rooms with.
users_who_share_room = yield self.store.get_users_who_share_room_with_user(
user_id
)
changed = yield self.store.get_user_whose_devices_changed(
from_token.device_list_key
from_token.device_list_key, users_who_share_room
)

# Then work out if any users have since joined
Expand Down Expand Up @@ -188,10 +192,6 @@ def get_user_ids_changed(self, user_id, from_token):
break

if possibly_changed or possibly_left:
users_who_share_room = yield self.store.get_users_who_share_room_with_user(
user_id
)

# Take the intersection of the users whose devices may have changed
# and those that actually still share a room with the user
possibly_joined = possibly_changed & users_who_share_room
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

is the & here now redundant?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Possibly. We do go and add entries in the loop, though they should all be new users

Expand Down
22 changes: 10 additions & 12 deletions synapse/handlers/sync.py
Original file line number Diff line number Diff line change
Expand Up @@ -1062,10 +1062,6 @@ def _generate_sync_entry_for_device_list(
since_token = sync_result_builder.since_token

if since_token and since_token.device_list_key:
changed = yield self.store.get_user_whose_devices_changed(
since_token.device_list_key
)

# TODO: Be more clever than this, i.e. remove users who we already
# share a room with?
for room_id in newly_joined_rooms:
Expand All @@ -1076,21 +1072,23 @@ def _generate_sync_entry_for_device_list(
left_users = yield self.state.get_current_users_in_room(room_id)
newly_left_users.update(left_users)

users_who_share_room = yield self.store.get_users_who_share_room_with_user(
user_id
)

# TODO: Check that these users are actually new, i.e. either they
# weren't in the previous sync *or* they left and rejoined.
changed.update(newly_joined_or_invited_users)

if not changed and not newly_left_users:
defer.returnValue(DeviceLists(changed=[], left=newly_left_users))
changed = users_who_share_room & set(newly_joined_or_invited_users)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this used to be a union: is the change to & deliberate?


users_who_share_room = yield self.store.get_users_who_share_room_with_user(
user_id
changed_users = yield self.store.get_user_whose_devices_changed(
since_token.device_list_key, users_who_share_room
)

changed.update(changed_users)

defer.returnValue(
DeviceLists(
changed=users_who_share_room & changed,
richvdh marked this conversation as resolved.
Show resolved Hide resolved
left=set(newly_left_users) - users_who_share_room,
changed=changed, left=set(newly_left_users) - users_who_share_room
)
)
else:
Expand Down
51 changes: 41 additions & 10 deletions synapse/storage/devices.py
Original file line number Diff line number Diff line change
Expand Up @@ -391,22 +391,53 @@ def _get_devices_with_keys_by_user_txn(self, txn, user_id):

return now_stream_id, []

@defer.inlineCallbacks
def get_user_whose_devices_changed(self, from_key):
"""Get set of users whose devices have changed since `from_key`.
def get_user_whose_devices_changed(self, from_key, user_ids):
erikjohnston marked this conversation as resolved.
Show resolved Hide resolved
"""Get set of users whose devices have changed since `from_key` that
are in the given list of user_ids.

Args:
user_ids (Iterable[str])
erikjohnston marked this conversation as resolved.
Show resolved Hide resolved
from_key: The device lists stream token
erikjohnston marked this conversation as resolved.
Show resolved Hide resolved

Returns:
Deferred[set[str]]: The set of user_ids whose devices have changed
since `from_key`
"""
from_key = int(from_key)
changed = self._device_list_stream_cache.get_all_entities_changed(from_key)
if changed is not None:
defer.returnValue(set(changed))

# Get set of users who *may* have changed. Users not in the returned
# list have definitely not changed.
to_check = list(
self._device_list_stream_cache.get_entities_changed(user_ids, from_key)
)

if not to_check:
return defer.succeed(set())

# We now check the database for all users in `to_check`, in batches.
batch_size = 100
chunks = [
erikjohnston marked this conversation as resolved.
Show resolved Hide resolved
to_check[i : i + batch_size] for i in range(0, len(to_check), batch_size)
]

sql = """
SELECT DISTINCT user_id FROM device_lists_stream WHERE stream_id > ?
SELECT DISTINCT user_id FROM device_lists_stream
WHERE stream_id > ?
AND user_id IN (%s)
"""
rows = yield self._execute(
"get_user_whose_devices_changed", None, sql, from_key

def _get_user_whose_devices_changed_txn(txn):
changes = set()

for chunk in chunks:
txn.execute(sql % (",".join("?" for _ in chunk),), [from_key] + chunk)
changes.update(user_id for user_id, in txn)

return changes

return self.runInteraction(
"get_user_whose_devices_changed", _get_user_whose_devices_changed_txn
)
defer.returnValue(set(row[0] for row in rows))

def get_all_device_list_changes_for_remotes(self, from_key, to_key):
"""Return a list of `(stream_id, user_id, destination)` which is the
Expand Down