Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Create one-off scheduled task to delete old OTKs #17934

Merged
merged 6 commits into from
Nov 19, 2024
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions changelog.d/17934.feature
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Add a one-off task to delete old one-time-keys, to guard against us having old OTKs in the database that the client has long forgotten about.
35 changes: 35 additions & 0 deletions synapse/handlers/e2e_keys.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,8 @@
from synapse.types import (
JsonDict,
JsonMapping,
ScheduledTask,
TaskStatus,
UserID,
get_domain_from_id,
get_verify_key_from_cross_signing_key,
Expand Down Expand Up @@ -70,6 +72,7 @@ def __init__(self, hs: "HomeServer"):
self.is_mine = hs.is_mine
self.clock = hs.get_clock()
self._worker_lock_handler = hs.get_worker_locks_handler()
self._task_scheduler = hs.get_task_scheduler()

federation_registry = hs.get_federation_registry()

Expand Down Expand Up @@ -116,6 +119,10 @@ def __init__(self, hs: "HomeServer"):
hs.config.experimental.msc3984_appservice_key_query
)

self._task_scheduler.register_action(
self._delete_old_one_time_keys_task, "delete_old_otks"
)

@trace
@cancellable
async def query_devices(
Expand Down Expand Up @@ -1574,6 +1581,34 @@ async def has_different_keys(self, user_id: str, body: JsonDict) -> bool:
return True
return False

async def _delete_old_one_time_keys_task(
self, task: ScheduledTask
) -> Tuple[TaskStatus, Optional[JsonMapping], Optional[str]]:
"""Scheduler task to delete old one time keys.

Until Synapse 1.119, Synapse used to issue one-time-keys in a random order, leading to the possibility
that it could still have old OTKs that the client has dropped. This task is scheduled exactly once
by a database schema delta file, and it clears out old one-time-keys that look like they came from libolm.
"""
user = task.result.get("from_user", "") if task.result else ""
while True:
user, rowcount = await self.store.delete_old_otks_for_one_user(user)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Any chance we could do a number of users in a batch? Rather than trying to do ~25 transactions a second?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

good idea. Done.

if user is None:
# We're done!
return TaskStatus.COMPLETE, None, None

logger.debug("Deleted %i old one-time-keys for user '%s'", rowcount, user)

# Store our progress
await self._task_scheduler.update_task(task.id, result={"from_user": user})

# Sleep a little before doing the next user.
#
# matrix.org has about 15M users in the e2e_one_time_keys_json table
# (comprising 20M devices). We want this to take about a week, so we need
# to do 25 per second.
await self.clock.sleep(0.04)


def _check_cross_signing_key(
key: JsonDict, user_id: str, key_type: str, signing_key: Optional[VerifyKey] = None
Expand Down
43 changes: 43 additions & 0 deletions synapse/storage/databases/main/end_to_end_keys.py
Original file line number Diff line number Diff line change
Expand Up @@ -1453,6 +1453,49 @@ def impl(txn: LoggingTransaction) -> Tuple[bool, Optional[int]]:
impl,
)

async def delete_old_otks_for_one_user(
    self, after_user_id: str
) -> Tuple[Optional[str], int]:
    """Deletes old OTKs belonging to one user.

    Users are visited in ascending `user_id` order, so a caller that keeps
    feeding the returned user ID back in as `after_user_id` walks over every
    user in `e2e_one_time_keys_json` without skipping any.

    Args:
        after_user_id: only users whose ID sorts strictly after this value
            are considered.

    Returns:
        `(user, rows)`, where:
         * `user` is the user ID of the updated user, or None if we are done
         * `rows` is the number of deleted rows
    """

    def impl(txn: LoggingTransaction) -> Tuple[Optional[str], int]:
        # Find the next user.
        #
        # The ORDER BY is essential: without it the database is free to
        # return *any* user ID greater than `after_user_id`, and every user
        # that sorts between the two would then be skipped by the caller.
        txn.execute(
            """
            SELECT user_id FROM e2e_one_time_keys_json
                WHERE user_id > ?
                ORDER BY user_id
                LIMIT 1
            """,
            (after_user_id,),
        )
        row = txn.fetchone()
        if not row:
            # We're done!
            return None, 0
        (user_id,) = row

        # Delete any old OTKs belonging to that user.
        #
        # We only actually consider OTKs whose key ID is 6 characters long. These
        # keys were likely made by libolm rather than Vodozemac; libolm only kept
        # 100 private OTKs, so was far more vulnerable than Vodozemac to throwing
        # away keys prematurely.
        #
        # "Old" means added more than seven days ago.
        txn.execute(
            """
            DELETE FROM e2e_one_time_keys_json
            WHERE user_id = ? AND ts_added_ms < ? AND length(key_id) = 6
            """,
            (user_id, self._clock.time_msec() - (7 * 24 * 3600 * 1000)),
        )

        return user_id, txn.rowcount

    return await self.db_pool.runInteraction("delete_old_otks_for_one_user", impl)


class EndToEndKeyStore(EndToEndKeyWorkerStore, SQLBaseStore):
def __init__(
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
--
-- This file is licensed under the Affero General Public License (AGPL) version 3.
--
-- Copyright (C) 2024 New Vector, Ltd
--
-- This program is free software: you can redistribute it and/or modify
-- it under the terms of the GNU Affero General Public License as
-- published by the Free Software Foundation, either version 3 of the
-- License, or (at your option) any later version.
--
-- See the GNU Affero General Public License for more details:
-- <https://www.gnu.org/licenses/agpl-3.0.html>.

-- Until Synapse 1.119, Synapse used to issue one-time-keys in a random order, leading to the possibility
-- that it could still have old OTKs that the client has dropped.
--
-- We create a scheduled task which will drop old OTKs, to flush them out.
-- Insert a single row in the 'scheduled' state for the 'delete_old_otks' action.
-- `timestamp` is the current Unix epoch time, scaled from seconds to milliseconds.
INSERT INTO scheduled_tasks(id, action, status, timestamp)
VALUES ('delete_old_otks_task', 'delete_old_otks', 'scheduled', extract(epoch from current_timestamp) * 1000);
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
--
-- This file is licensed under the Affero General Public License (AGPL) version 3.
--
-- Copyright (C) 2024 New Vector, Ltd
--
-- This program is free software: you can redistribute it and/or modify
-- it under the terms of the GNU Affero General Public License as
-- published by the Free Software Foundation, either version 3 of the
-- License, or (at your option) any later version.
--
-- See the GNU Affero General Public License for more details:
-- <https://www.gnu.org/licenses/agpl-3.0.html>.

-- Until Synapse 1.119, Synapse used to issue one-time-keys in a random order, leading to the possibility
-- that it could still have old OTKs that the client has dropped.
--
-- We create a scheduled task which will drop old OTKs, to flush them out.
-- Insert a single row in the 'scheduled' state for the 'delete_old_otks' action.
-- `timestamp` is the current Unix epoch time (strftime '%s' yields seconds),
-- scaled to milliseconds.
INSERT INTO scheduled_tasks(id, action, status, timestamp)
VALUES ('delete_old_otks_task', 'delete_old_otks', 'scheduled', strftime('%s', 'now') * 1000);
Loading