Skip to content
This repository has been archived by the owner on Apr 26, 2024. It is now read-only.

Quarantine media by ID or user ID #6681

Merged
merged 15 commits into from
Jan 13, 2020
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions changelog.d/6681.feature
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Extend the quarantine_media admin API to quarantine media by ID or all media by a specific user.
anoadragon453 marked this conversation as resolved.
Show resolved Hide resolved
45 changes: 38 additions & 7 deletions docs/admin_api/media_admin_api.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,19 +22,50 @@ It returns a JSON body like the following:
}
```

# Quarantine media in a room
# Quarantine media

This API 'quarantines' all the media in a room.
Quarantining media means that it is marked as inaccessible by users. It applies
to any local media, and any locally-cached copies of remote media.

The API is:
The media file itself (and any thumbnails) is not deleted from the server.

## Quarantining media by ID

This API quarantines a single piece of local or remote media.

```
POST /_synapse/admin/v1/quarantine_media/<room_id>
POST /_synapse/admin/v1/quarantine_media/<media_id>

{}
```

Quarantining media means that it is marked as inaccessible by users. It applies
to any local media, and any locally-cached copies of remote media.
Where `media_id` is the name of the server the media originated from and the media's ID, joined by a slash, e.g. `example.org/abcdefg12345...`.

The media file itself (and any thumbnails) is not deleted from the server.
## Quarantining media in a room

This API quarantines all local and remote media in a room.

```
POST /_synapse/admin/v1/quarantine_media/<room_id>

{
"num_quarantined": 10 # The number of media items successfully quarantined
anoadragon453 marked this conversation as resolved.
Show resolved Hide resolved
}
```

Where `room_id` is in the form of `!roomid12345:example.org`.

## Quarantining all media of a user

This API quarantines all *local* media that a *local* user has uploaded. To quarantine
media that was uploaded by a user on a remote homeserver, use one of the other APIs
instead.

```
POST /_synapse/admin/v1/quarantine_media/<user_id>
{
"num_quarantined": 10 # The number of media items successfully quarantined
}
```

Where `user_id` is in the form of `@bob:example.org`.
39 changes: 31 additions & 8 deletions synapse/rest/admin/media.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@

import logging

from synapse.api.errors import AuthError
from synapse.api.errors import AuthError, Codes, SynapseError
from synapse.http.servlet import RestServlet, parse_integer
from synapse.rest.admin._base import (
assert_requester_is_admin,
Expand All @@ -28,23 +28,46 @@


class QuarantineMediaInRoom(RestServlet):
    """Quarantines media so that no one can download it via this server.

    The single path parameter is interpreted according to its form:

    * a room ID (starts with "!"): all media in that room is quarantined;
    * a user ID (starts with "@"): all media uploaded by that user is
      quarantined;
    * anything else is treated as "<server_name>/<media_id>" and that single
      piece of media is quarantined.
    """

    # NOTE(review): this pattern excludes a literal "/", so the
    # "<server_name>/<media_id>" form presumably has to arrive with the slash
    # percent-encoded -- confirm against how the router decodes path params.
    PATTERNS = historical_admin_path_patterns("/quarantine_media/(?P<identifier>[^/]+)")

    def __init__(self, hs):
        self.store = hs.get_datastore()
        self.auth = hs.get_auth()

    async def on_POST(self, request, identifier: str):
        """Quarantine the media described by `identifier`.

        Args:
            request: The incoming request; used to authenticate the caller.
            identifier: A room ID, a user ID or "<server_name>/<media_id>".

        Returns:
            A (status code, JSON body) tuple. The body carries
            `num_quarantined` for room and user requests and is empty when a
            single media ID was quarantined.

        Raises:
            SynapseError: 400 / INVALID_PARAM if `identifier` is none of the
                accepted forms.
        """
        requester = await self.auth.get_user_by_req(request)
        await assert_user_is_admin(self.auth, requester.user)

        quarantined_by = requester.user.to_string()

        # Log under this module's name rather than through the root logger.
        logging.getLogger(__name__).info("Quarantining %s", identifier)

        if identifier.startswith("!"):
            # Quarantine all media in this room
            num_quarantined = await self.store.quarantine_media_ids_in_room(
                identifier, quarantined_by
            )
            return 200, {"num_quarantined": num_quarantined}

        if identifier.startswith("@"):
            # Quarantine all media this user has uploaded
            num_quarantined = await self.store.quarantine_media_ids_by_user(
                identifier, quarantined_by
            )
            return 200, {"num_quarantined": num_quarantined}

        # Quarantine this media id. Both halves must be present: "/abc" or
        # "example.org/" would otherwise be accepted with an empty component.
        server_and_media_id = identifier.split("/")
        if len(server_and_media_id) != 2 or not all(server_and_media_id):
            raise SynapseError(
                400,
                "Invalid media_id supplied: '%s'" % identifier,
                errcode=Codes.INVALID_PARAM,
            )
        server, media_id = server_and_media_id
        await self.store.quarantine_media_by_id(server, media_id, quarantined_by)
        return 200, {}

Expand Down
214 changes: 213 additions & 1 deletion synapse/storage/data_stores/main/room.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
import logging
import re
from abc import abstractmethod
from typing import Optional, Tuple
from typing import List, Optional, Tuple

from six import integer_types

Expand Down Expand Up @@ -938,6 +938,218 @@ def block_room(self, room_id, user_id):
(room_id,),
)

def get_media_mxcs_in_room(self, room_id):
    """Look up the MXC URIs of all local and remote media in a given room.

    Args:
        room_id (str)

    Returns:
        Whatever `self.db.runInteraction` returns for the lookup. The
        transaction function itself produces a `(local, remote)` pair, each a
        list of `mxc://<server>/<media_id>` strings.
    """

    def _collect_mxc_uris_txn(txn):
        local_ids, remote_pairs = self._get_media_mxcs_in_room_txn(txn, room_id)

        # Local media is stored by bare ID, so prefix our own server name.
        local_uris = [
            "mxc://%s/%s" % (self.hs.hostname, media_id) for media_id in local_ids
        ]
        remote_uris = [
            "mxc://%s/%s" % (origin, media_id) for origin, media_id in remote_pairs
        ]
        return local_uris, remote_uris

    return self.db.runInteraction("get_media_ids_in_room", _collect_mxc_uris_txn)

def quarantine_media_ids_in_room(self, room_id, quarantined_by):
    """Quarantine every piece of media referenced by events in a room.

    Args:
        room_id (str): The room whose media should be quarantined.
        quarantined_by (str): The user ID recorded as having quarantined it.

    Returns:
        Whatever `self.db.runInteraction` returns; the transaction function
        yields the count reported by `_quarantine_media_txn`.
    """

    def _quarantine_room_media_txn(txn):
        found = self._get_media_mxcs_in_room_txn(txn, room_id)
        local_ids, remote_pairs = found
        return self._quarantine_media_txn(
            txn, local_ids, remote_pairs, quarantined_by
        )

    return self.db.runInteraction(
        "quarantine_media_in_room", _quarantine_room_media_txn
    )
anoadragon453 marked this conversation as resolved.
Show resolved Hide resolved

def _get_media_mxcs_in_room_txn(
    self, txn, room_id,
) -> Tuple[List[str], List[Tuple[str, str]]]:
    """Collect the media referenced by URL-bearing events in a given room.

    Pages through the room's non-outlier events that have `contains_url`
    set, newest first and 100 at a time, and inspects the `url` and
    `info.thumbnail_url` fields of each event's content.

    Args:
        txn (cursor)
        room_id (str)

    Returns:
        A `(local, remote)` pair. `local` is a list of media IDs whose MXC
        server name equals `self.hs.hostname`; `remote` is a list of
        `(hostname, media_id)` tuples for all other MXC URIs. Neither list
        is de-duplicated.
    """
    mxc_re = re.compile("^mxc://([^/]+)/([^/#?]+)")

    # The statement never changes between pages, so build it once.
    sql = """
        SELECT stream_ordering, json FROM events
        JOIN event_json USING (room_id, event_id)
        WHERE room_id = ?
            AND stream_ordering < ?
            AND contains_url = ? AND outlier = ?
        ORDER BY stream_ordering DESC
        LIMIT ?
    """

    next_token = self.get_current_events_token() + 1
    local_media_mxcs = []
    remote_media_mxcs = []

    while next_token:
        txn.execute(sql, (room_id, next_token, True, False, 100))

        # Stays None (ending the loop) if this page returned no rows.
        next_token = None
        for stream_ordering, content_json in txn:
            next_token = stream_ordering

            # Event content is supplied by users, so do not assume its shape:
            # skip anything that is not the expected dict / string.
            content = json.loads(content_json).get("content")
            if not isinstance(content, dict):
                continue

            info = content.get("info")
            thumbnail_url = (
                info.get("thumbnail_url") if isinstance(info, dict) else None
            )

            for url in (content.get("url"), thumbnail_url):
                if not isinstance(url, str):
                    continue
                matches = mxc_re.match(url)
                if not matches:
                    continue
                hostname, media_id = matches.groups()
                if hostname == self.hs.hostname:
                    local_media_mxcs.append(media_id)
                else:
                    remote_media_mxcs.append((hostname, media_id))

    return local_media_mxcs, remote_media_mxcs

def quarantine_media_by_id(
    self, server_name: str, media_id: str, quarantined_by: str,
):
    """Quarantine a single local or remote media ID.

    The media is treated as local when `server_name` equals
    `self.config.server_name`, and as remote otherwise.

    Args:
        server_name: The name of the server that holds this media
        media_id: The ID of the media to be quarantined
        quarantined_by: The user ID that initiated the quarantine request

    Returns:
        Whatever `self.db.runInteraction` returns; the transaction function
        yields the count reported by `_quarantine_media_txn`.
    """
    logger.info("Quarantining media: %s/%s", server_name, media_id)
    is_local = server_name == self.config.server_name

    def _quarantine_media_by_id_txn(txn):
        # Exactly one of the two lists holds the media item.
        local_mxcs = [media_id] if is_local else []
        remote_mxcs = [] if is_local else [(server_name, media_id)]

        return self._quarantine_media_txn(
            txn, local_mxcs, remote_mxcs, quarantined_by
        )

    # The description names this operation; it previously reused the
    # "quarantine_media_by_user" label of the sibling method.
    return self.db.runInteraction(
        "quarantine_media_by_id", _quarantine_media_by_id_txn
    )

def quarantine_media_ids_by_user(self, user_id: str, quarantined_by: str):
    """Quarantine all local media uploaded by a single user.

    Args:
        user_id: The ID of the user whose media is to be quarantined
        quarantined_by: The ID of the user who made the quarantine request

    Returns:
        Whatever `self.db.runInteraction` returns; the transaction function
        yields the count reported by `_quarantine_media_txn`.
    """

    def _quarantine_user_media_txn(txn):
        uploaded_ids = self._get_media_ids_by_user_txn(txn, user_id)
        # Only local media is looked up, so the remote list is always empty.
        return self._quarantine_media_txn(txn, uploaded_ids, [], quarantined_by)

    return self.db.runInteraction(
        "quarantine_media_by_user", _quarantine_user_media_txn
    )

def _get_media_ids_by_user_txn(self, txn, user_id: str, filter_quarantined=True):
    """Retrieve the IDs of local media recorded against a given user.

    Args:
        txn (cursor)
        user_id: The ID of the user to retrieve media IDs of
        filter_quarantined: If true (the default), only return media whose
            `quarantined_by` column is NULL, i.e. not yet quarantined.

    Returns:
        A list of media IDs from `local_media_repository`; empty if the
        query returns no rows.
    """
    # Local media
    sql = """
        SELECT media_id
        FROM local_media_repository
        WHERE user_id = ?
        """
    if filter_quarantined:
        sql += "AND quarantined_by IS NULL"
    txn.execute(sql, (user_id,))

    # TODO: Figure out all remote media a user has referenced in a message

    # `or ()` keeps the empty-list result for any falsy fetch result.
    return [row[0] for row in txn.fetchall() or ()]

def _quarantine_media_txn(
    self,
    txn,
    local_mxcs: List[str],
    remote_mxcs: List[Tuple[str, str]],
    quarantined_by: str,
) -> int:
    """Mark local and remote media items as quarantined.

    Args:
        txn (cursor)
        local_mxcs: A list of local media IDs
        remote_mxcs: A list of (remote server, media id) tuples identifying
            remote media
        quarantined_by: The ID of the user who initiated the quarantine request

    Returns:
        The number of media items passed in, i.e.
        `len(local_mxcs) + len(remote_mxcs)`. This is not a count of the
        rows the UPDATE statements actually changed.
    """
    update_local_sql = """
        UPDATE local_media_repository
        SET quarantined_by = ?
        WHERE media_id = ?
    """
    update_remote_sql = """
        UPDATE remote_media_cache
        SET quarantined_by = ?
        WHERE media_origin = ? AND media_id = ?
    """

    # Set the quarantined_by flag in each table, local media first.
    local_params = ((quarantined_by, media_id) for media_id in local_mxcs)
    txn.executemany(update_local_sql, local_params)

    remote_params = (
        (quarantined_by, origin, media_id) for origin, media_id in remote_mxcs
    )
    txn.executemany(update_remote_sql, remote_params)

    return len(local_mxcs) + len(remote_mxcs)

@defer.inlineCallbacks
def get_rooms_for_retention_period_in_range(
self, min_ms, max_ms, include_null=False
Expand Down
Loading