Skip to content
This repository has been archived by the owner on Apr 26, 2024. It is now read-only.

Return ThumbnailInfo in more places #16438

Merged
merged 4 commits into from
Oct 6, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions changelog.d/16438.misc
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Reduce memory allocations.
2 changes: 1 addition & 1 deletion synapse/media/_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -332,7 +332,7 @@ class ThumbnailInfo:
# Content type of thumbnail, e.g. image/png
type: str
# The size of the media file, in bytes.
length: Optional[int] = None
length: int


@attr.s(slots=True, frozen=True, auto_attribs=True)
Expand Down
3 changes: 3 additions & 0 deletions synapse/media/media_repository.py
Original file line number Diff line number Diff line change
Expand Up @@ -624,6 +624,7 @@ async def generate_local_exact_thumbnail(
height=t_height,
method=t_method,
type=t_type,
length=t_byte_source.tell(),
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sanity check: is t_byte_source definitely going to be fully written with the cursor at the end of the buffer, so that .tell() is the length? Both .crop() and .scale() say that they return "bytes ... ready to be written to disk" so I assume this is fine.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

My understand is that it will be written to and the cursor will be at the end of buffer; we could do something like:

cur = t_byte_source.tell()
t_byte_source.seek(0, os.SEEK_END)
end = t_byte_source.tell()
t_byte_source.seek(cur, os.SEEK_SET)

If you're concerned about it not being at the end of the buffer.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Reading back through the code I think this is fine TBH.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That was my understanding too, I just wanted to check this through. Tinkering in an interpreter, our understanding seems to be correct:

 20230728 (Red Hat 13.2.1-1)] on linux
Type "help", "copyright", "credits" or "license" for more information.
>>> from tests.test_utils import SMALL_PNG
>>> from io import BytesIO
>>> src = Image.open(BytesIO(SMALL_PNG))
>>> output = BytesIO()
>>> src.save(output, "png")
>>> output.tell()
70
>>> len(output.getvalue())
70

Though curiously this is slightly larger than the original PNG:

>>> len(SMALL_PNG)
67

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Anyway LGTM, :shipit:

),
)

Expand Down Expand Up @@ -694,6 +695,7 @@ async def generate_remote_exact_thumbnail(
height=t_height,
method=t_method,
type=t_type,
length=t_byte_source.tell(),
),
)

Expand Down Expand Up @@ -839,6 +841,7 @@ async def _generate_thumbnails(
height=t_height,
method=t_method,
type=t_type,
length=t_byte_source.tell(),
),
)

Expand Down
98 changes: 42 additions & 56 deletions synapse/rest/media/thumbnail_resource.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@

import logging
import re
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple
from typing import TYPE_CHECKING, List, Optional, Tuple

from synapse.api.errors import Codes, SynapseError, cs_error
from synapse.config.repository import THUMBNAIL_SUPPORTED_MEDIA_FORMAT_MAP
Expand Down Expand Up @@ -159,30 +159,24 @@ async def _select_or_generate_local_thumbnail(

thumbnail_infos = await self.store.get_local_media_thumbnails(media_id)
for info in thumbnail_infos:
t_w = info["thumbnail_width"] == desired_width
t_h = info["thumbnail_height"] == desired_height
t_method = info["thumbnail_method"] == desired_method
t_type = info["thumbnail_type"] == desired_type
t_w = info.width == desired_width
t_h = info.height == desired_height
t_method = info.method == desired_method
t_type = info.type == desired_type
clokep marked this conversation as resolved.
Show resolved Hide resolved

if t_w and t_h and t_method and t_type:
file_info = FileInfo(
server_name=None,
file_id=media_id,
url_cache=media_info["url_cache"],
thumbnail=ThumbnailInfo(
width=info["thumbnail_width"],
height=info["thumbnail_height"],
type=info["thumbnail_type"],
method=info["thumbnail_method"],
),
thumbnail=info,
)

t_type = file_info.thumbnail_type
t_length = info["thumbnail_length"]

responder = await self.media_storage.fetch_media(file_info)
if responder:
await respond_with_responder(request, responder, t_type, t_length)
clokep marked this conversation as resolved.
Show resolved Hide resolved
await respond_with_responder(
request, responder, info.type, info.length
)
return

logger.debug("We don't have a thumbnail of that size. Generating")
Expand Down Expand Up @@ -222,29 +216,23 @@ async def _select_or_generate_remote_thumbnail(
file_id = media_info["filesystem_id"]

for info in thumbnail_infos:
t_w = info["thumbnail_width"] == desired_width
t_h = info["thumbnail_height"] == desired_height
t_method = info["thumbnail_method"] == desired_method
t_type = info["thumbnail_type"] == desired_type
t_w = info.width == desired_width
t_h = info.height == desired_height
t_method = info.method == desired_method
t_type = info.type == desired_type

if t_w and t_h and t_method and t_type:
file_info = FileInfo(
server_name=server_name,
file_id=media_info["filesystem_id"],
thumbnail=ThumbnailInfo(
width=info["thumbnail_width"],
height=info["thumbnail_height"],
type=info["thumbnail_type"],
method=info["thumbnail_method"],
),
thumbnail=info,
)

t_type = file_info.thumbnail_type
t_length = info["thumbnail_length"]

responder = await self.media_storage.fetch_media(file_info)
if responder:
await respond_with_responder(request, responder, t_type, t_length)
await respond_with_responder(
request, responder, info.type, info.length
)
return

logger.debug("We don't have a thumbnail of that size. Generating")
Expand Down Expand Up @@ -304,7 +292,7 @@ async def _select_and_respond_with_thumbnail(
desired_height: int,
desired_method: str,
desired_type: str,
thumbnail_infos: List[Dict[str, Any]],
thumbnail_infos: List[ThumbnailInfo],
media_id: str,
file_id: str,
url_cache: bool,
Expand All @@ -319,7 +307,7 @@ async def _select_and_respond_with_thumbnail(
desired_height: The desired height, the returned thumbnail may be larger than this.
desired_method: The desired method used to generate the thumbnail.
desired_type: The desired content-type of the thumbnail.
thumbnail_infos: A list of dictionaries of candidate thumbnails.
thumbnail_infos: A list of thumbnail info of candidate thumbnails.
file_id: The ID of the media that a thumbnail is being requested for.
url_cache: True if this is from a URL cache.
server_name: The server name, if this is a remote thumbnail.
Expand Down Expand Up @@ -443,7 +431,7 @@ def _select_thumbnail(
desired_height: int,
desired_method: str,
desired_type: str,
thumbnail_infos: List[Dict[str, Any]],
thumbnail_infos: List[ThumbnailInfo],
file_id: str,
url_cache: bool,
server_name: Optional[str],
Expand All @@ -456,7 +444,7 @@ def _select_thumbnail(
desired_height: The desired height, the returned thumbnail may be larger than this.
desired_method: The desired method used to generate the thumbnail.
desired_type: The desired content-type of the thumbnail.
thumbnail_infos: A list of dictionaries of candidate thumbnails.
thumbnail_infos: A list of thumbnail infos of candidate thumbnails.
file_id: The ID of the media that a thumbnail is being requested for.
url_cache: True if this is from a URL cache.
server_name: The server name, if this is a remote thumbnail.
Expand All @@ -474,21 +462,25 @@ def _select_thumbnail(

if desired_method == "crop":
# Thumbnails that match equal or larger sizes of desired width/height.
crop_info_list: List[Tuple[int, int, int, bool, int, Dict[str, Any]]] = []
crop_info_list: List[
Tuple[int, int, int, bool, Optional[int], ThumbnailInfo]
] = []
# Other thumbnails.
crop_info_list2: List[Tuple[int, int, int, bool, int, Dict[str, Any]]] = []
crop_info_list2: List[
Tuple[int, int, int, bool, Optional[int], ThumbnailInfo]
] = []
for info in thumbnail_infos:
# Skip thumbnails generated with different methods.
if info["thumbnail_method"] != "crop":
if info.method != "crop":
continue

t_w = info["thumbnail_width"]
t_h = info["thumbnail_height"]
t_w = info.width
t_h = info.height
aspect_quality = abs(d_w * t_h - d_h * t_w)
min_quality = 0 if d_w <= t_w and d_h <= t_h else 1
size_quality = abs((d_w - t_w) * (d_h - t_h))
type_quality = desired_type != info["thumbnail_type"]
length_quality = info["thumbnail_length"]
type_quality = desired_type != info.type
length_quality = info.length
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Note: this is duplicated out of the info struct because everything but the last field is used as a sort criterion.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Right -- I guess we could just inline it below, but I'm trying to avoid too much refactoring at the same time as this?

if t_w >= d_w or t_h >= d_h:
crop_info_list.append(
(
Expand All @@ -513,28 +505,28 @@ def _select_thumbnail(
)
# Pick the most appropriate thumbnail. Some values of `desired_width` and
# `desired_height` may result in a tie, in which case we avoid comparing on
# the thumbnail info dictionary and pick the thumbnail that appears earlier
# the thumbnail info and pick the thumbnail that appears earlier
# in the list of candidates.
if crop_info_list:
thumbnail_info = min(crop_info_list, key=lambda t: t[:-1])[-1]
elif crop_info_list2:
thumbnail_info = min(crop_info_list2, key=lambda t: t[:-1])[-1]
elif desired_method == "scale":
# Thumbnails that match equal or larger sizes of desired width/height.
info_list: List[Tuple[int, bool, int, Dict[str, Any]]] = []
info_list: List[Tuple[int, bool, int, ThumbnailInfo]] = []
# Other thumbnails.
info_list2: List[Tuple[int, bool, int, Dict[str, Any]]] = []
info_list2: List[Tuple[int, bool, int, ThumbnailInfo]] = []

for info in thumbnail_infos:
# Skip thumbnails generated with different methods.
if info["thumbnail_method"] != "scale":
if info.method != "scale":
continue

t_w = info["thumbnail_width"]
t_h = info["thumbnail_height"]
t_w = info.width
t_h = info.height
size_quality = abs((d_w - t_w) * (d_h - t_h))
type_quality = desired_type != info["thumbnail_type"]
length_quality = info["thumbnail_length"]
type_quality = desired_type != info.type
length_quality = info.length
if t_w >= d_w or t_h >= d_h:
info_list.append((size_quality, type_quality, length_quality, info))
else:
Expand All @@ -543,7 +535,7 @@ def _select_thumbnail(
)
# Pick the most appropriate thumbnail. Some values of `desired_width` and
# `desired_height` may result in a tie, in which case we avoid comparing on
# the thumbnail info dictionary and pick the thumbnail that appears earlier
# the thumbnail info and pick the thumbnail that appears earlier
# in the list of candidates.
if info_list:
thumbnail_info = min(info_list, key=lambda t: t[:-1])[-1]
Expand All @@ -555,13 +547,7 @@ def _select_thumbnail(
file_id=file_id,
url_cache=url_cache,
server_name=server_name,
thumbnail=ThumbnailInfo(
width=thumbnail_info["thumbnail_width"],
height=thumbnail_info["thumbnail_height"],
type=thumbnail_info["thumbnail_type"],
method=thumbnail_info["thumbnail_method"],
length=thumbnail_info["thumbnail_length"],
),
thumbnail=thumbnail_info,
)

# No matching thumbnail was found.
Expand Down
30 changes: 25 additions & 5 deletions synapse/storage/databases/main/media_repository.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@

from synapse.api.constants import Direction
from synapse.logging.opentracing import trace
from synapse.media._base import ThumbnailInfo
from synapse.storage._base import SQLBaseStore
from synapse.storage.database import (
DatabasePool,
Expand Down Expand Up @@ -435,8 +436,8 @@ async def store_url_cache(
desc="store_url_cache",
)

async def get_local_media_thumbnails(self, media_id: str) -> List[Dict[str, Any]]:
return await self.db_pool.simple_select_list(
async def get_local_media_thumbnails(self, media_id: str) -> List[ThumbnailInfo]:
rows = await self.db_pool.simple_select_list(
"local_media_repository_thumbnails",
{"media_id": media_id},
(
Expand All @@ -448,6 +449,16 @@ async def get_local_media_thumbnails(self, media_id: str) -> List[Dict[str, Any]
),
desc="get_local_media_thumbnails",
)
return [
ThumbnailInfo(
width=row["thumbnail_width"],
height=row["thumbnail_height"],
method=row["thumbnail_method"],
type=row["thumbnail_type"],
length=row["thumbnail_length"],
)
for row in rows
]

@trace
async def store_local_thumbnail(
Expand Down Expand Up @@ -556,8 +567,8 @@ def update_cache_txn(txn: LoggingTransaction) -> None:

async def get_remote_media_thumbnails(
self, origin: str, media_id: str
) -> List[Dict[str, Any]]:
return await self.db_pool.simple_select_list(
) -> List[ThumbnailInfo]:
rows = await self.db_pool.simple_select_list(
"remote_media_cache_thumbnails",
{"media_origin": origin, "media_id": media_id},
(
Expand All @@ -566,10 +577,19 @@ async def get_remote_media_thumbnails(
"thumbnail_method",
"thumbnail_type",
"thumbnail_length",
"filesystem_id",
),
desc="get_remote_media_thumbnails",
)
return [
ThumbnailInfo(
width=row["thumbnail_width"],
height=row["thumbnail_height"],
method=row["thumbnail_method"],
type=row["thumbnail_type"],
length=row["thumbnail_length"],
)
for row in rows
]

@trace
async def get_remote_media_thumbnail(
Expand Down
36 changes: 18 additions & 18 deletions tests/media/test_media_storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@
from synapse.events import EventBase
from synapse.http.types import QueryParams
from synapse.logging.context import make_deferred_yieldable
from synapse.media._base import FileInfo
from synapse.media._base import FileInfo, ThumbnailInfo
from synapse.media.filepath import MediaFilePaths
from synapse.media.media_storage import MediaStorage, ReadableFileWrapper
from synapse.media.storage_provider import FileStorageProviderBackend
Expand Down Expand Up @@ -605,6 +605,8 @@ def test_same_quality(self, method: str, desired_size: int) -> None:
"""Test that choosing between thumbnails with the same quality rating succeeds.

We are not particular about which thumbnail is chosen."""

content_type = self.test_image.content_type.decode()
media_repo = self.hs.get_media_repository()
thumbnail_resouce = ThumbnailResource(
self.hs, media_repo, media_repo.media_storage
Expand All @@ -615,26 +617,24 @@ def test_same_quality(self, method: str, desired_size: int) -> None:
desired_width=desired_size,
desired_height=desired_size,
desired_method=method,
desired_type=self.test_image.content_type, # type: ignore[arg-type]
desired_type=content_type,
# Provide two identical thumbnails which are guaranteed to have the same
# quality rating.
thumbnail_infos=[
{
"thumbnail_width": 32,
"thumbnail_height": 32,
"thumbnail_method": method,
"thumbnail_type": self.test_image.content_type,
"thumbnail_length": 256,
"filesystem_id": f"thumbnail1{self.test_image.extension.decode()}",
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Was filesystem_id just redundant here? (Guessing it should have been in FileInfo and was missed in some previous refactor?)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

get_remote_media_thumbnails no longer returns it, we're just matching the return type of that function here.

},
{
"thumbnail_width": 32,
"thumbnail_height": 32,
"thumbnail_method": method,
"thumbnail_type": self.test_image.content_type,
"thumbnail_length": 256,
"filesystem_id": f"thumbnail2{self.test_image.extension.decode()}",
},
ThumbnailInfo(
width=32,
height=32,
method=method,
type=content_type,
length=256,
),
ThumbnailInfo(
width=32,
height=32,
method=method,
type=content_type,
length=256,
),
],
file_id=f"image{self.test_image.extension.decode()}",
url_cache=False,
Expand Down