Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Save memory #1829

Merged
merged 6 commits into from
Mar 26, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 10 additions & 2 deletions weasyprint/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,11 @@ def render(self, stylesheets=None, presentational_hints=False,
:param font_config: A font configuration handling ``@font-face`` rules.
:type counter_style: :class:`css.counters.CounterStyle`
:param counter_style: A dictionary storing ``@counter-style`` rules.
:param dict image_cache: A dictionary used to cache images.
:param image_cache:
A dictionary used to cache images, or a folder path where images
are temporarily stored.
:type image_cache:
:obj:`dict`, :obj:`str` or :class:`document.DiskCache`
:param bool forms: Whether PDF forms have to be included.
:returns: A :class:`document.Document` object.

Expand Down Expand Up @@ -186,7 +190,11 @@ def write_pdf(self, target=None, stylesheets=None, zoom=1,
:param font_config: A font configuration handling ``@font-face`` rules.
:type counter_style: :class:`css.counters.CounterStyle`
:param counter_style: A dictionary storing ``@counter-style`` rules.
:param dict image_cache: A dictionary used to cache images.
:param image_cache:
A dictionary used to cache images, or a folder path where images
are temporarily stored.
:type image_cache:
:obj:`dict`, :obj:`str` or :class:`document.DiskCache`
:param bytes identifier: A bytestring used as PDF file identifier.
:param str variant: A PDF variant name.
:param str version: A PDF version number.
Expand Down
10 changes: 10 additions & 0 deletions weasyprint/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,11 @@ def main(argv=None, stdout=None, stdin=None):
multiple times, ``all`` adds all allowed values, ``none`` removes all
previously set values.

.. option:: -c <folder>, --cache-folder <folder>

Store cache on disk instead of memory. The ``folder`` is created if
needed and cleaned after the PDF is generated.

.. option:: -v, --verbose

Show warnings and information messages.
Expand Down Expand Up @@ -156,6 +161,10 @@ def main(argv=None, stdout=None, stdin=None):
'-O', '--optimize-size', action='append',
help='optimize output size for specified features',
choices=('images', 'fonts', 'all', 'none'), default=['fonts'])
parser.add_argument(
'-c', '--cache-folder',
help='Store cache on disk instead of memory. The ``folder`` is '
'created if needed and cleaned after the PDF is generated.')
parser.add_argument(
'-v', '--verbose', action='store_true',
help='show warnings and information messages')
Expand Down Expand Up @@ -203,6 +212,7 @@ def main(argv=None, stdout=None, stdin=None):
'version': args.pdf_version,
'forms': args.pdf_forms,
'custom_metadata': args.custom_metadata,
'image_cache': args.cache_folder,
}

# Default to logging to stderr.
Expand Down
70 changes: 59 additions & 11 deletions weasyprint/document.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,8 @@

import functools
import io
import shutil
from hashlib import md5
from pathlib import Path

from . import CSS
from .anchors import gather_anchors, make_page_bookmark_tree
Expand Down Expand Up @@ -159,6 +160,52 @@ def __init__(self, title=None, authors=None, description=None,
self.custom = custom or {}


class DiskCache:
    """Dict-like object storing image content on disk.

    Bytestring values are written to files inside ``folder``. Other Python
    objects (i.e. RasterImage instances) are still stored in memory, but are
    much more lightweight.

    Only ``cache[key]``, ``cache[key] = value`` and ``key in cache`` are
    supported. Keys must be strings, as they are encoded and hashed to build
    file names.

    :param folder:
        Path of the folder where bytestrings are stored. The folder, and its
        missing parents, are created if needed.

    """
    def __init__(self, folder):
        self._path = Path(folder)
        self._path.mkdir(parents=True, exist_ok=True)
        # Values that are not bytestrings, indexed by key.
        self._memory_cache = {}
        # Files written by this instance, removed when it is deleted.
        self._disk_paths = set()

    def _path_from_key(self, key):
        """Return the path of the file storing the bytestring for ``key``."""
        # The digest is only used to get a fixed-length, filesystem-safe file
        # name out of an arbitrary string key.
        return self._path / md5(key.encode()).hexdigest()

    def __getitem__(self, key):
        """Return the value stored for ``key``.

        :raises KeyError: If ``key`` is neither in memory nor on disk.

        """
        if key in self._memory_cache:
            return self._memory_cache[key]
        try:
            return self._path_from_key(key).read_bytes()
        except FileNotFoundError:
            # Behave like a dict for missing keys instead of leaking the
            # storage backend's exception.
            raise KeyError(key) from None

    def __setitem__(self, key, value):
        """Store ``value`` on disk if it is a bytestring, else in memory."""
        if isinstance(value, bytes):
            path = self._path_from_key(key)
            self._disk_paths.add(path)
            path.write_bytes(value)
            # Memory is looked up first when reading: drop any previous
            # in-memory value so that it does not shadow the new bytestring.
            self._memory_cache.pop(key, None)
        else:
            self._memory_cache[key] = value

    def __contains__(self, key):
        """Return whether ``key`` is stored in memory or on disk."""
        return (
            key in self._memory_cache or
            self._path_from_key(key).exists())

    def __del__(self):
        """Remove the files written by this instance, then the folder.

        Cleaning is done on a best-effort basis: the folder is only removed
        if it is empty, and errors are ignored.

        """
        try:
            for path in self._disk_paths:
                path.unlink(missing_ok=True)
            self._path.rmdir()
        except Exception:
            # Silently ignore errors while clearing cache
            pass


class Document:
"""A rendered document ready to be painted in a pydyf stream.

Expand All @@ -181,7 +228,10 @@ def _build_layout_context(cls, html, stylesheets, presentational_hints,
target_collector = TargetCollector()
page_rules = []
user_stylesheets = []
image_cache = {} if image_cache is None else image_cache
if image_cache is None:
image_cache = {}
elif not isinstance(image_cache, DiskCache):
image_cache = DiskCache(image_cache)
for css in stylesheets or []:
if not hasattr(css, 'matcher'):
css = CSS(
Expand Down Expand Up @@ -364,15 +414,13 @@ def write_pdf(self, target=None, zoom=1, attachments=None, finisher=None,
if finisher:
finisher(self, pdf)

output = io.BytesIO()
pdf.write(output, version=pdf.version, identifier=identifier)

if target is None:
output = io.BytesIO()
pdf.write(output, version=pdf.version, identifier=identifier)
return output.getvalue()

if hasattr(target, 'write'):
pdf.write(target, version=pdf.version, identifier=identifier)
else:
output.seek(0)
if hasattr(target, 'write'):
shutil.copyfileobj(output, target)
else:
with open(target, 'wb') as fd:
shutil.copyfileobj(output, fd)
with open(target, 'wb') as fd:
pdf.write(fd, version=pdf.version, identifier=identifier)
2 changes: 1 addition & 1 deletion weasyprint/draw.py
Original file line number Diff line number Diff line change
Expand Up @@ -1199,7 +1199,7 @@ def draw_first_line(stream, textbox, text_overflow, block_ellipsis, x, y,
pillow_image = Image.open(BytesIO(png_data))
image_id = f'{font.hash}{glyph}'
image = RasterImage(
pillow_image, image_id, optimize_size=())
pillow_image, image_id, optimize_size=(), cache={})
d = font.widths[glyph] / 1000
a = pillow_image.width / pillow_image.height * d
pango.pango_font_get_glyph_extents(
Expand Down
Loading