Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Compress each file in a ThreadPool #484

Merged
merged 3 commits into from
Oct 28, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions docs/changelog.rst
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,11 @@ Unreleased

Thanks to Sarah Boyce in `PR #486 <https://github.com/evansd/whitenoise/pull/486>`__.

* Compress files using a thread pool.
This speeds up the compression step by up to four times in benchmarks.

Thanks to Anthony Ricaud in `PR #484 <https://github.com/evansd/whitenoise/pull/484>`__.

6.7.0 (2024-06-19)
------------------

Expand Down
20 changes: 13 additions & 7 deletions src/whitenoise/compress.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import gzip
import os
import re
from concurrent.futures import ThreadPoolExecutor
from io import BytesIO

try:
Expand Down Expand Up @@ -77,7 +78,7 @@ def should_compress(self, filename):
def log(self, message):
pass

def compress(self, path):
def _lazy_compress(self, path):
with open(path, "rb") as f:
stat_result = os.fstat(f.fileno())
data = f.read()
Expand All @@ -94,6 +95,9 @@ def compress(self, path):
if self.is_compressed_effectively("Gzip", path, size, compressed):
yield self.write_data(path, compressed, ".gz", stat_result)

def compress(self, path):
return list(self._lazy_compress(path))

@staticmethod
def compress_gzip(data):
output = BytesIO()
Expand Down Expand Up @@ -175,12 +179,14 @@ def main(argv=None):
use_brotli=args.use_brotli,
quiet=args.quiet,
)
for dirpath, _dirs, files in os.walk(args.root):
for filename in files:
if compressor.should_compress(filename):
path = os.path.join(dirpath, filename)
for _compressed in compressor.compress(path):
pass

with ThreadPoolExecutor() as executor:
for dirpath, _dirs, files in os.walk(args.root):
for filename in files:
if compressor.should_compress(filename):
executor.submit(
adamchainz marked this conversation as resolved.
Show resolved Hide resolved
compressor.compress, os.path.join(dirpath, filename)
)

return 0

Expand Down
56 changes: 38 additions & 18 deletions src/whitenoise/storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,10 @@
import os
import re
import textwrap
from collections.abc import Generator
from collections.abc import Iterator
from concurrent.futures import ThreadPoolExecutor
from concurrent.futures import as_completed
from typing import Any
from typing import Union

Expand All @@ -29,15 +32,23 @@ def post_process(
return

extensions = getattr(settings, "WHITENOISE_SKIP_COMPRESS_EXTENSIONS", None)
compressor = self.create_compressor(extensions=extensions, quiet=True)

for path in paths:
if compressor.should_compress(path):
full_path = self.path(path)
prefix_len = len(full_path) - len(path)
for compressed_path in compressor.compress(full_path):
compressed_name = compressed_path[prefix_len:]
yield path, compressed_name, True
self.compressor = self.create_compressor(extensions=extensions, quiet=True)

def _compress_path(path: str) -> Generator[tuple[str, str, bool]]:
    """Compress one static file.

    Yields ``(name, compressed_name, processed)`` triples in the shape
    Django's ``post_process`` protocol expects, with names relative to
    the storage root.
    """
    absolute = self.path(path)
    trim = len(absolute) - len(path)
    for written in self.compressor.compress(absolute):
        yield path, written[trim:], True

with ThreadPoolExecutor() as executor:
futures = (
executor.submit(_compress_path, path)
for path in paths
if self.compressor.should_compress(path)
)
for future in as_completed(futures):
yield from future.result()

def create_compressor(self, **kwargs: Any) -> Compressor:
return Compressor(**kwargs)
Expand Down Expand Up @@ -127,16 +138,25 @@ def delete_files(self, files_to_delete):
def create_compressor(self, **kwargs):
    # Hook point: subclasses may override to customise compression.
    # All keyword arguments are forwarded to Compressor unchanged.
    return Compressor(**kwargs)

def compress_files(self, names):
def compress_files(self, paths):
extensions = getattr(settings, "WHITENOISE_SKIP_COMPRESS_EXTENSIONS", None)
compressor = self.create_compressor(extensions=extensions, quiet=True)
for name in names:
if compressor.should_compress(name):
path = self.path(name)
prefix_len = len(path) - len(name)
for compressed_path in compressor.compress(path):
compressed_name = compressed_path[prefix_len:]
yield name, compressed_name
self.compressor = self.create_compressor(extensions=extensions, quiet=True)

def _compress_path(path: str) -> Generator[tuple[str, str]]:
    """Compress one file, yielding ``(name, compressed_name)`` pairs.

    Names are relative to the storage root; the prefix of the absolute
    filesystem path is stripped before yielding.
    """
    absolute = self.path(path)
    trim = len(absolute) - len(path)
    for written in self.compressor.compress(absolute):
        yield path, written[trim:]

with ThreadPoolExecutor() as executor:
futures = (
executor.submit(_compress_path, path)
for path in paths
if self.compressor.should_compress(path)
)
for future in as_completed(futures):
yield from future.result()

def make_helpful_exception(self, exception, name):
"""
Expand Down