Skip to content

Commit

Permalink
Compress each file in a ThreadPool (#484)
Browse files Browse the repository at this point in the history
Fixes #148.

---------

Co-authored-by: Adam Johnson <[email protected]>
  • Loading branch information
rik and adamchainz authored Oct 28, 2024
1 parent 9494ff3 commit d5caf8d
Show file tree
Hide file tree
Showing 3 changed files with 56 additions and 25 deletions.
5 changes: 5 additions & 0 deletions docs/changelog.rst
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,11 @@ Unreleased

Thanks to Sarah Boyce in `PR #486 <https://github.com/evansd/whitenoise/pull/486>`__.

* Compress files using a thread pool.
This makes the compression step up to four times faster in benchmarks.

Thanks to Anthony Ricaud in `PR #484 <https://github.com/evansd/whitenoise/pull/484>`__.

6.7.0 (2024-06-19)
------------------

Expand Down
20 changes: 13 additions & 7 deletions src/whitenoise/compress.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import gzip
import os
import re
from concurrent.futures import ThreadPoolExecutor
from io import BytesIO

try:
Expand Down Expand Up @@ -77,7 +78,7 @@ def should_compress(self, filename):
def log(self, message):
pass

def compress(self, path):
def _lazy_compress(self, path):
with open(path, "rb") as f:
stat_result = os.fstat(f.fileno())
data = f.read()
Expand All @@ -94,6 +95,9 @@ def compress(self, path):
if self.is_compressed_effectively("Gzip", path, size, compressed):
yield self.write_data(path, compressed, ".gz", stat_result)

def compress(self, path):
    """Compress the file at ``path`` and return the results as a list.

    Drains ``self._lazy_compress(path)`` completely before returning, so all
    of the generator's work happens during this call rather than when a
    caller later iterates the result.
    """
    return list(self._lazy_compress(path))

@staticmethod
def compress_gzip(data):
output = BytesIO()
Expand Down Expand Up @@ -175,12 +179,14 @@ def main(argv=None):
use_brotli=args.use_brotli,
quiet=args.quiet,
)
for dirpath, _dirs, files in os.walk(args.root):
for filename in files:
if compressor.should_compress(filename):
path = os.path.join(dirpath, filename)
for _compressed in compressor.compress(path):
pass

with ThreadPoolExecutor() as executor:
    # Keep every future: ``submit`` never raises the task's exception
    # itself, it stores it on the future. Dropping the futures would make
    # failed compressions (unreadable file, full disk, ...) pass silently.
    futures = []
    for dirpath, _dirs, files in os.walk(args.root):
        futures.extend(
            executor.submit(compressor.compress, os.path.join(dirpath, filename))
            for filename in files
            if compressor.should_compress(filename)
        )
    # ``result()`` re-raises any exception raised inside the worker thread,
    # so an error in any file propagates to the caller instead of being lost.
    for future in futures:
        future.result()

return 0

Expand Down
56 changes: 38 additions & 18 deletions src/whitenoise/storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,10 @@
import os
import re
import textwrap
from collections.abc import Generator
from collections.abc import Iterator
from concurrent.futures import ThreadPoolExecutor
from concurrent.futures import as_completed
from typing import Any
from typing import Union

Expand All @@ -29,15 +32,23 @@ def post_process(
return

extensions = getattr(settings, "WHITENOISE_SKIP_COMPRESS_EXTENSIONS", None)
compressor = self.create_compressor(extensions=extensions, quiet=True)

for path in paths:
if compressor.should_compress(path):
full_path = self.path(path)
prefix_len = len(full_path) - len(path)
for compressed_path in compressor.compress(full_path):
compressed_name = compressed_path[prefix_len:]
yield path, compressed_name, True
self.compressor = self.create_compressor(extensions=extensions, quiet=True)

def _compress_path(path: str) -> list[tuple[str, str, bool]]:
    """Compress one file and return its ``(path, compressed_name, True)`` rows.

    This is deliberately a plain function rather than a generator.
    ``executor.submit`` only *calls* the function, and calling a generator
    function merely creates the generator object without running its body.
    A generator here would postpone every ``compress`` call until the
    consumer iterates ``future.result()``, i.e. outside the worker thread.
    Building the list eagerly makes the compression run inside the pool;
    the caller can still ``yield from`` the returned list.
    """
    full_path = self.path(path)
    # Assumes ``full_path`` ends with ``path``: the difference in length is
    # then the leading part to strip from each compressed path.
    prefix_len = len(full_path) - len(path)
    return [
        (path, compressed_path[prefix_len:], True)
        for compressed_path in self.compressor.compress(full_path)
    ]

with ThreadPoolExecutor() as executor:
futures = (
executor.submit(_compress_path, path)
for path in paths
if self.compressor.should_compress(path)
)
for future in as_completed(futures):
yield from future.result()

def create_compressor(self, **kwargs: Any) -> Compressor:
    """Return a ``Compressor`` built from the given keyword arguments."""
    return Compressor(**kwargs)
Expand Down Expand Up @@ -127,16 +138,25 @@ def delete_files(self, files_to_delete):
def create_compressor(self, **kwargs):
    """Return a ``Compressor`` built from the given keyword arguments."""
    return Compressor(**kwargs)

def compress_files(self, names):
def compress_files(self, paths):
extensions = getattr(settings, "WHITENOISE_SKIP_COMPRESS_EXTENSIONS", None)
compressor = self.create_compressor(extensions=extensions, quiet=True)
for name in names:
if compressor.should_compress(name):
path = self.path(name)
prefix_len = len(path) - len(name)
for compressed_path in compressor.compress(path):
compressed_name = compressed_path[prefix_len:]
yield name, compressed_name
self.compressor = self.create_compressor(extensions=extensions, quiet=True)

def _compress_path(path: str) -> list[tuple[str, str]]:
    """Compress one file and return its ``(path, compressed_name)`` pairs.

    This is deliberately a plain function rather than a generator.
    ``executor.submit`` only *calls* the function, and calling a generator
    function merely creates the generator object without running its body.
    A generator here would postpone every ``compress`` call until the
    consumer iterates ``future.result()``, i.e. outside the worker thread.
    Building the list eagerly makes the compression run inside the pool;
    the caller can still ``yield from`` the returned list.
    """
    full_path = self.path(path)
    # Assumes ``full_path`` ends with ``path``: the difference in length is
    # then the leading part to strip from each compressed path.
    prefix_len = len(full_path) - len(path)
    return [
        (path, compressed_path[prefix_len:])
        for compressed_path in self.compressor.compress(full_path)
    ]

with ThreadPoolExecutor() as executor:
futures = (
executor.submit(_compress_path, path)
for path in paths
if self.compressor.should_compress(path)
)
for future in as_completed(futures):
yield from future.result()

def make_helpful_exception(self, exception, name):
"""
Expand Down

0 comments on commit d5caf8d

Please sign in to comment.