Skip to content

Commit

Permalink
Merge tag 'v1.1.0'
Browse files Browse the repository at this point in the history
  • Loading branch information
jaraco committed Jan 25, 2020
2 parents 5a512a3 + 088b0cf commit 8031665
Show file tree
Hide file tree
Showing 4 changed files with 130 additions and 42 deletions.
14 changes: 14 additions & 0 deletions CHANGES.rst
Original file line number Diff line number Diff line change
@@ -1,3 +1,17 @@
v2.1.0
======

#32: Merge with v1.1.0.

v1.1.0
======

#32: For read-only zip files, complexity of ``.exists`` and
``joinpath`` is now constant time instead of ``O(n)``, preventing
quadratic time in common use-cases and rendering large
zip files unusable for Path. Big thanks to Benjy Weinberger
for the bug report and contributed fix (#33).

v2.0.1
======

Expand Down
1 change: 1 addition & 0 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ testing =
# upstream

# local
jaraco.itertools

docs =
# upstream
Expand Down
26 changes: 25 additions & 1 deletion test_zipp.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
import tempfile
import shutil

import jaraco.itertools

import zipp

consume = tuple
Expand All @@ -16,7 +18,7 @@ def add_dirs(zf):
Given a writable zip file zf, inject directory entries for
any directories implied by the presence of children.
"""
for name in zipp.Path._implied_dirs(zf.namelist()):
for name in zipp.CompleteDirs._implied_dirs(zf.namelist()):
zf.writestr(name, b"")
return zf

Expand Down Expand Up @@ -201,3 +203,25 @@ def test_mutability(self):
assert (root / 'foo.txt').read_text() == 'foo'
baz, = (root / 'bar').iterdir()
assert baz.read_text() == 'baz'

HUGE_ZIPFILE_NUM_ENTRIES = 2 ** 13

def huge_zipfile(self):
"""Create a read-only zipfile with a huge number of entries entries."""
strm = io.BytesIO()
zf = zipfile.ZipFile(strm, "w")
for entry in map(str, range(self.HUGE_ZIPFILE_NUM_ENTRIES)):
zf.writestr(entry, entry)
zf.mode = 'r'
return zf

def test_joinpath_constant_time(self):
"""
Ensure joinpath on items in zipfile is linear time.
"""
root = zipp.Path(self.huge_zipfile())
entries = jaraco.itertools.Counter(root.iterdir())
for entry in entries:
entry.joinpath('suffix')
# Check the file iterated all items
assert entries.count == self.HUGE_ZIPFILE_NUM_ENTRIES
131 changes: 90 additions & 41 deletions zipp.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import zipfile
import functools
import itertools
import contextlib
from collections import OrderedDict


Expand Down Expand Up @@ -47,6 +48,90 @@ def _ancestry(path):
path, tail = posixpath.split(path)


class CompleteDirs(zipfile.ZipFile):
"""
A ZipFile subclass that ensures that implied directories
are always included in the namelist.
"""

@staticmethod
def _implied_dirs(names):
parents = itertools.chain.from_iterable(map(_parents, names))
# Deduplicate entries in original order
implied_dirs = OrderedDict.fromkeys(
p + posixpath.sep for p in parents
# Cast names to a set for O(1) lookups
if p + posixpath.sep not in set(names)
)
return implied_dirs

def namelist(self):
names = super(CompleteDirs, self).namelist()
return names + list(self._implied_dirs(names))

def _name_set(self):
return set(self.namelist())

def resolve_dir(self, name):
"""
If the name represents a directory, return that name
as a directory (with the trailing slash).
"""
names = self._name_set()
dirname = name + '/'
dir_match = name not in names and dirname in names
return dirname if dir_match else name

@classmethod
def make(cls, source):
"""
Given a source (filename or zipfile), return an
appropriate CompleteDirs subclass.
"""
if isinstance(source, CompleteDirs):
return source

if not isinstance(source, zipfile.ZipFile):
return cls(_pathlib_compat(source))

# Only allow for FastPath when supplied zipfile is read-only
if 'r' not in source.mode:
cls = CompleteDirs

res = cls.__new__(cls)
vars(res).update(vars(source))
return res


class FastLookup(CompleteDirs):
"""
ZipFile subclass to ensure implicit
dirs exist and are resolved rapidly.
"""
def namelist(self):
with contextlib.suppress(AttributeError):
return self.__names
self.__names = super(FastLookup, self).namelist()
return self.__names

def _name_set(self):
with contextlib.suppress(AttributeError):
return self.__lookup
self.__lookup = super(FastLookup, self)._name_set()
return self.__lookup


def _pathlib_compat(path):
"""
For path-like objects, convert to a filename for compatibility
on Python 3.6.1 and earlier.
"""
try:
return path.__fspath__()
except AttributeError:
return str(path)


class Path:
"""
A pathlib-compatible interface for zip files.
Expand Down Expand Up @@ -115,24 +200,9 @@ class Path:
__repr = "{self.__class__.__name__}({self.root.filename!r}, {self.at!r})"

def __init__(self, root, at=""):
self.root = (
root
if isinstance(root, zipfile.ZipFile)
else zipfile.ZipFile(self._pathlib_compat(root))
)
self.root = FastLookup.make(root)
self.at = at

@staticmethod
def _pathlib_compat(path):
"""
For path-like objects, convert to a filename for compatibility
on Python 3.6.1 and earlier.
"""
try:
return path.__fspath__()
except AttributeError:
return str(path)

@property
def open(self):
return functools.partial(self.root.open, self.at)
Expand Down Expand Up @@ -162,12 +232,12 @@ def is_file(self):
return not self.is_dir()

def exists(self):
return self.at in self._names()
return self.at in self.root._name_set()

def iterdir(self):
if not self.is_dir():
raise ValueError("Can't listdir a file")
subs = map(self._next, self._names())
subs = map(self._next, self.root.namelist())
return filter(self._is_child, subs)

def __str__(self):
Expand All @@ -177,35 +247,14 @@ def __repr__(self):
return self.__repr.format(self=self)

def joinpath(self, add):
add = self._pathlib_compat(add)
next = posixpath.join(self.at, add)
next_dir = posixpath.join(self.at, add, "")
names = self._names()
return self._next(next_dir if next not in names and next_dir in names else next)
next = posixpath.join(self.at, _pathlib_compat(add))
return self._next(self.root.resolve_dir(next))

__truediv__ = joinpath

@staticmethod
def _implied_dirs(names):
parents = itertools.chain.from_iterable(map(_parents, names))
# Deduplicate entries in original order
implied_dirs = OrderedDict.fromkeys(
p + posixpath.sep for p in parents
# Cast names to a set for O(1) lookups
if p + posixpath.sep not in set(names)
)
return iter(implied_dirs)

@classmethod
def _add_implied_dirs(cls, names):
return names + list(cls._implied_dirs(names))

@property
def parent(self):
parent_at = posixpath.dirname(self.at.rstrip('/'))
if parent_at:
parent_at += '/'
return self._next(parent_at)

def _names(self):
return self._add_implied_dirs(self.root.namelist())

0 comments on commit 8031665

Please sign in to comment.