Skip to content

Commit

Permalink
add info filter, "backwards" .conda to .tar.bz2 transmute
Browse files Browse the repository at this point in the history
  • Loading branch information
dholth committed Oct 28, 2022
1 parent c2b7750 commit 67a09fa
Show file tree
Hide file tree
Showing 4 changed files with 128 additions and 19 deletions.
2 changes: 1 addition & 1 deletion conda_package_streaming/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "0.5.1"
__version__ = "0.6.0"
5 changes: 4 additions & 1 deletion conda_package_streaming/extract.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,10 @@ def extract(filename, dest_dir=None, fileobj=None):
"""
assert dest_dir, "dest_dir is required"
if str(filename).endswith(".conda"):
components = list(package_streaming.CondaComponent)
components = [
package_streaming.CondaComponent.pkg,
package_streaming.CondaComponent.info,
]
else: # .tar.bz2 doesn't filter by component
components = [package_streaming.CondaComponent.pkg]

Expand Down
48 changes: 46 additions & 2 deletions conda_package_streaming/transmute.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
import zstandard

# streams everything in .tar.bz2 mode
from .package_streaming import stream_conda_component
from .package_streaming import CondaComponent, stream_conda_component

# increase to reduce speed and increase compression (22 = conda's default)
ZSTD_COMPRESS_LEVEL = 22
Expand All @@ -35,6 +35,7 @@ def transmute(
compressor=lambda: zstandard.ZstdCompressor(
level=ZSTD_COMPRESS_LEVEL, threads=ZSTD_COMPRESS_THREADS
),
is_info=lambda filename: filename.startswith("info/"),
):
"""
Convert .tar.bz2 conda :package to .conda-format under path.
Expand All @@ -43,6 +44,11 @@ def transmute(
:param path: destination path for transmuted .conda package
:param compressor: A function that creates instances of
``zstandard.ZstdCompressor()`` to override defaults.
:param is_info: A function that returns True if a file belongs in the
``info`` component of a `.conda` package. ``conda-package-handling``
(not this package ``conda-package-streaming``) uses a set of regular
expressions to keep expected items in the info- component, while other
items starting with ``info/`` wind up in the pkg- component.
"""
assert package.endswith(".tar.bz2"), "can only convert .tar.bz2 to .conda"
assert os.path.isdir(path)
Expand Down Expand Up @@ -70,7 +76,7 @@ def transmute(

stream = iter(stream_conda_component(package))
for tar, member in stream:
tar_get = info_tar if member.name.startswith("info/") else pkg_tar
tar_get = info_tar if is_info(member.name) else pkg_tar
if member.isfile():
tar_get.addfile(member, tar.extractfile(member))
else:
Expand All @@ -84,3 +90,41 @@ def transmute(

with conda_file.open(f"info-{file_id}.tar.zst", "w") as info_file:
info_file.write(info_io.getvalue())


def transmute_tar_bz2(
    package,
    path,
):
    """
    Convert .conda :package to .tar.bz2 format under path.

    Can also recompress .tar.bz2 packages.

    The output file is named after the input with its extension replaced by
    ``.tar.bz2`` and is opened in exclusive-create mode (``"x:bz2"``), so an
    existing file of that name is never overwritten. Recompressing a
    ``.tar.bz2`` therefore requires ``path`` to differ from the directory
    that holds ``package``.

    :param package: path (``str`` or ``os.PathLike``) to a `.conda` or
        `.tar.bz2` package.
    :param path: existing destination directory for the transmuted package.

    :raises AssertionError: if ``package`` has an unknown extension or
        ``path`` is not a directory.
    :raises FileExistsError: if the destination ``.tar.bz2`` already exists.
    """
    # Normalize so pathlib.Path inputs work; a bare Path has no .endswith().
    package = os.fspath(package)

    # Asserts are kept (rather than raising ValueError) so the exception type
    # seen by existing callers does not change.
    assert package.endswith((".tar.bz2", ".conda")), "Unknown extension"
    assert os.path.isdir(path)

    incoming_format = ".conda" if package.endswith(".conda") else ".tar.bz2"

    file_id = os.path.basename(package)[: -len(incoming_format)]

    if incoming_format == ".conda":
        # .tar.bz2 MUST place info/ first.
        components = [CondaComponent.info, CondaComponent.pkg]
    else:
        # .tar.bz2 doesn't filter by component
        components = [CondaComponent.pkg]

    with open(package, "rb") as fileobj, tarfile.open(
        os.path.join(path, f"{file_id}.tar.bz2"), "x:bz2"
    ) as pkg_tar:
        for component in components:
            for tar, member in stream_conda_component(
                package, fileobj, component=component
            ):
                if member.isfile():
                    # Regular files carry data; copy it from the source tar.
                    pkg_tar.addfile(member, tar.extractfile(member))
                else:
                    # Links, directories, etc. are header-only entries.
                    pkg_tar.addfile(member)
92 changes: 77 additions & 15 deletions tests/test_transmute.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,33 @@
import contextlib
import io
import os
import tarfile
import tempfile
import time
from pathlib import Path

from conda_package_streaming.transmute import transmute
import pytest

from conda_package_streaming.package_streaming import (
CondaComponent,
stream_conda_component,
)
from conda_package_streaming.transmute import transmute, transmute_tar_bz2


@pytest.fixture
def testtar_bytes():
    """
    Build a small bz2-compressed tar archive in memory and return its bytes.

    The archive holds one link entry named ``symlink`` (pointing at
    ``target``) followed by two empty regular files, ``info/expected`` and
    ``info/unexpected``.
    """
    archive = io.BytesIO()
    with tarfile.open("test.tar.bz2", "w:bz2", fileobj=archive) as tar:
        # NOTE: tarfile.LNKTYPE is the hard-link type (SYMTYPE would be a
        # symbolic link); either way this is a non-regular, header-only member.
        link = tarfile.TarInfo(name="symlink")
        link.type = tarfile.LNKTYPE
        link.linkname = "target"
        tar.addfile(link)

        for name in ("info/expected", "info/unexpected"):
            tar.addfile(tarfile.TarInfo(name=name), io.BytesIO())
    # Read the buffer only after the tar is closed, so the archive is complete.
    return archive.getbuffer()


@contextlib.contextmanager
Expand All @@ -16,7 +38,7 @@ def timeme(message: str = ""):
print(f"{message}{end-begin:0.2f}s")


def test_transmute(conda_paths):
def test_transmute(conda_paths, tmpdir):

tarbz_packages = []
for path in conda_paths:
Expand All @@ -25,21 +47,61 @@ def test_transmute(conda_paths):
tarbz_packages = [path]
conda_packages = [] # not supported

assert tarbz_packages, "no medium-sized package found"
assert tarbz_packages, "no medium-sized .tar.bz2 packages found"

with tempfile.TemporaryDirectory() as outdir:
for packages in (conda_packages, tarbz_packages):
for package in packages:
with timeme(f"{package} took "):
transmute(package, outdir)
for packages in (conda_packages, tarbz_packages):
for package in packages:
with timeme(f"{package} took "):
transmute(package, tmpdir)


def test_transmute_symlink(tmpdir):
def test_transmute_symlink(tmpdir, testtar_bytes):
testtar = Path(tmpdir, "test.tar.bz2")
with tarfile.open(testtar, "w:bz2") as tar:
symlink = tarfile.TarInfo(name="symlink")
symlink.type = tarfile.LNKTYPE
symlink.linkname = "target"
tar.addfile(symlink)
testtar.write_bytes(testtar_bytes)

transmute(str(testtar), tmpdir)


def test_transmute_info_filter(tmpdir, testtar_bytes):
    """
    A custom ``is_info`` predicate decides which members land in the info
    component; everything else, including other ``info/`` paths, goes to pkg.
    """
    source = Path(tmpdir, "test.tar.bz2")
    source.write_bytes(testtar_bytes)

    def only_expected(filename):
        return filename == "info/expected"

    transmute(str(source), tmpdir, is_info=only_expected)

    expected_by_component = (
        (CondaComponent.info, {"info/expected"}),
        (CondaComponent.pkg, {"info/unexpected", "symlink"}),
    )

    with open(Path(tmpdir, "test.conda"), "rb") as fileobj:
        for component, expected in expected_by_component:
            items = stream_conda_component("test.conda", fileobj, component)
            names = {member.name for _, member in items}
            assert names == expected, items


def test_transmute_backwards(tmpdir, conda_paths):
    """
    Convert a medium-sized (between 1 MiB and 4 MiB, exclusive) ``.conda``
    package from ``conda_paths`` back to ``.tar.bz2`` and print the time taken.
    """
    chosen = []
    for candidate in map(str, conda_paths):
        if not candidate.endswith(".conda"):
            continue
        if 1 << 20 < os.stat(candidate).st_size < 1 << 22:
            # Later matches replace earlier ones; only one package is converted.
            chosen = [candidate]

    assert chosen, "no medium-sized .conda packages found"

    for package in chosen:
        with timeme(f"{package} took "):
            transmute_tar_bz2(package, tmpdir)


def test_transmute_tarbz2_to_tarbz2(tmpdir, testtar_bytes):
    """Recompress a ``.tar.bz2`` package into a separate output directory."""
    source = Path(tmpdir) / "test.tar.bz2"
    source.write_bytes(testtar_bytes)

    # The output keeps the input's file name, so it goes in its own directory.
    destination = Path(tmpdir) / "output"
    destination.mkdir()

    transmute_tar_bz2(str(source), destination)

0 comments on commit 67a09fa

Please sign in to comment.