From 67a09fafedab3d75f4302045d5fdb3329088118b Mon Sep 17 00:00:00 2001 From: Daniel Holth Date: Fri, 28 Oct 2022 16:51:01 -0400 Subject: [PATCH] add info filter, "backwards" .conda to .tar.bz2 transmute --- conda_package_streaming/__init__.py | 2 +- conda_package_streaming/extract.py | 5 +- conda_package_streaming/transmute.py | 48 ++++++++++++++- tests/test_transmute.py | 92 +++++++++++++++++++++++----- 4 files changed, 128 insertions(+), 19 deletions(-) diff --git a/conda_package_streaming/__init__.py b/conda_package_streaming/__init__.py index dd9b22c..906d362 100644 --- a/conda_package_streaming/__init__.py +++ b/conda_package_streaming/__init__.py @@ -1 +1 @@ -__version__ = "0.5.1" +__version__ = "0.6.0" diff --git a/conda_package_streaming/extract.py b/conda_package_streaming/extract.py index 7b78487..8fcd0fa 100644 --- a/conda_package_streaming/extract.py +++ b/conda_package_streaming/extract.py @@ -67,7 +67,10 @@ def extract(filename, dest_dir=None, fileobj=None): """ assert dest_dir, "dest_dir is required" if str(filename).endswith(".conda"): - components = list(package_streaming.CondaComponent) + components = [ + package_streaming.CondaComponent.pkg, + package_streaming.CondaComponent.info, + ] else: # .tar.bz2 doesn't filter by component components = [package_streaming.CondaComponent.pkg] diff --git a/conda_package_streaming/transmute.py b/conda_package_streaming/transmute.py index 220840b..bf47eec 100644 --- a/conda_package_streaming/transmute.py +++ b/conda_package_streaming/transmute.py @@ -20,7 +20,7 @@ import zstandard # streams everything in .tar.bz2 mode -from .package_streaming import stream_conda_component +from .package_streaming import CondaComponent, stream_conda_component # increase to reduce speed and increase compression (22 = conda's default) ZSTD_COMPRESS_LEVEL = 22 @@ -35,6 +35,7 @@ def transmute( compressor=lambda: zstandard.ZstdCompressor( level=ZSTD_COMPRESS_LEVEL, threads=ZSTD_COMPRESS_THREADS ), + is_info=lambda filename: filename.startswith("info/"), ): """ Convert .tar.bz2 conda :package to .conda-format under path. @@ -43,6 +44,11 @@ def transmute( :param path: destination path for transmuted .conda package :param compressor: A function that creates instances of ``zstandard.ZstdCompressor()`` to override defaults. + :param is_info: A function that returns True if a file belongs in the + ``info`` component of a `.conda` package. ``conda-package-handling`` + (not this package ``conda-package-streaming``) uses a set of regular + expressions to keep expected items in the info- component, while other + items starting with ``info/`` wind up in the pkg- component. """ assert package.endswith(".tar.bz2"), "can only convert .tar.bz2 to .conda" assert os.path.isdir(path) @@ -70,7 +76,7 @@ def transmute( stream = iter(stream_conda_component(package)) for tar, member in stream: - tar_get = info_tar if member.name.startswith("info/") else pkg_tar + tar_get = info_tar if is_info(member.name) else pkg_tar if member.isfile(): tar_get.addfile(member, tar.extractfile(member)) else: @@ -84,3 +90,41 @@ def transmute( with conda_file.open(f"info-{file_id}.tar.zst", "w") as info_file: info_file.write(info_io.getvalue()) + + +def transmute_tar_bz2( + package, + path, +): + """ + Convert .conda :package to .tar.bz2 format under path. + + Can recompress .tar.bz2 packages. + + :param package: path to `.conda` or `.tar.bz2` package. + :param path: destination path for transmuted package. + """ + assert package.endswith((".tar.bz2", ".conda")), "Unknown extension" + assert os.path.isdir(path) + + incoming_format = ".conda" if package.endswith(".conda") else ".tar.bz2" + + file_id = os.path.basename(package)[: -len(incoming_format)] + + if incoming_format == ".conda": + # .tar.bz2 MUST place info/ first. + components = [CondaComponent.info, CondaComponent.pkg] + else: + # .tar.bz2 doesn't filter by component + components = [CondaComponent.pkg] + + with open(package, "rb") as fileobj, tarfile.open( + os.path.join(path, f"{file_id}.tar.bz2"), "x:bz2" + ) as pkg_tar: + for component in components: + stream = iter(stream_conda_component(package, fileobj, component=component)) + for tar, member in stream: + if member.isfile(): + pkg_tar.addfile(member, tar.extractfile(member)) + else: + pkg_tar.addfile(member) diff --git a/tests/test_transmute.py b/tests/test_transmute.py index bc13c24..0513a07 100644 --- a/tests/test_transmute.py +++ b/tests/test_transmute.py @@ -1,11 +1,33 @@ import contextlib +import io import os import tarfile -import tempfile import time from pathlib import Path -from conda_package_streaming.transmute import transmute +import pytest + +from conda_package_streaming.package_streaming import ( + CondaComponent, + stream_conda_component, +) +from conda_package_streaming.transmute import transmute, transmute_tar_bz2 + + +@pytest.fixture +def testtar_bytes(): + buffer = io.BytesIO() + with tarfile.open("test.tar.bz2", "w:bz2", fileobj=buffer) as tar: + symlink = tarfile.TarInfo(name="symlink") + symlink.type = tarfile.LNKTYPE + symlink.linkname = "target" + tar.addfile(symlink) + + expected = tarfile.TarInfo(name="info/expected") + tar.addfile(expected, io.BytesIO()) + unexpected = tarfile.TarInfo(name="info/unexpected") + tar.addfile(unexpected, io.BytesIO()) + return buffer.getbuffer() @contextlib.contextmanager @@ -16,7 +38,7 @@ def timeme(message: str = ""): print(f"{message}{end-begin:0.2f}s") -def test_transmute(conda_paths): +def test_transmute(conda_paths, tmpdir): tarbz_packages = [] for path in conda_paths: @@ -25,21 +47,61 @@ def test_transmute(conda_paths): tarbz_packages = [path] conda_packages = [] # not supported - assert tarbz_packages, "no medium-sized package found" + assert tarbz_packages, "no medium-sized .tar.bz2 packages found" - with tempfile.TemporaryDirectory() as outdir: - for packages in (conda_packages, tarbz_packages): - for package in packages: - with timeme(f"{package} took "): - transmute(package, outdir) + for packages in (conda_packages, tarbz_packages): + for package in packages: + with timeme(f"{package} took "): + transmute(package, tmpdir) -def test_transmute_symlink(tmpdir): +def test_transmute_symlink(tmpdir, testtar_bytes): testtar = Path(tmpdir, "test.tar.bz2") - with tarfile.open(testtar, "w:bz2") as tar: - symlink = tarfile.TarInfo(name="symlink") - symlink.type = tarfile.LNKTYPE - symlink.linkname = "target" - tar.addfile(symlink) + testtar.write_bytes(testtar_bytes) transmute(str(testtar), tmpdir) + + +def test_transmute_info_filter(tmpdir, testtar_bytes): + testtar = Path(tmpdir, "test.tar.bz2") + testtar.write_bytes(testtar_bytes) + + transmute( + str(testtar), tmpdir, is_info=lambda filename: filename == "info/expected" + ) + + with open(Path(tmpdir, "test.conda"), "rb") as fileobj: + for component, expected in (CondaComponent.info, {"info/expected"}), ( + CondaComponent.pkg, + { + "info/unexpected", + "symlink", + }, + ): + items = stream_conda_component("test.conda", fileobj, component) + assert set(member.name for tar, member in items) == expected, items + + +def test_transmute_backwards(tmpdir, conda_paths): + + tarbz_packages = [] + for path in conda_paths: + path = str(path) + if path.endswith(".conda") and (1 << 20 < os.stat(path).st_size < 1 << 22): + tarbz_packages = [path] + conda_packages = [] # not supported + + assert tarbz_packages, "no medium-sized .conda packages found" + + for packages in (conda_packages, tarbz_packages): + for package in packages: + with timeme(f"{package} took "): + transmute_tar_bz2(package, tmpdir) + + +def test_transmute_tarbz2_to_tarbz2(tmpdir, testtar_bytes): + testtar = Path(tmpdir, "test.tar.bz2") + testtar.write_bytes(testtar_bytes) + outdir = Path(tmpdir, "output") + outdir.mkdir() + transmute_tar_bz2(str(testtar), outdir)