Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Validate local repo every time you run conda-mirror #28

Merged
merged 7 commits into from
Feb 15, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
109 changes: 20 additions & 89 deletions conda_mirror/conda_mirror.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
import os
import pdb
import shutil
import subprocess
import sys
import json
import tarfile
Expand All @@ -17,6 +16,7 @@
import bz2
import requests
import yaml
import hashlib

logger = None

Expand Down Expand Up @@ -227,7 +227,7 @@ def pdb_hook(exctype, value, traceback):
args.platform, blacklist, whitelist)


def _remove_package(pkg_path, reason=None):
def _remove_package(pkg_path, reason):
"""
Log and remove a package.

Expand All @@ -236,41 +236,15 @@ def _remove_package(pkg_path, reason=None):
pkg_path : str
Path to a conda package that should be removed
"""
if reason is None:
reason = "No reason given"
msg = "Removing: %s. Reason: %s"
logger.warning(msg, pkg_path, reason)
os.remove(pkg_path)


def _assert_or_remove(left, right, assertion_test, filename):
    """Compare two values and remove the package at `filename` if they differ.

    Parameters
    ----------
    left : object
        First value to compare
    right : object
        Second value to compare
    assertion_test : str
        Short name of the check. Used in the log messages and in the
        removal reason
    filename : str
        Path to the conda package that is removed when the values differ

    Returns
    -------
    bool or None
        True if the values differed and the package was removed,
        otherwise None
    """
    # Compare explicitly instead of using `assert`: assert statements are
    # stripped when Python runs with -O, which would make every check pass
    # and leave corrupt packages in place.
    if left != right:
        # No exception is in flight here, so exc_info is not passed.
        logger.info("Package validation failed for %s: %s != %s",
                    assertion_test, left, right)
        _remove_package(filename, reason="Failed %s test" % assertion_test)
        return True
    logger.debug('%s check passed', assertion_test)


def _get_output(cmd):
    """Run `cmd` and return the first whitespace-separated token it prints.

    Parameters
    ----------
    cmd : sequence of str
        Command and arguments handed to `subprocess.check_output`

    Returns
    -------
    str
        First token of the decoded output, or "" when the command fails or
        any other error occurs (the error is logged)
    """
    try:
        raw_output = subprocess.check_output(cmd)
        tokens = raw_output.decode().strip().split()
        return tokens[0]
    except subprocess.CalledProcessError as cpe:
        # The command ran but exited non-zero; log whatever it printed
        logger.exception(cpe.output.decode())
    except Exception:
        logger.exception("Error in subprocess.check_output. cmd: '%s'",
                         ' '.join(cmd))
    return ""


def _validate(filename, md5=None, sha256=None, size=None):
def _validate(filename, md5=None, size=None):
"""Validate the conda package tarfile located at `filename` with any of the
passed in options `md5`, `sha256` or `size. Also implicitly validate that
passed in options `md5` or `size`. Also implicitly validate that
the conda package is a valid tarfile.

NOTE: Removes packages that fail validation
Expand All @@ -281,8 +255,6 @@ def _validate(filename, md5=None, sha256=None, size=None):
The path to the file you wish to validate
md5 : str, optional
If provided, perform an `md5sum` on `filename` and compare to `md5`
sha256 : str, optional
If provided, perform a `sha256sum` on `filename` and compare to `sha256`
size : int, optional
if provided, stat the file at `filename` and make sure its size
matches `size`
Expand All @@ -291,20 +263,19 @@ def _validate(filename, md5=None, sha256=None, size=None):
t = tarfile.open(filename)
t.extractfile('info/index.json').read().decode('utf-8')
except tarfile.TarError:
logger.debug("tarfile error encountered. Original error below.")
logger.debug(pformat(traceback.format_exc()))
logger.info("Validation failed because conda package is corrupted.",
exc_info=True)
_remove_package(filename, reason="Tarfile read failure")
return
checks = [
(size, lambda: os.stat(filename).st_size, 'size'),
(md5, lambda: _get_output(['md5sum', filename]), 'md5'),
(sha256, lambda: _get_output(['sha256sum', filename]), 'sha256'),
]
for target, validate_function, description in checks:
if target is not None:
if _assert_or_remove(target, validate_function(), description,
filename):
return
if size:
if os.stat(filename).st_size != size:
_remove_package(filename, reason="Failed size test")
if md5:
calc = hashlib.md5(open(filename, 'rb').read()).hexdigest()
if calc != md5:
_remove_package(
filename,
reason="Failed md5 validation. Expected: %s. Computed: %s" % (calc, md5))


def get_repodata(channel, platform):
Expand All @@ -330,8 +301,7 @@ def get_repodata(channel, platform):
return json.get('info', {}), json.get('packages', {})


def _download(url, target_directory, package_metadata=None, validate=True,
chunk_size=None):
def _download(url, target_directory):
"""Download `url` to `target_directory`

Parameters
Expand All @@ -340,17 +310,8 @@ def _download(url, target_directory, package_metadata=None, validate=True,
The url to download
target_directory : str
The path to a directory where `url` should be downloaded
package_metadata : dict, optional
package metadata from repodata.json. Will be used for validation of
the downloaded package. If None, then validation is skipped
validate : bool, optional
True: Perform package validation if `package_metadata` is provided.
Defaults to True.
chunk_size : int, optional
The size in Bytes to chunk the download iterator. Defaults to 1024 (1KB)
"""
if chunk_size is None:
chunk_size = 1024 # 1KB chunks
chunk_size = 1024 # 1KB chunks
logger.info("download_url=%s", url)
# create a temporary file
target_filename = url.split('/')[-1]
Expand All @@ -360,16 +321,6 @@ def _download(url, target_directory, package_metadata=None, validate=True,
ret = requests.get(url, stream=True)
for data in ret.iter_content(chunk_size):
tf.write(data)
# do some validations
if validate and package_metadata:
_validate(download_filename,
md5=package_metadata.get('md5'),
sha256=package_metadata.get('sha256'),
size=package_metadata.get('size'))
else:
logger.info("Not validating %s because validate is %s and "
"package_metadata is %s", download_filename, validate,
package_metadata)


def _list_conda_packages(local_dir):
Expand Down Expand Up @@ -423,29 +374,9 @@ def _validate_packages(package_repodata, package_directory):
len(local_packages))
_validate(os.path.join(package_directory, package),
md5=package_metadata.get('md5'),
sha256=package_metadata.get('sha256'),
size=package_metadata.get('size'))


def _remove_local_blacklisted(blacklist, local_dir):
    """Delete every conda package in `local_dir` that is in `blacklist`.

    Parameters
    ----------
    blacklist : list
        Packages that should not be in `local_dir`
    local_dir : str
        Local directory to check for blacklisted conda packages
    """
    for candidate in _list_conda_packages(local_dir):
        # Leave anything that is not blacklisted untouched
        if candidate not in blacklist:
            continue
        _remove_package(os.path.join(local_dir, candidate),
                        reason="Package is blacklisted")


def main(upstream_channel, target_directory, temp_directory, platform,
blacklist=None, whitelist=None):
"""
Expand Down Expand Up @@ -558,8 +489,8 @@ def main(upstream_channel, target_directory, temp_directory, platform,
logger.debug('possible_packages_to_mirror')
logger.debug(pformat(sorted(possible_packages_to_mirror)))

# 4. remove blacklisted packages
_remove_local_blacklisted(true_blacklist, local_directory)
# 4. Validate all local packages
_validate_packages(possible_packages_to_mirror, local_directory)

# 5. figure out final list of packages to mirror
# do the set difference of what is local and what is in the final
Expand All @@ -582,7 +513,7 @@ def main(upstream_channel, target_directory, temp_directory, platform,
channel=channel,
platform=platform,
file_name=package_name)
_download(url, download_dir, packages)
_download(url, download_dir)

# validate all packages in the download directory
_validate_packages(packages, download_dir)
Expand Down
1 change: 0 additions & 1 deletion test-requirements.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,2 @@
coverage
pytest
pytest-ordering
26 changes: 7 additions & 19 deletions test/test_conda_mirror.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,13 @@ def test_match(repodata):
assert len(matched) == len(repodata_packages)


def test_version():
    """Run the CLI entry point with `--version` as its only argument."""
    old_args = copy.copy(sys.argv)
    sys.argv = ['conda-mirror', '--version']
    try:
        conda_mirror.cli()
    finally:
        # Restore argv even if cli() raises, so the patched value does not
        # leak into tests that run afterwards.
        sys.argv = old_args


@pytest.mark.parametrize(
'channel,platform',
itertools.product([anaconda_channel, 'conda-forge'], ['linux-64']))
Expand Down Expand Up @@ -113,22 +120,3 @@ def test_handling_bad_package(tmpdir, repodata):
conda_mirror._validate_packages(anaconda_repodata, bad_pkg_root)
assert bad_pkg_name not in os.listdir(bad_pkg_root)


def test_local_blacklisted_package(tmpdir):
    """Check that only the blacklisted package is removed from the directory."""
    repo_root = tmpdir.mkdir('repo').strpath
    pkg_root = os.path.join(repo_root, 'linux-64')
    os.makedirs(pkg_root)
    blacklisted_pkg_name = 'remove-1-0.tar.bz2'
    non_blacklisted_pkg_name = 'keep-1-0.tar.bz2'

    # Write two small bz2-compressed placeholder files into the package dir
    placeholder_files = (
        (blacklisted_pkg_name, "This is a blacklisted package"),
        (non_blacklisted_pkg_name, "This is not a blacklisted package"),
    )
    for pkg_name, text in placeholder_files:
        with bz2.BZ2File(os.path.join(pkg_root, pkg_name), 'wb') as f:
            f.write(text.encode())

    # Test removal of local blacklisted packages
    conda_mirror.logger.info("Testing %s", blacklisted_pkg_name)
    assert blacklisted_pkg_name in os.listdir(pkg_root)
    conda_mirror._remove_local_blacklisted([blacklisted_pkg_name], pkg_root)
    assert blacklisted_pkg_name not in os.listdir(pkg_root)
    assert non_blacklisted_pkg_name in os.listdir(pkg_root)