From 0dbab23df903926de71b8e2ce08359a378327ad1 Mon Sep 17 00:00:00 2001 From: Ian Wienand Date: Wed, 26 Jun 2019 19:44:43 +1000 Subject: [PATCH] Add SHA256 hash of .whl as info output (#5908) * Add SHA256 hash of .whl as info output Currently I'm trying to debug some issues with what appear to be corrupt wheels. It would be very useful to see what pip thought the state of things was as it wrote the wheel output; if a final corrupt distributed file is then different to what pip has saved in its build logs, you know the problem is somewhere after pip but before distribution. Currently we get a log of the initial creation, then the stamp when it gets moved into the final output location, e.g.: creating '/tmp/pip-wheel-71CpBe/foo-1.2.3-py2.py3-none-any.whl ... Stored in directory: /opt/wheel/workspace A lot happens in between these, so my suggestion is we add the final output file and its hash before the "Stored in directory:", e.g. you now see: Building wheels for collected packages: simple Running setup.py bdist_wheel for simple: started Running setup.py bdist_wheel for simple: finished with status 'done' Finished: simple-3.0-py3-none-any.whl sha256=39005a57a6327972575072af82e11d0817439fe6a069381f6f2a123a8c0bf1cf Stored in directory: /tmp/pytest-of-iwienand/pytest-18/test_pip_wheel_success0/workspace/scratch Successfully built simple Despite the hash being fairly important for things like --require-hashes, AFAICS the final hash is not put in the logs at all currently, so I think this is generally helpful. * Reword wheel hash details output This rewords the output to be more like the form of the preceding messages. Additionally the size is added, since we have calculated it anyway. 
The output will now look like: Collecting simple==3.0 Building wheels for collected packages: simple Building wheel for simple (setup.py): started Building wheel for simple (setup.py): finished with status 'done' Created wheel for simple: filename=simple-3.0-py3-none-any.whl size=1138 sha256=2a980a802c9d38a24d29aded2dc2df2b080e58370902e5fdf950090ff67aec10 Stored in directory: /tmp/pytest-of-iwienand/pytest-0/test_pip_wheel_success0/workspace/scratch Successfully built simple --- news/5908.feature | 2 ++ src/pip/_internal/wheel.py | 16 ++++++++++++++-- tests/functional/test_wheel.py | 7 +++++++ tests/unit/test_wheel.py | 26 ++++++++++++++++++++++++++ 4 files changed, 49 insertions(+), 2 deletions(-) create mode 100644 news/5908.feature diff --git a/news/5908.feature b/news/5908.feature new file mode 100644 index 00000000000..4e63748593f --- /dev/null +++ b/news/5908.feature @@ -0,0 +1,2 @@ +Log the final filename and SHA256 of a ``.whl`` file when done building a +wheel. diff --git a/src/pip/_internal/wheel.py b/src/pip/_internal/wheel.py index f113f8158a3..501a184eac5 100644 --- a/src/pip/_internal/wheel.py +++ b/src/pip/_internal/wheel.py @@ -68,8 +68,8 @@ def normpath(src, p): return os.path.relpath(src, p).replace(os.path.sep, '/') -def rehash(path, blocksize=1 << 20): - # type: (str, int) -> Tuple[str, str] +def hash_file(path, blocksize=1 << 20): + # type: (str, int) -> Tuple[Any, int] """Return (hash, length) for path using hashlib.sha256()""" h = hashlib.sha256() length = 0 @@ -77,6 +77,13 @@ def rehash(path, blocksize=1 << 20): for block in read_chunks(f, size=blocksize): length += len(block) h.update(block) + return (h, length) # type: ignore + + +def rehash(path, blocksize=1 << 20): + # type: (str, int) -> Tuple[str, str] + """Return (encoded_digest, length) for path using hashlib.sha256()""" + h, length = hash_file(path, blocksize) digest = 'sha256=' + urlsafe_b64encode( h.digest() ).decode('latin1').rstrip('=') @@ -902,7 +909,12 @@ def 
_build_one_inside_env(self, req, output_dir, python_tag=None): wheel_name = os.path.basename(wheel_path) dest_path = os.path.join(output_dir, wheel_name) try: + wheel_hash, length = hash_file(wheel_path) shutil.move(wheel_path, dest_path) + logger.info('Created wheel for %s: ' + 'filename=%s size=%d sha256=%s', + req.name, wheel_name, length, + wheel_hash.hexdigest()) logger.info('Stored in directory: %s', output_dir) return dest_path except Exception: diff --git a/tests/functional/test_wheel.py b/tests/functional/test_wheel.py index f67720f165a..ee0b19e47a4 100644 --- a/tests/functional/test_wheel.py +++ b/tests/functional/test_wheel.py @@ -1,5 +1,6 @@ """'pip wheel' tests""" import os +import re from os.path import exists import pytest @@ -48,6 +49,12 @@ def test_pip_wheel_success(script, data): ) wheel_file_name = 'simple-3.0-py%s-none-any.whl' % pyversion[0] wheel_file_path = script.scratch / wheel_file_name + assert re.search( + r"Created wheel for simple: " + r"filename=%s size=\d+ sha256=[A-Fa-f0-9]{64}" + % re.escape(wheel_file_name), result.stdout) + assert re.search( + r"^\s+Stored in directory: ", result.stdout, re.M) assert wheel_file_path in result.files_created, result.stdout assert "Successfully built simple" in result.stdout, result.stdout diff --git a/tests/unit/test_wheel.py b/tests/unit/test_wheel.py index cae9ef35e32..10a294b9e96 100644 --- a/tests/unit/test_wheel.py +++ b/tests/unit/test_wheel.py @@ -815,3 +815,29 @@ def test_missing_PATH_env_treated_as_empty_PATH_env(self): retval_empty = wheel.message_about_scripts_not_on_PATH(scripts) assert retval_missing == retval_empty + + +class TestWheelHashCalculators(object): + + def prep(self, tmpdir): + self.test_file = tmpdir.join("hash.file") + # Want this big enough to trigger the internal read loops. 
+ self.test_file_len = 2 * 1024 * 1024 + with open(str(self.test_file), "w") as fp: + fp.truncate(self.test_file_len) + self.test_file_hash = \ + '5647f05ec18958947d32874eeb788fa396a05d0bab7c1b71f112ceb7e9b31eee' + self.test_file_hash_encoded = \ + 'sha256=VkfwXsGJWJR9ModO63iPo5agXQurfBtx8RLOt-mzHu4' + + def test_hash_file(self, tmpdir): + self.prep(tmpdir) + h, length = wheel.hash_file(self.test_file) + assert length == self.test_file_len + assert h.hexdigest() == self.test_file_hash + + def test_rehash(self, tmpdir): + self.prep(tmpdir) + h, length = wheel.rehash(self.test_file) + assert length == str(self.test_file_len) + assert h == self.test_file_hash_encoded