From 42392ef8e38527ce4e50454cdd357425b3f57c87 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Harabie=C5=84?= Date: Wed, 9 Oct 2024 18:53:12 +0200 Subject: [PATCH] fix: do not spam the log with checksum related INFO messages when downloading using transfer_manager (#1357) * fix: do not spam the log with checksum related INFO messages when downloading using transfer_manager The `download_chunks_concurrently` function does not allow setting the `checksum` field in `download_kwargs`. It also does not set it on its own, so it takes the default value of `"md5"` (see `Blob._prep_and_do_download`). Because ranged downloads do not return checksums, this results in a lot of INFO messages (tens/hundreds): ``` INFO google.resumable_media._helpers - No MD5 checksum was returned from the service while downloading ... (which happens for composite objects), so client-side content integrity checking is not being performed. ``` To fix it, set the `checksum` field to `None`, which means no checksum checking for individual chunks. Note that `transfer_manager` has its own checksum checking logic (enabled by the `crc32c_checksum` argument). * fix tests --- google/cloud/storage/transfer_manager.py | 2 ++ tests/unit/test_transfer_manager.py | 7 +------ 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/google/cloud/storage/transfer_manager.py b/google/cloud/storage/transfer_manager.py index 1b48cd9cf..15325df56 100644 --- a/google/cloud/storage/transfer_manager.py +++ b/google/cloud/storage/transfer_manager.py @@ -885,6 +885,8 @@ def download_chunks_concurrently( "'checksum' is in download_kwargs, but is not supported because sliced downloads have a different checksum mechanism from regular downloads. Use the 'crc32c_checksum' argument on download_chunks_concurrently instead." ) + download_kwargs = download_kwargs.copy() + download_kwargs["checksum"] = None download_kwargs["command"] = "tm.download_sharded" # We must know the size and the generation of the blob. 
diff --git a/tests/unit/test_transfer_manager.py b/tests/unit/test_transfer_manager.py index cee83ba54..09969b5eb 100644 --- a/tests/unit/test_transfer_manager.py +++ b/tests/unit/test_transfer_manager.py @@ -606,6 +606,7 @@ def test_download_chunks_concurrently(): expected_download_kwargs = EXPECTED_DOWNLOAD_KWARGS.copy() expected_download_kwargs["command"] = "tm.download_sharded" + expected_download_kwargs["checksum"] = None with mock.patch("google.cloud.storage.transfer_manager.open", mock.mock_open()): result = transfer_manager.download_chunks_concurrently( @@ -636,9 +637,6 @@ def test_download_chunks_concurrently_with_crc32c(): blob_mock.size = len(BLOB_CONTENTS) blob_mock.crc32c = "eOVVVw==" - expected_download_kwargs = EXPECTED_DOWNLOAD_KWARGS.copy() - expected_download_kwargs["command"] = "tm.download_sharded" - def write_to_file(f, *args, **kwargs): f.write(BLOB_CHUNK) @@ -664,9 +662,6 @@ def test_download_chunks_concurrently_with_crc32c_failure(): blob_mock.size = len(BLOB_CONTENTS) blob_mock.crc32c = "invalid" - expected_download_kwargs = EXPECTED_DOWNLOAD_KWARGS.copy() - expected_download_kwargs["command"] = "tm.download_sharded" - def write_to_file(f, *args, **kwargs): f.write(BLOB_CHUNK)