From 7e7d158ee982c4133945128299fdc26c9edb75a5 Mon Sep 17 00:00:00 2001 From: Scott K Logan Date: Fri, 12 Nov 2021 14:04:26 -0800 Subject: [PATCH] Compress HTTP with GZip where available The server behind GitHub URLs supports GZip compressed content encoding, which we can leverage to compress the YAML database behind rosdep substantially. This reduces the typical bandwidth usage of a call to `rosdep update` by 84%, from 410KiB to 64KiB. It doesn't appear to change the runtime performance on a moderately fast internet connection, but might be very helpful on less performant connections. --- src/rosdep2/gbpdistro_support.py | 8 ++--- src/rosdep2/platforms/source.py | 7 ++-- src/rosdep2/rep3.py | 7 ++-- src/rosdep2/sources_list.py | 20 ++---------- src/rosdep2/url_utils.py | 56 ++++++++++++++++++++++++++++++++ 5 files changed, 66 insertions(+), 32 deletions(-) create mode 100644 src/rosdep2/url_utils.py diff --git a/src/rosdep2/gbpdistro_support.py b/src/rosdep2/gbpdistro_support.py index fd90ae096..0b523c6d5 100644 --- a/src/rosdep2/gbpdistro_support.py +++ b/src/rosdep2/gbpdistro_support.py @@ -1,7 +1,3 @@ -try: - from urllib.request import urlopen -except ImportError: - from urllib2 import urlopen import yaml try: import urlparse @@ -22,6 +18,8 @@ from .rep3 import download_targets_data # deprecated, will output warning +from .url_utils import urlopen_gzip + import warnings create_default_installer_context = None @@ -201,7 +199,7 @@ def download_gbpdistro_as_rosdep_data(gbpdistro_url, targets_url=None): # will output a warning targets_data = download_targets_data(targets_url=targets_url) try: - f = urlopen(gbpdistro_url, timeout=DOWNLOAD_TIMEOUT) + f = urlopen_gzip(gbpdistro_url, timeout=DOWNLOAD_TIMEOUT) text = f.read() f.close() gbpdistro_data = yaml.safe_load(text) diff --git a/src/rosdep2/platforms/source.py b/src/rosdep2/platforms/source.py index 3ba17fedc..b3dcf3e7d 100644 --- a/src/rosdep2/platforms/source.py +++ b/src/rosdep2/platforms/source.py @@ -31,13 +31,9 @@ import os try: - from urllib.request import urlopen from urllib.request import urlretrieve - from urllib.error import URLError except ImportError: - from urllib2 import urlopen from urllib import urlretrieve - from urllib2 import URLError import hashlib import yaml @@ -45,6 +41,7 @@ from ..core import rd_debug, InvalidData from ..installers import PackageManagerInstaller, InstallFailed from ..shell_utils import create_tempfile_from_string_and_execute +from ..url_utils import urlopen_gzip, URLError SOURCE_INSTALLER = 'source' @@ -75,7 +72,7 @@ def _sub_fetch_file(url, md5sum=None): """ contents = '' try: - fh = urlopen(url) + fh = urlopen_gzip(url) contents = fh.read() if md5sum is not None: filehash = hashlib.md5(contents).hexdigest() diff --git a/src/rosdep2/rep3.py b/src/rosdep2/rep3.py index 71b2f56f4..0c17a8faa 100644 --- a/src/rosdep2/rep3.py +++ b/src/rosdep2/rep3.py @@ -25,15 +25,12 @@ # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE # POSSIBILITY OF SUCH DAMAGE. -try: - from urllib.request import urlopen -except ImportError: - from urllib2 import urlopen import yaml import warnings from .core import DownloadFailure from .rosdistrohelper import PreRep137Warning +from .url_utils import urlopen_gzip # location of targets file for processing gbpdistro files REP3_TARGETS_URL = 'https://raw.githubusercontent.com/ros/rosdistro/master/releases/targets.yaml' @@ -59,7 +56,7 @@ def download_targets_data(targets_url=None): if targets_url is None: targets_url = REP3_TARGETS_URL try: - f = urlopen(targets_url, timeout=DOWNLOAD_TIMEOUT) + f = urlopen_gzip(targets_url, timeout=DOWNLOAD_TIMEOUT) text = f.read() f.close() targets_data = yaml.safe_load(text) diff --git a/src/rosdep2/sources_list.py b/src/rosdep2/sources_list.py index 72bdc7244..954a4e8fe 100644 --- a/src/rosdep2/sources_list.py +++ b/src/rosdep2/sources_list.py @@ -32,14 +32,6 @@ import os import sys import yaml -try: - from urllib.request import urlopen - from urllib.error import URLError - import urllib.request as request -except ImportError: - from urllib2 import urlopen - from urllib2 import URLError - import urllib2 as request try: import cPickle as pickle except ImportError: @@ -49,7 +41,7 @@ from .core import InvalidData, DownloadFailure, CachePermissionError from .gbpdistro_support import get_gbprepo_as_rosdep_data, download_gbpdistro_as_rosdep_data from .meta import MetaDatabase -from ._version import __version__ +from .url_utils import urlopen_gzip, URLError try: import urlparse @@ -306,13 +298,7 @@ def download_rosdep_data(url): retrieved (e.g. 404, bad YAML format, server down). """ try: - # http/https URLs need custom requests to specify the user-agent, since some repositories reject - # requests from the default user-agent. - if url.startswith("http://") or url.startswith("https://"): - url_request = request.Request(url, headers={'User-Agent': 'rosdep/{version}'.format(version=__version__)}) - else: - url_request = url - f = urlopen(url_request, timeout=DOWNLOAD_TIMEOUT) + f = urlopen_gzip(url, timeout=DOWNLOAD_TIMEOUT) text = f.read() f.close() data = yaml.safe_load(text) @@ -337,7 +323,7 @@ def download_default_sources_list(url=DEFAULT_SOURCES_LIST_URL): retrieved (e.g. 404, server down). """ try: - f = urlopen(url, timeout=DOWNLOAD_TIMEOUT) + f = urlopen_gzip(url, timeout=DOWNLOAD_TIMEOUT) except (URLError, httplib.HTTPException) as e: raise URLError(str(e) + ' (%s)' % url) data = f.read().decode() diff --git a/src/rosdep2/url_utils.py b/src/rosdep2/url_utils.py new file mode 100644 index 000000000..fbd10faa0 --- /dev/null +++ b/src/rosdep2/url_utils.py @@ -0,0 +1,56 @@ +# Copyright (c) 2021, Open Source Robotics Foundation, Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# * Neither the name of the Willow Garage, Inc. nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. + +from gzip import GzipFile +from io import BytesIO +try: + from urllib.request import urlopen + from urllib.error import URLError + import urllib.request as request +except ImportError: + from urllib2 import urlopen + from urllib2 import URLError + import urllib2 as request + +from ._version import __version__ + + +def urlopen_gzip(url, **kwargs): + # http/https URLs need custom requests to specify the user-agent, since some repositories reject + # requests from the default user-agent. + if url.startswith("http://") or url.startswith("https://"): + url_request = request.Request(url, headers={ + 'Accept-Encoding': 'gzip', + 'User-Agent': 'rosdep/{version}'.format(version=__version__), + }) + response = urlopen(url_request, **kwargs) + if response.info().get('Content-Encoding') == 'gzip': + buffer = BytesIO(response.read()) + return GzipFile(fileobj=buffer, mode='rb') + return response + + return urlopen(url, **kwargs)