diff --git a/AUTHORS.rst b/AUTHORS.rst index eb1fa79..9b2850e 100644 --- a/AUTHORS.rst +++ b/AUTHORS.rst @@ -11,3 +11,4 @@ Authors * Petr Ĺ ebek - https://github.com/Artimi * Swen Kooij - https://github.com/Photonios * Vara Canero - https://github.com/varac +* Andre Bianchi - https://github.com/drebs diff --git a/CHANGELOG.rst b/CHANGELOG.rst index af8660a..4f32ebe 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -7,6 +7,10 @@ Changelog * Added a ``time`` field in ``commit_info``. Contributed by Vara Canero in `#71 `_. +* Fixed the leaking of credentials by masking the URL printed when storing + data to elasticsearch. +* Added a `--benchmark-netrc` option to use credentials from a netrc file when + storing data to elasticsearch. 3.1.0a2 (2017-03-27) -------------------- diff --git a/src/pytest_benchmark/cli.py b/src/pytest_benchmark/cli.py index 1a10332..5e87c71 100644 --- a/src/pytest_benchmark/cli.py +++ b/src/pytest_benchmark/cli.py @@ -147,7 +147,7 @@ def main(): parser = make_parser() args = parser.parse_args() logger = Logger(args.verbose) - storage = load_storage(args.storage, logger=logger) + storage = load_storage(args.storage, logger=logger, netrc=args.netrc) if args.command == 'list': for file in storage.query(): diff --git a/src/pytest_benchmark/plugin.py b/src/pytest_benchmark/plugin.py index 889a244..c41a619 100644 --- a/src/pytest_benchmark/plugin.py +++ b/src/pytest_benchmark/plugin.py @@ -109,6 +109,11 @@ def add_global_options(addoption, prefix="benchmark-"): "(when --benchmark-save or --benchmark-autosave are used). For backwards compatibility unexpected values " "are converted to file://. Default: %(default)r." ) + addoption( + "--{0}netrc".format(prefix), + nargs="?", default='', const='~/.netrc', + help="Load elasticsearch credentials from a netrc file. Default: %(default)r.", + ) addoption( "--{0}verbose".format(prefix), *[] if prefix else ['-v'], action="store_true", default=False, diff --git a/src/pytest_benchmark/session.py b/src/pytest_benchmark/session.py index 2fd68a5..e399144 100644 --- a/src/pytest_benchmark/session.py +++ b/src/pytest_benchmark/session.py @@ -36,7 +36,8 @@ def __init__(self, config): self.storage = load_storage( config.getoption("benchmark_storage"), logger=self.logger, - default_machine_id=self.machine_id + default_machine_id=self.machine_id, + netrc=config.getoption("benchmark_netrc") ) self.options = dict( min_time=SecondsDecimal(config.getoption("benchmark_min_time")), diff --git a/src/pytest_benchmark/storage/elasticsearch.py b/src/pytest_benchmark/storage/elasticsearch.py index 8981dfd..dcb2baa 100644 --- a/src/pytest_benchmark/storage/elasticsearch.py +++ b/src/pytest_benchmark/storage/elasticsearch.py @@ -1,10 +1,12 @@ from __future__ import absolute_import +import re import sys import uuid from datetime import date from datetime import datetime from decimal import Decimal +from functools import partial from ..compat import reraise @@ -28,6 +30,13 @@ def default(self, data): return "UNSERIALIZABLE[%r]" % data +def _mask_hosts(hosts): + m = re.compile('^([^:]+)://[^@]+@') + sub_fun = partial(m.sub, '\\1://***:***@') + masked_hosts = list(map(sub_fun, hosts)) + return masked_hosts + + class ElasticsearchStorage(object): def __init__(self, hosts, index, doctype, project_name, logger, default_machine_id=None): @@ -162,8 +171,10 @@ def save(self, output_json, save): body=bench, id=doc_id, ) + # hide user's credentials before logging + masked_hosts = _mask_hosts(self._es_hosts) self.logger.info("Saved benchmark data to %s to index %s as doctype %s" % ( - self._es_hosts, self._es_index, self._es_doctype)) + masked_hosts, self._es_index, self._es_doctype)) def _create_index(self): mapping = { diff --git a/src/pytest_benchmark/utils.py b/src/pytest_benchmark/utils.py index 3972a90..ce7c1c6 100644 --- a/src/pytest_benchmark/utils.py +++ b/src/pytest_benchmark/utils.py @@ -4,6 +4,7 @@ import argparse import genericpath import json +import netrc import ntpath import os import platform @@ -404,10 +405,34 @@ def parse_save(string): return string -def parse_elasticsearch_storage(string, default_index="benchmark", default_doctype="benchmark"): +def _parse_hosts(storage_url, netrc_file): + + # load creds from netrc file + path = os.path.expanduser(netrc_file) + creds = None + if netrc_file and os.path.isfile(path): + creds = netrc.netrc(path) + + # add creds to urls + urls = [] + for netloc in storage_url.netloc.split(','): + auth = "" + if creds and '@' not in netloc: + host = netloc.split(':').pop(0) + res = creds.authenticators(host) + if res: + user, _, secret = res + auth = "{user}:{secret}@".format(user=user, secret=secret) + url = "{scheme}://{auth}{netloc}".format(scheme=storage_url.scheme, + netloc=netloc, auth=auth) + urls.append(url) + return urls + + +def parse_elasticsearch_storage(string, default_index="benchmark", + default_doctype="benchmark", netrc_file=''): storage_url = urlparse(string) - hosts = ["{scheme}://{netloc}".format(scheme=storage_url.scheme, netloc=netloc) for netloc in - storage_url.netloc.split(',')] + hosts = _parse_hosts(storage_url, netrc_file) index = default_index doctype = default_doctype if storage_url.path and storage_url.path != "/": @@ -426,13 +451,16 @@ def parse_elasticsearch_storage(string, default_index="benchmark", default_docty def load_storage(storage, **kwargs): if "://" not in storage: storage = "file://" + storage + netrc_file = kwargs.pop('netrc') # only used by elasticsearch storage if storage.startswith("file://"): from .storage.file import FileStorage return FileStorage(storage[len("file://"):], **kwargs) elif storage.startswith("elasticsearch+"): from .storage.elasticsearch import ElasticsearchStorage # TODO update benchmark_autosave - return ElasticsearchStorage(*parse_elasticsearch_storage(storage[len("elasticsearch+"):]), **kwargs) + args = parse_elasticsearch_storage(storage[len("elasticsearch+"):], + netrc_file=netrc_file) + return ElasticsearchStorage(*args, **kwargs) else: raise argparse.ArgumentTypeError("Storage must be in form of file://path or " "elasticsearch+http[s]://host1,host2/index/doctype") diff --git a/tests/test_elasticsearch_storage.py b/tests/test_elasticsearch_storage.py index dd6a107..016f39f 100644 --- a/tests/test_elasticsearch_storage.py +++ b/tests/test_elasticsearch_storage.py @@ -2,6 +2,7 @@ import json import logging +import os from io import BytesIO from io import StringIO @@ -16,6 +17,8 @@ from pytest_benchmark.plugin import pytest_benchmark_generate_json from pytest_benchmark.plugin import pytest_benchmark_group_stats from pytest_benchmark.storage.elasticsearch import ElasticsearchStorage +from pytest_benchmark.storage.elasticsearch import _mask_hosts +from pytest_benchmark.utils import parse_elasticsearch_storage try: import unittest.mock as mock @@ -173,3 +176,59 @@ def test_handle_saving(sess, logger_output, monkeypatch): body=ES_DATA, id='FoobarOS_commitId_tests/test_normal.py::test_xfast_parametrized[0]', ) + + +def test_parse_with_no_creds(): + string = 'https://example.org,another.org' + hosts, _, _, _ = parse_elasticsearch_storage(string) + assert len(hosts) == 2 + assert 'https://example.org' in hosts + assert 'https://another.org' in hosts + + +def test_parse_with_creds_in_first_host_of_url(): + string = 'https://user:pass@example.org,another.org' + hosts, _, _, _ = parse_elasticsearch_storage(string) + assert len(hosts) == 2 + assert 'https://user:pass@example.org' in hosts + assert 'https://another.org' in hosts + + +def test_parse_with_creds_in_second_host_of_url(): + string = 'https://example.org,user:pass@another.org' + hosts, _, _, _ = parse_elasticsearch_storage(string) + assert len(hosts) == 2 + assert 'https://example.org' in hosts + assert 'https://user:pass@another.org' in hosts + + +def test_parse_with_creds_in_netrc(tmpdir): + netrc_file = os.path.join(tmpdir.strpath, 'netrc') + with open(netrc_file, 'w') as f: + f.write('machine example.org login user1 password pass1\n') + f.write('machine another.org login user2 password pass2\n') + string = 'https://example.org,another.org' + hosts, _, _, _ = parse_elasticsearch_storage(string, netrc_file=netrc_file) + assert len(hosts) == 2 + assert 'https://user1:pass1@example.org' in hosts + assert 'https://user2:pass2@another.org' in hosts + + +def test_parse_url_creds_supersedes_netrc_creds(tmpdir): + netrc_file = os.path.join(tmpdir.strpath, 'netrc') + with open(netrc_file, 'w') as f: + f.write('machine example.org login user1 password pass1\n') + f.write('machine another.org login user2 password pass2\n') + string = 'https://user3:pass3@example.org,another.org' + hosts, _, _, _ = parse_elasticsearch_storage(string, netrc_file=netrc_file) + assert len(hosts) == 2 + assert 'https://user3:pass3@example.org' in hosts # superseded by creds in url + assert 'https://user2:pass2@another.org' in hosts # got creds from netrc file + + +def test__mask_hosts(): + hosts = ['https://user1:pass1@example.org', 'https://user2:pass2@another.org'] + masked_hosts = _mask_hosts(hosts) + assert len(masked_hosts) == len(hosts) + assert 'https://***:***@example.org' in masked_hosts + assert 'https://***:***@another.org' in masked_hosts