diff --git a/.travis.yml b/.travis.yml
index b963f7fcaa..6e9431814e 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -64,6 +64,7 @@ env:
     - TRAVIS_FLAVOR=ssh
     - TRAVIS_FLAVOR=fluentd
     - TRAVIS_FLAVOR=rabbitmq
+    - TRAVIS_FLAVOR=etcd
 
 # Override travis defaults with empty jobs
 before_install: echo "OVERRIDING TRAVIS STEPS"
diff --git a/Rakefile b/Rakefile
index aabae0a6c0..8da0af9eaf 100755
--- a/Rakefile
+++ b/Rakefile
@@ -9,6 +9,7 @@ require './ci/cassandra'
 require './ci/couchdb'
 require './ci/default'
 require './ci/elasticsearch'
+require './ci/etcd'
 require './ci/fluentd'
 require './ci/gearman'
 require './ci/haproxy'
diff --git a/checks.d/etcd.py b/checks.d/etcd.py
index cd0d7caceb..50dce7b47b 100644
--- a/checks.d/etcd.py
+++ b/checks.d/etcd.py
@@ -54,7 +54,8 @@ def check(self, instance):
-        instance_tags = instance.get('tags', [])
+        # Copy the list: the appends below must not grow the configured tags on every run
+        instance_tags = list(instance.get('tags', []))
         # Append the instance's URL in case there are more than one, that
         # way they can tell the difference!
-        instance_tags.append("url:{}".format(url))
+        instance_tags.append("url:{0}".format(url))
         timeout = float(instance.get('timeout', self.DEFAULT_TIMEOUT))
 
         self_response = self.get_self_metrics(url, timeout)
@@ -68,13 +69,13 @@ def check(self, instance):
                 if key in self_response:
                     self.rate(self.SELF_RATES[key], self_response[key], tags=instance_tags)
                 else:
-                    self.log.warn("Missing key {} in stats.".format(key))
+                    self.log.warn("Missing key {0} in stats.".format(key))
 
             for key in self.SELF_GAUGES:
                 if key in self_response:
                     self.gauge(self.SELF_GAUGES[key], self_response[key], tags=instance_tags)
                 else:
-                    self.log.warn("Missing key {} in stats.".format(key))
+                    self.log.warn("Missing key {0} in stats.".format(key))
 
         store_response = self.get_store_metrics(url, timeout)
         if store_response is not None:
@@ -82,13 +83,17 @@ def check(self, instance):
                 if key in store_response:
                     self.rate(self.STORE_RATES[key], store_response[key], tags=instance_tags)
                 else:
-                    self.log.warn("Missing key {} in stats.".format(key))
+                    self.log.warn("Missing key {0} in stats.".format(key))
 
             for key in self.STORE_GAUGES:
                 if key in store_response:
                     self.gauge(self.STORE_GAUGES[key], store_response[key], tags=instance_tags)
                 else:
-                    self.log.warn("Missing key {} in stats.".format(key))
+                    self.log.warn("Missing key {0} in stats.".format(key))
+
+        # Both stats endpoints returned data: report the instance as OK
+        if self_response is not None and store_response is not None:
+            self.service_check(self.SERVICE_CHECK_NAME, AgentCheck.OK, tags=instance_tags)
 
     def get_self_metrics(self, url, timeout):
         return self.get_json(url + "/v2/stats/self", timeout)
@@ -103,13 +108,13 @@ def get_json(self, url, timeout):
             # If there's a timeout
             self.service_check(self.SERVICE_CHECK_NAME, AgentCheck.CRITICAL,
                 message="Timeout when hitting %s" % url,
-                tags = ["url:{}".format(url)])
-            return None
+                tags = ["url:{0}".format(url)])
+            raise
 
         if r.status_code != 200:
             self.service_check(self.SERVICE_CHECK_NAME, AgentCheck.CRITICAL,
                 message="Got %s when hitting %s" % (r.status_code, url),
-                tags = ["url:{}".format(url)])
-            return None
+                tags = ["url:{0}".format(url)])
+            raise Exception("Http status code {0} on url {1}".format(r.status_code, url))
 
         return r.json()
diff --git a/ci/etcd.rb b/ci/etcd.rb
new file mode 100644
index 0000000000..5f161f5d42
--- /dev/null
+++ b/ci/etcd.rb
@@ -0,0 +1,65 @@
+require './ci/common'
+
+# etcd release to test against; overridable through ETCD_VERSION
+def etcd_version
+  ENV['ETCD_VERSION'] || '2.0.3'
+end
+
+# Directory under INTEGRATIONS_DIR that the etcd release is unpacked into
+def etcd_rootdir
+  "#{ENV['INTEGRATIONS_DIR']}/etcd_#{etcd_version}"
+end
+
+namespace :ci do
+  namespace :etcd do |flavor|
+    task :before_install => ['ci:common:before_install']
+
+    task :install => ['ci:common:install'] do
+      unless Dir.exist? File.expand_path(etcd_rootdir)
+        sh %(curl -s -L -o $VOLATILE_DIR/etcd.tar.gz\
+             https://github.com/coreos/etcd/releases/download/v#{etcd_version}/etcd-v#{etcd_version}-linux-amd64.tar.gz)
+        sh %(mkdir -p #{etcd_rootdir})
+        sh %(tar xzvf $VOLATILE_DIR/etcd.tar.gz\
+             -C #{etcd_rootdir}\
+             --strip-components=1 >/dev/null)
+      end
+    end
+
+    task :before_script => ['ci:common:before_script'] do
+      sh %(cd $VOLATILE_DIR && #{etcd_rootdir}/etcd >/dev/null &)
+      sleep_for 10
+    end
+
+    task :script => ['ci:common:script'] do
+      this_provides = [
+        'etcd'
+      ]
+      Rake::Task['ci:common:run_tests'].invoke(this_provides)
+    end
+
+    task :cleanup => ['ci:common:cleanup'] do
+      # This deletes etcd's temporary directory, so the etcd process
+      # terminates on its own shortly afterwards (<10s)
+      sh %(rm -rf $VOLATILE_DIR/*etcd*)
+    end
+
+    task :execute do
+      exception = nil
+      begin
+        %w(before_install install before_script script).each do |t|
+          Rake::Task["#{flavor.scope.path}:#{t}"].invoke
+        end
+      rescue => e
+        exception = e
+        puts "Failed task: #{e.class} #{e.message}".red
+      end
+      if ENV['SKIP_CLEANUP']
+        puts 'Skipping cleanup, disposable environments are great'.yellow
+      else
+        puts 'Cleaning up'
+        Rake::Task["#{flavor.scope.path}:cleanup"].invoke
+      end
+      fail exception if exception
+    end
+  end
+end
diff --git a/tests/common.py b/tests/common.py
index cdc78fc4e8..ef157aad73 100644
--- a/tests/common.py
+++ b/tests/common.py
@@ -123,14 +123,23 @@ def run_check(self, config, agent_config=None):
         if self.check is None:
             self.check = load_check(self.CHECK_NAME, config, agent_config)
 
+        error = None
         for instance in self.check.instances:
-            self.check.check(instance)
+            try:
+                self.check.check(instance)
+            except Exception as e:
+                # Defer the error: metrics and service checks are collected below before re-raising
+                print "Exception {0} during check".format(e)
+                error = e
 
         self.metrics = self.check.get_metrics()
         self.events = self.check.get_events()
         self.service_checks = self.check.get_service_checks()
         self.warnings = self.check.get_warnings()
 
+        if error is not None:
+            raise error
+
     def print_current_state(self):
         print "++++++++++++ DEBUG ++++++++++++"
         print "METRICS ",
@@ -197,3 +206,21 @@ def assertMetricTag(self, metric_name, tag, count=None):
                 candidates.append((m_name, ts, val, mdata))
 
         self._candidates_size_assert(candidates, count=count)
+
+    def assertServiceCheck(self, service_check_name, status=None, tags=None, count=None):
+        """
+        Collect the service checks named `service_check_name`, optionally
+        filtered on `status` and on an order-insensitive `tags` match, and
+        hand them to `_candidates_size_assert` along with `count`.
+        """
+        candidates = []
+        for sc in self.service_checks:
+            if sc['check'] == service_check_name:
+                if status is not None and sc['status'] != status:
+                    continue
+                if tags is not None and sorted(tags) != sorted(sc.get("tags") or []):
+                    continue
+
+                candidates.append(sc)
+
+        self._candidates_size_assert(candidates, count=count)
diff --git a/tests/test_etcd.py b/tests/test_etcd.py
new file mode 100644
index 0000000000..dc1006fdf4
--- /dev/null
+++ b/tests/test_etcd.py
@@ -0,0 +1,45 @@
+import unittest
+from tests.common import AgentCheckTest
+from nose.plugins.attrib import attr
+from time import sleep
+from checks import AgentCheck
+from requests.exceptions import Timeout
+
+@attr(requires='etcd')
+class EtcdTest(AgentCheckTest):
+    """Integration tests for the etcd check; expects etcd on localhost:4001."""
+
+    CHECK_NAME = "etcd"
+    def __init__(self, *args, **kwargs):
+        AgentCheckTest.__init__(self, *args, **kwargs)
+        self.config = {"instances": [{"url": "http://localhost:4001"}]}
+
+    def test_metrics(self):
+        """Run the check twice, one second apart, and assert three metrics read 0.0."""
+        self.run_check(self.config)
+        sleep(1)
+        self.run_check(self.config)
+        tags = ['url:http://localhost:4001', 'etcd_state:leader']
+        self.assertMetric('etcd.store.gets.success', metric_value=0.0, tags=tags)
+        self.assertMetric('etcd.store.gets.fail', metric_value=0.0, tags=tags)
+        self.assertMetric('etcd.self.send.appendrequest.count', metric_value=0.0, tags=tags)
+
+
+    def test_service_checks(self):
+        """A successful run must emit exactly one OK service check."""
+        self.run_check(self.config)
+
+        self.assertEqual(len(self.service_checks), 1, self.service_checks)
+        self.assertServiceCheck(self.check.SERVICE_CHECK_NAME,
+                                status=AgentCheck.OK,
+                                tags=['url:http://localhost:4001', 'etcd_state:leader'])
+
+    def test_bad_config(self):
+        """An unusable URL must make the check raise and emit one CRITICAL service check."""
+        self.assertRaises(Exception,
+                          lambda: self.run_check({"instances": [{"url": "http://localhost:4001/test"}]}))
+
+        self.assertEqual(len(self.service_checks), 1, self.service_checks)
+        self.assertServiceCheck(self.check.SERVICE_CHECK_NAME,
+                                status=AgentCheck.CRITICAL,
+                                tags=['url:http://localhost:4001/test/v2/stats/self'])
diff --git a/tests/test_riakcs.py b/tests/test_riakcs.py
index 7c388f2b56..ee5b9c1466 100644
--- a/tests/test_riakcs.py
+++ b/tests/test_riakcs.py
@@ -32,9 +32,7 @@ def test_service_checks(self):
         self.check = load_check(self.CHECK_NAME, self.config, {})
         self.assertRaises(error, lambda: self.run_check(self.config))
 
-        service_checks = self.check.get_service_checks()
-        self.assertEqual(len(service_checks), 1, service_checks)
-        sc = service_checks[0]
-        self.assertEquals(sc["check"], self.check.SERVICE_CHECK_NAME, sc["check"])
-        self.assertEquals(sc["status"], AgentCheck.CRITICAL, sc["status"])
-        self.assertEquals(sc["tags"], ['aggregation_key:localhost:8080'], sc["tags"])
+        self.assertEqual(len(self.service_checks), 1, self.service_checks)
+        self.assertServiceCheck(self.check.SERVICE_CHECK_NAME,
+                                status=AgentCheck.CRITICAL,
+                                tags=['aggregation_key:localhost:8080'])