Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[etcd] service check & test #1379

Merged
merged 4 commits into from
Feb 19, 2015
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ env:
- TRAVIS_FLAVOR=ssh
- TRAVIS_FLAVOR=fluentd
- TRAVIS_FLAVOR=rabbitmq
- TRAVIS_FLAVOR=etcd

# Override travis defaults with empty jobs
before_install: echo "OVERRIDING TRAVIS STEPS"
Expand Down
1 change: 1 addition & 0 deletions Rakefile
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ require './ci/cassandra'
require './ci/couchdb'
require './ci/default'
require './ci/elasticsearch'
require './ci/etcd'
require './ci/fluentd'
require './ci/gearman'
require './ci/haproxy'
Expand Down
21 changes: 12 additions & 9 deletions checks.d/etcd.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ def check(self, instance):
instance_tags = instance.get('tags', [])
# Append the instance's URL in case there are more than one, that
# way they can tell the difference!
instance_tags.append("url:{}".format(url))
instance_tags.append("url:{0}".format(url))
timeout = float(instance.get('timeout', self.DEFAULT_TIMEOUT))

self_response = self.get_self_metrics(url, timeout)
Expand All @@ -68,27 +68,30 @@ def check(self, instance):
if key in self_response:
self.rate(self.SELF_RATES[key], self_response[key], tags=instance_tags)
else:
self.log.warn("Missing key {} in stats.".format(key))
self.log.warn("Missing key {0} in stats.".format(key))

for key in self.SELF_GAUGES:
if key in self_response:
self.gauge(self.SELF_GAUGES[key], self_response[key], tags=instance_tags)
else:
self.log.warn("Missing key {} in stats.".format(key))
self.log.warn("Missing key {0} in stats.".format(key))

store_response = self.get_store_metrics(url, timeout)
if store_response is not None:
for key in self.STORE_RATES:
if key in store_response:
self.rate(self.STORE_RATES[key], store_response[key], tags=instance_tags)
else:
self.log.warn("Missing key {} in stats.".format(key))
self.log.warn("Missing key {0} in stats.".format(key))

for key in self.STORE_GAUGES:
if key in store_response:
self.gauge(self.STORE_GAUGES[key], store_response[key], tags=instance_tags)
else:
self.log.warn("Missing key {} in stats.".format(key))
self.log.warn("Missing key {0} in stats.".format(key))

if self_response is not None and store_response is not None:
self.service_check(self.SERVICE_CHECK_NAME, AgentCheck.OK, tags=instance_tags)

def get_self_metrics(self, url, timeout):
return self.get_json(url + "/v2/stats/self", timeout)
Expand All @@ -103,13 +106,13 @@ def get_json(self, url, timeout):
# If there's a timeout
self.service_check(self.SERVICE_CHECK_NAME, AgentCheck.CRITICAL,
message="Timeout when hitting %s" % url,
tags = ["url:{}".format(url)])
return None
tags = ["url:{0}".format(url)])
raise

if r.status_code != 200:
self.service_check(self.SERVICE_CHECK_NAME, AgentCheck.CRITICAL,
message="Got %s when hitting %s" % (r.status_code, url),
tags = ["url:{}".format(url)])
return None
tags = ["url:{0}".format(url)])
raise Exception("Http status code {0} on url {1}".format(r.status_code, url))

return r.json()
63 changes: 63 additions & 0 deletions ci/etcd.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
require './ci/common'

# Version of etcd to install for the integration tests.
# Taken from the ETCD_VERSION environment variable when it is set,
# otherwise falls back to '2.0.3'.
def etcd_version
  ENV.fetch('ETCD_VERSION', '2.0.3')
end

# Directory the etcd release is unpacked into: a per-version
# "etcd_<version>" folder under the INTEGRATIONS_DIR environment variable.
def etcd_rootdir
  format('%s/etcd_%s', ENV['INTEGRATIONS_DIR'], etcd_version)
end

# Rake tasks driving the etcd flavor of the CI run:
# before_install -> install -> before_script -> script -> cleanup,
# with :execute chaining them together.
namespace :ci do
  namespace :etcd do |flavor|
    task :before_install => ['ci:common:before_install']

    # Download and unpack the etcd release, unless etcd_rootdir already exists.
    task :install => ['ci:common:install'] do
      unless Dir.exist? File.expand_path(etcd_rootdir)
        # $VOLATILE_DIR is left for the shell to expand.
        tarball = '$VOLATILE_DIR/etcd.tar.gz'
        release_url = 'https://github.com/coreos/etcd/releases/download/' \
                      "v#{etcd_version}/etcd-v#{etcd_version}-linux-amd64.tar.gz"
        # Each command is built on a single line with explicit spaces between
        # arguments, so argument separation never depends on how continuation
        # lines inside a %() literal happen to be indented.
        sh %(curl -s -L -o #{tarball} #{release_url})
        sh %(mkdir -p #{etcd_rootdir})
        sh %(tar xzvf #{tarball} -C #{etcd_rootdir} --strip-components=1 >/dev/null)
      end
    end

    # Start etcd in the background from $VOLATILE_DIR, then wait before the
    # tests run (sleep_for is presumably provided by ci/common -- confirm).
    task :before_script => ['ci:common:before_script'] do
      sh %(cd $VOLATILE_DIR && #{etcd_rootdir}/etcd >/dev/null &)
      sleep_for 10
    end

    # Run the tests tagged as requiring 'etcd'.
    task :script => ['ci:common:script'] do
      this_provides = [
        'etcd'
      ]
      Rake::Task['ci:common:run_tests'].invoke(this_provides)
    end

    task :cleanup => ['ci:common:cleanup'] do
      # This will delete the temp directory of etcd,
      # so the etcd process will kill itself quickly after that (<10s)
      sh %(rm -rf $VOLATILE_DIR/*etcd*)
    end

    # Run every stage in order. The first failure is remembered so that
    # cleanup still runs (unless SKIP_CLEANUP is set) before it is re-raised.
    task :execute do
      exception = nil
      begin
        %w(before_install install before_script script).each do |t|
          Rake::Task["#{flavor.scope.path}:#{t}"].invoke
        end
      rescue => e
        exception = e
        puts "Failed task: #{e.class} #{e.message}".red
      end
      if ENV['SKIP_CLEANUP']
        puts 'Skipping cleanup, disposable environments are great'.yellow
      else
        puts 'Cleaning up'
        Rake::Task["#{flavor.scope.path}:cleanup"].invoke
      end
      fail exception if exception
    end
  end
end
24 changes: 23 additions & 1 deletion tests/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,14 +123,23 @@ def run_check(self, config, agent_config=None):
if self.check is None:
self.check = load_check(self.CHECK_NAME, config, agent_config)

error = None
for instance in self.check.instances:
self.check.check(instance)
try:
self.check.check(instance)
except Exception, e:
# Catch error before re-raising it to be able to get service_checks
print"Exception {0} during check"
error = e

self.metrics = self.check.get_metrics()
self.events = self.check.get_events()
self.service_checks = self.check.get_service_checks()
self.warnings = self.check.get_warnings()

if error is not None:
raise error

def print_current_state(self):
print "++++++++++++ DEBUG ++++++++++++"
print "METRICS ",
Expand Down Expand Up @@ -197,3 +206,16 @@ def assertMetricTag(self, metric_name, tag, count=None):
candidates.append((m_name, ts, val, mdata))

self._candidates_size_assert(candidates, count=count)

def assertServiceCheck(self, service_check_name, status=None, tags=None, count=None):
    """Assert on the collected service checks matching the given criteria.

    Filters ``self.service_checks`` by name and, when provided, by status
    and by tags (compared order-insensitively), then hands the matching
    entries to ``self._candidates_size_assert`` together with ``count``.

    :param service_check_name: value the ``'check'`` key must equal
    :param status: if not None, value the ``'status'`` key must equal
    :param tags: if not None, list of tags the service check must carry
    :param count: forwarded as-is to ``_candidates_size_assert``
    """
    candidates = []
    for sc in self.service_checks:
        if sc['check'] != service_check_name:
            continue
        if status is not None and sc['status'] != status:
            continue
        # A service check sent without tags may hold None, or have no
        # "tags" key at all; treat both as an empty tag list instead of
        # letting sorted(None) raise a TypeError.
        if tags is not None and sorted(tags) != sorted(sc.get("tags") or []):
            continue

        candidates.append(sc)

    self._candidates_size_assert(candidates, count=count)
41 changes: 41 additions & 0 deletions tests/test_etcd.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
import unittest
from tests.common import AgentCheckTest
from nose.plugins.attrib import attr
from time import sleep
from checks import AgentCheck
from requests.exceptions import Timeout

@attr(requires='etcd')
class EtcdTest(AgentCheckTest):
    """Integration tests for the etcd check against http://localhost:4001."""

    CHECK_NAME = "etcd"

    def __init__(self, *args, **kwargs):
        AgentCheckTest.__init__(self, *args, **kwargs)
        self.config = {"instances": [{"url": "http://localhost:4001"}]}

    def test_metrics(self):
        """Run the check twice, one second apart, and verify a few metrics."""
        # NOTE(review): the second run is presumably needed so that rate
        # metrics have two samples to compute from -- confirm.
        self.run_check(self.config)
        sleep(1)
        self.run_check(self.config)

        expected_tags = ['url:http://localhost:4001', 'etcd_state:leader']
        expected_metrics = (
            'etcd.store.gets.success',
            'etcd.store.gets.fail',
            'etcd.self.send.appendrequest.count',
        )
        for metric_name in expected_metrics:
            self.assertMetric(metric_name, metric_value=0.0, tags=expected_tags)

    def test_service_checks(self):
        """A successful run emits exactly one OK service check."""
        self.run_check(self.config)

        self.assertEqual(len(self.service_checks), 1, self.service_checks)
        self.assertServiceCheck(
            self.check.SERVICE_CHECK_NAME,
            status=AgentCheck.OK,
            tags=['url:http://localhost:4001', 'etcd_state:leader'],
        )

    def test_bad_config(self):
        """A wrong URL makes the check raise and emit one CRITICAL check."""
        bad_config = {"instances": [{"url": "http://localhost:4001/test"}]}
        self.assertRaises(Exception, self.run_check, bad_config)

        self.assertEqual(len(self.service_checks), 1, self.service_checks)
        self.assertServiceCheck(
            self.check.SERVICE_CHECK_NAME,
            status=AgentCheck.CRITICAL,
            tags=['url:http://localhost:4001/test/v2/stats/self'],
        )
10 changes: 4 additions & 6 deletions tests/test_riakcs.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,7 @@ def test_service_checks(self):
self.check = load_check(self.CHECK_NAME, self.config, {})
self.assertRaises(error, lambda: self.run_check(self.config))

service_checks = self.check.get_service_checks()
self.assertEqual(len(service_checks), 1, service_checks)
sc = service_checks[0]
self.assertEquals(sc["check"], self.check.SERVICE_CHECK_NAME, sc["check"])
self.assertEquals(sc["status"], AgentCheck.CRITICAL, sc["status"])
self.assertEquals(sc["tags"], ['aggregation_key:localhost:8080'], sc["tags"])
self.assertEqual(len(self.service_checks), 1, self.service_checks)
self.assertServiceCheck(self.check.SERVICE_CHECK_NAME,
status=AgentCheck.CRITICAL,
tags=['aggregation_key:localhost:8080'])