From d038ae5b1f73215f2b616d5cb384f44c35dea9c3 Mon Sep 17 00:00:00 2001 From: Remi Hakim Date: Tue, 3 Nov 2015 16:29:35 -0500 Subject: [PATCH] [kubernetes] Kubelet check failing shouldn't stop metrics collection Also fix service check messages --- checks.d/kubernetes.py | 15 +++++++++------ checks/__init__.py | 2 +- conf.d/kubernetes.yaml.example | 4 ---- tests/checks/mock/test_kubernetes.py | 3 +-- 4 files changed, 11 insertions(+), 13 deletions(-) diff --git a/checks.d/kubernetes.py b/checks.d/kubernetes.py index f2de851ce3..c0a88a709f 100644 --- a/checks.d/kubernetes.py +++ b/checks.d/kubernetes.py @@ -93,8 +93,8 @@ def _perform_kubelet_checks(self, url): except Exception, e: self.log.warning('kubelet check failed: %s' % str(e)) - self.service_check(service_check_base, AgentCheck.CRITICAL, 'Kubelet check failed: %s' % str(e)) - raise + self.service_check(service_check_base, AgentCheck.CRITICAL, + message='Kubelet check failed: %s' % str(e)) def _perform_master_checks(self, url): try: @@ -104,12 +104,15 @@ def _perform_master_checks(self, url): nodename = nodeinfo['name'] service_check_name = "{0}.master.{1}.check".format(NAMESPACE, nodename) cond = nodeinfo['status'][-1]['type'] + minion_name = nodeinfo['metadata']['name'] + tags = ["minion_name:{0}".format(minion_name)] if cond != 'Ready': - self.service_check(service_check_name, AgentCheck.CRITICAL, cond) + self.service_check(service_check_name, AgentCheck.CRITICAL, + tags=tags, message=cond) else: - self.service_check(service_check_name, AgentCheck.OK) + self.service_check(service_check_name, AgentCheck.OK, tags=tags) except Exception, e: - self.service_check(service_check_name, AgentCheck.CRITICAL, cond) + self.service_check(service_check_name, AgentCheck.CRITICAL, message=str(e)) self.log.warning('master checks url=%s exception=%s' % (url, str(e))) raise @@ -138,7 +141,7 @@ def check(self, instance): if instance.get('enable_master_checks', False): master_port = instance.get('master_port', DEFAULT_MASTER_PORT) 
master_host = instance.get('master_host', 'localhost') - master_url = '%s://%s:%d/nodes' % (method, host, master_port) + master_url = '%s://%s:%d/api/v1/nodes' % (method, host, master_port) self._perform_master_checks(master_url) # kubelet health checks diff --git a/checks/__init__.py b/checks/__init__.py index c916ee53e4..dd003c5265 100644 --- a/checks/__init__.py +++ b/checks/__init__.py @@ -548,7 +548,7 @@ def service_check(self, check_name, status, tags=None, timestamp=None, :param hostname: (optional) str, host that generated the service check. Defaults to the host_name of the agent :param check_run_id: (optional) int, id used for logging and tracing - purposes. Don't need to be unique. If not + purposes. Doesn't need to be unique. If not specified, one will be generated. """ if hostname is None: diff --git a/conf.d/kubernetes.yaml.example b/conf.d/kubernetes.yaml.example index 35533de1e5..7386ee7e2e 100644 --- a/conf.d/kubernetes.yaml.example +++ b/conf.d/kubernetes.yaml.example @@ -22,10 +22,6 @@ instances: # use_histogram: True # - # Getting the master checks - # master_host: localhost - # master_port: 8080 - # # Kubelet checks # enable_kubelet_checks: true # kubelet_port: 10255 diff --git a/tests/checks/mock/test_kubernetes.py b/tests/checks/mock/test_kubernetes.py index 715286f4a4..83a33df91c 100644 --- a/tests/checks/mock/test_kubernetes.py +++ b/tests/checks/mock/test_kubernetes.py @@ -35,8 +35,7 @@ def test_fail(self): } # Can't use run_check_twice due to specific metrics - with self.assertRaises(Exception): - self.run_check(config, mocks=mocks, force_reload=True) + self.run_check(config, mocks=mocks, force_reload=True) self.assertServiceCheck("kubernetes.kubelet.check", status=AgentCheck.CRITICAL) self.coverage_report()