From 35e5f7e4cfb2bc3a1bb728ec2ab43930bc09ca86 Mon Sep 17 00:00:00 2001 From: Pierre Date: Fri, 22 Dec 2017 14:46:40 +0100 Subject: [PATCH 1/5] Add node tag for pod.ready/schedule metric --- .travis.yml | 1 + circle.yml | 1 + .../kubernetes_state/kubernetes_state.py | 46 +++++++++++++++++-- .../test/test_kubernetes_state.py | 14 +++++- 4 files changed, 57 insertions(+), 5 deletions(-) diff --git a/.travis.yml b/.travis.yml index 3cc8cf2861ab5..994b9ba47c3a1 100644 --- a/.travis.yml +++ b/.travis.yml @@ -82,6 +82,7 @@ env: - TRAVIS_FLAVOR=kong FLAVOR_VERSION=0.9.0 - TRAVIS_FLAVOR=kube_dns FLAVOR_VERSION=0.1.0 - TRAVIS_FLAVOR=kubernetes + - TRAVIS_FLAVOR=kubernetes_state - TRAVIS_FLAVOR=kyototycoon FLAVOR_VERSION=0.9.56 - TRAVIS_FLAVOR=lighttpd - TRAVIS_FLAVOR=mcache FLAVOR_VERSION=1.4.22 diff --git a/circle.yml b/circle.yml index 683e1d1539516..8df659c54bc3e 100644 --- a/circle.yml +++ b/circle.yml @@ -80,6 +80,7 @@ test: - rake ci:run[kafka] - rake ci:run[docker_daemon] - rake ci:run[kubernetes] + - rake ci:run[kubernetes_state] - rake ci:run[cassandra_nodetool] - rake ci:run[squid] - bundle exec rake requirements diff --git a/kubernetes_state/datadog_checks/kubernetes_state/kubernetes_state.py b/kubernetes_state/datadog_checks/kubernetes_state/kubernetes_state.py index 823159bd4b639..bdcf6e1dd6d2e 100644 --- a/kubernetes_state/datadog_checks/kubernetes_state/kubernetes_state.py +++ b/kubernetes_state/datadog_checks/kubernetes_state/kubernetes_state.py @@ -80,8 +80,6 @@ def __init__(self, name, init_config, agentConfig, instances=None): 'kube_pod_container_status_running': 'container.running', 'kube_pod_container_resource_requests_nvidia_gpu_devices': 'container.gpu.request', 'kube_pod_container_resource_limits_nvidia_gpu_devices': 'container.gpu.limit', - 'kube_pod_status_ready': 'pod.ready', - 'kube_pod_status_scheduled': 'pod.scheduled', 'kube_replicaset_spec_replicas': 'replicaset.replicas_desired', 'kube_replicaset_status_fully_labeled_replicas': 'replicaset.fully_labeled_replicas', 'kube_replicaset_status_ready_replicas': 'replicaset.replicas_ready', @@ -115,7 +113,6 @@ def __init__(self, name, init_config, agentConfig, instances=None): 'kube_node_labels', 'kube_pod_created' 'kube_pod_container_info', - 'kube_pod_info', 'kube_pod_owner', 'kube_pod_start_time', 'kube_pod_labels', @@ -154,6 +151,8 @@ def __init__(self, name, init_config, agentConfig, instances=None): 'kube_job_status_start_time', ] + self.pod_node_mapping = {} + def check(self, instance): endpoint = instance.get('kube_state_url') if endpoint is None: @@ -176,6 +175,8 @@ def check(self, instance): self.job_succeeded_count = defaultdict(int) self.job_failed_count = defaultdict(int) + self.active_pod_set = set() + self.process(endpoint, send_histograms_buckets=send_buckets, instance=instance) for job_tags, job_count in self.job_succeeded_count.iteritems(): @@ -183,6 +184,11 @@ def check(self, instance): for job_tags, job_count in self.job_failed_count.iteritems(): self.monotonic_count(self.NAMESPACE + '.job.failed', job_count, list(job_tags)) + #clean pod/node mapping + for key in self.pod_node_mapping.keys(): + if key not in self.active_pod_set: + self.pod_node_mapping.pop(key, None) + def _condition_to_service_check(self, metric, sc_name, mapping, tags=None): """ Some metrics contains conditions, labels that have "condition" as name and "true", "false", or "unknown" @@ -508,3 +514,37 @@ def kube_limitrange(self, message, **kwargs): self.gauge(metric_base_name.format(resource, constraint), val, tags) else: self.log.error("Metric type %s unsupported for metric %s" % (message.type, message.name)) + + def _enrich_pod_with_node_tag(self, metric, metric_name, message_type): + for label in metric.label: + if label.name == "pod": + self.active_pod_set.add(label.value) + if label.value in self.pod_node_mapping.keys(): + tags = [ + self._label_to_tag("condition", metric.label), + self._label_to_tag("namespace", metric.label), + self._label_to_tag("pod", metric.label), + self._format_tag("node", self.pod_node_mapping[label.value]) + ] + self.gauge(metric_name, getattr(metric, METRIC_TYPES[message_type]).value, tags, hostname=self.pod_node_mapping[label.value]) + + def kube_pod_status_ready(self, message, **kwargs): + """ Whether the pod is ready to serve requests. """ + for metric in message.metric: + self._enrich_pod_with_node_tag(metric, self.NAMESPACE + ".pod.ready", message.type) + + def kube_pod_status_scheduled(self, message, **kwargs): + """ Describes the status of the scheduling process for the pod. """ + for metric in message.metric: + self._enrich_pod_with_node_tag(metric, self.NAMESPACE + ".pod.scheduled", message.type) + + def kube_pod_info(self, message, **kwargs): + """ Collect information about pod (no metric sent). """ + for metric in message.metric: + pod_name = node_name = "" + for label in metric.label: + if label.name == "pod": + pod_name = label.value + elif label.name == "node": + node_name = label.value + self.pod_node_mapping[pod_name] = node_name diff --git a/kubernetes_state/test/test_kubernetes_state.py b/kubernetes_state/test/test_kubernetes_state.py index 8ec746571b4b5..985915a098d43 100644 --- a/kubernetes_state/test/test_kubernetes_state.py +++ b/kubernetes_state/test/test_kubernetes_state.py @@ -6,11 +6,19 @@ import mock import os +# 3p +from nose.plugins.attrib import attr + # project from tests.checks.common import AgentCheckTest NAMESPACE = 'kubernetes_state' +<<<<<<< HEAD +======= +@attr(requires='kubernetes_state') +class TestKubernetesState(AgentCheckTest): +>>>>>>> Add node tag for pod.ready/schedule metric class MockResponse: """ @@ -118,7 +126,8 @@ def test__update_kube_state_metrics(self, mock_poll): }] } - self.run_check(config) + # run check twice to have pod/node mapping + self.run_check_twice(config) self.assertServiceCheck(NAMESPACE + '.node.ready', self.check.OK) self.assertServiceCheck(NAMESPACE + '.node.out_of_disk', self.check.OK) @@ -156,7 +165,8 @@ def test__update_kube_state_metrics_v040(self, mock_poll): }] } - self.run_check(config) + # run check twice to have pod/node mapping + self.run_check_twice(config) self.assertServiceCheck(NAMESPACE + '.node.ready', self.check.OK) self.assertServiceCheck(NAMESPACE + '.node.out_of_disk', self.check.OK) From 978a3130bc64ec5685077f5b9a1b5a401455ccad Mon Sep 17 00:00:00 2001 From: Pierre Date: Tue, 9 Jan 2018 14:11:28 +0100 Subject: [PATCH 2/5] Use label joins --- .../kubernetes_state/kubernetes_state.py | 53 ++++--------------- .../test/test_kubernetes_state.py | 7 +-- 2 files changed, 12 insertions(+), 48 deletions(-) diff --git a/kubernetes_state/datadog_checks/kubernetes_state/kubernetes_state.py b/kubernetes_state/datadog_checks/kubernetes_state/kubernetes_state.py index bdcf6e1dd6d2e..e314b91a07cf3 100644 --- a/kubernetes_state/datadog_checks/kubernetes_state/kubernetes_state.py +++ b/kubernetes_state/datadog_checks/kubernetes_state/kubernetes_state.py @@ -80,6 +80,8 @@ def __init__(self, name, init_config, agentConfig, instances=None): 'kube_pod_container_status_running': 'container.running', 'kube_pod_container_resource_requests_nvidia_gpu_devices': 'container.gpu.request', 'kube_pod_container_resource_limits_nvidia_gpu_devices': 'container.gpu.limit', + 'kube_pod_status_ready': 'pod.ready', + 'kube_pod_status_scheduled': 'pod.scheduled', 'kube_replicaset_spec_replicas': 'replicaset.replicas_desired', 'kube_replicaset_status_fully_labeled_replicas': 'replicaset.fully_labeled_replicas', 'kube_replicaset_status_ready_replicas': 'replicaset.replicas_ready', @@ -113,6 +115,7 @@ def __init__(self, name, init_config, agentConfig, instances=None): 'kube_node_labels', 'kube_pod_created' 'kube_pod_container_info', + 'kube_pod_info', 'kube_pod_owner', 'kube_pod_start_time', 'kube_pod_labels', @@ -151,7 +154,14 @@ def __init__(self, name, init_config, agentConfig, instances=None): 'kube_job_status_start_time', ] - self.pod_node_mapping = {} + self.label_joins = { + 'kube_pod_info': { + 'label_to_match': 'pod', + 'labels_to_get': ['node'] + } + } + + self.label_to_hostname = 'node' def check(self, instance): endpoint = instance.get('kube_state_url') @@ -175,8 +185,6 @@ def check(self, instance): self.job_succeeded_count = defaultdict(int) self.job_failed_count = defaultdict(int) - self.active_pod_set = set() - self.process(endpoint, send_histograms_buckets=send_buckets, instance=instance) for job_tags, job_count in self.job_succeeded_count.iteritems(): @@ -184,11 +192,6 @@ def check(self, instance): for job_tags, job_count in self.job_failed_count.iteritems(): self.monotonic_count(self.NAMESPACE + '.job.failed', job_count, list(job_tags)) - #clean pod/node mapping - for key in self.pod_node_mapping.keys(): - if key not in self.active_pod_set: - self.pod_node_mapping.pop(key, None) - def _condition_to_service_check(self, metric, sc_name, mapping, tags=None): """ Some metrics contains conditions, labels that have "condition" as name and "true", "false", or "unknown" @@ -514,37 +517,3 @@ def kube_limitrange(self, message, **kwargs): self.gauge(metric_base_name.format(resource, constraint), val, tags) else: self.log.error("Metric type %s unsupported for metric %s" % (message.type, message.name)) - - def _enrich_pod_with_node_tag(self, metric, metric_name, message_type): - for label in metric.label: - if label.name == "pod": - self.active_pod_set.add(label.value) - if label.value in self.pod_node_mapping.keys(): - tags = [ - self._label_to_tag("condition", metric.label), - self._label_to_tag("namespace", metric.label), - self._label_to_tag("pod", metric.label), - self._format_tag("node", self.pod_node_mapping[label.value]) - ] - self.gauge(metric_name, getattr(metric, METRIC_TYPES[message_type]).value, tags, hostname=self.pod_node_mapping[label.value]) - - def kube_pod_status_ready(self, message, **kwargs): - """ Whether the pod is ready to serve requests. """ - for metric in message.metric: - self._enrich_pod_with_node_tag(metric, self.NAMESPACE + ".pod.ready", message.type) - - def kube_pod_status_scheduled(self, message, **kwargs): - """ Describes the status of the scheduling process for the pod. """ - for metric in message.metric: - self._enrich_pod_with_node_tag(metric, self.NAMESPACE + ".pod.scheduled", message.type) - - def kube_pod_info(self, message, **kwargs): - """ Collect information about pod (no metric sent). """ - for metric in message.metric: - pod_name = node_name = "" - for label in metric.label: - if label.name == "pod": - pod_name = label.value - elif label.name == "node": - node_name = label.value - self.pod_node_mapping[pod_name] = node_name diff --git a/kubernetes_state/test/test_kubernetes_state.py b/kubernetes_state/test/test_kubernetes_state.py index 985915a098d43..9b0ec11e9aa87 100644 --- a/kubernetes_state/test/test_kubernetes_state.py +++ b/kubernetes_state/test/test_kubernetes_state.py @@ -14,12 +14,6 @@ NAMESPACE = 'kubernetes_state' -<<<<<<< HEAD -======= -@attr(requires='kubernetes_state') -class TestKubernetesState(AgentCheckTest): ->>>>>>> Add node tag for pod.ready/schedule metric - class MockResponse: """ MockResponse is used to simulate the object requests.Response commonly returned by requests.get @@ -37,6 +31,7 @@ def close(self): pass +@attr(requires='kubernetes_state') class TestKubernetesState(AgentCheckTest): CHECK_NAME = 'kubernetes_state' From fde0c79baf8d65bcec1401bf108496d1391dc910 Mon Sep 17 00:00:00 2001 From: Pierre Date: Thu, 11 Jan 2018 13:34:24 -0500 Subject: [PATCH 3/5] Add changelog --- kubernetes_state/CHANGELOG.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/kubernetes_state/CHANGELOG.md b/kubernetes_state/CHANGELOG.md index 92105e4453cc1..7e4ce98feecdf 100644 --- a/kubernetes_state/CHANGELOG.md +++ b/kubernetes_state/CHANGELOG.md @@ -1,5 +1,12 @@ # CHANGELOG - kubernetes_state +2.1.0 / Unreleased +================== +### Changes + +* [IMPROVEMENT] Add the node label wherever the pod label is present [#1000][] +* [IMPROVEMENT] Override hostname with the node label if present [#1000][] + 2.0.0 / 2018-01-10 ================== ### Changes From 9c726c1a89133bd0f0b8107fd2d8d9535625c664 Mon Sep 17 00:00:00 2001 From: Pierre Date: Thu, 11 Jan 2018 13:38:18 -0500 Subject: [PATCH 4/5] bump version --- kubernetes_state/manifest.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kubernetes_state/manifest.json b/kubernetes_state/manifest.json index 9ddbc3a60600d..e0fb5914c2987 100644 --- a/kubernetes_state/manifest.json +++ b/kubernetes_state/manifest.json @@ -11,7 +11,7 @@ "linux", "mac_os" ], - "version": "2.0.0", + "version": "2.1.0", "use_omnibus_reqs": true, "public_title": "Datadog-Kubernetes State Integration", "categories":["orchestration", "containers"], From e8de7a3484b62452b90f53adc3b8d6f27b4893f4 Mon Sep 17 00:00:00 2001 From: Pierre Date: Thu, 11 Jan 2018 14:15:45 -0500 Subject: [PATCH 5/5] Assert tag and hostname presence --- .../kubernetes_state/__init__.py | 2 +- .../test/test_kubernetes_state.py | 19 ++++++++++++++++++- 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/kubernetes_state/datadog_checks/kubernetes_state/__init__.py b/kubernetes_state/datadog_checks/kubernetes_state/__init__.py index 99931a09a2855..0adf10be03882 100644 --- a/kubernetes_state/datadog_checks/kubernetes_state/__init__.py +++ b/kubernetes_state/datadog_checks/kubernetes_state/__init__.py @@ -2,6 +2,6 @@ KubernetesState = kubernetes_state.KubernetesState -__version__ = "2.0.0" +__version__ = "2.1.0" __all__ = ['kubernetes_state'] diff --git a/kubernetes_state/test/test_kubernetes_state.py b/kubernetes_state/test/test_kubernetes_state.py index 9b0ec11e9aa87..44d585dc14f47 100644 --- a/kubernetes_state/test/test_kubernetes_state.py +++ b/kubernetes_state/test/test_kubernetes_state.py @@ -93,6 +93,16 @@ class TestKubernetesState(AgentCheckTest): NAMESPACE + '.statefulset.replicas_updated', ] + TAGS = { + NAMESPACE + '.pod.ready': ['node:minikube'], + NAMESPACE + '.pod.scheduled': ['node:minikube'] + } + + HOSTNAMES = { + NAMESPACE + '.pod.ready': 'minikube', + NAMESPACE + '.pod.scheduled': 'minikube' + } + ZERO_METRICS = [ NAMESPACE + '.deployment.replicas_unavailable', NAMESPACE + '.deployment.paused', @@ -141,7 +151,14 @@ def test__update_kube_state_metrics(self, mock_poll): tags=['namespace:default', 'pod:hello-1509998460-tzh8k']) # Unknown for metric in self.METRICS: - self.assertMetric(metric) + self.assertMetric( + metric, + hostname=self.HOSTNAMES.get(metric, None) + ) + tags = self.TAGS.get(metric, None) + if tags: + for tag in tags: + self.assertMetricTag(metric, tag) if metric not in self.ZERO_METRICS: self.assertMetricNotAllZeros(metric)