Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Docker check adjustment #964

Merged
merged 4 commits into from
Jun 2, 2014
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 21 additions & 12 deletions checks.d/docker.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@
import os
import re
import time
from urlparse import urlsplit, urljoin
from util import json, headers
from urlparse import urlsplit
from util import json
try:
from collections import defaultdict
except ImportError:
Expand Down Expand Up @@ -55,8 +55,8 @@
"cgroup": "cpuacct",
"file": "%s/%s/cpuacct.stat",
"metrics": {
"user": ("docker.cpu.user", "gauge"),
"system": ("docker.cpu.system", "gauge"),
"user": ("docker.cpu.user", "rate"),
"system": ("docker.cpu.system", "rate"),
},
},
]
Expand Down Expand Up @@ -109,18 +109,18 @@ def unix_open(self, req):
class Docker(AgentCheck):
def __init__(self, *args, **kwargs):
super(Docker, self).__init__(*args, **kwargs)
self._mounpoints = {}
self.cgroup_path_prefix = None # Depending on the version
self._mountpoints = {}
self.cgroup_path_prefix = None # Depending on the version
for metric in LXC_METRICS:
self._mounpoints[metric["cgroup"]] = self._find_cgroup(metric["cgroup"])
self._mountpoints[metric["cgroup"]] = self._find_cgroup(metric["cgroup"])
self._path_prefix = None
self._last_event_collection_ts = defaultdict(lambda: None)

@property
def path_prefix(self):
if self._path_prefix is None:
metric = LXC_METRICS[0]
mountpoint = self._mounpoints[metric["cgroup"]]
mountpoint = self._mountpoints[metric["cgroup"]]
stat_file_lxc = os.path.join(mountpoint, "lxc")
stat_file_docker = os.path.join(mountpoint, "docker")

Expand All @@ -136,9 +136,16 @@ def check(self, instance):
urllib2.install_opener(urllib2.build_opener(UnixSocketHandler())) # We need to reinstall the opener every time as it gets uninstalled
tags = instance.get("tags") or []

self._process_events(self._get_events(instance))
try:
self._process_events(self._get_events(instance))
except socket.timeout:
self.warning('Timeout during socket connection. Events will be missing.')

try:
containers = self._get_containers(instance)
except:
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

except Exception, you should avoid just "except"

raise Exception('Cannot get containers list: timeout during socket connection. Try to refine the containers to collect by editing the configuration file.')
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How do you know it's a timeout ?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah my last commit was just incomplete, this should catch only socket.timeout. Fixing it.


containers = self._get_containers(instance)
if not containers:
self.gauge("docker.containers.running", 0)
raise Exception("No containers are running.")
Expand Down Expand Up @@ -173,10 +180,12 @@ def check(self, instance):
if key in container:
getattr(self, metric_type)(dd_key, int(container[key]), tags=container_tags)
for metric in LXC_METRICS:
mountpoint = self._mounpoints[metric["cgroup"]]
mountpoint = self._mountpoints[metric["cgroup"]]
stat_file = os.path.join(mountpoint, metric["file"] % (self.path_prefix, container["Id"]))
stats = self._parse_cgroup_file(stat_file)
for key, (dd_key, metric_type) in metric["metrics"].items():
if key.startswith("total_") and not instance.get("total"):
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

"total" is not a very explicit setting name. Could you use something a bit more explicit ?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You are right, I took it to stay close to the cgroup metric. Recursive was an other candidate (for exemple, cgroup ioblk metrics use _recursive instead of total_). Do you prefer it?

By the way, this parameter is explained in the documentation.

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

something like "collect_total" would be more explicit

continue
if key in stats:
getattr(self, metric_type)(dd_key, int(stats[key]), tags=container_tags)

Expand All @@ -187,7 +196,7 @@ def _process_events(self, events):
'timestamp': ev['time'],
'host': self.hostname,
'event_type': EVENT_TYPE,
'msg_title': "%s %s on %s" % (ev['from'], ev['status'], self.hostname),
'msg_title': "%s %s on %s" % (ev['from'], ev['status'], self.hostname),
'source_type_name': EVENT_TYPE,
'event_object': ev['from'],
})
Expand Down
12 changes: 11 additions & 1 deletion conf.d/docker.yaml.example
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Warning
# Warning
# The user running the Datadog Agent (usually "dd-agent") must be part of the "docker" group

init_config:
Expand Down Expand Up @@ -30,3 +30,13 @@ instances:
# exclude:
# - "image:ubuntu"
# - "image:debian"

# Sub-cgroups metrics
#
# If you want to include sub-cgroups metrics, turn the total option on.
# It is useless for a normal usage, as total_ metrics will be the same as normal ones.
#
# Example:
# instances:
# - url: "unix://var/run/docker.sock"
# total: true