Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Tristan/agent checks ui #922

Merged
merged 12 commits into from
May 19, 2014
3 changes: 3 additions & 0 deletions checks.d/couch.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@ class CouchDb(AgentCheck):
"""Extracts stats from CouchDB via its REST API
http://wiki.apache.org/couchdb/Runtime_Statistics
"""

SOURCE_TYPE_NAME = 'couchdb'

def _create_metric(self, data, tags=None):
overall_stats = data.get('stats', {})
for key, stats in overall_stats.items():
Expand Down
3 changes: 3 additions & 0 deletions checks.d/directory.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,9 @@ class DirectoryCheck(AgentCheck):
"pattern" - string, the `fnmatch` pattern to use when reading the "directory"'s files. default "*"
"recursive" - boolean, when true the stats will recurse into directories. default False
"""

SOURCE_TYPE_NAME = 'system'

def check(self, instance):
if "directory" not in instance:
raise Exception('DirectoryCheck: missing "directory" in config')
Expand Down
2 changes: 2 additions & 0 deletions checks.d/elastic.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,8 @@ class ElasticSearch(AgentCheck):
"elasticsearch.cluster_status": ("gauge", "status", lambda v: {"red":0,"yellow":1,"green":2}.get(v, -1)),
}

SOURCE_TYPE_NAME = 'elasticsearch'

def __init__(self, name, init_config, agentConfig):
AgentCheck.__init__(self, name, init_config, agentConfig)

Expand Down
4 changes: 3 additions & 1 deletion checks.d/gearmand.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@

class Gearman(AgentCheck):

SOURCE_TYPE_NAME = 'gearmand'
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We don't need that one, as the check name is also gearmand, do we?


def get_library_versions(self):
try:
import gearman
Expand All @@ -28,7 +30,7 @@ def _get_metrics(self, client, tags):
running = 0
queued = 0
workers = 0

for stat in data:
running += stat['running']
queued += stat['queued']
Expand Down
2 changes: 2 additions & 0 deletions checks.d/http_check.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@

class HTTPCheck(ServicesCheck):

SOURCE_TYPE_NAME = 'system'

def _load_conf(self, instance):
# Fetches the conf
tags = instance.get('tags', [])
Expand Down
3 changes: 3 additions & 0 deletions checks.d/kafka_consumer.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,9 @@
import random

class KafkaCheck(AgentCheck):

SOURCE_TYPE_NAME = 'kafka'

def check(self, instance):
consumer_groups = self.read_config(instance, 'consumer_groups',
cast=self._validate_consumer_groups)
Expand Down
3 changes: 2 additions & 1 deletion checks.d/kyototycoon.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ class KyotoTycoonCheck(AgentCheck):
"""Report statistics about the Kyoto Tycoon DBM-style
database server (http://fallabs.com/kyototycoon/)
"""
SOURCE_TYPE_NAME = 'kyoto tycoon'

GAUGES = {
'repl_delay': 'replication.delay',
Expand Down Expand Up @@ -68,7 +69,7 @@ def check(self, instance):
if key in self.GAUGES:
name = self.GAUGES[key]
self.gauge('kyototycoon.%s' % name, float(value), tags=tags)

elif key in self.RATES:
name = self.RATES[key]
self.rate('kyototycoon.%s_per_s' % name, float(value), tags=tags)
Expand Down
3 changes: 3 additions & 0 deletions checks.d/mcache.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,9 @@
# https://github.com/membase/ep-engine/blob/master/docs/stats.org

class Memcache(AgentCheck):

SOURCE_TYPE_NAME = 'memcached'

DEFAULT_PORT = 11211

GAUGES = [
Expand Down
2 changes: 2 additions & 0 deletions checks.d/mongo.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@

class MongoDb(AgentCheck):

SOURCE_TYPE_NAME = 'mongodb'

GAUGES = [
"indexCounters.btree.missRatio",
"globalLock.ratio",
Expand Down
6 changes: 4 additions & 2 deletions checks.d/network.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@

class Network(AgentCheck):

SOURCE_TYPE_NAME = 'system'

TCP_STATES = {
"ESTABLISHED": "established",
"SYN_SENT": "opening",
Expand Down Expand Up @@ -88,7 +90,7 @@ def _submit_devicemetrics(self, iface, vals_by_metric):

# For reasons i don't understand only these metrics are skipped if a
# particular interface is in the `excluded_interfaces` config list.
# Not sure why the others aren't included. Until I understand why, I'm
# Not sure why the others aren't included. Until I understand why, I'm
# going to keep the same behaviour.
exclude_iface_metrics = [
'packets_in.count',
Expand Down Expand Up @@ -238,7 +240,7 @@ def _check_bsd(self, instance):
'bytes_rcvd': self._parse_value(x[-5]),
'bytes_sent': self._parse_value(x[-2]),
'packets_in.count': self._parse_value(x[-7]),
'packets_in.error': self._parse_value(x[-6]),
'packets_in.error': self._parse_value(x[-6]),
'packets_out.count': self._parse_value(x[-4]),
'packets_out.error':self._parse_value(x[-3]),
}
Expand Down
2 changes: 2 additions & 0 deletions checks.d/process.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@

class ProcessCheck(AgentCheck):

SOURCE_TYPE_NAME = 'system'

PROCESS_GAUGE = (
'system.processes.threads',
'system.processes.cpu.pct',
Expand Down
9 changes: 6 additions & 3 deletions checks.d/redisdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@
class Redis(AgentCheck):
db_key_pattern = re.compile(r'^db\d+')
subkeys = ['keys', 'expires']

SOURCE_TYPE_NAME = 'redis'

GAUGE_KEYS = {
# Append-only metrics
'aof_last_rewrite_time_sec': 'redis.aof.last_rewrite_time',
Expand Down Expand Up @@ -106,7 +109,7 @@ def _get_conn(self, instance):
key = self._generate_instance_key(instance)
if key not in self.connections:
try:

# Only send useful parameters to the redis client constructor
list_params = ['host', 'port', 'db', 'password', 'socket_timeout',
'connection_pool', 'charset', 'errors', 'unix_socket_path']
Expand Down Expand Up @@ -139,12 +142,12 @@ def _check_db(self, instance, custom_tags=None):
try:
info = conn.info()
except ValueError, e:
# This is likely a know issue with redis library 2.0.0
# This is likely a know issue with redis library 2.0.0
# See https://github.com/DataDog/dd-agent/issues/374 for details
import redis
raise Exception("""Unable to run the info command. This is probably an issue with your version of the python-redis library.
Minimum required version: 2.4.11
Your current version: %s
Your current version: %s
Please upgrade to a newer version by running sudo easy_install redis""" % redis.__version__)

latency_ms = round((time.time() - start) * 1000, 2)
Expand Down
3 changes: 3 additions & 0 deletions checks.d/sqlserver.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@
VALID_METRIC_TYPES = ('gauge', 'rate', 'histogram')

class SQLServer(AgentCheck):

SOURCE_TYPE_NAME = 'sql server'

METRICS = [
('sqlserver.buffer.cache_hit_ratio', 'gauge', 'Buffer cache hit ratio'),
('sqlserver.buffer.page_life_expectancy', 'gauge', 'Page life expectancy'),
Expand Down
2 changes: 2 additions & 0 deletions checks.d/tcp_check.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ class BadConfException(Exception): pass

class TCPCheck(ServicesCheck):

SOURCE_TYPE_NAME = 'system'

def _load_conf(self, instance):
# Fetches the conf

Expand Down
2 changes: 1 addition & 1 deletion checks.d/wmi_check.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ def _get_wmi_conn(self, host, user, password):
def check(self, instance):
if wmi is None:
raise Exception("Missing 'wmi' module")

host = instance.get('host', None)
user = instance.get('username', None)
password = instance.get('password', None)
Expand Down
2 changes: 2 additions & 0 deletions checks.d/zk.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@
class Zookeeper(AgentCheck):
version_pattern = re.compile(r'Zookeeper version: ([^.]+)\.([^.]+)\.([^-]+)', flags=re.I)

SOURCE_TYPE_NAME = 'zookeeper'

def check(self, instance):
host = instance.get('host', 'localhost')
port = int(instance.get('port', 2181))
Expand Down
3 changes: 2 additions & 1 deletion checks/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -264,6 +264,8 @@ def get_metrics(self, expire=True):
class AgentCheck(object):
OK, WARNING, CRITICAL, UNKNOWN, NONE = (0, 1, 2, 3, 4)

SOURCE_TYPE_NAME = None

def __init__(self, name, init_config, agentConfig, instances=None):
"""
Initialize a new check.
Expand All @@ -275,7 +277,6 @@ def __init__(self, name, init_config, agentConfig, instances=None):
"""
from aggregator import MetricsAggregator


self.name = name
self.init_config = init_config
self.agentConfig = agentConfig
Expand Down
3 changes: 2 additions & 1 deletion checks/check_status.py
Original file line number Diff line number Diff line change
Expand Up @@ -267,8 +267,9 @@ class CheckStatus(object):
def __init__(self, check_name, instance_statuses, metric_count=None,
event_count=None, service_check_count=None,
init_failed_error=None, init_failed_traceback=None,
library_versions=None):
library_versions=None, check_source_type_name=None):
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nitpick: since the class name is CheckStatus, we don't need the check_ prefix on the attribute; it would be redundant.

self.name = check_name
self.source_type_name = check_source_type_name
self.instance_statuses = instance_statuses
self.metric_count = metric_count or 0
self.event_count = event_count or 0
Expand Down
23 changes: 20 additions & 3 deletions checks/collector.py
Original file line number Diff line number Diff line change
Expand Up @@ -257,7 +257,6 @@ def run(self, checksd=None, start_event=True):
try:
# Run the check.
instance_statuses = check.run()

# Collect the metrics and events.
current_check_metrics = check.get_metrics()
current_check_events = check.get_events()
Expand All @@ -281,7 +280,8 @@ def run(self, checksd=None, start_event=True):
log.exception("Error running check %s" % check.name)

check_status = CheckStatus(check.name, instance_statuses, metric_count, event_count, service_check_count,
library_versions=check.get_library_info())
library_versions=check.get_library_info(),
check_source_type_name=check.SOURCE_TYPE_NAME or check.name)
check_statuses.append(check_status)

for check_name, info in self.init_failed_checks_d.iteritems():
Expand All @@ -297,6 +297,21 @@ def run(self, checksd=None, start_event=True):
payload['metrics'] = metrics
payload['events'] = events
payload['service_checks'] = service_checks

# Add agent_checks if needed
if self._should_send_metadata():
agent_checks = []
for check in check_statuses:
for instance_status in check.instance_statuses:
agent_checks.append(
(
check.name, check.source_type_name,
instance_status.instance_id,
instance_status.status, instance_status.error
)
)
payload['agent_checks'] = agent_checks

collect_duration = timer.step()

if self.os != 'windows':
Expand Down Expand Up @@ -379,7 +394,7 @@ def _build_payload(self, start_event=True):
}]

# Periodically send the host metadata.
if self._is_first_run() or self._should_send_metadata():
if self._should_send_metadata():
payload['systemStats'] = get_system_stats()
payload['meta'] = self._get_metadata()
self.metadata_cache = payload['meta']
Expand Down Expand Up @@ -427,6 +442,8 @@ def _get_metadata(self):
return metadata

def _should_send_metadata(self):
if self._is_first_run():
return True
# If the interval has passed, send the metadata again
now = time.time()
if now - self.metadata_start >= self.metadata_interval:
Expand Down