Skip to content

Commit

Permalink
Merge pull request #1350 from DataDog/haissam/redis-repl-metrics
Browse files Browse the repository at this point in the history
[redis] new post 2.8 replication metrics
  • Loading branch information
LeoCavaille committed Feb 24, 2015
2 parents 414c027 + 8cc0cb7 commit 92470f0
Show file tree
Hide file tree
Showing 4 changed files with 98 additions and 17 deletions.
29 changes: 24 additions & 5 deletions checks.d/redisdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,10 @@
# 3rd party
import redis


class Redis(AgentCheck):
db_key_pattern = re.compile(r'^db\d+')
slave_key_pattern = re.compile(r'^slave\d+')
subkeys = ['keys', 'expires']

SOURCE_TYPE_NAME = 'redis'
Expand Down Expand Up @@ -63,7 +65,9 @@ class Redis(AgentCheck):
'master_last_io_seconds_ago': 'redis.replication.last_io_seconds_ago',
'master_sync_in_progress': 'redis.replication.sync',
'master_sync_left_bytes': 'redis.replication.sync_left_bytes',

'repl_backlog_histlen': 'redis.replication.backlog_histlen',
'master_repl_offset': 'redis.replication.master_repl_offset',
'slave_repl_offset': 'redis.replication.slave_repl_offset',
}

RATE_KEYS = {
Expand Down Expand Up @@ -109,7 +113,7 @@ def _get_conn(self, instance):

# Only send useful parameters to the redis client constructor
list_params = ['host', 'port', 'db', 'password', 'socket_timeout',
'connection_pool', 'charset', 'errors', 'unix_socket_path']
'connection_pool', 'charset', 'errors', 'unix_socket_path']

# Set a default timeout (in seconds) if no timeout is specified in the instance config
instance['socket_timeout'] = instance.get('socket_timeout', 5)
Expand All @@ -130,7 +134,7 @@ def _check_db(self, instance, custom_tags=None):
if 'unix_socket_path' in instance:
tags_to_add = ["unix_socket_path:%s" % instance.get("unix_socket_path")]
else:
tags_to_add = ["redis_host:%s" % instance.get('host'), "redis_port:%s" % instance.get('port')]
tags_to_add = ["redis_host:%s" % instance.get('host'), "redis_port:%s" % instance.get('port')]

if instance.get('db') is not None:
tags_to_add.append("db:%s" % instance.get('db'))
Expand Down Expand Up @@ -215,8 +219,23 @@ def _check_db(self, instance, custom_tags=None):
self.warning("{0} key not found in redis".format(key))
self.gauge('redis.key.length', 0, tags=key_tags)

# Save the replication delay for each slave
for key in info:
if self.slave_key_pattern.match(key) and isinstance(info[key], dict):
slave_offset = info[key].get('offset')
master_offset = info.get('master_repl_offset')
if slave_offset and master_offset and master_offset - slave_offset >= 0:
delay = master_offset - slave_offset
# Add id, ip, and port tags for the slave
slave_tags = tags[:]
for slave_tag in ('ip', 'port'):
if slave_tag in info[key]:
slave_tags.append('slave_{0}:{1}'.format(slave_tag, info[key][slave_tag]))
slave_tags.append('slave_id:%s' % key.lstrip('slave'))
self.gauge('redis.replication.delay', delay, tags=slave_tags)

def check(self, instance):
if (not "host" in instance or not "port" in instance) and not "unix_socket_path" in instance:
if ("host" not in instance or "port" not in instance) and "unix_socket_path" not in instance:
raise Exception("You must specify a host/port couple or a unix_socket_path")
custom_tags = instance.get('tags', [])
self._check_db(instance,custom_tags)
self._check_db(instance, custom_tags)
23 changes: 21 additions & 2 deletions ci/redis.rb
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,27 @@ def redis_rootdir
Rake::Task['ci:common:run_tests'].invoke(this_provides)
end

task :cleanup => ['ci:common:cleanup']
# FIXME: stop redis
task :cleanup => ['ci:common:cleanup'] do
# Shutdown redis
conf_files = ["#{ENV['TRAVIS_BUILD_DIR']}/ci/resources/redis/auth.conf",
"#{ENV['TRAVIS_BUILD_DIR']}/ci/resources/redis/noauth.conf"]
for f in conf_files do
pass, port = nil, nil
File.readlines(f).each do |line|
param = line.split(' ')
if param[0] == 'port'
port = param[1]
elsif param[0] == 'requirepass'
pass = param[1]
end
end
if pass and port
sh %(#{redis_rootdir}/src/redis-cli -p #{port} -a #{pass} SHUTDOWN)
elsif port
sh %(#{redis_rootdir}/src/redis-cli -p #{port} SHUTDOWN)
end
end
end

task :execute do
exception = nil
Expand Down
1 change: 1 addition & 0 deletions ci/resources/redis/auth.conf
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,4 @@ pidfile /tmp/dd-redis-auth.pid
bind 127.0.0.1
port 26379
requirepass datadog-is-devops-best-friend
slaveof 127.0.0.1 16379
62 changes: 52 additions & 10 deletions tests/test_redis.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,10 +49,10 @@ def test_redis_auth(self):
except Exception as e:
self.assertTrue(
# 2.8
'noauth authentication required' in str(e).lower()\
'noauth authentication required' in str(e).lower()
# previously
or 'operation not permitted' in str(e).lower()
, str(e))
or 'operation not permitted' in str(e).lower(),
str(e))

r = load_check('redisdb', {}, {})
try:
Expand All @@ -68,7 +68,7 @@ def test_redis_default(self):
'port': port
}

db = redis.Redis(port=port, db=14) # Datadog's test db
db = redis.Redis(port=port, db=14) # Datadog's test db
db.flushdb()
db.set("key1", "value")
db.set("key2", "value")
Expand Down Expand Up @@ -101,8 +101,8 @@ def assert_key_present(expected, present, tolerance):

# Assert that the keys metrics are tagged by db. just check db0, since
# it's the only one we can guarantee is there.
db_metrics = self._sort_metrics([m for m in metrics if m[0] in ['redis.keys',
'redis.expires'] and "redis_db:db14" in m[3]["tags"]])
db_metrics = self._sort_metrics(
[m for m in metrics if m[0] in ['redis.keys', 'redis.expires'] and "redis_db:db14" in m[3]["tags"]])
self.assertEquals(2, len(db_metrics))

self.assertEquals('redis.expires', db_metrics[0][0])
Expand All @@ -114,12 +114,19 @@ def assert_key_present(expected, present, tolerance):
# Service checks
service_checks = r.get_service_checks()
service_checks_count = len(service_checks)
self.assertTrue(type(service_checks) == type([]))
self.assertTrue(isinstance(service_checks, list))
self.assertTrue(service_checks_count > 0)
self.assertEquals(len([sc for sc in service_checks if sc['check'] == "redis.can_connect"]), 1, service_checks)
self.assertEquals(
len([sc for sc in service_checks if sc['check'] == "redis.can_connect"]), 1, service_checks)
# Assert that all service checks have the proper tags: host and port
self.assertEquals(len([sc for sc in service_checks if "redis_host:localhost" in sc['tags']]), service_checks_count, service_checks)
self.assertEquals(len([sc for sc in service_checks if "redis_port:%s" % port in sc['tags']]), service_checks_count, service_checks)
self.assertEquals(
len([sc for sc in service_checks if "redis_host:localhost" in sc['tags']]),
service_checks_count,
service_checks)
self.assertEquals(
len([sc for sc in service_checks if "redis_port:%s" % port in sc['tags']]),
service_checks_count,
service_checks)

# Run one more check and ensure we get total command count
# and other rates
Expand All @@ -129,6 +136,41 @@ def assert_key_present(expected, present, tolerance):
keys = [m[0] for m in metrics]
assert 'redis.net.commands' in keys

def test_redis_repl(self):
master_instance = {
'host': 'localhost',
'port': NOAUTH_PORT
}

slave_instance = {
'host': 'localhost',
'port': AUTH_PORT,
'password': 'datadog-is-devops-best-friend'
}

repl_metrics = [
'redis.replication.delay',
'redis.replication.backlog_histlen',
'redis.replication.delay',
'redis.replication.master_repl_offset',
]

master_db = redis.Redis(port=NOAUTH_PORT, db=14)
slave_db = redis.Redis(port=AUTH_PORT, password=slave_instance['password'], db=14)
master_db.flushdb()

# Assert that the replication works
master_db.set('replicated:test', 'true')
self.assertEquals(slave_db.get('replicated:test'), 'true')

r = load_check('redisdb', {}, {})
r.check(master_instance)
metrics = self._sort_metrics(r.get_metrics())

# Assert the presence of replication metrics
keys = [m[0] for m in metrics]
assert [x in keys for x in repl_metrics]

def _sort_metrics(self, metrics):
def sort_by(m):
return m[0], m[1], m[3]
Expand Down

0 comments on commit 92470f0

Please sign in to comment.