Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Checks.d #194

Merged
merged 50 commits into from
Sep 26, 2012
Merged
Show file tree
Hide file tree
Changes from 14 commits
Commits
Show all changes
50 commits
Select commit Hold shift + click to select a range
9c62fad
Move MetricAggregator out of dogstatsd
conorbranagan Sep 14, 2012
f9c8cec
first stab at the new checks interface using the aggregator
conorbranagan Sep 14, 2012
e7fd534
Merge branch 'master' into checks.d
conorbranagan Sep 19, 2012
5f33617
whitespace cleanup
conorbranagan Sep 19, 2012
a82b756
Use the dogstatsD aggregator in the new checks.d interface
conorbranagan Sep 19, 2012
add5ef4
Load checks and configs on agent start
conorbranagan Sep 19, 2012
570c979
Run the new checks.d-style checks in the doChecks() call
conorbranagan Sep 19, 2012
f85d276
Add the first checks.d check, haproxy. Update the test.
conorbranagan Sep 19, 2012
6d6ed5a
remove the old haproxy check
conorbranagan Sep 19, 2012
733009e
update haproxy example file
conorbranagan Sep 19, 2012
d693177
fix hostname in aggregator
conorbranagan Sep 19, 2012
1e12b7a
add pyyaml to requirements
conorbranagan Sep 19, 2012
62f5414
Merge branch 'master' into checks.d
conorbranagan Sep 19, 2012
66d93b4
use the pyyaml c libs when available
conorbranagan Sep 19, 2012
656d0aa
bundle pyyaml
conorbranagan Sep 19, 2012
7b65bca
add checks.d to packaging
conorbranagan Sep 19, 2012
2121f97
name the aggregator logger with `__name__`
conorbranagan Sep 20, 2012
98bab8f
warn when the rate is not increasing or the interval is 0
conorbranagan Sep 20, 2012
37c35a8
save the last point when flushing the rate
conorbranagan Sep 20, 2012
8aacf90
remove has_metrics()
conorbranagan Sep 20, 2012
f2a2d38
CheckD => AgentCheck
conorbranagan Sep 20, 2012
90a2997
`get_checks()` => `load_checks_directory()`
conorbranagan Sep 20, 2012
9384cdb
add docstrings for the AgentCheck interface
conorbranagan Sep 20, 2012
fdeb382
small tweaks to haproxy check
conorbranagan Sep 20, 2012
323095d
add a unit test for rate
conorbranagan Sep 20, 2012
27e44a4
standardize the config style so we can do generic multi-instance checks
conorbranagan Sep 20, 2012
318c6b6
Add backwards-compatibility with old style agent config
conorbranagan Sep 20, 2012
47b7f44
percheck_config => instances
conorbranagan Sep 20, 2012
826fb6d
Send the full instance payload into the check instead of breaking into
conorbranagan Sep 20, 2012
6939ab8
don't send diagnostic stats in main statsd flush
conorbranagan Sep 20, 2012
811c620
package yaml in the datadog-agent-lib
conorbranagan Sep 20, 2012
2fcb0ae
package checks.d with the agent
conorbranagan Sep 21, 2012
8e6f2c0
add conf.d with haproxy example
conorbranagan Sep 21, 2012
01e14ce
Add conf.d files to packaging
conorbranagan Sep 21, 2012
8ed03a0
fix the backwards compatibility
conorbranagan Sep 21, 2012
29c894c
Merge branch 'master' into checks.d
conorbranagan Sep 24, 2012
754b569
Tidy up AgentCheck docstrings, add `decrement`
conorbranagan Sep 25, 2012
371290a
fix syntax in logging
conorbranagan Sep 25, 2012
8075bab
Add README for adding custom checks (to go in docs eventually)
conorbranagan Sep 25, 2012
27892f1
don't require CHECK global in checks anymore
conorbranagan Sep 25, 2012
0bf0901
always format timestamps as ints
conorbranagan Sep 25, 2012
26a651a
fix test check module loader to search all classes
conorbranagan Sep 25, 2012
04dd19e
Port redis check to checks.d
conorbranagan Sep 25, 2012
3326136
Port postgres check to checks.d
conorbranagan Sep 25, 2012
12d0484
Remove imports of old pg check
conorbranagan Sep 25, 2012
e321a2b
add set to AgentCheck interface
conorbranagan Sep 25, 2012
f653e5b
syntax error
conorbranagan Sep 25, 2012
b00c69a
yet another syntax bug
conorbranagan Sep 25, 2012
b4e9251
move postgres.yaml to postgres.yaml.example
conorbranagan Sep 26, 2012
cf068d1
Merge branch 'master' into checks.d
conorbranagan Sep 26, 2012
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
55 changes: 29 additions & 26 deletions agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,26 +31,29 @@
if int(sys.version_info[1]) <= 3:
sys.stderr.write("Datadog agent requires python 2.4 or later.\n")
sys.exit(2)

# Custom modules
from checks.common import checks
from checks.ec2 import EC2
from config import get_config, get_system_stats, get_parsed_args
from config import get_config, get_system_stats, get_parsed_args, get_checks
from daemon import Daemon
from emitter import http_emitter
from util import Watchdog

# Override the generic daemon class to run our checks
class agent(Daemon):
class agent(Daemon):
def run(self, agentConfig=None, run_forever=True):
"""Main loop of the collector"""
agentLogger = logging.getLogger('agent')
systemStats = get_system_stats()
agentLogger.debug('System Properties: ' + str(systemStats))

if agentConfig is None:
agentConfig = get_config()

# Load the checks.d checks
checksd = get_checks(agentConfig)

# Try to fetch the instance ID from EC2 if no hostname has been set
# in the config file
if agentConfig.get('hostname') is None and agentConfig.get('use_ec2_instance_id'):
Expand All @@ -60,14 +63,14 @@ def run(self, agentConfig=None, run_forever=True):
agentConfig['hostname'] = instanceId
else:
agentLogger.info('Not running on EC2, using hostname to identify this server')

emitters = [http_emitter]
for emitter_spec in [s.strip() for s in agentConfig.get('custom_emitters', '').split(',')]:
if len(emitter_spec) == 0: continue
emitters.append(modules.load(emitter_spec, 'emitter'))

check_freq = int(agentConfig['check_freq'])

# Checks instance
c = checks(agentConfig, emitters)

Expand All @@ -78,15 +81,15 @@ def run(self, agentConfig=None, run_forever=True):
watchdog.reset()

# Run checks once, to get once-in-a-run data
c.doChecks(True, systemStats)
c.doChecks(True, systemStats, checksd)

# Main loop
while run_forever:
if watchdog is not None:
watchdog.reset()
time.sleep(check_freq)
c.doChecks()
c.doChecks(checksd=checksd)

def setupLogging(agentConfig):
"""Configure logging to use syslog whenever possible.
Also controls debug_mode."""
Expand All @@ -105,11 +108,11 @@ def setupLogging(agentConfig):
# Special-case macs
if sys.platform == 'darwin':
sys_log_addr = "/var/run/syslog"

handler = SysLogHandler(address=sys_log_addr, facility=SysLogHandler.LOG_DAEMON)
formatter = logging.Formatter("dd-agent - %(name)s - %(levelname)s - %(message)s")
handler.setFormatter(formatter)
rootLog.addHandler(handler)
rootLog.addHandler(handler)
logging.info('Logging to syslog is set up')
except Exception,e:
sys.stderr.write("Error while setting up syslog logging (%s). No logging available" % str(e))
Expand All @@ -128,7 +131,7 @@ def getPidFile(pid_dir=PID_DIR):
except:
logging.exception("Cannot locate pid file, defaulting to /tmp/%s" % PID_FILE)
# continue

# if all else fails
if os.access("/tmp", os.W_OK):
logging.warn("Pid file: /tmp/%s" % PID_FILE)
Expand Down Expand Up @@ -158,42 +161,42 @@ def getPid(pid_dir=PID_DIR):
except:
logging.exception("Cannot read pid")
return None
# Control of daemon
if __name__ == '__main__':

# Control of daemon
if __name__ == '__main__':
options, args = get_parsed_args()
agentConfig = get_config()

# Logging
setupLogging(agentConfig)

argLen = len(sys.argv)

if len(args) > 0:
command = args[0]

if options.clean:
cleanPidFile()

pidFile = getPidFile()
daemon = agent(pidFile)

if 'start' == command:
logging.info('Start daemon')
daemon.start()

elif 'stop' == command:
logging.info('Stop daemon')
daemon.stop()

elif 'restart' == command:
logging.info('Restart daemon')
daemon.restart()

elif 'foreground' == command:
logging.info('Running in foreground')
daemon.run()

elif 'status' == command:
pid = getPid()
if pid is not None:
Expand All @@ -206,9 +209,9 @@ def getPid(pid_dir=PID_DIR):
else:
sys.stderr.write('Unknown command: %s.\n' % sys.argv[1])
sys.exit(2)

sys.exit(0)

else:
sys.stderr.write('Usage: %s start|stop|restart|foreground|status' % sys.argv[0])
sys.exit(2)
Loading