-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathhealthcheck.py
executable file
·79 lines (65 loc) · 2.14 KB
/
healthcheck.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
#!/usr/bin/env python3
'''
healthcheck reads the file ~/etc/hchecklist.txt; each line is a pathname to a
sensorfs sensor entity - each of these directories should have an entry called time.
Since this is a consistent entry for any sensor, it is used to determine freshness of data.
For each path in the list the file modification time is checked against the current time
and if the difference is greater than maxAge seconds it is reported as stale. If the path
doesn't exist or is unreadable it is reported as missing.
When errors are found they are written to an error file, ~/.healthcheck.errors.
If there was no error file when healthcheck runs, an email is sent to errors_to.
If the file already existed, further emails aren't sent.
If there are no errors and the error file exists (i.e., any errors are cleared),
the error file is removed.
'''
import os
import glob
import time
import sys
# --- Module-level configuration and shared state -------------------------
# Reference timestamp (seconds since the epoch) taken when the script starts.
now: float = time.time()
# A sensor's `time` entry older than this many seconds counts as stale.
maxAge: int = 180
# Collects one message per failing sensor.
error: list = []
# Address the error report is mailed to.
errors_to: str = '[email protected]'
# Log file the check appends to.
logfile: str = '/home/user/.local/logs/healthcheck.log'
# Error file; its presence means errors have already been reported.
efile: str = '/home/user/.healthcheck.errors'
def sendErrors(errors_to, error, sendmail='/sbin/sendmail'):
    """Mail an error report through a sendmail-compatible program.

    Args:
        errors_to: Recipient address(es); several may be given separated
            by whitespace.
        error: Report text, sent as the message body.
        sendmail: Path of the mailer executable to run.

    Returns:
        True if the mailer ran and exited with status 0. False if no
        recipient was given, the mailer could not be started, or it
        exited with a non-zero status.
    """
    # Imported here because only this function needs it.
    import subprocess

    recipients = errors_to.split()
    if not recipients:
        return False

    # An argument list (no shell) keeps any metacharacters in the recipient
    # from being interpreted as shell syntax.
    argv = [sendmail] + '-f [email protected]'.split() + recipients
    try:
        result = subprocess.run(
            argv,
            # The message starts with blank lines, i.e. it carries no header
            # lines of its own; the report is the body.
            input=f'\n\n{error}\n',
            universal_newlines=True,
        )
    except OSError:
        # Mailer missing or not executable: report failure instead of crashing.
        return False
    return result.returncode == 0
def _check_sensor(path, now, max_age):
    """Return an error message for one sensor directory, or None if it is fresh.

    Args:
        path: Pathname of the sensor directory; its last two components
            are used as ``host/sensor`` in the message.
        now: Current time in seconds since the epoch.
        max_age: Maximum allowed age, in seconds, of the ``time`` entry.

    Returns:
        ``'<host>/<sensor>: missing'`` if the ``time`` entry cannot be
        stat'ed, ``'<host>/<sensor>: stale data'`` if it is older than
        ``max_age``, otherwise None.
    """
    sensor = os.path.basename(path)
    host = os.path.basename(os.path.dirname(path))
    sen = f'{host}/{sensor}'
    try:
        # A single stat() call covers both "does not exist" and "unreadable"
        # and cannot race with the entry disappearing between two checks.
        mtime = os.stat(os.path.join(path, 'time')).st_mtime
    except OSError:
        return f'{sen}: missing'
    if now - mtime > max_age:
        return f'{sen}: stale data'
    return None


with open(logfile, "a") as log:
    ecnt = 0
    now = time.time()
    with open('/home/user/etc/hchecklist.txt') as f:
        # Skip blank lines so an empty list or a stray newline is not
        # reported as a missing sensor.
        paths = [line.strip() for line in f if line.strip()]
    for p in paths:
        emsg = _check_sensor(p, now, maxAge)
        if emsg is None:
            continue
        # Every problem, missing or stale, goes to the log.
        print(emsg, file=log)
        error.append(emsg)
        ecnt += 1

if error:
    errors = '\n'.join(error)
    report = f'{ecnt} error(s): {errors}'
    # The error file doubles as the "already notified" marker: mail is sent
    # only when it does not exist yet.
    if not os.path.exists(efile):
        sendErrors(errors_to, report)
    with open(efile, 'a') as f:
        # Trailing newline keeps successive reports on separate lines.
        f.write(report + '\n')
else:
    # All clear: drop the marker so the next failure triggers a new mail.
    try:
        os.unlink(efile)
    except FileNotFoundError:
        pass

with open('/tmp/healthcheck.ran', 'w') as f:
    print('Ok', file=f)

# Exit statuses are limited to 0-255; clamp so that e.g. 256 errors do not
# wrap around to a "success" status of 0.
sys.exit(min(ecnt, 255))