-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathwdazure.py
487 lines (421 loc) · 20.1 KB
/
wdazure.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
# -*- coding: utf-8 -*-
"""This module provides Azure class.
Author: Peter Pakos <[email protected]>
Copyright (C) 2019 WANdisco
"""
from __future__ import print_function
import datetime
import prettytable
import tzlocal
import iso8601
from azure.common.credentials import ServicePrincipalCredentials
from azure.mgmt.resource.subscriptions import SubscriptionClient
from azure.mgmt.resource import ResourceManagementClient
from azure.mgmt.compute import ComputeManagementClient
from azure.mgmt.network import NetworkManagementClient
from azure.mgmt.hdinsight import HDInsightManagementClient
from azure.monitor import MonitorClient
from msrestazure.azure_exceptions import CloudError
from CONFIG import CONFIG
import logging
from wdcloud import WDCloud
# Module-level logger, registered under the name 'cloud_tools'.
log = logging.getLogger('cloud_tools')
# Azure implementation built on the WDCloud base class. Its methods list virtual
# machines and HDInsight clusters, tag virtual machines, deallocate virtual machines
# and delete clusters; the remaining commands only report "not implemented".
class AZURE(WDCloud):
def __init__(self, *args, **kwargs):
    """Create the Azure SDK clients and cache resource group and region names.

    All arguments are forwarded unchanged to the parent constructor. Settings are
    read from ``CONFIG``; when the profile name contains "old" (case-insensitive)
    the ``OLD``-prefixed settings are used instead of the plain ``AZURE_*`` ones.
    """
    super(AZURE, self).__init__(*args, **kwargs)

    prefix = 'OLD' if 'old' in str(self._profile_name).lower() else ''

    def setting(name):
        # Look up a CONFIG attribute for the selected account.
        return getattr(CONFIG, prefix + name)

    self._subscription_id = setting('AZURE_SUBSCRIPTION_ID')
    client_id = setting('AZURE_CLIENT_ID')
    secret = setting('AZURE_SECRET')
    tenant = setting('AZURE_TENANT')
    self._credentials = ServicePrincipalCredentials(client_id=client_id, secret=secret, tenant=tenant)

    creds, sub_id = self._credentials, self._subscription_id
    self._subscription_client = SubscriptionClient(creds)
    self._compute_client = ComputeManagementClient(creds, sub_id)
    self._resource_client = ResourceManagementClient(creds, sub_id)
    self._network_client = NetworkManagementClient(creds, sub_id)
    self._monitor_client = MonitorClient(creds, sub_id)
    self._hdi_client = HDInsightManagementClient(creds, sub_id)

    # Names of every resource group in the subscription.
    self._resource_groups = [group.name for group in self._resource_client.resource_groups.list()]

    # self._regions is not created here; location names are appended to the existing object.
    locations = self._subscription_client.subscriptions.list_locations(sub_id)
    for loc in locations:
        self._regions.append(loc.name)
def list_hdi(self, warning_threshold, critical_threshold, disable_border, disable_header, notify, stop,
             *args, **kwargs):
    """Log a table of HDInsight clusters and optionally alert on / delete old ones.

    Every cluster returned by the HDInsight client is added to the table. The
    creator is taken from the first activity-log entry of the last 30 days whose
    operation name contains ``Microsoft.HDInsight/clusters/write``.

    Args:
        warning_threshold: Age in hours from which a creator's alert becomes
            'warning'. ``None`` disables the warning level.
        critical_threshold: Age in hours from which a creator's alert becomes
            'critical' and the cluster becomes a deletion candidate. ``None``
            disables the critical level (nothing is ever deleted).
        disable_border: When true, the table is rendered without a border.
        disable_header: When true, the table is rendered without a header row.
        notify: When true, ``self._send_alert`` is called once per creator.
        stop: When true, deletion candidates are passed to ``self._delete_cluster``.
        *args, **kwargs: Accepted and ignored.

    Clusters tagged ``exclude`` (any letter case) or in state ``Deleting`` are
    neither alerted on nor deleted; resource groups whose name contains "sales"
    are never deleted.
    """
    # A threshold of None means "level disabled"; multiplying None by 3600 would raise TypeError.
    warning_seconds = None if warning_threshold is None else warning_threshold * 3600
    critical_seconds = None if critical_threshold is None else critical_threshold * 3600

    table = prettytable.PrettyTable(
        ['Location', 'Name', 'Resource Group', 'Creator', 'Created Date', 'Uptime', 'Cluster State', 'Excluded'],
        sortby='Created Date', border=not disable_border, header=not disable_header, reversesort=True)
    table.align = 'l'

    local_tz = tzlocal.get_localzone()
    # datetime.now(tz) works with both pytz and zoneinfo time zones; ``tz.localize`` exists only on pytz.
    now = datetime.datetime.now(local_tz)

    # Loop-invariant parts of the activity-log query.
    start = datetime.datetime.now().date() - datetime.timedelta(days=30)
    select = ",".join(["caller", "eventName", "operationName", "eventTimestamp"])

    clusters = self._hdi_client.clusters.list()
    states_dict = {}
    stop_dict = {}     # resource group -> cluster names to delete
    dept_dict = {}     # creator -> resource-group prefixes (text before the first '-')
    rg_dict = {}       # cluster name -> resource group
    name_dict = {}     # cluster name -> cluster name
    uptime_dict = {}   # cluster name -> uptime string
    info_dict = {}     # creator -> location -> cluster names
    warning_dict = {}
    critical_dict = {}
    i = 0

    while True:
        # advance_page() signals the end of the result set by raising StopIteration.
        try:
            page = clusters.advance_page()
        except StopIteration:
            break

        for cluster in page:
            i += 1
            state = cluster.properties.cluster_state

            # The resource group is the path segment between '/resourceGroups/' and '/providers/'.
            rg = cluster.id.partition('/resourceGroups/')[2].partition('/providers/')[0]

            created = iso8601.parse_date(cluster.properties.created_date).astimezone(local_tz)
            created_date = created.strftime('%Y-%m-%d %H:%M:%S')
            seconds = self._date_diff(now, created)
            uptime = self._get_uptime(seconds)

            afilter = " and ".join([
                "eventTimestamp ge '%s'" % start,
                "eventChannels eq 'Admin, Operation'",
                "resourceUri eq '%s'" % cluster.id
            ])
            activity_logs = self._monitor_client.activity_logs.list(filter=afilter, select=select)
            creator = ''
            for alog in activity_logs:
                if alog.caller and ("Microsoft.HDInsight/clusters/write" in alog.operation_name.value):
                    creator = alog.caller.split('@', 1)[0]
                    break

            excluded = any(t.lower() == 'exclude' for t in cluster.tags) if cluster.tags else False

            table.add_row([cluster.location, cluster.name, rg, creator, created_date, uptime,
                           state, 'Yes' if excluded else 'No'])
            states_dict[state] = states_dict.get(state, 0) + 1

            is_critical = critical_seconds is not None and seconds >= critical_seconds
            is_warning = warning_seconds is not None and seconds >= warning_seconds

            if is_critical and not excluded and 'sales' not in str(rg).lower() and state != 'Deleting':
                stop_dict.setdefault(rg, []).append(cluster.name)

            if creator and notify and not excluded and state != 'Deleting':
                dept = rg.partition('-')[0]
                depts = dept_dict.setdefault(creator, [])
                if dept not in depts:
                    depts.append(dept)
                rg_dict[cluster.name] = rg
                name_dict[cluster.name] = cluster.name
                uptime_dict[cluster.name] = uptime
                info_dict.setdefault(creator, {}).setdefault(cluster.location, []).append(cluster.name)
                if is_critical:
                    critical_dict[creator] = True
                elif is_warning:
                    warning_dict[creator] = True

    log.debug('info_dict: %s', info_dict)
    log.debug('warning_dict: %s', warning_dict)
    log.debug('critical_dict: %s', critical_dict)
    log.info(table)

    out = ', '.join(['%s: %s' % (key, value) for (key, value) in sorted(states_dict.items())])
    if out:
        out = '(%s)' % out
    print('Time: %s (%s) | Clusters: %s %s' % (now.strftime('%Y-%m-%d %H:%M:%S'), str(local_tz), i, out))

    for user, region_ids in info_dict.items():
        if user in critical_dict:
            mail_type = 'critical'
        elif user in warning_dict:
            mail_type = 'warning'
        else:
            mail_type = 'info'
        self._send_alert(
            mail_type=mail_type,
            user=user,
            region_ids=region_ids,
            name_dict=name_dict,
            uptime_dict=uptime_dict,
            warning_threshold=warning_threshold,
            critical_threshold=critical_threshold,
            stop=stop,
            dept=dept_dict[user],
            rg_dict=rg_dict,
            resource='HDI cluster'
        )

    if stop and stop_dict:
        for rg, vms in stop_dict.items():
            # The deletion runs while the message is being formatted, so the line is printed afterwards.
            print('\nTerminating HDI clusters in Resource Group %s (%s)... %s' % (
                rg,
                ','.join(vms),
                'SUCCESS' if self._delete_cluster(rg, vms) else 'FAIL'))
def list(self, disable_border=False, disable_header=False, state=None, notify=False, stop=False,
         warning_threshold=None, critical_threshold=None, tag=None, *args, **kwargs):
    """Print a table of virtual machines and optionally alert on / deallocate old ones.

    Walks every VM of every resource group cached in ``self._resource_groups``.
    VMs whose location is not in ``self._regions`` or whose state is not in
    ``state`` are skipped. The "User" column is the caller of the first
    activity-log entry of the last 30 days whose operation name contains
    ``virtualMachines/start/action`` or ``virtualMachines/write``.

    Args:
        disable_border: When true, the table is rendered without a border.
        disable_header: When true, the table is rendered without a header row.
        state: States to include. Defaults to running, stopped, starting,
            stopping, busy and generalized.
        notify: When true, ``self._send_alert`` is called once per user that
            owns a running, non-excluded VM.
        stop: When true, deallocation candidates are passed to ``self._stop_instance``.
        warning_threshold: Uptime in hours from which a user's alert becomes
            'warning'. ``None`` (the default) disables the warning level.
        critical_threshold: Uptime in hours from which a user's alert becomes
            'critical' and the VM becomes a deallocation candidate. ``None``
            (the default) disables the critical level.
        tag: Accepted but not used by this method.
        *args, **kwargs: Accepted and ignored.

    VMs tagged ``exclude`` (any letter case) are neither alerted on nor
    deallocated; resource groups whose name contains "sales" are never deallocated.
    """
    if not state:
        state = ['running', 'stopped', 'starting', 'stopping', 'busy', 'generalized']

    # The thresholds default to None; multiplying None by 3600 would raise TypeError
    # as soon as a running VM is seen, so None is treated as "level disabled".
    warning_seconds = None if warning_threshold is None else warning_threshold * 3600
    critical_seconds = None if critical_threshold is None else critical_threshold * 3600

    table = prettytable.PrettyTable(
        ['Region', 'RG', 'Name', 'Type', 'Image', 'State', 'Launch time', 'Uptime', 'User',
         'Private IP', 'Public IP', 'Excluded'],
        border=not disable_border, header=not disable_header, reversesort=True, sortby='Launch time')
    table.align = 'l'

    i = 0
    states_dict = {}
    uptime_dict = {}   # VM name -> uptime string
    name_dict = {}     # VM name -> VM name
    stop_dict = {}     # resource group -> VM names to deallocate
    info_dict = {}     # user -> region -> VM names
    warning_dict = {}
    critical_dict = {}
    dept_dict = {}     # user -> resource groups
    rg_dict = {}       # VM name -> resource group

    local_tz = tzlocal.get_localzone()
    # datetime.now(tz) works with both pytz and zoneinfo time zones; ``tz.localize`` exists only on pytz.
    now = datetime.datetime.now(local_tz)

    # Loop-invariant parts of the activity-log query.
    start = datetime.datetime.now().date() - datetime.timedelta(days=30)
    select = ",".join(["caller", "eventName", "operationName", "eventTimestamp"])

    for resource_group in self._resource_groups:
        instances = self._compute_client.virtual_machines.list(resource_group)
        for instance in instances:
            region = instance.location
            if region not in self._regions:
                continue

            afilter = " and ".join([
                "eventTimestamp ge '%s'" % start,
                "eventChannels eq 'Admin, Operation'",
                "resourceUri eq '%s'" % instance.id
            ])
            activity_logs = self._monitor_client.activity_logs.list(filter=afilter, select=select)
            last_user = ''
            for alog in activity_logs:
                if alog.caller and ("virtualMachines/start/action" in alog.operation_name.value or
                                    "virtualMachines/write" in alog.operation_name.value):
                    last_user = alog.caller.split('@', 1)[0]
                    break

            instance_data = self._compute_client.virtual_machines.get(resource_group, instance.name,
                                                                      expand='instanceView')
            # The state is the text after 'VM ' in the second status entry; when that entry
            # or that text is missing the VM is reported as 'busy'.
            try:
                instance_state = str(instance_data.instance_view.statuses[1].display_status).split('VM ')[1]
            except IndexError:
                instance_state = 'busy'
            if instance_state == 'deallocated':
                instance_state = 'stopped'
            elif instance_state == 'deallocating':
                instance_state = 'stopping'

            if instance_state not in state:
                if len(state) > 1:
                    print('UNKNOWN INSTANCE STATE: %s\n' % instance_state)
                continue

            instance_type = instance_data.hardware_profile.vm_size
            try:
                image_ref = instance_data.storage_profile.image_reference
                image_name = image_ref.offer + ' ' + image_ref.sku
            except (AttributeError, TypeError):
                image_name = ''

            # Elements 4 and 8 of the '/'-split resource ID are used as resource group and resource name.
            nic_id_parts = instance_data.network_profile.network_interfaces[0].id.split('/')
            nic_group = nic_id_parts[4]
            nic_name = nic_id_parts[8]
            try:
                net_interface = self._network_client.network_interfaces.get(nic_group, nic_name)
                private_ip_address = net_interface.ip_configurations[0].private_ip_address
            except CloudError:
                private_ip_address = ''
                net_interface = None

            # AttributeError covers both a failed NIC lookup (net_interface is None) and
            # an IP configuration without a public address.
            try:
                public_ip_parts = net_interface.ip_configurations[0].public_ip_address.id.split('/')
                public_ip_address = self._network_client.public_ip_addresses.get(
                    public_ip_parts[4], public_ip_parts[8]).ip_address or ''
            except AttributeError:
                public_ip_address = ''

            uptime = ''
            launch_time = ''
            excluded = any(t.lower() == 'exclude' for t in instance.tags) if instance.tags else False

            if instance_state == 'running':
                try:
                    launch_time_src = instance_data.instance_view.disks[0].statuses[0].time.astimezone(local_tz)
                    launch_time = launch_time_src.strftime('%Y-%m-%d %H:%M:%S')
                except AttributeError:
                    # NOTE(review): an empty string is handed to self._date_diff in this case;
                    # confirm that helper accepts it.
                    launch_time_src = ''
                    launch_time = ''
                seconds = self._date_diff(now, launch_time_src)
                uptime = self._get_uptime(seconds)

                is_critical = critical_seconds is not None and seconds >= critical_seconds
                is_warning = warning_seconds is not None and seconds >= warning_seconds

                if is_critical and not excluded and 'sales' not in str(resource_group).lower():
                    stop_dict.setdefault(resource_group, []).append(instance.name)

                if last_user and notify and not excluded:
                    groups = dept_dict.setdefault(last_user, [])
                    if resource_group not in groups:
                        groups.append(resource_group)
                    rg_dict[instance.name] = resource_group
                    name_dict[instance.name] = instance.name
                    uptime_dict[instance.name] = uptime
                    info_dict.setdefault(last_user, {}).setdefault(region, []).append(instance.name)
                    if is_critical:
                        critical_dict[last_user] = True
                    elif is_warning:
                        warning_dict[last_user] = True

            i += 1
            table.add_row([
                instance.location,
                resource_group,
                instance.name,
                instance_type,
                image_name,
                instance_state,
                launch_time,
                uptime,
                last_user,
                private_ip_address,
                public_ip_address,
                'Yes' if excluded else 'No'
            ])
            states_dict[instance_state] = states_dict.get(instance_state, 0) + 1

    print(table)
    out = ', '.join(['%s: %s' % (key, value) for (key, value) in sorted(states_dict.items())])
    if out:
        out = '(%s)' % out
    print('Time: %s (%s) | Instances: %s %s' % (now.strftime('%Y-%m-%d %H:%M:%S'), str(local_tz), i, out))

    if info_dict:
        print()
    for user, region_ids in info_dict.items():
        if user in critical_dict:
            mail_type = 'critical'
        elif user in warning_dict:
            mail_type = 'warning'
        else:
            mail_type = 'info'
        self._send_alert(
            mail_type=mail_type,
            user=user,
            region_ids=region_ids,
            name_dict=name_dict,
            uptime_dict=uptime_dict,
            warning_threshold=warning_threshold,
            critical_threshold=critical_threshold,
            stop=stop,
            dept=dept_dict[user],
            rg_dict=rg_dict
        )

    if stop and stop_dict:
        for rg, vms in stop_dict.items():
            # The deallocation runs while the message is being formatted, so the line is printed afterwards.
            print('\nStopping instances in Resource Group %s (%s)... %s' % (
                rg,
                ','.join(vms),
                'SUCCESS' if self._stop_instance(rg, vms) else 'FAIL'))
def _create_tag(self, resource_group, instance, key, value):
    """Set tag ``key`` to ``value`` on a virtual machine, keeping its other tags.

    Args:
        resource_group: Name of the resource group containing the VM.
        instance: VM object; its ``name``, ``location`` and ``tags`` are read.
        key: Tag name to create or overwrite.
        value: Tag value.

    Returns:
        True when the update completed, False when it raised ``CloudError``.
    """
    # The request carries the complete tag set, so the existing tags have to be
    # sent along or they would be lost. An existing tag whose name differs only
    # in letter case is replaced rather than duplicated.
    existing = getattr(instance, 'tags', None) or {}
    tags = {name: val for name, val in existing.items() if name.lower() != key.lower()}
    tags[key] = value
    try:
        self._compute_client.virtual_machines.create_or_update(resource_group, instance.name, {
            'location': instance.location,
            'tags': tags
        }).wait()
    except CloudError:
        return False
    return True
def _delete_tag(self, resource_group, instance, key):
    """Remove tag ``key`` from a virtual machine, keeping its other tags.

    Args:
        resource_group: Name of the resource group containing the VM.
        instance: VM object; its ``name``, ``location`` and ``tags`` are read.
        key: Tag name to remove; matched without regard to letter case.

    Returns:
        True when the update completed, False when it raised ``CloudError``.
    """
    # Submitting the tag with an empty value would leave the tag name in place
    # (and callers that test for the presence of a tag name would still see it),
    # so the remaining tags are resubmitted without it instead.
    existing = getattr(instance, 'tags', None) or {}
    tags = {name: val for name, val in existing.items() if name.lower() != key.lower()}
    try:
        self._compute_client.virtual_machines.create_or_update(resource_group, instance.name, {
            'location': instance.location,
            'tags': tags
        }).wait()
    except CloudError:
        return False
    return True
def tag(self, instance_id, key, value='', delete=False, *args, **kwargs):
    """Create or delete a tag on the named virtual machines and print the outcome.

    The VMs are looked up in the resource group named after the upper-cased
    profile name.

    Args:
        instance_id: VM names to tag; a single name may be given as a string.
        key: Tag name.
        value: Tag value used when creating the tag.
        delete: When true the tag is deleted instead of created.
        *args, **kwargs: Accepted and ignored.
    """
    # A bare string would otherwise be matched by substring and joined character by character.
    if isinstance(instance_id, str):
        instance_id = [instance_id]

    resource_group = self._profile_name.upper()
    found = []
    for instance in self._compute_client.virtual_machines.list(resource_group):
        if instance.name not in instance_id:
            continue
        found.append(instance.name)
        if delete:
            print('Instance ID %s found in region %s, deleting tag \'%s\': ' %
                  (instance.name, instance.location, key), end='')
            response = self._delete_tag(resource_group, instance, key)
        else:
            print('Instance ID %s found in region %s, creating tag \'%s\': ' %
                  (instance.name, instance.location, key), end='')
            response = self._create_tag(resource_group, instance, key, value)
        print('OK' if response else 'FAIL')
        # Stop listing as soon as every requested VM has been handled.
        if len(instance_id) == len(found):
            break

    # Report every requested name that was not seen, including after a partial match.
    missing = [name for name in instance_id if name not in found]
    if missing:
        print('Instance ID %s not found in any region' % (', '.join(missing)))
def _stop_instance(self, rg, vms):
    """Deallocate each named virtual machine, waiting for every operation to finish.

    A failure on one VM does not prevent the remaining ones from being attempted.

    Args:
        rg: Name of the resource group containing the VMs.
        vms: Iterable of VM names.

    Returns:
        True when every deallocation succeeded (or ``vms`` is empty), False otherwise.
    """
    succeeded = True
    for vm in vms:
        try:
            self._compute_client.virtual_machines.deallocate(rg, vm).wait()
        except Exception as e:
            # Logged at error level with the VM name so a False result can be
            # diagnosed without enabling debug output.
            log.error('Failed to deallocate VM %s in resource group %s: %s', vm, rg, e)
            succeeded = False
    return succeeded
def _delete_cluster(self, rg, clusters):
    """Request deletion of each named HDInsight cluster.

    A failure on one cluster does not prevent the remaining ones from being attempted.

    Args:
        rg: Name of the resource group containing the clusters.
        clusters: Iterable of cluster names.

    Returns:
        True when every delete call returned without raising (or ``clusters``
        is empty), False otherwise.
    """
    succeeded = True
    for cluster in clusters:
        try:
            # NOTE(review): unlike the VM deallocation in _stop_instance, the result of
            # this call is not waited on, so only errors raised by the call itself are
            # seen here - confirm that is intended.
            self._hdi_client.clusters.delete(rg, cluster)
        except Exception as e:
            # Logged at error level with the cluster name so a False result can be
            # diagnosed without enabling debug output.
            log.error('Failed to delete HDI cluster %s in resource group %s: %s', cluster, rg, e)
            succeeded = False
    return succeeded
def sg(self, *args, **kwargs):
    """Log that the ``sg`` command is not implemented and exit with status 1.

    All arguments are accepted and ignored.

    Raises:
        SystemExit: Always, with exit code 1.
    """
    log.critical('Command not implemented')
    # The ``exit`` helper is added by the ``site`` module for interactive use and
    # is not guaranteed to exist; raising SystemExit directly always works.
    raise SystemExit(1)
def public_buckets(self, *args, **kwargs):
    """Log that the ``public_buckets`` command is not implemented and exit with status 1.

    All arguments are accepted and ignored.

    Raises:
        SystemExit: Always, with exit code 1.
    """
    log.critical('Command not implemented')
    # The ``exit`` helper is added by the ``site`` module for interactive use and
    # is not guaranteed to exist; raising SystemExit directly always works.
    raise SystemExit(1)
def run(self, *args, **kwargs):
    """Log that the ``run`` command is not implemented and exit with status 1.

    All arguments are accepted and ignored.

    Raises:
        SystemExit: Always, with exit code 1.
    """
    log.critical('Command not implemented')
    # The ``exit`` helper is added by the ``site`` module for interactive use and
    # is not guaranteed to exist; raising SystemExit directly always works.
    raise SystemExit(1)
def create_image(self, *args, **kwargs):
    """Log that the ``create_image`` command is not implemented and exit with status 1.

    All arguments are accepted and ignored.

    Raises:
        SystemExit: Always, with exit code 1.
    """
    log.critical('Command not implemented')
    # The ``exit`` helper is added by the ``site`` module for interactive use and
    # is not guaranteed to exist; raising SystemExit directly always works.
    raise SystemExit(1)
def terminate(self, *args, **kwargs):
    """Log that the ``terminate`` command is not implemented and exit with status 1.

    All arguments are accepted and ignored.

    Raises:
        SystemExit: Always, with exit code 1.
    """
    log.critical('Command not implemented')
    # The ``exit`` helper is added by the ``site`` module for interactive use and
    # is not guaranteed to exist; raising SystemExit directly always works.
    raise SystemExit(1)
def stop(self, *args, **kwargs):
    """Log that the ``stop`` command is not implemented and exit with status 1.

    All arguments are accepted and ignored.

    Raises:
        SystemExit: Always, with exit code 1.
    """
    log.critical('Command not implemented')
    # The ``exit`` helper is added by the ``site`` module for interactive use and
    # is not guaranteed to exist; raising SystemExit directly always works.
    raise SystemExit(1)
def start(self, *args, **kwargs):
    """Log that the ``start`` command is not implemented and exit with status 1.

    All arguments are accepted and ignored.

    Raises:
        SystemExit: Always, with exit code 1.
    """
    log.critical('Command not implemented')
    # The ``exit`` helper is added by the ``site`` module for interactive use and
    # is not guaranteed to exist; raising SystemExit directly always works.
    raise SystemExit(1)