TLData.py
#!/usr/bin/env python3
import os
import traceback
import logging
import gzip

try:
    import simplejson as json
except ImportError:
    import json

TL_DATA_DIR = './data'

logger = logging.getLogger(__name__)

class TLIndex:
    '''
    Tracks the current datafile index for a (namespace, source) pair.
    The index is persisted as a single integer in a hidden '.metafile'
    inside the data directory and is bumped each time a new datafile
    is started.
    '''
    def __init__(self, namespace, source):
        self.namespace = namespace
        self.source = source
        baseDirName = os.path.join(TL_DATA_DIR, self.namespace, self.source)
        metafileName = os.path.join(baseDirName, '.metafile')
        if not os.path.isdir(baseDirName):
            try:
                logger.info("Creating directory {0}".format(baseDirName))
                os.makedirs(baseDirName)
            except OSError:
                logger.error("Cannot create directory {0}".format(baseDirName))
        if not os.path.isfile(metafileName):
            try:
                logger.info("Creating metafile {0}".format(metafileName))
                with open(metafileName, 'w') as metafile:
                    metafile.write('0')
            except OSError:
                logger.error("Cannot create metafile {0}".format(metafileName))

    def getBaseDatafileName(self):
        '''Return the current datafile path, e.g. ./data/<namespace>/<source>/<index>.'''
        try:
            baseDirName = os.path.join(TL_DATA_DIR, self.namespace, self.source)
            metafileName = os.path.join(baseDirName, '.metafile')
            with open(metafileName, 'r') as metafile:
                index = int(metafile.read())
            return os.path.join(baseDirName, str(index))
        except Exception:
            logger.error(traceback.format_exc())

    def incrementIndex(self):
        '''Advance the persisted index by one, rewriting the metafile in place.'''
        try:
            baseDirName = os.path.join(TL_DATA_DIR, self.namespace, self.source)
            metafileName = os.path.join(baseDirName, '.metafile')
            with open(metafileName, 'r+') as metafile:
                index = int(metafile.read())
                metafile.seek(0)
                metafile.truncate()
                metafile.write(str(index + 1))
        except Exception:
            logger.error(traceback.format_exc())
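
# --- Usage sketch (illustrative addition, not part of the original module).
# A minimal example of the index-rotation pattern above; the namespace and
# source names are hypothetical.
def _tlindex_example():
    idx = TLIndex('demo_namespace', 'demo_source')
    current = idx.getBaseDatafileName()  # e.g. './data/demo_namespace/demo_source/0'
    idx.incrementIndex()                 # the next datafile becomes '.../1'
    return current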

class TLRawData:
    '''
    Writes raw data collected by the TL agent to disk.
    '''
    def write(self, baseDatafileName, data, compressed=False):
        try:
            if compressed:
                # gzip-compressed payloads arrive as bytes; inflate to text first
                data = gzip.decompress(data).decode('utf-8')
            logger.info("Creating raw file {0}".format(baseDatafileName))
            with open(baseDatafileName, 'w') as datafile:
                # adding a newline at the end to prevent 'diff' from complaining
                datafile.write(data + '\n')
        except Exception:
            logger.error(traceback.format_exc())

class TLRawDataIndex:
    '''
    Flattens raw data collected by the TL agent and writes it to disk.
    The output file can be fed into Solr.
    '''
    def write(self, baseDatafileName, data, compressed=False):
        datafileName = baseDatafileName + '.index'
        try:
            if compressed:
                data = gzip.decompress(data).decode('utf-8')
            j = json.loads(data)
            hostname = j['hostname_s']
            collectionTime = j['collection_dt']
            # denormalize host and collection time into every file record so
            # each record is a self-contained Solr document
            files = []
            for fileRecord in j['files']:
                fileRecord['id'] = fileRecord['name_s'] + ':' + collectionTime
                fileRecord['hostname_s'] = hostname
                fileRecord['collection_dt'] = collectionTime
                files.append(fileRecord)
            logger.info("Creating raw index file {0}".format(datafileName))
            with open(datafileName, 'w') as datafile:
                json.dump(files, datafile, indent=2)
                # adding a newline at the end to prevent 'diff' from complaining
                datafile.write('\n')
        except Exception:
            logger.error(traceback.format_exc())
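
# --- End-to-end usage sketch (illustrative addition, not part of the
# original module). The agent payload below is a hypothetical sample that
# follows the field names the classes above expect ('hostname_s',
# 'collection_dt', 'files', 'name_s').
if __name__ == '__main__':
    logging.basicConfig(level=logging.INFO)
    sample = json.dumps({
        'hostname_s': 'host1.example.com',
        'collection_dt': '2015-01-01T00:00:00Z',
        'files': [{'name_s': '/etc/hosts', 'size_l': 220}],
    })
    index = TLIndex('demo_namespace', 'demo_source')
    baseDatafileName = index.getBaseDatafileName()
    TLRawData().write(baseDatafileName, sample)       # raw copy on disk
    TLRawDataIndex().write(baseDatafileName, sample)  # flattened .index for Solr
    index.incrementIndex()                            # rotate to the next datafile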