-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdata_structs.py
225 lines (199 loc) · 8.91 KB
/
data_structs.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
import dynamic_loader as loader
# Load the json (JSON) module; after the call the loader exposes it as
# loader.json
loader.load_json()
json = loader.json
# Load the Metadata_Record class; after the call the loader exposes it as
# loader.Metadata_Record
loader.load_Metadata_Record()
Metadata_Record = loader.Metadata_Record
# Lookup table written by setTitle_fromSCN: maps an SCN to its title
bySCN = {}
# Lookup table written by setSCN_fromTitle: maps a title to its SCN
byTitle = {}
# Report whether the given SCN is a key of the module-level bySCN table
def hasSCN(scn):
    is_known = scn in bySCN
    return is_known
# Report whether the given title is a key of the module-level byTitle table
def hasTitle(title):
    is_known = title in byTitle
    return is_known
# Look up the SCN stored in byTitle under the given title; a title that is
# not a key of byTitle raises KeyError
def getSCN_fromTitle(title):
    scn = byTitle[title]
    return scn
# Look up the title stored in bySCN under the given SCN; an SCN that is
# not a key of bySCN raises KeyError
def getTitle_fromSCN(scn):
    title = bySCN[scn]
    return title
# Store the title under the given SCN in bySCN, replacing any entry that
# was already stored for that SCN
def setTitle_fromSCN(scn, title):
    bySCN[scn] = title
# Store the SCN under the given title in byTitle, replacing any entry that
# was already stored for that title
def setSCN_fromTitle(title, scn):
    byTitle[title] = scn
#----------------------------------------------------#
# Purpose: Find the record whose attribute holds the #
#          requested value                           #
# Parameters: records - The iterable of              #
#                       Metadata_Record (or          #
#                       subclass) objects to search  #
#             attr_name - The name of the attribute  #
#                         to inspect on each record  #
#             attr_value - The value to look for; if #
#                          the attribute is a list   #
#                          any equal element matches #
# Return: Metadata_Record (or subclass) - The last   #
#         record that matched, or None when no       #
#         record matched                             #
#----------------------------------------------------#
def get_item_with_attr(records, attr_name, attr_value):
    match = None
    # Every record is inspected (no early exit), so a later match
    # replaces an earlier one
    for candidate in records:
        try:
            current = getattr(candidate, attr_name)
            # Treat a scalar attribute as a one-element list so that both
            # shapes go through the same comparison
            values = current if isinstance(current, list) else [current]
            if any(attr_value == value for value in values):
                match = candidate
        except AttributeError:
            # Records without the attribute simply cannot match
            continue
    return match
#----------------------------------------------------#
# Purpose: Check if any record in the provided set   #
#          has the provided attribute holding the    #
#          provided value                            #
# Parameters: records - The set of Metadata_Record ( #
#                       or subclass) to look through #
#             attr_name - The name of the property   #
#                         to check                   #
#             attr_value - The value to check the    #
#                          given property for        #
# Return: boolean - If it was found                  #
#----------------------------------------------------#
def has_item_with_attr(records, attr_name, attr_value):
    # Identity test against None: a found record that defines its own
    # __eq__/__ne__ can no longer make the answer come out wrong
    return get_item_with_attr(records, attr_name, attr_value) is not None
#----------------------------------------------------#
# Purpose: Tell whether any record in the given set  #
#          carries the given SCN                     #
# Parameters: records - The set of Metadata_Record ( #
#                       or subclass) to search       #
#             SCN - The SCN to search the records    #
#                   for                              #
# Return: boolean - If a matching record was found   #
#----------------------------------------------------#
def has_SCN(records, SCN):
    found = has_item_with_attr(records, attr_name='SCN', attr_value=SCN)
    return found
#----------------------------------------------------#
# Purpose: Tell whether any record in the given set  #
#          carries the given title                   #
# Parameters: records - The set of Metadata_Record ( #
#                       or subclass) to search       #
#             title - The title to search the        #
#                     records for                    #
# Return: boolean - If a matching record was found   #
#----------------------------------------------------#
def has_title(records, title):
    found = has_item_with_attr(records, attr_name='title', attr_value=title)
    return found
#----------------------------------------------------#
# Purpose: To construct a set of Metadata_Record     #
#          objects from the result of a json.load    #
#          call                                      #
# Parameters: json_dict - The dictionary object      #
#                         provided by the json.load  #
#                         call                       #
# Return: list - The Metadata_Record objects built   #
#                from the 'collection' entries that  #
#                passed validate(); empty when there #
#                is no 'collection' key              #
#----------------------------------------------------#
def records_from_json(json_dict):
    records = []
    # Make sure this file is in a recognized form (more a sanity check at
    # this point but might become more later); unrecognized input yields an
    # empty list instead of reaching the return with nothing bound
    if 'collection' not in json_dict:
        return records
    # For each "object" within the collection array create a new
    # Metadata_Record object and assign the appropriate values
    for entry in json_dict['collection']:
        new_record = Metadata_Record()
        # items() instead of iteritems() so this runs on Python 2 and 3
        for key, value in entry.items():
            setattr(new_record, key, value)
        records.append(new_record)
    # Validate every record to mitigate mistakes (human or computer). The
    # valid ones are collected into a new list: removing from the list that
    # is being iterated skips the record that follows each removal
    valid_records = []
    for record in records:
        if record.validate():
            valid_records.append(record)
        else:
            # Single-argument print(...) behaves the same on Python 2 and 3
            print('Removing ' + str(record) + ' because it is an invalid record')
    # Return the results
    return valid_records
#----------------------------------------------------#
# Purpose: To construct a set of Metadata_Record     #
#          objects from a tab separated file         #
# Parameters: head_line - The line that has the      #
#                         column names (denotes the  #
#                         schema of the file)        #
#             lines - The list of lines to parse     #
#                     into individual records; a     #
#                     leading copy of head_line is   #
#                     removed from it in place       #
# Return: list - The Metadata_Record objects that    #
#                passed validate()                   #
#----------------------------------------------------#
def records_from_tab_seperated(head_line, lines):
    # Parse the schema (what each column means)
    props = [prop.strip() for prop in head_line.split('\t')]
    # If the first line of the list and the schema line are the same remove
    # it (so that we don't try to parse it as a record). The emptiness check
    # keeps an empty input from raising IndexError
    if lines and lines[0] == head_line:
        del lines[0]
    # Loop over each line and create a new Metadata_Record object, assigning
    # each value to its appropriate property (as determined by the schema).
    # A line with more columns than the schema raises IndexError
    records = []
    for line in lines:
        new_record = Metadata_Record()
        for index, token in enumerate(line.split('\t')):
            setattr(new_record, props[index], token.strip())
        records.append(new_record)
    # Validate each record so that we know there weren't any mistakes made
    # (human or program). The valid ones are collected into a new list:
    # removing from the list that is being iterated skips the record that
    # follows each removal
    valid_records = []
    for record in records:
        if record.validate():
            valid_records.append(record)
        else:
            # Single-argument print(...) behaves the same on Python 2 and 3
            print('Removing ' + str(record) + ' because it is an invalid record')
    # Return result
    return valid_records
#----------------------------------------------------#
# Purpose: To construct a set of Metadata_Record     #
#          objects from a comma separated file       #
# Parameters: head_line - The line that has the      #
#                         column names (denotes the  #
#                         schema of the file)        #
#             lines - The list of lines to parse     #
#                     into individual records; a     #
#                     leading copy of head_line is   #
#                     removed from it in place       #
# Return: list - The Metadata_Record objects that    #
#                passed validate()                   #
#----------------------------------------------------#
def records_from_csv(head_line, lines):
    # Parse the schema (what each column means)
    # NOTE: lines are split on every comma, so quoted fields that contain
    # a comma are not supported
    props = [prop.strip() for prop in head_line.split(',')]
    # If the first line of the list and the schema line are the same remove
    # it (so that we don't try to parse it as a record). The emptiness check
    # keeps an empty input from raising IndexError
    if lines and lines[0] == head_line:
        del lines[0]
    # Loop over each line and create a new Metadata_Record object, assigning
    # each value to its appropriate property (as determined by the schema).
    # A line with more columns than the schema raises IndexError
    records = []
    for line in lines:
        new_record = Metadata_Record()
        for index, token in enumerate(line.split(',')):
            setattr(new_record, props[index], token.strip())
        records.append(new_record)
    # Validate each record so that we know there weren't any mistakes made
    # (human or program). The valid ones are collected into a new list:
    # removing from the list that is being iterated skips the record that
    # follows each removal
    valid_records = []
    for record in records:
        if record.validate():
            valid_records.append(record)
        else:
            # Single-argument print(...) behaves the same on Python 2 and 3
            print('Removing ' + str(record) + ' because it is an invalid record')
    # Return result
    return valid_records