-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcompile_data.py
executable file
·178 lines (147 loc) · 4.88 KB
/
compile_data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
#!/usr/bin/python
import os
import sys
import math
import _mysql
import login
the_db = login.get_db()
import data_objects as DO
# Trial ids grouped by experiment type.
# NOTE(review): none of these three lists is referenced in the visible code;
# presumably callers pass them as the ``location``/``trials`` arguments of
# the compute_starch_rel_* functions -- confirm.
CONTROLLED_TRIALS = [4537, 5506]
FIELD_TRIALS = [5544, 5541, 5546, 5540, 5542, 5543, 5539, 5545]
DETHLINGEN_TRIALS = [5519]
# NOTE(review): presumably the treatment ids of the drought-stress
# treatments, matching the ``drought_id``/``drought_ids`` parameters of the
# compute_starch_rel_* functions -- confirm.
DROUGHT_ID = 170
DETHLINGEN_DROUGHT_IDS = (170, 172)
# Treatment ids that is_control() accepts as control treatments.
CONTROL_IDS = (169, 171)
#
def group_by_cultivar(data):
    """Group data objects by their upper-cased cultivar name.

    Args:
        data: iterable of objects exposing a string ``cultivar`` attribute.

    Returns:
        dict mapping ``cultivar.upper()`` to the list of objects carrying
        that cultivar, in input order.
    """
    grouped = {}
    for dobj in data:
        # Append in place: rebuilding the list with ``+`` for every item
        # makes grouping quadratic in the group size.
        grouped.setdefault(dobj.cultivar.upper(), []).append(dobj)
    return grouped
#
def group_by(data, field):
    """Group data objects by the value of one of their attributes.

    Args:
        data: iterable of objects.
        field: name of the attribute whose value is used as grouping key.

    Returns:
        dict mapping each distinct attribute value to the list of objects
        carrying it, in input order.

    Raises:
        SystemExit: with exit status 1, after writing a message to stderr,
            when an object does not have the requested attribute.
    """
    grouped = {}
    for dobj in data:
        try:
            key = getattr(dobj, field)
        except AttributeError:
            # Only a genuinely missing attribute is reported as such; a bare
            # ``except`` would also swallow KeyboardInterrupt and unrelated
            # errors and mislabel them as a missing field.
            sys.stderr.write('Group by: Missing field!\n')
            sys.exit(1)
        # Append in place instead of rebuilding the list for every item.
        grouped.setdefault(key, []).append(dobj)
    return grouped
#
def median(v):
    """Return the median of the values in ``v``.

    Args:
        v: iterable of mutually comparable numbers; it is not modified.

    Returns:
        The middle element for an odd number of values, otherwise the mean
        of the two middle elements (always a float in that case because of
        the division by ``2.0``).

    Raises:
        IndexError: if ``v`` is empty.
    """
    v_sorted = sorted(v)
    # Measure the sorted copy so that generators are accepted as well.
    n = len(v_sorted)
    if n == 0:
        raise IndexError('median of an empty sequence')
    # Floor division keeps the index an int under Python 3 and under
    # ``from __future__ import division``.
    mid = n // 2
    if n % 2 == 0:
        return (v_sorted[mid - 1] + v_sorted[mid]) / 2.0
    return v_sorted[mid]
#
def is_control(treatment):
    """Tell whether a treatment id denotes a control treatment.

    ``treatment`` is converted with ``int`` and looked up in the module
    constant ``CONTROL_IDS``.
    """
    treatment_id = int(treatment)
    return treatment_id in CONTROL_IDS
#
def compute_starch_rel_ctrl(data, location, drought_ids):
    """Compute starch yield relative to the control treatment per cultivar.

    For every cultivar the control level is the median ``starch_abs`` of the
    samples whose treatment passes ``is_control``.  For each id in
    ``drought_ids`` the relative value is the median of
    ``starch_abs / control level`` over the samples with that treatment.

    Args:
        data: dict mapping a cultivar id (convertible with ``int``) to the
            list of its sample objects; every sample exposes ``starch_abs``
            and ``treatment``.
        location: value used as first element of every result key.
        drought_ids: iterable of treatment ids to evaluate.

    Returns:
        dict keyed by ``(location, int(cultivar), treatment id)``.  Each
        value is a ``DO.CompiledData`` that was fed the cultivar's first
        sample through ``eat_starch_data`` and then had ``rel_starch`` and
        ``treatment`` set.

    Note:
        A cultivar without control samples, or without samples for one of
        ``drought_ids``, makes ``median`` fail on an empty list.
    """
    results = {}
    for cultivar, samples in data.items():
        ctrl_yield = median([dobj.starch_abs
                             for dobj in samples
                             if is_control(dobj.treatment)])
        for trmt in drought_ids:
            key = (location, int(cultivar), trmt)
            # Compare as ints on both sides, exactly like is_control() does;
            # a raw ``==`` never matches when the samples carry the
            # treatment id as a string and ``drought_ids`` holds ints.
            trmt_id = int(trmt)
            rel_starch = median([dobj.starch_abs / ctrl_yield
                                 for dobj in samples
                                 if int(dobj.treatment) == trmt_id])
            compiled = DO.CompiledData()
            compiled.eat_starch_data(samples[0])
            compiled.rel_starch = rel_starch
            compiled.treatment = trmt_id
            results[key] = compiled
    return results
#
def compute_starch_rel_field(data, trials, drought_id):
    """Compute starch yield per trial relative to the cultivar-wide median.

    The reference level of a cultivar is the median ``starch_abs`` over all
    of its samples in ``data``.  For every trial in ``trials`` and every
    cultivar sampled there, the relative value is the median ``starch_abs``
    of that trial's samples divided by the reference level.

    Args:
        data: list of sample objects exposing ``starch_abs``, ``sub_id``
            (the cultivar id, convertible with ``int``) and ``location_id``.
            It is iterated several times, so it must not be a generator.
        trials: iterable of trial ids, compared against ``location_id``.
        drought_id: treatment id stored in every result and result key.

    Returns:
        dict keyed by ``(trial, int(cultivar), drought_id)``.  Each value is
        a ``DO.CompiledData`` that was fed the first sample of the group
        through ``eat_starch_data`` and then had ``rel_starch`` and
        ``treatment`` set.

    Side effects:
        Prints ``PROBLEM <trial> <cultivar>`` to stdout for every
        trial/cultivar group that does not contain exactly two samples.
    """
    results = {}
    # Reference level per cultivar, taken over the samples of all locations.
    median_yields = dict(
        (sub_id, median([dobj.starch_abs for dobj in cult_samples]))
        for sub_id, cult_samples in group_by(data, 'sub_id').items())
    for trial in trials:
        loc_data = [d for d in data if d.location_id == trial]
        for cultivar, samples in group_by(loc_data, 'sub_id').items():
            key = (trial, int(cultivar), drought_id)
            # Median of the absolute values first, then normalise; the
            # earlier variant (median of the per-sample ratios) is retired.
            rel_starch = median([dobj.starch_abs for dobj in samples])
            rel_starch /= median_yields[cultivar]
            compiled = DO.CompiledData()
            compiled.eat_starch_data(samples[0])
            compiled.rel_starch = rel_starch
            compiled.treatment = drought_id
            results[key] = compiled
            if len(samples) != 2:
                # Original note: Boehlendorf (4451) & Norika GL (4452)
                # should be solved.
                # Single pre-formatted argument: prints the same text under
                # Python 2 (print statement) and Python 3 (print function).
                print('PROBLEM %s %s' % (trial, cultivar))
    return results
###
def compute_climate_data(data):
    """Sum heat and water input per location.

    Heat summation and sum of precipitation/irrigation.
    Open question carried over from the original author: what happens when
    the number of temperature measurements differs between locations?  The
    measurement count is returned next to each sum so callers can check.

    Args:
        data: iterable of objects exposing ``limsid`` (convertible with
            ``int``), ``heat_sum``, ``precipitation`` and ``irrigation``.

    Returns:
        Tuple ``(heat_d, h2o_d)`` of dicts keyed by ``int(limsid)``:
        ``heat_d[key]`` is ``(sum of heat_sum, number of values)`` and
        ``h2o_d[key]`` is ``(sum of precipitation + irrigation, number of
        values)``.  Records whose ``heat_sum`` is ``None`` are left out of
        ``heat_d``; records whose ``precipitation`` is ``None`` are left out
        of ``h2o_d``.  ``irrigation`` is not checked for ``None``.
    """
    heat_values = {}
    h2o_values = {}
    for dobj in data:
        key = int(dobj.limsid)
        # Append in place instead of rebuilding the list for every record.
        if dobj.heat_sum is not None:
            heat_values.setdefault(key, []).append(dobj.heat_sum)
        if dobj.precipitation is not None:
            h2o_values.setdefault(key, []).append(
                dobj.precipitation + dobj.irrigation)
    heat_d = dict((k, (sum(vals), len(vals)))
                  for k, vals in heat_values.items())
    h2o_d = dict((k, (sum(vals), len(vals)))
                 for k, vals in h2o_values.items())
    return heat_d, h2o_d
###
def main(argv):
    """Placeholder entry point for the climate report; currently a no-op.

    ``argv`` is accepted but ignored, and ``None`` is always returned.
    """
    # Disabled implementation, kept for reference.
    # NOTE(review): ``climate_query`` and ``compute_heat_summation`` are not
    # defined in the visible part of this file.
    #
    #   the_db.query(climate_query)
    #   data = the_db.store_result().fetch_row(how=1, maxrows=999999)
    #   data = [DO.ClimateData(d.keys(), d.values()) for d in data]
    #   print [str(x) for x in data]
    #   print 'lims_loc\tloc\theat_sum\t#temps\n'
    #   for k, v in compute_heat_summation(data).items():
    #       print '%i\t%i\t%.3f\t%i' % (k + v), v[0]/v[1]
    return None
###
def main_starch(argv):
    """Placeholder entry point for the starch report; currently a no-op.

    ``argv`` is accepted but ignored, and ``None`` is always returned.
    """
    # Disabled implementation, kept for reference.
    # NOTE(review): ``starch_query``, ``compute_starch_rel_controlled``,
    # ``compute_starch_rel_dethlingen`` and ``compute_field_trials`` are not
    # defined in the visible part of this file.
    #
    #   the_db.query(starch_query)
    #   data = the_db.store_result().fetch_row(how=1, maxrows=999999)
    #   data = [DO.StarchData(d.keys(), d.values()) for d in data]
    #   # print compute_starch_rel_controlled(data, 4537)
    #   # print compute_starch_rel_dethlingen(data)
    #   print compute_field_trials(data)
    return None
# Script entry point: when executed directly, hand the command-line
# arguments (without the program name) to main_starch().
if __name__ == '__main__': main_starch(sys.argv[1:])