# ==============================================================================
__title__ = "csv to netcdf and maps"
__author__ = "Arden Burrell"
__version__ = "v1.0(23.06.2020)"
__email__ = "[email protected]"
# ==============================================================================
import os
import sys
# ===== Change the dir to the script location =====
if not os.path.dirname(sys.argv[0]) == "":
os.chdir(os.path.dirname(sys.argv[0]))
# ===== append that to the system path =====
sys.path.append(os.getcwd())
# ========== Import packages ==========
import numpy as np
import pandas as pd
import argparse
# import datetime as dt
import warnings as warn
import xarray as xr
import dask
import bottleneck as bn
from numba import jit
from collections import OrderedDict
import json
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
import palettable
import statsmodels.stats.multitest as smsM
import scipy.stats as sps
# ========== Load custom functions ==========
import CustomFunctions as cf
# ==============================================================================
def main(args):
# =========== Read the metadata file in ==========
	if args.use_archived is None:
		infofile = './results/infomation.json'
	else:
		infofile = './results/archive/infomation_%02d.json' % args.use_archived
	with open(infofile, "r", errors='ignore') as f:
info = json.load(f)
# ========== fix the timedelta ==========
if type(info["ComputeTime"]) == float:
info["ComputeTime"] = pd.Timedelta(info["ComputeTime"], unit="sec")
# ========== Open the csv results file ==========
fn = "./results/AttributionResults.csv"
df = pd.read_csv(fn, index_col=0)
# ========== Fix the indexing and the error messaging ==========
lonlat = np.array([_unpackLatLon(index) for index in df.index])
df["longitude"] = lonlat[:, 0]
df["latitude" ] = lonlat[:, 1]
df = df.reset_index(drop=True).set_index(["longitude","latitude" ])
df["errors"] = df["errors"].apply(_df_error)
df["AnthropogenicClimateChange"] = df.ClimateChange + df.CO2
	# ========== Combine the component p values for ACC ==========
pstack = np.stack((df["ClimateChange.Pvalue"].values, df["CO2.Pvalue"].values))
df["AnthropogenicClimateChange.Pvalue"]= np.apply_along_axis(_combine_pvalue2d, 0, pstack)
# ========== Open a reference dataset ==========
	# This is a hack so I don't have to hard code all the information
fnNDVI = "./data/AUSdemo_GIMMS_ndvi.nc"
ds_ref = xr.open_dataset(fnNDVI)
# ========== Convert back to xr dataset ==========
ds = xr.Dataset.from_dataframe(df)
# ++++++++++ Fix the time ++++++++++
	# add a time dim, in this case the end of the original time series.
	# Netcdf files need a time dimension
ds = ds.assign_coords(time=pd.Timestamp("2015-12-31")).expand_dims("time")
ds = ds.transpose('time', 'latitude', 'longitude').sortby("latitude", ascending=False)
# ++++++++++ Fix the Global Attributes ++++++++++
ds = GlobalAttributes(ds, info, ds_ref=ds_ref)
# ++++++++++ Setup the netcdf file encoding and add ++++++++++
encoding = OrderedDict()
for va in df.columns:
		# ========== Check and see if all the values are nan, if yes drop them ==========
if bn.allnan(ds[va]):
ds = ds.drop_vars(va)
else:
			encoding[va] = ({'shuffle':True,
							'zlib':True,
							'complevel':5,
							})
# ========== Write the dataset to a netcdf file ==========
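	# The filename encodes the pixel size: 25*(coarsen-1)+25 simplifies to
	# 25*coarsen km, e.g. a coarsen factor of 2 gives a 50km grid.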
print("Starting write of data at:", pd.Timestamp.now())
ds.to_netcdf("./results/TSSRattribution_Results_%s_%dkm.nc" % (info["photo"], (25*(info["coarsen"]-1)+25)),
format = 'NETCDF4',
encoding = encoding,
unlimited_dims = ["time"])
# ========== Make a series of maps ==========
if args.plots:
# ========== set the plot ==========
plotpath = "./results/plots/"
for va in ds.variables:
# +++++ check its a variable to be skipped or mapped +++++
if (va in ['longitude', 'latitude', "OtherFactorsValid", "errors", "time"] or va.endswith(".Pvalue")):
continue
# ========== Perform significance correction ==========
if args.sig:
signif, s_method = FDRSignificanceCorrection(ds, va, args.method)
else:
s_method = ""
signif = None
# ========== Create the metadata for the plots ==========
maininfo = "Plot from %s (%s):%s by %s, %s" % (__title__, __file__,
__version__, __author__, str(pd.Timestamp.now()))
maininfo += s_method
gitinfo = cf.gitmetadata()
print(va)
# ========== Build, save and show the maps ==========
MapSetup(ds, va, signif, maininfo, gitinfo, info, plotpath)
# ==============================================================================
# BUILD THE MAPS
# ==============================================================================
def MapSetup(ds, va, signif, maininfo, gitinfo, info, plotpath, mask=None):
"""
Builds the maps of the change attribution
args:
ensinfo
ds xrds: TSSRESTREND RESULTS
va str : the name of the change variable
maininfo str : plot string
gitinfo str : summary of the git header
mask xrda: Any additional boolean masks
"""
# ========== Create the mapdet object ==========
mapdet = cf.mapclass("Australia", plotpath)
mapdet.dpi = 130
mapdet.var = va
mapdet.desc = "%s_%s_%dkm" % (va, info["photo"], (25*(info["coarsen"]-1)+25))
# ========== Create the data array ==========
	da = ds[va].copy()
	da = da.squeeze()
	# ========== Remove the non dryland areas ==========
	if mask is not None:
		da *= mask
	# ========== Mask for FDR significance ==========
	if signif is not None:
		da *= signif
		mapdet.desc += "_withNonSigMasked"
# ========== calculate the tick position ==========
if info['annual']:
mapdet.cblabel = r"$\times$10$^{-2}$ $\Delta$ NDVI$_{max}$ yr$^{-1}$"
# scale the NDVI to make it fit
da *= 1e2
else:
mapdet.cblabel = r'$\Delta$ NDVI$_{max}$'
	if info['Nyears'] != 34.:
		warn.warn("The plot tick values were chosen for a time series of 34 years. Other lengths may require different colorbars")
	tks = np.array([-0.30, -0.24, -0.12, -0.06, -0.02, 0, 0.02, 0.06, 0.12, 0.24, 0.30])
	mapdet.cmin = -0.3 # the min of the colormap
	mapdet.cmax = 0.3  # the max of the colormap
# ========== Set the colormap ==========
if va == "ObservedChange":
cmapHex = palettable.colorbrewer.diverging.PRGn_10.hex_colors
elif va in ["ObservedClimate", "ClimateChange"]:
cmapHex = palettable.colorbrewer.diverging.BrBG_10.hex_colors
elif va == "ClimateVariability":
cmapHex = palettable.colorbrewer.diverging.RdBu_10.hex_colors
elif va == "CO2":
tks = np.array([0, 0.001, 0.02, 0.03, 0.04, 0.06, 0.24, 0.30])
cmapHex = palettable.colorbrewer.sequential.Purples_7.hex_colors
		mapdet.cmin = 0.0 # the min of the colormap
elif va == "LandUse":
cmapHex = palettable.colorbrewer.diverging.PiYG_10.hex_colors
elif va == "OtherFactors":
# cmapHex = palettable.colorbrewer.diverging.RdBu_10.hex_colors
cmapHex = palettable.cmocean.diverging.Curl_10_r.hex_colors
elif va == "AnthropogenicClimateChange":
cmapHex = palettable.colorbrewer.diverging.PuOr_10.hex_colors
# ========== Set the variables ==========
mapdet.cZero = 0.5 # the zero point of the colormap
	# ========== Set the grid transparency ==========
mapdet.gridalp = 0.5
# ========== Setup the cmap ==========
if va == "CO2":
spacing = 'uniform'
cbounds = tks
ticks = tks
# ========== create the colormap ==========
cmap = mpl.colors.ListedColormap(cmapHex)
cmap.set_over(cmapHex[-1])
norm = mpl.colors.BoundaryNorm(cbounds, cmap.N)
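		# BoundaryNorm maps each of the 7 intervals in cbounds onto one of the
		# 7 listed colours; set_over handles values above the top boundary.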
mapdet.extend = "max"
else:
cmap, norm, ticks, cbounds, spacing = cf.ReplaceHexColor(
cmapHex, mapdet.cmin, mapdet.cmax, ticks=tks, zeroR = 0.001)
cmap.set_bad('dimgrey',1.)
mapdet.cmap = cmap
mapdet.norm = norm
mapdet.ticks = ticks
mapdet.cbounds = cbounds
mapdet.spacing = spacing
# ========== Make a map ==========
plotinfo, fname = cf.mapmaker(da, mapdet)
# ========== Save the metadata ==========
if fname:
infomation = [maininfo, plotinfo, fname, gitinfo]
cf.writemetadata(fname, infomation)
def FDRSignificanceCorrection(ds, var, FDRmethod, alpha=0.10):
	"""
	Takes the results of an existing trend detection approach and modifies them to
	account for multiple comparisons.
	args
		ds: xr dataset
			dataset containing the p values of the trend analysis
		var: str
			the name of the change variable being corrected
		FDRmethod: str
			the statsmodels multipletests method, "fdr_bh" or "fdr_by"
		alpha: float
			the family-wise error rate used in the correction
	"""
if FDRmethod == "fdr_by":
t_method = "Adjusting for multiple comparisons using Benjamini/Yekutieli"
elif FDRmethod == "fdr_bh":
t_method = "Adjusting for multiple comparisons using Benjamini/Hochberg"
else:
raise ValueError("unknown MultipleComparisons method: %s, must be either fdr_by or fdr_bh " % FDRmethod)
# ========== Work out the pvalue name ===========
if var == "ObservedChange":
pnm = "Obs.Pvalue"
elif var == "OtherFactors":
t_method = "OtherFactors is Zero masked in areas where 1 or more components could not be calculated"
print(t_method)
return ds["OtherFactorsValid"].astype(float), t_method
else:
pnm = var + ".Pvalue"
	# ========== Locate the p values and reshape them into a 1d array ==========
	# ++++++++++ Find the pvalues ++++++++++
	ODimDa = ds[pnm].stack(loc=('time', 'latitude', 'longitude')).copy()
isnan = np.isnan(ODimDa)
	# ++++++++++ pull out the non nan pvalues ++++++++++
pvalue1d = ODimDa[~isnan]
# =========== Perform the MC correction ===========
pvalue_adj = smsM.multipletests(pvalue1d, method=FDRmethod, alpha=alpha)
	# =========== Convert the boolean significance mask to float ===========
	sigfloat = pvalue_adj[0].astype(float)
	# build a zero-filled dataarray and fill in the significance results
	re = xr.zeros_like(ODimDa)
re[~isnan] = sigfloat
print(t_method)
return re.unstack(), t_method
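# Example of the underlying statsmodels call (values are illustrative):
#   reject, p_adj, _, _ = smsM.multipletests(
#       [0.01, 0.04, 0.30], method="fdr_bh", alpha=0.10)
# Only the boolean reject array is used here to build the significance mask.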
# ==============================================================================
# FIX THE METADATA OF THE XR DATASET
# ==============================================================================
def GlobalAttributes(ds, info, ds_ref=None):
"""
Creates the global attributes for the netcdf file that is being written
these attributes come from :
https://www.unidata.ucar.edu/software/thredds/current/netcdf-java/metadata/DataDiscoveryAttConvention.html
args
ds: xarray ds
Dataset containing the infomation im intepereting
ds_ref: xarray ds
Dataset that contains the original data to copy metadata from. Defualt is None so it can be skipped
returns:
attributes Ordered Dictionary cantaining the attribute infomation
"""
# ========== Create the ordered dictionary ==========
attr = ds.attrs
# ========== Fill the Dictionary ==========
	# ++++++++++ Highly recommended ++++++++++
attr["title"] = "TSS-RESTREND Attribution Results"
attr["summary"] = "The restacked results of a TSSRattribution function applied to GIMMS and TERRCLIMATE DATA"
attr["Conventions"] = "CF-1.7"
	# ++++++++++ Data Provenance ++++++++++
info["history"] = "%s: Netcdf file created using %s (%s):%s. Script originally developed by %s" % (
str(pd.Timestamp.now()), __title__, __file__, __version__, __author__) + info["history"]
# attr["history"] += ds.history
attr["history"] = info["history"]
attr["creator_name"] = __author__
attr["creator_url"] = "ardenburrell.com"
attr["creator_email"] = __email__
attr["institution"] = "WHRC"
attr["date_created"] = str(pd.Timestamp.now())
	# ++++++++++ Netcdf Summary information ++++++++++
attr["time_coverage_start"] = str(pd.Timestamp("2015-12-31"))
attr["time_coverage_end"] = str(pd.Timestamp("1982-01-01"))
	# ++++++++++ TSS.RESTREND INFORMATION ++++++++++
attr["package_version"] = "TSSRESTREND version: " + info["TSSRESTREND.version"]
attr["package_url"] = "https://cran.r-project.org/web/packages/TSS.RESTREND/index.html"
	if ds_ref is not None:
ds.longitude.attrs = ds_ref.longitude.attrs
ds.latitude.attrs = ds_ref.latitude.attrs
ds.time.attrs = ds_ref.time.attrs
# ds.time.attrs["calendar"] = 'standard'
# ds.time.attrs["units"] = 'days since 1900-01-01 00:00'
return ds
# ==============================================================================
# FUNCTIONS FOR FIXING THE INPUT DATAFRAME
# ==============================================================================
def _unpackLatLon(index):
"""
Fixes the string formatting of the indexs
args:
index: str
string of format "(longitude, latitude)""
return:
longitude, latitude: Floats
"""
# fix the latitude and longitude
lonST, latST = index.split(", ")
return float(lonST[1:]), float(latST[:-1])
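# Example (illustrative coordinates):
#   _unpackLatLon("(145.25, -33.75)") returns (145.25, -33.75)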
def _df_error(error):
	"""
	Takes in the error codes which are categorical and encodes them into integers
	"""
	# ========== Keys for error messages ==========
	keys = ({
		"NANinCTSR.VI":1,
		"NANinCTSR.RF":2,
		"NANinCTSR.TM":3,
		"AttributionFailed":4
		})
	if error in keys.keys():
		return keys[error]
	elif pd.isnull(error):
		# pd.isnull is safe for both floats and strings
		return 0
	else:
		# unknown error
		return 5
def _combine_pvalue2d(pvals):
	"""
	takes an array of p values, removes the nans, then combines the rest
	args:
		pvals: np.array
	"""
	if bn.allnan(pvals):
		return np.nan
	elif np.sum(pvals) == 0:
		return 0
	else:
		___, pv = sps.combine_pvalues(pvals[~np.isnan(pvals)])
		return pv
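# Example (illustrative values): _combine_pvalue2d(np.array([0.04, np.nan, 0.10]))
# drops the nan and returns the Fisher-combined p value of 0.04 and 0.10.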
# ==============================================================================
if __name__ == '__main__':
description='Arguments that can be passed to the saving and plotting script'
parser = argparse.ArgumentParser(description=description)
parser.add_argument(
"-p", "--plots", action="store_false",
help="Quiet plots: if selected plots will not be displayed ")
parser.add_argument(
"-s", "--sig", action="store_true",
help="Significance: Apply a zero mask using FDR adjustment and the Benjamini/Hochberg method")
parser.add_argument(
"--method", type=str,choices=["fdr_bh", 'fdr_by'], default="fdr_bh", help="The method used to adjust for False Discovery Rate. must be fdr_bh or fdr_by")
parser.add_argument(
"--use_archived", type=int, default=None, help="Use this argument to redo archived infomation.json files")
args = parser.parse_args()
main(args)
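# ==============================================================================
# Example invocations (from the repo root, assuming ./results/ exists):
#   python S03_MappingResults.py                   # write netcdf and show plots
#   python S03_MappingResults.py -p                # quiet: skip the plots
#   python S03_MappingResults.py -s --method fdr_by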