-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy path05-loaloazip.R
421 lines (350 loc) · 12.8 KB
/
05-loaloazip.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
### Loa Loa Zip Model ###
## Load the data ##
library(TMB)
precompile()
library(geostatsp)
data(loaloa,package = "geostatsp")
library(aghq)
library(tmbstan)
# Set the resolution for the spatial interpolations:
# The results shown in the paper use:
# reslist <- list(nrow = 200,ncol = 400)
# but this takes a couple hours. Here I set:
reslist <- list(nrow = 50,ncol = 100)
# globalpath <- "/storage/phd/projects/aghq-softwarepaper/paper/"
# savepath <- paste0(globalpath,"data/")
savestamp <- "20210505-v1"
# plotpath <- paste0(globalpath,"figures/loaloazip/")
globalpath <- tempdir()
plotpath <- file.path(globalpath,"loaloazip")
if (!dir.exists(plotpath)) dir.create(plotpath)
savepath <- plotpath
file.copy(system.file('extsrc/05_loaloazip.cpp',package='aghq'),globalpath)
# Compile TMB template-- only need to do once
compile(file.path(globalpath,"05_loaloazip.cpp"))
dyn.load(dynlib(file.path(globalpath,"05_loaloazip")))
# Flags, which analysis to do?
doaghq <- TRUE
domcmc <- FALSE # Not included in software paper. Try it if you want! Took 66 hours and failed when I ran it.
dopostsamplingaghq <- TRUE
dopostsamplingmcmc <- FALSE
# Initialize time variables
aghqtime <- 0
mcmctime <- 0
aghqsimtime <- 0
mcmcsimtime <- 0
## Prepare the "inner" model ##
# Design matrices
Amat <- Diagonal(nrow(loaloa))
Xmat <- cbind(rep(1,nrow(Amat)))
# Design matrix: zip model and risk model are the same
design <- bdiag(
# ZIP
cbind(
Amat,
Xmat
),
# Risk
cbind(
Amat,
Xmat
)
)
# Response
y <- loaloa@data$y
N <- loaloa@data$N
## Dimensions
n <- nrow(Xmat) # Number of obs
p <- ncol(Xmat) * 2 # Number of betas
m <- ncol(Amat) * 2 # Number of spatial points
Wd <- ncol(design) # Number of total params
# Check
stopifnot(Wd == m + p)
## Prior distributions ##
# Use the same prior for both sets of Matern params
sigma_u <- 1
sigma_alpha <- .025
rho_u <- 2e05
rho_alpha <- .975
# PC Prior for kappa,tau
maternconstants <- list()
maternconstants$d <- 2 # Dimension of spatial field, fixed
maternconstants$nu <- 1 # Shape param, fixed
get_kappa <- function(sigma,rho) sqrt(8*maternconstants$nu)/rho
get_tau <- function(sigma,rho) sigma * get_kappa(sigma,rho)^(maternconstants$nu) * sqrt(gamma(maternconstants$nu + maternconstants$d/2) * (4*pi)^(maternconstants$d/2) / gamma(maternconstants$nu))
get_sigma <- function(kappa,tau) tau / (kappa^(maternconstants$nu) * sqrt(gamma(maternconstants$nu + maternconstants$d/2) * (4*pi)^(maternconstants$d/2) / gamma(maternconstants$nu)))
get_rho <- function(kappa,tau) sqrt(8*maternconstants$nu) / kappa
# Precision for betas
beta_prec <- .001
## Log Posterior ----
startingsig <- 1
startingrho <- 4.22*1e04
datlist <- list(
y = y,
N = N,
design = design,
nu = maternconstants$nu,
rho_u = rho_u,
rho_alpha = rho_alpha,
sigma_u = sigma_u,
sigma_alpha = sigma_alpha,
D = raster::pointDistance(loaloa,lonlat = FALSE),
betaprec = beta_prec
)
# NOTE: for some initial values of W, TMB's inner optimization seems to fail
# This was tried over a bunch of random intializations and most worked, and all
# gave the same optimum. But this is why we set the seed here and use a random start.
set.seed(4564)
paraminit <- list(
W = rnorm(ncol(design)),
logkappa = log(get_kappa(startingsig,startingrho)),
logtau = log(get_tau(startingsig,startingrho))
)
ff <- MakeADFun(data = datlist,
parameters = paraminit,
random = "W",
DLL = "05_loaloazip",
ADreport = FALSE,
silent = TRUE)
if (doaghq) {
tm <- Sys.time()
cat("Doing AGHQ, time = ",format(tm),"\n")
loaloazipquad <- aghq::marginal_laplace_tmb(
ff,
3,
startingvalue = c(paraminit$logkappa,paraminit$logtau)
)
aghqtime <- difftime(Sys.time(),tm,units = 'secs')
save(loaloazipquad,file = file.path(savepath,paste0("loaloazipquad",savestamp,".RData")))
cat("AGHQ took: ",format(aghqtime),"\n")
}
## MCMC ----
if (domcmc){
tm <- Sys.time()
cat("Doing MCMC, time = ",format(tm),"\n")
loaloazipmcmc <- tmbstan(
ff,
chains = 8,
cores = 8,
iter = 1e04,
warmup = 1e03,
init = Reduce(c,paraminit),
seed = 124698,
algorithm = "NUTS"
)
mcmctime <- difftime(Sys.time(),tm,units = 'secs')
save(loaloazipmcmc,file = file.path(savepath,paste0("loaloazipmcmc",savestamp,".RData")))
cat("MCMC took: ",format(mcmctime),"\n")
# NOTE: for run the week of 15/04/2021, got the following warnings with default
# settings and 8 chains with 10,000 iterations (incl 1,000 warmups):
# There were 238 divergent transitions after warmup.
# There were 673 transitions after warmup that exceeded the maximum treedepth.
pdf(file = paste0(plotpath,"mcmc-pairs-plot",savestamp,".pdf"))
pairs(loaloazipmcmc,pars = c("W[191]","W[381]","logkappa","logtau"))
dev.off()
}
simulate_spatial_fields <- function(U,
theta,
pointsdata,
resolution = list(nrow = 100,ncol = 100)) {
# U: matrix of samples, each column is a sample
# theta: data.frame of theta values
# Draw from U*|U
# Compute matrix of var, range, shape
modpar <- cbind(
var = get_sigma(exp(theta$theta1),exp(theta$theta2))^2,
range = get_rho(exp(theta$theta1),exp(theta$theta2)),
shape = maternconstants$nu
)
fielddat <- pointsdata
fielddat@data <- as.data.frame(U)
geostatsp::RFsimulate(
model = modpar,
data = fielddat,
x = raster(fielddat,nrows = resolution$nrow,ncols = resolution$ncol)
)
}
## Post samples ----
if (FALSE) {
# Load if in interactive mode (and you want to)
load(file.path(savepath,paste0("loaloazipquad",savestamp,".RData")))
load(file.path(savepath,paste0("loaloazipmcmc",savestamp,".RData")))
}
if (dopostsamplingaghq) {
cat("Doing AGHQ field simulations, time = ",format(tm),"\n")
loazippostsamples <- sample_marginal(loaloazipquad,100)
# Extract out the U and V
postU <- loazippostsamples$samps[c(1:190), ]
postV <- loazippostsamples$samps[c(192:381), ]
postBeta <- loazippostsamples$samps[c(191,382), ]
tm <- Sys.time()
fieldbrickzip <- simulate_spatial_fields(
postU,
loazippostsamples$theta,
loaloa,
resolution = reslist
)
fieldbrickrisk <- simulate_spatial_fields(
postV,
loazippostsamples$theta,
loaloa,
resolution = reslist
)
fieldbrickzip_withint <- fieldbrickzip + postBeta[1, ]
fieldbrickrisk_withint <- fieldbrickrisk + postBeta[2, ]
simfieldsmeanzip <- mean(1 / (1 + exp(-fieldbrickzip_withint)))
simfieldsmeanrisk <- mean(
(1 / (1 + exp(-fieldbrickzip_withint))) *
(1 / (1 + exp(-fieldbrickrisk_withint)))
)
aghqsimtime <- difftime(Sys.time(),tm,units = 'secs')
raster::writeRaster(fieldbrickzip,file.path(savepath,paste0("fieldbrickzipaghq",savestamp,".grd")),overwrite = TRUE)
raster::writeRaster(fieldbrickrisk,file.path(savepath,paste0("fieldbrickzipaghq",savestamp,".grd")),overwrite = TRUE)
cameroonBorderLL = raster::getData("GADM", country=c('CMR'), level=2)
nigeriaBorderLL = raster::getData("GADM", country=c('NGA'), level=2)
cameroonBorder = spTransform(cameroonBorderLL, projection(loaloa))
nigeriaBorder = spTransform(nigeriaBorderLL, projection(loaloa))
cameroonBorderouter <- rgeos::gUnaryUnion(cameroonBorder)
nigeriaBorderouter <- rgeos::gUnaryUnion(nigeriaBorder)
fullborder <- raster::bind(cameroonBorder,nigeriaBorder)
fullborderouter <- raster::bind(cameroonBorderouter,nigeriaBorderouter)
fullborder <- crop(fullborder,loaloa)
fullborderouter <- crop(fullborderouter,loaloa)
plot_loaloa <- function(plotraster,breaks) {
predcols <- mapmisc::colourScale(
plotraster,
breaks = breaks,
style = "fixed",
col = "Spectral",
rev = TRUE
)
plotraster <- mask(plotraster,fullborderouter)
mapmisc::map.new(loaloa)
plot(plotraster,
col = predcols$col,
breaks = predcols$breaks,
legend=FALSE, add=TRUE)
points(loaloa,pch = 4)
plot(fullborder,add = TRUE,border=mapmisc::col2html("black", 0.5), lwd=0.5)
plot(fullborderouter,add = TRUE)
mapmisc::legendBreaks('right', predcols, cex=1, bty='o',inset = .05)
}
br <- c(0,.05,.1,.15,.2,.25,.3,.4,.5,.6,1)
brzero <- c(.35,.5,.6,.7,.8,.9,.91,.92,.93,.94,.95,1)
pdf(file.path(plotpath,paste0("loaloa-zip-postmean.pdf")),width=7,height=7)
plot_loaloa(simfieldsmeanzip,brzero)
dev.off()
pdf(file.path(plotpath,paste0("loaloa-risk-postmean.pdf")),width=7,height=7)
plot_loaloa(simfieldsmeanrisk,br)
dev.off()
cat("AGHQ simulations took: ",format(aghqsimtime),"\n")
}
## Post samples ----
if (dopostsamplingmcmc) {
cat("Doing MCMC field simulations, time = ",format(tm),"\n")
samps <- as.data.frame(loaloazipmcmc)
# Only do 100
idx <- sample.int(nrow(samps),100)
postU <- t(samps[idx,c(1:190)])
postV <- t(samps[idx,c(192:381)])
postBeta <- samps[idx,c(191,382)]
theta <- samps[idx,c(383,384)]
colnames(theta) <- c('theta1','theta2')
tm <- Sys.time()
fieldbrickzip <- simulate_spatial_fields(
postU,
theta,
loaloa,
resolution = reslist
)
fieldbrickrisk <- simulate_spatial_fields(
postV,
theta,
loaloa,
resolution = reslist
)
fieldbrickzip_withint <- fieldbrickzip + postBeta[ ,1]
fieldbrickrisk_withint <- fieldbrickrisk + postBeta[ ,2]
simfieldsmeanzip <- mean(1 / (1 + exp(-fieldbrickzip_withint)))
simfieldsmeanrisk <- mean(
(1 / (1 + exp(-fieldbrickzip_withint))) *
(1 / (1 + exp(-fieldbrickrisk_withint)))
)
mcmcsimtime <- difftime(Sys.time(),tm,units = 'secs')
raster::writeRaster(fieldbrickzip,file.path(savepath,paste0("fieldbrickzipmcmc",savestamp,".grd")),overwrite = TRUE)
raster::writeRaster(fieldbrickrisk,file.path(savepath,paste0("fieldbrickzipmcmc",savestamp,".grd")),overwrite = TRUE)
cameroonBorderLL = raster::getData("GADM", country=c('CMR'), level=2)
nigeriaBorderLL = raster::getData("GADM", country=c('NGA'), level=2)
cameroonBorder = spTransform(cameroonBorderLL, projection(loaloa))
nigeriaBorder = spTransform(nigeriaBorderLL, projection(loaloa))
cameroonBorderouter <- rgeos::gUnaryUnion(cameroonBorder)
nigeriaBorderouter <- rgeos::gUnaryUnion(nigeriaBorder)
fullborder <- raster::bind(cameroonBorder,nigeriaBorder)
fullborderouter <- raster::bind(cameroonBorderouter,nigeriaBorderouter)
fullborder <- crop(fullborder,loaloa)
fullborderouter <- crop(fullborderouter,loaloa)
plot_loaloa <- function(plotraster,breaks) {
predcols <- mapmisc::colourScale(
plotraster,
breaks = breaks,
style = "fixed",
col = "Spectral",
rev = TRUE
)
plotraster <- mask(plotraster,fullborderouter)
mapmisc::map.new(loaloa)
plot(plotraster,
col = predcols$col,
breaks = predcols$breaks,
legend=FALSE, add=TRUE)
points(loaloa,pch = 4)
plot(fullborder,add = TRUE,border=mapmisc::col2html("black", 0.5), lwd=0.5)
plot(fullborderouter,add = TRUE)
mapmisc::legendBreaks('right', predcols, cex=1, bty='o',inset = .05)
}
br <- c(0,.05,.1,.15,.2,.25,.3,.4,.5,.6,1)
brzero <- c(.35,.5,.6,.7,.8,.9,.91,.92,.93,.94,.95,1)
pdf(file.path(plotpath,paste0("loaloa-zip-postmean-mcmc.pdf")),width=7,height=7)
plot_loaloa(simfieldsmeanzip,brzero)
dev.off()
pdf(file.path(plotpath,paste0("loaloa-risk-postmean-mcmc.pdf")),width=7,height=7)
plot_loaloa(simfieldsmeanrisk,br)
dev.off()
cat("MCMC simulations took: ",format(mcmcsimtime),"\n")
}
# Do the pairs plots
if (dopostsamplingaghq & dopostsamplingmcmc) {
set.seed(5678798)
# MCMC
mcmcbetasamps <- as.data.frame(loaloazipmcmc)[ ,c(191,382)]
pdf(file = file.path(plotpath,paste0("mcmcpairsplot-",savestamp,".pdf")),width = 7,height = 7)
pairs(loaloazipmcmc,pars = c("W[191]","W[382]"),text.panel = function(x,y,labels,cex,font,...) NULL)
dev.off()
# AGHQ
aghqbetasamps <- t(sample_marginal(loaloazipquad,nrow(mcmcbetasamps))$samps[c(191,382), ])
pdf(file = file.path(plotpath,paste0("aghqpairsplot-",savestamp,".pdf")),width = 7,height = 7)
panel.hist <- function(x, ...)
{
usr <- par("usr"); on.exit(par(usr))
par(usr = c(usr[1:2], 0, 1.5) )
h <- hist(x, plot = FALSE)
breaks <- h$breaks; nB <- length(breaks)
y <- h$counts; y <- y/max(y)
rect(breaks[-nB], 0, breaks[-1], y, col = "cyan", ...)
}
smoothscatterforhere <- function(x,y,...) {
smoothScatter(x,y,add = TRUE,nrpoints = 0)
}
pairs(aghqbetasamps,
panel = smoothscatterforhere,
diag.panel = panel.hist,
text.panel = function(x,y,labels,cex,font,...) NULL)
dev.off()
}
# Write the timing table
timingtable <- data.frame(
task = c("AGHQ","MCMC","AGHQSim","MCMCSim"),
time = c(aghqtime,mcmctime,aghqsimtime,mcmcsimtime)
)
readr::write_csv(timingtable,file.path(savepath,paste0("timing-table",savestamp,".csv")))
cat("All done!\n")