Skip to content

Commit

Permalink
playing around
Browse files Browse the repository at this point in the history
  • Loading branch information
symbalex committed Dec 13, 2024
1 parent 396b3b6 commit 331d9ba
Showing 1 changed file with 161 additions and 127 deletions.
288 changes: 161 additions & 127 deletions live_code_examples/casestudy1_kestrel.R
Original file line number Diff line number Diff line change
@@ -1,127 +1,161 @@
# Load packages
library(mvgam) # Fit, interrogate and forecast DGAMs
library(tidyverse) # Tidy and flexible data manipulation
library(ggplot2) # Flexible plotting

# Set up plotting environment
theme_set(theme_classic(base_size = 15,
base_family = 'serif'))
myhist = function(...){
geom_histogram(col = 'white',
fill = '#B97C7C', ...)
}
hist_theme = function(){
theme(axis.line.y = element_blank(),
axis.ticks.y = element_blank(),
axis.text.y = element_blank())
}

# Load the annual American kestrel, Falco sparverius, abundance time series
# taken in British Columbia, Canada. These data have been collected
# annually, corrected for changes in observer coverage and detectability,
# and logged. They can be found in the MARSS package
load(url('https://github.com/atsa-es/MARSS/raw/master/data/kestrel.rda'))
head(kestrel)

# Arrange the data into a data.frame
regions <- c("BC",
"Alb",
"Sask")
model_data <- do.call(rbind,
lapply(seq_along(regions),
function(x){
data.frame(year = kestrel[, 1],
# Reverse the logging so that we deal directly
# with the detection-adjusted counts
adj_count = exp(kestrel[, 1 + x]),
region = regions[x])})) %>%
# Add series and time indicators for mvgam modelling
dplyr::mutate(yearfac = as.factor(year),
region = as.factor(region),
series = as.factor(region),
time = year)

# Inspect modelling data structure
head(model_data)
dplyr::glimpse(model_data)
levels(model_data$series)

# Split the data into a training and a testing split
data_train <- model_data %>%
dplyr::filter(year <= 2001)
data_test <- model_data %>%
dplyr::filter(year > 2001)

# Plot the time series together
plot_mvgam_series(data = data_train,
y = 'adj_count',
series = 'all')

# Now plot features for just one series at a time
plot_mvgam_series(data = data_train,
newdata = data_test,
y = 'adj_count',
series = 1)
plot_mvgam_series(data = data_train,
newdata = data_test,
y = 'adj_count',
series = 2)
plot_mvgam_series(data = data_train,
newdata = data_test,
y = 'adj_count',
series = 3)

# Look at distribution of the outcome variable
ggplot(data_train,
aes(x = adj_count)) +
myhist() +
labs(x = 'Adjusted count', y = '') +
hist_theme()
summary(data_train$adj_count)

# Heavy-ish tail to the right; perhaps a Gamma distribution
# Inspect default priors for a simple model that only includes
# random intercepts for years, implying that all three series share the same
# year effects
?mvgam::mvgam_formulae
?mgcv::random.effects
?mgcv::gam.models
?mvgam::get_mvgam_priors

# 1. Inspect default priors for a GAM that includes region-specific
# intercepts and a random effect of yearfac

# 2. Update the prior for the fixed effect beta coefficients to
# Normal(0, 1), coded as std_normal() in Stan syntax, and fit the
# model

# 3. Inspect model summaries, diagnostics and estimated parameters
?mvgam::`mvgam-class`
?mvgam::mcmc_plot.mvgam
?mvgam::as.matrix.mvgam
?mvgam::pairs.mvgam

# 4. Inspect estimated conditional effects
?mvgam::conditional_effects.mvgam
?marginaleffects::plot_predictions
?marginaleffects::avg_predictions

# 5. Plot some posterior predictive checks
?mvgam::pp_check

# 6. Plot randomized quantile (Dunn-Smyth) residuals
?mvgam::plot.mvgam

# 7. Expand to a State-Space model with more appropriate nonlinear
# temporal effects
?mvgam::mvgam_formulae
?brms::gp
?mvgam::AR

# 8. Compare the two models using Approximate Leave-One-Out
# Cross-Validation
?mvgam::loo_compare

# 9. Compare forecast distributions from the two models
?mvgam::forecast
?mvgam::score
# Load packages
library(mvgam) # Fit, interrogate and forecast DGAMs
library(tidyverse) # Tidy and flexible data manipulation
library(ggplot2) # Flexible plotting

# Set up plotting environment
theme_set(theme_classic(base_size = 15,
base_family = 'serif'))
myhist = function(...){
geom_histogram(col = 'white',
fill = '#B97C7C', ...)
}
hist_theme = function(){
theme(axis.line.y = element_blank(),
axis.ticks.y = element_blank(),
axis.text.y = element_blank())
}

# Load the annual American kestrel, Falco sparverius, abundance time series
# taken in British Columbia, Canada. These data have been collected
# annually, corrected for changes in observer coverage and detectability,
# and logged. They can be found in the MARSS package
load(url('https://github.com/atsa-es/MARSS/raw/master/data/kestrel.rda'))
head(kestrel)

# Arrange the data into a data.frame
regions <- c("BC",
"Alb",
"Sask")
model_data <- do.call(rbind,
lapply(seq_along(regions),
function(x){
data.frame(year = kestrel[, 1],
# Reverse the logging so that we deal directly
# with the detection-adjusted counts
adj_count = exp(kestrel[, 1 + x]),
region = regions[x])})) %>%
# Add series and time indicators for mvgam modelling
dplyr::mutate(yearfac = as.factor(year),
region = as.factor(region),
series = as.factor(region),
time = year)

# Inspect modelling data structure
head(model_data)
dplyr::glimpse(model_data)
levels(model_data$series)

# Split the data into a training and a testing split
data_train <- model_data %>%
dplyr::filter(year <= 2001)
data_test <- model_data %>%
dplyr::filter(year > 2001)

# Plot the time series together
plot_mvgam_series(data = data_train,
y = 'adj_count',
series = 'all')

# Now plot features for just one series at a time
plot_mvgam_series(data = data_train,
newdata = data_test,
y = 'adj_count',
series = 1)
plot_mvgam_series(data = data_train,
newdata = data_test,
y = 'adj_count',
series = 2)
plot_mvgam_series(data = data_train,
newdata = data_test,
y = 'adj_count',
series = 3)

# Look at distribution of the outcome variable
ggplot(data_train,
aes(x = adj_count)) +
myhist() +
labs(x = 'Adjusted count', y = '') +
hist_theme()
summary(data_train$adj_count)

# Heavy-ish tail to the right; perhaps a Gamma distribution
# Inspect default priors for a simple model that only includes
# random intercepts for years, implying that all three series share the same
# year effects
?mvgam::mvgam_formulae
?mgcv::random.effects
?mgcv::gam.models
?mvgam::get_mvgam_priors

# 1. Inspect default priors for a GAM that includes region-specific
# intercepts and a random effect of yearfac

# AJ note - a random effect on yuear is kinda shit .....

p <- get_mvgam_priors(
formula = adj_count ~ series + s(yearfac, bs = "re")
, data = data_train
, family = Gamma()
)
p

# 2. Update the prior for the fixed effect beta coefficients to
# Normal(0, 1), coded as std_normal() in Stan syntax, and fit the
# model

mod1 <- mvgam(
formula = adj_count ~ series + s(yearfac, bs = "re")
, priors = c(prior(std_normal(), class = b)
, prior(exponential(2)))
, data = data_train
, newdata = data_test
, family = Gamma()
, share_obs_params = TRUE
, silent = 1
, run_model = TRUE
)
summary(mod1)
summary(mod1, include_betas = F, smooth_test = F)


coef(mod1)
conditional_effects(mod1)
# 3. Inspect model summaries, diagnostics and estimated parameters
?mvgam::`mvgam-class`
?mvgam::mcmc_plot.mvgam
?mvgam::as.matrix.mvgam
?mvgam::pairs.mvgam

# 4. Inspect estimated conditional effects
?mvgam::conditional_effects.mvgam
?marginaleffects::plot_predictions
?marginaleffects::avg_predictions

# 5. Plot some posterior predictive checks
?mvgam::pp_check

# 6. Plot randomized quantile (Dunn-Smyth) residuals
?mvgam::plot.mvgam
plot(mod1, type = "residuals", series = 1) # I think this ae converted to normal (?) - not much autocorrelation left
plot(mod1, type = "residuals", series = 2)

### AJ - can forecast - can't remember how

# 7. Expand to a State-Space model with more appropriate nonlinear
# temporal effects
?mvgam::mvgam_formulae
?brms::gp
?mvgam::AR

# intead of using pslines for time, use Gaussian PRocess (i need to figure out what this is)
# basicallhy uses time distance between all pairs of points to learn correlations\
# brms has GP function fir this

# 8. Compare the two models using Approximate Leave-One-Out
# Cross-Validation
?mvgam::loo_compare

# 9. Compare forecast distributions from the two models
?mvgam::forecast
?mvgam::score

0 comments on commit 331d9ba

Please sign in to comment.