Skip to content

Commit

Permalink
Harmonizing partial and spartial parameters #80
Browse files Browse the repository at this point in the history
  • Loading branch information
Martin-Jung committed Nov 12, 2023
1 parent 14318ea commit 2eab72f
Show file tree
Hide file tree
Showing 17 changed files with 239 additions and 121 deletions.
3 changes: 2 additions & 1 deletion NEWS.md
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
# ibis.iSDM 0.1.1 (current dev branch)

#### Minor improvements and bug fixes
* Several bug fixes in `thin_observations` and `global` argument for bias-method
* Several bug fixes in `thin_observations` and `global` argument for bias-method.
* Harmonization of parameters for `spartial()` and addressing #80

# ibis.iSDM 0.1.0

Expand Down
10 changes: 8 additions & 2 deletions R/engine_bart.R
Original file line number Diff line number Diff line change
Expand Up @@ -555,11 +555,17 @@ engine_bart <- function(x,
}
},
# Spatial partial dependence plot option from embercardo
spartial = function(self, x.var = NULL, equal = FALSE,
spartial = function(self, x.var = NULL, newdata = NULL, equal = FALSE,
smooth = 1, transform = TRUE, type = NULL){
fit <- self$get_data('fit_best')
model <- self$model
predictors <- model$predictors_object$get_data()
if(is.null(newdata)){
predictors <- model$predictors_object$get_data()
} else {
predictors <- newdata
assertthat::assert_that(x.var %in% colnames(predictors),
msg = 'Variable not in provided data!')
}
assertthat::assert_that(x.var %in% attr(fit$fit$data@x,'term.labels'),
msg = 'Variable not in predicted model' )

Expand Down
40 changes: 19 additions & 21 deletions R/engine_breg.R
Original file line number Diff line number Diff line change
Expand Up @@ -632,7 +632,7 @@ engine_breg <- function(x,
return(pred_part)
},
# Spatial partial dependence plot
spartial = function(self, x.var, constant = NULL, plot = TRUE, type = NULL){
spartial = function(self, x.var, constant = NULL, newdata = NULL, plot = TRUE, type = NULL){
assertthat::assert_that(is.character(x.var) || is.null(x.var),
"model" %in% names(self),
is.null(constant) || is.numeric(constant),
Expand All @@ -649,7 +649,15 @@ engine_breg <- function(x,

mod <- self$get_data('fit_best')
model <- self$model
df <- model$biodiversity[[length( model$biodiversity )]]$predictors

# Check if newdata is defined, if yes use that one instead
if(!is.null(newdata)){
df <- newdata
assertthat::assert_that(nrow(df) == nrow(model$biodiversity[[1]]$predictors))
} else {
# df <- model$biodiversity[[1]]$predictors
df <- model$predictors
}
df <- subset(df, select = attr(mod$terms, "term.labels"))
w <- model$biodiversity[[1]]$expect # Also get exposure variable

Expand All @@ -661,23 +669,20 @@ engine_breg <- function(x,
}

# Make spatial container for prediction
suppressWarnings(
df_partial <- sp::SpatialPointsDataFrame(coords = model$predictors[,c('x', 'y')],
data = model$predictors[, names(model$predictors) %notin% c('x','y')],
proj4string = sp::CRS(sp::proj4string(methods::as(model$background, "Spatial")))
)
)
df_partial <- methods::as(df_partial, 'SpatialPixelsDataFrame')
template <- model_to_background(model)
# Assign a cellid to df to match the file later
# df$cellid <-

# Add all others as constant
if(is.null(constant)){
for(n in names(df_partial)) if(n != x.var) df_partial[[n]] <- suppressWarnings( mean(model$predictors[[n]], na.rm = TRUE) )
for(n in names(df)) if(n != x.var) df[[n]] <- suppressWarnings( mean(model$predictors[[n]], na.rm = TRUE) )
} else {
for(n in names(df_partial)) if(n != x.var) df_partial[[n]] <- constant
for(n in names(df)) if(n != x.var) df[[n]] <- constant
}

if(any(model$predictors_types$type=="factor")){
lvl <- levels(model$predictors[[model$predictors_types$predictors[model$predictors_types$type=="factor"]]])
df_partial[[model$predictors_types$predictors[model$predictors_types$type=="factor"]]] <-
df[[model$predictors_types$predictors[model$predictors_types$type=="factor"]]] <-
factor(lvl[1], levels = lvl)
# FIXME: Assigning the first level (usually reference) for now. But ideally find a way to skip factors from partial predictions
}
Expand All @@ -687,7 +692,7 @@ engine_breg <- function(x,

pred_breg <- predict_boom(
obj = mod,
newdata = df_partial@data,
newdata = df,
w = unique(w)[2], # The second entry of unique contains the non-observed variables
fam = fam,
params = settings$data # Use the settings as list
Expand All @@ -704,14 +709,7 @@ engine_breg <- function(x,
pred_part$cv <- pred_part$sd / pred_part$mean

# Now create spatial prediction
prediction <- try({emptyraster( model$predictors_object$get_data()[[1]] )},silent = TRUE) # Background
if(inherits(prediction, "try-error")){
prediction <- terra::rast(model$predictors[,c("x", "y")],
crs = terra::crs(model$background),
type = "xyz") |>
emptyraster()
}
prediction <- fill_rasters(pred_part, prediction)
prediction <- fill_rasters(pred_part, template)

# Do plot and return result
if(plot){
Expand Down
77 changes: 40 additions & 37 deletions R/engine_gdb.R
Original file line number Diff line number Diff line change
Expand Up @@ -492,6 +492,7 @@ engine_gdb <- function(x,
# Compute end of computation time
settings$set('end.time', Sys.time())
# Also append boosting control option to settings
for(entry in names(params)) settings$set(entry, params[[entry]])
for(entry in names(bc)) settings$set(entry, bc[[entry]])

# Create output
Expand All @@ -504,7 +505,6 @@ engine_gdb <- function(x,
fits = list(
"fit_best" = fit_gdb,
"fit_cv" = cvm,
"params" = params,
"fit_best_equation" = equation,
"prediction" = prediction
),
Expand All @@ -517,14 +517,12 @@ engine_gdb <- function(x,
# Get model
mod <- self$get_data('fit_best')
model <- self$model
settings <- self$settings
assertthat::assert_that(inherits(mod,'mboost'),msg = 'No model found!')
if(is.null(type)) type <- self$get_data('params')$type
if(is.null(type)) type <- settings$get('type')
# Check that all variables are in provided data.frame
assertthat::assert_that(all( as.character(mboost::extract(mod,'variable.names')) %in% names(newdata) ))

# Also get settings for bias values
settings <- self$settings

# Clamp?
if( settings$get("clamp") ) newdata <- clamp_predictions(model, newdata)

Expand Down Expand Up @@ -570,7 +568,8 @@ engine_gdb <- function(x,
variables <- mboost::extract(self$get_data('fit_best'),'variable.names')
assertthat::assert_that( all( x.var %in% variables), msg = 'x.var variable not found in model!' )

if(is.null(type)) type <- self$get_data('params')$type
settings <- self$settings
if(is.null(type)) type <- settings$get('type')
model <- self$model

# Special treatment for factors
Expand Down Expand Up @@ -669,7 +668,7 @@ engine_gdb <- function(x,
return(out)
},
# Spatial partial effect plots
spartial = function(self, x.var, constant = NULL, plot = TRUE, type = NULL, ...){
spartial = function(self, x.var, constant = NULL, newdata = NULL, plot = TRUE, type = NULL, ...){
assertthat::assert_that('fit_best' %in% names(self$fits),
is.character(x.var), length(x.var) == 1)
# Get model and make empty template
Expand All @@ -678,43 +677,46 @@ engine_gdb <- function(x,
# Also check that what is present in coefficients of model
variables <- as.character( mboost::extract(mod,'variable.names') )
assertthat::assert_that(x.var %in% variables,
msg = "Variable not found in model!" )
msg = "Variable not found in model! Regularized out?" )

if(is.null(type)) type <- self$settings$get("type")
type <- match.arg(type, c("link", "response"), several.ok = FALSE)
settings$set("type", type)

# Make template of target variable(s)
template <- try({
emptyraster( self$model$predictors_object$get_data()[[1]] )},
silent = TRUE) # Background
if(inherits(template, "try-error")){
template <- terra::rast(model$predictors[,c("x", "y")],
crs = terra::crs(model$background),
type = "xyz") |>
emptyraster()
}
template <- model_to_background(model)

# Get target variables and predict
target <- model$predictors
if(!is.null(newdata)){
df <- newdata
} else {
df <- model$predictors
}
assertthat::assert_that(x.var %in% colnames(df),
msg = "Variable not found in provided data.")

# Set all variables other the target variable to constant
if(is.null(constant)){
# Calculate mean
nn <- model$predictors_types$predictors[which(model$predictors_types$type=='numeric')]
constant <- apply(target[,nn], 2, function(x) mean(x, na.rm=T))
constant <- apply(df[,nn], 2, function(x) mean(x, na.rm=T))
for(v in variables[ variables %notin% x.var]){
if(v %notin% names(target) ) next()
target[!is.na(target[v]),v] <- constant[v]
if(v %notin% colnames(df) ) next()
df[!is.na(df[v]),v] <- constant[v]
}
} else {
target[!is.na(target[,x.var]), variables] <- constant
df[!is.na(df[,x.var]), variables] <- constant
}
target$rowid <- as.numeric( rownames(target) )
assertthat::assert_that(nrow(target)==ncell(template))
df$rowid <- as.numeric( rownames(df) )
assertthat::assert_that(nrow(df)==ncell(template))

pp <- suppressWarnings(
mboost::predict.mboost(mod, newdata = target, which = x.var,
type = 'link', aggregate = 'sum')
mboost::predict.mboost(mod, newdata = df, which = x.var,
type = settings$get('type'), aggregate = 'sum')
)
# If both linear and smooth effects are in model
if(length(target$rowid[which(!is.na(target[[x.var]]))] ) == length(pp[,ncol(pp)])){
template[ target$rowid[which(!is.na(target[[x.var]]))] ] <- pp[,ncol(pp)]
if(length(df$rowid[which(!is.na(df[[x.var]]))] ) == length(pp[,ncol(pp)])){
template[ df$rowid[which(!is.na(df[[x.var]]))] ] <- pp[,ncol(pp)]
} else { template[] <- pp[, ncol(pp) ]}
names(template) <- paste0('partial__',x.var)

Expand Down Expand Up @@ -757,7 +759,8 @@ engine_gdb <- function(x,
cofs$variable <- gsub('bols\\(|bbs\\(|bmono\\(', '', cofs$variable)
cofs$variable <- sapply(strsplit(cofs$variable, ","), function(z) z[[1]])
cofs$variable <- gsub('\\)', '', cofs$variable)
names(cofs) <- c("Feature", "Weights", "Beta")
cofs <- cofs |> dplyr::select(variable, beta)
names(cofs) <- c("Feature", "Beta")
return(cofs)
},
# Spatial latent effect
Expand All @@ -773,25 +776,25 @@ engine_gdb <- function(x,
msg = 'No spatial effect found in model!')

# Make template of target variable(s)
temp <- emptyraster( model$predictors_object$get_data() )
template <- model_to_background(model)

# Get target variables and predict
target <- self$model$predictors[,c('x','y')]

y <- suppressWarnings(
mboost::predict.mboost(mod, newdata = target, which = c('x','y'))
)
assertthat::assert_that(nrow(target)==nrow(y))
temp[] <- y[,2]
names(temp) <- paste0('partial__','space')
template[] <- y[,2]
names(template) <- paste0('partial__','space')
# Mask with background
temp <- terra::mask(temp, self$model$background )
template <- terra::mask(template, model$background )

# Plot both partial spatial partial
if(plot){
# Plot both partial spatial partial
terra::plot(temp, main = expression(f[partial]), col = ibis_colours$divg_bluegreen )
terra::plot(template, main = expression(f[partial]), col = ibis_colours$divg_bluegreen )
}

return(temp)
return(template)

}
)
Expand Down
60 changes: 38 additions & 22 deletions R/engine_glmnet.R
Original file line number Diff line number Diff line change
Expand Up @@ -543,16 +543,19 @@ engine_glmnet <- function(x,
is.null(newdata) || is.data.frame(newdata),
is.numeric(variable_length)
)
check_package("pdp")
# Settings
settings <- self$settings

mod <- self$get_data('fit_best')
model <- self$model
df <- model$biodiversity[[length( model$biodiversity )]]$predictors
co <- stats::coef(mod) |> row.names() # Get model coefficient names
# Set type
if(is.null(type)) type <- self$settings$get("type")
type <- match.arg(type, c("link", "response"), several.ok = FALSE)
settings$set("type", type)

# Get data
df <- model$biodiversity[[length( model$biodiversity )]]$predictors

# Match x.var to argument
if(is.null(x.var)){
Expand Down Expand Up @@ -603,18 +606,31 @@ engine_glmnet <- function(x,
assertthat::assert_that(all( x.var %in% colnames(df) ),
msg = 'Variable not in predicted model.')

# HACK: Overwrite lambda to make sure pdp uses it.
mod$lambda.1se <- determine_lambda(mod)
# Inverse link function
ilf <- switch (settings$get('type'),
"link" = NULL,
"response" = ifelse(model$biodiversity[[1]]$family=='poisson',
exp, logistic)
)

pp <- data.frame()
pb <- progress::progress_bar$new(total = length(x.var))
for(v in x.var){
if(!is.Waiver(of)){
# Predict with offset
p1 <- pdp::partial(mod, pred.var = v, pred.grid = df2, ice = FALSE, center = FALSE,
p1 <- pdp::partial(mod, pred.var = v, pred.grid = df2,
ice = FALSE, center = FALSE,
type = "regression", newoffset = of,
inv.link = ilf,
plot = FALSE, rug = TRUE, train = df)
} else {
p1 <- pdp::partial(mod, pred.var = v, pred.grid = df2, ice = FALSE, center = FALSE,
type = "regression",
plot = FALSE, rug = TRUE, train = df)
p1 <- pdp::partial(mod, pred.var = v, pred.grid = df2,
ice = FALSE, center = FALSE,
type = "regression", inv.link = ilf,
plot = FALSE, rug = TRUE, train = df
)
}
p1 <- p1[,c(v, "yhat")]
names(p1) <- c("partial_effect", "mean")
Expand All @@ -636,10 +652,11 @@ engine_glmnet <- function(x,
return(pp)
},
# Spatial partial dependence plot
spartial = function(self, x.var, constant = NULL, plot = TRUE, type = NULL){
spartial = function(self, x.var, constant = NULL, newdata = NULL, plot = TRUE, type = NULL){
assertthat::assert_that(is.character(x.var),
"model" %in% names(self),
is.null(constant) || is.numeric(constant),
is.null(newdata) || is.data.frame(newdata),
is.logical(plot),
is.character(type) || is.null(type)
)
Expand All @@ -654,8 +671,15 @@ engine_glmnet <- function(x,
model <- self$model
# For Integrated model, take the last one
fam <- model$biodiversity[[length(model$biodiversity)]]$family
df <- model$predictors
df$w <- model$exposure

# If new data is set
if(!is.null(newdata)){
df <- newdata
} else {
df <- model$predictors
df$w <- model$exposure
}
assertthat::assert_that(all(x.var %in% colnames(df)))
df$rowid <- 1:nrow(df)
# Match x.var to argument
x.var <- match.arg(x.var, names(df), several.ok = FALSE)
Expand Down Expand Up @@ -687,21 +711,13 @@ engine_glmnet <- function(x,
) |> as.data.frame()

# Now create spatial prediction
prediction <- try({
emptyraster( model$predictors_object$get_data()[[1]] )},
silent = TRUE) # Background
if(inherits(prediction, "try-error")){
prediction <- terra::rast(model$predictors[,c("x", "y")],
crs = terra::crs(model$background),
type = "xyz") |>
emptyraster()
}
prediction[df_sub$rowid] <- pred_gn[,1]
names(prediction) <- paste0("spartial_",x.var)
template <- model_to_background(model)
template[df_sub$rowid] <- pred_gn[,1]
names(template) <- paste0("spartial_",x.var)

# Do plot and return result
if(plot) terra::plot(prediction, col = ibis_colours$ohsu_palette)
return(prediction)
if(plot) terra::plot(template, col = ibis_colours$ohsu_palette)
return(template)
},
# Convergence check
has_converged = function(self){
Expand Down
Loading

0 comments on commit 2eab72f

Please sign in to comment.