diff --git a/CITATION.cff b/CITATION.cff index a18f98ee..c9c56099 100644 --- a/CITATION.cff +++ b/CITATION.cff @@ -8,7 +8,7 @@ message: 'To cite package "ibis.iSDM" in publications use:' type: software license: CC-BY-4.0 title: 'ibis.iSDM: Modelling framework for integrated biodiversity distribution scenarios' -version: 0.1.4 +version: 0.1.5 abstract: Integrated framework of modelling the distribution of species and ecosystems in a suitability framing. This package allows the estimation of integrated species distribution models (iSDM) based on several sources of evidence and provided presence-only @@ -76,6 +76,7 @@ references: given-names: Winston email: winston@stdout.org year: '2024' + doi: 10.32614/CRAN.package.R6 version: '>= 2.5.0' - type: software title: assertthat @@ -87,6 +88,7 @@ references: given-names: Hadley email: hadley@rstudio.com year: '2024' + doi: 10.32614/CRAN.package.assertthat version: '>= 0.2.0' - type: software title: doFuture @@ -100,7 +102,58 @@ references: email: henrikb@braju.com orcid: https://orcid.org/0000-0002-7579-5165 year: '2024' + doi: 10.32614/CRAN.package.doFuture version: '>= 0.12.2' +- type: software + title: future + abstract: 'future: Unified Parallel and Distributed Processing in R for Everyone' + notes: Imports + url: https://future.futureverse.org + repository: https://CRAN.R-project.org/package=future + authors: + - family-names: Bengtsson + given-names: Henrik + email: henrikb@braju.com + orcid: https://orcid.org/0000-0002-7579-5165 + year: '2024' + doi: 10.32614/CRAN.package.future + version: '>= 1.23.0' +- type: software + title: parallelly + abstract: 'parallelly: Enhancing the ''parallel'' Package' + notes: Imports + url: https://parallelly.futureverse.org + repository: https://CRAN.R-project.org/package=parallelly + authors: + - family-names: Bengtsson + given-names: Henrik + email: henrikb@braju.com + orcid: https://orcid.org/0000-0002-7579-5165 + year: '2024' + doi: 10.32614/CRAN.package.parallelly + version: '>= 1.30.0' +- type: software + title: parallel + abstract: 'R: A Language and Environment for Statistical Computing' + notes: Imports + authors: + - name: R Core Team + institution: + name: R Foundation for Statistical Computing + address: Vienna, Austria + year: '2024' +- type: software + title: foreach + abstract: 'foreach: Provides Foreach Looping Construct' + notes: Imports + url: https://github.com/RevolutionAnalytics/foreach + repository: https://CRAN.R-project.org/package=foreach + authors: + - name: Microsoft + - family-names: Weston + given-names: Steve + year: '2024' + doi: 10.32614/CRAN.package.foreach - type: software title: dplyr abstract: 'dplyr: A Grammar of Data Manipulation' @@ -125,30 +178,7 @@ references: email: davis@posit.co orcid: https://orcid.org/0000-0003-4777-038X year: '2024' -- type: software - title: foreach - abstract: 'foreach: Provides Foreach Looping Construct' - notes: Imports - url: https://github.com/RevolutionAnalytics/foreach - repository: https://CRAN.R-project.org/package=foreach - authors: - - name: Microsoft - - family-names: Weston - given-names: Steve - year: '2024' -- type: software - title: future - abstract: 'future: Unified Parallel and Distributed Processing in R for Everyone' - notes: Imports - url: https://future.futureverse.org - repository: https://CRAN.R-project.org/package=future - authors: - - family-names: Bengtsson - given-names: Henrik - email: henrikb@braju.com - orcid: https://orcid.org/0000-0002-7579-5165 - year: '2024' - version: '>= 1.23.0' + doi: 
10.32614/CRAN.package.dplyr - type: software title: geodist abstract: 'geodist: Fast, Dependency-Free Geodesic Distance Calculations' @@ -162,6 +192,7 @@ references: - family-names: Sumner given-names: Michael D. year: '2024' + doi: 10.32614/CRAN.package.geodist - type: software title: ggplot2 abstract: 'ggplot2: Create Elegant Data Visualisations Using the Grammar of Graphics' @@ -201,6 +232,7 @@ references: name-particle: van den orcid: https://orcid.org/0000-0002-9335-7468 year: '2024' + doi: 10.32614/CRAN.package.ggplot2 - type: software title: graphics abstract: 'R: A Language and Environment for Statistical Computing' @@ -239,6 +271,7 @@ references: given-names: Mikael orcid: https://orcid.org/0000-0002-3542-2938 year: '2024' + doi: 10.32614/CRAN.package.Matrix - type: software title: ncdf4 abstract: 'ncdf4: Interface to Unidata netCDF (Version 4 or Earlier) Format Data @@ -250,17 +283,9 @@ references: - family-names: Pierce given-names: David email: dpierce@ucsd.edu + orcid: https://orcid.org/0000-0002-2453-9030 year: '2024' -- type: software - title: parallel - abstract: 'R: A Language and Environment for Statistical Computing' - notes: Imports - authors: - - name: R Core Team - institution: - name: R Foundation for Statistical Computing - address: Vienna, Austria - year: '2024' + doi: 10.32614/CRAN.package.ncdf4 - type: software title: posterior abstract: 'posterior: Tools for Working with Posterior Distributions' @@ -281,6 +306,7 @@ references: given-names: Aki email: Aki.Vehtari@aalto.fi year: '2024' + doi: 10.32614/CRAN.package.posterior - type: software title: sf abstract: 'sf: Simple Features for R' @@ -293,6 +319,7 @@ references: email: edzer.pebesma@uni-muenster.de orcid: https://orcid.org/0000-0001-8049-7069 year: '2024' + doi: 10.32614/CRAN.package.sf version: '>= 1.0' - type: software title: stars @@ -306,6 +333,7 @@ references: email: edzer.pebesma@uni-muenster.de orcid: https://orcid.org/0000-0001-8049-7069 year: '2024' + doi: 10.32614/CRAN.package.stars version: '>= 0.5' - type: software title: stats @@ -329,6 +357,7 @@ references: email: r.hijmans@gmail.com orcid: https://orcid.org/0000-0001-5872-2872 year: '2024' + doi: 10.32614/CRAN.package.terra version: '>= 1.7-10' - type: software title: tibble @@ -345,6 +374,7 @@ references: given-names: Hadley email: hadley@rstudio.com year: '2024' + doi: 10.32614/CRAN.package.tibble version: '>= 2.0.0' - type: software title: uuid @@ -356,10 +386,12 @@ references: - family-names: Urbanek given-names: Simon email: Simon.Urbanek@r-project.org + orcid: https://orcid.org/0000-0003-2297-1732 - family-names: Ts'o given-names: Theodore email: tytso@thunk.org year: '2024' + doi: 10.32614/CRAN.package.uuid - type: software title: utils abstract: 'R: A Language and Environment for Statistical Computing' @@ -393,6 +425,7 @@ references: - family-names: Heiberger given-names: Richard year: '2024' + doi: 10.32614/CRAN.package.abind - type: software title: BoomSpikeSlab abstract: 'BoomSpikeSlab: MCMC for Spike and Slab Regression' @@ -403,6 +436,7 @@ references: given-names: Steven L. 
email: steve.the.bayesian@gmail.com year: '2024' + doi: 10.32614/CRAN.package.BoomSpikeSlab version: '>= 1.2.4' - type: software title: Boruta @@ -418,6 +452,7 @@ references: - family-names: Rudnicki given-names: Witold Remigiusz year: '2024' + doi: 10.32614/CRAN.package.Boruta - type: software title: covr abstract: 'covr: Test Coverage for Packages' @@ -429,6 +464,7 @@ references: given-names: Jim email: james.f.hester@gmail.com year: '2024' + doi: 10.32614/CRAN.package.covr - type: software title: cubelyr abstract: 'cubelyr: A Data Cube ''dplyr'' Backend' @@ -440,6 +476,7 @@ references: given-names: Hadley email: hadley@rstudio.com year: '2024' + doi: 10.32614/CRAN.package.cubelyr - type: software title: dbarts abstract: 'dbarts: Discrete Bayesian Additive Regression Trees Sampler' @@ -458,6 +495,7 @@ references: given-names: Robert email: robert.mcculloch1@gmail.com year: '2024' + doi: 10.32614/CRAN.package.dbarts version: '>= 0.9-22' - type: software title: deldir @@ -468,18 +506,7 @@ references: - family-names: Turner given-names: Rolf year: '2024' -- type: software - title: doParallel - abstract: 'doParallel: Foreach Parallel Adaptor for the ''parallel'' Package' - notes: Suggests - url: https://github.com/RevolutionAnalytics/doparallel - repository: https://CRAN.R-project.org/package=doParallel - authors: - - family-names: Corporation - given-names: Microsoft - - family-names: Weston - given-names: Steve - year: '2024' + doi: 10.32614/CRAN.package.deldir - type: software title: ellipsis abstract: 'ellipsis: Tools for Working with ...' @@ -491,6 +518,7 @@ references: given-names: Hadley email: hadley@rstudio.com year: '2024' + doi: 10.32614/CRAN.package.ellipsis - type: software title: glmnet abstract: 'glmnet: Lasso and Elastic-Net Regularized Generalized Linear Models' @@ -514,6 +542,7 @@ references: - family-names: Yang given-names: James year: '2024' + doi: 10.32614/CRAN.package.glmnet version: '>= 4.1' - type: software title: glmnetUtils @@ -526,6 +555,7 @@ references: given-names: Hong email: hongooi73@gmail.com year: '2024' + doi: 10.32614/CRAN.package.glmnetUtils - type: software title: gnlm abstract: 'gnlm: Generalized Nonlinear Regression Models' @@ -540,6 +570,7 @@ references: given-names: Jim email: jlindsey@gen.unimaas.nl year: '2024' + doi: 10.32614/CRAN.package.gnlm - type: software title: geosphere abstract: 'geosphere: Spherical Trigonometry' @@ -551,6 +582,7 @@ references: given-names: Robert J. email: r.hijmans@gmail.com year: '2024' + doi: 10.32614/CRAN.package.geosphere - type: software title: inlabru abstract: 'inlabru: Bayesian Latent Gaussian Modelling using INLA and Extensions' @@ -566,6 +598,7 @@ references: given-names: Fabian E. 
email: bachlfab@gmail.com year: '2024' + doi: 10.32614/CRAN.package.inlabru version: '>= 2.10.0' - type: software title: fmesher @@ -579,6 +612,7 @@ references: email: finn.lindgren@gmail.com orcid: https://orcid.org/0000-0002-5833-2011 year: '2024' + doi: 10.32614/CRAN.package.fmesher version: '>= 0.1.7' - type: software title: igraph @@ -613,6 +647,7 @@ references: email: kirill@cynkra.com orcid: https://orcid.org/0000-0002-1416-3412 year: '2024' + doi: 10.32614/CRAN.package.igraph - type: software title: knitr abstract: 'knitr: A General-Purpose Package for Dynamic Report Generation in R' @@ -625,6 +660,7 @@ references: email: xie@yihui.name orcid: https://orcid.org/0000-0003-0645-5666 year: '2024' + doi: 10.32614/CRAN.package.knitr - type: software title: mboost abstract: 'mboost: Model-Based Boosting' @@ -649,6 +685,7 @@ references: given-names: Benjamin orcid: https://orcid.org/0000-0003-2810-3186 year: '2024' + doi: 10.32614/CRAN.package.mboost - type: software title: modEvA abstract: 'modEvA: Model Evaluation and Analysis' @@ -665,6 +702,7 @@ references: - family-names: R. given-names: Real year: '2024' + doi: 10.32614/CRAN.package.modEvA - type: software title: matrixStats abstract: 'matrixStats: Functions that Apply to Rows and Columns of Matrices (and @@ -677,6 +715,7 @@ references: given-names: Henrik email: henrikb@braju.com year: '2024' + doi: 10.32614/CRAN.package.matrixStats - type: software title: ncmeta abstract: 'ncmeta: Straightforward ''NetCDF'' Metadata' @@ -688,6 +727,7 @@ references: given-names: Michael email: mdsumner@gmail.com year: '2024' + doi: 10.32614/CRAN.package.ncmeta - type: software title: progress abstract: 'progress: Terminal Progress Bars' @@ -701,6 +741,7 @@ references: - family-names: FitzJohn given-names: Rich year: '2024' + doi: 10.32614/CRAN.package.progress - type: software title: pdp abstract: 'pdp: Partial Dependence Plots' @@ -713,6 +754,7 @@ references: email: greenwell.brandon@gmail.com orcid: https://orcid.org/0000-0002-8120-0084 year: '2024' + doi: 10.32614/CRAN.package.pdp - type: software title: rmarkdown abstract: 'rmarkdown: Dynamic Documents for R' @@ -756,6 +798,7 @@ references: email: rich@posit.co orcid: https://orcid.org/0000-0003-3925-190X year: '2024' + doi: 10.32614/CRAN.package.rmarkdown - type: software title: lubridate abstract: 'lubridate: Make Dealing with Dates a Little Easier' @@ -771,6 +814,7 @@ references: - family-names: Wickham given-names: Hadley year: '2024' + doi: 10.32614/CRAN.package.lubridate version: '>= 1.9.0' - type: software title: rstan @@ -799,6 +843,7 @@ references: email: badr@jhu.edu orcid: https://orcid.org/0000-0002-9808-2344 year: '2024' + doi: 10.32614/CRAN.package.rstan version: '>= 2.21.0' - type: software title: rstantools @@ -819,6 +864,7 @@ references: - family-names: Johnson given-names: Andrew year: '2024' + doi: 10.32614/CRAN.package.rstantools version: '>= 2.1.1' - type: software title: testthat @@ -831,6 +877,7 @@ references: given-names: Hadley email: hadley@posit.co year: '2024' + doi: 10.32614/CRAN.package.testthat version: '>= 3.0.0' - type: software title: xgboost @@ -880,4 +927,5 @@ references: given-names: Jiaming email: jm.yuan@outlook.com year: '2024' + doi: 10.32614/CRAN.package.xgboost diff --git a/DESCRIPTION b/DESCRIPTION index 7de32928..4d23f59f 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: ibis.iSDM Type: Package Title: Modelling framework for integrated biodiversity distribution scenarios -Version: 0.1.4 +Version: 0.1.5 Authors@R: c(person(given = 
"Martin", family = "Jung", @@ -32,16 +32,17 @@ Imports: R6 (>= 2.5.0), assertthat (>= 0.2.0), doFuture (>= 0.12.2), - dplyr, - foreach, future (>= 1.23.0), + parallelly (>= 1.30.0), + parallel, + foreach, + dplyr, geodist, ggplot2, graphics, methods, Matrix, ncdf4, - parallel, posterior, sf (>= 1.0), stars (>= 0.5), @@ -60,7 +61,6 @@ Suggests: cubelyr, dbarts (>= 0.9-22), deldir, - doParallel, ellipsis, glmnet (>= 4.1), glmnetUtils, diff --git a/NAMESPACE b/NAMESPACE index d4780c69..36c99004 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -105,8 +105,11 @@ export(get_ngbvalue) export(get_priors) export(get_rastervalue) export(ibis_dependencies) +export(ibis_enable_parallel) export(ibis_future) export(ibis_options) +export(ibis_set_strategy) +export(ibis_set_threads) export(interpolate_gaps) export(is.Id) export(is.Raster) @@ -119,8 +122,11 @@ export(mask.BiodiversityDatasetCollection) export(mask.BiodiversityScenario) export(mask.DistributionModel) export(mask.PredictorDataset) +export(modal) export(new_id) export(new_waiver) +export(nicheplot) +export(objects_size) export(partial) export(partial.DistributionModel) export(partial_density) @@ -140,6 +146,7 @@ export(rm_limits) export(rm_offset) export(rm_predictors) export(rm_priors) +export(run_parallel) export(run_stan) export(sanitize_names) export(scenario) @@ -166,3 +173,4 @@ import(terra) importFrom(foreach,"%do%") importFrom(foreach,"%dopar%") importFrom(stats,effects) +importFrom(utils,object.size) diff --git a/NEWS.md b/NEWS.md index 87003716..f834db9b 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,4 +1,18 @@ -# ibis.iSDM 0.1.4 (current dev branch) +# ibis.iSDM 0.1.5 (current dev branch) + +#### New features +* New visualization function `nicheplot()` to visualize suitability across 2 axes #87. +* Support for 'modal' value calculations in `ensemble()`. +* Support for 'superlearner' in `ensemble()`. +* Support for 'kmeans' derived threshold calculation in `threshold()` and `predictor_derivate()`. +* Support for future processing streamlined. See FAQ section for instructions #18. + +#### Minor improvements and bug fixes +* Now overwriting temporary data by default in `predictor_transform()` and similar functions. +* Minor :bug: fix related to misaligned thresholds and negative exponential kernels. +* :fire: :bug: fix for scenario projections that use different grain sizes than for inference. + +# ibis.iSDM 0.1.4 #### New features * Support for carnying over latent spatial effects (`add_latent_spatial()`) to `scenario()` projections. 
diff --git a/R/add_constraint.R b/R/add_constraint.R index bbde8db9..16a91756 100644 --- a/R/add_constraint.R +++ b/R/add_constraint.R @@ -382,8 +382,11 @@ methods::setMethod( # Divide alpha values by 2 alpha <- value/2 + # Scale value for different projections + value_scale <- ifelse(terra::is.lonlat(baseline_threshold), terra::res(baseline_threshold)[1] * 10000, 1) + # Grow baseline raster by using an exponentially weighted kernel - ras_dis <- terra::gridDist(baseline_threshold, target = 1) + ras_dis <- terra::gridDist(baseline_threshold, target = 1, scale = value_scale) # Normalized (with a constant) negative exponential kernel ras_dis <- terra::app(ras_dis, fun = function(x) (1 / (2 * pi * value ^ 2)) * exp(-x / value) ) # Equivalent to alpha = 1/value and @@ -847,12 +850,15 @@ methods::setMethod( # Rasterize the layer # First try and dig out a layer from a predictor dataset if found - if(inherits( mod$get_predictors(), "PredictorDataSet")){ - ras <- mod$get_predictors()$get_data() |> stars_to_raster() - ras <- ras[[1]] + if(inherits( mod$get_predictors(), "PredictorDataset")){ + ras <- mod$get_predictors()$get_data() + if(inherits(ras, 'stars')){ + ras <- stars_to_raster(ras)[[1]] + } } else { # Try and get the underlying model and its predictors ras <- mod$get_model()$get_data() + if(is.null(ras)) ras <- emptyraster(mod$get_model()$model$predictors_object$get_data()) } assertthat::assert_that(is.Raster(ras)) bb <- try({ terra::rasterize(layer, ras, 1)}, silent = TRUE) diff --git a/R/add_control_bias.R b/R/add_control_bias.R index 56cfd372..206495ed 100644 --- a/R/add_control_bias.R +++ b/R/add_control_bias.R @@ -77,7 +77,7 @@ #' estimating spatial sampling effort and habitat suitability for multiple species #' from opportunistic presence‐only data. Methods in Ecology and Evolution, 12(5), 933-945. #' -#' @seealso [add_control_extrapolation()] +#' @seealso [add_limits_extrapolation()] #' @keywords bias offset control #' @concept The spatial bias weighting was inspired by code in the \code{enmSdmX} package. #' diff --git a/R/add_predictors.R b/R/add_predictors.R index 43f020c2..eff56e8a 100644 --- a/R/add_predictors.R +++ b/R/add_predictors.R @@ -60,6 +60,7 @@ NULL #' * \code{'interaction'} - Add interacting predictors. Interactions need to be specified (\code{"int_variables"})! #' * \code{'thresh'} - Add threshold derivate predictors. #' * \code{'hinge'} - Add hinge derivate predictors. +#' * \code{'kmeans'} - Add k-means derived factors. #' * \code{'bin'} - Add predictors binned by their percentiles. 
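+#'
+#' As a purely illustrative sketch of requesting the new \code{'kmeans'} derivates
+#' (\code{background} and \code{predictors} are placeholder objects; the argument
+#' name \code{derivates} is as documented above):
+#' \dontrun{
+#'  x <- distribution(background) |>
+#'    add_predictors(env = predictors, derivates = "kmeans")
+#' }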
#' #' @note @@ -130,7 +131,7 @@ methods::setMethod( assertthat::assert_that(inherits(x, "BiodiversityDistribution"), is.Raster(env), all(transform == 'none') || all( transform %in% c('pca', 'scale', 'norm', 'windsor') ), - all(derivates == 'none') || all( derivates %in% c('thresh', 'hinge', 'quadratic', 'bin', 'interaction') ), + all(derivates == 'none') || all( derivates %in% c('thresh', 'hinge', 'quadratic', 'bin', 'kmeans', 'interaction') ), is.vector(derivate_knots) || is.numeric(derivate_knots), is.null(names) || assertthat::is.scalar(names) || is.vector(names), is.logical(explode_factors), @@ -247,7 +248,7 @@ methods::setMethod( # Mask predictors with existing background layer if(bgmask){ - env <- terra::mask(env, mask = x$background) + env <- terra::mask(env, mask = x$background, overwrite = TRUE) # Reratify, work somehow only on stacks if(has_factors && any(is.factor(env)) ){ new_env <- env @@ -349,7 +350,7 @@ methods::setMethod( # If it is a raster if(is.Raster(x$background)){ # Check that background and range align, otherwise raise error - if(is_comparable_raster(layer, x$background)){ + if(!is_comparable_raster(layer, x$background)){ warning('Supplied range does not align with background! Aligning them now...') layer <- alignRasters(layer, x$background, method = 'bilinear', func = mean, cl = FALSE) } @@ -371,12 +372,12 @@ methods::setMethod( if(terra::global(ras1, "min", na.rm = TRUE) == terra::global(ras1, "max", na.rm = TRUE)){ o <- ras2 # Ensure that all layers have a minimum and a maximum - o[is.na(o)] <- 0; o <- terra::mask(o, x$background) + o[is.na(o)] <- 0; o <- terra::mask(o, x$background, overwrite = TRUE) names(o) <- c('elev_high') } else { o <- c(ras1, ras2) # Ensure that all layers have a minimum and a maximum - o[is.na(o)] <- 0; o <- terra::mask(o, x$background) + o[is.na(o)] <- 0; o <- terra::mask(o, x$background, overwrite = TRUE) names(o) <- c('elev_low', 'elev_high') } rm(ras1,ras2) @@ -552,7 +553,8 @@ methods::setMethod( # ras_range <- raster::rasterize(layer, temp, field = 1, background = NA) # } # } else { - ras_range <- terra::rasterize(layer, temp, field = 1, background = 0) + ras_range <- terra::rasterize(layer, temp, field = 1, + background = 0, overwrite = TRUE) # } # -------------- # @@ -564,8 +566,8 @@ methods::setMethod( names(dis) <- 'binary_range' } else if(method == 'distance'){ # Calculate the linear distance from the range - dis <- terra::gridDist(ras_range, target = 1) - dis <- terra::mask(dis, x$background) + dis <- terra::gridDist(ras_range, target = 1, overwrite = TRUE) + dis <- terra::mask(dis, x$background, overwrite = TRUE) # If max distance is specified if(!is.null(distance_max) && !is.infinite(distance_max)){ dis[dis > distance_max] <- NA # Set values above threshold to NA @@ -580,7 +582,7 @@ methods::setMethod( # Set NA to 0 and mask again dis[is.na(dis)] <- 0 - dis <- terra::mask(dis, x$background) + dis <- terra::mask(dis, x$background, overwrite = TRUE) names(dis) <- 'distance_range' } @@ -753,7 +755,7 @@ methods::setMethod( # names = names = NULL; transform = 'none'; derivates = 'none'; derivate_knots = 4; int_variables = NULL;harmonize_na = FALSE; state = NULL # Try and match transform and derivatives arguments transform <- match.arg(transform, c('none','pca', 'scale', 'norm', 'windsor', 'percentile'), several.ok = FALSE) # Several ok set to FALSE as states are not working otherwise - derivates <- match.arg(derivates, c('none','thresh', 'hinge', 'quadratic', 'bin', 'interaction'), several.ok = TRUE) + derivates <- 
match.arg(derivates, c('none','thresh', 'hinge', 'quadratic', 'bin', 'kmeans', 'interaction'), several.ok = TRUE)

   assertthat::validate_that(inherits(env,'stars'), msg = 'Projection rasters need to be stars stack!')
   assertthat::assert_that(inherits(x, "BiodiversityScenario"),
@@ -767,8 +769,8 @@ methods::setMethod(
   assertthat::validate_that(length(env) >= 1)

   # Get model object
-  obj <- x$get_model()
-  assertthat::assert_that(!(is.null(obj) || is.Waiver(obj)),
+  obj <- x$get_model(copy = TRUE)
+  assertthat::assert_that(!(isFALSE(obj) || is.Waiver(obj)),
                           msg = "No model object found in scenario?")
   model <- obj$model
@@ -856,7 +858,7 @@ methods::setMethod(
   # Get variable names
   varn <- obj$get_coefficients()[,1]
   # Are there any derivates present in the coefficients?
-  if(any( length( grep("hinge_|bin_|quadratic_|thresh_|interaction_", varn ) ) > 0 )){
+  if(any( length( grep("hinge_|bin_|kmeans_|quadratic_|thresh_|interaction_", varn ) ) > 0 )){
     if(getOption('ibis.setupmessages', default = TRUE)) myLog('[Setup]','green','Creating predictor derivates...')
     for(dd in derivates){
       if(any(grep(dd, varn))){
diff --git a/R/class-biodiversitydistribution.R b/R/class-biodiversitydistribution.R
index e32ad92a..ae447dad 100644
--- a/R/class-biodiversitydistribution.R
+++ b/R/class-biodiversitydistribution.R
@@ -142,7 +142,7 @@ BiodiversityDistribution <- R6::R6Class(
     #' @description
     #' Specify new limits to the background
     #' @param x A [`list`] object with method and limit type.
-    #' @seealso [add_control_extrapolation()]
+    #' @seealso [add_limits_extrapolation()]
     #' @return This object.
     set_limits = function(x){
       # Specify list
diff --git a/R/class-biodiversityscenario.R b/R/class-biodiversityscenario.R
index 4e764fec..6cf3c89e 100644
--- a/R/class-biodiversityscenario.R
+++ b/R/class-biodiversityscenario.R
@@ -103,6 +103,12 @@ BiodiversityScenario <- R6::R6Class(
       assertthat::assert_that(is.Waiver(self$get_predictors()) || inherits(self$get_predictors(), "PredictorDataset"))
       assertthat::assert_that(is.Waiver(self$get_data()) || (inherits(self$get_data(), "stars") || is.Raster(self$get_data())) )
       assertthat::assert_that(is.Waiver(self$get_constraints()) || is.list(self$get_constraints()))
+      # Check for predictor mismatches
+      if(!is.Waiver(self$get_predictors())){
+        ori <- x$get_projection()
+        test <- self$get_projection()
+        assertthat::validate_that(sf::st_crs(test) == sf::st_crs(ori), msg = "Projections of the scenario predictors and the fitted predictors do not match!")
+      }
       invisible(self)
     },
@@ -351,6 +357,15 @@ BiodiversityScenario <- R6::R6Class(
       return(self[[what]])
     },

+    #' @description
+    #' Remove scenario predictions from the object.
+    #' @return Invisible
+    rm_data = function(){
+      self$scenarios <- new_waiver()
+      invisible()
+    },
+
     #' @description
     #' Set new data in object.
     #' @param x A new data object measuring scenarios.
@@ -382,7 +397,6 @@ BiodiversityScenario <- R6::R6Class(
     #' Get latent factors if found in object.
     #' @return A [`list`] with the latent settings
     get_latent = function(){
-      if(is.Waiver(self$latentfactors)) return('None')
       self$latentfactors
     },
diff --git a/R/engine_bart.R b/R/engine_bart.R
index 06338f78..214f13d5 100644
--- a/R/engine_bart.R
+++ b/R/engine_bart.R
@@ -20,7 +20,7 @@ NULL
 #' sum-of-trees formulation (Default: \code{1000}).
 #' @param nburn A [`numeric`] estimate of the burn in samples (Default: \code{250}).
 #' @param chains The number of chains to be used (Default: \code{4}).
-#' @param type The mode used for creating posterior predictions. Either \code{"link"}
+#' @param type The type used for creating posterior predictions. Either \code{"link"}
 #' or \code{"response"} (Default: \code{"response"}).
 #' @param ... Other options.
 #'
@@ -95,12 +95,16 @@ engine_bart <- function(x,
   # Burn in the background
   template <- terra::rasterize(x$background, template, field = 0)

+  # Mask template where all predictor layers are NA; set na.rm = FALSE to restrict to complete cases
+  if (!is.Waiver(x$predictors)) template <- terra::mask(template, sum(x$predictors$get_data(), na.rm = TRUE))
+
   # Set up dbarts control with some parameters, rest default
   dc <- dbarts::dbartsControl(keepTrees = TRUE, # Keep trees
                               n.burn = nburn,
                               n.trees = iter,
                               n.chains = chains,
-                              n.threads = ifelse( dbarts::guessNumCores() < getOption('ibis.nthread'),dbarts::guessNumCores(),getOption('ibis.nthread'))
+                              n.threads = ifelse( dbarts::guessNumCores() < getOption('ibis.nthread'),
+                                                  dbarts::guessNumCores(), getOption('ibis.nthread'))
   )
   # Other parameters
   # Set up the parameter list
@@ -473,58 +477,49 @@ engine_bart <- function(x,
             full[[settings$get('bias_variable')[i]]] <- settings$get('bias_value')[i]
           }
         }
-        check_package("foreach")
         params <- self$get_data("params")
-        full$rowid <- 1:nrow(full)
+        if(is.Waiver(model$offset)) of <- NULL else of <- scales::rescale(model$offset[full$cellid, "spatial_offset"], to = c(1e-6, 1))

-        # Tile the problem
-        splits <- cut(1:nrow(full), nrow(full) / min(nrow(full) / 4, 5000) )
+        if(getOption("ibis.runparallel",default = FALSE)){
+          check_package("doFuture")
+          if(!("doFuture" %in% loadedNamespaces()) || ('doFuture' %notin% utils::sessionInfo()$otherPkgs) ) {
+            try({requireNamespace('doFuture');attachNamespace("doFuture")},silent = TRUE)
+          }

-        # Get offset if existing
-        if(is.Waiver(model$offset)) of <- NULL else of <- scales::rescale(model$offset[full$cellid, "spatial_offset"], to = c(1e-6, 1))
+          out <- predict_bart(obj = fit_bart,
+                              newdata = full[, model$biodiversity[[1]]$predictors_names],
+                              params = params,
+                              w = w,
+                              of = of,
+                              # FIXME: Somehow parallel predictions do not work with dbarts (RNG).
+                              # All estimates are 0.5 (random)
+                              # By default turned off....
+                              run_future = FALSE,
+                              N = NULL)

-        # Make a prediction
-        ms <- foreach::foreach(s = unique(splits),
-                               .inorder = TRUE,
-                               .combine = rbind,
-                               .errorhandling = "stop",
-                               .multicombine = TRUE,
-                               .export = c("splits", "fit_bart", "full", "model", "params", "of"),
-                               .packages = c("dbarts", "matrixStats")) %do% {
-          i <- which(splits == s)
-
-          pred_bart <- predict(object = fit_bart,
-                               newdata = full[i, model$biodiversity[[1]]$predictors_names],
-                               type = params$type,
-                               offset = of[i]
-          )
-          # Summarize quantiles and sd from posterior
-          ms <- as.data.frame(
-            cbind( apply(pred_bart, 2, function(x) mean(x, na.rm = TRUE)),
-                   matrixStats::colSds(pred_bart),
-                   matrixStats::colQuantiles(pred_bart, probs = c(.05,.5,.95)),
-                   apply(pred_bart, 2, mode)
-            )
-          )
-          names(ms) <- c("mean","sd", "q05", "q50", "q95", "mode")
-          ms$cv <- ms$sd / ms$mean
-          rm(pred_bart)
-          return( ms )
-        } # End of processing
-        assertthat::assert_that(nrow(ms)>0,
-                                nrow(ms) == nrow(full))
+        } else {
+          out <- predict_bart(obj = fit_bart,
+                              newdata = full[, model$biodiversity[[1]]$predictors_names],
+                              params = params,
+                              w = w,
+                              of = of,
+                              run_future = FALSE,
+                              N = NULL)
+        }
+        # End of processing
+        assertthat::assert_that(nrow(out)>0)

         # Add them through a loop since the cellid changed
         prediction <- terra::rast()
-        for(post in names(ms)){
+        for(post in names(out)){
           prediction2 <- self$get_data('template')
-          prediction2[as.numeric(full$cellid)] <- ms[[post]]; names(prediction2) <- post
+          prediction2[as.numeric(full$cellid)] <- out[[post]]; names(prediction2) <- post
           suppressWarnings( prediction <- c(prediction, prediction2) )
           rm(prediction2)
         }
         # plot(prediction$mean, col = ibis_colours$sdm_colour)
-        try({rm(ms, full)},silent = TRUE)
+        try({rm(out, full)},silent = TRUE)
       } else {
         # No prediction done
         prediction <- NULL
@@ -676,7 +671,7 @@ engine_bart <- function(x,
                            "q50" = matrixStats::rowQuantiles(pred_bart, probs = c(.5)),
                            "median" = matrixStats::rowQuantiles(pred_bart, probs = c(.5)),
                            "q95" = matrixStats::rowQuantiles(pred_bart, probs = c(.95)),
-                           "mode" = apply(pred_bart, 1, mode),
+                           "mode" = apply(pred_bart, 1, modal),
                            "cv" <- matrixStats::rowSds(pred_bart) / matrixStats::rowMeans2(pred_bart)
       )
diff --git a/R/engine_breg.R b/R/engine_breg.R
index 4499d168..be8c4ec9 100644
--- a/R/engine_breg.R
+++ b/R/engine_breg.R
@@ -81,6 +81,9 @@ engine_breg <- function(x,
   # Burn in the background
   template <- terra::rasterize(x$background, template, field = 0)

+  # Mask template where all predictor layers are NA; set na.rm = FALSE to restrict to complete cases
+  if (!is.Waiver(x$predictors)) template <- terra::mask(template, sum(x$predictors$get_data(), na.rm = TRUE))
+
   # Set up the parameter list
   params <- list(
     iter = iter,
@@ -444,55 +447,30 @@ engine_breg <- function(x,
       w_full_sub <- w_full[full_sub$rowid]
       assertthat::assert_that((nrow(full_sub) == length(w_full_sub)) || is.null(w_full_sub) )

-      # Tile the problem
-      splits <- cut(1:nrow(full_sub), nrow(full_sub) / (min(100, nrow(full_sub) / 10)) )
-
       # Now depending on parallelization setting use foreach
       if(getOption("ibis.runparallel")){
-        # Check that future is registered
-        if(!foreach::getDoParRegistered()) ibis_future(cores = getOption("ibis.nthread"),
-                                                       strategy = getOption("ibis.futurestrategy"))
-
-        # Run the outgoing command
-        # out <- foreach::foreach(s = unique(splits),
-        #                         .combine = rbind,
-        #                         .export = c("splits", "fit_breg", "full_sub",
-        #                                     "w_full_sub", "fam", "params"),
-        #                         .packages = c("matrixStats"),
-        #                         .multicombine = TRUE,
-        #                         .inorder = TRUE,
-        #                         verbose = settings$get("verbose") ) 
%do% { - out <- parallel::mclapply(unique(splits), function(s) { - i <- which(splits == s) - # -> external code in utils-boom - pred_breg <- predict_boom( - obj = fit_breg, - newdata = full_sub[i,], - w = w_full_sub[i], - fam = fam, - params = params - ) - # Summarize the posterior - preds <- as.data.frame( - cbind( - matrixStats::rowMeans2(pred_breg, na.rm = TRUE), - matrixStats::rowSds(pred_breg, na.rm = TRUE), - matrixStats::rowQuantiles(pred_breg, probs = c(.05,.5,.95), na.rm = TRUE), - apply(pred_breg, 1, mode) - ) - ) - names(preds) <- c("mean", "sd", "q05", "q50", "q95", "mode") - preds$cv <- preds$sd / preds$mean - return(preds) - }) - out <- do.call(rbind, out) + check_package("doFuture") + if(!("doFuture" %in% loadedNamespaces()) || ('doFuture' %notin% utils::sessionInfo()$otherPkgs) ) { + try({requireNamespace('doFuture');attachNamespace("doFuture")},silent = TRUE) + } + # Prediction function + out <- predict_boom(obj = fit_breg, + newdata = full_sub, + w = w_full_sub, + fam = fam, + params = params, + run_future = TRUE, + N = NULL) + } else { + # Tile the problem + splits <- chunk_data(full_sub,N = (min(100, nrow(full_sub) / 10)), index_only = TRUE) + out <- data.frame() - pb <- progress::progress_bar$new(total = length(levels(unique(splits))), + pb <- progress::progress_bar$new(total = length(splits), format = "Creating model prediction (:spin) [:bar] :percent") - for(s in unique(splits)){ + for(i in splits){ pb$tick() - i <- which(splits == s) # -> external code in utils-boom pred_breg <- predict_boom( obj = fit_breg, @@ -506,7 +484,7 @@ engine_breg <- function(x, matrixStats::rowMeans2(pred_breg, na.rm = TRUE), matrixStats::rowSds(pred_breg, na.rm = TRUE), matrixStats::rowQuantiles(pred_breg, probs = c(.05,.5,.95), na.rm = TRUE), - apply(pred_breg, 1, mode) + apply(pred_breg, 1, modal) ) |> as.data.frame() names(preds) <- c("mean", "sd", "q05", "q50", "q95", "mode") preds$cv <- preds$sd / preds$mean @@ -515,7 +493,7 @@ engine_breg <- function(x, } } assertthat::assert_that(is.data.frame(out), nrow(out)>0, - msg = "Something went wrong withe prediction. Output empty!") + msg = "Something went wrong with the prediction. 
Output empty!") # Fill output with summaries of the posterior stk <- terra::rast() for(v in colnames(out)){ @@ -624,7 +602,7 @@ engine_breg <- function(x, matrixStats::rowMeans2(pred_breg, na.rm = TRUE), matrixStats::rowSds(pred_breg, na.rm = TRUE), matrixStats::rowQuantiles(pred_breg, probs = c(.05,.5,.95), na.rm = TRUE), - apply(pred_breg, 1, mode) + apply(pred_breg, 1, modal) ) |> as.data.frame() names(pred_part) <- c("mean", "sd", "q05", "q50", "q95", "mode") @@ -724,10 +702,12 @@ engine_breg <- function(x, pred_part <- cbind( matrixStats::rowMeans2(pred_breg, na.rm = TRUE), matrixStats::rowSds(pred_breg, na.rm = TRUE), - matrixStats::rowQuantiles(pred_breg, probs = c(.05,.5,.95), na.rm = TRUE), - apply(pred_breg, 1, mode) + matrixStats::rowQuantiles(pred_breg, probs = c(.05,.5,.95), na.rm = TRUE) ) |> as.data.frame() - names(pred_part) <- c("mean", "sd", "q05", "q50", "q95", "mode") + names(pred_part) <- c("mean", "sd", "q05", "q50", "q95") + assertthat::assert_that(all(is.numeric(pred_part[,1])), + msg = "Posterior summarizing issue...?") + pred_part$mode <- apply(pred_breg, 1, modal) pred_part$cv <- pred_part$sd / pred_part$mean # Now create spatial prediction @@ -790,7 +770,7 @@ engine_breg <- function(x, mod <- self$get_data('fit_best') model <- self$model df <- newdata - df <- subset(df, select = attr(mod$terms, "term.labels")) + df <- subset(df, select = c("x", "y", attr(mod$terms, "term.labels"))) # Clamp? if( settings$get("clamp") ) df <- clamp_predictions(model, df) @@ -812,36 +792,52 @@ engine_breg <- function(x, # For Integrated model, take the last one fam <- model$biodiversity[[length(model$biodiversity)]]$family - # Rather predict in steps than for the whole thing - out <- data.frame() + if(getOption("ibis.runparallel",default = FALSE)){ + check_package("doFuture") + if(!("doFuture" %in% loadedNamespaces()) || ('doFuture' %notin% utils::sessionInfo()$otherPkgs) ) { + try({requireNamespace('doFuture');attachNamespace("doFuture")},silent = TRUE) + } + # Prediction function + out <- predict_boom(obj = mod, + newdata = df_sub, + w = unique(w)[2], + fam = fam, + params = settings$data, + run_future = TRUE, + N = NULL) + } else { + # Sequential prediction + # Rather predict in steps than for the whole thing + out <- data.frame() - # Tile the problem - splits <- cut(1:nrow(df_sub), nrow(df_sub) / (min(100, nrow(df_sub) / 10)) ) + # Tile the problem + splits <- cut(1:nrow(df_sub), nrow(df_sub) / (min(100, nrow(df_sub) / 10)) ) - pb <- progress::progress_bar$new(total = length(levels(unique(splits))), - format = "Projecting on new data (:spin) [:bar] :percent") - for(s in unique(splits)){ - pb$tick() - i <- which(splits == s) - # -> external code in utils-boom - pred_breg <- predict_boom( - obj = mod, - newdata = df_sub[i,], - w = unique(w)[2], - fam = fam, - params = settings$data - ) - # Summarize the posterior - preds <- cbind( - matrixStats::rowMeans2(pred_breg, na.rm = TRUE), - matrixStats::rowSds(pred_breg, na.rm = TRUE), - matrixStats::rowQuantiles(pred_breg, probs = c(.05,.5,.95), na.rm = TRUE), - apply(pred_breg, 1, mode) - ) |> as.data.frame() - names(preds) <- c("mean", "sd", "q05", "q50", "q95", "mode") - preds$cv <- preds$sd / preds$mean - out <- rbind(out, preds) - rm(preds, pred_breg) + pb <- progress::progress_bar$new(total = length(levels(unique(splits))), + format = "Projecting on new data (:spin) [:bar] :percent") + for(s in unique(splits)){ + pb$tick() + i <- which(splits == s) + # -> external code in utils-boom + pred_breg <- predict_boom( + obj = mod, + 
newdata = df_sub[i,],
+          w = unique(w)[2],
+          fam = fam,
+          params = settings$data
+        )
+        # Summarize the posterior
+        preds <- cbind(
+          matrixStats::rowMeans2(pred_breg, na.rm = TRUE),
+          matrixStats::rowSds(pred_breg, na.rm = TRUE),
+          matrixStats::rowQuantiles(pred_breg, probs = c(.05,.5,.95), na.rm = TRUE),
+          apply(pred_breg, 1, modal)
+        ) |> as.data.frame()
+        names(preds) <- c("mean", "sd", "q05", "q50", "q95", "mode")
+        preds$cv <- preds$sd / preds$mean
+        out <- rbind(out, preds)
+        rm(preds, pred_breg)
+      }
     }

     # Now create spatial prediction
@@ -857,8 +853,9 @@ engine_breg <- function(x,
                           type = "xyz") |>
           emptyraster()
       }, silent = TRUE)
-      prediction[] <- out[,layer]
+      prediction[df_sub$rowid] <- out[,layer]
     }
+    names(prediction) <- layer
     return(prediction)
   },overwrite = TRUE)
diff --git a/R/engine_gdb.R b/R/engine_gdb.R
index 1305bfbe..c537fbd2 100644
--- a/R/engine_gdb.R
+++ b/R/engine_gdb.R
@@ -31,6 +31,10 @@ NULL
 #' baselearners via [add_latent_spatial] or the specification of monotonically
 #' constrained priors via [GDBPrior].
 #'
+#' @note
+#' The coefficients estimated by gdb from poipa data (Binomial) are only 0.5 times
+#' the typical coefficients of a logit model obtained via glm (see [mboost::Binomial]).
+#'
 #' @returns An engine.
 #'
 #' @references
@@ -99,6 +103,9 @@ engine_gdb <- function(x,
   # Burn in the background
   template <- terra::rasterize(background, template, field = 0)

+  # Mask template where all predictor layers are NA; set na.rm = FALSE to restrict to complete cases
+  if (!is.Waiver(x$predictors)) template <- terra::mask(template, sum(x$predictors$get_data(), na.rm = TRUE))
+
   # Set up boosting control
   bc <- mboost::boost_control(mstop = iter,
                               nu = learning_rate,
@@ -476,13 +483,41 @@ engine_gdb <- function(x,
         }
       }

-      # Make a prediction
-      suppressWarnings(
-        pred_gdb <- mboost::predict.mboost(object = fit_gdb, newdata = full,
-                                           type = self$get_data('params')$type,
-                                           aggregate = 'sum',
-                                           offset = full$w)
-      )
+      if(getOption("ibis.runparallel",default = FALSE)){
+        check_package("doFuture")
+        if(!("doFuture" %in% loadedNamespaces()) || ('doFuture' %notin% utils::sessionInfo()$otherPkgs) ) {
+          try({requireNamespace('doFuture');attachNamespace("doFuture")},silent = TRUE)
+        }
+
+        # Chunk the data
+        splits <- chunk_data(full, N = getOption("ibis.nthread",default = 10), index_only = TRUE)
+
+        pred_gdb <- foreach::foreach(s = splits,
+                                     .combine = "rbind",
+                                     .inorder = TRUE,
+                                     .options.future = list(seed = TRUE,
+                                                            packages = c("mboost"))
+        ) %dofuture% {
+          # Make a prediction
+          suppressWarnings(
+            mboost::predict.mboost(object = fit_gdb,
+                                   newdata = full[s,],
+                                   type = self$get_data('params')$type,
+                                   aggregate = 'sum',
+                                   offset = full$w[s])
+          )
+        }
+
+      } else {
+        # Make a prediction
+        suppressWarnings(
+          pred_gdb <- mboost::predict.mboost(object = fit_gdb, newdata = full,
                                             type = self$get_data('params')$type,
                                             aggregate = 'sum',
                                             offset = full$w)
+        )
+      }
+
       # Fill output
       prediction[as.numeric(full$cellid)] <- pred_gdb[,1]
       names(prediction) <- 'mean'
@@ -534,11 +569,36 @@ engine_gdb <- function(x,
       # Subset to non-missing data
       newdata_sub <- subset(newdata, stats::complete.cases(newdata))

-      # Predict
-      y <- suppressWarnings(
-        mboost::predict.mboost(object = mod, newdata = newdata_sub,
-                               type = type, aggregate = 'sum')
-      )
+      if(getOption("ibis.runparallel",default = FALSE)){
+        check_package("doFuture")
+        if(!("doFuture" %in% loadedNamespaces()) || ('doFuture' %notin% utils::sessionInfo()$otherPkgs) ) {
+          try({requireNamespace('doFuture');attachNamespace("doFuture")},silent = TRUE)
+        }
+        # Chunk the data
+        splits <- chunk_data(newdata_sub, N = getOption("ibis.nthread",default = 10),
+                             index_only = TRUE)
+
+        y <- foreach::foreach(s = splits,
+                              .combine = "rbind",
+                              .inorder = TRUE,
+                              .options.future = list(seed = TRUE,
+                                                     packages = c("mboost"))
+        ) %dofuture% {
+          # Make a prediction
+          suppressWarnings(
+            mboost::predict.mboost(object = mod,
+                                   newdata = newdata_sub[s,],
+                                   type = type,
+                                   aggregate = 'sum')
+          )
+        }
+      } else {
+        # Predict
+        y <- suppressWarnings(
+          mboost::predict.mboost(object = mod, newdata = newdata_sub,
+                                 type = type, aggregate = 'sum')
+        )
+      }

       # Make empty template
       if(nrow(newdata)==nrow(model$predictors)){
@@ -553,7 +613,7 @@ engine_gdb <- function(x,
                           type = "xyz") |>
           emptyraster()
       }, silent = TRUE)
-      prediction[] <- y[,1]
+      prediction[as.numeric(newdata_sub$rowid)] <- y[,1]
     }
     names(prediction) <- "mean" # Rename to mean, layer parameter gets ignored for this engine
     return(prediction)
@@ -627,8 +687,10 @@ engine_gdb <- function(x,
     }

     # Now predict with model
+    # MH: There seems to be a problem if variable names partially overlap (e.g., dbh and dbh_cv),
+    # because in that case two columns are returned, which causes issues later
     suppressWarnings(pp <- mboost::predict.mboost(object = self$get_data('fit_best'),
-                                                  newdata = dummy_temp, which = v,
+                                                  newdata = dummy_temp, which = which(names(dummy_temp) == v),
                                                   type = type, aggregate = 'sum'))
     # If bbs is present and non-linear, use bbs estimate. If model is fitted
diff --git a/R/engine_glm.R b/R/engine_glm.R
index b585176c..fc52220f 100644
--- a/R/engine_glm.R
+++ b/R/engine_glm.R
@@ -28,6 +28,11 @@ NULL
 #' This engine is essentially a wrapper for [stats::glm.fit()], however with customized
 #' settings to support offsets and weights.
 #'
+#' If \code{"optim_hyperparam"} is set to \code{TRUE} in [`train()`], then an AIC-based
+#' step-wise (backwards) model selection is performed.
+#' Generally, however, [`engine_glmnet`] should be the preferred engine for models
+#' with more than 3 covariates.
+#'
 #' @returns An [Engine].
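+#'
+#' As a purely illustrative sketch of the step-wise selection described above
+#' (\code{x} is a placeholder model object set up with this engine; the argument
+#' name follows the documentation in this diff):
+#' \dontrun{
+#'  fit <- train(x, optim_hyperparam = TRUE)
+#' }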
 #'
 #' @references
@@ -43,6 +48,7 @@ NULL
 #'
 #' # Add GLM as an engine
 #' x <- distribution(background) |> engine_glm()
+#' print(x)
 #'
 #' @name engine_glm
 NULL
@@ -84,6 +90,9 @@ engine_glm <- function(x,
   # Burn in the background
   template <- terra::rasterize(x$background, template, field = 0)

+  # Mask template where all predictor layers are NA; set na.rm = FALSE to restrict to complete cases
+  if (!is.Waiver(x$predictors)) template <- terra::mask(template, sum(x$predictors$get_data(), na.rm = TRUE))
+
   # Specify default control
   if(is.null(control)){
     control <- stats::glm.control()
@@ -327,29 +336,29 @@ engine_glm <- function(x,
                               all(w >= 0,na.rm = TRUE)
       )
       # --- #
-      # Determine the optimal lambda through k-fold cross-validation
-      if(getOption("ibis.runparallel")){
-        if(!foreach::getDoParRegistered()) ibis_future(cores = getOption("ibis.nthread"),
-                                                       strategy = getOption("ibis.futurestrategy"))
-      }
-      # Depending if regularized should be set, specify this separately
+      # Fit Base model
+      suppressWarnings(
+        fit_glm <- try({
+          stats::glm(formula = form,
+                     data = df,
+                     weights = w, # Case weights
+                     family = fam,
+                     na.action = "na.pass",
+                     control = params$control
+          )
+        },silent = FALSE)
+      )
+      if(inherits(fit_glm, "try-error")) stop("Model failed to converge with provided input data!")
       if( (settings$get('optim_hyperparam')) ){
-        if(getOption('ibis.setupmessages', default = TRUE)) myLog('[Estimation]','yellow',
-                                                                  'No hyperparameter optimization for glm implemented!')
-      } else {
+        if(getOption('ibis.setupmessages', default = TRUE)) myLog('[Estimation]','green',
+                                                                  'Running step-wise AIC selection for glm!')
         suppressWarnings(
-          fit_glm <- try({
-            stats::glm(formula = form,
-                       data = df,
-                       weights = w, # Case weights
-                       family = fam,
-                       na.action = "na.pass",
-                       control = params$control
-            )
-          },silent = FALSE)
+          fit_glm <- stats::step(fit_glm,
+                                 direction = "backward",
+                                 trace = ifelse(getOption('ibis.setupmessages', default = TRUE),1,0)
+          )
         )
       }
-      if(inherits(fit_glm, "try-error")) stop("Model failed to converge with provided input data!")

       # --- #
       # Predict spatially
@@ -368,20 +377,57 @@ engine_glm <- function(x,
       }
       # Make a subset of non-na values
       full$rowid <- 1:nrow(full)
-      full_sub <- subset(full, stats::complete.cases(full))
-      w_full_sub <- w_full[full_sub$rowid]
-      assertthat::assert_that((nrow(full_sub) == length(w_full_sub)) || is.null(w_full_sub) )

       # Attempt prediction
-      out <- try({
-        stats::predict.glm(object = fit_glm,
-                           newdata = full,
-                           type = params$type,
-                           se.fit = TRUE,
-                           na.action = "na.pass",
-                           weights = w_full
+      if( getOption('ibis.runparallel',default = FALSE) ){
+        check_package("doFuture")
+        if(!("doFuture" %in% loadedNamespaces()) || ('doFuture' %notin% utils::sessionInfo()$otherPkgs) ) {
+          try({requireNamespace('doFuture');attachNamespace("doFuture")},silent = TRUE)
+        }
+
+        # Prediction function
+        do_run <- function() {
+          # Chunk the data
+          splits <- chunk_data(full, N = getOption("ibis.nthread"),index_only = TRUE)
+
+          y <- foreach::foreach(s = splits,
+                                .inorder = TRUE
+                                # .options.future = list(globals = ))
+          ) %dofuture% {
+            stats::predict.glm(object = fit_glm,
+                               newdata = full[s,],
+                               type = params$type,
+                               se.fit = TRUE,
+                               na.action = "na.pass",
+                               weights = w_full[s]
+            )
+          }
+          y
+        }
+        # Run
+        result <- do_run()
+        # Combine all
+        # FIXME: hacky list flattener, but works. Reduce and do.call failed
+        out <- list()
+        for(k in 1:length(result)){
+          out[['fit']] <- c(out[['fit']], result[[k]]$fit)
+          out[['se.fit']] <- c(out[['se.fit']], result[[k]]$se.fit)
+        }
+        # Security check
+        assertthat::assert_that(
+          length(out$fit) == nrow(full)
         )
-      },silent = TRUE)
+      } else {
+        out <- try({
+          stats::predict.glm(object = fit_glm,
+                             newdata = full,
+                             type = params$type,
+                             se.fit = TRUE,
+                             na.action = "na.pass",
+                             weights = w_full
+          )
+        },silent = TRUE)
+      }
       if(!inherits(out,"try-error")){
         # Fill output with summaries of the posterior
         prediction <- fill_rasters(out |> as.data.frame(),
@@ -392,7 +438,7 @@ engine_glm <- function(x,
       } else {
         stop("GLM prediction failed!")
       }
-      try({rm(out, full, full_sub)},silent = TRUE)
+      try({rm(out, full)},silent = TRUE)
     } else {
       # No prediction done
       prediction <- NULL
@@ -649,29 +695,66 @@ engine_glm <- function(x,
     if(!is.Waiver(model$offset)) ofs <- model$offset else ofs <- NULL
     assertthat::assert_that(nrow(df)>0)

-    if(is.null(ofs)){
-      pred_glm <- stats::predict.glm(
-        object = mod,
-        newdata = df,
-        weights = df$w, # The second entry of unique contains the non-observed variables
-        se.fit = FALSE,
-        na.action = "na.pass",
-        fam = fam,
-        type = type
-      ) |> as.data.frame()
+    # Run in parallel if specified, otherwise sequentially.
+    if( getOption('ibis.runparallel',default = FALSE) ){
+      check_package("doFuture")
+      if(!("doFuture" %in% loadedNamespaces()) || ('doFuture' %notin% utils::sessionInfo()$otherPkgs) ) {
+        try({requireNamespace('doFuture');attachNamespace("doFuture")},silent = TRUE)
+      }
+
+      # Prediction function
+      do_run <- function() {
+        # Chunk the data
+        splits <- chunk_data(df, N = getOption("ibis.nthread",default = 10),index_only = TRUE)
+
+        y <- foreach::foreach(s = splits,
+                              .inorder = TRUE
+                              # .options.future = list(globals = ))
+        ) %dofuture% {
+          stats::predict.glm(object = mod,
+                             newdata = df[s,],
+                             type = type,
+                             se.fit = FALSE,
+                             na.action = "na.pass",
+                             weights = df$w[s]
+          )
+        }
+        y
+      }
+      # Run
+      result <- do_run()
+      # Combine all
+      # FIXME: hacky list flattener, but works. Reduce and do.call failed
+      pred_glm <- list()
+      for(k in 1:length(result)){
+        pred_glm[['fit']] <- c(pred_glm[['fit']], result[[k]]$fit)
+      }
+      pred_glm <- as.data.frame(pred_glm)
+
     } else {
-      pred_glm <- stats::predict.glm(
-        object = mod,
-        newdata = df,
-        weights = df$w, # The second entry of unique contains the non-observed variables
-        offset = ofs,
-        se.fit = FALSE,
-        na.action = "na.pass",
-        fam = fam,
-        type = type
-      ) |> as.data.frame()
+      if(is.null(ofs)){
+        pred_glm <- stats::predict.glm(
+          object = mod,
+          newdata = df,
+          weights = df$w, # The second entry of unique contains the non-observed variables
+          se.fit = FALSE,
+          na.action = "na.pass",
+          fam = fam,
+          type = type
+        ) |> as.data.frame()
+      } else {
+        pred_glm <- stats::predict.glm(
+          object = mod,
+          newdata = df,
+          weights = df$w, # The second entry of unique contains the non-observed variables
+          offset = ofs,
+          se.fit = FALSE,
+          na.action = "na.pass",
+          fam = fam,
+          type = type
+        ) |> as.data.frame()
+      }
     }
-
     names(pred_glm) <- layer
     assertthat::assert_that(nrow(pred_glm)>0,
                             nrow(pred_glm) == nrow(df))
diff --git a/R/engine_glmnet.R b/R/engine_glmnet.R
index 0c8895ae..e10ab760 100644
--- a/R/engine_glmnet.R
+++ b/R/engine_glmnet.R
@@ -114,6 +114,9 @@ engine_glmnet <- function(x,
   # Burn in the background
   template <- terra::rasterize(x$background, template, field = 0)

+  # Mask template where all predictor layers are NA; set na.rm = FALSE to restrict to complete cases
+  if (!is.Waiver(x$predictors)) template <- terra::mask(template, sum(x$predictors$get_data(), na.rm = TRUE))
+
   # Set up the parameter list
   params <- list(
     alpha = alpha,
@@ -498,32 +501,76 @@ engine_glmnet <- function(x,
       w_full_sub <- w_full[full_sub$rowid]
       assertthat::assert_that((nrow(full_sub) == length(w_full_sub)) || is.null(w_full_sub) )

-      # Attempt prediction
-      if(inherits(cv_gn, "cv.glmnet")){
-        out <- predict(object = cv_gn,
-                       newdata = full_sub,
-                       weights = w_full_sub,
-                       newoffset = ofs_pred[full_sub$rowid],
-                       s = determine_lambda(cv_gn), # Determine the best lambda value
-                       type = params$type
-        )
+      if(getOption("ibis.runparallel",default = FALSE)){
+        check_package("doFuture")
+        if(!("doFuture" %in% loadedNamespaces()) || ('doFuture' %notin% utils::sessionInfo()$otherPkgs) ) {
+          try({requireNamespace('doFuture');attachNamespace("doFuture")},silent = TRUE)
+        }
+        # Chunk the data
+        splits <- chunk_data(full_sub, N = getOption("ibis.nthread",default = 10),
+                             index_only = TRUE)
+
+        lambda <- determine_lambda(cv_gn)
+
+        out <- foreach::foreach(s = splits,
+                                .combine = "rbind",
+                                .inorder = TRUE,
+                                .options.future = list(seed = TRUE,
+                                                       packages = c("glmnet", "glmnetUtils"))
+        ) %dofuture% {
+          # Make a prediction
+          if(inherits(cv_gn, "cv.glmnet")){
+            out <- predict(object = cv_gn,
+                           newdata = full_sub[s,],
+                           weights = w_full_sub[s],
+                           newoffset = ofs_pred[full_sub$rowid[s]],
+                           s = lambda, # Determine the best lambda value
+                           type = params$type
+            )
+          } else {
+            # Assume cva.glmnet
+            out <- predict(
+              object = cv_gn,
+              newdata = full_sub[s,],
+              alpha = cv_gn$alpha,
+              weights = w_full_sub[s],
+              newoffset = ofs_pred[full_sub$rowid[s]],
+              s = lambda, # Determine the best lambda value
+              type = params$type
+            )
+            # Determine best model based on cross-validated loss
+            # ind <- which.min( sapply(cv_gn$modlist, function(z) min(z$cvup)) )
+            # cv_gn <- cv_gn$modlist[[ind]]
+          }
+          return(out)
+        }
       } else {
-        # Assume cva.glmnet
-        out <- predict(
-          object = cv_gn,
-          newdata = full_sub,
-          alpha = cv_gn$alpha,
-          weights = w_full_sub,
-          newoffset = ofs_pred[full_sub$rowid],
-          s = 
determine_lambda(cv_gn), # Determine the best lambda value - type = params$type - ) - # Determine best model based on cross-validated loss - # ind <- which.min( sapply(cv_gn$modlist, function(z) min(z$cvup)) ) - # cv_gn <- cv_gn$modlist[[ind]] + # Attempt prediction + if(inherits(cv_gn, "cv.glmnet")){ + out <- predict(object = cv_gn, + newdata = full_sub, + weights = w_full_sub, + newoffset = ofs_pred[full_sub$rowid], + s = determine_lambda(cv_gn), # Determine the best lambda value + type = params$type + ) + } else { + # Assume cva.glmnet + out <- predict( + object = cv_gn, + newdata = full_sub, + alpha = cv_gn$alpha, + weights = w_full_sub, + newoffset = ofs_pred[full_sub$rowid], + s = determine_lambda(cv_gn), # Determine the best lambda value + type = params$type + ) + # Determine best model based on cross-validated loss + # ind <- which.min( sapply(cv_gn$modlist, function(z) min(z$cvup)) ) + # cv_gn <- cv_gn$modlist[[ind]] + } } - # Fill output with summaries of the posterior prediction[full_sub$rowid] <- out[,1] names(prediction) <- "mean" @@ -815,29 +862,75 @@ engine_glmnet <- function(x, if(!is.Waiver(model$offset)) ofs <- model$offset[df_sub$rowid] else ofs <- NULL assertthat::assert_that(nrow(df_sub)>0) - if(inherits(mod, "cv.glmnet")){ - pred_gn <- predict( - object = mod, - newdata = df_sub, - weights = df_sub$w, # The second entry of unique contains the non-observed variables - newoffset = ofs, - na.action = "na.pass", - s = determine_lambda(mod), # Determine best available lambda - fam = fam, - type = type - ) |> as.data.frame() + if(getOption("ibis.runparallel",default = FALSE)){ + check_package("doFuture") + if(!("doFuture" %in% loadedNamespaces()) || ('doFuture' %notin% utils::sessionInfo()$otherPkgs) ) { + try({requireNamespace('doFuture');attachNamespace("doFuture")},silent = TRUE) + } + # Chunk the data + splits <- chunk_data(df_sub, N = getOption("ibis.nthread",default = 10), + index_only = TRUE) + lambda <- determine_lambda(mod) + + pred_gn <- foreach::foreach(s = splits, + .combine = "rbind", + .inorder = TRUE, + .options.future = list(seed = TRUE, + packages = c("glmnet", "glmnetUtils")) + ) %dofuture% { + # Make a prediction + if(inherits(mod, "cv.glmnet")){ + ms <- predict( + object = mod, + newdata = df_sub[s,], + weights = df_sub$w[s], # The second entry of unique contains the non-observed variables + newoffset = ofs[s], + na.action = "na.pass", + s = lambda, # Determine best available lambda + fam = fam, + type = type + ) |> as.data.frame() + } else { + ms <- predict( + object = mod, + newdata = df_sub[s,], + alpha = mod$alpha[s], + weights = df_sub$w[s], # The second entry of unique contains the non-observed variables + newoffset = ofs[s], + na.action = "na.pass", + s = lambda, # Determine the best lambda value + fam = fam, + type = type + ) + } + return(ms) + } + } else { - pred_gn <- predict( - object = mod, - newdata = df_sub, - alpha = mod$alpha, - weights = df_sub$w, # The second entry of unique contains the non-observed variables - newoffset = ofs, - na.action = "na.pass", - s = determine_lambda(mod), # Determine the best lambda value - fam = fam, - type = type - ) + if(inherits(mod, "cv.glmnet")){ + pred_gn <- predict( + object = mod, + newdata = df_sub, + weights = df_sub$w, # The second entry of unique contains the non-observed variables + newoffset = ofs, + na.action = "na.pass", + s = determine_lambda(mod), # Determine best available lambda + fam = fam, + type = type + ) |> as.data.frame() + } else { + pred_gn <- predict( + object = mod, + newdata = 
df_sub,
+          alpha = mod$alpha,
+          weights = df_sub$w, # The second entry of unique contains the non-observed variables
+          newoffset = ofs,
+          na.action = "na.pass",
+          s = determine_lambda(mod), # Determine the best lambda value
+          fam = fam,
+          type = type
+        )
+      }
     }
     names(pred_gn) <- layer
     assertthat::assert_that(nrow(pred_gn)>0, nrow(pred_gn) == nrow(df_sub))
@@ -855,7 +948,7 @@ engine_glmnet <- function(x,
                         type = "xyz") |>
         emptyraster()
     }, silent = TRUE)
-    prediction[] <- pred_gn[, layer]
+    prediction[df_sub$rowid] <- pred_gn[, layer]
   }

   return(prediction)
diff --git a/R/engine_scampr.R b/R/engine_scampr.R
index 7b36c0a7..e3dedd1f 100644
--- a/R/engine_scampr.R
+++ b/R/engine_scampr.R
@@ -98,6 +98,9 @@ engine_scampr <- function(x,
   # Burn in the background
   template <- terra::rasterize(x$background, template, field = 0)

+  # Mask template where all predictor layers are NA; set na.rm = FALSE to restrict to complete cases
+  if (!is.Waiver(x$predictors)) template <- terra::mask(template, sum(x$predictors$get_data(), na.rm = TRUE))
+
   # Set up the parameter list
   params <- list(
     type = type,
@@ -796,7 +799,7 @@ engine_scampr <- function(x,
                         type = "xyz") |>
         emptyraster()
     }, silent = TRUE)
-    prediction[] <- out
+    prediction[df_sub$rowid] <- out
   }
   names(prediction) <- layer
   } else {
diff --git a/R/engine_stan.R b/R/engine_stan.R
index 2e6647ae..d178eeb1 100644
--- a/R/engine_stan.R
+++ b/R/engine_stan.R
@@ -113,9 +113,13 @@ engine_stan <- function(x,
     # If predictor existing, use them
     template <- emptyraster(x$predictors$get_data() )
   }
+
   # Burn in the background
   template <- terra::rasterize(x$background, template, field = 0)

+  # Mask template where all predictor layers are NA; set na.rm = FALSE to restrict to complete cases
+  if (!is.Waiver(x$predictors)) template <- terra::mask(template, sum(x$predictors$get_data(), na.rm = TRUE))
+
   # Define new engine object of class
   eg <- Engine
@@ -581,7 +585,7 @@ engine_stan <- function(x,
                                newdata = full@data,
                                offset = (full$w),
                                family = fam, # Family
-                               mode = self$stan_param$type # Type
+                               type = self$stan_param$type # Type
       )

       # Convert full to raster
@@ -660,7 +664,7 @@ engine_stan <- function(x,
                              newdata = full@data,
                              offset = (full$w),
                              family = fam,
-                             mode = type # Linear predictor
+                             type = type # Linear predictor
     )

     # Fill output with summaries of the posterior
@@ -766,7 +770,7 @@ engine_stan <- function(x,
                              newdata = df_temp,
                              offset = df_temp$w,
                              family = fam,
-                             mode = type) # Linear predictor
+                             type = type) # Linear predictor

     # FIXME: Something wrong here I guess
     # Also attach the partial variable
@@ -848,7 +852,7 @@ engine_stan <- function(x,
                              newdata = df_partial@data,
                              offset = df_partial$w,
                              family = fam,
-                             mode = type # Linear predictor
+                             type = type # Linear predictor
     )

     # Get container
diff --git a/R/engine_xgboost.R b/R/engine_xgboost.R
index 79c427a5..ff19e72a 100644
--- a/R/engine_xgboost.R
+++ b/R/engine_xgboost.R
@@ -115,6 +115,9 @@ engine_xgboost <- function(x,
   # Burn in the background
   template <- terra::rasterize(x$background, template, field = 0)

+  # Mask template where all predictor layers are NA; set na.rm = FALSE to restrict to complete cases
+  if (!is.Waiver(x$predictors)) template <- terra::mask(template, sum(x$predictors$get_data(), na.rm = TRUE))
+
   # Set up the parameter list
   params <- list(
     booster = booster,
@@ -872,7 +875,8 @@ engine_xgboost <- function(x,
     if(nrow(newdata)==nrow(model$predictors)){
       prediction <- try({model_to_background(model)}, silent = TRUE)
       prediction[] <- pred_xgb
-      prediction <- terra::mask(prediction, model$background)
+      mask_tmp <- 
+        prediction <- terra::mask(prediction, mask_tmp) # Make sure only non-NA cells are used
       } else {
         assertthat::assert_that(utils::hasName(newdata_copy,"x")&&utils::hasName(newdata_copy,"y"),
                                 msg = "Projection data.frame has no valid coordinates or differs in grain!")
diff --git a/R/ensemble.R b/R/ensemble.R
index a4c96603..5404ed35 100644
--- a/R/ensemble.R
+++ b/R/ensemble.R
@@ -35,6 +35,9 @@
 #'   standard deviation (\code{"sd"}), the average of all PCA axes except the
 #'   first \code{"pca"}, the coefficient of variation (\code{"cv"}, Default) or
 #'   the range between the lowest and highest value (\code{"range"}).
+#' @param point A [`sf`] object containing observational data used for model training. Used
+#'   for method \code{'superlearner'} only (Default: \code{NULL}).
+#' @param field_occurrence A [`character`] giving the name of the column containing the biodiversity point records (Default: \code{'observed'}).
 #' @param apply_threshold A [`logical`] flag (Default: \code{TRUE}) specifying
 #'   whether threshold values should also be created via \code{"method"}. Only
 #'   applies and works for [`DistributionModel`] and thresholds found.
@@ -44,10 +47,13 @@
 #' * \code{'median'} - Calculates the median of several predictions.
 #' * \code{'max'} - The maximum value across predictions.
 #' * \code{'min'} - The minimum value across predictions.
+#' * \code{'mode'} - The modal value, i.e. the most commonly occurring value across predictions.
 #' * \code{'weighted.mean'} - Calculates a weighted mean. Weights have to be supplied separately (e.g. TSS).
 #' * \code{'min.sd'} - Ensemble created by minimizing the uncertainty among predictions.
 #' * \code{'threshold.frequency'} - Returns an ensemble based on threshold frequency (simple count). Requires thresholds to be computed.
 #' * \code{'pca'} - Calculates a PCA between predictions of each algorithm and then extract the first axis (the one explaining the most variation).
+#' * \code{'superlearner'} - Combines the predictions through a 'meta-model' fitted on top
+#'   (a [`glm`] by default). Requires binomial data in the current setup.
 #'
 #' In addition to the different ensemble methods, a minimal threshold
 #' (\code{min.value}) can be set that needs to be surpassed for averaging. By
@@ -94,14 +100,17 @@ NULL
 methods::setGeneric("ensemble",
                     signature = methods::signature("..."),
                     function(..., method = "mean", weights = NULL, min.value = NULL, layer = "mean",
-                             normalize = FALSE, uncertainty = "cv", apply_threshold = TRUE) standardGeneric("ensemble"))
+                             normalize = FALSE, uncertainty = "cv",
+                             point = NULL, field_occurrence = 'observed',
+                             apply_threshold = TRUE) standardGeneric("ensemble"))

 #' @rdname ensemble
 methods::setMethod(
   "ensemble",
   methods::signature("ANY"),
   function(..., method = "mean", weights = NULL, min.value = NULL, layer = "mean",
-           normalize = FALSE, uncertainty = "cv", apply_threshold = TRUE){
+           normalize = FALSE, uncertainty = "cv",
+           point = NULL, field_occurrence = 'observed', apply_threshold = TRUE){
     if(length(list(...))>1) {
       mc <- list(...)
     } else {
@@ -135,11 +144,22 @@ methods::setMethod(
     )

     # Check the method
-    method <- match.arg(method, c('mean', 'weighted.mean', 'median', 'max', 'min',
+    method <- match.arg(method, c('mean', 'weighted.mean', 'median', 'max', 'min','mode',
+                                  'superlearner',
                                   'threshold.frequency', 'min.sd', 'pca'), several.ok = FALSE)

     # Uncertainty calculation
     uncertainty <- match.arg(uncertainty, c('none','sd', 'cv', 'range', 'pca'), several.ok = FALSE)

+    # Method specific checks
+    if(method == "superlearner"){
+      assertthat::assert_that(!is.null(point),
+                              msg = "Ensemble method 'superlearner' requires point data to be specified.")
+      assertthat::assert_that(utils::hasName(point, field_occurrence),
+                              msg = "Field occurrence column not found in the specified point data.")
+      assertthat::assert_that(dplyr::n_distinct(point[[field_occurrence]])==2,
+                              msg = "Superlearner currently requires binomial data for ensembling!")
+    }
+    # --- #
     # For Distribution model ensembles
     if( all( sapply(mods, function(z) inherits(z, "DistributionModel")) ) ){
       assertthat::assert_that(length(mods)>=2, # Need at least 2 otherwise this does not make sense
@@ -184,6 +204,8 @@ methods::setMethod(
         new <- max(ras, na.rm = TRUE)
       } else if(method == 'min'){
         new <- min(ras, na.rm = TRUE)
+      } else if(method == 'mode'){
+        new <- terra::modal(ras, na.rm = TRUE)
       } else if(method == 'weighted.mean'){
         new <- terra::weighted.mean( ras, w = weights, na.rm = TRUE)
       } else if(method == 'threshold.frequency'){
@@ -225,6 +247,23 @@ methods::setMethod(
       } else if(method == 'pca'){
         # Calculate a pca on the layers and return the first axes
         new <- predictor_transform(ras, option = "pca", pca.var = 1)[[1]]
+      } else if(method == 'superlearner'){
+        # Run a superlearner with the specified point data.
+        # Ensure that predictions have unique names
+        names(ras) <- paste0('model', 1:terra::nlyr(ras))
+        ex <- terra::extract(ras, point, ID = FALSE)
+        ex <- cbind(point[,field_occurrence], ex)
+        fit <- stats::glm(
+          formula = paste(field_occurrence, "~", paste0(names(ras), collapse = ' + ')) |> stats::as.formula(),
+          family = stats::binomial(), data = ex
+        )
+        # Now predict output with the meta-learner
+        new <- emptyraster(ras)
+        new[which(!is.na(ras[[1]])[])] <- terra::predict(
+          fit, ras, na.rm = FALSE, type = "response",
+          cores = getOption('ibis.nthread'))
+        attr(new, "superlearner.coefficients") <- stats::coef(fit)
+        try({ rm(ex,fit) },silent = TRUE)
       }

       # Rename
@@ -275,6 +314,7 @@ methods::setMethod(
           method == "median" ~ median(ll_val, na.rm = TRUE),
           method == "max" ~ max(ll_val, na.rm = TRUE),
           method == "min" ~ min(ll_val, na.rm = TRUE),
+          method == "mode" ~ modal(ll_val, na.rm = TRUE),
           method == "weighted.mean" ~ weighted.mean(ll_val, w = weights, na.rm = TRUE),
           .default = mean(ll_val, na.rm = TRUE)
         )
@@ -338,12 +378,13 @@ methods::setMethod(
         new <- max(ras, na.rm = TRUE)
       } else if(method == 'min'){
         new <- min(ras, na.rm = TRUE)
+      } else if(method == 'mode'){
+        new <- terra::modal(ras, na.rm = TRUE)
       } else if(method == 'weighted.mean'){
         new <- terra::weighted.mean( ras, w = weights, na.rm = TRUE)
       } else if(method == 'threshold.frequency'){
         # Check that thresholds are available
         stop("This function does not (yet) work with directly provided Raster objects.")
-
       } else if(method == 'min.sd'){
         # If method 'min.sd' furthermore check that there is a sd object for all
         # of them
@@ -362,6 +403,23 @@ methods::setMethod(
       } else if(method == 'pca'){
         # Calculate a pca on the layers and return the first axes
         new <- predictor_transform(ras, option = "pca", pca.var = 1)[[1]]
+      } else if(method ==
'superlearner'){ + # Run a superlearner with the specified point data. + # Ensure that predictions have unique names + names(ras) <- paste0('model', 1:terra::nlyr(ras)) + ex <- terra::extract(ras, point, ID = FALSE) + ex <- cbind(point[,field_occurrence], ex) + fit <- stats::glm( + formula = paste(field_occurrence, "~", paste0(names(ras), collapse = ' + ')) |> stats::as.formula(), + family = stats::binomial(),data = ex + ) + # Now predict output with the meta-learner + new <- emptyraster(ras) + new[which(!is.na(ras[[1]])[])] <- terra::predict( + fit, ras, na.rm = FALSE, type = "response", + cores = getOption('ibis.nthread')) + attr(new, "superlearner.coefficients") <- stats::coef(fit) + try({ rm(ex,fit) },silent = TRUE) } # Rename names(new) <- paste0("ensemble_", lyr) @@ -386,7 +444,7 @@ methods::setMethod( suppressWarnings( out <- c(out, new) ) } } - + # Check and return output assertthat::assert_that(is.Raster(out)) return(out) } else { @@ -450,6 +508,9 @@ methods::setMethod( } else if(method == 'min'){ out <- apply(lmat[,4:ncol(lmat)], # On the assumption that col 1-3 are coordinates+time 1, function(x) min(x, na.rm = TRUE)) + } else if(method == 'mode'){ + out <- apply(lmat[,4:ncol(lmat)], # On the assumption that col 1-3 are coordinates+time + 1, function(x) modal(x, na.rm = TRUE)) } else if(method == 'weighted.mean'){ out <- apply(lmat[,4:ncol(lmat)], # On the assumption that col 1-3 are coordinates+time 1, function(x) weighted.mean(x, w = weights, na.rm = TRUE)) @@ -461,6 +522,8 @@ methods::setMethod( stop("This has not been reasonably implemented in this context.") } else if(method == 'pca'){ stop("This has not been reasonably implemented in this context.") + } else if(method == 'superlearner'){ + stop("This has not been reasonably implemented in this context.") } # Add dimensions to output if(inherits(mods[[1]], "stars")){ @@ -497,6 +560,7 @@ methods::setMethod( method == "median" ~ median(ll_val, na.rm = TRUE), method == "max" ~ max(ll_val, na.rm = TRUE), method == "min" ~ min(ll_val, na.rm = TRUE), + method == "mode" ~ modal(ll_val, na.rm = TRUE), method == "weighted.mean" ~ weighted.mean(ll_val, w = weights, na.rm = TRUE), .default = mean(ll_val, na.rm = TRUE) ) diff --git a/R/ibis.iSDM-package.R b/R/ibis.iSDM-package.R index 24bdf49b..5a3654e1 100644 --- a/R/ibis.iSDM-package.R +++ b/R/ibis.iSDM-package.R @@ -29,6 +29,11 @@ globalVariables(c("background", "band", "bi_class", "bias", "geometry", #MJ: Added self here hoping that does not crash all methods. "self", + # Cores for parallel processing + "cores", + "%dofuture%", + # Global prediction function + "predict_boom", "id", "included", "i", "km", "vt", "V2", "limit", "lower", "layer", diff --git a/R/misc.R b/R/misc.R index bf767879..27c35425 100644 --- a/R/misc.R +++ b/R/misc.R @@ -141,55 +141,312 @@ ibis_dependencies <- function(deps = getOption("ibis.dependencies"), update = TR invisible() } -#' Options to set up ibis for parallel processing with future +#' Set the parallel processing flag to TRUE +#' @description +#' Small helper function to enable parallel processing. If set +#' to \code{TRUE}, then parallel inference (if supported by engines) and projection is +#' enabled across the package. +#' For enabling prediction support beyond sequential prediction see the [`ibis_future`] function. 
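As a usage sketch for the parallelization helpers introduced here and just below (`ibis_enable_parallel()`, `ibis_set_strategy()`, `ibis_set_threads()`, `ibis_future()`): these only toggle options and register a `future` plan, while the chunked work happens inside the engines via `foreach`/`%dofuture%`. The following is a minimal, hedged illustration of that pattern; it accesses the internal `chunk_data()` helper (added further down in this diff) via `:::` purely for demonstration, and the per-chunk computation is a toy stand-in for an engine's `predict()` call. Adjust the number of cores to your machine.

```r
library(ibis.iSDM)
library(foreach)
library(doFuture)

# Enable package-wide parallelism and register a multisession plan
ibis_enable_parallel()
ibis_set_threads(4)
ibis_set_strategy("multisession")
ibis_future(cores = 4, strategy = "multisession")

# Chunk a data.frame into row-index blocks (internal helper, shown for illustration only)
df <- datasets::airquality
splits <- ibis.iSDM:::chunk_data(df, N = 4, index_only = TRUE)

# Process the chunks asynchronously and row-bind the results,
# mirroring the chunked prediction loops added to the engines
out <- foreach(s = splits,
               .combine = "rbind",
               .inorder = TRUE,
               .options.future = list(seed = TRUE)) %dofuture% {
  # Toy per-chunk computation; an engine would call predict() here
  data.frame(n = length(s), mean_temp = mean(df$Temp[s], na.rm = TRUE))
}
out
```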
#'
+#' @return Invisible
+#' @seealso [future], [ibis_future]
+#' @keywords misc
+#' @export
+ibis_enable_parallel <- function(){
+  options('ibis.runparallel' = TRUE)
+  if(getOption('ibis.nthread')<2){
+    myLog('[Setup]','yellow','Parallelization enabled but less than 2 nodes specified!')
+  }
+  invisible()
+}
+
+#' Set the strategy for parallel processing.
+#' @description
+#' Small helper function to respecify the strategy for parallel processing (Default: \code{'sequential'}).
+#' @details
+#' Currently supported strategies are:
+#'
+#' * \code{"sequential"} = Resolves futures sequentially in the current R process (Package default).
+#' * \code{"multisession"} = Resolves futures asynchronously across \code{'cores'} sessions.
+#' * \code{"multicore"} = Resolves futures asynchronously on forked processes. Only works on UNIX systems!
+#' * \code{"cluster"} = Resolves futures asynchronously in sessions on one or more machines.
+#' * \code{"slurm"} = To be implemented: Slurm linkage via batchtools.
+#' @param strategy A [`character`] with the strategy.
+#' @return Invisible
+#' @seealso [future], [ibis_future]
+#' @keywords misc
+#' @export
+ibis_set_strategy <- function(strategy = "sequential"){
+  assertthat::assert_that(is.character(strategy))
+
+  strategy <- match.arg(strategy, c("sequential", "multisession", "multicore", "cluster", "slurm"),
+                        several.ok = FALSE)
+  options('ibis.futurestrategy' = strategy)
+  invisible()
+}
+
+#' Set the threads for parallel processing.
+#' @description
+#' Small helper function to respecify the number of threads for parallel processing.
+#' @param threads A [`numeric`] greater than \code{0}.
+#' @return Invisible
+#' @seealso [future], [ibis_future]
+#' @keywords misc
+#' @export
+ibis_set_threads <- function(threads = 2){
+  assertthat::assert_that(is.numeric(threads),
+                          threads > 0)
+  options('ibis.nthread' = threads)
+  invisible()
+}
+
+#' Internal function to enable (a)synchronous parallel processing
+#'
+#' @description
+#' This function checks if parallel processing can be set up and enables it.
+#' **Ideally this is done by the user for more control!**
+#' In the package parallelization is usually only used for predictions and projections,
+#' but not for inference in which case parallel inference should be handled by the engine.
+#' @details
+#' Currently supported strategies are:
+#'
+#' * \code{"sequential"} = Resolves futures sequentially in the current R process (Package default).
+#' * \code{"multisession"} = Resolves futures asynchronously across \code{'cores'} sessions.
+#' * \code{"multicore"} = Resolves futures asynchronously on forked processes. Only works on UNIX systems!
+#' * \code{"cluster"} = Resolves futures asynchronously in sessions on one or more machines.
+#' * \code{"slurm"} = To be implemented: Slurm linkage via batchtools.
+#' @note
+#' The \code{'plan'} set by [future] persists after the function has been executed.
+#'
+#' If the aim is to parallelize across many species, this is better done in a scripted solution.
+#' Make sure not to parallelize predictions within existing clusters to avoid out-of-memory
+#' issues.
+#'
+#' @param plan_exists A [`logical`] check on whether an existing [`future`] plan exists (Default: \code{FALSE}).
 #' @param cores A [`numeric`] number stating the number of cores to use.
 #' @param strategy A [`character`] denoting the strategy to be used for future.
 #'   See help of [`future`] for options. (Default: \code{"multisession"}).
+#' @param workers An optional list of remote machines or workers, e.g. \code{"c(remote.server.org)"}.
+#'   Alternatively a \code{"cluster"} object can be provided.
 #'
-#' @return None
+#' @return Invisible
 #'
 #' @seealso [future]
 #' @keywords misc
 #'
 #' @examples
 #' \dontrun{
-#' # Starts future job
-#' ibis_future(cores = 4)
+#' # Sets up a future plan for parallel prediction across 4 sessions
+#' ibis_future(cores = 4, strategy = "multisession")
 #' }
 #'
 #' @export
-ibis_future <- function(cores = getOption("ibis.nthread"), strategy = getOption("ibis.futurestrategy")) {
+ibis_future <- function(plan_exists = FALSE,
+                        cores = getOption("ibis.nthread",default = 2),
+                        strategy = getOption("ibis.futurestrategy"),
+                        workers = NULL
+                        ) {
   assertthat::assert_that(
+    is.logical(plan_exists),
     is.numeric(cores),
-    is.character(strategy)
+    is.character(strategy),
+    is.null(workers) || is.vector(workers) || (inherits(workers, "ClusterFuture"))
   )
-  check_package("future")

-  # Check that number of cores don't exceed what is possible
-  assertthat::assert_that(cores <= future::availableCores())
-  strategy <- match.arg(strategy, c("sequential", "multisession", "multicore", "cluster", "remote"),
-                        several.ok = FALSE)
+  # Check if plan exists, if not specify new plan
+  if(!plan_exists){
+    # Check that number of cores don't exceed what is possible
+    assertthat::assert_that(cores <= parallelly::availableCores()[[1]])

-  if(isTRUE(Sys.info()[["sysname"]] == "Windows")){
-    if(strategy == "multicore") stop("Multicore is not supported on windows!")
-  }
+    strategy <- match.arg(strategy, c("sequential", "multisession", "multicore", "cluster", "slurm"),
+                          several.ok = FALSE)
+
+    # Check if parallel processing is enabled
+    assertthat::assert_that(
+      getOption("ibis.runparallel", default = FALSE),
+      msg = "Parallel processing not enabled. Run 'ibis_enable_parallel()' !"
+    )

-  # Define plan based on formulated strategy
-  if(strategy == "remote"){
-    #TODO: See if a testing environment could be found.
-    stop("TBD. Requires specific setup.")
-    #e.g. cl <- makeCluster(4, type = "MPI")
-  } else if(strategy == "sequential") {
-    future::plan(strategy = future::sequential())
-  } else if(strategy == "multisession"){
-    future::plan(strategy = future::multisession(workers = cores) )
-  } else if(strategy == "multicore"){
-    future::plan(strategy = future::multicore(workers = cores) )
-  } else if(strategy == "cluster"){
-    future::plan(strategy = future::cluster(workers = cores) )
+    # Check that more than 1 connection is available
+    if(strategy != "sequential"){
+      assertthat::assert_that(parallelly::availableConnections()>1,
+                              msg = "No further connections are available for parallel processing!")
+    }
+
+    # isTRUE(Sys.info()[["sysname"]] == "Windows")
+    if(strategy == "multicore" && !parallelly::supportsMulticore()){
+      if(getOption('ibis.setupmessages', default = TRUE)) myLog('[Setup]','yellow','Parallelization strategy multicore is not supported on Windows. Changing to multisession.')
+      strategy <- "multisession"
+    }
+
+    if(is.null(cores)) cores <- 4 # Arbitrary nr of cores if somehow not found
+    if(is.null(workers)) workers <- cores # If no workers are found, use the cores
+
+    # --- #
+    # Define plan based on formulated strategy
+    if(strategy == "slurm"){
+      #TODO: See if a testing environment could be found.
+      stop("Not yet implemented")
+      #e.g.
cl <- makeCluster(4, type = "MPI")
+    } else if(strategy == "sequential") {
+      future::plan(strategy = "sequential")
+    } else if(strategy == "multisession"){
+      future::plan(strategy = "multisession", workers = cores)
+    } else if(strategy == "multicore"){
+      future::plan(strategy = "multicore", workers = cores)
+    } else if(strategy == "cluster"){
+      future::plan(strategy = "cluster", workers = workers)
+    }
   }
-  # Register the doFuture adapate
-  doFuture::registerDoFuture()
+  if(getOption('ibis.setupmessages', default = TRUE)) myLog('[Setup]','green','Specified parallel processing plan with strategy: ', strategy)
   invisible()
 }
+
+#' Internal helper function to split a data.frame into chunks
+#'
+#' @param X A [`data.frame`] or [`matrix`] object to be split.
+#' @param N A [`numeric`] with the number of chunks smaller than \code{`nrow(X)`}.
+#' @param cores A [`numeric`] with the number of processing cores. Used when \code{N} is \code{NULL}.
+#' @param index_only A [`logical`] on whether only the indices are returned rather than the split data as a list (Default: \code{FALSE}).
+#' @keywords internal
+#' @returns A [`list`] object.
+#' @examples
+#' # Chunk example data into 4 subsets
+#' chunk_data(datasets::airquality, N = 4)
+#' @noRd
+chunk_data <- function(X, N = NULL, cores = parallel::detectCores(), index_only = FALSE){
+  assertthat::assert_that(
+    is.data.frame(X) || is.matrix(X),
+    nrow(X) > 1,
+    is.numeric(cores),
+    is.null(N) || is.numeric(N),
+    is.logical(index_only)
+  )
+
+  n_vars <- nrow(X)
+  # Use cores as N otherwise
+  if(is.null(N)) N <- cores
+  chunk_size <- ceiling(n_vars / N)
+  n_chunks <- ceiling(n_vars / chunk_size)
+
+  if(index_only){
+    chunk_list <- list()
+  } else {
+    chunk_list <- vector(length = n_chunks, mode = "list")
+  }
+  for (i in seq_len(n_chunks)) {
+    if ((chunk_size * (i - 1) + 1) <= n_vars) {
+      chunk <- (chunk_size * (i - 1) + 1):(min(c(chunk_size *
+                                                   i, n_vars)))
+      if(index_only){
+        o <- chunk
+      } else {
+        o <- X[chunk, ] |> as.data.frame()
+        colnames(o) <- colnames(X)
+      }
+      chunk_list[[i]] <- o
+      rm(o)
+    }
+  }
+  assertthat::assert_that(is.list(chunk_list))
+  if(!index_only){
+    assertthat::assert_that(sum(sapply(chunk_list, nrow)) == nrow(X),
+                            msg = "Something went wrong with the data chunking...")
+  }
+  return(chunk_list)
+}
+
+#' Parallel computation of function
+#'
+#' @description Some computations take considerable amount of time to execute.
+#' This function provides a helper wrapper for running functions of the
+#' [`apply`] family to specified outputs.
+#'
+#' @param X A [`list`], [`data.frame`] or [`matrix`] object to be fed to a single
+#'   core or parallel [apply] call.
+#' @param FUN A [`function`] passed on for computation.
+#' @param cores A [numeric] of the number of cores to use (Default: \code{1}).
+#' @param approach [`character`] for the parallelization approach taken (Options:
+#'   \code{"parallel"} or \code{"future"}).
+#' @param export_packages A [`vector`] with packages to export for use on parallel
+#'   nodes (Default: \code{NULL}).
+#' @param ... Any other parameter passed on.
+#'
+#' @details By default, the [parallel] package is used for parallel computation,
+#' however an option exists to use the [future] package instead.
+#'
+#' @keywords utils
+#'
+#' @examples
+#' \dontrun{
+#' run_parallel(list, mean, cores = 4)
+#' }
+#'
+#' @export
+run_parallel <- function(X, FUN, cores = 1, approach = "future", export_packages = NULL, ...)
{
+  assertthat::assert_that(
+    is.list(X) || is.data.frame(X) || is.matrix(X),
+    is.function(FUN),
+    is.numeric(cores),
+    is.null(export_packages) || is.character(export_packages)
+  )
+  message("The run_parallel function is likely deprecated and is only kept for reference...")
+
+  # Match approach
+  approach <- match.arg(approach, c("parallel", "future"), several.ok = FALSE)
+
+  # Collect dots
+  dots <- list(...)
+
+  if(!is.list(X)){
+    # Convert input object to a list of split parameters
+    X <- chunk_data(X, cores = cores)
+    input_type = "data.frame" # Save to aggregate later again
+  } else { input_type = "list"}
+
+  # Process depending on cores
+  if (cores == 1) {
+    out <- lapply(X, FUN, ...)
+  } else {
+    if(approach == "parallel"){
+      # check_package('doParallel')
+      # require(foreach)
+      # isTRUE(Sys.info()[["sysname"]] == "Windows")
+      # Other operating systems
+      if(!isTRUE(Sys.info()[["sysname"]] == "Windows") && is.list(X)) {
+        out <- parallel::mclapply(X = X, FUN = FUN, mc.cores = cores,
+                                  ...)
+      } else {
+        # Other operating systems
+        cl <- parallel::makePSOCKcluster(cores)
+        on.exit(parallel::stopCluster(cl))
+        if(!is.null(export_packages)){
+          # Send all specified packages to the cluster
+          for(val in export_packages){
+            parallel::clusterExport(cl, varlist = val,
+                                    envir = as.environment(asNamespace(val)))
+          }
+        }
+        out <- parallel::parLapply(cl = cl, X = X, fun = FUN, ...)
+      }
+      # out <- foreach::foreach(z = iterators::iter(X),
+      #                         .combine = ifelse(input_type!="list", "rbind", foreach:::defcombine),
+      #                         .inorder = FALSE,
+      #                         .multicombine = TRUE,
+      #                         .errorhandling = 'stop',
+      #                         .export = c("FUN"),
+      #                         .packages = export_packages,
+      #                         ...
+      # ) %dopar% { return( FUN(z, ...) ) }
+    } else {
+      # Check that future is loaded and register a plan
+      check_package("future")
+      ibis_future()
+      # Dispatch one future per chunk, then collect the resolved values
+      fs <- lapply(X, function(z) future::future( FUN(z, ...), seed = TRUE ))
+      out <- lapply(fs, future::value)
+    }
+  }
+  # If input data was not a list, combine again
+  if(input_type != "list" && is.list(out)){
+    out <- do.call(rbind, out)
+  }
+  return( out )
+}
diff --git a/R/plot.R b/R/plot.R
index 546d15fb..80d73302 100644
--- a/R/plot.R
+++ b/R/plot.R
@@ -57,7 +57,7 @@ plot.Engine <- function(x,...) x$plot(...)
 #' @export
 plot.BiodiversityScenario <- function(x,...) x$plot(...)

-#' Bivariate plot wrapper for distribution objects
+#' Bivariate prediction plot for distribution objects
 #'
 #' @description Often there is an intention to display not only the predictions
 #' made with a SDM, but also the uncertainty of the prediction. Uncertainty be
@@ -127,7 +127,7 @@ methods::setMethod(
     is.character(col) || is.vector(col),
     is.null(title) || is.character(title),
     is.null(fname) || is.character(fname),
-    isTRUE(plot) || is.character(fname)
+    isTRUE(plot)
   )
   # Check whether object is a raster, otherwise extract object
   if(is.Raster(mod)){
@@ -217,3 +217,307 @@ methods::setMethod(
     return(finalPlot)
   }
 )
+
+#' Niche plot for distribution objects
+#'
+#' @description
+#' The suitability of any given area for a biodiversity feature can in
+#' many instances be complex and non-linear. Visualizing obtained suitability
+#' predictions (e.g. from [`train()`]) against underlying predictors might help
+#' to explain the underlying gradients of the niche.
+#'
+#' Supported Inputs for this function are either single trained \code{ibis.iSDM}
+#' [`DistributionModel`] objects or alternatively a set of three [`SpatRaster`] objects.
+#' In both cases, users can specify \code{"xvar"} and \code{"yvar"} explicitly
+#' or leave them empty.
In the latter case a principal component analysis (PCA)
+#' is conducted on the full environmental stack (loaded from [`DistributionModel`]
+#' or supplied separately).
+#'
+#' @param mod A trained [`DistributionModel`] or alternatively a [`SpatRaster`]
+#'   object containing a \code{prediction} layer.
+#' @param xvar A [`character`] denoting the predictor on the x-axis. Alternatively a [`SpatRaster`]
+#'   object can be provided.
+#' @param yvar A [`character`] denoting the predictor on the y-axis. Alternatively a [`SpatRaster`]
+#'   object can be provided.
+#' @param envvars A [`SpatRaster`] object containing all environmental variables. Only
+#'   used if \code{xvar} and \code{yvar} are empty (Default: \code{NULL}).
+#' @param overlay_data A [`logical`] on whether training data should be overlaid
+#'   on the plot. Only used for [`DistributionModel`] objects (Default: \code{FALSE}).
+#' @param plot A [`logical`] indicating whether the result is to be plotted
+#'   (Default: \code{TRUE}).
+#' @param fname A [`character`] specifying the output file name the created figure
+#'   should be written to.
+#' @param title Allows respecifying the title through a [`character`] (Default: \code{NULL}).
+#' @param pal An optional [`vector`] with continuous custom colours (Default: \code{NULL}).
+#' @param ... Other engine specific parameters.
+#'
+#' @return Saved niche plot in \code{'fname'} if specified, otherwise the plot.
+#'
+#' @seealso [partial], [plot.DistributionModel]
+#' @keywords misc
+#' @examples
+#' # Make quick prediction
+#' background <- terra::rast(system.file('extdata/europegrid_50km.tif',
+#'                                       package='ibis.iSDM',mustWork = TRUE))
+#' virtual_points <- sf::st_read(system.file('extdata/input_data.gpkg', package='ibis.iSDM'), 'points',quiet = TRUE)
+#' ll <- list.files(system.file('extdata/predictors/',package = 'ibis.iSDM',mustWork = TRUE),full.names = TRUE)
+#'
+#' # Load them as rasters
+#' predictors <- terra::rast(ll);names(predictors) <- tools::file_path_sans_ext(basename(ll))
+#'
+#' # Add GLM as an engine and predict
+#' fit <- distribution(background) |>
+#'   add_biodiversity_poipo(virtual_points, field_occurrence = 'Observed',
+#'                          name = 'Virtual points',docheck = FALSE) |>
+#'   add_predictors(predictors, transform = 'none',derivates = 'none') |>
+#'   engine_glm() |>
+#'   train()
+#'
+#' # Plot niche for prediction for temperature and forest cover
+#' nicheplot(fit, xvar = "bio01_mean_50km", yvar = "CLC3_312_mean_50km" )
+#' @export
+#' @name nicheplot
+NULL
+
+#' @rdname nicheplot
+#' @export
+methods::setGeneric(
+  "nicheplot",
+  signature = methods::signature("mod"),
+  function(mod, xvar = NULL, yvar = NULL, envvars = NULL, overlay_data = FALSE,
+           plot = TRUE, fname = NULL, title = NULL, pal = NULL, ...) standardGeneric("nicheplot"))
+
+#' @rdname nicheplot
+methods::setMethod(
+  "nicheplot",
+  methods::signature(mod = "ANY"),
+  function(mod, xvar = NULL, yvar = NULL, envvars = NULL, overlay_data = FALSE,
+           plot = TRUE, fname = NULL, title = NULL, pal = NULL, ...) {
+    # Generic checks
+    assertthat::assert_that(
+      is.null(xvar) || (is.character(xvar) || is.Raster(xvar)),
+      is.null(yvar) || (is.character(yvar) || is.Raster(yvar)),
+      is.null(envvars) || is.Raster(envvars),
+      is.logical(overlay_data),
+      is.null(title) || is.character(title),
+      is.null(fname) || is.character(fname),
+      is.logical(plot),
+      is.null(pal) || is.vector(pal),
+      msg = "Provide correct parameters (see help file)."
+    )
+    check_package("ggplot2") # Should be loaded by default
+
+    # Specific checks on the x/y variables
+    assertthat::assert_that((is.null(xvar) && is.null(yvar)) ||
+                              (is.character(xvar) && is.character(yvar)),
+                            msg = "Both x and y variable names must be specified!")
+
+    # Check whether object is a raster, otherwise extract object
+    if(is.Raster(mod)){
+      obj <- mod
+      # Check x and y variables are correct
+      assertthat::assert_that(
+        is.Raster(xvar) && is.Raster(yvar),
+        msg = "SpatRaster objects need to be supplied as xvar and yvar!"
+      )
+
+      # Check if set
+      if(!is.null(xvar) && !is.null(yvar)){
+        # Align if mismatching
+        if(!is_comparable_raster(obj, xvar)){
+          warning('xvar not aligned with prediction. Aligning it now...')
+          xvar <- alignRasters(xvar, obj, method = 'bilinear', func = mean, cl = FALSE)
+        }
+        if(!is_comparable_raster(obj, yvar)){
+          warning('yvar not aligned with prediction. Aligning it now...')
+          yvar <- alignRasters(yvar, obj, method = 'bilinear', func = mean, cl = FALSE)
+        }
+      } else {
+        assertthat::assert_that(is.Raster(envvars),
+                                terra::nlyr(envvars)>1,
+                                msg = "A multi-layer environmental stack has to be supplied directly!")
+
+        if(!is_comparable_raster(obj, envvars)){
+          warning('Predictor stack not aligned with prediction. Aligning it now...')
+          envvars <- alignRasters(envvars, obj, method = 'bilinear', func = mean, cl = FALSE)
+        }
+      }
+
+    } else {
+      # Distribution model objects #
+      assertthat::assert_that(inherits(mod, "DistributionModel"),
+                              is.Raster(mod$get_data()),
+                              msg = "The nicheplot function currently only works with fitted distribution objects!")
+      # Check that distribution object has a prediction
+      assertthat::assert_that("prediction" %in% mod$show_rasters(),
+                              is.Raster(mod$get_data()),
+                              msg = "No prediction found in the provided object.")
+      obj <- mod$get_data()[[1]] # Get the first layer
+
+      # Check if set
+      if(!is.null(xvar) && !is.null(yvar)){
+        assertthat::assert_that(is.character(xvar), is.character(yvar),
+                                msg = "Specify predictor names for both xvar and yvar.")
+        # Check that variables are present
+        assertthat::assert_that(
+          xvar %in% mod$model$predictors_names, yvar %in% mod$model$predictors_names,
+          msg = "Variables not used in the underlying model?"
+        )
+        # Also get the xvar/yvar
+        if(is.character(xvar)) xvar <- mod$model$predictors_object$get_data()[[xvar]]
+        if(is.character(yvar)) yvar <- mod$model$predictors_object$get_data()[[yvar]]
+      } else {
+        if(is.null(envvars)){
+          envvars <- mod$model$predictors_object$get_data()
+        } else {
+          assertthat::assert_that(is.Raster(envvars),
+                                  terra::nlyr(envvars)>1,
+                                  msg = "A multi-layer environmental stack has to be supplied directly!") }
+      }
+    }
+
+    # Get training data if set
+    if(overlay_data){
+      assertthat::assert_that(inherits(mod, "DistributionModel"),
+                              msg = "Data overlay currently only works with a fitted model object!")
+      # Collect Biodiversity occurrence data
+      occ <- collect_occurrencepoints(mod$model,
+                                      include_absences = FALSE,
+                                      addName = TRUE,tosf = TRUE)
+    }
+
+    # Define default title
+    if(is.null(title)){
+      if(is.Raster(mod)) tt <- names(obj) else tt <- paste0("\n (",mod$model$runname,")")
+      title <- paste("Niche plot for prediction ",tt)
+    }
+
+    # Define colour palette if not set
+    if(is.null(pal)){
+      pal <- ibis_colours$sdm_colour
+    } else {
+      assertthat::assert_that(length(pal)>=1)
+    }
+
+    # ----------- #
+    if(is.null(xvar) && is.null(yvar)){
+      # No specific variables found. Conduct a principal component analysis
+      # and predict for the first two axes.
+ assertthat::assert_that(is.Raster(envvars)) + pca <- terra::prcomp(envvars, maxcell = 10000) + rpca <- terra::predict(envvars, pca, index=1:2) + + # Now check number of cells and extract. If too large, sample at random + o <- c(obj, rpca) + names(o) <- c("mean", "PC1", "PC2") + if(terra::ncell(o)>10000){ + # Messenger + if(getOption('ibis.setupmessages', default = TRUE)) myLog('[Visualization]','green','Sampling at random grid cells for extraction.') + ex <- terra::spatSample(o, size = 10000, method = "random", + as.df = TRUE, na.rm = TRUE) + } else { + # Extract + ex <- terra::as.data.frame(o, xy = FALSE, na.rm = TRUE, time = FALSE) + } + assertthat::assert_that(nrow(ex)>0) + + # Define variable names + xvar_lab <- "PC1" + yvar_lab <- "PC2" + col_lab <- names(obj) + + # Now plot + viz <- ggplot2::ggplot() + + ggplot2::theme_bw(base_size = 20) + + ggplot2::geom_point(data = ex, ggplot2::aes(x = PC1, y = PC2, colour = mean, alpha=mean)) + + ggplot2::scale_colour_gradientn(colours = pal) + + ggplot2::guides(colour = ggplot2::guide_colorbar(title = col_lab), alpha = "none") + + ggplot2::theme(legend.position = "bottom", + legend.title = ggplot2::element_text(vjust = 1), + legend.key.size = ggplot2::unit(1, "cm")) + + ggplot2::labs( + title = title, + x = xvar_lab, + y = yvar_lab + ) + + # Should the training data be overlaid? + if(overlay_data){ + pp <- terra::extract(o, occ, ID = FALSE) + pp$name <- as.factor( occ$name ) + + viz <- viz + + ggplot2::geom_point(data = pp, + ggplot2::aes(x = PC1, y = PC2), + col = "black", + size = 1.5,show.legend = TRUE) + + ggplot2::labs(subtitle = "Training data (black)") + } + + } else { + # Make plot for two variables which should have been collected above. + # Check that all Raster objects are there + assertthat::assert_that( + is.Raster(xvar), is.Raster(yvar), is.Raster(obj), + terra::hasValues(obj), + msg = "Layers are not in spatial format?" + ) + + # Define variable names + xvar_lab <- names(xvar) + yvar_lab <- names(yvar) + col_lab <- names(obj) + + # Now check number of cells and extract. If too large, sample at random + o <- c(obj, xvar, yvar) + names(o) <- c("mean", "xvar", "yvar") + if(terra::ncell(o)>10000){ + # Messenger + if(getOption('ibis.setupmessages', default = TRUE)) myLog('[Visualization]','green','Sampling at random grid cells for extraction.') + ex <- terra::spatSample(o, size = 10000, method = "random", + as.df = TRUE, na.rm = TRUE) + } else { + # Extract + ex <- terra::as.data.frame(o, xy = FALSE, na.rm = TRUE, time = FALSE) + } + assertthat::assert_that(nrow(ex)>0) + + + # Now plot + viz <- ggplot2::ggplot() + + ggplot2::theme_bw(base_size = 20) + + ggplot2::geom_point(data = ex, ggplot2::aes(x = xvar, y = yvar, colour = mean, alpha = mean)) + + ggplot2::scale_colour_gradientn(colours = pal) + + ggplot2::guides(colour = ggplot2::guide_colorbar(title = col_lab), alpha = "none") + + ggplot2::theme(legend.position = "bottom", + legend.title = ggplot2::element_text(vjust = 1), + legend.key.size = ggplot2::unit(1, "cm")) + + ggplot2::labs( + title = title, + x = xvar_lab, + y = yvar_lab + ) + + # Should the training data be overlaid? 
+      if(overlay_data){
+        pp <- terra::extract(o, occ, ID = FALSE)
+        pp$name <- as.factor( occ$name )
+
+        viz <- viz +
+          ggplot2::geom_point(data = pp,
+                              ggplot2::aes(x = xvar, y = yvar),
+                              col = "black",
+                              size = 1.5, show.legend = TRUE) +
+          ggplot2::labs(subtitle = "Training data (black)")
+      }
+    }
+
+    # Write the figure to file if requested (before returning the plot)
+    if(is.character(fname)){
+      cowplot::ggsave2(filename = fname, plot = viz)
+    }
+    # Print the plot
+    if(plot){
+      return(viz)
+    }
+  }
+)
diff --git a/R/project.R b/R/project.R
index 7b281e4b..8c354d5e 100644
--- a/R/project.R
+++ b/R/project.R
@@ -129,6 +129,14 @@ methods::setMethod(
     new_preds <- mod$get_predictors()
     if(is.Waiver(new_preds)) stop('No scenario predictors found.')

+    # Check extents of models and raise a warning otherwise
+    if(!is.Waiver(fit$model$predictors_object)){
+      if(fit$model$predictors_object$ncell() != new_preds$ncell()){
+        if(getOption('ibis.setupmessages', default = TRUE)) myLog('[Scenario]','yellow',paste0('Model predictors and scenario predictors have different resolution!'))
+      }
+    }
+
+
     new_crs <- new_preds$get_projection()
     if(is.na(new_crs)) if(getOption('ibis.setupmessages', default = TRUE)) myLog('[Scenario]','yellow','Missing projection of future predictors.')
@@ -236,6 +244,9 @@ methods::setMethod(
                               msg = "Model predictors are missing from the scenario predictor!")
     }

+    # Create a template for use
+    template <- emptyraster( new_preds$get_data() )
+
     # Get constraints, threshold values and other parameters
     scenario_threshold <- mod$get_threshold()
     if(!is.Waiver(scenario_threshold)){
@@ -264,8 +275,13 @@ methods::setMethod(
         baseline_threshold <- baseline_threshold[[grep(layer, names(baseline_threshold))]]
       }

-      # Set all NA values to 0 (and then mask by background?)
-      baseline_threshold[is.na(baseline_threshold)]<-0
+      # Set all NA values to 0
+      baseline_threshold[is.na(baseline_threshold)] <- 0
+      # Align with the new predictors' geometry by extending and cropping if mismatching
+      if(!terra::compareGeom(baseline_threshold, template, stopOnError = FALSE)){
+        baseline_threshold <- terra::extend(baseline_threshold, template)
+        baseline_threshold <- terra::crop(baseline_threshold, template)
+      }
     } else {
       baseline_threshold <- new_waiver()

@@ -275,9 +291,6 @@ methods::setMethod(
     scenario_constraints <- mod$get_constraints()
     scenario_simulations <- mod$get_simulation()

-    # Create a template for use
-    template <- emptyraster( new_preds$get_data() )
-
     # --- Check that everything is there ---
     # Check that thresholds are set for constrains
     if("dispersal" %in% names(scenario_constraints)){
@@ -340,10 +353,9 @@ methods::setMethod(
                                        total = length(unique(df$time)))
     }

-    # TODO: Consider doing this in parallel but sequential
     times <- sort(unique(df$time))

-    for(step in times){
+    for(step in times){ # step = times[1]
       # Get data
       nd <- subset(df, time == step)
@@ -455,6 +467,7 @@ methods::setMethod(
         scenario_threshold <- scenario_threshold[[1]]
         out_thresh <- out
         out_thresh[out_thresh < scenario_threshold] <- 0; out_thresh[out_thresh >= scenario_threshold] <- 1
+        out_thresh[is.na(out_thresh)] <- 0 # Added to avoid unnecessary clipping
         names(out_thresh) <- "threshold"

         # Apply minimum size constraint if set
diff --git a/R/scenario.R b/R/scenario.R
index aa1d6921..f07cfad3 100644
--- a/R/scenario.R
+++ b/R/scenario.R
@@ -19,7 +19,7 @@ NULL
 #'
 #' @note
 #' If a limit has been defined already during [train()], for example by adding
-#' an extrapolation limit [add_control_extrapolation()], this zonal layer can be
+#' an extrapolation limit [add_limits_extrapolation()], this zonal layer can be
 #' reused for the projections. **Note: This effectively fixes the projections to certain areas.**
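The geometry check added to `project()` above (extend the baseline threshold to the template, then crop back) is a generic `terra` pattern worth isolating. Below is a minimal, self-contained sketch of that pattern on dummy layers; all object names are illustrative and not part of the package:

```r
library(terra)

# A template grid and a layer with a smaller, mismatched extent
template <- rast(xmin = 0, xmax = 10, ymin = 0, ymax = 10, resolution = 1)
layer    <- rast(xmin = 2, xmax = 8,  ymin = 2, ymax = 8,  resolution = 1, vals = 1)

# Align the layer with the template if the geometries differ:
# extend() pads missing rows/columns with NA, crop() trims any overhang
if (!compareGeom(layer, template, stopOnError = FALSE)) {
  layer <- extend(layer, template)
  layer <- crop(layer, template)
}
compareGeom(layer, template, stopOnError = FALSE) # now TRUE
```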
 #'
 #' @examples
diff --git a/R/threshold.R b/R/threshold.R
index 7b5dc344..521b9f3d 100644
--- a/R/threshold.R
+++ b/R/threshold.R
@@ -28,7 +28,9 @@
 #' @param return_threshold Should threshold value be returned instead (Default: \code{FALSE})
 #' @param ... other parameters not yet set.
 #'
-#' @details The following options are currently implemented:
+#' @details
+#' The following options are currently implemented:
+#'
 #' * \code{'fixed'} = applies a single pre-determined threshold. Requires \code{value}
 #' to be set.
 #' * \code{'mtp'} = minimum training presence is used to find and set the lowest
@@ -52,6 +54,8 @@
 #' Requires the \code{"modEvA"} package to be installed.
 #' * \code{'AUC'} = Determines the optimal AUC of presence records. Requires the
 #' \code{"modEvA"} package to be installed.
+#' * \code{'kmeans'} = Determines a threshold based on a 2-cluster k-means clustering.
+#' The presence class is assumed to be the cluster with the larger mean.
 #'
 #' @returns A [SpatRaster] if a [SpatRaster] object as input. Otherwise the threshold
 #' is added to the respective [`DistributionModel`] or [`BiodiversityScenario`] object.
@@ -113,6 +117,7 @@ methods::setMethod(
   )
   # Matching for correct method
   method <- match.arg(method, c('fixed','mtp','percentile','min.cv',
+                                'kmeans',
                                 # modEvA measures
                                 'TSS','kappa','F1score','Sensitivity','Specificity',
                                 'Misclass','Omission','Commission','Precision',
@@ -264,6 +269,7 @@ methods::setMethod(
   # Match to correct spelling mistakes
   method <- match.arg(method, c('fixed','mtp','percentile','min.cv',
+                                'kmeans',
                                 # modEvA measures
                                 'TSS','kappa','F1score','Sensitivity','Specificity',
                                 'Misclass','Omission','Commission','Precision',
@@ -325,6 +331,16 @@ methods::setMethod(
     # Combine as a vector
     tr <- c(tr, value)

+  } else if(method == 'kmeans') {
+    # K-means based clustering.
Presence and absence classes are
+    # identified by clustering regularly sampled raster values.
+    ex <- terra::spatSample(raster_thresh, size = 1e6, method = "regular",
+                            na.rm = TRUE, exhaustive = TRUE)
+    ex <- subset(ex, stats::complete.cases(ex))
+    if(nrow(ex)<5) stop("Not enough values for clustering found...")
+    clus <- stats::kmeans(ex, centers = 2)
+    tr <- clus$centers[which.max(clus$centers[,1])]
+    rm(clus, ex)
   } else {
     # Optimized threshold statistics using the modEvA package
     # FIXME: Could think of porting these functions but too much effort for
diff --git a/R/train.R b/R/train.R
index 7be713ef..8319f603 100644
--- a/R/train.R
+++ b/R/train.R
@@ -516,12 +516,14 @@ methods::setMethod(

       # select only columns needed by equation
       if (model$biodiversity[[id]]$equation != "") {
-        env <- subset(env, select = c("ID", "x", "y", attr(stats::terms.formula(model$biodiversity[[id]]$equation), "term.labels")))
-      }
+      # Check for common issues and exclude variables if affected (could be outsourced)
+      # --- Variance check ---
+      env <- predictor_check(env)
+
       # Remove missing values as several engines can't deal with those easily
       miss <- stats::complete.cases(env)
       if(sum( !miss )>0 && getOption('ibis.setupmessages', default = TRUE)) {
@@ -602,7 +604,10 @@ methods::setMethod(
       model[['biodiversity']][[id]][['predictors']] <- env
       model[['biodiversity']][[id]][['predictors_names']] <- names(env)[names(env) %notin% c("ID", "x", "y", "Intercept")]
       model[['biodiversity']][[id]][['predictors_types']] <- model[['predictors_types']][model[['predictors_types']][, "predictors"] %in% names(env), ]
-    }
+      # makes sure ordering is identical
+      sort_id <- match(model[['biodiversity']][[id]][['predictors_names']], model[['biodiversity']][[id]][['predictors_types']]$predictors)
+      model[['biodiversity']][[id]][['predictors_types']] <- model[['biodiversity']][[id]][['predictors_types']][sort_id, ]
+    }

     # If the method of integration is weights and there are more than 2 datasets, combine
     if(method_integration == "weight" && length(model$biodiversity)>=2){
@@ -912,7 +917,9 @@ methods::setMethod(
       pred_prs <- model$predictors_object$get_names()
       model$predictors_names <- pred_tmp
       model$predictors_types <- model$predictors_types[model$predictors_type$predictors %in% pred_tmp, ]
-      model$predictors <- model$predictors |> dplyr::select(dplyr::any_of(c("x", "y", pred_tmp)))
+      # make sure all in same order
+      model$predictors_types <- model$predictors_types[match(model$predictors_names, model$predictors_types$predictors), ]
+      model$predictors <- dplyr::select(model$predictors, dplyr::any_of(c("x", "y", pred_tmp)))
       model$predictors_object <- model$predictors_object$clone(deep = TRUE)
       if (length(pred_prs[!pred_prs %in% pred_tmp]) > 0){
         model$predictors_object$rm_data(pred_prs[!pred_prs %in% pred_tmp])
@@ -954,7 +961,9 @@ methods::setMethod(
       pred_prs <- model$predictors_object$get_names()
       model$predictors_names <- pred_tmp
       model$predictors_types <- model$predictors_types[model$predictors_type$predictors %in% pred_tmp, ]
-      model$predictors <- model$predictors |> dplyr::select(dplyr::any_of(c("x", "y", pred_tmp)))
+      # make sure all in same order
+      model$predictors_types <- model$predictors_types[match(model$predictors_names, model$predictors_types$predictors), ]
+      model$predictors <- dplyr::select(model$predictors, dplyr::any_of(c("x", "y", pred_tmp)))
       model$predictors_object <- model$predictors_object$clone(deep = TRUE)
       if (length(pred_prs[!pred_prs %in% pred_tmp]) > 0){
         model$predictors_object$rm_data(pred_prs[!pred_prs %in%
pred_tmp]) @@ -988,7 +997,9 @@ methods::setMethod( model2$predictors_object <- model$predictors_object$clone(deep = TRUE) model2$predictors_names <- pred_tmp model2$predictors_types <- model2$predictors_types[model2$predictors_type$predictors %in% pred_tmp, ] - model2$predictors <- model2$predictors |> dplyr::select(dplyr::any_of(c("x", "y", pred_tmp))) + # make sure all in same order + model2$predictors_types <- model2$predictors_types[match(model2$predictors_names, model2$predictors_types$predictors), ] + model2$predictors <- dplyr::select(model2$predictors, dplyr::any_of(c("x", "y", pred_tmp))) if (length(pred_prs[!pred_prs %in% pred_tmp]) > 0){ model2$predictors_object$rm_data(pred_prs[!pred_prs %in% pred_tmp]) } @@ -1119,7 +1130,9 @@ methods::setMethod( model2$predictors_object <- model$predictors_object$clone(deep = TRUE) model2$predictors_names <- pred_tmp model2$predictors_types <- model2$predictors_types[model2$predictors_type$predictors %in% pred_tmp, ] - model2$predictors <- model2$predictors |> dplyr::select(dplyr::any_of(c("x", "y", pred_tmp))) + # make sure all in same order + model2$predictors_types <- model2$predictors_types[match(model2$predictors_names, model2$predictors_types$predictors), ] + model2$predictors <- dplyr::select(model2$predictors, dplyr::any_of(c("x", "y", pred_tmp))) if (length(pred_prs[!pred_prs %in% pred_tmp]) > 0){ model2$predictors_object$rm_data(pred_prs[!pred_prs %in% pred_tmp]) } @@ -1249,7 +1262,9 @@ methods::setMethod( model2$predictors_object <- model$predictors_object$clone(deep = TRUE) model2$predictors_names <- pred_tmp model2$predictors_types <- model2$predictors_types[model2$predictors_type$predictors %in% pred_tmp, ] - model2$predictors <- model2$predictors |> dplyr::select(dplyr::any_of(c("x", "y", pred_tmp))) + # make sure all in same order + model2$predictors_types <- model2$predictors_types[match(model2$predictors_names, model2$predictors_types$predictors), ] + model2$predictors <- dplyr::select(model2$predictors, dplyr::any_of(c("x", "y", pred_tmp))) if (length(pred_prs[!pred_prs %in% pred_tmp]) > 0){ model2$predictors_object$rm_data(pred_prs[!pred_prs %in% pred_tmp]) } @@ -1403,7 +1418,9 @@ methods::setMethod( model2$predictors_object <- model$predictors_object$clone(deep = TRUE) model2$predictors_names <- pred_tmp model2$predictors_types <- model2$predictors_types[model2$predictors_type$predictors %in% pred_tmp, ] - model2$predictors <- model2$predictors |> dplyr::select(dplyr::any_of(c("x", "y", pred_tmp))) + # make sure all in same order + model2$predictors_types <- model2$predictors_types[match(model2$predictors_names, model2$predictors_types$predictors), ] + model2$predictors <- dplyr::select(model2$predictors, dplyr::any_of(c("x", "y", pred_tmp))) if (length(pred_prs[!pred_prs %in% pred_tmp]) > 0){ model2$predictors_object$rm_data(pred_prs[!pred_prs %in% pred_tmp]) } @@ -1529,7 +1546,9 @@ methods::setMethod( model2$predictors_object <- model$predictors_object$clone(deep = TRUE) model2$predictors_names <- pred_tmp model2$predictors_types <- model2$predictors_types[model2$predictors_type$predictors %in% pred_tmp, ] - model2$predictors <- model2$predictors |> dplyr::select(dplyr::any_of(c("x", "y", pred_tmp))) + # make sure all in same order + model2$predictors_types <- model2$predictors_types[match(model2$predictors_names, model2$predictors_types$predictors), ] + model2$predictors <- dplyr::select(model2$predictors, dplyr::any_of(c("x", "y", pred_tmp))) if (length(pred_prs[!pred_prs %in% pred_tmp]) > 0){ 
model2$predictors_object$rm_data(pred_prs[!pred_prs %in% pred_tmp]) } @@ -1657,7 +1676,9 @@ methods::setMethod( model2$predictors_object <- model$predictors_object$clone(deep = TRUE) model2$predictors_names <- pred_tmp model2$predictors_types <- model2$predictors_types[model2$predictors_type$predictors %in% pred_tmp, ] - model2$predictors <- model2$predictors |> dplyr::select(dplyr::any_of(c("x", "y", pred_tmp))) + # make sure all in same order + model2$predictors_types <- model2$predictors_types[match(model2$predictors_names, model2$predictors_types$predictors), ] + model2$predictors <- dplyr::select(model2$predictors, dplyr::any_of(c("x", "y", pred_tmp))) if (length(pred_prs[!pred_prs %in% pred_tmp]) > 0){ model2$predictors_object$rm_data(pred_prs[!pred_prs %in% pred_tmp]) } @@ -1813,7 +1834,7 @@ methods::setMethod( o <- terra::mask(out$get_data("prediction"), layer) } else { # Default! Leaves rest of background to 0 - o <- terra::mask(out$get_data("prediction"), layer, updatevalue = 0) + o <- terra::mask(out$get_data("prediction"), layer, updatevalue = NA) } out <- out$set_data("prediction", o) try({ rm(layer, o) }) diff --git a/R/utils-bart.R b/R/utils-bart.R index 0b0329be..500e847f 100644 --- a/R/utils-bart.R +++ b/R/utils-bart.R @@ -87,6 +87,117 @@ varimp.bart <- function(model){ return(var.df) } +#' Prediction with `dbarts` package for bart models +#' +#' @description Helper function to create a prediction with [engine_bart] fitted +#' models. +#' +#' @param obj A [list] containing the fitted model. +#' @param newdata A [`data.frame`] with all the predictor used for model fitting. +#' @param params A [`list`] with parameters for estimation. Normally created during +#' model fitting. +#' @param of A [`numeric`] optional offset. +#' @param w A [`numeric`] [`vector`] containing the exposure variables for PPMs. +#' Can be \code{NULL} if the model is not a PPM. +#' @param run_future A [`logical`] on whether the model is to be run through chunking +#' and the [future] package (Default: \code{FALSE}). +#' @param N An optional [`numeric`] value describing the number of chunking pieces (Default: \code{NULL}). +#' +#' @returns Always a summarized posterior [`data.frame`] with the respective +#' statistical moments. 
+#'
+#' @keywords internal
+#'
+#' @noRd
+predict_bart <- function(obj, newdata, params, of = NULL, w = NULL, run_future = FALSE, N = NULL) {
+  assertthat::assert_that(
+    is.list(obj),
+    is.matrix(newdata) || is.data.frame(newdata) || inherits(newdata, "SpatialPixelsDataFrame"),
+    is.list(params),
+    is.null(of) || is.numeric(of),
+    is.null(w) || is.numeric(w),
+    is.logical(run_future),
+    is.null(N) || is.numeric(N)
+  )
+
+  # Non-future
+  if(!run_future){
+    # Make a prediction
+    check_package("foreach")
+    # Tile the problem
+    splits <- cut(1:nrow(newdata), nrow(newdata) / min(nrow(newdata) / 4, 100) )
+
+    # Make a prediction
+    out <- foreach::foreach(s = unique(splits),
+                            .inorder = TRUE,
+                            .combine = "rbind",
+                            .errorhandling = "stop",
+                            .multicombine = TRUE,
+                            .export = c("splits", "obj", "newdata", "params", "of", "w"),
+                            .packages = c("dbarts", "matrixStats")) %do% {
+      i <- which(splits == s)
+
+      pred_bart <- predict(object = obj,
+                           newdata = newdata[i, ],
+                           type = params$type,
+                           weights = w[i],
+                           offset = of[i]
+      )
+      # Summarize quantiles and sd from posterior
+      ms <- as.data.frame(
+        cbind( apply(pred_bart, 2, function(x) mean(x, na.rm = TRUE)),
+               matrixStats::colSds(pred_bart),
+               matrixStats::colQuantiles(pred_bart, probs = c(.05,.5,.95)),
+               apply(pred_bart, 2, modal)
+        )
+      )
+      names(ms) <- c("mean","sd", "q05", "q50", "q95", "mode")
+      ms$cv <- ms$sd / ms$mean
+      rm(pred_bart)
+      return( ms )
+    }
+  } else {
+    # Set up future if set
+    check_package("doFuture")
+    # If not set, use number of threads
+    if(is.null(N)) N <- getOption("ibis.nthread",default = 10)
+
+    # Tile the problem
+    splits <- cut(1:nrow(newdata), nrow(newdata) / min(nrow(newdata) / 4, 100) )
+
+    # Make a prediction
+    out <- foreach::foreach(s = unique(splits),
+                            .inorder = TRUE,
+                            .combine = "rbind",
+                            .errorhandling = "stop",
+                            .options.future = list(seed = TRUE)) %dofuture% {
+      i <- which(splits == s)
+
+      pred_bart <- predict(object = obj,
+                           newdata = newdata[i, ],
+                           type = params$type,
+                           weights = w[i],
+                           offset = of[i]
+      )
+      # Summarize quantiles and sd from posterior
+      ms <- as.data.frame(
+        cbind( apply(pred_bart, 2, function(x) mean(x, na.rm = TRUE)),
+               matrixStats::colSds(pred_bart),
+               matrixStats::colQuantiles(pred_bart, probs = c(.05,.5,.95)),
+               apply(pred_bart, 2, terra::modal)
+        )
+      )
+      names(ms) <- c("mean","sd", "q05", "q50", "q95", "mode")
+      ms$cv <- ms$sd / ms$mean
+      rm(pred_bart)
+      return( ms )
+    }
+
+    assertthat::assert_that(nrow(out)>0)
+  }
+  return(out)
+}
+
 #' Partial effects for bart models adapted from embarcadero package
 #'
 #' @param model A fitted [dbarts::bart] model.
@@ -192,7 +303,7 @@ bart_partial_effect <- function(model, x.var, equal = FALSE,
           cbind( apply(pd$fd[[i]], 2, function(x) mean(x, na.rm = TRUE)),
                  matrixStats::colSds(pd$fd[[i]]),
                  matrixStats::colQuantiles(pd$fd[[i]], probs = c(.05,.5,.95)),
-                 apply(pd$fd[[i]], 2, mode)
+                 apply(pd$fd[[i]], 2, modal)
           )
         )
         names(ms) <- c("mean","sd", "q05", "q50", "q95", "mode")
diff --git a/R/utils-breg.R b/R/utils-breg.R
index d55615dc..dc0d82d5 100644
--- a/R/utils-breg.R
+++ b/R/utils-breg.R
@@ -168,58 +168,126 @@ setup_prior_boom <- function(form, data, priors, family, exposure = NULL){
 #'   model fitting.
 #' @param w A [`numeric`] [`vector`] containing the exposure variables for PPMs.
 #'   Can be \code{NULL} if the model is not a PPM.
+#' @param run_future A [`logical`] on whether the model is to be run through chunking
+#'   and the [future] package (Default: \code{FALSE}).
**Note that this also summarizes +#' directly the posterior!** +#' @param N An optional [`numeric`] value describing the number of chunking pieces (Default: \code{NULL}). #' #' @note By Default 20% of the iterations are considered as burnin. #' #' @returns A [`data.frame`] with the respective prediction. #' -#' @keywords utils +#' @keywords internal #' #' @noRd -#' -#' @keywords internal -predict_boom <- function(obj, newdata, fam, params, w = NULL) { +predict_boom <- function(obj, newdata, fam, params, w = NULL, run_future = FALSE, N = NULL) { assertthat::assert_that( is.list(obj), is.data.frame(newdata) || inherits(newdata, "SpatialPixelsDataFrame"), is.character(fam), is.list(params), - is.null(w) || is.numeric(w) + is.null(w) || is.numeric(w), + is.logical(run_future), + is.null(N) || is.numeric(N) ) - check_package("BoomSpikeSlab") - - # Make a prediction - if(fam == "poisson"){ - suppressWarnings( - pred_breg <- BoomSpikeSlab::predict.poisson.spike( - object = obj, - newdata = newdata, - exposure = w, - burn = ceiling(params$iter*0.2), - type = params$type, - mean.only = FALSE # Return full posterior + + # Non-future + if(!run_future){ + # Make a prediction + if(fam == "poisson"){ + suppressWarnings( + pred_breg <- BoomSpikeSlab::predict.poisson.spike( + object = obj, + newdata = newdata, + exposure = w, + burn = ceiling(params$iter*0.2), + type = params$type, + mean.only = FALSE # Return full posterior + ) ) - ) - } else if(fam == "binomial"){ - suppressWarnings( - pred_breg <- BoomSpikeSlab::predict.logit.spike( - object = obj, - newdata = newdata, - burn = ceiling(params$iter*0.2), - type = params$type, - mean.only = FALSE # Return full posterior + } else if(fam == "binomial"){ + suppressWarnings( + pred_breg <- BoomSpikeSlab::predict.logit.spike( + object = obj, + newdata = newdata, + burn = ceiling(params$iter*0.2), + type = params$type, + mean.only = FALSE # Return full posterior + ) ) - ) - } else { - suppressWarnings( - pred_breg <- BoomSpikeSlab::predict.lm.spike( - object = obj, - newdata = newdata, - burn = ceiling(params$iter*0.2), - type = params$type, - mean.only = FALSE # Return full posterior + } else { + suppressWarnings( + pred_breg <- BoomSpikeSlab::predict.lm.spike( + object = obj, + newdata = newdata, + burn = ceiling(params$iter*0.2), + type = params$type, + mean.only = FALSE # Return full posterior + ) ) - ) + } + } else { + # Set up future if set + check_package("doFuture") + # If not set, use number of threads + if(is.null(N)) N <- getOption("ibis.nthread",default = 10) + # Chunk the data + splits <- chunk_data(newdata, N = N, index_only = TRUE) + + pred_breg <- foreach::foreach(s = splits, + .combine = "rbind", + .inorder = TRUE, + .options.future = list(seed = TRUE, + packages = c("matrixStats"), + globals = structure(TRUE, add = "modal")) + ) %dofuture% { + if(fam == "poisson"){ + suppressWarnings( + pred_breg <- BoomSpikeSlab::predict.poisson.spike( + object = obj, + newdata = newdata[s,], + exposure = w[s], + burn = ceiling(params$iter*0.2), + type = params$type, + mean.only = FALSE # Return full posterior + ) + ) + } else if(fam == "binomial"){ + suppressWarnings( + pred_breg <- BoomSpikeSlab::predict.logit.spike( + object = obj, + newdata = newdata[s,], + burn = ceiling(params$iter*0.2), + type = params$type, + mean.only = FALSE # Return full posterior + ) + ) + } else { + suppressWarnings( + pred_breg <- BoomSpikeSlab::predict.lm.spike( + object = obj, + newdata = newdata[s,], + burn = ceiling(params$iter*0.2), + type = params$type, + mean.only = 
FALSE # Return full posterior + ) + ) + } + # Summarize the posterior + preds <- as.data.frame( + cbind( + matrixStats::rowMeans2(pred_breg, na.rm = TRUE), + matrixStats::rowSds(pred_breg, na.rm = TRUE), + matrixStats::rowQuantiles(pred_breg, probs = c(.05,.5,.95), na.rm = TRUE) + ) + ) + names(preds) <- c("mean", "sd", "q05", "q50", "q95") + preds$cv <- (preds$sd / preds$mean) + # Mode + preds$mode <- base::apply(pred_breg, 1, FUN = terra::modal) |> as.numeric() + return(preds) + } + assertthat::assert_that(nrow(pred_breg)>0) } return(pred_breg) } diff --git a/R/utils-predictors.R b/R/utils-predictors.R index a350ef12..afb51cdb 100644 --- a/R/utils-predictors.R +++ b/R/utils-predictors.R @@ -51,10 +51,14 @@ #' @keywords utils #' #' @examples -#' \dontrun{ -#' # Where x is a SpatRaster -#' new_x <- predictor_transform(x, option = 'scale') -#' } +#' # Dummy raster +#' r_ori <- terra::rast(nrows = 10, ncols = 10, res = 0.05, xmin = -1.5, xmax = 1.5, ymin = -1.5, ymax = 1.5, vals = rnorm(3600,mean = .01,sd = .1)) +#' +#' # Normalize +#' r_norm <- predictor_transform(r_ori, option = 'norm') +#' new <- c(r_ori, r_norm) +#' names(new) <- c("original scale", "normalized units") +#' terra::plot(new) #' #' @export predictor_transform <- function(env, option, windsor_props = c(.05,.95), pca.var = 0.8, state = NULL, method = NULL, ...){ @@ -175,7 +179,7 @@ predictor_transform <- function(env, option, windsor_props = c(.05,.95), pca.var perc <- terra::global(env, fun = function(z) terra::quantile(z, probs = seq(0,1, length.out = 11), na.rm = TRUE)) } perc <- unique(perc) - out <- terra::classify(env, t(perc)) |> terra::as.int() + out <- terra::classify(env, t(perc), overwrite = TRUE) |> terra::as.int() attr(out, "transform_params") <- perc } else { out <- lapply(env_list, function(x) { @@ -187,7 +191,7 @@ predictor_transform <- function(env, option, windsor_props = c(.05,.95), pca.var perc <- unique(perc) # For terra need to loop here as classify does not support multiple columns o <- terra::rast() - for(i in 1:nrow(perc)) o <- suppressWarnings( c(o, terra::classify(x[[i]], rcl = t(perc)[,i]) |> terra::as.int() )) + for(i in 1:nrow(perc)) o <- suppressWarnings( c(o, terra::classify(x[[i]], rcl = t(perc)[,i], overwrite = TRUE) |> terra::as.int() )) return(o) }) } @@ -258,7 +262,7 @@ predictor_transform <- function(env, option, windsor_props = c(.05,.95), pca.var envMask <- !sum(terra::app(env, is.na)) assertthat::assert_that(terra::global(envMask, "sum")[,1]>0, msg = 'A predictor is either NA only or no valid values across all layers') - env <- terra::mask(env, envMask, maskvalues = 0) + env <- terra::mask(env, envMask, maskvalues = 0, overwrite = TRUE) # Sample covariance from stack and fit PCA covMat <- terra::layerCor(env, fun = "cov", na.rm = TRUE) @@ -325,7 +329,8 @@ predictor_transform <- function(env, option, windsor_props = c(.05,.95), pca.var #' #' @param env A [`SpatRaster`] object. #' @param option A [`vector`] stating whether predictors should be preprocessed in any -#' way (Options: \code{'none'}, \code{'quadratic'}, \code{'hinge'}, \code{'thresh'}, \code{'bin'}). +#' way (Options: \code{'none'}, \code{'quadratic'}, \code{'hinge'}, +#' \code{'kmeans'}, \code{'thresh'}, \code{'bin'}). #' @param nknots The number of knots to be used for the transformation (Default: \code{4}). #' @param deriv A [`vector`] with [`character`] of specific derivates to create (Default: \code{NULL}). 
#' @param int_variables A [`vector`] with length greater or equal than \code{2} @@ -351,6 +356,8 @@ predictor_transform <- function(env, option, windsor_props = c(.05,.95), pca.var #' * \code{'bin'} - Creates a factor representation of a covariates by cutting the #' range of covariates by their percentiles. The number of percentile cuts and thus #' new derivates is specified via the parameter \code{'nknots'} (Default: \code{4}). +#' * \code{'kmeans'} - Creates a factor representation of a covariate through a +#' [`kmeans()`] clustering. The number of clusters is specified via the parameter \code{'nknots'}. #' #' @return Returns the derived adjusted [`SpatRaster`] objects of identical resolution. #' @@ -358,13 +365,20 @@ predictor_transform <- function(env, option, windsor_props = c(.05,.95), pca.var #' @keywords utils #' #' @examples -#' \dontrun{ -#' # Create a hinge transformation of one or multiple SpatRaster. -#' predictor_derivate(covs, option = "hinge", knots = 4) -#' } +#' # Dummy raster +#' r_ori <- terra::rast(nrows = 10, ncols = 10, res = 0.05, xmin = -1.5, xmax = 1.5, ymin = -1.5, ymax = 1.5, vals = rpois(3600, 10)) +#' +#' # Create a hinge transformation with 4 knots of one or multiple SpatRaster. +#' new <- predictor_derivate(r_ori, option = "hinge", nknots = 4) +#' terra::plot(new) +#' +#' # Or a quadratic transformation +#' new2 <- predictor_derivate(r_ori, option = "quad", nknots = 4) +#' terra::plot(new2) #' #' @export -predictor_derivate <- function(env, option, nknots = 4, deriv = NULL, int_variables = NULL, method = NULL, ...){ +predictor_derivate <- function(env, option, nknots = 4, deriv = NULL, + int_variables = NULL, method = NULL, ...){ assertthat::assert_that( is.Raster(env) || inherits(env, "stars"), !missing(env), @@ -382,7 +396,7 @@ predictor_derivate <- function(env, option, nknots = 4, deriv = NULL, int_variab base::length(option) == 1 ) # Match argument.
- option <- match.arg(option, c('none','quadratic', 'hinge', + option <- match.arg(option, c('none','quadratic', 'hinge', 'kmeans', 'thresh', 'bin', 'interaction'), several.ok = FALSE) @@ -556,12 +570,78 @@ predictor_derivate <- function(env, option, nknots = 4, deriv = NULL, int_variab for(val in names(env)){ suppressWarnings( o <- makeBin(env[[val]], n = val, nknots = nknots, cutoffs = cutoffs) ) if(is.null(o)) next() - new_env <- c(new_env, o) + suppressWarnings( new_env <- c(new_env, o) ) + rm(o) + } + } else { + # For stats layers + for(val in names(env_list)){ + # Format cutoffs + cu <- cutoffs[which(cutoffs$deriv == val), 3] + cu <- strsplit(cu, "_") |> unlist() + # Remove any leading points + if(any(substr(cu,1, 1)==".")){ + cu[which(substr(cu,1, 1)==".")] <- gsub("^.","",cu[which(substr(cu,1, 1)==".")]) + } + suppressWarnings( cu <- as.numeric(cu) ) + nn <- terra::rast() + # If NA, set + if(is.na(cu[1]) || is.na(cu[2])){ + # Use the default knots for cutting and get knots + o <- makeBin(env_list[[val]][[1]], n = val, nknots = nknots, cutoffs = NULL) + suppressWarnings( cu <- strsplit(names(o), "_") |> + unlist() |> + as.numeric()) + cu <- subset(cu, stats::complete.cases(cu)) + cu <- matrix(cu,ncol=2,byrow = TRUE) # Convert to pmin and pmax + cu <- cbind(cu, 1:nrow(cu)) + } + for(k in 1:terra::nlyr(env_list[[val]])){ + newcu <- cu + # Set smallest and largest value to global minimum/maximum to account for rounding issues + newcu[1] <- terra::global(env_list[[val]][[k]], "min",na.rm=T)[,1] + newcu[nrow(newcu)*2] <- terra::global(env_list[[val]][[k]], "max",na.rm=T)[,1] + + o <- terra::classify(env_list[[val]][[k]], newcu, include.lowest=TRUE, overwrite = TRUE) + terra::time(o) <- terra::time( env_list[[val]][[k]] ) + names(o) <- paste0(val, "_",nrow(newcu)) + suppressWarnings( nn <- append(nn, o) ) + rm(o, newcu) + } + new_env[[val]] <- nn + } + invisible(gc()) + } + } + + # For k-means thresholding + if(option == 'kmeans'){ + if(is.Raster(env)){ + new_env <- terra::rast() + for(val in names(env)){ + # Also get the cut_off values + ex <- terra::values(env[[val]]) |> (\(.) subset(., stats::complete.cases(.)))() + if(!is.null(cutoffs)) k <- cutoffs else k <- nknots + cu <- stats::kmeans(ex,centers = k) + assertthat::assert_that(inherits(cu, "kmeans"), + msg = "K-means clustering failed...") + suppressWarnings( o <- terra::k_means(env[[val]], centers = cu$centers[,1]) ) + if(is.null(o)) next() + # Factorize and explode + o <- explode_factorized_raster( terra::as.factor(o), + name = paste0('kmeans_', val)) + for(i in 1:terra::nlyr(o)){ + names(o[[i]]) <- paste0('kmeans_',val,'_',round(cu$centers[i], 3)) + attr(o[[i]], "deriv.kmeans") <- cu$centers[i] + } + suppressWarnings( new_env <- c(new_env, o) ) rm(o) } } else { # For stats layers for(val in names(env_list)){ + # FIXME: To be implemented once there is a need... 
+ stop("KMeans for stars to be implemented...") # Format cutoffs cu <- cutoffs[which(cutoffs$deriv == val), 3] cu <- strsplit(cu, "_") |> unlist() @@ -588,7 +668,7 @@ predictor_derivate <- function(env, option, nknots = 4, deriv = NULL, int_variab newcu[1] <- terra::global(env_list[[val]][[k]], "min",na.rm=T)[,1] newcu[nrow(newcu)*2] <- terra::global(env_list[[val]][[k]], "max",na.rm=T)[,1] - o <- terra::classify(env_list[[val]][[k]], newcu, include.lowest=TRUE) + o <- terra::classify(env_list[[val]][[k]], newcu, include.lowest=TRUE, overwrite = TRUE) terra::time(o) <- terra::time( env_list[[val]][[k]] ) names(o) <- paste0(val, "_",nrow(newcu)) suppressWarnings( nn <- append(nn, o) ) @@ -770,7 +850,7 @@ predictor_homogenize_na <- function(env, fill = FALSE, fill_method = 'ngb', retu # Remove grid cells that are equal to the number of layers (all values NA) none_area <- mask_na == nl none_area[none_area == 0 ] <- NA - mask_na <- terra::mask(mask_na, mask = none_area, inverse = TRUE) + mask_na <- terra::mask(mask_na, mask = none_area, inverse = TRUE, overwrite = TRUE) # Should any fill be conducted? if(fill){ @@ -779,7 +859,7 @@ predictor_homogenize_na <- function(env, fill = FALSE, fill_method = 'ngb', retu # Otherwise just homogenize NA values across predictors if(terra::global(mask_na,'max',na.rm = TRUE)[,1] > 0){ mask_all <- mask_na == 0; mask_all[mask_all == 0] <- NA - env <- terra::mask(env, mask = mask_all) + env <- terra::mask(env, mask = mask_all, overwrite = TRUE) } } # Should NA coordinates of cells where 1 or more predictor is NA be @@ -932,7 +1012,7 @@ makeBin <- function(v, n, nknots, cutoffs = NULL){ if(anyDuplicated(cu)){ # If duplicated quantiles (e.g. 0, 0, 0.2..), sample from a larger number - cu <- terra::quantile(v[], probs = seq(0, 1, by = 1/(nknots*2)) ) + cu <- terra::quantile(v[], probs = seq(0, 1, by = 1/(nknots*2)), na.rm = TRUE ) cu <- cu[-which(duplicated(cu))] # Remove duplicated cuts if(length(cu)<=2) return( NULL ) if(length(cu) > nknots){ @@ -941,7 +1021,7 @@ makeBin <- function(v, n, nknots, cutoffs = NULL){ } # Make cuts and explode out <- explode_factorized_raster( - terra::classify(v, cu) + terra::classify(v, cu, overwrite = TRUE) ) # Format threshold names @@ -956,6 +1036,78 @@ makeBin <- function(v, n, nknots, cutoffs = NULL){ return(out) } +#### Check predictors ---- + +#' Helper function to check extracted predictors for issues +#' @description +#' Here we check the variables in a provided [`data.frame`] for known issues. +#' Note that this is done vertically (per column) and not horizontally (thus removing observations). +#' +#' If any of the conditions are satistified the entire predictor is removed from the model! +#' @details +#' Specifically checked are: +#' [*] Whether all values in a column are \code{NA}. +#' [*] Whether all values in a column are finite. +#' [*] Whether the variance of all variables is greater than 0. +#' +#' @param env A [`data.frame`] with all predictor variables. +#' @return A [`data.frame`] potentially with any variable names excluded. If the +#' function fails due to some reason it returns the original \code{env}. 
+#' +#' @keywords utils +#' +#' @examples +#' \dontrun{ +#' # Check predictors for common issues +#' env <- predictor_check( env ) +#' } +#' @author Martin Jung +#' @noRd +predictor_check <- function(env){ + assertthat::assert_that( + is.data.frame(env) + ) + # Dummy copy + dummy <- env + + # Check NA / NaN + check_nan <- apply(env, 2, function(z) all(is.na(z))) + if(any(check_nan)){ + if(getOption('ibis.setupmessages', default = TRUE)) { + myLog('[Setup]','yellow', 'Excluded ', paste0(names(which(check_nan)),collapse = "; "), + ' variables owing to exclusively NA data!' ) + } + env <- env |> dplyr::select(-dplyr::any_of(names(which(check_nan)))) + } + + # Check infinite values + check_infinite <- apply(env, 2, function(z) any( is.infinite(z) ) ) + if(any(check_infinite)){ + if(getOption('ibis.setupmessages', default = TRUE)) { + myLog('[Setup]','yellow', 'Excluded ', paste0(names(which(check_infinite)),collapse = "; "), + ' variables owing to observations with infinite values!' ) + } + env <- env |> dplyr::select(-dplyr::any_of(names(which(check_infinite)))) + } + + # Check variance + check_var <- apply(env, 2, function(z) stats::var(z, na.rm = TRUE)) == 0 + if(any(check_var)){ + if(getOption('ibis.setupmessages', default = TRUE)) { + myLog('[Setup]','yellow', 'Excluded ', paste0(names(which(check_var)),collapse = "; "), + ' variables owing to zero variance!' ) + } + env <- env |> dplyr::select(-dplyr::any_of(names(which(check_var)))) + } + + # Check whether all columns have been removed, if so revert back for safety. + if(ncol(env)==0) env <- dummy + rm(dummy) + + # Return + return(env) +} + #### Filter predictor functions ---- #' Filter a set of correlated predictors to fewer ones diff --git a/R/utils-spatial.R b/R/utils-spatial.R index 15091a53..44cb91be 100644 --- a/R/utils-spatial.R +++ b/R/utils-spatial.R @@ -344,7 +344,8 @@ create_zonaloccurrence_mask <- function(df, zones = NULL, buffer_width = NULL, c # Align with template if set if(!is.null(template)){ if(terra::compareGeom(zones, template, stopOnError = FALSE)){ - zones <- terra::resample(zones, template, method = "near", threads = getOption("ibis.nthread")) + zones <- terra::resample(zones, template, method = "near", + threads = getOption("ibis.nthread"), overwrite = TRUE) } } } @@ -357,8 +358,8 @@ create_zonaloccurrence_mask <- function(df, zones = NULL, buffer_width = NULL, c buf <- sf::st_buffer(x = df, dist = buffer_width, nQuadSegs = 50) ) # Rasterize - zones <- terra::rasterize(buf, template, field = 1, background = 0) - zones <- terra::mask(zones, template) + zones <- terra::rasterize(buf, template, field = 1, background = 0, overwrite = TRUE) + zones <- terra::mask(zones, template, overwrite = TRUE) # Ratify zones <- terra::droplevels(zones) } @@ -629,9 +630,9 @@ st_kde <- function(points, background, bandwidth = 3){ # Resample output for small point mismatches if(!terra::compareGeom(out, background, stopOnError = FALSE)){ - out <- terra::resample(out, background, threads = getOption("ibis.nthread")) + out <- terra::resample(out, background, threads = getOption("ibis.nthread"), overwrite = TRUE) } - out <- terra::mask(out, background) + out <- terra::mask(out, background, overwrite = TRUE) names(out) <- "kde__coordinates" rm(matrix, coords) return( out ) @@ -737,7 +738,7 @@ polygon_to_points <- function(poly, template, field_occurrence ) { ) # Rasterize the polygon to - out <- terra::rasterize(x = poly, y = template, field = field_occurrence) + out <- terra::rasterize(x = poly, y = template, field = field_occurrence, overwrite =
TRUE) # Construct new point data co <- terra::xyFromCell(out, cell = which(!is.na(out[])) ) |> as.data.frame() @@ -867,11 +868,12 @@ alignRasters <- function(data, template, method = "bilinear", func = mean, cl = if(sf::st_crs(data) != sf::st_crs(template)){ # Project Raster layer data <- terra::project(data, terra::crs(template), - method = method, threads = getOption("ibis.nthread")) + method = method, + threads = getOption("ibis.nthread"), overwrite = TRUE) } # Crop raster to template - data <- terra::crop(data, template, snap = "out") + data <- terra::crop(data, template, snap = "out", overwrite = TRUE) # Aggregate to minimal scale if(is.Raster(template)){ @@ -879,12 +881,14 @@ alignRasters <- function(data, template, method = "bilinear", func = mean, cl = factor <- floor(data@ncols/template@ncols) data <- terra::aggregate(data, fact = factor, fun = func, - cores = ifelse(cl, getOption("ibis.nthread"), 1)) + cores = ifelse(cl, getOption("ibis.nthread"), 1), + overwrite = TRUE) } } else { # Resample with target method - ras <- terra::rasterize(template, data) - data <- terra::resample(data, ras, method = method, threads = getOption("ibis.nthread")) + ras <- terra::rasterize(template, data, overwrite = TRUE) + data <- terra::resample(data, ras, method = method, + threads = getOption("ibis.nthread"), overwrite = TRUE) } return(data) } @@ -1044,11 +1048,11 @@ get_ngbvalue <- function(coords, env, longlat = TRUE, field_space = c('x','y'), return(out) } -#' Function to extract directly the raster value of provided points +#' Function to extract point values directly from a SpatRaster #' #' @description This function simply extracts the values from a provided #' [`SpatRaster`], [`SpatRasterDataset`] or [`SpatRasterCollection`] object. For -#' points where or NA values were extracted a small buffer is applied to try and +#' points where \code{NA} values were extracted a small buffer is applied to try and #' obtain the remaining values. #' #' @param coords A [`data.frame`], [`matrix`] or [`sf`] object. @@ -1066,10 +1070,14 @@ get_ngbvalue <- function(coords, env, longlat = TRUE, field_space = c('x','y'), #' @keywords utils #' #' @examples -#' \dontrun{ +#' # Dummy raster: +#' r <- terra::rast(nrows = 10, ncols = 10, res = 0.05, xmin = -1.5, xmax = 1.5, ymin = -1.5, ymax = 1.5, vals = rnorm(3600,mean = .5,sd = .1)) +#' # (dummy points) +#' pp <- terra::spatSample(r,20,as.points = TRUE) |> sf::st_as_sf() +#' #' # Extract values -#' vals <- get_rastervalue(coords, env) -#' } +#' vals <- get_rastervalue(pp, r) +#' head(vals) #' #' @export get_rastervalue <- function(coords, env, ngb_fill = TRUE, rm.na = FALSE){ @@ -1516,7 +1524,7 @@ thin_observations <- function(data, background, env = NULL, method = "random", r # Take coordinates of supplied data and rasterize coords <- sf::st_coordinates(data) - ras <- terra::rasterize(coords, background, fun = sum) # Get the number of observations per grid cell + ras <- terra::rasterize(coords, background, fun = sum, overwrite = TRUE) # Get the number of observations per grid cell # Lower and upper bounds for thinning totake <- c(lower = remainpoints, upper = max(terra::global(ras, "min", na.rm = TRUE)[,1], diff --git a/R/utils-stan.R b/R/utils-stan.R index 37329e15..f60726c0 100644 --- a/R/utils-stan.R +++ b/R/utils-stan.R @@ -323,7 +323,7 @@ run_stan <- function( model_code, data = list(), #' @param obj A \code{"stanfit"} object (as used by rstan). #' @param form A [`formula`] object created for the [ibis.iSDM::DistributionModel].
#' @param newdata A [data.frame] with new data to be used for prediction. -#' @param mode A [`character`] of whether the linear `predictor` or the `response` +#' @param type A [`character`] of whether the linear `predictor` or the `response` #' is to be summarized. #' @param family A [`character`] giving the family for simulating linear response #' values (Default: \code{NULL}) @@ -335,16 +335,18 @@ run_stan <- function( model_code, data = list(), #' * The brms R-package. #' #' @export -posterior_predict_stanfit <- function(obj, form, newdata, mode = "predictor", family = NULL, offset = NULL, draws = NULL){ +posterior_predict_stanfit <- function(obj, form, newdata, type = "predictor", + family = NULL, offset = NULL, draws = NULL){ assertthat::assert_that( inherits(obj, "stanfit") || inherits(obj, "CmdStanFit"), is.formula(form), is.data.frame(newdata), + is.character(type), is.null(family) || is.character(family), is.null(draws) || is.numeric(draws), is.null(offset) || (length(offset) == nrow(newdata)) ) - mode <- match.arg(mode, c("predictor", "response"), several.ok = FALSE) + type <- match.arg(type, c("predictor", "response"), several.ok = FALSE) # Build model matrix # Note: This removes all NA cells from matrix A <- stats::model.matrix(object = stats::delete.response(stats::terms(form)), @@ -386,7 +388,7 @@ posterior_predict_stanfit <- function(obj, form, newdata, mode = "predictor", fa ) # 16/01/2023 - Change towards matrix multiplication by default (below) - # if(mode == "predictor"){ + # if(type == "predictor"){ # # Summarize the coefficients from the posterior # pp <- posterior::summarise_draws(pp) |> # subset(select = c("variable", "mean", "q5", "median", "q95", "sd")) |> @@ -438,7 +440,7 @@ posterior_predict_stanfit <- function(obj, form, newdata, mode = "predictor", fa colnames(a) <- rownames(a) <- NULL # Backtransformation - if(mode == "response"){ + if(type == "response"){ if(family == "poisson"){ a <- apply(a, 2, function(lambda) ilink(lambda, link = "log")) } else if(family == "binomial") { diff --git a/R/utils.R b/R/utils.R index cd747522..91c408a5 100644 --- a/R/utils.R +++ b/R/utils.R @@ -60,18 +60,23 @@ text_yellow <- function(text) { paste0('\033[33m',text,'\033[39m') } #' @inheritParams text_red text_green <- function(text) { paste0('\033[32m',text,'\033[39m') } -#' Calculate the mode +#' Calculate the mode of a provided vector #' -#' @param A [`vector`] of values or characters. +#' @param x A [`vector`] of values or characters. +#' @param na.rm [`logical`] whether \code{NA} values are to be removed (Default: \code{TRUE}). #' -#' @keywords utils -#' -#' @noRd +#' @keywords utils misc +#' @examples +#' # Example +#' modal(trees$Girth) #' -#' @keywords internal -mode <- function(x) { +#' @returns The most common (mode) estimate. +#' @export +modal <- function(x, na.rm = TRUE) { + if(na.rm) x <- x[!is.na(x)] + if(length(x)==0) return(NA) ux <- unique(x) - ux[which.max(tabulate(match(x, ux)))] + ux[which.max(table(match(x, ux)))] } #' Check whether function exist in name space @@ -337,116 +342,6 @@ sanitize_names <- function(names){ ) } -#' Parallel computation of function -#' -#' @description Some computations take considerable amount of time to execute. -#' This function provides a helper wrapper for running functions of the -#' [`apply`] family to specified outputs. -#' -#' @param X A [`list`], [`data.frame`] or [`matrix`] object to be fed to a single -#' core or parallel [apply] call. -#' @param FUN A [`function`] passed on for computation.
-#' @param cores A [numeric] of the number of cores to use (Default: \code{1}). -#' @param approach [`character`] for the parallelization approach taken (Options: -#' \code{"parallel"} or \code{"future"}). -#' @param export_package A [`vector`] with packages to export for use on parallel -#' nodes (Default: \code{NULL}). -#' -#' @details By default, the [parallel] package is used for parallel computation, -#' however an option exists to use the [future] package instead. -#' -#' @keywords utils -#' -#' @examples -#' \dontrun{ -#' run_par(list, mean, cores = 4) -#' } -#' -#' @noRd -#' -#' @keywords internal -run_parallel <- function(X, FUN, cores = 1, approach = "parallel", export_packages = NULL, ...) { - assertthat::assert_that( - is.list(X) || is.data.frame(X) || is.matrix(X), - is.function(FUN), - is.numeric(cores), - is.null(export_packages) || is.character(export_packages) - ) - # Match approach - approach <- match.arg(approach, c("parallel", "future"), several.ok = FALSE) - - # Collect dots - dots <- list(...) - - if(!is.list(X)){ - # Convert input object to a list of split parameters - n_vars <- nrow(X) - chunk_size <- ceiling(n_vars / cores) - n_chunks <- ceiling(n_vars / chunk_size) - chunk_list <- vector(length = n_chunks, mode = "list") - - for (i in seq_len(n_chunks)) { - if ((chunk_size * (i - 1) + 1) <= n_vars) { - chunk <- (chunk_size * (i - 1) + 1):(min(c(chunk_size * - i, n_vars))) - chunk_list[[i]] <- X[chunk, ] - } - } - assertthat::assert_that(sum(sapply(chunk_list, nrow)) == nrow(X)) - X <- chunk_list;rm(chunk_list) - input_type = "data.frame" # Save to aggregate later again - } else { input_type = "list"} - - # Process depending on cores - if (cores == 1) { - out <- lapply(X, FUN, ...) - } else { - if(approach == "parallel"){ - # check_package('doParallel') - # require(foreach) - # isTRUE(Sys.info()[["sysname"]] == "Windows") - # Other operating systems - if(!isTRUE(Sys.info()[["sysname"]] == "Windows") && is.list(X)) { - out <- parallel::mclapply(X = X, FUN = FUN, mc.cores = cores, - ...) - } else { - # Other operating systems - cl <- parallel::makePSOCKcluster(cores) - on.exit(parallel::stopCluster(cl)) - if(!is.null(export_packages)){ - # Send all specified packages to the cluster - for(val in export_packages){ - parallel::clusterExport(cl, varlist = val, - envir = as.environment(asNamespace(val))) - } - } - out <- parallel::parLapply(cl = cl, X = X, fun = FUN, ...) - } - # out <- foreach::foreach(z = iterators::iter(X), - # .combine = ifelse(input_type!="list", "rbind", foreach:::defcombine), - # .inorder = FALSE, - # .multicombine = TRUE, - # .errorhandling = 'stop', - # .export = c("FUN"), - # .packages = export_packages, - # ... - # ) %dopar% { return( FUN(z, ...) ) } - } else { - # Check that future is loaded - check_package('future.apply') - # Check that plan for future has been set up! - assertthat::assert_that( getOption("ibis.use_future") == TRUE, - msg = "Set up a future plan via [ibis_future] to use this approach.") - out <- future.apply::future_lapply(cl = cl, X = X, fun = FUN, ...) 
- } - } - # If input data was not a list, combine again - if(input_type != "list" && is.list(out)){ - out <- do.call(rbind, out) - } - return( out ) -} - #' Clamp a predictor matrix by given values #' #' @description To limit extreme extrapolation it is possible to \code{'clamp'} @@ -737,3 +632,108 @@ collect_occurrencepoints <- function(model, include_absences = FALSE, } return(locs) } + +#' @title Shows size of objects in the R environment +#' @description Shows the size of the objects currently in the R environment. +#' Helps to locate large objects cluttering the R environment and/or +#' causing memory problems during the execution of large workflows. +#' +#' @param n Number of objects to show, Default: `10` +#' @return A data frame with the row names indicating the object name, +#' the field 'Type' indicating the object type, 'Size' indicating the object size, +#' and the columns 'Length/Rows' and 'Columns' indicating the object dimensions if applicable. +#' +#' @examples +#' if(interactive()){ +#' +#' #creating dummy objects +#' x <- matrix(runif(100), 10, 10) +#' y <- matrix(runif(10000), 100, 100) +#' +#' #reading their in-memory size +#' objects_size() +#' +#' } +#' @author Blas Benito +#' @rdname objects_size +#' @importFrom utils object.size +#' @export +objects_size <- function(n = 10) { + + .ls.objects <- function ( + pos = 1, + pattern, + order.by, + decreasing=FALSE, + head=FALSE, + n=5 + ){ + + napply <- function(names, fn) sapply( + names, + function(x) fn(get(x, pos = pos)) + ) + + names <- ls( + pos = pos, + pattern = pattern + ) + + obj.class <- napply( + names, + function(x) as.character(class(x))[1] + ) + + obj.mode <- napply( + names, + mode + ) + + obj.type <- ifelse( + is.na(obj.class), + obj.mode, + obj.class + ) + + obj.prettysize <- napply( + names, + function(x) {format(utils::object.size(x), units = "auto") } + ) + + obj.size <- napply( + names, + object.size + ) + + obj.dim <- t( + napply( + names, + function(x)as.numeric(dim(x))[1:2] + ) + ) + + vec <- is.na(obj.dim)[, 1] & (obj.type != "function") + + obj.dim[vec, 1] <- napply(names, length)[vec] + + out <- data.frame( + obj.type, + obj.prettysize, + obj.dim + ) + names(out) <- c("Type", "Size", "Length/Rows", "Columns") + if (!missing(order.by)) + out <- out[order(out[[order.by]], decreasing=decreasing), ] + if (head) + out <- head(out, n) + out + } + + .ls.objects( + order.by = "Size", + decreasing=TRUE, + head=TRUE, + n=n + ) + +} diff --git a/R/validate.R b/R/validate.R index 91f33b5d..9a2d9c39 100644 --- a/R/validate.R +++ b/R/validate.R @@ -31,7 +31,7 @@ #' * \code{'logloss'} = Log loss, TBD #' * \code{'normgini'} = Normalized Gini index, TBD #' * \code{'cont.boyce'} = Continuous Boyce index, Ratio of predicted against expected frequency calculated over -#' a moving window: \deqn{\frac{P_{i}{E_{i}} }}, where \deqn{ P_{i} = \frac{p_{i}}{\sum{j=1}^{b} p_{j} }} and \deqn{ E_{i} = \frac{a_{i}}{\sum{j=1}^{b} a_{j} }} +#' a moving window: \deqn{\frac{P_{i}}{E_{i}}}, where \deqn{ P_{i} = \frac{p_{i}}{\sum_{j=1}^{b} p_{j}} } and \deqn{ E_{i} = \frac{a_{i}}{\sum_{j=1}^{b} a_{j}} } #' #' **Discrete:** #' * \code{'n'} = Number of observations.
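The corrected continuous Boyce index expression above reduces to a predicted-to-expected frequency ratio per suitability bin. Below is a minimal standalone R sketch of that ratio; the vectors `p` (presence counts per bin) and `a` (available cells per bin) are hypothetical inputs for illustration only, not objects from the package:

```r
# Sketch of the Boyce ratio F_i = P_i / E_i following the corrected formula,
# with P_i = p_i / sum_j(p_j) (predicted frequency per bin) and
# E_i = a_i / sum_j(a_j) (expected frequency from the available area per bin).
boyce_ratio <- function(p, a) {
  P <- p / sum(p)
  E <- a / sum(a)
  P / E # validate() computes this ratio over a moving window of bins
}

# Example: presences concentrated in high-suitability bins yield ratios > 1
boyce_ratio(p = c(1, 4, 10), a = c(50, 30, 20))
```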
@@ -426,6 +426,10 @@ methods::setMethod( return(results) } + # R2 score + R2_Score <- function(pred, obs, na.rm = TRUE) { + return( 1 - sum((obs - pred)^2,na.rm = na.rm)/sum((obs - mean(obs,na.rm = na.rm))^2,na.rm = na.rm) ) + } # Function for Root-mean square error RMSE <- function(pred, obs, na.rm = TRUE) { sqrt(mean((pred - obs)^2, na.rm = na.rm)) @@ -434,6 +438,10 @@ methods::setMethod( MAE <- function(pred, obs, na.rm = TRUE) { mean(abs(pred - obs), na.rm = na.rm) } + # Mean Absolute Percentage Error Loss + MAPE <- function(pred, obs, na.rm = TRUE){ + mean(abs((obs - pred)/obs), na.rm = na.rm) + } # Function for log loss/cross-entropy loss. Poisson_LogLoss <- function(y_pred, y_true) { eps <- 1e-15 @@ -458,21 +466,23 @@ methods::setMethod( modelid = id, name = name, method = method, - metric = c('n','rmse', 'mae', + metric = c('n', 'r2', 'rmse', 'mae', 'mape', 'logloss','normgini', 'cont.boyce'), value = NA ) # - # out$value[out$metric=='n'] <- nrow(df2) # Number of records + out$value[out$metric=='r2'] <- R2_Score(pred = df2$pred, obs = df2[[point_column]]) # R2 out$value[out$metric=='rmse'] <- RMSE(pred = df2$pred, obs = df2[[point_column]]) # RMSE out$value[out$metric=='mae'] <- MAE(pred = df2$pred, obs = df2[[point_column]]) # Mean absolute error + out$value[out$metric=='mape'] <- MAPE(pred = df2$pred, obs = df2[[point_column]]) # Mean Absolute Percentage Error Loss out$value[out$metric=='normgini'] <- NormalizedGini(y_pred = df2$pred, y_true = df2[[point_column]]) if(!is.null(mod)){ if( any( sapply(mod$model$biodiversity, function(x) x$family) == "binomial" ) ){ - LogLoss <- function(y_pred, y_true) { - y_pred <- pmax(y_pred, 1e-15) + LogLoss <- function(y_pred, y_true, eps = 1e-15) { + y_pred <- pmax(pmin(y_pred, 1 - eps), eps) LogLoss <- -mean(y_true * log(y_pred) + (1 - y_true) * log(1 - y_pred)) return(LogLoss) } diff --git a/R/zzz.R b/R/zzz.R index 4c3983e3..bffd7c78 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -31,7 +31,8 @@ # Use the future package for any options. Default is FALSE options('ibis.nthread' = parallel::detectCores() - 1) options('ibis.runparallel' = FALSE) - options('ibis.futurestrategy' = "multisession") + options('ibis.futurestrategy' = "sequential") + options(future.globals.onReference = "ignore") # Do not raise an error for pointer objects (SpatRaster) options(doFuture.foreach.export = ".export-and-automatic-with-warning") # Other dependencies not directly added in DESCRIPTION (to minimize potential diff --git a/_pkgdown.yml b/_pkgdown.yml index aa686df0..8a7f075b 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -104,6 +104,8 @@ reference: desc: Key functions to summarize, validate or extract information from trained models.
contents: - plot + - bivplot + - nicheplot - print - summary - coef @@ -134,6 +136,7 @@ reference: - predictor_derivate - predictor_filter - interpolate_gaps + - objects_size - run_stan - wrap_stanmodel - sanitize_names @@ -163,6 +166,16 @@ reference: - Log - has_keyword("classes") + - title: Parallel processing + desc: > + Functions to enable or set up parallel processing + contents: + - ibis_future + - ibis_enable_parallel + - ibis_set_strategy + - ibis_set_threads + - run_parallel + - title: Miscellaneous functions desc: > Other functions only relevant for development @@ -173,9 +186,9 @@ reference: - has_keyword("misc") - print - myLog + - modal - new_id - new_waiver - - ibis_future - ibis_options - ibis_dependencies diff --git a/man/BiodiversityDistribution-class.Rd b/man/BiodiversityDistribution-class.Rd index fb2830f0..a05c3b10 100644 --- a/man/BiodiversityDistribution-class.Rd +++ b/man/BiodiversityDistribution-class.Rd @@ -28,7 +28,7 @@ names(x) } \seealso{ -\code{\link[=add_control_extrapolation]{add_control_extrapolation()}} +\code{\link[=add_limits_extrapolation]{add_limits_extrapolation()}} \code{\link[=add_latent_spatial]{add_latent_spatial()}} diff --git a/man/BiodiversityScenario-class.Rd b/man/BiodiversityScenario-class.Rd index fc119746..82f17f71 100644 --- a/man/BiodiversityScenario-class.Rd +++ b/man/BiodiversityScenario-class.Rd @@ -58,6 +58,7 @@ This requires set \code{\link{threshold}} prior to projection. \item \href{#method-BiodiversityScenario-get_resolution}{\code{BiodiversityScenario$get_resolution()}} \item \href{#method-BiodiversityScenario-get_model}{\code{BiodiversityScenario$get_model()}} \item \href{#method-BiodiversityScenario-get_limits}{\code{BiodiversityScenario$get_limits()}} +\item \href{#method-BiodiversityScenario-rm_limits}{\code{BiodiversityScenario$rm_limits()}} \item \href{#method-BiodiversityScenario-get_predictor_names}{\code{BiodiversityScenario$get_predictor_names()}} \item \href{#method-BiodiversityScenario-get_timeperiod}{\code{BiodiversityScenario$get_timeperiod()}} \item \href{#method-BiodiversityScenario-get_constraints}{\code{BiodiversityScenario$get_constraints()}} @@ -72,6 +73,7 @@ This requires set \code{\link{threshold}} prior to projection. \item \href{#method-BiodiversityScenario-get_predictors}{\code{BiodiversityScenario$get_predictors()}} \item \href{#method-BiodiversityScenario-rm_predictors}{\code{BiodiversityScenario$rm_predictors()}} \item \href{#method-BiodiversityScenario-get_data}{\code{BiodiversityScenario$get_data()}} +\item \href{#method-BiodiversityScenario-rm_data}{\code{BiodiversityScenario$rm_data()}} \item \href{#method-BiodiversityScenario-set_data}{\code{BiodiversityScenario$set_data()}} \item \href{#method-BiodiversityScenario-set_latent}{\code{BiodiversityScenario$set_latent()}} \item \href{#method-BiodiversityScenario-get_latent}{\code{BiodiversityScenario$get_latent()}} @@ -203,6 +205,19 @@ A \code{\link{sf}} object or NULL. } } \if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-BiodiversityScenario-rm_limits}{}}} +\subsection{Method \code{rm_limits()}}{ +Remove current limits. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{BiodiversityScenario$rm_limits()}\if{html}{\out{
}} +} + +\subsection{Returns}{ +Invisible +} +} +\if{html}{\out{
}} \if{html}{\out{}} \if{latex}{\out{\hypertarget{method-BiodiversityScenario-get_predictor_names}{}}} \subsection{Method \code{get_predictor_names()}}{ @@ -434,6 +449,26 @@ This object. } } \if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-BiodiversityScenario-rm_data}{}}} +\subsection{Method \code{rm_data()}}{ +Remove scenario predictions +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{BiodiversityScenario$rm_data()}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{what}}{A \code{\link{character}} vector with names of what should be removed.} +} +\if{html}{\out{
}} +} +\subsection{Returns}{ +Invisible +} +} +\if{html}{\out{
}} \if{html}{\out{}} \if{latex}{\out{\hypertarget{method-BiodiversityScenario-set_data}{}}} \subsection{Method \code{set_data()}}{ diff --git a/man/add_control_bias.Rd b/man/add_control_bias.Rd index 60517694..2f292bb4 100644 --- a/man/add_control_bias.Rd +++ b/man/add_control_bias.Rd @@ -123,7 +123,7 @@ from opportunistic presence‐only data. Methods in Ecology and Evolution, 12(5) } } \seealso{ -\code{\link[=add_control_extrapolation]{add_control_extrapolation()}} +\code{\link[=add_limits_extrapolation]{add_limits_extrapolation()}} } \concept{The spatial bias weighting was inspired by code in the \code{enmSdmX} package.} \keyword{bias} diff --git a/man/add_predictors.Rd b/man/add_predictors.Rd index 40498c8a..8bd0bbc7 100644 --- a/man/add_predictors.Rd +++ b/man/add_predictors.Rd @@ -179,6 +179,7 @@ Available options for creating derivates are: \item \code{'interaction'} - Add interacting predictors. Interactions need to be specified (\code{"int_variables"})! \item \code{'thresh'} - Add threshold derivate predictors. \item \code{'hinge'} - Add hinge derivate predictors. +\item \code{'kmeans'} - Add k-means derived factors. \item \code{'bin'} - Add predictors binned by their percentiles. } } diff --git a/man/bivplot.Rd b/man/bivplot.Rd index 3a4e01a5..6528dbda 100644 --- a/man/bivplot.Rd +++ b/man/bivplot.Rd @@ -3,7 +3,7 @@ \name{bivplot} \alias{bivplot} \alias{bivplot,ANY-method} -\title{Bivariate plot wrapper for distribution objects} +\title{Bivariate prediction plot for distribution objects} \usage{ bivplot( mod, diff --git a/man/engine_bart.Rd b/man/engine_bart.Rd index 3be00c8f..44fa401c 100644 --- a/man/engine_bart.Rd +++ b/man/engine_bart.Rd @@ -16,7 +16,7 @@ sum-of-trees formulation (Default: \code{1000}).} \item{chains}{A number of the number of chains to be used (Default: \code{4}).} -\item{type}{The mode used for creating posterior predictions. Either \code{"link"} +\item{type}{The type used for creating posterior predictions. Either \code{"link"} or \code{"response"} (Default: \code{"response"}).} \item{...}{Other options.} diff --git a/man/engine_gdb.Rd b/man/engine_gdb.Rd index e0620fac..18a81d11 100644 --- a/man/engine_gdb.Rd +++ b/man/engine_gdb.Rd @@ -52,6 +52,10 @@ projections. Such as for instance the ability to specifically add spatial baselearners via \link{add_latent_spatial} or the specification of monotonically constrained priors via \link{GDBPrior}. } +\note{ +The coefficients resulting from gdb with poipa data (Binomial) are only 0.5 +of the typical coefficients of a logit model obtained via glm (see the \code{Binomial} family documentation). +} \examples{ \dontrun{ # Add GDB as an engine diff --git a/man/engine_glm.Rd b/man/engine_glm.Rd index fbef789d..023b1f5d 100644 --- a/man/engine_glm.Rd +++ b/man/engine_glm.Rd @@ -37,6 +37,11 @@ for \code{\link[=ensemble]{ensemble()}} of small models (a practice common for r \details{ This engine is essentially a wrapper for \code{\link[stats:glm]{stats::glm.fit()}}, however with customized settings to support offsets and weights. + +If \code{"optim_hyperparam"} is set to \code{TRUE} in \code{\link[=train]{train()}}, then an AIC +based step-wise (backwards) model selection is performed. +Generally however, \code{\link{engine_glmnet}} should be the preferred engine for models +with more than \code{3} covariates.
} \examples{ # Load background background <- terra::rast(system.file('extdata/europegrid_50km.tif', package='ibis.iSDM',mustWork = TRUE)) # Add GLM as an engine x <- distribution(background) |> engine_glm() +print(x) } \references{ diff --git a/man/ensemble.Rd b/man/ensemble.Rd index 2056a7d6..c0eed7d3 100644 --- a/man/ensemble.Rd +++ b/man/ensemble.Rd @@ -13,6 +13,8 @@ ensemble( layer = "mean", normalize = FALSE, uncertainty = "cv", + point = NULL, + field_occurrence = "observed", apply_threshold = TRUE ) @@ -24,6 +26,8 @@ ensemble( layer = "mean", normalize = FALSE, uncertainty = "cv", + point = NULL, + field_occurrence = "observed", apply_threshold = TRUE ) } @@ -52,6 +56,11 @@ standard deviation (\code{"sd"}), the average of all PCA axes except the first \code{"pca"}, the coefficient of variation (\code{"cv"}, Default) or the range between the lowest and highest value (\code{"range"}).} +\item{point}{A \code{\link{sf}} object containing observational data used for model training. Used +for method \code{'superlearner'} only (Default: \code{NULL}).} + +\item{field_occurrence}{A \code{\link{character}} location of biodiversity point records (Default: \code{'observed'}).} + \item{apply_threshold}{A \code{\link{logical}} flag (Default: \code{TRUE}) specifying whether threshold values should also be created via \code{"method"}. Only applies and works for \code{\link{DistributionModel}} and thresholds found.} @@ -87,10 +96,13 @@ Possible options for creating an ensemble includes: \item \code{'median'} - Calculates the median of several predictions. \item \code{'max'} - The maximum value across predictions. \item \code{'min'} - The minimum value across predictions. +\item \code{'mode'} - The mode/modal value, that is the most commonly occurring value. \item \code{'weighted.mean'} - Calculates a weighted mean. Weights have to be supplied separately (e.g. TSS). \item \code{'min.sd'} - Ensemble created by minimizing the uncertainty among predictions. \item \code{'threshold.frequency'} - Returns an ensemble based on threshold frequency (simple count). Requires thresholds to be computed. \item \code{'pca'} - Calculates a PCA between predictions of each algorithm and then extract the first axis (the one explaining the most variation). +\item \code{'superlearner'} - Combines two predictions through a 'meta-model' fitted on top +(using a \code{\link{glm}} by default). Requires binomial data in the current setup. } In addition to the different ensemble methods, a minimal threshold diff --git a/man/get_rastervalue.Rd b/man/get_rastervalue.Rd index cc7a0e1f..052c40dd 100644 --- a/man/get_rastervalue.Rd +++ b/man/get_rastervalue.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/utils-spatial.R \name{get_rastervalue} \alias{get_rastervalue} -\title{Function to extract directly the raster value of provided points} +\title{Function to extract point values directly from a SpatRaster} \usage{ get_rastervalue(coords, env, ngb_fill = TRUE, rm.na = FALSE) } @@ -24,17 +24,21 @@ data point. \description{ This function simply extracts the values from a provided \code{\link{SpatRaster}}, \code{\link{SpatRasterDataset}} or \code{\link{SpatRasterCollection}} object. For -points where or NA values were extracted a small buffer is applied to try and +points where \code{NA} values were extracted a small buffer is applied to try and obtain the remaining values. } \details{ It is essentially a wrapper for \code{\link[terra:extract]{terra::extract}}.
} \examples{ -\dontrun{ +# Dummy raster: +r <- terra::rast(nrows = 10, ncols = 10, res = 0.05, xmin = -1.5, xmax = 1.5, ymin = -1.5, ymax = 1.5, vals = rnorm(3600,mean = .5,sd = .1)) +# (dummy points) +pp <- terra::spatSample(r,20,as.points = TRUE) |> sf::st_as_sf() + # Extract values -vals <- get_rastervalue(coords, env) -} +vals <- get_rastervalue(pp, r) +head(vals) } \keyword{utils} diff --git a/man/ibis_enable_parallel.Rd b/man/ibis_enable_parallel.Rd new file mode 100644 index 00000000..3a188e62 --- /dev/null +++ b/man/ibis_enable_parallel.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/misc.R +\name{ibis_enable_parallel} +\alias{ibis_enable_parallel} +\title{Set the parallel processing flag to TRUE} +\usage{ +ibis_enable_parallel() +} +\value{ +Invisible +} +\description{ +Small helper function to enable parallel processing. If set +to \code{TRUE}, then parallel inference (if supported by engines) and projection are +enabled across the package. +For enabling prediction support beyond sequential prediction see the \code{\link{ibis_future}} function. +} +\seealso{ +\link{future}, \link{ibis_future} +} +\keyword{misc} diff --git a/man/ibis_future.Rd b/man/ibis_future.Rd index 4f32e1fa..761af892 100644 --- a/man/ibis_future.Rd +++ b/man/ibis_future.Rd @@ -2,29 +2,56 @@ % Please edit documentation in R/misc.R \name{ibis_future} \alias{ibis_future} -\title{Options to set up ibis for parallel processing with future} +\title{Internal function to enable (a)synchronous parallel processing} \usage{ ibis_future( - cores = getOption("ibis.nthread"), - strategy = getOption("ibis.futurestrategy") + plan_exists = FALSE, + cores = getOption("ibis.nthread", default = 2), + strategy = getOption("ibis.futurestrategy"), + workers = NULL ) } \arguments{ +\item{plan_exists}{A \code{\link{logical}} check on whether an existing \code{\link{future}} plan exists (Default: \code{FALSE}).} + \item{cores}{A \code{\link{numeric}} number stating the number of cores to use.} \item{strategy}{A \code{\link{character}} denoting the strategy to be used for future. See help of \code{\link{future}} for options. (Default: \code{"multisession"}).} + +\item{workers}{An optional list of remote machines or workers, e.g. \code{"c(remote.server.org)"}. +Alternatively a \code{"cluster"} object can be provided.} } \value{ -None +Invisible } \description{ -Options to set up ibis for parallel processing with future +This function checks if parallel processing can be set up and enables it. +\strong{Ideally this is done by the user for more control!} +In the package, parallelization is usually only used for predictions and projections, +but not for inference, in which case parallel inference should be handled by the engine. +} +\details{ +Currently supported strategies are: +\itemize{ +\item \code{"sequential"} = Resolves futures sequentially in the current R process (Package default). +\item \code{"multisession"} = Resolves futures asynchronously across \code{'cores'} sessions. +\item \code{"multicore"} = Resolves futures asynchronously on forked processes. Only works on UNIX systems! +\item \code{"cluster"} = Resolves futures asynchronously in sessions on one or more machines. +\item \code{"slurm"} = To be implemented: Slurm linkage via batchtools. +} +} +\note{ +The \code{'plan'} set by \link{future} persists after the function has been executed. + +If the aim is to parallelize across many species, this is better done in a scripted solution.
+Make sure not to parallelize predictions within existing clusters to avoid out-of-memory +issues. } \examples{ \dontrun{ -# Starts future job -ibis_future(cores = 4) +# Starts future job using a multisession strategy. +ibis_future(cores = 4, strategy = "multisession") } } diff --git a/man/ibis_set_strategy.Rd b/man/ibis_set_strategy.Rd new file mode 100644 index 00000000..190f11b9 --- /dev/null +++ b/man/ibis_set_strategy.Rd @@ -0,0 +1,31 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/misc.R +\name{ibis_set_strategy} +\alias{ibis_set_strategy} +\title{Set the strategy for parallel processing.} +\usage{ +ibis_set_strategy(strategy = "sequential") +} +\arguments{ +\item{strategy}{A \code{\link{character}} with the strategy.} +} +\value{ +Invisible +} +\description{ +Small helper function to respecify the strategy for parallel processing (Default: \code{'sequential'}). +} +\details{ +Currently supported strategies are: +\itemize{ +\item \code{"sequential"} = Resolves futures sequentially in the current R process (Package default). +\item \code{"multisession"} = Resolves futures asynchronously across \code{'cores'} sessions. +\item \code{"multicore"} = Resolves futures asynchronously on forked processes. Only works on UNIX systems! +\item \code{"cluster"} = Resolves futures asynchronously in sessions on one or more machines. +\item \code{"slurm"} = To be implemented: Slurm linkage via batchtools. +} +} +\seealso{ +\link{future}, \link{ibis_future} +} +\keyword{misc} diff --git a/man/ibis_set_threads.Rd b/man/ibis_set_threads.Rd new file mode 100644 index 00000000..b870b205 --- /dev/null +++ b/man/ibis_set_threads.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/misc.R +\name{ibis_set_threads} +\alias{ibis_set_threads} +\title{Set the threads for parallel processing.} +\usage{ +ibis_set_threads(threads = 2) +} +\arguments{ +\item{threads}{A \code{\link{numeric}} greater than \code{0}.} +} +\value{ +Invisible +} +\description{ +Small helper function to respecify the number of threads for parallel processing. +} +\seealso{ +\link{future}, \link{ibis_future_run} +} +\keyword{misc} diff --git a/man/modal.Rd b/man/modal.Rd new file mode 100644 index 00000000..b8db6be7 --- /dev/null +++ b/man/modal.Rd @@ -0,0 +1,26 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils.R +\name{modal} +\alias{modal} +\title{Calculate the mode of a provided vector} +\usage{ +modal(x, na.rm = TRUE) +} +\arguments{ +\item{x}{A \code{\link{vector}} of values or characters.} + +\item{na.rm}{\code{\link{logical}} whether \code{NA} values are to be removed (Default: \code{TRUE}).} +} +\value{ +The most common (mode) estimate. +} +\description{ +Calculate the mode of a provided vector +} +\examples{ +# Example +modal(trees$Girth) + +} +\keyword{misc} +\keyword{utils} diff --git a/man/nicheplot.Rd b/man/nicheplot.Rd new file mode 100644 index 00000000..33a364bd --- /dev/null +++ b/man/nicheplot.Rd @@ -0,0 +1,102 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/plot.R +\name{nicheplot} +\alias{nicheplot} +\alias{nicheplot,ANY-method} +\title{Niche plot for distribution objects} +\usage{ +nicheplot( + mod, + xvar = NULL, + yvar = NULL, + envvars = NULL, + overlay_data = FALSE, + plot = TRUE, + fname = NULL, + title = NULL, + pal = NULL, + ...
+) + +\S4method{nicheplot}{ANY}( + mod, + xvar = NULL, + yvar = NULL, + envvars = NULL, + overlay_data = FALSE, + plot = TRUE, + fname = NULL, + title = NULL, + pal = NULL, + ... +) } \arguments{ +\item{mod}{A trained \code{\link{DistributionModel}} or alternatively a \code{\link{SpatRaster}} +object with a model \code{prediction} within.} + +\item{xvar}{A \code{\link{character}} denoting the predictor on the x-axis. Alternatively a \code{\link{SpatRaster}} +object can be provided.} + +\item{yvar}{A \code{\link{character}} denoting the predictor on the y-axis. Alternatively a \code{\link{SpatRaster}} +object can be provided.} + +\item{envvars}{A \code{\link{SpatRaster}} object containing all environmental variables. Only +used if \code{xvar} and \code{yvar} are empty (Default: \code{NULL}).} + +\item{overlay_data}{A \code{\link{logical}} on whether training data should be overlaid +on the plot. Only used for \code{\link{DistributionModel}} objects (Default: \code{FALSE}).} + +\item{plot}{A \code{\link{logical}} indication of whether the result is to be plotted +(Default: \code{TRUE}).} + +\item{fname}{A \code{\link{character}} specifying the output file name that a created figure +should be written to.} + +\item{title}{Allows to respecify the title through a \code{\link{character}} (Default: \code{NULL}).} + +\item{pal}{An optional \code{\link{vector}} with continuous custom colours (Default: \code{NULL}).} + +\item{...}{Other engine specific parameters.} } \value{ +Saved niche plot in \code{'fname'} if specified, otherwise plot. } \description{ +The suitability of any given area for a biodiversity feature can in +many instances be complex and non-linear. Visualizing obtained suitability +predictions (e.g. from \code{\link[=train]{train()}}) against the underlying predictors might help +to explain the gradients of the niche. + +Supported inputs for this function are either single trained \code{ibis.iSDM} +\code{\link{DistributionModel}} objects or alternatively a set of three \code{\link{SpatRaster}} objects. +In both cases, users can specify \code{"xvar"} and \code{"yvar"} explicitly +or leave them empty. In the latter case, a principal component analysis (PCA) +is conducted on the full environmental stack (loaded from \code{\link{DistributionModel}} +or supplied separately).
+} +\examples{ +# Make quick prediction +background <- terra::rast(system.file('extdata/europegrid_50km.tif', +package='ibis.iSDM',mustWork = TRUE)) +virtual_points <- sf::st_read(system.file('extdata/input_data.gpkg', package='ibis.iSDM'), 'points',quiet = TRUE) +ll <- list.files(system.file('extdata/predictors/',package = 'ibis.iSDM',mustWork = TRUE),full.names = TRUE) + +# Load them as rasters +predictors <- terra::rast(ll);names(predictors) <- tools::file_path_sans_ext(basename(ll)) + +# Add GLM as an engine and predict +fit <- distribution(background) |> +add_biodiversity_poipo(virtual_points, field_occurrence = 'Observed', +name = 'Virtual points',docheck = FALSE) |> +add_predictors(predictors, transform = 'none',derivates = 'none') |> +engine_glm() |> +train() + +# Plot niche for prediction for temperature and forest cover +nicheplot(fit, xvar = "bio01_mean_50km", yvar = "CLC3_312_mean_50km" ) +} +\seealso{ +\link{partial}, \link{plot.DistributionModel} +} +\keyword{misc} diff --git a/man/objects_size.Rd b/man/objects_size.Rd new file mode 100644 index 00000000..e98d5662 --- /dev/null +++ b/man/objects_size.Rd @@ -0,0 +1,36 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils.R +\name{objects_size} +\alias{objects_size} +\title{Shows size of objects in the R environment} +\usage{ +objects_size(n = 10) +} +\arguments{ +\item{n}{Number of objects to show, Default: \code{10}} +} +\value{ +A data frame with the row names indicating the object name, +the field 'Type' indicating the object type, 'Size' indicating the object size, +and the columns 'Length/Rows' and 'Columns' indicating the object dimensions if applicable. +} +\description{ +Shows the size of the objects currently in the R environment. +Helps to locate large objects cluttering the R environment and/or +causing memory problems during the execution of large workflows. 
+} +\examples{ +if(interactive()){ + + #creating dummy objects + x <- matrix(runif(100), 10, 10) + y <- matrix(runif(10000), 100, 100) + + #reading their in-memory size + objects_size() + +} +} +\author{ +Blas Benito +} diff --git a/man/posterior_predict_stanfit.Rd b/man/posterior_predict_stanfit.Rd index bc482694..3e357281 100644 --- a/man/posterior_predict_stanfit.Rd +++ b/man/posterior_predict_stanfit.Rd @@ -8,7 +8,7 @@ posterior_predict_stanfit( obj, form, newdata, - mode = "predictor", + type = "predictor", family = NULL, offset = NULL, draws = NULL ) @@ -21,7 +21,7 @@ posterior_predict_stanfit( \item{newdata}{A \link{data.frame} with new data to be used for prediction.} -\item{mode}{A \code{\link{character}} of whether the linear \code{predictor} or the \code{response} +\item{type}{A \code{\link{character}} of whether the linear \code{predictor} or the \code{response} is to be summarized.} \item{family}{A \code{\link{character}} giving the family for simulating linear response diff --git a/man/predictor_derivate.Rd b/man/predictor_derivate.Rd index d172db3e..ac5df5d1 100644 --- a/man/predictor_derivate.Rd +++ b/man/predictor_derivate.Rd @@ -18,7 +18,8 @@ predictor_derivate( \item{env}{A \code{\link{SpatRaster}} object.} \item{option}{A \code{\link{vector}} stating whether predictors should be preprocessed in any -way (Options: \code{'none'}, \code{'quadratic'}, \code{'hinge'}, \code{'thresh'}, \code{'bin'}).} +way (Options: \code{'none'}, \code{'quadratic'}, \code{'hinge'}, +\code{'kmeans'}, \code{'thresh'}, \code{'bin'}).} \item{nknots}{The number of knots to be used for the transformation (Default: \code{4}).} @@ -61,13 +62,21 @@ The number of thresholds and thus new derivates is specified via the parameter \item \code{'bin'} - Creates a factor representation of a covariates by cutting the range of covariates by their percentiles. The number of percentile cuts and thus new derivates is specified via the parameter \code{'nknots'} (Default: \code{4}). +\item \code{'kmeans'} - Creates a factor representation of a covariate through a +\code{\link[=kmeans]{kmeans()}} clustering. The number of clusters is specified via the parameter \code{'nknots'}. } } \examples{ -\dontrun{ -# Create a hinge transformation of one or multiple SpatRaster. -predictor_derivate(covs, option = "hinge", knots = 4) -} +# Dummy raster + r_ori <- terra::rast(nrows = 10, ncols = 10, res = 0.05, xmin = -1.5, xmax = 1.5, ymin = -1.5, ymax = 1.5, vals = rpois(3600, 10)) + +# Create a hinge transformation with 4 knots of one or multiple SpatRaster. +new <- predictor_derivate(r_ori, option = "hinge", nknots = 4) +terra::plot(new) + +# Or a quadratic transformation +new2 <- predictor_derivate(r_ori, option = "quad", nknots = 4) +terra::plot(new2) } \seealso{ diff --git a/man/predictor_transform.Rd b/man/predictor_transform.Rd index f8287dc6..33eb27ca 100644 --- a/man/predictor_transform.Rd +++ b/man/predictor_transform.Rd @@ -75,10 +75,14 @@ statistical moments on which the models were trained for any variable transforma also to ensure that variable ranges are consistent among relative values.
} \examples{ -\dontrun{ -# Where x is a SpatRaster -new_x <- predictor_transform(x, option = 'scale') -} +# Dummy raster +r_ori <- terra::rast(nrows = 10, ncols = 10, res = 0.05, xmin = -1.5, xmax = 1.5, ymin = -1.5, ymax = 1.5, vals = rnorm(3600,mean = .01,sd = .1)) + +# Normalize +r_norm <- predictor_transform(r_ori, option = 'norm') +new <- c(r_ori, r_norm) +names(new) <- c("original scale", "normalized units") +terra::plot(new) } \seealso{ diff --git a/man/run_parallel.Rd b/man/run_parallel.Rd new file mode 100644 index 00000000..8b1a4e5f --- /dev/null +++ b/man/run_parallel.Rd @@ -0,0 +1,47 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/misc.R +\name{run_parallel} +\alias{run_parallel} +\title{Parallel computation of function} +\usage{ +run_parallel( + X, + FUN, + cores = 1, + approach = "future", + export_packages = NULL, + ... +) +} +\arguments{ +\item{X}{A \code{\link{list}}, \code{\link{data.frame}} or \code{\link{matrix}} object to be fed to a single +core or parallel \link{apply} call.} + +\item{FUN}{A \code{\link{function}} passed on for computation.} + +\item{cores}{A \link{numeric} of the number of cores to use (Default: \code{1}).} + +\item{approach}{\code{\link{character}} for the parallelization approach taken (Options: +\code{"parallel"} or \code{"future"}).} + +\item{export_packages}{A \code{\link{vector}} with packages to export for use on parallel +nodes (Default: \code{NULL}).} + +\item{...}{Any other parameter passed on.} +} +\description{ +Some computations take a considerable amount of time to execute. +This function provides a helper wrapper for running functions of the +\code{\link{apply}} family to specified outputs. +} +\details{ +By default, the \link{parallel} package is used for parallel computation, +however an option exists to use the \link{future} package instead. +} +\examples{ +\dontrun{ + run_parallel(list, mean, cores = 4) +} + +} +\keyword{utils} diff --git a/man/scenario.Rd b/man/scenario.Rd index 382b961d..237307e4 100644 --- a/man/scenario.Rd +++ b/man/scenario.Rd @@ -30,7 +30,7 @@ that contains the projections of a model. } \note{ If a limit has been defined already during \code{\link[=train]{train()}}, for example by adding -an extrapolation limit \code{\link[=add_control_extrapolation]{add_control_extrapolation()}}, this zonal layer can be +an extrapolation limit \code{\link[=add_limits_extrapolation]{add_limits_extrapolation()}}, this zonal layer can be reused for the projections. \strong{Note: This effectively fixes the projections to certain areas.} } \examples{ diff --git a/man/threshold.Rd b/man/threshold.Rd index 9aae1ce0..745ee5eb 100644 --- a/man/threshold.Rd +++ b/man/threshold.Rd @@ -127,6 +127,8 @@ Requires the \code{"modEvA"} package to be installed. Requires the \code{"modEvA"} package to be installed. \item \code{'AUC'} = Determines the optimal AUC of presence records. Requires the \code{"modEvA"} package to be installed. +\item \code{'kmeans'} = Determines a threshold based on a two-cluster k-means clustering. +The presence class is assumed to be the cluster with the larger mean.
} } \examples{ diff --git a/man/validate.Rd b/man/validate.Rd index bdc190a2..5a6c1c91 100644 --- a/man/validate.Rd +++ b/man/validate.Rd @@ -80,7 +80,7 @@ FP the false positive and FN the false negative) \item \code{'logloss'} = Log loss, TBD \item \code{'normgini'} = Normalized Gini index, TBD \item \code{'cont.boyce'} = Continuous Boyce index, Ratio of predicted against expected frequency calculated over -a moving window: \deqn{\frac{P_{i}{E_{i}} }}, where \deqn{ P_{i} = \frac{p_{i}}{\sum{j=1}^{b} p_{j} }} and \deqn{ E_{i} = \frac{a_{i}}{\sum{j=1}^{b} a_{j} }} +a moving window: \deqn{\frac{P_{i}}{E_{i}}}, where \deqn{ P_{i} = \frac{p_{i}}{\sum_{j=1}^{b} p_{j}} } and \deqn{ E_{i} = \frac{a_{i}}{\sum_{j=1}^{b} a_{j}} } } \strong{Discrete:} diff --git a/pkgdown/favicon/apple-touch-icon-120x120.png b/pkgdown/favicon/apple-touch-icon-120x120.png index 2766747f..142e8d21 100644 Binary files a/pkgdown/favicon/apple-touch-icon-120x120.png and b/pkgdown/favicon/apple-touch-icon-120x120.png differ diff --git a/pkgdown/favicon/apple-touch-icon-152x152.png b/pkgdown/favicon/apple-touch-icon-152x152.png index 3459cf29..929fc717 100644 Binary files a/pkgdown/favicon/apple-touch-icon-152x152.png and b/pkgdown/favicon/apple-touch-icon-152x152.png differ diff --git a/pkgdown/favicon/apple-touch-icon-180x180.png b/pkgdown/favicon/apple-touch-icon-180x180.png index e22ab0de..27df41f1 100644 Binary files a/pkgdown/favicon/apple-touch-icon-180x180.png and b/pkgdown/favicon/apple-touch-icon-180x180.png differ diff --git a/pkgdown/favicon/apple-touch-icon-60x60.png b/pkgdown/favicon/apple-touch-icon-60x60.png index 42f00214..4622a578 100644 Binary files a/pkgdown/favicon/apple-touch-icon-60x60.png and b/pkgdown/favicon/apple-touch-icon-60x60.png differ diff --git a/pkgdown/favicon/apple-touch-icon-76x76.png b/pkgdown/favicon/apple-touch-icon-76x76.png index d37b5ed3..f3efa90f 100644 Binary files a/pkgdown/favicon/apple-touch-icon-76x76.png and b/pkgdown/favicon/apple-touch-icon-76x76.png differ diff --git a/pkgdown/favicon/apple-touch-icon.png b/pkgdown/favicon/apple-touch-icon.png index 9d041968..b5116d85 100644 Binary files a/pkgdown/favicon/apple-touch-icon.png and b/pkgdown/favicon/apple-touch-icon.png differ diff --git a/pkgdown/favicon/favicon-16x16.png b/pkgdown/favicon/favicon-16x16.png index 258211a2..2d91606d 100644 Binary files a/pkgdown/favicon/favicon-16x16.png and b/pkgdown/favicon/favicon-16x16.png differ diff --git a/pkgdown/favicon/favicon-32x32.png b/pkgdown/favicon/favicon-32x32.png index 56ef9a6e..f7472337 100644 Binary files a/pkgdown/favicon/favicon-32x32.png and b/pkgdown/favicon/favicon-32x32.png differ diff --git a/pkgdown/favicon/favicon.ico b/pkgdown/favicon/favicon.ico index db96b931..36322915 100644 Binary files a/pkgdown/favicon/favicon.ico and b/pkgdown/favicon/favicon.ico differ diff --git a/tests/testthat/test_Scenarios.R b/tests/testthat/test_Scenarios.R index 1d2746be..20d6a34c 100644 --- a/tests/testthat/test_Scenarios.R +++ b/tests/testthat/test_Scenarios.R @@ -213,7 +213,7 @@ test_that('Scenarios and constraints', { expect_null(sc$get_limits()) # Add covariates in various transformations - sc <- scenario(fit) + sc <- scenario(fit, copy_model = TRUE) # Copy the model over for the test x <- sc |> add_predictors(pred_future, transform = "none") expect_length(x$get_predictor_names(), 9) suppressWarnings( diff --git a/tests/testthat/test_functions.R b/tests/testthat/test_functions.R index 05ea1086..233ee73f 100644 --- a/tests/testthat/test_functions.R +++
@@ -167,6 +167,10 @@ test_that('Custom functions - Test gridded transformations and ensembles', {
   expect_match(attr(tr1, "method"), "percentile")
   expect_match(attr(tr1, "format"), "binary")
 
+  # k-means threshold
+  expect_no_error(tr2 <- threshold(r2, method = "kmeans"))
+  expect_match(attr(tr2, "method"), "kmeans")
+  expect_match(attr(tr2, "format"), "binary")
   # --- #
 })
@@ -200,6 +204,14 @@ test_that('Test other generic functions', {
   suppressWarnings( expect_true( is.nan(logit(5)) ) )
   expect_length(logisticRichard(rpois(10,.5)), 10)
 
+  # Combine formulas
+  form1 <- y ~ x + z
+  form2 <- y ~ x
+  expect_true(is.formula( combine_formulas(form1, form2) ))
+
+  # Modal calculations
+  expect_type( modal(rpois(50, 1)), "integer")
+
 })
 
 # ---- #
diff --git a/tests/testthat/test_parallelPrediction.R b/tests/testthat/test_parallelPrediction.R
new file mode 100644
index 00000000..99f29aee
--- /dev/null
+++ b/tests/testthat/test_parallelPrediction.R
@@ -0,0 +1,85 @@
+# Ensure that the future setup works.
+# This might be tricky in CI, as different setups make use of different
+# parallel configurations.
+test_that('Testing parallel setup', {
+
+  # Silence setup messages
+  options("ibis.setupmessages" = FALSE)
+
+  skip_if_not_installed("future")
+  skip_if_not_installed("doFuture")
+
+  # Load data
+  # Background raster
+  background <- terra::rast(system.file('extdata/europegrid_50km.tif', package = 'ibis.iSDM', mustWork = TRUE))
+  # Get test species
+  virtual_points <- sf::st_read(system.file('extdata/input_data.gpkg', package = 'ibis.iSDM', mustWork = TRUE), 'points', quiet = TRUE)
+  # Get list of test predictors
+  ll <- list.files(system.file('extdata/predictors/', package = 'ibis.iSDM', mustWork = TRUE), full.names = TRUE)
+  # Load them as rasters
+  predictors <- terra::rast(ll); names(predictors) <- tools::file_path_sans_ext(basename(ll))
+
+  # Add pseudo-absences
+  abs <- pseudoabs_settings(nrpoints = 0, min_ratio = 1, method = "mcp")
+  suppressMessages(
+    poa <- add_pseudoabsence(virtual_points, template = background, field_occurrence = "Observed", settings = abs)
+  )
+
+  # Now set up the model step by step
+  x <- distribution(background) |>
+    add_biodiversity_poipa(poipa = poa, field_occurrence = 'Observed', docheck = FALSE) |>
+    add_predictors(predictors, transform = 'none', derivates = 'none') |>
+    engine_glm()
+
+  expect_no_error(
+    fit1 <- train(x, "test", inference_only = FALSE)
+  )
+
+  # Now enable parallel processing
+  expect_no_error(
+    expect_invisible(
+      ibis_enable_parallel()
+    )
+  )
+  # Set the number of threads
+  expect_no_error(
+    expect_invisible(
+      ibis_set_threads(2)
+    )
+  )
+
+  # Set the strategy
+  expect_no_error(
+    expect_invisible(
+      ibis_set_strategy(strategy = "sequential")
+    )
+  )
+
+  # --- #
+  # Now define a plan
+  ibis_future()
+
+  expect_no_error(
+    fit2 <- train(x, "test", inference_only = FALSE)
+  )
+
+  # Try with a multisession plan
+  ibis_future(strategy = "multisession")
+
+  expect_no_error(
+    fit3 <- train(x, "test", inference_only = FALSE)
+  )
+
+  # Expect near-identical coefficients across runs
+  expect_gte(
+    cor(fit1$get_coefficients()[,2], fit2$get_coefficients()[,2]),
+    0.99
+  )
+  expect_gte(
+    cor(fit1$get_coefficients()[,2], fit3$get_coefficients()[,2]),
+    0.99
+  )
+
+  # Set parallel to FALSE again
+  options('ibis.runparallel' = FALSE)
+})
diff --git a/vignettes/Get_started.Rmd b/vignettes/Get_started.Rmd
deleted file mode 100644
index e76d1046..00000000
--- a/vignettes/Get_started.Rmd
+++ /dev/null
@@ -1,19 +0,0 @@
----
-title: "Get started"
-output: rmarkdown::html_vignette
-vignette: >
-  %\VignetteIndexEntry{Get started}
-  %\VignetteEngine{knitr::rmarkdown}
-  %\VignetteEncoding{UTF-8}
----
-
-```{r, include = FALSE}
-knitr::opts_chunk$set(
-  collapse = TRUE,
-  comment = "#>"
-)
-```
-
-```{r setup, eval = FALSE}
-library(ibis.iSDM)
-```
diff --git a/vignettes/articles/08_frequently-asked-questions.Rmd b/vignettes/articles/08_frequently-asked-questions.Rmd
index f180e96c..9dd5256e 100644
--- a/vignettes/articles/08_frequently-asked-questions.Rmd
+++ b/vignettes/articles/08_frequently-asked-questions.Rmd
@@ -324,6 +324,39 @@ plot(fit$get_data("fit_best")$evaluation_log)
 ```
 
+
+
+I am predicting over a large area, is there a way to parallelize the computation?
+
+
+Yes and no.
+Generally, computation speed is determined by the respective engine, and not every
+engine supports, for example, multi-threaded computation.
+However, the most computationally demanding step in the package is usually the
+spatial prediction, and there is some functionality to 'tile' up the data over which
+predictions are made.
+
+This uses the R [`future`] package for asynchronous projections.
+
+**Note: This usually won't improve things for small models or few covariates, as the setup overhead negates any speed improvements.**
+
+To set this up, simply execute the following:
+```{r, echo=TRUE,eval=FALSE}
+
+# Set parallel options
+ibis_enable_parallel()             # Enable parallel processing in general
+ibis_set_threads(4)                # Use 4 threads
+ibis_set_strategy("multisession")  # Use an R multisession strategy
+ibis_future()                      # Set up a future plan
+```
+
+Now most predictions should make use of the specified future plan. This applies to
+both initial model predictions and projections.
+
+If you aim to parallelize over a range of species instead, it might be more worthwhile
+to parallelize the iteration over species rather than each prediction (see the sketch below).
+
+
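A minimal sketch of that per-species route could reuse the `run_parallel()` helper documented above. `species_list` and the body of `fit_one_species()` are hypothetical placeholders for your own occurrence data and modelling wrapper:

```r
# Hypothetical sketch: parallelize the iteration over species instead of
# the spatial prediction. `species_list` stands in for a list of
# per-species occurrence datasets.
fit_one_species <- function(sp) {
  # ... build a distribution() object for `sp`, add predictors and an
  # engine, then call train() ...
}

fits <- run_parallel(species_list, fit_one_species,
                     cores = 4, approach = "future")
```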
+ ## Model troubleshooting