From 6875ffcc77298149bc0b7bc17ada94e50f5d371f Mon Sep 17 00:00:00 2001 From: Martin Jung Date: Thu, 7 Nov 2024 20:29:33 +0100 Subject: [PATCH] Small helper function for object size and validation fixes --- NAMESPACE | 2 + R/utils.R | 105 ++++++++++++++++++++++++++++++++++++++++++++ R/validate.R | 16 +++++-- _pkgdown.yml | 1 + man/objects_size.Rd | 36 +++++++++++++++ 5 files changed, 157 insertions(+), 3 deletions(-) create mode 100644 man/objects_size.Rd diff --git a/NAMESPACE b/NAMESPACE index 0f685e84..36c99004 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -126,6 +126,7 @@ export(modal) export(new_id) export(new_waiver) export(nicheplot) +export(objects_size) export(partial) export(partial.DistributionModel) export(partial_density) @@ -172,3 +173,4 @@ import(terra) importFrom(foreach,"%do%") importFrom(foreach,"%dopar%") importFrom(stats,effects) +importFrom(utils,object.size) diff --git a/R/utils.R b/R/utils.R index 3ff76625..91c408a5 100644 --- a/R/utils.R +++ b/R/utils.R @@ -632,3 +632,108 @@ collect_occurrencepoints <- function(model, include_absences = FALSE, } return(locs) } + +#' @title Shows size of objects in the R environment +#' @description Shows the size of the objects currently in the R environment. +#' Helps to locate large objects cluttering the R environment and/or +#' causing memory problems during the execution of large workflows. +#' +#' @param n Number of objects to show, Default: `10` +#' @return A data frame with the row names indicating the object name, +#' the field 'Type' indicating the object type, 'Size' indicating the object size, +#' and the columns 'Length/Rows' and 'Columns' indicating the object dimensions if applicable. +#' +#' @examples +#' if(interactive()){ +#' +#' #creating dummy objects +#' x <- matrix(runif(100), 10, 10) +#' y <- matrix(runif(10000), 100, 100) +#' +#' #reading their in-memory size +#' objects_size() +#' +#' } +#' @author Bias Benito +#' @rdname objects_size +#' @importFrom utils object.size +#' @export +objects_size <- function(n = 10) { + + .ls.objects <- function ( + pos = 1, + pattern, + order.by, + decreasing=FALSE, + head=FALSE, + n=5 + ){ + + napply <- function(names, fn) sapply( + names, + function(x) fn(get(x, pos = pos)) + ) + + names <- ls( + pos = pos, + pattern = pattern + ) + + obj.class <- napply( + names, + function(x) as.character(class(x))[1] + ) + + obj.mode <- napply( + names, + mode + ) + + obj.type <- ifelse( + is.na(obj.class), + obj.mode, + obj.class + ) + + obj.prettysize <- napply( + names, + function(x) {format(utils::object.size(x), units = "auto") } + ) + + obj.size <- napply( + names, + object.size + ) + + obj.dim <- t( + napply( + names, + function(x)as.numeric(dim(x))[1:2] + ) + ) + + vec <- is.na(obj.dim)[, 1] & (obj.type != "function") + + obj.dim[vec, 1] <- napply(names, length)[vec] + + out <- data.frame( + obj.type, + obj.prettysize, + obj.dim + ) + names(out) <- c("Type", "Size", "Length/Rows", "Columns") + if (!missing(order.by)) + out <- out[order(out[[order.by]], decreasing=decreasing), ] + if (head) + out <- head(out, n) + out + } + + .ls.objects( + order.by = "Size", + decreasing=TRUE, + head=TRUE, + n=n + ) + +} diff --git a/R/validate.R b/R/validate.R index 2d89f124..9a2d9c39 100644 --- a/R/validate.R +++ b/R/validate.R @@ -426,6 +426,10 @@ methods::setMethod( return(results) } + # R2 score + R2_Score <- function(pred, obs, na.rm = TRUE) { + return( 1 - sum((obs - pred)^2,na.rm = na.rm)/sum((obs - mean(obs,na.rm = na.rm))^2,na.rm = na.rm) ) + } # Function for Root-mean square error RMSE <- function(pred, obs, na.rm = TRUE) { sqrt(mean((pred - obs)^2, na.rm = na.rm)) @@ -434,6 +438,10 @@ methods::setMethod( MAE <- function(pred, obs, na.rm = TRUE) { mean(abs(pred - obs), na.rm = na.rm) } + # Mean Absolute Percentage Error Loss + MAPE <- function(pred, obs, na.rm = TRUE){ + mean(abs((obs - pred)/obs), na.rm = TRUE) + } # Function for log loss/cross-entropy loss. Poisson_LogLoss <- function(y_pred, y_true) { eps <- 1e-15 @@ -458,21 +466,23 @@ methods::setMethod( modelid = id, name = name, method = method, - metric = c('n','rmse', 'mae', + metric = c('n', 'r2', 'rmse', 'mae', 'mape', 'logloss','normgini', 'cont.boyce'), value = NA ) # - # out$value[out$metric=='n'] <- nrow(df2) # Number of records + out$value[out$metric=='r2'] <- R2_Score(pred = df2$pred, obs = df2[[point_column]]) # R2 out$value[out$metric=='rmse'] <- RMSE(pred = df2$pred, obs = df2[[point_column]]) # RMSE out$value[out$metric=='mae'] <- MAE(pred = df2$pred, obs = df2[[point_column]]) # Mean absolute error + out$value[out$metric=='mape'] <- MAPE(pred = df2$pred, obs = df2[[point_column]]) # Mean Absolute Percentage Error Loss out$value[out$metric=='normgini'] <- NormalizedGini(y_pred = df2$pred, y_true = df2[[point_column]]) if(!is.null(mod)){ if( any( sapply(mod$model$biodiversity, function(x) x$family) == "binomial" ) ){ - LogLoss <- function(y_pred, y_true) { - y_pred <- pmax(y_pred, 1e-15) + LogLoss <- function(y_pred, y_true, eps = 1e-15) { + y_pred <- pmax(pmin(y_pred, 1 - eps), eps) LogLoss <- -mean(y_true * log(y_pred) + (1 - y_true) * log(1 - y_pred)) return(LogLoss) } diff --git a/_pkgdown.yml b/_pkgdown.yml index d9e5dd68..8a7f075b 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -136,6 +136,7 @@ reference: - predictor_derivate - predictor_filter - interpolate_gaps + - objects_size - run_stan - wrap_stanmodel - sanitize_names diff --git a/man/objects_size.Rd b/man/objects_size.Rd new file mode 100644 index 00000000..e98d5662 --- /dev/null +++ b/man/objects_size.Rd @@ -0,0 +1,36 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils.R +\name{objects_size} +\alias{objects_size} +\title{Shows size of objects in the R environment} +\usage{ +objects_size(n = 10) +} +\arguments{ +\item{n}{Number of objects to show, Default: \code{10}} +} +\value{ +A data frame with the row names indicating the object name, +the field 'Type' indicating the object type, 'Size' indicating the object size, +and the columns 'Length/Rows' and 'Columns' indicating the object dimensions if applicable. +} +\description{ +Shows the size of the objects currently in the R environment. +Helps to locate large objects cluttering the R environment and/or +causing memory problems during the execution of large workflows. +} +\examples{ +if(interactive()){ + + #creating dummy objects + x <- matrix(runif(100), 10, 10) + y <- matrix(runif(10000), 100, 100) + + #reading their in-memory size + objects_size() + +} +} +\author{ +Bias Benito +}