From 6875ffcc77298149bc0b7bc17ada94e50f5d371f Mon Sep 17 00:00:00 2001
From: Martin Jung <martinjung@zoho.com>
Date: Thu, 7 Nov 2024 20:29:33 +0100
Subject: [PATCH] Small helper function for object size and validation fixes

---
 NAMESPACE           |   2 +
 R/utils.R           | 105 ++++++++++++++++++++++++++++++++++++++++++++
 R/validate.R        |  16 +++++--
 _pkgdown.yml        |   1 +
 man/objects_size.Rd |  36 +++++++++++++++
 5 files changed, 157 insertions(+), 3 deletions(-)
 create mode 100644 man/objects_size.Rd

diff --git a/NAMESPACE b/NAMESPACE
index 0f685e84..36c99004 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -126,6 +126,7 @@ export(modal)
 export(new_id)
 export(new_waiver)
 export(nicheplot)
+export(objects_size)
 export(partial)
 export(partial.DistributionModel)
 export(partial_density)
@@ -172,3 +173,4 @@ import(terra)
 importFrom(foreach,"%do%")
 importFrom(foreach,"%dopar%")
 importFrom(stats,effects)
+importFrom(utils,object.size)
diff --git a/R/utils.R b/R/utils.R
index 3ff76625..91c408a5 100644
--- a/R/utils.R
+++ b/R/utils.R
@@ -632,3 +632,108 @@ collect_occurrencepoints <- function(model, include_absences = FALSE,
   }
   return(locs)
 }
+
+#' @title Shows size of objects in the R environment
+#' @description Shows the size of the objects currently in the R environment.
+#' Helps to locate large objects cluttering the R environment and/or
+#' causing memory problems during the execution of large workflows.
+#'
+#' @param n Number of objects to show, Default: `10`
+#' @return A data frame with the row names indicating the object name,
+#' the field 'Type' indicating the object type, 'Size' indicating the object size,
+#' and the columns 'Length/Rows' and 'Columns' indicating the object dimensions if applicable.
+#'
+#' @examples
+#' if(interactive()){
+#'
+#'  #creating dummy objects
+#'  x <- matrix(runif(100), 10, 10)
+#'  y <- matrix(runif(10000), 100, 100)
+#'
+#'  #reading their in-memory size
+#'  objects_size()
+#'
+#' }
+#' @author Bias Benito
+#' @rdname objects_size
+#' @importFrom utils object.size
+#' @export
+objects_size <- function(n = 10) {
+
+  .ls.objects <- function (
+    pos = 1,
+    pattern,
+    order.by,
+    decreasing=FALSE,
+    head=FALSE,
+    n=5
+  ){
+
+    napply <- function(names, fn) sapply(
+      names,
+      function(x) fn(get(x, pos = pos))
+    )
+
+    names <- ls(
+      pos = pos,
+      pattern = pattern
+    )
+
+    obj.class <- napply(
+      names,
+      function(x) as.character(class(x))[1]
+    )
+
+    obj.mode <- napply(
+      names,
+      mode
+    )
+
+    obj.type <- ifelse(
+      is.na(obj.class),
+      obj.mode,
+      obj.class
+    )
+
+    obj.prettysize <- napply(
+      names,
+      function(x) {format(utils::object.size(x), units = "auto") }
+    )
+
+    obj.size <- napply(
+      names,
+      object.size
+    )
+
+    obj.dim <- t(
+      napply(
+        names,
+        function(x)as.numeric(dim(x))[1:2]
+      )
+    )
+
+    vec <- is.na(obj.dim)[, 1] & (obj.type != "function")
+
+    obj.dim[vec, 1] <- napply(names, length)[vec]
+
+    out <- data.frame(
+      obj.type,
+      obj.prettysize,
+      obj.dim
+    )
+    names(out) <- c("Type", "Size", "Length/Rows", "Columns")
+    if (!missing(order.by))
+      out <- out[order(out[[order.by]], decreasing=decreasing), ]
+    if (head)
+      out <- head(out, n)
+    out
+  }
+
+  .ls.objects(
+    order.by = "Size",
+    decreasing=TRUE,
+    head=TRUE,
+    n=n
+  )
+
+}
diff --git a/R/validate.R b/R/validate.R
index 2d89f124..9a2d9c39 100644
--- a/R/validate.R
+++ b/R/validate.R
@@ -426,6 +426,10 @@ methods::setMethod(
         return(results)
       }
 
+    # R2 score
+    R2_Score <- function(pred, obs, na.rm = TRUE) {
+      return( 1 - sum((obs - pred)^2,na.rm = na.rm)/sum((obs - mean(obs,na.rm = na.rm))^2,na.rm = na.rm) )
+    }
     # Function for Root-mean square error
     RMSE <- function(pred, obs, na.rm = TRUE) {
       sqrt(mean((pred - obs)^2, na.rm = na.rm))
@@ -434,6 +438,10 @@ methods::setMethod(
     MAE <- function(pred, obs, na.rm = TRUE) {
       mean(abs(pred - obs), na.rm = na.rm)
     }
+    # Mean Absolute Percentage Error Loss
+    MAPE <- function(pred, obs, na.rm = TRUE){
+      mean(abs((obs - pred)/obs), na.rm = TRUE)
+    }
     # Function for log loss/cross-entropy loss.
     Poisson_LogLoss <- function(y_pred, y_true) {
       eps <- 1e-15
@@ -458,21 +466,23 @@ methods::setMethod(
       modelid = id,
       name = name,
       method = method,
-      metric = c('n','rmse', 'mae',
+      metric = c('n', 'r2', 'rmse', 'mae', 'mape',
                  'logloss','normgini',
                  'cont.boyce'),
       value = NA
     )
     # - #
     out$value[out$metric=='n'] <- nrow(df2) # Number of records
+    out$value[out$metric=='r2'] <- R2_Score(pred = df2$pred, obs = df2[[point_column]]) # R2
     out$value[out$metric=='rmse'] <- RMSE(pred = df2$pred, obs = df2[[point_column]]) # RMSE
     out$value[out$metric=='mae'] <- MAE(pred = df2$pred, obs = df2[[point_column]]) # Mean absolute error
+    out$value[out$metric=='mape'] <- MAPE(pred = df2$pred, obs = df2[[point_column]]) # Mean Absolute Percentage Error Loss
     out$value[out$metric=='normgini'] <- NormalizedGini(y_pred = df2$pred, y_true = df2[[point_column]])
 
     if(!is.null(mod)){
       if( any( sapply(mod$model$biodiversity, function(x) x$family) == "binomial" ) ){
-        LogLoss <- function(y_pred, y_true) {
-          y_pred <- pmax(y_pred, 1e-15)
+        LogLoss <- function(y_pred, y_true, eps = 1e-15) {
+          y_pred <- pmax(pmin(y_pred, 1 - eps), eps)
           LogLoss <- -mean(y_true * log(y_pred) + (1 - y_true) * log(1 - y_pred))
           return(LogLoss)
         }
diff --git a/_pkgdown.yml b/_pkgdown.yml
index d9e5dd68..8a7f075b 100644
--- a/_pkgdown.yml
+++ b/_pkgdown.yml
@@ -136,6 +136,7 @@ reference:
       - predictor_derivate
       - predictor_filter
       - interpolate_gaps
+      - objects_size
       - run_stan
       - wrap_stanmodel
       - sanitize_names
diff --git a/man/objects_size.Rd b/man/objects_size.Rd
new file mode 100644
index 00000000..e98d5662
--- /dev/null
+++ b/man/objects_size.Rd
@@ -0,0 +1,36 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/utils.R
+\name{objects_size}
+\alias{objects_size}
+\title{Shows size of objects in the R environment}
+\usage{
+objects_size(n = 10)
+}
+\arguments{
+\item{n}{Number of objects to show, Default: \code{10}}
+}
+\value{
+A data frame with the row names indicating the object name,
+the field 'Type' indicating the object type, 'Size' indicating the object size,
+and the columns 'Length/Rows' and 'Columns' indicating the object dimensions if applicable.
+}
+\description{
+Shows the size of the objects currently in the R environment.
+Helps to locate large objects cluttering the R environment and/or
+causing memory problems during the execution of large workflows.
+}
+\examples{
+if(interactive()){
+
+ #creating dummy objects
+ x <- matrix(runif(100), 10, 10)
+ y <- matrix(runif(10000), 100, 100)
+
+ #reading their in-memory size
+ objects_size()
+
+}
+}
+\author{
+Bias Benito
+}