From eae57d43e2ad06efb1f1a64f5693d419653a346f Mon Sep 17 00:00:00 2001 From: Juniper Simonis Date: Fri, 6 Sep 2019 20:56:44 -0700 Subject: [PATCH 01/18] in prog eval work --- R/data_input_output.R | 11 ++++++++--- R/process_casts.R | 45 +++++++++++++++++++++++++++++++++++++++++++ notes.R | 24 +++++++++++++++++++++++ 3 files changed, 77 insertions(+), 3 deletions(-) create mode 100644 notes.R diff --git a/R/data_input_output.R b/R/data_input_output.R index 307d8f28e..ade01062e 100644 --- a/R/data_input_output.R +++ b/R/data_input_output.R @@ -318,7 +318,8 @@ read_rodents <- function(main = ".", data_sets = c("all", "controls"), read_covariates <- function(main = ".", control_files = files_control(), arg_checks = TRUE){ check_args(arg_checks) - fpath <- file_path(main = main, sub = "data", files = "covariates.csv", + fpath <- file_path(main = main, sub = "data", + files = control_files$filename_cov, arg_checks = arg_checks) if(!file.exists(fpath)){ return(prep_covariates(main = main, arg_checks = arg_checks)) @@ -374,7 +375,9 @@ read_covariate_casts <- function(main = ".", control_files = files_control(), read_moons <- function(main = ".", control_files = files_control(), arg_checks = TRUE){ check_args(arg_checks = arg_checks) - fpath <- file_path(main, "data", "moon_dates.csv") + fpath <- file_path(main = main, sub = "data", + files = control_files$filename_moons, + arg_checks = arg_checks) if(!file.exists(fpath)){ return(prep_moons(main = main, arg_checks = arg_checks)) } @@ -388,7 +391,9 @@ read_moons <- function(main = ".", control_files = files_control(), read_metadata <- function(main = ".", control_files = files_control(), arg_checks = TRUE){ check_args(arg_checks) - fpath <- file_path(main, "data", "metadata.yaml") + fpath <- file_path(main = main, sub = "data", + files = control_files$filename_meta, + arg_checks = arg_checks) if(!file.exists(fpath)){ md <- prep_metadata(main = main, arg_checks = arg_checks) return(md) diff --git a/R/process_casts.R b/R/process_casts.R index 25bcdad3b..961298348 100644 --- a/R/process_casts.R +++ b/R/process_casts.R @@ -1,4 +1,49 @@ +add_obs_to_cast_tab <- function(main = ".", cast_id = NULL, + arg_checks = TRUE){ + check_args(arg_checks) + if(is.null(cast_id)){ + casts_meta <- select_casts(main = main, arg_checks = arg_checks) + cast_id <- max(casts_meta$cast_id) + } + cast_tab <- read_cast_tab(main = main, cast_id = cast_id) + cast_tab <- na_conformer(cast_tab) + cast_data_set <- unique(cast_tab$data_set) + cast_data_set <- gsub("_interp", "", cast_data_set) + obs <- read_rodents_table(main = main, data_set = cast_data_set, + arg_checks = arg_checks) + obs <- na.omit(obs) + species <- "total" + spec_cast <- cast_tab[cast_tab$species == species, ] + moon_match <- obs$moon %in% spec_cast$moon + species_match <- colnames(obs) == species + spec_obs <- obs[moon_match, species_match] + spec_moon <- obs[moon_match, "moon"] + + out <- spec_cast[spec_cast$moon %in% spec_moon, ] + out$obs <- spec_obs + out +} + + +# should probably be added to the documentation for read_cast_tab +# generalize to read_cast_output or something + +read_cast_metadata <- function(main = ".", cast_id = NULL, arg_checks = TRUE){ + check_args(arg_checks) + if(is.null(cast_id)){ + casts_meta <- select_casts(main = main, arg_checks = arg_checks) + cast_id <- max(casts_meta$cast_id) + } + lpath <- paste0("cast_id_", cast_id, "_metadata.yaml") + cpath <- file_path(main, "casts", lpath, arg_checks) + if(!file.exists(cpath)){ + stop("cast_id does not have a cast_metadata file") + } + yaml.load_file(cpath) +} + + #' @title Read in the cast tab output from a given cast #' #' @description Retrieve the \code{cast_tab} of a cast in the casts diff --git a/notes.R b/notes.R new file mode 100644 index 000000000..15e860410 --- /dev/null +++ b/notes.R @@ -0,0 +1,24 @@ +evaluations + +devtools::document() +main <- "~/hindcasting" +plot_cast_point(main) + + +select_casts(main) + +# at the starting point, we want to compare observation to predictions +# at each observation value +# an important consideration is that we want to actually predict the real +# data, not the interpolated data, so we want to make sure to point the +# _interp models to the true data, so we force remove that suffix to align +# predictions appropriately + + +# constrain things down to a single observation compared with a single +# predictive distribution +# we likely want to pull in the model's metadata so we can use the PIs + + +add_obs_to_cast_tab(main = main, cast_id = 1) + From 5c8f61451e354db6f0618665d1fa050c19cda9ea Mon Sep 17 00:00:00 2001 From: Juniper Simonis Date: Mon, 9 Sep 2019 14:00:52 -0700 Subject: [PATCH 02/18] fixes --- .travis.yml | 1 + NEWS.md | 2 +- _pkgdown.yml | 6 +++--- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/.travis.yml b/.travis.yml index 5bcb376ab..a93ab6b95 100644 --- a/.travis.yml +++ b/.travis.yml @@ -6,6 +6,7 @@ warnings_are_errors: false before_install: - sudo apt-get update -qq - sudo apt-get install texlive-latex-base + - sudo apt-get install gsl-bin libgsl0-dev r_packages: - covr diff --git a/NEWS.md b/NEWS.md index a6e3e7493..88e6690eb 100644 --- a/NEWS.md +++ b/NEWS.md @@ -13,7 +13,7 @@ Version numbers follow [Semantic Versioning](https://semver.org/). ### Directory tree structure simplified * `dirtree` was removed -* `base` (both as a function and a concept) was removed. To make that structure use main = "./name" +* `base` (both as a function and a concept) was removed. To make that structure use `main = "./name"`. * "PortalData" has been removed as a sub and replaced with "raw", which includes all raw versions of files (post unzipping) downloaded: Portal Data and Portal Predictions and covariate forecasts (whose saving is also new here). ### Tightened messaging diff --git a/_pkgdown.yml b/_pkgdown.yml index 5cd2832b1..9b78c66f5 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -54,7 +54,7 @@ reference: - prep_cast_covariates - prep_hist_covariates - prep_weather_data - - summarize_daily_weather_by_newmoon + - summarize_daily_weather_by_moon - title: "Directory setup" desc: "Functions that combine directory creation and filling" contents: @@ -123,8 +123,8 @@ reference: - title: "Model generating functions" desc: "Functions for formatting model-running scripts" contents: - - model_script_control - - model_script_controls + - model_control + - model_controls - write_model - model_template - prefab_models From e869c587146ccde2321f437b756b22722e1be4f5 Mon Sep 17 00:00:00 2001 From: Juniper Simonis Date: Mon, 9 Sep 2019 18:05:31 -0700 Subject: [PATCH 03/18] add obs to cast tab --- NAMESPACE | 2 + R/process_casts.R | 116 ++++++++++++------ man/add_obs_to_cast_tab.Rd | 38 ++++++ man/{read_cast_tab.Rd => read_cast_output.Rd} | 20 ++- 4 files changed, 130 insertions(+), 46 deletions(-) create mode 100644 man/add_obs_to_cast_tab.Rd rename man/{read_cast_tab.Rd => read_cast_output.Rd} (61%) diff --git a/NAMESPACE b/NAMESPACE index 0815ab217..1ce12ae81 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -8,6 +8,7 @@ export(add_date_from_components) export(add_extra_future_moons) export(add_future_moons) export(add_moons_from_date) +export(add_obs_to_cast_tab) export(all_species) export(append_csv) export(base_species) @@ -87,6 +88,7 @@ export(prep_rodents_table) export(prep_weather_data) export(process_rodent_data) export(raw_path) +export(read_cast_metadata) export(read_cast_tab) export(read_casts_metadata) export(read_climate_casts) diff --git a/R/process_casts.R b/R/process_casts.R index 961298348..4a1257aee 100644 --- a/R/process_casts.R +++ b/R/process_casts.R @@ -1,53 +1,61 @@ - -add_obs_to_cast_tab <- function(main = ".", cast_id = NULL, +#' @title Add the associated observations to a cast tab +#' +#' @description Append a column of observations to a cast table. \cr +#' If an interpolated data set is used to fit a model, the true data +#' are appended here (so that model predictions are all compared to the +#' same data). +#' +#' @param main \code{character} value of the name of the main component of +#' the directory tree. +#' +#' @param cast_tab A \code{data.frame} of a cast's output. See +#' \code{\link{read_cast_tab}}. +#' +#' @param arg_checks \code{logical} value of if the arguments should be +#' checked using standard protocols via \code{\link{check_args}}. The +#' default (\code{arg_checks = TRUE}) ensures that all inputs are +#' formatted correctly and provides directed error messages if not. +#' +#' @return \code{data.frame} of \code{cast_tab} with an \code{obs} column. +#' +#' @examples +#' \donttest{ +#' setup_dir() +#' portalcast(models = c("AutoArima", "NaiveArima"), end_moons = 515:520) +#' cast_tab <- read_cast_tab(cast_id = 1) +#' add_obs_to_cast_tab(cast_tab = cast_tab) +#' } +#' +#' @export +#' +add_obs_to_cast_tab <- function(main = ".", cast_tab = NULL, arg_checks = TRUE){ check_args(arg_checks) - if(is.null(cast_id)){ - casts_meta <- select_casts(main = main, arg_checks = arg_checks) - cast_id <- max(casts_meta$cast_id) - } - cast_tab <- read_cast_tab(main = main, cast_id = cast_id) + return_if_null(cast_tab) cast_tab <- na_conformer(cast_tab) cast_data_set <- unique(cast_tab$data_set) cast_data_set <- gsub("_interp", "", cast_data_set) obs <- read_rodents_table(main = main, data_set = cast_data_set, arg_checks = arg_checks) - obs <- na.omit(obs) - species <- "total" - spec_cast <- cast_tab[cast_tab$species == species, ] - moon_match <- obs$moon %in% spec_cast$moon - species_match <- colnames(obs) == species - spec_obs <- obs[moon_match, species_match] - spec_moon <- obs[moon_match, "moon"] - - out <- spec_cast[spec_cast$moon %in% spec_moon, ] - out$obs <- spec_obs - out -} - - -# should probably be added to the documentation for read_cast_tab -# generalize to read_cast_output or something - -read_cast_metadata <- function(main = ".", cast_id = NULL, arg_checks = TRUE){ - check_args(arg_checks) - if(is.null(cast_id)){ - casts_meta <- select_casts(main = main, arg_checks = arg_checks) - cast_id <- max(casts_meta$cast_id) - } - lpath <- paste0("cast_id_", cast_id, "_metadata.yaml") - cpath <- file_path(main, "casts", lpath, arg_checks) - if(!file.exists(cpath)){ - stop("cast_id does not have a cast_metadata file") + cast_tab$obs <- NA + for(i in 1:NROW(cast_tab)){ + obs_moon <- which(obs$moon == cast_tab$moon[i]) + obs_species <- which(colnames(obs) == cast_tab$species[i]) + if(length(obs_moon) == 1 & length(obs_species) == 1){ + cast_tab$obs[i] <- obs[obs_moon, obs_species] + } } - yaml.load_file(cpath) + cast_tab } -#' @title Read in the cast tab output from a given cast + +#' @title Read in cast output from a given cast #' -#' @description Retrieve the \code{cast_tab} of a cast in the casts -#' sub directory. +#' @description Read in the various output files of a cast in the casts +#' sub directory. \cr \cr +#' \code{read_cast_tab} retrieves the \code{cast_tab}. \cr \cr +#' \code{read_cast_tab} retrieves the \code{cast_metdata} #' #' @param main \code{character} value of the name of the main component of #' the directory tree. @@ -56,22 +64,31 @@ read_cast_metadata <- function(main = ".", cast_id = NULL, arg_checks = TRUE){ #' representing the cast of interest, as indexed within the directory in #' the \code{casts} sub folder. See the casts metadata file #' (\code{casts_metadata.csv}) for summary information. If \code{NULL} (the -#' default), the most recent generated \code{cast_tab} is read in. +#' default), the most recently generated cast's output is read in. #' #' @param arg_checks \code{logical} value of if the arguments should be #' checked using standard protocols via \code{\link{check_args}}. The #' default (\code{arg_checks = TRUE}) ensures that all inputs are #' formatted correctly and provides directed error messages if not. #' -#' @return \code{data.frame} of the \code{cast_tab}. +#' @return +#' \code{read_cast_tab}: \code{data.frame} of the \code{cast_tab}. +#' \code{read_cast_metadata}: \code{list} of the \code{cast_metadata}. #' #' @examples #' \donttest{ #' setup_dir() #' portalcast(models = c("AutoArima", "NaiveArima"), end_moons = 515:520) #' read_cast_tab(cast_id = 1) +#' read_cast_metadata(cast_id = 1) #' } #' +#' @name read_cast_output +#' +NULL + +#' @rdname read_cast_output +#' #' @export #' read_cast_tab <- function(main = ".", cast_id = NULL, arg_checks = TRUE){ @@ -88,6 +105,25 @@ read_cast_tab <- function(main = ".", cast_id = NULL, arg_checks = TRUE){ read.csv(cpath, stringsAsFactors = FALSE) } +#' @rdname read_cast_output +#' +#' @export +#' +read_cast_metadata <- function(main = ".", cast_id = NULL, arg_checks = TRUE){ + check_args(arg_checks) + if(is.null(cast_id)){ + casts_meta <- select_casts(main = main, arg_checks = arg_checks) + cast_id <- max(casts_meta$cast_id) + } + lpath <- paste0("cast_id_", cast_id, "_metadata.yaml") + cpath <- file_path(main, "casts", lpath, arg_checks) + if(!file.exists(cpath)){ + stop("cast_id does not have a cast_metadata file") + } + yaml.load_file(cpath) +} + + #' @title Find casts that fit specifications #' #' @description Determines the casts that match user specifications. \cr diff --git a/man/add_obs_to_cast_tab.Rd b/man/add_obs_to_cast_tab.Rd new file mode 100644 index 000000000..1733dbf6d --- /dev/null +++ b/man/add_obs_to_cast_tab.Rd @@ -0,0 +1,38 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/process_casts.R +\name{add_obs_to_cast_tab} +\alias{add_obs_to_cast_tab} +\title{Add the associated observations to a cast tab} +\usage{ +add_obs_to_cast_tab(main = ".", cast_tab = NULL, arg_checks = TRUE) +} +\arguments{ +\item{main}{\code{character} value of the name of the main component of +the directory tree.} + +\item{cast_tab}{A \code{data.frame} of a cast's output. See +\code{\link{read_cast_tab}}.} + +\item{arg_checks}{\code{logical} value of if the arguments should be +checked using standard protocols via \code{\link{check_args}}. The +default (\code{arg_checks = TRUE}) ensures that all inputs are +formatted correctly and provides directed error messages if not.} +} +\value{ +\code{data.frame} of \code{cast_tab} with an \code{obs} column. +} +\description{ +Append a column of observations to a cast table. \cr + If an interpolated data set is used to fit a model, the true data + are appended here (so that model predictions are all compared to the + same data). +} +\examples{ + \donttest{ + setup_dir() + portalcast(models = c("AutoArima", "NaiveArima"), end_moons = 515:520) + cast_tab <- read_cast_tab(cast_id = 1) + add_obs_to_cast_tab(cast_tab = cast_tab) +} + +} diff --git a/man/read_cast_tab.Rd b/man/read_cast_output.Rd similarity index 61% rename from man/read_cast_tab.Rd rename to man/read_cast_output.Rd index 964dc8524..bccf5e2bf 100644 --- a/man/read_cast_tab.Rd +++ b/man/read_cast_output.Rd @@ -1,10 +1,14 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/process_casts.R -\name{read_cast_tab} +\name{read_cast_output} +\alias{read_cast_output} \alias{read_cast_tab} -\title{Read in the cast tab output from a given cast} +\alias{read_cast_metadata} +\title{Read in cast output from a given cast} \usage{ read_cast_tab(main = ".", cast_id = NULL, arg_checks = TRUE) + +read_cast_metadata(main = ".", cast_id = NULL, arg_checks = TRUE) } \arguments{ \item{main}{\code{character} value of the name of the main component of @@ -14,7 +18,7 @@ the directory tree.} representing the cast of interest, as indexed within the directory in the \code{casts} sub folder. See the casts metadata file (\code{casts_metadata.csv}) for summary information. If \code{NULL} (the -default), the most recent generated \code{cast_tab} is read in.} +default), the most recently generated cast's output is read in.} \item{arg_checks}{\code{logical} value of if the arguments should be checked using standard protocols via \code{\link{check_args}}. The @@ -22,17 +26,21 @@ default (\code{arg_checks = TRUE}) ensures that all inputs are formatted correctly and provides directed error messages if not.} } \value{ -\code{data.frame} of the \code{cast_tab}. +\code{read_cast_tab}: \code{data.frame} of the \code{cast_tab}. + \code{read_cast_metadata}: \code{list} of the \code{cast_metadata}. } \description{ -Retrieve the \code{cast_tab} of a cast in the casts - sub directory. +Read in the various output files of a cast in the casts + sub directory. \cr \cr + \code{read_cast_tab} retrieves the \code{cast_tab}. \cr \cr + \code{read_cast_tab} retrieves the \code{cast_metdata} } \examples{ \donttest{ setup_dir() portalcast(models = c("AutoArima", "NaiveArima"), end_moons = 515:520) read_cast_tab(cast_id = 1) + read_cast_metadata(cast_id = 1) } } From 840461567d8959ad0ffbe1f8c2d35d6f69ab8304 Mon Sep 17 00:00:00 2001 From: Juniper Simonis Date: Tue, 10 Sep 2019 11:49:41 -0700 Subject: [PATCH 04/18] adding columns to cast tabs group of functions for adding observations, raw error, and covered or not columns to cast tabs --- NAMESPACE | 3 + R/process_casts.R | 78 +++++++++++++++++++++++--- man/add_obs_to_cast_tab.Rd | 38 ------------- man/add_to_cast_tab.Rd | 62 ++++++++++++++++++++ tests/testthat/test-16-process_casts.R | 14 +++++ 5 files changed, 150 insertions(+), 45 deletions(-) delete mode 100644 man/add_obs_to_cast_tab.Rd create mode 100644 man/add_to_cast_tab.Rd diff --git a/NAMESPACE b/NAMESPACE index 1ce12ae81..eed9daad0 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -4,9 +4,12 @@ export(AutoArima) export(ESSS) export(NMME_urls) export(NaiveArima) +export(add_covered_to_cast_tab) export(add_date_from_components) +export(add_err_to_cast_tab) export(add_extra_future_moons) export(add_future_moons) +export(add_lead_to_cast_tab) export(add_moons_from_date) export(add_obs_to_cast_tab) export(all_species) diff --git a/R/process_casts.R b/R/process_casts.R index 4a1257aee..060862b7c 100644 --- a/R/process_casts.R +++ b/R/process_casts.R @@ -1,9 +1,63 @@ -#' @title Add the associated observations to a cast tab +#' @rdname add_to_cast_tab +#' +#' @export +#' +add_lead_to_cast_tab <- function(main = ".", cast_tab = NULL, + arg_checks = TRUE){ + check_args(arg_checks = arg_checks) + return_if_null(cast_tab) + cast_tab$lead <- cast_tab$moon - cast_tab$end_moon + cast_tab +} + +#' @rdname add_to_cast_tab +#' +#' @export +#' +add_err_to_cast_tab <- function(main = ".", cast_tab = NULL, + arg_checks = TRUE){ + check_args(arg_checks = arg_checks) + return_if_null(cast_tab) + if(is.null(cast_tab$obs)){ + cast_tab <- add_obs_to_cast_tab(main = main, cast_tab = cast_tab, + arg_checks = arg_checks) + } + cast_tab$error <- cast_tab$estimate - cast_tab$obs + cast_tab +} + +#' @rdname add_to_cast_tab +#' +#' @export +#' +add_covered_to_cast_tab <- function(main = ".", cast_tab = NULL, + arg_checks = TRUE){ + check_args(arg_checks = arg_checks) + return_if_null(cast_tab) + if(is.null(cast_tab$obs)){ + cast_tab <- add_obs_to_cast_tab(main = main, cast_tab = cast_tab, + arg_checks = arg_checks) + } + cast_tab$covered <- cast_tab$estimate >= cast_tab$lower_pi & + cast_tab$estimate <= cast_tab$upper_pi + cast_tab$covered[is.na(cast_tab$obs)] <- NA + cast_tab +} + +#' @title Add the associated values to a cast tab #' -#' @description Append a column of observations to a cast table. \cr -#' If an interpolated data set is used to fit a model, the true data -#' are appended here (so that model predictions are all compared to the -#' same data). +#' @description Add values to a cast's cast tab. If necessary components are +#' missing (such as no observations added yet and the user requests errors), +#' the missing components are added. \cr \cr +#' \code{add_lead_to_cast_tab} adds a column of lead times. \cr \cr +#' \code{add_obs_to_cast_tab} appends a column of observations. \cr \cr +#' \code{add_err_to_cast_tab} adds a column of raw error values. \cr \cr +#' \code{add_covered_to_cast_tab} appends a \code{logical} column indicating +#' if the observation was within the prediction interval. +#' +#' @details If an interpolated data set is used to fit a model, +#' \code{add_obs_to_cast_tab} adds the true (non-interpolated) observations +#' so that model predictions are all compared to the same data. #' #' @param main \code{character} value of the name of the main component of #' the directory tree. @@ -16,21 +70,31 @@ #' default (\code{arg_checks = TRUE}) ensures that all inputs are #' formatted correctly and provides directed error messages if not. #' -#' @return \code{data.frame} of \code{cast_tab} with an \code{obs} column. +#' @return \code{data.frame} of \code{cast_tab} with an additional column or +#' columns if needed. #' #' @examples #' \donttest{ #' setup_dir() #' portalcast(models = c("AutoArima", "NaiveArima"), end_moons = 515:520) #' cast_tab <- read_cast_tab(cast_id = 1) +#' add_lead_to_cast_tab(cast_tab = cast_tab) #' add_obs_to_cast_tab(cast_tab = cast_tab) +#' add_err_to_cast_tab(cast_tab = cast_tab) +#' add_covered_to_cast_tab(cast_tab = cast_tab) #' } #' +#' @name add_to_cast_tab +#' +NULL + +#' @rdname add_to_cast_tab +#' #' @export #' add_obs_to_cast_tab <- function(main = ".", cast_tab = NULL, arg_checks = TRUE){ - check_args(arg_checks) + check_args(arg_checks = arg_checks) return_if_null(cast_tab) cast_tab <- na_conformer(cast_tab) cast_data_set <- unique(cast_tab$data_set) diff --git a/man/add_obs_to_cast_tab.Rd b/man/add_obs_to_cast_tab.Rd deleted file mode 100644 index 1733dbf6d..000000000 --- a/man/add_obs_to_cast_tab.Rd +++ /dev/null @@ -1,38 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/process_casts.R -\name{add_obs_to_cast_tab} -\alias{add_obs_to_cast_tab} -\title{Add the associated observations to a cast tab} -\usage{ -add_obs_to_cast_tab(main = ".", cast_tab = NULL, arg_checks = TRUE) -} -\arguments{ -\item{main}{\code{character} value of the name of the main component of -the directory tree.} - -\item{cast_tab}{A \code{data.frame} of a cast's output. See -\code{\link{read_cast_tab}}.} - -\item{arg_checks}{\code{logical} value of if the arguments should be -checked using standard protocols via \code{\link{check_args}}. The -default (\code{arg_checks = TRUE}) ensures that all inputs are -formatted correctly and provides directed error messages if not.} -} -\value{ -\code{data.frame} of \code{cast_tab} with an \code{obs} column. -} -\description{ -Append a column of observations to a cast table. \cr - If an interpolated data set is used to fit a model, the true data - are appended here (so that model predictions are all compared to the - same data). -} -\examples{ - \donttest{ - setup_dir() - portalcast(models = c("AutoArima", "NaiveArima"), end_moons = 515:520) - cast_tab <- read_cast_tab(cast_id = 1) - add_obs_to_cast_tab(cast_tab = cast_tab) -} - -} diff --git a/man/add_to_cast_tab.Rd b/man/add_to_cast_tab.Rd new file mode 100644 index 000000000..25f20e647 --- /dev/null +++ b/man/add_to_cast_tab.Rd @@ -0,0 +1,62 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/process_casts.R +\name{add_lead_to_cast_tab} +\alias{add_lead_to_cast_tab} +\alias{add_err_to_cast_tab} +\alias{add_covered_to_cast_tab} +\alias{add_to_cast_tab} +\alias{add_obs_to_cast_tab} +\title{Add the associated values to a cast tab} +\usage{ +add_lead_to_cast_tab(main = ".", cast_tab = NULL, arg_checks = TRUE) + +add_err_to_cast_tab(main = ".", cast_tab = NULL, arg_checks = TRUE) + +add_covered_to_cast_tab(main = ".", cast_tab = NULL, + arg_checks = TRUE) + +add_obs_to_cast_tab(main = ".", cast_tab = NULL, arg_checks = TRUE) +} +\arguments{ +\item{main}{\code{character} value of the name of the main component of +the directory tree.} + +\item{cast_tab}{A \code{data.frame} of a cast's output. See +\code{\link{read_cast_tab}}.} + +\item{arg_checks}{\code{logical} value of if the arguments should be +checked using standard protocols via \code{\link{check_args}}. The +default (\code{arg_checks = TRUE}) ensures that all inputs are +formatted correctly and provides directed error messages if not.} +} +\value{ +\code{data.frame} of \code{cast_tab} with an additional column or + columns if needed. +} +\description{ +Add values to a cast's cast tab. If necessary components are + missing (such as no observations added yet and the user requests errors), + the missing components are added. \cr \cr + \code{add_lead_to_cast_tab} adds a column of lead times. \cr \cr + \code{add_obs_to_cast_tab} appends a column of observations. \cr \cr + \code{add_err_to_cast_tab} adds a column of raw error values. \cr \cr + \code{add_covered_to_cast_tab} appends a \code{logical} column indicating + if the observation was within the prediction interval. +} +\details{ +If an interpolated data set is used to fit a model, + \code{add_obs_to_cast_tab} adds the true (non-interpolated) observations + so that model predictions are all compared to the same data. +} +\examples{ + \donttest{ + setup_dir() + portalcast(models = c("AutoArima", "NaiveArima"), end_moons = 515:520) + cast_tab <- read_cast_tab(cast_id = 1) + add_lead_to_cast_tab(cast_tab = cast_tab) + add_obs_to_cast_tab(cast_tab = cast_tab) + add_err_to_cast_tab(cast_tab = cast_tab) + add_covered_to_cast_tab(cast_tab = cast_tab) +} + +} diff --git a/tests/testthat/test-16-process_casts.R b/tests/testthat/test-16-process_casts.R index 159e0d3ca..b80d33329 100644 --- a/tests/testthat/test-16-process_casts.R +++ b/tests/testthat/test-16-process_casts.R @@ -6,3 +6,17 @@ test_that("read_cast_tab", { skip_on_cran() # downloads take too long for cran checks expect_error(read_cast_tab(main = main, cast_id = 1e10)) }) + + +test_that("read_cast_tab", { + skip_on_cran() # downloads take too long for cran checks + cast_tab <- read_cast_tab(main = main, cast_id = 1) + expect_is(add_lead_to_cast_tab(main = main, cast_tab = cast_tab), + "data.frame") + expect_is(add_obs_to_cast_tab(main = main, cast_tab = cast_tab), + "data.frame") + expect_is(add_err_to_cast_tab(main = main, cast_tab = cast_tab), + "data.frame") + expect_is(add_covered_to_cast_tab(main = main, cast_tab = cast_tab), + "data.frame") +}) From 4ec0b706ef4db9ce519c466da1e58a49c1dcad49 Mon Sep 17 00:00:00 2001 From: Juniper Simonis Date: Tue, 10 Sep 2019 12:53:08 -0700 Subject: [PATCH 05/18] flexing add_obs now allows for multiple data types to be added, by iterating over them for read obs --- NAMESPACE | 1 + R/process_casts.R | 54 ++++++++++++++++++++++++++++++++--------- man/read_cast_output.Rd | 10 ++++++-- 3 files changed, 52 insertions(+), 13 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index eed9daad0..172746edc 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -93,6 +93,7 @@ export(process_rodent_data) export(raw_path) export(read_cast_metadata) export(read_cast_tab) +export(read_cast_tabs) export(read_casts_metadata) export(read_climate_casts) export(read_covariate_casts) diff --git a/R/process_casts.R b/R/process_casts.R index 060862b7c..7190bd194 100644 --- a/R/process_casts.R +++ b/R/process_casts.R @@ -97,16 +97,22 @@ add_obs_to_cast_tab <- function(main = ".", cast_tab = NULL, check_args(arg_checks = arg_checks) return_if_null(cast_tab) cast_tab <- na_conformer(cast_tab) - cast_data_set <- unique(cast_tab$data_set) - cast_data_set <- gsub("_interp", "", cast_data_set) - obs <- read_rodents_table(main = main, data_set = cast_data_set, - arg_checks = arg_checks) cast_tab$obs <- NA - for(i in 1:NROW(cast_tab)){ - obs_moon <- which(obs$moon == cast_tab$moon[i]) - obs_species <- which(colnames(obs) == cast_tab$species[i]) - if(length(obs_moon) == 1 & length(obs_species) == 1){ - cast_tab$obs[i] <- obs[obs_moon, obs_species] + cast_data_set <- gsub("_interp", "", cast_tab$data_set) + ucast_data_set <- unique(cast_data_set) + ncast_data_sets <- length(ucast_data_set) + for(j in 1:ncast_data_sets){ + obs <- read_rodents_table(main = main, data_set = ucast_data_set[j], + arg_checks = arg_checks) + matches <- which(cast_data_set == ucast_data_set[j]) + nmatches <- length(matches) + for(i in 1:nmatches){ + spot <- matches[i] + obs_moon <- which(obs$moon == cast_tab$moon[spot]) + obs_species <- which(colnames(obs) == cast_tab$species[spot]) + if(length(obs_moon) == 1 & length(obs_species) == 1){ + cast_tab$obs[spot] <- obs[obs_moon, obs_species] + } } } cast_tab @@ -116,9 +122,10 @@ add_obs_to_cast_tab <- function(main = ".", cast_tab = NULL, #' @title Read in cast output from a given cast #' -#' @description Read in the various output files of a cast in the casts -#' sub directory. \cr \cr +#' @description Read in the various output files of a cast or casts in the +#' casts sub directory. \cr \cr #' \code{read_cast_tab} retrieves the \code{cast_tab}. \cr \cr +#' \code{read_cast_tabs} combines one or more \code{cast_tab}s. \cr \cr #' \code{read_cast_tab} retrieves the \code{cast_metdata} #' #' @param main \code{character} value of the name of the main component of @@ -137,6 +144,7 @@ add_obs_to_cast_tab <- function(main = ".", cast_tab = NULL, #' #' @return #' \code{read_cast_tab}: \code{data.frame} of the \code{cast_tab}. +#' \code{read_cast_tabs}: \code{data.frame} of the \code{cast_tab}s. #' \code{read_cast_metadata}: \code{list} of the \code{cast_metadata}. #' #' @examples @@ -144,6 +152,7 @@ add_obs_to_cast_tab <- function(main = ".", cast_tab = NULL, #' setup_dir() #' portalcast(models = c("AutoArima", "NaiveArima"), end_moons = 515:520) #' read_cast_tab(cast_id = 1) +#' read_cast_tabs(cast_ids = 1:2) #' read_cast_metadata(cast_id = 1) #' } #' @@ -169,6 +178,29 @@ read_cast_tab <- function(main = ".", cast_id = NULL, arg_checks = TRUE){ read.csv(cpath, stringsAsFactors = FALSE) } +#' @rdname read_cast_output +#' +#' @export +#' +read_cast_tabs <- function(main = ".", cast_ids = NULL, arg_checks = TRUE){ + check_args(arg_checks) + if(is.null(cast_ids)){ + casts_meta <- select_casts(main = main, arg_checks = arg_checks) + cast_ids <- max(casts_meta$cast_id) + } + cast_tab <- read_cast_tab(main = main, cast_id = cast_ids[1], + arg_checks = arg_checks) + ncasts <- length(cast_ids) + if(ncasts > 1){ + for(i in 2:ncasts){ + cast_tab_i <- read_cast_tab(main = main, cast_id = cast_ids[i], + arg_checks = arg_checks) + cast_tab <- rbind(cast_tab, cast_tab_i) + } + } + cast_tab +} + #' @rdname read_cast_output #' #' @export diff --git a/man/read_cast_output.Rd b/man/read_cast_output.Rd index bccf5e2bf..a5d4b8864 100644 --- a/man/read_cast_output.Rd +++ b/man/read_cast_output.Rd @@ -3,11 +3,14 @@ \name{read_cast_output} \alias{read_cast_output} \alias{read_cast_tab} +\alias{read_cast_tabs} \alias{read_cast_metadata} \title{Read in cast output from a given cast} \usage{ read_cast_tab(main = ".", cast_id = NULL, arg_checks = TRUE) +read_cast_tabs(main = ".", cast_ids = NULL, arg_checks = TRUE) + read_cast_metadata(main = ".", cast_id = NULL, arg_checks = TRUE) } \arguments{ @@ -27,12 +30,14 @@ formatted correctly and provides directed error messages if not.} } \value{ \code{read_cast_tab}: \code{data.frame} of the \code{cast_tab}. + \code{read_cast_tabs}: \code{data.frame} of the \code{cast_tab}s. \code{read_cast_metadata}: \code{list} of the \code{cast_metadata}. } \description{ -Read in the various output files of a cast in the casts - sub directory. \cr \cr +Read in the various output files of a cast or casts in the + casts sub directory. \cr \cr \code{read_cast_tab} retrieves the \code{cast_tab}. \cr \cr + \code{read_cast_tabs} combines one or more \code{cast_tab}s. \cr \cr \code{read_cast_tab} retrieves the \code{cast_metdata} } \examples{ @@ -40,6 +45,7 @@ Read in the various output files of a cast in the casts setup_dir() portalcast(models = c("AutoArima", "NaiveArima"), end_moons = 515:520) read_cast_tab(cast_id = 1) + read_cast_tabs(cast_ids = 1:2) read_cast_metadata(cast_id = 1) } From 673ba4cbdffedb7668ce5338b6a1a1d731fbc6d9 Mon Sep 17 00:00:00 2001 From: Juniper Simonis Date: Tue, 10 Sep 2019 13:06:48 -0700 Subject: [PATCH 06/18] Update test-16-process_casts.R --- tests/testthat/test-16-process_casts.R | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/tests/testthat/test-16-process_casts.R b/tests/testthat/test-16-process_casts.R index b80d33329..45fccc247 100644 --- a/tests/testthat/test-16-process_casts.R +++ b/tests/testthat/test-16-process_casts.R @@ -4,11 +4,17 @@ main <- "./testing" test_that("read_cast_tab", { skip_on_cran() # downloads take too long for cran checks + expect_is(read_cast_tab(main = main, cast_id = NULL), "data.frame") expect_error(read_cast_tab(main = main, cast_id = 1e10)) }) +test_that("read_cast_metadata", { + skip_on_cran() # downloads take too long for cran checks + expect_is(read_cast_metadata(main = main, cast_id = NULL), "list") + expect_error(read_cast_metadata(main = main, cast_id = 1e10)) +}) -test_that("read_cast_tab", { +test_that("add_to_cast_tab", { skip_on_cran() # downloads take too long for cran checks cast_tab <- read_cast_tab(main = main, cast_id = 1) expect_is(add_lead_to_cast_tab(main = main, cast_tab = cast_tab), From a3dfef9663c75d5832788e2d3e9c43128f8f65fd Mon Sep 17 00:00:00 2001 From: Juniper Simonis Date: Tue, 10 Sep 2019 13:49:03 -0700 Subject: [PATCH 07/18] docs --- R/process_casts.R | 8 +++++--- man/read_cast_output.Rd | 14 ++++++++------ tests/testthat/test-16-process_casts.R | 6 ++++++ 3 files changed, 19 insertions(+), 9 deletions(-) diff --git a/R/process_casts.R b/R/process_casts.R index 7190bd194..672df519b 100644 --- a/R/process_casts.R +++ b/R/process_casts.R @@ -131,11 +131,13 @@ add_obs_to_cast_tab <- function(main = ".", cast_tab = NULL, #' @param main \code{character} value of the name of the main component of #' the directory tree. #' -#' @param cast_id \code{integer} (or integer \code{numeric}) value -#' representing the cast of interest, as indexed within the directory in +#' @param cast_ids,cast_id \code{integer} (or integer \code{numeric}) value(s) +#' representing the cast(s) of interest, as indexed within the directory in #' the \code{casts} sub folder. See the casts metadata file #' (\code{casts_metadata.csv}) for summary information. If \code{NULL} (the -#' default), the most recently generated cast's output is read in. +#' default), the most recently generated cast's output is read in. \cr +#' \code{cast_ids} can be NULL, one value, or more than one values, +#' \code{cast_id} can only be NULL or one value. #' #' @param arg_checks \code{logical} value of if the arguments should be #' checked using standard protocols via \code{\link{check_args}}. The diff --git a/man/read_cast_output.Rd b/man/read_cast_output.Rd index a5d4b8864..c88388a1c 100644 --- a/man/read_cast_output.Rd +++ b/man/read_cast_output.Rd @@ -17,16 +17,18 @@ read_cast_metadata(main = ".", cast_id = NULL, arg_checks = TRUE) \item{main}{\code{character} value of the name of the main component of the directory tree.} -\item{cast_id}{\code{integer} (or integer \code{numeric}) value -representing the cast of interest, as indexed within the directory in -the \code{casts} sub folder. See the casts metadata file -(\code{casts_metadata.csv}) for summary information. If \code{NULL} (the -default), the most recently generated cast's output is read in.} - \item{arg_checks}{\code{logical} value of if the arguments should be checked using standard protocols via \code{\link{check_args}}. The default (\code{arg_checks = TRUE}) ensures that all inputs are formatted correctly and provides directed error messages if not.} + +\item{cast_ids, cast_id}{\code{integer} (or integer \code{numeric}) value(s) +representing the cast(s) of interest, as indexed within the directory in +the \code{casts} sub folder. See the casts metadata file +(\code{casts_metadata.csv}) for summary information. If \code{NULL} (the +default), the most recently generated cast's output is read in. \cr +\code{cast_ids} can be NULL, one value, or more than one values, +\code{cast_id} can only be NULL or one value.} } \value{ \code{read_cast_tab}: \code{data.frame} of the \code{cast_tab}. diff --git a/tests/testthat/test-16-process_casts.R b/tests/testthat/test-16-process_casts.R index 45fccc247..a2c25c6c2 100644 --- a/tests/testthat/test-16-process_casts.R +++ b/tests/testthat/test-16-process_casts.R @@ -8,6 +8,12 @@ test_that("read_cast_tab", { expect_error(read_cast_tab(main = main, cast_id = 1e10)) }) +test_that("read_cast_tabs", { + skip_on_cran() # downloads take too long for cran checks + expect_is(read_cast_tabs(main = main, cast_ids = NULL), "data.frame") + expect_is(read_cast_tabs(main = main, cast_ids = 1:2), "data.frame") +}) + test_that("read_cast_metadata", { skip_on_cran() # downloads take too long for cran checks expect_is(read_cast_metadata(main = main, cast_id = NULL), "list") From 7e7fe7a4bb48e77fbca3fefc7872b0b9144bf534 Mon Sep 17 00:00:00 2001 From: Juniper Simonis Date: Tue, 10 Sep 2019 17:19:39 -0700 Subject: [PATCH 08/18] formalize plot casts err lead --- NAMESPACE | 1 + R/figures.R | 137 +++++++++++++++++++++++++++++++ R/process_casts.R | 10 ++- man/plot_casts_err_lead.Rd | 64 +++++++++++++++ man/read_cast_output.Rd | 5 +- notes.R | 3 + tests/testthat/test-13-figures.R | 9 ++ 7 files changed, 225 insertions(+), 4 deletions(-) create mode 100644 man/plot_casts_err_lead.Rd diff --git a/NAMESPACE b/NAMESPACE index 172746edc..8ef0f91a2 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -76,6 +76,7 @@ export(path_no_ext) export(pevGARCH) export(plot_cast_point) export(plot_cast_ts) +export(plot_casts_err_lead) export(portalcast) export(portalcast_goodbye) export(portalcast_welcome) diff --git a/R/figures.R b/R/figures.R index 970191034..51e931e71 100644 --- a/R/figures.R +++ b/R/figures.R @@ -1,3 +1,140 @@ +#' @title Plot predictions for a given point in time across multiple species +#' +#' @description Plot the raw error (estimate - observation) as a function +#' of lead time for a given model focused on a specific species (or total) +#' within a data set, but across multiple model runs from different +#' forecast origins (\code{end_moons}). \cr +#' A pre-loaded table of casts can be input, but if not (default), the +#' table will be efficiently (as defined by the inputs) loaded and trimmed. +#' \cr +#' The casts can be trimmed specifically using the \code{cast_ids} input, +#' otherwise, all relevant casts will be plotted. +#' +#' @param main \code{character} value of the name of the main component of +#' the directory tree. +#' +#' @param cast_ids \code{integer} (or integer \code{numeric}) values +#' representing the casts of interest for restricting plotting, as indexed +#' within the directory in the \code{casts} sub folder. +#' See the casts metadata file (\code{casts_metadata.csv}) for summary +#' information. +#' +#' @param end_moons \code{integer} (or integer \code{numeric}) +#' newmoon numbers of the forecast origin. Default value is +#' \code{NULL}, which equates to no selection with respect to +#' \code{end_moon}. +#' +#' @param model \code{character} value of the name of the model to +#' include. Default value is \code{NULL}, which equates to no selection with +#' respect to \code{model}. +#' +#' @param data_set \code{character} value of the rodent data set to include +#' Default value is \code{NULL}, which equates to no selection with +#' respect to \code{data_set}. +#' +#' @param arg_checks \code{logical} value of if the arguments should be +#' checked using standard protocols via \code{\link{check_args}}. The +#' default (\code{arg_checks = TRUE}) ensures that all inputs are +#' formatted correctly and provides directed error messages if not. +#' +#' @param species \code{character} vector of the species codes (or +#' \code{"total"} for the total across species) to be plotted or +#' \code{NULL} (default) to plot all species in \code{data_set}. +#' +#' @return \code{NULL}. Plot is generated. +#' +#' @examples +#' \donttest{ +#' setup_dir() +#' portalcast(models = "AutoArima", end_moons = 515:520) +#' plot_casts_err_lead() +#' } +#' +#' @export +#' +plot_casts_err_lead <- function(main = ".", cast_ids = NULL, + cast_tab = NULL, end_moons = NULL, + model = NULL, data_set = NULL, + species = NULL, arg_checks = TRUE){ + check_args(arg_checks = arg_checks) + + if(is.null(cast_tab)){ + cast_choices <- select_casts(main = main, cast_ids = cast_ids, + model = model, end_moons = end_moons, + data_set = data_set, arg_checks = arg_checks) + if(NROW(cast_choices) == 0){ + stop("no casts available for requested plot") + }else{ + cast_tab <- read_cast_tabs(main = main, cast_ids = cast_choices$cast_id, + arg_checks = arg_checks) + cast_tab <- add_obs_to_cast_tab(main = main, cast_tab = cast_tab, + arg_checks = arg_checks) + cast_tab <- add_err_to_cast_tab(main = main, cast_tab = cast_tab, + arg_checks = arg_checks) + cast_tab <- add_lead_to_cast_tab(main = main, cast_tab = cast_tab, + arg_checks = arg_checks) + } + } + cast_ids <- ifnull(cast_ids, unique(cast_tab$cast_id)) + model <- ifnull(model, unique(cast_tab$model)[1]) + data_set <- ifnull(data_set, unique(cast_tab$data_set)[1]) + species <- ifnull(species, unique(cast_tab$species)[1]) + end_moons <- ifnull(end_moons, unique(cast_tab$end_moon)) + cast_id_in <- cast_tab$cast_id %in% cast_ids + model_in <- cast_tab$model == model + data_set_in <- cast_tab$data_set == data_set + species_in <- cast_tab$species == species + end_moon_in <- cast_tab$end_moon %in% end_moons + all_in <- cast_id_in & model_in & data_set_in & species_in & end_moon_in + if(sum(all_in) == 0){ + stop("no casts available for requested plot") + } + pcast_tab <- cast_tab[all_in, ] + yy <- round(pcast_tab$error, 3) + oldpar <- par(no.readonly = TRUE) + on.exit(par(oldpar)) + par(bty = "L", mar = c(4, 4.5, 2.5, 1.5)) + yrange <- range(yy, na.rm = TRUE) + xrange <- c(max(pcast_tab$lead) + 0.25, 0.75) + plot(1, 1, type = "n", xlab = "", ylab = "", xaxt = "n", yaxt = "n", + ylim = yrange, xlim = xrange) + abline(h = 0, lty = 3, lwd = 2, col = grey(0.6)) + ucast_ids <- unique(pcast_tab$cast_id) + ncast_ids <- length(ucast_ids) + cols <- viridis(ncast_ids, 1, 0, 0.75) + for(i in 1:ncast_ids){ + matches <- which(pcast_tab$cast_id == ucast_ids[i]) + x <- pcast_tab$lead[matches] + y <- pcast_tab$error[matches] + x <- x[!is.na(y)] + y <- y[!is.na(y)] + points(x, y, type = "o", pch = 16, col = cols[i]) + } + + axis(1, cex.axis = 1.25) + axis(1, at = seq(1, xrange[1] - 0.25, 1), tck = -0.01, labels = FALSE) + axis(2, cex.axis = 1.25, las = 1) + mtext(side = 1, line = 2.5, "Lead time (newmoons)", cex = 1.5) + mtext(side = 2, line = 3, "Error (individuals)", cex = 1.75) + lab <- list(text = "", font = 1) + lp <- file_path(main, "raw", "PortalData/Rodents/Portal_rodent_species.csv") + sptab <- read.csv(lp, stringsAsFactors = FALSE) %>% + na_conformer("speciescode") + if (species == "total"){ + spp <- "total abundance" + title <- paste0(model, ", ", data_set, ", ", spp) + } else{ + sppmatch <- which(sptab[ , "speciescode"] == species) + spp <- sptab[sppmatch , "scientificname"] + title <- eval( + substitute( + expression(paste(model, ", ", data_set, ", ", italic(spp))), + env = list(spp = spp, data_set = data_set, model = model))) + } + mtext(title, side = 3, cex = 1.25, line = 0.5, at = xrange[1], adj = 0) +} + + #' @title Plot predictions for a given point in time across multiple species #' diff --git a/R/process_casts.R b/R/process_casts.R index 672df519b..16a58f3ad 100644 --- a/R/process_casts.R +++ b/R/process_casts.R @@ -1,3 +1,6 @@ + + + #' @rdname add_to_cast_tab #' #' @export @@ -145,8 +148,9 @@ add_obs_to_cast_tab <- function(main = ".", cast_tab = NULL, #' formatted correctly and provides directed error messages if not. #' #' @return -#' \code{read_cast_tab}: \code{data.frame} of the \code{cast_tab}. -#' \code{read_cast_tabs}: \code{data.frame} of the \code{cast_tab}s. +#' \code{read_cast_tab}: \code{data.frame} of the \code{cast_tab}. \cr \cr +#' \code{read_cast_tabs}: \code{data.frame} of the \code{cast_tab}s with +#' a \code{cast_id} column added to distinguish among casts. \cr \cr #' \code{read_cast_metadata}: \code{list} of the \code{cast_metadata}. #' #' @examples @@ -192,11 +196,13 @@ read_cast_tabs <- function(main = ".", cast_ids = NULL, arg_checks = TRUE){ } cast_tab <- read_cast_tab(main = main, cast_id = cast_ids[1], arg_checks = arg_checks) + cast_tab$cast_id <- cast_ids[1] ncasts <- length(cast_ids) if(ncasts > 1){ for(i in 2:ncasts){ cast_tab_i <- read_cast_tab(main = main, cast_id = cast_ids[i], arg_checks = arg_checks) + cast_tab_i$cast_id <- cast_ids[i] cast_tab <- rbind(cast_tab, cast_tab_i) } } diff --git a/man/plot_casts_err_lead.Rd b/man/plot_casts_err_lead.Rd new file mode 100644 index 000000000..63ff15b3e --- /dev/null +++ b/man/plot_casts_err_lead.Rd @@ -0,0 +1,64 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/figures.R +\name{plot_casts_err_lead} +\alias{plot_casts_err_lead} +\title{Plot predictions for a given point in time across multiple species} +\usage{ +plot_casts_err_lead(main = ".", cast_ids = NULL, cast_tab = NULL, + end_moons = NULL, model = NULL, data_set = NULL, species = NULL, + arg_checks = TRUE) +} +\arguments{ +\item{main}{\code{character} value of the name of the main component of +the directory tree.} + +\item{cast_ids}{\code{integer} (or integer \code{numeric}) values +representing the casts of interest for restricting plotting, as indexed +within the directory in the \code{casts} sub folder. +See the casts metadata file (\code{casts_metadata.csv}) for summary +information.} + +\item{end_moons}{\code{integer} (or integer \code{numeric}) +newmoon numbers of the forecast origin. Default value is +\code{NULL}, which equates to no selection with respect to +\code{end_moon}.} + +\item{model}{\code{character} value of the name of the model to +include. Default value is \code{NULL}, which equates to no selection with +respect to \code{model}.} + +\item{data_set}{\code{character} value of the rodent data set to include +Default value is \code{NULL}, which equates to no selection with +respect to \code{data_set}.} + +\item{species}{\code{character} vector of the species codes (or +\code{"total"} for the total across species) to be plotted or +\code{NULL} (default) to plot all species in \code{data_set}.} + +\item{arg_checks}{\code{logical} value of if the arguments should be +checked using standard protocols via \code{\link{check_args}}. The +default (\code{arg_checks = TRUE}) ensures that all inputs are +formatted correctly and provides directed error messages if not.} +} +\value{ +\code{NULL}. Plot is generated. +} +\description{ +Plot the raw error (estimate - observation) as a function + of lead time for a given model focused on a specific species (or total) + within a data set, but across multiple model runs from different + forecast origins (\code{end_moons}). \cr + A pre-loaded table of casts can be input, but if not (default), the + table will be efficiently (as defined by the inputs) loaded and trimmed. + \cr + The casts can be trimmed specifically using the \code{cast_ids} input, + otherwise, all relevant casts will be plotted. +} +\examples{ + \donttest{ + setup_dir() + portalcast(models = "AutoArima", end_moons = 515:520) + plot_casts_err_lead() +} + +} diff --git a/man/read_cast_output.Rd b/man/read_cast_output.Rd index c88388a1c..ba2923808 100644 --- a/man/read_cast_output.Rd +++ b/man/read_cast_output.Rd @@ -31,8 +31,9 @@ default), the most recently generated cast's output is read in. \cr \code{cast_id} can only be NULL or one value.} } \value{ -\code{read_cast_tab}: \code{data.frame} of the \code{cast_tab}. - \code{read_cast_tabs}: \code{data.frame} of the \code{cast_tab}s. +\code{read_cast_tab}: \code{data.frame} of the \code{cast_tab}. \cr \cr + \code{read_cast_tabs}: \code{data.frame} of the \code{cast_tab}s with + a \code{cast_id} column added to distinguish among casts. \cr \cr \code{read_cast_metadata}: \code{list} of the \code{cast_metadata}. } \description{ diff --git a/notes.R b/notes.R index 15e860410..0e1baa740 100644 --- a/notes.R +++ b/notes.R @@ -20,5 +20,8 @@ select_casts(main) # we likely want to pull in the model's metadata so we can use the PIs +cast_tab <- read_cast_tab(main = main, cast_id = 1) + + add_obs_to_cast_tab(main = main, cast_id = 1) diff --git a/tests/testthat/test-13-figures.R b/tests/testthat/test-13-figures.R index 37c503458..fbc0755c2 100644 --- a/tests/testthat/test-13-figures.R +++ b/tests/testthat/test-13-figures.R @@ -19,4 +19,13 @@ test_that("plot_cast_ts", { expect_silent(plot_cast_ts(main = main, species = "DM")) expect_error(plot_cast_ts(main = main, cast_id = 1e10)) +}) + + +test_that("plot_casts_err_lead", { + skip_on_cran() # downloads and casting take too long to run on cran + expect_silent(plot_casts_err_lead(main = main)) + expect_silent(plot_casts_err_lead(main = main, model = "AutoArima", + species = "total", data_set = "all")) + expect_error(plot_casts_err_lead(main = main, cast_id = 1e10)) }) \ No newline at end of file From 155b68f01feda106029caac053acd811dad5028c Mon Sep 17 00:00:00 2001 From: Juniper Simonis Date: Wed, 11 Sep 2019 00:46:21 -0700 Subject: [PATCH 09/18] flexed err plot to the multi panel version --- R/figures.R | 224 +++++++++++++++++++++++++++---------- R/process_casts.R | 96 ++++++++-------- man/add_to_cast_tab.Rd | 4 +- man/plot_casts_err_lead.Rd | 30 ++--- 4 files changed, 231 insertions(+), 123 deletions(-) diff --git a/R/figures.R b/R/figures.R index 51e931e71..0dd355c19 100644 --- a/R/figures.R +++ b/R/figures.R @@ -1,12 +1,13 @@ -#' @title Plot predictions for a given point in time across multiple species +#' @title Plot the forecast error as a function of lead time #' #' @description Plot the raw error (estimate - observation) as a function -#' of lead time for a given model focused on a specific species (or total) -#' within a data set, but across multiple model runs from different -#' forecast origins (\code{end_moons}). \cr -#' A pre-loaded table of casts can be input, but if not (default), the -#' table will be efficiently (as defined by the inputs) loaded and trimmed. -#' \cr +#' of lead time across model runs from different forecast origins +#' (\code{end_moons}) for multiple models and multiple species (or total) +#' within a data set. +#' +#' @details A pre-loaded table of casts can be input, but if not (default), +#' the table will be efficiently (as defined by the inputs) loaded and +#' trimmed. \cr #' The casts can be trimmed specifically using the \code{cast_ids} input, #' otherwise, all relevant casts will be plotted. #' @@ -24,9 +25,10 @@ #' \code{NULL}, which equates to no selection with respect to #' \code{end_moon}. #' -#' @param model \code{character} value of the name of the model to +#' @param models \code{character} value(s) of the name of the model to #' include. Default value is \code{NULL}, which equates to no selection with -#' respect to \code{model}. +#' respect to \code{model}. \code{NULL} translates to all \code{models} +#' in the table. #' #' @param data_set \code{character} value of the rodent data set to include #' Default value is \code{NULL}, which equates to no selection with @@ -37,9 +39,10 @@ #' default (\code{arg_checks = TRUE}) ensures that all inputs are #' formatted correctly and provides directed error messages if not. #' -#' @param species \code{character} vector of the species codes (or -#' \code{"total"} for the total across species) to be plotted or -#' \code{NULL} (default) to plot all species in \code{data_set}. +#' @param species \code{character} vector of the species code(s) +#' or \code{"total"} for the total across species) to be plotted +#' \code{NULL} translates to the species defined by +#' \code{evalplot_species}. #' #' @return \code{NULL}. Plot is generated. #' @@ -54,14 +57,14 @@ #' plot_casts_err_lead <- function(main = ".", cast_ids = NULL, cast_tab = NULL, end_moons = NULL, - model = NULL, data_set = NULL, + models = NULL, data_set = NULL, species = NULL, arg_checks = TRUE){ check_args(arg_checks = arg_checks) - if(is.null(cast_tab)){ cast_choices <- select_casts(main = main, cast_ids = cast_ids, - model = model, end_moons = end_moons, - data_set = data_set, arg_checks = arg_checks) + models = models, end_moons = end_moons, + data_sets = data_set, + arg_checks = arg_checks) if(NROW(cast_choices) == 0){ stop("no casts available for requested plot") }else{ @@ -75,67 +78,170 @@ plot_casts_err_lead <- function(main = ".", cast_ids = NULL, arg_checks = arg_checks) } } + cast_tab$data_set <- gsub("_interp", "", cast_tab$data_set) cast_ids <- ifnull(cast_ids, unique(cast_tab$cast_id)) - model <- ifnull(model, unique(cast_tab$model)[1]) + models <- ifnull(models, unique(cast_tab$model)) data_set <- ifnull(data_set, unique(cast_tab$data_set)[1]) - species <- ifnull(species, unique(cast_tab$species)[1]) + species <- ifnull(species, evalplot_species(arg_checks = arg_checks)) end_moons <- ifnull(end_moons, unique(cast_tab$end_moon)) cast_id_in <- cast_tab$cast_id %in% cast_ids - model_in <- cast_tab$model == model + model_in <- cast_tab$model %in% models data_set_in <- cast_tab$data_set == data_set - species_in <- cast_tab$species == species + species_in <- cast_tab$species %in% species end_moon_in <- cast_tab$end_moon %in% end_moons all_in <- cast_id_in & model_in & data_set_in & species_in & end_moon_in if(sum(all_in) == 0){ stop("no casts available for requested plot") } - pcast_tab <- cast_tab[all_in, ] - yy <- round(pcast_tab$error, 3) - oldpar <- par(no.readonly = TRUE) - on.exit(par(oldpar)) - par(bty = "L", mar = c(4, 4.5, 2.5, 1.5)) - yrange <- range(yy, na.rm = TRUE) - xrange <- c(max(pcast_tab$lead) + 0.25, 0.75) - plot(1, 1, type = "n", xlab = "", ylab = "", xaxt = "n", yaxt = "n", - ylim = yrange, xlim = xrange) - abline(h = 0, lty = 3, lwd = 2, col = grey(0.6)) - ucast_ids <- unique(pcast_tab$cast_id) - ncast_ids <- length(ucast_ids) - cols <- viridis(ncast_ids, 1, 0, 0.75) - for(i in 1:ncast_ids){ - matches <- which(pcast_tab$cast_id == ucast_ids[i]) - x <- pcast_tab$lead[matches] - y <- pcast_tab$error[matches] - x <- x[!is.na(y)] - y <- y[!is.na(y)] - points(x, y, type = "o", pch = 16, col = cols[i]) - } - axis(1, cex.axis = 1.25) - axis(1, at = seq(1, xrange[1] - 0.25, 1), tck = -0.01, labels = FALSE) - axis(2, cex.axis = 1.25, las = 1) - mtext(side = 1, line = 2.5, "Lead time (newmoons)", cex = 1.5) - mtext(side = 2, line = 3, "Error (individuals)", cex = 1.75) - lab <- list(text = "", font = 1) lp <- file_path(main, "raw", "PortalData/Rodents/Portal_rodent_species.csv") sptab <- read.csv(lp, stringsAsFactors = FALSE) %>% na_conformer("speciescode") - if (species == "total"){ - spp <- "total abundance" - title <- paste0(model, ", ", data_set, ", ", spp) - } else{ - sppmatch <- which(sptab[ , "speciescode"] == species) - spp <- sptab[sppmatch , "scientificname"] - title <- eval( - substitute( - expression(paste(model, ", ", data_set, ", ", italic(spp))), - env = list(spp = spp, data_set = data_set, model = model))) - } - mtext(title, side = 3, cex = 1.25, line = 0.5, at = xrange[1], adj = 0) + + nmodels <- length(models) + nspecies <- length(species) + + if(nmodels == 1 & nspecies == 1){ + + model_in <- cast_tab$model %in% models + species_in <- cast_tab$species %in% species + all_in <- model_in & species_in + if(sum(all_in) == 0){ + stop("no casts available for requested plot") + } + pcast_tab <- cast_tab[all_in, ] + yy <- round(pcast_tab$error, 3) + yrange <- range(c(0, yy), na.rm = TRUE) + xrange <- c(max(pcast_tab$lead) + 0.25, 0) + + oldpar <- par(no.readonly = TRUE) + on.exit(par(oldpar)) + par(bty = "L", mar = c(4, 4.5, 3, 1)) + + plot(1, 1, type = "n", xlab = "", ylab = "", xaxt = "n", yaxt = "n", + ylim = yrange, xlim = xrange) + abline(h = 0, lty = 3, lwd = 2, col = grey(0.6)) + ucast_ids <- unique(pcast_tab$cast_id) + ncast_ids <- length(ucast_ids) + cols <- viridis(ncast_ids, 0.8, 0, 0.75) + for(k in 1:ncast_ids){ + matches <- which(pcast_tab$cast_id == ucast_ids[k]) + x <- pcast_tab$lead[matches] + y <- pcast_tab$error[matches] + x <- x[!is.na(y)] + y <- y[!is.na(y)] + points(x, y, type = "o", pch = 16, col = cols[k], cex = 1) + } + axis(1, cex.axis = 1.25) + axis(2, cex.axis = 1.25, las = 1) + mtext(side = 1, "Lead time (new moons)", cex = 1.5, line = 2.75) + mtext(side = 2, "Forecast error", cex = 1.75, line = 3) + if (species == "total"){ + spp <- "total abundance" + title <- paste0(models, ", ", data_set, ", ", spp) + } else{ + sppmatch <- which(sptab[ , "speciescode"] == species) + spp <- sptab[sppmatch , "scientificname"] + title <- eval(substitute( + expression( + paste(models_i, ", ", data_set, ", ", italic(spp))), + env = list(spp = spp, data_set = data_set, + models_i = models))) + } + mtext(title, side = 3, cex = 1.25, line = 0.5, at = xrange[1], adj = 0) + + }else{ + + oldpar <- par(no.readonly = TRUE) + on.exit(par(oldpar)) + par(fig = c(0, 1, 0, 1), mar = c(0.5, 0, 0, 0.5)) + plot(1, 1, type = "n", xaxt = "n", yaxt = "n", ylab = "", xlab = "", + bty = "n") + mtext(side = 1, "Lead time (new moons)", line = -0.65) + mtext(side = 2, "Forecast error", line = -0.85) + + x1 <- seq(0.05, 0.95 - 0.9/nmodels, 0.9/nmodels) + x2 <- seq(0.05 + 0.9/nmodels, 0.95, 0.9/nmodels) + y1 <- seq(0.05, 0.95 - 0.9/nspecies, 0.9/nspecies) + y2 <- seq(0.05 + 0.9/nspecies, 0.95, 0.9/nspecies) + + for(j in 1:nspecies){ + + species_in <- cast_tab$species %in% species[j] + if(sum(species_in) == 0){ + stop("no casts available for requested plot") + } + yy <- round(cast_tab$error[species_in], 3) + yrange <- range(c(0, yy), na.rm = TRUE) + + for(i in 1:nmodels){ + + model_in <- cast_tab$model %in% models[i] + species_in <- cast_tab$species %in% species[j] + all_in <- model_in & species_in + if(sum(all_in) == 0){ + stop("no casts available for requested plot") + } + pcast_tab <- cast_tab[all_in, ] + + par(bty = "L", mar = c(0.5, 0.5, 0.25, 0.25), + fig = c(x1[i], x2[i], y1[j], y2[j]), new = TRUE) + + xrange <- c(max(pcast_tab$lead) + 0.25, 0) + plot(1, 1, type = "n", xlab = "", ylab = "", xaxt = "n", yaxt = "n", + ylim = yrange, xlim = xrange) + abline(h = 0, lty = 3, lwd = 2, col = grey(0.6)) + ucast_ids <- unique(pcast_tab$cast_id) + ncast_ids <- length(ucast_ids) + cols <- viridis(ncast_ids, 0.8, 0, 0.75) + for(k in 1:ncast_ids){ + matches <- which(pcast_tab$cast_id == ucast_ids[k]) + x <- pcast_tab$lead[matches] + y <- pcast_tab$error[matches] + x <- x[!is.na(y)] + y <- y[!is.na(y)] + points(x, y, type = "o", pch = 16, col = cols[k], cex = 0.5) + } + xaxl <- ifelse(j == 1, TRUE, FALSE) + xat <- seq(0, max(pcast_tab$lead), 4) + axis(1, tck = -0.07, labels = FALSE, at = xat) + axis(1, tck = -0.07, labels = xaxl, at = xat, cex.axis = 0.6, + line = -0.9, lwd = 0) + xat <- seq(0, max(pcast_tab$lead), 1) + axis(1, tck = -0.04, labels = FALSE, at = xat) + + yaxl <- ifelse(i == 1, TRUE, FALSE) + axis(2, tck = -0.06, labels = FALSE) + axis(2, tck = -0.06, labels = yaxl, cex.axis = 0.6, las = 1, + line = -0.55, lwd = 0) + if(j == nspecies){ + mtext(side = 3, models[i], cex = 0.75, font = 2, line = 0.5) + } + if(i == nmodels){ + par(mar = c(0, 0, 0, 0), fig = c(x2[i], 1, y1[j], y2[j]), + new = TRUE) + plot(1, 1, type = "n", xaxt = "n", yaxt = "n", ylab = "", xlab = "", + bty = "n") + if (species[j] == "total"){ + spt <- "Total Abundance" + spf <- 2 + } else{ + spptextmatch <- which(sptab[ , "speciescode"] == species[j]) + spt <- sptab[spptextmatch, "scientificname"] + spf <- 4 + } + text(0.75, 1, spt, font = spf, cex = 0.55, xpd = TRUE, srt = 270) + } + + } + } + } } + + #' @title Plot predictions for a given point in time across multiple species #' #' @description Plot the point value with confidence interval for a time point diff --git a/R/process_casts.R b/R/process_casts.R index 16a58f3ad..ad4f5cfa5 100644 --- a/R/process_casts.R +++ b/R/process_casts.R @@ -1,5 +1,46 @@ - - +#' @title Add the associated values to a cast tab +#' +#' @description Add values to a cast's cast tab. If necessary components are +#' missing (such as no observations added yet and the user requests errors), +#' the missing components are added. \cr \cr +#' \code{add_lead_to_cast_tab} adds a column of lead times. \cr \cr +#' \code{add_obs_to_cast_tab} appends a column of observations. \cr \cr +#' \code{add_err_to_cast_tab} adds a column of raw error values. \cr \cr +#' \code{add_covered_to_cast_tab} appends a \code{logical} column indicating +#' if the observation was within the prediction interval. +#' +#' @details If an interpolated data set is used to fit a model, +#' \code{add_obs_to_cast_tab} adds the true (non-interpolated) observations +#' so that model predictions are all compared to the same data. +#' +#' @param main \code{character} value of the name of the main component of +#' the directory tree. +#' +#' @param cast_tab A \code{data.frame} of a cast's output. See +#' \code{\link{read_cast_tab}}. +#' +#' @param arg_checks \code{logical} value of if the arguments should be +#' checked using standard protocols via \code{\link{check_args}}. The +#' default (\code{arg_checks = TRUE}) ensures that all inputs are +#' formatted correctly and provides directed error messages if not. +#' +#' @return \code{data.frame} of \code{cast_tab} with an additional column or +#' columns if needed. +#' +#' @examples +#' \donttest{ +#' setup_dir() +#' portalcast(models = c("AutoArima", "NaiveArima"), end_moons = 515:520) +#' cast_tab <- read_cast_tab(cast_id = 1) +#' add_lead_to_cast_tab(cast_tab = cast_tab) +#' add_obs_to_cast_tab(cast_tab = cast_tab) +#' add_err_to_cast_tab(cast_tab = cast_tab) +#' add_covered_to_cast_tab(cast_tab = cast_tab) +#' } +#' +#' @name add_to_cast_tab +#' +NULL #' @rdname add_to_cast_tab #' @@ -47,50 +88,6 @@ add_covered_to_cast_tab <- function(main = ".", cast_tab = NULL, cast_tab } -#' @title Add the associated values to a cast tab -#' -#' @description Add values to a cast's cast tab. If necessary components are -#' missing (such as no observations added yet and the user requests errors), -#' the missing components are added. \cr \cr -#' \code{add_lead_to_cast_tab} adds a column of lead times. \cr \cr -#' \code{add_obs_to_cast_tab} appends a column of observations. \cr \cr -#' \code{add_err_to_cast_tab} adds a column of raw error values. \cr \cr -#' \code{add_covered_to_cast_tab} appends a \code{logical} column indicating -#' if the observation was within the prediction interval. -#' -#' @details If an interpolated data set is used to fit a model, -#' \code{add_obs_to_cast_tab} adds the true (non-interpolated) observations -#' so that model predictions are all compared to the same data. -#' -#' @param main \code{character} value of the name of the main component of -#' the directory tree. -#' -#' @param cast_tab A \code{data.frame} of a cast's output. See -#' \code{\link{read_cast_tab}}. -#' -#' @param arg_checks \code{logical} value of if the arguments should be -#' checked using standard protocols via \code{\link{check_args}}. The -#' default (\code{arg_checks = TRUE}) ensures that all inputs are -#' formatted correctly and provides directed error messages if not. -#' -#' @return \code{data.frame} of \code{cast_tab} with an additional column or -#' columns if needed. -#' -#' @examples -#' \donttest{ -#' setup_dir() -#' portalcast(models = c("AutoArima", "NaiveArima"), end_moons = 515:520) -#' cast_tab <- read_cast_tab(cast_id = 1) -#' add_lead_to_cast_tab(cast_tab = cast_tab) -#' add_obs_to_cast_tab(cast_tab = cast_tab) -#' add_err_to_cast_tab(cast_tab = cast_tab) -#' add_covered_to_cast_tab(cast_tab = cast_tab) -#' } -#' -#' @name add_to_cast_tab -#' -NULL - #' @rdname add_to_cast_tab #' #' @export @@ -99,7 +96,6 @@ add_obs_to_cast_tab <- function(main = ".", cast_tab = NULL, arg_checks = TRUE){ check_args(arg_checks = arg_checks) return_if_null(cast_tab) - cast_tab <- na_conformer(cast_tab) cast_tab$obs <- NA cast_data_set <- gsub("_interp", "", cast_tab$data_set) ucast_data_set <- unique(cast_data_set) @@ -109,10 +105,11 @@ add_obs_to_cast_tab <- function(main = ".", cast_tab = NULL, arg_checks = arg_checks) matches <- which(cast_data_set == ucast_data_set[j]) nmatches <- length(matches) + obs_cols <- gsub("NA.", "NA", colnames(obs)) for(i in 1:nmatches){ spot <- matches[i] obs_moon <- which(obs$moon == cast_tab$moon[spot]) - obs_species <- which(colnames(obs) == cast_tab$species[spot]) + obs_species <- which(obs_cols == cast_tab$species[spot]) if(length(obs_moon) == 1 & length(obs_species) == 1){ cast_tab$obs[spot] <- obs[obs_moon, obs_species] } @@ -181,7 +178,8 @@ read_cast_tab <- function(main = ".", cast_id = NULL, arg_checks = TRUE){ if(!file.exists(cpath)){ stop("cast_id does not have a cast_table") } - read.csv(cpath, stringsAsFactors = FALSE) + out <- read.csv(cpath, stringsAsFactors = FALSE) + na_conformer(out, arg_checks = arg_checks) } #' @rdname read_cast_output diff --git a/man/add_to_cast_tab.Rd b/man/add_to_cast_tab.Rd index 25f20e647..4449a1d63 100644 --- a/man/add_to_cast_tab.Rd +++ b/man/add_to_cast_tab.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/process_casts.R -\name{add_lead_to_cast_tab} +\name{add_to_cast_tab} +\alias{add_to_cast_tab} \alias{add_lead_to_cast_tab} \alias{add_err_to_cast_tab} \alias{add_covered_to_cast_tab} -\alias{add_to_cast_tab} \alias{add_obs_to_cast_tab} \title{Add the associated values to a cast tab} \usage{ diff --git a/man/plot_casts_err_lead.Rd b/man/plot_casts_err_lead.Rd index 63ff15b3e..bf46ced07 100644 --- a/man/plot_casts_err_lead.Rd +++ b/man/plot_casts_err_lead.Rd @@ -2,10 +2,10 @@ % Please edit documentation in R/figures.R \name{plot_casts_err_lead} \alias{plot_casts_err_lead} -\title{Plot predictions for a given point in time across multiple species} +\title{Plot the forecast error as a function of lead time} \usage{ plot_casts_err_lead(main = ".", cast_ids = NULL, cast_tab = NULL, - end_moons = NULL, model = NULL, data_set = NULL, species = NULL, + end_moons = NULL, models = NULL, data_set = NULL, species = NULL, arg_checks = TRUE) } \arguments{ @@ -23,17 +23,19 @@ newmoon numbers of the forecast origin. Default value is \code{NULL}, which equates to no selection with respect to \code{end_moon}.} -\item{model}{\code{character} value of the name of the model to +\item{models}{\code{character} value(s) of the name of the model to include. Default value is \code{NULL}, which equates to no selection with -respect to \code{model}.} +respect to \code{model}. \code{NULL} translates to all \code{models} +in the table.} \item{data_set}{\code{character} value of the rodent data set to include Default value is \code{NULL}, which equates to no selection with respect to \code{data_set}.} -\item{species}{\code{character} vector of the species codes (or -\code{"total"} for the total across species) to be plotted or -\code{NULL} (default) to plot all species in \code{data_set}.} +\item{species}{\code{character} vector of the species code(s) +or \code{"total"} for the total across species) to be plotted +\code{NULL} translates to the species defined by +\code{evalplot_species}.} \item{arg_checks}{\code{logical} value of if the arguments should be checked using standard protocols via \code{\link{check_args}}. The @@ -45,12 +47,14 @@ formatted correctly and provides directed error messages if not.} } \description{ Plot the raw error (estimate - observation) as a function - of lead time for a given model focused on a specific species (or total) - within a data set, but across multiple model runs from different - forecast origins (\code{end_moons}). \cr - A pre-loaded table of casts can be input, but if not (default), the - table will be efficiently (as defined by the inputs) loaded and trimmed. - \cr + of lead time across model runs from different forecast origins + (\code{end_moons}) for multiple models and multiple species (or total) + within a data set. +} +\details{ +A pre-loaded table of casts can be input, but if not (default), + the table will be efficiently (as defined by the inputs) loaded and + trimmed. \cr The casts can be trimmed specifically using the \code{cast_ids} input, otherwise, all relevant casts will be plotted. } From 4df47d70634c85fdd8515d465dedaa98ff1fffd5 Mon Sep 17 00:00:00 2001 From: Juniper Simonis Date: Wed, 11 Sep 2019 01:29:51 -0700 Subject: [PATCH 10/18] coverage and docs --- NAMESPACE | 1 + R/figures.R | 36 +++++++++++++++----------------- R/portalcasting.R | 2 +- man/plot_casts_err_lead.Rd | 3 +++ notes.R | 4 +++- tests/testthat/test-13-figures.R | 2 ++ 6 files changed, 27 insertions(+), 21 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 8ef0f91a2..cb8f2d2e5 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -158,6 +158,7 @@ importFrom(forecast,auto.arima) importFrom(forecast,ets) importFrom(forecast,forecast) importFrom(forecast,na.interp) +importFrom(grDevices,grey) importFrom(grDevices,rgb) importFrom(graphics,abline) importFrom(graphics,axis) diff --git a/R/figures.R b/R/figures.R index 0dd355c19..c54bc2cc7 100644 --- a/R/figures.R +++ b/R/figures.R @@ -25,6 +25,9 @@ #' \code{NULL}, which equates to no selection with respect to #' \code{end_moon}. #' +#' @param cast_tab Optional \code{data.frame} of cast table outputs. If not +#' input, will be loaded. +#' #' @param models \code{character} value(s) of the name of the model to #' include. Default value is \code{NULL}, which equates to no selection with #' respect to \code{model}. \code{NULL} translates to all \code{models} @@ -103,12 +106,6 @@ plot_casts_err_lead <- function(main = ".", cast_ids = NULL, if(nmodels == 1 & nspecies == 1){ - model_in <- cast_tab$model %in% models - species_in <- cast_tab$species %in% species - all_in <- model_in & species_in - if(sum(all_in) == 0){ - stop("no casts available for requested plot") - } pcast_tab <- cast_tab[all_in, ] yy <- round(pcast_tab$error, 3) yrange <- range(c(0, yy), na.rm = TRUE) @@ -157,13 +154,13 @@ plot_casts_err_lead <- function(main = ".", cast_ids = NULL, par(fig = c(0, 1, 0, 1), mar = c(0.5, 0, 0, 0.5)) plot(1, 1, type = "n", xaxt = "n", yaxt = "n", ylab = "", xlab = "", bty = "n") - mtext(side = 1, "Lead time (new moons)", line = -0.65) - mtext(side = 2, "Forecast error", line = -0.85) + mtext(side = 1, "Lead time (new moons)", cex = 1.25, line = -0.5) + mtext(side = 2, "Forecast error", cex = 1.25, line = -1) - x1 <- seq(0.05, 0.95 - 0.9/nmodels, 0.9/nmodels) - x2 <- seq(0.05 + 0.9/nmodels, 0.95, 0.9/nmodels) - y1 <- seq(0.05, 0.95 - 0.9/nspecies, 0.9/nspecies) - y2 <- seq(0.05 + 0.9/nspecies, 0.95, 0.9/nspecies) + x1 <- seq(0.04, 0.96 - 0.92/nmodels, 0.92/nmodels) + x2 <- seq(0.04 + 0.92/nmodels, 0.96, 0.92/nmodels) + y1 <- seq(0.04, 0.96 - 0.92/nspecies, 0.92/nspecies) + y2 <- seq(0.04 + 0.92/nspecies, 0.96, 0.92/nspecies) for(j in 1:nspecies){ @@ -178,7 +175,8 @@ plot_casts_err_lead <- function(main = ".", cast_ids = NULL, model_in <- cast_tab$model %in% models[i] species_in <- cast_tab$species %in% species[j] - all_in <- model_in & species_in + all_in <- cast_id_in & model_in & data_set_in & species_in & + end_moon_in if(sum(all_in) == 0){ stop("no casts available for requested plot") } @@ -203,19 +201,19 @@ plot_casts_err_lead <- function(main = ".", cast_ids = NULL, points(x, y, type = "o", pch = 16, col = cols[k], cex = 0.5) } xaxl <- ifelse(j == 1, TRUE, FALSE) - xat <- seq(0, max(pcast_tab$lead), 4) - axis(1, tck = -0.07, labels = FALSE, at = xat) - axis(1, tck = -0.07, labels = xaxl, at = xat, cex.axis = 0.6, + xat <- seq(0, max(pcast_tab$lead), 2) + axis(1, tck = -0.06, labels = FALSE, at = xat) + axis(1, tck = -0.06, labels = xaxl, at = xat, cex.axis = 0.6, line = -0.9, lwd = 0) xat <- seq(0, max(pcast_tab$lead), 1) axis(1, tck = -0.04, labels = FALSE, at = xat) yaxl <- ifelse(i == 1, TRUE, FALSE) - axis(2, tck = -0.06, labels = FALSE) - axis(2, tck = -0.06, labels = yaxl, cex.axis = 0.6, las = 1, + axis(2, tck = -0.05, labels = FALSE) + axis(2, tck = -0.05, labels = yaxl, cex.axis = 0.6, las = 1, line = -0.55, lwd = 0) if(j == nspecies){ - mtext(side = 3, models[i], cex = 0.75, font = 2, line = 0.5) + mtext(side = 3, models[i], cex = 0.85, font = 2, line = 0.5) } if(i == nmodels){ par(mar = c(0, 0, 0, 0), fig = c(x2[i], 1, y1[j], y2[j]), diff --git a/R/portalcasting.R b/R/portalcasting.R index 27e44f61b..03445a4d9 100644 --- a/R/portalcasting.R +++ b/R/portalcasting.R @@ -4,7 +4,7 @@ #' left_join mutate n one_of rename right_join select summarize ungroup #' @importFrom forecast Arima auto.arima ets forecast na.interp #' @importFrom graphics abline axis mtext par plot points polygon rect text -#' @importFrom grDevices rgb +#' @importFrom grDevices grey rgb #' @importFrom httr content GET stop_for_status #' @importFrom lubridate month year #' @importFrom portalr find_incomplete_censuses get_future_moons diff --git a/man/plot_casts_err_lead.Rd b/man/plot_casts_err_lead.Rd index bf46ced07..6efc09d0a 100644 --- a/man/plot_casts_err_lead.Rd +++ b/man/plot_casts_err_lead.Rd @@ -18,6 +18,9 @@ within the directory in the \code{casts} sub folder. See the casts metadata file (\code{casts_metadata.csv}) for summary information.} +\item{cast_tab}{Optional \code{data.frame} of cast table outputs. If not +input, will be loaded.} + \item{end_moons}{\code{integer} (or integer \code{numeric}) newmoon numbers of the forecast origin. Default value is \code{NULL}, which equates to no selection with respect to diff --git a/notes.R b/notes.R index 0e1baa740..756e8d8bd 100644 --- a/notes.R +++ b/notes.R @@ -2,8 +2,10 @@ evaluations devtools::document() main <- "~/hindcasting" -plot_cast_point(main) +tiff("1.tiff", width = 10, height = 10, units = "in", res = 400) +plot_casts_err_lead(main) +dev.off() select_casts(main) diff --git a/tests/testthat/test-13-figures.R b/tests/testthat/test-13-figures.R index fbc0755c2..0857f9fe6 100644 --- a/tests/testthat/test-13-figures.R +++ b/tests/testthat/test-13-figures.R @@ -27,5 +27,7 @@ test_that("plot_casts_err_lead", { expect_silent(plot_casts_err_lead(main = main)) expect_silent(plot_casts_err_lead(main = main, model = "AutoArima", species = "total", data_set = "all")) + expect_silent(plot_casts_err_lead(main = main, model = "AutoArima", + species = "BA", data_set = "all")) expect_error(plot_casts_err_lead(main = main, cast_id = 1e10)) }) \ No newline at end of file From 2dc37b20d121bd1e0fdfc0ef94edafea74f67b24 Mon Sep 17 00:00:00 2001 From: Juniper Simonis Date: Wed, 11 Sep 2019 02:04:57 -0700 Subject: [PATCH 11/18] coverage --- R/figures.R | 6 ------ tests/testthat/test-13-figures.R | 4 +++- 2 files changed, 3 insertions(+), 7 deletions(-) diff --git a/R/figures.R b/R/figures.R index c54bc2cc7..cbc2b67de 100644 --- a/R/figures.R +++ b/R/figures.R @@ -165,9 +165,6 @@ plot_casts_err_lead <- function(main = ".", cast_ids = NULL, for(j in 1:nspecies){ species_in <- cast_tab$species %in% species[j] - if(sum(species_in) == 0){ - stop("no casts available for requested plot") - } yy <- round(cast_tab$error[species_in], 3) yrange <- range(c(0, yy), na.rm = TRUE) @@ -177,9 +174,6 @@ plot_casts_err_lead <- function(main = ".", cast_ids = NULL, species_in <- cast_tab$species %in% species[j] all_in <- cast_id_in & model_in & data_set_in & species_in & end_moon_in - if(sum(all_in) == 0){ - stop("no casts available for requested plot") - } pcast_tab <- cast_tab[all_in, ] par(bty = "L", mar = c(0.5, 0.5, 0.25, 0.25), diff --git a/tests/testthat/test-13-figures.R b/tests/testthat/test-13-figures.R index 0857f9fe6..09ed94b82 100644 --- a/tests/testthat/test-13-figures.R +++ b/tests/testthat/test-13-figures.R @@ -29,5 +29,7 @@ test_that("plot_casts_err_lead", { species = "total", data_set = "all")) expect_silent(plot_casts_err_lead(main = main, model = "AutoArima", species = "BA", data_set = "all")) - expect_error(plot_casts_err_lead(main = main, cast_id = 1e10)) + cast_tab <- read_cast_tabs(main = main) + expect_error(plot_casts_err_lead(main = main, cast_tab = cast_tab, + cast_id = 1e10)) }) \ No newline at end of file From 47560f5abc66f7990e90110a142ca242dcbe1925 Mon Sep 17 00:00:00 2001 From: Juniper Simonis Date: Wed, 11 Sep 2019 23:53:26 -0700 Subject: [PATCH 12/18] flex model controls to allow user defined overrides --- .Rbuildignore | 1 + DESCRIPTION | 2 +- NEWS.md | 14 ++++++ R/prepare_models.R | 67 +++++++++++++++++++++-------- R/prepare_rodents.R | 17 +++++++- man/model_control.Rd | 15 +++---- man/model_controls.Rd | 22 +++++++--- man/prefab_data_sets.Rd | 8 +++- vignettes/adding_model_and_data.Rmd | 4 +- 9 files changed, 111 insertions(+), 39 deletions(-) diff --git a/.Rbuildignore b/.Rbuildignore index 0639049f9..e686da6e7 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -10,3 +10,4 @@ ^\.zenodo\.json$ ^inst/WORDLIST$ ^notes.R$ +^docs$ \ No newline at end of file diff --git a/DESCRIPTION b/DESCRIPTION index 25d0bcbe3..83483c887 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: portalcasting Title: Functions Used in Predicting Portal Rodent Dynamics -Version: 0.9.0 +Version: 0.10.0 Authors@R: c( person(c("Juniper", "L."), "Simonis", email = "juniper.simonis@weecology.org", role = c("aut", "cre"), diff --git a/NEWS.md b/NEWS.md index 88e6690eb..5ae79edd8 100644 --- a/NEWS.md +++ b/NEWS.md @@ -2,6 +2,20 @@ Version numbers follow [Semantic Versioning](https://semver.org/). +# portalcasting 0.10.0 +*Active development* + +### Model evaluation and ensembling added back in +* Were removed with the updated version from 0.8.1 to 0.9.0 to allow time to develop the code with the new infrastructure. +* Model evaluation happens within the cast tab output as before. + +### Temporarily removed figures returned +* Associated with the evaluation and ensembling. +* Plotting of error as a function of lead time for multiple species and multiple models. Now has a fall-back arrangement that works for a single species-model combination. + +### Flexing model controls to allow user-defined lists for prefab models +* For sandboxing with existing models, it is useful to be able to change a parameter in the model's controls, such as the data sets. Previously, that would require a lot of hacking around. Now, it's as simple as inputting the desired controls and flipping `arg_checks = FALSE`. + # [portalcasting 0.9.0](https://github.com/weecology/portalcasting/releases/tag/v0.9.0) *2019-09-06* diff --git a/R/prepare_models.R b/R/prepare_models.R index 0420c13de..3ecdf34dc 100644 --- a/R/prepare_models.R +++ b/R/prepare_models.R @@ -27,10 +27,10 @@ prefab_model_controls <- function(){ } -#' @title Produce the control lists for the model script writing +#' @title Produce the control lists for models #' -#' @description The model scripts are written using a generalized function -#' that takes a few arguments that vary among models, and so this function +#' @description The models are written and managed using generalized functions +#' that take a few arguments that vary among models, and so this function #' produces the appropriate \code{list} of \code{list}s of model #' script-writing control arguments. The specific \code{models} must be #' input by the user, with the default being \code{NULL}, which returns. @@ -50,9 +50,13 @@ prefab_model_controls <- function(){ #' only need to include controls for non-prefab \code{models}. \cr \cr #' Any user-defined \code{models} that are not included in \code{controls_m} #' will throw an error. \cr \cr -#' If any user-defined \code{controls_m} duplicate any existing controls for -#' the prefab models or if \code{controls_m} contains any duplicate named -#' elements, an error will be thrown. \cr \cr +#' If any user-defined \code{controls_model} duplicate any existing controls +#' for the prefab models or if \code{controls_model} contains any duplicate +#' named elements, an error will be thrown unless \code{arg_checks = FALSE}, +#' in which case, the first user-input element for any and all conflicting +#' copies is selected. This is override allowers users to test an existing +#' prefab model with different configurations, such as on a new data set, +#' without requiring a new model. \cr \cr #' Users interested in adding models to the prefab set should add the #' controls to the \code{prefab_model_controls} non-exported function #' found in the \code{prepare_models.R} script. @@ -95,7 +99,9 @@ prefab_model_controls <- function(){ #' the number of elements in \code{models} and with elements that are each #' \code{list}s of those \code{models}'s script-writing controls. \cr \cr #' \code{extract_min_lag}: \code{numeric} value of the minimum non-0 lag -#' from any included model or \code{NA} if no models have lags. +#' from any included model or \code{NA} if no models have lags. \cr \cr +#' \code{extract_data_sets}: \code{character} vector of the data set names +#' from all included models. #' #' @examples #' model_controls(prefab_models()) @@ -106,6 +112,8 @@ prefab_model_controls <- function(){ #' model_controls(c("xx", "ESSS"), controls) #' extract_min_lag() #' extract_min_lag("AutoArima") +#' extract_data_sets() +#' extract_data_sets("AutoArima") #' #' @export #' @@ -117,6 +125,15 @@ model_controls <- function(models = NULL, controls_model = NULL, controls_model <- list(controls_model) names(controls_model) <- controls_model[[1]]$name } + + replicates <- table(names(controls_model)) + if(any(replicates > 1)){ + which_conflicting <- names(replicates)[which(replicates > 1)] + all_conflicting <- paste(which_conflicting, collapse = ", ") + msg <- paste0("conflicting copies of model(s): ", all_conflicting) + stop(msg) + } + nadd <- length(controls_model) prefab_controls <- prefab_model_controls() nprefab <- length(prefab_controls) @@ -143,12 +160,30 @@ model_controls <- function(models = NULL, controls_model = NULL, } } replicates <- table(names(controls_model)) - if(any(replicates > 1)){ + if(any(replicates > 1) & arg_checks){ which_conflicting <- names(replicates)[which(replicates > 1)] all_conflicting <- paste(which_conflicting, collapse = ", ") msg <- paste0("conflicting copies of model(s): ", all_conflicting) - stop(msg) + msg2 <- paste0(msg, "\n to override, set `arg_checks = FALSE`") + stop(msg2) } + if(any(replicates > 1) & !arg_checks){ + which_conflicting <- names(replicates)[which(replicates > 1)] + all_conflicting <- paste(which_conflicting, collapse = ", ") + msg <- paste0("conflicting copies of model(s): ", all_conflicting) + msg2 <- c(msg, " using first user-defined input for each") + messageq(msg2, quiet) + umods <- unique(names(controls_model)) + nmods <- length(umods) + controls_model2 <- vector("list", length = nmods) + for(i in 1:nmods){ + choice <- which(names(controls_model) == umods[i])[1] + controls_model2[[i]] <- controls_model[[choice]] + } + names(controls_model2) <- umods + controls_model <- controls_model2 + } + included_models <- which(names(controls_model) %in% models) controls_model[included_models] } @@ -388,20 +423,17 @@ model_template <- function(name = NULL, data_sets = NULL, } -#' @title Create a control list for generating a model script +#' @title Create a control list for a model #' -#' @description Given the number of arguments into -#' \code{\link{model_template}} via \code{\link{write_model}} -#' it helps to have a control \code{list} -#' to organize them. \code{model_control} provides a template for the +#' @description Provides a ready-to-use template for the #' controls \code{list} used to define the specific arguments for a user's -#' novel data set, setting the formal arguments to the basic default +#' novel model, setting the formal arguments to the basic default #' values. # #' @param name \code{character} value of the name of the model. #' #' @param data_sets \code{character} vector of the rodent data set names -#' that the model is applied to. +#' that the model is applied to. Defaults to all non-interpolated data sets. #' #' @param covariatesTF \code{logical} indicator for if the model requires #' covariates. @@ -419,7 +451,8 @@ model_template <- function(name = NULL, data_sets = NULL, #' #' @export #' -model_control <- function(name = "model", data_sets = prefab_data_sets(), +model_control <- function(name = "model", + data_sets = prefab_data_sets(interpolate = FALSE), covariatesTF = FALSE, lag = NA, arg_checks = TRUE){ check_args(arg_checks = arg_checks) list(name = name, data_sets = data_sets, covariatesTF = covariatesTF, diff --git a/R/prepare_rodents.R b/R/prepare_rodents.R index 9c6ae671d..d0e3af5bf 100644 --- a/R/prepare_rodents.R +++ b/R/prepare_rodents.R @@ -745,6 +745,7 @@ process_rodent_data <- function(rodents_tab, main = ".", moons = NULL, #' #' @name species_in_tables #' +NULL #' @rdname species_in_tables #' @@ -874,6 +875,11 @@ evalplot_species <- function(species = NULL, nadot = FALSE, total = TRUE, #' The currently prefabricated options include (\code{"all"}, #' \code{"controls"}, \code{"all_interp"}, and \code{"controls_interp"}). #' +#' @param interpolate \code{logical} value indicating if the interpolated data +#' only should be listed (\code{interpolate = TRUE}), if the non-interpolated +#' data only should be listed (\code{interpolate = FALSE}), or if both +#' should be listed (\code{interpolate = NULL}, default). +#' #' @return \code{character} vector of data set names. #' #' @examples @@ -881,8 +887,15 @@ evalplot_species <- function(species = NULL, nadot = FALSE, total = TRUE, #' #' @export #' -prefab_data_sets <- function(){ - names(prefab_rodents_controls()) +prefab_data_sets <- function(interpolate = NULL){ + dsnames <- names(prefab_rodents_controls()) + return_if_null(interpolate, dsnames) + if(interpolate){ + dsnames <- dsnames[grepl("_interp", dsnames)] + }else{ + dsnames <- dsnames[!grepl("_interp", dsnames)] + } + dsnames } diff --git a/man/model_control.Rd b/man/model_control.Rd index e086eebad..5c30fabc8 100644 --- a/man/model_control.Rd +++ b/man/model_control.Rd @@ -2,16 +2,16 @@ % Please edit documentation in R/prepare_models.R \name{model_control} \alias{model_control} -\title{Create a control list for generating a model script} +\title{Create a control list for a model} \usage{ -model_control(name = "model", data_sets = prefab_data_sets(), - covariatesTF = FALSE, lag = NA, arg_checks = TRUE) +model_control(name = "model", data_sets = prefab_data_sets(interpolate + = FALSE), covariatesTF = FALSE, lag = NA, arg_checks = TRUE) } \arguments{ \item{name}{\code{character} value of the name of the model.} \item{data_sets}{\code{character} vector of the rodent data set names -that the model is applied to.} +that the model is applied to. Defaults to all non-interpolated data sets.} \item{covariatesTF}{\code{logical} indicator for if the model requires covariates.} @@ -29,11 +29,8 @@ Named \code{list} of model's script-generating controls, for input as part of \code{controls_m} in \code{\link{write_model}}. } \description{ -Given the number of arguments into - \code{\link{model_template}} via \code{\link{write_model}} - it helps to have a control \code{list} - to organize them. \code{model_control} provides a template for the +Provides a ready-to-use template for the controls \code{list} used to define the specific arguments for a user's - novel data set, setting the formal arguments to the basic default + novel model, setting the formal arguments to the basic default values. } diff --git a/man/model_controls.Rd b/man/model_controls.Rd index b4101313d..d7ac3c830 100644 --- a/man/model_controls.Rd +++ b/man/model_controls.Rd @@ -4,7 +4,7 @@ \alias{model_controls} \alias{extract_min_lag} \alias{extract_data_sets} -\title{Produce the control lists for the model script writing} +\title{Produce the control lists for models} \usage{ model_controls(models = NULL, controls_model = NULL, quiet = FALSE, arg_checks = TRUE) @@ -55,11 +55,13 @@ formatted correctly and provides directed error messages if not.} the number of elements in \code{models} and with elements that are each \code{list}s of those \code{models}'s script-writing controls. \cr \cr \code{extract_min_lag}: \code{numeric} value of the minimum non-0 lag - from any included model or \code{NA} if no models have lags. + from any included model or \code{NA} if no models have lags. \cr \cr + \code{extract_data_sets}: \code{character} vector of the data set names + from all included models. } \description{ -The model scripts are written using a generalized function - that takes a few arguments that vary among models, and so this function +The models are written and managed using generalized functions + that take a few arguments that vary among models, and so this function produces the appropriate \code{list} of \code{list}s of model script-writing control arguments. The specific \code{models} must be input by the user, with the default being \code{NULL}, which returns. @@ -80,9 +82,13 @@ Any model that is part of the \code{prefab} set only need to include controls for non-prefab \code{models}. \cr \cr Any user-defined \code{models} that are not included in \code{controls_m} will throw an error. \cr \cr - If any user-defined \code{controls_m} duplicate any existing controls for - the prefab models or if \code{controls_m} contains any duplicate named - elements, an error will be thrown. \cr \cr + If any user-defined \code{controls_model} duplicate any existing controls + for the prefab models or if \code{controls_model} contains any duplicate + named elements, an error will be thrown unless \code{arg_checks = FALSE}, + in which case, the first user-input element for any and all conflicting + copies is selected. This is override allowers users to test an existing + prefab model with different configurations, such as on a new data set, + without requiring a new model. \cr \cr Users interested in adding models to the prefab set should add the controls to the \code{prefab_model_controls} non-exported function found in the \code{prepare_models.R} script. @@ -96,5 +102,7 @@ Any model that is part of the \code{prefab} set model_controls(c("xx", "ESSS"), controls) extract_min_lag() extract_min_lag("AutoArima") + extract_data_sets() + extract_data_sets("AutoArima") } diff --git a/man/prefab_data_sets.Rd b/man/prefab_data_sets.Rd index 1b5ed22d1..d7f52d490 100644 --- a/man/prefab_data_sets.Rd +++ b/man/prefab_data_sets.Rd @@ -4,7 +4,13 @@ \alias{prefab_data_sets} \title{Provide the names of the prefab data sets} \usage{ -prefab_data_sets() +prefab_data_sets(interpolate = NULL) +} +\arguments{ +\item{interpolate}{\code{logical} value indicating if the interpolated data +only should be listed (\code{interpolate = TRUE}), if the non-interpolated +data only should be listed (\code{interpolate = FALSE}), or if both +should be listed (\code{interpolate = NULL}, default).} } \value{ \code{character} vector of data set names. diff --git a/vignettes/adding_model_and_data.Rmd b/vignettes/adding_model_and_data.Rmd index bdbcfc881..b574826f6 100644 --- a/vignettes/adding_model_and_data.Rmd +++ b/vignettes/adding_model_and_data.Rmd @@ -77,7 +77,7 @@ If you would like your output to be archived with the other cast output, your co As an example here, I have filled out the model script for newmod with a very basic model. It uses a linear regression with no autocorrelation and only analyzes the total rodent counts. The forecast is simply the linear model's prediction over the future moons. -![A trivial newmod](imgs/sandbox_1_newmod.jpg){width=400px} +![A trivial newmod](imgs/sandbox_1_newmod.jpg){width=600px} As you can see, the model metadata contains a lot of key information used to fit the models, such as the forecast horizon (which for Portal may not necessarily be equal to the theoretical lead time, as some surveys are missed and there is a lag to entering data, so sometimes additional surveys need to be cast) @@ -85,7 +85,7 @@ As you can see, the model metadata contains a lot of key information used to fit To add "newdata" to the available data set, it is important to recognize that the rodent preparation functions point directly to the [**portalr** `summarize_rodent_data` function](https://weecology.github.io/portalr/reference/summarize_rodent_data.html), which allows for flexible data customization. -**portalcasting** turns the arguments in `summarize_rodent_data` into arguments in `rodent_control()`, which sets default values for most arguments, but does require an input for `name`, which names the data set, otherwise it will return `NULL`. Here, we create the data control list and call it `ndc`, allowing us to model the rodents found on the removal plots only. We can then create the data set to verify it looks like how we want by running the `prep_rodents()` function, which returns the data set to the console and saves it in the `data` sub-directory (Note that we **must** set `arg_checks = FALSE` to allow for the creation and use of the non-standard rodents data configuration.) +**portalcasting** turns the arguments in `summarize_rodent_data` into arguments in `rodents_control()`, which sets default values for most arguments, but does require an input for `name`, which names the data set, otherwise it will return `NULL`. Here, we create the data control list and call it `newdata_controls`, allowing us to model the rodents found on the removal plots only. We can then create the data set to verify it looks like how we want by running the `prep_rodents()` function, which returns the data set to the console and saves it in the `data` sub-directory (Note that we **must** set `arg_checks = FALSE` to allow for the creation and use of the non-standard rodents data configuration.) ```{r, eval=FALSE} newdata_controls <- rodents_control(name = "newdata", level = "Treatment", treatment = "removal", arg_checks = FALSE) From 1edeb0eefda56db1afb71fe3d28af039adff5825 Mon Sep 17 00:00:00 2001 From: Juniper Simonis Date: Thu, 12 Sep 2019 00:21:42 -0700 Subject: [PATCH 13/18] coverage --- R/prepare_models.R | 9 --------- notes.R | 4 +++- tests/testthat/test-09-prepare_models.R | 6 ++++++ 3 files changed, 9 insertions(+), 10 deletions(-) diff --git a/R/prepare_models.R b/R/prepare_models.R index 3ecdf34dc..5ef736f30 100644 --- a/R/prepare_models.R +++ b/R/prepare_models.R @@ -125,15 +125,6 @@ model_controls <- function(models = NULL, controls_model = NULL, controls_model <- list(controls_model) names(controls_model) <- controls_model[[1]]$name } - - replicates <- table(names(controls_model)) - if(any(replicates > 1)){ - which_conflicting <- names(replicates)[which(replicates > 1)] - all_conflicting <- paste(which_conflicting, collapse = ", ") - msg <- paste0("conflicting copies of model(s): ", all_conflicting) - stop(msg) - } - nadd <- length(controls_model) prefab_controls <- prefab_model_controls() nprefab <- length(prefab_controls) diff --git a/notes.R b/notes.R index 756e8d8bd..95ea72fe4 100644 --- a/notes.R +++ b/notes.R @@ -1,7 +1,9 @@ evaluations devtools::document() -main <- "~/hindcasting" +main <- "~/xx" +setup_dir(main) + tiff("1.tiff", width = 10, height = 10, units = "in", res = 400) plot_casts_err_lead(main) diff --git a/tests/testthat/test-09-prepare_models.R b/tests/testthat/test-09-prepare_models.R index 1821152b7..b92dfe93f 100644 --- a/tests/testthat/test-09-prepare_models.R +++ b/tests/testthat/test-09-prepare_models.R @@ -38,6 +38,12 @@ test_that("model_controls", { expect_error(model_controls(prefab_models(), list(name = "AutoArima", covariates = FALSE, lag = NA))) + mm <- list(ESSS = model_control("ESSS"), xx = model_control("xx")) + expect_error(model_controls(c("xx", "ESSS"), controls_model = mm)) + expect_is(model_controls(c("xx", "ESSS"), controls_model = mm, + arg_checks = FALSE), + "list") + expect_message(model_controls("xx")) }) From 60f4a44432f36d2565b33e91fa91b409acbb065a Mon Sep 17 00:00:00 2001 From: Juniper Simonis Date: Thu, 12 Sep 2019 17:38:58 -0700 Subject: [PATCH 14/18] measuring cast level error --- NAMESPACE | 1 + R/figures.R | 59 ++++++++++++++++++++++++++ R/process_casts.R | 58 ++++++++++++++++++++++++- man/measure_cast_level_error.Rd | 38 +++++++++++++++++ notes.R | 6 +-- tests/testthat/test-13-figures.R | 9 ++++ tests/testthat/test-16-process_casts.R | 11 +++++ 7 files changed, 177 insertions(+), 5 deletions(-) create mode 100644 man/measure_cast_level_error.Rd diff --git a/NAMESPACE b/NAMESPACE index cb8f2d2e5..efe9323a7 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -61,6 +61,7 @@ export(last_census) export(last_moon) export(list_depth) export(main_path) +export(measure_cast_level_error) export(messageq) export(model_control) export(model_controls) diff --git a/R/figures.R b/R/figures.R index cbc2b67de..f82e29655 100644 --- a/R/figures.R +++ b/R/figures.R @@ -1,3 +1,62 @@ +plot_casts_cov_RMSE <- function(main = ".", cast_ids = NULL, + cast_tab = NULL, end_moons = NULL, + models = NULL, data_set = NULL, + species = NULL, arg_checks = TRUE){ + check_args(arg_checks = arg_checks) + if(is.null(cast_tab)){ + cast_choices <- select_casts(main = main, cast_ids = cast_ids, + models = models, end_moons = end_moons, + data_sets = data_set, + arg_checks = arg_checks) + if(NROW(cast_choices) == 0){ + stop("no casts available for requested plot") + }else{ + cast_tab <- read_cast_tabs(main = main, cast_ids = cast_choices$cast_id, + arg_checks = arg_checks) + cast_tab <- add_obs_to_cast_tab(main = main, cast_tab = cast_tab, + arg_checks = arg_checks) + cast_tab <- add_err_to_cast_tab(main = main, cast_tab = cast_tab, + arg_checks = arg_checks) + cast_tab <- add_lead_to_cast_tab(main = main, cast_tab = cast_tab, + arg_checks = arg_checks) + cast_tab <- add_covered_to_cast_tab(main = main, cast_tab = cast_tab, + arg_checks = arg_checks) + } + } + + cast_tab$data_set <- gsub("_interp", "", cast_tab$data_set) + cast_ids <- ifnull(cast_ids, unique(cast_tab$cast_id)) + models <- ifnull(models, unique(cast_tab$model)) + data_set <- ifnull(data_set, unique(cast_tab$data_set)[1]) + species <- ifnull(species, evalplot_species(arg_checks = arg_checks)) + end_moons <- ifnull(end_moons, unique(cast_tab$end_moon)) + cast_id_in <- cast_tab$cast_id %in% cast_ids + model_in <- cast_tab$model %in% models + data_set_in <- cast_tab$data_set == data_set + species_in <- cast_tab$species %in% species + end_moon_in <- cast_tab$end_moon %in% end_moons + all_in <- cast_id_in & model_in & data_set_in & species_in & end_moon_in + if(sum(all_in) == 0){ + stop("no casts available for requested plot") + } + cast_tab <- cast_tab[all_in, ] + cast_level_errs <- measure_cast_level_error(cast_tab = cast_tab, + arg_checks = arg_checks) + + + lp <- file_path(main, "raw", "PortalData/Rodents/Portal_rodent_species.csv") + sptab <- read.csv(lp, stringsAsFactors = FALSE) %>% + na_conformer("speciescode") + + nmodels <- length(models) + nspecies <- length(species) + + + + +} + + #' @title Plot the forecast error as a function of lead time #' #' @description Plot the raw error (estimate - observation) as a function diff --git a/R/process_casts.R b/R/process_casts.R index ad4f5cfa5..c5280a207 100644 --- a/R/process_casts.R +++ b/R/process_casts.R @@ -1,3 +1,57 @@ + + +#' @title Measure error/fit metrics for forecasts +#' +#' @description Summarize the cast-level errors or fits to the observations. +#' \cr +#' Presently included are root mean square error and coverage. +#' +#' @param cast_tab A \code{data.frame} of a cast's output. See +#' \code{\link{read_cast_tab}}. +#' +#' @param arg_checks \code{logical} value of if the arguments should be +#' checked using standard protocols via \code{\link{check_args}}. The +#' default (\code{arg_checks = TRUE}) ensures that all inputs are +#' formatted correctly and provides directed error messages if not. +#' +#' @return \code{data.frame} of metrics for each \code{cast_id}. +#' +#' @examples +#' \donttest{ +#' setup_dir() +#' portalcast(models = c("AutoArima", "NaiveArima"), end_moons = 515:520) +#' cast_tab <- read_cast_tab(cast_id = 1) +#' cast_tab <- add_lead_to_cast_tab(cast_tab = cast_tab) +#' cast_tab <- add_obs_to_cast_tab(cast_tab = cast_tab) +#' cast_tab <- add_err_to_cast_tab(cast_tab = cast_tab) +#' cast_tab <- add_covered_to_cast_tab(cast_tab = cast_tab) +#' measure_cast_level_error(cast_tab = cast_tab) +#' } +#' +#' @export +#' +measure_cast_level_error <- function(cast_tab = NULL, arg_checks = TRUE){ + check_args(arg_checks = arg_checks) + return_if_null(cast_tab) + + ucast_ids <- unique(cast_tab$cast_id) + ncast_ids <- length(ucast_ids) + RMSE <- rep(NA, ncast_ids) + coverage <- rep(NA, ncast_ids) + for(i in 1:ncast_ids){ + cast_tab_i <- cast_tab[cast_tab$cast_id == ucast_ids[i], ] + err <- cast_tab_i$err + if(!is.null(err)){ + RMSE[i] <- sqrt(mean(err^2, na.rm = TRUE)) + } + covered <- cast_tab_i$covered + if(!is.null(covered)){ + coverage[i] <- mean(covered, na.rm = TRUE) + } + } + data.frame(cast_id = ucast_ids, RMSE = RMSE, coverage = coverage) +} + #' @title Add the associated values to a cast tab #' #' @description Add values to a cast's cast tab. If necessary components are @@ -82,8 +136,8 @@ add_covered_to_cast_tab <- function(main = ".", cast_tab = NULL, cast_tab <- add_obs_to_cast_tab(main = main, cast_tab = cast_tab, arg_checks = arg_checks) } - cast_tab$covered <- cast_tab$estimate >= cast_tab$lower_pi & - cast_tab$estimate <= cast_tab$upper_pi + cast_tab$covered <- cast_tab$obs >= cast_tab$lower_pi & + cast_tab$obs <= cast_tab$upper_pi cast_tab$covered[is.na(cast_tab$obs)] <- NA cast_tab } diff --git a/man/measure_cast_level_error.Rd b/man/measure_cast_level_error.Rd new file mode 100644 index 000000000..268a42d83 --- /dev/null +++ b/man/measure_cast_level_error.Rd @@ -0,0 +1,38 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/process_casts.R +\name{measure_cast_level_error} +\alias{measure_cast_level_error} +\title{Measure error/fit metrics for forecasts} +\usage{ +measure_cast_level_error(cast_tab = NULL, arg_checks = TRUE) +} +\arguments{ +\item{cast_tab}{A \code{data.frame} of a cast's output. See +\code{\link{read_cast_tab}}.} + +\item{arg_checks}{\code{logical} value of if the arguments should be +checked using standard protocols via \code{\link{check_args}}. The +default (\code{arg_checks = TRUE}) ensures that all inputs are +formatted correctly and provides directed error messages if not.} +} +\value{ +\code{data.frame} of metrics for each \code{cast_id}. +} +\description{ +Summarize the cast-level errors or fits to the observations. + \cr + Presently included are root mean square error and coverage. +} +\examples{ + \donttest{ + setup_dir() + portalcast(models = c("AutoArima", "NaiveArima"), end_moons = 515:520) + cast_tab <- read_cast_tab(cast_id = 1) + cast_tab <- add_lead_to_cast_tab(cast_tab = cast_tab) + cast_tab <- add_obs_to_cast_tab(cast_tab = cast_tab) + cast_tab <- add_err_to_cast_tab(cast_tab = cast_tab) + cast_tab <- add_covered_to_cast_tab(cast_tab = cast_tab) + measure_cast_level_error(cast_tab = cast_tab) +} + +} diff --git a/notes.R b/notes.R index 95ea72fe4..52dd0a440 100644 --- a/notes.R +++ b/notes.R @@ -1,12 +1,12 @@ evaluations devtools::document() -main <- "~/xx" +main <- "~/hindcasting" setup_dir(main) -tiff("1.tiff", width = 10, height = 10, units = "in", res = 400) -plot_casts_err_lead(main) +tiff("1.tiff", width = 9, height = 15, units = "in", res = 400) +plot_casts_cov_RMSE(main) dev.off() select_casts(main) diff --git a/tests/testthat/test-13-figures.R b/tests/testthat/test-13-figures.R index 09ed94b82..50587fb15 100644 --- a/tests/testthat/test-13-figures.R +++ b/tests/testthat/test-13-figures.R @@ -32,4 +32,13 @@ test_that("plot_casts_err_lead", { cast_tab <- read_cast_tabs(main = main) expect_error(plot_casts_err_lead(main = main, cast_tab = cast_tab, cast_id = 1e10)) +}) + + +test_that("plot_casts_cov_RMSE", { + skip_on_cran() # downloads and casting take too long to run on cran + expect_silent(plot_casts_cov_RMSE(main = main)) + cast_tab <- read_cast_tabs(main = main) + expect_error(plot_casts_cov_RMSE(main = main, cast_tab = cast_tab, + cast_id = 1e10)) }) \ No newline at end of file diff --git a/tests/testthat/test-16-process_casts.R b/tests/testthat/test-16-process_casts.R index a2c25c6c2..4be12822f 100644 --- a/tests/testthat/test-16-process_casts.R +++ b/tests/testthat/test-16-process_casts.R @@ -32,3 +32,14 @@ test_that("add_to_cast_tab", { expect_is(add_covered_to_cast_tab(main = main, cast_tab = cast_tab), "data.frame") }) + +test_that("measure_cast_level_error", { + skip_on_cran() # downloads take too long for cran checks + cast_choices <- select_casts(main = main) + cast_tab <- read_cast_tabs(main = main, cast_ids = cast_choices$cast_id) + cast_tab <- add_obs_to_cast_tab(main = main, cast_tab = cast_tab) + cast_tab <- add_err_to_cast_tab(main = main, cast_tab = cast_tab) + cast_tab <- add_lead_to_cast_tab(main = main, cast_tab = cast_tab) + cast_tab <- add_covered_to_cast_tab(main = main, cast_tab = cast_tab) + expect_is(measure_cast_level_error(cast_tab), "data.frame") +}) From e1fa21d4d08b513cf7415c407bb55e9324924b9a Mon Sep 17 00:00:00 2001 From: Juniper Simonis Date: Fri, 13 Sep 2019 12:59:59 -0700 Subject: [PATCH 15/18] plot casts cov rmse finished --- NAMESPACE | 1 + R/figures.R | 158 ++++++++++++++++++++++++++++++++ R/process_casts.R | 36 +++++--- man/measure_cast_level_error.Rd | 2 +- man/plot_casts_cov_RMSE.Rd | 69 ++++++++++++++ 5 files changed, 253 insertions(+), 13 deletions(-) create mode 100644 man/plot_casts_cov_RMSE.Rd diff --git a/NAMESPACE b/NAMESPACE index efe9323a7..770a37073 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -77,6 +77,7 @@ export(path_no_ext) export(pevGARCH) export(plot_cast_point) export(plot_cast_ts) +export(plot_casts_cov_RMSE) export(plot_casts_err_lead) export(portalcast) export(portalcast_goodbye) diff --git a/R/figures.R b/R/figures.R index f82e29655..7db925a97 100644 --- a/R/figures.R +++ b/R/figures.R @@ -1,3 +1,62 @@ + +#' @title Plot the forecast coverage and RMSE +#' +#' @description Plot the coverage (fraction of predictions within the CI) and +#' RMSE (root mean squared error) of each model among multiple species. +#' +#' @details A pre-loaded table of casts can be input, but if not (default), +#' the table will be efficiently (as defined by the inputs) loaded and +#' trimmed. \cr +#' The casts can be trimmed specifically using the \code{cast_ids} input, +#' otherwise, all relevant casts will be plotted. +#' +#' @param main \code{character} value of the name of the main component of +#' the directory tree. +#' +#' @param cast_ids \code{integer} (or integer \code{numeric}) values +#' representing the casts of interest for restricting plotting, as indexed +#' within the directory in the \code{casts} sub folder. +#' See the casts metadata file (\code{casts_metadata.csv}) for summary +#' information. +#' +#' @param end_moons \code{integer} (or integer \code{numeric}) +#' newmoon numbers of the forecast origin. Default value is +#' \code{NULL}, which equates to no selection with respect to +#' \code{end_moon}. +#' +#' @param cast_tab Optional \code{data.frame} of cast table outputs. If not +#' input, will be loaded. +#' +#' @param models \code{character} value(s) of the name of the model to +#' include. Default value is \code{NULL}, which equates to no selection with +#' respect to \code{model}. \code{NULL} translates to all \code{models} +#' in the table. +#' +#' @param data_set \code{character} value of the rodent data set to include +#' Default value is \code{NULL}, which equates to no selection with +#' respect to \code{data_set}. +#' +#' @param arg_checks \code{logical} value of if the arguments should be +#' checked using standard protocols via \code{\link{check_args}}. The +#' default (\code{arg_checks = TRUE}) ensures that all inputs are +#' formatted correctly and provides directed error messages if not. +#' +#' @param species \code{character} vector of the species code(s) +#' or \code{"total"} for the total across species) to be plotted +#' \code{NULL} translates to the species defined by +#' \code{evalplot_species}. +#' +#' @return \code{NULL}. Plot is generated. +#' +#' @examples +#' \donttest{ +#' setup_dir() +#' portalcast(models = "AutoArima", end_moons = 515:520) +#' plot_casts_cov_RMSE() +#' } +#' +#' @export +#' plot_casts_cov_RMSE <- function(main = ".", cast_ids = NULL, cast_tab = NULL, end_moons = NULL, models = NULL, data_set = NULL, @@ -52,6 +111,105 @@ plot_casts_cov_RMSE <- function(main = ".", cast_ids = NULL, nspecies <- length(species) + oldpar <- par(no.readonly = TRUE) + on.exit(par(oldpar)) + par(fig = c(0, 1, 0, 1), mar = c(0.5, 0, 0, 0.5)) + plot(1, 1, type = "n", xaxt = "n", yaxt = "n", ylab = "", xlab = "", + bty = "n") + + x1 <- 0 + x2 <- 0.48 + y1 <- 0.0 + y2 <- 0.05 + par(mar = c(0, 2.5, 0, 0.5), fig = c(x1, x2, y1, y2), new = TRUE) + plot(1, 1, type = "n", xaxt = "n", yaxt = "n", ylab = "", xlab = "", + bty = "n", xlim = c(0.5, nmodels + 0.5), ylim = c(0, 1)) + text(x = 1:nmodels, y = rep(0.9, nmodels), labels = models, cex = 0.7, + xpd = TRUE, srt = 45, adj = 1) + x1 <- 0.49 + x2 <- 0.97 + y1 <- 0.0 + y2 <- 0.05 + par(mar = c(0, 2.5, 0, 0.5), fig = c(x1, x2, y1, y2), new = TRUE) + plot(1, 1, type = "n", xaxt = "n", yaxt = "n", ylab = "", xlab = "", + bty = "n", xlim = c(0.5, nmodels + 0.5), ylim = c(0, 1)) + text(x = 1:nmodels, y = rep(0.9, nmodels), labels = models, cex = 0.7, + xpd = TRUE, srt = 45, adj = 1) + + for (i in 1:nspecies){ + + x1 <- 0 + x2 <- 0.48 + y1 <- 0.05 + (i - 1) * 0.95 * (1/nspecies) + y2 <- y1 + 0.94 * (1/nspecies) + par(mar = c(0, 2.5, 1.5, 0.5), fig = c(x1, x2, y1, y2), new = TRUE) + plot(1, 1, type = "n", xaxt = "n", yaxt = "n", ylab = "", + xlab = "", xlim = c(0.5, nmodels + 0.5), ylim = c(0,1), bty = "L") + axis(2, at = seq(0, 1, 0.2), cex.axis = 0.6, las = 1, line = -0.5, + lwd = 0) + axis(2, at = seq(0, 1, 0.2), labels = FALSE, tck = -0.025) + mtext(side = 2, "Coverage", line = 1.5, cex = 0.75) + axis(1, at = 1:nmodels, labels = FALSE, tck = -0.025) + for(j in 1:nmodels){ + in_ij <- which(cast_level_errs$species == species[i] & + cast_level_errs$model == models[j]) + + ys <- na.omit(cast_level_errs$coverage[in_ij]) + ys2 <- runif(length(ys), ys - 0.005, ys + 0.005) + xs <- runif(length(ys), j - 0.05, j + 0.05) + quants <- quantile(ys, seq(0, 1, 0.25)) + points(rep(j, 2), quants[c(1, 5)], type = "l") + points(c(j - 0.02, j + 0.02), rep(quants[1], 2), type = "l") + points(c(j - 0.02, j + 0.02), rep(quants[5], 2), type = "l") + rect(j - 0.1, quants[2], j + 0.1, quants[4], col = "white") + points(c(j - 0.1, j + 0.1), rep(quants[3], 2), type = "l", lwd = 2) + points(xs, ys2, col = rgb(0.3, 0.3, 0.3, 0.4), pch = 1, cex = 0.5) + } + + abline(h = 0.95, lwd = 2, lty = 3) + + in_i <- which(cast_level_errs$species == species[i]) + cast_level_errs_i <- cast_level_errs[in_i, ] + ymax <- max(max(cast_level_errs_i$RMSE, na.rm = TRUE)) + x1 <- 0.49 + x2 <- 0.97 + y1 <- 0.05 + (i - 1) * 0.95 * (1/nspecies) + y2 <- y1 + 0.94 * (1/nspecies) + par(mar = c(0, 2.5, 1.5, 0.5), fig = c(x1, x2, y1, y2), new = TRUE) + plot(1, 1, type = "n", xaxt = "n", yaxt = "n", ylab = "", bty = "L", + xlab = "", xlim = c(0.5, nmodels + 0.5), ylim = c(0, ymax)) + axis(2, cex.axis = 0.6, las = 1, line = -0.5, lwd = 0) + axis(2, labels = FALSE, tck = -0.025) + mtext(side = 2, "RMSE", line = 1.625, cex = 0.75) + axis(1, at = 1:nmodels, labels = FALSE, tck = -0.025) + for(j in 1:nmodels){ + in_ij <- which(cast_level_errs$species == species[i] & + cast_level_errs$model == models[j]) + + ys <- na.omit(cast_level_errs$RMSE[in_ij]) + ys2 <- runif(length(ys), ys - 0.005, ys + 0.005) + xs <- runif(length(ys), j - 0.05, j + 0.05) + quants <- quantile(ys, seq(0, 1, 0.25)) + points(rep(j, 2), quants[c(1, 5)], type = "l") + points(c(j - 0.02, j + 0.02), rep(quants[1], 2), type = "l") + points(c(j - 0.02, j + 0.02), rep(quants[5], 2), type = "l") + rect(j - 0.1, quants[2], j + 0.1, quants[4], col = "white") + points(c(j - 0.1, j + 0.1), rep(quants[3], 2), type = "l", lwd = 2) + points(xs, ys2, col = rgb(0.3, 0.3, 0.3, 0.4), pch = 1, cex = 0.5) + } + par(mar = c(0, 0, 0, 0), fig = c(0.97, 1, y1, y2), new = TRUE) + plot(1, 1, type = "n", xaxt = "n", yaxt = "n", ylab = "", xlab = "", + bty = "n") + if (uspecies[i] == "total"){ + spt <- "Total Rodents" + spf <- 2 + } else{ + spptextmatch <- which(sptab[ , "speciescode"] == species[i]) + spt <- sptab[spptextmatch, "scientificname"] + spf <- 4 + } + text(0.9, 1, spt, font = spf, cex = 0.8, xpd = TRUE, srt = 270) + } } diff --git a/R/process_casts.R b/R/process_casts.R index c5280a207..3b8c45cc9 100644 --- a/R/process_casts.R +++ b/R/process_casts.R @@ -14,7 +14,7 @@ #' default (\code{arg_checks = TRUE}) ensures that all inputs are #' formatted correctly and provides directed error messages if not. #' -#' @return \code{data.frame} of metrics for each \code{cast_id}. +#' @return \code{data.frame} of metrics for each cast for each species. #' #' @examples #' \donttest{ @@ -36,20 +36,32 @@ measure_cast_level_error <- function(cast_tab = NULL, arg_checks = TRUE){ ucast_ids <- unique(cast_tab$cast_id) ncast_ids <- length(ucast_ids) - RMSE <- rep(NA, ncast_ids) - coverage <- rep(NA, ncast_ids) + uspecies <- unique(cast_tab$species) + nspecies <- length(uspecies) + RMSE <- rep(NA, ncast_ids * nspecies) + coverage <- rep(NA, ncast_ids * nspecies) + model <- rep(NA, ncast_ids * nspecies) + counter <- 1 for(i in 1:ncast_ids){ - cast_tab_i <- cast_tab[cast_tab$cast_id == ucast_ids[i], ] - err <- cast_tab_i$err - if(!is.null(err)){ - RMSE[i] <- sqrt(mean(err^2, na.rm = TRUE)) - } - covered <- cast_tab_i$covered - if(!is.null(covered)){ - coverage[i] <- mean(covered, na.rm = TRUE) + for(j in 1:nspecies){ + ij <- cast_tab$cast_id == ucast_ids[i] & cast_tab$species == uspecies[j] + cast_tab_ij <- cast_tab[ij, ] + err <- cast_tab_ij$err + if(!is.null(err)){ + RMSE[counter] <- sqrt(mean(err^2, na.rm = TRUE)) + } + covered <- cast_tab_ij$covered + if(!is.null(covered)){ + coverage[counter] <- mean(covered, na.rm = TRUE) + } + model[counter] <- unique(cast_tab_ij$model) + counter <- counter + 1 } } - data.frame(cast_id = ucast_ids, RMSE = RMSE, coverage = coverage) + ids <- rep(ucast_ids, each = nspecies) + spp <- rep(uspecies, ncast_ids) + data.frame(cast_id = ids, species = spp, model = model, + RMSE = RMSE, coverage = coverage) } #' @title Add the associated values to a cast tab diff --git a/man/measure_cast_level_error.Rd b/man/measure_cast_level_error.Rd index 268a42d83..509c78268 100644 --- a/man/measure_cast_level_error.Rd +++ b/man/measure_cast_level_error.Rd @@ -16,7 +16,7 @@ default (\code{arg_checks = TRUE}) ensures that all inputs are formatted correctly and provides directed error messages if not.} } \value{ -\code{data.frame} of metrics for each \code{cast_id}. +\code{data.frame} of metrics for each cast for each species. } \description{ Summarize the cast-level errors or fits to the observations. diff --git a/man/plot_casts_cov_RMSE.Rd b/man/plot_casts_cov_RMSE.Rd new file mode 100644 index 000000000..fde558776 --- /dev/null +++ b/man/plot_casts_cov_RMSE.Rd @@ -0,0 +1,69 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/figures.R +\name{plot_casts_cov_RMSE} +\alias{plot_casts_cov_RMSE} +\title{Plot the forecast coverage and RMSE} +\usage{ +plot_casts_cov_RMSE(main = ".", cast_ids = NULL, cast_tab = NULL, + end_moons = NULL, models = NULL, data_set = NULL, species = NULL, + arg_checks = TRUE) +} +\arguments{ +\item{main}{\code{character} value of the name of the main component of +the directory tree.} + +\item{cast_ids}{\code{integer} (or integer \code{numeric}) values +representing the casts of interest for restricting plotting, as indexed +within the directory in the \code{casts} sub folder. +See the casts metadata file (\code{casts_metadata.csv}) for summary +information.} + +\item{cast_tab}{Optional \code{data.frame} of cast table outputs. If not +input, will be loaded.} + +\item{end_moons}{\code{integer} (or integer \code{numeric}) +newmoon numbers of the forecast origin. Default value is +\code{NULL}, which equates to no selection with respect to +\code{end_moon}.} + +\item{models}{\code{character} value(s) of the name of the model to +include. Default value is \code{NULL}, which equates to no selection with +respect to \code{model}. \code{NULL} translates to all \code{models} +in the table.} + +\item{data_set}{\code{character} value of the rodent data set to include +Default value is \code{NULL}, which equates to no selection with +respect to \code{data_set}.} + +\item{species}{\code{character} vector of the species code(s) +or \code{"total"} for the total across species) to be plotted +\code{NULL} translates to the species defined by +\code{evalplot_species}.} + +\item{arg_checks}{\code{logical} value of if the arguments should be +checked using standard protocols via \code{\link{check_args}}. The +default (\code{arg_checks = TRUE}) ensures that all inputs are +formatted correctly and provides directed error messages if not.} +} +\value{ +\code{NULL}. Plot is generated. +} +\description{ +Plot the coverage (fraction of predictions within the CI) and + RMSE (root mean squared error) of each model among multiple species. +} +\details{ +A pre-loaded table of casts can be input, but if not (default), + the table will be efficiently (as defined by the inputs) loaded and + trimmed. \cr + The casts can be trimmed specifically using the \code{cast_ids} input, + otherwise, all relevant casts will be plotted. +} +\examples{ + \donttest{ + setup_dir() + portalcast(models = "AutoArima", end_moons = 515:520) + plot_casts_cov_RMSE() +} + +} From 05ff44c9d81abe9c59f9d744b5f385ed38eec642 Mon Sep 17 00:00:00 2001 From: Juniper Simonis Date: Fri, 13 Sep 2019 13:05:49 -0700 Subject: [PATCH 16/18] Update figures.R --- R/figures.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/figures.R b/R/figures.R index 7db925a97..3a33465d9 100644 --- a/R/figures.R +++ b/R/figures.R @@ -200,7 +200,7 @@ plot_casts_cov_RMSE <- function(main = ".", cast_ids = NULL, par(mar = c(0, 0, 0, 0), fig = c(0.97, 1, y1, y2), new = TRUE) plot(1, 1, type = "n", xaxt = "n", yaxt = "n", ylab = "", xlab = "", bty = "n") - if (uspecies[i] == "total"){ + if (species[i] == "total"){ spt <- "Total Rodents" spf <- 2 } else{ From 77fbdd4e3c6542724e89b10fb7c4c1236d9c0c94 Mon Sep 17 00:00:00 2001 From: Juniper Simonis Date: Fri, 13 Sep 2019 15:00:25 -0700 Subject: [PATCH 17/18] Update process_casts.R --- R/process_casts.R | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/R/process_casts.R b/R/process_casts.R index 3b8c45cc9..cae1d16ef 100644 --- a/R/process_casts.R +++ b/R/process_casts.R @@ -54,8 +54,10 @@ measure_cast_level_error <- function(cast_tab = NULL, arg_checks = TRUE){ if(!is.null(covered)){ coverage[counter] <- mean(covered, na.rm = TRUE) } - model[counter] <- unique(cast_tab_ij$model) - counter <- counter + 1 + if(length(cast_tab_ij$model) > 0){ + model[counter] <- unique(cast_tab_ij$model) + } + counter <- counter + 1 } } ids <- rep(ucast_ids, each = nspecies) From 87360848f00fd3ab8bccf9f099b82b9aee609853 Mon Sep 17 00:00:00 2001 From: Juniper Simonis Date: Fri, 13 Sep 2019 18:11:03 -0700 Subject: [PATCH 18/18] tidying the package up for release --- NEWS.md | 7 ++++--- R/portalcast.R | 2 +- R/prepare_models.R | 2 +- README.md | 4 ++-- inst/WORDLIST | 1 + man/model_controls.Rd | 2 +- man/portalcast.Rd | 2 +- notes.R | 28 +--------------------------- 8 files changed, 12 insertions(+), 36 deletions(-) diff --git a/NEWS.md b/NEWS.md index 5ae79edd8..d27cccf75 100644 --- a/NEWS.md +++ b/NEWS.md @@ -2,16 +2,17 @@ Version numbers follow [Semantic Versioning](https://semver.org/). -# portalcasting 0.10.0 -*Active development* +# [portalcasting 0.10.0](https://github.com/weecology/portalcasting/releases/tag/v0.10.0) +*2019-09-13* ### Model evaluation and ensembling added back in * Were removed with the updated version from 0.8.1 to 0.9.0 to allow time to develop the code with the new infrastructure. * Model evaluation happens within the cast tab output as before. ### Temporarily removed figures returned -* Associated with the evaluation and ensembling. +* Associated with the evaluation. * Plotting of error as a function of lead time for multiple species and multiple models. Now has a fall-back arrangement that works for a single species-model combination. +* Plotting RMSE and coverage within species-model combinations. ### Flexing model controls to allow user-defined lists for prefab models * For sandboxing with existing models, it is useful to be able to change a parameter in the model's controls, such as the data sets. Previously, that would require a lot of hacking around. Now, it's as simple as inputting the desired controls and flipping `arg_checks = FALSE`. diff --git a/R/portalcast.R b/R/portalcast.R index 49aa02488..6ef87e6e5 100644 --- a/R/portalcast.R +++ b/R/portalcast.R @@ -1,7 +1,7 @@ #' @title Cast Portal rodents models #' #' @description Cast the Portal rodent population data using the (updated if -#' needed) data and models in a portalcasting directory. \cr ]cr +#' needed) data and models in a portalcasting directory. \cr \cr #' \code{portalcast} wraps around \code{cast} to allow multiple runs of #' multiple models, with data preparation as needed between runs occurring #' via \code{prep_data}. \cr \cr diff --git a/R/prepare_models.R b/R/prepare_models.R index 5ef736f30..5997c4aa7 100644 --- a/R/prepare_models.R +++ b/R/prepare_models.R @@ -54,7 +54,7 @@ prefab_model_controls <- function(){ #' for the prefab models or if \code{controls_model} contains any duplicate #' named elements, an error will be thrown unless \code{arg_checks = FALSE}, #' in which case, the first user-input element for any and all conflicting -#' copies is selected. This is override allowers users to test an existing +#' copies is selected. This override allows users to test an existing #' prefab model with different configurations, such as on a new data set, #' without requiring a new model. \cr \cr #' Users interested in adding models to the prefab set should add the diff --git a/README.md b/README.md index feca9be5e..4e9ad5e11 100644 --- a/README.md +++ b/README.md @@ -76,12 +76,12 @@ to spin up a container is forthcoming. ## Usage Get started with the ["how to set up a Portal Predictions directory" -vignette](https://weecology.github.io/portalcasting/articles/howto.html) +vignette](https://weecology.github.io/portalcasting/articles/getting_started.html) If you are interested in adding a model to the preloaded [set of models](https://weecology.github.io/portalcasting/articles/models.html), see the ["adding a model" -vignette](https://weecology.github.io/portalcasting/articles/adding_a_model.html). +vignette](https://weecology.github.io/portalcasting/articles/adding_model_and_data.html). ## Acknowledgements diff --git a/inst/WORDLIST b/inst/WORDLIST index 6aae0b92d..c684cc8e2 100644 --- a/inst/WORDLIST +++ b/inst/WORDLIST @@ -27,6 +27,7 @@ differencing DOI downscaled emptor +ensembling ESS ESSS exclosures diff --git a/man/model_controls.Rd b/man/model_controls.Rd index d7ac3c830..1998f2743 100644 --- a/man/model_controls.Rd +++ b/man/model_controls.Rd @@ -86,7 +86,7 @@ Any model that is part of the \code{prefab} set for the prefab models or if \code{controls_model} contains any duplicate named elements, an error will be thrown unless \code{arg_checks = FALSE}, in which case, the first user-input element for any and all conflicting - copies is selected. This is override allowers users to test an existing + copies is selected. This override allows users to test an existing prefab model with different configurations, such as on a new data set, without requiring a new model. \cr \cr Users interested in adding models to the prefab set should add the diff --git a/man/portalcast.Rd b/man/portalcast.Rd index ed9e4ef17..f2db10740 100644 --- a/man/portalcast.Rd +++ b/man/portalcast.Rd @@ -101,7 +101,7 @@ Results are saved to files, \code{NULL} is returned. } \description{ Cast the Portal rodent population data using the (updated if - needed) data and models in a portalcasting directory. \cr ]cr + needed) data and models in a portalcasting directory. \cr \cr \code{portalcast} wraps around \code{cast} to allow multiple runs of multiple models, with data preparation as needed between runs occurring via \code{prep_data}. \cr \cr diff --git a/notes.R b/notes.R index 52dd0a440..5db429401 100644 --- a/notes.R +++ b/notes.R @@ -1,31 +1,5 @@ -evaluations - -devtools::document() +devtools::load_all() main <- "~/hindcasting" setup_dir(main) -tiff("1.tiff", width = 9, height = 15, units = "in", res = 400) -plot_casts_cov_RMSE(main) -dev.off() - -select_casts(main) - -# at the starting point, we want to compare observation to predictions -# at each observation value -# an important consideration is that we want to actually predict the real -# data, not the interpolated data, so we want to make sure to point the -# _interp models to the true data, so we force remove that suffix to align -# predictions appropriately - - -# constrain things down to a single observation compared with a single -# predictive distribution -# we likely want to pull in the model's metadata so we can use the PIs - - -cast_tab <- read_cast_tab(main = main, cast_id = 1) - - -add_obs_to_cast_tab(main = main, cast_id = 1) -