From bd97af88df5d56bcb1f48f15fab86ae46fd943eb Mon Sep 17 00:00:00 2001 From: Scott Chamberlain Date: Tue, 24 Mar 2020 11:46:22 -0700 Subject: [PATCH] fix #331 added cache_mssg internal fxn for handling caching messages and added rnoaa_options() exported fxn to manage whether cache messages are thrown or not fix #164 ghcnd_stations() now caching files; and caching rds already parsed data.frame - big speed up fix #346 rnoaa_caching manual file with all caching objects fix #347 transitioned all (I think) data sources to using hoardr objects for caching --- DESCRIPTION | 2 +- NAMESPACE | 4 + NEWS.md | 8 ++ R/caching.R | 28 ------ R/defunct.R | 10 ++ R/ersst.R | 39 +++----- R/ghcnd.R | 162 +++++++++++++++---------------- R/isd_stations.R | 21 ++-- R/meteo_cache.r | 4 +- R/onload.R | 25 +++++ R/rnoaa_caching.R | 30 +++++- R/rnoaa_options.R | 30 ++++-- R/storm_events.R | 13 +-- R/storm_shp.R | 12 +-- R/storms.R | 45 ++++----- R/tornadoes.R | 58 +++++------ man/caching.Rd | 27 ------ man/ersst.Rd | 13 +-- man/ghcnd.Rd | 15 +-- man/ghcnd_clear_cache-defunct.Rd | 12 +++ man/ghcnd_stations.Rd | 5 +- man/isd_stations.Rd | 13 +-- man/rnoaa-defunct.Rd | 2 + man/rnoaa_caching.Rd | 10 +- man/storm_events.Rd | 9 +- man/storms.Rd | 9 +- man/tornadoes.Rd | 14 +-- 27 files changed, 294 insertions(+), 326 deletions(-) delete mode 100644 R/caching.R delete mode 100644 man/caching.Rd create mode 100644 man/ghcnd_clear_cache-defunct.Rd diff --git a/DESCRIPTION b/DESCRIPTION index 05a10485..dfd20d42 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -7,7 +7,7 @@ Description: Client for many 'NOAA' data sources including the 'NCDC' climate for 'NOAA' sea ice data, the 'NOAA' severe weather inventory, 'NOAA' Historical Observing 'Metadata' Repository ('HOMR') data, 'NOAA' storm data via 'IBTrACS', tornado data via the 'NOAA' storm prediction center, and more. 
-Version: 0.9.5.97 +Version: 0.9.5.98 License: MIT + file LICENSE Encoding: UTF-8 Language: en-US diff --git a/NAMESPACE b/NAMESPACE index d96d49c2..72876e69 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -39,6 +39,7 @@ export(erddap_info) export(erddap_search) export(erddap_table) export(ersst) +export(ersst_cache) export(gefs) export(gefs_dimension_values) export(gefs_dimensions) @@ -48,6 +49,7 @@ export(gefs_longitudes) export(gefs_times) export(gefs_variables) export(ghcnd) +export(ghcnd_cache) export(ghcnd_clear_cache) export(ghcnd_countries) export(ghcnd_read) @@ -118,8 +120,10 @@ export(storm_data) export(storm_meta) export(storm_shp) export(storm_shp_read) +export(storms_cache) export(swdi) export(theme_ice) +export(torn_cache) export(tornadoes) export(type_summ) export(vis_miss) diff --git a/NEWS.md b/NEWS.md index 8cc89991..7fbc507c 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,11 @@ +rnoaa 0.9.6 +=========== + +### MINOR IMPROVEMENTS + +* remove internal code in many exported functions looking for user input `path` parameter and telling them it's no longer used; been defunct for quite a while + + rnoaa 0.9.5 =========== diff --git a/R/caching.R b/R/caching.R deleted file mode 100644 index ff905a82..00000000 --- a/R/caching.R +++ /dev/null @@ -1,28 +0,0 @@ -#' Clear cached files -#' -#' @name caching -#' @param force (logical) Should we force removal of files if permissions -#' say otherwise?. Default: `FALSE` -#' -#' @details BEWARE: this will clear all cached files. -#' -#' @section File storage: -#' We use \pkg{rappdirs} to store files, see -#' [rappdirs::user_cache_dir()] for how -#' we determine the directory on your machine to save files to, and run -#' `user_cache_dir("rnoaa")` to get that directory. 
- -#' @export -#' @rdname caching -ghcnd_clear_cache <- function(force = FALSE) { - calls <- names(sapply(match.call(), deparse))[-1] - calls_vec <- "path" %in% calls - if (any(calls_vec)) { - stop("The parameter path has been removed, see ?ghcnd_clear_cache", - call. = FALSE) - } - - path <- file.path(rnoaa_cache_dir(), "ghcnd") - files <- list.files(path, full.names = TRUE) - unlink(files, recursive = TRUE, force = force) -} diff --git a/R/defunct.R b/R/defunct.R index 9873787b..d2a5e6f9 100644 --- a/R/defunct.R +++ b/R/defunct.R @@ -239,6 +239,14 @@ gefs_times <- function(...) { gefs_variables <- function(...) { .Defunct(msg = "`gefs_variables` is defunct; it may return later") } +#' This function is defunct. +#' @export +#' @rdname ghcnd_clear_cache-defunct +#' @keywords internal +ghcnd_clear_cache <- function(...) { + .Defunct(msg = "`ghcnd_clear_cache` is defunct; see ?rnoaa_caching") +} + #' Defunct functions in rnoaa #' @@ -272,6 +280,8 @@ gefs_variables <- function(...) { #' \item \code{\link{seaice}}: Replaced with \code{\link{sea_ice}} #' \item \code{\link{lcd_cleanup}}: No longer available. See \code{\link{lcd}} #' docs +#' \item \code{\link{ghcnd_clear_cache}}: No longer available. +#' See \code{\link{rnoaa_caching}} #' } #' #' The functions for working with GEFS ensemble forecast data (prefixed with diff --git a/R/ersst.R b/R/ersst.R index f92a1f9c..7dee8923 100644 --- a/R/ersst.R +++ b/R/ersst.R @@ -14,16 +14,7 @@ #' perhaps a data.frame. See \pkg{ncdf4} for parsing the output. #' @references #' -#' -#' @section File storage: -#' We use \pkg{rappdirs} to store files, see -#' [rappdirs::user_cache_dir()] for how we determine the directory on -#' your machine to save files to, and run -#' `rappdirs::user_cache_dir("rnoaa/ersst")` -#' to get that directory. -#' -#' Files are quite small, so we don't worry about reading in cached data to -#' save time, as we do in some of the other functions in this package. 
+#' @note See [ersst_cache] for managing cached files #' @examples \dontrun{ #' # October, 2015 #' ersst(year = 2015, month = 10) @@ -44,15 +35,8 @@ #' ncdf4::ncvar_get(res, "ssta") #' } ersst <- function(year, month, overwrite = TRUE, ...) { - calls <- names(sapply(match.call(), deparse))[-1] - calls_vec <- "path" %in% calls - if (any(calls_vec)) { - stop("The parameter path has been removed, see docs for ?ersst", - call. = FALSE) - } check4pkg("ncdf4") - path <- file.path(rnoaa_cache_dir(), "ersst") - ff <- ersst_local(path, year, month) + ff <- ersst_local(year, month) dpath <- ersst_GET(make_ersst(year, month), path = ff, overwrite, ...) ncdf4::nc_open(dpath) } @@ -82,13 +66,18 @@ check_month <- function(x) { } ersst_GET <- function(dat, path, overwrite, ...) { - dir.create(dirname(path), showWarnings = FALSE, recursive = TRUE) - cli <- crul::HttpClient$new(paste0(ersst_base(), dat), opts = list(...)) - res <- cli$get(disk = path) - res$raise_for_status() - res$content + ersst_cache$mkdir() + if (!file.exists(path)) { + cli <- crul::HttpClient$new(paste0(ersst_base(), dat), opts = list(...)) + res <- cli$get(disk = path) + res$raise_for_status() + res$content + } else { + cache_mssg(path) + return(path) + } } -ersst_local <- function(path, year, month) { - file.path(path, sprintf("%s%s.nc", year, month)) +ersst_local <- function(year, month) { + file.path(ersst_cache$cache_path_get(), sprintf("%s%s.nc", year, month)) } diff --git a/R/ghcnd.R b/R/ghcnd.R index d46f8050..1d3ad104 100644 --- a/R/ghcnd.R +++ b/R/ghcnd.R @@ -24,12 +24,12 @@ #' keep in the final data (e.g., `c("TMAX", "TMIN")` to only keep #' maximum and minimum temperature). 
Example choices for this argument #' include: -#' +#' #' - `PRCP`: Precipitation, in tenths of millimeters #' - `TAVG`: Average temperature, in tenths of degrees Celsius #' - `TMAX`: Maximum temperature, in tenths of degrees Celsius #' - `TMIN`: Minimum temperature, in tenths of degrees Celsius -#' +#' #' A full list of possible weather variables is available in NOAA's README #' file for the GHCND data #' (). @@ -42,7 +42,7 @@ #' dataframe with daily observations, as well as flag values, for one of #' the weather variables. The flag values give information on the quality #' and source of each observation; see the NOAA README file linked above -#' for more information. Each data.frame is sorted by date, with the +#' for more information. Each data.frame is sorted by date, with the #' earliest date first. #' #' @author Scott Chamberlain \email{myrmecocystus@@gmail.com}, @@ -54,8 +54,8 @@ #' and / or weather variables, using the `date_min`, `date_max`, #' and `var` arguments, does not occur until after the full data has #' been pulled. -#' -#' @details +#' +#' @details #' Messages are printed to the console about file path, file last modified time #' which you can suppress with `suppressMessages()` #' @@ -72,19 +72,12 @@ #' ghcnd_search("AGE00147704", var = c("PRCP","TMIN")) #' ghcnd_search("AGE00147704", var = c("PRCP","TMIN"), date_min = "1920-01-01") #' ghcnd_search("AGE00147704", var = "adfdf") -#' +#' #' # refresh the cached file #' ghcnd_search("AGE00147704", var = "PRCP", refresh = TRUE) #' } ghcnd_search <- function(stationid, date_min = NULL, date_max = NULL, var = "all", refresh = FALSE, ...) { - calls <- names(sapply(match.call(), deparse))[-1] - calls_vec <- "path" %in% calls - if (any(calls_vec)) { - stop("The parameter path has been removed, see docs for ?ghcnd_search", - call. = FALSE) - } - out <- ghcnd(stationid, refresh = refresh, ...) 
dat <- ghcnd_splitvars(out) @@ -121,18 +114,18 @@ ghcnd_search <- function(stationid, date_min = NULL, date_max = NULL, #' entire weather dataset for the site. #' #' @export -#' @param stationid (character) A character string giving the identification of -#' the weather station for which the user would like to pull data. To get a full +#' @param stationid (character) A character string giving the identification of +#' the weather station for which the user would like to pull data. To get a full #' and current list of stations, the user can use the \code{\link{ghcnd_stations}} #' function. To identify stations within a certain radius of a location, the #' user can use the \code{\link{meteo_nearby_stations}} function. #' @param path (character) a path to a file with a \code{.dly} extension - already #' downloaded on your computer -#' @param refresh (logical) If \code{TRUE} force re-download of data. +#' @param refresh (logical) If \code{TRUE} force re-download of data. #' Default: \code{FALSE} -#' @param ... In the case of \code{ghcnd} additional curl options to pass -#' through to \code{\link[crul]{HttpClient}}. In the case of \code{ghcnd_read} -#' further options passed on to \code{read.csv} +#' @param ... In the case of \code{ghcnd} additional curl options to pass +#' through to \code{\link[crul]{HttpClient}}. In the case of \code{ghcnd_read} +#' further options passed on to \code{read.csv} #' #' @return A tibble (data.frame) which contains data pulled from NOAA's FTP #' server for the queried weather site. A README file with more information @@ -148,10 +141,10 @@ ghcnd_search <- function(stationid, date_min = NULL, date_max = NULL, #' site locally in the directory specified by the \code{path} argument. 
#' #' You can access the path for the cached file via \code{attr(x, "source")} -#' -#' You can access the last modified time for the cached file via +#' +#' You can access the last modified time for the cached file via #' \code{attr(x, "file_modified")} -#' +#' #' Messages are printed to the console about file path and file last modified time #' which you can suppress with \code{suppressMessages()} #' @@ -164,19 +157,7 @@ ghcnd_search <- function(stationid, date_min = NULL, date_max = NULL, #' processes the output, or \code{\link{meteo_tidy_ghcnd}}, which wraps the #' \code{\link{ghcnd_search}} function to output a tidy dataframe. To pull #' GHCND data from multiple monitors, see \code{\link{meteo_pull_monitors}}. -#' -#' @section File storage: -#' We use \pkg{rappdirs} to store files, see -#' \code{\link[rappdirs]{user_cache_dir}} for how we determine the directory on -#' your machine to save files to, and run -#' \code{rappdirs::user_cache_dir("rnoaa/ghcnd")} to get that directory. -#' -#' Note that between versions of \pkg{rnoaa} you may want to clear your -#' cache of ghcnd files IF there are changes in ghcnd functions. See -#' \code{\link{ghcnd_clear_cache}} or you can do so manually. -#' -#' Using \code{refresh = TRUE} you can force a re-download of the data file. -#' +#' @note See [ghcnd_cache] for managing cached files #' @examples \dontrun{ #' # Get data #' ghcnd(stationid = "AGE00147704") @@ -196,38 +177,28 @@ ghcnd_search <- function(stationid, date_min = NULL, date_max = NULL, #' library("dplyr") #' dat <- ghcnd(stationid = "AGE00147704") #' filter(dat, element == "PRCP", year == 1909) -#' +#' #' # refresh the cached file #' ghcnd(stationid = "AGE00147704", refresh = TRUE) -#' +#' #' # Read in a .dly file you've already downloaded #' path <- system.file("examples/AGE00147704.dly", package = "rnoaa") #' ghcnd_read(path) #' } - ghcnd <- function(stationid, refresh = FALSE, ...) 
{ - calls <- names(sapply(match.call(), deparse))[-1] - calls_vec <- "path" %in% calls - if (any(calls_vec)) { - stop("The parameter path has been removed, see docs for ?ghcnd", - call. = FALSE) - } - - path <- file.path(rnoaa_cache_dir(), "ghcnd") - csvpath <- ghcnd_local(stationid, path) + csvpath <- ghcnd_local(stationid) if (!is_ghcnd(x = csvpath) || refresh) { - res <- ghcnd_GET(path, stationid, ...) + res <- ghcnd_GET(stationid, ...) } else { + cache_mssg(csvpath) res <- read.csv(csvpath, stringsAsFactors = FALSE, - colClasses = ghcnd_col_classes) + colClasses = ghcnd_col_classes) } fi <- file.info(csvpath) res <- remove_na_row(res) # remove trailing row of NA's res <- tibble::as_tibble(res) attr(res, 'source') <- csvpath attr(res, 'file_modified') <- fi[['mtime']] - message("file path: ", csvpath) - message("file last updated: ", attr(res, "file_modified")) return(res) } @@ -322,36 +293,63 @@ fm <- function(n) { #' # filter by station long name #' stations %>% filter(name == "CALLATHARRA") #' } -ghcnd_stations <- function(...){ - sta <- get_stations(...) - inv <- get_inventory(...) +ghcnd_stations <- function(refresh = FALSE, ...) { + assert(refresh, "logical") + stopifnot(length(refresh) == 1) + sta <- get_stations(refresh, ...) + inv <- get_inventory(refresh, ...) df <- merge(sta, inv[, -c(2, 3)], by = "id") tibble::as_tibble(df[stats::complete.cases(df), ]) } -get_stations <- function(...){ - res <- GET_retry( - "ftp://ftp.ncdc.noaa.gov/pub/data/ghcn/daily/ghcnd-stations.txt", - ...) - df <- read.fwf(as_tc_p(res), - widths = c(11, 9, 11, 7, 2, 31, 5, 10), - header = FALSE, strip.white = TRUE, comment.char = "", - stringsAsFactors = FALSE) - nms <- c("id","latitude", "longitude", "elevation", - "state", "name", "gsn_flag", "wmo_id") - stats::setNames(df, nms) +get_stations <- function(refresh = FALSE, ...) 
{ + ff <- file.path(ghcnd_cache$cache_path_get(), "ghcnd-stations.txt") + ffrds <- file.path(ghcnd_cache$cache_path_get(), "ghcnd-stations.rds") + if (file.exists(ffrds) && !refresh) { + cache_mssg(ffrds) + return(readRDS(ffrds)) + } else { + if (file.exists(ff)) unlink(ff) + if (file.exists(ffrds)) unlink(ffrds) + res <- GET_retry( + "ftp://ftp.ncdc.noaa.gov/pub/data/ghcn/daily/ghcnd-stations.txt", + disk = ff, ...) + df <- read.fwf(as_tc_p(res), + widths = c(11, 9, 11, 7, 2, 31, 5, 10), + header = FALSE, strip.white = TRUE, comment.char = "", + stringsAsFactors = FALSE) + nms <- c("id","latitude", "longitude", "elevation", + "state", "name", "gsn_flag", "wmo_id") + df <- stats::setNames(df, nms) + saveRDS(df, file = ffrds) + unlink(ff) + return(df) + } } -get_inventory <- function(...){ - res <- GET_retry( - "ftp://ftp.ncdc.noaa.gov/pub/data/ghcn/daily/ghcnd-inventory.txt", ...) - df <- read.fwf(as_tc_p(res), - widths = c(11, 9, 10, 5, 5, 5), - header = FALSE, strip.white = TRUE, comment.char = "", - stringsAsFactors = FALSE) - nms <- c("id","latitude", "longitude", - "element", "first_year", "last_year") - stats::setNames(df, nms) +get_inventory <- function(refresh = FALSE, ...) { + gg <- file.path(ghcnd_cache$cache_path_get(), "ghcnd-inventory.txt") + ggrds <- file.path(ghcnd_cache$cache_path_get(), "ghcnd-inventory.rds") + if (file.exists(ggrds) && !refresh) { + cache_mssg(ggrds) + return(readRDS(ggrds)) + } else { + if (file.exists(gg)) unlink(gg) + if (file.exists(ggrds)) unlink(ggrds) + res <- GET_retry( + "ftp://ftp.ncdc.noaa.gov/pub/data/ghcn/daily/ghcnd-inventory.txt", + disk = gg, ...) 
+ df <- read.fwf(as_tc_p(res), + widths = c(11, 9, 10, 5, 5, 5), + header = FALSE, strip.white = TRUE, comment.char = "", + stringsAsFactors = FALSE) + nms <- c("id","latitude", "longitude", + "element", "first_year", "last_year") + df <- stats::setNames(df, nms) + saveRDS(df, file = ggrds) + unlink(gg) + return(df) + } } strex <- function(x) str_extract_(x, "[0-9]+") @@ -511,9 +509,9 @@ ghcnd_zip <- function(x){ "adf" } -ghcnd_GET <- function(bp, stationid, ...){ - dir.create(bp, showWarnings = FALSE, recursive = TRUE) - fp <- ghcnd_local(stationid, bp) +ghcnd_GET <- function(stationid, ...){ + ghcnd_cache$mkdir() + fp <- ghcnd_local(stationid) cli <- crul::HttpClient$new(ghcnd_remote(stationid), opts = list(...)) res <- suppressWarnings(cli$get()) tt <- res$parse("UTF-8") @@ -537,12 +535,8 @@ ghcndbase <- function() "ftp://ftp.ncdc.noaa.gov/pub/data/ghcn/daily/all" ghcnd_remote <- function(stationid) { file.path(ghcndbase(), paste0(stationid, ".dly")) } -ghcnd_local <- function(stationid, path) { - # if (!is.null(attributes(stationid))) { - # stationid <- paste(stationid, attr(stationid, "date_min"), - # attr(stationid, "date_max"), sep = "_") - # } - file.path(path, paste0(stationid, ".dly")) +ghcnd_local <- function(stationid) { + file.path(ghcnd_cache$cache_path_get(), paste0(stationid, ".dly")) } is_ghcnd <- function(x) if (file.exists(x)) TRUE else FALSE str_extract_ <- function(string, pattern) { diff --git a/R/isd_stations.R b/R/isd_stations.R index 6c746fd8..47104432 100644 --- a/R/isd_stations.R +++ b/R/isd_stations.R @@ -3,11 +3,10 @@ #' @export #' @param refresh (logical) Download station data from NOAA ftp server again. 
#' Default: `FALSE` -#' #' @references ftp://ftp.ncdc.noaa.gov/pub/data/noaa/ #' @family isd -#' @details The data table is cached, but you can force download of data from NOAA -#' by setting `refresh=TRUE` +#' @details The data table is cached, but you can force download of data from +#' NOAA by setting `refresh=TRUE` #' @return a tibble (data.frame) with the columns: #' #' - usaf - USAF number, character @@ -22,13 +21,7 @@ #' - begin - Begin date of data coverage, of form YYYYMMDD, numeric #' - end - End date of data coverage, of form YYYYMMDD, numeric #' -#' -#' @section File storage: -#' We use \pkg{rappdirs} to store files, see -#' [rappdirs::user_cache_dir()] for how we determine the directory on -#' your machine to save files to, and run -#' `rappdirs::user_cache_dir("rnoaa")` to get that directory. -#' +#' @note See [isd_cache] for managing cached files #' @examples \dontrun{ #' # Get station table #' (stations <- isd_stations()) @@ -44,7 +37,9 @@ #' addCircles() #' } isd_stations <- function(refresh = FALSE) { - path <- normalizePath(file.path(rnoaa_cache_dir(), "isd_stations.rds")) + isd_cache$mkdir() + path <- suppressWarnings(normalizePath(file.path(isd_cache$cache_path_get(), + "isd_stations.rds"))) basedir <- normalizePath(dirname(path), winslash = "/") if (refresh || !file.exists(path)) { df <- read.csv(paste0(isdbase(), "/isd-history.csv"), @@ -55,11 +50,13 @@ isd_stations <- function(refresh = FALSE) { df$ELEV.M. <- as.numeric(df$ELEV.M.) 
df$BEGIN <- as.numeric(df$BEGIN) df$END <- as.numeric(df$END) - dat <- stats::setNames(df, gsub("_$", "", gsub("\\.", "_", tolower(names(df))))) + dat <- stats::setNames(df, + gsub("_$", "", gsub("\\.", "_", tolower(names(df))))) if (!file.exists(basedir)) dir.create(basedir, recursive = TRUE) saveRDS(dat, file = path) as_tibble(dat) } else { + cache_mssg(path) as_tibble(readRDS(path)) } } diff --git a/R/meteo_cache.r b/R/meteo_cache.r index ea9e28ac..c011b633 100644 --- a/R/meteo_cache.r +++ b/R/meteo_cache.r @@ -7,7 +7,7 @@ #' @family meteo #' @export meteo_clear_cache <- function(force = FALSE) { - files <- list.files(file.path(rnoaa_cache_dir(), "ghcnd"), full.names = TRUE) + files <- list.files(ghcnd_cache$cache_path_get(), full.names = TRUE) unlink(files, recursive = TRUE, force = force) } @@ -18,5 +18,5 @@ meteo_clear_cache <- function(force = FALSE) { #' @family meteo #' @export meteo_show_cache <- function() { - cat(file.path(rnoaa_cache_dir(), "ghcnd"), "\n") + cat(ghcnd_cache$cache_path_get(), "\n") } diff --git a/R/onload.R b/R/onload.R index 45999e5c..58163923 100644 --- a/R/onload.R +++ b/R/onload.R @@ -3,6 +3,11 @@ arc2_cache <- NULL lcd_cache <- NULL bsw_cache <- NULL isd_cache <- NULL +storms_cache <- NULL +stormevents_cache <- NULL +ersst_cache <- NULL +torn_cache <- NULL +ghcnd_cache <- NULL .onLoad <- function(libname, pkgname){ x <- hoardr::hoard() @@ -24,4 +29,24 @@ isd_cache <- NULL g <- hoardr::hoard() g$cache_path_set("noaa_isd") isd_cache <<- g + + s <- hoardr::hoard() + s$cache_path_set("noaa_storms") + storms_cache <<- s + + ss <- hoardr::hoard() + ss$cache_path_set("noaa_stormevents") + stormevents_cache <<- ss + + p <- hoardr::hoard() + p$cache_path_set("noaa_ersst") + ersst_cache <<- p + + torn <- hoardr::hoard() + torn$cache_path_set("noaa_tornadoes") + torn_cache <<- torn + + hh <- hoardr::hoard() + hh$cache_path_set("noaa_ghcnd") + ghcnd_cache <<- hh } diff --git a/R/rnoaa_caching.R b/R/rnoaa_caching.R index e934c40e..cd25f696 
100644 --- a/R/rnoaa_caching.R +++ b/R/rnoaa_caching.R @@ -33,11 +33,15 @@ #' #' @section Caching objects for each data source: #' -#' - `isd()`: `isd_cache` +#' - `isd()`/`isd_stations()`: `isd_cache` #' - `cpc_prcp()`: `cpc_cache` #' - `arc2()`: `arc2_cache` #' - `lcd()`: `lcd_cache` #' - `bsw()`: `bsw_cache` +#' - `storm_data()`: `storms_cache` +#' - `ersst()`: `ersst_cache` +#' - `tornadoes()`: `torn_cache` +#' - `ghcnd()`/`ghcnd_search()`: `ghcnd_cache` #' NULL @@ -70,3 +74,27 @@ NULL #' @usage NULL #' @export "bsw_cache" + +#' @rdname rnoaa_caching +#' @format NULL +#' @usage NULL +#' @export +"storms_cache" + +#' @rdname rnoaa_caching +#' @format NULL +#' @usage NULL +#' @export +"ersst_cache" + +#' @rdname rnoaa_caching +#' @format NULL +#' @usage NULL +#' @export +"torn_cache" + +#' @rdname rnoaa_caching +#' @format NULL +#' @usage NULL +#' @export +"ghcnd_cache" diff --git a/R/rnoaa_options.R b/R/rnoaa_options.R index 27ccdb53..a93c4a8c 100644 --- a/R/rnoaa_options.R +++ b/R/rnoaa_options.R @@ -21,11 +21,29 @@ stract <- function(str, pattern) regmatches(str, regexpr(pattern, str)) cache_mssg <- function(file) { if (roenv$cache_messages) { fi <- file.info(file) - size <- round(fi$size/1000000, 3) - chaftdec <- nchar(stract(as.character(size), '^[0-9]+')) - if (chaftdec > 1) size <- round(size, 1) - message("using cached file: ", file) - message( - sprintf("date created/size(mb): %s / %s", fi$ctime, size)) + if (NROW(fi) > 1) { + message("in directory: ", dirname(file[1])) + to_get <- min(c(3, length(file))) + ss <- file[seq_len(to_get)] + ss_str <- paste0(basename(ss), collapse = ", ") + if (to_get < length(file)) ss_str <- paste0(ss_str, " ...") + message("using cached files (first 3): ", ss_str) + message( + sprintf("[%s] date created: %s", + basename(row.names(fi)[1]), fi[1,"ctime"])) + } else { + if (!fi$isdir) { + size <- round(fi$size/1000000, 3) + chaftdec <- nchar(stract(as.character(size), '^[0-9]+')) + if (chaftdec > 1) size <- round(size, 1) + 
message("using cached file: ", file) + message( + sprintf("date created (size, mb): %s (%s)", fi$ctime, size)) + } else { + message("using cached directory: ", file) + message( + sprintf("date created: %s", fi$ctime)) + } + } } } diff --git a/R/storm_events.R b/R/storm_events.R index c65a9173..b1f85f36 100644 --- a/R/storm_events.R +++ b/R/storm_events.R @@ -12,11 +12,7 @@ storm_events_env <- new.env() #' @param ... Curl options passed on to [crul::verb-GET] #' (optional) #' @return A tibble (data.frame) -#' @section File storage: -#' We use \pkg{rappdirs} to store files, see -#' [rappdirs::user_cache_dir()] for how -#' we determine the directory on your machine to save files to, and run -#' `rappdirs::user_cache_dir("rnoaa/stormevents")` to get that directory. +#' @note See [storms_cache] for managing cached files #' @references #' #' @examples \dontrun{ @@ -52,7 +48,7 @@ se_data <- function(year, type, overwrite = TRUE, ...) { if (is.null(storm_events_env$files)) { storm_events_env$files <- se_files() } - path <- file.path(rnoaa_cache_dir(), "stormevents") + path <- stormevents_cache$cache_path_get() assert_range(year, unique(sort(storm_events_env$files$year))) if (!type %in% c("details", "fatalities", "locations", "legacy")) { stop("'type' must be one of: details, fatalities, locations, legacy") @@ -60,8 +56,9 @@ se_data <- function(year, type, overwrite = TRUE, ...) { csvpath <- se_csv_local(year, type, path) if (!is_se(x = csvpath)) { csvpath <- se_GET(path, year, type, overwrite, ...) + } else { + cache_mssg(csvpath) } - message(sprintf("%s", csvpath), "\n") tmp <- read.csv(csvpath, header = TRUE, sep = ",", stringsAsFactors = FALSE) names(tmp) <- tolower(names(tmp)) @@ -117,7 +114,7 @@ se_legacy_dir <- function(...) 
{ } se_GET <- function(bp, year, type, overwrite, ...){ - dir.create(bp, showWarnings = FALSE, recursive = TRUE) + stormevents_cache$mkdir() fp <- se_csv_local(year, type, bp) if (!overwrite) { if (file.exists(fp)) { diff --git a/R/storm_shp.R b/R/storm_shp.R index 6d48b315..7c8a9606 100644 --- a/R/storm_shp.R +++ b/R/storm_shp.R @@ -2,17 +2,12 @@ #' @rdname storms storm_shp <- function(basin=NULL, storm=NULL, year=NULL, type="points", overwrite = TRUE) { - calls <- names(sapply(match.call(), deparse))[-1] - calls_vec <- "path" %in% calls - if (any(calls_vec)) { - stop("The parameter path has been removed, see ?storms", - call. = FALSE) - } - - path <- file.path(rnoaa_cache_dir(), "storms") + path <- storms_cache$cache_path_get() shppath <- shp_local(basin, storm, year, path, type) if (!is_shpstorm(x = shppath)) { shppath <- shpstorm_GET(path, basin, storm, year, type, overwrite) + } else { + cache_mssg(shppath) } structure(list(path = spth(shppath)), class = "storm_shp", basin = basin, storm = storm, year = year, type = type) @@ -44,6 +39,7 @@ print.storm_shp <- function(x, ...) { } shpstorm_GET <- function(bp, basin, storm, year, type, overwrite) { + storms_cache$mkdir() dir.create(local_base(basin, storm, year, bp), showWarnings = FALSE, recursive = TRUE) fp <- shp_local(basin, storm, year, bp, type) diff --git a/R/storms.R b/R/storms.R index 50f608b7..ec1ad45b 100644 --- a/R/storms.R +++ b/R/storms.R @@ -37,15 +37,8 @@ #' The datasets included in the package [storm_names()], and #' [storm_columns()] may help in using these storm functions. #' -#' -#' @section File storage: -#' We use \pkg{rappdirs} to store files, see -#' [rappdirs::user_cache_dir()] for how -#' we determine the directory on your machine to save files to, and run -#' `rappdirs::user_cache_dir("rnoaa/storms")` to get that directory. 
-#' +#' @note See [storms_cache] for managing cached files #' @references -#' #' @examples \dontrun{ #' # Metadata #' head( storm_meta() ) @@ -92,29 +85,25 @@ #' @rdname storms storm_data <- function(basin = NULL, storm = NULL, year = NULL, overwrite = TRUE, ...) { - calls <- names(sapply(match.call(), deparse))[-1] - calls_vec <- "path" %in% calls - if (any(calls_vec)) { - stop("The parameter path has been removed, see ?storms", - call. = FALSE) - } - - path <- file.path(rnoaa_cache_dir(), "storms") - csvpath <- csv_local(basin, storm, year, path) - if (!is_storm(x = csvpath)) { - csvpath <- storm_GET(path, basin, storm, year, overwrite, ...) - } - message(sprintf("%s", csvpath), "\n") + csvpath <- storm_GET(basin, storm, year, overwrite, ...) tibble::as_tibble(storms_read_csv(csvpath)) } -storm_GET <- function(bp, basin, storm, year, overwrite, ...){ - dir.create(local_base(basin, storm, year, bp), showWarnings = FALSE, - recursive = TRUE) - fp <- csv_local(basin, storm, year, bp) - cli <- crul::HttpClient$new(csv_remote(basin, storm, year), opts = list(...)) - res <- suppressWarnings(cli$get(disk = fp)) - res$content +storm_GET <- function(basin, storm, year, overwrite, ...){ + storms_cache$mkdir() + bp <- storms_cache$cache_path_get() + csvpath <- csv_local(basin, storm, year, bp) + if (!is_storm(x = csvpath)) { + dir.create(local_base(basin, storm, year, bp), showWarnings = FALSE, + recursive = TRUE) + cli <- crul::HttpClient$new(csv_remote(basin, storm, year), + opts = list(...)) + res <- suppressWarnings(cli$get(disk = csvpath)) + res$content + } else { + cache_mssg(csvpath) + return(csvpath) + } } filecheck <- function(basin, storm, year){ diff --git a/R/tornadoes.R b/R/tornadoes.R index 3cfeaef4..d83cf74d 100644 --- a/R/tornadoes.R +++ b/R/tornadoes.R @@ -4,19 +4,10 @@ #' Service Storm Prediction Center Severe Weather GIS web page. #' #' @export -#' @param overwrite (logical) To overwrite the path to store files in or not, -#' Default: `TRUE` #' @param ... 
Curl options passed on to [crul::verb-GET] (optional) -#' #' @return A Spatial object is returned of class SpatialLinesDataFrame. #' @references https://www.spc.noaa.gov/gis/svrgis/ -#' -#' @section File storage: -#' We use \pkg{rappdirs} to store files, see -#' [rappdirs::user_cache_dir()] for how -#' we determine the directory on your machine to save files to, and run -#' `rappdirs::user_cache_dir("rnoaa/tornadoes")` to get that directory. -#' +#' @note See [torn_cache] for managing cached files #' @examples \dontrun{ #' shp <- tornadoes() #' library('sp') @@ -25,32 +16,25 @@ #' plot(shp) #' } #' } -tornadoes <- function(overwrite = TRUE, ...) { - calls <- names(sapply(match.call(), deparse))[-1] - calls_vec <- "path" %in% calls - if (any(calls_vec)) { - stop("The parameter path has been removed, see docs for ?tornadoes") - } - +tornadoes <- function(...) { check4pkg('rgdal') - path <- file.path(rnoaa_cache_dir(), "tornadoes") - if (!is_tornadoes(path)) { - url <- 'https://www.spc.noaa.gov/gis/svrgis/zipped/1950-2018-torn-aspath.zip' - tornadoes_GET(path, url, overwrite, ...) - } - readshp(file.path(path, tornadoes_basename)) + url <- 'https://www.spc.noaa.gov/gis/svrgis/zipped/1950-2018-torn-aspath.zip' + tornadoes_GET(url, ...) + readshp(file.path(torn_cache$cache_path_get(), tornadoes_basename)) } -tornadoes_GET <- function(bp, url, overwrite, ...){ - dir.create(bp, showWarnings = FALSE, recursive = TRUE) - fp <- file.path(bp, "tornadoes.zip") - if (!overwrite && file.exists(fp)) { - stop("file exists and overwrite=FALSE") +tornadoes_GET <- function(url, ...) 
{ + bp <- torn_cache$cache_path_get() + torn_cache$mkdir() + if (!is_tornadoes(file.path(bp, tornadoes_basename))) { + fp <- file.path(bp, "tornadoes.zip") + cli <- crul::HttpClient$new(url, opts = list(...)) + res <- cli$get(disk = fp) + res$raise_for_status() + unzip(fp, exdir = bp) + } else { + cache_mssg(bp) } - cli <- crul::HttpClient$new(url, opts = list(...)) - res <- cli$get(disk = fp) - res$raise_for_status() - unzip(fp, exdir = bp) } is_tornadoes <- function(x){ @@ -63,8 +47,10 @@ is_tornadoes <- function(x){ tornadoes_basename <- "1950-2018-torn-aspath" -readshp <- function(x) rgdal::readOGR(dsn = path.expand(x), - layer = tornadoes_basename, - stringsAsFactors = FALSE) +readshp <- function(x) { + rgdal::readOGR(dsn = path.expand(x), + layer = tornadoes_basename, stringsAsFactors = FALSE) +} -tornadoes_files <- paste0(tornadoes_basename, c(".dbf", ".prj", ".shp", ".shx")) +tornadoes_files <- paste0(tornadoes_basename, + c(".dbf", ".prj", ".shp", ".shx", ".cpg")) diff --git a/man/caching.Rd b/man/caching.Rd deleted file mode 100644 index 3602d51e..00000000 --- a/man/caching.Rd +++ /dev/null @@ -1,27 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/caching.R -\name{caching} -\alias{caching} -\alias{ghcnd_clear_cache} -\title{Clear cached files} -\usage{ -ghcnd_clear_cache(force = FALSE) -} -\arguments{ -\item{force}{(logical) Should we force removal of files if permissions -say otherwise?. Default: \code{FALSE}} -} -\description{ -Clear cached files -} -\details{ -BEWARE: this will clear all cached files. -} -\section{File storage}{ - -We use \pkg{rappdirs} to store files, see -\code{\link[rappdirs:user_cache_dir]{rappdirs::user_cache_dir()}} for how -we determine the directory on your machine to save files to, and run -\code{user_cache_dir("rnoaa")} to get that directory. 
-} - diff --git a/man/ersst.Rd b/man/ersst.Rd index c106db9d..c099269f 100644 --- a/man/ersst.Rd +++ b/man/ersst.Rd @@ -26,18 +26,9 @@ perhaps a data.frame. See \pkg{ncdf4} for parsing the output. \description{ NOAA Extended Reconstructed Sea Surface Temperature (ERSST) data } -\section{File storage}{ - -We use \pkg{rappdirs} to store files, see -\code{\link[rappdirs:user_cache_dir]{rappdirs::user_cache_dir()}} for how we determine the directory on -your machine to save files to, and run -\code{rappdirs::user_cache_dir("rnoaa/ersst")} -to get that directory. - -Files are quite small, so we don't worry about reading in cached data to -save time, as we do in some of the other functions in this package. +\note{ +See \link{ersst_cache} for managing cached files } - \examples{ \dontrun{ # October, 2015 diff --git a/man/ghcnd.Rd b/man/ghcnd.Rd index 08420189..0273ff83 100644 --- a/man/ghcnd.Rd +++ b/man/ghcnd.Rd @@ -55,20 +55,9 @@ You can access the last modified time for the cached file via Messages are printed to the console about file path and file last modified time which you can suppress with \code{suppressMessages()} } -\section{File storage}{ - -We use \pkg{rappdirs} to store files, see -\code{\link[rappdirs]{user_cache_dir}} for how we determine the directory on -your machine to save files to, and run -\code{rappdirs::user_cache_dir("rnoaa/ghcnd")} to get that directory. - -Note that between versions of \pkg{rnoaa} you may want to clear your -cache of ghcnd files IF there are changes in ghcnd functions. See -\code{\link{ghcnd_clear_cache}} or you can do so manually. - -Using \code{refresh = TRUE} you can force a re-download of the data file. 
+\note{ +See \link{ghcnd_cache} for managing cached files } - \examples{ \dontrun{ # Get data diff --git a/man/ghcnd_clear_cache-defunct.Rd b/man/ghcnd_clear_cache-defunct.Rd new file mode 100644 index 00000000..2006cd48 --- /dev/null +++ b/man/ghcnd_clear_cache-defunct.Rd @@ -0,0 +1,12 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/defunct.R +\name{ghcnd_clear_cache} +\alias{ghcnd_clear_cache} +\title{This function is defunct.} +\usage{ +ghcnd_clear_cache(...) +} +\description{ +This function is defunct. +} +\keyword{internal} diff --git a/man/ghcnd_stations.Rd b/man/ghcnd_stations.Rd index f71da8a1..a371e037 100644 --- a/man/ghcnd_stations.Rd +++ b/man/ghcnd_stations.Rd @@ -4,9 +4,12 @@ \alias{ghcnd_stations} \title{Get information on the GHCND weather stations} \usage{ -ghcnd_stations(...) +ghcnd_stations(refresh = FALSE, ...) } \arguments{ +\item{refresh}{(logical) If \code{TRUE} force re-download of data. +Default: \code{FALSE}} + \item{...}{In the case of \code{ghcnd} additional curl options to pass through to \code{\link[crul]{HttpClient}}. In the case of \code{ghcnd_read} further options passed on to \code{read.csv}} diff --git a/man/isd_stations.Rd b/man/isd_stations.Rd index b3c74075..b32001cb 100644 --- a/man/isd_stations.Rd +++ b/man/isd_stations.Rd @@ -30,17 +30,12 @@ a tibble (data.frame) with the columns: Get NOAA ISD/ISH station data from NOAA FTP server. } \details{ -The data table is cached, but you can force download of data from NOAA -by setting \code{refresh=TRUE} +The data table is cached, but you can force download of data from +NOAA by setting \code{refresh=TRUE} } -\section{File storage}{ - -We use \pkg{rappdirs} to store files, see -\code{\link[rappdirs:user_cache_dir]{rappdirs::user_cache_dir()}} for how we determine the directory on -your machine to save files to, and run -\code{rappdirs::user_cache_dir("rnoaa")} to get that directory. 
+\note{ +See \link{isd_cache} for managing cached files } - \examples{ \dontrun{ # Get station table diff --git a/man/rnoaa-defunct.Rd b/man/rnoaa-defunct.Rd index 79859ce0..f1fee2b4 100644 --- a/man/rnoaa-defunct.Rd +++ b/man/rnoaa-defunct.Rd @@ -34,6 +34,8 @@ package rerddap \item \code{\link{seaice}}: Replaced with \code{\link{sea_ice}} \item \code{\link{lcd_cleanup}}: No longer available. See \code{\link{lcd}} docs +\item \code{\link{ghcnd_clear_cache}}: No longer available. +See \code{\link{rnoaa_caching}} } } \details{ diff --git a/man/rnoaa_caching.Rd b/man/rnoaa_caching.Rd index fcfbfb86..7d63b540 100644 --- a/man/rnoaa_caching.Rd +++ b/man/rnoaa_caching.Rd @@ -8,6 +8,10 @@ \alias{arc2_cache} \alias{lcd_cache} \alias{bsw_cache} +\alias{storms_cache} +\alias{ersst_cache} +\alias{torn_cache} +\alias{ghcnd_cache} \title{rnoaa caching} \description{ Manage data caches @@ -46,11 +50,15 @@ Assuming x is a \code{HoardClient} class object, e.g., \code{lcd_cache} \section{Caching objects for each data source}{ \itemize{ -\item \code{isd()}: \code{isd_cache} +\item \code{isd()}/\code{isd_stations()}: \code{isd_cache} \item \code{cpc_prcp()}: \code{cpc_cache} \item \code{arc2()}: \code{arc2_cache} \item \code{lcd()}: \code{lcd_cache} \item \code{bsw()}: \code{bsw_cache} +\item \code{storm_data()}: \code{storms_cache} +\item \code{ersst()}: \code{ersst_cache} +\item \code{tornadoes()}: \code{torn_cache} +\item \code{ghcnd()}/\code{ghcnd_search()}: \code{ghcnd_cache} } } diff --git a/man/storm_events.Rd b/man/storm_events.Rd index c9f6e5e0..d9fc0d4e 100644 --- a/man/storm_events.Rd +++ b/man/storm_events.Rd @@ -29,14 +29,9 @@ A tibble (data.frame) \description{ NOAA Storm Events data } -\section{File storage}{ - -We use \pkg{rappdirs} to store files, see -\code{\link[rappdirs:user_cache_dir]{rappdirs::user_cache_dir()}} for how -we determine the directory on your machine to save files to, and run -\code{rappdirs::user_cache_dir("rnoaa/stormevents")} to get that 
directory. +\note{ +See \link{storms_cache} for managing cached files } - \examples{ \dontrun{ # get list of files and their urls diff --git a/man/storms.Rd b/man/storms.Rd index b69eb4aa..ed5ad033 100644 --- a/man/storms.Rd +++ b/man/storms.Rd @@ -72,14 +72,9 @@ See \url{http://www.ncdc.noaa.gov/ibtracs/index.php?name=numbering} for more The datasets included in the package \code{\link[=storm_names]{storm_names()}}, and \code{\link[=storm_columns]{storm_columns()}} may help in using these storm functions. } -\section{File storage}{ - -We use \pkg{rappdirs} to store files, see -\code{\link[rappdirs:user_cache_dir]{rappdirs::user_cache_dir()}} for how -we determine the directory on your machine to save files to, and run -\code{rappdirs::user_cache_dir("rnoaa/storms")} to get that directory. +\note{ +See \link{storms_cache} for managing cached files } - \examples{ \dontrun{ # Metadata diff --git a/man/tornadoes.Rd b/man/tornadoes.Rd index a41bf9b0..67f22a61 100644 --- a/man/tornadoes.Rd +++ b/man/tornadoes.Rd @@ -4,12 +4,9 @@ \alias{tornadoes} \title{Get NOAA tornado data.} \usage{ -tornadoes(overwrite = TRUE, ...) +tornadoes(...) } \arguments{ -\item{overwrite}{(logical) To overwrite the path to store files in or not, -Default: \code{TRUE}} - \item{...}{Curl options passed on to \link[crul:verb-GET]{crul::verb-GET} (optional)} } \value{ @@ -19,14 +16,9 @@ A Spatial object is returned of class SpatialLinesDataFrame. This function gets spatial paths of tornadoes from NOAA's National Weather Service Storm Prediction Center Severe Weather GIS web page. } -\section{File storage}{ - -We use \pkg{rappdirs} to store files, see -\code{\link[rappdirs:user_cache_dir]{rappdirs::user_cache_dir()}} for how -we determine the directory on your machine to save files to, and run -\code{rappdirs::user_cache_dir("rnoaa/tornadoes")} to get that directory. +\note{ +See \link{torn_cache} for managing cached files } - \examples{ \dontrun{ shp <- tornadoes()