This repository has been archived by the owner on Sep 9, 2022. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 10
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
#19 started work for colletor names helpers - not done yet
using tibble::as_data_frame for dframe now #21 still need to problably just use by deaflut everywhere
- Loading branch information
Showing
10 changed files
with
227 additions
and
42 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -5,7 +5,7 @@ Description: Clean biological occurrence records. Includes functionality | |
for cleaning based on various aspects of spatial coordinates, | ||
unlikely values due to political 'centroids', coordinates based on | ||
where collections of specimens are held, and more. | ||
Version: 0.1.1.9400 | ||
Version: 0.1.2.9115 | ||
Authors@R: c(person("Scott", "Chamberlain", role = c("aut", "cre"), | ||
email = "[email protected]")) | ||
License: MIT + file LICENSE | ||
|
@@ -20,7 +20,10 @@ Imports: | |
Matrix, | ||
magrittr, | ||
qlcMatrix, | ||
lazyeval | ||
lazyeval, | ||
crul, | ||
jsonlite, | ||
tibble | ||
Suggests: | ||
testthat, | ||
knitr, | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
#' Collector based cleaning | ||
#' | ||
#' @name collectors | ||
#' @keywords internal | ||
#' @param x (data.frame) A data.frame | ||
#' @param collector (character) Collector field to use. See Details. | ||
#' @param drop (logical) Drop bad data points or not. Either way, we parse | ||
#' out bade data points as an attribute you can access. Default: \code{TRUE} | ||
#' | ||
#' @return Returns a data.frame, with attributes | ||
#' | ||
#' @details | ||
#' Explanation of the functions: | ||
#' | ||
#' \itemize{ | ||
#' \item coll_clean - Standardize collector names | ||
#' } | ||
#' | ||
#' @examples | ||
#' df <- data.frame( | ||
#' coll = c('K.F.P. Martius', 'C. F. P. Martius', 'C. F. P. von Martius'), | ||
#' species = 'Poa annua', | ||
#' lat = 1:3, | ||
#' lon = 4:6, | ||
#' stringsAsFactors = FALSE | ||
#' ) | ||
#' | ||
#' # Standardize names | ||
#' NROW(df) | ||
#' df <- dframe(df) %>% coll_clean() | ||
#' NROW(df) | ||
#' attr(df, "coll_clean") | ||
|
||
#' @rdname collectors | ||
coll_clean <- function(x, collector = NULL) { | ||
x <- do_collectors(x, collector) | ||
x <- stand_collectors(x) | ||
if (NROW(x) == 0) x <- NA | ||
row.names(x) <- NULL | ||
structure(reassign(x), coll_clean = incomp) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
# assign consitent collector variable to be able to act on data inputs | ||
# more easily - and save original column names to rename data on return | ||
|
||
do_collectors <- function(x, collector) { | ||
x <- guess_collector(x, collector) | ||
if (is.null(attr(x, "coll_var_orig"))) attr(x, "coll_var_orig") <- collector | ||
return(x) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
guess_collector <- function(x, collector = NULL) { | ||
nms <- names(x) | ||
|
||
if (!is.null(attr(x, "coll_var_orig"))) collector <- attr(x, "coll_var_orig") | ||
|
||
if (is.null(collector)) { | ||
colls <- nms[grep(sprintf("^(%s)$", paste0(coll_options, collapse = "|")), | ||
nms, ignore.case = TRUE)] | ||
|
||
if (length(colls) == 1) { | ||
if (length(nms) > 2) { | ||
message("Assuming '", colls, "' is collector") | ||
} | ||
names(x)[names(x) %in% colls] <- coll_var <- "collector" | ||
} else { | ||
stop("Couldn't infer collector column, please specify w/ 'collector' parameter", | ||
call. = FALSE) | ||
} | ||
} else { | ||
if (!any(names(x) %in% collector)) { | ||
stop("'", collector, "' not found in your data", call. = FALSE) | ||
} | ||
names(x)[names(x) %in% collector] <- coll_var <- "collector" | ||
} | ||
|
||
structure(x, coll_var = coll_var) | ||
} | ||
|
||
coll_options <- c("recordedBy", "collector", "coll") |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,58 @@ | ||
#' Harvard botanist index functions | ||
#' | ||
#' @keywords internal | ||
#' @examples \dontrun{ | ||
#' bot_search(name = "Asa Gray") | ||
#' bot_search(name = "A. Gray") | ||
#' bot_search(remarks = "harvard") | ||
#' bot_search(name = "Gray", fuzzy = TRUE) | ||
#' bot_search() | ||
#' bot_search() | ||
#' | ||
#' ## FIXME - this leads to a JSON parsing error because they give | ||
#' ## bad JSON in some results, including this example | ||
#' # bot_search(country = "China") | ||
#' } | ||
bot_search <- function(name = NULL, individual = FALSE, start = NULL, | ||
fuzzy = FALSE, remarks = NULL, speciality = NULL, country = NULL, | ||
is_collector = FALSE, is_author = FALSE, team = FALSE, | ||
error = stop, ...) { | ||
|
||
cli <- crul::HttpClient$new(url = hbi_base()) | ||
args <- ct(list( | ||
name = name, json = "y", individual = logt(individual), start = start, | ||
soundslike = if (fuzzy) "true" else NULL, remarks = remarks, | ||
speciality = speciality, country = country, is_collector = logt(is_collector), | ||
is_author = logt(is_author), team = logt(team) | ||
)) | ||
res <- cli$get(query = args, ...) | ||
res$raise_for_status() | ||
if ((err <- grepl("no matching result", res$parse("UTF-8"), ignore.case = TRUE))) { | ||
error("(404) no matching results found", call. = FALSE) | ||
} | ||
if (err && as.character(substitute(error)) != "stop") return(NULL) | ||
tibble::as_data_frame( | ||
jsonlite::fromJSON(res$parse("UTF-8"))$botanists | ||
) | ||
} | ||
|
||
hbi_base <- function() 'http://kiki.huh.harvard.edu/databases/botanist_search.php' | ||
|
||
logt <- function(x) if (x) "on" else NULL | ||
|
||
clean_dirty_json <- function(x) { | ||
tmp <- gregexpr("\"\"[A-Za-z0-9]+\"\"", x)[[1]] | ||
if (tmp == -1) { | ||
x | ||
} else { | ||
substring(x, tmp, (tmp + attr(tmp, "match.length")) - 1) | ||
} | ||
} | ||
|
||
# http://kiki.huh.harvard.edu/databases/botanist_search.php?name=Asa+Gray&individual=on&json=y | ||
# http://kiki.huh.harvard.edu/databases/botanist_search.php?start=1&name=Gray&id=&soundslike=true&remarks=&specialty=&country=&is_collector=on | ||
|
||
stand_collectors <- function(x) { | ||
nms <- unique(x$collector) | ||
res <- ct(lapply(nms, bot_search)) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.