Skip to content

Commit

Permalink
Add database_create_combined_table & revise join_ functions (#104)
Browse files Browse the repository at this point in the history
Update previous join_ functions and create new database_create_combined_table function to replace join_all.

This includes:

    Splitting the old join_locations into two functions: join_location_coordinates to explicitly join latitude/longitude and join_location_properties to join location metadata
    join_contexts, join_locations and join_contributors now offer three output formats: many_columns, single_column_pretty and single_column_json
    All functions gain the option vars = "all", which adds all columns/location properties/context properties.
    Join_contexts is reworked, using the variant that was developed on traits.build for database_create_combined_table. The old join_contexts is still required for as_wide_table (the combined table currently output via austraits.build API) and has been moved to that file.
    as_wide_table maintained for now to support austraits.build API, but will be removed in the coming months.

---------

Co-authored-by: Daniel Falster <[email protected]>
  • Loading branch information
ehwenk and dfalster authored Sep 26, 2024
1 parent 20ced9a commit 16d932d
Show file tree
Hide file tree
Showing 25 changed files with 1,033 additions and 283 deletions.
1 change: 1 addition & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ Suggests:
ggpointdensity,
ggbeeswarm (>= 0.7.1),
gridExtra,
readr,
scales,
forcats,
viridis,
Expand Down
13 changes: 8 additions & 5 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -4,18 +4,21 @@ S3method(print,austraits)
export("%>%")
export(as_wide_table)
export(bind_trait_values)
export(database_create_combined_table)
export(extract_dataset)
export(extract_taxa)
export(extract_trait)
export(get_version_latest)
export(get_versions)
export(join_all)
export(join_contexts)
export(join_locations)
export(join_contributors)
export(join_location_coordinates)
export(join_location_properties)
export(join_methods)
export(join_sites)
export(join_taxonomy)
export(join_taxa)
export(join_taxonomic_updates)
export(load_austraits)
export(lookup_context_property)
export(lookup_location_property)
export(lookup_trait)
export(plot_locations)
export(plot_site_locations)
Expand Down
62 changes: 61 additions & 1 deletion R/as_wide_table.R
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ as_wide_table <- function(austraits){

# The contexts table needs the contexts collapsed to one context name per site
austraits %>%
join_contexts(collapse_context = TRUE) -> austraits
join_contexts_old(collapse = TRUE) -> austraits

# Getting rid of the columns that will soon be deleted in the next austraits release and renaming the description column
austraits$methods <-
Expand Down Expand Up @@ -104,4 +104,64 @@ collapse_cols <- function(data) {

data %>% purrr::imap_dfr(~ sprintf("%s='%s'",.y,.x)) %>%
tidyr::unite("text", sep="; ") %>% dplyr::pull(text)
}

#' Old join contexts function that collapses contexts into a single column and doesn't specify categories of context properties.
#'
#' For every dataset in `austraits$traits`, the context properties recorded in
#' `austraits$contexts` are pivoted to one column per context property and
#' left-joined onto the traits rows, matching on `dataset_id` and on the traits
#' column named by each `link_id`. Datasets with no contexts, and datasets
#' listed in `problem_studies`, are passed through unchanged.
#'
#' @param austraits A database list with (at least) `traits` and `contexts`
#'   tables; it must pass `check_compatibility()`.
#' @param collapse_context If `TRUE`, all joined context columns are collapsed
#'   with `collapse_cols()` into a single text column named `context`, and only
#'   the original traits columns plus `context` are kept.
#'   NOTE: `as_wide_table()` passes this argument as `collapse = TRUE`, i.e. via
#'   partial argument matching, so adding another parameter whose name starts
#'   with `collapse` would break that call.
#'
#' @return `austraits`, with `traits` replaced by the row-bound per-dataset
#'   tables carrying the joined (or collapsed) context information.
#' @keywords internal
#' @noRd
join_contexts_old <- function(austraits, collapse_context = FALSE) {
  # Check compatibility
  status <- check_compatibility(austraits)

  # If not compatible, hand over to the "not supported" handler
  if (!status) {
    function_not_supported(austraits)
  }

  # Work dataset by dataset: link_ids (and therefore join columns) differ
  # between datasets
  traits2 <- split(austraits$traits, austraits$traits$dataset_id)
  contexts2 <- split(austraits$contexts, austraits$contexts$dataset_id)

  # Column names of the original traits table; anything else present after the
  # joins below is a context column
  traits_vars <- names(austraits$traits)

  # Datasets deliberately excluded from the context join
  problem_studies <- c("Hall_1981")

  for (id in names(traits2)) {

    # Both operands are length-one, so use the scalar, short-circuiting `&&`.
    # `contexts2[[id]]` is NULL when the dataset has no rows in `contexts`.
    if (!is.null(contexts2[[id]][1]) && !(id %in% problem_studies)) {

      context_ids <-
        unique(contexts2[[id]]$link_id)

      for (v in context_ids[!is.na(context_ids)]) {

        # Build a lookup table for this link_id: one row per link value, one
        # column per context property, plus a column named after the link_id
        # (`v`) that holds the link values used as the join key
        context_sub <-
          contexts2[[id]] %>%
          dplyr::select(-dplyr::any_of(c("category", "description"))) %>%
          dplyr::filter(link_id == v) %>%
          tidyr::separate_rows(link_vals) %>%
          tidyr::pivot_wider(values_from = value, names_from = context_property) %>%
          tidyr::pivot_wider(names_from = link_id, values_from = link_vals)

        traits2[[id]] <-
          dplyr::left_join(
            traits2[[id]],
            context_sub,
            by = c("dataset_id", v)
          )
      }

      if (collapse_context == TRUE) {
        # Collapse every non-traits (i.e. joined context) column into one
        # text value per row
        context_text <-
          traits2[[id]] %>%
          dplyr::select(-dplyr::any_of(traits_vars)) %>%
          collapse_cols()

        traits2[[id]] <- traits2[[id]] %>%
          dplyr::mutate(context = context_text) %>%
          dplyr::select(dplyr::any_of(traits_vars), context)
      }
    }
  }

  austraits$traits <- traits2 %>% dplyr::bind_rows()

  austraits
}
40 changes: 40 additions & 0 deletions R/database_create_combined_table.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
#' Create combined traits.build table
#'
#' Create a single database output that merges together the information
#' in all relational tables within a traits.build database.
#' Trait measurements are still output in long format (1 row per trait value),
#' but all measurement-related metadata (methods, location properties, context properties, contributors)
#' are now included as additional columns in a single table.
#'
#' @param austraits A traits.build database
#' @param format Output format forwarded to `join_location_properties()`,
#'   `join_context_properties()` and `join_contributors()`.
#'   Defaults to `"single_column_pretty"`.
#' @param vars Named list selecting which variables to join from each
#'   relational table. The elements read are `location`, `context`,
#'   `contributors`, `taxonomy`, `taxonomic_updates` and `methods`; each is
#'   forwarded as the `vars` argument of the corresponding `join_` function.
#'   By default everything is `"all"`, except `methods`, which defaults to all
#'   columns of `austraits$methods` other than `data_collectors`.
#' @param include_description Forwarded to `join_context_properties()`.
#'   Defaults to `TRUE`.
#'
#' @return A table combining information in 7 traits.build relational tables: traits, locations, contexts, methods, taxa, taxonomic_updates, and contributors
#' @export
#'
database_create_combined_table <- function(austraits,
                                           format = "single_column_pretty",
                                           vars = list(
                                             location = "all",
                                             context = "all",
                                             contributors = "all",
                                             taxonomy = "all",
                                             taxonomic_updates = "all",
                                             methods = setdiff(names(austraits$methods), c("data_collectors"))
                                           ),
                                           include_description = TRUE
                                           ) {
  # Since `data_collectors` is also merged into the combined_table via the contributors tibble, we don't want the information twice.
  # That is why the default for `vars$methods` drops it.

  combined_table_relational <- austraits %>%
    join_location_coordinates() %>%
    join_location_properties(format = format, vars = vars$location) %>%
    # Honour the caller's `include_description` instead of hard-coding TRUE
    join_context_properties(
      format = format,
      vars = vars$context,
      include_description = include_description
    ) %>%
    join_methods(vars = vars$methods) %>%
    join_contributors(format = format, vars = vars$contributors) %>%
    join_taxa(vars = vars$taxonomy) %>%
    join_taxonomic_updates(vars = vars$taxonomic_updates)

  # Every join above adds its columns to the `traits` table; return that table
  # visibly rather than as the invisible value of an assignment
  combined_table_relational$traits
}
Loading

0 comments on commit 16d932d

Please sign in to comment.