Skip to content

Commit

Permalink
Mass removal of version switches (#102)
Browse files Browse the repository at this point in the history
Addresses #91

### Removal of switches
The following functions will no longer support AusTraits version < 5.0.0:
- extract_*
- trait_pivot_wider
- join_*
- plot_locations
- plot_trait_beeswarm
- as_wide_table

### Changes to existing functions
- plot_locations and trait_pivot_wider now accept either a traits table OR a traits.build list object
- summarise_trait_means will trigger a warning due to uninformed calculation of means.

### Deprecations
- plot_site_locations and trait_pivot_longer are deprecated

### Addition of helper functions to assist with removal of switches
- Added internal function to check the compatibility of databases
- Added internal function to notify users that some database versions will not be supported
  • Loading branch information
fontikar authored Sep 12, 2024
1 parent 4df279b commit 1c10ad1
Show file tree
Hide file tree
Showing 36 changed files with 413 additions and 922 deletions.
9 changes: 0 additions & 9 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -26,16 +26,7 @@ export(summarise_trait_means)
export(trait_pivot_longer)
export(trait_pivot_wider)
import(RefManageR)
importFrom(dplyr,arrange)
importFrom(dplyr,filter)
importFrom(dplyr,group_by)
importFrom(dplyr,select)
importFrom(dplyr,summarise)
importFrom(lifecycle,deprecated)
importFrom(magrittr,"%>%")
importFrom(stats,family)
importFrom(stringr,str_detect)
importFrom(tidyr,pivot_longer)
importFrom(tidyr,pivot_wider)
importFrom(tidyselect,all_of)
importFrom(utils,methods)
10 changes: 9 additions & 1 deletion NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,12 @@
- Updated dependencies, placing graphics related packages in Suggests
- Added internal function to check the compatibility of databases
- Added internal function to notify users that some database versions will not be supported
- No longer support AusTraits version <5.0.0 in `extract_*` functions
- The following functions will no longer support AusTraits version < 5.0.0:
- `extract_*`
- `trait_pivot_wider`
- `join_*`
- `plot_site/locations`
- `plot_trait_beeswarm`
- `as_wide_table`
- `trait_pivot_longer` is deprecated
- `summarise_trait_means` will trigger warning due to uninformed calculations
211 changes: 6 additions & 205 deletions R/as_wide_table.R
Original file line number Diff line number Diff line change
Expand Up @@ -15,20 +15,13 @@
#' @importFrom utils methods

as_wide_table <- function(austraits){
# Switch for different versions
version <- what_version(austraits)
# Check compatibility
status <- check_compatibility(austraits)

switch (version,
"5-series" = as_wide_table3(austraits),
"4-series" = as_wide_table2(austraits),
"3-series-earlier" = as_wide_table1(austraits)
)
}

#' Turning entire AusTraits object into wide table v5
#' @noRd
#' @keywords internal
as_wide_table3 <- function(austraits){
# If not compatible
if(!status){
function_not_supported(austraits)
}

# Function to collapse columns in locations and contexts into single column
process_table3 <- function(data) {
Expand Down Expand Up @@ -102,198 +95,6 @@ as_wide_table3 <- function(austraits){
austraits_wide
}

#' Turning entire AusTraits object into wide table v4
#'
#' Joins the locations, methods and taxa tables onto the traits table, after
#' passing the object through `join_contexts(collapse_context = TRUE)` and
#' collapsing the location properties of each location into a single
#' `location` text column.
#'
#' @param austraits AusTraits list object; this function reads its `traits`,
#'   `locations`, `methods` and `taxa` elements
#' @return the joined wide table. The commonly used columns listed at the end
#'   of the function are moved to the front (in the listed order); every other
#'   column is kept, after them.
#' @noRd
#' @keywords internal
as_wide_table2 <- function(austraits){

  # Function to collapse columns in locations and contexts into single column
  process_table2 <- function(data) {
    data %>%
      tidyr::pivot_wider(names_from = "property", values_from = "value") %>%
      tidyr::nest(data=-dplyr::any_of(c("dataset_id", "location_id", "latitude (deg)", "longitude (deg)"))) %>%
      dplyr::mutate(location = purrr::map_chr(data, collapse_cols)) %>%
      dplyr::select(-data)
  }

  ################################################################################
  # Define and adapt each table in the list of austraits to prepare for the wide table format

  # The contexts table needs the contexts collapsed to one context name per site
  austraits <-
    austraits %>%
    join_contexts(collapse_context = TRUE)

  # Rename the description column and drop duplicated rows
  austraits$methods <-
    austraits$methods %>%
    dplyr::rename(dataset_description = "description") %>%
    dplyr::distinct()

  # collapse into one column
  austraits$locations <-
    austraits$locations %>%
    dplyr::filter(value!="unknown") %>%
    dplyr::rename(property = "location_property") %>%
    split(., .$dataset_id) %>%
    purrr::map_dfr(process_table2)

  # Drop duplicated rows so the join on taxon_name does not repeat them
  austraits$taxa <-
    austraits$taxa %>%
    dplyr::distinct()

  austraits_wide <-
    austraits$traits %>%
    dplyr::left_join(austraits$locations, by = c("dataset_id", "location_id")) %>%
    dplyr::left_join(austraits$methods, by = c("dataset_id", "trait_name")) %>%
    dplyr::left_join(austraits$taxa, by = c("taxon_name"))

  # reorder the names to be more intuitive
  # The reordered table must be assigned back: previously the result of this
  # step was discarded and the un-reordered table was returned.
  # relocate() (rather than select()) is used so that columns not listed here
  # are kept, and callers that read them by name keep working.
  austraits_wide <-
    austraits_wide %>%
    dplyr::relocate(dplyr::any_of(c(

      # The most useful (if you are filtering for just one taxon_name)
      "dataset_id", "observation_id", "trait_name", "taxon_name", "value", "unit",
      "entity_type", "population_id", "individual_id",
      "value_type", "basis_of_value",
      "replicates",
      # tissue, trait_category,  # Add after new zenodo release

      # More stuff you can filter on
      "collection_date", "basis_of_record", "life_stage", "sampling_strategy",
      "treatment_id", "temporal_id",

      #stuff relating to locations
      "latitude (deg)", "longitude (deg)", "location", "plot_id",

      #stuff relating to contexts and methods
      "context", "methods", "original_name",

      #the citations
      "dataset_description", "source_primary_citation", "source_secondary_citation",

      #the taxa details
      "taxonomic_status", "taxon_distribution",
      "taxon_rank", "genus", "family"

    )))

  austraits_wide
}

#' Turning entire AusTraits object into wide table <=3.0.2
#'
#' Joins the contexts, sites, methods and taxa tables onto the traits table
#' and keeps a fixed, ordered set of columns.
#'
#' @param austraits AusTraits list object; this function reads its `traits`,
#'   `contexts`, `methods`, `sites` and `taxa` elements
#' @return the joined wide table, restricted to the columns named in the final
#'   `dplyr::select()` call, in that order
#' @noRd
#' @keywords internal
as_wide_table1 <- function(austraits){


################################################################################
# TODO: this updated with next zenodo release
# Load the trait classification doc - classifies the tissue type and type of trait based on the trait_name data field
# Exclude this for now -- will be added to definitions file in future release
# trait_class = read.csv("data-raw/Trait_classifications_v3.csv")
# trait_class[is.na(trait_class)] = ""
# trait_class <- trait_class %>% as_tibble()
#
# we only need two extra columns from the trait class table - collapsing two category and other_tags cols and renaming them for clarity
# x2 <-
# trait_class %>% dplyr::mutate(
# trait_category = str_c(category, "; ", other_tags) %>% gsub("; $", "", .)
# ) %>%
# dplyr::select(trait_name, tissue, trait_category)
#
# Function to collapse columns in sites and contexts into single column
# Spreads property/value pairs into columns, nests everything except the
# identifying columns, and collapses each nested table into one string
# (stored in a column called `site`) using collapse_cols.
process_table <- function(data) {

data %>%
tidyr::pivot_wider(names_from = "property", values_from = "value") %>%
tidyr::nest(data=-dplyr::any_of(c("dataset_id", "site_name", "context_name", "latitude (deg)", "longitude (deg)"))) %>%
dplyr::mutate(site = purrr::map_chr(data, collapse_cols)) %>%
dplyr::select(-data)
}

################################################################################
# Define and adapt each table in the list of austraits to prepare for the wide table format

# the trait table needs little prep. Rename the value column to trait_value
austraits$traits <-
austraits$traits %>%
dplyr::rename(trait_value = "value")

# The contexts table needs the contexts collapsed to one context name per site
# process_table names its collapsed column `site`, so it is renamed to
# `context` afterwards.
austraits$contexts <-
austraits$contexts %>%
dplyr::rename(property = "context_property") %>%
split(austraits$contexts$dataset_id) %>%
purrr::map_dfr(process_table) %>%
dplyr::rename(context = "site")

# Getting rid of the columns that will soon be deleted in the next austraits release and renaming the description column
austraits$methods <-
austraits$methods %>%
# -----------
# TODO: this section can be removed for next release
# Some studies have multiple records per traits. This breaks things when joining
# For now select first
dplyr::group_by(dataset_id, trait_name) %>%
dplyr::slice(1) %>%
dplyr:: ungroup() %>%
#------------
dplyr::select(-c("year_collected_start", "year_collected_end")) %>%
dplyr::rename(dataset_description = "description")

# collapse into one column
# Rows whose value is "unknown" are removed before collapsing.
austraits$sites <-
austraits$sites %>%
dplyr::filter(value!="unknown") %>%
# next line is a fix -- one dataset in 3.0.2 has value "site_name"
dplyr::mutate(site_property = gsub("site_name", "name", site_property)) %>%
dplyr::rename(property = "site_property") %>%
split(., .$dataset_id) %>%
purrr::map_dfr(process_table)

# rename source data field to reflect the APC/APNI name matching process better
austraits$taxa <-
austraits$taxa %>%
dplyr::rename(taxonNameValidation = "source")

# Join every prepared table onto the traits table
austraits_wide <-
austraits$traits %>%
dplyr::left_join(by=c("dataset_id", "context_name"), austraits$contexts) %>%
dplyr::left_join(by=c("dataset_id", "site_name"), austraits$sites) %>%
dplyr::left_join(by=c("dataset_id", "trait_name"), austraits$methods) %>%
dplyr::left_join(by=c("taxon_name"), austraits$taxa) %>%

# reorder the names to be more intuitive
# (this select is part of the pipeline, so only the columns listed below are kept)
dplyr::select(

# The most useful (if you are filtering for just one taxon_name)
"dataset_id", "observation_id", "trait_name", "taxon_name", "trait_value", "unit",
"value_type", "replicates",
# tissue, trait_category, # Add after new zenodo release

# More stuff you can filter on
"date", "collection_type", "sample_age_class", "sampling_strategy",

#stuff relating to sites
"latitude (deg)", "longitude (deg)", "site_name", "site",

#stuff relating to contexts and methods
"context_name", "context", "methods", "original_name",

#the citations
"dataset_description", "source_primary_citation", "source_secondary_citation",

#the taxa details
"taxonomicStatus", "taxonDistribution",
"taxonRank", "genus", "family", "acceptedNameUsageID",
"scientificNameAuthorship", "ccAttributionIRI"
)

austraits_wide
}

#' Collapse columns into text string
#' @keywords internal
#' @noRd
Expand Down
24 changes: 22 additions & 2 deletions R/check_compatibility.R
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#' @title Check compatibility
#' @title Check compatibility of traits.build object
#' @description Function to check whether the data object has been compiled by the traits.build workflow and
#' therefore has a data structure that is appropriate for use with austraits functions.
#' @param austraits dataframe generated by traits build
Expand All @@ -24,7 +24,8 @@ check_compatibility <- function(austraits) {
compiled_by_traits.build <-
austraits$metadata$related_identifiers %>%
util_list_to_df2() %>%
filter(relation_type == "isCompiledBy", stringr::str_detect(identifier, "github.com/traitecoevo/traits.build"))
dplyr::filter(relation_type == "isCompiledBy") |>
dplyr::filter(stringr::str_detect(identifier, "github.com/traitecoevo/traits.build"))

if(is.null(compiled_by_traits.build) | nrow(compiled_by_traits.build) > 0) {
compatible <- TRUE
Expand All @@ -39,3 +40,22 @@ check_compatibility <- function(austraits) {
invisible(compatible)

}




#' Check compatibility of traits table
#'
#' Decides compatibility from the column names alone: the table counts as
#' compatible when it has a `treatment_context_id` or a
#' `repeat_measurements_id` column.
#'
#' @param traits table in traits.build object
#'
#' @return logical, returned invisibly; TRUE when at least one of the marker
#'   columns is present (documented by the authors as indicating a traits
#'   table that came from traits.build version > 1.0), FALSE otherwise

check_traits_compatibility <- function(traits){
  # Columns whose presence marks a compatible traits table
  marker_columns <- c("treatment_context_id", "repeat_measurements_id")

  found <- marker_columns %in% names(traits)

  invisible(any(found))
}
18 changes: 0 additions & 18 deletions R/clean_NA.R

This file was deleted.

18 changes: 1 addition & 17 deletions R/extract_dataset.R
Original file line number Diff line number Diff line change
Expand Up @@ -22,13 +22,7 @@ extract_dataset <- function(austraits, dataset_id) {
if(!status){
function_not_supported(austraits)
}
extract_dataset2(austraits, dataset_id)
}

#' @title Extract specific dataset from austraits object for versions >3.0.2
#' @noRd

extract_dataset2 <- function(austraits, dataset_id){

austraits$taxonomic_updates <-
tidyr::separate_rows(austraits$taxonomic_updates, dataset_id, sep=" ")

Expand Down Expand Up @@ -64,13 +58,3 @@ extract_dataset2 <- function(austraits, dataset_id){
}


#' @title Extract specific dataset from austraits object for versions <=3.0.2
#' @description Placeholder for database versions <=3.0.2: no extraction is
#' performed here, the call is handed straight to `function_not_supported()`.
#' @param austraits austraits object, passed on to `function_not_supported()`
#' @param dataset_id accepted to mirror the other extract_dataset variants; not used
#' @noRd

extract_dataset1 <- function(austraits, dataset_id) {
  # dataset_id is deliberately ignored
  function_not_supported(austraits)
}


9 changes: 0 additions & 9 deletions R/extract_taxa.R
Original file line number Diff line number Diff line change
Expand Up @@ -23,15 +23,6 @@ extract_taxa <- function(austraits, family = NULL, genus = NULL, taxon_name = NU
if(!status){
function_not_supported(austraits)
}
extract_taxa2(austraits, family, genus, taxon_name)
}


#'Extract taxa for supported versions of databases
#' @noRd
#' @keywords internal
extract_taxa2 <- function(austraits, family = NULL, genus = NULL, taxon_name = NULL){

ret <- austraits

if(missing(family) & missing(genus) & missing(taxon_name)){
Expand Down
18 changes: 0 additions & 18 deletions R/extract_trait.R
Original file line number Diff line number Diff line change
Expand Up @@ -24,16 +24,7 @@ extract_trait <- function(austraits, trait_names, taxon_names=NULL) {
if(!status){
function_not_supported(austraits)
}
extract_trait2(austraits, trait_names, taxon_names)

}


#' @title Extract specific trait data from austraits object for versions >3.0.2
#' @noRd
#' @keywords internal
extract_trait2 <- function(austraits, trait_names, taxon_names=NULL) {

ret <- austraits

# Traits table
Expand Down Expand Up @@ -89,12 +80,3 @@ extract_trait2 <- function(austraits, trait_names, taxon_names=NULL) {

ret
}

#' @title Extract specific trait data from austraits object for versions <=3.0.2
#' @description Placeholder for database versions <=3.0.2: no extraction is
#' performed here, the call is handed straight to `function_not_supported()`.
#' @param austraits austraits object, passed on to `function_not_supported()`
#' @param trait_names accepted to mirror the other extract_trait variants; not used
#' @param taxon_names accepted to mirror the other extract_trait variants; not used
#' @noRd
#' @keywords internal
extract_trait1 <- function(austraits, trait_names, taxon_names=NULL) {
  # trait_names and taxon_names are deliberately ignored
  function_not_supported(austraits)
}
Loading

0 comments on commit 1c10ad1

Please sign in to comment.