Mass removal of version switches (#102)

Addresses #91 ### Removal of switches The following functions will no longer support AusTraits versions < 5.0.0: - extract_* - trait_pivot_wider - join_* - plot_locations - plot_trait_beeswarm - as_wide_table ### Changes to existing functions - plot_locations and trait_pivot_wider now accept either a traits table or a traits.build list object - summarise_trait_means will trigger a warning due to uninformed calculation of means. ### Deprecations - plot_site_locations and trait_pivot_longer are deprecated ### Addition of helper functions to assist with removal of switches - Added internal function to check the compatibility of databases - Added internal function to notify users that some database versions will not be supported
traitecoevo · Sep 12, 2024 · 1c10ad1 · 1c10ad1
1 parent 4df279b
commit 1c10ad1
Show file tree

Hide file tree

Showing 36 changed files with 413 additions and 922 deletions.
diff --git a/NAMESPACE b/NAMESPACE
@@ -26,16 +26,7 @@ export(summarise_trait_means)
 export(trait_pivot_longer)
 export(trait_pivot_wider)
 import(RefManageR)
-importFrom(dplyr,arrange)
-importFrom(dplyr,filter)
-importFrom(dplyr,group_by)
-importFrom(dplyr,select)
-importFrom(dplyr,summarise)
 importFrom(lifecycle,deprecated)
 importFrom(magrittr,"%>%")
 importFrom(stats,family)
-importFrom(stringr,str_detect)
-importFrom(tidyr,pivot_longer)
-importFrom(tidyr,pivot_wider)
-importFrom(tidyselect,all_of)
 importFrom(utils,methods)
diff --git a/NEWS.md b/NEWS.md
@@ -2,4 +2,12 @@
 - Updated dependencies, placing graphics related packages in Suggests
 - Added internal function to check the compatibility of databases
 - Added internal function to notify users that some database versions will not be supported
-- No longer support AusTraits version <5.0.0 in `extract_*` functions
+- The following functions will no longer support AusTraits version < 5.0.0:
+    - `extract_*` 
+    - `trait_pivot_wider`
+    - `join_*`
+    - `plot_site/locations`
+    - `plot_trait_beeswarm`
+    - `as_wide_table`
+- `trait_pivot_longer` is deprecated
+- `summarise_trait_means` will trigger a warning due to uninformed calculation of means
diff --git a/R/as_wide_table.R b/R/as_wide_table.R
@@ -15,20 +15,13 @@
 #' @importFrom utils methods
 
 as_wide_table <- function(austraits){
-  # Switch for different versions
-  version <- what_version(austraits)
+  # Check compatibility
+  status <- check_compatibility(austraits)
 
-  switch (version,
-          "5-series" = as_wide_table3(austraits),
-          "4-series" = as_wide_table2(austraits),
-          "3-series-earlier" = as_wide_table1(austraits)
-          )
-}
-
-#' Turning entire AusTraits object into wide table v5
-#' @noRd
-#' @keywords internal
-as_wide_table3 <- function(austraits){
+  # If not compatible, report that this database version is not supported
+  if(!status){
+    function_not_supported(austraits)
+  }
 
   # Function to collapse columns in locations and contexts into single column
   process_table3 <- function(data) {
@@ -102,198 +95,6 @@ as_wide_table3 <- function(austraits){
   austraits_wide
 }
 
-#' Turning entire AusTraits object into wide table v4
-#' @noRd
-#' @keywords internal
-as_wide_table2 <- function(austraits){
-
-  # Function to collapse columns in locations and contexts into single column
-  process_table2 <- function(data) {
-    data %>% 
-      tidyr::pivot_wider(names_from = "property", values_from = "value") %>% 
-      tidyr::nest(data=-dplyr::any_of(c("dataset_id", "location_id", "latitude (deg)", "longitude (deg)"))) %>%
-      dplyr::mutate(location = purrr::map_chr(data, collapse_cols)) %>%
-      dplyr::select(-data) 
-  }
-
-  ################################################################################
-  # Define and adapt each table in the list of austraits to prepare for the wide table format 
-
-  # The contexts table needs the contexts collapsed to one context name per site
-  austraits %>% 
-    join_contexts(collapse_context = TRUE) -> austraits
-
-  # Getting rid of the columns that will soon be deleted in the next austraits release and renaming the description column
-  austraits$methods <- 
-    austraits$methods %>% 
-    dplyr::rename(dataset_description = "description")  %>% 
-    dplyr::distinct()
-
-  # collapse into one column
-  austraits$locations <- 
-    austraits$locations %>% 
-    dplyr::filter(value!="unknown") %>% 
-    dplyr::rename(property = "location_property") %>%
-    split(., .$dataset_id) %>%
-    purrr::map_dfr(process_table2)
-
-  # rename taxonomic_dataset field to reflect the APC/APNI name matching process better
-  austraits$taxa <- 
-    austraits$taxa %>% 
-    dplyr::distinct()
-
-  austraits_wide <- 
-    austraits$traits %>% 
-    dplyr::left_join(by=c("dataset_id", "location_id"), austraits$locations) %>%
-    dplyr::left_join(by=c("dataset_id", "trait_name"), austraits$methods) %>%
-    dplyr::left_join(by=c("taxon_name"), austraits$taxa)
-
-  # reorder the names to be more intuitive
-  austraits_wide %>% dplyr::select(dplyr::any_of(c(
-
-    # The most useful (if you are filtering for just one taxon_name)
-    "dataset_id", "observation_id", "trait_name", "taxon_name", "value", "unit", 
-    "entity_type", "population_id", "individual_id",
-    "value_type", "basis_of_value", 
-    "replicates", 
-    # tissue, trait_category,  # Add after new zenodo release
-
-    # More stuff you can filter on
-    "collection_date", "basis_of_record", "life_stage", "sampling_strategy", 
-    "treatment_id", "temporal_id", 
-
-    #stuff relating to locations
-    "latitude (deg)", "longitude (deg)", "location", "plot_id",
-
-    #stuff relating to contexts and methods
-    "context", "methods", "original_name",
-
-    #the citations
-    "dataset_description", "source_primary_citation", "source_secondary_citation",
-
-    #the taxa details
-    "taxonomic_status", "taxon_distribution", 
-    "taxon_rank", "genus", "family"
-
-  )
-  )
-  )
-
-  austraits_wide
-}
-
-#' Turning entire AusTraits object into wide table <=3.0.2
-#' @noRd
-#' @keywords  internal
-as_wide_table1 <- function(austraits){
-
-
-  ################################################################################
-  # TODO: this updated with next zenodo release
-  # Load the trait classification doc - classifies the tissue type and type of trait based on the trait_name data field
-  # Exclude this for now -- will be added to definitions file in future release
-  # trait_class = read.csv("data-raw/Trait_classifications_v3.csv")
-  # trait_class[is.na(trait_class)] = ""
-  # trait_class <- trait_class %>% as_tibble()
-  # 
-  # we only need two extra columns from the trait class table - collapsing two category and other_tags cols and renaming them for clarity
-  # x2 <- 
-  #   trait_class %>% dplyr::mutate(
-  #   trait_category = str_c(category, "; ", other_tags) %>% gsub("; $", "", .)
-  # ) %>% 
-  #   dplyr::select(trait_name, tissue, trait_category)
-  # 
-  # Function to collapse columns in sites and contexts into single column
-  process_table <- function(data) {
-
-    data %>% 
-      tidyr::pivot_wider(names_from = "property", values_from = "value") %>% 
-      tidyr::nest(data=-dplyr::any_of(c("dataset_id", "site_name", "context_name", "latitude (deg)", "longitude (deg)"))) %>%
-      dplyr::mutate(site = purrr::map_chr(data, collapse_cols)) %>%
-      dplyr::select(-data) 
-  }
-
-  ################################################################################
-  # Define and adapt each table in the list of austraits to prepare for the wide table format 
-
-  # the trait table needs little prep. Rename the value columns as value
-  austraits$traits <- 
-    austraits$traits %>% 
-    dplyr::rename(trait_value = "value")
-
-  # The contexts table needs the contexts collapsed to one context name per site
-  austraits$contexts <- 
-    austraits$contexts %>% 
-    dplyr::rename(property = "context_property") %>%
-    split(austraits$contexts$dataset_id) %>%
-    purrr::map_dfr(process_table)  %>% 
-    dplyr::rename(context = "site")
-
-  # Getting rid of the columns that will soon be deleted in the next austraits release and renaming the description column
-  austraits$methods <- 
-    austraits$methods %>% 
-    #  -----------
-  # TODO: this section can be removed for next release
-  # Some studies have multiple records per traits. This breaks things when joining
-  # For now select first
-  dplyr::group_by(dataset_id, trait_name) %>% 
-    dplyr::slice(1) %>%
-    dplyr:: ungroup() %>%
-    #------------
-  dplyr::select(-c("year_collected_start", "year_collected_end")) %>% 
-    dplyr::rename(dataset_description = "description")  
-
-  # collapse into one column
-  austraits$sites <- 
-    austraits$sites %>% 
-    dplyr::filter(value!="unknown") %>% 
-    # next line is a fix -- one dataset in 3.0.2 has value "site_name"
-    dplyr::mutate(site_property = gsub("site_name", "name", site_property)) %>%
-    dplyr::rename(property = "site_property") %>%
-    split(., .$dataset_id) %>%
-    purrr::map_dfr(process_table)
-
-  # rename source data field to reflect the APC/APNI name matching process better
-  austraits$taxa <- 
-    austraits$taxa %>% 
-    dplyr::rename(taxonNameValidation = "source")
-
-  austraits_wide <- 
-    austraits$traits %>%
-    dplyr::left_join(by=c("dataset_id", "context_name"), austraits$contexts) %>%
-    dplyr::left_join(by=c("dataset_id", "site_name"), austraits$sites) %>%
-    dplyr::left_join(by=c("dataset_id", "trait_name"), austraits$methods) %>%
-    dplyr::left_join(by=c("taxon_name"), austraits$taxa) %>%
-
-    # reorder the names to be more intuitive
-    dplyr::select(
-
-      # The most useful (if you are filtering for just one taxon_name)
-      "dataset_id", "observation_id", "trait_name", "taxon_name", "trait_value", "unit", 
-      "value_type", "replicates", 
-      # tissue, trait_category,  # Add after new zenodo release
-
-      # More stuff you can filter on
-      "date", "collection_type", "sample_age_class", "sampling_strategy", 
-
-      #stuff relating to sites
-      "latitude (deg)", "longitude (deg)", "site_name", "site",
-
-      #stuff relating to contexts and methods
-      "context_name", "context", "methods", "original_name",
-
-      #the citations
-      "dataset_description", "source_primary_citation", "source_secondary_citation",
-
-      #the taxa details
-      "taxonomicStatus", "taxonDistribution", 
-      "taxonRank", "genus", "family", "acceptedNameUsageID", 
-      "scientificNameAuthorship", "ccAttributionIRI"
-    )
-
-  austraits_wide
-}
-
 #' Collapse columns into text string
 #' @keywords internal
 #' @noRd

diff --git a/R/check_compatibility.R b/R/check_compatibility.R
@@ -1,4 +1,4 @@
-#' @title Check compatibility
+#' @title Check compatibility of traits.build object
 #' @description Function to check whether the data object has been compiled by the traits.build workflow and 
 #' therefore has a data structure that is appropriate for use with austraits functions.
 #' @param austraits dataframe generated by traits build
@@ -24,7 +24,8 @@ check_compatibility <- function(austraits) {
     compiled_by_traits.build <-
       austraits$metadata$related_identifiers %>% 
       util_list_to_df2() %>%
-      filter(relation_type == "isCompiledBy", stringr::str_detect(identifier, "github.com/traitecoevo/traits.build"))
+      dplyr::filter(relation_type == "isCompiledBy") |> 
+      dplyr::filter(stringr::str_detect(identifier, "github.com/traitecoevo/traits.build"))
 
     if(is.null(compiled_by_traits.build) | nrow(compiled_by_traits.build) > 0) {
       compatible <- TRUE
@@ -39,3 +40,22 @@ check_compatibility <- function(austraits) {
   invisible(compatible)
 
 }
+
+
+
+
+#' Check compatibility of traits table
+#'
+#' @param traits traits table from a traits.build object; only its column names are inspected
+#'
+#' @return logical, returned invisibly: TRUE if `traits` has a `treatment_context_id` or `repeat_measurements_id` column (taken to indicate the table came from traits.build version > 1.0), otherwise FALSE
+
+check_traits_compatibility <- function(traits){
+  # Check compatibility using column names: the presence of either marker column is sufficient
+  if(any(names(traits) %in% c("treatment_context_id", "repeat_measurements_id"))){
+    compatible <- TRUE
+  } else
+    compatible <- FALSE
+
+  invisible(compatible)
+}
diff --git a/R/clean_NA.R b/R/clean_NA.R
diff --git a/R/extract_dataset.R b/R/extract_dataset.R
@@ -22,13 +22,7 @@ extract_dataset <- function(austraits, dataset_id) {
   if(!status){
     function_not_supported(austraits)
   }
-    extract_dataset2(austraits, dataset_id)
-}
-
-#' @title Extract specific dataset from austraits object for versions >3.0.2
-#' @noRd 
-
-extract_dataset2 <- function(austraits, dataset_id){
+
   austraits$taxonomic_updates <-
     tidyr::separate_rows(austraits$taxonomic_updates, dataset_id, sep=" ")
 
@@ -64,13 +58,3 @@ extract_dataset2 <- function(austraits, dataset_id){
 }
 
 
-#' @title Extract specific dataset from austraits object for versions <=3.0.2
-#' @noRd
-
-extract_dataset1 <- function(austraits, dataset_id){
-
-  function_not_supported(austraits)
-
-  }
-
-
diff --git a/R/extract_taxa.R b/R/extract_taxa.R
@@ -23,15 +23,6 @@ extract_taxa <- function(austraits, family = NULL, genus = NULL, taxon_name = NU
   if(!status){
     function_not_supported(austraits)
   } 
-  extract_taxa2(austraits, family, genus, taxon_name)
-}
-
-
-#'Extract taxa for supported versions of databases
-#' @noRd
-#' @keywords internal
-extract_taxa2 <- function(austraits, family = NULL, genus = NULL, taxon_name = NULL){
-
   ret <- austraits
 
   if(missing(family) & missing(genus) & missing(taxon_name)){

diff --git a/R/extract_trait.R b/R/extract_trait.R
@@ -24,16 +24,7 @@ extract_trait <- function(austraits, trait_names, taxon_names=NULL) {
   if(!status){
     function_not_supported(austraits)
   } 
-  extract_trait2(austraits, trait_names, taxon_names)
-
-}
-
 
-#' @title Extract specific trait data from austraits object for versions >3.0.2
-#' @noRd
-#' @keywords internal
-extract_trait2 <- function(austraits, trait_names, taxon_names=NULL) {
-
   ret <- austraits
 
   # Traits table
@@ -89,12 +80,3 @@ extract_trait2 <- function(austraits, trait_names, taxon_names=NULL) {
 
   ret
 }
-
-#' @title Extract specific trait data from austraits object for versions <=3.0.2
-#' @noRd
-#' @keywords internal
-extract_trait1 <- function(austraits, trait_names, taxon_names=NULL) {
-
-  function_not_supported(austraits)
-
-}