Merge pull request #99 from data-mermaid/v1.1.0

mermaidr 1.1.0
data-mermaid · Sep 12, 2024 · c25a9b9 · c25a9b9
2 parents 31914dd + 8cf68a7
commit c25a9b9
Show file tree

Hide file tree

Showing 14 changed files with 648 additions and 637 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: mermaidr
 Title: Interface to the 'MERMAID' API
-Version: 1.0.4
+Version: 1.1.0
 Authors@R: 
     c(person(given = "Sharla",
            family = "Gelfand",

diff --git a/NEWS.md b/NEWS.md
@@ -1,3 +1,11 @@
+# mermaidr 1.1.0
+
+* Add `life_histories` and `growth_form_life_histories` to `mermaid_get_reference("benthicattributes")`
+* Add relevant life histories to `"benthicpqt"` method in `mermaid_get_project_data()`
+    * `data = "observations"` gains `life_histories__competitive`, `life_histories__generalist`, `life_histories__stress_tolerant`, `life_histories__weedy`
+    * `data = "sampleunits"` gains `percent_cover_life_histories_weedy`, `percent_cover_life_histories_generalist`, `percent_cover_life_histories_competitive`, `percent_cover_life_histories_stress_tolerant`
+    * `data = "sampleevents"` gains `percent_cover_life_histories_avg_weedy`, `percent_cover_life_histories_avg_generalist`, `percent_cover_life_histories_avg_competitive`, `percent_cover_life_histories_avg_stress-tolerant`, `percent_cover_life_histories_sd_weedy`, `percent_cover_life_histories_sd_generalist`, `percent_cover_life_histories_sd_competitive`, `percent_cover_life_histories_sd_stress-tolerant`
+
 # mermaidr 1.0.4
 
 * Add `project_id`, `site_id`, and `management_id` to `mermaid_get_summary_sampleevents()`

diff --git a/R/mermaid_get.R b/R/mermaid_get.R
@@ -205,6 +205,16 @@ initial_cleanup <- function(results, endpoint) {
       extract_covariates()
   }
 
+  if ("life_histories" %in% names(results)) {
+    results <- results %>%
+      extract_life_histories()
+  }
+
+  if ("growth_form_life_histories" %in% names(results)) {
+    results <- results %>%
+      extract_growth_form_life_histories()
+  }
+
   if (!endpoint %in% c("choices", "me")) {
     results <- collapse_id_name_lists(results)
 
@@ -236,7 +246,85 @@ initial_cleanup <- function(results, endpoint) {
 }
 
 is_list_col <- function(x) {
-  is.list(x) && !is.data.frame(x)
+  list_col <- is.list(x) && !is.data.frame(x)
+
+  if (list_col) {
+    list_col <- !(purrr::map_lgl(x, is.data.frame) %>% any())
+  }
+
+  list_col
+}
+
+extract_life_histories <- function(results) {
+  if (!purrr::map_lgl(results[["life_histories"]], is.data.frame) %>% any()) {
+    return(results)
+  }
+
+  old_names <- names(results)
+
+  res <- results %>%
+    tidyr::unnest("life_histories", names_sep = "___") %>%
+    dplyr::select(-dplyr::all_of(c("life_histories___id"))) %>%
+    tidyr::pivot_wider(
+      names_from = dplyr::all_of("life_histories___name"),
+      values_from = dplyr::all_of("life_histories___proportion")
+    )
+
+  new_names <- names(res)
+  additional_cols <- setdiff(new_names, old_names)
+
+  res <- res %>%
+    dplyr::rename_with(.cols = dplyr::all_of(additional_cols), \(x) glue::glue("life_histories__{x}") %>% snakecase::to_snake_case())
+
+  new_names <- names(res)
+  additional_cols <- setdiff(new_names, old_names)
+
+  res %>%
+    dplyr::relocate(dplyr::all_of(additional_cols), .after = which(old_names == "life_histories") - 1)
+}
+
+extract_growth_form_life_histories <- function(results) {
+  if (all(results[["growth_form_life_histories"]] %>% is.na())) {
+    return(
+      results %>%
+        dplyr::mutate(growth_form_life_histories = purrr::map(
+          .data$growth_form_life_histories,
+          function(x) {
+            dplyr::tibble(
+              growth_form = character(0),
+              life_history = character(0)
+            )
+          }
+        ))
+    )
+  }
+
+  choices <- mermaid_get_endpoint("choices")
+
+  choices_growth_forms <- choices %>% dplyr::filter(.data$name == "growthforms")
+  choices_growth_forms <- choices_growth_forms[["data"]][[1]]
+
+  choices_life_histories <- choices %>% dplyr::filter(.data$name == "benthiclifehistories")
+  choices_life_histories <- choices_life_histories[["data"]][[1]]
+
+  results %>%
+    dplyr::mutate(growth_form_life_histories = purrr::map(
+      .data$growth_form_life_histories,
+      function(x) {
+        if (is.null(x)) {
+          dplyr::tibble(
+            growth_form = character(0),
+            life_history = character(0)
+          )
+        } else {
+          x %>%
+            dplyr::left_join(choices_growth_forms, by = c("growth_form" = "id")) %>%
+            dplyr::select(dplyr::all_of(c("growth_form" = "name", "life_history"))) %>%
+            dplyr::left_join(choices_life_histories, by = c("life_history" = "id")) %>%
+            dplyr::select(dplyr::all_of(c("growth_form", "life_history" = "name")))
+        }
+      }
+    ))
 }
 
 collapse_id_name_lists <- function(results) {

diff --git a/R/mermaid_get_project_data.R b/R/mermaid_get_project_data.R
@@ -166,9 +166,12 @@ project_data_columns <- list(
   `beltfishes/obstransectbeltfishes` = c(common_cols[["obs/su"]], "transect_length", "transect_width", "assigned_transect_width_m", "size_bin", "observers", "transect_number", "label", "fish_family", "fish_genus", "fish_taxon", "size", "biomass_constant_a", "biomass_constant_b", "biomass_constant_c", "count", "biomass_kgha", "trophic_level", "trophic_group", "functional_group", "vulnerability", "data_policy_beltfish", common_cols[["obs_closing"]]),
   `beltfishes/sampleunits` = c(common_cols[["obs/su"]], "transect_number", "label", "size_bin", "transect_length", "transect_width", "biomass_kgha", "total_abundance", "biomass_kgha_trophic_group", "biomass_kgha_fish_family", "data_policy_beltfish", common_cols[["su_closing"]]),
   `beltfishes/sampleevents` = c(common_cols[["se"]], "biomass_kgha_avg", "biomass_kgha_sd", "biomass_kgha_trophic_group_avg", "biomass_kgha_trophic_group_sd", "biomass_kgha_fish_family_avg", "biomass_kgha_fish_family_sd", "data_policy_beltfish", common_cols[["se_closing"]]),
-  `benthicpits/obstransectbenthicpits` = c(common_cols[["obs/su"]], "transect_number", "transect_length", "interval_start", "interval_size", "label", "observers", "interval", "benthic_category", "benthic_attribute", "growth_form", "data_policy_benthicpit", common_cols[["obs_closing"]]),
-  `benthicpits/sampleunits` = c(common_cols[["obs/su"]], "transect_number", "transect_length", "label", "interval_start", "interval_size", "observers", "percent_cover_benthic_category", "data_policy_benthicpit", common_cols[["su_closing"]]),
-  `benthicpits/sampleevents` = c(common_cols[["se"]], "percent_cover_benthic_category_avg", "percent_cover_benthic_category_sd", "data_policy_benthicpit", common_cols[["se_closing"]]),
+  `benthicpits/obstransectbenthicpits` = c(common_cols[["obs/su"]], "transect_number", "transect_length", "interval_start", "interval_size", "label", "observers", "interval", "benthic_category", "benthic_attribute", "growth_form", "life_histories", "data_policy_benthicpit", common_cols[["obs_closing"]]),
+  `benthicpits/obstransectbenthicpits/csv` = c(common_cols[["obs/su"]], "transect_number", "transect_length", "interval_start", "interval_size", "label", "observers", "interval", "benthic_category", "benthic_attribute", "growth_form", "life_histories__competitive", "life_histories__generalist", "life_histories__stress-tolerant", "life_histories__weedy", "data_policy_benthicpit", common_cols[["obs_closing"]]),
+  `benthicpits/sampleunits` = c(common_cols[["obs/su"]], "transect_number", "transect_length", "label", "interval_start", "interval_size", "observers", "percent_cover_benthic_category", "percent_cover_life_histories", "data_policy_benthicpit", common_cols[["su_closing"]]),
+  `benthicpits/sampleunits/csv` = c(common_cols[["obs/su"]], "transect_number", "transect_length", "label", "interval_start", "interval_size", "observers", "percent_cover_benthic_category", "percent_cover_life_histories_weedy", "percent_cover_life_histories_generalist", "percent_cover_life_histories_competitive", "percent_cover_life_histories_stress-tolerant", "data_policy_benthicpit", common_cols[["su_closing"]]),
+  `benthicpits/sampleevents` = c(common_cols[["se"]], "percent_cover_benthic_category_avg", "percent_cover_benthic_category_sd", "percent_cover_life_histories_avg", "percent_cover_life_histories_sd", "data_policy_benthicpit", common_cols[["se_closing"]]),
+  `benthicpits/sampleevents/csv` = c(common_cols[["se"]], "percent_cover_benthic_category_avg", "percent_cover_benthic_category_sd", "percent_cover_life_histories_avg_weedy", "percent_cover_life_histories_avg_generalist", "percent_cover_life_histories_avg_competitive", "percent_cover_life_histories_avg_stress-tolerant", "percent_cover_life_histories_sd_weedy", "percent_cover_life_histories_sd_generalist", "percent_cover_life_histories_sd_competitive", "percent_cover_life_histories_sd_stress-tolerant", "data_policy_benthicpit", common_cols[["se_closing"]]),
   `benthiclits/obstransectbenthiclits` = c(common_cols[["obs/su"]], "transect_number", "transect_length", "label", "observers", "benthic_category", "benthic_attribute", "growth_form", "length", "total_length", "data_policy_benthiclit", common_cols[["obs_closing"]]),
   `benthiclits/sampleunits` = c(common_cols[["obs/su"]], "transect_number", "transect_length", "label", "observers", "total_length", "percent_cover_benthic_category", "data_policy_benthiclit", common_cols[["su_closing"]]),
   `benthiclits/sampleevents` = c(common_cols[["se"]], "percent_cover_benthic_category_avg", "percent_cover_benthic_category_sd", "data_policy_benthiclit", common_cols[["se_closing"]]),
@@ -181,20 +184,23 @@ project_data_columns <- list(
   `bleachingqcs/sampleevents` = c(common_cols[["se"]], "quadrat_size_avg", "count_total_avg", "count_total_sd", "count_genera_avg", "count_genera_sd", "percent_normal_avg", "percent_normal_sd", "percent_pale_avg", "percent_pale_sd", "percent_bleached_avg", "percent_bleached_sd", "quadrat_count_avg", "percent_hard_avg_avg", "percent_hard_avg_sd", "percent_soft_avg_avg", "percent_soft_avg_sd", "percent_algae_avg_avg", "percent_algae_avg_sd", "data_policy_bleachingqc", common_cols[["se_closing"]])
 )
 
-project_data_columns_csv <- project_data_columns
-names(project_data_columns_csv) <- paste0(names(project_data_columns), "/csv")
+project_data_columns_csv <- project_data_columns[!stringr::str_ends(names(project_data_columns), "/csv")]
+names(project_data_columns_csv) <- paste0(names(project_data_columns_csv), "/csv")
+
+project_data_columns_csv <- append(project_data_columns_csv[!names(project_data_columns_csv) %in% names(project_data_columns)], project_data_columns[stringr::str_ends(names(project_data_columns), "csv")])
 
 project_data_columns_csv <- project_data_columns_csv %>%
   purrr::map(~ c(.x, "sample_date_year", "sample_date_month", "sample_date_day"))
 
-project_data_columns <- append(project_data_columns, project_data_columns_csv)
+project_data_columns <- append(project_data_columns[!names(project_data_columns) %in% names(project_data_columns_csv)], project_data_columns_csv)
 
 # For testing columns, after df-cols have been expanded
 project_data_df_columns_list <- list(
   `beltfishes/sampleunits` = c("biomass_kgha_trophic_group", "biomass_kgha_fish_family"),
   `beltfishes/sampleevents` = c("biomass_kgha_trophic_group_avg", "biomass_kgha_fish_family_avg", "biomass_kgha_trophic_group_sd", "biomass_kgha_fish_family_sd"),
-  `benthicpits/sampleunits` = c("percent_cover_benthic_category"),
-  `benthicpits/sampleevents` = c("percent_cover_benthic_category_avg", "percent_cover_benthic_category_sd"),
+  `benthicpits/obstransectbenthicpits` = "life_histories",
+  `benthicpits/sampleunits` = c("percent_cover_benthic_category", "percent_cover_life_histories"),
+  `benthicpits/sampleevents` = c("percent_cover_benthic_category_avg", "percent_cover_benthic_category_sd", "percent_cover_life_histories_avg", "percent_cover_life_histories_sd"),
   `benthiclits/sampleunits` = c("percent_cover_benthic_category"),
   `benthiclits/sampleevents` = c("percent_cover_benthic_category_avg", "percent_cover_benthic_category_sd"),
   `benthicpqts/sampleunits` = c("percent_cover_benthic_category"),
@@ -214,10 +220,12 @@ project_data_df_columns <- project_data_df_columns_list %>%
 
 project_data_test_columns <- project_data_columns %>%
   purrr::map_df(dplyr::as_tibble, .id = "endpoint") %>%
+  dplyr::distinct() %>%
   dplyr::anti_join(project_data_df_columns, by = c("endpoint", "value")) %>%
   dplyr::filter(!value %in% c("sample_date_year", "sample_date_month", "sample_date_day")) %>%
   split(.$endpoint) %>%
-  purrr::map(dplyr::pull, value)
+  purrr::map(dplyr::pull, value) %>%
+  purrr::map(snakecase::to_snake_case)
 
 add_covariates_to_data <- function(data, covariates) {
   for (i in names(data)) {

diff --git a/R/mermaid_get_reference.R b/R/mermaid_get_reference.R
@@ -36,7 +36,7 @@ get_single_reference <- function(reference, limit = NULL, choices = mermaid_get_
     fishfamilies = get_endpoint("fishfamilies", limit = limit),
     fishgenera = get_reference_fishgenera(limit = limit),
     fishspecies = get_reference_fishspecies(limit = limit, choices = choices),
-    benthicattributes = get_reference_benthicattributes(limit = limit)
+    benthicattributes = get_reference_benthicattributes(limit = limit, choices = choices)
   )
 }
 
@@ -77,9 +77,13 @@ get_reference_fishspecies <- function(limit = NULL, choices = mermaid_get_endpoi
     dplyr::left_join(fishgroupfunctions, by = c("functional_group" = "id"), suffix = c("_id", ""))
 }
 
-get_reference_benthicattributes <- function(limit = NULL) {
+get_reference_benthicattributes <- function(limit = NULL, choices = mermaid_get_endpoint("choices")) {
   benthicattributes <- get_endpoint("benthicattributes", limit = limit)
 
+  # Lookup life histories
+  res <- benthicattributes %>%
+    lookup_benthiclifehistories(choices)
+
   benthicattributes %>%
     dplyr::left_join(benthicattributes %>%
       dplyr::select(tidyselect::all_of(c(parent_id = "id", parent = "name"))), by = c("parent" = "parent_id"), suffix = c("_id", ""))
@@ -97,9 +101,10 @@ lookup_regions <- function(results, choices = mermaid_get_endpoint("choices")) {
   row_regions <- results_row %>%
     dplyr::select(tidyselect::all_of(c("row", "regions"))) %>%
     tidyr::separate_rows("regions", sep = ", ") %>%
-    dplyr::filter(.data$regions != "NA") %>%
+    dplyr::filter(!is.na(.data$regions)) %>%
     dplyr::left_join(regions, by = c("regions" = "id"), suffix = c("_id", "")) %>%
     dplyr::group_by(.data$row) %>%
+    dplyr::arrange(.data$regions) %>%
     dplyr::summarise(regions = paste(.data$regions, collapse = ", "))
 
   results_row %>%
@@ -108,7 +113,48 @@ lookup_regions <- function(results, choices = mermaid_get_endpoint("choices")) {
     dplyr::select(names(results))
 }
 
+lookup_benthiclifehistories <- function(results, choices = mermaid_get_endpoint("choices")) {
+  life_histories <- choices %>%
+    tibble::deframe() %>%
+    purrr::pluck("benthiclifehistories") %>%
+    dplyr::select(tidyselect::all_of(c("id", "name")))
+
+  results_row <- results %>%
+    dplyr::mutate(row = dplyr::row_number())
+
+  row_lifehistories <- results_row %>%
+    dplyr::select(tidyselect::all_of(c("row", "id" = "life_histories"))) %>%
+    tidyr::separate_rows("id", sep = ", ") %>%
+    dplyr::filter(!is.na(.data$id)) %>%
+    dplyr::left_join(life_histories, by = "id", suffix = c("_id", "")) %>%
+    dplyr::group_by(.data$row) %>%
+    dplyr::arrange(.data$id) %>%
+    dplyr::summarise(id = paste(.data$name, collapse = ", "))
+
+  names(row_lifehistories) <- c("row", "life_histories")
+
+  results_row %>%
+    dplyr::left_join(row_lifehistories, by = "row", suffix = c("_id", "")) %>%
+    dplyr::select(-tidyselect::all_of(c("row", "life_histories_id"))) %>%
+    dplyr::select(names(results))
+}
+
+match_lifehistories <- function(x, column, life_histories) {
+  x <- x %>%
+    dplyr::select(tidyselect::all_of(c("row", id = "life_histories"))) %>%
+    tidyr::separate_rows("id", sep = ", ") %>%
+    dplyr::filter(!is.na(.data$id)) %>%
+    dplyr::left_join(life_histories, by = "id", suffix = c("_id", "")) %>%
+    dplyr::group_by(.data$row) %>%
+    dplyr::arrange(.data$id) %>%
+    dplyr::summarise(id = paste(.data$name, collapse = ", "))
+
+  names(x) <- c("id", "life_histories")
+
+  x
+}
+
 fishfamilies_columns <- c("id", "name", "status", "biomass_constant_a", "biomass_constant_b", "biomass_constant_c", "regions", "created_on", "updated_on")
 fishgenera_columns <- c("id", "name", "status", "biomass_constant_a", "biomass_constant_b", "biomass_constant_c", "family", "regions", "created_on", "updated_on")
 fishspecies_columns <- c("id", "name", "display", "notes", "status", "biomass_constant_a", "biomass_constant_b", "biomass_constant_c", "climate_score", "vulnerability", "max_length", "trophic_level", "max_length_type", "genus", "group_size", "trophic_group", "functional_group", "regions", "created_on", "updated_on")
-benthicattributes_columns <- c("id", "name", "status", "parent", "regions", "updated_on", "created_on")
+benthicattributes_columns <- c("id", "name", "status", "parent", "regions", "life_histories", "growth_form_life_histories", "updated_on", "created_on")
diff --git a/R/zzz_get_endpoint.R b/R/zzz_get_endpoint.R
@@ -14,6 +14,9 @@ get_endpoint <- function(endpoint = c("benthicattributes", "choices", "fishfamil
 
   res_columns <- purrr::map2(res_strip_name_suffix, names(res_strip_name_suffix), construct_endpoint_columns)
 
+  # Replace any "" or "NA" with NAs
+  res_columns <- purrr::map(res_columns, \(x) x %>% dplyr::mutate(dplyr::across(dplyr::where(is.character), \(y) ifelse(y %in% c("NA", ""), NA_character_, y))))
+
   if (length(res_columns) > 1) {
     res_columns
   } else {

diff --git a/R/zzz_get_project_endpoint.R b/R/zzz_get_project_endpoint.R
@@ -123,7 +123,9 @@ construct_project_endpoint_columns <- function(res, endpoint, multiple_projects
 
       df_col_placement <- which(endpoint_cols == col)
 
-      endpoint_cols <- c(endpoint_cols[1:(df_col_placement - 1)], df_col_names, endpoint_cols[(df_col_placement + 1):length(endpoint_cols)])
+      if (length(df_col_placement) == 1) {
+        endpoint_cols <- c(endpoint_cols[1:(df_col_placement - 1)], df_col_names, endpoint_cols[(df_col_placement + 1):length(endpoint_cols)])
+      }
     }
 
     if (multiple_projects) {
@@ -132,7 +134,6 @@ construct_project_endpoint_columns <- function(res, endpoint, multiple_projects
       res <- dplyr::select(res, dplyr::any_of(endpoint_cols))
     }
 
-    # TODO - don't need to use snakecase, is done in another way elsewhere
     names(res) <- snakecase::to_snake_case(names(res))
 
     res
@@ -283,3 +284,5 @@ mermaid_project_endpoint_columns <- list(
 mermaid_project_endpoint_columns <- append(mermaid_project_endpoint_columns, project_other_endpoint_columns)
 
 mermaid_project_endpoint_columns <- append(mermaid_project_endpoint_columns, project_data_columns)
+
+mermaid_project_endpoint_columns_test <- purrr::map(mermaid_project_endpoint_columns, snakecase::to_snake_case)