Skip to content

Commit

Permalink
Merge pull request #99 from data-mermaid/v1.1.0
Browse files Browse the repository at this point in the history
mermaidr 1.1.0
  • Loading branch information
sharlagelfand authored Sep 12, 2024
2 parents 31914dd + 8cf68a7 commit c25a9b9
Show file tree
Hide file tree
Showing 14 changed files with 648 additions and 637 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: mermaidr
Title: Interface to the 'MERMAID' API
Version: 1.0.4
Version: 1.1.0
Authors@R:
c(person(given = "Sharla",
family = "Gelfand",
Expand Down
8 changes: 8 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,11 @@
# mermaidr 1.1.0

* Add `life_histories` and `growth_form_life_histories` to `mermaid_get_reference("benthicattributes")`
* Add relevant life histories to `"benthicpqt"` method in `mermaid_get_project_data()`
* `data = "observations"` gains `life_histories__competitive`, `life_histories__generalist`, `life_histories__stress_tolerant`, `life_histories__weedy`
* `data = "sampleunits"` gains `percent_cover_life_histories_weedy`, `percent_cover_life_histories_generalist`, `percent_cover_life_histories_competitive`, `percent_cover_life_histories_stress_tolerant`
* `data = "sampleevents"` gains `percent_cover_life_histories_avg_weedy`, `percent_cover_life_histories_avg_generalist`, `percent_cover_life_histories_avg_competitive`, `percent_cover_life_histories_avg_stress-tolerant`, `percent_cover_life_histories_sd_weedy`, `percent_cover_life_histories_sd_generalist`, `percent_cover_life_histories_sd_competitive`, `percent_cover_life_histories_sd_stress-tolerant`

# mermaidr 1.0.4

* Add `project_id`, `site_id`, and `management_id` to `mermaid_get_summary_sampleevents()`
Expand Down
90 changes: 89 additions & 1 deletion R/mermaid_get.R
Original file line number Diff line number Diff line change
Expand Up @@ -205,6 +205,16 @@ initial_cleanup <- function(results, endpoint) {
extract_covariates()
}

if ("life_histories" %in% names(results)) {
results <- results %>%
extract_life_histories()
}

if ("growth_form_life_histories" %in% names(results)) {
results <- results %>%
extract_growth_form_life_histories()
}

if (!endpoint %in% c("choices", "me")) {
results <- collapse_id_name_lists(results)

Expand Down Expand Up @@ -236,7 +246,85 @@ initial_cleanup <- function(results, endpoint) {
}

is_list_col <- function(x) {
is.list(x) && !is.data.frame(x)
list_col <- is.list(x) && !is.data.frame(x)

if (list_col) {
list_col <- !(purrr::map_lgl(x, is.data.frame) %>% any())
}

list_col
}

extract_life_histories <- function(results) {
if (!purrr::map_lgl(results[["life_histories"]], is.data.frame) %>% any()) {
return(results)
}

old_names <- names(results)

res <- results %>%
tidyr::unnest("life_histories", names_sep = "___") %>%
dplyr::select(-dplyr::all_of(c("life_histories___id"))) %>%
tidyr::pivot_wider(
names_from = dplyr::all_of("life_histories___name"),
values_from = dplyr::all_of("life_histories___proportion")
)

new_names <- names(res)
additional_cols <- setdiff(new_names, old_names)

res <- res %>%
dplyr::rename_with(.cols = dplyr::all_of(additional_cols), \(x) glue::glue("life_histories__{x}") %>% snakecase::to_snake_case())

new_names <- names(res)
additional_cols <- setdiff(new_names, old_names)

res %>%
dplyr::relocate(dplyr::all_of(additional_cols), .after = which(old_names == "life_histories") - 1)
}

extract_growth_form_life_histories <- function(results) {
if (all(results[["growth_form_life_histories"]] %>% is.na())) {
return(
results %>%
dplyr::mutate(growth_form_life_histories = purrr::map(
.data$growth_form_life_histories,
function(x) {
dplyr::tibble(
growth_form = character(0),
life_history = character(0)
)
}
))
)
}

choices <- mermaid_get_endpoint("choices")

choices_growth_forms <- choices %>% dplyr::filter(.data$name == "growthforms")
choices_growth_forms <- choices_growth_forms[["data"]][[1]]

choices_life_histories <- choices %>% dplyr::filter(.data$name == "benthiclifehistories")
choices_life_histories <- choices_life_histories[["data"]][[1]]

results %>%
dplyr::mutate(growth_form_life_histories = purrr::map(
.data$growth_form_life_histories,
function(x) {
if (is.null(x)) {
dplyr::tibble(
growth_form = character(0),
life_history = character(0)
)
} else {
x %>%
dplyr::left_join(choices_growth_forms, by = c("growth_form" = "id")) %>%
dplyr::select(dplyr::all_of(c("growth_form" = "name", "life_history"))) %>%
dplyr::left_join(choices_life_histories, by = c("life_history" = "id")) %>%
dplyr::select(dplyr::all_of(c("growth_form", "life_history" = "name")))
}
}
))
}

collapse_id_name_lists <- function(results) {
Expand Down
26 changes: 17 additions & 9 deletions R/mermaid_get_project_data.R
Original file line number Diff line number Diff line change
Expand Up @@ -166,9 +166,12 @@ project_data_columns <- list(
`beltfishes/obstransectbeltfishes` = c(common_cols[["obs/su"]], "transect_length", "transect_width", "assigned_transect_width_m", "size_bin", "observers", "transect_number", "label", "fish_family", "fish_genus", "fish_taxon", "size", "biomass_constant_a", "biomass_constant_b", "biomass_constant_c", "count", "biomass_kgha", "trophic_level", "trophic_group", "functional_group", "vulnerability", "data_policy_beltfish", common_cols[["obs_closing"]]),
`beltfishes/sampleunits` = c(common_cols[["obs/su"]], "transect_number", "label", "size_bin", "transect_length", "transect_width", "biomass_kgha", "total_abundance", "biomass_kgha_trophic_group", "biomass_kgha_fish_family", "data_policy_beltfish", common_cols[["su_closing"]]),
`beltfishes/sampleevents` = c(common_cols[["se"]], "biomass_kgha_avg", "biomass_kgha_sd", "biomass_kgha_trophic_group_avg", "biomass_kgha_trophic_group_sd", "biomass_kgha_fish_family_avg", "biomass_kgha_fish_family_sd", "data_policy_beltfish", common_cols[["se_closing"]]),
`benthicpits/obstransectbenthicpits` = c(common_cols[["obs/su"]], "transect_number", "transect_length", "interval_start", "interval_size", "label", "observers", "interval", "benthic_category", "benthic_attribute", "growth_form", "data_policy_benthicpit", common_cols[["obs_closing"]]),
`benthicpits/sampleunits` = c(common_cols[["obs/su"]], "transect_number", "transect_length", "label", "interval_start", "interval_size", "observers", "percent_cover_benthic_category", "data_policy_benthicpit", common_cols[["su_closing"]]),
`benthicpits/sampleevents` = c(common_cols[["se"]], "percent_cover_benthic_category_avg", "percent_cover_benthic_category_sd", "data_policy_benthicpit", common_cols[["se_closing"]]),
`benthicpits/obstransectbenthicpits` = c(common_cols[["obs/su"]], "transect_number", "transect_length", "interval_start", "interval_size", "label", "observers", "interval", "benthic_category", "benthic_attribute", "growth_form", "life_histories", "data_policy_benthicpit", common_cols[["obs_closing"]]),
`benthicpits/obstransectbenthicpits/csv` = c(common_cols[["obs/su"]], "transect_number", "transect_length", "interval_start", "interval_size", "label", "observers", "interval", "benthic_category", "benthic_attribute", "growth_form", "life_histories__competitive", "life_histories__generalist", "life_histories__stress-tolerant", "life_histories__weedy", "data_policy_benthicpit", common_cols[["obs_closing"]]),
`benthicpits/sampleunits` = c(common_cols[["obs/su"]], "transect_number", "transect_length", "label", "interval_start", "interval_size", "observers", "percent_cover_benthic_category", "percent_cover_life_histories", "data_policy_benthicpit", common_cols[["su_closing"]]),
`benthicpits/sampleunits/csv` = c(common_cols[["obs/su"]], "transect_number", "transect_length", "label", "interval_start", "interval_size", "observers", "percent_cover_benthic_category", "percent_cover_life_histories_weedy", "percent_cover_life_histories_generalist", "percent_cover_life_histories_competitive", "percent_cover_life_histories_stress-tolerant", "data_policy_benthicpit", common_cols[["su_closing"]]),
`benthicpits/sampleevents` = c(common_cols[["se"]], "percent_cover_benthic_category_avg", "percent_cover_benthic_category_sd", "percent_cover_life_histories_avg", "percent_cover_life_histories_sd", "data_policy_benthicpit", common_cols[["se_closing"]]),
`benthicpits/sampleevents/csv` = c(common_cols[["se"]], "percent_cover_benthic_category_avg", "percent_cover_benthic_category_sd", "percent_cover_life_histories_avg_weedy", "percent_cover_life_histories_avg_generalist", "percent_cover_life_histories_avg_competitive", "percent_cover_life_histories_avg_stress-tolerant", "percent_cover_life_histories_sd_weedy", "percent_cover_life_histories_sd_generalist", "percent_cover_life_histories_sd_competitive", "percent_cover_life_histories_sd_stress-tolerant", "data_policy_benthicpit", common_cols[["se_closing"]]),
`benthiclits/obstransectbenthiclits` = c(common_cols[["obs/su"]], "transect_number", "transect_length", "label", "observers", "benthic_category", "benthic_attribute", "growth_form", "length", "total_length", "data_policy_benthiclit", common_cols[["obs_closing"]]),
`benthiclits/sampleunits` = c(common_cols[["obs/su"]], "transect_number", "transect_length", "label", "observers", "total_length", "percent_cover_benthic_category", "data_policy_benthiclit", common_cols[["su_closing"]]),
`benthiclits/sampleevents` = c(common_cols[["se"]], "percent_cover_benthic_category_avg", "percent_cover_benthic_category_sd", "data_policy_benthiclit", common_cols[["se_closing"]]),
Expand All @@ -181,20 +184,23 @@ project_data_columns <- list(
`bleachingqcs/sampleevents` = c(common_cols[["se"]], "quadrat_size_avg", "count_total_avg", "count_total_sd", "count_genera_avg", "count_genera_sd", "percent_normal_avg", "percent_normal_sd", "percent_pale_avg", "percent_pale_sd", "percent_bleached_avg", "percent_bleached_sd", "quadrat_count_avg", "percent_hard_avg_avg", "percent_hard_avg_sd", "percent_soft_avg_avg", "percent_soft_avg_sd", "percent_algae_avg_avg", "percent_algae_avg_sd", "data_policy_bleachingqc", common_cols[["se_closing"]])
)

project_data_columns_csv <- project_data_columns
names(project_data_columns_csv) <- paste0(names(project_data_columns), "/csv")
project_data_columns_csv <- project_data_columns[!stringr::str_ends(names(project_data_columns), "/csv")]
names(project_data_columns_csv) <- paste0(names(project_data_columns_csv), "/csv")

project_data_columns_csv <- append(project_data_columns_csv[!names(project_data_columns_csv) %in% names(project_data_columns)], project_data_columns[stringr::str_ends(names(project_data_columns), "csv")])

project_data_columns_csv <- project_data_columns_csv %>%
purrr::map(~ c(.x, "sample_date_year", "sample_date_month", "sample_date_day"))

project_data_columns <- append(project_data_columns, project_data_columns_csv)
project_data_columns <- append(project_data_columns[!names(project_data_columns) %in% names(project_data_columns_csv)], project_data_columns_csv)

# For testing columns, after df-cols have been expanded
project_data_df_columns_list <- list(
`beltfishes/sampleunits` = c("biomass_kgha_trophic_group", "biomass_kgha_fish_family"),
`beltfishes/sampleevents` = c("biomass_kgha_trophic_group_avg", "biomass_kgha_fish_family_avg", "biomass_kgha_trophic_group_sd", "biomass_kgha_fish_family_sd"),
`benthicpits/sampleunits` = c("percent_cover_benthic_category"),
`benthicpits/sampleevents` = c("percent_cover_benthic_category_avg", "percent_cover_benthic_category_sd"),
`benthicpits/obstransectbenthicpits` = "life_histories",
`benthicpits/sampleunits` = c("percent_cover_benthic_category", "percent_cover_life_histories"),
`benthicpits/sampleevents` = c("percent_cover_benthic_category_avg", "percent_cover_benthic_category_sd", "percent_cover_life_histories_avg", "percent_cover_life_histories_sd"),
`benthiclits/sampleunits` = c("percent_cover_benthic_category"),
`benthiclits/sampleevents` = c("percent_cover_benthic_category_avg", "percent_cover_benthic_category_sd"),
`benthicpqts/sampleunits` = c("percent_cover_benthic_category"),
Expand All @@ -214,10 +220,12 @@ project_data_df_columns <- project_data_df_columns_list %>%

project_data_test_columns <- project_data_columns %>%
purrr::map_df(dplyr::as_tibble, .id = "endpoint") %>%
dplyr::distinct() %>%
dplyr::anti_join(project_data_df_columns, by = c("endpoint", "value")) %>%
dplyr::filter(!value %in% c("sample_date_year", "sample_date_month", "sample_date_day")) %>%
split(.$endpoint) %>%
purrr::map(dplyr::pull, value)
purrr::map(dplyr::pull, value) %>%
purrr::map(snakecase::to_snake_case)

add_covariates_to_data <- function(data, covariates) {
for (i in names(data)) {
Expand Down
54 changes: 50 additions & 4 deletions R/mermaid_get_reference.R
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ get_single_reference <- function(reference, limit = NULL, choices = mermaid_get_
fishfamilies = get_endpoint("fishfamilies", limit = limit),
fishgenera = get_reference_fishgenera(limit = limit),
fishspecies = get_reference_fishspecies(limit = limit, choices = choices),
benthicattributes = get_reference_benthicattributes(limit = limit)
benthicattributes = get_reference_benthicattributes(limit = limit, choices = choices)
)
}

Expand Down Expand Up @@ -77,9 +77,13 @@ get_reference_fishspecies <- function(limit = NULL, choices = mermaid_get_endpoi
dplyr::left_join(fishgroupfunctions, by = c("functional_group" = "id"), suffix = c("_id", ""))
}

get_reference_benthicattributes <- function(limit = NULL) {
get_reference_benthicattributes <- function(limit = NULL, choices = mermaid_get_endpoint("choices")) {
benthicattributes <- get_endpoint("benthicattributes", limit = limit)

# Lookup life histories
res <- benthicattributes %>%
lookup_benthiclifehistories(choices)

benthicattributes %>%
dplyr::left_join(benthicattributes %>%
dplyr::select(tidyselect::all_of(c(parent_id = "id", parent = "name"))), by = c("parent" = "parent_id"), suffix = c("_id", ""))
Expand All @@ -97,9 +101,10 @@ lookup_regions <- function(results, choices = mermaid_get_endpoint("choices")) {
row_regions <- results_row %>%
dplyr::select(tidyselect::all_of(c("row", "regions"))) %>%
tidyr::separate_rows("regions", sep = ", ") %>%
dplyr::filter(.data$regions != "NA") %>%
dplyr::filter(!is.na(.data$regions)) %>%
dplyr::left_join(regions, by = c("regions" = "id"), suffix = c("_id", "")) %>%
dplyr::group_by(.data$row) %>%
dplyr::arrange(.data$regions) %>%
dplyr::summarise(regions = paste(.data$regions, collapse = ", "))

results_row %>%
Expand All @@ -108,7 +113,48 @@ lookup_regions <- function(results, choices = mermaid_get_endpoint("choices")) {
dplyr::select(names(results))
}

lookup_benthiclifehistories <- function(results, choices = mermaid_get_endpoint("choices")) {
life_histories <- choices %>%
tibble::deframe() %>%
purrr::pluck("benthiclifehistories") %>%
dplyr::select(tidyselect::all_of(c("id", "name")))

results_row <- results %>%
dplyr::mutate(row = dplyr::row_number())

row_lifehistories <- results_row %>%
dplyr::select(tidyselect::all_of(c("row", "id" = "life_histories"))) %>%
tidyr::separate_rows("id", sep = ", ") %>%
dplyr::filter(!is.na(.data$id)) %>%
dplyr::left_join(life_histories, by = "id", suffix = c("_id", "")) %>%
dplyr::group_by(.data$row) %>%
dplyr::arrange(.data$id) %>%
dplyr::summarise(id = paste(.data$name, collapse = ", "))

names(row_lifehistories) <- c("row", "life_histories")

results_row %>%
dplyr::left_join(row_lifehistories, by = "row", suffix = c("_id", "")) %>%
dplyr::select(-tidyselect::all_of(c("row", "life_histories_id"))) %>%
dplyr::select(names(results))
}

match_lifehistories <- function(x, column, life_histories) {
x <- x %>%
dplyr::select(tidyselect::all_of(c("row", id = "life_histories"))) %>%
tidyr::separate_rows("id", sep = ", ") %>%
dplyr::filter(!is.na(.data$id)) %>%
dplyr::left_join(life_histories, by = "id", suffix = c("_id", "")) %>%
dplyr::group_by(.data$row) %>%
dplyr::arrange(.data$id) %>%
dplyr::summarise(id = paste(.data$name, collapse = ", "))

names(x) <- c("id", "life_histories")

x
}

fishfamilies_columns <- c("id", "name", "status", "biomass_constant_a", "biomass_constant_b", "biomass_constant_c", "regions", "created_on", "updated_on")
fishgenera_columns <- c("id", "name", "status", "biomass_constant_a", "biomass_constant_b", "biomass_constant_c", "family", "regions", "created_on", "updated_on")
fishspecies_columns <- c("id", "name", "display", "notes", "status", "biomass_constant_a", "biomass_constant_b", "biomass_constant_c", "climate_score", "vulnerability", "max_length", "trophic_level", "max_length_type", "genus", "group_size", "trophic_group", "functional_group", "regions", "created_on", "updated_on")
benthicattributes_columns <- c("id", "name", "status", "parent", "regions", "updated_on", "created_on")
benthicattributes_columns <- c("id", "name", "status", "parent", "regions", "life_histories", "growth_form_life_histories", "updated_on", "created_on")
3 changes: 3 additions & 0 deletions R/zzz_get_endpoint.R
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,9 @@ get_endpoint <- function(endpoint = c("benthicattributes", "choices", "fishfamil

res_columns <- purrr::map2(res_strip_name_suffix, names(res_strip_name_suffix), construct_endpoint_columns)

# Replace any "" or "NA" with NAs
res_columns <- purrr::map(res_columns, \(x) x %>% dplyr::mutate(dplyr::across(dplyr::where(is.character), \(y) ifelse(y %in% c("NA", ""), NA_character_, y))))

if (length(res_columns) > 1) {
res_columns
} else {
Expand Down
7 changes: 5 additions & 2 deletions R/zzz_get_project_endpoint.R
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,9 @@ construct_project_endpoint_columns <- function(res, endpoint, multiple_projects

df_col_placement <- which(endpoint_cols == col)

endpoint_cols <- c(endpoint_cols[1:(df_col_placement - 1)], df_col_names, endpoint_cols[(df_col_placement + 1):length(endpoint_cols)])
if (length(df_col_placement) == 1) {
endpoint_cols <- c(endpoint_cols[1:(df_col_placement - 1)], df_col_names, endpoint_cols[(df_col_placement + 1):length(endpoint_cols)])
}
}

if (multiple_projects) {
Expand All @@ -132,7 +134,6 @@ construct_project_endpoint_columns <- function(res, endpoint, multiple_projects
res <- dplyr::select(res, dplyr::any_of(endpoint_cols))
}

# TODO - don't need to use snakecase, is done in another way elsewhere
names(res) <- snakecase::to_snake_case(names(res))

res
Expand Down Expand Up @@ -283,3 +284,5 @@ mermaid_project_endpoint_columns <- list(
mermaid_project_endpoint_columns <- append(mermaid_project_endpoint_columns, project_other_endpoint_columns)

mermaid_project_endpoint_columns <- append(mermaid_project_endpoint_columns, project_data_columns)

mermaid_project_endpoint_columns_test <- purrr::map(mermaid_project_endpoint_columns, snakecase::to_snake_case)
Loading

0 comments on commit c25a9b9

Please sign in to comment.