From c5c568b18b14c51badd294ae418a11a824261c69 Mon Sep 17 00:00:00 2001 From: Karlo Guidoni Date: Sun, 11 Dec 2022 19:26:35 -0300 Subject: [PATCH] dataset names must be unique --- NEWS.md | 1 + R/bdc_standardize_datasets.R | 6 ++++- .../testthat/test-bdc_standardize_datasets.R | 25 +++++++++++++++++++ 3 files changed, 31 insertions(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index 0881f57b..95cc87ab 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,6 @@ # bdc 1.1.3 +- `bdc_standardize_datasets` now throws an error when dataset names defined in the metadata file are not unique. - Fix minor bug in `bdc_coordinates_country_inconsistent()` (see: 5c4e0aa). - `{countrycode}` and `{rangeBuilder}` dependencies were removed. Country names now are derived from [Stefan Gabos](https://github.com/stefangabos/world_countries/) repository diff --git a/R/bdc_standardize_datasets.R b/R/bdc_standardize_datasets.R index 6eb0ab90..f1f6c4f5 100644 --- a/R/bdc_standardize_datasets.R +++ b/R/bdc_standardize_datasets.R @@ -105,6 +105,10 @@ bdc_standardize_datasets <- metadata %>% dplyr::pull(fileName) + if (length(unique(metadata$datasetName)) != nrow(metadata)) { + stop("[ERROR]: Dataset names defined in the `datasetName` column must be unique.") + } + for (file_index in seq_along(input_file)) { input_filename <- metadata %>% @@ -258,7 +262,7 @@ bdc_standardize_datasets <- # here::here("data", "temp_datasets") %>% save_in_dir %>% fs::dir_ls(regexp = "*.qs") %>% - purrr::map_dfr(~ qs::qread(.x) %>% + purrr::map_dfr(~ qs::qread(.x) %>% dplyr::mutate(dplyr::across( .cols = dplyr::everything(), ~ as.character(.x) ))) diff --git a/tests/testthat/test-bdc_standardize_datasets.R b/tests/testthat/test-bdc_standardize_datasets.R index 969a69de..63fed5e9 100644 --- a/tests/testthat/test-bdc_standardize_datasets.R +++ b/tests/testthat/test-bdc_standardize_datasets.R @@ -81,6 +81,13 @@ wrong_metadata <- tibble::tribble( "datafake4", df4_path, NA, "nome_das_especies", "y", "x", NA, "notes" ) 
+metadata_repeated_datasetName <- tibble::tribble( + ~datasetName, ~fileName, ~occurrenceID, ~scientificName, ~decimalLatitude, ~decimalLongitude, + "datafake1", df1_path, "id", "species", "latitude", "longitude", + "datafake2", df2_path, "id_number", "spp", "lat", "lon", + "datafake1", df1_path, "id", "species", "latitude", "longitude" +) + bdc_standardize_datasets(metadata = metadata, overwrite = TRUE, format = "qs", save_database = FALSE) test_that("bdc_standardize_datasets can create qs files", { @@ -192,3 +199,21 @@ test_that("bdc_standardize_datasets can create 00_merged_datasets.qs", { unlink(here::here("Output"), recursive = TRUE) }) + +test_that("bdc_standardize_datasets throws an error when dataset names are not unique", { + + res <- + capture_error( + bdc_standardize_datasets( + metadata = metadata_repeated_datasetName, + overwrite = TRUE, + format = "csv", + save_database = FALSE + ) + ) + + expect_equal(res$message, "[ERROR]: Dataset names defined in the `datasetName` column must be unique.") + + unlink(here::here("Output"), recursive = TRUE) + +})