Skip to content

Commit

Permalink
dataset names must be unique
Browse files Browse the repository at this point in the history
  • Loading branch information
kguidonimartins committed Dec 11, 2022
1 parent 097d1e6 commit c5c568b
Show file tree
Hide file tree
Showing 3 changed files with 31 additions and 1 deletion.
1 change: 1 addition & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# bdc 1.1.3

- `bdc_standardize_datasets` now throws an error when dataset names defined in the metadata file are not unique.
- Fix minor bug in `bdc_coordinates_country_inconsistent()` (see: 5c4e0aa).
- `{countrycode}` and `{rangeBuilder}` dependencies were
removed. Country names now are derived from [Stefan Gabos](https://github.com/stefangabos/world_countries/) repository
Expand Down
6 changes: 5 additions & 1 deletion R/bdc_standardize_datasets.R
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,10 @@ bdc_standardize_datasets <-
metadata %>%
dplyr::pull(fileName)

if (length(unique(metadata$datasetName)) != nrow(metadata)) {
stop("[ERROR]: Dataset names defined in the `datasetName` column must be unique.")
}

for (file_index in seq_along(input_file)) {
input_filename <-
metadata %>%
Expand Down Expand Up @@ -258,7 +262,7 @@ bdc_standardize_datasets <-
# here::here("data", "temp_datasets") %>%
save_in_dir %>%
fs::dir_ls(regexp = "*.qs") %>%
purrr::map_dfr(~ qs::qread(.x) %>%
purrr::map_dfr(~ qs::qread(.x) %>%
dplyr::mutate(dplyr::across(
.cols = dplyr::everything(), ~ as.character(.x)
)))
Expand Down
25 changes: 25 additions & 0 deletions tests/testthat/test-bdc_standardize_datasets.R
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,13 @@ wrong_metadata <- tibble::tribble(
"datafake4", df4_path, NA, "nome_das_especies", "y", "x", NA, "notes"
)

# Metadata fixture in which `datasetName` is intentionally duplicated
# ("datafake1" appears twice); used to verify that
# bdc_standardize_datasets() rejects non-unique dataset names.
metadata_repeated_datasetName <- tibble::tibble(
  datasetName = c("datafake1", "datafake2", "datafake1"),
  fileName = c(df1_path, df2_path, df1_path),
  occurrenceID = c("id", "id_number", "id"),
  scientificName = c("species", "spp", "species"),
  decimalLatitude = c("latitude", "lat", "latitude"),
  decimalLongitude = c("longitude", "lon", "longitude")
)

bdc_standardize_datasets(metadata = metadata, overwrite = TRUE, format = "qs", save_database = FALSE)

test_that("bdc_standardize_datasets can create qs files", {
Expand Down Expand Up @@ -192,3 +199,21 @@ test_that("bdc_standardize_datasets can create 00_merged_datasets.qs", {

unlink(here::here("Output"), recursive = TRUE)
})

test_that("bdc_standardize_datasets throw an error when dataset names are not unique", {

  # The function signals the duplicate-name failure via stop(), i.e. an
  # *error* condition, not a message. capture_message() only captures
  # message conditions, so assert the error directly with expect_error().
  # `fixed = TRUE` matches the string literally — the message contains
  # regex metacharacters ("[", "]", "`", ".").
  expect_error(
    bdc_standardize_datasets(
      metadata = metadata_repeated_datasetName,
      overwrite = TRUE,
      format = "csv",
      save_database = FALSE
    ),
    "[ERROR]: Dataset names defined in the `datasetName` column must be unique.",
    fixed = TRUE
  )

  # Remove any output the function may have written before failing.
  unlink(here::here("Output"), recursive = TRUE)

})

0 comments on commit c5c568b

Please sign in to comment.