fix global data + add page wait param (fix #39)
* fix importing of global data
* tweak cleaning msgs
* improve invalid geometry handling
* update install Makefile cmd
* use prepr to fix extreme geometry issues
* remove unused makefile cmd
* skip tests if prepr pkg needed but missing
* add dplyr to Suggests
* add prepr install cmds to readme
* new page_wait param for slow connections
jeffreyhanson authored Oct 31, 2021
1 parent 1bd7054 commit d42330c
Showing 51 changed files with 982 additions and 255 deletions.
8 changes: 3 additions & 5 deletions .github/workflows/R-CMD-check-mac-osx.yaml
@@ -59,12 +59,10 @@ jobs:
restore-keys: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1-

- name: Install system dependencies
if: runner.os == 'Linux'
run: |
while read -r cmd
do
eval sudo $cmd
done < <(Rscript -e 'writeLines(remotes::system_requirements("ubuntu", "20.04"))')
brew install gmp
brew install mpfr
brew install gdal
- name: Install dependencies
run: |
2 changes: 1 addition & 1 deletion .github/workflows/R-CMD-check-ubuntu.yaml
@@ -63,7 +63,7 @@ jobs:
- name: Install system dependencies
if: runner.os == 'Linux'
run: |
sudo apt-get -y install libcurl4-openssl-dev
sudo apt-get -y install libcurl4-openssl-dev libmpfr-dev libgmp3-dev libudunits2-dev libgdal-dev libgeos-dev libproj-dev
while read -r cmd
do
eval sudo $cmd
16 changes: 12 additions & 4 deletions DESCRIPTION
@@ -1,10 +1,12 @@
Package: wdpar
Type: Package
Version: 1.3.1.3
Version: 1.3.1.4
Title: Interface to the World Database on Protected Areas
Description: Fetch and clean data from the World Database on Protected
Areas (WDPA). Data is obtained from Protected Planet
<https://www.protectedplanet.net/en>.
<https://www.protectedplanet.net/en>. To augment data cleaning procedures,
users can install the 'prepr' R package (available at
<https://github.com/dickoa/prepr>).
Authors@R: c(person(c('Jeffrey', 'O'), 'Hanson',
email='[email protected]', role = c('aut', 'cre')))
Imports:
@@ -30,7 +32,9 @@ Suggests:
rmarkdown (>= 1.10),
ggmap (>= 2.6.1),
ggplot2 (>= 3.1.0),
pingr (>= 1.1.2)
pingr (>= 1.1.2),
prepr (>= 0.1.9000),
dplyr (>= 1.0.7)
Depends:
R (>= 3.5.0),
sf (>= 1.0-2)
@@ -43,8 +47,10 @@ VignetteBuilder: knitr
RoxygenNote: 7.1.2
Collate:
'internal.R'
'geo.R'
'package.R'
'read_sf_n.R'
'st_erase_overlaps.R'
'st_repair_geometry.R'
'wdpa_clean.R'
'wdpa_dissolve.R'
'wdpa_url.R'
@@ -53,3 +59,5 @@ Collate:
'wdpa_read.R'
'zzz.R'
Roxygen: list(markdown = TRUE)
Remotes:
dickoa/prepr
6 changes: 1 addition & 5 deletions Makefile
@@ -6,10 +6,6 @@ clean:
rm -rf docs/*
rm -rf inst/doc/*

initc:
R --slave -e "Rcpp::compileAttributes()"
R --slave -e "tools::package_native_routine_registration_skeleton('.', 'src/init.c', character_only = FALSE)"

docs: man readme vigns site

man:
@@ -55,7 +51,7 @@ build:
cp -R doc inst/

install:
R --slave -e "devtools::install_local('../wdpar')"
R --slave -e "devtools::install_local('../wdpar', force = TRUE)"

spellcheck:
echo "\n===== SPELL CHECK =====\n" > spell.log 2>&1
1 change: 1 addition & 0 deletions NAMESPACE
@@ -1,6 +1,7 @@
# Generated by roxygen2: do not edit by hand

export(st_erase_overlaps)
export(st_repair_geometry)
export(wdpa_clean)
export(wdpa_dissolve)
export(wdpa_fetch)
16 changes: 16 additions & 0 deletions NEWS.md
@@ -1,3 +1,19 @@
# wdpar 1.3.1.4

- Fix bug in `wdpa_read` so that all data are imported from the global
database. This bug previously meant that protected areas denoted with
polygon geometry data were not imported. It is strongly recommended that
users double check processed versions of the global database to verify
correctness.
- Add example script for downloading and cleaning data
(see `inst/scripts/global-example-script.R`).
- New `st_repair_geometry` function to repair geometry using a combination
of `sf::st_make_valid` and `prepr::st_prepair`. This function is now
used by the `wdpa_clean` function to augment data cleaning procedures.
- Update `wdpa_url` and `wdpa_fetch` to have a `page_wait` parameter
to specify the wait time for loading web pages when finding the download
URLs for datasets (#39).
- Add _dplyr_ package to Suggests because it is used in an example.
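
A minimal sketch of the new parameter in use (hypothetical values; the
`wdpa_fetch` call follows the package README, and `page_wait = 5` simply
illustrates a longer-than-default wait for a slow connection):

```
# fetch data for Liechtenstein, allowing 5 seconds for each web page
# to load before the download URL is scraped from Protected Planet
library(wdpar)
lie_data <- wdpa_fetch("Liechtenstein", wait = TRUE, page_wait = 5)
```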

# wdpar 1.3.1.3

- Update `wdpa_clean` to provide better information when cleaning data.
52 changes: 0 additions & 52 deletions R/internal.R
@@ -192,58 +192,6 @@ convert_wdpa_version_to_POSIXct <- function(x) {
out
}

#' Read spatial data
#'
#' Import spatial data. If desired, only a subset of the available data
#' are imported.
#'
#' @param dsn `character` data source name.
#'
#' @param layer `character` layer name. Defaults to `NULL`.
#'
#' @param n `integer` number of records to import.
#' Defaults to `NULL` such that all data are imported.
#'
#' @return [sf::sf()] object.
#'
#' @noRd
read_sf_n <- function(dsn, layer = NULL, n = NULL) {
# validate arguments
assertthat::assert_that(assertthat::is.string(dsn),
inherits(layer, c("character", "NULL")),
inherits(n, c("numeric", "NULL")))
if (!is.null(n)) {
assertthat::assert_that(assertthat::is.count(n),
assertthat::noNA(n))
}
if (is.null(layer)) {
layer <- sf::st_layers(dsn)$name[[1]]
}
assertthat::assert_that(assertthat::is.string(layer),
assertthat::noNA(layer))
# construct query
if (!is.null(n)) {
query <- paste0("SELECT * FROM \"", layer,"\" WHERE FID <= ", n)
} else {
query <- NA
}
# import data
out <- sf::read_sf(dsn = dsn, query = query)
if (!is.null(n)) {
if (nrow(out) > n) {
out <- out[seq_len(n), ]
}
}
# force sf_geometry column to be called "geometry"
if (!"geometry" %in% names(out)) {
old_name <- attr(out, "sf_column")
names(out)[names(out) == old_name] <- "geometry"
attr(out, "sf_column") <- "geometry"
}
# return result
out
}

#' Download file
#'
#' @param url `character` URL for downloading file.
51 changes: 51 additions & 0 deletions R/read_sf_n.R
@@ -0,0 +1,51 @@
#' Read spatial data
#'
#' Import spatial data. If desired, only a subset of the available data
#' are imported.
#'
#' @param dsn `character` data source name.
#'
#' @param layer `character` layer name. Defaults to `NULL`.
#'
#' @param n `integer` number of records to import.
#' Defaults to `NULL` such that all data are imported.
#'
#' @return [sf::sf()] object.
#'
#' @noRd
read_sf_n <- function(dsn, layer = NULL, n = NULL) {
# validate arguments
assertthat::assert_that(assertthat::is.string(dsn),
inherits(layer, c("character", "NULL")),
inherits(n, c("numeric", "NULL")))
if (!is.null(n)) {
assertthat::assert_that(assertthat::is.count(n),
assertthat::noNA(n))
}
if (is.null(layer)) {
layer <- sf::st_layers(dsn)$name[[1]]
}
assertthat::assert_that(assertthat::is.string(layer),
assertthat::noNA(layer))
# construct query
if (!is.null(n)) {
query <- paste0("SELECT * FROM \"", layer,"\" WHERE FID <= ", n)
} else {
query <- paste0("SELECT * FROM \"", layer, "\"")
}
# import data
out <- sf::read_sf(dsn = dsn, query = query)
if (!is.null(n)) {
if (nrow(out) > n) {
out <- out[seq_len(n), ]
}
}
# force sf_geometry column to be called "geometry"
if (!"geometry" %in% names(out)) {
old_name <- attr(out, "sf_column")
names(out)[names(out) == old_name] <- "geometry"
attr(out, "sf_column") <- "geometry"
}
# return result
out
}
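
Since `read_sf_n()` is internal (`@noRd`), the sketch below is purely
illustrative, using the `nc.shp` dataset shipped with the \pkg{sf} package:

```
# import only the first 5 records from a data source; the SQL query
# filters on FID (which is zero-based for some drivers, so it can yield
# n + 1 records), and the seq_len() subset afterwards guarantees that
# exactly n records are returned
path <- system.file("shape/nc.shp", package = "sf")
x <- read_sf_n(path, n = 5)
nrow(x)  # 5
```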
File renamed without changes.
134 changes: 134 additions & 0 deletions R/st_repair_geometry.R
@@ -0,0 +1,134 @@
#' Repair geometry
#'
#' Repair the geometry of a [sf::st_sf()] object.
#'
#' @param x [sf::sf()] object.
#'
#' @inheritParams wdpa_clean
#'
#' @details
#' This function works by first using the [sf::st_make_valid()] function
#' to attempt to fix geometry issues. Since the [sf::st_make_valid()] function
sometimes produces incorrect geometries in rare cases
#' (e.g. when fixing invalid geometries that cross the dateline),
#' this function then uses the `st_prepair()` function from the \pkg{prepr}
#' package to fix those geometries instead
#' (see <https://github.com/dickoa/prepr> for details).
#'
#' @section Installation:
#' This function uses the \pkg{prepr} package to help repair geometries
#' in certain cases. Because the \pkg{prepr} package is not available on
#' the Comprehensive R Archive Network (CRAN), it must be installed from
#' its online code repository. To achieve this, please
#' use the following code:
#' ```
#' if (!require(remotes)) install.packages("remotes")
#' remotes::install_github("dickoa/prepr")
#' ```
#'
#' Note that the \pkg{prepr} package has system dependencies that need to be
#' installed before the package itself can be installed
(see the \pkg{prepr} package README file for platform-specific instructions).
#'
#' @examples
#' # create sf object
#' p1 <- st_sf(
#' id = 1,
#' geometry = st_as_sfc("POLYGON((0 0, 0 10, 10 0, 10 10, 0 0))", crs = 3857)
#' )
#'
#' # repair geometry
#' p2 <- st_repair_geometry(p1)
#'
#' # print object
#' print(p2)
#' @export
st_repair_geometry <- function(x, geometry_precision = 1500) {
# assert arguments are valid
assertthat::assert_that(
inherits(x, "sf"),
!assertthat::has_name(x, "_repair_id"),
assertthat::is.count(geometry_precision),
assertthat::noNA(geometry_precision)
)

# add in identifier column to keep track of geometries
x[["_repair_id"]] <- seq_len(nrow(x))

# set precision
x <- sf::st_set_precision(x, geometry_precision)

# apply first pass for fixing geometry
x2 <- sf::st_make_valid(x)

# remove empty geometries
x2 <- x2[!sf::st_is_empty(x2), ]

# extract polygons and points (if needed)
x2 <- extract_polygons_and_points(x2)

# detect if any invalid geometries persist
## subset repaired polygons
x_sub <- x[match(x2[["_repair_id"]], x[["_repair_id"]]), , drop = FALSE]
## detect if invalid polygons based on changes in area
area_threshold <- ifelse(sf::st_is_longlat(x), 1, 1e+4)
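## (areas are compared with the CRS stripped, so the threshold is in
## squared coordinate units: roughly square degrees for longitude/latitude
## data, and squared map units (typically meters) otherwise)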
invalid_idx <- which(
abs(
as.numeric(sf::st_area(sf::st_set_crs(x_sub, NA))) -
as.numeric(sf::st_area(sf::st_set_crs(x2, NA)))
) >= area_threshold
)
## refine detections to only include polygons that span width of planet
## note this only works if x has a defined CRS
if (sf::st_crs(x) != sf::st_crs(NA)) {
## compute global extent in coordinate system of x (if crs defined)
global_bbox <- sf::st_as_sfc(
"POLYGON((-180 -90, 180 -90, 180 90, -180 90, -180 -90))",
crs = 4326
)
if (sf::st_crs(x) != sf::st_crs(4326)) {
global_bbox <- sf::st_transform(global_bbox, sf::st_crs(x))
}
global_bbox <- sf::st_bbox(global_bbox)
## compute distance threshold for invalid outputs from st_make_valid()
dist_threshold <- unname(global_bbox$xmax - global_bbox$xmin) * 0.7
## detect if invalid polygons based on total width across planet
invalid_bbox_idx <- which(
vapply(sf::st_geometry(x2), FUN.VALUE = logical(1), function(y) {
b <- sf::st_bbox(y)
(b$xmax - b$xmin) > dist_threshold
})
)
## subset geometries
invalid_idx <- intersect(invalid_idx, invalid_bbox_idx)
}

# manually fix geometries if needed
if (length(invalid_idx) > 0) {
### verify that prepr package is installed
assertthat::assert_that(
requireNamespace("prepr"),
msg = paste(
"the \"prepr\" package needs to be installed, use: \n",
"remotes::install_github(\"dickoa/prepr\")"
)
)
### find geometries to repair
invalid_ids <- x_sub[["_repair_id"]][invalid_idx]
rm(x_sub)
### fix geometries
x2 <- rbind(
x2[!x2[["_repair_id"]] %in% invalid_ids, , drop = FALSE],
prepr::st_prepair(
x[x[["_repair_id"]] %in% invalid_ids, , drop = FALSE]
)
)
}

# remove custom id column
geom_col <- attr(x2, "sf_column")
x2 <- x2[, setdiff(names(x2), c("_repair_id", geom_col)), drop = FALSE]

# return result
x2
}
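
For context, a hypothetical example (not part of this commit) of the
dateline case that the distance-based detection above is designed to catch:

```
# a self-intersecting ("bowtie") polygon drawn across the dateline;
# repairing it with sf::st_make_valid() alone may yield output whose
# bounding box spans nearly the full globe, tripping the threshold of
# 70% of the global width and triggering the prepr::st_prepair()
# fallback (which requires the prepr package to be installed)
p <- sf::st_sf(
  id = 1,
  geometry = sf::st_as_sfc(
    "POLYGON((179 -1, -179 1, 179 1, -179 -1, 179 -1))",
    crs = 4326
  )
)
p2 <- st_repair_geometry(p)
```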