-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
8 changed files
with
971 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
Package: PreSPickR | ||
Title: Downloading Species Presences (Occurrences) From Public Repositories | ||
Version: 0.1 | ||
Authors@R: person("Xavier", "Rotllan-Puig", email = "[email protected]", role = c("aut", "cre")) | ||
Description: Some functions for downloading species presences (occurrences) from several | ||
public repositories (for now, GBIF and Bioatles). | ||
They clean the data sets and save them in a csv file. | ||
Depends: R (>= 3.3.3), dplyr, raster, rgbif, rgdal, rvest, sp | ||
License: GPL-3 | ||
Encoding: UTF-8 | ||
LazyData: true | ||
RoxygenNote: 6.0.1 | ||
URL: https://github.com/xavi-rp/PreSPickR | ||
BugReports: https://github.com/xavi-rp/PreSPickR/issues |
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
# Generated by roxygen2: do not edit by hand | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
Version: 1.0 | ||
|
||
RestoreWorkspace: No | ||
SaveWorkspace: No | ||
AlwaysSaveHistory: Default | ||
|
||
EnableCodeIndexing: Yes | ||
UseSpacesForTab: Yes | ||
NumSpacesForTab: 2 | ||
Encoding: UTF-8 | ||
|
||
RnwWeave: knitr | ||
LaTeX: pdfLaTeX | ||
|
||
AutoAppendNewline: Yes | ||
StripTrailingWhitespace: Yes | ||
|
||
BuildType: Package | ||
PackageUseDevtools: Yes | ||
PackageInstallArgs: --no-multiarch --with-keep.source | ||
PackageRoxygenize: rd,collate,namespace |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,124 @@ | ||
########################################################################### | ||
######## ############ | ||
######## Downloading data from Bioatles ############ | ||
######## ############ | ||
########################################################################### | ||
|
||
#' Downloading species occurrence data (presences) from Bioatles | ||
#' | ||
#' The aim of this script is to define the function bioatles(), which is used to download species (presences) data from the Bioatles http://bioatles.caib.es. As data in Bioatles is projected in European Datum 1950 (31N), the function also transforms it into the Lat/Long Geographic Coordinates System WGS84. bioatles() is based on several functions included in the packages "rvest" (Wickham, 2016) and "xml2" (Wickham et al., 2017). Finally, it saves the data in a csv file. | ||
#' | ||
#' @author Xavier Rotllan-Puig | ||
#' @description Download species (presences) data from the Bioatles http://bioatles.caib.es. As data in Bioatles is projected in European Datum 1950 (31N), the function also transforms it into the Lat/Long Geographic Coordinates System WGS84. bioatles() is based on several functions included in the packages "rvest" (Wickham, 2016) and "xml2" (Wickham et al., 2017). Finally, it saves the data in a csv file. | ||
#' @param sp_dir Directory of the species list | ||
#' @param sp_list A csv file name or a vector containing species to be downloaded | ||
#' @param out_name Name of the output data set (csv file, without the ".csv" extension) | ||
#' @return No meaningful value; the function is called for its side effect of writing the presences to \code{out_name}.csv in the working directory | ||
#' @name bioatles | ||
#' | ||
#' | ||
#' | ||
# bioatles() | ||
# | ||
# Created on: Winter-Spring 2018 (under construction) | ||
# | ||
# Created by: ([email protected]) | ||
# | ||
# Inputs: | ||
# - The location of a csv file with the names of the species to be downloaded | ||
# ATTENTION: Make sure that the spelling is exactly the same as the one used by Bioatles (e.g. "Chamaerops humilis") | ||
# | ||
# Outputs: | ||
# - A csv file with 3 columns (decimalLongitude, decimalLatitude, species) | ||
# | ||
# References: | ||
# - Hadley Wickham (2016). rvest: Easily Harvest (Scrape) Web Pages. R package | ||
# version 0.3.2. https://CRAN.R-project.org/package=rvest | ||
# - Hadley Wickham, James Hester and Jeroen Ooms (2017). xml2: Parse XML. R | ||
# package version 1.1.1. https://CRAN.R-project.org/package=xml2 | ||
# | ||
# | ||
# ------------------------------------------ | ||
|
||
|
||
bioatles <- function(sp_dir = NULL, sp_list = NULL, out_name = "sp_records"){
  # Download species presences from Bioatles (http://bioatles.caib.es),
  # reproject them from European Datum 1950 (31N) to Lat/Long WGS84 and write
  # them to "<out_name>.csv" in the current working directory.
  #
  # Arguments:
  #   sp_dir    Directory holding the species-list csv (defaults to the
  #             working directory). Only used when sp_list is a csv file name.
  #   sp_list   Either the name of a header-less csv file whose first column
  #             holds the species names, or a vector of species names.
  #   out_name  Name of the output csv file, without the ".csv" extension.
  #
  # Value: whatever write.csv() returns; the function is called for its side
  #   effect of writing the csv file.
  #
  # Errors: stops if sp_list is neither a csv file name nor a non-empty
  #   vector, if the species selector cannot be located in the page, or if a
  #   species name is not listed in Bioatles.

  #### Settings ####
  # Working directory
  wd <- getwd()

  # List of species (validated before any network access)
  if (length(sp_list) == 1 && grepl("\\.csv$", sp_list, ignore.case = TRUE)) {
    if (is.null(sp_dir)) sp_dir <- wd
    species <- read.csv(file.path(sp_dir, sp_list), header = FALSE,
                        stringsAsFactors = FALSE)
    species <- as.vector(species$V1)   # species to be downloaded
  } else if (is.vector(sp_list)) {
    species <- sp_list
  } else {
    stop("Not supported format (must be .csv file or vector)")
  }
  if (length(species) == 0) {
    stop("The list of species is empty", call. = FALSE)
  }

  # Bioatles webpage
  page <- read_html("http://bioatles.caib.es/serproesfront/cuadriculas.do?seccion=distribEspecies")

  #### Downloading Species Name and BIOATLES code ####
  # Species names are the <option> texts found between the last two
  # "Selecciona..." placeholder entries of the page.
  option_text <- page %>% html_nodes("option") %>% html_text()
  chks <- grep("^Selecciona", option_text)
  n_chks <- length(chks)
  if (n_chks < 2) {
    stop("Could not locate the species list in the Bioatles page", call. = FALSE)
  }
  sp_names <- option_text[(chks[n_chks - 1] + 1) : (chks[n_chks] - 1)]

  # Species codes are the "value" attributes of the children of the species
  # selector; the first child is dropped so that codes line up with sp_names.
  nds1 <- page %>% html_nodes("select#selectEspecie")
  if (length(nds1) == 0) {
    stop("Could not locate the species selector in the Bioatles page", call. = FALSE)
  }
  sp_codes <- html_attr(xml_children(nds1[[1]]), "value")[-1]
  if (length(sp_codes) != length(sp_names)) {
    stop("Species names and codes scraped from Bioatles differ in number",
         call. = FALSE)
  }
  code_name <- data.frame(code = sp_codes, name = sp_names,
                          stringsAsFactors = FALSE)

  #### Downloading data of species presences ####
  CRS.old <- CRS("+init=epsg:23031")   # European Datum 1950 (31N)
  CRS.new <- CRS("+init=epsg:4326")    # Lat/Long Geographic Coordinates System WGS84
  records <- vector("list", length(species))

  for (i in seq_along(species)) {
    sps <- species[[i]]
    print(paste0("Downloading data: ", sps))
    # Short code: first 3 letters of the name + first 3 letters of its last word
    spec2 <- tolower(paste(substr(sps, 1, 3), substr(sub(".* ", "", sps), 1, 3), sep = "_"))
    spec <- code_name[code_name$name %in% sps, ]
    if (nrow(spec) == 0) stop("No data for this species in Bioatles, please check name/spelling")
    spec_code <- as.vector(spec$code)

    page2 <- read_html(paste0("http://bioatles.caib.es/serproesfront/registros.do?accion=listarRegistros&codiEspecie=", spec_code, "&codiFamilia=0&codiGrupo=0"))
    tbl_sp <- page2 %>% html_table(fill = TRUE, header = TRUE)
    pres2export <- tbl_sp[[2]]                       # second table of the page holds the records
    pres2export <- pres2export[ - c(1, ncol(pres2export))]   # drop first and last columns

    data02 <- pres2export[, c(7, 1, 2)]
    data02 <- data02[!duplicated(data02), ]
    # Coordinates are scaled by 1000 before being declared as EPSG:23031
    # (presumably km -> m; confirm against the Bioatles table).
    data02[, 2:3] <- data02[, 2:3] * 1000
    names(data02) <- c("species", "decimalLongitude", "decimalLatitude")

    #### Transform presences' projection ####
    coordinates(data02) <- c("decimalLongitude", "decimalLatitude")   # setting spatial coordinates
    proj4string(data02) <- CRS.old                                    # define projection
    data02_WGS84 <- spTransform(data02, CRS.new)                      # projecting

    data02 <- as.data.frame(coordinates(data02_WGS84))
    data02$species <- sps
    data02$sp2 <- spec2

    records[[i]] <- data02
  }

  # Bind once instead of growing a data frame inside the loop
  data1 <- do.call(rbind, records)

  #### Saving data ####
  out_file <- file.path(wd, paste0(out_name, ".csv"))
  print(paste0("Saving Bioatles data as ", out_file))
  write.csv(data1[, 1:3], out_file, quote = FALSE, row.names = FALSE)

}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,137 @@ | ||
|
||
########################################################################### | ||
######## ############ | ||
######## Downloading data from GBIF ############ | ||
######## ############ | ||
######## ############ | ||
########################################################################### | ||
|
||
|
||
#' Downloading species occurrence data (presences) from GBIF | ||
#' | ||
#' The aim of this script is to define the function GetBIF(), which is used to download species occurrences from GBIF (Global Biodiversity Information Facility) and save them as a csv data set. It is based on several functions included in the package "rgbif" (Chamberlain, 2017). GetBIF() retrieves your GBIF credentials (user and password), automatically checks in a loop until the request of data made to GBIF is ready, and then starts the download. Finally, it saves the data in a csv file. | ||
#' | ||
#' @author Xavier Rotllan-Puig | ||
#' @description Download species occurrences from GBIF (Global Biodiversity Information Facility) and save them as a csv data set. It is based on several functions included in the package "rgbif" (Chamberlain, 2017). GetBIF() retrieves your GBIF credentials (user and password), automatically checks in a loop until the request of data made to GBIF is ready, and then starts the download. Finally, it saves the data in a csv file. | ||
#' @param gbif_usr User name in GBIF | ||
#' @param gbif_pwrd Password in GBIF | ||
#' @param email email in GBIF | ||
#' @param credentials .RData file containing a list with gbif_usr, gbif_pwrd and email | ||
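#' @param sp_dir Directory of the species list | ||
#' @param sp_list A csv file name or a vector containing species to be downloaded | ||
#' @param out_name Name of the output data set (csv file, without the ".csv" extension) | ||
#' @return No meaningful value; the function is called for its side effect of writing the occurrences to \code{out_name}.csv in the working directory | ||
#' @name GetBIF | ||
#' | ||
#' | ||
#' | ||
# GetBIF() | ||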
# | ||
# Created on: Winter 2018 (under construction) | ||
# | ||
# Created by: Xavier Rotllan-Puig ([email protected]) | ||
# | ||
# Inputs: | ||
# - The location of a csv file with the names of the species to be downloaded | ||
# ATTENTION: Make sure that the spelling is exactly the same used by GBIF (e.g. "FAGUS SYLVATICA L.") | ||
# - For security reasons, your GBIF credentials (user, password and email) | ||
# can be loaded from a RData file (location needs to be given). Otherwise, | ||
# they can be passed as arguments | ||
# | ||
# | ||
# Outputs: | ||
# - A csv file with 4 columns (species, decimalLatitude, decimalLongitude and sp2, a short species code) | ||
# | ||
# References: | ||
# - Scott Chamberlain (2017). rgbif: Interface to the Global 'Biodiversity' | ||
# Information Facility 'API'. R package version 0.9.8. | ||
# https://CRAN.R-project.org/package=rgbif | ||
|
||
|
||
# ------------------------------------------ | ||
|
||
|
||
GetBIF <- function(gbif_usr = NULL, gbif_pwrd = NULL, email = NULL,
                   credentials = NULL,
                   sp_dir = NULL, sp_list = NULL,
                   out_name = "sp_records"
                   ){
  # Download species occurrences from GBIF and write them to
  # "<out_name>.csv" in the current working directory.
  #
  # For every species a download request is sent to GBIF and polled once a
  # minute until it is ready; the zip file is then fetched into the working
  # directory and the request, its metadata, the download handle and its
  # citation are saved in "download_info_<species>.RData". Once all species
  # are downloaded, the files are read back, duplicated coordinates are
  # dropped and everything is saved as a single csv.
  #
  # Arguments:
  #   gbif_usr, gbif_pwrd, email  GBIF user name, password and email.
  #   credentials  Path of an .RData file that is load()ed into the function;
  #                presumably it defines objects named gbif_usr, gbif_pwrd
  #                and email, which then replace the arguments (confirm with
  #                the file you use).
  #   sp_dir       Directory holding the species-list csv (defaults to the
  #                working directory). Only used when sp_list is a csv name.
  #   sp_list      Either the name of a header-less csv file whose first
  #                column holds the species names, or a vector of names.
  #   out_name     Name of the output csv file, without ".csv".
  #
  # Value: whatever write.csv() returns; called for its side effects.
  #
  # Errors: stops if sp_list is neither a csv file name nor a non-empty
  #   vector, if a species has no speciesKey in the GBIF backbone, or if a
  #   download request ends in a failure status.

  #### Settings ####
  # Working directory
  wd <- getwd()

  # Calling GBIF credentials
  if (!is.null(credentials)) load(credentials, verbose = FALSE)

  # List of species
  if (length(sp_list) == 1 && grepl("\\.csv$", sp_list, ignore.case = TRUE)) {
    if (is.null(sp_dir)) sp_dir <- wd
    species <- read.csv(file.path(sp_dir, sp_list), header = FALSE,
                        stringsAsFactors = FALSE)
    species <- as.vector(species$V1)   # species to be downloaded
  } else if (is.vector(sp_list)) {
    species <- sp_list
  } else {
    stop("Not supported format (must be .csv file or vector)")
  }
  if (length(species) == 0) {
    stop("The list of species is empty", call. = FALSE)
  }

  #### Downloading Data ####
  ## Spin up a download request for SEVERAL species data

  # Statuses after which a request will never reach "SUCCEEDED"
  failed_status <- c("CANCELLED", "FAILED", "KILLED", "FILE_ERASED")

  for (sps in species){
    print(paste0("Downloading data for ", sps))

    sp_key <- name_backbone(name = sps)$speciesKey
    if (is.null(sp_key) || length(sp_key) == 0 || is.na(sp_key[1])) {
      stop("No species key found in the GBIF backbone for '", sps,
           "', please check name/spelling", call. = FALSE)
    }

    rqst_02 <- occ_download(paste0("taxonKey = ", sp_key),
                            "hasCoordinate = TRUE",
                            type = "and",
                            user = gbif_usr, pwd = gbif_pwrd, email = email)   # prepares the spin up

    # Poll the request metadata once a minute until the data set is ready
    rqst_02_meta <- data.frame(status = "INITIAL")
    n_round <- 1
    while (rqst_02_meta$status != "SUCCEEDED") {
      cat("\r", paste0("round = ", n_round, " / ",
                       "status = ", rqst_02_meta$status))
      Sys.sleep(60)
      rqst_02_meta <- rqst_02 %>% occ_download_meta
      cat("\r", paste0("round = ", n_round, " / ",
                       "status = ", rqst_02_meta$status))
      # Without this check a failed request would be polled forever
      if (rqst_02_meta$status %in% failed_status) {
        stop("GBIF download request for '", sps, "' ended with status ",
             rqst_02_meta$status, call. = FALSE)
      }
      n_round <- n_round + 1
    }

    # Start download when meta says "Status: SUCCEEDED"
    dta <- occ_download_get(key = rqst_02_meta$key, path = ".",
                            overwrite = TRUE, curlopts = list(verbose = TRUE))

    # saving citation
    citation_02 <- dta %>% gbif_citation

    # Saving download info
    save(list = c("rqst_02", "rqst_02_meta", "dta", "citation_02"),
         file = paste0("download_info_", sps, ".RData"))

  }

  #### Retrieving Data ####
  coord_cols <- c("decimalLatitude", "decimalLongitude")
  records <- vector("list", length(species))

  for (i in seq_along(species)){
    sps <- species[[i]]
    cat(paste0("Reading data for ", sps), "\n")
    # Load into a scratch environment so the saved objects cannot overwrite
    # the function's own variables
    info <- new.env()
    load(paste0("download_info_", sps, ".RData"), envir = info, verbose = FALSE)

    # Reading in data
    data02 <- occ_download_import(info$dta)
    # Drop duplicated coordinates; columns are selected by name because their
    # position in the GBIF export is not guaranteed
    data02 <- data02[!duplicated(data02[, coord_cols]), ]
    data02 <- data02[, names(data02) %in%
                       c("species", "decimalLatitude", "decimalLongitude")]

    records[[i]] <- data02
  }

  # Bind once instead of growing a data frame inside the loop
  data1 <- as.data.frame(do.call(rbind, records))   # data set with coordinates and name of species
  # Short code: first 3 letters of the genus + first 3 letters of the rest
  data1$sp2 <- tolower(paste(substr(data1$species, 1, 3),
                             substr(sub("^\\S+\\s+", '', data1$species), 1, 3),
                             sep = "_"))

  #### Saving data ####
  out_file <- file.path(wd, paste0(out_name, ".csv"))
  print(paste0("Saving GBIF data as ", out_file))
  write.csv(data1, out_file,
            quote = FALSE, row.names = FALSE)
}
42 changes: 42 additions & 0 deletions
42
man/GetBIF-open-paren-close-paren-GetBIF-open-paren-close-paren.Rd
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
36 changes: 36 additions & 0 deletions
36
man/bioatles-open-paren-close-paren-bioatles-open-paren-close-paren.Rd
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.