Skip to content

Commit

Permalink
second first commit
Browse files Browse the repository at this point in the history
  • Loading branch information
xavi-rp committed Jul 16, 2018
1 parent 06b40ca commit ea67bba
Show file tree
Hide file tree
Showing 8 changed files with 971 additions and 0 deletions.
14 changes: 14 additions & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
Package: PreSPickR
Title: Downloading Species Presences (Occurrences) From Public Repositories
Version: 0.1
Authors@R: person("Xavier", "Rotllan-Puig", email = "[email protected]", role = c("aut", "cre"))
Description: Some functions for downloading species presences (occurrences) from several
public repositories (for now, GBIF and Bioatles).
They clean the data sets and save them in a csv file.
Depends: R (>= 3.3.3), dplyr, raster, rgbif, rgdal, rvest, sp
License: GPL-3
Encoding: UTF-8
LazyData: true
RoxygenNote: 6.0.1
URL: https://github.com/xavi-rp/PreSPickR
BugReports: https://github.com/xavi-rp/PreSPickR/issues
595 changes: 595 additions & 0 deletions LICENSE.md

Large diffs are not rendered by default.

2 changes: 2 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Generated by roxygen2: do not edit by hand

21 changes: 21 additions & 0 deletions PreSPickR.Rproj
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
Version: 1.0

RestoreWorkspace: No
SaveWorkspace: No
AlwaysSaveHistory: Default

EnableCodeIndexing: Yes
UseSpacesForTab: Yes
NumSpacesForTab: 2
Encoding: UTF-8

RnwWeave: knitr
LaTeX: pdfLaTeX

AutoAppendNewline: Yes
StripTrailingWhitespace: Yes

BuildType: Package
PackageUseDevtools: Yes
PackageInstallArgs: --no-multiarch --with-keep.source
PackageRoxygenize: rd,collate,namespace
124 changes: 124 additions & 0 deletions R/bioatles.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
###########################################################################
######## ############
######## Downloading data from Bioatles ############
######## ############
###########################################################################

#' Downloading species occurrence data (presences) from Bioatles
#'
#' The aim of this script is to define the function bioatles(), which is used to download species presence data from Bioatles (http://bioatles.caib.es). As data in Bioatles are projected in European Datum 1950 (31N), the function also transforms them to the Lat/Long Geographic Coordinates System WGS84. bioatles() is based on several functions included in the packages "rvest" (Wickham, 2016) and "xml2" (Wickham et al., 2017). Finally, it saves the data in a csv file.
#'
#' @author Xavier Rotllan-Puig
#' @description Download species presence data from Bioatles (http://bioatles.caib.es). As data in Bioatles are projected in European Datum 1950 (31N), the function also transforms them to the Lat/Long Geographic Coordinates System WGS84. bioatles() is based on several functions included in the packages "rvest" (Wickham, 2016) and "xml2" (Wickham et al., 2017). Finally, it saves the data in a csv file.
#' @param sp_dir Directory of the species list
#' @param sp_list A csv file name or a vector containing species to be downloaded
#' @param out_name Name to the output data set (csv file)
#' @return \code{NULL}, invisibly. The function is called for its side effect: a csv file named \code{out_name} is written to the working directory.
#' @name bioatles
#'
#'
#'
#' bioatles()
#
# Created on: Winter-Spring 2018 (under construction)
#
# Created by: ([email protected])
#
# Inputs:
# - The location of a csv file with the names of the species to be downloaded
# ATTENTION: Make sure that the spelling is exactly the same used by Bioatles (e.g. "Chamaerops humilis")
#
# Outputs:
# - A csv file with 3 columns (decimalLongitude, decimalLatitude, species)
#
# References:
# - Hadley Wickham (2016). rvest: Easily Harvest (Scrape) Web Pages. R package
# version 0.3.2. https://CRAN.R-project.org/package=rvest
# - Hadley Wickham, James Hester and Jeroen Ooms (2017). xml2: Parse XML. R
# package version 1.1.1. https://CRAN.R-project.org/package=xml2
#
#
# ------------------------------------------


bioatles <- function(sp_dir = NULL, sp_list = NULL, out_name = "sp_records"){
  # Downloads the presences of the requested species from Bioatles,
  # reprojects them from EPSG:23031 to EPSG:4326 and writes them to
  # "<working directory>/<out_name>.csv".
  #
  # Arguments:
  #   sp_dir   Directory holding the csv file given in sp_list (defaults to
  #            the working directory). Only used when sp_list is a csv file.
  #   sp_list  Either the name of a csv file (no header, species names in the
  #            first column) or a vector of species names.
  #   out_name Name of the output csv file, without extension.
  #
  # Returns the value of write.csv() (NULL, invisibly); the function is
  # called for its side effect of writing the csv file.

  #### Settings ####
  # Working directory
  wd <- getwd()

  # List of species (validated before any request is sent to the web site)
  # The dot is escaped and the pattern anchored so that only a single file
  # name ending in ".csv" is treated as a file.
  is_csv <- is.character(sp_list) && length(sp_list) == 1 &&
    grepl("\\.csv$", sp_list, ignore.case = TRUE)
  if (is_csv) {
    if (is.null(sp_dir)) sp_dir <- wd
    species <- read.csv(file.path(sp_dir, sp_list), header = FALSE,
                        stringsAsFactors = FALSE)
    species <- as.vector(species$V1)  # species to be downloaded
  } else if (is.vector(sp_list)) {
    species <- sp_list
  } else {
    stop("Not supported format (must be .csv file or vector)")
  }
  if (length(species) == 0) stop("The list of species is empty")

  # Bioatles webpage
  page <- read_html("http://bioatles.caib.es/serproesfront/cuadriculas.do?seccion=distribEspecies")

  #### Downloading Species Name and BIOATLES code ####
  # Text of every <option> in the page, kept as a plain character vector so
  # that nothing depends on an automatically generated column name.
  option_txt <- page %>% html_nodes("option") %>% html_text()
  chks <- grep("^Selecciona", option_txt)
  if (length(chks) < 2) {
    stop("Unexpected structure of the Bioatles web page: list of species not found")
  }
  # The species names are the options located between the last two
  # "Selecciona..." placeholders.
  first_nm <- chks[length(chks) - 1] + 1
  last_nm <- chks[length(chks)] - 1
  if (last_nm < first_nm) {
    stop("Unexpected structure of the Bioatles web page: list of species is empty")
  }
  sp_names <- option_txt[first_nm:last_nm]

  # Codes are the "value" attributes of the children of the species selector;
  # the first child is dropped (as in the names, it is not a species).
  nds1 <- page %>% html_nodes("select#selectEspecie")
  if (length(nds1) == 0) {
    stop("Unexpected structure of the Bioatles web page: species selector not found")
  }
  sp_codes <- html_attr(html_children(nds1[[1]]), "value")[-1]

  if (length(sp_codes) != length(sp_names)) {
    stop("Unexpected structure of the Bioatles web page: species names and codes do not match")
  }
  code_name <- data.frame(code = sp_codes, name = sp_names,
                          stringsAsFactors = FALSE)

  #### Downloading data of species presences ####
  # One data frame per species, bound together once after the loop
  records <- vector("list", length(species))

  for (i in seq_along(species)) {
    sps <- species[[i]]
    print(paste0("Downloading data: ", sps))
    # Short label: first 3 letters of the first word + first 3 of the last word
    spec2 <- tolower(paste(substr(sps, 1, 3), substr(sub(".* ", "", sps), 1, 3), sep = "_"))
    spec <- code_name[code_name$name %in% sps, ]
    if (nrow(spec) == 0) stop("No data for this species in Bioatles, please check name/spelling")
    spec_code <- unique(as.vector(spec$code))
    if (length(spec_code) > 1) {
      # several codes would build several URLs, which read_html() cannot take
      warning("More than one Bioatles code found for ", sps, "; using the first one")
      spec_code <- spec_code[1]
    }

    page2 <- read_html(paste0("http://bioatles.caib.es/serproesfront/registros.do?accion=listarRegistros&codiEspecie=", spec_code, "&codiFamilia=0&codiGrupo=0"))
    tbl_sp <- html_table(page2, fill = TRUE, header = TRUE)
    # The records are in the second table of the page; its first and last
    # columns are discarded.
    pres2export <- as.data.frame(tbl_sp[[2]])
    pres2export <- pres2export[-c(1, ncol(pres2export))]

    data02 <- pres2export[, c(7, 1, 2)]
    data02 <- data02[!duplicated(data02), ]
    # NOTE(review): presumably converts km to m before projecting - confirm
    data02[, 2:3] <- data02[, 2:3] * 1000
    names(data02) <- c("species", "decimalLongitude", "decimalLatitude")

    #### Transform presences' projection ####
    coordinates(data02) <- c("decimalLongitude", "decimalLatitude")  # setting spatial coordinates
    proj4string(data02) <- CRS("+init=EPSG:23031")  # define projection: European Datum 1950 (31N)
    CRS.new <- CRS("+init=EPSG:4326")  # Lat/Long Geographic Coordinates System WGS84
    data02_WGS84 <- spTransform(data02, CRS.new)  # projecting

    data02 <- as.data.frame(data02_WGS84@coords)
    data02$species <- sps
    data02$sp2 <- spec2

    records[[i]] <- data02
  }

  data1 <- do.call(rbind, records)

  #### Saving data ####
  # Columns written: decimalLongitude, decimalLatitude, species (sp2 is dropped)
  print(paste0("Saving Bioatles data as ", wd, "/", out_name, ".csv"))
  write.csv(data1[, 1:3], paste0(wd, "/", out_name, ".csv"), quote = FALSE, row.names = FALSE)

}
137 changes: 137 additions & 0 deletions R/gbif_data.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,137 @@

###########################################################################
######## ############
######## Downloading data from GBIF ############
######## ############
######## ############
###########################################################################


#' Downloading species occurrence data (presences) from GBIF
#'
#' The aim of this script is to define the function GetBIF(), which is used to download species occurrences from GBIF (Global Biodiversity Information Facility), and saves them as a csv data set. It is based on several functions included in the package "rgbif" (Chamberlain, 2017). GetBIF() retrieves your GBIF credentials (user and password) and automatically checks in a loop until the request of data made to GBIF is ready and starts the download. Finally, it saves the data in a csv file.
#'
#' @author Xavier Rotllan-Puig
#' @description Download species occurrences from GBIF (Global Biodiversity Information Facility), and saves them as a csv data set. It is based on several functions included in the package "rgbif" (Chamberlain, 2017). GetBIF() retrieves your GBIF credentials (user and password) and automatically checks in a loop until the request of data made to GBIF is ready and starts the download. Finally, it saves the data in a csv file.
#' @param gbif_usr User name in GBIF
#' @param gbif_pwrd Password in GBIF
#' @param email email in GBIF
#' @param credentials .RData file containing a list with gbif_usr, gbif_pwrd and email
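#' @param sp_dir Directory of the species list
#' @param sp_list A csv file name or a vector containing species to be downloaded
#' @param out_name Name to the output data set (csv file)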
#' @return \code{NULL}, invisibly. The function is called for its side effect: a csv file named \code{out_name} is written to the working directory.
#' @name GetBIF
#'
#'
#'
#' GetBIF()
#
# Created on: Winter 2018 (under construction)
#
# Created by: Xavier Rotllan-Puig ([email protected])
#
# Inputs:
# - The location of a csv file with the names of the species to be downloaded
# ATTENTION: Make sure that the spelling is exactly the same used by GBIF (e.g. "FAGUS SYLVATICA L.")
# - For security reasons, your GBIF credentials (user, password and email)
# can be loaded from a RData file (location needs to be given). Otherwise,
# they can be passed as arguments
#
#
# Outputs:
# - A csv file with the columns species, decimalLatitude and decimalLongitude
#   (in the order found in the GBIF download) plus sp2, a short species label
#
# References:
# - Scott Chamberlain (2017). rgbif: Interface to the Global 'Biodiversity'
# Information Facility 'API'. R package version 0.9.8.
# https://CRAN.R-project.org/package=rgbif


# ------------------------------------------


GetBIF <- function(gbif_usr = NULL, gbif_pwrd = NULL, email = NULL,
                   credentials = NULL,
                   sp_dir = NULL, sp_list = NULL,
                   out_name = "sp_records"
                   ){
  # Requests, downloads and merges GBIF occurrences (with coordinates) for
  # the given species, and writes them to "<working directory>/<out_name>.csv".
  #
  # Arguments:
  #   gbif_usr, gbif_pwrd, email  GBIF credentials passed to occ_download().
  #   credentials  Path of an .RData file that is load()ed into this function.
  #                NOTE(review): presumably it must contain objects named
  #                gbif_usr, gbif_pwrd and email so that they replace the
  #                arguments - confirm against the files actually used.
  #   sp_dir   Directory holding the csv file given in sp_list (defaults to
  #            the working directory). Only used when sp_list is a csv file.
  #   sp_list  Either the name of a csv file (no header, species names in the
  #            first column) or a vector of species names.
  #   out_name Name of the output csv file, without extension.
  #
  # Side effects: one "download_info_<species>.RData" file per species and
  # the files fetched by occ_download_get() are written to the working
  # directory, in addition to the final csv.
  #
  # Returns the value of write.csv() (NULL, invisibly).

  #### Settings ####
  # Working directory
  wd <- getwd()

  # Calling GBIF credentials
  if (!is.null(credentials)) load(credentials, verbose = FALSE)

  # List of species
  # The dot is escaped and the pattern anchored so that only a single file
  # name ending in ".csv" is treated as a file.
  is_csv <- is.character(sp_list) && length(sp_list) == 1 &&
    grepl("\\.csv$", sp_list, ignore.case = TRUE)
  if (is_csv) {
    if (is.null(sp_dir)) sp_dir <- wd
    species <- read.csv(file.path(sp_dir, sp_list), header = FALSE,
                        stringsAsFactors = FALSE)
    species <- as.vector(species$V1)  # species to be downloaded
  } else if (is.vector(sp_list)) {
    species <- sp_list
  } else {
    stop("Not supported format (must be .csv file or vector)")
  }
  if (length(species) == 0) stop("The list of species is empty")

  #### Downloading Data ####
  ## Spin up a download request for SEVERAL species data

  # Statuses after which a GBIF request can no longer succeed
  failed_status <- c("CANCELLED", "KILLED", "FAILED", "FILE_ERASED")

  for (sps in species){
    print(paste0("Downloading data for ", sps))

    # Without a matched key the predicate would be the invalid "taxonKey = "
    taxon_key <- name_backbone(name = sps)$speciesKey
    if (is.null(taxon_key) || length(taxon_key) == 0 || is.na(taxon_key[1])) {
      stop("No match for this species in the GBIF backbone, please check name/spelling: ", sps)
    }

    rqst_02 <- occ_download(paste0("taxonKey = ", taxon_key[1]),
                            "hasCoordinate = TRUE",
                            type = "and",
                            user = gbif_usr, pwd = gbif_pwrd, email = email)  # prepares the spin up
    # Creates metadata
    rqst_02_meta <- data.frame(status = "INITIAL")
    n_round <- 1  # not called `round`, to avoid masking base::round()
    while (rqst_02_meta$status != "SUCCEEDED") {
      cat("\r", paste0("round = ", n_round, " / ",
                       "status = ", rqst_02_meta$status))
      Sys.sleep(60)  # the request is polled once per minute
      rqst_02_meta <- rqst_02 %>% occ_download_meta
      cat("\r", paste0("round = ", n_round, " / ",
                       "status = ", rqst_02_meta$status))
      # A request that ended unsuccessfully never reaches "SUCCEEDED":
      # stop instead of polling forever.
      if (rqst_02_meta$status %in% failed_status) {
        stop("GBIF download request for ", sps, " ended with status ",
             rqst_02_meta$status)
      }
      n_round <- n_round + 1
    }

    # Start download when meta says "Status: SUCCEEDED"
    dta <- occ_download_get(key = rqst_02_meta$key, path = ".",
                            overwrite = TRUE, curlopts = list(verbose = TRUE))

    # saving citation
    citation_02 <- dta %>% gbif_citation

    # Saving download info
    save(list = c("rqst_02", "rqst_02_meta", "dta", "citation_02"),
         file = paste0("download_info_", sps, ".RData"))

  }


  #### Retrieving Data ####
  coord_cols <- c("decimalLatitude", "decimalLongitude")
  # One data frame per species, bound together once after the loop
  records <- vector("list", length(species))

  for (i in seq_along(species)){
    sps <- species[[i]]
    cat(paste0("Reading data for ", sps), "\n")
    # Loaded into its own environment so that the saved objects do not
    # overwrite the variables of this function
    info <- new.env()
    load(paste0("download_info_", sps, ".RData"), envir = info, verbose = FALSE)

    # Reading in data
    data02 <- as.data.frame(occ_download_import(info$dta))
    if (!all(coord_cols %in% names(data02))) {
      stop("Coordinate columns not found in the GBIF download for ", sps)
    }
    # Duplicated coordinates are removed; the columns are selected by name
    # because their position in the GBIF file is not guaranteed
    data02 <- data02[!duplicated(data02[, coord_cols]), ]
    data02 <- data02[, names(data02) %in% c("species", coord_cols)]

    records[[i]] <- data02
  }

  data1 <- as.data.frame(do.call(rbind, records))  # data set with coordinates and name of species
  # Short label: first 3 letters of the first word + first 3 of the rest
  data1$sp2 <- tolower(paste(substr(data1$species, 1, 3),
                             substr(sub("^\\S+\\s+", '', data1$species), 1, 3),
                             sep = "_"))

  #### Saving data ####
  print(paste0("Saving GBIF data as ", wd, "/", out_name, ".csv"))
  write.csv(data1, paste0(wd, "/", out_name, ".csv"),
            quote = FALSE, row.names = FALSE)
}
42 changes: 42 additions & 0 deletions man/GetBIF-open-paren-close-paren-GetBIF-open-paren-close-paren.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit ea67bba

Please sign in to comment.