second first commit

xavi-rp · Jul 16, 2018 · ea67bba · ea67bba
1 parent 06b40ca
commit ea67bba
Show file tree

Hide file tree

Showing 8 changed files with 971 additions and 0 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -0,0 +1,14 @@
+Package: PreSPickR
+Title: Downloading Species Presences (Occurrences) From Public Repositories
+Version: 0.1
+Authors@R: person("Xavier", "Rotllan-Puig", email = "[email protected]", role = c("aut", "cre"))
+Description: Some functions for downloading species presences (occurrences) from several 
+    public repositories (for now, GBIF and Bioatles).
+    They clean the data sets and save them in a csv file.
+Depends: R (>= 3.3.3), dplyr, raster, rgbif, rgdal, rvest, sp
+License: GPL-3
+Encoding: UTF-8
+LazyData: true
+RoxygenNote: 6.0.1
+URL: https://github.com/xavi-rp/PreSPickR
+BugReports: https://github.com/xavi-rp/PreSPickR/issues
diff --git a/LICENSE.md b/LICENSE.md
diff --git a/NAMESPACE b/NAMESPACE
@@ -0,0 +1,2 @@
+# Generated by roxygen2: do not edit by hand
+
diff --git a/PreSPickR.Rproj b/PreSPickR.Rproj
@@ -0,0 +1,21 @@
+Version: 1.0
+
+RestoreWorkspace: No
+SaveWorkspace: No
+AlwaysSaveHistory: Default
+
+EnableCodeIndexing: Yes
+UseSpacesForTab: Yes
+NumSpacesForTab: 2
+Encoding: UTF-8
+
+RnwWeave: knitr
+LaTeX: pdfLaTeX
+
+AutoAppendNewline: Yes
+StripTrailingWhitespace: Yes
+
+BuildType: Package
+PackageUseDevtools: Yes
+PackageInstallArgs: --no-multiarch --with-keep.source
+PackageRoxygenize: rd,collate,namespace
diff --git a/R/bioatles.R b/R/bioatles.R
@@ -0,0 +1,124 @@
+###########################################################################
+########                                                       ############
+########             Downloading data from Bioatles            ############
+########                                                       ############
+###########################################################################
+
+#' Downloading species occurrence data (presences) from Bioatles
+#'
+#' The aim of this script is to define the function bioatles(), which is used to download species (presences) data from the Bioatles http://bioatles.caib.es. As data in Bioatles is projected in European Datum 1950 (31N), the function also transforms it into the Lat/Long Geographic Coordinate System WGS84. bioatles() is based on several functions included in the packages "rvest" (Wickham, 2016) and "xml2" (Wickham et al., 2017). Finally, it saves the data in a csv file.
+#'
+#' @author Xavier Rotllan-Puig
+#' @description Download species (presences) data from the Bioatles http://bioatles.caib.es. As data in Bioatles is projected in European Datum 1950 (31N), the function also transforms it into the Lat/Long Geographic Coordinate System WGS84. bioatles() is based on several functions included in the packages "rvest" (Wickham, 2016) and "xml2" (Wickham et al., 2017). Finally, it saves the data in a csv file.
+#' @param sp_dir Directory of the species list
+#' @param sp_list A csv file name or a vector containing species to be downloaded
+#' @param out_name Name to the output data set (csv file)
+#' @return Called for its side effect: the downloaded presences are written to the file \code{out_name}.csv in the current working directory.
+#' @name bioatles()
+#'
+#'
+#'
+#' bioatles()
+#
+# Created on: Winter-Spring 2018 (under construction)
+#
+# Created by:  ([email protected])
+#
+# Inputs:
+#       - The location of a csv file with the names of the species to be downloaded
+#         ATTENTION: Make sure that the spelling is exactly the same as the one used by Bioatles (e.g. "Chamaerops humilis")
+#
+# Outputs:
+#       - A csv file with 3 columns (decimalLongitude, decimalLatitude, species)
+#
+# References:
+#       - Hadley Wickham (2016). rvest: Easily Harvest (Scrape) Web Pages. R package
+#         version 0.3.2. https://CRAN.R-project.org/package=rvest
+#       - Hadley Wickham, James Hester and Jeroen Ooms (2017). xml2: Parse XML. R
+#         package version 1.1.1. https://CRAN.R-project.org/package=xml2
+#
+#
+# ------------------------------------------
+
+
+bioatles <- function(sp_dir = NULL, sp_list = NULL, out_name = "sp_records"){
+  # Download the presences of the requested species from Bioatles, reproject
+  # them from European Datum 1950 / UTM 31N (EPSG:23031) to WGS84 (EPSG:4326)
+  # and write them to "<out_name>.csv" in the current working directory.
+  #
+  # Arguments:
+  #   sp_dir   directory holding the csv species list (defaults to getwd())
+  #   sp_list  name of a headerless csv file (species names in the first
+  #            column) or a vector of species names, spelled exactly as in the
+  #            Bioatles species selector
+  #   out_name name of the output file, without the ".csv" extension
+  #
+  # Value: whatever write.csv() returns; the function is called for its side
+  # effect. Stops with an error if the species list is empty or of an
+  # unsupported type, if the Bioatles page does not have the expected layout,
+  # or if a species is not found in Bioatles.
+
+  #### Settings ####
+  # Working directory
+  wd <- getwd()
+
+
+  # List of species (resolved before touching the network so that bad input
+  # fails fast). The dot is escaped and the pattern anchored: ".csv" as a bare
+  # regex also matches names that merely contain "csv" after any character.
+  is_csv <- is.character(sp_list) && length(sp_list) == 1 &&
+    grepl("\\.csv$", sp_list, ignore.case = TRUE)
+  if (is_csv) {
+    if (is.null(sp_dir)) sp_dir <- wd
+    species <- read.csv(file.path(sp_dir, sp_list), header = FALSE)
+    species <- as.vector(species$V1)  # species to be downloaded
+  } else if (is.vector(sp_list)) {
+    species <- sp_list
+  } else {
+    stop("Not supported format (must be .csv file or vector)")
+  }
+  if (length(species) == 0) stop("The list of species is empty")
+
+
+  # Bioatles webpage
+  page <- "http://bioatles.caib.es/serproesfront/cuadriculas.do?seccion=distribEspecies" %>%
+    read_html()  # web site of Bioatles
+
+
+  #### Downloading Species Name and BIOATLES code ####
+  # Text of every <option> in the page, kept as a plain character vector so it
+  # can be indexed directly (no dependence on an auto-generated column name).
+  opt_txt <- page %>% html_nodes('option') %>% html_text()
+  chks <- grep("^Selecciona", opt_txt)
+  if (length(chks) < 2) stop("Unexpected layout of the Bioatles species selector")
+  # Species names are the options between the last two "Selecciona..." entries
+  noms1 <- opt_txt[(chks[length(chks) - 1] + 1) : (chks[length(chks)] - 1)]
+
+  nds1 <- page %>% html_nodes('select#selectEspecie')
+  if (length(nds1) == 0) stop("Unexpected layout of the Bioatles species selector")
+  children <- xml_children(nds1[[1]])
+  # "value" attribute of each child of the selector (NA when it has none)
+  code <- vapply(xml_attrs(children),
+                 function(a) unname(a["value"]), character(1))
+  code <- code[-1]  # first entry is skipped (presumably the "Selecciona..." placeholder)
+  if (length(code) != length(noms1)) {
+    stop("Species names and codes read from Bioatles do not match")
+  }
+
+  code_name <- data.frame(code = code, name = noms1, stringsAsFactors = FALSE)
+
+  #### Downloading data of species presences ####
+  # One element per species; bound together once after the loop
+  records <- vector("list", length(species))
+
+  for (i in seq_along(species)) {
+    sps <- species[[i]]
+    print(paste0("Downloading data: ", sps))
+    # Short code: first 3 letters of the first word + first 3 of the last word
+    spec2 <- tolower(paste(substr(sps, 1, 3), substr(sub(".* ", "", sps), 1, 3), sep = "_"))
+    spec <- code_name[code_name$name %in% sps, ]
+    if (nrow(spec) == 0) {
+      stop("No data for this species in Bioatles, please check name/spelling: ", sps)
+    }
+    spec_code <- as.vector(spec$code)
+
+    page2 <- paste0("http://bioatles.caib.es/serproesfront/registros.do?accion=listarRegistros&codiEspecie=", spec_code, "&codiFamilia=0&codiGrupo=0") %>% read_html()
+    tbl_sp <- page2 %>% html_table(fill = TRUE, header = TRUE)
+    pres2export <- tbl_sp[[2]]
+    pres2export <- pres2export[ - c(1, ncol(pres2export))]  # drop first and last columns
+
+    data02 <- pres2export[, c(7, 1, 2)]
+    data02 <- data02[!duplicated(data02), ]
+    data02[, 2:3] <- data02[, 2:3] * 1000
+    names(data02) <- c("species", "decimalLongitude", "decimalLatitude")
+
+    #### Transform presences' projection ####
+    coordinates(data02) <- c("decimalLongitude", "decimalLatitude")  # setting spatial coordinates
+    proj4string(data02) <- CRS("+init=EPSG:23031")  # define projection: European Datum 1950 (31N)
+    CRS.new <- CRS("+init=EPSG:4326") # Lat/Long Geographic Coordinates System WGS84
+    data02_WGS84 <- spTransform(data02, CRS.new)  #projecting
+
+    data02 <- as.data.frame(data02_WGS84@coords)
+    data02$species <- sps
+    data02$sp2 <- spec2
+
+    records[[i]] <- data02
+  }
+
+  data1 <- do.call(rbind, records)
+
+  #### Saving data ####
+  # Only the first three columns (longitude, latitude, species) are exported
+  out_file <- file.path(wd, paste0(out_name, ".csv"))
+  print(paste0("Saving Bioatles data as ", out_file))
+  write.csv(data1[, 1:3], out_file, quote = FALSE, row.names = FALSE)
+
+}
diff --git a/R/gbif_data.R b/R/gbif_data.R
@@ -0,0 +1,137 @@
+
+###########################################################################
+########                                                       ############
+########               Downloading data from GBIF              ############
+########                                                       ############
+########                                                       ############
+###########################################################################
+
+
+#' Downloading species occurrence data (presences) from GBIF
+#'
+#' The aim of this script is to define the function GetBIF(), which is used to download species occurrences from GBIF (Global Biodiversity Information Facility), and saves them as a csv data set. It is based on several functions included in the package "rgbif" (Chamberlain, 2017). GetBIF() retrieves your GBIF credentials (user and password) and automatically checks in a loop until the request of data made to GBIF is ready and starts the download. Finally, it saves the data in a csv file.
+#'
+#' @author Xavier Rotllan-Puig
+#' @description Download species occurrences from GBIF (Global Biodiversity Information Facility), and saves them as a csv data set. It is based on several functions included in the package "rgbif" (Chamberlain, 2017). GetBIF() retrieves your GBIF credentials (user and password) and automatically checks in a loop until the request of data made to GBIF is ready and starts the download. Finally, it saves the data in a csv file.
+#' @param gbif_usr User name in GBIF
+#' @param gbif_pwrd Password in GBIF
+#' @param email email in GBIF
+#' @param credentials .RData file containing a list with gbif_usr, gbif_pwrd and email
+#' @param out_name Name to the output data set (csv file)
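+#' @param sp_dir Directory of the species list
+#' @param sp_list A csv file name or a vector containing species to be downloaded
+#' @return Called for its side effect: the downloaded occurrences are written to the file \code{out_name}.csv in the current working directory.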
+#' @name GetBIF()
+#'
+#'
+#'
+#' GetBIF()
+#
+# Created on: Winter 2018 (under construction)
+#
+# Created by: Xavier Rotllan-Puig ([email protected])
+#
+# Inputs:
+#       - The location of a csv file with the names of the species to be downloaded
+#         ATTENTION: Make sure that the spelling is exactly the same used by GBIF (e.g. "FAGUS SYLVATICA L.")
+#       - For security reasons, your GBIF credentials (user, password and email)
+#         can be loaded from a RData file (location needs to be given). Otherwise,
+#         they can be passed as arguments
+#
+#
+# Outputs:
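+#       - A csv file with the columns species, decimalLatitude and decimalLongitude,
+#         plus sp2 (a short species code built from the first three letters of
+#         the first word and of the rest of the name)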
+#
+# References:
+#       - Scott Chamberlain (2017). rgbif: Interface to the Global 'Biodiversity'
+#         Information Facility 'API'. R package version 0.9.8.
+#         https://CRAN.R-project.org/package=rgbif
+
+
+# ------------------------------------------
+
+
+GetBIF <- function(gbif_usr = NULL, gbif_pwrd = NULL, email = NULL,
+                   credentials = NULL,
+                   sp_dir = NULL, sp_list = NULL,
+                   out_name = "sp_records"
+                   ){
+  # Request, download and read GBIF occurrences (with coordinates) for each
+  # species, and write them to "<out_name>.csv" in the current working
+  # directory.
+  #
+  # Arguments:
+  #   gbif_usr, gbif_pwrd, email  GBIF credentials passed to occ_download()
+  #   credentials  path to an .RData file; its objects are loaded into this
+  #                function's environment, so objects named gbif_usr,
+  #                gbif_pwrd and email in the file replace the arguments
+  #   sp_dir       directory holding the csv species list (defaults to getwd())
+  #   sp_list      name of a headerless csv file (species names in the first
+  #                column) or a vector of species names
+  #   out_name     name of the output file, without the ".csv" extension
+  #
+  # Side effects: the GBIF download and a "download_info_<species>.RData" file
+  # are written to the current directory for every species.
+  # Value: whatever write.csv() returns; the function is called for its side
+  # effects.
+
+  #### Settings ####
+  # Working directory
+  wd <- getwd()
+
+  # Calling GBIF credentials
+  if (!is.null(credentials)) load(credentials, verbose = FALSE)
+
+  # List of species. The dot is escaped and the pattern anchored: ".csv" as a
+  # bare regex also matches names that merely contain "csv" after any character.
+  is_csv <- is.character(sp_list) && length(sp_list) == 1 &&
+    grepl("\\.csv$", sp_list, ignore.case = TRUE)
+  if (is_csv) {
+    if (is.null(sp_dir)) sp_dir <- wd
+    species <- read.csv(file.path(sp_dir, sp_list), header = FALSE)
+    species <- as.vector(species$V1)  # species to be downloaded
+  } else if (is.vector(sp_list)) {
+    species <- sp_list
+  } else {
+    stop("Not supported format (must be .csv file or vector)")
+  }
+  if (length(species) == 0) stop("The list of species is empty")
+
+  #### Downloading Data ####
+  ## Spin up a download request for SEVERAL species data
+
+  # Final states from which a GBIF download request never reaches "SUCCEEDED"
+  failed_status <- c("KILLED", "CANCELLED", "FAILED")
+
+  for (sps in species){
+    print(paste0("Downloading data for ", sps))
+
+    # Without a species key the request would be built as "taxonKey = "
+    taxon_key <- name_backbone(name = sps)$speciesKey
+    if (length(taxon_key) != 1 || is.na(taxon_key)) {
+      stop("No GBIF species key found for '", sps, "', please check name/spelling")
+    }
+
+    rqst_02 <- occ_download(paste0("taxonKey = ", taxon_key),
+                            "hasCoordinate = TRUE",
+                            type = "and",
+                            user = gbif_usr, pwd = gbif_pwrd, email = email)    #prepares the spin up
+    # Creates metadata
+    rqst_02_meta <- data.frame(status = "INITIAL")
+    n_round <- 1
+    # Poll GBIF once a minute until the request is ready
+    while (rqst_02_meta$status != "SUCCEEDED") {
+      cat("\r", paste0("round = ", n_round, " / ",
+                       "status = ", rqst_02_meta$status))
+      Sys.sleep(60)
+      rqst_02_meta <- rqst_02 %>% occ_download_meta
+      cat("\r", paste0("round = ", n_round, " / ",
+                       "status = ", rqst_02_meta$status))
+      # Give up instead of polling forever when the request cannot succeed
+      if (rqst_02_meta$status %in% failed_status) {
+        stop("GBIF download request for '", sps, "' ended with status ",
+             rqst_02_meta$status)
+      }
+      n_round <- n_round + 1
+    }
+
+    # Start download when meta says "Status: SUCCEEDED"
+    dta <- occ_download_get(key = rqst_02_meta$key, path = ".",
+                            overwrite = TRUE, curlopts = list(verbose = TRUE))
+
+    # saving citation
+    citation_02 <- dta %>% gbif_citation
+
+    # Saving download info
+    save(list = c("rqst_02", "rqst_02_meta", "dta", "citation_02"),
+         file = paste0("download_info_", sps, ".RData"))
+
+  }
+
+
+  #### Retrieving Data ####
+  # One element per species; bound together once after the loop
+  records <- vector("list", length(species))
+  coord_cols <- c("decimalLatitude", "decimalLongitude")
+
+  for (i in seq_along(species)){
+    sps <- species[[i]]
+    cat(paste0("Reading data for ", sps), "\n")
+    # Restores 'dta' (and the request info) saved for this species
+    load(paste0("download_info_", sps, ".RData"), verbose = FALSE)
+
+    # Reading in data
+    data02 <- occ_download_import(dta)
+    if (!all(coord_cols %in% names(data02))) {
+      stop("Coordinate columns not found in the GBIF data for '", sps, "'")
+    }
+    # Drop records with repeated coordinates. Columns are selected by name;
+    # NOTE(review): the previous version used positions 133:134, presumably
+    # these same two columns in the GBIF export of the time - confirm.
+    data02 <- data02[!duplicated(data02[, coord_cols]), ]
+    data02 <- data02[, names(data02) %in% c("species", coord_cols)]
+
+    records[[i]] <- data02
+  }
+
+  data1 <- as.data.frame(do.call(rbind, records))  #data set with coordinates and name of species
+  # Short code: first 3 letters of the first word + first 3 of the remainder
+  data1$sp2 <- tolower(paste(substr(data1$species, 1, 3),
+                             substr(sub("^\\S+\\s+", '', data1$species), 1, 3),
+                             sep = "_"))
+
+  #### Saving data ####
+  out_file <- file.path(wd, paste0(out_name, ".csv"))
+  print(paste0("Saving GBIF data as ", out_file))
+  write.csv(data1, out_file,
+            quote = FALSE, row.names = FALSE)
+}
diff --git a/man/GetBIF-open-paren-close-paren-GetBIF-open-paren-close-paren.Rd b/man/GetBIF-open-paren-close-paren-GetBIF-open-paren-close-paren.Rd
diff --git a/man/bioatles-open-paren-close-paren-bioatles-open-paren-close-paren.Rd b/man/bioatles-open-paren-close-paren-bioatles-open-paren-close-paren.Rd