Skip to content

Commit

Permalink
removed raster dependency, but work in progress
Browse files Browse the repository at this point in the history
  • Loading branch information
stineb committed Jun 20, 2024
1 parent 1709852 commit 9a67b88
Show file tree
Hide file tree
Showing 7 changed files with 172 additions and 54 deletions.
3 changes: 1 addition & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -35,9 +35,8 @@ Imports:
multidplyr,
readr,
hwsdr,
raster,
terra,
stringr,
sp,
ncdf4,
signal,
sf,
Expand Down
4 changes: 3 additions & 1 deletion R/helper_functions.R
Original file line number Diff line number Diff line change
Expand Up @@ -89,4 +89,6 @@ findna_tail <- function( vec ){

}

na.omit.list <- function(y) { return(y[!sapply(y, function(x) all(is.na(x)))]) }
na.omit.list <- function(y){
return(y[!sapply(y, function(x) all(is.na(x)))])
}
44 changes: 33 additions & 11 deletions R/ingest.R
Original file line number Diff line number Diff line change
Expand Up @@ -76,11 +76,13 @@ ingest <- function(
if (!(source %in% c(
"hwsd",
"etopo1",
"stocker23",
"wwf",
"soilgrids",
"wise",
"gsde",
"worldclim"))
"worldclim"
))
) {

# complement dates information
Expand Down Expand Up @@ -842,14 +844,28 @@ ingest <- function(

# Get ETOPO1 elevation data. year_start and year_end not required

ddf <- ingest_globalfields(siteinfo,
source = source,
dir = dir,
getvars = NULL,
timescale = NULL,
verbose = FALSE
ddf <- ingest_globalfields(
siteinfo,
source = source,
dir = dir,
getvars = NULL,
timescale = NULL,
verbose = FALSE
)


} else if (source == "stocker23"){

# Get root zone water storage capacity data. year_start and year_end not required

ddf <- ingest_globalfields(
siteinfo,
source = source,
dir = dir,
getvars = NULL,
timescale = NULL,
verbose = FALSE
)

} else if (source == "hwsd"){

# Get HWSD soil data. year_start and year_end not required
Expand Down Expand Up @@ -893,8 +909,14 @@ ingest <- function(
layer = settings$layer, dir = dir)) %>%
purrr::reduce(left_join, by = c("lon", "lat")) %>%
distinct() %>%
right_join(dplyr::select(
siteinfo, sitename, lon, lat), by = c("lon", "lat")) %>%
right_join(
dplyr::select(all_of(
siteinfo,
sitename,
lon,
lat
)),
by = c("lon", "lat")) %>%
dplyr::select(-lon, -lat)

} else if (source == "gsde"){
Expand Down Expand Up @@ -941,7 +963,7 @@ ingest <- function(
stop(
"ingest(): Argument 'source' could not be identified.
Use one of 'fluxnet', 'cru', 'watch_wfdei', 'wfde5',
co2_mlo', 'etopo1', or 'gee'.")
co2_mlo', 'etopo1', 'stocker23', or 'gee'.")
}

ddf <- ddf %>%
Expand Down
24 changes: 22 additions & 2 deletions R/ingest_bysite.R
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ ingest_bysite <- function(

if (!(source %in% c(
"etopo1",
"stocker23",
"hwsd",
"soilgrids",
"wise",
Expand Down Expand Up @@ -640,6 +641,25 @@ ingest_bysite <- function(
verbose = FALSE
)

} else if (source == "stocker23"){

# Get ETOPO1 elevation data. year_start and year_end not required

siteinfo <- tibble(
sitename = sitename,
lon = lon,
lat = lat
)

df <- ingest_globalfields(
siteinfo,
source = source,
getvars = NULL,
dir = dir,
timescale = NULL,
verbose = FALSE
)

} else if (source == "hwsd"){

# Get HWSD soil data. year_start and year_end not required
Expand Down Expand Up @@ -784,11 +804,11 @@ ingest_bysite <- function(
rlang::warn(paste("you selected source =", source))
stop("ingest(): Argument 'source' could not be identified.
Use one of 'fluxnet', 'cru', 'watch_wfdei', 'wfde5',
'co2_mlo', 'etopo1', or 'gee'.")
'co2_mlo', 'etopo1', 'stocker23', or 'gee'.")
}

# add data frame to nice data frame containing all required time steps
if (!(source %in% c("etopo1", "hwsd", "soilgrids", "wise", "gsde", "worldclim"))){
if (!(source %in% c("etopo1", "stocker23", "hwsd", "soilgrids", "wise", "gsde", "worldclim"))){
if (timescale=="m"){
df <- df_tmp %>%
mutate(month = lubridate::month(date), year = lubridate::year(date)) %>%
Expand Down
118 changes: 87 additions & 31 deletions R/ingest_globalfields.R
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ ingest_globalfields <- function(
stop("At least one entry for siteinfo$sitename is missing.")
}

if (!(source %in% c("etopo1", "wwf", "gsde", "worldclim"))){
if (!(source %in% c("etopo1", "stocker23", "wwf", "gsde", "worldclim"))){

# get a daily (monthly) data frame with all dates for all sites
# (if monthly, day 15 of each month)
Expand Down Expand Up @@ -424,12 +424,33 @@ ingest_globalfields <- function(
lat = siteinfo$lat
)

df_out <- extract_pointdata_allsites( paste0(dir, filename), df_lonlat, get_time = FALSE ) %>%
dplyr::select(-lon, -lat) %>%
tidyr::unnest(data) %>%
dplyr::rename(elv = V1) %>%
df_out <- extract_pointdata_allsites( paste0(dir, filename), df_lonlat, get_time = FALSE ) |>
dplyr::ungroup() |>
dplyr::select(-lon, -lat) |>
tidyr::unnest(data) |>
dplyr::rename(elv = ETOPO1_Bed_g_geotiff) |>
dplyr::select(sitename, elv)

} else if (source == "stocker23"){

filename <- list.files(dir, pattern = "cwdx80_forcing_halfdeg.nc")
if (length(filename) > 1) stop("ingest_globalfields(): Found more than 1 file for source 'stocker23'.")
if (length(filename) == 0) stop("ingest_globalfields(): Found no files for source 'stocker23' in the directory provided by argument 'dir'.")

# re-construct this data frame (tibble) - otherwise SpatialPointsDataframe() won't work
df_lonlat <- tibble(
sitename = siteinfo$sitename,
lon = siteinfo$lon,
lat = siteinfo$lat
)

df_out <- extract_pointdata_allsites( paste0(dir, filename), df_lonlat, get_time = FALSE ) |>
dplyr::ungroup() |>
dplyr::select(-lon, -lat) |>
tidyr::unnest(data) |>
dplyr::rename(whc = cwdx80_forcing) |>
dplyr::select(sitename, whc)

} else if (source == "gsde"){

# re-construct this data frame (tibble) - otherwise SpatialPointsDataframe() won't work
Expand Down Expand Up @@ -611,15 +632,15 @@ ingest_globalfields_watch_byvar <- function( ddf, siteinfo, dir, varnam ) {
rowwise() %>%
dplyr::mutate(filename = paste0( dirn, "/", varnam, addstring, sprintf( "%4d", yr ), sprintf( "%02d", mo ), ".nc" )) %>%
ungroup() %>%
dplyr::mutate(data = purrr::map(filename, ~extract_pointdata_allsites(., df_lonlat, get_time = FALSE ) ))
dplyr::mutate(data = purrr::map(filename, ~extract_pointdata_allsites(., df_lonlat, get_time = TRUE ) ))

# rearrange to a daily data frame
complement_df <- function(df){
df <- df %>%
stats::setNames(., c("myvar")) %>%
mutate( dom = 1:nrow(.))
df <- df |>
dplyr::select(dom = tstep, myvar = value)
return(df)
}

ddf <- df %>%
tidyr::unnest(data) %>%
dplyr::mutate(data = purrr::map(data, ~complement_df(.))) %>%
Expand Down Expand Up @@ -1122,33 +1143,68 @@ extract_pointdata_allsites <- function(
# load file using the raster library
#print(paste("Creating raster brick from file", filename))
if (!file.exists(filename)) stop(paste0("File not found: ", filename))

# message(paste0("Reading file: ", filename))
rasta <- raster::brick(filename)

df_lonlat <- raster::extract(
rasta,
sp::SpatialPoints(dplyr::select(df_lonlat, lon, lat)), # , proj4string = rasta@crs
sp = TRUE
) %>%
as_tibble() %>%
tidyr::nest(data = c(-lon, -lat)) %>%
right_join(df_lonlat, by = c("lon", "lat")) %>%
mutate( data = purrr::map(data, ~dplyr::slice(., 1)) ) %>%
dplyr::mutate(data = purrr::map(data, ~t(.))) %>%
dplyr::mutate(data = purrr::map(data, ~as_tibble(.)))

# xxx todo: use argument df = TRUE in the extract() function call in order to
# return a data frame directly (and not having to rearrange the data afterwards)
# xxx todo: implement the GWR method for interpolating using elevation as a
# covariate here.

# new code with terra library
rasta <- terra::rast(filename)
coords <- dplyr::select(df_lonlat, lon, lat)
points <- terra::vect(coords, geom = c("lon", "lat"), crs = "EPSG:4326")
values <- terra::extract(rasta, points, xy = FALSE, ID = FALSE, method = "bilinear")

if (get_time){
timevals <- raster::getZ(rasta)
df_lonlat <- df_lonlat %>%
mutate( data = purrr::map(data, ~bind_cols(., tibble(date = timevals))))

out <- df_lonlat |>
dplyr::select(sitename, lon, lat) |>
bind_cols(
values
) |>
tidyr::pivot_longer(-one_of(c("lon", "lat", "sitename")), names_to = "tstep") |>
tidyr::separate_wider_delim(
tstep,
delim = "=",
names = c("varnam", "tstep")
) |>
dplyr::mutate(
tstep = as.numeric(tstep) + 1,
varnam = stringr::str_remove(varnam, "_tstep")
) |>
dplyr::group_by(sitename, lon, lat) |>
tidyr::nest()

} else {

out <- df_lonlat |>
dplyr::select(sitename, lon, lat) |>
bind_cols(
values
) |>
dplyr::group_by(sitename, lon, lat) |>
tidyr::nest()

}

return(df_lonlat)
# # old code with {raster} library:
# rasta <- raster::brick(filename)
# df_lonlat <- raster::extract(
# rasta,
# sp::SpatialPoints(dplyr::select(df_lonlat, lon, lat)), # , proj4string = rasta@crs
# sp = TRUE
# ) %>%
# as_tibble() %>%
# tidyr::nest(data = c(-lon, -lat)) %>%
# right_join(df_lonlat, by = c("lon", "lat")) %>%
# mutate( data = purrr::map(data, ~dplyr::slice(., 1)) ) %>%
# dplyr::mutate(data = purrr::map(data, ~t(.))) %>%
# dplyr::mutate(data = purrr::map(data, ~as_tibble(.)))
#
# if (get_time){
# timevals <- raster::getZ(rasta)
# df_lonlat <- df_lonlat %>%
# mutate( data = purrr::map(data, ~bind_cols(., tibble(date = timevals))))
# }

return(out)
}


Expand Down
1 change: 1 addition & 0 deletions R/ingest_modis_bysite.R
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ ingest_modis_bysite <- function(
)

} else {

filnam_daily_csv <- file.path(
dirnam_daily_csv,
paste0(
Expand Down
32 changes: 25 additions & 7 deletions vignettes/example.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -178,12 +178,12 @@ df_watch <- ingest_bysite(
getvars = c("temp"),
dir = "~/data/watch_wfdei/",
timescale = "d",
year_start = 2007,
year_end = 2007,
year_start = 2018,
year_end = 2018,
lon = 3.5958,
lat = 43.7414,
verbose = TRUE,
settings = list(correct_bias = "worldclim", dir_bias = "~/data/worldclim")
verbose = TRUE
#settings = list(correct_bias = "worldclim", dir_bias = "~/data/worldclim")
)
df_watch
```
Expand Down Expand Up @@ -855,11 +855,16 @@ WATCH-WFDEI data is available for years from 1979. If `year_start` is before tha

```{r warning=FALSE, eval = FALSE}
ddf_watch <- ingest(
siteinfo = siteinfo %>% slice(1:2),
siteinfo = siteinfo %>%
slice(1:2) |>
mutate(
year_star = 2018,
year_end = 2018
),
source = "watch_wfdei",
getvars = c("temp", "prec"),
dir = "~/data/watch_wfdei/", # adjust this with your local path
settings = list(correct_bias = "worldclim", dir_bias = "~/data/worldclim")
dir = "~/data/watch_wfdei/" # adjust this with your local path
# settings = list(correct_bias = "worldclim", dir_bias = "~/data/worldclim")
)
```

Expand Down Expand Up @@ -1022,6 +1027,19 @@ df_etopo <- ingest(
)
```

## Root-zone water-storage capacity

This reads from the 0.05 degrees resolution map of root-zone water-storage capacity from [Stocker et al. (2023)](https://www.nature.com/articles/s41561-023-01125-2). The nested data column contains a tibble with one value for one variable `whc`. Download the data from [here](https://doi.org/10.5281/zenodo.10885724) and specify the local path with the argument `dir`.


```{r warning=FALSE, eval = FALSE}
df_stocker23 <- ingest(
siteinfo,
source = "stocker23",
dir ="~/data/mct_data/" # adjust this with your local path
)
```

## HWSD

Four steps are required before you can use `ingest()` to get HWSD data:
Expand Down

0 comments on commit 9a67b88

Please sign in to comment.