diff --git a/DESCRIPTION b/DESCRIPTION
index 7e1c3e3..e236625 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -13,7 +13,7 @@ License: MIT + file LICENSE
Encoding: UTF-8
LazyData: true
Roxygen: list(markdown = TRUE)
-RoxygenNote: 7.2.1
+RoxygenNote: 7.2.3
Imports:
coda,
data.table,
diff --git a/R/mod_fit.R b/R/mod_fit.R
index 4bec478..afa0cc2 100644
--- a/R/mod_fit.R
+++ b/R/mod_fit.R
@@ -1,726 +1,756 @@
-#*******************************************************************************
-# Description: Model fit functions.
-# Date: 2022-03-29
-#*******************************************************************************
-
-#' Format empty BLSP output to return
-#'
-#' @param years The years vector
-#' @param date_vec The date vector, be sure to convert the vector to "Date"
-#' format or use "yyyy-mm-dd" format string.
-#' @param vi_vec The vegetation index vector.
-#' @param weights_vec A numeric vector of same length as vi_vec specifying the
-#' weights for the supplied observations. Must be between 0 and 1, inclusive.
-#'
-#' @return An empty BLSP class object.
-#'
-#' @noRd
-EmptyBlspOutput <- function(years, date_vec, vi_vec, weights_vec) {
- bf_phenos <- data.table(
- Year = years,
- midgup_lower = NA, midgup = NA, midgup_upper = NA,
- midgdown_lower = NA, midgdown = NA, midgdown_upper = NA
- )
- bf_phenos[,
- colnames(bf_phenos) := lapply(.SD, as.numeric),
- .SDcols = colnames(bf_phenos)
- ]
-
- blsp_fit <- list(
- phenos = bf_phenos,
- params = NULL,
- data = data.table::data.table(
- date = date_vec,
- vi = vi_vec,
- weights = weights_vec
- )
- )
- class(blsp_fit) <- "BlspFit"
-
- return(blsp_fit)
-}
-
-
-#' Fit a Bayesian mixed hierarchical land surface phenology model.
-#'
-#' This function fits a Bayesian mixed hierarchical land surface phenology model
-#' to the supplied data (can be sparse), and returns phenometrics for the
-#' entire time frame. For further explanation, please see the vignette.
-#'
-#' @param date_vec The date vector, be sure to convert the vector to "Date"
-#' format or use "yyyy-mm-dd" format string.
-#' @param vi_vec The vegetation index vector.
-#' @param weights_vec A numeric vector of same length as vi_vec specifying the
-#' weights for the supplied observations. Must be between 0 and 1, inclusive.
-#' @param initValues Initial values for MCMC sampling. By default, it is
-#' assgined `NULL`. It could also be an object returned from the `FitAvgModel()`
-#' function that fits an averaged model or a numeric vector provided by the user.
-#' @param verbose logical. If `TRUE`, the progress will be reported.
-#' @param start_yr The start year of the result. Default is NULL, which means
-#' determined by data.
-#' @param end_yr The end year of the result. Default is NULL, which means
-#' determined by data.
-#'
-#' @return An object of class `BlspFit` will be returned. The object contains the
-#' estimated spring and autumn phenometrics for each year, the generated model
-#' parameter samples, and the input data.
-#' @examples
-#' \dontrun{
-#' data(landsatEVI2)
-#' blsp_fit <- FitBLSP(date_vec = landsatEVI2$date, vi_vec = landsatEVI2$evi2)
-#' }
-#' @export
-#' @import data.table
-FitBLSP <- function(date_vec, vi_vec,
- weights_vec = NULL,
- initValues = NULL,
- verbose = FALSE,
- start_yr = NULL, end_yr = NULL
-) {
- # Check if date_vec is in Date format
- if (sum(!is.na(lubridate::parse_date_time(date_vec, orders = "ymd"))) !=
- length(date_vec)) {
- stop("There're invalid Date values in the `date_vec`!
- Be sure to use `yyyy-mm-dd` format.")
- }
-
- # Check weights to be in the range of [0, 1]
- if (!is.null(weights_vec)) {
- if (min(weights_vec) < 0 | max(weights_vec) > 1) {
- stop("Weights must be within [0, 1].")
- }
-
- # Check the length of dates, vis, and weights
- if (length(weights_vec) != length(date_vec) |
- length(vi_vec) != length(date_vec)) {
- stop("date_vec, vi_vec, and weights_vec have different lengths.")
- }
- }
-
- # Check NAs
- if (any(is.na(date_vec)) | any(is.na(vi_vec))) {
- stop("Please remove NAs in the input data.")
- }
-
- # Reorder data to make sure they are sorted by time
- od <- order(date_vec)
- date_vec <- date_vec[od]
- vi_vec <- vi_vec[od]
- weights_vec <- weights_vec[od]
-
- # Convert data to jags format
- y <- vi_vec
- t <- lubridate::yday(date_vec)
- n <- length(y) # total num of observations
- # year id vector
- if (is.null(start_yr) || is.null(end_yr)) {
- yr <- lubridate::year(date_vec) - lubridate::year(date_vec)[1] + 1
- tmp <- sort(unique(year(date_vec)))
- years <- tmp[1]:tmp[length(tmp)]
- numYears <- length(1:yr[length(yr)])
- } else {
- yr <- lubridate::year(date_vec) - lubridate::year(date_vec)[1] + 1
- years <- start_yr:end_yr
- numYears <- length(years)
- }
- # If user specified weights
- if (is.null(weights_vec)) {
- weights_vec <- rep(1, n)
- }
-
- # ~ Format data, inits, and model
- model_string <- "model {
- # Likelihood
- for (i in 1:n) {
- Y[i] ~ dnorm(mu[i], tau_y)
- mu[i] <- weights[i] * (m1[yr[i]] + (m2[yr[i]] - m7[yr[i]] * t[i]) *
- ((1 / (1 + exp((m3[yr[i]] - t[i]) / m4[yr[i]]))) -
- (1 / (1 + exp((m5[yr[i]] - t[i]) / m6[yr[i]])))))
- }
-
- # Priors
- for (j in 1:N) {
- M1[j] ~ dnorm(mu_m1, tau[1])
- logit(m1[j]) <- M1[j]
- m2[j] ~ dnorm(mu_m2, tau[2])
- m3[j] ~ dnorm(mu_m3, tau[3])
- m4[j] ~ dnorm(mu_m4, tau[4])
- m5[j] ~ dnorm(mu_m5, tau[5])
- m6[j] ~ dnorm(mu_m6, tau[6])
- M7[j] ~ dbeta(4, 4 * (1 - mu_m7 * 100) / (mu_m7 * 100))
- m7[j] <- M7[j] / 100
- }
-
- mu_m1 ~ dunif(0, 0.3)
- mu_m2 ~ dunif(0.5, 2)
- mu_m3 ~ dunif(0, 185)
- mu_m4 ~ dunif(1, 15)
- mu_m5 ~ dunif(185, 366)
- mu_m6 ~ dunif(1, 15)
- mu_m7 ~ dunif(0, 0.01)
-
- for (k in 1:7) {
- tau[k] ~ dgamma(0.1, 0.1)
- }
- tau_y ~ dgamma(0.1, 0.1)
- }"
-
- if (!is.null(initValues) && class(initValues) == "nls") {
- p_m1 <- stats::coef(initValues)["m1"]
- p_m2 <- stats::coef(initValues)["m2"]
- p_m3 <- stats::coef(initValues)["m3"]
- p_m4 <- stats::coef(initValues)["m4"]
- p_m5 <- stats::coef(initValues)["m5"]
- p_m6 <- stats::coef(initValues)["m6"]
- p_m7 <- stats::coef(initValues)["m7"]
- } else if (!is.null(initValues) && class(initValues) == "numeric") {
- if (length(initValues) != 7) {
- stop("The length of the initial values does not match",
- "the number of model parameters."
- )
- }
- p_m1 <- initValues[1]
- p_m2 <- initValues[2]
- p_m3 <- initValues[3]
- p_m4 <- initValues[4]
- p_m5 <- initValues[5]
- p_m6 <- initValues[6]
- p_m7 <- initValues[7]
- } else {
- p_m1 <- 0.05
- p_m2 <- 1
- p_m3 <- 120
- p_m4 <- 8
- p_m5 <- 290
- p_m6 <- 8
- p_m7 <- 0.001
- }
-
- data <- list(
- Y = y, t = t, n = n, yr = yr, N = numYears, weights = weights_vec
- )
-
- inits <- list(
- M1 = rep(p_m1, numYears),
- m2 = rep(p_m2, numYears), m3 = rep(p_m3, numYears),
- m4 = rep(p_m4, numYears), m5 = rep(p_m5, numYears),
- m6 = rep(p_m6, numYears)
- )
-
- blsp_fit <- tryCatch(
- {
- if (verbose) {
- message("Initialize model...")
- }
- pb_type <- ifelse(verbose, "text", "none")
-
- model <- rjags::jags.model(textConnection(model_string),
- data = data, inits = inits,
- n.chains = 3, quiet = TRUE
- )
- stats::update(model, 2000, progress.bar = pb_type)
-
- if (verbose) {
- message("Sampling (could have multiple chains)...")
- }
-
- iteration_times <- 0
- repeat {
- samp <- rjags::coda.samples(model,
- variable.names = c(
- "m1", "m2", "m3", "m4", "m5", "m6", "m7"
- ),
- n.iter = 5000,
- thin = 10,
- progress.bar = pb_type
- )
- iteration_times <- iteration_times + 5000
-
- # Try to make it converge
- if (coda::gelman.diag(samp)$mpsrf <= 1.3 |
- iteration_times > 50000) {
- break
- }
- }
- if (verbose) {
- message("total interation times:", iteration_times)
- }
-
- # ~ Retrieve parameter estimates
- if (verbose) {
- message("Estimate phenometrics...")
- }
- m1 <- m2 <- m3 <- m4 <- m5 <- m6 <- m7 <- NULL
- for (i in 1:numYears) {
- m1 <- cbind(m1, c(
- samp[[1]][, paste0("m1", "[", i, "]")],
- samp[[2]][, paste0("m1", "[", i, "]")]
- ))
- m2 <- cbind(m2, c(
- samp[[1]][, paste0("m2", "[", i, "]")],
- samp[[2]][, paste0("m2", "[", i, "]")]
- ))
- m3 <- cbind(m3, c(
- samp[[1]][, paste0("m3", "[", i, "]")],
- samp[[2]][, paste0("m3", "[", i, "]")]
- ))
- m4 <- cbind(m4, c(
- samp[[1]][, paste0("m4", "[", i, "]")],
- samp[[2]][, paste0("m4", "[", i, "]")]
- ))
- m5 <- cbind(m5, c(
- samp[[1]][, paste0("m5", "[", i, "]")],
- samp[[2]][, paste0("m5", "[", i, "]")]
- ))
- m6 <- cbind(m6, c(
- samp[[1]][, paste0("m6", "[", i, "]")],
- samp[[2]][, paste0("m6", "[", i, "]")]
- ))
- m7 <- cbind(m7, c(
- samp[[1]][, paste0("m7", "[", i, "]")],
- samp[[2]][, paste0("m7", "[", i, "]")]
- ))
- }
-
- m1_quan <- data.table::data.table(
- apply(m1, 2, stats::quantile, c(0.05, 0.5, 0.95))
- )
- m2_quan <- data.table::data.table(
- apply(m2, 2, stats::quantile, c(0.05, 0.5, 0.95))
- )
- m3_quan <- data.table::data.table(
- apply(m3, 2, stats::quantile, c(0.05, 0.5, 0.95))
- )
- m5_quan <- data.table::data.table(
- apply(m5, 2, stats::quantile, c(0.05, 0.5, 0.95))
- )
-
-
- # Construct `blsp_fit` object to return
- blsp_fit <- EmptyBlspOutput(years, date_vec, vi_vec, weights_vec)
-
- for (i in 1:numYears) {
- # suppress some amplitude-too-low year
- if (m2_quan[2, ][[i]] > 0.4) {
- blsp_fit$phenos[
- i,
- colnames(blsp_fit$phenos) := list(
- Year = years[i],
- midgup_lower = m3_quan[1, ][[i]],
- midgup = m3_quan[2, ][[i]],
- midgup_upper = m3_quan[3, ][[i]],
- midgdown_lower = m5_quan[1, ][[i]],
- midgdown = m5_quan[2, ][[i]],
- midgdown_upper = m5_quan[3, ][[i]]
- )
- ]
- }
- }
-
- blsp_fit$params <- list(
- m1 = m1, m2 = m2, m3 = m3,
- m4 = m4, m5 = m5, m6 = m6, m7 = m7
- )
-
- if (verbose) {
- message("Done!")
- }
-
- blsp_fit
- },
- error = function(e) {
- if (verbose) {
- message("Something went wrong!")
- }
- blsp_fit <- EmptyBlspOutput(years, date_vec, vi_vec, weights_vec)
-
- return(blsp_fit)
- }
- )
-
- return(blsp_fit)
-}
-
-
-#' Fit a Bayesian mixed hierarchical land surface phenology model. Spring only!
-#' Note that the result CANNOT be used to plot the fit.
-#'
-#' This function fits a Bayesian mixed hierarchical land surface phenology model
-#' to the supplied data (can be sparse), and returns phenometrics for the
-#' entire time frame. For further explanation, please see the vignette.
-#'
-#' @param date_vec The date vector, be sure to convert the vector to "Date"
-#' format or use "yyyy-mm-dd" format string.
-#' @param vi_vec The vegetation index vector.
-#' @param weights_vec A numeric vector of same length as vi_vec specifying the
-#' weights for the supplied observations. Must be between 0 and 1, inclusive.
-#' @param initValues Initial values for MCMC sampling. By default, it is
-#' assgined `NULL`. It could also be an object returned from the `FitAvgModel()`
-#' function that fits an averaged model or a numeric vector provided by the user.
-#' @param verbose logical. If `TRUE`, the progress will be reported.
-#' @return An object of class `BlspFit` will be returned. The object contains the
-#' estimated spring and autumn phenometrics for each year, the generated model
-#' parameter samples, and the input data.
-#' @examples
-#' \dontrun{
-#' data(landsatEVI2)
-#' blsp_fit <- FitBLSP(date_vec = landsatEVI2$date, vi_vec = landsatEVI2$evi2)
-#' }
-#' @export
-#' @import data.table
-FitBLSP_spring <- function(date_vec, vi_vec,
- weights_vec = NULL,
- initValues = NULL,
- verbose = FALSE
-) {
- # Check if date_vec is in Date format
- if (sum(!is.na(lubridate::parse_date_time(date_vec, orders = "ymd"))) !=
- length(date_vec)) {
- stop("There're invalid Date values in the `date_vec`!
- Be sure to use `yyyy-mm-dd` format.")
- }
-
- # Check weights to be in the range of [0, 1]
- if (!is.null(weights_vec)) {
- if (min(weights_vec) < 0 | max(weights_vec) > 1) {
- stop("Weights must be within [0, 1].")
- }
-
- # Check the length of dates, vis, and weights
- if (length(weights_vec) != length(date_vec) |
- length(vi_vec) != length(date_vec)) {
- stop("date_vec, vi_vec, and weights_vec have different lengths.")
- }
- }
-
- # Check NAs
- if (any(is.na(date_vec)) | any(is.na(vi_vec))) {
- stop("Please remove NAs in the input data.")
- }
-
- # Reorder data to make sure they are sorted by time
- od <- order(date_vec)
- date_vec <- date_vec[od]
- vi_vec <- vi_vec[od]
- weights_vec <- weights_vec[od]
-
- # Convert data to jags format
- y <- vi_vec
- t <- lubridate::yday(date_vec)
- n <- length(y) # total num of observations
- # year id vector
- yr <- lubridate::year(date_vec) - lubridate::year(date_vec)[1] + 1
- numYears <- length(unique(yr))
-
- # If user specified weights
- if (is.null(weights_vec)) {
- weights_vec <- rep(1, n)
- }
-
- # ~ Format data, inits, and model
- model_string <- "model {
- # Likelihood
- for (i in 1:n) {
- Y[i] ~ dnorm(mu[i], tau_y)
- mu[i] <- weights[i] * (m1[yr[i]] + (m2[yr[i]] - m7[yr[i]] * t[i]) *
- ((1 / (1 + exp((m3[yr[i]] - t[i]) / m4[yr[i]]))) -
- (1 / (1 + exp((m5[yr[i]] - t[i]) / m6[yr[i]])))))
- }
-
- # Priors
- for (j in 1:N) {
- M1[j] ~ dnorm(mu_m1, tau[1])
- logit(m1[j]) <- M1[j]
- m2[j] ~ dnorm(mu_m2, tau[2])
- m3[j] ~ dnorm(mu_m3, tau[3])
- m4[j] ~ dnorm(mu_m4, tau[4])
- m5[j] ~ dnorm(mu_m5, tau[5])
- m6[j] ~ dnorm(mu_m6, tau[6])
- M7[j] ~ dbeta(4, 4 * (1 - mu_m7 * 100) / (mu_m7 * 100))
- m7[j] <- M7[j] / 100
- }
-
- mu_m1 ~ dunif(0, 0.3)
- mu_m2 ~ dunif(0.5, 2)
- mu_m3 ~ dunif(0, 185)
- mu_m4 ~ dunif(1, 15)
- mu_m5 ~ dunif(185, 366)
- mu_m6 ~ dunif(1, 15)
- mu_m7 ~ dunif(0, 0.01)
-
- for (k in 1:7) {
- tau[k] ~ dgamma(0.1, 0.1)
- }
- tau_y ~ dgamma(0.1, 0.1)
- }"
-
- if (!is.null(initValues) && class(initValues) == "nls") {
- p_m1 <- stats::coef(initValues)["m1"]
- p_m2 <- stats::coef(initValues)["m2"]
- p_m3 <- stats::coef(initValues)["m3"]
- p_m4 <- stats::coef(initValues)["m4"]
- p_m5 <- stats::coef(initValues)["m5"]
- p_m6 <- stats::coef(initValues)["m6"]
- p_m7 <- stats::coef(initValues)["m7"]
- } else if (!is.null(initValues) && class(initValues) == "numeric") {
- if (length(initValues) != 7) {
- stop("The length of the initial values does not match",
- "the number of model parameters."
- )
- }
- p_m1 <- initValues[1]
- p_m2 <- initValues[2]
- p_m3 <- initValues[3]
- p_m4 <- initValues[4]
- p_m5 <- initValues[5]
- p_m6 <- initValues[6]
- p_m7 <- initValues[7]
- } else {
- p_m1 <- 0.05
- p_m2 <- 1
- p_m3 <- 120
- p_m4 <- 8
- p_m5 <- 290
- p_m6 <- 8
- p_m7 <- 0.001
- }
-
- data <- list(
- Y = y, t = t, n = n, yr = yr, N = numYears, weights = weights_vec
- )
-
- inits <- list(
- M1 = rep(p_m1, numYears),
- m2 = rep(p_m2, numYears), m3 = rep(p_m3, numYears),
- m4 = rep(p_m4, numYears), m5 = rep(p_m5, numYears),
- m6 = rep(p_m6, numYears)
- )
-
- tryCatch(
- {
- if (verbose) {
- message("Initialize model...")
- }
- pb_type <- ifelse(verbose, "text", "none")
-
- model <- rjags::jags.model(textConnection(model_string),
- data = data, inits = inits,
- n.chains = 3, quiet = TRUE
- )
- stats::update(model, 2000, progress.bar = pb_type)
-
- if (verbose) {
- message("Sampling (could have multiple chains)...")
- }
-
- iteration_times <- 0
- repeat {
- samp <- rjags::coda.samples(model,
- variable.names = c("m2", "m3"),
- n.iter = 5000,
- thin = 10,
- progress.bar = pb_type
- )
- iteration_times <- iteration_times + 5000
-
- # Try to make it converge
- if(coda::gelman.diag(samp)$mpsrf <= 1.3 |
- iteration_times > 100000) {
- break
- }
- }
- if (verbose) {
- message("total interation times:", iteration_times)
- }
- },
- error = function(e) {
- years <- sort(unique(year(date_vec)))
- bf_phenos <- NULL
- for (i in 1:numYears) {
- bf_phenos <- rbind(bf_phenos, list(
- Id = NA, Year = years[i],
- midgup_lower = NA, midgup = NA, midgup_upper = NA,
- midgdown_lower = NA, midgdown = NA, midgdown_upper = NA
- ))
- }
- return(list(fitted = NA, phenos = bf_phenos))
- }
- )
-
- # ~ Retrieve parameter estimates
- if (verbose) {
- message("Estimate phenometrics...")
- }
- m1 <- m2 <- m3 <- m4 <- m5 <- m6 <- m7 <- NULL
- for (i in 1:numYears) {
- m2 <- cbind(m2, c(samp[[1]][, paste0("m2", "[", i, "]")],
- samp[[2]][, paste0("m2", "[", i, "]")]))
- m3 <- cbind(m3, c(samp[[1]][, paste0("m3", "[", i, "]")],
- samp[[2]][, paste0("m3", "[", i, "]")]))
- }
-
- m2_quan <- data.table::data.table(
- apply(m2, 2, stats::quantile, c(0.05, 0.5, 0.95))
- )
- m3_quan <- data.table::data.table(
- apply(m3, 2, stats::quantile, c(0.05, 0.5, 0.95))
- )
-
- years <- sort(unique(lubridate::year(date_vec)))
- bf_phenos <- NULL
- for (i in 1:numYears) {
- if (m2_quan[2, ][[i]] > 0.4) { # suppress some amplitude-too-low year
- bf_phenos <- rbind(bf_phenos, data.table::data.table(
- Year = years[i],
- midgup_lower = m3_quan[1, ][[i]],
- midgup = m3_quan[2, ][[i]],
- midgup_upper = m3_quan[3, ][[i]]
- ))
- } else {
- bf_phenos <- rbind(bf_phenos, data.table::data.table(
- Year = years[i],
- midgup_lower = NA,
- midgup = NA,
- midgup_upper = NA
- ))
- }
- }
-
- # Construct `blsp_fit` object to return
- blsp_fit <- list(
- phenos = bf_phenos,
- params = list(m2 = m2, m3 = m3),
- data = data.table::data.table(
- date = date_vec,
- vi = vi_vec,
- weights = weights_vec
- )
- )
- class(blsp_fit) <- "BlspFit"
-
- if (verbose) {
- message("Done!")
- }
- return(blsp_fit)
-}
-
-
-
-
-#' Generate pheno from the predicted curve.
-#' Only supports Elmore model (The double-logistic model used in BLSP).
-#' This function is used inside the FitBLSP function.
-#'
-#' @param equation The model equation.
-#' @param params The Parameter list.
-#' @param t Date vector.
-#' @return The phenological timing in day of year (DOY)
-#'
-#' @noRd
-GetPhenosIdx <- function(equation, params, t) {
- y <- eval(equation, envir = list(
- m1 = params$m1, m2 = params$m2, m3 = params$m3, m4 = params$m4,
- m5 = params$m5, m6 = params$m6, m7 = params$m7, t = t
- ))
-
- d1 <- stats::D(equation, "t")
- d2 <- stats::D(d1, "t")
-
- y1 <- eval(d1, envir = list(
- m1 = params$m1, m2 = params$m2, m3 = params$m3, m4 = params$m4,
- m5 = params$m5, m6 = params$m6, m7 = params$m7, t = t
- ))
- y2 <- eval(d2, envir = list(
- m1 = params$m1, m2 = params$m2, m3 = params$m3, m4 = params$m4,
- m5 = params$m5, m6 = params$m6, m7 = params$m7, t = t
- ))
- k <- abs(y2^2) / ((1 + y1^2)^(3 / 2))
-
- d <- diff(y)
- d_code <- (d > 0) + (2 * (d < 0)) # 0=no change, 1=inc, 2=dec
-
- # Find the MidGreenup and MidGreeendown
- midgup <- round(params$m3)
- midgdown <- round(params$m5)
-
- # find peak
- peak <- NULL
- peaks <- unlist(gregexpr("12", paste(d_code, collapse = ""))) # no match is -1
- if (peaks[1] == -1) peaks <- NULL
- # no match is -1
- flat_peaks <- unlist(gregexpr("10+2", paste(d_code, collapse = "")))
- if (flat_peaks[1] == -1) flat_peaks <- NULL
-
- if (is.null(peaks) & is.null(flat_peaks)) {
- print("no peaks found in in GetPhenoIdx function!")
- return(NULL)
- } else {
- peak <- ifelse (!is.null(peaks), peaks[1], flat_peaks[1])
- }
-
- # the derivative of curvature
- d_k <- c(0, 0, diff(k*1000, differences = 2))
- # local extremes
- localextr <- which(diff(sign(diff(d_k*1000))) == -2) + 1
-
- maturity <- localextr[which(localextr < peak & localextr > midgup)]
- maturity <- maturity[length(maturity)]
-
- sene <- localextr[which(localextr > peak & localextr < midgdown)][1]
-
- gup <- localextr[which(localextr < midgup)]
- gup <- gup[length(gup)]
-
- dormancy <- localextr[which(localextr > (midgdown + 7))][1]
-
- return(list(gup = gup, midgup = midgup, maturity = maturity, peak = peak,
- sene = sene, midgdown = midgdown, dormancy = dormancy))
-}
-
-
-#' Fit the averaged model and get the model parameters.
-#'
-#' @param date_vec the date vector, be sure to convert the vector to "Date"
-#' format or use "yyyy-mm-dd" format string.
-#' @param vi_vec The vegetation index vector.
-#' @return Model parameters to be used as MCMC initial parameters in
-#' the FitBLSP function.
-#' @export
-FitAvgModel <- function(date_vec, vi_vec) {
- # Format data
- avg_dt <- FormatAvgData(date_vec, vi_vec)
-
- # Unpack data
- y <- unlist(avg_dt$evi2)
- t <- unlist(avg_dt$date)
-
- # Fake year information for the averaged year
- cur_start_date <- as.Date("1970-01-01")
- cur_end_date <- as.Date("1970-12-31")
- full_date <- seq(cur_start_date, cur_end_date, by = "day")
- full_t <- as.integer(full_date - cur_start_date + 1)
-
- # Fit model to get the prior
- avg_fit <- tryCatch(
- {
- model_equ <- stats::as.formula(paste("VI", "~", model_str))
- minpack.lm::nlsLM(model_equ,
- data = list(VI = y, t = as.integer(t)), start = list(
- m1 = 0.05, m2 = 1, m3 = 120, m4 = 6, m5 = 290, m6 = 8,
- m7 = 0.001),
- lower = c(0, 0.1, 1, 0, 1, 0, 0.00001),
- upper = c(1, 100, 185, 100, 370, 100, 0.01),
- control = list(warnOnly = TRUE)
- )
- },
- error = function(e) {
- print(paste("Average fit failed", sep = ":"))
- return(NULL)
- }
- )
-
- return(avg_fit)
-}
-
-
-
+#*******************************************************************************
+# Description: Model fit functions.
+# Date: 2022-03-29
+#*******************************************************************************
+
+#' Format empty BLSP output to return
+#'
+#' @param years The years vector
+#' @param date_vec The date vector; be sure to convert the vector to "Date"
+#' format or use "yyyy-mm-dd" format strings.
+#' @param vi_vec The vegetation index vector.
+#' @param weights_vec A numeric vector of same length as vi_vec specifying the
+#' weights for the supplied observations. Must be between 0 and 1, inclusive.
+#' @param cred_int_level A scalar value from 0 to 1 (exclusive) that specifies
+#' the level for equal-tailed credible intervals of the estimated phenometrics.
+#' The default level is 0.9, generating `90%` credible intervals. The end
+#' points of these intervals define the upper and lower bounds for the estimated
+#' phenometrics.
+#'
+#' @return An empty BLSP class object.
+#'
+#' @noRd
+EmptyBlspOutput <- function(years, date_vec, vi_vec, weights_vec, cred_int_level) {
+ bf_phenos <- data.table(
+ Year = years,
+ midgup_lower = NA, midgup = NA, midgup_upper = NA,
+ midgdown_lower = NA, midgdown = NA, midgdown_upper = NA
+ )
+ bf_phenos[,
+ colnames(bf_phenos) := lapply(.SD, as.numeric),
+ .SDcols = colnames(bf_phenos)
+ ]
+
+ blsp_fit <- list(
+ phenos = bf_phenos,
+ params = NULL,
+ data = data.table::data.table(
+ date = date_vec,
+ vi = vi_vec,
+ weights = weights_vec
+ ),
+ cred_int_level = cred_int_level
+ )
+ class(blsp_fit) <- "BlspFit"
+
+ return(blsp_fit)
+}
+
+
+#' Fit a Bayesian mixed hierarchical land surface phenology model.
+#'
+#' This function fits a Bayesian mixed hierarchical land surface phenology model
+#' to the supplied data (can be sparse), and returns phenometrics for the
+#' entire time frame. For further explanation, please see the vignette.
+#'
+#' @param date_vec The date vector; be sure to convert the vector to "Date"
+#' format or use "yyyy-mm-dd" format strings.
+#' @param vi_vec The vegetation index vector.
+#' @param weights_vec A numeric vector of same length as vi_vec specifying the
+#' weights for the supplied observations. Must be between 0 and 1, inclusive.
+#' @param initValues Initial values for MCMC sampling. By default, it is
+#' assigned `NULL`. It can also be an object returned from the `FitAvgModel()`
+#' function that fits an averaged model, or a numeric vector provided by the user.
+#' @param verbose logical. If `TRUE`, the progress will be reported.
+#' @param start_yr The start year of the result. Default is NULL, meaning it is
+#' determined from the data.
+#' @param end_yr The end year of the result. Default is NULL, meaning it is
+#' determined from the data.
+#' @param cred_int_level A scalar value from 0 to 1 (exclusive) that specifies
+#' the level for equal-tailed credible intervals of the estimated phenometrics.
+#' The default level is 0.9, generating `90%` credible intervals. The end
+#' points of these intervals define the upper and lower bounds for the estimated
+#' phenometrics.
+#'
+#' @return An object of class `BlspFit` will be returned. The object contains the
+#' estimated spring and autumn phenometrics for each year, the generated model
+#' parameter samples, and the input data.
+#' @examples
+#' \dontrun{
+#' data(landsatEVI2)
+#' blsp_fit <- FitBLSP(date_vec = landsatEVI2$date, vi_vec = landsatEVI2$evi2)
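+#' # A hypothetical variation (not from the original example): request 95%
+#' # credible intervals instead of the default 90%
+#' blsp_fit95 <- FitBLSP(
+#'     date_vec = landsatEVI2$date,
+#'     vi_vec = landsatEVI2$evi2,
+#'     cred_int_level = 0.95
+#' )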
+#' }
+#' @export
+#' @import data.table
+FitBLSP <- function(date_vec, vi_vec,
+ weights_vec = NULL,
+ initValues = NULL,
+ cred_int_level = 0.9,
+ verbose = FALSE,
+ start_yr = NULL, end_yr = NULL
+) {
+ # Check if date_vec is in Date format
+ if (sum(!is.na(lubridate::parse_date_time(date_vec, orders = "ymd"))) !=
+ length(date_vec)) {
+ stop("There're invalid Date values in the `date_vec`!
+ Be sure to use `yyyy-mm-dd` format.")
+ }
+
+ # Check weights to be in the range of [0, 1]
+ if (!is.null(weights_vec)) {
+ if (min(weights_vec) < 0 | max(weights_vec) > 1) {
+ stop("Weights must be within [0, 1].")
+ }
+
+ # Check the length of dates, vis, and weights
+ if (length(weights_vec) != length(date_vec) |
+ length(vi_vec) != length(date_vec)) {
+ stop("date_vec, vi_vec, and weights_vec have different lengths.")
+ }
+ }
+
+ # Check NAs
+ if (any(is.na(date_vec)) | any(is.na(vi_vec))) {
+ stop("Please remove NAs in the input data.")
+ }
+
+ # Check if credible interval level is valid
+ if (cred_int_level <= 0 | cred_int_level >= 1) {
+ stop("Credible interval level must be a value between 0 and 1 (exclusive).")
+ }
+
+ # Reorder data to make sure they are sorted by time
+ od <- order(date_vec)
+ date_vec <- date_vec[od]
+ vi_vec <- vi_vec[od]
+ weights_vec <- weights_vec[od]
+
+ # Convert data to jags format
+ y <- vi_vec
+ t <- lubridate::yday(date_vec)
+ n <- length(y) # total num of observations
+    # Year index vector (1-based, relative to the first observation year)
+    yr <- lubridate::year(date_vec) - lubridate::year(date_vec)[1] + 1
+    if (is.null(start_yr) || is.null(end_yr)) {
+        tmp <- sort(unique(lubridate::year(date_vec)))
+        years <- tmp[1]:tmp[length(tmp)]
+        numYears <- yr[length(yr)]
+    } else {
+        years <- start_yr:end_yr
+        numYears <- length(years)
+    }
+    # If the user did not specify weights, use equal weights
+ if (is.null(weights_vec)) {
+ weights_vec <- rep(1, n)
+ }
+
+ # ~ Format data, inits, and model
+ model_string <- "model {
+ # Likelihood
+ for (i in 1:n) {
+ Y[i] ~ dnorm(mu[i], tau_y)
+ mu[i] <- weights[i] * (m1[yr[i]] + (m2[yr[i]] - m7[yr[i]] * t[i]) *
+ ((1 / (1 + exp((m3[yr[i]] - t[i]) / m4[yr[i]]))) -
+ (1 / (1 + exp((m5[yr[i]] - t[i]) / m6[yr[i]])))))
+ }
+
+ # Priors
+ for (j in 1:N) {
+ M1[j] ~ dnorm(mu_m1, tau[1])
+ logit(m1[j]) <- M1[j]
+ m2[j] ~ dnorm(mu_m2, tau[2])
+ m3[j] ~ dnorm(mu_m3, tau[3])
+ m4[j] ~ dnorm(mu_m4, tau[4])
+ m5[j] ~ dnorm(mu_m5, tau[5])
+ m6[j] ~ dnorm(mu_m6, tau[6])
+ M7[j] ~ dbeta(4, 4 * (1 - mu_m7 * 100) / (mu_m7 * 100))
+ m7[j] <- M7[j] / 100
+ }
+
+ mu_m1 ~ dunif(0, 0.3)
+ mu_m2 ~ dunif(0.5, 2)
+ mu_m3 ~ dunif(0, 185)
+ mu_m4 ~ dunif(1, 15)
+ mu_m5 ~ dunif(185, 366)
+ mu_m6 ~ dunif(1, 15)
+ mu_m7 ~ dunif(0, 0.01)
+
+ for (k in 1:7) {
+ tau[k] ~ dgamma(0.1, 0.1)
+ }
+ tau_y ~ dgamma(0.1, 0.1)
+ }"
+
+ if (!is.null(initValues) && class(initValues) == "nls") {
+ p_m1 <- stats::coef(initValues)["m1"]
+ p_m2 <- stats::coef(initValues)["m2"]
+ p_m3 <- stats::coef(initValues)["m3"]
+ p_m4 <- stats::coef(initValues)["m4"]
+ p_m5 <- stats::coef(initValues)["m5"]
+ p_m6 <- stats::coef(initValues)["m6"]
+ p_m7 <- stats::coef(initValues)["m7"]
+ } else if (!is.null(initValues) && class(initValues) == "numeric") {
+ if (length(initValues) != 7) {
+ stop("The length of the initial values does not match",
+ "the number of model parameters."
+ )
+ }
+ p_m1 <- initValues[1]
+ p_m2 <- initValues[2]
+ p_m3 <- initValues[3]
+ p_m4 <- initValues[4]
+ p_m5 <- initValues[5]
+ p_m6 <- initValues[6]
+ p_m7 <- initValues[7]
+ } else {
+ p_m1 <- 0.05
+ p_m2 <- 1
+ p_m3 <- 120
+ p_m4 <- 8
+ p_m5 <- 290
+ p_m6 <- 8
+ p_m7 <- 0.001
+ }
+
+ data <- list(
+ Y = y, t = t, n = n, yr = yr, N = numYears, weights = weights_vec
+ )
+
+ inits <- list(
+ M1 = rep(p_m1, numYears),
+ m2 = rep(p_m2, numYears), m3 = rep(p_m3, numYears),
+ m4 = rep(p_m4, numYears), m5 = rep(p_m5, numYears),
+ m6 = rep(p_m6, numYears)
+ )
+
+ blsp_fit <- tryCatch(
+ {
+ if (verbose) {
+ message("Initialize model...")
+ }
+ pb_type <- ifelse(verbose, "text", "none")
+
+ model <- rjags::jags.model(textConnection(model_string),
+ data = data, inits = inits,
+ n.chains = 3, quiet = TRUE
+ )
+ stats::update(model, 2000, progress.bar = pb_type)
+
+ if (verbose) {
+ message("Sampling (could have multiple chains)...")
+ }
+
+ iteration_times <- 0
+ repeat {
+ samp <- rjags::coda.samples(model,
+ variable.names = c(
+ "m1", "m2", "m3", "m4", "m5", "m6", "m7"
+ ),
+ n.iter = 5000,
+ thin = 10,
+ progress.bar = pb_type
+ )
+ iteration_times <- iteration_times + 5000
+
+                # Stop when the chains have converged (multivariate PSRF
+                # <= 1.3) or the iteration budget is exhausted
+                if (coda::gelman.diag(samp)$mpsrf <= 1.3 ||
+                    iteration_times > 50000) {
+                    break
+                }
+ }
+ if (verbose) {
+ message("total interation times:", iteration_times)
+ }
+
+ # ~ Retrieve parameter estimates
+ if (verbose) {
+ message("Estimate phenometrics...")
+ }
+ m1 <- m2 <- m3 <- m4 <- m5 <- m6 <- m7 <- NULL
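+            # Pool posterior samples from the first two chains, one column
+            # per year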
+ for (i in 1:numYears) {
+ m1 <- cbind(m1, c(
+ samp[[1]][, paste0("m1", "[", i, "]")],
+ samp[[2]][, paste0("m1", "[", i, "]")]
+ ))
+ m2 <- cbind(m2, c(
+ samp[[1]][, paste0("m2", "[", i, "]")],
+ samp[[2]][, paste0("m2", "[", i, "]")]
+ ))
+ m3 <- cbind(m3, c(
+ samp[[1]][, paste0("m3", "[", i, "]")],
+ samp[[2]][, paste0("m3", "[", i, "]")]
+ ))
+ m4 <- cbind(m4, c(
+ samp[[1]][, paste0("m4", "[", i, "]")],
+ samp[[2]][, paste0("m4", "[", i, "]")]
+ ))
+ m5 <- cbind(m5, c(
+ samp[[1]][, paste0("m5", "[", i, "]")],
+ samp[[2]][, paste0("m5", "[", i, "]")]
+ ))
+ m6 <- cbind(m6, c(
+ samp[[1]][, paste0("m6", "[", i, "]")],
+ samp[[2]][, paste0("m6", "[", i, "]")]
+ ))
+ m7 <- cbind(m7, c(
+ samp[[1]][, paste0("m7", "[", i, "]")],
+ samp[[2]][, paste0("m7", "[", i, "]")]
+ ))
+ }
+
+            alpha <- (1 - cred_int_level) / 2
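+            # e.g., cred_int_level = 0.9 gives alpha = 0.05, so the quantiles
+            # below are taken at (0.05, 0.5, 0.95)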
+ m1_quan <- data.table::data.table(
+ apply(m1, 2, stats::quantile, c(alpha, 0.5, 1-alpha))
+ )
+ m2_quan <- data.table::data.table(
+ apply(m2, 2, stats::quantile, c(alpha, 0.5, 1-alpha))
+ )
+ m3_quan <- data.table::data.table(
+ apply(m3, 2, stats::quantile, c(alpha, 0.5, 1-alpha))
+ )
+ m5_quan <- data.table::data.table(
+ apply(m5, 2, stats::quantile, c(alpha, 0.5, 1-alpha))
+ )
+
+
+ # Construct `blsp_fit` object to return
+ blsp_fit <- EmptyBlspOutput(years, date_vec, vi_vec, weights_vec, cred_int_level)
+
+ for (i in 1:numYears) {
+            # Skip years whose estimated amplitude is too low
+            if (m2_quan[2, ][[i]] > 0.4) {
+ blsp_fit$phenos[
+ i,
+ colnames(blsp_fit$phenos) := list(
+ Year = years[i],
+ midgup_lower = m3_quan[1, ][[i]],
+ midgup = m3_quan[2, ][[i]],
+ midgup_upper = m3_quan[3, ][[i]],
+ midgdown_lower = m5_quan[1, ][[i]],
+ midgdown = m5_quan[2, ][[i]],
+ midgdown_upper = m5_quan[3, ][[i]]
+ )
+ ]
+ }
+ }
+
+ blsp_fit$params <- list(
+ m1 = m1, m2 = m2, m3 = m3,
+ m4 = m4, m5 = m5, m6 = m6, m7 = m7
+ )
+
+ if (verbose) {
+ message("Done!")
+ }
+
+ blsp_fit
+ },
+ error = function(e) {
+ if (verbose) {
+ message("Something went wrong!")
+ }
+ blsp_fit <- EmptyBlspOutput(years, date_vec, vi_vec, weights_vec, cred_int_level)
+
+ return(blsp_fit)
+ }
+ )
+
+ return(blsp_fit)
+}
+
+
+#' Fit a Bayesian mixed hierarchical land surface phenology model (spring only).
+#' Note that the result CANNOT be used to plot the fit.
+#'
+#' This function fits a Bayesian mixed hierarchical land surface phenology model
+#' to the supplied data (can be sparse), and returns spring phenometrics for the
+#' entire time frame. For further explanation, please see the vignette.
+#'
+#' @param date_vec The date vector; be sure to convert the vector to "Date"
+#' format or use "yyyy-mm-dd" format strings.
+#' @param vi_vec The vegetation index vector.
+#' @param weights_vec A numeric vector of same length as vi_vec specifying the
+#' weights for the supplied observations. Must be between 0 and 1, inclusive.
+#' @param initValues Initial values for MCMC sampling. By default, it is
+#' assigned `NULL`. It can also be an object returned from the `FitAvgModel()`
+#' function that fits an averaged model, or a numeric vector provided by the user.
+#' @param cred_int_level A scalar value from 0 to 1 (exclusive) that specifies
+#' the level for equal-tailed credible intervals of the estimated phenometrics.
+#' The default level is 0.9, generating `90%` credible intervals. The end
+#' points of these intervals define the upper and lower bounds for the estimated
+#' phenometrics.
+#' @param verbose logical. If `TRUE`, the progress will be reported.
+#' @return An object of class `BlspFit` will be returned. The object contains the
+#' estimated spring phenometrics for each year, the generated model parameter
+#' samples, and the input data.
+#' @examples
+#' \dontrun{
+#' data(landsatEVI2)
+#' blsp_fit <- FitBLSP_spring(date_vec = landsatEVI2$date, vi_vec = landsatEVI2$evi2)
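+#' # A hypothetical variation: 80% credible intervals for the spring phenometrics
+#' blsp_fit80 <- FitBLSP_spring(
+#'     date_vec = landsatEVI2$date,
+#'     vi_vec = landsatEVI2$evi2,
+#'     cred_int_level = 0.8
+#' )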
+#' }
+#' @export
+#' @import data.table
+FitBLSP_spring <- function(date_vec, vi_vec,
+ weights_vec = NULL,
+ initValues = NULL,
+ cred_int_level = 0.9,
+ verbose = FALSE
+) {
+ # Check if date_vec is in Date format
+ if (sum(!is.na(lubridate::parse_date_time(date_vec, orders = "ymd"))) !=
+ length(date_vec)) {
+ stop("There're invalid Date values in the `date_vec`!
+ Be sure to use `yyyy-mm-dd` format.")
+ }
+
+ # Check weights to be in the range of [0, 1]
+ if (!is.null(weights_vec)) {
+ if (min(weights_vec) < 0 | max(weights_vec) > 1) {
+ stop("Weights must be within [0, 1].")
+ }
+
+ # Check the length of dates, vis, and weights
+ if (length(weights_vec) != length(date_vec) |
+ length(vi_vec) != length(date_vec)) {
+ stop("date_vec, vi_vec, and weights_vec have different lengths.")
+ }
+ }
+
+ # Check NAs
+ if (any(is.na(date_vec)) | any(is.na(vi_vec))) {
+ stop("Please remove NAs in the input data.")
+ }
+
+ # Check if credible interval level is valid
+ if (cred_int_level <= 0 | cred_int_level >= 1) {
+ stop("Credible interval level must be a value between 0 and 1 (exclusive).")
+    }
+
+    # Reorder data to make sure they are sorted by time
+ od <- order(date_vec)
+ date_vec <- date_vec[od]
+ vi_vec <- vi_vec[od]
+ weights_vec <- weights_vec[od]
+
+ # Convert data to jags format
+ y <- vi_vec
+ t <- lubridate::yday(date_vec)
+ n <- length(y) # total num of observations
+    # Year index vector (1-based, relative to the first observation year)
+ yr <- lubridate::year(date_vec) - lubridate::year(date_vec)[1] + 1
+ numYears <- length(unique(yr))
+
+    # If the user did not specify weights, use equal weights
+ if (is.null(weights_vec)) {
+ weights_vec <- rep(1, n)
+ }
+
+ # ~ Format data, inits, and model
+ model_string <- "model {
+ # Likelihood
+ for (i in 1:n) {
+ Y[i] ~ dnorm(mu[i], tau_y)
+ mu[i] <- weights[i] * (m1[yr[i]] + (m2[yr[i]] - m7[yr[i]] * t[i]) *
+ ((1 / (1 + exp((m3[yr[i]] - t[i]) / m4[yr[i]]))) -
+ (1 / (1 + exp((m5[yr[i]] - t[i]) / m6[yr[i]])))))
+ }
+
+ # Priors
+ for (j in 1:N) {
+ M1[j] ~ dnorm(mu_m1, tau[1])
+ logit(m1[j]) <- M1[j]
+ m2[j] ~ dnorm(mu_m2, tau[2])
+ m3[j] ~ dnorm(mu_m3, tau[3])
+ m4[j] ~ dnorm(mu_m4, tau[4])
+ m5[j] ~ dnorm(mu_m5, tau[5])
+ m6[j] ~ dnorm(mu_m6, tau[6])
+ M7[j] ~ dbeta(4, 4 * (1 - mu_m7 * 100) / (mu_m7 * 100))
+ m7[j] <- M7[j] / 100
+ }
+
+ mu_m1 ~ dunif(0, 0.3)
+ mu_m2 ~ dunif(0.5, 2)
+ mu_m3 ~ dunif(0, 185)
+ mu_m4 ~ dunif(1, 15)
+ mu_m5 ~ dunif(185, 366)
+ mu_m6 ~ dunif(1, 15)
+ mu_m7 ~ dunif(0, 0.01)
+
+ for (k in 1:7) {
+ tau[k] ~ dgamma(0.1, 0.1)
+ }
+ tau_y ~ dgamma(0.1, 0.1)
+ }"
+
+ if (!is.null(initValues) && class(initValues) == "nls") {
+ p_m1 <- stats::coef(initValues)["m1"]
+ p_m2 <- stats::coef(initValues)["m2"]
+ p_m3 <- stats::coef(initValues)["m3"]
+ p_m4 <- stats::coef(initValues)["m4"]
+ p_m5 <- stats::coef(initValues)["m5"]
+ p_m6 <- stats::coef(initValues)["m6"]
+ p_m7 <- stats::coef(initValues)["m7"]
+ } else if (!is.null(initValues) && class(initValues) == "numeric") {
+ if (length(initValues) != 7) {
+ stop("The length of the initial values does not match",
+ "the number of model parameters."
+ )
+ }
+ p_m1 <- initValues[1]
+ p_m2 <- initValues[2]
+ p_m3 <- initValues[3]
+ p_m4 <- initValues[4]
+ p_m5 <- initValues[5]
+ p_m6 <- initValues[6]
+ p_m7 <- initValues[7]
+ } else {
+ p_m1 <- 0.05
+ p_m2 <- 1
+ p_m3 <- 120
+ p_m4 <- 8
+ p_m5 <- 290
+ p_m6 <- 8
+ p_m7 <- 0.001
+ }
+
+ data <- list(
+ Y = y, t = t, n = n, yr = yr, N = numYears, weights = weights_vec
+ )
+
+ inits <- list(
+ M1 = rep(p_m1, numYears),
+ m2 = rep(p_m2, numYears), m3 = rep(p_m3, numYears),
+ m4 = rep(p_m4, numYears), m5 = rep(p_m5, numYears),
+ m6 = rep(p_m6, numYears)
+ )
+
+ tryCatch(
+ {
+ if (verbose) {
+ message("Initialize model...")
+ }
+ pb_type <- ifelse(verbose, "text", "none")
+
+ model <- rjags::jags.model(textConnection(model_string),
+ data = data, inits = inits,
+ n.chains = 3, quiet = TRUE
+ )
+ stats::update(model, 2000, progress.bar = pb_type)
+
+ if (verbose) {
+ message("Sampling (could have multiple chains)...")
+ }
+
+ iteration_times <- 0
+ repeat {
+ samp <- rjags::coda.samples(model,
+ variable.names = c("m2", "m3"),
+ n.iter = 5000,
+ thin = 10,
+ progress.bar = pb_type
+ )
+ iteration_times <- iteration_times + 5000
+
+                # Stop when the chains have converged (multivariate PSRF
+                # <= 1.3) or the iteration budget is exhausted
+                if (coda::gelman.diag(samp)$mpsrf <= 1.3 ||
+                    iteration_times > 100000) {
+                    break
+                }
+ }
+ if (verbose) {
+ message("total interation times:", iteration_times)
+ }
+ },
+ error = function(e) {
+            years <- sort(unique(lubridate::year(date_vec)))
+ bf_phenos <- NULL
+ for (i in 1:numYears) {
+ bf_phenos <- rbind(bf_phenos, list(
+ Id = NA, Year = years[i],
+ midgup_lower = NA, midgup = NA, midgup_upper = NA,
+ midgdown_lower = NA, midgdown = NA, midgdown_upper = NA
+ ))
+ }
+ return(list(fitted = NA, phenos = bf_phenos))
+ }
+ )
+
+ # ~ Retrieve parameter estimates
+ if (verbose) {
+ message("Estimate phenometrics...")
+ }
+    m2 <- m3 <- NULL
+ for (i in 1:numYears) {
+ m2 <- cbind(m2, c(samp[[1]][, paste0("m2", "[", i, "]")],
+ samp[[2]][, paste0("m2", "[", i, "]")]))
+ m3 <- cbind(m3, c(samp[[1]][, paste0("m3", "[", i, "]")],
+ samp[[2]][, paste0("m3", "[", i, "]")]))
+ }
+
+    alpha <- (1 - cred_int_level) / 2
+ m2_quan <- data.table::data.table(
+ apply(m2, 2, stats::quantile, c(alpha, 0.5, 1-alpha))
+ )
+ m3_quan <- data.table::data.table(
+ apply(m3, 2, stats::quantile, c(alpha, 0.5, 1-alpha))
+ )
+
+ years <- sort(unique(lubridate::year(date_vec)))
+ bf_phenos <- NULL
+ for (i in 1:numYears) {
+        if (m2_quan[2, ][[i]] > 0.4) { # Skip years whose amplitude is too low
+ bf_phenos <- rbind(bf_phenos, data.table::data.table(
+ Year = years[i],
+ midgup_lower = m3_quan[1, ][[i]],
+ midgup = m3_quan[2, ][[i]],
+ midgup_upper = m3_quan[3, ][[i]]
+ ))
+ } else {
+ bf_phenos <- rbind(bf_phenos, data.table::data.table(
+ Year = years[i],
+ midgup_lower = NA,
+ midgup = NA,
+ midgup_upper = NA
+ ))
+ }
+ }
+
+ # Construct `blsp_fit` object to return
+ blsp_fit <- list(
+ phenos = bf_phenos,
+ params = list(m2 = m2, m3 = m3),
+ data = data.table::data.table(
+ date = date_vec,
+ vi = vi_vec,
+ weights = weights_vec
+ ),
+ cred_int_level = cred_int_level
+ )
+ class(blsp_fit) <- "BlspFit"
+
+ if (verbose) {
+ message("Done!")
+ }
+ return(blsp_fit)
+}
+
+
+
+
+#' Generate phenometrics from the predicted curve.
+#' Only supports the Elmore model (the double-logistic model used in BLSP).
+#' This function is used inside the FitBLSP function.
+#'
+#' @param equation The model equation.
+#' @param params The parameter list.
+#' @param t Date vector.
+#' @return The phenological timings in day of year (DOY).
+#'
+#' @noRd
+GetPhenosIdx <- function(equation, params, t) {
+ y <- eval(equation, envir = list(
+ m1 = params$m1, m2 = params$m2, m3 = params$m3, m4 = params$m4,
+ m5 = params$m5, m6 = params$m6, m7 = params$m7, t = t
+ ))
+
+ d1 <- stats::D(equation, "t")
+ d2 <- stats::D(d1, "t")
+
+ y1 <- eval(d1, envir = list(
+ m1 = params$m1, m2 = params$m2, m3 = params$m3, m4 = params$m4,
+ m5 = params$m5, m6 = params$m6, m7 = params$m7, t = t
+ ))
+ y2 <- eval(d2, envir = list(
+ m1 = params$m1, m2 = params$m2, m3 = params$m3, m4 = params$m4,
+ m5 = params$m5, m6 = params$m6, m7 = params$m7, t = t
+ ))
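+    # Curvature of the fitted curve; note this implementation squares y2,
+    # a variant of the usual abs(y2) / (1 + y1^2)^(3/2) formula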
+ k <- abs(y2^2) / ((1 + y1^2)^(3 / 2))
+
+ d <- diff(y)
+ d_code <- (d > 0) + (2 * (d < 0)) # 0=no change, 1=inc, 2=dec
+
+    # Find the MidGreenup and MidGreendown
+ midgup <- round(params$m3)
+ midgdown <- round(params$m5)
+
+ # find peak
+ peak <- NULL
+ peaks <- unlist(gregexpr("12", paste(d_code, collapse = ""))) # no match is -1
+ if (peaks[1] == -1) peaks <- NULL
+    # flat peaks (increase, plateau, then decrease); no match is -1
+ flat_peaks <- unlist(gregexpr("10+2", paste(d_code, collapse = "")))
+ if (flat_peaks[1] == -1) flat_peaks <- NULL
+
+    if (is.null(peaks) && is.null(flat_peaks)) {
+        warning("No peaks found in the GetPhenosIdx function!")
+        return(NULL)
+    } else {
+        peak <- ifelse(!is.null(peaks), peaks[1], flat_peaks[1])
+    }
+
+    # The rate of change of curvature, approximated by second differences
+    d_k <- c(0, 0, diff(k * 1000, differences = 2))
+    # Local extremes of the rate of change
+    localextr <- which(diff(sign(diff(d_k * 1000))) == -2) + 1
+
+ maturity <- localextr[which(localextr < peak & localextr > midgup)]
+ maturity <- maturity[length(maturity)]
+
+ sene <- localextr[which(localextr > peak & localextr < midgdown)][1]
+
+ gup <- localextr[which(localextr < midgup)]
+ gup <- gup[length(gup)]
+
+ dormancy <- localextr[which(localextr > (midgdown + 7))][1]
+
+ return(list(gup = gup, midgup = midgup, maturity = maturity, peak = peak,
+ sene = sene, midgdown = midgdown, dormancy = dormancy))
+}
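+
+# A minimal sketch of how GetPhenosIdx() could be exercised directly. The
+# equation below is a hypothetical stand-in, written out from the JAGS
+# likelihood above, for the package's internal Elmore model expression:
+#
+#   elmore_equ <- str2lang(
+#       "m1 + (m2 - m7 * t) * (1 / (1 + exp((m3 - t) / m4)) -
+#           1 / (1 + exp((m5 - t) / m6)))"
+#   )
+#   GetPhenosIdx(elmore_equ,
+#       params = list(m1 = 0.05, m2 = 1, m3 = 120, m4 = 8, m5 = 290,
+#           m6 = 8, m7 = 0.001),
+#       t = 1:365
+#   )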
+
+
+#' Fit the averaged model and get the model parameters.
+#'
+#' @param date_vec The date vector; be sure to convert the vector to "Date"
+#' format or use "yyyy-mm-dd" format strings.
+#' @param vi_vec The vegetation index vector.
+#' @return Model parameters to be used as MCMC initial parameters in
+#' the FitBLSP function.
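+#' @examples
+#' \dontrun{
+#' # A minimal sketch mirroring the vignette: fit the averaged model first,
+#' # then use it to initialize the full Bayesian model
+#' data(landsatEVI2)
+#' avg_fit <- FitAvgModel(date_vec = landsatEVI2$date, vi_vec = landsatEVI2$evi2)
+#' blsp_fit <- FitBLSP(
+#'     date_vec = landsatEVI2$date,
+#'     vi_vec = landsatEVI2$evi2,
+#'     initValues = avg_fit
+#' )
+#' }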
+#' @export
+FitAvgModel <- function(date_vec, vi_vec) {
+ # Format data
+ avg_dt <- FormatAvgData(date_vec, vi_vec)
+
+ # Unpack data
+ y <- unlist(avg_dt$evi2)
+ t <- unlist(avg_dt$date)
+
+ # Fake year information for the averaged year
+ cur_start_date <- as.Date("1970-01-01")
+ cur_end_date <- as.Date("1970-12-31")
+ full_date <- seq(cur_start_date, cur_end_date, by = "day")
+ full_t <- as.integer(full_date - cur_start_date + 1)
+
+ # Fit model to get the prior
+ avg_fit <- tryCatch(
+ {
+ model_equ <- stats::as.formula(paste("VI", "~", model_str))
+ minpack.lm::nlsLM(model_equ,
+ data = list(VI = y, t = as.integer(t)), start = list(
+ m1 = 0.05, m2 = 1, m3 = 120, m4 = 6, m5 = 290, m6 = 8,
+ m7 = 0.001),
+ lower = c(0, 0.1, 1, 0, 1, 0, 0.00001),
+ upper = c(1, 100, 185, 100, 370, 100, 0.01),
+ control = list(warnOnly = TRUE)
+ )
+ },
+ error = function(e) {
+ print(paste("Average fit failed", sep = ":"))
+ return(NULL)
+ }
+ )
+
+ return(avg_fit)
+}
+
+
+
diff --git a/R/vis_fit.R b/R/vis_fit.R
index 504b990..ef98f8c 100644
--- a/R/vis_fit.R
+++ b/R/vis_fit.R
@@ -103,7 +103,8 @@ PlotBLSP <- function(blsp_fit, if_return_fit = FALSE) {
bf_phenos <- blsp_fit$phenos
yr <- lubridate::year(date_vec) - lubridate::year(date_vec)[1] + 1
numYears <- length(unique(yr))
-
+ disp_cred_int_level <- round(blsp_fit$cred_int_level * 100)
+
#~ Predict fitted value for full dates ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
bf_pred <- NULL
years <- sort(unique(lubridate::year(date_vec)))
@@ -140,9 +141,10 @@ PlotBLSP <- function(blsp_fit, if_return_fit = FALSE) {
predCI <- cbind(predCI, pred)
}
+ alpha <- (1 - blsp_fit$cred_int_level) / 2
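+ # With the default cred_int_level = 0.9, this yields the 5th and 95th
+ # percentiles, i.e., a 90% equal-tailed band around the fitted curve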
predCI <- t(data.table::data.table(
apply(predCI, 1, function(x) {
- stats::quantile(x, c(0.025, 0.975))
+ stats::quantile(x, c(alpha, 1-alpha))
}
)))
@@ -202,8 +204,9 @@ PlotBLSP <- function(blsp_fit, if_return_fit = FALSE) {
pch = c(16, NA, 16, 15, 16, NA),
col = c("black", "red", pheno_colors[1],
Transparent("red", 0.2), pheno_colors[2], "black"),
- legend = c("Observations", "Median Fit", "SOS", "95% C.I. of fit",
- "EOS", "95% C.I. of phenometrics"),
+ legend = c("Observations", "Median Fit", "SOS",
+ paste0(disp_cred_int_level, "% C.I. of fit"),
+ "EOS", paste0(disp_cred_int_level, "% C.I. of phenometrics")),
xpd = NA
)
graphics::legend("bottomright", bty = "n",
diff --git a/README.md b/README.md
index 9fd5012..e12ffe6 100644
--- a/README.md
+++ b/README.md
@@ -73,4 +73,3 @@ We thank the following people for their assistance with the creation of this pac
---
_Graphs used in the icon are created by Freepik - Flaticon_
-
diff --git a/man/FitBLSP.Rd b/man/FitBLSP.Rd
index 8a9338d..02c9df2 100644
--- a/man/FitBLSP.Rd
+++ b/man/FitBLSP.Rd
@@ -9,6 +9,7 @@ FitBLSP(
vi_vec,
weights_vec = NULL,
initValues = NULL,
+ cred_int_level = 0.9,
verbose = FALSE,
start_yr = NULL,
end_yr = NULL
@@ -27,6 +28,12 @@ weights for the supplied observations. Must be between 0 and 1, inclusive.}
assgined \code{NULL}. It could also be an object returned from the \code{FitAvgModel()}
function that fits an averaged model or a numeric vector provided by the user.}
+\item{cred_int_level}{A scalar value from 0 to 1 (exclusive) that specifies
+the level for equal-tailed credible intervals of the estimated phenometrics.
+The default level is 0.9, generating \verb{90\%} credible intervals. The end
+points of these intervals define the upper and lower bounds for the estimated
+phenometrics.}
+
\item{verbose}{logical. If \code{TRUE}, the progress will be reported.}
\item{start_yr}{The start year of the result. Default is NULL, which means
diff --git a/man/FitBLSP_spring.Rd b/man/FitBLSP_spring.Rd
index c12fbcb..812ffeb 100644
--- a/man/FitBLSP_spring.Rd
+++ b/man/FitBLSP_spring.Rd
@@ -10,6 +10,7 @@ FitBLSP_spring(
vi_vec,
weights_vec = NULL,
initValues = NULL,
+ cred_int_level = 0.9,
verbose = FALSE
)
}
@@ -26,6 +27,12 @@ weights for the supplied observations. Must be between 0 and 1, inclusive.}
assgined \code{NULL}. It could also be an object returned from the \code{FitAvgModel()}
function that fits an averaged model or a numeric vector provided by the user.}
+\item{cred_int_level}{A scalar value from 0 to 1 (exclusive) that specifies
+the level for equal-tailed credible intervals of the estimated phenometrics.
+The default level is 0.9, generating \verb{90\%} credible intervals. The end
+points of these intervals define the upper and lower bounds for the estimated
+phenometrics.}
+
\item{verbose}{logical. If \code{TRUE}, the progress will be reported.}
}
\value{
diff --git a/man/GetEvi2PointTs.Rd b/man/GetEvi2PointTs.Rd
index 44140f5..19b3f4f 100644
--- a/man/GetEvi2PointTs.Rd
+++ b/man/GetEvi2PointTs.Rd
@@ -12,7 +12,7 @@ GetEvi2PointTs(pt_coords, focalDates = "1984-01-01/2022-12-31", ncores = 1)
\item{focalDates}{Temporal period.}
-\item{ncore}{Number of cores used to parallel the process.}
+\item{ncores}{Number of cores used to parallel the process.}
}
\value{
A data.table containing EVI2 time series along with QA values.
diff --git a/vignettes/geeResults.png b/vignettes/geeResults.png
index be2c865..1ac0a3f 100644
Binary files a/vignettes/geeResults.png and b/vignettes/geeResults.png differ
diff --git a/vignettes/introduction.Rmd b/vignettes/introduction.Rmd
index 4c6a245..3c76c3e 100644
--- a/vignettes/introduction.Rmd
+++ b/vignettes/introduction.Rmd
@@ -1,278 +1,278 @@
----
-title: "Introduction to blsp"
-author: "Xiaojie Gao, Ian R. McGregor, Owen Smith, Izzi Hinks, Matt Shisler"
-output: rmarkdown::html_vignette
-vignette: >
- %\VignetteIndexEntry{Introduction to blsp}
- %\VignetteEngine{knitr::rmarkdown}
- %\VignetteEncoding{UTF-8}
----
-
-```{r, include = FALSE}
-knitr::opts_chunk$set(
- eval=TRUE,
- collapse = TRUE,
- comment = "#>"
-)
-```
-
-
-
-# Background
-The code was originally developed by [Gao et al 2021](https://www.sciencedirect.com/science/article/pii/S0034425721002029?via%3Dihub), whose paper detailed a Bayesian hierarchical model to quantify land surface phenology from disparate, optical remote sensing time series. In other words, the model is able to take sparse vegetation index observations from the entirety of the Landsat time series (for example), and create a continuous estimate of annual land surface phenology. In addition to calculating start of season (SOS) and end of season (EOS) dates, the model also calculated pixel-wise uncertainty estimates for each of these phenometrics.
-
-This vignette will walk you through the `blsp` package and how to use it. For the source code, please visit the [Github repository](https://github.com/ncsuSEAL/Bayesian_LSP).
-
-# Running the code
-## Set-up
-As an example dataset, we will use the `landsatEVI2` dataset, which has EVI2 (two-band enhanced vegetation index, for more details, see [here](https://en.wikipedia.org/wiki/Enhanced_vegetation_index)) data calculated from Landsats 5-8 from 1984-2019. For each measurement, there is a corresponding, boolean snow flag (to be used for assigning weights).
-```{r laod-data}
-library(blsp)
-data(landsatEVI2)
-```
-
-## Fitting the model
-At its core, all you need to run the main function (`FitBLSP`) is a vector of vegetation index data that follows a [double logistic function](https://www.desmos.com/calculator/rftz67g7eb) (e.g. NDVI, EVI, EVI2), and the corresponding dates of the individual data points. With these, running the code is simple.
-
-Here to save some computing time but show the power of the BLSP algorithm in handling data sparsity, we only use data from 1984 to 1994 to fit the model.
-```{r fit-blsp}
-sub_dt <- landsatEVI2[lubridate::year(date) %in% 1984:1994, ]
-results <- FitBLSP(
- date_vec = sub_dt$date,
- vi_vec = sub_dt$evi2,
- verbose = TRUE
-)
-```
-
-The results are stored in an object with three elements. First, "phenos" is a table containing the estimated day of year (DOY) of "midgreenup"(or SOS) and "midgreendown" (or EOS) for each year contained in the sample data, along with upper and lower bounds. Second, "params" is a table containing the generated model parameter samples by the [Markov chain Monte Carlo (MCMC) sampling](https://en.wikipedia.org/wiki/Markov_chain_Monte_Carlo). Third, "data" is the input data table used to fit the BLSP algorithm.
-```{r blsp-result-structure}
-str(results)
-```
-
-Let's plot the reconstruction of the EVI2 time series using the supplied data. This will take a little time to run as the function will try each combination of the MCMC-generated parameters to predict the best fit curve.
-```{r blsp-fit-figure, fig.width=7.5, fig.height=4}
-fitted_dt <- PlotBLSP(results, if_return_fit = TRUE)
-```
-
-We see that "fitted_dt" is a data table with the information used to construct the plot.
-```{r blsp-fitted-table}
-head(fitted_dt)
-```
-
-### Changing weights
-Some users may want to assign different weights to certain observations. For example, perhaps the QA for several days is bad and we want to downweight those observations such that they contribute less to the posterior distribution. To do so, we supply a vector of equal length to `vi_vec` in the range [0,1]. In the sample data, the `snow` column indicates the presence of snow for the corresponding dates. Here, we will assign weights of 0.1 to those observations. (Again, we only use data from 1984 to 1994 as an example)
-```{r assign-weights}
-sub_dt <- landsatEVI2[lubridate::year(date) %in% 1984:1994,
- .(date, evi2, weights = ifelse(snow, 0.1, 1), snow)
-]
-head(sub_dt)
-```
-
-Adding in weights will make the code run longer. The reason is that the downweighted observations can make the data more sparse than it already is (e.g., if some points have weight near 0, they would contribute little information), thus the Bayesian model may need more time to converge.
-
-Compare the first few rows of "phenos" below with the previous results above.
-```{r blsp-with-weights}
-results <- FitBLSP(
- date_vec = sub_dt$date,
- vi_vec = sub_dt$evi2,
- weights_vec = sub_dt$weights,
- verbose = TRUE
-)
-head(results$phenos)
-```
-
-
-### Assigning initial values
-As documented in [Gao et al 2021](https://www.sciencedirect.com/science/article/pii/S0034425721002029?via%3Dihub), good initial values can save some time for the model to converge. By default, the `initValues` of the `FitBLSP()` function are determined by the BLSP model so the users don't have to provide them. But if you want, we *can* get initial values from fitting an average model in which all years of data are pooled together. However, there is some risk with this, as the average model inherently can't successfully fit all time series data (that's one of the reasons that we need the BLSP model!). If you try obtaining initial values using `FitAvgModel` with your data and it fails, then we suggest simply running `FitBLSP()` with the default `initValues` argument of `NULL`.
-
-```{r fit-avg-model}
-avg_fit <- FitAvgModel(sub_dt$date, sub_dt$evi2)
-print(avg_fit)
-```
-
-The average model can also be plotted out:
-```{r avg-fit-figure, fig.width=7, fig.height=5}
-par(mar = c(3, 3, 1, 1))
-PlotAvg(
- date_vec = sub_dt$date,
- vi_vec = sub_dt$evi2,
- avg_fit = avg_fit
-)
-```
-
-Returning to `FitBLSP()`, the `iniValues` argument can either be a numeric vector or an object returned by the `FitAvgModel()` function. Here we will use the latter.
-```{r blsp-with-initVal}
-results <- FitBLSP(
- date_vec = sub_dt$date,
- vi_vec = sub_dt$evi2,
- initValues = avg_fit,
- verbose = TRUE
-)
-```
-
-We can then see the result of the fit:
-```{r blsp-result}
-str(results)
-```
-
-
-## Processing BLSP for your own data
-If you have your own data you want to use this package for, that's great! Otherwise, we've provided a simple way for users to obtain EVI2 time series data for their area(s) of interest via Google Earth Engine (GEE) and Microsoft Planetary Computer (MPC). Here we are using Landsats 5,7,8,9 Collection 2 Surface Reflectance data. **Note this method requires you have a Google account**.
-
-
-### Get data from Microsoft Planetary Computer
-Getting EVI2 time series using Microsoft Planetary Computer (MPC) is easy by using the functions we provide, you don't need to register an account, and you can do it in the R environment so it's good for keeping you code consistent. But, keep in mind that since PC is relatively new, the API may change which would make our functions throw errors in the future. Also, note that running the code in this section is going to take a while, so in the vignette, we only put the example code and the corresponding results rather than running the code each time you install the library. It will save some time, but the result may be different from what you get.
-
-To get EVI2 time series from MPC, you need to have point locations specified by longitude and latitude. Although our functions only do one point location at a time, you can do multiple points in a loop.
-
-```{r mpc-point-loc, eval = FALSE}
-# A point location specified by long/lat format
-# You MUST name the coordinates with `x` and `y` and put them into a data.frame
-pt_coords <- data.frame(x = -71.700975, y = 43.945733)
-```
-
-Then, you can specify the temporal period by a character string. And, since we use parallel procesing to process the data, you may specify the number of cores you want to use for parallel. **Note that for processing speed, here in the example we are getting data for 6 years, as we know the BLSP model borrows information from other years, doing 6 years could yield relatively large uncertainty on the estimate depending on the data sparsity. So, once you are sure the code runs well on your machine, do multiple tests on the data side.**
-
-```{r mpc-focal-dates, eval = FALSE}
-# Use the format of "start_date/end_date"
-focalDates <- "2011-01-01/2016-12-31"
-
-# Number of cores
-ncores <- 10
-```
-
-To get the data, you can use `GetEvi2PointTs()` function (it's going to take around 5 minutes for the function to prepare the data, depending on how long the requested temperal period is and the internet speed).
-
-```{r mpc-get-data, eval = FALSE}
-evi2_dt <- GetEvi2PointTs(
- pt_coords = pt_coords,
- focalDates = focalDates,
- ncores = ncores
-)
-```
-
-After the code is done, you can see the result in the `evi2_dt` object (note that we did not remove values with snow):
-```{r mpc-evi2-ts, eval = FALSE}
-head(evi2_dt)
-```
-
-```
- img_id lon lat evi2 date qa snow
-1: LT05_L2SP_013029_20100109_02_T1 -71.70097 43.94573 0.1000716 2010-01-09 13600 TRUE
-2: LT05_L2SP_012030_20100307_02_T1 -71.70097 43.94573 0.1410364 2010-03-07 5440 FALSE
-3: LT05_L2SP_013029_20100415_02_T1 -71.70097 43.94573 0.1754859 2010-04-15 5440 FALSE
-4: LE07_L2SP_013029_20100423_02_T1 -71.70097 43.94573 0.2231386 2010-04-23 5440 FALSE
-5: LT05_L2SP_012030_20100424_02_T1 -71.70097 43.94573 0.2273574 2010-04-24 5440 FALSE
-6: LT05_L2SP_012030_20100510_02_T1 -71.70097 43.94573 0.5618129 2010-05-10 5440 FALSE
-```
-
-Once you get the EVI2 time series, you can process BLSP from the data as usual. The code might run for several minutes depending on the number of years in the time series.
-
-```{r mpc-blsp, eval=FALSE}
-# Remove snow
-evi2_dt <- evi2_dt[snow == FALSE, ]
-
-# Fit the average model
-avg_fit <- FitAvgModel(
- date_vec = evi2_dt$date,
- vi_vec = evi2_dt$evi2
-)
-
-# Fit the BLSP model
-results <- FitBLSP(
- date_vec = evi2_dt$date,
- vi_vec = evi2_dt$evi2,
- initValues = avg_fit,
- verbose = TRUE
-)
-```
-
-And plot out the model fit:
-```{r mpc-plot-blsp, eval=FALSE}
-png("mpcResults.png", width=1200, height=400)
-fitted_dt <- PlotBLSP(results, if_return_fit = TRUE)
-dev.off()
-```
-
-
-
-
-
-
-### Get data from GEE
-First, please go to [this link](https://code.earthengine.google.com/7e2cdb594772506e296bda8411b05d20) for the GEE script. If the link does not work, please open a new [code browser](https://code.earthengine.googlecom) and copy/paste the script found [here](https://github.com/ncsuSEAL/Bayesian_LSP/blob/main/gee/getSampleData.js).
-
-Second, change the start and end dates (*Section 1: Define variables*) and the point coordinates (*Section 1: Define study locations*) as you need, and run the script by clicking the "Run" button.
-
-Third, go to "Tasks" panel, and click the "RUN" button in the row of the "exportEVI2" task. It will open a window with the detailed information including task name, export directory (has to be a Google Drive folder), export file name, and file type. We will use the default setting for this vignette. Click the "RUN" button of the window.
-
-It might take several minutes for GEE to process the time series depending on the current availability of their cloud computing resource. After it's done, the exported time series data file should be in your Google Drive folder.
-
-### Apply BLSP to GEE data
-
-Note: we don't have this GEE data included in our package as standalone data. Instead, here we are showing the code as if we are loading from a local directory after downloading from GEE. Because of this, any outputs shown here are from us running the code then manually adding the results to each section.
-
-In R, load in the time series data file by:
-```{r gee-data, eval=FALSE}
-library(data.table)
-sample_data <- fread("sampleData.csv")
-```
-Using `head(sample_data)`, you will see it has 7 columns containing information of sensor type, date, longitude, latitude, point id, EVI2 value, and pixel QA value.
-```{r, ex1}
-# satellite date lon lat id evi2 QA_PIXEL
-# 1: LANDSAT/LT05/C02/T1_L2 1991-10-29 43.94573 -71.70097 0 NA NA
-# 2: LANDSAT/LT05/C02/T1_L2 1991-10-29 43.95202 -71.72556 1 NA NA
-# 3: LANDSAT/LT05/C02/T1_L2 1991-10-29 43.95092 -71.72874 2 NA NA
-# 4: LANDSAT/LT05/C02/T1_L2 1991-10-29 43.95895 -71.73166 3 0.1578591 5440
-# 5: LANDSAT/LT05/C02/T1_L2 1991-10-29 43.94913 -71.73183 4 NA NA
-# 6: LANDSAT/LT05/C02/T1_L2 1991-10-29 43.95740 -71.73687 5 NA NA
-```
-
-To run the BLSP model, let's select a single time series and omit all `NA` rows (important!).
-```{r single-ts, eval=FALSE}
-# Select one single time series
-single_ts <- sample_data[id == 1, .(date, evi2)]
-
-# Remember to remove NA rows before running the `blsp` package
-single_ts <- na.omit(single_ts)
-head(single_ts)
-
-# date evi2
-# 1: 1984-06-19 0.7871580
-# 2: 1984-09-23 0.5792558
-# 3: 1985-06-22 0.7871400
-# 4: 1985-08-09 0.7356111
-# 5: 1985-10-12 0.4650306
-# 6: 1986-06-09 0.7066563
-```
-
-Now, we can use the `blsp` package as usual. This code chunk might run for several minutes depending on how many years are in the time series.
-```{r fit-gee-data, eval=FALSE}
-# Fit the average model
-avg_fit <- FitAvgModel(
- date_vec = single_ts$date,
- vi_vec = single_ts$evi2
-)
-
-# Fit the BLSP model
-results <- FitBLSP(
- date_vec = single_ts$date,
- vi_vec = single_ts$evi2,
- initValues = avg_fit,
- verbose = TRUE
-)
-```
-
-Plot out the model fit:
-```{r plot-blsp-gee-data, eval=FALSE}
-png("geeResults.png", width=1200, height=400)
-fitted_dt <- PlotBLSP(results, if_return_fit = TRUE)
-dev.off()
-```
-
-
-
-If you encounter any issues while using the package, please feel free to [leave us an issue on Github](https://github.com/ncsuSEAL/Bayesian_LSP).
-
-
-
+---
+title: "Introduction to blsp"
+author: "Xiaojie Gao, Ian R. McGregor, Owen Smith, Izzi Hinks, Matt Shisler"
+output: rmarkdown::html_vignette
+vignette: >
+ %\VignetteIndexEntry{Introduction to blsp}
+ %\VignetteEngine{knitr::rmarkdown}
+ %\VignetteEncoding{UTF-8}
+---
+
+```{r, include = FALSE}
+knitr::opts_chunk$set(
+ eval=TRUE,
+ collapse = TRUE,
+ comment = "#>"
+)
+```
+
+
+
+# Background
+The code was originally developed by [Gao et al. (2021)](https://www.sciencedirect.com/science/article/pii/S0034425721002029?via%3Dihub), whose paper detailed a Bayesian hierarchical model that quantifies land surface phenology from disparate optical remote sensing time series. In other words, the model can take sparse vegetation index observations from the entirety of the Landsat time series (for example) and create a continuous estimate of annual land surface phenology. In addition to calculating start of season (SOS) and end of season (EOS) dates, the model also provides pixel-wise uncertainty estimates for each of these phenometrics.
+
+This vignette will walk you through the `blsp` package and how to use it. For the source code, please visit the [Github repository](https://github.com/ncsuSEAL/Bayesian_LSP).
+
+# Running the code
+## Set-up
+As an example dataset, we will use the `landsatEVI2` dataset, which contains EVI2 (two-band enhanced vegetation index; for more details, see [here](https://en.wikipedia.org/wiki/Enhanced_vegetation_index)) values calculated from Landsats 5-8 for 1984-2019. Each measurement has a corresponding boolean snow flag (to be used for assigning weights).
+```{r load-data}
+library(blsp)
+data(landsatEVI2)
+```
+
+## Fitting the model
+At its core, all you need to run the main function (`FitBLSP`) is a vector of vegetation index data that follows a [double logistic function](https://www.desmos.com/calculator/rftz67g7eb) (e.g. NDVI, EVI, EVI2), and the corresponding dates of the individual data points. With these, running the code is simple.
+
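+For reference, one common double-logistic parameterization (a sketch for intuition only, shown in a 7-parameter "greendown" form; the exact functional form used by the package may differ, see Gao et al. 2021) is:
+
+$$\mathrm{VI}(t) = m_1 + (m_2 - m_7 t)\left(\frac{1}{1 + e^{(m_3 - t)/m_4}} - \frac{1}{1 + e^{(m_5 - t)/m_6}}\right)$$
+
+where $t$ is the day of year, $m_1$ is the background VI value, and the remaining parameters control the amplitude, timing, and rate of green-up and green-down.
+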
+Here, to save some computing time while still showing the power of the BLSP algorithm in handling sparse data, we only use data from 1984 to 1994 to fit the model.
+```{r fit-blsp}
+sub_dt <- landsatEVI2[lubridate::year(date) %in% 1984:1994, ]
+results <- FitBLSP(
+ date_vec = sub_dt$date,
+ vi_vec = sub_dt$evi2,
+ verbose = TRUE
+)
+```
+
+The results are stored in an object with three elements. First, "phenos" is a table containing the estimated day of year (DOY) of "midgreenup" (or SOS) and "midgreendown" (or EOS) for each year contained in the sample data, along with upper and lower bounds. Second, "params" is a table containing the model parameter samples generated by [Markov chain Monte Carlo (MCMC) sampling](https://en.wikipedia.org/wiki/Markov_chain_Monte_Carlo). Third, "data" is the input data table used to fit the BLSP algorithm.
+```{r blsp-result-structure}
+str(results)
+```
+
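+For instance, to work with the spring phenometrics directly, you can extract the "phenos" table (a minimal sketch; the column names follow the structure shown by `str()` above):
+```{r access-phenos, eval=FALSE}
+# Extract the phenometrics table and look at the spring transition dates
+# along with their lower and upper bounds
+phenos_dt <- results$phenos
+phenos_dt[, .(Year, midgup_lower, midgup, midgup_upper)]
+```
+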
+Let's plot the reconstruction of the EVI2 time series using the supplied data. This will take a little time to run, as the function evaluates the MCMC-generated parameter samples to predict the best-fit curve.
+```{r blsp-fit-figure, fig.width=7.5, fig.height=4}
+fitted_dt <- PlotBLSP(results, if_return_fit = TRUE)
+```
+
+We see that "fitted_dt" is a data table with the information used to construct the plot.
+```{r blsp-fitted-table}
+head(fitted_dt)
+```
+
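+If you want to keep the fitted curve for later use, you could write it out with `data.table` (a sketch; the file name is arbitrary):
+```{r save-fitted, eval=FALSE}
+# Save the fitted time series to a CSV file for later analysis
+data.table::fwrite(fitted_dt, "blsp_fitted.csv")
+```
+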
+### Changing weights
+Some users may want to assign different weights to certain observations. For example, perhaps the QA for several days is bad and we want to downweight those observations so that they contribute less to the posterior distribution. To do so, we supply a vector of the same length as `vi_vec`, with values in the range [0, 1]. In the sample data, the `snow` column indicates the presence of snow on the corresponding dates. Here, we will assign weights of 0.1 to those observations. (Again, we only use data from 1984 to 1994 as an example.)
+```{r assign-weights}
+sub_dt <- landsatEVI2[lubridate::year(date) %in% 1984:1994,
+ .(date, evi2, weights = ifelse(snow, 0.1, 1), snow)
+]
+head(sub_dt)
+```
+
+Adding weights will make the code run longer. The reason is that downweighted observations make the data effectively sparser than it already is (e.g., points with weights near 0 contribute little information), so the Bayesian model may need more time to converge.
+
+Compare the first few rows of "phenos" below with the previous results above.
+```{r blsp-with-weights}
+results <- FitBLSP(
+ date_vec = sub_dt$date,
+ vi_vec = sub_dt$evi2,
+ weights_vec = sub_dt$weights,
+ verbose = TRUE
+)
+head(results$phenos)
+```
+
+
+### Assigning initial values
+As documented in [Gao et al. (2021)](https://www.sciencedirect.com/science/article/pii/S0034425721002029?via%3Dihub), good initial values can save the model some time to converge. By default, the `initValues` of the `FitBLSP()` function are determined by the BLSP model, so users don't have to provide them. But if you want, you *can* get initial values by fitting an average model in which all years of data are pooled together. However, there is some risk with this, as the average model inherently can't successfully fit all time series (that's one of the reasons we need the BLSP model!). If you try obtaining initial values using `FitAvgModel()` with your data and it fails, we suggest simply running `FitBLSP()` with the default `initValues` argument of `NULL`.
+
+```{r fit-avg-model}
+avg_fit <- FitAvgModel(sub_dt$date, sub_dt$evi2)
+print(avg_fit)
+```
+
+The average model can also be plotted out:
+```{r avg-fit-figure, fig.width=7, fig.height=5}
+par(mar = c(3, 3, 1, 1))
+PlotAvg(
+ date_vec = sub_dt$date,
+ vi_vec = sub_dt$evi2,
+ avg_fit = avg_fit
+)
+```
+
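+As mentioned above, the average model can fail on some time series. A minimal defensive sketch (hypothetical wrapper code, not part of the package) uses base R's `tryCatch()` to fall back to the default `initValues` of `NULL` on failure:
+```{r avg-fit-fallback, eval=FALSE}
+# Try the average model first; fall back to NULL (the default) on failure
+init_vals <- tryCatch(
+    FitAvgModel(sub_dt$date, sub_dt$evi2),
+    error = function(e) NULL
+)
+results <- FitBLSP(
+    date_vec = sub_dt$date,
+    vi_vec = sub_dt$evi2,
+    initValues = init_vals
+)
+```
+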
+Returning to `FitBLSP()`, the `initValues` argument can either be a numeric vector or an object returned by the `FitAvgModel()` function. Here we will use the latter.
+```{r blsp-with-initVal}
+results <- FitBLSP(
+ date_vec = sub_dt$date,
+ vi_vec = sub_dt$evi2,
+ initValues = avg_fit,
+ verbose = TRUE
+)
+```
+
+We can then see the result of the fit:
+```{r blsp-result}
+str(results)
+```
+
+
+## Processing BLSP for your own data
+If you have your own data you want to use this package for, that's great! Otherwise, we've provided a simple way for users to obtain EVI2 time series data for their area(s) of interest via Google Earth Engine (GEE) and Microsoft Planetary Computer (MPC). Here we are using Landsats 5, 7, 8, and 9 Collection 2 Surface Reflectance data. **Note that the GEE method requires you to have a Google account.**
+
+
+### Get data from Microsoft Planetary Computer
+Getting an EVI2 time series from Microsoft Planetary Computer (MPC) is easy with the functions we provide: you don't need to register an account, and everything runs in the R environment, which keeps your code consistent. But keep in mind that since MPC is relatively new, its API may change, which could make our functions throw errors in the future. Also, note that running the code in this section takes a while, so in this vignette we only include the example code and the corresponding results rather than running the code each time the library is built. This saves some time, but the results shown may differ slightly from what you get.
+
+To get an EVI2 time series from MPC, you need point locations specified by longitude and latitude. Although our functions only handle one point location at a time, you can process multiple points in a loop (see the sketch after the data-retrieval example below).
+
+```{r mpc-point-loc, eval = FALSE}
+# A point location specified by long/lat format
+# You MUST name the coordinates with `x` and `y` and put them into a data.frame
+pt_coords <- data.frame(x = -71.700975, y = 43.945733)
+```
+
+Then, you can specify the temporal period with a character string. And since we use parallel processing to process the data, you may specify the number of cores to use. **Note that for processing speed, in this example we are only getting data for 6 years. Because the BLSP model borrows information from other years, using only 6 years could yield relatively large uncertainty in the estimates depending on the data sparsity. So, once you are sure the code runs well on your machine, experiment with the length and sparsity of the input data.**
+
+```{r mpc-focal-dates, eval = FALSE}
+# Use the format of "start_date/end_date"
+focalDates <- "2011-01-01/2016-12-31"
+
+# Number of cores
+ncores <- 10
+```
+
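+If you are not sure how many cores your machine has, base R's `parallel` package can help (a sketch; leaving one core free is just a common convention):
+```{r mpc-ncores, eval = FALSE}
+# Detect the number of available cores, leaving one free
+ncores <- max(1, parallel::detectCores() - 1)
+```
+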
+To get the data, you can use the `GetEvi2PointTs()` function (it's going to take around 5 minutes for the function to prepare the data, depending on how long the requested temporal period is and on your internet speed).
+
+```{r mpc-get-data, eval = FALSE}
+evi2_dt <- GetEvi2PointTs(
+ pt_coords = pt_coords,
+ focalDates = focalDates,
+ ncores = ncores
+)
+```
+
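+As noted earlier, `GetEvi2PointTs()` handles one point at a time, so multiple points can be processed in a loop. A sketch (the second coordinate pair below is hypothetical):
+```{r mpc-multi-points, eval = FALSE}
+# Loop over the rows of a coordinate table; each `pts[i, ]` is a one-row
+# data.frame with the required `x` (longitude) and `y` (latitude) columns
+pts <- data.frame(
+    x = c(-71.700975, -71.725560),
+    y = c(43.945733, 43.952020)
+)
+evi2_list <- lapply(seq_len(nrow(pts)), function(i) {
+    GetEvi2PointTs(
+        pt_coords = pts[i, ],
+        focalDates = focalDates,
+        ncores = ncores
+    )
+})
+```
+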
+After the code finishes, you can see the result in the `evi2_dt` object (note that we did not remove observations flagged as snow):
+```{r mpc-evi2-ts, eval = FALSE}
+head(evi2_dt)
+```
+
+```
+ img_id lon lat evi2 date qa snow
+1: LT05_L2SP_013029_20100109_02_T1 -71.70097 43.94573 0.1000716 2010-01-09 13600 TRUE
+2: LT05_L2SP_012030_20100307_02_T1 -71.70097 43.94573 0.1410364 2010-03-07 5440 FALSE
+3: LT05_L2SP_013029_20100415_02_T1 -71.70097 43.94573 0.1754859 2010-04-15 5440 FALSE
+4: LE07_L2SP_013029_20100423_02_T1 -71.70097 43.94573 0.2231386 2010-04-23 5440 FALSE
+5: LT05_L2SP_012030_20100424_02_T1 -71.70097 43.94573 0.2273574 2010-04-24 5440 FALSE
+6: LT05_L2SP_012030_20100510_02_T1 -71.70097 43.94573 0.5618129 2010-05-10 5440 FALSE
+```
+
+Once you have the EVI2 time series, you can fit the BLSP model to the data as usual. The code might run for several minutes depending on the number of years in the time series.
+
+```{r mpc-blsp, eval=FALSE}
+# Remove snow
+evi2_dt <- evi2_dt[snow == FALSE, ]
+
+# Fit the average model
+avg_fit <- FitAvgModel(
+ date_vec = evi2_dt$date,
+ vi_vec = evi2_dt$evi2
+)
+
+# Fit the BLSP model
+results <- FitBLSP(
+ date_vec = evi2_dt$date,
+ vi_vec = evi2_dt$evi2,
+ initValues = avg_fit,
+ verbose = TRUE
+)
+```
+
+And plot out the model fit:
+```{r mpc-plot-blsp, eval=FALSE}
+png("mpcResults.png", width=1200, height=400)
+fitted_dt <- PlotBLSP(results, if_return_fit = TRUE)
+dev.off()
+```
+
+
+
+
+
+
+### Get data from GEE
+First, please go to [this link](https://code.earthengine.google.com/7e2cdb594772506e296bda8411b05d20) for the GEE script. If the link does not work, please open a new [code browser](https://code.earthengine.google.com) and copy/paste the script found [here](https://github.com/ncsuSEAL/Bayesian_LSP/blob/main/gee/getSampleData.js).
+
+Second, change the start and end dates (*Section 1: Define variables*) and the point coordinates (*Section 1: Define study locations*) as you need, and run the script by clicking the "Run" button.
+
+Third, go to the "Tasks" panel and click the "RUN" button in the row of the "exportEVI2" task. It will open a window with detailed information, including the task name, export directory (which has to be a Google Drive folder), export file name, and file type. We will use the default settings for this vignette. Click the "RUN" button in the window.
+
+It might take several minutes for GEE to process the time series depending on the current availability of its cloud computing resources. After it's done, the exported time series data file should be in your Google Drive folder.
+
+### Apply BLSP to GEE data
+
+Note: this GEE data is not included in our package as standalone data. Instead, we show the code as if loading the file from a local directory after downloading it from GEE. Because of this, the outputs shown here are from us running the code and then manually adding the results to each section.
+
+In R, load in the time series data file by:
+```{r gee-data, eval=FALSE}
+library(data.table)
+sample_data <- fread("sampleData.csv")
+```
+Using `head(sample_data)`, you will see it has 7 columns: sensor type, date, longitude, latitude, point id, EVI2 value, and pixel QA value.
+```{r, ex1}
+# satellite date lon lat id evi2 QA_PIXEL
+# 1: LANDSAT/LT05/C02/T1_L2 1991-10-29 43.94573 -71.70097 0 NA NA
+# 2: LANDSAT/LT05/C02/T1_L2 1991-10-29 43.95202 -71.72556 1 NA NA
+# 3: LANDSAT/LT05/C02/T1_L2 1991-10-29 43.95092 -71.72874 2 NA NA
+# 4: LANDSAT/LT05/C02/T1_L2 1991-10-29 43.95895 -71.73166 3 0.1578591 5440
+# 5: LANDSAT/LT05/C02/T1_L2 1991-10-29 43.94913 -71.73183 4 NA NA
+# 6: LANDSAT/LT05/C02/T1_L2 1991-10-29 43.95740 -71.73687 5 NA NA
+```
+
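+The `QA_PIXEL` column can be used to screen out low-quality observations before fitting. A sketch (assuming the standard Landsat Collection 2 Level-2 "clear" values, 5440 for Landsats 4-7 and 21824 for Landsats 8-9; check the Landsat documentation for your own data):
+```{r qa-filter, eval=FALSE}
+# Keep only observations whose pixel QA indicates a clear view
+sample_data <- sample_data[QA_PIXEL %in% c(5440, 21824), ]
+```
+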
+To run the BLSP model, let's select a single time series and omit all `NA` rows (important!).
+```{r single-ts, eval=FALSE}
+# Select one single time series
+single_ts <- sample_data[id == 1, .(date, evi2)]
+
+# Remember to remove NA rows before running the `blsp` package
+single_ts <- na.omit(single_ts)
+head(single_ts)
+
+# date evi2
+# 1: 1984-06-19 0.7871580
+# 2: 1984-09-23 0.5792558
+# 3: 1985-06-22 0.7871400
+# 4: 1985-08-09 0.7356111
+# 5: 1985-10-12 0.4650306
+# 6: 1986-06-09 0.7066563
+```
+
+Now, we can use the `blsp` package as usual. This code chunk might run for several minutes depending on how many years are in the time series.
+```{r fit-gee-data, eval=FALSE}
+# Fit the average model
+avg_fit <- FitAvgModel(
+ date_vec = single_ts$date,
+ vi_vec = single_ts$evi2
+)
+
+# Fit the BLSP model
+results <- FitBLSP(
+ date_vec = single_ts$date,
+ vi_vec = single_ts$evi2,
+ initValues = avg_fit,
+ verbose = TRUE
+)
+```
+
+Plot out the model fit:
+```{r plot-blsp-gee-data, eval=FALSE}
+png("geeResults.png", width=1200, height=400)
+fitted_dt <- PlotBLSP(results, if_return_fit = TRUE)
+dev.off()
+```
+
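+If your exported file contains multiple point ids, you could wrap the steps above in a loop; a sketch (assuming the `sample_data` object loaded earlier):
+```{r gee-multi-ids, eval=FALSE}
+# Fit BLSP for every point id in the exported file
+all_results <- lapply(unique(sample_data$id), function(pid) {
+    ts_pid <- na.omit(sample_data[id == pid, .(date, evi2)])
+    FitBLSP(date_vec = ts_pid$date, vi_vec = ts_pid$evi2)
+})
+```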
+
+
+If you encounter any issues while using the package, please feel free to [leave us an issue on Github](https://github.com/ncsuSEAL/Bayesian_LSP).
+
+
+
diff --git a/vignettes/mpcResults.png b/vignettes/mpcResults.png
index 5512d71..8daf6d9 100644
Binary files a/vignettes/mpcResults.png and b/vignettes/mpcResults.png differ