Skip to content

Commit

Permalink
solved #86
Browse files Browse the repository at this point in the history
  • Loading branch information
loelschlaeger committed Apr 12, 2023
1 parent e1e3e32 commit 1bda8eb
Show file tree
Hide file tree
Showing 5 changed files with 139 additions and 65 deletions.
3 changes: 2 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,8 @@ Imports:
Rcpp,
progress,
foreach,
cli
cli,
padr
LinkingTo:
Rcpp,
RcppArmadillo
Expand Down
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ importFrom(graphics,plot.new)
importFrom(graphics,points)
importFrom(graphics,text)
importFrom(graphics,title)
importFrom(padr,pad)
importFrom(stats,AIC)
importFrom(stats,BIC)
importFrom(stats,acf)
Expand Down
94 changes: 65 additions & 29 deletions R/download_data.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@
#' This function downloads stock data from <https://finance.yahoo.com/>.
#'
#' @details
#' The downloaded data has the following columns:
#' Yahoo Finance provides the following historical daily data for a stock or
#' an index:
#' \itemize{
#' \item \code{Date}: The date.
#' \item \code{Open}: Opening price.
Expand All @@ -19,36 +20,46 @@
#' A \code{character}, the stock's symbol. It must match the identifier on
#' <https://finance.yahoo.com/>.
#' @param from
#' A \code{character}, a date in format \code{"YYYY-MM-DD"}, setting the lower
#' A \code{character} in the format \code{"YYYY-MM-DD"}, setting the lower
#' data bound. Must not be earlier than \code{"1902-01-01"} (default).
#' @param to
#' A \code{character}, a date in format \code{"YYYY-MM-DD"}, setting the upper
#' A \code{character} in the format \code{"YYYY-MM-DD"}, setting the upper
#' data bound. Default is the current date \code{Sys.date()}.
#' @param columns
#' A \code{character}
#' @param fill_dates
#' Set to \code{TRUE} to fill missing dates (e.g., days at which the stock
#' market is closed )with \code{NA}s.
#' @param file
#' Either
#' * \code{NULL} (default) to return the data as a \code{data.frame},
#' * or a \code{character}, the name of the file where the data is saved as a
#' \code{.csv}-file.
#' * or a \code{character}, the path where the data is saved as a .csv-file.
#' @param verbose
#' Set to \code{TRUE} to return information about download success.
#' Set to \code{TRUE} (default) to print information about download success.
#'
#' @return
#' A \code{data.frame} if \code{file = NULL}.
#'
#' @examples
#' ### download 21st century DAX data
#' data <- download_data(symbol = "^GDAXI", from = "2000-01-03")
#' ### download 21st century closing prices of the DAX data
#' data <- download_data(
#' symbol = "^GDAXI", from = "2000-01-01", columns = c("Date", "Close"),
#' fill_dates = TRUE
#' )
#' head(data)
#'
#' @export
#'
#' @importFrom utils download.file read.csv head tail
#' @importFrom padr pad

download_data <- function(
symbol, from = "1902-01-01", to = Sys.Date(), file = NULL, verbose = TRUE
symbol, from = "1902-01-01", to = Sys.Date(), file = NULL,
columns = c("Date", "Open", "High", "Low", "Close", "Adj.Close", "Volume"),
fill_dates = FALSE, verbose = TRUE
) {

### check input
### check inputs
if (!is.character(symbol) || length(symbol) != 1) {
stop("'symbol' must be a single character.", call. = FALSE)
}
Expand All @@ -62,7 +73,19 @@ download_data <- function(
stop("'file' is invalid.", call. = FALSE)
}
}
if (length(verbose) != 1 || (!isTRUE(verbose) && !isFALSE(verbose))) {
if (!is.character(columns)) {
stop("'columns' must be a character vector.", call. = FALSE)
}
col_all <- c("Date", "Open", "High", "Low", "Close", "Adj.Close", "Volume")
columns <- intersect(columns, col_all)
if (length(columns) == 0) {
warning("'columns' is misspecified, no columns selected.", call. = FALSE)
return(invisible(NULL))
}
if (!isTRUE(fill_dates) && !isFALSE(fill_dates)) {
stop("'fill_dates' must be either TRUE or FALSE.", call. = FALSE)
}
if (!isTRUE(verbose) && !isFALSE(verbose)) {
stop("'verbose' must be either TRUE or FALSE.", call. = FALSE)
}

Expand Down Expand Up @@ -98,36 +121,49 @@ download_data <- function(
return(url)
}

### try to download data
### download data
data_url <- create_url(symbol, from, to)
destfile <- ifelse(save_file, file, tempfile())
download_try <- suppressWarnings(
try(utils::download.file(data_url, destfile = destfile, quiet = TRUE),
silent = TRUE
)
)

### check 'download_try'
if (inherits(download_try, "try-error")) {
stop(
"Download failed.\n",
"Either 'symbol' is unknown or there is no data for the specified time interval.",
"Download failed. This can have different reasons:\n",
"Maybe 'symbol' is unknown.\n",
"Or there is no data for the specified time interval.",
call. = FALSE
)
}
data <- utils::read.csv(file = destfile, header = TRUE, sep = ",", na.strings = "null")
if (save_file) {
if (verbose) {
### print summary of new data
message(
"Download successful.\n",
"* symbol: ", symbol, "\n",
"* from: ", utils::head(data$Date, n = 1), "\n",
"* to: ", utils::tail(data$Date, n = 1), "\n",
"* path: ", normalizePath(destfile)
)
}
} else {
data <- utils::read.csv(
file = destfile, header = TRUE, sep = ",", na.strings = "null"
)

### fill missing dates with NA
if (fill_dates) {
data$Date <- as.Date(data$Date)
data <- padr::pad(data, interval = "day", start_val = from, end_val = to)
data$Date <- as.character(data$Date)
}

### select columns
data <- data[, columns, drop = FALSE]

### create output
if (verbose) {
### print summary of new data
message(
"Download successful.\n",
"* symbol: ", symbol, "\n",
"* columns: ", paste(columns, collapse =", ") , "\n",
"* from: ", utils::head(data$Date, n = 1), "\n",
"* to: ", utils::tail(data$Date, n = 1),
if (save_file) paste("\n* path:", normalizePath(destfile))
)
}
if (!save_file) {
return(data)
}
}
26 changes: 18 additions & 8 deletions man/download_data.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

80 changes: 53 additions & 27 deletions tests/testthat/test-download_data.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,53 +8,79 @@ test_that("input checks for data download work", {
"'file' is invalid."
)
expect_error(
download_data(symbol = "^GDAXI", file = paste0(tempfile(), ".csv"), verbose = "not_TRUE_or_FALSE"),
download_data(symbol = "^GDAXI", columns = 1),
"'columns' must be a character vector"
)
expect_warning(
download_data(symbol = "^GDAXI", columns = "not_a_column_name"),
"'columns' is misspecified, no columns selected."
)
expect_error(
download_data(
symbol = "^GDAXI",
file = paste0(tempfile(), ".csv"),
verbose = "not_TRUE_or_FALSE"
),
"'verbose' must be either TRUE or FALSE."
)
expect_error(
download_data(
symbol = "^GDAXI",
fill_dates = "not_TRUE_or_FALSE"
),
"'fill_dates' must be either TRUE or FALSE."
)
expect_message(
download_data(symbol = "^GDAXI", file = paste0(tempfile(), ".csv"), verbose = TRUE)
download_data(
symbol = "^GDAXI", file = paste0(tempfile(), ".csv"), verbose = TRUE
)
)
})

test_that("data download works", {
test_that("data download returns expected data", {
skip_if_offline()
symbol <- "^GDAXI"
from <- "2000-01-03"
from <- "2000-01-01"
to <- "2000-01-10"
data2 <- download_data(symbol = symbol, from = from, to = to, file = NULL, verbose = FALSE)
expect_true(is.data.frame(data2))
data_direct <- download_data(
symbol = symbol, from = from, to = to, verbose = FALSE
)
expect_true(is.data.frame(data_direct))
file <- paste0(tempfile(), ".csv")
download_data(symbol = symbol, from = from, to = to, file = file, verbose = FALSE)
data <- read.csv(file = file, header = TRUE, sep = ",", na.strings = "null")
expect_identical(data, data2)
download_data(
symbol = symbol, from = from, to = to, file = file, verbose = FALSE
)
data_file <- read.csv(
file = file, header = TRUE, sep = ",", na.strings = "null"
)
expect_identical(data_file, data_direct)
data_small <- download_data(
symbol = symbol, from = from, to = to, columns = "Close", verbose = FALSE,
fill_dates = TRUE
)
expect_equal(
data,
data_small,
structure(
list(Date = c("2000-01-03", "2000-01-04", "2000-01-05",
"2000-01-06", "2000-01-07", "2000-01-10"),
Open = c(6961.720215, 6747.240234, 6585.850098, 6501.450195,
6489.939941, 6785.470215),
High = c(7159.330078, 6755.359863, 6585.850098, 6539.310059,
6791.529785, 6975.259766),
Low = c(6720.870117, 6510.459961, 6388.910156, 6402.629883,
6470.140137, 6785.470215),
Close = c(6750.759766, 6586.950195, 6502.069824, 6474.919922,
6780.959961, 6925.52002),
Adj.Close = c(6750.759766, 6586.950195, 6502.069824, 6474.919922,
6780.959961, 6925.52002),
Volume = c(43072500L, 46678400L, 52682800L, 41180600L,
56058900L, 42006200L)),
list(
Close = c(NA, NA, 6750.759766, 6586.950195, 6502.069824, 6474.919922,
6780.959961, NA, NA, 6925.52002)
),
class = "data.frame",
row.names = c(NA, -6L)
row.names = c(NA, -10L)
)
)
wrong_from <- "1901-01-01"
expect_warning(
download_data(symbol = symbol, from = wrong_from, to = to, file = file, verbose = FALSE),
download_data(
symbol = symbol, from = wrong_from, to = to, file = file, verbose = FALSE
),
"'from' is set to lower bound of '1902-01-01'."
)
expect_error(
download_data(symbol = symbol, from = from, to = as.Date(from) - 1, file = file, verbose = FALSE),
download_data(
symbol = symbol, from = from, to = as.Date(from) - 1, file = file,
verbose = FALSE
),
"'to' must not be earlier than 'from'."
)
expect_error(
Expand Down

0 comments on commit 1bda8eb

Please sign in to comment.