Skip to content

Commit

Permalink
Fix date operations for R 4.3; speed up summarization by ~60%
Browse files Browse the repository at this point in the history
Extracting the date directly from date-times with date(), rather than
converting via as_date(), saves a LOT of time.

Model building actually seems quite fast compared to the other summaries,
so it doesn't seem necessary to limit the time period just yet.
  • Loading branch information
glin committed Aug 26, 2023
1 parent cd4ea6c commit bf70018
Show file tree
Hide file tree
Showing 6 changed files with 40 additions and 8 deletions.
4 changes: 2 additions & 2 deletions R/stocks.R
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ get_price_data <- function(data) {
prices_5d <- last_n_days(data, 5)[, list(time, ticker, curr)]
prices_1m <- last_n_days(data, 30)[, list(time, ticker, curr)]
# Aggregate by day
prices_all <- data[, list(curr = max(curr)), by = list(time = as_date(time), ticker)]
prices_all <- data[, list(curr = max(curr)), by = list(time = date(time), ticker)]

peaks <- peaks_by_day(data)

Expand Down Expand Up @@ -163,7 +163,7 @@ last_n_days <- function(data, n, full_days = FALSE) {
time_day <- 3600*24
start_time <- max(data$time) - n * time_day
if (full_days) {
start_time <- as_date(start_time)
start_time <- floor_date(start_time)
}
data[time >= start_time]
}
Expand Down
6 changes: 3 additions & 3 deletions R/summary.R
Original file line number Diff line number Diff line change
Expand Up @@ -75,11 +75,11 @@ summarize_all <- function(data) {
# Get price highs by day
high_by_day <- function(data, fill = TRUE) {
highs <- data[data[, list(.I = .I[which.max(curr)]),
by = list(ticker, date = as_date(time))]$.I]
by = list(ticker, date = date(time))]$.I]

if (fill) {
# Fill in missing days
highs[, date := as_date(time)]
highs[, date := date(time)]
all_dates <- highs[, list(date = seq(min(date), max(date), by = "days")), by = ticker]
highs <- highs[all_dates, on = list(date, ticker)]
highs[is.na(curr), time := as_datetime(format(date))]
Expand Down Expand Up @@ -148,7 +148,7 @@ summarize_volume_price <- function(data) {
# Summarize market volume (shares bought) by day
summarize_volume_day <- function(data) {
summ <- data[, list(volume = total_volume(volume)),
by = list(date = as_date(time), ticker)]
by = list(date = date(time), ticker)]
summ <- summ[, list(volume = sum(volume)), by = date]
summ
}
Expand Down
11 changes: 11 additions & 0 deletions R/utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,20 @@ as_datetime <- function(x, tz = TZ_NST) {
}

# Coerce `x` to a Date by delegating to as.Date(), forwarding `tz`.
as_date <- function(x, tz = TZ_NST) {
  # Note: as.Date ignores tz for character strings
  converted <- as.Date(x, tz = tz)
  converted
}

# Extract the date from a date-time object that already exists.
# Prefer this to as_date() for date-time input, as date() will be much faster.
date <- function(x) {
  day <- lubridate::date(x)
  day
}

# Round `x` down to the given `unit` ("day" by default) by delegating to
# lubridate::floor_date().
floor_date <- function(x, unit = "day") {
  floored <- lubridate::floor_date(x, unit = unit)
  floored
}

# Format a date-time as a "YYYY-MM-DDTHH:MM:SSZ" string, rendered in time
# zone `tz` (UTC by default).
# NOTE(review): the trailing "Z" is a literal in the format string, so it is
# emitted for any `tz` -- confirm callers only rely on it for UTC.
to_iso_string <- function(time, tz = "UTC") {
  iso_format <- "%Y-%m-%dT%H:%M:%SZ"
  format(as.POSIXlt(time, tz = tz), format = iso_format, usetz = FALSE)
}
Expand Down
2 changes: 1 addition & 1 deletion tests/testthat/test-predict.R
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
test_that("predict_prices", {
data <- read_stock_data(stock_data_example())
predicted <- predict_prices(data[time <= as_date("2018-07-15")])
predicted <- predict_prices(data[time <= as_datetime("2018-07-15")])

p <- predicted[curr %in% c(6, 10, 15, 30, 60, 90), p]
expected_p <- c(8.03, 7.18, 4.46, 0.348, 0.176, 0.052)
Expand Down
10 changes: 9 additions & 1 deletion tests/testthat/test-stocks.R
Original file line number Diff line number Diff line change
Expand Up @@ -211,7 +211,7 @@ test_that("get_price_data", {
expect_equal(prices[["1d"]], data[1:2, ])
expect_equal(prices[["5d"]], data[1:3, ])
expect_equal(prices[["1m"]], data[1:4, ])
expect_equal(prices[["all"]], data[, list(time = as_date(time), ticker, curr)])
expect_equal(prices[["all"]], data[, list(time = date(time), ticker, curr)])
expect_equal(prices$peaks, peaks_by_day(data))
})

Expand Down Expand Up @@ -316,6 +316,14 @@ test_that("last_n_days", {
))
))

expect_equal(last_n_days(data, 3), data.table(
time = as_datetime(c(
"2018-03-06 8:24:58",
"2018-03-07",
"2018-03-07 08:24:59"
))
))

expect_equal(last_n_days(data, 3, TRUE), data.table(
time = as_datetime(c(
"2018-03-04",
Expand Down
15 changes: 14 additions & 1 deletion tests/testthat/test-utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,23 @@ test_that("as_datetime converts time strings", {

test_that("as_date converts date strings", {
date <- as_date("2018-07-03")
expected <- as.Date("2018-07-03", tz = TZ_NST)
expected <- as.Date("2018-07-03")
expect_equal(date, expected)
})

test_that("date gets the date of date-times", {
  # A date-time input yields its date
  datetime <- as_datetime("2018-07-01T17:30:00Z")
  expect_equal(date(datetime), as.Date("2018-07-01"))

  # A Date input yields the same date
  day <- as_date("2018-07-03")
  expect_equal(date(day), as.Date("2018-07-03"))
})

test_that("floor_date rounds dates down", {
  # With the default unit, the result should equal the date-time parsed
  # from the bare date string
  floored <- floor_date(as_datetime("2018-07-01T17:30:00Z"))
  expect_equal(floored, as_datetime("2018-07-01"))
})

test_that("to_iso_string converts datetimes", {
time <- as_datetime("2018-07-01T17:30:00Z")
str <- to_iso_string(time)
Expand Down

0 comments on commit bf70018

Please sign in to comment.