Skip to content

Commit

Permalink
Add adjust_tz argument to writer functions (tidyverse#714)
Browse files Browse the repository at this point in the history
  • Loading branch information
gorcha authored Feb 25, 2023
1 parent cb6bae5 commit 742a266
Show file tree
Hide file tree
Showing 10 changed files with 162 additions and 15 deletions.
17 changes: 16 additions & 1 deletion NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,23 @@
* Ignore invalid SAV timestamp strings (#683).
* Fix compiler warnings (#707).

* `write_*()` functions gain a new `adjust_tz` argument to allow more control
over time zone conversion for date-time variables (#702). Thanks to @jmobrien
for the detailed issue and feedback.

Stata, SPSS and SAS do not have a concept of time zone. Since haven 2.4.0,
date-time values in non-UTC time zones are implicitly converted when writing
to ensure the time displayed in Stata/SPSS/SAS will match the time displayed
to the user in R (see #555). This is the behaviour when `adjust_tz = TRUE`
(the default). Although this is in line with general user expectations, it can
cause issues when the time zone is important, e.g. when looking at
differences between time points, since the underlying numeric data is changed
to preserve the displayed time. Use `adjust_tz = FALSE` to write the time as
the corresponding UTC value, which will appear different to the user but
preserves the underlying numeric data.

* Fixed issue in `write_*()` functions where invisible return of input data
frame included unintended alteration of date/time variables. (@jmobrien, #702)
frame included unintended alteration of date time variables. (@jmobrien, #702)

* The experimental `write_sas()` function has been deprecated (#224). The
sas7bdat file format is complex and undocumented, and as such writing SAS
Expand Down
25 changes: 20 additions & 5 deletions R/haven-sas.R
Original file line number Diff line number Diff line change
Expand Up @@ -81,11 +81,11 @@ read_sas <- function(data_file, catalog_file = NULL,
#' @export
write_sas <- function(data, path) {
  # Deprecated writer: warn and point callers at write_xpt().
  lifecycle::deprecate_warn("2.6.0", "write_sas()", "write_xpt()")

  # Check the input first (presumably errors on data that cannot be written
  # -- confirm against validate_sas()).
  validate_sas(data)

  # This function has no `adjust_tz` argument, so the time zone adjustment is
  # always applied. It is applied to a copy so the caller's data is untouched.
  tz_adjusted <- adjust_tz(data)
  target <- normalizePath(path, mustWork = FALSE)
  write_sas_(tz_adjusted, target)

  # Hand back the unmodified input, invisibly, so the call can be piped.
  invisible(data)
}

Expand Down Expand Up @@ -134,7 +134,19 @@ read_xpt <- function(file, col_select = NULL, skip = 0, n_max = Inf, .name_repai
#'
#' Note that although SAS itself supports dataset labels up to 256 characters
#' long, dataset labels in SAS transport files must be <= 40 characters.
write_xpt <- function(data, path, version = 8, name = NULL, label = attr(data, "label")) {
#' @param adjust_tz Stata, SPSS and SAS do not have a concept of time zone,
#' and all [date-time] variables are treated as UTC. `adjust_tz` controls
#' how the timezone of date-time values is treated when writing.
#'
#' * If `TRUE` (the default) the timezone of date-time values is ignored, and
#' they will display the same in R and Stata/SPSS/SAS, e.g.
#' `"2010-01-01 09:00:00 NZDT"` will be written as `"2010-01-01 09:00:00"`.
#' Note that this changes the underlying numeric data, so use caution if
#' preserving between-time-point differences is critical.
#' * If `FALSE`, date-time values are written as the corresponding UTC value,
#' e.g. `"2010-01-01 09:00:00 NZDT"` will be written as
#' `"2009-12-31 20:00:00"`.
write_xpt <- function(data, path, version = 8, name = NULL, label = attr(data, "label"), adjust_tz = TRUE) {
if (!version %in% c(5, 8)) {
cli_abort("SAS transport file version {.val {version}} is not currently supported.")
}
Expand All @@ -145,8 +157,11 @@ write_xpt <- function(data, path, version = 8, name = NULL, label = attr(data, "
name <- validate_xpt_name(name, version)
label <- validate_xpt_label(label)

validate_sas(data)
data_out <- adjust_tz(data)
data_out <- validate_sas(data)

if (isTRUE(adjust_tz)) {
data_out <- adjust_tz(data_out)
}

write_xpt_(
data_out,
Expand Down
22 changes: 19 additions & 3 deletions R/haven-spss.R
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,19 @@ read_por <- function(file, user_na = FALSE, col_select = NULL, skip = 0, n_max =
#'
#' `TRUE` and `FALSE` can be used for backwards compatibility, and correspond
#' to the "zsav" and "none" options respectively.
write_sav <- function(data, path, compress = c("byte", "none", "zsav")) {
#' @param adjust_tz Stata, SPSS and SAS do not have a concept of time zone,
#' and all [date-time] variables are treated as UTC. `adjust_tz` controls
#' how the timezone of date-time values is treated when writing.
#'
#' * If `TRUE` (the default) the timezone of date-time values is ignored, and
#' they will display the same in R and Stata/SPSS/SAS, e.g.
#' `"2010-01-01 09:00:00 NZDT"` will be written as `"2010-01-01 09:00:00"`.
#' Note that this changes the underlying numeric data, so use caution if
#' preserving between-time-point differences is critical.
#' * If `FALSE`, date-time values are written as the corresponding UTC value,
#' e.g. `"2010-01-01 09:00:00 NZDT"` will be written as
#' `"2009-12-31 20:00:00"`.
write_sav <- function(data, path, compress = c("byte", "none", "zsav"), adjust_tz = TRUE) {
if (isTRUE(compress)) {
compress <- "zsav"
} else if (isFALSE(compress)) {
Expand All @@ -85,8 +97,12 @@ write_sav <- function(data, path, compress = c("byte", "none", "zsav")) {
compress <- arg_match(compress)
}

validate_sav(data)
data_out <- adjust_tz(data)
data_out <- validate_sav(data)

if (isTRUE(adjust_tz)) {
data_out <- adjust_tz(data_out)
}

write_sav_(data_out, normalizePath(path, mustWork = FALSE), compress = compress)
invisible(data)
}
Expand Down
21 changes: 18 additions & 3 deletions R/haven-stata.R
Original file line number Diff line number Diff line change
Expand Up @@ -75,11 +75,26 @@ read_stata <- read_dta
#' 2045, the maximum length of str# variables. See the Stata [long
#' string](https://www.stata.com/features/overview/long-strings/)
#' documentation for more details.
write_dta <- function(data, path, version = 14, label = attr(data, "label"), strl_threshold = 2045) {
validate_dta(data, version = version)
#' @param adjust_tz Stata, SPSS and SAS do not have a concept of time zone,
#' and all [date-time] variables are treated as UTC. `adjust_tz` controls
#' how the timezone of date-time values is treated when writing.
#'
#' * If `TRUE` (the default) the timezone of date-time values is ignored, and
#' they will display the same in R and Stata/SPSS/SAS, e.g.
#' `"2010-01-01 09:00:00 NZDT"` will be written as `"2010-01-01 09:00:00"`.
#' Note that this changes the underlying numeric data, so use caution if
#' preserving between-time-point differences is critical.
#' * If `FALSE`, date-time values are written as the corresponding UTC value,
#' e.g. `"2010-01-01 09:00:00 NZDT"` will be written as
#' `"2009-12-31 20:00:00"`.
write_dta <- function(data, path, version = 14, label = attr(data, "label"), strl_threshold = 2045, adjust_tz = TRUE) {
data_out <- validate_dta(data, version = version)
validate_dta_label(label)

data_out <- adjust_tz(data)
if (isTRUE(adjust_tz)) {
data_out <- adjust_tz(data_out)
}

write_dta_(
data_out,
normalizePath(path, mustWork = FALSE),
Expand Down
17 changes: 16 additions & 1 deletion man/read_dta.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

16 changes: 15 additions & 1 deletion man/read_spss.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

23 changes: 22 additions & 1 deletion man/read_xpt.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

22 changes: 22 additions & 0 deletions tests/testthat/test-haven-sas.R
Original file line number Diff line number Diff line change
Expand Up @@ -166,6 +166,28 @@ test_that("can roundtrip missing values (as much as possible)", {
expect_equal(roundtrip_var(NA_character_, "xpt"), "")
})

test_that("can roundtrip date times", {
  # Plain dates, including a missing value, come back unchanged.
  dates <- c(as.Date("2010-01-01"), NA)
  expect_equal(roundtrip_var(dates, "xpt"), dates)

  auckland <- as.POSIXct("2010-01-01 09:00", tz = "Pacific/Auckland")

  # Default writer behaviour: converted to same time in UTC.
  expect_equal(
    roundtrip_var(auckland, "xpt"),
    as.POSIXct("2010-01-01 09:00", tz = "UTC")
  )

  # With adjust_tz = FALSE the underlying instant is kept, so the value read
  # back equals the original re-expressed in UTC.
  same_instant_utc <- auckland
  attr(same_instant_utc, "tzone") <- "UTC"
  expect_equal(
    roundtrip_var(auckland, "xpt", adjust_tz = FALSE),
    same_instant_utc
  )

  # Variable labels survive the roundtrip.
  attr(auckland, "label") <- "abc"
  expect_equal(attr(roundtrip_var(auckland, "xpt"), "label"), "abc")
})

test_that("invalid files generate informative errors", {
expect_snapshot(error = TRUE, {
write_xpt(mtcars, file.path(tempdir(), " temp.xpt"))
Expand Down
7 changes: 7 additions & 0 deletions tests/testthat/test-haven-spss.R
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,13 @@ test_that("can roundtrip date times", {
as.POSIXct("2010-01-01 09:00", tz = "UTC")
)

x2_utc <- x2
attr(x2_utc, "tzone") <- "UTC"
expect_equal(
roundtrip_var(x2, "sav", adjust_tz = FALSE),
x2_utc
)

attr(x2, "label") <- "abc"
expect_equal(attr(roundtrip_var(x2, "sav"), "label"), "abc")
})
Expand Down
7 changes: 7 additions & 0 deletions tests/testthat/test-haven-stata.R
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,13 @@ test_that("can roundtrip date times", {
as.POSIXct("2010-01-01 09:00", tz = "UTC")
)

# With adjust_tz = FALSE the underlying instant must be preserved, so the
# value read back equals the original re-expressed in UTC. This is the Stata
# test file, so the roundtrip has to go through the "dta" writer (not "sav")
# for write_dta()'s adjust_tz argument to be exercised.
x2_utc <- x2
attr(x2_utc, "tzone") <- "UTC"
expect_equal(
  roundtrip_var(x2, "dta", adjust_tz = FALSE),
  x2_utc
)

attr(x2, "label") <- "abc"
expect_equal(attr(roundtrip_var(x2, "dta"), "label"), "abc")
})
Expand Down

0 comments on commit 742a266

Please sign in to comment.