Skip to content

Commit

Permalink
Implement fct() (#305)
Browse files Browse the repository at this point in the history
Fixes #299
  • Loading branch information
hadley authored May 19, 2022
1 parent db1a486 commit 4315e76
Show file tree
Hide file tree
Showing 9 changed files with 192 additions and 2 deletions.
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ S3method(as_factor,logical)
S3method(as_factor,numeric)
export("%>%")
export(as_factor)
export(fct)
export(fct_anon)
export(fct_c)
export(fct_collapse)
Expand Down
3 changes: 3 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
# forcats (development version)

* New `fct()` which works like `factor()` but errors if values of `x`
are not included in the levels specification (#299).

* `first2()` and `last2()` now ignore missing values in both `x` and `y` (#303).

* Error messages are more informative.
Expand Down
5 changes: 4 additions & 1 deletion R/explicit_na.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,17 @@
#' appear in summaries and on plots.
#'
#' @param f A factor (or character vector).
#' @param na_level Level to use for missing values: this is what NAs will be changed to.
#' @param na_level Level to use for missing values: this is what `NA`s will be
#' changed to.
#' @export
#' @examples
#' f1 <- factor(c("a", "a", NA, NA, "a", "b", NA, "c", "a", "c", "b"))
#' fct_count(f1)
#' table(is.na(f1))
#'
#' f2 <- fct_explicit_na(f1)
#' fct_count(f2)
#' table(is.na(f2))
fct_explicit_na <- function(f, na_level = "(Missing)") {
f <- check_factor(f)

Expand Down
62 changes: 62 additions & 0 deletions R/fct.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
#' Create a factor
#'
#' `fct()` is a stricter version of [factor()] that errors if your
#' specification of `levels` is inconsistent with the values in `x`.
#'
#' @param x A character vector. Values must occur in either `levels` or `na`.
#' @param levels A character vector of known levels. If not supplied, will
#' be computed from the unique values of `x`, in the order in which they
#' occur.
#' @param na A character vector of values that should become missing values.
#' @return A factor.
#' @export
#' @examples
#' # Use factors when you know the set of possible values a variable might take
#' x <- c("A", "O", "O", "AB", "A")
#' fct(x, levels = c("O", "A", "B", "AB"))
#'
#' # If you don't specify the levels, fct() will create them from the data
#' # in the order in which they first appear
#' fct(x)
#'
#'
#' # Differences with base R -----------------------------------------------
#' # factor() silently generates NAs
#' x <- c("a", "b", "c")
#' factor(x, levels = c("a", "b"))
#' # fct() errors
#' try(fct(x, levels = c("a", "b")))
#' # Unless you explicitly supply NA:
#' fct(x, levels = c("a", "b"), na = "c")
#'
#' # factor() sorts default levels:
#' factor(c("y", "x"))
#' # fct() uses the order of appearance:
#' fct(c("y", "x"))
fct <- function(x = character(), levels = NULL, na = character()) {
  # Validate argument types first. `levels` may be left as NULL, in which
  # case it is inferred from `x` further down.
  if (!is.character(x)) {
    cli::cli_abort("{.arg x} must be a character vector")
  }
  if (!is.character(na)) {
    cli::cli_abort("{.arg na} must be a character vector")
  }
  if (!is.null(levels) && !is.character(levels)) {
    cli::cli_abort("{.arg levels} must be a character vector")
  }

  # Values listed in `na` become missing before levels are inferred or checked.
  is_na_value <- x %in% na
  x[is_na_value] <- NA

  if (is.null(levels)) {
    # Default levels: distinct non-missing values, in order of first appearance.
    levels <- unique(x[!is.na(x)])
  }

  # Anything left in `x` that is neither a known level nor missing is an error.
  invalid <- setdiff(x, c(levels, NA))
  if (length(invalid) != 0) {
    cli::cli_abort(c(
      "All values of {.arg x} must appear in {.arg levels} or {.arg na}",
      i = "Missing level{?s}: {.str {invalid}}"
    ))
  }

  # `exclude = NULL` keeps NA as a level when `levels` explicitly contains it.
  factor(x, levels = levels, exclude = NULL)
}
1 change: 1 addition & 0 deletions _pkgdown.yml
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ reference:

- title: Other helpers
contents:
- fct
- as_factor
- fct_count
- fct_match
Expand Down
48 changes: 48 additions & 0 deletions man/fct.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 4 additions & 1 deletion man/fct_explicit_na.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

27 changes: 27 additions & 0 deletions tests/testthat/_snaps/fct.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# checks input types

Code
fct(1:3)
Condition
Error in `fct()`:
! `x` must be a character vector
Code
fct("x", 1:3)
Condition
Error in `fct()`:
! `levels` must be a character vector
Code
fct("x", "y", na = 1)
Condition
Error in `fct()`:
! `na` must be a character vector

# clear error if levels are incomplete

Code
fct(c("x", "y", "z"), c("x", "y"))
Condition
Error in `fct()`:
! All values of `x` must appear in `levels` or `na`
i Missing level: "z"

42 changes: 42 additions & 0 deletions tests/testthat/test-fct.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
test_that("can create simple example", {
  # Already-sorted input: fct() and factor() produce the same levels.
  values <- c("x", "y", "z")
  expect_equal(fct(values), factor(values))
})

test_that("orders by appearance", {
  # Levels follow first appearance in the input rather than sorted order.
  values <- c("y", "x")
  expect_equal(fct(values), factor(values, levels = values))
})

# Each argument must be a character vector; the snapshot records the error
# raised for a non-character `x`, `levels`, and `na` in turn.
test_that("checks input types", {
expect_snapshot(error = TRUE, {
fct(1:3)
fct("x", 1:3)
fct("x", "y", na = 1)
})
})

# A value of `x` that is absent from `levels` (and not listed in `na`) must
# raise an error naming the missing level; the message is pinned by a snapshot.
test_that("clear error if levels are incomplete", {
expect_snapshot(error = TRUE,
fct(c("x", "y", "z"), c("x", "y"))
)
})

test_that("can convert values to implicit or explicit NA", {
  # Implicit NA: "z" becomes missing and is not a level, whether the levels
  # are inferred from the data ...
  expect_equal(
    fct(c("x", "y", "z"), na = "z"),
    factor(c("x", "y", NA), levels = c("x", "y"))
  )
  # ... or supplied explicitly.
  expect_equal(
    fct(c("x", "y", "z"), c("x", "y"), na = "z"),
    factor(c("x", "y", NA), levels = c("x", "y"))
  )
  # Explicit NA: listing NA in `levels` keeps it as a level of the result.
  expect_equal(
    fct(c("x", "y", "z"), c("x", "y", NA), na = "z"),
    factor(c("x", "y", NA), levels = c("x", "y", NA), exclude = NULL)
  )
})

0 comments on commit 4315e76

Please sign in to comment.