change name of zip_zones to zips_zones_sample

aedobbyn · Jun 26, 2018 · c1283e1 · c1283e1
1 parent fc85afb
commit c1283e1
Show file tree

Hide file tree

Showing 9 changed files with 77 additions and 45 deletions.
diff --git a/R/data.R b/R/data.R
@@ -1,6 +1,6 @@
 #' Zips and Zones
 #'
-#' All 3-digit zips and zones. The result of running fetch_all() with \code{as_range = TRUE}.
+#' A random sample of all 3-digit zips and zones. The result of running fetch_all() with \code{as_range = TRUE} and taking a 1m row sample.
 #'
 #' @format A data frame with 3,804,494 rows and 6 variables:
 #' \describe{
@@ -11,4 +11,4 @@
 #'   \item{same_ndc}{Origin and destination in same Network Distribution Center?}
 #'   \item{specific_to_priority_mail}{Zone specific to Priority Mail?}
 #' }
-"zips_zones"
+"zips_zones_sample"
diff --git a/R/scrub_mail.R b/R/scrub_mail.R
@@ -14,7 +14,7 @@
 #'          destination_zip = "11238", type = "envelope") %>% scrub_mail()
 #' }
 #'
-#' @return A tibble with the same number of rows and same columns as the input, save that \code{delivery_day} is now \code{delivery_date} and \code{delivery_by_time}.
+#' @return A tibble with the same number of rows the input. \code{delivery_day} becomes \code{delivery_date} and \code{delivery_by_time}, from which \code{delivery_duration} in days is calculated (\code{delivery_date - shipping_date}).
 #' @export
 #'
 
@@ -40,7 +40,9 @@ scrub_mail <- function(tbl) {
 
       delivery_by_time = delivery_day %>%
         stringr::str_extract("by [A-Za-z0-9: ]+") %>%
-        stringr::str_replace_all("by ", "")
+        stringr::str_replace_all("by ", ""),
+
+      delivery_duration = delivery_date - lubridate::as_date(shipping_date)
     ) %>%
     dplyr::ungroup() %>%
     dplyr::select(-delivery_day) %>%

diff --git a/R/utils.R b/R/utils.R
@@ -23,8 +23,7 @@ zone_detail_definitions <-
     "same_ndc", "3, 5",
     "The origin and destination zips are in the same Network Distribution Center.",
     "has_five_digit_exceptions", "3",
-    "This 3 digit destination zip prefix appears at the \\
-    beginning of certain 5 digit destination zips that correspond to a different zone.",
+    "This 3 digit destination zip prefix appears at the beginning of certain 5 digit destination zips that correspond to a different zone.",
     "local", "5",
     "Is this a local zone?",
     "full_response", "5",
@@ -310,6 +309,34 @@ cap_word <- function(x) {
   x
 }
 
+get_shipping_date <- function(shipping_date,
+                              verbose = FALSE) {
+  if (shipping_date == "today") {
+    shipping_date <-
+      Sys.Date() %>%
+      as.character()
+
+    if (verbose) message(glue::glue("Using ship on date {shipping_date}."))
+  }
+  return(shipping_date)
+}
+
+
+get_shipping_time <- function(shipping_time,
+                              verbose = FALSE) {
+  if (shipping_time == "now") {
+    shipping_time <-
+      stringr::str_c(
+        lubridate::now() %>% lubridate::hour(),
+        ":",
+        lubridate::now() %>% lubridate::minute()
+      )
+
+    if (verbose) message(glue::glue("Using ship on time {shipping_time}."))
+  }
+  return(shipping_time)
+}
+
 
 get_mail <- function(origin_zip = NULL,
                      destination_zip = NULL,
@@ -361,24 +388,10 @@ get_mail <- function(origin_zip = NULL,
     stop(glue::glue("Argument {not_lgl} is not of type logical."))
   }
 
-  if (shipping_date == "today") {
-    shipping_date <-
-      Sys.Date() %>%
-      as.character()
-
-    if (verbose) message(glue::glue("Using ship on date {shipping_date}."))
-  }
-
-  if (shipping_time == "now") {
-    shipping_time <-
-      stringr::str_c(
-        lubridate::now() %>% lubridate::hour(),
-        ":",
-        lubridate::now() %>% lubridate::minute()
-      )
-
-    if (verbose) message(glue::glue("Using ship on time {shipping_time}."))
-  }
+  shipping_date <- get_shipping_date(shipping_date,
+                                     verbose = verbose)
+  shipping_time <- get_shipping_time(shipping_time,
+                                     verbose = verbose)
 
   shipping_date <-
     shipping_date %>%
@@ -563,12 +576,19 @@ fetch_mail <- function(origin_zip = NULL,
     out <- resp$result
   }
 
+  shipping_date <- get_shipping_date(shipping_date,
+                                     verbose = verbose)
+  shipping_time <- get_shipping_time(shipping_time,
+                                     verbose = verbose)
+
   out <-
     out %>%
     clean_mail(show_details = show_details) %>%
     dplyr::mutate(
       origin_zip = origin_zip,
-      dest_zip = destination_zip
+      dest_zip = destination_zip,
+      shipping_date = shipping_date,
+      shipping_time = shipping_time
     ) %>%
     dplyr::select(
       origin_zip, dest_zip,

diff --git a/README.Rmd b/README.Rmd
@@ -62,13 +62,15 @@ We'll ask for `type = "box"` and get back all the options for boxes along with t
                      type = "box"))
 ```
 
-The website should display the same results,
+The web interface should display the same results:
 
 <p align="center">
   <img src="./man/figures/bk_to_chi.jpg" alt="post_calc" width="70%">
 </p>
 
-which can be tidied with `scrub_mail`. This allows for computing on the prices and dates.
+and this is a good option if you want to display data in the way USPS does. If you want to compute on prices and dates, you can tidy the dataframe with `scrub_mail`. 
+
+This replaces `"Not available"`s with `NA`s, changes prices to numeric, splits delivery day into a date and time of day (we infer year by the current year), and computes the delivery duration in days.
 
 ```{r}
 mail %>% 
@@ -305,23 +307,23 @@ tibble::tribble(
 
 <br>
 
-#### Well, not all of it
+#### Almost all of it
 
-The `zips_zones` dataset included in this package contains a random sample of 1,000,000 rows of all the 3 digit origin-destination pairs. Load it with:
+The `zips_zones_sample` dataset included in this package contains a random sample of 1,000,000 rows of all the 3 digit origin-destination pairs. Load it with:
 
 ```{r}
-data(zips_zones)
+data(zips_zones_sample)
 ```
 
 
 It's what you'd get by running `fetch_all(show_details = TRUE)`, waiting a while, and then taking a sample.
 
 ```{r}
-zips_zones
+zips_zones_sample
 ```
 
 
-The sample is about a quarter of the total number of rows between all origin prefixes and all destination prefixes, plus the 5 digit exceptions (3,804,494 rows). See it put to use in the [vignette](https://github.com/aedobbyn/usps/blob/dev/vignettes/getting-zoned.Rmd).
+The sample is about a quarter of the total number of rows between all origin prefixes and all destination prefixes, plus the 5 digit exceptions (~4m rows). See it put to use in the [vignette](https://github.com/aedobbyn/usps/blob/dev/vignettes/getting-zoned.Rmd).
 
 <br>
 

diff --git a/data/zips_zones.rda b/data/zips_zones.rda
diff --git a/data/zips_zones_sample.rda b/data/zips_zones_sample.rda
diff --git a/man/zips_zones.Rd → man/zips_zones_sample.Rd b/man/zips_zones.Rd → man/zips_zones_sample.Rd
diff --git a/tests/testthat/test_utils.R b/tests/testthat/test_utils.R
@@ -1,5 +1,13 @@
 testthat::context("Test utility functions")
 
+testthat::test_that("zips_zones_sample", {
+  testthat::expect_is(zips_zones_sample,
+                      "data.frame")
+
+  testthat::expect_equal(nrow(zips_zones_sample),
+                         1000000)
+})
+
 testthat::test_that("Safely getting data works", {
   testthat::expect_null(try_get_data("foo") %>%
     purrr::pluck("result"))

diff --git a/vignettes/getting-zoned.Rmd b/vignettes/getting-zoned.Rmd
@@ -24,17 +24,17 @@ library(dplyr)
 library(tidyr)
 ```
 
-The `usps::zips_zones` dataset provides a quarter of the ~4 million 3-digit origin-destination pairs. (If you want allll of them, you can run `fetch_all` and put on a pot of coffee.) 
+The `usps::zips_zones_sample` dataset provides a quarter of the ~4 million 3-digit origin-destination pairs. (If you want allll of them, you can run `fetch_all` and put on a pot of coffee.) 
 
 What if we wanted to use that to plot how zones increase as the destination gets farther from the origin?
 
-We'll need a measure of latitude and longitude for that. Luckily, data from the [`zipcode`](https://github.com/cran/zipcode) package relates every zip codes to its latitude and longitude. We can that along with  the *partial* data in `usps::zips_zones` to match up zips to latitudes and longitudes.
+We'll need a measure of latitude and longitude for that. Luckily, data from the [`zipcode`](https://github.com/cran/zipcode) package relates every zip codes to its latitude and longitude. We can that along with  the *partial* data in `usps::zips_zones_sample` to match up zips to latitudes and longitudes.
 
 Let's load in the randomly sampled `usps` package data. 
 
 ```{r}
-data(zips_zones)
-zips_zones
+data(zips_zones_sample)
+zips_zones_sample
 ```
 
 Later we'll `select` away all the other details and focus just on `origin_zip`, `dest_zip`, and `zone`. As we know, the `usps::zip_zones` data displays all origin zips as 3 digit prefixes and most destination zips as 3 digits as well.
@@ -67,8 +67,8 @@ Let's get a tibble of all possible USPS zips, both origin prefixes and destinati
 (usps_zips <-
   tibble(
     zip =
-      unique(zips_zones$origin_zip) %>% 
-      c(unique(zips_zones$dest_zip)) 
+      unique(zips_zones_sample$origin_zip) %>% 
+      c(unique(zips_zones_sample$dest_zip)) 
   ) %>% 
   distinct())
 ```
@@ -84,13 +84,13 @@ Now we can join the `zipcode` trimmed zips on our `usps_zips` to get a correspon
   select(zip_trim, latitude, longitude))
 ```
 
-Now we have a mapping between each possible 3 digit zip and its latitude and longitude. But for every row in our `zips_zones` dataset, we have two lats and two longs: one for the origin and one for the destination. 
+Now we have a mapping between each possible 3 digit zip and its latitude and longitude. But for every row in our `zips_zones_sample` dataset, we have two lats and two longs: one for the origin and one for the destination. 
 
-So we'll want to take our `zips_lat_long` mapping and use that to attach latitude and longitude to each origin and each destination in `zips_zones`.
+So we'll want to take our `zips_lat_long` mapping and use that to attach latitude and longitude to each origin and each destination in `zips_zones_sample`.
 
 ```{r zips_zones_lat_long}
 (zips_zones_lat_long <- 
-  zips_zones %>% 
+  zips_zones_sample %>% 
   select(origin_zip, dest_zip, zone) %>% 
   left_join(zips_lat_long, by = c("origin_zip" = "zip_trim")) %>% 
   rename(