From 691e8e43d4334f9f4a1fa5102f0b70e5f3bd9a0f Mon Sep 17 00:00:00 2001 From: gfinak Date: Thu, 5 Jul 2018 13:38:07 -0700 Subject: [PATCH] This commit and previous should address remaining ropensci reviewer issues. #21 #22 #23 #24 #25 #26 #27 #28 #29 #30 #31 #32 --- DESCRIPTION | 5 +- R/build.R | 6 +- R/processData.R | 27 +- man/DataPackageR.Rd | 4 +- man/package_build.Rd | 5 +- tests/testthat/test-skeleton.R | 10 +- vignettes/usingDataPackageR.R | 120 ++++--- vignettes/usingDataPackageR.Rmd | 5 + vignettes/usingDataPackageR.html | 530 +++++++++++++++++++------------ vignettes/usingDataPackageR.md | 474 ++++++++++++++++----------- 10 files changed, 718 insertions(+), 468 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 8bd722d..9bd8ac5 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -4,7 +4,7 @@ Title: Construct Reproducible Analytic Data Sets as R Packages Authors@R: c(person(given = "Greg Finak", role=c("aut","cre","cph"), email="gfinak@fredhutch.org"), person(given = "Paul Obrecht", role=c("ctb"))) -Version: 0.13.6 +Version: 0.14.0 Description: Construct reproducible analytic data sets as R packages. License: MIT + file LICENSE Depends: R (>= 3.5.0) @@ -28,6 +28,7 @@ RoxygenNote: 6.0.1 Suggests: testthat, covr, - data.tree + data.tree, + mtcars20 URL: https://github.com/RGLab/DataPackageR BugReports: https://github.com/RGLab/DataPackageR/issues diff --git a/R/build.R b/R/build.R index 9ec5355..a41a903 100644 --- a/R/build.R +++ b/R/build.R @@ -7,6 +7,7 @@ #' @param packageName \code{character} path to package source directory. Defaults to the current path when NULL. #' @param vignettes \code{logical} specify whether to build vignettes. Default FALSE. #' @param log log level \code{INFO,WARN,DEBUG,FATAL} +#' @param deps \code{logical} should we pass data objects into subsequent scripts? Default TRUE #' @importFrom roxygen2 roxygenise roxygenize #' @importFrom devtools build_vignettes build parse_deps #' @importFrom usethis use_build_ignore use_rstudio proj_set use_directory @@ -32,7 +33,8 @@ #' package_build(file.path(tempdir(),pname)) package_build <- function(packageName = NULL, vignettes = FALSE, - log=INFO) { + log = INFO, + deps = TRUE) { flog.threshold(log) flog.appender(appender.console()) requireNamespace("rprojroot") @@ -71,7 +73,7 @@ package_build <- function(packageName = NULL, # Return success if we've processed everything success <- - DataPackageR(arg = package_path) + DataPackageR(arg = package_path, deps = deps) ifelse(success, flog.info("DataPackageR succeeded"), flog.warn("DataPackageR failed") diff --git a/R/processData.R b/R/processData.R index cb1ec23..be62d84 100644 --- a/R/processData.R +++ b/R/processData.R @@ -108,12 +108,13 @@ NULL #' Meant to be called before R CMD build. #' @name DataPackageR #' @param arg \code{character} name of the package to build. +#' @param deps \code{logical} should scripts pass data objects to each other (default=TRUE) #' @return logical TRUE if succesful, FALSE, if not. #' @importFrom desc desc #' @importFrom rmarkdown render #' @importFrom utils getSrcref modifyList #' @importFrom usethis proj_set proj_get -DataPackageR <- function(arg = NULL) { +DataPackageR <- function(arg = NULL, deps = TRUE) { requireNamespace("futile.logger") requireNamespace("yaml") pkg_dir <- arg @@ -132,7 +133,7 @@ DataPackageR <- function(arg = NULL) { stop("exiting", call. 
= FALSE) } } else { - logpath <- + logpath <- normalizePath( file.path(pkg_dir, "inst/extdata"), winslash = "/" @@ -232,20 +233,9 @@ DataPackageR <- function(arg = NULL) { pkg_description <- try(read.description(file = description_file), silent = TRUE ) - if (inherits(pkg_description, "try-error")) { - flog.fatal("No valid DESCRIPTION file") - { - stop( - paste0( - "You need a valid package DESCRIPTION file.", - "Please see Writing R Extensions", - "(http://cran.r-project.org/doc/manuals/", - "r-release/R-exts.html#The-DESCRIPTION-file).\n" - ), - pkg_description - ) - } - } + # The test for a valid DESCRIPTION here is no longer needed since + # we use proj_set(). + # check that we have at least one file # This is caught elsewhere @@ -264,8 +254,9 @@ DataPackageR <- function(arg = NULL) { for (i in seq_along(r_files)) { dataenv <- new.env(hash = TRUE, parent = .GlobalEnv) # assign ENVS into dataenv. - # provide functions in the package to read from it. - assign(x = "ENVS", value = ENVS, dataenv) + # provide functions in the package to read from it (if deps = TRUE) + if(deps) + assign(x = "ENVS", value = ENVS, dataenv) flog.info(paste0( "Processing ", i, " of ", length(r_files), ": ", r_files[i], diff --git a/man/DataPackageR.Rd b/man/DataPackageR.Rd index a865f5a..746246c 100644 --- a/man/DataPackageR.Rd +++ b/man/DataPackageR.Rd @@ -4,10 +4,12 @@ \alias{DataPackageR} \title{Process data generation code in 'data-raw'} \usage{ -DataPackageR(arg = NULL) +DataPackageR(arg = NULL, deps = TRUE) } \arguments{ \item{arg}{\code{character} name of the package to build.} + +\item{deps}{\code{logical} should scripts pass data objects to each other (default=TRUE)} } \value{ logical TRUE if succesful, FALSE, if not. diff --git a/man/package_build.Rd b/man/package_build.Rd index 8d89509..52274cd 100644 --- a/man/package_build.Rd +++ b/man/package_build.Rd @@ -4,7 +4,8 @@ \alias{package_build} \title{Preprocess, document and build a data package} \usage{ -package_build(packageName = NULL, vignettes = FALSE, log = INFO) +package_build(packageName = NULL, vignettes = FALSE, log = INFO, + deps = TRUE) } \arguments{ \item{packageName}{\code{character} path to package source directory. Defaults to the current path when NULL.} @@ -12,6 +13,8 @@ package_build(packageName = NULL, vignettes = FALSE, log = INFO) \item{vignettes}{\code{logical} specify whether to build vignettes. Default FALSE.} \item{log}{log level \code{INFO,WARN,DEBUG,FATAL}} + +\item{deps}{\code{logical} should we pass data objects into subsequent scripts? Default TRUE} } \description{ Combines the preprocessing, documentation, and build steps into one. 
diff --git a/tests/testthat/test-skeleton.R b/tests/testthat/test-skeleton.R index 8615748..0bdf4a6 100644 --- a/tests/testthat/test-skeleton.R +++ b/tests/testthat/test-skeleton.R @@ -37,7 +37,8 @@ test_that("package can be built from different locations", { file.path(tmp, "subsetCars"))), "subsetCars_1.0.tar.gz") - old <- setwd(file.path(tmp, "subsetCars")) + old <- + setwd(file.path(tmp, "subsetCars")) on.exit(setwd(old)) expect_equal(basename(package_build(".")), "subsetCars_1.0.tar.gz") expect_error(package_build("subsetCars")) @@ -474,6 +475,9 @@ test_that("package built in different edge cases", { force = TRUE, recursive = TRUE) package.skeleton("foo", path = tmp) + suppressWarnings(expect_error( + DataPackageR:::DataPackageR( + file.path(tmp, "foo")))) dir.create(file.path(tmp, "foo", "data-raw")) suppressWarnings(expect_error( DataPackageR:::DataPackageR( @@ -492,8 +496,8 @@ test_that("package built in different edge cases", { recursive = TRUE) - package.skeleton("foo", path = tmp) - expect_error(yml_find(file.path(tmp, "foo"))) + package.skeleton("foo", path = tempdir(),force=TRUE) + expect_error(yml_find(file.path(tempdir(), "foo"))) dir.create(file.path(tmp, "foo", "data-raw")) unlink(file.path(tmp, "foo", "DESCRIPTION")) yml <- DataPackageR:::construct_yml_config("foo.Rmd") diff --git a/vignettes/usingDataPackageR.R b/vignettes/usingDataPackageR.R index ba57f46..8efd4f1 100644 --- a/vignettes/usingDataPackageR.R +++ b/vignettes/usingDataPackageR.R @@ -5,58 +5,104 @@ knitr::opts_chunk$set( eval = TRUE ) -## ------------------------------------------------------------------------ -library(data.tree) +## ----minimal_example, results='hide'------------------------------------- library(DataPackageR) -tmp = normalizePath(tempdir()) -processing_code = system.file("extdata","tests","subsetCars.Rmd",package="DataPackageR") -print(processing_code) -setwd(tmp) -DataPackageR::datapackage.skeleton("Test", - force=TRUE, - code_files = processing_code, - r_object_names = "cars_over_20") # cars_over_20 is an R object - # created in the Rmd file. + +# Let's reproducibly package up +# the cars in the mtcars dataset +# with speed > 20. +# Our dataset will be called cars_over_20. + +# Get the code file that turns the raw data +# to our packaged and processed analysis-ready dataset. +processing_code <- + system.file("extdata", + "tests", + "subsetCars.Rmd", + package = "DataPackageR") + +# Create the package framework. +DataPackageR::datapackage_skeleton( + "mtcars20", + force = TRUE, + code_files = processing_code, + r_object_names = "cars_over_20", + path = tempdir() + ) ## ----dirstructure,echo=FALSE--------------------------------------------- -df = data.frame(pathString=file.path("Test",(list.files(tmp,recursive=TRUE)))) +library(data.tree) +df = data.frame(pathString = file.path( + "mtcars20", + list.files( + file.path(tempdir(), "mtcars20"), + include.dirs = TRUE, + recursive = TRUE + ) + )) as.Node(df) ## ---- echo=FALSE--------------------------------------------------------- -library(yaml) -setwd(tmp) -cat(as.yaml(yaml.load_file("Test/datapackager.yml"))) +cat(yaml::as.yaml(yaml::yaml.load_file(file.path(tempdir(),"mtcars20","datapackager.yml")))) -## ------------------------------------------------------------------------ -# Within the package directory -setwd(tmp) -DataPackageR:::package_build("Test") +## ----eval=TRUE----------------------------------------------------------- +# Run the preprocessing code to build cars_over_20 +# and reproducibly enclose it in a package. 
+DataPackageR:::package_build(file.path(tempdir(),"mtcars20")) ## ---- echo=FALSE--------------------------------------------------------- -library(yaml) -setwd(tmp) -df = data.frame(pathString=file.path("Test",(list.files("Test",recursive=TRUE)))) -as.Node(df) +df = data.frame(pathString = file.path( + "mtcars20", + list.files( + file.path(tempdir(), "mtcars20"), + include.dirs = TRUE, + recursive = TRUE + ) + )) + as.Node(df) -## ---- echo=FALSE--------------------------------------------------------- -setwd(tmp) -cat(readLines("Test/DATADIGEST"),sep="\n") +## ------------------------------------------------------------------------ +# Let's use the package we just created. +install.packages(file.path(tempdir(),"mtcars20_1.0.tar.gz"), type = "source", repos = NULL) +library(mtcars20) +data("cars_over_20") # load the data +cars_over_20 # Now we can use it. +?cars_over_20 # See the documentation you wrote in data-raw/documentation.R. -## ----echo=FALSE---------------------------------------------------------- -setwd(tmp) -cat(readLines("Test/DESCRIPTION"),sep="\n") +vignettes = vignette(package="mtcars20") +vignettes$results -## ----construct_config---------------------------------------------------- -#assume I have file1.Rmd and file2.R located in /data-raw, and these create 'object1' and 'object2' respectively. +## ------------------------------------------------------------------------ +# We can easily check the version of the data +DataPackageR::data_version("mtcars20") -config = construct_yml_config(code = c("file1.Rmd","file2.R"), data = c("object1","object2")) -cat(as.yaml(config)) +# You can use an assert to check the data version in reports and +# analyses that use the packaged data. +assert_data_version(data_package_name = "mtcars20", + version_string = "0.1.0", + acceptable = "equal") #If this fails, execution stops + #and provides an informative error. -## ------------------------------------------------------------------------ -path_to_package = tempdir() #pretend this is the root of our package -yml_write(config,path = path_to_package) +## ----construct_config, echo=1:2------------------------------------------ +# assume I have file1.Rmd and file2.R located in /data-raw, +# and these create 'object1' and 'object2' respectively. + +config = construct_yml_config(code = c("file1.Rmd", "file2.R"), + data = c("object1", "object2")) +cat(yaml::as.yaml(config)) ## ------------------------------------------------------------------------ +path_to_package = tempdir() #e.g., if tempdir() was the root of our package. +yml_write(config, path = path_to_package) + +## ----echo=1:2------------------------------------------------------------ config = yml_disable_compile(config,filenames = "file2.R") -cat(as.yaml(config)) +yml_write(config, path = path_to_package) # write modified yml to the package. +cat(yaml::as.yaml(config)) + +## ---- echo=FALSE--------------------------------------------------------- +cat(readLines(file.path(tempdir(),"mtcars20","DATADIGEST")),sep="\n") + +## ----echo=FALSE---------------------------------------------------------- +cat(readLines(file.path(tempdir(),"mtcars20","DESCRIPTION")),sep="\n") diff --git a/vignettes/usingDataPackageR.Rmd b/vignettes/usingDataPackageR.Rmd index cf313d5..c810e5c 100644 --- a/vignettes/usingDataPackageR.Rmd +++ b/vignettes/usingDataPackageR.Rmd @@ -290,6 +290,11 @@ A script (e.g., `script2.Rmd`) running after `script1.Rmd` can access a stored d `DataPackageR::datapackager_object_read("script1_dataset")`. 
+Passing of data objects amongst scripts can be turned off via:
+
+`package_build(deps = FALSE)`
+
+
 # Additional Details
 
 We provide some additional details for the interested.
diff --git a/vignettes/usingDataPackageR.html b/vignettes/usingDataPackageR.html
index da7e101..7e9cdcb 100644
--- a/vignettes/usingDataPackageR.html
+++ b/vignettes/usingDataPackageR.html
@@ -12,7 +12,7 @@
-
+
Using DataPackageR
@@ -280,97 +280,108 @@

Using DataPackageR

Greg Finak gfinak@fredhutch.org

-

2018-07-02

+

2018-07-05

- -
-

DataPackageR

-

A package to reproducibly process raw data into packaged, analysis-ready data sets.

-

Build Status Coverage status AppVeyor build status DOI

-
-

Code of conduct

-

Please note that this project is released with a Contributor Code of Conduct. By participating in this project you agree to abide by its terms.

-
-
-

Preprint and publication.

-

The publication describing the package is now available at Gates Open Research.

-

The preprint is on biorxiv.

-
-
-

Goals

-

You have raw data that needs to be tidied and otherwise processed into a standardized analytic data set (a data set that is ready for analysis). You want to do the processing using best practices for reproducible research.

-
-

The current state of affairs

-

Normally, you’ll write some code that does the tidying and outputs a tidy data set. If you want to distribute your data set, you can put it in an R package. The preferred mechanism is to place your data tidying code in data-raw in the package source tree and use the devtools package (specifically devtools::use_data) to save the data into the data directory. The build process will include your data set in the final package. You’ll also have to remember to document the data set in roxygen, and write a vignette showing how to use the data. For version control and easy distribution you might post the package on github.

-
-
-

Scaling up

-

The process outlined works well for smaller data sets. It can be a hassle if you have complex data that change frequently (as is often the case in biology, where data trickle in from collaborators and follow-up experiments), or more generally if you have large data sets where raw data can’t be distributed as part of the package source due to size restrictions (e.g. FASTQ files for sequencing, FCS files for flow cytometry, or other “omics” data).

-
-
-

DataPackageR

-

The DataPackageR package simplifies bundling of code, data and documentation into a single R package that can be versioned and distributed. The datapackage.skeleton() API lets you point DataPackageR at your data processing code (in the form of Rmd and / or R files). These are expected to produce data objects to be stored in the final package. The names of these are also passed to datapackage.skeleton(). This produces the necessary package structure, and populations a datapackager.yml configuration file used by the build process.

-

The package_build() API runs the processing code specified in the .yml files and produces html reports of the processing as package vignettes. It also builds boilerplate roxygen documentation of the R objects specified in the .yml, computes checksums of stored R objects and version tags the entire data set collection.

-

If raw data changes, the user can rebuild the data sets in the R package with subsequent calls to package_build() which will re-run the processing, compare the cheksums of new R objects against those currently stored in the package. Any changes force an increment of the Dataversion string in the package DESCRIPTION file. When the package is installed, data sets can be accessed via the standard data() API, package vignettes describing the data processing can be accessed via vignette(), documentation via ?, and the data version via dataVersion(packageName).

-
-
+ -
-

Installation

-

The usual package installation mechanism works:

-
library(devtools)
-devtools::install_github("RGLab/DataPackageR", auth_token=NULL)
+ +
+

Purpose

+

This vignette demonstrates how to use DataPackageR to build a datapackage from the mtcars data set.

-
-

Usage

-

Set up a new data package.

-

We’ll set up a new data package that processes the cars data by subsetting it to include only measurements of stopping distances of cars at speeds greater than 20 mph. It is processed using an Rmd file located in inst/extdata/tests/subsetCars.Rmd that produces a new object called cars_over_20. The package will be called Test. The work will be done in the system /tmp directory.

- -
-

Package skeleton structure

-

This has created a directory, “Test” with the skeleton of a data package.

-

The DESCRIPTION file should be filled out to describe your package. It contains a new DataVersion string, and the revision is automatically incremented if the packaged data changes.

-

Read-and-delete-me has some helpful instructions on how to proceed.

-

The data-raw directory is where the data cleaning code (Rmd) files reside. The contents of this directory are:

-
                   levelName
-1 Test                      
-2  °--Test                  
-3      ¦--DESCRIPTION       
-4      ¦--Read-and-delete-me
-5      ¦--data-raw          
-6      ¦   °--subsetCars.Rmd
-7      °--datapackager.yml  
-

datapackager.yml can be edited as necessary to include additional processing scripts (which should be placed in data-raw), and raw data should be located under under /inst/extdata. It should be copied into that path and the data munging scripts edited to read from there.

+
+

Set up a new data package.

+

We’ll set up a new data package based on the mtcars example in the README. The datapackage_skeleton() API is used to set up a new package; a sketch of the call follows the list below. The user needs to provide:

+
    +
  • R or Rmd code files that do data processing.
  • +
  • A list of R object names created by those code files.
  • +
+ +
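A sketch of the call that wires these together, mirroring this vignette's R source (the subsetCars.Rmd that ships with DataPackageR stands in for your own processing script):

```r
library(DataPackageR)

# Processing script that turns raw data into the packaged,
# analysis-ready object. Here we reuse the example Rmd shipped
# with DataPackageR; point this at your own R / Rmd files.
processing_code <- system.file(
  "extdata", "tests", "subsetCars.Rmd",
  package = "DataPackageR"
)

# Create the data package framework: one package ("mtcars20")
# that will store one R object ("cars_over_20").
datapackage_skeleton(
  "mtcars20",
  force          = TRUE,
  code_files     = processing_code,
  r_object_names = "cars_over_20",
  path           = tempdir()
)
```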
+

What’s in the package skeleton structure?

+

This has created a datapackage source tree named “mtcars20” (in a temporary directory). For a real use case you would pick a path on your filesystem where you could then initialize a new github repository for the package.

+

The contents of mtcars20 are:

+
                levelName
+1  mtcars20              
+2   ¦--DESCRIPTION       
+3   ¦--R                 
+4   ¦--Read-and-delete-me
+5   ¦--data              
+6   ¦--data-raw          
+7   ¦   °--subsetCars.Rmd
+8   ¦--datapackager.yml  
+9   ¦--inst              
+10  ¦   °--extdata       
+11  °--man               
+

You should fill out the DESCRIPTION file to describe your data package. It contains a new DataVersion string that will be automatically incremented when the data package is built if the packaged data has changed.

+

The user-provided code files reside in data-raw. They are executed during the data package build process.

-
-

Yaml configuration

-

Here are the contents of datapackager.yml:

+
+

A few words about the YAML config file

+

A datapackager.yml file is used to configure and control the build process.

+

The contents are:

configuration:
   files:
     subsetCars.Rmd:
@@ -378,71 +389,90 @@ 

Yaml configuration

enabled: yes objects: cars_over_20 render_root: - tmp: '787709'
-

It includes a files property that has an entry for each script, with the name: and enabled: keys for each file. The objects property lists the data objects produced by the scripts.

-

The render_root property specifies the directory where the Rmd files are rendered. If temporary objects are produced during the processing, they will appear in this directory without polluting the package source tree. A temporary directory is used by default.

+ tmp: '95288' +

The two main pieces of information in the configuration are a list of the files to be processed and the data sets the package will store.

+

This example packages an R data set named cars_over_20 (the name was passed in to datapackage_skeleton()). It is created by the subsetCars.Rmd file.

+

The objects must be listed in the yaml configuration file. datapackage_skeleton() ensures this is done for you automatically.

+

DataPackageR provides an API for modifying this file, so it does not need to be done by hand.

+

Further information on the contents of the YAML configuration file and its API is in the YAML Configuration Details

-
-

Build your package.

-

Once your scripts are in place and the data objects are documented, you build the package.

-

To run the build process:

-
# Within the package directory
-setwd(tmp)
-DataPackageR:::package_build("Test") 
-INFO [2018-07-02 12:26:31] Logging to /private/var/folders/jh/x0h3v3pd4dd497g3gtzsm8500000gn/T/Rtmp7wPrTh/Test/inst/extdata/Logfiles/processing.log
-INFO [2018-07-02 12:26:31] Processing data
-INFO [2018-07-02 12:26:31] Reading yaml configuration
-INFO [2018-07-02 12:26:31] Found /private/var/folders/jh/x0h3v3pd4dd497g3gtzsm8500000gn/T/Rtmp7wPrTh/Test/data-raw/subsetCars.Rmd
-INFO [2018-07-02 12:26:31] Processing 1 of 1: /private/var/folders/jh/x0h3v3pd4dd497g3gtzsm8500000gn/T/Rtmp7wPrTh/Test/data-raw/subsetCars.Rmd
-processing file: subsetCars.Rmd
-output file: subsetCars.knit.md
-/usr/local/bin/pandoc +RTS -K512m -RTS subsetCars.utf8.md --to html4 --from markdown+autolink_bare_uris+ascii_identifiers+tex_math_single_backslash+smart --output /private/var/folders/jh/x0h3v3pd4dd497g3gtzsm8500000gn/T/Rtmp7wPrTh/Test/inst/extdata/Logfiles/subsetCars.html --email-obfuscation none --self-contained --standalone --section-divs --template /Library/Frameworks/R.framework/Versions/3.5/Resources/library/rmarkdown/rmd/h/default.html --no-highlight --variable highlightjs=1 --variable 'theme:bootstrap' --include-in-header /var/folders/jh/x0h3v3pd4dd497g3gtzsm8500000gn/T//Rtmp7wPrTh/rmarkdown-strcd9679f83bd2.html --mathjax --variable 'mathjax-url:https://mathjax.rstudio.com/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML' 
-
-Output created: Test/inst/extdata/Logfiles/subsetCars.html
-INFO [2018-07-02 12:26:31] 1 required data objects created by subsetCars.Rmd
-INFO [2018-07-02 12:26:31] Saving to data
-INFO [2018-07-02 12:26:32] Copied documentation to /private/var/folders/jh/x0h3v3pd4dd497g3gtzsm8500000gn/T/Rtmp7wPrTh/Test/R/Test.R
-✔ Creating 'vignettes/'
-✔ Creating 'inst/doc/'
-INFO [2018-07-02 12:26:32] Done
-INFO [2018-07-02 12:26:32] DataPackageR succeeded
-INFO [2018-07-02 12:26:32] Building documentation
-First time using roxygen2. Upgrading automatically...
-Updating roxygen version in /private/var/folders/jh/x0h3v3pd4dd497g3gtzsm8500000gn/T/Rtmp7wPrTh/Test/DESCRIPTION
-Writing NAMESPACE
-Writing Test.Rd
-Writing cars_over_20.Rd
-INFO [2018-07-02 12:26:32] Building package
-'/Library/Frameworks/R.framework/Resources/bin/R' --no-site-file  \
-  --no-environ --no-save --no-restore --quiet CMD build  \
-  '/private/var/folders/jh/x0h3v3pd4dd497g3gtzsm8500000gn/T/Rtmp7wPrTh/Test'  \
-  --no-resave-data --no-manual --no-build-vignettes 
-
-[1] "/private/var/folders/jh/x0h3v3pd4dd497g3gtzsm8500000gn/T/Rtmp7wPrTh/Test_1.0.tar.gz"
+
+

Where do I put raw data?

+

Raw data (provided the size is not prohibitive) can be placed in inst/extdata.

+

In this example we are reading from data(mtcars) rather than from the file system.

+
+

An API to locate data sets within an R or Rmd file.

+

To locate the data to read from the filesystem:

+
    +
  • DataPackageR::project_extdata_path() to get the path to inst/extdata from inside an Rmd or R file. (e.g., /var/folders/jh/x0h3v3pd4dd497g3gtzsm8500000gn/T//Rtmp3EWJ9k/mtcars20/inst/extdata)

  • +
  • DataPackageR::project_path() to get the path to the datapackage root. (e.g., /var/folders/jh/x0h3v3pd4dd497g3gtzsm8500000gn/T//Rtmp3EWJ9k/mtcars20)

  • +
+

Raw data stored externally can be retrieved relative to these paths.
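For example, a processing script under data-raw/ might read a raw file like this (a minimal sketch; raw_counts.csv is a hypothetical file you would place in inst/extdata):

```r
# Inside a data-raw/ processing script: build paths relative to the
# data package so the code is portable across machines.
raw_file <- file.path(
  DataPackageR::project_extdata_path(),
  "raw_counts.csv"            # hypothetical raw file under inst/extdata
)
raw_counts <- read.csv(raw_file)
```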

-
-

Logging the build process

-

DataPackageR uses the futile.logger pagckage to log progress. If there are errors in the processing, the script will notify you via logging to console and to /private/tmp/Test/inst/extdata/Logfiles/processing.log. Errors should be corrected and the build repeated.

-

If everything goes smoothly, you will have a new package built in the parent directory. In this case we have a new package Test_1.0.tar.gz. When the package is installed, it will contain a vignette subsetCars that can be loaded using the vignette() API. The vignette will detail the processing performed by the subsetCars.Rmd processing script.

-
-

The package source directory after building

+
+
+

Build the data package.

+

Once the skeleton framework is set up,

+
# Run the preprocessing code to build cars_over_20
+# and reproducibly enclose it in a package.
+DataPackageR:::package_build(file.path(tempdir(),"mtcars20"))
+INFO [2018-07-05 11:41:30] Logging to /private/var/folders/jh/x0h3v3pd4dd497g3gtzsm8500000gn/T/Rtmp3EWJ9k/mtcars20/inst/extdata/Logfiles/processing.log
+INFO [2018-07-05 11:41:30] Processing data
+INFO [2018-07-05 11:41:30] Reading yaml configuration
+INFO [2018-07-05 11:41:30] Found /private/var/folders/jh/x0h3v3pd4dd497g3gtzsm8500000gn/T/Rtmp3EWJ9k/mtcars20/data-raw/subsetCars.Rmd
+INFO [2018-07-05 11:41:30] Processing 1 of 1: /private/var/folders/jh/x0h3v3pd4dd497g3gtzsm8500000gn/T/Rtmp3EWJ9k/mtcars20/data-raw/subsetCars.Rmd
+processing file: subsetCars.Rmd
+output file: subsetCars.knit.md
+/usr/local/bin/pandoc +RTS -K512m -RTS subsetCars.utf8.md --to html4 --from markdown+autolink_bare_uris+ascii_identifiers+tex_math_single_backslash+smart --output /private/var/folders/jh/x0h3v3pd4dd497g3gtzsm8500000gn/T/Rtmp3EWJ9k/mtcars20/inst/extdata/Logfiles/subsetCars.html --email-obfuscation none --self-contained --standalone --section-divs --template /Library/Frameworks/R.framework/Versions/3.5/Resources/library/rmarkdown/rmd/h/default.html --no-highlight --variable highlightjs=1 --variable 'theme:bootstrap' --include-in-header /var/folders/jh/x0h3v3pd4dd497g3gtzsm8500000gn/T//Rtmp3EWJ9k/rmarkdown-str1c6861088f2d.html --mathjax --variable 'mathjax-url:https://mathjax.rstudio.com/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML' 
+
+Output created: /private/var/folders/jh/x0h3v3pd4dd497g3gtzsm8500000gn/T/Rtmp3EWJ9k/mtcars20/inst/extdata/Logfiles/subsetCars.html
+INFO [2018-07-05 11:41:30] 1 required data objects created by subsetCars.Rmd
+INFO [2018-07-05 11:41:30] Saving to data
+INFO [2018-07-05 11:41:30] Copied documentation to /private/var/folders/jh/x0h3v3pd4dd497g3gtzsm8500000gn/T/Rtmp3EWJ9k/mtcars20/R/mtcars20.R
+✔ Creating 'vignettes/'
+✔ Creating 'inst/doc/'
+INFO [2018-07-05 11:41:30] Done
+INFO [2018-07-05 11:41:30] DataPackageR succeeded
+INFO [2018-07-05 11:41:30] Building documentation
+First time using roxygen2. Upgrading automatically...
+Updating roxygen version in /private/var/folders/jh/x0h3v3pd4dd497g3gtzsm8500000gn/T/Rtmp3EWJ9k/mtcars20/DESCRIPTION
+Writing NAMESPACE
+Writing mtcars20.Rd
+Writing cars_over_20.Rd
+INFO [2018-07-05 11:41:30] Building package
+'/Library/Frameworks/R.framework/Resources/bin/R' --no-site-file  \
+  --no-environ --no-save --no-restore --quiet CMD build  \
+  '/private/var/folders/jh/x0h3v3pd4dd497g3gtzsm8500000gn/T/Rtmp3EWJ9k/mtcars20'  \
+  --no-resave-data --no-manual --no-build-vignettes 
+
+[1] "/private/var/folders/jh/x0h3v3pd4dd497g3gtzsm8500000gn/T/Rtmp3EWJ9k/mtcars20_1.0.tar.gz"
+
+

A log of the build process

+

DataPackageR uses the futile.logger package to log progress.

+

If there are errors in the processing, the script will notify you via logging to the console and to inst/extdata/Logfiles/processing.log under the package source directory (here, the mtcars20 directory created under tempdir()). Errors should be corrected and the build repeated.
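If more detail is needed while debugging, the log threshold can be raised when rebuilding; a sketch, assuming the futile.logger level constants are attached:

```r
library(futile.logger)  # provides the INFO / DEBUG / WARN / FATAL constants

# Rebuild with a more verbose log threshold than the default INFO.
DataPackageR:::package_build(
  file.path(tempdir(), "mtcars20"),
  log = DEBUG
)
```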

+

If everything goes smoothly, you will have a new package built in the parent directory.

+

In this case we have a new package mtcars20_1.0.tar.gz.

+
+
+

A note about the package source directory after building.

+

The package source directory changes after the first build.

                         levelName
-1  Test                           
+1  mtcars20                       
 2   ¦--DATADIGEST                 
 3   ¦--DESCRIPTION                
 4   ¦--NAMESPACE                  
 5   ¦--R                          
-6   ¦   °--Test.R                 
+6   ¦   °--mtcars20.R             
 7   ¦--Read-and-delete-me         
-8   ¦--data-raw                   
-9   ¦   ¦--documentation.R        
-10  ¦   ¦--subsetCars.R           
-11  ¦   ¦--subsetCars.Rmd         
-12  ¦   ¦--subsetCars.knit.md     
-13  ¦   °--subsetCars.utf8.md     
-14  ¦--data                       
-15  ¦   °--cars_over_20.rda       
+8   ¦--data                       
+9   ¦   °--cars_over_20.rda       
+10  ¦--data-raw                   
+11  ¦   ¦--documentation.R        
+12  ¦   ¦--subsetCars.R           
+13  ¦   ¦--subsetCars.Rmd         
+14  ¦   ¦--subsetCars.knit.md     
+15  ¦   °--subsetCars.utf8.md     
 16  ¦--datapackager.yml           
 17  ¦--inst                       
 18  ¦   ¦--doc                    
@@ -453,101 +483,181 @@ 

The package source directory after building

23 ¦ ¦--processing.log 24 ¦ °--subsetCars.html 25 ¦--man -26 ¦ ¦--Test.Rd -27 ¦ °--cars_over_20.Rd +26 ¦ ¦--cars_over_20.Rd +27 ¦ °--mtcars20.Rd 28 °--vignettes 29 °--subsetCars.Rmd
-
-

Details

-

A number of things have changed. The subsetCars processing script now appears under /vignettes and inst/doc as a processed html report so that it will be available to view via vignette() once the package is installed. inst/extdata/Logfiles contains a log file of the entire build process as well as intermediate files created while parsing the R / Rmd code. Documentation Rd files appear in /man, these should be edite to provide further details on the data objects in the package. The data objects are stored under /data where we see cars_over_20.rda, the object we initially specified in datapackager.yml.

+
+

Update the autogenerated documentation.

+

After the first build, the R directory contains mtcars20.R, which has autogenerated roxygen2 markup documentation for the data package and for the packaged data cars_over_20.

+

The processed Rd files can be found in man.
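For orientation, roxygen2 data documentation generally looks something like the sketch below (hypothetical text, not the exact autogenerated file; edit the title, description and @format to describe your data):

```r
#' Cars with speed over 20 mph
#'
#' A subset of the classic cars data containing only observations
#' with speed greater than 20 mph.
#'
#' @format A data frame with columns speed and dist.
#' @source Produced by data-raw/subsetCars.Rmd.
#' @name cars_over_20
#' @docType data
NULL
```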

+
+
+

Don’t forget to rebuild the package.

+

You should update the documentation in R/mtcars20.R, then call package_build() again.

+
-
-

Versioning data objects

-

The DataPackageR package calculates an md5 checksum of each data object it stores, and keeps track of them in a file called DATADIGEST.

+
+

Installing and using the new data package

+
+

Accessing vignettes, data sets, and data set documentation.

+

The package source also contains files in the vignettes and inst/doc directories that provide a log of the data processing.

+

When the package is installed, these will be accessible via the vignette() API.

+

The vignette will detail the processing performed by the subsetCars.Rmd processing script.

+

The data set documentation will be accessible via ?cars_over_20, and the data sets via data().
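Mirroring the usage code in this vignette's R source (the tarball path assumes the build above ran under tempdir()):

```r
# Install and load the data package we just built.
install.packages(
  file.path(tempdir(), "mtcars20_1.0.tar.gz"),
  type = "source", repos = NULL
)
library(mtcars20)

data("cars_over_20")             # load the packaged data set
?cars_over_20                    # documentation from data-raw/documentation.R
vignette(package = "mtcars20")   # processing report(s)

# Check, and assert, the version of the packaged data.
DataPackageR::data_version("mtcars20")
DataPackageR::assert_data_version(
  data_package_name = "mtcars20",
  version_string    = "0.1.0",
  acceptable        = "equal"
)
```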

+ +
+ +
+
+

Next steps

+

You should place the data package source directory under git version control. This allows you to version control your data processing code.

+
+
+

Partial builds and migrating old data packages.

+

Version 1.12.0 has moved away from controlling the build process using datasets.R and an additional masterfile argument.

+

The build process is now controlled via a datapackager.yml configuration file located in the package root directory. (see YAML Configuration Details)

+

You can migrate an old package by constructing such a config file using the construct_yml_config() API.
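A sketch reproducing the example from the vignette source; file1.Rmd, file2.R, object1 and object2 are illustrative names:

```r
# file1.Rmd and file2.R live in data-raw/ and create
# 'object1' and 'object2' respectively.
config <- DataPackageR::construct_yml_config(
  code = c("file1.Rmd", "file2.R"),
  data = c("object1", "object2")
)
cat(yaml::as.yaml(config))
```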

+ +

config is a newly constructed yaml configuration object. It can be written to the package directory:
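For example (as in the vignette source, pretending tempdir() is the package root):

```r
path_to_package <- tempdir()   # e.g., if tempdir() were the root of our package
DataPackageR::yml_write(config, path = path_to_package)
```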

+ +

Now the package at path_to_package will build with version 1.12.0 or greater.

+
+

Partial builds

+

We can also perform partial builds of a subset of files in a package by toggling the enabled key in the config file.

+

This can be done with the following API:
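A sketch, continuing the file1.Rmd / file2.R example from above:

```r
# Disable file2.R: it will be skipped at the next build, but its data
# and documentation are retained in the package.
config <- DataPackageR::yml_disable_compile(config, filenames = "file2.R")
DataPackageR::yml_write(config, path = path_to_package)  # write the change back
cat(yaml::as.yaml(config))
```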

+ +

Note that the modified configuration needs to be written back to the package source directory in order for the changes to take effect.

+

The consequence of toggling a file to enable: no is that it will be skipped when the package is rebuilt, but the data will still be retained in the package, and the documentation will not be altered.

+

This is useful in situations where we have multiple data sets and want to re-run one script to update a specific data set, but not the other scripts, which may be too time consuming to re-run.

+
+
+
+

Multi-script pipelines.

+

We may have situations where we have multi-script pipelines. There are two ways to share data among scripts.

+
    +
  1. filesystem artifacts
  2. +
  3. data objects passed to subsequent scripts.
  4. +
+
+

File system artifacts

+

The yaml configuration property render_root specifies the working directory where scripts will be rendered.

+

If a script writes files to the working directory, that is where files will appear. These can be read by subsequent scripts.
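A minimal sketch of this file-based hand-off (the object and the file name intermediate.rds are hypothetical):

```r
# In script1.Rmd: write an intermediate artifact into render_root
# (the working directory while the script is rendered).
intermediate_result <- cars[cars$speed > 20, ]
saveRDS(intermediate_result, "intermediate.rds")

# In script2.Rmd: read the artifact back from the same working directory.
intermediate_result <- readRDS("intermediate.rds")
```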

+
+
+

Passing data objects to subsequent scripts.

+

A script (e.g., script2.Rmd) running after script1.Rmd can access a stored data object named script1_dataset created by script1.Rmd by calling

+

DataPackageR::datapackager_object_read("script1_dataset").
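In code, the hand-off inside script2.Rmd looks like the sketch below; the new deps argument added to package_build() in this changeset disables it when set to FALSE:

```r
# Inside script2.Rmd: read a data object that script1.Rmd has already stored.
script1_dataset <- DataPackageR::datapackager_object_read("script1_dataset")

# This hand-off can be switched off at build time via the deps argument
# introduced in this changeset:
# DataPackageR:::package_build(file.path(tempdir(), "mtcars20"), deps = FALSE)
```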

+
+
+
+

Additional Details

+

We provide some additional details for the interested.

+
+

Fingerprints of stored data objects

+

DataPackageR calculates an md5 checksum of each data object it stores, and keeps track of them in a file called DATADIGEST.

  • Each time the package is rebuilt, the md5 sums of the new data objects are compared against the DATADIGEST.
  • If they don’t match, the build process checks that the DataVersion string has been incremented in the DESCRIPTION file.
  • If it has not the build process will exit and produce an error message.
-
-

DATADIGEST

+
+

DATADIGEST

The DATADIGEST file contains the following:

DataVersion: 0.1.0
 cars_over_20: 3ccb5b0aaa74fe7cfc0d3ca6ab0b5cf3
-
-

DESCRIPTION

+
+

DESCRIPTION

The description file has the new DataVersion string.

-
Package: Test
+
Package: mtcars20
 Type: Package
 Title: What the package does (short line)
 Version: 1.0
-Date: 2018-07-02
+Date: 2018-07-05
 Author: Who wrote it
 Maintainer: Who to complain to <yourfault@somewhere.net>
 Description: More about what it does (maybe more than one line)
 License: What license is it under?
 DataVersion: 0.1.0
+Suggests: 
+    knitr,
+    rmarkdown
+VignetteBuilder: knitr
 RoxygenNote: 6.0.1
-
-

Next steps

-

Your downstream data analysis can depend on a specific version of your data package (for example by tesing the packageVersion() string);

-
if(DataPackageR::packageVersion("MyNewStudy") != "1.0.0")
-  stop("The expected version of MyNewStudy is 1.0.0, but ",packageVersion("MyNewStudy")," is installed! Analysis results may differ!")
-

The DataPackageR packge also provides datasetVersion() to extract the data set version information.

-

You should also place the data package source directory under git version control. This allows you to version control your data processing code.

-
-

Why not use R CMD build?

+
+

Why not use R CMD build?

If the processing script is time consuming or the data set is particularly large, then R CMD build would run the code each time the package is installed. In such cases, raw data may not be available, or the environment to do the data processing may not be set up for each user of the data. In such cases, DataPackageR provides a mechanism to decouple data processing from package building/installation for downstream users of the data.

-
-

Partial builds and migrating old data packages.

-

Version 1.12.0 has moved away from controlling the build process using datasets.R and an additional masterfile argument. The build process is now controlled via a datapackager.yml configuration file located in the package root directory.

-

You can migrate an old package by constructing such a config file using the construct_yml_config() API.

- -

config is a newly constructed yaml configuration object. It can be written to the package directory:

- -

Now the package at path_to_package will build with version 1.12.0 or greater.

-

We can also perform partial builds of a subset of files in a package by toggling the enabled key in the config file. This can be done with the following API:

- -

Where config is a configuration read from a data package root directory. The config object needs to be written back to the package root in order for the changes to take effect. The consequence of toggling a file to enable: no is that it will be skipped when the package is built, but the data will be retained, and the documentation will not be altered.

-
-
diff --git a/vignettes/usingDataPackageR.md b/vignettes/usingDataPackageR.md index 8494aab..96f09d4 100644 --- a/vignettes/usingDataPackageR.md +++ b/vignettes/usingDataPackageR.md @@ -1,12 +1,13 @@ --- title: "Using DataPackageR" author: "Greg Finak " -date: "2018-07-02" +date: "2018-07-05" output: rmarkdown::html_vignette: keep_md: TRUE + toc: yes vignette: > - %\VignetteIndexEntry{A quick guide to using DataPackageR} + %\VignetteIndexEntry{A Guide to using DataPackageR} %\VignetteEngine{knitr::rmarkdown} \usepackage[utf8]{inputenc} \usepackage{graphicx} @@ -14,87 +15,46 @@ vignette: > -# DataPackageR +## Purpose -A package to reproducibly process raw data into packaged, analysis-ready data sets. +This vignette demonstrates how to use DataPackageR to build a datapackage from the `mtcars` data set. - [![Build Status](https://travis-ci.org/RGLab/DataPackageR.svg?branch=master)](https://travis-ci.org/RGLab/DataPackageR) - [![Coverage status](https://codecov.io/gh/RGLab/DataPackageR/branch/master/graph/badge.svg)](https://codecov.io/github/RGLab/DataPackageR?branch=master) - [![AppVeyor build status](https://ci.appveyor.com/api/projects/status/github/RGLab/DataPackageR?branch=master&svg=true)](https://ci.appveyor.com/project/RGLab/DataPackageR) -[![DOI](https://zenodo.org/badge/29267435.svg)](https://doi.org/10.5281/zenodo.1292095) +## Set up a new data package. -## Code of conduct +We'll set up a new data package based on `mtcars` example in the [README](https://github.com/RGLab/DataPackageR/blob/master/README.md). +The `datapackage_skeleton()` API is used to set up a new package. +The user needs to provide: -Please note that this project is released with a [Contributor Code of Conduct](CODE_OF_CONDUCT.md). - By participating in this project you agree to abide by its terms. +- R or Rmd code files that do data processing. +- A list of R object names created by those code files. -## Preprint and publication. -The publication describing the package is now available at [Gates Open Research](https://gatesopenresearch.org/articles/2-31/v1). - -The preprint is on [biorxiv](https://doi.org/10.1101/342907). - -## Goals - -You have raw data that needs to be tidied and otherwise processed into a standardized analytic data set (a data set that is ready for analysis). -You want to do the processing using best practices for reproducible research. - -### The current state of affairs - -Normally, you'll write some code that does the tidying and outputs a tidy data set. -If you want to distribute your data set, you can put it in an R package. -The preferred mechanism is to place your data tidying code in `data-raw` in the package source tree and use the `devtools` package (specifically `devtools::use_data`) to save the data into the `data` directory. The build process will include your data set in the final package. -You'll also have to remember to document the data set in `roxygen`, and write a vignette showing how to use the data. -For version control and easy distribution you might post the package on github. - -### Scaling up - -The process outlined works well for smaller data sets. -It can be a hassle if you have complex data that change frequently (as is often the case in biology, where data trickle in from collaborators and follow-up experiments), or more generally if you have large data sets where raw data can't be distributed as part of the package source due to size restrictions (e.g. FASTQ files for sequencing, FCS files for flow cytometry, or other "omics" data). 
- -### DataPackageR - -The `DataPackageR` package simplifies bundling of code, data and documentation into a single R package that can be versioned and distributed. -The `datapackage.skeleton()` API lets you point `DataPackageR` at your data processing code (in the form of Rmd and / or R files). These are expected to produce `data objects` to be stored in the final package. The names of these are also passed to `datapackage.skeleton()`. This produces the necessary package structure, and populations a `datapackager.yml` configuration file used by the build process. - -The `package_build()` API runs the processing code specified in the `.yml` files and produces html reports of the processing as **package vignettes**. It also builds boilerplate `roxygen` documentation of the R objects specified in the `.yml`, computes checksums of stored R objects and version tags the entire data set collection. - -If raw data changes, the user can rebuild the data sets in the R package with subsequent calls to `package_build()` which will re-run the processing, compare the cheksums of new R objects against those currently stored in the package. -Any changes force an increment of the `Dataversion` string in the package DESCRIPTION file. -When the package is installed, data sets can be accessed via the standard `data()` API, package vignettes describing the data processing can be accessed via `vignette()`, documentation via `?`, and the data version via `dataVersion(packageName)`. - - -# Installation - -The usual package installation mechanism works: - -``` -library(devtools) -devtools::install_github("RGLab/DataPackageR", auth_token=NULL) -``` - -# Usage - -Set up a new data package. - -We'll set up a new data package that processes the `cars` data by subsetting it to include only measurements of stopping distances of cars at speeds greater than 20 mph. It is processed using an Rmd file located in `inst/extdata/tests/subsetCars.Rmd` that produces a new object called `cars_over_20`. The package will be called `Test`. The work will be done in the system `/tmp` directory. ```r -library(data.tree) library(DataPackageR) -tmp = normalizePath(tempdir()) -processing_code = system.file("extdata","tests","subsetCars.Rmd",package="DataPackageR") -print(processing_code) -[1] "/Users/gfinak/Documents/Projects/DataPackageR/inst/extdata/tests/subsetCars.Rmd" -setwd(tmp) -DataPackageR::datapackage.skeleton("Test", - force=TRUE, - code_files = processing_code, - r_object_names = "cars_over_20") # cars_over_20 is an R object -Warning in DataPackageR::datapackage.skeleton("Test", force = TRUE, -code_files = processing_code, : Please use datapackage_skeleton() instead -of datapackage.skeleton() + +# Let's reproducibly package up +# the cars in the mtcars dataset +# with speed > 20. +# Our dataset will be called cars_over_20. + +# Get the code file that turns the raw data +# to our packaged and processed analysis-ready dataset. +processing_code <- + system.file("extdata", + "tests", + "subsetCars.Rmd", + package = "DataPackageR") + +# Create the package framework. +DataPackageR::datapackage_skeleton( + "mtcars20", + force = TRUE, + code_files = processing_code, + r_object_names = "cars_over_20", + path = tempdir() + ) Creating directories ... Creating DESCRIPTION ... Creating NAMESPACE ... @@ -102,42 +62,45 @@ Creating Read-and-delete-me ... Saving functions and data ... Making help files ... Done. -Further steps are described in './Test/Read-and-delete-me'. 
+Further steps are described in '/var/folders/jh/x0h3v3pd4dd497g3gtzsm8500000gn/T//Rtmp3EWJ9k/mtcars20/Read-and-delete-me'. Adding DataVersion string to DESCRIPTION Creating data and data-raw directories configuring yaml file - # created in the Rmd file. ``` -### Package skeleton structure - -This has created a directory, "Test" with the skeleton of a data package. +### What's in the package skeleton structure? -The `DESCRIPTION` file should be filled out to describe your package. It contains a new `DataVersion` string, and the -revision is automatically incremented if the packaged data changes. +This has created a datapackage source tree named "mtcars2" (in a temporary directory). +For a real use case you would pick a `path` on your filesystem where you could then initialize a new github repository for the package. -`Read-and-delete-me` has some helpful instructions on how to proceed. - -The `data-raw` directory is where the data cleaning code (`Rmd`) files reside. -The contents of this directory are: +The contents of `mtcars20` are: ``` - levelName -1 Test -2 °--Test -3 ¦--DESCRIPTION -4 ¦--Read-and-delete-me -5 ¦--data-raw -6 ¦ °--subsetCars.Rmd -7 °--datapackager.yml + levelName +1 mtcars20 +2 ¦--DESCRIPTION +3 ¦--R +4 ¦--Read-and-delete-me +5 ¦--data +6 ¦--data-raw +7 ¦ °--subsetCars.Rmd +8 ¦--datapackager.yml +9 ¦--inst +10 ¦ °--extdata +11 °--man ``` -`datapackager.yml` can be edited as necessary to include additional processing scripts (which should be placed in `data-raw`), and raw data should be located under under `/inst/extdata`. It should be copied into that path and the data munging scripts edited to read from there. +You should fill out the `DESCRIPTION` file to describe your data package. +It contains a new `DataVersion` string that will be automatically incremented when the data package is built *if the packaged data has changed*. + +The user-provided code files reside in `data-raw`. They are executed during the data package build process. -### Yaml configuration +### A few words abou the YAML config file -Here are the contents of `datapackager.yml`: +A `datapackager.yml` file is used to configure and control the build process. + +The contents are: ``` @@ -148,83 +111,113 @@ configuration: enabled: yes objects: cars_over_20 render_root: - tmp: '787709' + tmp: '95288' ``` -It includes a `files` property that has an entry for each script, with the `name:` and `enabled:` keys for each file. The `objects` property lists the data objects produced by the scripts. +The two main pieces of information in the configuration are a list of the files to be processed and the data sets the package will store. + +This example packages an R data set named `cars_over_20` (the name was passed in to `datapackage_skeleton()`). +It is created by the `subsetCars.Rmd` file. + + +The objects must be listed in the yaml configuration file. `datapackage_skeleton()` ensures this is done for you automatically. + +DataPackageR provides an API for modifying this file, so it does not need to be done by hand. + +Further information on the contents of the YAML configuration file, and the API are in the [YAML Configuration Details](https://github.com/RGLab/DataPackageR/blob/master/YAML_CONFIG.md) + +### Where do I put raw data? -The `render_root` property specifies the directory where the Rmd files are rendered. If temporary objects are produced during the processing, they will appear in this directory without polluting the package source tree. A temporary directory is used by default. 
+Raw data (provided the size is not prohibitive) can be placed in `inst/extdata`. -### Build your package. +In this example we are reading from `data(mtcars)` rather than from the file system. -Once your scripts are in place and the data objects are documented, you build the package. - -To run the build process: +#### An API to locate data sets within an R or Rmd file. + +To locate the data to read from the filesystem: + +- `DataPackageR::project_extdata_path()` to get the path to `inst/extdata` from inside an `Rmd` or `R` file. (e.g., /var/folders/jh/x0h3v3pd4dd497g3gtzsm8500000gn/T//Rtmp3EWJ9k/mtcars20/inst/extdata) + +- `DataPackageR::project_path()` to get the path to the datapackage root. (e.g., /var/folders/jh/x0h3v3pd4dd497g3gtzsm8500000gn/T//Rtmp3EWJ9k/mtcars20) + +Raw data stored externally can be retreived relative to these paths. + + +## Build the data package. + +Once the skeleton framework is set up, ```r -# Within the package directory -setwd(tmp) -DataPackageR:::package_build("Test") -INFO [2018-07-02 12:26:31] Logging to /private/var/folders/jh/x0h3v3pd4dd497g3gtzsm8500000gn/T/Rtmp7wPrTh/Test/inst/extdata/Logfiles/processing.log -INFO [2018-07-02 12:26:31] Processing data -INFO [2018-07-02 12:26:31] Reading yaml configuration -INFO [2018-07-02 12:26:31] Found /private/var/folders/jh/x0h3v3pd4dd497g3gtzsm8500000gn/T/Rtmp7wPrTh/Test/data-raw/subsetCars.Rmd -INFO [2018-07-02 12:26:31] Processing 1 of 1: /private/var/folders/jh/x0h3v3pd4dd497g3gtzsm8500000gn/T/Rtmp7wPrTh/Test/data-raw/subsetCars.Rmd +# Run the preprocessing code to build cars_over_20 +# and reproducibly enclose it in a package. +DataPackageR:::package_build(file.path(tempdir(),"mtcars20")) +INFO [2018-07-05 11:41:30] Logging to /private/var/folders/jh/x0h3v3pd4dd497g3gtzsm8500000gn/T/Rtmp3EWJ9k/mtcars20/inst/extdata/Logfiles/processing.log +INFO [2018-07-05 11:41:30] Processing data +INFO [2018-07-05 11:41:30] Reading yaml configuration +INFO [2018-07-05 11:41:30] Found /private/var/folders/jh/x0h3v3pd4dd497g3gtzsm8500000gn/T/Rtmp3EWJ9k/mtcars20/data-raw/subsetCars.Rmd +INFO [2018-07-05 11:41:30] Processing 1 of 1: /private/var/folders/jh/x0h3v3pd4dd497g3gtzsm8500000gn/T/Rtmp3EWJ9k/mtcars20/data-raw/subsetCars.Rmd processing file: subsetCars.Rmd output file: subsetCars.knit.md -/usr/local/bin/pandoc +RTS -K512m -RTS subsetCars.utf8.md --to html4 --from markdown+autolink_bare_uris+ascii_identifiers+tex_math_single_backslash+smart --output /private/var/folders/jh/x0h3v3pd4dd497g3gtzsm8500000gn/T/Rtmp7wPrTh/Test/inst/extdata/Logfiles/subsetCars.html --email-obfuscation none --self-contained --standalone --section-divs --template /Library/Frameworks/R.framework/Versions/3.5/Resources/library/rmarkdown/rmd/h/default.html --no-highlight --variable highlightjs=1 --variable 'theme:bootstrap' --include-in-header /var/folders/jh/x0h3v3pd4dd497g3gtzsm8500000gn/T//Rtmp7wPrTh/rmarkdown-strcd9679f83bd2.html --mathjax --variable 'mathjax-url:https://mathjax.rstudio.com/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML' +/usr/local/bin/pandoc +RTS -K512m -RTS subsetCars.utf8.md --to html4 --from markdown+autolink_bare_uris+ascii_identifiers+tex_math_single_backslash+smart --output /private/var/folders/jh/x0h3v3pd4dd497g3gtzsm8500000gn/T/Rtmp3EWJ9k/mtcars20/inst/extdata/Logfiles/subsetCars.html --email-obfuscation none --self-contained --standalone --section-divs --template /Library/Frameworks/R.framework/Versions/3.5/Resources/library/rmarkdown/rmd/h/default.html --no-highlight --variable highlightjs=1 --variable 
'theme:bootstrap' --include-in-header /var/folders/jh/x0h3v3pd4dd497g3gtzsm8500000gn/T//Rtmp3EWJ9k/rmarkdown-str1c6861088f2d.html --mathjax --variable 'mathjax-url:https://mathjax.rstudio.com/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML' -Output created: Test/inst/extdata/Logfiles/subsetCars.html -INFO [2018-07-02 12:26:31] 1 required data objects created by subsetCars.Rmd -INFO [2018-07-02 12:26:31] Saving to data -INFO [2018-07-02 12:26:32] Copied documentation to /private/var/folders/jh/x0h3v3pd4dd497g3gtzsm8500000gn/T/Rtmp7wPrTh/Test/R/Test.R +Output created: /private/var/folders/jh/x0h3v3pd4dd497g3gtzsm8500000gn/T/Rtmp3EWJ9k/mtcars20/inst/extdata/Logfiles/subsetCars.html +INFO [2018-07-05 11:41:30] 1 required data objects created by subsetCars.Rmd +INFO [2018-07-05 11:41:30] Saving to data +INFO [2018-07-05 11:41:30] Copied documentation to /private/var/folders/jh/x0h3v3pd4dd497g3gtzsm8500000gn/T/Rtmp3EWJ9k/mtcars20/R/mtcars20.R ✔ Creating 'vignettes/' ✔ Creating 'inst/doc/' -INFO [2018-07-02 12:26:32] Done -INFO [2018-07-02 12:26:32] DataPackageR succeeded -INFO [2018-07-02 12:26:32] Building documentation +INFO [2018-07-05 11:41:30] Done +INFO [2018-07-05 11:41:30] DataPackageR succeeded +INFO [2018-07-05 11:41:30] Building documentation First time using roxygen2. Upgrading automatically... -Updating roxygen version in /private/var/folders/jh/x0h3v3pd4dd497g3gtzsm8500000gn/T/Rtmp7wPrTh/Test/DESCRIPTION +Updating roxygen version in /private/var/folders/jh/x0h3v3pd4dd497g3gtzsm8500000gn/T/Rtmp3EWJ9k/mtcars20/DESCRIPTION Writing NAMESPACE -Writing Test.Rd +Writing mtcars20.Rd Writing cars_over_20.Rd -INFO [2018-07-02 12:26:32] Building package +INFO [2018-07-05 11:41:30] Building package '/Library/Frameworks/R.framework/Resources/bin/R' --no-site-file \ --no-environ --no-save --no-restore --quiet CMD build \ - '/private/var/folders/jh/x0h3v3pd4dd497g3gtzsm8500000gn/T/Rtmp7wPrTh/Test' \ + '/private/var/folders/jh/x0h3v3pd4dd497g3gtzsm8500000gn/T/Rtmp3EWJ9k/mtcars20' \ --no-resave-data --no-manual --no-build-vignettes -[1] "/private/var/folders/jh/x0h3v3pd4dd497g3gtzsm8500000gn/T/Rtmp7wPrTh/Test_1.0.tar.gz" +[1] "/private/var/folders/jh/x0h3v3pd4dd497g3gtzsm8500000gn/T/Rtmp3EWJ9k/mtcars20_1.0.tar.gz" ``` -### Logging the build process +### A log of the build process + +DataPackageR uses the `futile.logger` pagckage to log progress. -DataPackageR uses the `futile.logger` pagckage to log progress. If there are errors in the processing, the script will notify you via logging to console and to `/private/tmp/Test/inst/extdata/Logfiles/processing.log`. Errors should be corrected and the build repeated. +If there are errors in the processing, the script will notify you via logging to console and to `/private/tmp/Test/inst/extdata/Logfiles/processing.log`. Errors should be corrected and the build repeated. -If everything goes smoothly, you will have a new package built in the parent directory. In this case we have a new package -`Test_1.0.tar.gz`. When the package is installed, it will contain a vignette `subsetCars` that can be loaded using the `vignette()` API. The vignette will detail the processing performed by the `subsetCars.Rmd` processing script. +If everything goes smoothly, you will have a new package built in the parent directory. -### The package source directory after building +In this case we have a new package +`mtcars20_1.0.tar.gz`. + + +### A note about the package source directory after building. + +The pacakge source directory changes after the first build. 
``` levelName -1 Test +1 mtcars20 2 ¦--DATADIGEST 3 ¦--DESCRIPTION 4 ¦--NAMESPACE 5 ¦--R -6 ¦ °--Test.R +6 ¦ °--mtcars20.R 7 ¦--Read-and-delete-me -8 ¦--data-raw -9 ¦ ¦--documentation.R -10 ¦ ¦--subsetCars.R -11 ¦ ¦--subsetCars.Rmd -12 ¦ ¦--subsetCars.knit.md -13 ¦ °--subsetCars.utf8.md -14 ¦--data -15 ¦ °--cars_over_20.rda +8 ¦--data +9 ¦ °--cars_over_20.rda +10 ¦--data-raw +11 ¦ ¦--documentation.R +12 ¦ ¦--subsetCars.R +13 ¦ ¦--subsetCars.Rmd +14 ¦ ¦--subsetCars.knit.md +15 ¦ °--subsetCars.utf8.md 16 ¦--datapackager.yml 17 ¦--inst 18 ¦ ¦--doc @@ -235,89 +228,101 @@ If everything goes smoothly, you will have a new package built in the parent dir 23 ¦ ¦--processing.log 24 ¦ °--subsetCars.html 25 ¦--man -26 ¦ ¦--Test.Rd -27 ¦ °--cars_over_20.Rd +26 ¦ ¦--cars_over_20.Rd +27 ¦ °--mtcars20.Rd 28 °--vignettes 29 °--subsetCars.Rmd ``` -#### Details +#### Update the autogenerated documentation. -A number of things have changed. The subsetCars processing script now appears under `/vignettes` and `inst/doc` as a processed html report so that it will be available to view via `vignette()` once the package is installed. -`inst/extdata/Logfiles` contains a log file of the entire build process as well as intermediate files created while parsing the R / Rmd code. Documentation Rd files appear in `/man`, these should be edite to provide further details on the data objects in the package. The data objects are stored under `/data` where we see `cars_over_20.rda`, the object we initially specified in `datapackager.yml`. +After the first build, the `R` directory contains `mtcars.R` that has autogenerated `roxygen2` markup documentation for the data package and for the packaged data `cars_over20`. +The processed `Rd` files can be found in `man`. -## Versioning data objects +#### Dont' forget to rebuild the package. -The DataPackageR package calculates an md5 checksum of each data object it stores, and keeps track of them in a file -called `DATADIGEST`. +You should update the documentation in `R/mtcars.R`, then call `package_build()` again. -- Each time the package is rebuilt, the md5 sums of the new data objects are compared against the DATADIGEST. -- If they don't match, the build process checks that the `DataVersion` string has been incremented in the `DESCRIPTION` file. -- If it has not the build process will exit and produce an error message. -### DATADIGEST +## Installing and using the new data package -The `DATADIGEST` file contains the following: +### Accessing vignettes, data sets, and data set documentation. +The package source also contains files in the `vignettes` and `inst/doc` directories that provide a log of the data processing. -``` -DataVersion: 0.1.0 -cars_over_20: 3ccb5b0aaa74fe7cfc0d3ca6ab0b5cf3 -``` +When the package is installed, these will be accessible via the `vignette()` API. +The vignette will detail the processing performed by the `subsetCars.Rmd` processing script. -### DESCRIPTION - -The description file has the new `DataVersion` string. +The data set documentation will be accessible via `?cars_over_20`, and the data sets via `data()`. -``` -Package: Test -Type: Package -Title: What the package does (short line) -Version: 1.0 -Date: 2018-07-02 -Author: Who wrote it -Maintainer: Who to complain to -Description: More about what it does (maybe more than one line) -License: What license is it under? -DataVersion: 0.1.0 -RoxygenNote: 6.0.1 +```r +# Let's use the package we just created. 
+install.packages(file.path(tempdir(),"mtcars20_1.0.tar.gz"), type = "source", repos = NULL)
+library(mtcars20)
+data("cars_over_20") # load the data
+cars_over_20 # Now we can use it.
+   speed dist
+44    22   66
+45    23   54
+46    24   70
+47    24   92
+48    24   93
+49    24  120
+50    25   85
+?cars_over_20 # See the documentation you wrote in data-raw/documentation.R.
+
+vignettes = vignette(package="mtcars20")
+vignettes$results
+      Package
+Topic "mtcars20"
+      LibPath
+Topic "/Library/Frameworks/R.framework/Versions/3.5/Resources/library"
+      Item         Title
+Topic "subsetCars" "A Test Document for DataPackageR (source, html)"
```

-### Next steps
-Your downstream data analysis can depend on a specific version of your data package (for example by tesing the `packageVersion()` string);
+### Using the DataVersion

-```r{}
-if(DataPackageR::packageVersion("MyNewStudy") != "1.0.0")
-  stop("The expected version of MyNewStudy is 1.0.0, but ",packageVersion("MyNewStudy")," is installed! Analysis results may differ!")
-```
+Your downstream data analysis can depend on a specific version of the data in your data package by testing the `DataVersion` string in the `DESCRIPTION` file.

-The DataPackageR packge also provides `datasetVersion()` to extract the data set version information.
+We provide an API for this:

-You should also place the data package source directory under `git` version control.
-This allows you to version control your data processing code.
-### Why not use R CMD build?
+```r
+# We can easily check the version of the data.
+DataPackageR::data_version("mtcars20")
+[1] '0.1.0'
+
+# You can use an assert to check the data version in reports and
+# analyses that use the packaged data.
+DataPackageR::assert_data_version(data_package_name = "mtcars20",
+                                  version_string = "0.1.0",
+                                  acceptable = "equal") # If this fails, execution stops
+                                                        # and provides an informative error.
+```

-If the processing script is time consuming or the data set is particularly large, then `R CMD build` would run the code each time the package is installed. In such cases, raw data may not be available, or the environment to do the data processing may not be set up for each user of the data. In such cases, DataPackageR provides a mechanism to decouple data processing from package building/installation for downstream users of the data.
+# Next steps
+
+You should place the data package source directory under `git` version control.
+This allows you to track changes to your data processing code over time.

+# Partial builds and migrating old data packages.

-## Partial builds and migrating old data packages.
+Version 1.12.0 has moved away from controlling the build process using `datasets.R` and an additional `masterfile` argument.

-Version 1.12.0 has moved away from controlling the build process using `datasets.R` and an additional `masterfile` argument. The build process is now controlled via a `datapackager.yml` configuration file located in the package root directory.
+The build process is now controlled via a `datapackager.yml` configuration file located in the package root directory.
(see [YAML Configuration Details](https://github.com/RGLab/DataPackageR/blob/master/YAML_CONFIG.md))

You can migrate an old package by constructing such a config file using the `construct_yml_config()` API.

```r
-#assume I have file1.Rmd and file2.R located in /data-raw, and these create 'object1' and 'object2' respectively.
-
-config = construct_yml_config(code = c("file1.Rmd","file2.R"), data = c("object1","object2"))
-cat(as.yaml(config))
+# assume I have file1.Rmd and file2.R located in /data-raw,
+# and these create 'object1' and 'object2' respectively.
+config = construct_yml_config(code = c("file1.Rmd", "file2.R"),
+                              data = c("object1", "object2"))
+cat(as.yaml(config))
configuration:
  files:
    file1.Rmd:
@@ -330,25 +335,29 @@ configuration:
    - object1
    - object2
  render_root:
-    tmp: '127771'
+    tmp: '288022'
```

`config` is a newly constructed yaml configuration object. It can be written to the package directory:


```r
-path_to_package = tempdir() #pretend this is the root of our package
-yml_write(config,path = path_to_package)
+path_to_package = tempdir() # e.g., if tempdir() were the root of our package.
+yml_write(config, path = path_to_package)
```

Now the package at `path_to_package` will build with version 1.12.0 or greater.

-We can also perform partial builds of a subset of files in a package by toggling the `enabled` key in the config file. This can be done with the following API:
+## Partial builds
+
+We can also perform partial builds of a subset of files in a package by toggling the `enabled` key in the config file.
+
+This can be done with the following API:

```r
config = yml_disable_compile(config,filenames = "file2.R")
-cat(as.yaml(config))
+yml_write(config, path = path_to_package) # write the modified yml back to the package.
+cat(as.yaml(config))
configuration:
  files:
    file1.Rmd:
@@ -361,11 +370,88 @@ configuration:
    - object1
    - object2
  render_root:
-    tmp: '127771'
+    tmp: '288022'
```

-Where `config` is a configuration read from a data package root directory. The `config` object needs to be written back to the package root in order for the changes to take effect. The consequence of toggling a file to `enable: no` is that it will be skipped when the package is built, but the data will be retained, and the documentation will not be altered.
+Note that the modified configuration needs to be written back to the package source directory in order for the
+changes to take effect.
+
+The consequence of toggling a file to `enabled: no` is that it will be skipped when the package is rebuilt,
+but the data will still be retained in the package, and the documentation will not be altered.
+
+This is useful when a package has multiple data sets and we want to re-run one script to update a specific data set,
+without re-running the other, possibly time-consuming, scripts.
+
+# Multi-script pipelines.
+
+We may have multi-script pipelines, where processing is split across several scripts. There are two ways to share data among them:
+1. file system artifacts
+2. data objects passed to subsequent scripts
+
+### File system artifacts
+
+The yaml configuration property `render_root` specifies the working directory where scripts will be rendered.
+
+If a script writes files to its working directory, they will appear under `render_root`, where subsequent scripts can read them.
+
+### Passing data objects to subsequent scripts.
+
+A script (e.g., `script2.Rmd`) that runs after `script1.Rmd` can access a data object named `script1_dataset`, created by `script1.Rmd`, by calling
+
+`DataPackageR::datapackager_object_read("script1_dataset")`.
+
+A short sketch of such a two-script pipeline is given under Additional Details, below.
+
+# Additional Details
+
+We provide some additional details for the interested reader.
+
+### Fingerprints of stored data objects
+
+DataPackageR calculates an md5 checksum of each data object it stores, and keeps track of them in a file
+called `DATADIGEST`.
+
+- Each time the package is rebuilt, the md5 sums of the new data objects are compared against the `DATADIGEST`.
+- If they don't match, the build process checks that the `DataVersion` string has been incremented in the `DESCRIPTION` file.
+- If it has not, the build process will exit and produce an error message.
+
+#### DATADIGEST
+
+
+The `DATADIGEST` file contains the following:
+
+
+```
+DataVersion: 0.1.0
+cars_over_20: 3ccb5b0aaa74fe7cfc0d3ca6ab0b5cf3
+```
+
+
+#### DESCRIPTION
+
+The `DESCRIPTION` file carries the new `DataVersion` string.
+
+
+```
+Package: mtcars20
+Type: Package
+Title: What the package does (short line)
+Version: 1.0
+Date: 2018-07-05
+Author: Who wrote it
+Maintainer: Who to complain to
+Description: More about what it does (maybe more than one line)
+License: What license is it under?
+DataVersion: 0.1.0
+Suggests:
+    knitr,
+    rmarkdown
+VignetteBuilder: knitr
+RoxygenNote: 6.0.1
+```
+
+## Why not use R CMD build?
+
+If the processing script is time consuming or the data set is particularly large, then `R CMD build` would run the code each time the package is installed. The raw data may not be available to every user, or the environment needed to do the processing may be difficult to set up for each user of the data. DataPackageR therefore provides a mechanism to decouple data processing from package building and installation for downstream users of the data.
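+
+## A sketch of a multi-script pipeline
+
+The following is a minimal, hypothetical sketch: the file names `script1.Rmd` and `script2.Rmd` and the object names `script1_dataset` and `script2_dataset` are illustrative only, and are not part of the `mtcars20` example built above. It shows how a two-script pipeline could be declared with `construct_yml_config()`, and how the second script can read an object created by the first via `datapackager_object_read()`.
+
+```r
+# Declare both processing scripts and the data objects they create
+# (hypothetical names), mirroring the construct_yml_config() and
+# yml_write() usage shown earlier in this vignette.
+config <- construct_yml_config(code = c("script1.Rmd", "script2.Rmd"),
+                               data = c("script1_dataset", "script2_dataset"))
+yml_write(config, path = path_to_package)
+
+# Inside script2.Rmd: read the object that script1.Rmd created earlier in the
+# same build, then derive the object that script2.Rmd itself is responsible for.
+script1_dataset <- DataPackageR::datapackager_object_read("script1_dataset")
+script2_dataset <- head(script1_dataset, 10)
+```
+
+Any files that `script1.Rmd` writes to its working directory land under `render_root` and can likewise be read from there by `script2.Rmd`.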