Skip to content

Commit

Permalink
This commit and previous should address remaining ropensci reviewer i…
Browse files Browse the repository at this point in the history
  • Loading branch information
gfinak committed Jul 5, 2018
1 parent d3554c6 commit 691e8e4
Show file tree
Hide file tree
Showing 10 changed files with 718 additions and 468 deletions.
5 changes: 3 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ Title: Construct Reproducible Analytic Data Sets as R Packages
Authors@R:
c(person(given = "Greg Finak", role=c("aut","cre","cph"), email="[email protected]"),
person(given = "Paul Obrecht", role=c("ctb")))
Version: 0.13.6
Version: 0.14.0
Description: Construct reproducible analytic data sets as R packages.
License: MIT + file LICENSE
Depends: R (>= 3.5.0)
Expand All @@ -28,6 +28,7 @@ RoxygenNote: 6.0.1
Suggests:
testthat,
covr,
data.tree
data.tree,
mtcars20
URL: https://github.com/RGLab/DataPackageR
BugReports: https://github.com/RGLab/DataPackageR/issues
6 changes: 4 additions & 2 deletions R/build.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
#' @param packageName \code{character} path to package source directory. Defaults to the current path when NULL.
#' @param vignettes \code{logical} specify whether to build vignettes. Default FALSE.
#' @param log log level \code{INFO,WARN,DEBUG,FATAL}
#' @param deps \code{logical} should we pass data objects into subsequent scripts? Default TRUE
#' @importFrom roxygen2 roxygenise roxygenize
#' @importFrom devtools build_vignettes build parse_deps
#' @importFrom usethis use_build_ignore use_rstudio proj_set use_directory
Expand All @@ -32,7 +33,8 @@
#' package_build(file.path(tempdir(),pname))
package_build <- function(packageName = NULL,
vignettes = FALSE,
log=INFO) {
log = INFO,
deps = TRUE) {
flog.threshold(log)
flog.appender(appender.console())
requireNamespace("rprojroot")
Expand Down Expand Up @@ -71,7 +73,7 @@ package_build <- function(packageName = NULL,

# Return success if we've processed everything
success <-
DataPackageR(arg = package_path)
DataPackageR(arg = package_path, deps = deps)
ifelse(success,
flog.info("DataPackageR succeeded"),
flog.warn("DataPackageR failed")
Expand Down
27 changes: 9 additions & 18 deletions R/processData.R
Original file line number Diff line number Diff line change
Expand Up @@ -108,12 +108,13 @@ NULL
#' Meant to be called before R CMD build.
#' @name DataPackageR
#' @param arg \code{character} name of the package to build.
#' @param deps \code{logical} should scripts pass data objects to each other (default=TRUE)
#' @return logical TRUE if successful, FALSE if not.
#' @importFrom desc desc
#' @importFrom rmarkdown render
#' @importFrom utils getSrcref modifyList
#' @importFrom usethis proj_set proj_get
DataPackageR <- function(arg = NULL) {
DataPackageR <- function(arg = NULL, deps = TRUE) {
requireNamespace("futile.logger")
requireNamespace("yaml")
pkg_dir <- arg
Expand All @@ -132,7 +133,7 @@ DataPackageR <- function(arg = NULL) {
stop("exiting", call. = FALSE)
}
} else {
logpath <-
logpath <-
normalizePath(
file.path(pkg_dir, "inst/extdata"),
winslash = "/"
Expand Down Expand Up @@ -232,20 +233,9 @@ DataPackageR <- function(arg = NULL) {
pkg_description <- try(read.description(file = description_file),
silent = TRUE
)
if (inherits(pkg_description, "try-error")) {
flog.fatal("No valid DESCRIPTION file")
{
stop(
paste0(
"You need a valid package DESCRIPTION file.",
"Please see Writing R Extensions",
"(http://cran.r-project.org/doc/manuals/",
"r-release/R-exts.html#The-DESCRIPTION-file).\n"
),
pkg_description
)
}
}
# The test for a valid DESCRIPTION here is no longer needed since
# we use proj_set().

# check that we have at least one file
# This is caught elsewhere

Expand All @@ -264,8 +254,9 @@ DataPackageR <- function(arg = NULL) {
for (i in seq_along(r_files)) {
dataenv <- new.env(hash = TRUE, parent = .GlobalEnv)
# assign ENVS into dataenv.
# provide functions in the package to read from it.
assign(x = "ENVS", value = ENVS, dataenv)
# provide functions in the package to read from it (if deps = TRUE)
if(deps)
assign(x = "ENVS", value = ENVS, dataenv)
flog.info(paste0(
"Processing ", i, " of ",
length(r_files), ": ", r_files[i],
Expand Down
4 changes: 3 additions & 1 deletion man/DataPackageR.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 4 additions & 1 deletion man/package_build.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

10 changes: 7 additions & 3 deletions tests/testthat/test-skeleton.R
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,8 @@ test_that("package can be built from different locations", {
file.path(tmp, "subsetCars"))),
"subsetCars_1.0.tar.gz")

old <- setwd(file.path(tmp, "subsetCars"))
old <-
setwd(file.path(tmp, "subsetCars"))
on.exit(setwd(old))
expect_equal(basename(package_build(".")), "subsetCars_1.0.tar.gz")
expect_error(package_build("subsetCars"))
Expand Down Expand Up @@ -474,6 +475,9 @@ test_that("package built in different edge cases", {
force = TRUE,
recursive = TRUE)
package.skeleton("foo", path = tmp)
suppressWarnings(expect_error(
DataPackageR:::DataPackageR(
file.path(tmp, "foo"))))
dir.create(file.path(tmp, "foo", "data-raw"))
suppressWarnings(expect_error(
DataPackageR:::DataPackageR(
Expand All @@ -492,8 +496,8 @@ test_that("package built in different edge cases", {
recursive = TRUE)


package.skeleton("foo", path = tmp)
expect_error(yml_find(file.path(tmp, "foo")))
package.skeleton("foo", path = tempdir(),force=TRUE)
expect_error(yml_find(file.path(tempdir(), "foo")))
dir.create(file.path(tmp, "foo", "data-raw"))
unlink(file.path(tmp, "foo", "DESCRIPTION"))
yml <- DataPackageR:::construct_yml_config("foo.Rmd")
Expand Down
120 changes: 83 additions & 37 deletions vignettes/usingDataPackageR.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,58 +5,104 @@ knitr::opts_chunk$set(
eval = TRUE
)

## ------------------------------------------------------------------------
library(data.tree)
## ----minimal_example, results='hide'-------------------------------------
library(DataPackageR)
tmp = normalizePath(tempdir())
processing_code = system.file("extdata","tests","subsetCars.Rmd",package="DataPackageR")
print(processing_code)
setwd(tmp)
DataPackageR::datapackage.skeleton("Test",
force=TRUE,
code_files = processing_code,
r_object_names = "cars_over_20") # cars_over_20 is an R object
# created in the Rmd file.

# Let's reproducibly package up
# the cars in the mtcars dataset
# with speed > 20.
# Our dataset will be called cars_over_20.

# Get the code file that turns the raw data
# to our packaged and processed analysis-ready dataset.
processing_code <-
system.file("extdata",
"tests",
"subsetCars.Rmd",
package = "DataPackageR")

# Create the package framework.
DataPackageR::datapackage_skeleton(
"mtcars20",
force = TRUE,
code_files = processing_code,
r_object_names = "cars_over_20",
path = tempdir()
)

## ----dirstructure,echo=FALSE---------------------------------------------
df = data.frame(pathString=file.path("Test",(list.files(tmp,recursive=TRUE))))
library(data.tree)
df = data.frame(pathString = file.path(
"mtcars20",
list.files(
file.path(tempdir(), "mtcars20"),
include.dirs = TRUE,
recursive = TRUE
)
))
as.Node(df)

## ---- echo=FALSE---------------------------------------------------------
library(yaml)
setwd(tmp)
cat(as.yaml(yaml.load_file("Test/datapackager.yml")))
cat(yaml::as.yaml(yaml::yaml.load_file(file.path(tempdir(),"mtcars20","datapackager.yml"))))

## ------------------------------------------------------------------------
# Within the package directory
setwd(tmp)
DataPackageR:::package_build("Test")
## ----eval=TRUE-----------------------------------------------------------
# Run the preprocessing code to build cars_over_20
# and reproducibly enclose it in a package.
DataPackageR:::package_build(file.path(tempdir(),"mtcars20"))

## ---- echo=FALSE---------------------------------------------------------
library(yaml)
setwd(tmp)
df = data.frame(pathString=file.path("Test",(list.files("Test",recursive=TRUE))))
as.Node(df)
df = data.frame(pathString = file.path(
"mtcars20",
list.files(
file.path(tempdir(), "mtcars20"),
include.dirs = TRUE,
recursive = TRUE
)
))
as.Node(df)

## ---- echo=FALSE---------------------------------------------------------
setwd(tmp)
cat(readLines("Test/DATADIGEST"),sep="\n")
## ------------------------------------------------------------------------
# Let's use the package we just created.
install.packages(file.path(tempdir(),"mtcars20_1.0.tar.gz"), type = "source", repos = NULL)
library(mtcars20)
data("cars_over_20") # load the data
cars_over_20 # Now we can use it.
?cars_over_20 # See the documentation you wrote in data-raw/documentation.R.

## ----echo=FALSE----------------------------------------------------------
setwd(tmp)
cat(readLines("Test/DESCRIPTION"),sep="\n")
vignettes = vignette(package="mtcars20")
vignettes$results

## ----construct_config----------------------------------------------------
#assume I have file1.Rmd and file2.R located in /data-raw, and these create 'object1' and 'object2' respectively.
## ------------------------------------------------------------------------
# We can easily check the version of the data
DataPackageR::data_version("mtcars20")

config = construct_yml_config(code = c("file1.Rmd","file2.R"), data = c("object1","object2"))
cat(as.yaml(config))
# You can use an assert to check the data version in reports and
# analyses that use the packaged data.
assert_data_version(data_package_name = "mtcars20",
version_string = "0.1.0",
acceptable = "equal") #If this fails, execution stops
#and provides an informative error.

## ------------------------------------------------------------------------
path_to_package = tempdir() #pretend this is the root of our package
yml_write(config,path = path_to_package)
## ----construct_config, echo=1:2------------------------------------------
# assume I have file1.Rmd and file2.R located in /data-raw,
# and these create 'object1' and 'object2' respectively.

config = construct_yml_config(code = c("file1.Rmd", "file2.R"),
data = c("object1", "object2"))
cat(yaml::as.yaml(config))

## ------------------------------------------------------------------------
path_to_package = tempdir() #e.g., if tempdir() was the root of our package.
yml_write(config, path = path_to_package)

## ----echo=1:2------------------------------------------------------------
config = yml_disable_compile(config,filenames = "file2.R")
cat(as.yaml(config))
yml_write(config, path = path_to_package) # write modified yml to the package.
cat(yaml::as.yaml(config))

## ---- echo=FALSE---------------------------------------------------------
cat(readLines(file.path(tempdir(),"mtcars20","DATADIGEST")),sep="\n")

## ----echo=FALSE----------------------------------------------------------
cat(readLines(file.path(tempdir(),"mtcars20","DESCRIPTION")),sep="\n")

5 changes: 5 additions & 0 deletions vignettes/usingDataPackageR.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -290,6 +290,11 @@ A script (e.g., `script2.Rmd`) running after `script1.Rmd` can access a stored d

`DataPackageR::datapackager_object_read("script1_dataset")`.

Passing of data objects amongst scripts can be turned off via:

`package_build(deps = FALSE)`


# Additional Details

We provide some additional details for the interested.
Expand Down
Loading

0 comments on commit 691e8e4

Please sign in to comment.