From 2905adecd4ffc01f18e9dd6092f4559baed38224 Mon Sep 17 00:00:00 2001 From: Vadim Khotilovich Date: Sat, 14 Jan 2017 20:56:47 -0600 Subject: [PATCH 01/14] [R] xgb.save must work when handle is nil but raw exists --- R-package/R/xgb.save.R | 1 + R-package/tests/testthat/test_helpers.R | 9 +++++++++ 2 files changed, 10 insertions(+) diff --git a/R-package/R/xgb.save.R b/R-package/R/xgb.save.R index 5b2421b7fabd..b4075defb870 100644 --- a/R-package/R/xgb.save.R +++ b/R-package/R/xgb.save.R @@ -22,6 +22,7 @@ xgb.save <- function(model, fname) { if (class(model) != "xgb.Booster") stop("the input must be xgb.Booster. Use xgb.DMatrix.save to save xgb.DMatrix object.") + model <- xgb.Booster.check(model, saveraw = FALSE) .Call("XGBoosterSaveModel_R", model$handle, fname, PACKAGE = "xgboost") return(TRUE) } diff --git a/R-package/tests/testthat/test_helpers.R b/R-package/tests/testthat/test_helpers.R index 4536fe50e447..508f77a0fbf8 100644 --- a/R-package/tests/testthat/test_helpers.R +++ b/R-package/tests/testthat/test_helpers.R @@ -100,6 +100,15 @@ if (grepl('Windows', Sys.info()[['sysname']]) || grepl('Linux', Sys.info()[['sys }) } +test_that("xgb.Booster serializing as R object works", { + saveRDS(bst.Tree, 'xgb.model.rds') + bst <- readRDS('xgb.model.rds') + dtrain <- xgb.DMatrix(sparse_matrix, label = label) + expect_equal(predict(bst.Tree, dtrain), predict(bst, dtrain)) + expect_equal(xgb.dump(bst.Tree), xgb.dump(bst)) + xgb.save(bst, 'xgb.model') +}) + test_that("xgb.model.dt.tree works with and without feature names", { names.dt.trees <- c("Tree", "Node", "ID", "Feature", "Split", "Yes", "No", "Missing", "Quality", "Cover") dt.tree <- xgb.model.dt.tree(feature_names = feature.names, model = bst.Tree) From 150b936958fe096b65e615626fe615f1c4630946 Mon Sep 17 00:00:00 2001 From: Vadim Khotilovich Date: Sat, 14 Jan 2017 20:58:04 -0600 Subject: [PATCH 02/14] [R] print.xgb.Booster should still print other info when handle is nil --- R-package/R/xgb.Booster.R | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/R-package/R/xgb.Booster.R b/R-package/R/xgb.Booster.R index cf428e4e8f15..a8a4bd6fed63 100644 --- a/R-package/R/xgb.Booster.R +++ b/R-package/R/xgb.Booster.R @@ -429,9 +429,10 @@ xgb.ntree <- function(bst) { print.xgb.Booster <- function(x, verbose=FALSE, ...) { cat('##### xgb.Booster\n') + valid_handle <- TRUE if (is.null(x$handle) || .Call("XGCheckNullPtr_R", x$handle, PACKAGE="xgboost")) { cat("handle is invalid\n") - return(x) + valid_handle <- FALSE } cat('raw: ') @@ -454,7 +455,9 @@ print.xgb.Booster <- function(x, verbose=FALSE, ...)
{ } # TODO: need an interface to access all the xgboosts parameters - attrs <- xgb.attributes(x) + attrs <- character(0) + if (valid_handle) + attrs <- xgb.attributes(x) if (length(attrs) > 0) { cat('xgb.attributes:\n') if (verbose) { From 01dc2c6036b9cc1d18ddf8c7fa1f4801dbbc2bb4 Mon Sep 17 00:00:00 2001 From: Vadim Khotilovich Date: Sat, 14 Jan 2017 21:17:22 -0600 Subject: [PATCH 03/14] [R] rename internal function xgb.Booster to xgb.Booster.handle to make its intent clear --- R-package/R/xgb.Booster.R | 6 +++--- R-package/R/xgb.cv.R | 4 ++-- R-package/R/xgb.load.R | 2 +- R-package/R/xgb.train.R | 2 +- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/R-package/R/xgb.Booster.R b/R-package/R/xgb.Booster.R index a8a4bd6fed63..7c6f1bda511f 100644 --- a/R-package/R/xgb.Booster.R +++ b/R-package/R/xgb.Booster.R @@ -1,6 +1,6 @@ -# Construct a Booster from cachelist +# Construct an internal xgboost Booster and return a handle to it # internal utility function -xgb.Booster <- function(params = list(), cachelist = list(), modelfile = NULL) { +xgb.Booster.handle <- function(params = list(), cachelist = list(), modelfile = NULL) { if (typeof(cachelist) != "list" || any(sapply(cachelist, class) != 'xgb.DMatrix')) { stop("xgb.Booster only accepts list of DMatrix as cachelist") @@ -59,7 +59,7 @@ xgb.Booster.check <- function(bst, saveraw = TRUE) { isnull <- .Call("XGCheckNullPtr_R", bst$handle, PACKAGE="xgboost") } if (isnull) { - bst$handle <- xgb.Booster(modelfile = bst$raw) + bst$handle <- xgb.Booster.handle(modelfile = bst$raw) } else { if (is.null(bst$raw) && saveraw) bst$raw <- xgb.save.raw(bst$handle) diff --git a/R-package/R/xgb.cv.R b/R-package/R/xgb.cv.R index 8325f5976601..d455a407933d 100644 --- a/R-package/R/xgb.cv.R +++ b/R-package/R/xgb.cv.R @@ -181,8 +181,8 @@ xgb.cv <- function(params=list(), data, nrounds, nfold, label = NULL, missing = bst_folds <- lapply(1:length(folds), function(k) { dtest <- slice(dall, folds[[k]]) dtrain <- slice(dall, unlist(folds[-k])) - bst <- xgb.Booster(params, list(dtrain, dtest)) - list(dtrain=dtrain, bst=bst, watchlist=list(train=dtrain, test=dtest), index=folds[[k]]) + handle <- xgb.Booster.handle(params, list(dtrain, dtest)) + list(dtrain=dtrain, bst=handle, watchlist=list(train=dtrain, test=dtest), index=folds[[k]]) }) # a "basket" to collect some results from callbacks basket <- list() diff --git a/R-package/R/xgb.load.R b/R-package/R/xgb.load.R index a93a2c042b15..424205f881f0 100644 --- a/R-package/R/xgb.load.R +++ b/R-package/R/xgb.load.R @@ -19,7 +19,7 @@ xgb.load <- function(modelfile) { if (is.null(modelfile)) stop("xgb.load: modelfile cannot be NULL") - handle <- xgb.Booster(modelfile = modelfile) + handle <- xgb.Booster.handle(modelfile = modelfile) # re-use modelfile if it is raw so we do not need to serialize if (typeof(modelfile) == "raw") { bst <- xgb.handleToBooster(handle, modelfile) diff --git a/R-package/R/xgb.train.R b/R-package/R/xgb.train.R index 2ed2194d3a0c..5e11a29fc9b2 100644 --- a/R-package/R/xgb.train.R +++ b/R-package/R/xgb.train.R @@ -288,7 +288,7 @@ xgb.train <- function(params = list(), data, nrounds, watchlist = list(), is_update <- NVL(params[['process_type']], '.') == 'update' # Construct a booster (either a new one or load from xgb_model) - handle <- xgb.Booster(params, append(watchlist, dtrain), xgb_model) + handle <- xgb.Booster.handle(params, append(watchlist, dtrain), xgb_model) bst <- xgb.handleToBooster(handle) # extract parameters that can affect the relationship b/w #trees and #iterations From 
54d9c03e7616fbc6bb409ce6b66f37a3f9b5d7f8 Mon Sep 17 00:00:00 2001 From: Vadim Khotilovich Date: Sun, 15 Jan 2017 01:27:52 -0600 Subject: [PATCH 04/14] [R] rename xgb.Booster.check to xgb.Booster.complete and make it visible; more docs --- R-package/NAMESPACE | 1 + R-package/R/callbacks.R | 2 +- R-package/R/xgb.Booster.R | 80 +++++++++++++++++++------ R-package/R/xgb.dump.R | 21 ++++--- R-package/R/xgb.load.R | 21 ++++++- R-package/R/xgb.save.R | 21 +++++-- R-package/R/xgb.train.R | 2 +- R-package/man/xgb.dump.Rd | 16 ++--- R-package/man/xgb.load.Rd | 19 +++++- R-package/man/xgb.save.Rd | 19 +++++- R-package/tests/testthat/test_helpers.R | 6 ++ 11 files changed, 159 insertions(+), 49 deletions(-) diff --git a/R-package/NAMESPACE b/R-package/NAMESPACE index 4182179ad3dc..c3fac04e30d8 100644 --- a/R-package/NAMESPACE +++ b/R-package/NAMESPACE @@ -24,6 +24,7 @@ export(cb.save.model) export(getinfo) export(setinfo) export(slice) +export(xgb.Booster.complete) export(xgb.DMatrix) export(xgb.DMatrix.save) export(xgb.attr) diff --git a/R-package/R/callbacks.R b/R-package/R/callbacks.R index f35565273eda..c66bbfd6bfd3 100644 --- a/R-package/R/callbacks.R +++ b/R-package/R/callbacks.R @@ -507,7 +507,7 @@ cb.cv.predict <- function(save_models = FALSE) { if (save_models) { env$basket$models <- lapply(env$bst_folds, function(fd) { xgb.attr(fd$bst, 'niter') <- env$end_iteration - 1 - xgb.Booster.check(xgb.handleToBooster(fd$bst), saveraw = TRUE) + xgb.Booster.complete(xgb.handleToBooster(fd$bst), saveraw = TRUE) }) } } diff --git a/R-package/R/xgb.Booster.R b/R-package/R/xgb.Booster.R index 7c6f1bda511f..b2543f8c4e0f 100644 --- a/R-package/R/xgb.Booster.R +++ b/R-package/R/xgb.Booster.R @@ -13,8 +13,8 @@ xgb.Booster.handle <- function(params = list(), cachelist = list(), modelfile = } else if (typeof(modelfile) == "raw") { .Call("XGBoosterLoadModelFromRaw_R", handle, modelfile, PACKAGE = "xgboost") } else if (class(modelfile) == "xgb.Booster") { - modelfile <- xgb.Booster.check(modelfile, saveraw=TRUE) - .Call("XGBoosterLoadModelFromRaw_R", handle, modelfile$raw, PACKAGE = "xgboost") + bst <- xgb.Booster.complete(modelfile, saveraw=TRUE) + .Call("XGBoosterLoadModelFromRaw_R", handle, bst$raw, PACKAGE = "xgboost") } else { stop("modelfile must be either character filename, or raw booster dump, or xgb.Booster object") } @@ -34,6 +34,17 @@ xgb.handleToBooster <- function(handle, raw = NULL) { return(bst) } +# Check whether xgb.Booster.handle is null +# internal utility function +is.null.handle <- function(handle) { + if (class(handle) != "xgb.Booster.handle") + stop("argument type must be xgb.Booster.handle") + + if (is.null(handle) || .Call("XGCheckNullPtr_R", handle, PACKAGE="xgboost")) + return(TRUE) + return(FALSE) +} + # Return a verified to be valid handle out of either xgb.Booster.handle or xgb.Booster # internal utility function xgb.get.handle <- function(object) { @@ -42,23 +53,57 @@ xgb.get.handle <- function(object) { xgb.Booster.handle = object, stop("argument must be of either xgb.Booster or xgb.Booster.handle class") ) - if (is.null(handle) || .Call("XGCheckNullPtr_R", handle, PACKAGE="xgboost")) { + if (is.null.handle(handle)) { stop("invalid xgb.Booster.handle") } handle } -# Check whether an xgb.Booster object is complete -# internal utility function -xgb.Booster.check <- function(bst, saveraw = TRUE) { +#' Restore missing parts of an incomplete xgb.Booster object. 
+#' +#' It attempts to complete an \code{xgb.Booster} object by restoring either its missing +#' raw model memory dump (when it has no \code{raw} data but its \code{xgb.Booster.handle} is valid) +#' or its missing internal handle (when its \code{xgb.Booster.handle} is not valid +#' but it has a raw Booster memory dump). +#' +#' @param bst Object of class \code{xgb.Booster} +#' @param saveraw a flag indicating whether to append \code{raw} Booster memory dump data +#' when it doesn't already exist. +#' +#' @details +#' +#' While this method is primarily for internal use, it might be useful in some practical situations. +#' +#' E.g., when an \code{xgb.Booster} model is saved as an R object and then is loaded back, +#' its handle (pointer) to an internal xgboost model would be invalid. The majority of xgboost methods +#' should still work for such a model object since those methods would be using +#' \code{xgb.Booster.complete} internally. However, one might find it more efficient to call the +#' \code{xgb.Booster.complete} function once after loading a model as an R object. That would +#' prevent further reconstruction (potentially, multiple times) of an internal booster model. +#' +#' @return +#' An object of \code{xgb.Booster} class. +#' +#' @examples +#' +#' data(agaricus.train, package='xgboost') +#' bst <- xgboost(data = agaricus.train$data, label = agaricus.train$label, max_depth = 2, +#' eta = 1, nthread = 2, nrounds = 2, objective = "binary:logistic") +#' saveRDS(bst, "xgb.model.rds") +#' +#' bst1 <- readRDS("xgb.model.rds") +#' # the handle is invalid: +#' print(bst1$handle) +#' bst1 <- xgb.Booster.complete(bst1) +#' # now the handle points to a valid internal booster model: +#' print(bst1$handle) +#' +#' @export +xgb.Booster.complete <- function(bst, saveraw = TRUE) { if (class(bst) != "xgb.Booster") stop("argument type must be xgb.Booster") - isnull <- is.null(bst$handle) - if (!isnull) { - isnull <- .Call("XGCheckNullPtr_R", bst$handle, PACKAGE="xgboost") - } - if (isnull) { + if (is.null.handle(bst$handle)) { bst$handle <- xgb.Booster.handle(modelfile = bst$raw) } else { if (is.null(bst$raw) && saveraw) @@ -67,7 +112,6 @@ xgb.Booster.check <- function(bst, saveraw = TRUE) { return(bst) } - #' Predict method for eXtreme Gradient Boosting model #' #' Predicted values based on either xgboost model or model handle object. @@ -180,7 +224,7 @@ xgb.Booster.check <- function(bst, saveraw = TRUE) { predict.xgb.Booster <- function(object, newdata, missing = NA, outputmargin = FALSE, ntreelimit = NULL, predleaf = FALSE, reshape = FALSE, ...) { - object <- xgb.Booster.check(object, saveraw = FALSE) + object <- xgb.Booster.complete(object, saveraw = FALSE) if (class(newdata) != "xgb.DMatrix") newdata <- xgb.DMatrix(newdata, missing = missing) if (is.null(ntreelimit)) @@ -429,12 +473,10 @@ xgb.ntree <- function(bst) { print.xgb.Booster <- function(x, verbose=FALSE, ...) { cat('##### xgb.Booster\n') - valid_handle <- TRUE - if (is.null(x$handle) || .Call("XGCheckNullPtr_R", x$handle, PACKAGE="xgboost")) { - cat("handle is invalid\n") - valid_handle <- FALSE - } - + valid_handle <- !is.null.handle(x$handle) + if (!valid_handle) + cat("Handle is invalid!
Suggest using xgb.Booster.complete\n") + cat('raw: ') if (!is.null(x$raw)) { cat(format(object.size(x$raw), units="auto"), '\n') diff --git a/R-package/R/xgb.dump.R b/R-package/R/xgb.dump.R index 86e97b4e738d..f059612212c2 100644 --- a/R-package/R/xgb.dump.R +++ b/R-package/R/xgb.dump.R @@ -1,24 +1,26 @@ -#' Save xgboost model to text file +#' Dump an xgboost model in text format. #' -#' Save a xgboost model to text file. Could be parsed later. +#' Dump an xgboost model in text format. #' #' @param model the model object. -#' @param fname the name of the text file where to save the model text dump. If not provided or set to \code{NULL} the function will return the model as a \code{character} vector. -#' @param fmap feature map file representing the type of feature. +#' @param fname the name of the text file where to save the model text dump. +#' If not provided or set to \code{NULL}, the model is returned as a \code{character} vector. +#' @param fmap feature map file representing feature types. #' Detailed description could be found at #' \url{https://github.com/dmlc/xgboost/wiki/Binary-Classification#dump-model}. #' See demo/ for walkthrough example in R, and #' \url{https://github.com/dmlc/xgboost/blob/master/demo/data/featmap.txt} #' for example Format. -#' @param with_stats whether dump statistics of splits -#' When this option is on, the model dump comes with two additional statistics: +#' @param with_stats whether to dump some additional statistics about the splits. +#' When this option is on, the model dump contains two additional values: #' gain is the approximate loss function gain we get in each split; #' cover is the sum of second order gradient in each node. #' @param dump_format either 'text' or 'json' format could be specified. #' @param ... currently not used #' #' @return -#' if fname is not provided or set to \code{NULL} the function will return the model as a \code{character} vector. Otherwise it will return \code{TRUE}. +#' If fname is not provided or set to \code{NULL} the function will return the model +#' as a \code{character} vector. Otherwise it will return \code{TRUE}. #' #' @examples #' data(agaricus.train, package='xgboost') @@ -37,7 +39,8 @@ #' cat(xgb.dump(bst, with_stats = TRUE, dump_format='json')) #' #' @export -xgb.dump <- function(model = NULL, fname = NULL, fmap = "", with_stats=FALSE, dump_format = c("text", "json"), ...) { +xgb.dump <- function(model = NULL, fname = NULL, fmap = "", with_stats=FALSE, + dump_format = c("text", "json"), ...) { check.deprecation(...) dump_format <- match.arg(dump_format) if (class(model) != "xgb.Booster") @@ -47,7 +50,7 @@ xgb.dump <- function(model = NULL, fname = NULL, fmap = "", with_stats=FALSE, du if (!(class(fmap) %in% c("character", "NULL") && length(fmap) <= 1)) stop("fmap: argument must be of type character (when provided)") - model <- xgb.Booster.check(model) + model <- xgb.Booster.complete(model) model_dump <- .Call("XGBoosterDumpModel_R", model$handle, fmap, as.integer(with_stats), as.character(dump_format), PACKAGE = "xgboost") diff --git a/R-package/R/xgb.load.R b/R-package/R/xgb.load.R index 424205f881f0..cfe5dc87c0da 100644 --- a/R-package/R/xgb.load.R +++ b/R-package/R/xgb.load.R @@ -1,8 +1,23 @@ #' Load xgboost model from binary file #' -#' Load xgboost model from the binary model file +#' Load xgboost model from the binary model file. #' -#' @param modelfile the name of the binary file. +#' @param modelfile the name of the binary input file. 
+#' +#' @details +#' The input file is expected to contain a model saved in an xgboost-internal binary format +#' using either \code{\link{xgb.save}} or \code{\link{cb.save.model}} in R, or using some +#' appropriate methods from other xgboost interfaces. E.g., a model trained in Python and +#' saved from there in xgboost format, could be loaded from R. +#' +#' Note: a model saved as an R-object, has to be loaded using corresponding R-methods, +#' not \code{xgb.load}. +#' +#' @return +#' An object of \code{xgb.Booster} class. +#' +#' @seealso +#' \code{\link{xgb.save}}, \code{\link{xgb.Booster.complete}}. #' #' @examples #' data(agaricus.train, package='xgboost') @@ -26,6 +41,6 @@ xgb.load <- function(modelfile) { } else { bst <- xgb.handleToBooster(handle, NULL) } - bst <- xgb.Booster.check(bst, saveraw = TRUE) + bst <- xgb.Booster.complete(bst, saveraw = TRUE) return(bst) } diff --git a/R-package/R/xgb.save.R b/R-package/R/xgb.save.R index b4075defb870..8162f0fa2951 100644 --- a/R-package/R/xgb.save.R +++ b/R-package/R/xgb.save.R @@ -1,9 +1,22 @@ #' Save xgboost model to binary file #' -#' Save xgboost model from xgboost or xgb.train +#' Save xgboost model to a file in binary format. #' -#' @param model the model object. -#' @param fname the name of the file to write. +#' @param model model object of \code{xgb.Booster} class. +#' @param fname name of the file to write. +#' +#' @details +#' This methods allows to save a model in an xgboost-internal binary format which is universal +#' among the various xgboost interfaces. In R, the saved model file could be read-in later +#' using either the \code{\link{xgb.load}} function or the \code{xgb_model} parameter +#' of \code{\link{xgb.train}}. +#' +#' Note: a model can also be saved as an R-object (e.g., by using \code{\link[base]{readRDS}} +#' or \code{\link[base]{save}}). However, it would then only be compatible with R, and +#' corresponding R-methods would need to be used to load it. +#' +#' @seealso +#' \code{\link{xgb.load}}, \code{\link{xgb.Booster.complete}}. #' #' @examples #' data(agaricus.train, package='xgboost') @@ -22,7 +35,7 @@ xgb.save <- function(model, fname) { if (class(model) != "xgb.Booster") stop("the input must be xgb.Booster. Use xgb.DMatrix.save to save xgb.DMatrix object.") - model <- xgb.Booster.check(model, saveraw = FALSE) + model <- xgb.Booster.complete(model, saveraw = FALSE) .Call("XGBoosterSaveModel_R", model$handle, fname, PACKAGE = "xgboost") return(TRUE) } diff --git a/R-package/R/xgb.train.R b/R-package/R/xgb.train.R index 5e11a29fc9b2..a7bb90e631e2 100644 --- a/R-package/R/xgb.train.R +++ b/R-package/R/xgb.train.R @@ -332,7 +332,7 @@ xgb.train <- function(params = list(), data, nrounds, watchlist = list(), } for (f in cb$finalize) f(finalize=TRUE) - bst <- xgb.Booster.check(bst, saveraw = TRUE) + bst <- xgb.Booster.complete(bst, saveraw = TRUE) # store the total number of boosting iterations bst$niter = end_iteration diff --git a/R-package/man/xgb.dump.Rd b/R-package/man/xgb.dump.Rd index 2ec26c743ea5..bd536b4bf03a 100644 --- a/R-package/man/xgb.dump.Rd +++ b/R-package/man/xgb.dump.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/xgb.dump.R \name{xgb.dump} \alias{xgb.dump} -\title{Save xgboost model to text file} +\title{Dump an xgboost model in text format.} \usage{ xgb.dump(model = NULL, fname = NULL, fmap = "", with_stats = FALSE, dump_format = c("text", "json"), ...) 
@@ -10,17 +10,18 @@ xgb.dump(model = NULL, fname = NULL, fmap = "", with_stats = FALSE, \arguments{ \item{model}{the model object.} -\item{fname}{the name of the text file where to save the model text dump. If not provided or set to \code{NULL} the function will return the model as a \code{character} vector.} +\item{fname}{the name of the text file where to save the model text dump. +If not provided or set to \code{NULL}, the model is returned as a \code{character} vector.} -\item{fmap}{feature map file representing the type of feature. +\item{fmap}{feature map file representing feature types. Detailed description could be found at \url{https://github.com/dmlc/xgboost/wiki/Binary-Classification#dump-model}. See demo/ for walkthrough example in R, and \url{https://github.com/dmlc/xgboost/blob/master/demo/data/featmap.txt} for example Format.} -\item{with_stats}{whether dump statistics of splits -When this option is on, the model dump comes with two additional statistics: +\item{with_stats}{whether to dump some additional statistics about the splits. +When this option is on, the model dump contains two additional values: gain is the approximate loss function gain we get in each split; cover is the sum of second order gradient in each node.} @@ -29,10 +30,11 @@ cover is the sum of second order gradient in each node.} \item{...}{currently not used} } \value{ -if fname is not provided or set to \code{NULL} the function will return the model as a \code{character} vector. Otherwise it will return \code{TRUE}. +If fname is not provided or set to \code{NULL} the function will return the model +as a \code{character} vector. Otherwise it will return \code{TRUE}. } \description{ -Save a xgboost model to text file. Could be parsed later. +Dump an xgboost model in text format. } \examples{ data(agaricus.train, package='xgboost') diff --git a/R-package/man/xgb.load.Rd b/R-package/man/xgb.load.Rd index 1499df2d4f20..1634a8a38113 100644 --- a/R-package/man/xgb.load.Rd +++ b/R-package/man/xgb.load.Rd @@ -7,10 +7,22 @@ xgb.load(modelfile) } \arguments{ -\item{modelfile}{the name of the binary file.} +\item{modelfile}{the name of the binary input file.} +} +\value{ +An object of \code{xgb.Booster} class. } \description{ -Load xgboost model from the binary model file +Load xgboost model from the binary model file. +} +\details{ +The input file is expected to contain a model saved in an xgboost-internal binary format +using either \code{\link{xgb.save}} or \code{\link{cb.save.model}} in R, or using some +appropriate methods from other xgboost interfaces. E.g., a model trained in Python and +saved from there in xgboost format, could be loaded from R. + +Note: a model saved as an R-object, has to be loaded using corresponding R-methods, +not \code{xgb.load}. } \examples{ data(agaricus.train, package='xgboost') @@ -23,4 +35,7 @@ xgb.save(bst, 'xgb.model') bst <- xgb.load('xgb.model') pred <- predict(bst, test$data) } +\seealso{ +\code{\link{xgb.save}}, \code{\link{xgb.Booster.complete}}. +} diff --git a/R-package/man/xgb.save.Rd b/R-package/man/xgb.save.Rd index 85acdecd055d..00b32ef78125 100644 --- a/R-package/man/xgb.save.Rd +++ b/R-package/man/xgb.save.Rd @@ -7,12 +7,22 @@ xgb.save(model, fname) } \arguments{ -\item{model}{the model object.} +\item{model}{model object of \code{xgb.Booster} class.} -\item{fname}{the name of the file to write.} +\item{fname}{name of the file to write.} } \description{ -Save xgboost model from xgboost or xgb.train +Save xgboost model to a file in binary format. 
+} +\details{ +This methods allows to save a model in an xgboost-internal binary format which is universal +among the various xgboost interfaces. In R, the saved model file could be read-in later +using either the \code{\link{xgb.load}} function or the \code{xgb_model} parameter +of \code{\link{xgb.train}}. + +Note: a model can also be saved as an R-object (e.g., by using \code{\link[base]{readRDS}} +or \code{\link[base]{save}}). However, it would then only be compatible with R, and +corresponding R-methods would need to be used to load it. } \examples{ data(agaricus.train, package='xgboost') @@ -25,4 +35,7 @@ xgb.save(bst, 'xgb.model') bst <- xgb.load('xgb.model') pred <- predict(bst, test$data) } +\seealso{ +\code{\link{xgb.load}}, \code{\link{xgb.Booster.complete}}. +} diff --git a/R-package/tests/testthat/test_helpers.R b/R-package/tests/testthat/test_helpers.R index 508f77a0fbf8..5a3a9bb028c4 100644 --- a/R-package/tests/testthat/test_helpers.R +++ b/R-package/tests/testthat/test_helpers.R @@ -107,6 +107,12 @@ test_that("xgb.Booster serializing as R object works", { expect_equal(predict(bst.Tree, dtrain), predict(bst, dtrain)) expect_equal(xgb.dump(bst.Tree), xgb.dump(bst)) xgb.save(bst, 'xgb.model') + nil_ptr <- new("externalptr") + class(nil_ptr) <- "xgb.Booster.handle" + expect_true(identical(bst$handle, nil_ptr)) + bst <- xgb.Booster.complete(bst) + expect_true(!identical(bst$handle, nil_ptr)) + expect_equal(predict(bst.Tree, dtrain), predict(bst, dtrain)) }) test_that("xgb.model.dt.tree works with and without feature names", { From d108c77666d682ffd7306e222a5475d062c4c5f1 Mon Sep 17 00:00:00 2001 From: Vadim Khotilovich Date: Sun, 15 Jan 2017 14:01:29 -0600 Subject: [PATCH 05/14] [R] storing evaluation_log should depend only on watchlist, not on verbose --- R-package/R/xgb.train.R | 60 +++++++++++++++++++------------------- R-package/R/xgboost.R | 8 ++--- R-package/man/xgb.train.Rd | 51 ++++++++++++++++---------------- 3 files changed, 59 insertions(+), 60 deletions(-) diff --git a/R-package/R/xgb.train.R b/R-package/R/xgb.train.R index a7bb90e631e2..1a5f842b8b9e 100644 --- a/R-package/R/xgb.train.R +++ b/R-package/R/xgb.train.R @@ -1,6 +1,7 @@ #' eXtreme Gradient Boosting Training #' -#' \code{xgb.train} is an advanced interface for training an xgboost model. The \code{xgboost} function provides a simpler interface. +#' \code{xgb.train} is an advanced interface for training an xgboost model. +#' The \code{xgboost} function is a simpler wrapper for \code{xgb.train}. #' #' @param params the list of parameters. #' The complete list of parameters is available at \url{http://xgboost.readthedocs.io/en/latest/parameter.html}. @@ -9,8 +10,7 @@ #' 1. General Parameters #' #' \itemize{ -#' \item \code{booster} which booster to use, can be \code{gbtree} or \code{gblinear}. Default: \code{gbtree} -#' \item \code{silent} 0 means printing running messages, 1 means silent mode. Default: 0 +#' \item \code{booster} which booster to use, can be \code{gbtree} or \code{gblinear}. Default: \code{gbtree}. #' } #' #' 2. Booster Parameters @@ -54,24 +54,26 @@ #' \item \code{eval_metric} evaluation metrics for validation data. Users can pass a self-defined function to it. Default: metric will be assigned according to objective(rmse for regression, and error for classification, mean average precision for ranking). List is provided in detail section. #' } #' -#' @param data input dataset. \code{xgb.train} takes only an \code{xgb.DMatrix} as the input. 
-#' \code{xgboost}, in addition, also accepts \code{matrix}, \code{dgCMatrix}, or local data file. -#' @param nrounds the max number of iterations -#' @param watchlist what information should be printed when \code{verbose=1} or -#' \code{verbose=2}. Watchlist is used to specify validation set monitoring -#' during training. For example user can specify -#' watchlist=list(validation1=mat1, validation2=mat2) to watch -#' the performance of each round's model on mat1 and mat2 -#' +#' @param data training dataset. \code{xgb.train} accepts only an \code{xgb.DMatrix} as the input. +#' \code{xgboost}, in addition, also accepts \code{matrix}, \code{dgCMatrix}, or name of a local data file. +#' @param nrounds max number of boosting iterations. +#' @param watchlist named list of xgb.DMatrix datasets to use for evaluating model performance. +#' Metrics specified in either \code{eval_metric} or \code{feval} will be computed for each +#' of these datasets during each boosting iteration, and stored in the end as a field named +#' \code{evaluation_log} in the resulting object. When either \code{verbose>=1} or +#' \code{\link{cb.print.evaluation}} callback is engaged, the performance results are continuously +#' printed out during the training. +#' E.g., specifying \code{watchlist=list(validation1=mat1, validation2=mat2)} allows to track +#' the performance of each round's model on mat1 and mat2. #' @param obj customized objective function. Returns gradient and second order #' gradient with given prediction and dtrain. #' @param feval custimized evaluation function. Returns #' \code{list(metric='metric-name', value='metric-value')} with given #' prediction and dtrain. -#' @param verbose If 0, xgboost will stay silent. If 1, xgboost will print -#' information of performance. If 2, xgboost will print some additional information. -#' Setting \code{verbose > 0} automatically engages the \code{\link{cb.evaluation.log}} and -#' \code{\link{cb.print.evaluation}} callback functions. +#' @param verbose If 0, xgboost will stay silent. If 1, it will print information about performance. +#' If 2, some additional information will be printed out. +#' Note that setting \code{verbose > 0} automatically engages the +#' \code{\link{cb.print.evaluation(period=1)}} callback function. #' @param print_every_n Print each n-th iteration evaluation messages when \code{verbose>0}. #' Default is 1 which means all messages are printed. This parameter is passed to the #' \code{\link{cb.print.evaluation}} callback. @@ -106,7 +108,7 @@ #' #' The \code{xgb.train} interface supports advanced features such as \code{watchlist}, #' customized objective and evaluation metric functions, therefore it is more flexible -#' than the \code{\link{xgboost}} interface. +#' than the \code{xgboost} interface. #' #' Parallelization is automatically enabled if \code{OpenMP} is present. #' Number of threads can also be manually specified via \code{nthread} parameter. @@ -132,7 +134,7 @@ #' \itemize{ #' \item \code{cb.print.evaluation} is turned on when \code{verbose > 0}; #' and the \code{print_every_n} parameter is passed to it. -#' \item \code{cb.evaluation.log} is on when \code{verbose > 0} and \code{watchlist} is present. +#' \item \code{cb.evaluation.log} is on when \code{watchlist} is present. #' \item \code{cb.early.stop}: when \code{early_stopping_rounds} is set. #' \item \code{cb.save.model}: when \code{save_period > 0} is set. 
#' } @@ -171,7 +173,7 @@ #' #' dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label) #' dtest <- xgb.DMatrix(agaricus.test$data, label = agaricus.test$label) -#' watchlist <- list(eval = dtest, train = dtrain) +#' watchlist <- list(train = dtrain, eval = dtest) #' #' ## A simple xgb.train example: #' param <- list(max_depth = 2, eta = 1, silent = 1, nthread = 2, @@ -210,17 +212,15 @@ #' #' #' ## An xgb.train example of using variable learning rates at each iteration: -#' param <- list(max_depth = 2, eta = 1, silent = 1, nthread = 2) +#' param <- list(max_depth = 2, eta = 1, silent = 1, nthread = 2, +#' objective = "binary:logistic", eval_metric = "auc") #' my_etas <- list(eta = c(0.5, 0.1)) #' bst <- xgb.train(param, dtrain, nrounds = 2, watchlist, #' callbacks = list(cb.reset.parameters(my_etas))) #' -#' -#' ## Explicit use of the cb.evaluation.log callback allows to run -#' ## xgb.train silently but still store the evaluation results: -#' bst <- xgb.train(param, dtrain, nrounds = 2, watchlist, -#' verbose = 0, callbacks = list(cb.evaluation.log())) -#' print(bst$evaluation_log) +#' ## Early stopping: +#' bst <- xgb.train(param, dtrain, nrounds = 25, watchlist, +#' early_stopping_rounds = 3) #' #' ## An 'xgboost' interface example: #' bst <- xgboost(data = agaricus.train$data, label = agaricus.train$label, @@ -259,13 +259,13 @@ xgb.train <- function(params = list(), data, nrounds, watchlist = list(), # evaluation printing callback params <- c(params, list(silent = ifelse(verbose > 1, 0, 1))) print_every_n <- max( as.integer(print_every_n), 1L) - if (!has.callbacks(callbacks, 'cb.print.evaluation') && verbose) { + if (!has.callbacks(callbacks, 'cb.print.evaluation') && + verbose) { callbacks <- add.cb(callbacks, cb.print.evaluation(print_every_n)) } - # evaluation log callback: it is automatically enabled only when verbose > 0 + # evaluation log callback: it is automatically enabled when watchlist is provided evaluation_log <- list() - if (verbose > 0 && - !has.callbacks(callbacks, 'cb.evaluation.log') && + if (!has.callbacks(callbacks, 'cb.evaluation.log') && length(watchlist) > 0) { callbacks <- add.cb(callbacks, cb.evaluation.log()) } diff --git a/R-package/R/xgboost.R b/R-package/R/xgboost.R index b7ce3d526d97..f2ce90b127b4 100644 --- a/R-package/R/xgboost.R +++ b/R-package/R/xgboost.R @@ -1,4 +1,4 @@ -# Simple interface for training an xgboost model. +# Simple interface for training an xgboost model that wraps \code{xgb.train} # Its documentation is combined with xgb.train. # #' @rdname xgb.train @@ -12,11 +12,9 @@ xgboost <- function(data = NULL, label = NULL, missing = NA, weight = NULL, dtrain <- xgb.get.DMatrix(data, label, missing, weight) - watchlist <- list() - if (verbose > 0) - watchlist$train = dtrain + watchlist <- list(train = dtrain) - bst <- xgb.train(params, dtrain, nrounds, watchlist, verbose = verbose, print_every_n=print_every_n, + bst <- xgb.train(params, dtrain, nrounds, watchlist, verbose = verbose, print_every_n = print_every_n, early_stopping_rounds = early_stopping_rounds, maximize = maximize, save_period = save_period, save_name = save_name, xgb_model = xgb_model, callbacks = callbacks, ...) diff --git a/R-package/man/xgb.train.Rd b/R-package/man/xgb.train.Rd index 4f37b78b84d7..7aa5e3e36d14 100644 --- a/R-package/man/xgb.train.Rd +++ b/R-package/man/xgb.train.Rd @@ -23,8 +23,7 @@ xgboost(data = NULL, label = NULL, missing = NA, weight = NULL, 1. 
General Parameters \itemize{ - \item \code{booster} which booster to use, can be \code{gbtree} or \code{gblinear}. Default: \code{gbtree} - \item \code{silent} 0 means printing running messages, 1 means silent mode. Default: 0 + \item \code{booster} which booster to use, can be \code{gbtree} or \code{gblinear}. Default: \code{gbtree}. } 2. Booster Parameters @@ -68,16 +67,19 @@ xgboost(data = NULL, label = NULL, missing = NA, weight = NULL, \item \code{eval_metric} evaluation metrics for validation data. Users can pass a self-defined function to it. Default: metric will be assigned according to objective(rmse for regression, and error for classification, mean average precision for ranking). List is provided in detail section. }} -\item{data}{input dataset. \code{xgb.train} takes only an \code{xgb.DMatrix} as the input. -\code{xgboost}, in addition, also accepts \code{matrix}, \code{dgCMatrix}, or local data file.} +\item{data}{training dataset. \code{xgb.train} accepts only an \code{xgb.DMatrix} as the input. +\code{xgboost}, in addition, also accepts \code{matrix}, \code{dgCMatrix}, or name of a local data file.} -\item{nrounds}{the max number of iterations} +\item{nrounds}{max number of boosting iterations.} -\item{watchlist}{what information should be printed when \code{verbose=1} or -\code{verbose=2}. Watchlist is used to specify validation set monitoring -during training. For example user can specify -watchlist=list(validation1=mat1, validation2=mat2) to watch -the performance of each round's model on mat1 and mat2} +\item{watchlist}{named list of xgb.DMatrix datasets to use for evaluating model performance. +Metrics specified in either \code{eval_metric} or \code{feval} will be computed for each +of these datasets during each boosting iteration, and stored in the end as a field named +\code{evaluation_log} in the resulting object. When either \code{verbose>=1} or +\code{\link{cb.print.evaluation}} callback is engaged, the performance results are continuously +printed out during the training. +E.g., specifying \code{watchlist=list(validation1=mat1, validation2=mat2)} allows to track +the performance of each round's model on mat1 and mat2.} \item{obj}{customized objective function. Returns gradient and second order gradient with given prediction and dtrain.} @@ -86,10 +88,10 @@ gradient with given prediction and dtrain.} \code{list(metric='metric-name', value='metric-value')} with given prediction and dtrain.} -\item{verbose}{If 0, xgboost will stay silent. If 1, xgboost will print -information of performance. If 2, xgboost will print some additional information. -Setting \code{verbose > 0} automatically engages the \code{\link{cb.evaluation.log}} and -\code{\link{cb.print.evaluation}} callback functions.} +\item{verbose}{If 0, xgboost will stay silent. If 1, it will print information about performance. +If 2, some additional information will be printed out. +Note that setting \code{verbose > 0} automatically engages the +\code{\link{cb.print.evaluation(period=1)}} callback function.} \item{print_every_n}{Print each n-th iteration evaluation messages when \code{verbose>0}. Default is 1 which means all messages are printed. This parameter is passed to the @@ -154,14 +156,15 @@ An object of class \code{xgb.Booster} with the following elements: } } \description{ -\code{xgb.train} is an advanced interface for training an xgboost model. The \code{xgboost} function provides a simpler interface. +\code{xgb.train} is an advanced interface for training an xgboost model. 
+The \code{xgboost} function is a simpler wrapper for \code{xgb.train}. } \details{ These are the training functions for \code{xgboost}. The \code{xgb.train} interface supports advanced features such as \code{watchlist}, customized objective and evaluation metric functions, therefore it is more flexible -than the \code{\link{xgboost}} interface. +than the \code{xgboost} interface. Parallelization is automatically enabled if \code{OpenMP} is present. Number of threads can also be manually specified via \code{nthread} parameter. @@ -187,7 +190,7 @@ The following callbacks are automatically created when certain parameters are se \itemize{ \item \code{cb.print.evaluation} is turned on when \code{verbose > 0}; and the \code{print_every_n} parameter is passed to it. - \item \code{cb.evaluation.log} is on when \code{verbose > 0} and \code{watchlist} is present. + \item \code{cb.evaluation.log} is on when \code{watchlist} is present. \item \code{cb.early.stop}: when \code{early_stopping_rounds} is set. \item \code{cb.save.model}: when \code{save_period > 0} is set. } @@ -198,7 +201,7 @@ data(agaricus.test, package='xgboost') dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label) dtest <- xgb.DMatrix(agaricus.test$data, label = agaricus.test$label) -watchlist <- list(eval = dtest, train = dtrain) +watchlist <- list(train = dtrain, eval = dtest) ## A simple xgb.train example: param <- list(max_depth = 2, eta = 1, silent = 1, nthread = 2, @@ -237,17 +240,15 @@ bst <- xgb.train(param, dtrain, nrounds = 2, watchlist, ## An xgb.train example of using variable learning rates at each iteration: -param <- list(max_depth = 2, eta = 1, silent = 1, nthread = 2) +param <- list(max_depth = 2, eta = 1, silent = 1, nthread = 2, + objective = "binary:logistic", eval_metric = "auc") my_etas <- list(eta = c(0.5, 0.1)) bst <- xgb.train(param, dtrain, nrounds = 2, watchlist, callbacks = list(cb.reset.parameters(my_etas))) - -## Explicit use of the cb.evaluation.log callback allows to run -## xgb.train silently but still store the evaluation results: -bst <- xgb.train(param, dtrain, nrounds = 2, watchlist, - verbose = 0, callbacks = list(cb.evaluation.log())) -print(bst$evaluation_log) +## Early stopping: +bst <- xgb.train(param, dtrain, nrounds = 25, watchlist, + early_stopping_rounds = 3) ## An 'xgboost' interface example: bst <- xgboost(data = agaricus.train$data, label = agaricus.train$label, From 57001df23e24d6944a009f53c8d0025e3790a4e3 Mon Sep 17 00:00:00 2001 From: Vadim Khotilovich Date: Sun, 15 Jan 2017 14:05:19 -0600 Subject: [PATCH 06/14] [R] reduce the excessive chattiness of unit tests --- R-package/tests/testthat/test_basic.R | 20 +++++---- R-package/tests/testthat/test_callbacks.R | 53 +++++++++++++---------- R-package/tests/testthat/test_helpers.R | 8 ++-- R-package/tests/testthat/test_monotone.R | 4 +- 4 files changed, 48 insertions(+), 37 deletions(-) diff --git a/R-package/tests/testthat/test_basic.R b/R-package/tests/testthat/test_basic.R index 7ca96077b950..b34330461024 100644 --- a/R-package/tests/testthat/test_basic.R +++ b/R-package/tests/testthat/test_basic.R @@ -109,7 +109,7 @@ test_that("train and predict RF with softprob", { set.seed(11) bst <- xgboost(data = as.matrix(iris[, -5]), label = lb, max_depth = 3, eta = 0.9, nthread = 2, nrounds = nrounds, - objective = "multi:softprob", num_class=3, + objective = "multi:softprob", num_class=3, verbose = 0, num_parallel_tree = 4, subsample = 0.5, colsample_bytree = 0.5) expect_equal(bst$niter, 15) expect_equal(xgb.ntree(bst), 
15*3*4) @@ -144,25 +144,25 @@ test_that("training continuation works", { # for the reference, use 4 iterations at once: set.seed(11) - bst <- xgb.train(param, dtrain, nrounds = 4, watchlist) + bst <- xgb.train(param, dtrain, nrounds = 4, watchlist, verbose = 0) # first two iterations: set.seed(11) - bst1 <- xgb.train(param, dtrain, nrounds = 2, watchlist) + bst1 <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0) # continue for two more: - bst2 <- xgb.train(param, dtrain, nrounds = 2, watchlist, xgb_model = bst1) + bst2 <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0, xgb_model = bst1) if (!windows_flag) expect_equal(bst$raw, bst2$raw) expect_false(is.null(bst2$evaluation_log)) expect_equal(dim(bst2$evaluation_log), c(4, 2)) expect_equal(bst2$evaluation_log, bst$evaluation_log) # test continuing from raw model data - bst2 <- xgb.train(param, dtrain, nrounds = 2, watchlist, xgb_model = bst1$raw) + bst2 <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0, xgb_model = bst1$raw) if (!windows_flag) expect_equal(bst$raw, bst2$raw) expect_equal(dim(bst2$evaluation_log), c(2, 2)) # test continuing from a model in file xgb.save(bst1, "xgboost.model") - bst2 <- xgb.train(param, dtrain, nrounds = 2, watchlist, xgb_model = "xgboost.model") + bst2 <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0, xgb_model = "xgboost.model") if (!windows_flag) expect_equal(bst$raw, bst2$raw) expect_equal(dim(bst2$evaluation_log), c(2, 2)) @@ -171,9 +171,11 @@ test_that("training continuation works", { test_that("xgb.cv works", { set.seed(11) - cv <- xgb.cv(data = train$data, label = train$label, max_depth = 2, nfold = 5, - eta = 1., nthread = 2, nrounds = 2, objective = "binary:logistic", - verbose=TRUE) + expect_output( + cv <- xgb.cv(data = train$data, label = train$label, max_depth = 2, nfold = 5, + eta = 1., nthread = 2, nrounds = 2, objective = "binary:logistic", + verbose=TRUE) + , "train-error:") expect_is(cv, 'xgb.cv.synchronous') expect_false(is.null(cv$evaluation_log)) expect_lt(cv$evaluation_log[, min(test_error_mean)], 0.03) diff --git a/R-package/tests/testthat/test_callbacks.R b/R-package/tests/testthat/test_callbacks.R index a95d1079709a..57dc6c5d4d5f 100644 --- a/R-package/tests/testthat/test_callbacks.R +++ b/R-package/tests/testthat/test_callbacks.R @@ -107,18 +107,27 @@ test_that("cb.evaluation.log works as expected", { param <- list(objective = "binary:logistic", max_depth = 4, nthread = 2) +test_that("can store evaluation_log without printing", { + expect_silent( + bst <- xgb.train(param, dtrain, nrounds = 10, watchlist, eta = 1, verbose = 0) + ) + expect_false(is.null(bst$evaluation_log)) + expect_false(is.null(bst$evaluation_log$train_error)) + expect_lt(bst$evaluation_log[, min(train_error)], 0.2) +}) + test_that("cb.reset.parameters works as expected", { # fixed eta set.seed(111) - bst0 <- xgb.train(param, dtrain, nrounds = 2, watchlist, eta = 0.9) + bst0 <- xgb.train(param, dtrain, nrounds = 2, watchlist, eta = 0.9, verbose = 0) expect_false(is.null(bst0$evaluation_log)) expect_false(is.null(bst0$evaluation_log$train_error)) # same eta but re-set as a vector parameter in the callback set.seed(111) my_par <- list(eta = c(0.9, 0.9)) - bst1 <- xgb.train(param, dtrain, nrounds = 2, watchlist, + bst1 <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0, callbacks = list(cb.reset.parameters(my_par))) expect_false(is.null(bst1$evaluation_log$train_error)) expect_equal(bst0$evaluation_log$train_error, @@ -127,7 +136,7 @@ 
test_that("cb.reset.parameters works as expected", { # same eta but re-set via a function in the callback set.seed(111) my_par <- list(eta = function(itr, itr_end) 0.9) - bst2 <- xgb.train(param, dtrain, nrounds = 2, watchlist, + bst2 <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0, callbacks = list(cb.reset.parameters(my_par))) expect_false(is.null(bst2$evaluation_log$train_error)) expect_equal(bst0$evaluation_log$train_error, @@ -136,7 +145,7 @@ test_that("cb.reset.parameters works as expected", { # different eta re-set as a vector parameter in the callback set.seed(111) my_par <- list(eta = c(0.6, 0.5)) - bst3 <- xgb.train(param, dtrain, nrounds = 2, watchlist, + bst3 <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0, callbacks = list(cb.reset.parameters(my_par))) expect_false(is.null(bst3$evaluation_log$train_error)) expect_false(all(bst0$evaluation_log$train_error == bst3$evaluation_log$train_error)) @@ -144,18 +153,18 @@ test_that("cb.reset.parameters works as expected", { # resetting multiple parameters at the same time runs with no error my_par <- list(eta = c(1., 0.5), gamma = c(1, 2), max_depth = c(4, 8)) expect_error( - bst4 <- xgb.train(param, dtrain, nrounds = 2, watchlist, + bst4 <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0, callbacks = list(cb.reset.parameters(my_par))) , NA) # NA = no error # CV works as well expect_error( - bst4 <- xgb.cv(param, dtrain, nfold = 2, nrounds = 2, + bst4 <- xgb.cv(param, dtrain, nfold = 2, nrounds = 2, verbose = 0, callbacks = list(cb.reset.parameters(my_par))) , NA) # NA = no error # expect no learning with 0 learning rate my_par <- list(eta = c(0., 0.)) - bstX <- xgb.train(param, dtrain, nrounds = 2, watchlist, + bstX <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0, callbacks = list(cb.reset.parameters(my_par))) expect_false(is.null(bstX$evaluation_log$train_error)) er <- unique(bstX$evaluation_log$train_error) @@ -167,7 +176,7 @@ test_that("cb.save.model works as expected", { files <- c('xgboost_01.model', 'xgboost_02.model', 'xgboost.model') for (f in files) if (file.exists(f)) file.remove(f) - bst <- xgb.train(param, dtrain, nrounds = 2, watchlist, eta = 1, + bst <- xgb.train(param, dtrain, nrounds = 2, watchlist, eta = 1, verbose = 0, save_period = 1, save_name = "xgboost_%02d.model") expect_true(file.exists('xgboost_01.model')) expect_true(file.exists('xgboost_02.model')) @@ -178,7 +187,8 @@ test_that("cb.save.model works as expected", { expect_equal(bst$raw, b2$raw) # save_period = 0 saves the last iteration's model - bst <- xgb.train(param, dtrain, nrounds = 2, watchlist, eta = 1, save_period = 0) + bst <- xgb.train(param, dtrain, nrounds = 2, watchlist, eta = 1, verbose = 0, + save_period = 0) expect_true(file.exists('xgboost.model')) b2 <- xgb.load('xgboost.model') expect_equal(bst$raw, b2$raw) @@ -186,16 +196,6 @@ test_that("cb.save.model works as expected", { for (f in files) if (file.exists(f)) file.remove(f) }) -test_that("can store evaluation_log without printing", { - expect_silent( - bst <- xgb.train(param, dtrain, nrounds = 10, watchlist, eta = 1, - verbose = 0, callbacks = list(cb.evaluation.log())) - ) - expect_false(is.null(bst$evaluation_log)) - expect_false(is.null(bst$evaluation_log$train_error)) - expect_lt(bst$evaluation_log[, min(train_error)], 0.2) -}) - test_that("early stopping xgb.train works", { set.seed(11) expect_output( @@ -211,6 +211,13 @@ test_that("early stopping xgb.train works", { err_pred <- err(ltest, pred) err_log <- 
bst$evaluation_log[bst$best_iteration, test_error] expect_equal(err_log, err_pred, tolerance = 5e-6) + + set.seed(11) + expect_silent( + bst0 <- xgb.train(param, dtrain, nrounds = 20, watchlist, eta = 0.3, + early_stopping_rounds = 3, maximize = FALSE, verbose = 0) + ) + expect_equal(bst$evaluation_log, bst0$evaluation_log) }) test_that("early stopping using a specific metric works", { @@ -248,7 +255,7 @@ test_that("early stopping xgb.cv works", { test_that("prediction in xgb.cv works", { set.seed(11) nrounds = 4 - cv <- xgb.cv(param, dtrain, nfold = 5, eta = 0.5, nrounds = nrounds, prediction = TRUE) + cv <- xgb.cv(param, dtrain, nfold = 5, eta = 0.5, nrounds = nrounds, prediction = TRUE, verbose = 0) expect_false(is.null(cv$evaluation_log)) expect_false(is.null(cv$pred)) expect_length(cv$pred, nrow(train$data)) @@ -258,7 +265,7 @@ test_that("prediction in xgb.cv works", { # save CV models set.seed(11) - cvx <- xgb.cv(param, dtrain, nfold = 5, eta = 0.5, nrounds = nrounds, prediction = TRUE, + cvx <- xgb.cv(param, dtrain, nfold = 5, eta = 0.5, nrounds = nrounds, prediction = TRUE, verbose = 0, callbacks = list(cb.cv.predict(save_models = TRUE))) expect_equal(cv$evaluation_log, cvx$evaluation_log) expect_length(cvx$models, 5) @@ -268,7 +275,7 @@ test_that("prediction in xgb.cv works", { test_that("prediction in xgb.cv works for gblinear too", { set.seed(11) p <- list(booster = 'gblinear', objective = "reg:logistic", nthread = 2) - cv <- xgb.cv(p, dtrain, nfold = 5, eta = 0.5, nrounds = 2, prediction = TRUE) + cv <- xgb.cv(p, dtrain, nfold = 5, eta = 0.5, nrounds = 2, prediction = TRUE, verbose = 0) expect_false(is.null(cv$evaluation_log)) expect_false(is.null(cv$pred)) expect_length(cv$pred, nrow(train$data)) @@ -300,7 +307,7 @@ test_that("prediction in xgb.cv for softprob works", { expect_warning( cv <- xgb.cv(data = as.matrix(iris[, -5]), label = lb, nfold = 4, eta = 0.5, nrounds = 5, max_depth = 3, nthread = 2, - subsample = 0.8, gamma = 2, + subsample = 0.8, gamma = 2, verbose = 0, prediction = TRUE, objective = "multi:softprob", num_class = 3) , NA) expect_false(is.null(cv$pred)) diff --git a/R-package/tests/testthat/test_helpers.R b/R-package/tests/testthat/test_helpers.R index 5a3a9bb028c4..c25a0d3194d1 100644 --- a/R-package/tests/testthat/test_helpers.R +++ b/R-package/tests/testthat/test_helpers.R @@ -3,7 +3,7 @@ context('Test helper functions') require(xgboost) require(data.table) require(Matrix) -require(vcd) +require(vcd, quietly = TRUE) set.seed(1982) data(Arthritis) @@ -15,10 +15,12 @@ sparse_matrix <- sparse.model.matrix(Improved~.-1, data = df) label <- df[, ifelse(Improved == "Marked", 1, 0)] bst.Tree <- xgboost(data = sparse_matrix, label = label, max_depth = 9, - eta = 1, nthread = 2, nrounds = 10, objective = "binary:logistic", booster = "gbtree") + eta = 1, nthread = 2, nrounds = 10, verbose = 0, + objective = "binary:logistic", booster = "gbtree") bst.GLM <- xgboost(data = sparse_matrix, label = label, - eta = 1, nthread = 2, nrounds = 10, objective = "binary:logistic", booster = "gblinear") + eta = 1, nthread = 2, nrounds = 10, verbose = 0, + objective = "binary:logistic", booster = "gblinear") feature.names <- colnames(sparse_matrix) diff --git a/R-package/tests/testthat/test_monotone.R b/R-package/tests/testthat/test_monotone.R index 822fefb65620..9991e917e8a6 100644 --- a/R-package/tests/testthat/test_monotone.R +++ b/R-package/tests/testthat/test_monotone.R @@ -10,8 +10,8 @@ train = matrix(x, ncol = 1) test_that("monotone constraints for regression", { bst = 
xgboost(data = train, label = y, max_depth = 2, - eta = 0.1, nthread = 2, nrounds = 100, - monotone_constraints = -1) + eta = 0.1, nthread = 2, nrounds = 100, verbose = 0, + monotone_constraints = -1) pred = predict(bst, train) From 94a278ea3df5ec61cb7e8d34a8fb53a91ed2f758 Mon Sep 17 00:00:00 2001 From: Vadim Khotilovich Date: Sun, 15 Jan 2017 14:18:45 -0600 Subject: [PATCH 07/14] [R] only disable some tests in windows when it's not 64-bit --- R-package/tests/testthat/test_basic.R | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/R-package/tests/testthat/test_basic.R b/R-package/tests/testthat/test_basic.R index b34330461024..951f4621712d 100644 --- a/R-package/tests/testthat/test_basic.R +++ b/R-package/tests/testthat/test_basic.R @@ -8,7 +8,9 @@ train <- agaricus.train test <- agaricus.test set.seed(1994) -windows_flag = grepl('Windows', Sys.info()[['sysname']]) +# disable some tests for Win32 +windows_flag = .Platform$OS.type == "windows" && + .Machine$sizeof.pointer != 8 test_that("train and predict binary classification", { nrounds = 2 From 01f08410a720ed8d129f7a8d88eb9759ffd991e6 Mon Sep 17 00:00:00 2001 From: Vadim Khotilovich Date: Sun, 15 Jan 2017 14:45:23 -0600 Subject: [PATCH 08/14] [R] clean-up xgb.DMatrix --- R-package/R/xgb.DMatrix.R | 20 +++++++------------- 1 file changed, 7 insertions(+), 13 deletions(-) diff --git a/R-package/R/xgb.DMatrix.R b/R-package/R/xgb.DMatrix.R index 732c5c7262aa..9dc0d1f26368 100644 --- a/R-package/R/xgb.DMatrix.R +++ b/R-package/R/xgb.DMatrix.R @@ -31,18 +31,13 @@ xgb.DMatrix <- function(data, info = list(), missing = NA, ...) { PACKAGE = "xgboost") cnames <- colnames(data) } else { - stop(paste("xgb.DMatrix: does not support to construct from ", - typeof(data))) + stop("xgb.DMatrix does not support construction from ", typeof(data)) } dmat <- handle attributes(dmat) <- list(.Dimnames = list(NULL, cnames), class = "xgb.DMatrix") - #dmat <- list(handle = handle, colnames = cnames) - #attr(dmat, 'class') <- "xgb.DMatrix" info <- append(info, list(...)) - if (length(info) == 0) - return(dmat) - for (i in 1:length(info)) { + for (i in seq_along(info)) { p <- info[i] setinfo(dmat, names(p), p[[1]]) } @@ -70,11 +65,10 @@ xgb.get.DMatrix <- function(data, label = NULL, missing = NA, weight = NULL) { dtrain <- xgb.DMatrix(data) } else if (inClass == "xgb.DMatrix") { dtrain <- data - } else if (inClass == "data.frame") { - stop("xgboost only support numerical matrix input, - use 'data.matrix' to transform the data.") + } else if ("data.frame" %in% inClass) { + stop("xgboost doesn't support data.frame as input. Convert it to matrix first.") } else { - stop("xgboost: Invalid input of data") + stop("xgboost: invalid input data") } } return (dtrain) @@ -190,7 +184,7 @@ getinfo.xgb.DMatrix <- function(object, name, ...) { if (typeof(name) != "character" || length(name) != 1 || !name %in% c('label', 'weight', 'base_margin', 'nrow')) { - stop("getinfo: name must one of the following\n", + stop("getinfo: name must be one of the following\n", " 'label', 'weight', 'base_margin', 'nrow'") } if (name != "nrow"){ @@ -266,7 +260,7 @@ setinfo.xgb.DMatrix <- function(object, name, info, ...) 
{ PACKAGE = "xgboost") return(TRUE) } - stop(paste("setinfo: unknown info name", name)) + stop("setinfo: unknown info name ", name) return(FALSE) } From 4983ea6eec1c93d3e22b9ba345afbd9b7bd9a35b Mon Sep 17 00:00:00 2001 From: Vadim Khotilovich Date: Sun, 15 Jan 2017 17:28:47 -0600 Subject: [PATCH 09/14] [R] test xgb.DMatrix loading from libsvm text file --- R-package/tests/testthat/test_dmatrix.R | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/R-package/tests/testthat/test_dmatrix.R b/R-package/tests/testthat/test_dmatrix.R index 0aea2f0a7759..965e3f4805f9 100644 --- a/R-package/tests/testthat/test_dmatrix.R +++ b/R-package/tests/testthat/test_dmatrix.R @@ -21,6 +21,15 @@ test_that("xgb.DMatrix: basic construction, saving, loading", { dtest3 <- xgb.DMatrix(tmp_file) unlink(tmp_file) expect_equal(getinfo(dtest1, 'label'), getinfo(dtest3, 'label')) + + # from a libsvm text file + tmp <- c("0 1:1 2:1","1 3:1","0 1:1") + tmp_file <- 'tmp.libsvm' + writeLines(tmp, tmp_file) + dtest4 <- xgb.DMatrix(tmp_file) + expect_equal(dim(dtest4), c(3, 4)) + expect_equal(getinfo(dtest4, 'label'), c(0,1,0)) + unlink(tmp_file) }) test_that("xgb.DMatrix: getinfo & setinfo", { From 2f97d728ea24b7a29a3154d5228397da1ca0f112 Mon Sep 17 00:00:00 2001 From: Vadim Khotilovich Date: Sun, 15 Jan 2017 23:42:44 -0600 Subject: [PATCH 10/14] [R] store feature_names in xgb.Booster, use them from utility functions --- R-package/R/xgb.Booster.R | 10 +++++++--- R-package/R/xgb.model.dt.tree.R | 24 +++++++++++++++--------- R-package/R/xgb.train.R | 4 ++++ R-package/man/xgb.model.dt.tree.Rd | 12 ++++++++---- R-package/man/xgb.train.Rd | 2 ++ R-package/tests/testthat/test_helpers.R | 24 ++++++++++++++++++++++-- 6 files changed, 58 insertions(+), 18 deletions(-) diff --git a/R-package/R/xgb.Booster.R b/R-package/R/xgb.Booster.R index b2543f8c4e0f..fc9956bc9fc5 100644 --- a/R-package/R/xgb.Booster.R +++ b/R-package/R/xgb.Booster.R @@ -519,15 +519,19 @@ print.xgb.Booster <- function(x, verbose=FALSE, ...) { }) } + if (!is.null(x$feature_names)) + cat('# of features:', length(x$feature_names), '\n') + cat('niter: ', x$niter, '\n', sep='') # TODO: uncomment when faster xgb.ntree is implemented #cat('ntree: ', xgb.ntree(x), '\n', sep='') - for (n in setdiff(names(x), c('handle', 'raw', 'call', 'params', 'callbacks','evaluation_log','niter'))) { + for (n in setdiff(names(x), c('handle', 'raw', 'call', 'params', 'callbacks', + 'evaluation_log','niter','feature_names'))) { if (is.atomic(x[[n]])) { - cat(n, ': ', x[[n]], '\n', sep='') + cat(n, ':', x[[n]], '\n', sep=' ') } else { - cat(n, ':\n\t', sep='') + cat(n, ':\n\t', sep=' ') print(x[[n]]) } } diff --git a/R-package/R/xgb.model.dt.tree.R b/R-package/R/xgb.model.dt.tree.R index a364aaa70be1..75adf5f950d6 100644 --- a/R-package/R/xgb.model.dt.tree.R +++ b/R-package/R/xgb.model.dt.tree.R @@ -3,14 +3,16 @@ #' Parse a boosted tree model text dump into a \code{data.table} structure. #' #' @param feature_names character vector of feature names. If the model already -#' contains feature names, this argument should be \code{NULL} (default value) +#' contains feature names, those would be used when \code{feature_names=NULL} (default value). +#' Non-null \code{feature_names} could be provided to override those in the model. #' @param model object of class \code{xgb.Booster} #' @param text \code{character} vector previously generated by the \code{xgb.dump} #' function (where parameter \code{with_stats = TRUE} should have been set). +#' \code{text} takes precedence over \code{model}. 
 #' @param trees an integer vector of tree indices that should be parsed.
 #'   If set to \code{NULL}, all trees of the model are parsed.
 #'   It could be useful, e.g., in multiclass classification to get only
-#'   the trees of one certain class. IMPORTANT: the tree index in xgboost model
+#'   the trees of one certain class. IMPORTANT: the tree index in xgboost models
 #'   is zero-based (e.g., use \code{trees = 0:4} for first 5 trees).
 #' @param ... currently not used.
 #'
@@ -43,7 +45,9 @@
 #' eta = 1, nthread = 2, nrounds = 2,objective = "binary:logistic")
 #'
 #' (dt <- xgb.model.dt.tree(colnames(agaricus.train$data), bst))
-#'
+#' # This bst has feature_names stored in it, so those would be used when
+#' # the feature_names parameter is not provided:
+#' (dt <- xgb.model.dt.tree(model = bst))
 #'
 #' # How to match feature names of splits that are following a current 'Yes' branch:
 #'
@@ -53,11 +57,6 @@ xgb.model.dt.tree <- function(feature_names = NULL, model = NULL, text = NULL,
                               trees = NULL, ...){
   check.deprecation(...)
-  if (!class(feature_names) %in% c("character", "NULL")) {
-    stop("feature_names: Has to be a vector of character\n",
-         "  or NULL if the model dump already contains feature names.\n",
-         "  Look at this function documentation to see where to get feature names.")
-  }
 
   if (class(model) != "xgb.Booster" & class(text) != "character") {
     stop("Either 'model' has to be an object of class xgb.Booster\n",
@@ -65,12 +64,19 @@ xgb.model.dt.tree <- function(feature_names = NULL, model = NULL, text = NULL,
          "  (or NULL if the model was provided).")
   }
 
+  if (is.null(feature_names) && !is.null(model) && !is.null(model$feature_names))
+    feature_names <- model$feature_names
+
+  if (!class(feature_names) %in% c("character", "NULL")) {
+    stop("feature_names: Has to be a character vector")
+  }
+
   if (!class(trees) %in% c("integer", "numeric", "NULL")) {
     stop("trees: Has to be a vector of integers.")
   }
 
   if (is.null(text)){
-    text <- xgb.dump(model = model, with_stats = T)
+    text <- xgb.dump(model = model, with_stats = TRUE)
   }
 
   if (length(text) < 2 ||
diff --git a/R-package/R/xgb.train.R b/R-package/R/xgb.train.R
index 1a5f842b8b9e..0c17060de41c 100644
--- a/R-package/R/xgb.train.R
+++ b/R-package/R/xgb.train.R
@@ -160,6 +160,8 @@
 #'            (only available with early stopping).
 #'   \item \code{best_score} the best evaluation metric value during early stopping.
 #'            (only available with early stopping).
+#'   \item \code{feature_names} names of the training dataset features
+#'            (only when column names were defined in training data).
 #' }
 #'
 #' @seealso
@@ -354,6 +356,8 @@ xgb.train <- function(params = list(), data, nrounds, watchlist = list(),
   bst$call <- match.call()
   bst$params <- params
   bst$callbacks <- callbacks
+  if (!is.null(colnames(dtrain)))
+    bst$feature_names <- colnames(dtrain)
 
   return(bst)
 }
diff --git a/R-package/man/xgb.model.dt.tree.Rd b/R-package/man/xgb.model.dt.tree.Rd
index 8176303c6690..9d7a5056ab5e 100644
--- a/R-package/man/xgb.model.dt.tree.Rd
+++ b/R-package/man/xgb.model.dt.tree.Rd
@@ -9,17 +9,19 @@ xgb.model.dt.tree(feature_names = NULL, model = NULL, text = NULL,
 }
 \arguments{
 \item{feature_names}{character vector of feature names. If the model already
-contains feature names, this argument should be \code{NULL} (default value)}
+contains feature names, those would be used when \code{feature_names=NULL} (default value).
+Non-null \code{feature_names} could be provided to override those in the model.}
 
 \item{model}{object of class \code{xgb.Booster}}
 
 \item{text}{\code{character} vector previously generated by the \code{xgb.dump}
-function (where parameter \code{with_stats = TRUE} should have been set).}
+function (where parameter \code{with_stats = TRUE} should have been set).
+\code{text} takes precedence over \code{model}.}
 
 \item{trees}{an integer vector of tree indices that should be parsed.
 If set to \code{NULL}, all trees of the model are parsed.
 It could be useful, e.g., in multiclass classification to get only
-the trees of one certain class. IMPORTANT: the tree index in xgboost model
+the trees of one certain class. IMPORTANT: the tree index in xgboost models
 is zero-based (e.g., use \code{trees = 0:4} for first 5 trees).}
 
 \item{...}{currently not used.}
@@ -56,7 +58,9 @@ bst <- xgboost(data = agaricus.train$data, label = agaricus.train$label, max_dep
 eta = 1, nthread = 2, nrounds = 2,objective = "binary:logistic")
 
 (dt <- xgb.model.dt.tree(colnames(agaricus.train$data), bst))
-
+# This bst has feature_names stored in it, so those would be used when
+# the feature_names parameter is not provided:
+(dt <- xgb.model.dt.tree(model = bst))
 
 # How to match feature names of splits that are following a current 'Yes' branch:
 
diff --git a/R-package/man/xgb.train.Rd b/R-package/man/xgb.train.Rd
index 7aa5e3e36d14..51c24520e8ad 100644
--- a/R-package/man/xgb.train.Rd
+++ b/R-package/man/xgb.train.Rd
@@ -153,6 +153,8 @@ An object of class \code{xgb.Booster} with the following elements:
            (only available with early stopping).
   \item \code{best_score} the best evaluation metric value during early stopping.
            (only available with early stopping).
+  \item \code{feature_names} names of the training dataset features
+    (only when column names were defined in training data).
 }
 }
 \description{
diff --git a/R-package/tests/testthat/test_helpers.R b/R-package/tests/testthat/test_helpers.R
index c25a0d3194d1..b12d98d29cd2 100644
--- a/R-package/tests/testthat/test_helpers.R
+++ b/R-package/tests/testthat/test_helpers.R
@@ -122,7 +122,17 @@ test_that("xgb.model.dt.tree works with and without feature names", {
   dt.tree <- xgb.model.dt.tree(feature_names = feature.names, model = bst.Tree)
   expect_equal(names.dt.trees, names(dt.tree))
   expect_equal(dim(dt.tree), c(162, 10))
-  expect_output(str(xgb.model.dt.tree(model = bst.Tree)), 'Feature.*\\"3\\"')
+  expect_output(str(dt.tree), 'Feature.*\\"Age\\"')
+
+  dt.tree.0 <- xgb.model.dt.tree(model = bst.Tree)
+  expect_equal(dt.tree, dt.tree.0)
+
+  # when model contains no feature names:
+  bst.Tree.x <- bst.Tree
+  bst.Tree.x$feature_names <- NULL
+  dt.tree.x <- xgb.model.dt.tree(model = bst.Tree.x)
+  expect_output(str(dt.tree.x), 'Feature.*\\"3\\"')
+  expect_equal(dt.tree[, -4, with=FALSE], dt.tree.x[, -4, with=FALSE])
 })
 
 test_that("xgb.model.dt.tree throws error for gblinear", {
@@ -133,7 +143,17 @@ test_that("xgb.importance works with and without feature names", {
   importance.Tree <- xgb.importance(feature_names = feature.names, model = bst.Tree)
   expect_equal(dim(importance.Tree), c(7, 4))
   expect_equal(colnames(importance.Tree), c("Feature", "Gain", "Cover", "Frequency"))
-  expect_output(str(xgb.importance(model = bst.Tree)), 'Feature.*\\"3\\"')
+  expect_output(str(importance.Tree), 'Feature.*\\"Age\\"')
+
+  importance.Tree.0 <- xgb.importance(model = bst.Tree)
+  expect_equal(importance.Tree, importance.Tree.0)
+
+  # when model contains no feature names:
+  bst.Tree.x <- bst.Tree
+  bst.Tree.x$feature_names <- NULL
+  importance.Tree.x <- xgb.importance(model = bst.Tree.x)
+  expect_equal(importance.Tree[, -1, with=FALSE], importance.Tree.x[, -1, with=FALSE])
+
   imp2plot <- xgb.plot.importance(importance_matrix = importance.Tree)
   expect_equal(colnames(imp2plot), c("Feature", "Gain", "Cover", "Frequency", "Importance"))
   xgb.ggplot.importance(importance_matrix = importance.Tree)

From 5a8d55b2af0e550554d0fa4432cc4c888347b7ce Mon Sep 17 00:00:00 2001
From: Vadim Khotilovich
Date: Sun, 15 Jan 2017 23:45:26 -0600
Subject: [PATCH 11/14] [R] remove non-functional co-occurrence computation from xgb.importance

---
 R-package/R/xgb.importance.R    | 144 +++++++++++++++-----------------
 R-package/man/xgb.importance.Rd |  71 ++++++++--------
 2 files changed, 103 insertions(+), 112 deletions(-)

diff --git a/R-package/R/xgb.importance.R b/R-package/R/xgb.importance.R
index 3c66fa8cf46a..4fb1f08c4644 100644
--- a/R-package/R/xgb.importance.R
+++ b/R-package/R/xgb.importance.R
@@ -1,102 +1,92 @@
-#' Show importance of features in a model
+#' Importance of features in a model.
 #'
-#' Create a \code{data.table} of the most important features of a model.
+#' Creates a \code{data.table} of feature importances in a model.
 #'
-#' @param feature_names names of each feature as a \code{character} vector. Can be extracted from a sparse matrix (see example). If model dump already contains feature names, this argument should be \code{NULL}.
-#' @param model generated by the \code{xgb.train} function.
-#' @param data the dataset used for the training step. Will be used with \code{label} parameter for co-occurence computation. More information in \code{Detail} part. This parameter is optional.
-#' @param label the label vector used for the training step. Will be used with \code{data} parameter for co-occurence computation. More information in \code{Detail} part.
This parameter is optional. -#' @param target a function which returns \code{TRUE} or \code{1} when an observation should be count as a co-occurence and \code{FALSE} or \code{0} otherwise. Default function is provided for computing co-occurences in a binary classification. The \code{target} function should have only one parameter. This parameter will be used to provide each important feature vector after having applied the split condition, therefore these vector will be only made of 0 and 1 only, whatever was the information before. More information in \code{Detail} part. This parameter is optional. -#' -#' @return A \code{data.table} of the features used in the model with their average gain (and their weight for boosted tree model) in the model. +#' @param feature_names character vector of feature names. If the model already +#' contains feature names, those would be used when \code{feature_names=NULL} (default value). +#' Non-null \code{feature_names} could be provided to override those in the model. +#' @param model object of class \code{xgb.Booster}. +#' @param data deprecated. +#' @param label deprecated. +#' @param target deprecated. #' #' @details -#' This function is for both linear and tree models. -#' -#' \code{data.table} is returned by the function. -#' The columns are: -#' \itemize{ -#' \item \code{Features} name of the features as provided in \code{feature_names} or already present in the model dump; -#' \item \code{Gain} contribution of each feature to the model. For boosted tree model, each gain of each feature of each tree is taken into account, then average per feature to give a vision of the entire model. Highest percentage means important feature to predict the \code{label} used for the training (only available for tree models); -#' \item \code{Cover} metric of the number of observation related to this feature (only available for tree models); -#' \item \code{Weight} percentage representing the relative number of times a feature have been taken into trees. -#' } #' -#' If you don't provide \code{feature_names}, index of the features will be used instead. +#' This function works for both linear and tree models. #' -#' Because the index is extracted from the model dump (made on the C++ side), it starts at 0 (usual in C++) instead of 1 (usual in R). +#' For linear models, the importance is the absolute magnitude of linear coefficients. +#' For that reason, in order to obtain a meaningful ranking by importance for a linear model, +#' the features need to be on the same scale (which you also would want to do when using either +#' L1 or L2 regularization). #' -#' Co-occurence count -#' ------------------ +#' @return #' -#' The gain gives you indication about the information of how a feature is important in making a branch of a decision tree more pure. However, with this information only, you can't know if this feature has to be present or not to get a specific classification. In the example code, you may wonder if odor=none should be \code{TRUE} to not eat a mushroom. +#' For a tree model, a \code{data.table} with the following columns: +#' \itemize{ +#' \item \code{Features} names of the features used in the model; +#' \item \code{Gain} represents fractional contribution of each feature to the model based on +#' the total gain of this feature's splits. Higher percentage means a more important +#' predictive feature. 
+#' \item \code{Cover} metric of the number of observation related to this feature; +#' \item \code{Frequency} percentage representing the relative number of times +#' a feature have been used in trees. +#' } #' -#' Co-occurence computation is here to help in understanding this relation between a predictor and a specific class. It will count how many observations are returned as \code{TRUE} by the \code{target} function (see parameters). When you execute the example below, there are 92 times only over the 3140 observations of the train dataset where a mushroom have no odor and can be eaten safely. +#' A linear model's importance \code{data.table} has only two columns: +#' \itemize{ +#' \item \code{Features} names of the features used in the model; +#' \item \code{Weight} the linear coefficient of this feature. +#' } #' -#' If you need to remember only one thing: unless you want to leave us early, don't eat a mushroom which has no odor :-) +#' If you don't provide or \code{model} doesn't have \code{feature_names}, +#' index of the features will be used instead. Because the index is extracted from the model dump +#' (based on C++ code), it starts at 0 (as in C/C++ or Python) instead of 1 (usual in R). #' #' @examples +#' #' data(agaricus.train, package='xgboost') #' #' bst <- xgboost(data = agaricus.train$data, label = agaricus.train$label, max_depth = 2, #' eta = 1, nthread = 2, nrounds = 2,objective = "binary:logistic") #' -#' xgb.importance(colnames(agaricus.train$data), model = bst) -#' -#' # Same thing with co-occurence computation this time -#' xgb.importance(colnames(agaricus.train$data), model = bst, -#' data = agaricus.train$data, label = agaricus.train$label) +#' xgb.importance(model = bst) #' #' @export -xgb.importance <- function(feature_names = NULL, model = NULL, data = NULL, label = NULL, target = function(x) ( (x + label) == 2)){ - if (!class(feature_names) %in% c("character", "NULL")) { - stop("feature_names: Has to be a vector of character or NULL if the model already contains feature name. Look at this function documentation to see where to get feature names.") - } - - if (class(model) != "xgb.Booster") { - stop("model: Has to be an object of class xgb.Booster model generaged by the xgb.train function.") - } - - if((is.null(data) & !is.null(label)) | (!is.null(data) & is.null(label))) { - stop("data/label: Provide the two arguments if you want co-occurence computation or none of them if you are not interested but not one of them only.") - } - - if(class(label) == "numeric"){ - if(sum(label == 0) / length(label) > 0.5) label <- as(label, "sparseVector") - } +xgb.importance <- function(feature_names = NULL, model = NULL, + data = NULL, label = NULL, target = NULL){ - treeDump <- function(feature_names, text, keepDetail){ - if(keepDetail) groupBy <- c("Feature", "Split", "MissingNo") else groupBy <- "Feature" - xgb.model.dt.tree(feature_names = feature_names, text = text)[,"MissingNo" := Missing == No ][Feature != "Leaf",.(Gain = sum(Quality), Cover = sum(Cover), Frequency = .N), by = groupBy, with = T][,`:=`(Gain = Gain / sum(Gain), Cover = Cover / sum(Cover), Frequency = Frequency / sum(Frequency))][order(Gain, decreasing = T)] - } + if (!(is.null(data) && is.null(label) && is.null(target))) + warning("xgb.importance: parameters 'data', 'label' and 'target' are deprecated") - linearDump <- function(feature_names, text){ - weights <- which(text == "weight:") %>% {a =. 
+ 1; text[a:length(text)]} %>% as.numeric - if(is.null(feature_names)) feature_names <- seq(to = length(weights)) - data.table(Feature = feature_names, Weight = weights) - } - - model.text.dump <- xgb.dump(model = model, with_stats = T) + if (class(model) != "xgb.Booster") + stop("Either 'model' has to be an object of class xgb.Booster") - if(model.text.dump[2] == "bias:"){ - result <- model.text.dump %>% linearDump(feature_names, .) - if(!is.null(data) | !is.null(label)) warning("data/label: these parameters should only be provided with decision tree based models.") - } else { - result <- treeDump(feature_names, text = model.text.dump, keepDetail = !is.null(data)) - - # Co-occurence computation - if(!is.null(data) & !is.null(label) & nrow(result) > 0) { - # Take care of missing column - a <- data[, result[MissingNo == T,Feature], drop=FALSE] != 0 - # Bind the two Matrix and reorder columns - c <- data[, result[MissingNo == F,Feature], drop=FALSE] %>% cBind(a,.) %>% .[,result[,Feature]] - rm(a) - # Apply split - d <- data[, result[,Feature], drop=FALSE] < as.numeric(result[,Split]) - apply(c & d, 2, . %>% target %>% sum) -> vec + if (is.null(feature_names) && !is.null(model$feature_names)) + feature_names <- model$feature_names + + if (!class(feature_names) %in% c("character", "NULL")) + stop("feature_names: Has to be a character vector") - result <- result[, "RealCover" := as.numeric(vec), with = F][, "RealCover %" := RealCover / sum(label)][, MissingNo := NULL] - } + model_text_dump <- xgb.dump(model = model, with_stats = TRUE) + + # linear model + if(model_text_dump[2] == "bias:"){ + weights <- which(model_text_dump == "weight:") %>% + {model_text_dump[(. + 1):length(model_text_dump)]} %>% + as.numeric + if(is.null(feature_names)) + feature_names <- seq(to = length(weights)) + result <- data.table(Feature = feature_names, Weight = weights)[order(-abs(Weight))] + } else { + # tree model + result <- xgb.model.dt.tree(feature_names = feature_names, text = model_text_dump)[ + Feature != "Leaf", .(Gain = sum(Quality), + Cover = sum(Cover), + Frequency = .N), by = Feature][ + ,`:=`(Gain = Gain / sum(Gain), + Cover = Cover / sum(Cover), + Frequency = Frequency / sum(Frequency))][ + order(Gain, decreasing = TRUE)] } result } @@ -104,4 +94,4 @@ xgb.importance <- function(feature_names = NULL, model = NULL, data = NULL, labe # Avoid error messages during CRAN check. # The reason is that these variables are never declared # They are mainly column names inferred by Data.table... -globalVariables(c(".", ".N", "Gain", "Frequency", "Feature", "Split", "No", "Missing", "MissingNo", "RealCover")) +globalVariables(c(".", ".N", "Gain", "Cover", "Frequency", "Feature")) diff --git a/R-package/man/xgb.importance.Rd b/R-package/man/xgb.importance.Rd index 2b0237aa0992..3270a1b709c7 100644 --- a/R-package/man/xgb.importance.Rd +++ b/R-package/man/xgb.importance.Rd @@ -2,64 +2,65 @@ % Please edit documentation in R/xgb.importance.R \name{xgb.importance} \alias{xgb.importance} -\title{Show importance of features in a model} +\title{Importance of features in a model.} \usage{ xgb.importance(feature_names = NULL, model = NULL, data = NULL, - label = NULL, target = function(x) ((x + label) == 2)) + label = NULL, target = NULL) } \arguments{ -\item{feature_names}{names of each feature as a \code{character} vector. Can be extracted from a sparse matrix (see example). If model dump already contains feature names, this argument should be \code{NULL}.} +\item{feature_names}{character vector of feature names. 
If the model already +contains feature names, those would be used when \code{feature_names=NULL} (default value). +Non-null \code{feature_names} could be provided to override those in the model.} -\item{model}{generated by the \code{xgb.train} function.} +\item{model}{object of class \code{xgb.Booster}.} -\item{data}{the dataset used for the training step. Will be used with \code{label} parameter for co-occurence computation. More information in \code{Detail} part. This parameter is optional.} +\item{data}{deprecated.} -\item{label}{the label vector used for the training step. Will be used with \code{data} parameter for co-occurence computation. More information in \code{Detail} part. This parameter is optional.} +\item{label}{deprecated.} -\item{target}{a function which returns \code{TRUE} or \code{1} when an observation should be count as a co-occurence and \code{FALSE} or \code{0} otherwise. Default function is provided for computing co-occurences in a binary classification. The \code{target} function should have only one parameter. This parameter will be used to provide each important feature vector after having applied the split condition, therefore these vector will be only made of 0 and 1 only, whatever was the information before. More information in \code{Detail} part. This parameter is optional.} +\item{target}{deprecated.} } \value{ -A \code{data.table} of the features used in the model with their average gain (and their weight for boosted tree model) in the model. -} -\description{ -Create a \code{data.table} of the most important features of a model. +For a tree model, a \code{data.table} with the following columns: +\itemize{ + \item \code{Features} names of the features used in the model; + \item \code{Gain} represents fractional contribution of each feature to the model based on + the total gain of this feature's splits. Higher percentage means a more important + predictive feature. + \item \code{Cover} metric of the number of observation related to this feature; + \item \code{Frequency} percentage representing the relative number of times + a feature have been used in trees. } -\details{ -This function is for both linear and tree models. -\code{data.table} is returned by the function. -The columns are: +A linear model's importance \code{data.table} has only two columns: \itemize{ - \item \code{Features} name of the features as provided in \code{feature_names} or already present in the model dump; - \item \code{Gain} contribution of each feature to the model. For boosted tree model, each gain of each feature of each tree is taken into account, then average per feature to give a vision of the entire model. Highest percentage means important feature to predict the \code{label} used for the training (only available for tree models); - \item \code{Cover} metric of the number of observation related to this feature (only available for tree models); - \item \code{Weight} percentage representing the relative number of times a feature have been taken into trees. + \item \code{Features} names of the features used in the model; + \item \code{Weight} the linear coefficient of this feature. } -If you don't provide \code{feature_names}, index of the features will be used instead. - -Because the index is extracted from the model dump (made on the C++ side), it starts at 0 (usual in C++) instead of 1 (usual in R). - -Co-occurence count ------------------- - -The gain gives you indication about the information of how a feature is important in making a branch of a decision tree more pure. 
However, with this information only, you can't know if this feature has to be present or not to get a specific classification. In the example code, you may wonder if odor=none should be \code{TRUE} to not eat a mushroom. - -Co-occurence computation is here to help in understanding this relation between a predictor and a specific class. It will count how many observations are returned as \code{TRUE} by the \code{target} function (see parameters). When you execute the example below, there are 92 times only over the 3140 observations of the train dataset where a mushroom have no odor and can be eaten safely. +If you don't provide or \code{model} doesn't have \code{feature_names}, +index of the features will be used instead. Because the index is extracted from the model dump +(based on C++ code), it starts at 0 (as in C/C++ or Python) instead of 1 (usual in R). +} +\description{ +Creates a \code{data.table} of feature importances in a model. +} +\details{ +This function works for both linear and tree models. -If you need to remember only one thing: unless you want to leave us early, don't eat a mushroom which has no odor :-) +For linear models, the importance is the absolute magnitude of linear coefficients. +For that reason, in order to obtain a meaningful ranking by importance for a linear model, +the features need to be on the same scale (which you also would want to do when using either +L1 or L2 regularization). } \examples{ + data(agaricus.train, package='xgboost') bst <- xgboost(data = agaricus.train$data, label = agaricus.train$label, max_depth = 2, eta = 1, nthread = 2, nrounds = 2,objective = "binary:logistic") -xgb.importance(colnames(agaricus.train$data), model = bst) - -# Same thing with co-occurence computation this time -xgb.importance(colnames(agaricus.train$data), model = bst, - data = agaricus.train$data, label = agaricus.train$label) +xgb.importance(model = bst) } From 6b1d5e1677856c7239e5b418658c306b0c0a4fb4 Mon Sep 17 00:00:00 2001 From: Vadim Khotilovich Date: Mon, 16 Jan 2017 00:20:45 -0600 Subject: [PATCH 12/14] [R] verbose=0 is enough without a callback --- R-package/tests/testthat/test_update.R | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/R-package/tests/testthat/test_update.R b/R-package/tests/testthat/test_update.R index 1cbed1c338cb..8518711fb323 100644 --- a/R-package/tests/testthat/test_update.R +++ b/R-package/tests/testthat/test_update.R @@ -9,24 +9,23 @@ dtest <- xgb.DMatrix(agaricus.test$data, label = agaricus.test$label) test_that("updating the model works", { watchlist = list(train = dtrain, test = dtest) - cb = list(cb.evaluation.log()) # to run silent, but store eval. 
log - + # no-subsampling p1 <- list(objective = "binary:logistic", max_depth = 2, eta = 0.05, nthread = 2) set.seed(11) - bst1 <- xgb.train(p1, dtrain, nrounds = 10, watchlist, verbose = 0, callbacks = cb) + bst1 <- xgb.train(p1, dtrain, nrounds = 10, watchlist, verbose = 0) tr1 <- xgb.model.dt.tree(model = bst1) # with subsampling p2 <- modifyList(p1, list(subsample = 0.1)) set.seed(11) - bst2 <- xgb.train(p2, dtrain, nrounds = 10, watchlist, verbose = 0, callbacks = cb) + bst2 <- xgb.train(p2, dtrain, nrounds = 10, watchlist, verbose = 0) tr2 <- xgb.model.dt.tree(model = bst2) # the same no-subsampling boosting with an extra 'refresh' updater: p1r <- modifyList(p1, list(updater = 'grow_colmaker,prune,refresh', refresh_leaf = FALSE)) set.seed(11) - bst1r <- xgb.train(p1r, dtrain, nrounds = 10, watchlist, verbose = 0, callbacks = cb) + bst1r <- xgb.train(p1r, dtrain, nrounds = 10, watchlist, verbose = 0) tr1r <- xgb.model.dt.tree(model = bst1r) # all should be the same when no subsampling expect_equal(bst1$evaluation_log, bst1r$evaluation_log) @@ -35,7 +34,7 @@ test_that("updating the model works", { # the same boosting with subsampling with an extra 'refresh' updater: p2r <- modifyList(p2, list(updater = 'grow_colmaker,prune,refresh', refresh_leaf = FALSE)) set.seed(11) - bst2r <- xgb.train(p2r, dtrain, nrounds = 10, watchlist, verbose = 0, callbacks = cb) + bst2r <- xgb.train(p2r, dtrain, nrounds = 10, watchlist, verbose = 0) tr2r <- xgb.model.dt.tree(model = bst2r) # should be the same evaluation but different gains and larger cover expect_equal(bst2$evaluation_log, bst2r$evaluation_log) @@ -45,7 +44,7 @@ test_that("updating the model works", { # process type 'update' for no-subsampling model, refreshing the tree stats AND leaves from training data: p1u <- modifyList(p1, list(process_type = 'update', updater = 'refresh', refresh_leaf = TRUE)) - bst1u <- xgb.train(p1u, dtrain, nrounds = 10, watchlist, verbose = 0, callbacks = cb, xgb_model = bst1) + bst1u <- xgb.train(p1u, dtrain, nrounds = 10, watchlist, verbose = 0, xgb_model = bst1) tr1u <- xgb.model.dt.tree(model = bst1u) # all should be the same when no subsampling expect_equal(bst1$evaluation_log, bst1u$evaluation_log) @@ -53,7 +52,7 @@ test_that("updating the model works", { # process type 'update' for model with subsampling, refreshing only the tree stats from training data: p2u <- modifyList(p2, list(process_type = 'update', updater = 'refresh', refresh_leaf = FALSE)) - bst2u <- xgb.train(p2u, dtrain, nrounds = 10, watchlist, verbose = 0, callbacks = cb, xgb_model = bst2) + bst2u <- xgb.train(p2u, dtrain, nrounds = 10, watchlist, verbose = 0, xgb_model = bst2) tr2u <- xgb.model.dt.tree(model = bst2u) # should be the same evaluation but different gains and larger cover expect_equal(bst2$evaluation_log, bst2u$evaluation_log) @@ -66,7 +65,7 @@ test_that("updating the model works", { # process type 'update' for no-subsampling model, refreshing only the tree stats from TEST data: p1ut <- modifyList(p1, list(process_type = 'update', updater = 'refresh', refresh_leaf = FALSE)) - bst1ut <- xgb.train(p1ut, dtest, nrounds = 10, watchlist, verbose = 0, callbacks = cb, xgb_model = bst1) + bst1ut <- xgb.train(p1ut, dtest, nrounds = 10, watchlist, verbose = 0, xgb_model = bst1) tr1ut <- xgb.model.dt.tree(model = bst1ut) # should be the same evaluations but different gains and smaller cover (test data is smaller) expect_equal(bst1$evaluation_log, bst1ut$evaluation_log) From 15dee173652fd9ae76b27cc953e10c80ffe45605 Mon Sep 17 00:00:00 2001 
From: Vadim Khotilovich Date: Mon, 16 Jan 2017 20:02:37 -0600 Subject: [PATCH 13/14] [R] added forgotten xgb.Booster.complete.Rd; cran check fixes --- R-package/R/xgb.Booster.R | 16 ++++----- R-package/R/xgb.plot.tree.R | 2 +- R-package/R/xgb.train.R | 2 +- R-package/man/xgb.Booster.complete.Rd | 49 +++++++++++++++++++++++++++ R-package/man/xgb.train.Rd | 2 +- 5 files changed, 60 insertions(+), 11 deletions(-) create mode 100644 R-package/man/xgb.Booster.complete.Rd diff --git a/R-package/R/xgb.Booster.R b/R-package/R/xgb.Booster.R index fc9956bc9fc5..8aafee1f32da 100644 --- a/R-package/R/xgb.Booster.R +++ b/R-package/R/xgb.Booster.R @@ -66,7 +66,7 @@ xgb.get.handle <- function(object) { #' or its missing internal handle (when its \code{xgb.Booster.handle} is not valid #' but it has a raw Booster memory dump). #' -#' @param object Object of class \code{xgb.Booster} +#' @param object object of class \code{xgb.Booster} #' @param saveraw a flag indicating whether to append \code{raw} Booster memory dump data #' when it doesn't already exist. #' @@ -99,17 +99,17 @@ xgb.get.handle <- function(object) { #' print(bst1$handle) #' #' @export -xgb.Booster.complete <- function(bst, saveraw = TRUE) { - if (class(bst) != "xgb.Booster") +xgb.Booster.complete <- function(object, saveraw = TRUE) { + if (class(object) != "xgb.Booster") stop("argument type must be xgb.Booster") - if (is.null.handle(bst$handle)) { - bst$handle <- xgb.Booster.handle(modelfile = bst$raw) + if (is.null.handle(object$handle)) { + object$handle <- xgb.Booster.handle(modelfile = object$raw) } else { - if (is.null(bst$raw) && saveraw) - bst$raw <- xgb.save.raw(bst$handle) + if (is.null(object$raw) && saveraw) + object$raw <- xgb.save.raw(object$handle) } - return(bst) + return(object) } #' Predict method for eXtreme Gradient Boosting model diff --git a/R-package/R/xgb.plot.tree.R b/R-package/R/xgb.plot.tree.R index 41b72c8a0f37..313cc9d6f436 100644 --- a/R-package/R/xgb.plot.tree.R +++ b/R-package/R/xgb.plot.tree.R @@ -126,4 +126,4 @@ xgb.plot.tree <- function(feature_names = NULL, model = NULL, trees = NULL, plot # Avoid error messages during CRAN check. # The reason is that these variables are never declared # They are mainly column names inferred by Data.table... -globalVariables(c("Feature", "ID", "Cover", "Quality", "Split", "Yes", "No", ".", "shape", "filledcolor", "label")) +globalVariables(c("Feature", "ID", "Cover", "Quality", "Split", "Yes", "No", "Missing", ".", "shape", "filledcolor", "label")) diff --git a/R-package/R/xgb.train.R b/R-package/R/xgb.train.R index 0c17060de41c..a9eb863ad674 100644 --- a/R-package/R/xgb.train.R +++ b/R-package/R/xgb.train.R @@ -73,7 +73,7 @@ #' @param verbose If 0, xgboost will stay silent. If 1, it will print information about performance. #' If 2, some additional information will be printed out. #' Note that setting \code{verbose > 0} automatically engages the -#' \code{\link{cb.print.evaluation(period=1)}} callback function. +#' \code{cb.print.evaluation(period=1)} callback function. #' @param print_every_n Print each n-th iteration evaluation messages when \code{verbose>0}. #' Default is 1 which means all messages are printed. This parameter is passed to the #' \code{\link{cb.print.evaluation}} callback. 
diff --git a/R-package/man/xgb.Booster.complete.Rd b/R-package/man/xgb.Booster.complete.Rd new file mode 100644 index 000000000000..0e821e33c6bf --- /dev/null +++ b/R-package/man/xgb.Booster.complete.Rd @@ -0,0 +1,49 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/xgb.Booster.R +\name{xgb.Booster.complete} +\alias{xgb.Booster.complete} +\title{Restore missing parts of an incomplete xgb.Booster object.} +\usage{ +xgb.Booster.complete(object, saveraw = TRUE) +} +\arguments{ +\item{object}{object of class \code{xgb.Booster}} + +\item{saveraw}{a flag indicating whether to append \code{raw} Booster memory dump data +when it doesn't already exist.} +} +\value{ +An object of \code{xgb.Booster} class. +} +\description{ +It attempts to complete an \code{xgb.Booster} object by restoring either its missing +raw model memory dump (when it has no \code{raw} data but its \code{xgb.Booster.handle} is valid) +or its missing internal handle (when its \code{xgb.Booster.handle} is not valid +but it has a raw Booster memory dump). +} +\details{ +While this method is primarily for internal use, it might be useful in some practical situations. + +E.g., when an \code{xgb.Booster} model is saved as an R object and then is loaded as an R object, +its handle (pointer) to an internal xgboost model would be invalid. The majority of xgboost methods +should still work for such a model object since those methods would be using +\code{xgb.Booster.complete} internally. However, one might find it to be more efficient to call the +\code{xgb.Booster.complete} function once after loading a model as an R-object. That which would +prevent further reconstruction (potentially, multiple times) of an internal booster model. +} +\examples{ + +data(agaricus.train, package='xgboost') +bst <- xgboost(data = agaricus.train$data, label = agaricus.train$label, max_depth = 2, + eta = 1, nthread = 2, nrounds = 2, objective = "binary:logistic") +saveRDS(bst, "xgb.model.rds") + +bst1 <- readRDS("xgb.model.rds") +# the handle is invalid: +print(bst1$handle) +bst1 <- xgb.Booster.complete(bst1) +# now the handle points to a valid internal booster model: +print(bst1$handle) + +} + diff --git a/R-package/man/xgb.train.Rd b/R-package/man/xgb.train.Rd index 51c24520e8ad..269789b194a5 100644 --- a/R-package/man/xgb.train.Rd +++ b/R-package/man/xgb.train.Rd @@ -91,7 +91,7 @@ prediction and dtrain.} \item{verbose}{If 0, xgboost will stay silent. If 1, it will print information about performance. If 2, some additional information will be printed out. Note that setting \code{verbose > 0} automatically engages the -\code{\link{cb.print.evaluation(period=1)}} callback function.} +\code{cb.print.evaluation(period=1)} callback function.} \item{print_every_n}{Print each n-th iteration evaluation messages when \code{verbose>0}. Default is 1 which means all messages are printed. 
This parameter is passed to the From 3a173a4fbd361183dffff84e824b8a37ccf12394 Mon Sep 17 00:00:00 2001 From: Vadim Khotilovich Date: Mon, 16 Jan 2017 21:36:52 -0600 Subject: [PATCH 14/14] [R] update installation instructions --- doc/build.md | 47 ++++++++++++++++++++++++++++++++--------------- 1 file changed, 32 insertions(+), 15 deletions(-) diff --git a/doc/build.md b/doc/build.md index d41cc5f0e4ba..d2eaf99cd8b3 100644 --- a/doc/build.md +++ b/doc/build.md @@ -189,7 +189,15 @@ There are several ways to install the package: ## R Package Installation -You can install R package from cran just like other packages, or you can install from our weekly updated drat repo: +### Installing pre-packaged version + +You can install xgboost from CRAN just like any other R package: + +```r +install.packages("xgboost") +``` + +Or you can install it from our weekly updated drat repo: ```r install.packages("drat", repos="https://cran.rstudio.com") @@ -197,10 +205,8 @@ drat:::addRepo("dmlc") install.packages("xgboost", repos="http://dmlc.ml/drat/", type = "source") ``` -If you would like to use the latest xgboost version and already compiled xgboost, use `library(devtools); install('xgboost/R-package')` to install manually xgboost package (change the path accordingly to where you compiled xgboost). - -For OSX users, single threaded version will be installed, to install multi-threaded version. -First follow [Building on OSX](#building-on-osx) to get the OpenMP enabled compiler, then: +For OSX users, single threaded version will be installed. To install multi-threaded version, +first follow [Building on OSX](#building-on-osx) to get the OpenMP enabled compiler, then: - Set the `Makevars` file in highest piority for R. @@ -214,24 +220,35 @@ First follow [Building on OSX](#building-on-osx) to get the OpenMP enabled compi install.packages("xgboost", repos="http://dmlc.ml/drat/", type = "source") ``` -Due to the usage of submodule, `install_github` is no longer support to install the -latest version of R package. To install the latest version run the following bash script, +### Installing the development version + +Make sure you have installed git and a recent C++ compiler supporting C++11 (e.g., g++-4.6 or higher). +On Windows, Rtools must be installed, and its bin directory has to be added to PATH during the installation. +And see the previous subsection for an OSX tip. + +Due to the use of git-submodules, `devtools::install_github` can no longer be used to install the latest version of R package. +Thus, one has to run git to check out the code first: ```bash git clone --recursive https://github.com/dmlc/xgboost cd xgboost git submodule init git submodule update -alias make='mingw32-make' -cd dmlc-core -make -j4 -cd ../rabit -make lib/librabit_empty.a -j4 -cd .. -cp make/mingw64.mk config.mk -make -j4 +cd R-package +R CMD INSTALL . ``` +If the last line fails because of "R: command not found", it means that R was not set up to run from command line. +In this case, just start R as you would normally do and run the following: + +```r +setwd('wherever/you/cloned/it/xgboost/R-package/') +install.packages('.', repos = NULL, type="source") +``` + +If all fails, try [building the shared library](#build-the-shared-library) to see whether a problem is specific to R package or not. + + ## Trouble Shooting 1. **Compile failed after `git pull`**