Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[R] various R code maintenance #1964

Merged
merged 15 commits into from
Jan 21, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions R-package/NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ export(cb.save.model)
export(getinfo)
export(setinfo)
export(slice)
export(xgb.Booster.complete)
export(xgb.DMatrix)
export(xgb.DMatrix.save)
export(xgb.attr)
Expand Down
2 changes: 1 addition & 1 deletion R-package/R/callbacks.R
Original file line number Diff line number Diff line change
Expand Up @@ -507,7 +507,7 @@ cb.cv.predict <- function(save_models = FALSE) {
if (save_models) {
env$basket$models <- lapply(env$bst_folds, function(fd) {
xgb.attr(fd$bst, 'niter') <- env$end_iteration - 1
xgb.Booster.check(xgb.handleToBooster(fd$bst), saveraw = TRUE)
xgb.Booster.complete(xgb.handleToBooster(fd$bst), saveraw = TRUE)
})
}
}
Expand Down
107 changes: 78 additions & 29 deletions R-package/R/xgb.Booster.R
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# Construct a Booster from cachelist
# Construct an internal xgboost Booster and return a handle to it
# internal utility function
xgb.Booster <- function(params = list(), cachelist = list(), modelfile = NULL) {
xgb.Booster.handle <- function(params = list(), cachelist = list(), modelfile = NULL) {
if (typeof(cachelist) != "list" ||
any(sapply(cachelist, class) != 'xgb.DMatrix')) {
stop("xgb.Booster only accepts list of DMatrix as cachelist")
Expand All @@ -13,8 +13,8 @@ xgb.Booster <- function(params = list(), cachelist = list(), modelfile = NULL) {
} else if (typeof(modelfile) == "raw") {
.Call("XGBoosterLoadModelFromRaw_R", handle, modelfile, PACKAGE = "xgboost")
} else if (class(modelfile) == "xgb.Booster") {
modelfile <- xgb.Booster.check(modelfile, saveraw=TRUE)
.Call("XGBoosterLoadModelFromRaw_R", handle, modelfile$raw, PACKAGE = "xgboost")
bst <- xgb.Booster.complete(modelfile, saveraw=TRUE)
.Call("XGBoosterLoadModelFromRaw_R", handle, bst$raw, PACKAGE = "xgboost")
} else {
stop("modelfile must be either character filename, or raw booster dump, or xgb.Booster object")
}
Expand All @@ -34,6 +34,17 @@ xgb.handleToBooster <- function(handle, raw = NULL) {
return(bst)
}

# Check whether xgb.Booster.handle is null
# internal utility function
is.null.handle <- function(handle) {
if (class(handle) != "xgb.Booster.handle")
stop("argument type must be xgb.Booster.handle")

if (is.null(handle) || .Call("XGCheckNullPtr_R", handle, PACKAGE="xgboost"))
return(TRUE)
return(FALSE)
}

# Return a verified to be valid handle out of either xgb.Booster.handle or xgb.Booster
# internal utility function
xgb.get.handle <- function(object) {
Expand All @@ -42,32 +53,65 @@ xgb.get.handle <- function(object) {
xgb.Booster.handle = object,
stop("argument must be of either xgb.Booster or xgb.Booster.handle class")
)
if (is.null(handle) || .Call("XGCheckNullPtr_R", handle, PACKAGE="xgboost")) {
if (is.null.handle(handle)) {
stop("invalid xgb.Booster.handle")
}
handle
}

# Check whether an xgb.Booster object is complete
# internal utility function
xgb.Booster.check <- function(bst, saveraw = TRUE) {
if (class(bst) != "xgb.Booster")
#' Restore missing parts of an incomplete xgb.Booster object.
#'
#' It attempts to complete an \code{xgb.Booster} object by restoring either its missing
#' raw model memory dump (when it has no \code{raw} data but its \code{xgb.Booster.handle} is valid)
#' or its missing internal handle (when its \code{xgb.Booster.handle} is not valid
#' but it has a raw Booster memory dump).
#'
#' @param object object of class \code{xgb.Booster}
#' @param saveraw a flag indicating whether to append \code{raw} Booster memory dump data
#' when it doesn't already exist.
#'
#' @details
#'
#' While this method is primarily for internal use, it might be useful in some practical situations.
#'
#' E.g., when an \code{xgb.Booster} model is saved as an R object and then is loaded as an R object,
#' its handle (pointer) to an internal xgboost model would be invalid. The majority of xgboost methods
#' should still work for such a model object since those methods would be using
#' \code{xgb.Booster.complete} internally. However, one might find it to be more efficient to call the
#' \code{xgb.Booster.complete} function once after loading a model as an R-object. That which would
#' prevent further reconstruction (potentially, multiple times) of an internal booster model.
#'
#' @return
#' An object of \code{xgb.Booster} class.
#'
#' @examples
#'
#' data(agaricus.train, package='xgboost')
#' bst <- xgboost(data = agaricus.train$data, label = agaricus.train$label, max_depth = 2,
#' eta = 1, nthread = 2, nrounds = 2, objective = "binary:logistic")
#' saveRDS(bst, "xgb.model.rds")
#'
#' bst1 <- readRDS("xgb.model.rds")
#' # the handle is invalid:
#' print(bst1$handle)
#' bst1 <- xgb.Booster.complete(bst1)
#' # now the handle points to a valid internal booster model:
#' print(bst1$handle)
#'
#' @export
xgb.Booster.complete <- function(object, saveraw = TRUE) {
if (class(object) != "xgb.Booster")
stop("argument type must be xgb.Booster")

isnull <- is.null(bst$handle)
if (!isnull) {
isnull <- .Call("XGCheckNullPtr_R", bst$handle, PACKAGE="xgboost")
}
if (isnull) {
bst$handle <- xgb.Booster(modelfile = bst$raw)
if (is.null.handle(object$handle)) {
object$handle <- xgb.Booster.handle(modelfile = object$raw)
} else {
if (is.null(bst$raw) && saveraw)
bst$raw <- xgb.save.raw(bst$handle)
if (is.null(object$raw) && saveraw)
object$raw <- xgb.save.raw(object$handle)
}
return(bst)
return(object)
}


#' Predict method for eXtreme Gradient Boosting model
#'
#' Predicted values based on either xgboost model or model handle object.
Expand Down Expand Up @@ -180,7 +224,7 @@ xgb.Booster.check <- function(bst, saveraw = TRUE) {
predict.xgb.Booster <- function(object, newdata, missing = NA,
outputmargin = FALSE, ntreelimit = NULL, predleaf = FALSE, reshape = FALSE, ...) {

object <- xgb.Booster.check(object, saveraw = FALSE)
object <- xgb.Booster.complete(object, saveraw = FALSE)
if (class(newdata) != "xgb.DMatrix")
newdata <- xgb.DMatrix(newdata, missing = missing)
if (is.null(ntreelimit))
Expand Down Expand Up @@ -429,11 +473,10 @@ xgb.ntree <- function(bst) {
print.xgb.Booster <- function(x, verbose=FALSE, ...) {
cat('##### xgb.Booster\n')

if (is.null(x$handle) || .Call("XGCheckNullPtr_R", x$handle, PACKAGE="xgboost")) {
cat("handle is invalid\n")
return(x)
}

valid_handle <- is.null.handle(x$handle)
if (!valid_handle)
cat("Handle is invalid! Suggest using xgb.Booster.complete\n")

cat('raw: ')
if (!is.null(x$raw)) {
cat(format(object.size(x$raw), units="auto"), '\n')
Expand All @@ -454,7 +497,9 @@ print.xgb.Booster <- function(x, verbose=FALSE, ...) {
}
# TODO: need an interface to access all the xgboosts parameters

attrs <- xgb.attributes(x)
attrs <- character(0)
if (valid_handle)
attrs <- xgb.attributes(x)
if (length(attrs) > 0) {
cat('xgb.attributes:\n')
if (verbose) {
Expand All @@ -474,15 +519,19 @@ print.xgb.Booster <- function(x, verbose=FALSE, ...) {
})
}

if (!is.null(x$feature_names))
cat('# of features:', length(x$feature_names), '\n')

cat('niter: ', x$niter, '\n', sep='')
# TODO: uncomment when faster xgb.ntree is implemented
#cat('ntree: ', xgb.ntree(x), '\n', sep='')

for (n in setdiff(names(x), c('handle', 'raw', 'call', 'params', 'callbacks','evaluation_log','niter'))) {
for (n in setdiff(names(x), c('handle', 'raw', 'call', 'params', 'callbacks',
'evaluation_log','niter','feature_names'))) {
if (is.atomic(x[[n]])) {
cat(n, ': ', x[[n]], '\n', sep='')
cat(n, ':', x[[n]], '\n', sep=' ')
} else {
cat(n, ':\n\t', sep='')
cat(n, ':\n\t', sep=' ')
print(x[[n]])
}
}
Expand Down
20 changes: 7 additions & 13 deletions R-package/R/xgb.DMatrix.R
Original file line number Diff line number Diff line change
Expand Up @@ -31,18 +31,13 @@ xgb.DMatrix <- function(data, info = list(), missing = NA, ...) {
PACKAGE = "xgboost")
cnames <- colnames(data)
} else {
stop(paste("xgb.DMatrix: does not support to construct from ",
typeof(data)))
stop("xgb.DMatrix does not support construction from ", typeof(data))
}
dmat <- handle
attributes(dmat) <- list(.Dimnames = list(NULL, cnames), class = "xgb.DMatrix")
#dmat <- list(handle = handle, colnames = cnames)
#attr(dmat, 'class') <- "xgb.DMatrix"

info <- append(info, list(...))
if (length(info) == 0)
return(dmat)
for (i in 1:length(info)) {
for (i in seq_along(info)) {
p <- info[i]
setinfo(dmat, names(p), p[[1]])
}
Expand Down Expand Up @@ -70,11 +65,10 @@ xgb.get.DMatrix <- function(data, label = NULL, missing = NA, weight = NULL) {
dtrain <- xgb.DMatrix(data)
} else if (inClass == "xgb.DMatrix") {
dtrain <- data
} else if (inClass == "data.frame") {
stop("xgboost only support numerical matrix input,
use 'data.matrix' to transform the data.")
} else if ("data.frame" %in% inClass) {
stop("xgboost doesn't support data.frame as input. Convert it to matrix first.")
} else {
stop("xgboost: Invalid input of data")
stop("xgboost: invalid input data")
}
}
return (dtrain)
Expand Down Expand Up @@ -190,7 +184,7 @@ getinfo.xgb.DMatrix <- function(object, name, ...) {
if (typeof(name) != "character" ||
length(name) != 1 ||
!name %in% c('label', 'weight', 'base_margin', 'nrow')) {
stop("getinfo: name must one of the following\n",
stop("getinfo: name must be one of the following\n",
" 'label', 'weight', 'base_margin', 'nrow'")
}
if (name != "nrow"){
Expand Down Expand Up @@ -266,7 +260,7 @@ setinfo.xgb.DMatrix <- function(object, name, info, ...) {
PACKAGE = "xgboost")
return(TRUE)
}
stop(paste("setinfo: unknown info name", name))
stop("setinfo: unknown info name ", name)
return(FALSE)
}

Expand Down
4 changes: 2 additions & 2 deletions R-package/R/xgb.cv.R
Original file line number Diff line number Diff line change
Expand Up @@ -181,8 +181,8 @@ xgb.cv <- function(params=list(), data, nrounds, nfold, label = NULL, missing =
bst_folds <- lapply(1:length(folds), function(k) {
dtest <- slice(dall, folds[[k]])
dtrain <- slice(dall, unlist(folds[-k]))
bst <- xgb.Booster(params, list(dtrain, dtest))
list(dtrain=dtrain, bst=bst, watchlist=list(train=dtrain, test=dtest), index=folds[[k]])
handle <- xgb.Booster.handle(params, list(dtrain, dtest))
list(dtrain=dtrain, bst=handle, watchlist=list(train=dtrain, test=dtest), index=folds[[k]])
})
# a "basket" to collect some results from callbacks
basket <- list()
Expand Down
21 changes: 12 additions & 9 deletions R-package/R/xgb.dump.R
Original file line number Diff line number Diff line change
@@ -1,24 +1,26 @@
#' Save xgboost model to text file
#' Dump an xgboost model in text format.
#'
#' Save a xgboost model to text file. Could be parsed later.
#' Dump an xgboost model in text format.
#'
#' @param model the model object.
#' @param fname the name of the text file where to save the model text dump. If not provided or set to \code{NULL} the function will return the model as a \code{character} vector.
#' @param fmap feature map file representing the type of feature.
#' @param fname the name of the text file where to save the model text dump.
#' If not provided or set to \code{NULL}, the model is returned as a \code{character} vector.
#' @param fmap feature map file representing feature types.
#' Detailed description could be found at
#' \url{https://github.com/dmlc/xgboost/wiki/Binary-Classification#dump-model}.
#' See demo/ for walkthrough example in R, and
#' \url{https://github.com/dmlc/xgboost/blob/master/demo/data/featmap.txt}
#' for example Format.
#' @param with_stats whether dump statistics of splits
#' When this option is on, the model dump comes with two additional statistics:
#' @param with_stats whether to dump some additional statistics about the splits.
#' When this option is on, the model dump contains two additional values:
#' gain is the approximate loss function gain we get in each split;
#' cover is the sum of second order gradient in each node.
#' @param dump_format either 'text' or 'json' format could be specified.
#' @param ... currently not used
#'
#' @return
#' if fname is not provided or set to \code{NULL} the function will return the model as a \code{character} vector. Otherwise it will return \code{TRUE}.
#' If fname is not provided or set to \code{NULL} the function will return the model
#' as a \code{character} vector. Otherwise it will return \code{TRUE}.
#'
#' @examples
#' data(agaricus.train, package='xgboost')
Expand All @@ -37,7 +39,8 @@
#' cat(xgb.dump(bst, with_stats = TRUE, dump_format='json'))
#'
#' @export
xgb.dump <- function(model = NULL, fname = NULL, fmap = "", with_stats=FALSE, dump_format = c("text", "json"), ...) {
xgb.dump <- function(model = NULL, fname = NULL, fmap = "", with_stats=FALSE,
dump_format = c("text", "json"), ...) {
check.deprecation(...)
dump_format <- match.arg(dump_format)
if (class(model) != "xgb.Booster")
Expand All @@ -47,7 +50,7 @@ xgb.dump <- function(model = NULL, fname = NULL, fmap = "", with_stats=FALSE, du
if (!(class(fmap) %in% c("character", "NULL") && length(fmap) <= 1))
stop("fmap: argument must be of type character (when provided)")

model <- xgb.Booster.check(model)
model <- xgb.Booster.complete(model)
model_dump <- .Call("XGBoosterDumpModel_R", model$handle, fmap, as.integer(with_stats),
as.character(dump_format), PACKAGE = "xgboost")

Expand Down
Loading