diff --git a/.ci/lint_r_code.R b/.ci/lint_r_code.R index 4a5b89b865ff..e9a8522bd7c3 100755 --- a/.ci/lint_r_code.R +++ b/.ci/lint_r_code.R @@ -4,7 +4,7 @@ library(lintr) args <- commandArgs( trailingOnly = TRUE ) -SOURCE_DIR <- args[[1]] +SOURCE_DIR <- args[[1L]] FILES_TO_LINT <- list.files( path = SOURCE_DIR @@ -17,22 +17,34 @@ FILES_TO_LINT <- list.files( ) LINTERS_TO_USE <- list( - "closed_curly" = lintr::closed_curly_linter + "assignment" = lintr::assignment_linter + , "closed_curly" = lintr::closed_curly_linter + , "equals_na" = lintr::equals_na_linter + , "function_left" = lintr::function_left_parentheses_linter + , "commas" = lintr::commas_linter + , "concatenation" = lintr::unneeded_concatenation_linter + , "implicit_integers" = lintr::implicit_integer_linter , "infix_spaces" = lintr::infix_spaces_linter - , "long_lines" = lintr::line_length_linter(length = 120) + , "long_lines" = lintr::line_length_linter(length = 120L) , "tabs" = lintr::no_tab_linter , "open_curly" = lintr::open_curly_linter + , "paren_brace_linter" = lintr::paren_brace_linter + , "semicolon" = lintr::semicolon_terminator_linter + , "seq" = lintr::seq_linter + , "single_quotes" = lintr::single_quotes_linter , "spaces_inside" = lintr::spaces_inside_linter , "spaces_left_parens" = lintr::spaces_left_parentheses_linter + , "todo_comments" = lintr::todo_comment_linter , "trailing_blank" = lintr::trailing_blank_lines_linter , "trailing_white" = lintr::trailing_whitespace_linter + , "true_false" = lintr::T_and_F_symbol_linter ) cat(sprintf("Found %i R files to lint\n", length(FILES_TO_LINT))) -results <- c() +results <- NULL -for (r_file in FILES_TO_LINT){ +for (r_file in FILES_TO_LINT) { this_result <- lintr::lint( filename = r_file @@ -52,7 +64,7 @@ for (r_file in FILES_TO_LINT){ issues_found <- length(results) -if (issues_found > 0){ +if (issues_found > 0L) { cat("\n") print(results) } diff --git a/.ci/test.sh b/.ci/test.sh index 81e283338006..1b156eab7a71 100755 --- a/.ci/test.sh +++ b/.ci/test.sh @@ -50,11 +50,14 @@ if [[ $TRAVIS == "true" ]] && [[ $TASK == "check-docs" ]]; then exit 0 fi -if [[ $TASK == "lint" ]]; then +if [[ $TRAVIS == "true" ]] && [[ $TASK == "lint" ]]; then conda install -q -y -n $CONDA_ENV \ pycodestyle \ pydocstyle \ - r-lintr + r-stringi # stringi needs to be installed separate from r-lintr to avoid issues like 'unable to load shared object stringi.so' + conda install -q -y -n $CONDA_ENV \ + -c conda-forge \ + r-lintr>=2.0 pip install --user cpplint echo "Linting Python code" pycodestyle --ignore=E501,W503 --exclude=./compute,./.nuget . || exit -1 diff --git a/R-package/R/aliases.R b/R-package/R/aliases.R index 76d00aff1919..a4c831bdb7e8 100644 --- a/R-package/R/aliases.R +++ b/R-package/R/aliases.R @@ -5,7 +5,7 @@ # lazy evaluation (so it doesn't matter what order R sources files during installation). # [return] A named list, where each key is a main LightGBM parameter and each value is a character # vector of corresponding aliases. 
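Aside (not part of the patch): the linters added to LINTERS_TO_USE above are what drive most of the literal and style changes in the R files below. A rough sketch of the kind of code they flag, plus one way to try a single linter on a throwaway file; the file name and snippets here are only for illustration:

# Style the new linters reject:
#   num_leaves <- 4    # implicit_integer_linter: prefer 4L for integer literals
#   use_gpu <- T       # T_and_F_symbol_linter: spell out TRUE / FALSE
#   x <- c(1)          # unneeded_concatenation_linter: c() around a single value
# Style they accept:
num_leaves <- 4L
use_gpu <- TRUE
x <- 1L

# Running one linter by hand on a scratch file:
tmp_r_file <- tempfile(fileext = ".R")
writeLines("num_leaves <- 4", con = tmp_r_file)
lintr::lint(filename = tmp_r_file, linters = list(lintr::implicit_integer_linter))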
-.PARAMETER_ALIASES <- function(){ +.PARAMETER_ALIASES <- function() { return(list( "boosting" = c( "boosting" diff --git a/R-package/R/callback.R b/R-package/R/callback.R index 1b5f4f4562a8..a5b2e765551b 100644 --- a/R-package/R/callback.R +++ b/R-package/R/callback.R @@ -9,7 +9,7 @@ CB_ENV <- R6::R6Class( end_iteration = NULL, eval_list = list(), eval_err_list = list(), - best_iter = -1, + best_iter = -1L, best_score = NA, met_early_stop = FALSE ) @@ -30,7 +30,7 @@ cb.reset.parameters <- function(new_params) { init <- function(env) { # Store boosting rounds - nrounds <<- env$end_iteration - env$begin_iteration + 1 + nrounds <<- env$end_iteration - env$begin_iteration + 1L # Check for model environment if (is.null(env$model)) { stop("Env should have a ", sQuote("model")) } @@ -60,7 +60,7 @@ cb.reset.parameters <- function(new_params) { if (is.function(p)) { # Check if requires at least two arguments - if (length(formals(p)) != 2) { + if (length(formals(p)) != 2L) { stop("Parameter ", sQuote(n), " is a function but not of two arguments") } @@ -117,7 +117,7 @@ cb.reset.parameters <- function(new_params) { format.eval.string <- function(eval_res, eval_err = NULL) { # Check for empty evaluation string - if (is.null(eval_res) || length(eval_res) == 0) { + if (is.null(eval_res) || length(eval_res) == 0L) { stop("no evaluation results") } @@ -133,7 +133,7 @@ format.eval.string <- function(eval_res, eval_err = NULL) { merge.eval.string <- function(env) { # Check length of evaluation list - if (length(env$eval_list) <= 0) { + if (length(env$eval_list) <= 0L) { return("") } @@ -141,7 +141,7 @@ merge.eval.string <- function(env) { msg <- list(sprintf("[%d]:", env$iteration)) # Set if evaluation error - is_eval_err <- length(env$eval_err_list) > 0 + is_eval_err <- length(env$eval_err_list) > 0L # Loop through evaluation list for (j in seq_along(env$eval_list)) { @@ -162,25 +162,25 @@ merge.eval.string <- function(env) { } -cb.print.evaluation <- function(period = 1) { +cb.print.evaluation <- function(period = 1L) { # Create callback callback <- function(env) { # Check if period is at least 1 or more - if (period > 0) { + if (period > 0L) { # Store iteration i <- env$iteration # Check if iteration matches moduo - if ( (i - 1) %% period == 0 || is.element(i, c(env$begin_iteration, env$end_iteration))) { + if ((i - 1L) %% period == 0L || is.element(i, c(env$begin_iteration, env$end_iteration))) { # Merge evaluation string msg <- merge.eval.string(env) # Check if message is existing - if (nchar(msg) > 0) { + if (nchar(msg) > 0L) { cat(merge.eval.string(env), "\n") } @@ -205,15 +205,15 @@ cb.record.evaluation <- function() { callback <- function(env) { # Return empty if empty evaluation list - if (length(env$eval_list) <= 0) { + if (length(env$eval_list) <= 0L) { return() } # Set if evaluation error - is_eval_err <- length(env$eval_err_list) > 0 + is_eval_err <- length(env$eval_err_list) > 0L # Check length of recorded evaluation - if (length(env$model$record_evals) == 0) { + if (length(env$model$record_evals) == 0L) { # Loop through each evaluation list element for (j in seq_along(env$eval_list)) { @@ -290,7 +290,7 @@ cb.early.stop <- function(stopping_rounds, verbose = TRUE) { eval_len <<- length(env$eval_list) # Early stopping cannot work without metrics - if (eval_len == 0) { + if (eval_len == 0L) { stop("For early stopping, valids must have at least one element") } @@ -301,7 +301,7 @@ cb.early.stop <- function(stopping_rounds, verbose = TRUE) { # Maximization or minimization task 
factor_to_bigger_better <<- rep.int(1.0, eval_len) - best_iter <<- rep.int(-1, eval_len) + best_iter <<- rep.int(-1L, eval_len) best_score <<- rep.int(-Inf, eval_len) best_msg <<- list() diff --git a/R-package/R/lgb.Booster.R b/R-package/R/lgb.Booster.R index a23245533b06..fe73bfbbad6a 100644 --- a/R-package/R/lgb.Booster.R +++ b/R-package/R/lgb.Booster.R @@ -4,7 +4,7 @@ Booster <- R6::R6Class( cloneable = FALSE, public = list( - best_iter = -1, + best_iter = -1L, best_score = NA, record_evals = list(), @@ -55,7 +55,7 @@ Booster <- R6::R6Class( # Create private booster information private$train_set <- train_set - private$num_dataset <- 1 + private$num_dataset <- 1L private$init_predictor <- train_set$.__enclos_env__$private$predictor # Check if predictor is existing @@ -176,7 +176,7 @@ Booster <- R6::R6Class( # Store private information private$valid_sets <- c(private$valid_sets, data) private$name_valid_sets <- c(private$name_valid_sets, name) - private$num_dataset <- private$num_dataset + 1 + private$num_dataset <- private$num_dataset + 1L private$is_predicted_cur_iter <- c(private$is_predicted_cur_iter, FALSE) # Return self @@ -229,7 +229,7 @@ Booster <- R6::R6Class( ) # Store private train set - private$train_set = train_set + private$train_set <- train_set } @@ -249,13 +249,13 @@ Booster <- R6::R6Class( } if (!private$set_objective_to_none) { self$reset_parameter(params = list(objective = "none")) - private$set_objective_to_none = TRUE + private$set_objective_to_none <- TRUE } # Perform objective calculation - gpair <- fobj(private$inner_predict(1), private$train_set) + gpair <- fobj(private$inner_predict(1L), private$train_set) # Check for gradient and hessian as list - if (is.null(gpair$grad) || is.null(gpair$hess)){ + if (is.null(gpair$grad) || is.null(gpair$hess)) { stop("lgb.Booster.update: custom objective should return a list with attributes (hess, grad)") } @@ -322,13 +322,13 @@ Booster <- R6::R6Class( } # Check for identical data - data_idx <- 0 + data_idx <- 0L if (identical(data, private$train_set)) { - data_idx <- 1 + data_idx <- 1L } else { # Check for validation data - if (length(private$valid_sets) > 0) { + if (length(private$valid_sets) > 0L) { # Loop through each validation set for (i in seq_along(private$valid_sets)) { @@ -337,7 +337,7 @@ Booster <- R6::R6Class( if (identical(data, private$valid_sets[[i]])) { # Found identical data, skip - data_idx <- i + 1 + data_idx <- i + 1L break } @@ -349,7 +349,7 @@ Booster <- R6::R6Class( } # Check if evaluation was not done - if (data_idx == 0) { + if (data_idx == 0L) { # Add validation data by name self$add_valid(data, name) @@ -364,17 +364,17 @@ Booster <- R6::R6Class( # Evaluation training data eval_train = function(feval = NULL) { - private$inner_eval(private$name_train_set, 1, feval) + private$inner_eval(private$name_train_set, 1L, feval) }, # Evaluation validation data eval_valid = function(feval = NULL) { # Create ret list - ret = list() + ret <- list() # Check if validation is empty - if (length(private$valid_sets) <= 0) { + if (length(private$valid_sets) <= 0L) { return(ret) } @@ -382,7 +382,7 @@ Booster <- R6::R6Class( for (i in seq_along(private$valid_sets)) { ret <- append( x = ret - , values = private$inner_eval(private$name_valid_sets[[i]], i + 1, feval) + , values = private$inner_eval(private$name_valid_sets[[i]], i + 1L, feval) ) } @@ -491,8 +491,8 @@ Booster <- R6::R6Class( name_valid_sets = list(), predict_buffer = list(), is_predicted_cur_iter = list(), - num_class = 1, - num_dataset = 0, + num_class = 1L, 
+ num_dataset = 0L, init_predictor = NULL, eval_names = NULL, higher_better_inner_eval = NULL, @@ -504,8 +504,8 @@ Booster <- R6::R6Class( data_name <- private$name_train_set # Check for id bigger than 1 - if (idx > 1) { - data_name <- private$name_valid_sets[[idx - 1]] + if (idx > 1L) { + data_name <- private$name_valid_sets[[idx - 1L]] } # Check for unknown dataset (over the maximum provided range) @@ -522,7 +522,7 @@ Booster <- R6::R6Class( "LGBM_BoosterGetNumPredict_R" , ret = npred , private$handle - , as.integer(idx - 1) + , as.integer(idx - 1L) ) private$predict_buffer[[data_name]] <- numeric(npred) @@ -536,7 +536,7 @@ Booster <- R6::R6Class( "LGBM_BoosterGetPredict_R" , ret = private$predict_buffer[[data_name]] , private$handle - , as.integer(idx - 1) + , as.integer(idx - 1L) ) private$is_predicted_cur_iter[[idx]] <- TRUE } @@ -558,10 +558,10 @@ Booster <- R6::R6Class( ) # Check names' length - if (nchar(names) > 0) { + if (nchar(names) > 0L) { # Parse and store privately names - names <- strsplit(names, "\t")[[1]] + names <- strsplit(names, "\t")[[1L]] private$eval_names <- names private$higher_better_inner_eval <- grepl("^ndcg|^map|^auc$", names) @@ -589,7 +589,7 @@ Booster <- R6::R6Class( ret <- list() # Check evaluation names existence - if (length(private$eval_names) > 0) { + if (length(private$eval_names) > 0L) { # Create evaluation values tmp_vals <- numeric(length(private$eval_names)) @@ -597,7 +597,7 @@ Booster <- R6::R6Class( "LGBM_BoosterGetEval_R" , ret = tmp_vals , private$handle - , as.integer(data_idx - 1) + , as.integer(data_idx - 1L) ) # Loop through all evaluation names @@ -627,8 +627,8 @@ Booster <- R6::R6Class( data <- private$train_set # Check if data to assess is existing differently - if (data_idx > 1) { - data <- private$valid_sets[[data_idx - 1]] + if (data_idx > 1L) { + data <- private$valid_sets[[data_idx - 1L]] } # Perform function evaluation @@ -671,14 +671,13 @@ Booster <- R6::R6Class( #' prediction outputs per case. #' @param ... Additional named arguments passed to the \code{predict()} method of #' the \code{lgb.Booster} object passed to \code{object}. -#' @return -#' For regression or binary classification, it returns a vector of length \code{nrows(data)}. -#' For multiclass classification, either a \code{num_class * nrows(data)} vector or -#' a \code{(nrows(data), num_class)} dimension matrix is returned, depending on -#' the \code{reshape} value. +#' @return For regression or binary classification, it returns a vector of length \code{nrows(data)}. +#' For multiclass classification, either a \code{num_class * nrows(data)} vector or +#' a \code{(nrows(data), num_class)} dimension matrix is returned, depending on +#' the \code{reshape} value. #' -#' When \code{predleaf = TRUE}, the output is a matrix object with the -#' number of columns corresponding to the number of trees. +#' When \code{predleaf = TRUE}, the output is a matrix object with the +#' number of columns corresponding to the number of trees. 
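Aside on the reworked @return text above: a minimal sketch of the two multiclass output shapes it describes, using a throwaway model built only for illustration (not one of the package examples):

library(lightgbm)
X <- as.matrix(iris[, 1L:4L])
y <- as.integer(iris$Species) - 1L
dtrain_iris <- lgb.Dataset(X, label = y)
bst <- lgb.train(
  params = list(objective = "multiclass", num_class = 3L)
  , data = dtrain_iris
  , nrounds = 5L
)
p_flat <- predict(bst, X)                 # numeric vector of length num_class * nrow(X)
p_mat <- predict(bst, X, reshape = TRUE)  # nrow(X) x num_class matrix, one column per class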
#' #' @examples #' library(lightgbm) @@ -693,11 +692,11 @@ Booster <- R6::R6Class( #' model <- lgb.train( #' params = params #' , data = dtrain -#' , nrounds = 10 +#' , nrounds = 10L #' , valids = valids -#' , min_data = 1 -#' , learning_rate = 1 -#' , early_stopping_rounds = 5 +#' , min_data = 1L +#' , learning_rate = 1.0 +#' , early_stopping_rounds = 5L #' ) #' preds <- predict(model, test$data) #' @@ -755,11 +754,11 @@ predict.lgb.Booster <- function(object, #' model <- lgb.train( #' params = params #' , data = dtrain -#' , nrounds = 10 +#' , nrounds = 10L #' , valids = valids -#' , min_data = 1 -#' , learning_rate = 1 -#' , early_stopping_rounds = 5 +#' , min_data = 1L +#' , learning_rate = 1.0 +#' , early_stopping_rounds = 5L #' ) #' lgb.save(model, "model.txt") #' load_booster <- lgb.load(filename = "model.txt") @@ -768,7 +767,7 @@ predict.lgb.Booster <- function(object, #' #' @rdname lgb.load #' @export -lgb.load <- function(filename = NULL, model_str = NULL){ +lgb.load <- function(filename = NULL, model_str = NULL) { if (is.null(filename) && is.null(model_str)) { stop("lgb.load: either filename or model_str must be given") @@ -815,17 +814,17 @@ lgb.load <- function(filename = NULL, model_str = NULL){ #' model <- lgb.train( #' params = params #' , data = dtrain -#' , nrounds = 10 +#' , nrounds = 10L #' , valids = valids -#' , min_data = 1 -#' , learning_rate = 1 -#' , early_stopping_rounds = 5 +#' , min_data = 1L +#' , learning_rate = 1.0 +#' , early_stopping_rounds = 5L #' ) #' lgb.save(model, "model.txt") #' #' @rdname lgb.save #' @export -lgb.save <- function(booster, filename, num_iteration = NULL){ +lgb.save <- function(booster, filename, num_iteration = NULL) { # Check if booster is booster if (!lgb.is.Booster(booster)) { @@ -864,17 +863,17 @@ lgb.save <- function(booster, filename, num_iteration = NULL){ #' model <- lgb.train( #' params = params #' , data = dtrain -#' , nrounds = 10 +#' , nrounds = 10L #' , valids = valids -#' , min_data = 1 -#' , learning_rate = 1 -#' , early_stopping_rounds = 5 +#' , min_data = 1L +#' , learning_rate = 1.0 +#' , early_stopping_rounds = 5L #' ) #' json_model <- lgb.dump(model) #' #' @rdname lgb.dump #' @export -lgb.dump <- function(booster, num_iteration = NULL){ +lgb.dump <- function(booster, num_iteration = NULL) { # Check if booster is booster if (!lgb.is.Booster(booster)) { @@ -910,11 +909,11 @@ lgb.dump <- function(booster, num_iteration = NULL){ #' model <- lgb.train( #' params = params #' , data = dtrain -#' , nrounds = 10 +#' , nrounds = 10L #' , valids = valids -#' , min_data = 1 -#' , learning_rate = 1 -#' , early_stopping_rounds = 5 +#' , min_data = 1L +#' , learning_rate = 1.0 +#' , early_stopping_rounds = 5L #' ) #' lgb.get.eval.result(model, "test", "l2") #' @rdname lgb.get.eval.result @@ -956,7 +955,7 @@ lgb.get.eval.result <- function(booster, data_name, eval_name, iters = NULL, is_ # Parse iteration and booster delta iters <- as.integer(iters) - delta <- booster$record_evals$start_iter - 1 + delta <- booster$record_evals$start_iter - 1.0 iters <- iters - delta # Return requested result diff --git a/R-package/R/lgb.Dataset.R b/R-package/R/lgb.Dataset.R index e898b7985d97..e7ac44bdba18 100644 --- a/R-package/R/lgb.Dataset.R +++ b/R-package/R/lgb.Dataset.R @@ -140,10 +140,10 @@ Dataset <- R6::R6Class( # Check for character name if (is.character(private$categorical_feature)) { - cate_indices <- as.list(match(private$categorical_feature, private$colnames) - 1) + cate_indices <- as.list(match(private$categorical_feature, 
private$colnames) - 1L) # Provided indices, but some indices are not existing? - if (sum(is.na(cate_indices)) > 0) { + if (sum(is.na(cate_indices)) > 0L) { stop( "lgb.self.get.handle: supplied an unknown feature in categorical_feature: " , sQuote(private$categorical_feature[is.na(cate_indices)]) @@ -164,7 +164,7 @@ Dataset <- R6::R6Class( } # Store indices as [0, n-1] indexed instead of [1, n] indexed - cate_indices <- as.list(private$categorical_feature - 1) + cate_indices <- as.list(private$categorical_feature - 1L) } @@ -221,7 +221,7 @@ Dataset <- R6::R6Class( ) } else if (methods::is(private$raw_data, "dgCMatrix")) { - if (length(private$raw_data@p) > 2147483647) { + if (length(private$raw_data@p) > 2147483647L) { stop("Cannot support large CSC matrix") } # Are we using a dgCMatrix (sparsed matrix column compressed) @@ -300,13 +300,13 @@ Dataset <- R6::R6Class( } # Get private information - if (length(private$info) > 0) { + if (length(private$info) > 0L) { # Set infos for (i in seq_along(private$info)) { p <- private$info[i] - self$setinfo(names(p), p[[1]]) + self$setinfo(names(p), p[[1L]]) } @@ -361,7 +361,7 @@ Dataset <- R6::R6Class( # Get feature names and write them cnames <- lgb.call.return.str("LGBM_DatasetGetFeatureNames_R", private$handle) - private$colnames <- as.character(base::strsplit(cnames, "\t")[[1]]) + private$colnames <- as.character(base::strsplit(cnames, "\t")[[1L]]) private$colnames } else if (is.matrix(private$raw_data) || methods::is(private$raw_data, "dgCMatrix")) { @@ -391,7 +391,7 @@ Dataset <- R6::R6Class( # Check empty column names colnames <- as.character(colnames) - if (length(colnames) == 0) { + if (length(colnames) == 0L) { return(invisible(self)) } @@ -422,14 +422,14 @@ Dataset <- R6::R6Class( INFONAMES <- c("label", "weight", "init_score", "group") # Check if attribute key is in the known attribute list - if (!is.character(name) || length(name) != 1 || !name %in% INFONAMES) { + if (!is.character(name) || length(name) != 1L || !name %in% INFONAMES) { stop("getinfo: name must one of the following: ", paste0(sQuote(INFONAMES), collapse = ", ")) } # Check for info name and handle if (is.null(private$info[[name]])) { - if (lgb.is.null.handle(private$handle)){ + if (lgb.is.null.handle(private$handle)) { stop("Cannot perform getinfo before constructing Dataset.") } @@ -443,7 +443,7 @@ Dataset <- R6::R6Class( ) # Check if info is not empty - if (info_len > 0) { + if (info_len > 0L) { # Get back fields ret <- NULL @@ -476,7 +476,7 @@ Dataset <- R6::R6Class( INFONAMES <- c("label", "weight", "init_score", "group") # Check if attribute key is in the known attribute list - if (!is.character(name) || length(name) != 1 || !name %in% INFONAMES) { + if (!is.character(name) || length(name) != 1L || !name %in% INFONAMES) { stop("setinfo: name must one of the following: ", paste0(sQuote(INFONAMES), collapse = ", ")) } @@ -492,7 +492,7 @@ Dataset <- R6::R6Class( if (!lgb.is.null.handle(private$handle) && !is.null(info)) { - if (length(info) > 0) { + if (length(info) > 0L) { lgb.call( "LGBM_DatasetSetField_R" @@ -851,7 +851,7 @@ dim.lgb.Dataset <- function(x, ...) 
{ #' lgb.Dataset.construct(dtrain) #' dimnames(dtrain) #' colnames(dtrain) -#' colnames(dtrain) <- make.names(1:ncol(train$data)) +#' colnames(dtrain) <- make.names(seq_len(ncol(train$data))) #' print(dtrain, verbose = TRUE) #' #' @rdname dimnames.lgb.Dataset @@ -883,7 +883,7 @@ dimnames.lgb.Dataset <- function(x) { } # Check for second value missing - if (is.null(value[[2]])) { + if (is.null(value[[2L]])) { # No column names x$set_colnames(NULL) @@ -892,10 +892,10 @@ dimnames.lgb.Dataset <- function(x) { } # Check for unmatching column size - if (ncol(x) != length(value[[2]])) { + if (ncol(x) != length(value[[2L]])) { stop( "can't assign " - , sQuote(length(value[[2]])) + , sQuote(length(value[[2L]])) , " colnames to an lgb.Dataset with " , sQuote(ncol(x)) , " columns" @@ -903,7 +903,7 @@ dimnames.lgb.Dataset <- function(x) { } # Set column names properly, and return - x$set_colnames(value[[2]]) + x$set_colnames(value[[2L]]) x } @@ -924,7 +924,7 @@ dimnames.lgb.Dataset <- function(x) { #' train <- agaricus.train #' dtrain <- lgb.Dataset(train$data, label = train$label) #' -#' dsub <- lightgbm::slice(dtrain, 1:42) +#' dsub <- lightgbm::slice(dtrain, seq_len(42L)) #' lgb.Dataset.construct(dsub) #' labels <- lightgbm::getinfo(dsub, "label") #' @@ -1059,7 +1059,7 @@ setinfo.lgb.Dataset <- function(dataset, name, info, ...) { #' dtrain <- lgb.Dataset(train$data, label = train$label) #' lgb.Dataset.save(dtrain, "lgb.Dataset.data") #' dtrain <- lgb.Dataset("lgb.Dataset.data") -#' lgb.Dataset.set.categorical(dtrain, 1:2) +#' lgb.Dataset.set.categorical(dtrain, 1L:2L) #' #' @rdname lgb.Dataset.set.categorical #' @export diff --git a/R-package/R/lgb.Predictor.R b/R-package/R/lgb.Predictor.R index 333dd5057a16..a73c683d4c97 100644 --- a/R-package/R/lgb.Predictor.R +++ b/R-package/R/lgb.Predictor.R @@ -84,14 +84,14 @@ Predictor <- R6::R6Class( # Check if number of iterations is existing - if not, then set it to -1 (use all) if (is.null(num_iteration)) { - num_iteration <- -1 + num_iteration <- -1L } # Set temporary variable num_row <- 0L # Check if data is a file name and not a matrix - if (identical(class(data), "character") && length(data) == 1) { + if (identical(class(data), "character") && length(data) == 1L) { # Data is a filename, create a temporary file with a "lightgbm_" pattern in it tmp_filename <- tempfile(pattern = "lightgbm_") @@ -156,7 +156,7 @@ Predictor <- R6::R6Class( ) } else if (methods::is(data, "dgCMatrix")) { - if (length(data@p) > 2147483647) { + if (length(data@p) > 2147483647L) { stop("Cannot support large CSC matrix") } # Check if data is a dgCMatrix (sparse matrix, column compressed format) @@ -187,11 +187,11 @@ Predictor <- R6::R6Class( } # Check if number of rows is strange (not a multiple of the dataset rows) - if (length(preds) %% num_row != 0) { + if (length(preds) %% num_row != 0L) { stop( "predict: prediction length " , sQuote(length(preds)) - ," is not a multiple of nrows(data): " + , " is not a multiple of nrows(data): " , sQuote(num_row) ) } @@ -207,7 +207,7 @@ Predictor <- R6::R6Class( # Predict leaves only, reshaping is mandatory preds <- matrix(preds, ncol = npred_per_case, byrow = TRUE) - } else if (reshape && npred_per_case > 1) { + } else if (reshape && npred_per_case > 1L) { # Predict with data reshaping preds <- matrix(preds, ncol = npred_per_case, byrow = TRUE) diff --git a/R-package/R/lgb.cv.R b/R-package/R/lgb.cv.R index c810432b01ba..3315c96edc7a 100644 --- a/R-package/R/lgb.cv.R +++ b/R-package/R/lgb.cv.R @@ -3,7 +3,7 @@ CVBooster <- R6::R6Class( 
classname = "lgb.CVBooster", cloneable = FALSE, public = list( - best_iter = -1, + best_iter = -1L, best_score = NA, record_evals = list(), boosters = list(), @@ -64,34 +64,35 @@ CVBooster <- R6::R6Class( #' model <- lgb.cv( #' params = params #' , data = dtrain -#' , nrounds = 10 -#' , nfold = 3 -#' , min_data = 1 -#' , learning_rate = 1 -#' , early_stopping_rounds = 5 +#' , nrounds = 10L +#' , nfold = 3L +#' , min_data = 1L +#' , learning_rate = 1.0 +#' , early_stopping_rounds = 5L #' ) #' @export -lgb.cv <- function(params = list(), - data, - nrounds = 10, - nfold = 3, - label = NULL, - weight = NULL, - obj = NULL, - eval = NULL, - verbose = 1, - record = TRUE, - eval_freq = 1L, - showsd = TRUE, - stratified = TRUE, - folds = NULL, - init_model = NULL, - colnames = NULL, - categorical_feature = NULL, - early_stopping_rounds = NULL, - callbacks = list(), - reset_data = FALSE, - ...) { +lgb.cv <- function(params = list() + , data + , nrounds = 10L + , nfold = 3L + , label = NULL + , weight = NULL + , obj = NULL + , eval = NULL + , verbose = 1L + , record = TRUE + , eval_freq = 1L + , shows = TRUE + , stratified = TRUE + , folds = NULL + , init_model = NULL + , colnames = NULL + , categorical_feature = NULL + , early_stopping_rounds = NULL + , callbacks = list() + , reset_data = FALSE + , ... + ) { # Setup temporary variables addiction_params <- list(...) @@ -102,7 +103,7 @@ lgb.cv <- function(params = list(), fobj <- NULL feval <- NULL - if (nrounds <= 0) { + if (nrounds <= 0L) { stop("nrounds should be greater than zero") } @@ -131,16 +132,16 @@ lgb.cv <- function(params = list(), } # Set the iteration to start from / end to (and check for boosting from a trained model, again) - begin_iteration <- 1 + begin_iteration <- 1L if (!is.null(predictor)) { - begin_iteration <- predictor$current_iter() + 1 + begin_iteration <- predictor$current_iter() + 1L } # Check for number of rounds passed as parameter - in case there are multiple ones, take only the first one n_trees <- .PARAMETER_ALIASES()[["num_iterations"]] if (any(names(params) %in% n_trees)) { - end_iteration <- begin_iteration + params[[which(names(params) %in% n_trees)[1]]] - 1 + end_iteration <- begin_iteration + params[[which(names(params) %in% n_trees)[1L]]] - 1L } else { - end_iteration <- begin_iteration + nrounds - 1 + end_iteration <- begin_iteration + nrounds - 1L } # Check for training dataset type correctness @@ -179,7 +180,7 @@ lgb.cv <- function(params = list(), if (!is.null(folds)) { # Check for list of folds or for single value - if (!is.list(folds) || length(folds) < 2) { + if (!is.list(folds) || length(folds) < 2L) { stop(sQuote("folds"), " must be a list with 2 or more elements that are vectors of indices for each CV-fold") } @@ -189,7 +190,7 @@ lgb.cv <- function(params = list(), } else { # Check fold value - if (nfold <= 1) { + if (nfold <= 1L) { stop(sQuote("nfold"), " must be > 1") } @@ -206,7 +207,7 @@ lgb.cv <- function(params = list(), } # Add printing log callback - if (verbose > 0 && eval_freq > 0) { + if (verbose > 0L && eval_freq > 0L) { callbacks <- add.cb(callbacks, cb.print.evaluation(eval_freq)) } @@ -220,7 +221,7 @@ lgb.cv <- function(params = list(), early_stop <- .PARAMETER_ALIASES()[["early_stopping_round"]] early_stop_param_indx <- names(params) %in% early_stop if (any(early_stop_param_indx)) { - first_early_stop_param <- which(early_stop_param_indx)[[1]] + first_early_stop_param <- which(early_stop_param_indx)[[1L]] first_early_stop_param_name <- names(params)[[first_early_stop_param]] 
early_stopping_rounds <- params[[first_early_stop_param_name]] } @@ -232,20 +233,20 @@ lgb.cv <- function(params = list(), using_dart <- any( sapply( X = boosting_param_names - , FUN = function(param){ - identical(params[[param]], 'dart') + , FUN = function(param) { + identical(params[[param]], "dart") } ) ) # Cannot use early stopping with 'dart' boosting - if (using_dart){ + if (using_dart) { warning("Early stopping is not available in 'dart' mode.") using_early_stopping_via_args <- FALSE # Remove the cb.early.stop() function if it was passed in to callbacks callbacks <- Filter( - f = function(cb_func){ + f = function(cb_func) { !identical(attr(cb_func, "name"), "cb.early.stop") } , x = callbacks @@ -253,7 +254,7 @@ lgb.cv <- function(params = list(), } # If user supplied early_stopping_rounds, add the early stopping callback - if (using_early_stopping_via_args){ + if (using_early_stopping_via_args) { callbacks <- add.cb( callbacks , cb.early.stop( @@ -267,7 +268,7 @@ lgb.cv <- function(params = list(), cb <- categorize.callbacks(callbacks) # Construct booster using a list apply, check if requires group or not - if (!is.list(folds[[1]])) { + if (!is.list(folds[[1L]])) { bst_folds <- lapply(seq_along(folds), function(k) { dtest <- slice(data, folds[[k]]) dtrain <- slice(data, seq_len(nrow(data))[-folds[[k]]]) @@ -345,12 +346,12 @@ lgb.cv <- function(params = list(), } if (record && is.na(env$best_score)) { - if (env$eval_list[[1]]$higher_better[1] == TRUE) { - cv_booster$best_iter <- unname(which.max(unlist(cv_booster$record_evals[[2]][[1]][[1]]))) - cv_booster$best_score <- cv_booster$record_evals[[2]][[1]][[1]][[cv_booster$best_iter]] + if (env$eval_list[[1L]]$higher_better[1L] == TRUE) { + cv_booster$best_iter <- unname(which.max(unlist(cv_booster$record_evals[[2L]][[1L]][[1L]]))) + cv_booster$best_score <- cv_booster$record_evals[[2L]][[1L]][[1L]][[cv_booster$best_iter]] } else { - cv_booster$best_iter <- unname(which.min(unlist(cv_booster$record_evals[[2]][[1]][[1]]))) - cv_booster$best_score <- cv_booster$record_evals[[2]][[1]][[1]][[cv_booster$best_iter]] + cv_booster$best_iter <- unname(which.min(unlist(cv_booster$record_evals[[2L]][[1L]][[1L]]))) + cv_booster$best_score <- cv_booster$record_evals[[2L]][[1L]][[1L]][[cv_booster$best_iter]] } } @@ -398,7 +399,7 @@ generate.cv.folds <- function(nfold, nrows, stratified, label, group, params) { # Loop through each fold for (i in seq_len(nfold)) { - kstep <- length(rnd_idx) %/% (nfold - i + 1) + kstep <- length(rnd_idx) %/% (nfold - i + 1L) folds[[i]] <- rnd_idx[seq_len(kstep)] rnd_idx <- rnd_idx[-seq_len(kstep)] } @@ -423,7 +424,7 @@ generate.cv.folds <- function(nfold, nrows, stratified, label, group, params) { # Loop through each fold for (i in seq_len(nfold)) { - kstep <- length(rnd_idx) %/% (nfold - i + 1) + kstep <- length(rnd_idx) %/% (nfold - i + 1L) folds[[i]] <- list( fold = which(ungrouped %in% rnd_idx[seq_len(kstep)]) , group = rnd_idx[seq_len(kstep)] @@ -442,7 +443,7 @@ generate.cv.folds <- function(nfold, nrows, stratified, label, group, params) { # It was borrowed from caret::lgb.stratified.folds and simplified # by always returning an unnamed list of fold indices. #' @importFrom stats quantile -lgb.stratified.folds <- function(y, k = 10) { +lgb.stratified.folds <- function(y, k = 10L) { ## Group the numeric data based on their magnitudes ## and sample within those groups. 
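Aside: per the comment above, the stratified helper below always returns an unnamed list of row-index vectors, one per fold. A quick illustration of calling it directly (it is internal, hence the ':::'; the seed and sizes are arbitrary):

set.seed(42L)
y <- stats::rnorm(20L)
folds <- lightgbm:::lgb.stratified.folds(y, k = 4L)
lengths(folds)       # roughly equal fold sizes
sort(unlist(folds))  # each of the 20 row indices appears exactly once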
@@ -455,15 +456,15 @@ lgb.stratified.folds <- function(y, k = 10) { if (is.numeric(y)) { cuts <- length(y) %/% k - if (cuts < 2) { - cuts <- 2 + if (cuts < 2L) { + cuts <- 2L } - if (cuts > 5) { - cuts <- 5 + if (cuts > 5L) { + cuts <- 5L } y <- cut( y - , unique(stats::quantile(y, probs = seq.int(0, 1, length.out = cuts))) + , unique(stats::quantile(y, probs = seq.int(0.0, 1.0, length.out = cuts))) , include.lowest = TRUE ) @@ -489,7 +490,7 @@ lgb.stratified.folds <- function(y, k = 10) { seqVector <- rep(seq_len(k), numInClass[i] %/% k) ## Add enough random integers to get length(seqVector) == numInClass[i] - if (numInClass[i] %% k > 0) { + if (numInClass[i] %% k > 0L) { seqVector <- c(seqVector, sample.int(k, numInClass[i] %% k)) } @@ -513,15 +514,15 @@ lgb.stratified.folds <- function(y, k = 10) { lgb.merge.cv.result <- function(msg, showsd = TRUE) { # Get CV message length - if (length(msg) == 0) { + if (length(msg) == 0L) { stop("lgb.cv: size of cv result error") } # Get evaluation message length - eval_len <- length(msg[[1]]) + eval_len <- length(msg[[1L]]) # Is evaluation message empty? - if (eval_len == 0) { + if (eval_len == 0L) { stop("lgb.cv: should provide at least one metric for CV") } @@ -532,7 +533,7 @@ lgb.merge.cv.result <- function(msg, showsd = TRUE) { }) # Get evaluation - ret_eval <- msg[[1]] + ret_eval <- msg[[1L]] # Go through evaluation length items for (j in seq_len(eval_len)) { @@ -549,7 +550,7 @@ lgb.merge.cv.result <- function(msg, showsd = TRUE) { for (j in seq_len(eval_len)) { ret_eval_err <- c( ret_eval_err - , sqrt(mean(eval_result[[j]] ^ 2) - mean(eval_result[[j]]) ^ 2) + , sqrt(mean(eval_result[[j]] ^ 2L) - mean(eval_result[[j]]) ^ 2L) ) } diff --git a/R-package/R/lgb.importance.R b/R-package/R/lgb.importance.R index ba9d02476166..80d3892d3016 100644 --- a/R-package/R/lgb.importance.R +++ b/R-package/R/lgb.importance.R @@ -24,12 +24,12 @@ #' params <- list( #' objective = "binary" #' , learning_rate = 0.01 -#' , num_leaves = 63 -#' , max_depth = -1 -#' , min_data_in_leaf = 1 -#' , min_sum_hessian_in_leaf = 1 +#' , num_leaves = 63L +#' , max_depth = -1L +#' , min_data_in_leaf = 1L +#' , min_sum_hessian_in_leaf = 1.0 #' ) -#' model <- lgb.train(params, dtrain, 10) +#' model <- lgb.train(params, dtrain, 10L) #' #' tree_imp1 <- lgb.importance(model, percentage = TRUE) #' tree_imp2 <- lgb.importance(model, percentage = FALSE) @@ -62,8 +62,8 @@ lgb.importance <- function(model, percentage = TRUE) { # Sort features by Gain data.table::setorderv( x = tree_imp_dt - , cols = c("Gain") - , order = -1 + , cols = "Gain" + , order = -1L ) # Check if relative values are requested diff --git a/R-package/R/lgb.interprete.R b/R-package/R/lgb.interprete.R index bc147f5adaf0..1a4c40bd0a31 100644 --- a/R-package/R/lgb.interprete.R +++ b/R-package/R/lgb.interprete.R @@ -7,18 +7,18 @@ #' @param idxset an integer vector of indices of rows needed. #' @param num_iteration number of iteration want to predict with, NULL or <= 0 means use best iteration. #' -#' @return -#' -#' For regression, binary classification and lambdarank model, a \code{list} of \code{data.table} with the following columns: -#' \itemize{ -#' \item \code{Feature} Feature names in the model. -#' \item \code{Contribution} The total contribution of this feature's splits. -#' } -#' For multiclass classification, a \code{list} of \code{data.table} with the Feature column and Contribution columns to each class. 
+#' @return For regression, binary classification and lambdarank model, a \code{list} of \code{data.table} +#' with the following columns: +#' \itemize{ +#' \item \code{Feature} Feature names in the model. +#' \item \code{Contribution} The total contribution of this feature's splits. +#' } +#' For multiclass classification, a \code{list} of \code{data.table} with the Feature column and +#' Contribution columns to each class. #' #' @examples -#' Sigmoid <- function(x) 1 / (1 + exp(-x)) -#' Logit <- function(x) log(x / (1 - x)) +#' Sigmoid <- function(x) 1.0 / (1.0 + exp(-x)) +#' Logit <- function(x) log(x / (1.0 - x)) #' data(agaricus.train, package = "lightgbm") #' train <- agaricus.train #' dtrain <- lgb.Dataset(train$data, label = train$label) @@ -29,14 +29,14 @@ #' params <- list( #' objective = "binary" #' , learning_rate = 0.01 -#' , num_leaves = 63 -#' , max_depth = -1 -#' , min_data_in_leaf = 1 -#' , min_sum_hessian_in_leaf = 1 +#' , num_leaves = 63L +#' , max_depth = -1L +#' , min_data_in_leaf = 1L +#' , min_sum_hessian_in_leaf = 1.0 #' ) -#' model <- lgb.train(params, dtrain, 10) +#' model <- lgb.train(params, dtrain, 10L) #' -#' tree_interpretation <- lgb.interprete(model, test$data, 1:5) +#' tree_interpretation <- lgb.interprete(model, test$data, 1L:5L) #' #' @importFrom data.table as.data.table #' @export @@ -71,8 +71,8 @@ lgb.interprete <- function(model, # Get list of trees tree_index_mat_list <- lapply( X = leaf_index_mat_list - , FUN = function(x){ - matrix(seq_len(length(x)) - 1, ncol = num_class, byrow = TRUE) + , FUN = function(x) { + matrix(seq_len(length(x)) - 1L, ncol = num_class, byrow = TRUE) } ) @@ -106,8 +106,8 @@ single.tree.interprete <- function(tree_dt, node_dt <- single_tree_dt[!is.na(split_index), .(split_index, split_feature, node_parent, internal_value)] # Prepare sequences - feature_seq <- character(0) - value_seq <- numeric(0) + feature_seq <- character(0L) + value_seq <- numeric(0L) # Get to root from leaf leaf_to_root <- function(parent_id, current_value) { @@ -185,15 +185,15 @@ single.row.interprete <- function(tree_dt, num_class, tree_index_mat, leaf_index next_interp_dt <- multiple.tree.interprete( tree_dt = tree_dt - , tree_index = tree_index_mat[,i] - , leaf_index = leaf_index_mat[,i] + , tree_index = tree_index_mat[, i] + , leaf_index = leaf_index_mat[, i] ) - if (num_class > 1){ + if (num_class > 1L) { data.table::setnames( next_interp_dt , old = "Contribution" - , new = paste("Class", i - 1) + , new = paste("Class", i - 1L) ) } @@ -202,29 +202,29 @@ single.row.interprete <- function(tree_dt, num_class, tree_index_mat, leaf_index } # Check for numbe rof classes larger than 1 - if (num_class == 1) { + if (num_class == 1L) { # First interpretation element - tree_interpretation_dt <- tree_interpretation[[1]] + tree_interpretation_dt <- tree_interpretation[[1L]] } else { # Full interpretation elements tree_interpretation_dt <- Reduce( - f = function(x, y){ + f = function(x, y) { merge(x, y, by = "Feature", all = TRUE) } , x = tree_interpretation ) # Loop throughout each tree - for (j in 2:ncol(tree_interpretation_dt)) { + for (j in 2L:ncol(tree_interpretation_dt)) { data.table::set( tree_interpretation_dt , i = which(is.na(tree_interpretation_dt[[j]])) , j = j - , value = 0 + , value = 0.0 ) } diff --git a/R-package/R/lgb.model.dt.tree.R b/R-package/R/lgb.model.dt.tree.R index ef493ce7a1f6..efd8d2dd854e 100644 --- a/R-package/R/lgb.model.dt.tree.R +++ b/R-package/R/lgb.model.dt.tree.R @@ -38,12 +38,12 @@ #' params <- list( #' objective = "binary" #' 
, learning_rate = 0.01 -#' , num_leaves = 63 -#' , max_depth = -1 -#' , min_data_in_leaf = 1 -#' , min_sum_hessian_in_leaf = 1 +#' , num_leaves = 63L +#' , max_depth = -1L +#' , min_data_in_leaf = 1L +#' , min_sum_hessian_in_leaf = 1.0 #' ) -#' model <- lgb.train(params, dtrain, 10) +#' model <- lgb.train(params, dtrain, 10L) #' #' tree_dt <- lgb.model.dt.tree(model) #' @@ -74,7 +74,7 @@ lgb.model.dt.tree <- function(model, num_iteration = NULL) { # Since the index comes from C++ (which is 0-indexed), be sure # to add 1 (e.g. index 28 means the 29th feature in feature_names) - split_feature_indx <- tree_dt[, split_feature] + 1 + split_feature_indx <- tree_dt[, split_feature] + 1L # Get corresponding feature names. Positions in split_feature_indx # which are NA will result in an NA feature name @@ -97,21 +97,21 @@ single.tree.parse <- function(lgb_tree) { # Setup initial default data.table with default types env <- new.env(parent = emptyenv()) env$single_tree_dt <- data.table::data.table( - tree_index = integer(0) - , depth = integer(0) - , split_index = integer(0) - , split_feature = integer(0) - , node_parent = integer(0) - , leaf_index = integer(0) - , leaf_parent = integer(0) - , split_gain = numeric(0) - , threshold = numeric(0) - , decision_type = character(0) - , default_left = character(0) - , internal_value = integer(0) - , internal_count = integer(0) - , leaf_value = integer(0) - , leaf_count = integer(0) + tree_index = integer(0L) + , depth = integer(0L) + , split_index = integer(0L) + , split_feature = integer(0L) + , node_parent = integer(0L) + , leaf_index = integer(0L) + , leaf_parent = integer(0L) + , split_gain = numeric(0L) + , threshold = numeric(0L) + , decision_type = character(0L) + , default_left = character(0L) + , internal_value = integer(0L) + , internal_count = integer(0L) + , leaf_value = integer(0L) + , leaf_count = integer(0L) ) # start tree traversal pre_order_traversal(env, tree_node_leaf, current_depth, parent_index) diff --git a/R-package/R/lgb.plot.importance.R b/R-package/R/lgb.plot.importance.R index a101e439196a..664cc066abde 100644 --- a/R-package/R/lgb.plot.importance.R +++ b/R-package/R/lgb.plot.importance.R @@ -24,23 +24,24 @@ #' params <- list( #' objective = "binary" #' , learning_rate = 0.01 -#' , num_leaves = 63 -#' , max_depth = -1 -#' , min_data_in_leaf = 1 -#' , min_sum_hessian_in_leaf = 1 +#' , num_leaves = 63L +#' , max_depth = -1L +#' , min_data_in_leaf = 1L +#' , min_sum_hessian_in_leaf = 1.0 #' ) #' #' model <- lgb.train(params, dtrain, 10) #' #' tree_imp <- lgb.importance(model, percentage = TRUE) -#' lgb.plot.importance(tree_imp, top_n = 10, measure = "Gain") +#' lgb.plot.importance(tree_imp, top_n = 10L, measure = "Gain") #' @importFrom graphics barplot par #' @export lgb.plot.importance <- function(tree_imp, - top_n = 10, + top_n = 10L, measure = "Gain", - left_margin = 10, - cex = NULL) { + left_margin = 10L, + cex = NULL + ) { # Check for measurement (column names) correctness measure <- match.arg( @@ -53,11 +54,11 @@ lgb.plot.importance <- function(tree_imp, top_n <- min(top_n, nrow(tree_imp)) # Parse importance - tree_imp <- tree_imp[order(abs(get(measure)), decreasing = TRUE),][seq_len(top_n),] + tree_imp <- tree_imp[order(abs(get(measure)), decreasing = TRUE), ][seq_len(top_n), ] # Attempt to setup a correct cex if (is.null(cex)) { - cex <- 2.5 / log2(1 + top_n) + cex <- 2.5 / log2(1.0 + top_n) } # Refresh plot @@ -66,15 +67,15 @@ lgb.plot.importance <- function(tree_imp, graphics::par( mar = c( - op$mar[1] + op$mar[1L] , 
left_margin - , op$mar[3] - , op$mar[4] + , op$mar[3L] + , op$mar[4L] ) ) # Do plot - tree_imp[.N:1, + tree_imp[.N:1L, graphics::barplot( height = get(measure) , names.arg = Feature @@ -83,7 +84,7 @@ lgb.plot.importance <- function(tree_imp, , main = "Feature Importance" , xlab = measure , cex.names = cex - , las = 1 + , las = 1L )] # Return invisibly diff --git a/R-package/R/lgb.plot.interpretation.R b/R-package/R/lgb.plot.interpretation.R index 6309a5154cb2..e655ef03408b 100644 --- a/R-package/R/lgb.plot.interpretation.R +++ b/R-package/R/lgb.plot.interpretation.R @@ -17,8 +17,8 @@ #' #' @examples #' library(lightgbm) -#' Sigmoid <- function(x) {1 / (1 + exp(-x))} -#' Logit <- function(x) {log(x / (1 - x))} +#' Sigmoid <- function(x) {1.0 / (1.0 + exp(-x))} +#' Logit <- function(x) {log(x / (1.0 - x))} #' data(agaricus.train, package = "lightgbm") #' train <- agaricus.train #' dtrain <- lgb.Dataset(train$data, label = train$label) @@ -29,26 +29,26 @@ #' params <- list( #' objective = "binary" #' , learning_rate = 0.01 -#' , num_leaves = 63 -#' , max_depth = -1 -#' , min_data_in_leaf = 1 -#' , min_sum_hessian_in_leaf = 1 +#' , num_leaves = 63L +#' , max_depth = -1L +#' , min_data_in_leaf = 1L +#' , min_sum_hessian_in_leaf = 1.0 #' ) -#' model <- lgb.train(params, dtrain, 10) +#' model <- lgb.train(params, dtrain, 10L) #' -#' tree_interpretation <- lgb.interprete(model, test$data, 1:5) -#' lgb.plot.interpretation(tree_interpretation[[1]], top_n = 10) +#' tree_interpretation <- lgb.interprete(model, test$data, 1L:5L) +#' lgb.plot.interpretation(tree_interpretation[[1L]], top_n = 10L) #' @importFrom data.table setnames #' @importFrom graphics barplot par #' @export lgb.plot.interpretation <- function(tree_interpretation_dt, - top_n = 10, - cols = 1, - left_margin = 10, + top_n = 10L, + cols = 1L, + left_margin = 10L, cex = NULL) { # Get number of columns - num_class <- ncol(tree_interpretation_dt) - 1 + num_class <- ncol(tree_interpretation_dt) - 1L # Refresh plot op <- graphics::par(no.readonly = TRUE) @@ -57,7 +57,7 @@ lgb.plot.interpretation <- function(tree_interpretation_dt, # Do some magic plotting bottom_margin <- 3.0 top_margin <- 2.0 - right_margin <- op$mar[4] + right_margin <- op$mar[4L] graphics::par( mar = c( @@ -69,7 +69,7 @@ lgb.plot.interpretation <- function(tree_interpretation_dt, ) # Check for number of classes - if (num_class == 1) { + if (num_class == 1L) { # Only one class, plot straight away multiple.tree.plot.interpretation( @@ -95,7 +95,7 @@ lgb.plot.interpretation <- function(tree_interpretation_dt, for (i in seq_len(num_class)) { # Prepare interpretation, perform T, get the names, and plot straight away - plot_dt <- tree_interpretation_dt[, c(1, i + 1), with = FALSE] + plot_dt <- tree_interpretation_dt[, c(1L, i + 1L), with = FALSE] data.table::setnames( plot_dt , old = names(plot_dt) @@ -104,7 +104,7 @@ lgb.plot.interpretation <- function(tree_interpretation_dt, multiple.tree.plot.interpretation( plot_dt , top_n = top_n - , title = paste("Class", i - 1) + , title = paste("Class", i - 1L) , cex = cex ) @@ -119,24 +119,24 @@ multiple.tree.plot.interpretation <- function(tree_interpretation, cex) { # Parse tree - tree_interpretation <- tree_interpretation[order(abs(Contribution), decreasing = TRUE),][seq_len(min(top_n, .N)),] + tree_interpretation <- tree_interpretation[order(abs(Contribution), decreasing = TRUE), ][seq_len(min(top_n, .N)), ] # Attempt to setup a correct cex if (is.null(cex)) { - cex <- 2.5 / log2(1 + top_n) + cex <- 2.5 / log2(1.0 + top_n) } # Do 
plot - tree_interpretation[.N:1, + tree_interpretation[.N:1L, graphics::barplot( height = Contribution , names.arg = Feature , horiz = TRUE - , col = ifelse(Contribution > 0, "firebrick", "steelblue") + , col = ifelse(Contribution > 0L, "firebrick", "steelblue") , border = NA , main = title , cex.names = cex - , las = 1 + , las = 1L )] # Return invisibly diff --git a/R-package/R/lgb.prepare.R b/R-package/R/lgb.prepare.R index 432d484512df..4e8e238779ea 100644 --- a/R-package/R/lgb.prepare.R +++ b/R-package/R/lgb.prepare.R @@ -41,13 +41,13 @@ lgb.prepare <- function(data) { # Convert characters to factors only (we can change them to numeric after) is_char <- which(list_classes == "character") - if (length(is_char) > 0) { + if (length(is_char) > 0L) { data[, (is_char) := lapply(.SD, function(x) {as.numeric(as.factor(x))}), .SDcols = is_char] } # Convert factors to numeric (integer is more efficient actually) is_fact <- c(which(list_classes == "factor"), is_char) - if (length(is_fact) > 0) { + if (length(is_fact) > 0L) { data[, (is_fact) := lapply(.SD, function(x) {as.numeric(x)}), .SDcols = is_fact] } @@ -61,13 +61,13 @@ lgb.prepare <- function(data) { # Convert characters to factors to numeric (integer is more efficient actually) is_char <- which(list_classes == "character") - if (length(is_char) > 0) { + if (length(is_char) > 0L) { data[is_char] <- lapply(data[is_char], function(x) {as.numeric(as.factor(x))}) } # Convert factors to numeric (integer is more efficient actually) is_fact <- which(list_classes == "factor") - if (length(is_fact) > 0) { + if (length(is_fact) > 0L) { data[is_fact] <- lapply(data[is_fact], function(x) {as.numeric(x)}) } diff --git a/R-package/R/lgb.prepare2.R b/R-package/R/lgb.prepare2.R index 8809936f3e05..232a680d71c0 100644 --- a/R-package/R/lgb.prepare2.R +++ b/R-package/R/lgb.prepare2.R @@ -41,17 +41,17 @@ lgb.prepare2 <- function(data) { if (inherits(data, "data.table")) { # Get data classes - list_classes <- vapply(data, class, character(1)) + list_classes <- vapply(data, class, character(1L)) # Convert characters to factors only (we can change them to numeric after) is_char <- which(list_classes == "character") - if (length(is_char) > 0) { + if (length(is_char) > 0L) { data[, (is_char) := lapply(.SD, function(x) {as.integer(as.factor(x))}), .SDcols = is_char] } # Convert factors to numeric (integer is more efficient actually) is_fact <- c(which(list_classes == "factor"), is_char) - if (length(is_fact) > 0) { + if (length(is_fact) > 0L) { data[, (is_fact) := lapply(.SD, function(x) {as.integer(x)}), .SDcols = is_fact] } @@ -61,17 +61,17 @@ lgb.prepare2 <- function(data) { if (inherits(data, "data.frame")) { # Get data classes - list_classes <- vapply(data, class, character(1)) + list_classes <- vapply(data, class, character(1L)) # Convert characters to factors to numeric (integer is more efficient actually) is_char <- which(list_classes == "character") - if (length(is_char) > 0) { + if (length(is_char) > 0L) { data[is_char] <- lapply(data[is_char], function(x) {as.integer(as.factor(x))}) } # Convert factors to numeric (integer is more efficient actually) is_fact <- which(list_classes == "factor") - if (length(is_fact) > 0) { + if (length(is_fact) > 0L) { data[is_fact] <- lapply(data[is_fact], function(x) {as.integer(x)}) } diff --git a/R-package/R/lgb.prepare_rules.R b/R-package/R/lgb.prepare_rules.R index 2f51cd0e64e4..5fbfd880d725 100644 --- a/R-package/R/lgb.prepare_rules.R +++ b/R-package/R/lgb.prepare_rules.R @@ -21,16 +21,16 @@ #' str(new_iris$data) #' 
#' data(iris) # Erase iris dataset -#' iris$Species[1] <- "NEW FACTOR" # Introduce junk factor (NA) +#' iris$Species[1L] <- "NEW FACTOR" # Introduce junk factor (NA) #' #' # Use conversion using known rules #' # Unknown factors become 0, excellent for sparse datasets #' newer_iris <- lgb.prepare_rules(data = iris, rules = new_iris$rules) #' #' # Unknown factor is now zero, perfect for sparse datasets -#' newer_iris$data[1, ] # Species became 0 as it is an unknown factor +#' newer_iris$data[1L, ] # Species became 0 as it is an unknown factor #' -#' newer_iris$data[1, 5] <- 1 # Put back real initial value +#' newer_iris$data[1L, 5L] <- 1.0 # Put back real initial value #' #' # Is the newly created dataset equal? YES! #' all.equal(new_iris$data, newer_iris$data) @@ -39,9 +39,9 @@ #' data(iris) # Erase iris dataset #' #' # We remapped values differently -#' personal_rules <- list(Species = c("setosa" = 3, -#' "versicolor" = 2, -#' "virginica" = 1)) +#' personal_rules <- list(Species = c("setosa" = 3L, +#' "versicolor" = 2L, +#' "virginica" = 1L)) #' newest_iris <- lgb.prepare_rules(data = iris, rules = personal_rules) #' str(newest_iris$data) # SUCCESS! #' @@ -59,21 +59,21 @@ lgb.prepare_rules <- function(data, rules = NULL) { for (i in names(rules)) { data.table::set(data, j = i, value = unname(rules[[i]][data[[i]]])) - data[[i]][is.na(data[[i]])] <- 0 # Overwrite NAs by 0s + data[[i]][is.na(data[[i]])] <- 0L # Overwrite NAs by 0s } } else { # Get data classes - list_classes <- vapply(data, class, character(1)) + list_classes <- vapply(data, class, character(1L)) # Map characters/factors is_fix <- which(list_classes %in% c("character", "factor")) rules <- list() # Need to create rules? - if (length(is_fix) > 0) { + if (length(is_fix) > 0L) { # Go through all characters/factors for (i in is_fix) { @@ -114,7 +114,7 @@ lgb.prepare_rules <- function(data, rules = NULL) { for (i in names(rules)) { data[[i]] <- unname(rules[[i]][data[[i]]]) - data[[i]][is.na(data[[i]])] <- 0 # Overwrite NAs by 0s + data[[i]][is.na(data[[i]])] <- 0L # Overwrite NAs by 0s } @@ -124,14 +124,14 @@ lgb.prepare_rules <- function(data, rules = NULL) { if (inherits(data, "data.frame")) { # Get data classes - list_classes <- vapply(data, class, character(1)) + list_classes <- vapply(data, class, character(1L)) # Map characters/factors is_fix <- which(list_classes %in% c("character", "factor")) rules <- list() # Need to create rules? - if (length(is_fix) > 0) { + if (length(is_fix) > 0L) { # Go through all characters/factors for (i in is_fix) { diff --git a/R-package/R/lgb.prepare_rules2.R b/R-package/R/lgb.prepare_rules2.R index 119cd32308ac..6be0570c2aa4 100644 --- a/R-package/R/lgb.prepare_rules2.R +++ b/R-package/R/lgb.prepare_rules2.R @@ -24,16 +24,16 @@ #' str(new_iris$data) #' #' data(iris) # Erase iris dataset -#' iris$Species[1] <- "NEW FACTOR" # Introduce junk factor (NA) +#' iris$Species[1L] <- "NEW FACTOR" # Introduce junk factor (NA) #' #' # Use conversion using known rules #' # Unknown factors become 0, excellent for sparse datasets #' newer_iris <- lgb.prepare_rules2(data = iris, rules = new_iris$rules) #' #' # Unknown factor is now zero, perfect for sparse datasets -#' newer_iris$data[1, ] # Species became 0 as it is an unknown factor +#' newer_iris$data[1L, ] # Species became 0 as it is an unknown factor #' -#' newer_iris$data[1, 5] <- 1 # Put back real initial value +#' newer_iris$data[1L, 5L] <- 1.0 # Put back real initial value #' #' # Is the newly created dataset equal? YES! 
#' all.equal(new_iris$data, newer_iris$data) @@ -73,14 +73,14 @@ lgb.prepare_rules2 <- function(data, rules = NULL) { } else { # Get data classes - list_classes <- vapply(data, class, character(1)) + list_classes <- vapply(data, class, character(1L)) # Map characters/factors is_fix <- which(list_classes %in% c("character", "factor")) rules <- list() # Need to create rules? - if (length(is_fix) > 0) { + if (length(is_fix) > 0L) { # Go through all characters/factors for (i in is_fix) { @@ -130,14 +130,14 @@ lgb.prepare_rules2 <- function(data, rules = NULL) { if (inherits(data, "data.frame")) { # Get data classes - list_classes <- vapply(data, class, character(1)) + list_classes <- vapply(data, class, character(1L)) # Map characters/factors is_fix <- which(list_classes %in% c("character", "factor")) rules <- list() # Need to create rules? - if (length(is_fix) > 0) { + if (length(is_fix) > 0L) { # Go through all characters/factors for (i in is_fix) { diff --git a/R-package/R/lgb.train.R b/R-package/R/lgb.train.R index eb4bef40782e..f48d2c6030cf 100644 --- a/R-package/R/lgb.train.R +++ b/R-package/R/lgb.train.R @@ -41,20 +41,20 @@ #' model <- lgb.train( #' params = params #' , data = dtrain -#' , nrounds = 10 +#' , nrounds = 10L #' , valids = valids -#' , min_data = 1 -#' , learning_rate = 1 -#' , early_stopping_rounds = 5 +#' , min_data = 1L +#' , learning_rate = 1.0 +#' , early_stopping_rounds = 5L #' ) #' @export lgb.train <- function(params = list(), data, - nrounds = 10, + nrounds = 10L, valids = list(), obj = NULL, eval = NULL, - verbose = 1, + verbose = 1L, record = TRUE, eval_freq = 1L, init_model = NULL, @@ -74,7 +74,7 @@ lgb.train <- function(params = list(), fobj <- NULL feval <- NULL - if (nrounds <= 0) { + if (nrounds <= 0L) { stop("nrounds should be greater than zero") } @@ -103,16 +103,16 @@ lgb.train <- function(params = list(), } # Set the iteration to start from / end to (and check for boosting from a trained model, again) - begin_iteration <- 1 + begin_iteration <- 1L if (!is.null(predictor)) { - begin_iteration <- predictor$current_iter() + 1 + begin_iteration <- predictor$current_iter() + 1L } # Check for number of rounds passed as parameter - in case there are multiple ones, take only the first one n_trees <- .PARAMETER_ALIASES()[["num_iterations"]] if (any(names(params) %in% n_trees)) { - end_iteration <- begin_iteration + params[[which(names(params) %in% n_trees)[1]]] - 1 + end_iteration <- begin_iteration + params[[which(names(params) %in% n_trees)[1L]]] - 1L } else { - end_iteration <- begin_iteration + nrounds - 1 + end_iteration <- begin_iteration + nrounds - 1L } # Check for training dataset type correctness @@ -121,12 +121,12 @@ lgb.train <- function(params = list(), } # Check for validation dataset type correctness - if (length(valids) > 0) { + if (length(valids) > 0L) { # One or more validation dataset # Check for list as input and type correctness by object - if (!is.list(valids) || !all(vapply(valids, lgb.is.Dataset, logical(1)))) { + if (!is.list(valids) || !all(vapply(valids, lgb.is.Dataset, logical(1L)))) { stop("lgb.train: valids must be a list of lgb.Dataset elements") } @@ -162,7 +162,7 @@ lgb.train <- function(params = list(), reduced_valid_sets <- list() # Parse validation datasets - if (length(valids) > 0) { + if (length(valids) > 0L) { # Loop through all validation datasets using name for (key in names(valids)) { @@ -187,12 +187,12 @@ lgb.train <- function(params = list(), } # Add printing log callback - if (verbose > 0 && eval_freq > 0) { + if 
(verbose > 0L && eval_freq > 0L) { callbacks <- add.cb(callbacks, cb.print.evaluation(eval_freq)) } # Add evaluation log callback - if (record && length(valids) > 0) { + if (record && length(valids) > 0L) { callbacks <- add.cb(callbacks, cb.record.evaluation()) } @@ -201,7 +201,7 @@ lgb.train <- function(params = list(), early_stop <- .PARAMETER_ALIASES()[["early_stopping_round"]] early_stop_param_indx <- names(params) %in% early_stop if (any(early_stop_param_indx)) { - first_early_stop_param <- which(early_stop_param_indx)[[1]] + first_early_stop_param <- which(early_stop_param_indx)[[1L]] first_early_stop_param_name <- names(params)[[first_early_stop_param]] early_stopping_rounds <- params[[first_early_stop_param_name]] } @@ -213,20 +213,20 @@ lgb.train <- function(params = list(), using_dart <- any( sapply( X = boosting_param_names - , FUN = function(param){ - identical(params[[param]], 'dart') + , FUN = function(param) { + identical(params[[param]], "dart") } ) ) # Cannot use early stopping with 'dart' boosting - if (using_dart){ + if (using_dart) { warning("Early stopping is not available in 'dart' mode.") using_early_stopping_via_args <- FALSE # Remove the cb.early.stop() function if it was passed in to callbacks callbacks <- Filter( - f = function(cb_func){ + f = function(cb_func) { !identical(attr(cb_func, "name"), "cb.early.stop") } , x = callbacks @@ -234,7 +234,7 @@ lgb.train <- function(params = list(), } # If user supplied early_stopping_rounds, add the early stopping callback - if (using_early_stopping_via_args){ + if (using_early_stopping_via_args) { callbacks <- add.cb( callbacks , cb.early.stop( @@ -279,7 +279,7 @@ lgb.train <- function(params = list(), eval_list <- list() # Collection: Has validation dataset? - if (length(valids) > 0) { + if (length(valids) > 0L) { # Validation has training dataset? if (vaild_contain_train) { @@ -305,13 +305,13 @@ lgb.train <- function(params = list(), # When early stopping is not activated, we compute the best iteration / score ourselves by # selecting the first metric and the first dataset - if (record && length(valids) > 0 && is.na(env$best_score)) { - if (env$eval_list[[1]]$higher_better[1] == TRUE) { - booster$best_iter <- unname(which.max(unlist(booster$record_evals[[2]][[1]][[1]]))) - booster$best_score <- booster$record_evals[[2]][[1]][[1]][[booster$best_iter]] + if (record && length(valids) > 0L && is.na(env$best_score)) { + if (env$eval_list[[1L]]$higher_better[1L] == TRUE) { + booster$best_iter <- unname(which.max(unlist(booster$record_evals[[2L]][[1L]][[1L]]))) + booster$best_score <- booster$record_evals[[2L]][[1L]][[1L]][[booster$best_iter]] } else { - booster$best_iter <- unname(which.min(unlist(booster$record_evals[[2]][[1]][[1]]))) - booster$best_score <- booster$record_evals[[2]][[1]][[1]][[booster$best_iter]] + booster$best_iter <- unname(which.min(unlist(booster$record_evals[[2L]][[1L]][[1L]]))) + booster$best_score <- booster$record_evals[[2L]][[1L]][[1L]][[booster$best_iter]] } } diff --git a/R-package/R/lgb.unloader.R b/R-package/R/lgb.unloader.R index e4bfc99ad889..adfd8980194b 100644 --- a/R-package/R/lgb.unloader.R +++ b/R-package/R/lgb.unloader.R @@ -1,6 +1,8 @@ #' LightGBM unloading error fix #' -#' Attempts to unload LightGBM packages so you can remove objects cleanly without having to restart R. This is useful for instance if an object becomes stuck for no apparent reason and you do not want to restart R to fix the lost object. 
+#' Attempts to unload LightGBM packages so you can remove objects cleanly without having to restart R. +#' This is useful for instance if an object becomes stuck for no apparent reason and you do not want +#' to restart R to fix the lost object. #' #' @param restore Whether to reload \code{LightGBM} immediately after detaching from R. #' Defaults to \code{TRUE} which means automatically reload \code{LightGBM} once @@ -25,11 +27,11 @@ #' model <- lgb.train( #' params = params #' , data = dtrain -#' , nrounds = 10 +#' , nrounds = 10L #' , valids = valids -#' , min_data = 1 -#' , learning_rate = 1 -#' , early_stopping_rounds = 5 +#' , min_data = 1L +#' , learning_rate = 1.0 +#' , early_stopping_rounds = 5L #' ) #' #' \dontrun{ @@ -50,13 +52,13 @@ lgb.unloader <- function(restore = TRUE, wipe = FALSE, envir = .GlobalEnv) { # Should we wipe variables? (lgb.Booster, lgb.Dataset) if (wipe) { boosters <- Filter( - f = function(x){ + f = function(x) { inherits(get(x, envir = envir), "lgb.Booster") } , x = ls(envir = envir) ) datasets <- Filter( - f = function(x){ + f = function(x) { inherits(get(x, envir = envir), "lgb.Dataset") } , x = ls(envir = envir) diff --git a/R-package/R/lightgbm.R b/R-package/R/lightgbm.R index d93849d28927..c8b133845044 100644 --- a/R-package/R/lightgbm.R +++ b/R-package/R/lightgbm.R @@ -48,8 +48,8 @@ lightgbm <- function(data, label = NULL, weight = NULL, params = list(), - nrounds = 10, - verbose = 1, + nrounds = 10L, + verbose = 1L, eval_freq = 1L, early_stopping_rounds = NULL, save_name = "lightgbm.model", @@ -59,7 +59,7 @@ lightgbm <- function(data, # Set data to a temporary variable dtrain <- data - if (nrounds <= 0) { + if (nrounds <= 0L) { stop("nrounds should be greater than zero") } # Check whether data is lgb.Dataset, if not then create lgb.Dataset manually @@ -69,8 +69,8 @@ lightgbm <- function(data, # Set validation as oneself valids <- list() - if (verbose > 0) { - valids$train = dtrain + if (verbose > 0L) { + valids$train <- dtrain } # Train a model using the regular way diff --git a/R-package/R/readRDS.lgb.Booster.R b/R-package/R/readRDS.lgb.Booster.R index 0682fd56d52d..bb202dc2eb6b 100644 --- a/R-package/R/readRDS.lgb.Booster.R +++ b/R-package/R/readRDS.lgb.Booster.R @@ -20,11 +20,11 @@ #' model <- lgb.train( #' params = params #' , data = dtrain -#' , nrounds = 10 +#' , nrounds = 10L #' , valids = valids -#' , min_data = 1 -#' , learning_rate = 1 -#' , early_stopping_rounds = 5 +#' , min_data = 1L +#' , learning_rate = 1.0 +#' , early_stopping_rounds = 5L #' ) #' saveRDS.lgb.Booster(model, "model.rds") #' new_model <- readRDS.lgb.Booster("model.rds") diff --git a/R-package/R/saveRDS.lgb.Booster.R b/R-package/R/saveRDS.lgb.Booster.R index 9ec4d6772123..295978e6d543 100644 --- a/R-package/R/saveRDS.lgb.Booster.R +++ b/R-package/R/saveRDS.lgb.Booster.R @@ -31,11 +31,11 @@ #' model <- lgb.train( #' params = params #' , data = dtrain -#' , nrounds = 10 +#' , nrounds = 10L #' , valids = valids -#' , min_data = 1 -#' , learning_rate = 1 -#' , early_stopping_rounds = 5 +#' , min_data = 1L +#' , learning_rate = 1.0 +#' , early_stopping_rounds = 5L #' ) #' saveRDS.lgb.Booster(model, "model.rds") #' @export diff --git a/R-package/R/utils.R b/R-package/R/utils.R index a22bc992e2a1..43ae1b507959 100644 --- a/R-package/R/utils.R +++ b/R-package/R/utils.R @@ -81,7 +81,7 @@ lgb.call <- function(fun_name, ret, ...) { lgb.call.return.str <- function(fun_name, ...) 
{ # Create buffer - buf_len <- as.integer(1024 * 1024) + buf_len <- as.integer(1024L * 1024L) act_len <- 0L buf <- raw(buf_len) @@ -115,7 +115,7 @@ lgb.params2str <- function(params, ...) { names(dot_params) <- gsub("\\.", "_", names(dot_params)) # Check for identical parameters - if (length(intersect(names(params), names(dot_params))) > 0) { + if (length(intersect(names(params), names(dot_params))) > 0L) { stop( "Same parameters in " , sQuote("params") @@ -136,7 +136,7 @@ lgb.params2str <- function(params, ...) { # Join multi value first val <- paste0(format(params[[key]], scientific = FALSE), collapse = ",") - if (nchar(val) <= 0) next # Skip join + if (nchar(val) <= 0L) next # Skip join # Join key value pair <- paste0(c(key, val), collapse = "=") @@ -145,7 +145,7 @@ lgb.params2str <- function(params, ...) { } # Check ret length - if (length(ret) == 0) { + if (length(ret) == 0L) { # Return empty string lgb.c_str("") @@ -163,7 +163,7 @@ lgb.c_str <- function(x) { # Perform character to raw conversion ret <- charToRaw(as.character(x)) - ret <- c(ret, as.raw(0)) + ret <- c(ret, as.raw(0L)) ret } diff --git a/R-package/demo/basic_walkthrough.R b/R-package/demo/basic_walkthrough.R index 35c2621f09b7..d241bd858f5d 100644 --- a/R-package/demo/basic_walkthrough.R +++ b/R-package/demo/basic_walkthrough.R @@ -20,9 +20,9 @@ print("Training lightgbm with sparseMatrix") bst <- lightgbm( data = train$data , label = train$label - , num_leaves = 4 - , learning_rate = 1 - , nrounds = 2 + , num_leaves = 4L + , learning_rate = 1.0 + , nrounds = 2L , objective = "binary" ) @@ -31,9 +31,9 @@ print("Training lightgbm with Matrix") bst <- lightgbm( data = as.matrix(train$data) , label = train$label - , num_leaves = 4 - , learning_rate = 1 - , nrounds = 2 + , num_leaves = 4L + , learning_rate = 1.0 + , nrounds = 2L , objective = "binary" ) @@ -45,9 +45,9 @@ dtrain <- lgb.Dataset( ) bst <- lightgbm( data = dtrain - , num_leaves = 4 - , learning_rate = 1 - , nrounds = 2 + , num_leaves = 4L + , learning_rate = 1.0 + , nrounds = 2L , objective = "binary" ) @@ -55,42 +55,42 @@ bst <- lightgbm( print("Train lightgbm with verbose 0, no message") bst <- lightgbm( data = dtrain - , num_leaves = 4 - , learning_rate = 1 - , nrounds = 2 + , num_leaves = 4L + , learning_rate = 1.0 + , nrounds = 2L , objective = "binary" - , verbose = 0 + , verbose = 0L ) print("Train lightgbm with verbose 1, print evaluation metric") bst <- lightgbm( data = dtrain - , num_leaves = 4 - , learning_rate = 1 - , nrounds = 2 - , nthread = 2 + , num_leaves = 4L + , learning_rate = 1.0 + , nrounds = 2L + , nthread = 2L , objective = "binary" - , verbose = 1 + , verbose = 1L ) print("Train lightgbm with verbose 2, also print information about tree") bst <- lightgbm( data = dtrain - , num_leaves = 4 - , learning_rate = 1 - , nrounds = 2 - , nthread = 2 + , num_leaves = 4L + , learning_rate = 1.0 + , nrounds = 2L + , nthread = 2L , objective = "binary" - , verbose = 2 + , verbose = 2L ) # You can also specify data as file path to a LibSVM/TCV/CSV format input # Since we do not have this file with us, the following line is just for illustration # bst <- lightgbm( # data = "agaricus.train.svm" -# , num_leaves = 4 -# , learning_rate = 1 -# , nrounds = 2 +# , num_leaves = 4L +# , learning_rate = 1.0 +# , nrounds = 2L # , objective = "binary" # ) @@ -126,11 +126,11 @@ valids <- list(train = dtrain, test = dtest) print("Train lightgbm using lgb.train with valids") bst <- lgb.train( data = dtrain - , num_leaves = 4 - , learning_rate = 1 - , nrounds = 2 + , 
num_leaves = 4L + , learning_rate = 1.0 + , nrounds = 2L , valids = valids - , nthread = 2 + , nthread = 2L , objective = "binary" ) @@ -138,12 +138,12 @@ bst <- lgb.train( print("Train lightgbm using lgb.train with valids, watch logloss and error") bst <- lgb.train( data = dtrain - , num_leaves = 4 - , learning_rate = 1 - , nrounds = 2 + , num_leaves = 4L + , learning_rate = 1.0 + , nrounds = 2L , valids = valids , eval = c("binary_error", "binary_logloss") - , nthread = 2 + , nthread = 2L , objective = "binary" ) @@ -154,16 +154,16 @@ lgb.Dataset.save(dtrain, "dtrain.buffer") dtrain2 <- lgb.Dataset("dtrain.buffer") bst <- lgb.train( data = dtrain2 - , num_leaves = 4 - , learning_rate = 1 - , nrounds = 2 + , num_leaves = 4L + , learning_rate = 1.0 + , nrounds = 2L , valids = valids - , nthread = 2 + , nthread = 2L , objective = "binary" ) # information can be extracted from lgb.Dataset using getinfo -label = getinfo(dtest, "label") +label <- getinfo(dtest, "label") pred <- predict(bst, test$data) err <- as.numeric(sum(as.integer(pred > 0.5) != label)) / length(label) print(paste("test-error=", err)) diff --git a/R-package/demo/boost_from_prediction.R b/R-package/demo/boost_from_prediction.R index 391aa24ac0c8..d96113b12861 100644 --- a/R-package/demo/boost_from_prediction.R +++ b/R-package/demo/boost_from_prediction.R @@ -14,12 +14,12 @@ print("Start running example to start from an initial prediction") # Train lightgbm for 1 round param <- list( - num_leaves = 4 - , learning_rate = 1 - , nthread = 2 + num_leaves = 4L + , learning_rate = 1.0 + , nthread = 2L , objective = "binary" ) -bst <- lgb.train(param, dtrain, 1, valids = valids) +bst <- lgb.train(param, dtrain, 1L, valids = valids) # Note: we need the margin value instead of transformed prediction in set_init_score ptrain <- predict(bst, agaricus.train$data, rawscore = TRUE) @@ -34,6 +34,6 @@ print("This is result of boost from initial prediction") bst <- lgb.train( params = param , data = dtrain - , nrounds = 5 + , nrounds = 5L , valids = valids ) diff --git a/R-package/demo/categorical_features_prepare.R b/R-package/demo/categorical_features_prepare.R index a14646f503f0..6830e5a1232c 100644 --- a/R-package/demo/categorical_features_prepare.R +++ b/R-package/demo/categorical_features_prepare.R @@ -53,36 +53,36 @@ bank <- lgb.prepare(data = bank) str(bank) # Remove 1 to label because it must be between 0 and 1 -bank$y <- bank$y - 1 +bank$y <- bank$y - 1L # Data input to LightGBM must be a matrix, without the label -my_data <- as.matrix(bank[, 1:16, with = FALSE]) +my_data <- as.matrix(bank[, 1L:16L, with = FALSE]) # Creating the LightGBM dataset with categorical features # The categorical features must be indexed like in R (1-indexed, not 0-indexed) lgb_data <- lgb.Dataset( data = my_data , label = bank$y - , categorical_feature = c(2, 3, 4, 5, 7, 8, 9, 11, 16) + , categorical_feature = c(2L, 3L, 4L, 5L, 7L, 8L, 9L, 11L, 16L) ) # We can now train a model params <- list( objective = "binary" , metric = "l2" - , min_data = 1 + , min_data = 1L , learning_rate = 0.1 - , min_data = 0 - , min_hessian = 1 - , max_depth = 2 + , min_data = 0L + , min_hessian = 1.0 + , max_depth = 2L ) model <- lgb.train( params = params , data = lgb_data - , nrounds = 100 + , nrounds = 100L , valids = list(train = lgb_data) ) # Try to find split_feature: 2 # If you find it, it means it used a categorical feature in the first tree -lgb.dump(model, num_iteration = 1) +lgb.dump(model, num_iteration = 1L) diff --git a/R-package/demo/categorical_features_rules.R 
b/R-package/demo/categorical_features_rules.R index 26b92c2bc493..54b49c8bae14 100644 --- a/R-package/demo/categorical_features_rules.R +++ b/R-package/demo/categorical_features_rules.R @@ -28,8 +28,8 @@ data(bank, package = "lightgbm") str(bank) # We are dividing the dataset into two: one train, one validation -bank_train <- bank[1:4000, ] -bank_test <- bank[4001:4521, ] +bank_train <- bank[1L:4000L, ] +bank_test <- bank[4001L:4521L, ] # We must now transform the data to fit in LightGBM # For this task, we use lgb.prepare @@ -59,19 +59,19 @@ bank_test <- lgb.prepare_rules(data = bank_test, rules = bank_rules$rules)$data str(bank_test) # Remove 1 to label because it must be between 0 and 1 -bank_train$y <- bank_train$y - 1 -bank_test$y <- bank_test$y - 1 +bank_train$y <- bank_train$y - 1L +bank_test$y <- bank_test$y - 1L # Data input to LightGBM must be a matrix, without the label -my_data_train <- as.matrix(bank_train[, 1:16, with = FALSE]) -my_data_test <- as.matrix(bank_test[, 1:16, with = FALSE]) +my_data_train <- as.matrix(bank_train[, 1L:16L, with = FALSE]) +my_data_test <- as.matrix(bank_test[, 1L:16L, with = FALSE]) # Creating the LightGBM dataset with categorical features # The categorical features can be passed to lgb.train to not copy and paste a lot dtrain <- lgb.Dataset( data = my_data_train , label = bank_train$y - , categorical_feature = c(2, 3, 4, 5, 7, 8, 9, 11, 16) + , categorical_feature = c(2L, 3L, 4L, 5L, 7L, 8L, 9L, 11L, 16L) ) dtest <- lgb.Dataset.create.valid( dtrain @@ -83,19 +83,19 @@ dtest <- lgb.Dataset.create.valid( params <- list( objective = "binary" , metric = "l2" - , min_data = 1 + , min_data = 1L , learning_rate = 0.1 - , min_data = 0 - , min_hessian = 1 - , max_depth = 2 + , min_data = 0L + , min_hessian = 1.0 + , max_depth = 2L ) model <- lgb.train( params = params , data = dtrain - , nrounds = 100 + , nrounds = 100L , valids = list(train = dtrain, valid = dtest) ) # Try to find split_feature: 11 # If you find it, it means it used a categorical feature in the first tree -lgb.dump(model, num_iteration = 1) +lgb.dump(model, num_iteration = 1L) diff --git a/R-package/demo/cross_validation.R b/R-package/demo/cross_validation.R index 3d79a3b1da70..8b8ce4433c05 100644 --- a/R-package/demo/cross_validation.R +++ b/R-package/demo/cross_validation.R @@ -5,10 +5,10 @@ data(agaricus.test, package = "lightgbm") dtrain <- lgb.Dataset(agaricus.train$data, label = agaricus.train$label) dtest <- lgb.Dataset.create.valid(dtrain, data = agaricus.test$data, label = agaricus.test$label) -nrounds <- 2 +nrounds <- 2L param <- list( - num_leaves = 4 - , learning_rate = 1 + num_leaves = 4L + , learning_rate = 1.0 , objective = "binary" ) @@ -20,7 +20,7 @@ lgb.cv( param , dtrain , nrounds - , nfold = 5 + , nfold = 5L , eval = "binary_error" ) @@ -32,7 +32,7 @@ lgb.cv( param , dtrain , nrounds - , nfold = 5 + , nfold = 5L , eval = "binary_error" , showsd = FALSE ) @@ -42,14 +42,14 @@ print("Running cross validation, with cutomsized loss function") logregobj <- function(preds, dtrain) { labels <- getinfo(dtrain, "label") - preds <- 1 / (1 + exp(-preds)) + preds <- 1.0 / (1.0 + exp(-preds)) grad <- preds - labels - hess <- preds * (1 - preds) + hess <- preds * (1.0 - preds) return(list(grad = grad, hess = hess)) } evalerror <- function(preds, dtrain) { labels <- getinfo(dtrain, "label") - err <- as.numeric(sum(labels != (preds > 0))) / length(labels) + err <- as.numeric(sum(labels != (preds > 0.0))) / length(labels) return(list(name = "error", value = err, higher_better = FALSE)) 
} @@ -60,5 +60,5 @@ lgb.cv( , nrounds = nrounds , obj = logregobj , eval = evalerror - , nfold = 5 + , nfold = 5L ) diff --git a/R-package/demo/early_stopping.R b/R-package/demo/early_stopping.R index 92470e7bb739..57c52ec3ca88 100644 --- a/R-package/demo/early_stopping.R +++ b/R-package/demo/early_stopping.R @@ -12,19 +12,19 @@ dtest <- lgb.Dataset.create.valid(dtrain, data = agaricus.test$data, label = aga # Note: what we are getting is margin value in prediction # You must know what you are doing param <- list( - num_leaves = 4 - , learning_rate = 1 + num_leaves = 4L + , learning_rate = 1.0 ) valids <- list(eval = dtest) -num_round <- 20 +num_round <- 20L # User define objective function, given prediction, return gradient and second order gradient # This is loglikelihood loss logregobj <- function(preds, dtrain) { labels <- getinfo(dtrain, "label") - preds <- 1 / (1 + exp(-preds)) + preds <- 1.0 / (1.0 + exp(-preds)) grad <- preds - labels - hess <- preds * (1 - preds) + hess <- preds * (1.0 - preds) return(list(grad = grad, hess = hess)) } @@ -48,5 +48,5 @@ bst <- lgb.train( , valids , objective = logregobj , eval = evalerror - , early_stopping_round = 3 + , early_stopping_round = 3L ) diff --git a/R-package/demo/efficient_many_training.R b/R-package/demo/efficient_many_training.R index 50c45f3a0a55..c7a05d2046c0 100644 --- a/R-package/demo/efficient_many_training.R +++ b/R-package/demo/efficient_many_training.R @@ -12,9 +12,9 @@ library(lightgbm) # Generate fictive data of size 1M x 100 -set.seed(11111) -x_data <- matrix(rnorm(n = 100000000, mean = 0, sd = 100), nrow = 1000000, ncol = 100) -y_data <- rnorm(n = 1000000, mean = 0, sd = 5) +set.seed(11111L) +x_data <- matrix(rnorm(n = 100000000L, mean = 0.0, sd = 100.0), nrow = 1000000L, ncol = 100L) +y_data <- rnorm(n = 1000000L, mean = 0.0, sd = 5.0) # Create lgb.Dataset for training data <- lgb.Dataset(x_data, label = y_data) @@ -24,12 +24,12 @@ data$construct() # It MUST remain constant (if not increasing very slightly) gbm <- list() -for (i in 1:1000) { +for (i in 1L:1000L) { print(i) gbm[[i]] <- lgb.train( params = list(objective = "regression") , data = data - , 1 + , 1L , reset_data = TRUE ) gc(verbose = FALSE) diff --git a/R-package/demo/leaf_stability.R b/R-package/demo/leaf_stability.R index b97fc3e5e241..dfa11c252c0e 100644 --- a/R-package/demo/leaf_stability.R +++ b/R-package/demo/leaf_stability.R @@ -20,13 +20,13 @@ valids <- list(test = dtest) model <- lgb.train( params , dtrain - , 50 + , 50L , valids - , min_data = 1 + , min_data = 1L , learning_rate = 0.1 , bagging_fraction = 0.1 - , bagging_freq = 1 - , bagging_seed = 1 + , bagging_freq = 1L + , bagging_seed = 1L ) # We create a data.frame with the following structure: @@ -45,20 +45,20 @@ new_data <- data.frame( predict(model, agaricus.test$data) , 1e-15 ) - , 1 - 1e-15 + , 1.0 - 1e-15 ) ) -new_data$Z <- -1 * (agaricus.test$label * log(new_data$Y) + (1 - agaricus.test$label) * log(1 - new_data$Y)) +new_data$Z <- -1.0 * (agaricus.test$label * log(new_data$Y) + (1L - agaricus.test$label) * log(1L - new_data$Y)) new_data$binned <- .bincode( x = new_data$X , breaks = quantile( x = new_data$X - , probs = (1:9) / 10 + , probs = seq_len(9L) / 10.0 ) , right = TRUE , include.lowest = TRUE ) -new_data$binned[is.na(new_data$binned)] <- 0 +new_data$binned[is.na(new_data$binned)] <- 0L new_data$binned <- as.factor(new_data$binned) # We can check the binned content @@ -91,10 +91,10 @@ ggplot( model2 <- lgb.train( params , dtrain - , 100 + , 100L , valids - , min_data = 1 - , 
learning_rate = 1 + , min_data = 1L + , learning_rate = 1.0 ) # We create the data structure, but for model2 @@ -112,20 +112,20 @@ new_data2 <- data.frame( ) , 1e-15 ) - , 1 - 1e-15 + , 1.0 - 1e-15 ) ) -new_data2$Z <- -1 * (agaricus.test$label * log(new_data2$Y) + (1 - agaricus.test$label) * log(1 - new_data2$Y)) +new_data2$Z <- -1.0 * (agaricus.test$label * log(new_data2$Y) + (1L - agaricus.test$label) * log(1L - new_data2$Y)) new_data2$binned <- .bincode( x = new_data2$X , breaks = quantile( x = new_data2$X - , probs = (1:9) / 10 + , probs = seq_len(9L) / 10.0 ) , right = TRUE , include.lowest = TRUE ) -new_data2$binned[is.na(new_data2$binned)] <- 0 +new_data2$binned[is.na(new_data2$binned)] <- 0L new_data2$binned <- as.factor(new_data2$binned) # We can check the binned content @@ -133,7 +133,8 @@ table(new_data2$binned) # We can plot the binned content # On the second plot, we clearly notice the lower the bin (the lower the leaf value), the higher the loss -# On the third plot, it is clearly not smooth! We are severely overfitting the data, but the rules are real thus it is not an issue +# On the third plot, it is clearly not smooth! We are severely overfitting the data, but the rules are +# real thus it is not an issue # However, if the rules were not true, the loss would explode. ggplot( data = new_data2 @@ -159,10 +160,10 @@ ggplot( model3 <- lgb.train( params , dtrain - , 1000 + , 1000L , valids - , min_data = 1 - , learning_rate = 1 + , min_data = 1L + , learning_rate = 1.0 ) # We create the data structure, but for model3 @@ -180,20 +181,20 @@ new_data3 <- data.frame( ) , 1e-15 ) - , 1 - 1e-15 + , 1.0 - 1e-15 ) ) -new_data3$Z <- -1 * (agaricus.test$label * log(new_data3$Y) + (1 - agaricus.test$label) * log(1 - new_data3$Y)) +new_data3$Z <- -1.0 * (agaricus.test$label * log(new_data3$Y) + (1L - agaricus.test$label) * log(1L - new_data3$Y)) new_data3$binned <- .bincode( x = new_data3$X , breaks = quantile( x = new_data3$X - , probs = (1:9) / 10 + , probs = seq_len(9L) / 10.0 ) , right = TRUE , include.lowest = TRUE ) -new_data3$binned[is.na(new_data3$binned)] <- 0 +new_data3$binned[is.na(new_data3$binned)] <- 0L new_data3$binned <- as.factor(new_data3$binned) # We can check the binned content diff --git a/R-package/demo/multiclass.R b/R-package/demo/multiclass.R index 2bf54bd54260..6aeff9b95787 100644 --- a/R-package/demo/multiclass.R +++ b/R-package/demo/multiclass.R @@ -6,65 +6,65 @@ data(iris) # We must convert factors to numeric # They must be starting from number 0 to use multiclass # For instance: 0, 1, 2, 3, 4, 5... 
-iris$Species <- as.numeric(as.factor(iris$Species)) - 1 +iris$Species <- as.numeric(as.factor(iris$Species)) - 1L # We cut the data set into 80% train and 20% validation # The 10 last samples of each class are for validation -train <- as.matrix(iris[c(1:40, 51:90, 101:140), ]) -test <- as.matrix(iris[c(41:50, 91:100, 141:150), ]) -dtrain <- lgb.Dataset(data = train[, 1:4], label = train[, 5]) -dtest <- lgb.Dataset.create.valid(dtrain, data = test[, 1:4], label = test[, 5]) +train <- as.matrix(iris[c(1L:40L, 51L:90L, 101L:140L), ]) +test <- as.matrix(iris[c(41L:50L, 91L:100L, 141L:150L), ]) +dtrain <- lgb.Dataset(data = train[, 1L:4L], label = train[, 5L]) +dtest <- lgb.Dataset.create.valid(dtrain, data = test[, 1L:4L], label = test[, 5L]) valids <- list(test = dtest) # Method 1 of training -params <- list(objective = "multiclass", metric = "multi_error", num_class = 3) +params <- list(objective = "multiclass", metric = "multi_error", num_class = 3L) model <- lgb.train( params , dtrain - , 100 + , 100L , valids - , min_data = 1 - , learning_rate = 1 - , early_stopping_rounds = 10 + , min_data = 1L + , learning_rate = 1.0 + , early_stopping_rounds = 10L ) # We can predict on test data, outputs a 90-length vector # Order: obs1 class1, obs1 class2, obs1 class3, obs2 class1, obs2 class2, obs2 class3... -my_preds <- predict(model, test[, 1:4]) +my_preds <- predict(model, test[, 1L:4L]) # Method 2 of training, identical model <- lgb.train( list() , dtrain - , 100 + , 100L , valids - , min_data = 1 - , learning_rate = 1 - , early_stopping_rounds = 10 + , min_data = 1L + , learning_rate = 1.0 + , early_stopping_rounds = 10L , objective = "multiclass" , metric = "multi_error" - , num_class = 3 + , num_class = 3L ) # We can predict on test data, identical -my_preds <- predict(model, test[, 1:4]) +my_preds <- predict(model, test[, 1L:4L]) # A (30x3) matrix with the predictions, use parameter reshape # class1 class2 class3 # obs1 obs1 obs1 # obs2 obs2 obs2 # .... .... .... -my_preds <- predict(model, test[, 1:4], reshape = TRUE) +my_preds <- predict(model, test[, 1L:4L], reshape = TRUE) # We can also get the predicted scores before the Sigmoid/Softmax application -my_preds <- predict(model, test[, 1:4], rawscore = TRUE) +my_preds <- predict(model, test[, 1L:4L], rawscore = TRUE) # Raw score predictions as matrix instead of vector -my_preds <- predict(model, test[, 1:4], rawscore = TRUE, reshape = TRUE) +my_preds <- predict(model, test[, 1L:4L], rawscore = TRUE, reshape = TRUE) # We can also get the leaf index -my_preds <- predict(model, test[, 1:4], predleaf = TRUE) +my_preds <- predict(model, test[, 1L:4L], predleaf = TRUE) # Predict leaf index as matrix instead of vector -my_preds <- predict(model, test[, 1:4], predleaf = TRUE, reshape = TRUE) +my_preds <- predict(model, test[, 1L:4L], predleaf = TRUE, reshape = TRUE) diff --git a/R-package/demo/multiclass_custom_objective.R b/R-package/demo/multiclass_custom_objective.R index 313fa8364782..fee8f42ef28d 100644 --- a/R-package/demo/multiclass_custom_objective.R +++ b/R-package/demo/multiclass_custom_objective.R @@ -6,15 +6,15 @@ data(iris) # We must convert factors to numeric # They must be starting from number 0 to use multiclass # For instance: 0, 1, 2, 3, 4, 5... 
-iris$Species <- as.numeric(as.factor(iris$Species)) - 1 +iris$Species <- as.numeric(as.factor(iris$Species)) - 1L # Create imbalanced training data (20, 30, 40 examples for classes 0, 1, 2) -train <- as.matrix(iris[c(1:20, 51:80, 101:140), ]) +train <- as.matrix(iris[c(1L:20L, 51L:80L, 101L:140L), ]) # The 10 last samples of each class are for validation -test <- as.matrix(iris[c(41:50, 91:100, 141:150), ]) +test <- as.matrix(iris[c(41L:50L, 91L:100L, 141L:150L), ]) -dtrain <- lgb.Dataset(data = train[, 1:4], label = train[, 5]) -dtest <- lgb.Dataset.create.valid(dtrain, data = test[, 1:4], label = test[, 5]) +dtrain <- lgb.Dataset(data = train[, 1L:4L], label = train[, 5L]) +dtest <- lgb.Dataset.create.valid(dtrain, data = test[, 1L:4L], label = test[, 5L]) valids <- list(train = dtrain, test = dtest) # Method 1 of training with built-in multiclass objective @@ -24,52 +24,52 @@ model_builtin <- lgb.train( list() , dtrain , boost_from_average = FALSE - , 100 + , 100L , valids - , min_data = 1 - , learning_rate = 1 - , early_stopping_rounds = 10 + , min_data = 1L + , learning_rate = 1.0 + , early_stopping_rounds = 10L , objective = "multiclass" , metric = "multi_logloss" - , num_class = 3 + , num_class = 3L ) -preds_builtin <- predict(model_builtin, test[, 1:4], rawscore = TRUE, reshape = TRUE) +preds_builtin <- predict(model_builtin, test[, 1L:4L], rawscore = TRUE, reshape = TRUE) probs_builtin <- exp(preds_builtin) / rowSums(exp(preds_builtin)) # Method 2 of training with custom objective function # User defined objective function, given prediction, return gradient and second order gradient -custom_multiclass_obj = function(preds, dtrain) { - labels = getinfo(dtrain, "label") +custom_multiclass_obj <- function(preds, dtrain) { + labels <- getinfo(dtrain, "label") # preds is a matrix with rows corresponding to samples and colums corresponding to choices - preds = matrix(preds, nrow = length(labels)) + preds <- matrix(preds, nrow = length(labels)) # to prevent overflow, normalize preds by row - preds = preds - apply(preds, 1, max) - prob = exp(preds) / rowSums(exp(preds)) + preds <- preds - apply(preds, 1L, max) + prob <- exp(preds) / rowSums(exp(preds)) # compute gradient - grad = prob - grad[cbind(1:length(labels), labels + 1)] = grad[cbind(1:length(labels), labels + 1)] - 1 + grad <- prob + grad[cbind(seq_len(length(labels)), labels + 1L)] <- grad[cbind(seq_len(length(labels)), labels + 1L)] - 1L # compute hessian (approximation) - hess = 2 * prob * (1 - prob) + hess <- 2.0 * prob * (1.0 - prob) return(list(grad = grad, hess = hess)) } # define custom metric -custom_multiclass_metric = function(preds, dtrain) { - labels = getinfo(dtrain, "label") - preds = matrix(preds, nrow = length(labels)) - preds = preds - apply(preds, 1, max) - prob = exp(preds) / rowSums(exp(preds)) +custom_multiclass_metric <- function(preds, dtrain) { + labels <- getinfo(dtrain, "label") + preds <- matrix(preds, nrow = length(labels)) + preds <- preds - apply(preds, 1L, max) + prob <- exp(preds) / rowSums(exp(preds)) return(list( name = "error" - , value = -mean(log(prob[cbind(1:length(labels), labels + 1)])) + , value = -mean(log(prob[cbind(seq_len(length(labels)), labels + 1L)])) , higher_better = FALSE )) } @@ -77,17 +77,17 @@ custom_multiclass_metric = function(preds, dtrain) { model_custom <- lgb.train( list() , dtrain - , 100 + , 100L , valids - , min_data = 1 - , learning_rate = 1 - , early_stopping_rounds = 10 + , min_data = 1L + , learning_rate = 1.0 + , early_stopping_rounds = 10L , objective = 
custom_multiclass_obj , eval = custom_multiclass_metric - , num_class = 3 + , num_class = 3L ) -preds_custom <- predict(model_custom, test[, 1:4], rawscore = TRUE, reshape = TRUE) +preds_custom <- predict(model_custom, test[, 1L:4L], rawscore = TRUE, reshape = TRUE) probs_custom <- exp(preds_custom) / rowSums(exp(preds_custom)) # compare predictions diff --git a/R-package/demo/weight_param.R b/R-package/demo/weight_param.R index 8d9b52b80878..461b8caa79be 100644 --- a/R-package/demo/weight_param.R +++ b/R-package/demo/weight_param.R @@ -11,8 +11,8 @@ library(lightgbm) # - Run 3: sum of weights equal to 6513 (x 1e5) with adjusted regularization (learning) # Setup small weights -weights1 <- rep(1 / 100000, 6513) -weights2 <- rep(1 / 100000, 1611) +weights1 <- rep(1.0 / 100000.0, 6513L) +weights2 <- rep(1.0 / 100000.0, 1611L) # Load data and create datasets data(agaricus.train, package = "lightgbm") @@ -30,19 +30,19 @@ params <- list( objective = "regression" , metric = "l2" , device = "cpu" - , min_sum_hessian = 10 - , num_leaves = 7 - , max_depth = 3 - , nthread = 1 + , min_sum_hessian = 10.0 + , num_leaves = 7L + , max_depth = 3L + , nthread = 1L ) model <- lgb.train( params , dtrain - , 50 + , 50L , valids - , min_data = 1 - , learning_rate = 1 - , early_stopping_rounds = 10 + , min_data = 1L + , learning_rate = 1.0 + , early_stopping_rounds = 10L ) weight_loss <- as.numeric(model$record_evals$test$l2$eval) plot(weight_loss) # Shows how poor the learning was: a straight line! @@ -55,18 +55,18 @@ params <- list( , metric = "l2" , device = "cpu" , min_sum_hessian = 1e-4 - , num_leaves = 7 - , max_depth = 3 - , nthread = 1 + , num_leaves = 7L + , max_depth = 3L + , nthread = 1L ) model <- lgb.train( params , dtrain - , 50 + , 50L , valids - , min_data = 1 - , learning_rate = 1 - , early_stopping_rounds = 10 + , min_data = 1L + , learning_rate = 1.0 + , early_stopping_rounds = 10L ) small_weight_loss <- as.numeric(model$record_evals$test$l2$eval) plot(small_weight_loss) # It learns! @@ -90,19 +90,19 @@ params <- list( objective = "regression" , metric = "l2" , device = "cpu" - , min_sum_hessian = 10 - , num_leaves = 7 - , max_depth = 3 - , nthread = 1 + , min_sum_hessian = 10.0 + , num_leaves = 7L + , max_depth = 3L + , nthread = 1L ) model <- lgb.train( params , dtrain - , 50 + , 50L , valids - , min_data = 1 - , learning_rate = 1 - , early_stopping_rounds = 10 + , min_data = 1L + , learning_rate = 1.0 + , early_stopping_rounds = 10L ) large_weight_loss <- as.numeric(model$record_evals$test$l2$eval) plot(large_weight_loss) # It learns! @@ -110,4 +110,4 @@ plot(large_weight_loss) # It learns! # Do you want to compare the learning? They both converge. plot(small_weight_loss, large_weight_loss) -curve(1 * x, from = 0, to = 0.02, add = TRUE) +curve(1.0 * x, from = 0L, to = 0.02, add = TRUE) diff --git a/R-package/man/dimnames.lgb.Dataset.Rd b/R-package/man/dimnames.lgb.Dataset.Rd index 54563ac5c00c..84a6447cf381 100644 --- a/R-package/man/dimnames.lgb.Dataset.Rd +++ b/R-package/man/dimnames.lgb.Dataset.Rd @@ -31,7 +31,7 @@ dtrain <- lgb.Dataset(train$data, label = train$label) lgb.Dataset.construct(dtrain) dimnames(dtrain) colnames(dtrain) -colnames(dtrain) <- make.names(1:ncol(train$data)) +colnames(dtrain) <- make.names(seq_len(ncol(train$data))) print(dtrain, verbose = TRUE) } diff --git a/R-package/man/getinfo.Rd b/R-package/man/getinfo.Rd index 8bdca02ca940..b5b8112bd2d3 100644 --- a/R-package/man/getinfo.Rd +++ b/R-package/man/getinfo.Rd @@ -12,9 +12,9 @@ getinfo(dataset, ...) 
\arguments{ \item{dataset}{Object of class \code{lgb.Dataset}} -\item{name}{the name of the information field to get (see details)} - \item{...}{other parameters} + +\item{name}{the name of the information field to get (see details)} } \value{ info data diff --git a/R-package/man/lgb.Dataset.set.categorical.Rd b/R-package/man/lgb.Dataset.set.categorical.Rd index 1cec77c13d85..f57096c1827f 100644 --- a/R-package/man/lgb.Dataset.set.categorical.Rd +++ b/R-package/man/lgb.Dataset.set.categorical.Rd @@ -24,6 +24,6 @@ train <- agaricus.train dtrain <- lgb.Dataset(train$data, label = train$label) lgb.Dataset.save(dtrain, "lgb.Dataset.data") dtrain <- lgb.Dataset("lgb.Dataset.data") -lgb.Dataset.set.categorical(dtrain, 1:2) +lgb.Dataset.set.categorical(dtrain, 1L:2L) } diff --git a/R-package/man/lgb.cv.Rd b/R-package/man/lgb.cv.Rd index a419122592ff..8d83d3ffd5b9 100644 --- a/R-package/man/lgb.cv.Rd +++ b/R-package/man/lgb.cv.Rd @@ -7,16 +7,16 @@ lgb.cv( params = list(), data, - nrounds = 10, - nfold = 3, + nrounds = 10L, + nfold = 3L, label = NULL, weight = NULL, obj = NULL, eval = NULL, - verbose = 1, + verbose = 1L, record = TRUE, eval_freq = 1L, - showsd = TRUE, + showsd = TRUE, stratified = TRUE, folds = NULL, init_model = NULL, @@ -53,8 +53,6 @@ lgb.cv( \item{eval_freq}{evaluation output frequency, only effect when verbose > 0} -\item{showsd}{\code{boolean}, whether to show standard deviation of cross validation} - \item{stratified}{a \code{boolean} indicating whether sampling of folds should be stratified by the values of outcome labels.} @@ -90,6 +88,8 @@ into a predictor model which frees up memory and the original datasets} the number of real CPU cores, not the number of threads (most CPU using hyper-threading to generate 2 threads per CPU core).} }} + +\item{showsd}{\code{boolean}, whether to show standard deviation of cross validation} } \value{ a trained model \code{lgb.CVBooster}.
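Aside (illustration only, not part of the patch): most of the hunks in these Rd files simply swap bare numeric literals for explicitly typed ones, for example nrounds = 10 becoming nrounds = 10L and learning_rate = 1 becoming learning_rate = 1.0. A minimal R sketch of the distinction those suffixes encode; the values below are illustrative, not taken from the patch:

typeof(1)          # "double"  : a bare numeric literal is double-precision
typeof(1L)         # "integer" : the L suffix creates an integer
identical(1, 1L)   # FALSE     : same value, different storage mode
nrounds <- 10L     # spells out the intended type, as the updated examples do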
@@ -106,10 +106,10 @@ params <- list(objective = "regression", metric = "l2") model <- lgb.cv( params = params , data = dtrain - , nrounds = 10 - , nfold = 3 - , min_data = 1 - , learning_rate = 1 - , early_stopping_rounds = 5 + , nrounds = 10L + , nfold = 3L + , min_data = 1L + , learning_rate = 1.0 + , early_stopping_rounds = 5L ) } diff --git a/R-package/man/lgb.dump.Rd b/R-package/man/lgb.dump.Rd index c03dcdd7fc6b..828ba4ac7ea9 100644 --- a/R-package/man/lgb.dump.Rd +++ b/R-package/man/lgb.dump.Rd @@ -30,11 +30,11 @@ valids <- list(test = dtest) model <- lgb.train( params = params , data = dtrain - , nrounds = 10 + , nrounds = 10L , valids = valids - , min_data = 1 - , learning_rate = 1 - , early_stopping_rounds = 5 + , min_data = 1L + , learning_rate = 1.0 + , early_stopping_rounds = 5L ) json_model <- lgb.dump(model) diff --git a/R-package/man/lgb.get.eval.result.Rd b/R-package/man/lgb.get.eval.result.Rd index 0b124ffb62cb..e049ff53ebb2 100644 --- a/R-package/man/lgb.get.eval.result.Rd +++ b/R-package/man/lgb.get.eval.result.Rd @@ -42,11 +42,11 @@ valids <- list(test = dtest) model <- lgb.train( params = params , data = dtrain - , nrounds = 10 + , nrounds = 10L , valids = valids - , min_data = 1 - , learning_rate = 1 - , early_stopping_rounds = 5 + , min_data = 1L + , learning_rate = 1.0 + , early_stopping_rounds = 5L ) lgb.get.eval.result(model, "test", "l2") } diff --git a/R-package/man/lgb.importance.Rd b/R-package/man/lgb.importance.Rd index 2a75e1c14eb3..fdbff49e8858 100644 --- a/R-package/man/lgb.importance.Rd +++ b/R-package/man/lgb.importance.Rd @@ -32,12 +32,12 @@ dtrain <- lgb.Dataset(train$data, label = train$label) params <- list( objective = "binary" , learning_rate = 0.01 - , num_leaves = 63 - , max_depth = -1 - , min_data_in_leaf = 1 - , min_sum_hessian_in_leaf = 1 + , num_leaves = 63L + , max_depth = -1L + , min_data_in_leaf = 1L + , min_sum_hessian_in_leaf = 1.0 ) -model <- lgb.train(params, dtrain, 10) +model <- lgb.train(params, dtrain, 10L) tree_imp1 <- lgb.importance(model, percentage = TRUE) tree_imp2 <- lgb.importance(model, percentage = FALSE) diff --git a/R-package/man/lgb.interprete.Rd b/R-package/man/lgb.interprete.Rd index 798f39c2eef7..906875d190b4 100644 --- a/R-package/man/lgb.interprete.Rd +++ b/R-package/man/lgb.interprete.Rd @@ -16,19 +16,21 @@ lgb.interprete(model, data, idxset, num_iteration = NULL) \item{num_iteration}{number of iteration want to predict with, NULL or <= 0 means use best iteration.} } \value{ -For regression, binary classification and lambdarank model, a \code{list} of \code{data.table} with the following columns: -\itemize{ - \item \code{Feature} Feature names in the model. - \item \code{Contribution} The total contribution of this feature's splits. -} -For multiclass classification, a \code{list} of \code{data.table} with the Feature column and Contribution columns to each class. +For regression, binary classification and lambdarank model, a \code{list} of \code{data.table} + with the following columns: + \itemize{ + \item \code{Feature} Feature names in the model. + \item \code{Contribution} The total contribution of this feature's splits. + } + For multiclass classification, a \code{list} of \code{data.table} with the Feature column and + Contribution columns to each class. } \description{ Computes feature contribution components of rawscore prediction. 
} \examples{ -Sigmoid <- function(x) 1 / (1 + exp(-x)) -Logit <- function(x) log(x / (1 - x)) +Sigmoid <- function(x) 1.0 / (1.0 + exp(-x)) +Logit <- function(x) log(x / (1.0 - x)) data(agaricus.train, package = "lightgbm") train <- agaricus.train dtrain <- lgb.Dataset(train$data, label = train$label) @@ -39,13 +41,13 @@ test <- agaricus.test params <- list( objective = "binary" , learning_rate = 0.01 - , num_leaves = 63 - , max_depth = -1 - , min_data_in_leaf = 1 - , min_sum_hessian_in_leaf = 1 + , num_leaves = 63L + , max_depth = -1L + , min_data_in_leaf = 1L + , min_sum_hessian_in_leaf = 1.0 ) -model <- lgb.train(params, dtrain, 10) +model <- lgb.train(params, dtrain, 10L) -tree_interpretation <- lgb.interprete(model, test$data, 1:5) +tree_interpretation <- lgb.interprete(model, test$data, 1L:5L) } diff --git a/R-package/man/lgb.load.Rd b/R-package/man/lgb.load.Rd index ebe14d0e39bc..524eb8967c08 100644 --- a/R-package/man/lgb.load.Rd +++ b/R-package/man/lgb.load.Rd @@ -32,11 +32,11 @@ valids <- list(test = dtest) model <- lgb.train( params = params , data = dtrain - , nrounds = 10 + , nrounds = 10L , valids = valids - , min_data = 1 - , learning_rate = 1 - , early_stopping_rounds = 5 + , min_data = 1L + , learning_rate = 1.0 + , early_stopping_rounds = 5L ) lgb.save(model, "model.txt") load_booster <- lgb.load(filename = "model.txt") diff --git a/R-package/man/lgb.model.dt.tree.Rd b/R-package/man/lgb.model.dt.tree.Rd index fc85105eee50..d7c96001c5e4 100644 --- a/R-package/man/lgb.model.dt.tree.Rd +++ b/R-package/man/lgb.model.dt.tree.Rd @@ -47,12 +47,12 @@ dtrain <- lgb.Dataset(train$data, label = train$label) params <- list( objective = "binary" , learning_rate = 0.01 - , num_leaves = 63 - , max_depth = -1 - , min_data_in_leaf = 1 - , min_sum_hessian_in_leaf = 1 + , num_leaves = 63L + , max_depth = -1L + , min_data_in_leaf = 1L + , min_sum_hessian_in_leaf = 1.0 ) -model <- lgb.train(params, dtrain, 10) +model <- lgb.train(params, dtrain, 10L) tree_dt <- lgb.model.dt.tree(model) diff --git a/R-package/man/lgb.plot.importance.Rd b/R-package/man/lgb.plot.importance.Rd index cc419ab0ae8d..c3998398462b 100644 --- a/R-package/man/lgb.plot.importance.Rd +++ b/R-package/man/lgb.plot.importance.Rd @@ -6,9 +6,9 @@ \usage{ lgb.plot.importance( tree_imp, - top_n = 10, + top_n = 10L, measure = "Gain", - left_margin = 10, + left_margin = 10L, cex = NULL ) } @@ -42,14 +42,14 @@ dtrain <- lgb.Dataset(train$data, label = train$label) params <- list( objective = "binary" , learning_rate = 0.01 - , num_leaves = 63 - , max_depth = -1 - , min_data_in_leaf = 1 - , min_sum_hessian_in_leaf = 1 + , num_leaves = 63L + , max_depth = -1L + , min_data_in_leaf = 1L + , min_sum_hessian_in_leaf = 1.0 ) model <- lgb.train(params, dtrain, 10) tree_imp <- lgb.importance(model, percentage = TRUE) -lgb.plot.importance(tree_imp, top_n = 10, measure = "Gain") +lgb.plot.importance(tree_imp, top_n = 10L, measure = "Gain") } diff --git a/R-package/man/lgb.plot.interpretation.Rd b/R-package/man/lgb.plot.interpretation.Rd index a026d619c8f9..f5fa6497f2c0 100644 --- a/R-package/man/lgb.plot.interpretation.Rd +++ b/R-package/man/lgb.plot.interpretation.Rd @@ -6,9 +6,9 @@ \usage{ lgb.plot.interpretation( tree_interpretation_dt, - top_n = 10, - cols = 1, - left_margin = 10, + top_n = 10L, + cols = 1L, + left_margin = 10L, cex = NULL ) } @@ -35,8 +35,8 @@ contribution of a feature. 
Features are shown ranked in a decreasing contributio } \examples{ library(lightgbm) -Sigmoid <- function(x) {1 / (1 + exp(-x))} -Logit <- function(x) {log(x / (1 - x))} +Sigmoid <- function(x) {1.0 / (1.0 + exp(-x))} +Logit <- function(x) {log(x / (1.0 - x))} data(agaricus.train, package = "lightgbm") train <- agaricus.train dtrain <- lgb.Dataset(train$data, label = train$label) @@ -47,13 +47,13 @@ test <- agaricus.test params <- list( objective = "binary" , learning_rate = 0.01 - , num_leaves = 63 - , max_depth = -1 - , min_data_in_leaf = 1 - , min_sum_hessian_in_leaf = 1 + , num_leaves = 63L + , max_depth = -1L + , min_data_in_leaf = 1L + , min_sum_hessian_in_leaf = 1.0 ) -model <- lgb.train(params, dtrain, 10) +model <- lgb.train(params, dtrain, 10L) -tree_interpretation <- lgb.interprete(model, test$data, 1:5) -lgb.plot.interpretation(tree_interpretation[[1]], top_n = 10) +tree_interpretation <- lgb.interprete(model, test$data, 1L:5L) +lgb.plot.interpretation(tree_interpretation[[1L]], top_n = 10L) } diff --git a/R-package/man/lgb.prepare_rules.Rd b/R-package/man/lgb.prepare_rules.Rd index d8caa74475aa..2010a5755036 100644 --- a/R-package/man/lgb.prepare_rules.Rd +++ b/R-package/man/lgb.prepare_rules.Rd @@ -31,16 +31,16 @@ new_iris <- lgb.prepare_rules(data = iris) # Autoconverter str(new_iris$data) data(iris) # Erase iris dataset -iris$Species[1] <- "NEW FACTOR" # Introduce junk factor (NA) +iris$Species[1L] <- "NEW FACTOR" # Introduce junk factor (NA) # Use conversion using known rules # Unknown factors become 0, excellent for sparse datasets newer_iris <- lgb.prepare_rules(data = iris, rules = new_iris$rules) # Unknown factor is now zero, perfect for sparse datasets -newer_iris$data[1, ] # Species became 0 as it is an unknown factor +newer_iris$data[1L, ] # Species became 0 as it is an unknown factor -newer_iris$data[1, 5] <- 1 # Put back real initial value +newer_iris$data[1L, 5L] <- 1.0 # Put back real initial value # Is the newly created dataset equal? YES! all.equal(new_iris$data, newer_iris$data) @@ -49,9 +49,9 @@ all.equal(new_iris$data, newer_iris$data) data(iris) # Erase iris dataset # We remapped values differently -personal_rules <- list(Species = c("setosa" = 3, - "versicolor" = 2, - "virginica" = 1)) +personal_rules <- list(Species = c("setosa" = 3L, + "versicolor" = 2L, + "virginica" = 1L)) newest_iris <- lgb.prepare_rules(data = iris, rules = personal_rules) str(newest_iris$data) # SUCCESS! 
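Aside (illustration only, not part of the patch): several of the updated examples replace 1:n ranges with seq_len(), for instance slice(dtrain, seq_len(42L)) and make.names(seq_len(ncol(train$data))). A minimal sketch of the edge case that substitution avoids, using a throwaway variable n:

n <- 0L
1:n          # 1 0        : counts backwards when n is zero, usually a bug
seq_len(n)   # integer(0) : an empty sequence, safe to loop over or index with
seq_len(3L)  # 1 2 3      : same result as 1:3 for positive n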
diff --git a/R-package/man/lgb.save.Rd b/R-package/man/lgb.save.Rd index df483f6513bb..da28b866acd3 100644 --- a/R-package/man/lgb.save.Rd +++ b/R-package/man/lgb.save.Rd @@ -32,11 +32,11 @@ valids <- list(test = dtest) model <- lgb.train( params = params , data = dtrain - , nrounds = 10 + , nrounds = 10L , valids = valids - , min_data = 1 - , learning_rate = 1 - , early_stopping_rounds = 5 + , min_data = 1L + , learning_rate = 1.0 + , early_stopping_rounds = 5L ) lgb.save(model, "model.txt") diff --git a/R-package/man/lgb.train.Rd b/R-package/man/lgb.train.Rd index f7096389d428..05d8f81904b7 100644 --- a/R-package/man/lgb.train.Rd +++ b/R-package/man/lgb.train.Rd @@ -7,11 +7,11 @@ lgb.train( params = list(), data, - nrounds = 10, + nrounds = 10L, valids = list(), obj = NULL, eval = NULL, - verbose = 1, + verbose = 1L, record = TRUE, eval_freq = 1L, init_model = NULL, @@ -93,10 +93,10 @@ valids <- list(test = dtest) model <- lgb.train( params = params , data = dtrain - , nrounds = 10 + , nrounds = 10L , valids = valids - , min_data = 1 - , learning_rate = 1 - , early_stopping_rounds = 5 + , min_data = 1L + , learning_rate = 1.0 + , early_stopping_rounds = 5L ) } diff --git a/R-package/man/lgb.unloader.Rd b/R-package/man/lgb.unloader.Rd index b77046cf5644..0f06b786353c 100644 --- a/R-package/man/lgb.unloader.Rd +++ b/R-package/man/lgb.unloader.Rd @@ -21,7 +21,9 @@ environment. Defaults to \code{FALSE} which means to not remove them.} NULL invisibly. } \description{ -Attempts to unload LightGBM packages so you can remove objects cleanly without having to restart R. This is useful for instance if an object becomes stuck for no apparent reason and you do not want to restart R to fix the lost object. +Attempts to unload LightGBM packages so you can remove objects cleanly without having to restart R. +This is useful for instance if an object becomes stuck for no apparent reason and you do not want +to restart R to fix the lost object. } \examples{ library(lightgbm) @@ -36,11 +38,11 @@ valids <- list(test = dtest) model <- lgb.train( params = params , data = dtrain - , nrounds = 10 + , nrounds = 10L , valids = valids - , min_data = 1 - , learning_rate = 1 - , early_stopping_rounds = 5 + , min_data = 1L + , learning_rate = 1.0 + , early_stopping_rounds = 5L ) \dontrun{ diff --git a/R-package/man/lightgbm.Rd b/R-package/man/lightgbm.Rd index d85a922e67c3..f7b0fb2982c5 100644 --- a/R-package/man/lightgbm.Rd +++ b/R-package/man/lightgbm.Rd @@ -9,8 +9,8 @@ lightgbm( label = NULL, weight = NULL, params = list(), - nrounds = 10, - verbose = 1, + nrounds = 10L, + verbose = 1L, eval_freq = 1L, early_stopping_rounds = NULL, save_name = "lightgbm.model", diff --git a/R-package/man/predict.lgb.Booster.Rd b/R-package/man/predict.lgb.Booster.Rd index e2359c225390..3768c5cc5e93 100644 --- a/R-package/man/predict.lgb.Booster.Rd +++ b/R-package/man/predict.lgb.Booster.Rd @@ -41,12 +41,12 @@ the \code{lgb.Booster} object passed to \code{object}.} } \value{ For regression or binary classification, it returns a vector of length \code{nrows(data)}. -For multiclass classification, either a \code{num_class * nrows(data)} vector or -a \code{(nrows(data), num_class)} dimension matrix is returned, depending on -the \code{reshape} value. + For multiclass classification, either a \code{num_class * nrows(data)} vector or + a \code{(nrows(data), num_class)} dimension matrix is returned, depending on + the \code{reshape} value. 
-When \code{predleaf = TRUE}, the output is a matrix object with the -number of columns corresponding to the number of trees. + When \code{predleaf = TRUE}, the output is a matrix object with the + number of columns corresponding to the number of trees. } \description{ Predicted values based on class \code{lgb.Booster} @@ -64,11 +64,11 @@ valids <- list(test = dtest) model <- lgb.train( params = params , data = dtrain - , nrounds = 10 + , nrounds = 10L , valids = valids - , min_data = 1 - , learning_rate = 1 - , early_stopping_rounds = 5 + , min_data = 1L + , learning_rate = 1.0 + , early_stopping_rounds = 5L ) preds <- predict(model, test$data) diff --git a/R-package/man/readRDS.lgb.Booster.Rd b/R-package/man/readRDS.lgb.Booster.Rd index a8fc219ffd95..80ad28efb421 100644 --- a/R-package/man/readRDS.lgb.Booster.Rd +++ b/R-package/man/readRDS.lgb.Booster.Rd @@ -30,11 +30,11 @@ valids <- list(test = dtest) model <- lgb.train( params = params , data = dtrain - , nrounds = 10 + , nrounds = 10L , valids = valids - , min_data = 1 - , learning_rate = 1 - , early_stopping_rounds = 5 + , min_data = 1L + , learning_rate = 1.0 + , early_stopping_rounds = 5L ) saveRDS.lgb.Booster(model, "model.rds") new_model <- readRDS.lgb.Booster("model.rds") diff --git a/R-package/man/saveRDS.lgb.Booster.Rd b/R-package/man/saveRDS.lgb.Booster.Rd index afa639d4245c..f2eeef31fbf3 100644 --- a/R-package/man/saveRDS.lgb.Booster.Rd +++ b/R-package/man/saveRDS.lgb.Booster.Rd @@ -54,11 +54,11 @@ valids <- list(test = dtest) model <- lgb.train( params = params , data = dtrain - , nrounds = 10 + , nrounds = 10L , valids = valids - , min_data = 1 - , learning_rate = 1 - , early_stopping_rounds = 5 + , min_data = 1L + , learning_rate = 1.0 + , early_stopping_rounds = 5L ) saveRDS.lgb.Booster(model, "model.rds") } diff --git a/R-package/man/setinfo.Rd b/R-package/man/setinfo.Rd index 5b0a23f6992b..861e5f0219ef 100644 --- a/R-package/man/setinfo.Rd +++ b/R-package/man/setinfo.Rd @@ -12,11 +12,11 @@ setinfo(dataset, ...) \arguments{ \item{dataset}{Object of class \code{lgb.Dataset}} +\item{...}{other parameters} + \item{name}{the name of the field to get} \item{info}{the specific field of information to set} - -\item{...}{other parameters} } \value{ passed object diff --git a/R-package/man/slice.Rd b/R-package/man/slice.Rd index e13aacaf4034..53a1d2888b00 100644 --- a/R-package/man/slice.Rd +++ b/R-package/man/slice.Rd @@ -12,9 +12,9 @@ slice(dataset, ...) 
\arguments{ \item{dataset}{Object of class \code{lgb.Dataset}} -\item{idxset}{an integer vector of indices of rows needed} - \item{...}{other parameters (currently not used)} + +\item{idxset}{an integer vector of indices of rows needed} } \value{ constructed sub dataset @@ -29,7 +29,7 @@ data(agaricus.train, package = "lightgbm") train <- agaricus.train dtrain <- lgb.Dataset(train$data, label = train$label) -dsub <- lightgbm::slice(dtrain, 1:42) +dsub <- lightgbm::slice(dtrain, seq_len(42L)) lgb.Dataset.construct(dsub) labels <- lightgbm::getinfo(dsub, "label") diff --git a/R-package/src/install.libs.R b/R-package/src/install.libs.R index 2ab2a6a77af0..b731ed60f706 100644 --- a/R-package/src/install.libs.R +++ b/R-package/src/install.libs.R @@ -3,20 +3,20 @@ use_precompile <- FALSE use_gpu <- FALSE use_mingw <- FALSE -if (.Machine$sizeof.pointer != 8){ +if (.Machine$sizeof.pointer != 8L) { stop("Only support 64-bit R, please check your the version of your R and Rtools.") } R_int_UUID <- .Internal(internalsID()) -R_ver <- as.double(R.Version()$major) + as.double(R.Version()$minor) / 10 +R_ver <- as.double(R.Version()$major) + as.double(R.Version()$minor) / 10.0 if (!(R_int_UUID == "0310d4b8-ccb1-4bb8-ba94-d36a55f60262" - || R_int_UUID == "2fdf6c18-697a-4ba7-b8ef-11c0d92f1327")){ + || R_int_UUID == "2fdf6c18-697a-4ba7-b8ef-11c0d92f1327")) { print("Warning: unmatched R_INTERNALS_UUID, may cannot run normally.") } # Move in CMakeLists.txt -if (!file.copy("../inst/bin/CMakeLists.txt", "CMakeLists.txt", overwrite = TRUE)){ +if (!file.copy("../inst/bin/CMakeLists.txt", "CMakeLists.txt", overwrite = TRUE)) { stop("Copying CMakeLists failed") } @@ -48,14 +48,14 @@ if (!use_precompile) { # Using this kind-of complicated pattern to avoid matching to # things like "pgcc" using_gcc <- grepl( - pattern = '^gcc$|[/\\]+gcc$|^gcc\\-[0-9]+$|[/\\]+gcc\\-[0-9]+$' - , x = Sys.getenv('CC', '') + pattern = "^gcc$|[/\\]+gcc$|^gcc\\-[0-9]+$|[/\\]+gcc\\-[0-9]+$" + , x = Sys.getenv("CC", "") ) using_gpp <- grepl( - pattern = '^g\\+\\+$|[/\\]+g\\+\\+$|^g\\+\\+\\-[0-9]+$|[/\\]+g\\+\\+\\-[0-9]+$' - , x = Sys.getenv('CXX', '') + pattern = "^g\\+\\+$|[/\\]+g\\+\\+$|^g\\+\\+\\-[0-9]+$|[/\\]+g\\+\\+\\-[0-9]+$" + , x = Sys.getenv("CXX", "") ) - on_mac <- Sys.info()['sysname'] == 'Darwin' + on_mac <- Sys.info()["sysname"] == "Darwin" if (on_mac && !(using_gcc & using_gpp)) { cmake_cmd <- paste(cmake_cmd, ' -DOpenMP_C_FLAGS="-Xpreprocessor -fopenmp -I$(brew --prefix libomp)/include" ') cmake_cmd <- paste(cmake_cmd, ' -DOpenMP_C_LIB_NAMES="omp" ') @@ -71,21 +71,21 @@ if (!use_precompile) { build_cmd <- "mingw32-make.exe _lightgbm" system(paste0(cmake_cmd, " ..")) # Must build twice for Windows due sh.exe in Rtools } else { - try_vs <- 0 + try_vs <- 0L local_vs_def <- "" vs_versions <- c("Visual Studio 16 2019", "Visual Studio 15 2017", "Visual Studio 14 2015") - for (vs in vs_versions){ + for (vs in vs_versions) { vs_def <- paste0(" -G \"", vs, "\" -A x64") tmp_cmake_cmd <- paste0(cmake_cmd, vs_def) try_vs <- system(paste0(tmp_cmake_cmd, " ..")) - if (try_vs == 0) { - local_vs_def = vs_def + if (try_vs == 0L) { + local_vs_def <- vs_def break } else { unlink("./*", recursive = TRUE) # Clean up build directory } } - if (try_vs == 1) { + if (try_vs == 1L) { cmake_cmd <- paste0(cmake_cmd, " -G \"MinGW Makefiles\" ") # Switch to MinGW on failure, try build once system(paste0(cmake_cmd, " ..")) # Must build twice for Windows due sh.exe in Rtools build_cmd <- "mingw32-make.exe _lightgbm" diff --git 
a/R-package/tests/testthat/test_basic.R b/R-package/tests/testthat/test_basic.R index 90925196a5f1..af6aeb651d8f 100644 --- a/R-package/tests/testthat/test_basic.R +++ b/R-package/tests/testthat/test_basic.R @@ -1,18 +1,18 @@ context("basic functions") -data(agaricus.train, package = 'lightgbm') -data(agaricus.test, package = 'lightgbm') +data(agaricus.train, package = "lightgbm") +data(agaricus.test, package = "lightgbm") train <- agaricus.train test <- agaricus.test -windows_flag = grepl('Windows', Sys.info()[['sysname']]) +windows_flag <- grepl("Windows", Sys.info()[["sysname"]]) test_that("train and predict binary classification", { - nrounds = 10 + nrounds <- 10L bst <- lightgbm( data = train$data , label = train$label - , num_leaves = 5 + , num_leaves = 5L , nrounds = nrounds , objective = "binary" , metric = "binary_error" @@ -22,38 +22,38 @@ test_that("train and predict binary classification", { expect_lt(min(record_results), 0.02) pred <- predict(bst, test$data) - expect_equal(length(pred), 1611) + expect_equal(length(pred), 1611L) - pred1 <- predict(bst, train$data, num_iteration = 1) - expect_equal(length(pred1), 6513) - err_pred1 <- sum( (pred1 > 0.5) != train$label) / length(train$label) - err_log <- record_results[1] + pred1 <- predict(bst, train$data, num_iteration = 1L) + expect_equal(length(pred1), 6513L) + err_pred1 <- sum((pred1 > 0.5) != train$label) / length(train$label) + err_log <- record_results[1L] expect_lt(abs(err_pred1 - err_log), 10e-6) }) test_that("train and predict softmax", { - lb <- as.numeric(iris$Species) - 1 + lb <- as.numeric(iris$Species) - 1L bst <- lightgbm( - data = as.matrix(iris[, -5]) + data = as.matrix(iris[, -5L]) , label = lb - , num_leaves = 4 + , num_leaves = 4L , learning_rate = 0.1 - , nrounds = 20 - , min_data = 20 - , min_hess = 20 + , nrounds = 20L + , min_data = 20L + , min_hess = 20.0 , objective = "multiclass" , metric = "multi_error" - , num_class = 3 + , num_class = 3L ) expect_false(is.null(bst$record_evals)) record_results <- lgb.get.eval.result(bst, "train", "multi_error") expect_lt(min(record_results), 0.03) - pred <- predict(bst, as.matrix(iris[, -5])) - expect_equal(length(pred), nrow(iris) * 3) + pred <- predict(bst, as.matrix(iris[, -5L])) + expect_equal(length(pred), nrow(iris) * 3L) }) @@ -61,11 +61,11 @@ test_that("use of multiple eval metrics works", { bst <- lightgbm( data = train$data , label = train$label - , num_leaves = 4 - , learning_rate = 1 - , nrounds = 10 + , num_leaves = 4L + , learning_rate = 1.0 + , nrounds = 10L , objective = "binary" - , metric = list("binary_error","auc","binary_logloss") + , metric = list("binary_error", "auc", "binary_logloss") ) expect_false(is.null(bst$record_evals)) }) @@ -78,28 +78,28 @@ test_that("training continuation works", { , label = train$label , free_raw_data = FALSE ) - watchlist = list(train = dtrain) + watchlist <- list(train = dtrain) param <- list( objective = "binary" , metric = "binary_logloss" - , num_leaves = 5 - , learning_rate = 1 + , num_leaves = 5L + , learning_rate = 1.0 ) # for the reference, use 10 iterations at once: - bst <- lgb.train(param, dtrain, nrounds = 10, watchlist) - err_bst <- lgb.get.eval.result(bst, "train", "binary_logloss", 10) + bst <- lgb.train(param, dtrain, nrounds = 10L, watchlist) + err_bst <- lgb.get.eval.result(bst, "train", "binary_logloss", 10L) # first 5 iterations: - bst1 <- lgb.train(param, dtrain, nrounds = 5, watchlist) + bst1 <- lgb.train(param, dtrain, nrounds = 5L, watchlist) # test continuing from a model in file 
lgb.save(bst1, "lightgbm.model") # continue for 5 more: - bst2 <- lgb.train(param, dtrain, nrounds = 5, watchlist, init_model = bst1) - err_bst2 <- lgb.get.eval.result(bst2, "train", "binary_logloss", 10) + bst2 <- lgb.train(param, dtrain, nrounds = 5L, watchlist, init_model = bst1) + err_bst2 <- lgb.get.eval.result(bst2, "train", "binary_logloss", 10L) expect_lt(abs(err_bst - err_bst2), 0.01) - bst2 <- lgb.train(param, dtrain, nrounds = 5, watchlist, init_model = "lightgbm.model") - err_bst2 <- lgb.get.eval.result(bst2, "train", "binary_logloss", 10) + bst2 <- lgb.train(param, dtrain, nrounds = 5L, watchlist, init_model = "lightgbm.model") + err_bst2 <- lgb.get.eval.result(bst2, "train", "binary_logloss", 10L) expect_lt(abs(err_bst - err_bst2), 0.01) }) @@ -110,11 +110,11 @@ test_that("cv works", { bst <- lgb.cv( params , dtrain - , 10 - , nfold = 5 - , min_data = 1 - , learning_rate = 1 - , early_stopping_rounds = 10 + , 10L + , nfold = 5L + , min_data = 1L + , learning_rate = 1.0 + , early_stopping_rounds = 10L ) expect_false(is.null(bst$record_evals)) }) diff --git a/R-package/tests/testthat/test_custom_objective.R b/R-package/tests/testthat/test_custom_objective.R index 4d0d52d73251..7b4757b86587 100644 --- a/R-package/tests/testthat/test_custom_objective.R +++ b/R-package/tests/testthat/test_custom_objective.R @@ -1,22 +1,22 @@ -context('Test models with custom objective') +context("Test models with custom objective") -data(agaricus.train, package = 'lightgbm') -data(agaricus.test, package = 'lightgbm') +data(agaricus.train, package = "lightgbm") +data(agaricus.test, package = "lightgbm") dtrain <- lgb.Dataset(agaricus.train$data, label = agaricus.train$label) dtest <- lgb.Dataset(agaricus.test$data, label = agaricus.test$label) watchlist <- list(eval = dtest, train = dtrain) logregobj <- function(preds, dtrain) { labels <- getinfo(dtrain, "label") - preds <- 1 / (1 + exp(-preds)) + preds <- 1.0 / (1.0 + exp(-preds)) grad <- preds - labels - hess <- preds * (1 - preds) + hess <- preds * (1.0 - preds) return(list(grad = grad, hess = hess)) } evalerror <- function(preds, dtrain) { labels <- getinfo(dtrain, "label") - err <- as.numeric(sum(labels != (preds > 0))) / length(labels) + err <- as.numeric(sum(labels != (preds > 0.0))) / length(labels) return(list( name = "error" , value = err @@ -25,12 +25,12 @@ evalerror <- function(preds, dtrain) { } param <- list( - num_leaves = 8 - , learning_rate = 1 + num_leaves = 8L + , learning_rate = 1.0 , objective = logregobj , metric = "auc" ) -num_round <- 10 +num_round <- 10L test_that("custom objective works", { bst <- lgb.train(param, dtrain, num_round, watchlist, eval = evalerror) diff --git a/R-package/tests/testthat/test_dataset.R b/R-package/tests/testthat/test_dataset.R index 6bad414bc010..6a72a993e953 100644 --- a/R-package/tests/testthat/test_dataset.R +++ b/R-package/tests/testthat/test_dataset.R @@ -3,49 +3,49 @@ require(Matrix) context("testing lgb.Dataset functionality") -data(agaricus.test, package = 'lightgbm') -test_data <- agaricus.test$data[1:100,] -test_label <- agaricus.test$label[1:100] +data(agaricus.test, package = "lightgbm") +test_data <- agaricus.test$data[1L:100L, ] +test_label <- agaricus.test$label[1L:100L] test_that("lgb.Dataset: basic construction, saving, loading", { # from sparse matrix dtest1 <- lgb.Dataset(test_data, label = test_label) # from dense matrix dtest2 <- lgb.Dataset(as.matrix(test_data), label = test_label) - expect_equal(getinfo(dtest1, 'label'), getinfo(dtest2, 'label')) + 
expect_equal(getinfo(dtest1, "label"), getinfo(dtest2, "label")) # save to a local file - tmp_file <- tempfile('lgb.Dataset_') + tmp_file <- tempfile("lgb.Dataset_") lgb.Dataset.save(dtest1, tmp_file) # read from a local file dtest3 <- lgb.Dataset(tmp_file) lgb.Dataset.construct(dtest3) unlink(tmp_file) - expect_equal(getinfo(dtest1, 'label'), getinfo(dtest3, 'label')) + expect_equal(getinfo(dtest1, "label"), getinfo(dtest3, "label")) }) test_that("lgb.Dataset: getinfo & setinfo", { dtest <- lgb.Dataset(test_data) dtest$construct() - setinfo(dtest, 'label', test_label) - labels <- getinfo(dtest, 'label') - expect_equal(test_label, getinfo(dtest, 'label')) + setinfo(dtest, "label", test_label) + labels <- getinfo(dtest, "label") + expect_equal(test_label, getinfo(dtest, "label")) - expect_true(length(getinfo(dtest, 'weight')) == 0) - expect_true(length(getinfo(dtest, 'init_score')) == 0) + expect_true(length(getinfo(dtest, "weight")) == 0L) + expect_true(length(getinfo(dtest, "init_score")) == 0L) # any other label should error - expect_error(setinfo(dtest, 'asdf', test_label)) + expect_error(setinfo(dtest, "asdf", test_label)) }) test_that("lgb.Dataset: slice, dim", { dtest <- lgb.Dataset(test_data, label = test_label) lgb.Dataset.construct(dtest) expect_equal(dim(dtest), dim(test_data)) - dsub1 <- slice(dtest, 1:42) + dsub1 <- slice(dtest, seq_len(42L)) lgb.Dataset.construct(dsub1) - expect_equal(nrow(dsub1), 42) + expect_equal(nrow(dsub1), 42L) expect_equal(ncol(dsub1), ncol(test_data)) }) @@ -54,15 +54,17 @@ test_that("lgb.Dataset: colnames", { expect_equal(colnames(dtest), colnames(test_data)) lgb.Dataset.construct(dtest) expect_equal(colnames(dtest), colnames(test_data)) - expect_error( colnames(dtest) <- 'asdf') - new_names <- make.names(1:ncol(test_data)) + expect_error({ + colnames(dtest) <- "asdf" + }) + new_names <- make.names(seq_len(ncol(test_data))) expect_silent(colnames(dtest) <- new_names) expect_equal(colnames(dtest), new_names) }) test_that("lgb.Dataset: nrow is correct for a very sparse matrix", { - nr <- 1000 - x <- Matrix::rsparsematrix(nr, 100, density = 0.0005) + nr <- 1000L + x <- Matrix::rsparsematrix(nr, 100L, density = 0.0005) # we want it very sparse, so that last rows are empty expect_lt(max(x@i), nr) dtest <- lgb.Dataset(x) @@ -70,7 +72,7 @@ test_that("lgb.Dataset: nrow is correct for a very sparse matrix", { }) test_that("lgb.Dataset: Dataset should be able to construct from matrix and return non-null handle", { - rawData <- matrix(runif(1000), ncol = 10) + rawData <- matrix(runif(1000L), ncol = 10L) handle <- NA_real_ ref_handle <- NULL handle <- lightgbm:::lgb.call( diff --git a/R-package/tests/testthat/test_lgb.importance.R b/R-package/tests/testthat/test_lgb.importance.R index 9a0d1e9c3e32..c0e1d6e8ca82 100644 --- a/R-package/tests/testthat/test_lgb.importance.R +++ b/R-package/tests/testthat/test_lgb.importance.R @@ -10,28 +10,28 @@ test_that("lgb.importance() should reject bad inputs", { , -10L:10L , list(c("a", "b", "c")) , data.frame( - x = rnorm(20) + x = rnorm(20L) , y = sample( - x = c(1, 2) - , size = 20 + x = c(1L, 2L) + , size = 20L , replace = TRUE ) ) , data.table::data.table( - x = rnorm(20) + x = rnorm(20L) , y = sample( - x = c(1, 2) - , size = 20 + x = c(1L, 2L) + , size = 20L , replace = TRUE ) ) , lgb.Dataset( - data = matrix(rnorm(100), ncol = 2) - , label = matrix(sample(c(0, 1), 50, replace = TRUE)) + data = matrix(rnorm(100L), ncol = 2L) + , label = matrix(sample(c(0L, 1L), 50L, replace = TRUE)) ) , "lightgbm.model" ) - for (input in 
bad_inputs){ + for (input in bad_inputs) { expect_error({ lgb.importance(input) }, regexp = "'model' has to be an object of class lgb\\.Booster") diff --git a/R-package/tests/testthat/test_lgb.interprete.R b/R-package/tests/testthat/test_lgb.interprete.R index e4656b9879b7..e4664710d9cb 100644 --- a/R-package/tests/testthat/test_lgb.interprete.R +++ b/R-package/tests/testthat/test_lgb.interprete.R @@ -1,10 +1,10 @@ context("lgb.interpete") -.sigmoid <- function(x){ - 1 / (1 + exp(-x)) +.sigmoid <- function(x) { + 1.0 / (1.0 + exp(-x)) } -.logit <- function(x){ - log(x / (1 - x)) +.logit <- function(x) { + log(x / (1.0 - x)) } test_that("lgb.intereprete works as expected for binary classification", { @@ -24,21 +24,21 @@ test_that("lgb.intereprete works as expected for binary classification", { params <- list( objective = "binary" , learning_rate = 0.01 - , num_leaves = 63 - , max_depth = -1 - , min_data_in_leaf = 1 - , min_sum_hessian_in_leaf = 1 + , num_leaves = 63L + , max_depth = -1L + , min_data_in_leaf = 1L + , min_sum_hessian_in_leaf = 1.0 ) model <- lgb.train( params = params , data = dtrain - , nrounds = 10 + , nrounds = 10L ) - num_trees <- 5 + num_trees <- 5L tree_interpretation <- lgb.interprete( model = model , data = test$data - , idxset = 1:num_trees + , idxset = seq_len(num_trees) ) expect_true(methods::is(tree_interpretation, "list")) expect_true(length(tree_interpretation) == num_trees) @@ -46,7 +46,7 @@ test_that("lgb.intereprete works as expected for binary classification", { expect_true(all( sapply( X = tree_interpretation - , FUN = function(treeDT){ + , FUN = function(treeDT) { checks <- c( data.table::is.data.table(treeDT) , identical(names(treeDT), c("Feature", "Contribution")) @@ -65,31 +65,31 @@ test_that("lgb.intereprete works as expected for multiclass classification", { # We must convert factors to numeric # They must be starting from number 0 to use multiclass # For instance: 0, 1, 2, 3, 4, 5... 
- iris$Species <- as.numeric(as.factor(iris$Species)) - 1 + iris$Species <- as.numeric(as.factor(iris$Species)) - 1L # Create imbalanced training data (20, 30, 40 examples for classes 0, 1, 2) - train <- as.matrix(iris[c(1:20, 51:80, 101:140), ]) + train <- as.matrix(iris[c(1L:20L, 51L:80L, 101L:140L), ]) # The 10 last samples of each class are for validation - test <- as.matrix(iris[c(41:50, 91:100, 141:150), ]) - dtrain <- lgb.Dataset(data = train[, 1:4], label = train[, 5]) - dtest <- lgb.Dataset.create.valid(dtrain, data = test[, 1:4], label = test[, 5]) + test <- as.matrix(iris[c(41L:50L, 91L:100L, 141L:150L), ]) + dtrain <- lgb.Dataset(data = train[, 1L:4L], label = train[, 5L]) + dtest <- lgb.Dataset.create.valid(dtrain, data = test[, 1L:4L], label = test[, 5L]) params <- list( objective = "multiclass" , metric = "multi_logloss" - , num_class = 3 + , num_class = 3L , learning_rate = 0.00001 ) model <- lgb.train( params = params , data = dtrain - , nrounds = 10 - , min_data = 1 + , nrounds = 10L + , min_data = 1L ) - num_trees <- 5 + num_trees <- 5L tree_interpretation <- lgb.interprete( model = model - , data = test[, 1:4] - , idxset = 1:num_trees + , data = test[, 1L:4L] + , idxset = seq_len(num_trees) ) expect_true(methods::is(tree_interpretation, "list")) expect_true(length(tree_interpretation) == num_trees) @@ -97,7 +97,7 @@ test_that("lgb.intereprete works as expected for multiclass classification", { expect_true(all( sapply( X = tree_interpretation - , FUN = function(treeDT){ + , FUN = function(treeDT) { checks <- c( data.table::is.data.table(treeDT) , identical(names(treeDT), c("Feature", "Class 0", "Class 1", "Class 2")) diff --git a/R-package/tests/testthat/test_lgb.plot.interpretation.R b/R-package/tests/testthat/test_lgb.plot.interpretation.R index 9332c01a07bd..906f036050f2 100644 --- a/R-package/tests/testthat/test_lgb.plot.interpretation.R +++ b/R-package/tests/testthat/test_lgb.plot.interpretation.R @@ -1,10 +1,10 @@ context("lgb.plot.interpretation") -.sigmoid <- function(x){ - 1 / (1 + exp(-x)) +.sigmoid <- function(x) { + 1.0 / (1.0 + exp(-x)) } -.logit <- function(x){ - log(x / (1 - x)) +.logit <- function(x) { + log(x / (1.0 - x)) } test_that("lgb.plot.interepretation works as expected for binary classification", { @@ -24,34 +24,34 @@ test_that("lgb.plot.interepretation works as expected for binary classification" params <- list( objective = "binary" , learning_rate = 0.01 - , num_leaves = 63 - , max_depth = -1 - , min_data_in_leaf = 1 - , min_sum_hessian_in_leaf = 1 + , num_leaves = 63L + , max_depth = -1L + , min_data_in_leaf = 1L + , min_sum_hessian_in_leaf = 1.0 ) model <- lgb.train( params = params , data = dtrain - , nrounds = 10 + , nrounds = 10L ) - num_trees <- 5 + num_trees <- 5L tree_interpretation <- lgb.interprete( model = model , data = test$data - , idxset = 1:num_trees + , idxset = seq_len(num_trees) ) expect_true({ lgb.plot.interpretation( - tree_interpretation_dt = tree_interpretation[[1]] - , top_n = 5 + tree_interpretation_dt = tree_interpretation[[1L]] + , top_n = 5L ) TRUE }) # should also work when you explicitly pass cex plot_res <- lgb.plot.interpretation( - tree_interpretation_dt = tree_interpretation[[1]] - , top_n = 5 + tree_interpretation_dt = tree_interpretation[[1L]] + , top_n = 5L , cex = 0.95 ) expect_null(plot_res) @@ -63,35 +63,35 @@ test_that("lgb.plot.interepretation works as expected for multiclass classificat # We must convert factors to numeric # They must be starting from number 0 to use multiclass # For instance: 0, 1, 2, 
3, 4, 5... - iris$Species <- as.numeric(as.factor(iris$Species)) - 1 + iris$Species <- as.numeric(as.factor(iris$Species)) - 1L # Create imbalanced training data (20, 30, 40 examples for classes 0, 1, 2) - train <- as.matrix(iris[c(1:20, 51:80, 101:140), ]) + train <- as.matrix(iris[c(1L:20L, 51L:80L, 101L:140L), ]) # The 10 last samples of each class are for validation - test <- as.matrix(iris[c(41:50, 91:100, 141:150), ]) - dtrain <- lgb.Dataset(data = train[, 1:4], label = train[, 5]) - dtest <- lgb.Dataset.create.valid(dtrain, data = test[, 1:4], label = test[, 5]) + test <- as.matrix(iris[c(41L:50L, 91L:100L, 141L:150L), ]) + dtrain <- lgb.Dataset(data = train[, 1L:4L], label = train[, 5L]) + dtest <- lgb.Dataset.create.valid(dtrain, data = test[, 1L:4L], label = test[, 5L]) params <- list( objective = "multiclass" , metric = "multi_logloss" - , num_class = 3 + , num_class = 3L , learning_rate = 0.00001 ) model <- lgb.train( params = params , data = dtrain - , nrounds = 10 - , min_data = 1 + , nrounds = 10L + , min_data = 1L ) - num_trees <- 5 + num_trees <- 5L tree_interpretation <- lgb.interprete( model = model - , data = test[, 1:4] - , idxset = 1:num_trees + , data = test[, 1L:4L] + , idxset = seq_len(num_trees) ) plot_res <- lgb.plot.interpretation( - tree_interpretation_dt = tree_interpretation[[1]] - , top_n = 5 + tree_interpretation_dt = tree_interpretation[[1L]] + , top_n = 5L ) expect_null(plot_res) }) diff --git a/R-package/tests/testthat/test_parameters.R b/R-package/tests/testthat/test_parameters.R index 83aa4ce9f4a9..63fc124e57f8 100644 --- a/R-package/tests/testthat/test_parameters.R +++ b/R-package/tests/testthat/test_parameters.R @@ -1,29 +1,29 @@ context("feature penalties") -data(agaricus.train, package = 'lightgbm') -data(agaricus.test, package = 'lightgbm') +data(agaricus.train, package = "lightgbm") +data(agaricus.test, package = "lightgbm") train <- agaricus.train test <- agaricus.test test_that("Feature penalties work properly", { # Fit a series of models with varying penalty on most important variable var_name <- "odor=none" - var_index <- which(train$data@Dimnames[[2]] == var_name) + var_index <- which(train$data@Dimnames[[2L]] == var_name) - bst <- lapply(seq(1, 0, by = -0.1), function(x) { - feature_penalties <- rep(1, ncol(train$data)) + bst <- lapply(seq(1.0, 0.0, by = -0.1), function(x) { + feature_penalties <- rep(1.0, ncol(train$data)) feature_penalties[var_index] <- x lightgbm( data = train$data , label = train$label - , num_leaves = 5 + , num_leaves = 5L , learning_rate = 0.05 - , nrounds = 20 + , nrounds = 20L , objective = "binary" , feature_penalty = paste0(feature_penalties, collapse = ",") , metric = "binary_error" - , verbose = -1 + , verbose = -1L ) }) @@ -32,16 +32,16 @@ test_that("Feature penalties work properly", { var_freq <- lapply(bst, function(x) lgb.importance(x)[Feature == var_name, Frequency]) # Ensure that feature gain, cover, and frequency decreases with stronger penalties - expect_true(all(diff(unlist(var_gain)) <= 0)) - expect_true(all(diff(unlist(var_cover)) <= 0)) - expect_true(all(diff(unlist(var_freq)) <= 0)) + expect_true(all(diff(unlist(var_gain)) <= 0.0)) + expect_true(all(diff(unlist(var_cover)) <= 0.0)) + expect_true(all(diff(unlist(var_freq)) <= 0.0)) - expect_lt(min(diff(unlist(var_gain))), 0) - expect_lt(min(diff(unlist(var_cover))), 0) - expect_lt(min(diff(unlist(var_freq))), 0) + expect_lt(min(diff(unlist(var_gain))), 0.0) + expect_lt(min(diff(unlist(var_cover))), 0.0) + expect_lt(min(diff(unlist(var_freq))), 0.0) # 
Ensure that feature is not used when feature_penalty = 0 - expect_length(var_gain[[length(var_gain)]], 0) + expect_length(var_gain[[length(var_gain)]], 0L) }) expect_true(".PARAMETER_ALIASES() returns a named list", { @@ -56,17 +56,17 @@ expect_true(".PARAMETER_ALIASES() returns a named list", { }) expect_true("training should warn if you use 'dart' boosting, specified with 'boosting' or aliases", { - for (boosting_param in .PARAMETER_ALIASES()[["boosting"]]){ + for (boosting_param in .PARAMETER_ALIASES()[["boosting"]]) { expect_warning({ result <- lightgbm( data = train$data , label = train$label - , num_leaves = 5 + , num_leaves = 5L , learning_rate = 0.05 - , nrounds = 5 + , nrounds = 5L , objective = "binary" , metric = "binary_error" - , verbose = -1 + , verbose = -1L , params = stats::setNames( object = "dart" , nm = boosting_param diff --git a/build_r.R b/build_r.R index 8a9924c75df7..74b025847366 100644 --- a/build_r.R +++ b/build_r.R @@ -15,9 +15,9 @@ # system() will not raise an R exception if the process called # fails. Wrapping it here to get that behavior -.run_shell_command <- function(cmd, ...){ +.run_shell_command <- function(cmd, ...) { exit_code <- system(cmd, ...) - if (exit_code != 0){ + if (exit_code != 0L) { stop(paste0("Command failed with exit code: ", exit_code)) } } @@ -27,33 +27,43 @@ unlink(x = "lightgbm_r", recursive = TRUE) dir.create("lightgbm_r") # copy in the relevant files -result <- file.copy(from = "R-package/./", - to = "lightgbm_r/", - recursive = TRUE, - overwrite = TRUE) +result <- file.copy( + from = "R-package/./" + , to = "lightgbm_r/" + , recursive = TRUE + , overwrite = TRUE +) .handle_result(result) -result <- file.copy(from = "include/", - to = file.path("lightgbm_r", "src/"), - recursive = TRUE, - overwrite = TRUE) +result <- file.copy( + from = "include/" + , to = file.path("lightgbm_r", "src/") + , recursive = TRUE + , overwrite = TRUE +) .handle_result(result) -result <- file.copy(from = "src/", - to = file.path("lightgbm_r", "src/"), - recursive = TRUE, - overwrite = TRUE) +result <- file.copy( + from = "src/" + , to = file.path("lightgbm_r", "src/") + , recursive = TRUE + , overwrite = TRUE +) .handle_result(result) -result <- file.copy(from = "compute/", - to = file.path("lightgbm_r", "src/"), - recursive = TRUE, - overwrite = TRUE) +result <- file.copy( + from = "compute/" + , to = file.path("lightgbm_r", "src/") + , recursive = TRUE + , overwrite = TRUE +) .handle_result(result) -result <- file.copy(from = "CMakeLists.txt", - to = file.path("lightgbm_r", "inst", "bin/"), - overwrite = TRUE) +result <- file.copy( + from = "CMakeLists.txt" + , to = file.path("lightgbm_r", "inst", "bin/") + , overwrite = TRUE +) .handle_result(result) # Build the package (do not touch this line!) 
@@ -68,15 +78,12 @@ version <- gsub( "Version: ", "", grep( - "Version: ", - readLines(con = file.path("lightgbm_r", "DESCRIPTION")), - value = TRUE + "Version: " + , readLines(con = file.path("lightgbm_r", "DESCRIPTION")) + , value = TRUE ) ) tarball <- file.path(getwd(), sprintf("lightgbm_%s.tar.gz", version)) cmd <- sprintf("R CMD INSTALL %s --no-multiarch", tarball) .run_shell_command(cmd) - -# Run R CMD CHECK -# R CMD CHECK lightgbm_2.1.2.tar.gz --as-cran | tee check.log | cat diff --git a/build_r_site.R b/build_r_site.R index fd2f95bc7c7d..7a2435ea0e8b 100644 --- a/build_r_site.R +++ b/build_r_site.R @@ -9,9 +9,11 @@ devtools::document() clean_site() init_site() build_home(preview = FALSE, quiet = FALSE) -build_reference(lazy = FALSE, - document = FALSE, - examples = TRUE, - run_dont_run = FALSE, - seed = 42, - preview = FALSE) +build_reference( + lazy = FALSE + , document = FALSE + , examples = TRUE + , run_dont_run = FALSE + , seed = 42L + , preview = FALSE +)
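A cross-cutting note on the hunks above: they apply the same handful of conventions rather than changing behaviour, namely double-quoted strings, the <- assignment operator, integer literals written with an L suffix, explicit doubles such as 1.0, seq_len() in place of 1:n, and a space before every opening brace. The short sketch below is illustrative only; the object names in it (n_obs, threshold, scores, labels, msg) are invented for this note and do not appear anywhere in the package.

# Illustrative sketch of the conventions applied throughout this diff:
# "double quotes", <- assignment, integer literals with an L suffix,
# explicit doubles, seq_len() instead of 1:n, and ") {" brace placement.
n_obs <- 100L                          # integer literal, not 100
threshold <- 0.5                       # doubles stay written as doubles
scores <- runif(n_obs)
labels <- rep(0L, n_obs)
for (i in seq_len(n_obs)) {            # seq_len(), not 1:n_obs
  if (scores[i] > threshold) {
    labels[i] <- 1L
  }
}
msg <- sprintf("positives: %i of %i", sum(labels == 1L), n_obs)
cat(msg, "\n")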