From dd51fa372f1ff0b4ab162b16c53da3bb4e9f88ad Mon Sep 17 00:00:00 2001 From: Philip Cho Date: Wed, 31 Oct 2018 16:53:50 -0700 Subject: [PATCH 01/11] Clean up logic for converting tree_method to updater sequence --- src/learner.cc | 120 ++++++++++++++++++++++++++++++++++--------------- 1 file changed, 85 insertions(+), 35 deletions(-) diff --git a/src/learner.cc b/src/learner.cc index d73c8b18e2d2..5dd79a3bd366 100644 --- a/src/learner.cc +++ b/src/learner.cc @@ -154,37 +154,43 @@ class LearnerImpl : public Learner { } void ConfigureUpdaters() { - if (tparam_.tree_method == 0 || tparam_.tree_method == 1 || - tparam_.tree_method == 2) { - if (cfg_.count("updater") == 0) { - if (tparam_.dsplit == 1) { - cfg_["updater"] = "distcol"; - } else if (tparam_.dsplit == 2) { - cfg_["updater"] = "grow_histmaker,prune"; - } - } - } else if (tparam_.tree_method == 3) { - /* histogram-based algorithm */ - LOG(CONSOLE) << "Tree method is selected to be \'hist\', which uses a " - "single updater " - << "grow_fast_histmaker."; + /* Choose updaters according to tree_method parameters */ + if (cfg_.count("updater") > 0) { + LOG(CONSOLE) << "DANGER AHEAD: You have manually specified `updater` " + "parameter. The `tree_method` parameter will be ignored. " + "Incorrect sequence of updaters will produce undefined " + "behavior. For common uses, we recommend using " + "`tree_method` parameter instead."; + return; + } + + if (tparam_.tree_method == 0) { // tree_method='auto' + // Use heuristic to choose between 'exact' and 'approx' + // This choice is deferred to PerformTreeMethodHeuristic(). + ; + } else if (tparam_.tree_method == 1) { // tree_method='approx' + cfg_["updater"] = "grow_histmaker,prune"; + } else if (tparam_.tree_method == 2) { // tree_method='exact' + cfg_["updater"] = "grow_colmaker,prune"; + } else if (tparam_.tree_method == 3) { // tree_method='hist' + LOG(CONSOLE) << "Tree method is selected to be 'hist', which uses a " + "single updater grow_fast_histmaker."; cfg_["updater"] = "grow_fast_histmaker"; - } else if (tparam_.tree_method == 4) { + } else if (tparam_.tree_method == 4) { // tree_method='gpu_exact' this->AssertGPUSupport(); - if (cfg_.count("updater") == 0) { - cfg_["updater"] = "grow_gpu,prune"; - } + cfg_["updater"] = "grow_gpu,prune"; if (cfg_.count("predictor") == 0) { cfg_["predictor"] = "gpu_predictor"; } - } else if (tparam_.tree_method == 5) { + } else if (tparam_.tree_method == 5) { // tree_method='gpu_hist' this->AssertGPUSupport(); - if (cfg_.count("updater") == 0) { - cfg_["updater"] = "grow_gpu_hist"; - } + cfg_["updater"] = "grow_gpu_hist"; if (cfg_.count("predictor") == 0) { cfg_["predictor"] = "gpu_predictor"; } + } else { + LOG(FATAL) << "Unknown tree_method (" << tparam_.tree_method + << ") detected"; } } @@ -376,7 +382,7 @@ class LearnerImpl : public Learner { if (tparam_.seed_per_iteration || rabit::IsDistributed()) { common::GlobalRandom().seed(tparam_.seed * kRandSeedMagic + iter); } - this->LazyInitDMatrix(train); + this->PerformTreeMethodHeuristic(train); monitor_.Start("PredictRaw"); this->PredictRaw(train, &preds_); monitor_.Stop("PredictRaw"); @@ -393,7 +399,7 @@ class LearnerImpl : public Learner { if (tparam_.seed_per_iteration || rabit::IsDistributed()) { common::GlobalRandom().seed(tparam_.seed * kRandSeedMagic + iter); } - this->LazyInitDMatrix(train); + this->PerformTreeMethodHeuristic(train); gbm_->DoBoost(train, in_gpair); monitor_.Stop("BoostOneIter"); } @@ -479,21 +485,65 @@ class LearnerImpl : public Learner { } protected: - // check if p_train is ready to used by training. - // if not, initialize the column access. - inline void LazyInitDMatrix(DMatrix* p_train) { - if (tparam_.tree_method == 3 || tparam_.tree_method == 4 || - tparam_.tree_method == 5 || name_gbm_ == "gblinear") { + // Revise `tree_method` and `updater` parameters after seeing the training + // data matrix + inline void PerformTreeMethodHeuristic(DMatrix* p_train) { + if (name_gbm_ != "gbtree" || cfg_.count("updater") > 0) { + // 1. This method is not applicable for non-tree learners + // 2. This method is disabled when `updater` parameter is explicitly + // set, since only experts are expected to do so. return; } - if (!p_train->SingleColBlock() && cfg_.count("updater") == 0) { - if (tparam_.tree_method == 2) { - LOG(CONSOLE) << "tree method is set to be 'exact'," - << " but currently we are only able to proceed with " - "approximate algorithm"; + const int current_tree_method = tparam_.tree_method; + if (rabit::IsDistributed()) { + /* Choose tree_method='approx' when distributed training is activated */ + CHECK(tparam_.dsplit != 0) + << "Precondition violated; dsplit cannot be zero in distributed mode"; + if (tparam_.dsplit == 1) { + LOG(FATAL) << "Column-wise data split is currently not supported"; } - cfg_["updater"] = "grow_histmaker,prune"; + if (current_tree_method == 0) { + LOG(CONSOLE) << "Tree method is automatically selected to be 'approx' " + "for distributed training."; + } else if (current_tree_method == 2 || current_tree_method == 3) { + LOG(CONSOLE) << "Tree method was set to be '" + << (current_tree_method == 2 ? "exact" : "hist") + << "', but only 'approx' is available for distributed " + "training. The `tree_method` parameter is now being " + "changed to 'approx'"; + } else if (current_tree_method == 4 || current_tree_method == 5) { + LOG(FATAL) << "Distributed training is not available with GPU algoritms"; + } + tparam_.tree_method = 1; + } else if (!p_train->SingleColBlock()) { + /* Some tree methods are not available for external-memory DMatrix */ + if (current_tree_method == 0) { + LOG(CONSOLE) << "Tree method is automatically set to 'approx' " + "since external-memory data matrix is used."; + } else if (current_tree_method == 2) { + LOG(CONSOLE) << "Tree method was set to be 'exact', " + "but currently we are only able to proceed with " + "approximate algorithm ('approx') because external-" + "memory data matrix is used."; + } else if (current_tree_method == 4 || current_tree_method == 5) { + LOG(FATAL) + << "External-memory data matrix is not available with GPU algorithms"; + } + tparam_.tree_method = 1; + } else if (p_train->Info().num_row_ >= (4UL << 20UL) + && current_tree_method == 0) { + /* Choose tree_method='approx' automatically for large data matrix */ + LOG(CONSOLE) << "Tree method is automatically selected to be " + "'approx' for faster speed. To use old behavior " + "(exact greedy algorithm on single machine), " + "set tree_method to 'exact'."; + tparam_.tree_method = 1; + } + + /* If tree_method was changed, re-configure updaters and gradient boosters */ + if (tparam_.tree_method != current_tree_method) { + ConfigureUpdaters(); if (gbm_ != nullptr) { gbm_->Configure(cfg_.begin(), cfg_.end()); } From a4aa22e5f9c381acd90c4b89033ea5129a24e044 Mon Sep 17 00:00:00 2001 From: Philip Cho Date: Wed, 31 Oct 2018 16:58:04 -0700 Subject: [PATCH 02/11] Use C++11 enum class for extra safety Compiler will give warnings if switch statements don't handle all possible values of C++11 enum class. Also allow enum class to be used as DMLC parameter. --- src/common/enum_class_param.h | 45 ++++++++++ src/learner.cc | 101 +++++++++++++++------- tests/cpp/common/test_enum_class_param.cc | 55 ++++++++++++ 3 files changed, 171 insertions(+), 30 deletions(-) create mode 100644 src/common/enum_class_param.h create mode 100644 tests/cpp/common/test_enum_class_param.cc diff --git a/src/common/enum_class_param.h b/src/common/enum_class_param.h new file mode 100644 index 000000000000..7bb0c5748171 --- /dev/null +++ b/src/common/enum_class_param.h @@ -0,0 +1,45 @@ +/*! + * Copyright 2015-2018 by Contributors + * \file enum_class_param.h + * \brief macro for using C++11 enum class as DMLC parameter + * \author Hyunsu Philip Cho + */ + +#ifndef XGBOOST_COMMON_ENUM_CLASS_PARAM_H_ +#define XGBOOST_COMMON_ENUM_CLASS_PARAM_H_ + +#include +#include +#include + +// specialization of FieldEntry for enum class (backed by int) +#define DECLARE_FIELD_ENUM_CLASS(EnumClass) \ +template <> \ +class dmlc::parameter::FieldEntry< EnumClass > \ + : public dmlc::parameter::FieldEntry { \ + public: \ + FieldEntry() { \ + static_assert( \ + std::is_same::type>::value, \ + "enum class must be backed by int"); \ + is_enum_ = true; \ + } \ + typedef FieldEntry Super; \ + void Set(void *head, const std::string &value) const override { \ + Super::Set(head, value); \ + } \ + inline FieldEntry& add_enum(const std::string &key, EnumClass value) { \ + Super::add_enum(key, static_cast(value)); \ + return *this; \ + } \ + inline FieldEntry& set_default(const EnumClass& default_value) { \ + default_value_ = static_cast(default_value); \ + has_default_ = true; \ + return *this; \ + } \ + inline void Init(const std::string &key, void *head, EnumClass& ref) { \ + Super::Init(key, head, *reinterpret_cast(&ref)); \ + } \ +}; + +#endif // XGBOOST_COMMON_ENUM_CLASS_PARAM_H_ diff --git a/src/learner.cc b/src/learner.cc index 5dd79a3bd366..913727f2eab9 100644 --- a/src/learner.cc +++ b/src/learner.cc @@ -19,14 +19,22 @@ #include "./common/host_device_vector.h" #include "./common/io.h" #include "./common/random.h" +#include "./common/enum_class_param.h" #include "common/timer.h" namespace { const char* kMaxDeltaStepDefaultValue = "0.7"; +enum class TreeMethod : int { + kAuto = 0, kApprox = 1, kExact = 2, kHist = 3, + kGPUExact = 4, kGPUHist = 5 +}; + } // anonymous namespace +DECLARE_FIELD_ENUM_CLASS(TreeMethod); + namespace xgboost { // implementation of base learner. bool Learner::AllowLazyCheckPoint() const { @@ -82,7 +90,7 @@ struct LearnerTrainParam : public dmlc::Parameter { // data split mode, can be row, col, or none. int dsplit; // tree construction method - int tree_method; + TreeMethod tree_method; // internal test flag std::string test_flag; // number of threads to use if OpenMP is enabled @@ -109,13 +117,13 @@ struct LearnerTrainParam : public dmlc::Parameter { .add_enum("row", 2) .describe("Data split mode for distributed training."); DMLC_DECLARE_FIELD(tree_method) - .set_default(0) - .add_enum("auto", 0) - .add_enum("approx", 1) - .add_enum("exact", 2) - .add_enum("hist", 3) - .add_enum("gpu_exact", 4) - .add_enum("gpu_hist", 5) + .set_default(TreeMethod::kAuto) + .add_enum("auto", TreeMethod::kAuto) + .add_enum("approx", TreeMethod::kApprox) + .add_enum("exact", TreeMethod::kExact) + .add_enum("hist", TreeMethod::kHist) + .add_enum("gpu_exact", TreeMethod::kGPUExact) + .add_enum("gpu_hist", TreeMethod::kGPUHist) .describe("Choice of tree construction method."); DMLC_DECLARE_FIELD(test_flag).set_default("").describe( "Internal test flag"); @@ -164,33 +172,39 @@ class LearnerImpl : public Learner { return; } - if (tparam_.tree_method == 0) { // tree_method='auto' + switch (tparam_.tree_method) { + case TreeMethod::kAuto: // Use heuristic to choose between 'exact' and 'approx' // This choice is deferred to PerformTreeMethodHeuristic(). - ; - } else if (tparam_.tree_method == 1) { // tree_method='approx' + break; + case TreeMethod::kApprox: cfg_["updater"] = "grow_histmaker,prune"; - } else if (tparam_.tree_method == 2) { // tree_method='exact' + break; + case TreeMethod::kExact: cfg_["updater"] = "grow_colmaker,prune"; - } else if (tparam_.tree_method == 3) { // tree_method='hist' + break; + case TreeMethod::kHist: LOG(CONSOLE) << "Tree method is selected to be 'hist', which uses a " "single updater grow_fast_histmaker."; cfg_["updater"] = "grow_fast_histmaker"; - } else if (tparam_.tree_method == 4) { // tree_method='gpu_exact' + break; + case TreeMethod::kGPUExact: this->AssertGPUSupport(); cfg_["updater"] = "grow_gpu,prune"; if (cfg_.count("predictor") == 0) { cfg_["predictor"] = "gpu_predictor"; } - } else if (tparam_.tree_method == 5) { // tree_method='gpu_hist' + break; + case TreeMethod::kGPUHist: this->AssertGPUSupport(); cfg_["updater"] = "grow_gpu_hist"; if (cfg_.count("predictor") == 0) { cfg_["predictor"] = "gpu_predictor"; } - } else { - LOG(FATAL) << "Unknown tree_method (" << tparam_.tree_method - << ") detected"; + break; + default: + LOG(FATAL) << "Unknown tree_method (" + << static_cast(tparam_.tree_method) << ") detected"; } } @@ -495,7 +509,7 @@ class LearnerImpl : public Learner { return; } - const int current_tree_method = tparam_.tree_method; + const TreeMethod current_tree_method = tparam_.tree_method; if (rabit::IsDistributed()) { /* Choose tree_method='approx' when distributed training is activated */ CHECK(tparam_.dsplit != 0) @@ -503,42 +517,69 @@ class LearnerImpl : public Learner { if (tparam_.dsplit == 1) { LOG(FATAL) << "Column-wise data split is currently not supported"; } - if (current_tree_method == 0) { + switch (current_tree_method) { + case TreeMethod::kAuto: LOG(CONSOLE) << "Tree method is automatically selected to be 'approx' " "for distributed training."; - } else if (current_tree_method == 2 || current_tree_method == 3) { + break; + case TreeMethod::kApprox: + // things are okay, do nothing + break; + case TreeMethod::kExact: + case TreeMethod::kHist: LOG(CONSOLE) << "Tree method was set to be '" - << (current_tree_method == 2 ? "exact" : "hist") + << (current_tree_method == TreeMethod::kExact ? + "exact" : "hist") << "', but only 'approx' is available for distributed " "training. The `tree_method` parameter is now being " "changed to 'approx'"; - } else if (current_tree_method == 4 || current_tree_method == 5) { + break; + case TreeMethod::kGPUExact: + case TreeMethod::kGPUHist: LOG(FATAL) << "Distributed training is not available with GPU algoritms"; + break; + default: + LOG(FATAL) << "Unknown tree_method (" + << static_cast(current_tree_method) << ") detected"; } - tparam_.tree_method = 1; + tparam_.tree_method = TreeMethod::kApprox; } else if (!p_train->SingleColBlock()) { /* Some tree methods are not available for external-memory DMatrix */ - if (current_tree_method == 0) { + switch (current_tree_method) { + case TreeMethod::kAuto: LOG(CONSOLE) << "Tree method is automatically set to 'approx' " "since external-memory data matrix is used."; - } else if (current_tree_method == 2) { + break; + case TreeMethod::kApprox: + // things are okay, do nothing + break; + case TreeMethod::kExact: LOG(CONSOLE) << "Tree method was set to be 'exact', " "but currently we are only able to proceed with " "approximate algorithm ('approx') because external-" "memory data matrix is used."; - } else if (current_tree_method == 4 || current_tree_method == 5) { + break; + case TreeMethod::kHist: + // things are okay, do nothing + break; + case TreeMethod::kGPUExact: + case TreeMethod::kGPUHist: LOG(FATAL) << "External-memory data matrix is not available with GPU algorithms"; + break; + default: + LOG(FATAL) << "Unknown tree_method (" + << static_cast(current_tree_method) << ") detected"; } - tparam_.tree_method = 1; + tparam_.tree_method = TreeMethod::kApprox; } else if (p_train->Info().num_row_ >= (4UL << 20UL) - && current_tree_method == 0) { + && current_tree_method == TreeMethod::kAuto) { /* Choose tree_method='approx' automatically for large data matrix */ LOG(CONSOLE) << "Tree method is automatically selected to be " "'approx' for faster speed. To use old behavior " "(exact greedy algorithm on single machine), " "set tree_method to 'exact'."; - tparam_.tree_method = 1; + tparam_.tree_method = TreeMethod::kApprox; } /* If tree_method was changed, re-configure updaters and gradient boosters */ diff --git a/tests/cpp/common/test_enum_class_param.cc b/tests/cpp/common/test_enum_class_param.cc new file mode 100644 index 000000000000..a9eab9feb8b1 --- /dev/null +++ b/tests/cpp/common/test_enum_class_param.cc @@ -0,0 +1,55 @@ +#include "../../../src/common/enum_class_param.h" +#include +#include + +enum class Foo : int { + kBar = 0, kFrog = 1, kCat = 2, kDog = 3 +}; + +DECLARE_FIELD_ENUM_CLASS(Foo); + +struct MyParam : dmlc::Parameter { + Foo foo; + int bar; + DMLC_DECLARE_PARAMETER(MyParam) { + DMLC_DECLARE_FIELD(foo) + .set_default(Foo::kBar) + .add_enum("bar", Foo::kBar) + .add_enum("frog", Foo::kFrog) + .add_enum("cat", Foo::kCat) + .add_enum("dog", Foo::kDog); + DMLC_DECLARE_FIELD(bar) + .set_default(-1); + } +}; + +DMLC_REGISTER_PARAMETER(MyParam); + +TEST(EnumClassParam, Basic) { + MyParam param; + std::map kwargs{ + {"foo", "frog"}, {"bar", "10"} + }; + // try initializing + param.Init(kwargs); + ASSERT_EQ(param.foo, Foo::kFrog); + ASSERT_EQ(param.bar, 10); + + // try all possible enum values + kwargs["foo"] = "bar"; + param.Init(kwargs); + ASSERT_EQ(param.foo, Foo::kBar); + kwargs["foo"] = "frog"; + param.Init(kwargs); + ASSERT_EQ(param.foo, Foo::kFrog); + kwargs["foo"] = "cat"; + param.Init(kwargs); + ASSERT_EQ(param.foo, Foo::kCat); + kwargs["foo"] = "dog"; + param.Init(kwargs); + ASSERT_EQ(param.foo, Foo::kDog); + + // try setting non-existent enum value + kwargs["foo"] = "human"; + ASSERT_THROW(param.Init(kwargs), dmlc::ParamError); +} From ba924a42bdb97faf48abfb071f22bcec4435ea0a Mon Sep 17 00:00:00 2001 From: Philip Cho Date: Wed, 31 Oct 2018 20:12:01 -0700 Subject: [PATCH 03/11] Fix compiler error + lint --- src/common/enum_class_param.h | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/src/common/enum_class_param.h b/src/common/enum_class_param.h index 7bb0c5748171..79134f6a3a21 100644 --- a/src/common/enum_class_param.h +++ b/src/common/enum_class_param.h @@ -14,9 +14,10 @@ // specialization of FieldEntry for enum class (backed by int) #define DECLARE_FIELD_ENUM_CLASS(EnumClass) \ +namespace dmlc { \ +namespace parameter { \ template <> \ -class dmlc::parameter::FieldEntry< EnumClass > \ - : public dmlc::parameter::FieldEntry { \ +class FieldEntry : public FieldEntry { \ public: \ FieldEntry() { \ static_assert( \ @@ -24,7 +25,7 @@ class dmlc::parameter::FieldEntry< EnumClass > \ "enum class must be backed by int"); \ is_enum_ = true; \ } \ - typedef FieldEntry Super; \ + using Super = FieldEntry; \ void Set(void *head, const std::string &value) const override { \ Super::Set(head, value); \ } \ @@ -37,9 +38,12 @@ class dmlc::parameter::FieldEntry< EnumClass > \ has_default_ = true; \ return *this; \ } \ + /* NOLINTNEXTLINE */ \ inline void Init(const std::string &key, void *head, EnumClass& ref) { \ Super::Init(key, head, *reinterpret_cast(&ref)); \ } \ -}; +}; \ +} /* namespace parameter */ \ +} /* namespace dmlc */ #endif // XGBOOST_COMMON_ENUM_CLASS_PARAM_H_ From ed88db54d620dd7dfc36bd5c6ae3787087902e9d Mon Sep 17 00:00:00 2001 From: Philip Cho Date: Wed, 31 Oct 2018 22:06:58 -0700 Subject: [PATCH 04/11] Address reviewer comment --- src/learner.cc | 31 +++++++++++++++++++------------ 1 file changed, 19 insertions(+), 12 deletions(-) diff --git a/src/learner.cc b/src/learner.cc index 913727f2eab9..a313800a2cd8 100644 --- a/src/learner.cc +++ b/src/learner.cc @@ -31,9 +31,14 @@ enum class TreeMethod : int { kGPUExact = 4, kGPUHist = 5 }; +enum class DataSplitMode : int { + kAuto = 0, kCol = 1, kRow = 2 +}; + } // anonymous namespace DECLARE_FIELD_ENUM_CLASS(TreeMethod); +DECLARE_FIELD_ENUM_CLASS(DataSplitMode); namespace xgboost { // implementation of base learner. @@ -88,7 +93,7 @@ struct LearnerTrainParam : public dmlc::Parameter { // whether seed the PRNG each iteration bool seed_per_iteration; // data split mode, can be row, col, or none. - int dsplit; + DataSplitMode dsplit; // tree construction method TreeMethod tree_method; // internal test flag @@ -111,10 +116,10 @@ struct LearnerTrainParam : public dmlc::Parameter { "this option will be switched on automatically on distributed " "mode."); DMLC_DECLARE_FIELD(dsplit) - .set_default(0) - .add_enum("auto", 0) - .add_enum("col", 1) - .add_enum("row", 2) + .set_default(DataSplitMode::kAuto) + .add_enum("auto", DataSplitMode::kAuto) + .add_enum("col", DataSplitMode::kCol) + .add_enum("row", DataSplitMode::kRow) .describe("Data split mode for distributed training."); DMLC_DECLARE_FIELD(tree_method) .set_default(TreeMethod::kAuto) @@ -234,8 +239,8 @@ class LearnerImpl : public Learner { // add additional parameters // These are cosntraints that need to be satisfied. - if (tparam_.dsplit == 0 && rabit::IsDistributed()) { - tparam_.dsplit = 2; + if (tparam_.dsplit == DataSplitMode::kAuto && rabit::IsDistributed()) { + tparam_.dsplit = DataSplitMode::kRow; } if (cfg_.count("num_class") != 0) { @@ -432,7 +437,7 @@ class LearnerImpl : public Learner { for (auto& ev : metrics_) { os << '\t' << data_names[i] << '-' << ev->Name() << ':' << ev->Eval(preds_.ConstHostVector(), data_sets[i]->Info(), - tparam_.dsplit == 2); + tparam_.dsplit == DataSplitMode::kRow); } } @@ -476,7 +481,7 @@ class LearnerImpl : public Learner { obj_->EvalTransform(&preds_); return std::make_pair(metric, ev->Eval(preds_.ConstHostVector(), data->Info(), - tparam_.dsplit == 2)); + tparam_.dsplit == DataSplitMode::kRow)); } void Predict(DMatrix* data, bool output_margin, @@ -512,10 +517,12 @@ class LearnerImpl : public Learner { const TreeMethod current_tree_method = tparam_.tree_method; if (rabit::IsDistributed()) { /* Choose tree_method='approx' when distributed training is activated */ - CHECK(tparam_.dsplit != 0) + CHECK(tparam_.dsplit != DataSplitMode::kAuto) << "Precondition violated; dsplit cannot be zero in distributed mode"; - if (tparam_.dsplit == 1) { - LOG(FATAL) << "Column-wise data split is currently not supported"; + if (tparam_.dsplit == DataSplitMode::kCol) { + // 'distcol' updater hidden until it becomes functional again + // See discussion at https://github.com/dmlc/xgboost/issues/1832 + LOG(FATAL) << "Column-wise data split is currently not supported."; } switch (current_tree_method) { case TreeMethod::kAuto: From 5fdb4ca93ee443954aff399cebc0d4be7d3f8577 Mon Sep 17 00:00:00 2001 From: Philip Cho Date: Wed, 31 Oct 2018 22:14:35 -0700 Subject: [PATCH 05/11] Better docstring for DECLARE_FIELD_ENUM_CLASS --- src/common/enum_class_param.h | 37 +++++++++++++++++++++++++++++++++-- 1 file changed, 35 insertions(+), 2 deletions(-) diff --git a/src/common/enum_class_param.h b/src/common/enum_class_param.h index 79134f6a3a21..7877a5b017bc 100644 --- a/src/common/enum_class_param.h +++ b/src/common/enum_class_param.h @@ -1,5 +1,5 @@ /*! - * Copyright 2015-2018 by Contributors + * Copyright 2018 by Contributors * \file enum_class_param.h * \brief macro for using C++11 enum class as DMLC parameter * \author Hyunsu Philip Cho @@ -12,7 +12,40 @@ #include #include -// specialization of FieldEntry for enum class (backed by int) +/*! + * \brief Specialization of FieldEntry for enum class (backed by int) + * + * Use this macro to use C++11 enum class as DMLC parameters + * + * Usage: + * + * \code + * + * // enum class must inherit from int type + * enum class Foo : int { + * kBar = 0, kFrog = 1, kCat = 2, kDog = 3 + * }; + * + * // This line is needed to prevent compilation error + * DECLARE_FIELD_ENUM_CLASS(Foo); + * + * // Now define DMLC parameter as usual; + * // enum classes can now be members. + * struct MyParam : dmlc::Parameter { + * Foo foo; + * DMLC_DECLARE_PARAMETER(MyParam) { + * DMLC_DECLARE_FIELD(foo) + * .set_default(Foo::kBar) + * .add_enum("bar", Foo::kBar) + * .add_enum("frog", Foo::kFrog) + * .add_enum("cat", Foo::kCat) + * .add_enum("dog", Foo::kDog); + * } + * }; + * + * DMLC_REGISTER_PARAMETER(MyParam); + * + */ #define DECLARE_FIELD_ENUM_CLASS(EnumClass) \ namespace dmlc { \ namespace parameter { \ From d319b45e29116450c8f766bfd9af77ffd0f8e6ca Mon Sep 17 00:00:00 2001 From: Philip Cho Date: Wed, 31 Oct 2018 22:39:57 -0700 Subject: [PATCH 06/11] Fix lint --- src/common/enum_class_param.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/common/enum_class_param.h b/src/common/enum_class_param.h index 7877a5b017bc..11ddcb045f81 100644 --- a/src/common/enum_class_param.h +++ b/src/common/enum_class_param.h @@ -19,7 +19,7 @@ * * Usage: * - * \code + * \code{.cpp} * * // enum class must inherit from int type * enum class Foo : int { @@ -44,7 +44,7 @@ * }; * * DMLC_REGISTER_PARAMETER(MyParam); - * + * \endcode */ #define DECLARE_FIELD_ENUM_CLASS(EnumClass) \ namespace dmlc { \ From c76f6013f5e13959e3ce4dfdcbc071d9384ca01d Mon Sep 17 00:00:00 2001 From: Philip Cho Date: Thu, 1 Nov 2018 00:20:22 -0700 Subject: [PATCH 07/11] Add C++ test to see if tree_method is recognized --- src/learner.cc | 12 +++++++++--- tests/cpp/test_learner.cc | 40 +++++++++++++++++++++++++++++++++++++++ tests/cpp/test_learner.h | 22 +++++++++++++++++++++ 3 files changed, 71 insertions(+), 3 deletions(-) create mode 100644 tests/cpp/test_learner.h diff --git a/src/learner.cc b/src/learner.cc index a313800a2cd8..79747689a8f4 100644 --- a/src/learner.cc +++ b/src/learner.cc @@ -20,7 +20,8 @@ #include "./common/io.h" #include "./common/random.h" #include "./common/enum_class_param.h" -#include "common/timer.h" +#include "./common/timer.h" +#include "../tests/cpp/test_learner.h" namespace { @@ -151,7 +152,7 @@ DMLC_REGISTER_PARAMETER(LearnerTrainParam); * \brief learner that performs gradient boosting for a specific objective * function. It does training and prediction. */ -class LearnerImpl : public Learner { +class LearnerImpl : public Learner, public LearnerTestHook { public: explicit LearnerImpl(std::vector > cache) : cache_(std::move(cache)) { @@ -518,7 +519,7 @@ class LearnerImpl : public Learner { if (rabit::IsDistributed()) { /* Choose tree_method='approx' when distributed training is activated */ CHECK(tparam_.dsplit != DataSplitMode::kAuto) - << "Precondition violated; dsplit cannot be zero in distributed mode"; + << "Precondition violated; dsplit cannot be 'auto' in distributed mode"; if (tparam_.dsplit == DataSplitMode::kCol) { // 'distcol' updater hidden until it becomes functional again // See discussion at https://github.com/dmlc/xgboost/issues/1832 @@ -663,6 +664,11 @@ class LearnerImpl : public Learner { std::vector > cache_; common::Monitor monitor_; + + // diagnostic method reserved for C++ test learner.SelectTreeMethod + std::string GetUpdaterSequence(void) const override { + return cfg_.at("updater"); + } }; Learner* Learner::Create( diff --git a/tests/cpp/test_learner.cc b/tests/cpp/test_learner.cc index 0a64487640da..bd2f66e17991 100644 --- a/tests/cpp/test_learner.cc +++ b/tests/cpp/test_learner.cc @@ -2,9 +2,20 @@ #include #include #include "helpers.h" +#include "./test_learner.h" #include "xgboost/learner.h" namespace xgboost { + +class LearnerTestHookAdapter { + public: + static inline std::string GetUpdaterSequence(const Learner* learner) { + const LearnerTestHook* hook = dynamic_cast(learner); + CHECK(hook) << "LearnerImpl did not inherit from LearnerTestHook"; + return hook->GetUpdaterSequence(); + } +}; + TEST(learner, Test) { typedef std::pair arg; auto args = {arg("tree_method", "exact")}; @@ -15,4 +26,33 @@ TEST(learner, Test) { delete mat_ptr; } + +TEST(learner, SelectTreeMethod) { + using arg = std::pair; + auto mat_ptr = CreateDMatrix(10, 10, 0); + std::vector> mat = {*mat_ptr}; + auto learner = std::unique_ptr(Learner::Create(mat)); + + // Test if `tree_method` can be set + learner->Configure({arg("tree_method", "approx")}); + ASSERT_EQ(LearnerTestHookAdapter::GetUpdaterSequence(learner.get()), + "grow_histmaker,prune"); + learner->Configure({arg("tree_method", "exact")}); + ASSERT_EQ(LearnerTestHookAdapter::GetUpdaterSequence(learner.get()), + "grow_colmaker,prune"); + learner->Configure({arg("tree_method", "hist")}); + ASSERT_EQ(LearnerTestHookAdapter::GetUpdaterSequence(learner.get()), + "grow_fast_histmaker"); +#ifdef XGBOOST_USE_CUDA + learner->Configure({arg("tree_method", "gpu_exact")}); + ASSERT_EQ(LearnerTestHookAdapter::GetUpdaterSequence(learner.get()), + "grow_gpu,prune"); + learner->Configure({arg("tree_method", "gpu_hist")}); + ASSERT_EQ(LearnerTestHookAdapter::GetUpdaterSequence(learner.get()), + "grow_gpu_hist"); +#endif + + delete mat_ptr; +} + } // namespace xgboost diff --git a/tests/cpp/test_learner.h b/tests/cpp/test_learner.h new file mode 100644 index 000000000000..ffad63f02883 --- /dev/null +++ b/tests/cpp/test_learner.h @@ -0,0 +1,22 @@ +/*! + * Copyright 2018 by Contributors + * \file test_learner.h + * \brief Interface + * \author Hyunsu Philip Cho + */ + +#ifndef XGBOOST_TESTS_CPP_TEST_LEARNER_H_ +#define XGBOOST_TESTS_CPP_TEST_LEARNER_H_ + +#include + +namespace xgboost { +class LearnerTestHook { + private: + virtual std::string GetUpdaterSequence(void) const = 0; + // allow friend access to C++ test learner.SelectTreeMethod + friend class LearnerTestHookAdapter; +}; +} // namespace xgboost + +#endif // XGBOOST_TESTS_CPP_TEST_LEARNER_H_ From 12ca8abfee000ae55db2888a89554d99d00aac9c Mon Sep 17 00:00:00 2001 From: Philip Cho Date: Thu, 1 Nov 2018 01:02:38 -0700 Subject: [PATCH 08/11] Fix clang-tidy error --- src/common/enum_class_param.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/common/enum_class_param.h b/src/common/enum_class_param.h index 11ddcb045f81..b1d03cddd22a 100644 --- a/src/common/enum_class_param.h +++ b/src/common/enum_class_param.h @@ -71,12 +71,11 @@ class FieldEntry : public FieldEntry { \ has_default_ = true; \ return *this; \ } \ - /* NOLINTNEXTLINE */ \ inline void Init(const std::string &key, void *head, EnumClass& ref) { \ Super::Init(key, head, *reinterpret_cast(&ref)); \ } \ }; \ } /* namespace parameter */ \ -} /* namespace dmlc */ +} /* namespace dmlc */ // NOLINT #endif // XGBOOST_COMMON_ENUM_CLASS_PARAM_H_ From 1d1c8ca80c6e46e8245e6c549468b99bf072674c Mon Sep 17 00:00:00 2001 From: Philip Cho Date: Thu, 1 Nov 2018 01:09:43 -0700 Subject: [PATCH 09/11] Add test_learner.h to R package --- Makefile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Makefile b/Makefile index 9ad197ee5459..5a5d1645738f 100644 --- a/Makefile +++ b/Makefile @@ -250,6 +250,8 @@ Rpack: clean_all cp -r src xgboost/src/src cp -r include xgboost/src/include cp -r amalgamation xgboost/src/amalgamation + mkdir -p xgboost/src/tests/cpp + cp tests/cpp/test_learner.h xgboost/src/tests/cpp mkdir -p xgboost/src/rabit cp -r rabit/include xgboost/src/rabit/include cp -r rabit/src xgboost/src/rabit/src From 2a017c4ee35d8d7d4fc4f1b1c405f9784db296c8 Mon Sep 17 00:00:00 2001 From: Philip Cho Date: Thu, 1 Nov 2018 01:11:16 -0700 Subject: [PATCH 10/11] Update comments --- tests/cpp/test_learner.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/cpp/test_learner.h b/tests/cpp/test_learner.h index ffad63f02883..0a4204ab8dd7 100644 --- a/tests/cpp/test_learner.h +++ b/tests/cpp/test_learner.h @@ -1,7 +1,7 @@ /*! * Copyright 2018 by Contributors * \file test_learner.h - * \brief Interface + * \brief Hook to access implementation class of Learner * \author Hyunsu Philip Cho */ @@ -14,7 +14,7 @@ namespace xgboost { class LearnerTestHook { private: virtual std::string GetUpdaterSequence(void) const = 0; - // allow friend access to C++ test learner.SelectTreeMethod + // allow friend access to C++ tests for Learner friend class LearnerTestHookAdapter; }; } // namespace xgboost From 647d41a3ffca69d7bd3b5ff5f82b8ad737841047 Mon Sep 17 00:00:00 2001 From: Philip Cho Date: Thu, 1 Nov 2018 12:35:59 -0700 Subject: [PATCH 11/11] Fix lint error --- src/common/enum_class_param.h | 4 ++-- src/learner.cc | 2 +- tests/cpp/test_learner.h | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/common/enum_class_param.h b/src/common/enum_class_param.h index b1d03cddd22a..1b0ca96dd912 100644 --- a/src/common/enum_class_param.h +++ b/src/common/enum_class_param.h @@ -71,11 +71,11 @@ class FieldEntry : public FieldEntry { \ has_default_ = true; \ return *this; \ } \ - inline void Init(const std::string &key, void *head, EnumClass& ref) { \ + inline void Init(const std::string &key, void *head, EnumClass& ref) { /* NOLINT */ \ Super::Init(key, head, *reinterpret_cast(&ref)); \ } \ }; \ } /* namespace parameter */ \ -} /* namespace dmlc */ // NOLINT +} /* namespace dmlc */ #endif // XGBOOST_COMMON_ENUM_CLASS_PARAM_H_ diff --git a/src/learner.cc b/src/learner.cc index 79747689a8f4..339bc0827c33 100644 --- a/src/learner.cc +++ b/src/learner.cc @@ -666,7 +666,7 @@ class LearnerImpl : public Learner, public LearnerTestHook { common::Monitor monitor_; // diagnostic method reserved for C++ test learner.SelectTreeMethod - std::string GetUpdaterSequence(void) const override { + std::string GetUpdaterSequence() const override { return cfg_.at("updater"); } }; diff --git a/tests/cpp/test_learner.h b/tests/cpp/test_learner.h index 0a4204ab8dd7..b4c62f427168 100644 --- a/tests/cpp/test_learner.h +++ b/tests/cpp/test_learner.h @@ -13,7 +13,7 @@ namespace xgboost { class LearnerTestHook { private: - virtual std::string GetUpdaterSequence(void) const = 0; + virtual std::string GetUpdaterSequence() const = 0; // allow friend access to C++ tests for Learner friend class LearnerTestHookAdapter; };