Avoid resetting seed for every configuration. (#6349)
trivialfis authored Nov 6, 2020
1 parent f3a4253 commit 519cee1
Showing 4 changed files with 41 additions and 4 deletions.
4 changes: 4 additions & 0 deletions doc/parameter.rst
@@ -412,6 +412,10 @@ Specify the learning task and the corresponding learning objective. The objectiv

- Random number seed. This parameter is ignored in R package, use `set.seed()` instead.

* ``seed_per_iteration`` [default=false]

- Seed PRNG deterministically via iteration number. This option is switched on automatically in distributed mode.

***********************
Command Line Parameters
***********************
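A rough usage sketch (not part of this commit; the wrapper function and the seed value are invented for illustration): the new ``seed_per_iteration`` option is set through the same parameter interface as ``seed``, for example via the C++ ``Learner`` API that the test added further below also uses.

#include <memory>
#include <vector>

#include <xgboost/data.h>
#include <xgboost/learner.h>

// Hypothetical helper, for illustration only: configure a learner with a
// fixed seed and deterministic per-iteration seeding.
std::unique_ptr<xgboost::Learner> MakeSeededLearner(
    std::shared_ptr<xgboost::DMatrix> dtrain) {
  std::unique_ptr<xgboost::Learner> learner{
      xgboost::Learner::Create({dtrain})};
  learner->SetParam("seed", "2020");                // random_state alias also works
  learner->SetParam("seed_per_iteration", "true");  // the option documented above
  learner->Configure();
  return learner;
}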
5 changes: 3 additions & 2 deletions include/xgboost/generic_parameters.h
@@ -14,10 +14,11 @@ namespace xgboost {
struct GenericParameter : public XGBoostParameter<GenericParameter> {
// Constant representing the device ID of CPU.
static int32_t constexpr kCpuId = -1;
static int64_t constexpr kDefaultSeed = 0;

public:
// stored random seed
int64_t seed;
int64_t seed { kDefaultSeed };
// whether seed the PRNG each iteration
bool seed_per_iteration;
// number of threads to use if OpenMP is enabled
@@ -46,7 +47,7 @@ struct GenericParameter : public XGBoostParameter<GenericParameter> {

// declare parameters
DMLC_DECLARE_PARAMETER(GenericParameter) {
DMLC_DECLARE_FIELD(seed).set_default(0).describe(
DMLC_DECLARE_FIELD(seed).set_default(kDefaultSeed).describe(
"Random number seed during training.");
DMLC_DECLARE_ALIAS(seed, random_state);
DMLC_DECLARE_FIELD(seed_per_iteration)
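A side note on the header change above, shown as a generic C++ sketch (not XGBoost code): the default now lives in a named constant that both the in-class member initializer and ``set_default()`` refer to. Before C++17, a ``static constexpr`` data member that is odr-used still needs an out-of-line definition, which is what the ``learner.cc`` hunk below adds.

#include <cstdint>
#include <iostream>

struct FakeParams {  // stand-in for GenericParameter, illustration only
  static constexpr std::int64_t kDefaultSeed = 0;
  std::int64_t seed{kDefaultSeed};  // in-class default stays in sync with the constant
};

// Out-of-line definition; required before C++17 whenever kDefaultSeed is
// odr-used (e.g. bound to a const reference), mirroring learner.cc below.
constexpr std::int64_t FakeParams::kDefaultSeed;

int main() {
  FakeParams p;
  std::cout << p.seed << '\n';  // prints 0
  return 0;
}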
13 changes: 11 additions & 2 deletions src/learner.cc
@@ -202,6 +202,7 @@ DMLC_REGISTER_PARAMETER(LearnerTrainParam);
DMLC_REGISTER_PARAMETER(GenericParameter);

int constexpr GenericParameter::kCpuId;
int64_t constexpr GenericParameter::kDefaultSeed;

void GenericParameter::ConfigureGpuId(bool require_gpu) {
#if defined(XGBOOST_USE_CUDA)
@@ -239,6 +240,9 @@ using ThreadLocalPredictionCache =
dmlc::ThreadLocalStore<std::map<Learner const *, PredictionContainer>>;

class LearnerConfiguration : public Learner {
private:
std::mutex config_lock_;

protected:
static std::string const kEvalMetric; // NOLINT

@@ -252,7 +256,6 @@ class LearnerConfiguration : public Learner {
LearnerModelParam learner_model_param_;
LearnerTrainParam tparam_;
std::vector<std::string> metric_names_;
std::mutex config_lock_;

public:
explicit LearnerConfiguration(std::vector<std::shared_ptr<DMatrix> > cache)
@@ -283,7 +286,11 @@ class LearnerConfiguration : public Learner {

tparam_.UpdateAllowUnknown(args);
auto mparam_backup = mparam_;

mparam_.UpdateAllowUnknown(args);

auto initialized = generic_parameters_.GetInitialised();
auto old_seed = generic_parameters_.seed;
generic_parameters_.UpdateAllowUnknown(args);
generic_parameters_.CheckDeprecated();

@@ -297,7 +304,9 @@ }
}

// set seed only before the model is initialized
common::GlobalRandom().seed(generic_parameters_.seed);
if (!initialized || generic_parameters_.seed != old_seed) {
common::GlobalRandom().seed(generic_parameters_.seed);
}

// must precede configure gbm since num_features is required for gbm
this->ConfigureNumFeatures();
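The core of the fix is the guard around the global re-seed. A minimal standalone sketch of the same idea (plain C++ with a standard engine, not the actual Learner code): re-seed only when the object has never been configured or when the seed value actually changed, so repeated reconfiguration no longer rewinds the random stream.

#include <cstdint>
#include <random>

class SeedGuardedConfig {  // illustration only, not the XGBoost class
 public:
  void Update(std::int64_t new_seed) {
    bool initialized = initialized_;
    std::int64_t old_seed = seed_;
    seed_ = new_seed;
    // Mirror of the condition added in learner.cc: seed once, or when the value changes.
    if (!initialized || seed_ != old_seed) {
      rng_.seed(static_cast<std::mt19937_64::result_type>(seed_));
    }
    initialized_ = true;
  }
  std::mt19937_64& Rng() { return rng_; }

 private:
  bool initialized_{false};
  std::int64_t seed_{0};
  std::mt19937_64 rng_;
};

With this guard, calling Update twice with the same seed keeps drawing from a single continuous stream, which is the behaviour checked by the new ConstantSeed test below.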
23 changes: 23 additions & 0 deletions tests/cpp/test_learner.cc
@@ -11,6 +11,7 @@
#include <xgboost/version_config.h>
#include "xgboost/json.h"
#include "../../src/common/io.h"
#include "../../src/common/random.h"

namespace xgboost {

@@ -333,4 +334,26 @@ TEST(Learner, Seed) {
ASSERT_EQ(std::to_string(seed),
get<String>(config["learner"]["generic_param"]["seed"]));
}

TEST(Learner, ConstantSeed) {
auto m = RandomDataGenerator{10, 10, 0}.GenerateDMatrix(true);
std::unique_ptr<Learner> learner{Learner::Create({m})};
learner->Configure(); // seed the global random

std::uniform_real_distribution<float> dist;
auto& rng = common::GlobalRandom();
float v_0 = dist(rng);

learner->SetParam("", "");
learner->Configure(); // check configure doesn't change the seed.
float v_1 = dist(rng);
CHECK_NE(v_0, v_1);

{
rng.seed(GenericParameter::kDefaultSeed);
std::uniform_real_distribution<float> dist;
float v_2 = dist(rng);
CHECK_EQ(v_0, v_2);
}
}
} // namespace xgboost
