From 69f819b021c25d243ad2fc18cb2d23b03fd53638 Mon Sep 17 00:00:00 2001 From: jacksona Date: Mon, 23 Sep 2019 14:31:30 -0700 Subject: [PATCH 1/2] add an average_winrate_observed metric to tensorboard --- dual_net.py | 27 ++++++++++++++++++++------- 1 file changed, 20 insertions(+), 7 deletions(-) diff --git a/dual_net.py b/dual_net.py index 6391ef85f..4d2b3cbfb 100644 --- a/dual_net.py +++ b/dual_net.py @@ -85,6 +85,8 @@ 'The Estimator working directory. Used to dump: ' 'checkpoints, tensorboard logs, etc..') +flags.DEFINE_integer('num_gpus', 1, 'Number of GPUs to use for training.') + flags.DEFINE_bool('use_tpu', False, 'Whether to use TPU for training.') flags.DEFINE_string( @@ -279,8 +281,9 @@ def model_fn(features, labels, mode, params): train_op = optimizer.minimize(combined_cost, global_step=global_step) # Computations to be executed on CPU, outside of the main TPU queues. - def eval_metrics_host_call_fn(policy_output, value_output, pi_tensor, policy_cost, - value_cost, l2_cost, combined_cost, step, + def eval_metrics_host_call_fn(policy_output, value_output, pi_tensor, + value_tensor, policy_cost, value_cost, + l2_cost, combined_cost, step, est_mode=tf.estimator.ModeKeys.TRAIN): policy_entropy = -tf.reduce_mean(tf.reduce_sum( policy_output * tf.log(policy_output), axis=1)) @@ -299,6 +302,7 @@ def eval_metrics_host_call_fn(policy_output, value_output, pi_tensor, policy_cos tf.one_hot(policy_target_top_1, tf.shape(policy_output)[1])) value_cost_normalized = value_cost / params['value_cost_weight'] + avg_value_observed = tf.reduce_mean(value_tensor) with tf.variable_scope('metrics'): metric_ops = { @@ -308,7 +312,7 @@ def eval_metrics_host_call_fn(policy_output, value_output, pi_tensor, policy_cos 'l2_cost': tf.metrics.mean(l2_cost), 'policy_entropy': tf.metrics.mean(policy_entropy), 'combined_cost': tf.metrics.mean(combined_cost), - + 'avg_value_observed': tf.metrics.mean(avg_value_observed), 'policy_accuracy_top_1': tf.metrics.mean(policy_output_in_top1), 'policy_accuracy_top_3': tf.metrics.mean(policy_output_in_top3), 'policy_top_1_confidence': tf.metrics.mean(policy_top_1_confidence), @@ -345,6 +349,7 @@ def eval_metrics_host_call_fn(policy_output, value_output, pi_tensor, policy_cos policy_output, value_output, labels['pi_tensor'], + labels['value_tensor'], tf.reshape(policy_cost, [1]), tf.reshape(value_cost, [1]), tf.reshape(l2_cost, [1]), @@ -541,10 +546,18 @@ def get_estimator(): def _get_nontpu_estimator(): session_config = tf.ConfigProto() session_config.gpu_options.allow_growth = True - run_config = tf.estimator.RunConfig( - save_summary_steps=FLAGS.summary_steps, - keep_checkpoint_max=FLAGS.keep_checkpoint_max, - session_config=session_config) + if FLAGS.num_gpus > 1: + strategy = tf.contrib.distribute.MirroredStrategy(num_gpus=FLAGS.num_gpus) + run_config = tf.estimator.RunConfig( + save_summary_steps=FLAGS.summary_steps, + keep_checkpoint_max=FLAGS.keep_checkpoint_max, + train_distribute=strategy, + session_config=session_config) + else: + run_config = tf.estimator.RunConfig( + save_summary_steps=FLAGS.summary_steps, + keep_checkpoint_max=FLAGS.keep_checkpoint_max, + session_config=session_config) return tf.estimator.Estimator( model_fn, model_dir=FLAGS.work_dir, From 9103ae502e19b51af955867108d4bf5265b17e9d Mon Sep 17 00:00:00 2001 From: jacksona Date: Mon, 23 Sep 2019 16:57:29 -0700 Subject: [PATCH 2/2] remove unused num_gpu code --- dual_net.py | 18 ++++-------------- 1 file changed, 4 insertions(+), 14 deletions(-) diff --git a/dual_net.py b/dual_net.py index 4d2b3cbfb..96cc13d9f 100644 --- a/dual_net.py +++ b/dual_net.py @@ -85,8 +85,6 @@ 'The Estimator working directory. Used to dump: ' 'checkpoints, tensorboard logs, etc..') -flags.DEFINE_integer('num_gpus', 1, 'Number of GPUs to use for training.') - flags.DEFINE_bool('use_tpu', False, 'Whether to use TPU for training.') flags.DEFINE_string( @@ -546,18 +544,10 @@ def get_estimator(): def _get_nontpu_estimator(): session_config = tf.ConfigProto() session_config.gpu_options.allow_growth = True - if FLAGS.num_gpus > 1: - strategy = tf.contrib.distribute.MirroredStrategy(num_gpus=FLAGS.num_gpus) - run_config = tf.estimator.RunConfig( - save_summary_steps=FLAGS.summary_steps, - keep_checkpoint_max=FLAGS.keep_checkpoint_max, - train_distribute=strategy, - session_config=session_config) - else: - run_config = tf.estimator.RunConfig( - save_summary_steps=FLAGS.summary_steps, - keep_checkpoint_max=FLAGS.keep_checkpoint_max, - session_config=session_config) + run_config = tf.estimator.RunConfig( + save_summary_steps=FLAGS.summary_steps, + keep_checkpoint_max=FLAGS.keep_checkpoint_max, + session_config=session_config) return tf.estimator.Estimator( model_fn, model_dir=FLAGS.work_dir,