diff --git a/blueoil/blueoil_init.py b/blueoil/blueoil_init.py
index 32071eb96..018dca1d5 100644
--- a/blueoil/blueoil_init.py
+++ b/blueoil/blueoil_init.py
@@ -15,6 +15,7 @@
 # =============================================================================
 import inspect
 import re
+from collections import OrderedDict
 
 import whaaaaat
 from jinja2 import Environment, FileSystemLoader
@@ -101,6 +102,14 @@
 # ]
 
 
+learning_rate_schedule_map = OrderedDict([
+    ("constant", "'constant' -> constant learning rate."),
+    ("2-step-decay", "'2-step-decay' -> learning rate decrease by 1/10 on {epochs}/2 and {epochs}-1."),
+    ("3-step-decay", "'3-step-decay' -> learning rate decrease by 1/10 on {epochs}/3 and {epochs}*2/3 and {epochs}-1"),
+    ("3-step-decay-with-warmup", "'3-step-decay-with-warmup' -> warmup learning rate 1/1000 in first epoch, then train the same way as '3-step-decay'"),
+])
+
+
 def network_name_choices(task_type):
     if task_type == 'classification':
         return [definition['name'] for definition in classification_network_definitions]
@@ -291,21 +300,19 @@ def ask_questions():
     }
     initial_learning_rate_value = prompt(initial_learning_rate_value_question)
 
-    training_learning_rate_question = {
+    # learning rate schedule
+    learning_rate_schedule_question = {
         'type': 'rawlist',
         'name': 'value',
-        'message': 'choose learning rate setting(tune1 / tune2 / tune3 / fixed):',
-        'choices': ['tune1 -> "2 times decay"', 'tune2 -> "3 times decay"', 'tune3 -> "warm-up and 3 times decay"', 'fixed'],
-        'default': 'tune1 -> "2 times decay"',
-    }
-    choices_key_map = {
-        'tune1 -> "2 times decay"': 'tune1',
-        'tune2 -> "3 times decay"': 'tune2',
-        'tune3 -> "warm-up and 3 times decay"': 'tune3',
-        'fixed': 'fixed',
+        'message': 'choose learning rate schedule \
+({epochs} is the number of training epochs you entered before):',
+        'choices': list(learning_rate_schedule_map.values()),
+        'default': learning_rate_schedule_map["constant"],
     }
-    tmp_learning_rate_setting = prompt(training_learning_rate_question)
-    training_learning_rate_setting = choices_key_map[tmp_learning_rate_setting]
+    _tmp_learning_rate_schedule = prompt(learning_rate_schedule_question)
+    for key, value in learning_rate_schedule_map.items():
+        if value == _tmp_learning_rate_schedule:
+            learning_rate_schedule = key
 
     if prompt(enable_data_augmentation):
         all_augmentor = {}
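In `ask_questions` the prompt returns the human-readable description, and the loop above maps it back to the schedule key. A minimal stand-alone sketch of that reverse lookup (the two-entry map below mirrors `learning_rate_schedule_map`; the helper `schedule_key_for` is illustrative, not the exact blueoil code path):

from collections import OrderedDict

# Mirrors the key -> prompt-description mapping added in blueoil_init.py.
learning_rate_schedule_map = OrderedDict([
    ("constant", "'constant' -> constant learning rate."),
    ("2-step-decay", "'2-step-decay' -> learning rate decrease by 1/10 on {epochs}/2 and {epochs}-1."),
])


def schedule_key_for(answer):
    """Return the schedule key whose description matches the prompt answer."""
    for key, description in learning_rate_schedule_map.items():
        if description == answer:
            return key
    raise ValueError("unknown learning rate schedule: {}".format(answer))


# The prompt hands back the description string the user picked.
assert schedule_key_for("'constant' -> constant learning rate.") == "constant"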
diff --git a/blueoil/generate_lmnet_config.py b/blueoil/generate_lmnet_config.py
index 7dbc7d05e..18d054a2e 100644
--- a/blueoil/generate_lmnet_config.py
+++ b/blueoil/generate_lmnet_config.py
@@ -16,10 +16,14 @@
 import argparse
 import os
 import re
+import importlib
 
 import yaml
 from jinja2 import Environment, FileSystemLoader
 
+from lmnet.utils.module_loader import load_class
+
+
 # TODO(wakisaka): objecte detection, segmentation
 _TASK_TYPE_TEMPLATE_FILE = {
     "classification": "classification.tpl.py",
@@ -132,12 +136,77 @@ def _blueoil_to_lmnet(blueoil_config):
     else:
         dataset_class_property = {"extend_dir": dataset_class_extend_dir}
 
+    # load dataset python module from string.
+    _loaded_dataset_module = importlib.import_module("lmnet.datasets.{}".format(dataset_module))
+    # load dataset python class from string.
+    _loaded_dataset_class = load_class(_loaded_dataset_module, dataset_class)
+    _dataset_class = type('DATASET_CLASS', (_loaded_dataset_class,), dataset_class_property)
+    _dataset_obj = _dataset_class(subset="train", batch_size=1)
+    classes = _dataset_obj.classes
+
     # trainer
     batch_size = blueoil_config["trainer"]["batch_size"]
     initial_learning_rate = blueoil_config["trainer"]["initial_learning_rate"]
-    learning_rate_setting = blueoil_config["trainer"]["learning_rate_setting"]
+    learning_rate_schedule = blueoil_config["trainer"]["learning_rate_schedule"]
     max_epochs = blueoil_config["trainer"]["epochs"]
 
+    step_per_epoch = float(_dataset_obj.num_per_epoch)/batch_size
+
+    learning_rate_func = None
+    learning_rate_kwargs = None
+    if learning_rate_schedule == "constant":
+        optimizer_kwargs = {"momentum": 0.9, "learning_rate": initial_learning_rate}
+    else:
+        optimizer_kwargs = {"momentum": 0.9}
+        learning_rate_func = "tf.train.piecewise_constant"
+
+    if learning_rate_schedule == "2-step-decay":
+        learning_rate_kwargs = {
+            "values": [
+                initial_learning_rate,
+                initial_learning_rate / 10,
+                initial_learning_rate / 100
+            ],
+            "boundaries": [
+                int((step_per_epoch * (max_epochs - 1)) / 2),
+                int(step_per_epoch * (max_epochs - 1))
+            ],
+        }
+
+    elif learning_rate_schedule == "3-step-decay":
+        learning_rate_kwargs = {
+            "values": [
+                initial_learning_rate,
+                initial_learning_rate / 10,
+                initial_learning_rate / 100,
+                initial_learning_rate / 1000
+            ],
+            "boundaries": [
+                int((step_per_epoch * (max_epochs - 1)) * 1 / 3),
+                int((step_per_epoch * (max_epochs - 1)) * 2 / 3),
+                int(step_per_epoch * (max_epochs - 1))
+            ],
+        }
+
+    elif learning_rate_schedule == "3-step-decay-with-warmup":
+        if max_epochs < 4:
+            raise ValueError("epoch number must be >= 4, when 3-step-decay-with-warmup is selected.")
+        learning_rate_kwargs = {
+            "values": [
+                initial_learning_rate / 1000,
+                initial_learning_rate,
+                initial_learning_rate / 10,
+                initial_learning_rate / 100,
+                initial_learning_rate / 1000
+            ],
+            "boundaries": [
+                int(step_per_epoch * 1),
+                int((step_per_epoch * (max_epochs - 1)) * 1 / 3),
+                int((step_per_epoch * (max_epochs - 1)) * 2 / 3),
+                int(step_per_epoch * (max_epochs - 1))
+            ],
+        }
+
     # common
     image_size = blueoil_config["common"]["image_size"]
 
@@ -167,10 +236,13 @@ def _blueoil_to_lmnet(blueoil_config):
 
         "batch_size": batch_size,
         "max_epochs": max_epochs,
-        "initial_learning_rate": initial_learning_rate,
-        "learning_rate_setting": learning_rate_setting,
+
+        "optimizer_kwargs": optimizer_kwargs,
+        "learning_rate_func": learning_rate_func,
+        "learning_rate_kwargs": learning_rate_kwargs,
 
         "image_size": image_size,
+        "classes": classes,
 
         "quantize_first_convolution": quantize_first_convolution,
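To make the boundary arithmetic above concrete, here is a small sketch of what the '2-step-decay' branch computes. The dataset size, batch size and epoch count are hypothetical, not taken from any blueoil dataset:

# Hypothetical inputs: 5000 training images, batch size 32, 100 epochs, initial lr 0.001.
num_per_epoch = 5000
batch_size = 32
max_epochs = 100
initial_learning_rate = 0.001

step_per_epoch = float(num_per_epoch) / batch_size            # 156.25 steps per epoch

# '2-step-decay': drop to 1/10 halfway through, to 1/100 for the last epoch.
values = [initial_learning_rate,
          initial_learning_rate / 10,
          initial_learning_rate / 100]                         # [0.001, 0.0001, 1e-05]
boundaries = [int((step_per_epoch * (max_epochs - 1)) / 2),    # 7734  (global step)
              int(step_per_epoch * (max_epochs - 1))]          # 15468 (global step)

These values/boundaries pairs are what end up in the rendered NETWORK.LEARNING_RATE_KWARGS and are consumed by tf.train.piecewise_constant at training time.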
diff --git a/blueoil/templates/blueoil-config.tpl.yml b/blueoil/templates/blueoil-config.tpl.yml
index 2a9b5a2c1..fda0be17e 100644
--- a/blueoil/templates/blueoil-config.tpl.yml
+++ b/blueoil/templates/blueoil-config.tpl.yml
@@ -11,12 +11,12 @@ dataset:
 trainer:
   batch_size: {{ batch_size }}
   epochs: {{ training_epochs }}
-# supported 'learning_rate_setting' is 'tune1', 'tune2', 'tune3', 'fixed'.
-# 'tune1' is 2 times decay, learning rate reduce to 1/10 on epoch/2 and epoch-1.
-# 'tune2' is 3 times decay, learning rate reduce to 1/10 on epoch/3 and epoch*2/3 and epoch-1.
-# 'tune3' is warmup and 3 times decay, warmup learning rate 1/1000 in 1 epoch, then train same as 'tune2'.
-# 'fixed' is constant learning rate.
-  learning_rate_setting: {{ training_learning_rate_setting }}
+  # supported 'learning_rate_schedule' is 'constant', '2-step-decay', '3-step-decay', '3-step-decay-with-warmup' ({epochs} is the number of training epochs you entered before).
+  # 'constant' -> constant learning rate.
+  # '2-step-decay' -> learning rate decrease by 1/10 on {epochs}/2 and {epochs}-1.
+  # '3-step-decay' -> learning rate decrease by 1/10 on {epochs}/3 and {epochs}*2/3 and {epochs}-1.
+  # '3-step-decay-with-warmup' -> warmup learning rate 1/1000 in first epoch, then train the same way as '3-step-decay'.
+  learning_rate_schedule: {{ learning_rate_schedule }}
   initial_learning_rate: {{ initial_learning_rate_value }}
 
 network:
diff --git a/blueoil/templates/lmnet/classification.tpl.py b/blueoil/templates/lmnet/classification.tpl.py
index c29cffc5c..6ac8dcfc5 100644
--- a/blueoil/templates/lmnet/classification.tpl.py
+++ b/blueoil/templates/lmnet/classification.tpl.py
@@ -44,10 +44,7 @@
 BATCH_SIZE = {{batch_size}}
 DATA_FORMAT = "NHWC"
 TASK = Tasks.CLASSIFICATION
-# In order to get instance property `classes`, instantiate DATASET_CLASS.
-dataset_obj = DATASET_CLASS(subset="train", batch_size=1)
-CLASSES = dataset_obj.classes
-step_per_epoch = float(dataset_obj.num_per_epoch)/BATCH_SIZE
+CLASSES = {{classes}}
 
 MAX_EPOCHS = {{max_epochs}}
 SAVE_STEPS = {{save_steps}}
@@ -71,32 +68,9 @@
 NETWORK = EasyDict()
 
 NETWORK.OPTIMIZER_CLASS = tf.train.MomentumOptimizer
-
-if '{{learning_rate_setting}}' != 'fixed':
-    NETWORK.OPTIMIZER_KWARGS = {"momentum": 0.9}
-    NETWORK.LEARNING_RATE_FUNC = tf.train.piecewise_constant
-
-if '{{learning_rate_setting}}' == 'tune1':
-    NETWORK.LEARNING_RATE_KWARGS = {
-        "values": [{{initial_learning_rate}}, {{initial_learning_rate}} / 10, {{initial_learning_rate}} / 100],
-        "boundaries": [int((step_per_epoch * (MAX_EPOCHS - 1)) / 2), int(step_per_epoch * (MAX_EPOCHS - 1))],
-    }
-elif '{{learning_rate_setting}}' == 'tune2':
-    NETWORK.LEARNING_RATE_KWARGS = {
-        "values": [{{initial_learning_rate}}, {{initial_learning_rate}} / 10, {{initial_learning_rate}} / 100, {{initial_learning_rate}} / 1000],
-        "boundaries": [int((step_per_epoch * (MAX_EPOCHS - 1)) * 1 / 3), int((step_per_epoch * (MAX_EPOCHS - 1)) * 2 / 3), int(step_per_epoch * (MAX_EPOCHS - 1))],
-    }
-elif '{{learning_rate_setting}}' == 'tune3':
-    if MAX_EPOCHS < 4:
-        raise ValueError("epoch number must be >= 4, when tune3 is selected.")
-    NETWORK.LEARNING_RATE_KWARGS = {
-        "values": [{{initial_learning_rate}} / 1000, {{initial_learning_rate}}, {{initial_learning_rate}} / 10, {{initial_learning_rate}} / 100, {{initial_learning_rate}} / 1000],
-        "boundaries": [int(step_per_epoch * 1), int((step_per_epoch * (MAX_EPOCHS - 1)) * 1 / 3), int((step_per_epoch * (MAX_EPOCHS - 1)) * 2 / 3), int(step_per_epoch * (MAX_EPOCHS - 1))],
-    }
-elif '{{learning_rate_setting}}' == 'fixed':
-    NETWORK.OPTIMIZER_KWARGS = {"momentum": 0.9, "learning_rate": {{initial_learning_rate}}}
-else:
-    raise ValueError
+NETWORK.OPTIMIZER_KWARGS = {{optimizer_kwargs}}
+NETWORK.LEARNING_RATE_FUNC = {{learning_rate_func}}
+NETWORK.LEARNING_RATE_KWARGS = {{learning_rate_kwargs}}
 
 NETWORK.IMAGE_SIZE = IMAGE_SIZE
 NETWORK.BATCH_SIZE = BATCH_SIZE
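With this change the template no longer branches at training time: generate_lmnet_config.py computes the optimizer and schedule settings up front, and the placeholders are filled with plain literals. As a rough illustration, a '3-step-decay' run with hypothetical numbers (initial learning rate 0.001 and roughly 1546 decay-relevant steps; these values are invented, not generator output) would render to something like:

import tensorflow as tf
from easydict import EasyDict

# Hypothetical rendered output for '3-step-decay' (values invented for illustration).
NETWORK = EasyDict()
NETWORK.OPTIMIZER_CLASS = tf.train.MomentumOptimizer
NETWORK.OPTIMIZER_KWARGS = {'momentum': 0.9}
NETWORK.LEARNING_RATE_FUNC = tf.train.piecewise_constant
NETWORK.LEARNING_RATE_KWARGS = {'values': [0.001, 0.0001, 1e-05, 1e-06],
                                'boundaries': [515, 1030, 1546]}

For the 'constant' schedule, LEARNING_RATE_FUNC and LEARNING_RATE_KWARGS simply render as None and the learning rate travels inside OPTIMIZER_KWARGS instead.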
diff --git a/blueoil/templates/lmnet/object_detection.tpl.py b/blueoil/templates/lmnet/object_detection.tpl.py
index a5e829c13..91332216e 100644
--- a/blueoil/templates/lmnet/object_detection.tpl.py
+++ b/blueoil/templates/lmnet/object_detection.tpl.py
@@ -49,10 +49,7 @@
 BATCH_SIZE = {{batch_size}}
 DATA_FORMAT = "NHWC"
 TASK = Tasks.OBJECT_DETECTION
-# In order to get instance property `classes`, instantiate DATASET_CLASS.
-dataset_obj = DATASET_CLASS(subset="train", batch_size=1)
-CLASSES = dataset_obj.classes
-step_per_epoch = float(dataset_obj.num_per_epoch)/BATCH_SIZE
+CLASSES = {{classes}}
 
 MAX_EPOCHS = {{max_epochs}}
 SAVE_STEPS = {{save_steps}}
@@ -90,39 +87,10 @@
 ])
 
 NETWORK = EasyDict()
-
-if '{{optimizer}}' == 'GradientDescentOptimizer':
-    NETWORK.OPTIMIZER_CLASS = tf.train.GradientDescentOptimizer
-elif '{{optimizer}}' == 'MomentumOptimizer':
-    NETWORK.OPTIMIZER_CLASS = tf.train.MomentumOptimizer
-    NETWORK.OPTIMIZER_KWARGS = {"momentum": 0.9}
-elif '{{optimizer}}' == 'AdamOptimizer':
-    NETWORK.OPTIMIZER_CLASS = tf.train.AdamOptimizer
-
-if '{{learning_rate_setting}}' != 'fixed':
-    NETWORK.LEARNING_RATE_FUNC = tf.train.piecewise_constant
-
-if '{{learning_rate_setting}}' == 'tune1':
-    NETWORK.LEARNING_RATE_KWARGS = {
-        "values": [{{initial_learning_rate}}, {{initial_learning_rate}} / 10, {{initial_learning_rate}} / 100],
-        "boundaries": [int((step_per_epoch * (MAX_EPOCHS - 1)) / 2), int(step_per_epoch * (MAX_EPOCHS - 1))],
-    }
-elif '{{learning_rate_setting}}' == 'tune2':
-    NETWORK.LEARNING_RATE_KWARGS = {
-        "values": [{{initial_learning_rate}}, {{initial_learning_rate}} / 10, {{initial_learning_rate}} / 100, {{initial_learning_rate}} / 1000],
-        "boundaries": [int((step_per_epoch * (MAX_EPOCHS - 1)) * 1 / 3), int((step_per_epoch * (MAX_EPOCHS - 1)) * 2 / 3), int(step_per_epoch * (MAX_EPOCHS - 1))],
-    }
-elif '{{learning_rate_setting}}' == 'tune3':
-    if MAX_EPOCHS < 4:
-        raise ValueError("epoch number must be >= 4, when tune3 is selected.")
-    NETWORK.LEARNING_RATE_KWARGS = {
-        "values": [{{initial_learning_rate}} / 1000, {{initial_learning_rate}}, {{initial_learning_rate}} / 10, {{initial_learning_rate}} / 100, {{initial_learning_rate}} / 1000],
-        "boundaries": [int(step_per_epoch * 1), int((step_per_epoch * (MAX_EPOCHS - 1)) * 1 / 3), int((step_per_epoch * (MAX_EPOCHS - 1)) * 2 / 3), int(step_per_epoch * (MAX_EPOCHS - 1))],
-    }
-elif '{{learning_rate_setting}}' == 'fixed':
-    NETWORK.OPTIMIZER_KWARGS = {"momentum": 0.9, "learning_rate": {{initial_learning_rate}}}
-else:
-    raise ValueError
+NETWORK.OPTIMIZER_CLASS = tf.train.MomentumOptimizer
+NETWORK.OPTIMIZER_KWARGS = {{optimizer_kwargs}}
+NETWORK.LEARNING_RATE_FUNC = {{learning_rate_func}}
+NETWORK.LEARNING_RATE_KWARGS = {{learning_rate_kwargs}}
 
 NETWORK.IMAGE_SIZE = IMAGE_SIZE
 NETWORK.BATCH_SIZE = BATCH_SIZE
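Both template changes above rely on generate_lmnet_config.py filling the {{ ... }} placeholders through Jinja2. A minimal sketch of that rendering step, with made-up parameter values (the real generator passes many more keys, e.g. image_size and save_steps, and this is not its exact code):

from jinja2 import Environment, FileSystemLoader

# Illustrative only: the parameters below are hypothetical.
env = Environment(loader=FileSystemLoader("blueoil/templates/lmnet"))
template = env.get_template("classification.tpl.py")

params = {
    "batch_size": 32,
    "max_epochs": 100,
    "classes": ["cat", "dog"],
    "optimizer_kwargs": {"momentum": 0.9, "learning_rate": 0.001},  # 'constant' schedule
    "learning_rate_func": None,
    "learning_rate_kwargs": None,
}
rendered = template.render(**params)  # Python source for a concrete lmnet config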
diff --git a/docs/usage/init.md b/docs/usage/init.md
index f70e79e94..9c33361f8 100644
--- a/docs/usage/init.md
+++ b/docs/usage/init.md
@@ -22,7 +22,7 @@ This is an example of configuration.
 image size (integer x integer): 32x32
 how many epochs do you run training (integer): 100
 initial learning rate: 0.001
-message': 'choose learning rate setting(tune1 / tune2 / tune3 / fixed): tune1 -> "2 times decay"
+choose learning rate schedule ({epochs} is the number of training epochs you entered before): '2-step-decay' -> learning rate decrease by 1/10 on {epochs}/2 and {epochs}-1.
 enable data augmentation? No
 apply quantization at the first layer: yes
 ```
diff --git a/tests/config/caltech101_classification.yml b/tests/config/caltech101_classification.yml
index fea7eb0aa..e699921f2 100644
--- a/tests/config/caltech101_classification.yml
+++ b/tests/config/caltech101_classification.yml
@@ -11,12 +11,12 @@ dataset:
 trainer:
   batch_size: 1
   epochs: 1
-# supported 'learning_rate_setting' is 'tune1', 'tune2', 'tune3', 'fixed'.
-# 'tune1' is 2 times decay, learning rate reduce to 1/10 on epoch/2 and epoch-1.
-# 'tune2' is 3 times decay, learning rate reduce to 1/10 on epoch/3 and epoch*2/3 and epoch-1.
-# 'tune3' is warmup and 3 times decay, warmup learning rate 1/1000 in 1 epoch, then train same as 'tune2'.
-# 'fixed' is constant learning rate.
-  learning_rate_setting: tune1
+  # supported 'learning_rate_schedule' is 'constant', '2-step-decay', '3-step-decay', '3-step-decay-with-warmup' ({epochs} is the number of training epochs you entered before).
+  # 'constant' -> constant learning rate.
+  # '2-step-decay' -> learning rate decrease by 1/10 on {epochs}/2 and {epochs}-1.
+  # '3-step-decay' -> learning rate decrease by 1/10 on {epochs}/3 and {epochs}*2/3 and {epochs}-1.
+  # '3-step-decay-with-warmup' -> warmup learning rate 1/1000 in first epoch, then train the same way as '3-step-decay'.
+  learning_rate_schedule: constant
   initial_learning_rate: 0.001
 
 network:
diff --git a/tests/config/caltech101_classification_has_validation.yml b/tests/config/caltech101_classification_has_validation.yml
index 34a3ab876..d51f14014 100644
--- a/tests/config/caltech101_classification_has_validation.yml
+++ b/tests/config/caltech101_classification_has_validation.yml
@@ -11,12 +11,12 @@ dataset:
 trainer:
   batch_size: 1
   epochs: 1
-# supported 'learning_rate_setting' is 'tune1', 'tune2', 'tune3', 'fixed'.
-# 'tune1' is 2 times decay, learning rate reduce to 1/10 on epoch/2 and epoch-1.
-# 'tune2' is 3 times decay, learning rate reduce to 1/10 on epoch/3 and epoch*2/3 and epoch-1.
-# 'tune3' is warmup and 3 times decay, warmup learning rate 1/1000 in 1 epoch, then train same as 'tune2'.
-# 'fixed' is constant learning rate.
-  learning_rate_setting: tune1
+  # supported 'learning_rate_schedule' is 'constant', '2-step-decay', '3-step-decay', '3-step-decay-with-warmup' ({epochs} is the number of training epochs you entered before).
+  # 'constant' -> constant learning rate.
+  # '2-step-decay' -> learning rate decrease by 1/10 on {epochs}/2 and {epochs}-1.
+  # '3-step-decay' -> learning rate decrease by 1/10 on {epochs}/3 and {epochs}*2/3 and {epochs}-1.
+  # '3-step-decay-with-warmup' -> warmup learning rate 1/1000 in first epoch, then train the same way as '3-step-decay'.
+  learning_rate_schedule: constant
   initial_learning_rate: 0.001
 
 network:
diff --git a/tests/config/delta_mark_classification.yml b/tests/config/delta_mark_classification.yml
index 5ec637cdc..388f4122b 100644
--- a/tests/config/delta_mark_classification.yml
+++ b/tests/config/delta_mark_classification.yml
@@ -11,12 +11,12 @@ dataset:
 trainer:
   batch_size: 1
   epochs: 1
-# supported 'learning_rate_setting' is 'tune1', 'tune2', 'tune3', 'fixed'.
-# 'tune1' is 2 times decay, learning rate reduce to 1/10 on epoch/2 and epoch-1.
-# 'tune2' is 3 times decay, learning rate reduce to 1/10 on epoch/3 and epoch*2/3 and epoch-1.
-# 'tune3' is warmup and 3 times decay, warmup learning rate 1/1000 in 1 epoch, then train same as 'tune2'.
-# 'fixed' is constant learning rate.
-  learning_rate_setting: tune1
+  # supported 'learning_rate_schedule' is 'constant', '2-step-decay', '3-step-decay', '3-step-decay-with-warmup' ({epochs} is the number of training epochs you entered before).
+  # 'constant' -> constant learning rate.
+  # '2-step-decay' -> learning rate decrease by 1/10 on {epochs}/2 and {epochs}-1.
+  # '3-step-decay' -> learning rate decrease by 1/10 on {epochs}/3 and {epochs}*2/3 and {epochs}-1.
+  # '3-step-decay-with-warmup' -> warmup learning rate 1/1000 in first epoch, then train the same way as '3-step-decay'.
+  learning_rate_schedule: constant
   initial_learning_rate: 0.001
 
 network:
diff --git a/tests/config/delta_mark_classification_has_validation.yml b/tests/config/delta_mark_classification_has_validation.yml
index 72d989d57..f03e9f59c 100644
--- a/tests/config/delta_mark_classification_has_validation.yml
+++ b/tests/config/delta_mark_classification_has_validation.yml
@@ -11,12 +11,12 @@ dataset:
 trainer:
   batch_size: 1
   epochs: 1
-# supported 'learning_rate_setting' is 'tune1', 'tune2', 'tune3', 'fixed'.
-# 'tune1' is 2 times decay, learning rate reduce to 1/10 on epoch/2 and epoch-1.
-# 'tune2' is 3 times decay, learning rate reduce to 1/10 on epoch/3 and epoch*2/3 and epoch-1.
-# 'tune3' is warmup and 3 times decay, warmup learning rate 1/1000 in 1 epoch, then train same as 'tune2'.
-# 'fixed' is constant learning rate.
-  learning_rate_setting: tune1
+  # supported 'learning_rate_schedule' is 'constant', '2-step-decay', '3-step-decay', '3-step-decay-with-warmup' ({epochs} is the number of training epochs you entered before).
+  # 'constant' -> constant learning rate.
+  # '2-step-decay' -> learning rate decrease by 1/10 on {epochs}/2 and {epochs}-1.
+  # '3-step-decay' -> learning rate decrease by 1/10 on {epochs}/3 and {epochs}*2/3 and {epochs}-1.
+  # '3-step-decay-with-warmup' -> warmup learning rate 1/1000 in first epoch, then train the same way as '3-step-decay'.
+  learning_rate_schedule: constant
   initial_learning_rate: 0.001
 
 network:
diff --git a/tests/config/delta_mark_object_detection.yml b/tests/config/delta_mark_object_detection.yml
index e220bfdd0..5e189d7e8 100644
--- a/tests/config/delta_mark_object_detection.yml
+++ b/tests/config/delta_mark_object_detection.yml
@@ -11,12 +11,12 @@ dataset:
 trainer:
   batch_size: 1
   epochs: 1
-# supported 'learning_rate_setting' is 'tune1', 'tune2', 'tune3', 'fixed'.
-# 'tune1' is 2 times decay, learning rate reduce to 1/10 on epoch/2 and epoch-1.
-# 'tune2' is 3 times decay, learning rate reduce to 1/10 on epoch/3 and epoch*2/3 and epoch-1.
-# 'tune3' is warmup and 3 times decay, warmup learning rate 1/1000 in 1 epoch, then train same as 'tune2'.
-# 'fixed' is constant learning rate.
-  learning_rate_setting: tune1
+  # supported 'learning_rate_schedule' is 'constant', '2-step-decay', '3-step-decay', '3-step-decay-with-warmup' ({epochs} is the number of training epochs you entered before).
+  # 'constant' -> constant learning rate.
+  # '2-step-decay' -> learning rate decrease by 1/10 on {epochs}/2 and {epochs}-1.
+  # '3-step-decay' -> learning rate decrease by 1/10 on {epochs}/3 and {epochs}*2/3 and {epochs}-1.
+  # '3-step-decay-with-warmup' -> warmup learning rate 1/1000 in first epoch, then train the same way as '3-step-decay'.
+  learning_rate_schedule: constant
   initial_learning_rate: 0.001
 
 network:
diff --git a/tests/config/delta_mark_object_detection_has_validation.yml b/tests/config/delta_mark_object_detection_has_validation.yml
index b669d55f0..af36531b8 100644
--- a/tests/config/delta_mark_object_detection_has_validation.yml
+++ b/tests/config/delta_mark_object_detection_has_validation.yml
@@ -11,12 +11,12 @@ dataset:
 trainer:
   batch_size: 1
   epochs: 1
-# supported 'learning_rate_setting' is 'tune1', 'tune2', 'tune3', 'fixed'.
-# 'tune1' is 2 times decay, learning rate reduce to 1/10 on epoch/2 and epoch-1.
-# 'tune2' is 3 times decay, learning rate reduce to 1/10 on epoch/3 and epoch*2/3 and epoch-1.
-# 'tune3' is warmup and 3 times decay, warmup learning rate 1/1000 in 1 epoch, then train same as 'tune2'.
-# 'fixed' is constant learning rate.
-  learning_rate_setting: tune1
+  # supported 'learning_rate_schedule' is 'constant', '2-step-decay', '3-step-decay', '3-step-decay-with-warmup' ({epochs} is the number of training epochs you entered before).
+  # 'constant' -> constant learning rate.
+  # '2-step-decay' -> learning rate decrease by 1/10 on {epochs}/2 and {epochs}-1.
+  # '3-step-decay' -> learning rate decrease by 1/10 on {epochs}/3 and {epochs}*2/3 and {epochs}-1.
+  # '3-step-decay-with-warmup' -> warmup learning rate 1/1000 in first epoch, then train the same way as '3-step-decay'.
+  learning_rate_schedule: constant
   initial_learning_rate: 0.001
 
 network:
diff --git a/tests/config/make_yml_config.py b/tests/config/make_yml_config.py
index aae9c567c..b7b11cada 100644
--- a/tests/config/make_yml_config.py
+++ b/tests/config/make_yml_config.py
@@ -113,21 +113,25 @@
     '  optimizer: MomentumOptimizer\n',
 ]
 
-trainer_lr_setting_comment = "# supported 'learning_rate_setting' is 'tune1', 'tune2', 'tune3', 'fixed'.\n\
-# 'tune1' is 2 times decay, learning rate reduce to 1/10 on epoch/2 and epoch-1.\n\
-# 'tune2' is 3 times decay, learning rate reduce to 1/10 on epoch/3 and epoch*2/3 and epoch-1.\n\
-# 'tune3' is warmup and 3 times decay, warmup learning rate 1/1000 in 1 epoch, then train same as 'tune2'.\n\
-# 'fixed' is constant learning rate.\n"
-
-trainer_lr_settings = [
-    '  learning_rate_setting: tune1\n',
-    '  learning_rate_setting: tune1\n',
-    '  learning_rate_setting: tune1\n',
-    '  learning_rate_setting: tune1\n',
-    '  learning_rate_setting: tune1\n',
-    '  learning_rate_setting: tune1\n',
-    '  learning_rate_setting: tune1\n',
-    '  learning_rate_setting: tune1\n',
+trainer_lr_schedule_comment = """\
+  # supported 'learning_rate_schedule' is 'constant', '2-step-decay', '3-step-decay', '3-step-decay-with-warmup' \
+({epochs} is the number of training epochs you entered before).
+  # 'constant' -> constant learning rate.
+  # '2-step-decay' -> learning rate decrease by 1/10 on {epochs}/2 and {epochs}-1.
+  # '3-step-decay' -> learning rate decrease by 1/10 on {epochs}/3 and {epochs}*2/3 and {epochs}-1.
+  # '3-step-decay-with-warmup' -> warmup learning rate 1/1000 in first epoch, \
+then train the same way as '3-step-decay'.
+"""
+
+trainer_lr_schedules = [
+    '  learning_rate_schedule: constant\n',
+    '  learning_rate_schedule: constant\n',
+    '  learning_rate_schedule: constant\n',
+    '  learning_rate_schedule: constant\n',
+    '  learning_rate_schedule: constant\n',
+    '  learning_rate_schedule: constant\n',
+    '  learning_rate_schedule: constant\n',
+    '  learning_rate_schedule: constant\n',
 ]
 
 trainer_initial_lrs = [
@@ -242,10 +246,10 @@ def learning_settings_to_yaml(index):
         #fp.write(str(trainer_optimizer_comment))
         # trainer optimizer
         #fp.write(str(trainer_optimizers[index]))
-        # trainer lr setting comment
-        fp.write(str(trainer_lr_setting_comment))
-        # trainer lr setting
-        fp.write(str(trainer_lr_settings[index]))
+        # trainer lr schedule comment
+        fp.write(str(trainer_lr_schedule_comment))
+        # trainer lr schedule
+        fp.write(str(trainer_lr_schedules[index]))
         # trainer initial lr
         fp.write(str(trainer_initial_lrs[index]))
         fp.write('\n')
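One detail worth noting in the block above: the trailing backslashes inside the triple-quoted trainer_lr_schedule_comment are line continuations, so the long "supported ..." sentence and the '3-step-decay-with-warmup' description each come out as a single YAML comment line. A small stand-alone illustration of that behaviour (shortened text, not the actual comment):

# A backslash at the end of a line inside a (triple-)quoted string removes the
# newline, so two source lines become one line in the resulting string.
comment = """\
  # supported 'learning_rate_schedule' is 'constant', '2-step-decay' \
(shortened example).
  # 'constant' -> constant learning rate.
"""

print(comment)
# prints:
#   # supported 'learning_rate_schedule' is 'constant', '2-step-decay' (shortened example).
#   # 'constant' -> constant learning rate.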
+""" + +trainer_lr_schedules = [ + ' learning_rate_schedule: constant\n', + ' learning_rate_schedule: constant\n', + ' learning_rate_schedule: constant\n', + ' learning_rate_schedule: constant\n', + ' learning_rate_schedule: constant\n', + ' learning_rate_schedule: constant\n', + ' learning_rate_schedule: constant\n', + ' learning_rate_schedule: constant\n', ] trainer_initial_lrs = [ @@ -242,10 +246,10 @@ def learning_settings_to_yaml(index): #fp.write(str(trainer_optimizer_comment)) # trainer optimizer #fp.write(str(trainer_optimizers[index])) - # trainer lr setting comment - fp.write(str(trainer_lr_setting_comment)) - # trainer lr setting - fp.write(str(trainer_lr_settings[index])) + # trainer lr schedule comment + fp.write(str(trainer_lr_schedule_comment)) + # trainer lr schedule + fp.write(str(trainer_lr_schedules[index])) # trainer initial lr fp.write(str(trainer_initial_lrs[index])) fp.write('\n') diff --git a/tests/config/openimagesv4_object_detection.yml b/tests/config/openimagesv4_object_detection.yml index 1d8c74d70..1c8493978 100644 --- a/tests/config/openimagesv4_object_detection.yml +++ b/tests/config/openimagesv4_object_detection.yml @@ -11,12 +11,12 @@ dataset: trainer: batch_size: 1 epochs: 1 -# supported 'learning_rate_setting' is 'tune1', 'tune2', 'tune3', 'fixed'. -# 'tune1' is 2 times decay, learning rate reduce to 1/10 on epoch/2 and epoch-1. -# 'tune2' is 3 times decay, learning rate reduce to 1/10 on epoch/3 and epoch*2/3 and epoch-1. -# 'tune3' is warmup and 3 times decay, warmup learning rate 1/1000 in 1 epoch, then train same as 'tune2'. -# 'fixed' is constant learning rate. - learning_rate_setting: tune1 + # supported 'learning_rate_schedule' is 'constant', '2-step-decay', '3-step-decay', '3-step-decay-with-warmup' ({epochs} is the number of training epochs you entered before). + # 'constant' -> constant learning rate. + # '2-step-decay' -> learning rate decrease by 1/10 on {epochs}/2 and {epochs}-1. + # '3-step-decay' -> learning rate decrease by 1/10 on {epochs}/3 and {epochs}*2/3 and {epochs}-1. + # '3-step-decay-with-warmup' -> warmup learning rate 1/1000 in first epoch, then train the same way as '3-step-decay'. + learning_rate_schedule: constant initial_learning_rate: 0.001 network: diff --git a/tests/config/openimagesv4_object_detection_has_validation.yml b/tests/config/openimagesv4_object_detection_has_validation.yml index 61566bf47..3f68329d2 100644 --- a/tests/config/openimagesv4_object_detection_has_validation.yml +++ b/tests/config/openimagesv4_object_detection_has_validation.yml @@ -11,12 +11,12 @@ dataset: trainer: batch_size: 1 epochs: 1 -# supported 'learning_rate_setting' is 'tune1', 'tune2', 'tune3', 'fixed'. -# 'tune1' is 2 times decay, learning rate reduce to 1/10 on epoch/2 and epoch-1. -# 'tune2' is 3 times decay, learning rate reduce to 1/10 on epoch/3 and epoch*2/3 and epoch-1. -# 'tune3' is warmup and 3 times decay, warmup learning rate 1/1000 in 1 epoch, then train same as 'tune2'. -# 'fixed' is constant learning rate. - learning_rate_setting: tune1 + # supported 'learning_rate_schedule' is 'constant', '2-step-decay', '3-step-decay', '3-step-decay-with-warmup' ({epochs} is the number of training epochs you entered before). + # 'constant' -> constant learning rate. + # '2-step-decay' -> learning rate decrease by 1/10 on {epochs}/2 and {epochs}-1. + # '3-step-decay' -> learning rate decrease by 1/10 on {epochs}/3 and {epochs}*2/3 and {epochs}-1. 
diff --git a/tests/config/openimagesv4_object_detection_has_validation.yml b/tests/config/openimagesv4_object_detection_has_validation.yml
index 61566bf47..3f68329d2 100644
--- a/tests/config/openimagesv4_object_detection_has_validation.yml
+++ b/tests/config/openimagesv4_object_detection_has_validation.yml
@@ -11,12 +11,12 @@ dataset:
 trainer:
   batch_size: 1
   epochs: 1
-# supported 'learning_rate_setting' is 'tune1', 'tune2', 'tune3', 'fixed'.
-# 'tune1' is 2 times decay, learning rate reduce to 1/10 on epoch/2 and epoch-1.
-# 'tune2' is 3 times decay, learning rate reduce to 1/10 on epoch/3 and epoch*2/3 and epoch-1.
-# 'tune3' is warmup and 3 times decay, warmup learning rate 1/1000 in 1 epoch, then train same as 'tune2'.
-# 'fixed' is constant learning rate.
-  learning_rate_setting: tune1
+  # supported 'learning_rate_schedule' is 'constant', '2-step-decay', '3-step-decay', '3-step-decay-with-warmup' ({epochs} is the number of training epochs you entered before).
+  # 'constant' -> constant learning rate.
+  # '2-step-decay' -> learning rate decrease by 1/10 on {epochs}/2 and {epochs}-1.
+  # '3-step-decay' -> learning rate decrease by 1/10 on {epochs}/3 and {epochs}*2/3 and {epochs}-1.
+  # '3-step-decay-with-warmup' -> warmup learning rate 1/1000 in first epoch, then train the same way as '3-step-decay'.
+  learning_rate_schedule: constant
   initial_learning_rate: 0.001
 
 network: