diff --git a/python/mxnet/gluon/contrib/estimator/batch_processor.py b/python/mxnet/gluon/contrib/estimator/batch_processor.py
index 4985f8c81bf3..aa5adbfdea5f 100644
--- a/python/mxnet/gluon/contrib/estimator/batch_processor.py
+++ b/python/mxnet/gluon/contrib/estimator/batch_processor.py
@@ -61,8 +61,8 @@ def evaluate_batch(self, estimator,
             Batch axis to split the validation data into devices.
         """
         data, label = self._get_data_and_label(val_batch, estimator.context, batch_axis)
-        pred = [estimator.eval_net(x) for x in data]
-        loss = [estimator.evaluation_loss(y_hat, y) for y_hat, y in zip(pred, label)]
+        pred = [estimator.val_net(x) for x in data]
+        loss = [estimator.val_loss(y_hat, y) for y_hat, y in zip(pred, label)]
 
         return data, label, pred, loss
 
diff --git a/python/mxnet/gluon/contrib/estimator/estimator.py b/python/mxnet/gluon/contrib/estimator/estimator.py
index 09f43151e235..ed8a53d7c3a6 100644
--- a/python/mxnet/gluon/contrib/estimator/estimator.py
+++ b/python/mxnet/gluon/contrib/estimator/estimator.py
@@ -61,22 +61,19 @@ class Estimator(object):
         Trainer to apply optimizer on network parameters.
     context : Context or list of Context
         Device(s) to run the training on.
-    evaluation_loss : gluon.loss.loss
-        Loss (objective) function to calculate during validation. If set evaluation_loss
-        None, it will use the same loss function as self.loss
-    eval_net : gluon.Block
+    val_net : gluon.Block
         The model used for validation. The validation model does not necessarily belong to
         the same model class as the training model. But the two models typically share the
         same architecture. Therefore the validation model can reuse parameters of
         the training model.
 
-        The code example of consruction of eval_net sharing the same network parameters as
+        The code example of construction of val_net sharing the same network parameters as
         the training net is given below:
 
         >>> net = _get_train_network()
-        >>> eval_net = _get_test_network(params=net.collect_params())
+        >>> val_net = _get_test_network(params=net.collect_params())
         >>> net.initialize(ctx=ctx)
-        >>> est = Estimator(net, loss, eval_net=eval_net)
+        >>> est = Estimator(net, loss, val_net=val_net)
 
         Proper namespace match is required for weight sharing between two networks. Most networks
         inheriting :py:class:`Block` can share their parameters correctly. An exception is
@@ -84,6 +81,9 @@ class Estimator(object):
         the naming in mxnet Gluon API, please refer to the site
         (https://mxnet.apache.org/api/python/docs/tutorials/packages/gluon/blocks/naming.html)
         for future information.
+    val_loss : gluon.loss.loss
+        Loss (objective) function to calculate during validation. If val_loss is
+        None, it will use the same loss function as self.loss.
     batch_processor: BatchProcessor
         BatchProcessor provides customized fit_batch() and evaluate_batch() methods
     """
@@ -113,8 +113,8 @@ def __init__(self, net,
                  initializer=None,
                  trainer=None,
                  context=None,
-                 evaluation_loss=None,
-                 eval_net=None,
+                 val_net=None,
+                 val_loss=None,
                  batch_processor=None):
         self.net = net
         self.loss = self._check_loss(loss)
@@ -122,12 +122,12 @@ def __init__(self, net,
         self._val_metrics = _check_metrics(val_metrics)
         self._add_default_training_metrics()
         self._add_validation_metrics()
-        self.evaluation_loss = self.loss
-        if evaluation_loss is not None:
-            self.evaluation_loss = self._check_loss(evaluation_loss)
-        self.eval_net = self.net
-        if eval_net is not None:
-            self.eval_net = eval_net
+        self.val_loss = self.loss
+        if val_loss is not None:
+            self.val_loss = self._check_loss(val_loss)
+        self.val_net = self.net
+        if val_net is not None:
+            self.val_net = val_net
 
         self.logger = logging.Logger(name='Estimator', level=logging.INFO)
         self.logger.addHandler(logging.StreamHandler(sys.stdout))
diff --git a/tests/python/unittest/test_gluon_batch_processor.py b/tests/python/unittest/test_gluon_batch_processor.py
index 4bd6f769aa44..8604713fc129 100644
--- a/tests/python/unittest/test_gluon_batch_processor.py
+++ b/tests/python/unittest/test_gluon_batch_processor.py
@@ -84,7 +84,7 @@ def test_batch_processor_validation():
     ctx = mx.cpu()
     loss = gluon.loss.L2Loss()
     acc = mx.metric.Accuracy()
-    evaluation_loss = gluon.loss.L1Loss()
+    val_loss = gluon.loss.L1Loss()
     net.initialize(ctx=ctx)
     processor = BatchProcessor()
     trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.001})
@@ -93,7 +93,7 @@ def test_batch_processor_validation():
                     train_metrics=acc,
                     trainer=trainer,
                     context=ctx,
-                    evaluation_loss=evaluation_loss,
+                    val_loss=val_loss,
                     batch_processor=processor)
     # Input dataloader
     est.fit(train_data=dataloader,
diff --git a/tests/python/unittest/test_gluon_estimator.py b/tests/python/unittest/test_gluon_estimator.py
index 924dd083bef4..ca61e4b40caa 100644
--- a/tests/python/unittest/test_gluon_estimator.py
+++ b/tests/python/unittest/test_gluon_estimator.py
@@ -88,7 +88,7 @@ def test_validation():
     ctx = mx.cpu()
     loss = gluon.loss.L2Loss()
     acc = mx.metric.Accuracy()
-    evaluation_loss = gluon.loss.L1Loss()
+    val_loss = gluon.loss.L1Loss()
     net.initialize(ctx=ctx)
     trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.001})
     est = Estimator(net=net,
@@ -96,7 +96,7 @@ def test_validation():
                     train_metrics=acc,
                     trainer=trainer,
                     context=ctx,
-                    evaluation_loss=evaluation_loss)
+                    val_loss=val_loss)
     # Input dataloader
     est.fit(train_data=dataloader,
             val_data=dataloader,
@@ -376,16 +376,16 @@ def test_default_handlers():
     assert isinstance(handlers[1], MetricHandler)
     assert isinstance(handlers[4], LoggingHandler)
 
-def test_eval_net():
-    ''' test estimator with a different evaluation net '''
+def test_val_net():
+    ''' test estimator with different training and validation networks '''
     ''' test weight sharing of sequential networks without namescope '''
     net = _get_test_network()
-    eval_net = _get_test_network(params=net.collect_params())
+    val_net = _get_test_network(params=net.collect_params())
     dataloader, dataiter = _get_test_data()
     num_epochs = 1
     ctx = mx.cpu()
     loss = gluon.loss.L2Loss()
-    evaluation_loss = gluon.loss.L2Loss()
+    val_loss = gluon.loss.L2Loss()
     acc = mx.metric.Accuracy()
     net.initialize(ctx=ctx)
     trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.001})
@@ -394,8 +394,8 @@ def test_eval_net():
                     train_metrics=acc,
                     trainer=trainer,
                     context=ctx,
-                    evaluation_loss=evaluation_loss,
-                    eval_net=eval_net)
+                    val_loss=val_loss,
+                    val_net=val_net)
 
     with assert_raises(RuntimeError):
         est.fit(train_data=dataloader,
@@ -404,7 +404,7 @@
 
     ''' test weight sharing of sequential networks with namescope '''
     net = _get_test_network_with_namescope()
-    eval_net = _get_test_network_with_namescope(params=net.collect_params())
+    val_net = _get_test_network_with_namescope(params=net.collect_params())
     net.initialize(ctx=ctx)
     trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.001})
     est = Estimator(net=net,
@@ -412,8 +412,8 @@ def test_eval_net():
                     train_metrics=acc,
                     trainer=trainer,
                     context=ctx,
-                    evaluation_loss=evaluation_loss,
-                    eval_net=eval_net)
+                    val_loss=val_loss,
+                    val_net=val_net)
 
     est.fit(train_data=dataloader,
             val_data=dataloader,
@@ -422,20 +422,20 @@ def test_eval_net():
     ''' test weight sharing of two resnets '''
     net = gluon.model_zoo.vision.resnet18_v1(pretrained=False, ctx=ctx)
     net.output = gluon.nn.Dense(10)
-    eval_net = gluon.model_zoo.vision.resnet18_v1(pretrained=False, ctx=ctx)
-    eval_net.output = gluon.nn.Dense(10, params=net.collect_params())
+    val_net = gluon.model_zoo.vision.resnet18_v1(pretrained=False, ctx=ctx)
+    val_net.output = gluon.nn.Dense(10, params=net.collect_params())
     dataset = gluon.data.ArrayDataset(mx.nd.zeros((10, 3, 224, 224)), mx.nd.zeros((10, 10)))
     dataloader = gluon.data.DataLoader(dataset=dataset, batch_size=5)
     net.initialize(ctx=ctx)
-    eval_net.initialize(ctx=ctx)
+    val_net.initialize(ctx=ctx)
     trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.001})
     est = Estimator(net=net,
                     loss=loss,
                     train_metrics=acc,
                     trainer=trainer,
                     context=ctx,
-                    evaluation_loss=evaluation_loss,
-                    eval_net=eval_net)
+                    val_loss=val_loss,
+                    val_net=val_net)
 
     est.fit(train_data=dataloader,
             val_data=dataloader,
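
Usage note (not part of the patch): a minimal sketch of the renamed keyword arguments, assuming the API introduced above. The single-layer network, the shared 'fc_' prefix, and the toy data are illustrative only; the explicit matching prefix is one way to satisfy the namespace-match requirement described in the docstring.

    import mxnet as mx
    from mxnet import gluon
    from mxnet.gluon.contrib.estimator import Estimator

    ctx = mx.cpu()

    # Validation net reuses the training parameters; the matching prefix
    # makes the parameter names line up, so the shared ParameterDict
    # resolves to the same underlying weights.
    net = gluon.nn.Dense(1, prefix='fc_')
    val_net = gluon.nn.Dense(1, prefix='fc_', params=net.collect_params())
    net.initialize(ctx=ctx)

    # Train with L2 loss, validate with L1 loss via the renamed arguments.
    trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.001})
    est = Estimator(net=net,
                    loss=gluon.loss.L2Loss(),
                    train_metrics=mx.metric.Accuracy(),
                    trainer=trainer,
                    context=ctx,
                    val_net=val_net,
                    val_loss=gluon.loss.L1Loss())

    dataset = gluon.data.ArrayDataset(mx.nd.random.uniform(shape=(10, 4)),
                                      mx.nd.random.uniform(shape=(10, 1)))
    dataloader = gluon.data.DataLoader(dataset=dataset, batch_size=5)
    est.fit(train_data=dataloader, val_data=dataloader, epochs=1)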