From 10d8bfa3c29aec3167dff5aa859157262dd42ac6 Mon Sep 17 00:00:00 2001
From: Piyush Ghai <ghai.8@osu.edu>
Date: Mon, 18 Mar 2019 15:51:25 -0700
Subject: [PATCH 01/28] Added tutorial for FIT API

---
 docs/tutorials/gluon/fit_api_tutorial.md | 311 +++++++++++++++++++++++
 1 file changed, 311 insertions(+)
 create mode 100644 docs/tutorials/gluon/fit_api_tutorial.md

diff --git a/docs/tutorials/gluon/fit_api_tutorial.md b/docs/tutorials/gluon/fit_api_tutorial.md
new file mode 100644
index 000000000000..920eb6b447a9
--- /dev/null
+++ b/docs/tutorials/gluon/fit_api_tutorial.md
@@ -0,0 +1,311 @@
+<!--- Licensed to the Apache Software Foundation (ASF) under one -->
+<!--- or more contributor license agreements.  See the NOTICE file -->
+<!--- distributed with this work for additional information -->
+<!--- regarding copyright ownership.  The ASF licenses this file -->
+<!--- to you under the Apache License, Version 2.0 (the -->
+<!--- "License"); you may not use this file except in compliance -->
+<!--- with the License.  You may obtain a copy of the License at -->
+
+<!---   http://www.apache.org/licenses/LICENSE-2.0 -->
+
+<!--- Unless required by applicable law or agreed to in writing, -->
+<!--- software distributed under the License is distributed on an -->
+<!--- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -->
+<!--- KIND, either express or implied.  See the License for the -->
+<!--- specific language governing permissions and limitations -->
+<!--- under the License. -->
+
+# Gluon Fit API
+
+In this tutorial, we will see how to use the [Gluon Fit API](https://cwiki.apache.org/confluence/display/MXNET/Gluon+Fit+API+-+Tech+Design) which is a simple and flexible way to train deep learning models using the [Gluon APIs](http://mxnet.incubator.apache.org/versions/master/gluon/index.html) in Apache MXNet. 
+
+Prior to Fit API, training using Gluon required one to write a custom ["Gluon training loop"](https://mxnet.incubator.apache.org/versions/master/tutorials/gluon/logistic_regression_explained.html#defining-and-training-the-model). Fit API reduces the complexity and amount of boiler plate code required to train a model, provides an easy to use and a powerful API. 
+
+To demonstrate the Fit API, this tutorial will train an Image Classification model using the [AlexNet](https://arxiv.org/abs/1404.5997) architecture for the neural network. The model will be trained using the [Fashion-MNIST dataset](https://research.zalando.com/welcome/mission/research-projects/fashion-mnist/). 
+
+
+## Prerequisites
+
+To complete this tutorial, you will need:
+
+- [MXNet](https://mxnet.incubator.apache.org/install/#overview) (The version of MXNet will be >= 1.5.0)
+- [GluonCV](https://gluon-cv.mxnet.io)
+
+This tutorial works with both Python 2 and Python 3.
+
+
+
+```python
+import mxnet as mx
+from mxnet import gluon, autograd
+from gluoncv import utils
+from gluoncv.model_zoo import get_model
+from mxnet.gluon.estimator import estimator
+
+ctx = mx.gpu(0) # Or mx.cpu(0) is using a GPU backed machine
+mx.random.seed(7) # Set a fixed seed
+```
+
+## Dataset
+
+[Fashion-MNIST](https://research.zalando.com/welcome/mission/research-projects/fashion-mnist/) dataset consists of fashion items divided into ten categories : t-shirt/top, trouser, pullover, dress, coat, sandal, shirt, sneaker, bag and ankle boot. 
+
+- It has 60,000 gray scale images of size 28 * 28 for training.  
+- It has 10,000 gray scale images of size 28 * 28 for testing/validation. 
+
+We will use ```gluon.data.vision``` package to directly import the Fashion-MNIST dataset and perform pre-processing on it.
+
+
+```python
+# Get the training data 
+fashion_mnist_train = gluon.data.vision.FashionMNIST(train=True)
+
+# Get the validation data
+fashion_mnist_val = gluon.data.vision.FashionMNIST(train=False)
+```
+
+## Exploring the Data
+
+
+```python
+text_labels = ['t-shirt/top', 'trouser', 'pullover', 'dress', 'coat',
+                   'sandal', 'shirt', 'sneaker', 'bag', 'ankle boot']
+
+# Let's print the size of the dataset for train and validation.
+print ("Number of training samples : %d" % (len(fashion_mnist_train)))
+print ("Number of validation samples : %d" % (len(fashion_mnist_val)))
+
+
+train_first_elem, train_first_label = fashion_mnist_train[0]
+print ("Shape of each iamge : ", train_first_elem.shape)
+```
+
+    Number of training samples : 60000 <!--notebook-skip-line-->
+    Number of validation samples : 10000 <!--notebook-skip-line-->
+    Shape of each iamge :  (28, 28, 1) <!--notebook-skip-line-->
+
+
+Now let's try to visualize the dataset before we proceed further
+
+
+```python
+from IPython import display
+import matplotlib.pyplot as plt
+
+# Function to display the data
+def show_fashion_mnist_data(images, labels):
+    display.set_matplotlib_formats('svg')
+    _, figs = plt.subplots(1, len(images), figsize=(12, 12))
+    
+    for figure, x, y in zip(figs, images, labels):
+        figure.imshow(x.reshape((28, 28)).asnumpy())
+        axes = figure.axes
+        axes.set_title(text_labels[int(y)])
+        axes.title.set_fontsize(12)
+        axes.get_xaxis().set_visible(False)
+        axes.get_yaxis().set_visible(False)
+    
+    plt.show()
+```
+
+
+```python
+images, labels = fashion_mnist_train[0:10]
+show_fashion_mnist_data(images, labels)
+```
+
+
+![png](https://raw.githubusercontent.com/piyushghai/web-data/master/mxnet/doc/tutorials/gluon/fashion_mnist.png)<!--notebook-skip-line-->
+
+
+## Pre-processing the data
+
+In order to prepare our data to training the model, we will perform a few pre-processing steps on the dataset. We will :
+- Resize the image
+- Convert the pixel values in the image from (0 to 255) to (0 to 1)
+- Normalize the images with mean 0 and variance 1. 
+
+We will be using ```gluon.data.vision.tranforms``` which provides out of the box transformation APIs.
+To read more about the available transformations, check out [the official documentation](https://mxnet.incubator.apache.org/api/python/gluon/data.html#vision-transforms).
+
+
+```python
+transformers = [gluon.data.vision.transforms.Resize(224), # We pick 224 as the model we use takes an input of size 224.
+                gluon.data.vision.transforms.ToTensor(), 
+                gluon.data.vision.transforms.Normalize(mean = 0, std = 1)]
+
+# Now we will stack all these together.
+transform = gluon.data.vision.transforms.Compose(transformers)
+```
+
+
+```python
+# Apply the transformations
+fashion_mnist_train = fashion_mnist_train.transform_first(transform)
+fashion_mnist_val = fashion_mnist_val.transform_first(transform)
+```
+
+## Data Loaders
+
+In order to feed the data to our model, we need to use a [Data Loader](https://mxnet.incubator.apache.org/api/python/gluon/data.html?highlight=dataloader#mxnet.gluon.data.DataLoader). 
+
+Data Loaders are used to read the dataset, create mini-batches from the dataset and feed them to the neural network for training.
+
+
+```python
+batch_size = 256 # Batch size of the images
+num_workers = 4 # The number of parallel workers for loading the data using Data Loaders.
+
+train_iter = gluon.data.DataLoader(fashion_mnist_train, batch_size=batch_size, shuffle=True, num_workers=num_workers)
+val_iter = gluon.data.DataLoader(fashion_mnist_val, batch_size=batch_size, shuffle=False, num_workers=num_workers)
+```
+
+## AlexNet Model
+
+AlexNet architecture rose to prominence when it won the [2012 ImageNet LSVRC-2012 competition](http://image-net.org/challenges/LSVRC/2012/).
+
+It contains 5 convolutional layers and 3 fully connected layers. Relu is applied after very convolutional and fully connected layer. Dropout is applied before the first and the second fully connected year.
+
+The [Gluon CV Model Zoo](https://gluon-cv.mxnet.io/model_zoo/classification.html#imagenet) contains a rich collection of state-of-the-art pre-trained models for Computer Vision related tasks.
+
+We will use the ```get_model()``` API from Gluon CV Model Zoo to load the network architecture. 
+
+
+```python
+alexnet_model = get_model('alexnet', pretrained=False, classes = 10, ctx=ctx)
+```
+
+## Initialize the model parameters
+
+
+```python
+alexnet_model.initialize(force_reinit=True, init = mx.init.Xavier(), ctx=ctx)
+```
+
+## Loss Function, Trainer and Optimizer
+
+After defining the model, let's setup the trainer object for training. 
+
+We will be using ```SoftmaxCrossEntropyLoss``` as the loss function since this is a multi-class classification problem. We will be using ```sgd``` (Stochastic Gradient Descent) as the optimizer. 
+
+
+```python
+loss_fn = gluon.loss.SoftmaxCrossEntropyLoss()
+learning_rate = 0.04 # You can experiment with your own learning rate here
+
+trainer = gluon.Trainer(alexnet_model.collect_params(), 
+                        'sgd', {'learning_rate': learning_rate})
+```
+
+## Metrics to Capture
+
+We will be most interested in monitoring the accuracy here. Let's define the metrics for that.
+
+
+```python
+train_acc = mx.metric.Accuracy()
+```
+
+## Training using Fit API
+
+As stated earlier, Fit API greatly simplifies the boiler plate code and complexity for training using MXNet Gluon.
+In just 2 lines of code, we will set up our model for training.
+
+
+```python
+# Define the estimator, by passing to it the model, loss function, metrics, trainer object and context
+estimator = estimator.Estimator(net=alexnet_model, 
+                                loss=loss_fn, 
+                                metrics=train_acc, 
+                                trainers=trainer, 
+                                context=ctx)
+
+# Magic line
+estimator.fit(train_data=train_iter, 
+              epochs=5, 
+              batch_size=batch_size)
+```
+
+    [Epoch 0] [Step 256/60000] time/step: 1.171s accuracy: 0.1133 softmaxcrossentropyloss0: 2.3021 <!--notebook-skip-line-->
+    .... <!--notebook-skip-line-->
+    [Epoch 0] finished in 16.741s: train_accuracy: 0.5996 train_softmaxcrossentropyloss0: 1.0864 <!--notebook-skip-line-->
+    .... <!--notebook-skip-line-->
+    [Epoch 1] finished in 15.313s: train_accuracy: 0.7980 train_softmaxcrossentropyloss0: 0.5410 <!--notebook-skip-line-->
+    .... <!--notebook-skip-line-->
+    [Epoch 2] finished in 15.626s: train_accuracy: 0.8375 train_softmaxcrossentropyloss0: 0.4408 <!--notebook-skip-line-->
+    .... <!--notebook-skip-line-->   
+    [Epoch 3] finished in 15.340s: train_accuracy: 0.8575 train_softmaxcrossentropyloss0: 0.3893 <!--notebook-skip-line-->
+    .... <!--notebook-skip-line-->
+    [Epoch 4] finished in 15.420s: train_accuracy: 0.8694 train_softmaxcrossentropyloss0: 0.3560 <!--notebook-skip-line-->
+
+
+## Comparison with Trainer Loop (Older way)
+
+Without the Fit API, the code to train using the Gluon Trainer Loop looks something like this below :
+
+
+```python
+epochs = 5
+
+alexnet_model = get_model('alexnet', pretrained=False, classes = 10, ctx=ctx)
+alexnet_model.initialize(force_reinit=True, init = mx.init.Xavier(), ctx=ctx)
+
+loss_fn = gluon.loss.SoftmaxCrossEntropyLoss()
+learning_rate = 0.04 # You can experiment with your own learning rate here
+
+trainer = gluon.Trainer(alexnet_model.collect_params(), 
+                        'sgd', {'learning_rate': learning_rate})
+
+acc = mx.metric.Accuracy()
+
+# Gluon Training loop goes here 
+
+for epoch in range(epochs):
+    train_acc = 0.0
+    train_loss = 0.0
+    validation_acc = 0.0
+    acc.reset()
+    for data, label in train_iter:
+        data = data.as_in_context(ctx)
+        label = label.as_in_context(ctx)
+        with autograd.record():
+            output = alexnet_model(data)
+            loss_val = loss_fn(output, label)
+            
+        loss_val.backward()
+        trainer.step(batch_size)
+        acc.update(preds = output, labels=label)
+        l = loss_val.mean().asscalar()
+        train_loss += l
+        train_acc += acc.get()[1]
+    
+    print("Epoch %d: , train acc %.3f , train loss %.3f " % (epoch, train_acc/len(train_iter), train_loss/ len(train_iter)))
+
+# Gluon Training loop ends
+```
+
+    Epoch 0: , train acc 0.412 , train loss 1.106 <!--notebook-skip-line-->
+    Epoch 1: , train acc 0.777 , train loss 0.543 <!--notebook-skip-line-->
+    Epoch 2: , train acc 0.832 , train loss 0.439 <!--notebook-skip-line-->
+    Epoch 3: , train acc 0.857 , train loss 0.387 <!--notebook-skip-line-->
+    Epoch 4: , train acc 0.867 , train loss 0.357 <!--notebook-skip-line-->
+
+
+The training loop involves : 
+- Manually iterating over epochs and batches
+- Recording the gradients during the forward pass
+- Computing the loss function
+- Calling the back propagation on the loss function
+- Applying the training step, i.e, updating the weights
+- Recording any useful metrics in the meanwhile
+
+## Summary
+
+In this tutorial, we learnt how to use ```Gluon Fit APIs``` for training a deep learning model and compared it with the existing gluon trainer loop. 
+
+## Next Steps 
+
+- To learn more about deep learning with MXNet Gluon, see [Deep Learning - The Straight Dope](https://gluon.mxnet.io)
+- For more hands on learning about deep learning, checkout out [Dive into Deep Learning](https://d2l.ai)
+
+<!-- INSERT SOURCE DOWNLOAD BUTTONS -->
\ No newline at end of file

From 39d19e0bb81e599bffee08604df4a88476f0312f Mon Sep 17 00:00:00 2001
From: Piyush Ghai <ghai.8@osu.edu>
Date: Mon, 18 Mar 2019 15:57:09 -0700
Subject: [PATCH 02/28] Added tests for Fit API tutorial

---
 tests/tutorials/test_tutorials.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/tests/tutorials/test_tutorials.py b/tests/tutorials/test_tutorials.py
index 0c4954acbd8b..b39cd2a95d33 100644
--- a/tests/tutorials/test_tutorials.py
+++ b/tests/tutorials/test_tutorials.py
@@ -133,6 +133,9 @@ def test_gluon_learning_rate_schedules_advanced():
 def test_gluon_info_gan():
     assert _test_tutorial_nb('gluon/info_gan')
 
+def test_gluon_fit_api_fashion_mnist():
+    assert _test_tutorial_nb('gluon/fit_api_tutorial')
+
 def test_nlp_cnn():
     assert _test_tutorial_nb('nlp/cnn')
 

From 83b0a9f20105eb04ab8b0ec173d535b756eb4b07 Mon Sep 17 00:00:00 2001
From: Piyush Ghai <ghai.8@osu.edu>
Date: Mon, 18 Mar 2019 16:04:57 -0700
Subject: [PATCH 03/28] Updated index.md for the new tutorial to show up

---
 docs/tutorials/index.md | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/docs/tutorials/index.md b/docs/tutorials/index.md
index 6e31e825e2ca..242a35a229f2 100644
--- a/docs/tutorials/index.md
+++ b/docs/tutorials/index.md
@@ -137,6 +137,8 @@ Select API:&nbsp;
             * [Data Transforms](/tutorials/gluon/transforms.html)
             * [Applying Data Augmentation](/tutorials/gluon/data_augmentation.html)
             * [Data Augmentation with Masks (for Object Segmentation)](https://mxnet.incubator.apache.org/tutorials/python/data_augmentation_with_masks.html)
+        * Fit API
+            * [Using Fit API](/tutorials/gluon/fit_api_tutorial.html)
 </div> <!--end of gluon-->
 
 <div class="module">

From 76e15a36586e9f270bf7537498e75484d48ea0fe Mon Sep 17 00:00:00 2001
From: Piyush Ghai <ghai.8@osu.edu>
Date: Tue, 19 Mar 2019 15:11:39 -0700
Subject: [PATCH 04/28] Addressed PR feedback

---
 docs/tutorials/gluon/fit_api_tutorial.md | 241 +++++++----------------
 1 file changed, 70 insertions(+), 171 deletions(-)

diff --git a/docs/tutorials/gluon/fit_api_tutorial.md b/docs/tutorials/gluon/fit_api_tutorial.md
index 920eb6b447a9..0a27468b26b6 100644
--- a/docs/tutorials/gluon/fit_api_tutorial.md
+++ b/docs/tutorials/gluon/fit_api_tutorial.md
@@ -15,13 +15,14 @@
 <!--- specific language governing permissions and limitations -->
 <!--- under the License. -->
 
+
 # Gluon Fit API
 
 In this tutorial, we will see how to use the [Gluon Fit API](https://cwiki.apache.org/confluence/display/MXNET/Gluon+Fit+API+-+Tech+Design) which is a simple and flexible way to train deep learning models using the [Gluon APIs](http://mxnet.incubator.apache.org/versions/master/gluon/index.html) in Apache MXNet. 
 
 Prior to Fit API, training using Gluon required one to write a custom ["Gluon training loop"](https://mxnet.incubator.apache.org/versions/master/tutorials/gluon/logistic_regression_explained.html#defining-and-training-the-model). Fit API reduces the complexity and amount of boiler plate code required to train a model, provides an easy to use and a powerful API. 
 
-To demonstrate the Fit API, this tutorial will train an Image Classification model using the [AlexNet](https://arxiv.org/abs/1404.5997) architecture for the neural network. The model will be trained using the [Fashion-MNIST dataset](https://research.zalando.com/welcome/mission/research-projects/fashion-mnist/). 
+To demonstrate the Fit API, this tutorial will train an Image Classification model using the [ResNet-18](https://arxiv.org/abs/1512.03385) architecture for the neural network. The model will be trained using the [Fashion-MNIST dataset](https://research.zalando.com/welcome/mission/research-projects/fashion-mnist/). 
 
 
 ## Prerequisites
@@ -29,7 +30,7 @@ To demonstrate the Fit API, this tutorial will train an Image Classification mod
 To complete this tutorial, you will need:
 
 - [MXNet](https://mxnet.incubator.apache.org/install/#overview) (The version of MXNet will be >= 1.5.0)
-- [GluonCV](https://gluon-cv.mxnet.io)
+- [Jupyter Notebook](https://jupyter.org/index.html) (For interactively running the provided .ipynb file)
 
 This tutorial works with both Python 2 and Python 3.
 
@@ -37,12 +38,11 @@ This tutorial works with both Python 2 and Python 3.
 
 ```python
 import mxnet as mx
-from mxnet import gluon, autograd
-from gluoncv import utils
-from gluoncv.model_zoo import get_model
-from mxnet.gluon.estimator import estimator
+from mxnet import gluon
+from mxnet.gluon.model_zoo import vision
+from mxnet.gluon.estimator import estimator, event_handler
 
-ctx = mx.gpu(0) # Or mx.cpu(0) is using a GPU backed machine
+ctx = mx.gpu(0) # Or mx.cpu(0) if not using a GPU backed machine
 mx.random.seed(7) # Set a fixed seed
 ```
 
@@ -64,70 +64,6 @@ fashion_mnist_train = gluon.data.vision.FashionMNIST(train=True)
 fashion_mnist_val = gluon.data.vision.FashionMNIST(train=False)
 ```
 
-## Exploring the Data
-
-
-```python
-text_labels = ['t-shirt/top', 'trouser', 'pullover', 'dress', 'coat',
-                   'sandal', 'shirt', 'sneaker', 'bag', 'ankle boot']
-
-# Let's print the size of the dataset for train and validation.
-print ("Number of training samples : %d" % (len(fashion_mnist_train)))
-print ("Number of validation samples : %d" % (len(fashion_mnist_val)))
-
-
-train_first_elem, train_first_label = fashion_mnist_train[0]
-print ("Shape of each iamge : ", train_first_elem.shape)
-```
-
-    Number of training samples : 60000 <!--notebook-skip-line-->
-    Number of validation samples : 10000 <!--notebook-skip-line-->
-    Shape of each iamge :  (28, 28, 1) <!--notebook-skip-line-->
-
-
-Now let's try to visualize the dataset before we proceed further
-
-
-```python
-from IPython import display
-import matplotlib.pyplot as plt
-
-# Function to display the data
-def show_fashion_mnist_data(images, labels):
-    display.set_matplotlib_formats('svg')
-    _, figs = plt.subplots(1, len(images), figsize=(12, 12))
-    
-    for figure, x, y in zip(figs, images, labels):
-        figure.imshow(x.reshape((28, 28)).asnumpy())
-        axes = figure.axes
-        axes.set_title(text_labels[int(y)])
-        axes.title.set_fontsize(12)
-        axes.get_xaxis().set_visible(False)
-        axes.get_yaxis().set_visible(False)
-    
-    plt.show()
-```
-
-
-```python
-images, labels = fashion_mnist_train[0:10]
-show_fashion_mnist_data(images, labels)
-```
-
-
-![png](https://raw.githubusercontent.com/piyushghai/web-data/master/mxnet/doc/tutorials/gluon/fashion_mnist.png)<!--notebook-skip-line-->
-
-
-## Pre-processing the data
-
-In order to prepare our data to training the model, we will perform a few pre-processing steps on the dataset. We will :
-- Resize the image
-- Convert the pixel values in the image from (0 to 255) to (0 to 1)
-- Normalize the images with mean 0 and variance 1. 
-
-We will be using ```gluon.data.vision.tranforms``` which provides out of the box transformation APIs.
-To read more about the available transformations, check out [the official documentation](https://mxnet.incubator.apache.org/api/python/gluon/data.html#vision-transforms).
-
 
 ```python
 transformers = [gluon.data.vision.transforms.Resize(224), # We pick 224 as the model we use takes an input of size 224.
@@ -145,167 +81,130 @@ fashion_mnist_train = fashion_mnist_train.transform_first(transform)
 fashion_mnist_val = fashion_mnist_val.transform_first(transform)
 ```
 
-## Data Loaders
-
-In order to feed the data to our model, we need to use a [Data Loader](https://mxnet.incubator.apache.org/api/python/gluon/data.html?highlight=dataloader#mxnet.gluon.data.DataLoader). 
-
-Data Loaders are used to read the dataset, create mini-batches from the dataset and feed them to the neural network for training.
-
 
 ```python
 batch_size = 256 # Batch size of the images
 num_workers = 4 # The number of parallel workers for loading the data using Data Loaders.
 
-train_iter = gluon.data.DataLoader(fashion_mnist_train, batch_size=batch_size, shuffle=True, num_workers=num_workers)
-val_iter = gluon.data.DataLoader(fashion_mnist_val, batch_size=batch_size, shuffle=False, num_workers=num_workers)
+train_data_loader = gluon.data.DataLoader(fashion_mnist_train, batch_size=batch_size, shuffle=True, num_workers=num_workers)
+val_data_loader = gluon.data.DataLoader(fashion_mnist_val, batch_size=batch_size, shuffle=False, num_workers=num_workers)
 ```
 
-## AlexNet Model
+## Model and Optimizers
 
-AlexNet architecture rose to prominence when it won the [2012 ImageNet LSVRC-2012 competition](http://image-net.org/challenges/LSVRC/2012/).
-
-It contains 5 convolutional layers and 3 fully connected layers. Relu is applied after very convolutional and fully connected layer. Dropout is applied before the first and the second fully connected year.
-
-The [Gluon CV Model Zoo](https://gluon-cv.mxnet.io/model_zoo/classification.html#imagenet) contains a rich collection of state-of-the-art pre-trained models for Computer Vision related tasks.
-
-We will use the ```get_model()``` API from Gluon CV Model Zoo to load the network architecture. 
+Let's load the resnet-18 model architecture from [Gluon Model Zoo](http://mxnet.apache.org/api/python/gluon/model_zoo.html) and initialize its parameters.
 
 
 ```python
-alexnet_model = get_model('alexnet', pretrained=False, classes = 10, ctx=ctx)
+resnet_18_v1 = vision.resnet18_v1(pretrained=False, classes = 10, ctx=ctx)
+resnet_18_v1.initialize(force_reinit=True, init = mx.init.Xavier(), ctx=ctx)
 ```
 
-## Initialize the model parameters
-
-
-```python
-alexnet_model.initialize(force_reinit=True, init = mx.init.Xavier(), ctx=ctx)
-```
-
-## Loss Function, Trainer and Optimizer
-
 After defining the model, let's setup the trainer object for training. 
 
-We will be using ```SoftmaxCrossEntropyLoss``` as the loss function since this is a multi-class classification problem. We will be using ```sgd``` (Stochastic Gradient Descent) as the optimizer. 
+We will be using ```SoftmaxCrossEntropyLoss``` as the loss function since this is a multi-class classification problem. We will be using ```sgd``` (Stochastic Gradient Descent) as the optimizer. You can experiment with a different optimizer as well. 
 
 
 ```python
 loss_fn = gluon.loss.SoftmaxCrossEntropyLoss()
 learning_rate = 0.04 # You can experiment with your own learning rate here
 
-trainer = gluon.Trainer(alexnet_model.collect_params(), 
+num_epochs = 2 # You can run training for more epochs
+trainer = gluon.Trainer(resnet_18_v1.collect_params(), 
                         'sgd', {'learning_rate': learning_rate})
 ```
 
-## Metrics to Capture
-
-We will be most interested in monitoring the accuracy here. Let's define the metrics for that.
+## Train using Fit API
 
+As stated earlier, Fit API greatly simplifies the boiler plate code and complexity for training using MXNet Gluon.
 
-```python
-train_acc = mx.metric.Accuracy()
-```
-
-## Training using Fit API
+In the basic usage example, with just 2 lines of code, we will set up our model for training.
 
-As stated earlier, Fit API greatly simplifies the boiler plate code and complexity for training using MXNet Gluon.
-In just 2 lines of code, we will set up our model for training.
+### Basic Usage
 
 
 ```python
+train_acc = mx.metric.Accuracy() # Metric to monitor
+
 # Define the estimator, by passing to it the model, loss function, metrics, trainer object and context
-estimator = estimator.Estimator(net=alexnet_model, 
+est = estimator.Estimator(net=resnet_18_v1, 
                                 loss=loss_fn, 
                                 metrics=train_acc, 
                                 trainers=trainer, 
                                 context=ctx)
 
 # Magic line
-estimator.fit(train_data=train_iter, 
-              epochs=5, 
+est.fit(train_data=train_data_loader,
+              epochs=num_epochs, 
               batch_size=batch_size)
 ```
 
-    [Epoch 0] [Step 256/60000] time/step: 1.171s accuracy: 0.1133 softmaxcrossentropyloss0: 2.3021 <!--notebook-skip-line-->
-    .... <!--notebook-skip-line-->
-    [Epoch 0] finished in 16.741s: train_accuracy: 0.5996 train_softmaxcrossentropyloss0: 1.0864 <!--notebook-skip-line-->
+    [Epoch 0] [Step 256/60000] time/step: 1.420s accuracy: 0.0938 softmaxcrossentropyloss0: 2.9419 <!--notebook-skip-line-->
     .... <!--notebook-skip-line-->
-    [Epoch 1] finished in 15.313s: train_accuracy: 0.7980 train_softmaxcrossentropyloss0: 0.5410 <!--notebook-skip-line-->
+    [Epoch 0] finished in 51.375s: train_accuracy: 0.7916 train_softmaxcrossentropyloss0: 0.5750 <!--notebook-skip-line-->
+    [Epoch 1] [Step 256/60000] time/step: 0.414s accuracy: 0.8555 softmaxcrossentropyloss0: 0.3621 <!--notebook-skip-line-->
     .... <!--notebook-skip-line-->
-    [Epoch 2] finished in 15.626s: train_accuracy: 0.8375 train_softmaxcrossentropyloss0: 0.4408 <!--notebook-skip-line-->
-    .... <!--notebook-skip-line-->   
-    [Epoch 3] finished in 15.340s: train_accuracy: 0.8575 train_softmaxcrossentropyloss0: 0.3893 <!--notebook-skip-line-->
-    .... <!--notebook-skip-line-->
-    [Epoch 4] finished in 15.420s: train_accuracy: 0.8694 train_softmaxcrossentropyloss0: 0.3560 <!--notebook-skip-line-->
+    [Epoch 1] finished in 49.889s: train_accuracy: 0.8854 train_softmaxcrossentropyloss0: 0.3157 <!--notebook-skip-line-->
+
 
+### Advanced Usage
 
-## Comparison with Trainer Loop (Older way)
+Fit API is also customizable with several ```Event Handlers``` which gives a fine grained control over the steps in training and exposes callback methods for : ```train_begin```, ```train_end```, ```batch_begin```, ```batch_end```, ```epoch_begin``` and ```epoch_end```. 
 
-Without the Fit API, the code to train using the Gluon Trainer Loop looks something like this below :
+One can use built-in event handlers such as ```LoggingHandler```, ```CheckpointHandler``` or ```EarlyStoppingHandler``` or to create a custom handler, one can create a new class by inherinting [```EventHandler```](https://github.com/apache/incubator-mxnet/blob/fit-api/python/mxnet/gluon/estimator/event_handler.py#L31).
 
 
 ```python
-epochs = 5
+# Let's reset the model, trainer and accuracy objects from above
 
-alexnet_model = get_model('alexnet', pretrained=False, classes = 10, ctx=ctx)
-alexnet_model.initialize(force_reinit=True, init = mx.init.Xavier(), ctx=ctx)
+resnet_18_v1.initialize(force_reinit=True, init = mx.init.Xavier(), ctx=ctx)
+trainer = gluon.Trainer(resnet_18_v1.collect_params(), 
+                        'sgd', {'learning_rate': learning_rate})
+train_acc = mx.metric.Accuracy()
 
-loss_fn = gluon.loss.SoftmaxCrossEntropyLoss()
-learning_rate = 0.04 # You can experiment with your own learning rate here
+```
 
-trainer = gluon.Trainer(alexnet_model.collect_params(), 
-                        'sgd', {'learning_rate': learning_rate})
 
-acc = mx.metric.Accuracy()
-
-# Gluon Training loop goes here 
-
-for epoch in range(epochs):
-    train_acc = 0.0
-    train_loss = 0.0
-    validation_acc = 0.0
-    acc.reset()
-    for data, label in train_iter:
-        data = data.as_in_context(ctx)
-        label = label.as_in_context(ctx)
-        with autograd.record():
-            output = alexnet_model(data)
-            loss_val = loss_fn(output, label)
-            
-        loss_val.backward()
-        trainer.step(batch_size)
-        acc.update(preds = output, labels=label)
-        l = loss_val.mean().asscalar()
-        train_loss += l
-        train_acc += acc.get()[1]
-    
-    print("Epoch %d: , train acc %.3f , train loss %.3f " % (epoch, train_acc/len(train_iter), train_loss/ len(train_iter)))
-
-# Gluon Training loop ends
+```python
+# Define the estimator, by passing to it the model, loss function, metrics, trainer object and context
+est = estimator.Estimator(net=resnet_18_v1,
+                                loss=loss_fn,
+                                metrics=train_acc,
+                                trainers=trainer, 
+                                context=ctx)
+
+# Define the handlers, let's say Checkpointhandler
+checkpoint_handler = event_handler.CheckpointHandler(estimator=est,
+                                                     filepath='./my_best_model.params',
+                                                     monitor='train_accuracy', # Monitors a metric
+                                                     save_best_only=True) # Save the best model in terms of 
+                                                                         # training accuracy
+
+# Magic line
+est.fit(train_data=train_data_loader,
+              epochs=num_epochs,
+              event_handlers=[checkpoint_handler], # Add the event handlers
+              batch_size=batch_size)
 ```
 
-    Epoch 0: , train acc 0.412 , train loss 1.106 <!--notebook-skip-line-->
-    Epoch 1: , train acc 0.777 , train loss 0.543 <!--notebook-skip-line-->
-    Epoch 2: , train acc 0.832 , train loss 0.439 <!--notebook-skip-line-->
-    Epoch 3: , train acc 0.857 , train loss 0.387 <!--notebook-skip-line-->
-    Epoch 4: , train acc 0.867 , train loss 0.357 <!--notebook-skip-line-->
+    [Epoch 0] [Step 256/60000] time/step: 0.426s accuracy: 0.1211 softmaxcrossentropyloss0: 2.6261 <!--notebook-skip-line-->
+    .... <!--notebook-skip-line-->
+    [Epoch 0] finished in 50.390s: train_accuracy: 0.7936 train_softmaxcrossentropyloss0: 0.5639 <!--notebook-skip-line-->
+    [Epoch 1] [Step 256/60000] time/step: 0.414s accuracy: 0.8984 softmaxcrossentropyloss0: 0.2958 <!--notebook-skip-line-->
+    .... <!--notebook-skip-line-->
+    [Epoch 1] finished in 50.474s: train_accuracy: 0.8871 train_softmaxcrossentropyloss0: 0.3101 <!--notebook-skip-line-->
 
 
-The training loop involves : 
-- Manually iterating over epochs and batches
-- Recording the gradients during the forward pass
-- Computing the loss function
-- Calling the back propagation on the loss function
-- Applying the training step, i.e, updating the weights
-- Recording any useful metrics in the meanwhile
+You can load the saved model by using the ```load_parameters``` API in Gluon. For more details, refer to the [Loading model parameters from file tutorial](http://mxnet.incubator.apache.org/versions/master/tutorials/gluon/save_load_params.html#saving-model-parameters-to-file)
 
 ## Summary
 
-In this tutorial, we learnt how to use ```Gluon Fit APIs``` for training a deep learning model and compared it with the existing gluon trainer loop. 
+In this tutorial, we learnt how to use ```Gluon Fit APIs``` for training a deep learning model and also saw an option to customize it with the use of Event Handlers.
+For more references on the Fit API and advanced usage details, check out its [documentation](http://mxnet.apache.org/api/python/gluon/gluon.html).
 
 ## Next Steps 
 
 - To learn more about deep learning with MXNet Gluon, see [Deep Learning - The Straight Dope](https://gluon.mxnet.io)
-- For more hands on learning about deep learning, checkout out [Dive into Deep Learning](https://d2l.ai)
+- For more hands on learning about deep learning, check out [Dive into Deep Learning](https://d2l.ai)
 
-<!-- INSERT SOURCE DOWNLOAD BUTTONS -->
\ No newline at end of file
+<!-- INSERT SOURCE DOWNLOAD BUTTONS -->

From 1f26c249305072fc645d7faf7179badf672c57bf Mon Sep 17 00:00:00 2001
From: Piyush Ghai <ghai.8@osu.edu>
Date: Wed, 20 Mar 2019 15:17:01 -0700
Subject: [PATCH 05/28] Addressed PR feedback

---
 docs/tutorials/gluon/fit_api_tutorial.md | 17 ++++++++---------
 1 file changed, 8 insertions(+), 9 deletions(-)

diff --git a/docs/tutorials/gluon/fit_api_tutorial.md b/docs/tutorials/gluon/fit_api_tutorial.md
index 0a27468b26b6..cf6dd74dec7f 100644
--- a/docs/tutorials/gluon/fit_api_tutorial.md
+++ b/docs/tutorials/gluon/fit_api_tutorial.md
@@ -42,7 +42,7 @@ from mxnet import gluon
 from mxnet.gluon.model_zoo import vision
 from mxnet.gluon.estimator import estimator, event_handler
 
-ctx = mx.gpu(0) # Or mx.cpu(0) if not using a GPU backed machine
+ctx = mx.gpu() if mx.context.num_gpus() > 0 else mx.cpu()
 mx.random.seed(7) # Set a fixed seed
 ```
 
@@ -66,19 +66,18 @@ fashion_mnist_val = gluon.data.vision.FashionMNIST(train=False)
 
 
 ```python
-transformers = [gluon.data.vision.transforms.Resize(224), # We pick 224 as the model we use takes an input of size 224.
-                gluon.data.vision.transforms.ToTensor(), 
-                gluon.data.vision.transforms.Normalize(mean = 0, std = 1)]
+transforms = [gluon.data.vision.transforms.Resize(224), # We pick 224 as the model we use takes an input of size 224.
+                gluon.data.vision.transforms.ToTensor()]
 
 # Now we will stack all these together.
-transform = gluon.data.vision.transforms.Compose(transformers)
+transforms = gluon.data.vision.transforms.Compose(transforms)
 ```
 
 
 ```python
 # Apply the transformations
-fashion_mnist_train = fashion_mnist_train.transform_first(transform)
-fashion_mnist_val = fashion_mnist_val.transform_first(transform)
+fashion_mnist_train = fashion_mnist_train.transform_first(transforms)
+fashion_mnist_val = fashion_mnist_val.transform_first(transforms)
 ```
 
 
@@ -149,7 +148,7 @@ est.fit(train_data=train_data_loader,
 
 ### Advanced Usage
 
-Fit API is also customizable with several ```Event Handlers``` which gives a fine grained control over the steps in training and exposes callback methods for : ```train_begin```, ```train_end```, ```batch_begin```, ```batch_end```, ```epoch_begin``` and ```epoch_end```. 
+Fit API is also customizable with several `Event Handlers` which give a fine grained control over the steps in training and exposes callback methods for : `train_begin`, `train_end`, `batch_begin`, `batch_end`, `epoch_begin` and `epoch_end`.
 
 One can use built-in event handlers such as ```LoggingHandler```, ```CheckpointHandler``` or ```EarlyStoppingHandler``` or to create a custom handler, one can create a new class by inherinting [```EventHandler```](https://github.com/apache/incubator-mxnet/blob/fit-api/python/mxnet/gluon/estimator/event_handler.py#L31).
 
@@ -195,7 +194,7 @@ est.fit(train_data=train_data_loader,
     [Epoch 1] finished in 50.474s: train_accuracy: 0.8871 train_softmaxcrossentropyloss0: 0.3101 <!--notebook-skip-line-->
 
 
-You can load the saved model, by using ```load_parameters``` API in Gluon. For more details refer to the [Loding model parameters from file tutorial](http://mxnet.incubator.apache.org/versions/master/tutorials/gluon/save_load_params.html#saving-model-parameters-to-file)
+You can load the saved model, by using ```load_parameters``` API in Gluon. For more details refer to the [Loading model parameters from file tutorial](http://mxnet.incubator.apache.org/versions/master/tutorials/gluon/save_load_params.html#saving-model-parameters-to-file)
 
 ## Summary
 

From 8ff448ede6edea5a6ad86475979fa5efa4c82686 Mon Sep 17 00:00:00 2001
From: Piyush Ghai <ghai.8@osu.edu>
Date: Wed, 20 Mar 2019 15:21:00 -0700
Subject: [PATCH 06/28] Removed spurious comment for Py2 and Py3 compatibility

---
 docs/tutorials/gluon/fit_api_tutorial.md | 1 -
 1 file changed, 1 deletion(-)

diff --git a/docs/tutorials/gluon/fit_api_tutorial.md b/docs/tutorials/gluon/fit_api_tutorial.md
index cf6dd74dec7f..7c9a1b4aec58 100644
--- a/docs/tutorials/gluon/fit_api_tutorial.md
+++ b/docs/tutorials/gluon/fit_api_tutorial.md
@@ -32,7 +32,6 @@ To complete this tutorial, you will need:
 - [MXNet](https://mxnet.incubator.apache.org/install/#overview) (The version of MXNet will be >= 1.5.0)
 - [Jupyter Notebook](https://jupyter.org/index.html) (For interactively running the provided .ipynb file)
 
-This tutorial works with both Python 2 and Python 3.
 
 
 

From 57705079c3f90fd34bf16eb8a76ded73fc7b6d91 Mon Sep 17 00:00:00 2001
From: Piyush Ghai <ghai.8@osu.edu>
Date: Fri, 5 Apr 2019 11:15:52 -0700
Subject: [PATCH 07/28] Address PR feedback

---
 docs/tutorials/gluon/fit_api_tutorial.md | 32 +++++++++++++-----------
 1 file changed, 17 insertions(+), 15 deletions(-)

diff --git a/docs/tutorials/gluon/fit_api_tutorial.md b/docs/tutorials/gluon/fit_api_tutorial.md
index 7c9a1b4aec58..3c541ddc3757 100644
--- a/docs/tutorials/gluon/fit_api_tutorial.md
+++ b/docs/tutorials/gluon/fit_api_tutorial.md
@@ -16,11 +16,11 @@
 <!--- under the License. -->
 
 
-# Gluon Fit API
+# MXNet Gluon Fit API
 
-In this tutorial, we will see how to use the [Gluon Fit API](https://cwiki.apache.org/confluence/display/MXNET/Gluon+Fit+API+-+Tech+Design) which is a simple and flexible way to train deep learning models using the [Gluon APIs](http://mxnet.incubator.apache.org/versions/master/gluon/index.html) in Apache MXNet. 
+In this tutorial, we will see how to use the [Gluon Fit API](https://cwiki.apache.org/confluence/display/MXNET/Gluon+Fit+API+-+Tech+Design) which is the easiest way to train deep learning models using the [Gluon API](http://mxnet.incubator.apache.org/versions/master/gluon/index.html) in Apache MXNet. 
 
-Prior to Fit API, training using Gluon required one to write a custom ["Gluon training loop"](https://mxnet.incubator.apache.org/versions/master/tutorials/gluon/logistic_regression_explained.html#defining-and-training-the-model). Fit API reduces the complexity and amount of boiler plate code required to train a model, provides an easy to use and a powerful API. 
+With the Fit API, you can train a deep learning model with miminal amount of code. Just specify the network, loss function and the data you want to train on. You don't need to worry about the boiler plate code to loop through the dataset in batches(often called as 'training loop'). Advanced users can still do this for bespolke training loops, but most use cases will be covered by the Fit API.
 
 To demonstrate the Fit API, this tutorial will train an Image Classification model using the [ResNet-18](https://arxiv.org/abs/1512.03385) architecture for the neural network. The model will be trained using the [Fashion-MNIST dataset](https://research.zalando.com/welcome/mission/research-projects/fashion-mnist/). 
 
@@ -47,12 +47,12 @@ mx.random.seed(7) # Set a fixed seed
 
 ## Dataset
 
-[Fashion-MNIST](https://research.zalando.com/welcome/mission/research-projects/fashion-mnist/) dataset consists of fashion items divided into ten categories : t-shirt/top, trouser, pullover, dress, coat, sandal, shirt, sneaker, bag and ankle boot. 
+[Fashion-MNIST](https://research.zalando.com/welcome/mission/research-projects/fashion-mnist/) dataset consists of fashion items divided into ten categories: t-shirt/top, trouser, pullover, dress, coat, sandal, shirt, sneaker, bag and ankle boot. 
 
 - It has 60,000 gray scale images of size 28 * 28 for training.  
 - It has 10,000 gray scale images os size 28 * 28 for testing/validation. 
 
-We will use ```gluon.data.vision``` package to directly import the Fashion-MNIST dataset and perform pre-processing on it.
+We will use the ```gluon.data.vision``` package to directly import the Fashion-MNIST dataset and perform pre-processing on it.
 
 
 ```python
@@ -90,23 +90,25 @@ val_data_loader = gluon.data.DataLoader(fashion_mnist_val, batch_size=batch_size
 
 ## Model and Optimizers
 
-Let's load the resnet-18 model architecture from [Gluon Model Zoo](http://mxnet.apache.org/api/python/gluon/model_zoo.html) and initialize it's parameters.
+Let's load the resnet-18 model architecture from [Gluon Model Zoo](http://mxnet.apache.org/api/python/gluon/model_zoo.html) and initialize its parameters. The Gluon Model Zoo contains a repository of pre-trained models as well as the model architecture definitions. We are using the model architecture from the model zoo in order to train it from scratch.
 
 
 ```python
-resnet_18_v1 = vision.resnet18_v1(pretrained=False, classes = 10, ctx=ctx)
-resnet_18_v1.initialize(force_reinit=True, init = mx.init.Xavier(), ctx=ctx)
+resnet_18_v1 = vision.resnet18_v1(pretrained=False, classes = 10)
+resnet_18_v1.initialize(init = mx.init.Xavier(), ctx=ctx)
 ```
 
-After defining the model, let's setup the trainer object for training. 
-
 We will be using ```SoftmaxCrossEntropyLoss``` as the loss function since this is a multi-class classification problem. We will be using ```sgd``` (Stochastic Gradient Descent) as the optimizer. You can experiment with a different optimizer as well. 
 
 
 ```python
 loss_fn = gluon.loss.SoftmaxCrossEntropyLoss()
-learning_rate = 0.04 # You can experiment with your own learning rate here
+```
 
+Let's define the trainer object for training the model.
+
+```python
+learning_rate = 0.04 # You can experiment with your own learning rate here
 num_epochs = 2 # You can run training for more epochs
 trainer = gluon.Trainer(resnet_18_v1.collect_params(), 
                         'sgd', {'learning_rate': learning_rate})
@@ -147,9 +149,9 @@ est.fit(train_data=train_data_loader,
 
 ### Advanced Usage
 
-Fit API is also customizable with several `Event Handlers` which give a fine grained control over the steps in training and exposes callback methods for : `train_begin`, `train_end`, `batch_begin`, `batch_end`, `epoch_begin` and `epoch_end`.
+Fit API is also customizable with several `Event Handlers` which give a fine grained control over the steps in training and exposes callback methods that provide control over the stages involved in training. Available callback methods are: `train_begin`, `train_end`, `batch_begin`, `batch_end`, `epoch_begin` and `epoch_end`.
 
-One can use built-in event handlers such as ```LoggingHandler```, ```CheckpointHandler``` or ```EarlyStoppingHandler``` or to create a custom handler, one can create a new class by inherinting [```EventHandler```](https://github.com/apache/incubator-mxnet/blob/fit-api/python/mxnet/gluon/estimator/event_handler.py#L31).
+One can use built-in event handlers such as `LoggingHandler`, `CheckpointHandler` or `EarlyStoppingHandler` to log and save the model at certain timesteps during training and stopping the training when the model's performance plateaus. One can also create a custom handler by inheriting [`EventHandler`](https://github.com/apache/incubator-mxnet/blob/master/python/mxnet/gluon/estimator/event_handler.py#L31).
 
 
 ```python
@@ -181,7 +183,7 @@ checkpoint_handler = event_handler.CheckpointHandler(estimator=est,
 # Magic line
 est.fit(train_data=train_data_loader,
               epochs=num_epochs,
-              event_handlers=[checkpoint_handler], # Add the event handlers
+              event_handlers=checkpoint_handler, # Add the event handlers
               batch_size=batch_size)
 ```
 
@@ -198,7 +200,7 @@ You can load the saved model, by using ```load_parameters``` API in Gluon. For m
 ## Summary
 
 In this tutorial, we learnt how to use ```Gluon Fit APIs``` for training a deep learning model and also saw an option to customize it with the use of Event Handlers.
-For more references on the Fit API and advanced usage details, checkout its [documentation](http://mxnet.apache.org/api/python/gluon/gluon.html).
+More references and advanced usage details can be found in the [documentation](http://mxnet.apache.org/api/python/gluon/gluon.html).
 
 ## Next Steps 
 

From c223af1ecfd756e6ae616376199aff8dedb32b7a Mon Sep 17 00:00:00 2001
From: Piyush Ghai <ghai.8@osu.edu>
Date: Fri, 5 Apr 2019 13:17:26 -0700
Subject: [PATCH 08/28] Addressed PR feedback

---
 docs/tutorials/gluon/fit_api_tutorial.md | 77 +++++++++++++-----------
 1 file changed, 43 insertions(+), 34 deletions(-)

diff --git a/docs/tutorials/gluon/fit_api_tutorial.md b/docs/tutorials/gluon/fit_api_tutorial.md
index 3c541ddc3757..6cd48fbc2391 100644
--- a/docs/tutorials/gluon/fit_api_tutorial.md
+++ b/docs/tutorials/gluon/fit_api_tutorial.md
@@ -24,7 +24,6 @@ With the Fit API, you can train a deep learning model with miminal amount of cod
 
 To demonstrate the Fit API, this tutorial will train an Image Classification model using the [ResNet-18](https://arxiv.org/abs/1512.03385) architecture for the neural network. The model will be trained using the [Fashion-MNIST dataset](https://research.zalando.com/welcome/mission/research-projects/fashion-mnist/). 
 
-
 ## Prerequisites
 
 To complete this tutorial, you will need:
@@ -41,7 +40,8 @@ from mxnet import gluon
 from mxnet.gluon.model_zoo import vision
 from mxnet.gluon.estimator import estimator, event_handler
 
-ctx = mx.gpu() if mx.context.num_gpus() > 0 else mx.cpu()
+gpu_count = mx.context.num_gpus()
+ctx = [mx.gpu(i) for i in range(gpu_count)] if gpu_count > 0 else mx.cpu()
 mx.random.seed(7) # Set a fixed seed
 ```
 
@@ -84,8 +84,10 @@ fashion_mnist_val = fashion_mnist_val.transform_first(transforms)
 batch_size = 256 # Batch size of the images
 num_workers = 4 # The number of parallel workers for loading the data using Data Loaders.
 
-train_data_loader = gluon.data.DataLoader(fashion_mnist_train, batch_size=batch_size, shuffle=True, num_workers=num_workers)
-val_data_loader = gluon.data.DataLoader(fashion_mnist_val, batch_size=batch_size, shuffle=False, num_workers=num_workers)
+train_data_loader = gluon.data.DataLoader(fashion_mnist_train, batch_size=batch_size, 
+                                          shuffle=True, num_workers=num_workers)
+val_data_loader = gluon.data.DataLoader(fashion_mnist_val, batch_size=batch_size, 
+                                        shuffle=False, num_workers=num_workers)
 ```
 
 ## Model and Optimizers
@@ -107,6 +109,7 @@ loss_fn = gluon.loss.SoftmaxCrossEntropyLoss()
 
 Let's define the trainer object for training the model.
 
+
 ```python
 learning_rate = 0.04 # You can experiment with your own learning rate here
 num_epochs = 2 # You can run training for more epochs
@@ -128,23 +131,23 @@ train_acc = mx.metric.Accuracy() # Metric to monitor
 
 # Define the estimator, by passing to it the model, loss function, metrics, trainer object and context
 est = estimator.Estimator(net=resnet_18_v1, 
-                                loss=loss_fn, 
-                                metrics=train_acc, 
-                                trainers=trainer, 
-                                context=ctx)
+                          loss=loss_fn, 
+                          metrics=train_acc, 
+                          trainer=trainer, 
+                          context=ctx)
 
 # Magic line
 est.fit(train_data=train_data_loader,
-              epochs=num_epochs, 
-              batch_size=batch_size)
+        epochs=num_epochs)
 ```
 
-    [Epoch 0] [Step 256/60000] time/step: 1.420s accuracy: 0.0938 softmaxcrossentropyloss0: 2.9419 <!--notebook-skip-line-->
-    .... <!--notebook-skip-line-->
-    [Epoch 0] finished in 51.375s: train_accuracy: 0.7916 train_softmaxcrossentropyloss0: 0.5750 <!--notebook-skip-line-->
-    [Epoch 1] [Step 256/60000] time/step: 0.414s accuracy: 0.8555 softmaxcrossentropyloss0: 0.3621 <!--notebook-skip-line-->
-    .... <!--notebook-skip-line-->
-    [Epoch 1] finished in 49.889s: train_accuracy: 0.8854 train_softmaxcrossentropyloss0: 0.3157 <!--notebook-skip-line-->
+    Training begin: using optimizer SGD with current learning rate 0.0400 <!--notebook-skip-line-->
+    Train for 2 epochs. <!--notebook-skip-line-->
+    
+    [Epoch 0] finished in 25.110s: train_accuracy : 0.7877 train_softmaxcrossentropyloss0 : 0.5905 <!--notebook-skip-line-->
+    
+    [Epoch 1] finished in 23.595s: train_accuracy : 0.8823 train_softmaxcrossentropyloss0 : 0.3197 <!--notebook-skip-line-->
+    Train finished using total 48s at epoch 1. train_accuracy : 0.8823 train_softmaxcrossentropyloss0 : 0.3197 <!--notebook-skip-line-->
 
 
 ### Advanced Usage
@@ -161,41 +164,47 @@ resnet_18_v1.initialize(force_reinit=True, init = mx.init.Xavier(), ctx=ctx)
 trainer = gluon.Trainer(resnet_18_v1.collect_params(), 
                         'sgd', {'learning_rate': learning_rate})
 train_acc = mx.metric.Accuracy()
-
 ```
 
 
 ```python
 # Define the estimator, by passing to it the model, loss function, metrics, trainer object and context
 est = estimator.Estimator(net=resnet_18_v1,
-                                loss=loss_fn,
-                                metrics=train_acc,
-                                trainers=trainer, 
-                                context=ctx)
+                          loss=loss_fn,
+                          metrics=train_acc,
+                          trainer=trainer, 
+                          context=ctx)
 
 # Define the handlers, let's say Checkpointhandler
-checkpoint_handler = event_handler.CheckpointHandler(estimator=est,
-                                                     filepath='./my_best_model.params',
-                                                     monitor='train_accuracy', # Monitors a metric
+checkpoint_handler = event_handler.CheckpointHandler(filepath='./my_best_model.params',
+                                                     monitor='val_accuracy', # Monitors a metric
                                                      save_best_only=True) # Save the best model in terms of 
                                                                          # training accuracy
 
 # Magic line
 est.fit(train_data=train_data_loader,
-              epochs=num_epochs,
-              event_handlers=checkpoint_handler, # Add the event handlers
-              batch_size=batch_size)
+        val_data=val_data_loader,
+        epochs=num_epochs,
+        event_handlers=checkpoint_handler) # Add the event handlers
 ```
 
-    [Epoch 0] [Step 256/60000] time/step: 0.426s accuracy: 0.1211 softmaxcrossentropyloss0: 2.6261 
-    .... <!--notebook-skip-line-->
-    [Epoch 0] finished in 50.390s: train_accuracy: 0.7936 train_softmaxcrossentropyloss0: 0.5639 <!--notebook-skip-line-->
-    [Epoch 1] [Step 256/60000] time/step: 0.414s accuracy: 0.8984 softmaxcrossentropyloss0: 0.2958 <!--notebook-skip-line-->
-    .... <!--notebook-skip-line-->
-    [Epoch 1] finished in 50.474s: train_accuracy: 0.8871 train_softmaxcrossentropyloss0: 0.3101 <!--notebook-skip-line-->
+    Training begin: using optimizer SGD with current learning rate 0.0400 <!--notebook-skip-line-->
+    Train for 2 epochs. <!--notebook-skip-line-->
+    
+    [Epoch 0] finished in 25.236s: train_accuracy : 0.7917 train_softmaxcrossentropyloss0 : 0.5741 val_accuracy : 0.6612 val_softmaxcrossentropyloss0 : 0.8627 <!--notebook-skip-line-->
+    
+    [Epoch 1] finished in 24.892s: train_accuracy : 0.8826 train_softmaxcrossentropyloss0 : 0.3229 val_accuracy : 0.8474 val_softmaxcrossentropyloss0 : 0.4262 <!--notebook-skip-line-->
+    
+    Train finished using total 50s at epoch 1. train_accuracy : 0.8826 train_softmaxcrossentropyloss0 : 0.3229 val_accuracy : 0.8474 val_softmaxcrossentropyloss0 : 0.4262 <!--notebook-skip-line-->
+
 
+You can load the saved model, by using ```load_parameters``` API in Gluon. For more details refer to the [Loding model parameters from file tutorial](http://mxnet.incubator.apache.org/versions/master/tutorials/gluon/save_load_params.html#saving-model-parameters-to-file)
 
-You can load the saved model, by using ```load_parameters``` API in Gluon. For more details refer to the [Loading model parameters from file tutorial](http://mxnet.incubator.apache.org/versions/master/tutorials/gluon/save_load_params.html#saving-model-parameters-to-file)
+
+```python
+resnet_18_v1 = vision.resnet18_v1(pretrained=False, classes = 10)
+resnet_18_v1.load_parameters('./my_best_model.params', ctx=ctx)
+```
 
 ## Summary
 

From d1f662ffcdab3768aee22db497e3b593e88d7a59 Mon Sep 17 00:00:00 2001
From: Piyush Ghai <ghai.8@osu.edu>
Date: Fri, 5 Apr 2019 13:23:01 -0700
Subject: [PATCH 09/28] Fixed typo

---
 docs/tutorials/gluon/fit_api_tutorial.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/tutorials/gluon/fit_api_tutorial.md b/docs/tutorials/gluon/fit_api_tutorial.md
index 6cd48fbc2391..b643bea0d69b 100644
--- a/docs/tutorials/gluon/fit_api_tutorial.md
+++ b/docs/tutorials/gluon/fit_api_tutorial.md
@@ -198,7 +198,7 @@ est.fit(train_data=train_data_loader,
     Train finished using total 50s at epoch 1. train_accuracy : 0.8826 train_softmaxcrossentropyloss0 : 0.3229 val_accuracy : 0.8474 val_softmaxcrossentropyloss0 : 0.4262 <!--notebook-skip-line-->
 
 
-You can load the saved model, by using ```load_parameters``` API in Gluon. For more details refer to the [Loding model parameters from file tutorial](http://mxnet.incubator.apache.org/versions/master/tutorials/gluon/save_load_params.html#saving-model-parameters-to-file)
+You can load the saved model, by using ```load_parameters``` API in Gluon. For more details refer to the [Loading model parameters from file tutorial](http://mxnet.incubator.apache.org/versions/master/tutorials/gluon/save_load_params.html#saving-model-parameters-to-file)
 
 
 ```python

From 97de8c438f94d40d4ba4195f491395629e9fadf4 Mon Sep 17 00:00:00 2001
From: Piyush Ghai <ghai.8@osu.edu>
Date: Fri, 5 Apr 2019 14:15:39 -0700
Subject: [PATCH 10/28] Added example to showcase custom event handler

---
 docs/tutorials/gluon/fit_api_tutorial.md | 35 ++++++++++++++++++++++--
 1 file changed, 32 insertions(+), 3 deletions(-)

diff --git a/docs/tutorials/gluon/fit_api_tutorial.md b/docs/tutorials/gluon/fit_api_tutorial.md
index b643bea0d69b..239557d7ff49 100644
--- a/docs/tutorials/gluon/fit_api_tutorial.md
+++ b/docs/tutorials/gluon/fit_api_tutorial.md
@@ -156,6 +156,31 @@ Fit API is also customizable with several `Event Handlers` which give a fine gra
 
 One can use built-in event handlers such as `LoggingHandler`, `CheckpointHandler` or `EarlyStoppingHandler` to log and save the model at certain timesteps during training and stopping the training when the model's performance plateaus. One can also create a custom handler by inheriting [`EventHandler`](https://github.com/apache/incubator-mxnet/blob/master/python/mxnet/gluon/estimator/event_handler.py#L31).
 
+### Custom Event Handler
+
+Here we will showcase an example to create a custom event handler by inheriting from `EventHandler` class. Our custom event handler is a simple one, that just records the loss values at the end of every epoch in our training phase.
+
+
+```python
+class LossRecordHandler(EventHandler):
+    def __init__(self):
+        super(LossRecordHandler, self).__init__()
+        self.losses = []
+    
+    def train_begin(self):
+        print ("Training begin")
+
+    def train_end(self):
+        # Print all the losses at the end of training
+        for i, loss in enumerate(self.losses):
+            print ("Epoch {}, loss {}".format(i, loss)) 
+
+    def epoch_end(self):
+        loss_name = self.estimator.loss[0].name # Access the loss from estimator
+        loss_val = self.estimator.train_stats['train_'+ loss_name] # Get the loss value at current epoch
+        self.losses.append(loss_val) # Append it to losses
+```
+
 
 ```python
 # Let's reset the model, trainer and accuracy objects from above
@@ -175,17 +200,18 @@ est = estimator.Estimator(net=resnet_18_v1,
                           trainer=trainer, 
                           context=ctx)
 
-# Define the handlers, let's say Checkpointhandler
+# Define the handlers, let's say the built-in CheckpointHandler
 checkpoint_handler = event_handler.CheckpointHandler(filepath='./my_best_model.params',
                                                      monitor='val_accuracy', # Monitors a metric
                                                      save_best_only=True) # Save the best model in terms of 
                                                                          # training accuracy
-
+# Let's instantiate another handler which we defined above 
+loss_record_handler = LossRecordHandler()
 # Magic line
 est.fit(train_data=train_data_loader,
         val_data=val_data_loader,
         epochs=num_epochs,
-        event_handlers=checkpoint_handler) # Add the event handlers
+        event_handlers=[checkpoint_handler, loss_record_handler]) # Add the event handlers
 ```
 
     Training begin: using optimizer SGD with current learning rate 0.0400 <!--notebook-skip-line-->
@@ -197,6 +223,9 @@ est.fit(train_data=train_data_loader,
     
     Train finished using total 50s at epoch 1. train_accuracy : 0.8826 train_softmaxcrossentropyloss0 : 0.3229 val_accuracy : 0.8474 val_softmaxcrossentropyloss0 : 0.4262 <!--notebook-skip-line-->
 
+    Training begin <!--notebook-skip-line-->
+    Epoch 0, loss 0.5741 <!--notebook-skip-line-->
+    Epoch 1, loss 0.3229 <!--notebook-skip-line-->
 
 You can load the saved model, by using ```load_parameters``` API in Gluon. For more details refer to the [Loading model parameters from file tutorial](http://mxnet.incubator.apache.org/versions/master/tutorials/gluon/save_load_params.html#saving-model-parameters-to-file)
 

From 44a560aa930dc527353bf901759103e75f32a060 Mon Sep 17 00:00:00 2001
From: Piyush Ghai <ghai.8@osu.edu>
Date: Fri, 5 Apr 2019 14:20:59 -0700
Subject: [PATCH 11/28] Fixed imports as estimator moved to contrib package

---
 docs/tutorials/gluon/fit_api_tutorial.md | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/docs/tutorials/gluon/fit_api_tutorial.md b/docs/tutorials/gluon/fit_api_tutorial.md
index 239557d7ff49..85065afc724a 100644
--- a/docs/tutorials/gluon/fit_api_tutorial.md
+++ b/docs/tutorials/gluon/fit_api_tutorial.md
@@ -38,7 +38,8 @@ To complete this tutorial, you will need:
 import mxnet as mx
 from mxnet import gluon
 from mxnet.gluon.model_zoo import vision
-from mxnet.gluon.estimator import estimator, event_handler
+from mxnet.gluon.contrib.estimator import estimator, event_handler
+from mxnet.gluon.contrib.estimator.event_handler import EventHandler
 
 gpu_count = mx.context.num_gpus()
 ctx = [mx.gpu(i) for i in range(gpu_count)] if gpu_count > 0 else mx.cpu()

From f7b13565b5ca321ba59c24c57affb9fed08886be Mon Sep 17 00:00:00 2001
From: Piyush Ghai <ghai.8@osu.edu>
Date: Fri, 5 Apr 2019 14:38:28 -0700
Subject: [PATCH 12/28] Added a side note to inform about estimator reference
 being updated by the handlers

---
 docs/tutorials/gluon/fit_api_tutorial.md | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/docs/tutorials/gluon/fit_api_tutorial.md b/docs/tutorials/gluon/fit_api_tutorial.md
index 85065afc724a..026e4b64bf36 100644
--- a/docs/tutorials/gluon/fit_api_tutorial.md
+++ b/docs/tutorials/gluon/fit_api_tutorial.md
@@ -161,6 +161,8 @@ One can use built-in event handlers such as `LoggingHandler`, `CheckpointHandler
 
 Here we will showcase an example to create a custom event handler by inheriting from `EventHandler` class. Our custom event handler is a simple one, that just records the loss values at the end of every epoch in our training phase.
 
+Note : The `EventHandler` holds a reference to the `Estimator` object. The Estimator object reference is updated when the Fit API is called.
+
 
 ```python
 class LossRecordHandler(EventHandler):

From dd7f94f3bac2c7eb1fe42f1db0391c1e98d3b81a Mon Sep 17 00:00:00 2001
From: Piyush Ghai <ghai.8@osu.edu>
Date: Tue, 9 Apr 2019 20:13:13 -0700
Subject: [PATCH 13/28] Corrected typo

---
 docs/tutorials/gluon/fit_api_tutorial.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/tutorials/gluon/fit_api_tutorial.md b/docs/tutorials/gluon/fit_api_tutorial.md
index 026e4b64bf36..5f8fcc52bf47 100644
--- a/docs/tutorials/gluon/fit_api_tutorial.md
+++ b/docs/tutorials/gluon/fit_api_tutorial.md
@@ -20,7 +20,7 @@
 
 In this tutorial, we will see how to use the [Gluon Fit API](https://cwiki.apache.org/confluence/display/MXNET/Gluon+Fit+API+-+Tech+Design) which is the easiest way to train deep learning models using the [Gluon API](http://mxnet.incubator.apache.org/versions/master/gluon/index.html) in Apache MXNet. 
 
-With the Fit API, you can train a deep learning model with miminal amount of code. Just specify the network, loss function and the data you want to train on. You don't need to worry about the boiler plate code to loop through the dataset in batches(often called as 'training loop'). Advanced users can still do this for bespolke training loops, but most use cases will be covered by the Fit API.
+With the Fit API, you can train a deep learning model with a minimal amount of code. Just specify the network, loss function and the data you want to train on. You don't need to worry about the boilerplate code to loop through the dataset in batches (often called a 'training loop'). Advanced users can still write bespoke training loops, but most use cases will be covered by the Fit API.
 
 To demonstrate the Fit API, this tutorial will train an Image Classification model using the [ResNet-18](https://arxiv.org/abs/1512.03385) architecture for the neural network. The model will be trained using the [Fashion-MNIST dataset](https://research.zalando.com/welcome/mission/research-projects/fashion-mnist/). 
 

From 3f2135102cdcab1b8e55e8a83a635c9abcaf4b64 Mon Sep 17 00:00:00 2001
From: Lai Wei <lawei@amazon.com>
Date: Tue, 18 Jun 2019 23:36:11 -0700
Subject: [PATCH 14/28] update tutorial

---
 docs/tutorials/gluon/fit_api_tutorial.md      | 64 +++++++++++--------
 .../gluon/contrib/estimator/event_handler.py  |  2 +-
 2 files changed, 40 insertions(+), 26 deletions(-)

diff --git a/docs/tutorials/gluon/fit_api_tutorial.md b/docs/tutorials/gluon/fit_api_tutorial.md
index 5f8fcc52bf47..cb8e0ad126b3 100644
--- a/docs/tutorials/gluon/fit_api_tutorial.md
+++ b/docs/tutorials/gluon/fit_api_tutorial.md
@@ -38,8 +38,8 @@ To complete this tutorial, you will need:
 import mxnet as mx
 from mxnet import gluon
 from mxnet.gluon.model_zoo import vision
-from mxnet.gluon.contrib.estimator import estimator, event_handler
-from mxnet.gluon.contrib.estimator.event_handler import EventHandler
+from mxnet.gluon.contrib.estimator import estimator
+from mxnet.gluon.contrib.estimator.event_handler import TrainBegin, TrainEnd, EpochEnd, CheckpointHandler
 
 gpu_count = mx.context.num_gpus()
 ctx = [mx.gpu(i) for i in range(gpu_count)] if gpu_count > 0 else mx.cpu()
@@ -155,33 +155,47 @@ est.fit(train_data=train_data_loader,
 
 Fit API is also customizable with several `Event Handlers` which give a fine grained control over the steps in training and exposes callback methods that provide control over the stages involved in training. Available callback methods are: `train_begin`, `train_end`, `batch_begin`, `batch_end`, `epoch_begin` and `epoch_end`.
 
-One can use built-in event handlers such as `LoggingHandler`, `CheckpointHandler` or `EarlyStoppingHandler` to log and save the model at certain timesteps during training and stopping the training when the model's performance plateaus. One can also create a custom handler by inheriting [`EventHandler`](https://github.com/apache/incubator-mxnet/blob/master/python/mxnet/gluon/estimator/event_handler.py#L31).
+One can use built-in event handlers such as `LoggingHandler`, `CheckpointHandler` or `EarlyStoppingHandler` to log and save the model at certain timesteps during training and stopping the training when the model's performance plateaus. 
+There are also some default utility handlers that will be added to your estimator by default. For example, `StoppingHandler` is used to control when the training ends based on number of epochs or number of batches trained. 
+`MetricHandler` is used to calculate training metrics at end of each batch and epoch. 
+`ValidationHandler` is used to validate your model on test data at epoch end and  calculate validation metrics.
+One can create these utility handlers with different configurations and pass to estimator, it will override the default handler configuration.
+One can also create a custom handler by inheriting one or multiple of 
+[base event handlers](https://github.com/apache/incubator-mxnet/blob/master/python/mxnet/gluon/contrib/estimator/event_handler.py#L32)
+ including: `TrainBegin`, `TrainEnd`, `EpochBegin`, `EpochEnd`, `BatchBegin`, `BatchEnd`.
 
-### Custom Event Handler
 
-Here we will showcase an example to create a custom event handler by inheriting from `EventHandler` class. Our custom event handler is a simple one, that just records the loss values at the end of every epoch in our training phase.
+### Custom Event Handler
 
-Note : The `EventHandler` holds a reference to the `Estimator` object. The Estimator object reference is updated when the Fit API is called.
+Here we will showcase an example to create a custom event handler by inheriting from a few base handler class. 
+Our custom event handler is a simple one, that just record the loss values at the end of every epoch in our training phase.
 
+Note : For each of the method, the Estimator object is passed along so you can access train metrics.
 
 ```python
-class LossRecordHandler(EventHandler):
+class LossRecordHandler(TrainBegin, TrainEnd, EpochEnd):
     def __init__(self):
         super(LossRecordHandler, self).__init__()
-        self.losses = []
-    
-    def train_begin(self):
-        print ("Training begin")
+        self.loss_history = {}
 
-    def train_end(self):
-        # Print all the losses at the end of training
-        for i, loss in enumerate(self.losses):
-            print ("Epoch {}, loss {}".format(i, loss)) 
+    def train_begin(self, estimator, *args, **kwargs):
+        print("Training begin")
 
-    def epoch_end(self):
-        loss_name = self.estimator.loss[0].name # Access the loss from estimator
-        loss_val = self.estimator.train_stats['train_'+ loss_name] # Get the loss value at current epoch
-        self.losses.append(loss_val) # Append it to losses
+    def train_end(self, estimator, *args, **kwargs):
+        # Print all the losses at the end of training
+        print("Training ended")
+        for loss_name in self.loss_history:
+            for i, loss_val in enumerate(self.loss_history[loss_name]):
+                print("Epoch: {}, Loss name: {}, Loss value: {}".format(i, loss_name, loss_val))
+
+    def epoch_end(self, estimator, *args, **kwargs):
+        for metric in estimator.train_metrics:
+            # look for train Loss in training metrics
+            # we wrapped loss value as a metric to record it
+            if isinstance(metric, mx.metric.Loss):
+                loss_name, loss_val = metric.get()
+                # append loss value for this epoch
+                self.loss_history.setdefault(loss_name, []).append(loss_val)
 ```
 
 
@@ -204,10 +218,10 @@ est = estimator.Estimator(net=resnet_18_v1,
                           context=ctx)
 
 # Define the handlers, let's say in built Checkpointhandler
-checkpoint_handler = event_handler.CheckpointHandler(filepath='./my_best_model.params',
-                                                     monitor='val_accuracy', # Monitors a metric
-                                                     save_best_only=True) # Save the best model in terms of 
-                                                                         # training accuracy
+checkpoint_handler = CheckpointHandler(model_dir='./',
+                                       model_prefix='my_model',
+                                       monitor=train_acc,  # Monitors a metric
+                                       save_best=True)  # Save the best model in terms of training accuracy
 # Let's instantiate another handler which we defined above 
 loss_record_handler = LossRecordHandler()
 # Magic line
@@ -234,8 +248,8 @@ You can load the saved model, by using ```load_parameters``` API in Gluon. For m
 
 
 ```python
-resnet_18_v1 = vision.resnet18_v1(pretrained=False, classes = 10)
-resnet_18_v1.load_parameters('./my_best_model.params', ctx=ctx)
+resnet_18_v1 = vision.resnet18_v1(pretrained=False, classes=10)
+resnet_18_v1.load_parameters('./my_model-best.params', ctx=ctx)
 ```
 
 ## Summary
diff --git a/python/mxnet/gluon/contrib/estimator/event_handler.py b/python/mxnet/gluon/contrib/estimator/event_handler.py
index ed97c7bc3d19..e9afa5d561b3 100644
--- a/python/mxnet/gluon/contrib/estimator/event_handler.py
+++ b/python/mxnet/gluon/contrib/estimator/event_handler.py
@@ -506,7 +506,7 @@ def _save_checkpoint(self, estimator):
 
     def _save_symbol(self, estimator):
         symbol_file = os.path.join(self.model_dir, self.model_prefix + '-symbol.json')
-        if hasattr(estimator.net, '_cached_graph'):
+        if hasattr(estimator.net, '_cached_graph') and not estimator.net._cached_graph:
             sym = estimator.net._cached_graph[1]
             sym.save(symbol_file)
         else:

From c808ac0993a8450a63f9cab57e6e3aee7bf71c70 Mon Sep 17 00:00:00 2001
From: Lai Wei <lawei@amazon.com>
Date: Tue, 16 Jul 2019 14:18:44 -0700
Subject: [PATCH 15/28] address comments

---
 docs/tutorials/gluon/fit_api_tutorial.md | 44 +++++++++++-------------
 python/mxnet/gluon/contrib/__init__.py   |  2 ++
 2 files changed, 23 insertions(+), 23 deletions(-)

diff --git a/docs/tutorials/gluon/fit_api_tutorial.md b/docs/tutorials/gluon/fit_api_tutorial.md
index cb8e0ad126b3..cef815cb785e 100644
--- a/docs/tutorials/gluon/fit_api_tutorial.md
+++ b/docs/tutorials/gluon/fit_api_tutorial.md
@@ -18,17 +18,17 @@
 
 # MXNet Gluon Fit API
 
-In this tutorial, we will see how to use the [Gluon Fit API](https://cwiki.apache.org/confluence/display/MXNET/Gluon+Fit+API+-+Tech+Design) which is the easiest way to train deep learning models using the [Gluon API](http://mxnet.incubator.apache.org/versions/master/gluon/index.html) in Apache MXNet. 
+In this tutorial, you will learn how to use the [Gluon Fit API](https://cwiki.apache.org/confluence/display/MXNET/Gluon+Fit+API+-+Tech+Design) which is the easiest way to train deep learning models using the [Gluon API](http://mxnet.incubator.apache.org/versions/master/gluon/index.html) in Apache MXNet. 
 
-With the Fit API, you can train a deep learning model with miminal amount of code. Just specify the network, loss function and the data you want to train on. You don't need to worry about the boiler plate code to loop through the dataset in batches(often called as 'training loop'). Advanced users can still do this for bespoke training loops, but most use cases will be covered by the Fit API.
+With the Fit API, you can train a deep learning model with a minimal amount of code. Just specify the network, loss function and the data you want to train on. You don't need to worry about the boilerplate code to loop through the dataset in batches (often called the 'training loop'). Advanced users can train with bespoke training loops, and many of these use cases will be covered by the Fit API.
 
-To demonstrate the Fit API, this tutorial will train an Image Classification model using the [ResNet-18](https://arxiv.org/abs/1512.03385) architecture for the neural network. The model will be trained using the [Fashion-MNIST dataset](https://research.zalando.com/welcome/mission/research-projects/fashion-mnist/). 
+To demonstrate the Fit API, you will train an image classification model using the [ResNet-18](https://arxiv.org/abs/1512.03385) neural network architecture. The model will be trained using the [Fashion-MNIST dataset](https://research.zalando.com/welcome/mission/research-projects/fashion-mnist/). 
 
 ## Prerequisites
 
 To complete this tutorial, you will need:
 
-- [MXNet](https://mxnet.incubator.apache.org/install/#overview) (The version of MXNet will be >= 1.5.0)
+- [MXNet](https://mxnet.incubator.apache.org/install/#overview) (The version of MXNet will be >= 1.5.0, you can use `pip install mxnet --pre` to get the latest pip package or build from source with master, refer to [MXNet installation](http://mxnet.incubator.apache.org/versions/master/install/index.html?platform=Linux&language=Python&processor=CPU)
 - [Jupyter Notebook](https://jupyter.org/index.html) (For interactively running the provided .ipynb file)
 
 
@@ -43,15 +43,14 @@ from mxnet.gluon.contrib.estimator.event_handler import TrainBegin, TrainEnd, Ep
 
 gpu_count = mx.context.num_gpus()
 ctx = [mx.gpu(i) for i in range(gpu_count)] if gpu_count > 0 else mx.cpu()
-mx.random.seed(7) # Set a fixed seed
 ```
 
 ## Dataset
 
 [Fashion-MNIST](https://research.zalando.com/welcome/mission/research-projects/fashion-mnist/) dataset consists of fashion items divided into ten categories: t-shirt/top, trouser, pullover, dress, coat, sandal, shirt, sneaker, bag and ankle boot. 
 
-- It has 60,000 gray scale images of size 28 * 28 for training.  
-- It has 10,000 gray scale images os size 28 * 28 for testing/validation. 
+- It has 60,000 grayscale images of size 28 * 28 for training.  
+- It has 10,000 grayscale images os size 28 * 28 for testing/validation. 
 
 We will use the ```gluon.data.vision``` package to directly import the Fashion-MNIST dataset and perform pre-processing on it.
 
@@ -93,7 +92,7 @@ val_data_loader = gluon.data.DataLoader(fashion_mnist_val, batch_size=batch_size
 
 ## Model and Optimizers
 
-Let's load the resnet-18 model architecture from [Gluon Model Zoo](http://mxnet.apache.org/api/python/gluon/model_zoo.html) and initialize it's parameters. The Gluon Model Zoo contains a repository of pre-trained models as well the model architecture definitions. We are using the model architecture from the model zoo in order to train it from scratch.
+Let's load the resnet-18 model architecture from [Gluon Model Zoo](http://mxnet.apache.org/api/python/gluon/model_zoo.html) and initialize its parameters. The Gluon Model Zoo contains a repository of pre-trained models as well as the model architecture definitions. We are using the model architecture from the model zoo in order to train it from scratch.
 
 
 ```python
@@ -101,7 +100,8 @@ resnet_18_v1 = vision.resnet18_v1(pretrained=False, classes = 10)
 resnet_18_v1.initialize(init = mx.init.Xavier(), ctx=ctx)
 ```
 
-We will be using ```SoftmaxCrossEntropyLoss``` as the loss function since this is a multi-class classification problem. We will be using ```sgd``` (Stochastic Gradient Descent) as the optimizer. You can experiment with a different optimizer as well. 
+We will be using `SoftmaxCrossEntropyLoss` as the loss function since this is a multi-class classification problem. We will be using `sgd` (Stochastic Gradient Descent) as the optimizer. 
+You can experiment with a [different loss](http://mxnet.incubator.apache.org/versions/master/api/python/gluon/loss.html) or [optimizer](http://mxnet.incubator.apache.org/versions/master/api/python/optimization/optimization.html) as well. 
 
 
 ```python
@@ -120,7 +120,7 @@ trainer = gluon.Trainer(resnet_18_v1.collect_params(),
 
 ## Train using Fit API
 
-As stated earlier, Fit API greatly simplifies the boiler plate code and complexity for training using MXNet Gluon.
+As stated earlier, the Fit API greatly simplifies the boiler plate code and complexity for training using MXNet Gluon.
 
 In the basic usage example, with just 2 lines of code, we will set up our model for training.
 
@@ -153,24 +153,24 @@ est.fit(train_data=train_data_loader,
 
 ### Advanced Usage
 
-Fit API is also customizable with several `Event Handlers` which give a fine grained control over the steps in training and exposes callback methods that provide control over the stages involved in training. Available callback methods are: `train_begin`, `train_end`, `batch_begin`, `batch_end`, `epoch_begin` and `epoch_end`.
+The Fit API is also customizable with several `Event Handlers` which give fine-grained control over the steps in training and expose callback methods that provide control over the stages involved in training. Available callback methods are: `train_begin`, `train_end`, `batch_begin`, `batch_end`, `epoch_begin` and `epoch_end`.
 
-One can use built-in event handlers such as `LoggingHandler`, `CheckpointHandler` or `EarlyStoppingHandler` to log and save the model at certain timesteps during training and stopping the training when the model's performance plateaus. 
-There are also some default utility handlers that will be added to your estimator by default. For example, `StoppingHandler` is used to control when the training ends based on number of epochs or number of batches trained. 
+You can use built-in event handlers such as `LoggingHandler`, `CheckpointHandler` or `EarlyStoppingHandler` to log and save the model at certain time-steps during training. You can also stop the training when the model's performance plateaus. 
+There are also some default utility handlers that will be added to your estimator by default. For example, `StoppingHandler` is used to control when the training ends, based on number of epochs or number of batches trained. 
 `MetricHandler` is used to calculate training metrics at end of each batch and epoch. 
-`ValidationHandler` is used to validate your model on test data at epoch end and  calculate validation metrics.
-One can create these utility handlers with different configurations and pass to estimator, it will override the default handler configuration.
-One can also create a custom handler by inheriting one or multiple of 
+`ValidationHandler` is used to validate your model on test data at each epoch's end and then calculate validation metrics.
+You can create these utility handlers with different configurations and pass them to the estimator. This will override the default handler configuration.
+You can create a custom handler by inheriting one or multiple 
 [base event handlers](https://github.com/apache/incubator-mxnet/blob/master/python/mxnet/gluon/contrib/estimator/event_handler.py#L32)
  including: `TrainBegin`, `TrainEnd`, `EpochBegin`, `EpochEnd`, `BatchBegin`, `BatchEnd`.
 
 
 ### Custom Event Handler
 
-Here we will showcase an example to create a custom event handler by inheriting from a few base handler class. 
-Our custom event handler is a simple one, that just record the loss values at the end of every epoch in our training phase.
+Here we will showcase an example custom event handler that inherits features from a few base handler classes. 
+Our custom event handler is a simple one: record the loss values at the end of every epoch in our training phase.
 
-Note : For each of the method, the Estimator object is passed along so you can access train metrics.
+Note: For each of the methods, the `Estimator` object is passed along, so you can access training metrics.
 
 ```python
 class LossRecordHandler(TrainBegin, TrainEnd, EpochEnd):
@@ -244,7 +244,7 @@ est.fit(train_data=train_data_loader,
     Epoch 1, loss 0.5741 <!--notebook-skip-line-->
     Epoch 2, loss 0.3229 <!--notebook-skip-line-->
 
-You can load the saved model, by using ```load_parameters``` API in Gluon. For more details refer to the [Loading model parameters from file tutorial](http://mxnet.incubator.apache.org/versions/master/tutorials/gluon/save_load_params.html#saving-model-parameters-to-file)
+You can load the saved model, by using the `load_parameters` API in Gluon. For more details refer to the [Loading model parameters from file tutorial](save_load_params.html#saving-model-parameters-to-file)
 
 
 ```python
@@ -254,12 +254,10 @@ resnet_18_v1.load_parameters('./my_model-best.params', ctx=ctx)
 
 ## Summary
 
-In this tutorial, we learnt how to use ```Gluon Fit APIs``` for training a deep learning model and also saw an option to customize it with the use of Event Handlers.
-For more references and advanced usage details can be found in the [documentation](http://mxnet.apache.org/api/python/gluon/gluon.html).
+- To learn more about deep learning with MXNet, see [Dive Into Deep Learning](http://gluon.io)
 
 ## Next Steps 
 
-- To learn more about deep learning with MXNet Gluon, see [Deep Learning - The Straight Dope](https://gluon.mxnet.io)
 - For more hands on learning about deep learning, check out [Dive into Deep Learning](https://d2l.ai)
 
 <!-- INSERT SOURCE DOWNLOAD BUTTONS -->
diff --git a/python/mxnet/gluon/contrib/__init__.py b/python/mxnet/gluon/contrib/__init__.py
index 83be8a39ba32..0ab5d636bb32 100644
--- a/python/mxnet/gluon/contrib/__init__.py
+++ b/python/mxnet/gluon/contrib/__init__.py
@@ -25,3 +25,5 @@
 from . import cnn
 
 from . import data
+
+from . import estimator
\ No newline at end of file

From eb63fccbe8fa4a834033b27fc1dc46f13520a10f Mon Sep 17 00:00:00 2001
From: Lai Wei <lawei@amazon.com>
Date: Tue, 16 Jul 2019 14:22:46 -0700
Subject: [PATCH 16/28] new line

---
 python/mxnet/gluon/contrib/__init__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/mxnet/gluon/contrib/__init__.py b/python/mxnet/gluon/contrib/__init__.py
index 0ab5d636bb32..7590eb740f67 100644
--- a/python/mxnet/gluon/contrib/__init__.py
+++ b/python/mxnet/gluon/contrib/__init__.py
@@ -26,4 +26,4 @@
 
 from . import data
 
-from . import estimator
\ No newline at end of file
+from . import estimator

From 1ce7e51a5681d15ff7908c9b9f421d6dcb7703e8 Mon Sep 17 00:00:00 2001
From: Lai Wei <lawei@amazon.com>
Date: Fri, 19 Jul 2019 16:36:38 -0700
Subject: [PATCH 17/28] fix import

---
 python/mxnet/gluon/contrib/estimator/__init__.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/python/mxnet/gluon/contrib/estimator/__init__.py b/python/mxnet/gluon/contrib/estimator/__init__.py
index 58600dadffb4..08ef36aabfcf 100644
--- a/python/mxnet/gluon/contrib/estimator/__init__.py
+++ b/python/mxnet/gluon/contrib/estimator/__init__.py
@@ -17,5 +17,7 @@
 
 # pylint: disable=wildcard-import
 """Gluon Estimator Module"""
+from . import  estimator
+from . import  event_handler
 from .estimator import *
 from .event_handler import *

From 6907df077fce432b7211eca7385effdb505d06bf Mon Sep 17 00:00:00 2001
From: Lai Wei <lawei@amazon.com>
Date: Thu, 25 Jul 2019 14:55:21 -0700
Subject: [PATCH 18/28] fix cached graph

---
 python/mxnet/gluon/contrib/estimator/event_handler.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/mxnet/gluon/contrib/estimator/event_handler.py b/python/mxnet/gluon/contrib/estimator/event_handler.py
index e9afa5d561b3..57315e0be951 100644
--- a/python/mxnet/gluon/contrib/estimator/event_handler.py
+++ b/python/mxnet/gluon/contrib/estimator/event_handler.py
@@ -506,7 +506,7 @@ def _save_checkpoint(self, estimator):
 
     def _save_symbol(self, estimator):
         symbol_file = os.path.join(self.model_dir, self.model_prefix + '-symbol.json')
-        if hasattr(estimator.net, '_cached_graph') and not estimator.net._cached_graph:
+        if hasattr(estimator.net, '_cached_graph') and estimator.net._cached_graph:
             sym = estimator.net._cached_graph[1]
             sym.save(symbol_file)
         else:

From 19d85f30754acc0d4abb1a57c7d7d8c835b17843 Mon Sep 17 00:00:00 2001
From: Lai Wei <lawei@amazon.com>
Date: Mon, 29 Jul 2019 15:46:53 -0700
Subject: [PATCH 19/28] fix import

---
 python/mxnet/gluon/contrib/estimator/__init__.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/python/mxnet/gluon/contrib/estimator/__init__.py b/python/mxnet/gluon/contrib/estimator/__init__.py
index 08ef36aabfcf..b9e9da4444e8 100644
--- a/python/mxnet/gluon/contrib/estimator/__init__.py
+++ b/python/mxnet/gluon/contrib/estimator/__init__.py
@@ -17,7 +17,6 @@
 
 # pylint: disable=wildcard-import
 """Gluon Estimator Module"""
-from . import  estimator
 from . import  event_handler
 from .estimator import *
 from .event_handler import *

From 0f99e89b1586eb2b84e8ec6ce96e05545358a61e Mon Sep 17 00:00:00 2001
From: Lai Wei <lawei@amazon.com>
Date: Mon, 29 Jul 2019 15:51:00 -0700
Subject: [PATCH 20/28] address comments

---
 docs/tutorials/gluon/fit_api_tutorial.md         | 4 ++--
 python/mxnet/gluon/contrib/estimator/__init__.py | 1 -
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/docs/tutorials/gluon/fit_api_tutorial.md b/docs/tutorials/gluon/fit_api_tutorial.md
index cef815cb785e..99b9efe62465 100644
--- a/docs/tutorials/gluon/fit_api_tutorial.md
+++ b/docs/tutorials/gluon/fit_api_tutorial.md
@@ -28,7 +28,7 @@ To demonstrate the Fit API, you will train an image classification model using t
 
 To complete this tutorial, you will need:
 
-- [MXNet](https://mxnet.incubator.apache.org/install/#overview) (The version of MXNet will be >= 1.5.0, you can use `pip install mxnet --pre` to get the latest pip package or build from source with master, refer to [MXNet installation](http://mxnet.incubator.apache.org/versions/master/install/index.html?platform=Linux&language=Python&processor=CPU)
+- [MXNet](https://mxnet.incubator.apache.org/install/#overview) (MXNet version 1.5.0 or later is required. You can use `pip install mxnet` to get the 1.5.0 release pip package, or build from source from master; refer to [MXNet installation](http://mxnet.incubator.apache.org/versions/master/install/index.html?platform=Linux&language=Python&processor=CPU).)
 - [Jupyter Notebook](https://jupyter.org/index.html) (For interactively running the provided .ipynb file)
 
 
@@ -50,7 +50,7 @@ ctx = [mx.gpu(i) for i in range(gpu_count)] if gpu_count > 0 else mx.cpu()
 [Fashion-MNIST](https://research.zalando.com/welcome/mission/research-projects/fashion-mnist/) dataset consists of fashion items divided into ten categories: t-shirt/top, trouser, pullover, dress, coat, sandal, shirt, sneaker, bag and ankle boot. 
 
 - It has 60,000 grayscale images of size 28 * 28 for training.  
-- It has 10,000 grayscale images os size 28 * 28 for testing/validation. 
+- It has 10,000 grayscale images of size 28 * 28 for testing/validation. 
 
 We will use the ```gluon.data.vision``` package to directly import the Fashion-MNIST dataset and perform pre-processing on it.
 
diff --git a/python/mxnet/gluon/contrib/estimator/__init__.py b/python/mxnet/gluon/contrib/estimator/__init__.py
index b9e9da4444e8..09ab2eb6c20a 100644
--- a/python/mxnet/gluon/contrib/estimator/__init__.py
+++ b/python/mxnet/gluon/contrib/estimator/__init__.py
@@ -18,5 +18,4 @@
 # pylint: disable=wildcard-import
 """Gluon Estimator Module"""
 from . import  event_handler
-from .estimator import *
 from .event_handler import *

From 75ec74359096ea2e9274e935f3e1d9a8d8c08ccf Mon Sep 17 00:00:00 2001
From: Lai Wei <lawei@amazon.com>
Date: Mon, 29 Jul 2019 23:26:51 -0700
Subject: [PATCH 21/28] fix doc gen

---
 .../mxnet/gluon/contrib/estimator/__init__.py |  4 ++-
 .../gluon/contrib/estimator/estimator.py      | 31 +++++++++++--------
 2 files changed, 21 insertions(+), 14 deletions(-)

diff --git a/python/mxnet/gluon/contrib/estimator/__init__.py b/python/mxnet/gluon/contrib/estimator/__init__.py
index 09ab2eb6c20a..bb0a0917c363 100644
--- a/python/mxnet/gluon/contrib/estimator/__init__.py
+++ b/python/mxnet/gluon/contrib/estimator/__init__.py
@@ -17,5 +17,7 @@
 
 # pylint: disable=wildcard-import
 """Gluon Estimator Module"""
-from . import  event_handler
+from . import estimator
+from . import event_handler
+from .estimator import *
 from .event_handler import *
diff --git a/python/mxnet/gluon/contrib/estimator/estimator.py b/python/mxnet/gluon/contrib/estimator/estimator.py
index da1a3915caec..9f52e64852e7 100644
--- a/python/mxnet/gluon/contrib/estimator/estimator.py
+++ b/python/mxnet/gluon/contrib/estimator/estimator.py
@@ -24,9 +24,14 @@
 
 from .event_handler import MetricHandler, ValidationHandler, LoggingHandler, StoppingHandler
 from .event_handler import TrainBegin, EpochBegin, BatchBegin, BatchEnd, EpochEnd, TrainEnd
-from .... import gluon, autograd
+from ...data import DataLoader
+from ...loss import Loss as gluon_loss
+from ...trainer import Trainer
+from ...utils import split_and_load
+from .... import autograd
 from ....context import Context, cpu, gpu, num_gpus
-from ....metric import EvalMetric, Loss, Accuracy
+from ....metric import EvalMetric, Accuracy
+from ....metric import Loss as metric_loss
 
 __all__ = ['Estimator']
 
@@ -69,9 +74,9 @@ def __init__(self, net,
         self.trainer = self._check_trainer(trainer)
 
     def _check_loss(self, loss):
-        if isinstance(loss, gluon.loss.Loss):
+        if isinstance(loss, gluon_loss):
             loss = [loss]
-        elif isinstance(loss, list) and all([isinstance(l, gluon.loss.Loss) for l in loss]):
+        elif isinstance(loss, list) and all([isinstance(l, gluon_loss) for l in loss]):
             loss = loss
         else:
             raise ValueError("loss must be a Loss or a list of Loss, "
@@ -146,9 +151,9 @@ def _check_trainer(self, trainer):
         if not trainer:
             warnings.warn("No trainer specified, default SGD optimizer "
                           "with learning rate 0.001 is used.")
-            trainer = gluon.Trainer(self.net.collect_params(),
+            trainer = Trainer(self.net.collect_params(),
                                     'sgd', {'learning_rate': 0.001})
-        elif not isinstance(trainer, gluon.Trainer):
+        elif not isinstance(trainer, Trainer):
             raise ValueError("Trainer must be a Gluon Trainer instance, refer to "
                              "gluon.Trainer:{}".format(trainer))
         return trainer
@@ -165,8 +170,8 @@ def _is_initialized(self):
     def _get_data_and_label(self, batch, ctx, batch_axis=0):
         data = batch[0]
         label = batch[1]
-        data = gluon.utils.split_and_load(data, ctx_list=ctx, batch_axis=batch_axis)
-        label = gluon.utils.split_and_load(label, ctx_list=ctx, batch_axis=batch_axis)
+        data = split_and_load(data, ctx_list=ctx, batch_axis=batch_axis)
+        label = split_and_load(label, ctx_list=ctx, batch_axis=batch_axis)
         return data, label
 
     def prepare_loss_and_metrics(self):
@@ -185,7 +190,7 @@ def prepare_loss_and_metrics(self):
             self.val_metrics = []
             for loss in self.loss:
                 # remove trailing numbers from loss name to avoid confusion
-                self.train_metrics.append(Loss(loss.name.rstrip('1234567890')))
+                self.train_metrics.append(metric_loss(loss.name.rstrip('1234567890')))
             for metric in self.train_metrics:
                 val_metric = copy.deepcopy(metric)
                 metric.name = "train " + metric.name
@@ -208,10 +213,10 @@ def evaluate(self,
          batch_axis : int, default 0
              Batch axis to split the validation data into devices.
          """
-        if not isinstance(val_data, gluon.data.DataLoader):
+        if not isinstance(val_data, DataLoader):
             raise ValueError("Estimator only support input as Gluon DataLoader. Alternatively, you "
                              "can transform your DataIter or any NDArray into Gluon DataLoader. "
-                             "Refer to gluon.data.dataloader")
+                             "Refer to gluon.data.DataLoader")
 
         for metric in val_metrics:
             metric.reset()
@@ -222,7 +227,7 @@ def evaluate(self,
             loss = [self.loss[0](y_hat, y) for y_hat, y in zip(pred, label)]
             # update metrics
             for metric in val_metrics:
-                if isinstance(metric, Loss):
+                if isinstance(metric, metric_loss):
                     metric.update(0, loss)
                 else:
                     metric.update(label, pred)
@@ -254,7 +259,7 @@ def fit(self, train_data,
         batch_axis : int, default 0
             Batch axis to split the training data into devices.
         """
-        if not isinstance(train_data, gluon.data.DataLoader):
+        if not isinstance(train_data, DataLoader):
             raise ValueError("Estimator only support input as Gluon DataLoader. Alternatively, you "
                              "can transform your DataIter or any NDArray into Gluon DataLoader. "
                              "Refer to gluon.data.dataloader")

From 3b1b1853b9859599999fd3bc8a5c54fa8353b71f Mon Sep 17 00:00:00 2001
From: Lai Wei <lawei@amazon.com>
Date: Mon, 29 Jul 2019 23:29:37 -0700
Subject: [PATCH 22/28] add softmax

---
 python/mxnet/gluon/contrib/estimator/estimator.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/python/mxnet/gluon/contrib/estimator/estimator.py b/python/mxnet/gluon/contrib/estimator/estimator.py
index 9f52e64852e7..5e3804784ba8 100644
--- a/python/mxnet/gluon/contrib/estimator/estimator.py
+++ b/python/mxnet/gluon/contrib/estimator/estimator.py
@@ -25,6 +25,7 @@
 from .event_handler import MetricHandler, ValidationHandler, LoggingHandler, StoppingHandler
 from .event_handler import TrainBegin, EpochBegin, BatchBegin, BatchEnd, EpochEnd, TrainEnd
 from ...data import DataLoader
+from ...loss import SoftmaxCrossEntropyLoss
 from ...loss import Loss as gluon_loss
 from ...trainer import Trainer
 from ...utils import split_and_load
@@ -184,8 +185,8 @@ def prepare_loss_and_metrics(self):
         """
         if any(not hasattr(self, attribute) for attribute in
                ['train_metrics', 'val_metrics']):
-            # Use default mx.metric.Accuracy() for gluon.loss.SoftmaxCrossEntropyLoss()
-            if not self.train_metrics and any([isinstance(l, gluon.loss.SoftmaxCrossEntropyLoss) for l in self.loss]):
+            # Use default mx.metric.Accuracy() for SoftmaxCrossEntropyLoss()
+            if not self.train_metrics and any([isinstance(l, SoftmaxCrossEntropyLoss) for l in self.loss]):
                 self.train_metrics = [Accuracy()]
             self.val_metrics = []
             for loss in self.loss:

From 0b8ebb0b8b2cd8182de86e2424eab45b60802c25 Mon Sep 17 00:00:00 2001
From: Lai Wei <lawei@amazon.com>
Date: Tue, 30 Jul 2019 09:34:24 -0700
Subject: [PATCH 23/28] add to website index

---
 docs/api/python/gluon/contrib.md              | 29 +++++++++++++++++++
 .../gluon/contrib/estimator/event_handler.py  |  7 +++--
 2 files changed, 34 insertions(+), 2 deletions(-)

diff --git a/docs/api/python/gluon/contrib.md b/docs/api/python/gluon/contrib.md
index a940f697de69..2cf893664e37 100644
--- a/docs/api/python/gluon/contrib.md
+++ b/docs/api/python/gluon/contrib.md
@@ -114,6 +114,32 @@ In the rest of this document, we list routines provided by the `gluon.contrib` p
     WikiText103
 ```
 
+### Estimator
+
+```eval_rst
+.. currentmodule:: mxnet.gluon.contrib.estimator
+
+.. autosummary::
+    :nosignatures:
+    
+    Estimator
+```
+
+#### EventHandler
+```eval_rst
+.. currentmodule:: mxnet.gluon.contrib.estimator.event_handler
+
+.. autosummary::
+    :nosignatures:
+
+    StoppingHandler
+    MetricHandler
+    ValidationHandler
+    LoggingHandler
+    CheckpointHandler
+    EarlyStoppingHandler
+```
+
 ## API Reference
 
 <script type="text/javascript" src='../../../_static/js/auto_module_index.js'></script>
@@ -144,6 +170,9 @@ In the rest of this document, we list routines provided by the `gluon.contrib` p
     :members:
     :imported-members:
 
+.. automodule:: mxnet.gluon.contrib.estimator
+    :members:
+    :imported-members:
 ```
 
 <script>auto_index("api-reference");</script>
diff --git a/python/mxnet/gluon/contrib/estimator/event_handler.py b/python/mxnet/gluon/contrib/estimator/event_handler.py
index 57315e0be951..bd7cfce9ab7b 100644
--- a/python/mxnet/gluon/contrib/estimator/event_handler.py
+++ b/python/mxnet/gluon/contrib/estimator/event_handler.py
@@ -26,8 +26,11 @@
 
 import numpy as np
 
-from ....metric import EvalMetric, Loss
+from ....metric import EvalMetric
+from ....metric import Loss as metric_loss
 
+__all__ = ['StoppingHandler', 'MetricHandler', 'ValidationHandler',
+           'LoggingHandler', 'CheckpointHandler', 'EarlyStoppingHandler']
 
 class TrainBegin(object):
     def train_begin(self, estimator, *args, **kwargs):
@@ -127,7 +130,7 @@ def batch_end(self, estimator, *args, **kwargs):
         label = kwargs['label']
         loss = kwargs['loss']
         for metric in self.train_metrics:
-            if isinstance(metric, Loss):
+            if isinstance(metric, metric_loss):
                 # metric wrapper for loss values
                 metric.update(0, loss)
             else:

From 55c54e520785a3073f4aa0ec6e41a9339ca376a6 Mon Sep 17 00:00:00 2001
From: Lai Wei <lawei@amazon.com>
Date: Tue, 30 Jul 2019 09:47:27 -0700
Subject: [PATCH 24/28] fix doc string

---
 python/mxnet/gluon/contrib/estimator/event_handler.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/mxnet/gluon/contrib/estimator/event_handler.py b/python/mxnet/gluon/contrib/estimator/event_handler.py
index bd7cfce9ab7b..660bf34122c5 100644
--- a/python/mxnet/gluon/contrib/estimator/event_handler.py
+++ b/python/mxnet/gluon/contrib/estimator/event_handler.py
@@ -138,7 +138,7 @@ def batch_end(self, estimator, *args, **kwargs):
 
 
 class ValidationHandler(TrainBegin, BatchEnd, EpochEnd):
-    """"Validation Handler that evaluate model on validation dataset
+    """Validation Handler that evaluate model on validation dataset
 
     :py:class:`ValidationHandler` takes validation dataset, an evaluation function,
     metrics to be evaluated, and how often to run the validation. You can provide custom

From a69b40624d5f04d74a01c8dba0493fa174489f78 Mon Sep 17 00:00:00 2001
From: Lai Wei <royweilai@gmail.com>
Date: Tue, 30 Jul 2019 17:07:28 -0700
Subject: [PATCH 25/28] Fix doc gen (#12)

* fix warning

* fix test

* fix

* fix

* fix print
---
 docs/tutorials/gluon/fit_api_tutorial.md      | 22 +++++++++++-----
 .../gluon/contrib/estimator/estimator.py      | 26 ++++++++++++-------
 .../gluon/contrib/estimator/event_handler.py  |  7 ++---
 tests/python/unittest/test_gluon_estimator.py |  2 ++
 4 files changed, 38 insertions(+), 19 deletions(-)

diff --git a/docs/tutorials/gluon/fit_api_tutorial.md b/docs/tutorials/gluon/fit_api_tutorial.md
index 99b9efe62465..bc50690ac1a2 100644
--- a/docs/tutorials/gluon/fit_api_tutorial.md
+++ b/docs/tutorials/gluon/fit_api_tutorial.md
@@ -137,8 +137,12 @@ est = estimator.Estimator(net=resnet_18_v1,
                           trainer=trainer, 
                           context=ctx)
 
-# Magic line
-est.fit(train_data=train_data_loader,
+# ignore warnings for nightly test on CI only
+import warnings
+with warnings.catch_warnings():
+    warnings.simplefilter("ignore")
+    # Magic line
+    est.fit(train_data=train_data_loader,
         epochs=num_epochs)
 ```
 
@@ -224,11 +228,15 @@ checkpoint_handler = CheckpointHandler(model_dir='./',
                                        save_best=True)  # Save the best model in terms of
 # Let's instantiate another handler which we defined above 
 loss_record_handler = LossRecordHandler()
-# Magic line
-est.fit(train_data=train_data_loader,
-        val_data=val_data_loader,
-        epochs=num_epochs,
-        event_handlers=[checkpoint_handler, loss_record_handler]) # Add the event handlers
+# ignore warnings for nightly test on CI only
+import warnings
+with warnings.catch_warnings():
+    warnings.simplefilter("ignore")
+    # Magic line
+    est.fit(train_data=train_data_loader,
+            val_data=val_data_loader,
+            epochs=num_epochs,
+            event_handlers=[checkpoint_handler, loss_record_handler]) # Add the event handlers
 ```
 
     Training begin: using optimizer SGD with current learning rate 0.0400 <!--notebook-skip-line-->
diff --git a/python/mxnet/gluon/contrib/estimator/estimator.py b/python/mxnet/gluon/contrib/estimator/estimator.py
index 5e3804784ba8..d077bcd4fdeb 100644
--- a/python/mxnet/gluon/contrib/estimator/estimator.py
+++ b/python/mxnet/gluon/contrib/estimator/estimator.py
@@ -334,28 +334,36 @@ def fit(self, train_data,
     def _prepare_default_handlers(self, val_data, event_handlers):
         event_handlers = event_handlers or []
         default_handlers = []
-        train_metrics, val_metrics = self.prepare_loss_and_metrics()
+        self.prepare_loss_and_metrics()
 
         # no need to add to default handler check as StoppingHandler does not use metrics
         event_handlers.append(StoppingHandler(self.max_epoch, self.max_batch))
+        default_handlers.append("StoppingHandler")
 
         if not any(isinstance(handler, MetricHandler) for handler in event_handlers):
-            event_handlers.append(MetricHandler(train_metrics=train_metrics))
+            event_handlers.append(MetricHandler(train_metrics=self.train_metrics))
             default_handlers.append("MetricHandler")
 
-        if val_data and not any(isinstance(handler, ValidationHandler) for handler in event_handlers):
-            event_handlers.append(ValidationHandler(val_data=val_data, eval_fn=self.evaluate,
-                                                    val_metrics=val_metrics))
-            default_handlers.append("ValidationHandler")
+        if not any(isinstance(handler, ValidationHandler) for handler in event_handlers):
+            # no validation handler
+            if val_data:
+                # add default validation handler if validation data found
+                event_handlers.append(ValidationHandler(val_data=val_data, eval_fn=self.evaluate,
+                                                        val_metrics=self.val_metrics))
+                default_handlers.append("ValidationHandler")
+                val_metrics = self.val_metrics
+            else:
+                # use an empty validation metric list if no validation data and no validation handler
+                val_metrics = []
 
         if not any(isinstance(handler, LoggingHandler) for handler in event_handlers):
-            event_handlers.append(LoggingHandler(train_metrics=train_metrics,
+            event_handlers.append(LoggingHandler(train_metrics=self.train_metrics,
                                                  val_metrics=val_metrics))
             default_handlers.append("LoggingHandler")
 
         # if there is a mix of user defined event handlers and default event handlers
         # they should have the same set of loss and metrics
-        if default_handlers:
+        if default_handlers and len(event_handlers) > len(default_handlers):
             msg = "You are training with the following default event handlers: %s. " \
                   "They use loss and metrics from estimator.prepare_loss_and_metrics(). " \
                   "Please use the same set of metrics for all your other handlers." % \
@@ -374,7 +382,7 @@ def _prepare_default_handlers(self, val_data, event_handlers):
             # remove None metric references
             references = set([ref for ref in references if ref])
             for metric in references:
-                if metric not in train_metrics + val_metrics:
+                if metric not in self.train_metrics + self.val_metrics:
                     msg = "We have added following default handlers for you: %s and used " \
                           "estimator.prepare_loss_and_metrics() to pass metrics to " \
                           "those handlers. Please use the same set of metrics " \
diff --git a/python/mxnet/gluon/contrib/estimator/event_handler.py b/python/mxnet/gluon/contrib/estimator/event_handler.py
index 660bf34122c5..0c8dbd9a4e1d 100644
--- a/python/mxnet/gluon/contrib/estimator/event_handler.py
+++ b/python/mxnet/gluon/contrib/estimator/event_handler.py
@@ -29,7 +29,8 @@
 from ....metric import EvalMetric
 from ....metric import Loss as metric_loss
 
-__all__ = ['StoppingHandler', 'MetricHandler', 'ValidationHandler',
+__all__ = ['TrainBegin', 'TrainEnd', 'EpochBegin', 'EpochEnd','BatchBegin', 'BatchEnd',
+           'StoppingHandler', 'MetricHandler', 'ValidationHandler',
            'LoggingHandler', 'CheckpointHandler', 'EarlyStoppingHandler']
 
 class TrainBegin(object):
@@ -513,8 +514,8 @@ def _save_symbol(self, estimator):
             sym = estimator.net._cached_graph[1]
             sym.save(symbol_file)
         else:
-            self.logger.info("Model architecture(symbol file) is not saved, please use HybridBlock"
-                             "to construct your model, can call net.hybridize() before passing to"
+            self.logger.info("Model architecture(symbol file) is not saved, please use HybridBlock "
+                             "to construct your model, can call net.hybridize() before passing to "
                              "Estimator in order to save model architecture as %s.", symbol_file)
 
     def _save_params_and_trainer(self, estimator, file_prefix):
diff --git a/tests/python/unittest/test_gluon_estimator.py b/tests/python/unittest/test_gluon_estimator.py
index d2e8c082aa08..5050a0067f6c 100644
--- a/tests/python/unittest/test_gluon_estimator.py
+++ b/tests/python/unittest/test_gluon_estimator.py
@@ -19,11 +19,13 @@
 
 import sys
 import unittest
+import warnings
 
 import mxnet as mx
 from mxnet import gluon
 from mxnet.gluon import nn
 from mxnet.gluon.contrib.estimator import *
+from mxnet.gluon.contrib.estimator.event_handler import *
 from nose.tools import assert_raises
 
 

From 2b2f85da175976d9d4964abd208ce5fc5911e574 Mon Sep 17 00:00:00 2001
From: Lai Wei <royweilai@gmail.com>
Date: Wed, 31 Jul 2019 10:01:21 -0700
Subject: [PATCH 26/28] fix test (#13)

---
 python/mxnet/gluon/contrib/estimator/estimator.py     | 4 ++--
 python/mxnet/gluon/contrib/estimator/event_handler.py | 9 +++++----
 2 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/python/mxnet/gluon/contrib/estimator/estimator.py b/python/mxnet/gluon/contrib/estimator/estimator.py
index d077bcd4fdeb..b6142e100d96 100644
--- a/python/mxnet/gluon/contrib/estimator/estimator.py
+++ b/python/mxnet/gluon/contrib/estimator/estimator.py
@@ -153,7 +153,7 @@ def _check_trainer(self, trainer):
             warnings.warn("No trainer specified, default SGD optimizer "
                           "with learning rate 0.001 is used.")
             trainer = Trainer(self.net.collect_params(),
-                                    'sgd', {'learning_rate': 0.001})
+                              'sgd', {'learning_rate': 0.001})
         elif not isinstance(trainer, Trainer):
             raise ValueError("Trainer must be a Gluon Trainer instance, refer to "
                              "gluon.Trainer:{}".format(trainer))
@@ -363,7 +363,7 @@ def _prepare_default_handlers(self, val_data, event_handlers):
 
         # if there is a mix of user defined event handlers and default event handlers
         # they should have the same set of loss and metrics
-        if default_handlers and len(event_handlers) > len(default_handlers):
+        if default_handlers and len(event_handlers) != len(default_handlers):
             msg = "You are training with the following default event handlers: %s. " \
                   "They use loss and metrics from estimator.prepare_loss_and_metrics(). " \
                   "Please use the same set of metrics for all your other handlers." % \
diff --git a/python/mxnet/gluon/contrib/estimator/event_handler.py b/python/mxnet/gluon/contrib/estimator/event_handler.py
index 0c8dbd9a4e1d..da2c84455e35 100644
--- a/python/mxnet/gluon/contrib/estimator/event_handler.py
+++ b/python/mxnet/gluon/contrib/estimator/event_handler.py
@@ -29,10 +29,11 @@
 from ....metric import EvalMetric
 from ....metric import Loss as metric_loss
 
-__all__ = ['TrainBegin', 'TrainEnd', 'EpochBegin', 'EpochEnd','BatchBegin', 'BatchEnd',
+__all__ = ['TrainBegin', 'TrainEnd', 'EpochBegin', 'EpochEnd', 'BatchBegin', 'BatchEnd',
            'StoppingHandler', 'MetricHandler', 'ValidationHandler',
            'LoggingHandler', 'CheckpointHandler', 'EarlyStoppingHandler']
 
+
 class TrainBegin(object):
     def train_begin(self, estimator, *args, **kwargs):
         pass
@@ -434,7 +435,7 @@ def train_begin(self, estimator, *args, **kwargs):
         self.current_epoch = 0
         self.current_batch = 0
         if self.save_best:
-            self.best = np.Inf if self.monitor_op == np.less else -np.Inf # pylint: disable=comparison-with-callable
+            self.best = np.Inf if self.monitor_op == np.less else -np.Inf  # pylint: disable=comparison-with-callable
         if self.resume_from_checkpoint:
             error_msg = "To use resume from checkpoint, you must only specify " \
                         "the same type of period you used for training." \
@@ -670,7 +671,7 @@ def __init__(self,
                                  "if you want otherwise", self.monitor.get()[0])
                 self.monitor_op = np.less
 
-        if self.monitor_op == np.greater: # pylint: disable=comparison-with-callable
+        if self.monitor_op == np.greater:  # pylint: disable=comparison-with-callable
             self.min_delta *= 1
         else:
             self.min_delta *= -1
@@ -683,7 +684,7 @@ def train_begin(self, estimator, *args, **kwargs):
         if self.baseline is not None:
             self.best = self.baseline
         else:
-            self.best = np.Inf if self.monitor_op == np.less else -np.Inf # pylint: disable=comparison-with-callable
+            self.best = np.Inf if self.monitor_op == np.less else -np.Inf  # pylint: disable=comparison-with-callable
 
     def epoch_end(self, estimator, *args, **kwargs):
         monitor_name, monitor_value = self.monitor.get()

From 5436b625f938f14c186b2a11ce8ab645f05d8e99 Mon Sep 17 00:00:00 2001
From: Lai Wei <royweilai@gmail.com>
Date: Wed, 31 Jul 2019 10:29:36 -0700
Subject: [PATCH 27/28] fix warning (#14)

---
 tests/python/unittest/test_gluon_estimator.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/tests/python/unittest/test_gluon_estimator.py b/tests/python/unittest/test_gluon_estimator.py
index 5050a0067f6c..ae47d925670f 100644
--- a/tests/python/unittest/test_gluon_estimator.py
+++ b/tests/python/unittest/test_gluon_estimator.py
@@ -337,10 +337,9 @@ def test_default_handlers():
                     metrics=train_acc,
                     trainer=trainer,
                     context=ctx)
-    # no handler
+    # no handler(all default handlers), no warning
     with warnings.catch_warnings(record=True) as w:
         est.fit(train_data=train_data, epochs=num_epochs)
-        assert 'You are training with the' in str(w[-1].message)
 
     # handler with prepared loss and metrics
     # use mix of default and user defined handlers
@@ -355,7 +354,7 @@ def test_default_handlers():
     # handler with all user defined metrics
     # use mix of default and user defined handlers
     metric = MetricHandler(train_metrics=[train_acc])
-    logging = LoggingHandler(train_metrics=[train_acc], val_metrics=[mx.metric.RMSE("val acc")])
+    logging = LoggingHandler(train_metrics=[train_acc])
     est.fit(train_data=train_data, epochs=num_epochs, event_handlers=[metric, logging])
 
     # handler with mixed metrics, some handler use metrics prepared by estimator

From 81be5a01cb8d4a269bc801e34b534e82f3afc15b Mon Sep 17 00:00:00 2001
From: Lai Wei <royweilai@gmail.com>
Date: Wed, 31 Jul 2019 16:13:36 -0700
Subject: [PATCH 28/28] fix href (#15)

---
 docs/api/python/gluon/contrib.md | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/docs/api/python/gluon/contrib.md b/docs/api/python/gluon/contrib.md
index 2cf893664e37..22cdebb53b85 100644
--- a/docs/api/python/gluon/contrib.md
+++ b/docs/api/python/gluon/contrib.md
@@ -126,8 +126,9 @@ In the rest of this document, we list routines provided by the `gluon.contrib` p
 ```
 
 #### EventHandler
+
 ```eval_rst
-.. currentmodule:: mxnet.gluon.contrib.estimator.event_handler
+.. currentmodule:: mxnet.gluon.contrib.estimator
 
 .. autosummary::
     :nosignatures: