diff --git a/src/operator/contrib/multibox_prior.cc b/src/operator/contrib/multibox_prior.cc
index ee8f5bfac772..2ad173a2dd93 100644
--- a/src/operator/contrib/multibox_prior.cc
+++ b/src/operator/contrib/multibox_prior.cc
@@ -44,11 +44,12 @@ inline void MultiBoxPriorForward(const Tensor<cpu, 2, DType> &out,
     float center_y = (r + offsets[0]) * step_y;
     for (int c = 0; c < in_width; ++c) {
       float center_x = (c + offsets[1]) * step_x;
-      // ratio = 1, various sizes
+      // every size uses sqrt(ratios[0]) as its w/h scale factor (1 when num_ratios <= 0)
+      float ratio = num_ratios > 0? sqrtf(ratios[0]) : 1.f;
       for (int i = 0; i < num_sizes; ++i) {
         float size = sizes[i];
-        float w = size * in_height / in_width / 2;
-        float h = size / 2;
+        float w = size * in_height / in_width * ratio / 2;
+        float h = size / ratio / 2;
         out[count][0] = center_x - w;  // xmin
         out[count][1] = center_y - h;  // ymin
         out[count][2] = center_x + w;  // xmax
diff --git a/src/operator/contrib/multibox_prior.cu b/src/operator/contrib/multibox_prior.cu
index 57901585b45a..54e93adba765 100644
--- a/src/operator/contrib/multibox_prior.cu
+++ b/src/operator/contrib/multibox_prior.cu
@@ -83,10 +83,11 @@ inline void MultiBoxPriorForward(const Tensor<gpu, 2, DType> &out,
 
   const int stride = 4 * (num_sizes + num_ratios - 1);
   int offset = 0;
-  // ratio = 1, various sizes
+  // one launch per size, each passed sqrt(ratios[0]) (1 when num_ratios <= 0)
+  float ratio = num_ratios > 0? sqrtf(ratios[0]) : 1.f;
   for (int i = 0; i < num_sizes; ++i) {
     cuda::AssignPriors<DType><<<dimGrid, dimBlock, 0, stream>>>(out_ptr,
-      sizes[i], 1.f, in_width, in_height, step_x, step_y, offset_y, offset_x, stride, offset);
+      sizes[i], ratio, in_width, in_height, step_x, step_y, offset_y, offset_x, stride, offset);
     ++offset;
   }
   MULTIBOXPRIOR_CUDA_CHECK(cudaPeekAtLastError());
diff --git a/tests/python/unittest/test_contrib_operator.py b/tests/python/unittest/test_contrib_operator.py
index 38aeb99c2d89..813a6b092b9c 100644
--- a/tests/python/unittest/test_contrib_operator.py
+++ b/tests/python/unittest/test_contrib_operator.py
@@ -293,7 +293,7 @@ def f(x, a, b, c):
     b = np.random.random_sample()
     c = np.random.random_sample()
     m = np.random.random_sample() - 0.5
-    
+
     data = mx.symbol.Variable('data')
     quad_sym = mx.sym.contrib.quadratic(data=data, a=a, b=b, c=c)
     gr_q_sym = mx.sym.contrib.gradientmultiplier(quad_sym, scalar=m)
@@ -320,6 +320,18 @@ def f(x, a, b, c):
                                         [backward_expected],
                                         rtol=1e-2 if dtype is np.float16 else 1e-5,
                                         atol=1e-2 if dtype is np.float16 else 1e-5)
+def test_multibox_prior_op():  # checks MultiBoxPrior's output shape and the first box at one location, for two ratio lists
+    h = 561
+    w = 728
+    X = mx.nd.random.uniform(shape=(1, 3, h, w))  # random input of shape (1, 3, h, w)
+    Y = mx.contrib.nd.MultiBoxPrior(X, sizes=[0.75, 0.5, 0.25], ratios=[1, 2, 0.5])
+    assert_array_equal(Y.shape, np.array((1, 2042040, 4)))  # 2042040 == h * w * 5, with 5 == 3 sizes + 3 ratios - 1
+    boxes = Y.reshape((h, w, 5, 4))  # regroup as (h, w, boxes per location, 4 values per box)
+    assert_allclose(boxes.asnumpy()[250, 250, 0, :], np.array([0.055117, 0.071524, 0.63307 , 0.821524]), atol=1e-5, rtol=1e-5)  # box 0: sizes[0] with ratios[0] == 1
+    # the first ratio is not forced to 1: with ratios[0] == 20, box 0 must be built from that ratio
+    Y = mx.contrib.nd.MultiBoxPrior(X, sizes=[0.75, 0.5, 0.25], ratios=[20, 2, 0.5])
+    boxes = Y.reshape((h, w, 5, 4))
+    assert_allclose(boxes.asnumpy()[250, 250, 0, :], np.array([-0.948249,  0.362671,  1.636436,  0.530377]), atol=1e-5, rtol=1e-5)  # same center; half-width times sqrt(20), half-height divided by sqrt(20)
 
 if __name__ == '__main__':
     import nose