diff --git a/keras/backend/cntk_backend.py b/keras/backend/cntk_backend.py index fe89045a941..35f1dd2a7d6 100644 --- a/keras/backend/cntk_backend.py +++ b/keras/backend/cntk_backend.py @@ -953,6 +953,9 @@ def normalize_batch_in_training(x, gamma, beta, for axis in range(1, ndim(x)): if axis in reduction_axes: target_shape.append(1) + if ndim(gamma) > axis: + gamma = C.reduce_mean(gamma, axis - 1) + beta = C.reduce_mean(beta, axis - 1) else: target_shape.append(x_shape[axis]) diff --git a/tests/keras/backend/backend_test.py b/tests/keras/backend/backend_test.py index f54d7c54328..e7a8459d822 100644 --- a/tests/keras/backend/backend_test.py +++ b/tests/keras/backend/backend_test.py @@ -1089,15 +1089,21 @@ def test_batchnorm(self): x_shape = (1, 4) + shape else: x_shape = (1,) + shape + (4,) - xth = KTH.variable(np.random.random(x_shape)) - xtf = KTF.variable(np.random.random(x_shape)) + x_val = np.random.random(x_shape).astype(np.float32) + xth = KTH.variable(x_val) + xtf = KTF.variable(x_val) + xc = KC.placeholder(x_shape) zth, _, _ = KTH.normalize_batch_in_training(xth, None, None, reduction_axes='per-activation') ztf, _, _ = KTF.normalize_batch_in_training(xtf, None, None, reduction_axes=[0, 1, 2, 3]) + zc, _, _ = KC.normalize_batch_in_training(xc, None, None, + reduction_axes=[0, 1, 2, 3]) zth = KTH.eval(zth) ztf = KTF.eval(ztf) + zc = KC.function([xc], [zc])([x_val])[0] assert zth.shape == ztf.shape + assert zth.shape == zc.shape def test_ctc(self): # simplified version of TensorFlow's test