From f50115ae5b03278176879db96cfd322f9d0fb2d2 Mon Sep 17 00:00:00 2001
From: Zhang
Date: Thu, 24 May 2018 17:59:36 -0700
Subject: [PATCH] model and hybrid block

---
 .../style_transfer/models/download_model.py |  12 +-
 example/gluon/style_transfer/net.py         | 132 ++++++++++++------
 2 files changed, 104 insertions(+), 40 deletions(-)

diff --git a/example/gluon/style_transfer/models/download_model.py b/example/gluon/style_transfer/models/download_model.py
index db0e78d13bae..51b18e74af0d 100644
--- a/example/gluon/style_transfer/models/download_model.py
+++ b/example/gluon/style_transfer/models/download_model.py
@@ -15,7 +15,17 @@
 # specific language governing permissions and limitations
 # under the License.
 
+import os
+import zipfile
+import shutil
 from mxnet.test_utils import download
 
-download('https://apache-mxnet.s3-accelerate.amazonaws.com/gluon/models/21styles-2cb88353.zip', 'models/21styles.params')
+zip_file_path = 'models/21styles.zip'
+download('https://apache-mxnet.s3-accelerate.amazonaws.com/gluon/models/21styles-2cb88353.zip', zip_file_path)
+with zipfile.ZipFile(zip_file_path) as zf:
+    zf.extractall()
+
+os.remove(zip_file_path)
+
+shutil.move('21styles-2cb88353.params', 'models/21styles.params')
 
diff --git a/example/gluon/style_transfer/net.py b/example/gluon/style_transfer/net.py
index 18c4d738f186..fed18d21fed6 100644
--- a/example/gluon/style_transfer/net.py
+++ b/example/gluon/style_transfer/net.py
@@ -60,7 +60,7 @@ def forward(self, x):
         return F.pad(x, mode='reflect', pad_width=self.pad_width)
 
 
-class Bottleneck(Block):
+class Bottleneck(HybridBlock):
     """ Pre-activation residual block
     Identity Mapping in Deep Residual Networks
     ref https://arxiv.org/abs/1603.05027
@@ -73,7 +73,7 @@ def __init__(self, inplanes, planes, stride=1, downsample=None, norm_layer=Insta
             self.residual_layer = nn.Conv2D(in_channels=inplanes,
                                             channels=planes * self.expansion,
                                             kernel_size=1, strides=(stride, stride))
-        self.conv_block = nn.Sequential()
+        self.conv_block = nn.HybridSequential()
         with self.conv_block.name_scope():
             self.conv_block.add(norm_layer(in_channels=inplanes))
             self.conv_block.add(nn.Activation('relu'))
@@ -89,7 +89,7 @@ def __init__(self, inplanes, planes, stride=1, downsample=None, norm_layer=Insta
                                           channels=planes * self.expansion,
                                           kernel_size=1))
 
-    def forward(self, x):
+    def hybrid_forward(self, F, x):
         if self.downsample is not None:
             residual = self.residual_layer(x)
         else:
@@ -97,7 +97,7 @@ def forward(self, x):
         return residual + self.conv_block(x)
 
 
-class UpBottleneck(Block):
+class UpBottleneck(HybridBlock):
     """ Up-sample residual block (from MSG-Net paper)
     Enables passing identity all the way through the generator
     ref https://arxiv.org/abs/1703.06953
@@ -107,7 +107,7 @@ def __init__(self, inplanes, planes, stride=2, norm_layer=InstanceNorm):
         self.expansion = 4
         self.residual_layer = UpsampleConvLayer(inplanes, planes * self.expansion,
                                                 kernel_size=1, stride=1, upsample=stride)
-        self.conv_block = nn.Sequential()
+        self.conv_block = nn.HybridSequential()
         with self.conv_block.name_scope():
             self.conv_block.add(norm_layer(in_channels=inplanes))
             self.conv_block.add(nn.Activation('relu'))
@@ -122,11 +122,11 @@ def __init__(self, inplanes, planes, stride=2, norm_layer=InstanceNorm):
                                            channels=planes * self.expansion,
                                            kernel_size=1))
 
-    def forward(self, x):
+    def hybrid_forward(self, F, x):
         return self.residual_layer(x) + self.conv_block(x)
 
 
-class ConvLayer(Block):
+class ConvLayer(HybridBlock):
     def __init__(self, in_channels, out_channels, kernel_size, stride):
         super(ConvLayer, self).__init__()
         padding = int(np.floor(kernel_size / 2))
@@ -135,13 +135,13 @@ def __init__(self, in_channels, out_channels, kernel_size, stride):
                                 kernel_size=kernel_size, strides=(stride,stride),
                                 padding=0)
 
-    def forward(self, x):
+    def hybrid_forward(self, F, x):
         x = self.pad(x)
         out = self.conv2d(x)
         return out
 
 
-class UpsampleConvLayer(Block):
+class UpsampleConvLayer(HybridBlock):
     """UpsampleConvLayer
     Upsamples the input and then does a convolution. This method gives better results
     compared to ConvTranspose2d.
@@ -152,41 +152,57 @@ def __init__(self, in_channels, out_channels, kernel_size, stride,
                  upsample=None):
         super(UpsampleConvLayer, self).__init__()
         self.upsample = upsample
-        """
-        if upsample:
-            self.upsample_layer = torch.nn.UpsamplingNearest2d(scale_factor=upsample)
-        """
         self.reflection_padding = int(np.floor(kernel_size / 2))
         self.conv2d = nn.Conv2D(in_channels=in_channels,
                                 channels=out_channels,
                                 kernel_size=kernel_size, strides=(stride,stride),
                                 padding=self.reflection_padding)
 
-    def forward(self, x):
+    def hybrid_forward(self, F, x):
         if self.upsample:
             x = F.UpSampling(x, scale=self.upsample, sample_type='nearest')
-        """
-        if self.reflection_padding != 0:
-            x = self.reflection_pad(x)
-        """
         out = self.conv2d(x)
         return out
 
 
-def gram_matrix(y):
-    (b, ch, h, w) = y.shape
-    features = y.reshape((b, ch, w * h))
-    #features_t = F.SwapAxis(features,1, 2)
-    gram = F.batch_dot(features, features, transpose_b=True) / (ch * h * w)
-    return gram
+class gram_matrix(mx.operator.CustomOp):
+    def forward(self, is_train, req, in_data, out_data, aux):
+        x = in_data[0]
+        _, ch, h, w = x.shape
+        features = x.reshape((0, 0, -1))
+        y = F.batch_dot(features, features, transpose_b=True) / (ch * h * w)
+        self.assign(out_data[0], req[0], y)
+
+    def backward(self, req, out_grad, in_data, out_data, in_grad, aux):
+        dy = out_grad[0]
+        x = in_data[0]
+        _, ch, h, w = x.shape
+        features = x.reshape((0, 0, -1))
+        dx = F.batch_dot(dy, features) + F.batch_dot(dy, features, transpose_a=True)
+        dx = dx.reshape((0, 0, h, w)) / (ch * h * w)
+        self.assign(in_grad[0], req[0], dx)
+
+
+@mx.operator.register("gram_matrix")
+class GramProp(mx.operator.CustomOpProp):
+    def list_arguments(self):
+        return ['data']
+
+    def infer_shape(self, in_shapes):
+        data_shape = in_shapes[0]
+        output_shape = (data_shape[0], data_shape[1], data_shape[1])
+        return (data_shape, ), (output_shape,), ()
+
+    def create_operator(self, ctx, in_shapes, in_dtypes):
+        return gram_matrix()
 
 
-class GramMatrix(Block):
-    def forward(self, x):
-        gram = gram_matrix(x)
-        return gram
 
-class Net(Block):
+class GramMatrix(HybridBlock):
+    def hybrid_forward(self, F, x):
+        return mx.nd.Custom(x, op_type='gram_matrix')
+
+
+class Net(HybridBlock):
     def __init__(self, input_nc=3, output_nc=3, ngf=64,
                  norm_layer=InstanceNorm, n_blocks=6, gpu_ids=[]):
         super(Net, self).__init__()
@@ -198,9 +214,9 @@ def __init__(self, input_nc=3, output_nc=3, ngf=64,
         expansion = 4
 
         with self.name_scope():
-            self.model1 = nn.Sequential()
+            self.model1 = nn.HybridSequential()
             self.ins = Inspiration(ngf*expansion)
-            self.model = nn.Sequential()
+            self.model = nn.HybridSequential()
 
             self.model1.add(ConvLayer(input_nc, 64, kernel_size=7, stride=1))
             self.model1.add(norm_layer(in_channels=64))
@@ -227,11 +243,43 @@ def set_target(self, Xs):
         G = self.gram(F)
         self.ins.set_target(G)
 
-    def forward(self, input):
+    def hybrid_forward(self, F, input):
         return self.model(input)
 
-class Inspiration(Block):
+class broadcast_like(mx.operator.CustomOp):
+    def forward(self, is_train, req, in_data, out_data, aux):
+        x = in_data[0]
+        z = in_data[1]
+        b, c, _, _ = z.shape
+        y = F.broadcast_to(x, (b, c, c))
+        self.assign(out_data[0], req[0], y)
+
+    def backward(self, req, out_grad, in_data, out_data, in_grad, aux):
+        dy = out_grad[0]
+        x = in_data[0]
+        z = in_data[1]
+        # hacky solution, only allow expanding at batch dims
+        dx = F.mean(dy, axis=0)
+        self.assign(in_grad[0], req[0], dx)
+
+
+@mx.operator.register("broadcast_like")
+class BroadcastLikeProp(mx.operator.CustomOpProp):
+    def list_arguments(self):
+        return ['data', 'target']
+
+    def infer_shape(self, in_shapes):
+        input_shape = in_shapes[1]
+        output_shape = (input_shape[0], input_shape[1], input_shape[1])
+        # return 3 lists representing inputs shapes, outputs shapes, and aux data shapes.
+        return in_shapes, (output_shape,), ()
+
+    def create_operator(self, ctx, in_shapes, in_dtypes):
+        return broadcast_like()
+
+
+class Inspiration(HybridBlock):
     """
     Inspiration Layer (from MSG-Net paper)
     tuning the featuremap with target Gram Matrix
     ref https://arxiv.org/abs/1703.06953
@@ -240,6 +288,7 @@ def __init__(self, C, B=1):
         super(Inspiration, self).__init__()
         # B is equal to 1 or input mini_batch
         self.C = C
+        self.B = B
         self.weight = self.params.get('weight', shape=(1,C,C),
                                       init=mx.initializer.Uniform(),
                                       allow_deferred_init=True)
@@ -248,17 +297,22 @@ def __init__(self, C, B=1):
     def set_target(self, target):
         self.gram = target
 
-    def forward(self, X):
+    def hybrid_forward(self, F, X, weight):
         # input X is a 3D feature map
-        self.P = F.batch_dot(F.broadcast_to(self.weight.data(), shape=(self.gram.shape)), self.gram)
-        return F.batch_dot(F.SwapAxis(self.P,1,2).broadcast_to((X.shape[0], self.C, self.C)), X.reshape((0,0,X.shape[2]*X.shape[3]))).reshape(X.shape)
+        P = F.batch_dot(
+            F.broadcast_to(weight, shape=(self.B, self.C, self.C)), self.gram)
+        P = F.SwapAxis(P,1,2)
+        return F.batch_dot(
+            #P.broadcast_to((X.shape[0], self.C, self.C)),
+            mx.nd.Custom(P, X, op_type='broadcast_like'),
+            X.reshape((0, 0, -1))).reshape_like(X)
 
     def __repr__(self):
         return self.__class__.__name__ + '(' \
             + 'N x ' + str(self.C) + ')'
 
 
-class Vgg16(Block):
+class Vgg16(HybridBlock):
     def __init__(self):
         super(Vgg16, self).__init__()
         self.conv1_1 = nn.Conv2D(in_channels=3, channels=64, kernel_size=3, strides=1, padding=1)
@@ -279,7 +333,7 @@ def __init__(self):
         self.conv5_2 = nn.Conv2D(in_channels=512, channels=512, kernel_size=3, strides=1, padding=1)
         self.conv5_3 = nn.Conv2D(in_channels=512, channels=512, kernel_size=3, strides=1, padding=1)
 
-    def forward(self, X):
+    def hybrid_forward(self, F, X):
         h = F.Activation(self.conv1_1(X), act_type='relu')
         h = F.Activation(self.conv1_2(h), act_type='relu')
         relu1_2 = h
@@ -301,4 +355,4 @@ def forward(self, X):
         h = F.Activation(self.conv4_3(h), act_type='relu')
         relu4_3 = h
 
-        return [relu1_2, relu2_2, relu3_3, relu4_3]
+        return relu1_2, relu2_2, relu3_3, relu4_3
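
As a quick sanity check for the gram_matrix custom operator introduced above, the sketch below compares its output and gradient against the same Gram-matrix computation written directly with ndarray ops under autograd. This is only an illustration, not part of the patch: it assumes the patched net.py is importable from the working directory (importing it registers the custom ops), and the input shape and tolerances are arbitrary.

import mxnet as mx
import numpy as np
from mxnet import autograd

import net  # assumed importable; importing registers the "gram_matrix" and "broadcast_like" ops

x = mx.nd.random.uniform(shape=(2, 8, 16, 16))

# Forward/backward through the custom op.
x.attach_grad()
with autograd.record():
    y = mx.nd.Custom(x, op_type='gram_matrix')
y.backward(mx.nd.ones_like(y))
custom_grad = x.grad.copy()

# Reference: the same Gram matrix computed directly with ndarray ops.
x.attach_grad()
with autograd.record():
    _, ch, h, w = x.shape
    feats = x.reshape((0, 0, -1))
    y_ref = mx.nd.batch_dot(feats, feats, transpose_b=True) / (ch * h * w)
y_ref.backward(mx.nd.ones_like(y_ref))

print(np.allclose(y.asnumpy(), y_ref.asnumpy(), atol=1e-5))
print(np.allclose(custom_grad.asnumpy(), x.grad.asnumpy(), atol=1e-5))

The same pattern can be used to spot-check broadcast_like, keeping in mind that its backward is intentionally approximate (see the "hacky solution" comment above), so only the forward output is expected to match exactly.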