From ad6f972d8c93e41ff4626cd9ae047ccade0d90f4 Mon Sep 17 00:00:00 2001 From: Sergey Kolychev Date: Sat, 29 Apr 2017 22:03:52 -0700 Subject: [PATCH] Sync with python, Ftrl optimizer, new examples, bugfixes, tutorials, expanded/fixed docs, docker files for perl. --- docker/Dockerfiles/Dockerfile.in.perl | 8 + docker/README.md | 12 ++ docker/install/perl.sh | 4 + docker/run.sh | 2 +- docker/tool.sh | 2 +- docs/get_started/build_from_source.md | 25 +++ docs/get_started/index.md | 25 +++ docs/get_started/ubuntu_setup.md | 2 +- docs/tutorials/index.md | 4 + perl-package/AI-MXNet/Changes | 8 +- perl-package/AI-MXNet/MANIFEST | 2 + perl-package/AI-MXNet/META.json | 2 +- perl-package/AI-MXNet/META.yml | 2 +- perl-package/AI-MXNet/Makefile.PL | 2 +- perl-package/AI-MXNet/README | 2 +- perl-package/AI-MXNet/examples/calculator.pl | 138 +++++++++++++ perl-package/AI-MXNet/examples/char_lstm.pl | 7 +- perl-package/AI-MXNet/examples/mnist.pl | 184 ++++++++++++++++++ perl-package/AI-MXNet/lib/AI/MXNet.pm | 2 +- perl-package/AI-MXNet/lib/AI/MXNet/Base.pm | 2 +- .../AI-MXNet/lib/AI/MXNet/Callback.pm | 11 +- .../AI-MXNet/lib/AI/MXNet/Executor/Group.pm | 18 +- perl-package/AI-MXNet/lib/AI/MXNet/Metric.pm | 9 +- .../AI-MXNet/lib/AI/MXNet/Optimizer.pm | 76 +++++++- perl-package/AI-MXNet/lib/AI/MXNet/Symbol.pm | 4 +- perl-package/AI-MXNet/t/test_module.t | 64 +++++- perl-package/AI-MXNet/t/test_optimizers.t | 90 ++++++++- perl-package/AI-MXNet/t/test_symbol.t | 4 +- perl-package/AI-MXNetCAPI/Changes | 3 + perl-package/AI-MXNetCAPI/META.json | 2 +- perl-package/AI-MXNetCAPI/META.yml | 2 +- perl-package/AI-MXNetCAPI/README | 2 +- perl-package/AI-MXNetCAPI/lib/AI/MXNetCAPI.pm | 2 +- 33 files changed, 681 insertions(+), 41 deletions(-) create mode 100644 docker/Dockerfiles/Dockerfile.in.perl create mode 100755 docker/install/perl.sh create mode 100755 perl-package/AI-MXNet/examples/calculator.pl create mode 100755 perl-package/AI-MXNet/examples/mnist.pl diff --git 
a/docker/Dockerfiles/Dockerfile.in.perl b/docker/Dockerfiles/Dockerfile.in.perl new file mode 100644 index 000000000000..14b4f0194d6e --- /dev/null +++ b/docker/Dockerfiles/Dockerfile.in.perl @@ -0,0 +1,8 @@ +# -*- mode: dockerfile -*- +# part of the dockerfile to install the perl binding + +COPY install/perl.sh install/ +RUN install/perl.sh && \ + cd /mxnet/perl-package/AI-MXNetCAPI/ && perl Makefile.PL && make install && \ + cd /mxnet/perl-package/AI-NNVMCAPI/ && perl Makefile.PL && make install && \ + cd /mxnet/perl-package/AI-MXNet/ && perl Makefile.PL && make install diff --git a/docker/README.md b/docker/README.md index 95fa668e97d7..971a2de7894c 100644 --- a/docker/README.md +++ b/docker/README.md @@ -90,6 +90,18 @@ Available tags: - mxnet/scala +### Perl + +Hosted at https://hub.docker.com/r/mxnet/perl/ + +Perl version: 5.18.2 + +Available tags: + +- mxnet/perl +- mxnet/perl:gpu + + ## How to build The following command build the default Python package diff --git a/docker/install/perl.sh b/docker/install/perl.sh new file mode 100755 index 000000000000..da4df67a464a --- /dev/null +++ b/docker/install/perl.sh @@ -0,0 +1,4 @@ +#!/usr/bin/env bash +# install libraries for mxnet's perl package on ubuntu +apt-get update && apt-get install -y libmouse-perl pdl cpanminus swig libgraphviz-perl +cpanm -q Function::Parameters diff --git a/docker/run.sh b/docker/run.sh index 0037ab1926d7..b13e13caa5fc 100644 --- a/docker/run.sh +++ b/docker/run.sh @@ -2,7 +2,7 @@ # Build and push all docker containers DEVICES=('cpu' 'gpu') -LANGUAGES=('python' 'julia' 'r-lang' 'scala') +LANGUAGES=('python' 'julia' 'r-lang' 'scala' 'perl') for DEV in "${DEVICES[@]}"; do for LANG in "${LANGUAGES[@]}"; do ./tool.sh build ${LANG} ${DEV} diff --git a/docker/tool.sh b/docker/tool.sh index 64a7e16513aa..222d428fb68b 100755 --- a/docker/tool.sh +++ b/docker/tool.sh @@ -10,7 +10,7 @@ function show_usage() { echo "" echo " COMMAND: build or commit." 
echo " commit needs logined in docker hub" - echo " LANGUAGE: the language binding to buld, e.g. python, r-lang, julia, or scala" + echo " LANGUAGE: the language binding to buld, e.g. python, r-lang, julia, scala or perl" echo " DEVICE: targed device, e.g. cpu, or gpu" echo "" } diff --git a/docs/get_started/build_from_source.md b/docs/get_started/build_from_source.md index 84d42b168706..0b468d58094b 100644 --- a/docs/get_started/build_from_source.md +++ b/docs/get_started/build_from_source.md @@ -462,3 +462,28 @@ Install the Julia package for MXNet with: ```bash julia -e 'Pkg.add("MXNet")' ``` + +### Build the Perl package + +Run the following command from the MXNet source root directory to build the MXNet Perl package: + +```bash + sudo apt-get install libmouse-perl pdl cpanminus swig libgraphviz-perl + cpanm -q -L "${HOME}/perl5" Function::Parameters + + MXNET_HOME=${PWD} + export LD_LIBRARY_PATH=${MXNET_HOME}/lib + export PERL5LIB=${HOME}/perl5/lib/perl5 + + cd ${MXNET_HOME}/perl-package/AI-MXNetCAPI/ + perl Makefile.PL INSTALL_BASE=${HOME}/perl5 + make install + + cd ${MXNET_HOME}/perl-package/AI-NNVMCAPI/ + perl Makefile.PL INSTALL_BASE=${HOME}/perl5 + make install + + cd ${MXNET_HOME}/perl-package/AI-MXNet/ + perl Makefile.PL INSTALL_BASE=${HOME}/perl5 + make install +``` diff --git a/docs/get_started/index.md b/docs/get_started/index.md index 844d4a121ca6..4b07ef5f915e 100644 --- a/docs/get_started/index.md +++ b/docs/get_started/index.md @@ -215,6 +215,18 @@ array([[ 3., 3., 3.], [ 3., 3., 3.]], dtype=float32) ``` +```perl +pdl> use AI::MXNet qw(mx) +pdl> $a = mx->sym->var('a') +pdl> $b = $a * 2 + 1 +pdl> $c = $b->eval(args => { a => mx->nd->ones([2,3]) }) +pdl> print @{$c}[0]->aspdl +[ + [3 3 3] + [3 3 3] +] +``` + Run the above codes in GPU in straightforward: ```python @@ -230,6 +242,9 @@ Run the above codes in GPU in straightforward: julia> a = mx.ones((2,3), mx.gpu()) ``` +```perl +pdl> $a = mx->nd->ones([2,3], ctx => mx->gpu()) +``` In additional, 
MXNet provides a large number of neural network layers and training modules to facilitate developing deep learning algorithms. @@ -243,6 +258,16 @@ training modules to facilitate developing deep learning algorithms. >>> mod.fit(train_data, ctx=[mx.gpu(0), mx.gpu(1)]) # fit on the training data by using 2 GPUs ``` +```perl +pdl> $data = mx->sym->var('data') +pdl> $fc1 = mx->sym->FullyConnected($data, num_hidden=>128) +pdl> $act1 = mx->sym->Activation($fc1, act_type=>"relu") +pdl> $fc2 = mx->sym->FullyConnected($act1, num_hidden=>10) +pdl> $loss = mx->sym->SoftmaxOutput($fc2) +pdl> $mod = mx->mod->Module($loss) +pdl> $mod->fit($train_data, ctx=>[mx->gpu(0), mx->gpu(1)]) # fit on the training data by using 2 GPUs +``` + ## Next Steps * [Tutorials](http://mxnet.io/tutorials/index.html) diff --git a/docs/get_started/ubuntu_setup.md b/docs/get_started/ubuntu_setup.md index af4d75a3de64..5021b95c4133 100644 --- a/docs/get_started/ubuntu_setup.md +++ b/docs/get_started/ubuntu_setup.md @@ -286,7 +286,7 @@ To install the MXNet Scala package into your local Maven repository, run the fol ``` ### Install the MXNet Package for Perl -Before you build MXNet for Scala from source code, you must complete [building the shared library](#build-the-shared-library). After you build the shared library, run the following command from the MXNet source root directory to build the MXNet Scala package: +Before you build MXNet for Perl from source code, you must complete [building the shared library](#build-the-shared-library).
After you build the shared library, run the following command from the MXNet source root directory to build the MXNet Perl package: ```bash sudo apt-get install libmouse-perl pdl cpanminus swig libgraphviz-perl diff --git a/docs/tutorials/index.md b/docs/tutorials/index.md index db853e1edc55..4015873b969f 100644 --- a/docs/tutorials/index.md +++ b/docs/tutorials/index.md @@ -80,6 +80,10 @@ These tutorials introduce fundamental concepts in deep learning and their realiz - [Basics](http://mxnet.io/tutorials/c++/basics.html) +### Perl + +- [Calculator, handwritten digits and roboshakespeare](http://blogs.perl.org/users/sergey_kolychev/2017/04/machine-learning-in-perl-part2-a-calculator-handwritten-digits-and-roboshakespeare.html) + ## Contributing Tutorials Want to contribute an MXNet tutorial? To get started, download the [tutorial template](https://github.com/dmlc/mxnet/tree/master/example/MXNetTutorialTemplate.ipynb). diff --git a/perl-package/AI-MXNet/Changes b/perl-package/AI-MXNet/Changes index 755ed9393ad0..00248b3b889a 100644 --- a/perl-package/AI-MXNet/Changes +++ b/perl-package/AI-MXNet/Changes @@ -1,6 +1,12 @@ Revision history for Perl extension AI::MXNet -0.9504 18:59:45 PDT 2017 +0.9506 Sat Apr 29 20:26:50 PDT 2017 + - Ftrl optimizer, new tests, bugfixes. + +0.9505 Sun Apr 23 21:26:04 PDT 2017 + - Perplexity bugfix, two new examples. + +0.9504 Wed Apr 19 18:59:45 PDT 2017 - LR Scheduler bugfix.
0.9503 Wed Apr 19 13:33:57 PDT 2017 diff --git a/perl-package/AI-MXNet/MANIFEST b/perl-package/AI-MXNet/MANIFEST index 9699293c8b21..bf3a620408dc 100644 --- a/perl-package/AI-MXNet/MANIFEST +++ b/perl-package/AI-MXNet/MANIFEST @@ -1,9 +1,11 @@ META.yml MANIFEST +examples/calculator.pl examples/plot_network.pl examples/char_lstm.pl examples/get_ptb_data.sh examples/lstm_bucketing.pl +examples/mnist.pl examples/cudnn_lstm_bucketing.pl Makefile.PL Changes diff --git a/perl-package/AI-MXNet/META.json b/perl-package/AI-MXNet/META.json index a948f76041d4..68afafd9e4e8 100644 --- a/perl-package/AI-MXNet/META.json +++ b/perl-package/AI-MXNet/META.json @@ -43,5 +43,5 @@ } }, "release_status" : "stable", - "version" : "0.9504" + "version" : "0.9506" } diff --git a/perl-package/AI-MXNet/META.yml b/perl-package/AI-MXNet/META.yml index 0e2fe47b4f1c..2f84c00166c9 100644 --- a/perl-package/AI-MXNet/META.yml +++ b/perl-package/AI-MXNet/META.yml @@ -23,4 +23,4 @@ requires: GraphViz: '2.14' Mouse: v2.1.0 PDL: '2.007' -version: '0.9504' +version: '0.9506' diff --git a/perl-package/AI-MXNet/Makefile.PL b/perl-package/AI-MXNet/Makefile.PL index 38ab001c8e2a..4f6826fffcd9 100644 --- a/perl-package/AI-MXNet/Makefile.PL +++ b/perl-package/AI-MXNet/Makefile.PL @@ -27,7 +27,7 @@ my %WriteMakefileArgs = ( "GraphViz" => "2.14" }, "TEST_REQUIRES" => {}, - "VERSION" => "0.9504", + "VERSION" => "0.9506", "test" => { "TESTS" => "t/*.t" } diff --git a/perl-package/AI-MXNet/README b/perl-package/AI-MXNet/README index 7dfa2988803f..d3c68330a4c1 100644 --- a/perl-package/AI-MXNet/README +++ b/perl-package/AI-MXNet/README @@ -1,5 +1,5 @@ This archive contains the distribution AI-MXNet, -version 0.9504: +version 0.9506: Perl interface to MXNet machine learning library diff --git a/perl-package/AI-MXNet/examples/calculator.pl b/perl-package/AI-MXNet/examples/calculator.pl new file mode 100755 index 000000000000..f41895508450 --- /dev/null +++ b/perl-package/AI-MXNet/examples/calculator.pl @@ -0,0 +1,138 
@@ +#!/usr/bin/perl +use strict; +use warnings; +use AI::MXNet ('mx'); + +## preparing the samples +## to train our network +sub samples { + my($batch_size, $func) = @_; + # get samples + my $n = 16384; + ## creates a pdl with $n rows and two columns with random + ## floats in the range between 0 and 1 + my $data = PDL->random(2, $n); + ## creates the pdl with $n rows and one column with labels + ## labels are floats that either sum or product, etc of + ## two random values in each corresponding row of the data pdl + my $label = $func->($data->slice('0,:'), $data->slice('1,:')); + # partition into train/eval sets + my $edge = int($n / 8); + my $validation_data = $data->slice(":,0:@{[ $edge - 1 ]}"); + my $validation_label = $label->slice(":,0:@{[ $edge - 1 ]}"); + my $train_data = $data->slice(":,$edge:"); + my $train_label = $label->slice(":,$edge:"); + # build iterators around the sets + return(mx->io->NDArrayIter( + batch_size => $batch_size, + data => $train_data, + label => $train_label, + ), mx->io->NDArrayIter( + batch_size => $batch_size, + data => $validation_data, + label => $validation_label, + )); +} + +## the network model +sub nn_fc { + my $data = mx->sym->Variable('data'); + my $ln = mx->sym->exp(mx->sym->FullyConnected( + data => mx->sym->log($data), + num_hidden => 1, + )); + my $wide = mx->sym->Concat($data, $ln); + my $fc = mx->sym->FullyConnected( + $wide, + num_hidden => 1 + ); + return mx->sym->MAERegressionOutput(data => $fc, name => 'softmax'); +} + +sub learn_function { + my(%args) = @_; + my $func = $args{func}; + my $batch_size = $args{batch_size}//128; + my($train_iter, $eval_iter) = samples($batch_size, $func); + my $sym = nn_fc(); + + ## call as ./calculator.pl 1 to just print model and exit + if($ARGV[0]) { + my @dsz = @{$train_iter->data->[0][1]->shape}; + my @lsz = @{$train_iter->label->[0][1]->shape}; + my $shape = { + data => [ $batch_size, splice @dsz, 1 ], + softmax_label => [ $batch_size, splice @lsz, 1 ], + }; + print 
mx->viz->plot_network($sym, shape => $shape)->graph->as_png; + exit; + } + + my $model = mx->mod->Module( + symbol => $sym, + context => mx->cpu(), + ); + $model->fit($train_iter, + eval_data => $eval_iter, + optimizer => 'adam', + optimizer_params => { + learning_rate => $args{lr}//0.01, + rescale_grad => 1/$batch_size, + lr_scheduler => AI::MXNet::FactorScheduler->new( + step => 100, + factor => 0.99 + ) + }, + eval_metric => 'mse', + num_epoch => $args{epoch}//25, + ); + + # refit the model for calling on 1 sample at a time + my $iter = mx->io->NDArrayIter( + batch_size => 1, + data => PDL->pdl([[ 0, 0 ]]), + label => PDL->pdl([[ 0 ]]), + ); + $model->reshape( + data_shapes => $iter->provide_data, + label_shapes => $iter->provide_label, + ); + + # wrap a helper around making predictions + my ($arg_params) = $model->get_params; + for my $k (sort keys %$arg_params) + { + print "$k -> ". $arg_params->{$k}->aspdl."\n"; + } + return sub { + my($n, $m) = @_; + return $model->predict(mx->io->NDArrayIter( + batch_size => 1, + data => PDL->new([[ $n, $m ]]), + ))->aspdl->list; + }; +} + +my $add = learn_function(func => sub { + my($n, $m) = @_; + return $n + $m; +}); +my $sub = learn_function(func => sub { + my($n, $m) = @_; + return $n - $m; +}, batch_size => 50, epoch => 40); +my $mul = learn_function(func => sub { + my($n, $m) = @_; + return $n * $m; +}, batch_size => 50, epoch => 40); +my $div = learn_function(func => sub { + my($n, $m) = @_; + return $n / $m; +}, batch_size => 10, epoch => 80); + + +print "12345 + 54321 ≈ ", $add->(12345, 54321), "\n"; +print "188 - 88 ≈ ", $sub->(188, 88), "\n"; +print "250 * 2 ≈ ", $mul->(250, 2), "\n"; +print "250 / 2 ≈ ", $div->(250, 2), "\n"; + diff --git a/perl-package/AI-MXNet/examples/char_lstm.pl b/perl-package/AI-MXNet/examples/char_lstm.pl index 0e33a0d82445..1b69ee1e93c6 100755 --- a/perl-package/AI-MXNet/examples/char_lstm.pl +++ b/perl-package/AI-MXNet/examples/char_lstm.pl @@ -15,7 +15,7 @@ 'gpus=s' => \(my $gpus ), 
'kv-store=s' => \(my $kv_store = 'device'), 'num-epoch=i' => \(my $num_epoch = 25 ), - 'lr=f' => \(my $lr = 0.01 ), + 'lr=f' => \(my $lr = 0.001 ), 'optimizer=s' => \(my $optimizer = 'adam' ), 'mom=f' => \(my $mom = 0 ), 'wd=f' => \(my $wd = 0.00001 ), @@ -208,8 +208,9 @@ package main; learning_rate => $lr, momentum => $mom, wd => $wd, - clip_gradient => 1, - rescale_grad => 1/$batch_size + clip_gradient => 5, + rescale_grad => 1/$batch_size, + lr_scheduler => AI::MXNet::FactorScheduler->new(step => 1000, factor => 0.99) }, initializer => mx->init->Xavier(factor_type => "in", magnitude => 2.34), num_epoch => $num_epoch, diff --git a/perl-package/AI-MXNet/examples/mnist.pl b/perl-package/AI-MXNet/examples/mnist.pl new file mode 100755 index 000000000000..891b5348039c --- /dev/null +++ b/perl-package/AI-MXNet/examples/mnist.pl @@ -0,0 +1,184 @@ +#!/usr/bin/perl +use strict; +use warnings; +# derived from http://mxnet.io/tutorials/python/mnist.html +use LWP::UserAgent (); +use PDL (); +#use Gtk2 '-init'; +use AI::MXNet ('mx'); + +my $ua = LWP::UserAgent->new(); + +sub download_data { + my($url, $force_download) = @_; + $force_download = 1 if @_ < 2; + my $fname = (split m{/}, $url)[-1]; + if($force_download or not -f $fname) { + $ua->get($url, ':content_file' => $fname); + } + return $fname; +} + +sub read_data { + my($label_url, $image_url) = @_; + my($magic, $num, $rows, $cols); + + open my($flbl), '<:gzip', download_data($label_url); + read $flbl, my($buf), 8; + ($magic, $num) = unpack 'N2', $buf; + my $label = PDL->new(); + $label->set_datatype($PDL::Types::PDL_B); + $label->setdims([ $num ]); + read $flbl, ${$label->get_dataref}, $num; + $label->upd_data(); + + open my($fimg), '<:gzip', download_data($image_url); + read $fimg, $buf, 16; + ($magic, $num, $rows, $cols) = unpack 'N4', $buf; + my $image = PDL->new(); + $image->set_datatype($PDL::Types::PDL_B); + $image->setdims([ $rows, $cols, $num ]); + read $fimg, ${$image->get_dataref}, $num * $rows * $cols; + 
$image->upd_data(); + + return($label, $image); +} + +my $path='http://yann.lecun.com/exdb/mnist/'; +my($train_lbl, $train_img) = read_data( + "${path}train-labels-idx1-ubyte.gz", "${path}train-images-idx3-ubyte.gz"); +my($val_lbl, $val_img) = read_data( + "${path}t10k-labels-idx1-ubyte.gz", "${path}t10k-images-idx3-ubyte.gz"); + +sub show_sample { + print 'label: ', $train_lbl->slice('0:9'), "\n"; + my $hbox = Gtk2::HBox->new(0, 2); + for my $i (0 .. 9) { + my $img = $train_img->slice(":,:,$i"); + my($w, $h) = $img->dims; + $img->make_physical(); + # ugh, pixbufs don't have a grayscale colorspace?! + # burst it to rgb I guess. + my $data = pack 'c*', map { $_, $_, $_ } unpack 'c*', ${$img->get_dataref}; + $hbox->add(Gtk2::Image->new_from_pixbuf( + Gtk2::Gdk::Pixbuf->new_from_data($data, 'rgb', 0, 8, $w, $h, $w * 3) + )); + } + my $win = Gtk2::Window->new('toplevel'); + $win->signal_connect(delete_event => sub { Gtk2->main_quit() }); + $win->add($hbox); + $win->show_all(); + Gtk2->main(); +} + +sub show_network { + my($viz) = @_; + my $load = Gtk2::Gdk::PixbufLoader->new(); + $load->write($viz->graph->as_png); + $load->close(); + my $img = Gtk2::Image->new_from_pixbuf($load->get_pixbuf()); + my $sw = Gtk2::ScrolledWindow->new(undef, undef); + $sw->add_with_viewport($img); + my $win = Gtk2::Window->new('toplevel'); + $win->signal_connect(delete_event => sub { Gtk2->main_quit() }); + $win->add($sw); + $win->show_all(); + Gtk2->main(); +} + +#show_sample(); + +sub to4d { + my($img) = @_; + return $img->reshape(28, 28, 1, ($img->dims)[2])->float / 255; +} + +my $batch_size = 100; +my $train_iter = mx->io->NDArrayIter( + data => to4d($train_img), + label => $train_lbl, + batch_size => $batch_size, + shuffle => 1, +); +my $val_iter = mx->io->NDArrayIter( + data => to4d($val_img), + label => $val_lbl, + batch_size => $batch_size, +); + +# Create a place holder variable for the input data +my $data = mx->sym->Variable('data'); + +sub nn_fc { + # Epoch[9] 
Train-accuracy=0.978889 + # Epoch[9] Time cost=145.437 + # Epoch[9] Validation-accuracy=0.964600 + my($data) = @_; + + # Flatten the data from 4-D shape (batch_size, num_channel, width, height) + # into 2-D (batch_size, num_channel*width*height) + $data = mx->sym->Flatten(data => $data); + + # The first fully-connected layer +# my $fc1 = mx->sym->FullyConnected(data => $data, name => 'fc1', num_hidden => 128); +# # Apply relu to the output of the first fully-connnected layer +# my $act1 = mx->sym->Activation(data => $fc1, name => 'relu1', act_type => "relu"); + + # The second fully-connected layer and the according activation function + my $fc2 = mx->sym->FullyConnected(data => $data, name => 'fc2', num_hidden => 64); + my $act2 = mx->sym->Activation(data => $fc2, name => 'relu2', act_type => "relu"); + + # The thrid fully-connected layer, note that the hidden size should be 10, which is the number of unique digits + my $fc3 = mx->sym->FullyConnected(data => $act2, name => 'fc3', num_hidden => 10); + # The softmax and loss layer + my $mlp = mx->sym->SoftmaxOutput(data => $fc3, name => 'softmax'); + return $mlp; +} + +sub nn_conv { + my($data) = @_; + # Epoch[9] Batch [200] Speed: 1625.07 samples/sec Train-accuracy=0.992090 + # Epoch[9] Batch [400] Speed: 1630.12 samples/sec Train-accuracy=0.992850 + # Epoch[9] Train-accuracy=0.991357 + # Epoch[9] Time cost=36.817 + # Epoch[9] Validation-accuracy=0.988100 + + my $conv1= mx->symbol->Convolution(data => $data, name => 'conv1', num_filter => 20, kernel => [5,5], stride => [2,2]); + my $bn1 = mx->symbol->BatchNorm(data => $conv1, name => "bn1"); + my $act1 = mx->symbol->Activation(data => $bn1, name => 'relu1', act_type => "relu"); + my $mp1 = mx->symbol->Pooling(data => $act1, name => 'mp1', kernel => [2,2], stride =>[1,1], pool_type=>'max'); + + my $conv2= mx->symbol->Convolution(data => $mp1, name => 'conv2', num_filter => 50, kernel=>[3,3], stride=>[2,2]); + my $bn2 = mx->symbol->BatchNorm(data => $conv2, 
name=>"bn2"); + my $act2 = mx->symbol->Activation(data => $bn2, name=>'relu2', act_type=>"relu"); + my $mp2 = mx->symbol->Pooling(data => $act2, name => 'mp2', kernel=>[2,2], stride=>[1,1], pool_type=>'max'); + + + my $fl = mx->symbol->Flatten(data => $mp2, name=>"flatten"); + my $fc1 = mx->symbol->FullyConnected(data => $fl, name=>"fc1", num_hidden=>100); + my $act3 = mx->symbol->Activation(data => $fc1, name=>'relu3', act_type=>"relu"); + my $fc2 = mx->symbol->FullyConnected(data => $act3, name=>'fc2', num_hidden=>30); + my $act4 = mx->symbol->Activation(data => $fc2, name=>'relu4', act_type=>"relu"); + my $fc3 = mx->symbol->FullyConnected(data => $act4, name=>'fc3', num_hidden=>10); + my $softmax = mx->symbol->SoftmaxOutput(data => $fc3, name => 'softmax'); + return $softmax; +} + +my $mlp = $ARGV[0] ? nn_conv($data) : nn_fc($data); + +#We visualize the network structure with output size (the batch_size is ignored.) +#my $shape = { data => [ $batch_size, 1, 28, 28 ] }; +#show_network(mx->viz->plot_network($mlp, shape => $shape)); + +my $model = mx->mod->Module( + symbol => $mlp, # network structure +); +$model->fit( + $train_iter, # training data + num_epoch => 10, # number of data passes for training + eval_data => $val_iter, # validation data + batch_end_callback => mx->callback->Speedometer($batch_size, 200), # output progress for each 200 data batches + optimizer => 'adam', +); + + diff --git a/perl-package/AI-MXNet/lib/AI/MXNet.pm b/perl-package/AI-MXNet/lib/AI/MXNet.pm index 2cdd9ae7bf8b..45adfb4389fe 100644 --- a/perl-package/AI-MXNet/lib/AI/MXNet.pm +++ b/perl-package/AI-MXNet/lib/AI/MXNet.pm @@ -27,7 +27,7 @@ use AI::MXNet::Visualization; use AI::MXNet::RecordIO; use AI::MXNet::Image; use AI::MXNet::Contrib; -our $VERSION = '0.9504'; +our $VERSION = '0.9506'; sub import { diff --git a/perl-package/AI-MXNet/lib/AI/MXNet/Base.pm b/perl-package/AI-MXNet/lib/AI/MXNet/Base.pm index 8fd6f323d1ef..5c6dc84f5668 100644 --- 
a/perl-package/AI-MXNet/lib/AI/MXNet/Base.pm +++ b/perl-package/AI-MXNet/lib/AI/MXNet/Base.pm @@ -3,7 +3,7 @@ use strict; use warnings; use PDL; use PDL::Types qw(); -use AI::MXNetCAPI 0.9504; +use AI::MXNetCAPI 0.9506; use AI::NNVMCAPI 0.95; use AI::MXNet::Types; use Time::HiRes; diff --git a/perl-package/AI-MXNet/lib/AI/MXNet/Callback.pm b/perl-package/AI-MXNet/lib/AI/MXNet/Callback.pm index 8076fd4b22ca..04aaea06c47d 100644 --- a/perl-package/AI-MXNet/lib/AI/MXNet/Callback.pm +++ b/perl-package/AI-MXNet/lib/AI/MXNet/Callback.pm @@ -106,6 +106,8 @@ extends 'AI::MXNet::Callback'; frequent: int How many batches between calculations. Defaults to calculating & logging every 50 batches. + auto_reset: Bool + Reset the metric after each log, defaults to true. =cut has 'batch_size' => (is => 'ro', isa => 'Int', required => 1); @@ -113,6 +115,7 @@ has 'frequent' => (is => 'ro', isa => 'Int', default => 50); has 'init' => (is => 'rw', isa => 'Int', default => 0); has 'tic' => (is => 'rw', isa => 'Num', default => 0); has 'last_count' => (is => 'rw', isa => 'Int', default => 0); +has 'auto_reset' => (is => 'ro', isa => 'Bool', default => 1); method call(AI::MXNet::BatchEndParam $param) { @@ -131,7 +134,7 @@ method call(AI::MXNet::BatchEndParam $param) if(defined $param->eval_metric) { my $name_value = $param->eval_metric->get_name_value; - $param->eval_metric->reset; + $param->eval_metric->reset if $self->auto_reset; while(my ($name, $value) = each %{ $name_value }) { AI::MXNet::Logging->info( @@ -221,7 +224,11 @@ package AI::MXNet::Callback; method Speedometer(@args) { AI::MXNet::Speedometer->new( - @args == 2 ? (batch_size => $args[0], frequent => $args[1]) : (batch_size => $args[0]) + @args == 3 ? + (batch_size => $args[0], frequent => $args[1], auto_reset => $args[2]) + : @args == 2 ? 
+ (batch_size => $args[0], frequent => $args[1]) + : (batch_size => $args[0]) ) } diff --git a/perl-package/AI-MXNet/lib/AI/MXNet/Executor/Group.pm b/perl-package/AI-MXNet/lib/AI/MXNet/Executor/Group.pm index 752ecb382a3c..33e54dc1e847 100644 --- a/perl-package/AI-MXNet/lib/AI/MXNet/Executor/Group.pm +++ b/perl-package/AI-MXNet/lib/AI/MXNet/Executor/Group.pm @@ -403,22 +403,20 @@ method _collect_arrays() } } } - my %data_names = map { $_->name => 1 } @{ $self->data_shapes }; + my @data_names = map { $_->name } @{ $self->data_shapes }; + my $j = 0; my %arg_names = map { $_ => $j++ } @{ $self->_p->arg_names }; if($self->inputs_need_grad) { $self->_p->input_grad_arrays([]); - for my $i (0..@{ $self->_p->arg_names }-1) + for my $name (@data_names) { - my $name = $self->_p->arg_names->[$i]; - if(exists $data_names{$name}) + next unless exists $arg_names{$name}; + my @tmp; + for my $exec (@{ $self->_p->execs }) { - my @tmp; - for my $exec (@{ $self->_p->execs }) - { - push @tmp, $exec->grad_arrays->[$i]; - } - push @{ $self->_p->input_grad_arrays }, \@tmp; + push @tmp, $exec->grad_arrays->[$arg_names{$name}]; } + push @{ $self->_p->input_grad_arrays }, \@tmp; } } $self->_p->aux_arrays([]); diff --git a/perl-package/AI-MXNet/lib/AI/MXNet/Metric.pm b/perl-package/AI-MXNet/lib/AI/MXNet/Metric.pm index 9c97b58d9049..fbb93b006a6f 100644 --- a/perl-package/AI-MXNet/lib/AI/MXNet/Metric.pm +++ b/perl-package/AI-MXNet/lib/AI/MXNet/Metric.pm @@ -378,8 +378,13 @@ method update(ArrayRef[AI::MXNet::NDArray] $labels, ArrayRef[AI::MXNet::NDArray] $loss -= $pred->maximum(1e-10)->log->sum->asscalar; $num += $pred->size; }, $labels, $preds); - $self->sum_metric($self->sum_metric + exp($loss/$num)); - $self->num_inst($self->num_inst + 1); + $self->sum_metric($self->sum_metric + $loss); + $self->num_inst($self->num_inst + $num); +} + +method get() +{ + return ($self->name, exp($self->sum_metric / $self->num_inst)); } #################### diff --git 
a/perl-package/AI-MXNet/lib/AI/MXNet/Optimizer.pm b/perl-package/AI-MXNet/lib/AI/MXNet/Optimizer.pm index 07c29ffc1c03..be48a19d5624 100644 --- a/perl-package/AI-MXNet/lib/AI/MXNet/Optimizer.pm +++ b/perl-package/AI-MXNet/lib/AI/MXNet/Optimizer.pm @@ -25,8 +25,10 @@ method get_opt_registry() method register() { - my $name = lc $self; + my $name = $self; ($name) = $name =~ /::(\w+)$/; + { no strict 'refs'; *{__PACKAGE__."::$name"} = sub { $self }; } + $name = lc $name; if(exists $opt_registry{ $name }) { my $existing = $opt_registry{ $name }; @@ -1007,6 +1009,80 @@ method update( __PACKAGE__->register; +package AI::MXNet::Ftrl; + +=head1 NAME + + AI::MXNet::Ftrl +=cut + +=head1 DESCRIPTION + + Reference: Ad Click Prediction: a View from the Trenches + + Parameters + ---------- + lambda1 : float, optional + L1 regularization coefficient. + + learning_rate : float, optional + The initial learning rate. + + beta : float, optional + Per-coordinate learning rate correlation parameter. + \eta_{t,i} = \frac{learning\_rate}{\beta + \sqrt{\sum_{s=1}^{t} g_{s,i}^2}} +=cut + +use Mouse; +extends 'AI::MXNet::Optimizer'; +has '+learning_rate' => (default => 0.1); +has 'beta' => (is => "ro", isa => "Num", default => 1); +has 'lambda1' => (is => "ro", isa => "Num", default => 0.9); + +method create_state(Index $index, AI::MXNet::NDArray $weight) +{ + return [ + AI::MXNet::NDArray->zeros( + $weight->shape, + ctx => $weight->context + ), # dn + AI::MXNet::NDArray->zeros( + $weight->shape, + ctx => $weight->context + ) # n + ]; +} + +method update( + Index $index, + AI::MXNet::NDArray $weight, + AI::MXNet::NDArray $grad, + ArrayRef[AI::MXNet::NDArray] $state +) +{ + $self->_update_count($index); + my $wd = $self->_get_wd($index); + my $lr = $self->_get_lr($index); + $grad *= $self->rescale_grad; + if($self->clip_gradient) + { + $grad = AI::MXNet::NDArray->clip( + $grad, + -$self->clip_gradient, + $self->clip_gradient + ); + } + my ($dn, $n) = @{ $state }; + $dn += $grad - (($n + $grad * $grad)->sqrt
- $n->sqrt) * $weight / $lr; + $n += $grad * $grad; + + $weight .= ($dn->sign * $self->lambda1 - $dn) + / + (($self->beta + $n->sqrt) / $lr + $wd) * ($dn->abs > $self->lambda1); +} + +__PACKAGE__->register; + # updater for kvstore package AI::MXNet::Updater; use Mouse; diff --git a/perl-package/AI-MXNet/lib/AI/MXNet/Symbol.pm b/perl-package/AI-MXNet/lib/AI/MXNet/Symbol.pm index c9fda227f508..f4f5f0de3efb 100644 --- a/perl-package/AI-MXNet/lib/AI/MXNet/Symbol.pm +++ b/perl-package/AI-MXNet/lib/AI/MXNet/Symbol.pm @@ -1127,7 +1127,8 @@ method Variable( Maybe[Num] :$wd_mult=, Maybe[Dtype] :$dtype=, Maybe[AI::MXNet::Initializer] :$init=, - HashRef[Str] :$kwargs={} + HashRef[Str] :$kwargs={}, + Maybe[Str] :$__layout__= ) { my $handle = check_call(AI::MXNetCAPI::SymbolCreateVariable($name)); @@ -1138,6 +1139,7 @@ method Variable( $attr->{__wd_mult__} = $wd_mult if defined $wd_mult; $attr->{__dtype__} = DTYPE_STR_TO_MX->{ $dtype } if $dtype; $attr->{__init__} = "$init" if defined $init; + $attr->{__layout__} = $__layout__ if defined $__layout__; while(my ($k, $v) = each %{ $kwargs }) { if($k =~ /^__/ and $k =~ /__$/) diff --git a/perl-package/AI-MXNet/t/test_module.t b/perl-package/AI-MXNet/t/test_module.t index 8a761f7045ad..89228c949aa8 100644 --- a/perl-package/AI-MXNet/t/test_module.t +++ b/perl-package/AI-MXNet/t/test_module.t @@ -1,6 +1,6 @@ use strict; use warnings; -use Test::More tests => 19; +use Test::More tests => 23; use AI::MXNet qw(mx); use AI::MXNet::Base; use AI::MXNet::TestUtils qw(almost_equal enumerate); @@ -272,6 +272,68 @@ sub test_monitor is_deeply($mon_result_counts, [2, 2, 1, 6, 6, 4]); } +sub test_module_dtype +{ + my $dtype = 'float16'; + my $dshape = [3, 8, 7]; + + my $sym = mx->sym->Variable('data'); + $sym = mx->sym->Activation(data=>$sym, act_type=>'relu', __layout__=>'TNC'); + + my $mod = mx->mod->Module($sym, data_names=>['data'], context => [mx->cpu(0), mx->cpu(1)]); + $mod->bind(data_shapes=>[ + mx->io->DataDesc('data', $dshape, dtype => $dtype, layout=>'TNC') +
]); + $mod->init_params(); + $mod->forward( + mx->io->DataBatch( + data=>[mx->nd->ones($dshape, dtype=>$dtype)] + ) + ); + $mod->backward([mx->nd->ones($dshape, dtype=>$dtype)]); + + for my $x (@{ $mod->get_outputs() }) + { + is($x->dtype, $dtype); + } +} + +sub test_module_input_grads +{ + my $a = mx->sym->Variable('a', __layout__=>'NC'); + my $b = mx->sym->Variable('b', __layout__=>'NC'); + my $c = mx->sym->Variable('c', __layout__=>'NC'); + + $c = $a + 2 * $b + 3 * $c; + my $net = mx->mod->Module( + $c, data_names=>['b', 'c', 'a'], + context=>[mx->cpu(0), mx->cpu(1)] + ); + $net->bind( + data_shapes => [['b', [5, 5]], ['c', [5, 5]], ['a', [5, 5]]], + inputs_need_grad => 1 + ); + $net->init_params(); + + $net->forward( + mx->io->DataBatch(data => [ + mx->nd->ones([5, 5]), + mx->nd->ones([5, 5]), + mx->nd->ones([5, 5]) + ]) + ); + $net->backward([mx->nd->ones([5, 5])]); + my $input_grads = $net->get_input_grads(); + my $b_grad = $input_grads->[0]->aspdl; + my $c_grad = $input_grads->[1]->aspdl; + my $a_grad = $input_grads->[2]->aspdl; + ok(($a_grad == 1)->all); + ok(($b_grad == 2)->all); + ok(($c_grad == 3)->all); +} + +test_module_input_grads(); +test_module_dtype(); test_monitor(); test_module_switch_bucket(); test_module_layout(); diff --git a/perl-package/AI-MXNet/t/test_optimizers.t b/perl-package/AI-MXNet/t/test_optimizers.t index 1248b166d062..e0027b901cf8 100644 --- a/perl-package/AI-MXNet/t/test_optimizers.t +++ b/perl-package/AI-MXNet/t/test_optimizers.t @@ -186,8 +186,58 @@ method update($index, $weight, $grad, $state) } } +package PerlSGD; +# perl reference implemenation of sgd +use Mouse; +extends 'AI::MXNet::Optimizer'; +has '+learning_rate' => (default => 0.01); +has 'momentum' => (is => "ro", isa => "Num", default => 0); + +# Create additional optimizer state: momentum +method create_state(Index $index, AI::MXNet::NDArray $weight) +{ + return undef if $self->momentum == 0; + return mx->nd->zeros($weight->shape, ctx => $weight->context, dtype => 
$weight->dtype); +} + +method update($index, $weight, $grad, $state) +{ + my $lr = $self->_get_lr($index); + my $wd = $self->_get_wd($index); + $self->_update_count($index); + if($self->momentum == 0) + { + if(defined $self->clip_gradient) + { + $weight .= ((1 - $lr*$wd)*$weight - + $lr*mx->nd->clip($grad*$self->rescale_grad, -$self->clip_gradient, $self->clip_gradient) + ); + } + else + { + $weight .= (1 - $lr*$wd)*$weight - $lr*$self->rescale_grad*$grad; + } + } + else + { + my $mom = $state; + if(defined $self->clip_gradient) + { + $mom .= ($self->momentum*$mom - $lr*$wd*$weight - + $lr*mx->nd->clip($grad*$self->rescale_grad, -$self->clip_gradient, $self->clip_gradient) + ); + $weight += $mom; + } + else + { + $mom .= $self->momentum*$mom - $lr*$wd*$weight - $lr*$self->rescale_grad*$grad; + $weight += $mom; + } + } +} + package main; -use Test::More tests => 162; +use Test::More tests => 190; use AI::MXNet::Base; use PDL::NiceSlice; use AI::MXNet::TestUtils qw(same reldiff almost_equal); @@ -208,8 +258,8 @@ func compare_optimizer($opt1, $opt2, $shape) my ($s1, $s2) = @_; ok(same($s1->aspdl, $s2->aspdl)) }, - $state1, $state2 - ); + ref $state1 eq 'ARRAY' ? $state1 : [$state1], ref $state2 eq 'ARRAY' ? $state2 : [$state2] + ) if defined $state1 and defined $state2; $opt1->update(0, $w1, $g1, $state1); $opt2->update(0, $w2, $g2, $state2); @@ -218,8 +268,8 @@ func compare_optimizer($opt1, $opt2, $shape) my ($s1, $s2) = @_; ok(reldiff($s1->aspdl, $s2->aspdl) < 1e-5) }, - $state1, $state2 - ); + ref $state1 eq 'ARRAY' ? $state1 : [$state1], ref $state2 eq 'ARRAY' ? 
$state2 : [$state2] + ) if defined $state1 and defined $state2; ok(reldiff($w1->aspdl, $w2->aspdl) < 1e-5); } @@ -278,6 +328,35 @@ func test_rms() } } + +sub test_sgd +{ + mx->random->seed(0); + my $opt1 = 'PerlSGD'; + my $opt2 = mx->optimizer->SGD; + my $shape = [3, 4, 5]; + my @kwargs = ( + {}, + {momentum => 0.9}, + {clip_gradient => 0.5}, + {clip_gradient => 0.4, rescale_grad => 0.14}, + {rescale_grad => 0.8}, + {clip_gradient => 0.5, wd => 0.07}, + {clip_gradient => 0.4, rescale_grad => 0.14, wd => 0.03}, + {rescale_grad => 0.8, wd => 0.05}, + {clip_gradient => 0.5, momentum => 0.9}, + {clip_gradient => 0.4, rescale_grad => 0.14, momentum => 0.9}, + {rescale_grad => 0.8, momentum => 0.9}, + {clip_gradient => 0.5, wd => 0.07, momentum => 0.9}, + {clip_gradient => 0.4, rescale_grad => 0.14, wd => 0.03, momentum => 0.9}, + {rescale_grad => 0.8, wd => 0.05, momentum => 0.9} + ); + for my $kwarg (@kwargs) + { + compare_optimizer($opt1->new(%$kwarg), $opt2->new(%$kwarg), $shape); + } +} + func test_lr_wd_mult() { my $data = mx->sym->Variable('data'); @@ -311,5 +390,6 @@ func test_lr_wd_mult() test_adam(); test_rms(); +test_sgd(); test_lr_wd_mult(); diff --git a/perl-package/AI-MXNet/t/test_symbol.t b/perl-package/AI-MXNet/t/test_symbol.t index 190fa3caa2a4..d6d79eaf30df 100644 --- a/perl-package/AI-MXNet/t/test_symbol.t +++ b/perl-package/AI-MXNet/t/test_symbol.t @@ -1,6 +1,6 @@ use strict; use warnings; -use Test::More tests => 99; +use Test::More tests => 98; use AI::MXNet qw(mx); use AI::MXNet::TestUtils qw(mlp2 conv check_consistency zip assert enumerate); use Storable qw(freeze thaw); @@ -32,7 +32,7 @@ sub test_symbol_copy is($data->tojson, $data_2->tojson); } -test_symbol_compose(); +test_symbol_copy(); sub test_symbol_internal { diff --git a/perl-package/AI-MXNetCAPI/Changes b/perl-package/AI-MXNetCAPI/Changes index a327bb21031a..a916a6952cf4 100644 --- a/perl-package/AI-MXNetCAPI/Changes +++ b/perl-package/AI-MXNetCAPI/Changes @@ -1,5 +1,8 @@ Revision 
history for Perl extension AI::MXNetCAPI +0.9506 Sat Apr 29 20:26:50 PDT 2017 + - removed compiled swig file. + 0.9504 Wed Apr 19 19:07:02 PDT 2017 - callbacks bugfix. diff --git a/perl-package/AI-MXNetCAPI/META.json b/perl-package/AI-MXNetCAPI/META.json index 31c61b6cfec2..df99d3829dc3 100644 --- a/perl-package/AI-MXNetCAPI/META.json +++ b/perl-package/AI-MXNetCAPI/META.json @@ -37,5 +37,5 @@ } }, "release_status" : "stable", - "version" : "0.9504" + "version" : "0.9506" } diff --git a/perl-package/AI-MXNetCAPI/META.yml b/perl-package/AI-MXNetCAPI/META.yml index 723db1c03a8b..26deb03bde79 100644 --- a/perl-package/AI-MXNetCAPI/META.yml +++ b/perl-package/AI-MXNetCAPI/META.yml @@ -19,4 +19,4 @@ no_index: - inc requires: Test::More: '0' -version: '0.9504' +version: '0.9506' diff --git a/perl-package/AI-MXNetCAPI/README b/perl-package/AI-MXNetCAPI/README index 5e49d1b8ff0c..0d21c496574d 100644 --- a/perl-package/AI-MXNetCAPI/README +++ b/perl-package/AI-MXNetCAPI/README @@ -1,4 +1,4 @@ -AI-MXNetCAPI version 0.9504 +AI-MXNetCAPI version 0.9506 ===================== Swig interface to MXNet c api. diff --git a/perl-package/AI-MXNetCAPI/lib/AI/MXNetCAPI.pm b/perl-package/AI-MXNetCAPI/lib/AI/MXNetCAPI.pm index fed05782928d..0d3ece67f43d 100644 --- a/perl-package/AI-MXNetCAPI/lib/AI/MXNetCAPI.pm +++ b/perl-package/AI-MXNetCAPI/lib/AI/MXNetCAPI.pm @@ -1,7 +1,7 @@ package AI::MXNetCAPI; use base qw(DynaLoader); bootstrap AI::MXNetCAPI; -our $VERSION = '0.9504'; +our $VERSION = '0.9506'; 1; __END__