diff --git a/build/convnet-min.js b/build/convnet-min.js
deleted file mode 100644
index 24354824..00000000
--- a/build/convnet-min.js
+++ /dev/null
@@ -1 +0,0 @@
-var convnetjs=convnetjs||{REVISION:"ALPHA"}; [remainder of the single minified line elided: identifier-mangled versions of the utilities, Vol, the layer implementations, Net, the Trainer, and MagicNet]
[elided: diff header of the accompanying unminified build file (likely build/convnet.js), also deleted]
[elided: the file's random-number utilities — a cached polar Box-Muller gaussRandom(), randf(a, b), randi(a, b), randn(mu, std) — and array utilities — zeros(n) with a typed-array fast path and a plain-Array fallback for browsers lacking ArrayBuffer, maxmin(w) returning {maxi, maxv, mini, minv, dv}, and randperm(n) — then the Vol volume class and the start of the augment(V, crop, dx, dy, fliplr) crop/flip helper]
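The Gaussian sampler in the deleted utilities is the polar (Marsaglia) variant of the Box-Muller transform, with the second sample cached between calls. A minimal standalone sketch reconstructed from the fragments above (same names as the deleted code; `randn` scales and shifts the unit normal):

```js
// Polar (Marsaglia) Box-Muller sampler, as in the deleted utilities.
// Each accepted (u, v) pair yields two independent N(0,1) samples;
// the second is cached in v_val and returned on the next call.
var return_v = false;
var v_val = 0.0;
function gaussRandom() {
  if (return_v) { return_v = false; return v_val; }
  var u = 2 * Math.random() - 1;
  var v = 2 * Math.random() - 1;
  var r = u * u + v * v;
  if (r === 0 || r > 1) return gaussRandom(); // reject points outside the unit disk
  var c = Math.sqrt(-2 * Math.log(r) / r);
  v_val = v * c;   // cache the second sample
  return_v = true;
  return u * c;
}
function randn(mu, std) { return mu + gaussRandom() * std; }
```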
[elided: the rest of augment() — a random square crop with an out-of-bounds guard and an optional left-right flip — the convolution and fully-connected layers, PoolLayer with its switchx/switchy argmax bookkeeping for backprop, the SoftmaxLayer forward pass, the RegressionLayer and the SVMLayer hinge loss, the shared getParamsAndGrads/toJSON/fromJSON boilerplate, the elementwise ReluLayer and SigmoidLayer, and the MaxoutLayer constructor with its group_size option and switches array]
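The SoftmaxLayer fragments show the standard numerical-stability trick: subtract the maximum activation before exponentiating ("carefully to not blow up"), then normalize, which leaves the result unchanged because the shift cancels in the ratio. A self-contained sketch of that forward computation (the helper name `softmax` and the plain-array interface are illustrative, not the layer's API):

```js
// Numerically stable softmax, as in the deleted SoftmaxLayer forward pass:
// subtracting the max activation keeps Math.exp from overflowing without
// changing the normalized probabilities.
function softmax(as) {
  var amax = as[0];
  for (var i = 1; i < as.length; i++) { if (as[i] > amax) amax = as[i]; }
  var es = new Float64Array(as.length);
  var esum = 0.0;
  for (var j = 0; j < as.length; j++) {
    var e = Math.exp(as[j] - amax); // always <= 1, so no overflow
    esum += e;
    es[j] = e;
  }
  for (var k = 0; k < as.length; k++) { es[k] /= esum; }
  return es;
}
```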
[elided: the MaxoutLayer forward and backward passes — a fast path for 1×1 spatial volumes plus the general case, with gradients routed through the recorded switches — the elementwise TanhLayer, the QuadTransformLayer mapping x to its linear terms plus all pairwise products x_i*x_j (including both x_i*x_j and x_j*x_i rather than only the 1/2*n*(n+1) distinct terms), and Net.makeLayers() with its switch over the layer-type strings: fc, lrn, dropout, input, softmax, regression, conv, pool, relu, sigmoid, tanh, maxout, quadtransform, svm]
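The maxout fragments record an argmax "switch" per output during the forward pass so the backward pass can route the gradient to the single winning input. A sketch of the 1-D fast path under that scheme (the function names and returned object shape are illustrative, not the layer's actual API):

```js
// Maxout over groups of groupSize consecutive channels, 1-D case, as in the
// deleted MaxoutLayer fast path: forward records the winning index so that
// backward can send the whole gradient through it.
function maxoutForward(w, groupSize) {
  var n = Math.floor(w.length / groupSize);
  var out = new Float64Array(n);
  var switches = new Int32Array(n); // argmax indices, consumed by the backward pass
  for (var i = 0; i < n; i++) {
    var ix = i * groupSize;
    var a = w[ix], ai = 0;
    for (var j = 1; j < groupSize; j++) {
      if (w[ix + j] > a) { a = w[ix + j]; ai = j; }
    }
    out[i] = a;
    switches[i] = ix + ai;
  }
  return { out: out, switches: switches };
}
function maxoutBackward(dwOut, switches, inLength) {
  var dwIn = new Float64Array(inLength); // zeroed gradient wrt input
  for (var i = 0; i < dwOut.length; i++) {
    dwIn[switches[i]] = dwOut[i]; // gradient flows only through the winner
  }
  return dwIn;
}
```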
[elided: Net.forward (is_training defaults to false), Net.backward, getParamsAndGrads() accumulating over all layers, getPrediction() as an argmax over the final softmax activations, JSON (de)serialization, and the Trainer — options learning_rate, l1_decay, l2_decay, batch_size, method, momentum, ro, eps; timed forward/backward passes; L1/L2 regularization gradients; the adagrad, windowgrad ("Idea #1" in Zeiler's Adadelta paper), adadelta, momentum, and vanilla-SGD update rules; a returned stats object that keeps softmax_loss as an alias of cost_loss for backwards compatibility; and SGDTrainer kept as a backwards-compatible alias of Trainer]
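The Trainer's update rules are visible almost verbatim in the fragments. Restructured here as a single-weight helper for readability — the `step` function and its `state`/`opts` bags are illustrative, while the formulas and option names (`learning_rate`, `ro`, `eps`, `momentum`) follow the deleted code:

```js
// Per-parameter update rules from the deleted Trainer, applied to one weight.
// g: raw batch gradient; state.gsum/state.xsum: running accumulators
// (gsum doubles as the momentum velocity in the SGD branch).
function step(method, p, g, state, opts) {
  var lr = opts.learning_rate, ro = opts.ro, eps = opts.eps;
  var dx;
  if (method === 'adagrad') {
    state.gsum += g * g;                               // squared gradients, accumulated forever
    dx = -lr / Math.sqrt(state.gsum + eps) * g;
  } else if (method === 'windowgrad') {
    state.gsum = ro * state.gsum + (1 - ro) * g * g;   // exponentially decayed window
    dx = -lr / Math.sqrt(state.gsum + eps) * g;
  } else if (method === 'adadelta') {
    state.gsum = ro * state.gsum + (1 - ro) * g * g;
    dx = -Math.sqrt((state.xsum + eps) / (state.gsum + eps)) * g;
    state.xsum = ro * state.xsum + (1 - ro) * dx * dx; // xsum lags gsum by one step
  } else if (opts.momentum > 0) {
    dx = opts.momentum * state.gsum - lr * g;          // velocity update
    state.gsum = dx;
  } else {
    dx = -lr * g;                                      // vanilla SGD
  }
  return p + dx;
}

// e.g. var s = {gsum: 0, xsum: 0};
// w = step('adadelta', w, grad, s, {learning_rate: 0.01, ro: 0.95, eps: 1e-6, momentum: 0});
```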
[elided: the MagicNet class — given data and class labels, it samples cross-validation folds, samples candidate networks, evaluates every candidate on every fold, and predicts by model-averaging the best ensemble_size networks; option defaults: train_ratio 0.7, num_folds 10, num_candidates 50, num_epochs 50, ensemble_size 10, batch size 10–300, l2_decay exponent in [-4, 2], learning_rate exponent in [-4, 0], momentum fixed at 0.9, 5–30 neurons — followed by sampleFolds() and the opening of sampleCandidates()]
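MagicNet's fold sampling pairs a Fisher-Yates permutation with a train/test split. A sketch of that step (the `{train_ix, test_ix}` shape and `randperm` follow the fragments; note that the `l2_decay` and `learning_rate` ranges above are base-10 exponents, so candidate sampling presumably draws them log-uniformly via Math.pow(10, randf(min, max)) — that detail is inferred, since the sampling body is not recoverable here):

```js
// Fold sampling in the spirit of MagicNet.sampleFolds(): shuffle the example
// indices (Fisher-Yates, as in the deleted randperm) and split train/test.
function randperm(n) {
  var array = [];
  for (var q = 0; q < n; q++) array[q] = q;
  for (var i = n - 1; i > 0; i--) {
    var j = Math.floor(Math.random() * (i + 1));
    var t = array[i]; array[i] = array[j]; array[j] = t;
  }
  return array;
}
function sampleFolds(N, numFolds, trainRatio) {
  var numTrain = Math.floor(trainRatio * N);
  var folds = [];
  for (var i = 0; i < numFolds; i++) {
    var p = randperm(N);
    folds.push({ train_ix: p.slice(0, numTrain), test_ix: p.slice(numTrain) });
  }
  return folds;
}
```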