Conversion from IUPRLAB routines from Bitbucket (and iupr1).

tmbdev-archive · Oct 28, 2014 · 239918b · 239918b
commit 239918b
Show file tree

Hide file tree

Showing 28 changed files with 4,337 additions and 0 deletions.
diff --git a/HG_HISTORY b/HG_HISTORY
@@ -0,0 +1,27 @@
+The history couldn't be converted, but it's pretty simple.
+
+changeset:   2:3efce9b3e4c0
+tag:         tip
+user:        <tmbdev>
+date:        Sat Oct 25 11:36:57 2014 -0700
+files:       accel.pyc kmeans.pyc test_quantizer.pyc tpquant.py.~1~
+description:
+cleanup
+
+
+changeset:   1:0d148f87ab59
+user:        <tmbdev>
+date:        Sat Oct 25 11:34:58 2014 -0700
+files:       mlp.py old/pyaccel.c old/pyaccel.f95 old/pyaccel.pyf pyaccel.c pyaccel.f95 pyaccel.pyf
+description:
+cleanup
+
+
+changeset:   0:ec71d8ba8036
+user:        tmb@ixion
+date:        Mon Oct 12 01:15:09 2009 +0200
+files:       Makefile __init__.py accel.py accel.pyc accel_c.c boostedmlp.py boostedstumps.py command.py dataio.py fastica.py fmixtures.py gng.py heap.py imageutil.py kmeans.py kmeans.pyc mixtures.py mlp.py ocrutil.py pca.py pyaccel.c pyaccel.f95 pyaccel.pyf show_ocrdir.py som.py test_all.py test_classifier.py test_density.py test_quantizer.py test_quantizer.pyc test_transformer.py tpquant.py tpquant.py.~1~ unionfind.py
+description:
+initial mercurial version
+
+
diff --git a/Makefile b/Makefile
@@ -0,0 +1,2 @@
+# Build the OpenMP-accelerated nearest-neighbour helper as a shared library.
+# -O3 is the highest documented GCC optimisation level (GCC treats -O4 as -O3)
+# and -std=c99 is the documented spelling of the language-standard option.
+accel_c.so: accel_c.c
+	gcc -shared -fPIC -O3 -fopenmp -std=c99 accel_c.c -o accel_c.so
diff --git a/__init__.py b/__init__.py
@@ -0,0 +1,23 @@
+# Submodule names made available by `from <package> import *`.
+# NOTE(review): "ocrother" is not among the files listed in HG_HISTORY, while
+# accel, command, gng and tpquant are listed there but not exported here --
+# confirm this list matches the modules actually present.
+__all__ = [
+    "boostedmlp",
+    "boostedstumps",
+    "dataio",
+    "fastica",
+    "fmixtures",
+    "heap",
+    "imageutil",
+    "kmeans",
+    "mixtures",
+    "mlp",
+    "ocrother",
+    "ocrutil",
+    "pca",
+    "show_ocrdir",
+    "som",
+    "test_all",
+    "test_classifier",
+    "test_density",
+    "test_quantizer",
+    "test_transformer",
+    "unionfind",
+]
diff --git a/accel.py b/accel.py
@@ -0,0 +1,20 @@
+from numpy import *
+from ctypes import *
+from numpy.ctypeslib import ndpointer
+
+import os.path
+
+# Load the compiled helper from the directory containing this module so that
+# importing does not depend on the current working directory; fall back to
+# the historical cwd-relative path when no library sits next to the module.
+_libpath = os.path.join(os.path.dirname(os.path.abspath(__file__)),"accel_c.so")
+if not os.path.exists(_libpath):
+    _libpath = "./accel_c.so"
+lib = cdll.LoadLibrary(_libpath)
+
+# C prototype: int argmindist_double(int d,int n,double v[d],double data[n][d])
+# numpy's 'float' dtype is a C double, which matches the pointer arguments.
+lib.argmindist_double.argtypes = [
+    c_int,c_int,
+    ndpointer(dtype='float',flags='C_CONTIGUOUS'),
+    ndpointer(dtype='float',flags='C_CONTIGUOUS')]
+lib.argmindist_double.restype = c_int
+
+def argmindist(v,data):
+    """Return the index of the item in `data` nearest to `v`.
+
+    The work is done by the C routine `argmindist_double`, which is called
+    with the per-item size `prod(data.shape[1:])`, the item count
+    `data.shape[0]`, and the two arrays. `v` must hold exactly as many
+    values as one item of `data` (checked by the assertion).
+    """
+    # The ndpointer argtypes only accept C-contiguous float64 arrays, so
+    # coerce here; arrays that already qualify are passed through unchanged.
+    v = ascontiguousarray(v,dtype='float')
+    data = ascontiguousarray(data,dtype='float')
+    # Plain Python ints convert cleanly to c_int (prod may return a numpy
+    # scalar or, for an empty shape tuple, a float).
+    d = int(prod(data.shape[1:]))
+    assert d==int(prod(v.shape))
+    return lib.argmindist_double(d,int(data.shape[0]),v,data)
+
+
diff --git a/accel_c.c b/accel_c.c
@@ -0,0 +1,47 @@
+#include <stdio.h>
+
+/* Print "hello" on stdout and return 0. */
+int test(void) {
+    printf("hello\n");
+    /* A non-void function must return a value; callers reading the result
+     * of the original would have seen an indeterminate int. */
+    return 0;
+}
+
+/* Return the index i in [0,n) that minimises the squared Euclidean distance
+ * between v (d values) and row data[i]; among equal distances the highest
+ * index wins. Returns -1 when n<=0 or when no distance is <= 1e37. Rows
+ * whose distance is NaN are never selected.
+ *
+ * Each thread tracks its own best row and the per-thread results are merged
+ * in a critical section. This avoids an n-element scratch array on the
+ * stack, which can overflow the stack for large n. Without OpenMP the
+ * pragmas are ignored and the code runs as a plain sequential scan. */
+int argmindist_float(int d,int n,float v[d],float data[n][d]) {
+    int mi = -1;
+    float mv = 1e37;
+#pragma omp parallel
+    {
+        int local_mi = -1;
+        float local_mv = 1e37;
+#pragma omp for schedule(static,50) nowait
+        for(int i=0;i<n;i++) {
+            float total = 0.0;
+            for(int j=0;j<d;j++) {
+                float delta = v[j]-data[i][j];
+                total += delta*delta;
+            }
+            /* Smaller distance wins; on a tie keep the higher index. */
+            if(total<local_mv || (total==local_mv && i>local_mi)) {
+                local_mi = i;
+                local_mv = total;
+            }
+        }
+#pragma omp critical
+        {
+            /* Same rule when merging, so the result does not depend on
+             * how iterations were distributed over threads. */
+            if(local_mi>=0 && (local_mv<mv || (local_mv==mv && local_mi>mi))) {
+                mi = local_mi;
+                mv = local_mv;
+            }
+        }
+    }
+    return mi;
+}
+
+/* Return the index i in [0,n) that minimises the squared Euclidean distance
+ * between v (d values) and row data[i]; among equal distances the highest
+ * index wins. Returns -1 when n<=0 or when no distance is <= 1e300. Rows
+ * whose distance is NaN are never selected.
+ *
+ * Each thread tracks its own best row and the per-thread results are merged
+ * in a critical section. This avoids an n-element scratch array on the
+ * stack, which can overflow the stack for large n. Without OpenMP the
+ * pragmas are ignored and the code runs as a plain sequential scan. */
+int argmindist_double(int d,int n,double v[d],double data[n][d]) {
+    int mi = -1;
+    double mv = 1e300;
+#pragma omp parallel
+    {
+        int local_mi = -1;
+        double local_mv = 1e300;
+#pragma omp for schedule(static,50) nowait
+        for(int i=0;i<n;i++) {
+            double total = 0.0;
+            for(int j=0;j<d;j++) {
+                double delta = v[j]-data[i][j];
+                total += delta*delta;
+            }
+            /* Smaller distance wins; on a tie keep the higher index. */
+            if(total<local_mv || (total==local_mv && i>local_mi)) {
+                local_mi = i;
+                local_mv = total;
+            }
+        }
+#pragma omp critical
+        {
+            /* Same rule when merging, so the result does not depend on
+             * how iterations were distributed over threads. */
+            if(local_mi>=0 && (local_mv<mv || (local_mv==mv && local_mi>mi))) {
+                mi = local_mi;
+                mv = local_mv;
+            }
+        }
+    }
+    return mi;
+}
diff --git a/boostedmlp.py b/boostedmlp.py
@@ -0,0 +1,163 @@
+# Names exported by `from boostedmlp import *`.
+# NOTE(review): StackedMLP is defined in this module but not listed here --
+# confirm whether the omission is intentional.
+__all__ = ["BoostedMLP"]
+
+import os,sys,os.path,re,string,math
+from pylab import *
+from numpy import *
+import mlp
+import pickle
+
+# When true, BoostedMLP.train prints per-round diagnostics.
+verbose = 0
+
+def finite(x):
+    """Return True when `x` contains neither NaN nor infinite values."""
+    if isnan(x).any():
+        return False
+    return not isinf(x).any()
+
+def perplexity(weights):
+    """Return exp(-sum(p*log(p))) for `weights` normalised to sum to 1.
+
+    Entries of the normalised weights that are not positive contribute
+    zero to the sum.
+    """
+    # Multiply by 1.0 so integer weights are not floor-divided under
+    # Python 2 (the same guard weighted_sample uses).
+    p = weights*1.0/sum(weights)
+    # Substitute 1.0 (log == 0) for the non-positive entries *before* taking
+    # the log, so log(0) is never evaluated and no divide-by-zero warning
+    # is raised; the masked terms still evaluate to p*0.
+    return exp(-sum(p*log(where(p>0,p,1.0))))
+
+def weighted_sample(weights,n):
+    """Draw `n` indices into `weights`, with probability proportional to weight.
+
+    Returns an integer array of `n` indices. `weights` must be non-empty
+    with a positive sum.
+    """
+    cdf = cumsum(weights*1.0)
+    # Normalise by the last cumulative value rather than by sum(weights):
+    # this makes cdf[-1] exactly 1.0, so a uniform draw from [0,1) can never
+    # fall beyond the end and produce the out-of-range index len(weights).
+    cdf /= cdf[-1]
+    # side='right' returns the first entry strictly greater than the draw,
+    # so zero-weight entries are skipped even when the draw lands exactly
+    # on a cumulative value (including a draw of 0.0).
+    return searchsorted(cdf,random.uniform(size=n),side='right')
+
+def rowwise(f,data):
+    """Apply `f` to every row of `data` and return the results as an array.
+
+    `data.shape` is unpacked into two values, so `data` must be
+    two-dimensional.
+    """
+    nrows,_ = data.shape
+    return array([f(data[r]) for r in range(nrows)])
+
+class BoostedMLP:
+    """Boosted ensemble of `mlp.MLP` networks for multiclass problems.
+
+    After `train` (or `load`), `self.list` holds one tuple
+    `(net, alpha, weights, err)` per boosting round and `self.nclass` the
+    number of classes.
+    """
+    def __init__(self):
+        self.list = None
+    def train(self,data,cls,nclass,nstages=30,nhidden=None,
+              eta=1.0,epochs=10,nsample=10000):
+        """Train `nstages` networks, each on a weighted resample of `data`.
+
+        data    -- 2-D array, one item per row
+        cls     -- integer labels, one per row, each in [0, nclass)
+        nclass  -- number of classes
+        nstages -- number of boosting rounds
+        nhidden, eta, epochs -- forwarded to `mlp.MLP.train` for each round
+                   (nhidden only when it is not None)
+        nsample -- number of row indices drawn per round
+
+        May only be called once per instance.
+        """
+        assert self.list is None
+        n,d = data.shape
+        assert n==len(cls)
+        assert (cls>=0).all()
+        assert (cls<nclass).all()
+        self.nclass = nclass
+        # Forward the per-network hyper-parameters; previously they were
+        # accepted here but never reached MLP.train.
+        netargs = dict(eta=eta,epochs=epochs)
+        if nhidden is not None: netargs["nhidden"] = nhidden
+        # Initial weights are 1/k for a random rank k of each item.
+        weights = 1.0/(1+random.permutation(len(data)))
+        weights /= sum(weights)
+        stages = []
+        for i in range(nstages):
+            if verbose: print "round",i
+            net = mlp.MLP()
+            # train on weighted sample
+            samples = weighted_sample(weights,n=nsample)
+            net.train(data,cls,nclass,samples=samples,**netargs)
+            # compute error on entire set
+            pred = net.classify(data)
+            wrong = (pred!=cls)
+            err = sum(wrong*weights)/sum(weights)
+            if verbose:
+                print "    err=",sum(wrong)
+                print "    werr=",err
+                print "    sample perplexity=",perplexity(weights)
+            # Keep the error strictly inside (0,1): err==0 or err==1 would
+            # make alpha infinite and turn the weights into NaN.
+            cerr = min(max(err,1e-10),1.0-1e-10)
+            # SAMME update for multiclass boosting
+            alpha = log((1.0-cerr)/cerr) + log(nclass-1.0)
+            weights = weights*exp(alpha*wrong)
+            weights /= sum(weights)
+            stages.append((net,alpha,weights,err))
+        self.list = stages
+    def discriminants1(self,v,limit=9999):
+        """Return per-class vote totals for the single vector `v`.
+
+        Each of the first `limit` stages adds its alpha to the class it
+        predicts for `v`.
+        """
+        totals = zeros(self.nclass)
+        for index in range(min(limit,len(self.list))):
+            comp = self.list[index]
+            net,alpha = comp[0],comp[1]
+            c = net.classify(v.reshape(1,len(v)))[0]
+            totals[c] += alpha
+        return totals
+    def classify1(self,v,limit=9999):
+        """Return the class with the largest vote total for `v`."""
+        return argmax(self.discriminants1(v,limit=limit))
+    def discriminants(self,data):
+        """Return `discriminants1` applied to each row of `data`."""
+        return rowwise(self.discriminants1,data)
+    def classify(self,data):
+        """Return `classify1` applied to each row of `data`."""
+        return rowwise(self.classify1,data)
+    def save(self,stream):
+        """Pickle `(self.list, self.nclass)` to `stream` (protocol 2)."""
+        pickle.dump((self.list,self.nclass),stream,protocol=2)
+    def load(self,stream):
+        """Restore `self.list` and `self.nclass` from a stream written by `save`.
+
+        NOTE: unpickling can execute arbitrary code; only load trusted data.
+        """
+        self.list,self.nclass = pickle.load(stream)
+
+class StackedMLP(BoostedMLP):
+    """Boosted MLP ensemble whose member outputs feed a second-level MLP.
+
+    The discriminants of every ensemble member are concatenated into one
+    vector per item, and `self.stacked` (an `mlp.MLP`) is trained on those
+    vectors.
+    """
+    def __init__(self):
+        BoostedMLP.__init__(self)
+        self.nclass = None
+        self.stacked = None
+    def train(self,data,cls,nclass,nstages,
+              nhidden,eta=1.0,epochs=10,nsample=10000,
+              snhidden=None,seta=1.0,sepochs=10):
+        """Train the boosted ensemble, then the stacked network on its outputs.
+
+        The first eight parameters are passed to `BoostedMLP.train`.
+        `snhidden` (default 3*nclass), `seta` and `sepochs` are passed to
+        the stacked network's `train` as nhidden, eta and epochs.
+        May only be called once per instance.
+        """
+        assert self.stacked is None
+        if snhidden is None: snhidden = 3*nclass
+        BoostedMLP.train(self,data,cls,nclass,
+                         nstages=nstages,nhidden=nhidden,
+                         eta=eta,epochs=epochs,nsample=nsample)
+        sdata = rowwise(self.all_discriminants1,data)
+        assert finite(sdata)
+        net = mlp.MLP()
+        net.train(sdata,cls,nclass,nhidden=snhidden,eta=seta,epochs=sepochs)
+        self.stacked = net
+        self.nclass = nclass
+    def all_discriminants1(self,v):
+        """Return the concatenated discriminants of all members for vector `v`.
+
+        The result has `self.nclass * len(self.list)` entries.
+        """
+        assert self.list is not None
+        assert v.ndim==1
+        result = zeros((self.nclass * len(self.list)))
+        pos = 0
+        for stage in self.list:
+            net = stage[0]
+            d = net.discriminants(v.reshape(1,len(v)))[0]
+            result[pos:pos+len(d)] = d
+            pos += len(d)
+        # Check the whole vector; checking only `d` would cover just the
+        # last member (and fail with NameError for an empty ensemble).
+        assert finite(result)
+        return result
+    def discriminants1(self,v):
+        """Return the stacked network's posteriors for the single vector `v`."""
+        ps = self.all_discriminants1(v)
+        assert finite(ps)
+        result = self.stacked.posteriors(ps.reshape(1,len(ps)))[0]
+        assert finite(result)
+        return result
+    def posteriors1(self,v):
+        """Return `discriminants1(v)`, scaled down only if its sum exceeds 1."""
+        result = self.discriminants1(v)
+        result /= max(1.0,sum(result))
+        assert finite(result)
+        return result
+    def classify1(self,v):
+        """Return the index of the largest discriminant for `v`."""
+        return argmax(self.discriminants1(v))
+    def discriminants(self,data):
+        """Return `discriminants1` applied to each row of `data`."""
+        assert finite(data)
+        result = rowwise(self.discriminants1,data)
+        assert finite(result)
+        return result
+    def posteriors(self,data):
+        """Return `posteriors1` applied to each row of `data`."""
+        assert finite(data)
+        result = rowwise(self.posteriors1,data)
+        assert finite(result)
+        return result
+    def classify(self,data):
+        """Return `classify1` applied to each row of `data`."""
+        assert finite(data)
+        return rowwise(self.classify1,data)
+    def save(self,stream):
+        """Pickle `(self.list, self.nclass, self.stacked)` to `stream`."""
+        pickle.dump((self.list,self.nclass,self.stacked),stream,protocol=2)
+    def load(self,stream):
+        """Restore the state written by `save`.
+
+        NOTE: unpickling can execute arbitrary code; only load trusted data.
+        """
+        self.list,self.nclass,self.stacked = pickle.load(stream)
+
+import unittest,fpectl
+from test_classifier import *
+
+class TestBoostedMLP(TestBatchClassifier):
+    # Binds BoostedMLP and a deliberately tiny training configuration.
+    # Presumably both attributes are consumed by TestBatchClassifier
+    # (from test_classifier) -- verify there.
+    factory = BoostedMLP
+    params = dict(nstages=3,epochs=1,nsample=10)
+
+class TestStackedMLP(TestBatchClassifier):
+    # Binds StackedMLP and a deliberately tiny training configuration.
+    # Presumably both attributes are consumed by TestBatchClassifier
+    # (from test_classifier) -- verify there.
+    factory = StackedMLP
+    params = dict(nstages=3,epochs=1,nsample=10,
+                  sepochs=1,nhidden=3,snhidden=3)
+
+# Run this module's unittest test cases when it is executed as a script.
+if __name__ == "__main__":
+    unittest.main()
+
+def test_verbose():
+    """Run TestBoostedMLP and TestStackedMLP after calling fpectl.turnon_sigfpe()."""
+    fpectl.turnon_sigfpe()
+    loader = unittest.TestLoader()
+    # Combine both cases into one suite; assigning each to the same name
+    # would discard the first and run only TestStackedMLP.
+    suite = unittest.TestSuite([
+        loader.loadTestsFromTestCase(TestBoostedMLP),
+        loader.loadTestsFromTestCase(TestStackedMLP),
+    ])
+    unittest.TextTestRunner(verbosity=1).run(suite)
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		accel_c.so: accel_c.c
		gcc -shared -fPIC -O4 -fopenmp --std=c99 accel_c.c -o accel_c.so