main.py

#!/usr/bin/env python
# -*- coding: utf-8 -*
import fusim16.Ising as ising
import fusim16.PCA as pca
from fusim16.Cluster import dbscan
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.cm as cm
from mpl_toolkits.mplot3d import Axes3D
import time
import argparse

################################################################################
# Agparse for command line parameters
################################################################################

parser = argparse.ArgumentParser(description='2D Ising Model with PCA and Cluster analysis')

parser.add_argument('-N', metavar='LatticeConstant', type=int,  required = False,
                   help='lattice constant of the 2D square Ising model, Default = 16', default=16)

parser.add_argument('-S', metavar='Steps', type=int,  required = False,
                   help='Number of Metropolis steps on the Ising model, Default = 5000', default=5000)

parser.add_argument('-T', metavar='Temperature', type=float,  required = False,
                   help='Temperature of the Ising model simulation, Default = 1', default=1)

parser.add_argument('-E', metavar='Epsilon', type=float,  required = False,
                   help='Radius of the cluster analysis, Default = 0.2', default=0.2)

parser.add_argument('-P', metavar='Points', type=int,  required = False,
                   help='Minimal number of neighbourhood points for cluster analysis, Default = 10', default=10)

parser.add_argument('--plot3D', action='store_true',  required = False,
                   help='Plot 3D projection of PCA')

parser.add_argument('--plotEigVec', action='store_true',  required = False,
                   help='Plot Eigenvectors Map of PCA. Does NOT work with GPU support for PCA!')

parser.add_argument('--equi', action='store_true',  required = False,
                   help='Equilibrate Ising model (run [steps] steps) before further analysis')

parser.add_argument('--gpu', action='store_true', required=False, help='Use GPU support for PCA')
parser.add_argument('-k', type=int, required=False, help='First k dimensions with greatest variances', default=3)
parser.add_argument('-m', type=str, required=False, help='Implementation method for PCA', default="cov")
parser.add_argument('-p', type=float, required=False,
                   help='Quality preservation in percent. If zero, it is ignored', default=0.)

args = parser.parse_args()

################################################################################
# Some sort of integration for all goups with experimental data held in RAM
################################################################################

# Main Benchmark
mainStartTime = time.time()

# Ising
################################################################################

# Create data, for example 5000 features x 10000 samples (if feasible)
# 5.000*5.000*10.000 = 29 GB of data if saved as bitarray
sqrtFeat = args.N
observations = args.S
temperature = args.T
system = ising.System.Metropolis(sqrtFeat, temperature)

if args.equi:
    system.run(observations) #Equilibriate System

initalConfig, configChanges = system.run(observations)

isingTime = time.time()
print "Ising Simulation:", isingTime-mainStartTime

system.toFile('main', initalConfig, configChanges) # 256 size and 10.000 = 2.5 GB of data

writeTime = time.time()
print "Write File:", writeTime-isingTime
# PCA
################################################################################

# Restructure data to meet PCA requirements
obsList = system.fromFile("main_"+str(sqrtFeat)+'_'+str(observations+1)+".npy")

readTime = time.time()
print "read File:", readTime-writeTime

X = np.zeros((sqrtFeat * sqrtFeat, observations))
for idx in range(observations):
    X[:,idx] = obsList.reshape(-1, sqrtFeat * sqrtFeat)[idx]
    
print "Reshape Array:", time.time()-readTime

# Determine starting time
pcaStartTime = time.time()

# Given a MxN numpy-array with M features and N samples, this will return
# a numpy-array with k features and N samples, where every sample has been
# projected onto the first k features of greatest/largest variance.
if args.gpu: # Utilize gpu
    from pca import pcaGpu
    pX = pcaGpu.PCAGpu(X, args.k, args.m).pca()
elif args.plotEigVec:
    pX, eigVec = pca.pca(X, args.k, args.m, p=args.p, returnEigVec=True)
else:
    pX = pca.pca(X, args.k, args.m, p=args.p)

pcaTotalTime = time.time() - pcaStartTime
print "PCA ran:", pcaTotalTime

# Have a look
if args.plot3D:
    ax = plt.figure().add_subplot(111, projection="3d")
    c = cm.rainbow(np.linspace(0, 1, pX.shape[1]))
    ax.scatter(pX[0,:], pX[1,:], pX[2,:], ".", color=c)
    plt.draw()

if args.plotEigVec:
    k = len(eigVec)
    spinGrids = []
    colorbarMax = []
    colorbarMin = []

    for i in range ( k ):
        max = np.amax( eigVec[i] )
        min = np.amin( eigVec[i] )
        colorbarMax.append( max )
        colorbarMin.append( min )

        spinGrids.append( eigVec[i].reshape( (sqrtFeat, sqrtFeat) ) )

    colorbarUpperLim = np.amax(colorbarMax)
    colorbarLowerLim =  np.amin(colorbarMin)

    # define the grid over which the function should be plotted (xx and yy are matrices)

    x = np.arange(0, sqrtFeat, 1.)
    xx, yy = np.meshgrid(x,x)
    
    for i in range( k ):
        # fill a matrix with the function values
        zz = spinGrids[i]

        # set plot title and axis names and limits
        plt.figure(figsize=(12, 9))
        plt.xlim(0, sqrtFeat - 1)
        plt.ylim(0, sqrtFeat - 1)
        plt.xlabel( "x-Position" )
        plt.ylabel( "y-Position" )
        plt.title("Eigenvector " + str(i + 1))

        # plot the calculated function values
        colorMap = plt.get_cmap("hsv")
        plt.pcolor(xx, yy, zz, cmap = colorMap, vmin = colorbarLowerLim, vmax = colorbarUpperLim)

        # and a color bar to show the correspondence between function value and color
        plt.colorbar()

        plt.savefig( "plt/eigenVector_" + str(i), dpi = 72 )

        plt.close()


# Clustering
################################################################################
# DBSCAN algorithm with fixed parameters.
# The dataset needs to be dtype=float!
# The algorithm will return a mapping of the type:
# "Index of sample" => "Number of Cluster"
# Where Cluster 0 is not a cluster, but all noise.
# For example:
# clusterList[10] = 2 => Sample with index 10 belongs to Cluster 2
# clusterList[20] = 0 => Sample with index 20 is noise

clusterStartTime = time.time()

minNeighbors = args.P
epsilon = args.E

minNeighbors = 60
epsilon = 0.7

pX = np.array(pX.T, dtype=float)
dbscanner = dbscan.Dbscan(pX, minNeighbors,epsilon)
dbscanner.run()
clusterList=dbscanner.getClusterList()

sum = 0
for cluster in clusterList:
    print  np.array(cluster).shape[0]
    sum = np.array(cluster).shape[0] + sum

print sum
print np.array(dbscanner.getNoise()).shape

clusterTotalTime = time.time() - clusterStartTime
print "Clustering ran:", clusterTotalTime


# Main runtime
################################################################################
mainEndTime =  time.time()
totalrunTime = mainEndTime - mainStartTime
print "main.py ran:",  totalrunTime

if args.plot3D:
    plt.show()