forked from janekg89/Project-Simulation
-
Notifications
You must be signed in to change notification settings - Fork 0
/
main.py
208 lines (155 loc) · 7.04 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
#!/usr/bin/env python
# -*- coding: utf-8 -*
import fusim16.Ising as ising
import fusim16.PCA as pca
from fusim16.Cluster import dbscan
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.cm as cm
from mpl_toolkits.mplot3d import Axes3D
import time
import argparse
################################################################################
# Agparse for command line parameters
################################################################################
parser = argparse.ArgumentParser(description='2D Ising Model with PCA and Cluster analysis')
parser.add_argument('-N', metavar='LatticeConstant', type=int, required = False,
help='lattice constant of the 2D square Ising model, Default = 16', default=16)
parser.add_argument('-S', metavar='Steps', type=int, required = False,
help='Number of Metropolis steps on the Ising model, Default = 5000', default=5000)
parser.add_argument('-T', metavar='Temperature', type=float, required = False,
help='Temperature of the Ising model simulation, Default = 1', default=1)
parser.add_argument('-E', metavar='Epsilon', type=float, required = False,
help='Radius of the cluster analysis, Default = 0.2', default=0.2)
parser.add_argument('-P', metavar='Points', type=int, required = False,
help='Minimal number of neighbourhood points for cluster analysis, Default = 10', default=10)
parser.add_argument('--plot3D', action='store_true', required = False,
help='Plot 3D projection of PCA')
parser.add_argument('--plotEigVec', action='store_true', required = False,
help='Plot Eigenvectors Map of PCA. Does NOT work with GPU support for PCA!')
parser.add_argument('--equi', action='store_true', required = False,
help='Equilibrate Ising model (run [steps] steps) before further analysis')
parser.add_argument('--gpu', action='store_true', required=False, help='Use GPU support for PCA')
parser.add_argument('-k', type=int, required=False, help='First k dimensions with greatest variances', default=3)
parser.add_argument('-m', type=str, required=False, help='Implementation method for PCA', default="cov")
parser.add_argument('-p', type=float, required=False,
help='Quality preservation in percent. If zero, it is ignored', default=0.)
args = parser.parse_args()
################################################################################
# Some sort of integration for all goups with experimental data held in RAM
################################################################################
# Main Benchmark
mainStartTime = time.time()
# Ising
################################################################################
# Create data, for example 5000 features x 10000 samples (if feasible)
# 5.000*5.000*10.000 = 29 GB of data if saved as bitarray
sqrtFeat = args.N
observations = args.S
temperature = args.T
system = ising.System.Metropolis(sqrtFeat, temperature)
if args.equi:
system.run(observations) #Equilibriate System
initalConfig, configChanges = system.run(observations)
isingTime = time.time()
print "Ising Simulation:", isingTime-mainStartTime
system.toFile('main', initalConfig, configChanges) # 256 size and 10.000 = 2.5 GB of data
writeTime = time.time()
print "Write File:", writeTime-isingTime
# PCA
################################################################################
# Restructure data to meet PCA requirements
obsList = system.fromFile("main_"+str(sqrtFeat)+'_'+str(observations+1)+".npy")
readTime = time.time()
print "read File:", readTime-writeTime
X = np.zeros((sqrtFeat * sqrtFeat, observations))
for idx in range(observations):
X[:,idx] = obsList.reshape(-1, sqrtFeat * sqrtFeat)[idx]
print "Reshape Array:", time.time()-readTime
# Determine starting time
pcaStartTime = time.time()
# Given a MxN numpy-array with M features and N samples, this will return
# a numpy-array with k features and N samples, where every sample has been
# projected onto the first k features of greatest/largest variance.
if args.gpu: # Utilize gpu
from pca import pcaGpu
pX = pcaGpu.PCAGpu(X, args.k, args.m).pca()
elif args.plotEigVec:
pX, eigVec = pca.pca(X, args.k, args.m, p=args.p, returnEigVec=True)
else:
pX = pca.pca(X, args.k, args.m, p=args.p)
pcaTotalTime = time.time() - pcaStartTime
print "PCA ran:", pcaTotalTime
# Have a look
if args.plot3D:
ax = plt.figure().add_subplot(111, projection="3d")
c = cm.rainbow(np.linspace(0, 1, pX.shape[1]))
ax.scatter(pX[0,:], pX[1,:], pX[2,:], ".", color=c)
plt.draw()
if args.plotEigVec:
k = len(eigVec)
spinGrids = []
colorbarMax = []
colorbarMin = []
for i in range ( k ):
max = np.amax( eigVec[i] )
min = np.amin( eigVec[i] )
colorbarMax.append( max )
colorbarMin.append( min )
spinGrids.append( eigVec[i].reshape( (sqrtFeat, sqrtFeat) ) )
colorbarUpperLim = np.amax(colorbarMax)
colorbarLowerLim = np.amin(colorbarMin)
# define the grid over which the function should be plotted (xx and yy are matrices)
x = np.arange(0, sqrtFeat, 1.)
xx, yy = np.meshgrid(x,x)
for i in range( k ):
# fill a matrix with the function values
zz = spinGrids[i]
# set plot title and axis names and limits
plt.figure(figsize=(12, 9))
plt.xlim(0, sqrtFeat - 1)
plt.ylim(0, sqrtFeat - 1)
plt.xlabel( "x-Position" )
plt.ylabel( "y-Position" )
plt.title("Eigenvector " + str(i + 1))
# plot the calculated function values
colorMap = plt.get_cmap("hsv")
plt.pcolor(xx, yy, zz, cmap = colorMap, vmin = colorbarLowerLim, vmax = colorbarUpperLim)
# and a color bar to show the correspondence between function value and color
plt.colorbar()
plt.savefig( "plt/eigenVector_" + str(i), dpi = 72 )
plt.close()
# Clustering
################################################################################
# DBSCAN algorithm with fixed parameters.
# The dataset needs to be dtype=float!
# The algorithm will return a mapping of the type:
# "Index of sample" => "Number of Cluster"
# Where Cluster 0 is not a cluster, but all noise.
# For example:
# clusterList[10] = 2 => Sample with index 10 belongs to Cluster 2
# clusterList[20] = 0 => Sample with index 20 is noise
clusterStartTime = time.time()
minNeighbors = args.P
epsilon = args.E
minNeighbors = 60
epsilon = 0.7
pX = np.array(pX.T, dtype=float)
dbscanner = dbscan.Dbscan(pX, minNeighbors,epsilon)
dbscanner.run()
clusterList=dbscanner.getClusterList()
sum = 0
for cluster in clusterList:
print np.array(cluster).shape[0]
sum = np.array(cluster).shape[0] + sum
print sum
print np.array(dbscanner.getNoise()).shape
clusterTotalTime = time.time() - clusterStartTime
print "Clustering ran:", clusterTotalTime
# Main runtime
################################################################################
mainEndTime = time.time()
totalrunTime = mainEndTime - mainStartTime
print "main.py ran:", totalrunTime
if args.plot3D:
plt.show()