cluster_descriptors.py
""" Clusters descriptors and builds an inverted index. Also updates the HDF
file with the results.
Usage: python cluster_descriptors.py video.avi 1000
Alternative usage:
python cluster_descriptors.py video.avi 1000 [--no-laplacian] [--scipy]
(not recommended) """
from datetime import datetime
import cv, cv2
import numpy as np
import sys
import os
import tables
from scipy.cluster.vq import kmeans2 as scipy_kmeans
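# Expected input layout (inferred from the reads and writes below): the file
# '<video>.surf.hdf' holds a 'descriptors' table of 128-float SURF vectors and
# a 'keypoints' table with at least 'laplacian', 'cluster' and 'pos' columns.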
# -------------------------------
skip_laplacian = (len(sys.argv)>3 and '--no-laplacian' in sys.argv)
print 'Reading HDF file...'
d1 = datetime.now()
filename = '%s.surf.hdf' % os.path.basename(sys.argv[1])
f = tables.openFile(filename, 'r')
n_keypoints = f.root.descriptors.nrows
print '%d keypoints' % n_keypoints
# Each SURF descriptor has 128 floats; optionally append the keypoint's
# laplacian sign (scaled by 100) as a 129th dimension so it also influences
# the clustering.
if skip_laplacian:
    m = np.empty((n_keypoints, 128), dtype=np.float32)
    for i, row in enumerate(f.root.descriptors):
        m[i] = row
else:
    m = np.empty((n_keypoints, 129), dtype=np.float32)
    for i, row in enumerate(f.root.descriptors):
        m[i] = np.append(row, f.root.keypoints[i]["laplacian"] * 100)
f.close()
del f
d2 = datetime.now()
print "Loading time was: %d.%d" % ((d2-d1).seconds, (d2-d1).microseconds)
# -------------------------------
K = int(sys.argv[2])
print 'Starting clustering method...'
d1 = datetime.now()
if len(sys.argv) > 3 and '--scipy' in sys.argv:
    print "Scipy kmeans"
    centroids, labels = scipy_kmeans(m, K, minit='points')
else:
    print "Opencv kmeans"
    samples = cv.fromarray(m)
    labels = cv.CreateMat(samples.height, 1, cv.CV_32SC1)
    # crit = (cv.CV_TERMCRIT_EPS + cv.CV_TERMCRIT_ITER, 10, 1.0)
    crit = (cv.CV_TERMCRIT_ITER, 10, 0)
    cv.KMeans2(samples, K, labels, crit)
d2 = datetime.now()
print "Elapsed time for %d clusters: %d.%d" % (K, (d2-d1).seconds, (d2-d1).microseconds)
print 'Updating HDF file with results...'
d1 = datetime.now()
labels = np.asarray(labels).ravel()
f = tables.openFile(filename, 'r+')
for i, row in enumerate(f.root.keypoints):
    row['cluster'] = labels[i]
    row.update()
f.root.keypoints.flush()
d2 = datetime.now()
print "Done: %d.%d" % ((d2-d1).seconds, (d2-d1).microseconds)
print 'Building inverted index...'
d1 = datetime.now()
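# The inverted index maps each cluster (visual word) to the 'pos' values of
# the keypoints assigned to it, stored as one CArray per cluster under
# /clusters.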
try:
    # Drop any /clusters group left over from a previous run.
    f.root.clusters._f_remove(recursive=1)
except tables.NoSuchNodeError:
    pass
f.createGroup(f.root, 'clusters')
filters = tables.Filters(complib='blosc', complevel=1)
for cluster_id in range(K):
    data = list(set(f.root.keypoints.readWhere('cluster==%d' % cluster_id, field='pos')))
    descriptors = f.createCArray(f.root.clusters, "cluster_%d" % cluster_id, tables.Int32Atom(), (1, len(data)), filters=filters)
    descriptors[:] = data[:]
d2 = datetime.now()
print "Building time was: %d.%d" % ((d2-d1).seconds, (d2-d1).microseconds)
# import pdb; pdb.set_trace()  # leftover debug breakpoint, disabled
f.close()
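# Illustrative sketch (not executed here): reading one cluster's posting list
# back from the index later might look like the following, assuming the same
# layout written above:
#
#     f = tables.openFile(filename, 'r')
#     positions = f.root.clusters.cluster_0[0, :]  # 'pos' values of keypoints in cluster 0
#     f.close()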