-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathpreparation.py
122 lines (101 loc) · 4.29 KB
/
preparation.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
# Import the pySIFT code.
import sys
sys.path.append("pySift")
from pySift import sift, matching
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from sklearn.cluster import k_means
#
# Your goal here: Get SIFT features from training images and cluster them!
# Define the number of clusters.
# Load the file containing the training images.
trainimages = [line.strip().split(" ")[0] for
line in open("trainset-overview.txt", "r")]
print "There are", len(trainimages), "training images"
"""
Given an image, the function will calculate the Hes- and HarSiftpoints and
return them sift1. point1 contains the locations of the sift points in the
original image.
"""
def extract_sift(trainimage):
sigma = 1.0
hespoints = sift.computeHes(trainimage, sigma, magThreshold=15,
hesThreshold=10, NMSneighborhood=10)
harpoints = sift.computeHar(trainimage, sigma, magThreshold=5,
NMSneighborhood=10)
allpoints = np.concatenate((hespoints, harpoints))
point1, sift1 = sift.computeSIFTofPoints(trainimage, allpoints, sigma,
nrOrientBins=8, nrSpatBins=4,
nrPixPerBin=4)
return sift1
# Pool the SIFT features from all training images into one flat list.
# Iterate the images directly instead of indexing with xrange(len(...)).
trainpoints = []
for trainimage in trainimages:
    # Compute the SIFT features for this image and add them to the pool.
    trainpoints.extend(extract_sift(trainimage))
# Cluster the SIFT features and put them in a matrix with the name 'clusters'!
print "Clustering..."
def cluster_data(features, k, nr_iter=25):
centroids = k_means(features, n_clusters=k, max_iter=nr_iter)[0]
return centroids
# Specify the number of clusters to use.
k = 300
# Cluster the synthetic data defined previously.
clusters = cluster_data(trainpoints, k)
print 'done!'
"""Simple function to calculate Euclidean distance"""
def euclidean_distance(x, y):
assert(len(x) == len(y))
return np.sum((x-y)**2)**.5
"""Simple function which will calculate the distances between a single image,
'a', and the X matrix of images."""
def distances(a,X,distance_fn=euclidean_distance):
#Return a list of distances between vector a, and each row of X
#USE distance_fn to calculate distances. eg: some_dist = distance_fn(a,b)
#We create an array to store the distances in
dists = np.zeros(X.shape[0])
for i in range(X.shape[0]):
dist = distance_fn(a, X[i])
dists[i] = dist
return dists
# You can store the histogram results in the following:
# This is the length of your histogram vector.
size_of_histograms = k #equal to the number of clusters!
train_feat = np.zeros((len(trainimages), size_of_histograms))
for i, image in enumerate(trainimages):
image_sift = extract_sift(image)
for point in image_sift:
x = [euclidean_distance(point, clust) for clust in clusters]
clusterin = np.argmin(x)
train_feat[i][clusterin] += 1
print 'done!'
np.save('train_feat.npy',train_feat)
# Go through the SIFTs of every image and create a histogram for the image
# relative to the clusters you discovered in the previous phase.
### Do the same as before, but now for the validation set ###
# Ground-truth labels of the training images (second column of the file).
train_labels = []
for line in open("trainset-overview.txt", "r"):
    train_labels.append(int(line.strip().split(" ")[1]))
train_labels = np.array(train_labels)
# Paths of the validation images (first column of the file).
valimages = []
for line in open('valset-overview.txt', 'r'):
    valimages.append(line.split(' ')[0])
# Ground-truth labels of the validation images (second column).
val_labels = []
for line in open('valset-overview.txt', 'r'):
    val_labels.append(int(line.rstrip().split(' ')[1]))
val_labels = np.array(val_labels)
#Calculate the histogram representations for the validation images
val_feat = np.zeros((len(valimages), size_of_histograms))
for i, image in enumerate(valimages):
image_sift = extract_sift(image)
for point in image_sift:
x = [euclidean_distance(point, clust) for clust in clusters]
clusterin = np.argmin(x)
val_feat[i][clusterin] += 1
print 'done!'
#to store the train and validation histograms. This allows us to play around
#with feature selection, while saving the changes. Does overwrite itself, so
#be careful!
np.save('val_feat.npy',val_feat)