-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathSVM_IL.py
151 lines (135 loc) · 4.77 KB
/
SVM_IL.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
import numpy as np
from sklearn.svm import SVC
from random import shuffle
from sklearn.model_selection import GridSearchCV
################## Tools ##################
def ProcessData(Data, Label):
    """
    Turn pairwise preferences into a binary classification set of difference vectors (Herbrich et al.).

    :param Data: Set of covariates, indexed by the entries of each preference; rows must support subtraction.
    :param Label: Sequence of preferences; for each ``pref``, ``pref[0]`` is the preferred item and ``pref[1]`` the
    other one (i.e. u_k > v_k).
    :return: A tuple ``(Data_, Label_)`` of two lists with one entry per preference. A random half of the preferences
    is kept as ``Data[pref[0]] - Data[pref[1]]`` with label 1; all the others are inverted (seen as v_k > u_k), i.e.
    stored as ``Data[pref[1]] - Data[pref[0]]`` with label -1. When the number of preferences is odd, the last one is
    always inverted.
    """
    # Draw the positions that keep their original orientation. The draw is still driven by `random.shuffle`, over
    # an even number of positions so that it splits into two equal halves.
    half = len(Label) // 2
    order = list(range(2 * half))
    shuffle(order)
    # A set gives O(1) membership tests inside the loop (scanning an array each time made the loop quadratic).
    kept = set(order[:half])

    Data_ = []
    Label_ = []
    for position, pref in enumerate(Label):
        if position in kept:
            Data_.append(Data[pref[0]] - Data[pref[1]])
            Label_.append(1)
        else:
            Data_.append(Data[pref[1]] - Data[pref[0]])
            Label_.append(-1)
    return Data_, Label_
def ExpansionData(Data, Label):
    """
    Expand every preference into two concatenated samples (methodology presented by Har-Peled et al.).

    :param Data: Set of covariates, indexed by the entries of each preference.
    :param Label: Sequence of preferences ``pref``; ``pref[0]`` and ``pref[1]`` index into Data.
    :return: A tuple ``(Data_, Label_)`` of two lists. Each preference contributes, in this order, the sample
    ``Data[pref[0]]`` followed by ``-Data[pref[1]]`` with label 1, then the sample ``Data[pref[1]]`` followed by
    ``-Data[pref[0]]`` with label -1.
    """
    expanded = []
    targets = []
    for pref in Label:
        first, second = Data[pref[0]], Data[pref[1]]
        # Sample for the preference as given.
        expanded.append(np.append(first, -1 * second))
        targets.append(1)
        # Mirrored sample for the inverted preference.
        expanded.append(np.append(second, -1 * first))
        targets.append(-1)
    return expanded, targets
def randomTest(Data, Label):
    """
    Turn every preference into a single concatenated sample with a randomly chosen orientation (Har-Peled et al.).

    :param Data: Set of covariates, indexed by the entries of each preference.
    :param Label: Sequence of preferences; for each ``pref``, ``pref[0]`` is the preferred item and ``pref[1]`` the
    other one (i.e. u_k > v_k).
    :return: A tuple ``(Data_, Label_)`` of two lists with one entry per preference. A random half of the preferences
    is seen as (u_k > v_k, 1): ``Data[pref[0]]`` followed by ``-Data[pref[1]]``, label 1. All the others are seen as
    (v_k > u_k, -1): ``Data[pref[1]]`` followed by ``-Data[pref[0]]``, label -1. When the number of preferences is
    odd, the last one always gets label -1.
    """
    # Draw the positions that keep their original orientation. The draw is still driven by `random.shuffle`, over
    # an even number of positions so that it splits into two equal halves.
    half = len(Label) // 2
    order = list(range(2 * half))
    shuffle(order)
    # A set gives O(1) membership tests inside the loop (scanning an array each time made the loop quadratic).
    kept = set(order[:half])

    Data_ = []
    Label_ = []
    for position, pref in enumerate(Label):
        if position in kept:
            Data_.append(np.append(Data[pref[0]], (-1) * Data[pref[1]]))
            Label_.append(1)
        else:
            Data_.append(np.append(Data[pref[1]], (-1) * Data[pref[0]]))
            Label_.append(-1)
    return Data_, Label_
################## Herbrich ##################
class SVM_InstancePref:
def __init__(self, inputs, K, C):
if not isinstance(K, list) and not isinstance(K, np.ndarray):
self.classifier = SVC(gamma=K, C=C)
else:
parameters = {'C': C, 'gamma': K}
self.classifier = GridSearchCV(SVC(), parameters, cv=5)
self.X = inputs[0]
self.D = inputs[1]
self.Data, self.Label = ProcessData(self.X, self.D)
def fit(self):
self.classifier.fit(self.Data, self.Label)
try:
print("(Herbrich) SVM CV-Hyperparameters: K={gamma} and C={C}".format(**self.classifier.best_params_))
except AttributeError:
pass
def score(self, Data=None, Label=None, train=True):
if train:
predic = []
for pref, lab in zip(self.D, self.Label):
predic.append(self.classifier.predict([lab*(self.X[pref[0]]-self.X[pref[1]])])[0])
return np.mean([predic[i] == self.Label[i] for i in range(len(predic))])
else:
Data_, Label_ = ProcessData(Data, Label)
preds = self.classifier.score(Data_, Label_)
return preds
################## Har-Peled ##################
class CCSVM_IL:
    """
    Preference classifier trained on concatenated samples (Har-Peled et al.).

    The preferences given at construction are expanded by ``ExpansionData`` and an ``SVC`` (optionally wrapped in a
    5-fold ``GridSearchCV``) is trained on the result.
    """

    def __init__(self, inputs, K, C):
        """
        :param inputs: Pair ``(X, D)``: ``X`` is the set of covariates and ``D`` the preferences over ``X``.
        :param K: Value passed as ``gamma`` to the SVC, or a list/array of candidate values to cross-validate.
        :param C: Value passed as ``C`` to the SVC; when ``K`` is a list/array it is handed to the grid search as the
        candidate set for ``C`` instead.
        """
        use_grid_search = isinstance(K, (list, np.ndarray))
        if use_grid_search:
            self.classifier = GridSearchCV(SVC(), {'C': C, 'gamma': K}, cv=5)
        else:
            self.classifier = SVC(gamma=K, C=C)
        self.X, self.D = inputs[0], inputs[1]
        self.Data, self.Label = ExpansionData(self.X, self.D)

    def fit(self):
        """Fit the classifier on the expanded training set and print the cross-validated hyperparameters, if any."""
        self.classifier.fit(self.Data, self.Label)
        try:
            best = self.classifier.best_params_
        except AttributeError:
            # The classifier exposes no `best_params_` (no grid search was run): nothing to report.
            return
        print("(Har_Peled) SVM CV-Hyperparameters: K={gamma} and C={C}".format(**best))

    def score(self, Data=None, Label=None, train=True):
        """
        :param Data: Set of covariates of the evaluation set (ignored when ``train`` is True).
        :param Label: Preferences of the evaluation set (ignored when ``train`` is True).
        :param train: If True, evaluate on the covariates and preferences stored at construction.
        :return: Accuracy of the classifier on the selected set after it went through ``randomTest`` (one randomly
        oriented sample per preference).
        """
        covariates, preferences = (self.X, self.D) if train else (Data, Label)
        Data_, Label_ = randomTest(covariates, preferences)
        return self.classifier.score(Data_, Label_)