-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathundersampling_time.py
74 lines (57 loc) · 2.38 KB
/
undersampling_time.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
from undersampling.opf_us1 import OpfUS1
from undersampling.opf_us2 import OpfUS2
from undersampling.opf_us3 import OpfUS3
from undersampling.opf_us import OpfUS
from common.common import COMMON
import os
import numpy as np
import sys
from time import time
def perform_under(**kwargs):
    """Time one undersampling run and record how long it took.

    Calls ``fit_resample`` on the given OPF-US variant, measures only that
    call, and hands the elapsed time to ``COMMON.saveTimeOnly``. The
    resampled data itself is discarded; this function exists for timing.

    Keyword Args:
        us_obj: Undersampler instance exposing ``fit_resample(X, y, valid)``.
            Its class name is used as the approach label in the saved result.
        X: Training samples forwarded to ``fit_resample``.
        y: Training labels forwarded to ``fit_resample``.
        valid: Validation data forwarded unchanged to ``fit_resample``.
        fold: Fold identifier stored alongside the timing.
        ds: Dataset name stored alongside the timing.
        X_test, y_test: Accepted so existing call sites keep working, but
            not used by the timing run.

    Raises:
        KeyError: If ``us_obj``, ``X``, ``y``, ``valid``, ``fold`` or ``ds``
            is not supplied.
    """
    # perf_counter is monotonic and high resolution, so the measurement is
    # not disturbed by system clock adjustments the way time.time() can be.
    from time import perf_counter

    opf_us_obj = kwargs['us_obj']
    X = kwargs['X']
    y = kwargs['y']
    valid = kwargs['valid']
    f = kwargs['fold']
    ds = kwargs['ds']

    # Time only the resampling call itself.
    start_time = perf_counter()
    opf_us_obj.fit_resample(X, y, valid)
    elapsed = perf_counter() - start_time

    approach = opf_us_obj.__class__.__name__

    common = COMMON()
    common.saveTimeOnly(ds, f, approach, elapsed, 'Results')
# Datasets to time; each fold is read from data/<dataset>/<fold>/.
datasets = ['vertebral_column']

# The paper uses 20 folds; to reproduce it, use the following line instead:
# folds = np.arange(1, 21)
folds = np.arange(1, 2)

# Objects for undersampling
opf_us1 = OpfUS1()
opf_us2 = OpfUS2()
opf_us3 = OpfUS3()
opf_us = OpfUS()

# Variants are timed in this order for every fold:
#   opf_us  - main approach: remove samples from the majority class until
#             the dataset is balanced
#   opf_us1 - 1st variant: remove majority-class samples with negative scores
#   opf_us2 - 2nd variant: remove majority-class samples with negative or
#             zero scores
#   opf_us3 - 3rd variant: remove all samples with negative scores
undersamplers = (opf_us, opf_us1, opf_us2, opf_us3)

for ds in datasets:
    for f in folds:
        train = np.loadtxt('data/{}/{}/train.txt'.format(ds, f), delimiter=',', dtype=np.float32)
        valid = np.loadtxt('data/{}/{}/valid.txt'.format(ds, f), delimiter=',', dtype=np.float32)
        test = np.loadtxt('data/{}/{}/test.txt'.format(ds, f), delimiter=',', dtype=np.float32)

        # Train and validation rows are stacked; the last column holds the label.
        concat = np.concatenate((train, valid))
        X = concat[:, :-1]
        # np.int was removed in NumPy 1.24; the built-in int is what it aliased.
        Y = concat[:, -1].astype(int)

        X_test = test[:, :-1]
        Y_test = test[:, -1].astype(int)

        pathDataset = 'data/{}/{}'.format(ds, f)
        os.makedirs(pathDataset, exist_ok=True)

        for us_obj in undersamplers:
            perform_under(us_obj=us_obj, X=X, y=Y, X_test=X_test, y_test=Y_test,
                          fold=f, ds=ds, valid=valid)