-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmodel.py
80 lines (63 loc) · 2.94 KB
/
model.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
import os
import time
import pickle
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score
from src import DataProcessing
from src.CorrelationMatrix import CorrMatrix
from src.FeatureImportance import featuring_importance
from src.DeployOut import get_csv_output
def _load_models(models_dir):
    """Unpickle every saved model stored directly inside *models_dir*.

    Entries named "dnn_pkl" or "test.py" are skipped, as is anything that is
    not a regular file.

    Returns:
        dict mapping a model name (derived from the file name) to the
        unpickled object.
    """
    models = {}
    # os.listdir() returns names in arbitrary order; sorting keeps the column
    # order of the prediction table (and the output order) stable across runs.
    for filename in sorted(os.listdir(models_dir)):
        if filename in ("dnn_pkl", "test.py"):
            continue
        file_path = os.path.join(models_dir, filename)
        if not os.path.isfile(file_path):
            # Sub-directories (e.g. __pycache__) cannot be opened as pickles.
            continue
        # NOTE: the model name is everything before the FIRST "p" in the file
        # name, so a name containing an earlier "p" is truncated at that point.
        model_name = filename.partition("p")[0]
        # SECURITY: pickle.load can execute arbitrary code; only keep trusted
        # files in the models directory.
        with open(file_path, "rb") as f:
            models[model_name] = pickle.load(f)
    return models


def suicide():
    """Run every saved model on the processed user input and export the results.

    Steps:
        1. Obtain the input features from ``DataProcessing.process()``.
        2. Load the pickled models from ``<cwd>/models``.
        3. Call ``predict`` on each model and collect the predictions in a
           DataFrame with one column per model.
        4. Join the predictions to the input features and pass the result to
           ``CorrMatrix``, ``featuring_importance`` and ``get_csv_output``.

    Prints each model name, the prediction table and the elapsed time.
    Returns None.
    """
    # perf_counter is monotonic, so the elapsed time cannot be skewed by
    # system clock adjustments.
    start = time.perf_counter()

    models_dir = os.path.join(os.getcwd(), "models")

    # processing the data input by user
    X_test = DataProcessing.process()

    # every saved model, keyed by its name
    model_dictionary = _load_models(models_dir)

    # collecting the prediction values made by each model
    y_pred_class = []
    for key, model in model_dictionary.items():
        y_pred_class.append(model.predict(X_test))
        print(key)

    # one column of predictions per model
    y_pred_class_df = pd.DataFrame(y_pred_class).transpose()
    y_pred_class_df.columns = list(model_dictionary)
    if y_pred_class:
        # Predictions are positional with respect to X_test. Re-using its index
        # keeps the index-based inner join below from silently dropping rows
        # when X_test does not carry a default 0..n-1 index.
        y_pred_class_df.index = X_test.index
    print(y_pred_class_df)

    # input features and predictions side by side
    combined_data = pd.concat([X_test, y_pred_class_df], axis=1, join='inner')

    # correlation matrix over the combined data
    CorrMatrix(combined_data)

    # FeatureImportance graph
    featuring_importance(X_test, y_pred_class_df)

    # saving output in csv format, once per model
    for key in model_dictionary:
        get_csv_output(key, combined_data)

    end = time.perf_counter()
    print("Time Taken: ", end-start)
# Runs the whole prediction pipeline at module level, i.e. both when this file
# is executed as a script and when it is imported.
suicide()