TrainConf.py
from ml_package import *  # assumed to expose the names used below: process,
                          # binning, encoding, imputation, custom_comps, plus
                          # ce (category_encoders), pd (pandas), SimpleImputer,
                          # RobustScaler and pickle

### Load the data needed for the process ###
proc = process.init_import()
X, y = proc["Train"]
col_details, run_details = proc["Configs"]
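# proc is assumed to bundle the training split ("Train"), the column and run
# configuration ("Configs"), and the column groupings ("groups") consumed by
# the optional PCA step below.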
# Optionally list columns you want binned
bin_cols = []
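# For illustration only (hypothetical column names, not part of this config),
# binning a couple of continuous features might look like:
#   bin_cols = ["Age", "Fare"]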
# Disable dimensionality reduction (the PCA step below is skipped)
run_details["group_funcs"] = False
# Build the train flow as an ordered list of (step_name, step_spec) pairs
train_flow = []
# If binning columns were specified, add a binning step
if bin_cols:
    train_flow.append(
        (
            "Binning",
            {
                "transformer": binning.Info_Gain_Discretizer(),
                "fit": {
                    "Input": [bin_cols],
                    "Append": ["y"],
                },
            },
        )
    )
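# Note: Info_Gain_Discretizer is assumed to be a supervised discretizer (the
# "Append": ["y"] entry hands it the target), picking cut points that
# maximize information gain.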
# Add a one-hot encoding step for the low-cardinality categoric columns
train_flow.append(
    (
        "Encoding",
        {
            "transformer": encoding.Upper_Dimension_Encoder(),
            "fit": {
                "Input": [
                    ce.OneHotEncoder(use_cat_names=True),
                    pd.Index(col_details["Categoric"] + ["Pclass"]).difference(
                        run_details["high_cardinality"]
                    ),
                ]
            },
        },
    )
)
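# The pd.Index(...).difference(...) call keeps only the low-cardinality
# candidates for one-hot encoding. With hypothetical values:
#   pd.Index(["Sex", "Embarked", "Pclass"]).difference(["Embarked"])
#   -> Index(['Pclass', 'Sex'], dtype='object')   (difference sorts its result)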
# Add an imputation step: mean for continuous columns, most frequent value
# for the remaining non-continuous columns
train_flow.append(
    (
        "Imputation",
        {
            "transformer": imputation.Impute_Transformer(),
            "fit": {
                "Input": [
                    SimpleImputer(strategy="mean"),
                    SimpleImputer(strategy="most_frequent"),
                    col_details["Continious"],
                ],
                "Append": ["noncont"],
            },
        },
    )
)
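# sklearn's SimpleImputer(strategy="mean") only handles numeric data, which
# is presumably why a second, "most_frequent" imputer covers the
# non-continuous ("noncont") remainder.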
# Add target-based encoding for categoric columns with high cardinality
train_flow.append(
    (
        "High_Cardinality_Encoding",
        {
            "transformer": encoding.Categorical_Encoder(supervised=True),
            "fit": {
                "Input": [
                    ce.LeaveOneOutEncoder(),
                    run_details["high_cardinality"],
                ],
                "Append": ["y"],
            },
        },
    )
)
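# ce.LeaveOneOutEncoder replaces each category with the mean of the target
# computed over the other rows in that category, which is why this step
# appends y; leave-one-out reduces the target leakage a plain mean encoding
# would introduce for high-cardinality columns.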
# Add a centering step for the continuous columns
train_flow.append(
    (
        "Centering",
        {
            "transformer": custom_comps.Center_Transformer(),
            "fit": {
                "Input": [
                    col_details["Continious"],
                ],
            },
        },
    )
)
# Add a scaling step for the continuous columns
train_flow.append(
    (
        "Scaling",
        {
            "transformer": custom_comps.Scaling_Transformer(),
            "fit": {
                "Input": [
                    RobustScaler(),
                    col_details["Continious"],
                ],
            },
        },
    )
)
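# RobustScaler scales by the interquartile range and, by default, also
# removes the median, so outliers in the continuous columns have less
# influence than with StandardScaler. If Center_Transformer already removes
# the center, RobustScaler(with_centering=False) may be worth considering.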
# If enabled, add dimensionality reduction over the column groups
# (group_funcs is set to False above, so this block is currently skipped)
if run_details["group_funcs"]:
    train_flow.append(
        (
            "Dimensionality Reduction",
            {
                "transformer": custom_comps.PCA_Transformer(),
                "fit": {
                    "Input": [
                        proc["groups"],
                    ],
                },
            },
        )
    )
# Persist the assembled flow for the training run to pick up
with open("Config/TrainConf.pickle", "wb") as file:
    pickle.dump(train_flow, file)
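# Downstream usage sketch (assumption: a separate training script rebuilds
# the flow from this file; the consumer code is not shown here):
#
#   with open("Config/TrainConf.pickle", "rb") as file:
#       train_flow = pickle.load(file)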