-
Notifications
You must be signed in to change notification settings - Fork 12
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
feat(Label_Encoder): Support of Label Encoder in Multi Target Task #29
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -22,21 +22,34 @@ model = {{ model_name }}({{ silent }}random_state=random_state_model, {{ params | |
from sklearn.multioutput import MultiOutputRegressor | ||
|
||
model = MultiOutputRegressor(model) | ||
|
||
{% elif is_multioutput_classification %} | ||
from sklearn.multioutput import MultiOutputClassifier | ||
|
||
model = MultiOutputClassifier(model) | ||
{% endif %} | ||
{% set xgbclassifier = "XGBClassifier" %} | ||
{% if model_name == xgbclassifier %} | ||
{% if pipeline.task.target_columns|length == 1 %} | ||
|
||
from sklearn.preprocessing import LabelEncoder | ||
|
||
label_encoder = LabelEncoder() | ||
target_train = pd.DataFrame(label_encoder.fit_transform(target_train), columns=TARGET_COLUMNS) | ||
{% endif %} | ||
{% if pipeline.task.target_columns|length == 1 %} | ||
if target_train.select_dtypes(include=['object']).columns.any(): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't want to show this if-statement to users. I think, in |
||
str_columns = target_train.select_dtypes(include=['object']).columns | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
label_encoder= LabelEncoder() | ||
for col in str_columns: | ||
target_train[col] = label_encoder.fit_transform(target_train[col]) | ||
target_test[col] = label_encoder.transform(target_test[col]) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You need not transform |
||
|
||
model.fit(feature_train, target_train.values.ravel()) | ||
{% else %} | ||
|
||
from sklearn.preprocessing import LabelEncoder | ||
if target_train.select_dtypes(include=['object']).columns.any(): | ||
str_columns = target_train.select_dtypes(include=['object']).columns | ||
label_encoder= LabelEncoder() | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think label_encoders = {}
for col in str_columns:
label_encoder= LabelEncoder()
target_train[col] = label_encoder.fit_transform(target_train[col])
target_test[col] = label_encoder.transform(target_test[col])
label_encoders[col] = label_encoder |
||
for col in str_columns: | ||
target_train[col] = label_encoder.fit_transform(target_train[col]) | ||
target_test[col] = label_encoder.transform(target_test[col]) | ||
model.fit(feature_train, target_train) | ||
{% endif %} | ||
y_pred = model.predict(feature_test) | ||
|
@@ -45,4 +58,4 @@ y_pred = model.classes_[np.argmax(y_pred, axis=1)].reshape(-1, 1) | |
{% endif %} | ||
{% if model_name == xgbclassifier and (not pipeline.adaptation_metric.startswith("MAP_")) and (not pipeline.adaptation_metric == "LogLoss") and (pipeline.adaptation_metric not in metric_needing_predict_proba) %} | ||
y_pred = label_encoder.inverse_transform(y_pred).reshape(-1, 1) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Could you also implement inverse operation for the multicolumn targets? You would need to use |
||
{% endif %} | ||
{% endif %} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -17,25 +17,47 @@ model = {{ model_name }}(random_state=random_state_model, {{ params }}) | |
from sklearn.multioutput import MultiOutputRegressor | ||
|
||
model = MultiOutputRegressor(model) | ||
|
||
{% elif is_multioutput_classification %} | ||
from sklearn.multioutput import MultiOutputClassifier | ||
|
||
model = MultiOutputClassifier(model) | ||
{% endif %} | ||
|
||
{% set xgbclassifier = "XGBClassifier" %} | ||
{% if model_name == xgbclassifier %} | ||
|
||
{% if pipeline.task.target_columns|length == 1 %} | ||
from sklearn.preprocessing import LabelEncoder | ||
flag=0 | ||
if target_train.select_dtypes(include=['object']).columns.any(): | ||
str_columns = target_train.select_dtypes(include=['object']).columns | ||
label_encoder= LabelEncoder() | ||
flag=1 | ||
for col in str_columns: | ||
target_train[col] = label_encoder.fit_transform(target_train[col]) | ||
|
||
label_encoder = LabelEncoder() | ||
target_train = pd.DataFrame(label_encoder.fit_transform(target_train), columns=TARGET_COLUMNS) | ||
with open('target_LabelEncoder.pkl', 'wb') as f: | ||
pickle.dump(label_encoder, f) | ||
if flag==1: | ||
with open('target_LabelEncoder.pkl', 'wb') as f: | ||
pickle.dump(label_encoder, f) | ||
flag=0 | ||
|
||
{% endif %} | ||
{% if pipeline.task.target_columns|length == 1 %} | ||
model.fit(feature_train, target_train.values.ravel()) | ||
{% else %} | ||
from sklearn.preprocessing import LabelEncoder | ||
flag=0 | ||
if target_train.select_dtypes(include=['object']).columns.any(): | ||
str_columns = target_train.select_dtypes(include=['object']).columns | ||
label_encoder= LabelEncoder() | ||
flag=1 | ||
for col in str_columns: | ||
target_train[col] = label_encoder.fit_transform(target_train[col]) | ||
|
||
if flag==1: | ||
with open('target_LabelEncoder.pkl', 'wb') as f: | ||
pickle.dump(label_encoder, f) | ||
flag=0 | ||
Comment on lines
+46
to
+58
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It is not preferable to show the
|
||
|
||
model.fit(feature_train, target_train) | ||
{% endif %} | ||
with open('model.pkl', 'wb') as f: | ||
pickle.dump(model, f) | ||
pickle.dump(model, f) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
As you mentioned, the target is not (0,1,2,...) and the model is XGBClassifier, an error is raised.
So, we want to use LabelEncoder when
or,