Skip to content

Commit

Permalink
feat(Label_Encoder): Support of Label Encoder in Multi Target Task
Browse files: browse the repository at this point in the history
Feature Added

Signed-off-by: Trishala Ahalpara <[email protected]>
  • Loading branch information
tahalpara committed Nov 6, 2023
1 parent 9734c07 commit 6bd5119
Show file tree
Hide file tree
Showing 3 changed files with 64 additions and 16 deletions.
25 changes: 19 additions & 6 deletions sapientml_core/templates/model_templates/model.py.jinja
Original file line number Diff line number Diff line change
Expand Up @@ -22,21 +22,34 @@ model = {{ model_name }}({{ silent }}random_state=random_state_model, {{ params
from sklearn.multioutput import MultiOutputRegressor

model = MultiOutputRegressor(model)

{% elif is_multioutput_classification %}
from sklearn.multioutput import MultiOutputClassifier

model = MultiOutputClassifier(model)
{% endif %}
{% set xgbclassifier = "XGBClassifier" %}
{% if model_name == xgbclassifier %}
{% if pipeline.task.target_columns|length == 1 %}

from sklearn.preprocessing import LabelEncoder

label_encoder = LabelEncoder()
target_train = pd.DataFrame(label_encoder.fit_transform(target_train), columns=TARGET_COLUMNS)
{% endif %}
{% if pipeline.task.target_columns|length == 1 %}
if target_train.select_dtypes(include=['object']).columns.any():
str_columns = target_train.select_dtypes(include=['object']).columns
label_encoder= LabelEncoder()
for col in str_columns:
target_train[col] = label_encoder.fit_transform(target_train[col])
target_test[col] = label_encoder.transform(target_test[col])

model.fit(feature_train, target_train.values.ravel())
{% else %}

from sklearn.preprocessing import LabelEncoder
if target_train.select_dtypes(include=['object']).columns.any():
str_columns = target_train.select_dtypes(include=['object']).columns
label_encoder= LabelEncoder()
for col in str_columns:
target_train[col] = label_encoder.fit_transform(target_train[col])
target_test[col] = label_encoder.transform(target_test[col])
model.fit(feature_train, target_train)
{% endif %}
y_pred = model.predict(feature_test)
Expand All @@ -45,4 +58,4 @@ y_pred = model.classes_[np.argmax(y_pred, axis=1)].reshape(-1, 1)
{% endif %}
{% if model_name == xgbclassifier and (not pipeline.adaptation_metric.startswith("MAP_")) and (not pipeline.adaptation_metric == "LogLoss") and (pipeline.adaptation_metric not in metric_needing_predict_proba) %}
y_pred = label_encoder.inverse_transform(y_pred).reshape(-1, 1)
{% endif %}
{% endif %}
38 changes: 30 additions & 8 deletions sapientml_core/templates/model_templates/model_train.py.jinja
Original file line number Diff line number Diff line change
Expand Up @@ -17,25 +17,47 @@ model = {{ model_name }}(random_state=random_state_model, {{ params }})
from sklearn.multioutput import MultiOutputRegressor

model = MultiOutputRegressor(model)

{% elif is_multioutput_classification %}
from sklearn.multioutput import MultiOutputClassifier

model = MultiOutputClassifier(model)
{% endif %}

{% set xgbclassifier = "XGBClassifier" %}
{% if model_name == xgbclassifier %}

{% if pipeline.task.target_columns|length == 1 %}
from sklearn.preprocessing import LabelEncoder
flag=0
if target_train.select_dtypes(include=['object']).columns.any():
str_columns = target_train.select_dtypes(include=['object']).columns
label_encoder= LabelEncoder()
flag=1
for col in str_columns:
target_train[col] = label_encoder.fit_transform(target_train[col])

label_encoder = LabelEncoder()
target_train = pd.DataFrame(label_encoder.fit_transform(target_train), columns=TARGET_COLUMNS)
with open('target_LabelEncoder.pkl', 'wb') as f:
pickle.dump(label_encoder, f)
if flag==1:
with open('target_LabelEncoder.pkl', 'wb') as f:
pickle.dump(label_encoder, f)
flag=0

{% endif %}
{% if pipeline.task.target_columns|length == 1 %}
model.fit(feature_train, target_train.values.ravel())
{% else %}
from sklearn.preprocessing import LabelEncoder
flag=0
if target_train.select_dtypes(include=['object']).columns.any():
str_columns = target_train.select_dtypes(include=['object']).columns
label_encoder= LabelEncoder()
flag=1
for col in str_columns:
target_train[col] = label_encoder.fit_transform(target_train[col])

if flag==1:
with open('target_LabelEncoder.pkl', 'wb') as f:
pickle.dump(label_encoder, f)
flag=0

model.fit(feature_train, target_train)
{% endif %}
with open('model.pkl', 'wb') as f:
pickle.dump(model, f)
pickle.dump(model, f)
17 changes: 15 additions & 2 deletions sapientml_core/templates/other_templates/evaluation.py.jinja
Original file line number Diff line number Diff line change
Expand Up @@ -22,11 +22,24 @@ for i, col in enumerate(target_test.columns):
one_acc = accuracy_score(target_test[col], y_pred[:, i:i+1])
__accs.append(one_acc)
print(f"RESULT: Accuracy : {str(sum(__accs)/len(__accs))}")
{% elif pipeline.adaptation_metric == macros.Metric.F1.value %}

{% elif (pipeline.adaptation_metric == macros.Metric.F1.value) and (not pipeline.is_multi_class_multi_targets) %}
from sklearn import metrics

f1 = metrics.f1_score(target_test, y_pred, average='macro')
print('RESULT: F1 Score: ' + str(f1))

{% elif pipeline.adaptation_metric == macros.Metric.F1.value and (pipeline.is_multi_class_multi_targets)%}
from sklearn import metrics
f1_scores = []

for i, col in enumerate(target_test.columns):
one_f1 = metrics.f1_score(target_test[col], y_pred[:, i:i+1], average='macro')
f1_scores.append(one_f1)

average_f1 = sum(f1_scores) / len(f1_scores)
print(f"RESULT: Average F1 score: {average_f1}")

{% elif pipeline.adaptation_metric == macros.Metric.R2.value %}
from sklearn import metrics

Expand Down Expand Up @@ -123,4 +136,4 @@ from sklearn import metrics

f1 = metrics.f1_score(target_test, y_pred, average='macro')
print('RESULT: F1 Score: ' + str(f1))
{% endif %}
{% endif %}

0 comments on commit 6bd5119

Please sign in to comment.