Skip to content

Commit

Permalink
hotfix: Bug with target encoding (#1314)
Browse files Browse the repository at this point in the history
 Hotfix for a bug in the boosting models' implementation
  • Loading branch information
aPovidlo authored Jul 31, 2024
1 parent 81acea3 commit 0368e9c
Show file tree
Hide file tree
Showing 2 changed files with 36 additions and 33 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -37,31 +37,33 @@ def fit(self, input_data: InputData):
if self.params.get('use_eval_set'):
train_input, eval_input = train_test_data_setup(input_data)

train_input = self.convert_to_dataframe(train_input, identify_cats=self.params.get('enable_categorical'))
eval_input = self.convert_to_dataframe(eval_input, identify_cats=self.params.get('enable_categorical'))
X_train, y_train = self.convert_to_dataframe(
train_input, identify_cats=self.params.get('enable_categorical')
)

train_x, train_y = train_input.drop(columns=['target']), train_input['target']
eval_x, eval_y = eval_input.drop(columns=['target']), eval_input['target']
X_eval, y_eval = self.convert_to_dataframe(
eval_input, identify_cats=self.params.get('enable_categorical')
)

self.model.eval_metric = self.set_eval_metric(self.classes_)

self.model.fit(X=train_x, y=train_y, eval_set=[(eval_x, eval_y)], verbose=self.model_params['verbosity'])
self.model.fit(X=X_train, y=y_train, eval_set=[(X_eval, y_eval)], verbose=self.model_params['verbosity'])
else:
train_data = self.convert_to_dataframe(input_data, identify_cats=self.params.get('enable_categorical'))
X_train, y_train = self.convert_to_dataframe(
input_data, identify_cats=self.params.get('enable_categorical')
)
self.features_names = input_data.features_names
train_x, train_y = train_data.drop(columns=['target']), train_data['target']

self.model.fit(X=train_x, y=train_y, verbose=self.model_params['verbosity'])
self.model.fit(X=X_train, y=y_train, verbose=self.model_params['verbosity'])

return self.model

def predict(self, input_data: InputData):
if self.params.get('enable_categorical'):
input_data = input_data.get_not_encoded_data()

input_data = self.convert_to_dataframe(input_data, self.params.get('enable_categorical'))
train_x, _ = input_data.drop(columns=['target']), input_data['target']
prediction = self.model.predict(train_x)
X, _ = self.convert_to_dataframe(input_data, self.params.get('enable_categorical'))
prediction = self.model.predict(X)

return prediction

Expand Down Expand Up @@ -89,7 +91,7 @@ def plot_feature_importance(self, importance_type='weight'):
@staticmethod
def convert_to_dataframe(data: Optional[InputData], identify_cats: bool):
dataframe = pd.DataFrame(data=data.features)
dataframe['target'] = data.target
dataframe['target'] = np.ravel(data.target)

if identify_cats and data.categorical_idx is not None:
for col in dataframe.columns[data.categorical_idx]:
Expand All @@ -99,7 +101,7 @@ def convert_to_dataframe(data: Optional[InputData], identify_cats: bool):
for col in dataframe.columns[data.numerical_idx]:
dataframe[col] = dataframe[col].astype('float')

return dataframe
return dataframe.drop(columns=['target']), dataframe['target']

@staticmethod
def set_eval_metric(n_classes):
Expand Down Expand Up @@ -127,9 +129,8 @@ def predict_proba(self, input_data: InputData):
if self.params.get('enable_categorical'):
input_data = input_data.get_not_encoded_data()

input_data = self.convert_to_dataframe(input_data, self.params.get('enable_categorical'))
train_x = input_data.drop(columns=['target'])
prediction = self.model.predict_proba(train_x)
X, _ = self.convert_to_dataframe(input_data, self.params.get('enable_categorical'))
prediction = self.model.predict_proba(X)
return prediction


Expand Down Expand Up @@ -161,27 +162,30 @@ def fit(self, input_data: InputData):
if self.params.get('use_eval_set'):
train_input, eval_input = train_test_data_setup(input_data)

train_input = self.convert_to_dataframe(train_input, identify_cats=self.params.get('enable_categorical'))
eval_input = self.convert_to_dataframe(eval_input, identify_cats=self.params.get('enable_categorical'))
X_train, y_train = self.convert_to_dataframe(
train_input, identify_cats=self.params.get('enable_categorical')
)

train_x, train_y = train_input.drop(columns=['target']), train_input['target']
eval_x, eval_y = eval_input.drop(columns=['target']), eval_input['target']
X_eval, y_eval = self.convert_to_dataframe(
eval_input, identify_cats=self.params.get('enable_categorical')
)

eval_metric = self.set_eval_metric(self.classes_)
callbacks = self.update_callbacks()

self.model.fit(
X=train_x, y=train_y,
eval_set=[(eval_x, eval_y)], eval_metric=eval_metric,
X=X_train, y=y_train,
eval_set=[(X_eval, y_eval)], eval_metric=eval_metric,
callbacks=callbacks
)

else:
train_data = self.convert_to_dataframe(input_data, identify_cats=self.params.get('enable_categorical'))
train_x, train_y = train_data.drop(columns=['target']), train_data['target']
X_train, y_train = self.convert_to_dataframe(
input_data, identify_cats=self.params.get('enable_categorical')
)

self.model.fit(
X=train_x, y=train_y,
X=X_train, y=y_train,
)

return self.model
Expand All @@ -190,9 +194,8 @@ def predict(self, input_data: InputData):
if self.params.get('enable_categorical'):
input_data = input_data.get_not_encoded_data()

input_data = self.convert_to_dataframe(input_data, identify_cats=self.params.get('enable_categorical'))
train_x = input_data.drop(columns=['target'])
prediction = self.model.predict(train_x)
X, _ = self.convert_to_dataframe(input_data, identify_cats=self.params.get('enable_categorical'))
prediction = self.model.predict(X)

return prediction

Expand Down Expand Up @@ -228,7 +231,7 @@ def set_eval_metric(n_classes):
@staticmethod
def convert_to_dataframe(data: Optional[InputData], identify_cats: bool):
dataframe = pd.DataFrame(data=data.features, columns=data.features_names)
dataframe['target'] = data.target
dataframe['target'] = np.ravel(data.target)

if identify_cats and data.categorical_idx is not None:
for col in dataframe.columns[data.categorical_idx]:
Expand All @@ -238,7 +241,7 @@ def convert_to_dataframe(data: Optional[InputData], identify_cats: bool):
for col in dataframe.columns[data.numerical_idx]:
dataframe[col] = dataframe[col].astype('float')

return dataframe
return dataframe.drop(columns=['target']), dataframe['target']

def plot_feature_importance(self):
plot_feature_importance(self.features_names, self.model.feature_importances_)
Expand All @@ -258,9 +261,8 @@ def predict_proba(self, input_data: InputData):
if self.params.get('enable_categorical'):
input_data = input_data.get_not_encoded_data()

input_data = self.convert_to_dataframe(input_data, self.params.get('enable_categorical'))
train_x = input_data.drop(columns=['target'])
prediction = self.model.predict_proba(train_x)
X, _ = self.convert_to_dataframe(input_data, self.params.get('enable_categorical'))
prediction = self.model.predict_proba(X)
return prediction


Expand Down
1 change: 1 addition & 0 deletions fedot/preprocessing/preprocessing.py
Original file line number Diff line number Diff line change
Expand Up @@ -207,6 +207,7 @@ def _prepare_obligatory_unimodal(self, data: InputData, source_name: str,
if is_fit_stage:
self._find_features_lacking_nans(data, source_name)
self._take_only_correct_features(data, source_name)

if is_fit_stage:
data = self._drop_rows_with_nan_in_target(data)

Expand Down

0 comments on commit 0368e9c

Please sign in to comment.