diff --git a/sapientml_core/adaptation/generation/pipeline_template.py b/sapientml_core/adaptation/generation/pipeline_template.py index 6d2a557..dea1b93 100644 --- a/sapientml_core/adaptation/generation/pipeline_template.py +++ b/sapientml_core/adaptation/generation/pipeline_template.py @@ -189,11 +189,12 @@ def generate(self): # Adding Shap Visualization data tpl = env.get_template("other_templates/shap.py.jinja") - pipeline.pipeline_json["shap"]["code"] = self._render(tpl, pipeline=pipeline) + pipeline.pipeline_json["shap"]["code"] = self._render(tpl, pipeline=pipeline, model_name=model_name) tpl = env.get_template("other_templates/prediction_result.py.jinja") - pipeline.pipeline_json["output_prediction"]["code"] = self._render(tpl, pipeline=pipeline, macros=macros) - pipeline.pipeline_json["output_prediction"]["code_test"] = self._render(tpl, pipeline=pipeline, macros=macros) + pipeline.pipeline_json["output_prediction"]["code"] = self._render( + tpl, pipeline=pipeline, model_name=model_name, macros=macros + ) if flag_hyperparameter_tuning: tpl = env.get_template("model_templates/hyperparameters.py.jinja") @@ -220,14 +221,10 @@ def generate(self): if pipeline.adaptation_metric and ( pipeline.adaptation_metric in macros.metric_needing_predict_proba or pipeline.adaptation_metric.startswith(macros.Metric.MAP_K.value) - or pipeline.config.predict_option == macros.PRED_PROBABILITY ): pipeline.pipeline_json["evaluation"]["code_test"] = pipeline.pipeline_json["evaluation"][ "code_test" ].replace("y_pred", "y_prob") - pipeline.pipeline_json["output_prediction"]["code_test"] = pipeline.pipeline_json["output_prediction"][ - "code_test" - ].replace("y_pred", "y_prob") if pipeline.config.permutation_importance: tpl = env.get_template("other_templates/permutation_importance.py.jinja") @@ -517,6 +514,7 @@ def populate_model(self): is_multioutput_regression=_is_multioutput_regression, is_multioutput_classification=_is_multioutput_classification, metric_needing_predict_proba=macros.metric_needing_predict_proba, + macros=macros, ) snippet_test = self._render( tpl_test, @@ -541,9 +539,8 @@ def populate_model(self): tpl = env.get_template("model_templates/classification_post_process.jinja") snippet += "\n" + self._render(tpl, pipeline=pipeline) - snippet_predict = snippet_predict.replace("predict", "predict_proba") tpl_predict = env.get_template("model_templates/classification_post_process.jinja") - snippet_predict += "\n" + self._render(tpl_predict, pipeline=pipeline) + snippet_predict += "\n" + self._render(tpl_predict, pipeline=pipeline).replace("y_pred", "y_prob") tpl_test = env.get_template("model_templates/classification_post_process.jinja") snippet_test += "\n" + self._render(tpl_test, pipeline=pipeline).replace("y_pred", "y_prob") diff --git a/sapientml_core/params.py b/sapientml_core/params.py index b9de483..aacf73d 100644 --- a/sapientml_core/params.py +++ b/sapientml_core/params.py @@ -67,8 +67,8 @@ class SapientMLConfig(Config): Ignored when hyperparameter_tuning is False. hyperparameter_tuning_random_state: int, default 1023 Random seed for hyperparameter tuning. - predict_option: Literal["default", "probability"], default "default" - Specify predict method (default: predict(), probability: predict_proba().) + predict_option: Literal["default", "probability", None], default None + Specify predict method (default: predict(), probability: predict_proba(), None: Comply with metric requirements.) permutation_importance: bool, default True On/Off of outputting permutation importance calculation code. add_explanation: bool, default False @@ -84,7 +84,7 @@ class SapientMLConfig(Config): hyperparameter_tuning_n_trials: int = 10 hyperparameter_tuning_timeout: int = 0 hyperparameter_tuning_random_state: int = 1023 - predict_option: Literal["default", "probability"] = "default" + predict_option: Optional[Literal["default", "probability"]] = None permutation_importance: bool = True add_explanation: bool = False diff --git a/sapientml_core/templates/model_templates/model_predict.py.jinja b/sapientml_core/templates/model_templates/model_predict.py.jinja index 1a913f1..8ff7ab4 100644 --- a/sapientml_core/templates/model_templates/model_predict.py.jinja +++ b/sapientml_core/templates/model_templates/model_predict.py.jinja @@ -5,13 +5,16 @@ import numpy as np with open('model.pkl', 'rb') as f: model = pickle.load(f) +{% if (pipeline.adaptation_metric not in macros.metric_needing_predict_proba) or (pipeline.config.predict_option == macros.PRED_DEFAULT) %} y_pred = model.predict(feature_test) -{% if flag_predict_proba and (not pipeline.adaptation_metric.startswith("MAP_")) and (not pipeline.adaptation_metric == "LogLoss") and (pipeline.adaptation_metric not in metric_needing_predict_proba) %} -y_pred = model.classes_[np.argmax(y_pred, axis=1)].reshape(-1, 1) {% endif %} -{% if model_name == xgbclassifier and (not pipeline.adaptation_metric.startswith("MAP_")) and (not pipeline.adaptation_metric == "LogLoss") and (pipeline.adaptation_metric not in metric_needing_predict_proba) %} +{% if pipeline.adaptation_metric and flag_predict_proba %} +y_prob = model.predict_proba(feature_test) +{% endif %} +{% if model_name == xgbclassifier %} with open('target_LabelEncoder.pkl', 'rb') as f: label_encoder = pickle.load(f) - +{% endif %} +{% if model_name == xgbclassifier and ((pipeline.adaptation_metric not in macros.metric_needing_predict_proba) or (pipeline.config.predict_option == macros.PRED_DEFAULT)) %} y_pred = label_encoder.inverse_transform(y_pred).reshape(-1, 1) {% endif %} \ No newline at end of file diff --git a/sapientml_core/templates/model_templates/model_test.py.jinja b/sapientml_core/templates/model_templates/model_test.py.jinja index ba75f29..621dbf5 100644 --- a/sapientml_core/templates/model_templates/model_test.py.jinja +++ b/sapientml_core/templates/model_templates/model_test.py.jinja @@ -43,12 +43,6 @@ y_pred = model.predict(feature_test) {% if pipeline.adaptation_metric and flag_predict_proba %} y_prob = model.predict_proba(feature_test) {% endif %} -{% if flag_predict_proba and (not pipeline.adaptation_metric.startswith("MAP_")) and (not pipeline.adaptation_metric == "LogLoss") and (pipeline.adaptation_metric not in metric_needing_predict_proba) %} -y_prob = model.classes_[np.argmax(y_prob, axis=1)].reshape(-1, 1) -{% endif %} -{% if flag_predict_proba and model_name == xgbclassifier and (not pipeline.adaptation_metric.startswith("MAP_")) and (not pipeline.adaptation_metric == "LogLoss") and (pipeline.adaptation_metric not in metric_needing_predict_proba) %} -y_pred = label_encoder.inverse_transform(y_pred).reshape(-1, 1) -y_prob = label_encoder.inverse_transform(y_prob).reshape(-1, 1) -{% elif model_name == xgbclassifier and (not pipeline.adaptation_metric.startswith("MAP_")) and (not pipeline.adaptation_metric == "LogLoss") and (pipeline.adaptation_metric not in metric_needing_predict_proba) %} +{% if model_name == xgbclassifier %} y_pred = label_encoder.inverse_transform(y_pred).reshape(-1, 1) {% endif %} \ No newline at end of file diff --git a/sapientml_core/templates/other_templates/prediction_result.py.jinja b/sapientml_core/templates/other_templates/prediction_result.py.jinja index 12fb011..b7d2c1d 100644 --- a/sapientml_core/templates/other_templates/prediction_result.py.jinja +++ b/sapientml_core/templates/other_templates/prediction_result.py.jinja @@ -1,11 +1,20 @@ # OUTPUT PREDICTION -{% if pipeline.task.is_multiclass == True and pipeline.adaptation_metric == "LogLoss"%} -prediction = pd.DataFrame(y_pred, columns=model.classes_, index=feature_test.index) +{% set xgbclassifier = "XGBClassifier" %} +{% if pipeline.config.predict_option == macros.PRED_PROBABILITY and model_name == xgbclassifier and pipeline.task.is_multiclass == True and (pipeline.adaptation_metric in macros.metrics_for_classification) and (not pipeline.adaptation_metric.startswith("MAP_"))%} +prediction = pd.DataFrame(y_prob, columns=label_encoder.inverse_transform(model.classes_), index=feature_test.index) +{% elif pipeline.config.predict_option == macros.PRED_PROBABILITY and pipeline.task.is_multiclass == True and (pipeline.adaptation_metric in macros.metrics_for_classification) and (not pipeline.adaptation_metric.startswith("MAP_"))%} +prediction = pd.DataFrame(y_prob, columns=model.classes_, index=feature_test.index) +{% elif pipeline.config.predict_option == macros.PRED_PROBABILITY and (pipeline.adaptation_metric in macros.metrics_for_classification) and (not pipeline.adaptation_metric.startswith("MAP_"))%} +prediction = pd.DataFrame(y_prob, columns=TARGET_COLUMNS, index=feature_test.index) +{% elif pipeline.config.predict_option is none and model_name == xgbclassifier and pipeline.task.is_multiclass == True and (pipeline.adaptation_metric in macros.metric_needing_predict_proba) and (not pipeline.adaptation_metric.startswith("MAP_"))%} +prediction = pd.DataFrame(y_prob, columns=label_encoder.inverse_transform(model.classes_), index=feature_test.index) +{% elif pipeline.config.predict_option is none and pipeline.task.is_multiclass == True and (pipeline.adaptation_metric in macros.metric_needing_predict_proba) and (not pipeline.adaptation_metric.startswith("MAP_"))%} +prediction = pd.DataFrame(y_prob, columns=model.classes_, index=feature_test.index) +{% elif pipeline.config.predict_option is none and (pipeline.adaptation_metric in macros.metric_needing_predict_proba) and (not pipeline.adaptation_metric.startswith("MAP_"))%} +prediction = pd.DataFrame(y_prob, columns=TARGET_COLUMNS, index=feature_test.index) {% elif pipeline.adaptation_metric.startswith("MAP_") %} {% set k = pipeline.adaptation_metric.split("_")[1] %} -prediction = pd.DataFrame(y_pred, columns=[TARGET_COLUMNS[0] + "_" +str(i) for i in range(1, y_pred.shape[1] + 1)], index=feature_test.index) -{% elif pipeline.task.is_multiclass == True and (pipeline.adaptation_metric in ["auc", "ROC_AUC", "Gini"]) %} -prediction = pd.DataFrame(model.classes_[np.argmax(y_pred, axis=1)], columns=TARGET_COLUMNS, index=feature_test.index) +prediction = pd.DataFrame(y_prob, columns=[TARGET_COLUMNS[0] + "_" +str(i) for i in range(1, y_prob.shape[1] + 1)], index=feature_test.index) {% else %} prediction = pd.DataFrame(y_pred, columns=TARGET_COLUMNS, index=feature_test.index) {% endif %} diff --git a/sapientml_core/templates/other_templates/shap.py.jinja b/sapientml_core/templates/other_templates/shap.py.jinja index e5ffd0d..86365cf 100644 --- a/sapientml_core/templates/other_templates/shap.py.jinja +++ b/sapientml_core/templates/other_templates/shap.py.jinja @@ -1,9 +1,14 @@ # Models are restricted because of execution time. +{% set lgbmclassifier = "LGBMClassifier" %} models_for_shap = ['XGBClassifier', 'XGBRegressor', 'LGBMClassifier', 'LGBMRegressor', 'GradientBoostingClassifier', 'GradientBoostingRegressor'] if model.__class__.__name__ in models_for_shap: import shap feature_shap = feature_train.sample(1000) if feature_train.shape[0] > 1000 else feature_train - explainer = shap.Explainer(model, feature_shap) +{% if model_name == lgbmclassifier %} + explainer = shap.Explainer(model,feature_shap) +{% else %} + explainer = shap.Explainer(model) +{% endif %} shap_values = explainer(feature_shap) # summarize the effects of all the features diff --git a/sapientml_core/templates/pipeline_test.py.jinja b/sapientml_core/templates/pipeline_test.py.jinja index 29f346e..5850baa 100644 --- a/sapientml_core/templates/pipeline_test.py.jinja +++ b/sapientml_core/templates/pipeline_test.py.jinja @@ -66,14 +66,14 @@ if set(TARGET_COLUMNS).issubset(test_dataset.columns.tolist()): {% endif %} {% if 'output_prediction' in pipeline_json %} -{{ pipeline_json['output_prediction']['code_test'] }} +{{ pipeline_json['output_prediction']['code'] }} {% endif %} {% if 'permutation_importance' in pipeline_json %} {{ pipeline_json['permutation_importance']['code'] }} {% endif %} -{% if 'shap' in pipeline_json %} +{% if 'shap' in pipeline_json and not pipeline.task.is_multiclass %} {{ pipeline_json['shap']['code'] }} {% endif %} \ No newline at end of file