Commit 4cdc765

LocalCascadeEnsemble committed Aug 16, 2022
1 parent 3bdb1e4 commit 4cdc765
Showing 11 changed files with 171 additions and 297 deletions.
189 changes: 95 additions & 94 deletions doc/conf.py

Large diffs are not rendered by default.

41 changes: 0 additions & 41 deletions doc/quick_start.rst

This file was deleted.

64 changes: 4 additions & 60 deletions doc/tutorial.rst
@@ -104,7 +104,7 @@ Code Examples
=============

The following examples illustrate the use of LCE on public datasets for a classification and a regression task.
- They also demonstrate the compatibility of LCE with scikit-learn pipelines and model selection tools through the use of ``cross_val_score`` and ``GridSearchCV``.
+ They also demonstrate the compatibility of LCE with scikit-learn pipelines and model selection tools through the use of ``cross_val_score``.
An example of LCE on a dataset including missing values is also shown.

Classification
@@ -139,7 +139,7 @@ Classification
- **Example 2: LCE with scikit-learn cross validation score**
- This example demonstrates the compatibility of LCE with scikit-learn model selection tools through the use of ``cross_val_score``.
+ This example demonstrates the compatibility of LCE with scikit-learn pipelines and model selection tools through the use of ``cross_val_score``.

.. code-block:: python
@@ -220,7 +220,7 @@ Regression
X_train, X_test, y_train, y_test = train_test_split(data.data, data.target, random_state=0)
# Train LCERegressor with default parameters
- reg = LCERegressor(n_jobs=-1, random_state=0)
+ reg = LCERegressor(n_jobs=-1, random_state=123)
reg.fit(X_train, y_train)
# Make prediction
@@ -230,41 +230,7 @@ Regression

   .. code-block::

-      The mean squared error (MSE) on test set: 3556
-
- - **Example 5: LCE with scikit-learn best hyperparameter grid search**
-   This example demonstrates the compatibility of LCE with scikit-learn model selection tools through the use of ``GridSearchCV``.
-
-   .. code-block:: python
-
-      from lce import LCERegressor
-      from sklearn.datasets import load_diabetes
-      from sklearn.model_selection import train_test_split, GridSearchCV
-
-      # Load data and generate a train/test split
-      data = load_diabetes()
-      X_train, X_test, y_train, y_test = train_test_split(data.data, data.target, random_state=0)
-
-      # Build LCERegressor with default parameters
-      reg = LCERegressor(n_jobs=-1, random_state=0)
-
-      # Define parameter ranges for grid search
-      params = {'n_estimators': list(range(10, 51, 20)),
-                'max_depth': list(range(0, 3, 1))}
-
-      # Run scikit learn grid search
-      grid_cv = GridSearchCV(reg, param_grid=params, cv=3, n_jobs=-1)
-      grid_cv.fit(X_train, y_train)
-
-      # Print best configuration
-      print("Best n_estimator: ", grid_cv.best_params_['n_estimators'],
-            ", best max_depth: ", grid_cv.best_params_['max_depth'])
-
-   .. code-block::
-
-      Best n_estimator: 30 , best max_depth: 1
+      The mean squared error (MSE) on test set: 3576
@@ -361,28 +327,6 @@ Python Source Files
/auto_examples/lceregressor_diabetes


- .. raw:: html
-
-     <div class="sphx-glr-thumbcontainer" tooltip="LCERegressor on Diabetes Dataset with scikit-learn hyperparameter grid search">
-
- .. only:: html
-
-     .. figure:: _images/logo_lce.svg
-        :alt: LCERegressor on Diabetes dataset with scikit-learn hyperparameter grid search
-
-        :ref:`sphx_glr_auto_examples_lceregressor_diabetes_gridsearchcv.py`
-
- .. raw:: html
-
-     </div>
-
-
- .. toctree::
-    :hidden:
-
-    /auto_examples/lceregressor_diabetes_gridsearchcv



.. raw:: html

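Although this commit drops the ``GridSearchCV`` example from the tutorial and the example gallery, it does not change the estimator interface itself, so grid search over LCE estimators should still work through the usual scikit-learn API. A minimal sketch, adapted from the deleted example (the parameter grid here is illustrative, not part of this commit):

from lce import LCERegressor
from sklearn.datasets import load_diabetes
from sklearn.model_selection import GridSearchCV, train_test_split

# Load data and generate a train/test split
data = load_diabetes()
X_train, X_test, y_train, y_test = train_test_split(
    data.data, data.target, random_state=0
)

# Grid search over an illustrative parameter grid
reg = LCERegressor(n_jobs=-1, random_state=0)
params = {"n_estimators": [10, 30, 50], "max_depth": [0, 1, 2]}
grid_cv = GridSearchCV(reg, param_grid=params, cv=3, n_jobs=-1)
grid_cv.fit(X_train, y_train)
print("Best parameters: ", grid_cv.best_params_)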
6 changes: 4 additions & 2 deletions examples/lceclassifier_iris.py
@@ -14,7 +14,9 @@

# Load data and generate a train/test split
data = load_iris()
- X_train, X_test, y_train, y_test = train_test_split(data.data, data.target, random_state=0)
+ X_train, X_test, y_train, y_test = train_test_split(
+     data.data, data.target, random_state=0
+ )

# Train LCEClassifier with default parameters
clf = LCEClassifier(n_jobs=-1, random_state=0)
@@ -23,4 +25,4 @@
# Make prediction and compute accuracy score
y_pred = clf.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy: {:.1f}%".format(accuracy*100))
print("Accuracy: {:.1f}%".format(accuracy * 100))
8 changes: 5 additions & 3 deletions examples/lceclassifier_iris_cv.py
@@ -12,12 +12,14 @@

# Load data
data = load_iris()
- X_train, X_test, y_train, y_test = train_test_split(data.data, data.target, random_state=0)
+ X_train, X_test, y_train, y_test = train_test_split(
+     data.data, data.target, random_state=0
+ )

# Set LCEClassifier with default parameters
clf = LCEClassifier(n_jobs=-1, random_state=0)

# Compute cross-validation scores
cv_scores = cross_val_score(clf, X_train, y_train, cv=3)
- cv_scores = [round(elem*100, 1) for elem in cv_scores.tolist()]
- print("Cross-validation scores on train set: ", cv_scores)
+ cv_scores = [round(elem * 100, 1) for elem in cv_scores.tolist()]
+ print("Cross-validation scores on train set: ", cv_scores)
10 changes: 6 additions & 4 deletions examples/lceclassifier_missing_iris.py
@@ -15,20 +15,22 @@

# Load data and generate a train/test split
data = load_iris()
- X_train, X_test, y_train, y_test = train_test_split(data.data, data.target, random_state=0)
+ X_train, X_test, y_train, y_test = train_test_split(
+     data.data, data.target, random_state=0
+ )

# Input 20% of missing values per variable in the train set
np.random.seed(0)
m = 0.2
for j in range(0, X_train.shape[1]):
-     sub = np.random.choice(X_train.shape[0], int(X_train.shape[0]*m))
+     sub = np.random.choice(X_train.shape[0], int(X_train.shape[0] * m))
    X_train[sub, j] = np.nan

# Train LCEClassifier with default parameters
- clf = LCEClassifier(n_jobs=-1, random_state=123)
+ clf = LCEClassifier(n_jobs=-1, random_state=0)
clf.fit(X_train, y_train)

# Make prediction and compute accuracy score
y_pred = clf.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy: {:.1f}%".format(accuracy*100))
print("Accuracy: {:.1f}%".format(accuracy * 100))
10 changes: 6 additions & 4 deletions examples/lceregressor_diabetes.py
@@ -14,13 +14,15 @@

# Load data and generate a train/test split
data = load_diabetes()
- X_train, X_test, y_train, y_test = train_test_split(data.data, data.target, random_state=0)
+ X_train, X_test, y_train, y_test = train_test_split(
+     data.data, data.target, random_state=0
+ )

# Train LCERegressor with default parameters
- reg = LCERegressor(n_jobs=-1, random_state=0)
+ reg = LCERegressor(n_jobs=-1, random_state=123)
reg.fit(X_train, y_train)

- # Make prediction
+ # Make prediction
y_pred = reg.predict(X_test)
mse = mean_squared_error(y_test, reg.predict(X_test))
print("The mean squared error (MSE) on test set: {:.0f}".format(mse))
print("The mean squared error (MSE) on test set: {:.0f}".format(mse))
31 changes: 0 additions & 31 deletions examples/lceregressor_diabetes_gridsearchcv.py

This file was deleted.

2 changes: 1 addition & 1 deletion lce/_version.py
@@ -1 +1 @@
- __version__ = "0.2.7"
+ __version__ = "0.2.8"
14 changes: 3 additions & 11 deletions lce/_xgboost.py
@@ -211,17 +211,13 @@ def p_model(params):
        return scorer(clf, X, y)

    global best
-     global best_print
-     best = 0
-     best_print = 0
+     best = -np.inf

    def f(params):
        global best
-         global best_print
        perf = p_model(params)
        if perf > best:
            best = perf
-             best_print = best
        return {"loss": -best, "status": STATUS_OK}

    rstate = np.random.default_rng(random_state)
@@ -462,18 +458,14 @@ def p_model(params):
        return scorer(reg, X, y)

    global best
-     global best_print
-     best = 0
-     best_print = 0
+     best = -np.inf

    def f(params):
        global best
-         global best_print
        perf = p_model(params)
        if perf > best:
            best = perf
-             best_print = best
-         return {"loss": best, "status": STATUS_OK}
+         return {"loss": -best, "status": STATUS_OK}

    rstate = np.random.default_rng(random_state)
    best_config = fmin(
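The change above initializes the running best score to ``-np.inf`` instead of ``0`` and returns the negated score as the hyperopt loss. Scorers that yield negative values (e.g. scikit-learn's ``neg_mean_squared_error``) can now update the running best, and ``fmin``, which minimizes its objective, therefore maximizes the cross-validation score in both the classifier and regressor paths. A self-contained sketch of that pattern with a toy objective (the search space and score below are illustrative, not LCE's actual ones):

import numpy as np
from hyperopt import STATUS_OK, Trials, fmin, hp, tpe

# Track the best score seen so far; start at -inf (not 0) so that negative
# scores such as neg-MSE can still become the running best.
best = -np.inf

def objective(params):
    global best
    perf = -((params["x"] - 3.0) ** 2)  # toy score to maximize (peak 0 at x = 3)
    if perf > best:
        best = perf
    # hyperopt minimizes the loss, so return the negated best score
    return {"loss": -best, "status": STATUS_OK}

space = {"x": hp.uniform("x", -10.0, 10.0)}
best_config = fmin(
    fn=objective,
    space=space,
    algo=tpe.suggest,
    max_evals=20,
    rstate=np.random.default_rng(0),
    trials=Trials(),
)
print(best_config)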