standardscaler

pyRiemann · gcattan · Nov 14, 2023 · Oct 18, 2023 · Oct 18, 2023 · Oct 18, 2023
commit 0d1fd197612c4757d5d407a7aa485b68a9adb581
diff --git a/examples/other_datasets/plot_financial_data.py b/examples/other_datasets/plot_financial_data.py
@@ -22,6 +22,7 @@
 from sklearn.model_selection import GridSearchCV
 from sklearn.pipeline import make_pipeline
 from sklearn.model_selection import train_test_split
+from sklearn.preprocessing import StandardScaler
 from sklearn.svm import SVC
 from imblearn.under_sampling import NearMiss
 from pyriemann.preprocessing import Whitening
@@ -81,6 +82,8 @@
 # of the `ToEpochs` transformer (see below)
 features["index"] = features.index
 
+# Standardize every feature column (fit on the full dataset, before the split)
+features_scaled = StandardScaler().fit_transform(features.to_numpy())
 
 ##############################################################################
 # Pipeline for binary classification
@@ -185,23 +188,19 @@ def transform(self, X):
 # Note: at this stage `features` also contains the `index` column.
 # So `NearMiss` we choose the closest 200 non-fraud epochs to the 200 fraud-epochs
 # based also on this `index` column. This should be improved for real use cases.
-X, y = NearMiss().fit_resample(features.to_numpy(), target.to_numpy())
+X, y = NearMiss().fit_resample(features_scaled, target.to_numpy())
 
-X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
+X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
 
 labels, counts = np.unique(y_train, return_counts=True)
 print(f"Training set shape: {X_train.shape}, genuine: {counts[0]}, frauds: {counts[1]}")
 
 labels, counts = np.unique(y_test, return_counts=True)
 print(f"Testing set shape: {X_test.shape}, genuine: {counts[0]}, frauds: {counts[1]}")
 
-# before fitting the GridSearchCV, let's display a sample of the epochs:
+# Before fitting the GridSearchCV, let's display the "ERP" (see [3]_)
 epochs = ToEpochs(n=10).transform(X_train)
-print("Profile of an epoch:")
-print(epochs[0])
 
-# ...and the "ERP"
-# (see https://pyriemann.readthedocs.io/en/latest/auto_examples/ERP/plot_ERP.html)
 plot_waveforms(epochs, "hist")
 plt.show()
 
@@ -213,8 +212,9 @@ def transform(self, X):
 # Let's fit our GridSearchCV, to find the best hyper parameters
 gs.fit(X_train, y_train)
 
-# Print cross-validation results
-print(gs.cv_results_)
+# Print best parameters
+print("Best parameters are:")
+print(gs.best_params_)
 
 # This is the best score with the classical SVM.
 # (with this train/test split at least)
@@ -234,5 +234,8 @@ def transform(self, X):
 # ----------
 # .. [1] 'SUSPICIOUS ACTIVITY DETECTION USING QUANTUM COMPUTER',
 #         Patent application number: 18/380799
-# .. [2]  'Synthetic Data of Transactions for Inmediate Loans Fraud'
+# .. [2] 'Synthetic Data of Transactions for Inmediate Loans Fraud'
 #         https://zenodo.org/records/7418458
+# .. [3] https://pyriemann.readthedocs.io/en/latest/auto_examples/ERP/plot_ERP.html
+#
+#