From 70a382f2f6b5d49a9304962578cf4a0ec46e4cec Mon Sep 17 00:00:00 2001
From: Dominik Sauerer <dominik.sauerer@gmail.com>
Date: Fri, 15 Mar 2024 10:38:55 +0100
Subject: [PATCH] Add examples and dataset validation previews

---
 .../control/ValidationRunController.py        |  2 +-
 src/cpdbench/dataset/CPD2DFromFileDataset.py  | 12 +++++--
 src/cpdbench/dataset/CPD2DNdarrayDataset.py   | 19 -----------
 src/cpdbench/dataset/CPDNdarrayDataset.py     | 25 ++++++++++++++
 src/cpdbench/examples/ExampleAlgorithms.py    | 12 +------
 src/cpdbench/examples/ExampleDatasets.py      | 10 +++---
 src/cpdbench/examples/Example_Exception.py    | 31 +++++++++++++++++
 src/cpdbench/examples/Example_Parallelism.py  | 33 ++++++++++++++++++
 src/cpdbench/examples/Example_Validation.py   | 34 +++++++++++++++++++
 .../examples/Example_ValidationRuntime.py     | 32 +++++++++++++++++
 .../examples/configs/parametersConfig.yml     |  1 -
 src/cpdbench/task/DatasetFetchTask.py         |  2 +-
 src/cpdbench/task/TaskFactory.py              |  2 +-
 tests/TODO                                    |  3 --
 14 files changed, 174 insertions(+), 44 deletions(-)
 delete mode 100644 src/cpdbench/dataset/CPD2DNdarrayDataset.py
 create mode 100644 src/cpdbench/dataset/CPDNdarrayDataset.py
 create mode 100644 src/cpdbench/examples/Example_Exception.py
 create mode 100644 src/cpdbench/examples/Example_Parallelism.py
 create mode 100644 src/cpdbench/examples/Example_Validation.py
 create mode 100644 src/cpdbench/examples/Example_ValidationRuntime.py
 delete mode 100644 tests/TODO

diff --git a/src/cpdbench/control/ValidationRunController.py b/src/cpdbench/control/ValidationRunController.py
index 5f495e9..401bccf 100644
--- a/src/cpdbench/control/ValidationRunController.py
+++ b/src/cpdbench/control/ValidationRunController.py
@@ -60,5 +60,5 @@ def execute_run(self, methods: dict) -> CPDResult:
                                                 list(map(lambda x: x.get_task_name(), tasks['metrics'])))
         for i in range(0, len(exception_list)):
             self._logger.info(f"Error {i}")
-            self._logger.exception(exception_list[i])
+            self._logger.exception(exception_list[i], exc_info=exception_list[i])
         return validation_result
diff --git a/src/cpdbench/dataset/CPD2DFromFileDataset.py b/src/cpdbench/dataset/CPD2DFromFileDataset.py
index 9d75d9d..531abd0 100644
--- a/src/cpdbench/dataset/CPD2DFromFileDataset.py
+++ b/src/cpdbench/dataset/CPD2DFromFileDataset.py
@@ -9,7 +9,7 @@ class CPD2DFromFileDataset(CPDDataset):
     into the main memory. Instead numpy will lazy load all needed data points.
     """
 
-    def __init__(self, file_path: str, dtype: str, ground_truths: list[int]):
+    def __init__(self, file_path: str, dtype: str, ground_truths: list[int], validation_amount=-1):
         """Constructor
         :param file_path: The absolute or relative path to numpy file.
         :param dtype: The data type in which the numpy array was saved.
@@ -19,12 +19,20 @@ def __init__(self, file_path: str, dtype: str, ground_truths: list[int]):
         self.dtype = dtype
         self._array = None
         self._ground_truths = ground_truths
+        self._validation_amount = validation_amount
+
 
     def init(self) -> None:
         self._array = memmap(self.file_path, self.dtype, mode='r')
+        # A stop of None selects every element, so -1 previews the whole array.
+        preview_stop = None if self._validation_amount == -1 else self._validation_amount
+        self._validation_array = self._array[:preview_stop]
+        # Keep only the ground truths that fall inside the preview.
+        preview_length = self._validation_array.shape[0]
+        self._validation_ground_truths = [gt for gt in self._ground_truths if gt < preview_length]
 
     def get_signal(self) -> tuple[ndarray, list[int]]:
         return self._array, self._ground_truths
 
     def get_validation_preview(self) -> tuple[ndarray, list[int]]:
-        return self._array, self._ground_truths
+        return self._validation_array, self._validation_ground_truths
diff --git a/src/cpdbench/dataset/CPD2DNdarrayDataset.py b/src/cpdbench/dataset/CPD2DNdarrayDataset.py
deleted file mode 100644
index 62dcb96..0000000
--- a/src/cpdbench/dataset/CPD2DNdarrayDataset.py
+++ /dev/null
@@ -1,19 +0,0 @@
-from numpy import ndarray
-
-from cpdbench.dataset.CPDDataset import CPDDataset
-
-
-class CPD2DNdarrayDataset(CPDDataset):
-
-    def get_validation_preview(self) -> tuple[ndarray, list[int]]:
-        return self._ndarray, self._ground_truths
-
-    def __init__(self, numpy_array, ground_truths):
-        self._ndarray = numpy_array
-        self._ground_truths = ground_truths
-
-    def init(self) -> None:
-        pass
-
-    def get_signal(self) -> tuple[ndarray, list[int]]:
-        return self._ndarray, self._ground_truths
diff --git a/src/cpdbench/dataset/CPDNdarrayDataset.py b/src/cpdbench/dataset/CPDNdarrayDataset.py
new file mode 100644
index 0000000..88b5b19
--- /dev/null
+++ b/src/cpdbench/dataset/CPDNdarrayDataset.py
@@ -0,0 +1,25 @@
+from numpy import ndarray
+
+from cpdbench.dataset.CPDDataset import CPDDataset
+
+
+class CPDNdarrayDataset(CPDDataset):
+    """Dataset backed by an in-memory numpy array that is sliced along axis 1."""
+
+    def __init__(self, numpy_array, ground_truths, validation_amount=-1):
+        """Keep the signal and build its preview; -1 previews all of axis 1."""
+        self._ndarray = numpy_array
+        self._ground_truths = ground_truths
+        preview_stop = None if validation_amount == -1 else validation_amount
+        self._validation_array = self._ndarray[:, :preview_stop]
+        preview_length = self._validation_array.shape[1]
+        self._validation_ground_truths = [gt for gt in self._ground_truths if gt < preview_length]
+
+    def init(self) -> None:
+        """Nothing to do here: the constructor already received the array."""
+
+    def get_signal(self) -> tuple[ndarray, list[int]]:
+        return self._ndarray, self._ground_truths
+
+    def get_validation_preview(self) -> tuple[ndarray, list[int]]:
+        return self._validation_array, self._validation_ground_truths
diff --git a/src/cpdbench/examples/ExampleAlgorithms.py b/src/cpdbench/examples/ExampleAlgorithms.py
index 2e63f3d..4d5894a 100644
--- a/src/cpdbench/examples/ExampleAlgorithms.py
+++ b/src/cpdbench/examples/ExampleAlgorithms.py
@@ -9,17 +9,7 @@ def numpy_array_accesses(dataset, array_indexes):
     return indexes, confidences
 
 
-def algorithm_execute_single_esst(signal):
-    """Uses SST as implemented in the changepoynt library as algorithm."""
-    detector = SST(90, method='rsvd')
-    sig = signal[0]
-    res = detector.transform(sig)
-    indexes = [res.argmax()]
-    confidences = [1.0]
-    return indexes, confidences
-
-
-def algorithm_execute_single_esst(signal, window_length):
+def algorithm_execute_single_esst(signal, window_length=90):
     """Uses SST as implemented in the changepoynt library as algorithm."""
     detector = SST(window_length, method='rsvd')
     sig = signal[0]
diff --git a/src/cpdbench/examples/ExampleDatasets.py b/src/cpdbench/examples/ExampleDatasets.py
index 0d5b26b..4ba77ba 100644
--- a/src/cpdbench/examples/ExampleDatasets.py
+++ b/src/cpdbench/examples/ExampleDatasets.py
@@ -3,24 +3,24 @@
 import numpy as np
 
 from cpdbench.dataset.CPD2DFromFileDataset import CPD2DFromFileDataset
-from cpdbench.dataset.CPD2DNdarrayDataset import CPD2DNdarrayDataset
+from cpdbench.dataset.CPDNdarrayDataset import CPDNdarrayDataset
 
 
-def get_extreme_large_dataset_from_file():
+def get_extreme_large_dataset_from_file(validation_amount=-1):
     path = pathlib.Path(__file__).parent.resolve()
     path = path.joinpath("data", "very_big_numpy_file.dat")
-    dataset = CPD2DFromFileDataset(str(path), "float32", [5, 245, 255, 256, 25])
+    dataset = CPD2DFromFileDataset(str(path), "float32", [5, 245, 255, 256, 25], validation_amount)
     return dataset
 
 def dataset_get_apple_dataset():
     raw_data = np.load("../../../data/apple.npy")
     timeseries = raw_data[:, 0]
     reshaped_ts = np.reshape(timeseries, [1, timeseries.size])
-    return CPD2DNdarrayDataset(reshaped_ts, [337])
+    return CPDNdarrayDataset(reshaped_ts, [337])
 
 
 def dataset_get_bitcoin_dataset():
     raw_data = np.load("../../../data/bitcoin.npy")
     timeseries = raw_data[:, 0]
     reshaped_ts = np.reshape(timeseries, [1, timeseries.size])
-    return CPD2DNdarrayDataset(reshaped_ts, [569])
\ No newline at end of file
+    return CPDNdarrayDataset(reshaped_ts, [569])
\ No newline at end of file
diff --git a/src/cpdbench/examples/Example_Exception.py b/src/cpdbench/examples/Example_Exception.py
new file mode 100644
index 0000000..e5d4d87
--- /dev/null
+++ b/src/cpdbench/examples/Example_Exception.py
@@ -0,0 +1,31 @@
+from cpdbench.CPDBench import CPDBench
+import cpdbench.examples.ExampleDatasets as example_datasets
+import cpdbench.examples.ExampleAlgorithms as example_algorithms
+import cpdbench.examples.ExampleMetrics as example_metrics
+
+cpdb = CPDBench()
+
+
+@cpdb.dataset
+def get_apple_dataset():
+    return example_datasets.dataset_get_apple_dataset()
+
+
+@cpdb.dataset
+def get_bitcoin_dataset():
+    raise KeyError  # presumably deliberate: makes this dataset fail for the demo - confirm
+    return example_datasets.dataset_get_bitcoin_dataset()  # unreachable after the raise
+
+
+@cpdb.algorithm
+def execute_esst_test(signal):
+    return example_algorithms.algorithm_execute_single_esst(signal)
+
+
+@cpdb.metric
+def calc_accuracy(indexes, scores, ground_truth):
+    return example_metrics.metric_accuracy_in_allowed_windows(indexes, scores, ground_truth, window_size=25)
+
+
+if __name__ == '__main__':
+    cpdb.start()
diff --git a/src/cpdbench/examples/Example_Parallelism.py b/src/cpdbench/examples/Example_Parallelism.py
new file mode 100644
index 0000000..5223dd1
--- /dev/null
+++ b/src/cpdbench/examples/Example_Parallelism.py
@@ -0,0 +1,33 @@
+from time import sleep
+
+from cpdbench.CPDBench import CPDBench
+import cpdbench.examples.ExampleDatasets as example_datasets
+import cpdbench.examples.ExampleAlgorithms as example_algorithms
+import cpdbench.examples.ExampleMetrics as example_metrics
+
+cpdb = CPDBench()
+
+
+@cpdb.dataset
+def get_apple_dataset():
+    sleep(10)  # presumably simulates a slow dataset so parallel execution is visible - confirm
+    return example_datasets.dataset_get_apple_dataset()
+
+
+@cpdb.dataset
+def get_bitcoin_dataset():
+    return example_datasets.dataset_get_bitcoin_dataset()
+
+
+@cpdb.algorithm
+def execute_esst_test(signal):
+    return example_algorithms.algorithm_execute_single_esst(signal)
+
+
+@cpdb.metric
+def calc_accuracy(indexes, scores, ground_truth):
+    return example_metrics.metric_accuracy_in_allowed_windows(indexes, scores, ground_truth, window_size=25)
+
+
+if __name__ == '__main__':
+    cpdb.start()
diff --git a/src/cpdbench/examples/Example_Validation.py b/src/cpdbench/examples/Example_Validation.py
new file mode 100644
index 0000000..1e7ea0a
--- /dev/null
+++ b/src/cpdbench/examples/Example_Validation.py
@@ -0,0 +1,34 @@
+from cpdbench.CPDBench import CPDBench
+import cpdbench.examples.ExampleDatasets as example_datasets
+import cpdbench.examples.ExampleAlgorithms as example_algorithms
+import cpdbench.examples.ExampleMetrics as example_metrics
+
+cpdb = CPDBench()
+
+
+@cpdb.dataset
+def get_apple_dataset():
+    return example_datasets.dataset_get_apple_dataset()
+
+
+@cpdb.dataset
+def get_bitcoin_dataset():
+    return example_datasets.dataset_get_bitcoin_dataset()
+
+
+@cpdb.algorithm
+def execute_esst_test_wrong(signal, window):  # NOTE(review): `window` is unused; presumably the deliberate mistake - confirm
+    return example_algorithms.algorithm_execute_single_esst(signal)
+
+@cpdb.algorithm
+def execute_esst_test(signal):
+    return example_algorithms.algorithm_execute_single_esst(signal)
+
+
+@cpdb.metric
+def calc_accuracy(indexes, scores, ground_truth):
+    return example_metrics.metric_accuracy_in_allowed_windows(indexes, scores, ground_truth, window_size=25)
+
+
+if __name__ == '__main__':
+    cpdb.start()
diff --git a/src/cpdbench/examples/Example_ValidationRuntime.py b/src/cpdbench/examples/Example_ValidationRuntime.py
new file mode 100644
index 0000000..fc206e9
--- /dev/null
+++ b/src/cpdbench/examples/Example_ValidationRuntime.py
@@ -0,0 +1,32 @@
+from cpdbench.examples import ExampleAlgorithms
+from cpdbench.examples.ExampleDatasets import get_extreme_large_dataset_from_file
+from cpdbench.examples.ExampleMetrics import metric_accuracy_in_allowed_windows
+from cpdbench.CPDBench import CPDBench
+import pathlib
+
+cpdb = CPDBench()
+
+
+@cpdb.dataset
+def get_large_dataset():
+    return get_extreme_large_dataset_from_file(1000)
+
+
+@cpdb.algorithm
+def execute_algorithm(dataset):
+    dataset = dataset.reshape((1, dataset.size))
+    res = ExampleAlgorithms.algorithm_execute_single_esst(dataset)
+    assert dataset.ndim == 3  # cannot hold for a numpy array: the reshape above gives ndim == 2 (presumably the intended demo error)
+    return res
+
+
+@cpdb.metric
+def compute_metric(indexes, confidences, ground_truths):
+    return metric_accuracy_in_allowed_windows(indexes, confidences, ground_truths, window_size=20)
+
+
+if __name__ == '__main__':
+    path = pathlib.Path(__file__).parent.resolve()
+    path = path.joinpath("configs", "VeryLargeDatasetConfig.yml")
+    # Alternative entry point, currently disabled: cpdb.start(config_file=str(path))
+    cpdb.validate(config_file=str(path))
diff --git a/src/cpdbench/examples/configs/parametersConfig.yml b/src/cpdbench/examples/configs/parametersConfig.yml
index 57f48b3..2be9875 100644
--- a/src/cpdbench/examples/configs/parametersConfig.yml
+++ b/src/cpdbench/examples/configs/parametersConfig.yml
@@ -7,7 +7,6 @@ multiprocessing: True
 result:
   filename: cpdbench-result-parameters.json
 
-
 user:
   algorithm-executions:
     - window_length: 90
diff --git a/src/cpdbench/task/DatasetFetchTask.py b/src/cpdbench/task/DatasetFetchTask.py
index fa07213..cf84967 100644
--- a/src/cpdbench/task/DatasetFetchTask.py
+++ b/src/cpdbench/task/DatasetFetchTask.py
@@ -26,7 +26,7 @@ def validate_input(self, *args) -> CPDDataset:
             dataset.init()
         except Exception as e:
             raise DatasetValidationException(f"The validation of {get_name_of_function(self._function)} failed.") \
-                from e # TODO: Funktioniert das noch?
+                from e
         else:
             return dataset
 
diff --git a/src/cpdbench/task/TaskFactory.py b/src/cpdbench/task/TaskFactory.py
index 9d28699..6c9f924 100644
--- a/src/cpdbench/task/TaskFactory.py
+++ b/src/cpdbench/task/TaskFactory.py
@@ -71,7 +71,7 @@ def create_tasks_with_parameters(self, function: Callable, task_type: TaskType)
             else:
                 for i in range(len(param_values)):
                     if param in global_params:
-                        param_values[i].update({param: vals[0]})  # global param # TODO: was wenn param wo fehlt?
+                        param_values[i].update({param: vals[0]})  # global param
                     else:
                         param_values[i].update({param: vals[i]})  # execution param
 
diff --git a/tests/TODO b/tests/TODO
deleted file mode 100644
index 08914c5..0000000
--- a/tests/TODO
+++ /dev/null
@@ -1,3 +0,0 @@
-Important:
-- BenchConfig
-//TODO: 2+ Runtime Parameter in einer Funktion??
\ No newline at end of file