From 70a382f2f6b5d49a9304962578cf4a0ec46e4cec Mon Sep 17 00:00:00 2001 From: Dominik Sauerer Date: Fri, 15 Mar 2024 10:38:55 +0100 Subject: [PATCH] Add examples --- .../control/ValidationRunController.py | 2 +- src/cpdbench/dataset/CPD2DFromFileDataset.py | 12 +++++-- src/cpdbench/dataset/CPD2DNdarrayDataset.py | 19 ----------- src/cpdbench/dataset/CPDNdarrayDataset.py | 25 ++++++++++++++ src/cpdbench/examples/ExampleAlgorithms.py | 12 +------ src/cpdbench/examples/ExampleDatasets.py | 10 +++--- src/cpdbench/examples/Example_Exception.py | 31 +++++++++++++++++ src/cpdbench/examples/Example_Parallelism.py | 33 ++++++++++++++++++ src/cpdbench/examples/Example_Validation.py | 34 +++++++++++++++++++ .../examples/Example_ValidationRuntime.py | 32 +++++++++++++++++ .../examples/configs/parametersConfig.yml | 1 - src/cpdbench/task/DatasetFetchTask.py | 2 +- src/cpdbench/task/TaskFactory.py | 2 +- tests/TODO | 3 -- 14 files changed, 174 insertions(+), 44 deletions(-) delete mode 100644 src/cpdbench/dataset/CPD2DNdarrayDataset.py create mode 100644 src/cpdbench/dataset/CPDNdarrayDataset.py create mode 100644 src/cpdbench/examples/Example_Exception.py create mode 100644 src/cpdbench/examples/Example_Parallelism.py create mode 100644 src/cpdbench/examples/Example_Validation.py create mode 100644 src/cpdbench/examples/Example_ValidationRuntime.py delete mode 100644 tests/TODO diff --git a/src/cpdbench/control/ValidationRunController.py b/src/cpdbench/control/ValidationRunController.py index 5f495e9..401bccf 100644 --- a/src/cpdbench/control/ValidationRunController.py +++ b/src/cpdbench/control/ValidationRunController.py @@ -60,5 +60,5 @@ def execute_run(self, methods: dict) -> CPDResult: list(map(lambda x: x.get_task_name(), tasks['metrics']))) for i in range(0, len(exception_list)): self._logger.info(f"Error {i}") - self._logger.exception(exception_list[i]) + self._logger.exception(exception_list[i], exc_info=exception_list[i]) return validation_result diff --git a/src/cpdbench/dataset/CPD2DFromFileDataset.py b/src/cpdbench/dataset/CPD2DFromFileDataset.py index 9d75d9d..531abd0 100644 --- a/src/cpdbench/dataset/CPD2DFromFileDataset.py +++ b/src/cpdbench/dataset/CPD2DFromFileDataset.py @@ -9,7 +9,7 @@ class CPD2DFromFileDataset(CPDDataset): into the main memory. Instead numpy will lazy load all needed data points. """ - def __init__(self, file_path: str, dtype: str, ground_truths: list[int]): + def __init__(self, file_path: str, dtype: str, ground_truths: list[int], validation_amount=-1): """Constructor :param file_path: The absolute or relative path to numpy file. :param dtype: The data type in which the numpy array was saved. @@ -19,12 +19,20 @@ def __init__(self, file_path: str, dtype: str, ground_truths: list[int]): self.dtype = dtype self._array = None self._ground_truths = ground_truths + self._validation_amount = validation_amount + def init(self) -> None: self._array = memmap(self.file_path, self.dtype, mode='r') + if self._validation_amount == -1: + self._validation_array = self._array[:] + else: + self._validation_array = self._array[0:self._validation_amount] + validation_array_length = self._validation_array.shape[0] + self._validation_ground_truths = [el for el in self._ground_truths if el < validation_array_length] def get_signal(self) -> tuple[ndarray, list[int]]: return self._array, self._ground_truths def get_validation_preview(self) -> tuple[ndarray, list[int]]: - return self._array, self._ground_truths + return self._validation_array, self._validation_ground_truths diff --git a/src/cpdbench/dataset/CPD2DNdarrayDataset.py b/src/cpdbench/dataset/CPD2DNdarrayDataset.py deleted file mode 100644 index 62dcb96..0000000 --- a/src/cpdbench/dataset/CPD2DNdarrayDataset.py +++ /dev/null @@ -1,19 +0,0 @@ -from numpy import ndarray - -from cpdbench.dataset.CPDDataset import CPDDataset - - -class CPD2DNdarrayDataset(CPDDataset): - - def get_validation_preview(self) -> tuple[ndarray, list[int]]: - return self._ndarray, self._ground_truths - - def __init__(self, numpy_array, ground_truths): - self._ndarray = numpy_array - self._ground_truths = ground_truths - - def init(self) -> None: - pass - - def get_signal(self) -> tuple[ndarray, list[int]]: - return self._ndarray, self._ground_truths diff --git a/src/cpdbench/dataset/CPDNdarrayDataset.py b/src/cpdbench/dataset/CPDNdarrayDataset.py new file mode 100644 index 0000000..88b5b19 --- /dev/null +++ b/src/cpdbench/dataset/CPDNdarrayDataset.py @@ -0,0 +1,25 @@ +from numpy import ndarray + +from cpdbench.dataset.CPDDataset import CPDDataset + + +class CPDNdarrayDataset(CPDDataset): + + def get_validation_preview(self) -> tuple[ndarray, list[int]]: + return self._validation_array, self._validation_ground_truths + + def __init__(self, numpy_array, ground_truths, validation_amount=-1): + self._ndarray = numpy_array + self._ground_truths = ground_truths + if validation_amount == -1: + self._validation_array = self._ndarray[:, :] + else: + self._validation_array = self._ndarray[:, 0:validation_amount] + validation_array_length = self._validation_array.shape[1] + self._validation_ground_truths = [el for el in self._ground_truths if el < validation_array_length] + + def init(self) -> None: + pass + + def get_signal(self) -> tuple[ndarray, list[int]]: + return self._ndarray, self._ground_truths diff --git a/src/cpdbench/examples/ExampleAlgorithms.py b/src/cpdbench/examples/ExampleAlgorithms.py index 2e63f3d..4d5894a 100644 --- a/src/cpdbench/examples/ExampleAlgorithms.py +++ b/src/cpdbench/examples/ExampleAlgorithms.py @@ -9,17 +9,7 @@ def numpy_array_accesses(dataset, array_indexes): return indexes, confidences -def algorithm_execute_single_esst(signal): - """Uses SST as implemented in the changepoynt library as algorithm.""" - detector = SST(90, method='rsvd') - sig = signal[0] - res = detector.transform(sig) - indexes = [res.argmax()] - confidences = [1.0] - return indexes, confidences - - -def algorithm_execute_single_esst(signal, window_length): +def algorithm_execute_single_esst(signal, window_length=90): """Uses SST as implemented in the changepoynt library as algorithm.""" detector = SST(window_length, method='rsvd') sig = signal[0] diff --git a/src/cpdbench/examples/ExampleDatasets.py b/src/cpdbench/examples/ExampleDatasets.py index 0d5b26b..4ba77ba 100644 --- a/src/cpdbench/examples/ExampleDatasets.py +++ b/src/cpdbench/examples/ExampleDatasets.py @@ -3,24 +3,24 @@ import numpy as np from cpdbench.dataset.CPD2DFromFileDataset import CPD2DFromFileDataset -from cpdbench.dataset.CPD2DNdarrayDataset import CPD2DNdarrayDataset +from cpdbench.dataset.CPDNdarrayDataset import CPDNdarrayDataset -def get_extreme_large_dataset_from_file(): +def get_extreme_large_dataset_from_file(validation_amount=-1): path = pathlib.Path(__file__).parent.resolve() path = path.joinpath("data", "very_big_numpy_file.dat") - dataset = CPD2DFromFileDataset(str(path), "float32", [5, 245, 255, 256, 25]) + dataset = CPD2DFromFileDataset(str(path), "float32", [5, 245, 255, 256, 25], validation_amount) return dataset def dataset_get_apple_dataset(): raw_data = np.load("../../../data/apple.npy") timeseries = raw_data[:, 0] reshaped_ts = np.reshape(timeseries, [1, timeseries.size]) - return CPD2DNdarrayDataset(reshaped_ts, [337]) + return CPDNdarrayDataset(reshaped_ts, [337]) def dataset_get_bitcoin_dataset(): raw_data = np.load("../../../data/bitcoin.npy") timeseries = raw_data[:, 0] reshaped_ts = np.reshape(timeseries, [1, timeseries.size]) - return CPD2DNdarrayDataset(reshaped_ts, [569]) \ No newline at end of file + return CPDNdarrayDataset(reshaped_ts, [569]) \ No newline at end of file diff --git a/src/cpdbench/examples/Example_Exception.py b/src/cpdbench/examples/Example_Exception.py new file mode 100644 index 0000000..e5d4d87 --- /dev/null +++ b/src/cpdbench/examples/Example_Exception.py @@ -0,0 +1,31 @@ +from cpdbench.CPDBench import CPDBench +import cpdbench.examples.ExampleDatasets as example_datasets +import cpdbench.examples.ExampleAlgorithms as example_algorithms +import cpdbench.examples.ExampleMetrics as example_metrics + +cpdb = CPDBench() + + +@cpdb.dataset +def get_apple_dataset(): + return example_datasets.dataset_get_apple_dataset() + + +@cpdb.dataset +def get_bitcoin_dataset(): + raise KeyError + return example_datasets.dataset_get_bitcoin_dataset() + + +@cpdb.algorithm +def execute_esst_test(signal): + return example_algorithms.algorithm_execute_single_esst(signal) + + +@cpdb.metric +def calc_accuracy(indexes, scores, ground_truth): + return example_metrics.metric_accuracy_in_allowed_windows(indexes, scores, ground_truth, window_size=25) + + +if __name__ == '__main__': + cpdb.start() diff --git a/src/cpdbench/examples/Example_Parallelism.py b/src/cpdbench/examples/Example_Parallelism.py new file mode 100644 index 0000000..5223dd1 --- /dev/null +++ b/src/cpdbench/examples/Example_Parallelism.py @@ -0,0 +1,33 @@ +from time import sleep + +from cpdbench.CPDBench import CPDBench +import cpdbench.examples.ExampleDatasets as example_datasets +import cpdbench.examples.ExampleAlgorithms as example_algorithms +import cpdbench.examples.ExampleMetrics as example_metrics + +cpdb = CPDBench() + + +@cpdb.dataset +def get_apple_dataset(): + sleep(10) + return example_datasets.dataset_get_apple_dataset() + + +@cpdb.dataset +def get_bitcoin_dataset(): + return example_datasets.dataset_get_bitcoin_dataset() + + +@cpdb.algorithm +def execute_esst_test(signal): + return example_algorithms.algorithm_execute_single_esst(signal) + + +@cpdb.metric +def calc_accuracy(indexes, scores, ground_truth): + return example_metrics.metric_accuracy_in_allowed_windows(indexes, scores, ground_truth, window_size=25) + + +if __name__ == '__main__': + cpdb.start() diff --git a/src/cpdbench/examples/Example_Validation.py b/src/cpdbench/examples/Example_Validation.py new file mode 100644 index 0000000..1e7ea0a --- /dev/null +++ b/src/cpdbench/examples/Example_Validation.py @@ -0,0 +1,34 @@ +from cpdbench.CPDBench import CPDBench +import cpdbench.examples.ExampleDatasets as example_datasets +import cpdbench.examples.ExampleAlgorithms as example_algorithms +import cpdbench.examples.ExampleMetrics as example_metrics + +cpdb = CPDBench() + + +@cpdb.dataset +def get_apple_dataset(): + return example_datasets.dataset_get_apple_dataset() + + +@cpdb.dataset +def get_bitcoin_dataset(): + return example_datasets.dataset_get_bitcoin_dataset() + + +@cpdb.algorithm +def execute_esst_test_wrong(signal, window): + return example_algorithms.algorithm_execute_single_esst(signal) + +@cpdb.algorithm +def execute_esst_test(signal): + return example_algorithms.algorithm_execute_single_esst(signal) + + +@cpdb.metric +def calc_accuracy(indexes, scores, ground_truth): + return example_metrics.metric_accuracy_in_allowed_windows(indexes, scores, ground_truth, window_size=25) + + +if __name__ == '__main__': + cpdb.start() diff --git a/src/cpdbench/examples/Example_ValidationRuntime.py b/src/cpdbench/examples/Example_ValidationRuntime.py new file mode 100644 index 0000000..fc206e9 --- /dev/null +++ b/src/cpdbench/examples/Example_ValidationRuntime.py @@ -0,0 +1,32 @@ +from cpdbench.examples import ExampleAlgorithms +from cpdbench.examples.ExampleDatasets import get_extreme_large_dataset_from_file +from cpdbench.examples.ExampleMetrics import metric_accuracy_in_allowed_windows +from cpdbench.CPDBench import CPDBench +import pathlib + +cpdb = CPDBench() + + +@cpdb.dataset +def get_large_dataset(): + return get_extreme_large_dataset_from_file(1000) + + +@cpdb.algorithm +def execute_algorithm(dataset): + dataset = dataset.reshape((1, dataset.size)) + res = ExampleAlgorithms.algorithm_execute_single_esst(dataset) + assert dataset.ndim == 3 + return res + + +@cpdb.metric +def compute_metric(indexes, confidences, ground_truths): + return metric_accuracy_in_allowed_windows(indexes, confidences, ground_truths, window_size=20) + + +if __name__ == '__main__': + path = pathlib.Path(__file__).parent.resolve() + path = path.joinpath("configs", "VeryLargeDatasetConfig.yml") + #cpdb.start(config_file=str(path)) + cpdb.validate(config_file=str(path)) diff --git a/src/cpdbench/examples/configs/parametersConfig.yml b/src/cpdbench/examples/configs/parametersConfig.yml index 57f48b3..2be9875 100644 --- a/src/cpdbench/examples/configs/parametersConfig.yml +++ b/src/cpdbench/examples/configs/parametersConfig.yml @@ -7,7 +7,6 @@ multiprocessing: True result: filename: cpdbench-result-parameters.json - user: algorithm-executions: - window_length: 90 diff --git a/src/cpdbench/task/DatasetFetchTask.py b/src/cpdbench/task/DatasetFetchTask.py index fa07213..cf84967 100644 --- a/src/cpdbench/task/DatasetFetchTask.py +++ b/src/cpdbench/task/DatasetFetchTask.py @@ -26,7 +26,7 @@ def validate_input(self, *args) -> CPDDataset: dataset.init() except Exception as e: raise DatasetValidationException(f"The validation of {get_name_of_function(self._function)} failed.") \ - from e # TODO: Funktioniert das noch? + from e else: return dataset diff --git a/src/cpdbench/task/TaskFactory.py b/src/cpdbench/task/TaskFactory.py index 9d28699..6c9f924 100644 --- a/src/cpdbench/task/TaskFactory.py +++ b/src/cpdbench/task/TaskFactory.py @@ -71,7 +71,7 @@ def create_tasks_with_parameters(self, function: Callable, task_type: TaskType) else: for i in range(len(param_values)): if param in global_params: - param_values[i].update({param: vals[0]}) # global param # TODO: was wenn param wo fehlt? + param_values[i].update({param: vals[0]}) # global param else: param_values[i].update({param: vals[i]}) # execution param diff --git a/tests/TODO b/tests/TODO deleted file mode 100644 index 08914c5..0000000 --- a/tests/TODO +++ /dev/null @@ -1,3 +0,0 @@ -Important: -- BenchConfig -//TODO: 2+ Runtime Parameter in einer Funktion?? \ No newline at end of file