Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Universal ExperimentAnalyzer #181

Merged
merged 22 commits into from
Sep 12, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Empty file.
Binary file added examples/experiment_analyzer/data.tar.gz
Binary file not shown.
78 changes: 78 additions & 0 deletions examples/experiment_analyzer/experiment_analyzer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
import os
import tarfile

import matplotlib.pyplot as plt
from scipy.stats import mannwhitneyu, kruskal, ttest_ind

from experiments.experiment_analyzer import ExperimentAnalyzer
from golem.core.paths import project_root


def _save_boxplot(values_by_setup, title, save_dir, file_name):
    """ Draws one box per setup and saves the figure to ``save_dir/file_name``.

    :param values_by_setup: dict mapping a setup name to the values to draw for it.
    :param title: title of the plot.
    :param save_dir: directory to save the image to. It is created if it does not exist.
    :param file_name: name of the image file.
    """
    os.makedirs(save_dir, exist_ok=True)
    plt.boxplot(list(values_by_setup.values()), labels=list(values_by_setup.keys()))
    plt.title(title)
    plt.savefig(os.path.join(save_dir, file_name))
    # close the current figure so that boxes do not pile up from plot to plot
    plt.close()


if __name__ == '__main__':
    # The result of analysis can be seen without running the script in
    # '~/GOLEM/examples/experiment_analyzer/result_analysis.tar.gz'.

    path_to_root = os.path.join(project_root(), 'examples', 'experiment_analyzer')

    # extract data if there is an archive
    path_to_archive = os.path.join(path_to_root, 'data.tar.gz')
    if os.path.isfile(path_to_archive):
        # Extract next to the archive and not into the current working directory:
        # the data is read from `path_to_root` below, so the script must not depend
        # on the directory it was launched from.
        # NOTE(review): assumes the archive holds a top-level 'data' folder -- confirm.
        with tarfile.open(path_to_archive, "r:gz") as tar:
            tar.extractall(path=path_to_root)

    path_to_experiment_data = os.path.join(path_to_root, 'data')
    path_to_save = os.path.join(path_to_root, 'result_analysis')

    analyzer = ExperimentAnalyzer(path_to_root=path_to_experiment_data,
                                  folders_to_ignore=['result_analysis', 'Thumbs.db'])

    # to get convergence table with mean values
    path_to_save_convergence = os.path.join(path_to_save, 'convergence')

    convergence_mean = analyzer.analyze_convergence(history_folder='histories', is_raise=False,
                                                    path_to_save=path_to_save_convergence,
                                                    is_mean=True)

    # to get convergence boxplots
    convergence = analyzer.analyze_convergence(history_folder='histories', is_raise=False)
    path_to_save_convergence_boxplots = os.path.join(path_to_save_convergence, 'convergence_boxplots')

    # datasets are taken from the first setup; nothing is plotted if there are no setups at all
    setups = list(convergence.keys())
    datasets = convergence[setups[0]].keys() if setups else []
    for dataset in datasets:
        to_compare = {setup: list(convergence[setup][dataset]) for setup in setups}
        _save_boxplot(to_compare, title=f'Convergence on {dataset}',
                      save_dir=path_to_save_convergence_boxplots,
                      file_name=f'convergence_{dataset}.png')

    # to get metrics table with mean values
    path_to_save_metrics = os.path.join(path_to_save, 'metrics')
    metric_names = ['roc_auc', 'f1']
    metrics_dict_mean = analyzer.analyze_metrics(metric_names=metric_names, file_name='evaluation_results.csv',
                                                 is_raise=False, path_to_save=path_to_save_metrics,
                                                 is_mean=True)

    # to get metrics boxplots
    metrics_dict = analyzer.analyze_metrics(metric_names=metric_names, file_name='evaluation_results.csv',
                                            is_raise=False)
    path_to_save_metrics_boxplots = os.path.join(path_to_save_metrics, 'metrics_boxplot')

    for metric in metric_names:
        metric_by_setup = metrics_dict[metric]
        setups = list(metric_by_setup.keys())
        datasets = metric_by_setup[setups[0]].keys() if setups else []
        cur_path_to_save = os.path.join(path_to_save_metrics_boxplots, metric)
        for dataset in datasets:
            # the sign of the values is flipped before plotting
            # NOTE(review): presumably metrics are stored negated for minimization -- confirm.
            to_compare = {setup: [-1 * value for value in metric_by_setup[setup][dataset]]
                          for setup in setups}
            _save_boxplot(to_compare, title=f'{metric} on {dataset}',
                          save_dir=cur_path_to_save,
                          file_name=f'{metric}_{dataset}.png')

    # to get stat test results table
    path_to_save_stat = os.path.join(path_to_save, 'statistic')
    stat_dict = analyzer.analyze_statistical_significance(data_to_analyze=metrics_dict['roc_auc'],
                                                          stat_tests=[mannwhitneyu, kruskal, ttest_ind],
                                                          path_to_save=path_to_save_stat)
Binary file not shown.
322 changes: 322 additions & 0 deletions experiments/experiment_analyzer.py

Large diffs are not rendered by default.

3 changes: 3 additions & 0 deletions golem/core/dag/graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -206,6 +206,7 @@ def show(self, save_path: Optional[Union[PathLike, str]] = None, engine: Optiona
node_color: Optional[NodeColorType] = None, dpi: Optional[int] = None,
node_size_scale: Optional[float] = None, font_size_scale: Optional[float] = None,
edge_curvature_scale: Optional[float] = None,
title: Optional[str] = None,
nodes_labels: Dict[int, str] = None, edges_labels: Dict[int, str] = None):
"""Visualizes graph or saves its picture to the specified ``path``

Expand All @@ -217,13 +218,15 @@ def show(self, save_path: Optional[Union[PathLike, str]] = None, engine: Optiona
font_size_scale: use to make font size bigger or lesser. Supported only for the engine 'matplotlib'.
edge_curvature_scale: use to make edges more or less curved. Supported only for the engine 'matplotlib'.
dpi: DPI of the output image. Not supported for the engine 'pyvis'.
title: title for plot
nodes_labels: labels to display near nodes
edges_labels: labels to display near edges
"""
GraphVisualizer(graph=self)\
.visualise(save_path=save_path, engine=engine, node_color=node_color, dpi=dpi,
node_size_scale=node_size_scale, font_size_scale=font_size_scale,
edge_curvature_scale=edge_curvature_scale,
title=title,
nodes_labels=nodes_labels, edges_labels=edges_labels)

@property
Expand Down
6 changes: 4 additions & 2 deletions golem/visualisation/graph_viz.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ def visualise(self, save_path: Optional[PathType] = None, engine: Optional[str]
node_color: Optional[NodeColorType] = None, dpi: Optional[int] = None,
node_size_scale: Optional[float] = None,
font_size_scale: Optional[float] = None, edge_curvature_scale: Optional[float] = None,
title: Optional[str] = None,
nodes_labels: Dict[int, str] = None, edges_labels: Dict[int, str] = None):
engine = engine or self.get_predefined_value('engine')

Expand All @@ -67,7 +68,7 @@ def visualise(self, save_path: Optional[PathType] = None, engine: Optional[str]
self.__draw_with_networkx(save_path=save_path, node_color=node_color, dpi=dpi,
node_size_scale=node_size_scale, font_size_scale=font_size_scale,
edge_curvature_scale=edge_curvature_scale,
nodes_labels=nodes_labels, edges_labels=edges_labels)
title=title, nodes_labels=nodes_labels, edges_labels=edges_labels)
elif engine == 'pyvis':
self.__draw_with_pyvis(save_path, node_color)
elif engine == 'graphviz':
Expand Down Expand Up @@ -166,7 +167,7 @@ def __draw_with_networkx(self, save_path: Optional[PathType] = None,
node_color: Optional[NodeColorType] = None,
dpi: Optional[int] = None, node_size_scale: Optional[float] = None,
font_size_scale: Optional[float] = None, edge_curvature_scale: Optional[float] = None,
graph_to_nx_convert_func: Optional[Callable] = None,
graph_to_nx_convert_func: Optional[Callable] = None, title: Optional[str] = None,
nodes_labels: Dict[int, str] = None, edges_labels: Dict[int, str] = None):
save_path = save_path or self.get_predefined_value('save_path')
node_color = node_color or self.get_predefined_value('node_color')
Expand All @@ -180,6 +181,7 @@ def __draw_with_networkx(self, save_path: Optional[PathType] = None,
fig, ax = plt.subplots(figsize=(7, 7))
fig.set_dpi(dpi)

plt.title(title)
self.draw_nx_dag(ax, node_color, node_size_scale, font_size_scale, edge_curvature_scale,
graph_to_nx_convert_func, nodes_labels, edges_labels)
if not save_path:
Expand Down
4 changes: 3 additions & 1 deletion golem/visualisation/opt_history/fitness_line.py
Original file line number Diff line number Diff line change
Expand Up @@ -281,11 +281,13 @@ def __init__(self,

def visualize(self,
save_path: Optional[Union[os.PathLike, str]] = None,
with_confidence: bool = True,
metric_id: int = 0,
dpi: Optional[int] = None):
""" Visualizes the best fitness values during the evolution in the form of line.
:param save_path: path to save the visualization. If set, then the image will be saved,
and if not, it will be displayed.
:param with_confidence: bool param specifying to use confidence interval or not.
:param metric_id: numeric index of the metric to visualize (for multi-objective opt-n).
:param dpi: DPI of the output figure.
"""
Expand All @@ -294,7 +296,7 @@ def visualize(self,

fig, ax = plt.subplots(figsize=(6.4, 4.8), facecolor='w')
xlabel = 'Generation'
self.plot_multiple_fitness_lines(ax, metric_id)
self.plot_multiple_fitness_lines(ax, metric_id, with_confidence)
setup_fitness_plot(ax, xlabel)
plt.legend()
show_or_save_figure(fig, save_path, dpi)
Expand Down