diff --git a/docs/api.rst b/docs/api.rst index fca405cec..d655ecc78 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -38,6 +38,8 @@ Evaluation and plot manager.plot_synchronized_curves manager.compare_agents + manager.tensorboard_to_dataframe + Agents ==================== diff --git a/docs/basics/userguide/export_training_data.md b/docs/basics/userguide/export_training_data.md index cda123422..8b4bde404 100644 --- a/docs/basics/userguide/export_training_data.md +++ b/docs/basics/userguide/export_training_data.md @@ -1,6 +1,6 @@ (export_training_data)= -# How to export data about training? +# How to export/import data (rlberry data, tensorboard data, ...)? ## How to extract data from the WriterData? @@ -163,3 +163,524 @@ In the default writer you have the following information : - dw_time_elapsed : Time elapsed since writer initialization - global_step : Step at which the value was added. - n_simu : Added by {mod}`rlberry.manager.read_writer_data`, n_simu is an integer identifying the agent (if you use fit>1, you will have information on more than 1 agent in your writer.) + + +## How to import data from tensorboard? + +Maybe you want to use other tools to train your agents, but you want to use rlberry tools for visualisation and/or statistical tests. +If your training is compatible with tensorboard, you can load the data into pandas dataframes to use them in rlberry. To do that, you can use the tool [tensorboard_to_dataframe](rlberry.manager.tensorboard_to_dataframe). +There are two input formats for the tensorboard data : + +### Option 1: via a directory +Be careful about these 2 things: +- The folder containing tensorboard results must respect the following tree structure : + `path_to_tensorboard_data/algo_name/n_simu/events.out.tfevents.xxxxx` +- You must have exactly one file (events.out.tfevents.xxx) per leaf folder (n_simu): the import asserts that each leaf folder contains a single file.
+ +For instance, suppose you do the following training with stable-baselines3, and log with tensorboard : + +```python +from stable_baselines3 import PPO +from stable_baselines3 import A2C + +log_path = "./log" +path_ppo = str(log_path + "/PPO_cartpole/") +path_a2c = str(log_path + "/A2C_cartpole/") + +model = PPO("MlpPolicy", "CartPole-v1", tensorboard_log=path_ppo) +model2 = A2C("MlpPolicy", "CartPole-v1", tensorboard_log=path_a2c) +model2_seed2 = A2C("MlpPolicy", "CartPole-v1", tensorboard_log=path_a2c) +model.learn(total_timesteps=5_000, tb_log_name="ppo") +model2.learn(total_timesteps=5_000, tb_log_name="A2C") +model2_seed2.learn(total_timesteps=5_000, tb_log_name="A2C") +``` + + +Then, to convert these logs into pandas dataframes, you can use the tool [tensorboard_to_dataframe](rlberry.manager.tensorboard_to_dataframe). +It will give you a `Dict` with all the scalar data from the tensorboard folder. + + +- The `keys` will be the "tag" (the name of the measure) +- The `values` will be the `dataframe` with 4 columns : ["name", "n_simu", "x", "y"] +(respectively "name of the algorithm", "seed number", "step number", and "measure value" ) + + +```python +from rlberry.manager import tensorboard_to_dataframe + +data_in_dataframe = tensorboard_to_dataframe(log_path) + +print(data_in_dataframe.keys()) +print("-----------") +print(data_in_dataframe) +``` + +```none +dict_keys(['rollout/ep_len_mean', 'rollout/ep_rew_mean', 'time/fps', 'train/approx_kl', 'train/clip_fraction', 'train/clip_range', 'train/entropy_loss', 'train/explained_variance', 'train/learning_rate', 'train/loss', 'train/policy_gradient_loss', 'train/value_loss', 'train/policy_loss']) +----------- +{'rollout/ep_len_mean': name n_simu x y +0 PPO_cartpole ppo_1 2048 22.898876 +1 PPO_cartpole ppo_1 4096 26.700001 +2 PPO_cartpole ppo_1 6144 36.810001 +3 A2C_cartpole A2C_1 500 40.090908 +4 A2C_cartpole A2C_1 1000 45.900002 +5 A2C_cartpole A2C_1 1500 50.724136 +6 A2C_cartpole A2C_1 2000 53.567566 +7 A2C_cartpole
A2C_1 2500 55.266666 +8 A2C_cartpole A2C_1 3000 58.666668 +9 A2C_cartpole A2C_1 3500 61.018520 +10 A2C_cartpole A2C_1 4000 68.589287 +11 A2C_cartpole A2C_1 4500 73.114754 +12 A2C_cartpole A2C_1 5000 74.424240 +13 A2C_cartpole A2C_2 500 23.619047 +14 A2C_cartpole A2C_2 1000 23.951220 +15 A2C_cartpole A2C_2 1500 27.865385 +16 A2C_cartpole A2C_2 2000 33.000000 +17 A2C_cartpole A2C_2 2500 38.140625 +18 A2C_cartpole A2C_2 3000 43.405796 +19 A2C_cartpole A2C_2 3500 45.890411 +20 A2C_cartpole A2C_2 4000 49.720001 +21 A2C_cartpole A2C_2 4500 56.139240 +22 A2C_cartpole A2C_2 5000 60.402439, + + 'rollout/ep_rew_mean': name n_simu x y +0 PPO_cartpole ppo_1 2048 22.898876 +1 PPO_cartpole ppo_1 4096 26.700001 +2 PPO_cartpole ppo_1 6144 36.810001 +3 A2C_cartpole A2C_1 500 40.090908 +4 A2C_cartpole A2C_1 1000 45.900002 +5 A2C_cartpole A2C_1 1500 50.724136 +6 A2C_cartpole A2C_1 2000 53.567566 +7 A2C_cartpole A2C_1 2500 55.266666 +8 A2C_cartpole A2C_1 3000 58.666668 +9 A2C_cartpole A2C_1 3500 61.018520 +10 A2C_cartpole A2C_1 4000 68.589287 +11 A2C_cartpole A2C_1 4500 73.114754 +12 A2C_cartpole A2C_1 5000 74.424240 +13 A2C_cartpole A2C_2 500 23.619047 +14 A2C_cartpole A2C_2 1000 23.951220 +15 A2C_cartpole A2C_2 1500 27.865385 +16 A2C_cartpole A2C_2 2000 33.000000 +17 A2C_cartpole A2C_2 2500 38.140625 +18 A2C_cartpole A2C_2 3000 43.405796 +19 A2C_cartpole A2C_2 3500 45.890411 +20 A2C_cartpole A2C_2 4000 49.720001 +21 A2C_cartpole A2C_2 4500 56.139240 +22 A2C_cartpole A2C_2 5000 60.402439, + + 'time/fps': name n_simu x y +0 PPO_cartpole ppo_1 2048 3431.0 +1 PPO_cartpole ppo_1 4096 2396.0 +2 PPO_cartpole ppo_1 6144 2156.0 +3 A2C_cartpole A2C_1 500 1595.0 +4 A2C_cartpole A2C_1 1000 1614.0 +5 A2C_cartpole A2C_1 1500 1568.0 +6 A2C_cartpole A2C_1 2000 1553.0 +7 A2C_cartpole A2C_1 2500 1547.0 +8 A2C_cartpole A2C_1 3000 1530.0 +9 A2C_cartpole A2C_1 3500 1548.0 +10 A2C_cartpole A2C_1 4000 1558.0 +11 A2C_cartpole A2C_1 4500 1551.0 +12 A2C_cartpole A2C_1 5000 1556.0 +13 A2C_cartpole A2C_2 500 
1628.0 +14 A2C_cartpole A2C_2 1000 1644.0 +15 A2C_cartpole A2C_2 1500 1561.0 +16 A2C_cartpole A2C_2 2000 1539.0 +17 A2C_cartpole A2C_2 2500 1547.0 +18 A2C_cartpole A2C_2 3000 1562.0 +19 A2C_cartpole A2C_2 3500 1572.0 +20 A2C_cartpole A2C_2 4000 1576.0 +21 A2C_cartpole A2C_2 4500 1586.0 +22 A2C_cartpole A2C_2 5000 1594.0, + + 'train/approx_kl': name n_simu x y +0 PPO_cartpole ppo_1 4096 0.009280 +1 PPO_cartpole ppo_1 6144 0.009204, + + 'train/clip_fraction': name n_simu x y +0 PPO_cartpole ppo_1 4096 0.128174 +1 PPO_cartpole ppo_1 6144 0.057813, + + 'train/clip_range': name n_simu x y +0 PPO_cartpole ppo_1 4096 0.2 +1 PPO_cartpole ppo_1 6144 0.2, + + 'train/entropy_loss': name n_simu x y +0 PPO_cartpole ppo_1 4096 -0.685331 +1 PPO_cartpole ppo_1 6144 -0.659614 +2 A2C_cartpole A2C_1 500 -0.615525 +3 A2C_cartpole A2C_1 1000 -0.484166 +4 A2C_cartpole A2C_1 1500 -0.565144 +5 A2C_cartpole A2C_1 2000 -0.511171 +6 A2C_cartpole A2C_1 2500 -0.551776 +7 A2C_cartpole A2C_1 3000 -0.503026 +8 A2C_cartpole A2C_1 3500 -0.617282 +9 A2C_cartpole A2C_1 4000 -0.261234 +10 A2C_cartpole A2C_1 4500 -0.417461 +11 A2C_cartpole A2C_1 5000 -0.633000 +12 A2C_cartpole A2C_2 500 -0.692809 +13 A2C_cartpole A2C_2 1000 -0.684999 +14 A2C_cartpole A2C_2 1500 -0.649449 +15 A2C_cartpole A2C_2 2000 -0.642278 +16 A2C_cartpole A2C_2 2500 -0.592125 +17 A2C_cartpole A2C_2 3000 -0.301056 +18 A2C_cartpole A2C_2 3500 -0.640023 +19 A2C_cartpole A2C_2 4000 -0.512887 +20 A2C_cartpole A2C_2 4500 -0.432308 +21 A2C_cartpole A2C_2 5000 -0.492796, + + 'train/explained_variance': name n_simu x y +0 PPO_cartpole ppo_1 4096 -0.005981 +1 PPO_cartpole ppo_1 6144 0.095037 +2 A2C_cartpole A2C_1 500 -0.060004 +3 A2C_cartpole A2C_1 1000 -0.009993 +4 A2C_cartpole A2C_1 1500 -0.021823 +5 A2C_cartpole A2C_1 2000 0.001556 +6 A2C_cartpole A2C_1 2500 -0.003476 +7 A2C_cartpole A2C_1 3000 0.006280 +8 A2C_cartpole A2C_1 3500 0.001778 +9 A2C_cartpole A2C_1 4000 0.005313 +10 A2C_cartpole A2C_1 4500 0.002912 +11 A2C_cartpole A2C_1 5000 
0.001874 +12 A2C_cartpole A2C_2 500 0.111738 +13 A2C_cartpole A2C_2 1000 0.078319 +14 A2C_cartpole A2C_2 1500 0.000760 +15 A2C_cartpole A2C_2 2000 0.009839 +16 A2C_cartpole A2C_2 2500 0.008209 +17 A2C_cartpole A2C_2 3000 -0.000845 +18 A2C_cartpole A2C_2 3500 -0.000841 +19 A2C_cartpole A2C_2 4000 0.000686 +20 A2C_cartpole A2C_2 4500 0.001162 +21 A2C_cartpole A2C_2 5000 0.000076, + + 'train/learning_rate': name n_simu x y +0 PPO_cartpole ppo_1 4096 0.0003 +1 PPO_cartpole ppo_1 6144 0.0003 +2 A2C_cartpole A2C_1 500 0.0007 +3 A2C_cartpole A2C_1 1000 0.0007 +4 A2C_cartpole A2C_1 1500 0.0007 +5 A2C_cartpole A2C_1 2000 0.0007 +6 A2C_cartpole A2C_1 2500 0.0007 +7 A2C_cartpole A2C_1 3000 0.0007 +8 A2C_cartpole A2C_1 3500 0.0007 +9 A2C_cartpole A2C_1 4000 0.0007 +10 A2C_cartpole A2C_1 4500 0.0007 +11 A2C_cartpole A2C_1 5000 0.0007 +12 A2C_cartpole A2C_2 500 0.0007 +13 A2C_cartpole A2C_2 1000 0.0007 +14 A2C_cartpole A2C_2 1500 0.0007 +15 A2C_cartpole A2C_2 2000 0.0007 +16 A2C_cartpole A2C_2 2500 0.0007 +17 A2C_cartpole A2C_2 3000 0.0007 +18 A2C_cartpole A2C_2 3500 0.0007 +19 A2C_cartpole A2C_2 4000 0.0007 +20 A2C_cartpole A2C_2 4500 0.0007 +21 A2C_cartpole A2C_2 5000 0.0007, + + 'train/loss': name n_simu x y +0 PPO_cartpole ppo_1 4096 6.982748 +1 PPO_cartpole ppo_1 6144 13.480467, + + 'train/policy_gradient_loss': name n_simu x y +0 PPO_cartpole ppo_1 4096 -0.022298 +1 PPO_cartpole ppo_1 6144 -0.016617, + + 'train/value_loss': name n_simu x y +0 PPO_cartpole ppo_1 4096 54.930149 +1 PPO_cartpole ppo_1 6144 32.751965 +2 A2C_cartpole A2C_1 500 9.222057 +3 A2C_cartpole A2C_1 1000 7.639998 +4 A2C_cartpole A2C_1 1500 6.368935 +5 A2C_cartpole A2C_1 2000 5.560571 +6 A2C_cartpole A2C_1 2500 5.007382 +7 A2C_cartpole A2C_1 3000 469.051453 +8 A2C_cartpole A2C_1 3500 3.818318 +9 A2C_cartpole A2C_1 4000 3.285388 +10 A2C_cartpole A2C_1 4500 2.823058 +11 A2C_cartpole A2C_1 5000 2.386893 +12 A2C_cartpole A2C_2 500 8.672586 +13 A2C_cartpole A2C_2 1000 6.938823 +14 A2C_cartpole A2C_2 1500 
6.459139 +15 A2C_cartpole A2C_2 2000 5.905715 +16 A2C_cartpole A2C_2 2500 5.079061 +17 A2C_cartpole A2C_2 3000 1009.296082 +18 A2C_cartpole A2C_2 3500 3.968157 +19 A2C_cartpole A2C_2 4000 3.429344 +20 A2C_cartpole A2C_2 4500 2.945411 +21 A2C_cartpole A2C_2 5000 2.487410, + + 'train/policy_loss': name n_simu x y +0 A2C_cartpole A2C_1 500 1.682467 +1 A2C_cartpole A2C_1 1000 1.788085 +2 A2C_cartpole A2C_1 1500 0.925050 +3 A2C_cartpole A2C_1 2000 0.615906 +4 A2C_cartpole A2C_1 2500 0.801314 +5 A2C_cartpole A2C_1 3000 -2.096942 +6 A2C_cartpole A2C_1 3500 1.006535 +7 A2C_cartpole A2C_1 4000 1.268059 +8 A2C_cartpole A2C_1 4500 0.521781 +9 A2C_cartpole A2C_1 5000 0.593369 +10 A2C_cartpole A2C_2 500 1.878575 +11 A2C_cartpole A2C_2 1000 1.407964 +12 A2C_cartpole A2C_2 1500 1.321871 +13 A2C_cartpole A2C_2 2000 1.198855 +14 A2C_cartpole A2C_2 2500 0.724112 +15 A2C_cartpole A2C_2 3000 -24.444633 +16 A2C_cartpole A2C_2 3500 0.851452 +17 A2C_cartpole A2C_2 4000 1.169502 +18 A2C_cartpole A2C_2 4500 1.198329 +19 A2C_cartpole A2C_2 5000 0.700427} +``` +
+ +### Option 2: via a Dict + +In [tensorboard_to_dataframe](rlberry.manager.tensorboard_to_dataframe), you can also use a `Dict` as input. +The Dict must have the `algo_name` in **keys**, and a list of `path` in **values** (path to the `events.out.tfevents` file). In the list, the position of the path will be considered as the `n_simu`. + +```python +# creating the dict +import os + +folder_ppo_1 = str(path_ppo + "ppo_1/") +folder_A2C_1 = str(path_a2c + "A2C_1/") +folder_A2C_2 = str(path_a2c + "A2C_2/") + +path_event_ppo_1 = str(folder_ppo_1 + os.listdir(folder_ppo_1)[0]) +path_event_A2C_1 = str(folder_A2C_1 + os.listdir(folder_A2C_1)[0]) +path_event_A2C_2 = str(folder_A2C_2 + os.listdir(folder_A2C_2)[0]) + +input_dict = { + "ppo_cartpole_tensorboard": [path_event_ppo_1], + "a2c_cartpole_tensorboard": [path_event_A2C_1, path_event_A2C_2], +} + + +# same function +data_in_dataframe2 = tensorboard_to_dataframe(input_dict) + +# same data (only the `name` and `n_simu` labels differ) +print(data_in_dataframe2.keys()) +print("-----------") +print(data_in_dataframe2) +``` + + +```none + +dict_keys(['rollout/ep_len_mean', 'rollout/ep_rew_mean', 'time/fps', 'train/approx_kl', 'train/clip_fraction', 'train/clip_range', 'train/entropy_loss', 'train/explained_variance', 'train/learning_rate', 'train/loss', 'train/policy_gradient_loss', 'train/value_loss', 'train/policy_loss']) +----------- +{'rollout/ep_len_mean': name n_simu x y +0 ppo_cartpole_tensorboard 0 2048 22.898876 +1 ppo_cartpole_tensorboard 0 4096 26.700001 +2 ppo_cartpole_tensorboard 0 6144 36.810001 +3 a2c_cartpole_tensorboard 0 500 40.090908 +4 a2c_cartpole_tensorboard 0 1000 45.900002 +5 a2c_cartpole_tensorboard 0 1500 50.724136 +6 a2c_cartpole_tensorboard 0 2000 53.567566 +7 a2c_cartpole_tensorboard 0 2500 55.266666 +8 a2c_cartpole_tensorboard 0 3000 58.666668 +9 a2c_cartpole_tensorboard 0 3500 61.018520 +10 a2c_cartpole_tensorboard 0 4000 68.589287 +11 a2c_cartpole_tensorboard 0 4500 73.114754 +12 a2c_cartpole_tensorboard 0 5000 74.424240 +13
a2c_cartpole_tensorboard 1 500 23.619047 +14 a2c_cartpole_tensorboard 1 1000 23.951220 +15 a2c_cartpole_tensorboard 1 1500 27.865385 +16 a2c_cartpole_tensorboard 1 2000 33.000000 +17 a2c_cartpole_tensorboard 1 2500 38.140625 +18 a2c_cartpole_tensorboard 1 3000 43.405796 +19 a2c_cartpole_tensorboard 1 3500 45.890411 +20 a2c_cartpole_tensorboard 1 4000 49.720001 +21 a2c_cartpole_tensorboard 1 4500 56.139240 +22 a2c_cartpole_tensorboard 1 5000 60.402439, + + 'rollout/ep_rew_mean': name n_simu x y +0 ppo_cartpole_tensorboard 0 2048 22.898876 +1 ppo_cartpole_tensorboard 0 4096 26.700001 +2 ppo_cartpole_tensorboard 0 6144 36.810001 +3 a2c_cartpole_tensorboard 0 500 40.090908 +4 a2c_cartpole_tensorboard 0 1000 45.900002 +5 a2c_cartpole_tensorboard 0 1500 50.724136 +6 a2c_cartpole_tensorboard 0 2000 53.567566 +7 a2c_cartpole_tensorboard 0 2500 55.266666 +8 a2c_cartpole_tensorboard 0 3000 58.666668 +9 a2c_cartpole_tensorboard 0 3500 61.018520 +10 a2c_cartpole_tensorboard 0 4000 68.589287 +11 a2c_cartpole_tensorboard 0 4500 73.114754 +12 a2c_cartpole_tensorboard 0 5000 74.424240 +13 a2c_cartpole_tensorboard 1 500 23.619047 +14 a2c_cartpole_tensorboard 1 1000 23.951220 +15 a2c_cartpole_tensorboard 1 1500 27.865385 +16 a2c_cartpole_tensorboard 1 2000 33.000000 +17 a2c_cartpole_tensorboard 1 2500 38.140625 +18 a2c_cartpole_tensorboard 1 3000 43.405796 +19 a2c_cartpole_tensorboard 1 3500 45.890411 +20 a2c_cartpole_tensorboard 1 4000 49.720001 +21 a2c_cartpole_tensorboard 1 4500 56.139240 +22 a2c_cartpole_tensorboard 1 5000 60.402439, + + 'time/fps': name n_simu x y +0 ppo_cartpole_tensorboard 0 2048 3431.0 +1 ppo_cartpole_tensorboard 0 4096 2396.0 +2 ppo_cartpole_tensorboard 0 6144 2156.0 +3 a2c_cartpole_tensorboard 0 500 1595.0 +4 a2c_cartpole_tensorboard 0 1000 1614.0 +5 a2c_cartpole_tensorboard 0 1500 1568.0 +6 a2c_cartpole_tensorboard 0 2000 1553.0 +7 a2c_cartpole_tensorboard 0 2500 1547.0 +8 a2c_cartpole_tensorboard 0 3000 1530.0 +9 a2c_cartpole_tensorboard 0 3500 1548.0 
+10 a2c_cartpole_tensorboard 0 4000 1558.0 +11 a2c_cartpole_tensorboard 0 4500 1551.0 +12 a2c_cartpole_tensorboard 0 5000 1556.0 +13 a2c_cartpole_tensorboard 1 500 1628.0 +14 a2c_cartpole_tensorboard 1 1000 1644.0 +15 a2c_cartpole_tensorboard 1 1500 1561.0 +16 a2c_cartpole_tensorboard 1 2000 1539.0 +17 a2c_cartpole_tensorboard 1 2500 1547.0 +18 a2c_cartpole_tensorboard 1 3000 1562.0 +19 a2c_cartpole_tensorboard 1 3500 1572.0 +20 a2c_cartpole_tensorboard 1 4000 1576.0 +21 a2c_cartpole_tensorboard 1 4500 1586.0 +22 a2c_cartpole_tensorboard 1 5000 1594.0, + + 'train/approx_kl': name n_simu x y +0 ppo_cartpole_tensorboard 0 4096 0.009280 +1 ppo_cartpole_tensorboard 0 6144 0.009204, + + 'train/clip_fraction': name n_simu x y +0 ppo_cartpole_tensorboard 0 4096 0.128174 +1 ppo_cartpole_tensorboard 0 6144 0.057813, + + 'train/clip_range': name n_simu x y +0 ppo_cartpole_tensorboard 0 4096 0.2 +1 ppo_cartpole_tensorboard 0 6144 0.2, + + 'train/entropy_loss': name n_simu x y +0 ppo_cartpole_tensorboard 0 4096 -0.685331 +1 ppo_cartpole_tensorboard 0 6144 -0.659614 +2 a2c_cartpole_tensorboard 0 500 -0.615525 +3 a2c_cartpole_tensorboard 0 1000 -0.484166 +4 a2c_cartpole_tensorboard 0 1500 -0.565144 +5 a2c_cartpole_tensorboard 0 2000 -0.511171 +6 a2c_cartpole_tensorboard 0 2500 -0.551776 +7 a2c_cartpole_tensorboard 0 3000 -0.503026 +8 a2c_cartpole_tensorboard 0 3500 -0.617282 +9 a2c_cartpole_tensorboard 0 4000 -0.261234 +10 a2c_cartpole_tensorboard 0 4500 -0.417461 +11 a2c_cartpole_tensorboard 0 5000 -0.633000 +12 a2c_cartpole_tensorboard 1 500 -0.692809 +13 a2c_cartpole_tensorboard 1 1000 -0.684999 +14 a2c_cartpole_tensorboard 1 1500 -0.649449 +15 a2c_cartpole_tensorboard 1 2000 -0.642278 +16 a2c_cartpole_tensorboard 1 2500 -0.592125 +17 a2c_cartpole_tensorboard 1 3000 -0.301056 +18 a2c_cartpole_tensorboard 1 3500 -0.640023 +19 a2c_cartpole_tensorboard 1 4000 -0.512887 +20 a2c_cartpole_tensorboard 1 4500 -0.432308 +21 a2c_cartpole_tensorboard 1 5000 -0.492796, + + 
'train/explained_variance': name n_simu x y +0 ppo_cartpole_tensorboard 0 4096 -0.005981 +1 ppo_cartpole_tensorboard 0 6144 0.095037 +2 a2c_cartpole_tensorboard 0 500 -0.060004 +3 a2c_cartpole_tensorboard 0 1000 -0.009993 +4 a2c_cartpole_tensorboard 0 1500 -0.021823 +5 a2c_cartpole_tensorboard 0 2000 0.001556 +6 a2c_cartpole_tensorboard 0 2500 -0.003476 +7 a2c_cartpole_tensorboard 0 3000 0.006280 +8 a2c_cartpole_tensorboard 0 3500 0.001778 +9 a2c_cartpole_tensorboard 0 4000 0.005313 +10 a2c_cartpole_tensorboard 0 4500 0.002912 +11 a2c_cartpole_tensorboard 0 5000 0.001874 +12 a2c_cartpole_tensorboard 1 500 0.111738 +13 a2c_cartpole_tensorboard 1 1000 0.078319 +14 a2c_cartpole_tensorboard 1 1500 0.000760 +15 a2c_cartpole_tensorboard 1 2000 0.009839 +16 a2c_cartpole_tensorboard 1 2500 0.008209 +17 a2c_cartpole_tensorboard 1 3000 -0.000845 +18 a2c_cartpole_tensorboard 1 3500 -0.000841 +19 a2c_cartpole_tensorboard 1 4000 0.000686 +20 a2c_cartpole_tensorboard 1 4500 0.001162 +21 a2c_cartpole_tensorboard 1 5000 0.000076, + + 'train/learning_rate': name n_simu x y +0 ppo_cartpole_tensorboard 0 4096 0.0003 +1 ppo_cartpole_tensorboard 0 6144 0.0003 +2 a2c_cartpole_tensorboard 0 500 0.0007 +3 a2c_cartpole_tensorboard 0 1000 0.0007 +4 a2c_cartpole_tensorboard 0 1500 0.0007 +5 a2c_cartpole_tensorboard 0 2000 0.0007 +6 a2c_cartpole_tensorboard 0 2500 0.0007 +7 a2c_cartpole_tensorboard 0 3000 0.0007 +8 a2c_cartpole_tensorboard 0 3500 0.0007 +9 a2c_cartpole_tensorboard 0 4000 0.0007 +10 a2c_cartpole_tensorboard 0 4500 0.0007 +11 a2c_cartpole_tensorboard 0 5000 0.0007 +12 a2c_cartpole_tensorboard 1 500 0.0007 +13 a2c_cartpole_tensorboard 1 1000 0.0007 +14 a2c_cartpole_tensorboard 1 1500 0.0007 +15 a2c_cartpole_tensorboard 1 2000 0.0007 +16 a2c_cartpole_tensorboard 1 2500 0.0007 +17 a2c_cartpole_tensorboard 1 3000 0.0007 +18 a2c_cartpole_tensorboard 1 3500 0.0007 +19 a2c_cartpole_tensorboard 1 4000 0.0007 +20 a2c_cartpole_tensorboard 1 4500 0.0007 +21 a2c_cartpole_tensorboard 1 5000 
0.0007, + + 'train/loss': name n_simu x y +0 ppo_cartpole_tensorboard 0 4096 6.982748 +1 ppo_cartpole_tensorboard 0 6144 13.480467, + + 'train/policy_gradient_loss': name n_simu x y +0 ppo_cartpole_tensorboard 0 4096 -0.022298 +1 ppo_cartpole_tensorboard 0 6144 -0.016617, + + 'train/value_loss': name n_simu x y +0 ppo_cartpole_tensorboard 0 4096 54.930149 +1 ppo_cartpole_tensorboard 0 6144 32.751965 +2 a2c_cartpole_tensorboard 0 500 9.222057 +3 a2c_cartpole_tensorboard 0 1000 7.639998 +4 a2c_cartpole_tensorboard 0 1500 6.368935 +5 a2c_cartpole_tensorboard 0 2000 5.560571 +6 a2c_cartpole_tensorboard 0 2500 5.007382 +7 a2c_cartpole_tensorboard 0 3000 469.051453 +8 a2c_cartpole_tensorboard 0 3500 3.818318 +9 a2c_cartpole_tensorboard 0 4000 3.285388 +10 a2c_cartpole_tensorboard 0 4500 2.823058 +11 a2c_cartpole_tensorboard 0 5000 2.386893 +12 a2c_cartpole_tensorboard 1 500 8.672586 +13 a2c_cartpole_tensorboard 1 1000 6.938823 +14 a2c_cartpole_tensorboard 1 1500 6.459139 +15 a2c_cartpole_tensorboard 1 2000 5.905715 +16 a2c_cartpole_tensorboard 1 2500 5.079061 +17 a2c_cartpole_tensorboard 1 3000 1009.296082 +18 a2c_cartpole_tensorboard 1 3500 3.968157 +19 a2c_cartpole_tensorboard 1 4000 3.429344 +20 a2c_cartpole_tensorboard 1 4500 2.945411 +21 a2c_cartpole_tensorboard 1 5000 2.487410, + + 'train/policy_loss': name n_simu x y +0 a2c_cartpole_tensorboard 0 500 1.682467 +1 a2c_cartpole_tensorboard 0 1000 1.788085 +2 a2c_cartpole_tensorboard 0 1500 0.925050 +3 a2c_cartpole_tensorboard 0 2000 0.615906 +4 a2c_cartpole_tensorboard 0 2500 0.801314 +5 a2c_cartpole_tensorboard 0 3000 -2.096942 +6 a2c_cartpole_tensorboard 0 3500 1.006535 +7 a2c_cartpole_tensorboard 0 4000 1.268059 +8 a2c_cartpole_tensorboard 0 4500 0.521781 +9 a2c_cartpole_tensorboard 0 5000 0.593369 +10 a2c_cartpole_tensorboard 1 500 1.878575 +11 a2c_cartpole_tensorboard 1 1000 1.407964 +12 a2c_cartpole_tensorboard 1 1500 1.321871 +13 a2c_cartpole_tensorboard 1 2000 1.198855 +14 a2c_cartpole_tensorboard 1 2500 
0.724112 +15 a2c_cartpole_tensorboard 1 3000 -24.444633 +16 a2c_cartpole_tensorboard 1 3500 0.851452 +17 a2c_cartpole_tensorboard 1 4000 1.169502 +18 a2c_cartpole_tensorboard 1 4500 1.198329 +19 a2c_cartpole_tensorboard 1 5000 0.700427} + +``` diff --git a/docs/changelog.rst b/docs/changelog.rst index af2e7ef24..71826b437 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -7,6 +7,9 @@ Changelog Dev version ----------- + *PR #474* + +* Create a new tool to load data from tensorboard logs : https://github.com/rlberry-py/rlberry/issues/472 *PR #470* diff --git a/rlberry/manager/__init__.py b/rlberry/manager/__init__.py index e4687cec4..3d559c163 100644 --- a/rlberry/manager/__init__.py +++ b/rlberry/manager/__init__.py @@ -5,6 +5,7 @@ from .comparison import compare_agents, AdastopComparator from .plotting import plot_smoothed_curves, plot_writer_data, plot_synchronized_curves from .env_tools import with_venv, run_venv_xp +from .utils import tensorboard_to_dataframe # AgentManager alias for the ExperimentManager class, for backward compatibility AgentManager = ExperimentManager diff --git a/rlberry/manager/tests/test_utils.py b/rlberry/manager/tests/test_utils.py new file mode 100644 index 000000000..ba03f714c --- /dev/null +++ b/rlberry/manager/tests/test_utils.py @@ -0,0 +1,74 @@ +from rlberry.manager import tensorboard_to_dataframe +from stable_baselines3 import PPO, A2C +import tempfile +import os +import pandas as pd +import pytest + + +def test_tensorboard_to_dataframe(): + with tempfile.TemporaryDirectory() as tmpdirname: + # create tensorboard logs to test against (1 PPO run, 2 A2C runs) + path_ppo = str(tmpdirname + "/ppo_cartpole_tensorboard/") + path_a2c = str(tmpdirname + "/a2c_cartpole_tensorboard/") + model = PPO("MlpPolicy", "CartPole-v1", tensorboard_log=path_ppo) + model2 = A2C("MlpPolicy", "CartPole-v1", tensorboard_log=path_a2c) + model2_seed2 = A2C("MlpPolicy", "CartPole-v1", tensorboard_log=path_a2c) + model.learn(total_timesteps=5_000, tb_log_name="ppo") +
model2.learn(total_timesteps=5_000, tb_log_name="A2C") + model2_seed2.learn(total_timesteps=5_000, tb_log_name="A2C") + + assert os.path.exists(path_ppo) + assert os.path.exists(path_a2c) + + # check with the parent folder (str input) + data_in_dataframe = tensorboard_to_dataframe(tmpdirname) + + assert isinstance(data_in_dataframe, dict) + assert "rollout/ep_rew_mean" in data_in_dataframe + a_dict = data_in_dataframe["rollout/ep_rew_mean"] + + assert isinstance(a_dict, pd.DataFrame) + assert "name" in a_dict.columns + assert "n_simu" in a_dict.columns + assert "x" in a_dict.columns + assert "y" in a_dict.columns + + # check with a dict mapping algo name -> list of event file paths + folder_ppo_1 = str(path_ppo + "ppo_1/") + folder_A2C_1 = str(path_a2c + "A2C_1/") + folder_A2C_2 = str(path_a2c + "A2C_2/") + + path_event_ppo_1 = str(folder_ppo_1 + os.listdir(folder_ppo_1)[0]) + path_event_A2C_1 = str(folder_A2C_1 + os.listdir(folder_A2C_1)[0]) + path_event_A2C_2 = str(folder_A2C_2 + os.listdir(folder_A2C_2)[0]) + + input_dict = { + "ppo_cartpole_tensorboard": [path_event_ppo_1], + "a2c_cartpole_tensorboard": [path_event_A2C_1, path_event_A2C_2], + } + + data_in_dataframe2 = tensorboard_to_dataframe(input_dict) + assert isinstance(data_in_dataframe2, dict) + assert "rollout/ep_rew_mean" in data_in_dataframe2 + a_dict2 = data_in_dataframe2["rollout/ep_rew_mean"] + + assert isinstance(a_dict2, pd.DataFrame) + assert "name" in a_dict2.columns + assert "n_simu" in a_dict2.columns + assert "x" in a_dict2.columns + assert "y" in a_dict2.columns + + # check that both strategies give the same result + assert set(a_dict.keys()) == set(a_dict2.keys()) + for key in a_dict: + if ( + key != "n_simu" + ): # don't test n_simu/seed: it differs because one comes from the folder name and the other from the index in the list + assert set(a_dict[key]) == set(a_dict2[key]) + + +def test_tensorboard_to_dataframe_errorIO(): + msg = "Input of 'tensorboard_to_dataframe' must be a str or a dict... 
not a " + with pytest.raises(IOError, match=msg): + tensorboard_to_dataframe(1) diff --git a/rlberry/manager/utils.py b/rlberry/manager/utils.py index 347ed0ed2..f51aaf93d 100644 --- a/rlberry/manager/utils.py +++ b/rlberry/manager/utils.py @@ -1,4 +1,6 @@ import sqlite3 +import os +import pandas as pd def create_database(db_file): @@ -14,3 +16,107 @@ def create_database(db_file): connection.close() return True return False + + +def tensorboard_to_dataframe(tensorboard_data): + """ + Convert tensorboard logs to pandas DataFrames. + + | To convert the tensorboard logs, the input must be the path to "the parent folder of all the training logs" (path_to_tensorboard_data), and the 'events.out.tfevents' files have to be in this kind of path : + < path_to_tensorboard_data/algo_name/n_simu/events.out.tfevents.xxxxx > + + Or you can specify all the desired 'events.out.tfevents' files with a dict. In that case, the key should be the algorithm name, and the value the list of the 'events.out.tfevents' paths. The seed/n_simu number will be the position in the list. + + The output format is a dictionary. + + | key = tag (type of data) + | value = pandas DataFrame with the following structure (4 columns): + + * "name" = algo_name + * "n_simu" = n_simu (seed) + * "x" = step number + * "y" = value of the data + + Parameters + ---------- + tensorboard_data (str or dict): + if str: path to the parent folder of the tensorboard's data. 
+ if dict: key = algo_name, value = list of 'events.out.tfevents.xxxxx' paths + + + Returns + ------- + Dict : dict of pandas DataFrames (key = tag, value = pandas.DataFrame) + """ + + dataframe_by_tag = {} + + if isinstance(tensorboard_data, str): + dataframe_by_tag = _tensorboard_to_dataframe_from_parent_path(tensorboard_data) + elif isinstance(tensorboard_data, dict): + dataframe_by_tag = _tensorboard_to_dataframe_from_dict_paths(tensorboard_data) + else: + raise IOError( + str( + "Input of 'tensorboard_to_dataframe' must be a str or a dict... not a " + + str(type(tensorboard_data)) + ) + ) + + # convert the "dict of lists of (name, n_simu, x, y) tuples" to a "dict of pandas DataFrames" + df = {} + for tag, value in dataframe_by_tag.items(): + df[tag] = pd.DataFrame(value, columns=["name", "n_simu", "x", "y"]) + return df + + +def _tensorboard_to_dataframe_from_parent_path(path_to_tensorboard_data): + from tensorboard.backend.event_processing import event_accumulator + + dataframe_by_tag = {} + for algo_name in os.listdir(path_to_tensorboard_data): + path_for_this_algo = os.path.join(path_to_tensorboard_data, algo_name) + if os.path.isdir(path_for_this_algo): + for seed in os.listdir(path_for_this_algo): + current_seed_path = os.path.join(path_for_this_algo, seed) + content = os.listdir(current_seed_path) + assert len(content) == 1 # exactly one file expected: "events.out.tfevents.xxxxxxxxx" + content_path = os.path.join(current_seed_path, content[0]) + + # load the events from the file, and get the scalar tags + ea = event_accumulator.EventAccumulator(content_path) + ea.Reload() + scalar_tags = ea.Tags()["scalars"] + + for tag in scalar_tags: + events = ea.Scalars(tag) + if ( + tag not in dataframe_by_tag + ): # new tag, create a new entry in the dict + dataframe_by_tag[tag] = [] + new_elements = [(algo_name, seed, e.step, e.value) for e in events] + dataframe_by_tag[tag].extend(new_elements) + return dataframe_by_tag + + +def 
_tensorboard_to_dataframe_from_dict_paths(dict_tensorboard_data): + from 
tensorboard.backend.event_processing import event_accumulator + + dataframe_by_tag = {} + for algo_name, current_path_list in dict_tensorboard_data.items(): + for idx, path in enumerate(current_path_list): + # load the events from the file, and get the scalar tags + ea = event_accumulator.EventAccumulator(path) + ea.Reload() + scalar_tags = ea.Tags()["scalars"] + + for tag in scalar_tags: + events = ea.Scalars(tag) + if tag not in dataframe_by_tag: # new tag, create a new entry in the dict + dataframe_by_tag[tag] = [] + new_elements = [(algo_name, idx, e.step, e.value) for e in events] + dataframe_by_tag[tag].extend(new_elements) + return dataframe_by_tag + + +# TODO: write a test that checks the 2nd function, and a test that checks that both functions give the same results