update names following PR350(part2): rlberry-py#350
JulienT01 committed Jul 28, 2023
1 parent 357ba1d commit 399c359
Showing 25 changed files with 99 additions and 99 deletions.
16 changes: 8 additions & 8 deletions docs/basics/DeepRLTutorial/TutorialDeepRL.rst
@@ -151,7 +151,7 @@ default networks are:
"""
The ExperimentManager class is compact way of experimenting with a deepRL agent.
"""
- default_agent = ExperimentManager(
+ default_xp = ExperimentManager(
A2CAgent, # The Agent class.
(gym_make, dict(id="CartPole-v1")), # The Environment to solve.
fit_budget=3e5, # The number of interactions
@@ -168,12 +168,12 @@ default networks are:
)
print("Training ...")
- default_agent.fit() # Trains the agent on fit_budget steps!
+ default_xp.fit() # Trains the agent on fit_budget steps!
# Plot the training data:
_ = plot_writer_data(
- [default_agent],
+ [default_xp],
tag="episode_rewards",
title="Training Episode Cumulative Rewards",
show=True,
@@ -256,7 +256,7 @@ default networks are:
print("Evaluating ...")
_ = evaluate_agents(
- [default_agent], n_simulations=50, show=True
+ [default_xp], n_simulations=50, show=True
) # Evaluate the trained agent on
# 10 simulations of 500 steps each.
@@ -353,7 +353,7 @@ and bigger batch size to have more stable training.
.. code:: python
- tuned_agent = ExperimentManager(
+ tuned_xp = ExperimentManager(
A2CAgent, # The Agent class.
(gym_make, dict(id="CartPole-v1")), # The Environment to solve.
init_kwargs=dict( # Where to put the agent's hyperparameters
@@ -385,12 +385,12 @@ and bigger batch size to have more stable training.
print("Training ...")
- tuned_agent.fit() # Trains the agent on fit_budget steps!
+ tuned_xp.fit() # Trains the agent on fit_budget steps!
# Plot the training data:
_ = plot_writer_data(
- [default_agent, tuned_agent],
+ [default_xp, tuned_xp],
tag="episode_rewards",
title="Training Episode Cumulative Rewards",
show=True,
@@ -469,7 +469,7 @@ and bigger batch size to have more stable training.
print("Evaluating ...")
# Evaluate each trained agent on 10 simulations of 500 steps each.
- _ = evaluate_agents([default_agent, tuned_agent], n_simulations=50, show=True)
+ _ = evaluate_agents([default_xp, tuned_xp], n_simulations=50, show=True)
.. parsed-literal::
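For readers following the renamed tutorial, here is a minimal end-to-end sketch of the workflow these hunks converge on: build an ExperimentManager, fit it, plot the writer data, then evaluate. It assumes the imports used in the tutorial (ExperimentManager, plot_writer_data and evaluate_agents from rlberry.manager, gym_make from rlberry.envs, A2CAgent from rlberry.agents.torch); the small fit_budget is an arbitrary illustration value, not the tutorial's 3e5.

    from rlberry.envs import gym_make
    from rlberry.agents.torch import A2CAgent
    from rlberry.manager import ExperimentManager, evaluate_agents, plot_writer_data

    # Build the experiment: agent class, environment constructor, training budget.
    default_xp = ExperimentManager(
        A2CAgent,                            # The Agent class.
        (gym_make, dict(id="CartPole-v1")),  # The environment to solve.
        fit_budget=int(1e4),                 # Illustration-sized budget.
        n_fit=1,                             # Number of agent instances to train.
        agent_name="A2C default",
    )

    default_xp.fit()  # Train on fit_budget steps.

    # Training curves logged by the agents' writers.
    _ = plot_writer_data([default_xp], tag="episode_rewards", show=True)

    # Monte-Carlo evaluation of the trained agent(s).
    _ = evaluate_agents([default_xp], n_simulations=50, show=True)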
6 changes: 3 additions & 3 deletions docs/basics/quick_start_rl/quickstart.rst
@@ -146,7 +146,7 @@ Our goal is then to assess the performance of the two algorithms.
There are a number of agents that are already coded in rlberry. See the
module :class:`~rlberry.agents.Agent` for more informations.

- Agent Manager
+ Experiment Manager
-------------

One of the main feature of rlberry is its :class:`~rlberry.manager.ExperimentManager`
@@ -157,7 +157,7 @@ class. Here is a diagram to explain briefly what it does.
:align: center


- In a few words, agent manager spawns agents and environments for training and
+ In a few words, experiment manager spawns agents and environments for training and
then once the agents are trained, it uses these agents and new environments
to evaluate how well the agent perform. All of these steps can be
done several times to assess stochasticity of agents and/or environment.
@@ -177,7 +177,7 @@ This gives us 1 value per agent. We do this 10 times (so 10 times 10
equal 100 simulations) in order to have an idea of the variability of
our estimation.

- In order to manage the agents, we use an Agent Manager. The manager will
+ In order to manage the agents, we use an ExperimentManager. The manager will
then spawn agents as desired during the experiment.


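To make the quickstart paragraph about spawning and evaluating concrete, here is a small sketch under the new naming. It reuses the GridWorld environment and the (constructor, kwargs) pattern that appear later in this diff; the ValueIterationAgent import path and the hyperparameter values are assumptions for illustration, not part of this commit.

    from rlberry.envs import GridWorld
    from rlberry.agents.dynprog import ValueIterationAgent  # assumed import path
    from rlberry.manager import ExperimentManager, evaluate_agents

    xp_manager = ExperimentManager(
        ValueIterationAgent,
        (GridWorld, None),                   # environment constructor and kwargs
        init_kwargs=dict(gamma=0.95),        # hypothetical hyperparameter
        fit_budget=100,
        n_fit=10,                            # train 10 independent instances
        eval_kwargs=dict(eval_horizon=100),
        agent_name="VI",
    )
    xp_manager.fit()

    # 10 evaluation simulations per manager, drawn from the fitted instances;
    # combined with n_fit=10 this is the kind of "10 times 10" scheme described above.
    evaluation = evaluate_agents([xp_manager], n_simulations=10, show=False).values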
6 changes: 3 additions & 3 deletions examples/demo_agents/demo_SAC.py
@@ -11,7 +11,7 @@
import gymnasium as gym
from rlberry.agents.torch.sac import SACAgent
from rlberry.envs import Pendulum
- from rlberry.manager import AgentManager
+ from rlberry.manager import ExperimentManager


def env_ctor(env, wrap_spaces=True):
@@ -30,7 +30,7 @@ def env_ctor(env, wrap_spaces=True):
env_kwargs = dict(env=env)

# Create agent instance
- agent = AgentManager(
+ xp_manager = ExperimentManager(
SACAgent,
(env_ctor, env_kwargs),
fit_budget=fit_budget,
@@ -40,4 +40,4 @@ def env_ctor(env, wrap_spaces=True):
)

# Start training
- agent.fit()
+ xp_manager.fit()
10 changes: 5 additions & 5 deletions examples/demo_bandits/plot_mirror_bandit.py
@@ -158,17 +158,17 @@ def fit(self, budget=None, **kwargs):

# Experiment

- agent = ExperimentManager(
+ xp_manager = ExperimentManager(
SeqHalvAgent,
(env_ctor, env_kwargs),
fit_budget=100, # we use only 100 iterations for faster example run in doc.
n_fit=1,
agent_name="SH",
)
- agent.fit()
+ xp_manager.fit()

- rewards = read_writer_data([agent], tag="reward")["value"]
- actions = read_writer_data([agent], tag="action")["value"]
+ rewards = read_writer_data([xp_manager], tag="reward")["value"]
+ actions = read_writer_data([xp_manager], tag="action")["value"]


plt.boxplot([-rewards[actions == a] for a in range(6)])
@@ -178,5 +178,5 @@ def fit(self, budget=None, **kwargs):

print(
"The optimal action (fastest server) is server number ",
- agent.agent_handlers[0].optimal_action + 1,
+ xp_manager.agent_handlers[0].optimal_action + 1,
)
6 changes: 3 additions & 3 deletions examples/demo_bandits/plot_ucb_bandit.py
@@ -38,7 +38,7 @@ def __init__(self, env, sigma=1, **kwargs):
env_ctor = NormalBandit
env_kwargs = {"means": means, "stds": 2 * np.ones(len(means))}

- agent = ExperimentManager(
+ xp_manager = ExperimentManager(
UCBAgent,
(env_ctor, env_kwargs),
fit_budget=T,
@@ -52,7 +52,7 @@ def __init__(self, env, sigma=1, **kwargs):

# Agent training

- agent.fit()
+ xp_manager.fit()


# Compute and plot (pseudo-)regret
@@ -63,7 +63,7 @@ def compute_pseudo_regret(actions):
fig = plt.figure(1, figsize=(5, 3))
ax = plt.gca()
output = plot_writer_data(
- [agent],
+ [xp_manager],
tag="action",
preprocess_func=compute_pseudo_regret,
title="Cumulative Pseudo-Regret",
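The hunks above pass preprocess_func=compute_pseudo_regret to plot_writer_data, but the body of that function sits outside the diff context. A plausible sketch of such a preprocessing step (cumulative gap to the best arm) is shown below; the means array stands in for the arm means defined earlier in the example and is a placeholder, so this is illustrative rather than the file's actual implementation.

    import numpy as np

    means = np.array([0.0, 0.5, 1.0])  # placeholder arm means; the example defines its own

    def compute_pseudo_regret(actions):
        # Cumulative gap between the best mean and the mean of each pulled arm.
        actions = np.asarray(actions, dtype=int)
        return np.cumsum(np.max(means) - means[actions])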
8 changes: 4 additions & 4 deletions examples/demo_env/example_atari_atlantis_vectorized_ppo.py
@@ -67,7 +67,7 @@
}


- tuned_agent = ExperimentManager(
+ tuned_xp = ExperimentManager(
PPOAgent, # The Agent class.
(
atari_make,
@@ -100,7 +100,7 @@
print("-------- init agent : done!--------")
print("-------- train agent --------")

- tuned_agent.fit()
+ tuned_xp.fit()

print("-------- train agent : done!--------")

@@ -118,7 +118,7 @@

observation, info = env.reset()
for tt in range(30000):
- action = tuned_agent.get_agent_instances()[0].policy(observation)
+ action = tuned_xp.get_agent_instances()[0].policy(observation)
observation, reward, terminated, truncated, info = env.step(action)
done = terminated or truncated
if done:
@@ -128,7 +128,7 @@

print("-------- test agent with video : done!--------")
final_test_time = datetime.now()
- tuned_agent.save()
+ tuned_xp.save()

# need to move the final result inside the folder used for documentation
os.rename(
8 changes: 4 additions & 4 deletions examples/demo_env/example_atari_breakout_vectorized_ppo.py
@@ -67,7 +67,7 @@
}


- tuned_agent = ExperimentManager(
+ tuned_xp = ExperimentManager(
PPOAgent, # The Agent class.
(
atari_make,
@@ -100,7 +100,7 @@
print("-------- init agent : done!--------")
print("-------- train agent --------")

- tuned_agent.fit()
+ tuned_xp.fit()

print("-------- train agent : done!--------")

@@ -118,7 +118,7 @@

observation, info = env.reset()
for tt in range(30000):
- action = tuned_agent.get_agent_instances()[0].policy(observation)
+ action = tuned_xp.get_agent_instances()[0].policy(observation)
observation, reward, terminated, truncated, info = env.step(action)
done = terminated or truncated
if done:
@@ -128,7 +128,7 @@

print("-------- test agent with video : done!--------")
final_test_time = datetime.now()
- tuned_agent.save()
+ tuned_xp.save()

# need to move the final result inside the folder used for documentation
os.rename(
8 changes: 4 additions & 4 deletions examples/demo_env/video_plot_atari_freeway.py
@@ -45,7 +45,7 @@
"is_policy": False, # The network should output a distribution
}

- tuned_agent = ExperimentManager(
+ tuned_xp = ExperimentManager(
DQNAgent, # The Agent class.
(
atari_make,
@@ -76,7 +76,7 @@
print("-------- init agent : done!--------")
print("-------- train agent --------")

- tuned_agent.fit()
+ tuned_xp.fit()

print("-------- train agent : done!--------")

@@ -94,7 +94,7 @@

observation, info = env.reset()
for tt in range(30000):
- action = tuned_agent.get_agent_instances()[0].policy(observation)
+ action = tuned_xp.get_agent_instances()[0].policy(observation)
observation, reward, terminated, truncated, info = env.step(action)
done = terminated or truncated
if done:
@@ -104,7 +104,7 @@

print("-------- test agent with video : done!--------")
final_test_time = datetime.now()
- tuned_agent.save()
+ tuned_xp.save()

# need to move the final result inside the folder used for documentation
os.rename("_video/temp/rl-video-episode-0.mp4", "_video/video_plot_atari_freeway.mp4")
6 changes: 3 additions & 3 deletions examples/plot_agent_manager.py
@@ -1,8 +1,8 @@
"""
=======================
- A demo of Agent Manager
+ A demo of Experiment Manager
=======================
- In this example, we use the agent manager.
+ In this example, we use the ExperimentManager.
First, we initialize a grid world environment with finite state space and actions.
A grid world is a simple environment with finite states and actions, on which
@@ -14,7 +14,7 @@
Q(s, a) \\leftarrow \sum_{s^{\prime}} p(s'|a, s)\\left( R(s, a)+\gamma \max _{a^{\prime}} Q(s^{\prime}, a^{\prime}) \\right).
- Finally, we compare with a baseline provided by a random policy using the Agent Manager class which trains, evaluates and gathers statistics about the two agents.
+ Finally, we compare with a baseline provided by a random policy using the ExperimentManager class which trains, evaluates and gathers statistics about the two agents.
"""

from rlberry.envs import GridWorld
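The docstring above quotes the Q-value iteration update; as a side note, here is what one sweep of that update looks like in plain NumPy. The (S, A, S) layout of the transition array and the (S, A) layout of the rewards are assumptions made for this sketch, not something the example prescribes.

    import numpy as np

    def q_value_iteration_step(Q, P, R, gamma):
        """One sweep of Q(s, a) <- sum_{s'} p(s'|a, s) * (R(s, a) + gamma * max_{a'} Q(s', a')).

        P[s, a, s'] has shape (S, A, S) and R[s, a] has shape (S, A) (assumed layouts).
        """
        V_next = Q.max(axis=1)  # max_{a'} Q(s', a'), one value per next state
        # R is pulled out of the sum because the transition probabilities sum to 1.
        return R + np.einsum("sat,t->sa", P, gamma * V_next)

    # Tiny usage example on random data.
    S, A = 4, 2
    rng = np.random.default_rng(0)
    P = rng.dirichlet(np.ones(S), size=(S, A))  # valid transition kernel, shape (S, A, S)
    R = rng.normal(size=(S, A))
    Q = np.zeros((S, A))
    for _ in range(200):
        Q = q_value_iteration_step(Q, P, R, gamma=0.9)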
8 changes: 4 additions & 4 deletions examples/plot_writer_wrapper.py
@@ -53,9 +53,9 @@ def __init__(self, env, **kwargs):
)

env = env_ctor(**env_kwargs)
- agent = ExperimentManager(VIAgent, (env_ctor, env_kwargs), fit_budget=10, n_fit=3)
+ xp_manager = ExperimentManager(VIAgent, (env_ctor, env_kwargs), fit_budget=10, n_fit=3)

- agent.fit(budget=10)
+ xp_manager.fit(budget=10)
# comment the line above if you only want to load data from rlberry_data.


@@ -66,14 +66,14 @@ def compute_reward(rewards):

# Plot of the cumulative reward.
output = plot_writer_data(
- agent, tag="reward", preprocess_func=compute_reward, title="Cumulative Reward"
+ xp_manager, tag="reward", preprocess_func=compute_reward, title="Cumulative Reward"
)
# The output is for 500 global steps because it uses 10 fit_budget * horizon

# Log-Log plot :
fig, ax = plt.subplots(1, 1)
plot_writer_data(
- agent,
+ xp_manager,
tag="reward",
preprocess_func=compute_reward,
title="Cumulative Reward",
6 changes: 3 additions & 3 deletions long_tests/rl_agent/ltest_mbqvi_applegold.py
@@ -11,7 +11,7 @@

# hyperparameters from https://github.com/DLR-RM/rl-baselines3-zoo
def test_mbqvi_applegold():
- rbagent = ExperimentManager(
+ rb_xp = ExperimentManager(
MBQVIAgent,
(AppleGold, None),
init_kwargs=params,
@@ -23,6 +23,6 @@ def test_mbqvi_applegold():
eval_kwargs=dict(eval_horizon=1000),
)

- rbagent.fit()
- evaluation = evaluate_agents([rbagent], n_simulations=16, show=False).values
+ rb_xp.fit()
+ evaluation = evaluate_agents([rb_xp], n_simulations=16, show=False).values
assert np.mean(evaluation) > 470
6 changes: 3 additions & 3 deletions long_tests/torch_agent/ltest_a2c_cartpole.py
@@ -24,7 +24,7 @@ def test_a2c_cartpole():
env_ctor = gym_make
env_kwargs = dict(id="CartPole-v1")

- rbagent = ExperimentManager(
+ rb_xp = ExperimentManager(
A2CAgent,
(env_ctor, env_kwargs),
agent_name="A2CAgent",
@@ -46,8 +46,8 @@ def test_a2c_cartpole():
seed=42,
)

- rbagent.fit()
- writer_data = rbagent.get_writer_data()
+ rb_xp.fit()
+ writer_data = rb_xp.get_writer_data()
id500 = [
writer_data[idx].loc[writer_data[idx]["tag"] == "episode_rewards", "value"]
== 500
Expand Down