update names following PR350(part2): rlberry-py#350
JulienT01 committed Jul 28, 2023
1 parent 357ba1d commit 399c359
Showing 25 changed files with 99 additions and 99 deletions.
16 changes: 8 additions & 8 deletions docs/basics/DeepRLTutorial/TutorialDeepRL.rst
@@ -151,7 +151,7 @@ default networks are:
"""
The ExperimentManager class is compact way of experimenting with a deepRL agent.
"""
- default_agent = ExperimentManager(
+ default_xp = ExperimentManager(
A2CAgent, # The Agent class.
(gym_make, dict(id="CartPole-v1")), # The Environment to solve.
fit_budget=3e5, # The number of interactions
@@ -168,12 +168,12 @@ default networks are:
)
print("Training ...")
- default_agent.fit() # Trains the agent on fit_budget steps!
+ default_xp.fit() # Trains the agent on fit_budget steps!
# Plot the training data:
_ = plot_writer_data(
- [default_agent],
+ [default_xp],
tag="episode_rewards",
title="Training Episode Cumulative Rewards",
show=True,
@@ -256,7 +256,7 @@ default networks are:
print("Evaluating ...")
_ = evaluate_agents(
- [default_agent], n_simulations=50, show=True
+ [default_xp], n_simulations=50, show=True
) # Evaluate the trained agent on
# 10 simulations of 500 steps each.
@@ -353,7 +353,7 @@ and bigger batch size to have more stable training.
.. code:: python
- tuned_agent = ExperimentManager(
+ tuned_xp = ExperimentManager(
A2CAgent, # The Agent class.
(gym_make, dict(id="CartPole-v1")), # The Environment to solve.
init_kwargs=dict( # Where to put the agent's hyperparameters
@@ -385,12 +385,12 @@ and bigger batch size to have more stable training.
print("Training ...")
- tuned_agent.fit() # Trains the agent on fit_budget steps!
+ tuned_xp.fit() # Trains the agent on fit_budget steps!
# Plot the training data:
_ = plot_writer_data(
- [default_agent, tuned_agent],
+ [default_xp, tuned_xp],
tag="episode_rewards",
title="Training Episode Cumulative Rewards",
show=True,
@@ -469,7 +469,7 @@ and bigger batch size to have more stable training.
print("Evaluating ...")
# Evaluate each trained agent on 10 simulations of 500 steps each.
- _ = evaluate_agents([default_agent, tuned_agent], n_simulations=50, show=True)
+ _ = evaluate_agents([default_xp, tuned_xp], n_simulations=50, show=True)
.. parsed-literal::
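For readers following the renamed tutorial, here is a minimal end-to-end sketch of the workflow these hunks converge on: build an ExperimentManager, fit it, plot the writer data, then evaluate. It assumes the imports used in the tutorial (ExperimentManager, plot_writer_data and evaluate_agents from rlberry.manager, gym_make from rlberry.envs, A2CAgent from rlberry.agents.torch); the small fit_budget is an arbitrary illustration value, not the tutorial's 3e5.

    from rlberry.envs import gym_make
    from rlberry.agents.torch import A2CAgent
    from rlberry.manager import ExperimentManager, evaluate_agents, plot_writer_data

    # Build the experiment: agent class, environment constructor, training budget.
    default_xp = ExperimentManager(
        A2CAgent,                            # The Agent class.
        (gym_make, dict(id="CartPole-v1")),  # The environment to solve.
        fit_budget=int(1e4),                 # Illustration-sized budget.
        n_fit=1,                             # Number of agent instances to train.
        agent_name="A2C default",
    )

    default_xp.fit()  # Train on fit_budget steps.

    # Training curves logged by the agents' writers.
    _ = plot_writer_data([default_xp], tag="episode_rewards", show=True)

    # Monte-Carlo evaluation of the trained agent(s).
    _ = evaluate_agents([default_xp], n_simulations=50, show=True)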
6 changes: 3 additions & 3 deletions docs/basics/quick_start_rl/quickstart.rst
@@ -146,7 +146,7 @@ Our goal is then to assess the performance of the two algorithms.
There are a number of agents that are already coded in rlberry. See the
module :class:`~rlberry.agents.Agent` for more informations.

- Agent Manager
+ Experiment Manager
-------------

One of the main feature of rlberry is its :class:`~rlberry.manager.ExperimentManager`
@@ -157,7 +157,7 @@ class. Here is a diagram to explain briefly what it does.
:align: center


- In a few words, agent manager spawns agents and environments for training and
+ In a few words, experiment manager spawns agents and environments for training and
then once the agents are trained, it uses these agents and new environments
to evaluate how well the agent perform. All of these steps can be
done several times to assess stochasticity of agents and/or environment.
@@ -177,7 +177,7 @@ This gives us 1 value per agent. We do this 10 times (so 10 times 10
equal 100 simulations) in order to have an idea of the variability of
our estimation.

- In order to manage the agents, we use an Agent Manager. The manager will
+ In order to manage the agents, we use an ExperimentManager. The manager will
then spawn agents as desired during the experiment.


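To make the quickstart paragraph about spawning and evaluating concrete, here is a small sketch under the new naming. It reuses the GridWorld environment and the (constructor, kwargs) pattern that appear later in this diff; the ValueIterationAgent import path and the hyperparameter values are assumptions for illustration, not part of this commit.

    from rlberry.envs import GridWorld
    from rlberry.agents.dynprog import ValueIterationAgent  # assumed import path
    from rlberry.manager import ExperimentManager, evaluate_agents

    xp_manager = ExperimentManager(
        ValueIterationAgent,
        (GridWorld, None),                   # environment constructor and kwargs
        init_kwargs=dict(gamma=0.95),        # hypothetical hyperparameter
        fit_budget=100,
        n_fit=10,                            # train 10 independent instances
        eval_kwargs=dict(eval_horizon=100),
        agent_name="VI",
    )
    xp_manager.fit()

    # 10 evaluation simulations per manager, drawn from the fitted instances;
    # combined with n_fit=10 this is the kind of "10 times 10" scheme described above.
    evaluation = evaluate_agents([xp_manager], n_simulations=10, show=False).values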
6 changes: 3 additions & 3 deletions examples/demo_agents/demo_SAC.py
@@ -11,7 +11,7 @@
import gymnasium as gym
from rlberry.agents.torch.sac import SACAgent
from rlberry.envs import Pendulum
- from rlberry.manager import AgentManager
+ from rlberry.manager import ExperimentManager


def env_ctor(env, wrap_spaces=True):
@@ -30,7 +30,7 @@ def env_ctor(env, wrap_spaces=True):
env_kwargs = dict(env=env)

# Create agent instance
- agent = AgentManager(
+ xp_manager = ExperimentManager(
SACAgent,
(env_ctor, env_kwargs),
fit_budget=fit_budget,
@@ -40,4 +40,4 @@ def env_ctor(env, wrap_spaces=True):
)

# Start training
- agent.fit()
+ xp_manager.fit()
10 changes: 5 additions & 5 deletions examples/demo_bandits/plot_mirror_bandit.py
@@ -158,17 +158,17 @@ def fit(self, budget=None, **kwargs):

# Experiment

- agent = ExperimentManager(
+ xp_manager = ExperimentManager(
SeqHalvAgent,
(env_ctor, env_kwargs),
fit_budget=100, # we use only 100 iterations for faster example run in doc.
n_fit=1,
agent_name="SH",
)
- agent.fit()
+ xp_manager.fit()

- rewards = read_writer_data([agent], tag="reward")["value"]
- actions = read_writer_data([agent], tag="action")["value"]
+ rewards = read_writer_data([xp_manager], tag="reward")["value"]
+ actions = read_writer_data([xp_manager], tag="action")["value"]


plt.boxplot([-rewards[actions == a] for a in range(6)])
@@ -178,5 +178,5 @@ def fit(self, budget=None, **kwargs):

print(
"The optimal action (fastest server) is server number ",
- agent.agent_handlers[0].optimal_action + 1,
+ xp_manager.agent_handlers[0].optimal_action + 1,
)
6 changes: 3 additions & 3 deletions examples/demo_bandits/plot_ucb_bandit.py
@@ -38,7 +38,7 @@ def __init__(self, env, sigma=1, **kwargs):
env_ctor = NormalBandit
env_kwargs = {"means": means, "stds": 2 * np.ones(len(means))}

- agent = ExperimentManager(
+ xp_manager = ExperimentManager(
UCBAgent,
(env_ctor, env_kwargs),
fit_budget=T,
@@ -52,7 +52,7 @@ def __init__(self, env, sigma=1, **kwargs):

# Agent training

- agent.fit()
+ xp_manager.fit()


# Compute and plot (pseudo-)regret
@@ -63,7 +63,7 @@ def compute_pseudo_regret(actions):
fig = plt.figure(1, figsize=(5, 3))
ax = plt.gca()
output = plot_writer_data(
- [agent],
+ [xp_manager],
tag="action",
preprocess_func=compute_pseudo_regret,
title="Cumulative Pseudo-Regret",
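The hunks above pass preprocess_func=compute_pseudo_regret to plot_writer_data, but the body of that function sits outside the diff context. A plausible sketch of such a preprocessing step (cumulative gap to the best arm) is shown below; the means array stands in for the arm means defined earlier in the example and is a placeholder, so this is illustrative rather than the file's actual implementation.

    import numpy as np

    means = np.array([0.0, 0.5, 1.0])  # placeholder arm means; the example defines its own

    def compute_pseudo_regret(actions):
        # Cumulative gap between the best mean and the mean of each pulled arm.
        actions = np.asarray(actions, dtype=int)
        return np.cumsum(np.max(means) - means[actions])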
8 changes: 4 additions & 4 deletions examples/demo_env/example_atari_atlantis_vectorized_ppo.py
@@ -67,7 +67,7 @@
}


- tuned_agent = ExperimentManager(
+ tuned_xp = ExperimentManager(
PPOAgent, # The Agent class.
(
atari_make,
@@ -100,7 +100,7 @@
print("-------- init agent : done!--------")
print("-------- train agent --------")

- tuned_agent.fit()
+ tuned_xp.fit()

print("-------- train agent : done!--------")

@@ -118,7 +118,7 @@

observation, info = env.reset()
for tt in range(30000):
- action = tuned_agent.get_agent_instances()[0].policy(observation)
+ action = tuned_xp.get_agent_instances()[0].policy(observation)
observation, reward, terminated, truncated, info = env.step(action)
done = terminated or truncated
if done:
@@ -128,7 +128,7 @@

print("-------- test agent with video : done!--------")
final_test_time = datetime.now()
- tuned_agent.save()
+ tuned_xp.save()

# need to move the final result inside the folder used for documentation
os.rename(
8 changes: 4 additions & 4 deletions examples/demo_env/example_atari_breakout_vectorized_ppo.py
@@ -67,7 +67,7 @@
}


- tuned_agent = ExperimentManager(
+ tuned_xp = ExperimentManager(
PPOAgent, # The Agent class.
(
atari_make,
@@ -100,7 +100,7 @@
print("-------- init agent : done!--------")
print("-------- train agent --------")

- tuned_agent.fit()
+ tuned_xp.fit()

print("-------- train agent : done!--------")

@@ -118,7 +118,7 @@

observation, info = env.reset()
for tt in range(30000):
- action = tuned_agent.get_agent_instances()[0].policy(observation)
+ action = tuned_xp.get_agent_instances()[0].policy(observation)
observation, reward, terminated, truncated, info = env.step(action)
done = terminated or truncated
if done:
@@ -128,7 +128,7 @@

print("-------- test agent with video : done!--------")
final_test_time = datetime.now()
- tuned_agent.save()
+ tuned_xp.save()

# need to move the final result inside the folder used for documentation
os.rename(
8 changes: 4 additions & 4 deletions examples/demo_env/video_plot_atari_freeway.py
@@ -45,7 +45,7 @@
"is_policy": False, # The network should output a distribution
}

- tuned_agent = ExperimentManager(
+ tuned_xp = ExperimentManager(
DQNAgent, # The Agent class.
(
atari_make,
@@ -76,7 +76,7 @@
print("-------- init agent : done!--------")
print("-------- train agent --------")

- tuned_agent.fit()
+ tuned_xp.fit()

print("-------- train agent : done!--------")

@@ -94,7 +94,7 @@

observation, info = env.reset()
for tt in range(30000):
- action = tuned_agent.get_agent_instances()[0].policy(observation)
+ action = tuned_xp.get_agent_instances()[0].policy(observation)
observation, reward, terminated, truncated, info = env.step(action)
done = terminated or truncated
if done:
@@ -104,7 +104,7 @@

print("-------- test agent with video : done!--------")
final_test_time = datetime.now()
- tuned_agent.save()
+ tuned_xp.save()

# need to move the final result inside the folder used for documentation
os.rename("_video/temp/rl-video-episode-0.mp4", "_video/video_plot_atari_freeway.mp4")
6 changes: 3 additions & 3 deletions examples/plot_agent_manager.py
@@ -1,8 +1,8 @@
"""
=======================
- A demo of Agent Manager
+ A demo of Experiment Manager
=======================
- In this example, we use the agent manager.
+ In this example, we use the ExperimentManager.
First, we initialize a grid world environment with finite state space and actions.
A grid world is a simple environment with finite states and actions, on which
@@ -14,7 +14,7 @@
Q(s, a) \\leftarrow \sum_{s^{\prime}} p(s'|a, s)\\left( R(s, a)+\gamma \max _{a^{\prime}} Q(s^{\prime}, a^{\prime}) \\right).
- Finally, we compare with a baseline provided by a random policy using the Agent Manager class which trains, evaluates and gathers statistics about the two agents.
+ Finally, we compare with a baseline provided by a random policy using the ExperimentManager class which trains, evaluates and gathers statistics about the two agents.
"""

from rlberry.envs import GridWorld
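The docstring above quotes the Q-value iteration update; as a side note, here is what one sweep of that update looks like in plain NumPy. The (S, A, S) layout of the transition array and the (S, A) layout of the rewards are assumptions made for this sketch, not something the example prescribes.

    import numpy as np

    def q_value_iteration_step(Q, P, R, gamma):
        """One sweep of Q(s, a) <- sum_{s'} p(s'|a, s) * (R(s, a) + gamma * max_{a'} Q(s', a')).

        P[s, a, s'] has shape (S, A, S) and R[s, a] has shape (S, A) (assumed layouts).
        """
        V_next = Q.max(axis=1)  # max_{a'} Q(s', a'), one value per next state
        # R is pulled out of the sum because the transition probabilities sum to 1.
        return R + np.einsum("sat,t->sa", P, gamma * V_next)

    # Tiny usage example on random data.
    S, A = 4, 2
    rng = np.random.default_rng(0)
    P = rng.dirichlet(np.ones(S), size=(S, A))  # valid transition kernel, shape (S, A, S)
    R = rng.normal(size=(S, A))
    Q = np.zeros((S, A))
    for _ in range(200):
        Q = q_value_iteration_step(Q, P, R, gamma=0.9)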
8 changes: 4 additions & 4 deletions examples/plot_writer_wrapper.py
@@ -53,9 +53,9 @@ def __init__(self, env, **kwargs):
)

env = env_ctor(**env_kwargs)
- agent = ExperimentManager(VIAgent, (env_ctor, env_kwargs), fit_budget=10, n_fit=3)
+ xp_manager = ExperimentManager(VIAgent, (env_ctor, env_kwargs), fit_budget=10, n_fit=3)

- agent.fit(budget=10)
+ xp_manager.fit(budget=10)
# comment the line above if you only want to load data from rlberry_data.


@@ -66,14 +66,14 @@ def compute_reward(rewards):

# Plot of the cumulative reward.
output = plot_writer_data(
- agent, tag="reward", preprocess_func=compute_reward, title="Cumulative Reward"
+ xp_manager, tag="reward", preprocess_func=compute_reward, title="Cumulative Reward"
)
# The output is for 500 global steps because it uses 10 fit_budget * horizon

# Log-Log plot :
fig, ax = plt.subplots(1, 1)
plot_writer_data(
- agent,
+ xp_manager,
tag="reward",
preprocess_func=compute_reward,
title="Cumulative Reward",
6 changes: 3 additions & 3 deletions long_tests/rl_agent/ltest_mbqvi_applegold.py
@@ -11,7 +11,7 @@

# hyperparameters from https://github.com/DLR-RM/rl-baselines3-zoo
def test_mbqvi_applegold():
- rbagent = ExperimentManager(
+ rb_xp = ExperimentManager(
MBQVIAgent,
(AppleGold, None),
init_kwargs=params,
@@ -23,6 +23,6 @@ def test_mbqvi_applegold():
eval_kwargs=dict(eval_horizon=1000),
)

- rbagent.fit()
- evaluation = evaluate_agents([rbagent], n_simulations=16, show=False).values
+ rb_xp.fit()
+ evaluation = evaluate_agents([rb_xp], n_simulations=16, show=False).values
assert np.mean(evaluation) > 470
6 changes: 3 additions & 3 deletions long_tests/torch_agent/ltest_a2c_cartpole.py
@@ -24,7 +24,7 @@ def test_a2c_cartpole():
env_ctor = gym_make
env_kwargs = dict(id="CartPole-v1")

- rbagent = ExperimentManager(
+ rb_xp = ExperimentManager(
A2CAgent,
(env_ctor, env_kwargs),
agent_name="A2CAgent",
@@ -46,8 +46,8 @@ def test_a2c_cartpole():
seed=42,
)

- rbagent.fit()
- writer_data = rbagent.get_writer_data()
+ rb_xp.fit()
+ writer_data = rb_xp.get_writer_data()
id500 = [
writer_data[idx].loc[writer_data[idx]["tag"] == "episode_rewards", "value"]
== 500
Expand Down