From 8891d50298696ddd445982bb644cc5048649d3b8 Mon Sep 17 00:00:00 2001 From: Maxime Gasse Date: Tue, 8 Oct 2024 17:00:26 -0400 Subject: [PATCH] minor comments and refactors --- .../src/browsergym/experiments/benchmark.py | 12 ++++++------ tests/experiments/test_benchmark.py | 4 ++-- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/browsergym/experiments/src/browsergym/experiments/benchmark.py b/browsergym/experiments/src/browsergym/experiments/benchmark.py index 65bdf1ea..0b85e7a5 100644 --- a/browsergym/experiments/src/browsergym/experiments/benchmark.py +++ b/browsergym/experiments/src/browsergym/experiments/benchmark.py @@ -212,24 +212,24 @@ def task_list_from_csv( seeds_rng=np.random.RandomState(42), ), ), - "workarena_l2_agent_curriculum": lambda: Benchmark( - name="workarena_l2_agent_curriculum", + "workarena_l2_agent_curriculum_eval": lambda: Benchmark( + name="workarena_l2_agent_curriculum_eval", high_level_action_set_args=DEFAULT_HIGHLEVEL_ACTION_SET_ARGS["workarena"], env_args_list=_make_env_args_list_from_workarena_curriculum( level="l2", task_category_filter=None, - meta_seed=42, + meta_seed=42, # meta seed for evaluation curriculum max_steps=50, curriculum_type="agent", ), ), - "workarena_l3_agent_curriculum": lambda: Benchmark( - name="workarena_l3_agent_curriculum", + "workarena_l3_agent_curriculum_eval": lambda: Benchmark( + name="workarena_l3_agent_curriculum_eval", high_level_action_set_args=DEFAULT_HIGHLEVEL_ACTION_SET_ARGS["workarena"], env_args_list=_make_env_args_list_from_workarena_curriculum( level="l3", task_category_filter=None, - meta_seed=42, + meta_seed=42, # meta seed for evaluation curriculum max_steps=50, curriculum_type="agent", ), diff --git a/tests/experiments/test_benchmark.py b/tests/experiments/test_benchmark.py index 75293783..9650b5af 100644 --- a/tests/experiments/test_benchmark.py +++ b/tests/experiments/test_benchmark.py @@ -60,8 +60,8 @@ def test_build_benchmarks(): "visualwebarena": 910, "workarena_l1": 33 * 10, "workarena_l1_sort": 6 * 10, - "workarena_l2_agent_curriculum": 235, - "workarena_l3_agent_curriculum": 235, + "workarena_l2_agent_curriculum_eval": 235, + "workarena_l3_agent_curriculum_eval": 235, } for name, benchmark_builder in BENCHMARKS.items(): benchmark = benchmark_builder()