From 0191bfcdab84aee40d9fd7096df053c9d0478085 Mon Sep 17 00:00:00 2001 From: Maxime Gasse Date: Tue, 15 Oct 2024 16:00:56 -0400 Subject: [PATCH 01/10] new obs["open_pages_titles"] --- browsergym/core/src/browsergym/core/env.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/browsergym/core/src/browsergym/core/env.py b/browsergym/core/src/browsergym/core/env.py index 22fffaaa..677bb19c 100644 --- a/browsergym/core/src/browsergym/core/env.py +++ b/browsergym/core/src/browsergym/core/env.py @@ -141,6 +141,9 @@ def __init__( "open_pages_urls": gym.spaces.Sequence( Unicode(min_length=0, max_length=TEXT_MAX_LENGTH) ), + "open_pages_titles": gym.spaces.Sequence( + Unicode(min_length=0, max_length=TEXT_MAX_LENGTH) + ), "active_page_index": gym.spaces.Box(low=0, high=255, dtype=int), "url": Unicode(min_length=0, max_length=TEXT_MAX_LENGTH), "screenshot": AnyBox( @@ -542,8 +545,9 @@ def _get_obs(self): "goal": _try_to_extract_legacy_goal(self.goal_object), # legacy goal, deprecated "goal_object": self.goal_object, # new goal format, list of messages openai style "open_pages_urls": [page.url for page in self.context.pages], + "open_pages_titles": [page.title() for page in self.context.pages], "active_page_index": np.asarray([self.context.pages.index(self.page)]), - "url": self.page.url, + "url": self.page.url, # redundant with "open_pages_urls" and "active_page_index" "screenshot": extract_screenshot(self.page), "dom_object": dom, "axtree_object": axtree, From 34a218229a448e2a6d46cc93039dc38e36ca4234 Mon Sep 17 00:00:00 2001 From: Maxime Gasse Date: Thu, 17 Oct 2024 16:41:59 -0400 Subject: [PATCH 02/10] wa / vwa url safeguard --- .../src/browsergym/visualwebarena/task.py | 21 +++++++++++++----- .../webarena/src/browsergym/webarena/task.py | 22 ++++++++++++++----- 2 files changed, 33 insertions(+), 10 deletions(-) diff --git a/browsergym/visualwebarena/src/browsergym/visualwebarena/task.py b/browsergym/visualwebarena/src/browsergym/visualwebarena/task.py 
index 00a3107e..3e5290bb 100644 --- a/browsergym/visualwebarena/src/browsergym/visualwebarena/task.py +++ b/browsergym/visualwebarena/src/browsergym/visualwebarena/task.py @@ -1,13 +1,14 @@ +import importlib.resources import json import logging -import playwright.sync_api -import importlib.resources import pathlib import tempfile -import requests - +import urllib.parse from typing import Optional, Tuple +import playwright.sync_api +import requests + from browsergym.core.task import AbstractBrowserTask from .instance import VisualWebArenaInstance @@ -226,7 +227,17 @@ def teardown(self) -> None: def validate( self, page: playwright.sync_api.Page, chat_messages: list[str] ) -> Tuple[float, bool, str, dict]: - # import webarena on instanciation + # check that all open tabs are either blank or within the list of WebArena URLs + authorized_locations = [ + urllib.parse.urlparse(url).netloc + for url in [*self.webarena_instance.urls, self.webarena_instance.home_url] + ] + for open_page in page.context.pages: + page_location = urllib.parse.urlparse(open_page.url).netloc + if not page_location in authorized_locations: + return 0, True, "", {"error": "Unauthorized url, terminating task"} + + # import webarena dynamically from visualwebarena.browser_env.actions import ActionTypes # if any, use the last assistant message as the stop answer for webarena diff --git a/browsergym/webarena/src/browsergym/webarena/task.py b/browsergym/webarena/src/browsergym/webarena/task.py index f1d5a59d..a7e3fe98 100644 --- a/browsergym/webarena/src/browsergym/webarena/task.py +++ b/browsergym/webarena/src/browsergym/webarena/task.py @@ -1,12 +1,13 @@ +import importlib.resources import json import logging -import numpy as np -import playwright.sync_api -import importlib.resources import tempfile - +import urllib.parse from typing import Optional, Tuple +import numpy as np +import playwright.sync_api + from browsergym.core.task import AbstractBrowserTask from .instance import WebArenaInstance @@ 
-154,7 +155,18 @@ def teardown(self) -> None: def validate( self, page: playwright.sync_api.Page, chat_messages: list[str] ) -> Tuple[float, bool, str, dict]: - # import webarena on instanciation + + # check that all open tabs are either blank or within the list of WebArena URLs + authorized_locations = [ + urllib.parse.urlparse(url).netloc + for url in [*self.webarena_instance.urls, self.webarena_instance.home_url] + ] + for open_page in page.context.pages: + page_location = urllib.parse.urlparse(open_page.url).netloc + if not page_location in authorized_locations: + return 0, True, "", {"error": "Unauthorized url, terminating task"} + + # import webarena dynamically from webarena.browser_env.actions import ActionTypes # if any, use the last assistant message as the stop answer for webarena From 6375634ebe4b8a6209c9bbb6cea1d09c180a1553 Mon Sep 17 00:00:00 2001 From: Maxime Gasse Date: Thu, 17 Oct 2024 16:42:14 -0400 Subject: [PATCH 03/10] basic_agent multi-tab update --- demo_agent/basic_agent.py | 35 +++++++++++++++++++++++++++++++---- 1 file changed, 31 insertions(+), 4 deletions(-) diff --git a/demo_agent/basic_agent.py b/demo_agent/basic_agent.py index 59ad59e8..e6515c4d 100644 --- a/demo_agent/basic_agent.py +++ b/demo_agent/basic_agent.py @@ -1,14 +1,14 @@ import base64 import dataclasses -import numpy as np import io import logging +import numpy as np from PIL import Image -from browsergym.experiments import Agent, AbstractAgentArgs from browsergym.core.action.highlevel import HighLevelActionSet from browsergym.core.action.python import PythonActionSet +from browsergym.experiments import AbstractAgentArgs, Agent from browsergym.utils.obs import flatten_axtree_to_str, flatten_dom_to_str, prune_html logger = logging.getLogger(__name__) @@ -40,6 +40,9 @@ def obs_preprocessor(self, obs: dict) -> dict: "goal_object": obs["goal_object"], "last_action": obs["last_action"], "last_action_error": obs["last_action_error"], + "open_pages_urls": obs["open_pages_urls"], 
+ "open_pages_titles": obs["open_pages_titles"], + "active_page_index": obs["active_page_index"], "axtree_txt": flatten_axtree_to_str(obs["axtree_object"]), "pruned_html": prune_html(flatten_dom_to_str(obs["dom_object"])), } @@ -68,7 +71,7 @@ def __init__( self.openai_client = OpenAI() self.action_set = HighLevelActionSet( - subsets=["chat", "bid", "infeas"], # define a subset of the action space + subsets=["chat", "tab", "nav", "bid", "infeas"], # define a subset of the action space # subsets=["chat", "bid", "coord", "infeas"] # allow the agent to also use x,y coordinates strict=False, # less strict on the parsing of the actions multiaction=False, # does not enable the agent to take multiple actions at once @@ -151,6 +154,29 @@ def get_action(self, obs: dict) -> tuple[str, dict]: # goal_object is directly presented as a list of openai-style messages user_msgs.extend(obs["goal_object"]) + # append url of all open tabs + user_msgs.append( + { + "type": "text", + "text": f"""\ +# Currently open tabs +""", + } + ) + for page_index, (page_url, page_title) in enumerate( + zip(obs["open_pages_urls"], obs["open_pages_titles"]) + ): + user_msgs.append( + { + "type": "text", + "text": f"""\ +Tab {page_index}{" (active tab)" if page_index == obs["active_page_index"] else ""} + Title: {page_title} + URL: {page_url} +""", + } + ) + # append page AXTree (if asked) if self.use_axtree: user_msgs.append( @@ -234,6 +260,7 @@ def get_action(self, obs: dict) -> tuple[str, dict]: { "type": "text", "text": f"""\ + {action} """, } @@ -261,7 +288,7 @@ def get_action(self, obs: dict) -> tuple[str, dict]: "text": f"""\ # Next action -You will now think step by step and produce your next best action. Reflect on your past actions, any resulting error message, the current state of the page before deciding on your next action. +You will now think step by step and produce your next best action. 
Reflect on your past actions, any resulting error message, and the current state of the page before deciding on your next action. """, } ) From 9e8943afaf316a8fb5b8eb314ccccadde7d2f076 Mon Sep 17 00:00:00 2001 From: Maxime Gasse Date: Thu, 17 Oct 2024 17:28:00 -0400 Subject: [PATCH 04/10] ci tests + fixes --- .../src/browsergym/core/action/functions.py | 33 +- .../src/browsergym/visualwebarena/task.py | 7 +- .../webarena/src/browsergym/webarena/task.py | 6 +- tests/core/test_actions_highlevel.py | 341 +++--------------- .../test_vwa_tasks_without_reset.py | 45 ++- 5 files changed, 121 insertions(+), 311 deletions(-) diff --git a/browsergym/core/src/browsergym/core/action/functions.py b/browsergym/core/src/browsergym/core/action/functions.py index c9dfe1cb..b6ae9eb3 100644 --- a/browsergym/core/src/browsergym/core/action/functions.py +++ b/browsergym/core/src/browsergym/core/action/functions.py @@ -1,8 +1,9 @@ # these are placeholders # all these symbols will be available in browsergym actions -import playwright.sync_api from typing import Literal +import playwright.sync_api + from .utils import ( add_demo_mode_effects, get_elem_by_bid, @@ -527,7 +528,15 @@ def new_tab(): # set the new page as the active page page = page.context.new_page() # trigger the callback that sets this page as active in browsergym - page.locate("html").dispatch_event("pageshow") + page.evaluate( + """\ +const event = new Event('pageshow', { + bubbles: true, // Whether the event bubbles up through the DOM or not + cancelable: false // Whether the event can be canceled +}); +window.dispatchEvent(event); +""" + ) # https://playwright.dev/python/docs/api/class-page#page-close @@ -548,7 +557,15 @@ def tab_close(): else: page = context.new_page() # trigger the callback that sets this page as active in browsergym - page.locate("html").dispatch_event("pageshow") + page.evaluate( + """\ +const event = new Event('pageshow', { + bubbles: true, // Whether the event bubbles up through the DOM or not + 
cancelable: false // Whether the event can be canceled +}); +window.dispatchEvent(event); +""" + ) # https://playwright.dev/python/docs/api/class-page#page-bring-to-front @@ -562,7 +579,15 @@ def tab_focus(index: int): global page # set the focused page as the active page page = page.context.pages[index] # trigger the callback that sets this page as active in browsergym - page.locate("html").dispatch_event("pageshow") + page.evaluate( + """\ +const event = new Event('pageshow', { + bubbles: true, // Whether the event bubbles up through the DOM or not + cancelable: false // Whether the event can be canceled +}); +window.dispatchEvent(event); +""" + ) # https://playwright.dev/python/docs/input#upload-files diff --git a/browsergym/visualwebarena/src/browsergym/visualwebarena/task.py b/browsergym/visualwebarena/src/browsergym/visualwebarena/task.py index 3e5290bb..6032410b 100644 --- a/browsergym/visualwebarena/src/browsergym/visualwebarena/task.py +++ b/browsergym/visualwebarena/src/browsergym/visualwebarena/task.py @@ -227,10 +227,11 @@ def teardown(self) -> None: def validate( self, page: playwright.sync_api.Page, chat_messages: list[str] ) -> Tuple[float, bool, str, dict]: - # check that all open tabs are either blank or within the list of WebArena URLs - authorized_locations = [ + + # safeguard: check that all open tabs are either blank or within the list of WebArena URLs + authorized_locations = ["newtab", ""] + [ urllib.parse.urlparse(url).netloc - for url in [*self.webarena_instance.urls, self.webarena_instance.home_url] + for url in [*self.webarena_instance.urls.values(), self.webarena_instance.home_url] ] for open_page in page.context.pages: page_location = urllib.parse.urlparse(open_page.url).netloc diff --git a/browsergym/webarena/src/browsergym/webarena/task.py b/browsergym/webarena/src/browsergym/webarena/task.py index a7e3fe98..90b63a83 100644 --- a/browsergym/webarena/src/browsergym/webarena/task.py +++ 
b/browsergym/webarena/src/browsergym/webarena/task.py @@ -156,10 +156,10 @@ def validate( self, page: playwright.sync_api.Page, chat_messages: list[str] ) -> Tuple[float, bool, str, dict]: - # check that all open tabs are either blank or within the list of WebArena URLs - authorized_locations = [ + # safeguard: check that all open tabs are either blank or within the list of WebArena URLs + authorized_locations = ["newtab", ""] + [ urllib.parse.urlparse(url).netloc - for url in [*self.webarena_instance.urls, self.webarena_instance.home_url] + for url in [*self.webarena_instance.urls.values(), self.webarena_instance.home_url] ] for open_page in page.context.pages: page_location = urllib.parse.urlparse(open_page.url).netloc diff --git a/tests/core/test_actions_highlevel.py b/tests/core/test_actions_highlevel.py index 93cbf5ee..9540c9c2 100644 --- a/tests/core/test_actions_highlevel.py +++ b/tests/core/test_actions_highlevel.py @@ -1,22 +1,20 @@ import ast -import bs4 -import gymnasium as gym import os import pathlib import platform -import pytest import re +import bs4 +import gymnasium as gym +import pytest from pyparsing.exceptions import ParseException # register openended gym environments import browsergym.core - -from browsergym.utils.obs import flatten_dom_to_str from browsergym.core.action.highlevel import HighLevelActionSet -from browsergym.core.action.parsers import highlevel_action_parser, NamedArgument +from browsergym.core.action.parsers import NamedArgument, highlevel_action_parser from browsergym.core.constants import BROWSERGYM_ID_ATTRIBUTE as BID_ATTR - +from browsergym.utils.obs import flatten_dom_to_str _IS_MAC_OS = platform.system() == "Darwin" @@ -1035,300 +1033,53 @@ def get_top_bottom_elems(obs): env.close() -# def test_meta_action(): -# env = BrowserEnv( -# task_entrypoint=OpenEndedTask, -# task_kwargs={"start_url": TEXT_INPUT_URL}, -# headless=__HEADLESS__, -# ) -# obs, info = env.reset() - -# soup = bs4.BeautifulSoup(obs["html"], "lxml") -# 
fname = soup.find("input", attrs={"id": "fname"}) -# lname = soup.find("input", attrs={"id": "lname"}) - -# # elementary action -# action = json.dumps({"action_type": "click", "x": 0, "y": 0}) - -# obs, reward, terminated, truncated, info = env.step(action) - -# assert not obs["last_action_error"] - -# # list of actions -# action = json.dumps( -# [{"action_type": "click", "x": 0, "y": 0}, {"action_type": "click", "x": 0, "y": 0}] -# ) - -# obs, reward, terminated, truncated, info = env.step(action) - -# assert not obs["last_action_error"] - -# # invalid action type -# action = json.dumps({"action_type": "clickk", "x": 0, "y": 0}) - -# obs, reward, terminated, truncated, info = env.step(action) - -# assert obs["last_action_error"] -# assert "Invalid" in obs["error_logs"] - -# # missing action type -# action = json.dumps({"x": 0, "y": 0}) - -# obs, reward, terminated, truncated, info = env.step(action) - -# assert obs["last_action_error"] -# assert "Missing" in obs["error_logs"] - -# # not JSON -# action = action_mapping.to_playwright_code("NOT_JSON" - -# obs, reward, terminated, truncated, info = env.step(action) - -# assert obs["last_action_error"] -# assert "JSONDecodeError" in obs["error_logs"] - -# # empty action list -# action = json.dumps([]) - -# obs, reward, terminated, truncated, info = env.step(action) - -# assert obs["last_action_error"] -# assert "Empty" in obs["error_logs"] - - -# def test_input_type_number(): -# env = BrowserEnv( -# task_entrypoint=GuessNumberTask, -# headless=__HEADLESS__, -# ) -# obs, info = env.reset() - -# soup = bs4.BeautifulSoup(obs["html"], "lxml") -# input_elem = soup.find("input", attrs={"type": "number"}) -# input_bid = input_elem.get(BID_ATTR) -# input_value = input_elem.get("value") - -# # type using bid -# action = json.dumps( -# { -# "action_type": "type", -# BID_ATTR: input_bid, -# "text": "6", -# } -# ) -# obs, reward, terminated, truncated, info = env.step(action) - -# soup = bs4.BeautifulSoup(obs["html"], "lxml") -# 
input_elem = soup.find("input", attrs={"type": "number"}) -# input_bid = input_elem.get(BID_ATTR) -# input_value = input_elem.get("value") - -# assert input_value == "6" - -# action = json.dumps( -# { -# "action_type": "type", -# BID_ATTR: input_bid, -# "text": "7", -# } -# ) -# obs, reward, terminated, truncated, info = env.step(action) - -# soup = bs4.BeautifulSoup(obs["html"], "lxml") -# input_elem = soup.find("input", attrs={"type": "number"}) -# input_bid = input_elem.get(BID_ATTR) -# input_value = input_elem.get("value") - -# assert input_value == "7" - - -# def test_auto_complete(): -# env = BrowserEnv( -# task_entrypoint=BookFlightTask, -# headless=__HEADLESS__, -# ) -# obs, info = env.reset() - -# soup = bs4.BeautifulSoup(obs["html"], "lxml") - -# # type using bid -# action = json.dumps( -# { -# "action_type": "type", -# BID_ATTR: "20", -# "text": "OGG", -# } -# ) -# obs, reward, terminated, truncated, info = env.step(action) -# soup = bs4.BeautifulSoup(obs["html"], "lxml") -# # find element with bid="33" -# element = soup.find("ul", attrs={BID_ATTR: "33"}) -# # extre the list li as python list -# # list_li = element.find_all("li") -# # assert len(list_li) > 0 -# # assert list_li[0].text == "Kahului, HI - Island of Maui, (OGG)" - - -# def test_clear_success(): -# env = BrowserEnv( -# task_entrypoint=OpenEndedTask, -# task_kwargs={"start_url": TEXT_INPUT_URL}, -# headless=__HEADLESS__, -# ) -# obs, info = env.reset() - -# fname_element = env.driver.find_element(By.CSS_SELECTOR, value="input[id='fname']") -# fname_element.send_keys("Christian") - -# # clear using bid -# action = json.dumps( -# { -# "action_type": "clear", -# BID_ATTR: fname_element.get_attribute(BID_ATTR), -# } -# ) - -# assert fname_element.get_attribute("value") == "Christian" -# obs, reward, terminated, truncated, info = env.step(action) -# assert not obs["last_action_error"] -# assert fname_element.get_attribute("value") == "" - -# fname_element.send_keys("Christian") -# # clear using 
bid -# action = json.dumps({"action_type": "clear", BID_ATTR: fname_element.get_attribute(BID_ATTR)}) - -# obs, reward, terminated, truncated, info = env.step(action) -# assert not obs["last_action_error"] -# assert fname_element.get_attribute("value") == "" - -# fname_element.send_keys("Christian") -# # clear using css selector -# action = json.dumps({"action_type": "clear", "css_selector": "input[id='fname']"}) - -# obs, reward, terminated, truncated, info = env.step(action) -# assert not obs["last_action_error"] -# assert fname_element.get_attribute("value") == "" - -# fname_element.send_keys("Christian") - -# x, y = re.search( -# r"\[" + fname_element.get_attribute(BID_ATTR) + r"\] \(([-+]?[0-9\.]+), ([-+]?[0-9\.]+)\)", -# obs["accessibility_tree"], -# ).groups() - -# # type at x, y coordinates -# action = json.dumps({"action_type": "clear", "x": x, "y": y}) - -# obs, reward, terminated, truncated, info = env.step(action) -# assert not obs["last_action_error"] -# assert fname_element.get_attribute("value") == "" - -# fname_element.send_keys("Christian") - -# # clear in currently focused element -# action = json.dumps({"action_type": "clear"}) -# obs, reward, terminated, truncated, info = env.step(action) -# assert not obs["last_action_error"] -# assert fname_element.get_attribute("value") == "" - -# # de-focus (click 0, 0), then type text -# action = json.dumps({"action_type": "clear", "x": 0, "y": 0}) -# obs, reward, terminated, truncated, info = env.step(action) - -# assert not obs["last_action_error"] - - -# def test_clear_error(): -# """In this test, we try to build a ClearAction but we use invalid args, and we check that the action fails when executed in the environment""" -# env = BrowserEnv( -# task_entrypoint=OpenEndedTask, -# task_kwargs={"start_url": TEXT_INPUT_URL}, -# headless=__HEADLESS__, -# ) -# obs, info = env.reset() - -# soup = bs4.BeautifulSoup(obs["html"], "lxml") -# fname = soup.find("input", attrs={"id": "fname"}) -# lname = 
soup.find("input", attrs={"id": "lname"}) - -# ################ 1. invalid args : both css_selector and bid mentionned ################ -# action = json.dumps( -# { -# "action_type": "clear", -# BID_ATTR: fname.get(BID_ATTR), -# "css_selector": "input[id='fname']", -# } -# ) - -# obs, reward, terminated, truncated, info = env.step(action) - -# assert obs["last_action_error"] - -# soup = bs4.BeautifulSoup(obs["html"], "lxml") -# fname = soup.find("input", attrs={"id": "fname"}) -# lname = soup.find("input", attrs={"id": "lname"}) - -# x, y = re.search( -# r"\[" + lname.get(BID_ATTR) + r"\] \(([-+]?[0-9\.]+), ([-+]?[0-9\.]+)\)", -# obs["accessibility_tree"], -# ).groups() - -# ################ 2. invalid args : both bid and position mentionned ################ - -# action = json.dumps( -# { -# "action_type": "clear", -# BID_ATTR: lname.get(BID_ATTR), -# "x": x, -# "y": y, -# } -# ) - -# obs, reward, terminated, truncated, info = env.step(action) - -# assert obs["last_action_error"] - -# soup = bs4.BeautifulSoup(obs["html"], "lxml") -# fname = soup.find("input", attrs={"id": "fname"}) -# lname = soup.find("input", attrs={"id": "lname"}) - -# x, y = re.search( -# r"\[" + lname.get(BID_ATTR) + r"\] \(([-+]?[0-9\.]+), ([-+]?[0-9\.]+)\)", -# obs["accessibility_tree"], -# ).groups() - -# ################ 3. 
invalid args : both css_selector and position mentionned ################ - -# action = json.dumps( -# { -# "action_type": "clear", -# "css_selector": "input[id='lname']", -# "x": x, -# "y": y, -# } -# ) - -# obs, reward, terminated, truncated, info = env.step(action) - -# assert obs["last_action_error"] +def test_tab_actions(): + action_set = HighLevelActionSet(subsets=["tab", "nav"]) + env = gym.make( + "browsergym/openended", + task_kwargs={"start_url": CHECKBOX_URL}, + headless=__HEADLESS, + slow_mo=__SLOW_MO, + timeout=__TIMEOUT, + action_mapping=action_set.to_python_code, + ) + obs, info = env.reset() + assert not obs["last_action_error"] + assert len(obs["open_pages_urls"]) == 1 + assert len(obs["open_pages_titles"]) == 1 + assert obs["active_page_index"] == 0 + assert obs["open_pages_urls"][obs["active_page_index"][0]] == obs["url"] -@pytest.mark.skip(reason="Not implemented yet") -def test_tab_focus(): - # TODO - pass + obs, reward, terminated, truncated, info = env.step("new_tab()") + assert not obs["last_action_error"] + assert len(obs["open_pages_urls"]) == 2 + assert len(obs["open_pages_titles"]) == 2 + assert obs["active_page_index"] == 1 + assert obs["open_pages_urls"][obs["active_page_index"][0]] == obs["url"] + obs, reward, terminated, truncated, info = env.step(f"goto({repr(TEXTBOX_URL)})") + assert not obs["last_action_error"] + assert len(obs["open_pages_urls"]) == 2 + assert len(obs["open_pages_titles"]) == 2 + assert obs["active_page_index"] == 1 + assert obs["open_pages_urls"][obs["active_page_index"][0]] == obs["url"] -@pytest.mark.skip(reason="Not implemented yet") -def test_new_tab(): - # TODO - pass + obs, reward, terminated, truncated, info = env.step("tab_focus(0)") + assert not obs["last_action_error"] + assert len(obs["open_pages_urls"]) == 2 + assert len(obs["open_pages_titles"]) == 2 + assert obs["active_page_index"] == 0 + assert obs["open_pages_urls"][obs["active_page_index"][0]] == obs["url"] + obs, reward, terminated, truncated, 
info = env.step("tab_close()") + assert not obs["last_action_error"] + assert len(obs["open_pages_urls"]) == 1 + assert len(obs["open_pages_titles"]) == 1 + assert obs["active_page_index"] == 0 + assert obs["open_pages_urls"][obs["active_page_index"][0]] == obs["url"] -@pytest.mark.skip(reason="Not implemented yet") -def test_tab_close(): - # TODO - pass + env.close() def test_mouse_down_up(): diff --git a/tests/visualwebarena/test_vwa_tasks_without_reset.py b/tests/visualwebarena/test_vwa_tasks_without_reset.py index 71896c9d..05d55856 100644 --- a/tests/visualwebarena/test_vwa_tasks_without_reset.py +++ b/tests/visualwebarena/test_vwa_tasks_without_reset.py @@ -1,23 +1,21 @@ -import gymnasium as gym import logging import os -import playwright.sync_api -import pytest import random -from tenacity import retry, stop_after_attempt, retry_if_exception_type, wait_fixed +import gymnasium as gym +import playwright.sync_api +import pytest +from tenacity import retry, retry_if_exception_type, stop_after_attempt, wait_fixed # register gym environments import browsergym.visualwebarena - __SLOW_MO = 1000 if "DISPLAY_BROWSER" in os.environ else None __HEADLESS = False if "DISPLAY_BROWSER" in os.environ else True from browsergym.visualwebarena import VISUALWEBARENA_TASK_IDS_WITHOUT_RESET - rng = random.Random(1) task_ids = rng.sample(VISUALWEBARENA_TASK_IDS_WITHOUT_RESET, 25) print(task_ids) @@ -40,3 +38,38 @@ def test_env_generic(task_id): ) obs, info = env.reset() env.close() + + +@retry( + stop=stop_after_attempt(5), + retry=retry_if_exception_type(playwright.sync_api.TimeoutError), + wait=wait_fixed(2), + reraise=True, + before_sleep=lambda _: logging.info("Retrying due to a TimeoutError..."), +) +def test_domain_safeguard(): + env = gym.make( + f"browsergym/visualwebarena.398", + headless=__HEADLESS, + slow_mo=__SLOW_MO, + ) + obs, info = env.reset() + assert not obs["last_action_error"] + + obs, reward, terminated, truncated, info = env.step("new_tab()") + assert not 
obs["last_action_error"] + assert not (terminated or truncated) + + obs, reward, terminated, truncated, info = env.step("tab_close()") + assert not obs["last_action_error"] + assert not (terminated or truncated) + + obs, reward, terminated, truncated, info = env.step("tab_focus(0)") + assert not obs["last_action_error"] + assert not (terminated or truncated) + + obs, reward, terminated, truncated, info = env.step('goto("http://www.google.com")') + assert not obs["last_action_error"] + assert terminated + + env.close() From 2b287b7abc956cfd09bbd248ac15136349c093c5 Mon Sep 17 00:00:00 2001 From: Maxime Gasse Date: Thu, 17 Oct 2024 17:28:33 -0400 Subject: [PATCH 05/10] gitignore --- .gitignore | 2 ++ sandbox.py | 23 +++++++++++++++++++++++ sandbox2.py | 22 ++++++++++++++++++++++ sandbox3.py | 0 4 files changed, 47 insertions(+) create mode 100644 sandbox.py create mode 100644 sandbox2.py create mode 100644 sandbox3.py diff --git a/.gitignore b/.gitignore index 4e091724..d6308cca 100644 --- a/.gitignore +++ b/.gitignore @@ -139,3 +139,5 @@ error_logs.txt tests/results tmp.py .vscode/settings.json + +results/ diff --git a/sandbox.py b/sandbox.py new file mode 100644 index 00000000..acfa4bba --- /dev/null +++ b/sandbox.py @@ -0,0 +1,23 @@ +from dataclasses import dataclass + +from dataclasses_json import DataClassJsonMixin + + +@dataclass +class Test(DataClassJsonMixin): + a: int + b: str + + def do_something(self): + print(self.a, self.b) + + +x: Test = Test(0, "hello") + +x_json = x.to_json() + +print(x_json) + +y = Test.from_json(x_json) + +y.do_something() diff --git a/sandbox2.py b/sandbox2.py new file mode 100644 index 00000000..5ead25f3 --- /dev/null +++ b/sandbox2.py @@ -0,0 +1,22 @@ +from browsergym.workarena import ( + AGENT_CURRICULUM_L2, + AGENT_CURRICULUM_L3, + TASK_CATEGORY_MAP, +) + +metadata = [] + +for task_name, category in TASK_CATEGORY_MAP.items(): + metadata.append((task_name, "l1", category)) + +for category, items in AGENT_CURRICULUM_L2.items(): + 
for task_set in items["buckets"]: + for task in task_set: + metadata.append((task.get_task_id(), "l2", category)) + +for category, items in AGENT_CURRICULUM_L3.items(): + for task_set in items["buckets"]: + for task in task_set: + metadata.append((task.get_task_id(), "l3", category)) + +print("\n".join([",".join(x) for x in metadata])) diff --git a/sandbox3.py b/sandbox3.py new file mode 100644 index 00000000..e69de29b From 78c09021c82c2448a302618e3c07dfaaa182b2aa Mon Sep 17 00:00:00 2001 From: Maxime Gasse Date: Thu, 17 Oct 2024 17:29:30 -0400 Subject: [PATCH 06/10] README update --- README.md | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index d711b045..b0121d5e 100644 --- a/README.md +++ b/README.md @@ -161,8 +161,7 @@ print("\n".join(env_ids)) If you want to experiment with a demo agent in BrowserGym, follow these steps: ```sh -cd demo-agent -conda env create -f environment.yml +conda env create -f demo-agent/environment.yml conda activate demo-agent # or simply use `pip install -r requirements.txt` playwright install chromium @@ -172,27 +171,27 @@ Our demo agent uses `openai` as a backend, be sure to set your `OPENAI_API_KEY`. 
Launch the demo agent on the open web: ```sh -python run_demo.py --task_name openended --start_url https://www.google.com +python demo-agent/run_demo.py --task_name openended --start_url https://www.google.com ``` Or use it to solve a simple MiniWoB task: ```sh -python run_demo.py --task_name miniwob.click-test +python demo-agent/run_demo.py --task_name miniwob.click-test ``` A VisualWebArena task: ```sh -python run_demo.py --task_name visualwebarena.398 +python demo-agent/run_demo.py --task_name visualwebarena.398 ``` A WebArena task: ```sh -python run_demo.py --task_name webarena.4 +python demo-agent/run_demo.py --task_name webarena.4 ``` A WorkArena task: ```sh -python run_demo.py --task_name workarena.servicenow.order-standard-laptop +python demo-agent/run_demo.py --task_name workarena.servicenow.order-standard-laptop ``` You can customize your experience by changing the `model_name` to your preferred LLM (it uses `gpt-4o-mini` by default), adding screenshots for your VLMs with `use_screenshot`, and much more! 
(see `python run_demo.py --help`) From 55a9a05947f3878ca2ae07e2e33c4a72d7884a4c Mon Sep 17 00:00:00 2001 From: Maxime Gasse Date: Thu, 17 Oct 2024 17:34:18 -0400 Subject: [PATCH 07/10] README + demo_agent cleanup --- README.md | 14 +++++++------- demo_agent/{basic_agent.py => agent.py} | 5 ++--- demo_agent/requirements.txt | 16 ++-------------- demo_agent/run_demo.py | 6 +++--- 4 files changed, 14 insertions(+), 27 deletions(-) rename demo_agent/{basic_agent.py => agent.py} (99%) diff --git a/README.md b/README.md index b0121d5e..fefb1473 100644 --- a/README.md +++ b/README.md @@ -161,8 +161,8 @@ print("\n".join(env_ids)) If you want to experiment with a demo agent in BrowserGym, follow these steps: ```sh -conda env create -f demo-agent/environment.yml -conda activate demo-agent +conda env create -f demo_agent/environment.yml +conda activate demo_agent # or simply use `pip install -r requirements.txt` playwright install chromium ``` @@ -171,27 +171,27 @@ Our demo agent uses `openai` as a backend, be sure to set your `OPENAI_API_KEY`. 
Launch the demo agent on the open web: ```sh -python demo-agent/run_demo.py --task_name openended --start_url https://www.google.com +python demo_agent/run_demo.py --task_name openended --start_url https://www.google.com ``` Or use it to solve a simple MiniWoB task: ```sh -python demo-agent/run_demo.py --task_name miniwob.click-test +python demo_agent/run_demo.py --task_name miniwob.click-test ``` A VisualWebArena task: ```sh -python demo-agent/run_demo.py --task_name visualwebarena.398 +python demo_agent/run_demo.py --task_name visualwebarena.398 ``` A WebArena task: ```sh -python demo-agent/run_demo.py --task_name webarena.4 +python demo_agent/run_demo.py --task_name webarena.4 ``` A WorkArena task: ```sh -python demo-agent/run_demo.py --task_name workarena.servicenow.order-standard-laptop +python demo_agent/run_demo.py --task_name workarena.servicenow.order-standard-laptop ``` You can customize your experience by changing the `model_name` to your preferred LLM (it uses `gpt-4o-mini` by default), adding screenshots for your VLMs with `use_screenshot`, and much more! 
(see `python run_demo.py --help`) diff --git a/demo_agent/basic_agent.py b/demo_agent/agent.py similarity index 99% rename from demo_agent/basic_agent.py rename to demo_agent/agent.py index e6515c4d..632c0bbc 100644 --- a/demo_agent/basic_agent.py +++ b/demo_agent/agent.py @@ -4,6 +4,7 @@ import logging import numpy as np +import openai from PIL import Image from browsergym.core.action.highlevel import HighLevelActionSet @@ -66,9 +67,7 @@ def __init__( if not (use_html or use_axtree): raise ValueError(f"Either use_html or use_axtree must be set to True.") - from openai import OpenAI - - self.openai_client = OpenAI() + self.openai_client = openai.OpenAI() self.action_set = HighLevelActionSet( subsets=["chat", "tab", "nav", "bid", "infeas"], # define a subset of the action space diff --git a/demo_agent/requirements.txt b/demo_agent/requirements.txt index b4614cce..a0fd3900 100644 --- a/demo_agent/requirements.txt +++ b/demo_agent/requirements.txt @@ -1,14 +1,2 @@ -browsergym-core>=0.3 -browsergym-experiments>=0.3 -openai>=1.35.4,<1.36 -langchain>=0.2,<0.3 -langchain_openai>=0.1.10,<0.2 -tiktoken -huggingface_hub -contexttimer -ipython -pyyaml>=6 -pandas -joblib -transformers -langchain_community>=0.2.6,<0.3 +browsergym +openai diff --git a/demo_agent/run_demo.py b/demo_agent/run_demo.py index a8702cd9..82cc2c96 100644 --- a/demo_agent/run_demo.py +++ b/demo_agent/run_demo.py @@ -1,11 +1,11 @@ import argparse +# locally defined agent +from agent import DemoAgentArgs + # browsergym experiments utils from browsergym.experiments import EnvArgs, ExpArgs, get_exp_result -# locally defined agent -from basic_agent import DemoAgentArgs - def str2bool(v): if isinstance(v, bool): From 9c5f700d938d6cc2f9655057c61312c89db1319f Mon Sep 17 00:00:00 2001 From: Maxime Gasse Date: Thu, 17 Oct 2024 17:38:13 -0400 Subject: [PATCH 08/10] rust dependency for tiktoken --- demo_agent/environment.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/demo_agent/environment.yml 
b/demo_agent/environment.yml index a1a4f216..1104051c 100644 --- a/demo_agent/environment.yml +++ b/demo_agent/environment.yml @@ -8,6 +8,7 @@ channels: dependencies: - python>=3.10 - pip + - rust - pip: - -r requirements.txt From 4bef8be71bfce8f53e79e7b937745807c7c302b4 Mon Sep 17 00:00:00 2001 From: Maxime Gasse Date: Thu, 17 Oct 2024 17:46:45 -0400 Subject: [PATCH 09/10] remove rust dependency --- demo_agent/environment.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/demo_agent/environment.yml b/demo_agent/environment.yml index 1104051c..a1a4f216 100644 --- a/demo_agent/environment.yml +++ b/demo_agent/environment.yml @@ -8,7 +8,6 @@ channels: dependencies: - python>=3.10 - pip - - rust - pip: - -r requirements.txt From afeac27aad89e3c83d1a58dc19eee10317770bf3 Mon Sep 17 00:00:00 2001 From: Maxime Gasse Date: Thu, 17 Oct 2024 17:47:05 -0400 Subject: [PATCH 10/10] tab_focus page.bring_to_front() --- browsergym/core/src/browsergym/core/action/functions.py | 1 + 1 file changed, 1 insertion(+) diff --git a/browsergym/core/src/browsergym/core/action/functions.py b/browsergym/core/src/browsergym/core/action/functions.py index b6ae9eb3..222629b2 100644 --- a/browsergym/core/src/browsergym/core/action/functions.py +++ b/browsergym/core/src/browsergym/core/action/functions.py @@ -578,6 +578,7 @@ def tab_focus(index: int): """ global page # set the focused page as the active page page = page.context.pages[index] + page.bring_to_front() # trigger the callback that sets this page as active in browsergym page.evaluate( """\