From 11aa813532a3ce2983219a152871a8390ce7115e Mon Sep 17 00:00:00 2001 From: Maxime Gasse Date: Mon, 28 Oct 2024 10:38:25 -0400 Subject: [PATCH 1/2] gitignore --- .gitignore | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 7269e6c2..42f7df3e 100644 --- a/.gitignore +++ b/.gitignore @@ -146,4 +146,7 @@ results/ .vscode/launch.json # assistantbench -tests/assistantbench/assistantbench-predictions-test.jsonl \ No newline at end of file +tests/assistantbench/assistantbench-predictions-test.jsonl + +# weblinx +bg_wl_data/ From 081941486a94454e0e9cd1017967b814f4648116 Mon Sep 17 00:00:00 2001 From: Maxime Gasse Date: Mon, 28 Oct 2024 10:40:44 -0400 Subject: [PATCH 2/2] simplified Unicode() space --- .../core/src/browsergym/core/constants.py | 2 - browsergym/core/src/browsergym/core/env.py | 26 ++++----- browsergym/core/src/browsergym/core/spaces.py | 53 ++++--------------- 3 files changed, 21 insertions(+), 60 deletions(-) diff --git a/browsergym/core/src/browsergym/core/constants.py b/browsergym/core/src/browsergym/core/constants.py index 3169920d..8591e104 100644 --- a/browsergym/core/src/browsergym/core/constants.py +++ b/browsergym/core/src/browsergym/core/constants.py @@ -1,5 +1,3 @@ -TEXT_MAX_LENGTH = 2**32 - 1 - BROWSERGYM_ID_ATTRIBUTE = "bid" # Playwright's default is "data-testid" BROWSERGYM_VISIBILITY_ATTRIBUTE = "browsergym_visibility_ratio" BROWSERGYM_SETOFMARKS_ATTRIBUTE = "browsergym_set_of_marks" diff --git a/browsergym/core/src/browsergym/core/env.py b/browsergym/core/src/browsergym/core/env.py index f115a4dc..69cbd34f 100644 --- a/browsergym/core/src/browsergym/core/env.py +++ b/browsergym/core/src/browsergym/core/env.py @@ -14,7 +14,7 @@ from .action.base import execute_python_code from .action.highlevel import HighLevelActionSet from .chat import Chat -from .constants import BROWSERGYM_ID_ATTRIBUTE, EXTRACT_OBS_MAX_TRIES, TEXT_MAX_LENGTH +from .constants import BROWSERGYM_ID_ATTRIBUTE, EXTRACT_OBS_MAX_TRIES from .observation import ( MarkingError, _post_extract, @@ -131,21 +131,17 @@ def __init__( "chat_messages": gym.spaces.Sequence( gym.spaces.Dict( { - "role": Unicode(min_length=0, max_length=TEXT_MAX_LENGTH), - "message": Unicode(min_length=0, max_length=TEXT_MAX_LENGTH), + "role": Unicode(), + "message": Unicode(), } ) ), - "goal": Unicode(min_length=0, max_length=TEXT_MAX_LENGTH), + "goal": Unicode(), "goal_object": gym.spaces.Sequence(AnyDict()), - "open_pages_urls": gym.spaces.Sequence( - Unicode(min_length=0, max_length=TEXT_MAX_LENGTH) - ), - "open_pages_titles": gym.spaces.Sequence( - Unicode(min_length=0, max_length=TEXT_MAX_LENGTH) - ), + "open_pages_urls": gym.spaces.Sequence(Unicode()), + "open_pages_titles": gym.spaces.Sequence(Unicode()), "active_page_index": gym.spaces.Box(low=0, high=255, dtype=int), - "url": Unicode(min_length=0, max_length=TEXT_MAX_LENGTH), + "url": Unicode(), "screenshot": AnyBox( low=0, high=255, @@ -155,15 +151,15 @@ def __init__( "dom_object": AnyDict(), "axtree_object": AnyDict(), "extra_element_properties": AnyDict(), - "focused_element_bid": Unicode(min_length=0, max_length=TEXT_MAX_LENGTH), - "last_action": Unicode(min_length=0, max_length=TEXT_MAX_LENGTH), - "last_action_error": Unicode(min_length=0, max_length=TEXT_MAX_LENGTH), + "focused_element_bid": Unicode(), + "last_action": Unicode(), + "last_action_error": Unicode(), "elapsed_time": gym.spaces.Box(low=0, high=np.inf, dtype=float), } ) # action space - self.action_space = Unicode(min_length=0, max_length=TEXT_MAX_LENGTH) + self.action_space = Unicode() def close(self): if self.task: diff --git a/browsergym/core/src/browsergym/core/spaces.py b/browsergym/core/src/browsergym/core/spaces.py index fb3ee7fe..e657f66d 100644 --- a/browsergym/core/src/browsergym/core/spaces.py +++ b/browsergym/core/src/browsergym/core/spaces.py @@ -3,63 +3,30 @@ from typing import Any import numpy as np -from gymnasium.spaces import Space, Box, Text -from gymnasium.spaces.utils import flatdim, flatten, flatten_space, unflatten +from gymnasium.spaces import Space from numpy.typing import NDArray -MAX_UNICODE_CODEPOINT = 0x10FFFF - - -class Unicode(Text): - """A space representing a unicode string. - - Unicode is a replacement for the Text space in Gymnasium, with the - following differences: - - - Each character can be an arbitrary unicode character. - - The sample method samples from the specified character set. +class Unicode(Space): """ + A space representing a unicode string. + """ + + def __init__(self): + super().__init__() def contains(self, x: Any) -> bool: """Return boolean specifying if x is a valid member of this space.""" # Do not check the character set. - return isinstance(x, str) and self.min_length <= len(x) <= self.max_length + return isinstance(x, str) def __repr__(self) -> str: """Gives a string representation of this space.""" - return f"Unicode({self.min_length}, {self.max_length})" + return f"Unicode()" def __eq__(self, other: Any) -> bool: """Check whether ``other`` is equivalent to this instance.""" - return ( - isinstance(other, Unicode) - and self.min_length == other.min_length - and self.max_length == other.max_length - ) - - -@flatdim.register(Unicode) -def _flatdim_unicode(space: Unicode) -> int: - return space.max_length - - -@flatten.register(Unicode) -def _flatten_unicode(space: Unicode, x: str) -> NDArray[np.int32]: - arr = np.full(shape=(space.max_length,), fill_value=0, dtype=np.int32) - for i, val in enumerate(x): - arr[i] = ord(val) - return arr - - -@unflatten.register(Unicode) -def _unflatten_unicode(space: Unicode, x: NDArray[np.int32]) -> str: - return "".join(chr(val) for val in x if val) - - -@flatten_space.register(Unicode) -def _flatten_space_unicode(space: Unicode) -> Box: - return Box(low=0, high=MAX_UNICODE_CODEPOINT, shape=(space.max_length,), dtype=np.int32) + return isinstance(other, Unicode) class AnyDict(Space):