Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Problem in click action #321

Open
DsDastgheib opened this issue Feb 16, 2025 · 0 comments
Open

Problem in click action #321

DsDastgheib opened this issue Feb 16, 2025 · 0 comments

Comments

@DsDastgheib
Copy link

It seems the click action does not work when the agent tries to click on "View PDF" for an arXiv article.
Here is some sample code (parse_content_to_elements and find_matching_anchor are borrowed from here):

import re
import browsergym.core  # register the openended task as a gym environment
from browsergym.utils.obs import flatten_axtree_to_str
from dataclasses import dataclass, field

def parse_content_to_elements(content: str):
    """Build a mapping from anchor id to element description.

    Each non-blank line of ``content`` is stripped. A line that starts with
    ``[<digits>]`` opens a new element whose id is the digit string; the rest
    of that line, plus every following non-anchor line, is joined with single
    spaces to form the description. Lines that appear before the first anchor
    are dropped. If an id occurs more than once, the last description wins.

    Returns:
        dict[str, str]: anchor id (as a string) -> description text.
    """
    anchor_re = re.compile(r'\[(\d+)\](.*)')
    result = {}
    active_id = None
    parts = []

    stripped_lines = (raw.strip() for raw in content.split('\n'))
    for text in filter(None, stripped_lines):  # filter(None, ...) skips blank lines
        hit = anchor_re.match(text)
        if hit is None:
            # Continuation line: only meaningful once an anchor has been seen.
            if active_id:
                parts.append(text)
            continue

        # A new anchor begins; store the element collected so far.
        if active_id and parts:
            result[active_id] = ' '.join(parts)
        active_id, remainder = hit.groups()
        parts = [remainder.strip()]

    # The final element has no following anchor to trigger its storage.
    if active_id and parts:
        result[active_id] = ' '.join(parts)

    return result

def find_matching_anchor(content: str, selector: str):
    """Return the id of the first element whose description contains ``selector``.

    ``content`` is parsed with ``parse_content_to_elements``. The comparison is
    a case-insensitive substring test on the stripped selector and the stripped
    description. Elements are checked in the order the parser produced them.

    Returns:
        The matching anchor id (a string), or ``None`` if nothing matches.
    """
    candidates = parse_content_to_elements(content)
    needle = selector.lower().strip()

    return next(
        (
            anchor_id
            for anchor_id, text in candidates.items()
            if needle in text.lower().strip()
        ),
        None,
    )


if __name__ == '__main__':
    # Reproduction script: open an arXiv abstract page, locate the
    # "View PDF" link in the flattened accessibility tree, click it, and
    # print the page URL after every step.
    #
    # NOTE(review): `gym` is never imported in this snippet; presumably
    # `import gymnasium as gym` is needed at the top of the file — confirm.
    env = gym.make(
        "browsergym/openended",
        task_kwargs={"start_url": "https://www.google.com/"},  # starting URL
        wait_for_user_message=False,  # wait for a user message after each agent message sent to the chat
    )
    # try/finally guarantees the environment is released even when a step
    # raises or the target link cannot be found.
    try:
        obs, info = env.reset()

        action0 = 'goto("https://arxiv.org/abs/1706.03762")'
        obs, reward, terminated, truncated, info = env.step(action0)
        print(obs["url"])

        # Wait before reading the accessibility tree.
        action1 = "noop(2000)"
        obs, reward, terminated, truncated, info = env.step(action1)
        print(obs["url"])

        # NOTE(review): an empty dict is passed as extra_properties;
        # presumably obs["extra_element_properties"] is what with_clickable /
        # filter_visible_only rely on — confirm against browsergym.utils.obs.
        extra_element_properties = {}
        axtree_text = flatten_axtree_to_str(
            obs["axtree_object"],
            extra_properties=extra_element_properties,
            with_clickable=True,
            skip_generic=True,
            filter_visible_only=True,
        )
        selector = "link 'View PDF',"
        select = find_matching_anchor(axtree_text, selector)
        if select is None:
            # Without this guard the script would send click("None", "left"),
            # which hides the real problem behind an unchanged URL.
            raise RuntimeError(
                f"No element matching {selector!r} found in the accessibility tree"
            )

        action2 = f'click("{select}", "left")'
        print(action2)
        obs, reward, terminated, truncated, info = env.step(action2)
        print(obs["url"])

        # Surface the environment's own error report for the click, if any,
        # so a failed action is distinguishable from a click with no navigation.
        action_error = obs.get("last_action_error")
        if action_error:
            print(f"last_action_error: {action_error}")
    finally:
        # release the environment
        env.close()

The output is as follows:

https://arxiv.org/abs/1706.03762
https://arxiv.org/abs/1706.03762
https://arxiv.org/abs/1706.03762

We can see that after the click action the URL doesn't change, whereas we expect to be redirected to https://arxiv.org/pdf/1706.03762.
I've tested this on a couple of arXiv articles and the click did not work.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

1 participant