From f784a01dae0b22c74cb27a1031ac9c0bdfee248a Mon Sep 17 00:00:00 2001 From: zsd <909087485@qq.com> Date: Fri, 23 Aug 2024 07:13:46 +0000 Subject: [PATCH 01/15] Integrate WebCanvas benchmark into BrowserGym environment --- .gitignore | 5 + Makefile | 2 +- browsergym/core/src/browsergym/core/env.py | 55 +- .../src/browsergym/experiments/loop.py | 2 + browsergym/webcanvas/README.md | 11 + browsergym/webcanvas/pyproject.toml | 31 + browsergym/webcanvas/requirements.txt | 2 + .../src/browsergym/webcanvas/__init__.py | 16 + .../src/browsergym/webcanvas/config.py | 1 + .../webcanvas/data/example_130.json | 5768 +++++++++++++++++ .../data/mind2web-test_104tasks_20240528.json | 4506 +++++++++++++ .../src/browsergym/webcanvas/instance.py | 115 + .../webcanvas/sematic_match/openai.py | 46 + .../sematic_match/prompt_constructor.py | 20 + .../sematic_match/semantic_prompts.py | 11 + .../src/browsergym/webcanvas/step_score.py | 260 + .../src/browsergym/webcanvas/task.py | 179 + .../src/browsergym/webcanvas/utils.py | 118 + demo_agent/run_demo.py | 4 +- 19 files changed, 11132 insertions(+), 20 deletions(-) create mode 100644 browsergym/webcanvas/README.md create mode 100644 browsergym/webcanvas/pyproject.toml create mode 100644 browsergym/webcanvas/requirements.txt create mode 100644 browsergym/webcanvas/src/browsergym/webcanvas/__init__.py create mode 100644 browsergym/webcanvas/src/browsergym/webcanvas/config.py create mode 100644 browsergym/webcanvas/src/browsergym/webcanvas/data/example_130.json create mode 100644 browsergym/webcanvas/src/browsergym/webcanvas/data/mind2web-test_104tasks_20240528.json create mode 100644 browsergym/webcanvas/src/browsergym/webcanvas/instance.py create mode 100644 browsergym/webcanvas/src/browsergym/webcanvas/sematic_match/openai.py create mode 100644 browsergym/webcanvas/src/browsergym/webcanvas/sematic_match/prompt_constructor.py create mode 100644 browsergym/webcanvas/src/browsergym/webcanvas/sematic_match/semantic_prompts.py create mode 
100644 browsergym/webcanvas/src/browsergym/webcanvas/step_score.py create mode 100644 browsergym/webcanvas/src/browsergym/webcanvas/task.py create mode 100644 browsergym/webcanvas/src/browsergym/webcanvas/utils.py diff --git a/.gitignore b/.gitignore index 42f7df3e..2e8a47c8 100644 --- a/.gitignore +++ b/.gitignore @@ -150,3 +150,8 @@ tests/assistantbench/assistantbench-predictions-test.jsonl # weblinx bg_wl_data/ + +# webcanvas +test_gpt.py +temp_test.py +results/ \ No newline at end of file diff --git a/Makefile b/Makefile index c8b0fd7a..d0bab46f 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ install: @echo "--- 🚀 Installing project dependencies ---" - pip install -e ./browsergym/core -e ./browsergym/miniwob -e ./browsergym/webarena -e ./browsergym/visualwebarena/ -e ./browsergym/experiments -e ./browsergym/assistantbench -e ./browsergym/ + pip install -e ./browsergym/core -e ./browsergym/miniwob -e ./browsergym/webarena -e ./browsergym/visualwebarena/ -e ./browsergym/experiments -e ./browsergym/assistantbench -e ./browsergym/webcanvas -e ./browsergym/ playwright install chromium install-demo: diff --git a/browsergym/core/src/browsergym/core/env.py b/browsergym/core/src/browsergym/core/env.py index 30b565ba..9648aacd 100644 --- a/browsergym/core/src/browsergym/core/env.py +++ b/browsergym/core/src/browsergym/core/env.py @@ -75,7 +75,8 @@ def __init__( pw_chromium_kwargs: dict = {}, pw_context_kwargs: dict = {}, # agent-related arguments - action_mapping: Optional[callable] = HighLevelActionSet().to_python_code, + action_mapping: Optional[callable] = HighLevelActionSet( + ).to_python_code, ): """ Instantiate a ready to use BrowserEnv gym environment. 
@@ -246,7 +247,8 @@ def override_property(task, env, property): no_viewport=True if self.resizeable_window else None, viewport=viewport if not self.resizeable_window else None, record_video_dir=( - Path(self.record_video_dir) / "task_video" if self.record_video_dir else None + Path(self.record_video_dir) / + "task_video" if self.record_video_dir else None ), record_video_size=viewport, locale=locale, @@ -262,7 +264,8 @@ def override_property(task, env, property): # there is no concept of active page in playwright # https://github.com/microsoft/playwright/issues/2603 self.context.expose_binding( - "browsergym_page_activated", lambda source: self._activate_page_from_js(source["page"]) + "browsergym_page_activated", lambda source: self._activate_page_from_js( + source["page"]) ) self.context.add_init_script( r""" @@ -390,6 +393,16 @@ def report_infeasible_instructions(reason: str): self.chat.add_message(role="infeasible", msg=reason) self.infeasible_message_received = True + if hasattr(self.task, 'webcanvas'): + logger.debug(f"Initiating webcanvas task validation") + # extract reward, done, user_message, info (task-specific) + reward, done, user_message, task_info = self.task.validate( + self.page, self.chat.messages, action) + logger.info(f"WebCanvas task validation result:\n{ + self.task.evaluate_result}") + info["task_info"] = task_info + info["webcanvas_result"] = self.task.evaluate_result + # try to execute the action logger.debug(f"Executing action") try: @@ -406,15 +419,18 @@ def report_infeasible_instructions(reason: str): self.last_action_error = "" except Exception as e: self.last_action_error = f"{type(e).__name__}: {e}" - match = re.match("TimeoutError: Timeout ([0-9]+)ms exceeded.", self.last_action_error) + match = re.match( + "TimeoutError: Timeout ([0-9]+)ms exceeded.", self.last_action_error) if match: - info["action_exec_timeout"] = float(match.groups()[0]) / 1000 # ms to sec + info["action_exec_timeout"] = float( + match.groups()[0]) / 1000 # ms to 
sec logger.debug(f"Action executed") info["action_exec_stop"] = time.time() # wait a bit (for the JavaScript callback to set the active page) time.sleep(0.5) # wait for JS events to be fired (half a second) - self.context.cookies() # trigger all waiting Playwright callbacks on the stack (hack, see https://playwright.dev/java/docs/multithreading) + # trigger all waiting Playwright callbacks on the stack (hack, see https://playwright.dev/java/docs/multithreading) + self.context.cookies() # wait for the network to idle before extracting the observation, reward etc. self._wait_dom_loaded() @@ -428,11 +444,12 @@ def report_infeasible_instructions(reason: str): self._wait_for_user_message() logger.debug(f"User message done") - logger.debug(f"Initiating task validation") - # extract reward, done, user_message, info (task-specific) - reward, done, user_message, task_info = self._task_validate() - info["task_info"] = task_info - logger.debug(f"Task validation done") + if not hasattr(self.task, 'webcanvas'): + logger.debug(f"Initiating task validation") + # extract reward, done, user_message, info (task-specific) + reward, done, user_message, task_info = self._task_validate() + info["task_info"] = task_info + logger.debug(f"Task validation done") # add any user message sent by the task to the chat if user_message: @@ -455,8 +472,8 @@ def _task_validate(self): prev_active_page = self.page prev_page_history = self.page_history.copy() # call validate - reward, done, user_message, info = self.task.validate(self.page, self.chat.messages) - + reward, done, user_message, info = self.task.validate( + self.page, self.chat.messages) # safety fix, in case validate() did mess up the active page and/or page history if prev_active_page != self.page or prev_page_history != self.page_history: logger.debug( @@ -489,7 +506,8 @@ def _activate_page_from_js(self, page: playwright.sync_api.Page): logger.debug(f"_activate_page_from_js(page) called, page={str(page)}") if not page.context == 
self.context: raise RuntimeError( - f"Unexpected: activating a page that belongs to a different browser context ({page})." + f"Unexpected: activating a page that belongs to a different browser context ({ + page})." ) # add the activated page to the page history (or move it to last which is the most recent) @@ -498,7 +516,8 @@ def _activate_page_from_js(self, page: playwright.sync_api.Page): page ) # move page to the end of dictionnary else: - self.page_history[page] = None # add page to the end of dictionnary + # add page to the end of dictionnary + self.page_history[page] = None self.page = page @@ -519,12 +538,14 @@ def _active_page_check(self): # active page should share the same browser context with the environment if self.page not in self.context.pages: raise RuntimeError( - f"Unexpected: active page is not part of the browser context's open pages ({self.page})." + f"Unexpected: active page is not part of the browser context's open pages ({ + self.page})." ) # active page should not be closed if self.page.is_closed(): - raise RuntimeError(f"Unexpected: active page has been closed ({self.page}).") + raise RuntimeError( + f"Unexpected: active page has been closed ({self.page}).") def _get_obs(self): diff --git a/browsergym/experiments/src/browsergym/experiments/loop.py b/browsergym/experiments/src/browsergym/experiments/loop.py index 21163671..7cfb167c 100644 --- a/browsergym/experiments/src/browsergym/experiments/loop.py +++ b/browsergym/experiments/src/browsergym/experiments/loop.py @@ -931,6 +931,8 @@ def _get_env_name(task_name: str): import browsergym.webarena elif task_name.startswith("visualwebarena"): import browsergym.visualwebarena + elif task_name.startswith("webcanvas"): + import browsergym.webcanvas elif task_name.startswith("assistantbench"): import browsergym.assistantbench elif task_name.startswith("weblinx"): diff --git a/browsergym/webcanvas/README.md b/browsergym/webcanvas/README.md new file mode 100644 index 00000000..30b22099 --- /dev/null 
+++ b/browsergym/webcanvas/README.md @@ -0,0 +1,11 @@ +## Setup + +1. Install the package +```sh +pip install browsergym-webcanvas +``` +2. Set up an OpenAI API key + +```sh +export OPENAI_API_KEY=... +``` \ No newline at end of file diff --git a/browsergym/webcanvas/pyproject.toml b/browsergym/webcanvas/pyproject.toml new file mode 100644 index 00000000..34f301e0 --- /dev/null +++ b/browsergym/webcanvas/pyproject.toml @@ -0,0 +1,31 @@ +[build-system] +requires = ["hatchling", "hatch-requirements-txt"] +build-backend = "hatchling.build" + +[project] +name = "browsergym-webcanvas" +description = "WebCanvas benchmark for BrowserGym" +readme = "README.md" +requires-python = ">3.7" +license = {text = "Apache-2.0"} +classifiers = [ + "Development Status :: 2 - Pre-Alpha", + "Programming Language :: Python :: 3", + "Operating System :: OS Independent", + "Intended Audience :: Science/Research", + "Topic :: Scientific/Engineering :: Artificial Intelligence", + "License :: OSI Approved :: Apache Software License", +] +dynamic = ["dependencies", "version"] + +[project.urls] +homepage = "https://github.com/ServiceNow/BrowserGym" + +[tool.hatch.version] +path = "../core/src/browsergym/core/__init__.py" + +[tool.hatch.metadata.hooks.requirements_txt] +files = ["requirements.txt"] + +[tool.hatch.build.targets.wheel] +packages = ["src/browsergym"] diff --git a/browsergym/webcanvas/requirements.txt b/browsergym/webcanvas/requirements.txt new file mode 100644 index 00000000..e2d999f0 --- /dev/null +++ b/browsergym/webcanvas/requirements.txt @@ -0,0 +1,2 @@ +browsergym-core==0.4.2 +libwebarena==0.0.3 diff --git a/browsergym/webcanvas/src/browsergym/webcanvas/__init__.py b/browsergym/webcanvas/src/browsergym/webcanvas/__init__.py new file mode 100644 index 00000000..c94dfdf4 --- /dev/null +++ b/browsergym/webcanvas/src/browsergym/webcanvas/__init__.py @@ -0,0 +1,16 @@ +from browsergym.core.registration import register_task + +# register the WebCanvas benchmark +from . 
import config, task + +ALL_WEBCANVAS_TASK_IDS = [] + +# register the WebCanvas benchmark +for task_id in config.TASK_IDS: + gym_id = f"webcanvas.{task_id}" + register_task( + gym_id, + task.GenericWebCanvasTask, + kwargs={"task_kwargs": {"task_id": task_id}}, + ) + ALL_WEBCANVAS_TASK_IDS.append(gym_id) diff --git a/browsergym/webcanvas/src/browsergym/webcanvas/config.py b/browsergym/webcanvas/src/browsergym/webcanvas/config.py new file mode 100644 index 00000000..0f4bba7e --- /dev/null +++ b/browsergym/webcanvas/src/browsergym/webcanvas/config.py @@ -0,0 +1 @@ +TASK_IDS = range(104) diff --git a/browsergym/webcanvas/src/browsergym/webcanvas/data/example_130.json b/browsergym/webcanvas/src/browsergym/webcanvas/data/example_130.json new file mode 100644 index 00000000..d6cc2dd2 --- /dev/null +++ b/browsergym/webcanvas/src/browsergym/webcanvas/data/example_130.json @@ -0,0 +1,5768 @@ +[ + { + "index": "WXkY6peQb2NDE6VKrNQkd", + "task": "View the complexity rating for Frosthaven in boardgamegeek", + "reference_task_length": 6, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "boardgamegeek.", + "url": "https://boardgamegeek.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/boardgame/295770/frosthaven", + "url": "https://boardgamegeek.com/boardgame/295770/frosthaven" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".gameplay >li:nth-child(4)>.gameplay-item-secondary >span:nth-child(1)>.btn.btn-xs.btn-link ", + "netloc": "boardgamegeek", + "url": "https://boardgamegeek.com/boardgame/295770/frosthaven" + } + } + ] + }, + { + "index": "XByRzDf1LGHZDev_fnQrj", + "task": "View the full menu for AMC Dine-In in amctheatres", + "reference_task_length": 5, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + 
"reference_answer": "amctheatres.", + "url": "https://www.amctheatres.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/food-and-drink", + "url": "https://www.amctheatres.com/food-and-drink" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/explore-menu", + "url": "https://www.amctheatres.com/food-and-drink/dine-in/explore-menu" + } + }, + { + "match_function_name": "element_value_exactly_match", + "content": { + "reference_answer": "View Full Menu", + "netloc": "amctheatres", + "url": "https://www.amctheatres.com/food-and-drink/dine-in/explore-menu" + } + } + ] + }, + { + "index": "W7KXP7WT9P2IcIHkrM2bl", + "task": "Search for used Jaguar XF with no black exterior color and save the search as Jaguar to get a notification daily in carmax", + "reference_task_length": 16, + "evaluation": [ + { + "match_function_name": "url_semantic_match", + "content": { + "key": "search", + "reference_answer": "Decide whether are searching for Jaguar XF", + "url": "https://www.carmax.com/cars?search=Jaguar+XF" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "jaguar/xf", + "url": "https://www.carmax.com/cars/jaguar/xf/no-black" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/no-black", + "url": "https://www.carmax.com/cars/jaguar/xf/no-black" + } + }, + { + "match_function_name": "element_value_exactly_match", + "content": { + "reference_answer": "Jaguar", + "netloc": "carmax", + "url": "https://www.carmax.com/cars/jaguar/xf/no-black" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".save-search-button.kmx-button.kmx-button--primary ", + "netloc": "carmax", + "url": "https://www.carmax.com/cars/jaguar/xf/no-black" + } + }, + { + "match_function_name": 
"element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".save-search-email--options-radio >div:nth-child(2)>.kmx-radio-label ", + "netloc": "carmax", + "url": "https://www.carmax.com/cars/jaguar/xf/no-black" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".kmx-button.kmx-button--primary.email-options-cta ", + "netloc": "carmax", + "url": "https://www.carmax.com/cars/jaguar/xf/no-black" + } + } + ] + }, + { + "index": "TpT9z9DSgXapCMB2ORulX", + "task": "Browse for wall art with a price range of $25 to $50 in kohls", + "reference_task_length": 7, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "kohls.", + "url": "https://www.kohls.com/?kwid=p43862634237&utm_source=google&utm_medium=cpc&utm_term=Kohls&utm_campaign=Brand_Search_Google_Brand&UTM_Adgroupid=58700005024606174&pfx=pfx_google_roi&cid=brandsearch&ds_rl=2671&gad_source=1&gclid=CjwKCAiAzc2tBhA6EiwArv-i6UBkb_nmBN_BT3SvQ3M36KQxDVjnLAb6ljp_eSOnuc9nlr4J8uiiuRoCumYQAvD_BwE&gclsrc=aw.ds" + } + }, + { + "match_function_name": "url_semantic_match", + "content": { + "key": "search", + "reference_answer": "Decide whether are searching for wall art", + "url": "https://www.kohls.com/search.jsp?submit-search=web-regular&search=wall+art&kls_sbp=59794123176757812635376180341677737534" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "CN", + "reference_answer": "Price:$25 to $50", + "url": "https://www.kohls.com/search.jsp?CN=Price:%2425%20to%20%2450&BL=y&search=wall%20art&S=1&PPP=48&kls_sbp=59794123176757812635376180341677737534&pfm=search%20refine" + } + } + ] + }, + { + "index": "gDB083_WaqyuVYLxOBqGc", + "task": "Search for queen-size pillow protectors from the Marriot shop, and if found, add two pieces to the cart and checkout in marriott", + "reference_task_length": 13, + "evaluation": [ + { + 
"match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "marriott.", + "url": "https://www.marriott.com/default.mi?nst=paid&cid=PAI_GLB0004EMN_GLE000AE45_GLF000KWXG&ppc=ppc&pId=nancppc&gad_source=1&gclid=CjwKCAiA8NKtBhBtEiwAq5aX2J4dgb2cUlR-5yjY6TJiltvKoVfXOH1u3O9_d25sVIHhIicmKEZraBoCWwUQAvD_BwE&gclsrc=aw.ds" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "shopmarriott.", + "url": "https://www.shopmarriott.com/index.aspx" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/category.aspx?pillows", + "url": "https://www.shopmarriott.com/category.aspx?pillows" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/product.aspx?pillow-protector", + "url": "https://www.shopmarriott.com/product.aspx?pillow-protector" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": "#MAR-107-1Mar_PillowProtector >option:nth-child(3)", + "netloc": "shopmarriott", + "url": "https://www.shopmarriott.com/product.aspx?pillow-protector" + } + }, + { + "match_function_name": "element_value_exactly_match", + "content": { + "reference_answer": "2", + "netloc": "shopmarriott", + "url": "https://www.shopmarriott.com/product.aspx?pillow-protector" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": "#addToCartBtn >span:nth-child(1)", + "netloc": "shopmarriott", + "url": "https://www.shopmarriott.com/product.aspx?pillow-protector" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".button.right >.btn-main ", + "netloc": "shopmarriott", + "url": "https://www.shopmarriott.com/product.aspx?pillow-protector" + } + } + ] + }, + { + "index": 
"gDmEXIumjy6tQOrO0VzNS", + "task": "Find a south african history podcast with length between 10 to 30 minutes and filter it by audiobook tag in soundcloud", + "reference_task_length": 9, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "soundcloud.", + "url": "https://soundcloud.com/" + } + }, + { + "match_function_name": "url_semantic_match", + "content": { + "key": "q", + "reference_answer": "Decide whether are searching for south african history podcast", + "url": "https://soundcloud.com/search?q=south%20african%20history%20podcast" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "filter.duration", + "reference_answer": "long", + "url": "https://soundcloud.com/search/sounds?q=south%20african%20history%20podcast&filter.duration=long" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "filter.genre", + "reference_answer": "audiobooks", + "url": "https://soundcloud.com/search/sounds?q=south%20african%20history%20podcast&filter.duration=long&filter.genre=audiobooks" + } + } + ] + }, + { + "index": "8zdFjA1w6mKbU4_paQU_6", + "task": "Delete the history of what i heard recently in soundcloud", + "reference_task_length": 6, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "soundcloud.", + "url": "https://soundcloud.com/you/history" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/library", + "url": "https://soundcloud.com/you/history" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/history", + "url": "https://soundcloud.com/you/history" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": 
".sc-button.sc-button-tertiary.sc-text-h4.collectionSection__action.sc-button-small.sc-button-responsive ", + "netloc": "soundcloud", + "url": "https://soundcloud.com/you/history" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".sc-button.sc-button-small.sc-button-primary.sc-ml-2x ", + "netloc": "soundcloud", + "url": "https://soundcloud.com/you/history" + } + } + ] + }, + { + "index": "1fFOd24LBmtgRmw1o5C8n", + "task": "Show me movies produced by Aaron Horvath in imdb", + "reference_task_length": 7, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "imdb.", + "url": "https://www.imdb.com/" + } + }, + { + "match_function_name": "element_value_exactly_match", + "content": { + "reference_answer": "Previous", + "netloc": "imdb", + "url": "https://www.imdb.com/name/nm1739338/?ref_=fn_al_nm_1" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/nm1739338", + "url": "https://www.imdb.com/name/nm1739338/?ref_=fn_al_nm_1" + } + } + ] + }, + { + "index": "DKVD-ophgCKtDzTA6T65R", + "task": "Find a Blue iPhone 12 Pro 128gb and add to cart in ebay", + "reference_task_length": 11, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "ebay.", + "url": "https://www.ebay.com/" + } + }, + { + "match_function_name": "url_semantic_match", + "content": { + "key": "_nkw", + "reference_answer": "Decide whether are searching for iPhone 12 Pro", + "url": "https://www.ebay.com/sch/i.html?_from=R40&_trksid=p4432023.m570.l1313&_nkw=iphone+12+pro&_sacat=0" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "Storage%20Capacity", + "reference_answer": "128 GB", + "url": "https://www.ebay.com/sch/i.html?_from=R40&_nkw=iphone+12+pro&_sacat=0&Storage%2520Capacity=128%2520GB&_dcat=9355" + } + }, 
+ { + "match_function_name": "element_value_exactly_match", + "content": { + "reference_answer": "0", + "netloc": "ebay", + "url": "https://www.ebay.com/itm/294304467850?epid=9064995793&hash=item4485e9cb8a:g:Fn4AAOSwJyxlDbtk&amdata=enc%3AAQAIAAAAwFOTWZW7Qt6S99BNKFAQ1Hx3iO7VgD8LcKbH1RXPBgN6DVWTyEwMWA8HQf%2BsTREaViQIsTYCdZXUMBytGtE8SQPc37W21MAh23apUGDW1%2FxIRZLxI%2BC%2FuDWoeoeAziQcmTN76wSMNUU7BtxPp%2BfdMNsudHDS8zj3qQQHnSk7wjheMKtOUgD4jNB6E1VuwrtnpN99XXHvUQBgOvLbpt6OQYvn5bxGQjOxOR8tAZH2EKdmmrrwd7%2B4sNopxa5zfi8%2B%2Bg%3D%3D%7Ctkp%3ABlBMULzVg7mpYw" + } + }, + { + "match_function_name": "element_value_exactly_match", + "content": { + "reference_answer": "View in cart", + "netloc": "ebay", + "url": "https://www.ebay.com/itm/294304467850?epid=9064995793&hash=item4485e9cb8a:g:Fn4AAOSwJyxlDbtk&amdata=enc%3AAQAIAAAAwFOTWZW7Qt6S99BNKFAQ1Hx3iO7VgD8LcKbH1RXPBgN6DVWTyEwMWA8HQf%2BsTREaViQIsTYCdZXUMBytGtE8SQPc37W21MAh23apUGDW1%2FxIRZLxI%2BC%2FuDWoeoeAziQcmTN76wSMNUU7BtxPp%2BfdMNsudHDS8zj3qQQHnSk7wjheMKtOUgD4jNB6E1VuwrtnpN99XXHvUQBgOvLbpt6OQYvn5bxGQjOxOR8tAZH2EKdmmrrwd7%2B4sNopxa5zfi8%2B%2Bg%3D%3D%7Ctkp%3ABlBMULzVg7mpYw&var=592943878447" + } + } + ] + }, + { + "index": "eTA-PZ_y8PT09kfvk-bbR", + "task": "Check the price of Tesla Model Y Performance for the 10001 zip code in tesla", + "reference_task_length": 6, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "tesla.", + "url": "https://www.tesla.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/modely/design", + "url": "https://www.tesla.com/modely/design#overview" + } + }, + { + "match_function_name": "element_value_exactly_match", + "content": { + "reference_answer": "10001", + "netloc": "tesla", + "url": "https://www.tesla.com/modely/design#overview" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": 
".location-modal--button >.tds-btn.tds-btn--primary.tds-btn--large.tds-btn--width-full ", + "netloc": "tesla", + "url": "https://www.tesla.com/modely/design#overview" + } + } + ] + }, + { + "index": "EtBAYvBAHdCKB2_xJWNDa", + "task": "Check the availability of a Nintendo Switch gaming console at the nearest Target store in target", + "reference_task_length": 8, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "target.", + "url": "https://www.target.com/" + } + }, + { + "match_function_name": "url_semantic_match", + "content": { + "key": "searchTerm", + "reference_answer": "Decide whether are searching for Nintendo Switch", + "url": "https://www.target.com/s?searchTerm=Nintendo+Switch&tref=typeahead%7Cterm%7CNintendo+Switch%7C%7C%7Chistory" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".styles__ThreeUpTextWrapperInline-sc-11rka0i-1.bJqleI >button:nth-child(1)", + "netloc": "target", + "url": "https://www.target.com/" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".ModalDrawer >div:nth-child(1)>div:nth-child(1)>div:nth-child(2)>div:nth-child(3)>.h-margin-v-default >div:nth-child(1)>div:nth-child(1)>.styles__SecondaryStyles-sc-18fxydo-2.irelhx >div:nth-child(1)>button:nth-child(1)", + "netloc": "target", + "url": "https://www.target.com/" + } + } + ] + }, + { + "index": "3EYfZekKAm_7OW8tkf5J_", + "task": "Find an Xbox Wireless controller rated above 4 stars in newegg", + "reference_task_length": 8, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "newegg.", + "url": 
"https://www.newegg.com/Newegg-Deals/EventSaleStore/ID-9447?N=100007627&id0=Google&id1=20090904388&id2=145631868461&id3=&id4=&id5=kwd-22448241&id6=&id7=1014226&id8=b&id9=g&id10=c&id11=&id12=CjwKCAiA8NKtBhBtEiwAq5aX2JhtaauB2oL5E0a3mGURAs91TYZILl5tNLRMTzssaWXM7vgGLgLDExoCCQAQAvD_BwE&id13=&id14=Y&id15=&id16=688736071597&id17=newegg&id18=&id19=&id20=&id21=&id22=&id23=&id24=&id25=&id26=&id27=Y&id28=&id29=&id30=16787553722129530369&id31=&id32=&id33=&id34=&gclsrc=aw.ds&&cm_mmc=knc-googlekwless-search-broad-_-newaudience-_-intel-motherboards-_-plp-feature-2&utm_source=google&utm_medium=paid+search&utm_campaign=knc-googlekwless-search-broad-_-newaudience-_-intel-motherboards-_-plp-feature-2&gad_source=1&gclid=CjwKCAiA8NKtBhBtEiwAq5aX2JhtaauB2oL5E0a3mGURAs91TYZILl5tNLRMTzssaWXM7vgGLgLDExoCCQAQAvD_BwE" + } + }, + { + "match_function_name": "url_semantic_match", + "content": { + "key": "d", + "reference_answer": "Decide whether are searching for Xbox Wireless controller", + "url": "https://www.newegg.com/p/pl?d=xbox+wireless+controller" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "N", + "reference_answer": "4204", + "url": "https://www.newegg.com/p/pl?d=xbox+wireless+controller&N=4204" + } + } + ] + }, + { + "index": "3pgcXaIyKP86RuRIAYxuL", + "task": "View the toddler collection and add one pair of the cheapest socks for a 6 months to 5 years to the wishlist in uniqlo", + "reference_task_length": 11, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "uniqlo.", + "url": "https://www.uniqlo.com/us/en/baby" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "baby/toddler", + "url": "https://www.uniqlo.com/us/en/baby/toddler" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "path", + "reference_answer": "22213,23321,24798,", + "url": 
"https://www.uniqlo.com/us/en/baby/toddler?path=22213%2C23321%2C24798%2C" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "sort", + "reference_answer": "2", + "url": "https://www.uniqlo.com/us/en/baby/toddler?path=22213%2C23321%2C24798%2C&sort=2" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": "#root >section:nth-child(10)>div:nth-child(1)>section:nth-child(1)>div:nth-child(1)>div:nth-child(2)>div:nth-child(1)>div:nth-child(1)>div:nth-child(1)>div:nth-child(1)>div:nth-child(1)>a:nth-child(1)>div:nth-child(1)>div:nth-child(1)>button:nth-child(2)>svg:nth-child(1)", + "netloc": "uniqlo", + "url": "https://www.uniqlo.com/us/en/baby/toddler?path=22213%2C23321%2C24798%2C&sort=2" + } + } + ] + }, + { + "index": "3RVOB_HPtNe4wglTTrOdK", + "task": "Search for hiking boots and filter the results to show only those with a waterproof rating of at least 3 stars in rei", + "reference_task_length": 6, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "rei.", + "url": "https://www.rei.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "c/hiking-footwear", + "url": "https://www.rei.com/c/hiking-footwear" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/f-waterproof", + "url": "https://www.rei.com/c/hiking-footwear/f/f-waterproof?ir=category%3Ahiking-footwear&r=c%3Bf" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "r", + "reference_answer": "c;f;average-rating:3 to 5", + "url": "https://www.rei.com/c/hiking-footwear/f/f-waterproof?ir=category%3Ahiking-footwear&r=c%3Bf%3Baverage-rating%3A3+to+5" + } + } + ] + }, + { + "index": "ciawYzAILpO7zxb6_OKKG", + "task": "Search receipt with the eTicket 12345678 for the trip reserved by Jason Two in 
united", + "reference_task_length": 12, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "united.", + "url": "https://www.united.com/en/us" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "receipts", + "url": "https://www.united.com/en/us/receipts" + } + }, + { + "match_function_name": "element_value_exactly_match", + "content": { + "reference_answer": "Confirmation or eTicket number", + "netloc": "united", + "url": "https://www.united.com/en/us/receipts" + } + }, + { + "match_function_name": "element_value_exactly_match", + "content": { + "reference_answer": "12345678", + "netloc": "united", + "path": "#cqLpdDv_d ", + "url": "https://www.united.com/en/us/receipts" + } + }, + { + "match_function_name": "element_value_exactly_match", + "content": { + "reference_answer": "Jason", + "netloc": "united", + "path": "#gY4kQQ1fF3 ", + "url": "https://www.united.com/en/us/receipts" + } + }, + { + "match_function_name": "element_value_exactly_match", + "content": { + "reference_answer": "Two", + "netloc": "united", + "path": "#zMO2dq6RBo ", + "url": "https://www.united.com/en/us/receipts" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".atm-c-btn.app-components-Receipts-PurchaseDetails-styles__searchButton--BRDhz.atm-c-btn--primary.atm-c-btn--large >.atm-c-btn__text ", + "netloc": "united", + "url": "https://www.united.com/en/us/receipts" + } + } + ] + }, + { + "index": "D-PSiYpk-2evcjGqhhv8z", + "task": "Search for person named John Smith in New York in yellowpages", + "reference_task_length": 11, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "yellowpages.", + "url": "https://www.yellowpages.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + 
"reference_answer": "people.yellowpages.", + "url": "https://people.yellowpages.com/whitepages/" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "first_name", + "reference_answer": "John", + "url": "https://people.yellowpages.com/whitepages/?first_name=John&last_name=Smith&city=New+york&state=" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "last_name", + "reference_answer": "Smith", + "url": "https://people.yellowpages.com/whitepages/?first_name=John&last_name=Smith&city=New+york&state=" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "city", + "reference_answer": "New york", + "url": "https://people.yellowpages.com/whitepages/?first_name=John&last_name=Smith&city=New+york&state=" + } + } + ] + }, + { + "index": "t95CWRtf-KuSTP0knNdmu", + "task": "find cruise deals to Europe this summer that includes airfare and are all-inclusive in travelzoo", + "reference_task_length": 13, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "travelzoo.", + "url": "https://www.travelzoo.com/" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "locationId", + "reference_answer": "1024", + "url": "https://www.travelzoo.com/deals/all-deals-europe-summer/?ctfId=0&locationId=1024&when=Summer&pageType=Collection&ga_s=Europe&search_type=destination" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "when", + "reference_answer": "Summer", + "url": "https://www.travelzoo.com/deals/all-deals-europe-summer/?ctfId=0&locationId=1024&when=Summer&pageType=Collection&ga_s=Europe&search_type=destination" + } + }, + { + "match_function_name": "element_value_exactly_match", + "content": { + "reference_answer": "Airfare Included", + "netloc": "travelzoo", + "url": 
"https://www.travelzoo.com/deals/all-deals-europe-summer/?ctfId=0&locationId=1024&when=Summer&pageType=Collection&ga_s=Europe&search_type=destination" + } + }, + { + "match_function_name": "element_value_exactly_match", + "content": { + "reference_answer": "All-inclusive", + "netloc": "travelzoo", + "url": "https://www.travelzoo.com/deals/all-deals-europe-summer/?ctfId=0&locationId=1024&when=Summer&pageType=Collection&ga_s=Europe&search_type=destination" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".button-primary.button-done.right ", + "netloc": "travelzoo", + "url": "https://www.travelzoo.com/deals/all-deals-europe-summer/?ctfId=0&locationId=1024&when=Summer&pageType=Collection&ga_s=Europe&search_type=destination" + } + } + ] + }, + { + "index": "tA52iRDxXW9ZI98p1p5aF", + "task": "Find the safety rating for 2012 Honda Civic in kbb", + "reference_task_length": 11, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "kbb.", + "url": "https://www.kbb.com/?&psid=20003&utm_source=GOOGLE&utm_medium=sem_brand-core_perf&utm_campaign=kbb_na_na_national_evergreen_site-visits_na_na&utm_content=keyword_text_na_na_na_20003_na&utm_term=kbb&ds_rl=1293870&gad_source=1&gclid=CjwKCAiAzc2tBhA6EiwArv-i6cts0gEs876CjOGcM1YqfgApEFpdV-hInc7n5R3oHA4UBrPEW4fFrxoCvuUQAvD_BwE&gclsrc=aw.ds" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/car-prices", + "url": "https://www.kbb.com/car-prices/" + } + }, + { + "match_function_name": "element_value_exactly_match", + "content": { + "reference_answer": "2012", + "netloc": "kbb", + "url": "https://www.kbb.com/car-prices/" + } + }, + { + "match_function_name": "element_value_exactly_match", + "content": { + "reference_answer": "Honda", + "netloc": "kbb", + "url": "https://www.kbb.com/car-prices/" + } + }, + { + "match_function_name": 
"element_value_exactly_match", + "content": { + "reference_answer": "Civic", + "netloc": "kbb", + "url": "https://www.kbb.com/car-prices/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/honda/civic/2012", + "url": "https://www.kbb.com/honda/civic/2012/" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": "#subnav4 >div:nth-child(1)>div:nth-child(1)", + "netloc": "kbb", + "url": "https://www.kbb.com/honda/civic/2012/" + } + } + ] + }, + { + "index": "j4W0lfpmu2n143wb_vHSN", + "task": "Show me the NFL Scoreboard from the superbowl 2015 in espn", + "reference_task_length": 14, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "espn.", + "url": "https://www.espn.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "nfl/scoreboard", + "url": "https://www.espn.com/nfl/scoreboard" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "_/week/5/year/2015/seasontype/3", + "url": "https://www.espn.com/nfl/scoreboard/_/week/5/year/2015/seasontype/3" + } + } + ] + }, + { + "index": "kcTi3b4I9BP33PBBr1Cjv", + "task": "Show me the location of the Altavista bus stop in us.megabus", + "reference_task_length": 6, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "us.megabus.", + "url": "https://us.megabus.com/route-guides" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/stops", + "url": "https://us.megabus.com/stops" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".row >div:nth-child(12)>.blockContent >.panel.panel-default >.panel-heading 
>.panel-title >a:nth-child(1)", + "netloc": "us", + "url": "https://us.megabus.com/stops" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".panel-collapse.collapse.in >.panel-body >.list-unstyled >li:nth-child(1)>a:nth-child(1)", + "netloc": "us", + "url": "https://us.megabus.com/stops" + } + } + ] + }, + { + "index": "h85WFGkh5JWjY-m7rPcKv", + "task": "Show me the page with information about the NBA Finals schedule in seatgeek", + "reference_task_length": 3, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "seatgeek.", + "url": "https://seatgeek.com/?aid=10717&gid=147220683313&rid=kwd-307402976610&dt=c&ap=&adId=647120651900&loc_interest=&loc_physical=1014226&n=g&mt=e&ext=&xid=1&gad_source=1&gclid=CjwKCAiAzc2tBhA6EiwArv-i6SRpdQUweChDs6bFJjQvJNW0MrRoKDycKgmhKRTUsWRghNdWU6XdHhoC2-MQAvD_BwE" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/nba-finals-3-tickets", + "url": "https://seatgeek.com/nba-finals-3-tickets" + } + } + ] + }, + { + "index": "pIWJqqtx19A-qKymEDRYB", + "task": "Add a set of sonoma bath towels to the cart and apply a coupon code for free shipping in kohls", + "reference_task_length": 13, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "kohls.", + "url": "https://www.kohls.com/" + } + }, + { + "match_function_name": "url_semantic_match", + "content": { + "key": "search", + "reference_answer": "Decide whether are searching for sonoma bath towels", + "url": "https://www.kohls.com/search.jsp?submit-search=web-ta-keyword&search=sonoma+bath+towels&kls_sbp=59794123176757812635376180341677737534" + } + }, + { + "match_function_name": "element_value_exactly_match", + "content": { + "reference_answer": "BATH TOWEL", + "netloc": "kohls", + "url": 
"https://www.kohls.com/product/prd-4569317/sonoma-goods-for-life-ultimate-bath-towel-bath-sheet-hand-towel-or-washcloth-with-hygro-technology.jsp?color=Linen&prdPV=1" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": "#addtobagID ", + "netloc": "kohls", + "url": "https://www.kohls.com/product/prd-4569317/sonoma-goods-for-life-ultimate-bath-towel-bath-sheet-hand-towel-or-washcloth-with-hygro-technology.jsp?color=Linen&prdPV=1" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".btn.btn-primary ", + "netloc": "kohls", + "url": "https://www.kohls.com/product/prd-4569317/sonoma-goods-for-life-ultimate-bath-towel-bath-sheet-hand-towel-or-washcloth-with-hygro-technology.jsp?color=Linen&prdPV=1" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".open-offers-text ", + "netloc": "kohls", + "url": "https://www.kohls.com/checkout/shopping_cart.jsp" + } + }, + { + "match_function_name": "element_value_exactly_match", + "content": { + "reference_answer": "FREESHIP 3093", + "netloc": "kohls", + "url": "https://www.kohls.com/checkout/shopping_cart.jsp" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".kds-button.kds-button-sm >.btn.btn-secondary ", + "netloc": "kohls", + "url": "https://www.kohls.com/checkout/shopping_cart.jsp" + } + } + ] + }, + { + "index": "RyhHhSmdLH_couxp6jwEp", + "task": "Plan an accessible trip from empire state building to little Caribbean with least walking in new.mta.info", + "reference_task_length": 14, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "new.mta.info", + "url": "https://new.mta.info/" + } + }, + { + "match_function_name": "element_value_semantic_match", + 
"content": { + "reference_answer": "Decide whether are searching for empire state building", + "netloc": "new", + "url": "https://new.mta.info/" + } + }, + { + "match_function_name": "element_value_semantic_match", + "content": { + "reference_answer": "Decide whether are searching for little Caribbean", + "netloc": "new", + "url": "https://new.mta.info/" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": "#linkPreferencesModal ", + "netloc": "new", + "url": "https://new.mta.info/" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": "#edit-minimize ", + "netloc": "new", + "url": "https://new.mta.info/" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": "#edit-minimize >option:nth-child(3)", + "netloc": "new", + "url": "https://new.mta.info/" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".button.primary-dark-button.travel-preferences-button.close-modal.close-preferences ", + "netloc": "new", + "url": "https://new.mta.info/" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".form-item.js-form-item.form-type-checkbox.js-form-type-checkbox.form-item-accessible.js-form-item-accessible.checkbox >.control-label.option ", + "netloc": "new", + "url": "https://new.mta.info/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/tripplanner/results", + "url": 
"https://new.mta.info/tripplanner/results/Empire%20State%20Building%2C%20West%2034th%20Street%2C%20New%20York%2C%20NY%2C%20USA/40.7484405%2C%20-73.98566439999999/Little%20Caribbean%2C%20Brooklyn%2C%20NY%2C%20USA/40.6508146%2C%20-73.9495972/1706595587/D/B%2CX%2CC%2CR/W/804/true/null/null" + } + } + ] + }, + { + "index": "hRtPHCJJzDkwZTO_QzVms", + "task": "Track the status of a rebate for Monty Lue, house number 4847, zip code 10019 in menards", + "reference_task_length": 14, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "menards.", + "url": "https://www.menards.com/main/home.html" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/rebates", + "url": "https://www.menards.com/main/rebates.html" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "rebateinternational.", + "url": "https://www.rebateinternational.com/RebateInternational/tracking.do#track-rebate" + } + }, + { + "match_function_name": "element_value_exactly_match", + "content": { + "reference_answer": "M", + "netloc": "rebateinternational", + "path": ".bg-white.my-3.p-3.rounded-lg >form:nth-child(2)>div:nth-child(1)>.col-12 >span:nth-child(2)>.rounded-pill.form-control ", + "url": "https://www.rebateinternational.com/RebateInternational/tracking.do#track-rebate" + } + }, + { + "match_function_name": "element_value_exactly_match", + "content": { + "reference_answer": "Lue", + "netloc": "rebateinternational", + "path": ".bg-white.my-3.p-3.rounded-lg >form:nth-child(2)>div:nth-child(2)>.col-12 >span:nth-child(2)>.rounded-pill.form-control ", + "url": "https://www.rebateinternational.com/RebateInternational/tracking.do#track-rebate" + } + }, + { + "match_function_name": "element_value_exactly_match", + "content": { + "reference_answer": "4847", + "netloc": "rebateinternational", + "path": "#house-num ", + "url": 
"https://www.rebateinternational.com/RebateInternational/tracking.do#track-rebate" + } + }, + { + "match_function_name": "element_value_exactly_match", + "content": { + "reference_answer": "10019", + "netloc": "rebateinternational", + "path": ".w-100 >div:nth-child(3)>div:nth-child(2)>span:nth-child(2)>.rounded-pill.form-control ", + "url": "https://www.rebateinternational.com/RebateInternational/tracking.do#track-rebate" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".row.pb-4 >.col-12 >button:nth-child(1)", + "netloc": "rebateinternational", + "url": "https://www.rebateinternational.com/RebateInternational/tracking.do#track-rebate" + } + } + ] + }, + { + "index": "m0tbJoOSYhZqciPjTSZEQ", + "task": "Show computer game reviews sorted by score in ign", + "reference_task_length": 8, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "ign.", + "url": "https://www.ign.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/reviews/games", + "url": "https://www.ign.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/pc", + "url": "https://www.ign.com/" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": "#sortBy ", + "netloc": "ign", + "url": "https://www.ign.com/" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": "#sortBy >option:nth-child(3)", + "netloc": "ign", + "url": "https://www.ign.com/reviews/games/pc" + } + } + ] + }, + { + "index": "LdY-zo1lQh1XrKpBhDD6-", + "task": "Find my trip with ticket number 3329456534543 along with my name John Green in delta", + "reference_task_length": 12, + "evaluation": [ + { + "match_function_name": 
"url_included_match", + "content": { + "key": "", + "reference_answer": "delta.", + "url": "https://www.delta.com/" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": "#headPrimary3 ", + "netloc": "delta", + "url": "https://www.delta.com/" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": "#ui-list-searchOption2 ", + "netloc": "delta", + "url": "https://www.delta.com/" + } + }, + { + "match_function_name": "element_value_exactly_match", + "content": { + "reference_answer": "3329456534543", + "netloc": "delta", + "path": "#eTicketNo ", + "url": "https://www.delta.com/" + } + }, + { + "match_function_name": "element_value_exactly_match", + "content": { + "reference_answer": "John", + "netloc": "delta", + "path": "#firstName ", + "url": "https://www.delta.com/" + } + }, + { + "match_function_name": "element_value_exactly_match", + "content": { + "reference_answer": "Green", + "netloc": "delta", + "path": "#lastName ", + "url": "https://www.delta.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "my-trips/search", + "url": "https://www.delta.com/my-trips/search?errorCodes=MYT0503" + } + } + ] + }, + { + "index": "LhoU2gmqzyP2pmZS6NcLc", + "task": "Select a deal which is highly rated in Fiji location in travelzoo", + "reference_task_length": 9, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "travelzoo.", + "url": "https://www.travelzoo.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "deals/all-deals", + "url": "https://www.travelzoo.com/deals/all-deals-fiji/?ctfId=0&locationId=1093&when=Anytime&pageType=Homepage&ga_s=Fiji&search_type=destination" + } + }, + { + "match_function_name": "url_exactly_match", + 
"content": { + "key": "ga_s", + "reference_answer": "Fiji", + "url": "https://www.travelzoo.com/deals/all-deals-fiji/?ctfId=0&locationId=1093&when=Anytime&pageType=Homepage&ga_s=Fiji&search_type=destination" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".btn-group-items >div:nth-child(2)", + "netloc": "travelzoo", + "url": "https://www.travelzoo.com/deals/all-deals-fiji/?ctfId=0&locationId=1093&when=Anytime&pageType=Homepage&ga_s=Fiji&search_type=destination" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".result-list.list-nostyle >li:nth-child(1)>div:nth-child(1)>a:nth-child(1)>.deal-info.clamped >.deal-headline >.deal-headline-text.ts-original ", + "netloc": "travelzoo", + "url": "https://www.travelzoo.com/deals/all-deals-fiji/?ctfId=0&locationId=1093&when=Anytime&pageType=Homepage&ga_s=Fiji&search_type=destination" + } + } + ] + }, + { + "index": "mqWXWWBXEQ03hlZexjoh6", + "task": "Find information about baggage allowance for business class in qatarairways", + "reference_task_length": 5, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "qatarairways.", + "url": "https://www.qatarairways.com/en-us/homepage.html?CID=SXUS23456792M&account=Google-AMERICAS-US-EN-Brand&campaign=US-Brand-Hero-EN_exact&adgroup=qatarairways&term=qatarairways&&&&&gad_source=1&gclid=CjwKCAiAzc2tBhA6EiwArv-i6aeW180HqNfN0DVR5ocvrSeWUcoqviRH3n44R9tzif6BgvzjTPlLuhoCeJ4QAvD_BwE&gclsrc=aw.ds" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "baggage/allowance.html", + "url": "https://www.qatarairways.com/en-us/baggage/allowance.html?iid=ALL67670750" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": "#j-poi-tabs 
>li:nth-child(2)>.tabmenu-link ", + "netloc": "qatarairways", + "url": "https://www.qatarairways.com/en-us/baggage/allowance.html?iid=ALL67670750" + } + } + ] + }, + { + "index": "joaRU2Ghlt9m6rZvwjqiW", + "task": "Show me the newest on-demand releases in amctheatres", + "reference_task_length": 4, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "vudu.", + "url": "https://www.vudu.com/content/movies/uxrow/New-Releases/14166?affid=13177&cmp=AMC~Web~nav_new_releases" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "content/movies/uxrow/New-Releases/", + "url": "https://www.vudu.com/content/movies/uxrow/New-Releases/14166?affid=13177&cmp=AMC~Web~nav_new_releases" + } + } + ] + }, + { + "index": "T4ZljEgrCyH3iAdER9wHh", + "task": "Find adventure movies coming to theaters in rottentomatoes", + "reference_task_length": 6, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "rottentomatoes.", + "url": "https://www.rottentomatoes.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "browse/movies_coming_soon", + "url": "https://www.rottentomatoes.com/browse/movies_coming_soon/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "genres:adventure", + "url": "https://www.rottentomatoes.com/browse/movies_coming_soon/" + } + } + ] + }, + { + "index": "ON4UUjUE-e8eM4zJgCcXR", + "task": "Find the cheapest used and working Nintendo Switch for under $400 that can be bought right now in ebay", + "reference_task_length": 13, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "ebay.", + "url": "https://www.ebay.com/" + } + }, + { + "match_function_name": "url_semantic_match", + "content": { + "key": 
"_nkw", + "reference_answer": "Decide whether is searching for Nintendo Switch", + "url": "https://www.ebay.com/sch/i.html?_from=R40&_trksid=p4432023.m570.l1313&_nkw=Nintendo+Switch&_sacat=0" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "LH_ItemCondition", + "reference_answer": "3000", + "url": "https://www.ebay.com/sch/i.html?_from=R40&_nkw=Nintendo+Switch&_sacat=0&rt=nc&LH_ItemCondition=3000" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "_udhi", + "reference_answer": "400", + "url": "https://www.ebay.com/sch/i.html?_from=R40&_nkw=Nintendo%20Switch&_sacat=0&LH_ItemCondition=3000&rt=nc&_udhi=400" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "LH_BIN", + "reference_answer": "1", + "url": "https://www.ebay.com/sch/i.html?_from=R40&_nkw=Nintendo+Switch&_sacat=0&LH_ItemCondition=3000&_udhi=400&rt=nc&LH_BIN=1" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".srp-results.srp-list.clearfix >li:nth-child(3)>.s-item__wrapper.clearfix >.s-item__info.clearfix >.s-item__link >.s-item__title >span:nth-child(1)", + "netloc": "ebay", + "url": "https://www.ebay.com/sch/i.html?_from=R40&_nkw=Nintendo+Switch&_sacat=0&LH_ItemCondition=3000&_udhi=400&LH_BIN=1&_sop=15" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "_sop", + "reference_answer": "15", + "url": "https://www.ebay.com/sch/i.html?_from=R40&_nkw=Nintendo+Switch&_sacat=0&LH_ItemCondition=3000&_udhi=400&LH_BIN=1&_sop=15" + } + } + ] + }, + { + "index": "iDXwYzZqvTLA6KFitaOnT", + "task": "Browse cafes that have outdoor seating and is dog friendly in yelp", + "reference_task_length": 9, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "yelp.", + "url": "https://www.yelp.com/" + } + }, + { + "match_function_name": 
"url_semantic_match", + "content": { + "key": "find_desc", + "reference_answer": "Decide whether is searching for cafes", + "url": "https://www.yelp.com/search?find_desc=cafes&find_loc=West+Hollywood%2C+CA" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "attrs", + "reference_answer": "OutdoorSeating", + "url": "https://www.yelp.com/search?find_desc=cafes&find_loc=West+Hollywood%2C+CA&attrs=DogsAllowed%2COutdoorSeating" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "attrs", + "reference_answer": "DogsAllowed", + "url": "https://www.yelp.com/search?find_desc=cafes&find_loc=West+Hollywood%2C+CA&attrs=DogsAllowed%2COutdoorSeating" + } + } + ] + }, + { + "index": "gHnk08cXzPxHikXZ1jJMS", + "task": "Find the highest-reviewed landscaper in West Hollywood for the design of lawn, who gives fast responses in yelp", + "reference_task_length": 11, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "yelp.", + "url": "https://www.yelp.com/" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "find_desc", + "reference_answer": "Landscaping", + "url": "https://www.yelp.com/search?find_desc=Landscaping&find_loc=Los+Angeles%2C+CA" + } + }, + { + "match_function_name": "url_semantic_match", + "content": { + "key": "find_loc", + "reference_answer": "Decide whether is searching for West Hollywood", + "url": "https://www.yelp.com/search?find_desc=Landscaping&find_loc=West+Hollywood" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "attrs", + "reference_answer": "rel_landscaping_itms", + "url": "https://www.yelp.com/search?find_desc=Landscaping&find_loc=West+Hollywood&attrs=rel_landscaping_items%3A%3Aitem_greenscape" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "attrs", + "reference_answer": "is_fast_mtb_responder", + "url": 
"https://www.yelp.com/search?find_desc=Landscaping&find_loc=West+Hollywood&attrs=rel_landscaping_items%3A%3Aitem_greenscape%2Cis_fast_mtb_responderhttps://www.yelp.com/search?find_desc=Landscaping&find_loc=West+Hollywood&attrs=rel_landscaping_items%3A%3Aitem_greenscape%2Cis_fast_mtb_responder" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "sortby", + "reference_answer": "review_counthttps://www.yelp.com/search?find_desc=Landscaping", + "url": "https://www.yelp.com/search?find_desc=Landscaping&find_loc=West+Hollywood&attrs=rel_landscaping_items%3A%3Aitem_greenscape%2Cis_fast_mtb_responder&sortby=review_counthttps://www.yelp.com/search?find_desc=Landscaping&find_loc=West+Hollywood&attrs=rel_landscaping_items%3A%3Aitem_greenscape%2Cis_fast_mtb_responder&sortby=review_count" + } + } + ] + }, + { + "index": "giPwmhoV4bWCLCVjlvety", + "task": "check the 2022 highest goal scorer in the MLS league in espn", + "reference_task_length": 7, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "espn.", + "url": "https://www.espn.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/usa.1", + "url": "https://www.espn.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "soccer/stats", + "url": "https://www.espn.com/soccer/stats/_/league/usa.1" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "season/2022", + "url": "https://www.espn.com/soccer/stats/_/league/USA.1/season/2022" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/hany-mukhtar", + "url": "https://www.espn.com/soccer/player/_/id/174548/hany-mukhtar" + } + } + ] + }, + { + "index": "QVzXOEzKiGiA5j28d644i", + "task": "Upgrade the count of the current SSD in my cart to 10 in 
newegg", + "reference_task_length": 6, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "newegg.", + "url": "https://www.newegg.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "shop/cart", + "url": "https://secure.newegg.com/shop/cart" + } + }, + { + "match_function_name": "element_value_exactly_match", + "content": { + "reference_answer": "10", + "netloc": "secure", + "path": ".item-qty >.form-text ", + "url": "https://secure.newegg.com/shop/cart" + } + } + ] + }, + { + "index": "qz1izJwn0Sh5lGHoH3yIc", + "task": "Browse crossbows on sale with limited stock in cabelas", + "reference_task_length": 8, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "cabelas.", + "url": "https://www.cabelas.com/shop/en#" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "l/crossbows", + "url": "https://www.cabelas.com/l/crossbows" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "#nf-maxavailquantity=1...10", + "url": "https://www.cabelas.com/l/crossbows#nf-maxavailquantity=1...10" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "#f-currentoffers=Sale", + "url": "https://www.cabelas.com/l/crossbows#f-currentoffers=Sale&nf-maxavailquantity=1...10" + } + } + ] + }, + { + "index": "RriMNRAnwsprnmki_oqM7", + "task": "Show all used Tesla cars for 10017 zip code in cargurus", + "reference_task_length": 7, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "cargurus.", + "url": "https://www.cargurus.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": 
"Cars/inventorylisting/viewDetailsFilterViewInventoryListing.action", + "url": "https://www.cargurus.com/Cars/inventorylisting/viewDetailsFilterViewInventoryListing.action?sourceContext=carGurusHomePageModel&entitySelectingHelper.selectedEntity=m112&zip=10017" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "zip", + "reference_answer": "10017", + "url": "https://www.cargurus.com/Cars/inventorylisting/viewDetailsFilterViewInventoryListing.action?sourceContext=carGurusHomePageModel&entitySelectingHelper.selectedEntity=m112&zip=10017" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "entitySelectingHelper.selectedEntity", + "reference_answer": "m112", + "url": "https://www.cargurus.com/Cars/inventorylisting/viewDetailsFilterViewInventoryListing.action?sourceContext=carGurusHomePageModel&entitySelectingHelper.selectedEntity=m112&zip=10017" + } + } + ] + }, + { + "index": "TE18EGhjtZzKiT8NFAAEt", + "task": "Look up information on the potential side effects of rogaine in cvs", + "reference_task_length": 6, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "cvs.", + "url": "https://www.cvs.com/" + } + }, + { + "match_function_name": "url_semantic_match", + "content": { + "key": "searchTerm", + "reference_answer": "Decide whether is searching for rogaine", + "url": "https://www.cvs.com/search?searchTerm=rogaine" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".accordionParent >div:nth-child(5)>.cardContainer >.cardTitleContainer ", + "netloc": "cvs", + "url": "https://www.cvs.com/shop/rogaine-men-s-extra-strength-5-minoxidil-solution-for-hair-regrowth-prodid-1013645" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "shop/", + "url": 
"https://www.cvs.com/shop/rogaine-men-s-extra-strength-5-minoxidil-solution-for-hair-regrowth-prodid-1013645" + } + } + ] + }, + { + "index": "UoI3S1NseUPgO195KNgml", + "task": "Find a pair of mens running shoes in black, size 7 extra wide, 4+ stars and under $50 and add them to my cart in amazon", + "reference_task_length": 13, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "amazon.", + "url": "https://www.amazon.com/" + } + }, + { + "match_function_name": "url_semantic_match", + "content": { + "key": "k", + "reference_answer": "Decide whether are searching for men's running shoes", + "url": "https://www.amazon.com/s?k=men%27s+running+shoes&crid=2MMQANPUETJE8&sprefix=men%27s+running+shoes%2Caps%2C636&ref=nb_sb_noss_1" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "rh", + "reference_answer": "n:679255011,p_n_size_two_browse-vebin:5391077011", + "url": "https://www.amazon.com/s?k=men%27s+running+shoes&rh=n%3A679255011%2Cp_n_size_two_browse-vebin%3A5391077011&dc&ds=v1%3ASFtSwRmUUn1QStxZlBtZpXzzCgfDIXR9zTmT47h1kF4&crid=2MMQANPUETJE8&qid=1706274799&rnid=5391076011&sprefix=men%27s+running+shoes%2Caps%2C636&ref=sr_nr_p_n_size_two_browse-vebin_1" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "rh", + "reference_answer": "p_n_size_browse-vebin:1285096011", + "url": "https://www.amazon.com/s?k=men%27s+running+shoes&i=fashion-mens-shoes&rh=n%3A679286011%2Cp_n_size_two_browse-vebin%3A5391077011%2Cp_n_size_browse-vebin%3A1285096011&dc&ds=v1%3AMXA6en9x4F6k4tkaNkOi9UqCGZDpF%2F8%2FhgkLQTkUR7M&crid=2MMQANPUETJE8&qid=1706274815&rnid=1285068011&sprefix=men%27s+running+shoes%2Caps%2C636&ref=sr_nr_p_n_size_browse-vebin_7" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "rh", + "reference_answer": "p_72:2661618011", + "url": 
"https://www.amazon.com/s?k=men%27s+running+shoes&i=fashion-mens-shoes&rh=n%3A679286011%2Cp_n_size_two_browse-vebin%3A5391077011%2Cp_n_size_browse-vebin%3A1285096011&dc&ds=v1%3AMXA6en9x4F6k4tkaNkOi9UqCGZDpF%2F8%2FhgkLQTkUR7M&crid=2MMQANPUETJE8&qid=1706274815&rnid=1285068011&sprefix=men%27s+running+shoes%2Caps%2C636&ref=sr_nr_p_n_size_browse-vebin_7" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "rh", + "reference_answer": "p_36:-5000", + "url": "https://www.amazon.com/s?k=men%27s+running+shoes&i=fashion-mens-shoes&rh=n%3A679286011%2Cp_n_size_two_browse-vebin%3A5391077011%2Cp_n_size_browse-vebin%3A1285096011%2Cp_72%3A2661618011%2Cp_36%3A-5000&dc&crid=2MMQANPUETJE8&qid=1706274827&rnid=2661611011&sprefix=men%27s+running+shoes%2Caps%2C636&ref=sr_nr_p_36_4" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": "#add-to-cart-button ", + "netloc": "amazon", + "url": "https://www.amazon.com/Under-Armour-Charged-Pursuit-Sneaker/dp/B0968ZKJV4/ref=sr_1_1?crid=2MMQANPUETJE8&keywords=men%27s+running+shoes&psc=1&qid=1706274837&refinements=p_n_size_two_browse-vebin%3A5391077011%2Cp_n_size_browse-vebin%3A1285096011%2Cp_72%3A2661618011%2Cp_36%3A-5000&rnid=2661611011&s=apparel&sprefix=men%27s+running+shoes%2Caps%2C636&sr=1-1" + } + } + ] + }, + { + "index": "4syolpfmOW7tHhli_Qal4", + "task": "Find comedy tv shows on netflix sorted by audience score in rottentomatoes", + "reference_task_length": 8, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "ottentomatoes.", + "url": "https://www.rottentomatoes.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "browse/tv_series_browse/affiliates:netflix", + "url": "https://www.rottentomatoes.com/browse/tv_series_browse/affiliates:netflix" + } + }, + { + "match_function_name": "url_included_match", + 
"content": { + "key": "", + "reference_answer": "genres:comedy", + "url": "https://www.rottentomatoes.com/browse/tv_series_browse/affiliates:netflix~genres:comedy" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "sort:audience_highest", + "url": "https://www.rottentomatoes.com/browse/tv_series_browse/affiliates:netflix~genres:comedy~sort:audience_highest" + } + } + ] + }, + { + "index": "8oiyOS5v16Lpb_jifxuCx", + "task": "Find wall mirrors for under $20 in ikea", + "reference_task_length": 7, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "ikea.", + "url": "https://www.ikea.com/us/en/" + } + }, + { + "match_function_name": "url_semantic_match", + "content": { + "key": "q", + "reference_answer": "Decide whether is searchign for wall mirrors", + "url": "https://www.ikea.com/us/en/search/?q=wall%20mirrors" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "filters", + "reference_answer": "f-price-buckets:PRICE_0_2000", + "url": "https://www.ikea.com/us/en/search/?q=wall%20mirrors&filters=f-price-buckets%3APRICE_0_2000" + } + } + ] + }, + { + "index": "8YXEvYzGajLR8rZBSb5et", + "task": "Show me all my offers for 2-5 day cruises in carnival", + "reference_task_length": 5, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "carnival.", + "url": "https://www.carnival.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/cruise-search", + "url": "https://www.carnival.com/cruise-search?pageNumber=1&numadults=2&durdays=2,3,4,5&pagesize=8&sort=fromprice&showBest=true&async=true¤cy=USD&locality=1" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "durdays", + "reference_answer": "2,3,4,5", + "url": 
"https://www.carnival.com/cruise-search?pageNumber=1&numadults=2&durdays=2,3,4,5&pagesize=8&sort=fromprice&showBest=true&async=true¤cy=USD&locality=1" + } + } + ] + }, + { + "index": "aDUZsHGDATdWGluXuuoT1", + "task": "Make a playlist and add 3 songs from popular Selena Gomez tracks and name it Love in soundcloud", + "reference_task_length": 21, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "soundcloud.", + "url": "https://soundcloud.com/discover" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/selena-gomez-official", + "url": "https://soundcloud.com/selena-gomez-official" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/tracks", + "url": "https://soundcloud.com/selena-gomez-official/tracks" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".sc-button-addtoset.sc-button.moreActions__button.sc-button-medium.sc-button-tertiary ", + "netloc": "soundcloud", + "url": "https://soundcloud.com/discover" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".g-tabs.g-tabs-large >li:nth-child(2)>.tabs__tab.g-tabs-link ", + "netloc": "soundcloud", + "url": "https://soundcloud.com/discover" + } + }, + { + "match_function_name": "element_value_exactly_match", + "content": { + "reference_answer": "Love", + "netloc": "soundcloud", + "path": ".textfield.createPlaylist__title >.textfield__inputWrapper >.textfield__input.sc-input.sc-input-medium ", + "url": "https://soundcloud.com/selena-gomez-official/tracks" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".radioGroup.sharingRadio.createPlaylist__sharing.sc-media-additional.horizontal 
>label:nth-child(3)>.radioGroup__label >.radioGroup__labelText.sc-text-h4 ", + "netloc": "soundcloud", + "url": "https://soundcloud.com/discover" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".createPlaylist__saveButton.sc-button-cta.sc-button-primary.sc-button.sc-button-medium ", + "netloc": "soundcloud", + "url": "https://soundcloud.com/discover" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".sc-button-addtoset.sc-button.moreActions__button.sc-button-medium.sc-button-tertiary ", + "netloc": "soundcloud", + "url": "https://soundcloud.com/discover" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".lazyLoadingList__list.sc-list-nostyle.sc-clearfix >li:nth-child(1)>.addToPlaylistItem.g-flex-row-centered >.addToPlaylistItem__actions.g-flex-row-centered >.addToPlaylistButton.sc-button-secondary.sc-button.sc-button-medium.sc-button-responsive ", + "netloc": "soundcloud", + "url": "https://soundcloud.com/discover" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".sc-button-addtoset.sc-button.moreActions__button.sc-button-medium.sc-button-tertiary ", + "netloc": "soundcloud", + "url": "https://soundcloud.com/discover" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".lazyLoadingList__list.sc-list-nostyle.sc-clearfix >li:nth-child(1)>.addToPlaylistItem.g-flex-row-centered >.addToPlaylistItem__actions.g-flex-row-centered >.addToPlaylistButton.sc-button-secondary.sc-button.sc-button-medium.sc-button-responsive ", + "netloc": "soundcloud", + "url": "https://soundcloud.com/discover" + } + } + ] + }, + { + "index": "3XFTKKOEg8Cm2cEnqDF47", + "task": "Locate a large store in 
Washington that has kids' and maternity products in uniqlo", + "reference_task_length": 12, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "map.uniqlo.", + "url": "https://map.uniqlo.com/us/en/" + } + }, + { + "match_function_name": "element_value_semantic_match", + "content": { + "reference_answer": "Decide Whether is searching for Washington state", + "netloc": "map", + "url": "https://map.uniqlo.com/us/en/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "product", + "reference_answer": "kids", + "url": "https://map.uniqlo.com/us/en/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "product", + "reference_answer": "maternity", + "url": "https://map.uniqlo.com/us/en/" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": "#undefined-content >fieldset:nth-child(1)>div:nth-child(1)>div:nth-child(1)>div:nth-child(1)>label:nth-child(2)", + "netloc": "map", + "url": "https://map.uniqlo.com/us/en/" + } + } + ] + }, + { + "index": "FoP8QZ-JiAcVODUHGQj8v", + "task": "Buy a single day pass to Six Flags, Magic Mountain in sixflags", + "reference_task_length": 15, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "sixflags.", + "url": "https://www.sixflags.com/greatamerica" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "magicmountain", + "url": "https://www.sixflags.com/magicmountain" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".sf-compare-column.sf-compare-column-type-single.sf-compare-column-425324.sf-compare-column-0 >.sf-compare-column-footer >a:nth-child(4)", + "netloc": "sixflags", + "url": 
"https://www.sixflags.com/magicmountain/store/tickets" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "store/tickets", + "url": "https://www.sixflags.com/magicmountain/store/tickets" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".snap-res--list.gap-col.gap-xs-8.gap-md-16 >snap-package-result:nth-child(2)>.snap-pkg-result--content.gap-col.gap-4.gap-auto >.snap-pkg-result--name.font-semi-bold.blue ", + "netloc": "sixflags", + "url": "https://www.sixflags.com/magicmountain/store/tickets" + } + } + ] + }, + { + "index": "0HM69WbWPtgMJsnigeQiK", + "task": "Confirm my vip tour at the six flags Discovery Kingdom in sixflags", + "reference_task_length": 6, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "sixflags.", + "url": "https://www.sixflags.com/america" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "discoverykingdom", + "url": "https://www.sixflags.com/discoverykingdom" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "store/vip-tours", + "url": "https://www.sixflags.com/discoverykingdom/store/vip-tours" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".entry-content.clear >div:nth-child(1)>section:nth-child(1)>div:nth-child(1)>div:nth-child(1)>div:nth-child(1)>section:nth-child(1)>div:nth-child(1)>div:nth-child(2)>div:nth-child(1)>div:nth-child(9)>div:nth-child(1)>div:nth-child(1)>a:nth-child(1)", + "netloc": "sixflags", + "url": "https://www.sixflags.com/discoverykingdom/store/vip-tours" + } + } + ] + }, + { + "index": "29LVD9B4mMWD6tACTcUxz", + "task": "Find a pasta restaurant in Sydney and save it in resy", + "reference_task_length": 12, + 
"evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "resy.", + "url": "https://resy.com/cities/syd?date=2024-01-27&seats=2&query=pasta" + } + }, + { + "match_function_name": "url_semantic_match", + "content": { + "key": "query", + "reference_answer": "Decide whether is searching for a pasta restaurant", + "url": "https://resy.com/cities/syd?date=2024-01-27&seats=2&query=pasta" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".ResyIcon.ResyIcon--heart >svg:nth-child(1)>path:nth-child(1)", + "netloc": "resy", + "url": "https://resy.com/cities/syd?date=2024-01-27&seats=2&query=pasta" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "cities/syd", + "url": "https://resy.com/cities/syd?date=2024-01-27&seats=2&query=pasta" + } + } + ] + }, + { + "index": "4Eeod20yp8c7xLKgwrt3V", + "task": "Compare two wireless printers that are rated above 4 stars in newegg", + "reference_task_length": 10, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "newegg.", + "url": "https://www.newegg.com/" + } + }, + { + "match_function_name": "url_semantic_match", + "content": { + "key": "d", + "reference_answer": "Decide whether are searching for wireless printer", + "url": "https://www.newegg.com/p/pl?d=wireless+printer" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "N", + "reference_answer": "4204", + "url": "https://www.newegg.com/p/pl?d=wireless+printer&N=4204" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".button.button-m.bg-blue >span:nth-child(1)", + "netloc": "newegg", + "url": "https://www.newegg.com/p/pl?d=wireless+printer&N=4204" + } + } + ] + }, + { + "index": "BwEba3AFQbwYHDQ39PE97", + 
"task": "Find all outdoor events this month in NYC in new.mta.info", + "reference_task_length": 10, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "new.mta.info", + "url": "https://new.mta.info/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/events", + "url": "https://away.mta.info/events?utm_source=mtadotinfo" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/outdoors", + "url": "https://away.mta.info/events/outdoors/new-york-city/this-month" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "new-york-city/", + "url": "https://away.mta.info/events/outdoors/new-york-city/this-month" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/this-month", + "url": "https://away.mta.info/events/outdoors/new-york-city/this-month" + } + } + ] + }, + { + "index": "fuqsgHg3WWfn3uEkHleOT", + "task": "Find cheapest bus for 2 persons from New York to Washington, DC in us.megabus", + "reference_task_length": 10, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "us.megabus.", + "url": "https://us.megabus.com/" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "totalPassengers", + "reference_answer": "2", + "url": "https://us.megabus.com/journey-planner/journeys?days=1&concessionCount=0&departureDate=2024-01-27&destinationId=142&inboundDepartureDate=2024-01-27&inboundOtherDisabilityCount=0&inboundPcaCount=0&inboundWheelchairSeated=0&nusCount=0&originId=123&otherDisabilityCount=0&pcaCount=0&totalPassengers=2&wheelchairSeated=0" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "destinationId", + "reference_answer": "142", + "url": 
"https://us.megabus.com/journey-planner/journeys?days=1&concessionCount=0&departureDate=2024-01-27&destinationId=142&inboundDepartureDate=2024-01-27&inboundOtherDisabilityCount=0&inboundPcaCount=0&inboundWheelchairSeated=0&nusCount=0&originId=123&otherDisabilityCount=0&pcaCount=0&totalPassengers=2&wheelchairSeated=0" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "originId", + "reference_answer": "123", + "url": "https://us.megabus.com/journey-planner/journeys?days=1&concessionCount=0&departureDate=2024-01-27&destinationId=142&inboundDepartureDate=2024-01-27&inboundOtherDisabilityCount=0&inboundPcaCount=0&inboundWheelchairSeated=0&nusCount=0&originId=123&otherDisabilityCount=0&pcaCount=0&totalPassengers=2&wheelchairSeated=0" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": "#sortselected ", + "netloc": "us", + "url": "https://us.megabus.com/journey-planner/journeys?days=1&concessionCount=0&departureDate=2024-01-27&destinationId=142&inboundDepartureDate=2024-01-27&inboundOtherDisabilityCount=0&inboundPcaCount=0&inboundWheelchairSeated=0&nusCount=0&originId=123&otherDisabilityCount=0&pcaCount=0&totalPassengers=2&wheelchairSeated=0" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": "#sortselected >option:nth-child(3)", + "netloc": "us", + "url": "https://us.megabus.com/journey-planner/journeys?days=1&concessionCount=0&departureDate=2024-01-27&destinationId=142&inboundDepartureDate=2024-01-27&inboundOtherDisabilityCount=0&inboundPcaCount=0&inboundWheelchairSeated=0&nusCount=0&originId=123&otherDisabilityCount=0&pcaCount=0&totalPassengers=2&wheelchairSeated=0" + } + } + ] + }, + { + "index": "fRp4AJThGn5cxMsD9L0dj", + "task": "Locate a store in spring, Texas in kohls", + "reference_task_length": 6, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": 
{ + "key": "", + "reference_answer": "kohls.", + "url": "https://www.kohls.com/?kwid=p43857836129&utm_source=google&utm_medium=cpc&utm_term=Kohls.&utm_campaign=Brand_Search_Google_Brand&UTM_Adgroupid=58700005024606624&pfx=pfx_google_roi&cid=brandsearch&ds_rl=2671&gad_source=1&gclid=CjwKCAiAk9itBhASEiwA1my_68vyqjyesKUyXab6l12l_SQ8AqMN0LR84PjKGghYz-Mo-wpzvrh4whoC2T0QAvD_BwE&gclsrc=aw.ds" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/stores", + "url": "https://www.kohls.com/stores.shtml" + } + }, + { + "match_function_name": "url_semantic_match", + "content": { + "key": "q", + "reference_answer": "Decide whether are searching for Spring, texas", + "url": "https://www.kohls.com/?q=Spring,%20TX" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".map-list >li:nth-child(1)>.map-list-item >.map-list-item-header >.ga-link >.location-name >span:nth-child(1)", + "netloc": "kohls", + "url": "https://www.kohls.com/stores.shtml" + } + } + ] + }, + { + "index": "BcmM3jqu6JC_zGixFIn9n", + "task": "Show MLB tickets for this weekend in ticketcenter", + "reference_task_length": 5, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "ticketcenter.", + "url": "https://www.ticketcenter.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/sports/baseball/professional-mlb", + "url": "https://www.ticketcenter.com/sports/baseball/professional-mlb" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".filters >span:nth-child(6)>div:nth-child(1)>ul:nth-child(2)>li:nth-child(1)>a:nth-child(1)", + "netloc": "ticketcenter", + "url": "https://www.ticketcenter.com/sports/baseball/professional-mlb" + } + } + ] + }, + { + "index": 
"S3OwPY3cGAf71kRLyztef", + "task": "Show me the schedule for the orange line in mbta", + "reference_task_length": 4, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "mbta.", + "url": "https://www.mbta.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/schedules/Orange/line", + "url": "https://www.mbta.com/schedules/Orange/line" + } + } + ] + }, + { + "index": "SeWmxjveoPc9RGHOF3j5z", + "task": "Search for a rental car in Miami, FL for pick up at the airport and drop off at a different location within the same city in kayak", + "reference_task_length": 10, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "kayak.", + "url": "https://www.kayak.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/cars", + "url": "https://www.kayak.com/cars" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": "#oneway >span:nth-child(1)", + "netloc": "kayak", + "url": "https://www.kayak.com/cars" + } + }, + { + "match_function_name": "element_value_semantic_match", + "content": { + "reference_answer": "Decide whether are searching for Miami Airport", + "netloc": "kayak", + "path": ".J_T2-field-group.J_T2-mod-collapse-l.J_T2-mod-grow >div:nth-child(1)>.pM26 >div:nth-child(1)>input:nth-child(2)", + "url": "https://www.kayak.com/cars" + } + }, + { + "match_function_name": "element_value_semantic_match", + "content": { + "reference_answer": "Decide whether are searching for Miami", + "netloc": "kayak", + "path": ".UBQ2-dropoff-container >.pM26 >div:nth-child(1)>input:nth-child(2)", + "url": "https://www.kayak.com/cars" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": 
".a7Uc-infix >svg:nth-child(1)", + "netloc": "kayak", + "url": "https://www.kayak.com/cars" + } + } + ] + }, + { + "index": "smtGZ3IGKhkLn6u5JZn_N", + "task": "Browse tickets for Chicago Bears games in ticketcenter", + "reference_task_length": 3, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "ticketcenter.", + "url": "https://www.ticketcenter.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/chicago-bears-tickets", + "url": "https://www.ticketcenter.com/chicago-bears-tickets" + } + } + ] + }, + { + "index": "T6QdE5lpgWWBrk9FuZ9E5", + "task": "Show me the recent trades in boardgamegeek", + "reference_task_length": 5, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "boardgamegeek.", + "url": "https://boardgamegeek.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/trade", + "url": "https://boardgamegeek.com/trade" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "action", + "reference_answer": "recent", + "url": "https://boardgamegeek.com/geektrade.php?action=recent" + } + } + ] + }, + { + "index": "l-ZNNoU5p81fOEk9td5os", + "task": "Browse hip hop concerts that are happening this weekend in ticketcenter", + "reference_task_length": 5, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "ticketcenter.", + "url": "https://www.ticketcenter.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/concerts", + "url": "https://www.ticketcenter.com/concerts" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/rap-hip-hop", + "url": 
"https://www.ticketcenter.com/concerts/rap-hip-hop" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".filters >span:nth-child(6)>div:nth-child(1)>ul:nth-child(2)>li:nth-child(1)>a:nth-child(1)", + "netloc": "ticketcenter", + "url": "https://www.ticketcenter.com/concerts/rap-hip-hop" + } + } + ] + }, + { + "index": "Im8_msPnXNbCjzJABTwNZ", + "task": "Search for newest girls' training leggings in size yxl in underarmour", + "reference_task_length": 11, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "underarmour.", + "url": "https://www.underarmour.com/en-us/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/girls/clothing/bottoms", + "url": "https://www.underarmour.com/en-us/c/girls/clothing/bottomhttps://www.underarmour.com/en-us/c/girls/clothing/bottom" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/training", + "url": "https://www.underarmour.com/en-us/c/girls/clothing/bottoms/training/" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "prefv1", + "reference_answer": "YXL", + "url": "https://www.underarmour.com/en-us/c/girls/clothing/bottoms/training/?prefn1=size&prefv1=YXL" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/fitted-training", + "url": "https://www.underarmour.com/en-us/c/girls/clothing/bottoms/fitted-training/?prefn1=size&prefv1=YXL" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "srule", + "reference_answer": "newest", + "url": "https://www.underarmour.com/en-us/c/girls/clothing/bottoms/fitted-training/?prefn1=size&prefv1=YXL&srule=newest" + } + } + ] + }, + { + "index": "jjXQ0JM7mr09Jl1KVZzSn", + "task": "Show Jazz music genre tracks in 
last.fm", + "reference_task_length": 5, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "last.fm.", + "url": "https://www.last.fm/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/music", + "url": "https://www.last.fm/music" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/tag/jazz", + "url": "https://www.last.fm/tag/jazz" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/tracks", + "url": "https://www.last.fm/tag/jazz/tracks" + } + } + ] + }, + { + "index": "NNHA-KGGJkKQR3WZT1GhY", + "task": "Find a parking with EV charging for month with lowest price in Chicago in spothero", + "reference_task_length": 10, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "spothero.", + "url": "https://spothero.com/" + } + }, + { + "match_function_name": "element_value_semantic_match", + "content": { + "reference_answer": "Decide whether are searching for Chicago", + "netloc": "spothero", + "url": "https://spothero.com/" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "monthly", + "reference_answer": "true", + "url": "https://spothero.com/search?kind=city&id=1&monthly=true" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": "#Checkbox-FilterItem-ev ", + "netloc": "spothero", + "url": "https://spothero.com/search?kind=city&id=1&monthly=true" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".Button.Button-primary ", + "netloc": "spothero", + "url": "https://spothero.com/search?kind=city&id=1&monthly=true" + } + }, + { + "match_function_name": 
"element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".SpotListSorter >.Select.FormElement >.FormElement-control >.FormElement-item ", + "netloc": "spothero", + "url": "https://spothero.com/search?kind=city&id=1&monthly=true" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".SpotListSorter >.Select.FormElement >.FormElement-control >.FormElement-item >option:nth-child(2)", + "netloc": "spothero", + "url": "https://spothero.com/search?kind=city&id=1&monthly=true" + } + } + ] + }, + { + "index": "I4FDUSTAfdXjQYQ8Lpkur", + "task": "Filter search results for guitar tabs to only show songs with a difficulty rating of \"Beginner\" in ultimate-guitar", + "reference_task_length": 4, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "ultimate-guitar.", + "url": "https://www.ultimate-guitar.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/explore", + "url": "https://www.ultimate-guitar.com/explore" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "difficulty[]", + "reference_answer": "2", + "url": "https://www.ultimate-guitar.com/explore?difficulty[]=2" + } + } + ] + }, + { + "index": "v3G0RAZWHBsT1N_5BpFzq", + "task": "Find the highest-rated adults-only romantic beach vacation deals in travelzoo", + "reference_task_length": 9, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "travelzoo.", + "url": "https://www.travelzoo.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/beach-vacation-deals/", + "url": "https://www.travelzoo.com/collection/beach-vacation-deals/" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + 
"content": { + "reference_answer": ".btn-group.btn-group--icons.btn-group--scrollx-xs.btn-group--single-row.js-btn-group-radio.btn-group--icons-tall >.btn-group-wrapper >.btn-group-items >div:nth-child(2)>.text ", + "netloc": "travelzoo", + "url": "https://www.travelzoo.com/collection/beach-vacation-deals/" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".btn-group-items >button:nth-child(2)", + "netloc": "travelzoo", + "url": "https://www.travelzoo.com/collection/beach-vacation-deals/" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".btn-group-items >button:nth-child(27)", + "netloc": "travelzoo", + "url": "https://www.travelzoo.com/collection/beach-vacation-deals/" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".button-primary.button-done.right ", + "netloc": "travelzoo", + "url": "https://www.travelzoo.com/collection/beach-vacation-deals/" + } + } + ] + }, + { + "index": "tINRPzcB9gQ9_5KsL5qnW", + "task": "Check reviews and research information about Audi A6 2020 in cargurus", + "reference_task_length": 11, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "cargurus.", + "url": "https://www.cargurus.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/2020-", + "url": "https://www.cargurus.com/research/2020-Audi-A6-c29349" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "-Audi-", + "url": "https://www.cargurus.com/research/2020-Audi-A6-c29349" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "-A6-", + "url": "https://www.cargurus.com/research/2020-Audi-A6-c29349" + } 
+ }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".Zb9tJO.qs80eV >li:nth-child(2)", + "netloc": "cargurus", + "url": "https://www.cargurus.com/research/2020-Audi-A6-c29349" + } + } + ] + }, + { + "index": "wiqkYolPL-j6RRg38yaoj", + "task": "Search for Mexican restaurants in Concord, CA in yelp", + "reference_task_length": 6, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "yelp.", + "url": "https://www.yelp.com/" + } + }, + { + "match_function_name": "url_semantic_match", + "content": { + "key": "find_desc", + "reference_answer": "Decide whether are searching for mexican restaurants", + "url": "https://www.yelp.com/search?find_desc=Mexican+Restaurants&find_loc=Concord%2C+CA" + } + }, + { + "match_function_name": "url_semantic_match", + "content": { + "key": "find_loc", + "reference_answer": "Decide whether are searching for Concord", + "url": "https://www.yelp.com/search?find_desc=Mexican+Restaurants&find_loc=Concord%2C+CA" + } + } + ] + }, + { + "index": "WVLU1JwIBD1W39pi3I03H", + "task": "Find a person by phone number 123456789 in yellowpages", + "reference_task_length": 8, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "yellowpages.", + "url": "https://www.yellowpages.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "people.", + "url": "https://people.yellowpages.com/whitepages/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "whitepages/phone-lookup", + "url": "https://people.yellowpages.com/whitepages/phone-lookup" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "phone", + "reference_answer": "123456789", + "url": 
"https://people.yellowpages.com/whitepages/phone-lookup?phone=123456789" + } + } + ] + }, + { + "index": "XcGjW6ObOPoGTiepUhImF", + "task": "Play the Wakanda Forever trailer in redbox", + "reference_task_length": 6, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "redbox.", + "url": "https://www.redbox.com/movies/black-panther-wakanda-forever" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/black-panther-wakanda-forever", + "url": "https://www.redbox.com/movies/black-panther-wakanda-forever" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": "#maincontent >div:nth-child(1)>div:nth-child(2)>div:nth-child(3)>div:nth-child(2)>div:nth-child(1)>button:nth-child(1)>svg:nth-child(1)>g:nth-child(2)>path:nth-child(1)", + "netloc": "redbox", + "url": "https://www.redbox.com/movies/black-panther-wakanda-forever" + } + } + ] + }, + { + "index": "zmaw1q_peESU5FXhv6Nc0", + "task": "Find the flight from New York to Miami from Mar 27 to Mar 31 that has the lowest core price in points in jetblue", + "reference_task_length": 17, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "jetblue.", + "url": "https://www.jetblue.com/" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "from", + "reference_answer": "NYC", + "url": "https://www.jetblue.com/booking/flights?from=NYC&to=XFL&depart=2024-03-27&return=2024-03-31&isMultiCity=false&noOfRoute=1&lang=en&adults=1&children=0&infants=0&sharedMarket=false&roundTripFaresFlag=false&usePoints=false" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "to", + "reference_answer": "XFL", + "url": 
"https://www.jetblue.com/booking/flights?from=NYC&to=XFL&depart=2024-03-27&return=2024-03-31&isMultiCity=false&noOfRoute=1&lang=en&adults=1&children=0&infants=0&sharedMarket=false&roundTripFaresFlag=false&usePoints=false" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "depart", + "reference_answer": "2024-03-27", + "url": "https://www.jetblue.com/booking/flights?from=NYC&to=XFL&depart=2024-03-27&return=2024-03-31&isMultiCity=false&noOfRoute=1&lang=en&adults=1&children=0&infants=0&sharedMarket=false&roundTripFaresFlag=false&usePoints=false" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "return", + "reference_answer": "2024-03-31", + "url": "https://www.jetblue.com/booking/flights?from=NYC&to=XFL&depart=2024-03-27&return=2024-03-31&isMultiCity=false&noOfRoute=1&lang=en&adults=1&children=0&infants=0&sharedMarket=false&roundTripFaresFlag=false&usePoints=false" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".inline-flex >jb-segment:nth-child(2)>.f6.royal-blue.flex.tc.lh-title.ph3.items-center ", + "netloc": "jetblue", + "url": "https://www.jetblue.com/booking/flights?from=NYC&to=XFL&depart=2024-03-27&return=2024-03-31&isMultiCity=false&noOfRoute=1&lang=en&adults=1&children=0&infants=0&sharedMarket=false&roundTripFaresFlag=false&usePoints=false" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": "#jb-select-1-option-1 >div:nth-child(1)>span:nth-child(2)", + "netloc": "jetblue", + "url": "https://www.jetblue.com/booking/flights?from=NYC&to=XFL&depart=2024-03-27&return=2024-03-31&isMultiCity=false&noOfRoute=1&lang=en&adults=1&children=0&infants=0&sharedMarket=false&roundTripFaresFlag=false&usePoints=false" + } + } + ] + }, + { + "index": "zZlS3taTzRyPq-m4lWHCK", + "task": "Check the order status using an order number of24124124091. 
The email address is boobear@gmail.com in apple", + "reference_task_length": 9, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "secure4.store.apple.", + "url": "https://secure4.store.apple.com/shop/signIn/orders?ssi=1AAABjU9whsoguipdI_UC8mWh8GmvFlw8NMQuxEDaa2SNHX6Fc1n15x0AAAAxaHR0cHM6Ly9zZWN1cmU0LnN0b3JlLmFwcGxlLmNvbS9zaG9wL29yZGVyL2xpc3R8fAACAbfdSCduwV2QV_DrgZUKKYf0wgHP3eT1ignsgCuaz3Qi" + } + }, + { + "match_function_name": "element_value_exactly_match", + "content": { + "reference_answer": "24124124091", + "netloc": "secure4", + "path": ".row.rs-sign-in-main.as-l-container >div:nth-child(3)>div:nth-child(1)>div:nth-child(1)>form:nth-child(2)>div:nth-child(2)>div:nth-child(1)>.form-textbox-input ", + "url": "https://secure4.store.apple.com/shop/signIn/orders?ssi=1AAABjU9whsoguipdI_UC8mWh8GmvFlw8NMQuxEDaa2SNHX6Fc1n15x0AAAAxaHR0cHM6Ly9zZWN1cmU0LnN0b3JlLmFwcGxlLmNvbS9zaG9wL29yZGVyL2xpc3R8fAACAbfdSCduwV2QV_DrgZUKKYf0wgHP3eT1ignsgCuaz3Qi" + } + }, + { + "match_function_name": "element_value_exactly_match", + "content": { + "reference_answer": "boobear@gmail.com", + "netloc": "secure4", + "path": ".row.rs-sign-in-main.as-l-container >div:nth-child(3)>div:nth-child(1)>div:nth-child(1)>form:nth-child(2)>div:nth-child(2)>div:nth-child(2)>.form-textbox-input ", + "url": "https://secure4.store.apple.com/shop/signIn/orders?ssi=1AAABjU9whsoguipdI_UC8mWh8GmvFlw8NMQuxEDaa2SNHX6Fc1n15x0AAAAxaHR0cHM6Ly9zZWN1cmU0LnN0b3JlLmFwcGxlLmNvbS9zaG9wL29yZGVyL2xpc3R8fAACAbfdSCduwV2QV_DrgZUKKYf0wgHP3eT1ignsgCuaz3Qi" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".column.large-12.large-centered >button:nth-child(1)>span:nth-child(1)>span:nth-child(1)>span:nth-child(1)", + "netloc": "secure4", + "url": 
"https://secure4.store.apple.com/shop/signIn/orders?ssi=1AAABjU9whsoguipdI_UC8mWh8GmvFlw8NMQuxEDaa2SNHX6Fc1n15x0AAAAxaHR0cHM6Ly9zZWN1cmU0LnN0b3JlLmFwcGxlLmNvbS9zaG9wL29yZGVyL2xpc3R8fAACAbfdSCduwV2QV_DrgZUKKYf0wgHP3eT1ignsgCuaz3Qi" + } + } + ] + }, + { + "index": "TB49voH8mO7cf5RbyDmgH", + "task": "Open gift list for beauty products in instacart", + "reference_task_length": 7, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "instacart.", + "url": "https://www.instacart.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/store/hub/gifts", + "url": "https://www.instacart.com/store/hub/gifts" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".e-64tucv >li:nth-child(7)>.e-19johef >.e-g3bddq >.e-1081mk8 >.e-159k8fc ", + "netloc": "instacart", + "url": "https://www.instacart.com/store/hub/gifts" + } + } + ] + }, + { + "index": "tdiqD1FPDYieEiCd1mlHz", + "task": "locate the store in IL in uniqlo", + "reference_task_length": 7, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "map.uniqlo.", + "url": "https://map.uniqlo.com/us/en/" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": "#option-13 ", + "netloc": "map", + "url": "https://map.uniqlo.com/us/en/" + } + } + ] + }, + { + "index": "TeTQRYT0w-oCl5qCXSHxc", + "task": "Find the trade-in options for PS4 in gamestop", + "reference_task_length": 7, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "gamestop.", + "url": "https://www.gamestop.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/trade", + "url": 
"https://www.gamestop.com/trade/" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "cgid", + "reference_answer": "playstation-4", + "url": "https://www.gamestop.com/trade/?cgid=playstation-4" + } + } + ] + }, + { + "index": "toW0JlkNCDtC3rVlF_j3o", + "task": "What are Gloomhaven's ratings? in boardgamegeek", + "reference_task_length": 7, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "boardgamegeek.", + "url": "https://boardgamegeek.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "boardgame/174430/gloomhaven", + "url": "https://boardgamegeek.com/boardgame/174430/gloomhaven" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/ratings", + "url": "https://boardgamegeek.com/boardgame/174430/gloomhaven" + } + } + ] + }, + { + "index": "VyaGipMbchitfc-n4yeJr", + "task": "Show me a list of children's program events in Illinois in nps.gov", + "reference_task_length": 11, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "nps.gov/", + "url": "https://www.nps.gov/index.htm" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/event-search", + "url": "https://www.nps.gov/planyourvisit/event-search.htm" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".multiselect-container.dropdown-menu.show >button:nth-child(18)>.form-check >.form-check-label ", + "netloc": "nps", + "url": "https://www.nps.gov/planyourvisit/event-search.htm" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".multiselect-container.dropdown-menu.show >button:nth-child(6)>.form-check 
>.form-check-label ", + "netloc": "nps", + "url": "https://www.nps.gov/planyourvisit/event-search.htm" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".EventCalendar__SearchBar__SubmitButton.FacetedSearch-submitButton ", + "netloc": "nps", + "url": "https://www.nps.gov/planyourvisit/event-search.htm" + } + } + ] + }, + { + "index": "acCvItQT2PQxhAEaZpw4i", + "task": "Find a pedicure salon in New York and add two to favorites in yellowpages", + "reference_task_length": 11, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "yellowpages.", + "url": "https://www.yellowpages.com/" + } + }, + { + "match_function_name": "url_semantic_match", + "content": { + "key": "search_terms", + "reference_answer": "Decide whether are searching for pedicure salon", + "url": "https://www.yellowpages.com/search?search_terms=pedicure+salon&geo_location_terms=New+York" + } + }, + { + "match_function_name": "url_semantic_match", + "content": { + "key": "geo_location_terms", + "reference_answer": "Decide whether are searching for New York", + "url": "https://www.yellowpages.com/search?search_terms=pedicure+salon&geo_location_terms=New+York" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".add-to-favorites >svg:nth-child(1)>use:nth-child(1)", + "netloc": "yellowpages", + "url": "https://www.yellowpages.com/new-york-ny/mip/beauty-youth-village-spa-462459927" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".add-to-favorites >svg:nth-child(1)>use:nth-child(1)", + "netloc": "yellowpages", + "url": "https://www.yellowpages.com/new-york-ny/mip/fabio-doti-salon-467213009" + } + } + ] + }, + { + "index": "CGMIrWw9PUbA-_-eobsE4", + "task": "Browse cough medicine that is rated 4 stars and 
above and is $15-$20 in cvs", + "reference_task_length": 6, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "cvs.", + "url": "https://www.cvs.com/" + } + }, + { + "match_function_name": "url_semantic_match", + "content": { + "key": "searchTerm", + "reference_answer": "Decide whether are searching for cough medicine", + "url": "https://www.cvs.com/search?searchTerm=cough%20medicine" + } + }, + { + "match_function_name": "element_value_exactly_match", + "content": { + "reference_answer": "", + "netloc": "cvs", + "url": "https://www.cvs.com/search?searchTerm=cough%20medicine" + } + }, + { + "match_function_name": "element_value_exactly_match", + "content": { + "reference_answer": "", + "netloc": "cvs", + "url": "https://www.cvs.com/search?searchTerm=cough%20medicine" + } + } + ] + }, + { + "index": "-MhfU8oaGzKwxF8Vss27N", + "task": "Find vitamin D that are buy 1 get 1 free and new arrival in cvs", + "reference_task_length": 13, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "cvs.", + "url": "https://www.cvs.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "shop/vitamins/letter-vitamins/vitamin-d", + "url": "https://www.cvs.com/shop/vitamins/letter-vitamins/vitamin-d?icid=shop-vitamins-letter-vitamins-vitamin-d" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "Buy_1,_Get_1_Free", + "url": "https://www.cvs.com/shop/vitamins/letter-vitamins/vitamin-d?icid=shop-vitamins-letter-vitamins-vitamin-d" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/prna", + "url": "https://www.cvs.com/shop/vitamins/letter-vitamins/vitamin-d?icid=shop-vitamins-letter-vitamins-vitamin-d" + } + } + ] + }, + { + "index": "4wZJwHZUJURBC52Itut37", + "task": "Search for 
holiday campground in Alaska in koa", + "reference_task_length": 6, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "koa.", + "url": "https://koa.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "campgrounds/homer/", + "url": "https://koa.com/campgrounds/homer/" + } + } + ] + }, + { + "index": "7_nSOclMlKHrVG21A6e67", + "task": "Locate a store which is nearest to 10017 zip code in uniqlo", + "reference_task_length": 9, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "map.uniqlo.", + "url": "https://map.uniqlo.com/us/en/" + } + }, + { + "match_function_name": "element_value_exactly_match", + "content": { + "reference_answer": "10017", + "netloc": "map", + "url": "https://map.uniqlo.com/us/en/" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": "#root >div:nth-child(1)>div:nth-child(3)>section:nth-child(1)>section:nth-child(3)>div:nth-child(1)>div:nth-child(1)>div:nth-child(1)>div:nth-child(1)>form:nth-child(1)>button:nth-child(2)>div:nth-child(1)>svg:nth-child(1)", + "netloc": "map", + "url": "https://map.uniqlo.com/us/en/" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": "#root >div:nth-child(1)>div:nth-child(3)>section:nth-child(1)>section:nth-child(4)>section:nth-child(2)>div:nth-child(5)>div:nth-child(2)>ul:nth-child(1)>li:nth-child(1)>button:nth-child(1)", + "netloc": "map", + "url": "https://map.uniqlo.com/us/en/" + } + } + ] + }, + { + "index": "EJzybjJ3oqqrdh5bQkOa7", + "task": "watch for news list about the latest news about Lebron James in sports.yahoo", + "reference_task_length": 8, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + 
"reference_answer": "sports.yahoo.", + "url": "https://sports.yahoo.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "nba/players/3704", + "url": "https://sports.yahoo.com/nba/players/3704/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/news", + "url": "https://sports.yahoo.com/nba/players/3704/" + } + } + ] + }, + { + "index": "hTJXHkogeG54F3ZaCfBxH", + "task": "What are the Resy Staff Favorites in Seattle? in resy", + "reference_task_length": 8, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "https://resy.com/", + "url": "https://resy.com/?date=2024-01-28&seats=2" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "list", + "reference_answer": "collection_963", + "url": "https://resy.com/cities/sea?seats=2&date=2024-01-28&list=collection_963" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/sea", + "url": "https://resy.com/cities/sea?seats=2&date=2024-01-28&list=collection_963" + } + } + ] + }, + { + "index": "hWz3-bu-hGsrtoojpBZCU", + "task": "Find the fast responding highest-rated electrician, who is currently open for replacement of some light fixtures in Westminster, and request a quote in yelp", + "reference_task_length": 13, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "yelp.", + "url": "https://www.yelp.com/" + } + }, + { + "match_function_name": "url_semantic_match", + "content": { + "key": "find_desc", + "reference_answer": "Decide whether are searching for Electricians", + "url": "https://www.yelp.com/search?find_desc=Electricians&find_loc=Honolulu%2C+HI" + } + }, + { + "match_function_name": "url_semantic_match", + "content": { + "key": "find_loc", + "reference_answer": "Decide whether 
are searching for Westminster,CA", + "url": "https://www.yelp.com/search?find_desc=Electricians&find_loc=Westminster%2C+CA" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "sortby", + "reference_answer": "rating", + "url": "https://www.yelp.com/search?find_desc=Electricians&find_loc=Westminster%2C+CA&sortby=rating" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "open_now", + "reference_answer": "6", + "url": "https://www.yelp.com/search?find_desc=Electricians&find_loc=Westminster%2C+CA&sortby=rating&open_now=6672" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "attrs", + "reference_answer": "is_fast_mtb_responder", + "url": "https://www.yelp.com/search?find_desc=Electricians&find_loc=Westminster%2C+CA&attrs=is_fast_mtb_responder&open_now=6673" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "attrs", + "reference_answer": "OnlineMessageThisBusiness", + "url": "https://www.yelp.com/search?find_desc=Electricians&find_loc=Westminster%2C+CA&attrs=is_fast_mtb_responder%2COnlineMessageThisBusiness&open_now=6673" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "attrs", + "reference_answer": "item_light_fixture", + "url": "https://www.yelp.com/search?find_desc=Electricians&find_loc=Westminster%2C+CA&attrs=is_fast_mtb_responder%2COnlineMessageThisBusiness%2Crel_job_works_on_item%3A%3Aitem_light_fixture&open_now=6673" + } + } + ] + }, + { + "index": "Lw6bGDBvAklLaOhY0HRVe", + "task": "Browse glamping locations in California in koa", + "reference_task_length": 6, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "koa.", + "url": "https://koa.com/" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": "#list-states-us >li:nth-child(5)>a:nth-child(1)", + 
"netloc": "koa", + "url": "https://koa.com/find-a-koa/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/find-a-koa", + "url": "https://koa.com/find-a-koa/" + } + } + ] + }, + { + "index": "n0dCdAEPP3VW2Y_h545lq", + "task": "Show most expensive cruise deals in Europe and Mediterranean in travelzoo", + "reference_task_length": 8, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "travelzoo.", + "url": "https://www.travelzoo.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "collection/cruises/", + "url": "https://www.travelzoo.com/collection/cruises/europe-mediterranean/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "europe-mediterranean/", + "url": "https://www.travelzoo.com/collection/cruises/europe-mediterranean/" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".btn-group-items >div:nth-child(4)>.text ", + "netloc": "travelzoo", + "url": "https://www.travelzoo.com/collection/cruises/europe-mediterranean/" + } + }, + { + "match_function_name": "element_value_exactly_match", + "content": { + "reference_answer": "DONE", + "netloc": "travelzoo", + "url": "https://www.travelzoo.com/collection/cruises/europe-mediterranean/" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".result-list.list-nostyle >li:nth-child(1)>div:nth-child(1)>a:nth-child(1)>.deal-info.table-layout.clamped >div:nth-child(2)>h3:nth-child(1)>.deal-headline-text.ts-original-inline.truncate ", + "netloc": "travelzoo", + "url": "https://www.travelzoo.com/collection/cruises/europe-mediterranean/" + } + } + ] + }, + { + "index": "ODAiMd9KUjC7Jv7eodHqp", + "task": "Add a pack of toilet 
paper and a bottle of laundry detergent to your Amazon cart with the lowest total price in amazon", + "reference_task_length": 16, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "amazon.", + "url": "https://www.amazon.com/" + } + }, + { + "match_function_name": "url_semantic_match", + "content": { + "key": "k", + "reference_answer": "Decide whether are searching for a pack of toilet paper", + "url": "https://www.amazon.com/s?k=a+pack+of+toilet+paper&crid=2ZDNFT3712A2Z&sprefix=a+pack+of+toilet+paper%2Caps%2C344&ref=nb_sb_noss_2" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".s-main-slot.s-result-list.s-search-results.sg-row >div:nth-child(12)>.sg-col-inner >div:nth-child(1)>span:nth-child(1)>div:nth-child(1)>div:nth-child(1)>div:nth-child(2)>.a-section.s-title-instructions-style >h2:nth-child(1)>.a-link-normal.s-underline-text.s-underline-link-text.s-link-style.a-text-normal >span:nth-child(1)", + "netloc": "amazon", + "url": "https://www.amazon.com/s?k=a+pack+of+toilet+paper&crid=2ZDNFT3712A2Z&sprefix=a+pack+of+toilet+paper%2Caps%2C344&ref=nb_sb_noss_2" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "ref", + "reference_answer": "nb_sb_noss_2", + "url": "https://www.amazon.com/s?k=a+pack+of+toilet+paper&crid=2ZDNFT3712A2Z&sprefix=a+pack+of+toilet+paper%2Caps%2C344&ref=nb_sb_noss_2" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": "#add-to-cart-button ", + "netloc": "amazon", + "url": "https://www.amazon.com/Peelitical-Toilet-Paper-Roll-Full-Color/dp/B0B6GRTS3K/ref=sr_1_3?crid=2ZDNFT3712A2Z&keywords=a+pack+of+toilet+paper&qid=1706435470&sprefix=a+pack+of+toilet+paper%2Caps%2C344&sr=8-3" + } + }, + { + "match_function_name": "url_semantic_match", + "content": { + "key": "k", + "reference_answer": 
"Decide whether are searching for a bottle of laundry detergent", + "url": "https://www.amazon.com/s?k=a+bottle+of+laundry+detergent&crid=24SHUQD9K9BYI&sprefix=a+bottle+of+laundry+detergent%2Caps%2C353&ref=nb_sb_noss_2" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".s-main-slot.s-result-list.s-search-results.sg-row >div:nth-child(10)>.sg-col-inner >div:nth-child(1)>span:nth-child(1)>div:nth-child(1)>div:nth-child(1)>div:nth-child(2)>.a-section.s-title-instructions-style >h2:nth-child(1)>.a-link-normal.s-underline-text.s-underline-link-text.s-link-style.a-text-normal >span:nth-child(1)", + "netloc": "amazon", + "url": "https://www.amazon.com/s?k=a+bottle+of+laundry+detergent&crid=24SHUQD9K9BYI&sprefix=a+bottle+of+laundry+detergent%2Caps%2C353&ref=nb_sb_noss_2" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "ref", + "reference_answer": "nb_sb_noss_2", + "url": "https://www.amazon.com/s?k=a+bottle+of+laundry+detergent&crid=24SHUQD9K9BYI&sprefix=a+bottle+of+laundry+detergent%2Caps%2C353&ref=nb_sb_noss_2" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": "#add-to-cart-button ", + "netloc": "amazon", + "url": "https://www.amazon.com/MAZI-Laundry-Detergent-Catcher-Economic/dp/B09CF6KC2Y/ref=sr_1_1?crid=24SHUQD9K9BYI&keywords=a+bottle+of+laundry+detergent&qid=1706435504&sprefix=a+bottle+of+laundry+detergent%2Caps%2C353&sr=8-1" + } + } + ] + }, + { + "index": "gw8QYJVt8VkMCuh8WzW5Z", + "task": "Find the highest-rated mover in Honolulu to shift a vehicle and large appliances and who has virtual discussion options available in yelp", + "reference_task_length": 11, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "yelp.", + "url": "https://www.yelp.com/" + } + }, + { + "match_function_name": "url_exactly_match", 
+ "content": { + "key": "find_desc", + "reference_answer": "Movers", + "url": "https://www.yelp.com/search?find_desc=Movers&find_loc=Honolulu%2C+HI" + } + }, + { + "match_function_name": "url_semantic_match", + "content": { + "key": "find_loc", + "reference_answer": "Decide whether the place is Honolulu, HI", + "url": "https://www.yelp.com/search?find_desc=Movers&find_loc=Honolulu%2C+HI" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "sortby", + "reference_answer": "rating", + "url": "https://www.yelp.com/search?find_desc=Movers&find_loc=Honolulu%2C+HI&sortby=rating" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "attrs", + "reference_answer": "item_vehicle", + "url": "https://www.yelp.com/search?find_desc=Movers&find_loc=Honolulu%2C+HI&sortby=rating&attrs=rel_items_moved%3A%3Aitem_vehicle" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "attrs", + "reference_answer": "item_large_appliance", + "url": "https://www.yelp.com/search?find_desc=Movers&find_loc=Honolulu%2C+HI&sortby=rating&attrs=rel_items_moved%3A%3Aitem_vehicle%2Crel_items_moved%3A%3Aitem_large_appliance" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "attrs", + "reference_answer": "offers_virtual_consultations", + "url": "https://www.yelp.com/search?find_desc=Movers&find_loc=Honolulu%2C+HI&sortby=rating&attrs=rel_items_moved%3A%3Aitem_vehicle%2Crel_items_moved%3A%3Aitem_large_appliance%2Coffers_virtual_consultations" + } + } + ] + }, + { + "index": "Kchv0XWA7pIDs5VTzQclZ", + "task": "Use the 'find by state' method to locate all KOA campgrounds in California", + "reference_task_length": 5, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "koa.", + "url": "https://koa.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": 
"states-provinces/", + "url": "https://koa.com/states-provinces/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/california", + "url": "https://koa.com/states-provinces/california/" + } + } + ] + }, + { + "index": "J0z0DgyUzeHipnA4Ou7jy", + "task": "Browse comedy TV shows streaming on Netflix that was released in 2010 in tvguide", + "reference_task_length": 8, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "tvguide.", + "url": "https://www.tvguide.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "streaming/netflix/", + "url": "https://www.tvguide.com/streaming/netflix/" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "genre", + "reference_answer": "comedy", + "url": "https://www.tvguide.com/streaming/netflix/comedy/show/1/?sort=mostPopular&network=netflix&genre=comedy&type=show&releaseYearMin=2010&page=1" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "type", + "reference_answer": "show", + "url": "https://www.tvguide.com/streaming/netflix/comedy/show/1/?sort=mostPopular&network=netflix&genre=comedy&type=show&releaseYearMin=2010&page=1" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "releaseYearMin", + "reference_answer": "2010", + "url": "https://www.tvguide.com/streaming/netflix/comedy/show/1/?sort=mostPopular&network=netflix&genre=comedy&type=show&releaseYearMin=2010&page=1" + } + } + ] + }, + { + "index": "oGmqhhf0kdja3ldy5e_S4", + "task": "Show me the breakfast options on the Carnival Breeze ship in carnival", + "reference_task_length": 6, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "carnival.", + "url": "https://www.carnival.com/" + } + }, + { + "match_function_name": 
"url_included_match", + "content": { + "key": "", + "reference_answer": "cruise-food", + "url": "https://www.carnival.com/cruise-food" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".body-container >div:nth-child(2)>div:nth-child(4)>ul:nth-child(2)>li:nth-child(3)>label:nth-child(2)", + "netloc": "carnival", + "url": "https://www.carnival.com/cruise-food" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".body-container >div:nth-child(2)>div:nth-child(5)>ul:nth-child(2)>li:nth-child(1)>label:nth-child(2)", + "netloc": "carnival", + "url": "https://www.carnival.com/cruise-food" + } + } + ] + }, + { + "index": "kjoMizByJ75Xk-tUtZhfB", + "task": "Follow the Denver Nuggets NBA team in sports.yahoo", + "reference_task_length": 3, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "sports.yahoo.", + "url": "https://sports.yahoo.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "nba/teams/den", + "url": "https://sports.yahoo.com/nba/teams/den/" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".follow-text >span:nth-child(1)", + "netloc": "sports", + "url": "https://sports.yahoo.com/nba/teams/den/" + } + } + ] + }, + { + "index": "LH4LFivu0NgPDMPz_UjiZ", + "task": "Browse the trending searches in Columbus in yellowpages", + "reference_task_length": 5, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "yellowpages.", + "url": "https://www.yellowpages.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/columbus-oh", + "url": "https://www.yellowpages.com/columbus-oh" + } + 
}, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "trends/", + "url": "https://www.yellowpages.com/columbus-oh/trends/1" + } + } + ] + }, + { + "index": "LIBP8TUupuWApGSTg764g", + "task": "List of publishers for board games in boardgamegeek", + "reference_task_length": 4, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "boardgamegeek.", + "url": "https://boardgamegeek.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "browse/boardgamepublisher", + "url": "https://boardgamegeek.com/browse/boardgamepublisher" + } + } + ] + }, + { + "index": "lKHlXMjOvkxH9VIKLSQmO", + "task": "Find hard side Carry-on Luggage in black color in target", + "reference_task_length": 9, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "target.", + "url": "https://www.target.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "carry-on-luggage/", + "url": "https://www.target.com/c/carry-on-luggage/-/N-4xv2o" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/hardside/", + "url": "https://www.target.com/c/carry-on-luggage/hardside/-/N-4xv2oZ1ffcu" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "N-4xv2oZ1ffcuZgup4zc5zk7s", + "url": "https://www.target.com/c/carry-on-luggage/hardside/-/N-4xv2oZ1ffcuZgup4zc5zk7s?moveTo=product-list-grid" + } + } + ] + }, + { + "index": "SfTO0LCl5qsIkPW1sda5X", + "task": "Visit Homepage of 'The Flash' 2023 in IMDb.", + "reference_task_length": 6, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "imdb.", + "url": "https://www.imdb.com/?ref_=nv_home" + } + 
}, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "tt0439572", + "url": "https://www.imdb.com/title/tt0439572/?ref_=fn_al_tt_2" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".ipc-slate.ipc-slate--baseAlt.ipc-slate--dynamic-width.sc-248bafc1-0.cFFKvF.hero-media__slate--inline-video.undefined.ipc-sub-grid-item.ipc-sub-grid-item--span-4 >div:nth-child(4)>.ipc-lockup-overlay__screen ", + "netloc": "imdb", + "url": "https://www.imdb.com/title/tt0439572/?ref_=fn_al_tt_2" + } + } + ] + }, + { + "index": "SWZ9bPJuEfhVn3zj7Aemm", + "task": "Renew a existing KOA rewards account with the rewards number 1000000001 e postal code 10023 in koa", + "reference_task_length": 9, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "koa.", + "url": "https://koa.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "rewards/", + "url": "https://koa.com/rewards/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "purchase/", + "url": "https://koa.com/rewards/purchase/" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": "#vkr-purchase-type-tabs >li:nth-child(2)>label:nth-child(2)", + "netloc": "koa", + "url": "https://koa.com/rewards/purchase/" + } + }, + { + "match_function_name": "element_value_exactly_match", + "content": { + "reference_answer": "1000000001", + "netloc": "koa", + "path": "#Step1_AccountNumber ", + "url": "https://koa.com/rewards/purchase/" + } + }, + { + "match_function_name": "element_value_exactly_match", + "content": { + "reference_answer": "10023", + "netloc": "koa", + "path": "#Step1_PostalCode ", + "url": "https://koa.com/rewards/purchase/" + } + }, + { + 
"match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": "#continueButton ", + "netloc": "koa", + "url": "https://koa.com/rewards/purchase/" + } + } + ] + }, + { + "index": "pj97JhANoatrYxkyqIDJ6", + "task": "Browse events happening at Madison Square Garden in parking.com", + "reference_task_length": 5, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "parking.", + "url": "https://parking.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "nyc/venues/manhattan-madison-square-garden/events", + "url": "https://parking.com/nyc/venues/manhattan-madison-square-garden/events" + } + } + ] + }, + { + "index": "pxVHNYBq6JINOZKNXYW8E", + "task": "Go to the page with help in choosing a solar energy product for homes in tesla", + "reference_task_length": 5, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "tesla.", + "url": "https://www.tesla.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "energy/design", + "url": "https://www.tesla.com/energy/design" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/help-me-choose", + "url": "https://www.tesla.com/energy/design/help-me-choose" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/help-me-choose-quiz", + "url": "https://www.tesla.com/energy/design/help-me-choose-quiz" + } + } + ] + }, + { + "index": "GL3m2slTLRE-rN_UC62XC", + "task": "Find the cheapest last minute cruise deal in travelzoo", + "reference_task_length": 7, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "travelzoo.", + "url": 
"https://www.travelzoo.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "collection/last-minute-cruise-deals/", + "url": "https://www.travelzoo.com/collection/last-minute-cruise-deals/" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".btn-group-items >div:nth-child(3)>.text ", + "netloc": "travelzoo", + "url": "https://www.travelzoo.com/collection/last-minute-cruise-deals/" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".button-primary.button-done.right ", + "netloc": "travelzoo", + "url": "https://www.travelzoo.com/collection/last-minute-cruise-deals/" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".result-list.list-nostyle >li:nth-child(1)>div:nth-child(1)>a:nth-child(1)>.deal-info.table-layout.clamped >div:nth-child(2)>h3:nth-child(1)>.deal-headline-text.ts-original-inline.truncate ", + "netloc": "travelzoo", + "url": "https://www.travelzoo.com/collection/last-minute-cruise-deals/" + } + } + ] + }, + { + "index": "iJYctP6zOJCM8qwUo22jf", + "task": "Find FC Barcelona's next fixture in the Spanish Copa de Rey in espn", + "reference_task_length": 6, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "espn.", + "url": "https://www.espn.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/_/id/83/", + "url": "https://www.espn.com/soccer/team/_/id/83/barcelona" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/fixtures/", + "url": "https://www.espn.com/soccer/team/fixtures/_/id/83/esp.barcelona" + } + }, + { + "match_function_name": "url_included_match", + "content": 
{ + "key": "", + "reference_answer": "ESP.COPA_DEL_REY", + "url": "https://www.espn.com/soccer/team/fixtures/_/id/83/esp.barcelona" + } + } + ] + }, + { + "index": "fZxEueSga2SBJ__Fh8Lwn", + "task": "Browse washing machines available for free local pickup within 25 miles of zip code 90026 in ebay", + "reference_task_length": 11, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "ebay.", + "url": "https://www.ebay.com/" + } + }, + { + "match_function_name": "url_semantic_match", + "content": { + "key": "_nkw", + "reference_answer": "Decide whether are searching for washing machine", + "url": "https://www.ebay.com/sch/i.html?_from=R40&_nkw=washing+machine&_sacat=0&rt=nc&_stpos=90026&_fspt=1&LH_PrefLoc=99&_sadis=25&LH_LPickup=1" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "_sadis", + "reference_answer": "25", + "url": "https://www.ebay.com/sch/i.html?_from=R40&_nkw=washing+machine&_sacat=0&rt=nc&_stpos=90026&_fspt=1&LH_PrefLoc=99&_sadis=25&LH_LPickup=1" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "_stpos", + "reference_answer": "90026", + "url": "https://www.ebay.com/sch/i.html?_from=R40&_nkw=washing+machine&_sacat=0&rt=nc&_stpos=90026&_fspt=1&LH_PrefLoc=99&_sadis=25&LH_LPickup=1" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "LH_LPickup", + "reference_answer": "1", + "url": "https://www.ebay.com/sch/i.html?_from=R40&_nkw=washing+machine&_sacat=0&rt=nc&_stpos=90026&_fspt=1&LH_PrefLoc=99&_sadis=25&LH_LPickup=1" + } + } + ] + }, + { + "index": "cojd69e-u4AozgQz4Pi8d", + "task": "Show me the coming soon AMC Artisan Films in amctheatres", + "reference_task_length": 6, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "amctheatres.", + "url": "https://www.amctheatres.com/" + } + }, + { + "match_function_name": 
"url_included_match", + "content": { + "key": "", + "reference_answer": "/movies", + "url": "https://www.amctheatres.com/movies" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "availability", + "reference_answer": "COMING_SOON", + "url": "https://www.amctheatres.com/movies?availability=COMING_SOON" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".select-styled__select >option:nth-child(2)", + "netloc": "amctheatres", + "url": "https://www.amctheatres.com/movies?availability=COMING_SOON" + } + } + ] + }, + { + "index": "AHWpPOnQ8dV6XNM_Elqgy", + "task": "Find Kevin Durant's bio in espn", + "reference_task_length": 7, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "espn.", + "url": "https://www.espn.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/id/3202/", + "url": "https://www.espn.com/nba/player/_/id/3202/kevin-durant" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "bio/", + "url": "https://www.espn.com/nba/player/bio/_/id/3202/kevin-durant" + } + } + ] + }, + { + "index": "aR_52TLjmboDRqUmDNN6y", + "task": "Find camping tents that can fit 6 people and sort the results by price from low to high in rei", + "reference_task_length": 8, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "rei.", + "url": "https://www.rei.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "c/camping-tents", + "url": "https://www.rei.com/c/camping-tents" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "sort", + "reference_answer": "min-price", + "url": 
"https://www.rei.com/c/camping-tents/f/sc-6-person?ir=category%3Acamping-tents&r=c%3Bf&sort=min-price" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "sc-6-person", + "url": "https://www.rei.com/c/camping-tents/f/sc-6-person?ir=category%3Acamping-tents&r=c%3Bf&sort=min-price" + } + } + ] + }, + { + "index": "8Rb4YIIUnrFeu3JmhQHBK", + "task": "Order an adult\u2019s bottomless bubbles package for Joe Bloggs who is booked onto cruise 101010 in carnival", + "reference_task_length": 11, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "carnival.", + "url": "https://www.carnival.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "drink-packages", + "url": "https://www.carnival.com/drink-packages?icid=icp_ccl_beverage_0708_footer" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".btn.btn-red.general-add-to-cart-btn.btn-add-cart.btn-full.add-to-cart-BB_1.sas-drink-packages ", + "netloc": "carnival", + "url": "https://www.carnival.com/drink-packages?icid=icp_ccl_beverage_0708_footer" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": "#giftLinkFormCheckBoxLabel ", + "netloc": "carnival", + "url": "https://www.carnival.com/drink-packages?icid=icp_ccl_beverage_0708_footer" + } + }, + { + "match_function_name": "element_value_exactly_match", + "content": { + "reference_answer": "Joe", + "netloc": "carnival", + "path": ".recipient-f-name ", + "url": "https://www.carnival.com/drink-packages?icid=icp_ccl_beverage_0708_footer" + } + }, + { + "match_function_name": "element_value_exactly_match", + "content": { + "reference_answer": "Bloggs", + "netloc": "carnival", + "path": ".recipient-l-name ", + "url": 
"https://www.carnival.com/drink-packages?icid=icp_ccl_beverage_0708_footer" + } + }, + { + "match_function_name": "element_value_exactly_match", + "content": { + "reference_answer": "101010", + "netloc": "carnival", + "path": ".cruise-info >input:nth-child(2)", + "url": "https://www.carnival.com/drink-packages?icid=icp_ccl_beverage_0708_footer" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": "#saveButtonOne ", + "netloc": "carnival", + "url": "https://www.carnival.com/drink-packages?icid=icp_ccl_beverage_0708_footer" + } + } + ] + }, + { + "index": "1OJMUzt3nTF3I05jPqIqY", + "task": "Find Airport information of Camarillo Airport, CA and check weather in flightaware", + "reference_task_length": 7, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "flightaware.", + "url": "https://www.flightaware.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "live/airport/KCMA", + "url": "https://www.flightaware.com/live/airport/KCMA#airport-parity-stats-container" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "resources/airport/KCMA/weather", + "url": "https://www.flightaware.com/resources/airport/KCMA/weather" + } + } + ] + }, + { + "index": "U2JVmc0DvWcAvzI4YLwyn", + "task": "Give a like to the #1 track of the Real Time Top Chart in last.fm", + "reference_task_length": 4, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "last.fm", + "url": "https://www.last.fm/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "charts", + "url": "https://www.last.fm/charts" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + 
"reference_answer": ".charts >div:nth-child(1)>div:nth-child(1)>.globalchart >tbody:nth-child(2)>tr:nth-child(1)>.globalchart-name >.link-block-target ", + "netloc": "last", + "url": "https://www.last.fm/charts" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": "#mantle_skin >header:nth-child(2)>div:nth-child(1)>div:nth-child(2)>div:nth-child(5)>div:nth-child(2)>div:nth-child(2)>a:nth-child(1)", + "netloc": "last", + "url": "https://www.last.fm/music/Jung+Kook/_/Standing+Next+To+You" + } + } + ] + }, + { + "index": "V7Pu-mgfxfIaXkTcCYUNW", + "task": "Find the newest silver-colored Lexus SUV with a black interior and backup camera to purchase which has been made between 2022-2023 and is within 50 miles of zip 10017 in cargurus", + "reference_task_length": 25, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "cargurus.", + "url": "https://www.cargurus.com/" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "zip", + "reference_answer": "10017", + "url": "https://www.cargurus.com/Cars/new/searchresults.action?zip=10017&inventorySearchWidgetType=BODYSTYLE&bodyTypeGroup=bg7&sortDir=ASC&sourceContext=untrackedWithinSite_false_0&distance=100&sortType=PRICE" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "bodyTypeGroup", + "reference_answer": "bg7", + "url": "https://www.cargurus.com/Cars/new/searchresults.action?zip=10017&inventorySearchWidgetType=BODYSTYLE&bodyTypeGroup=bg7&sortDir=ASC&sourceContext=untrackedWithinSite_false_0&distance=100&sortType=PRICE" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".Gv7Z4V >div:nth-child(1)>.HObdBl.vT3i0_ >.mvXVZO.Xb7sZe ", + "netloc": "cargurus", + "url": 
"https://www.cargurus.com/Cars/new/searchresults.action?zip=10017&inventorySearchWidgetType=BODYSTYLE&bodyTypeGroup=bg7&sortDir=ASC&sourceContext=untrackedWithinSite_false_0&distance=100&sortType=PRICE" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".Gv7Z4V >div:nth-child(1)>.HObdBl.vT3i0_ >.mvXVZO.Xb7sZe >option:nth-child(3)", + "netloc": "cargurus", + "url": "https://www.cargurus.com/Cars/new/searchresults.action?zip=10017&inventorySearchWidgetType=BODYSTYLE&bodyTypeGroup=bg7&sortDir=ASC&sourceContext=untrackedWithinSite_false_0&distance=100&sortType=PRICE" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".Gv7Z4V >div:nth-child(3)>.HObdBl.vT3i0_ >.mvXVZO.Xb7sZe ", + "netloc": "cargurus", + "url": "https://www.cargurus.com/Cars/new/searchresults.action?zip=10017&inventorySearchWidgetType=BODYSTYLE&bodyTypeGroup=bg7&sortDir=ASC&sourceContext=untrackedWithinSite_false_0&distance=100&sortType=PRICE" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".Gv7Z4V >div:nth-child(1)>.HObdBl.vT3i0_ >.mvXVZO.Xb7sZe >option:nth-child(4)", + "netloc": "cargurus", + "url": "https://www.cargurus.com/Cars/new/searchresults.action?zip=10017&inventorySearchWidgetType=BODYSTYLE&bodyTypeGroup=bg7&sortDir=ASC&sourceContext=untrackedWithinSite_false_0&distance=100&sortType=PRICE" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": "#select-filter-search-distance ", + "netloc": "cargurus", + "url": "https://www.cargurus.com/Cars/new/searchresults.action?zip=10017&inventorySearchWidgetType=BODYSTYLE&bodyTypeGroup=bg7&sortDir=ASC&sourceContext=untrackedWithinSite_false_0&distance=100&sortType=PRICE" + } + }, + { + "match_function_name": "element_path_exactly_match", + 
"method": "selector", + "content": { + "reference_answer": "#select-filter-search-distance >option:nth-child(3)", + "netloc": "cargurus", + "url": "https://www.cargurus.com/Cars/new/searchresults.action?zip=10017&inventorySearchWidgetType=BODYSTYLE&bodyTypeGroup=bg7&sortDir=ASC&sourceContext=untrackedWithinSite_false_0&distance=100&sortType=PRICE" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": "#radix-3 >.NoCcgT >.Lgttuc >span:nth-child(1)", + "netloc": "cargurus", + "url": "https://www.cargurus.com/Cars/new/searchresults.action?zip=10017&inventorySearchWidgetType=BODYSTYLE&bodyTypeGroup=bg7&sortDir=ASC&sourceContext=untrackedWithinSite_false_0&distance=100&sortType=PRICE" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".FXiauQ >ul:nth-child(1)>li:nth-child(11)>.fimP4_ >.y0gbTE.zsNQOE >.MaigNX >.pV9_6g ", + "netloc": "cargurus", + "url": "https://www.cargurus.com/Cars/new/searchresults.action?zip=10017&inventorySearchWidgetType=BODYSTYLE&bodyTypeGroup=bg7&sortDir=ASC&sourceContext=untrackedWithinSite_false_0&distance=100&sortType=PRICE" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": "#radix-5 >.NoCcgT >.Lgttuc >span:nth-child(1)", + "netloc": "cargurus", + "url": "https://www.cargurus.com/Cars/new/searchresults.action?zip=10017&inventorySearchWidgetType=BODYSTYLE&bodyTypeGroup=bg7&sortDir=ASC&sourceContext=untrackedWithinSite_false_0&distance=100&sortType=PRICE" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": "#radix-4 >.UWCYC >.FXiauQ >ul:nth-child(1)>li:nth-child(1)>.fimP4_ >.y0gbTE.zsNQOE >.MaigNX >.pV9_6g ", + "netloc": "cargurus", + "url": 
"https://www.cargurus.com/Cars/new/searchresults.action?zip=10017&inventorySearchWidgetType=BODYSTYLE&bodyTypeGroup=bg7&sortDir=ASC&sourceContext=untrackedWithinSite_false_0&distance=100&sortType=PRICE" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": "#radix-11 >.NoCcgT >.Lgttuc >span:nth-child(1)", + "netloc": "cargurus", + "url": "https://www.cargurus.com/Cars/new/searchresults.action?zip=10017&inventorySearchWidgetType=BODYSTYLE&bodyTypeGroup=bg7&sortDir=ASC&sourceContext=untrackedWithinSite_false_0&distance=100&sortType=PRICE" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".FXiauQ >ul:nth-child(1)>li:nth-child(23)>.fimP4_ >.y0gbTE.zsNQOE ", + "netloc": "cargurus", + "url": "https://www.cargurus.com/Cars/new/searchresults.action?zip=10017&inventorySearchWidgetType=BODYSTYLE&bodyTypeGroup=bg7&sortDir=ASC&sourceContext=untrackedWithinSite_false_0&distance=100&sortType=PRICE" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": "#radix-19 >.NoCcgT >.Lgttuc >span:nth-child(1)", + "netloc": "cargurus", + "url": "https://www.cargurus.com/Cars/new/searchresults.action?zip=10017&inventorySearchWidgetType=BODYSTYLE&bodyTypeGroup=bg7&sortDir=ASC&sourceContext=untrackedWithinSite_false_0&distance=100&sortType=PRICE" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".FXiauQ >ul:nth-child(2)>ul:nth-child(1)>li:nth-child(1)>.fimP4_ >.y0gbTE.zsNQOE ", + "netloc": "cargurus", + "url": "https://www.cargurus.com/Cars/new/searchresults.action?zip=10017&inventorySearchWidgetType=BODYSTYLE&bodyTypeGroup=bg7&sortDir=ASC&sourceContext=untrackedWithinSite_false_0&distance=100&sortType=PRICE" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": 
"selector", + "content": { + "reference_answer": "#Listing-Sort ", + "netloc": "cargurus", + "url": "https://www.cargurus.com/Cars/new/searchresults.action?zip=10017&inventorySearchWidgetType=BODYSTYLE&bodyTypeGroup=bg7&sortDir=ASC&sourceContext=untrackedWithinSite_false_0&distance=100&sortType=PRICE" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": "#Listing-Sort >option:nth-child(9)", + "netloc": "cargurus", + "url": "https://www.cargurus.com/Cars/new/searchresults.action?zip=10017&inventorySearchWidgetType=BODYSTYLE&bodyTypeGroup=bg7&sortDir=ASC&sourceContext=untrackedWithinSite_false_0&distance=100&sortType=PRICE" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".IWo5PZ.orzDm5 >div:nth-child(4)>div:nth-child(1)>.MOfIEd.HgPBTU.prRsnF >.Z0_BC0.ZGMXbN.kKD2eQ >.biZGS4.LtXpwU >.k4FSCT >.bLgDNy >.gN7yGT ", + "netloc": "cargurus", + "url": "https://www.cargurus.com/Cars/new/searchresults.action?zip=10017&inventorySearchWidgetType=BODYSTYLE&bodyTypeGroup=bg7&sortDir=ASC&sourceContext=untrackedWithinSite_false_0&distance=100&sortType=PRICE" + } + } + ] + }, + { + "index": "TsRGdiY3Kx2ngXHFF62dJ", + "task": "Find the score of the 2020 Super Bowl in nfl.com", + "reference_task_length": 7, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "nfl.", + "url": "https://www.nfl.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "scores/", + "url": "https://www.nfl.com/scores/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/2020", + "url": "https://www.nfl.com/scores/2020/POST3" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/POST4", + "url": 
"https://www.nfl.com/scores/2020/POST4" + } + } + ] + }, + { + "index": "TjgqqYbTC_NCYsw4_dMq2", + "task": "Check my trip for Smith and booking number X89998 in aa", + "reference_task_length": 8, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "aa.", + "url": "https://www.aa.com/homePage.do" + } + }, + { + "match_function_name": "element_value_exactly_match", + "content": { + "reference_answer": "Smith", + "netloc": "aa", + "path": ".zeta >input:nth-child(3)", + "url": "https://www.aa.com/homePage.do" + } + }, + { + "match_function_name": "element_value_exactly_match", + "content": { + "reference_answer": "X89998", + "netloc": "aa", + "path": ".zeta >input:nth-child(2)", + "url": "https://www.aa.com/homePage.do" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/find-your-trip", + "url": "https://www.aa.com/reservation/view/find-your-trip?fromHomePage=true&from=manageTripsHomeWidget" + } + } + ] + }, + { + "index": "y8BdI5xlwy_VBluYvyaLr", + "task": "Show me the stats of the Athletic Club from spanish Laliga in espn", + "reference_task_length": 7, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "espn.", + "url": "https://www.espn.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/soccer", + "url": "https://www.espn.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/team", + "url": "https://www.espn.com/soccer/teams" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/spanish-laliga", + "url": "https://www.espn.com/soccer/teams" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "id/93", + "url": 
"https://www.espn.com/soccer/team/stats/_/id/93/athletic-club" + } + } + ] + }, + { + "index": "r6Mkfvlm-mFnqBivBdLMS", + "task": "Search for video game consoles and filter the results to show only those that have Wi-Fi Capability in ebay", + "reference_task_length": 9, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "ebay.", + "url": "https://www.ebay.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/Video-Game-Consoles", + "url": "https://www.ebay.com/b/Video-Game-Consoles/139971/bn_320033" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "Features", + "reference_answer": "Wi-Fi Capability", + "url": "https://www.ebay.com/b/Video-Game-Consoles/139971?mag=1&_fsrp=0&rt=nc&_sacat=139971&Features=Wi%252DFi%2520Capability" + } + } + ] + }, + { + "index": "rfNemY0eiTaMuUVmMtE1f", + "task": "Find a Ricky Kej track to listen and share which has been added in the last year and is between 2 to 10 minutes in soundcloud", + "reference_task_length": 12, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "soundcloud.", + "url": "https://soundcloud.com/discover" + } + }, + { + "match_function_name": "url_semantic_match", + "content": { + "key": "q", + "reference_answer": "Decide whether are searching for Ricky Kej", + "url": "https://soundcloud.com/search?q=Ricky%20Kej" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "filter.created_at", + "reference_answer": "last_year", + "url": "https://soundcloud.com/search/sounds?q=Ricky%20Kej&filter.created_at=last_year" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "filter.duration", + "reference_answer": "medium", + "url": "https://soundcloud.com/search/sounds?q=Ricky%20Kej&filter.created_at=last_year&filter.duration=medium" + } 
+ }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "filter.license", + "reference_answer": "to_share", + "url": "https://soundcloud.com/search/sounds?q=Ricky%20Kej&filter.duration=medium&filter.created_at=last_year&filter.license=to_share" + } + } + ] + }, + { + "index": "hjsXdgoF6M-Mw3AHQBT4G", + "task": "Browse and purchase the classic bundle for the simulation game Cities Skylines in store.steampowered", + "reference_task_length": 10, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "store.steampowered.", + "url": "https://store.steampowered.com/app/2167580/Summoners_War_Chronicles/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/Cities_Skylines__The_Classics_Bundle", + "url": "https://store.steampowered.com/bundle/28813/Cities_Skylines__The_Classics_Bundle/" + } + }, + { + "match_function_name": "element_value_exactly_match", + "content": { + "reference_answer": "In Cart", + "netloc": "store", + "url": "https://store.steampowered.com/bundle/28813/Cities_Skylines__The_Classics_Bundle/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "checkout.steampowered.", + "url": "https://checkout.steampowered.com/login/?purchasetype=self&checkout=1&redir=checkout%2F%3Fpurchasetype%3Dself%26cart%3D4893791698082289964%26snr%3D1_8_4__503&redir_ssl=1&snr=1_8_4__503" + } + } + ] + }, + { + "index": "NaDvF_0Wje2V79UrI58Lg", + "task": "Find tickets for events in Boston in ticketcenter", + "reference_task_length": 4, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "ticketcenter.", + "url": "https://www.ticketcenter.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "massachusetts/boston-tickets", + "url": 
"https://www.ticketcenter.com/massachusetts/boston-tickets" + } + } + ] + }, + { + "index": "H16cZ1rGpDK_X3GBL1Csh", + "task": "Get route information for flights from Washington to New York airports that have already arrived in flightaware", + "reference_task_length": 8, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "flightaware.", + "url": "https://www.flightaware.com/" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": "#Status >.fa_list.hide.text_align_left >li:nth-child(2)>.fa_list_row_right.text_align_right >a:nth-child(1)", + "netloc": "flightaware", + "url": "https://www.flightaware.com/live/findflight?origin=Washington&destination=New+York" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "live/findflight", + "url": "https://www.flightaware.com/live/findflight?origin=Washington&destination=New+York" + } + }, + { + "match_function_name": "url_semantic_match", + "content": { + "key": "origin", + "reference_answer": "Decide whether is searching for Washington, D.C. 
or ZDC", + "url": "https://www.flightaware.com/live/findflight?origin=Washington&destination=New+York" + } + }, + { + "match_function_name": "url_semantic_match", + "content": { + "key": "destination", + "reference_answer": "Decide whether is searching for New York or ZNY", + "url": "https://www.flightaware.com/live/findflight?origin=Washington&destination=New+York" + } + } + ] + }, + { + "index": "tSCh_bVn5QIh_MWCrH1hW", + "task": "Filter women's sports bras that are purple that are S size in underarmour", + "reference_task_length": 7, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "underarmour.", + "url": "https://www.underarmour.com/en-us/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "womens/clothing/sports-bras", + "url": "https://www.underarmour.com/en-us/c/womens/clothing/sports-bras/" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "viewPreference", + "reference_answer": "S", + "url": "https://www.underarmour.com/en-us/c/womens/clothing/sports-bras/?viewPreference=S" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/purple", + "url": "https://www.underarmour.com/en-us/c/womens/clothing/sports-bras/purple/?viewPreference=S&prefn1=size&prefv1=S" + } + } + ] + }, + { + "index": "3Iioy0c1kUexRvCK8pS-Q", + "task": "Find parking prices for upcoming Los Angeles Lakers games in seatgeek", + "reference_task_length": 6, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "seatgeek.", + "url": "https://seatgeek.com/los-angeles-lakers-tickets/parking" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/los-angeles-lakers-tickets", + "url": "https://seatgeek.com/los-angeles-lakers-tickets/parking" + } + }, + { + 
"match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/parking", + "url": "https://seatgeek.com/los-angeles-lakers-tickets/parking" + } + } + ] + }, + { + "index": "6k0v1LbqRssEvjiwwvWDn", + "task": "Repost on my feed a rock playlist in soundcloud", + "reference_task_length": 7, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "soundcloud.", + "url": "https://soundcloud.com/discover" + } + }, + { + "match_function_name": "url_semantic_match", + "content": { + "key": "q", + "reference_answer": "Decide whether is searching for rock", + "url": "https://soundcloud.com/search?q=rock" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/sets", + "url": "https://soundcloud.com/search/sets?q=rock" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".lazyLoadingList__list.sc-list-nostyle.sc-clearfix >li:nth-child(2)>.searchItem >.sound.searchItem__trackItem.playlist.streamContext >.sound__body >.sound__content >.sound__footer.g-all-transitions-300 >div:nth-child(2)>.soundActions.sc-button-toolbar.soundActions__small >.sc-button-group.sc-button-group-small >.sc-button-repost.sc-button-secondary.sc-button.sc-button-small.sc-button-responsive ", + "netloc": "soundcloud", + "url": "https://soundcloud.com/search/sets?q=rock" + } + } + ] + }, + { + "index": "0ZydjXfdu-f7wjwsL02Yj", + "task": "Show most played games based on daily players in store.steampowered", + "reference_task_length": 6, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "store.steampowered.", + "url": "https://store.steampowered.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "charts/mostplayed", + "url": 
"https://store.steampowered.com/charts" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".dropdown_DialogDropDownMenu_1tiuY._DialogInputContainer >div:nth-child(2)", + "netloc": "store", + "url": "https://store.steampowered.com/charts" + } + } + ] + }, + { + "index": "1qkDSDVzX9LZhx6VQL_Yd", + "task": "Browse strollers and apply a filter for the color black within $100 to $250 in kohls", + "reference_task_length": 9, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "kohls.", + "url": "https://www.kohls.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/search.jsp", + "url": "https://www.kohls.com/search.jsp?submit-search=web-regular&search=stroller&kls_sbp=68405109696688798524419626354526575444" + } + }, + { + "match_function_name": "url_semantic_match", + "content": { + "key": "search", + "reference_answer": "Decide whether is searching for stroller", + "url": "https://www.kohls.com/search.jsp?submit-search=web-regular&search=stroller&kls_sbp=68405109696688798524419626354526575444" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "CN", + "reference_answer": "Color:Black", + "url": "https://www.kohls.com/search/black.jsp?CN=Color:Black&search=stroller&S=1&PPP=48&kls_sbp=68405109696688798524419626354526575444&pfm=search%20refine" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "CN", + "reference_answer": "Price:$100 to $250", + "url": "https://www.kohls.com/search/black.jsp?CN=Color:Black+Price:%24100%20to%20%24250&BL=y&search=stroller&S=1&PPP=48&pfm=search%20refine&kls_sbp=68405109696688798524419626354526575444" + } + } + ] + }, + { + "index": "1Te22cad3xH_VsFvczajN", + "task": "Find parking in California city for Limos which also offers free wi-fi in yelp", + 
"reference_task_length": 7, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "yelp.", + "url": "https://www.yelp.com/" + } + }, + { + "match_function_name": "url_semantic_match", + "content": { + "key": "find_loc", + "reference_answer": "Decide whether is searching for California city", + "url": "https://www.yelp.com/search?find_desc=Parking&find_loc=California+City%2C+CA" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "find_desc", + "reference_answer": "Parking", + "url": "https://www.yelp.com/search?find_desc=Parking&find_loc=California+City%2C+CA" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "attrs", + "reference_answer": "WiFi.free", + "url": "https://www.yelp.com/search?find_desc=Parking&find_loc=California+City%2C+CA&attrs=WiFi.free" + } + } + ] + }, + { + "index": "4_WHLeB50DU8DvgWLUwB-", + "task": "Find offers for VIFP Club loyalty number 9016467685 and show the ones that are for members only in carnival", + "reference_task_length": 8, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "carnival.", + "url": "https://www.carnival.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/vifp", + "url": "https://www.carnival.com/vifp?icid=CC_vifp-club_1866" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/cruise-deals", + "url": "https://www.carnival.com/cruise-deals?icid=icp_vifp_11252020_lp_bttmbanner" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "icid", + "reference_answer": "icp_vifp_11252020_lp_bttmbanner", + "url": "https://www.carnival.com/cruise-deals?icid=icp_vifp_11252020_lp_bttmbanner" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + 
"content": { + "reference_answer": ".cd-deal-list__category-title ", + "netloc": "carnival", + "url": "https://www.carnival.com/cruise-deals?icid=icp_vifp_11252020_lp_bttmbanner" + } + } + ] + }, + { + "index": "h0lV1TEIVUn7vjdTai3Xn", + "task": "Show cars with AWD with maximum budget of $50,000 in kbb", + "reference_task_length": 10, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "kbb.", + "url": "https://www.kbb.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/cars-for-sale", + "url": "https://www.kbb.com/cars-for-sale/all" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/awd4wd", + "url": "https://www.kbb.com/cars-for-sale/all/awd4wd/san-jose-ca?isNewSearch=true&zip=95101" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/cars-between-0-and-50000", + "url": "https://www.kbb.com/cars-for-sale/all/cars-between-0-and-50000/san-jose-ca?isNewSearch=true&zip=95101" + } + } + ] + }, + { + "index": "rPr8bIONPSXD0LlKoN4gF", + "task": "Add two $50 playstation store gift cards to cart in gamestop", + "reference_task_length": 8, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "gamestop.", + "url": "https://www.gamestop.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/playstation-store-gift-card-50", + "url": "https://www.gamestop.com/gift-cards/gaming-gift-cards/products/playstation-store-gift-card-50/154535.html" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": "#primary-details-row >div:nth-child(18)>.add-to-cart-buttons.tulsa-atcbutton-toggle >.atc-btns-wrapper 
>div:nth-child(1)>.js-add-to-cart.add-to-cart.btn.btn-primary.add-to-cart-redesign.all ", + "netloc": "gamestop", + "url": "https://www.gamestop.com/gift-cards/gaming-gift-cards/products/playstation-store-gift-card-50/154535.html" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".icon.icon-close ", + "netloc": "gamestop", + "url": "https://www.gamestop.com/gift-cards/gaming-gift-cards/products/playstation-store-gift-card-50/154535.html" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".atc-btn-wrapper.veiled >.js-add-to-cart.add-to-cart.btn.btn-primary.add-to-cart-redesign.all ", + "netloc": "gamestop", + "url": "https://www.gamestop.com/gift-cards/gaming-gift-cards/products/playstation-store-gift-card-50/154535.html" + } + } + ] + }, + { + "index": "VUe0jDQCHGNaaDIAXDoRs", + "task": "Check the rating and user reviews for the game \"Deathloop\" in ign", + "reference_task_length": 6, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "ign.", + "url": "https://www.ign.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/deathloop", + "url": "https://www.ign.com/games/deathloop" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/user-reviews", + "url": "https://www.ign.com/games/deathloop/user-reviews" + } + } + ] + }, + { + "index": "ZpkB1yoN5N36h-JPtwEtv", + "task": "Browse new laptops from $400 to $500 that offers free shippping in ebay", + "reference_task_length": 14, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "ebay.", + "url": "https://www.ebay.com/" + } + }, + { + "match_function_name": "url_semantic_match", + "content": { + "key": 
"_nkw", + "reference_answer": "Decide whether are searching for laptops ", + "url": "https://www.ebay.com/sch/i.html?_from=R40&_trksid=p4432023.m570.l1313&_nkw=laptops&_sacat=0" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "LH_ItemCondition", + "reference_answer": "1000", + "url": "https://www.ebay.com/sch/i.html?_from=R40&_nkw=laptops&_sacat=0&rt=nc&LH_ItemCondition=1000" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "LH_FS", + "reference_answer": "1", + "url": "https://www.ebay.com/sch/i.html?_from=R40&_nkw=laptops&_sacat=0&LH_ItemCondition=1000&rt=nc&LH_FS=1" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "_udlo", + "reference_answer": "400", + "url": "https://www.ebay.com/sch/i.html?_from=R40&_nkw=laptops&_sacat=0&LH_ItemCondition=1000&LH_FS=1&rt=nc&_udlo=400&_udhi=500" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "_udhi", + "reference_answer": "500", + "url": "https://www.ebay.com/sch/i.html?_from=R40&_nkw=laptops&_sacat=0&LH_ItemCondition=1000&LH_FS=1&rt=nc&_udlo=400&_udhi=500" + } + } + ] + }, + { + "index": "NF6hAjWkHaqzdiab5ygr3", + "task": "What is the cheapest luxury car to pickup on the second closest nearby location to New York, United States, 100 in budget", + "reference_task_length": 13, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "budget.", + "url": "https://www.budget.com/en/home" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "vehicles/us/h", + "url": "https://www.budget.com/en/cars/vehicles/us/h" + } + }, + { + "match_function_name": "element_value_semantic_match", + "content": { + "reference_answer": "Decide whether are searching for New York,United States,100", + "netloc": "budget", + "url": "https://www.budget.com/en/cars/vehicles/us/h" + } + }, + { + 
"match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".LocContainer.col-sm-12.col-xs-12.clearfix >li:nth-child(2)>.LocContent >.btn.btn-red ", + "netloc": "budget", + "url": "https://www.budget.com/en/cars/vehicles/us/h" + } + }, + { + "match_function_name": "element_value_exactly_match", + "content": { + "reference_answer": " Price (Low to High)", + "netloc": "budget", + "url": "https://www.budget.com/en/reservation#/vehicles" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".step2dtl >div:nth-child(2)>div:nth-child(2)>section:nth-child(1)>div:nth-child(1)>div:nth-child(1)>.col-sm-7.avlcarimage >.col-lg-7.col-sm-6.col-xs-12.avlcarimagetab >div:nth-child(2)", + "netloc": "budget", + "url": "https://www.budget.com/en/reservation#/vehicles" + } + } + ] + }, + { + "index": "jeW5vticR4VE6PZbI041y", + "task": "Find the Trains in Spain guide that will help you learn about Renfe fare types in thetrainline", + "reference_task_length": 6, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "thetrainline.", + "url": "https://www.thetrainline.com/en-us" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "trains/spain", + "url": "https://www.thetrainline.com/en-us/trains/spain" + } + }, + { + "match_function_name": "element_value_exactly_match", + "content": { + "reference_answer": "Renfe fare types", + "netloc": "thetrainline", + "url": "https://www.thetrainline.com/en-us/trains/spain" + } + } + ] + }, + { + "index": "PGXZYqmGOlNWG_EsOa7nh", + "task": "Find ideas and recommendation for things to do in Cancun in viator", + "reference_task_length": 7, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "viator.", + "url": "https://www.viator.com/" + 
} + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/d631", + "url": "https://www.viator.com/Cancun/d631" + } + } + ] + }, + { + "index": "t0a1mtyC2dGrK4l2Tg6OT", + "task": "Browse the page to find classic rock concert tickets in seatgeek", + "reference_task_length": 3, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "seatgeek.", + "url": "https://seatgeek.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "concert-tickets/classic-rock", + "url": "https://seatgeek.com/concert-tickets/classic-rock" + } + } + ] + }, + { + "index": "B5dxfUM1haoCwTQh6VKgY", + "task": "Open the page to answer a question in the home improvement section in yellowpages", + "reference_task_length": 3, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "yellowpages.", + "url": "https://www.yellowpages.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "q/home-garden/home-improvement-remodeling", + "url": "https://www.yellowpages.com/q/home-garden/home-improvement-remodeling" + } + } + ] + } +] \ No newline at end of file diff --git a/browsergym/webcanvas/src/browsergym/webcanvas/data/mind2web-test_104tasks_20240528.json b/browsergym/webcanvas/src/browsergym/webcanvas/data/mind2web-test_104tasks_20240528.json new file mode 100644 index 00000000..201beae7 --- /dev/null +++ b/browsergym/webcanvas/src/browsergym/webcanvas/data/mind2web-test_104tasks_20240528.json @@ -0,0 +1,4506 @@ +[ + { + "index": "_cuWObkg3Zm5lhrV5BVro", + "task": "Find the store location and hours of the closest Gamestop to zip code 90028 and set as home store on gamestop", + "reference_task_length": 8, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + 
"reference_answer": "gamestop.", + "url": "https://www.gamestop.com/" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "store", + "reference_answer": "2630", + "url": "https://www.gamestop.com/search/?store=2630" + } + } + ], + "time": "2024-05-23 06:33:52.510" + }, + { + "index": "_pzJRzGpq38uVD09zuz2g", + "task": "Compare available plans for the AeroAPI on flightaware", + "reference_task_length": 4, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "flightaware.", + "url": "https://www.flightaware.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/aeroap", + "url": "https://www.flightaware.com/commercial/aeroapi/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "#compare-plans-section", + "url": "https://www.flightaware.com/commercial/aeroapi/#compare-plans-section" + } + } + ], + "time": "2024-05-26 06:31:01.887" + }, + { + "index": "0_XlVnFegKhb539-hHKIA", + "task": "Give a 10 rating to The Terminator II: Judgement Day on imdb", + "reference_task_length": 8, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "imdb.", + "url": "https://www.imdb.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/title/tt0103064/", + "url": "https://www.imdb.com/title/tt0103064/?ref_=nv_sr_srsg_0_tt_8_nm_0_q_terminato" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".sc-3a4309f8-0.bjXIAP.sc-b7c53eda-1.iIQkEw >.sc-3a4309f8-1.dggvUg >div:nth-child(2)>button:nth-child(2)>.ipc-btn__text >.sc-acdbf0f3-3.eWQwwe ", + "netloc": "imdb", + "url": "https://www.imdb.com/title/tt0103064/?ref_=nv_sr_srsg_0_tt_8_nm_0_q_terminato" + } + }, + { + 
"match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".ipc-starbar__touch ", + "netloc": "imdb", + "url": "https://www.imdb.com/title/tt0103064/?ref_=nv_sr_srsg_0_tt_8_nm_0_q_terminato" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".ipc-promptable-base__content >div:nth-child(2)>div:nth-child(3)>button:nth-child(2)", + "netloc": "imdb", + "url": "https://www.imdb.com/title/tt0103064/?ref_=nv_sr_srsg_0_tt_8_nm_0_q_terminato" + } + } + ], + "time": "2024-05-21 05:17:38.413" + }, + { + "index": "0Bes-kjPvso3mgjPsaQIJ", + "task": "Check the status of train S92 for any disruptions on new.mta.info.", + "reference_task_length": 6, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "new.mta.info", + "url": "https://new.mta.info/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/alerts", + "url": "https://new.mta.info/alerts" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "selectedRoutes", + "reference_answer": "MTA:S92", + "url": "https://new.mta.info/alerts?selectedRoutes=MTA%3AS92&selectedDate=3%2F20%2F2024" + } + } + ], + "time": "2024-03-12 08:41:48.689" + }, + { + "index": "2OEKICou9EWm4Bs6N9Ei3", + "task": "Check the status of your iPhone repair on apple.", + "reference_task_length": 5, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "apple.", + "url": "https://www.apple.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "repair", + "url": "https://support.apple.com/repair?cid=gn-ols-repair-lp-get_help" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/my-support", + 
"url": "https://support.apple.com/my-support" + } + } + ], + "time": "2024-03-25 03:06:23.379" + }, + { + "index": "2rjwRPMba7SmRlyZRNa2m", + "task": "Open the page with a overview about the submission of releases on discogs", + "reference_task_length": 6, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "discogs.", + "url": "https://www.discogs.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "Overview-Of-Submission-Guidelines-For-Releases", + "url": "https://support.discogs.com/hc/en-us/articles/360004016474-Overview-Of-Submission-Guidelines-For-Releases" + } + } + ], + "time": "2024-05-20 05:56:13.543" + }, + { + "index": "3gpqgwl1AQZz5p2FTjHd-", + "task": "View the latest job openings in safety with a salary above 100k per annum, check the details, and apply on mbta.", + "reference_task_length": 16, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "mbta.", + "url": "https://www.mbta.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "governmentjobs.", + "url": "https://www.governmentjobs.com/careers/mbta" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/careers/mbta", + "url": "https://www.governmentjobs.com/careers/mbta" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "department[0]", + "reference_answer": "MBTA - Safety", + "url": "https://www.governmentjobs.com/careers/mbta?department[0]=MBTA%20-%20Safety&salary=100000&sort=PostingDate%7CDescending" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "salary", + "reference_answer": "100000", + "url": 
"https://www.governmentjobs.com/careers/mbta?department[0]=MBTA%20-%20Safety&salary=100000&sort=PostingDate%7CDescending" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "sort", + "reference_answer": "PostingDate|Descending", + "url": "https://www.governmentjobs.com/careers/mbta?department[0]=MBTA%20-%20Safety&salary=100000&sort=PostingDate%7CDescending" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".unstyled >li:nth-child(1)>h3:nth-child(1)>.item-details-link ", + "netloc": "governmentjobs", + "url": "https://www.governmentjobs.com/careers/mbta?department[0]=MBTA%20-%20Safety&salary=100000&sort=PostingDate%7CDescending" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".btn.btn-large.btn-primary.btn-app-primary.flyout-switch-button ", + "netloc": "governmentjobs", + "url": "https://www.governmentjobs.com/careers/mbta" + } + } + ], + "time": "2024-03-25 02:51:40.381" + }, + { + "index": "3RLzqsxoItEoOZlm3dwjy", + "task": "Add a e-gift card to bag of $100 for recipient John and email address abc@test.com from buckeye.foobar@gmail.com with message gift card on underarmour", + "reference_task_length": 15, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "underarmour.", + "url": "https://www.underarmour.com/en-us/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/ua_egift_card/", + "url": "https://www.underarmour.com/en-us/gift-card-landing/" + } + }, + { + "match_function_name": "element_value_exactly_match", + "content": { + "reference_answer": "Johm", + "netloc": "underarmour", + "path": "#name ", + "url": "https://www.underarmour.com/en-us/p/gift_cards/ua_egift_card/GC-0001-ALL.html?dwvar_GC-0001-ALL_color=0001" + } + }, + { + 
"match_function_name": "element_value_exactly_match", + "content": { + "reference_answer": "abc@test.com", + "netloc": "underarmour", + "path": "#email ", + "url": "https://www.underarmour.com/en-us/p/gift_cards/ua_egift_card/GC-0001-ALL.html?dwvar_GC-0001-ALL_color=0001" + } + }, + { + "match_function_name": "element_value_exactly_match", + "content": { + "reference_answer": "buckeye.foobar@gmail.com", + "netloc": "underarmour", + "path": "#from ", + "url": "https://www.underarmour.com/en-us/p/gift_cards/ua_egift_card/GC-0001-ALL.html?dwvar_GC-0001-ALL_color=0001" + } + }, + { + "match_function_name": "element_value_exactly_match", + "content": { + "reference_answer": "100", + "netloc": "underarmour", + "path": "#amount ", + "url": "https://www.underarmour.com/en-us/p/gift_cards/ua_egift_card/GC-0001-ALL.html?dwvar_GC-0001-ALL_color=0001" + } + }, + { + "match_function_name": "element_value_semantic_match", + "content": { + "reference_answer": "Decide whether the content means gift card", + "netloc": "underarmour", + "path": "#message ", + "url": "https://www.underarmour.com/en-us/p/gift_cards/ua_egift_card/GC-0001-ALL.html?dwvar_GC-0001-ALL_color=0001" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".Button_btn__hhPc5.Button_btn__tertiary__RRG5P ", + "netloc": "underarmour", + "url": "https://www.underarmour.com/en-us/gift-card-landing/" + } + } + ], + "time": "2024-05-20 05:25:05.480" + }, + { + "index": "45plKpA3GwhP5bEqIL9Bx", + "task": "Find a person by address Nice st - 1234, Good, FL on yellowpages", + "reference_task_length": 12, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "yellowpages.", + "url": "https://www.yellowpages.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/address", + "url": 
"https://people.yellowpages.com/whitepages/address" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "street", + "reference_answer": "Nice st - 1234", + "url": "https://people.yellowpages.com/whitepages/address?street=Nice+st+-+1234&city=Good&state=FL" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "city", + "reference_answer": "Good", + "url": "https://people.yellowpages.com/whitepages/address?street=Nice+st+-+1234&city=Good&state=FL" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "state", + "reference_answer": "FL", + "url": "https://people.yellowpages.com/whitepages/address?street=Nice+st+-+1234&city=Good&state=FL" + } + } + ], + "time": "2024-05-26 05:22:56.687" + }, + { + "index": "5Jfpi0joLf_6AVRX8mN9R", + "task": "Check my AMC gift card balance with gift card number 87654321 and pin number 9753 on amctheatres.", + "reference_task_length": 8, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "amctheatres.", + "url": "https://www.amctheatres.com/showtimes/all/2024-03-25/default/all?utm_source=google&utm_medium=paidsearch&utm_campaign=OnlineTicketing2023&kclid=bc667845-5652-4d22-b01f-539c6d69db0e&gad_source=1" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/gift-cards", + "url": "https://www.amctheatres.com/gift-cards/check-balance" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/check-balance", + "url": "https://www.amctheatres.com/gift-cards/check-balance" + } + }, + { + "match_function_name": "element_value_exactly_match", + "content": { + "reference_answer": "87654321", + "netloc": "amctheatres", + "path": ".form-full-width-fields >div:nth-child(1)>.gift-card-lookup__input-field ", + "url": "https://www.amctheatres.com/gift-cards/check-balance" + } + }, + { + 
"match_function_name": "element_value_exactly_match", + "content": { + "reference_answer": "9753", + "netloc": "amctheatres", + "path": ".form-full-width-fields >div:nth-child(2)>.gift-card-lookup__input-field ", + "url": "https://www.amctheatres.com/gift-cards/check-balance" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".gift-card-lookup__actions >div:nth-child(1)>.gift-card-lookup__item ", + "netloc": "amctheatres", + "url": "https://www.amctheatres.com/showtimes/all/2024-03-25/default/all?utm_source=google&utm_medium=paidsearch&utm_campaign=OnlineTicketing2023&kclid=bc667845-5652-4d22-b01f-539c6d69db0e&gad_source=1" + } + } + ], + "time": "2024-03-25 06:53:38.530" + }, + { + "index": "5okIgoVRM4WfLCb0vND2y", + "task": "Get the lowest priced women's plus size one piece swimsuit in color black with customer rating of atleat 5 on kohls", + "reference_task_length": 13, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "kohls.", + "url": "https://www.kohls.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "CN", + "reference_answer": "Gender:Womens", + "url": "https://www.kohls.com/catalog/womens-clothing.jsp?CN=Gender:Womens+Department:Clothing&cc=wms-TN1.0-S-women&kls_sbp=87660515555844928264416369496642981542" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "CN", + "reference_answer": "Swimsuits", + "url": "https://www.kohls.com/catalog/womens-swimsuits-clothing.jsp?CN=Gender:Womens+Category:Swimsuits+Department:Clothing&BST=6749829:6749828&icid=sl-nav-wmn-clothing-womensclothing-swimwear&kls_sbp=87660515555844928264416369496642981542" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "CN", + "reference_answer": "One-Piece", + "url": 
"https://www.kohls.com/catalog/womens-onepiece-swimsuits-swimsuits-clothing.jsp?CN=Gender:Womens+Product:One-Piece%20Swimsuits+Category:Swimsuits+Department:Clothing&icid=sl-nav-wmn-clothing-swimwear-onepiece&kls_sbp=87660515555844928264416369496642981542" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "CN", + "reference_answer": "SizeRange:Plus", + "url": "https://www.kohls.com/catalog/womens-plus-onepiece-swimsuits-swimsuits-clothing.jsp?CN=Gender:Womens+SizeRange:Plus+Product:One-Piece%20Swimsuits+Category:Swimsuits+Department:Clothing&BST=4582406&icid=womensonepieceswim-VN-plus&pfm=internal%20campaign%20p13n_control%20Visual%20Nav&kls_sbp=87660515555844928264416369496642981542" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "CN", + "reference_answer": "Color:Black", + "url": "https://www.kohls.com/catalog/womens-black-plus-onepiece-swimsuits-swimsuits-clothing.jsp?CN=Gender:Womens+Color:Black+SizeRange:Plus+Product:One-Piece%20Swimsuits+Category:Swimsuits+Department:Clothing&S=1&PPP=48&pfm=internal%20campaign%20visual%20nav%20refine&kls_sbp=87660515555844928264416369496642981542" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "S", + "reference_answer": "4", + "url": "https://www.kohls.com/catalog/womens-black-plus-onepiece-swimsuits-swimsuits-clothing.jsp?CN=Gender:Womens+Color:Black+SizeRange:Plus+Product:One-Piece%20Swimsuits+Category:Swimsuits+Department:Clothing&pfm=internal%20campaign%20visual%20nav%20refine&kls_sbp=87660515555844928264416369496642981542&PPP=48&S=4&sks=true" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "CN", + "reference_answer": "TopRated:5", + "url": 
"https://www.kohls.com/catalog/5-womens-black-plus-onepiece-swimsuits-swimsuits-clothing.jsp?CN=TopRated:5+Gender:Womens+Color:Black+SizeRange:Plus+Product:One-Piece%20Swimsuits+Category:Swimsuits+Department:Clothing&S=4&PPP=48&pfm=internal%20campaign%20visual%20nav%20refine&kls_sbp=87660515555844928264416369496642981542" + } + } + ], + "time": "2024-05-24 09:57:01.066" + }, + { + "index": "7Y6RH7SlTXZZmPaQ4ydVj", + "task": "Find discussions of community and open one with most replies on flightaware.", + "reference_task_length": 6, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "flightaware.", + "url": "https://www.flightaware.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "discussions.", + "url": "https://discussions.flightaware.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/top", + "url": "https://discussions.flightaware.com/top" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".topic-list-body >tr:nth-child(1)>.main-link.clearfix.topic-list-data >.link-top-line >.title.raw-link.raw-topic-link ", + "netloc": "discussions", + "url": "https://discussions.flightaware.com/top" + } + } + ], + "time": "2024-03-23 07:41:02.232" + }, + { + "index": "8S0fsID6FFG6MB4t93eOH", + "task": "Find a 2022 Tesla Model 3 on carmax", + "reference_task_length": 10, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "carmax.", + "url": "https://www.carmax.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/model-3", + "url": "https://www.carmax.com/cars/tesla/model-3?year=2022-2023" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": 
"year", + "reference_answer": "2022", + "url": "https://www.carmax.com/cars/tesla/model-3?year=2022" + } + } + ], + "time": "2024-05-24 07:30:22.531" + }, + { + "index": "A-M0rQCsPu_r9xWt-eqYv", + "task": "Get the report from the final environmental impact statement for the Jamaica Bus Depot expansion on new.mta.info", + "reference_task_length": 10, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "new.mta.info", + "url": "https://new.mta.info/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/project/jamaica-bus-depot-expansion", + "url": "https://new.mta.info/project/jamaica-bus-depot-expansion" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".mta-details.mta-mb-500 >div:nth-child(2)>details:nth-child(3)>summary:nth-child(1)>.mta-flex.mta-items-center >.mta-flex-grow ", + "netloc": "new", + "url": "https://new.mta.info/project/jamaica-bus-depot-expansion" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".mta-details.mta-mb-500 >div:nth-child(2)>details:nth-child(3)>div:nth-child(2)>.field.field--name-field-accordion-text.field--type-text-long.field--label-hidden.field--item >ul:nth-child(2)>li:nth-child(2)>a:nth-child(1)", + "netloc": "new", + "url": "https://new.mta.info/project/jamaica-bus-depot-expansion" + } + } + ], + "time": "2024-05-28 06:54:52.796" + }, + { + "index": "aEu9zp3GWZb9SVw0PN_2c", + "task": "Search for the lowest millage used Honda Crosstour 2012 to 2013 near 49102 less than $25000 on cargurus.", + "reference_task_length": 17, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "cargurus.", + "url": "https://www.cargurus.com/" + } + }, + { + "match_function_name": "url_exactly_match", + 
"content": { + "key": "entitySelectingHelper.selectedEntity", + "reference_answer": "d2184", + "url": "https://www.cargurus.com/Cars/inventorylisting/viewDetailsFilterViewInventoryListing.action?sourceContext=carGurusHomePageModel&entitySelectingHelper.selectedEntity=d2184&zip=49102" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "zip", + "reference_answer": "49102", + "url": "https://www.cargurus.com/Cars/inventorylisting/viewDetailsFilterViewInventoryListing.action?sourceContext=carGurusHomePageModel&entitySelectingHelper.selectedEntity=d2184&zip=49102" + } + }, + { + "match_function_name": "element_value_exactly_match", + "content": { + "reference_answer": "$25,000", + "netloc": "cargurus", + "path": "#price-range-input-max ", + "url": "https://www.cargurus.com/Cars/inventorylisting/viewDetailsFilterViewInventoryListing.action?sourceContext=carGurusHomePageModel&entitySelectingHelper.selectedEntity=d2184&zip=49102" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": "#year-filter-min ", + "netloc": "cargurus", + "url": "https://www.cargurus.com/Cars/inventorylisting/viewDetailsFilterViewInventoryListing.action?sourceContext=carGurusHomePageModel&entitySelectingHelper.selectedEntity=d2184&zip=49102" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": "#year-filter-min ", + "netloc": "cargurus", + "url": "https://www.cargurus.com/Cars/inventorylisting/viewDetailsFilterViewInventoryListing.action?sourceContext=carGurusHomePageModel&entitySelectingHelper.selectedEntity=d2184&zip=49102" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": "#year-filter-max ", + "netloc": "cargurus", + "url": 
"https://www.cargurus.com/Cars/inventorylisting/viewDetailsFilterViewInventoryListing.action?sourceContext=carGurusHomePageModel&entitySelectingHelper.selectedEntity=d2184&zip=49102" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": "#year-filter-min >option:nth-child(2)", + "netloc": "cargurus", + "url": "https://www.cargurus.com/Cars/inventorylisting/viewDetailsFilterViewInventoryListing.action?sourceContext=carGurusHomePageModel&entitySelectingHelper.selectedEntity=d2184&zip=49102" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": "#Listing-Sort ", + "netloc": "cargurus", + "url": "https://www.cargurus.com/Cars/inventorylisting/viewDetailsFilterViewInventoryListing.action?sourceContext=carGurusHomePageModel&entitySelectingHelper.selectedEntity=d2184&zip=49102" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": "#Listing-Sort >option:nth-child(6)", + "netloc": "cargurus", + "url": "https://www.cargurus.com/Cars/inventorylisting/viewDetailsFilterViewInventoryListing.action?sourceContext=carGurusHomePageModel&entitySelectingHelper.selectedEntity=d2184&zip=49102" + } + } + ], + "time": "2024-03-24 12:49:24.093" + }, + { + "index": "AjnXnl3STIls1uc49TDze", + "task": "Browse men's winter coats in size large that is on clearance on kohls.", + "reference_task_length": 8, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "kohls.", + "url": "https://www.kohls.com/" + } + }, + { + "match_function_name": "url_semantic_match", + "content": { + "key": "search", + "reference_answer": "Decide whether are searching for mens winter coat", + "url": 
"https://www.kohls.com/search/clearance.jsp?CN=Promotions:Clearance+Size:L&BL=y&search=mens%20winter%20coat&S=1&PPP=48&pfm=search%20refine&kls_sbp=59288101756704364110307077102020115511" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "CN", + "reference_answer": "Promotions:Clearance", + "url": "https://www.kohls.com/search/clearance.jsp?CN=Promotions:Clearance+Size:L&BL=y&search=mens%20winter%20coat&S=1&PPP=48&pfm=search%20refine&kls_sbp=59288101756704364110307077102020115511" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "CN", + "reference_answer": "Size:L", + "url": "https://www.kohls.com/search/clearance.jsp?CN=Promotions:Clearance+Size:L&BL=y&search=mens%20winter%20coat&S=1&PPP=48&pfm=search%20refine&kls_sbp=59288101756704364110307077102020115511" + } + } + ], + "time": "2024-03-27 10:48:21.173" + }, + { + "index": "AomPMCHZ6Yf7mH_TxyI7e", + "task": "Find a walkthrough for the game \"The Legend of Zelda: Breath of the Wild\" on ign.", + "reference_task_length": 6, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "ign.", + "url": "https://www.ign.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/games/the-legend-of-zelda-breath-of-the-wild", + "url": "https://www.ign.com/games/the-legend-of-zelda-breath-of-the-wild" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/Walkthrough", + "url": "https://www.ign.com/wikis/the-legend-of-zelda-breath-of-the-wild/Walkthrough" + } + } + ], + "time": "2024-03-25 05:37:54.561" + }, + { + "index": "apsmDjkuV6iGvfQ9-YkP0", + "task": "Find an editor's choice review with a score of 10 in the boardgame category on ign", + "reference_task_length": 8, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + 
"reference_answer": "ign.", + "url": "https://www.ign.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/editors-choice", + "url": "https://www.ign.com/" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": "#scoreRange ", + "netloc": "ign", + "url": "https://www.ign.com/" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": "#scoreRange >option:nth-child(2)", + "netloc": "ign", + "url": "https://www.ign.com/editors-choice" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": "#genre ", + "netloc": "ign", + "url": "https://www.ign.com/" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": "#genre >option:nth-child(5)", + "netloc": "ign", + "url": "https://www.ign.com/editors-choice" + } + } + ], + "time": "2024-05-24 07:12:38.613" + }, + { + "index": "asuczSfk4bCf7OwPA_oVx", + "task": "Sign up for a REI Co-Op membership on rei.", + "reference_task_length": 3, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "rei.", + "url": "https://www.rei.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/membership", + "url": "https://www.rei.com/membership" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".hero__ctas >button:nth-child(2)", + "netloc": "rei", + "url": "https://www.rei.com/membership" + } + } + ], + "time": "2024-03-25 06:29:17.908" + }, + { + "index": "Atgpzhx4geR9YvP8u6AZb", + "task": "Find the weight of baggage allowance for economy class on qatarairways", + "reference_task_length": 
4, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "qatarairways.", + "url": "https://www.qatarairways.com/en-us/homepage.html" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/baggage/allowance", + "url": "https://www.qatarairways.com/en-us/baggage/allowance.html?iid=ALL67670750" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": "#j-poi-tabs >li:nth-child(1)>.tabmenu-link >span:nth-child(1)", + "netloc": "qatarairways", + "url": "https://www.qatarairways.com/en-us/baggage/allowance.html?iid=ALL67670750" + } + } + ], + "time": "2024-05-26 05:38:21.691" + }, + { + "index": "b18Xcyxl8sjXTUQ-Jbtmj", + "task": "Find flights going from Indira Gandhi to Los Cabos on flightaware", + "reference_task_length": 10, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "flightaware.", + "url": "https://www.flightaware.com/" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "origin", + "reference_answer": "VIDP", + "url": "https://www.flightaware.com/live/findflight?origin=VIDP&destination=MMSD" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "destination", + "reference_answer": "MMSD", + "url": "https://www.flightaware.com/live/findflight?origin=VIDP&destination=MMSD" + } + } + ], + "time": "2024-05-26 06:38:34.537" + }, + { + "index": "BABEHOxbn8rQAoskKTM0y", + "task": "Tell me information about what identification I need to bring on my trip on amtrak", + "reference_task_length": 6, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "amtrak.", + "url": "https://www.amtrak.com/home" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + 
"reference_answer": "/planning-booking", + "url": "https://www.amtrak.com/planning-booking.html" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/tickets-id-safety-security", + "url": "https://www.amtrak.com/planning-booking/tickets-id-safety-security.html" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/passenger-identification", + "url": "https://www.amtrak.com/planning-booking/tickets-id-safety-security/passenger-identification.html" + } + } + ], + "time": "2024-05-26 04:30:36.977" + }, + { + "index": "bfAcQB3vTsCJPNxt7ZEm4", + "task": "Browse used Audi cars made before 2015 and sort by lowest price on kbb", + "reference_task_length": 8, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "kbb.", + "url": "https://www.kbb.com/" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "intent", + "reference_answer": "used", + "url": "https://www.kbb.com/audi/?intent=used" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "years", + "reference_answer": "1992-2015", + "url": "https://www.kbb.com/car-finder/?intent=used&manufacturers=audi&years=1992-2015" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "sort", + "reference_answer": "priceasc", + "url": "https://www.kbb.com/car-finder/?intent=used&manufacturers=audi&years=1992-2015&sort=priceasc" + } + } + ], + "time": "2024-05-26 05:55:12.822" + }, + { + "index": "BITQL_mGoz9vud1fvryyv", + "task": "Show crazy credits for the movie \" Prometheus\" on imdb", + "reference_task_length": 6, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "imdb.", + "url": "https://www.imdb.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + 
"reference_answer": "/tt1446714", + "url": "https://www.imdb.com/title/tt1446714/?ref_=nv_sr_srsg_0_tt_7_nm_1_q_Prometheus" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/crazycredits", + "url": "https://www.imdb.com/title/tt1446714/crazycredits/?tab=cz&ref_=tt_trv_cc" + } + } + ], + "time": "2024-05-24 07:48:53.384" + }, + { + "index": "Bvywj9SzJD5eYeZrtYY3m", + "task": "Add the first wireless headphones to your cart with a budget of $100 or less, that has an active noise-cancelling feature on amazon", + "reference_task_length": 8, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "amazon.", + "url": "https://www.amazon.com/" + } + }, + { + "match_function_name": "url_semantic_match", + "content": { + "key": "k", + "reference_answer": "Decide whether are searching for wireless headphones", + "url": "https://www.amazon.com/s?k=wireless+headphones&crid=3B1DV9AE18GHL&sprefix=wireless+headphones%2Caps%2C737&ref=nb_sb_noss_1" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "high-price", + "reference_answer": "100", + "url": "https://www.amazon.com/s?k=wireless+headphones&crid=3B1DV9AE18GHL&qid=1716877867&rnid=386442011&sprefix=wireless+headphones%2Caps%2C737&ref=sr_nr_p_36_0_0&low-price=&high-price=100" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": "#a-autoid-3-announce ", + "netloc": "amazon", + "url": "https://www.amazon.com/s?k=wireless+headphones&rh=n%3A172282%2Cp_36%3A-10000%2Cp_n_feature_two_browse-bin%3A23746030011&dc&ds=v1%3AJ8iOECT4r4taWN1uZiq%2BUlDjD4Nhpo6GPIbBN5RxVAk&crid=3B1DV9AE18GHL&qid=1716877974&rnid=23746028011&sprefix=wireless+headphones%2Caps%2C737&ref=sr_nr_p_n_feature_two_browse-bin_1" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "rh", + "reference_answer": 
"p_n_feature_two_browse-bin:23746030011", + "url": "https://www.amazon.com/s?k=wireless+headphones&rh=n%3A172282%2Cp_36%3A-10000%2Cp_n_feature_two_browse-bin%3A23746030011&dc&ds=v1%3AJ8iOECT4r4taWN1uZiq%2BUlDjD4Nhpo6GPIbBN5RxVAk&crid=3B1DV9AE18GHL&qid=1716877974&rnid=23746028011&sprefix=wireless+headphones%2Caps%2C737&ref=sr_nr_p_n_feature_two_browse-bin_1" + } + } + ], + "time": "2024-05-28 06:41:02.718" + }, + { + "index": "C-ptYrbhcyNGw6WYcT3B5", + "task": "Find Playstation 5 digital edition on gamestop.", + "reference_task_length": 6, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "gamestop.", + "url": "https://www.gamestop.com/" + } + }, + { + "match_function_name": "url_semantic_match", + "content": { + "key": "q", + "reference_answer": "Decide whether are searching for playstation 5 digital edition", + "url": "https://www.gamestop.com/search/?q=playstation+5+digital+edition&lang=default&start=0&sz=20" + } + } + ], + "time": "2024-03-25 06:32:48.505" + }, + { + "index": "CAzoBWdWNepFOwGf6cv0O", + "task": "Browse Marriott Bonvoy credit cards on marriott", + "reference_task_length": 4, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "marriott.", + "url": "https://www.marriott.com/default.mi" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/earn", + "url": "https://www.marriott.com/loyalty/earn.mi" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/credit-cards", + "url": "https://www.marriott.com/credit-cards.mi" + } + } + ], + "time": "2024-05-26 04:24:43.810" + }, + { + "index": "CnJfZVtxvcwIDMj9zkugH", + "task": "Show me the list of Men's Blazers, Black, Size M on uniqlo.", + "reference_task_length": 11, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": 
"", + "reference_answer": "uniqlo.", + "url": "https://www.uniqlo.com/us/en/?gad_source=1&gclid=CjwKCAjwh4-wBhB3EiwAeJsppNr8jYQ6QYvd2U5_tXunKxWnQ2bHc31FSKpbeh5QvlSzgyQpEIxi3hoCMyIQAvD_BwE" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/men", + "url": "https://www.uniqlo.com/us/en/men/outerwear-and-blazers/blazers?path=%2C%2C23380&colorCodes=COL09&sizeCodes=SMA004&categoryId=23380" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/outerwear-and-blazers/blazers", + "url": "https://www.uniqlo.com/us/en/men/outerwear-and-blazers/blazers?path=%2C%2C23380&colorCodes=COL09&sizeCodes=SMA004&categoryId=23380" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "colorCodes", + "reference_answer": "COL09", + "url": "https://www.uniqlo.com/us/en/men/outerwear-and-blazers/blazers?path=%2C%2C23380&colorCodes=COL09&sizeCodes=SMA004&categoryId=23380" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "sizeCodes", + "reference_answer": "SMA004", + "url": "https://www.uniqlo.com/us/en/men/outerwear-and-blazers/blazers?path=%2C%2C23380&colorCodes=COL09&sizeCodes=SMA004&categoryId=23380" + } + } + ], + "time": "2024-03-27 09:16:35.460" + }, + { + "index": "CPdTwa57mKJJiZPQg2542", + "task": "Add formula 1 to my followed sports on foxsports", + "reference_task_length": 7, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "foxsports.", + "url": "https://www.foxsports.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/motor/formula-1", + "url": "https://www.foxsports.com/motor/formula-1" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".button-favorite.entity.pointer.fs-14 ", + "netloc": 
"foxsports", + "url": "https://www.foxsports.com/motor/formula-1" + } + } + ], + "time": "2024-05-20 05:29:37.672" + }, + { + "index": "cPEujNGfZgLT2rbtuKPOt", + "task": "Show me the options for a roundtrip leaving from las vegas on flexile dates on the interactive map on united", + "reference_task_length": 12, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "united.", + "url": "https://www.united.com/en/us/destination-map" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/destination-map", + "url": "https://www.united.com/en/us/destination-map" + } + }, + { + "match_function_name": "element_value_semantic_match", + "content": { + "reference_answer": "Decide whether is searching for las vegas", + "netloc": "united", + "path": "#filterOriginInput ", + "url": "https://www.united.com/en/us/destination-map" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".app-components-AutoComplete-Atmos-styles__autoCompleteOption--gDdQn ", + "netloc": "united", + "url": "https://www.united.com/en/us/destination-map" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".atm-c-select.atm-c-select-field__control ", + "netloc": "united", + "url": "https://www.united.com/en/us/destination-map" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".atm-c-select.atm-c-select-field__control ", + "netloc": "united", + "url": "https://www.united.com/en/us/destination-map" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".non-atmos-datepicker >div:nth-child(1)>button:nth-child(2)", + "netloc": "united", + "url": 
"https://www.united.com/en/us/destination-map" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": "#flexibleDatesOpt ", + "netloc": "united", + "url": "https://www.united.com/en/us/destination-map" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".app-components-PlacesFilters-HorizontalMapSearchForm-styles__actions--KtDCD >.atm-c-btn.atm-c-btn--primary.atm-c-btn--block ", + "netloc": "united", + "url": "https://www.united.com/en/us/destination-map" + } + } + ], + "time": "2024-05-22 04:45:08.380" + }, + { + "index": "cplcsXZ5GetOqRKBt5LBn", + "task": "Download the e-receipt with the last name Smith and confirmation number X123456989 on budget", + "reference_task_length": 8, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "budget.", + "url": "https://www.budget.com/en/home?ARCIATA=0103202Q&gad_source=1&gclid=Cj0KCQjwjLGyBhCYARIsAPqTz18n_6wf0NJ79MtFaDEctHfaWJrjTzaDNJ-PRPqtxnEJywoNVEYPnL0aAmJyEALw_wcB" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/reservation/get-e-receipt", + "url": "https://www.budget.com/en/reservation/get-e-receipt" + } + }, + { + "match_function_name": "element_value_exactly_match", + "content": { + "reference_answer": "Smith", + "netloc": "budget", + "path": "#lastName ", + "url": "https://www.budget.com/en/reservation/get-e-receipt" + } + }, + { + "match_function_name": "element_value_exactly_match", + "content": { + "reference_answer": "X123456989", + "netloc": "budget", + "path": "#Confirmation-no ", + "url": "https://www.budget.com/en/reservation/get-e-receipt" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".VMC-button >.btn.btn-red ", + "netloc": "budget", + "url": 
"https://www.budget.com/en/reservation/get-e-receipt" + } + } + ], + "time": "2024-05-22 05:15:26.367" + }, + { + "index": "d2Csfs3T6ABluicCuLwRP", + "task": "show the Life Jackets priced between $100 and $200 on cabelas", + "reference_task_length": 6, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "cabelas.", + "url": "https://www.cabelas.com/shop/en#" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/life-jackets", + "url": "https://www.cabelas.com/l/life-jackets" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "nf-offerprice=100..200", + "url": "https://www.cabelas.com/l/life-jackets#nf-offerprice=100..200" + } + } + ], + "time": "2024-05-26 04:13:32.518" + }, + { + "index": "DAabNkqk7gX0dHEcC0sbd", + "task": "Find 32\" Curved monitor and add the third one to the wish list on newegg", + "reference_task_length": 7, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "newegg.", + "url": "https://www.newegg.com/" + } + }, + { + "match_function_name": "url_semantic_match", + "content": { + "key": "d", + "reference_answer": "Decide whether is searching for 32\" curved monitor", + "url": "https://www.newegg.com/p/pl?d=32%22+curved+monitor" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".list-wrap >div:nth-child(2)>div:nth-child(3)>.item-container >.item-img >.item-quick-action-container >.quick-action ", + "netloc": "newegg", + "url": "https://www.newegg.com/p/pl?d=32%22+curved+monitor" + } + } + ], + "time": "2024-05-20 04:55:02.771" + }, + { + "index": "Dc_JXmrltb5LgK9HyhVQb", + "task": "Find the list of all neighborhood maps for Brooklyn on new.mta.info", + "reference_task_length": 5, + "evaluation": [ + { + 
"match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "new.mta.info", + "url": "https://new.mta.info/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/subway/mta-neighborhood-maps", + "url": "https://new.mta.info/maps/subway/mta-neighborhood-maps" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/brooklyn", + "url": "https://new.mta.info/maps/subway/mta-neighborhood-maps/brooklyn" + } + } + ], + "time": "2024-03-22 09:48:12.838" + }, + { + "index": "DE1P2PSPtO3jAAudlH0v-", + "task": "Find me the deals available for the Great escape park on sixflags", + "reference_task_length": 7, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "sixflags.", + "url": "https://www.sixflags.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/greatescape", + "url": "https://www.sixflags.com/greatescape" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/store/tickets", + "url": "https://www.sixflags.com/greatescape/store/tickets" + } + } + ], + "time": "2024-05-24 09:04:10.474" + }, + { + "index": "dIFny0pY8Wf8KdKs1kkbM", + "task": "Find the movie Donnie Darko and show its complete cast on imdb", + "reference_task_length": 6, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "imdb.", + "url": "https://www.imdb.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/tt0246578", + "url": "https://www.imdb.com/title/tt0246578/?ref_=nv_sr_srsg_0_tt_8_nm_0_q_Donnie%2520Darko" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/fullcredits", + 
"url": "https://www.imdb.com/title/tt0246578/fullcredits?ref_=tt_cl_sm" + } + } + ], + "time": "2024-05-20 04:46:08.231" + }, + { + "index": "dRJJLjd2TAIfcOTO0kLth", + "task": "Get the most reviewed shopping store that accepts apple pay in Central New York on yelp.", + "reference_task_length": 9, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "yelp.", + "url": "https://www.yelp.com/" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "sortby", + "reference_answer": "review_count", + "url": "https://www.yelp.com/search?find_desc=Shopping&find_loc=Central+New+York%2C+NY&attrs=BusinessAcceptsApplePay&sortby=review_count" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "attrs", + "reference_answer": "BusinessAcceptsApplePay", + "url": "https://www.yelp.com/search?find_desc=Shopping&find_loc=Central+New+York%2C+NY&attrs=BusinessAcceptsApplePay&sortby=review_count" + } + }, + { + "match_function_name": "url_semantic_match", + "content": { + "key": "find_loc", + "reference_answer": "Decide whether are searching for Central New York", + "url": "https://www.yelp.com/search?find_desc=Shopping&find_loc=Central+New+York%2C+NY&attrs=BusinessAcceptsApplePay&sortby=review_count" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "find_desc", + "reference_answer": "Shopping", + "url": "https://www.yelp.com/search?find_desc=Shopping&find_loc=Central+New+York%2C+NY&attrs=BusinessAcceptsApplePay&sortby=review_count" + } + } + ], + "time": "2024-03-25 03:24:07.862" + }, + { + "index": "E2W7rMnlBIiT6lj0jJMIc", + "task": "Find me the cheapest external HD for an Xbox One on gamestop.", + "reference_task_length": 13, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "gamestop.", + "url": "https://www.gamestop.com/" + } + }, + { + "match_function_name": 
"url_semantic_match", + "content": { + "key": "q", + "reference_answer": "Decide whether are searching for external HD", + "url": "https://www.gamestop.com/search/?q=external+HD&lang=default&start=0&sz=20" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "prefv1", + "reference_answer": "Xbox", + "url": "https://www.gamestop.com/search/?prefn1=platform&prefv1=Xbox&q=external+HD&view=new&srule=price-low-to-high" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "srule", + "reference_answer": "price-low-to-high", + "url": "https://www.gamestop.com/search/?prefn1=platform&prefv1=Xbox&q=external+HD&view=new&srule=price-low-to-high" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": "#product-grid-container >div:nth-child(3)>.product.grid-tile >.product-tile.product-detail.gme-card.gme-card--shadow.product-tile-render.loaded >a:nth-child(1)", + "netloc": "gamestop", + "url": "https://www.gamestop.com/search/?prefn1=platform&prefv1=Xbox&q=external+HD&view=new&srule=price-low-to-high" + } + } + ], + "time": "2024-03-22 09:20:59.469" + }, + { + "index": "ea-0F05c6v48IaE33APkX", + "task": "Search for job in Miami Florida in Human resources on target.", + "reference_task_length": 14, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "target.", + "url": "https://www.target.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/search-jobs", + "url": "https://jobs.target.com/search-jobs?acm=8355664&alrpm=ALL&ascf=[%7B%22key%22:%22ALL%22,%22value%22:%22%22%7D]" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": "#category-toggle ", + "netloc": "jobs", + "url": 
"https://jobs.target.com/search-jobs?acm=8355664&alrpm=ALL&ascf=[%7B%22key%22:%22ALL%22,%22value%22:%22%22%7D]" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": "#category-filters-section >ul:nth-child(2)>li:nth-child(8)>label:nth-child(2)>.filter__facet-name ", + "netloc": "jobs", + "url": "https://jobs.target.com/search-jobs?acm=8355664&alrpm=ALL&ascf=[%7B%22key%22:%22ALL%22,%22value%22:%22%22%7D]" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": "#country-toggle ", + "netloc": "jobs", + "url": "https://jobs.target.com/search-jobs?acm=8355664&alrpm=ALL&ascf=[%7B%22key%22:%22ALL%22,%22value%22:%22%22%7D]" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": "#country-filters-section >ul:nth-child(2)>li:nth-child(3)>label:nth-child(2)>.filter__facet-name ", + "netloc": "jobs", + "url": "https://jobs.target.com/search-jobs?acm=8355664&alrpm=ALL&ascf=[%7B%22key%22:%22ALL%22,%22value%22:%22%22%7D]" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": "#region-toggle ", + "netloc": "jobs", + "url": "https://jobs.target.com/search-jobs?acm=8355664&alrpm=ALL&ascf=[%7B%22key%22:%22ALL%22,%22value%22:%22%22%7D]" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": "#region-filters-section >ul:nth-child(2)>li:nth-child(7)>label:nth-child(2)>.filter__facet-name ", + "netloc": "jobs", + "url": "https://jobs.target.com/search-jobs?acm=8355664&alrpm=ALL&ascf=[%7B%22key%22:%22ALL%22,%22value%22:%22%22%7D]" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": "#city-toggle ", + "netloc": "jobs", + "url": 
"https://jobs.target.com/search-jobs?acm=8355664&alrpm=ALL&ascf=[%7B%22key%22:%22ALL%22,%22value%22:%22%22%7D]" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": "#city-filters-section >ul:nth-child(2)>li:nth-child(7)>label:nth-child(2)>.filter__facet-name ", + "netloc": "jobs", + "url": "https://jobs.target.com/search-jobs?acm=8355664&alrpm=ALL&ascf=[%7B%22key%22:%22ALL%22,%22value%22:%22%22%7D]" + } + } + ], + "time": "2024-03-22 07:47:19.747" + }, + { + "index": "EI_bNWi0gUUGfjCILiyOS", + "task": "Show me products from Calvin Klein brand menswear list on kohls.", + "reference_task_length": 6, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "kohls.", + "url": "https://www.kohls.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "CN", + "reference_answer": "Gender:Mens", + "url": "https://www.kohls.com/catalog/mens-calvin-klein-clothing.jsp?CN=Gender:Mens+Brand:Calvin%20Klein+Department:Clothing&S=1&PPP=48&kls_sbp=59288101756704364110307077102020115511&pfm=internal%20campaign%20refine" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "CN", + "reference_answer": "Brand:Calvin Klein", + "url": "https://www.kohls.com/catalog/mens-calvin-klein-clothing.jsp?CN=Gender:Mens+Brand:Calvin%20Klein+Department:Clothing&S=1&PPP=48&kls_sbp=59288101756704364110307077102020115511&pfm=internal%20campaign%20refine" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "CN", + "reference_answer": "Department:Clothing", + "url": "https://www.kohls.com/catalog/mens-calvin-klein-clothing.jsp?CN=Gender:Mens+Brand:Calvin%20Klein+Department:Clothing&S=1&PPP=48&kls_sbp=59288101756704364110307077102020115511&pfm=internal%20campaign%20refine" + } + } + ], + "time": "2024-03-25 05:47:19.199" + }, + { + "index": "eUCwvaIx2vbWFCxkmtjAg", + 
"task": "Contact the support service on spothero", + "reference_task_length": 4, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "spothero.", + "url": "https://spothero.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/contact", + "url": "https://spothero.com/contact" + } + } + ], + "time": "2024-05-21 06:04:06.098" + }, + { + "index": "ewpxEe7H6GHDo0pAWcMe2", + "task": "Find a flight from Dresden to anywhere under $100 on ryanair", + "reference_task_length": 13, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "ryanair.", + "url": "https://www.ryanair.com/us/en" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/cheap-flights", + "url": "https://www.ryanair.com/us/en/cheap-flights" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "budget", + "reference_answer": "100", + "url": "https://www.ryanair.com/us/en/cheap-flights/?from=DRS&out-from-date=2024-05-28&out-to-date=2025-05-28&budget=100" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "from", + "reference_answer": "DRS", + "url": "https://www.ryanair.com/us/en/cheap-flights/?from=DRS&out-from-date=2024-05-28&out-to-date=2025-05-28&budget=100" + } + } + ], + "time": "2024-05-28 06:22:31.472" + }, + { + "index": "fc5FwBwnNV2kroxbDjyjX", + "task": "Look for hair salon in San Diego on yellowpages", + "reference_task_length": 8, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "yellowpages.", + "url": "https://www.yellowpages.com/" + } + }, + { + "match_function_name": "url_semantic_match", + "content": { + "key": "geo_location_terms", + "reference_answer": "Decide whether are searching for San Diego", + "url": 
"https://www.yellowpages.com/search?search_terms=hair+salon&geo_location_terms=San+Diego%2C+CA" + } + }, + { + "match_function_name": "url_semantic_match", + "content": { + "key": "search_terms", + "reference_answer": "Decide whether are searching for hair salon", + "url": "https://www.yellowpages.com/search?search_terms=hair+salon&geo_location_terms=San+Diego%2C+CA" + } + } + ], + "time": "2024-05-24 07:55:10.003" + }, + { + "index": "FFwg7X2CgxQiCq-Sv3EWI", + "task": "Find the closest in-store Gamestop location within 50 miles of 21122 on gamestop.", + "reference_task_length": 10, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "gamestop.", + "url": "https://www.gamestop.com/" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": "#header-redesign >div:nth-child(1)>div:nth-child(1)>button:nth-child(1)>span:nth-child(2)", + "netloc": "gamestop", + "url": "https://www.gamestop.com/" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": "#main-header-container >header:nth-child(1)>nav:nth-child(1)>div:nth-child(2)>.container >.row >nav:nth-child(1)>.menu-group >.left-align-nav >.mobile-store-container >.text-decoration-none.js-my-store-link >.mobile-store >.col-10 >.store-header >.store-name ", + "netloc": "gamestop", + "url": "https://www.gamestop.com/" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".btn-get-in-store-inventory.change-store-button.change-store ", + "netloc": "gamestop", + "url": "https://www.gamestop.com/search/?store=3878" + } + }, + { + "match_function_name": "element_value_exactly_match", + "content": { + "reference_answer": "21122", + "netloc": "gamestop", + "path": "#store-postal-code ", + "url": "https://www.gamestop.com/search/?store=3878" + } 
+ }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": "#radius ", + "netloc": "gamestop", + "url": "https://www.gamestop.com/search/?store=3878" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": "#radius >option:nth-child(3)", + "netloc": "gamestop", + "url": "https://www.gamestop.com/search/?store=3878" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".col-12.p-0 >button:nth-child(2)", + "netloc": "gamestop", + "url": "https://www.gamestop.com/search/?store=3878" + } + } + ], + "time": "2024-03-25 06:13:44.816" + }, + { + "index": "FGXMZ7VQ7uquu8fdSHSBp", + "task": "Find information about what I should do when I lose an item on a bus on us.megabus", + "reference_task_length": 3, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "megabus.", + "url": "https://us.megabus.com/?utm_source=google&utm_medium=cpc&utm_campaign=da|megabusus|ggl|brand|ext|trademark&utm_content=457655478662&utm_term=mega%20bus&gad_source=1&gclid=Cj0KCQjwmMayBhDuARIsAM9HM8fCPgvwPIA4e_sHoFbbwfHreiMfR5dd8kni3sV4lB8lAIzLFgUFbhQaAkaXEALw_wcB" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".row >div:nth-child(1)>div:nth-child(4)>.blockContent >.panel.panel-default >.panel-heading >.panel-title >.collapsed ", + "netloc": "us", + "url": "https://us.megabus.com/help" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/help", + "url": "https://us.megabus.com/help" + } + } + ], + "time": "2024-05-26 06:02:13.109" + }, + { + "index": "FtCFJNz4WhhewmINgVRu5", + "task": "Select a E-Gift card and add to cart with Best Wishes as a message. 
Send it to James Smith with email abc@abc.com on rei", + "reference_task_length": 18, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "rei.", + "url": "https://www.rei.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/gift-card/purchase", + "url": "https://www.rei.com/gift-card/purchase" + } + }, + { + "match_function_name": "element_value_exactly_match", + "content": { + "reference_answer": "James Smith", + "netloc": "rei", + "path": ".giftcard-form-input__body >div:nth-child(3)>div:nth-child(2)>div:nth-child(1)>input:nth-child(1)", + "url": "https://www.rei.com/gift-card/purchase" + } + }, + { + "match_function_name": "element_value_exactly_match", + "content": { + "reference_answer": "abc@abc.com", + "netloc": "rei", + "path": ".giftcard-form-input__body >div:nth-child(6)>div:nth-child(2)>div:nth-child(1)>input:nth-child(1)", + "url": "https://www.rei.com/gift-card/purchase" + } + }, + { + "match_function_name": "element_value_exactly_match", + "content": { + "reference_answer": "abc@abc.com", + "netloc": "rei", + "path": "#cdr-id-9c3ecd ", + "url": "https://www.rei.com/gift-card/purchase" + } + }, + { + "match_function_name": "element_value_semantic_match", + "content": { + "reference_answer": "Decide whether are describing best wishes", + "netloc": "rei", + "path": "#cdr-id-cb8de2 ", + "url": "https://www.rei.com/gift-card/purchase" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".giftcard-form__cta >button:nth-child(2)", + "netloc": "rei", + "url": "https://www.rei.com/gift-card/purchase" + } + } + ], + "time": "2024-05-22 04:30:06.572" + }, + { + "index": "fuEsXYy-LGfFLHqNw9zWt", + "task": "Add a front load washing machine under $800 to the cart on menards.", + "reference_task_length": 8, + "evaluation": [ + { + "match_function_name": 
"url_included_match", + "content": { + "key": "", + "reference_answer": "menards.", + "url": "https://www.menards.com/main/home.html" + } + }, + { + "match_function_name": "url_semantic_match", + "content": { + "key": "search", + "reference_answer": "Decide whether are searching for front load washing machine", + "url": "https://www.menards.com/main/search.html?search=front+load+washing+machine&ot=SS" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "priceMax_facet", + "reference_answer": "800", + "url": "https://www.menards.com/main/search.html?search=front+load+washing+machine&priceMax_facet=800" + } + }, + { + "match_function_name": "element_value_exactly_match", + "content": { + "reference_answer": "ADD TO CART", + "netloc": "menards", + "url": "https://www.menards.com/main/search.html?search=front+load+washing+machine&priceMax_facet=800" + } + } + ], + "time": "2024-03-23 07:22:59.969" + }, + { + "index": "G2jfRmIvoXCBKtoUcNUSd", + "task": "Search for a full-time job in sales in Springfield and apply for the most recent job on carmax", + "reference_task_length": 14, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "carmax.", + "url": "https://www.carmax.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "careers.", + "url": "https://careers.carmax.com/us/en" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/search-results", + "url": "https://careers.carmax.com/us/en/search-results" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": "#JobCategoryBody >.panel-body.au-target >.phs-facet-results >ul:nth-child(1)>li:nth-child(13)>.phs-checkbox.input-check-group.au-target ", + "netloc": "careers", + "url": "https://careers.carmax.com/us/en/search-results" + } + 
}, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": "#CityBody >.panel-body.au-target >.phs-facet-results >ul:nth-child(1)>li:nth-child(176)>.phs-checkbox.input-check-group.au-target ", + "netloc": "careers", + "url": "https://careers.carmax.com/us/en/search-results" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": "#JobTypeBody >.panel-body.au-target >.phs-facet-results >ul:nth-child(1)>li:nth-child(2)>.phs-checkbox.input-check-group.au-target ", + "netloc": "careers", + "url": "https://careers.carmax.com/us/en/search-results" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": "#sortselect >option:nth-child(2)", + "netloc": "careers", + "url": "https://careers.carmax.com/us/en/search-results" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".au-target >li:nth-child(1)>.information >span:nth-child(2)>.au-target >.job-title >span:nth-child(1)", + "netloc": "careers", + "url": "https://careers.carmax.com/us/en/search-results" + } + } + ], + "time": "2024-05-21 17:29:20.909" + }, + { + "index": "hMW3NU6H4U1avWsKmMj0y", + "task": "Browse coffee makers that are rated 5 stars on kohls", + "reference_task_length": 7, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "kohls.", + "url": "https://www.kohls.com/" + } + }, + { + "match_function_name": "url_semantic_match", + "content": { + "key": "search", + "reference_answer": "Decide whether are searching for coffee maker", + "url": "https://www.kohls.com/search.jsp?submit-search=web-regular&search=coffee+maker&kls_sbp=34524031611978259241165260194179142249" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "CN", + 
"reference_answer": "TopRated:5", + "url": "https://www.kohls.com/search/5.jsp?CN=TopRated:5&search=coffee%20maker&S=1&PPP=48&kls_sbp=87660515555844928264416369496642981542&pfm=search%20refine" + } + } + ], + "time": "2024-05-26 04:58:15.747" + }, + { + "index": "iBh1tZmxZGScFoDu6AcwB", + "task": "Browse spider-man toys for kids and sort by lowest price on kohls", + "reference_task_length": 8, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "kohls.", + "url": "https://www.kohls.com/" + } + }, + { + "match_function_name": "url_semantic_match", + "content": { + "key": "search", + "reference_answer": "Decide whether is searching for spiderman toy", + "url": "https://www.kohls.com/search.jsp?submit-search=web-ta-keyword&search=spiderman+toys&kls_sbp=59987601549248944582634263361106786813" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "CN", + "reference_answer": "AgeAppropriate:Kids", + "url": "https://www.kohls.com/search/kids.jsp?CN=AgeAppropriate:Kids&search=spiderman%20toys&S=1&PPP=48&kls_sbp=87660515555844928264416369496642981542&pfm=search%20refine" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "S", + "reference_answer": "4", + "url": "https://www.kohls.com/search/kids.jsp?CN=AgeAppropriate:Kids&search=spiderman%20toys&kls_sbp=87660515555844928264416369496642981542&pfm=search%20refine&PPP=48&S=4&sks=true" + } + } + ], + "time": "2024-05-20 05:01:18.502" + }, + { + "index": "idxs7XdOs-DEu_l7d5gDu", + "task": "Find a store in Tempe, Arizona, make it my store, and then visit the store page and see whats happening in store on ikea.", + "reference_task_length": 10, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "ikea.", + "url": "https://www.ikea.com/" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": 
{ + "reference_answer": "#google-maps-store-select ", + "netloc": "ikea", + "url": "https://www.ikea.com/us/en/stores/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "stores/", + "url": "https://www.ikea.com/us/en/stores/" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": "#google-maps-store-select >option:nth-child(2)", + "netloc": "ikea", + "url": "https://www.ikea.com/us/en/stores/" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".localstore-buttons__container >button:nth-child(2)>span:nth-child(1)", + "netloc": "ikea", + "url": "https://www.ikea.com/us/en/stores/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/events/ikea-tempe-az/", + "url": "https://www.ikea.com/us/en/stores/events/ikea-tempe-az/" + } + } + ], + "time": "2024-03-12 09:30:55.599" + }, + { + "index": "IfHzHm-U83FzDKIGXVBPB", + "task": "Set the first recommended song on the homepage as a current obsession on last.fm", + "reference_task_length": 4, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "last.fm", + "url": "https://www.last.fm/" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": "#genretodayitem0 >.genretodaybar_wrap >.genretodaybar >.genretodaybar_scroll >.genretoday_track >span:nth-child(1)>a:nth-child(1)", + "netloc": "last", + "url": "https://www.last.fm/" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": "#mantle_skin >header:nth-child(2)>div:nth-child(1)>div:nth-child(2)>div:nth-child(5)>div:nth-child(2)>div:nth-child(3)>button:nth-child(1)", + "netloc": "last", + "url": 
"https://www.last.fm/" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": "#header-more-actions-41dc55af-ce49-4a96-89f6-47247a5bdf1c >li:nth-child(1)>.dropdown-menu-clickable-item.more-item--obsession ", + "netloc": "last", + "url": "https://www.last.fm/" + } + } + ], + "time": "2024-03-25 06:07:45.480" + }, + { + "index": "Im8n7iG6o0643zl_qS95X", + "task": "Browse the page with event planning tips on eventbrite", + "reference_task_length": 5, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "eventbrite.", + "url": "https://www.eventbrite.com/organizer/overview/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "category/event-planning", + "url": "https://www.eventbrite.com/blog/category/event-planning/" + } + } + ], + "time": "2024-05-28 03:30:06.935" + }, + { + "index": "IQds4ENVIZXE_5c4KUNIM", + "task": "What are the family rides available at frontier city? 
on sixflags", + "reference_task_length": 7, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "sixflags.", + "url": "https://www.sixflags.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/frontiercity/", + "url": "https://www.sixflags.com/frontiercity/store/tickets" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/family-rides", + "url": "https://www.sixflags.com/frontiercity/things-to-do/rides/family-rides#family-ride" + } + } + ], + "time": "2024-05-24 07:08:01.654" + }, + { + "index": "jfjr1xGSPsucAuuUEBwCR", + "task": "Add to my wish list the highest rated activity in Amsterdam on viator", + "reference_task_length": 6, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "viator.", + "url": "https://www.viator.com/?m=28353&supag=122704388281&supca=12512866044&supsc=kwd-270303623&supai=504932286140&supdv=c&supnt=g&suplp=9069536&supli=&supti=kwd-270303623&tsem=true&supci=kwd-270303623&supap1=&supap2=&supfi=&gad_source=1&gclid=Cj0KCQjwxqayBhDFARIsAANWRnTu9xrLeHE6TC3-BDjL2mm8xSrtUXbCs-5JXo32-w7N5UrlIFkVfF8aAvn4EALw_wcB" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/Amsterdam/", + "url": "https://www.viator.com/Amsterdam/d525-ttd" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "sortType", + "reference_answer": "rating", + "url": "https://www.viator.com/Amsterdam/d525-ttd?sortType=rating" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".productListCardWrapper__VlW0.borderTopBottom__1o7N >.imageWrapper__1McA >.imageContainer__1Y4g >.imageFeaturesContainer__JRmJ >.addToWishlistBtn__2VnP >.button__11F2 >.wishlistBadge__2XdE 
>.heart__1EjT >div:nth-child(1)>svg:nth-child(1)", + "netloc": "viator", + "url": "https://www.viator.com/Amsterdam/d525-ttd?sortType=rating" + } + } + ], + "time": "2024-05-20 04:41:01.489" + }, + { + "index": "jIAFDj4vUn97eopTKyLVB", + "task": "Check the current standings for MLS on foxsports", + "reference_task_length": 4, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "foxsports.", + "url": "https://www.foxsports.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/soccer/mls", + "url": "https://www.foxsports.com/soccer/mls" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/standings", + "url": "https://www.foxsports.com/soccer/mls/standings" + } + } + ], + "time": "2024-05-22 05:00:23.740" + }, + { + "index": "jxEtZVxPTK_fFF7x3Qw7h", + "task": "Find the statistics of the player Cristiano Ronaldo for the national team of Portugal in all the world cups.", + "reference_task_length": 10, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "espn.", + "url": "https://www.espn.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "_/id/22774", + "url": "https://www.espn.com/soccer/player/_/id/22774/cristiano-ronaldo" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/team/482", + "url": "https://www.espn.com/soccer/player/stats/_/id/22774/team/482/type/fifa.world" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/fifa.world", + "url": "https://www.espn.com/soccer/player/stats/_/id/22774/team/482/type/fifa.world" + } + } + ], + "time": "2024-03-24 12:33:53.575" + }, + { + "index": "jYlUoyfeTB1iPEyIUY1zc", + "task": "Show 
schedule for East Boston Ferry outbound Lewis Wharf and Long Wharf (North) stop on mbta", + "reference_task_length": 9, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "mbta.", + "url": "https://www.mbta.com/" + } + }, + { + "match_function_name": "url_semantic_match", + "content": { + "key": "query", + "reference_answer": "Decide whether are searching for East Boston Ferry", + "url": "https://www.mbta.com/schedules/Boat-EastBoston/timetable?from=search-route--ferry&query=East+Boston+Ferry" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/line", + "url": "https://www.mbta.com/schedules/Boat-EastBoston/line" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "schedule_finder[origin]", + "reference_answer": "Boat-Long", + "url": "https://www.mbta.com/schedules/Boat-EastBoston/line?schedule_finder%5Bdirection_id%5D=0&schedule_finder%5Borigin%5D=Boat-Long" + } + } + ], + "time": "2024-05-27 03:27:02.166" + }, + { + "index": "Ka0lgX3cJ7lKD1Wm7SbUI", + "task": "Find technical specs for the latest Macbook Air on apple", + "reference_task_length": 4, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "apple.", + "url": "https://www.apple.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/macbook-air/", + "url": "https://www.apple.com/macbook-air/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/specs/", + "url": "https://www.apple.com/macbook-air/specs/" + } + } + ], + "time": "2024-05-21 06:01:26.315" + }, + { + "index": "kJrcFAx5Wmx2ectYN5vt7", + "task": "Find out what popular events are being held this weekend in the category performing and visual arts near Chester, UK on eventbrite", + "reference_task_length": 
10, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "eventbrite.", + "url": "https://www.eventbrite.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/united-kingdom--chester", + "url": "https://www.eventbrite.com/d/united-kingdom--chester/arts--events--this-weekend/?page=1" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/arts--", + "url": "https://www.eventbrite.com/d/united-kingdom--chester/arts--events--this-weekend/?page=1" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "events", + "url": "https://www.eventbrite.com/d/united-kingdom--chester/arts--events--this-weekend/?page=1" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "--this-weekend", + "url": "https://www.eventbrite.com/d/united-kingdom--chester/arts--events--this-weekend/?page=1" + } + } + ], + "time": "2024-03-28 03:26:17.468" + }, + { + "index": "kpgHV45mGv1RTSDhyp9Da", + "task": "Find the last game of the season for the Toronto Raptors on sports.yahoo.", + "reference_task_length": 5, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "sports.yahoo.", + "url": "https://sports.yahoo.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/nba", + "url": "https://sports.yahoo.com/nba/teams/toronto/schedule" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/toronto", + "url": "https://sports.yahoo.com/nba/teams/toronto/schedule" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/schedule/", + "url": 
"https://sports.yahoo.com/nba/teams/toronto/schedule" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": "#Col2-7-TeamSchedule-Proxy >div:nth-child(1)>div:nth-child(2)>div:nth-child(4)>div:nth-child(1)>div:nth-child(2)>a:nth-child(1)>div:nth-child(2)", + "netloc": "sports", + "url": "https://sports.yahoo.com/nba/teams/toronto/" + } + } + ], + "time": "2024-03-25 03:00:26.772" + }, + { + "index": "kYgJp5lpek_m7MOQiyrgO", + "task": "Find solutions for Airport and fill the contact form with message to \"Send Brochure\". Contact information John Smith. Email: abc@abc.com. Phone #: 88889999 on flightaware", + "reference_task_length": 18, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "flightaware.", + "url": "https://www.flightaware.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/airports", + "url": "https://industry.flightaware.com/airports" + } + }, + { + "match_function_name": "element_value_exactly_match", + "content": { + "reference_answer": "John", + "netloc": "industry", + "path": ".hs_firstname.hs-firstname.hs-fieldtype-text.field.hs-form-field >.input >.hs-input ", + "url": "https://industry.flightaware.com/airports" + } + }, + { + "match_function_name": "element_value_exactly_match", + "content": { + "reference_answer": "Smith", + "netloc": "industry", + "path": ".hs_lastname.hs-lastname.hs-fieldtype-text.field.hs-form-field >.input >.hs-input ", + "url": "https://industry.flightaware.com/airports" + } + }, + { + "match_function_name": "element_value_exactly_match", + "content": { + "reference_answer": "abc@abc.com", + "netloc": "industry", + "path": ".hs_email.hs-email.hs-fieldtype-text.field.hs-form-field >.input >.hs-input ", + "url": "https://industry.flightaware.com/airports" + } + }, + { + "match_function_name": 
"element_value_exactly_match", + "content": { + "reference_answer": "88889999", + "netloc": "industry", + "path": ".hs_phone.hs-phone.hs-fieldtype-phonenumber.field.hs-form-field >.input >.hs-input ", + "url": "https://industry.flightaware.com/airports" + } + }, + { + "match_function_name": "element_value_semantic_match", + "content": { + "reference_answer": "Decide whether the content is to send brochure", + "netloc": "industry", + "path": ".hs-input.hs-fieldtype-textarea ", + "url": "https://industry.flightaware.com/airports" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".hs-button.primary.large ", + "netloc": "industry", + "url": "https://industry.flightaware.com/airports" + } + } + ], + "time": "2024-05-27 03:37:36.450" + }, + { + "index": "L5MT6CydayMS3Fz4GEPEk", + "task": "Find Toyota Corolla from the year 2018 to 2023 in red color on carmax.", + "reference_task_length": 13, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "carmax.", + "url": "https://www.carmax.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "year", + "reference_answer": "2018", + "url": "https://www.carmax.com/cars/toyota/corolla/corolla-cross/corolla-cross-hybrid/corolla-hatchback/corolla-hybrid/corolla-im/red?year=2018-2023" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/corolla", + "url": "https://www.carmax.com/cars/toyota/corolla/corolla-cross/corolla-cross-hybrid/corolla-hatchback/corolla-hybrid/corolla-im/red?year=2018-2023" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/red", + "url": "https://www.carmax.com/cars/toyota/corolla/corolla-cross/corolla-cross-hybrid/corolla-hatchback/corolla-hybrid/corolla-im/red?year=2018-2023" + } + }, + { + "match_function_name": 
"url_included_match", + "content": { + "key": "year", + "reference_answer": "2023", + "url": "https://www.carmax.com/cars/toyota/corolla/corolla-cross/corolla-cross-hybrid/corolla-hatchback/corolla-hybrid/corolla-im/red?year=2018-2023" + } + } + ], + "time": "2024-03-25 06:18:12.062" + }, + { + "index": "l730hRXeN7jXAoonQC9IY", + "task": "Find the current NFL standings for the AFC East division and go to the page of which team is in first place on nfl.", + "reference_task_length": 4, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "nfl.", + "url": "https://www.nfl.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/standings/", + "url": "https://www.nfl.com/standings/" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".d3-l-grid--inner >div:nth-child(1)>.d3-o-table--horizontal-scroll >table:nth-child(1)>tbody:nth-child(3)>tr:nth-child(1)>td:nth-child(1)>.d3-o-club-info >.d3-o-club-shortname ", + "netloc": "nfl", + "url": "https://www.nfl.com/standings/" + } + } + ], + "time": "2024-03-23 07:14:46.033" + }, + { + "index": "ll6ksU97ZFOPlu5wY7kEE", + "task": "Check the national cafe menu on amtrak", + "reference_task_length": 6, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "amtrak.", + "url": "https://www.amtrak.com/home.html" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/onboard", + "url": "https://www.amtrak.com/onboard.html" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/meals-dining", + "url": "https://www.amtrak.com/onboard/meals-dining.html" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + 
"reference_answer": "/cafe-car", + "url": "https://www.amtrak.com/onboard/meals-dining/cafe-car.html" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".show__tablet-and-above.text-with-image__text-block >ul:nth-child(5)>li:nth-child(1)>a:nth-child(1)>.secondary-link-dark ", + "netloc": "amtrak", + "url": "https://www.amtrak.com/onboard/meals-dining/cafe-car.html" + } + } + ], + "time": "2024-03-24 12:24:44.634" + }, + { + "index": "LPKoeVVlIW8fdc7yB428V", + "task": "View all of the Most Popular TV on rottentomatoes.", + "reference_task_length": 3, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "rottentomatoes.", + "url": "https://www.rottentomatoes.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/browse/tv_series_browse/sort:popular", + "url": "https://www.rottentomatoes.com/browse/tv_series_browse/sort:popular" + } + } + ], + "time": "2024-03-23 08:15:33.298" + }, + { + "index": "mg_XYywJFY2zB_ESXajdY", + "task": "Compare Apple watches and learn more about the ultra version on apple", + "reference_task_length": 4, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "apple.", + "url": "https://www.apple.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/apple-watch-ultra-2", + "url": "https://www.apple.com/watch/compare/" + } + } + ], + "time": "2024-05-26 05:03:19.513" + }, + { + "index": "mpwaLSXFnPbo0DPo9Bybk", + "task": "Search for the playlists \"Pop Workout mix\" and filtered by tag #Dance & edm on soundcloud.", + "reference_task_length": 7, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "soundcloud.", + "url": "https://soundcloud.com/" + 
} + }, + { + "match_function_name": "url_semantic_match", + "content": { + "key": "q", + "reference_answer": "Decide whether are searching for pop workout mix", + "url": "https://soundcloud.com/search/sets?q=pop%20workout%20mix&filter.genre=dance%20%26%20edm" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "filter.genre", + "reference_answer": "dance & edm", + "url": "https://soundcloud.com/search/sets?q=pop%20workout%20mix&filter.genre=dance%20%26%20edm" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/search/sets", + "url": "https://soundcloud.com/search/sets?q=pop%20workout%20mix&filter.genre=dance%20%26%20edm" + } + } + ], + "time": "2024-03-23 08:20:25.855" + }, + { + "index": "n5PRoGseX0FMADP6ksDd8", + "task": "Check in with confirmation number 10987654 for my flight with last name Lewis on united.", + "reference_task_length": 7, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "united.", + "url": "https://www.united.com/en/gb" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": "#checkInTab >h2:nth-child(2)>div:nth-child(1)>div:nth-child(1)", + "netloc": "united", + "url": "https://www.united.com/en/gb" + } + }, + { + "match_function_name": "element_value_exactly_match", + "content": { + "reference_answer": "10987654", + "netloc": "united", + "path": "#flightCheckInConfNumber ", + "url": "https://www.united.com/en/gb" + } + }, + { + "match_function_name": "element_value_exactly_match", + "content": { + "reference_answer": "Lewis", + "netloc": "united", + "path": "#flightCheckInLastName ", + "url": "https://www.united.com/en/gb" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": "#formSubmitBtn >.atm-c-btn__text ", + "netloc": "united", + 
"url": "https://www.united.com/en/gb" + } + } + ], + "time": "2024-03-22 09:11:47.361" + }, + { + "index": "NIl5RrMRtKKn0UTcJOMND", + "task": "Check balance of gift card no. 1234567 with pin 0001 on marriott", + "reference_task_length": 10, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "marriott.", + "url": "https://www.marriott.com/default.mi?nst=paid&cid=PAI_GLB0004EMN_GLE000AE45_GLF000KWXG&ppc=ppc&pId=nancppc&gad_source=1&gclid=CjwKCAjwnv-vBhBdEiwABCYQA-XQ4aocWC7L52fCOIGs9Z0y465fq_nZqFinJ2aYoz-7qg-GfbXY2hoCtPEQAvD_BwE&gclsrc=aw.ds" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "gifts.", + "url": "https://gifts.marriott.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/check-balance/", + "url": "https://gifts.marriott.com/check-balance/" + } + }, + { + "match_function_name": "element_value_exactly_match", + "content": { + "reference_answer": "1234567", + "netloc": "gifts", + "path": "#cws_txt_gcNum ", + "url": "https://gifts.marriott.com/check-balance/" + } + }, + { + "match_function_name": "element_value_exactly_match", + "content": { + "reference_answer": "0001", + "netloc": "gifts", + "path": "#cws_txt_gcPin ", + "url": "https://gifts.marriott.com/check-balance/" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": "#cws_btn_checkGcBalance ", + "netloc": "gifts", + "url": "https://gifts.marriott.com/check-balance/" + } + } + ], + "time": "2024-03-25 06:01:59.182" + }, + { + "index": "nxgeQF2vUcHpMBs7inZ3I", + "task": "Search Cantonese food near Chicago, IL that are open now and priced $$ on yelp", + "reference_task_length": 9, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "yelp.", + "url": "https://www.yelp.com/" 
+ } + }, + { + "match_function_name": "url_semantic_match", + "content": { + "key": "find_desc", + "reference_answer": "Decide whether are searching for Cantonese food", + "url": "https://www.yelp.com/search?find_desc=Cantonese+food&find_loc=Chicago%2C+IL" + } + }, + { + "match_function_name": "url_semantic_match", + "content": { + "key": "find_loc", + "reference_answer": "Decide whether are searching for Chicago, IL", + "url": "https://www.yelp.com/search?find_desc=Cantonese+food&find_loc=Chicago%2C+IL" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "attrs", + "reference_answer": "RestaurantsPriceRange2.2", + "url": "https://www.yelp.com/search?find_desc=Cantonese+food&find_loc=Chicago%2C+IL&attrs=RestaurantsPriceRange2.2" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "open_now", + "reference_answer": "724", + "url": "https://www.yelp.com/search?find_desc=Cantonese+food&find_loc=Chicago%2C+IL&attrs=RestaurantsPriceRange2.2&open_now=724" + } + } + ], + "time": "2024-05-27 04:06:48.866" + }, + { + "index": "nYUIGaNMtV6sxuJhv_b2S", + "task": "Get the frozen vegan cheese pizza between 5 to 10 usd on target.", + "reference_task_length": 17, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "target.", + "url": "https://www.target.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "Zgulk4", + "url": "https://www.target.com/c/frozen-pizza-foods-grocery/-/N-5xsz4Zal25lfgbks1Z76zjqZgulk4?moveTo=product-list-grid" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "Z76zjq", + "url": "https://www.target.com/c/frozen-pizza-foods-grocery/-/N-5xsz4Zal25lfgbks1Z76zjqZgulk4?moveTo=product-list-grid" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": 
"Zal25lfgbks1", + "url": "https://www.target.com/c/frozen-pizza-foods-grocery/-/N-5xsz4Zal25lfgbks1Z76zjqZgulk4?moveTo=product-list-grid" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/frozen-pizza-foods-grocery/", + "url": "https://www.target.com/c/frozen-pizza-foods-grocery/-/N-5xsz4Zal25lfgbks1Z76zjqZgulk4?moveTo=product-list-grid" + } + } + ], + "time": "2024-03-25 06:49:05.838" + }, + { + "index": "p5Rh2uknkoIwgRqseo6wB", + "task": "Find bluetooth vertical mouse with most reviews and add two to my shopping cart on newegg.", + "reference_task_length": 9, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "newegg.", + "url": "https://www.newegg.com/" + } + }, + { + "match_function_name": "url_semantic_match", + "content": { + "key": "d", + "reference_answer": "Decide whether are searching for bluetooth vertical mouse", + "url": "https://www.newegg.com/p/pl?d=bluetooth+vertical+mouse&Order=5" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "Order", + "reference_answer": "5", + "url": "https://www.newegg.com/p/pl?d=bluetooth+vertical+mouse&Order=5" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".qty-box-plus.fas.fa-plus ", + "netloc": "newegg", + "url": "https://www.newegg.com/kensington-k72356us/p/0TP-001D-00518" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".nav-col >.btn.btn-primary.btn-wide ", + "netloc": "newegg", + "url": "https://www.newegg.com/kensington-k72356us/p/0TP-001D-00518" + } + } + ], + "time": "2024-03-23 07:52:30.488" + }, + { + "index": "Peid7WPQJoMdIBjhIKOwP", + "task": "Find the page with instructions on how to return orders online on uniqlo", + "reference_task_length": 6, + "evaluation": [ + { + 
"match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "uniqlo.", + "url": "https://www.uniqlo.com/us/en/" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".Navigation.-top.Lead_Body >span:nth-child(1)>li:nth-child(6)>.Navigation_Link.-single.js-Track ", + "netloc": "faq-us", + "url": "https://faq-us.uniqlo.com/articles/en_US/FAQ/Returns-and-Exchanges/?l=en_US&c=category_uq_us%3AUQ_C1_7&fs=Search&pn=1" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/Returns-and-Exchanges", + "url": "https://faq-us.uniqlo.com/articles/en_US/FAQ/Returns-and-Exchanges/?l=en_US&c=category_uq_us%3AUQ_C1_7&fs=Search&pn=1" + } + } + ], + "time": "2024-05-26 04:20:24.764" + }, + { + "index": "pzgRb65Q_C-IqoATH6QCe", + "task": "Find the weekly ad for the store closest to zip code 82718 on menards", + "reference_task_length": 8, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "menards.", + "url": "https://www.menards.com/main/home.html" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "store", + "reference_answer": "3325", + "url": "https://www.menards.com/store-details/store.html?store=3325" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/store-details/store", + "url": "https://www.menards.com/store-details/store.html?store=3325" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/main/flyerselectstore", + "url": "https://www.menards.com/main/flyerselectstore.html" + } + } + ], + "time": "2024-03-27 11:58:37.236" + }, + { + "index": "QiIBNsu__PJ_wBbTxAzvf", + "task": "Add a Swivel vacuum under $150 to my cart on menards.", + "reference_task_length": 8, + "evaluation": [ + { + 
"match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "menards.", + "url": "https://www.menards.com/main/home.html" + } + }, + { + "match_function_name": "url_semantic_match", + "content": { + "key": "search", + "reference_answer": "Decide whether are searching for swivel vacuum", + "url": "https://www.menards.com/main/search.html?search=swivel+vacuum&priceMax_facet=150" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "priceMax_facet", + "reference_answer": "150", + "url": "https://www.menards.com/main/search.html?search=swivel+vacuum&priceMax_facet=150" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": "#searchItems >div:nth-child(4)>div:nth-child(1)>.details >.pb-4.row >.col-12.pt-3.xs-single-col-9 >div:nth-child(2)>div:nth-child(1)>.btn.btn-block.btn-outline-primary.d-print-none ", + "netloc": "menards", + "url": "https://www.menards.com/main/search.html?search=swivel+vacuum&priceMax_facet=150" + } + } + ], + "time": "2024-03-25 05:42:00.048" + }, + { + "index": "QrK41w5BG8GFDYzNHA0yn", + "task": "What are the food festivals happening in Colorado This weekend? 
on eventbrite", + "reference_task_length": 9, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "eventbrite.", + "url": "https://www.eventbrite.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/united-states--colorado", + "url": "https://www.eventbrite.com/d/united-states--colorado/food-festival/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/food-festival", + "url": "https://www.eventbrite.com/d/united-states--colorado/food-festival/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/events--this-weekend", + "url": "https://www.eventbrite.com/d/united-states--colorado/food-festival/" + } + } + ], + "time": "2024-05-24 09:09:40.399" + }, + { + "index": "qwx9Eadt7js7dG6ixaUPU", + "task": "Explore the trending playlists,filter by tag #Electronics, play the first playlist and like it on soundcloud", + "reference_task_length": 9, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "soundcloud.", + "url": "https://soundcloud.com/" + } + }, + { + "match_function_name": "url_semantic_match", + "content": { + "key": "q", + "reference_answer": "Decide whether is searching for trending playlists", + "url": "https://soundcloud.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/sets", + "url": "https://soundcloud.com/search/sets?q=trending%20playlists" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "filter.genre", + "reference_answer": "electronic", + "url": "https://soundcloud.com/search/sets?q=trending%20playlists&filter.genre=electronic" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + 
"reference_answer": ".lazyLoadingList__list.sc-list-nostyle.sc-clearfix >li:nth-child(2)>.searchItem >.sound.searchItem__trackItem.playlist.streamContext >.sound__body >.sound__content >div:nth-child(1)>.soundTitle.sc-clearfix.sc-hyphenate.sc-type-h2.sc-text-h4.streamContext >.soundTitle__titleContainer >.soundTitle__playButton >.sc-button-play.playButton.sc-button.sc-button-xlarge ", + "netloc": "soundcloud", + "url": "https://soundcloud.com/search/sets?q=trending%20playlists&filter.genre=electronic" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".sound.searchItem__trackItem.playlist.streamContext.playing >.sound__body >.sound__content >.sound__footer.g-all-transitions-300 >div:nth-child(2)>.soundActions.sc-button-toolbar.soundActions__small >.sc-button-group.sc-button-group-small >.sc-button-like.sc-button-secondary.sc-button.sc-button-small.sc-button-responsive ", + "netloc": "soundcloud", + "url": "https://soundcloud.com/search/sets?q=trending%20playlists&filter.genre=electronic" + } + } + ], + "time": "2024-05-21 04:58:40.524" + }, + { + "index": "rEHHIcDi9AMBMK1DDXpbN", + "task": "Browse through the Las Vegas city guide and find message services nearest to Henderson, the service provider should have a BBB rating of A+ on yellowpages", + "reference_task_length": 11, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "yellowpages.", + "url": "https://www.yellowpages.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/las-vegas-nv", + "url": "https://www.yellowpages.com/las-vegas-nv" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "refinements", + "reference_answer": "bbb_grade_display:1", + "url": 
"https://www.yellowpages.com/las-vegas-nv/massage-therapists?refinements=bbb_grade_display%3A1&refinements=neighborhood%3AHenderson" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "refinements", + "reference_answer": "neighborhood:Henderson", + "url": "https://www.yellowpages.com/las-vegas-nv/massage-therapists?refinements=bbb_grade_display%3A1&refinements=neighborhood%3AHenderson" + } + } + ], + "time": "2024-05-27 03:49:51.803" + }, + { + "index": "rG7Bw2NN2bQ7CRchbMrAq", + "task": "Add The Wire to the watchlist on tvguide", + "reference_task_length": 6, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "tvguide.", + "url": "https://www.tvguide.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/tvshows/the-wire/", + "url": "https://www.tvguide.com/" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".c-buttonAddToWatchlist.g-text-bold.u-text-center.u-text-capitalize.o-button.o-button-large.o-button-icon.o-button-smallRound.o-button-primary ", + "netloc": "tvguide", + "url": "https://www.tvguide.com/" + } + } + ], + "time": "2024-05-20 05:18:22.420" + }, + { + "index": "RtYZe1pcgX6-TGZAmhyf6", + "task": "Find 5 star rated saltwater rods on cabelas.", + "reference_task_length": 6, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "cabelas.", + "url": "https://www.cabelas.com/shop/en" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "5 Stars", + "url": "https://www.cabelas.com/l/saltwater-rods#f-bvratings=5%20Stars" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "saltwater-rods", + "url": 
"https://www.cabelas.com/l/saltwater-rods#f-bvratings=5%20Stars" + } + } + ], + "time": "2024-03-22 07:22:39.119" + }, + { + "index": "rUyFGVLAMz748JXIqLlqE", + "task": "Add Pro Display XDR with nano texture to bag with all the accessories on apple", + "reference_task_length": 9, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "apple.", + "url": "https://www.apple.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/pro-display-xdr/", + "url": "https://www.apple.com/pro-display-xdr/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/shop/buy-mac/", + "url": "https://www.apple.com/shop/buy-mac/pro-display-xdr" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".rc-dimension.rf-flagship-productselection-dimension.rf-flagship-productselection-dimensionfinish >div:nth-child(3)>div:nth-child(2)>label:nth-child(2)>.row.row-logical >span:nth-child(1)>span:nth-child(1)", + "netloc": "apple", + "url": "https://www.apple.com/shop/buy-mac/pro-display-xdr" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/nano-glass", + "url": "https://www.apple.com/shop/buy-mac/pro-display-xdr" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".rf-accessories-wrapper >div:nth-child(1)>.rf-simpleaccessorytileviewslot.rf-simpleaccessorytileviewslot-enabled.row.as-l-container >.rf-simpleaccessorytileviewslot-section.column.large-6.large-offset-1.small-12.small-offset-0 >fieldset:nth-child(1)>.rf-simpleaccessorytileview-selectors >ul:nth-child(1)>li:nth-child(2)>label:nth-child(2)>.row >span:nth-child(1)>span:nth-child(1)", + "netloc": "apple", + "url": 
"https://www.apple.com/shop/buy-mac/pro-display-xdr" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".rf-simpleaccessorytileviewslot.rf-simpleaccessorytileviewslot-enabled.row.as-l-container >.rf-simpleaccessorytileviewslot-section.column.large-6.large-offset-1.small-12.small-offset-0 >fieldset:nth-child(1)>.rf-simpleaccessorytileview-selectors >ul:nth-child(1)>li:nth-child(2)>label:nth-child(2)>.row >span:nth-child(1)>span:nth-child(1)", + "netloc": "apple", + "url": "https://www.apple.com/shop/buy-mac/pro-display-xdr" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".button.button-block ", + "netloc": "apple", + "url": "https://www.apple.com/shop/buy-mac/pro-display-xdr/nano-glass" + } + } + ], + "time": "2024-05-21 17:50:41.841" + }, + { + "index": "sPw15prlAwsXoQo8Pwikg", + "task": "Find a list of Tours that contain visits to the Louvre rated 5 stars on viator", + "reference_task_length": 5, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "viator.", + "url": "https://www.viator.com/?m=28353&supag=122704388281&supca=12512866044&supsc=aud-2226856787438:kwd-270303623&supai=504932286194&supdv=c&supnt=g&suplp=9069536&supli=&supti=aud-2226856787438:kwd-270303623&tsem=true&supci=aud-2226856787438:kwd-270303623&supap1=&supap2=&supfi=&gad_source=1&gclid=Cj0KCQjwjLGyBhCYARIsAPqTz1-V-L8zxTbqxq2AD5c-WgIk5G9vCYhsbUZ3nRkL4e74DAOtKPl4hY4aAjD-EALw_wcB" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/Paris-attractions/Louvre/", + "url": "https://www.viator.com/Paris-attractions/Louvre/d479-a73" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": "#experiences >.tabLink__oq7s ", + "netloc": "viator", + "url": 
"https://www.viator.com/Paris-attractions/Louvre/d479-a73" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".oneColumn__3Jg_.smallSpacingOptions__WkkC.lastVisibleOptionsGroup__c73G >div:nth-child(1)>.radioButton__1K9n >.inputRadio__33yh.md__1d8H ", + "netloc": "viator", + "url": "https://www.viator.com/Paris-attractions/Louvre/d479-a73#experiences" + } + } + ], + "time": "2024-05-22 05:09:24.469" + }, + { + "index": "SxYhvlAHYqr92CmFHyHhq", + "task": "Browse the list of top 250 movies and add the first one to my watchlist on imdb", + "reference_task_length": 6, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "imdb.", + "url": "https://www.imdb.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/chart/top/", + "url": "https://www.imdb.com/chart/top/?ref_=nv_mv_250" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".ipc-metadata-list.ipc-metadata-list--dividers-between.sc-a1e81754-0.eBRbsI.compact-list-view.ipc-metadata-list--base >li:nth-child(1)>.sc-10233bc-1.lkPiVh.cli-post-element >.ipc-icon-button.cli-info-icon.ipc-icon-button--base.ipc-icon-button--onAccent2 ", + "netloc": "imdb", + "url": "https://www.imdb.com/chart/top/?ref_=nv_mv_250" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".sc-d3701649-1.gtFZWJ >button:nth-child(2)", + "netloc": "imdb", + "url": "https://www.imdb.com/chart/top/?ref_=nv_mv_250" + } + } + ], + "time": "2024-05-20 04:49:26.511" + }, + { + "index": "szKuTh5WSKAHTrt6oCi1i", + "task": "Find thrill rides in Six Flags Great America, Chicago, IL on sixflags", + "reference_task_length": 7, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": 
{ + "key": "", + "reference_answer": "sixflags.", + "url": "https://www.sixflags.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/greatamerica", + "url": "https://www.sixflags.com/greatamerica/store/tickets/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/thrill-rides", + "url": "https://www.sixflags.com/greatamerica/things-to-do/rides/thrill-rides#thrill-ride" + } + } + ], + "time": "2024-05-24 08:18:13.156" + }, + { + "index": "SzZoAslJMjCSQ2YU0B437", + "task": "Show me a list of electronic music dvds in very good condition on discogs", + "reference_task_length": 6, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "discogs.", + "url": "https://www.discogs.com/" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "genre", + "reference_answer": "Electronic", + "url": "https://www.discogs.com/sell/list?genre=Electronic" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "format", + "reference_answer": "DVD", + "url": "https://www.discogs.com/sell/list?genre=Electronic&format=DVD" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "condition", + "reference_answer": "Very Good (VG)", + "url": "https://www.discogs.com/sell/list?genre=Electronic&format=DVD&condition=Very+Good+%28VG%29" + } + } + ], + "time": "2024-05-26 04:39:08.672" + }, + { + "index": "T-PlxYqdEO-9Qq_oiUNKI", + "task": "Find a walkthrough guide for Assassin's Creed Valhalla on ign", + "reference_task_length": 6, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "ign.", + "url": "https://www.ign.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/assassins-creed-valhalla", + 
"url": "https://www.ign.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/Walkthrough", + "url": "https://www.ign.com/" + } + } + ], + "time": "2024-05-26 04:28:05.637" + }, + { + "index": "TExb4W23ACyVIzZA-rTlw", + "task": "Find more films from the director of Smile on tvguide", + "reference_task_length": 7, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "tvguide.", + "url": "https://www.tvguide.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/celebrities/parker-finn", + "url": "https://www.tvguide.com/celebrities/parker-finn/3060049350/" + } + } + ], + "time": "2024-05-20 05:47:01.289" + }, + { + "index": "TfvF4IF2gNH53xipCdh4W", + "task": "Find help page about buying tickets on seatgeek", + "reference_task_length": 4, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "seatgeek.", + "url": "https://seatgeek.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/8985521334291-Buying-Tickets", + "url": "https://seatgeek.com/help/articles/8985521334291-Buying-Tickets" + } + } + ], + "time": "2024-05-23 06:19:32.799" + }, + { + "index": "ThXZAFmqlj6jJb6QyJ2fG", + "task": "Tell me more about the Adirondack route on amtrak.", + "reference_task_length": 5, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "amtrak.", + "url": "https://www.amtrak.com/home.html" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/routes", + "url": "https://www.amtrak.com/routes.html" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/adirondack-train", + "url": 
"https://www.amtrak.com/routes/adirondack-train.html" + } + } + ], + "time": "2024-03-25 06:22:08.019" + }, + { + "index": "TIanz2U5iPllmhS5vXucP", + "task": "Open the baggage fee calculator on united", + "reference_task_length": 5, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "united.", + "url": "https://www.united.com/en/us" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/baggage-calculator", + "url": "https://www.united.com/en/us/baggage-calculator/any-flights" + } + } + ], + "time": "2024-05-26 05:16:41.406" + }, + { + "index": "TPEK67bCB000EObLJJumy", + "task": "Show me the best city tours on nyc", + "reference_task_length": 3, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "nyc.", + "url": "https://www.nyc.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/guided_tours", + "url": "https://www.nyc.com/guided_tours/" + } + } + ], + "time": "2024-05-24 09:28:31.350" + }, + { + "index": "U7s1xvWgJRv5pCkwrSVyZ", + "task": "Find the team schedule of the Brooklyn Nets on espn", + "reference_task_length": 4, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "espn.", + "url": "https://www.espn.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/brooklyn-nets", + "url": "https://www.espn.com/nba/team/_/name/bkn/brooklyn-nets" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/schedule", + "url": "https://www.espn.com/nba/team/schedule/_/name/bkn/brooklyn-nets" + } + } + ], + "time": "2024-05-26 07:08:21.889" + }, + { + "index": "Ud2nnGYfINKcnIz7xXeUK", + "task": "Browse used Mercedes cars made between 
2004 to 2012 and sort by highest price on kbb", + "reference_task_length": 11, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "kbb.", + "url": "https://www.kbb.com/" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "years", + "reference_answer": "2004-2012", + "url": "https://www.kbb.com/car-finder/?manufacturers=mercedesbenz&years=2004-2012" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "intent", + "reference_answer": "used", + "url": "https://www.kbb.com/car-finder/?intent=used&manufacturers=mercedesbenz&years=2004-2012" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "sort", + "reference_answer": "pricedesc", + "url": "https://www.kbb.com/car-finder/?intent=used&manufacturers=mercedesbenz&years=2004-2012&sort=pricedesc" + } + } + ], + "time": "2024-05-26 05:50:37.604" + }, + { + "index": "UnxkVMof0YIvazXZb8Fc8", + "task": "Show me the scores for the 2019 super bowl on nfl", + "reference_task_length": 7, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "nfl.", + "url": "https://www.nfl.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/scores", + "url": "https://www.nfl.com/scores/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/2019", + "url": "https://www.nfl.com/scores/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/POST4", + "url": "https://www.nfl.com/scores/" + } + } + ], + "time": "2024-05-26 05:08:15.217" + }, + { + "index": "v5stCvJH8fYQhg1gqTxc8", + "task": "Book the cheapest parking spot near Bradley Airport on spothero", + "reference_task_length": 9, + "evaluation": [ + { + "match_function_name": 
"url_included_match", + "content": { + "key": "", + "reference_answer": "spothero.", + "url": "https://spothero.com/" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "id", + "reference_answer": "73377", + "url": "https://spothero.com/search?kind=destination&id=73377&view=dl" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".SearchAirportSort >div:nth-child(1)>div:nth-child(2)>div:nth-child(1)>.Select.FormElement >.FormElement-control >.FormElement-item ", + "netloc": "spothero", + "url": "https://spothero.com/search?kind=destination&id=73377&view=dl" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".SearchAirportSort >div:nth-child(1)>div:nth-child(2)>div:nth-child(1)>.Select.FormElement >.FormElement-control >.FormElement-item >option:nth-child(3)", + "netloc": "spothero", + "url": "https://spothero.com/search?kind=destination&id=73377&starts=2024-03-13T12%3A00&ends=2024-03-17T12%3A00&view=dl&hide_modal=true" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".SearchAirportResults-spots >div:nth-child(1)>div:nth-child(1)>div:nth-child(1)>section:nth-child(1)>.Card.Card-open >.Card-content >.SearchAirportCard-view-details-container >a:nth-child(1)", + "netloc": "spothero", + "url": "https://spothero.com/search?kind=destination&id=73377&view=dl" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".Button.Button-primary.Button-block.AirportPurchaseInformation-book-now ", + "netloc": "spothero", + "url": "https://spothero.com/airport-parking/12793/34-old-county-rd?starts=2024-03-13T12%3A00&ends=2024-03-17T12%3A00&airport=true" + } + } + ], + "time": "2024-03-12 07:02:45.009" + }, + { + "index": "vwfgl1oxzBZBfxdogAWpU", + 
"task": "Browse hot deals near zip code 10019 on koa", + "reference_task_length": 7, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "koa.", + "url": "https://koa.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/hot-deals", + "url": "https://koa.com/hot-deals/" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "q", + "reference_answer": "10019", + "url": "https://koa.com/hot-deals/?q=10019" + } + } + ], + "time": "2024-05-26 06:51:00.521" + }, + { + "index": "wi4SWrbpRBBQkq5cR9bmD", + "task": "Create a new list called Bathroom Remodeling on menards", + "reference_task_length": 8, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "menards.", + "url": "https://www.menards.com/main/home.html" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/myLists", + "url": "https://www.menards.com/main/myLists.html" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": "#addNewListButton ", + "netloc": "menards", + "url": "https://www.menards.com/main/myLists.html" + } + }, + { + "match_function_name": "element_value_exactly_match", + "content": { + "reference_answer": "Bathroom Remodeling", + "netloc": "menards", + "path": "#titleEdit ", + "url": "https://www.menards.com/main/myLists.html" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": "#createEditConfirm ", + "netloc": "menards", + "url": "https://www.menards.com/main/myLists.html" + } + } + ], + "time": "2024-05-26 04:46:30.702" + }, + { + "index": "y8kmCn6l3G18O6GS5LC0A", + "task": "Find the most popular movies and showcase those with the highest IMDb ratings on imdb", + 
"reference_task_length": 6, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "imdb.", + "url": "https://www.imdb.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/chart/moviemeter/", + "url": "https://www.imdb.com/chart/moviemeter/?ref_=nv_mv_mpm" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "sort", + "reference_answer": "release_date,desc", + "url": "https://www.imdb.com/chart/moviemeter/?ref_=nv_mv_mpm&sort=release_date%2Cdesc" + } + } + ], + "time": "2024-05-21 17:43:25.182" + }, + { + "index": "yCHsGNiD35HdNUekfL8hI", + "task": "Show list of popular businesses in Cleveland on yellowpages.", + "reference_task_length": 5, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "yellowpages.", + "url": "https://www.yellowpages.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/cleveland-oh", + "url": "https://www.yellowpages.com/cleveland-oh/business-listings/1" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/business-listings/", + "url": "https://www.yellowpages.com/cleveland-oh/business-listings/1" + } + } + ], + "time": "2024-03-27 11:22:22.644" + }, + { + "index": "zbG_TK-AU6U-xiYf9QOZo", + "task": "See Nissan and Honda cars for sale near Kentwood, MI 49512 on carmax", + "reference_task_length": 12, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "carmax.", + "url": "https://www.carmax.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/cars", + "url": "https://www.carmax.com/cars/all" + } + }, + { + "match_function_name": "element_path_exactly_match", + 
"method": "selector", + "content": { + "reference_answer": "#Distance >.panel-menu-item--label.hzn-typography--headline-2 >h4:nth-child(1)", + "netloc": "carmax", + "url": "https://www.carmax.com/cars/all" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".distance--change-store-link--oxb19 >hzn-text-link:nth-child(1)", + "netloc": "carmax", + "url": "https://www.carmax.com/cars/all" + } + }, + { + "match_function_name": "element_value_exactly_match", + "content": { + "reference_answer": "49512", + "netloc": "carmax", + "path": "#store-chooser-keyword-input ", + "url": "https://www.carmax.com/cars/all" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": "#store-chooser-keyword-search-form >button:nth-child(2)>svg:nth-child(1)", + "netloc": "carmax", + "url": "https://www.carmax.com/cars/all" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": "#store-chooser-modal-body >ul:nth-child(1)>li:nth-child(2)>div:nth-child(2)>button:nth-child(1)", + "netloc": "carmax", + "url": "https://www.carmax.com/cars/all" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/nissan", + "url": "https://www.carmax.com/cars/honda/nissan" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/honda", + "url": "https://www.carmax.com/cars/honda/nissan" + } + } + ], + "time": "2024-03-27 09:21:52.246" + }, + { + "index": "ZCpbMXedfNMd1FSMAJ1sP", + "task": "Browse the clearance section and filter for women's dresses in size small on kohls.", + "reference_task_length": 8, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "kohls.", + "url": "https://www.kohls.com/" + } + }, + { + 
"match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "clearance", + "url": "https://www.kohls.com/catalog/clearance-womens-dresses-clothing.jsp?CN=Promotions:Clearance+Gender:Womens+Category:Dresses+Department:Clothing&icid=wmnsclearance-VN-dresses&pfm=undefined&kls_sbp=59288101756704364110307077102020115511" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "CN", + "reference_answer": "Gender:Womens", + "url": "https://www.kohls.com/catalog/clearance-womens-dresses-clothing.jsp?CN=Promotions:Clearance+Gender:Womens+Category:Dresses+Department:Clothing&icid=wmnsclearance-VN-dresses&pfm=undefined&kls_sbp=59288101756704364110307077102020115511" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "CN", + "reference_answer": "Category:Dresses", + "url": "https://www.kohls.com/catalog/clearance-womens-dresses-clothing.jsp?CN=Promotions:Clearance+Gender:Womens+Category:Dresses+Department:Clothing&icid=wmnsclearance-VN-dresses&pfm=undefined&kls_sbp=59288101756704364110307077102020115511" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "CN", + "reference_answer": "Department:Clothing", + "url": "https://www.kohls.com/catalog/clearance-womens-dresses-clothing.jsp?CN=Promotions:Clearance+Gender:Womens+Category:Dresses+Department:Clothing&icid=wmnsclearance-VN-dresses&pfm=undefined&kls_sbp=59288101756704364110307077102020115511" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "CN", + "reference_answer": "Size:S", + "url": "https://www.kohls.com/catalog/clearance-womens-dresses-clothing.jsp?CN=Promotions:Clearance+Gender:Womens+Category:Dresses+Department:Clothing&icid=wmnsclearance-VN-dresses&pfm=undefined&kls_sbp=59288101756704364110307077102020115511" + } + } + ], + "time": "2024-03-23 08:12:48.582" + }, + { + "index": "zSPmWOxOb0SYdHkC_ubJb", + "task": "Find the schedule for upcoming MLB games 
for the New York Yankees on foxsports", + "reference_task_length": 5, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "foxsports.", + "url": "https://www.foxsports.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/mlb", + "url": "https://www.foxsports.com/mlb" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/new-york-yankees-team", + "url": "https://www.foxsports.com/mlb/new-york-yankees-team" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "new-york-yankees-team-schedule", + "url": "https://www.foxsports.com/mlb/new-york-yankees-team-schedule" + } + } + ], + "time": "2024-05-26 06:27:22.800" + }, + { + "index": "ZZF9RoepyS1ZVAW3RD5k8", + "task": "What are the upcoming soccer events on ESPN2? on espn", + "reference_task_length": 8, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "espn.", + "url": "https://www.espn.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/watch/schedule/_/type/upcoming", + "url": "https://www.espn.com/watch/schedule/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/categoryId/119cfa41-71d4-39bf-a790-6273a52b0259", + "url": "https://www.espn.com/watch/schedule/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/channel/017f41a2-ef4f-39d3-9f45-f680b88cd23b", + "url": "https://www.espn.com/watch/schedule/" + } + } + ], + "time": "2024-05-26 07:02:13.859" + } +] \ No newline at end of file diff --git a/browsergym/webcanvas/src/browsergym/webcanvas/instance.py b/browsergym/webcanvas/src/browsergym/webcanvas/instance.py new 
class WebCanvasInstance:
    """
    Utility class to access a WebCanvas instance.

    Every helper is static: loading the benchmark task list, extracting
    ``(action_type, bid, value)`` triples from an agent action string, and
    scoring one action against the reference key nodes.
    """

    # Recognises e.g. ``click('12')`` or ``fill('a3', 'some text')``.
    # Only single-quoted arguments are matched.
    _ACTION_PATTERN = re.compile(
        r"(fill|click|press|select_option)\('([^']+)',? ?'?(.*?)'?\)")

    def __init__(
        self,
    ) -> None:
        pass

    @staticmethod
    def _build_reference_step(evaluation):
        """Convert one raw ``evaluation`` entry into a scoring step.

        Returns ``None`` for match functions that are neither URL, element
        path nor element value checks (such entries are skipped).
        Raises ``KeyError`` when a required field is missing.
        """
        match_function = evaluation["match_function_name"]
        if "url" in match_function:
            content = evaluation["content"]
            return {"match_function": match_function,
                    "key": content["key"],
                    "reference_answer": content["reference_answer"],
                    "score": 0}
        if "element_path" in match_function:
            content = evaluation["content"]
            return {"match_function": match_function,
                    "method": evaluation["method"],
                    "reference_answer": content["reference_answer"],
                    "netloc": content["netloc"],
                    "score": 0}
        if "element_value" in match_function:
            content = evaluation["content"]
            step = {"match_function": match_function,
                    "reference_answer": content["reference_answer"],
                    "netloc": content["netloc"]}
            # "path" is optional: when present the value check is tied to
            # one specific element.
            if "path" in content:
                step["path"] = content["path"]
            step["score"] = 0
            return step
        return None

    @staticmethod
    def read_task_configs(all_task_configs):
        """Flatten the raw task list into per-task scoring records.

        Args:
            all_task_configs: iterable of task dicts with the keys
                ``task``, ``index``, ``reference_task_length`` and
                ``evaluation``.

        Returns:
            A list of ``[task_name, task_id, reference_task_length,
            reference_evaluate_steps]`` entries, where every step dict
            starts with ``"score": 0``.

        Raises:
            ValueError: an evaluation entry lacks a required field. (The
                previous implementation printed and called ``exit(1)``,
                which terminated the whole host process.)
        """
        return_list = []
        for task in all_task_configs:
            task_name = task["task"]
            task_name_id = task["index"]
            reference_task_length = task["reference_task_length"]
            reference_evaluate_steps = []
            for i, evaluation in enumerate(task["evaluation"]):
                match_function = evaluation["match_function_name"]
                try:
                    step = WebCanvasInstance._build_reference_step(evaluation)
                except (KeyError, TypeError) as err:
                    raise ValueError(
                        f"malformed evaluation in task {task_name_id}, "
                        f"step {i}, match_function: {match_function}"
                    ) from err
                if step is not None:
                    reference_evaluate_steps.append(step)
            return_list.append(
                [task_name, task_name_id, reference_task_length, reference_evaluate_steps])

        return return_list

    @staticmethod
    def parse_bid_from_action(action_str):
        """
        Extracts all actions from the given action_str.

        The string is split on newlines and every line yields one
        ``(action_type, bid, target_value)`` tuple; lines that are not a
        recognised action yield ``("", "", "")``.
        """
        def parse_action(input_str):
            # Surrounding whitespace is ignored so that indented action
            # lines are still recognised.
            match = WebCanvasInstance._ACTION_PATTERN.match(input_str.strip())
            if match is None:
                return "", "", ""
            action_type, bid, target_value = match.groups()
            return action_type, bid, target_value or ""

        return [parse_action(line) for line in action_str.split("\n")]

    @staticmethod
    def evaluate(page, selector, target_value, evaluate_steps, reference_evaluate_steps):
        """Score one action and report overall progress.

        Returns ``(evaluate_steps, step_score_rate, match_result,
        task_finished)`` where ``step_score_rate`` is a string of the form
        ``"<achieved> / <total>"``.
        """
        element_value = ""
        if selector is not None:
            # NOTE(review): when a locator exists the text content of the
            # element is used and ``target_value`` (e.g. the text of a
            # ``fill``) is ignored -- confirm this is intended.
            element_value = selector.text_content()
        elif target_value and target_value != "None":
            element_value = target_value
        evaluate_steps, match_result = step_evaluate(page=page, evaluate_steps=evaluate_steps,
                                                     input_path=selector, element_value=element_value)
        total_step_score = sum(step["score"] for step in evaluate_steps)
        required_score = len(reference_evaluate_steps)
        step_score_rate = f"{total_step_score} / {required_score}"
        task_finished = total_step_score == required_score
        return evaluate_steps, step_score_rate, match_result, task_finished
class GPTGenerator:
    """Thin wrapper around the OpenAI chat-completions client."""

    def __init__(self, model=None):
        self.model = model
        # ``openai_api_key`` is the module-level value read from the
        # OPENAI_API_KEY environment variable.
        self.client = openai.OpenAI(api_key=openai_api_key)

    def request(self, messages: list = None, max_tokens: int = 500, temperature: float = 0.7) -> tuple[str, str]:
        """Send *messages* and return ``(response_text, error_text)``.

        On success ``error_text`` is ``""``. On any failure the response
        is ``""`` and ``error_text`` describes the exception, so callers
        always receive two strings and never an exception object.
        """
        try:
            answer = self.chat(messages, max_tokens, temperature)
            return answer.choices[0].message.content, ""
        except Exception as e:  # best-effort boundary: report, never raise
            return "", f"{type(e).__name__}: {e}"

    def chat(self, messages, max_tokens=500, temperature=0.7):
        """Issue the raw chat-completion call and return the API result."""
        data = {
            'model': self.model,
            'max_tokens': max_tokens,
            'temperature': temperature,
            'messages': messages,
        }
        # ``response_format`` is optional and only forwarded when a
        # subclass or caller has set it on the instance.
        if hasattr(self, 'response_format'):
            data['response_format'] = self.response_format

        return self.client.chat.completions.create(**data)


class GPTGenerator35(GPTGenerator):
    """Generator defaulting to the ``gpt-3.5-turbo`` model."""

    def __init__(self, model=None):
        super().__init__(model=model if model is not None else "gpt-3.5-turbo")


class GPTGenerator4(GPTGenerator):
    """Generator defaulting to the ``gpt-4-turbo`` model."""

    def __init__(self, model=None):
        super().__init__(model=model if model is not None else "gpt-4-turbo")
class BasePrompts:
    # Prompt templates for the LLM-based semantic matcher; the user prompt
    # contains Jinja2 placeholders. These are runtime strings and are kept
    # verbatim.
    semantic_match_prompt_system = "Now you are an assistant to judge whether 2 elements are semantically same. I'll provide a judge rule and an answer.\n"\
        "If they are the same, you should return 1. If they are not related, you should return 0. "\
        "If they are related but not identical, return a decimal (two decimal places) between 0 and 1 of the degree of relevance you think.\n"\
        "For example, the judge rule is: Decide whether the place is New York. The score of \"new york\" and \"纽约\" are both 1, \"Brooklyn\" should be 0.\n"\
        "However, if the judge rule is: Decide whether the place is in New York. The score of \"new york\" and \"纽约\" and \"Brooklyn\" are all 1.\n"\
        "Another example, the judge rule is: Decide whether I'm looking for clothes. The score of \"red Clothes\" and \"green jacket\"should also be 1.\n"\
        "However, if the judge rule is: Decide whether I'm looking for red clothes. the score of \"bright red Clothing\" could be 0.85(red include bright red but they are not the same), the score of \"green Clothes\"should be 0.5(red is not green).\n"\
        "Remember, you should return a number with ``` and an explanation. Like output: ```1```, (your explanation)"  # "Remember, you should only return a number without any punctuation or explanation!"

    semantic_match_prompt_user = "You should judge by the rule below:{{semantic_method}}.\n\nmy answer is:{{input_answer}}\n"


class BasePromptConstructor:
    """Common base for prompt constructors."""

    def __init__(self):
        pass


class SemanticMatchPromptConstructor(BasePromptConstructor):
    """Builds the chat messages used to ask an LLM for a semantic score."""

    def __init__(self):
        super().__init__()
        self.prompt_system = BasePrompts.semantic_match_prompt_system
        # Kept as the unrendered Jinja2 template; see ``construct``.
        self.prompt_user = BasePrompts.semantic_match_prompt_user

    def construct(self, input_answer, semantic_method) -> list:
        """Return the ``[system, user]`` message list for one comparison.

        The user template is rendered into a local variable. Storing the
        rendered text back on ``self.prompt_user`` (as before) destroyed
        the placeholders, so a second call on the same instance silently
        reused the first call's answer and rule.
        """
        rendered_user = Template(self.prompt_user).render(
            semantic_method=semantic_method, input_answer=input_answer)
        return [
            {"role": "system", "content": self.prompt_system},
            {"role": "user", "content": rendered_user},
        ]
# Tags that are usually wrappers around the element a user really targets;
# for these, a hit on one of the nearest three ancestors also counts.
MapTagNameList = [
    "span",
    "h1",
    "h2",
    "h3",
    "h4",
    "h5",
    "h6",
    "div",
    "li",
    "ul",
    "p"
]


class StepEvaluator():
    def __init__(self):
        pass


class URLEvaluator(StepEvaluator):
    '''URL Evaluation Scoring'''

    @staticmethod
    def _query_value(input_url, key):
        """Return the first value of query parameter *key*, or ``None``
        when the URL cannot be parsed or lacks the parameter."""
        try:
            return parse_qs(urlparse(input_url).query)[key][0]
        except (KeyError, IndexError, ValueError, TypeError, AttributeError):
            return None

    @staticmethod
    def url_exact_match(input_url, reference_answer, key=False):
        """Score 1 when the URL (or its query parameter *key*, if given)
        equals *reference_answer* after percent-decoding, else 0."""
        if key:
            input_answer = URLEvaluator._query_value(input_url, key)
            if input_answer is None:
                return 0
        else:
            input_answer = input_url
        return MatchFunction.exact_match(unquote(input_answer), reference_answer)

    @staticmethod
    def url_include_match(input_url, reference_answer, key=None):
        """Score 1 when *reference_answer* is a substring of the target.

        With *key* the target is that query parameter; otherwise it is
        ``netloc + path`` (plus ``#fragment`` when present), i.e. the
        query string is deliberately excluded.
        """
        if key:
            input_answer = URLEvaluator._query_value(input_url, key)
            if input_answer is None:
                return 0
        else:
            try:
                parsed_url = urlparse(input_url)
                input_answer = parsed_url.netloc + parsed_url.path
                if parsed_url.fragment:
                    input_answer += "#" + parsed_url.fragment
            except (ValueError, TypeError, AttributeError):
                # Unparseable URL: fall back to matching the raw string.
                input_answer = input_url
        return MatchFunction.include_match(unquote(input_answer), reference_answer)

    @staticmethod
    def url_semantic_match(input_url, semantic_method, key=False):
        """Ask the LLM judge to score the URL (or query parameter *key*)
        against the rule *semantic_method*."""
        if key:
            input_answer = URLEvaluator._query_value(input_url, key)
            if input_answer is None:
                return 0
        else:
            input_answer = input_url
        return MatchFunction.semantic_match(unquote(input_answer), semantic_method)


class ElementEvaluator(StepEvaluator):
    '''Element evaluation and scoring'''

    @staticmethod
    def is_same_element(page, input_element_handle, reference_element_handle):
        """Return 1 when both handles refer to the same DOM node, else 0."""
        is_same_element = page.evaluate(
            "(elements) => elements[0] === elements[1]",
            [input_element_handle, reference_element_handle])
        return int(is_same_element)

    @staticmethod
    def _xpath_match(input_answer, reference_answer, page):
        """Compare two xpath expressions against the current page HTML."""
        try:
            tree = html.fromstring(page.content())
            input_elements = tree.xpath(input_answer)
            reference_elements = tree.xpath(reference_answer)
        except Exception:
            # Page not readable or xpath invalid: nothing can match.
            return 0
        # xpath() may yield an empty list (or a non-list for scalar
        # expressions); both mean there is no element to compare.
        if not isinstance(input_elements, list) or not isinstance(reference_elements, list):
            return 0
        if not input_elements or not reference_elements:
            return 0
        input_element = input_elements[0]
        reference_element = reference_elements[0]
        if input_element is reference_element:
            return 1
        if getattr(reference_element, "tag", None) in MapTagNameList:
            ancestor = reference_element
            for _ in range(3):
                ancestor = ancestor.getparent()
                if ancestor is None:
                    break
                if input_element is ancestor:
                    return 1
        return 0

    @staticmethod
    def _selector_match(input_answer, reference_answer, page):
        """Compare an input locator with the element found by a selector."""
        if input_answer is None or page is None:
            return 0
        try:
            reference_element = page.locator(reference_answer)
            input_element_handle = input_answer.element_handle()
            reference_element_handle = reference_element.element_handle()
            score = ElementEvaluator.is_same_element(
                page, input_element_handle=input_element_handle,
                reference_element_handle=reference_element_handle)
        except Exception:
            # Locator or handle lookup failed (e.g. element missing).
            return 0
        if score:
            return score
        try:
            reference_tag = page.evaluate(
                "(element) => element.tagName.toLowerCase()", reference_element_handle)
            if reference_tag in MapTagNameList:
                current_element = reference_element
                for _ in range(3):
                    current_element = current_element.locator("xpath=..")
                    parent_element_handle = current_element.element_handle()
                    score = max(score, ElementEvaluator.is_same_element(
                        page, input_element_handle=input_element_handle,
                        reference_element_handle=parent_element_handle))
                    if score:
                        break
        except Exception as e:
            # Ancestor tracing is best-effort; keep the score so far.
            print(e)
        return score

    @staticmethod
    def path_exact_match(input_answer, reference_answer, method, page, input_netloc, reference_netloc):
        """Score 1 when the acted-on element is the reference element.

        Args:
            input_answer: xpath string (``method == "xpath"``) or a
                locator-like object (``method == "selector"``).
            reference_answer: xpath / selector of the reference element.
            method: ``"xpath"`` or ``"selector"``; anything else scores 0.
            page: page object used to read the DOM.
            input_netloc, reference_netloc: site identifiers; a mismatch
                scores 0 without touching the page.

        When the reference element's tag is in ``MapTagNameList``, a hit
        on one of its three nearest ancestors also counts.
        """
        if method not in ("xpath", "selector"):
            return 0
        if reference_netloc != input_netloc:
            return 0
        if method == "xpath":
            return ElementEvaluator._xpath_match(input_answer, reference_answer, page)
        return ElementEvaluator._selector_match(input_answer, reference_answer, page)

    @staticmethod
    def path_included_match(input_answer, reference_answer, method, html_content):
        # TODO Add path inclusion matching method
        result_score = MatchFunction.include_match(
            input_answer, reference_answer)
        return result_score

    @staticmethod
    def element_value_exact_match(input_answer, reference_answer, input_netloc, reference_netloc):
        """Exact value comparison, gated on matching netloc."""
        if reference_netloc != input_netloc:
            return 0
        return MatchFunction.exact_match(input_answer, reference_answer)

    @staticmethod
    def element_value_include_match(input_answer, reference_answer, input_netloc, reference_netloc):
        """Substring value comparison, gated on matching netloc."""
        if reference_netloc != input_netloc:
            return 0
        return MatchFunction.include_match(input_answer, reference_answer)

    @staticmethod
    def element_value_semantic_match(input_answer, semantic_method, input_netloc, reference_netloc=0):
        """LLM-judged value comparison, gated on matching netloc.

        An empty (or missing) value scores 0 without calling the LLM.
        """
        if reference_netloc != input_netloc:
            return 0
        if not input_answer:
            return 0
        return MatchFunction.semantic_match(input_answer, semantic_method)


class TextEvaluator(StepEvaluator):
    '''Text evaluation and scoring'''

    @staticmethod
    def text_exact_match(input_answer, reference_answer):
        return MatchFunction.exact_match(input_answer, reference_answer)

    @staticmethod
    def text_included_match(input_answer, reference_answer):
        return MatchFunction.include_match(input_answer, reference_answer)

    @staticmethod
    def text_semantic_match(input_answer, semantic_method):
        # ``semantic_match`` takes exactly two arguments; the rule used to
        # be passed twice, which raised TypeError on every call.
        return MatchFunction.semantic_match(input_answer, semantic_method)


class MatchFunction:
    """Primitive scoring functions shared by all evaluators."""

    def __init__(self):
        pass

    @staticmethod
    def exact_match(input_answer, reference_answer) -> int:
        return 1 if input_answer == reference_answer else 0

    @staticmethod
    def include_match(input_answer, reference_answer) -> int:
        if input_answer is None:
            return 0
        return 1 if reference_answer in input_answer else 0

    @staticmethod
    def _parse_score(response):
        """Extract the number fenced by triple backticks from *response*.

        Returns the value clamped to ``[0, 1]``, or ``None`` when there
        is no fenced block or it is not a finite number. The text is
        parsed with ``float`` and never evaluated as code, because it
        comes from a language model.
        """
        fenced = re.findall("```(.*?)```", response or "", re.S)
        if not fenced:
            return None
        try:
            value = float(fenced[0].strip())
        except ValueError:
            return None
        if value != value or value in (float("inf"), float("-inf")):
            return None
        return max(0.0, min(1.0, value))

    @staticmethod
    def semantic_match(input_answer, semantic_method) -> float:
        """Ask the LLM judge how well *input_answer* satisfies the rule.

        Makes up to three attempts; returns 0 when no attempt yields a
        parsable score. Scores of exactly 0 or 1 are returned as ``int``,
        anything else rounded to two decimals.
        """
        generator = GPTGenerator35()
        semantic_request = SemanticMatchPromptConstructor(
        ).construct(input_answer, semantic_method)
        score = None
        for _ in range(3):
            response, _error = generator.request(semantic_request)
            score = MatchFunction._parse_score(response)
            if score is not None:
                break
        if score is None:
            return 0
        if score in (0, 1):
            return int(score)
        return round(score, 2)
class GenericWebCanvasTask(AbstractBrowserTask):
    """
    Base class for all WebCanvas tasks.

    A task is selected by its position (``task_id``) in the packaged
    ``data/example_130.json`` file. Progress is tracked per step in
    ``trace_info`` rather than through the returned reward.
    """

    def __init__(
        self,
        seed: int,
        task_id: Optional[int] = None,
    ) -> None:
        """
        Args:
            seed: random seed forwarded to the base task.
            task_id: zero-based index into the packaged task list.

        Raises:
            ValueError: ``task_id`` is missing or out of range.
        """
        super().__init__(seed)

        if task_id is None:
            # Single-line replacement field: a line break inside ``{...}``
            # is a SyntaxError before Python 3.12.
            raise ValueError(
                f"One and only one of 'task_id' must be provided (task_id={task_id})."
            )

        # task properties, will be used to set up the browsergym environment
        self.viewport = {"width": 1280, "height": 720}
        self.slow_mo = 1000  # ms
        self.timeout = 10000  # ms
        self.WebCanvas_instance = WebCanvasInstance()
        self.config_file: Optional[str] = None
        self.start_url: str = "https://www.google.com/"
        self.step_score_rate: Optional[str] = None
        self.match_result: Optional[str] = None
        # ``validate`` reads and writes ``task_finished``; ``task_finish``
        # is the originally initialised name and is kept for compatibility.
        self.task_finish: bool = False
        self.task_finished: bool = False

        # read the list of all WebCanvas task configs
        import browsergym.webcanvas as wcs
        all_configs_str = importlib.resources.files(wcs).joinpath(
            "data/example_130.json").read_text()
        all_task_configs = json.loads(all_configs_str)
        all_task = WebCanvasInstance.read_task_configs(all_task_configs)
        # Reject negative ids explicitly: they would otherwise silently
        # index from the end of the list.
        if not 0 <= task_id < len(all_task):
            raise ValueError(
                f"Could not find any task config with task_id={task_id}."
            )

        self.task_configs = all_task[task_id]
        self.trace_info = []
        self.time_step = 0

    @classmethod
    def get_task_id(cls):
        """
        Generic class for several task ids, this way of obtaining the task id is not compatible for now.
        """
        raise NotImplementedError

    def setup(self, page: playwright.sync_api.Page, start_url: str = None) -> tuple[str, dict]:
        """Load the task goal and open the start page.

        Returns the goal text and an empty info dict.
        """
        self.goal, _, _, reference_evaluate_steps = self.task_configs
        # Both attributes refer to the same list; scores are updated in
        # place while the reference is only used for its length.
        self.evaluaion_step = reference_evaluate_steps
        self.reference_evaluate_steps = reference_evaluate_steps
        start_url = start_url if start_url else self.start_url
        page.goto(start_url, timeout=self.timeout)
        return self.goal, {}

    def teardown(self) -> None:
        pass

    @property
    def evaluate_result(self):
        """Evaluation record of the most recent ``validate`` call.

        Raises ``IndexError`` when ``validate`` has not been called yet.
        """
        return self.trace_info[-1]

    @property
    def webcanvas(self):
        return True

    def validate(
        self,
        page: playwright.sync_api.Page,
        chat_messages: list[str],
        action: str = ""
    ) -> Tuple[float, bool, str, dict]:
        """Score the latest action and decide whether the episode is over.

        The episode ends when the user sent ``"exit"`` in the chat or when
        every reference key node has been matched. The reward is always 0;
        detailed scores are appended to ``trace_info``.
        """
        reward, done, msg, info = 0, False, "", {}

        for message in chat_messages:
            if message["role"] == "user" and message["message"] == "exit":
                done = True
                break

        self.time_step += 1
        step_action_info = {"time_step": self.time_step, "evaluation": []}

        # ``action`` may be None when no action was produced for the step.
        actions = WebCanvasInstance.parse_bid_from_action(action or "")
        for action_type, bid, target_value in actions:
            locator = None
            if bid != '':
                try:
                    locator = self.get_element_by_bid(page, bid)
                except Exception as e:
                    # The element may be gone after the action ran;
                    # scoring continues without a locator.
                    logger.warning("warning:%s", e)
                    locator = None
            self.evaluaion_step, self.step_score_rate, self.match_result, self.task_finished = WebCanvasInstance.evaluate(
                page, locator, target_value, self.evaluaion_step, self.reference_evaluate_steps)

            step_action_info["evaluation"].append(
                {
                    "action_type": action_type,
                    "bid": bid,
                    "target_value": target_value,
                    "step_score_rate": self.step_score_rate,
                    "match_result": self.match_result,
                    "task_status": self.task_finished
                }
            )

            if self.task_finished:
                done = True
                break

        self.trace_info.append(step_action_info)
        return reward, done, msg, info

    # https://github.com/ServiceNow/BrowserGym/blob/main/browsergym/core/src/browsergym/core/action/utils.py
    def get_element_by_bid(
        self, page: playwright.sync_api.Page, bid: str, scroll_into_view: bool = False
    ) -> playwright.sync_api.Locator:
        """
        Parse the given bid to sequentially locate every nested frame leading to the bid, then
        locate the bid element. Bids are expected to take the form "abb123", which means
        the element abb123 is located inside frame abb, which is located inside frame ab, which is
        located inside frame a, which is located inside the page's main frame.
        Args:
            page: the page in which to look up the element.
            bid: the browsergym id (playwright testid) of the page element.
            scroll_into_view: try to scroll element into view, unless it is completely visible.

        Returns:
            Playwright locator for the element.

        Raises:
            ValueError: ``bid`` is not a string, or a frame / the element
                cannot be found.
        """
        if not isinstance(bid, str):
            raise ValueError(f"expected a string, got {repr(bid)}")

        current_frame = page

        # dive into each nested frame, to the frame where the element is located
        i = 0
        while bid[i:] and not bid[i:].isnumeric():
            i += 1
            frame_bid = bid[:i]  # bid of the next frame to select
            frame_elem = current_frame.get_by_test_id(frame_bid)
            if not frame_elem.count():
                raise ValueError(f'Could not find element with bid "{bid}"')
            if scroll_into_view:
                frame_elem.scroll_into_view_if_needed(timeout=500)
            current_frame = frame_elem.frame_locator(":scope")

        # finally, we should have selected the frame where the target element is
        elem = current_frame.get_by_test_id(bid)
        if not elem.count():
            raise ValueError(f'Could not find element with bid "{bid}"')
        if scroll_into_view:
            elem.scroll_into_view_if_needed(timeout=500)
        return elem
00000000..84be8692 --- /dev/null +++ b/browsergym/webcanvas/src/browsergym/webcanvas/utils.py @@ -0,0 +1,118 @@ +import re + +from playwright.sync_api import Page +from .step_score import * + + +def get_netloc(url: str) -> str: + """Extract the domain name, for example, extract 'zhihu' from 'zhihu.com', extract 'google' from 'www.google.com.hk' """ + url = urlparse(url) + try: + if url.netloc.startswith("www"): + netloc = re.findall(".*?\.(.*?)\..*?", url.netloc)[0] + else: + netloc = re.findall("(.*?)\..*?", url.netloc)[0] + except: + netloc = "" + return netloc + + +def step_evaluate(page: Page, evaluate_steps=[], input_path=None, element_value=None): + """Evaluate step score""" + step_score = 0 + match_result = [] + for evaluate in evaluate_steps: + if evaluate["score"] != 1: + match_function = evaluate["match_function"] + if match_function == "url_exactly_match": + score = URLEvaluator.url_exact_match( + page.url, evaluate["reference_answer"], evaluate["key"]) + elif match_function == "url_included_match": + score = URLEvaluator.url_include_match( + page.url, evaluate["reference_answer"], evaluate["key"]) + elif match_function == "url_semantic_match": + score = URLEvaluator.url_semantic_match( + page.url, evaluate["reference_answer"], evaluate["key"]) + + elif match_function == "element_path_exactly_match": + input_netloc = get_netloc(page.url) + method = evaluate["method"] + score = ElementEvaluator.path_exact_match( + input_path, evaluate["reference_answer"], method, page, input_netloc, + evaluate["netloc"]) + + elif match_function == "element_path_included_match": + pass + + elif match_function == "element_value_exactly_match": + if input_path is not None and element_value is not None: + input_netloc = get_netloc(page.url) + + if "path" in evaluate.keys(): + path_score = ElementEvaluator.path_exact_match(input_path, evaluate["path"], "selector", + page, input_netloc, + evaluate["netloc"]) + if path_score == 0: + score = 0 + else: + score = 
ElementEvaluator.element_value_exact_match( + element_value, evaluate["reference_answer"], input_netloc, evaluate["netloc"]) + else: + score = ElementEvaluator.element_value_exact_match( + element_value, evaluate["reference_answer"], input_netloc, evaluate["netloc"]) + + else: + score = 0 + elif match_function == "element_value_included_match": + if input_path is not None and element_value is not None: + input_netloc = get_netloc(page.url) + if "path" in evaluate.keys(): + path_score = ElementEvaluator.path_exact_match(input_path, evaluate["path"], "selector", + page, input_netloc, + evaluate["netloc"]) + if path_score == 0: + score = 0 + else: + score = ElementEvaluator.element_value_include_match( + element_value, evaluate["reference_answer"], input_netloc, evaluate["netloc"]) + else: + score = ElementEvaluator.element_value_include_match( + element_value, evaluate["reference_answer"], input_netloc, evaluate["netloc"]) + else: + score = 0 + elif match_function == "element_value_semantic_match": + if input_path is not None and element_value is not None: + input_netloc = get_netloc(page.url) + + if len(element_value) > 0: + if "path" in evaluate.keys(): + path_score = ElementEvaluator.path_exact_match(input_path, evaluate["path"], "selector", + page, input_netloc, + evaluate["netloc"]) + if path_score == 0: + # print("Path mismatch in value evaluation") + score = 0 + else: + score = ElementEvaluator.element_value_semantic_match( + element_value, evaluate["reference_answer"], input_netloc, evaluate["netloc"]) + else: + score = ElementEvaluator.element_value_semantic_match( + element_value, evaluate["reference_answer"], input_netloc, evaluate["netloc"]) + # print(score, "element_value_semantic_match", + # element_value, "*", evaluate["reference_answer"]) + else: + score = 0 + elif match_function == "text_exact_match": + pass # TODO + elif match_function == "text_include_match": + pass + elif match_function == "text_semantic_match": + pass + + evaluate["score"] = 
max(evaluate["score"], score) + if evaluate["score"] >= 1: + match_result.append( + {evaluate["match_function"]: evaluate["reference_answer"]}) + step_score += evaluate["score"] + + return evaluate_steps, match_result diff --git a/demo_agent/run_demo.py b/demo_agent/run_demo.py index 82cc2c96..68f4e877 100644 --- a/demo_agent/run_demo.py +++ b/demo_agent/run_demo.py @@ -29,13 +29,13 @@ def parse_args(): parser.add_argument( "--task_name", type=str, - default="openended", + default="webcanvas.1", help="Name of the Browsergym task to run. If 'openended', you need to specify a 'start_url'", ) parser.add_argument( "--start_url", type=str, - default="https://www.google.com", + default="https://www.google.com/", help="Starting URL (only for the openended task).", ) parser.add_argument( From c58ea58d4fc1ad3542430ffc30fd8b32fa78646e Mon Sep 17 00:00:00 2001 From: zsd <909087485@qq.com> Date: Tue, 27 Aug 2024 06:52:33 +0000 Subject: [PATCH 02/15] small fix --- .../webcanvas/{sematic_match => semantic_match}/openai.py | 3 --- .../{sematic_match => semantic_match}/prompt_constructor.py | 0 .../{sematic_match => semantic_match}/semantic_prompts.py | 0 browsergym/webcanvas/src/browsergym/webcanvas/task.py | 2 +- 4 files changed, 1 insertion(+), 4 deletions(-) rename browsergym/webcanvas/src/browsergym/webcanvas/{sematic_match => semantic_match}/openai.py (97%) rename browsergym/webcanvas/src/browsergym/webcanvas/{sematic_match => semantic_match}/prompt_constructor.py (100%) rename browsergym/webcanvas/src/browsergym/webcanvas/{sematic_match => semantic_match}/semantic_prompts.py (100%) diff --git a/browsergym/webcanvas/src/browsergym/webcanvas/sematic_match/openai.py b/browsergym/webcanvas/src/browsergym/webcanvas/semantic_match/openai.py similarity index 97% rename from browsergym/webcanvas/src/browsergym/webcanvas/sematic_match/openai.py rename to browsergym/webcanvas/src/browsergym/webcanvas/semantic_match/openai.py index da643173..f81e2e32 100644 --- 
a/browsergym/webcanvas/src/browsergym/webcanvas/sematic_match/openai.py +++ b/browsergym/webcanvas/src/browsergym/webcanvas/semantic_match/openai.py @@ -6,9 +6,6 @@ openai_api_key = os.getenv("OPENAI_API_KEY") -openai.api_key = openai_api_key - - class GPTGenerator: def __init__(self, model=None): self.model = model diff --git a/browsergym/webcanvas/src/browsergym/webcanvas/sematic_match/prompt_constructor.py b/browsergym/webcanvas/src/browsergym/webcanvas/semantic_match/prompt_constructor.py similarity index 100% rename from browsergym/webcanvas/src/browsergym/webcanvas/sematic_match/prompt_constructor.py rename to browsergym/webcanvas/src/browsergym/webcanvas/semantic_match/prompt_constructor.py diff --git a/browsergym/webcanvas/src/browsergym/webcanvas/sematic_match/semantic_prompts.py b/browsergym/webcanvas/src/browsergym/webcanvas/semantic_match/semantic_prompts.py similarity index 100% rename from browsergym/webcanvas/src/browsergym/webcanvas/sematic_match/semantic_prompts.py rename to browsergym/webcanvas/src/browsergym/webcanvas/semantic_match/semantic_prompts.py diff --git a/browsergym/webcanvas/src/browsergym/webcanvas/task.py b/browsergym/webcanvas/src/browsergym/webcanvas/task.py index 4d10789d..b8883bfb 100644 --- a/browsergym/webcanvas/src/browsergym/webcanvas/task.py +++ b/browsergym/webcanvas/src/browsergym/webcanvas/task.py @@ -46,7 +46,7 @@ def __init__( # read the list of all WebCanvas task configs import browsergym.webcanvas as wcs all_configs_str = importlib.resources.files(wcs).joinpath( - "data/example_130.json").read_text() + "data/mind2web-test_104tasks_20240528.json").read_text() all_task_configs = json.loads(all_configs_str) all_task = WebCanvasInstance.read_task_configs(all_task_configs) if task_id is not None and task_id < len(all_task): From 1be7d9950b1cd87a7cb167b661b3bf2f56f74af8 Mon Sep 17 00:00:00 2001 From: han032206 Date: Sun, 13 Oct 2024 22:13:19 +0800 Subject: [PATCH 03/15] integrate webcanvas into browsergym --- README.md | 2 
++ browsergym/core/src/browsergym/core/env.py | 9 +++------ browsergym/webcanvas/requirements.txt | 2 +- .../webcanvas/src/browsergym/webcanvas/step_score.py | 4 ++-- browsergym/webcanvas/src/browsergym/webcanvas/task.py | 3 +-- demo_agent/run_demo.py | 4 +++- 6 files changed, 12 insertions(+), 12 deletions(-) diff --git a/README.md b/README.md index a22adc90..3934cf3d 100644 --- a/README.md +++ b/README.md @@ -41,6 +41,7 @@ BrowserGym includes the following benchmarks by default: - [WebArena](https://webarena.dev/) - [VisualWebArena](https://jykoh.com/vwa) - [WorkArena](https://github.com/ServiceNow/WorkArena) + - [WebCanvas](https://github.com/iMeanAI/WebCanvas) - [AssistantBench](https://github.com/oriyor/assistantbench) - [WebLINX](https://github.com/McGill-NLP/weblinx) (static benchmark) @@ -71,6 +72,7 @@ Finally, each benchmark comes with its own specific setup that requires to follo - for WebArena, see [webarena/README.md](browsergym/webarena/README.md) - for VisualWebArena, see [visualwebarena/README.md](browsergym/visualwebarena/README.md) - for WorkArena, see [WorkArena](https://github.com/ServiceNow/WorkArena) + - for WebCanvas, see [WebCanvas](https://github.com/iMeanAI/WebCanvas) - for AssistantBench, see [assistantbench/README.md](browsergym/assistantbench/README.md) ### 🏗️ Development setup diff --git a/browsergym/core/src/browsergym/core/env.py b/browsergym/core/src/browsergym/core/env.py index 9648aacd..de7a8bc7 100644 --- a/browsergym/core/src/browsergym/core/env.py +++ b/browsergym/core/src/browsergym/core/env.py @@ -398,8 +398,7 @@ def report_infeasible_instructions(reason: str): # extract reward, done, user_message, info (task-specific) reward, done, user_message, task_info = self.task.validate( self.page, self.chat.messages, action) - logger.info(f"WebCanvas task validation result:\n{ - self.task.evaluate_result}") + logger.info(f"WebCanvas task validation result:\n{self.task.evaluate_result}") info["task_info"] = task_info 
info["webcanvas_result"] = self.task.evaluate_result @@ -506,8 +505,7 @@ def _activate_page_from_js(self, page: playwright.sync_api.Page): logger.debug(f"_activate_page_from_js(page) called, page={str(page)}") if not page.context == self.context: raise RuntimeError( - f"Unexpected: activating a page that belongs to a different browser context ({ - page})." + f"Unexpected: activating a page that belongs to a different browser context ({page})." ) # add the activated page to the page history (or move it to last which is the most recent) @@ -538,8 +536,7 @@ def _active_page_check(self): # active page should share the same browser context with the environment if self.page not in self.context.pages: raise RuntimeError( - f"Unexpected: active page is not part of the browser context's open pages ({ - self.page})." + f"Unexpected: active page is not part of the browser context's open pages ({self.page})." ) # active page should not be closed diff --git a/browsergym/webcanvas/requirements.txt b/browsergym/webcanvas/requirements.txt index e2d999f0..d51e110a 100644 --- a/browsergym/webcanvas/requirements.txt +++ b/browsergym/webcanvas/requirements.txt @@ -1,2 +1,2 @@ -browsergym-core==0.4.2 +browsergym-core==0.8.0 libwebarena==0.0.3 diff --git a/browsergym/webcanvas/src/browsergym/webcanvas/step_score.py b/browsergym/webcanvas/src/browsergym/webcanvas/step_score.py index 778838ce..213b5c79 100644 --- a/browsergym/webcanvas/src/browsergym/webcanvas/step_score.py +++ b/browsergym/webcanvas/src/browsergym/webcanvas/step_score.py @@ -2,8 +2,8 @@ from urllib.parse import parse_qs, urlparse, unquote from lxml import html -from .sematic_match.prompt_constructor import SemanticMatchPromptConstructor -from .sematic_match.openai import GPTGenerator35 +from .semantic_match.prompt_constructor import SemanticMatchPromptConstructor +from .semantic_match.openai import GPTGenerator35 MapTagNameList = [ diff --git a/browsergym/webcanvas/src/browsergym/webcanvas/task.py 
b/browsergym/webcanvas/src/browsergym/webcanvas/task.py index b8883bfb..8bac96cc 100644 --- a/browsergym/webcanvas/src/browsergym/webcanvas/task.py +++ b/browsergym/webcanvas/src/browsergym/webcanvas/task.py @@ -39,8 +39,7 @@ def __init__( if task_id is None: raise ValueError( - f"One and only one of 'task_id' must be provided (task_id={ - task_id})." + f"One and only one of 'task_id' must be provided (task_id={task_id})." ) # read the list of all WebCanvas task configs diff --git a/demo_agent/run_demo.py b/demo_agent/run_demo.py index 68f4e877..548c10fa 100644 --- a/demo_agent/run_demo.py +++ b/demo_agent/run_demo.py @@ -29,7 +29,7 @@ def parse_args(): parser.add_argument( "--task_name", type=str, - default="webcanvas.1", + default="webcanvas.0", help="Name of the Browsergym task to run. If 'openended', you need to specify a 'start_url'", ) parser.add_argument( @@ -102,6 +102,8 @@ def main(): env_args.wait_for_user_message = True env_args.task_kwargs = {"start_url": args.start_url} + print(args.task_name) + # setting up the experiment exp_args = ExpArgs( env_args=env_args, From e532ce9764165b66a35c9d5d6007a80ba9c54450 Mon Sep 17 00:00:00 2001 From: zsd <909087485@qq.com> Date: Fri, 25 Oct 2024 13:28:45 +0000 Subject: [PATCH 04/15] version 1.0 --- .vscode/launch.json | 15 ++++ browsergym/core/src/browsergym/core/env.py | 76 ++++++++++++++--- browsergym/webcanvas/pyproject.toml | 4 + browsergym/webcanvas/requirements.txt | 6 +- .../src/browsergym/webcanvas/__init__.py | 5 +- ...ample_130.json => mind2web-train_130.json} | 1 + .../src/browsergym/webcanvas/instance.py | 16 +++- .../src/browsergym/webcanvas/step_score.py | 8 +- .../src/browsergym/webcanvas/task.py | 73 ++++++++++++---- .../src/browsergym/webcanvas/utils.py | 83 +++++++++++++++++++ demo_agent/run_demo.py | 2 +- test2.py | 81 ++++++++++++++++++ test3.py | 79 ++++++++++++++++++ test_expose_binding.py | 48 +++++++++++ 14 files changed, 460 insertions(+), 37 deletions(-) create mode 100644 
.vscode/launch.json rename browsergym/webcanvas/src/browsergym/webcanvas/data/{example_130.json => mind2web-train_130.json} (99%) create mode 100644 test2.py create mode 100644 test3.py create mode 100644 test_expose_binding.py diff --git a/.vscode/launch.json b/.vscode/launch.json new file mode 100644 index 00000000..61def22f --- /dev/null +++ b/.vscode/launch.json @@ -0,0 +1,15 @@ +{ + // Use IntelliSense to learn about possible attributes. + // Hover to view descriptions of existing attributes. + // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 + "version": "0.2.0", + "configurations": [ + { + "name": "Python Debugger: demo_agent/run_demo.py", + "type": "debugpy", + "request": "launch", + "program": "${file}", + "console": "integratedTerminal" + } + ] +} \ No newline at end of file diff --git a/browsergym/core/src/browsergym/core/env.py b/browsergym/core/src/browsergym/core/env.py index de7a8bc7..2e7bc389 100644 --- a/browsergym/core/src/browsergym/core/env.py +++ b/browsergym/core/src/browsergym/core/env.py @@ -267,6 +267,12 @@ def override_property(task, env, property): "browsergym_page_activated", lambda source: self._activate_page_from_js( source["page"]) ) + + self.context.expose_binding( + "handleEvent", lambda source, + selector, event_type,element_text: self._handle_event(selector, event_type,element_text) + ) + self.context.add_init_script( r""" window.browsergym_page_activated(); @@ -396,11 +402,16 @@ def report_infeasible_instructions(reason: str): if hasattr(self.task, 'webcanvas'): logger.debug(f"Initiating webcanvas task validation") # extract reward, done, user_message, info (task-specific) - reward, done, user_message, task_info = self.task.validate( - self.page, self.chat.messages, action) - logger.info(f"WebCanvas task validation result:\n{self.task.evaluate_result}") - info["task_info"] = task_info - info["webcanvas_result"] = self.task.evaluate_result + self.events = self.task.events + self._event_listener( + 
[event["selector"] for event in self.events if event and event["selector"] and event["status"] == False]) + + # reward, done, user_message, task_info = self.task.validate( + # self.page, self.chat.messages, action) + # logger.info(f"WebCanvas task validation result:\n{ + # self.task.evaluate_result}") + # info["task_info"] = task_info + # info["webcanvas_result"] = self.task.evaluate_result # try to execute the action logger.debug(f"Executing action") @@ -443,12 +454,14 @@ def report_infeasible_instructions(reason: str): self._wait_for_user_message() logger.debug(f"User message done") - if not hasattr(self.task, 'webcanvas'): - logger.debug(f"Initiating task validation") - # extract reward, done, user_message, info (task-specific) - reward, done, user_message, task_info = self._task_validate() - info["task_info"] = task_info - logger.debug(f"Task validation done") + # if not hasattr(self.task, 'webcanvas'): + logger.debug(f"Initiating task validation") + # extract reward, done, user_message, info (task-specific) + reward, done, user_message, task_info = self._task_validate() + logger.info(f"WebCanvas task validation result:\n{ + self.task.evaluate_result}") + info["task_info"] = task_info + logger.debug(f"Task validation done") # add any user message sent by the task to the chat if user_message: @@ -472,7 +485,8 @@ def _task_validate(self): prev_page_history = self.page_history.copy() # call validate reward, done, user_message, info = self.task.validate( - self.page, self.chat.messages) + self.page, self.chat.messages,self.last_action) + # info["webcanvas_result"] = self.task.evaluate_result # safety fix, in case validate() did mess up the active page and/or page history if prev_active_page != self.page or prev_page_history != self.page_history: logger.debug( @@ -602,3 +616,41 @@ def _get_obs(self): } return obs + + def _event_listener(self, selectors): + """ + Add a universal event listener to specified selectors to capture various event types + :param page: 
Current page object + :param selectors: List of selectors to listen to + """ + self.page.evaluate( + """ + ({selectors}) => { + selectors.forEach((selector) => { + const element = document.querySelector(selector); + if (element) { + const allEvents = [ + 'click', 'input', 'change', 'keydown', 'keyup', + 'mouseover', 'mouseout', 'mousedown', 'mouseup', 'focus', 'blur' + ]; + allEvents.forEach((eventType) => { + element.addEventListener(eventType, (event) => { + const elementText = event.target.textContent || null; + window.handleEvent(selector, eventType, elementText); + }, true); // 'true' indicates capture phase + }); + } + }); + } + """, + {"selectors": selectors} + ) + + def _handle_event(self, selector, event_type, element_text=None): + logger.debug(f"Element with selector '{selector}' triggered '{ + event_type}' event, text content: {element_text}") + for idx, event in enumerate(self.events): + if event and event["selector"] == selector: + self.events[idx]["status"] = True + self.events[idx]["target_value"] = element_text if element_text else "" + self.task.update_events(self.events) diff --git a/browsergym/webcanvas/pyproject.toml b/browsergym/webcanvas/pyproject.toml index 34f301e0..23a579c9 100644 --- a/browsergym/webcanvas/pyproject.toml +++ b/browsergym/webcanvas/pyproject.toml @@ -5,6 +5,10 @@ build-backend = "hatchling.build" [project] name = "browsergym-webcanvas" description = "WebCanvas benchmark for BrowserGym" +authors = [ + {name = "Sida Zhou"}, + {name = "Dehan Kong"}, +] readme = "README.md" requires-python = ">3.7" license = {text = "Apache-2.0"} diff --git a/browsergym/webcanvas/requirements.txt b/browsergym/webcanvas/requirements.txt index d51e110a..e596c1dd 100644 --- a/browsergym/webcanvas/requirements.txt +++ b/browsergym/webcanvas/requirements.txt @@ -1,2 +1,4 @@ -browsergym-core==0.8.0 -libwebarena==0.0.3 +browsergym-core==0.13.3 +openai +bs4 +lxml diff --git a/browsergym/webcanvas/src/browsergym/webcanvas/__init__.py 
b/browsergym/webcanvas/src/browsergym/webcanvas/__init__.py index c94dfdf4..b36ade28 100644 --- a/browsergym/webcanvas/src/browsergym/webcanvas/__init__.py +++ b/browsergym/webcanvas/src/browsergym/webcanvas/__init__.py @@ -1,13 +1,12 @@ from browsergym.core.registration import register_task -# register the WebArena benchmark +# register the WebCanvas benchmark from . import config, task ALL_WEBCANVAS_TASK_IDS = [] -# register the WebArena benchmark for task_id in config.TASK_IDS: - gym_id = f"webcanvas.{task_id}" + gym_id = f"webcanvas.mind2web-live.{task_id}" register_task( gym_id, task.GenericWebCanvasTask, diff --git a/browsergym/webcanvas/src/browsergym/webcanvas/data/example_130.json b/browsergym/webcanvas/src/browsergym/webcanvas/data/mind2web-train_130.json similarity index 99% rename from browsergym/webcanvas/src/browsergym/webcanvas/data/example_130.json rename to browsergym/webcanvas/src/browsergym/webcanvas/data/mind2web-train_130.json index d6cc2dd2..afa59f81 100644 --- a/browsergym/webcanvas/src/browsergym/webcanvas/data/example_130.json +++ b/browsergym/webcanvas/src/browsergym/webcanvas/data/mind2web-train_130.json @@ -63,6 +63,7 @@ { "match_function_name": "element_value_exactly_match", "content": { + "path": "#main > div:nth-child(3) > section > div > p:nth-child(3) > a", "reference_answer": "View Full Menu", "netloc": "amctheatres", "url": "https://www.amctheatres.com/food-and-drink/dine-in/explore-menu" diff --git a/browsergym/webcanvas/src/browsergym/webcanvas/instance.py b/browsergym/webcanvas/src/browsergym/webcanvas/instance.py index b27bd016..30054aee 100644 --- a/browsergym/webcanvas/src/browsergym/webcanvas/instance.py +++ b/browsergym/webcanvas/src/browsergym/webcanvas/instance.py @@ -3,7 +3,7 @@ import requests import re -from .utils import step_evaluate +from .utils import step_evaluate, step_event_evaluate class WebCanvasInstance: @@ -113,3 +113,17 @@ def evaluate(page, selector, target_value, evaluate_steps, reference_evaluate_st if 
total_step_score == len(reference_evaluate_steps): task_finished = True return evaluate_steps, step_score_rate, match_result, task_finished + + @staticmethod + def evaluate_events(page, evaluate_steps, task_events, target_value, reference_evaluate_steps): + evaluate_steps, match_result = step_event_evaluate(page=page, evaluate_steps=evaluate_steps, + task_events=task_events, target_value=target_value) + total_step_score = 0 + for evaluate in evaluate_steps: + total_step_score += evaluate["score"] + step_score_rate = str( + total_step_score) + " / " + str(len(reference_evaluate_steps)) + task_finished = False + if total_step_score == len(reference_evaluate_steps): + task_finished = True + return evaluate_steps, step_score_rate, match_result, task_finished diff --git a/browsergym/webcanvas/src/browsergym/webcanvas/step_score.py b/browsergym/webcanvas/src/browsergym/webcanvas/step_score.py index 213b5c79..d9c74eaf 100644 --- a/browsergym/webcanvas/src/browsergym/webcanvas/step_score.py +++ b/browsergym/webcanvas/src/browsergym/webcanvas/step_score.py @@ -171,10 +171,10 @@ def path_included_match(input_answer, reference_answer, method, html_content): @ staticmethod def element_value_exact_match(input_answer, reference_answer, input_netloc, reference_netloc): - if reference_netloc != input_netloc: - # print("reference_netloc:", reference_netloc, - # "input_netloc:", input_netloc) - return 0 + # if reference_netloc != input_netloc: + # # print("reference_netloc:", reference_netloc, + # # "input_netloc:", input_netloc) + # return 0 result_score = MatchFunction.exact_match( input_answer, reference_answer) return result_score diff --git a/browsergym/webcanvas/src/browsergym/webcanvas/task.py b/browsergym/webcanvas/src/browsergym/webcanvas/task.py index 8bac96cc..c8c21a2e 100644 --- a/browsergym/webcanvas/src/browsergym/webcanvas/task.py +++ b/browsergym/webcanvas/src/browsergym/webcanvas/task.py @@ -36,6 +36,7 @@ def __init__( self.step_score_rate: str = None 
self.match_result: str = None self.task_finish: bool = False + self.activate_element = None if task_id is None: raise ValueError( @@ -45,7 +46,7 @@ def __init__( # read the list of all WebCanvas task configs import browsergym.webcanvas as wcs all_configs_str = importlib.resources.files(wcs).joinpath( - "data/mind2web-test_104tasks_20240528.json").read_text() + "data/mind2web-train_130.json").read_text() all_task_configs = json.loads(all_configs_str) all_task = WebCanvasInstance.read_task_configs(all_task_configs) if task_id is not None and task_id < len(all_task): @@ -72,6 +73,7 @@ def setup(self, page: playwright.sync_api.Page, start_url: str = None) -> tuple[ self.reference_evaluate_steps = reference_evaluate_steps start_url = start_url if start_url else self.start_url page.goto(start_url, timeout=10000) + self._init_task_events() return self.goal, {} def teardown(self) -> None: @@ -104,18 +106,37 @@ def validate( step_action_info["evaluation"] = [] actions = WebCanvasInstance.parse_bid_from_action(action) + # if len(actions) > 0: + # for action_type, bid, target_value in actions: + # locator = None + # # if bid != '': + # # try: + # # locator = self.get_element_by_bid(page, bid) + # # except Exception as e: + # # logger.warning(f"warning:{e}") + # # locator = None + # self.evaluaion_step, self.step_score_rate, self.match_result, self.task_finished = WebCanvasInstance.evaluate( + # page, locator, target_value, self.evaluaion_step, self.reference_evaluate_steps) + + # step_action_info["evaluation"].append( + # { + # "action_type": action_type, + # "bid": bid, + # "target_value": target_value, + # "step_score_rate": self.step_score_rate, + # "match_result": self.match_result, + # "task_status": self.task_finished + # } + # ) + + # if self.task_finished: + # done = True + # break if len(actions) > 0: for action_type, bid, target_value in actions: - locator = None - if bid != '': - try: - locator = self.get_element_by_bid(page, bid) - except Exception as e: - 
logger.warning(f"warning:{e}") - locator = None - self.evaluaion_step, self.step_score_rate, self.match_result, self.task_finished = WebCanvasInstance.evaluate( - page, locator, target_value, self.evaluaion_step, self.reference_evaluate_steps) - + self.evaluaion_step, self.step_score_rate, self.match_result, self.task_finished = WebCanvasInstance.evaluate_events( + page, self.evaluaion_step, self.task_events, target_value, self.reference_evaluate_steps) + step_action_info["evaluation"].append( { "action_type": action_type, @@ -126,12 +147,10 @@ def validate( "task_status": self.task_finished } ) - if self.task_finished: done = True break - - + print(self.task_events) self.trace_info.append(step_action_info) return reward, done, msg, info @@ -176,3 +195,29 @@ def get_element_by_bid( if scroll_into_view: elem.scroll_into_view_if_needed(timeout=500) return elem + + @property + def events(self): + return self.task_events + + def _init_task_events(self): + self.task_events = [] + for evaluation_step in self.reference_evaluate_steps: + event = {} + if evaluation_step["match_function"] in ["element_path_exactly_match", "element_path_included_match"]: + event["selector"] = evaluation_step['reference_answer'] + event["target_value"] = "" + event["reference_value"] = "" + event["status"] = False + elif evaluation_step["match_function"] in ["element_value_exactly_match", "element_value_semantic_match"]: + event["selector"] = evaluation_step.get('path') + event["target_value"] = "" + event["reference_value"] = evaluation_step['reference_answer'] + event["status"] = False + self.task_events.append(event) + + def update_events(self, agent_event): + for ix, event in enumerate(agent_event): + if event and event['status']: + self.task_events[ix]["status"] = event['status'] + self.task_events[ix]["target_value"] = event['target_value'] diff --git a/browsergym/webcanvas/src/browsergym/webcanvas/utils.py b/browsergym/webcanvas/src/browsergym/webcanvas/utils.py index 84be8692..e1cb7636 
100644 --- a/browsergym/webcanvas/src/browsergym/webcanvas/utils.py +++ b/browsergym/webcanvas/src/browsergym/webcanvas/utils.py @@ -116,3 +116,86 @@ def step_evaluate(page: Page, evaluate_steps=[], input_path=None, element_value= step_score += evaluate["score"] return evaluate_steps, match_result + + +def step_event_evaluate(page, evaluate_steps, task_events, target_value): + + def check_event_by_selector(events, selector): + for event in events: + if event and event["selector"] == selector: + if event["status"]: + return 1, event + return 0, None + + step_score = 0 + match_result = [] + for evaluate in evaluate_steps: + if evaluate["score"] != 1: + match_function = evaluate["match_function"] + if match_function == "url_exactly_match": + score = URLEvaluator.url_exact_match( + page.url, evaluate["reference_answer"], evaluate["key"]) + elif match_function == "url_included_match": + score = URLEvaluator.url_include_match( + page.url, evaluate["reference_answer"], evaluate["key"]) + elif match_function == "url_semantic_match": + score = URLEvaluator.url_semantic_match( + page.url, evaluate["reference_answer"], evaluate["key"]) + + elif match_function == "element_path_exactly_match": + score, event = check_event_by_selector( + task_events, evaluate["reference_answer"]) + + elif match_function == "element_path_included_match": + pass + + elif match_function == "element_value_exactly_match": + input_netloc = get_netloc(page.url) + if "path" in evaluate.keys(): + path_score, event = check_event_by_selector( + task_events, evaluate["path"]) + if path_score == 0: + score = 0 + else: + score = ElementEvaluator.element_value_exact_match( + event["target_value"], evaluate["reference_answer"], input_netloc, evaluate["netloc"]) + print("score:",score) + else: + score = ElementEvaluator.element_value_exact_match( + target_value, evaluate["reference_answer"], input_netloc, evaluate["netloc"]) + + elif match_function == "element_value_included_match": + input_netloc = 
get_netloc(page.url) + if "path" in evaluate.keys(): + path_score, event = check_event_by_selector( + task_events, evaluate["path"]) + if path_score == 0: + score = 0 + else: + score = ElementEvaluator.element_value_include_match( + event["target_value"], evaluate["reference_answer"], input_netloc, evaluate["netloc"]) + else: + score = ElementEvaluator.element_value_include_match( + event["target_value"], evaluate["reference_answer"], input_netloc, evaluate["netloc"]) + + elif match_function == "element_value_semantic_match": + input_netloc = get_netloc(page.url) + if "path" in evaluate.keys(): + path_score, event = check_event_by_selector( + task_events, evaluate["path"]) + if path_score == 0: + score = 0 + else: + score = ElementEvaluator.element_value_semantic_match( + event["target_value"], evaluate["reference_answer"], input_netloc, evaluate["netloc"]) + else: + score = ElementEvaluator.element_value_semantic_match( + event["target_value"], evaluate["reference_answer"], input_netloc, evaluate["netloc"]) + + evaluate["score"] = max(evaluate["score"], score) + if evaluate["score"] >= 1: + match_result.append( + {evaluate["match_function"]: evaluate["reference_answer"]}) + step_score += evaluate["score"] + + return evaluate_steps, match_result diff --git a/demo_agent/run_demo.py b/demo_agent/run_demo.py index 548c10fa..d59c90b3 100644 --- a/demo_agent/run_demo.py +++ b/demo_agent/run_demo.py @@ -29,7 +29,7 @@ def parse_args(): parser.add_argument( "--task_name", type=str, - default="webcanvas.0", + default="webcanvas.mind2web-live.1", help="Name of the Browsergym task to run. 
If 'openended', you need to specify a 'start_url'", ) parser.add_argument( diff --git a/test2.py b/test2.py new file mode 100644 index 00000000..34e0acf6 --- /dev/null +++ b/test2.py @@ -0,0 +1,81 @@ +from playwright.sync_api import sync_playwright, Playwright + +# Define a callback function to handle event triggers +def handle_event(selector, event_type, element_text=None): + print(f"Element with selector '{selector}' triggered '{event_type}' event, text content: {element_text}") + +def event_listener(page, selectors): + """ + Add a universal event listener to specified selectors to capture various event types + :param page: Current page object + :param selectors: List of selectors to listen to + """ + page.evaluate( + """ + ({selectors}) => { + selectors.forEach((selector) => { + const element = document.querySelector(selector); + if (element) { + const allEvents = [ + 'click', 'input', 'change', 'keydown', 'keyup', + 'mouseover', 'mouseout', 'mousedown', 'mouseup', 'focus', 'blur' + ]; + allEvents.forEach((eventType) => { + element.addEventListener(eventType, (event) => { + const elementText = event.target.textContent || null; + window.handleEvent(selector, eventType, elementText); + }, true); // 'true' indicates capture phase + }); + } + }); + } + """, + {"selectors": selectors} + ) + return page + +def run(playwright: Playwright): + chromium = playwright.chromium + browser = chromium.launch(headless=False) # Set to True to enable headless mode + context = browser.new_context() + page = context.new_page() + + # Navigate to the target page + page.goto("https://store.steampowered.com/app/570/Dota_2/") + + # Wait for the page to fully load + page.wait_for_load_state('load') + + # Expose the Python function to the page context + context.expose_binding("handleEvent", lambda source, selector, event_type, element_text: handle_event(selector, event_type, element_text)) + + # Call the function to add a universal event listener to specified selectors + page = 
event_listener( + page, + ["#noteworthy_tab > span > a.pulldown_desktop", + "#store_nav_search_term"] # Add selectors you want to listen to + ) + + # Simulate clicking on an element + locator = page.locator("#noteworthy_tab > span > a.pulldown_desktop") + locator.click() + + page.wait_for_timeout(3000) + + # Call event_listener again to listen to new events + page = event_listener( + page, + ["#store_nav_search_term"] # Add selectors you want to listen to + ) + + # Simulate filling content in the input field + fill_locator = page.locator("#store_nav_search_term") + fill_locator.fill("game") + + page.wait_for_timeout(3000) + + # Close the browser + browser.close() + +with sync_playwright() as playwright: + run(playwright) diff --git a/test3.py b/test3.py new file mode 100644 index 00000000..537d10b6 --- /dev/null +++ b/test3.py @@ -0,0 +1,79 @@ +from playwright.sync_api import sync_playwright, Playwright + +# Define a callback function to handle event triggers + + +def handle_event(selector, event_type, element_text=None): + print(f"Element with selector '{selector}' triggered '{ + event_type}' event, text content: {element_text}") + + +def event_listener(page, selectors): + """ + Add a universal event listener to specified selectors to capture various event types + :param page: Current page object + :param selectors: List of selectors to listen to + """ + page.evaluate( + """ + ({selectors}) => { + selectors.forEach((selector) => { + const element = document.querySelector(selector); + if (element) { + const allEvents = [ + 'click', 'input', 'change', 'keydown', 'keyup', + 'mouseover', 'mouseout', 'mousedown', 'mouseup', 'focus', 'blur' + ]; + allEvents.forEach((eventType) => { + element.addEventListener(eventType, (event) => { + const elementText = event.target.textContent || null; + window.handleEvent(selector, eventType, elementText); + }, true); // 'true' indicates capture phase + }); + } + }); + } + """, + {"selectors": selectors} + ) + return page + + +def 
run(playwright: Playwright): + chromium = playwright.chromium + # Set to True to enable headless mode + browser = chromium.launch(headless=False) + context = browser.new_context() + page = context.new_page() + + # Navigate to the target page + page.goto("https://www.amctheatres.com/food-and-drink/dine-in/explore-menu") + + # Wait for the page to fully load + page.wait_for_load_state('load') + + # Expose the Python function to the page context + context.expose_binding("handleEvent", lambda source, selector, event_type, + element_text: handle_event(selector, event_type, element_text)) + + # Call the function to add a universal event listener to specified selectors + page = event_listener( + page, + [ + "#main > div:nth-child(3) > section > div > p:nth-child(3) > a" + ] # Add selectors you want to listen to + ) + + # Simulate clicking on an element + locator = page.locator( + "#main > div:nth-child(3) > section > div > p:nth-child(3) > a") + locator.click() + + page.wait_for_timeout(3000) + + # Close the browser + browser.close() + + +with sync_playwright() as playwright: + run(playwright) diff --git a/test_expose_binding.py b/test_expose_binding.py new file mode 100644 index 00000000..e4b05930 --- /dev/null +++ b/test_expose_binding.py @@ -0,0 +1,48 @@ +from playwright.sync_api import sync_playwright, Playwright + +def func(a, b): + print(f"Action detected on element with ID: {b, a}") + +def run(playwright: Playwright): + webkit = playwright.webkit + browser = webkit.launch(headless=False) + context = browser.new_context() + + # 暴露多个函数 + a = "1" + b = 1 + context.expose_binding( + "handleAction", + lambda source, element_id: func(a, b) + ) + context.expose_binding( + "handleClick", + lambda source, element_id: print(f"Click detected on element with ID: {element_id}") + ) + + page = context.new_page() + + # 添加初始化脚本,监听 click 事件并调用 handleAction 和 handleClick 函数 + context.add_init_script( + r""" + window.addEventListener("click", (event) => { + // event.preventDefault(); 
// 阻止默认的点击行为 + window.handleAction(event.target.id); + window.handleClick(event.target.id); + // 重新触发点击事件 + // event.target.click(); + }, {capture: true}); + """ + ) + + page.goto("https://playwright.dev/python/docs/api/class-browsercontext#browser-context-expose-binding") + + # 定位页面中的元素并模拟点击 + locator = page.locator("#__docusaurus > nav > div.navbar__inner > div:nth-child(1) > a.navbar__brand") + locator.click() + + # 关闭浏览器 + browser.close() + +with sync_playwright() as playwright: + run(playwright) \ No newline at end of file From f6775e749e297736e5ebbce43e87eaed73f44175 Mon Sep 17 00:00:00 2001 From: zsd <909087485@qq.com> Date: Fri, 25 Oct 2024 15:30:54 +0000 Subject: [PATCH 05/15] version 1.1 --- browsergym/core/src/browsergym/core/env.py | 124 +++++++++++++----- .../src/browsergym/webcanvas/__init__.py | 2 +- .../src/browsergym/webcanvas/config.py | 3 +- ...mind2web-live-test_104tasks_20240528.json} | 0 ..._130.json => mind2web-live-train_130.json} | 2 +- .../src/browsergym/webcanvas/step_score.py | 16 +-- .../src/browsergym/webcanvas/task.py | 3 +- .../src/browsergym/webcanvas/utils.py | 21 ++- test3.py | 98 ++++++++------ 9 files changed, 174 insertions(+), 95 deletions(-) rename browsergym/webcanvas/src/browsergym/webcanvas/data/{mind2web-test_104tasks_20240528.json => mind2web-live-test_104tasks_20240528.json} (100%) rename browsergym/webcanvas/src/browsergym/webcanvas/data/{mind2web-train_130.json => mind2web-live-train_130.json} (99%) diff --git a/browsergym/core/src/browsergym/core/env.py b/browsergym/core/src/browsergym/core/env.py index 2e7bc389..a4807523 100644 --- a/browsergym/core/src/browsergym/core/env.py +++ b/browsergym/core/src/browsergym/core/env.py @@ -270,7 +270,7 @@ def override_property(task, env, property): self.context.expose_binding( "handleEvent", lambda source, - selector, event_type,element_text: self._handle_event(selector, event_type,element_text) + selector, event_type, element_text: self._handle_event(selector, event_type, 
element_text) ) self.context.add_init_script( @@ -401,11 +401,13 @@ def report_infeasible_instructions(reason: str): if hasattr(self.task, 'webcanvas'): logger.debug(f"Initiating webcanvas task validation") - # extract reward, done, user_message, info (task-specific) self.events = self.task.events - self._event_listener( - [event["selector"] for event in self.events if event and event["selector"] and event["status"] == False]) - + selectors = [event["selector"] + for event in self.events if event and event["selector"] and event["status"] == False] + element_value = [event["reference_value"] + for event in self.events if event and event["reference_value"] and event["status"] == False] + self._event_listener(selectors, element_value) + # reward, done, user_message, task_info = self.task.validate( # self.page, self.chat.messages, action) # logger.info(f"WebCanvas task validation result:\n{ @@ -459,7 +461,7 @@ def report_infeasible_instructions(reason: str): # extract reward, done, user_message, info (task-specific) reward, done, user_message, task_info = self._task_validate() logger.info(f"WebCanvas task validation result:\n{ - self.task.evaluate_result}") + self.task.evaluate_result}") info["task_info"] = task_info logger.debug(f"Task validation done") @@ -485,8 +487,8 @@ def _task_validate(self): prev_page_history = self.page_history.copy() # call validate reward, done, user_message, info = self.task.validate( - self.page, self.chat.messages,self.last_action) - # info["webcanvas_result"] = self.task.evaluate_result + self.page, self.chat.messages, self.last_action) + # info["webcanvas_result"] = self.task.evaluate_result # safety fix, in case validate() did mess up the active page and/or page history if prev_active_page != self.page or prev_page_history != self.page_history: logger.debug( @@ -617,34 +619,87 @@ def _get_obs(self): return obs - def _event_listener(self, selectors): + # def _event_listener(self, selectors): + # """ + # Add a universal event listener to 
specified selectors to capture various event types + # :param page: Current page object + # :param selectors: List of selectors to listen to + # """ + # self.page.evaluate( + # """ + # ({selectors}) => { + # selectors.forEach((selector) => { + # const element = document.querySelector(selector); + # if (element) { + # const allEvents = [ + # 'click', 'input', 'change', 'keydown', 'keyup', + # 'mouseover', 'mouseout', 'mousedown', 'mouseup', 'focus', 'blur' + # ]; + # allEvents.forEach((eventType) => { + # element.addEventListener(eventType, (event) => { + # const elementText = event.target.textContent || null; + # window.handleEvent(selector, eventType, elementText); + # }, true); // 'true' indicates capture phase + # }); + # } + # }); + # } + # """, + # {"selectors": selectors} + # ) + + def _event_listener(self, selectors: list = [], target_values: list = []): """ - Add a universal event listener to specified selectors to capture various event types + Add event listeners to either specified selectors or globally across all elements. :param page: Current page object - :param selectors: List of selectors to listen to + :param selectors: Optional list of selectors to listen to. If None, will apply globally. + :param target_values: Optional list of text content values to filter events by, used for global listening. 
""" - self.page.evaluate( - """ - ({selectors}) => { - selectors.forEach((selector) => { - const element = document.querySelector(selector); - if (element) { - const allEvents = [ - 'click', 'input', 'change', 'keydown', 'keyup', - 'mouseover', 'mouseout', 'mousedown', 'mouseup', 'focus', 'blur' - ]; - allEvents.forEach((eventType) => { - element.addEventListener(eventType, (event) => { - const elementText = event.target.textContent || null; - window.handleEvent(selector, eventType, elementText); - }, true); // 'true' indicates capture phase - }); - } - }); - } - """, - {"selectors": selectors} - ) + if selectors: + # Specific selectors case + self.page.evaluate( + """ + ({selectors}) => { + selectors.forEach((selector) => { + const element = document.querySelector(selector); + if (element) { + const allEvents = [ + 'click', 'input', 'change', 'keydown', 'keyup', + 'mouseover', 'mouseout', 'mousedown', 'mouseup', 'focus', 'blur' + ]; + allEvents.forEach((eventType) => { + element.addEventListener(eventType, (event) => { + const elementText = event.target.textContent || ''; + window.handleEvent(selector, eventType, elementText); + }, true); // 'true' indicates capture phase + }); + } + }); + } + """, + {"selectors": selectors} + ) + elif target_values: + + self.page.evaluate( + """ + (targetValues) => { + const allEvents = [ + 'click', 'input', 'change', 'keydown', 'keyup', + 'mouseover', 'mouseout', 'mousedown', 'mouseup', 'focus', 'blur' + ]; + allEvents.forEach((eventType) => { + document.addEventListener(eventType, (event) => { + const elementText = event.target.textContent || ''; + if (targetValues.includes(elementText)) { + window.handleEvent(null, eventType, elementText); // No selector in this case + } + }, true); // 'true' indicates capture phase + }); + } + """, + target_values + ) def _handle_event(self, selector, event_type, element_text=None): logger.debug(f"Element with selector '{selector}' triggered '{ @@ -653,4 +708,7 @@ def _handle_event(self, 
selector, event_type, element_text=None): if event and event["selector"] == selector: self.events[idx]["status"] = True self.events[idx]["target_value"] = element_text if element_text else "" + elif event and event["reference_value"] == element_text: + self.events[idx]["status"] = True + self.events[idx]["target_value"] = element_text if element_text else "" self.task.update_events(self.events) diff --git a/browsergym/webcanvas/src/browsergym/webcanvas/__init__.py b/browsergym/webcanvas/src/browsergym/webcanvas/__init__.py index b36ade28..04a41033 100644 --- a/browsergym/webcanvas/src/browsergym/webcanvas/__init__.py +++ b/browsergym/webcanvas/src/browsergym/webcanvas/__init__.py @@ -5,7 +5,7 @@ ALL_WEBCANVAS_TASK_IDS = [] -for task_id in config.TASK_IDS: +for task_id in config.TASK_TRAIN_IDS: gym_id = f"webcanvas.mind2web-live.{task_id}" register_task( gym_id, diff --git a/browsergym/webcanvas/src/browsergym/webcanvas/config.py b/browsergym/webcanvas/src/browsergym/webcanvas/config.py index 0f4bba7e..6d7926d1 100644 --- a/browsergym/webcanvas/src/browsergym/webcanvas/config.py +++ b/browsergym/webcanvas/src/browsergym/webcanvas/config.py @@ -1 +1,2 @@ -TASK_IDS = range(104) +TASK_TEST_IDS = range(104) +TASK_TRAIN_IDS = range(130) diff --git a/browsergym/webcanvas/src/browsergym/webcanvas/data/mind2web-test_104tasks_20240528.json b/browsergym/webcanvas/src/browsergym/webcanvas/data/mind2web-live-test_104tasks_20240528.json similarity index 100% rename from browsergym/webcanvas/src/browsergym/webcanvas/data/mind2web-test_104tasks_20240528.json rename to browsergym/webcanvas/src/browsergym/webcanvas/data/mind2web-live-test_104tasks_20240528.json diff --git a/browsergym/webcanvas/src/browsergym/webcanvas/data/mind2web-train_130.json b/browsergym/webcanvas/src/browsergym/webcanvas/data/mind2web-live-train_130.json similarity index 99% rename from browsergym/webcanvas/src/browsergym/webcanvas/data/mind2web-train_130.json rename to 
browsergym/webcanvas/src/browsergym/webcanvas/data/mind2web-live-train_130.json index afa59f81..c279071e 100644 --- a/browsergym/webcanvas/src/browsergym/webcanvas/data/mind2web-train_130.json +++ b/browsergym/webcanvas/src/browsergym/webcanvas/data/mind2web-live-train_130.json @@ -63,7 +63,7 @@ { "match_function_name": "element_value_exactly_match", "content": { - "path": "#main > div:nth-child(3) > section > div > p:nth-child(3) > a", + "reference_answer": "View Full Menu", "netloc": "amctheatres", "url": "https://www.amctheatres.com/food-and-drink/dine-in/explore-menu" diff --git a/browsergym/webcanvas/src/browsergym/webcanvas/step_score.py b/browsergym/webcanvas/src/browsergym/webcanvas/step_score.py index d9c74eaf..d807a247 100644 --- a/browsergym/webcanvas/src/browsergym/webcanvas/step_score.py +++ b/browsergym/webcanvas/src/browsergym/webcanvas/step_score.py @@ -181,20 +181,20 @@ def element_value_exact_match(input_answer, reference_answer, input_netloc, refe @ staticmethod def element_value_include_match(input_answer, reference_answer, input_netloc, reference_netloc): - if reference_netloc != input_netloc: - # print("reference_netloc:", reference_netloc, - # "input_netloc:", input_netloc) - return 0 + # if reference_netloc != input_netloc: + # # print("reference_netloc:", reference_netloc, + # # "input_netloc:", input_netloc) + # return 0 result_score = MatchFunction.include_match( input_answer, reference_answer) return result_score @ staticmethod def element_value_semantic_match(input_answer, semantic_method, input_netloc, reference_netloc=0): - if reference_netloc != input_netloc: - # print("reference_netloc:", reference_netloc, - # "input_netloc:", input_netloc) - return 0 + # if reference_netloc != input_netloc: + # # print("reference_netloc:", reference_netloc, + # # "input_netloc:", input_netloc) + # return 0 if len(input_answer) == 0: return 0 result_score = MatchFunction.semantic_match( diff --git 
a/browsergym/webcanvas/src/browsergym/webcanvas/task.py b/browsergym/webcanvas/src/browsergym/webcanvas/task.py index c8c21a2e..b37b03ea 100644 --- a/browsergym/webcanvas/src/browsergym/webcanvas/task.py +++ b/browsergym/webcanvas/src/browsergym/webcanvas/task.py @@ -46,7 +46,7 @@ def __init__( # read the list of all WebCanvas task configs import browsergym.webcanvas as wcs all_configs_str = importlib.resources.files(wcs).joinpath( - "data/mind2web-train_130.json").read_text() + "data/mind2web-live-train_130.json").read_text() all_task_configs = json.loads(all_configs_str) all_task = WebCanvasInstance.read_task_configs(all_task_configs) if task_id is not None and task_id < len(all_task): @@ -150,7 +150,6 @@ def validate( if self.task_finished: done = True break - print(self.task_events) self.trace_info.append(step_action_info) return reward, done, msg, info diff --git a/browsergym/webcanvas/src/browsergym/webcanvas/utils.py b/browsergym/webcanvas/src/browsergym/webcanvas/utils.py index e1cb7636..f8f5a3ff 100644 --- a/browsergym/webcanvas/src/browsergym/webcanvas/utils.py +++ b/browsergym/webcanvas/src/browsergym/webcanvas/utils.py @@ -127,6 +127,13 @@ def check_event_by_selector(events, selector): return 1, event return 0, None + def check_event_by_element_value(events, element_value): + for event in events: + if event and event["target_value"] == element_value: + if event["status"]: + return 1, event + return 0, None + step_score = 0 match_result = [] for evaluate in evaluate_steps: @@ -159,10 +166,10 @@ def check_event_by_selector(events, selector): else: score = ElementEvaluator.element_value_exact_match( event["target_value"], evaluate["reference_answer"], input_netloc, evaluate["netloc"]) - print("score:",score) + # print("score:",score) else: - score = ElementEvaluator.element_value_exact_match( - target_value, evaluate["reference_answer"], input_netloc, evaluate["netloc"]) + score, _ = check_event_by_element_value( + task_events, 
evaluate["reference_answer"]) elif match_function == "element_value_included_match": input_netloc = get_netloc(page.url) @@ -175,8 +182,8 @@ def check_event_by_selector(events, selector): score = ElementEvaluator.element_value_include_match( event["target_value"], evaluate["reference_answer"], input_netloc, evaluate["netloc"]) else: - score = ElementEvaluator.element_value_include_match( - event["target_value"], evaluate["reference_answer"], input_netloc, evaluate["netloc"]) + score, _ = check_event_by_element_value( + task_events, evaluate["reference_answer"]) elif match_function == "element_value_semantic_match": input_netloc = get_netloc(page.url) @@ -189,8 +196,8 @@ def check_event_by_selector(events, selector): score = ElementEvaluator.element_value_semantic_match( event["target_value"], evaluate["reference_answer"], input_netloc, evaluate["netloc"]) else: - score = ElementEvaluator.element_value_semantic_match( - event["target_value"], evaluate["reference_answer"], input_netloc, evaluate["netloc"]) + score, _ = check_event_by_element_value( + task_events, evaluate["reference_answer"]) evaluate["score"] = max(evaluate["score"], score) if evaluate["score"] >= 1: diff --git a/test3.py b/test3.py index 537d10b6..9b94cd20 100644 --- a/test3.py +++ b/test3.py @@ -1,47 +1,65 @@ from playwright.sync_api import sync_playwright, Playwright # Define a callback function to handle event triggers - - def handle_event(selector, event_type, element_text=None): - print(f"Element with selector '{selector}' triggered '{ - event_type}' event, text content: {element_text}") - + print(f"Element with selector '{selector}' triggered '{event_type}' event, text content: {element_text}") -def event_listener(page, selectors): +def add_event_listener(page, selectors=None, target_values=None): """ - Add a universal event listener to specified selectors to capture various event types + Add event listeners to either specified selectors or globally across all elements. 
:param page: Current page object - :param selectors: List of selectors to listen to + :param selectors: Optional list of selectors to listen to. If None, will apply globally. + :param target_values: Optional list of text content values to filter events by, used for global listening. """ - page.evaluate( - """ - ({selectors}) => { - selectors.forEach((selector) => { - const element = document.querySelector(selector); - if (element) { - const allEvents = [ - 'click', 'input', 'change', 'keydown', 'keyup', - 'mouseover', 'mouseout', 'mousedown', 'mouseup', 'focus', 'blur' - ]; - allEvents.forEach((eventType) => { - element.addEventListener(eventType, (event) => { - const elementText = event.target.textContent || null; - window.handleEvent(selector, eventType, elementText); - }, true); // 'true' indicates capture phase - }); - } - }); - } - """, - {"selectors": selectors} - ) + if selectors: + # Specific selectors case + page.evaluate( + """ + ({selectors}) => { + selectors.forEach((selector) => { + const element = document.querySelector(selector); + if (element) { + const allEvents = [ + 'click', 'input', 'change', 'keydown', 'keyup', + 'mouseover', 'mouseout', 'mousedown', 'mouseup', 'focus', 'blur' + ]; + allEvents.forEach((eventType) => { + element.addEventListener(eventType, (event) => { + const elementText = event.target.textContent || ''; + window.handleEvent(selector, eventType, elementText); + }, true); // 'true' indicates capture phase + }); + } + }); + } + """, + {"selectors": selectors} + ) + elif target_values: + # Global listener with multiple target_values filtering + page.evaluate( + """ + (targetValues) => { + const allEvents = [ + 'click', 'input', 'change', 'keydown', 'keyup', + 'mouseover', 'mouseout', 'mousedown', 'mouseup', 'focus', 'blur' + ]; + allEvents.forEach((eventType) => { + document.addEventListener(eventType, (event) => { + const elementText = event.target.textContent || ''; + if (targetValues.includes(elementText)) { + 
window.handleEvent(null, eventType, elementText); // No selector in this case + } + }, true); // 'true' indicates capture phase + }); + } + """, + target_values + ) return page - def run(playwright: Playwright): chromium = playwright.chromium - # Set to True to enable headless mode browser = chromium.launch(headless=False) context = browser.new_context() page = context.new_page() @@ -53,20 +71,17 @@ def run(playwright: Playwright): page.wait_for_load_state('load') # Expose the Python function to the page context - context.expose_binding("handleEvent", lambda source, selector, event_type, - element_text: handle_event(selector, event_type, element_text)) + context.expose_binding("handleEvent", lambda source, selector, event_type, element_text: handle_event(selector, event_type, element_text)) - # Call the function to add a universal event listener to specified selectors - page = event_listener( + # Add event listeners with specific selectors and multiple target values + add_event_listener( page, - [ - "#main > div:nth-child(3) > section > div > p:nth-child(3) > a" - ] # Add selectors you want to listen to + selectors=[], # Add specific selectors + target_values=["View Full Menu", "Movies"] # List of target values for global monitoring ) # Simulate clicking on an element - locator = page.locator( - "#main > div:nth-child(3) > section > div > p:nth-child(3) > a") + locator = page.locator("#main > div:nth-child(3) > section > div > p:nth-child(3) > a") locator.click() page.wait_for_timeout(3000) @@ -74,6 +89,5 @@ def run(playwright: Playwright): # Close the browser browser.close() - with sync_playwright() as playwright: run(playwright) From 18103e4483f59f326cc9e2e66efdea66a8feecf2 Mon Sep 17 00:00:00 2001 From: zsd <909087485@qq.com> Date: Sat, 26 Oct 2024 04:59:49 +0000 Subject: [PATCH 06/15] delete test file --- .vscode/launch.json | 15 ------- test2.py | 81 ------------------------------------ test3.py | 93 ------------------------------------------ 
test_expose_binding.py | 48 ---------------------- 4 files changed, 237 deletions(-) delete mode 100644 .vscode/launch.json delete mode 100644 test2.py delete mode 100644 test3.py delete mode 100644 test_expose_binding.py diff --git a/.vscode/launch.json b/.vscode/launch.json deleted file mode 100644 index 61def22f..00000000 --- a/.vscode/launch.json +++ /dev/null @@ -1,15 +0,0 @@ -{ - // Use IntelliSense to learn about possible attributes. - // Hover to view descriptions of existing attributes. - // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 - "version": "0.2.0", - "configurations": [ - { - "name": "Python Debugger: demo_agent/run_demo.py", - "type": "debugpy", - "request": "launch", - "program": "${file}", - "console": "integratedTerminal" - } - ] -} \ No newline at end of file diff --git a/test2.py b/test2.py deleted file mode 100644 index 34e0acf6..00000000 --- a/test2.py +++ /dev/null @@ -1,81 +0,0 @@ -from playwright.sync_api import sync_playwright, Playwright - -# Define a callback function to handle event triggers -def handle_event(selector, event_type, element_text=None): - print(f"Element with selector '{selector}' triggered '{event_type}' event, text content: {element_text}") - -def event_listener(page, selectors): - """ - Add a universal event listener to specified selectors to capture various event types - :param page: Current page object - :param selectors: List of selectors to listen to - """ - page.evaluate( - """ - ({selectors}) => { - selectors.forEach((selector) => { - const element = document.querySelector(selector); - if (element) { - const allEvents = [ - 'click', 'input', 'change', 'keydown', 'keyup', - 'mouseover', 'mouseout', 'mousedown', 'mouseup', 'focus', 'blur' - ]; - allEvents.forEach((eventType) => { - element.addEventListener(eventType, (event) => { - const elementText = event.target.textContent || null; - window.handleEvent(selector, eventType, elementText); - }, true); // 'true' indicates capture 
phase - }); - } - }); - } - """, - {"selectors": selectors} - ) - return page - -def run(playwright: Playwright): - chromium = playwright.chromium - browser = chromium.launch(headless=False) # Set to True to enable headless mode - context = browser.new_context() - page = context.new_page() - - # Navigate to the target page - page.goto("https://store.steampowered.com/app/570/Dota_2/") - - # Wait for the page to fully load - page.wait_for_load_state('load') - - # Expose the Python function to the page context - context.expose_binding("handleEvent", lambda source, selector, event_type, element_text: handle_event(selector, event_type, element_text)) - - # Call the function to add a universal event listener to specified selectors - page = event_listener( - page, - ["#noteworthy_tab > span > a.pulldown_desktop", - "#store_nav_search_term"] # Add selectors you want to listen to - ) - - # Simulate clicking on an element - locator = page.locator("#noteworthy_tab > span > a.pulldown_desktop") - locator.click() - - page.wait_for_timeout(3000) - - # Call event_listener again to listen to new events - page = event_listener( - page, - ["#store_nav_search_term"] # Add selectors you want to listen to - ) - - # Simulate filling content in the input field - fill_locator = page.locator("#store_nav_search_term") - fill_locator.fill("game") - - page.wait_for_timeout(3000) - - # Close the browser - browser.close() - -with sync_playwright() as playwright: - run(playwright) diff --git a/test3.py b/test3.py deleted file mode 100644 index 9b94cd20..00000000 --- a/test3.py +++ /dev/null @@ -1,93 +0,0 @@ -from playwright.sync_api import sync_playwright, Playwright - -# Define a callback function to handle event triggers -def handle_event(selector, event_type, element_text=None): - print(f"Element with selector '{selector}' triggered '{event_type}' event, text content: {element_text}") - -def add_event_listener(page, selectors=None, target_values=None): - """ - Add event listeners to either 
specified selectors or globally across all elements. - :param page: Current page object - :param selectors: Optional list of selectors to listen to. If None, will apply globally. - :param target_values: Optional list of text content values to filter events by, used for global listening. - """ - if selectors: - # Specific selectors case - page.evaluate( - """ - ({selectors}) => { - selectors.forEach((selector) => { - const element = document.querySelector(selector); - if (element) { - const allEvents = [ - 'click', 'input', 'change', 'keydown', 'keyup', - 'mouseover', 'mouseout', 'mousedown', 'mouseup', 'focus', 'blur' - ]; - allEvents.forEach((eventType) => { - element.addEventListener(eventType, (event) => { - const elementText = event.target.textContent || ''; - window.handleEvent(selector, eventType, elementText); - }, true); // 'true' indicates capture phase - }); - } - }); - } - """, - {"selectors": selectors} - ) - elif target_values: - # Global listener with multiple target_values filtering - page.evaluate( - """ - (targetValues) => { - const allEvents = [ - 'click', 'input', 'change', 'keydown', 'keyup', - 'mouseover', 'mouseout', 'mousedown', 'mouseup', 'focus', 'blur' - ]; - allEvents.forEach((eventType) => { - document.addEventListener(eventType, (event) => { - const elementText = event.target.textContent || ''; - if (targetValues.includes(elementText)) { - window.handleEvent(null, eventType, elementText); // No selector in this case - } - }, true); // 'true' indicates capture phase - }); - } - """, - target_values - ) - return page - -def run(playwright: Playwright): - chromium = playwright.chromium - browser = chromium.launch(headless=False) - context = browser.new_context() - page = context.new_page() - - # Navigate to the target page - page.goto("https://www.amctheatres.com/food-and-drink/dine-in/explore-menu") - - # Wait for the page to fully load - page.wait_for_load_state('load') - - # Expose the Python function to the page context - 
context.expose_binding("handleEvent", lambda source, selector, event_type, element_text: handle_event(selector, event_type, element_text)) - - # Add event listeners with specific selectors and multiple target values - add_event_listener( - page, - selectors=[], # Add specific selectors - target_values=["View Full Menu", "Movies"] # List of target values for global monitoring - ) - - # Simulate clicking on an element - locator = page.locator("#main > div:nth-child(3) > section > div > p:nth-child(3) > a") - locator.click() - - page.wait_for_timeout(3000) - - # Close the browser - browser.close() - -with sync_playwright() as playwright: - run(playwright) diff --git a/test_expose_binding.py b/test_expose_binding.py deleted file mode 100644 index e4b05930..00000000 --- a/test_expose_binding.py +++ /dev/null @@ -1,48 +0,0 @@ -from playwright.sync_api import sync_playwright, Playwright - -def func(a, b): - print(f"Action detected on element with ID: {b, a}") - -def run(playwright: Playwright): - webkit = playwright.webkit - browser = webkit.launch(headless=False) - context = browser.new_context() - - # 暴露多个函数 - a = "1" - b = 1 - context.expose_binding( - "handleAction", - lambda source, element_id: func(a, b) - ) - context.expose_binding( - "handleClick", - lambda source, element_id: print(f"Click detected on element with ID: {element_id}") - ) - - page = context.new_page() - - # 添加初始化脚本,监听 click 事件并调用 handleAction 和 handleClick 函数 - context.add_init_script( - r""" - window.addEventListener("click", (event) => { - // event.preventDefault(); // 阻止默认的点击行为 - window.handleAction(event.target.id); - window.handleClick(event.target.id); - // 重新触发点击事件 - // event.target.click(); - }, {capture: true}); - """ - ) - - page.goto("https://playwright.dev/python/docs/api/class-browsercontext#browser-context-expose-binding") - - # 定位页面中的元素并模拟点击 - locator = page.locator("#__docusaurus > nav > div.navbar__inner > div:nth-child(1) > a.navbar__brand") - locator.click() - - # 关闭浏览器 - 
browser.close() - -with sync_playwright() as playwright: - run(playwright) \ No newline at end of file From 284bc1a6ede55717bdcb260fb0f19c12fde6ecc7 Mon Sep 17 00:00:00 2001 From: zsd <909087485@qq.com> Date: Sat, 26 Oct 2024 05:06:55 +0000 Subject: [PATCH 07/15] version 1.2 --- .gitignore | 6 +- browsergym/core/src/browsergym/core/env.py | 46 ++------------- .../src/browsergym/webcanvas/task.py | 59 ++++++++----------- 3 files changed, 32 insertions(+), 79 deletions(-) diff --git a/.gitignore b/.gitignore index 2e8a47c8..5faa6845 100644 --- a/.gitignore +++ b/.gitignore @@ -152,6 +152,6 @@ tests/assistantbench/assistantbench-predictions-test.jsonl bg_wl_data/ # webcanvas -test_gpt.py -temp_test.py -results/ \ No newline at end of file +.vscode/* +results/ +tests/webcanvas/* \ No newline at end of file diff --git a/browsergym/core/src/browsergym/core/env.py b/browsergym/core/src/browsergym/core/env.py index a4807523..2f5bc6a6 100644 --- a/browsergym/core/src/browsergym/core/env.py +++ b/browsergym/core/src/browsergym/core/env.py @@ -400,7 +400,7 @@ def report_infeasible_instructions(reason: str): self.infeasible_message_received = True if hasattr(self.task, 'webcanvas'): - logger.debug(f"Initiating webcanvas task validation") + logger.debug(f"Initiating webcanvas task event listen") self.events = self.task.events selectors = [event["selector"] for event in self.events if event and event["selector"] and event["status"] == False] @@ -408,13 +408,6 @@ def report_infeasible_instructions(reason: str): for event in self.events if event and event["reference_value"] and event["status"] == False] self._event_listener(selectors, element_value) - # reward, done, user_message, task_info = self.task.validate( - # self.page, self.chat.messages, action) - # logger.info(f"WebCanvas task validation result:\n{ - # self.task.evaluate_result}") - # info["task_info"] = task_info - # info["webcanvas_result"] = self.task.evaluate_result - # try to execute the action 
logger.debug(f"Executing action") try: @@ -456,12 +449,10 @@ def report_infeasible_instructions(reason: str): self._wait_for_user_message() logger.debug(f"User message done") - # if not hasattr(self.task, 'webcanvas'): logger.debug(f"Initiating task validation") # extract reward, done, user_message, info (task-specific) reward, done, user_message, task_info = self._task_validate() - logger.info(f"WebCanvas task validation result:\n{ - self.task.evaluate_result}") + logger.info(f"Task validation info:\n{task_info}") info["task_info"] = task_info logger.debug(f"Task validation done") @@ -619,35 +610,6 @@ def _get_obs(self): return obs - # def _event_listener(self, selectors): - # """ - # Add a universal event listener to specified selectors to capture various event types - # :param page: Current page object - # :param selectors: List of selectors to listen to - # """ - # self.page.evaluate( - # """ - # ({selectors}) => { - # selectors.forEach((selector) => { - # const element = document.querySelector(selector); - # if (element) { - # const allEvents = [ - # 'click', 'input', 'change', 'keydown', 'keyup', - # 'mouseover', 'mouseout', 'mousedown', 'mouseup', 'focus', 'blur' - # ]; - # allEvents.forEach((eventType) => { - # element.addEventListener(eventType, (event) => { - # const elementText = event.target.textContent || null; - # window.handleEvent(selector, eventType, elementText); - # }, true); // 'true' indicates capture phase - # }); - # } - # }); - # } - # """, - # {"selectors": selectors} - # ) - def _event_listener(self, selectors: list = [], target_values: list = []): """ Add event listeners to either specified selectors or globally across all elements. 
@@ -679,8 +641,7 @@ def _event_listener(self, selectors: list = [], target_values: list = []): """, {"selectors": selectors} ) - elif target_values: - + if target_values: self.page.evaluate( """ (targetValues) => { @@ -712,3 +673,4 @@ def _handle_event(self, selector, event_type, element_text=None): self.events[idx]["status"] = True self.events[idx]["target_value"] = element_text if element_text else "" self.task.update_events(self.events) + diff --git a/browsergym/webcanvas/src/browsergym/webcanvas/task.py b/browsergym/webcanvas/src/browsergym/webcanvas/task.py index b37b03ea..faeb1737 100644 --- a/browsergym/webcanvas/src/browsergym/webcanvas/task.py +++ b/browsergym/webcanvas/src/browsergym/webcanvas/task.py @@ -4,6 +4,7 @@ import playwright.sync_api import importlib.resources import tempfile +from pathlib import Path from typing import Optional, Tuple @@ -37,6 +38,7 @@ def __init__( self.match_result: str = None self.task_finish: bool = False self.activate_element = None + self.task_id = task_id if task_id is None: raise ValueError( @@ -68,7 +70,7 @@ def get_task_id(cls): raise NotImplementedError def setup(self, page: playwright.sync_api.Page, start_url: str = None) -> tuple[str, dict]: - self.goal, _, _, reference_evaluate_steps = self.task_configs + self.goal, self.task_uuid, self.reference_task_length, reference_evaluate_steps = self.task_configs self.evaluaion_step = reference_evaluate_steps self.reference_evaluate_steps = reference_evaluate_steps start_url = start_url if start_url else self.start_url @@ -95,43 +97,18 @@ def validate( ) -> Tuple[float, bool, str, dict]: reward, done, msg, info = 0, False, "", {} - for message in chat_messages: - if message["role"] == "user" and message["message"] == "exit": - done = True - break - self.time_step += 1 step_action_info = {} step_action_info["time_step"] = self.time_step + step_action_info["action_str"] = action step_action_info["evaluation"] = [] + + for message in chat_messages: + if message["role"] == 
"user" and message["message"] == "exit": + done = True + break actions = WebCanvasInstance.parse_bid_from_action(action) - # if len(actions) > 0: - # for action_type, bid, target_value in actions: - # locator = None - # # if bid != '': - # # try: - # # locator = self.get_element_by_bid(page, bid) - # # except Exception as e: - # # logger.warning(f"warning:{e}") - # # locator = None - # self.evaluaion_step, self.step_score_rate, self.match_result, self.task_finished = WebCanvasInstance.evaluate( - # page, locator, target_value, self.evaluaion_step, self.reference_evaluate_steps) - - # step_action_info["evaluation"].append( - # { - # "action_type": action_type, - # "bid": bid, - # "target_value": target_value, - # "step_score_rate": self.step_score_rate, - # "match_result": self.match_result, - # "task_status": self.task_finished - # } - # ) - - # if self.task_finished: - # done = True - # break if len(actions) > 0: for action_type, bid, target_value in actions: self.evaluaion_step, self.step_score_rate, self.match_result, self.task_finished = WebCanvasInstance.evaluate_events( @@ -144,13 +121,16 @@ def validate( "target_value": target_value, "step_score_rate": self.step_score_rate, "match_result": self.match_result, - "task_status": self.task_finished + "task_status": self.task_finished, } ) - if self.task_finished: + if self.task_finished or self.time_step >= int(self.reference_task_length * 1.5): done = True break self.trace_info.append(step_action_info) + info = self.trace_info[-1] + if done: + self._save_result() return reward, done, msg, info # https://github.com/ServiceNow/BrowserGym/blob/main/browsergym/core/src/browsergym/core/action/utils.py @@ -220,3 +200,14 @@ def update_events(self, agent_event): if event and event['status']: self.task_events[ix]["status"] = event['status'] self.task_events[ix]["target_value"] = event['target_value'] + + def _save_result(self,): + import browsergym.webcanvas as wcs + from datetime import datetime + base_path = 
importlib.resources.files(wcs).joinpath("results") + today = datetime.today().strftime('%Y-%m-%d') + file_path = Path(base_path) / today / \ + f"{self.task_id}--{self.task_uuid}.json" + file_path.parent.mkdir(parents=True, exist_ok=True) + with open(file_path, 'w') as json_file: + json.dump(self.trace_info, json_file, indent=4) From 424880389aaf8ae8cdd2aec9af8ab0a1764d1ed0 Mon Sep 17 00:00:00 2001 From: han032206 Date: Thu, 21 Nov 2024 22:44:59 +0800 Subject: [PATCH 08/15] re-frame event listener used by webcanvas --- demo_agent/run_demo.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/demo_agent/run_demo.py b/demo_agent/run_demo.py index d59c90b3..1485823d 100644 --- a/demo_agent/run_demo.py +++ b/demo_agent/run_demo.py @@ -1,4 +1,5 @@ import argparse +import gymnasium as gym # locally defined agent from agent import DemoAgentArgs @@ -29,7 +30,7 @@ def parse_args(): parser.add_argument( "--task_name", type=str, - default="webcanvas.mind2web-live.1", + default="browsergym/webcanvas.mind2web-live.1", help="Name of the Browsergym task to run. If 'openended', you need to specify a 'start_url'", ) parser.add_argument( From a6db8bfa337603c1e44161e312080c3e21773551 Mon Sep 17 00:00:00 2001 From: han032206 Date: Tue, 26 Nov 2024 16:33:09 +0800 Subject: [PATCH 09/15] Revert "version 1.2" This reverts commit 915d58036de2a05e01e0d53852de2767fa14dd67. 
--- .gitignore | 6 +- browsergym/core/src/browsergym/core/env.py | 46 +++++++++++++-- .../src/browsergym/webcanvas/task.py | 59 +++++++++++-------- 3 files changed, 79 insertions(+), 32 deletions(-) diff --git a/.gitignore b/.gitignore index 5faa6845..2e8a47c8 100644 --- a/.gitignore +++ b/.gitignore @@ -152,6 +152,6 @@ tests/assistantbench/assistantbench-predictions-test.jsonl bg_wl_data/ # webcanvas -.vscode/* -results/ -tests/webcanvas/* \ No newline at end of file +test_gpt.py +temp_test.py +results/ \ No newline at end of file diff --git a/browsergym/core/src/browsergym/core/env.py b/browsergym/core/src/browsergym/core/env.py index 2f5bc6a6..a4807523 100644 --- a/browsergym/core/src/browsergym/core/env.py +++ b/browsergym/core/src/browsergym/core/env.py @@ -400,7 +400,7 @@ def report_infeasible_instructions(reason: str): self.infeasible_message_received = True if hasattr(self.task, 'webcanvas'): - logger.debug(f"Initiating webcanvas task event listen") + logger.debug(f"Initiating webcanvas task validation") self.events = self.task.events selectors = [event["selector"] for event in self.events if event and event["selector"] and event["status"] == False] @@ -408,6 +408,13 @@ def report_infeasible_instructions(reason: str): for event in self.events if event and event["reference_value"] and event["status"] == False] self._event_listener(selectors, element_value) + # reward, done, user_message, task_info = self.task.validate( + # self.page, self.chat.messages, action) + # logger.info(f"WebCanvas task validation result:\n{ + # self.task.evaluate_result}") + # info["task_info"] = task_info + # info["webcanvas_result"] = self.task.evaluate_result + # try to execute the action logger.debug(f"Executing action") try: @@ -449,10 +456,12 @@ def report_infeasible_instructions(reason: str): self._wait_for_user_message() logger.debug(f"User message done") + # if not hasattr(self.task, 'webcanvas'): logger.debug(f"Initiating task validation") # extract reward, done, 
user_message, info (task-specific) reward, done, user_message, task_info = self._task_validate() - logger.info(f"Task validation info:\n{task_info}") + logger.info(f"WebCanvas task validation result:\n{ + self.task.evaluate_result}") info["task_info"] = task_info logger.debug(f"Task validation done") @@ -610,6 +619,35 @@ def _get_obs(self): return obs + # def _event_listener(self, selectors): + # """ + # Add a universal event listener to specified selectors to capture various event types + # :param page: Current page object + # :param selectors: List of selectors to listen to + # """ + # self.page.evaluate( + # """ + # ({selectors}) => { + # selectors.forEach((selector) => { + # const element = document.querySelector(selector); + # if (element) { + # const allEvents = [ + # 'click', 'input', 'change', 'keydown', 'keyup', + # 'mouseover', 'mouseout', 'mousedown', 'mouseup', 'focus', 'blur' + # ]; + # allEvents.forEach((eventType) => { + # element.addEventListener(eventType, (event) => { + # const elementText = event.target.textContent || null; + # window.handleEvent(selector, eventType, elementText); + # }, true); // 'true' indicates capture phase + # }); + # } + # }); + # } + # """, + # {"selectors": selectors} + # ) + def _event_listener(self, selectors: list = [], target_values: list = []): """ Add event listeners to either specified selectors or globally across all elements. 
@@ -641,7 +679,8 @@ def _event_listener(self, selectors: list = [], target_values: list = []): """, {"selectors": selectors} ) - if target_values: + elif target_values: + self.page.evaluate( """ (targetValues) => { @@ -673,4 +712,3 @@ def _handle_event(self, selector, event_type, element_text=None): self.events[idx]["status"] = True self.events[idx]["target_value"] = element_text if element_text else "" self.task.update_events(self.events) - diff --git a/browsergym/webcanvas/src/browsergym/webcanvas/task.py b/browsergym/webcanvas/src/browsergym/webcanvas/task.py index faeb1737..b37b03ea 100644 --- a/browsergym/webcanvas/src/browsergym/webcanvas/task.py +++ b/browsergym/webcanvas/src/browsergym/webcanvas/task.py @@ -4,7 +4,6 @@ import playwright.sync_api import importlib.resources import tempfile -from pathlib import Path from typing import Optional, Tuple @@ -38,7 +37,6 @@ def __init__( self.match_result: str = None self.task_finish: bool = False self.activate_element = None - self.task_id = task_id if task_id is None: raise ValueError( @@ -70,7 +68,7 @@ def get_task_id(cls): raise NotImplementedError def setup(self, page: playwright.sync_api.Page, start_url: str = None) -> tuple[str, dict]: - self.goal, self.task_uuid, self.reference_task_length, reference_evaluate_steps = self.task_configs + self.goal, _, _, reference_evaluate_steps = self.task_configs self.evaluaion_step = reference_evaluate_steps self.reference_evaluate_steps = reference_evaluate_steps start_url = start_url if start_url else self.start_url @@ -97,18 +95,43 @@ def validate( ) -> Tuple[float, bool, str, dict]: reward, done, msg, info = 0, False, "", {} - self.time_step += 1 - step_action_info = {} - step_action_info["time_step"] = self.time_step - step_action_info["action_str"] = action - step_action_info["evaluation"] = [] - for message in chat_messages: if message["role"] == "user" and message["message"] == "exit": done = True break + self.time_step += 1 + step_action_info = {} + 
step_action_info["time_step"] = self.time_step + step_action_info["evaluation"] = [] + actions = WebCanvasInstance.parse_bid_from_action(action) + # if len(actions) > 0: + # for action_type, bid, target_value in actions: + # locator = None + # # if bid != '': + # # try: + # # locator = self.get_element_by_bid(page, bid) + # # except Exception as e: + # # logger.warning(f"warning:{e}") + # # locator = None + # self.evaluaion_step, self.step_score_rate, self.match_result, self.task_finished = WebCanvasInstance.evaluate( + # page, locator, target_value, self.evaluaion_step, self.reference_evaluate_steps) + + # step_action_info["evaluation"].append( + # { + # "action_type": action_type, + # "bid": bid, + # "target_value": target_value, + # "step_score_rate": self.step_score_rate, + # "match_result": self.match_result, + # "task_status": self.task_finished + # } + # ) + + # if self.task_finished: + # done = True + # break if len(actions) > 0: for action_type, bid, target_value in actions: self.evaluaion_step, self.step_score_rate, self.match_result, self.task_finished = WebCanvasInstance.evaluate_events( @@ -121,16 +144,13 @@ def validate( "target_value": target_value, "step_score_rate": self.step_score_rate, "match_result": self.match_result, - "task_status": self.task_finished, + "task_status": self.task_finished } ) - if self.task_finished or self.time_step >= int(self.reference_task_length * 1.5): + if self.task_finished: done = True break self.trace_info.append(step_action_info) - info = self.trace_info[-1] - if done: - self._save_result() return reward, done, msg, info # https://github.com/ServiceNow/BrowserGym/blob/main/browsergym/core/src/browsergym/core/action/utils.py @@ -200,14 +220,3 @@ def update_events(self, agent_event): if event and event['status']: self.task_events[ix]["status"] = event['status'] self.task_events[ix]["target_value"] = event['target_value'] - - def _save_result(self,): - import browsergym.webcanvas as wcs - from datetime import datetime 
- base_path = importlib.resources.files(wcs).joinpath("results") - today = datetime.today().strftime('%Y-%m-%d') - file_path = Path(base_path) / today / \ - f"{self.task_id}--{self.task_uuid}.json" - file_path.parent.mkdir(parents=True, exist_ok=True) - with open(file_path, 'w') as json_file: - json.dump(self.trace_info, json_file, indent=4) From 02919c9adb8c25847f75a8cefa5f39e98419d68d Mon Sep 17 00:00:00 2001 From: han032206 Date: Tue, 26 Nov 2024 16:42:03 +0800 Subject: [PATCH 10/15] reframe --- .../src/browsergym/webcanvas/event_handler.py | 111 ++++++++++++++++++ 1 file changed, 111 insertions(+) create mode 100644 browsergym/webcanvas/src/browsergym/webcanvas/event_handler.py diff --git a/browsergym/webcanvas/src/browsergym/webcanvas/event_handler.py b/browsergym/webcanvas/src/browsergym/webcanvas/event_handler.py new file mode 100644 index 00000000..309f6a10 --- /dev/null +++ b/browsergym/webcanvas/src/browsergym/webcanvas/event_handler.py @@ -0,0 +1,111 @@ +class EventHandler: + def __init__(self, page, events=None): + self.page = page + self.events = events or [] + self.update_callback = None + self._setup_event_listeners() + + def _setup_event_listeners(self): + """Set up event listeners for the page""" + # 首先暴露 handle_event 方法给浏览器 + self.page.expose_function("handleEvent", + lambda selector, event_type, element_info: self.handle_event(selector, event_type, element_info)) + + # 然后设置事件监听器 + self.page.evaluate(""" + () => { + const allEvents = [ + 'click', 'input', 'change', 'keydown', 'keyup', + 'mouseover', 'mouseout', 'mousedown', 'mouseup', 'focus', 'blur' + ]; + + function getElementInfo(element) { + return { + selector: null, + textContent: element.textContent || '', + value: element.value || '', + type: element.type || '', + checked: element.checked, + selected: element.selected, + tagName: element.tagName.toLowerCase() + }; + } + + document.addEventListener('click', (event) => { + const elementInfo = getElementInfo(event.target); + 
window.handleEvent(null, 'click', JSON.stringify(elementInfo)); + }, true); + + document.addEventListener('input', (event) => { + const elementInfo = getElementInfo(event.target); + window.handleEvent(null, 'input', JSON.stringify(elementInfo)); + }, true); + + if (window.selectors && window.selectors.length) { + window.selectors.forEach((selector) => { + const element = document.querySelector(selector); + if (element) { + allEvents.forEach((eventType) => { + element.addEventListener(eventType, (event) => { + const elementInfo = getElementInfo(event.target); + elementInfo.selector = selector; + window.handleEvent(selector, eventType, JSON.stringify(elementInfo)); + }, true); + }); + } + }); + } + } + """) + + def set_update_callback(self, callback): + """Set callback function to be called when events are updated""" + self.update_callback = callback + + def handle_event(self, selector, event_type, element_info_str): + """Handle events from the page by collecting and maintaining event states + + Args: + selector: Element selector if any + event_type: Type of the event (click, input, etc) + element_info_str: JSON string containing element information + + Returns: + Updated events list or None if error occurs + """ + try: + import json + import logging + from .utils import get_netloc + + logger = logging.getLogger(__name__) + element_info = json.loads(element_info_str) + logger.debug(f"Element event: {element_info}") + + for idx, event in enumerate(self.events): + if not event: + continue + + # Update event state based on selector match + if event.get("selector") and event["selector"] == selector: + self.events[idx]["status"] = True + self.events[idx]["target_value"] = element_info["textContent"] + self.events[idx]["event_type"] = event_type + + # Update event state based on element value + elif event.get("reference_value"): + element_value = element_info.get("value", "") or element_info.get("textContent", "") + self.events[idx]["target_value"] = element_value + 
self.events[idx]["event_type"] = event_type + + # Notify callback if exists + if self.update_callback: + self.update_callback(self.events) + + return self.events + + except json.JSONDecodeError: + logger.error(f"Failed to parse element info: {element_info_str}") + except Exception as e: + logger.error(f"Error handling event: {str(e)}") + return None \ No newline at end of file From c972d1b141c90456244247360c06b0ee6324523b Mon Sep 17 00:00:00 2001 From: han032206 Date: Sat, 7 Dec 2024 21:45:48 +0800 Subject: [PATCH 11/15] update a universal event listener and update webcanvas evaluation into browsergym --- demo_agent/run_demo.py | 1 - 1 file changed, 1 deletion(-) diff --git a/demo_agent/run_demo.py b/demo_agent/run_demo.py index 1485823d..07aeac87 100644 --- a/demo_agent/run_demo.py +++ b/demo_agent/run_demo.py @@ -118,7 +118,6 @@ def main(): # loading and printing results exp_result = get_exp_result(exp_args.exp_dir) exp_record = exp_result.get_exp_record() - for key, val in exp_record.items(): print(f"{key}: {val}") From 8d077d9238f462c574bb3a406b0befd11ccc9305 Mon Sep 17 00:00:00 2001 From: han032206 Date: Sat, 7 Dec 2024 21:48:18 +0800 Subject: [PATCH 12/15] update a universal event listener and update webcanvas evaluation into browsergym --- browsergym/core/src/browsergym/core/env.py | 199 +++-- ....json => mind2web-live_test_20241024.json} | 707 ++++++++---------- .../src/browsergym/webcanvas/event_handler.py | 111 --- .../src/browsergym/webcanvas/instance.py | 4 +- .../src/browsergym/webcanvas/step_score.py | 31 +- .../src/browsergym/webcanvas/task.py | 66 +- .../src/browsergym/webcanvas/utils.py | 63 +- 7 files changed, 424 insertions(+), 757 deletions(-) rename browsergym/webcanvas/src/browsergym/webcanvas/data/{mind2web-live-test_104tasks_20240528.json => mind2web-live_test_20241024.json} (93%) delete mode 100644 browsergym/webcanvas/src/browsergym/webcanvas/event_handler.py diff --git a/browsergym/core/src/browsergym/core/env.py 
b/browsergym/core/src/browsergym/core/env.py index a4807523..299334a4 100644 --- a/browsergym/core/src/browsergym/core/env.py +++ b/browsergym/core/src/browsergym/core/env.py @@ -9,6 +9,7 @@ import gymnasium as gym import numpy as np import playwright.sync_api +import json from . import _get_global_playwright from .action.base import execute_python_code @@ -269,8 +270,7 @@ def override_property(task, env, property): ) self.context.expose_binding( - "handleEvent", lambda source, - selector, event_type, element_text: self._handle_event(selector, event_type, element_text) + "handleEvent", lambda selector, event_type, element_text: self._handle_event(selector, event_type, element_text) ) self.context.add_init_script( @@ -382,6 +382,9 @@ def override_property(task, env, property): def step(self, action: str) -> tuple: + # Setup event listener after page is create + self._event_listener() + self.last_action = action info = {} @@ -399,21 +402,10 @@ def report_infeasible_instructions(reason: str): self.chat.add_message(role="infeasible", msg=reason) self.infeasible_message_received = True + if hasattr(self.task, 'webcanvas'): - logger.debug(f"Initiating webcanvas task validation") - self.events = self.task.events - selectors = [event["selector"] - for event in self.events if event and event["selector"] and event["status"] == False] - element_value = [event["reference_value"] - for event in self.events if event and event["reference_value"] and event["status"] == False] - self._event_listener(selectors, element_value) - - # reward, done, user_message, task_info = self.task.validate( - # self.page, self.chat.messages, action) - # logger.info(f"WebCanvas task validation result:\n{ - # self.task.evaluate_result}") - # info["task_info"] = task_info - # info["webcanvas_result"] = self.task.evaluate_result + logger.debug(f"Initiating webcanvas task event listen") + self._event_listener() # try to execute the action logger.debug(f"Executing action") @@ -488,7 +480,6 @@ def 
_task_validate(self): # call validate reward, done, user_message, info = self.task.validate( self.page, self.chat.messages, self.last_action) - # info["webcanvas_result"] = self.task.evaluate_result # safety fix, in case validate() did mess up the active page and/or page history if prev_active_page != self.page or prev_page_history != self.page_history: logger.debug( @@ -619,96 +610,98 @@ def _get_obs(self): return obs - # def _event_listener(self, selectors): - # """ - # Add a universal event listener to specified selectors to capture various event types - # :param page: Current page object - # :param selectors: List of selectors to listen to - # """ - # self.page.evaluate( - # """ - # ({selectors}) => { - # selectors.forEach((selector) => { - # const element = document.querySelector(selector); - # if (element) { - # const allEvents = [ - # 'click', 'input', 'change', 'keydown', 'keyup', - # 'mouseover', 'mouseout', 'mousedown', 'mouseup', 'focus', 'blur' - # ]; - # allEvents.forEach((eventType) => { - # element.addEventListener(eventType, (event) => { - # const elementText = event.target.textContent || null; - # window.handleEvent(selector, eventType, elementText); - # }, true); // 'true' indicates capture phase - # }); - # } - # }); - # } - # """, - # {"selectors": selectors} - # ) - - def _event_listener(self, selectors: list = [], target_values: list = []): - """ - Add event listeners to either specified selectors or globally across all elements. - :param page: Current page object - :param selectors: Optional list of selectors to listen to. If None, will apply globally. - :param target_values: Optional list of text content values to filter events by, used for global listening. 
- """ - if selectors: - # Specific selectors case - self.page.evaluate( - """ - ({selectors}) => { - selectors.forEach((selector) => { - const element = document.querySelector(selector); - if (element) { - const allEvents = [ - 'click', 'input', 'change', 'keydown', 'keyup', - 'mouseover', 'mouseout', 'mousedown', 'mouseup', 'focus', 'blur' - ]; - allEvents.forEach((eventType) => { - element.addEventListener(eventType, (event) => { - const elementText = event.target.textContent || ''; - window.handleEvent(selector, eventType, elementText); - }, true); // 'true' indicates capture phase - }); - } - }); - } - """, - {"selectors": selectors} - ) - elif target_values: - - self.page.evaluate( - """ - (targetValues) => { + def _event_listener(self): + """Add universal event listener""" + # # First expose the handle_event function + # self.context.expose_function("handleEvent", + # lambda selector, event_type, element_info: self._handle_event(selector, event_type, element_info)) + logger.info("Setting up event listeners...") # Add debug log + try: + # Then set up event listeners + self.page.evaluate(""" + () => { const allEvents = [ - 'click', 'input', 'change', 'keydown', 'keyup', + 'click', 'input', 'change', 'keydown', 'keyup', 'mouseover', 'mouseout', 'mousedown', 'mouseup', 'focus', 'blur' ]; - allEvents.forEach((eventType) => { - document.addEventListener(eventType, (event) => { - const elementText = event.target.textContent || ''; - if (targetValues.includes(elementText)) { - window.handleEvent(null, eventType, elementText); // No selector in this case + + function getElementSelector(element) { + if (!element) return null; + // Try to get unique selector for the element + try { + let path = []; + while (element && element.nodeType === Node.ELEMENT_NODE) { + let selector = element.nodeName.toLowerCase(); + if (element.id) { + selector += '#' + element.id; + path.unshift(selector); + break; + } else { + let sibling = element; + let nth = 1; + while 
(sibling.previousElementSibling) { + sibling = sibling.previousElementSibling; + if (sibling.nodeName === element.nodeName) nth++; + } + if (nth > 1) selector += `:nth-child(${nth})`; + } + path.unshift(selector); + element = element.parentNode; } - }, true); // 'true' indicates capture phase + return path.join(' > '); + } catch (e) { + return null; + } + } + + function getElementInfo(element) { + return { + textContent: element.textContent || '', + value: element.value || '', + tagName: element.tagName.toLowerCase() + }; + } + + allEvents.forEach(eventType => { + document.addEventListener(eventType, (event) => { + const element = event.target; + const selector = getElementSelector(element); + const elementInfo = getElementInfo(element); + + window.handleEvent( + selector, + eventType, + JSON.stringify(elementInfo) + ); + }, true); }); } - """, - target_values - ) + """) + logger.info("Event listeners setup completed") + except Exception as e: + logger.error(f"Failed to setup event listeners: {str(e)}") - def _handle_event(self, selector, event_type, element_text=None): - logger.debug(f"Element with selector '{selector}' triggered '{ - event_type}' event, text content: {element_text}") - for idx, event in enumerate(self.events): - if event and event["selector"] == selector: - self.events[idx]["status"] = True - self.events[idx]["target_value"] = element_text if element_text else "" - elif event and event["reference_value"] == element_text: - self.events[idx]["status"] = True - self.events[idx]["target_value"] = element_text if element_text else "" - self.task.update_events(self.events) + def _handle_event(self, selector, event_type, element_info_str): + """ + Handle DOM events by updating task events + """ + try: + element_info = json.loads(element_info_str) + logger.info(f"Event received - selector: {selector}, type: {event_type}, info: {element_info}") + + # Create current event + current_event = { + "selector": selector, + "status": True, + "target_value": 
element_info.get("value") or element_info.get("textContent", ""), + "event_type": event_type + } + + # Update task events + if hasattr(self.task, 'update_events'): + self.task.update_events([current_event]) + + except json.JSONDecodeError: + logger.error(f"Failed to parse element info: {element_info_str}") + except Exception as e: + logger.error(f"Error handling event: {str(e)}") diff --git a/browsergym/webcanvas/src/browsergym/webcanvas/data/mind2web-live-test_104tasks_20240528.json b/browsergym/webcanvas/src/browsergym/webcanvas/data/mind2web-live_test_20241024.json similarity index 93% rename from browsergym/webcanvas/src/browsergym/webcanvas/data/mind2web-live-test_104tasks_20240528.json rename to browsergym/webcanvas/src/browsergym/webcanvas/data/mind2web-live_test_20241024.json index 201beae7..304bc2db 100644 --- a/browsergym/webcanvas/src/browsergym/webcanvas/data/mind2web-live-test_104tasks_20240528.json +++ b/browsergym/webcanvas/src/browsergym/webcanvas/data/mind2web-live_test_20241024.json @@ -1,6 +1,6 @@ [ { - "index": "_cuWObkg3Zm5lhrV5BVro", + "index": 0, "task": "Find the store location and hours of the closest Gamestop to zip code 90028 and set as home store on gamestop", "reference_task_length": 8, "evaluation": [ @@ -20,11 +20,10 @@ "url": "https://www.gamestop.com/search/?store=2630" } } - ], - "time": "2024-05-23 06:33:52.510" + ] }, { - "index": "_pzJRzGpq38uVD09zuz2g", + "index": 1, "task": "Compare available plans for the AeroAPI on flightaware", "reference_task_length": 4, "evaluation": [ @@ -52,11 +51,10 @@ "url": "https://www.flightaware.com/commercial/aeroapi/#compare-plans-section" } } - ], - "time": "2024-05-26 06:31:01.887" + ] }, { - "index": "0_XlVnFegKhb539-hHKIA", + "index": 2, "task": "Give a 10 rating to The Terminator II: Judgement Day on imdb", "reference_task_length": 8, "evaluation": [ @@ -103,11 +101,10 @@ "url": "https://www.imdb.com/title/tt0103064/?ref_=nv_sr_srsg_0_tt_8_nm_0_q_terminato" } } - ], - "time": "2024-05-21 
05:17:38.413" + ] }, { - "index": "0Bes-kjPvso3mgjPsaQIJ", + "index": 3, "task": "Check the status of train S92 for any disruptions on new.mta.info.", "reference_task_length": 6, "evaluation": [ @@ -135,11 +132,10 @@ "url": "https://new.mta.info/alerts?selectedRoutes=MTA%3AS92&selectedDate=3%2F20%2F2024" } } - ], - "time": "2024-03-12 08:41:48.689" + ] }, { - "index": "2OEKICou9EWm4Bs6N9Ei3", + "index": 4, "task": "Check the status of your iPhone repair on apple.", "reference_task_length": 5, "evaluation": [ @@ -167,11 +163,10 @@ "url": "https://support.apple.com/my-support" } } - ], - "time": "2024-03-25 03:06:23.379" + ] }, { - "index": "2rjwRPMba7SmRlyZRNa2m", + "index": 5, "task": "Open the page with a overview about the submission of releases on discogs", "reference_task_length": 6, "evaluation": [ @@ -191,11 +186,10 @@ "url": "https://support.discogs.com/hc/en-us/articles/360004016474-Overview-Of-Submission-Guidelines-For-Releases" } } - ], - "time": "2024-05-20 05:56:13.543" + ] }, { - "index": "3gpqgwl1AQZz5p2FTjHd-", + "index": 6, "task": "View the latest job openings in safety with a salary above 100k per annum, check the details, and apply on mbta.", "reference_task_length": 16, "evaluation": [ @@ -265,11 +259,10 @@ "url": "https://www.governmentjobs.com/careers/mbta" } } - ], - "time": "2024-03-25 02:51:40.381" + ] }, { - "index": "3RLzqsxoItEoOZlm3dwjy", + "index": 7, "task": "Add a e-gift card to bag of $100 for recipient John and email address abc@test.com from buckeye.foobar@gmail.com with message gift card on underarmour", "reference_task_length": 15, "evaluation": [ @@ -343,11 +336,10 @@ "url": "https://www.underarmour.com/en-us/gift-card-landing/" } } - ], - "time": "2024-05-20 05:25:05.480" + ] }, { - "index": "45plKpA3GwhP5bEqIL9Bx", + "index": 8, "task": "Find a person by address Nice st - 1234, Good, FL on yellowpages", "reference_task_length": 12, "evaluation": [ @@ -391,11 +383,10 @@ "url": 
"https://people.yellowpages.com/whitepages/address?street=Nice+st+-+1234&city=Good&state=FL" } } - ], - "time": "2024-05-26 05:22:56.687" + ] }, { - "index": "5Jfpi0joLf_6AVRX8mN9R", + "index": 9, "task": "Check my AMC gift card balance with gift card number 87654321 and pin number 9753 on amctheatres.", "reference_task_length": 8, "evaluation": [ @@ -450,11 +441,10 @@ "url": "https://www.amctheatres.com/showtimes/all/2024-03-25/default/all?utm_source=google&utm_medium=paidsearch&utm_campaign=OnlineTicketing2023&kclid=bc667845-5652-4d22-b01f-539c6d69db0e&gad_source=1" } } - ], - "time": "2024-03-25 06:53:38.530" + ] }, { - "index": "5okIgoVRM4WfLCb0vND2y", + "index": 10, "task": "Get the lowest priced women's plus size one piece swimsuit in color black with customer rating of atleat 5 on kohls", "reference_task_length": 13, "evaluation": [ @@ -522,11 +512,10 @@ "url": "https://www.kohls.com/catalog/5-womens-black-plus-onepiece-swimsuits-swimsuits-clothing.jsp?CN=TopRated:5+Gender:Womens+Color:Black+SizeRange:Plus+Product:One-Piece%20Swimsuits+Category:Swimsuits+Department:Clothing&S=4&PPP=48&pfm=internal%20campaign%20visual%20nav%20refine&kls_sbp=87660515555844928264416369496642981542" } } - ], - "time": "2024-05-24 09:57:01.066" + ] }, { - "index": "7Y6RH7SlTXZZmPaQ4ydVj", + "index": 11, "task": "Find discussions of community and open one with most replies on flightaware.", "reference_task_length": 6, "evaluation": [ @@ -563,11 +552,10 @@ "url": "https://discussions.flightaware.com/top" } } - ], - "time": "2024-03-23 07:41:02.232" + ] }, { - "index": "8S0fsID6FFG6MB4t93eOH", + "index": 12, "task": "Find a 2022 Tesla Model 3 on carmax", "reference_task_length": 10, "evaluation": [ @@ -595,11 +583,10 @@ "url": "https://www.carmax.com/cars/tesla/model-3?year=2022" } } - ], - "time": "2024-05-24 07:30:22.531" + ] }, { - "index": "A-M0rQCsPu_r9xWt-eqYv", + "index": 13, "task": "Get the report from the final environmental impact statement for the Jamaica Bus Depot 
expansion on new.mta.info", "reference_task_length": 10, "evaluation": [ @@ -637,11 +624,10 @@ "url": "https://new.mta.info/project/jamaica-bus-depot-expansion" } } - ], - "time": "2024-05-28 06:54:52.796" + ] }, { - "index": "aEu9zp3GWZb9SVw0PN_2c", + "index": 14, "task": "Search for the lowest millage used Honda Crosstour 2012 to 2013 near 49102 less than $25000 on cargurus.", "reference_task_length": 17, "evaluation": [ @@ -732,11 +718,10 @@ "url": "https://www.cargurus.com/Cars/inventorylisting/viewDetailsFilterViewInventoryListing.action?sourceContext=carGurusHomePageModel&entitySelectingHelper.selectedEntity=d2184&zip=49102" } } - ], - "time": "2024-03-24 12:49:24.093" + ] }, { - "index": "AjnXnl3STIls1uc49TDze", + "index": 15, "task": "Browse men's winter coats in size large that is on clearance on kohls.", "reference_task_length": 8, "evaluation": [ @@ -772,11 +757,10 @@ "url": "https://www.kohls.com/search/clearance.jsp?CN=Promotions:Clearance+Size:L&BL=y&search=mens%20winter%20coat&S=1&PPP=48&pfm=search%20refine&kls_sbp=59288101756704364110307077102020115511" } } - ], - "time": "2024-03-27 10:48:21.173" + ] }, { - "index": "AomPMCHZ6Yf7mH_TxyI7e", + "index": 16, "task": "Find a walkthrough for the game \"The Legend of Zelda: Breath of the Wild\" on ign.", "reference_task_length": 6, "evaluation": [ @@ -804,11 +788,10 @@ "url": "https://www.ign.com/wikis/the-legend-of-zelda-breath-of-the-wild/Walkthrough" } } - ], - "time": "2024-03-25 05:37:54.561" + ] }, { - "index": "apsmDjkuV6iGvfQ9-YkP0", + "index": 17, "task": "Find an editor's choice review with a score of 10 in the boardgame category on ign", "reference_task_length": 8, "evaluation": [ @@ -838,10 +821,9 @@ } }, { - "match_function_name": "element_path_exactly_match", - "method": "selector", + "match_function_name": "element_value_exactly_match", "content": { - "reference_answer": "#scoreRange >option:nth-child(2)", + "reference_answer": "10,10", "netloc": "ign", "url": 
"https://www.ign.com/editors-choice" } @@ -856,19 +838,17 @@ } }, { - "match_function_name": "element_path_exactly_match", - "method": "selector", + "match_function_name": "element_value_exactly_match", "content": { - "reference_answer": "#genre >option:nth-child(5)", + "reference_answer": "board", "netloc": "ign", "url": "https://www.ign.com/editors-choice" } } - ], - "time": "2024-05-24 07:12:38.613" + ] }, { - "index": "asuczSfk4bCf7OwPA_oVx", + "index": 18, "task": "Sign up for a REI Co-Op membership on rei.", "reference_task_length": 3, "evaluation": [ @@ -897,11 +877,10 @@ "url": "https://www.rei.com/membership" } } - ], - "time": "2024-03-25 06:29:17.908" + ] }, { - "index": "Atgpzhx4geR9YvP8u6AZb", + "index": 19, "task": "Find the weight of baggage allowance for economy class on qatarairways", "reference_task_length": 4, "evaluation": [ @@ -930,11 +909,10 @@ "url": "https://www.qatarairways.com/en-us/baggage/allowance.html?iid=ALL67670750" } } - ], - "time": "2024-05-26 05:38:21.691" + ] }, { - "index": "b18Xcyxl8sjXTUQ-Jbtmj", + "index": 20, "task": "Find flights going from Indira Gandhi to Los Cabos on flightaware", "reference_task_length": 10, "evaluation": [ @@ -962,11 +940,10 @@ "url": "https://www.flightaware.com/live/findflight?origin=VIDP&destination=MMSD" } } - ], - "time": "2024-05-26 06:38:34.537" + ] }, { - "index": "BABEHOxbn8rQAoskKTM0y", + "index": 21, "task": "Tell me information about what identification I need to bring on my trip on amtrak", "reference_task_length": 6, "evaluation": [ @@ -1002,11 +979,10 @@ "url": "https://www.amtrak.com/planning-booking/tickets-id-safety-security/passenger-identification.html" } } - ], - "time": "2024-05-26 04:30:36.977" + ] }, { - "index": "bfAcQB3vTsCJPNxt7ZEm4", + "index": 22, "task": "Browse used Audi cars made before 2015 and sort by lowest price on kbb", "reference_task_length": 8, "evaluation": [ @@ -1042,11 +1018,10 @@ "url": 
"https://www.kbb.com/car-finder/?intent=used&manufacturers=audi&years=1992-2015&sort=priceasc" } } - ], - "time": "2024-05-26 05:55:12.822" + ] }, { - "index": "BITQL_mGoz9vud1fvryyv", + "index": 23, "task": "Show crazy credits for the movie \" Prometheus\" on imdb", "reference_task_length": 6, "evaluation": [ @@ -1074,12 +1049,11 @@ "url": "https://www.imdb.com/title/tt1446714/crazycredits/?tab=cz&ref_=tt_trv_cc" } } - ], - "time": "2024-05-24 07:48:53.384" + ] }, { - "index": "Bvywj9SzJD5eYeZrtYY3m", - "task": "Add the first wireless headphones to your cart with a budget of $100 or less, that has an active noise-cancelling feature on amazon", + "index": 24, + "task": "Go to Amazon and add the first wireless headphones to your cart with a budget of $100 or less, that has an active noise-cancelling feature.", "reference_task_length": 8, "evaluation": [ { @@ -1119,15 +1093,14 @@ "match_function_name": "url_included_match", "content": { "key": "rh", - "reference_answer": "p_n_feature_two_browse-bin:23746030011", + "reference_answer": "n:21514463011,p_n_feature_two_browse-bin:23746030011", "url": "https://www.amazon.com/s?k=wireless+headphones&rh=n%3A172282%2Cp_36%3A-10000%2Cp_n_feature_two_browse-bin%3A23746030011&dc&ds=v1%3AJ8iOECT4r4taWN1uZiq%2BUlDjD4Nhpo6GPIbBN5RxVAk&crid=3B1DV9AE18GHL&qid=1716877974&rnid=23746028011&sprefix=wireless+headphones%2Caps%2C737&ref=sr_nr_p_n_feature_two_browse-bin_1" } } - ], - "time": "2024-05-28 06:41:02.718" + ] }, { - "index": "C-ptYrbhcyNGw6WYcT3B5", + "index": 25, "task": "Find Playstation 5 digital edition on gamestop.", "reference_task_length": 6, "evaluation": [ @@ -1147,11 +1120,10 @@ "url": "https://www.gamestop.com/search/?q=playstation+5+digital+edition&lang=default&start=0&sz=20" } } - ], - "time": "2024-03-25 06:32:48.505" + ] }, { - "index": "CAzoBWdWNepFOwGf6cv0O", + "index": 26, "task": "Browse Marriott Bonvoy credit cards on marriott", "reference_task_length": 4, "evaluation": [ @@ -1179,11 +1151,10 @@ "url": 
"https://www.marriott.com/credit-cards.mi" } } - ], - "time": "2024-05-26 04:24:43.810" + ] }, { - "index": "CnJfZVtxvcwIDMj9zkugH", + "index": 27, "task": "Show me the list of Men's Blazers, Black, Size M on uniqlo.", "reference_task_length": 11, "evaluation": [ @@ -1227,11 +1198,10 @@ "url": "https://www.uniqlo.com/us/en/men/outerwear-and-blazers/blazers?path=%2C%2C23380&colorCodes=COL09&sizeCodes=SMA004&categoryId=23380" } } - ], - "time": "2024-03-27 09:16:35.460" + ] }, { - "index": "CPdTwa57mKJJiZPQg2542", + "index": 28, "task": "Add formula 1 to my followed sports on foxsports", "reference_task_length": 7, "evaluation": [ @@ -1260,11 +1230,10 @@ "url": "https://www.foxsports.com/motor/formula-1" } } - ], - "time": "2024-05-20 05:29:37.672" + ] }, { - "index": "cPEujNGfZgLT2rbtuKPOt", + "index": 29, "task": "Show me the options for a roundtrip leaving from las vegas on flexile dates on the interactive map on united", "reference_task_length": 12, "evaluation": [ @@ -1273,7 +1242,7 @@ "content": { "key": "", "reference_answer": "united.", - "url": "https://www.united.com/en/us/destination-map" + "url": "https://www.united.com/en/us" } }, { @@ -1347,11 +1316,10 @@ "url": "https://www.united.com/en/us/destination-map" } } - ], - "time": "2024-05-22 04:45:08.380" + ] }, { - "index": "cplcsXZ5GetOqRKBt5LBn", + "index": 30, "task": "Download the e-receipt with the last name Smith and confirmation number X123456989 on budget", "reference_task_length": 8, "evaluation": [ @@ -1398,11 +1366,10 @@ "url": "https://www.budget.com/en/reservation/get-e-receipt" } } - ], - "time": "2024-05-22 05:15:26.367" + ] }, { - "index": "d2Csfs3T6ABluicCuLwRP", + "index": 31, "task": "show the Life Jackets priced between $100 and $200 on cabelas", "reference_task_length": 6, "evaluation": [ @@ -1430,11 +1397,10 @@ "url": "https://www.cabelas.com/l/life-jackets#nf-offerprice=100..200" } } - ], - "time": "2024-05-26 04:13:32.518" + ] }, { - "index": "DAabNkqk7gX0dHEcC0sbd", + "index": 32, 
"task": "Find 32\" Curved monitor and add the third one to the wish list on newegg", "reference_task_length": 7, "evaluation": [ @@ -1463,11 +1429,10 @@ "url": "https://www.newegg.com/p/pl?d=32%22+curved+monitor" } } - ], - "time": "2024-05-20 04:55:02.771" + ] }, { - "index": "Dc_JXmrltb5LgK9HyhVQb", + "index": 33, "task": "Find the list of all neighborhood maps for Brooklyn on new.mta.info", "reference_task_length": 5, "evaluation": [ @@ -1495,11 +1460,10 @@ "url": "https://new.mta.info/maps/subway/mta-neighborhood-maps/brooklyn" } } - ], - "time": "2024-03-22 09:48:12.838" + ] }, { - "index": "DE1P2PSPtO3jAAudlH0v-", + "index": 34, "task": "Find me the deals available for the Great escape park on sixflags", "reference_task_length": 7, "evaluation": [ @@ -1527,11 +1491,10 @@ "url": "https://www.sixflags.com/greatescape/store/tickets" } } - ], - "time": "2024-05-24 09:04:10.474" + ] }, { - "index": "dIFny0pY8Wf8KdKs1kkbM", + "index": 35, "task": "Find the movie Donnie Darko and show its complete cast on imdb", "reference_task_length": 6, "evaluation": [ @@ -1559,11 +1522,10 @@ "url": "https://www.imdb.com/title/tt0246578/fullcredits?ref_=tt_cl_sm" } } - ], - "time": "2024-05-20 04:46:08.231" + ] }, { - "index": "dRJJLjd2TAIfcOTO0kLth", + "index": 36, "task": "Get the most reviewed shopping store that accepts apple pay in Central New York on yelp.", "reference_task_length": 9, "evaluation": [ @@ -1607,11 +1569,10 @@ "url": "https://www.yelp.com/search?find_desc=Shopping&find_loc=Central+New+York%2C+NY&attrs=BusinessAcceptsApplePay&sortby=review_count" } } - ], - "time": "2024-03-25 03:24:07.862" + ] }, { - "index": "E2W7rMnlBIiT6lj0jJMIc", + "index": 37, "task": "Find me the cheapest external HD for an Xbox One on gamestop.", "reference_task_length": 13, "evaluation": [ @@ -1656,11 +1617,10 @@ "url": "https://www.gamestop.com/search/?prefn1=platform&prefv1=Xbox&q=external+HD&view=new&srule=price-low-to-high" } } - ], - "time": "2024-03-22 09:20:59.469" + ] }, { - 
"index": "ea-0F05c6v48IaE33APkX", + "index": 38, "task": "Search for job in Miami Florida in Human resources on target.", "reference_task_length": 14, "evaluation": [ @@ -1752,11 +1712,10 @@ "url": "https://jobs.target.com/search-jobs?acm=8355664&alrpm=ALL&ascf=[%7B%22key%22:%22ALL%22,%22value%22:%22%22%7D]" } } - ], - "time": "2024-03-22 07:47:19.747" + ] }, { - "index": "EI_bNWi0gUUGfjCILiyOS", + "index": 39, "task": "Show me products from Calvin Klein brand menswear list on kohls.", "reference_task_length": 6, "evaluation": [ @@ -1792,11 +1751,10 @@ "url": "https://www.kohls.com/catalog/mens-calvin-klein-clothing.jsp?CN=Gender:Mens+Brand:Calvin%20Klein+Department:Clothing&S=1&PPP=48&kls_sbp=59288101756704364110307077102020115511&pfm=internal%20campaign%20refine" } } - ], - "time": "2024-03-25 05:47:19.199" + ] }, { - "index": "eUCwvaIx2vbWFCxkmtjAg", + "index": 40, "task": "Contact the support service on spothero", "reference_task_length": 4, "evaluation": [ @@ -1816,11 +1774,10 @@ "url": "https://spothero.com/contact" } } - ], - "time": "2024-05-21 06:04:06.098" + ] }, { - "index": "ewpxEe7H6GHDo0pAWcMe2", + "index": 41, "task": "Find a flight from Dresden to anywhere under $100 on ryanair", "reference_task_length": 13, "evaluation": [ @@ -1856,11 +1813,10 @@ "url": "https://www.ryanair.com/us/en/cheap-flights/?from=DRS&out-from-date=2024-05-28&out-to-date=2025-05-28&budget=100" } } - ], - "time": "2024-05-28 06:22:31.472" + ] }, { - "index": "fc5FwBwnNV2kroxbDjyjX", + "index": 42, "task": "Look for hair salon in San Diego on yellowpages", "reference_task_length": 8, "evaluation": [ @@ -1888,11 +1844,10 @@ "url": "https://www.yellowpages.com/search?search_terms=hair+salon&geo_location_terms=San+Diego%2C+CA" } } - ], - "time": "2024-05-24 07:55:10.003" + ] }, { - "index": "FFwg7X2CgxQiCq-Sv3EWI", + "index": 43, "task": "Find the closest in-store Gamestop location within 50 miles of 21122 on gamestop.", "reference_task_length": 10, "evaluation": [ @@ -1967,11 
+1922,10 @@ "url": "https://www.gamestop.com/search/?store=3878" } } - ], - "time": "2024-03-25 06:13:44.816" + ] }, { - "index": "FGXMZ7VQ7uquu8fdSHSBp", + "index": 44, "task": "Find information about what I should do when I lose an item on a bus on us.megabus", "reference_task_length": 3, "evaluation": [ @@ -2000,11 +1954,10 @@ "url": "https://us.megabus.com/help" } } - ], - "time": "2024-05-26 06:02:13.109" + ] }, { - "index": "FtCFJNz4WhhewmINgVRu5", + "index": 45, "task": "Select a E-Gift card and add to cart with Best Wishes as a message. Send it to James Smith with email abc@abc.com on rei", "reference_task_length": 18, "evaluation": [ @@ -2069,11 +2022,10 @@ "url": "https://www.rei.com/gift-card/purchase" } } - ], - "time": "2024-05-22 04:30:06.572" + ] }, { - "index": "fuEsXYy-LGfFLHqNw9zWt", + "index": 46, "task": "Add a front load washing machine under $800 to the cart on menards.", "reference_task_length": 8, "evaluation": [ @@ -2109,11 +2061,10 @@ "url": "https://www.menards.com/main/search.html?search=front+load+washing+machine&priceMax_facet=800" } } - ], - "time": "2024-03-23 07:22:59.969" + ] }, { - "index": "G2jfRmIvoXCBKtoUcNUSd", + "index": 47, "task": "Search for a full-time job in sales in Springfield and apply for the most recent job on carmax", "reference_task_length": 14, "evaluation": [ @@ -2186,11 +2137,10 @@ "url": "https://careers.carmax.com/us/en/search-results" } } - ], - "time": "2024-05-21 17:29:20.909" + ] }, { - "index": "hMW3NU6H4U1avWsKmMj0y", + "index": 48, "task": "Browse coffee makers that are rated 5 stars on kohls", "reference_task_length": 7, "evaluation": [ @@ -2218,11 +2168,10 @@ "url": "https://www.kohls.com/search/5.jsp?CN=TopRated:5&search=coffee%20maker&S=1&PPP=48&kls_sbp=87660515555844928264416369496642981542&pfm=search%20refine" } } - ], - "time": "2024-05-26 04:58:15.747" + ] }, { - "index": "iBh1tZmxZGScFoDu6AcwB", + "index": 49, "task": "Browse spider-man toys for kids and sort by lowest price on kohls", 
"reference_task_length": 8, "evaluation": [ @@ -2258,70 +2207,10 @@ "url": "https://www.kohls.com/search/kids.jsp?CN=AgeAppropriate:Kids&search=spiderman%20toys&kls_sbp=87660515555844928264416369496642981542&pfm=search%20refine&PPP=48&S=4&sks=true" } } - ], - "time": "2024-05-20 05:01:18.502" + ] }, { - "index": "idxs7XdOs-DEu_l7d5gDu", - "task": "Find a store in Tempe, Arizona, make it my store, and then visit the store page and see whats happening in store on ikea.", - "reference_task_length": 10, - "evaluation": [ - { - "match_function_name": "url_included_match", - "content": { - "key": "", - "reference_answer": "ikea.", - "url": "https://www.ikea.com/" - } - }, - { - "match_function_name": "element_path_exactly_match", - "method": "selector", - "content": { - "reference_answer": "#google-maps-store-select ", - "netloc": "ikea", - "url": "https://www.ikea.com/us/en/stores/" - } - }, - { - "match_function_name": "url_included_match", - "content": { - "key": "", - "reference_answer": "stores/", - "url": "https://www.ikea.com/us/en/stores/" - } - }, - { - "match_function_name": "element_path_exactly_match", - "method": "selector", - "content": { - "reference_answer": "#google-maps-store-select >option:nth-child(2)", - "netloc": "ikea", - "url": "https://www.ikea.com/us/en/stores/" - } - }, - { - "match_function_name": "element_path_exactly_match", - "method": "selector", - "content": { - "reference_answer": ".localstore-buttons__container >button:nth-child(2)>span:nth-child(1)", - "netloc": "ikea", - "url": "https://www.ikea.com/us/en/stores/" - } - }, - { - "match_function_name": "url_included_match", - "content": { - "key": "", - "reference_answer": "/events/ikea-tempe-az/", - "url": "https://www.ikea.com/us/en/stores/events/ikea-tempe-az/" - } - } - ], - "time": "2024-03-12 09:30:55.599" - }, - { - "index": "IfHzHm-U83FzDKIGXVBPB", + "index": 50, "task": "Set the first recommended song on the homepage as a current obsession on last.fm", "reference_task_length": 
4, "evaluation": [ @@ -2360,11 +2249,10 @@ "url": "https://www.last.fm/" } } - ], - "time": "2024-03-25 06:07:45.480" + ] }, { - "index": "Im8n7iG6o0643zl_qS95X", + "index": 51, "task": "Browse the page with event planning tips on eventbrite", "reference_task_length": 5, "evaluation": [ @@ -2384,11 +2272,10 @@ "url": "https://www.eventbrite.com/blog/category/event-planning/" } } - ], - "time": "2024-05-28 03:30:06.935" + ] }, { - "index": "IQds4ENVIZXE_5c4KUNIM", + "index": 52, "task": "What are the family rides available at frontier city? on sixflags", "reference_task_length": 7, "evaluation": [ @@ -2416,11 +2303,68 @@ "url": "https://www.sixflags.com/frontiercity/things-to-do/rides/family-rides#family-ride" } } - ], - "time": "2024-05-24 07:08:01.654" + ] + }, + { + "index": 53, + "task": "Find a store in Tempe, Arizona, make it my store, and then visit the store page and see whats happening in store on ikea.", + "reference_task_length": 10, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "ikea.", + "url": "https://www.ikea.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/stores", + "url": "https://www.ikea.com/us/en/stores/" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": "#google-maps-store-select ", + "netloc": "ikea", + "url": "https://www.ikea.com/us/en/stores/" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": "#google-maps-store-select >option:nth-child(3)", + "netloc": "ikea", + "url": "https://www.ikea.com/us/en/stores/" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".localstore-buttons__container >button:nth-child(2)>span:nth-child(1)", + "netloc": "ikea", + "url": 
"https://www.ikea.com/us/en/stores/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "events/ikea-tempe-az/", + "url": "https://www.ikea.com/us/en/stores/events/ikea-tempe-az/" + } + } + ] }, { - "index": "jfjr1xGSPsucAuuUEBwCR", + "index": 54, "task": "Add to my wish list the highest rated activity in Amsterdam on viator", "reference_task_length": 6, "evaluation": [ @@ -2457,11 +2401,10 @@ "url": "https://www.viator.com/Amsterdam/d525-ttd?sortType=rating" } } - ], - "time": "2024-05-20 04:41:01.489" + ] }, { - "index": "jIAFDj4vUn97eopTKyLVB", + "index": 55, "task": "Check the current standings for MLS on foxsports", "reference_task_length": 4, "evaluation": [ @@ -2489,12 +2432,11 @@ "url": "https://www.foxsports.com/soccer/mls/standings" } } - ], - "time": "2024-05-22 05:00:23.740" + ] }, { - "index": "jxEtZVxPTK_fFF7x3Qw7h", - "task": "Find the statistics of the player Cristiano Ronaldo for the national team of Portugal in all the world cups.", + "index": 56, + "task": "Find the statistics of the player Cristiano Ronaldo for the national team of Portugal in all the world cups in espn.com.", "reference_task_length": 10, "evaluation": [ { @@ -2529,11 +2471,10 @@ "url": "https://www.espn.com/soccer/player/stats/_/id/22774/team/482/type/fifa.world" } } - ], - "time": "2024-03-24 12:33:53.575" + ] }, { - "index": "jYlUoyfeTB1iPEyIUY1zc", + "index": 57, "task": "Show schedule for East Boston Ferry outbound Lewis Wharf and Long Wharf (North) stop on mbta", "reference_task_length": 9, "evaluation": [ @@ -2569,11 +2510,10 @@ "url": "https://www.mbta.com/schedules/Boat-EastBoston/line?schedule_finder%5Bdirection_id%5D=0&schedule_finder%5Borigin%5D=Boat-Long" } } - ], - "time": "2024-05-27 03:27:02.166" + ] }, { - "index": "Ka0lgX3cJ7lKD1Wm7SbUI", + "index": 58, "task": "Find technical specs for the latest Macbook Air on apple", "reference_task_length": 4, "evaluation": [ @@ -2601,11 +2541,10 @@ "url": 
"https://www.apple.com/macbook-air/specs/" } } - ], - "time": "2024-05-21 06:01:26.315" + ] }, { - "index": "kJrcFAx5Wmx2ectYN5vt7", + "index": 59, "task": "Find out what popular events are being held this weekend in the category performing and visual arts near Chester, UK on eventbrite", "reference_task_length": 10, "evaluation": [ @@ -2649,11 +2588,10 @@ "url": "https://www.eventbrite.com/d/united-kingdom--chester/arts--events--this-weekend/?page=1" } } - ], - "time": "2024-03-28 03:26:17.468" + ] }, { - "index": "kpgHV45mGv1RTSDhyp9Da", + "index": 60, "task": "Find the last game of the season for the Toronto Raptors on sports.yahoo.", "reference_task_length": 5, "evaluation": [ @@ -2693,16 +2631,15 @@ "match_function_name": "element_path_exactly_match", "method": "selector", "content": { - "reference_answer": "#Col2-7-TeamSchedule-Proxy >div:nth-child(1)>div:nth-child(2)>div:nth-child(4)>div:nth-child(1)>div:nth-child(2)>a:nth-child(1)>div:nth-child(2)", + "reference_answer": "#Col2-5-TeamSchedule-Proxy > div > div > div:nth-child(4) > div", "netloc": "sports", "url": "https://sports.yahoo.com/nba/teams/toronto/" } } - ], - "time": "2024-03-25 03:00:26.772" + ] }, { - "index": "kYgJp5lpek_m7MOQiyrgO", + "index": 61, "task": "Find solutions for Airport and fill the contact form with message to \"Send Brochure\". Contact information John Smith. Email: abc@abc.com. 
Phone #: 88889999 on flightaware", "reference_task_length": 18, "evaluation": [ @@ -2776,11 +2713,10 @@ "url": "https://industry.flightaware.com/airports" } } - ], - "time": "2024-05-27 03:37:36.450" + ] }, { - "index": "L5MT6CydayMS3Fz4GEPEk", + "index": 62, "task": "Find Toyota Corolla from the year 2018 to 2023 in red color on carmax.", "reference_task_length": 13, "evaluation": [ @@ -2824,12 +2760,11 @@ "url": "https://www.carmax.com/cars/toyota/corolla/corolla-cross/corolla-cross-hybrid/corolla-hatchback/corolla-hybrid/corolla-im/red?year=2018-2023" } } - ], - "time": "2024-03-25 06:18:12.062" + ] }, { - "index": "l730hRXeN7jXAoonQC9IY", - "task": "Find the current NFL standings for the AFC East division and go to the page of which team is in first place on nfl.", + "index": 63, + "task": "Go to nfl.com and find the current NFL standings for the AFC East division and go to the page of which team is in first place.", "reference_task_length": 4, "evaluation": [ { @@ -2857,11 +2792,10 @@ "url": "https://www.nfl.com/standings/" } } - ], - "time": "2024-03-23 07:14:46.033" + ] }, { - "index": "ll6ksU97ZFOPlu5wY7kEE", + "index": 64, "task": "Check the national cafe menu on amtrak", "reference_task_length": 6, "evaluation": [ @@ -2906,11 +2840,10 @@ "url": "https://www.amtrak.com/onboard/meals-dining/cafe-car.html" } } - ], - "time": "2024-03-24 12:24:44.634" + ] }, { - "index": "LPKoeVVlIW8fdc7yB428V", + "index": 65, "task": "View all of the Most Popular TV on rottentomatoes.", "reference_task_length": 3, "evaluation": [ @@ -2930,11 +2863,10 @@ "url": "https://www.rottentomatoes.com/browse/tv_series_browse/sort:popular" } } - ], - "time": "2024-03-23 08:15:33.298" + ] }, { - "index": "mg_XYywJFY2zB_ESXajdY", + "index": 66, "task": "Compare Apple watches and learn more about the ultra version on apple", "reference_task_length": 4, "evaluation": [ @@ -2954,11 +2886,10 @@ "url": "https://www.apple.com/watch/compare/" } } - ], - "time": "2024-05-26 05:03:19.513" + ] }, 
{ - "index": "mpwaLSXFnPbo0DPo9Bybk", + "index": 67, "task": "Search for the playlists \"Pop Workout mix\" and filtered by tag #Dance & edm on soundcloud.", "reference_task_length": 7, "evaluation": [ @@ -2994,11 +2925,10 @@ "url": "https://soundcloud.com/search/sets?q=pop%20workout%20mix&filter.genre=dance%20%26%20edm" } } - ], - "time": "2024-03-23 08:20:25.855" + ] }, { - "index": "n5PRoGseX0FMADP6ksDd8", + "index": 68, "task": "Check in with confirmation number 10987654 for my flight with last name Lewis on united.", "reference_task_length": 7, "evaluation": [ @@ -3046,11 +2976,10 @@ "url": "https://www.united.com/en/gb" } } - ], - "time": "2024-03-22 09:11:47.361" + ] }, { - "index": "NIl5RrMRtKKn0UTcJOMND", + "index": 69, "task": "Check balance of gift card no. 1234567 with pin 0001 on marriott", "reference_task_length": 10, "evaluation": [ @@ -3105,11 +3034,10 @@ "url": "https://gifts.marriott.com/check-balance/" } } - ], - "time": "2024-03-25 06:01:59.182" + ] }, { - "index": "nxgeQF2vUcHpMBs7inZ3I", + "index": 70, "task": "Search Cantonese food near Chicago, IL that are open now and priced $$ on yelp", "reference_task_length": 9, "evaluation": [ @@ -3153,11 +3081,10 @@ "url": "https://www.yelp.com/search?find_desc=Cantonese+food&find_loc=Chicago%2C+IL&attrs=RestaurantsPriceRange2.2&open_now=724" } } - ], - "time": "2024-05-27 04:06:48.866" + ] }, { - "index": "nYUIGaNMtV6sxuJhv_b2S", + "index": 71, "task": "Get the frozen vegan cheese pizza between 5 to 10 usd on target.", "reference_task_length": 17, "evaluation": [ @@ -3201,11 +3128,10 @@ "url": "https://www.target.com/c/frozen-pizza-foods-grocery/-/N-5xsz4Zal25lfgbks1Z76zjqZgulk4?moveTo=product-list-grid" } } - ], - "time": "2024-03-25 06:49:05.838" + ] }, { - "index": "p5Rh2uknkoIwgRqseo6wB", + "index": 72, "task": "Find bluetooth vertical mouse with most reviews and add two to my shopping cart on newegg.", "reference_task_length": 9, "evaluation": [ @@ -3251,11 +3177,10 @@ "url": 
"https://www.newegg.com/kensington-k72356us/p/0TP-001D-00518" } } - ], - "time": "2024-03-23 07:52:30.488" + ] }, { - "index": "Peid7WPQJoMdIBjhIKOwP", + "index": 73, "task": "Find the page with instructions on how to return orders online on uniqlo", "reference_task_length": 6, "evaluation": [ @@ -3267,15 +3192,6 @@ "url": "https://www.uniqlo.com/us/en/" } }, - { - "match_function_name": "element_path_exactly_match", - "method": "selector", - "content": { - "reference_answer": ".Navigation.-top.Lead_Body >span:nth-child(1)>li:nth-child(6)>.Navigation_Link.-single.js-Track ", - "netloc": "faq-us", - "url": "https://faq-us.uniqlo.com/articles/en_US/FAQ/Returns-and-Exchanges/?l=en_US&c=category_uq_us%3AUQ_C1_7&fs=Search&pn=1" - } - }, { "match_function_name": "url_included_match", "content": { @@ -3284,11 +3200,10 @@ "url": "https://faq-us.uniqlo.com/articles/en_US/FAQ/Returns-and-Exchanges/?l=en_US&c=category_uq_us%3AUQ_C1_7&fs=Search&pn=1" } } - ], - "time": "2024-05-26 04:20:24.764" + ] }, { - "index": "pzgRb65Q_C-IqoATH6QCe", + "index": 74, "task": "Find the weekly ad for the store closest to zip code 82718 on menards", "reference_task_length": 8, "evaluation": [ @@ -3324,11 +3239,10 @@ "url": "https://www.menards.com/main/flyerselectstore.html" } } - ], - "time": "2024-03-27 11:58:37.236" + ] }, { - "index": "QiIBNsu__PJ_wBbTxAzvf", + "index": 75, "task": "Add a Swivel vacuum under $150 to my cart on menards.", "reference_task_length": 8, "evaluation": [ @@ -3365,11 +3279,10 @@ "url": "https://www.menards.com/main/search.html?search=swivel+vacuum&priceMax_facet=150" } } - ], - "time": "2024-03-25 05:42:00.048" + ] }, { - "index": "QrK41w5BG8GFDYzNHA0yn", + "index": 76, "task": "What are the food festivals happening in Colorado This weekend? 
on eventbrite", "reference_task_length": 9, "evaluation": [ @@ -3405,11 +3318,10 @@ "url": "https://www.eventbrite.com/d/united-states--colorado/food-festival/" } } - ], - "time": "2024-05-24 09:09:40.399" + ] }, { - "index": "qwx9Eadt7js7dG6ixaUPU", + "index": 77, "task": "Explore the trending playlists,filter by tag #Electronics, play the first playlist and like it on soundcloud", "reference_task_length": 9, "evaluation": [ @@ -3463,11 +3375,10 @@ "url": "https://soundcloud.com/search/sets?q=trending%20playlists&filter.genre=electronic" } } - ], - "time": "2024-05-21 04:58:40.524" + ] }, { - "index": "rEHHIcDi9AMBMK1DDXpbN", + "index": 78, "task": "Browse through the Las Vegas city guide and find message services nearest to Henderson, the service provider should have a BBB rating of A+ on yellowpages", "reference_task_length": 11, "evaluation": [ @@ -3503,11 +3414,10 @@ "url": "https://www.yellowpages.com/las-vegas-nv/massage-therapists?refinements=bbb_grade_display%3A1&refinements=neighborhood%3AHenderson" } } - ], - "time": "2024-05-27 03:49:51.803" + ] }, { - "index": "rG7Bw2NN2bQ7CRchbMrAq", + "index": 79, "task": "Add The Wire to the watchlist on tvguide", "reference_task_length": 6, "evaluation": [ @@ -3536,11 +3446,10 @@ "url": "https://www.tvguide.com/" } } - ], - "time": "2024-05-20 05:18:22.420" + ] }, { - "index": "RtYZe1pcgX6-TGZAmhyf6", + "index": 80, "task": "Find 5 star rated saltwater rods on cabelas.", "reference_task_length": 6, "evaluation": [ @@ -3568,11 +3477,10 @@ "url": "https://www.cabelas.com/l/saltwater-rods#f-bvratings=5%20Stars" } } - ], - "time": "2024-03-22 07:22:39.119" + ] }, { - "index": "rUyFGVLAMz748JXIqLlqE", + "index": 81, "task": "Add Pro Display XDR with nano texture to bag with all the accessories on apple", "reference_task_length": 9, "evaluation": [ @@ -3644,11 +3552,10 @@ "url": "https://www.apple.com/shop/buy-mac/pro-display-xdr/nano-glass" } } - ], - "time": "2024-05-21 17:50:41.841" + ] }, { - "index": 
"sPw15prlAwsXoQo8Pwikg", + "index": 82, "task": "Find a list of Tours that contain visits to the Louvre rated 5 stars on viator", "reference_task_length": 5, "evaluation": [ @@ -3686,11 +3593,10 @@ "url": "https://www.viator.com/Paris-attractions/Louvre/d479-a73#experiences" } } - ], - "time": "2024-05-22 05:09:24.469" + ] }, { - "index": "SxYhvlAHYqr92CmFHyHhq", + "index": 83, "task": "Browse the list of top 250 movies and add the first one to my watchlist on imdb", "reference_task_length": 6, "evaluation": [ @@ -3728,11 +3634,10 @@ "url": "https://www.imdb.com/chart/top/?ref_=nv_mv_250" } } - ], - "time": "2024-05-20 04:49:26.511" + ] }, { - "index": "szKuTh5WSKAHTrt6oCi1i", + "index": 84, "task": "Find thrill rides in Six Flags Great America, Chicago, IL on sixflags", "reference_task_length": 7, "evaluation": [ @@ -3760,11 +3665,10 @@ "url": "https://www.sixflags.com/greatamerica/things-to-do/rides/thrill-rides#thrill-ride" } } - ], - "time": "2024-05-24 08:18:13.156" + ] }, { - "index": "SzZoAslJMjCSQ2YU0B437", + "index": 85, "task": "Show me a list of electronic music dvds in very good condition on discogs", "reference_task_length": 6, "evaluation": [ @@ -3800,11 +3704,10 @@ "url": "https://www.discogs.com/sell/list?genre=Electronic&format=DVD&condition=Very+Good+%28VG%29" } } - ], - "time": "2024-05-26 04:39:08.672" + ] }, { - "index": "T-PlxYqdEO-9Qq_oiUNKI", + "index": 86, "task": "Find a walkthrough guide for Assassin's Creed Valhalla on ign", "reference_task_length": 6, "evaluation": [ @@ -3832,11 +3735,10 @@ "url": "https://www.ign.com/" } } - ], - "time": "2024-05-26 04:28:05.637" + ] }, { - "index": "TExb4W23ACyVIzZA-rTlw", + "index": 87, "task": "Find more films from the director of Smile on tvguide", "reference_task_length": 7, "evaluation": [ @@ -3856,11 +3758,10 @@ "url": "https://www.tvguide.com/celebrities/parker-finn/3060049350/" } } - ], - "time": "2024-05-20 05:47:01.289" + ] }, { - "index": "TfvF4IF2gNH53xipCdh4W", + "index": 88, "task": 
"Find help page about buying tickets on seatgeek", "reference_task_length": 4, "evaluation": [ @@ -3880,11 +3781,10 @@ "url": "https://seatgeek.com/help/articles/8985521334291-Buying-Tickets" } } - ], - "time": "2024-05-23 06:19:32.799" + ] }, { - "index": "ThXZAFmqlj6jJb6QyJ2fG", + "index": 89, "task": "Tell me more about the Adirondack route on amtrak.", "reference_task_length": 5, "evaluation": [ @@ -3912,35 +3812,10 @@ "url": "https://www.amtrak.com/routes/adirondack-train.html" } } - ], - "time": "2024-03-25 06:22:08.019" + ] }, { - "index": "TIanz2U5iPllmhS5vXucP", - "task": "Open the baggage fee calculator on united", - "reference_task_length": 5, - "evaluation": [ - { - "match_function_name": "url_included_match", - "content": { - "key": "", - "reference_answer": "united.", - "url": "https://www.united.com/en/us" - } - }, - { - "match_function_name": "url_included_match", - "content": { - "key": "", - "reference_answer": "/baggage-calculator", - "url": "https://www.united.com/en/us/baggage-calculator/any-flights" - } - } - ], - "time": "2024-05-26 05:16:41.406" - }, - { - "index": "TPEK67bCB000EObLJJumy", + "index": 90, "task": "Show me the best city tours on nyc", "reference_task_length": 3, "evaluation": [ @@ -3960,11 +3835,10 @@ "url": "https://www.nyc.com/guided_tours/" } } - ], - "time": "2024-05-24 09:28:31.350" + ] }, { - "index": "U7s1xvWgJRv5pCkwrSVyZ", + "index": 91, "task": "Find the team schedule of the Brooklyn Nets on espn", "reference_task_length": 4, "evaluation": [ @@ -3992,11 +3866,10 @@ "url": "https://www.espn.com/nba/team/schedule/_/name/bkn/brooklyn-nets" } } - ], - "time": "2024-05-26 07:08:21.889" + ] }, { - "index": "Ud2nnGYfINKcnIz7xXeUK", + "index": 92, "task": "Browse used Mercedes cars made between 2004 to 2012 and sort by highest price on kbb", "reference_task_length": 11, "evaluation": [ @@ -4032,11 +3905,10 @@ "url": "https://www.kbb.com/car-finder/?intent=used&manufacturers=mercedesbenz&years=2004-2012&sort=pricedesc" } } - 
], - "time": "2024-05-26 05:50:37.604" + ] }, { - "index": "UnxkVMof0YIvazXZb8Fc8", + "index": 93, "task": "Show me the scores for the 2019 super bowl on nfl", "reference_task_length": 7, "evaluation": [ @@ -4072,11 +3944,10 @@ "url": "https://www.nfl.com/scores/" } } - ], - "time": "2024-05-26 05:08:15.217" + ] }, { - "index": "v5stCvJH8fYQhg1gqTxc8", + "index": 94, "task": "Book the cheapest parking spot near Bradley Airport on spothero", "reference_task_length": 9, "evaluation": [ @@ -4132,11 +4003,10 @@ "url": "https://spothero.com/airport-parking/12793/34-old-county-rd?starts=2024-03-13T12%3A00&ends=2024-03-17T12%3A00&airport=true" } } - ], - "time": "2024-03-12 07:02:45.009" + ] }, { - "index": "vwfgl1oxzBZBfxdogAWpU", + "index": 95, "task": "Browse hot deals near zip code 10019 on koa", "reference_task_length": 7, "evaluation": [ @@ -4164,11 +4034,10 @@ "url": "https://koa.com/hot-deals/?q=10019" } } - ], - "time": "2024-05-26 06:51:00.521" + ] }, { - "index": "wi4SWrbpRBBQkq5cR9bmD", + "index": 96, "task": "Create a new list called Bathroom Remodeling on menards", "reference_task_length": 8, "evaluation": [ @@ -4215,11 +4084,10 @@ "url": "https://www.menards.com/main/myLists.html" } } - ], - "time": "2024-05-26 04:46:30.702" + ] }, { - "index": "y8kmCn6l3G18O6GS5LC0A", + "index": 97, "task": "Find the most popular movies and showcase those with the highest IMDb ratings on imdb", "reference_task_length": 6, "evaluation": [ @@ -4247,11 +4115,10 @@ "url": "https://www.imdb.com/chart/moviemeter/?ref_=nv_mv_mpm&sort=release_date%2Cdesc" } } - ], - "time": "2024-05-21 17:43:25.182" + ] }, { - "index": "yCHsGNiD35HdNUekfL8hI", + "index": 98, "task": "Show list of popular businesses in Cleveland on yellowpages.", "reference_task_length": 5, "evaluation": [ @@ -4279,11 +4146,33 @@ "url": "https://www.yellowpages.com/cleveland-oh/business-listings/1" } } - ], - "time": "2024-03-27 11:22:22.644" + ] + }, + { + "index": 99, + "task": "Open the baggage fee calculator on 
united", + "reference_task_length": 6, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "united.", + "url": "https://www.united.com/en/us/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/checked-bag-fee-calculator/", + "url": "https://www.united.com/en/us/checked-bag-fee-calculator/any-flights" + } + } + ] }, { - "index": "zbG_TK-AU6U-xiYf9QOZo", + "index": 100, "task": "See Nissan and Honda cars for sale near Kentwood, MI 49512 on carmax", "reference_task_length": 12, "evaluation": [ @@ -4364,11 +4253,10 @@ "url": "https://www.carmax.com/cars/honda/nissan" } } - ], - "time": "2024-03-27 09:21:52.246" + ] }, { - "index": "ZCpbMXedfNMd1FSMAJ1sP", + "index": 101, "task": "Browse the clearance section and filter for women's dresses in size small on kohls.", "reference_task_length": 8, "evaluation": [ @@ -4420,11 +4308,10 @@ "url": "https://www.kohls.com/catalog/clearance-womens-dresses-clothing.jsp?CN=Promotions:Clearance+Gender:Womens+Category:Dresses+Department:Clothing&icid=wmnsclearance-VN-dresses&pfm=undefined&kls_sbp=59288101756704364110307077102020115511" } } - ], - "time": "2024-03-23 08:12:48.582" + ] }, { - "index": "zSPmWOxOb0SYdHkC_ubJb", + "index": 102, "task": "Find the schedule for upcoming MLB games for the New York Yankees on foxsports", "reference_task_length": 5, "evaluation": [ @@ -4460,12 +4347,11 @@ "url": "https://www.foxsports.com/mlb/new-york-yankees-team-schedule" } } - ], - "time": "2024-05-26 06:27:22.800" + ] }, { - "index": "ZZF9RoepyS1ZVAW3RD5k8", - "task": "What are the upcoming soccer events on ESPN2? 
on espn", + "index": 103, + "task": "What are the upcoming soccer events on ESPN2 on espn", "reference_task_length": 8, "evaluation": [ { @@ -4500,7 +4386,6 @@ "url": "https://www.espn.com/watch/schedule/" } } - ], - "time": "2024-05-26 07:02:13.859" + ] } ] \ No newline at end of file diff --git a/browsergym/webcanvas/src/browsergym/webcanvas/event_handler.py b/browsergym/webcanvas/src/browsergym/webcanvas/event_handler.py deleted file mode 100644 index 309f6a10..00000000 --- a/browsergym/webcanvas/src/browsergym/webcanvas/event_handler.py +++ /dev/null @@ -1,111 +0,0 @@ -class EventHandler: - def __init__(self, page, events=None): - self.page = page - self.events = events or [] - self.update_callback = None - self._setup_event_listeners() - - def _setup_event_listeners(self): - """Set up event listeners for the page""" - # 首先暴露 handle_event 方法给浏览器 - self.page.expose_function("handleEvent", - lambda selector, event_type, element_info: self.handle_event(selector, event_type, element_info)) - - # 然后设置事件监听器 - self.page.evaluate(""" - () => { - const allEvents = [ - 'click', 'input', 'change', 'keydown', 'keyup', - 'mouseover', 'mouseout', 'mousedown', 'mouseup', 'focus', 'blur' - ]; - - function getElementInfo(element) { - return { - selector: null, - textContent: element.textContent || '', - value: element.value || '', - type: element.type || '', - checked: element.checked, - selected: element.selected, - tagName: element.tagName.toLowerCase() - }; - } - - document.addEventListener('click', (event) => { - const elementInfo = getElementInfo(event.target); - window.handleEvent(null, 'click', JSON.stringify(elementInfo)); - }, true); - - document.addEventListener('input', (event) => { - const elementInfo = getElementInfo(event.target); - window.handleEvent(null, 'input', JSON.stringify(elementInfo)); - }, true); - - if (window.selectors && window.selectors.length) { - window.selectors.forEach((selector) => { - const element = document.querySelector(selector); - if 
(element) { - allEvents.forEach((eventType) => { - element.addEventListener(eventType, (event) => { - const elementInfo = getElementInfo(event.target); - elementInfo.selector = selector; - window.handleEvent(selector, eventType, JSON.stringify(elementInfo)); - }, true); - }); - } - }); - } - } - """) - - def set_update_callback(self, callback): - """Set callback function to be called when events are updated""" - self.update_callback = callback - - def handle_event(self, selector, event_type, element_info_str): - """Handle events from the page by collecting and maintaining event states - - Args: - selector: Element selector if any - event_type: Type of the event (click, input, etc) - element_info_str: JSON string containing element information - - Returns: - Updated events list or None if error occurs - """ - try: - import json - import logging - from .utils import get_netloc - - logger = logging.getLogger(__name__) - element_info = json.loads(element_info_str) - logger.debug(f"Element event: {element_info}") - - for idx, event in enumerate(self.events): - if not event: - continue - - # Update event state based on selector match - if event.get("selector") and event["selector"] == selector: - self.events[idx]["status"] = True - self.events[idx]["target_value"] = element_info["textContent"] - self.events[idx]["event_type"] = event_type - - # Update event state based on element value - elif event.get("reference_value"): - element_value = element_info.get("value", "") or element_info.get("textContent", "") - self.events[idx]["target_value"] = element_value - self.events[idx]["event_type"] = event_type - - # Notify callback if exists - if self.update_callback: - self.update_callback(self.events) - - return self.events - - except json.JSONDecodeError: - logger.error(f"Failed to parse element info: {element_info_str}") - except Exception as e: - logger.error(f"Error handling event: {str(e)}") - return None \ No newline at end of file diff --git 
a/browsergym/webcanvas/src/browsergym/webcanvas/instance.py b/browsergym/webcanvas/src/browsergym/webcanvas/instance.py index 30054aee..43c2b5ad 100644 --- a/browsergym/webcanvas/src/browsergym/webcanvas/instance.py +++ b/browsergym/webcanvas/src/browsergym/webcanvas/instance.py @@ -115,9 +115,9 @@ def evaluate(page, selector, target_value, evaluate_steps, reference_evaluate_st return evaluate_steps, step_score_rate, match_result, task_finished @staticmethod - def evaluate_events(page, evaluate_steps, task_events, target_value, reference_evaluate_steps): + def evaluate_events(page, evaluate_steps, task_events, reference_evaluate_steps): evaluate_steps, match_result = step_event_evaluate(page=page, evaluate_steps=evaluate_steps, - task_events=task_events, target_value=target_value) + event=task_events) total_step_score = 0 for evaluate in evaluate_steps: total_step_score += evaluate["score"] diff --git a/browsergym/webcanvas/src/browsergym/webcanvas/step_score.py b/browsergym/webcanvas/src/browsergym/webcanvas/step_score.py index d807a247..92a02fb5 100644 --- a/browsergym/webcanvas/src/browsergym/webcanvas/step_score.py +++ b/browsergym/webcanvas/src/browsergym/webcanvas/step_score.py @@ -97,13 +97,9 @@ def is_same_element(page, input_element_handle, reference_element_handle): return int(is_same_element) @ staticmethod - def path_exact_match(input_answer, reference_answer, method, page, input_netloc, reference_netloc): + def path_exact_match(input_answer, reference_answer, method, page): score = 0 if method == "xpath": - if reference_netloc != input_netloc: - # print("reference_netloc:", reference_netloc, - # "input_netloc:", input_netloc) - return 0 try: html_content = page.content() tree = html.fromstring(html_content) @@ -127,8 +123,6 @@ def path_exact_match(input_answer, reference_answer, method, page, input_netloc, else: score = 0 elif method == "selector": - if reference_netloc != input_netloc: - return 0 try: input_element = input_answer reference_element = 
page.locator(reference_answer) @@ -158,8 +152,6 @@ def path_exact_match(input_answer, reference_answer, method, page, input_netloc, pass except: score = 0 - # result_score = MatchFunction.include_match( - # input_answer, reference_answer) return score @ staticmethod @@ -170,31 +162,22 @@ def path_included_match(input_answer, reference_answer, method, html_content): return result_score @ staticmethod - def element_value_exact_match(input_answer, reference_answer, input_netloc, reference_netloc): - # if reference_netloc != input_netloc: - # # print("reference_netloc:", reference_netloc, - # # "input_netloc:", input_netloc) - # return 0 + def element_value_exact_match(input_answer, reference_answer): + # TODO fuzzy check if the input_answer is on the same page as the reference_answer result_score = MatchFunction.exact_match( input_answer, reference_answer) return result_score @ staticmethod - def element_value_include_match(input_answer, reference_answer, input_netloc, reference_netloc): - # if reference_netloc != input_netloc: - # # print("reference_netloc:", reference_netloc, - # # "input_netloc:", input_netloc) - # return 0 + def element_value_include_match(input_answer, reference_answer): + # TODO fuzzy check if the input_answer is on the same page as the reference_answer result_score = MatchFunction.include_match( input_answer, reference_answer) return result_score @ staticmethod - def element_value_semantic_match(input_answer, semantic_method, input_netloc, reference_netloc=0): - # if reference_netloc != input_netloc: - # # print("reference_netloc:", reference_netloc, - # # "input_netloc:", input_netloc) - # return 0 + def element_value_semantic_match(input_answer, semantic_method): + # TODO fuzzy check if the input_answer is on the same page as the reference_answer if len(input_answer) == 0: return 0 result_score = MatchFunction.semantic_match( diff --git a/browsergym/webcanvas/src/browsergym/webcanvas/task.py 
b/browsergym/webcanvas/src/browsergym/webcanvas/task.py index b37b03ea..fc845d26 100644 --- a/browsergym/webcanvas/src/browsergym/webcanvas/task.py +++ b/browsergym/webcanvas/src/browsergym/webcanvas/task.py @@ -37,6 +37,12 @@ def __init__( self.match_result: str = None self.task_finish: bool = False self.activate_element = None + self.current_event = { + "selector": None, + "status": True, + "target_value": None, + "event_type": None + } if task_id is None: raise ValueError( @@ -46,7 +52,7 @@ def __init__( # read the list of all WebCanvas task configs import browsergym.webcanvas as wcs all_configs_str = importlib.resources.files(wcs).joinpath( - "data/mind2web-live-train_130.json").read_text() + "data/mind2web-live_test_20241024.json").read_text() all_task_configs = json.loads(all_configs_str) all_task = WebCanvasInstance.read_task_configs(all_task_configs) if task_id is not None and task_id < len(all_task): @@ -69,11 +75,10 @@ def get_task_id(cls): def setup(self, page: playwright.sync_api.Page, start_url: str = None) -> tuple[str, dict]: self.goal, _, _, reference_evaluate_steps = self.task_configs - self.evaluaion_step = reference_evaluate_steps + self.evaluation_step = reference_evaluate_steps self.reference_evaluate_steps = reference_evaluate_steps start_url = start_url if start_url else self.start_url - page.goto(start_url, timeout=10000) - self._init_task_events() + page.goto(start_url, timeout=30000) return self.goal, {} def teardown(self) -> None: @@ -106,36 +111,10 @@ def validate( step_action_info["evaluation"] = [] actions = WebCanvasInstance.parse_bid_from_action(action) - # if len(actions) > 0: - # for action_type, bid, target_value in actions: - # locator = None - # # if bid != '': - # # try: - # # locator = self.get_element_by_bid(page, bid) - # # except Exception as e: - # # logger.warning(f"warning:{e}") - # # locator = None - # self.evaluaion_step, self.step_score_rate, self.match_result, self.task_finished = WebCanvasInstance.evaluate( - # 
page, locator, target_value, self.evaluaion_step, self.reference_evaluate_steps) - - # step_action_info["evaluation"].append( - # { - # "action_type": action_type, - # "bid": bid, - # "target_value": target_value, - # "step_score_rate": self.step_score_rate, - # "match_result": self.match_result, - # "task_status": self.task_finished - # } - # ) - - # if self.task_finished: - # done = True - # break if len(actions) > 0: for action_type, bid, target_value in actions: - self.evaluaion_step, self.step_score_rate, self.match_result, self.task_finished = WebCanvasInstance.evaluate_events( - page, self.evaluaion_step, self.task_events, target_value, self.reference_evaluate_steps) + self.evaluation_step, self.step_score_rate, self.match_result, self.task_finished = WebCanvasInstance.evaluate_events( + page, self.evaluation_step, self.current_event, self.reference_evaluate_steps) step_action_info["evaluation"].append( { @@ -199,24 +178,7 @@ def get_element_by_bid( def events(self): return self.task_events - def _init_task_events(self): - self.task_events = [] - for evaluation_step in self.reference_evaluate_steps: - event = {} - if evaluation_step["match_function"] in ["element_path_exactly_match", "element_path_included_match"]: - event["selector"] = evaluation_step['reference_answer'] - event["target_value"] = "" - event["reference_value"] = "" - event["status"] = False - elif evaluation_step["match_function"] in ["element_value_exactly_match", "element_value_semantic_match"]: - event["selector"] = evaluation_step.get('path') - event["target_value"] = "" - event["reference_value"] = evaluation_step['reference_answer'] - event["status"] = False - self.task_events.append(event) - def update_events(self, agent_event): - for ix, event in enumerate(agent_event): - if event and event['status']: - self.task_events[ix]["status"] = event['status'] - self.task_events[ix]["target_value"] = event['target_value'] + """Store the most recent event""" + if agent_event and 
len(agent_event) > 0: + self.current_event = agent_event[0] diff --git a/browsergym/webcanvas/src/browsergym/webcanvas/utils.py b/browsergym/webcanvas/src/browsergym/webcanvas/utils.py index f8f5a3ff..450d3548 100644 --- a/browsergym/webcanvas/src/browsergym/webcanvas/utils.py +++ b/browsergym/webcanvas/src/browsergym/webcanvas/utils.py @@ -118,21 +118,7 @@ def step_evaluate(page: Page, evaluate_steps=[], input_path=None, element_value= return evaluate_steps, match_result -def step_event_evaluate(page, evaluate_steps, task_events, target_value): - - def check_event_by_selector(events, selector): - for event in events: - if event and event["selector"] == selector: - if event["status"]: - return 1, event - return 0, None - - def check_event_by_element_value(events, element_value): - for event in events: - if event and event["target_value"] == element_value: - if event["status"]: - return 1, event - return 0, None +def step_event_evaluate(page, evaluate_steps, event): step_score = 0 match_result = [] @@ -150,54 +136,23 @@ def check_event_by_element_value(events, element_value): page.url, evaluate["reference_answer"], evaluate["key"]) elif match_function == "element_path_exactly_match": - score, event = check_event_by_selector( - task_events, evaluate["reference_answer"]) + score = ElementEvaluator.path_exact_match( + event["selector"], evaluate["reference_answer"], evaluate["method"], page) elif match_function == "element_path_included_match": pass elif match_function == "element_value_exactly_match": - input_netloc = get_netloc(page.url) - if "path" in evaluate.keys(): - path_score, event = check_event_by_selector( - task_events, evaluate["path"]) - if path_score == 0: - score = 0 - else: - score = ElementEvaluator.element_value_exact_match( - event["target_value"], evaluate["reference_answer"], input_netloc, evaluate["netloc"]) - # print("score:",score) - else: - score, _ = check_event_by_element_value( - task_events, evaluate["reference_answer"]) + score = 
ElementEvaluator.element_value_exact_match( + event["target_value"], evaluate["reference_answer"]) elif match_function == "element_value_included_match": - input_netloc = get_netloc(page.url) - if "path" in evaluate.keys(): - path_score, event = check_event_by_selector( - task_events, evaluate["path"]) - if path_score == 0: - score = 0 - else: - score = ElementEvaluator.element_value_include_match( - event["target_value"], evaluate["reference_answer"], input_netloc, evaluate["netloc"]) - else: - score, _ = check_event_by_element_value( - task_events, evaluate["reference_answer"]) + score = ElementEvaluator.element_value_include_match( + event["target_value"], evaluate["reference_answer"]) elif match_function == "element_value_semantic_match": - input_netloc = get_netloc(page.url) - if "path" in evaluate.keys(): - path_score, event = check_event_by_selector( - task_events, evaluate["path"]) - if path_score == 0: - score = 0 - else: - score = ElementEvaluator.element_value_semantic_match( - event["target_value"], evaluate["reference_answer"], input_netloc, evaluate["netloc"]) - else: - score, _ = check_event_by_element_value( - task_events, evaluate["reference_answer"]) + score = ElementEvaluator.element_value_semantic_match( + event["target_value"], evaluate["reference_answer"]) evaluate["score"] = max(evaluate["score"], score) if evaluate["score"] >= 1: From ea5ef1b8a885ece54ce96ad945b007187b93f0d9 Mon Sep 17 00:00:00 2001 From: han032206 Date: Sun, 8 Dec 2024 11:45:26 +0800 Subject: [PATCH 13/15] merge webcanvas integration with a universal event listener into broswergym --- .gitignore | 5 ----- README.md | 23 +++++++++++++++++++++-- 2 files changed, 21 insertions(+), 7 deletions(-) diff --git a/.gitignore b/.gitignore index 2e8a47c8..42f7df3e 100644 --- a/.gitignore +++ b/.gitignore @@ -150,8 +150,3 @@ tests/assistantbench/assistantbench-predictions-test.jsonl # weblinx bg_wl_data/ - -# webcanvas -test_gpt.py -temp_test.py -results/ \ No newline at end of file 
diff --git a/README.md b/README.md index 3934cf3d..379e8f9f 100644 --- a/README.md +++ b/README.md @@ -59,6 +59,7 @@ pip install browsergym-webarena # core + webarena pip install browsergym-visualwebarena # core + visualwebarena pip install browsergym-workarena # core + workarena pip install browsergym-assistantbench # core + assistantbench +pip install browsergym-webcanvas # core + webcanvas pip install weblinx-browsergym # core + weblinx ``` @@ -169,14 +170,28 @@ print("\n".join(env_ids)) AssistantBench ```python import gymnasium as gym -import browsergym.workarena # register assistantbench tasks as gym environments +import browsergym.assistantbench # register assistantbench tasks as gym environments # start an assistantbench task env = gym.make("browsergym/assistantbench.validation.3") ... # list all the available assistantbench tasks -env_ids = [id for id in gym.envs.registry.keys() if id.startswith("browsergym/workarena")] +env_ids = [id for id in gym.envs.registry.keys() if id.startswith("browsergym/assistantbench")] +print("\n".join(env_ids)) +``` + +WebCanvas +```python +import gymnasium as gym +import browsergym.webcanvas # register webcanvas tasks as gym environments + +# start a webcanvas task +env = gym.make("browsergym/webcanvas.mind2web-live.0") +... + +# list all the available webcanvas tasks +env_ids = [id for id in gym.envs.registry.keys() if id.startswith("browsergym/webcanvas")] print("\n".join(env_ids)) ``` @@ -213,6 +228,9 @@ python demo_agent/run_demo.py --task_name webarena.4 # visualwebarena python demo_agent/run_demo.py --task_name visualwebarena.398 + +# webcanvas +python demo_agent/run_demo.py --task_name webcanvas.mind2web-live.0 ``` You can customize your experience by changing the `model_name` to your preferred LLM (it uses `gpt-4o-mini` by default), adding screenshots for your VLMs with `use_screenshot`, and much more! 
@@ -230,6 +248,7 @@ python demo_agent/run_demo.py --help - [MiniWoB(++)](https://miniwob.farama.org/): A collection of over 100 web tasks on synthetic web pages. - [WebLINX](https://github.com/McGill-NLP/weblinx): A dataset of real-world web interaction traces. - [AssistantBench](https://github.com/oriyor/assistantbench): A benchmark of realistic and time-consuming tasks on the open web. +- [WebCanvas](https://github.com/iMeanAI/WebCanvas): Benchmarks of web tasks on live websites with key-node in progress evaluation. ## 🌟 Contributors From 777deeae88036dc382e37c25d1489be0bf4bafc8 Mon Sep 17 00:00:00 2001 From: han032206 Date: Sun, 8 Dec 2024 21:56:43 +0800 Subject: [PATCH 14/15] clean and update webcanvas README.md --- browsergym/core/src/browsergym/core/env.py | 6 +----- browsergym/webcanvas/README.md | 16 ++++++++++++++-- 2 files changed, 15 insertions(+), 7 deletions(-) diff --git a/browsergym/core/src/browsergym/core/env.py b/browsergym/core/src/browsergym/core/env.py index 299334a4..0db47d2f 100644 --- a/browsergym/core/src/browsergym/core/env.py +++ b/browsergym/core/src/browsergym/core/env.py @@ -612,10 +612,7 @@ def _get_obs(self): def _event_listener(self): """Add universal event listener""" - # # First expose the handle_event function - # self.context.expose_function("handleEvent", - # lambda selector, event_type, element_info: self._handle_event(selector, event_type, element_info)) - logger.info("Setting up event listeners...") # Add debug log + logger.info("Setting up event listeners...") try: # Then set up event listeners self.page.evaluate(""" @@ -677,7 +674,6 @@ def _event_listener(self): }); } """) - logger.info("Event listeners setup completed") except Exception as e: logger.error(f"Failed to setup event listeners: {str(e)}") diff --git a/browsergym/webcanvas/README.md b/browsergym/webcanvas/README.md index 30b22099..baedfd37 100644 --- a/browsergym/webcanvas/README.md +++ b/browsergym/webcanvas/README.md @@ -1,4 +1,6 @@ -## Setup +# WebCanvas 
Environment + +## Installation 1. Install the package ```sh @@ -8,4 +10,14 @@ pip install browsergym-webcanvas ```sh export OPENAI_API_KEY=... -``` \ No newline at end of file +``` + +3. Download the dataset + - Option 1: Download from HuggingFace + Visit [Mind2Web-Live Dataset](https://huggingface.co/datasets/iMeanAI/Mind2Web-Live) and download the latest dataset. + + - Option 2: Download from WebCanvas Platform + Visit [WebCanvas Repository](https://github.com/iMeanAI/WebCanvas) and follow the instructions to download the latest dataset. + +4. Place the dataset + - Put the downloaded JSON file into `./src/browsergym/webcanvas/data/` \ No newline at end of file From 6275765027a64aa6de06b2d8db6bae75642561aa Mon Sep 17 00:00:00 2001 From: han032206 Date: Thu, 19 Dec 2024 17:46:54 +0800 Subject: [PATCH 15/15] update webcanvas integration to operate totally within task setup --- browsergym/core/src/browsergym/core/env.py | 109 +-------- .../src/browsergym/webcanvas/task.py | 207 +++++++++++++++--- 2 files changed, 183 insertions(+), 133 deletions(-) diff --git a/browsergym/core/src/browsergym/core/env.py b/browsergym/core/src/browsergym/core/env.py index 0db47d2f..167c0433 100644 --- a/browsergym/core/src/browsergym/core/env.py +++ b/browsergym/core/src/browsergym/core/env.py @@ -269,10 +269,6 @@ def override_property(task, env, property): source["page"]) ) - self.context.expose_binding( - "handleEvent", lambda selector, event_type, element_text: self._handle_event(selector, event_type, element_text) - ) - self.context.add_init_script( r""" window.browsergym_page_activated(); @@ -382,9 +378,6 @@ def override_property(task, env, property): def step(self, action: str) -> tuple: - # Setup event listener after page is create - self._event_listener() - self.last_action = action info = {} @@ -402,11 +395,6 @@ def report_infeasible_instructions(reason: str): self.chat.add_message(role="infeasible", msg=reason) self.infeasible_message_received = True - - if hasattr(self.task, 
'webcanvas'): - logger.debug(f"Initiating webcanvas task event listen") - self._event_listener() - # try to execute the action logger.debug(f"Executing action") try: @@ -448,12 +436,9 @@ def report_infeasible_instructions(reason: str): self._wait_for_user_message() logger.debug(f"User message done") - # if not hasattr(self.task, 'webcanvas'): logger.debug(f"Initiating task validation") # extract reward, done, user_message, info (task-specific) reward, done, user_message, task_info = self._task_validate() - logger.info(f"WebCanvas task validation result:\n{ - self.task.evaluate_result}") info["task_info"] = task_info logger.debug(f"Task validation done") @@ -608,96 +593,4 @@ def _get_obs(self): "elapsed_time": np.asarray([time.time() - self.start_time]), } - return obs - - def _event_listener(self): - """Add universal event listener""" - logger.info("Setting up event listeners...") - try: - # Then set up event listeners - self.page.evaluate(""" - () => { - const allEvents = [ - 'click', 'input', 'change', 'keydown', 'keyup', - 'mouseover', 'mouseout', 'mousedown', 'mouseup', 'focus', 'blur' - ]; - - function getElementSelector(element) { - if (!element) return null; - // Try to get unique selector for the element - try { - let path = []; - while (element && element.nodeType === Node.ELEMENT_NODE) { - let selector = element.nodeName.toLowerCase(); - if (element.id) { - selector += '#' + element.id; - path.unshift(selector); - break; - } else { - let sibling = element; - let nth = 1; - while (sibling.previousElementSibling) { - sibling = sibling.previousElementSibling; - if (sibling.nodeName === element.nodeName) nth++; - } - if (nth > 1) selector += `:nth-child(${nth})`; - } - path.unshift(selector); - element = element.parentNode; - } - return path.join(' > '); - } catch (e) { - return null; - } - } - - function getElementInfo(element) { - return { - textContent: element.textContent || '', - value: element.value || '', - tagName: element.tagName.toLowerCase() - }; - 
} - - allEvents.forEach(eventType => { - document.addEventListener(eventType, (event) => { - const element = event.target; - const selector = getElementSelector(element); - const elementInfo = getElementInfo(element); - - window.handleEvent( - selector, - eventType, - JSON.stringify(elementInfo) - ); - }, true); - }); - } - """) - except Exception as e: - logger.error(f"Failed to setup event listeners: {str(e)}") - - def _handle_event(self, selector, event_type, element_info_str): - """ - Handle DOM events by updating task events - """ - try: - element_info = json.loads(element_info_str) - logger.info(f"Event received - selector: {selector}, type: {event_type}, info: {element_info}") - - # Create current event - current_event = { - "selector": selector, - "status": True, - "target_value": element_info.get("value") or element_info.get("textContent", ""), - "event_type": event_type - } - - # Update task events - if hasattr(self.task, 'update_events'): - self.task.update_events([current_event]) - - except json.JSONDecodeError: - logger.error(f"Failed to parse element info: {element_info_str}") - except Exception as e: - logger.error(f"Error handling event: {str(e)}") + return obs \ No newline at end of file diff --git a/browsergym/webcanvas/src/browsergym/webcanvas/task.py b/browsergym/webcanvas/src/browsergym/webcanvas/task.py index fc845d26..f28011b8 100644 --- a/browsergym/webcanvas/src/browsergym/webcanvas/task.py +++ b/browsergym/webcanvas/src/browsergym/webcanvas/task.py @@ -43,6 +43,7 @@ def __init__( "target_value": None, "event_type": None } + self.event_page = None # Store the page where the event occurred if task_id is None: raise ValueError( @@ -74,9 +75,18 @@ def get_task_id(cls): raise NotImplementedError def setup(self, page: playwright.sync_api.Page, start_url: str = None) -> tuple[str, dict]: + # Save the page reference + self.page = page + + # Ensure event listeners are set up + self._ensure_event_listeners(page) + + # Initialize task configuration 
self.goal, _, _, reference_evaluate_steps = self.task_configs self.evaluation_step = reference_evaluate_steps self.reference_evaluate_steps = reference_evaluate_steps + + # Navigate to start URL start_url = start_url if start_url else self.start_url page.goto(start_url, timeout=30000) return self.goal, {} @@ -96,8 +106,11 @@ def validate( self, page: playwright.sync_api.Page, chat_messages: list[str], - action: str = "" - ) -> Tuple[float, bool, str, dict]: + action: dict, + ) -> Tuple[float, bool, Optional[str], dict]: + + self._ensure_event_listeners(page) + reward, done, msg, info = 0, False, "", {} for message in chat_messages: @@ -110,26 +123,40 @@ def validate( step_action_info["time_step"] = self.time_step step_action_info["evaluation"] = [] - actions = WebCanvasInstance.parse_bid_from_action(action) - if len(actions) > 0: - for action_type, bid, target_value in actions: - self.evaluation_step, self.step_score_rate, self.match_result, self.task_finished = WebCanvasInstance.evaluate_events( - page, self.evaluation_step, self.current_event, self.reference_evaluate_steps) - - step_action_info["evaluation"].append( - { - "action_type": action_type, - "bid": bid, - "target_value": target_value, - "step_score_rate": self.step_score_rate, - "match_result": self.match_result, - "task_status": self.task_finished - } - ) - if self.task_finished: - done = True - break + # Use event_page for evaluation if available, otherwise use current page + evaluation_page = self.event_page if self.event_page else page + + # Check if selector can be located + can_locate = self._can_locate_selector(evaluation_page, self.current_event["selector"]) + logger.info(f"Selector '{self.current_event['selector']}' can{'' if can_locate else 'not'} be located on page") + + self.evaluation_step, self.step_score_rate, self.match_result, self.task_finished = WebCanvasInstance.evaluate_events( + evaluation_page, self.evaluation_step, self.current_event, self.reference_evaluate_steps) + + # Reset 
event_page after evaluation + self.event_page = None + + step_action_info["evaluation"].append( + { + "step_score_rate": self.step_score_rate, + "match_result": self.match_result, + "task_status": self.task_finished + } + ) + + if self.task_finished: + done = True + self.trace_info.append(step_action_info) + + # Add validation result logging + logger.info("=== Validation Results ===") + logger.info(f"Step Score Rate: {self.step_score_rate}") + logger.info(f"Match Result: {self.match_result}") + logger.info(f"Task Status: {'Completed' if self.task_finished else 'In Progress'}") + logger.info(f"Current Time Step: {self.time_step}") + logger.info("========================") + return reward, done, msg, info # https://github.com/ServiceNow/BrowserGym/blob/main/browsergym/core/src/browsergym/core/action/utils.py
@@ -178,7 +205,158 @@ def get_element_by_bid(
     def events(self):
         return self.task_events
 
-    def update_events(self, agent_event):
-        """Store the most recent event"""
-        if agent_event and len(agent_event) > 0:
-            self.current_event = agent_event[0]
+    def _handle_event(self, selector, event_type, element_info_str, page):
+        """
+        Record a DOM event reported by the in-page ``handleEvent`` binding.
+
+        Args:
+            selector: CSS selector path built in the page for the event target.
+            event_type: Name of the DOM event, e.g. "click".
+            element_info_str: JSON object string carrying the target's
+                ``textContent``, ``value`` and ``tagName``.
+            page: Page on which the event fired.
+
+        Errors are logged rather than raised. If the payload cannot be parsed,
+        the previously stored page and event are left untouched.
+        """
+        try:
+            # Parse first: a malformed payload must not leave event_page
+            # pointing at a page that current_event does not belong to.
+            element_info = json.loads(element_info_str)
+
+            # Create current event
+            current_event = {
+                "selector": selector,
+                "status": True,
+                "target_value": element_info.get("value") or element_info.get("textContent", "") or "",
+                "event_type": event_type
+            }
+
+            # Update the page and the event together
+            self.event_page = page
+            self.current_event = current_event
+            logger.info(f"Event captured on page: {self.event_page.url}")
+            logger.info(f"Current event updated: {current_event}")
+
+        except json.JSONDecodeError:
+            logger.error(f"Failed to parse element info: {element_info_str}")
+        except Exception as e:
+            logger.error(f"Error handling event: {str(e)}")
+
+    def _ensure_event_listeners(self, page: playwright.sync_api.Page):
+        """
+        Ensures that event listeners are properly set up on the page.
+        Checks for existing handlers before setting up new ones to avoid duplicates.
+
+        The ``handleEvent`` binding is registered on the page's browser context,
+        so the callback reads the originating page from the binding's ``source``
+        argument instead of assuming it is the page passed in here.
+
+        Raises:
+            Exception: Any failure during setup is logged and re-raised.
+        """
+        try:
+            # Check if handleEvent is already bound
+            handle_event_exists = page.evaluate("""
+                () => typeof window.handleEvent === 'function'
+            """)
+
+            if not handle_event_exists:
+                page.context.expose_binding(
+                    "handleEvent",
+                    lambda source, selector, event_type, element_info: self._handle_event(
+                        selector, event_type, element_info, source["page"]
+                    )
+                )
+
+            # Set up DOM event listeners if not already initialized
+            page.evaluate("""
+                () => {
+                    if (window._eventListenersInitialized) return;
+
+                    const allEvents = [
+                        'click', 'input', 'change', 'keydown', 'keyup',
+                        'mouseover', 'mouseout', 'mousedown', 'mouseup', 'focus', 'blur'
+                    ];
+
+                    function getElementSelector(element) {
+                        if (!element) return null;
+                        try {
+                            let path = [];
+                            while (element && element.nodeType === Node.ELEMENT_NODE) {
+                                let selector = element.nodeName.toLowerCase();
+                                if (element.id) {
+                                    // Escape the id so it always forms a valid selector
+                                    selector += '#' + CSS.escape(element.id);
+                                    path.unshift(selector);
+                                    break;
+                                } else {
+                                    let sibling = element;
+                                    let nth = 1;
+                                    while (sibling.previousElementSibling) {
+                                        sibling = sibling.previousElementSibling;
+                                        if (sibling.nodeName === element.nodeName) nth++;
+                                    }
+                                    // nth counts same-tag siblings only, i.e. :nth-of-type
+                                    if (nth > 1) selector += `:nth-of-type(${nth})`;
+                                }
+                                path.unshift(selector);
+                                element = element.parentNode;
+                            }
+                            return path.join(' > ');
+                        } catch (e) {
+                            return null;
+                        }
+                    }
+
+                    function getElementInfo(element) {
+                        return {
+                            textContent: element.textContent || '',
+                            value: element.value || '',
+                            tagName: element.tagName.toLowerCase()
+                        };
+                    }
+
+                    allEvents.forEach(eventType => {
+                        document.addEventListener(eventType, (event) => {
+                            const element = event.target;
+                            const selector = getElementSelector(element);
+                            const elementInfo = getElementInfo(element);
+
+                            window.handleEvent(
+                                selector,
+                                eventType,
+                                JSON.stringify(elementInfo)
+                            );
+                        }, true);
+                    });
+
+                    window._eventListenersInitialized = true;
+                }
+            """)
+        except Exception as e:
+            logger.error(f"Failed to ensure event listeners: {str(e)}")
+            raise
+
+    def _can_locate_selector(self, page: playwright.sync_api.Page, selector: str) -> bool:
+        """
+        Test if a selector can be located on the given page
+        Returns True if the selector can be found, False otherwise
+        (including when the selector is invalid or the evaluation fails)
+        """
+        try:
+            # Try to locate the element using the selector
+            result = page.evaluate("""
+                (selector) => {
+                    try {
+                        const element = document.querySelector(selector);
+                        return element !== null;
+                    } catch (e) {
+                        return false;
+                    }
+                }
+            """, selector)
+            return result
+        except Exception as e:
+            logger.error(f"Error checking selector '{selector}': {str(e)}")
+            return False