diff --git a/Makefile b/Makefile index c8b0fd7a..d0bab46f 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ install: @echo "--- 🚀 Installing project dependencies ---" - pip install -e ./browsergym/core -e ./browsergym/miniwob -e ./browsergym/webarena -e ./browsergym/visualwebarena/ -e ./browsergym/experiments -e ./browsergym/assistantbench -e ./browsergym/ + pip install -e ./browsergym/core -e ./browsergym/miniwob -e ./browsergym/webarena -e ./browsergym/visualwebarena/ -e ./browsergym/experiments -e ./browsergym/assistantbench -e ./browsergym/webcanvas -e ./browsergym/ playwright install chromium install-demo: diff --git a/README.md b/README.md index a22adc90..379e8f9f 100644 --- a/README.md +++ b/README.md @@ -41,6 +41,7 @@ BrowserGym includes the following benchmarks by default: - [WebArena](https://webarena.dev/) - [VisualWebArena](https://jykoh.com/vwa) - [WorkArena](https://github.com/ServiceNow/WorkArena) + - [WebCanvas](https://github.com/iMeanAI/WebCanvas) - [AssistantBench](https://github.com/oriyor/assistantbench) - [WebLINX](https://github.com/McGill-NLP/weblinx) (static benchmark) @@ -58,6 +59,7 @@ pip install browsergym-webarena # core + webarena pip install browsergym-visualwebarena # core + visualwebarena pip install browsergym-workarena # core + workarena pip install browsergym-assistantbench # core + assistantbench +pip install browsergym-webcanvas # core + webcanvas pip install weblinx-browsergym # core + weblinx ``` @@ -71,6 +73,7 @@ Finally, each benchmark comes with its own specific setup that requires to follo - for WebArena, see [webarena/README.md](browsergym/webarena/README.md) - for VisualWebArena, see [visualwebarena/README.md](browsergym/visualwebarena/README.md) - for WorkArena, see [WorkArena](https://github.com/ServiceNow/WorkArena) + - for WebCanvas, see [WebCanvas](https://github.com/iMeanAI/WebCanvas) - for AssistantBench, see [assistantbench/README.md](browsergym/assistantbench/README.md) ### 🏗️ Development setup @@ -167,14 
+170,28 @@ print("\n".join(env_ids)) AssistantBench ```python import gymnasium as gym -import browsergym.workarena # register assistantbench tasks as gym environments +import browsergym.assistantbench # register assistantbench tasks as gym environments # start an assistantbench task env = gym.make("browsergym/assistantbench.validation.3") ... # list all the available assistantbench tasks -env_ids = [id for id in gym.envs.registry.keys() if id.startswith("browsergym/workarena")] +env_ids = [id for id in gym.envs.registry.keys() if id.startswith("browsergym/assistantbench")] +print("\n".join(env_ids)) +``` + +WebCanvas +```python +import gymnasium as gym +import browsergym.webcanvas # register webcanvas tasks as gym environments + +# start a webcanvas task +env = gym.make("browsergym/webcanvas.mind2web-live.0") +... + +# list all the available webcanvas tasks +env_ids = [id for id in gym.envs.registry.keys() if id.startswith("browsergym/webcanvas")] print("\n".join(env_ids)) ``` @@ -211,6 +228,9 @@ python demo_agent/run_demo.py --task_name webarena.4 # visualwebarena python demo_agent/run_demo.py --task_name visualwebarena.398 + +# webcanvas +python demo_agent/run_demo.py --task_name webcanvas.mind2web-live.0 ``` You can customize your experience by changing the `model_name` to your preferred LLM (it uses `gpt-4o-mini` by default), adding screenshots for your VLMs with `use_screenshot`, and much more! @@ -228,6 +248,7 @@ python demo_agent/run_demo.py --help - [MiniWoB(++)](https://miniwob.farama.org/): A collection of over 100 web tasks on synthetic web pages. - [WebLINX](https://github.com/McGill-NLP/weblinx): A dataset of real-world web interaction traces. - [AssistantBench](https://github.com/oriyor/assistantbench): A benchmark of realistic and time-consuming tasks on the open web. +- [WebCanvas](https://github.com/iMeanAI/WebCanvas): A benchmark of web tasks on live websites with in-progress, key-node-based evaluation.
## 🌟 Contributors diff --git a/browsergym/core/src/browsergym/core/env.py b/browsergym/core/src/browsergym/core/env.py index 30b565ba..167c0433 100644 --- a/browsergym/core/src/browsergym/core/env.py +++ b/browsergym/core/src/browsergym/core/env.py @@ -9,6 +9,7 @@ import gymnasium as gym import numpy as np import playwright.sync_api +import json from . import _get_global_playwright from .action.base import execute_python_code @@ -75,7 +76,8 @@ def __init__( pw_chromium_kwargs: dict = {}, pw_context_kwargs: dict = {}, # agent-related arguments - action_mapping: Optional[callable] = HighLevelActionSet().to_python_code, + action_mapping: Optional[callable] = HighLevelActionSet( + ).to_python_code, ): """ Instantiate a ready to use BrowserEnv gym environment. @@ -246,7 +248,8 @@ def override_property(task, env, property): no_viewport=True if self.resizeable_window else None, viewport=viewport if not self.resizeable_window else None, record_video_dir=( - Path(self.record_video_dir) / "task_video" if self.record_video_dir else None + Path(self.record_video_dir) / + "task_video" if self.record_video_dir else None ), record_video_size=viewport, locale=locale, @@ -262,8 +265,10 @@ def override_property(task, env, property): # there is no concept of active page in playwright # https://github.com/microsoft/playwright/issues/2603 self.context.expose_binding( - "browsergym_page_activated", lambda source: self._activate_page_from_js(source["page"]) + "browsergym_page_activated", lambda source: self._activate_page_from_js( + source["page"]) ) + self.context.add_init_script( r""" window.browsergym_page_activated(); @@ -406,15 +411,18 @@ def report_infeasible_instructions(reason: str): self.last_action_error = "" except Exception as e: self.last_action_error = f"{type(e).__name__}: {e}" - match = re.match("TimeoutError: Timeout ([0-9]+)ms exceeded.", self.last_action_error) + match = re.match( + "TimeoutError: Timeout ([0-9]+)ms exceeded.", self.last_action_error) if match: - 
info["action_exec_timeout"] = float(match.groups()[0]) / 1000 # ms to sec + info["action_exec_timeout"] = float( + match.groups()[0]) / 1000 # ms to sec logger.debug(f"Action executed") info["action_exec_stop"] = time.time() # wait a bit (for the JavaScript callback to set the active page) time.sleep(0.5) # wait for JS events to be fired (half a second) - self.context.cookies() # trigger all waiting Playwright callbacks on the stack (hack, see https://playwright.dev/java/docs/multithreading) + # trigger all waiting Playwright callbacks on the stack (hack, see https://playwright.dev/java/docs/multithreading) + self.context.cookies() # wait for the network to idle before extracting the observation, reward etc. self._wait_dom_loaded() @@ -455,8 +463,8 @@ def _task_validate(self): prev_active_page = self.page prev_page_history = self.page_history.copy() # call validate - reward, done, user_message, info = self.task.validate(self.page, self.chat.messages) - + reward, done, user_message, info = self.task.validate( + self.page, self.chat.messages, self.last_action) # safety fix, in case validate() did mess up the active page and/or page history if prev_active_page != self.page or prev_page_history != self.page_history: logger.debug( @@ -498,7 +506,8 @@ def _activate_page_from_js(self, page: playwright.sync_api.Page): page ) # move page to the end of dictionnary else: - self.page_history[page] = None # add page to the end of dictionnary + # add page to the end of dictionnary + self.page_history[page] = None self.page = page @@ -524,7 +533,8 @@ def _active_page_check(self): # active page should not be closed if self.page.is_closed(): - raise RuntimeError(f"Unexpected: active page has been closed ({self.page}).") + raise RuntimeError( + f"Unexpected: active page has been closed ({self.page}).") def _get_obs(self): @@ -583,4 +593,4 @@ def _get_obs(self): "elapsed_time": np.asarray([time.time() - self.start_time]), } - return obs + return obs \ No newline at end of file diff 
--git a/browsergym/experiments/src/browsergym/experiments/loop.py b/browsergym/experiments/src/browsergym/experiments/loop.py index 21163671..7cfb167c 100644 --- a/browsergym/experiments/src/browsergym/experiments/loop.py +++ b/browsergym/experiments/src/browsergym/experiments/loop.py @@ -931,6 +931,8 @@ def _get_env_name(task_name: str): import browsergym.webarena elif task_name.startswith("visualwebarena"): import browsergym.visualwebarena + elif task_name.startswith("webcanvas"): + import browsergym.webcanvas elif task_name.startswith("assistantbench"): import browsergym.assistantbench elif task_name.startswith("weblinx"): diff --git a/browsergym/webcanvas/README.md b/browsergym/webcanvas/README.md new file mode 100644 index 00000000..baedfd37 --- /dev/null +++ b/browsergym/webcanvas/README.md @@ -0,0 +1,23 @@ +# WebCanvas Environment + +## Installation + +1. Install the package +```sh +pip install browsergym-webcanvas +``` +2. Set up an OpenAI API key + +```sh +export OPENAI_API_KEY=... +``` + +3. Download the dataset + - Option 1: Download from HuggingFace + Visit [Mind2Web-Live Dataset](https://huggingface.co/datasets/iMeanAI/Mind2Web-Live) and download the latest dataset. + + - Option 2: Download from WebCanvas Platform + Visit [WebCanvas Repository](https://github.com/iMeanAI/WebCanvas) and follow the instructions to download the latest dataset. + +4. 
Place the dataset + - Put the downloaded JSON file into `./src/browsergym/webcanvas/data/` \ No newline at end of file diff --git a/browsergym/webcanvas/pyproject.toml b/browsergym/webcanvas/pyproject.toml new file mode 100644 index 00000000..23a579c9 --- /dev/null +++ b/browsergym/webcanvas/pyproject.toml @@ -0,0 +1,35 @@ +[build-system] +requires = ["hatchling", "hatch-requirements-txt"] +build-backend = "hatchling.build" + +[project] +name = "browsergym-webcanvas" +description = "WebCanvas benchmark for BrowserGym" +authors = [ + {name = "Sida Zhou"}, + {name = "Dehan Kong"}, +] +readme = "README.md" +requires-python = ">3.7" +license = {text = "Apache-2.0"} +classifiers = [ + "Development Status :: 2 - Pre-Alpha", + "Programming Language :: Python :: 3", + "Operating System :: OS Independent", + "Intended Audience :: Science/Research", + "Topic :: Scientific/Engineering :: Artificial Intelligence", + "License :: OSI Approved :: Apache Software License", +] +dynamic = ["dependencies", "version"] + +[project.urls] +homepage = "https://github.com/ServiceNow/BrowserGym" + +[tool.hatch.version] +path = "../core/src/browsergym/core/__init__.py" + +[tool.hatch.metadata.hooks.requirements_txt] +files = ["requirements.txt"] + +[tool.hatch.build.targets.wheel] +packages = ["src/browsergym"] diff --git a/browsergym/webcanvas/requirements.txt b/browsergym/webcanvas/requirements.txt new file mode 100644 index 00000000..e596c1dd --- /dev/null +++ b/browsergym/webcanvas/requirements.txt @@ -0,0 +1,4 @@ +browsergym-core==0.13.3 +openai +bs4 +lxml diff --git a/browsergym/webcanvas/src/browsergym/webcanvas/__init__.py b/browsergym/webcanvas/src/browsergym/webcanvas/__init__.py new file mode 100644 index 00000000..04a41033 --- /dev/null +++ b/browsergym/webcanvas/src/browsergym/webcanvas/__init__.py @@ -0,0 +1,15 @@ +from browsergym.core.registration import register_task + +# register the WebCanvas benchmark +from . 
import config, task + +ALL_WEBCANVAS_TASK_IDS = [] + +for task_id in config.TASK_TRAIN_IDS: + gym_id = f"webcanvas.mind2web-live.{task_id}" + register_task( + gym_id, + task.GenericWebCanvasTask, + kwargs={"task_kwargs": {"task_id": task_id}}, + ) + ALL_WEBCANVAS_TASK_IDS.append(gym_id) diff --git a/browsergym/webcanvas/src/browsergym/webcanvas/config.py b/browsergym/webcanvas/src/browsergym/webcanvas/config.py new file mode 100644 index 00000000..6d7926d1 --- /dev/null +++ b/browsergym/webcanvas/src/browsergym/webcanvas/config.py @@ -0,0 +1,2 @@ +TASK_TEST_IDS = range(104) +TASK_TRAIN_IDS = range(130) diff --git a/browsergym/webcanvas/src/browsergym/webcanvas/data/mind2web-live-train_130.json b/browsergym/webcanvas/src/browsergym/webcanvas/data/mind2web-live-train_130.json new file mode 100644 index 00000000..c279071e --- /dev/null +++ b/browsergym/webcanvas/src/browsergym/webcanvas/data/mind2web-live-train_130.json @@ -0,0 +1,5769 @@ +[ + { + "index": "WXkY6peQb2NDE6VKrNQkd", + "task": "View the complexity rating for Frosthaven in boardgamegeek", + "reference_task_length": 6, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "boardgamegeek.", + "url": "https://boardgamegeek.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/boardgame/295770/frosthaven", + "url": "https://boardgamegeek.com/boardgame/295770/frosthaven" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".gameplay >li:nth-child(4)>.gameplay-item-secondary >span:nth-child(1)>.btn.btn-xs.btn-link ", + "netloc": "boardgamegeek", + "url": "https://boardgamegeek.com/boardgame/295770/frosthaven" + } + } + ] + }, + { + "index": "XByRzDf1LGHZDev_fnQrj", + "task": "View the full menu for AMC Dine-In in amctheatres", + "reference_task_length": 5, + "evaluation": [ + { + "match_function_name": 
"url_included_match", + "content": { + "key": "", + "reference_answer": "amctheatres.", + "url": "https://www.amctheatres.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/food-and-drink", + "url": "https://www.amctheatres.com/food-and-drink" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/explore-menu", + "url": "https://www.amctheatres.com/food-and-drink/dine-in/explore-menu" + } + }, + { + "match_function_name": "element_value_exactly_match", + "content": { + + "reference_answer": "View Full Menu", + "netloc": "amctheatres", + "url": "https://www.amctheatres.com/food-and-drink/dine-in/explore-menu" + } + } + ] + }, + { + "index": "W7KXP7WT9P2IcIHkrM2bl", + "task": "Search for used Jaguar XF with no black exterior color and save the search as Jaguar to get a notification daily in carmax", + "reference_task_length": 16, + "evaluation": [ + { + "match_function_name": "url_semantic_match", + "content": { + "key": "search", + "reference_answer": "Decide whether are searching for Jaguar XF", + "url": "https://www.carmax.com/cars?search=Jaguar+XF" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "jaguar/xf", + "url": "https://www.carmax.com/cars/jaguar/xf/no-black" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/no-black", + "url": "https://www.carmax.com/cars/jaguar/xf/no-black" + } + }, + { + "match_function_name": "element_value_exactly_match", + "content": { + "reference_answer": "Jaguar", + "netloc": "carmax", + "url": "https://www.carmax.com/cars/jaguar/xf/no-black" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".save-search-button.kmx-button.kmx-button--primary ", + "netloc": "carmax", + "url": 
"https://www.carmax.com/cars/jaguar/xf/no-black" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".save-search-email--options-radio >div:nth-child(2)>.kmx-radio-label ", + "netloc": "carmax", + "url": "https://www.carmax.com/cars/jaguar/xf/no-black" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".kmx-button.kmx-button--primary.email-options-cta ", + "netloc": "carmax", + "url": "https://www.carmax.com/cars/jaguar/xf/no-black" + } + } + ] + }, + { + "index": "TpT9z9DSgXapCMB2ORulX", + "task": "Browse for wall art with a price range of $25 to $50 in kohls", + "reference_task_length": 7, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "kohls.", + "url": "https://www.kohls.com/?kwid=p43862634237&utm_source=google&utm_medium=cpc&utm_term=Kohls&utm_campaign=Brand_Search_Google_Brand&UTM_Adgroupid=58700005024606174&pfx=pfx_google_roi&cid=brandsearch&ds_rl=2671&gad_source=1&gclid=CjwKCAiAzc2tBhA6EiwArv-i6UBkb_nmBN_BT3SvQ3M36KQxDVjnLAb6ljp_eSOnuc9nlr4J8uiiuRoCumYQAvD_BwE&gclsrc=aw.ds" + } + }, + { + "match_function_name": "url_semantic_match", + "content": { + "key": "search", + "reference_answer": "Decide whether are searching for wall art", + "url": "https://www.kohls.com/search.jsp?submit-search=web-regular&search=wall+art&kls_sbp=59794123176757812635376180341677737534" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "CN", + "reference_answer": "Price:$25 to $50", + "url": "https://www.kohls.com/search.jsp?CN=Price:%2425%20to%20%2450&BL=y&search=wall%20art&S=1&PPP=48&kls_sbp=59794123176757812635376180341677737534&pfm=search%20refine" + } + } + ] + }, + { + "index": "gDB083_WaqyuVYLxOBqGc", + "task": "Search for queen-size pillow protectors from the Marriot shop, and if found, add two pieces to the cart and 
checkout in marriott", + "reference_task_length": 13, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "marriott.", + "url": "https://www.marriott.com/default.mi?nst=paid&cid=PAI_GLB0004EMN_GLE000AE45_GLF000KWXG&ppc=ppc&pId=nancppc&gad_source=1&gclid=CjwKCAiA8NKtBhBtEiwAq5aX2J4dgb2cUlR-5yjY6TJiltvKoVfXOH1u3O9_d25sVIHhIicmKEZraBoCWwUQAvD_BwE&gclsrc=aw.ds" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "shopmarriott.", + "url": "https://www.shopmarriott.com/index.aspx" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/category.aspx?pillows", + "url": "https://www.shopmarriott.com/category.aspx?pillows" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/product.aspx?pillow-protector", + "url": "https://www.shopmarriott.com/product.aspx?pillow-protector" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": "#MAR-107-1Mar_PillowProtector >option:nth-child(3)", + "netloc": "shopmarriott", + "url": "https://www.shopmarriott.com/product.aspx?pillow-protector" + } + }, + { + "match_function_name": "element_value_exactly_match", + "content": { + "reference_answer": "2", + "netloc": "shopmarriott", + "url": "https://www.shopmarriott.com/product.aspx?pillow-protector" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": "#addToCartBtn >span:nth-child(1)", + "netloc": "shopmarriott", + "url": "https://www.shopmarriott.com/product.aspx?pillow-protector" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".button.right >.btn-main ", + "netloc": "shopmarriott", + "url": 
"https://www.shopmarriott.com/product.aspx?pillow-protector" + } + } + ] + }, + { + "index": "gDmEXIumjy6tQOrO0VzNS", + "task": "Find a south african history podcast with length between 10 to 30 minutes and filter it by audiobook tag in soundcloud", + "reference_task_length": 9, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "soundcloud.", + "url": "https://soundcloud.com/" + } + }, + { + "match_function_name": "url_semantic_match", + "content": { + "key": "q", + "reference_answer": "Decide whether are searching for south african history podcast", + "url": "https://soundcloud.com/search?q=south%20african%20history%20podcast" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "filter.duration", + "reference_answer": "long", + "url": "https://soundcloud.com/search/sounds?q=south%20african%20history%20podcast&filter.duration=long" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "filter.genre", + "reference_answer": "audiobooks", + "url": "https://soundcloud.com/search/sounds?q=south%20african%20history%20podcast&filter.duration=long&filter.genre=audiobooks" + } + } + ] + }, + { + "index": "8zdFjA1w6mKbU4_paQU_6", + "task": "Delete the history of what i heard recently in soundcloud", + "reference_task_length": 6, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "soundcloud.", + "url": "https://soundcloud.com/you/history" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/library", + "url": "https://soundcloud.com/you/history" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/history", + "url": "https://soundcloud.com/you/history" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + 
"reference_answer": ".sc-button.sc-button-tertiary.sc-text-h4.collectionSection__action.sc-button-small.sc-button-responsive ", + "netloc": "soundcloud", + "url": "https://soundcloud.com/you/history" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".sc-button.sc-button-small.sc-button-primary.sc-ml-2x ", + "netloc": "soundcloud", + "url": "https://soundcloud.com/you/history" + } + } + ] + }, + { + "index": "1fFOd24LBmtgRmw1o5C8n", + "task": "Show me movies produced by Aaron Horvath in imdb", + "reference_task_length": 7, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "imdb.", + "url": "https://www.imdb.com/" + } + }, + { + "match_function_name": "element_value_exactly_match", + "content": { + "reference_answer": "Previous", + "netloc": "imdb", + "url": "https://www.imdb.com/name/nm1739338/?ref_=fn_al_nm_1" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/nm1739338", + "url": "https://www.imdb.com/name/nm1739338/?ref_=fn_al_nm_1" + } + } + ] + }, + { + "index": "DKVD-ophgCKtDzTA6T65R", + "task": "Find a Blue iPhone 12 Pro 128gb and add to cart in ebay", + "reference_task_length": 11, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "ebay.", + "url": "https://www.ebay.com/" + } + }, + { + "match_function_name": "url_semantic_match", + "content": { + "key": "_nkw", + "reference_answer": "Decide whether are searching for iPhone 12 Pro", + "url": "https://www.ebay.com/sch/i.html?_from=R40&_trksid=p4432023.m570.l1313&_nkw=iphone+12+pro&_sacat=0" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "Storage%20Capacity", + "reference_answer": "128 GB", + "url": 
"https://www.ebay.com/sch/i.html?_from=R40&_nkw=iphone+12+pro&_sacat=0&Storage%2520Capacity=128%2520GB&_dcat=9355" + } + }, + { + "match_function_name": "element_value_exactly_match", + "content": { + "reference_answer": "0", + "netloc": "ebay", + "url": "https://www.ebay.com/itm/294304467850?epid=9064995793&hash=item4485e9cb8a:g:Fn4AAOSwJyxlDbtk&amdata=enc%3AAQAIAAAAwFOTWZW7Qt6S99BNKFAQ1Hx3iO7VgD8LcKbH1RXPBgN6DVWTyEwMWA8HQf%2BsTREaViQIsTYCdZXUMBytGtE8SQPc37W21MAh23apUGDW1%2FxIRZLxI%2BC%2FuDWoeoeAziQcmTN76wSMNUU7BtxPp%2BfdMNsudHDS8zj3qQQHnSk7wjheMKtOUgD4jNB6E1VuwrtnpN99XXHvUQBgOvLbpt6OQYvn5bxGQjOxOR8tAZH2EKdmmrrwd7%2B4sNopxa5zfi8%2B%2Bg%3D%3D%7Ctkp%3ABlBMULzVg7mpYw" + } + }, + { + "match_function_name": "element_value_exactly_match", + "content": { + "reference_answer": "View in cart", + "netloc": "ebay", + "url": "https://www.ebay.com/itm/294304467850?epid=9064995793&hash=item4485e9cb8a:g:Fn4AAOSwJyxlDbtk&amdata=enc%3AAQAIAAAAwFOTWZW7Qt6S99BNKFAQ1Hx3iO7VgD8LcKbH1RXPBgN6DVWTyEwMWA8HQf%2BsTREaViQIsTYCdZXUMBytGtE8SQPc37W21MAh23apUGDW1%2FxIRZLxI%2BC%2FuDWoeoeAziQcmTN76wSMNUU7BtxPp%2BfdMNsudHDS8zj3qQQHnSk7wjheMKtOUgD4jNB6E1VuwrtnpN99XXHvUQBgOvLbpt6OQYvn5bxGQjOxOR8tAZH2EKdmmrrwd7%2B4sNopxa5zfi8%2B%2Bg%3D%3D%7Ctkp%3ABlBMULzVg7mpYw&var=592943878447" + } + } + ] + }, + { + "index": "eTA-PZ_y8PT09kfvk-bbR", + "task": "Check the price of Tesla Model Y Performance for the 10001 zip code in tesla", + "reference_task_length": 6, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "tesla.", + "url": "https://www.tesla.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/modely/design", + "url": "https://www.tesla.com/modely/design#overview" + } + }, + { + "match_function_name": "element_value_exactly_match", + "content": { + "reference_answer": "10001", + "netloc": "tesla", + "url": "https://www.tesla.com/modely/design#overview" + } + }, + { + 
"match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".location-modal--button >.tds-btn.tds-btn--primary.tds-btn--large.tds-btn--width-full ", + "netloc": "tesla", + "url": "https://www.tesla.com/modely/design#overview" + } + } + ] + }, + { + "index": "EtBAYvBAHdCKB2_xJWNDa", + "task": "Check the availability of a Nintendo Switch gaming console at the nearest Target store in target", + "reference_task_length": 8, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "target.", + "url": "https://www.target.com/" + } + }, + { + "match_function_name": "url_semantic_match", + "content": { + "key": "searchTerm", + "reference_answer": "Decide whether are searching for Nintendo Switch", + "url": "https://www.target.com/s?searchTerm=Nintendo+Switch&tref=typeahead%7Cterm%7CNintendo+Switch%7C%7C%7Chistory" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".styles__ThreeUpTextWrapperInline-sc-11rka0i-1.bJqleI >button:nth-child(1)", + "netloc": "target", + "url": "https://www.target.com/" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".ModalDrawer >div:nth-child(1)>div:nth-child(1)>div:nth-child(2)>div:nth-child(3)>.h-margin-v-default >div:nth-child(1)>div:nth-child(1)>.styles__SecondaryStyles-sc-18fxydo-2.irelhx >div:nth-child(1)>button:nth-child(1)", + "netloc": "target", + "url": "https://www.target.com/" + } + } + ] + }, + { + "index": "3EYfZekKAm_7OW8tkf5J_", + "task": "Find an Xbox Wireless controller rated above 4 stars in newegg", + "reference_task_length": 8, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "newegg.", + "url": 
"https://www.newegg.com/Newegg-Deals/EventSaleStore/ID-9447?N=100007627&id0=Google&id1=20090904388&id2=145631868461&id3=&id4=&id5=kwd-22448241&id6=&id7=1014226&id8=b&id9=g&id10=c&id11=&id12=CjwKCAiA8NKtBhBtEiwAq5aX2JhtaauB2oL5E0a3mGURAs91TYZILl5tNLRMTzssaWXM7vgGLgLDExoCCQAQAvD_BwE&id13=&id14=Y&id15=&id16=688736071597&id17=newegg&id18=&id19=&id20=&id21=&id22=&id23=&id24=&id25=&id26=&id27=Y&id28=&id29=&id30=16787553722129530369&id31=&id32=&id33=&id34=&gclsrc=aw.ds&&cm_mmc=knc-googlekwless-search-broad-_-newaudience-_-intel-motherboards-_-plp-feature-2&utm_source=google&utm_medium=paid+search&utm_campaign=knc-googlekwless-search-broad-_-newaudience-_-intel-motherboards-_-plp-feature-2&gad_source=1&gclid=CjwKCAiA8NKtBhBtEiwAq5aX2JhtaauB2oL5E0a3mGURAs91TYZILl5tNLRMTzssaWXM7vgGLgLDExoCCQAQAvD_BwE" + } + }, + { + "match_function_name": "url_semantic_match", + "content": { + "key": "d", + "reference_answer": "Decide whether are searching for Xbox Wireless controller", + "url": "https://www.newegg.com/p/pl?d=xbox+wireless+controller" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "N", + "reference_answer": "4204", + "url": "https://www.newegg.com/p/pl?d=xbox+wireless+controller&N=4204" + } + } + ] + }, + { + "index": "3pgcXaIyKP86RuRIAYxuL", + "task": "View the toddler collection and add one pair of the cheapest socks for a 6 months to 5 years to the wishlist in uniqlo", + "reference_task_length": 11, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "uniqlo.", + "url": "https://www.uniqlo.com/us/en/baby" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "baby/toddler", + "url": "https://www.uniqlo.com/us/en/baby/toddler" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "path", + "reference_answer": "22213,23321,24798,", + "url": 
"https://www.uniqlo.com/us/en/baby/toddler?path=22213%2C23321%2C24798%2C" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "sort", + "reference_answer": "2", + "url": "https://www.uniqlo.com/us/en/baby/toddler?path=22213%2C23321%2C24798%2C&sort=2" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": "#root >section:nth-child(10)>div:nth-child(1)>section:nth-child(1)>div:nth-child(1)>div:nth-child(2)>div:nth-child(1)>div:nth-child(1)>div:nth-child(1)>div:nth-child(1)>div:nth-child(1)>a:nth-child(1)>div:nth-child(1)>div:nth-child(1)>button:nth-child(2)>svg:nth-child(1)", + "netloc": "uniqlo", + "url": "https://www.uniqlo.com/us/en/baby/toddler?path=22213%2C23321%2C24798%2C&sort=2" + } + } + ] + }, + { + "index": "3RVOB_HPtNe4wglTTrOdK", + "task": "Search for hiking boots and filter the results to show only those with a waterproof rating of at least 3 stars in rei", + "reference_task_length": 6, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "rei.", + "url": "https://www.rei.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "c/hiking-footwear", + "url": "https://www.rei.com/c/hiking-footwear" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/f-waterproof", + "url": "https://www.rei.com/c/hiking-footwear/f/f-waterproof?ir=category%3Ahiking-footwear&r=c%3Bf" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "r", + "reference_answer": "c;f;average-rating:3 to 5", + "url": "https://www.rei.com/c/hiking-footwear/f/f-waterproof?ir=category%3Ahiking-footwear&r=c%3Bf%3Baverage-rating%3A3+to+5" + } + } + ] + }, + { + "index": "ciawYzAILpO7zxb6_OKKG", + "task": "Search receipt with the eTicket 12345678 for the trip reserved by Jason Two in 
united", + "reference_task_length": 12, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "united.", + "url": "https://www.united.com/en/us" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "receipts", + "url": "https://www.united.com/en/us/receipts" + } + }, + { + "match_function_name": "element_value_exactly_match", + "content": { + "reference_answer": "Confirmation or eTicket number", + "netloc": "united", + "url": "https://www.united.com/en/us/receipts" + } + }, + { + "match_function_name": "element_value_exactly_match", + "content": { + "reference_answer": "12345678", + "netloc": "united", + "path": "#cqLpdDv_d ", + "url": "https://www.united.com/en/us/receipts" + } + }, + { + "match_function_name": "element_value_exactly_match", + "content": { + "reference_answer": "Jason", + "netloc": "united", + "path": "#gY4kQQ1fF3 ", + "url": "https://www.united.com/en/us/receipts" + } + }, + { + "match_function_name": "element_value_exactly_match", + "content": { + "reference_answer": "Two", + "netloc": "united", + "path": "#zMO2dq6RBo ", + "url": "https://www.united.com/en/us/receipts" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".atm-c-btn.app-components-Receipts-PurchaseDetails-styles__searchButton--BRDhz.atm-c-btn--primary.atm-c-btn--large >.atm-c-btn__text ", + "netloc": "united", + "url": "https://www.united.com/en/us/receipts" + } + } + ] + }, + { + "index": "D-PSiYpk-2evcjGqhhv8z", + "task": "Search for person named John Smith in New York in yellowpages", + "reference_task_length": 11, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "yellowpages.", + "url": "https://www.yellowpages.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + 
"reference_answer": "people.yellowpages.", + "url": "https://people.yellowpages.com/whitepages/" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "first_name", + "reference_answer": "John", + "url": "https://people.yellowpages.com/whitepages/?first_name=John&last_name=Smith&city=New+york&state=" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "last_name", + "reference_answer": "Smith", + "url": "https://people.yellowpages.com/whitepages/?first_name=John&last_name=Smith&city=New+york&state=" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "city", + "reference_answer": "New york", + "url": "https://people.yellowpages.com/whitepages/?first_name=John&last_name=Smith&city=New+york&state=" + } + } + ] + }, + { + "index": "t95CWRtf-KuSTP0knNdmu", + "task": "find cruise deals to Europe this summer that includes airfare and are all-inclusive in travelzoo", + "reference_task_length": 13, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "travelzoo.", + "url": "https://www.travelzoo.com/" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "locationId", + "reference_answer": "1024", + "url": "https://www.travelzoo.com/deals/all-deals-europe-summer/?ctfId=0&locationId=1024&when=Summer&pageType=Collection&ga_s=Europe&search_type=destination" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "when", + "reference_answer": "Summer", + "url": "https://www.travelzoo.com/deals/all-deals-europe-summer/?ctfId=0&locationId=1024&when=Summer&pageType=Collection&ga_s=Europe&search_type=destination" + } + }, + { + "match_function_name": "element_value_exactly_match", + "content": { + "reference_answer": "Airfare Included", + "netloc": "travelzoo", + "url": 
"https://www.travelzoo.com/deals/all-deals-europe-summer/?ctfId=0&locationId=1024&when=Summer&pageType=Collection&ga_s=Europe&search_type=destination" + } + }, + { + "match_function_name": "element_value_exactly_match", + "content": { + "reference_answer": "All-inclusive", + "netloc": "travelzoo", + "url": "https://www.travelzoo.com/deals/all-deals-europe-summer/?ctfId=0&locationId=1024&when=Summer&pageType=Collection&ga_s=Europe&search_type=destination" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".button-primary.button-done.right ", + "netloc": "travelzoo", + "url": "https://www.travelzoo.com/deals/all-deals-europe-summer/?ctfId=0&locationId=1024&when=Summer&pageType=Collection&ga_s=Europe&search_type=destination" + } + } + ] + }, + { + "index": "tA52iRDxXW9ZI98p1p5aF", + "task": "Find the safety rating for 2012 Honda Civic in kbb", + "reference_task_length": 11, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "kbb.", + "url": "https://www.kbb.com/?&psid=20003&utm_source=GOOGLE&utm_medium=sem_brand-core_perf&utm_campaign=kbb_na_na_national_evergreen_site-visits_na_na&utm_content=keyword_text_na_na_na_20003_na&utm_term=kbb&ds_rl=1293870&gad_source=1&gclid=CjwKCAiAzc2tBhA6EiwArv-i6cts0gEs876CjOGcM1YqfgApEFpdV-hInc7n5R3oHA4UBrPEW4fFrxoCvuUQAvD_BwE&gclsrc=aw.ds" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/car-prices", + "url": "https://www.kbb.com/car-prices/" + } + }, + { + "match_function_name": "element_value_exactly_match", + "content": { + "reference_answer": "2012", + "netloc": "kbb", + "url": "https://www.kbb.com/car-prices/" + } + }, + { + "match_function_name": "element_value_exactly_match", + "content": { + "reference_answer": "Honda", + "netloc": "kbb", + "url": "https://www.kbb.com/car-prices/" + } + }, + { + "match_function_name": 
"element_value_exactly_match", + "content": { + "reference_answer": "Civic", + "netloc": "kbb", + "url": "https://www.kbb.com/car-prices/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/honda/civic/2012", + "url": "https://www.kbb.com/honda/civic/2012/" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": "#subnav4 >div:nth-child(1)>div:nth-child(1)", + "netloc": "kbb", + "url": "https://www.kbb.com/honda/civic/2012/" + } + } + ] + }, + { + "index": "j4W0lfpmu2n143wb_vHSN", + "task": "Show me the NFL Scoreboard from the superbowl 2015 in espn", + "reference_task_length": 14, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "espn.", + "url": "https://www.espn.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "nfl/scoreboard", + "url": "https://www.espn.com/nfl/scoreboard" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "_/week/5/year/2015/seasontype/3", + "url": "https://www.espn.com/nfl/scoreboard/_/week/5/year/2015/seasontype/3" + } + } + ] + }, + { + "index": "kcTi3b4I9BP33PBBr1Cjv", + "task": "Show me the location of the Altavista bus stop in us.megabus", + "reference_task_length": 6, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "us.megabus.", + "url": "https://us.megabus.com/route-guides" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/stops", + "url": "https://us.megabus.com/stops" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".row >div:nth-child(12)>.blockContent >.panel.panel-default >.panel-heading 
>.panel-title >a:nth-child(1)", + "netloc": "us", + "url": "https://us.megabus.com/stops" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".panel-collapse.collapse.in >.panel-body >.list-unstyled >li:nth-child(1)>a:nth-child(1)", + "netloc": "us", + "url": "https://us.megabus.com/stops" + } + } + ] + }, + { + "index": "h85WFGkh5JWjY-m7rPcKv", + "task": "Show me the page with information about the NBA Finals schedule in seatgeek", + "reference_task_length": 3, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "seatgeek.", + "url": "https://seatgeek.com/?aid=10717&gid=147220683313&rid=kwd-307402976610&dt=c&ap=&adId=647120651900&loc_interest=&loc_physical=1014226&n=g&mt=e&ext=&xid=1&gad_source=1&gclid=CjwKCAiAzc2tBhA6EiwArv-i6SRpdQUweChDs6bFJjQvJNW0MrRoKDycKgmhKRTUsWRghNdWU6XdHhoC2-MQAvD_BwE" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/nba-finals-3-tickets", + "url": "https://seatgeek.com/nba-finals-3-tickets" + } + } + ] + }, + { + "index": "pIWJqqtx19A-qKymEDRYB", + "task": "Add a set of sonoma bath towels to the cart and apply a coupon code for free shipping in kohls", + "reference_task_length": 13, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "kohls.", + "url": "https://www.kohls.com/" + } + }, + { + "match_function_name": "url_semantic_match", + "content": { + "key": "search", + "reference_answer": "Decide whether are searching for sonoma bath towels", + "url": "https://www.kohls.com/search.jsp?submit-search=web-ta-keyword&search=sonoma+bath+towels&kls_sbp=59794123176757812635376180341677737534" + } + }, + { + "match_function_name": "element_value_exactly_match", + "content": { + "reference_answer": "BATH TOWEL", + "netloc": "kohls", + "url": 
"https://www.kohls.com/product/prd-4569317/sonoma-goods-for-life-ultimate-bath-towel-bath-sheet-hand-towel-or-washcloth-with-hygro-technology.jsp?color=Linen&prdPV=1" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": "#addtobagID ", + "netloc": "kohls", + "url": "https://www.kohls.com/product/prd-4569317/sonoma-goods-for-life-ultimate-bath-towel-bath-sheet-hand-towel-or-washcloth-with-hygro-technology.jsp?color=Linen&prdPV=1" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".btn.btn-primary ", + "netloc": "kohls", + "url": "https://www.kohls.com/product/prd-4569317/sonoma-goods-for-life-ultimate-bath-towel-bath-sheet-hand-towel-or-washcloth-with-hygro-technology.jsp?color=Linen&prdPV=1" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".open-offers-text ", + "netloc": "kohls", + "url": "https://www.kohls.com/checkout/shopping_cart.jsp" + } + }, + { + "match_function_name": "element_value_exactly_match", + "content": { + "reference_answer": "FREESHIP 3093", + "netloc": "kohls", + "url": "https://www.kohls.com/checkout/shopping_cart.jsp" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".kds-button.kds-button-sm >.btn.btn-secondary ", + "netloc": "kohls", + "url": "https://www.kohls.com/checkout/shopping_cart.jsp" + } + } + ] + }, + { + "index": "RyhHhSmdLH_couxp6jwEp", + "task": "Plan an accessible trip from empire state building to little Caribbean with least walking in new.mta.info", + "reference_task_length": 14, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "new.mta.info", + "url": "https://new.mta.info/" + } + }, + { + "match_function_name": "element_value_semantic_match", + 
"content": { + "reference_answer": "Decide whether are searching for empire state building", + "netloc": "new", + "url": "https://new.mta.info/" + } + }, + { + "match_function_name": "element_value_semantic_match", + "content": { + "reference_answer": "Decide whether are searching for little Caribbean", + "netloc": "new", + "url": "https://new.mta.info/" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": "#linkPreferencesModal ", + "netloc": "new", + "url": "https://new.mta.info/" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": "#edit-minimize ", + "netloc": "new", + "url": "https://new.mta.info/" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": "#edit-minimize >option:nth-child(3)", + "netloc": "new", + "url": "https://new.mta.info/" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".button.primary-dark-button.travel-preferences-button.close-modal.close-preferences ", + "netloc": "new", + "url": "https://new.mta.info/" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".form-item.js-form-item.form-type-checkbox.js-form-type-checkbox.form-item-accessible.js-form-item-accessible.checkbox >.control-label.option ", + "netloc": "new", + "url": "https://new.mta.info/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/tripplanner/results", + "url": 
"https://new.mta.info/tripplanner/results/Empire%20State%20Building%2C%20West%2034th%20Street%2C%20New%20York%2C%20NY%2C%20USA/40.7484405%2C%20-73.98566439999999/Little%20Caribbean%2C%20Brooklyn%2C%20NY%2C%20USA/40.6508146%2C%20-73.9495972/1706595587/D/B%2CX%2CC%2CR/W/804/true/null/null" + } + } + ] + }, + { + "index": "hRtPHCJJzDkwZTO_QzVms", + "task": "Track the status of a rebate for Monty Lue, house number 4847, zip code 10019 in menards", + "reference_task_length": 14, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "menards.", + "url": "https://www.menards.com/main/home.html" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/rebates", + "url": "https://www.menards.com/main/rebates.html" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "rebateinternational.", + "url": "https://www.rebateinternational.com/RebateInternational/tracking.do#track-rebate" + } + }, + { + "match_function_name": "element_value_exactly_match", + "content": { + "reference_answer": "M", + "netloc": "rebateinternational", + "path": ".bg-white.my-3.p-3.rounded-lg >form:nth-child(2)>div:nth-child(1)>.col-12 >span:nth-child(2)>.rounded-pill.form-control ", + "url": "https://www.rebateinternational.com/RebateInternational/tracking.do#track-rebate" + } + }, + { + "match_function_name": "element_value_exactly_match", + "content": { + "reference_answer": "Lue", + "netloc": "rebateinternational", + "path": ".bg-white.my-3.p-3.rounded-lg >form:nth-child(2)>div:nth-child(2)>.col-12 >span:nth-child(2)>.rounded-pill.form-control ", + "url": "https://www.rebateinternational.com/RebateInternational/tracking.do#track-rebate" + } + }, + { + "match_function_name": "element_value_exactly_match", + "content": { + "reference_answer": "4847", + "netloc": "rebateinternational", + "path": "#house-num ", + "url": 
"https://www.rebateinternational.com/RebateInternational/tracking.do#track-rebate" + } + }, + { + "match_function_name": "element_value_exactly_match", + "content": { + "reference_answer": "10019", + "netloc": "rebateinternational", + "path": ".w-100 >div:nth-child(3)>div:nth-child(2)>span:nth-child(2)>.rounded-pill.form-control ", + "url": "https://www.rebateinternational.com/RebateInternational/tracking.do#track-rebate" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".row.pb-4 >.col-12 >button:nth-child(1)", + "netloc": "rebateinternational", + "url": "https://www.rebateinternational.com/RebateInternational/tracking.do#track-rebate" + } + } + ] + }, + { + "index": "m0tbJoOSYhZqciPjTSZEQ", + "task": "Show computer game reviews sorted by score in ign", + "reference_task_length": 8, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "ign.", + "url": "https://www.ign.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/reviews/games", + "url": "https://www.ign.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/pc", + "url": "https://www.ign.com/" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": "#sortBy ", + "netloc": "ign", + "url": "https://www.ign.com/" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": "#sortBy >option:nth-child(3)", + "netloc": "ign", + "url": "https://www.ign.com/reviews/games/pc" + } + } + ] + }, + { + "index": "LdY-zo1lQh1XrKpBhDD6-", + "task": "Find my trip with ticket number 3329456534543 along with my name John Green in delta", + "reference_task_length": 12, + "evaluation": [ + { + "match_function_name": 
"url_included_match", + "content": { + "key": "", + "reference_answer": "delta.", + "url": "https://www.delta.com/" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": "#headPrimary3 ", + "netloc": "delta", + "url": "https://www.delta.com/" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": "#ui-list-searchOption2 ", + "netloc": "delta", + "url": "https://www.delta.com/" + } + }, + { + "match_function_name": "element_value_exactly_match", + "content": { + "reference_answer": "3329456534543", + "netloc": "delta", + "path": "#eTicketNo ", + "url": "https://www.delta.com/" + } + }, + { + "match_function_name": "element_value_exactly_match", + "content": { + "reference_answer": "John", + "netloc": "delta", + "path": "#firstName ", + "url": "https://www.delta.com/" + } + }, + { + "match_function_name": "element_value_exactly_match", + "content": { + "reference_answer": "Green", + "netloc": "delta", + "path": "#lastName ", + "url": "https://www.delta.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "my-trips/search", + "url": "https://www.delta.com/my-trips/search?errorCodes=MYT0503" + } + } + ] + }, + { + "index": "LhoU2gmqzyP2pmZS6NcLc", + "task": "Select a deal which is highly rated in Fiji location in travelzoo", + "reference_task_length": 9, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "travelzoo.", + "url": "https://www.travelzoo.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "deals/all-deals", + "url": "https://www.travelzoo.com/deals/all-deals-fiji/?ctfId=0&locationId=1093&when=Anytime&pageType=Homepage&ga_s=Fiji&search_type=destination" + } + }, + { + "match_function_name": "url_exactly_match", + 
"content": { + "key": "ga_s", + "reference_answer": "Fiji", + "url": "https://www.travelzoo.com/deals/all-deals-fiji/?ctfId=0&locationId=1093&when=Anytime&pageType=Homepage&ga_s=Fiji&search_type=destination" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".btn-group-items >div:nth-child(2)", + "netloc": "travelzoo", + "url": "https://www.travelzoo.com/deals/all-deals-fiji/?ctfId=0&locationId=1093&when=Anytime&pageType=Homepage&ga_s=Fiji&search_type=destination" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".result-list.list-nostyle >li:nth-child(1)>div:nth-child(1)>a:nth-child(1)>.deal-info.clamped >.deal-headline >.deal-headline-text.ts-original ", + "netloc": "travelzoo", + "url": "https://www.travelzoo.com/deals/all-deals-fiji/?ctfId=0&locationId=1093&when=Anytime&pageType=Homepage&ga_s=Fiji&search_type=destination" + } + } + ] + }, + { + "index": "mqWXWWBXEQ03hlZexjoh6", + "task": "Find information about baggage allowance for business class in qatarairways", + "reference_task_length": 5, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "qatarairways.", + "url": "https://www.qatarairways.com/en-us/homepage.html?CID=SXUS23456792M&account=Google-AMERICAS-US-EN-Brand&campaign=US-Brand-Hero-EN_exact&adgroup=qatarairways&term=qatarairways&&&&&gad_source=1&gclid=CjwKCAiAzc2tBhA6EiwArv-i6aeW180HqNfN0DVR5ocvrSeWUcoqviRH3n44R9tzif6BgvzjTPlLuhoCeJ4QAvD_BwE&gclsrc=aw.ds" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "baggage/allowance.html", + "url": "https://www.qatarairways.com/en-us/baggage/allowance.html?iid=ALL67670750" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": "#j-poi-tabs 
>li:nth-child(2)>.tabmenu-link ", + "netloc": "qatarairways", + "url": "https://www.qatarairways.com/en-us/baggage/allowance.html?iid=ALL67670750" + } + } + ] + }, + { + "index": "joaRU2Ghlt9m6rZvwjqiW", + "task": "Show me the newest on-demand releases in amctheatres", + "reference_task_length": 4, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "vudu.", + "url": "https://www.vudu.com/content/movies/uxrow/New-Releases/14166?affid=13177&cmp=AMC~Web~nav_new_releases" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "content/movies/uxrow/New-Releases/", + "url": "https://www.vudu.com/content/movies/uxrow/New-Releases/14166?affid=13177&cmp=AMC~Web~nav_new_releases" + } + } + ] + }, + { + "index": "T4ZljEgrCyH3iAdER9wHh", + "task": "Find adventure movies coming to theaters in rottentomatoes", + "reference_task_length": 6, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "rottentomatoes.", + "url": "https://www.rottentomatoes.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "browse/movies_coming_soon", + "url": "https://www.rottentomatoes.com/browse/movies_coming_soon/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "genres:adventure", + "url": "https://www.rottentomatoes.com/browse/movies_coming_soon/" + } + } + ] + }, + { + "index": "ON4UUjUE-e8eM4zJgCcXR", + "task": "Find the cheapest used and working Nintendo Switch for under $400 that can be bought right now in ebay", + "reference_task_length": 13, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "ebay.", + "url": "https://www.ebay.com/" + } + }, + { + "match_function_name": "url_semantic_match", + "content": { + "key": 
"_nkw", + "reference_answer": "Decide whether is searching for Nintendo Switch", + "url": "https://www.ebay.com/sch/i.html?_from=R40&_trksid=p4432023.m570.l1313&_nkw=Nintendo+Switch&_sacat=0" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "LH_ItemCondition", + "reference_answer": "3000", + "url": "https://www.ebay.com/sch/i.html?_from=R40&_nkw=Nintendo+Switch&_sacat=0&rt=nc&LH_ItemCondition=3000" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "_udhi", + "reference_answer": "400", + "url": "https://www.ebay.com/sch/i.html?_from=R40&_nkw=Nintendo%20Switch&_sacat=0&LH_ItemCondition=3000&rt=nc&_udhi=400" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "LH_BIN", + "reference_answer": "1", + "url": "https://www.ebay.com/sch/i.html?_from=R40&_nkw=Nintendo+Switch&_sacat=0&LH_ItemCondition=3000&_udhi=400&rt=nc&LH_BIN=1" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".srp-results.srp-list.clearfix >li:nth-child(3)>.s-item__wrapper.clearfix >.s-item__info.clearfix >.s-item__link >.s-item__title >span:nth-child(1)", + "netloc": "ebay", + "url": "https://www.ebay.com/sch/i.html?_from=R40&_nkw=Nintendo+Switch&_sacat=0&LH_ItemCondition=3000&_udhi=400&LH_BIN=1&_sop=15" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "_sop", + "reference_answer": "15", + "url": "https://www.ebay.com/sch/i.html?_from=R40&_nkw=Nintendo+Switch&_sacat=0&LH_ItemCondition=3000&_udhi=400&LH_BIN=1&_sop=15" + } + } + ] + }, + { + "index": "iDXwYzZqvTLA6KFitaOnT", + "task": "Browse cafes that have outdoor seating and is dog friendly in yelp", + "reference_task_length": 9, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "yelp.", + "url": "https://www.yelp.com/" + } + }, + { + "match_function_name": 
"url_semantic_match", + "content": { + "key": "find_desc", + "reference_answer": "Decide whether is searching for cafes", + "url": "https://www.yelp.com/search?find_desc=cafes&find_loc=West+Hollywood%2C+CA" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "attrs", + "reference_answer": "OutdoorSeating", + "url": "https://www.yelp.com/search?find_desc=cafes&find_loc=West+Hollywood%2C+CA&attrs=DogsAllowed%2COutdoorSeating" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "attrs", + "reference_answer": "DogsAllowed", + "url": "https://www.yelp.com/search?find_desc=cafes&find_loc=West+Hollywood%2C+CA&attrs=DogsAllowed%2COutdoorSeating" + } + } + ] + }, + { + "index": "gHnk08cXzPxHikXZ1jJMS", + "task": "Find the highest-reviewed landscaper in West Hollywood for the design of lawn, who gives fast responses in yelp", + "reference_task_length": 11, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "yelp.", + "url": "https://www.yelp.com/" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "find_desc", + "reference_answer": "Landscaping", + "url": "https://www.yelp.com/search?find_desc=Landscaping&find_loc=Los+Angeles%2C+CA" + } + }, + { + "match_function_name": "url_semantic_match", + "content": { + "key": "find_loc", + "reference_answer": "Decide whether is searching for West Hollywood", + "url": "https://www.yelp.com/search?find_desc=Landscaping&find_loc=West+Hollywood" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "attrs", + "reference_answer": "rel_landscaping_itms", + "url": "https://www.yelp.com/search?find_desc=Landscaping&find_loc=West+Hollywood&attrs=rel_landscaping_items%3A%3Aitem_greenscape" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "attrs", + "reference_answer": "is_fast_mtb_responder", + "url": 
"https://www.yelp.com/search?find_desc=Landscaping&find_loc=West+Hollywood&attrs=rel_landscaping_items%3A%3Aitem_greenscape%2Cis_fast_mtb_responderhttps://www.yelp.com/search?find_desc=Landscaping&find_loc=West+Hollywood&attrs=rel_landscaping_items%3A%3Aitem_greenscape%2Cis_fast_mtb_responder" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "sortby", + "reference_answer": "review_counthttps://www.yelp.com/search?find_desc=Landscaping", + "url": "https://www.yelp.com/search?find_desc=Landscaping&find_loc=West+Hollywood&attrs=rel_landscaping_items%3A%3Aitem_greenscape%2Cis_fast_mtb_responder&sortby=review_counthttps://www.yelp.com/search?find_desc=Landscaping&find_loc=West+Hollywood&attrs=rel_landscaping_items%3A%3Aitem_greenscape%2Cis_fast_mtb_responder&sortby=review_count" + } + } + ] + }, + { + "index": "giPwmhoV4bWCLCVjlvety", + "task": "check the 2022 highest goal scorer in the MLS league in espn", + "reference_task_length": 7, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "espn.", + "url": "https://www.espn.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/usa.1", + "url": "https://www.espn.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "soccer/stats", + "url": "https://www.espn.com/soccer/stats/_/league/usa.1" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "season/2022", + "url": "https://www.espn.com/soccer/stats/_/league/USA.1/season/2022" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/hany-mukhtar", + "url": "https://www.espn.com/soccer/player/_/id/174548/hany-mukhtar" + } + } + ] + }, + { + "index": "QVzXOEzKiGiA5j28d644i", + "task": "Upgrade the count of the current SSD in my cart to 10 in 
newegg", + "reference_task_length": 6, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "newegg.", + "url": "https://www.newegg.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "shop/cart", + "url": "https://secure.newegg.com/shop/cart" + } + }, + { + "match_function_name": "element_value_exactly_match", + "content": { + "reference_answer": "10", + "netloc": "secure", + "path": ".item-qty >.form-text ", + "url": "https://secure.newegg.com/shop/cart" + } + } + ] + }, + { + "index": "qz1izJwn0Sh5lGHoH3yIc", + "task": "Browse crossbows on sale with limited stock in cabelas", + "reference_task_length": 8, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "cabelas.", + "url": "https://www.cabelas.com/shop/en#" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "l/crossbows", + "url": "https://www.cabelas.com/l/crossbows" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "#nf-maxavailquantity=1...10", + "url": "https://www.cabelas.com/l/crossbows#nf-maxavailquantity=1...10" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "#f-currentoffers=Sale", + "url": "https://www.cabelas.com/l/crossbows#f-currentoffers=Sale&nf-maxavailquantity=1...10" + } + } + ] + }, + { + "index": "RriMNRAnwsprnmki_oqM7", + "task": "Show all used Tesla cars for 10017 zip code in cargurus", + "reference_task_length": 7, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "cargurus.", + "url": "https://www.cargurus.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": 
"Cars/inventorylisting/viewDetailsFilterViewInventoryListing.action", + "url": "https://www.cargurus.com/Cars/inventorylisting/viewDetailsFilterViewInventoryListing.action?sourceContext=carGurusHomePageModel&entitySelectingHelper.selectedEntity=m112&zip=10017" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "zip", + "reference_answer": "10017", + "url": "https://www.cargurus.com/Cars/inventorylisting/viewDetailsFilterViewInventoryListing.action?sourceContext=carGurusHomePageModel&entitySelectingHelper.selectedEntity=m112&zip=10017" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "entitySelectingHelper.selectedEntity", + "reference_answer": "m112", + "url": "https://www.cargurus.com/Cars/inventorylisting/viewDetailsFilterViewInventoryListing.action?sourceContext=carGurusHomePageModel&entitySelectingHelper.selectedEntity=m112&zip=10017" + } + } + ] + }, + { + "index": "TE18EGhjtZzKiT8NFAAEt", + "task": "Look up information on the potential side effects of rogaine in cvs", + "reference_task_length": 6, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "cvs.", + "url": "https://www.cvs.com/" + } + }, + { + "match_function_name": "url_semantic_match", + "content": { + "key": "searchTerm", + "reference_answer": "Decide whether is searching for rogaine", + "url": "https://www.cvs.com/search?searchTerm=rogaine" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".accordionParent >div:nth-child(5)>.cardContainer >.cardTitleContainer ", + "netloc": "cvs", + "url": "https://www.cvs.com/shop/rogaine-men-s-extra-strength-5-minoxidil-solution-for-hair-regrowth-prodid-1013645" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "shop/", + "url": 
"https://www.cvs.com/shop/rogaine-men-s-extra-strength-5-minoxidil-solution-for-hair-regrowth-prodid-1013645" + } + } + ] + }, + { + "index": "UoI3S1NseUPgO195KNgml", + "task": "Find a pair of mens running shoes in black, size 7 extra wide, 4+ stars and under $50 and add them to my cart in amazon", + "reference_task_length": 13, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "amazon.", + "url": "https://www.amazon.com/" + } + }, + { + "match_function_name": "url_semantic_match", + "content": { + "key": "k", + "reference_answer": "Decide whether are searching for men's running shoes", + "url": "https://www.amazon.com/s?k=men%27s+running+shoes&crid=2MMQANPUETJE8&sprefix=men%27s+running+shoes%2Caps%2C636&ref=nb_sb_noss_1" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "rh", + "reference_answer": "n:679255011,p_n_size_two_browse-vebin:5391077011", + "url": "https://www.amazon.com/s?k=men%27s+running+shoes&rh=n%3A679255011%2Cp_n_size_two_browse-vebin%3A5391077011&dc&ds=v1%3ASFtSwRmUUn1QStxZlBtZpXzzCgfDIXR9zTmT47h1kF4&crid=2MMQANPUETJE8&qid=1706274799&rnid=5391076011&sprefix=men%27s+running+shoes%2Caps%2C636&ref=sr_nr_p_n_size_two_browse-vebin_1" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "rh", + "reference_answer": "p_n_size_browse-vebin:1285096011", + "url": "https://www.amazon.com/s?k=men%27s+running+shoes&i=fashion-mens-shoes&rh=n%3A679286011%2Cp_n_size_two_browse-vebin%3A5391077011%2Cp_n_size_browse-vebin%3A1285096011&dc&ds=v1%3AMXA6en9x4F6k4tkaNkOi9UqCGZDpF%2F8%2FhgkLQTkUR7M&crid=2MMQANPUETJE8&qid=1706274815&rnid=1285068011&sprefix=men%27s+running+shoes%2Caps%2C636&ref=sr_nr_p_n_size_browse-vebin_7" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "rh", + "reference_answer": "p_72:2661618011", + "url": 
"https://www.amazon.com/s?k=men%27s+running+shoes&i=fashion-mens-shoes&rh=n%3A679286011%2Cp_n_size_two_browse-vebin%3A5391077011%2Cp_n_size_browse-vebin%3A1285096011&dc&ds=v1%3AMXA6en9x4F6k4tkaNkOi9UqCGZDpF%2F8%2FhgkLQTkUR7M&crid=2MMQANPUETJE8&qid=1706274815&rnid=1285068011&sprefix=men%27s+running+shoes%2Caps%2C636&ref=sr_nr_p_n_size_browse-vebin_7" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "rh", + "reference_answer": "p_36:-5000", + "url": "https://www.amazon.com/s?k=men%27s+running+shoes&i=fashion-mens-shoes&rh=n%3A679286011%2Cp_n_size_two_browse-vebin%3A5391077011%2Cp_n_size_browse-vebin%3A1285096011%2Cp_72%3A2661618011%2Cp_36%3A-5000&dc&crid=2MMQANPUETJE8&qid=1706274827&rnid=2661611011&sprefix=men%27s+running+shoes%2Caps%2C636&ref=sr_nr_p_36_4" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": "#add-to-cart-button ", + "netloc": "amazon", + "url": "https://www.amazon.com/Under-Armour-Charged-Pursuit-Sneaker/dp/B0968ZKJV4/ref=sr_1_1?crid=2MMQANPUETJE8&keywords=men%27s+running+shoes&psc=1&qid=1706274837&refinements=p_n_size_two_browse-vebin%3A5391077011%2Cp_n_size_browse-vebin%3A1285096011%2Cp_72%3A2661618011%2Cp_36%3A-5000&rnid=2661611011&s=apparel&sprefix=men%27s+running+shoes%2Caps%2C636&sr=1-1" + } + } + ] + }, + { + "index": "4syolpfmOW7tHhli_Qal4", + "task": "Find comedy tv shows on netflix sorted by audience score in rottentomatoes", + "reference_task_length": 8, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "ottentomatoes.", + "url": "https://www.rottentomatoes.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "browse/tv_series_browse/affiliates:netflix", + "url": "https://www.rottentomatoes.com/browse/tv_series_browse/affiliates:netflix" + } + }, + { + "match_function_name": "url_included_match", + 
"content": { + "key": "", + "reference_answer": "genres:comedy", + "url": "https://www.rottentomatoes.com/browse/tv_series_browse/affiliates:netflix~genres:comedy" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "sort:audience_highest", + "url": "https://www.rottentomatoes.com/browse/tv_series_browse/affiliates:netflix~genres:comedy~sort:audience_highest" + } + } + ] + }, + { + "index": "8oiyOS5v16Lpb_jifxuCx", + "task": "Find wall mirrors for under $20 in ikea", + "reference_task_length": 7, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "ikea.", + "url": "https://www.ikea.com/us/en/" + } + }, + { + "match_function_name": "url_semantic_match", + "content": { + "key": "q", + "reference_answer": "Decide whether is searchign for wall mirrors", + "url": "https://www.ikea.com/us/en/search/?q=wall%20mirrors" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "filters", + "reference_answer": "f-price-buckets:PRICE_0_2000", + "url": "https://www.ikea.com/us/en/search/?q=wall%20mirrors&filters=f-price-buckets%3APRICE_0_2000" + } + } + ] + }, + { + "index": "8YXEvYzGajLR8rZBSb5et", + "task": "Show me all my offers for 2-5 day cruises in carnival", + "reference_task_length": 5, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "carnival.", + "url": "https://www.carnival.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/cruise-search", + "url": "https://www.carnival.com/cruise-search?pageNumber=1&numadults=2&durdays=2,3,4,5&pagesize=8&sort=fromprice&showBest=true&async=true¤cy=USD&locality=1" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "durdays", + "reference_answer": "2,3,4,5", + "url": 
"https://www.carnival.com/cruise-search?pageNumber=1&numadults=2&durdays=2,3,4,5&pagesize=8&sort=fromprice&showBest=true&async=true¤cy=USD&locality=1" + } + } + ] + }, + { + "index": "aDUZsHGDATdWGluXuuoT1", + "task": "Make a playlist and add 3 songs from popular Selena Gomez tracks and name it Love in soundcloud", + "reference_task_length": 21, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "soundcloud.", + "url": "https://soundcloud.com/discover" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/selena-gomez-official", + "url": "https://soundcloud.com/selena-gomez-official" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/tracks", + "url": "https://soundcloud.com/selena-gomez-official/tracks" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".sc-button-addtoset.sc-button.moreActions__button.sc-button-medium.sc-button-tertiary ", + "netloc": "soundcloud", + "url": "https://soundcloud.com/discover" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".g-tabs.g-tabs-large >li:nth-child(2)>.tabs__tab.g-tabs-link ", + "netloc": "soundcloud", + "url": "https://soundcloud.com/discover" + } + }, + { + "match_function_name": "element_value_exactly_match", + "content": { + "reference_answer": "Love", + "netloc": "soundcloud", + "path": ".textfield.createPlaylist__title >.textfield__inputWrapper >.textfield__input.sc-input.sc-input-medium ", + "url": "https://soundcloud.com/selena-gomez-official/tracks" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".radioGroup.sharingRadio.createPlaylist__sharing.sc-media-additional.horizontal 
>label:nth-child(3)>.radioGroup__label >.radioGroup__labelText.sc-text-h4 ", + "netloc": "soundcloud", + "url": "https://soundcloud.com/discover" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".createPlaylist__saveButton.sc-button-cta.sc-button-primary.sc-button.sc-button-medium ", + "netloc": "soundcloud", + "url": "https://soundcloud.com/discover" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".sc-button-addtoset.sc-button.moreActions__button.sc-button-medium.sc-button-tertiary ", + "netloc": "soundcloud", + "url": "https://soundcloud.com/discover" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".lazyLoadingList__list.sc-list-nostyle.sc-clearfix >li:nth-child(1)>.addToPlaylistItem.g-flex-row-centered >.addToPlaylistItem__actions.g-flex-row-centered >.addToPlaylistButton.sc-button-secondary.sc-button.sc-button-medium.sc-button-responsive ", + "netloc": "soundcloud", + "url": "https://soundcloud.com/discover" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".sc-button-addtoset.sc-button.moreActions__button.sc-button-medium.sc-button-tertiary ", + "netloc": "soundcloud", + "url": "https://soundcloud.com/discover" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".lazyLoadingList__list.sc-list-nostyle.sc-clearfix >li:nth-child(1)>.addToPlaylistItem.g-flex-row-centered >.addToPlaylistItem__actions.g-flex-row-centered >.addToPlaylistButton.sc-button-secondary.sc-button.sc-button-medium.sc-button-responsive ", + "netloc": "soundcloud", + "url": "https://soundcloud.com/discover" + } + } + ] + }, + { + "index": "3XFTKKOEg8Cm2cEnqDF47", + "task": "Locate a large store in 
Washington that has kids' and maternity products in uniqlo", + "reference_task_length": 12, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "map.uniqlo.", + "url": "https://map.uniqlo.com/us/en/" + } + }, + { + "match_function_name": "element_value_semantic_match", + "content": { + "reference_answer": "Decide Whether is searching for Washington state", + "netloc": "map", + "url": "https://map.uniqlo.com/us/en/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "product", + "reference_answer": "kids", + "url": "https://map.uniqlo.com/us/en/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "product", + "reference_answer": "maternity", + "url": "https://map.uniqlo.com/us/en/" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": "#undefined-content >fieldset:nth-child(1)>div:nth-child(1)>div:nth-child(1)>div:nth-child(1)>label:nth-child(2)", + "netloc": "map", + "url": "https://map.uniqlo.com/us/en/" + } + } + ] + }, + { + "index": "FoP8QZ-JiAcVODUHGQj8v", + "task": "Buy a single day pass to Six Flags, Magic Mountain in sixflags", + "reference_task_length": 15, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "sixflags.", + "url": "https://www.sixflags.com/greatamerica" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "magicmountain", + "url": "https://www.sixflags.com/magicmountain" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".sf-compare-column.sf-compare-column-type-single.sf-compare-column-425324.sf-compare-column-0 >.sf-compare-column-footer >a:nth-child(4)", + "netloc": "sixflags", + "url": 
"https://www.sixflags.com/magicmountain/store/tickets" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "store/tickets", + "url": "https://www.sixflags.com/magicmountain/store/tickets" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".snap-res--list.gap-col.gap-xs-8.gap-md-16 >snap-package-result:nth-child(2)>.snap-pkg-result--content.gap-col.gap-4.gap-auto >.snap-pkg-result--name.font-semi-bold.blue ", + "netloc": "sixflags", + "url": "https://www.sixflags.com/magicmountain/store/tickets" + } + } + ] + }, + { + "index": "0HM69WbWPtgMJsnigeQiK", + "task": "Confirm my vip tour at the six flags Discovery Kingdom in sixflags", + "reference_task_length": 6, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "sixflags.", + "url": "https://www.sixflags.com/america" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "discoverykingdom", + "url": "https://www.sixflags.com/discoverykingdom" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "store/vip-tours", + "url": "https://www.sixflags.com/discoverykingdom/store/vip-tours" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".entry-content.clear >div:nth-child(1)>section:nth-child(1)>div:nth-child(1)>div:nth-child(1)>div:nth-child(1)>section:nth-child(1)>div:nth-child(1)>div:nth-child(2)>div:nth-child(1)>div:nth-child(9)>div:nth-child(1)>div:nth-child(1)>a:nth-child(1)", + "netloc": "sixflags", + "url": "https://www.sixflags.com/discoverykingdom/store/vip-tours" + } + } + ] + }, + { + "index": "29LVD9B4mMWD6tACTcUxz", + "task": "Find a pasta restaurant in Sydney and save it in resy", + "reference_task_length": 12, + 
"evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "resy.", + "url": "https://resy.com/cities/syd?date=2024-01-27&seats=2&query=pasta" + } + }, + { + "match_function_name": "url_semantic_match", + "content": { + "key": "query", + "reference_answer": "Decide whether is searching for a pasta restaurant", + "url": "https://resy.com/cities/syd?date=2024-01-27&seats=2&query=pasta" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".ResyIcon.ResyIcon--heart >svg:nth-child(1)>path:nth-child(1)", + "netloc": "resy", + "url": "https://resy.com/cities/syd?date=2024-01-27&seats=2&query=pasta" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "cities/syd", + "url": "https://resy.com/cities/syd?date=2024-01-27&seats=2&query=pasta" + } + } + ] + }, + { + "index": "4Eeod20yp8c7xLKgwrt3V", + "task": "Compare two wireless printers that are rated above 4 stars in newegg", + "reference_task_length": 10, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "newegg.", + "url": "https://www.newegg.com/" + } + }, + { + "match_function_name": "url_semantic_match", + "content": { + "key": "d", + "reference_answer": "Decide whether are searching for wireless printer", + "url": "https://www.newegg.com/p/pl?d=wireless+printer" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "N", + "reference_answer": "4204", + "url": "https://www.newegg.com/p/pl?d=wireless+printer&N=4204" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".button.button-m.bg-blue >span:nth-child(1)", + "netloc": "newegg", + "url": "https://www.newegg.com/p/pl?d=wireless+printer&N=4204" + } + } + ] + }, + { + "index": "BwEba3AFQbwYHDQ39PE97", + 
"task": "Find all outdoor events this month in NYC in new.mta.info", + "reference_task_length": 10, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "new.mta.info", + "url": "https://new.mta.info/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/events", + "url": "https://away.mta.info/events?utm_source=mtadotinfo" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/outdoors", + "url": "https://away.mta.info/events/outdoors/new-york-city/this-month" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "new-york-city/", + "url": "https://away.mta.info/events/outdoors/new-york-city/this-month" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/this-month", + "url": "https://away.mta.info/events/outdoors/new-york-city/this-month" + } + } + ] + }, + { + "index": "fuqsgHg3WWfn3uEkHleOT", + "task": "Find cheapest bus for 2 persons from New York to Washington, DC in us.megabus", + "reference_task_length": 10, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "us.megabus.", + "url": "https://us.megabus.com/" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "totalPassengers", + "reference_answer": "2", + "url": "https://us.megabus.com/journey-planner/journeys?days=1&concessionCount=0&departureDate=2024-01-27&destinationId=142&inboundDepartureDate=2024-01-27&inboundOtherDisabilityCount=0&inboundPcaCount=0&inboundWheelchairSeated=0&nusCount=0&originId=123&otherDisabilityCount=0&pcaCount=0&totalPassengers=2&wheelchairSeated=0" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "destinationId", + "reference_answer": "142", + "url": 
"https://us.megabus.com/journey-planner/journeys?days=1&concessionCount=0&departureDate=2024-01-27&destinationId=142&inboundDepartureDate=2024-01-27&inboundOtherDisabilityCount=0&inboundPcaCount=0&inboundWheelchairSeated=0&nusCount=0&originId=123&otherDisabilityCount=0&pcaCount=0&totalPassengers=2&wheelchairSeated=0" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "originId", + "reference_answer": "123", + "url": "https://us.megabus.com/journey-planner/journeys?days=1&concessionCount=0&departureDate=2024-01-27&destinationId=142&inboundDepartureDate=2024-01-27&inboundOtherDisabilityCount=0&inboundPcaCount=0&inboundWheelchairSeated=0&nusCount=0&originId=123&otherDisabilityCount=0&pcaCount=0&totalPassengers=2&wheelchairSeated=0" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": "#sortselected ", + "netloc": "us", + "url": "https://us.megabus.com/journey-planner/journeys?days=1&concessionCount=0&departureDate=2024-01-27&destinationId=142&inboundDepartureDate=2024-01-27&inboundOtherDisabilityCount=0&inboundPcaCount=0&inboundWheelchairSeated=0&nusCount=0&originId=123&otherDisabilityCount=0&pcaCount=0&totalPassengers=2&wheelchairSeated=0" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": "#sortselected >option:nth-child(3)", + "netloc": "us", + "url": "https://us.megabus.com/journey-planner/journeys?days=1&concessionCount=0&departureDate=2024-01-27&destinationId=142&inboundDepartureDate=2024-01-27&inboundOtherDisabilityCount=0&inboundPcaCount=0&inboundWheelchairSeated=0&nusCount=0&originId=123&otherDisabilityCount=0&pcaCount=0&totalPassengers=2&wheelchairSeated=0" + } + } + ] + }, + { + "index": "fRp4AJThGn5cxMsD9L0dj", + "task": "Locate a store in spring, Texas in kohls", + "reference_task_length": 6, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": 
{ + "key": "", + "reference_answer": "kohls.", + "url": "https://www.kohls.com/?kwid=p43857836129&utm_source=google&utm_medium=cpc&utm_term=Kohls.&utm_campaign=Brand_Search_Google_Brand&UTM_Adgroupid=58700005024606624&pfx=pfx_google_roi&cid=brandsearch&ds_rl=2671&gad_source=1&gclid=CjwKCAiAk9itBhASEiwA1my_68vyqjyesKUyXab6l12l_SQ8AqMN0LR84PjKGghYz-Mo-wpzvrh4whoC2T0QAvD_BwE&gclsrc=aw.ds" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/stores", + "url": "https://www.kohls.com/stores.shtml" + } + }, + { + "match_function_name": "url_semantic_match", + "content": { + "key": "q", + "reference_answer": "Decide whether are searching for Spring, texas", + "url": "https://www.kohls.com/?q=Spring,%20TX" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".map-list >li:nth-child(1)>.map-list-item >.map-list-item-header >.ga-link >.location-name >span:nth-child(1)", + "netloc": "kohls", + "url": "https://www.kohls.com/stores.shtml" + } + } + ] + }, + { + "index": "BcmM3jqu6JC_zGixFIn9n", + "task": "Show MLB tickets for this weekend in ticketcenter", + "reference_task_length": 5, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "ticketcenter.", + "url": "https://www.ticketcenter.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/sports/baseball/professional-mlb", + "url": "https://www.ticketcenter.com/sports/baseball/professional-mlb" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".filters >span:nth-child(6)>div:nth-child(1)>ul:nth-child(2)>li:nth-child(1)>a:nth-child(1)", + "netloc": "ticketcenter", + "url": "https://www.ticketcenter.com/sports/baseball/professional-mlb" + } + } + ] + }, + { + "index": 
"S3OwPY3cGAf71kRLyztef", + "task": "Show me the schedule for the orange line in mbta", + "reference_task_length": 4, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "mbta.", + "url": "https://www.mbta.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/schedules/Orange/line", + "url": "https://www.mbta.com/schedules/Orange/line" + } + } + ] + }, + { + "index": "SeWmxjveoPc9RGHOF3j5z", + "task": "Search for a rental car in Miami, FL for pick up at the airport and drop off at a different location within the same city in kayak", + "reference_task_length": 10, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "kayak.", + "url": "https://www.kayak.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/cars", + "url": "https://www.kayak.com/cars" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": "#oneway >span:nth-child(1)", + "netloc": "kayak", + "url": "https://www.kayak.com/cars" + } + }, + { + "match_function_name": "element_value_semantic_match", + "content": { + "reference_answer": "Decide whether are searching for Miami Airport", + "netloc": "kayak", + "path": ".J_T2-field-group.J_T2-mod-collapse-l.J_T2-mod-grow >div:nth-child(1)>.pM26 >div:nth-child(1)>input:nth-child(2)", + "url": "https://www.kayak.com/cars" + } + }, + { + "match_function_name": "element_value_semantic_match", + "content": { + "reference_answer": "Decide whether are searching for Miami", + "netloc": "kayak", + "path": ".UBQ2-dropoff-container >.pM26 >div:nth-child(1)>input:nth-child(2)", + "url": "https://www.kayak.com/cars" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": 
".a7Uc-infix >svg:nth-child(1)", + "netloc": "kayak", + "url": "https://www.kayak.com/cars" + } + } + ] + }, + { + "index": "smtGZ3IGKhkLn6u5JZn_N", + "task": "Browse tickets for Chicago Bears games in ticketcenter", + "reference_task_length": 3, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "ticketcenter.", + "url": "https://www.ticketcenter.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/chicago-bears-tickets", + "url": "https://www.ticketcenter.com/chicago-bears-tickets" + } + } + ] + }, + { + "index": "T6QdE5lpgWWBrk9FuZ9E5", + "task": "Show me the recent trades in boardgamegeek", + "reference_task_length": 5, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "boardgamegeek.", + "url": "https://boardgamegeek.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/trade", + "url": "https://boardgamegeek.com/trade" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "action", + "reference_answer": "recent", + "url": "https://boardgamegeek.com/geektrade.php?action=recent" + } + } + ] + }, + { + "index": "l-ZNNoU5p81fOEk9td5os", + "task": "Browse hip hop concerts that are happening this weekend in ticketcenter", + "reference_task_length": 5, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "ticketcenter.", + "url": "https://www.ticketcenter.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/concerts", + "url": "https://www.ticketcenter.com/concerts" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/rap-hip-hop", + "url": 
"https://www.ticketcenter.com/concerts/rap-hip-hop" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".filters >span:nth-child(6)>div:nth-child(1)>ul:nth-child(2)>li:nth-child(1)>a:nth-child(1)", + "netloc": "ticketcenter", + "url": "https://www.ticketcenter.com/concerts/rap-hip-hop" + } + } + ] + }, + { + "index": "Im8_msPnXNbCjzJABTwNZ", + "task": "Search for newest girls' training leggings in size yxl in underarmour", + "reference_task_length": 11, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "underarmour.", + "url": "https://www.underarmour.com/en-us/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/girls/clothing/bottoms", + "url": "https://www.underarmour.com/en-us/c/girls/clothing/bottomhttps://www.underarmour.com/en-us/c/girls/clothing/bottom" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/training", + "url": "https://www.underarmour.com/en-us/c/girls/clothing/bottoms/training/" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "prefv1", + "reference_answer": "YXL", + "url": "https://www.underarmour.com/en-us/c/girls/clothing/bottoms/training/?prefn1=size&prefv1=YXL" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/fitted-training", + "url": "https://www.underarmour.com/en-us/c/girls/clothing/bottoms/fitted-training/?prefn1=size&prefv1=YXL" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "srule", + "reference_answer": "newest", + "url": "https://www.underarmour.com/en-us/c/girls/clothing/bottoms/fitted-training/?prefn1=size&prefv1=YXL&srule=newest" + } + } + ] + }, + { + "index": "jjXQ0JM7mr09Jl1KVZzSn", + "task": "Show Jazz music genre tracks in 
last.fm", + "reference_task_length": 5, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "last.fm.", + "url": "https://www.last.fm/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/music", + "url": "https://www.last.fm/music" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/tag/jazz", + "url": "https://www.last.fm/tag/jazz" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/tracks", + "url": "https://www.last.fm/tag/jazz/tracks" + } + } + ] + }, + { + "index": "NNHA-KGGJkKQR3WZT1GhY", + "task": "Find a parking with EV charging for month with lowest price in Chicago in spothero", + "reference_task_length": 10, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "spothero.", + "url": "https://spothero.com/" + } + }, + { + "match_function_name": "element_value_semantic_match", + "content": { + "reference_answer": "Decide whether are searching for Chicago", + "netloc": "spothero", + "url": "https://spothero.com/" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "monthly", + "reference_answer": "true", + "url": "https://spothero.com/search?kind=city&id=1&monthly=true" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": "#Checkbox-FilterItem-ev ", + "netloc": "spothero", + "url": "https://spothero.com/search?kind=city&id=1&monthly=true" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".Button.Button-primary ", + "netloc": "spothero", + "url": "https://spothero.com/search?kind=city&id=1&monthly=true" + } + }, + { + "match_function_name": 
"element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".SpotListSorter >.Select.FormElement >.FormElement-control >.FormElement-item ", + "netloc": "spothero", + "url": "https://spothero.com/search?kind=city&id=1&monthly=true" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".SpotListSorter >.Select.FormElement >.FormElement-control >.FormElement-item >option:nth-child(2)", + "netloc": "spothero", + "url": "https://spothero.com/search?kind=city&id=1&monthly=true" + } + } + ] + }, + { + "index": "I4FDUSTAfdXjQYQ8Lpkur", + "task": "Filter search results for guitar tabs to only show songs with a difficulty rating of \"Beginner\" in ultimate-guitar", + "reference_task_length": 4, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "ultimate-guitar.", + "url": "https://www.ultimate-guitar.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/explore", + "url": "https://www.ultimate-guitar.com/explore" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "difficulty[]", + "reference_answer": "2", + "url": "https://www.ultimate-guitar.com/explore?difficulty[]=2" + } + } + ] + }, + { + "index": "v3G0RAZWHBsT1N_5BpFzq", + "task": "Find the highest-rated adults-only romantic beach vacation deals in travelzoo", + "reference_task_length": 9, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "travelzoo.", + "url": "https://www.travelzoo.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/beach-vacation-deals/", + "url": "https://www.travelzoo.com/collection/beach-vacation-deals/" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + 
"content": { + "reference_answer": ".btn-group.btn-group--icons.btn-group--scrollx-xs.btn-group--single-row.js-btn-group-radio.btn-group--icons-tall >.btn-group-wrapper >.btn-group-items >div:nth-child(2)>.text ", + "netloc": "travelzoo", + "url": "https://www.travelzoo.com/collection/beach-vacation-deals/" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".btn-group-items >button:nth-child(2)", + "netloc": "travelzoo", + "url": "https://www.travelzoo.com/collection/beach-vacation-deals/" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".btn-group-items >button:nth-child(27)", + "netloc": "travelzoo", + "url": "https://www.travelzoo.com/collection/beach-vacation-deals/" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".button-primary.button-done.right ", + "netloc": "travelzoo", + "url": "https://www.travelzoo.com/collection/beach-vacation-deals/" + } + } + ] + }, + { + "index": "tINRPzcB9gQ9_5KsL5qnW", + "task": "Check reviews and research information about Audi A6 2020 in cargurus", + "reference_task_length": 11, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "cargurus.", + "url": "https://www.cargurus.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/2020-", + "url": "https://www.cargurus.com/research/2020-Audi-A6-c29349" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "-Audi-", + "url": "https://www.cargurus.com/research/2020-Audi-A6-c29349" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "-A6-", + "url": "https://www.cargurus.com/research/2020-Audi-A6-c29349" + } 
+ }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".Zb9tJO.qs80eV >li:nth-child(2)", + "netloc": "cargurus", + "url": "https://www.cargurus.com/research/2020-Audi-A6-c29349" + } + } + ] + }, + { + "index": "wiqkYolPL-j6RRg38yaoj", + "task": "Search for Mexican restaurants in Concord, CA in yelp", + "reference_task_length": 6, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "yelp.", + "url": "https://www.yelp.com/" + } + }, + { + "match_function_name": "url_semantic_match", + "content": { + "key": "find_desc", + "reference_answer": "Decide whether are searching for mexican restaurants", + "url": "https://www.yelp.com/search?find_desc=Mexican+Restaurants&find_loc=Concord%2C+CA" + } + }, + { + "match_function_name": "url_semantic_match", + "content": { + "key": "find_loc", + "reference_answer": "Decide whether are searching for Concord", + "url": "https://www.yelp.com/search?find_desc=Mexican+Restaurants&find_loc=Concord%2C+CA" + } + } + ] + }, + { + "index": "WVLU1JwIBD1W39pi3I03H", + "task": "Find a person by phone number 123456789 in yellowpages", + "reference_task_length": 8, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "yellowpages.", + "url": "https://www.yellowpages.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "people.", + "url": "https://people.yellowpages.com/whitepages/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "whitepages/phone-lookup", + "url": "https://people.yellowpages.com/whitepages/phone-lookup" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "phone", + "reference_answer": "123456789", + "url": 
"https://people.yellowpages.com/whitepages/phone-lookup?phone=123456789" + } + } + ] + }, + { + "index": "XcGjW6ObOPoGTiepUhImF", + "task": "Play the Wakanda Forever trailer in redbox", + "reference_task_length": 6, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "redbox.", + "url": "https://www.redbox.com/movies/black-panther-wakanda-forever" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/black-panther-wakanda-forever", + "url": "https://www.redbox.com/movies/black-panther-wakanda-forever" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": "#maincontent >div:nth-child(1)>div:nth-child(2)>div:nth-child(3)>div:nth-child(2)>div:nth-child(1)>button:nth-child(1)>svg:nth-child(1)>g:nth-child(2)>path:nth-child(1)", + "netloc": "redbox", + "url": "https://www.redbox.com/movies/black-panther-wakanda-forever" + } + } + ] + }, + { + "index": "zmaw1q_peESU5FXhv6Nc0", + "task": "Find the flight from New York to Miami from Mar 27 to Mar 31 that has the lowest core price in points in jetblue", + "reference_task_length": 17, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "jetblue.", + "url": "https://www.jetblue.com/" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "from", + "reference_answer": "NYC", + "url": "https://www.jetblue.com/booking/flights?from=NYC&to=XFL&depart=2024-03-27&return=2024-03-31&isMultiCity=false&noOfRoute=1&lang=en&adults=1&children=0&infants=0&sharedMarket=false&roundTripFaresFlag=false&usePoints=false" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "to", + "reference_answer": "XFL", + "url": 
"https://www.jetblue.com/booking/flights?from=NYC&to=XFL&depart=2024-03-27&return=2024-03-31&isMultiCity=false&noOfRoute=1&lang=en&adults=1&children=0&infants=0&sharedMarket=false&roundTripFaresFlag=false&usePoints=false" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "depart", + "reference_answer": "2024-03-27", + "url": "https://www.jetblue.com/booking/flights?from=NYC&to=XFL&depart=2024-03-27&return=2024-03-31&isMultiCity=false&noOfRoute=1&lang=en&adults=1&children=0&infants=0&sharedMarket=false&roundTripFaresFlag=false&usePoints=false" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "return", + "reference_answer": "2024-03-31", + "url": "https://www.jetblue.com/booking/flights?from=NYC&to=XFL&depart=2024-03-27&return=2024-03-31&isMultiCity=false&noOfRoute=1&lang=en&adults=1&children=0&infants=0&sharedMarket=false&roundTripFaresFlag=false&usePoints=false" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".inline-flex >jb-segment:nth-child(2)>.f6.royal-blue.flex.tc.lh-title.ph3.items-center ", + "netloc": "jetblue", + "url": "https://www.jetblue.com/booking/flights?from=NYC&to=XFL&depart=2024-03-27&return=2024-03-31&isMultiCity=false&noOfRoute=1&lang=en&adults=1&children=0&infants=0&sharedMarket=false&roundTripFaresFlag=false&usePoints=false" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": "#jb-select-1-option-1 >div:nth-child(1)>span:nth-child(2)", + "netloc": "jetblue", + "url": "https://www.jetblue.com/booking/flights?from=NYC&to=XFL&depart=2024-03-27&return=2024-03-31&isMultiCity=false&noOfRoute=1&lang=en&adults=1&children=0&infants=0&sharedMarket=false&roundTripFaresFlag=false&usePoints=false" + } + } + ] + }, + { + "index": "zZlS3taTzRyPq-m4lWHCK", + "task": "Check the order status using an order number of24124124091. 
The email address is boobear@gmail.com in apple", + "reference_task_length": 9, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "secure4.store.apple.", + "url": "https://secure4.store.apple.com/shop/signIn/orders?ssi=1AAABjU9whsoguipdI_UC8mWh8GmvFlw8NMQuxEDaa2SNHX6Fc1n15x0AAAAxaHR0cHM6Ly9zZWN1cmU0LnN0b3JlLmFwcGxlLmNvbS9zaG9wL29yZGVyL2xpc3R8fAACAbfdSCduwV2QV_DrgZUKKYf0wgHP3eT1ignsgCuaz3Qi" + } + }, + { + "match_function_name": "element_value_exactly_match", + "content": { + "reference_answer": "24124124091", + "netloc": "secure4", + "path": ".row.rs-sign-in-main.as-l-container >div:nth-child(3)>div:nth-child(1)>div:nth-child(1)>form:nth-child(2)>div:nth-child(2)>div:nth-child(1)>.form-textbox-input ", + "url": "https://secure4.store.apple.com/shop/signIn/orders?ssi=1AAABjU9whsoguipdI_UC8mWh8GmvFlw8NMQuxEDaa2SNHX6Fc1n15x0AAAAxaHR0cHM6Ly9zZWN1cmU0LnN0b3JlLmFwcGxlLmNvbS9zaG9wL29yZGVyL2xpc3R8fAACAbfdSCduwV2QV_DrgZUKKYf0wgHP3eT1ignsgCuaz3Qi" + } + }, + { + "match_function_name": "element_value_exactly_match", + "content": { + "reference_answer": "boobear@gmail.com", + "netloc": "secure4", + "path": ".row.rs-sign-in-main.as-l-container >div:nth-child(3)>div:nth-child(1)>div:nth-child(1)>form:nth-child(2)>div:nth-child(2)>div:nth-child(2)>.form-textbox-input ", + "url": "https://secure4.store.apple.com/shop/signIn/orders?ssi=1AAABjU9whsoguipdI_UC8mWh8GmvFlw8NMQuxEDaa2SNHX6Fc1n15x0AAAAxaHR0cHM6Ly9zZWN1cmU0LnN0b3JlLmFwcGxlLmNvbS9zaG9wL29yZGVyL2xpc3R8fAACAbfdSCduwV2QV_DrgZUKKYf0wgHP3eT1ignsgCuaz3Qi" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".column.large-12.large-centered >button:nth-child(1)>span:nth-child(1)>span:nth-child(1)>span:nth-child(1)", + "netloc": "secure4", + "url": 
"https://secure4.store.apple.com/shop/signIn/orders?ssi=1AAABjU9whsoguipdI_UC8mWh8GmvFlw8NMQuxEDaa2SNHX6Fc1n15x0AAAAxaHR0cHM6Ly9zZWN1cmU0LnN0b3JlLmFwcGxlLmNvbS9zaG9wL29yZGVyL2xpc3R8fAACAbfdSCduwV2QV_DrgZUKKYf0wgHP3eT1ignsgCuaz3Qi" + } + } + ] + }, + { + "index": "TB49voH8mO7cf5RbyDmgH", + "task": "Open gift list for beauty products in instacart", + "reference_task_length": 7, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "instacart.", + "url": "https://www.instacart.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/store/hub/gifts", + "url": "https://www.instacart.com/store/hub/gifts" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".e-64tucv >li:nth-child(7)>.e-19johef >.e-g3bddq >.e-1081mk8 >.e-159k8fc ", + "netloc": "instacart", + "url": "https://www.instacart.com/store/hub/gifts" + } + } + ] + }, + { + "index": "tdiqD1FPDYieEiCd1mlHz", + "task": "locate the store in IL in uniqlo", + "reference_task_length": 7, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "map.uniqlo.", + "url": "https://map.uniqlo.com/us/en/" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": "#option-13 ", + "netloc": "map", + "url": "https://map.uniqlo.com/us/en/" + } + } + ] + }, + { + "index": "TeTQRYT0w-oCl5qCXSHxc", + "task": "Find the trade-in options for PS4 in gamestop", + "reference_task_length": 7, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "gamestop.", + "url": "https://www.gamestop.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/trade", + "url": 
"https://www.gamestop.com/trade/" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "cgid", + "reference_answer": "playstation-4", + "url": "https://www.gamestop.com/trade/?cgid=playstation-4" + } + } + ] + }, + { + "index": "toW0JlkNCDtC3rVlF_j3o", + "task": "What are Gloomhaven's ratings? in boardgamegeek", + "reference_task_length": 7, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "boardgamegeek.", + "url": "https://boardgamegeek.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "boardgame/174430/gloomhaven", + "url": "https://boardgamegeek.com/boardgame/174430/gloomhaven" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/ratings", + "url": "https://boardgamegeek.com/boardgame/174430/gloomhaven" + } + } + ] + }, + { + "index": "VyaGipMbchitfc-n4yeJr", + "task": "Show me a list of children's program events in Illinois in nps.gov", + "reference_task_length": 11, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "nps.gov/", + "url": "https://www.nps.gov/index.htm" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/event-search", + "url": "https://www.nps.gov/planyourvisit/event-search.htm" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".multiselect-container.dropdown-menu.show >button:nth-child(18)>.form-check >.form-check-label ", + "netloc": "nps", + "url": "https://www.nps.gov/planyourvisit/event-search.htm" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".multiselect-container.dropdown-menu.show >button:nth-child(6)>.form-check 
>.form-check-label ", + "netloc": "nps", + "url": "https://www.nps.gov/planyourvisit/event-search.htm" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".EventCalendar__SearchBar__SubmitButton.FacetedSearch-submitButton ", + "netloc": "nps", + "url": "https://www.nps.gov/planyourvisit/event-search.htm" + } + } + ] + }, + { + "index": "acCvItQT2PQxhAEaZpw4i", + "task": "Find a pedicure salon in New York and add two to favorites in yellowpages", + "reference_task_length": 11, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "yellowpages.", + "url": "https://www.yellowpages.com/" + } + }, + { + "match_function_name": "url_semantic_match", + "content": { + "key": "search_terms", + "reference_answer": "Decide whether are searching for pedicure salon", + "url": "https://www.yellowpages.com/search?search_terms=pedicure+salon&geo_location_terms=New+York" + } + }, + { + "match_function_name": "url_semantic_match", + "content": { + "key": "geo_location_terms", + "reference_answer": "Decide whether are searching for New York", + "url": "https://www.yellowpages.com/search?search_terms=pedicure+salon&geo_location_terms=New+York" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".add-to-favorites >svg:nth-child(1)>use:nth-child(1)", + "netloc": "yellowpages", + "url": "https://www.yellowpages.com/new-york-ny/mip/beauty-youth-village-spa-462459927" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".add-to-favorites >svg:nth-child(1)>use:nth-child(1)", + "netloc": "yellowpages", + "url": "https://www.yellowpages.com/new-york-ny/mip/fabio-doti-salon-467213009" + } + } + ] + }, + { + "index": "CGMIrWw9PUbA-_-eobsE4", + "task": "Browse cough medicine that is rated 4 stars and 
above and is $15-$20 in cvs", + "reference_task_length": 6, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "cvs.", + "url": "https://www.cvs.com/" + } + }, + { + "match_function_name": "url_semantic_match", + "content": { + "key": "searchTerm", + "reference_answer": "Decide whether are searching for cough medicine", + "url": "https://www.cvs.com/search?searchTerm=cough%20medicine" + } + }, + { + "match_function_name": "element_value_exactly_match", + "content": { + "reference_answer": "", + "netloc": "cvs", + "url": "https://www.cvs.com/search?searchTerm=cough%20medicine" + } + }, + { + "match_function_name": "element_value_exactly_match", + "content": { + "reference_answer": "", + "netloc": "cvs", + "url": "https://www.cvs.com/search?searchTerm=cough%20medicine" + } + } + ] + }, + { + "index": "-MhfU8oaGzKwxF8Vss27N", + "task": "Find vitamin D that are buy 1 get 1 free and new arrival in cvs", + "reference_task_length": 13, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "cvs.", + "url": "https://www.cvs.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "shop/vitamins/letter-vitamins/vitamin-d", + "url": "https://www.cvs.com/shop/vitamins/letter-vitamins/vitamin-d?icid=shop-vitamins-letter-vitamins-vitamin-d" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "Buy_1,_Get_1_Free", + "url": "https://www.cvs.com/shop/vitamins/letter-vitamins/vitamin-d?icid=shop-vitamins-letter-vitamins-vitamin-d" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/prna", + "url": "https://www.cvs.com/shop/vitamins/letter-vitamins/vitamin-d?icid=shop-vitamins-letter-vitamins-vitamin-d" + } + } + ] + }, + { + "index": "4wZJwHZUJURBC52Itut37", + "task": "Search for 
holiday campground in Alaska in koa", + "reference_task_length": 6, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "koa.", + "url": "https://koa.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "campgrounds/homer/", + "url": "https://koa.com/campgrounds/homer/" + } + } + ] + }, + { + "index": "7_nSOclMlKHrVG21A6e67", + "task": "Locate a store which is nearest to 10017 zip code in uniqlo", + "reference_task_length": 9, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "map.uniqlo.", + "url": "https://map.uniqlo.com/us/en/" + } + }, + { + "match_function_name": "element_value_exactly_match", + "content": { + "reference_answer": "10017", + "netloc": "map", + "url": "https://map.uniqlo.com/us/en/" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": "#root >div:nth-child(1)>div:nth-child(3)>section:nth-child(1)>section:nth-child(3)>div:nth-child(1)>div:nth-child(1)>div:nth-child(1)>div:nth-child(1)>form:nth-child(1)>button:nth-child(2)>div:nth-child(1)>svg:nth-child(1)", + "netloc": "map", + "url": "https://map.uniqlo.com/us/en/" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": "#root >div:nth-child(1)>div:nth-child(3)>section:nth-child(1)>section:nth-child(4)>section:nth-child(2)>div:nth-child(5)>div:nth-child(2)>ul:nth-child(1)>li:nth-child(1)>button:nth-child(1)", + "netloc": "map", + "url": "https://map.uniqlo.com/us/en/" + } + } + ] + }, + { + "index": "EJzybjJ3oqqrdh5bQkOa7", + "task": "watch for news list about the latest news about Lebron James in sports.yahoo", + "reference_task_length": 8, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + 
"reference_answer": "sports.yahoo.", + "url": "https://sports.yahoo.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "nba/players/3704", + "url": "https://sports.yahoo.com/nba/players/3704/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/news", + "url": "https://sports.yahoo.com/nba/players/3704/" + } + } + ] + }, + { + "index": "hTJXHkogeG54F3ZaCfBxH", + "task": "What are the Resy Staff Favorites in Seattle? in resy", + "reference_task_length": 8, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "https://resy.com/", + "url": "https://resy.com/?date=2024-01-28&seats=2" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "list", + "reference_answer": "collection_963", + "url": "https://resy.com/cities/sea?seats=2&date=2024-01-28&list=collection_963" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/sea", + "url": "https://resy.com/cities/sea?seats=2&date=2024-01-28&list=collection_963" + } + } + ] + }, + { + "index": "hWz3-bu-hGsrtoojpBZCU", + "task": "Find the fast responding highest-rated electrician, who is currently open for replacement of some light fixtures in Westminster, and request a quote in yelp", + "reference_task_length": 13, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "yelp.", + "url": "https://www.yelp.com/" + } + }, + { + "match_function_name": "url_semantic_match", + "content": { + "key": "find_desc", + "reference_answer": "Decide whether are searching for Electricians", + "url": "https://www.yelp.com/search?find_desc=Electricians&find_loc=Honolulu%2C+HI" + } + }, + { + "match_function_name": "url_semantic_match", + "content": { + "key": "find_loc", + "reference_answer": "Decide whether 
are searching for Westminster,CA", + "url": "https://www.yelp.com/search?find_desc=Electricians&find_loc=Westminster%2C+CA" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "sortby", + "reference_answer": "rating", + "url": "https://www.yelp.com/search?find_desc=Electricians&find_loc=Westminster%2C+CA&sortby=rating" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "open_now", + "reference_answer": "6", + "url": "https://www.yelp.com/search?find_desc=Electricians&find_loc=Westminster%2C+CA&sortby=rating&open_now=6672" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "attrs", + "reference_answer": "is_fast_mtb_responder", + "url": "https://www.yelp.com/search?find_desc=Electricians&find_loc=Westminster%2C+CA&attrs=is_fast_mtb_responder&open_now=6673" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "attrs", + "reference_answer": "OnlineMessageThisBusiness", + "url": "https://www.yelp.com/search?find_desc=Electricians&find_loc=Westminster%2C+CA&attrs=is_fast_mtb_responder%2COnlineMessageThisBusiness&open_now=6673" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "attrs", + "reference_answer": "item_light_fixture", + "url": "https://www.yelp.com/search?find_desc=Electricians&find_loc=Westminster%2C+CA&attrs=is_fast_mtb_responder%2COnlineMessageThisBusiness%2Crel_job_works_on_item%3A%3Aitem_light_fixture&open_now=6673" + } + } + ] + }, + { + "index": "Lw6bGDBvAklLaOhY0HRVe", + "task": "Browse glamping locations in California in koa", + "reference_task_length": 6, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "koa.", + "url": "https://koa.com/" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": "#list-states-us >li:nth-child(5)>a:nth-child(1)", + 
"netloc": "koa", + "url": "https://koa.com/find-a-koa/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/find-a-koa", + "url": "https://koa.com/find-a-koa/" + } + } + ] + }, + { + "index": "n0dCdAEPP3VW2Y_h545lq", + "task": "Show most expensive cruise deals in Europe and Mediterranean in travelzoo", + "reference_task_length": 8, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "travelzoo.", + "url": "https://www.travelzoo.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "collection/cruises/", + "url": "https://www.travelzoo.com/collection/cruises/europe-mediterranean/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "europe-mediterranean/", + "url": "https://www.travelzoo.com/collection/cruises/europe-mediterranean/" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".btn-group-items >div:nth-child(4)>.text ", + "netloc": "travelzoo", + "url": "https://www.travelzoo.com/collection/cruises/europe-mediterranean/" + } + }, + { + "match_function_name": "element_value_exactly_match", + "content": { + "reference_answer": "DONE", + "netloc": "travelzoo", + "url": "https://www.travelzoo.com/collection/cruises/europe-mediterranean/" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".result-list.list-nostyle >li:nth-child(1)>div:nth-child(1)>a:nth-child(1)>.deal-info.table-layout.clamped >div:nth-child(2)>h3:nth-child(1)>.deal-headline-text.ts-original-inline.truncate ", + "netloc": "travelzoo", + "url": "https://www.travelzoo.com/collection/cruises/europe-mediterranean/" + } + } + ] + }, + { + "index": "ODAiMd9KUjC7Jv7eodHqp", + "task": "Add a pack of toilet 
paper and a bottle of laundry detergent to your Amazon cart with the lowest total price in amazon", + "reference_task_length": 16, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "amazon.", + "url": "https://www.amazon.com/" + } + }, + { + "match_function_name": "url_semantic_match", + "content": { + "key": "k", + "reference_answer": "Decide whether are searching for a pack of toilet paper", + "url": "https://www.amazon.com/s?k=a+pack+of+toilet+paper&crid=2ZDNFT3712A2Z&sprefix=a+pack+of+toilet+paper%2Caps%2C344&ref=nb_sb_noss_2" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".s-main-slot.s-result-list.s-search-results.sg-row >div:nth-child(12)>.sg-col-inner >div:nth-child(1)>span:nth-child(1)>div:nth-child(1)>div:nth-child(1)>div:nth-child(2)>.a-section.s-title-instructions-style >h2:nth-child(1)>.a-link-normal.s-underline-text.s-underline-link-text.s-link-style.a-text-normal >span:nth-child(1)", + "netloc": "amazon", + "url": "https://www.amazon.com/s?k=a+pack+of+toilet+paper&crid=2ZDNFT3712A2Z&sprefix=a+pack+of+toilet+paper%2Caps%2C344&ref=nb_sb_noss_2" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "ref", + "reference_answer": "nb_sb_noss_2", + "url": "https://www.amazon.com/s?k=a+pack+of+toilet+paper&crid=2ZDNFT3712A2Z&sprefix=a+pack+of+toilet+paper%2Caps%2C344&ref=nb_sb_noss_2" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": "#add-to-cart-button ", + "netloc": "amazon", + "url": "https://www.amazon.com/Peelitical-Toilet-Paper-Roll-Full-Color/dp/B0B6GRTS3K/ref=sr_1_3?crid=2ZDNFT3712A2Z&keywords=a+pack+of+toilet+paper&qid=1706435470&sprefix=a+pack+of+toilet+paper%2Caps%2C344&sr=8-3" + } + }, + { + "match_function_name": "url_semantic_match", + "content": { + "key": "k", + "reference_answer": 
"Decide whether are searching for a bottle of laundry detergent", + "url": "https://www.amazon.com/s?k=a+bottle+of+laundry+detergent&crid=24SHUQD9K9BYI&sprefix=a+bottle+of+laundry+detergent%2Caps%2C353&ref=nb_sb_noss_2" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".s-main-slot.s-result-list.s-search-results.sg-row >div:nth-child(10)>.sg-col-inner >div:nth-child(1)>span:nth-child(1)>div:nth-child(1)>div:nth-child(1)>div:nth-child(2)>.a-section.s-title-instructions-style >h2:nth-child(1)>.a-link-normal.s-underline-text.s-underline-link-text.s-link-style.a-text-normal >span:nth-child(1)", + "netloc": "amazon", + "url": "https://www.amazon.com/s?k=a+bottle+of+laundry+detergent&crid=24SHUQD9K9BYI&sprefix=a+bottle+of+laundry+detergent%2Caps%2C353&ref=nb_sb_noss_2" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "ref", + "reference_answer": "nb_sb_noss_2", + "url": "https://www.amazon.com/s?k=a+bottle+of+laundry+detergent&crid=24SHUQD9K9BYI&sprefix=a+bottle+of+laundry+detergent%2Caps%2C353&ref=nb_sb_noss_2" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": "#add-to-cart-button ", + "netloc": "amazon", + "url": "https://www.amazon.com/MAZI-Laundry-Detergent-Catcher-Economic/dp/B09CF6KC2Y/ref=sr_1_1?crid=24SHUQD9K9BYI&keywords=a+bottle+of+laundry+detergent&qid=1706435504&sprefix=a+bottle+of+laundry+detergent%2Caps%2C353&sr=8-1" + } + } + ] + }, + { + "index": "gw8QYJVt8VkMCuh8WzW5Z", + "task": "Find the highest-rated mover in Honolulu to shift a vehicle and large appliances and who has virtual discussion options available in yelp", + "reference_task_length": 11, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "yelp.", + "url": "https://www.yelp.com/" + } + }, + { + "match_function_name": "url_exactly_match", 
+ "content": { + "key": "find_desc", + "reference_answer": "Movers", + "url": "https://www.yelp.com/search?find_desc=Movers&find_loc=Honolulu%2C+HI" + } + }, + { + "match_function_name": "url_semantic_match", + "content": { + "key": "find_loc", + "reference_answer": "Decide whether the place is Honolulu, HI", + "url": "https://www.yelp.com/search?find_desc=Movers&find_loc=Honolulu%2C+HI" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "sortby", + "reference_answer": "rating", + "url": "https://www.yelp.com/search?find_desc=Movers&find_loc=Honolulu%2C+HI&sortby=rating" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "attrs", + "reference_answer": "item_vehicle", + "url": "https://www.yelp.com/search?find_desc=Movers&find_loc=Honolulu%2C+HI&sortby=rating&attrs=rel_items_moved%3A%3Aitem_vehicle" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "attrs", + "reference_answer": "item_large_appliance", + "url": "https://www.yelp.com/search?find_desc=Movers&find_loc=Honolulu%2C+HI&sortby=rating&attrs=rel_items_moved%3A%3Aitem_vehicle%2Crel_items_moved%3A%3Aitem_large_appliance" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "attrs", + "reference_answer": "offers_virtual_consultations", + "url": "https://www.yelp.com/search?find_desc=Movers&find_loc=Honolulu%2C+HI&sortby=rating&attrs=rel_items_moved%3A%3Aitem_vehicle%2Crel_items_moved%3A%3Aitem_large_appliance%2Coffers_virtual_consultations" + } + } + ] + }, + { + "index": "Kchv0XWA7pIDs5VTzQclZ", + "task": "Use the 'find by state' method to locate all KOA campgrounds in California", + "reference_task_length": 5, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "koa.", + "url": "https://koa.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": 
"states-provinces/", + "url": "https://koa.com/states-provinces/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/california", + "url": "https://koa.com/states-provinces/california/" + } + } + ] + }, + { + "index": "J0z0DgyUzeHipnA4Ou7jy", + "task": "Browse comedy TV shows streaming on Netflix that was released in 2010 in tvguide", + "reference_task_length": 8, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "tvguide.", + "url": "https://www.tvguide.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "streaming/netflix/", + "url": "https://www.tvguide.com/streaming/netflix/" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "genre", + "reference_answer": "comedy", + "url": "https://www.tvguide.com/streaming/netflix/comedy/show/1/?sort=mostPopular&network=netflix&genre=comedy&type=show&releaseYearMin=2010&page=1" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "type", + "reference_answer": "show", + "url": "https://www.tvguide.com/streaming/netflix/comedy/show/1/?sort=mostPopular&network=netflix&genre=comedy&type=show&releaseYearMin=2010&page=1" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "releaseYearMin", + "reference_answer": "2010", + "url": "https://www.tvguide.com/streaming/netflix/comedy/show/1/?sort=mostPopular&network=netflix&genre=comedy&type=show&releaseYearMin=2010&page=1" + } + } + ] + }, + { + "index": "oGmqhhf0kdja3ldy5e_S4", + "task": "Show me the breakfast options on the Carnival Breeze ship in carnival", + "reference_task_length": 6, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "carnival.", + "url": "https://www.carnival.com/" + } + }, + { + "match_function_name": 
"url_included_match", + "content": { + "key": "", + "reference_answer": "cruise-food", + "url": "https://www.carnival.com/cruise-food" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".body-container >div:nth-child(2)>div:nth-child(4)>ul:nth-child(2)>li:nth-child(3)>label:nth-child(2)", + "netloc": "carnival", + "url": "https://www.carnival.com/cruise-food" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".body-container >div:nth-child(2)>div:nth-child(5)>ul:nth-child(2)>li:nth-child(1)>label:nth-child(2)", + "netloc": "carnival", + "url": "https://www.carnival.com/cruise-food" + } + } + ] + }, + { + "index": "kjoMizByJ75Xk-tUtZhfB", + "task": "Follow the Denver Nuggets NBA team in sports.yahoo", + "reference_task_length": 3, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "sports.yahoo.", + "url": "https://sports.yahoo.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "nba/teams/den", + "url": "https://sports.yahoo.com/nba/teams/den/" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".follow-text >span:nth-child(1)", + "netloc": "sports", + "url": "https://sports.yahoo.com/nba/teams/den/" + } + } + ] + }, + { + "index": "LH4LFivu0NgPDMPz_UjiZ", + "task": "Browse the trending searches in Columbus in yellowpages", + "reference_task_length": 5, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "yellowpages.", + "url": "https://www.yellowpages.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/columbus-oh", + "url": "https://www.yellowpages.com/columbus-oh" + } + 
}, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "trends/", + "url": "https://www.yellowpages.com/columbus-oh/trends/1" + } + } + ] + }, + { + "index": "LIBP8TUupuWApGSTg764g", + "task": "List of publishers for board games in boardgamegeek", + "reference_task_length": 4, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "boardgamegeek.", + "url": "https://boardgamegeek.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "browse/boardgamepublisher", + "url": "https://boardgamegeek.com/browse/boardgamepublisher" + } + } + ] + }, + { + "index": "lKHlXMjOvkxH9VIKLSQmO", + "task": "Find hard side Carry-on Luggage in black color in target", + "reference_task_length": 9, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "target.", + "url": "https://www.target.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "carry-on-luggage/", + "url": "https://www.target.com/c/carry-on-luggage/-/N-4xv2o" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/hardside/", + "url": "https://www.target.com/c/carry-on-luggage/hardside/-/N-4xv2oZ1ffcu" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "N-4xv2oZ1ffcuZgup4zc5zk7s", + "url": "https://www.target.com/c/carry-on-luggage/hardside/-/N-4xv2oZ1ffcuZgup4zc5zk7s?moveTo=product-list-grid" + } + } + ] + }, + { + "index": "SfTO0LCl5qsIkPW1sda5X", + "task": "Visit Homepage of 'The Flash' 2023 in IMDb.", + "reference_task_length": 6, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "imdb.", + "url": "https://www.imdb.com/?ref_=nv_home" + } + 
}, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "tt0439572", + "url": "https://www.imdb.com/title/tt0439572/?ref_=fn_al_tt_2" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".ipc-slate.ipc-slate--baseAlt.ipc-slate--dynamic-width.sc-248bafc1-0.cFFKvF.hero-media__slate--inline-video.undefined.ipc-sub-grid-item.ipc-sub-grid-item--span-4 >div:nth-child(4)>.ipc-lockup-overlay__screen ", + "netloc": "imdb", + "url": "https://www.imdb.com/title/tt0439572/?ref_=fn_al_tt_2" + } + } + ] + }, + { + "index": "SWZ9bPJuEfhVn3zj7Aemm", + "task": "Renew a existing KOA rewards account with the rewards number 1000000001 e postal code 10023 in koa", + "reference_task_length": 9, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "koa.", + "url": "https://koa.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "rewards/", + "url": "https://koa.com/rewards/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "purchase/", + "url": "https://koa.com/rewards/purchase/" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": "#vkr-purchase-type-tabs >li:nth-child(2)>label:nth-child(2)", + "netloc": "koa", + "url": "https://koa.com/rewards/purchase/" + } + }, + { + "match_function_name": "element_value_exactly_match", + "content": { + "reference_answer": "1000000001", + "netloc": "koa", + "path": "#Step1_AccountNumber ", + "url": "https://koa.com/rewards/purchase/" + } + }, + { + "match_function_name": "element_value_exactly_match", + "content": { + "reference_answer": "10023", + "netloc": "koa", + "path": "#Step1_PostalCode ", + "url": "https://koa.com/rewards/purchase/" + } + }, + { + 
"match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": "#continueButton ", + "netloc": "koa", + "url": "https://koa.com/rewards/purchase/" + } + } + ] + }, + { + "index": "pj97JhANoatrYxkyqIDJ6", + "task": "Browse events happening at Madison Square Garden in parking.com", + "reference_task_length": 5, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "parking.", + "url": "https://parking.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "nyc/venues/manhattan-madison-square-garden/events", + "url": "https://parking.com/nyc/venues/manhattan-madison-square-garden/events" + } + } + ] + }, + { + "index": "pxVHNYBq6JINOZKNXYW8E", + "task": "Go to the page with help in choosing a solar energy product for homes in tesla", + "reference_task_length": 5, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "tesla.", + "url": "https://www.tesla.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "energy/design", + "url": "https://www.tesla.com/energy/design" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/help-me-choose", + "url": "https://www.tesla.com/energy/design/help-me-choose" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/help-me-choose-quiz", + "url": "https://www.tesla.com/energy/design/help-me-choose-quiz" + } + } + ] + }, + { + "index": "GL3m2slTLRE-rN_UC62XC", + "task": "Find the cheapest last minute cruise deal in travelzoo", + "reference_task_length": 7, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "travelzoo.", + "url": 
"https://www.travelzoo.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "collection/last-minute-cruise-deals/", + "url": "https://www.travelzoo.com/collection/last-minute-cruise-deals/" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".btn-group-items >div:nth-child(3)>.text ", + "netloc": "travelzoo", + "url": "https://www.travelzoo.com/collection/last-minute-cruise-deals/" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".button-primary.button-done.right ", + "netloc": "travelzoo", + "url": "https://www.travelzoo.com/collection/last-minute-cruise-deals/" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".result-list.list-nostyle >li:nth-child(1)>div:nth-child(1)>a:nth-child(1)>.deal-info.table-layout.clamped >div:nth-child(2)>h3:nth-child(1)>.deal-headline-text.ts-original-inline.truncate ", + "netloc": "travelzoo", + "url": "https://www.travelzoo.com/collection/last-minute-cruise-deals/" + } + } + ] + }, + { + "index": "iJYctP6zOJCM8qwUo22jf", + "task": "Find FC Barcelona's next fixture in the Spanish Copa de Rey in espn", + "reference_task_length": 6, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "espn.", + "url": "https://www.espn.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/_/id/83/", + "url": "https://www.espn.com/soccer/team/_/id/83/barcelona" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/fixtures/", + "url": "https://www.espn.com/soccer/team/fixtures/_/id/83/esp.barcelona" + } + }, + { + "match_function_name": "url_included_match", + "content": 
{ + "key": "", + "reference_answer": "ESP.COPA_DEL_REY", + "url": "https://www.espn.com/soccer/team/fixtures/_/id/83/esp.barcelona" + } + } + ] + }, + { + "index": "fZxEueSga2SBJ__Fh8Lwn", + "task": "Browse washing machines available for free local pickup within 25 miles of zip code 90026 in ebay", + "reference_task_length": 11, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "ebay.", + "url": "https://www.ebay.com/" + } + }, + { + "match_function_name": "url_semantic_match", + "content": { + "key": "_nkw", + "reference_answer": "Decide whether are searching for washing machine", + "url": "https://www.ebay.com/sch/i.html?_from=R40&_nkw=washing+machine&_sacat=0&rt=nc&_stpos=90026&_fspt=1&LH_PrefLoc=99&_sadis=25&LH_LPickup=1" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "_sadis", + "reference_answer": "25", + "url": "https://www.ebay.com/sch/i.html?_from=R40&_nkw=washing+machine&_sacat=0&rt=nc&_stpos=90026&_fspt=1&LH_PrefLoc=99&_sadis=25&LH_LPickup=1" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "_stpos", + "reference_answer": "90026", + "url": "https://www.ebay.com/sch/i.html?_from=R40&_nkw=washing+machine&_sacat=0&rt=nc&_stpos=90026&_fspt=1&LH_PrefLoc=99&_sadis=25&LH_LPickup=1" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "LH_LPickup", + "reference_answer": "1", + "url": "https://www.ebay.com/sch/i.html?_from=R40&_nkw=washing+machine&_sacat=0&rt=nc&_stpos=90026&_fspt=1&LH_PrefLoc=99&_sadis=25&LH_LPickup=1" + } + } + ] + }, + { + "index": "cojd69e-u4AozgQz4Pi8d", + "task": "Show me the coming soon AMC Artisan Films in amctheatres", + "reference_task_length": 6, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "amctheatres.", + "url": "https://www.amctheatres.com/" + } + }, + { + "match_function_name": 
"url_included_match", + "content": { + "key": "", + "reference_answer": "/movies", + "url": "https://www.amctheatres.com/movies" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "availability", + "reference_answer": "COMING_SOON", + "url": "https://www.amctheatres.com/movies?availability=COMING_SOON" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".select-styled__select >option:nth-child(2)", + "netloc": "amctheatres", + "url": "https://www.amctheatres.com/movies?availability=COMING_SOON" + } + } + ] + }, + { + "index": "AHWpPOnQ8dV6XNM_Elqgy", + "task": "Find Kevin Durant's bio in espn", + "reference_task_length": 7, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "espn.", + "url": "https://www.espn.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/id/3202/", + "url": "https://www.espn.com/nba/player/_/id/3202/kevin-durant" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "bio/", + "url": "https://www.espn.com/nba/player/bio/_/id/3202/kevin-durant" + } + } + ] + }, + { + "index": "aR_52TLjmboDRqUmDNN6y", + "task": "Find camping tents that can fit 6 people and sort the results by price from low to high in rei", + "reference_task_length": 8, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "rei.", + "url": "https://www.rei.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "c/camping-tents", + "url": "https://www.rei.com/c/camping-tents" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "sort", + "reference_answer": "min-price", + "url": 
"https://www.rei.com/c/camping-tents/f/sc-6-person?ir=category%3Acamping-tents&r=c%3Bf&sort=min-price" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "sc-6-person", + "url": "https://www.rei.com/c/camping-tents/f/sc-6-person?ir=category%3Acamping-tents&r=c%3Bf&sort=min-price" + } + } + ] + }, + { + "index": "8Rb4YIIUnrFeu3JmhQHBK", + "task": "Order an adult\u2019s bottomless bubbles package for Joe Bloggs who is booked onto cruise 101010 in carnival", + "reference_task_length": 11, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "carnival.", + "url": "https://www.carnival.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "drink-packages", + "url": "https://www.carnival.com/drink-packages?icid=icp_ccl_beverage_0708_footer" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".btn.btn-red.general-add-to-cart-btn.btn-add-cart.btn-full.add-to-cart-BB_1.sas-drink-packages ", + "netloc": "carnival", + "url": "https://www.carnival.com/drink-packages?icid=icp_ccl_beverage_0708_footer" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": "#giftLinkFormCheckBoxLabel ", + "netloc": "carnival", + "url": "https://www.carnival.com/drink-packages?icid=icp_ccl_beverage_0708_footer" + } + }, + { + "match_function_name": "element_value_exactly_match", + "content": { + "reference_answer": "Joe", + "netloc": "carnival", + "path": ".recipient-f-name ", + "url": "https://www.carnival.com/drink-packages?icid=icp_ccl_beverage_0708_footer" + } + }, + { + "match_function_name": "element_value_exactly_match", + "content": { + "reference_answer": "Bloggs", + "netloc": "carnival", + "path": ".recipient-l-name ", + "url": 
"https://www.carnival.com/drink-packages?icid=icp_ccl_beverage_0708_footer" + } + }, + { + "match_function_name": "element_value_exactly_match", + "content": { + "reference_answer": "101010", + "netloc": "carnival", + "path": ".cruise-info >input:nth-child(2)", + "url": "https://www.carnival.com/drink-packages?icid=icp_ccl_beverage_0708_footer" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": "#saveButtonOne ", + "netloc": "carnival", + "url": "https://www.carnival.com/drink-packages?icid=icp_ccl_beverage_0708_footer" + } + } + ] + }, + { + "index": "1OJMUzt3nTF3I05jPqIqY", + "task": "Find Airport information of Camarillo Airport, CA and check weather in flightaware", + "reference_task_length": 7, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "flightaware.", + "url": "https://www.flightaware.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "live/airport/KCMA", + "url": "https://www.flightaware.com/live/airport/KCMA#airport-parity-stats-container" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "resources/airport/KCMA/weather", + "url": "https://www.flightaware.com/resources/airport/KCMA/weather" + } + } + ] + }, + { + "index": "U2JVmc0DvWcAvzI4YLwyn", + "task": "Give a like to the #1 track of the Real Time Top Chart in last.fm", + "reference_task_length": 4, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "last.fm", + "url": "https://www.last.fm/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "charts", + "url": "https://www.last.fm/charts" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + 
"reference_answer": ".charts >div:nth-child(1)>div:nth-child(1)>.globalchart >tbody:nth-child(2)>tr:nth-child(1)>.globalchart-name >.link-block-target ", + "netloc": "last", + "url": "https://www.last.fm/charts" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": "#mantle_skin >header:nth-child(2)>div:nth-child(1)>div:nth-child(2)>div:nth-child(5)>div:nth-child(2)>div:nth-child(2)>a:nth-child(1)", + "netloc": "last", + "url": "https://www.last.fm/music/Jung+Kook/_/Standing+Next+To+You" + } + } + ] + }, + { + "index": "V7Pu-mgfxfIaXkTcCYUNW", + "task": "Find the newest silver-colored Lexus SUV with a black interior and backup camera to purchase which has been made between 2022-2023 and is within 50 miles of zip 10017 in cargurus", + "reference_task_length": 25, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "cargurus.", + "url": "https://www.cargurus.com/" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "zip", + "reference_answer": "10017", + "url": "https://www.cargurus.com/Cars/new/searchresults.action?zip=10017&inventorySearchWidgetType=BODYSTYLE&bodyTypeGroup=bg7&sortDir=ASC&sourceContext=untrackedWithinSite_false_0&distance=100&sortType=PRICE" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "bodyTypeGroup", + "reference_answer": "bg7", + "url": "https://www.cargurus.com/Cars/new/searchresults.action?zip=10017&inventorySearchWidgetType=BODYSTYLE&bodyTypeGroup=bg7&sortDir=ASC&sourceContext=untrackedWithinSite_false_0&distance=100&sortType=PRICE" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".Gv7Z4V >div:nth-child(1)>.HObdBl.vT3i0_ >.mvXVZO.Xb7sZe ", + "netloc": "cargurus", + "url": 
"https://www.cargurus.com/Cars/new/searchresults.action?zip=10017&inventorySearchWidgetType=BODYSTYLE&bodyTypeGroup=bg7&sortDir=ASC&sourceContext=untrackedWithinSite_false_0&distance=100&sortType=PRICE" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".Gv7Z4V >div:nth-child(1)>.HObdBl.vT3i0_ >.mvXVZO.Xb7sZe >option:nth-child(3)", + "netloc": "cargurus", + "url": "https://www.cargurus.com/Cars/new/searchresults.action?zip=10017&inventorySearchWidgetType=BODYSTYLE&bodyTypeGroup=bg7&sortDir=ASC&sourceContext=untrackedWithinSite_false_0&distance=100&sortType=PRICE" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".Gv7Z4V >div:nth-child(3)>.HObdBl.vT3i0_ >.mvXVZO.Xb7sZe ", + "netloc": "cargurus", + "url": "https://www.cargurus.com/Cars/new/searchresults.action?zip=10017&inventorySearchWidgetType=BODYSTYLE&bodyTypeGroup=bg7&sortDir=ASC&sourceContext=untrackedWithinSite_false_0&distance=100&sortType=PRICE" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".Gv7Z4V >div:nth-child(1)>.HObdBl.vT3i0_ >.mvXVZO.Xb7sZe >option:nth-child(4)", + "netloc": "cargurus", + "url": "https://www.cargurus.com/Cars/new/searchresults.action?zip=10017&inventorySearchWidgetType=BODYSTYLE&bodyTypeGroup=bg7&sortDir=ASC&sourceContext=untrackedWithinSite_false_0&distance=100&sortType=PRICE" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": "#select-filter-search-distance ", + "netloc": "cargurus", + "url": "https://www.cargurus.com/Cars/new/searchresults.action?zip=10017&inventorySearchWidgetType=BODYSTYLE&bodyTypeGroup=bg7&sortDir=ASC&sourceContext=untrackedWithinSite_false_0&distance=100&sortType=PRICE" + } + }, + { + "match_function_name": "element_path_exactly_match", + 
"method": "selector", + "content": { + "reference_answer": "#select-filter-search-distance >option:nth-child(3)", + "netloc": "cargurus", + "url": "https://www.cargurus.com/Cars/new/searchresults.action?zip=10017&inventorySearchWidgetType=BODYSTYLE&bodyTypeGroup=bg7&sortDir=ASC&sourceContext=untrackedWithinSite_false_0&distance=100&sortType=PRICE" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": "#radix-3 >.NoCcgT >.Lgttuc >span:nth-child(1)", + "netloc": "cargurus", + "url": "https://www.cargurus.com/Cars/new/searchresults.action?zip=10017&inventorySearchWidgetType=BODYSTYLE&bodyTypeGroup=bg7&sortDir=ASC&sourceContext=untrackedWithinSite_false_0&distance=100&sortType=PRICE" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".FXiauQ >ul:nth-child(1)>li:nth-child(11)>.fimP4_ >.y0gbTE.zsNQOE >.MaigNX >.pV9_6g ", + "netloc": "cargurus", + "url": "https://www.cargurus.com/Cars/new/searchresults.action?zip=10017&inventorySearchWidgetType=BODYSTYLE&bodyTypeGroup=bg7&sortDir=ASC&sourceContext=untrackedWithinSite_false_0&distance=100&sortType=PRICE" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": "#radix-5 >.NoCcgT >.Lgttuc >span:nth-child(1)", + "netloc": "cargurus", + "url": "https://www.cargurus.com/Cars/new/searchresults.action?zip=10017&inventorySearchWidgetType=BODYSTYLE&bodyTypeGroup=bg7&sortDir=ASC&sourceContext=untrackedWithinSite_false_0&distance=100&sortType=PRICE" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": "#radix-4 >.UWCYC >.FXiauQ >ul:nth-child(1)>li:nth-child(1)>.fimP4_ >.y0gbTE.zsNQOE >.MaigNX >.pV9_6g ", + "netloc": "cargurus", + "url": 
"https://www.cargurus.com/Cars/new/searchresults.action?zip=10017&inventorySearchWidgetType=BODYSTYLE&bodyTypeGroup=bg7&sortDir=ASC&sourceContext=untrackedWithinSite_false_0&distance=100&sortType=PRICE" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": "#radix-11 >.NoCcgT >.Lgttuc >span:nth-child(1)", + "netloc": "cargurus", + "url": "https://www.cargurus.com/Cars/new/searchresults.action?zip=10017&inventorySearchWidgetType=BODYSTYLE&bodyTypeGroup=bg7&sortDir=ASC&sourceContext=untrackedWithinSite_false_0&distance=100&sortType=PRICE" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".FXiauQ >ul:nth-child(1)>li:nth-child(23)>.fimP4_ >.y0gbTE.zsNQOE ", + "netloc": "cargurus", + "url": "https://www.cargurus.com/Cars/new/searchresults.action?zip=10017&inventorySearchWidgetType=BODYSTYLE&bodyTypeGroup=bg7&sortDir=ASC&sourceContext=untrackedWithinSite_false_0&distance=100&sortType=PRICE" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": "#radix-19 >.NoCcgT >.Lgttuc >span:nth-child(1)", + "netloc": "cargurus", + "url": "https://www.cargurus.com/Cars/new/searchresults.action?zip=10017&inventorySearchWidgetType=BODYSTYLE&bodyTypeGroup=bg7&sortDir=ASC&sourceContext=untrackedWithinSite_false_0&distance=100&sortType=PRICE" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".FXiauQ >ul:nth-child(2)>ul:nth-child(1)>li:nth-child(1)>.fimP4_ >.y0gbTE.zsNQOE ", + "netloc": "cargurus", + "url": "https://www.cargurus.com/Cars/new/searchresults.action?zip=10017&inventorySearchWidgetType=BODYSTYLE&bodyTypeGroup=bg7&sortDir=ASC&sourceContext=untrackedWithinSite_false_0&distance=100&sortType=PRICE" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": 
"selector", + "content": { + "reference_answer": "#Listing-Sort ", + "netloc": "cargurus", + "url": "https://www.cargurus.com/Cars/new/searchresults.action?zip=10017&inventorySearchWidgetType=BODYSTYLE&bodyTypeGroup=bg7&sortDir=ASC&sourceContext=untrackedWithinSite_false_0&distance=100&sortType=PRICE" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": "#Listing-Sort >option:nth-child(9)", + "netloc": "cargurus", + "url": "https://www.cargurus.com/Cars/new/searchresults.action?zip=10017&inventorySearchWidgetType=BODYSTYLE&bodyTypeGroup=bg7&sortDir=ASC&sourceContext=untrackedWithinSite_false_0&distance=100&sortType=PRICE" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".IWo5PZ.orzDm5 >div:nth-child(4)>div:nth-child(1)>.MOfIEd.HgPBTU.prRsnF >.Z0_BC0.ZGMXbN.kKD2eQ >.biZGS4.LtXpwU >.k4FSCT >.bLgDNy >.gN7yGT ", + "netloc": "cargurus", + "url": "https://www.cargurus.com/Cars/new/searchresults.action?zip=10017&inventorySearchWidgetType=BODYSTYLE&bodyTypeGroup=bg7&sortDir=ASC&sourceContext=untrackedWithinSite_false_0&distance=100&sortType=PRICE" + } + } + ] + }, + { + "index": "TsRGdiY3Kx2ngXHFF62dJ", + "task": "Find the score of the 2020 Super Bowl in nfl.com", + "reference_task_length": 7, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "nfl.", + "url": "https://www.nfl.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "scores/", + "url": "https://www.nfl.com/scores/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/2020", + "url": "https://www.nfl.com/scores/2020/POST3" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/POST4", + "url": 
"https://www.nfl.com/scores/2020/POST4" + } + } + ] + }, + { + "index": "TjgqqYbTC_NCYsw4_dMq2", + "task": "Check my trip for Smith and booking number X89998 in aa", + "reference_task_length": 8, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "aa.", + "url": "https://www.aa.com/homePage.do" + } + }, + { + "match_function_name": "element_value_exactly_match", + "content": { + "reference_answer": "Smith", + "netloc": "aa", + "path": ".zeta >input:nth-child(3)", + "url": "https://www.aa.com/homePage.do" + } + }, + { + "match_function_name": "element_value_exactly_match", + "content": { + "reference_answer": "X89998", + "netloc": "aa", + "path": ".zeta >input:nth-child(2)", + "url": "https://www.aa.com/homePage.do" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/find-your-trip", + "url": "https://www.aa.com/reservation/view/find-your-trip?fromHomePage=true&from=manageTripsHomeWidget" + } + } + ] + }, + { + "index": "y8BdI5xlwy_VBluYvyaLr", + "task": "Show me the stats of the Athletic Club from spanish Laliga in espn", + "reference_task_length": 7, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "espn.", + "url": "https://www.espn.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/soccer", + "url": "https://www.espn.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/team", + "url": "https://www.espn.com/soccer/teams" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/spanish-laliga", + "url": "https://www.espn.com/soccer/teams" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "id/93", + "url": 
"https://www.espn.com/soccer/team/stats/_/id/93/athletic-club" + } + } + ] + }, + { + "index": "r6Mkfvlm-mFnqBivBdLMS", + "task": "Search for video game consoles and filter the results to show only those that have Wi-Fi Capability in ebay", + "reference_task_length": 9, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "ebay.", + "url": "https://www.ebay.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/Video-Game-Consoles", + "url": "https://www.ebay.com/b/Video-Game-Consoles/139971/bn_320033" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "Features", + "reference_answer": "Wi-Fi Capability", + "url": "https://www.ebay.com/b/Video-Game-Consoles/139971?mag=1&_fsrp=0&rt=nc&_sacat=139971&Features=Wi%252DFi%2520Capability" + } + } + ] + }, + { + "index": "rfNemY0eiTaMuUVmMtE1f", + "task": "Find a Ricky Kej track to listen and share which has been added in the last year and is between 2 to 10 minutes in soundcloud", + "reference_task_length": 12, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "soundcloud.", + "url": "https://soundcloud.com/discover" + } + }, + { + "match_function_name": "url_semantic_match", + "content": { + "key": "q", + "reference_answer": "Decide whether are searching for Ricky Kej", + "url": "https://soundcloud.com/search?q=Ricky%20Kej" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "filter.created_at", + "reference_answer": "last_year", + "url": "https://soundcloud.com/search/sounds?q=Ricky%20Kej&filter.created_at=last_year" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "filter.duration", + "reference_answer": "medium", + "url": "https://soundcloud.com/search/sounds?q=Ricky%20Kej&filter.created_at=last_year&filter.duration=medium" + } 
+ }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "filter.license", + "reference_answer": "to_share", + "url": "https://soundcloud.com/search/sounds?q=Ricky%20Kej&filter.duration=medium&filter.created_at=last_year&filter.license=to_share" + } + } + ] + }, + { + "index": "hjsXdgoF6M-Mw3AHQBT4G", + "task": "Browse and purchase the classic bundle for the simulation game Cities Skylines in store.steampowered", + "reference_task_length": 10, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "store.steampowered.", + "url": "https://store.steampowered.com/app/2167580/Summoners_War_Chronicles/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/Cities_Skylines__The_Classics_Bundle", + "url": "https://store.steampowered.com/bundle/28813/Cities_Skylines__The_Classics_Bundle/" + } + }, + { + "match_function_name": "element_value_exactly_match", + "content": { + "reference_answer": "In Cart", + "netloc": "store", + "url": "https://store.steampowered.com/bundle/28813/Cities_Skylines__The_Classics_Bundle/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "checkout.steampowered.", + "url": "https://checkout.steampowered.com/login/?purchasetype=self&checkout=1&redir=checkout%2F%3Fpurchasetype%3Dself%26cart%3D4893791698082289964%26snr%3D1_8_4__503&redir_ssl=1&snr=1_8_4__503" + } + } + ] + }, + { + "index": "NaDvF_0Wje2V79UrI58Lg", + "task": "Find tickets for events in Boston in ticketcenter", + "reference_task_length": 4, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "ticketcenter.", + "url": "https://www.ticketcenter.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "massachusetts/boston-tickets", + "url": 
"https://www.ticketcenter.com/massachusetts/boston-tickets" + } + } + ] + }, + { + "index": "H16cZ1rGpDK_X3GBL1Csh", + "task": "Get route information for flights from Washington to New York airports that have already arrived in flightaware", + "reference_task_length": 8, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "flightaware.", + "url": "https://www.flightaware.com/" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": "#Status >.fa_list.hide.text_align_left >li:nth-child(2)>.fa_list_row_right.text_align_right >a:nth-child(1)", + "netloc": "flightaware", + "url": "https://www.flightaware.com/live/findflight?origin=Washington&destination=New+York" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "live/findflight", + "url": "https://www.flightaware.com/live/findflight?origin=Washington&destination=New+York" + } + }, + { + "match_function_name": "url_semantic_match", + "content": { + "key": "origin", + "reference_answer": "Decide whether is searching for Washington, D.C. 
or ZDC", + "url": "https://www.flightaware.com/live/findflight?origin=Washington&destination=New+York" + } + }, + { + "match_function_name": "url_semantic_match", + "content": { + "key": "destination", + "reference_answer": "Decide whether is searching for New York or ZNY", + "url": "https://www.flightaware.com/live/findflight?origin=Washington&destination=New+York" + } + } + ] + }, + { + "index": "tSCh_bVn5QIh_MWCrH1hW", + "task": "Filter women's sports bras that are purple that are S size in underarmour", + "reference_task_length": 7, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "underarmour.", + "url": "https://www.underarmour.com/en-us/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "womens/clothing/sports-bras", + "url": "https://www.underarmour.com/en-us/c/womens/clothing/sports-bras/" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "viewPreference", + "reference_answer": "S", + "url": "https://www.underarmour.com/en-us/c/womens/clothing/sports-bras/?viewPreference=S" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/purple", + "url": "https://www.underarmour.com/en-us/c/womens/clothing/sports-bras/purple/?viewPreference=S&prefn1=size&prefv1=S" + } + } + ] + }, + { + "index": "3Iioy0c1kUexRvCK8pS-Q", + "task": "Find parking prices for upcoming Los Angeles Lakers games in seatgeek", + "reference_task_length": 6, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "seatgeek.", + "url": "https://seatgeek.com/los-angeles-lakers-tickets/parking" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/los-angeles-lakers-tickets", + "url": "https://seatgeek.com/los-angeles-lakers-tickets/parking" + } + }, + { + 
"match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/parking", + "url": "https://seatgeek.com/los-angeles-lakers-tickets/parking" + } + } + ] + }, + { + "index": "6k0v1LbqRssEvjiwwvWDn", + "task": "Repost on my feed a rock playlist in soundcloud", + "reference_task_length": 7, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "soundcloud.", + "url": "https://soundcloud.com/discover" + } + }, + { + "match_function_name": "url_semantic_match", + "content": { + "key": "q", + "reference_answer": "Decide whether is searching for rock", + "url": "https://soundcloud.com/search?q=rock" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/sets", + "url": "https://soundcloud.com/search/sets?q=rock" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".lazyLoadingList__list.sc-list-nostyle.sc-clearfix >li:nth-child(2)>.searchItem >.sound.searchItem__trackItem.playlist.streamContext >.sound__body >.sound__content >.sound__footer.g-all-transitions-300 >div:nth-child(2)>.soundActions.sc-button-toolbar.soundActions__small >.sc-button-group.sc-button-group-small >.sc-button-repost.sc-button-secondary.sc-button.sc-button-small.sc-button-responsive ", + "netloc": "soundcloud", + "url": "https://soundcloud.com/search/sets?q=rock" + } + } + ] + }, + { + "index": "0ZydjXfdu-f7wjwsL02Yj", + "task": "Show most played games based on daily players in store.steampowered", + "reference_task_length": 6, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "store.steampowered.", + "url": "https://store.steampowered.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "charts/mostplayed", + "url": 
"https://store.steampowered.com/charts" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".dropdown_DialogDropDownMenu_1tiuY._DialogInputContainer >div:nth-child(2)", + "netloc": "store", + "url": "https://store.steampowered.com/charts" + } + } + ] + }, + { + "index": "1qkDSDVzX9LZhx6VQL_Yd", + "task": "Browse strollers and apply a filter for the color black within $100 to $250 in kohls", + "reference_task_length": 9, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "kohls.", + "url": "https://www.kohls.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/search.jsp", + "url": "https://www.kohls.com/search.jsp?submit-search=web-regular&search=stroller&kls_sbp=68405109696688798524419626354526575444" + } + }, + { + "match_function_name": "url_semantic_match", + "content": { + "key": "search", + "reference_answer": "Decide whether is searching for stroller", + "url": "https://www.kohls.com/search.jsp?submit-search=web-regular&search=stroller&kls_sbp=68405109696688798524419626354526575444" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "CN", + "reference_answer": "Color:Black", + "url": "https://www.kohls.com/search/black.jsp?CN=Color:Black&search=stroller&S=1&PPP=48&kls_sbp=68405109696688798524419626354526575444&pfm=search%20refine" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "CN", + "reference_answer": "Price:$100 to $250", + "url": "https://www.kohls.com/search/black.jsp?CN=Color:Black+Price:%24100%20to%20%24250&BL=y&search=stroller&S=1&PPP=48&pfm=search%20refine&kls_sbp=68405109696688798524419626354526575444" + } + } + ] + }, + { + "index": "1Te22cad3xH_VsFvczajN", + "task": "Find parking in California city for Limos which also offers free wi-fi in yelp", + 
"reference_task_length": 7, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "yelp.", + "url": "https://www.yelp.com/" + } + }, + { + "match_function_name": "url_semantic_match", + "content": { + "key": "find_loc", + "reference_answer": "Decide whether is searching for California city", + "url": "https://www.yelp.com/search?find_desc=Parking&find_loc=California+City%2C+CA" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "find_desc", + "reference_answer": "Parking", + "url": "https://www.yelp.com/search?find_desc=Parking&find_loc=California+City%2C+CA" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "attrs", + "reference_answer": "WiFi.free", + "url": "https://www.yelp.com/search?find_desc=Parking&find_loc=California+City%2C+CA&attrs=WiFi.free" + } + } + ] + }, + { + "index": "4_WHLeB50DU8DvgWLUwB-", + "task": "Find offers for VIFP Club loyalty number 9016467685 and show the ones that are for members only in carnival", + "reference_task_length": 8, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "carnival.", + "url": "https://www.carnival.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/vifp", + "url": "https://www.carnival.com/vifp?icid=CC_vifp-club_1866" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/cruise-deals", + "url": "https://www.carnival.com/cruise-deals?icid=icp_vifp_11252020_lp_bttmbanner" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "icid", + "reference_answer": "icp_vifp_11252020_lp_bttmbanner", + "url": "https://www.carnival.com/cruise-deals?icid=icp_vifp_11252020_lp_bttmbanner" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + 
"content": { + "reference_answer": ".cd-deal-list__category-title ", + "netloc": "carnival", + "url": "https://www.carnival.com/cruise-deals?icid=icp_vifp_11252020_lp_bttmbanner" + } + } + ] + }, + { + "index": "h0lV1TEIVUn7vjdTai3Xn", + "task": "Show cars with AWD with maximum budget of $50,000 in kbb", + "reference_task_length": 10, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "kbb.", + "url": "https://www.kbb.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/cars-for-sale", + "url": "https://www.kbb.com/cars-for-sale/all" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/awd4wd", + "url": "https://www.kbb.com/cars-for-sale/all/awd4wd/san-jose-ca?isNewSearch=true&zip=95101" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/cars-between-0-and-50000", + "url": "https://www.kbb.com/cars-for-sale/all/cars-between-0-and-50000/san-jose-ca?isNewSearch=true&zip=95101" + } + } + ] + }, + { + "index": "rPr8bIONPSXD0LlKoN4gF", + "task": "Add two $50 playstation store gift cards to cart in gamestop", + "reference_task_length": 8, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "gamestop.", + "url": "https://www.gamestop.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/playstation-store-gift-card-50", + "url": "https://www.gamestop.com/gift-cards/gaming-gift-cards/products/playstation-store-gift-card-50/154535.html" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": "#primary-details-row >div:nth-child(18)>.add-to-cart-buttons.tulsa-atcbutton-toggle >.atc-btns-wrapper 
>div:nth-child(1)>.js-add-to-cart.add-to-cart.btn.btn-primary.add-to-cart-redesign.all ", + "netloc": "gamestop", + "url": "https://www.gamestop.com/gift-cards/gaming-gift-cards/products/playstation-store-gift-card-50/154535.html" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".icon.icon-close ", + "netloc": "gamestop", + "url": "https://www.gamestop.com/gift-cards/gaming-gift-cards/products/playstation-store-gift-card-50/154535.html" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".atc-btn-wrapper.veiled >.js-add-to-cart.add-to-cart.btn.btn-primary.add-to-cart-redesign.all ", + "netloc": "gamestop", + "url": "https://www.gamestop.com/gift-cards/gaming-gift-cards/products/playstation-store-gift-card-50/154535.html" + } + } + ] + }, + { + "index": "VUe0jDQCHGNaaDIAXDoRs", + "task": "Check the rating and user reviews for the game \"Deathloop\" in ign", + "reference_task_length": 6, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "ign.", + "url": "https://www.ign.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/deathloop", + "url": "https://www.ign.com/games/deathloop" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/user-reviews", + "url": "https://www.ign.com/games/deathloop/user-reviews" + } + } + ] + }, + { + "index": "ZpkB1yoN5N36h-JPtwEtv", + "task": "Browse new laptops from $400 to $500 that offers free shippping in ebay", + "reference_task_length": 14, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "ebay.", + "url": "https://www.ebay.com/" + } + }, + { + "match_function_name": "url_semantic_match", + "content": { + "key": 
"_nkw", + "reference_answer": "Decide whether are searching for laptops ", + "url": "https://www.ebay.com/sch/i.html?_from=R40&_trksid=p4432023.m570.l1313&_nkw=laptops&_sacat=0" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "LH_ItemCondition", + "reference_answer": "1000", + "url": "https://www.ebay.com/sch/i.html?_from=R40&_nkw=laptops&_sacat=0&rt=nc&LH_ItemCondition=1000" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "LH_FS", + "reference_answer": "1", + "url": "https://www.ebay.com/sch/i.html?_from=R40&_nkw=laptops&_sacat=0&LH_ItemCondition=1000&rt=nc&LH_FS=1" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "_udlo", + "reference_answer": "400", + "url": "https://www.ebay.com/sch/i.html?_from=R40&_nkw=laptops&_sacat=0&LH_ItemCondition=1000&LH_FS=1&rt=nc&_udlo=400&_udhi=500" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "_udhi", + "reference_answer": "500", + "url": "https://www.ebay.com/sch/i.html?_from=R40&_nkw=laptops&_sacat=0&LH_ItemCondition=1000&LH_FS=1&rt=nc&_udlo=400&_udhi=500" + } + } + ] + }, + { + "index": "NF6hAjWkHaqzdiab5ygr3", + "task": "What is the cheapest luxury car to pickup on the second closest nearby location to New York, United States, 100 in budget", + "reference_task_length": 13, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "budget.", + "url": "https://www.budget.com/en/home" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "vehicles/us/h", + "url": "https://www.budget.com/en/cars/vehicles/us/h" + } + }, + { + "match_function_name": "element_value_semantic_match", + "content": { + "reference_answer": "Decide whether are searching for New York,United States,100", + "netloc": "budget", + "url": "https://www.budget.com/en/cars/vehicles/us/h" + } + }, + { + 
"match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".LocContainer.col-sm-12.col-xs-12.clearfix >li:nth-child(2)>.LocContent >.btn.btn-red ", + "netloc": "budget", + "url": "https://www.budget.com/en/cars/vehicles/us/h" + } + }, + { + "match_function_name": "element_value_exactly_match", + "content": { + "reference_answer": " Price (Low to High)", + "netloc": "budget", + "url": "https://www.budget.com/en/reservation#/vehicles" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".step2dtl >div:nth-child(2)>div:nth-child(2)>section:nth-child(1)>div:nth-child(1)>div:nth-child(1)>.col-sm-7.avlcarimage >.col-lg-7.col-sm-6.col-xs-12.avlcarimagetab >div:nth-child(2)", + "netloc": "budget", + "url": "https://www.budget.com/en/reservation#/vehicles" + } + } + ] + }, + { + "index": "jeW5vticR4VE6PZbI041y", + "task": "Find the Trains in Spain guide that will help you learn about Renfe fare types in thetrainline", + "reference_task_length": 6, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "thetrainline.", + "url": "https://www.thetrainline.com/en-us" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "trains/spain", + "url": "https://www.thetrainline.com/en-us/trains/spain" + } + }, + { + "match_function_name": "element_value_exactly_match", + "content": { + "reference_answer": "Renfe fare types", + "netloc": "thetrainline", + "url": "https://www.thetrainline.com/en-us/trains/spain" + } + } + ] + }, + { + "index": "PGXZYqmGOlNWG_EsOa7nh", + "task": "Find ideas and recommendation for things to do in Cancun in viator", + "reference_task_length": 7, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "viator.", + "url": "https://www.viator.com/" + 
} + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/d631", + "url": "https://www.viator.com/Cancun/d631" + } + } + ] + }, + { + "index": "t0a1mtyC2dGrK4l2Tg6OT", + "task": "Browse the page to find classic rock concert tickets in seatgeek", + "reference_task_length": 3, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "seatgeek.", + "url": "https://seatgeek.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "concert-tickets/classic-rock", + "url": "https://seatgeek.com/concert-tickets/classic-rock" + } + } + ] + }, + { + "index": "B5dxfUM1haoCwTQh6VKgY", + "task": "Open the page to answer a question in the home improvement section in yellowpages", + "reference_task_length": 3, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "yellowpages.", + "url": "https://www.yellowpages.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "q/home-garden/home-improvement-remodeling", + "url": "https://www.yellowpages.com/q/home-garden/home-improvement-remodeling" + } + } + ] + } +] \ No newline at end of file diff --git a/browsergym/webcanvas/src/browsergym/webcanvas/data/mind2web-live_test_20241024.json b/browsergym/webcanvas/src/browsergym/webcanvas/data/mind2web-live_test_20241024.json new file mode 100644 index 00000000..304bc2db --- /dev/null +++ b/browsergym/webcanvas/src/browsergym/webcanvas/data/mind2web-live_test_20241024.json @@ -0,0 +1,4391 @@ +[ + { + "index": 0, + "task": "Find the store location and hours of the closest Gamestop to zip code 90028 and set as home store on gamestop", + "reference_task_length": 8, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "gamestop.", + 
"url": "https://www.gamestop.com/" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "store", + "reference_answer": "2630", + "url": "https://www.gamestop.com/search/?store=2630" + } + } + ] + }, + { + "index": 1, + "task": "Compare available plans for the AeroAPI on flightaware", + "reference_task_length": 4, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "flightaware.", + "url": "https://www.flightaware.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/aeroap", + "url": "https://www.flightaware.com/commercial/aeroapi/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "#compare-plans-section", + "url": "https://www.flightaware.com/commercial/aeroapi/#compare-plans-section" + } + } + ] + }, + { + "index": 2, + "task": "Give a 10 rating to The Terminator II: Judgement Day on imdb", + "reference_task_length": 8, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "imdb.", + "url": "https://www.imdb.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/title/tt0103064/", + "url": "https://www.imdb.com/title/tt0103064/?ref_=nv_sr_srsg_0_tt_8_nm_0_q_terminato" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".sc-3a4309f8-0.bjXIAP.sc-b7c53eda-1.iIQkEw >.sc-3a4309f8-1.dggvUg >div:nth-child(2)>button:nth-child(2)>.ipc-btn__text >.sc-acdbf0f3-3.eWQwwe ", + "netloc": "imdb", + "url": "https://www.imdb.com/title/tt0103064/?ref_=nv_sr_srsg_0_tt_8_nm_0_q_terminato" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".ipc-starbar__touch ", + "netloc": "imdb", + 
"url": "https://www.imdb.com/title/tt0103064/?ref_=nv_sr_srsg_0_tt_8_nm_0_q_terminato" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".ipc-promptable-base__content >div:nth-child(2)>div:nth-child(3)>button:nth-child(2)", + "netloc": "imdb", + "url": "https://www.imdb.com/title/tt0103064/?ref_=nv_sr_srsg_0_tt_8_nm_0_q_terminato" + } + } + ] + }, + { + "index": 3, + "task": "Check the status of train S92 for any disruptions on new.mta.info.", + "reference_task_length": 6, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "new.mta.info", + "url": "https://new.mta.info/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/alerts", + "url": "https://new.mta.info/alerts" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "selectedRoutes", + "reference_answer": "MTA:S92", + "url": "https://new.mta.info/alerts?selectedRoutes=MTA%3AS92&selectedDate=3%2F20%2F2024" + } + } + ] + }, + { + "index": 4, + "task": "Check the status of your iPhone repair on apple.", + "reference_task_length": 5, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "apple.", + "url": "https://www.apple.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "repair", + "url": "https://support.apple.com/repair?cid=gn-ols-repair-lp-get_help" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/my-support", + "url": "https://support.apple.com/my-support" + } + } + ] + }, + { + "index": 5, + "task": "Open the page with a overview about the submission of releases on discogs", + "reference_task_length": 6, + "evaluation": [ + { + "match_function_name": "url_included_match", + 
"content": { + "key": "", + "reference_answer": "discogs.", + "url": "https://www.discogs.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "Overview-Of-Submission-Guidelines-For-Releases", + "url": "https://support.discogs.com/hc/en-us/articles/360004016474-Overview-Of-Submission-Guidelines-For-Releases" + } + } + ] + }, + { + "index": 6, + "task": "View the latest job openings in safety with a salary above 100k per annum, check the details, and apply on mbta.", + "reference_task_length": 16, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "mbta.", + "url": "https://www.mbta.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "governmentjobs.", + "url": "https://www.governmentjobs.com/careers/mbta" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/careers/mbta", + "url": "https://www.governmentjobs.com/careers/mbta" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "department[0]", + "reference_answer": "MBTA - Safety", + "url": "https://www.governmentjobs.com/careers/mbta?department[0]=MBTA%20-%20Safety&salary=100000&sort=PostingDate%7CDescending" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "salary", + "reference_answer": "100000", + "url": "https://www.governmentjobs.com/careers/mbta?department[0]=MBTA%20-%20Safety&salary=100000&sort=PostingDate%7CDescending" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "sort", + "reference_answer": "PostingDate|Descending", + "url": "https://www.governmentjobs.com/careers/mbta?department[0]=MBTA%20-%20Safety&salary=100000&sort=PostingDate%7CDescending" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + 
"reference_answer": ".unstyled >li:nth-child(1)>h3:nth-child(1)>.item-details-link ", + "netloc": "governmentjobs", + "url": "https://www.governmentjobs.com/careers/mbta?department[0]=MBTA%20-%20Safety&salary=100000&sort=PostingDate%7CDescending" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".btn.btn-large.btn-primary.btn-app-primary.flyout-switch-button ", + "netloc": "governmentjobs", + "url": "https://www.governmentjobs.com/careers/mbta" + } + } + ] + }, + { + "index": 7, + "task": "Add a e-gift card to bag of $100 for recipient John and email address abc@test.com from buckeye.foobar@gmail.com with message gift card on underarmour", + "reference_task_length": 15, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "underarmour.", + "url": "https://www.underarmour.com/en-us/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/ua_egift_card/", + "url": "https://www.underarmour.com/en-us/gift-card-landing/" + } + }, + { + "match_function_name": "element_value_exactly_match", + "content": { + "reference_answer": "Johm", + "netloc": "underarmour", + "path": "#name ", + "url": "https://www.underarmour.com/en-us/p/gift_cards/ua_egift_card/GC-0001-ALL.html?dwvar_GC-0001-ALL_color=0001" + } + }, + { + "match_function_name": "element_value_exactly_match", + "content": { + "reference_answer": "abc@test.com", + "netloc": "underarmour", + "path": "#email ", + "url": "https://www.underarmour.com/en-us/p/gift_cards/ua_egift_card/GC-0001-ALL.html?dwvar_GC-0001-ALL_color=0001" + } + }, + { + "match_function_name": "element_value_exactly_match", + "content": { + "reference_answer": "buckeye.foobar@gmail.com", + "netloc": "underarmour", + "path": "#from ", + "url": "https://www.underarmour.com/en-us/p/gift_cards/ua_egift_card/GC-0001-ALL.html?dwvar_GC-0001-ALL_color=0001" 
+ } + }, + { + "match_function_name": "element_value_exactly_match", + "content": { + "reference_answer": "100", + "netloc": "underarmour", + "path": "#amount ", + "url": "https://www.underarmour.com/en-us/p/gift_cards/ua_egift_card/GC-0001-ALL.html?dwvar_GC-0001-ALL_color=0001" + } + }, + { + "match_function_name": "element_value_semantic_match", + "content": { + "reference_answer": "Decide whether the content means gift card", + "netloc": "underarmour", + "path": "#message ", + "url": "https://www.underarmour.com/en-us/p/gift_cards/ua_egift_card/GC-0001-ALL.html?dwvar_GC-0001-ALL_color=0001" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".Button_btn__hhPc5.Button_btn__tertiary__RRG5P ", + "netloc": "underarmour", + "url": "https://www.underarmour.com/en-us/gift-card-landing/" + } + } + ] + }, + { + "index": 8, + "task": "Find a person by address Nice st - 1234, Good, FL on yellowpages", + "reference_task_length": 12, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "yellowpages.", + "url": "https://www.yellowpages.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/address", + "url": "https://people.yellowpages.com/whitepages/address" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "street", + "reference_answer": "Nice st - 1234", + "url": "https://people.yellowpages.com/whitepages/address?street=Nice+st+-+1234&city=Good&state=FL" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "city", + "reference_answer": "Good", + "url": "https://people.yellowpages.com/whitepages/address?street=Nice+st+-+1234&city=Good&state=FL" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "state", + "reference_answer": "FL", + "url": 
"https://people.yellowpages.com/whitepages/address?street=Nice+st+-+1234&city=Good&state=FL" + } + } + ] + }, + { + "index": 9, + "task": "Check my AMC gift card balance with gift card number 87654321 and pin number 9753 on amctheatres.", + "reference_task_length": 8, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "amctheatres.", + "url": "https://www.amctheatres.com/showtimes/all/2024-03-25/default/all?utm_source=google&utm_medium=paidsearch&utm_campaign=OnlineTicketing2023&kclid=bc667845-5652-4d22-b01f-539c6d69db0e&gad_source=1" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/gift-cards", + "url": "https://www.amctheatres.com/gift-cards/check-balance" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/check-balance", + "url": "https://www.amctheatres.com/gift-cards/check-balance" + } + }, + { + "match_function_name": "element_value_exactly_match", + "content": { + "reference_answer": "87654321", + "netloc": "amctheatres", + "path": ".form-full-width-fields >div:nth-child(1)>.gift-card-lookup__input-field ", + "url": "https://www.amctheatres.com/gift-cards/check-balance" + } + }, + { + "match_function_name": "element_value_exactly_match", + "content": { + "reference_answer": "9753", + "netloc": "amctheatres", + "path": ".form-full-width-fields >div:nth-child(2)>.gift-card-lookup__input-field ", + "url": "https://www.amctheatres.com/gift-cards/check-balance" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".gift-card-lookup__actions >div:nth-child(1)>.gift-card-lookup__item ", + "netloc": "amctheatres", + "url": 
"https://www.amctheatres.com/showtimes/all/2024-03-25/default/all?utm_source=google&utm_medium=paidsearch&utm_campaign=OnlineTicketing2023&kclid=bc667845-5652-4d22-b01f-539c6d69db0e&gad_source=1" + } + } + ] + }, + { + "index": 10, + "task": "Get the lowest priced women's plus size one piece swimsuit in color black with customer rating of atleat 5 on kohls", + "reference_task_length": 13, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "kohls.", + "url": "https://www.kohls.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "CN", + "reference_answer": "Gender:Womens", + "url": "https://www.kohls.com/catalog/womens-clothing.jsp?CN=Gender:Womens+Department:Clothing&cc=wms-TN1.0-S-women&kls_sbp=87660515555844928264416369496642981542" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "CN", + "reference_answer": "Swimsuits", + "url": "https://www.kohls.com/catalog/womens-swimsuits-clothing.jsp?CN=Gender:Womens+Category:Swimsuits+Department:Clothing&BST=6749829:6749828&icid=sl-nav-wmn-clothing-womensclothing-swimwear&kls_sbp=87660515555844928264416369496642981542" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "CN", + "reference_answer": "One-Piece", + "url": "https://www.kohls.com/catalog/womens-onepiece-swimsuits-swimsuits-clothing.jsp?CN=Gender:Womens+Product:One-Piece%20Swimsuits+Category:Swimsuits+Department:Clothing&icid=sl-nav-wmn-clothing-swimwear-onepiece&kls_sbp=87660515555844928264416369496642981542" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "CN", + "reference_answer": "SizeRange:Plus", + "url": 
"https://www.kohls.com/catalog/womens-plus-onepiece-swimsuits-swimsuits-clothing.jsp?CN=Gender:Womens+SizeRange:Plus+Product:One-Piece%20Swimsuits+Category:Swimsuits+Department:Clothing&BST=4582406&icid=womensonepieceswim-VN-plus&pfm=internal%20campaign%20p13n_control%20Visual%20Nav&kls_sbp=87660515555844928264416369496642981542" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "CN", + "reference_answer": "Color:Black", + "url": "https://www.kohls.com/catalog/womens-black-plus-onepiece-swimsuits-swimsuits-clothing.jsp?CN=Gender:Womens+Color:Black+SizeRange:Plus+Product:One-Piece%20Swimsuits+Category:Swimsuits+Department:Clothing&S=1&PPP=48&pfm=internal%20campaign%20visual%20nav%20refine&kls_sbp=87660515555844928264416369496642981542" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "S", + "reference_answer": "4", + "url": "https://www.kohls.com/catalog/womens-black-plus-onepiece-swimsuits-swimsuits-clothing.jsp?CN=Gender:Womens+Color:Black+SizeRange:Plus+Product:One-Piece%20Swimsuits+Category:Swimsuits+Department:Clothing&pfm=internal%20campaign%20visual%20nav%20refine&kls_sbp=87660515555844928264416369496642981542&PPP=48&S=4&sks=true" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "CN", + "reference_answer": "TopRated:5", + "url": "https://www.kohls.com/catalog/5-womens-black-plus-onepiece-swimsuits-swimsuits-clothing.jsp?CN=TopRated:5+Gender:Womens+Color:Black+SizeRange:Plus+Product:One-Piece%20Swimsuits+Category:Swimsuits+Department:Clothing&S=4&PPP=48&pfm=internal%20campaign%20visual%20nav%20refine&kls_sbp=87660515555844928264416369496642981542" + } + } + ] + }, + { + "index": 11, + "task": "Find discussions of community and open one with most replies on flightaware.", + "reference_task_length": 6, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "flightaware.", + "url": 
"https://www.flightaware.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "discussions.", + "url": "https://discussions.flightaware.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/top", + "url": "https://discussions.flightaware.com/top" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".topic-list-body >tr:nth-child(1)>.main-link.clearfix.topic-list-data >.link-top-line >.title.raw-link.raw-topic-link ", + "netloc": "discussions", + "url": "https://discussions.flightaware.com/top" + } + } + ] + }, + { + "index": 12, + "task": "Find a 2022 Tesla Model 3 on carmax", + "reference_task_length": 10, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "carmax.", + "url": "https://www.carmax.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/model-3", + "url": "https://www.carmax.com/cars/tesla/model-3?year=2022-2023" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "year", + "reference_answer": "2022", + "url": "https://www.carmax.com/cars/tesla/model-3?year=2022" + } + } + ] + }, + { + "index": 13, + "task": "Get the report from the final environmental impact statement for the Jamaica Bus Depot expansion on new.mta.info", + "reference_task_length": 10, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "new.mta.info", + "url": "https://new.mta.info/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/project/jamaica-bus-depot-expansion", + "url": "https://new.mta.info/project/jamaica-bus-depot-expansion" + } + }, + { + "match_function_name": 
"element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".mta-details.mta-mb-500 >div:nth-child(2)>details:nth-child(3)>summary:nth-child(1)>.mta-flex.mta-items-center >.mta-flex-grow ", + "netloc": "new", + "url": "https://new.mta.info/project/jamaica-bus-depot-expansion" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".mta-details.mta-mb-500 >div:nth-child(2)>details:nth-child(3)>div:nth-child(2)>.field.field--name-field-accordion-text.field--type-text-long.field--label-hidden.field--item >ul:nth-child(2)>li:nth-child(2)>a:nth-child(1)", + "netloc": "new", + "url": "https://new.mta.info/project/jamaica-bus-depot-expansion" + } + } + ] + }, + { + "index": 14, + "task": "Search for the lowest millage used Honda Crosstour 2012 to 2013 near 49102 less than $25000 on cargurus.", + "reference_task_length": 17, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "cargurus.", + "url": "https://www.cargurus.com/" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "entitySelectingHelper.selectedEntity", + "reference_answer": "d2184", + "url": "https://www.cargurus.com/Cars/inventorylisting/viewDetailsFilterViewInventoryListing.action?sourceContext=carGurusHomePageModel&entitySelectingHelper.selectedEntity=d2184&zip=49102" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "zip", + "reference_answer": "49102", + "url": "https://www.cargurus.com/Cars/inventorylisting/viewDetailsFilterViewInventoryListing.action?sourceContext=carGurusHomePageModel&entitySelectingHelper.selectedEntity=d2184&zip=49102" + } + }, + { + "match_function_name": "element_value_exactly_match", + "content": { + "reference_answer": "$25,000", + "netloc": "cargurus", + "path": "#price-range-input-max ", + "url": 
"https://www.cargurus.com/Cars/inventorylisting/viewDetailsFilterViewInventoryListing.action?sourceContext=carGurusHomePageModel&entitySelectingHelper.selectedEntity=d2184&zip=49102" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": "#year-filter-min ", + "netloc": "cargurus", + "url": "https://www.cargurus.com/Cars/inventorylisting/viewDetailsFilterViewInventoryListing.action?sourceContext=carGurusHomePageModel&entitySelectingHelper.selectedEntity=d2184&zip=49102" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": "#year-filter-min ", + "netloc": "cargurus", + "url": "https://www.cargurus.com/Cars/inventorylisting/viewDetailsFilterViewInventoryListing.action?sourceContext=carGurusHomePageModel&entitySelectingHelper.selectedEntity=d2184&zip=49102" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": "#year-filter-max ", + "netloc": "cargurus", + "url": "https://www.cargurus.com/Cars/inventorylisting/viewDetailsFilterViewInventoryListing.action?sourceContext=carGurusHomePageModel&entitySelectingHelper.selectedEntity=d2184&zip=49102" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": "#year-filter-min >option:nth-child(2)", + "netloc": "cargurus", + "url": "https://www.cargurus.com/Cars/inventorylisting/viewDetailsFilterViewInventoryListing.action?sourceContext=carGurusHomePageModel&entitySelectingHelper.selectedEntity=d2184&zip=49102" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": "#Listing-Sort ", + "netloc": "cargurus", + "url": 
"https://www.cargurus.com/Cars/inventorylisting/viewDetailsFilterViewInventoryListing.action?sourceContext=carGurusHomePageModel&entitySelectingHelper.selectedEntity=d2184&zip=49102" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": "#Listing-Sort >option:nth-child(6)", + "netloc": "cargurus", + "url": "https://www.cargurus.com/Cars/inventorylisting/viewDetailsFilterViewInventoryListing.action?sourceContext=carGurusHomePageModel&entitySelectingHelper.selectedEntity=d2184&zip=49102" + } + } + ] + }, + { + "index": 15, + "task": "Browse men's winter coats in size large that is on clearance on kohls.", + "reference_task_length": 8, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "kohls.", + "url": "https://www.kohls.com/" + } + }, + { + "match_function_name": "url_semantic_match", + "content": { + "key": "search", + "reference_answer": "Decide whether are searching for mens winter coat", + "url": "https://www.kohls.com/search/clearance.jsp?CN=Promotions:Clearance+Size:L&BL=y&search=mens%20winter%20coat&S=1&PPP=48&pfm=search%20refine&kls_sbp=59288101756704364110307077102020115511" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "CN", + "reference_answer": "Promotions:Clearance", + "url": "https://www.kohls.com/search/clearance.jsp?CN=Promotions:Clearance+Size:L&BL=y&search=mens%20winter%20coat&S=1&PPP=48&pfm=search%20refine&kls_sbp=59288101756704364110307077102020115511" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "CN", + "reference_answer": "Size:L", + "url": "https://www.kohls.com/search/clearance.jsp?CN=Promotions:Clearance+Size:L&BL=y&search=mens%20winter%20coat&S=1&PPP=48&pfm=search%20refine&kls_sbp=59288101756704364110307077102020115511" + } + } + ] + }, + { + "index": 16, + "task": "Find a walkthrough for the game \"The Legend of Zelda: 
Breath of the Wild\" on ign.", + "reference_task_length": 6, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "ign.", + "url": "https://www.ign.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/games/the-legend-of-zelda-breath-of-the-wild", + "url": "https://www.ign.com/games/the-legend-of-zelda-breath-of-the-wild" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/Walkthrough", + "url": "https://www.ign.com/wikis/the-legend-of-zelda-breath-of-the-wild/Walkthrough" + } + } + ] + }, + { + "index": 17, + "task": "Find an editor's choice review with a score of 10 in the boardgame category on ign", + "reference_task_length": 8, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "ign.", + "url": "https://www.ign.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/editors-choice", + "url": "https://www.ign.com/" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": "#scoreRange ", + "netloc": "ign", + "url": "https://www.ign.com/" + } + }, + { + "match_function_name": "element_value_exactly_match", + "content": { + "reference_answer": "10,10", + "netloc": "ign", + "url": "https://www.ign.com/editors-choice" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": "#genre ", + "netloc": "ign", + "url": "https://www.ign.com/" + } + }, + { + "match_function_name": "element_value_exactly_match", + "content": { + "reference_answer": "board", + "netloc": "ign", + "url": "https://www.ign.com/editors-choice" + } + } + ] + }, + { + "index": 18, + "task": "Sign up for a REI Co-Op membership on rei.", + 
"reference_task_length": 3, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "rei.", + "url": "https://www.rei.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/membership", + "url": "https://www.rei.com/membership" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".hero__ctas >button:nth-child(2)", + "netloc": "rei", + "url": "https://www.rei.com/membership" + } + } + ] + }, + { + "index": 19, + "task": "Find the weight of baggage allowance for economy class on qatarairways", + "reference_task_length": 4, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "qatarairways.", + "url": "https://www.qatarairways.com/en-us/homepage.html" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/baggage/allowance", + "url": "https://www.qatarairways.com/en-us/baggage/allowance.html?iid=ALL67670750" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": "#j-poi-tabs >li:nth-child(1)>.tabmenu-link >span:nth-child(1)", + "netloc": "qatarairways", + "url": "https://www.qatarairways.com/en-us/baggage/allowance.html?iid=ALL67670750" + } + } + ] + }, + { + "index": 20, + "task": "Find flights going from Indira Gandhi to Los Cabos on flightaware", + "reference_task_length": 10, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "flightaware.", + "url": "https://www.flightaware.com/" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "origin", + "reference_answer": "VIDP", + "url": "https://www.flightaware.com/live/findflight?origin=VIDP&destination=MMSD" + } + }, + { + 
"match_function_name": "url_exactly_match", + "content": { + "key": "destination", + "reference_answer": "MMSD", + "url": "https://www.flightaware.com/live/findflight?origin=VIDP&destination=MMSD" + } + } + ] + }, + { + "index": 21, + "task": "Tell me information about what identification I need to bring on my trip on amtrak", + "reference_task_length": 6, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "amtrak.", + "url": "https://www.amtrak.com/home" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/planning-booking", + "url": "https://www.amtrak.com/planning-booking.html" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/tickets-id-safety-security", + "url": "https://www.amtrak.com/planning-booking/tickets-id-safety-security.html" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/passenger-identification", + "url": "https://www.amtrak.com/planning-booking/tickets-id-safety-security/passenger-identification.html" + } + } + ] + }, + { + "index": 22, + "task": "Browse used Audi cars made before 2015 and sort by lowest price on kbb", + "reference_task_length": 8, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "kbb.", + "url": "https://www.kbb.com/" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "intent", + "reference_answer": "used", + "url": "https://www.kbb.com/audi/?intent=used" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "years", + "reference_answer": "1992-2015", + "url": "https://www.kbb.com/car-finder/?intent=used&manufacturers=audi&years=1992-2015" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "sort", + "reference_answer": 
"priceasc", + "url": "https://www.kbb.com/car-finder/?intent=used&manufacturers=audi&years=1992-2015&sort=priceasc" + } + } + ] + }, + { + "index": 23, + "task": "Show crazy credits for the movie \" Prometheus\" on imdb", + "reference_task_length": 6, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "imdb.", + "url": "https://www.imdb.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/tt1446714", + "url": "https://www.imdb.com/title/tt1446714/?ref_=nv_sr_srsg_0_tt_7_nm_1_q_Prometheus" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/crazycredits", + "url": "https://www.imdb.com/title/tt1446714/crazycredits/?tab=cz&ref_=tt_trv_cc" + } + } + ] + }, + { + "index": 24, + "task": "Go to Amazon and add the first wireless headphones to your cart with a budget of $100 or less, that has an active noise-cancelling feature.", + "reference_task_length": 8, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "amazon.", + "url": "https://www.amazon.com/" + } + }, + { + "match_function_name": "url_semantic_match", + "content": { + "key": "k", + "reference_answer": "Decide whether are searching for wireless headphones", + "url": "https://www.amazon.com/s?k=wireless+headphones&crid=3B1DV9AE18GHL&sprefix=wireless+headphones%2Caps%2C737&ref=nb_sb_noss_1" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "high-price", + "reference_answer": "100", + "url": "https://www.amazon.com/s?k=wireless+headphones&crid=3B1DV9AE18GHL&qid=1716877867&rnid=386442011&sprefix=wireless+headphones%2Caps%2C737&ref=sr_nr_p_36_0_0&low-price=&high-price=100" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": "#a-autoid-3-announce ", + 
"netloc": "amazon", + "url": "https://www.amazon.com/s?k=wireless+headphones&rh=n%3A172282%2Cp_36%3A-10000%2Cp_n_feature_two_browse-bin%3A23746030011&dc&ds=v1%3AJ8iOECT4r4taWN1uZiq%2BUlDjD4Nhpo6GPIbBN5RxVAk&crid=3B1DV9AE18GHL&qid=1716877974&rnid=23746028011&sprefix=wireless+headphones%2Caps%2C737&ref=sr_nr_p_n_feature_two_browse-bin_1" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "rh", + "reference_answer": "n:21514463011,p_n_feature_two_browse-bin:23746030011", + "url": "https://www.amazon.com/s?k=wireless+headphones&rh=n%3A172282%2Cp_36%3A-10000%2Cp_n_feature_two_browse-bin%3A23746030011&dc&ds=v1%3AJ8iOECT4r4taWN1uZiq%2BUlDjD4Nhpo6GPIbBN5RxVAk&crid=3B1DV9AE18GHL&qid=1716877974&rnid=23746028011&sprefix=wireless+headphones%2Caps%2C737&ref=sr_nr_p_n_feature_two_browse-bin_1" + } + } + ] + }, + { + "index": 25, + "task": "Find Playstation 5 digital edition on gamestop.", + "reference_task_length": 6, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "gamestop.", + "url": "https://www.gamestop.com/" + } + }, + { + "match_function_name": "url_semantic_match", + "content": { + "key": "q", + "reference_answer": "Decide whether are searching for playstation 5 digital edition", + "url": "https://www.gamestop.com/search/?q=playstation+5+digital+edition&lang=default&start=0&sz=20" + } + } + ] + }, + { + "index": 26, + "task": "Browse Marriott Bonvoy credit cards on marriott", + "reference_task_length": 4, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "marriott.", + "url": "https://www.marriott.com/default.mi" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/earn", + "url": "https://www.marriott.com/loyalty/earn.mi" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": 
"/credit-cards", + "url": "https://www.marriott.com/credit-cards.mi" + } + } + ] + }, + { + "index": 27, + "task": "Show me the list of Men's Blazers, Black, Size M on uniqlo.", + "reference_task_length": 11, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "uniqlo.", + "url": "https://www.uniqlo.com/us/en/?gad_source=1&gclid=CjwKCAjwh4-wBhB3EiwAeJsppNr8jYQ6QYvd2U5_tXunKxWnQ2bHc31FSKpbeh5QvlSzgyQpEIxi3hoCMyIQAvD_BwE" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/men", + "url": "https://www.uniqlo.com/us/en/men/outerwear-and-blazers/blazers?path=%2C%2C23380&colorCodes=COL09&sizeCodes=SMA004&categoryId=23380" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/outerwear-and-blazers/blazers", + "url": "https://www.uniqlo.com/us/en/men/outerwear-and-blazers/blazers?path=%2C%2C23380&colorCodes=COL09&sizeCodes=SMA004&categoryId=23380" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "colorCodes", + "reference_answer": "COL09", + "url": "https://www.uniqlo.com/us/en/men/outerwear-and-blazers/blazers?path=%2C%2C23380&colorCodes=COL09&sizeCodes=SMA004&categoryId=23380" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "sizeCodes", + "reference_answer": "SMA004", + "url": "https://www.uniqlo.com/us/en/men/outerwear-and-blazers/blazers?path=%2C%2C23380&colorCodes=COL09&sizeCodes=SMA004&categoryId=23380" + } + } + ] + }, + { + "index": 28, + "task": "Add formula 1 to my followed sports on foxsports", + "reference_task_length": 7, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "foxsports.", + "url": "https://www.foxsports.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": 
"/motor/formula-1", + "url": "https://www.foxsports.com/motor/formula-1" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".button-favorite.entity.pointer.fs-14 ", + "netloc": "foxsports", + "url": "https://www.foxsports.com/motor/formula-1" + } + } + ] + }, + { + "index": 29, + "task": "Show me the options for a roundtrip leaving from las vegas on flexile dates on the interactive map on united", + "reference_task_length": 12, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "united.", + "url": "https://www.united.com/en/us" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/destination-map", + "url": "https://www.united.com/en/us/destination-map" + } + }, + { + "match_function_name": "element_value_semantic_match", + "content": { + "reference_answer": "Decide whether is searching for las vegas", + "netloc": "united", + "path": "#filterOriginInput ", + "url": "https://www.united.com/en/us/destination-map" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".app-components-AutoComplete-Atmos-styles__autoCompleteOption--gDdQn ", + "netloc": "united", + "url": "https://www.united.com/en/us/destination-map" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".atm-c-select.atm-c-select-field__control ", + "netloc": "united", + "url": "https://www.united.com/en/us/destination-map" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".atm-c-select.atm-c-select-field__control ", + "netloc": "united", + "url": "https://www.united.com/en/us/destination-map" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": 
"selector", + "content": { + "reference_answer": ".non-atmos-datepicker >div:nth-child(1)>button:nth-child(2)", + "netloc": "united", + "url": "https://www.united.com/en/us/destination-map" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": "#flexibleDatesOpt ", + "netloc": "united", + "url": "https://www.united.com/en/us/destination-map" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".app-components-PlacesFilters-HorizontalMapSearchForm-styles__actions--KtDCD >.atm-c-btn.atm-c-btn--primary.atm-c-btn--block ", + "netloc": "united", + "url": "https://www.united.com/en/us/destination-map" + } + } + ] + }, + { + "index": 30, + "task": "Download the e-receipt with the last name Smith and confirmation number X123456989 on budget", + "reference_task_length": 8, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "budget.", + "url": "https://www.budget.com/en/home?ARCIATA=0103202Q&gad_source=1&gclid=Cj0KCQjwjLGyBhCYARIsAPqTz18n_6wf0NJ79MtFaDEctHfaWJrjTzaDNJ-PRPqtxnEJywoNVEYPnL0aAmJyEALw_wcB" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/reservation/get-e-receipt", + "url": "https://www.budget.com/en/reservation/get-e-receipt" + } + }, + { + "match_function_name": "element_value_exactly_match", + "content": { + "reference_answer": "Smith", + "netloc": "budget", + "path": "#lastName ", + "url": "https://www.budget.com/en/reservation/get-e-receipt" + } + }, + { + "match_function_name": "element_value_exactly_match", + "content": { + "reference_answer": "X123456989", + "netloc": "budget", + "path": "#Confirmation-no ", + "url": "https://www.budget.com/en/reservation/get-e-receipt" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + 
"reference_answer": ".VMC-button >.btn.btn-red ", + "netloc": "budget", + "url": "https://www.budget.com/en/reservation/get-e-receipt" + } + } + ] + }, + { + "index": 31, + "task": "show the Life Jackets priced between $100 and $200 on cabelas", + "reference_task_length": 6, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "cabelas.", + "url": "https://www.cabelas.com/shop/en#" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/life-jackets", + "url": "https://www.cabelas.com/l/life-jackets" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "nf-offerprice=100..200", + "url": "https://www.cabelas.com/l/life-jackets#nf-offerprice=100..200" + } + } + ] + }, + { + "index": 32, + "task": "Find 32\" Curved monitor and add the third one to the wish list on newegg", + "reference_task_length": 7, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "newegg.", + "url": "https://www.newegg.com/" + } + }, + { + "match_function_name": "url_semantic_match", + "content": { + "key": "d", + "reference_answer": "Decide whether is searching for 32\" curved monitor", + "url": "https://www.newegg.com/p/pl?d=32%22+curved+monitor" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".list-wrap >div:nth-child(2)>div:nth-child(3)>.item-container >.item-img >.item-quick-action-container >.quick-action ", + "netloc": "newegg", + "url": "https://www.newegg.com/p/pl?d=32%22+curved+monitor" + } + } + ] + }, + { + "index": 33, + "task": "Find the list of all neighborhood maps for Brooklyn on new.mta.info", + "reference_task_length": 5, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "new.mta.info", 
+ "url": "https://new.mta.info/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/subway/mta-neighborhood-maps", + "url": "https://new.mta.info/maps/subway/mta-neighborhood-maps" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/brooklyn", + "url": "https://new.mta.info/maps/subway/mta-neighborhood-maps/brooklyn" + } + } + ] + }, + { + "index": 34, + "task": "Find me the deals available for the Great escape park on sixflags", + "reference_task_length": 7, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "sixflags.", + "url": "https://www.sixflags.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/greatescape", + "url": "https://www.sixflags.com/greatescape" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/store/tickets", + "url": "https://www.sixflags.com/greatescape/store/tickets" + } + } + ] + }, + { + "index": 35, + "task": "Find the movie Donnie Darko and show its complete cast on imdb", + "reference_task_length": 6, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "imdb.", + "url": "https://www.imdb.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/tt0246578", + "url": "https://www.imdb.com/title/tt0246578/?ref_=nv_sr_srsg_0_tt_8_nm_0_q_Donnie%2520Darko" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/fullcredits", + "url": "https://www.imdb.com/title/tt0246578/fullcredits?ref_=tt_cl_sm" + } + } + ] + }, + { + "index": 36, + "task": "Get the most reviewed shopping store that accepts apple pay in Central New York on yelp.", + 
"reference_task_length": 9, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "yelp.", + "url": "https://www.yelp.com/" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "sortby", + "reference_answer": "review_count", + "url": "https://www.yelp.com/search?find_desc=Shopping&find_loc=Central+New+York%2C+NY&attrs=BusinessAcceptsApplePay&sortby=review_count" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "attrs", + "reference_answer": "BusinessAcceptsApplePay", + "url": "https://www.yelp.com/search?find_desc=Shopping&find_loc=Central+New+York%2C+NY&attrs=BusinessAcceptsApplePay&sortby=review_count" + } + }, + { + "match_function_name": "url_semantic_match", + "content": { + "key": "find_loc", + "reference_answer": "Decide whether are searching for Central New York", + "url": "https://www.yelp.com/search?find_desc=Shopping&find_loc=Central+New+York%2C+NY&attrs=BusinessAcceptsApplePay&sortby=review_count" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "find_desc", + "reference_answer": "Shopping", + "url": "https://www.yelp.com/search?find_desc=Shopping&find_loc=Central+New+York%2C+NY&attrs=BusinessAcceptsApplePay&sortby=review_count" + } + } + ] + }, + { + "index": 37, + "task": "Find me the cheapest external HD for an Xbox One on gamestop.", + "reference_task_length": 13, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "gamestop.", + "url": "https://www.gamestop.com/" + } + }, + { + "match_function_name": "url_semantic_match", + "content": { + "key": "q", + "reference_answer": "Decide whether are searching for external HD", + "url": "https://www.gamestop.com/search/?q=external+HD&lang=default&start=0&sz=20" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "prefv1", + "reference_answer": "Xbox", 
+ "url": "https://www.gamestop.com/search/?prefn1=platform&prefv1=Xbox&q=external+HD&view=new&srule=price-low-to-high" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "srule", + "reference_answer": "price-low-to-high", + "url": "https://www.gamestop.com/search/?prefn1=platform&prefv1=Xbox&q=external+HD&view=new&srule=price-low-to-high" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": "#product-grid-container >div:nth-child(3)>.product.grid-tile >.product-tile.product-detail.gme-card.gme-card--shadow.product-tile-render.loaded >a:nth-child(1)", + "netloc": "gamestop", + "url": "https://www.gamestop.com/search/?prefn1=platform&prefv1=Xbox&q=external+HD&view=new&srule=price-low-to-high" + } + } + ] + }, + { + "index": 38, + "task": "Search for job in Miami Florida in Human resources on target.", + "reference_task_length": 14, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "target.", + "url": "https://www.target.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/search-jobs", + "url": "https://jobs.target.com/search-jobs?acm=8355664&alrpm=ALL&ascf=[%7B%22key%22:%22ALL%22,%22value%22:%22%22%7D]" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": "#category-toggle ", + "netloc": "jobs", + "url": "https://jobs.target.com/search-jobs?acm=8355664&alrpm=ALL&ascf=[%7B%22key%22:%22ALL%22,%22value%22:%22%22%7D]" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": "#category-filters-section >ul:nth-child(2)>li:nth-child(8)>label:nth-child(2)>.filter__facet-name ", + "netloc": "jobs", + "url": 
"https://jobs.target.com/search-jobs?acm=8355664&alrpm=ALL&ascf=[%7B%22key%22:%22ALL%22,%22value%22:%22%22%7D]" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": "#country-toggle ", + "netloc": "jobs", + "url": "https://jobs.target.com/search-jobs?acm=8355664&alrpm=ALL&ascf=[%7B%22key%22:%22ALL%22,%22value%22:%22%22%7D]" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": "#country-filters-section >ul:nth-child(2)>li:nth-child(3)>label:nth-child(2)>.filter__facet-name ", + "netloc": "jobs", + "url": "https://jobs.target.com/search-jobs?acm=8355664&alrpm=ALL&ascf=[%7B%22key%22:%22ALL%22,%22value%22:%22%22%7D]" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": "#region-toggle ", + "netloc": "jobs", + "url": "https://jobs.target.com/search-jobs?acm=8355664&alrpm=ALL&ascf=[%7B%22key%22:%22ALL%22,%22value%22:%22%22%7D]" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": "#region-filters-section >ul:nth-child(2)>li:nth-child(7)>label:nth-child(2)>.filter__facet-name ", + "netloc": "jobs", + "url": "https://jobs.target.com/search-jobs?acm=8355664&alrpm=ALL&ascf=[%7B%22key%22:%22ALL%22,%22value%22:%22%22%7D]" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": "#city-toggle ", + "netloc": "jobs", + "url": "https://jobs.target.com/search-jobs?acm=8355664&alrpm=ALL&ascf=[%7B%22key%22:%22ALL%22,%22value%22:%22%22%7D]" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": "#city-filters-section >ul:nth-child(2)>li:nth-child(7)>label:nth-child(2)>.filter__facet-name ", + "netloc": "jobs", + "url": 
"https://jobs.target.com/search-jobs?acm=8355664&alrpm=ALL&ascf=[%7B%22key%22:%22ALL%22,%22value%22:%22%22%7D]" + } + } + ] + }, + { + "index": 39, + "task": "Show me products from Calvin Klein brand menswear list on kohls.", + "reference_task_length": 6, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "kohls.", + "url": "https://www.kohls.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "CN", + "reference_answer": "Gender:Mens", + "url": "https://www.kohls.com/catalog/mens-calvin-klein-clothing.jsp?CN=Gender:Mens+Brand:Calvin%20Klein+Department:Clothing&S=1&PPP=48&kls_sbp=59288101756704364110307077102020115511&pfm=internal%20campaign%20refine" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "CN", + "reference_answer": "Brand:Calvin Klein", + "url": "https://www.kohls.com/catalog/mens-calvin-klein-clothing.jsp?CN=Gender:Mens+Brand:Calvin%20Klein+Department:Clothing&S=1&PPP=48&kls_sbp=59288101756704364110307077102020115511&pfm=internal%20campaign%20refine" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "CN", + "reference_answer": "Department:Clothing", + "url": "https://www.kohls.com/catalog/mens-calvin-klein-clothing.jsp?CN=Gender:Mens+Brand:Calvin%20Klein+Department:Clothing&S=1&PPP=48&kls_sbp=59288101756704364110307077102020115511&pfm=internal%20campaign%20refine" + } + } + ] + }, + { + "index": 40, + "task": "Contact the support service on spothero", + "reference_task_length": 4, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "spothero.", + "url": "https://spothero.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/contact", + "url": "https://spothero.com/contact" + } + } + ] + }, + { + "index": 41, + "task": "Find a flight from Dresden to 
anywhere under $100 on ryanair", + "reference_task_length": 13, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "ryanair.", + "url": "https://www.ryanair.com/us/en" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/cheap-flights", + "url": "https://www.ryanair.com/us/en/cheap-flights" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "budget", + "reference_answer": "100", + "url": "https://www.ryanair.com/us/en/cheap-flights/?from=DRS&out-from-date=2024-05-28&out-to-date=2025-05-28&budget=100" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "from", + "reference_answer": "DRS", + "url": "https://www.ryanair.com/us/en/cheap-flights/?from=DRS&out-from-date=2024-05-28&out-to-date=2025-05-28&budget=100" + } + } + ] + }, + { + "index": 42, + "task": "Look for hair salon in San Diego on yellowpages", + "reference_task_length": 8, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "yellowpages.", + "url": "https://www.yellowpages.com/" + } + }, + { + "match_function_name": "url_semantic_match", + "content": { + "key": "geo_location_terms", + "reference_answer": "Decide whether are searching for San Diego", + "url": "https://www.yellowpages.com/search?search_terms=hair+salon&geo_location_terms=San+Diego%2C+CA" + } + }, + { + "match_function_name": "url_semantic_match", + "content": { + "key": "search_terms", + "reference_answer": "Decide whether are searching for hair salon", + "url": "https://www.yellowpages.com/search?search_terms=hair+salon&geo_location_terms=San+Diego%2C+CA" + } + } + ] + }, + { + "index": 43, + "task": "Find the closest in-store Gamestop location within 50 miles of 21122 on gamestop.", + "reference_task_length": 10, + "evaluation": [ + { + "match_function_name": "url_included_match", + 
"content": { + "key": "", + "reference_answer": "gamestop.", + "url": "https://www.gamestop.com/" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": "#header-redesign >div:nth-child(1)>div:nth-child(1)>button:nth-child(1)>span:nth-child(2)", + "netloc": "gamestop", + "url": "https://www.gamestop.com/" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": "#main-header-container >header:nth-child(1)>nav:nth-child(1)>div:nth-child(2)>.container >.row >nav:nth-child(1)>.menu-group >.left-align-nav >.mobile-store-container >.text-decoration-none.js-my-store-link >.mobile-store >.col-10 >.store-header >.store-name ", + "netloc": "gamestop", + "url": "https://www.gamestop.com/" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".btn-get-in-store-inventory.change-store-button.change-store ", + "netloc": "gamestop", + "url": "https://www.gamestop.com/search/?store=3878" + } + }, + { + "match_function_name": "element_value_exactly_match", + "content": { + "reference_answer": "21122", + "netloc": "gamestop", + "path": "#store-postal-code ", + "url": "https://www.gamestop.com/search/?store=3878" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": "#radius ", + "netloc": "gamestop", + "url": "https://www.gamestop.com/search/?store=3878" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": "#radius >option:nth-child(3)", + "netloc": "gamestop", + "url": "https://www.gamestop.com/search/?store=3878" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".col-12.p-0 >button:nth-child(2)", + "netloc": "gamestop", + "url": 
"https://www.gamestop.com/search/?store=3878" + } + } + ] + }, + { + "index": 44, + "task": "Find information about what I should do when I lose an item on a bus on us.megabus", + "reference_task_length": 3, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "megabus.", + "url": "https://us.megabus.com/?utm_source=google&utm_medium=cpc&utm_campaign=da|megabusus|ggl|brand|ext|trademark&utm_content=457655478662&utm_term=mega%20bus&gad_source=1&gclid=Cj0KCQjwmMayBhDuARIsAM9HM8fCPgvwPIA4e_sHoFbbwfHreiMfR5dd8kni3sV4lB8lAIzLFgUFbhQaAkaXEALw_wcB" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".row >div:nth-child(1)>div:nth-child(4)>.blockContent >.panel.panel-default >.panel-heading >.panel-title >.collapsed ", + "netloc": "us", + "url": "https://us.megabus.com/help" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/help", + "url": "https://us.megabus.com/help" + } + } + ] + }, + { + "index": 45, + "task": "Select a E-Gift card and add to cart with Best Wishes as a message. 
Send it to James Smith with email abc@abc.com on rei", + "reference_task_length": 18, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "rei.", + "url": "https://www.rei.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/gift-card/purchase", + "url": "https://www.rei.com/gift-card/purchase" + } + }, + { + "match_function_name": "element_value_exactly_match", + "content": { + "reference_answer": "James Smith", + "netloc": "rei", + "path": ".giftcard-form-input__body >div:nth-child(3)>div:nth-child(2)>div:nth-child(1)>input:nth-child(1)", + "url": "https://www.rei.com/gift-card/purchase" + } + }, + { + "match_function_name": "element_value_exactly_match", + "content": { + "reference_answer": "abc@abc.com", + "netloc": "rei", + "path": ".giftcard-form-input__body >div:nth-child(6)>div:nth-child(2)>div:nth-child(1)>input:nth-child(1)", + "url": "https://www.rei.com/gift-card/purchase" + } + }, + { + "match_function_name": "element_value_exactly_match", + "content": { + "reference_answer": "abc@abc.com", + "netloc": "rei", + "path": "#cdr-id-9c3ecd ", + "url": "https://www.rei.com/gift-card/purchase" + } + }, + { + "match_function_name": "element_value_semantic_match", + "content": { + "reference_answer": "Decide whether are describing best wishes", + "netloc": "rei", + "path": "#cdr-id-cb8de2 ", + "url": "https://www.rei.com/gift-card/purchase" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".giftcard-form__cta >button:nth-child(2)", + "netloc": "rei", + "url": "https://www.rei.com/gift-card/purchase" + } + } + ] + }, + { + "index": 46, + "task": "Add a front load washing machine under $800 to the cart on menards.", + "reference_task_length": 8, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + 
"reference_answer": "menards.", + "url": "https://www.menards.com/main/home.html" + } + }, + { + "match_function_name": "url_semantic_match", + "content": { + "key": "search", + "reference_answer": "Decide whether are searching for front load washing machine", + "url": "https://www.menards.com/main/search.html?search=front+load+washing+machine&ot=SS" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "priceMax_facet", + "reference_answer": "800", + "url": "https://www.menards.com/main/search.html?search=front+load+washing+machine&priceMax_facet=800" + } + }, + { + "match_function_name": "element_value_exactly_match", + "content": { + "reference_answer": "ADD TO CART", + "netloc": "menards", + "url": "https://www.menards.com/main/search.html?search=front+load+washing+machine&priceMax_facet=800" + } + } + ] + }, + { + "index": 47, + "task": "Search for a full-time job in sales in Springfield and apply for the most recent job on carmax", + "reference_task_length": 14, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "carmax.", + "url": "https://www.carmax.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "careers.", + "url": "https://careers.carmax.com/us/en" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/search-results", + "url": "https://careers.carmax.com/us/en/search-results" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": "#JobCategoryBody >.panel-body.au-target >.phs-facet-results >ul:nth-child(1)>li:nth-child(13)>.phs-checkbox.input-check-group.au-target ", + "netloc": "careers", + "url": "https://careers.carmax.com/us/en/search-results" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + 
"reference_answer": "#CityBody >.panel-body.au-target >.phs-facet-results >ul:nth-child(1)>li:nth-child(176)>.phs-checkbox.input-check-group.au-target ", + "netloc": "careers", + "url": "https://careers.carmax.com/us/en/search-results" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": "#JobTypeBody >.panel-body.au-target >.phs-facet-results >ul:nth-child(1)>li:nth-child(2)>.phs-checkbox.input-check-group.au-target ", + "netloc": "careers", + "url": "https://careers.carmax.com/us/en/search-results" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": "#sortselect >option:nth-child(2)", + "netloc": "careers", + "url": "https://careers.carmax.com/us/en/search-results" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".au-target >li:nth-child(1)>.information >span:nth-child(2)>.au-target >.job-title >span:nth-child(1)", + "netloc": "careers", + "url": "https://careers.carmax.com/us/en/search-results" + } + } + ] + }, + { + "index": 48, + "task": "Browse coffee makers that are rated 5 stars on kohls", + "reference_task_length": 7, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "kohls.", + "url": "https://www.kohls.com/" + } + }, + { + "match_function_name": "url_semantic_match", + "content": { + "key": "search", + "reference_answer": "Decide whether are searching for coffee maker", + "url": "https://www.kohls.com/search.jsp?submit-search=web-regular&search=coffee+maker&kls_sbp=34524031611978259241165260194179142249" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "CN", + "reference_answer": "TopRated:5", + "url": 
"https://www.kohls.com/search/5.jsp?CN=TopRated:5&search=coffee%20maker&S=1&PPP=48&kls_sbp=87660515555844928264416369496642981542&pfm=search%20refine" + } + } + ] + }, + { + "index": 49, + "task": "Browse spider-man toys for kids and sort by lowest price on kohls", + "reference_task_length": 8, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "kohls.", + "url": "https://www.kohls.com/" + } + }, + { + "match_function_name": "url_semantic_match", + "content": { + "key": "search", + "reference_answer": "Decide whether is searching for spiderman toy", + "url": "https://www.kohls.com/search.jsp?submit-search=web-ta-keyword&search=spiderman+toys&kls_sbp=59987601549248944582634263361106786813" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "CN", + "reference_answer": "AgeAppropriate:Kids", + "url": "https://www.kohls.com/search/kids.jsp?CN=AgeAppropriate:Kids&search=spiderman%20toys&S=1&PPP=48&kls_sbp=87660515555844928264416369496642981542&pfm=search%20refine" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "S", + "reference_answer": "4", + "url": "https://www.kohls.com/search/kids.jsp?CN=AgeAppropriate:Kids&search=spiderman%20toys&kls_sbp=87660515555844928264416369496642981542&pfm=search%20refine&PPP=48&S=4&sks=true" + } + } + ] + }, + { + "index": 50, + "task": "Set the first recommended song on the homepage as a current obsession on last.fm", + "reference_task_length": 4, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "last.fm", + "url": "https://www.last.fm/" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": "#genretodayitem0 >.genretodaybar_wrap >.genretodaybar >.genretodaybar_scroll >.genretoday_track >span:nth-child(1)>a:nth-child(1)", + "netloc": "last", + "url": 
"https://www.last.fm/" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": "#mantle_skin >header:nth-child(2)>div:nth-child(1)>div:nth-child(2)>div:nth-child(5)>div:nth-child(2)>div:nth-child(3)>button:nth-child(1)", + "netloc": "last", + "url": "https://www.last.fm/" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": "#header-more-actions-41dc55af-ce49-4a96-89f6-47247a5bdf1c >li:nth-child(1)>.dropdown-menu-clickable-item.more-item--obsession ", + "netloc": "last", + "url": "https://www.last.fm/" + } + } + ] + }, + { + "index": 51, + "task": "Browse the page with event planning tips on eventbrite", + "reference_task_length": 5, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "eventbrite.", + "url": "https://www.eventbrite.com/organizer/overview/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "category/event-planning", + "url": "https://www.eventbrite.com/blog/category/event-planning/" + } + } + ] + }, + { + "index": 52, + "task": "What are the family rides available at frontier city? 
on sixflags", + "reference_task_length": 7, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "sixflags.", + "url": "https://www.sixflags.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/frontiercity/", + "url": "https://www.sixflags.com/frontiercity/store/tickets" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/family-rides", + "url": "https://www.sixflags.com/frontiercity/things-to-do/rides/family-rides#family-ride" + } + } + ] + }, + { + "index": 53, + "task": "Find a store in Tempe, Arizona, make it my store, and then visit the store page and see whats happening in store on ikea.", + "reference_task_length": 10, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "ikea.", + "url": "https://www.ikea.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/stores", + "url": "https://www.ikea.com/us/en/stores/" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": "#google-maps-store-select ", + "netloc": "ikea", + "url": "https://www.ikea.com/us/en/stores/" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": "#google-maps-store-select >option:nth-child(3)", + "netloc": "ikea", + "url": "https://www.ikea.com/us/en/stores/" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".localstore-buttons__container >button:nth-child(2)>span:nth-child(1)", + "netloc": "ikea", + "url": "https://www.ikea.com/us/en/stores/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + 
"reference_answer": "events/ikea-tempe-az/", + "url": "https://www.ikea.com/us/en/stores/events/ikea-tempe-az/" + } + } + ] + }, + { + "index": 54, + "task": "Add to my wish list the highest rated activity in Amsterdam on viator", + "reference_task_length": 6, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "viator.", + "url": "https://www.viator.com/?m=28353&supag=122704388281&supca=12512866044&supsc=kwd-270303623&supai=504932286140&supdv=c&supnt=g&suplp=9069536&supli=&supti=kwd-270303623&tsem=true&supci=kwd-270303623&supap1=&supap2=&supfi=&gad_source=1&gclid=Cj0KCQjwxqayBhDFARIsAANWRnTu9xrLeHE6TC3-BDjL2mm8xSrtUXbCs-5JXo32-w7N5UrlIFkVfF8aAvn4EALw_wcB" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/Amsterdam/", + "url": "https://www.viator.com/Amsterdam/d525-ttd" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "sortType", + "reference_answer": "rating", + "url": "https://www.viator.com/Amsterdam/d525-ttd?sortType=rating" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".productListCardWrapper__VlW0.borderTopBottom__1o7N >.imageWrapper__1McA >.imageContainer__1Y4g >.imageFeaturesContainer__JRmJ >.addToWishlistBtn__2VnP >.button__11F2 >.wishlistBadge__2XdE >.heart__1EjT >div:nth-child(1)>svg:nth-child(1)", + "netloc": "viator", + "url": "https://www.viator.com/Amsterdam/d525-ttd?sortType=rating" + } + } + ] + }, + { + "index": 55, + "task": "Check the current standings for MLS on foxsports", + "reference_task_length": 4, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "foxsports.", + "url": "https://www.foxsports.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/soccer/mls", + "url": 
"https://www.foxsports.com/soccer/mls" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/standings", + "url": "https://www.foxsports.com/soccer/mls/standings" + } + } + ] + }, + { + "index": 56, + "task": "Find the statistics of the player Cristiano Ronaldo for the national team of Portugal in all the world cups in espn.com.", + "reference_task_length": 10, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "espn.", + "url": "https://www.espn.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "_/id/22774", + "url": "https://www.espn.com/soccer/player/_/id/22774/cristiano-ronaldo" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/team/482", + "url": "https://www.espn.com/soccer/player/stats/_/id/22774/team/482/type/fifa.world" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/fifa.world", + "url": "https://www.espn.com/soccer/player/stats/_/id/22774/team/482/type/fifa.world" + } + } + ] + }, + { + "index": 57, + "task": "Show schedule for East Boston Ferry outbound Lewis Wharf and Long Wharf (North) stop on mbta", + "reference_task_length": 9, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "mbta.", + "url": "https://www.mbta.com/" + } + }, + { + "match_function_name": "url_semantic_match", + "content": { + "key": "query", + "reference_answer": "Decide whether are searching for East Boston Ferry", + "url": "https://www.mbta.com/schedules/Boat-EastBoston/timetable?from=search-route--ferry&query=East+Boston+Ferry" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/line", + "url": 
"https://www.mbta.com/schedules/Boat-EastBoston/line" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "schedule_finder[origin]", + "reference_answer": "Boat-Long", + "url": "https://www.mbta.com/schedules/Boat-EastBoston/line?schedule_finder%5Bdirection_id%5D=0&schedule_finder%5Borigin%5D=Boat-Long" + } + } + ] + }, + { + "index": 58, + "task": "Find technical specs for the latest Macbook Air on apple", + "reference_task_length": 4, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "apple.", + "url": "https://www.apple.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/macbook-air/", + "url": "https://www.apple.com/macbook-air/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/specs/", + "url": "https://www.apple.com/macbook-air/specs/" + } + } + ] + }, + { + "index": 59, + "task": "Find out what popular events are being held this weekend in the category performing and visual arts near Chester, UK on eventbrite", + "reference_task_length": 10, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "eventbrite.", + "url": "https://www.eventbrite.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/united-kingdom--chester", + "url": "https://www.eventbrite.com/d/united-kingdom--chester/arts--events--this-weekend/?page=1" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/arts--", + "url": "https://www.eventbrite.com/d/united-kingdom--chester/arts--events--this-weekend/?page=1" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "events", + "url": 
"https://www.eventbrite.com/d/united-kingdom--chester/arts--events--this-weekend/?page=1" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "--this-weekend", + "url": "https://www.eventbrite.com/d/united-kingdom--chester/arts--events--this-weekend/?page=1" + } + } + ] + }, + { + "index": 60, + "task": "Find the last game of the season for the Toronto Raptors on sports.yahoo.", + "reference_task_length": 5, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "sports.yahoo.", + "url": "https://sports.yahoo.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/nba", + "url": "https://sports.yahoo.com/nba/teams/toronto/schedule" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/toronto", + "url": "https://sports.yahoo.com/nba/teams/toronto/schedule" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/schedule/", + "url": "https://sports.yahoo.com/nba/teams/toronto/schedule" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": "#Col2-5-TeamSchedule-Proxy > div > div > div:nth-child(4) > div", + "netloc": "sports", + "url": "https://sports.yahoo.com/nba/teams/toronto/" + } + } + ] + }, + { + "index": 61, + "task": "Find solutions for Airport and fill the contact form with message to \"Send Brochure\". Contact information John Smith. Email: abc@abc.com. 
Phone #: 88889999 on flightaware", + "reference_task_length": 18, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "flightaware.", + "url": "https://www.flightaware.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/airports", + "url": "https://industry.flightaware.com/airports" + } + }, + { + "match_function_name": "element_value_exactly_match", + "content": { + "reference_answer": "John", + "netloc": "industry", + "path": ".hs_firstname.hs-firstname.hs-fieldtype-text.field.hs-form-field >.input >.hs-input ", + "url": "https://industry.flightaware.com/airports" + } + }, + { + "match_function_name": "element_value_exactly_match", + "content": { + "reference_answer": "Smith", + "netloc": "industry", + "path": ".hs_lastname.hs-lastname.hs-fieldtype-text.field.hs-form-field >.input >.hs-input ", + "url": "https://industry.flightaware.com/airports" + } + }, + { + "match_function_name": "element_value_exactly_match", + "content": { + "reference_answer": "abc@abc.com", + "netloc": "industry", + "path": ".hs_email.hs-email.hs-fieldtype-text.field.hs-form-field >.input >.hs-input ", + "url": "https://industry.flightaware.com/airports" + } + }, + { + "match_function_name": "element_value_exactly_match", + "content": { + "reference_answer": "88889999", + "netloc": "industry", + "path": ".hs_phone.hs-phone.hs-fieldtype-phonenumber.field.hs-form-field >.input >.hs-input ", + "url": "https://industry.flightaware.com/airports" + } + }, + { + "match_function_name": "element_value_semantic_match", + "content": { + "reference_answer": "Decide whether the content is to send brochure", + "netloc": "industry", + "path": ".hs-input.hs-fieldtype-textarea ", + "url": "https://industry.flightaware.com/airports" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": 
".hs-button.primary.large ", + "netloc": "industry", + "url": "https://industry.flightaware.com/airports" + } + } + ] + }, + { + "index": 62, + "task": "Find Toyota Corolla from the year 2018 to 2023 in red color on carmax.", + "reference_task_length": 13, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "carmax.", + "url": "https://www.carmax.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "year", + "reference_answer": "2018", + "url": "https://www.carmax.com/cars/toyota/corolla/corolla-cross/corolla-cross-hybrid/corolla-hatchback/corolla-hybrid/corolla-im/red?year=2018-2023" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/corolla", + "url": "https://www.carmax.com/cars/toyota/corolla/corolla-cross/corolla-cross-hybrid/corolla-hatchback/corolla-hybrid/corolla-im/red?year=2018-2023" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/red", + "url": "https://www.carmax.com/cars/toyota/corolla/corolla-cross/corolla-cross-hybrid/corolla-hatchback/corolla-hybrid/corolla-im/red?year=2018-2023" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "year", + "reference_answer": "2023", + "url": "https://www.carmax.com/cars/toyota/corolla/corolla-cross/corolla-cross-hybrid/corolla-hatchback/corolla-hybrid/corolla-im/red?year=2018-2023" + } + } + ] + }, + { + "index": 63, + "task": "Go to nfl.com and find the current NFL standings for the AFC East division and go to the page of which team is in first place.", + "reference_task_length": 4, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "nfl.", + "url": "https://www.nfl.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": 
"/standings/", + "url": "https://www.nfl.com/standings/" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".d3-l-grid--inner >div:nth-child(1)>.d3-o-table--horizontal-scroll >table:nth-child(1)>tbody:nth-child(3)>tr:nth-child(1)>td:nth-child(1)>.d3-o-club-info >.d3-o-club-shortname ", + "netloc": "nfl", + "url": "https://www.nfl.com/standings/" + } + } + ] + }, + { + "index": 64, + "task": "Check the national cafe menu on amtrak", + "reference_task_length": 6, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "amtrak.", + "url": "https://www.amtrak.com/home.html" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/onboard", + "url": "https://www.amtrak.com/onboard.html" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/meals-dining", + "url": "https://www.amtrak.com/onboard/meals-dining.html" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/cafe-car", + "url": "https://www.amtrak.com/onboard/meals-dining/cafe-car.html" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".show__tablet-and-above.text-with-image__text-block >ul:nth-child(5)>li:nth-child(1)>a:nth-child(1)>.secondary-link-dark ", + "netloc": "amtrak", + "url": "https://www.amtrak.com/onboard/meals-dining/cafe-car.html" + } + } + ] + }, + { + "index": 65, + "task": "View all of the Most Popular TV on rottentomatoes.", + "reference_task_length": 3, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "rottentomatoes.", + "url": "https://www.rottentomatoes.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + 
"key": "", + "reference_answer": "/browse/tv_series_browse/sort:popular", + "url": "https://www.rottentomatoes.com/browse/tv_series_browse/sort:popular" + } + } + ] + }, + { + "index": 66, + "task": "Compare Apple watches and learn more about the ultra version on apple", + "reference_task_length": 4, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "apple.", + "url": "https://www.apple.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/apple-watch-ultra-2", + "url": "https://www.apple.com/watch/compare/" + } + } + ] + }, + { + "index": 67, + "task": "Search for the playlists \"Pop Workout mix\" and filtered by tag #Dance & edm on soundcloud.", + "reference_task_length": 7, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "soundcloud.", + "url": "https://soundcloud.com/" + } + }, + { + "match_function_name": "url_semantic_match", + "content": { + "key": "q", + "reference_answer": "Decide whether are searching for pop workout mix", + "url": "https://soundcloud.com/search/sets?q=pop%20workout%20mix&filter.genre=dance%20%26%20edm" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "filter.genre", + "reference_answer": "dance & edm", + "url": "https://soundcloud.com/search/sets?q=pop%20workout%20mix&filter.genre=dance%20%26%20edm" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/search/sets", + "url": "https://soundcloud.com/search/sets?q=pop%20workout%20mix&filter.genre=dance%20%26%20edm" + } + } + ] + }, + { + "index": 68, + "task": "Check in with confirmation number 10987654 for my flight with last name Lewis on united.", + "reference_task_length": 7, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": 
"united.", + "url": "https://www.united.com/en/gb" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": "#checkInTab >h2:nth-child(2)>div:nth-child(1)>div:nth-child(1)", + "netloc": "united", + "url": "https://www.united.com/en/gb" + } + }, + { + "match_function_name": "element_value_exactly_match", + "content": { + "reference_answer": "10987654", + "netloc": "united", + "path": "#flightCheckInConfNumber ", + "url": "https://www.united.com/en/gb" + } + }, + { + "match_function_name": "element_value_exactly_match", + "content": { + "reference_answer": "Lewis", + "netloc": "united", + "path": "#flightCheckInLastName ", + "url": "https://www.united.com/en/gb" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": "#formSubmitBtn >.atm-c-btn__text ", + "netloc": "united", + "url": "https://www.united.com/en/gb" + } + } + ] + }, + { + "index": 69, + "task": "Check balance of gift card no. 
1234567 with pin 0001 on marriott", + "reference_task_length": 10, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "marriott.", + "url": "https://www.marriott.com/default.mi?nst=paid&cid=PAI_GLB0004EMN_GLE000AE45_GLF000KWXG&ppc=ppc&pId=nancppc&gad_source=1&gclid=CjwKCAjwnv-vBhBdEiwABCYQA-XQ4aocWC7L52fCOIGs9Z0y465fq_nZqFinJ2aYoz-7qg-GfbXY2hoCtPEQAvD_BwE&gclsrc=aw.ds" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "gifts.", + "url": "https://gifts.marriott.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/check-balance/", + "url": "https://gifts.marriott.com/check-balance/" + } + }, + { + "match_function_name": "element_value_exactly_match", + "content": { + "reference_answer": "1234567", + "netloc": "gifts", + "path": "#cws_txt_gcNum ", + "url": "https://gifts.marriott.com/check-balance/" + } + }, + { + "match_function_name": "element_value_exactly_match", + "content": { + "reference_answer": "0001", + "netloc": "gifts", + "path": "#cws_txt_gcPin ", + "url": "https://gifts.marriott.com/check-balance/" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": "#cws_btn_checkGcBalance ", + "netloc": "gifts", + "url": "https://gifts.marriott.com/check-balance/" + } + } + ] + }, + { + "index": 70, + "task": "Search Cantonese food near Chicago, IL that are open now and priced $$ on yelp", + "reference_task_length": 9, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "yelp.", + "url": "https://www.yelp.com/" + } + }, + { + "match_function_name": "url_semantic_match", + "content": { + "key": "find_desc", + "reference_answer": "Decide whether are searching for Cantonese food", + "url": 
"https://www.yelp.com/search?find_desc=Cantonese+food&find_loc=Chicago%2C+IL" + } + }, + { + "match_function_name": "url_semantic_match", + "content": { + "key": "find_loc", + "reference_answer": "Decide whether are searching for Chicago, IL", + "url": "https://www.yelp.com/search?find_desc=Cantonese+food&find_loc=Chicago%2C+IL" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "attrs", + "reference_answer": "RestaurantsPriceRange2.2", + "url": "https://www.yelp.com/search?find_desc=Cantonese+food&find_loc=Chicago%2C+IL&attrs=RestaurantsPriceRange2.2" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "open_now", + "reference_answer": "724", + "url": "https://www.yelp.com/search?find_desc=Cantonese+food&find_loc=Chicago%2C+IL&attrs=RestaurantsPriceRange2.2&open_now=724" + } + } + ] + }, + { + "index": 71, + "task": "Get the frozen vegan cheese pizza between 5 to 10 usd on target.", + "reference_task_length": 17, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "target.", + "url": "https://www.target.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "Zgulk4", + "url": "https://www.target.com/c/frozen-pizza-foods-grocery/-/N-5xsz4Zal25lfgbks1Z76zjqZgulk4?moveTo=product-list-grid" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "Z76zjq", + "url": "https://www.target.com/c/frozen-pizza-foods-grocery/-/N-5xsz4Zal25lfgbks1Z76zjqZgulk4?moveTo=product-list-grid" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "Zal25lfgbks1", + "url": "https://www.target.com/c/frozen-pizza-foods-grocery/-/N-5xsz4Zal25lfgbks1Z76zjqZgulk4?moveTo=product-list-grid" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": 
"/frozen-pizza-foods-grocery/", + "url": "https://www.target.com/c/frozen-pizza-foods-grocery/-/N-5xsz4Zal25lfgbks1Z76zjqZgulk4?moveTo=product-list-grid" + } + } + ] + }, + { + "index": 72, + "task": "Find bluetooth vertical mouse with most reviews and add two to my shopping cart on newegg.", + "reference_task_length": 9, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "newegg.", + "url": "https://www.newegg.com/" + } + }, + { + "match_function_name": "url_semantic_match", + "content": { + "key": "d", + "reference_answer": "Decide whether are searching for bluetooth vertical mouse", + "url": "https://www.newegg.com/p/pl?d=bluetooth+vertical+mouse&Order=5" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "Order", + "reference_answer": "5", + "url": "https://www.newegg.com/p/pl?d=bluetooth+vertical+mouse&Order=5" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".qty-box-plus.fas.fa-plus ", + "netloc": "newegg", + "url": "https://www.newegg.com/kensington-k72356us/p/0TP-001D-00518" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".nav-col >.btn.btn-primary.btn-wide ", + "netloc": "newegg", + "url": "https://www.newegg.com/kensington-k72356us/p/0TP-001D-00518" + } + } + ] + }, + { + "index": 73, + "task": "Find the page with instructions on how to return orders online on uniqlo", + "reference_task_length": 6, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "uniqlo.", + "url": "https://www.uniqlo.com/us/en/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/Returns-and-Exchanges", + "url": 
"https://faq-us.uniqlo.com/articles/en_US/FAQ/Returns-and-Exchanges/?l=en_US&c=category_uq_us%3AUQ_C1_7&fs=Search&pn=1" + } + } + ] + }, + { + "index": 74, + "task": "Find the weekly ad for the store closest to zip code 82718 on menards", + "reference_task_length": 8, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "menards.", + "url": "https://www.menards.com/main/home.html" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "store", + "reference_answer": "3325", + "url": "https://www.menards.com/store-details/store.html?store=3325" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/store-details/store", + "url": "https://www.menards.com/store-details/store.html?store=3325" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/main/flyerselectstore", + "url": "https://www.menards.com/main/flyerselectstore.html" + } + } + ] + }, + { + "index": 75, + "task": "Add a Swivel vacuum under $150 to my cart on menards.", + "reference_task_length": 8, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "menards.", + "url": "https://www.menards.com/main/home.html" + } + }, + { + "match_function_name": "url_semantic_match", + "content": { + "key": "search", + "reference_answer": "Decide whether are searching for swivel vacuum", + "url": "https://www.menards.com/main/search.html?search=swivel+vacuum&priceMax_facet=150" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "priceMax_facet", + "reference_answer": "150", + "url": "https://www.menards.com/main/search.html?search=swivel+vacuum&priceMax_facet=150" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": "#searchItems 
>div:nth-child(4)>div:nth-child(1)>.details >.pb-4.row >.col-12.pt-3.xs-single-col-9 >div:nth-child(2)>div:nth-child(1)>.btn.btn-block.btn-outline-primary.d-print-none ", + "netloc": "menards", + "url": "https://www.menards.com/main/search.html?search=swivel+vacuum&priceMax_facet=150" + } + } + ] + }, + { + "index": 76, + "task": "What are the food festivals happening in Colorado This weekend? on eventbrite", + "reference_task_length": 9, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "eventbrite.", + "url": "https://www.eventbrite.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/united-states--colorado", + "url": "https://www.eventbrite.com/d/united-states--colorado/food-festival/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/food-festival", + "url": "https://www.eventbrite.com/d/united-states--colorado/food-festival/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/events--this-weekend", + "url": "https://www.eventbrite.com/d/united-states--colorado/food-festival/" + } + } + ] + }, + { + "index": 77, + "task": "Explore the trending playlists,filter by tag #Electronics, play the first playlist and like it on soundcloud", + "reference_task_length": 9, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "soundcloud.", + "url": "https://soundcloud.com/" + } + }, + { + "match_function_name": "url_semantic_match", + "content": { + "key": "q", + "reference_answer": "Decide whether is searching for trending playlists", + "url": "https://soundcloud.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/sets", + "url": "https://soundcloud.com/search/sets?q=trending%20playlists" 
+ } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "filter.genre", + "reference_answer": "electronic", + "url": "https://soundcloud.com/search/sets?q=trending%20playlists&filter.genre=electronic" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".lazyLoadingList__list.sc-list-nostyle.sc-clearfix >li:nth-child(2)>.searchItem >.sound.searchItem__trackItem.playlist.streamContext >.sound__body >.sound__content >div:nth-child(1)>.soundTitle.sc-clearfix.sc-hyphenate.sc-type-h2.sc-text-h4.streamContext >.soundTitle__titleContainer >.soundTitle__playButton >.sc-button-play.playButton.sc-button.sc-button-xlarge ", + "netloc": "soundcloud", + "url": "https://soundcloud.com/search/sets?q=trending%20playlists&filter.genre=electronic" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".sound.searchItem__trackItem.playlist.streamContext.playing >.sound__body >.sound__content >.sound__footer.g-all-transitions-300 >div:nth-child(2)>.soundActions.sc-button-toolbar.soundActions__small >.sc-button-group.sc-button-group-small >.sc-button-like.sc-button-secondary.sc-button.sc-button-small.sc-button-responsive ", + "netloc": "soundcloud", + "url": "https://soundcloud.com/search/sets?q=trending%20playlists&filter.genre=electronic" + } + } + ] + }, + { + "index": 78, + "task": "Browse through the Las Vegas city guide and find message services nearest to Henderson, the service provider should have a BBB rating of A+ on yellowpages", + "reference_task_length": 11, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "yellowpages.", + "url": "https://www.yellowpages.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/las-vegas-nv", + "url": 
"https://www.yellowpages.com/las-vegas-nv" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "refinements", + "reference_answer": "bbb_grade_display:1", + "url": "https://www.yellowpages.com/las-vegas-nv/massage-therapists?refinements=bbb_grade_display%3A1&refinements=neighborhood%3AHenderson" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "refinements", + "reference_answer": "neighborhood:Henderson", + "url": "https://www.yellowpages.com/las-vegas-nv/massage-therapists?refinements=bbb_grade_display%3A1&refinements=neighborhood%3AHenderson" + } + } + ] + }, + { + "index": 79, + "task": "Add The Wire to the watchlist on tvguide", + "reference_task_length": 6, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "tvguide.", + "url": "https://www.tvguide.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/tvshows/the-wire/", + "url": "https://www.tvguide.com/" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".c-buttonAddToWatchlist.g-text-bold.u-text-center.u-text-capitalize.o-button.o-button-large.o-button-icon.o-button-smallRound.o-button-primary ", + "netloc": "tvguide", + "url": "https://www.tvguide.com/" + } + } + ] + }, + { + "index": 80, + "task": "Find 5 star rated saltwater rods on cabelas.", + "reference_task_length": 6, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "cabelas.", + "url": "https://www.cabelas.com/shop/en" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "5 Stars", + "url": "https://www.cabelas.com/l/saltwater-rods#f-bvratings=5%20Stars" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + 
"reference_answer": "saltwater-rods", + "url": "https://www.cabelas.com/l/saltwater-rods#f-bvratings=5%20Stars" + } + } + ] + }, + { + "index": 81, + "task": "Add Pro Display XDR with nano texture to bag with all the accessories on apple", + "reference_task_length": 9, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "apple.", + "url": "https://www.apple.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/pro-display-xdr/", + "url": "https://www.apple.com/pro-display-xdr/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/shop/buy-mac/", + "url": "https://www.apple.com/shop/buy-mac/pro-display-xdr" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".rc-dimension.rf-flagship-productselection-dimension.rf-flagship-productselection-dimensionfinish >div:nth-child(3)>div:nth-child(2)>label:nth-child(2)>.row.row-logical >span:nth-child(1)>span:nth-child(1)", + "netloc": "apple", + "url": "https://www.apple.com/shop/buy-mac/pro-display-xdr" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/nano-glass", + "url": "https://www.apple.com/shop/buy-mac/pro-display-xdr" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".rf-accessories-wrapper >div:nth-child(1)>.rf-simpleaccessorytileviewslot.rf-simpleaccessorytileviewslot-enabled.row.as-l-container >.rf-simpleaccessorytileviewslot-section.column.large-6.large-offset-1.small-12.small-offset-0 >fieldset:nth-child(1)>.rf-simpleaccessorytileview-selectors >ul:nth-child(1)>li:nth-child(2)>label:nth-child(2)>.row >span:nth-child(1)>span:nth-child(1)", + "netloc": "apple", + "url": 
"https://www.apple.com/shop/buy-mac/pro-display-xdr" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".rf-simpleaccessorytileviewslot.rf-simpleaccessorytileviewslot-enabled.row.as-l-container >.rf-simpleaccessorytileviewslot-section.column.large-6.large-offset-1.small-12.small-offset-0 >fieldset:nth-child(1)>.rf-simpleaccessorytileview-selectors >ul:nth-child(1)>li:nth-child(2)>label:nth-child(2)>.row >span:nth-child(1)>span:nth-child(1)", + "netloc": "apple", + "url": "https://www.apple.com/shop/buy-mac/pro-display-xdr" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".button.button-block ", + "netloc": "apple", + "url": "https://www.apple.com/shop/buy-mac/pro-display-xdr/nano-glass" + } + } + ] + }, + { + "index": 82, + "task": "Find a list of Tours that contain visits to the Louvre rated 5 stars on viator", + "reference_task_length": 5, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "viator.", + "url": "https://www.viator.com/?m=28353&supag=122704388281&supca=12512866044&supsc=aud-2226856787438:kwd-270303623&supai=504932286194&supdv=c&supnt=g&suplp=9069536&supli=&supti=aud-2226856787438:kwd-270303623&tsem=true&supci=aud-2226856787438:kwd-270303623&supap1=&supap2=&supfi=&gad_source=1&gclid=Cj0KCQjwjLGyBhCYARIsAPqTz1-V-L8zxTbqxq2AD5c-WgIk5G9vCYhsbUZ3nRkL4e74DAOtKPl4hY4aAjD-EALw_wcB" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/Paris-attractions/Louvre/", + "url": "https://www.viator.com/Paris-attractions/Louvre/d479-a73" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": "#experiences >.tabLink__oq7s ", + "netloc": "viator", + "url": "https://www.viator.com/Paris-attractions/Louvre/d479-a73" 
+ } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".oneColumn__3Jg_.smallSpacingOptions__WkkC.lastVisibleOptionsGroup__c73G >div:nth-child(1)>.radioButton__1K9n >.inputRadio__33yh.md__1d8H ", + "netloc": "viator", + "url": "https://www.viator.com/Paris-attractions/Louvre/d479-a73#experiences" + } + } + ] + }, + { + "index": 83, + "task": "Browse the list of top 250 movies and add the first one to my watchlist on imdb", + "reference_task_length": 6, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "imdb.", + "url": "https://www.imdb.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/chart/top/", + "url": "https://www.imdb.com/chart/top/?ref_=nv_mv_250" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".ipc-metadata-list.ipc-metadata-list--dividers-between.sc-a1e81754-0.eBRbsI.compact-list-view.ipc-metadata-list--base >li:nth-child(1)>.sc-10233bc-1.lkPiVh.cli-post-element >.ipc-icon-button.cli-info-icon.ipc-icon-button--base.ipc-icon-button--onAccent2 ", + "netloc": "imdb", + "url": "https://www.imdb.com/chart/top/?ref_=nv_mv_250" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".sc-d3701649-1.gtFZWJ >button:nth-child(2)", + "netloc": "imdb", + "url": "https://www.imdb.com/chart/top/?ref_=nv_mv_250" + } + } + ] + }, + { + "index": 84, + "task": "Find thrill rides in Six Flags Great America, Chicago, IL on sixflags", + "reference_task_length": 7, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "sixflags.", + "url": "https://www.sixflags.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", 
+ "reference_answer": "/greatamerica", + "url": "https://www.sixflags.com/greatamerica/store/tickets/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/thrill-rides", + "url": "https://www.sixflags.com/greatamerica/things-to-do/rides/thrill-rides#thrill-ride" + } + } + ] + }, + { + "index": 85, + "task": "Show me a list of electronic music dvds in very good condition on discogs", + "reference_task_length": 6, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "discogs.", + "url": "https://www.discogs.com/" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "genre", + "reference_answer": "Electronic", + "url": "https://www.discogs.com/sell/list?genre=Electronic" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "format", + "reference_answer": "DVD", + "url": "https://www.discogs.com/sell/list?genre=Electronic&format=DVD" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "condition", + "reference_answer": "Very Good (VG)", + "url": "https://www.discogs.com/sell/list?genre=Electronic&format=DVD&condition=Very+Good+%28VG%29" + } + } + ] + }, + { + "index": 86, + "task": "Find a walkthrough guide for Assassin's Creed Valhalla on ign", + "reference_task_length": 6, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "ign.", + "url": "https://www.ign.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/assassins-creed-valhalla", + "url": "https://www.ign.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/Walkthrough", + "url": "https://www.ign.com/" + } + } + ] + }, + { + "index": 87, + "task": "Find more films from the director of Smile on tvguide", + 
"reference_task_length": 7, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "tvguide.", + "url": "https://www.tvguide.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/celebrities/parker-finn", + "url": "https://www.tvguide.com/celebrities/parker-finn/3060049350/" + } + } + ] + }, + { + "index": 88, + "task": "Find help page about buying tickets on seatgeek", + "reference_task_length": 4, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "seatgeek.", + "url": "https://seatgeek.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/8985521334291-Buying-Tickets", + "url": "https://seatgeek.com/help/articles/8985521334291-Buying-Tickets" + } + } + ] + }, + { + "index": 89, + "task": "Tell me more about the Adirondack route on amtrak.", + "reference_task_length": 5, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "amtrak.", + "url": "https://www.amtrak.com/home.html" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/routes", + "url": "https://www.amtrak.com/routes.html" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/adirondack-train", + "url": "https://www.amtrak.com/routes/adirondack-train.html" + } + } + ] + }, + { + "index": 90, + "task": "Show me the best city tours on nyc", + "reference_task_length": 3, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "nyc.", + "url": "https://www.nyc.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/guided_tours", + "url": 
"https://www.nyc.com/guided_tours/" + } + } + ] + }, + { + "index": 91, + "task": "Find the team schedule of the Brooklyn Nets on espn", + "reference_task_length": 4, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "espn.", + "url": "https://www.espn.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/brooklyn-nets", + "url": "https://www.espn.com/nba/team/_/name/bkn/brooklyn-nets" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/schedule", + "url": "https://www.espn.com/nba/team/schedule/_/name/bkn/brooklyn-nets" + } + } + ] + }, + { + "index": 92, + "task": "Browse used Mercedes cars made between 2004 to 2012 and sort by highest price on kbb", + "reference_task_length": 11, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "kbb.", + "url": "https://www.kbb.com/" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "years", + "reference_answer": "2004-2012", + "url": "https://www.kbb.com/car-finder/?manufacturers=mercedesbenz&years=2004-2012" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "intent", + "reference_answer": "used", + "url": "https://www.kbb.com/car-finder/?intent=used&manufacturers=mercedesbenz&years=2004-2012" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "sort", + "reference_answer": "pricedesc", + "url": "https://www.kbb.com/car-finder/?intent=used&manufacturers=mercedesbenz&years=2004-2012&sort=pricedesc" + } + } + ] + }, + { + "index": 93, + "task": "Show me the scores for the 2019 super bowl on nfl", + "reference_task_length": 7, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "nfl.", + "url": 
"https://www.nfl.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/scores", + "url": "https://www.nfl.com/scores/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/2019", + "url": "https://www.nfl.com/scores/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/POST4", + "url": "https://www.nfl.com/scores/" + } + } + ] + }, + { + "index": 94, + "task": "Book the cheapest parking spot near Bradley Airport on spothero", + "reference_task_length": 9, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "spothero.", + "url": "https://spothero.com/" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "id", + "reference_answer": "73377", + "url": "https://spothero.com/search?kind=destination&id=73377&view=dl" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".SearchAirportSort >div:nth-child(1)>div:nth-child(2)>div:nth-child(1)>.Select.FormElement >.FormElement-control >.FormElement-item ", + "netloc": "spothero", + "url": "https://spothero.com/search?kind=destination&id=73377&view=dl" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".SearchAirportSort >div:nth-child(1)>div:nth-child(2)>div:nth-child(1)>.Select.FormElement >.FormElement-control >.FormElement-item >option:nth-child(3)", + "netloc": "spothero", + "url": "https://spothero.com/search?kind=destination&id=73377&starts=2024-03-13T12%3A00&ends=2024-03-17T12%3A00&view=dl&hide_modal=true" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".SearchAirportResults-spots 
>div:nth-child(1)>div:nth-child(1)>div:nth-child(1)>section:nth-child(1)>.Card.Card-open >.Card-content >.SearchAirportCard-view-details-container >a:nth-child(1)", + "netloc": "spothero", + "url": "https://spothero.com/search?kind=destination&id=73377&view=dl" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".Button.Button-primary.Button-block.AirportPurchaseInformation-book-now ", + "netloc": "spothero", + "url": "https://spothero.com/airport-parking/12793/34-old-county-rd?starts=2024-03-13T12%3A00&ends=2024-03-17T12%3A00&airport=true" + } + } + ] + }, + { + "index": 95, + "task": "Browse hot deals near zip code 10019 on koa", + "reference_task_length": 7, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "koa.", + "url": "https://koa.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/hot-deals", + "url": "https://koa.com/hot-deals/" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "q", + "reference_answer": "10019", + "url": "https://koa.com/hot-deals/?q=10019" + } + } + ] + }, + { + "index": 96, + "task": "Create a new list called Bathroom Remodeling on menards", + "reference_task_length": 8, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "menards.", + "url": "https://www.menards.com/main/home.html" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/myLists", + "url": "https://www.menards.com/main/myLists.html" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": "#addNewListButton ", + "netloc": "menards", + "url": "https://www.menards.com/main/myLists.html" + } + }, + { + "match_function_name": 
"element_value_exactly_match", + "content": { + "reference_answer": "Bathroom Remodeling", + "netloc": "menards", + "path": "#titleEdit ", + "url": "https://www.menards.com/main/myLists.html" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": "#createEditConfirm ", + "netloc": "menards", + "url": "https://www.menards.com/main/myLists.html" + } + } + ] + }, + { + "index": 97, + "task": "Find the most popular movies and showcase those with the highest IMDb ratings on imdb", + "reference_task_length": 6, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "imdb.", + "url": "https://www.imdb.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/chart/moviemeter/", + "url": "https://www.imdb.com/chart/moviemeter/?ref_=nv_mv_mpm" + } + }, + { + "match_function_name": "url_exactly_match", + "content": { + "key": "sort", + "reference_answer": "release_date,desc", + "url": "https://www.imdb.com/chart/moviemeter/?ref_=nv_mv_mpm&sort=release_date%2Cdesc" + } + } + ] + }, + { + "index": 98, + "task": "Show list of popular businesses in Cleveland on yellowpages.", + "reference_task_length": 5, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "yellowpages.", + "url": "https://www.yellowpages.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/cleveland-oh", + "url": "https://www.yellowpages.com/cleveland-oh/business-listings/1" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/business-listings/", + "url": "https://www.yellowpages.com/cleveland-oh/business-listings/1" + } + } + ] + }, + { + "index": 99, + "task": "Open the baggage fee calculator on united", + 
"reference_task_length": 6, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "united.", + "url": "https://www.united.com/en/us/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/checked-bag-fee-calculator/", + "url": "https://www.united.com/en/us/checked-bag-fee-calculator/any-flights" + } + } + ] + }, + { + "index": 100, + "task": "See Nissan and Honda cars for sale near Kentwood, MI 49512 on carmax", + "reference_task_length": 12, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "carmax.", + "url": "https://www.carmax.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/cars", + "url": "https://www.carmax.com/cars/all" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": "#Distance >.panel-menu-item--label.hzn-typography--headline-2 >h4:nth-child(1)", + "netloc": "carmax", + "url": "https://www.carmax.com/cars/all" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": ".distance--change-store-link--oxb19 >hzn-text-link:nth-child(1)", + "netloc": "carmax", + "url": "https://www.carmax.com/cars/all" + } + }, + { + "match_function_name": "element_value_exactly_match", + "content": { + "reference_answer": "49512", + "netloc": "carmax", + "path": "#store-chooser-keyword-input ", + "url": "https://www.carmax.com/cars/all" + } + }, + { + "match_function_name": "element_path_exactly_match", + "method": "selector", + "content": { + "reference_answer": "#store-chooser-keyword-search-form >button:nth-child(2)>svg:nth-child(1)", + "netloc": "carmax", + "url": "https://www.carmax.com/cars/all" + } + }, + { + "match_function_name": "element_path_exactly_match", + 
"method": "selector", + "content": { + "reference_answer": "#store-chooser-modal-body >ul:nth-child(1)>li:nth-child(2)>div:nth-child(2)>button:nth-child(1)", + "netloc": "carmax", + "url": "https://www.carmax.com/cars/all" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/nissan", + "url": "https://www.carmax.com/cars/honda/nissan" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/honda", + "url": "https://www.carmax.com/cars/honda/nissan" + } + } + ] + }, + { + "index": 101, + "task": "Browse the clearance section and filter for women's dresses in size small on kohls.", + "reference_task_length": 8, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "kohls.", + "url": "https://www.kohls.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "clearance", + "url": "https://www.kohls.com/catalog/clearance-womens-dresses-clothing.jsp?CN=Promotions:Clearance+Gender:Womens+Category:Dresses+Department:Clothing&icid=wmnsclearance-VN-dresses&pfm=undefined&kls_sbp=59288101756704364110307077102020115511" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "CN", + "reference_answer": "Gender:Womens", + "url": "https://www.kohls.com/catalog/clearance-womens-dresses-clothing.jsp?CN=Promotions:Clearance+Gender:Womens+Category:Dresses+Department:Clothing&icid=wmnsclearance-VN-dresses&pfm=undefined&kls_sbp=59288101756704364110307077102020115511" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "CN", + "reference_answer": "Category:Dresses", + "url": 
"https://www.kohls.com/catalog/clearance-womens-dresses-clothing.jsp?CN=Promotions:Clearance+Gender:Womens+Category:Dresses+Department:Clothing&icid=wmnsclearance-VN-dresses&pfm=undefined&kls_sbp=59288101756704364110307077102020115511" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "CN", + "reference_answer": "Department:Clothing", + "url": "https://www.kohls.com/catalog/clearance-womens-dresses-clothing.jsp?CN=Promotions:Clearance+Gender:Womens+Category:Dresses+Department:Clothing&icid=wmnsclearance-VN-dresses&pfm=undefined&kls_sbp=59288101756704364110307077102020115511" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "CN", + "reference_answer": "Size:S", + "url": "https://www.kohls.com/catalog/clearance-womens-dresses-clothing.jsp?CN=Promotions:Clearance+Gender:Womens+Category:Dresses+Department:Clothing&icid=wmnsclearance-VN-dresses&pfm=undefined&kls_sbp=59288101756704364110307077102020115511" + } + } + ] + }, + { + "index": 102, + "task": "Find the schedule for upcoming MLB games for the New York Yankees on foxsports", + "reference_task_length": 5, + "evaluation": [ + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "foxsports.", + "url": "https://www.foxsports.com/" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/mlb", + "url": "https://www.foxsports.com/mlb" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "/new-york-yankees-team", + "url": "https://www.foxsports.com/mlb/new-york-yankees-team" + } + }, + { + "match_function_name": "url_included_match", + "content": { + "key": "", + "reference_answer": "new-york-yankees-team-schedule", + "url": "https://www.foxsports.com/mlb/new-york-yankees-team-schedule" + } + } + ] + }, + { + "index": 103, + "task": "What are the upcoming soccer events on ESPN2 on espn", + 
class WebCanvasInstance:
    """
    Utility class to access a WebCanvas instance.

    Every helper is a static method: the class only groups the logic that
    turns raw task configurations into reference evaluation steps, parses
    high-level action strings, and summarises key-node scores.
    """

    # One high-level action call, e.g. ``click('12')`` or ``fill('a3', 'text')``.
    _ACTION_PATTERN = re.compile(
        r"(fill|click|press|select_option)\('([^']+)',? ?'?(.*?)'?\)")

    def __init__(
        self,
    ) -> None:
        pass

    @staticmethod
    def _reference_step(match_function, evaluation):
        """
        Build the reference step for one raw ``evaluation`` entry.

        Returns a ``(kind, step)`` pair where ``kind`` is the family of the
        match function ("url", "element_path" or "element_value") and
        ``step`` the normalised dict, or ``(None, None)`` when the match
        function belongs to no known family (such entries are ignored).
        A missing key raises ``KeyError`` tagged with the family name so the
        caller can report which kind of step is malformed.
        """
        for kind in ("url", "element_path", "element_value"):
            if kind in match_function:
                break
        else:
            return None, None

        try:
            content = evaluation["content"]
            if kind == "url":
                step = {"match_function": match_function,
                        "key": content["key"],
                        "reference_answer": content["reference_answer"],
                        "score": 0}
            elif kind == "element_path":
                step = {"match_function": match_function,
                        "method": evaluation["method"],
                        "reference_answer": content["reference_answer"],
                        "netloc": content["netloc"],
                        "score": 0}
            else:
                step = {"match_function": match_function,
                        "reference_answer": content["reference_answer"],
                        "netloc": content["netloc"]}
                # "path" is optional: it pins the value check to one element.
                if "path" in content:
                    step["path"] = content["path"]
                step["score"] = 0
        except (KeyError, TypeError) as err:
            raise KeyError(kind) from err
        return kind, step

    @staticmethod
    def read_task_configs(all_task_configs):
        """
        Convert raw task configs into ``[goal, index, length, steps]`` lists.

        Args:
            all_task_configs: list of dicts with the keys "task",
                "evaluation", "reference_task_length" and "index".

        Returns:
            One ``[task_name, task_name_id, reference_task_length,
            reference_evaluate_steps]`` list per task, in input order.

        Raises:
            KeyError: if a task lacks one of its top-level keys.
            ValueError: if an evaluation entry lacks a field required by its
                match function (previously this printed and called
                ``exit(1)``, killing the host process from library code).
        """
        return_list = []
        for task in all_task_configs:
            task_name = task["task"]
            evaluation_data = task["evaluation"]
            reference_task_length = task["reference_task_length"]
            task_name_id = task["index"]
            reference_evaluate_steps = []
            for i, evaluation in enumerate(evaluation_data):
                match_function = evaluation["match_function_name"]
                try:
                    _, step = WebCanvasInstance._reference_step(
                        match_function, evaluation)
                except KeyError as err:
                    raise ValueError(
                        f"{err.args[0]} error in task {task_name_id}, step {i}, "
                        f"match_function: {match_function}") from err
                if step is not None:
                    reference_evaluate_steps.append(step)
            return_list.append(
                [task_name, task_name_id, reference_task_length, reference_evaluate_steps])

        return return_list

    @staticmethod
    def parse_bid_from_action(action_str):
        """
        Extracts all actions from the given action_str.

        The string is split on newlines and each line is parsed on its own.

        Returns:
            A list of ``(action_type, bid, target_value)`` tuples, one per
            line; a line that is not a recognised action yields
            ``("", "", "")``.
        """
        actions = []
        for line in action_str.split("\n"):
            match = WebCanvasInstance._ACTION_PATTERN.match(line)
            if match is None:
                actions.append(("", "", ""))
            else:
                actions.append((match.group(1), match.group(2), match.group(3) or ""))
        return actions

    @staticmethod
    def _summarize(evaluate_steps, match_result, reference_evaluate_steps):
        """Return ``(steps, "<score> / <total>", match_result, finished)``."""
        total_step_score = sum(step["score"] for step in evaluate_steps)
        reference_count = len(reference_evaluate_steps)
        step_score_rate = str(total_step_score) + " / " + str(reference_count)
        task_finished = total_step_score == reference_count
        return evaluate_steps, step_score_rate, match_result, task_finished

    @staticmethod
    def evaluate(page, selector, target_value, evaluate_steps, reference_evaluate_steps):
        """
        Score the current step from an explicit element locator / value.

        The element value is the locator's text content when ``selector`` is
        given, otherwise ``target_value`` (unless empty or the string "None").
        """
        element_value = ""
        if selector is not None:
            element_value = selector.text_content()
        elif target_value and target_value != "None":
            element_value = target_value
        evaluate_steps, match_result = step_evaluate(page=page, evaluate_steps=evaluate_steps,
                                                     input_path=selector, element_value=element_value)
        return WebCanvasInstance._summarize(
            evaluate_steps, match_result, reference_evaluate_steps)

    @staticmethod
    def evaluate_events(page, evaluate_steps, task_events, reference_evaluate_steps):
        """Score the current step from a captured DOM event (``task_events``)."""
        evaluate_steps, match_result = step_event_evaluate(page=page, evaluate_steps=evaluate_steps,
                                                           event=task_events)
        return WebCanvasInstance._summarize(
            evaluate_steps, match_result, reference_evaluate_steps)
class SemanticMatchPromptConstructor(BasePromptConstructor):
    """Builds the chat messages used to ask an LLM for a semantic-match score."""

    def __init__(self):
        super().__init__()
        # Both attributes hold *templates*; they are never overwritten so the
        # same constructor can be reused for any number of requests.
        self.prompt_system = BasePrompts.semantic_match_prompt_system
        self.prompt_user = BasePrompts.semantic_match_prompt_user

    def construct(self, input_answer, semantic_method) -> list:
        """
        Render the user prompt and return the two-message chat payload.

        Args:
            input_answer: the candidate answer to be judged.
            semantic_method: the judging rule given to the model.

        Returns:
            ``[system_message, user_message]`` dicts with "role"/"content".
        """
        # Render into a local: storing the result back on ``self.prompt_user``
        # destroyed the placeholders, so every later call returned the prompt
        # of the first call.
        rendered_user = Template(self.prompt_user).render(
            semantic_method=semantic_method, input_answer=input_answer)
        messages = [{"role": "system", "content": self.prompt_system}, {
            "role": "user", "content": rendered_user}]
        return messages
# Tags that usually wrap the real interaction target; when the reference
# element has one of these tags, up to three of its ancestors are also
# accepted as a match in ``ElementEvaluator.path_exact_match``.
MapTagNameList = [
    "span",
    "h1",
    "h2",
    "h3",
    "h4",
    "h5",
    "h6",
    "div",
    "li",
    "ul",
    "p"
]

# Errors raised by urlparse/parse_qs or by the lookup of a missing query key.
_URL_LOOKUP_ERRORS = (KeyError, IndexError, ValueError, TypeError, AttributeError)


class StepEvaluator():
    """Common base class of the step evaluators."""

    def __init__(self):
        pass


class URLEvaluator(StepEvaluator):

    '''URL Evaluation Scoring'''

    @staticmethod
    def _query_value(input_url, key):
        """Return the first value of query parameter ``key``, or None if absent."""
        try:
            return parse_qs(urlparse(input_url).query)[key][0]
        except _URL_LOOKUP_ERRORS:
            return None

    @staticmethod
    def url_exact_match(input_url, reference_answer, key=False):
        """
        Return 1 if the URL (or its query parameter ``key``) equals the reference.

        With a falsy ``key`` the whole percent-decoded URL is compared; with a
        key, a URL that lacks the parameter scores 0.
        """
        if key:
            input_answer = URLEvaluator._query_value(input_url, key)
            if input_answer is None:
                return 0
        else:
            input_answer = input_url
        return MatchFunction.exact_match(unquote(input_answer), reference_answer)

    @staticmethod
    def url_include_match(input_url, reference_answer, key=None):
        """
        Return 1 if the reference is a substring of the URL part under test.

        With a key, the part is the first value of that query parameter
        (0 if absent). Without one it is ``netloc + path`` plus ``#fragment``
        when present; the query string is deliberately not searched.
        """
        if key:
            input_answer = URLEvaluator._query_value(input_url, key)
            if input_answer is None:
                return 0
        else:
            try:
                parsed_url = urlparse(input_url)
                input_answer = parsed_url.netloc + parsed_url.path
                if parsed_url.fragment:
                    input_answer += "#" + parsed_url.fragment
            except _URL_LOOKUP_ERRORS:
                # Unparseable URL: fall back to matching on the raw string.
                input_answer = input_url
        return MatchFunction.include_match(unquote(input_answer), reference_answer)

    @staticmethod
    def url_semantic_match(input_url, semantic_method, key=False):
        """Ask the LLM judge whether the URL (or query parameter ``key``) satisfies the rule."""
        if key:
            input_answer = URLEvaluator._query_value(input_url, key)
            if input_answer is None:
                return 0
        else:
            input_answer = input_url
        return MatchFunction.semantic_match(unquote(input_answer), semantic_method)


class ElementEvaluator(StepEvaluator):
    '''Element evaluation and scoring'''

    @staticmethod
    def is_same_element(page, input_element_handle, reference_element_handle):
        """Return 1 if both handles refer to the same DOM node of ``page``, else 0."""
        is_same_element = page.evaluate(
            "(elements) => elements[0] === elements[1]",
            [input_element_handle, reference_element_handle])
        return int(is_same_element)

    @staticmethod
    def _xpath_match(input_answer, reference_answer, page):
        """Score two XPath expressions evaluated against the page's current HTML."""
        try:
            tree = html.fromstring(page.content())
            input_elements = tree.xpath(input_answer)
            reference_elements = tree.xpath(reference_answer)
        except Exception:
            # Page unavailable or invalid XPath: nothing can match.
            return 0
        if not input_elements or not reference_elements:
            return 0

        input_element = input_elements[0]
        current_element = reference_elements[0]
        score = int(input_element is current_element)
        if getattr(current_element, "tag", None) in MapTagNameList:
            # Wrapper tag: also accept one of the three nearest ancestors.
            for _ in range(3):
                if score != 0:
                    break
                current_element = current_element.getparent()
                if current_element is None:
                    break
                score = int(input_element is current_element)
        return score

    @staticmethod
    def _selector_match(input_answer, reference_answer, page):
        """Score a Playwright locator (or selector string) against a reference selector."""
        if input_answer is None or input_answer == "":
            return 0
        try:
            # Event-based evaluation passes the selector as a string, while
            # action-based evaluation passes a Locator; accept both.
            if isinstance(input_answer, str):
                input_element = page.locator(input_answer)
            else:
                input_element = input_answer
            reference_element = page.locator(reference_answer)
            input_element_handle = input_element.element_handle()
            reference_element_handle = reference_element.element_handle()
            score = ElementEvaluator.is_same_element(
                page, input_element_handle=input_element_handle,
                reference_element_handle=reference_element_handle)
        except Exception:
            # Element missing, ambiguous, or page gone: no match.
            return 0

        try:
            reference_tag = page.evaluate(
                "(element) => element.tagName.toLowerCase()", reference_element_handle)
            if reference_tag in MapTagNameList:
                current_element = reference_element
                for _ in range(3):
                    if score != 0:
                        break
                    current_element = current_element.locator("xpath=..")
                    parent_element_handle = current_element.element_handle()
                    parent_score = ElementEvaluator.is_same_element(
                        page, input_element_handle=input_element_handle,
                        reference_element_handle=parent_element_handle)
                    score = max(score, parent_score)
        except Exception as e:
            # Best effort: keep the score obtained so far.
            print(e)
        return score

    @staticmethod
    def path_exact_match(input_answer, reference_answer, method, page):
        """
        Return 1 if the input element is the reference element, else 0.

        Args:
            input_answer: XPath string (method "xpath"); Playwright Locator or
                CSS selector string (method "selector").
            reference_answer: XPath / selector of the expected element.
            method: "xpath" or "selector"; any other value scores 0.
            page: Playwright page the elements live on.

        When the reference element's tag is in ``MapTagNameList`` the input
        also matches if it is one of the reference's three nearest ancestors.
        Any lookup failure scores 0 instead of raising.
        """
        if method == "xpath":
            return ElementEvaluator._xpath_match(input_answer, reference_answer, page)
        if method == "selector":
            return ElementEvaluator._selector_match(input_answer, reference_answer, page)
        return 0

    @staticmethod
    def path_included_match(input_answer, reference_answer, method, html_content):
        """Return 1 if ``reference_answer`` is a substring of ``input_answer``."""
        # TODO Add path inclusion matching method
        result_score = MatchFunction.include_match(
            input_answer, reference_answer)
        return result_score

    @staticmethod
    def element_value_exact_match(input_answer, reference_answer):
        """Return 1 if the element value equals the reference exactly."""
        # TODO fuzzy check if the input_answer is on the same page as the reference_answer
        result_score = MatchFunction.exact_match(
            input_answer, reference_answer)
        return result_score

    @staticmethod
    def element_value_include_match(input_answer, reference_answer):
        """Return 1 if the reference is a substring of the element value."""
        # TODO fuzzy check if the input_answer is on the same page as the reference_answer
        result_score = MatchFunction.include_match(
            input_answer, reference_answer)
        return result_score

    @staticmethod
    def element_value_semantic_match(input_answer, semantic_method):
        """Ask the LLM judge about the element value; an empty value scores 0."""
        # TODO fuzzy check if the input_answer is on the same page as the reference_answer
        if len(input_answer) == 0:
            return 0
        result_score = MatchFunction.semantic_match(
            input_answer, semantic_method)
        return result_score


class TextEvaluator(StepEvaluator):
    '''Text evaluation and scoring'''

    @staticmethod
    def text_exact_match(input_answer, reference_answer):
        """Return 1 if the text equals the reference exactly."""
        result_score = MatchFunction.exact_match(
            input_answer, reference_answer)
        return result_score

    @staticmethod
    def text_included_match(input_answer, reference_answer):
        """Return 1 if the reference is a substring of the text."""
        result_score = MatchFunction.include_match(
            input_answer, reference_answer)
        return result_score

    @staticmethod
    def text_semantic_match(input_answer, semantic_method):
        """Ask the LLM judge whether the text satisfies ``semantic_method``."""
        # semantic_match takes exactly (answer, rule); the rule used to be
        # passed twice, which raised TypeError on every call.
        result_score = MatchFunction.semantic_match(
            input_answer, semantic_method)
        return result_score


class MatchFunction:
    """Primitive comparison functions shared by all evaluators."""

    def __init__(self):
        pass

    @staticmethod
    def exact_match(input_answer, reference_answer) -> int:
        return 1 if input_answer == reference_answer else 0

    @staticmethod
    def include_match(input_answer, reference_answer) -> int:
        return 1 if reference_answer in input_answer else 0

    @staticmethod
    def semantic_match(input_answer, semantic_method) -> float:
        """
        Ask the LLM judge for a relevance score in [0, 1].

        The model is expected to wrap the number in triple backticks. The
        request is retried up to three times; if no finite number can be
        extracted the score is 0. Whole scores are returned as ints, others
        rounded to two decimals.
        """
        GPT35 = GPTGenerator35()
        semantic_request = SemanticMatchPromptConstructor(
        ).construct(input_answer, semantic_method)
        score = None
        for _ in range(3):
            try:
                response, _error = GPT35.request(semantic_request)
                raw_score = re.findall("```(.*?)```", response, re.S)[0]
                # The text comes from a model: parse it as a number, never
                # evaluate it as Python code.
                value = float(raw_score)
                if value != value or abs(value) == float("inf"):
                    raise ValueError(raw_score)
                # Limit the score between 0 and 1
                score = max(0, min(1, value))
                break
            except (IndexError, ValueError, TypeError):
                score = None
        if score is None:
            return 0
        if score in (0, 1):
            return int(score)
        return round(score, 2)
def __init__(
    self,
    seed: int,
    task_id: Optional[int] = None,
) -> None:
    """
    Load the WebCanvas task identified by ``task_id``.

    Args:
        seed: random seed forwarded to ``AbstractBrowserTask``.
        task_id: zero-based position of the task in the bundled
            Mind2Web-Live test set.

    Raises:
        ValueError: if ``task_id`` is missing or outside the task list.
    """
    super().__init__(seed)

    # task properties, will be used to set up the browsergym environment
    self.viewport = {"width": 1280, "height": 720}
    self.slow_mo = 1000  # ms
    self.timeout = 10000  # ms
    self.WebCanvas_instance = WebCanvasInstance()
    self.config_file: Optional[str] = None
    self.start_url: str = "https://www.google.com/"
    self.step_score_rate: Optional[str] = None
    self.match_result: Optional[str] = None
    self.task_finish: bool = False  # kept for backward compatibility
    # Name read and written by validate(); define it up front so it exists
    # before the first validation step.
    self.task_finished: bool = False
    self.activate_element = None
    self.current_event = {
        "selector": None,
        "status": True,
        "target_value": None,
        "event_type": None
    }
    self.event_page = None  # Store the page where the event occurred

    if task_id is None:
        raise ValueError(
            f"A 'task_id' must be provided (task_id={task_id})."
        )

    # read the list of all WebCanvas task configs
    import browsergym.webcanvas as wcs
    all_configs_str = importlib.resources.files(wcs).joinpath(
        "data/mind2web-live_test_20241024.json").read_text(encoding="utf-8")
    all_task_configs = json.loads(all_configs_str)
    all_task = WebCanvasInstance.read_task_configs(all_task_configs)
    # Reject negative ids explicitly: list indexing would silently pick a
    # task counted from the end.
    if not 0 <= task_id < len(all_task):
        raise ValueError(
            f"Could not find any task config with task_id={task_id}."
        )

    self.task_configs = all_task[task_id]
    self.trace_info = []
    self.time_step = 0
+ """ + raise NotImplementedError + + def setup(self, page: playwright.sync_api.Page, start_url: str = None) -> tuple[str, dict]: + # Save the page reference + self.page = page + + # Ensure event listeners are set up + self._ensure_event_listeners(page) + + # Initialize task configuration + self.goal, _, _, reference_evaluate_steps = self.task_configs + self.evaluation_step = reference_evaluate_steps + self.reference_evaluate_steps = reference_evaluate_steps + + # Navigate to start URL + start_url = start_url if start_url else self.start_url + page.goto(start_url, timeout=30000) + return self.goal, {} + + def teardown(self) -> None: + pass + + @property + def evaluate_result(self): + return self.trace_info[-1] + + @property + def webcanvas(self): + return True + + def validate( + self, + page: playwright.sync_api.Page, + chat_messages: list[str], + action: dict, + ) -> Tuple[float, bool, Optional[str], dict]: + + self._ensure_event_listeners(page) + + reward, done, msg, info = 0, False, "", {} + + for message in chat_messages: + if message["role"] == "user" and message["message"] == "exit": + done = True + break + + self.time_step += 1 + step_action_info = {} + step_action_info["time_step"] = self.time_step + step_action_info["evaluation"] = [] + + # Use event_page for evaluation if available, otherwise use current page + evaluation_page = self.event_page if self.event_page else page + + # Check if selector can be located + can_locate = self._can_locate_selector(evaluation_page, self.current_event["selector"]) + logger.info(f"Selector '{self.current_event['selector']}' can{'' if can_locate else 'not'} be located on page") + + self.evaluation_step, self.step_score_rate, self.match_result, self.task_finished = WebCanvasInstance.evaluate_events( + evaluation_page, self.evaluation_step, self.current_event, self.reference_evaluate_steps) + + # Reset event_page after evaluation + self.event_page = None + + step_action_info["evaluation"].append( + { + "step_score_rate": 
# https://github.com/ServiceNow/BrowserGym/blob/main/browsergym/core/src/browsergym/core/action/utils.py
def get_element_by_bid(
    self, page: playwright.sync_api.Page, bid: str, scroll_into_view: bool = False
) -> playwright.sync_api.Locator:
    """
    Parse the given bid to sequentially locate every nested frame leading to the bid, then
    locate the bid element. Bids are expected to take the form "abb123", which means
    the element abb123 is located inside frame abb, which is located inside frame ab, which is
    located inside frame a, which is located inside the page's main frame.

    Args:
        page: the page to search.
        bid: the browsergym id (playwright testid) of the page element.
        scroll_into_view: try to scroll element into view, unless it is completely visible.

    Returns:
        Playwright locator of the element.

    Raises:
        ValueError: if ``bid`` is not a string, or a frame / the element cannot be found.
    """
    if not isinstance(bid, str):
        raise ValueError(f"expected a string, got {repr(bid)}")

    current_frame = page

    # dive into each nested frame, to the frame where the element is located
    i = 0
    while bid[i:] and not bid[i:].isnumeric():
        i += 1
        frame_bid = bid[:i]  # bid of the next frame to select
        frame_elem = current_frame.get_by_test_id(frame_bid)
        if not frame_elem.count():
            raise ValueError(f'Could not find element with bid "{bid}"')
        if scroll_into_view:
            frame_elem.scroll_into_view_if_needed(timeout=500)
        current_frame = frame_elem.frame_locator(":scope")

    # finally, we should have selected the frame where the target element is
    elem = current_frame.get_by_test_id(bid)
    if not elem.count():
        raise ValueError(f'Could not find element with bid "{bid}"')
    if scroll_into_view:
        elem.scroll_into_view_if_needed(timeout=500)
    return elem

@property
def events(self):
    # NOTE(review): ``task_events`` is not assigned anywhere in the visible
    # part of this class; confirm it is set elsewhere before relying on this.
    return self.task_events

def _handle_event(self, selector, event_type, element_info_str, page):
    """
    Handle DOM events by updating task events

    Records ``page`` as the page of the latest event and replaces
    ``self.current_event`` with the event described by the arguments.
    ``element_info_str`` is the JSON string produced by the in-page
    listener; parsing errors are logged, never raised.
    """
    try:
        # Store the page directly
        self.event_page = page
        element_info = json.loads(element_info_str)

        # Create current event
        current_event = {
            "selector": selector,
            "status": True,
            "target_value": element_info.get("value") or element_info.get("textContent", "") or "",
            "event_type": event_type
        }

        # Update current event
        self.current_event = current_event
        logger.info(f"Event captured on page: {self.event_page.url}")
        logger.info(f"Current event updated: {current_event}")

    except json.JSONDecodeError:
        logger.error(f"Failed to parse element info: {element_info_str}")
    except Exception as e:
        logger.error(f"Error handling event: {str(e)}")

def _ensure_event_listeners(self, page: playwright.sync_api.Page):
    """
    Ensures that event listeners are properly set up on the page.
    Checks for existing handlers before setting up new ones to avoid duplicates.

    The ``handleEvent`` binding is exposed on the page's browser context and
    capture-phase DOM listeners are installed in the current document; each
    event is forwarded to ``_handle_event`` with a CSS selector of its target.

    Raises:
        Exception: re-raises any Playwright error after logging it.
    """
    try:
        # Check if handleEvent is already bound
        handle_event_exists = page.evaluate("""
            () => typeof window.handleEvent === 'function'
        """)

        if not handle_event_exists:
            # The binding lives on the whole context, so report the page that
            # actually emitted the event (falling back to the page seen here)
            # instead of always the page captured at bind time.
            page.context.expose_binding(
                "handleEvent",
                lambda source, selector, event_type, element_info: self._handle_event(
                    selector, event_type, element_info, source.get("page") or page
                )
            )

        # Set up DOM event listeners if not already initialized
        page.evaluate("""
            () => {
                if (window._eventListenersInitialized) return;

                const allEvents = [
                    'click', 'input', 'change', 'keydown', 'keyup',
                    'mouseover', 'mouseout', 'mousedown', 'mouseup', 'focus', 'blur'
                ];

                function getElementSelector(element) {
                    if (!element) return null;
                    try {
                        let path = [];
                        while (element && element.nodeType === Node.ELEMENT_NODE) {
                            let selector = element.nodeName.toLowerCase();
                            if (element.id) {
                                // Escape the id so ids with digits/punctuation stay valid CSS.
                                const escapedId = (window.CSS && CSS.escape)
                                    ? CSS.escape(element.id) : element.id;
                                selector += '#' + escapedId;
                                path.unshift(selector);
                                break;
                            } else if (element.parentElement) {
                                // nth counts preceding siblings of the SAME tag, which is
                                // the :nth-of-type index (not :nth-child). It is always
                                // emitted so the selector identifies a single element.
                                let sibling = element;
                                let nth = 1;
                                while (sibling.previousElementSibling) {
                                    sibling = sibling.previousElementSibling;
                                    if (sibling.nodeName === element.nodeName) nth++;
                                }
                                selector += `:nth-of-type(${nth})`;
                            }
                            path.unshift(selector);
                            element = element.parentNode;
                        }
                        return path.join(' > ');
                    } catch (e) {
                        return null;
                    }
                }

                function getElementInfo(element) {
                    return {
                        textContent: element.textContent || '',
                        value: element.value || '',
                        tagName: element.tagName.toLowerCase()
                    };
                }

                allEvents.forEach(eventType => {
                    document.addEventListener(eventType, (event) => {
                        const element = event.target;
                        // focus/blur may target the document itself, which has no tagName.
                        if (!(element instanceof Element)) return;
                        const selector = getElementSelector(element);
                        const elementInfo = getElementInfo(element);

                        window.handleEvent(
                            selector,
                            eventType,
                            JSON.stringify(elementInfo)
                        );
                    }, true);
                });

                window._eventListenersInitialized = true;
            }
        """)
    except Exception as e:
        logger.error(f"Failed to ensure event listeners: {str(e)}")
        raise
def get_netloc(url: str) -> str:
    """Extract the domain name, for example, extract 'zhihu' from 'zhihu.com', extract 'google' from 'www.google.com.hk' """
    url = urlparse(url)
    try:
        if url.netloc.startswith("www"):
            # Skip the leading "www." label and take the next one.
            netloc = re.findall(r".*?\.(.*?)\..*?", url.netloc)[0]
        else:
            netloc = re.findall(r"(.*?)\..*?", url.netloc)[0]
    except (IndexError, TypeError):
        # No dot-separated label found (or a non-text netloc).
        netloc = ""
    return netloc


# match_function name -> URLEvaluator method name
_URL_MATCHERS = {
    "url_exactly_match": "url_exact_match",
    "url_included_match": "url_include_match",
    "url_semantic_match": "url_semantic_match",
}

# match_function name -> ElementEvaluator value-matching method name
_ELEMENT_VALUE_MATCHERS = {
    "element_value_exactly_match": "element_value_exact_match",
    "element_value_included_match": "element_value_include_match",
    "element_value_semantic_match": "element_value_semantic_match",
}


def _element_value_score(page, evaluate, match_function, input_path, element_value):
    """Score one element_value_* step; 0 when the element or value is missing."""
    if input_path is None or element_value is None:
        return 0
    if match_function == "element_value_semantic_match" and len(element_value) == 0:
        return 0
    # An optional "path" pins the value check to one specific element.
    if "path" in evaluate:
        path_score = ElementEvaluator.path_exact_match(
            input_path, evaluate["path"], "selector", page)
        if path_score == 0:
            return 0
    matcher = getattr(ElementEvaluator, _ELEMENT_VALUE_MATCHERS[match_function])
    return matcher(element_value, evaluate["reference_answer"])


def step_evaluate(page: "Page", evaluate_steps=None, input_path=None, element_value=None):
    """
    Evaluate step score

    Every reference step whose score is not yet 1 is re-checked against the
    current page / element, and its score only ever increases.

    Args:
        page: current Playwright page (its ``url`` is used for URL steps).
        evaluate_steps: list of reference step dicts, updated in place.
        input_path: locator of the element the agent acted on, if any.
        element_value: value/text associated with that element, if any.

    Returns:
        ``(evaluate_steps, match_result)`` where ``match_result`` lists a
        ``{match_function: reference_answer}`` dict for each step that is
        satisfied after being checked in this call.
    """
    if evaluate_steps is None:
        evaluate_steps = []
    match_result = []
    for evaluate in evaluate_steps:
        if evaluate["score"] == 1:
            continue
        match_function = evaluate["match_function"]
        # Default for match functions that are not implemented yet
        # (element_path_included_match, text_*), so they neither crash nor
        # inherit the score of the previous step.
        score = 0
        if match_function in _URL_MATCHERS:
            matcher = getattr(URLEvaluator, _URL_MATCHERS[match_function])
            score = matcher(page.url, evaluate["reference_answer"], evaluate["key"])
        elif match_function == "element_path_exactly_match":
            # NOTE(review): the page/reference netloc used to be passed here
            # too, but the evaluators take no such parameters (TypeError);
            # confirm whether a same-site check should be reinstated.
            score = ElementEvaluator.path_exact_match(
                input_path, evaluate["reference_answer"], evaluate["method"], page)
        elif match_function in _ELEMENT_VALUE_MATCHERS:
            score = _element_value_score(
                page, evaluate, match_function, input_path, element_value)

        evaluate["score"] = max(evaluate["score"], score)
        if evaluate["score"] >= 1:
            match_result.append(
                {evaluate["match_function"]: evaluate["reference_answer"]})

    return evaluate_steps, match_result
"element_value_included_match": + score = ElementEvaluator.element_value_include_match( + event["target_value"], evaluate["reference_answer"]) + + elif match_function == "element_value_semantic_match": + score = ElementEvaluator.element_value_semantic_match( + event["target_value"], evaluate["reference_answer"]) + + evaluate["score"] = max(evaluate["score"], score) + if evaluate["score"] >= 1: + match_result.append( + {evaluate["match_function"]: evaluate["reference_answer"]}) + step_score += evaluate["score"] + + return evaluate_steps, match_result diff --git a/demo_agent/run_demo.py b/demo_agent/run_demo.py index 82cc2c96..07aeac87 100644 --- a/demo_agent/run_demo.py +++ b/demo_agent/run_demo.py @@ -1,4 +1,5 @@ import argparse +import gymnasium as gym # locally defined agent from agent import DemoAgentArgs @@ -29,13 +30,13 @@ def parse_args(): parser.add_argument( "--task_name", type=str, - default="openended", + default="browsergym/webcanvas.mind2web-live.1", help="Name of the Browsergym task to run. If 'openended', you need to specify a 'start_url'", ) parser.add_argument( "--start_url", type=str, - default="https://www.google.com", + default="https://www.google.com/", help="Starting URL (only for the openended task).", ) parser.add_argument( @@ -102,6 +103,8 @@ def main(): env_args.wait_for_user_message = True env_args.task_kwargs = {"start_url": args.start_url} + print(args.task_name) + # setting up the experiment exp_args = ExpArgs( env_args=env_args, @@ -115,7 +118,6 @@ def main(): # loading and printing results exp_result = get_exp_result(exp_args.exp_dir) exp_record = exp_result.get_exp_record() - for key, val in exp_record.items(): print(f"{key}: {val}")