diff --git a/README.md b/README.md index 529a9df..7c8297a 100644 --- a/README.md +++ b/README.md @@ -108,7 +108,7 @@ playwright install - `--dark-mode`: Enables dark mode for the user interface. 3. **Access the WebUI:** Open your web browser and navigate to `http://127.0.0.1:7788`. 4. **Using Your Own Browser(Optional):** - - Set `CHROME_PATH` to the executable path of your browser and `CHROME_USER_DATA` to the user data directory of your browser. + - Set `CHROME_PATH` to the executable path of your browser and `CHROME_USER_DATA` to the user data directory of your browser. Leave `CHROME_USER_DATA` empty if you want to use local user data. - Windows ```env CHROME_PATH="C:\Program Files\Google\Chrome\Application\chrome.exe" @@ -118,7 +118,7 @@ playwright install - Mac ```env CHROME_PATH="/Applications/Google Chrome.app/Contents/MacOS/Google Chrome" - CHROME_USER_DATA="~/Library/Application Support/Google/Chrome/Profile 1" + CHROME_USER_DATA="/Users/YourUsername/Library/Application Support/Google/Chrome" ``` - Close all Chrome windows - Open the WebUI in a non-Chrome browser, such as Firefox or Edge. This is important because the persistent browser context will use the Chrome data when running the agent. 
diff --git a/requirements.txt b/requirements.txt index 8fa4294..34e4b0f 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,4 @@ browser-use==0.1.29 pyperclip==1.9.0 gradio==5.10.0 +json-repair diff --git a/src/agent/custom_agent.py b/src/agent/custom_agent.py index 77ba6c3..81f33c8 100644 --- a/src/agent/custom_agent.py +++ b/src/agent/custom_agent.py @@ -8,10 +8,11 @@ import base64 import io import platform -from browser_use.agent.prompts import SystemPrompt +from browser_use.agent.prompts import SystemPrompt, AgentMessagePrompt from browser_use.agent.service import Agent from browser_use.agent.views import ( ActionResult, + ActionModel, AgentHistoryList, AgentOutput, AgentHistory, @@ -30,6 +31,7 @@ from langchain_core.messages import ( BaseMessage, ) +from json_repair import repair_json from src.utils.agent_state import AgentState from .custom_massage_manager import CustomMassageManager @@ -52,6 +54,7 @@ def __init__( max_failures: int = 5, retry_delay: int = 10, system_prompt_class: Type[SystemPrompt] = SystemPrompt, + agent_prompt_class: Type[AgentMessagePrompt] = AgentMessagePrompt, max_input_tokens: int = 128000, validate_output: bool = False, include_attributes: list[str] = [ @@ -98,7 +101,7 @@ def __init__( register_done_callback=register_done_callback, tool_calling_method=tool_calling_method ) - if self.model_name in ["deepseek-reasoner"] or self.model_name.startswith("deepseek-r1"): + if self.model_name in ["deepseek-reasoner"] or "deepseek-r1" in self.model_name: # deepseek-reasoner does not support function calling self.use_deepseek_r1 = True # deepseek-reasoner only support 64000 context @@ -106,20 +109,23 @@ def __init__( else: self.use_deepseek_r1 = False + # record last actions + self._last_actions = None # custom new info self.add_infos = add_infos # agent_state for Stop self.agent_state = agent_state + self.agent_prompt_class = agent_prompt_class self.message_manager = CustomMassageManager( llm=self.llm, task=self.task, 
action_descriptions=self.controller.registry.get_prompt_description(), system_prompt_class=self.system_prompt_class, + agent_prompt_class=agent_prompt_class, max_input_tokens=self.max_input_tokens, include_attributes=self.include_attributes, max_error_length=self.max_error_length, - max_actions_per_step=self.max_actions_per_step, - use_deepseek_r1=self.use_deepseek_r1 + max_actions_per_step=self.max_actions_per_step ) def _setup_action_models(self) -> None: @@ -186,9 +192,11 @@ async def get_next_action(self, input_messages: list[BaseMessage]) -> AgentOutpu logger.info(ai_message.reasoning_content) logger.info(f"🤯 End Deep Thinking") if isinstance(ai_message.content, list): - parsed_json = json.loads(ai_message.content[0].replace("```json", "").replace("```", "")) + ai_content = ai_message.content[0].replace("```json", "").replace("```", "") else: - parsed_json = json.loads(ai_message.content.replace("```json", "").replace("```", "")) + ai_content = ai_message.content.replace("```json", "").replace("```", "") + ai_content = repair_json(ai_content) + parsed_json = json.loads(ai_content) parsed: AgentOutput = self.AgentOutput(**parsed_json) if parsed is None: logger.debug(ai_message.content) @@ -197,9 +205,11 @@ async def get_next_action(self, input_messages: list[BaseMessage]) -> AgentOutpu ai_message = self.llm.invoke(input_messages) self.message_manager._add_message_with_tokens(ai_message) if isinstance(ai_message.content, list): - parsed_json = json.loads(ai_message.content[0].replace("```json", "").replace("```", "")) + ai_content = ai_message.content[0].replace("```json", "").replace("```", "") else: - parsed_json = json.loads(ai_message.content.replace("```json", "").replace("```", "")) + ai_content = ai_message.content.replace("```json", "").replace("```", "") + ai_content = repair_json(ai_content) + parsed_json = json.loads(ai_content) parsed: AgentOutput = self.AgentOutput(**parsed_json) if parsed is None: logger.debug(ai_message.content) @@ -222,7 +232,7 
@@ async def step(self, step_info: Optional[CustomAgentStepInfo] = None) -> None: try: state = await self.browser_context.get_state(use_vision=self.use_vision) - self.message_manager.add_state_message(state, self._last_result, step_info) + self.message_manager.add_state_message(state, self._last_actions, self._last_result, step_info) input_messages = self.message_manager.get_messages() try: model_output = await self.get_next_action(input_messages) @@ -231,27 +241,31 @@ async def step(self, step_info: Optional[CustomAgentStepInfo] = None) -> None: self.update_step_info(model_output, step_info) logger.info(f"🧠 All Memory: \n{step_info.memory}") self._save_conversation(input_messages, model_output) - # should we remove last state message? at least, deepseek-reasoner cannot remove if self.model_name != "deepseek-reasoner": - self.message_manager._remove_last_state_message() + # remove prev message + self.message_manager._remove_state_message_by_index(-1) except Exception as e: # model call failed, remove last state message from history - self.message_manager._remove_last_state_message() + self.message_manager._remove_state_message_by_index(-1) raise e + actions: list[ActionModel] = model_output.action result: list[ActionResult] = await self.controller.multi_act( - model_output.action, self.browser_context + actions, self.browser_context ) - if len(result) != len(model_output.action): + if len(result) != len(actions): # I think something changes, such information should let LLM know - for ri in range(len(result), len(model_output.action)): + for ri in range(len(result), len(actions)): result.append(ActionResult(extracted_content=None, include_in_memory=True, - error=f"{model_output.action[ri].model_dump_json(exclude_unset=True)} is Failed to execute. \ - Something new appeared after action {model_output.action[len(result) - 1].model_dump_json(exclude_unset=True)}", + error=f"{actions[ri].model_dump_json(exclude_unset=True)} is Failed to execute. 
\ + Something new appeared after action {actions[len(result) - 1].model_dump_json(exclude_unset=True)}", is_done=False)) + if len(actions) == 0: + # TODO: fix no action case + result = [ActionResult(is_done=True, extracted_content=step_info.memory, include_in_memory=True)] self._last_result = result - + self._last_actions = actions if len(result) > 0 and result[-1].is_done: logger.info(f"📄 Result: {result[-1].extracted_content}") diff --git a/src/agent/custom_massage_manager.py b/src/agent/custom_massage_manager.py index 3a6bb32..f39c999 100644 --- a/src/agent/custom_massage_manager.py +++ b/src/agent/custom_massage_manager.py @@ -5,8 +5,8 @@ from browser_use.agent.message_manager.service import MessageManager from browser_use.agent.message_manager.views import MessageHistory -from browser_use.agent.prompts import SystemPrompt -from browser_use.agent.views import ActionResult, AgentStepInfo +from browser_use.agent.prompts import SystemPrompt, AgentMessagePrompt +from browser_use.agent.views import ActionResult, AgentStepInfo, ActionModel from browser_use.browser.views import BrowserState from langchain_core.language_models import BaseChatModel from langchain_anthropic import ChatAnthropic @@ -31,14 +31,14 @@ def __init__( task: str, action_descriptions: str, system_prompt_class: Type[SystemPrompt], + agent_prompt_class: Type[AgentMessagePrompt], max_input_tokens: int = 128000, estimated_characters_per_token: int = 3, image_tokens: int = 800, include_attributes: list[str] = [], max_error_length: int = 400, max_actions_per_step: int = 10, - message_context: Optional[str] = None, - use_deepseek_r1: bool = False + message_context: Optional[str] = None ): super().__init__( llm=llm, @@ -53,8 +53,7 @@ def __init__( max_actions_per_step=max_actions_per_step, message_context=message_context ) - self.tool_id = 1 - self.use_deepseek_r1 = use_deepseek_r1 + self.agent_prompt_class = agent_prompt_class # Custom: Move Task info to state_message self.history = MessageHistory() 
self._add_message_with_tokens(self.system_prompt) @@ -75,13 +74,15 @@ def cut_messages(self): def add_state_message( self, state: BrowserState, + actions: Optional[List[ActionModel]] = None, result: Optional[List[ActionResult]] = None, step_info: Optional[AgentStepInfo] = None, ) -> None: """Add browser state as human message""" # otherwise add state message and result to next message (which will not stay in memory) - state_message = CustomAgentMessagePrompt( + state_message = self.agent_prompt_class( state, + actions, result, include_attributes=self.include_attributes, max_error_length=self.max_error_length, @@ -102,3 +103,15 @@ def _count_text_tokens(self, text: str) -> int: len(text) // self.estimated_characters_per_token ) # Rough estimate if no tokenizer available return tokens + + def _remove_state_message_by_index(self, remove_ind=-1) -> None: + """Remove the abs(remove_ind)-th most recent state (human) message from history""" + i = len(self.history.messages) - 1 + remove_cnt = 0 + while i >= 0: + if isinstance(self.history.messages[i].message, HumanMessage): + remove_cnt += 1 + if remove_cnt == abs(remove_ind): + self.history.remove_message(i) + break + i -= 1 \ No newline at end of file diff --git a/src/agent/custom_prompts.py b/src/agent/custom_prompts.py index f42859e..1e1df63 100644 --- a/src/agent/custom_prompts.py +++ b/src/agent/custom_prompts.py @@ -2,7 +2,7 @@ from typing import List, Optional from browser_use.agent.prompts import SystemPrompt, AgentMessagePrompt -from browser_use.agent.views import ActionResult +from browser_use.agent.views import ActionResult, ActionModel from browser_use.browser.views import BrowserState from langchain_core.messages import HumanMessage, SystemMessage @@ -56,7 +56,7 @@ def important_rules(self) -> str: - Use scroll to find elements you are looking for 5. TASK COMPLETION: - - If you think all the requirements of user\'s instruction have been completed and no further operation is required, output the done action to terminate the operation process.
+ - If you think all the requirements of user\'s instruction have been completed and no further operation is required, output the **Done** action to terminate the operation process. - Don't hallucinate actions. - If the task requires specific information - make sure to include everything in the done function. This is what the user will see. - If you are running out of steps (current step), think about speeding it up, and ALWAYS use the done action as the last action. @@ -140,6 +140,7 @@ class CustomAgentMessagePrompt(AgentMessagePrompt): def __init__( self, state: BrowserState, + actions: Optional[List[ActionModel]] = None, result: Optional[List[ActionResult]] = None, include_attributes: list[str] = [], max_error_length: int = 400, @@ -151,10 +152,11 @@ def __init__( max_error_length=max_error_length, step_info=step_info ) + self.actions = actions def get_user_message(self) -> HumanMessage: if self.step_info: - step_info_description = f'Current step: {self.step_info.step_number + 1}/{self.step_info.max_steps}' + step_info_description = f'Current step: {self.step_info.step_number}/{self.step_info.max_steps}\n' else: step_info_description = '' @@ -181,7 +183,7 @@ def get_user_message(self) -> HumanMessage: state_description = f""" {step_info_description} -1. Task: {self.step_info.task} +1. Task: {self.step_info.task}. 2. Hints(Optional): {self.step_info.add_infos} 3. 
Memory: @@ -193,17 +195,20 @@ def get_user_message(self) -> HumanMessage: {elements_text} """ - if self.result: - + if self.actions and self.result: + state_description += "\n **Previous Actions** \n" + state_description += f'Previous step: {self.step_info.step_number-1}/{self.step_info.max_steps} \n' for i, result in enumerate(self.result): + action = self.actions[i] + state_description += f"Previous action {i + 1}/{len(self.result)}: {action.model_dump_json(exclude_unset=True)}\n" if result.include_in_memory: if result.extracted_content: - state_description += f"\nResult of previous action {i + 1}/{len(self.result)}: {result.extracted_content}" + state_description += f"Result of previous action {i + 1}/{len(self.result)}: {result.extracted_content}\n" if result.error: # only use last 300 characters of error error = result.error[-self.max_error_length:] state_description += ( - f"\nError of previous action {i + 1}/{len(self.result)}: ...{error}" + f"Error of previous action {i + 1}/{len(self.result)}: ...{error}\n" ) if self.state.screenshot: diff --git a/src/controller/custom_controller.py b/src/controller/custom_controller.py index a89bef0..4e2ca0f 100644 --- a/src/controller/custom_controller.py +++ b/src/controller/custom_controller.py @@ -3,7 +3,7 @@ from pydantic import BaseModel from browser_use.agent.views import ActionResult from browser_use.browser.context import BrowserContext -from browser_use.controller.service import Controller +from browser_use.controller.service import Controller, DoneAction class CustomController(Controller): diff --git a/src/utils/utils.py b/src/utils/utils.py index 0cc537b..c4218cd 100644 --- a/src/utils/utils.py +++ b/src/utils/utils.py @@ -94,9 +94,9 @@ def get_llm_model(provider: str, **kwargs): else: base_url = kwargs.get("base_url") - if kwargs.get("model_name", "qwen2.5:7b").startswith("deepseek-r1"): + if "deepseek-r1" in kwargs.get("model_name", "qwen2.5:7b"): return DeepSeekR1ChatOllama( - model=kwargs.get("model_name", 
"deepseek-r1:7b"), + model=kwargs.get("model_name", "deepseek-r1:14b"), temperature=kwargs.get("temperature", 0.0), num_ctx=kwargs.get("num_ctx", 32000), base_url=kwargs.get("base_url", base_url), @@ -106,6 +106,7 @@ def get_llm_model(provider: str, **kwargs): model=kwargs.get("model_name", "qwen2.5:7b"), temperature=kwargs.get("temperature", 0.0), num_ctx=kwargs.get("num_ctx", 32000), + num_predict=kwargs.get("num_predict", 1024), base_url=kwargs.get("base_url", base_url), ) elif provider == "azure_openai": diff --git a/tests/test_browser_use.py b/tests/test_browser_use.py index 1921995..c9d1129 100644 --- a/tests/test_browser_use.py +++ b/tests/test_browser_use.py @@ -32,10 +32,14 @@ async def test_browser_use_org(): # api_key=os.getenv("AZURE_OPENAI_API_KEY", ""), # ) + # llm = utils.get_llm_model( + # provider="deepseek", + # model_name="deepseek-chat", + # temperature=0.8 + # ) + llm = utils.get_llm_model( - provider="deepseek", - model_name="deepseek-chat", - temperature=0.8 + provider="ollama", model_name="deepseek-r1:14b", temperature=0.5 ) window_w, window_h = 1920, 1080 @@ -99,151 +103,29 @@ async def test_browser_use_custom(): from playwright.async_api import async_playwright from src.agent.custom_agent import CustomAgent - from src.agent.custom_prompts import CustomSystemPrompt + from src.agent.custom_prompts import CustomSystemPrompt, CustomAgentMessagePrompt from src.browser.custom_browser import CustomBrowser from src.browser.custom_context import BrowserContextConfig from src.controller.custom_controller import CustomController window_w, window_h = 1920, 1080 - + # llm = utils.get_llm_model( - # provider="azure_openai", + # provider="openai", # model_name="gpt-4o", # temperature=0.8, - # base_url=os.getenv("AZURE_OPENAI_ENDPOINT", ""), - # api_key=os.getenv("AZURE_OPENAI_API_KEY", ""), + # base_url=os.getenv("OPENAI_ENDPOINT", ""), + # api_key=os.getenv("OPENAI_API_KEY", ""), # ) llm = utils.get_llm_model( - provider="gemini", - 
model_name="gemini-2.0-flash-exp", - temperature=1.0, - api_key=os.getenv("GOOGLE_API_KEY", "") + provider="azure_openai", + model_name="gpt-4o", + temperature=0.8, + base_url=os.getenv("AZURE_OPENAI_ENDPOINT", ""), + api_key=os.getenv("AZURE_OPENAI_API_KEY", ""), ) - # llm = utils.get_llm_model( - # provider="deepseek", - # model_name="deepseek-chat", - # temperature=0.8 - # ) - - # llm = utils.get_llm_model( - # provider="ollama", model_name="qwen2.5:7b", temperature=0.8 - # ) - - controller = CustomController() - use_own_browser = False - disable_security = True - use_vision = True # Set to False when using DeepSeek - tool_call_in_content = True # Set to True when using Ollama - max_actions_per_step = 1 - playwright = None - browser_context_ = None - try: - if use_own_browser: - playwright = await async_playwright().start() - chrome_exe = os.getenv("CHROME_PATH", "") - chrome_use_data = os.getenv("CHROME_USER_DATA", "") - browser_context_ = await playwright.chromium.launch_persistent_context( - user_data_dir=chrome_use_data, - executable_path=chrome_exe, - no_viewport=False, - headless=False, # 保持浏览器窗口可见 - user_agent=( - "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 " - "(KHTML, like Gecko) Chrome/85.0.4183.102 Safari/537.36" - ), - java_script_enabled=True, - bypass_csp=disable_security, - ignore_https_errors=disable_security, - record_video_dir="./tmp/record_videos", - record_video_size={"width": window_w, "height": window_h}, - ) - else: - browser_context_ = None - - browser = CustomBrowser( - config=BrowserConfig( - headless=False, - disable_security=True, - extra_chromium_args=[f"--window-size={window_w},{window_h}"], - ) - ) - - async with await browser.new_context( - config=BrowserContextConfig( - trace_path="./tmp/result_processing", - save_recording_path="./tmp/record_videos", - no_viewport=False, - browser_window_size=BrowserContextWindowSize( - width=window_w, height=window_h - ), - ), - context=browser_context_, - ) as 
browser_context: - agent = CustomAgent( - task="go to google.com and type 'OpenAI' click search and give me the first url", - add_infos="", # some hints for llm to complete the task - llm=llm, - browser_context=browser_context, - controller=controller, - system_prompt_class=CustomSystemPrompt, - use_vision=use_vision, - tool_call_in_content=tool_call_in_content, - max_actions_per_step=max_actions_per_step - ) - history: AgentHistoryList = await agent.run(max_steps=10) - - print("Final Result:") - pprint(history.final_result(), indent=4) - - print("\nErrors:") - pprint(history.errors(), indent=4) - - # e.g. xPaths the model clicked on - print("\nModel Outputs:") - pprint(history.model_actions(), indent=4) - - print("\nThoughts:") - pprint(history.model_thoughts(), indent=4) - # close browser - except Exception: - import traceback - - traceback.print_exc() - finally: - # 显式关闭持久化上下文 - if browser_context_: - await browser_context_.close() - - # 关闭 Playwright 对象 - if playwright: - await playwright.stop() - - await browser.close() - - -async def test_browser_use_custom_v2(): - from browser_use.browser.context import BrowserContextWindowSize - from browser_use.browser.browser import BrowserConfig - from playwright.async_api import async_playwright - - from src.agent.custom_agent import CustomAgent - from src.agent.custom_prompts import CustomSystemPrompt - from src.browser.custom_browser import CustomBrowser - from src.browser.custom_context import BrowserContextConfig - from src.controller.custom_controller import CustomController - - window_w, window_h = 1920, 1080 - - # llm = utils.get_llm_model( - # provider="azure_openai", - # model_name="gpt-4o", - # temperature=0.8, - # base_url=os.getenv("AZURE_OPENAI_ENDPOINT", ""), - # api_key=os.getenv("AZURE_OPENAI_API_KEY", ""), - # ) - # llm = utils.get_llm_model( # provider="gemini", # model_name="gemini-2.0-flash-exp", @@ -272,20 +154,24 @@ async def test_browser_use_custom_v2(): # ) controller = CustomController() - 
use_own_browser = False + use_own_browser = True disable_security = True use_vision = False # Set to False when using DeepSeek - max_actions_per_step = 10 + max_actions_per_step = 1 playwright = None browser = None browser_context = None try: + extra_chromium_args = [f"--window-size={window_w},{window_h}"] if use_own_browser: chrome_path = os.getenv("CHROME_PATH", None) if chrome_path == "": chrome_path = None + chrome_user_data = os.getenv("CHROME_USER_DATA", None) + if chrome_user_data: + extra_chromium_args += [f"--user-data-dir={chrome_user_data}"] else: chrome_path = None browser = CustomBrowser( @@ -293,7 +179,7 @@ async def test_browser_use_custom_v2(): headless=False, disable_security=disable_security, chrome_instance_path=chrome_path, - extra_chromium_args=[f"--window-size={window_w},{window_h}"], + extra_chromium_args=extra_chromium_args, ) ) browser_context = await browser.new_context( @@ -307,17 +193,18 @@ async def test_browser_use_custom_v2(): ) ) agent = CustomAgent( - task="go to google.com and type 'Nvidia' click search and give me the first url", + task="Search 'Nvidia' and give me the first url", add_infos="", # some hints for llm to complete the task llm=llm, browser=browser, browser_context=browser_context, controller=controller, system_prompt_class=CustomSystemPrompt, + agent_prompt_class=CustomAgentMessagePrompt, use_vision=use_vision, max_actions_per_step=max_actions_per_step ) - history: AgentHistoryList = await agent.run(max_steps=10) + history: AgentHistoryList = await agent.run(max_steps=100) print("Final Result:") pprint(history.final_result(), indent=4) @@ -349,5 +236,4 @@ async def test_browser_use_custom_v2(): if __name__ == "__main__": # asyncio.run(test_browser_use_org()) - # asyncio.run(test_browser_use_custom()) - asyncio.run(test_browser_use_custom_v2()) + asyncio.run(test_browser_use_custom()) diff --git a/tests/test_llm_api.py b/tests/test_llm_api.py index 8809b89..6075896 100644 --- a/tests/test_llm_api.py +++ 
b/tests/test_llm_api.py @@ -156,7 +156,7 @@ def test_deepseek_r1_ollama_model(): # test_openai_model() # test_gemini_model() # test_azure_openai_model() - # test_deepseek_model() + test_deepseek_model() # test_ollama_model() - test_deepseek_r1_model() + # test_deepseek_r1_model() # test_deepseek_r1_ollama_model() \ No newline at end of file diff --git a/webui.py b/webui.py index f2035f3..c6808ab 100644 --- a/webui.py +++ b/webui.py @@ -28,7 +28,7 @@ from src.utils import utils from src.agent.custom_agent import CustomAgent from src.browser.custom_browser import CustomBrowser -from src.agent.custom_prompts import CustomSystemPrompt +from src.agent.custom_prompts import CustomSystemPrompt, CustomAgentMessagePrompt from src.browser.custom_context import BrowserContextConfig, CustomBrowserContext from src.controller.custom_controller import CustomController from gradio.themes import Citrus, Default, Glass, Monochrome, Ocean, Origin, Soft, Base @@ -224,20 +224,24 @@ async def run_org_agent( # Clear any previous stop request _global_agent_state.clear_stop() + extra_chromium_args = [f"--window-size={window_w},{window_h}"] if use_own_browser: chrome_path = os.getenv("CHROME_PATH", None) if chrome_path == "": chrome_path = None + chrome_user_data = os.getenv("CHROME_USER_DATA", None) + if chrome_user_data: + extra_chromium_args += [f"--user-data-dir={chrome_user_data}"] else: chrome_path = None - + if _global_browser is None: _global_browser = Browser( config=BrowserConfig( headless=headless, disable_security=disable_security, chrome_instance_path=chrome_path, - extra_chromium_args=[f"--window-size={window_w},{window_h}"], + extra_chromium_args=extra_chromium_args, ) ) @@ -315,10 +319,14 @@ async def run_custom_agent( # Clear any previous stop request _global_agent_state.clear_stop() + extra_chromium_args = [f"--window-size={window_w},{window_h}"] if use_own_browser: chrome_path = os.getenv("CHROME_PATH", None) if chrome_path == "": chrome_path = None + chrome_user_data = 
os.getenv("CHROME_USER_DATA", None) + if chrome_user_data: + extra_chromium_args += [f"--user-data-dir={chrome_user_data}"] else: chrome_path = None @@ -331,7 +339,7 @@ async def run_custom_agent( headless=headless, disable_security=disable_security, chrome_instance_path=chrome_path, - extra_chromium_args=[f"--window-size={window_w},{window_h}"], + extra_chromium_args=extra_chromium_args, ) ) @@ -357,6 +365,7 @@ async def run_custom_agent( browser_context=_global_browser_context, controller=controller, system_prompt_class=CustomSystemPrompt, + agent_prompt_class=CustomAgentMessagePrompt, max_actions_per_step=max_actions_per_step, agent_state=_global_agent_state, tool_calling_method=tool_calling_method