Skip to content

Commit

Permalink
Merge pull request #1081 from shenchucheng/feature-browser-engine-ut
Browse files Browse the repository at this point in the history
Replace *.deepwisdom.ai with a local server for scraping
  • Loading branch information
geekan authored Mar 23, 2024
2 parents ac755e7 + d53cfd3 commit 2fbb154
Show file tree
Hide file tree
Showing 6 changed files with 50 additions and 35 deletions.
2 changes: 1 addition & 1 deletion metagpt/tools/web_browser_engine_selenium.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ def __init__(self, proxy: str = None):

def get(self, url, **kwargs):
if "proxies" not in kwargs and self.proxy:
kwargs["proxies"] = {"all_proxy": self.proxy}
kwargs["proxies"] = {"all": self.proxy}
return super().get(url, **kwargs)


Expand Down
9 changes: 5 additions & 4 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,12 +113,13 @@ async def pipe(reader, writer):
while not reader.at_eof():
writer.write(await reader.read(2048))
writer.close()
await writer.wait_closed()

async def handle_client(reader, writer):
data = await reader.readuntil(b"\r\n\r\n")
print(f"Proxy: {data}") # checking with capfd fixture
infos = pattern.match(data)
host, port = infos.group("host"), infos.group("port")
print(f"Proxy: {host}") # checking with capfd fixture
port = int(port) if port else 80
remote_reader, remote_writer = await asyncio.open_connection(host, port)
if data.startswith(b"CONNECT"):
Expand Down Expand Up @@ -257,10 +258,10 @@ async def start():
server = aiohttp.web.Server(handler)
runner = aiohttp.web.ServerRunner(server)
await runner.setup()
site = aiohttp.web.TCPSite(runner, "localhost", 0)
site = aiohttp.web.TCPSite(runner, "127.0.0.1", 0)
await site.start()
host, port = site._server.sockets[0].getsockname()
return site, f"http://{host}:{port}"
_, port, *_ = site._server.sockets[0].getsockname()
return site, f"http://127.0.0.1:{port}"

return start

Expand Down
5 changes: 3 additions & 2 deletions tests/metagpt/tools/libs/test_web_scraping.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@


@pytest.mark.asyncio
async def test_scrape_web_playwright():
test_url = "https://www.deepwisdom.ai"
async def test_scrape_web_playwright(http_server):
server, test_url = await http_server()

result = await scrape_web_playwright(test_url)

Expand All @@ -21,3 +21,4 @@ async def test_scrape_web_playwright():
assert not result["inner_text"].endswith(" ")
assert not result["html"].startswith(" ")
assert not result["html"].endswith(" ")
await server.stop()
11 changes: 7 additions & 4 deletions tests/metagpt/tools/test_web_browser_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,14 +9,16 @@

@pytest.mark.asyncio
@pytest.mark.parametrize(
"browser_type, url, urls",
"browser_type",
[
(WebBrowserEngineType.PLAYWRIGHT, "https://deepwisdom.ai", ("https://deepwisdom.ai",)),
(WebBrowserEngineType.SELENIUM, "https://deepwisdom.ai", ("https://deepwisdom.ai",)),
WebBrowserEngineType.PLAYWRIGHT,
WebBrowserEngineType.SELENIUM,
],
ids=["playwright", "selenium"],
)
async def test_scrape_web_page(browser_type, url, urls):
async def test_scrape_web_page(browser_type, http_server):
server, url = await http_server()
urls = [url, url, url]
browser = web_browser_engine.WebBrowserEngine(engine=browser_type)
result = await browser.run(url)
assert isinstance(result, WebPage)
Expand All @@ -27,6 +29,7 @@ async def test_scrape_web_page(browser_type, url, urls):
assert isinstance(results, list)
assert len(results) == len(urls) + 1
assert all(("MetaGPT" in i.inner_text) for i in results)
await server.stop()


if __name__ == "__main__":
Expand Down
26 changes: 19 additions & 7 deletions tests/metagpt/tools/test_web_browser_engine_playwright.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,18 +9,28 @@

@pytest.mark.asyncio
@pytest.mark.parametrize(
"browser_type, use_proxy, kwagrs, url, urls",
"browser_type, use_proxy, kwagrs,",
[
("chromium", {"proxy": True}, {}, "https://www.deepwisdom.ai", ("https://www.deepwisdom.ai",)),
("firefox", {}, {"ignore_https_errors": True}, "https://www.deepwisdom.ai", ("https://www.deepwisdom.ai",)),
("webkit", {}, {"ignore_https_errors": True}, "https://www.deepwisdom.ai", ("https://www.deepwisdom.ai",)),
("chromium", {"proxy": True}, {}),
(
"firefox",
{},
{"ignore_https_errors": True},
),
(
"webkit",
{},
{"ignore_https_errors": True},
),
],
ids=["chromium-normal", "firefox-normal", "webkit-normal"],
)
async def test_scrape_web_page(browser_type, use_proxy, kwagrs, url, urls, proxy, capfd):
async def test_scrape_web_page(browser_type, use_proxy, kwagrs, proxy, capfd, http_server):
server, url = await http_server()
urls = [url, url, url]
proxy_url = None
if use_proxy:
server, proxy_url = await proxy()
proxy_server, proxy_url = await proxy()
browser = web_browser_engine_playwright.PlaywrightWrapper(browser_type=browser_type, proxy=proxy_url, **kwagrs)
result = await browser.run(url)
assert isinstance(result, WebPage)
Expand All @@ -32,8 +42,10 @@ async def test_scrape_web_page(browser_type, use_proxy, kwagrs, url, urls, proxy
assert len(results) == len(urls) + 1
assert all(("MetaGPT" in i.inner_text) for i in results)
if use_proxy:
server.close()
proxy_server.close()
await proxy_server.wait_closed()
assert "Proxy:" in capfd.readouterr().out
await server.stop()


if __name__ == "__main__":
Expand Down
32 changes: 15 additions & 17 deletions tests/metagpt/tools/test_web_browser_engine_selenium.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-


import browsers
import pytest

Expand All @@ -10,51 +11,48 @@

@pytest.mark.asyncio
@pytest.mark.parametrize(
"browser_type, use_proxy, url, urls",
"browser_type, use_proxy,",
[
pytest.param(
"chrome",
True,
"https://deepwisdom.ai",
("https://deepwisdom.ai",),
False,
marks=pytest.mark.skipif(not browsers.get("chrome"), reason="chrome browser not found"),
),
pytest.param(
"firefox",
False,
"https://deepwisdom.ai",
("https://deepwisdom.ai",),
marks=pytest.mark.skipif(not browsers.get("firefox"), reason="firefox browser not found"),
),
pytest.param(
"edge",
False,
"https://deepwisdom.ai",
("https://deepwisdom.ai",),
marks=pytest.mark.skipif(not browsers.get("msedge"), reason="edge browser not found"),
),
],
ids=["chrome-normal", "firefox-normal", "edge-normal"],
)
async def test_scrape_web_page(browser_type, use_proxy, url, urls, proxy, capfd):
async def test_scrape_web_page(browser_type, use_proxy, proxy, capfd, http_server):
# Prerequisites
# firefox, chrome, Microsoft Edge
server, url = await http_server()
urls = [url, url, url]
proxy_url = None
if use_proxy:
server, proxy_url = await proxy()
proxy_server, proxy_url = await proxy()
browser = web_browser_engine_selenium.SeleniumWrapper(browser_type=browser_type, proxy=proxy_url)
result = await browser.run(url)
assert isinstance(result, WebPage)
assert "MetaGPT" in result.inner_text

if urls:
results = await browser.run(url, *urls)
assert isinstance(results, list)
assert len(results) == len(urls) + 1
assert all(("MetaGPT" in i.inner_text) for i in results)
results = await browser.run(url, *urls)
assert isinstance(results, list)
assert len(results) == len(urls) + 1
assert all(("MetaGPT" in i.inner_text) for i in results)
if use_proxy:
server.close()
assert "Proxy:" in capfd.readouterr().out
proxy_server.close()
await proxy_server.wait_closed()
assert "Proxy: localhost" in capfd.readouterr().out
await server.stop()


if __name__ == "__main__":
Expand Down

0 comments on commit 2fbb154

Please sign in to comment.