Skip to content

Commit

Permalink
Implement dynamic waiting mechanism for new page openings within a t…
Browse files Browse the repository at this point in the history
…imeout
  • Loading branch information
mohamedmamdouh22 committed Jan 14, 2025
1 parent 9b1cdbc commit 9154537
Show file tree
Hide file tree
Showing 6 changed files with 27 additions and 10 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

.idea/
.vscode/
baboon/
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
Expand Down
2 changes: 1 addition & 1 deletion core/pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "harambe-core"
version = "0.59.2"
version = "0.59.3"
description = "Core types for harambe SDK 🐒🍌"
authors = [
{ name = "Adam Watkins", email = "[email protected]" }
Expand Down
2 changes: 1 addition & 1 deletion core/uv.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

24 changes: 20 additions & 4 deletions sdk/harambe/handlers.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import re
import base64
import time
from abc import ABC
from typing import Any, Literal, Self

Expand Down Expand Up @@ -53,10 +54,25 @@ async def __aenter__(self) -> Self:
# Async context-manager exit: unregisters this handler's route, refocuses the
# handler's page, then records and closes pages that were not in
# `self._initial_pages`.
# NOTE(review): this is a rendered diff hunk with the +/- markers and the
# indentation stripped. The first four-line `for` loop appears to be the
# version removed by the commit and the `try` block its replacement (the file
# header reports 4 deletions) -- confirm against the repository.
async def __aexit__(self, *_: Any, **__: Any) -> None:
# Stop intercepting requests matching `url_pattern` with `self.handle`.
await self.page.context.unroute(self.url_pattern, self.handle)
await self.page.bring_to_front()
# Previous cleanup (presumably the removed side of the diff): one immediate
# pass over the context's pages with no waiting.
for page in self.page.context.pages:
if page.url not in self._initial_pages:
self._new_pages.append(page.url)
await page.close()
# Replacement cleanup: first wait until `_wait_for_new_page` sees a page whose
# URL is not in `_initial_pages`, then run the same record-and-close pass.
try:
await self._wait_for_new_page()
for page in self.page.context.pages:
# Pages are compared by URL, not identity.
if page.url not in self._initial_pages:
self._new_pages.append(page.url)
# NOTE(review): nesting is lost here; presumably `close()` belongs inside the
# `if`, so only newly opened pages are closed -- confirm.
await page.close()
# A TimeoutError raised inside the `try` is replaced by a new TimeoutError
# carrying a different message, so exiting the context raises when no new page
# is detected in time.
except TimeoutError:
raise TimeoutError("No new page opened within the timeout.")

async def _wait_for_new_page(self, timeout: int = 10) -> Page | None:
start_time = time.monotonic()
while time.monotonic() - start_time < timeout:
current_pages = self.page.context.pages
for page in current_pages:
if page.url not in self._initial_pages:
return page
await page.wait_for_timeout(100)

raise TimeoutError("Timed out waiting for a new page to open.")

async def handle(self, route: Route) -> None:
if (
Expand Down
4 changes: 2 additions & 2 deletions sdk/pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
[project]
name = "harambe-sdk"
version = "0.59.2"
version = "0.59.3"
description = "Data extraction SDK for Playwright 🐒🍌"
authors = [
{ name = "Adam Watkins", email = "[email protected]" }
]
requires-python = ">=3.11,<4.0"
readme = "README.md"
dependencies = [
"harambe_core==0.59.2",
"harambe_core==0.59.3",
"playwright==1.47.0",
"beautifulsoup4==4.12.3",
"requests==2.32.3",
Expand Down
4 changes: 2 additions & 2 deletions sdk/uv.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 9154537

Please sign in to comment.