Skip to content

Commit

Permalink
Releasing v0.1.7
Browse files Browse the repository at this point in the history
  • Loading branch information
raznem committed Aug 31, 2024
1 parent c3ad761 commit ebc4681
Show file tree
Hide file tree
Showing 2 changed files with 43 additions and 1 deletion.
42 changes: 42 additions & 0 deletions examples/sign_in.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
import asyncio

from playwright.async_api import Page

from parsera import ParseraScript
from parsera.engine.model import GPT4oMiniModel

# Credentials typed into the sign-in form by this example.
# Replace both placeholders with real account values before running.
EMAIL = "<YOUR-EMAIL>"
PASSWORD = "<YOUR-PASSWORD>"


async def get_parsera_credits():
    """Sign in to parsera.org and extract the "credits" element from the app page.

    Builds a ``ParseraScript`` that uses a ``GPT4oMiniModel`` and a sign-in
    script, then asks it to open ``https://parsera.org/app`` and extract a
    single element described as "number of credits".

    Returns:
        Whatever ``ParseraScript.arun`` returns for the requested elements.
    """

    async def sign_in(page: Page) -> Page:
        """Fill in the sign-in form; executed during the session creation."""
        await page.goto("https://parsera.org/auth/sign-in")
        await page.wait_for_load_state("networkidle")

        email_field = page.get_by_label("Email")
        await email_field.fill(EMAIL)

        password_field = page.get_by_label("Password")
        await password_field.fill(PASSWORD)

        submit_button = page.get_by_role("button", name="Sign In", exact=True)
        await submit_button.click()

        # Block until the text "Playground" shows up on the page
        # (presumably only visible once the login went through).
        await page.wait_for_selector("text=Playground")
        return page

    async def settle(page: Page) -> Page:
        """Pause briefly; executed after the url is opened."""
        # Wait one second for page to load
        await page.wait_for_timeout(1000)
        return page

    scraper = ParseraScript(model=GPT4oMiniModel(), initial_script=sign_in)
    wanted_elements = {"credits": "number of credits"}
    return await scraper.arun(
        url="https://parsera.org/app",
        elements=wanted_elements,
        playwright_script=settle,
    )


if __name__ == "__main__":
    # Run the coroutine to completion and print what it extracted.
    print(asyncio.run(get_parsera_credits()))
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "parsera"
version = "0.1.6"
version = "0.1.7"
description = "Lightweight library for scraping web-sites with LLMs"
authors = ["Mikhail Zanka <[email protected]>"]
license = "GPL-2.0-or-later"
Expand Down

0 comments on commit ebc4681

Please sign in to comment.