From 18306597c953b12649e36268bcc3336f668d52d2 Mon Sep 17 00:00:00 2001
From: Ben Magolan <ben@paperplane.ai>
Date: Mon, 27 Nov 2023 02:12:19 -0500
Subject: [PATCH 1/2] selenium webscraping example

---
 examples/web_scraper/airbnb.py        | 98 +++++++++++++++++++++++++++
 examples/web_scraper/readme.md        | 11 ++-
 examples/web_scraper/requirements.txt |  3 +-
 3 files changed, 108 insertions(+), 4 deletions(-)
 create mode 100644 examples/web_scraper/airbnb.py

diff --git a/examples/web_scraper/airbnb.py b/examples/web_scraper/airbnb.py
new file mode 100644
index 0000000..2c812af
--- /dev/null
+++ b/examples/web_scraper/airbnb.py
@@ -0,0 +1,98 @@
+import openai
+import os
+from bs4 import BeautifulSoup
+from dotenv import load_dotenv
+from pydantic import BaseModel
+from selenium import webdriver
+from selenium.webdriver.chrome.options import Options
+import time
+from typing import Optional
+
+load_dotenv()
+
+from monkey_patch.monkey import Monkey as monkey
+
+
+openai.api_key = os.getenv("OPENAI_API_KEY")
+
+
+class AirBnb(BaseModel):
+    city: str
+    state: str
+    dates: str
+    price: float
+    stars: float
+
+
+@monkey.patch
+def extract_airbnb(content: str) -> Optional[AirBnb]:
+    """
+    Examine the content string and extract the airbnb details for the city, state,
+    dates available, nightly price, and stars rating.
+    """
+
+
+@monkey.align
+def align_extract_airbnb() -> None:
+    print("Aligning...")
+    airbnb1 = "Caroga Lake, New YorkRoyal Mountain Ski ResortDec 3 – 8$200\xa0night$200 per night4.99"
+    assert extract_airbnb(airbnb1) == AirBnb(
+        city="Caroga Lake",
+        state="New York",
+        dates="Dec 3 - 8",
+        price=200.0,
+        stars=4.99,
+    )
+
+
+def selenium_driver() -> str:
+    """Use selenium to scrape the airbnb url and return the page source."""
+
+    # configure webdriver
+    options = Options()
+    # options.add_argument('--headless')  # Enable headless mode
+    # options.add_argument('--disable-gpu')  # Disable GPU acceleration
+
+    # launch driver for the page
+    driver = webdriver.Chrome(options=options)
+    driver.get("https://www.airbnb.com/?tab_id=home_tab&refinement_paths%5B%5D=%2Fhomes&search_mode=flex_destinations_search&flexible_trip_lengths%5B%5D=one_week&location_search=MIN_MAP_BOUNDS&monthly_start_date=2023-12-01&monthly_length=3&price_filter_input_type=0&channel=EXPLORE&search_type=category_change&price_filter_num_nights=5&category_tag=Tag%3A5366")
+    time.sleep(3)
+
+    # refresh the page to remove the dialog modal
+    driver.refresh()
+    time.sleep(3)
+
+    # Scroll 40% of the way down the page so the rest of the listings load
+    scroll_position = driver.execute_script("return (document.body.scrollHeight - window.innerHeight) * 0.4;")
+    driver.execute_script(f"window.scrollTo(0, {scroll_position});")
+    time.sleep(3)
+
+    # extract the page source and return
+    page_source = driver.page_source
+    driver.quit()
+    return page_source
+
+
+if __name__ == '__main__':
+
+    # Align the function
+    align_extract_airbnb()
+
+    # Selenium driver to scrape the url and extract the airbnb information
+    page_source = selenium_driver()
+
+    # Beautiful Soup to parse the page source
+    soup = BeautifulSoup(page_source, 'html.parser')
+    entities = soup.find_all('div', class_="dir dir-ltr")
+
+    # Remove entries that are not airbnb listings
+    contents = [entity.text for entity in entities if entity.text != ""]
+    contents = [c for c in contents if "$" in c]
+    print(contents)
+
+    # Tanuki to extract the airbnb information
+    print("Tanuki Time!")
+    airbnbs = []
+    for content in contents[1:3]:
+        airbnbs.append(extract_airbnb(content))
+    print(airbnbs)
diff --git a/examples/web_scraper/readme.md b/examples/web_scraper/readme.md
index a953c84..2f59986 100644
--- a/examples/web_scraper/readme.md
+++ b/examples/web_scraper/readme.md
@@ -2,7 +2,7 @@
 
 This example shows how MonkeyPatch can be used with web scraping to easily populate the desired values into a structured class.
 
-Six examples for web scraping are provided:
+Six examples for web scraping with BeautifulSoup are provided:
 - [Quotes](https://quotes.toscrape.com/)
 - [Countries](https://www.scrapethissite.com/pages/simple/)
 - [Job Postings](https://realpython.github.io/fake-jobs/)
@@ -10,14 +10,17 @@ Six examples for web scraping are provided:
 - [Car Specs](https://www.cars.com/research/mazda-cx_90-2024/)
 - [StreetEasy Apartments](https://streeteasy.com/2-bedroom-apartments-for-rent/manhattan)
 
+An additional example has been provided showing how to use Selenium with BeautifulSoup for scraping:
+- [AirBnb](https://www.airbnb.com/?tab_id=home_tab&refinement_paths%5B%5D=%2Fhomes&search_mode=flex_destinations_search&flexible_trip_lengths%5B%5D=one_week&location_search=MIN_MAP_BOUNDS&monthly_start_date=2023-12-01&monthly_length=3&price_filter_input_type=0&channel=EXPLORE&category_tag=Tag%3A5366&search_type=category_change)
+
 ## Configuration
 
-Ensure you have an account with OpenAI to access their underlying models.
+Make sure you have an account with OpenAI to access their underlying models.
 
 Set the following environment variables in your `.env` file:
 ```
 OPENAI_API_KEY=sk-XXX
-USER_AGENT=... (Optional and only needed for StreetEasy example)
+USER_AGENT=... (Optional: only needed for StreetEasy example)
 ```
 
 ## Install
@@ -43,4 +46,6 @@ python cocktail.py
 python cars.py
 
 python streeteasy.py   # make sure to update User-Agent!
+
+python airbnb.py
 ```
diff --git a/examples/web_scraper/requirements.txt b/examples/web_scraper/requirements.txt
index 1a444ae..de62692 100644
--- a/examples/web_scraper/requirements.txt
+++ b/examples/web_scraper/requirements.txt
@@ -2,4 +2,5 @@ python-dotenv
 openai
 monkey-patch.py
 pytest
-beautifulsoup4
\ No newline at end of file
+beautifulsoup4
+selenium
\ No newline at end of file

From f4a2e2ea9bd515b36189250cf7f9600f1470b902 Mon Sep 17 00:00:00 2001
From: Jack Hopkins <jack@raveler.co.uk>
Date: Mon, 27 Nov 2023 13:05:40 +0000
Subject: [PATCH 2/2] Refactor MonkeyPatch to use Tanuki

Description:
---
 examples/web_scraper/airbnb.py              |   6 +-
 examples/web_scraper/readme.md              |   2 +-
 src/tanuki.py.egg-info/PKG-INFO             | 210 ++++++++++++++++++++
 src/tanuki.py.egg-info/SOURCES.txt          |  60 ++++++
 src/tanuki.py.egg-info/dependency_links.txt |   1 +
 src/tanuki.py.egg-info/requires.txt         |   7 +
 src/tanuki.py.egg-info/top_level.txt        |   1 +
 7 files changed, 283 insertions(+), 4 deletions(-)
 create mode 100644 src/tanuki.py.egg-info/PKG-INFO
 create mode 100644 src/tanuki.py.egg-info/SOURCES.txt
 create mode 100644 src/tanuki.py.egg-info/dependency_links.txt
 create mode 100644 src/tanuki.py.egg-info/requires.txt
 create mode 100644 src/tanuki.py.egg-info/top_level.txt

diff --git a/examples/web_scraper/airbnb.py b/examples/web_scraper/airbnb.py
index 2c812af..c680f59 100644
--- a/examples/web_scraper/airbnb.py
+++ b/examples/web_scraper/airbnb.py
@@ -10,7 +10,7 @@
 
 load_dotenv()
 
-from monkey_patch.monkey import Monkey as monkey
+import tanuki
 
 
 openai.api_key = os.getenv("OPENAI_API_KEY")
@@ -24,7 +24,7 @@ class AirBnb(BaseModel):
     stars: float
 
 
-@monkey.patch
+@tanuki.patch
 def extract_airbnb(content: str) -> Optional[AirBnb]:
     """
     Examine the content string and extract the airbnb details for the city, state,
@@ -32,7 +32,7 @@ def extract_airbnb(content: str) -> Optional[AirBnb]:
     """
 
 
-@monkey.align
+@tanuki.align
 def align_extract_airbnb() -> None:
     print("Aligning...")
     airbnb1 = "Caroga Lake, New YorkRoyal Mountain Ski ResortDec 3 – 8$200\xa0night$200 per night4.99"
diff --git a/examples/web_scraper/readme.md b/examples/web_scraper/readme.md
index 2f59986..462d668 100644
--- a/examples/web_scraper/readme.md
+++ b/examples/web_scraper/readme.md
@@ -1,6 +1,6 @@
 # Web Scraping
 
-This example shows how MonkeyPatch can be used with web scraping to easily populate the desired values into a structured class.
+This example shows how Tanuki can be used with web scraping to easily populate the desired values into a structured class.
 
 Six examples for web scraping with BeautifulSoup are provided:
 - [Quotes](https://quotes.toscrape.com/)
diff --git a/src/tanuki.py.egg-info/PKG-INFO b/src/tanuki.py.egg-info/PKG-INFO
new file mode 100644
index 0000000..6b436cf
--- /dev/null
+++ b/src/tanuki.py.egg-info/PKG-INFO
@@ -0,0 +1,210 @@
+Metadata-Version: 2.1
+Name: tanuki.py
+Version: 0.0.1a1.dev14+gd12f5dc.d20231101
+Summary: The easiest way to build scalable LLM-powered applications, which gets cheaper and faster over time.
+Home-page: https://github.com/tanuki/tanuki.py
+Author: Jack Hopkins
+Author-email: jack.hopkins@me.com
+Classifier: Programming Language :: Python :: 3
+Classifier: License :: OSI Approved :: MIT License
+Classifier: Operating System :: OS Independent
+Requires-Python: >=3.6
+Description-Content-Type: text/markdown
+License-File: LICENSE.txt
+Requires-Dist: pydantic>=1.8.2
+Requires-Dist: appdirs~=1.4.4
+Requires-Dist: openai==0.28.1
+Requires-Dist: numpy~=1.24.4
+Requires-Dist: python-dotenv==1.0.0
+Requires-Dist: bitarray==2.8.2
+Requires-Dist: pydantic==2.4.2
+
+# 🙈 Tanuki
+
+The fastest and easiest way to build LLM features in Python.
+
+A *Tanuki Function* is a piece of code which replaces function stubs with LLM transformations at runtime, enabling you to drop well-typed, production-ready capabilities into your app in seconds. No more prompt wrangling. No surprises. The more often you call the function, the faster it gets.
+
+```
+@tanuki.patch
+def some_function(input: TypedInput) -> TypedOutput:
+	"""
+	This is where you include the description of how your function will be used.
+	"""
+
+@tanuki.align
+def test_some_function(example_typed_input: TypedInput, 
+					   example_typed_output: TypedOutput):
+	
+	assert similar_to(some_function(example_typed_input), example_typed_output)
+	
+```
+
+## How it works
+
+When you initially call a Tanuki Function during development, an LLM in a zero-shot configuration is invoked to generate the typed response. This response can be passed through to the rest of your app / stored in the DB / displayed to the user.
+
+We register the input and outputs of the function during execution, ensuring that the outputs are correctly typed. 
+As your data volume increases, we distil, deploy and manage smaller models that are able to capture the desired behaviour at a lower computational cost and lower latency. 
+
+You can align the model to your use-case through *test-driven alignment*, in which you create tests which declare the desired behaviour of your function. 
+
+## Examples
+
+In this example, we define a simple classification function, and several alignment assertions which guide the desired behaviour of the function. 
+
+```sentiment
+@tanuki.patch
+def classify_sentiment(msg: str) -> Optional[Literal['Good', 'Bad']]:
+	"""
+	Classifies a message from the user into Good, Bad or None.
+	"""
+
+@tanuki.align
+def align_classify_sentiment():
+	assert classify_sentiment("I love you") == 'Good'
+	assert classify_sentiment("I hate you") == 'Bad'
+	assert not classify_sentiment("Wednesdays are in the middle of the week")
+```
+
+
+```
+@tanuki.patch
+def score_sentiment(input: str) -> Annotated[int, Field(ge=0, le=10)]:
+	"""
+	Scores the input between 0-10
+	"""
+
+@tanuki.align
+def align_score_sentiment():
+	"""Register several examples to align your function"""
+	
+	assert score_sentiment("I love you") == 10
+	assert score_sentiment("I hate you") == 0
+	assert score_sentiment("You're okay I guess") == 5
+
+# This is a normal test that can be invoked 
+def test_score_sentiment():
+	"""We can test the function as normal using Pytest or Unittest"""
+	assert score_sentiment("I like you") == 7
+```
+
+You can define standard Pydantic classes as your output, and can optionally add descriptions using _Field_ to help inform the purpose of each field.
+
+```python
+@dataclass
+class ActionItem:
+    goal: str = Field(description="What task must be completed")
+    deadline: datetime = Field(description="The date the goal needs to be achieved")
+    
+@tanuki.patch
+def action_items(input: str) -> List[ActionItem]:
+	"""Generate a list of Action Items"""
+
+@tanuki.align
+def align_action_items():
+	goal = "Can you please get the presentation to me by Tuesday?"
+	next_tuesday = (datetime.now() + timedelta((1 - datetime.now().weekday() + 7) % 7)).replace(hour=0, minute=0, second=0, microsecond=0)
+	
+	assert action_items(goal) == ActionItem(goal="Prepare the presentation", deadline=next_tuesday)
+```
+
+## Test-Driven Alignment
+
+To align the behaviour of your patched function to your needs, decorate a function with `@tanuki.align`.
+
+```python
+def test_score_sentiment():
+	"""We can test the function as normal using Pytest or Unittest"""
+	assert score_sentiment("I like you") == 7
+```
+
+You assert the behaviour of your patched function either by declaring the desired output through equality or inequality checks, or by asserting on the downstream behaviour of functions that consume its result:
+
+
+(HOW CAN WE ACHIEVE THIS?)
+```python
+def test_score_sentiment():
+	"""We can test the function as normal using Pytest or Unittest"""
+	assert multiply_by_two(score_sentiment("I like you")) == 14
+	assert 2*score_sentiment("I like you") == 14
+```
+
+
+
+
+## Simple ToDo List App
+
+```
+from datetime import datetime
+from typing import Optional, List
+from pydantic import Field
+from fastapi import FastAPI
+import tanuki
+
+app = FastAPI()
+
+@dataclass
+class TodoItem:
+    goal: str = Field(description="What task must be completed")
+    deadline: datetime = Field(description="The date the goal needs to be achieved")
+    priority: str = Field(description="Priority level of the task")
+    people_involved: List[str] = Field(description="Names of people involved")
+
+
+@tanuki.patch
+def generate_todo(input: str) -> TodoItem:
+    """
+    Generate a TodoItem based on the natural language input.
+    """
+
+@tanuki.align
+def align_generate_todo():
+    next_tuesday = (datetime.now() + timedelta((1 - datetime.now().weekday() + 7) % 7)).replace(hour=0, minute=0, second=0, microsecond=0)
+    next_friday = (datetime.now() + timedelta((4 - datetime.now().weekday() + 7) % 7)).replace(hour=0, minute=0, second=0, microsecond=0)
+
+    # First example
+    assert generate_todo("Prepare the presentation for John by next Tuesday, high priority") == TodoItem(
+        goal="Prepare the presentation",
+        deadline=next_tuesday,
+        priority="high",
+        people_involved=["John"]
+    )
+
+    # Second example: Different priority and deadline
+    assert generate_todo("Complete the report by Friday, medium priority") == TodoItem(
+        goal="Complete the report",
+        deadline=next_friday,
+        priority="medium",
+        people_involved=[]
+    )
+
+    # Third example: Multiple people involved
+    assert generate_todo("Organize the team meeting with Emily and Sarah for next Tuesday") == TodoItem(
+        goal="Organize the team meeting",
+        deadline=next_tuesday,
+        priority="",
+        people_involved=["Emily", "Sarah"]
+    )
+
+    # Fourth example: No deadline
+    assert generate_todo("Buy groceries, low priority") == TodoItem(
+        goal="Buy groceries",
+        deadline=None,
+        priority="low",
+        people_involved=[]
+    )
+
+    # Fifth example: No priority or people involved
+    assert generate_todo("Read the new book") == TodoItem(
+        goal="Read the new book",
+        deadline=None,
+        priority="",
+        people_involved=[]
+    )
+
+@app.post("/todo/", response_model=TodoItem)
+async def create_todo(input: str):
+    return generate_todo(input)
+
+```
diff --git a/src/tanuki.py.egg-info/SOURCES.txt b/src/tanuki.py.egg-info/SOURCES.txt
new file mode 100644
index 0000000..a659d1f
--- /dev/null
+++ b/src/tanuki.py.egg-info/SOURCES.txt
@@ -0,0 +1,60 @@
+.gitignore
+LICENSE.txt
+pyproject.toml
+readme.md
+requirements.txt
+setup.cfg
+.github/workflows/publish.yml
+.idea/.gitignore
+.idea/misc.xml
+.idea/modules.xml
+.idea/monkeyFunctions.iml
+.idea/vcs.xml
+.idea/inspectionProfiles/Project_Default.xml
+.idea/inspectionProfiles/profiles_settings.xml
+examples/clean_language/main.py
+examples/score_sentiment/main.py
+examples/semantic_sql/main.py
+examples/stock_winners/main.py
+examples/todolist/backend/main.py
+examples/todolist/backend/requirements.txt
+examples/todolist/backend/start.sh
+examples/todolist/backend/todo_item.py
+examples/todolist/backend/__pycache__/main.cpython-39.pyc
+examples/todolist/backend/__pycache__/todo_item.cpython-311.pyc
+examples/todolist/frontend/src.js
+src/tanuki/__init__.py
+src/tanuki/assertion_visitor.py
+src/tanuki/bloom_filter.py
+src/tanuki/function_modeler.py
+src/tanuki/register.py
+src/tanuki/repair.py
+src/tanuki/utils.py
+src/tanuki/validator.py
+src/tanuki.py.egg-info/PKG-INFO
+src/tanuki.py.egg-info/SOURCES.txt
+src/tanuki.py.egg-info/dependency_links.txt
+src/tanuki.py.egg-info/requires.txt
+src/tanuki.py.egg-info/top_level.txt
+src/tanuki/models/__init__.py
+src/tanuki/models/function_description.py
+src/tanuki/models/function_example.py
+src/tanuki/models/language_model_output.py
+tests/test_bloom_filter.py
+tests/test_hash.py
+tests/test_token_counter.py
+tests/test_align/test_align_class.py
+tests/test_align/test_align_global.py
+tests/test_align/.align/b01fbd89d35b7eef4015fe0c6e363969
+tests/test_align/.align/c364f14f40084510581a3418aaab4037
+tests/test_assertion_visitor/test_mock.py
+tests/test_load/test_load_trackers.py
+tests/test_patch/test_classification.py
+tests/test_patch/test_finance.py
+tests/test_patch/test_regression.py
+tests/test_validator/test_instantiate.py
+tests/test_validator/test_is_base_type.py
+tests/test_validator/test_validate_base_type.py
+tests/test_validator/test_validate_output.py
+tests/test_validator/test_validate_value.py
+tests/test_validator/test_validator.py
\ No newline at end of file
diff --git a/src/tanuki.py.egg-info/dependency_links.txt b/src/tanuki.py.egg-info/dependency_links.txt
new file mode 100644
index 0000000..8b13789
--- /dev/null
+++ b/src/tanuki.py.egg-info/dependency_links.txt
@@ -0,0 +1 @@
+
diff --git a/src/tanuki.py.egg-info/requires.txt b/src/tanuki.py.egg-info/requires.txt
new file mode 100644
index 0000000..e192fdf
--- /dev/null
+++ b/src/tanuki.py.egg-info/requires.txt
@@ -0,0 +1,7 @@
+pydantic>=1.8.2
+appdirs~=1.4.4
+openai==0.28.1
+numpy~=1.24.4
+python-dotenv==1.0.0
+bitarray==2.8.2
+pydantic==2.4.2
diff --git a/src/tanuki.py.egg-info/top_level.txt b/src/tanuki.py.egg-info/top_level.txt
new file mode 100644
index 0000000..d9bd6db
--- /dev/null
+++ b/src/tanuki.py.egg-info/top_level.txt
@@ -0,0 +1 @@
+tanuki