Skip to content

Commit

Permalink
Merge pull request #5 from y-popov/query-jsons
Browse files Browse the repository at this point in the history
Modifications for new search
  • Loading branch information
y-popov authored Jan 11, 2024
2 parents 60dadb2 + ad6d5a1 commit 6ce48e3
Show file tree
Hide file tree
Showing 11 changed files with 118 additions and 35 deletions.
4 changes: 4 additions & 0 deletions .github/workflows/terraform.yml
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@ jobs:
id: plan
if: github.event_name == 'pull_request'
run: terraform plan -no-color
env:
TF_VAR_PROJECT: ${{ secrets.TF_VAR_PROJECT }}
continue-on-error: true
- name: Update Pull Request
uses: actions/github-script@0.9.0
Expand Down Expand Up @@ -69,3 +71,5 @@ jobs:
- name: Terraform Apply
if: github.ref == 'refs/heads/master' && github.event_name == 'push'
run: terraform apply -auto-approve
env:
TF_VAR_PROJECT: ${{ secrets.TF_VAR_PROJECT }}
6 changes: 6 additions & 0 deletions assets/accommodation_query.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
{
"ddl_suitable": "1",
"ddl_bedrooms": "1",
"txt_rent": 1200,
"ddl_Furniture": "0"
}
8 changes: 8 additions & 0 deletions assets/rightmove_query.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
{
"minBedrooms": 1,
"maxPrice": 1200,
"dontShow": "houseShare,student,retirement",
"locationIdentifier": "REGION^21561",
"radius": 10,
"furnish_types": ["partFurnished,furnished,unfurnished"]
}
11 changes: 11 additions & 0 deletions assets/zoopla_query.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
{
"price_frequency": "per_month",
"beds_min": 1,
"price_max": 1200,
"available_from": "6months",
"is_shared_accommodation": false,
"is_retirement_home": false,
"furnished_state": ["furnished", "part_furnished"],
"location": "cambridgeshire/cambridge",
"radius": 5
}
5 changes: 2 additions & 3 deletions main.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,16 +13,15 @@
logger = logging.getLogger()
logger.setLevel(logging.INFO)

gs = storage.Client()


def main():
gs = storage.Client()
bucket = check_s3(gs=gs)

apis = {
'accommodation': AccommodationApi(),
'rightmove': RightmoveApi(),
'zoopla': ZooplaApi()
# 'zoopla': ZooplaApi()
}

apis['accommodation'].login(login=os.getenv('ACCOMMODATION_LOGIN'), password=os.getenv('ACCOMMODATION_PASSWORD'))
Expand Down
10 changes: 6 additions & 4 deletions main.tf
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
locals {
project = "madproject-201813"
variable "PROJECT" {
type = string
nullable = false
description = "google cloud project name"
}

terraform {
Expand All @@ -15,7 +17,7 @@ terraform {
}

provider "google" {
project = local.project
project = var.PROJECT
region = "europe-west2"
}

Expand Down Expand Up @@ -50,7 +52,7 @@ resource "google_service_account" "function_invoker" {
}

resource "google_project_iam_binding" "function_invoker_role" {
project = local.project
project = var.PROJECT
role = "roles/cloudfunctions.invoker"

members = [
Expand Down
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
beautifulsoup4==4.7.1
google-cloud-storage==2.5.0
fake-useragent==1.1.3
Flask==2.1.1
functions-framework==3.2.0
requests==2.21.0
10 changes: 7 additions & 3 deletions src/accommodation.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,12 @@

class AccommodationApi(BaseApi):
base_url = "https://www.accommodation.cam.ac.uk"
config_filename = 'accommodation_query.json'

def __init__(self):
self.session = requests.Session()
self.soup = None
super().__init__()

def send_post(self, url: str, data: Dict = None) -> requests.Response:
r = self.session.post(url, data=data)
Expand Down Expand Up @@ -81,8 +83,8 @@ def get_flats(self) -> Iterable[Flat]:
buttons = self.get_form_buttons()
search_button = {k: v for k, v in buttons.items() if k.endswith('btnSearch')}

search_date = date.today() + timedelta(weeks=4)
form['_ctl0:_ctl0:cpFull:cpFull:txt_start_date'] = search_date.strftime('%d %b %Y')
for key, value in self.config.items():
form[f'_ctl0:_ctl0:cpFull:cpFull:{key}'] = value

self.send_post(search_url, data={**viewstate, **form, **search_button})

Expand All @@ -101,7 +103,9 @@ def get_flats(self) -> Iterable[Flat]:
url=f'{self.base_url}{prop_uri}',
price=rent_price[1:],
available=prop_available,
type=prop_type
_type=prop_type,
_furnished=None,
title=None
)
logging.debug(f'Found {flat}')
yield flat
31 changes: 28 additions & 3 deletions src/api.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
from typing import Iterable
import json
import os.path
from typing import Iterable, Dict, Any
from abc import abstractmethod
from dataclasses import dataclass

Expand All @@ -9,23 +11,46 @@ class Flat:
url: str
price: str
available: str
type: str

_type: str
_furnished: str
title: str

@property
def label(self) -> str:
return f'{self.type} for {self.price} available from {self.available}'

@property
def type(self):
if self._type is None:
self._type = f'{self.furnished} {self.title}'
return self._type

def get_date(self):
pass

@property
def furnished(self):
return self._furnished


class BaseApi:
    """Common base for the property-listing site clients.

    A subclass supplies ``base_url`` and ``config_filename`` (typically as
    plain class attributes) and implements ``get_flats``.  The search
    parameters are read from the JSON file named by ``config_filename``
    when an instance is created and are exposed as ``self.config``.

    NOTE: this class does not derive from ``abc.ABC``, so the
    ``@abstractmethod`` markers below document intent but are not enforced
    at instantiation time.
    """

    def __init__(self):
        # Load the query parameters once, at construction time.
        self.config = self.load_config()

    @property
    @abstractmethod
    def base_url(self) -> str:
        """Root URL of the site this client talks to."""
        return 'https://www.example.co.uk'

    @property
    @abstractmethod
    def config_filename(self) -> str:
        """Name of the JSON query file inside the ``assets`` directory."""
        return 'config.json'

    @abstractmethod
    def get_flats(self) -> Iterable['Flat']:
        """Yield the flats found by the site search."""
        pass

    def load_config(self) -> Dict[str, Any]:
        """Read and parse ``assets/<config_filename>``.

        The path is relative to the current working directory.

        Returns:
            The decoded JSON document.

        Raises:
            FileNotFoundError: if the query file does not exist.
            json.JSONDecodeError: if the file is not valid JSON.
        """
        config_path = os.path.join('assets', self.config_filename)
        # JSON is UTF-8 by specification; do not depend on the platform's
        # default text encoding.
        with open(config_path, encoding='utf-8') as f:
            return json.load(f)
42 changes: 31 additions & 11 deletions src/rightmove.py
Original file line number Diff line number Diff line change
@@ -1,31 +1,50 @@
import requests
from typing import Iterable
from bs4 import BeautifulSoup
from fake_useragent import UserAgent
from src.api import BaseApi, Flat

user_agent = UserAgent()


class SmartFlat(Flat):
ua = user_agent
_data = None

@property
def data(self):
if self._data is None:
r = requests.get(self.url, headers={'User-Agent': self.ua.random})
self._data = BeautifulSoup(r.text, 'html.parser')
return self._data

def get_date(self):
r = requests.get(self.url)
s = BeautifulSoup(r.text, 'html.parser')
date_block = s.find("dt", text='Let available date: ')
date_block = self.data.find("dt", text='Let available date: ')
self.available = date_block.nextSibling.get_text()

@property
def furnished(self):
if self._furnished is None:
block = self.data.find("dt", text='Furnish type: ')
self._furnished = block.nextSibling.get_text()
return self._furnished


class RightmoveApi(BaseApi):
base_url = 'https://www.rightmove.co.uk'
config_filename = 'rightmove_query.json'

def get_flats(self) -> Iterable[SmartFlat]:
furnish_types = ('partFurnished,furnished', 'unfurnished')
for furnish_type in furnish_types:
for furnish_type in self.config['furnish_types']:
resp = requests.get(
url=f'{self.base_url}/property-to-rent/find.html',
params={
'minBedrooms': 1,
'maxPrice': 1400,
'dontShow': 'houseShare,student',
'minBedrooms': self.config['minBedrooms'],
'maxPrice': self.config['maxPrice'],
'dontShow': self.config['dontShow'],
'furnishTypes': furnish_type,
'locationIdentifier': 'REGION^274'
'locationIdentifier': self.config['locationIdentifier'],
'radius': self.config['radius']
}
)

Expand All @@ -45,13 +64,14 @@ def get_flats(self) -> Iterable[SmartFlat]:
if link == '':
continue

furnish_label = 'Unfurnished' if furnish_type.startswith('un') else 'Furnished'
flat = SmartFlat(
id=int(link.split('/')[-2]),
url=f"{self.base_url}{link}",
price=price,
available=None,
type=f'{furnish_label} {title}'
_furnished=None,
title=title,
_type=None
)

yield flat
25 changes: 14 additions & 11 deletions src/zoopla.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import json
import requests
from typing import Iterable
from bs4 import BeautifulSoup
Expand All @@ -6,20 +7,21 @@

class ZooplaApi(BaseApi):
base_url = 'https://www.zoopla.co.uk'
config_filename = 'zoopla_query.json'

def get_flats(self) -> Iterable[Flat]:
furnished_states = ('furnished', 'part_furnished')
for furnished_state in furnished_states:
for furnished_state in self.config['furnished_state']:
r = requests.get(
url=f'{self.base_url}/to-rent/property/cambridge-city-centre/',
url=f'{self.base_url}/to-rent/property/{self.config["location"]}/',
params={
'price_frequency': 'per_month',
'beds_min': 1,
'price_max': 1400,
'available_from': '1months',
'include_shared_accommodation': False,
'price_frequency': self.config['price_frequency'],
'beds_min': self.config['beds_min'],
'price_max': self.config['price_max'],
'available_from': self.config['available_from'],
'is_shared_accommodation': json.dumps(self.config['is_shared_accommodation']),
'is_retirement_home': json.dumps(self.config['is_retirement_home']),
'furnished_state': furnished_state,
'radius': 1
'radius': self.config['radius']
}
)
s = BeautifulSoup(r.text, 'html.parser')
Expand All @@ -36,8 +38,9 @@ def get_flats(self) -> Iterable[Flat]:
url=f'{self.base_url}{link}',
price=price_block.findChild().get_text(),
available=date_block.get_text().lstrip(),
type=property_block.get_text()
_type=property_block.get_text(),
_furnished=None,
title=None
)

yield flat

0 comments on commit 6ce48e3

Please sign in to comment.