Skip to content

Commit

Permalink
Added headers to better scrape
Browse files Browse the repository at this point in the history
  • Loading branch information
jar2333 committed May 2, 2024
1 parent c53da06 commit ba8fae4
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 2 deletions.
2 changes: 1 addition & 1 deletion app.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,4 +36,4 @@
@cache.cached(timeout=86400)
def get_books():
results = SCRAPER.scrape()
- return render_template("books.html", results), 200
+ return render_template("books.html", results=results), 200
10 changes: 9 additions & 1 deletion src/book_scraper.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,15 @@ def scrape(self) -> List[Dict[str, str]]:

def request(self) -> str:
try:
- response = requests.get(self.url)
+ response = requests.get(self.url, headers={
+ 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
+ 'Accept-Encoding': 'gzip, deflate, br, zstd',
+ 'Accept-Language': 'en-US',
+ 'Connection': 'keep-alive',
+ 'Host': 'www.goodreads.com',
+ 'Upgrade-Insecure-Requests': '1',
+ 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36 OPR/109.0.0.0',
+ })
response.raise_for_status()
except Exception as err:
raise err
Expand Down

0 comments on commit ba8fae4

Please sign in to comment.