Added browser-like request headers to improve scraping

jar2333 · May 2, 2024 · ba8fae4
1 parent c53da06
commit ba8fae4
Show file tree

Hide file tree

Showing 2 changed files with 10 additions and 2 deletions.
diff --git a/app.py b/app.py
@@ -36,4 +36,4 @@
 @cache.cached(timeout=86400)
 def get_books():
     results = SCRAPER.scrape()
-    return render_template("books.html", results), 200
+    return render_template("books.html", results=results), 200
diff --git a/src/book_scraper.py b/src/book_scraper.py
@@ -19,7 +19,15 @@ def scrape(self) -> List[Dict[str, str]]:
 
     def request(self) -> str:
         try:
-            response = requests.get(self.url)
+            response = requests.get(self.url, headers={
+                'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
+                'Accept-Encoding': 'gzip, deflate, br, zstd',
+                'Accept-Language': 'en-US',
+                'Connection': 'keep-alive',
+                'Host': 'www.goodreads.com',
+                'Upgrade-Insecure-Requests': '1',
+                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36 OPR/109.0.0.0',
+            })
             response.raise_for_status()
         except Exception as err:
             raise err