-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathflask_app.py
82 lines (71 loc) · 4.76 KB
/
flask_app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
# doing necessary imports
from flask import Flask, render_template, request,jsonify
# from flask_cors import CORS,cross_origin
import requests
from bs4 import BeautifulSoup as bs
from urllib.request import urlopen as uReq
import pymongo
app = Flask(__name__) # initialising the flask app with the name 'app'
# base url + /
#http://localhost:8000 + /
@app.route('/',methods=['POST','GET']) # route with allowed methods as POST and GET
def index():
if request.method == 'POST':
searchString = request.form['content'].replace(" ","") # obtaining the search string entered in the form
try:
dbConn = pymongo.MongoClient("mongodb://localhost:27017/") # opening a connection to Mongo
db = dbConn['crawlerDB'] # connecting to the database called crawlerDB
reviews = db[searchString].find({}) # searching the collection with the name same as the keyword
if reviews.count() > 0: # if there is a collection with searched keyword and it has records in it
return render_template('results.html',reviews=reviews) # show the results to user
else:
flipkart_url = "https://www.flipkart.com/search?q=" + searchString # preparing the URL to search the product on flipkart
uClient = uReq(flipkart_url) # requesting the webpage from the internet
flipkartPage = uClient.read() # reading the webpage
uClient.close() # closing the connection to the web server
flipkart_html = bs(flipkartPage, "html.parser") # parsing the webpage as HTML
bigboxes = flipkart_html.findAll("div", {"class": "bhgxx2 col-12-12"}) # seacrhing for appropriate tag to redirect to the product link
del bigboxes[0:3] # the first 3 members of the list do not contain relevant information, hence deleting them.
box = bigboxes[0] # taking the first iteration (for demo)
productLink = "https://www.flipkart.com" + box.div.div.div.a['href'] # extracting the actual product link
prodRes = requests.get(productLink) # getting the product page from server
prod_html = bs(prodRes.text, "html.parser") # parsing the product page as HTML
commentboxes = prod_html.find_all('div', {'class': "_3nrCtb"}) # finding the HTML section containing the customer comments
table = db[searchString] # creating a collection with the same name as search string. Tables and Collections are analogous.
#filename = searchString+".csv" # filename to save the details
#fw = open(filename, "w") # creating a local file to save the details
#headers = "Product, Customer Name, Rating, Heading, Comment \n" # providing the heading of the columns
#fw.write(headers) # writing first the headers to file
reviews = [] # initializing an empty list for reviews
# iterating over the comment section to get the details of customer and their comments
for commentbox in commentboxes:
try:
name = commentbox.div.div.find_all('p', {'class': '_3LYOAd _3sxSiS'})[0].text
except:
name = 'No Name'
try:
rating = commentbox.div.div.div.div.text
except:
rating = 'No Rating'
try:
commentHead = commentbox.div.div.div.p.text
except:
commentHead = 'No Comment Heading'
try:
comtag = commentbox.div.div.find_all('div', {'class': ''})
custComment = comtag[0].div.text
except:
custComment = 'No Customer Comment'
#fw.write(searchString+","+name.replace(",", ":")+","+rating + "," + commentHead.replace(",", ":") + "," + custComment.replace(",", ":") + "\n")
mydict = {"Product": searchString, "Name": name, "Rating": rating, "CommentHead": commentHead,
"Comment": custComment} # saving that detail to a dictionary
x = table.insert_one(mydict) #insertig the dictionary containing the rview comments to the collection
reviews.append(mydict) # appending the comments to the review list
return render_template('results.html', reviews=reviews) # showing the review to the user
except:
return 'something is wrong'
#return render_template('results.html')
else:
return render_template('index.html')
if __name__ == "__main__":
app.run(port=8000,debug=True) # running the app on the local machine on port 8000