forked from shadowfax92/Flipkart-Big-Billion-Day-parser
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathflipkart_parser.py
144 lines (126 loc) · 4.79 KB
/
flipkart_parser.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
# Author = Nikhil Venkat Sonti
# email = [email protected]
# github ID = shadowfax92
import sys
from xml.dom.minidom import _get_StringIO
from lxml import html
import requests
import os
import re
import time
import datetime
import csv
import urllib2
from StringIO import StringIO
import multiprocessing as mp
import socket
import shutil
from email.mime.text import MIMEText
from email.mime.multipart import MIMEMultipart
from email.mime.application import MIMEApplication
import subprocess
import time
# Items that have already been announced. Keys are the mail subject built from
# the two scraped name fields; values are the mail body that was sent. The
# scraper consults this dict so each item is mailed only once per process run.
new_item_dict = {}
def play_alert():
    """Speak "ring ring ring" aloud by shelling out to the Mac ``say`` command."""
    spoken_alert = 'say "ring ring ring"'
    os.system(spoken_alert)
def mail_me(subject, content):
    """Send a plain-text notification mail through the local sendmail binary.

    The message is piped to ``/usr/sbin/sendmail -t``, which takes the
    recipient from the ``To`` header. Sender and recipient are fixed.

    Args:
        subject: Text placed in the ``Subject`` header.
        content: Plain-text body of the message.

    Returns:
        True when sendmail exited with status 0, False otherwise.

    Raises:
        OSError: If the sendmail executable cannot be started.
    """
    msg = MIMEMultipart('alternative')
    msg["From"] = "[email protected]"
    msg["To"] = "[email protected]"
    msg["Subject"] = subject
    msg.attach(MIMEText(content))

    payload = msg.as_string()
    # The child's stdin is a byte pipe; encode only when as_string() gave text
    # that is not already a byte string.
    if not isinstance(payload, bytes):
        payload = payload.encode('utf-8')

    p = subprocess.Popen(
        ["/usr/sbin/sendmail", "-t"],
        stdin=subprocess.PIPE,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )
    out, err = p.communicate(payload)

    # Do not claim success when sendmail rejected the message.
    if p.returncode != 0:
        print('Mail failed (sendmail exit status %d): %r' % (p.returncode, err))
        return False

    print('Mailed = ' + content)
    return True
# Names containing any of these words trigger an audible alert.
_ALERT_KEYWORDS = re.compile(
    r'(disk|hard|seagate|external|headphone|headset|phone)', re.IGNORECASE)

# Common xpath prefix of one tile; the 1-based tile index and ']' follow it.
_TILE_XPATH_PREFIX = '/html/body/div[1]/div[3]/div/div[9]/div/div[1]/div['


def _first_match(tree, xpath):
    """Return the first node ``xpath`` selects in ``tree``, or None if none."""
    matches = tree.xpath(xpath)
    return matches[0] if matches else None


def _first_text(tree, xpath):
    """Return the first text node ``xpath`` selects, stripped, or '' if none."""
    text = _first_match(tree, xpath)
    return text.strip(' \t\n\r') if text is not None else ''


def _report_tile(tree, index):
    """Print the fields of tile ``index`` and announce the tile if it is new."""
    base = _TILE_XPATH_PREFIX + str(index) + ']'
    print('')

    sold_out = _first_text(tree, base + '/div/div[2]/text()')
    if sold_out:
        print("sold-out or not = " + sold_out)

    name_1 = _first_text(tree, base + '/a/div[4]/text()')
    if name_1:
        print("name-1 = " + name_1)
        if _ALERT_KEYWORDS.search(name_1):
            play_alert()

    name_2 = _first_text(tree, base + '/a/div[2]/div/text()')
    if name_2:
        print("name-2 = " + name_2)
        if _ALERT_KEYWORDS.search(name_2):
            play_alert()

    price = _first_text(tree, base + '/a/div[1]/text()')
    if price:
        print("offer percentage/price = " + price)

    link = ''
    link_node = _first_match(tree, base + '/a/div[5]/div[2]/div')
    data_url = link_node.get('data-url') if link_node is not None else None
    if data_url:
        link = 'http://www.flipkart.com' + data_url
        print("view/shop link = " + link)

    # A tile with neither name yielded nothing worth announcing.
    if not (name_1 or name_2):
        return

    # The scraped values are already strings; wrapping them in str() would
    # raise UnicodeEncodeError for non-ASCII names on Python 2.
    mail_subject = name_1 + " " + name_2
    if mail_subject in new_item_dict:
        return

    mail_content = "\n".join([name_1, name_2, sold_out, price, link]) + "\n"
    # Recorded before mailing so a failing mail is not retried on every poll.
    new_item_dict[mail_subject] = mail_content
    mail_me(mail_subject, mail_content)
    play_alert()


def get_content_from_url_and_store():
    """Fetch the Flipkart front page and announce tiles not seen before.

    Tiles 1 to 7 are looked up by fixed xpaths. For each tile the scraped
    fields are printed, an alert is played when a name matches one of the
    watched keywords, and a tile whose subject (the two names joined by a
    space) is not yet in ``new_item_dict`` is stored there, mailed and
    alerted. Every field is reset for each tile, so a field missing from one
    tile is reported as empty instead of reusing the previous tile's value.

    Returns:
        None. Failures are printed and never raised: a failure on one tile
        does not stop the remaining tiles, and a failure fetching or parsing
        the page abandons this poll only.
    """
    try:
        # The timeout keeps a stalled connection from blocking the poll loop.
        page = requests.get('http://www.flipkart.com/', timeout=30)
        tree = html.parse(StringIO(page.text)).getroot()
        os.system('clear')
        print('\nRUNNING FLIPKART PARSER')
        for i in range(1, 8):
            try:
                _report_tile(tree, i)
            except Exception as e:
                print(str(e))
    except Exception as e:
        print('Something Went Wrong :(')
        print('Exception:  ' + str(e))
def main():
    """Run the scraper in an endless loop, sleeping 5 seconds between polls."""
    pause_seconds = 5
    while True:
        get_content_from_url_and_store()
        time.sleep(pause_seconds)
# Start the polling loop only when the file is executed as a script, not when
# it is imported as a module.
if __name__ == '__main__':
    main()