google_shopping_crawler.py
from download_pics import MyThread
from download_pics import download_pics
from analyse_web_pages import WebPageAnalysis
from Queue import Queue
from time import sleep, ctime
import threading
import os


def main():
    print 'start:', ctime()
    search_class = ['shoes']
    max_download_threads = 20
    root = 'z:\\'
    threads = []
    queue = Queue()
    for item in search_class:
        # One download directory per search term, e.g. z:\shoes
        root_path = ''.join((root, item))
        if not os.path.exists(root_path):
            os.mkdir(root_path)
        url = 'http://www.google.co.uk/search?q=%s&tbm=shop&ei=HAGYUujUEIfvkQWPTw&ved=0CAMQyBAoAQ&pshpl=1&pshplp=2&num=10' % item
        # Producer: analyses the Google Shopping results page and feeds image
        # URLs into the shared queue. Note that WebPageAnalysis(url, queue) is
        # evaluated here and its result is handed to the thread as its target.
        t = threading.Thread(target=WebPageAnalysis(url, queue))
        threads.append(t)
        # Consumers: a pool of download threads that pull image URLs off the
        # queue and save the pictures under root_path.
        for i in xrange(0, max_download_threads):
            t = MyThread(download_pics, (root_path + '\\', queue), 'download_pics')
            threads.append(t)
    # Start the producer first, then the daemonised download workers.
    threads[0].start()
    for i in threads[1:]:
        i.setDaemon(True)
        i.start()
    threads[0].join()
    # Busy-wait until the queue is drained, then give the daemon threads
    # a moment to finish their last downloads before the process exits.
    while not queue.empty():
        pass
    sleep(10)
    print 'end:', ctime()


if __name__ == '__main__':
    main()
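
The script depends on two helpers from download_pics that are not part of this file. Based only on how they are called above, a minimal sketch of what they might look like follows: MyThread is assumed to be a thin threading.Thread wrapper that runs func(*args), and download_pics is assumed to consume image URLs from the shared queue and save them to disk. The use of urllib.urlretrieve and the filename scheme are illustrative assumptions, not the repository's actual implementation.

# Hypothetical sketch of the helper interfaces assumed by the crawler above;
# the real download_pics module is not shown, so the behaviour here is a guess
# inferred from the call sites.
import threading
import urllib


class MyThread(threading.Thread):
    """Thread wrapper matching MyThread(download_pics, (path, queue), 'download_pics')."""
    def __init__(self, func, args, name=''):
        threading.Thread.__init__(self, name=name)
        self.func = func
        self.args = args

    def run(self):
        # Run the supplied callable with its argument tuple in this thread.
        self.func(*self.args)


def download_pics(save_dir, queue):
    """Consume image URLs from the shared queue and save them under save_dir."""
    while True:
        pic_url = queue.get()          # blocks until a URL is available
        filename = save_dir + pic_url.split('/')[-1]
        urllib.urlretrieve(pic_url, filename)
        queue.task_done()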