-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.py
46 lines (35 loc) · 1.32 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
# -*- coding: utf-8 -*-
import requests
import json
import codecs
import dumper
# Path of the HTML report that main() writes (relative to the working directory).
RESULT_FILE="./gg.html"
def grabDataFromPage(urlToGrab, storage):
    """Download one listing page and pass its text to the dumper.

    urlToGrab -- URL of the page to fetch.
    storage   -- container handed to dumper.extractPageData together with the
                 page text. Presumably it is filled in place: main() passes a
                 dict here and reads the results from it afterwards.
    """
    response = requests.get(urlToGrab)
    # Force UTF-8 decoding of the body instead of relying on the encoding
    # that requests guesses from the response headers.
    response.encoding = "UTF-8"
    dumper.extractPageData(response.text, storage)
def _writeReport(crawledData, path):
    """Write the crawled streams as a UTF-8 HTML page at *path*.

    The page starts with an index (one link per streamer, followed by the
    number of its streams) and continues with one section per streamer that
    lists a link for every stream.

    crawledData maps a streamer key to a dict whose "streams" entry is a
    sequence of dicts with "url" and "title" keys.
    """
    # One sorted list drives both the index and the sections, so the page
    # layout is deterministic and the sections follow the index order.
    streamers = sorted(crawledData)
    # The context manager closes the file even if a write raises.
    with codecs.open(path, "w+", "UTF-8") as f:
        f.write('<head><meta charset="UTF-8"></head>')
        for s in streamers:
            count = len(crawledData[s]["streams"])
            f.write("<a href='#%s'>%s %s</a><br>" % (s, s, count))
        for k in streamers:
            f.write("<a name='%s'></a>" % k)
            f.write("<h1>%s</h1>" % k)
            # NOTE(review): keys, urls and titles come from scraped pages and
            # are written unescaped -- confirm whether dumper already returns
            # HTML-safe text before adding escaping here.
            for stream in crawledData[k]["streams"]:
                f.write("<br>")
                f.write("<a href='%s' target='_blank'>%s</a>"
                        % (stream["url"], stream["title"]))


def main():
    """Crawl every video listing page and write the HTML report.

    The number of pages is taken from the start page via
    dumper.extractMaxPages; each page 1..maxPages is then fetched with
    grabDataFromPage and the collected data is written to RESULT_FILE.
    """
    startUrl = "http://goodgame.ru/video/"
    page = 'http://goodgame.ru/video/page/%s/'
    maxPages = dumper.extractMaxPages(requests.get(startUrl).text)
    crawledData = {}
    for i in range(1, int(maxPages) + 1):
        # Single-argument parenthesised form prints the same text on
        # Python 2 and Python 3.
        print("Grabbing %s page" % i)
        grabDataFromPage(page % i, crawledData)
    _writeReport(crawledData, RESULT_FILE)
# Start the crawl only when the file is executed as a script, not on import.
if __name__ == "__main__":
    main()