-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathspider_api.py
111 lines (103 loc) · 3.02 KB
/
spider_api.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
#!/usr/bin/env python
# coding: utf-8
import urllib2
import ConfigParser
import json
import time
import random
import pymongo
import fake_useragent
def get_json(url):
print 'Fetching:', url
while True:
time.sleep(1)
try:
request = urllib2.Request(url)
page = urllib2.urlopen(request)
except Exception as e:
print e
else:
try:
j = json.loads(page.read())
except Exception as e:
print e
else:
if j.has_key('lastModified'):
return j
elif j.has_key('reason'):
print 'Error: %s' % j['reason'],
if j['reason'].find(' not found.') >= 0:
print ''
return None
elif j == {}:
print 'Error: empty json.'
return None
else:
print 'Unkown error.',
print 'Try again.'
time.sleep(random.random()*5)
class spider:
def __init__(self):
print 'Reading configurations...',
config = ConfigParser.ConfigParser()
config.read(["spider.default.cfg", "spider.cfg"])
self.config = config
print 'Done.'
print 'Preparing fake_useragent...',
_ua = fake_useragent.UserAgent()
print 'Done.'
def spide(self):
c = self.config
print 'Connecting to mongodb at %s on %s ...' % (c.get('mongodb', 'domain'), c.getint('mongodb', 'port')),
try:
mongodb = pymongo.MongoClient(c.get('mongodb', 'domain'), c.getint('mongodb', 'port'))
except Exception as e:
print 'Failed.'
print e
return
else:
print 'Done.'
print 'Using db %s, collection %s....' % (c.get('mongodb', 'db'), c.get('mongodb', 'collection')),
try:
dbc = mongodb[c.get('mongodb', 'db')][c.get('mongodb', 'collection')]
except Exception as e:
print 'Failed.'
print e
return
else:
print 'Done.'
domain = c.get('source', 'domain')
realm = c.get('source', 'realm').replace(' ', '%20')
guild = c.get('source', 'guild').replace(' ', '%20')
protocol = c.get('source', 'protocol') + '://'
guild_url = '%s%s/api/wow/guild/%s/%s?fields=members' %\
(protocol, domain, realm, guild)
gj = get_json(guild_url)
for person in gj['members']:
i = dbc.find({'name':person['character']['name'].encode('utf-8')})
if i.count() > 0:
continue
info = {}
character = person['character']
info['name'] = character['name'].encode('utf-8')
info['race'] = character['race']
info['class'] = character['class']
info['gender'] = character['gender']
info['level'] = character['level']
info['realm'] = character['guildRealm'].encode('utf-8')
info['guild'] = character['guild'].encode('utf-8')
if info['level'] >= c.getint('default', 'max_level'):
chara_url = '%s%s/api/wow/character/%s/%s?fields=items,professions,pvp' %\
(protocol, domain, realm, info['name'])
cj = get_json(chara_url)
if cj != None:
info['item_level'] = cj['items']['averageItemLevel']
prof = []
for i in cj['professions']['primary']:
prof.append(i['name'].encode('utf-8'))
info['profession'] = prof
info['personal_rating'] = cj['pvp']["brackets"]["ARENA_BRACKET_RBG"]["rating"]
dbc.insert(info)
if __name__ == '__main__':
    # Script entry point: build the spider, then run a single crawl.
    spider().spide()