-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathbillboard_scrape.py
92 lines (77 loc) · 2.3 KB
/
billboard_scrape.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
import billboard
import time
import pickle
import datetime
# Billboard chart identifier: passed to billboard.ChartData for every fetch
# and used as the filename prefix of both pickle files this script writes.
chartname = 'hot-100'
# strptime format used to parse each fetched chart's `date` string.
DATE_FMT = '%Y-%m-%d'
# Seconds to sleep after each chart fetch (presumably to be polite to the
# remote server -- confirm before lowering).
SLEEPYTIME = 1
class SongDB(object):
    """Pickle-backed collection of charted songs, keyed by artist then title.

    ``self.db`` maps ``artist -> {title -> song}``.  Every stored song object
    is given an ``earliest`` attribute holding the earliest date passed to
    :meth:`add_song` for that artist/title pair.
    """

    def __init__(self, path):
        """Load the database pickled at *path*, or start empty.

        An ``IOError`` while opening or reading the file (e.g. it does not
        exist yet) results in an empty database.

        NOTE: unpickling can execute arbitrary code; only point this at
        files produced by :meth:`save`.
        """
        self.path = path
        try:
            # Binary mode: pickle data is a byte stream.  The context manager
            # guarantees the handle is closed even if unpickling fails.
            with open(path, 'rb') as f:
                self.db = pickle.load(f)
        except IOError:
            self.db = {}

    def add_song(self, song, date):
        """Record *song* as having charted on *date*.

        *song* must expose ``artist``, ``title``, ``weeks`` and ``peakPos``.
        The first time an artist/title pair is seen, *song* itself is stored
        with ``song.earliest = date``.  On later sightings the stored entry
        is merged: largest ``weeks``, smallest ``peakPos``, earliest date.
        """
        artist_songs = self.db.setdefault(song.artist, {})
        if song.title in artist_songs:
            extant = artist_songs[song.title]
            # Not strictly necessary if charts are always visited in reverse
            # chronological order, but merging makes the result independent
            # of iteration order.
            extant.weeks = max(extant.weeks, song.weeks)
            extant.peakPos = min(extant.peakPos, song.peakPos)
            extant.earliest = min(extant.earliest, date)
        else:
            song.earliest = date
            artist_songs[song.title] = song

    def save(self):
        """Pickle the database to ``self.path``, overwriting any existing file."""
        with open(self.path, 'wb') as f:
            pickle.dump(self.db, f)

    def size(self):
        """Return the total number of distinct (artist, title) entries."""
        # .values() (rather than the Python-2-only .itervalues()) keeps this
        # working on both Python 2 and Python 3.
        return sum(len(artist_songs) for artist_songs in self.db.values())
# Output files: the merged song database and the raw list of fetched charts.
path = chartname + '.pickle'
chartpicklename = chartname + '-chartdata.pickle'


def _save_progress(db, charts, chartpicklename):
    """Write the song database and the list of fetched charts to disk."""
    db.save()
    # Binary mode: pickle data is a byte stream.
    with open(chartpicklename, 'wb') as f:
        pickle.dump(charts, f)


# Resume from an earlier run if its chart pickle can be opened; otherwise
# start from scratch.
# NOTE: unpickling can execute arbitrary code; only load files this script
# wrote itself.
try:
    with open(chartpicklename, 'rb') as f:
        charts = pickle.load(f)
except IOError:
    charts = []

# Continue with the chart preceding the last one already fetched.  With no
# saved charts, date stays None (billboard.ChartData presumably then returns
# the most recent chart -- see the billboard.py documentation).
date = charts[-1].previousDate if charts else None

# If saved charts exist and the last one has no previous date, the earlier
# run already walked the whole history.  Fetching with an empty date would
# restart from the beginning and append duplicates, so skip the loop.
finished = bool(charts) and not date

db = SongDB(path)
i = 0  # number of charts fetched during this run
lim = float('inf')  # cap on charts fetched per run; inf means no cap
try:
    while not finished:
        chart = billboard.ChartData(chartname, date)
        dt = datetime.datetime.strptime(chart.date, DATE_FMT).date()
        for song in chart:
            db.add_song(song, dt)
        charts.append(chart)
        time.sleep(SLEEPYTIME)
        i += 1
        if not chart.previousDate or i >= lim:
            break
        # Progress indicator every 26 charts.  Single-argument print(...)
        # behaves the same on Python 2 and Python 3.
        if (i % 26) == 0:
            print(date)
        date = chart.previousDate
except BaseException:
    # Deliberately catches everything, including KeyboardInterrupt, so that
    # partial progress is saved before the exception is re-raised.
    if i == 0:
        # Nothing new was fetched in this run, so there is nothing to save.
        raise
    print("Uh oh. Got unexpected exception. Saving whatever we've accumulated before bailing")
    _save_progress(db, charts, chartpicklename)
    raise
_save_progress(db, charts, chartpicklename)
print("Saved db with {} songs to {}".format(db.size(), path))