-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmadeon_scraper.py
141 lines (111 loc) · 4.65 KB
/
madeon_scraper.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
#!/usr/bin/env python3
"""Scrapes audio files from Madeon's awesome Adventure Machine"""
import argparse
import logging
import os
import requests
import shutil
import sys
import threading
class MadeonScraper(object):
    """Fetch the Adventure Machine ``.ogg`` samples into a local directory.

    Files are saved under ``AdventureMachineSamples/`` relative to the current
    working directory.  Samples that already exist on disk are skipped, and a
    run that leaves files missing is repeated up to two more times.
    """

    # Directory (relative to the working directory) the samples are saved in.
    SAMPLE_DIRECTORY = "AdventureMachineSamples/"

    # Seconds to wait on the server before a request is abandoned; without a
    # timeout a stalled connection would block the scraper forever.
    REQUEST_TIMEOUT = 30

    def __init__(self):
        # The root logger is used on purpose: main() configures (and can
        # disable) that same logger.
        self.logger = logging.getLogger()
        # Number of re-runs already triggered by _cleanup().
        self.total_scrape_attempts = 0
        self.config = {
            # Instrument name -> stop value handed to range(1, stop).
            # NOTE(review): range(1, stop) ends at stop - 1; confirm the
            # numbers below are meant as exclusive upper bounds.
            "instruments": {
                "bass": 11,
                "drum": 11,
                "sounds": 16
            },
            "url_format": "http://madeonwmas.s3-eu-west-1.amazonaws.com/assets/audio/{}.1.{}.ogg"
        }

    def start_scraper(self):
        """Download every configured sample that is not on disk yet.

        Per-file failures are logged and do not stop the run; once all
        samples were tried, _cleanup() decides whether to run again.
        """
        self.logger.info("Scraping...")
        self._ensure_local_directory_exists()

        for instrument, sample in self._iter_samples():
            destination_path = self._get_file_destination(instrument, sample)
            filename = os.path.basename(destination_path)

            if os.path.isfile(destination_path):
                self.logger.info("%s already exists", filename)
                continue

            res = self._make_request(instrument, sample)
            try:
                # raise_for_status() never returns a falsy "failure" value
                # (it raises instead), so the status is tested via ``ok``.
                if res.ok:
                    self._write_file(res.raw, destination_path)
                else:
                    self.logger.debug("URL doesn't exist - %s out of range",
                                      filename)
            except Exception as e:
                self.logger.error("Received unexpected exception %s", repr(e))
            finally:
                # Streamed responses hold their connection until closed.
                res.close()

        self._cleanup()

    def _iter_samples(self):
        """Yield every configured ``(instrument, sample_number)`` pair."""
        for instrument, file_range in self.config["instruments"].items():
            for sample in range(1, file_range):
                yield instrument, sample

    def _make_request(self, instrument, n):
        """Start a streaming GET for sample ``n`` of ``instrument``.

        Returns:
            The ``requests`` response; the caller is responsible for
            closing it.
        """
        full_url = self._get_full_url(instrument, n)
        self.logger.debug("Fetching %s", full_url)
        return requests.get(full_url, stream=True,
                            timeout=self.REQUEST_TIMEOUT)

    def _get_full_url(self, instrument, n):
        """Return the remote URL of sample ``n`` of ``instrument``."""
        return self.config["url_format"].format(instrument, n)

    def _ensure_local_directory_exists(self):
        """Create the sample directory if it is missing."""
        os.makedirs(self.SAMPLE_DIRECTORY, exist_ok=True)

    def _write_file(self, raw_response, destination_path):
        """Copy a raw response stream to ``destination_path``.

        The data is written to a ``.part`` file first and only renamed once
        the copy finished, so an interrupted download never leaves a
        truncated file that later looks like a finished sample.

        Args:
            raw_response: File-like object to read from; its
                ``decode_content`` attribute is set to ``True``.
            destination_path: Final path of the downloaded file.

        Raises:
            Exception: Whatever reading or writing raised; the partial file
                is removed before the exception propagates.
        """
        directory, filename = os.path.split(destination_path)
        # os.path.join(directory, "") restores the trailing separator.
        self.logger.info("Writing %s to %s",
                         filename, os.path.join(directory, ""))

        partial_path = destination_path + ".part"
        try:
            with open(partial_path, "wb") as f:
                raw_response.decode_content = True
                shutil.copyfileobj(raw_response, f)
            os.replace(partial_path, destination_path)
        finally:
            # Only still present when the copy or the rename failed.
            if os.path.exists(partial_path):
                os.remove(partial_path)

    def _get_file_destination(self, instrument, n):
        """Return the local path for sample ``n`` of ``instrument``."""
        return "{}{}.1.{}.ogg".format(self.SAMPLE_DIRECTORY, instrument, n)

    def _cleanup(self):
        """Check which samples are missing and re-run the scraper if needed.

        The scraper is restarted while fewer than two re-runs have happened;
        after that the missing paths are reported as an error.
        """
        expected_paths = (
            self._get_file_destination(instrument, sample)
            for instrument, sample in self._iter_samples()
        )
        failed_downloads = [
            path for path in expected_paths if not os.path.exists(path)
        ]

        if not failed_downloads:
            self.logger.info("All files successfully retrieved")
        elif self.total_scrape_attempts < 2:
            self.logger.warning("Attempting to redownload...")
            self.total_scrape_attempts += 1
            self.start_scraper()
        else:
            self.logger.error("Failed to fetch %s", failed_downloads)
def main(args):
    """Configure logging from the parsed CLI flags and run the scraper.

    Args:
        args: Parsed arguments exposing the boolean attributes ``debug``
            (log at DEBUG instead of INFO) and ``no_log`` (disable the
            root logger).
    """
    log_fmt = "%(asctime)s %(levelname)s [%(filename)s:%(lineno)d] %(message)s"
    level = logging.DEBUG if args.debug else logging.INFO

    # sets requests and urllib3 loggers to warning, useful for --debug
    logging.getLogger("requests").setLevel(logging.WARNING)
    logging.getLogger("urllib3").setLevel(logging.WARNING)
    logging.basicConfig(level=level, format=log_fmt)
    logger = logging.getLogger()

    # Turns off logging
    if args.no_log:
        logger.disabled = True

    try:
        # The scraper runs in the calling thread.  The previous code built
        # threading.Thread(target=ms.start_scraper()), which *called* the
        # scraper right here and handed its None result to a thread that
        # then did nothing; calling it directly keeps the real behaviour
        # and keeps failures inside this try block.
        MadeonScraper().start_scraper()
    except (KeyboardInterrupt, Exception) as e:
        # KeyboardInterrupt is not an Exception subclass, so it is listed
        # explicitly; IOError already is one.
        exception_handler(e, logger)
def exception_handler(e, logger):
    """Log the exception that aborted the run and terminate the process.

    Args:
        e: The exception instance that was caught.
        logger: Logger used to report the exception's class name.

    Raises:
        SystemExit: Always, with status 1 so that shells and calling
            scripts can tell an aborted run from a successful one.
    """
    # newline makes logger more obvious
    print("\n")
    logger.error("%s raised - Exiting...", e.__class__.__name__)
    # A run that ends here did not succeed, so report a failure status.
    sys.exit(1)
if __name__ == '__main__':
    # Command-line entry point: both options are plain on/off switches.
    parser = argparse.ArgumentParser(
        description="Scrapes audio from Madeon's Adventure Machine")
    for flag, help_text in (
            ("--debug", "Sets logging to debug"),
            ("--no-log", "Turns off all logging"),
    ):
        parser.add_argument(flag, action="store_true", help=help_text)
    args = parser.parse_args()
    main(args)