-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmedia_downloader.py
86 lines (72 loc) · 2.98 KB
/
media_downloader.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
from urllib import request
import pathlib
from urllib.error import ContentTooShortError, HTTPError
import requests
import sys
import re
import json
import os
# Pattern for a pbs.twimg.com media URL. Group 1 captures the media name
# (letters, digits, '-' and '_'); group 2 captures the extension (jpg or png).
PHOTO_RE = r'https:\/\/pbs\.twimg\.com\/media\/([a-zA-Z0-9-_]*)\.(jpg|png)'
# Highest retry number _photo will still attempt; above this it gives up.
RETRY = 5
def video(url: str, source_video_id: str, download_dir: str) -> bool:
    """Download the video at ``url`` to ``<download_dir>/<source_video_id>.mp4``.

    Args:
        url: Location of the video; handed to ``urllib.request.urlretrieve``.
        source_video_id: Used as the file name (without the ``.mp4`` suffix).
        download_dir: Existing directory that receives the file.

    Returns:
        True only when a new file was downloaded completely. False when
        ``download_dir`` does not exist, when the target file already exists,
        or when the download fails.
    """
    if not pathlib.Path(download_dir).exists():
        return False
    download_path = pathlib.Path(download_dir, '%s.mp4' % source_video_id)
    if download_path.exists():
        return False

    completed = False
    try:
        request.urlretrieve(url, str(download_path))
        completed = True
    except (HTTPError, ContentTooShortError):
        # HTTP errors and truncated transfers are reported only through the
        # return value.
        # @todo add retry for ContentTooShortError
        pass
    except Exception as error:
        # Exception (not a bare except) so Ctrl-C / SystemExit still propagate.
        print("[x] failed to download video (%s) to (%s) because an exception happened: %s" % (source_video_id, str(download_path), type(error)))
    finally:
        if not completed:
            # urlretrieve may already have written part of the file. Leaving
            # it would make the exists() guard above skip this video forever.
            try:
                download_path.unlink()
            except OSError:
                # Best-effort cleanup: nothing was written, or it cannot be
                # removed; either way the failure is already reported.
                pass
    return completed
def _photo(url: str, path: str, retry: int, timeout: float = 30.0) -> bool:
    """Fetch ``url`` with ``requests`` and write the response body to ``path``.

    Args:
        url: Address of the image.
        path: File that receives the raw response bytes.
        retry: Number of the first attempt. Attempts are numbered upwards and
            stop once the number exceeds ``RETRY``.
        timeout: Seconds passed to ``requests.get`` as its timeout. Without
            one the request can block indefinitely and a timeout is never
            raised.

    Returns:
        True once the body has been written to ``path``. False when the
        response status is not 200, when every attempt timed out or returned
        an empty body, or when any other error occurs.
    """
    for attempt in range(retry, RETRY + 1):
        try:
            res = requests.get(url, timeout=timeout)
            if res.status_code != 200:
                return False
            if len(res.content) == 0:
                print('[x] failed to download photo (%s) because photo size == 0 (retry %d)' % (url, attempt))
                continue
            with open(path, 'wb') as file:
                file.write(res.content)
        except requests.exceptions.Timeout:
            # Covers both connect and read timeouts; both are worth retrying.
            print('[x] failed to download photo (%s) because time out (retry %d)' % (url, attempt))
            continue
        except Exception as error:
            # Exception (not a bare except) so Ctrl-C / SystemExit still propagate.
            print('[x] failed to download photo (%s) because an unexpected exception happened: %s' % (url, type(error)))
            return False
        return True
    # Attempts exhausted (or ``retry`` was already above RETRY).
    return False
def tweet_photo(url: str, download_dir: str) -> bool:
    """Download a tweet image into ``download_dir``.

    The file is named after the media name captured from ``url`` by
    ``PHOTO_RE`` and always gets a ``.jpg`` suffix.

    Returns False when ``download_dir`` does not exist, when ``url`` does not
    match ``PHOTO_RE``, or when a file larger than 1000 bytes is already
    present at the target path. An existing file of 1000 bytes or fewer is
    deleted and fetched again. Otherwise the result of ``_photo`` is returned.
    """
    if not pathlib.Path(download_dir).exists():
        return False

    match = re.search(PHOTO_RE, url)
    if match is None:
        print('[!] unknown url %s found for photo' % url)
        return False

    # Name the file after the media name, since one tweet can carry
    # several images.
    media_name = match.group(1)
    target = pathlib.Path(download_dir) / '{}.jpg'.format(media_name)
    if target.exists():
        already_downloaded = target.stat().st_size > 1000
        if already_downloaded:
            return False
        # Small leftover file: discard it and fetch again.
        os.remove(str(target))

    return _photo(url, str(target), 0)
def avatar_photo(url: str, download_location: str) -> bool:
    """Download ``url`` to ``avatar.jpg`` inside ``download_location``.

    Returns the result of ``_photo`` called with retry number 0.
    """
    destination = pathlib.Path(download_location) / 'avatar.jpg'
    return _photo(url, str(destination), 0)
def banner_photo(url: str, download_location: str) -> bool:
    """Download ``url`` to ``banner.jpg`` inside ``download_location``.

    Returns the result of ``_photo`` called with retry number 0.
    """
    destination = pathlib.Path(download_location) / 'banner.jpg'
    return _photo(url, str(destination), 0)