-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathquick_check_albums.py
113 lines (99 loc) · 3.85 KB
/
quick_check_albums.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
from requests import get
from requests.exceptions import RequestException
from contextlib import closing
from bs4 import BeautifulSoup
import glob
import argparse
import os
import numpy as np
import webbrowser
def simple_get(url, timeout=10):
    '''
    Attempt to fetch the content at `url` with an HTTP GET request.

    Returns the raw response body (`resp.content`, i.e. bytes) when
    `is_good_response` accepts the response, otherwise None. Any
    `RequestException` (timeouts included) is reported on stdout and also
    results in None.

    `timeout` is the number of seconds handed to `requests.get`. Without a
    timeout the request may block forever on a server that never answers.
    '''
    try:
        # closing() guarantees the streamed connection is released even when
        # the body is never read because the response is rejected.
        with closing(get(url, stream=True, timeout=timeout)) as resp:
            if is_good_response(resp):
                return resp.content
            return None
    except RequestException as e:
        print('Error during requests to {0} : {1}'.format(url, str(e)))
        return None
def is_good_response(resp):
    '''
    Return True if the response seems to be HTML, False otherwise.

    A response qualifies when its status code is 200 and its Content-Type
    header contains 'html' (compared case-insensitively). A response that
    carries no Content-Type header is reported as not HTML.
    '''
    # .get() instead of [] so that a missing header gives False rather than
    # raising KeyError; `or ''` also covers a header explicitly set to None.
    content_type = (resp.headers.get('Content-Type') or '').lower()
    return resp.status_code == 200 and 'html' in content_type
# Lower-cased file extensions that count as a photo or video in a local album.
_MEDIA_EXTENSIONS = ('.jpg', '.jpeg', '.png', '.avi', '.mov', '.mp4', '.wmv')


def _first_int(text):
    '''Return the first all-digit, whitespace-separated token of `text` as an
    int, or 0 when there is none.'''
    return next((int(tok) for tok in text.split() if tok.isdigit()), 0)


def _count_media_files(album_dir):
    '''Return the number of photo/video entries directly inside `album_dir`.'''
    # The directory is globbed once and the extension is compared on the
    # lower-cased path, so every entry is counted at most once regardless of
    # whether glob matching is case-sensitive on this platform, and
    # mixed-case extensions such as '.Jpg' are included.
    # glob.escape keeps characters like '[' in a folder name from being
    # interpreted as wildcards.
    pattern = os.path.join(glob.escape(album_dir), '*')
    return sum(
        1 for path in glob.glob(pattern)
        if path.lower().endswith(_MEDIA_EXTENSIONS)
    )


def main():
    '''
    Compare the Google Photos web albums with the locally downloaded ones.

    Opens the Google Photos albums page in the browser and waits until the
    user confirms that it was saved as 'Albums - Google Photos.html'
    (looked up relative to the current working directory). It then prints:

    * every web album with its size text,
    * every local album (sub-directory of the module-level `args.dir`) with
      the number of media files it contains,
    * the albums found only on the web and only locally,
    * the albums present on both sides whose item counts differ,
    * summary statistics.
    '''
    webbrowser.open('https://photos.google.com/albums')
    input('The opened web site should be saved as a HTML only web page to the\n'
          'folder of this repo. When finished, press <Enter> to continue...')

    print('\nA list of all web albums:')
    # Binary mode lets BeautifulSoup detect the document encoding itself
    # instead of relying on the platform default; the with-block makes sure
    # the file handle is closed.
    with open('Albums - Google Photos.html', 'rb') as raw_html:
        html = BeautifulSoup(raw_html, 'html.parser')
    # NOTE(review): 'apz4bc' and 'UV4Xae' look like generated Google Photos
    # markup identifiers for the album title and item count - confirm they
    # are still current when the page layout changes.
    walbums = html.find_all("div", {"jsname": "apz4bc"})
    wsize = html.find_all("div", {"class": "UV4Xae"})
    web_albums = []        # web album names, in page order
    web_size_by_name = {}  # web album name -> item count
    for album, ws in zip(walbums, wsize):
        web_albums.append(album.text)
        # setdefault keeps the count of the first album when a name repeats.
        web_size_by_name.setdefault(album.text, _first_int(ws.text))
        print(album.text+' ('+ws.text+')')

    print('\nA list of local albums:')
    album_dirs = sorted(glob.glob(glob.escape(args.dir)+'/*/'), reverse=True)
    dl_albums = []  # local album names
    dl_size = []    # media file count per local album, parallel to dl_albums
    for album_dir in album_dirs:
        # The glob pattern ends with a separator, so every match does too:
        # dirname() strips it and basename() then yields the folder name.
        dl_album = os.path.basename(os.path.dirname(album_dir))
        ds = _count_media_files(album_dir)
        dl_albums.append(dl_album)
        dl_size.append(ds)
        print(dl_album+' ('+str(ds)+')')

    web_names = set(web_albums)
    local_names = set(dl_albums)

    print('\nA list of albums that were not downloaded yet:')
    ndl_albums = sorted(web_names - local_names)
    for ndl_album in ndl_albums:
        print(ndl_album)

    print('\nA list of downladed extra albums (shared albums, deleted web albums, etc.):')
    ext_albums = sorted(local_names - web_names)
    for ext_album in ext_albums:
        print(ext_album)

    print('\nA list of albums with different number of photos/videos:')
    for album, ds in zip(dl_albums, dl_size):
        ws = web_size_by_name.get(album)
        if ws is not None and ws != ds:
            print(album+' (local: '+str(ds)+', web: '+str(ws)+')')

    print('\nStats:')
    print(' - no. of web albums: '+str(len(web_albums)))
    print(' - no. of downloaded albums: '+str(len(dl_albums)))
    print(' - no. of albums that exist only on the server side: '+str(len(ndl_albums)))
    print(' - no. of albums that exist only on the local side: '+str(len(ext_albums)))
if __name__ == '__main__':
    # Script entry point: parse the command line into the module-level
    # `args` (main() reads args.dir from it), then run the comparison.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '-d', '--dir',
        help='\nAlbums directory.\n',
        default=os.path.expanduser('~/Pictures'),
        type=str,
    )
    args = parser.parse_args()
    main()