import logging
from datetime import datetime
from pathlib import Path
from typing import Optional
from gphotos_sync import Utils
from gphotos_sync.GooglePhotosMedia import GooglePhotosMedia
from gphotos_sync.GooglePhotosRow import GooglePhotosRow
from gphotos_sync.LocalData import LocalData
from gphotos_sync.LocalFilesMedia import LocalFilesMedia
from gphotos_sync.restclient import RestClient
from gphotos_sync.Settings import Settings

log = logging.getLogger(__name__)


class GooglePhotosIndex(object):
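    # Media items requested per API page (100 is the maximum the Photos API
    # allows for mediaItems requests).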
PAGE_SIZE = 100

    def __init__(
self, api: RestClient, root_folder: Path, db: LocalData, settings: Settings
):
self._api: RestClient = api
self._root_folder: Path = root_folder
self._db: LocalData = db
self.files_indexed: int = 0
self.files_index_skipped: int = 0
if db:
self.latest_download = self._db.get_scan_date() or Utils.MINIMUM_DATE
self.settings = settings
self.start_date: datetime = settings.start_date
self.end_date: datetime = settings.end_date
self.include_video: bool = settings.include_video
self.rescan: bool = settings.rescan
self.favourites = settings.favourites_only
self.case_insensitive_fs: bool = settings.case_insensitive_fs
self.archived: bool = settings.archived
self._use_flat_path: bool = settings.use_flat_path
self._media_folder: Path = settings.photos_path

    def check_for_removed_in_folder(self, folder: Path):
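        """Recursively deletes any file under folder that has no matching row
        in the local index, skipping dot files and gphotos* metadata files."""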
for pth in folder.iterdir():
if pth.is_dir():
self.check_for_removed_in_folder(pth)
else:
local_path = pth.relative_to(self._root_folder).parent
if pth.match(".*") or pth.match("gphotos*"):
continue
file_row = self._db.get_file_by_path(
GooglePhotosRow, local_path, pth.name
)
if not file_row:
pth.unlink()
log.warning("%s deleted", pth)

    def check_for_removed(self):
"""Removes local files that are no longer represented in the Photos
Library - presumably because they were deleted.
note for partial scans using date filters this is still OK because
for a file to exist it must have been indexed in a previous scan
"""
log.warning("Finding and removing deleted media ...")
self.check_for_removed_in_folder(self._root_folder / self._media_folder)

    def write_media_index(self, media: GooglePhotosMedia, update: bool = True):
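        """Stores a single media item in the local index and advances the
        latest-download watermark if this item is newer."""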
self._db.put_row(GooglePhotosRow.from_media(media), update)
if media.create_date > self.latest_download:
self.latest_download = media.create_date

    def search_media(
self,
        page_token: Optional[str] = None,
start_date: Optional[datetime] = None,
end_date: Optional[datetime] = None,
do_video: bool = False,
favourites: bool = False,
) -> dict:
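        """Fetches one page of media metadata from the Google Photos API,
        using mediaItems.search with date/type/feature filters, or
        mediaItems.list when no filters apply. A page_token continues a
        previous search."""

        # Models the Date object used in the API's dateFilter ranges.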
class Y:
def __init__(self, y, m, d):
self.year = y
self.month = m
self.day = d

            def to_dict(self):
return {"year": self.year, "month": self.month, "day": self.day}

        start = Y(1900, 1, 1)
end = Y(3000, 1, 1)
type_list = ["ALL_MEDIA"]
if start_date:
start = Y(start_date.year, start_date.month, start_date.day)
if end_date:
end = Y(end_date.year, end_date.month, end_date.day)
if not do_video:
type_list = ["PHOTO"]
if favourites:
feature = "FAVORITES"
else:
feature = "NONE"
if not page_token:
log.info(
"searching for media start=%s, end=%s, videos=%s",
start_date,
end_date,
do_video,
)
if not start_date and not end_date and do_video and not favourites:
# no search criteria so do a list of the entire library
log.debug("mediaItems.list ...")
return self._api.mediaItems.list.execute( # type: ignore
pageToken=page_token, pageSize=self.PAGE_SIZE
).json()
else:
body = {
"pageToken": page_token,
"pageSize": self.PAGE_SIZE,
"filters": {
"dateFilter": {
"ranges": [
{"startDate": start.to_dict(), "endDate": end.to_dict()}
]
},
"mediaTypeFilter": {"mediaTypes": type_list},
"featureFilter": {"includedFeatures": [feature]},
"includeArchivedMedia": self.archived,
},
}
log.debug("mediaItems.search with body:\n{}".format(body))
return self._api.mediaItems.search.execute(body).json() # type: ignore

    def index_photos_media(self) -> int:
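        """Walks the Google Photos library page by page, adding new or
        changed items to the local index.

        Returns:
            the number of items indexed or updated in this scan.
        """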
log.warning("Indexing Google Photos Files ...")
total_listed = 0
if self.start_date:
start_date = self.start_date
elif self.rescan:
start_date = None
else:
start_date = self._db.get_scan_date()
items_json = self.search_media(
start_date=start_date,
end_date=self.end_date,
do_video=self.include_video,
favourites=self.favourites,
)
while items_json:
media_json = items_json.get("mediaItems", [])
items_count = 0
for media_item_json in media_json:
items_count += 1
total_listed += 1
media_item = GooglePhotosMedia(
media_item_json, to_lower=self.case_insensitive_fs
)
media_item.set_path_by_date(self._media_folder, self._use_flat_path)
(num, row) = self._db.file_duplicate_no(
str(media_item.filename),
str(media_item.relative_folder),
media_item.id,
)
# we just learned if there were any duplicates in the db
media_item.duplicate_number = num
if self.settings.progress and total_listed % 10 == 0:
log.warning(f"Listed {total_listed} items ...\033[F")
if not row:
self.files_indexed += 1
log.info(
"Indexed %d %s", self.files_indexed, media_item.relative_path
)
self.write_media_index(media_item, False)
if self.files_indexed % 2000 == 0:
self._db.store()
elif media_item.modify_date > row.modify_date:
self.files_indexed += 1
# todo at present there is no modify date in the API
# so updates cannot be monitored - this won't get called
log.info(
"Updated Index %d %s",
self.files_indexed,
media_item.relative_path,
)
self.write_media_index(media_item, True)
else:
self.files_index_skipped += 1
log.debug(
"Skipped Index (already indexed) %d %s",
self.files_index_skipped,
media_item.relative_path,
)
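                    # skipped items still advance the watermark so that the
                    # next incremental scan does not re-list them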
self.latest_download = max(
self.latest_download, media_item.create_date
)
log.debug(
"search_media parsed %d media_items with %d PAGE_SIZE",
items_count,
GooglePhotosIndex.PAGE_SIZE,
)
next_page = items_json.get("nextPageToken")
if next_page:
items_json = self.search_media(
page_token=next_page,
start_date=start_date,
end_date=self.end_date,
do_video=self.include_video,
favourites=self.favourites,
)
else:
break
# scan (in reverse date order) completed so the next incremental scan
# can start from the most recent file in this scan
if not self.start_date:
self._db.set_scan_date(last_date=self.latest_download)
log.warning(f"indexed {self.files_indexed} items")
return self.files_indexed

    def get_extra_meta(self):
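        """Re-reads metadata (uid, create date, size) from files already on
        disk and stores it in the index so that local and remote items can
        be compared."""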
count = 0
log.warning(
"updating index with extra metadata for comparison "
"(may take some time) ..."
)
media_items = self._db.get_rows_by_search(GooglePhotosRow, uid="ISNULL")
for item in media_items:
file_path = self._root_folder / item.relative_path
# if this item has a uid it has been scanned before
if file_path.exists():
local_file = LocalFilesMedia(file_path)
count += 1
log.info("updating metadata %d on %s", count, file_path)
item.update_extra_meta(
local_file.uid, local_file.create_date, local_file.size
)
                # erm, let's try some duck typing then!
# todo is the DbRow class model rubbish or brilliant Python?
# noinspection PyTypeChecker
self._db.put_row(GooglePhotosRow.from_media(item), update=True)
if count % 2000 == 0:
self._db.store()
else:
log.debug("skipping metadata (already scanned) on %s", file_path)
log.warning("updating index with extra metadata complete")