From e6ec8b4aeb305bf671ca86cf398baa808bc887c7 Mon Sep 17 00:00:00 2001 From: kuechensofa <89413714+kuechensofa@users.noreply.github.com> Date: Thu, 19 Jan 2023 16:26:56 +0000 Subject: [PATCH] [#795] wb-manager: Show error when adding duplicate warc files (#797) --- pywb/manager/manager.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/pywb/manager/manager.py b/pywb/manager/manager.py index 3d56d53e2..18c76ee77 100644 --- a/pywb/manager/manager.py +++ b/pywb/manager/manager.py @@ -121,14 +121,24 @@ def add_warcs(self, warcs): format(self.archive_dir)) full_paths = [] + duplicate_warcs = [] for filename in warcs: filename = os.path.abspath(filename) + + # don't overwrite existing warcs with duplicate names + if os.path.exists(os.path.join(self.archive_dir, os.path.basename(filename))): + duplicate_warcs.append(filename) + continue + shutil.copy2(filename, self.archive_dir) full_paths.append(os.path.join(self.archive_dir, filename)) logging.info('Copied ' + filename + ' to ' + self.archive_dir) self._index_merge_warcs(full_paths, self.DEF_INDEX_FILE) + if duplicate_warcs: + logging.warning(f'Warcs {", ".join(duplicate_warcs)} weren\'t added because of duplicate names.') + def reindex(self): cdx_file = os.path.join(self.indexes_dir, self.DEF_INDEX_FILE) logging.info('Indexing ' + self.archive_dir + ' to ' + cdx_file)