From 56fd4d699cbe700353fdcc0e5d3996952273aad9 Mon Sep 17 00:00:00 2001
From: Jueun Seo
Date: Fri, 19 Mar 2021 18:42:15 +0900
Subject: [PATCH] Use a different name when a duplicate file is found
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 text_cleaner_main.py |  2 +-
 text_file_cleaner.py | 17 +++++++++++++++++
 2 files changed, 18 insertions(+), 1 deletion(-)

diff --git a/text_cleaner_main.py b/text_cleaner_main.py
index fef5214..269b585 100644
--- a/text_cleaner_main.py
+++ b/text_cleaner_main.py
@@ -16,7 +16,7 @@
 from text_file_cleaner import TextFileCleaner
 from text_filters import UUIDDashFilter, UUIDSearchFilter
 
-APP_VERSION = '0.1.2'
+APP_VERSION = '0.1.3'
 
 DEBUG = False
 
diff --git a/text_file_cleaner.py b/text_file_cleaner.py
index 9adb44d..44d5cb8 100644
--- a/text_file_cleaner.py
+++ b/text_file_cleaner.py
@@ -51,10 +51,27 @@ def get_total_file_lines(self):
     def get_num_excluded(self):
         return self._num_excluded
 
+    def get_suffixed_filename(self, path, index):
+        filename, file_extension = os.path.splitext(path)
+        if file_extension:
+            return '{}({}){}'.format(filename, index, file_extension)
+        else:
+            return '{}({})'.format(filename, index)
+
+    def get_none_duplicated_path(self, path):
+        if not os.path.exists(path):
+            return path
+        for i in range(1, 100):
+            new_path = self.get_suffixed_filename(path, i)
+            if not os.path.exists(new_path):
+                return new_path
+        raise Exception("Too many duplicated files")
+
     def process(self):
         for path in self.path_list:
             with csv_excel_reader(path) as in_file:
                 out_filename = self.add_prefix(path, 'cleaned_').replace('.xlsx', '.csv').replace('.xls', '.csv')
+                out_filename = self.get_none_duplicated_path(out_filename)
                 with open(out_filename, 'w', encoding='utf-8') as out_file:
                     for processed_lines in self.process_file(in_file, out_file):
                         yield processed_lines
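
Note on the new behaviour: get_suffixed_filename inserts a numeric suffix before the file
extension, and get_none_duplicated_path scans suffixes (1) through (99) until it finds a path
that does not exist yet, so repeated runs no longer overwrite earlier output. Below is a
minimal standalone sketch of that logic for illustration; the helpers are copied out of
TextFileCleaner, and the filenames used are hypothetical examples, not files from this
repository.

    import os

    # Standalone copy of the suffixing helper from text_file_cleaner.py.
    def get_suffixed_filename(path, index):
        filename, file_extension = os.path.splitext(path)
        if file_extension:
            # e.g. cleaned_report.csv -> cleaned_report(1).csv
            return '{}({}){}'.format(filename, index, file_extension)
        return '{}({})'.format(filename, index)

    # Standalone copy of the duplicate-avoidance loop: try (1) through (99).
    def get_none_duplicated_path(path):
        if not os.path.exists(path):
            return path
        for i in range(1, 100):
            new_path = get_suffixed_filename(path, i)
            if not os.path.exists(new_path):
                return new_path
        raise Exception("Too many duplicated files")

    # Hypothetical usage: if cleaned_report.csv already exists and
    # cleaned_report(1).csv does not, the next output lands there.
    print(get_suffixed_filename('cleaned_report.csv', 1))  # cleaned_report(1).csv
    print(get_suffixed_filename('cleaned_report', 2))      # cleaned_report(2)
    print(get_none_duplicated_path('cleaned_report.csv'))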