From e89fbb09de67bb2217ffe56c1d26351ea5b7a453 Mon Sep 17 00:00:00 2001 From: Marvin Poul Date: Thu, 10 Nov 2022 12:32:28 +0100 Subject: [PATCH 01/18] Add a tail method to inspect output files --- pyiron_base/jobs/job/core.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/pyiron_base/jobs/job/core.py b/pyiron_base/jobs/job/core.py index 09f443ad9..37e9a405e 100644 --- a/pyiron_base/jobs/job/core.py +++ b/pyiron_base/jobs/job/core.py @@ -971,6 +971,22 @@ def __delitem__(self, key): """ del self.project_hdf5[posixpath.join(self.project_hdf5.h5_path, key)] + def tail(self, file_name, lines=100): + """ + Print the last lines of the given file in the job folder. + + Args: + file_name (str): the file to print + lines (int): how many lines to print + + Raises: + FileNotFoundError: if the given file name does not exist in the job folder + """ + if file_name in self.list_files(): + print(*self[file_name][-lines:]) + else: + raise FileNotFoundError(file_name) + def __repr__(self): """ Human readable string representation From 86df6e8a2f54008f1b8cd76474e29f848c15171d Mon Sep 17 00:00:00 2001 From: Marvin Poul Date: Thu, 10 Nov 2022 12:43:35 +0100 Subject: [PATCH 02/18] Extract file reading into method --- pyiron_base/jobs/job/core.py | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/pyiron_base/jobs/job/core.py b/pyiron_base/jobs/job/core.py index 37e9a405e..6773b10e2 100644 --- a/pyiron_base/jobs/job/core.py +++ b/pyiron_base/jobs/job/core.py @@ -865,6 +865,25 @@ def __del__(self): del self._master_id del self._status + def _read_file(self, file_name): + """ + Return list of lines of the given file. + + Transparently decompresses the file if job is compressed. + + Args: + file_name (str): the file to print + + Raises: + FileNotFoundError: if the given file name does not exist in the job folder + """ + if file_name not in self.list_files(): + raise FileNotFoundError(file_name) + + file_name = posixpath.join(self.working_directory, "{}".format(item)) + with open(file_name) as f: + return f.readlines() + def __getitem__(self, item): """ Get/read data from the HDF5 file, child jobs or access log files. @@ -887,9 +906,7 @@ def __getitem__(self, item): """ if item in self.list_files(): - file_name = posixpath.join(self.working_directory, "{}".format(item)) - with open(file_name) as f: - return f.readlines() + return self._read_file(item) # first try to access HDF5 directly to make the common case fast try: From 460dad9dd93efb3ff33b4c66cb18262ceb829ed2 Mon Sep 17 00:00:00 2001 From: Marvin Poul Date: Thu, 10 Nov 2022 12:52:18 +0100 Subject: [PATCH 03/18] Use _read_file in tail --- pyiron_base/jobs/job/core.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/pyiron_base/jobs/job/core.py b/pyiron_base/jobs/job/core.py index 6773b10e2..d22e96bb3 100644 --- a/pyiron_base/jobs/job/core.py +++ b/pyiron_base/jobs/job/core.py @@ -999,10 +999,7 @@ def tail(self, file_name, lines=100): Raises: FileNotFoundError: if the given file name does not exist in the job folder """ - if file_name in self.list_files(): - print(*self[file_name][-lines:]) - else: - raise FileNotFoundError(file_name) + print(*self._read_file(file_name)[-lines:]) def __repr__(self): """ From a3fbe13d622d88b053177c995b607c660aff2a86 Mon Sep 17 00:00:00 2001 From: Marvin Poul Date: Thu, 10 Nov 2022 12:56:54 +0100 Subject: [PATCH 04/18] Move list files to util --- pyiron_base/jobs/job/core.py | 4 +--- pyiron_base/jobs/job/util.py | 16 ++++++++++++++++ 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/pyiron_base/jobs/job/core.py b/pyiron_base/jobs/job/core.py index d22e96bb3..9f51da13b 100644 --- a/pyiron_base/jobs/job/core.py +++ b/pyiron_base/jobs/job/core.py @@ -595,9 +595,7 @@ def list_files(self): Returns: list: list of file names """ - if os.path.isdir(self.working_directory): - return os.listdir(self.working_directory) - return [] + return _job_list_files(self) def list_childs(self): """ diff --git a/pyiron_base/jobs/job/util.py b/pyiron_base/jobs/job/util.py index d54684c33..1e05d8f59 100644 --- a/pyiron_base/jobs/job/util.py +++ b/pyiron_base/jobs/job/util.py @@ -308,6 +308,22 @@ def _job_is_compressed(job): return False +def _job_list_files(job): + """ + Returns list of files in the jobs working directory. + + If the job is compressed, return a list of files in the archive. + + Args: + job (JobCore): job object to inspect files in + + Returns: + list of str: file names + """ + if os.path.isdir(self.working_directory): + return os.listdir(self.working_directory) + return [] + def _job_archive(job): """ Compress HDF5 file of the job object to tar-archive From e7f308f447fb9ef2fcb05fbba06ea22f7234100b Mon Sep 17 00:00:00 2001 From: Marvin Poul Date: Thu, 10 Nov 2022 12:59:45 +0100 Subject: [PATCH 05/18] Move read_file to util --- pyiron_base/jobs/job/core.py | 25 ++++--------------------- pyiron_base/jobs/job/util.py | 21 +++++++++++++++++++++ 2 files changed, 25 insertions(+), 21 deletions(-) diff --git a/pyiron_base/jobs/job/core.py b/pyiron_base/jobs/job/core.py index 9f51da13b..0bde1893d 100644 --- a/pyiron_base/jobs/job/core.py +++ b/pyiron_base/jobs/job/core.py @@ -26,6 +26,8 @@ _job_is_compressed, _job_compress, _job_decompress, + _job_list_files, + _job_read_file, _job_delete_files, _job_delete_hdf, _job_remove_folder, @@ -863,25 +865,6 @@ def __del__(self): del self._master_id del self._status - def _read_file(self, file_name): - """ - Return list of lines of the given file. - - Transparently decompresses the file if job is compressed. - - Args: - file_name (str): the file to print - - Raises: - FileNotFoundError: if the given file name does not exist in the job folder - """ - if file_name not in self.list_files(): - raise FileNotFoundError(file_name) - - file_name = posixpath.join(self.working_directory, "{}".format(item)) - with open(file_name) as f: - return f.readlines() - def __getitem__(self, item): """ Get/read data from the HDF5 file, child jobs or access log files. @@ -904,7 +887,7 @@ def __getitem__(self, item): """ if item in self.list_files(): - return self._read_file(item) + return _job_read_file(item) # first try to access HDF5 directly to make the common case fast try: @@ -997,7 +980,7 @@ def tail(self, file_name, lines=100): Raises: FileNotFoundError: if the given file name does not exist in the job folder """ - print(*self._read_file(file_name)[-lines:]) + print(*_job_read_file(file_name)[-lines:]) def __repr__(self): """ diff --git a/pyiron_base/jobs/job/util.py b/pyiron_base/jobs/job/util.py index 1e05d8f59..157439500 100644 --- a/pyiron_base/jobs/job/util.py +++ b/pyiron_base/jobs/job/util.py @@ -324,6 +324,27 @@ def _job_list_files(job): return os.listdir(self.working_directory) return [] + +def _read_file(self, file_name): + """ + Return list of lines of the given file. + + Transparently decompresses the file if job is compressed. + + Args: + file_name (str): the file to print + + Raises: + FileNotFoundError: if the given file name does not exist in the job folder + """ + if file_name not in self.list_files(): + raise FileNotFoundError(file_name) + + file_name = posixpath.join(self.working_directory, "{}".format(item)) + with open(file_name) as f: + return f.readlines() + + def _job_archive(job): """ Compress HDF5 file of the job object to tar-archive From 36d3d90da5436aa48126f9b8ad316167e68c2881 Mon Sep 17 00:00:00 2001 From: Marvin Poul Date: Thu, 10 Nov 2022 13:00:03 +0100 Subject: [PATCH 06/18] Extract definition of job archive name --- pyiron_base/jobs/job/util.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/pyiron_base/jobs/job/util.py b/pyiron_base/jobs/job/util.py index 157439500..b73efa9a1 100644 --- a/pyiron_base/jobs/job/util.py +++ b/pyiron_base/jobs/job/util.py @@ -241,6 +241,11 @@ def _kill_child(job): job_process.kill() +def _job_compressed_name(job): + """Return the canonical file name of a compressed job.""" + return os.path.join(job.working_directory, job.job_name + ".tar.bz2"), + + def _job_compress(job, files_to_compress=None): """ Compress the output files of a job object. @@ -255,10 +260,7 @@ def _job_compress(job, files_to_compress=None): cwd = os.getcwd() try: os.chdir(job.working_directory) - with tarfile.open( - os.path.join(job.working_directory, job.job_name + ".tar.bz2"), - "w:bz2", - ) as tar: + with tarfile.open(_job_compressed_name(job), "w:bz2") as tar: for name in files_to_compress: if "tar" not in name and not stat.S_ISFIFO(os.stat(name).st_mode): tar.add(name) @@ -272,7 +274,7 @@ def _job_compress(job, files_to_compress=None): finally: os.chdir(cwd) else: - print("The files are already compressed!") + job.logger.info("The files are already compressed!") def _job_decompress(job): @@ -283,8 +285,7 @@ def _job_decompress(job): job (JobCore): job object to decompress """ try: - tar_file_name = os.path.join(job.working_directory, job.job_name + ".tar.bz2") - with tarfile.open(tar_file_name, "r:bz2") as tar: + with tarfile.open(_job_compressed_name(job), "r:bz2") as tar: tar.extractall(job.working_directory) os.remove(tar_file_name) except IOError: @@ -301,7 +302,7 @@ def _job_is_compressed(job): Returns: bool: [True/False] """ - compressed_name = job.job_name + ".tar.bz2" + compressed_name = os.path.basename(_job_compressed_name(job)) for name in job.list_files(): if compressed_name in name: return True From 9a00f532dec657668dc54e5fdad4eb32007e90ae Mon Sep 17 00:00:00 2001 From: Marvin Poul Date: Thu, 10 Nov 2022 13:11:15 +0100 Subject: [PATCH 07/18] Add transparent compression support --- pyiron_base/jobs/job/util.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/pyiron_base/jobs/job/util.py b/pyiron_base/jobs/job/util.py index b73efa9a1..fcd35678f 100644 --- a/pyiron_base/jobs/job/util.py +++ b/pyiron_base/jobs/job/util.py @@ -322,11 +322,15 @@ def _job_list_files(job): list of str: file names """ if os.path.isdir(self.working_directory): - return os.listdir(self.working_directory) + if _job_is_compressed(job): + with tarfile.open(_job_compressed_name, "r") as tar: + return [member.name for i in tar.getmembers() if member.isfile()] + else: + return os.listdir(self.working_directory) return [] -def _read_file(self, file_name): +def _job_read_file(self, file_name): """ Return list of lines of the given file. @@ -342,8 +346,12 @@ def _read_file(self, file_name): raise FileNotFoundError(file_name) file_name = posixpath.join(self.working_directory, "{}".format(item)) - with open(file_name) as f: - return f.readlines() + if _job_is_compressed(job): + with tarfile.open(_job_compressed_name(job)) as f: + return f.extractfile(item).readlines() + else: + with open(file_name) as f: + return f.readlines() def _job_archive(job): From a5965d067d791792f3153b925e129c7589a9ba20 Mon Sep 17 00:00:00 2001 From: Marvin Poul Date: Tue, 22 Nov 2022 09:07:33 +0100 Subject: [PATCH 08/18] Fix typos and inadvertant recursion in _job_is_compressed/_job_list_files --- pyiron_base/jobs/job/util.py | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/pyiron_base/jobs/job/util.py b/pyiron_base/jobs/job/util.py index fcd35678f..5505ff065 100644 --- a/pyiron_base/jobs/job/util.py +++ b/pyiron_base/jobs/job/util.py @@ -243,7 +243,7 @@ def _kill_child(job): def _job_compressed_name(job): """Return the canonical file name of a compressed job.""" - return os.path.join(job.working_directory, job.job_name + ".tar.bz2"), + return os.path.join(job.working_directory, job.job_name + ".tar.bz2") def _job_compress(job, files_to_compress=None): @@ -303,10 +303,7 @@ def _job_is_compressed(job): bool: [True/False] """ compressed_name = os.path.basename(_job_compressed_name(job)) - for name in job.list_files(): - if compressed_name in name: - return True - return False + return compressed_name in os.listdir(job.working_directory) def _job_list_files(job): @@ -321,16 +318,16 @@ def _job_list_files(job): Returns: list of str: file names """ - if os.path.isdir(self.working_directory): + if os.path.isdir(job.working_directory): if _job_is_compressed(job): - with tarfile.open(_job_compressed_name, "r") as tar: + with tarfile.open(_job_compressed_name(job), "r") as tar: return [member.name for i in tar.getmembers() if member.isfile()] else: - return os.listdir(self.working_directory) + return os.listdir(job.working_directory) return [] -def _job_read_file(self, file_name): +def _job_read_file(job, file_name): """ Return list of lines of the given file. @@ -342,10 +339,10 @@ def _job_read_file(self, file_name): Raises: FileNotFoundError: if the given file name does not exist in the job folder """ - if file_name not in self.list_files(): + if file_name not in job.list_files(): raise FileNotFoundError(file_name) - file_name = posixpath.join(self.working_directory, "{}".format(item)) + file_name = posixpath.join(job.working_directory, "{}".format(item)) if _job_is_compressed(job): with tarfile.open(_job_compressed_name(job)) as f: return f.extractfile(item).readlines() From ecde14289353b47a059486eb6b2d58c0b9c343a0 Mon Sep 17 00:00:00 2001 From: Marvin Poul Date: Tue, 22 Nov 2022 13:15:58 +0100 Subject: [PATCH 09/18] Fix slopiness --- pyiron_base/jobs/job/core.py | 4 ++-- pyiron_base/jobs/job/util.py | 5 +++-- tests/job/test_genericJob.py | 4 ++-- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/pyiron_base/jobs/job/core.py b/pyiron_base/jobs/job/core.py index 0bde1893d..376f89fc2 100644 --- a/pyiron_base/jobs/job/core.py +++ b/pyiron_base/jobs/job/core.py @@ -887,7 +887,7 @@ def __getitem__(self, item): """ if item in self.list_files(): - return _job_read_file(item) + return _job_read_file(job, item) # first try to access HDF5 directly to make the common case fast try: @@ -980,7 +980,7 @@ def tail(self, file_name, lines=100): Raises: FileNotFoundError: if the given file name does not exist in the job folder """ - print(*_job_read_file(file_name)[-lines:]) + print(*_job_read_file(job, file_name)[-lines:]) def __repr__(self): """ diff --git a/pyiron_base/jobs/job/util.py b/pyiron_base/jobs/job/util.py index 5505ff065..5ec34170b 100644 --- a/pyiron_base/jobs/job/util.py +++ b/pyiron_base/jobs/job/util.py @@ -284,8 +284,9 @@ def _job_decompress(job): Args: job (JobCore): job object to decompress """ + tar_file_name = _job_compressed_name(job) try: - with tarfile.open(_job_compressed_name(job), "r:bz2") as tar: + with tarfile.open(tar_file_name, "r:bz2") as tar: tar.extractall(job.working_directory) os.remove(tar_file_name) except IOError: @@ -321,7 +322,7 @@ def _job_list_files(job): if os.path.isdir(job.working_directory): if _job_is_compressed(job): with tarfile.open(_job_compressed_name(job), "r") as tar: - return [member.name for i in tar.getmembers() if member.isfile()] + return [member.name for member in tar.getmembers() if member.isfile()] else: return os.listdir(job.working_directory) return [] diff --git a/tests/job/test_genericJob.py b/tests/job/test_genericJob.py index cd6a60901..1fee07dd2 100644 --- a/tests/job/test_genericJob.py +++ b/tests/job/test_genericJob.py @@ -427,7 +427,7 @@ def test_error(self): def test_compress(self): job = self.project.load(self.project.get_job_ids()[0]) - wd_files = job.list_files() + wd_files = os.listdir(job.working_directory) self.assertEqual(len(wd_files), 1, "Only one zipped file should be present in the working directory") self.assertEqual(wd_files[0], f"{job.name}.tar.bz2", "Inconsistent name for the zipped file") @@ -441,7 +441,7 @@ def test_restart(self): job = self.project.load(self.project.get_job_ids()[0]) job_restart = job.restart() job_restart.run() - wd_files = job_restart.list_files() + wd_files = os.listdir(job_restart.working_directory) self.assertEqual(len(wd_files), 1, "Only one zipped file should be present in the working directory") self.assertEqual(wd_files[0], f"{job_restart.name}.tar.bz2", "Inconsistent name for the zipped file") job_restart.decompress() From 138e525040a8e9b436c84c64d4d50150d56c0eeb Mon Sep 17 00:00:00 2001 From: Marvin Poul Date: Tue, 22 Nov 2022 15:11:29 +0100 Subject: [PATCH 10/18] Add test and even more bugfixes --- pyiron_base/jobs/job/core.py | 2 +- pyiron_base/jobs/job/util.py | 6 +++--- tests/job/test_genericJob.py | 23 +++++++++++++++++++++++ 3 files changed, 27 insertions(+), 4 deletions(-) diff --git a/pyiron_base/jobs/job/core.py b/pyiron_base/jobs/job/core.py index 376f89fc2..9c6914f66 100644 --- a/pyiron_base/jobs/job/core.py +++ b/pyiron_base/jobs/job/core.py @@ -980,7 +980,7 @@ def tail(self, file_name, lines=100): Raises: FileNotFoundError: if the given file name does not exist in the job folder """ - print(*_job_read_file(job, file_name)[-lines:]) + print(*_job_read_file(self, file_name)[-lines:], sep="") def __repr__(self): """ diff --git a/pyiron_base/jobs/job/util.py b/pyiron_base/jobs/job/util.py index 5ec34170b..39fa690f4 100644 --- a/pyiron_base/jobs/job/util.py +++ b/pyiron_base/jobs/job/util.py @@ -343,11 +343,11 @@ def _job_read_file(job, file_name): if file_name not in job.list_files(): raise FileNotFoundError(file_name) - file_name = posixpath.join(job.working_directory, "{}".format(item)) if _job_is_compressed(job): - with tarfile.open(_job_compressed_name(job)) as f: - return f.extractfile(item).readlines() + with tarfile.open(_job_compressed_name(job), encoding="utf8") as f: + return [line.decode("utf8") for line in f.extractfile(file_name).readlines()] else: + file_name = posixpath.join(job.working_directory, file_name) with open(file_name) as f: return f.readlines() diff --git a/tests/job/test_genericJob.py b/tests/job/test_genericJob.py index 1fee07dd2..7147edbb7 100644 --- a/tests/job/test_genericJob.py +++ b/tests/job/test_genericJob.py @@ -2,8 +2,10 @@ # Copyright (c) Max-Planck-Institut für Eisenforschung GmbH - Computational Materials Design (CM) Department # Distributed under the terms of "New BSD License", see the LICENSE file. +import contextlib import unittest import os +import io from pyiron_base.storage.parameters import GenericParameters from pyiron_base.jobs.job.generic import GenericJob from pyiron_base._tests import TestWithFilledProject, ToyJob @@ -489,5 +491,26 @@ def test_return_codes(self): pass self.assertTrue(j.status.aborted, "Job did not abort even though return code is 2!") + def test_tail(self): + """job.tail should print the last lines of a file to stdout""" + job = self.project.load(self.project.get_job_ids()[0]) + job.decompress() + content = ["Content", "More", "Lines"] + with open(os.path.join(job.working_directory, "test_file"), "w") as f: + f.write("\n".join(content)) + + for i in range(len(content)): + with contextlib.redirect_stdout(io.StringIO()) as f: + job.tail("test_file", lines=i+1) + self.assertEqual(f.getvalue(), "\n".join(content[-i-1:]) + "\n", + "tail read incorrect lines from output file when job uncompressed!") + + job.compress() + for i in range(len(content)): + with contextlib.redirect_stdout(io.StringIO()) as f: + job.tail("test_file", lines=i+1) + self.assertEqual(f.getvalue(), "\n".join(content[-i-1:]) + "\n", + "tail read incorrect lines from output file when job compressed!") + if __name__ == "__main__": unittest.main() From 16d04f8ef2c4b4348a05274458305ea05bc69b29 Mon Sep 17 00:00:00 2001 From: pyiron-runner Date: Tue, 22 Nov 2022 14:12:37 +0000 Subject: [PATCH 11/18] Format black --- pyiron_base/jobs/job/util.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pyiron_base/jobs/job/util.py b/pyiron_base/jobs/job/util.py index 39fa690f4..0fad9c444 100644 --- a/pyiron_base/jobs/job/util.py +++ b/pyiron_base/jobs/job/util.py @@ -345,7 +345,9 @@ def _job_read_file(job, file_name): if _job_is_compressed(job): with tarfile.open(_job_compressed_name(job), encoding="utf8") as f: - return [line.decode("utf8") for line in f.extractfile(file_name).readlines()] + return [ + line.decode("utf8") for line in f.extractfile(file_name).readlines() + ] else: file_name = posixpath.join(job.working_directory, file_name) with open(file_name) as f: From e3a2bb5e1c0d5b51a322b2de32c0e92b6d9e3e41 Mon Sep 17 00:00:00 2001 From: Marvin Poul Date: Sun, 4 Dec 2022 06:27:51 +0100 Subject: [PATCH 12/18] Add efficient reverse reading with the monty package --- .ci_support/environment.yml | 1 + pyiron_base/jobs/job/core.py | 2 +- pyiron_base/jobs/job/util.py | 27 +++++++++++++++++++++++---- setup.py | 1 + 4 files changed, 26 insertions(+), 5 deletions(-) diff --git a/.ci_support/environment.yml b/.ci_support/environment.yml index 3472e2925..20f900760 100644 --- a/.ci_support/environment.yml +++ b/.ci_support/environment.yml @@ -20,3 +20,4 @@ dependencies: - sqlalchemy =1.4.42 - tqdm =4.64.1 - traitlets =5.5.0 +- monty =v2022.9.9 diff --git a/pyiron_base/jobs/job/core.py b/pyiron_base/jobs/job/core.py index 9c6914f66..c5b3afc99 100644 --- a/pyiron_base/jobs/job/core.py +++ b/pyiron_base/jobs/job/core.py @@ -980,7 +980,7 @@ def tail(self, file_name, lines=100): Raises: FileNotFoundError: if the given file name does not exist in the job folder """ - print(*_job_read_file(self, file_name)[-lines:], sep="") + print(*_job_read_file(self, file_name, tail=lines), sep="") def __repr__(self): """ diff --git a/pyiron_base/jobs/job/util.py b/pyiron_base/jobs/job/util.py index 0fad9c444..660a0fddb 100644 --- a/pyiron_base/jobs/job/util.py +++ b/pyiron_base/jobs/job/util.py @@ -4,6 +4,7 @@ """ Helper functions for the JobCore and GenericJob objects """ +from itertools import islice import os import posixpath import psutil @@ -12,6 +13,7 @@ import shutil from typing import Union, Dict from pyiron_base.utils.instance import static_isinstance +import monty.io __author__ = "Jan Janssen" __copyright__ = ( @@ -328,14 +330,18 @@ def _job_list_files(job): return [] -def _job_read_file(job, file_name): +def _job_read_file(job, file_name, tail=None): """ Return list of lines of the given file. Transparently decompresses the file if job is compressed. + If `tail` is given and job is decompressed, only read the last lines + instead of traversing the full file. + Args: file_name (str): the file to print + tail (int, optional): only return the last lines Raises: FileNotFoundError: if the given file name does not exist in the job folder @@ -345,13 +351,26 @@ def _job_read_file(job, file_name): if _job_is_compressed(job): with tarfile.open(_job_compressed_name(job), encoding="utf8") as f: - return [ + lines = [ line.decode("utf8") for line in f.extractfile(file_name).readlines() ] + if tail is None: + return lines + else: + return lines[-tail:] else: file_name = posixpath.join(job.working_directory, file_name) - with open(file_name) as f: - return f.readlines() + if tail is None: + with open(file_name) as f: + return f.readlines() + else: + lines = list(reversed([l + "\n" for l in islice(monty.io.reverse_readfile(file_name), tail)])) + # compatibility with the other methods + # monty strips all newlines, where as reading the other ways does + # not. So if a file does not end with a newline (as most text + # files) adding it to every line like above adds an additional one. + lines[-1] = lines[-1].rstrip("\n") + return lines def _job_archive(job): diff --git a/setup.py b/setup.py index 1a79138d5..2c9016350 100644 --- a/setup.py +++ b/setup.py @@ -44,6 +44,7 @@ 'tables==3.7.0', 'tqdm==4.64.1', 'traitlets==5.5.0', + 'monty==2022.9.9' ], cmdclass=versioneer.get_cmdclass(), From 26e0336abcf086938a7702862b8699ecdc93fe3c Mon Sep 17 00:00:00 2001 From: Marvin Poul Date: Sun, 4 Dec 2022 17:59:14 +0100 Subject: [PATCH 13/18] Use system specific linesep --- tests/job/test_genericJob.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/job/test_genericJob.py b/tests/job/test_genericJob.py index 82fa84444..e25944dde 100644 --- a/tests/job/test_genericJob.py +++ b/tests/job/test_genericJob.py @@ -493,19 +493,19 @@ def test_tail(self): job.decompress() content = ["Content", "More", "Lines"] with open(os.path.join(job.working_directory, "test_file"), "w") as f: - f.write("\n".join(content)) + f.write(os.linesep.join(content)) for i in range(len(content)): with contextlib.redirect_stdout(io.StringIO()) as f: job.tail("test_file", lines=i+1) - self.assertEqual(f.getvalue(), "\n".join(content[-i-1:]) + "\n", + self.assertEqual(f.getvalue(), os.linesep.join(content[-i-1:]) + os.linesep, "tail read incorrect lines from output file when job uncompressed!") job.compress() for i in range(len(content)): with contextlib.redirect_stdout(io.StringIO()) as f: job.tail("test_file", lines=i+1) - self.assertEqual(f.getvalue(), "\n".join(content[-i-1:]) + "\n", + self.assertEqual(f.getvalue(), os.linesep.join(content[-i-1:]) + os.linesep, "tail read incorrect lines from output file when job compressed!") if __name__ == "__main__": From 4e4f44d43a1a7c7fb8fddd4a13a8d323492699b6 Mon Sep 17 00:00:00 2001 From: Marvin Poul Date: Sun, 4 Dec 2022 18:14:11 +0100 Subject: [PATCH 14/18] More line separator replacements --- pyiron_base/jobs/job/util.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pyiron_base/jobs/job/util.py b/pyiron_base/jobs/job/util.py index 2d84b459b..16863f82b 100644 --- a/pyiron_base/jobs/job/util.py +++ b/pyiron_base/jobs/job/util.py @@ -362,12 +362,12 @@ def _job_read_file(job, file_name, tail=None): with open(file_name) as f: return f.readlines() else: - lines = list(reversed([l + "\n" for l in islice(monty.io.reverse_readfile(file_name), tail)])) + lines = list(reversed([l + os.linesep for l in islice(monty.io.reverse_readfile(file_name), tail)])) # compatibility with the other methods # monty strips all newlines, where as reading the other ways does # not. So if a file does not end with a newline (as most text # files) adding it to every line like above adds an additional one. - lines[-1] = lines[-1].rstrip("\n") + lines[-1] = lines[-1].rstrip(os.linesep) return lines From 9392e499e684a22085edaf7a81b5f1b8ff95242f Mon Sep 17 00:00:00 2001 From: Marvin Poul Date: Sun, 4 Dec 2022 18:15:29 +0100 Subject: [PATCH 15/18] Fix typo --- pyiron_base/jobs/job/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyiron_base/jobs/job/core.py b/pyiron_base/jobs/job/core.py index d52afa058..b37cee077 100644 --- a/pyiron_base/jobs/job/core.py +++ b/pyiron_base/jobs/job/core.py @@ -914,7 +914,7 @@ def __getitem__(self, item): """ if item in self.list_files(): - return _job_read_file(job, item) + return _job_read_file(self, item) # first try to access HDF5 directly to make the common case fast try: From 7d2089eb400853714ed89eb2cba5379347664db5 Mon Sep 17 00:00:00 2001 From: pyiron-runner Date: Wed, 7 Dec 2022 11:45:19 +0000 Subject: [PATCH 16/18] Format black --- pyiron_base/jobs/job/util.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/pyiron_base/jobs/job/util.py b/pyiron_base/jobs/job/util.py index 16863f82b..c14c280b8 100644 --- a/pyiron_base/jobs/job/util.py +++ b/pyiron_base/jobs/job/util.py @@ -362,7 +362,14 @@ def _job_read_file(job, file_name, tail=None): with open(file_name) as f: return f.readlines() else: - lines = list(reversed([l + os.linesep for l in islice(monty.io.reverse_readfile(file_name), tail)])) + lines = list( + reversed( + [ + l + os.linesep + for l in islice(monty.io.reverse_readfile(file_name), tail) + ] + ) + ) # compatibility with the other methods # monty strips all newlines, where as reading the other ways does # not. So if a file does not end with a newline (as most text From 8ecbfd13f0f4014a9c5202321ca4f1debbee072a Mon Sep 17 00:00:00 2001 From: Marvin Poul Date: Fri, 6 Jan 2023 08:30:00 +0100 Subject: [PATCH 17/18] Enable newline translation in windows tests Also use a sub test for every line test --- tests/job/test_genericJob.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/tests/job/test_genericJob.py b/tests/job/test_genericJob.py index e25944dde..c6b6b3f92 100644 --- a/tests/job/test_genericJob.py +++ b/tests/job/test_genericJob.py @@ -496,10 +496,11 @@ def test_tail(self): f.write(os.linesep.join(content)) for i in range(len(content)): - with contextlib.redirect_stdout(io.StringIO()) as f: - job.tail("test_file", lines=i+1) - self.assertEqual(f.getvalue(), os.linesep.join(content[-i-1:]) + os.linesep, - "tail read incorrect lines from output file when job uncompressed!") + with self.subTest(i=i): + with contextlib.redirect_stdout(io.StringIO(newline=os.linesep)) as f: + job.tail("test_file", lines=i+1) + self.assertEqual(f.getvalue(), os.linesep.join(content[-i-1:]) + os.linesep, + "tail read incorrect lines from output file when job uncompressed!") job.compress() for i in range(len(content)): From 8db9284dd99a51d54b96360e646c752522544f79 Mon Sep 17 00:00:00 2001 From: Marvin Poul Date: Fri, 6 Jan 2023 09:29:28 +0100 Subject: [PATCH 18/18] Add a simple file browser job[] is overloaded to perform many functions at once: 1. access to files 2. access to HDF stored data 3. access to child jobs This change adds a new attribute `.files` to `JobCore` to take over function 1 and deprecates methods on the job itself that are connected to this: `list_files` and `tail`. --- pyiron_base/jobs/job/core.py | 90 ++++++++++++++++++++++++++++++++++++ 1 file changed, 90 insertions(+) diff --git a/pyiron_base/jobs/job/core.py b/pyiron_base/jobs/job/core.py index b37cee077..1c5b4c491 100644 --- a/pyiron_base/jobs/job/core.py +++ b/pyiron_base/jobs/job/core.py @@ -10,6 +10,7 @@ import os import posixpath import shutil +from typing import List import warnings from pyiron_base.interfaces.has_groups import HasGroups @@ -33,6 +34,7 @@ _job_remove_folder, ) from pyiron_base.state import state +from pyiron_base.utils.deprecate import deprecate __author__ = "Jan Janssen" __copyright__ = ( @@ -136,6 +138,11 @@ def __init__(self, project, job_name): def content(self): return self._hdf5_content + @property + def files(self): + return FileBrowser(self) + files.__doc__ = FileBrowser.__doc__ + @property def job_name(self): """ @@ -612,6 +619,7 @@ def get_job_id(self, job_specifier=None): else: return None + @deprecate("use job.files.list()") def list_files(self): """ List files inside the working directory @@ -914,6 +922,10 @@ def __getitem__(self, item): """ if item in self.list_files(): + warnings.warn( + "Using __getitem__ on a job to access files in deprecated: use job.files instead!", + category=DeprecationWarning + ) return _job_read_file(self, item) # first try to access HDF5 directly to make the common case fast @@ -996,6 +1008,7 @@ def __delitem__(self, key): """ del self.project_hdf5[posixpath.join(self.project_hdf5.h5_path, key)] + @deprecate("use job.files.tail() instead!") def tail(self, file_name, lines=100): """ Print the last lines of the given file in the job folder. @@ -1126,3 +1139,80 @@ def __dir__(self): def __repr__(self): return self._project_hdf5.__repr__() + + +class FileBrowser: + """ + Allows to browse the files in a job directory. + + By default this object prints itself as a listing of the job directory and + the files inside. + + >>> job.files + /path/to/my/job: + \tpyiron.log + \terror.out + + Access to the names of files is provided with :meth:`.list` + + >>> job.files.list() + ['pyiron.log', 'error.out', 'INCAR'] + + Access to the contents of files is provided by indexing into this object, + which returns a list of lines in the file + + >>> job.files['error.out'] + ["Oh no\n", "Something went wrong!\n"] + + The :meth:`.tail` method prints the last lines of a file to stdout + + >>> job.files.tail('error.out', lines=1) + Something went wrong! + + For files that have valid python variable names can also be accessed by + attribute notation + + >>> job.files.INCAR + ["SYSTEM=pyiron\n", "ENCUT=270\n", ...] + """ + + __slots__ = ("_job",) + + def __init__(self, job): + self._job = job + + def list(self) -> List[str]: + """ + List all files in the working directory of the job. + """ + return _job_list_files(job) + + def _ipython_display_(self): + path = job.working_directory + ":" + files = ["\t" + f for f in _job_list_files(job)] + print(os.linesep.join([path, *files])) + + def tail(self, file: str, lines: int = 100): + """ + Print the last lines of a file. + + Args: + file (str): filename + lines (int): number of lines to print + + Raises: + FileNotFoundError: if the given file does not exist + """ + print(*_job_read_file(self, file_name, tail=lines), sep="") + + def __getitem__(self, item): + if item not _job_list_files(self._job): + raise KeyError(item) + + return _job_read_file(self._job, item) + + def __getattr__(self, item): + try: + return self[name] + except KeyError: + raise AttributeError(name) from None