From e89fbb09de67bb2217ffe56c1d26351ea5b7a453 Mon Sep 17 00:00:00 2001 From: Marvin Poul Date: Thu, 10 Nov 2022 12:32:28 +0100 Subject: [PATCH 01/42] Add a tail method to inspect output files --- pyiron_base/jobs/job/core.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/pyiron_base/jobs/job/core.py b/pyiron_base/jobs/job/core.py index 09f443ad9..37e9a405e 100644 --- a/pyiron_base/jobs/job/core.py +++ b/pyiron_base/jobs/job/core.py @@ -971,6 +971,22 @@ def __delitem__(self, key): """ del self.project_hdf5[posixpath.join(self.project_hdf5.h5_path, key)] + def tail(self, file_name, lines=100): + """ + Print the last lines of the given file in the job folder. + + Args: + file_name (str): the file to print + lines (int): how many lines to print + + Raises: + FileNotFoundError: if the given file name does not exist in the job folder + """ + if file_name in self.list_files(): + print(*self[file_name][-lines:]) + else: + raise FileNotFoundError(file_name) + def __repr__(self): """ Human readable string representation From 86df6e8a2f54008f1b8cd76474e29f848c15171d Mon Sep 17 00:00:00 2001 From: Marvin Poul Date: Thu, 10 Nov 2022 12:43:35 +0100 Subject: [PATCH 02/42] Extract file reading into method --- pyiron_base/jobs/job/core.py | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/pyiron_base/jobs/job/core.py b/pyiron_base/jobs/job/core.py index 37e9a405e..6773b10e2 100644 --- a/pyiron_base/jobs/job/core.py +++ b/pyiron_base/jobs/job/core.py @@ -865,6 +865,25 @@ def __del__(self): del self._master_id del self._status + def _read_file(self, file_name): + """ + Return list of lines of the given file. + + Transparently decompresses the file if job is compressed. + + Args: + file_name (str): the file to print + + Raises: + FileNotFoundError: if the given file name does not exist in the job folder + """ + if file_name not in self.list_files(): + raise FileNotFoundError(file_name) + + file_name = posixpath.join(self.working_directory, "{}".format(item)) + with open(file_name) as f: + return f.readlines() + def __getitem__(self, item): """ Get/read data from the HDF5 file, child jobs or access log files. @@ -887,9 +906,7 @@ def __getitem__(self, item): """ if item in self.list_files(): - file_name = posixpath.join(self.working_directory, "{}".format(item)) - with open(file_name) as f: - return f.readlines() + return self._read_file(item) # first try to access HDF5 directly to make the common case fast try: From 460dad9dd93efb3ff33b4c66cb18262ceb829ed2 Mon Sep 17 00:00:00 2001 From: Marvin Poul Date: Thu, 10 Nov 2022 12:52:18 +0100 Subject: [PATCH 03/42] Use _read_file in tail --- pyiron_base/jobs/job/core.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/pyiron_base/jobs/job/core.py b/pyiron_base/jobs/job/core.py index 6773b10e2..d22e96bb3 100644 --- a/pyiron_base/jobs/job/core.py +++ b/pyiron_base/jobs/job/core.py @@ -999,10 +999,7 @@ def tail(self, file_name, lines=100): Raises: FileNotFoundError: if the given file name does not exist in the job folder """ - if file_name in self.list_files(): - print(*self[file_name][-lines:]) - else: - raise FileNotFoundError(file_name) + print(*self._read_file(file_name)[-lines:]) def __repr__(self): """ From a3fbe13d622d88b053177c995b607c660aff2a86 Mon Sep 17 00:00:00 2001 From: Marvin Poul Date: Thu, 10 Nov 2022 12:56:54 +0100 Subject: [PATCH 04/42] Move list files to util --- pyiron_base/jobs/job/core.py | 4 +--- pyiron_base/jobs/job/util.py | 16 ++++++++++++++++ 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/pyiron_base/jobs/job/core.py b/pyiron_base/jobs/job/core.py index d22e96bb3..9f51da13b 100644 --- a/pyiron_base/jobs/job/core.py +++ b/pyiron_base/jobs/job/core.py @@ -595,9 +595,7 @@ def list_files(self): Returns: list: list of file names """ - if os.path.isdir(self.working_directory): - return os.listdir(self.working_directory) - return [] + return _job_list_files(self) def list_childs(self): """ diff --git a/pyiron_base/jobs/job/util.py b/pyiron_base/jobs/job/util.py index d54684c33..1e05d8f59 100644 --- a/pyiron_base/jobs/job/util.py +++ b/pyiron_base/jobs/job/util.py @@ -308,6 +308,22 @@ def _job_is_compressed(job): return False +def _job_list_files(job): + """ + Returns list of files in the jobs working directory. + + If the job is compressed, return a list of files in the archive. + + Args: + job (JobCore): job object to inspect files in + + Returns: + list of str: file names + """ + if os.path.isdir(self.working_directory): + return os.listdir(self.working_directory) + return [] + def _job_archive(job): """ Compress HDF5 file of the job object to tar-archive From e7f308f447fb9ef2fcb05fbba06ea22f7234100b Mon Sep 17 00:00:00 2001 From: Marvin Poul Date: Thu, 10 Nov 2022 12:59:45 +0100 Subject: [PATCH 05/42] Move read_file to util --- pyiron_base/jobs/job/core.py | 25 ++++--------------------- pyiron_base/jobs/job/util.py | 21 +++++++++++++++++++++ 2 files changed, 25 insertions(+), 21 deletions(-) diff --git a/pyiron_base/jobs/job/core.py b/pyiron_base/jobs/job/core.py index 9f51da13b..0bde1893d 100644 --- a/pyiron_base/jobs/job/core.py +++ b/pyiron_base/jobs/job/core.py @@ -26,6 +26,8 @@ _job_is_compressed, _job_compress, _job_decompress, + _job_list_files, + _job_read_file, _job_delete_files, _job_delete_hdf, _job_remove_folder, @@ -863,25 +865,6 @@ def __del__(self): del self._master_id del self._status - def _read_file(self, file_name): - """ - Return list of lines of the given file. - - Transparently decompresses the file if job is compressed. - - Args: - file_name (str): the file to print - - Raises: - FileNotFoundError: if the given file name does not exist in the job folder - """ - if file_name not in self.list_files(): - raise FileNotFoundError(file_name) - - file_name = posixpath.join(self.working_directory, "{}".format(item)) - with open(file_name) as f: - return f.readlines() - def __getitem__(self, item): """ Get/read data from the HDF5 file, child jobs or access log files. @@ -904,7 +887,7 @@ def __getitem__(self, item): """ if item in self.list_files(): - return self._read_file(item) + return _job_read_file(item) # first try to access HDF5 directly to make the common case fast try: @@ -997,7 +980,7 @@ def tail(self, file_name, lines=100): Raises: FileNotFoundError: if the given file name does not exist in the job folder """ - print(*self._read_file(file_name)[-lines:]) + print(*_job_read_file(file_name)[-lines:]) def __repr__(self): """ diff --git a/pyiron_base/jobs/job/util.py b/pyiron_base/jobs/job/util.py index 1e05d8f59..157439500 100644 --- a/pyiron_base/jobs/job/util.py +++ b/pyiron_base/jobs/job/util.py @@ -324,6 +324,27 @@ def _job_list_files(job): return os.listdir(self.working_directory) return [] + +def _read_file(self, file_name): + """ + Return list of lines of the given file. + + Transparently decompresses the file if job is compressed. + + Args: + file_name (str): the file to print + + Raises: + FileNotFoundError: if the given file name does not exist in the job folder + """ + if file_name not in self.list_files(): + raise FileNotFoundError(file_name) + + file_name = posixpath.join(self.working_directory, "{}".format(item)) + with open(file_name) as f: + return f.readlines() + + def _job_archive(job): """ Compress HDF5 file of the job object to tar-archive From 36d3d90da5436aa48126f9b8ad316167e68c2881 Mon Sep 17 00:00:00 2001 From: Marvin Poul Date: Thu, 10 Nov 2022 13:00:03 +0100 Subject: [PATCH 06/42] Extract definition of job archive name --- pyiron_base/jobs/job/util.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/pyiron_base/jobs/job/util.py b/pyiron_base/jobs/job/util.py index 157439500..b73efa9a1 100644 --- a/pyiron_base/jobs/job/util.py +++ b/pyiron_base/jobs/job/util.py @@ -241,6 +241,11 @@ def _kill_child(job): job_process.kill() +def _job_compressed_name(job): + """Return the canonical file name of a compressed job.""" + return os.path.join(job.working_directory, job.job_name + ".tar.bz2"), + + def _job_compress(job, files_to_compress=None): """ Compress the output files of a job object. @@ -255,10 +260,7 @@ def _job_compress(job, files_to_compress=None): cwd = os.getcwd() try: os.chdir(job.working_directory) - with tarfile.open( - os.path.join(job.working_directory, job.job_name + ".tar.bz2"), - "w:bz2", - ) as tar: + with tarfile.open(_job_compressed_name(job), "w:bz2") as tar: for name in files_to_compress: if "tar" not in name and not stat.S_ISFIFO(os.stat(name).st_mode): tar.add(name) @@ -272,7 +274,7 @@ def _job_compress(job, files_to_compress=None): finally: os.chdir(cwd) else: - print("The files are already compressed!") + job.logger.info("The files are already compressed!") def _job_decompress(job): @@ -283,8 +285,7 @@ def _job_decompress(job): job (JobCore): job object to decompress """ try: - tar_file_name = os.path.join(job.working_directory, job.job_name + ".tar.bz2") - with tarfile.open(tar_file_name, "r:bz2") as tar: + with tarfile.open(_job_compressed_name(job), "r:bz2") as tar: tar.extractall(job.working_directory) os.remove(tar_file_name) except IOError: @@ -301,7 +302,7 @@ def _job_is_compressed(job): Returns: bool: [True/False] """ - compressed_name = job.job_name + ".tar.bz2" + compressed_name = os.path.basename(_job_compressed_name(job)) for name in job.list_files(): if compressed_name in name: return True From 9a00f532dec657668dc54e5fdad4eb32007e90ae Mon Sep 17 00:00:00 2001 From: Marvin Poul Date: Thu, 10 Nov 2022 13:11:15 +0100 Subject: [PATCH 07/42] Add transparent compression support --- pyiron_base/jobs/job/util.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/pyiron_base/jobs/job/util.py b/pyiron_base/jobs/job/util.py index b73efa9a1..fcd35678f 100644 --- a/pyiron_base/jobs/job/util.py +++ b/pyiron_base/jobs/job/util.py @@ -322,11 +322,15 @@ def _job_list_files(job): list of str: file names """ if os.path.isdir(self.working_directory): - return os.listdir(self.working_directory) + if _job_is_compressed(job): + with tarfile.open(_job_compressed_name, "r") as tar: + return [member.name for i in tar.getmembers() if member.isfile()] + else: + return os.listdir(self.working_directory) return [] -def _read_file(self, file_name): +def _job_read_file(self, file_name): """ Return list of lines of the given file. @@ -342,8 +346,12 @@ def _read_file(self, file_name): raise FileNotFoundError(file_name) file_name = posixpath.join(self.working_directory, "{}".format(item)) - with open(file_name) as f: - return f.readlines() + if _job_is_compressed(job): + with tarfile.open(_job_compressed_name(job)) as f: + return f.extractfile(item).readlines() + else: + with open(file_name) as f: + return f.readlines() def _job_archive(job): From a5965d067d791792f3153b925e129c7589a9ba20 Mon Sep 17 00:00:00 2001 From: Marvin Poul Date: Tue, 22 Nov 2022 09:07:33 +0100 Subject: [PATCH 08/42] Fix typos and inadvertant recursion in _job_is_compressed/_job_list_files --- pyiron_base/jobs/job/util.py | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/pyiron_base/jobs/job/util.py b/pyiron_base/jobs/job/util.py index fcd35678f..5505ff065 100644 --- a/pyiron_base/jobs/job/util.py +++ b/pyiron_base/jobs/job/util.py @@ -243,7 +243,7 @@ def _kill_child(job): def _job_compressed_name(job): """Return the canonical file name of a compressed job.""" - return os.path.join(job.working_directory, job.job_name + ".tar.bz2"), + return os.path.join(job.working_directory, job.job_name + ".tar.bz2") def _job_compress(job, files_to_compress=None): @@ -303,10 +303,7 @@ def _job_is_compressed(job): bool: [True/False] """ compressed_name = os.path.basename(_job_compressed_name(job)) - for name in job.list_files(): - if compressed_name in name: - return True - return False + return compressed_name in os.listdir(job.working_directory) def _job_list_files(job): @@ -321,16 +318,16 @@ def _job_list_files(job): Returns: list of str: file names """ - if os.path.isdir(self.working_directory): + if os.path.isdir(job.working_directory): if _job_is_compressed(job): - with tarfile.open(_job_compressed_name, "r") as tar: + with tarfile.open(_job_compressed_name(job), "r") as tar: return [member.name for i in tar.getmembers() if member.isfile()] else: - return os.listdir(self.working_directory) + return os.listdir(job.working_directory) return [] -def _job_read_file(self, file_name): +def _job_read_file(job, file_name): """ Return list of lines of the given file. @@ -342,10 +339,10 @@ def _job_read_file(self, file_name): Raises: FileNotFoundError: if the given file name does not exist in the job folder """ - if file_name not in self.list_files(): + if file_name not in job.list_files(): raise FileNotFoundError(file_name) - file_name = posixpath.join(self.working_directory, "{}".format(item)) + file_name = posixpath.join(job.working_directory, "{}".format(item)) if _job_is_compressed(job): with tarfile.open(_job_compressed_name(job)) as f: return f.extractfile(item).readlines() From ecde14289353b47a059486eb6b2d58c0b9c343a0 Mon Sep 17 00:00:00 2001 From: Marvin Poul Date: Tue, 22 Nov 2022 13:15:58 +0100 Subject: [PATCH 09/42] Fix slopiness --- pyiron_base/jobs/job/core.py | 4 ++-- pyiron_base/jobs/job/util.py | 5 +++-- tests/job/test_genericJob.py | 4 ++-- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/pyiron_base/jobs/job/core.py b/pyiron_base/jobs/job/core.py index 0bde1893d..376f89fc2 100644 --- a/pyiron_base/jobs/job/core.py +++ b/pyiron_base/jobs/job/core.py @@ -887,7 +887,7 @@ def __getitem__(self, item): """ if item in self.list_files(): - return _job_read_file(item) + return _job_read_file(job, item) # first try to access HDF5 directly to make the common case fast try: @@ -980,7 +980,7 @@ def tail(self, file_name, lines=100): Raises: FileNotFoundError: if the given file name does not exist in the job folder """ - print(*_job_read_file(file_name)[-lines:]) + print(*_job_read_file(job, file_name)[-lines:]) def __repr__(self): """ diff --git a/pyiron_base/jobs/job/util.py b/pyiron_base/jobs/job/util.py index 5505ff065..5ec34170b 100644 --- a/pyiron_base/jobs/job/util.py +++ b/pyiron_base/jobs/job/util.py @@ -284,8 +284,9 @@ def _job_decompress(job): Args: job (JobCore): job object to decompress """ + tar_file_name = _job_compressed_name(job) try: - with tarfile.open(_job_compressed_name(job), "r:bz2") as tar: + with tarfile.open(tar_file_name, "r:bz2") as tar: tar.extractall(job.working_directory) os.remove(tar_file_name) except IOError: @@ -321,7 +322,7 @@ def _job_list_files(job): if os.path.isdir(job.working_directory): if _job_is_compressed(job): with tarfile.open(_job_compressed_name(job), "r") as tar: - return [member.name for i in tar.getmembers() if member.isfile()] + return [member.name for member in tar.getmembers() if member.isfile()] else: return os.listdir(job.working_directory) return [] diff --git a/tests/job/test_genericJob.py b/tests/job/test_genericJob.py index cd6a60901..1fee07dd2 100644 --- a/tests/job/test_genericJob.py +++ b/tests/job/test_genericJob.py @@ -427,7 +427,7 @@ def test_error(self): def test_compress(self): job = self.project.load(self.project.get_job_ids()[0]) - wd_files = job.list_files() + wd_files = os.listdir(job.working_directory) self.assertEqual(len(wd_files), 1, "Only one zipped file should be present in the working directory") self.assertEqual(wd_files[0], f"{job.name}.tar.bz2", "Inconsistent name for the zipped file") @@ -441,7 +441,7 @@ def test_restart(self): job = self.project.load(self.project.get_job_ids()[0]) job_restart = job.restart() job_restart.run() - wd_files = job_restart.list_files() + wd_files = os.listdir(job_restart.working_directory) self.assertEqual(len(wd_files), 1, "Only one zipped file should be present in the working directory") self.assertEqual(wd_files[0], f"{job_restart.name}.tar.bz2", "Inconsistent name for the zipped file") job_restart.decompress() From 138e525040a8e9b436c84c64d4d50150d56c0eeb Mon Sep 17 00:00:00 2001 From: Marvin Poul Date: Tue, 22 Nov 2022 15:11:29 +0100 Subject: [PATCH 10/42] Add test and even more bugfixes --- pyiron_base/jobs/job/core.py | 2 +- pyiron_base/jobs/job/util.py | 6 +++--- tests/job/test_genericJob.py | 23 +++++++++++++++++++++++ 3 files changed, 27 insertions(+), 4 deletions(-) diff --git a/pyiron_base/jobs/job/core.py b/pyiron_base/jobs/job/core.py index 376f89fc2..9c6914f66 100644 --- a/pyiron_base/jobs/job/core.py +++ b/pyiron_base/jobs/job/core.py @@ -980,7 +980,7 @@ def tail(self, file_name, lines=100): Raises: FileNotFoundError: if the given file name does not exist in the job folder """ - print(*_job_read_file(job, file_name)[-lines:]) + print(*_job_read_file(self, file_name)[-lines:], sep="") def __repr__(self): """ diff --git a/pyiron_base/jobs/job/util.py b/pyiron_base/jobs/job/util.py index 5ec34170b..39fa690f4 100644 --- a/pyiron_base/jobs/job/util.py +++ b/pyiron_base/jobs/job/util.py @@ -343,11 +343,11 @@ def _job_read_file(job, file_name): if file_name not in job.list_files(): raise FileNotFoundError(file_name) - file_name = posixpath.join(job.working_directory, "{}".format(item)) if _job_is_compressed(job): - with tarfile.open(_job_compressed_name(job)) as f: - return f.extractfile(item).readlines() + with tarfile.open(_job_compressed_name(job), encoding="utf8") as f: + return [line.decode("utf8") for line in f.extractfile(file_name).readlines()] else: + file_name = posixpath.join(job.working_directory, file_name) with open(file_name) as f: return f.readlines() diff --git a/tests/job/test_genericJob.py b/tests/job/test_genericJob.py index 1fee07dd2..7147edbb7 100644 --- a/tests/job/test_genericJob.py +++ b/tests/job/test_genericJob.py @@ -2,8 +2,10 @@ # Copyright (c) Max-Planck-Institut für Eisenforschung GmbH - Computational Materials Design (CM) Department # Distributed under the terms of "New BSD License", see the LICENSE file. +import contextlib import unittest import os +import io from pyiron_base.storage.parameters import GenericParameters from pyiron_base.jobs.job.generic import GenericJob from pyiron_base._tests import TestWithFilledProject, ToyJob @@ -489,5 +491,26 @@ def test_return_codes(self): pass self.assertTrue(j.status.aborted, "Job did not abort even though return code is 2!") + def test_tail(self): + """job.tail should print the last lines of a file to stdout""" + job = self.project.load(self.project.get_job_ids()[0]) + job.decompress() + content = ["Content", "More", "Lines"] + with open(os.path.join(job.working_directory, "test_file"), "w") as f: + f.write("\n".join(content)) + + for i in range(len(content)): + with contextlib.redirect_stdout(io.StringIO()) as f: + job.tail("test_file", lines=i+1) + self.assertEqual(f.getvalue(), "\n".join(content[-i-1:]) + "\n", + "tail read incorrect lines from output file when job uncompressed!") + + job.compress() + for i in range(len(content)): + with contextlib.redirect_stdout(io.StringIO()) as f: + job.tail("test_file", lines=i+1) + self.assertEqual(f.getvalue(), "\n".join(content[-i-1:]) + "\n", + "tail read incorrect lines from output file when job compressed!") + if __name__ == "__main__": unittest.main() From 16d04f8ef2c4b4348a05274458305ea05bc69b29 Mon Sep 17 00:00:00 2001 From: pyiron-runner Date: Tue, 22 Nov 2022 14:12:37 +0000 Subject: [PATCH 11/42] Format black --- pyiron_base/jobs/job/util.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pyiron_base/jobs/job/util.py b/pyiron_base/jobs/job/util.py index 39fa690f4..0fad9c444 100644 --- a/pyiron_base/jobs/job/util.py +++ b/pyiron_base/jobs/job/util.py @@ -345,7 +345,9 @@ def _job_read_file(job, file_name): if _job_is_compressed(job): with tarfile.open(_job_compressed_name(job), encoding="utf8") as f: - return [line.decode("utf8") for line in f.extractfile(file_name).readlines()] + return [ + line.decode("utf8") for line in f.extractfile(file_name).readlines() + ] else: file_name = posixpath.join(job.working_directory, file_name) with open(file_name) as f: From e3a2bb5e1c0d5b51a322b2de32c0e92b6d9e3e41 Mon Sep 17 00:00:00 2001 From: Marvin Poul Date: Sun, 4 Dec 2022 06:27:51 +0100 Subject: [PATCH 12/42] Add efficient reverse reading with the monty package --- .ci_support/environment.yml | 1 + pyiron_base/jobs/job/core.py | 2 +- pyiron_base/jobs/job/util.py | 27 +++++++++++++++++++++++---- setup.py | 1 + 4 files changed, 26 insertions(+), 5 deletions(-) diff --git a/.ci_support/environment.yml b/.ci_support/environment.yml index 3472e2925..20f900760 100644 --- a/.ci_support/environment.yml +++ b/.ci_support/environment.yml @@ -20,3 +20,4 @@ dependencies: - sqlalchemy =1.4.42 - tqdm =4.64.1 - traitlets =5.5.0 +- monty =v2022.9.9 diff --git a/pyiron_base/jobs/job/core.py b/pyiron_base/jobs/job/core.py index 9c6914f66..c5b3afc99 100644 --- a/pyiron_base/jobs/job/core.py +++ b/pyiron_base/jobs/job/core.py @@ -980,7 +980,7 @@ def tail(self, file_name, lines=100): Raises: FileNotFoundError: if the given file name does not exist in the job folder """ - print(*_job_read_file(self, file_name)[-lines:], sep="") + print(*_job_read_file(self, file_name, tail=lines), sep="") def __repr__(self): """ diff --git a/pyiron_base/jobs/job/util.py b/pyiron_base/jobs/job/util.py index 0fad9c444..660a0fddb 100644 --- a/pyiron_base/jobs/job/util.py +++ b/pyiron_base/jobs/job/util.py @@ -4,6 +4,7 @@ """ Helper functions for the JobCore and GenericJob objects """ +from itertools import islice import os import posixpath import psutil @@ -12,6 +13,7 @@ import shutil from typing import Union, Dict from pyiron_base.utils.instance import static_isinstance +import monty.io __author__ = "Jan Janssen" __copyright__ = ( @@ -328,14 +330,18 @@ def _job_list_files(job): return [] -def _job_read_file(job, file_name): +def _job_read_file(job, file_name, tail=None): """ Return list of lines of the given file. Transparently decompresses the file if job is compressed. + If `tail` is given and job is decompressed, only read the last lines + instead of traversing the full file. + Args: file_name (str): the file to print + tail (int, optional): only return the last lines Raises: FileNotFoundError: if the given file name does not exist in the job folder @@ -345,13 +351,26 @@ def _job_read_file(job, file_name): if _job_is_compressed(job): with tarfile.open(_job_compressed_name(job), encoding="utf8") as f: - return [ + lines = [ line.decode("utf8") for line in f.extractfile(file_name).readlines() ] + if tail is None: + return lines + else: + return lines[-tail:] else: file_name = posixpath.join(job.working_directory, file_name) - with open(file_name) as f: - return f.readlines() + if tail is None: + with open(file_name) as f: + return f.readlines() + else: + lines = list(reversed([l + "\n" for l in islice(monty.io.reverse_readfile(file_name), tail)])) + # compatibility with the other methods + # monty strips all newlines, where as reading the other ways does + # not. So if a file does not end with a newline (as most text + # files) adding it to every line like above adds an additional one. + lines[-1] = lines[-1].rstrip("\n") + return lines def _job_archive(job): diff --git a/setup.py b/setup.py index 1a79138d5..2c9016350 100644 --- a/setup.py +++ b/setup.py @@ -44,6 +44,7 @@ 'tables==3.7.0', 'tqdm==4.64.1', 'traitlets==5.5.0', + 'monty==2022.9.9' ], cmdclass=versioneer.get_cmdclass(), From 26e0336abcf086938a7702862b8699ecdc93fe3c Mon Sep 17 00:00:00 2001 From: Marvin Poul Date: Sun, 4 Dec 2022 17:59:14 +0100 Subject: [PATCH 13/42] Use system specific linesep --- tests/job/test_genericJob.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/job/test_genericJob.py b/tests/job/test_genericJob.py index 82fa84444..e25944dde 100644 --- a/tests/job/test_genericJob.py +++ b/tests/job/test_genericJob.py @@ -493,19 +493,19 @@ def test_tail(self): job.decompress() content = ["Content", "More", "Lines"] with open(os.path.join(job.working_directory, "test_file"), "w") as f: - f.write("\n".join(content)) + f.write(os.linesep.join(content)) for i in range(len(content)): with contextlib.redirect_stdout(io.StringIO()) as f: job.tail("test_file", lines=i+1) - self.assertEqual(f.getvalue(), "\n".join(content[-i-1:]) + "\n", + self.assertEqual(f.getvalue(), os.linesep.join(content[-i-1:]) + os.linesep, "tail read incorrect lines from output file when job uncompressed!") job.compress() for i in range(len(content)): with contextlib.redirect_stdout(io.StringIO()) as f: job.tail("test_file", lines=i+1) - self.assertEqual(f.getvalue(), "\n".join(content[-i-1:]) + "\n", + self.assertEqual(f.getvalue(), os.linesep.join(content[-i-1:]) + os.linesep, "tail read incorrect lines from output file when job compressed!") if __name__ == "__main__": From 4e4f44d43a1a7c7fb8fddd4a13a8d323492699b6 Mon Sep 17 00:00:00 2001 From: Marvin Poul Date: Sun, 4 Dec 2022 18:14:11 +0100 Subject: [PATCH 14/42] More line separator replacements --- pyiron_base/jobs/job/util.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pyiron_base/jobs/job/util.py b/pyiron_base/jobs/job/util.py index 2d84b459b..16863f82b 100644 --- a/pyiron_base/jobs/job/util.py +++ b/pyiron_base/jobs/job/util.py @@ -362,12 +362,12 @@ def _job_read_file(job, file_name, tail=None): with open(file_name) as f: return f.readlines() else: - lines = list(reversed([l + "\n" for l in islice(monty.io.reverse_readfile(file_name), tail)])) + lines = list(reversed([l + os.linesep for l in islice(monty.io.reverse_readfile(file_name), tail)])) # compatibility with the other methods # monty strips all newlines, where as reading the other ways does # not. So if a file does not end with a newline (as most text # files) adding it to every line like above adds an additional one. - lines[-1] = lines[-1].rstrip("\n") + lines[-1] = lines[-1].rstrip(os.linesep) return lines From 9392e499e684a22085edaf7a81b5f1b8ff95242f Mon Sep 17 00:00:00 2001 From: Marvin Poul Date: Sun, 4 Dec 2022 18:15:29 +0100 Subject: [PATCH 15/42] Fix typo --- pyiron_base/jobs/job/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyiron_base/jobs/job/core.py b/pyiron_base/jobs/job/core.py index d52afa058..b37cee077 100644 --- a/pyiron_base/jobs/job/core.py +++ b/pyiron_base/jobs/job/core.py @@ -914,7 +914,7 @@ def __getitem__(self, item): """ if item in self.list_files(): - return _job_read_file(job, item) + return _job_read_file(self, item) # first try to access HDF5 directly to make the common case fast try: From 7d2089eb400853714ed89eb2cba5379347664db5 Mon Sep 17 00:00:00 2001 From: pyiron-runner Date: Wed, 7 Dec 2022 11:45:19 +0000 Subject: [PATCH 16/42] Format black --- pyiron_base/jobs/job/util.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/pyiron_base/jobs/job/util.py b/pyiron_base/jobs/job/util.py index 16863f82b..c14c280b8 100644 --- a/pyiron_base/jobs/job/util.py +++ b/pyiron_base/jobs/job/util.py @@ -362,7 +362,14 @@ def _job_read_file(job, file_name, tail=None): with open(file_name) as f: return f.readlines() else: - lines = list(reversed([l + os.linesep for l in islice(monty.io.reverse_readfile(file_name), tail)])) + lines = list( + reversed( + [ + l + os.linesep + for l in islice(monty.io.reverse_readfile(file_name), tail) + ] + ) + ) # compatibility with the other methods # monty strips all newlines, where as reading the other ways does # not. So if a file does not end with a newline (as most text From 8ecbfd13f0f4014a9c5202321ca4f1debbee072a Mon Sep 17 00:00:00 2001 From: Marvin Poul Date: Fri, 6 Jan 2023 08:30:00 +0100 Subject: [PATCH 17/42] Enable newline translation in windows tests Also use a sub test for every line test --- tests/job/test_genericJob.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/tests/job/test_genericJob.py b/tests/job/test_genericJob.py index e25944dde..c6b6b3f92 100644 --- a/tests/job/test_genericJob.py +++ b/tests/job/test_genericJob.py @@ -496,10 +496,11 @@ def test_tail(self): f.write(os.linesep.join(content)) for i in range(len(content)): - with contextlib.redirect_stdout(io.StringIO()) as f: - job.tail("test_file", lines=i+1) - self.assertEqual(f.getvalue(), os.linesep.join(content[-i-1:]) + os.linesep, - "tail read incorrect lines from output file when job uncompressed!") + with self.subTest(i=i): + with contextlib.redirect_stdout(io.StringIO(newline=os.linesep)) as f: + job.tail("test_file", lines=i+1) + self.assertEqual(f.getvalue(), os.linesep.join(content[-i-1:]) + os.linesep, + "tail read incorrect lines from output file when job uncompressed!") job.compress() for i in range(len(content)): From 8db9284dd99a51d54b96360e646c752522544f79 Mon Sep 17 00:00:00 2001 From: Marvin Poul Date: Fri, 6 Jan 2023 09:29:28 +0100 Subject: [PATCH 18/42] Add a simple file browser job[] is overloaded to perform many functions at once: 1. access to files 2. access to HDF stored data 3. access to child jobs This change adds a new attribute `.files` to `JobCore` to take over function 1 and deprecates methods on the job itself that are connected to this: `list_files` and `tail`. --- pyiron_base/jobs/job/core.py | 90 ++++++++++++++++++++++++++++++++++++ 1 file changed, 90 insertions(+) diff --git a/pyiron_base/jobs/job/core.py b/pyiron_base/jobs/job/core.py index b37cee077..1c5b4c491 100644 --- a/pyiron_base/jobs/job/core.py +++ b/pyiron_base/jobs/job/core.py @@ -10,6 +10,7 @@ import os import posixpath import shutil +from typing import List import warnings from pyiron_base.interfaces.has_groups import HasGroups @@ -33,6 +34,7 @@ _job_remove_folder, ) from pyiron_base.state import state +from pyiron_base.utils.deprecate import deprecate __author__ = "Jan Janssen" __copyright__ = ( @@ -136,6 +138,11 @@ def __init__(self, project, job_name): def content(self): return self._hdf5_content + @property + def files(self): + return FileBrowser(self) + files.__doc__ = FileBrowser.__doc__ + @property def job_name(self): """ @@ -612,6 +619,7 @@ def get_job_id(self, job_specifier=None): else: return None + @deprecate("use job.files.list()") def list_files(self): """ List files inside the working directory @@ -914,6 +922,10 @@ def __getitem__(self, item): """ if item in self.list_files(): + warnings.warn( + "Using __getitem__ on a job to access files in deprecated: use job.files instead!", + category=DeprecationWarning + ) return _job_read_file(self, item) # first try to access HDF5 directly to make the common case fast @@ -996,6 +1008,7 @@ def __delitem__(self, key): """ del self.project_hdf5[posixpath.join(self.project_hdf5.h5_path, key)] + @deprecate("use job.files.tail() instead!") def tail(self, file_name, lines=100): """ Print the last lines of the given file in the job folder. @@ -1126,3 +1139,80 @@ def __dir__(self): def __repr__(self): return self._project_hdf5.__repr__() + + +class FileBrowser: + """ + Allows to browse the files in a job directory. + + By default this object prints itself as a listing of the job directory and + the files inside. + + >>> job.files + /path/to/my/job: + \tpyiron.log + \terror.out + + Access to the names of files is provided with :meth:`.list` + + >>> job.files.list() + ['pyiron.log', 'error.out', 'INCAR'] + + Access to the contents of files is provided by indexing into this object, + which returns a list of lines in the file + + >>> job.files['error.out'] + ["Oh no\n", "Something went wrong!\n"] + + The :meth:`.tail` method prints the last lines of a file to stdout + + >>> job.files.tail('error.out', lines=1) + Something went wrong! + + For files that have valid python variable names can also be accessed by + attribute notation + + >>> job.files.INCAR + ["SYSTEM=pyiron\n", "ENCUT=270\n", ...] + """ + + __slots__ = ("_job",) + + def __init__(self, job): + self._job = job + + def list(self) -> List[str]: + """ + List all files in the working directory of the job. + """ + return _job_list_files(job) + + def _ipython_display_(self): + path = job.working_directory + ":" + files = ["\t" + f for f in _job_list_files(job)] + print(os.linesep.join([path, *files])) + + def tail(self, file: str, lines: int = 100): + """ + Print the last lines of a file. + + Args: + file (str): filename + lines (int): number of lines to print + + Raises: + FileNotFoundError: if the given file does not exist + """ + print(*_job_read_file(self, file_name, tail=lines), sep="") + + def __getitem__(self, item): + if item not _job_list_files(self._job): + raise KeyError(item) + + return _job_read_file(self._job, item) + + def __getattr__(self, item): + try: + return self[name] + except KeyError: + raise AttributeError(name) from None From e1ef8c1e536b7271ecb60bad9cacd35c4769f7a3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Jan=C3=9Fen?= Date: Sun, 4 Feb 2024 10:34:31 +0100 Subject: [PATCH 19/42] Add output files class --- pyiron_base/jobs/flex/executablecontainer.py | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/pyiron_base/jobs/flex/executablecontainer.py b/pyiron_base/jobs/flex/executablecontainer.py index 9de66f12d..9f488458a 100644 --- a/pyiron_base/jobs/flex/executablecontainer.py +++ b/pyiron_base/jobs/flex/executablecontainer.py @@ -1,3 +1,4 @@ +import os import cloudpickle import numpy as np from pyiron_base.jobs.job.template import TemplateJob @@ -43,6 +44,7 @@ def __init__(self, project, job_name): super().__init__(project, job_name) self._write_input_funct = None self._collect_output_funct = None + self.output_files = OutputFiles(working_directory=self.working_directory) def set_job_type( self, @@ -107,3 +109,21 @@ def from_hdf(self, hdf=None, group_name=None): self._collect_output_funct = cloudpickle.loads( self.project_hdf5["collect_output_function"] ) + + +class OutputFiles: + def __init__(self, working_directory): + self._working_directory = working_directory + + def __dir__(self): + return list(self._get_file_convert_dict().keys()) + + def _get_file_convert_dict(self): + return {f.replace(".", "_"): f for f in os.listdir(self.working_directory)} + + def __getattr__(self, attr): + convert_dict = self._get_file_convert_dict() + if attr in convert_dict.keys(): + return os.path.join(self._working_directory, convert_dict[attr]) + else: + raise AttributeError() From 020c977726b6e4e669920e534c910111903ca26f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Jan=C3=9Fen?= Date: Sun, 4 Feb 2024 11:09:35 +0100 Subject: [PATCH 20/42] Fix working directory bug --- pyiron_base/jobs/flex/executablecontainer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyiron_base/jobs/flex/executablecontainer.py b/pyiron_base/jobs/flex/executablecontainer.py index 9f488458a..6103a6f40 100644 --- a/pyiron_base/jobs/flex/executablecontainer.py +++ b/pyiron_base/jobs/flex/executablecontainer.py @@ -119,7 +119,7 @@ def __dir__(self): return list(self._get_file_convert_dict().keys()) def _get_file_convert_dict(self): - return {f.replace(".", "_"): f for f in os.listdir(self.working_directory)} + return {f.replace(".", "_"): f for f in os.listdir(self._working_directory)} def __getattr__(self, attr): convert_dict = self._get_file_convert_dict() From fc8ad6a3617f89773141446313f460e80317221e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Jan=C3=9Fen?= Date: Mon, 5 Feb 2024 20:39:26 +0100 Subject: [PATCH 21/42] Move OutputFiles to GenericJob --- pyiron_base/jobs/flex/executablecontainer.py | 20 ------------------- pyiron_base/jobs/job/extension/outputfiles.py | 19 ++++++++++++++++++ pyiron_base/jobs/job/generic.py | 2 ++ tests/flex/test_executablecontainer.py | 18 +++++++++++++++++ 4 files changed, 39 insertions(+), 20 deletions(-) create mode 100644 pyiron_base/jobs/job/extension/outputfiles.py diff --git a/pyiron_base/jobs/flex/executablecontainer.py b/pyiron_base/jobs/flex/executablecontainer.py index 6103a6f40..9de66f12d 100644 --- a/pyiron_base/jobs/flex/executablecontainer.py +++ b/pyiron_base/jobs/flex/executablecontainer.py @@ -1,4 +1,3 @@ -import os import cloudpickle import numpy as np from pyiron_base.jobs.job.template import TemplateJob @@ -44,7 +43,6 @@ def __init__(self, project, job_name): super().__init__(project, job_name) self._write_input_funct = None self._collect_output_funct = None - self.output_files = OutputFiles(working_directory=self.working_directory) def set_job_type( self, @@ -109,21 +107,3 @@ def from_hdf(self, hdf=None, group_name=None): self._collect_output_funct = cloudpickle.loads( self.project_hdf5["collect_output_function"] ) - - -class OutputFiles: - def __init__(self, working_directory): - self._working_directory = working_directory - - def __dir__(self): - return list(self._get_file_convert_dict().keys()) - - def _get_file_convert_dict(self): - return {f.replace(".", "_"): f for f in os.listdir(self._working_directory)} - - def __getattr__(self, attr): - convert_dict = self._get_file_convert_dict() - if attr in convert_dict.keys(): - return os.path.join(self._working_directory, convert_dict[attr]) - else: - raise AttributeError() diff --git a/pyiron_base/jobs/job/extension/outputfiles.py b/pyiron_base/jobs/job/extension/outputfiles.py new file mode 100644 index 000000000..c9c6ba623 --- /dev/null +++ b/pyiron_base/jobs/job/extension/outputfiles.py @@ -0,0 +1,19 @@ +import os + + +class OutputFiles: + def __init__(self, working_directory): + self._working_directory = working_directory + + def __dir__(self): + return list(self._get_file_convert_dict().keys()) + + def _get_file_convert_dict(self): + return {f.replace(".", "_"): f for f in os.listdir(self._working_directory)} + + def __getattr__(self, attr): + convert_dict = self._get_file_convert_dict() + if attr in convert_dict.keys(): + return os.path.join(self._working_directory, convert_dict[attr]) + else: + raise AttributeError() diff --git a/pyiron_base/jobs/job/generic.py b/pyiron_base/jobs/job/generic.py index e26cf1de5..2d2f1d0dd 100644 --- a/pyiron_base/jobs/job/generic.py +++ b/pyiron_base/jobs/job/generic.py @@ -49,6 +49,7 @@ from pyiron_base.jobs.job.extension.server.generic import Server from pyiron_base.database.filetable import FileTable from pyiron_base.interfaces.has_dict import HasDict +from pyiron_base.jobs.job.extension.outputfiles import OutputFiles __author__ = "Joerg Neugebauer, Jan Janssen" __copyright__ = ( @@ -163,6 +164,7 @@ def __init__(self, project, job_name): self._write_work_dir_warnings = True self.interactive_cache = None self.error = GenericError(job=self) + self.output_files = OutputFiles(working_directory=self.working_directory) @property def version(self): diff --git a/tests/flex/test_executablecontainer.py b/tests/flex/test_executablecontainer.py index 2563e0f64..69fad9c6b 100644 --- a/tests/flex/test_executablecontainer.py +++ b/tests/flex/test_executablecontainer.py @@ -81,6 +81,24 @@ def test_create_job_factory_with_projecthdfio(self): self.assertEqual(job_reload.input["energy"], energy_value) self.assertEqual(job_reload.output["energy"], energy_value) + def test_job_output_files(self): + create_catjob = create_job_factory( + write_input_funct=write_input, + collect_output_funct=collect_output, + default_input_dict={"energy": 1.0}, + executable_str="cat input_file > output_file", + ) + job = create_catjob( + project=ProjectHDFio(project=self.project, file_name="any.h5", h5_path=None, mode=None), + job_name="job_output_files" + ) + job.run() + self.assertEqual(dir(job.output_files), ['error_out', 'input_file', 'output_file']) + self.assertEqual( + job.output_files.error_out, + os.path.abspath(os.path.join(__file__, "..", "test_executablecontainer", "job_output_files_hdf5", "job_output_files", "error.out")) + ) + def test_create_job_factory_typeerror(self): create_catjob = create_job_factory( write_input_funct=write_input, From c137a6d384211ccfc4e40a6ba5354b95150b6e02 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Jan=C3=9Fen?= Date: Mon, 5 Feb 2024 21:08:05 +0100 Subject: [PATCH 22/42] rename output files to just files --- pyiron_base/jobs/job/core.py | 5 +++++ pyiron_base/jobs/job/extension/{outputfiles.py => files.py} | 2 +- pyiron_base/jobs/job/generic.py | 2 -- 3 files changed, 6 insertions(+), 3 deletions(-) rename pyiron_base/jobs/job/extension/{outputfiles.py => files.py} (96%) diff --git a/pyiron_base/jobs/job/core.py b/pyiron_base/jobs/job/core.py index aec146eb0..f5245b7fe 100644 --- a/pyiron_base/jobs/job/core.py +++ b/pyiron_base/jobs/job/core.py @@ -31,6 +31,7 @@ _job_remove_folder, ) from pyiron_base.state import state +from pyiron_base.jobs.job.extension.files import Files __author__ = "Jan Janssen" __copyright__ = ( @@ -352,6 +353,10 @@ def path(self): """ return self.project_hdf5.path + @property + def files(self): + return Files(working_directory=self.working_directory) + def check_if_job_exists(self, job_name=None, project=None): """ Check if a job already exists in an specific project. diff --git a/pyiron_base/jobs/job/extension/outputfiles.py b/pyiron_base/jobs/job/extension/files.py similarity index 96% rename from pyiron_base/jobs/job/extension/outputfiles.py rename to pyiron_base/jobs/job/extension/files.py index c9c6ba623..f54f29d49 100644 --- a/pyiron_base/jobs/job/extension/outputfiles.py +++ b/pyiron_base/jobs/job/extension/files.py @@ -1,7 +1,7 @@ import os -class OutputFiles: +class Files: def __init__(self, working_directory): self._working_directory = working_directory diff --git a/pyiron_base/jobs/job/generic.py b/pyiron_base/jobs/job/generic.py index 2d2f1d0dd..e26cf1de5 100644 --- a/pyiron_base/jobs/job/generic.py +++ b/pyiron_base/jobs/job/generic.py @@ -49,7 +49,6 @@ from pyiron_base.jobs.job.extension.server.generic import Server from pyiron_base.database.filetable import FileTable from pyiron_base.interfaces.has_dict import HasDict -from pyiron_base.jobs.job.extension.outputfiles import OutputFiles __author__ = "Joerg Neugebauer, Jan Janssen" __copyright__ = ( @@ -164,7 +163,6 @@ def __init__(self, project, job_name): self._write_work_dir_warnings = True self.interactive_cache = None self.error = GenericError(job=self) - self.output_files = OutputFiles(working_directory=self.working_directory) @property def version(self): From 3146b33c0b4d34bfbbb5cb7907f43268d895094c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Jan=C3=9Fen?= Date: Mon, 5 Feb 2024 21:12:07 +0100 Subject: [PATCH 23/42] fix tests --- tests/flex/test_executablecontainer.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/flex/test_executablecontainer.py b/tests/flex/test_executablecontainer.py index 69fad9c6b..f8c6a0eed 100644 --- a/tests/flex/test_executablecontainer.py +++ b/tests/flex/test_executablecontainer.py @@ -81,7 +81,7 @@ def test_create_job_factory_with_projecthdfio(self): self.assertEqual(job_reload.input["energy"], energy_value) self.assertEqual(job_reload.output["energy"], energy_value) - def test_job_output_files(self): + def test_job_files(self): create_catjob = create_job_factory( write_input_funct=write_input, collect_output_funct=collect_output, @@ -93,9 +93,9 @@ def test_job_output_files(self): job_name="job_output_files" ) job.run() - self.assertEqual(dir(job.output_files), ['error_out', 'input_file', 'output_file']) + self.assertEqual(dir(job.files), ['error_out', 'input_file', 'output_file']) self.assertEqual( - job.output_files.error_out, + job.files.error_out, os.path.abspath(os.path.join(__file__, "..", "test_executablecontainer", "job_output_files_hdf5", "job_output_files", "error.out")) ) From 254dc3cd7ac699f63d13ce959b4dc24845d6d064 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Jan=C3=9Fen?= Date: Mon, 5 Feb 2024 21:40:28 +0100 Subject: [PATCH 24/42] fix tests for windows --- tests/flex/test_executablecontainer.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/tests/flex/test_executablecontainer.py b/tests/flex/test_executablecontainer.py index f8c6a0eed..227a07c82 100644 --- a/tests/flex/test_executablecontainer.py +++ b/tests/flex/test_executablecontainer.py @@ -94,10 +94,11 @@ def test_job_files(self): ) job.run() self.assertEqual(dir(job.files), ['error_out', 'input_file', 'output_file']) - self.assertEqual( - job.files.error_out, - os.path.abspath(os.path.join(__file__, "..", "test_executablecontainer", "job_output_files_hdf5", "job_output_files", "error.out")) - ) + output_file_path = os.path.abspath(os.path.join(__file__, "..", "test_executablecontainer", "job_output_files_hdf5", "job_output_files", "error.out")) + if os.name != "nt": + self.assertEqual(job.files.error_out, output_file_path) + else: + self.assertEqual(job.files.error_out.replace("/", "\\"), output_file_path) def test_create_job_factory_typeerror(self): create_catjob = create_job_factory( From 086565c7b30dd6ecc7b6e5928221141bec70c9d5 Mon Sep 17 00:00:00 2001 From: Jan Janssen Date: Tue, 6 Feb 2024 16:11:04 +0100 Subject: [PATCH 25/42] update monty --- .ci_support/environment-docs.yml | 1 + .ci_support/environment-old.yml | 1 + .ci_support/environment.yml | 2 +- pyproject.toml | 1 + 4 files changed, 4 insertions(+), 1 deletion(-) diff --git a/.ci_support/environment-docs.yml b/.ci_support/environment-docs.yml index 0a361dd9e..48d2d51ad 100644 --- a/.ci_support/environment-docs.yml +++ b/.ci_support/environment-docs.yml @@ -11,6 +11,7 @@ dependencies: - h5io_browser =0.0.7 - h5py =3.10.0 - jinja2 =3.1.3 +- monty =2024.2.2 - numpy =1.26.3 - pandas =2.2.0 - pint =0.23 diff --git a/.ci_support/environment-old.yml b/.ci_support/environment-old.yml index 3aca67905..e924430e0 100644 --- a/.ci_support/environment-old.yml +++ b/.ci_support/environment-old.yml @@ -6,6 +6,7 @@ dependencies: - h5io_browser =0.0.6 - h5py =3.6.0 - jinja2 =2.11.3 +- monty =2024.2.2 - numpy =1.23.5 - pandas =2.0.0 - pint =0.18 diff --git a/.ci_support/environment.yml b/.ci_support/environment.yml index 2699e9afe..6a69922f3 100644 --- a/.ci_support/environment.yml +++ b/.ci_support/environment.yml @@ -8,7 +8,7 @@ dependencies: - h5io_browser =0.0.7 - h5py =3.10.0 - jinja2 =3.1.3 -- monty =2022.9.9 +- monty =2024.2.2 - numpy =1.26.3 - pandas =2.2.0 - pint =0.23 diff --git a/pyproject.toml b/pyproject.toml index 1dacb159e..46161ddc2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -30,6 +30,7 @@ dependencies = [ "h5py==3.10.0", "jinja2==3.1.3", "numpy==1.26.3", + "monty==2024.2.2", "pandas==2.2.0", "pint==0.23", "psutil==5.9.8", From c010b5cdbd89afcdf0d77aaf7fc8732a89108239 Mon Sep 17 00:00:00 2001 From: Jan Janssen Date: Tue, 6 Feb 2024 16:11:10 +0100 Subject: [PATCH 26/42] fix --- pyiron_base/jobs/job/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyiron_base/jobs/job/core.py b/pyiron_base/jobs/job/core.py index 3f2ea55a8..6d4c35191 100644 --- a/pyiron_base/jobs/job/core.py +++ b/pyiron_base/jobs/job/core.py @@ -1190,7 +1190,7 @@ def tail(self, file: str, lines: int = 100): print(*_job_read_file(self, file_name, tail=lines), sep="") def __getitem__(self, item): - if item not _job_list_files(self._job): + if item not in _job_list_files(self._job): raise KeyError(item) return _job_read_file(self._job, item) From b597a57ba09108a0cac65404d94397887cb7eb3e Mon Sep 17 00:00:00 2001 From: Jan Janssen Date: Tue, 6 Feb 2024 16:27:45 +0100 Subject: [PATCH 27/42] more fixes --- pyiron_base/jobs/job/core.py | 78 +-------------------- pyiron_base/jobs/job/extension/files.py | 90 +++++++++++++++++++++---- 2 files changed, 79 insertions(+), 89 deletions(-) diff --git a/pyiron_base/jobs/job/core.py b/pyiron_base/jobs/job/core.py index 6d4c35191..4719b097d 100644 --- a/pyiron_base/jobs/job/core.py +++ b/pyiron_base/jobs/job/core.py @@ -35,6 +35,7 @@ ) from pyiron_base.state import state from pyiron_base.utils.deprecate import deprecate +from pyiron_base.jobs.job.extension.files import FileBrowser __author__ = "Jan Janssen" __copyright__ = ( @@ -1123,80 +1124,3 @@ def __dir__(self): def __repr__(self): return self._project_hdf5.__repr__() - - -class FileBrowser: - """ - Allows to browse the files in a job directory. - - By default this object prints itself as a listing of the job directory and - the files inside. - - >>> job.files - /path/to/my/job: - \tpyiron.log - \terror.out - - Access to the names of files is provided with :meth:`.list` - - >>> job.files.list() - ['pyiron.log', 'error.out', 'INCAR'] - - Access to the contents of files is provided by indexing into this object, - which returns a list of lines in the file - - >>> job.files['error.out'] - ["Oh no\n", "Something went wrong!\n"] - - The :meth:`.tail` method prints the last lines of a file to stdout - - >>> job.files.tail('error.out', lines=1) - Something went wrong! - - For files that have valid python variable names can also be accessed by - attribute notation - - >>> job.files.INCAR - ["SYSTEM=pyiron\n", "ENCUT=270\n", ...] - """ - - __slots__ = ("_job",) - - def __init__(self, job): - self._job = job - - def list(self) -> List[str]: - """ - List all files in the working directory of the job. - """ - return _job_list_files(job) - - def _ipython_display_(self): - path = job.working_directory + ":" - files = ["\t" + f for f in _job_list_files(job)] - print(os.linesep.join([path, *files])) - - def tail(self, file: str, lines: int = 100): - """ - Print the last lines of a file. - - Args: - file (str): filename - lines (int): number of lines to print - - Raises: - FileNotFoundError: if the given file does not exist - """ - print(*_job_read_file(self, file_name, tail=lines), sep="") - - def __getitem__(self, item): - if item not in _job_list_files(self._job): - raise KeyError(item) - - return _job_read_file(self._job, item) - - def __getattr__(self, item): - try: - return self[name] - except KeyError: - raise AttributeError(name) from None diff --git a/pyiron_base/jobs/job/extension/files.py b/pyiron_base/jobs/job/extension/files.py index f54f29d49..04bcb8085 100644 --- a/pyiron_base/jobs/job/extension/files.py +++ b/pyiron_base/jobs/job/extension/files.py @@ -1,19 +1,85 @@ import os +from pyiron_base.jobs.job.util import ( + _job_list_files, + _job_read_file, +) -class Files: - def __init__(self, working_directory): - self._working_directory = working_directory +class FileBrowser: + """ + Allows to browse the files in a job directory. + + By default this object prints itself as a listing of the job directory and + the files inside. + + >>> job.files + /path/to/my/job: + \tpyiron.log + \terror.out + + Access to the names of files is provided with :meth:`.list` + + >>> job.files.list() + ['pyiron.log', 'error.out', 'INCAR'] + + Access to the contents of files is provided by indexing into this object, + which returns a list of lines in the file + + >>> job.files['error.out'] + ["Oh no\n", "Something went wrong!\n"] + + The :meth:`.tail` method prints the last lines of a file to stdout + + >>> job.files.tail('error.out', lines=1) + Something went wrong! + + For files that have valid python variable names can also be accessed by + attribute notation + + >>> job.files.INCAR + ["SYSTEM=pyiron\n", "ENCUT=270\n", ...] + """ + + __slots__ = ("_job",) + + def __init__(self, job): + self._job = job def __dir__(self): - return list(self._get_file_convert_dict().keys()) + return _job_list_files(job=self._job) + ["list", "tail", "__dir__", "__getitem__", "__getattr__", "_ipython_display_"] + + def list(self) -> List[str]: + """ + List all files in the working directory of the job. + """ + return _job_list_files(job=self._job) + + def _ipython_display_(self): + path = job.working_directory + ":" + files = ["\t" + f for f in _job_list_files(job=self._job)] + print(os.linesep.join([path, *files])) + + def tail(self, file: str, lines: int = 100): + """ + Print the last lines of a file. + + Args: + file (str): filename + lines (int): number of lines to print + + Raises: + FileNotFoundError: if the given file does not exist + """ + print(*_job_read_file(job=self._job, file_name=file, tail=lines), sep="") + + def __getitem__(self, item): + if item not in _job_list_files(job=self._job): + raise KeyError(item) - def _get_file_convert_dict(self): - return {f.replace(".", "_"): f for f in os.listdir(self._working_directory)} + return _job_read_file(job=self._job, file_name=item) - def __getattr__(self, attr): - convert_dict = self._get_file_convert_dict() - if attr in convert_dict.keys(): - return os.path.join(self._working_directory, convert_dict[attr]) - else: - raise AttributeError() + def __getattr__(self, item): + try: + return self[name] + except KeyError: + raise AttributeError(name) from None From 7b6b94e6faf3f54a14e853ae3f8271e04bc26de8 Mon Sep 17 00:00:00 2001 From: pyiron-runner Date: Tue, 6 Feb 2024 15:29:16 +0000 Subject: [PATCH 28/42] Format black --- pyiron_base/jobs/job/core.py | 5 +++-- pyiron_base/jobs/job/extension/files.py | 9 ++++++++- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/pyiron_base/jobs/job/core.py b/pyiron_base/jobs/job/core.py index 4719b097d..8975915b1 100644 --- a/pyiron_base/jobs/job/core.py +++ b/pyiron_base/jobs/job/core.py @@ -142,6 +142,7 @@ def content(self): @property def files(self): return FileBrowser(self) + files.__doc__ = FileBrowser.__doc__ @property @@ -908,8 +909,8 @@ def __getitem__(self, item): if item in self.list_files(): warnings.warn( - "Using __getitem__ on a job to access files in deprecated: use job.files instead!", - category=DeprecationWarning + "Using __getitem__ on a job to access files in deprecated: use job.files instead!", + category=DeprecationWarning, ) return _job_read_file(self, item) diff --git a/pyiron_base/jobs/job/extension/files.py b/pyiron_base/jobs/job/extension/files.py index 04bcb8085..cc3a25ba0 100644 --- a/pyiron_base/jobs/job/extension/files.py +++ b/pyiron_base/jobs/job/extension/files.py @@ -46,7 +46,14 @@ def __init__(self, job): self._job = job def __dir__(self): - return _job_list_files(job=self._job) + ["list", "tail", "__dir__", "__getitem__", "__getattr__", "_ipython_display_"] + return _job_list_files(job=self._job) + [ + "list", + "tail", + "__dir__", + "__getitem__", + "__getattr__", + "_ipython_display_", + ] def list(self) -> List[str]: """ From b02aa2fe2c30cad789e54a736f3c643717237316 Mon Sep 17 00:00:00 2001 From: Jan Janssen Date: Tue, 6 Feb 2024 16:29:32 +0100 Subject: [PATCH 29/42] fix typing --- pyiron_base/jobs/job/extension/files.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pyiron_base/jobs/job/extension/files.py b/pyiron_base/jobs/job/extension/files.py index 04bcb8085..fcbb3755c 100644 --- a/pyiron_base/jobs/job/extension/files.py +++ b/pyiron_base/jobs/job/extension/files.py @@ -1,4 +1,5 @@ import os +from typing import List from pyiron_base.jobs.job.util import ( _job_list_files, _job_read_file, From 2ad939185154d28ff77cb5dc842234f66960cb37 Mon Sep 17 00:00:00 2001 From: Jan Janssen Date: Tue, 6 Feb 2024 16:43:29 +0100 Subject: [PATCH 30/42] fix tests --- pyiron_base/jobs/job/extension/files.py | 7 +++++-- tests/flex/test_executablecontainer.py | 10 +++++----- 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/pyiron_base/jobs/job/extension/files.py b/pyiron_base/jobs/job/extension/files.py index fbc8debba..7160689a1 100644 --- a/pyiron_base/jobs/job/extension/files.py +++ b/pyiron_base/jobs/job/extension/files.py @@ -46,8 +46,11 @@ class FileBrowser: def __init__(self, job): self._job = job + def _get_file_dict(self): + return {f.replace(".", "_"):f for f in _job_list_files(job=self._job)} + def __dir__(self): - return _job_list_files(job=self._job) + [ + return list(self._get_file_dict().keys()) + [ "list", "tail", "__dir__", @@ -88,6 +91,6 @@ def __getitem__(self, item): def __getattr__(self, item): try: - return self[name] + return self[self._get_file_dict()[item]] except KeyError: raise AttributeError(name) from None diff --git a/tests/flex/test_executablecontainer.py b/tests/flex/test_executablecontainer.py index 227a07c82..366bb03ab 100644 --- a/tests/flex/test_executablecontainer.py +++ b/tests/flex/test_executablecontainer.py @@ -93,12 +93,12 @@ def test_job_files(self): job_name="job_output_files" ) job.run() - self.assertEqual(dir(job.files), ['error_out', 'input_file', 'output_file']) + self.assertEqual(dir(job.files), ['__dir__', '__getattr__', '__getitem__', '_ipython_display_','error_out', 'input_file', 'list', 'output_file', 'tail']) output_file_path = os.path.abspath(os.path.join(__file__, "..", "test_executablecontainer", "job_output_files_hdf5", "job_output_files", "error.out")) - if os.name != "nt": - self.assertEqual(job.files.error_out, output_file_path) - else: - self.assertEqual(job.files.error_out.replace("/", "\\"), output_file_path) + # if os.name != "nt": + # self.assertEqual(job.files.error_out, output_file_path) + # else: + # self.assertEqual(job.files.error_out.replace("/", "\\"), output_file_path) def test_create_job_factory_typeerror(self): create_catjob = create_job_factory( From fe1bf2ee0d215bc5ce200f124fcab9ee824f3297 Mon Sep 17 00:00:00 2001 From: Jan Janssen Date: Tue, 6 Feb 2024 16:47:20 +0100 Subject: [PATCH 31/42] fix ipython representation --- pyiron_base/jobs/job/extension/files.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyiron_base/jobs/job/extension/files.py b/pyiron_base/jobs/job/extension/files.py index 7160689a1..874ca58d8 100644 --- a/pyiron_base/jobs/job/extension/files.py +++ b/pyiron_base/jobs/job/extension/files.py @@ -66,7 +66,7 @@ def list(self) -> List[str]: return _job_list_files(job=self._job) def _ipython_display_(self): - path = job.working_directory + ":" + path = self._job.working_directory + ":" files = ["\t" + f for f in _job_list_files(job=self._job)] print(os.linesep.join([path, *files])) From 5eb2054d0c1245854d43dcb9b4ecfc79545f4ade Mon Sep 17 00:00:00 2001 From: pyiron-runner Date: Tue, 6 Feb 2024 15:47:52 +0000 Subject: [PATCH 32/42] Format black --- pyiron_base/jobs/job/extension/files.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyiron_base/jobs/job/extension/files.py b/pyiron_base/jobs/job/extension/files.py index 874ca58d8..4e34177c7 100644 --- a/pyiron_base/jobs/job/extension/files.py +++ b/pyiron_base/jobs/job/extension/files.py @@ -47,7 +47,7 @@ def __init__(self, job): self._job = job def _get_file_dict(self): - return {f.replace(".", "_"):f for f in _job_list_files(job=self._job)} + return {f.replace(".", "_"): f for f in _job_list_files(job=self._job)} def __dir__(self): return list(self._get_file_dict().keys()) + [ From 36ca620bf2939433e3f00125928a4b46ea7bb12b Mon Sep 17 00:00:00 2001 From: Jan Janssen Date: Tue, 6 Feb 2024 17:00:47 +0100 Subject: [PATCH 33/42] try to fix tests --- pyiron_base/jobs/job/extension/files.py | 2 +- tests/job/test_genericJob.py | 8 +++++--- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/pyiron_base/jobs/job/extension/files.py b/pyiron_base/jobs/job/extension/files.py index 874ca58d8..63982e2cb 100644 --- a/pyiron_base/jobs/job/extension/files.py +++ b/pyiron_base/jobs/job/extension/files.py @@ -93,4 +93,4 @@ def __getattr__(self, item): try: return self[self._get_file_dict()[item]] except KeyError: - raise AttributeError(name) from None + raise AttributeError(item) from None diff --git a/tests/job/test_genericJob.py b/tests/job/test_genericJob.py index 870f18775..09d51dcd1 100644 --- a/tests/job/test_genericJob.py +++ b/tests/job/test_genericJob.py @@ -582,14 +582,16 @@ def test_tail(self): with self.subTest(i=i): with contextlib.redirect_stdout(io.StringIO(newline=os.linesep)) as f: job.tail("test_file", lines=i+1) - self.assertEqual(f.getvalue(), os.linesep.join(content[-i-1:]) + os.linesep, - "tail read incorrect lines from output file when job uncompressed!") + reference_str = os.linesep.join(content[-i-1:]) + os.linesep + self.assertEqual(f.getvalue().rstrip(), reference_str.rstrip(), + "tail read incorrect lines from output file when job uncompressed!") job.compress() for i in range(len(content)): with contextlib.redirect_stdout(io.StringIO()) as f: job.tail("test_file", lines=i+1) - self.assertEqual(f.getvalue(), os.linesep.join(content[-i-1:]) + os.linesep, + reference_str = os.linesep.join(content[-i-1:]) + os.linesep + self.assertEqual(f.getvalue().rstrip(), reference_str.rstrip(), "tail read incorrect lines from output file when job compressed!") From 905bd1a4c48c73835e0b4276ab3e56e696de3f05 Mon Sep 17 00:00:00 2001 From: Jan Janssen Date: Tue, 6 Feb 2024 17:07:42 +0100 Subject: [PATCH 34/42] try to remove \r --- tests/job/test_genericJob.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/job/test_genericJob.py b/tests/job/test_genericJob.py index 09d51dcd1..64b690bf6 100644 --- a/tests/job/test_genericJob.py +++ b/tests/job/test_genericJob.py @@ -583,7 +583,7 @@ def test_tail(self): with contextlib.redirect_stdout(io.StringIO(newline=os.linesep)) as f: job.tail("test_file", lines=i+1) reference_str = os.linesep.join(content[-i-1:]) + os.linesep - self.assertEqual(f.getvalue().rstrip(), reference_str.rstrip(), + self.assertEqual(f.getvalue().replace('\r', ''), reference_str.replace('\r', ''), "tail read incorrect lines from output file when job uncompressed!") job.compress() @@ -591,7 +591,7 @@ def test_tail(self): with contextlib.redirect_stdout(io.StringIO()) as f: job.tail("test_file", lines=i+1) reference_str = os.linesep.join(content[-i-1:]) + os.linesep - self.assertEqual(f.getvalue().rstrip(), reference_str.rstrip(), + self.assertEqual(f.getvalue().replace('\r', ''), reference_str.replace('\r', ''), "tail read incorrect lines from output file when job compressed!") From 4ee136f1a050cc75af65f29e38772d4c648cb967 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Jan=C3=9Fen?= Date: Wed, 7 Feb 2024 08:25:15 +0100 Subject: [PATCH 35/42] Use working directory rather than job object --- pyiron_base/jobs/job/core.py | 2 +- pyiron_base/jobs/job/extension/files.py | 41 ++++++++--- pyiron_base/jobs/job/util.py | 98 +++++++++++++++++++++---- 3 files changed, 113 insertions(+), 28 deletions(-) diff --git a/pyiron_base/jobs/job/core.py b/pyiron_base/jobs/job/core.py index 8975915b1..ef4a46e19 100644 --- a/pyiron_base/jobs/job/core.py +++ b/pyiron_base/jobs/job/core.py @@ -141,7 +141,7 @@ def content(self): @property def files(self): - return FileBrowser(self) + return FileBrowser(working_directory=self.working_directory) files.__doc__ = FileBrowser.__doc__ diff --git a/pyiron_base/jobs/job/extension/files.py b/pyiron_base/jobs/job/extension/files.py index 50a5748e0..3a00dc30a 100644 --- a/pyiron_base/jobs/job/extension/files.py +++ b/pyiron_base/jobs/job/extension/files.py @@ -1,8 +1,8 @@ import os from typing import List from pyiron_base.jobs.job.util import ( - _job_list_files, - _job_read_file, + _working_directory_list_files, + _working_directory_read_file, ) @@ -41,13 +41,18 @@ class FileBrowser: ["SYSTEM=pyiron\n", "ENCUT=270\n", ...] """ - __slots__ = ("_job",) + __slots__ = ("_working_directory",) - def __init__(self, job): - self._job = job + def __init__(self, working_directory): + self._working_directory = working_directory def _get_file_dict(self): - return {f.replace(".", "_"): f for f in _job_list_files(job=self._job)} + return { + f.replace(".", "_"): f + for f in _working_directory_list_files( + working_directory=self._working_directory + ) + } def __dir__(self): return list(self._get_file_dict().keys()) + [ @@ -63,11 +68,16 @@ def list(self) -> List[str]: """ List all files in the working directory of the job. """ - return _job_list_files(job=self._job) + return _working_directory_list_files(working_directory=self._working_directory) def _ipython_display_(self): path = self._job.working_directory + ":" - files = ["\t" + f for f in _job_list_files(job=self._job)] + files = [ + "\t" + f + for f in _working_directory_list_files( + working_directory=self._working_directory + ) + ] print(os.linesep.join([path, *files])) def tail(self, file: str, lines: int = 100): @@ -81,13 +91,22 @@ def tail(self, file: str, lines: int = 100): Raises: FileNotFoundError: if the given file does not exist """ - print(*_job_read_file(job=self._job, file_name=file, tail=lines), sep="") + print( + *_working_directory_read_file( + working_directory=self._working_directory, file_name=file, tail=lines + ), + sep="", + ) def __getitem__(self, item): - if item not in _job_list_files(job=self._job): + if item not in _working_directory_list_files( + working_directory=self._working_directory + ): raise KeyError(item) - return _job_read_file(job=self._job, file_name=item) + return _working_directory_read_file( + working_directory=self._working_directory, file_name=item + ) def __getattr__(self, item): try: diff --git a/pyiron_base/jobs/job/util.py b/pyiron_base/jobs/job/util.py index 876ea8b63..a76e3e877 100644 --- a/pyiron_base/jobs/job/util.py +++ b/pyiron_base/jobs/job/util.py @@ -265,7 +265,14 @@ def _kill_child(job): def _job_compressed_name(job): """Return the canonical file name of a compressed job.""" - return os.path.join(job.working_directory, job.job_name + ".tar.bz2") + return _get_compressed_job_name(working_directory=job.working_directory) + + +def _get_compressed_job_name(working_directory): + """Return the canonical file name of a compressed job from the working directory.""" + return os.path.join( + working_directory, os.path.basename(working_directory) + ".tar.bz2" + ) def _job_compress(job, files_to_compress=None): @@ -315,6 +322,22 @@ def _job_decompress(job): pass +def _working_directory_is_compressed(working_directory): + """ + Check if the working directory of a given job is already compressed or not. + + Args: + working_directory (str): working directory of the job object + + Returns: + bool: [True/False] + """ + compressed_name = os.path.basename( + _get_compressed_job_name(working_directory=working_directory) + ) + return compressed_name in os.listdir(working_directory) + + def _job_is_compressed(job): """ Check if the job is already compressed or not. @@ -325,52 +348,74 @@ def _job_is_compressed(job): Returns: bool: [True/False] """ - compressed_name = os.path.basename(_job_compressed_name(job)) - return compressed_name in os.listdir(job.working_directory) + return _working_directory_is_compressed(working_directory=job.working_directory) -def _job_list_files(job): +def _working_directory_list_files(working_directory): """ Returns list of files in the jobs working directory. - If the job is compressed, return a list of files in the archive. + If the working directory is compressed, return a list of files in the archive. Args: - job (JobCore): job object to inspect files in + working_directory (str): working directory of the job object to inspect files in Returns: list of str: file names """ - if os.path.isdir(job.working_directory): - if _job_is_compressed(job): - with tarfile.open(_job_compressed_name(job), "r") as tar: + if os.path.isdir(working_directory): + if _working_directory_is_compressed(working_directory=working_directory): + with tarfile.open( + _get_compressed_job_name(working_directory=working_directory), "r" + ) as tar: return [member.name for member in tar.getmembers() if member.isfile()] else: - return os.listdir(job.working_directory) + return os.listdir(working_directory) return [] -def _job_read_file(job, file_name, tail=None): +def _job_list_files(job): + """ + Returns list of files in the jobs working directory. + + If the job is compressed, return a list of files in the archive. + + Args: + job (JobCore): job object to inspect files in + + Returns: + list of str: file names + """ + return _working_directory_list_files(working_directory=job.working_directory) + + +def _working_directory_read_file(working_directory, file_name, tail=None): """ Return list of lines of the given file. - Transparently decompresses the file if job is compressed. + Transparently decompresses the file if working directory is compressed. If `tail` is given and job is decompressed, only read the last lines instead of traversing the full file. Args: + working_directory (str): working directory of the job object file_name (str): the file to print tail (int, optional): only return the last lines Raises: FileNotFoundError: if the given file name does not exist in the job folder """ - if file_name not in job.list_files(): + if file_name not in _working_directory_list_files( + working_directory=working_directory + ): raise FileNotFoundError(file_name) - if _job_is_compressed(job): - with tarfile.open(_job_compressed_name(job), encoding="utf8") as f: + if _working_directory_is_compressed(working_directory=working_directory): + with tarfile.open( + _get_compressed_job_name(working_directory=working_directory), + encoding="utf8", + ) as f: lines = [ line.decode("utf8") for line in f.extractfile(file_name).readlines() ] @@ -379,7 +424,7 @@ def _job_read_file(job, file_name, tail=None): else: return lines[-tail:] else: - file_name = posixpath.join(job.working_directory, file_name) + file_name = posixpath.join(working_directory, file_name) if tail is None: with open(file_name) as f: return f.readlines() @@ -400,6 +445,27 @@ def _job_read_file(job, file_name, tail=None): return lines +def _job_read_file(job, file_name, tail=None): + """ + Return list of lines of the given file. + + Transparently decompresses the file if job is compressed. + + If `tail` is given and job is decompressed, only read the last lines + instead of traversing the full file. + + Args: + file_name (str): the file to print + tail (int, optional): only return the last lines + + Raises: + FileNotFoundError: if the given file name does not exist in the job folder + """ + return _working_directory_read_file( + working_directory=job.working_directory, file_name=file_name, tail=tail + ) + + def _job_archive(job): """ Compress HDF5 file of the job object to tar-archive From 5cca59ef5ccf0001bf4cf0084ced36702801d247 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Jan=C3=9Fen?= Date: Wed, 7 Feb 2024 08:51:12 +0100 Subject: [PATCH 36/42] Return files as strings --- pyiron_base/jobs/job/extension/files.py | 14 +++++++++++--- tests/flex/test_executablecontainer.py | 13 ++++++++----- tests/job/test_genericJob.py | 20 ++++++++++++++++++++ 3 files changed, 39 insertions(+), 8 deletions(-) diff --git a/pyiron_base/jobs/job/extension/files.py b/pyiron_base/jobs/job/extension/files.py index 3a00dc30a..d916def7c 100644 --- a/pyiron_base/jobs/job/extension/files.py +++ b/pyiron_base/jobs/job/extension/files.py @@ -104,12 +104,20 @@ def __getitem__(self, item): ): raise KeyError(item) - return _working_directory_read_file( - working_directory=self._working_directory, file_name=item - ) + return File(os.path.join(self._working_directory, item)) def __getattr__(self, item): try: return self[self._get_file_dict()[item]] except KeyError: raise AttributeError(item) from None + + +class File(str): + def tail(self, lines: int = 100): + print( + *_working_directory_read_file( + working_directory=os.path.dirname(self), file_name=os.path.basename(self), tail=lines + ), + sep="", + ) diff --git a/tests/flex/test_executablecontainer.py b/tests/flex/test_executablecontainer.py index 366bb03ab..3edf7e7de 100644 --- a/tests/flex/test_executablecontainer.py +++ b/tests/flex/test_executablecontainer.py @@ -93,12 +93,15 @@ def test_job_files(self): job_name="job_output_files" ) job.run() - self.assertEqual(dir(job.files), ['__dir__', '__getattr__', '__getitem__', '_ipython_display_','error_out', 'input_file', 'list', 'output_file', 'tail']) + self.assertEqual( + dir(job.files), + ['__dir__', '__getattr__', '__getitem__', '_ipython_display_', 'error_out', 'input_file', 'list', 'output_file', 'tail'] + ) output_file_path = os.path.abspath(os.path.join(__file__, "..", "test_executablecontainer", "job_output_files_hdf5", "job_output_files", "error.out")) - # if os.name != "nt": - # self.assertEqual(job.files.error_out, output_file_path) - # else: - # self.assertEqual(job.files.error_out.replace("/", "\\"), output_file_path) + if os.name != "nt": + self.assertEqual(job.files.error_out, output_file_path) + else: + self.assertEqual(job.files.error_out.replace("/", "\\"), output_file_path) def test_create_job_factory_typeerror(self): create_catjob = create_job_factory( diff --git a/tests/job/test_genericJob.py b/tests/job/test_genericJob.py index 64b690bf6..082a17400 100644 --- a/tests/job/test_genericJob.py +++ b/tests/job/test_genericJob.py @@ -585,6 +585,16 @@ def test_tail(self): reference_str = os.linesep.join(content[-i-1:]) + os.linesep self.assertEqual(f.getvalue().replace('\r', ''), reference_str.replace('\r', ''), "tail read incorrect lines from output file when job uncompressed!") + with contextlib.redirect_stdout(io.StringIO(newline=os.linesep)) as f: + job.files.tail("test_file", lines=i+1) + reference_str = os.linesep.join(content[-i-1:]) + os.linesep + self.assertEqual(f.getvalue().replace('\r', ''), reference_str.replace('\r', ''), + "tail read incorrect lines from output file when job uncompressed!") + with contextlib.redirect_stdout(io.StringIO(newline=os.linesep)) as f: + job.files.test_file.tail(lines=i+1) + reference_str = os.linesep.join(content[-i-1:]) + os.linesep + self.assertEqual(f.getvalue().replace('\r', ''), reference_str.replace('\r', ''), + "tail read incorrect lines from output file when job uncompressed!") job.compress() for i in range(len(content)): @@ -593,6 +603,16 @@ def test_tail(self): reference_str = os.linesep.join(content[-i-1:]) + os.linesep self.assertEqual(f.getvalue().replace('\r', ''), reference_str.replace('\r', ''), "tail read incorrect lines from output file when job compressed!") + with contextlib.redirect_stdout(io.StringIO()) as f: + job.files.tail("test_file", lines=i+1) + reference_str = os.linesep.join(content[-i-1:]) + os.linesep + self.assertEqual(f.getvalue().replace('\r', ''), reference_str.replace('\r', ''), + "tail read incorrect lines from output file when job compressed!") + with contextlib.redirect_stdout(io.StringIO()) as f: + job.files.test_file.tail(lines=i+1) + reference_str = os.linesep.join(content[-i-1:]) + os.linesep + self.assertEqual(f.getvalue().replace('\r', ''), reference_str.replace('\r', ''), + "tail read incorrect lines from output file when job compressed!") if __name__ == "__main__": From 7718b987293de8643880a6098f33d78bbac725a9 Mon Sep 17 00:00:00 2001 From: pyiron-runner Date: Wed, 7 Feb 2024 07:52:14 +0000 Subject: [PATCH 37/42] Format black --- pyiron_base/jobs/job/extension/files.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pyiron_base/jobs/job/extension/files.py b/pyiron_base/jobs/job/extension/files.py index d916def7c..9bbad020f 100644 --- a/pyiron_base/jobs/job/extension/files.py +++ b/pyiron_base/jobs/job/extension/files.py @@ -117,7 +117,9 @@ class File(str): def tail(self, lines: int = 100): print( *_working_directory_read_file( - working_directory=os.path.dirname(self), file_name=os.path.basename(self), tail=lines + working_directory=os.path.dirname(self), + file_name=os.path.basename(self), + tail=lines, ), sep="", ) From f48201ac1856ce01e9752c0dd7a6a7ae3bbf9df2 Mon Sep 17 00:00:00 2001 From: Jan Janssen Date: Wed, 7 Feb 2024 14:19:26 +0100 Subject: [PATCH 38/42] Update pyiron_base/jobs/job/core.py Co-authored-by: Marvin Poul --- pyiron_base/jobs/job/core.py | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/pyiron_base/jobs/job/core.py b/pyiron_base/jobs/job/core.py index ef4a46e19..b8126cae9 100644 --- a/pyiron_base/jobs/job/core.py +++ b/pyiron_base/jobs/job/core.py @@ -994,20 +994,6 @@ def __delitem__(self, key): """ del self.project_hdf5[posixpath.join(self.project_hdf5.h5_path, key)] - @deprecate("use job.files.tail() instead!") - def tail(self, file_name, lines=100): - """ - Print the last lines of the given file in the job folder. - - Args: - file_name (str): the file to print - lines (int): how many lines to print - - Raises: - FileNotFoundError: if the given file name does not exist in the job folder - """ - print(*_job_read_file(self, file_name, tail=lines), sep="") - def __repr__(self): """ Human readable string representation From 2f8cd569ee6ecd75dc3023680698bb4cb0f2c04a Mon Sep 17 00:00:00 2001 From: Jan Janssen Date: Wed, 7 Feb 2024 14:19:45 +0100 Subject: [PATCH 39/42] Update pyiron_base/jobs/job/extension/files.py Co-authored-by: Marvin Poul --- pyiron_base/jobs/job/extension/files.py | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/pyiron_base/jobs/job/extension/files.py b/pyiron_base/jobs/job/extension/files.py index 9bbad020f..2287e21f7 100644 --- a/pyiron_base/jobs/job/extension/files.py +++ b/pyiron_base/jobs/job/extension/files.py @@ -55,14 +55,7 @@ def _get_file_dict(self): } def __dir__(self): - return list(self._get_file_dict().keys()) + [ - "list", - "tail", - "__dir__", - "__getitem__", - "__getattr__", - "_ipython_display_", - ] + return list(self._get_file_dict().keys()) + super().__dir__() def list(self) -> List[str]: """ From 5ae46937df65c2734c102eae8b55c00e05924289 Mon Sep 17 00:00:00 2001 From: Jan Janssen Date: Wed, 7 Feb 2024 14:20:11 +0100 Subject: [PATCH 40/42] Update pyiron_base/jobs/job/extension/files.py Co-authored-by: Marvin Poul --- pyiron_base/jobs/job/extension/files.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pyiron_base/jobs/job/extension/files.py b/pyiron_base/jobs/job/extension/files.py index 2287e21f7..4422edb7c 100644 --- a/pyiron_base/jobs/job/extension/files.py +++ b/pyiron_base/jobs/job/extension/files.py @@ -37,8 +37,8 @@ class FileBrowser: For files that have valid python variable names can also be accessed by attribute notation - >>> job.files.INCAR - ["SYSTEM=pyiron\n", "ENCUT=270\n", ...] + >>> job.files.INCAR # doctest: +SKIP + File('INCAR') """ __slots__ = ("_working_directory",) From 149fa23f8756862dbd285cf812f7505e4c3c7790 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Jan=C3=9Fen?= Date: Wed, 7 Feb 2024 14:27:26 +0100 Subject: [PATCH 41/42] fix test --- tests/flex/test_executablecontainer.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/tests/flex/test_executablecontainer.py b/tests/flex/test_executablecontainer.py index 3edf7e7de..95eb35138 100644 --- a/tests/flex/test_executablecontainer.py +++ b/tests/flex/test_executablecontainer.py @@ -93,10 +93,8 @@ def test_job_files(self): job_name="job_output_files" ) job.run() - self.assertEqual( - dir(job.files), - ['__dir__', '__getattr__', '__getitem__', '_ipython_display_', 'error_out', 'input_file', 'list', 'output_file', 'tail'] - ) + for file in ['error_out', 'input_file', 'output_file']: + self.assertTrue(file in dir(job.files)) output_file_path = os.path.abspath(os.path.join(__file__, "..", "test_executablecontainer", "job_output_files_hdf5", "job_output_files", "error.out")) if os.name != "nt": self.assertEqual(job.files.error_out, output_file_path) From 2ff2c782941bb56bb231e6e89eac5d5d7440b3ce Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Jan=C3=9Fen?= Date: Wed, 7 Feb 2024 14:31:08 +0100 Subject: [PATCH 42/42] fix genericjob test --- tests/job/test_genericJob.py | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/tests/job/test_genericJob.py b/tests/job/test_genericJob.py index 082a17400..5ef2a7339 100644 --- a/tests/job/test_genericJob.py +++ b/tests/job/test_genericJob.py @@ -580,11 +580,6 @@ def test_tail(self): for i in range(len(content)): with self.subTest(i=i): - with contextlib.redirect_stdout(io.StringIO(newline=os.linesep)) as f: - job.tail("test_file", lines=i+1) - reference_str = os.linesep.join(content[-i-1:]) + os.linesep - self.assertEqual(f.getvalue().replace('\r', ''), reference_str.replace('\r', ''), - "tail read incorrect lines from output file when job uncompressed!") with contextlib.redirect_stdout(io.StringIO(newline=os.linesep)) as f: job.files.tail("test_file", lines=i+1) reference_str = os.linesep.join(content[-i-1:]) + os.linesep @@ -598,11 +593,6 @@ def test_tail(self): job.compress() for i in range(len(content)): - with contextlib.redirect_stdout(io.StringIO()) as f: - job.tail("test_file", lines=i+1) - reference_str = os.linesep.join(content[-i-1:]) + os.linesep - self.assertEqual(f.getvalue().replace('\r', ''), reference_str.replace('\r', ''), - "tail read incorrect lines from output file when job compressed!") with contextlib.redirect_stdout(io.StringIO()) as f: job.files.tail("test_file", lines=i+1) reference_str = os.linesep.join(content[-i-1:]) + os.linesep