@property
def files(self):
    """
    Browse the files in the working directory of this job.

    A new :class:`FileBrowser` wrapping this job is created on every access. It allows to list the files,
    read them by indexing or attribute access and print their last lines.

    Returns:
        FileBrowser: file browser bound to this job
    """
    # The documentation is written out here instead of being copied from
    # ``FileBrowser.__doc__`` inside the class body: ``FileBrowser`` is only
    # defined at the end of this module, so looking it up while the class body
    # is still being executed raises a NameError on import.
    return FileBrowser(self)
class FileBrowser:
    r"""
    Allows to browse the files in a job directory.

    In IPython sessions this object displays itself as a listing of the job directory and the files inside.

    >>> job.files
    /path/to/my/job:
        pyiron.log
        error.out

    Access to the names of files is provided with :meth:`.list`

    >>> job.files.list()
    ['pyiron.log', 'error.out', 'INCAR']

    Access to the contents of files is provided by indexing into this object, which returns a list of lines in the
    file

    >>> job.files['error.out']
    ["Oh no\n", "Something went wrong!\n"]

    The :meth:`.tail` method prints the last lines of a file to stdout

    >>> job.files.tail('error.out', lines=1)
    Something went wrong!

    Files whose names are valid python variable names can also be accessed by attribute notation

    >>> job.files.INCAR
    ["SYSTEM=pyiron\n", "ENCUT=270\n", ...]
    """

    __slots__ = ("_job",)

    def __init__(self, job):
        """
        Args:
            job: the job whose working directory is browsed
        """
        self._job = job

    def list(self) -> List[str]:
        """
        List all files in the working directory of the job.

        Returns:
            list of str: file names
        """
        return _job_list_files(self._job)

    def _ipython_display_(self):
        """Print the working directory followed by one tab-indented line per file."""
        path = self._job.working_directory + ":"
        files = ["\t" + f for f in _job_list_files(self._job)]
        print(os.linesep.join([path, *files]))

    def tail(self, file: str, lines: int = 100):
        """
        Print the last lines of a file.

        Args:
            file (str): filename
            lines (int): number of lines to print

        Raises:
            FileNotFoundError: if the given file does not exist
        """
        print(*_job_read_file(self._job, file, tail=lines), sep="")

    def __getitem__(self, item):
        """
        Return the lines of the given file.

        Args:
            item (str): filename

        Returns:
            list of str: lines of the file

        Raises:
            KeyError: if the given file does not exist
        """
        if item not in _job_list_files(self._job):
            raise KeyError(item)

        return _job_read_file(self._job, item)

    def __getattr__(self, item):
        """
        Attribute style access to files, see :meth:`.__getitem__`.

        Raises:
            AttributeError: if the given file does not exist
        """
        # __getattr__ is also called for slots that are not filled yet (e.g. on
        # instances created with __new__); looking up a file would then access
        # self._job again and recurse without end.
        if item in type(self).__slots__:
            raise AttributeError(item)
        try:
            return self[item]
        except KeyError:
            raise AttributeError(item) from None
def _job_read_file(job, file_name, tail=None):
    """
    Return list of lines of the given file.

    Transparently reads the file from the archive if the job is compressed.

    If `tail` is given and the job is not compressed, only the end of the file is read instead of traversing the
    full file.

    Args:
        job (JobCore): job object whose working directory contains the file
        file_name (str): the file to read
        tail (int, optional): only return the last `tail` lines

    Returns:
        list of str: lines of the file

    Raises:
        FileNotFoundError: if the given file name does not exist in the job folder
        ValueError: if `tail` is negative
    """
    # Use the helper directly: job.list_files() is deprecated and would emit a
    # warning on every read.
    if file_name not in _job_list_files(job):
        raise FileNotFoundError(file_name)

    if tail is not None:
        if tail < 0:
            raise ValueError(f"tail must not be negative, got {tail}!")
        if tail == 0:
            # lines[-0:] would be the whole file, not zero lines
            return []

    if _job_is_compressed(job):
        with tarfile.open(_job_compressed_name(job), encoding="utf8") as tar:
            lines = [
                line.decode("utf8") for line in tar.extractfile(file_name).readlines()
            ]
        return lines if tail is None else lines[-tail:]

    path = posixpath.join(job.working_directory, file_name)
    if tail is None:
        with open(path) as f:
            return f.readlines()

    # monty yields the lines back to front and without their newlines
    lines = [
        line + os.linesep for line in islice(monty.io.reverse_readfile(path), tail)
    ]
    lines.reverse()
    # compatibility with the other branches: a file that does not end with a
    # newline must not gain one on its last line.  Empty files yield no lines
    # at all, so there is nothing to strip.
    # NOTE(review): for files that do end with a newline monty appears to yield
    # an empty string first, so the result may differ from readlines()[-tail:]
    # -- confirm against monty.io.reverse_readfile.
    if lines:
        lines[-1] = lines[-1].rstrip(os.linesep)
    return lines
def test_tail(self):
    """job.tail should print the last lines of a file to stdout"""
    job = self.project.load(self.project.get_job_ids()[0])
    job.decompress()
    content = ["Content", "More", "Lines"]
    with open(os.path.join(job.working_directory, "test_file"), "w") as f:
        f.write(os.linesep.join(content))

    for n_lines in range(1, len(content) + 1):
        expected = os.linesep.join(content[-n_lines:]) + os.linesep
        with self.subTest(lines=n_lines, compressed=False):
            with contextlib.redirect_stdout(io.StringIO(newline=os.linesep)) as f:
                job.tail("test_file", lines=n_lines)
            self.assertEqual(f.getvalue(), expected,
                             "tail read incorrect lines from output file when job uncompressed!")

    job.compress()
    for n_lines in range(1, len(content) + 1):
        expected = os.linesep.join(content[-n_lines:]) + os.linesep
        # run every line count as its own sub test, like above, so that one
        # failing count does not hide the results of the others
        with self.subTest(lines=n_lines, compressed=True):
            with contextlib.redirect_stdout(io.StringIO()) as f:
                job.tail("test_file", lines=n_lines)
            self.assertEqual(f.getvalue(), expected,
                             "tail read incorrect lines from output file when job compressed!")