Add a simple FileBrowser to JobCore #969

Closed · wants to merge 20 commits
1 change: 1 addition & 0 deletions .ci_support/environment.yml
@@ -20,3 +20,4 @@ dependencies:
- sqlalchemy =1.4.44
- tqdm =4.64.1
- traitlets =5.6.0
- monty =2022.9.9
113 changes: 107 additions & 6 deletions pyiron_base/jobs/job/core.py
@@ -10,6 +10,7 @@
import os
import posixpath
import shutil
from typing import List
import warnings

from pyiron_base.interfaces.has_groups import HasGroups
@@ -26,11 +27,14 @@
_job_is_compressed,
_job_compress,
_job_decompress,
_job_list_files,
_job_read_file,
_job_delete_files,
_job_delete_hdf,
_job_remove_folder,
)
from pyiron_base.state import state
from pyiron_base.utils.deprecate import deprecate

__author__ = "Jan Janssen"
__copyright__ = (
@@ -134,6 +138,11 @@ def __init__(self, project, job_name):
def content(self):
return self._hdf5_content

@property
def files(self):
return FileBrowser(self)
files.__doc__ = FileBrowser.__doc__

@property
def job_name(self):
"""
@@ -610,6 +619,7 @@ def get_job_id(self, job_specifier=None):
else:
return None

@deprecate("use job.files.list()")
def list_files(self):
"""
List files inside the working directory
@@ -620,9 +630,7 @@ def list_files(self):
Returns:
list: list of file names
"""
if os.path.isdir(self.working_directory):
return os.listdir(self.working_directory)
return []
return _job_list_files(self)

def list_childs(self):
"""
@@ -914,9 +922,11 @@ def __getitem__(self, item):
"""

if item in self.list_files():
file_name = posixpath.join(self.working_directory, "{}".format(item))
with open(file_name) as f:
return f.readlines()
warnings.warn(
"Using __getitem__ on a job to access files in deprecated: use job.files instead!",
category=DeprecationWarning
)
return _job_read_file(self, item)

# first try to access HDF5 directly to make the common case fast
try:
@@ -998,6 +1008,20 @@ def __delitem__(self, key):
"""
del self.project_hdf5[posixpath.join(self.project_hdf5.h5_path, key)]

@deprecate("use job.files.tail() instead!")
def tail(self, file_name, lines=100):
"""
Print the last lines of the given file in the job folder.

Args:
file_name (str): the file to print
lines (int): how many lines to print

Raises:
FileNotFoundError: if the given file name does not exist in the job folder
"""
print(*_job_read_file(self, file_name, tail=lines), sep="")

def __repr__(self):
"""
Human readable string representation
@@ -1115,3 +1139,80 @@ def __dir__(self):

def __repr__(self):
return self._project_hdf5.__repr__()


class FileBrowser:
"""
Allows browsing the files in a job directory.

By default this object prints itself as a listing of the job directory and
the files inside.

>>> job.files
/path/to/my/job:
\tpyiron.log
\terror.out

Access to the names of files is provided with :meth:`.list`

>>> job.files.list()
['pyiron.log', 'error.out', 'INCAR']

Access to the contents of files is provided by indexing into this object,
which returns a list of lines in the file

>>> job.files['error.out']
["Oh no\n", "Something went wrong!\n"]

The :meth:`.tail` method prints the last lines of a file to stdout

>>> job.files.tail('error.out', lines=1)
Something went wrong!

Files whose names are valid python identifiers can also be accessed by
attribute notation

>>> job.files.INCAR
["SYSTEM=pyiron\n", "ENCUT=270\n", ...]
"""

__slots__ = ("_job",)

def __init__(self, job):
self._job = job

def list(self) -> List[str]:
"""
List all files in the working directory of the job.
"""
return _job_list_files(self._job)

def _ipython_display_(self):
path = self._job.working_directory + ":"
files = ["\t" + f for f in _job_list_files(self._job)]
print(os.linesep.join([path, *files]))

def tail(self, file: str, lines: int = 100):
"""
Print the last lines of a file.

Args:
file (str): filename
lines (int): number of lines to print

Raises:
FileNotFoundError: if the given file does not exist
"""
print(*_job_read_file(self._job, file, tail=lines), sep="")

def __getitem__(self, item):
if item not in _job_list_files(self._job):
raise KeyError(item)

return _job_read_file(self._job, item)

def __getattr__(self, item):
try:
return self[item]
except KeyError:
raise AttributeError(item) from None
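
For reference, a minimal usage sketch of the `files` interface added above; the job handle `job` and the file names are illustrative, not taken from this diff:

print(job.files.list())                # e.g. ['error.out', 'pyiron.log']
error = job.files['error.out']         # list of lines, like readlines(); KeyError if missing
job.files.tail('error.out', lines=5)   # print the last five lines to stdout
print(job.files.INCAR)                 # attribute access for names that are valid identifiers

Both the listing and the reads work transparently whether the job directory is compressed or not, while the old `job['error.out']` spelling keeps working but now emits a DeprecationWarning.
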
94 changes: 83 additions & 11 deletions pyiron_base/jobs/job/util.py
@@ -4,13 +4,15 @@
"""
Helper functions for the JobCore and GenericJob objects
"""
from itertools import islice
import os
import posixpath
import psutil
import tarfile
import stat
import shutil
from typing import Union, Dict
import monty.io
from pyiron_base.utils.instance import static_isinstance
from pyiron_base.utils.safetar import safe_extract

@@ -239,6 +241,11 @@ def _kill_child(job):
job_process.kill()


def _job_compressed_name(job):
"""Return the canonical file name of a compressed job."""
return os.path.join(job.working_directory, job.job_name + ".tar.bz2")


def _job_compress(job, files_to_compress=None):
"""
Compress the output files of a job object.
@@ -253,10 +260,7 @@ def _job_compress(job, files_to_compress=None):
cwd = os.getcwd()
try:
os.chdir(job.working_directory)
with tarfile.open(
os.path.join(job.working_directory, job.job_name + ".tar.bz2"),
"w:bz2",
) as tar:
with tarfile.open(_job_compressed_name(job), "w:bz2") as tar:
for name in files_to_compress:
if "tar" not in name and not stat.S_ISFIFO(os.stat(name).st_mode):
tar.add(name)
@@ -270,7 +274,7 @@
finally:
os.chdir(cwd)
else:
print("The files are already compressed!")
job.logger.info("The files are already compressed!")


def _job_decompress(job):
@@ -280,8 +284,8 @@
Args:
job (JobCore): job object to decompress
"""
tar_file_name = _job_compressed_name(job)
try:
tar_file_name = os.path.join(job.working_directory, job.job_name + ".tar.bz2")
with tarfile.open(tar_file_name, "r:bz2") as tar:
safe_extract(tar, job.working_directory)
os.remove(tar_file_name)
@@ -299,11 +303,79 @@ def _job_is_compressed(job):
Returns:
bool: [True/False]
"""
compressed_name = job.job_name + ".tar.bz2"
for name in job.list_files():
if compressed_name in name:
return True
return False
compressed_name = os.path.basename(_job_compressed_name(job))
return compressed_name in os.listdir(job.working_directory)


def _job_list_files(job):
"""
Returns list of files in the jobs working directory.

If the job is compressed, return a list of files in the archive.

Args:
job (JobCore): job object to inspect files in

Returns:
list of str: file names
"""
if os.path.isdir(job.working_directory):
if _job_is_compressed(job):
with tarfile.open(_job_compressed_name(job), "r") as tar:
return [member.name for member in tar.getmembers() if member.isfile()]
else:
return os.listdir(job.working_directory)
return []


def _job_read_file(job, file_name, tail=None):
"""
Return list of lines of the given file.

Transparently decompresses the file if job is compressed.

If `tail` is given and the job is not compressed, only the last lines are
read instead of traversing the full file.

Args:
job (JobCore): job whose file to read
file_name (str): the file to read
tail (int, optional): only return the last lines

Returns:
list of str: lines of the file

Raises:
FileNotFoundError: if the given file name does not exist in the job folder
"""
if file_name not in job.list_files():
raise FileNotFoundError(file_name)

if _job_is_compressed(job):
with tarfile.open(_job_compressed_name(job), encoding="utf8") as f:
lines = [
line.decode("utf8") for line in f.extractfile(file_name).readlines()
]
if tail is None:
return lines
else:
return lines[-tail:]
else:
file_name = posixpath.join(job.working_directory, file_name)
if tail is None:
with open(file_name) as f:
return f.readlines()
else:
lines = list(
reversed(
[
l + os.linesep
for l in islice(monty.io.reverse_readfile(file_name), tail)
]
)
)
# compatibility with the other methods
# monty strips all newlines, whereas the other ways of reading do
# not. So if a file does not end with a newline (as most text
# files), adding it to every line like above adds an additional one.
lines[-1] = lines[-1].rstrip(os.linesep)
return lines


def _job_archive(job):
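
The tail branch of `_job_read_file` above avoids traversing the whole file for uncompressed jobs by walking it backwards with `monty.io.reverse_readfile`. A standalone sketch of that idea, assuming a plain text file on disk (the helper name `tail_lines` is illustrative and not part of this diff):

from itertools import islice
import os

import monty.io


def tail_lines(path, n=100):
    """Return the last ``n`` lines of the text file at ``path``, oldest first."""
    # reverse_readfile yields lines from the end of the file, newest first,
    # with the trailing newlines stripped, so re-add them and restore order.
    lines = [line + os.linesep for line in islice(monty.io.reverse_readfile(path), n)]
    lines.reverse()
    return lines

Like the PR code, this simple sketch re-adds a newline to every line, including the last one; the helper above additionally strips the extra linesep from the final line so the result matches what readlines() returns for files that do not end in a newline.
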
1 change: 1 addition & 0 deletions setup.py
@@ -45,6 +45,7 @@
'tables==3.7.0',
'tqdm==4.64.1',
'traitlets==5.6.0',
'monty==2022.9.9',
],
cmdclass=versioneer.get_cmdclass(),

28 changes: 26 additions & 2 deletions tests/job/test_genericJob.py
@@ -2,8 +2,10 @@
# Copyright (c) Max-Planck-Institut für Eisenforschung GmbH - Computational Materials Design (CM) Department
# Distributed under the terms of "New BSD License", see the LICENSE file.

import contextlib
import unittest
import os
import io
from pyiron_base.storage.parameters import GenericParameters
from pyiron_base.jobs.job.generic import GenericJob
from pyiron_base._tests import TestWithFilledProject, ToyJob
@@ -423,7 +425,7 @@ def test_error(self):

def test_compress(self):
job = self.project.load(self.project.get_job_ids()[0])
wd_files = job.list_files()
wd_files = os.listdir(job.working_directory)
self.assertEqual(len(wd_files), 1, "Only one zipped file should be present in the working directory")
self.assertEqual(wd_files[0], f"{job.name}.tar.bz2", "Inconsistent name for the zipped file")

@@ -437,7 +439,7 @@ def test_restart(self):
job = self.project.load(self.project.get_job_ids()[0])
job_restart = job.restart()
job_restart.run()
wd_files = job_restart.list_files()
wd_files = os.listdir(job_restart.working_directory)
self.assertEqual(len(wd_files), 1, "Only one zipped file should be present in the working directory")
self.assertEqual(wd_files[0], f"{job_restart.name}.tar.bz2", "Inconsistent name for the zipped file")
job_restart.decompress()
@@ -485,5 +487,27 @@ def test_return_codes(self):
pass
self.assertTrue(j.status.aborted, "Job did not abort even though return code is 2!")

def test_tail(self):
"""job.tail should print the last lines of a file to stdout"""
job = self.project.load(self.project.get_job_ids()[0])
job.decompress()
content = ["Content", "More", "Lines"]
with open(os.path.join(job.working_directory, "test_file"), "w") as f:
f.write(os.linesep.join(content))

for i in range(len(content)):
with self.subTest(i=i):
with contextlib.redirect_stdout(io.StringIO(newline=os.linesep)) as f:
job.tail("test_file", lines=i+1)
self.assertEqual(f.getvalue(), os.linesep.join(content[-i-1:]) + os.linesep,
"tail read incorrect lines from output file when job uncompressed!")

job.compress()
for i in range(len(content)):
with contextlib.redirect_stdout(io.StringIO()) as f:
job.tail("test_file", lines=i+1)
self.assertEqual(f.getvalue(), os.linesep.join(content[-i-1:]) + os.linesep,
"tail read incorrect lines from output file when job compressed!")

if __name__ == "__main__":
unittest.main()