Add a simple FileBrowser to JobCore #969

Closed · wants to merge 20 commits
1 change: 1 addition & 0 deletions .ci_support/environment.yml
@@ -20,3 +20,4 @@ dependencies:
- sqlalchemy =1.4.44
- tqdm =4.64.1
- traitlets =5.6.0
- monty =2022.9.9
113 changes: 107 additions & 6 deletions pyiron_base/jobs/job/core.py
@@ -10,6 +10,7 @@
import os
import posixpath
import shutil
from typing import List
import warnings

from pyiron_base.interfaces.has_groups import HasGroups
@@ -26,11 +27,14 @@
_job_is_compressed,
_job_compress,
_job_decompress,
_job_list_files,
_job_read_file,
_job_delete_files,
_job_delete_hdf,
_job_remove_folder,
)
from pyiron_base.state import state
from pyiron_base.utils.deprecate import deprecate

__author__ = "Jan Janssen"
__copyright__ = (
@@ -134,6 +138,11 @@ def __init__(self, project, job_name):
def content(self):
return self._hdf5_content

@property
def files(self):
return FileBrowser(self)
files.__doc__ = FileBrowser.__doc__

@property
def job_name(self):
"""
@@ -610,6 +619,7 @@ def get_job_id(self, job_specifier=None):
else:
return None

@deprecate("use job.files.list()")
def list_files(self):
"""
List files inside the working directory
@@ -620,9 +630,7 @@ def list_files(self):
Returns:
list: list of file names
"""
if os.path.isdir(self.working_directory):
return os.listdir(self.working_directory)
return []
return _job_list_files(self)

def list_childs(self):
"""
@@ -914,9 +922,11 @@ def __getitem__(self, item):
"""

if item in self.list_files():
file_name = posixpath.join(self.working_directory, "{}".format(item))
with open(file_name) as f:
return f.readlines()
warnings.warn(
"Using __getitem__ on a job to access files in deprecated: use job.files instead!",
category=DeprecationWarning
)
return _job_read_file(self, item)

# first try to access HDF5 directly to make the common case fast
try:
@@ -998,6 +1008,20 @@ def __delitem__(self, key):
"""
del self.project_hdf5[posixpath.join(self.project_hdf5.h5_path, key)]

@deprecate("use job.files.tail() instead!")
def tail(self, file_name, lines=100):
"""
Print the last lines of the given file in the job folder.

Args:
file_name (str): the file to print
lines (int): how many lines to print

Raises:
FileNotFoundError: if the given file name does not exist in the job folder
"""
print(*_job_read_file(self, file_name, tail=lines), sep="")

def __repr__(self):
"""
Human readable string representation
@@ -1115,3 +1139,80 @@ def __dir__(self):

def __repr__(self):
return self._project_hdf5.__repr__()


class FileBrowser:
"""
Allows browsing the files in a job directory.

By default this object prints itself as a listing of the job directory and
the files inside.

>>> job.files
/path/to/my/job:
\tpyiron.log
\terror.out

Access to the names of files is provided with :meth:`.list`

>>> job.files.list()
['pyiron.log', 'error.out', 'INCAR']

Access to the contents of files is provided by indexing into this object,
which returns a list of lines in the file

>>> job.files['error.out']
["Oh no\n", "Something went wrong!\n"]

The :meth:`.tail` method prints the last lines of a file to stdout

>>> job.files.tail('error.out', lines=1)
Something went wrong!

Files whose names are valid python identifiers can also be accessed by
attribute notation

>>> job.files.INCAR
["SYSTEM=pyiron\n", "ENCUT=270\n", ...]
"""

__slots__ = ("_job",)

def __init__(self, job):
self._job = job

def list(self) -> List[str]:
"""
List all files in the working directory of the job.
"""
return _job_list_files(self._job)

def _ipython_display_(self):
path = self._job.working_directory + ":"
files = ["\t" + f for f in _job_list_files(self._job)]
print(os.linesep.join([path, *files]))

def tail(self, file: str, lines: int = 100):
"""
Print the last lines of a file.

Args:
file (str): filename
lines (int): number of lines to print

Raises:
FileNotFoundError: if the given file does not exist
"""
print(*_job_read_file(self._job, file, tail=lines), sep="")

def __getitem__(self, item):
if item not in _job_list_files(self._job):
raise KeyError(item)

return _job_read_file(self._job, item)

def __getattr__(self, item):
try:
return self[item]
except KeyError:
raise AttributeError(item) from None
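
For reference, a minimal usage sketch of the `files` interface added above; the job handle `job` and the file names are illustrative, not taken from this diff:

print(job.files.list())                # e.g. ['error.out', 'pyiron.log']
error = job.files['error.out']         # list of lines, like readlines(); KeyError if missing
job.files.tail('error.out', lines=5)   # print the last five lines to stdout
print(job.files.INCAR)                 # attribute access for names that are valid identifiers

Both the listing and the reads work transparently whether the job directory is compressed or not, while the old `job['error.out']` spelling keeps working but now emits a DeprecationWarning.
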
94 changes: 83 additions & 11 deletions pyiron_base/jobs/job/util.py
@@ -4,13 +4,15 @@
"""
Helper functions for the JobCore and GenericJob objects
"""
from itertools import islice
import os
import posixpath
import psutil
import tarfile
import stat
import shutil
from typing import Union, Dict
import monty.io
from pyiron_base.utils.instance import static_isinstance
from pyiron_base.utils.safetar import safe_extract

@@ -239,6 +241,11 @@ def _kill_child(job):
job_process.kill()


def _job_compressed_name(job):
"""Return the canonical file name of a compressed job."""
return os.path.join(job.working_directory, job.job_name + ".tar.bz2")


def _job_compress(job, files_to_compress=None):
"""
Compress the output files of a job object.
@@ -253,10 +260,7 @@ def _job_compress(job, files_to_compress=None):
cwd = os.getcwd()
try:
os.chdir(job.working_directory)
with tarfile.open(
os.path.join(job.working_directory, job.job_name + ".tar.bz2"),
"w:bz2",
) as tar:
with tarfile.open(_job_compressed_name(job), "w:bz2") as tar:
for name in files_to_compress:
if "tar" not in name and not stat.S_ISFIFO(os.stat(name).st_mode):
tar.add(name)
@@ -270,7 +274,7 @@
finally:
os.chdir(cwd)
else:
print("The files are already compressed!")
job.logger.info("The files are already compressed!")


def _job_decompress(job):
@@ -280,8 +284,8 @@
Args:
job (JobCore): job object to decompress
"""
tar_file_name = _job_compressed_name(job)
try:
tar_file_name = os.path.join(job.working_directory, job.job_name + ".tar.bz2")
with tarfile.open(tar_file_name, "r:bz2") as tar:
safe_extract(tar, job.working_directory)
os.remove(tar_file_name)
@@ -299,11 +303,79 @@ def _job_is_compressed(job):
Returns:
bool: [True/False]
"""
compressed_name = job.job_name + ".tar.bz2"
for name in job.list_files():
if compressed_name in name:
return True
return False
compressed_name = os.path.basename(_job_compressed_name(job))
return compressed_name in os.listdir(job.working_directory)


def _job_list_files(job):
"""
Returns list of files in the jobs working directory.

If the job is compressed, return a list of files in the archive.

Args:
job (JobCore): job object to inspect files in

Returns:
list of str: file names
"""
if os.path.isdir(job.working_directory):
if _job_is_compressed(job):
with tarfile.open(_job_compressed_name(job), "r") as tar:
return [member.name for member in tar.getmembers() if member.isfile()]
else:
return os.listdir(job.working_directory)
return []


def _job_read_file(job, file_name, tail=None):
"""
Return list of lines of the given file.

Transparently decompresses the file if job is compressed.

If `tail` is given and the job is not compressed, only the last lines are
read instead of traversing the full file.

Args:
job (JobCore): job whose file to read
file_name (str): the file to read
tail (int, optional): only return the last lines

Returns:
list of str: lines of the file

Raises:
FileNotFoundError: if the given file name does not exist in the job folder
"""
if file_name not in job.list_files():
raise FileNotFoundError(file_name)

if _job_is_compressed(job):
with tarfile.open(_job_compressed_name(job), encoding="utf8") as f:
lines = [
line.decode("utf8") for line in f.extractfile(file_name).readlines()
]
if tail is None:
return lines
else:
return lines[-tail:]
else:
file_name = posixpath.join(job.working_directory, file_name)
if tail is None:
with open(file_name) as f:
return f.readlines()
else:
lines = list(
reversed(
[
l + os.linesep
for l in islice(monty.io.reverse_readfile(file_name), tail)
]
)
)
# compatibility with the other methods
# monty strips all newlines, whereas the other ways of reading do
# not. So if a file does not end with a newline (as most text
# files), adding it to every line like above adds an additional one.
lines[-1] = lines[-1].rstrip(os.linesep)
return lines


def _job_archive(job):
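
The tail branch of `_job_read_file` above avoids traversing the whole file for uncompressed jobs by walking it backwards with `monty.io.reverse_readfile`. A standalone sketch of that idea, assuming a plain text file on disk (the helper name `tail_lines` is illustrative and not part of this diff):

from itertools import islice
import os

import monty.io


def tail_lines(path, n=100):
    """Return the last ``n`` lines of the text file at ``path``, oldest first."""
    # reverse_readfile yields lines from the end of the file, newest first,
    # with the trailing newlines stripped, so re-add them and restore order.
    lines = [line + os.linesep for line in islice(monty.io.reverse_readfile(path), n)]
    lines.reverse()
    return lines

Like the PR code, this simple sketch re-adds a newline to every line, including the last one; the helper above additionally strips the extra linesep from the final line so the result matches what readlines() returns for files that do not end in a newline.
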
1 change: 1 addition & 0 deletions setup.py
@@ -45,6 +45,7 @@
'tables==3.7.0',
'tqdm==4.64.1',
'traitlets==5.6.0',
'monty==2022.9.9',
],
cmdclass=versioneer.get_cmdclass(),

28 changes: 26 additions & 2 deletions tests/job/test_genericJob.py
@@ -2,8 +2,10 @@
# Copyright (c) Max-Planck-Institut für Eisenforschung GmbH - Computational Materials Design (CM) Department
# Distributed under the terms of "New BSD License", see the LICENSE file.

import contextlib
import unittest
import os
import io
from pyiron_base.storage.parameters import GenericParameters
from pyiron_base.jobs.job.generic import GenericJob
from pyiron_base._tests import TestWithFilledProject, ToyJob
@@ -423,7 +425,7 @@ def test_error(self):

def test_compress(self):
job = self.project.load(self.project.get_job_ids()[0])
wd_files = job.list_files()
wd_files = os.listdir(job.working_directory)
self.assertEqual(len(wd_files), 1, "Only one zipped file should be present in the working directory")
self.assertEqual(wd_files[0], f"{job.name}.tar.bz2", "Inconsistent name for the zipped file")

@@ -437,7 +439,7 @@ def test_restart(self):
job = self.project.load(self.project.get_job_ids()[0])
job_restart = job.restart()
job_restart.run()
wd_files = job_restart.list_files()
wd_files = os.listdir(job_restart.working_directory)
self.assertEqual(len(wd_files), 1, "Only one zipped file should be present in the working directory")
self.assertEqual(wd_files[0], f"{job_restart.name}.tar.bz2", "Inconsistent name for the zipped file")
job_restart.decompress()
@@ -485,5 +487,27 @@ def test_return_codes(self):
pass
self.assertTrue(j.status.aborted, "Job did not abort even though return code is 2!")

def test_tail(self):
"""job.tail should print the last lines of a file to stdout"""
job = self.project.load(self.project.get_job_ids()[0])
job.decompress()
content = ["Content", "More", "Lines"]
with open(os.path.join(job.working_directory, "test_file"), "w") as f:
f.write(os.linesep.join(content))

for i in range(len(content)):
with self.subTest(i=i):
with contextlib.redirect_stdout(io.StringIO(newline=os.linesep)) as f:
job.tail("test_file", lines=i+1)
self.assertEqual(f.getvalue(), os.linesep.join(content[-i-1:]) + os.linesep,
"tail read incorrect lines from output file when job uncompressed!")

job.compress()
for i in range(len(content)):
with contextlib.redirect_stdout(io.StringIO()) as f:
job.tail("test_file", lines=i+1)
self.assertEqual(f.getvalue(), os.linesep.join(content[-i-1:]) + os.linesep,
"tail read incorrect lines from output file when job compressed!")

if __name__ == "__main__":
unittest.main()