Merge pull request #1480 from pyiron/scriptjob_calculate

Scriptjob implement calculate() function
pyiron · Jun 20, 2024 · df85bc1 · df85bc1
2 parents e4400f0 + 2e248ff
commit df85bc1
Showing 1 changed file with 62 additions and 56 deletions.
diff --git a/pyiron_base/jobs/script.py b/pyiron_base/jobs/script.py
@@ -6,6 +6,8 @@
 """
 
 import os
+from typing import Optional
+
 from pyiron_base.jobs.job.generic import GenericJob
 from pyiron_base.storage.datacontainer import DataContainer
 
@@ -217,6 +219,11 @@ def __init__(self, project, job_name):
         self._script_path = None
         self.input = DataContainer(table_name="custom_dict")
         self._enable_mpi4py = False
+        # Set the _job_with_calculate_function flag to True to let run_static() execute the python function generated
+        # by the job with its arguments, job.get_calculate_function(**job.calculate_kwargs), without calling the old
+        # interface with write_input() and collect_output(). Finally, the output dictionary is stored in the HDF5 file
+        # using self.save_output(output_dict, shell_output).
+        self._job_with_calculate_function = True
 
     @property
     def script_path(self):
@@ -237,7 +244,9 @@ def script_path(self, path):
             path (str): relative or absolute path to the python script or a corresponding notebook
         """
         if isinstance(path, str):
-            self._script_path = self._get_abs_path(path)
+            self._script_path = os.path.normpath(
+                os.path.join(os.path.abspath(os.path.curdir), path)
+            )
             self.executable = self._executable_command(
                 working_directory=self.working_directory,
                 script_path=self._script_path,
@@ -251,33 +260,17 @@ def script_path(self, path):
                 "path should be a string, but ", path, " is a ", type(path), " instead."
             )
 
-    def enable_mpi4py(self):
-        self._enable_mpi4py = True
+    def collect_logfiles(self):
+        """
+        Compatibility function - but no log files are being collected
+        """
+        pass
 
     def disable_mpi4py(self):
         self._enable_mpi4py = False
 
-    def validate_ready_to_run(self):
-        if self.script_path is None:
-            raise TypeError(
-                "ScriptJob.script_path expects a path but got None. Please provide a path before "
-                + "running."
-            )
-
-    def set_input_to_read_only(self):
-        """
-        This function enforces read-only mode for the input classes, but it has to be implement in the individual
-        classes.
-        """
-        super().set_input_to_read_only()
-        self.input.read_only = True
-
-    def to_dict(self):
-        job_dict = super().to_dict()
-        job_dict["input/path"] = self._script_path
-        job_dict["input/parallel"] = self._enable_mpi4py
-        job_dict["input/custom_dict"] = self.input.to_builtin()
-        return job_dict
+    def enable_mpi4py(self):
+        self._enable_mpi4py = True
 
     def from_dict(self, job_dict):
         super().from_dict(job_dict=job_dict)
@@ -289,7 +282,7 @@ def from_dict(self, job_dict):
         if "custom_dict" in job_dict["input"].keys():
             self.input.update(job_dict["input"]["custom_dict"])
 
-    def get_input_file_dict(self):
+    def get_input_dict(self):
         """
         Get an hierarchical dictionary of input files. On the first level the dictionary is divided in file_to_create
         and files_to_copy. Both are dictionaries use the file names as keys. In file_to_create the values are strings
@@ -299,7 +292,7 @@ def get_input_file_dict(self):
         Returns:
             dict: hierarchical dictionary of input files
         """
-        input_file_dict = super().get_input_file_dict()
+        input_file_dict = super().get_input_dict()
         if self._script_path is not None:
             files_to_copy_dict = {
                 os.path.basename(self._script_path): self._script_path
@@ -315,11 +308,25 @@ def get_input_file_dict(self):
                 self.executable._mpi = True
         return input_file_dict
 
-    def collect_output(self):
+    def run_if_lib(self):
         """
-        Collect output function updates the master ID entries for all the child jobs created by this script job, if the
-        child job is already assigned to an master job nothing happens - master IDs are not overwritten.
+        Compatibility function - but library run mode is not available
+        """
+        raise NotImplementedError(
+            "Library run mode is not implemented for script jobs."
+        )
+
+    def run_static(self):
+        """
+        The run_static() function is called internally in pyiron to trigger the execution of the executable. This is
+        typically divided into three steps: (1) the generation of the calculate function and its inputs, (2) the
+        execution of this function and (3) storing the output of this function in the HDF5 file.
+
+        In the future, the execution of the calculate function might be transferred to a separate process, so the
+        separation into these three distinct steps is necessary to simplify the submission to an external executor.
         """
+        super().run_static()
+        # Update masterid for all jobs created in the working directory of the script job
         for job in self.project.iter_jobs(recursive=False, convert_to_object=False):
             pr_job = self.project.open(
                 os.path.relpath(job.working_directory, self.project.path)
@@ -328,17 +335,36 @@ def collect_output(self):
                 if pr_job.db.get_item_by_id(subjob_id)["masterid"] is None:
                     pr_job.db.item_update({"masterid": str(job.job_id)}, subjob_id)
 
-    def run_if_lib(self):
+    def save_output(
+        self, output_dict: Optional[dict] = None, shell_output: Optional[str] = None
+    ):
+        pass
+
+    def set_input_to_read_only(self):
         """
-        Compatibility function - but library run mode is not available
+        This function enforces read-only mode for the input classes, but it has to be implemented in the individual
+        classes.
         """
-        raise NotImplementedError(
-            "Library run mode is not implemented for script jobs."
-        )
+        super().set_input_to_read_only()
+        self.input.read_only = True
 
-    def collect_logfiles(self):
+    def to_dict(self):
+        job_dict = super().to_dict()
+        job_dict["input/path"] = self._script_path
+        job_dict["input/parallel"] = self._enable_mpi4py
+        job_dict["input/custom_dict"] = self.input.to_builtin()
+        return job_dict
+
+    def validate_ready_to_run(self):
+        if self.script_path is None:
+            raise TypeError(
+                "ScriptJob.script_path expects a path but got None. Please provide a path before "
+                + "running."
+            )
+
+    def _executable_activate_mpi(self):
         """
-        Compatibility function - but no log files are being collected
+        Internal helper function to switch the executable to MPI mode
         """
         pass
 
@@ -371,23 +397,3 @@ def _executable_command(
             return ["mpirun", "-np", str(cores), "python", path]
         else:
             raise ValueError("Filename not recognized: ", path)
-
-    def _executable_activate_mpi(self):
-        """
-        Internal helper function to switch the executable to MPI mode
-        """
-        pass
-
-    @staticmethod
-    def _get_abs_path(path):
-        """
-        internal function to convert absolute or relative paths to absolute paths, using os.path.normpath,
-        os.path.abspath and os.path.curdir
-
-        Args:
-           path (str): relative or absolute path
-
-        Returns:
-            str: absolute path
-        """
-        return os.path.normpath(os.path.join(os.path.abspath(os.path.curdir), path))