LLNL · crkrenn · May 17, 2020 · Aug 21, 2020 · Aug 21, 2020 · Oct 13, 2020
diff --git a/.gitignore b/.gitignore
@@ -38,3 +38,4 @@ Pipfile.lock
 
 #pycharm
 .idea/
+output
diff --git a/maestrowf/datastructures/core/executiongraph.py b/maestrowf/datastructures/core/executiongraph.py
@@ -338,6 +338,7 @@ def __init__(self, submission_attempts=1, submission_throttle=0,
         # Member variables for execution.
         self._adapter = None
         self._description = OrderedDict()
+        self.linker = None
 
         # Generate tempdir (if specfied)
         if use_tmp:
@@ -573,6 +574,8 @@ def _execute_record(self, record, adapter, restart=False):
             # Generate the script for execution on the fly.
             record.setup_workspace()    # Generate the workspace.
             record.generate_script(adapter, self._tmp_dir)
+            if self.linker:
+                self.linker.link(record)
 
         if self.dry_run:
             record.mark_end(State.DRYRUN)

diff --git a/maestrowf/datastructures/core/linker.py b/maestrowf/datastructures/core/linker.py
diff --git a/maestrowf/datastructures/core/study.py b/maestrowf/datastructures/core/study.py
@@ -68,8 +68,11 @@ class StudyStep:
     def __init__(self):
         """Object that represents a single workflow step."""
         self._name = ""
+        self._step_name = ""
+        self._param_string = ""
         self.description = ""
         self.nickname = ""
+        self.combo = None
         self.run = {
                         "cmd":              "",
                         "depends":          "",
@@ -94,6 +97,8 @@ def apply_parameters(self, combo):
         # Create a new StudyStep and populate it with substituted values.
         tmp = StudyStep()
         tmp.__dict__ = apply_function(self.__dict__, combo.apply)
+        tmp._step_name = self.__dict__["_name"]
+        tmp.combo = combo
         # Return if the new step is modified and the step itself.
 
         return self.__ne__(tmp), tmp
@@ -127,6 +132,17 @@ def real_name(self):
         """
         return self._name
 
+    @property
+    def step_name(self):
+        """
+        Get the step's name as recorded before parameters were applied.
+
+        :returns: ``_step_name`` when it is non-empty, otherwise ``name``.
+        """
+        if self._step_name:
+            return self._step_name
+        return self.name
+
     def __eq__(self, other):
         """
         Equality operator for the StudyStep class.
@@ -421,7 +437,7 @@ def setup_environment(self):
 
     def configure_study(self, submission_attempts=1, restart_limit=1,
                         throttle=0, use_tmp=False, hash_ws=False,
-                        dry_run=False):
+                        dry_run=False, linker=None):
         """
         Perform initial configuration of a study. \
 
@@ -438,6 +454,7 @@ def configure_study(self, submission_attempts=1, restart_limit=1,
         ExecutionGraph dumps its information into a temporary directory. \
         :param dry_run: Boolean value that toggles dry run to just generate \
         study workspaces and scripts without execution or status checking. \
+        :param linker: Linker object.
         :returns: True if the Study is successfully setup, False otherwise. \
         """
 
@@ -447,6 +464,10 @@ def configure_study(self, submission_attempts=1, restart_limit=1,
         self._use_tmp = use_tmp
         self._hash_ws = hash_ws
         self._dry_run = dry_run
+        self.linker = linker
+        make_links_flag = False
+        if linker:
+            make_links_flag = linker.make_links_flag
 
         LOGGER.info(
             "\n------------------------------------------\n"
@@ -456,10 +477,11 @@ def configure_study(self, submission_attempts=1, restart_limit=1,
             "Use temporary directory =   %s\n"
             "Hash workspaces =           %s\n"
             "Dry run enabled =           %s\n"
+            "Make links enabled =        %s\n"
             "Output path =               %s\n"
             "------------------------------------------",
             submission_attempts, restart_limit, throttle,
-            use_tmp, hash_ws, dry_run, self._out_path
+            use_tmp, hash_ws, dry_run, make_links_flag, self._out_path
         )
 
         self.is_configured = True
@@ -655,20 +677,21 @@ def _stage(self, dag):
                                 str(combo))
                     # Compute this step's combination name and workspace.
                     nickname = None
-                    combo_str = combo.get_param_string(self.used_params[step])
+                    param_str = combo.get_param_string(self.used_params[step])
                     # We must encode explicitly to utf-8
-                    # combo_str = combo_str.encode("utf-8")
+                    # param_str = param_str.encode("utf-8")
                     if self._hash_ws:
-                        nickname = md5(combo_str.encode("utf-8")).hexdigest()
+                        nickname = md5(param_str.encode("utf-8")).hexdigest()
                         workspace = make_safe_path(
                                         self._out_path,
                                         *[step, nickname])
                     else:
                         workspace = \
-                            make_safe_path(self._out_path, *[step, combo_str])
+                            make_safe_path(self._out_path, *[step, param_str])
                         LOGGER.debug("Workspace: %s", workspace)
-                    combo_str = "{}_{}".format(step, combo_str)
+                    combo_str = "{}_{}".format(step, param_str)
                     self.workspaces[combo_str] = workspace
+                    LOGGER.debug("Workspace: %s", workspace)
 
                     # Check if the step combination has been processed.
                     if combo_str in self.step_combos:
@@ -678,6 +701,7 @@ def _stage(self, dag):
 
                     modified, step_exp = node.apply_parameters(combo)
                     step_exp.name = combo_str
+                    step_exp._param_string = param_str
                     step_exp.nickname = nickname
 
                     # Substitute workspaces into the combination.
@@ -806,6 +830,7 @@ def _stage_linear(self, dag):
                 r_cmd = r_cmd.replace(workspace_var, ws)
             node.run["cmd"] = cmd
             node.run["restart"] = r_cmd
+            node.study_label = step
 
             # Add the step
             dag.add_step(step, node, ws, rlimit)
@@ -874,6 +899,7 @@ def stage(self):
             use_tmp=self._use_tmp, dry_run=self._dry_run)
         dag.add_description(**self.description)
         dag.log_description()
+        dag.linker = self.linker
 
         # Because we're working within a Study class whose steps have already
         # been verified to not contain a cycle, we can override the check for

diff --git a/maestrowf/maestro.py b/maestrowf/maestro.py
@@ -43,11 +43,11 @@
 from maestrowf.specification import YAMLSpecification
 from maestrowf.datastructures.core import Study
 from maestrowf.datastructures.environment import Variable
+from maestrowf.datastructures.core.linker import Linker
 from maestrowf.utils import \
     create_parentdir, create_dictionary, LoggerUtility, make_safe_path, \
     start_process
 
-
 # Program Globals
 LOGGER = logging.getLogger(__name__)
 LOG_UTIL = LoggerUtility(LOGGER)
@@ -202,6 +202,9 @@ def run_study(args):
 
     # Set up the output directory.
     out_dir = environment.remove("OUTPUT_PATH")
+    out_name = ""
+    date_string = time.strftime("%Y%m%d")
+    time_string = time.strftime("%H%M%S")
     if args.out:
         # If out is specified in the args, ignore OUTPUT_PATH.
         output_path = os.path.abspath(args.out)
@@ -234,7 +237,7 @@ def run_study(args):
 
         out_name = "{}_{}".format(
             spec.name.replace(" ", "_"),
-            time.strftime("%Y%m%d-%H%M%S")
+            time.strftime(f"{date_string}-{time_string}")
         )
         output_path = make_safe_path(out_dir, *[out_name])
     environment.add(Variable("OUTPUT_PATH", output_path))
@@ -298,11 +301,24 @@ def run_study(args):
         raise ArgumentError(_msg)
 
     # Set up the study workspace and configure it for execution.
+    linker = Linker(
+        make_links_flag=args.make_links,
+        link_template=args.link_template,
+        hashws=args.hashws,
+        output_name=out_name,
+        output_path=output_path,
+        spec_name=spec.name.replace(" ", "_"),
+        date_string=date_string,
+        time_string=time_string,
+        dir_float_format=args.dir_float_format,
+        pgen=args.pgen,
+        globals=spec.globals,
+        )
     study.setup_workspace()
     study.configure_study(
         throttle=args.throttle, submission_attempts=args.attempts,
         restart_limit=args.rlimit, use_tmp=args.usetmp, hash_ws=args.hashws,
-        dry_run=args.dry)
+        dry_run=args.dry, linker=linker)
     study.setup_environment()
 
     if args.dry:
@@ -384,8 +400,12 @@ def setup_argparser():
     cancel.set_defaults(func=cancel_study)
 
     # subparser for a run subcommand
-    run = subparsers.add_parser('run',
-                                help="Launch a study based on a specification")
+    # need manual line breaks to allow formatted template documentation.
+    run = subparsers.add_parser(
+        'run',
+        help="Launch a study based on a specification",
+        formatter_class=RawTextHelpFormatter)
+
     run.add_argument("-a", "--attempts", type=int, default=1,
                      help="Maximum number of submission attempts before a "
                      "step is marked as failed. [Default: %(default)d]")
@@ -394,33 +414,52 @@ def setup_argparser():
                      "specify a restart command (0 denotes no limit). "
                      "[Default: %(default)d]")
     run.add_argument("-t", "--throttle", type=int, default=0,
-                     help="Maximum number of inflight jobs allowed to execute "
-                     "simultaneously (0 denotes not throttling). "
+                     help="Maximum number of inflight jobs allowed to "
+                     "execute simultaneously (0 denotes not throttling). "
                      "[Default: %(default)d]")
     run.add_argument("-s", "--sleeptime", type=int, default=60,
                      help="Amount of time (in seconds) for the manager to "
                      "wait between job status checks. [Default: %(default)d]")
     run.add_argument("--dry", action="store_true", default=False,
-                     help="Generate the directory structure and scripts for a "
-                     "study but do not launch it. [Default: %(default)s]")
+                     help="Generate the directory structure and scripts for "
+                     "a study but do not launch it. [Default: %(default)s]")
     run.add_argument("-p", "--pgen", type=str,
                      help="Path to a Python code file containing a function "
-                     "that returns a custom filled ParameterGenerator "
+                     "that returns a custom filled ParameterGenerator \n"
                      "instance.")
     run.add_argument("--pargs", type=str, action="append", default=[],
-                     help="A string that represents a single argument to pass "
-                     "a custom parameter generation function. Reuse '--parg' "
-                     "to pass multiple arguments. [Use with '--pgen']")
+                     help="A string that represents a single argument to  "
+                     "pass a custom parameter generation function.\n "
+                     "Reuse '--parg' to pass multiple arguments. "
+                     "[Use with '--pgen']")
     run.add_argument("-o", "--out", type=str,
                      help="Output path to place study in. [NOTE: overrides "
                      "OUTPUT_PATH in the specified specification]")
     run.add_argument("-fg", action="store_true", default=False,
                      help="Runs the backend conductor in the foreground "
                      "instead of using nohup. [Default: %(default)s]")
     run.add_argument("--hashws", action="store_true", default=False,
-                     help="Enable hashing of subdirectories in parameterized "
-                     "studies (NOTE: breaks commands that use parameter labels"
-                     " to search directories). [Default: %(default)s]")
+                     help="Enable hashing of subdirectories in \n"
+                     "parameterized studies (NOTE: breaks commands that use "
+                     "parameter labels to search directories). \n"
+                     " [Default: %(default)s]")
+    run.add_argument("--dir-float-format", nargs=2,
+                     metavar=(
+                        '(small-exponent-format)',
+                        '(large-exponent-format)'),
+                     default=['{:.2f}', '{:.2e}'],
+                     help=("Format for float parameters when used in "
+                           "directory names [Default: %(default)s]."))
+    run.add_argument("--make-links", action="store_true", default=False,
+                     help="Automatically make customizable, human-readable "
+                     "links to run directories. [Default: %(default)s]")
+    run.add_argument(
+        "--link-template",
+        type=str,
+        default=(
+            "{{output_path}}/../links/{{date}}/"
+            "run-{{study_index}}/{{combo}}/{{step}}"),
+        help=Linker.HELP_TEXT)
 
     prompt_opts = run.add_mutually_exclusive_group()
     prompt_opts.add_argument(
@@ -433,12 +472,12 @@ def setup_argparser():
     # The only required positional argument for 'run' is a specification path.
     run.add_argument(
         "specification", type=str,
-        help="The path to a Study YAML specification that will be loaded and "
-        "executed.")
+        help="The path to a Study YAML specification that will be loaded "
+        "and executed.")
     run.add_argument(
         "--usetmp", action="store_true", default=False,
-        help="Make use of a temporary directory for dumping scripts and other "
-        "Maestro related files.")
+        help="Make use of a temporary directory for dumping scripts and "
+        "other Maestro related files.")
     run.set_defaults(func=run_study)
 
     # subparser for a status subcommand

diff --git a/maestrowf/readme_make_links.txt b/maestrowf/readme_make_links.txt
@@ -0,0 +1,41 @@
+# add suffix to var_real abbrev if needed.
+
+# validate that all variables in template are valid
+
+# Write yaml index_directory index path
+# labels.yaml
+# update default template in maestro.py
+# update --link-template help in maestro.py
+# spell check
+# lint flake8 pylint
+
+# using {{date}} as maestro input and in template should cause error
+
+# add test for maestro user key/value conflicts with maestro template keyvalue
+# add test for maestro user key/value substitutes properly
+
+# maestro run -s 1 -fg -y --make-links tests/specification/test_specs/link_integration_fast.yml
+
+NOTE: template must include {{combo}} and {{step}}.
+[Default: {{link_directory}}/{{date}}/run-{{INDEX}}/{{combo}}/{{step}}]
+[Default: {{link_directory}}/{{date}}/{{study_name}}-{{study_index}}/combo-{{combo_index}}-{{combo}}/{{step}}]
+
+# use variable list below first. raise warning if there is a conflict.
+
+* {{study_time}}
+* {{study_date}}
+* {{maestro_variable_names}} # make sure maestro variable names don't conflict with other variables
+* {{study_name}}
+* {{output_path}} - Parent directory for this maestro study
+* {{date}} - Human-readable date (e.g. '2020_07_28')
+* {{long_combo}} - Maestro label for a set of parameters,
+* {{combo}} - Maestro label for a set of parameters, with reals rounded
+                (e.g. 'X1.5.X2.5.X3.20')
+                [maximum length: 255 characters]
+* {{step}} - Maestro label for a given step (e.g. 'run')
+
+{{study_index}} - Unique number for each maestro execution (e.g. '0001')
+{{output_path}} / {{study_name}} / {{study_date}} / {{study_time}}
+
+{{combo_index}} - Unique number for each maestro combination (e.g. '0001')
+