add comment id to metadata uploaded to S3 #182

Closed
wants to merge 10 commits
24 changes: 4 additions & 20 deletions eessi_bot_job_manager.py
@@ -28,7 +28,6 @@
# license: GPLv2
#

import configparser
import glob
import os
import re
@@ -39,9 +38,10 @@
from tools.args import job_manager_parse
from datetime import datetime, timezone
from tools import config, run_cmd
from tools.job_metadata import read_job_metadata_from_file
from tools.pr_comments import get_submitted_job_comment, update_comment

from pyghee.utils import log, error
from pyghee.utils import log

AWAITS_LAUNCH = "awaits_launch"
FAILURE = "failure"
@@ -192,26 +192,10 @@ def determine_finished_jobs(self, known_jobs, current_jobs):

def read_job_pr_metadata(self, job_metadata_path):
"""
Check if metadata file exists, read it and return 'PR' section if so, return None if not.
Determine metadata of a job or None.
"""
# check if metadata file exist
if os.path.isfile(job_metadata_path):
log(f"Found metadata file at {job_metadata_path}", self.logfile)
metadata = configparser.ConfigParser()
try:
metadata.read(job_metadata_path)
except Exception as err:
error(f"Unable to read job metadata file {job_metadata_path}: {err}")

# get PR section
if "PR" in metadata:
metadata_pr = metadata["PR"]
else:
metadata_pr = {}
return metadata_pr
else:
log(f"No metadata file found at {job_metadata_path}, so not a bot job", self.logfile)
return None
return read_job_metadata_from_file(job_metadata_path, self.logfile)

# job_manager.process_new_job(current_jobs[nj])
def process_new_job(self, new_job):
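
For context, the `_bot_job<JOBID>.metadata` file that `read_job_pr_metadata()` reads (now via `read_job_metadata_from_file()`) is a small INI-style file with a `[PR]` section. A minimal sketch of that format and of how `configparser` reads it; the `repo` and `pr_number` key names are assumptions made for illustration, only `pr_comment_id` is introduced by this PR:

```python
import configparser
import tempfile

# hypothetical contents of a _bot_job<JOBID>.metadata file;
# 'repo' and 'pr_number' are assumed key names, 'pr_comment_id' is the new field
example = """\
[PR]
repo = EESSI/software-layer
pr_number = 281
pr_comment_id = 1498765432
"""

with tempfile.NamedTemporaryFile("w", suffix=".metadata", delete=False) as tmp:
    tmp.write(example)
    path = tmp.name

metadata = configparser.ConfigParser()
metadata.read(path)
pr_section = metadata["PR"] if "PR" in metadata else {}
print(dict(pr_section))
# -> {'repo': 'EESSI/software-layer', 'pr_number': '281', 'pr_comment_id': '1498765432'}
```
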
22 changes: 18 additions & 4 deletions scripts/eessi-upload-to-staging
@@ -42,6 +42,7 @@ function create_metadata_file
_url=$2
_repository=$3
_pull_request=$4
_pull_request_comment_id=$5

_tmpfile=$(mktemp)

@@ -56,10 +57,11 @@ function create_metadata_file
--arg url "${_url}" \
--arg repo "${_repository}" \
--arg pr "${_pull_request}" \
--arg pr_comment_id "${_pull_request_comment_id}" \
'{
uploader: {username: $un, ip: $ip, hostname: $hn},
payload: {filename: $fn, size: $sz, ctime: $ct, sha256sum: $sha256, url: $url},
link2pr: {repo: $repo, pr: $pr},
link2pr: {repo: $repo, pr: $pr, pr_comment_id: $pr_comment_id},
}' > "${_tmpfile}"

echo "${_tmpfile}"
@@ -70,6 +72,10 @@ function display_help
echo "Usage: $0 [OPTIONS] <filenames>" >&2
echo " -e | --endpoint-url URL - endpoint url (needed for non AWS S3)" >&2
echo " -h | --help - display this usage information" >&2
echo " -i | --pr-comment-id NUMBER - identifier of a PR comment;" >&2
echo " used to efficiently determine the PR" >&2
echo " comment to be updated during the" >&2
echo " ingestion procedure" >&2
echo " -n | --bucket-name BUCKET - bucket name (same as BUCKET above)" >&2
echo " -p | --pull-request NUMBER - a pull request NUMBER; used to" >&2
echo " link the upload to a PR" >&2
@@ -97,8 +103,11 @@ bucket_name="eessi-staging"

# provided via options in the bot's config file app.cfg
endpoint_url=
pull_request=
repository=

# provided via command line arguments
pr_comment_id="none"
pull_request="none"
repository="EESSI/software-layer"

while [[ $# -gt 0 ]]; do
case $1 in
@@ -110,6 +119,10 @@ while [[ $# -gt 0 ]]; do
display_help
exit 0
;;
-i|--pr-comment-id)
pr_comment_id="$2"
shift 2
;;
-n|--bucket-name)
bucket_name="$2"
shift 2
@@ -161,7 +174,8 @@ for file in "$*"; do
echo "Creating metadata file"
url="${bucket_base}/${aws_path}/${aws_file}"
metadata_file=$(create_metadata_file "${file}" "${url}" \
"${repository}" "${pull_request}")
"${repository}" "${pull_request}" \
"${pr_comment_id}")
echo "metadata:"
cat ${metadata_file}

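Downstream, the ingestion procedure (not part of this diff) can pick the new field out of the JSON metadata file written by `create_metadata_file`. A minimal sketch of such a consumer, assuming a metadata file at a hypothetical path:

```python
import json

# hypothetical path to a metadata file uploaded alongside the tarball
metadata_path = "/tmp/eessi-2023.06-software-linux-x86_64-generic-1687430400.tar.gz.meta.txt"

with open(metadata_path) as fp:
    metadata = json.load(fp)

link2pr = metadata["link2pr"]
# 'repo' and 'pr' were already present; 'pr_comment_id' is the field added by this PR
print(link2pr["repo"], link2pr["pr"], link2pr["pr_comment_id"])
```
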
96 changes: 50 additions & 46 deletions tasks/deploy.py
@@ -19,6 +19,7 @@
from pyghee.utils import log
from tasks.build import get_build_env_cfg
from tools import config, run_cmd, pr_comments
from tools.job_metadata import read_job_metadata_from_file

JOBS_BASE_DIR = "jobs_base_dir"
DEPLOYCFG = "deploycfg"
@@ -140,46 +141,30 @@ def check_build_status(slurm_out, eessi_tarballs):
return False


def update_pr_comment(tarball, repo_name, pr_number, state, msg):
def update_pr_comment(tarball, repo_name, pr_number, pr_comment_id, state, msg):
"""Update PR comment which contains specific tarball name.

Args:
tarball (string): name of tarball that is looked for in a PR comment
repo_name (string): name of the repository (USER_ORG/REPOSITORY)
pr_number (string): pull request number
pr_comment_id (int): pull request comment id
state (string): state (upload) to be used in update
msg (string): msg (succeeded or failed) describing upload result
"""
funcname = sys._getframe().f_code.co_name

gh = github.get_instance()
repo = gh.get_repo(repo_name)
pull_request = repo.get_pull(pr_number)

# TODO does this always return all comments?
comments = pull_request.get_issue_comments()
for comment in comments:
# NOTE
# adjust search string if format changed by event handler
# (separate process running eessi_bot_event_handler.py)
re_tarball = f".*{tarball}.*"
comment_match = re.search(re_tarball, comment.body)

if comment_match:
log(f"{funcname}(): found comment with id {comment.id}")

issue_comment = pull_request.get_issue_comment(int(comment.id))

dt = datetime.now(timezone.utc)
comment_update = (f"\n|{dt.strftime('%b %d %X %Z %Y')}|{state}|"
f"transfer of `{tarball}` to S3 bucket {msg}|")

# append update to existing comment
issue_comment.edit(issue_comment.body + comment_update)
# adjust search string ".*{tarball}.*" if format of PR comment changed by event handler
issue_comment = pr_comments.determine_issue_comment(pull_request, pr_comment_id, tarball)
if issue_comment:
dt = datetime.now(timezone.utc)
comment_update = (f"\n|{dt.strftime('%b %d %X %Z %Y')}|{state}|"
f"transfer of `{tarball}` to S3 bucket {msg}|")

# leave for loop (only update one comment, because tarball
# should only be referenced in one comment)
break
# append update to existing comment
issue_comment.edit(issue_comment.body + comment_update)
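
The helper `pr_comments.determine_issue_comment()` used above is not shown in this excerpt. Based on how it is called here, a plausible sketch (an assumption, not the implementation from this PR) is: look the comment up directly by `pr_comment_id` when one is known, otherwise fall back to scanning all comments for the tarball name as the previous code did:

```python
import re

def determine_issue_comment(pull_request, pr_comment_id, search_pattern=None):
    """Sketch: return the issue comment to update, or None if none is found."""
    if pr_comment_id != -1:
        # direct lookup by id avoids iterating over all PR comments
        return pull_request.get_issue_comment(int(pr_comment_id))
    if search_pattern:
        # fallback: scan all comments for the given pattern (e.g. a tarball name)
        for comment in pull_request.get_issue_comments():
            if re.search(f".*{search_pattern}.*", comment.body):
                return comment
    return None
```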


def append_tarball_to_upload_log(tarball, job_dir):
@@ -197,15 +182,16 @@ def append_tarball_to_upload_log(tarball, job_dir):
upload_log.write(f"{job_plus_tarball}\n")


def upload_tarball(job_dir, build_target, timestamp, repo_name, pr_number):
def upload_tarball(job_dir, build_target, timestamp, repo_name, pr_number, pr_comment_id):
"""Upload built artefact to an S3 bucket.

Args:
job_dir (string): path to the job directory
build_target (string): eessi-VERSION-COMPONENT-OS-ARCH
timestamp (int): timestamp of the tarball
repo_name (string): repository of the pull request
pr_number (string): number of the pull request
pr_number (int): number of the pull request
pr_comment_id (int): id of the pull request comment
"""
funcname = sys._getframe().f_code.co_name

@@ -233,6 +219,7 @@ def upload_tarball(job_dir, build_target, timestamp, repo_name, pr_number):
cmd_args.extend(['--endpoint-url', endpoint_url])
cmd_args.extend(['--repository', repo_name])
cmd_args.extend(['--pull-request', str(pr_number)])
cmd_args.extend(['--pr-comment-id', str(pr_comment_id)])
cmd_args.append(abs_path)
upload_cmd = ' '.join(cmd_args)

@@ -297,48 +284,63 @@ def determine_successful_jobs(job_dirs):
job_dirs (list): list of job directories

Returns:
successes (list): list of dictionaries representing successful jobs
successful_jobs (list): list of dictionaries representing successful jobs
"""
funcname = sys._getframe().f_code.co_name

successes = []
successful_jobs = []
for job_dir in job_dirs:
slurm_out = determine_slurm_out(job_dir)
eessi_tarballs = determine_eessi_tarballs(job_dir)
pr_comment_id = determine_pr_comment_id(job_dir)

if check_build_status(slurm_out, eessi_tarballs):
log(f"{funcname}(): SUCCESSFUL build in '{job_dir}'")
successes.append({'job_dir': job_dir,
'slurm_out': slurm_out,
'eessi_tarballs': eessi_tarballs})
successful_jobs.append({'job_dir': job_dir,
'slurm_out': slurm_out,
'pr_comment_id': pr_comment_id,
'eessi_tarballs': eessi_tarballs})
else:
log(f"{funcname}(): FAILED build in '{job_dir}'")

return successes
return successful_jobs


def determine_pr_comment_id(job_dir):
"""Determines pr_comment_id by reading _bot_job{JOBID}.metadata in job_dir."""
# assumes that last part of job_dir encodes the job's id
job_id = os.path.basename(os.path.normpath(job_dir))
job_metadata_file = os.path.join(job_dir, f"_bot_job{job_id}.metadata")
job_metadata = read_job_metadata_from_file(job_metadata_file)
if job_metadata and "pr_comment_id" in job_metadata:
return int(job_metadata["pr_comment_id"])
else:
return -1


def determine_tarballs_to_deploy(successes, upload_policy):
def determine_tarballs_to_deploy(successful_jobs, upload_policy):
"""Determines tarballs to deploy depending on upload policy

Args:
successes (list): list of dictionaries
{job_dir, slurm_out, eessi_tarballs}
successful_jobs (list): list of dictionaries
{job_dir, slurm_out, eessi_tarballs, pr_comment_id}
upload_policy (string): one of 'all', 'latest' or 'once'
'all': deploy all
'latest': deploy only the last for each build target
'once': deploy only latest if none for this build target has
been deployed before
Returns:
to_be_deployed (dictionary): dictionary of dictionaries
{job_dir, timestamp}
{job_dir, pr_comment_id, timestamp}
"""
funcname = sys._getframe().f_code.co_name

log(f"{funcname}(): num successful jobs {len(successes)}")
log(f"{funcname}(): num successful jobs {len(successful_jobs)}")

to_be_deployed = {}
for s in successes:
for job in successful_jobs:
# all tarballs for successful job
tarballs = s["eessi_tarballs"]
tarballs = job["eessi_tarballs"]
log(f"{funcname}(): num tarballs {len(tarballs)}")

# full path to first tarball for successful job
@@ -371,7 +373,7 @@ def determine_tarballs_to_deploy(successes, upload_policy):
else:
deploy = True
elif upload_policy == "once":
uploaded = uploaded_before(build_target, s["job_dir"])
uploaded = uploaded_before(build_target, job["job_dir"])
if uploaded is None:
deploy = True
else:
@@ -380,7 +382,8 @@
f"{indent_fname}has been uploaded through '{uploaded}'")

if deploy:
to_be_deployed[build_target] = {"job_dir": s["job_dir"],
to_be_deployed[build_target] = {"job_dir": job["job_dir"],
"pr_comment_id": job["pr_comment_id"],
"timestamp": timestamp}

return to_be_deployed
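
For illustration, the returned dictionary maps each build target to the job whose tarball should be deployed; all values below are invented:

```python
# sketch of the structure returned by determine_tarballs_to_deploy()
to_be_deployed = {
    "eessi-2023.06-software-linux-x86_64-generic": {
        "job_dir": "/project/bot/jobs/2023.06/pr_281/4724210",
        "pr_comment_id": 1498765432,
        "timestamp": 1687430400,
    },
}
```
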
@@ -438,11 +441,11 @@ def deploy_built_artefacts(pr, event_info):

# 2) for each build check the status of jobs (SUCCESS or FAILURE)
# - scan slurm*out file for: 'No modules missing!' & 'created'
successes = determine_successful_jobs(job_dirs)
successful_jobs = determine_successful_jobs(job_dirs)

# 3) for the successful ones, determine which to deploy depending on
# the upload policy
to_be_deployed = determine_tarballs_to_deploy(successes, upload_policy)
to_be_deployed = determine_tarballs_to_deploy(successful_jobs, upload_policy)

# 4) call function to deploy a single artefact per software subdir
# - update PR comments (look for comments with build-ts.tar.gz)
Expand All @@ -451,4 +454,5 @@ def deploy_built_artefacts(pr, event_info):
for target, job in to_be_deployed.items():
job_dir = job['job_dir']
timestamp = job['timestamp']
upload_tarball(job_dir, target, timestamp, repo_name, pr.number)
pr_comment_id = job['pr_comment_id']
upload_tarball(job_dir, target, timestamp, repo_name, pr.number, pr_comment_id)
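
Putting the pieces together, `upload_tarball()` ends up invoking the upload script roughly as follows; the sketch uses invented paths and numbers and omits any options not visible in this excerpt (for example the bucket name):

```python
# rough shape of the command assembled in upload_tarball(); all values are made up
cmd_args = ["/path/to/scripts/eessi-upload-to-staging"]
cmd_args.extend(["--endpoint-url", "https://s3.example.org"])
cmd_args.extend(["--repository", "EESSI/software-layer"])
cmd_args.extend(["--pull-request", "281"])
cmd_args.extend(["--pr-comment-id", "1498765432"])
cmd_args.append("/project/bot/jobs/2023.06/pr_281/4724210/eessi-2023.06-software-linux-x86_64-generic-1687430400.tar.gz")
upload_cmd = " ".join(cmd_args)
```
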
47 changes: 47 additions & 0 deletions tools/job_metadata.py
@@ -0,0 +1,47 @@
# This file is part of the EESSI build-and-deploy bot,
# see https://github.com/EESSI/eessi-bot-software-layer
#
# The bot helps with requests to add software installations to the
# EESSI software layer, see https://github.com/EESSI/software-layer
#
# author: Thomas Roeblitz (@trz42)
#
# license: GPLv2
#
import configparser
import os

from pyghee.utils import log


def read_job_metadata_from_file(filepath, log_file=None):
"""
Check if metadata file exists, read it and return 'PR' section if so, return None if not.

Args:
filepath (string): path to job metadata file
log_file (string): path to log file

Returns:
job_metadata (dict): dictionary containing job metadata or None
"""

# check if metadata file exists
if os.path.isfile(filepath):
log(f"Found metadata file at {filepath}", log_file)
metadata = configparser.ConfigParser()
try:
metadata.read(filepath)
except Exception as err:
log(f"Unable to read job metadata file {filepath}: {err}")
return None

# get PR section
if "PR" in metadata:
metadata_pr = metadata["PR"]
else:
metadata_pr = {}
return metadata_pr
else:
log(f"No metadata file found at {filepath}, might not be a bot job", log_file)
return None
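
A minimal usage sketch of the new helper; the paths below are hypothetical:

```python
from tools.job_metadata import read_job_metadata_from_file

job_metadata = read_job_metadata_from_file(
    "/project/bot/jobs/2023.06/pr_281/4724210/_bot_job4724210.metadata",
    log_file="/project/bot/eessi_bot_job_manager.log",
)
if job_metadata is None:
    print("no metadata file found, so not a bot job")
elif "pr_comment_id" in job_metadata:
    print(f"PR comment id: {job_metadata['pr_comment_id']}")
```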