add comment id to metadata uploaded to S3 #182

Closed
wants to merge 10 commits
24 changes: 4 additions & 20 deletions eessi_bot_job_manager.py
@@ -28,7 +28,6 @@
# license: GPLv2
#

import configparser
import glob
import os
import re
@@ -39,9 +38,10 @@
from tools.args import job_manager_parse
from datetime import datetime, timezone
from tools import config, run_cmd
from tools.job_metadata import read_job_metadata_from_file
from tools.pr_comments import get_submitted_job_comment, update_comment

from pyghee.utils import log, error
from pyghee.utils import log

AWAITS_LAUNCH = "awaits_launch"
FAILURE = "failure"
@@ -192,26 +192,10 @@ def determine_finished_jobs(self, known_jobs, current_jobs):

def read_job_pr_metadata(self, job_metadata_path):
"""
Check if metadata file exists, read it and return 'PR' section if so, return None if not.
Determine metadata of a job or None.
"""
# check if metadata file exist
if os.path.isfile(job_metadata_path):
log(f"Found metadata file at {job_metadata_path}", self.logfile)
metadata = configparser.ConfigParser()
try:
metadata.read(job_metadata_path)
except Exception as err:
error(f"Unable to read job metadata file {job_metadata_path}: {err}")

# get PR section
if "PR" in metadata:
metadata_pr = metadata["PR"]
else:
metadata_pr = {}
return metadata_pr
else:
log(f"No metadata file found at {job_metadata_path}, so not a bot job", self.logfile)
return None
return read_job_metadata_from_file(job_metadata_path, self.logfile)

# job_manager.process_new_job(current_jobs[nj])
def process_new_job(self, new_job):
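
For context, the `_bot_job<JOBID>.metadata` file that `read_job_pr_metadata()` reads (now via `read_job_metadata_from_file()`) is a small INI-style file with a `[PR]` section. A minimal sketch of that format and of how `configparser` reads it; the `repo` and `pr_number` key names are assumptions made for illustration, only `pr_comment_id` is introduced by this PR:

```python
import configparser
import tempfile

# hypothetical contents of a _bot_job<JOBID>.metadata file;
# 'repo' and 'pr_number' are assumed key names, 'pr_comment_id' is the new field
example = """\
[PR]
repo = EESSI/software-layer
pr_number = 281
pr_comment_id = 1498765432
"""

with tempfile.NamedTemporaryFile("w", suffix=".metadata", delete=False) as tmp:
    tmp.write(example)
    path = tmp.name

metadata = configparser.ConfigParser()
metadata.read(path)
pr_section = metadata["PR"] if "PR" in metadata else {}
print(dict(pr_section))
# -> {'repo': 'EESSI/software-layer', 'pr_number': '281', 'pr_comment_id': '1498765432'}
```
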
22 changes: 18 additions & 4 deletions scripts/eessi-upload-to-staging
@@ -42,6 +42,7 @@ function create_metadata_file
_url=$2
_repository=$3
_pull_request=$4
_pull_request_comment_id=$5

_tmpfile=$(mktemp)

@@ -56,10 +57,11 @@ function create_metadata_file
--arg url "${_url}" \
--arg repo "${_repository}" \
--arg pr "${_pull_request}" \
--arg pr_comment_id "${_pull_request_comment_id}" \
'{
uploader: {username: $un, ip: $ip, hostname: $hn},
payload: {filename: $fn, size: $sz, ctime: $ct, sha256sum: $sha256, url: $url},
link2pr: {repo: $repo, pr: $pr},
link2pr: {repo: $repo, pr: $pr, pr_comment_id: $pr_comment_id},
}' > "${_tmpfile}"

echo "${_tmpfile}"
@@ -70,6 +72,10 @@ function display_help
echo "Usage: $0 [OPTIONS] <filenames>" >&2
echo " -e | --endpoint-url URL - endpoint url (needed for non AWS S3)" >&2
echo " -h | --help - display this usage information" >&2
echo " -i | --pr-comment-id NUMBER - identifier of a PR comment;" >&2
echo " used to efficiently determine the PR" >&2
echo " comment to be updated during the" >&2
echo " ingestion procedure" >&2
echo " -n | --bucket-name BUCKET - bucket name (same as BUCKET above)" >&2
echo " -p | --pull-request NUMBER - a pull request NUMBER; used to" >&2
echo " link the upload to a PR" >&2
@@ -97,8 +103,11 @@ bucket_name="eessi-staging"

# provided via options in the bot's config file app.cfg
endpoint_url=
pull_request=
repository=

# provided via command line arguments
pr_comment_id="none"
pull_request="none"
repository="EESSI/software-layer"

while [[ $# -gt 0 ]]; do
case $1 in
@@ -110,6 +119,10 @@ while [[ $# -gt 0 ]]; do
display_help
exit 0
;;
-i|--pr-comment-id)
pr_comment_id="$2"
shift 2
;;
-n|--bucket-name)
bucket_name="$2"
shift 2
@@ -161,7 +174,8 @@ for file in "$*"; do
echo "Creating metadata file"
url="${bucket_base}/${aws_path}/${aws_file}"
metadata_file=$(create_metadata_file "${file}" "${url}" \
"${repository}" "${pull_request}")
"${repository}" "${pull_request}" \
"${pr_comment_id}")
echo "metadata:"
cat ${metadata_file}

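Downstream, the ingestion procedure (not part of this diff) can pick the new field out of the JSON metadata file written by `create_metadata_file`. A minimal sketch of such a consumer, assuming a metadata file at a hypothetical path:

```python
import json

# hypothetical path to a metadata file uploaded alongside the tarball
metadata_path = "/tmp/eessi-2023.06-software-linux-x86_64-generic-1687430400.tar.gz.meta.txt"

with open(metadata_path) as fp:
    metadata = json.load(fp)

link2pr = metadata["link2pr"]
# 'repo' and 'pr' were already present; 'pr_comment_id' is the field added by this PR
print(link2pr["repo"], link2pr["pr"], link2pr["pr_comment_id"])
```
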
96 changes: 50 additions & 46 deletions tasks/deploy.py
@@ -19,6 +19,7 @@
from pyghee.utils import log
from tasks.build import get_build_env_cfg
from tools import config, run_cmd, pr_comments
from tools.job_metadata import read_job_metadata_from_file

JOBS_BASE_DIR = "jobs_base_dir"
DEPLOYCFG = "deploycfg"
@@ -140,46 +141,30 @@ def check_build_status(slurm_out, eessi_tarballs):
return False


def update_pr_comment(tarball, repo_name, pr_number, state, msg):
def update_pr_comment(tarball, repo_name, pr_number, pr_comment_id, state, msg):
"""Update PR comment which contains specific tarball name.

Args:
tarball (string): name of tarball that is looked for in a PR comment
repo_name (string): name of the repository (USER_ORG/REPOSITORY)
pr_number (string): pull request number
pr_comment_id (int): pull request comment id
state (string): state (upload) to be used in update
msg (string): msg (succeeded or failed) describing upload result
"""
funcname = sys._getframe().f_code.co_name

gh = github.get_instance()
repo = gh.get_repo(repo_name)
pull_request = repo.get_pull(pr_number)

# TODO does this always return all comments?
comments = pull_request.get_issue_comments()
for comment in comments:
# NOTE
# adjust search string if format changed by event handler
# (separate process running eessi_bot_event_handler.py)
re_tarball = f".*{tarball}.*"
comment_match = re.search(re_tarball, comment.body)

if comment_match:
log(f"{funcname}(): found comment with id {comment.id}")

issue_comment = pull_request.get_issue_comment(int(comment.id))

dt = datetime.now(timezone.utc)
comment_update = (f"\n|{dt.strftime('%b %d %X %Z %Y')}|{state}|"
f"transfer of `{tarball}` to S3 bucket {msg}|")

# append update to existing comment
issue_comment.edit(issue_comment.body + comment_update)
# adjust search string ".*{tarball}.*" if format of PR comment changed by event handler
issue_comment = pr_comments.determine_issue_comment(pull_request, pr_comment_id, tarball)
if issue_comment:
dt = datetime.now(timezone.utc)
comment_update = (f"\n|{dt.strftime('%b %d %X %Z %Y')}|{state}|"
f"transfer of `{tarball}` to S3 bucket {msg}|")

# leave for loop (only update one comment, because tarball
# should only be referenced in one comment)
break
# append update to existing comment
issue_comment.edit(issue_comment.body + comment_update)
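
The helper `pr_comments.determine_issue_comment()` used above is not shown in this excerpt. Based on how it is called here, a plausible sketch (an assumption, not the implementation from this PR) is: look the comment up directly by `pr_comment_id` when one is known, otherwise fall back to scanning all comments for the tarball name as the previous code did:

```python
import re

def determine_issue_comment(pull_request, pr_comment_id, search_pattern=None):
    """Sketch: return the issue comment to update, or None if none is found."""
    if pr_comment_id != -1:
        # direct lookup by id avoids iterating over all PR comments
        return pull_request.get_issue_comment(int(pr_comment_id))
    if search_pattern:
        # fallback: scan all comments for the given pattern (e.g. a tarball name)
        for comment in pull_request.get_issue_comments():
            if re.search(f".*{search_pattern}.*", comment.body):
                return comment
    return None
```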


def append_tarball_to_upload_log(tarball, job_dir):
@@ -197,15 +182,16 @@ def append_tarball_to_upload_log(tarball, job_dir):
upload_log.write(f"{job_plus_tarball}\n")


def upload_tarball(job_dir, build_target, timestamp, repo_name, pr_number):
def upload_tarball(job_dir, build_target, timestamp, repo_name, pr_number, pr_comment_id):
"""Upload built artefact to an S3 bucket.

Args:
job_dir (string): path to the job directory
build_target (string): eessi-VERSION-COMPONENT-OS-ARCH
timestamp (int): timestamp of the tarball
repo_name (string): repository of the pull request
pr_number (string): number of the pull request
pr_number (int): number of the pull request
pr_comment_id (int): id of the pull request comment
"""
funcname = sys._getframe().f_code.co_name

@@ -233,6 +219,7 @@ def upload_tarball(job_dir, build_target, timestamp, repo_name, pr_number):
cmd_args.extend(['--endpoint-url', endpoint_url])
cmd_args.extend(['--repository', repo_name])
cmd_args.extend(['--pull-request', str(pr_number)])
cmd_args.extend(['--pr-comment-id', str(pr_comment_id)])
cmd_args.append(abs_path)
upload_cmd = ' '.join(cmd_args)

@@ -297,48 +284,63 @@ def determine_successful_jobs(job_dirs):
job_dirs (list): list of job directories

Returns:
successes (list): list of dictionaries representing successful jobs
successful_jobs (list): list of dictionaries representing successful jobs
"""
funcname = sys._getframe().f_code.co_name

successes = []
successful_jobs = []
for job_dir in job_dirs:
slurm_out = determine_slurm_out(job_dir)
eessi_tarballs = determine_eessi_tarballs(job_dir)
pr_comment_id = determine_pr_comment_id(job_dir)

if check_build_status(slurm_out, eessi_tarballs):
log(f"{funcname}(): SUCCESSFUL build in '{job_dir}'")
successes.append({'job_dir': job_dir,
'slurm_out': slurm_out,
'eessi_tarballs': eessi_tarballs})
successful_jobs.append({'job_dir': job_dir,
'slurm_out': slurm_out,
'pr_comment_id': pr_comment_id,
'eessi_tarballs': eessi_tarballs})
else:
log(f"{funcname}(): FAILED build in '{job_dir}'")

return successes
return successful_jobs


def determine_pr_comment_id(job_dir):
"""Determines pr_comment_id by reading _bot_job{JOBID}.metadata in job_dir."""
# assumes that last part of job_dir encodes the job's id
job_id = os.path.basename(os.path.normpath(job_dir))
job_metadata_file = os.path.join(job_dir, f"_bot_job{job_id}.metadata")
job_metadata = read_job_metadata_from_file(job_metadata_file)
if job_metadata and "pr_comment_id" in job_metadata:
return int(job_metadata["pr_comment_id"])
else:
return -1


def determine_tarballs_to_deploy(successes, upload_policy):
def determine_tarballs_to_deploy(successful_jobs, upload_policy):
"""Determines tarballs to deploy depending on upload policy

Args:
successes (list): list of dictionaries
{job_dir, slurm_out, eessi_tarballs}
successful_jobs (list): list of dictionaries
{job_dir, slurm_out, eessi_tarballs, pr_comment_id}
upload_policy (string): one of 'all', 'latest' or 'once'
'all': deploy all
'latest': deploy only the last for each build target
'once': deploy only latest if none for this build target has
been deployed before
Returns:
to_be_deployed (dictionary): dictionary of dictionaries
{job_dir, timestamp}
{job_dir, pr_comment_id, timestamp}
"""
funcname = sys._getframe().f_code.co_name

log(f"{funcname}(): num successful jobs {len(successes)}")
log(f"{funcname}(): num successful jobs {len(successful_jobs)}")

to_be_deployed = {}
for s in successes:
for job in successful_jobs:
# all tarballs for successful job
tarballs = s["eessi_tarballs"]
tarballs = job["eessi_tarballs"]
log(f"{funcname}(): num tarballs {len(tarballs)}")

# full path to first tarball for successful job
@@ -371,7 +373,7 @@ def determine_tarballs_to_deploy(successes, upload_policy):
else:
deploy = True
elif upload_policy == "once":
uploaded = uploaded_before(build_target, s["job_dir"])
uploaded = uploaded_before(build_target, job["job_dir"])
if uploaded is None:
deploy = True
else:
@@ -380,7 +382,8 @@
f"{indent_fname}has been uploaded through '{uploaded}'")

if deploy:
to_be_deployed[build_target] = {"job_dir": s["job_dir"],
to_be_deployed[build_target] = {"job_dir": job["job_dir"],
"pr_comment_id": job["pr_comment_id"],
"timestamp": timestamp}

return to_be_deployed
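
For illustration, the returned dictionary maps each build target to the job whose tarball should be deployed; all values below are invented:

```python
# sketch of the structure returned by determine_tarballs_to_deploy()
to_be_deployed = {
    "eessi-2023.06-software-linux-x86_64-generic": {
        "job_dir": "/project/bot/jobs/2023.06/pr_281/4724210",
        "pr_comment_id": 1498765432,
        "timestamp": 1687430400,
    },
}
```
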
@@ -438,11 +441,11 @@ def deploy_built_artefacts(pr, event_info):

# 2) for each build check the status of jobs (SUCCESS or FAILURE)
# - scan slurm*out file for: 'No modules missing!' & 'created'
successes = determine_successful_jobs(job_dirs)
successful_jobs = determine_successful_jobs(job_dirs)

# 3) for the successful ones, determine which to deploy depending on
# the upload policy
to_be_deployed = determine_tarballs_to_deploy(successes, upload_policy)
to_be_deployed = determine_tarballs_to_deploy(successful_jobs, upload_policy)

# 4) call function to deploy a single artefact per software subdir
# - update PR comments (look for comments with build-ts.tar.gz)
Expand All @@ -451,4 +454,5 @@ def deploy_built_artefacts(pr, event_info):
for target, job in to_be_deployed.items():
job_dir = job['job_dir']
timestamp = job['timestamp']
upload_tarball(job_dir, target, timestamp, repo_name, pr.number)
pr_comment_id = job['pr_comment_id']
upload_tarball(job_dir, target, timestamp, repo_name, pr.number, pr_comment_id)
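
Putting the pieces together, `upload_tarball()` ends up invoking the upload script roughly as follows; the sketch uses invented paths and numbers and omits any options not visible in this excerpt (for example the bucket name):

```python
# rough shape of the command assembled in upload_tarball(); all values are made up
cmd_args = ["/path/to/scripts/eessi-upload-to-staging"]
cmd_args.extend(["--endpoint-url", "https://s3.example.org"])
cmd_args.extend(["--repository", "EESSI/software-layer"])
cmd_args.extend(["--pull-request", "281"])
cmd_args.extend(["--pr-comment-id", "1498765432"])
cmd_args.append("/project/bot/jobs/2023.06/pr_281/4724210/eessi-2023.06-software-linux-x86_64-generic-1687430400.tar.gz")
upload_cmd = " ".join(cmd_args)
```
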
47 changes: 47 additions & 0 deletions tools/job_metadata.py
@@ -0,0 +1,47 @@
# This file is part of the EESSI build-and-deploy bot,
# see https://github.com/EESSI/eessi-bot-software-layer
#
# The bot helps with requests to add software installations to the
# EESSI software layer, see https://github.com/EESSI/software-layer
#
# author: Thomas Roeblitz (@trz42)
#
# license: GPLv2
#
import configparser
import os

from pyghee.utils import log


def read_job_metadata_from_file(filepath, log_file=None):
"""
Check if metadata file exists, read it and return 'PR' section if so, return None if not.

Args:
filepath (string): path to job metadata file
log_file (string): path to log file

Returns:
job_metadata (dict): dictionary containing job metadata or None
"""

# check if metadata file exists
if os.path.isfile(filepath):
log(f"Found metadata file at {filepath}", log_file)
metadata = configparser.ConfigParser()
try:
metadata.read(filepath)
except Exception as err:
log(f"Unable to read job metadata file {filepath}: {err}")
return None

# get PR section
if "PR" in metadata:
metadata_pr = metadata["PR"]
else:
metadata_pr = {}
return metadata_pr
else:
log(f"No metadata file found at {filepath}, might not be a bot job", log_file)
return None
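
A minimal usage sketch of the new helper; the paths below are hypothetical:

```python
from tools.job_metadata import read_job_metadata_from_file

job_metadata = read_job_metadata_from_file(
    "/project/bot/jobs/2023.06/pr_281/4724210/_bot_job4724210.metadata",
    log_file="/project/bot/eessi_bot_job_manager.log",
)
if job_metadata is None:
    print("no metadata file found, so not a bot job")
elif "pr_comment_id" in job_metadata:
    print(f"PR comment id: {job_metadata['pr_comment_id']}")
```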