Add comments to releases page script. #2003

Merged · 1 commit · Mar 23, 2021
Changes from all commits
120 changes: 109 additions & 11 deletions scripts/release/mule/deploy/releases_page/generate_releases_page.py
@@ -1,6 +1,16 @@
#!/usr/bin/env python3

# This script builds https://releases.algorand.com/index.html
#
# For each channel (stable, beta, indexer), we download the file information
# from the staging_bucket. Information from this bucket is used to create an
# HTML block for each channel which includes all versions found.
#
# The releases_bucket is also read, and if the file exists there, then the
# releases_bucket URL is used instead of the staging_bucket URL.
#
# All the HTML for the channels is combined to form one large release page,
# which can then be published on our releases page.

import sys
import boto3
@@ -11,10 +21,15 @@
releases_bucket = "algorand-releases"
releases_prefix = "https://releases.algorand.com/"
html_tpl = "html.tpl"
# Nit: should be styles_file
styles_url = "releases_page.css"
# May want to call these channels instead
tokens = ["stable", "beta", "indexer"]


def get_stage_release_set(response):
# Loop through contents of STAGING_BUCKET/releases/CHANNEL/ and return
# all[prefix] = [file_obj1, file_obj2...]
prefix = None
all = {}
they = []
@@ -31,10 +46,15 @@ def get_stage_release_set(response):
else:
all[prefix] = they
prefix = None
# Why do the following instead of emptying 'they' altogether?
they = [x]
return all
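# A sketch of the grouping this produces, based on the comment above
# (the prefix and file objects are illustrative, not real bucket
# contents):
#
#   get_stage_release_set(staging_response)
#   # => {"releases/beta/f9fa9a084_2.5.2": [file_obj1, file_obj2, ...]}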


def release_set_files(rset):
# Take a list of file_objs and return a files dict keyed by filename;
# each value is a dict with keys "file" (full path), "Size", and, if
# present, ".asc" or ".sig"
files = {}
for x in rset:
path = x["Key"]
@@ -43,17 +63,23 @@ def release_set_files(rset):
continue
didsuf = False
for suffix in (".asc", ".sig"):
# Check if signature file, e.g. node_beta_linux-amd64_2.5.2.tar.gz.sig
if fname.endswith(suffix):
# Get base filename, e.g. without '.sig'
froot = fname[:-len(suffix)]

fd = files.get(froot)
if fd is None:
fd = {}
files[froot] = fd
# key file dict by suffix, attach whole file object
fd[suffix] = x
didsuf = True
break # end suffixes loop
if didsuf:
continue # go to next file in rset

# At this point the file is not a sig file, so just attach the raw information
fd = files.get(fname)
if fd is None:
fd = {}
@@ -62,35 +88,56 @@ def release_set_files(rset):
fd["Size"] = x["Size"]
return files
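# A sketch of the resulting structure, reusing the signature-file
# example above (the path and size are hypothetical):
#
#   files["node_beta_linux-amd64_2.5.2.tar.gz"] == {
#       "file": "releases/beta/f9fa9a084_2.5.2/node_beta_linux-amd64_2.5.2.tar.gz",
#       "Size": 12345,
#       ".sig": file_obj_for_the_sig_file,
#   }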


def get_hashes_data(s3, rset):
# Read all hashes files for a version and return text string
text = ""
for x in rset:
# each x here is a file object under a specific prefix
path = x["Key"]
pre, fname = path.rsplit("/", 1)
if fname.endswith(".asc"):
continue
if fname.endswith(".sig"):
continue

# We skip signature files and only process hashes files,
# e.g. hashes_beta_linux_amd64_2.5.2.
# We read and append all of this data to the 'text' string and
# return it.
if fname.startswith("hashes"):
ob = s3.get_object(Bucket=staging_bucket, Key=path)
text += ob["Body"].read().decode()
return text
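# E.g. for the beta channel this would fetch an object like
# releases/beta/f9fa9a084_2.5.2/hashes_beta_linux_amd64_2.5.2 (path
# illustrative); if several hashes files are present, their contents
# are concatenated into one string.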


def read_hashes(fin):
# Read the output of get_hashes_data
by_fname = {}
for line in fin:
# Ignore blanks and comments
if not line:
continue
line = line.strip()
if not line:
continue
if line[0] == "#":
continue

# E.g.:
# 7e19496802ca7f3bec68ba580ccb7042
# algorand-beta-2.5.2-1.x86_64.rpm
hashstr, fname = line.split()
ob = by_fname.get(fname)

# If the filename is not yet in by_fname, create an empty dict and
# store it under that name
if not ob:
ob = {}
by_fname[fname] = ob

# If 32 chars, it's md5; 64 is sha256; 128 is sha512. Assign to the
# dict under those keys
if len(hashstr) == 32:
ob["md5"] = hashstr
elif len(hashstr) == 64:
@@ -99,6 +146,7 @@ def read_hashes(fin):
ob["sha512"] = hashstr
return by_fname
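# A minimal sketch of the mapping this produces, reusing the example
# line above:
#
#   read_hashes(["7e19496802ca7f3bec68ba580ccb7042  algorand-beta-2.5.2-1.x86_64.rpm"])
#   # => {"algorand-beta-2.5.2-1.x86_64.rpm":
#   #        {"md5": "7e19496802ca7f3bec68ba580ccb7042"}}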


def objects_by_fname(they):
out = {}
for x in they:
@@ -110,53 +158,103 @@ def objects_by_fname(they):
out[fname] = x
return out
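# E.g. a file_obj whose Key is "releases/algorand_2.5.2_amd64.deb"
# (hypothetical) ends up under out["algorand_2.5.2_amd64.deb"].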


def getContent(url):
with open(url, "r") as reader:
content = reader.read()

return content


def build_page(channels):
# read html_tpl and styles_url, make substitutions
html = getContent(html_tpl).replace("{styles}", getContent(styles_url))

# Replace each token (channel) from channels
for n in tokens:
html = html.replace("".join(["{", n, "}"]), "".join(channels[n]))

sys.stdout.write(html)
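# For this substitution to work, html.tpl presumably contains literal
# placeholders named after the tokens list above, along the lines of
# (a sketch, not the actual template):
#
#   <style>{styles}</style>
#   <h2>stable</h2><table>{stable}</table>
#   <h2>beta</h2><table>{beta}</table>
#   <h2>indexer</h2><table>{indexer}</table>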


def get_furl(release_files, fname, skey):
# Pass s3://algorand-releases/ file objects; also the filename and path
# from s3://algorand-dev-deb-repo.
#
# If the filename is in the algorand-releases bucket, use the URL from
# the releases bucket. Otherwise, use the URL from the
# s3://algorand-dev-deb-repo bucket.
#
# If algorand-releases and algorand-dev-deb-repo both contain the file:
#     https://releases.algorand.com/[rpath]
# Else:
#     http://algorand-dev-deb-repo.s3-website-us-east-1.amazonaws.com/[spath]
rfpath = release_files.get(fname)
if rfpath is not None:
return releases_prefix + rfpath["Key"]
else:
return staging_prefix + skey
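# E.g. (hypothetical filename and key):
#
#   get_furl(release_files, "algorand_2.5.2_amd64.deb",
#            "releases/beta/f9fa9a084_2.5.2/algorand_2.5.2_amd64.deb")
#   # => "https://releases.algorand.com/..." if the file is in
#   #    algorand-releases, otherwise the staging_prefix URL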


def main():
s3 = boto3.client("s3")
channels = {}

# Should use tokens array instead
for channel in ["stable", "beta", "indexer"]:
# Fetch contents of e.g. s3://algorand-dev-deb-repo/releases/beta/
# Note: MaxKeys will limit to last 100 releases, which is more than
# enough. Consider dropping this to 2.
staging_response = s3.list_objects_v2(
Bucket=staging_bucket,
Prefix="releases/" + channel + "/", MaxKeys=100)

# Populate release_sets, e.g.:
# 'releases/beta/f9fa9a084_2.5.2' => [file_obj1, file_obj2, ...]
release_sets = get_stage_release_set(staging_response)

# List everything from the releases bucket s3://algorand-releases/
releases_response = s3.list_objects_v2(Bucket=releases_bucket)

# Return dict keyed by filename of file_objs from
# s3://algorand-releases/
release_files = objects_by_fname(releases_response["Contents"])

table = []

# Loop through all the releases in e.g.
# s3://algorand-dev-deb-repo/releases/beta/
for key, rset in release_sets.items():
# key: releases/beta/f9fa9a084_2.5.2
# rset: [file_obj1, file_obj2, ...]

# Scan rset objs and return all the hashes data as a string
hashftext = get_hashes_data(s3, rset)

# Create a dict of fhashes[filename] = hash_obj
# hash_obj[CHECKSUM] = HASH_STRING
# E.g. hash_obj['md5'] = '7e19496802ca7f3bec68ba580ccb7042'
fhashes = read_hashes(hashftext.splitlines())

# Build a dict keyed by filename with value of a dict, keyed by
# "file" (full path) and "Size"
files = release_set_files(rset)

for fname, info in files.items():
if "file" not in info:
continue

# Use algorand-releases URL if avail; otherwise
# algorand-dev-deb-repo URL
furl = get_furl(release_files, fname, info['file'])

ftext = '<div class="fname"><a href="{}">{}</a></div>'.format(furl, fname)
# sig file obj from algorand-dev-deb-repo
sig = info.get(".sig")
stext = ""
if sig is not None:
sfname = sig["Key"].rsplit("/", 1)[-1]
sfname = sig["Key"].rsplit("/", 1)[-1] # filename
# Use algorand-releases URL if available
surl = get_furl(release_files, sfname, sig["Key"])
stext = '<a href="{}">.sig</a>'.format(surl)
size = info.get("Size", "")
@@ -172,12 +270,12 @@ def main():
table.append("".join(tbody))

# Only add the spacer *after* every set.
# It's not readily apparent to me why `indexer` would have a dict
# with a single item. This needs additional investigation.
#
# For instance, when creating the "indexer" table, the first line
# was empty b/c it added a spacer. This was b/c there were two
# dicts and the first only contained one item, which was useless.
#
# For now, just ignore those dicts.
if len(files.items()) > 1:
@@ -187,6 +285,6 @@ def main():

build_page(channels)


if __name__ == "__main__":
main()
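To regenerate the page locally, something along these lines should work,
assuming AWS credentials with read access to both buckets (the output
filename is just an example; build_page writes the HTML to stdout):

    python3 generate_releases_page.py > index.html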