Add comments to releases page script. #2003

Merged · 1 commit · Mar 23, 2021
Changes from all commits
120 changes: 109 additions & 11 deletions scripts/release/mule/deploy/releases_page/generate_releases_page.py
@@ -1,6 +1,16 @@
#!/usr/bin/env python3

# This script builds https://releases.algorand.com/index.html
#
# For each channel (stable, beta, indexer), we download the file information
# from the staging_bucket. Information from this bucket is used to create an
# HTML block for each channel which includes all versions found.
#
# The releases_bucket is also read, and if the file exists there, then the
# releases_bucket URL is used instead of the staging_bucket URL.
#
# All the HTML for the channels is combined to form one large release page,
# which can then be published on our releases page.

import sys
import boto3
@@ -11,10 +21,15 @@
releases_bucket = "algorand-releases"
releases_prefix = "https://releases.algorand.com/"
html_tpl = "html.tpl"
# Nit: should be styles_file
styles_url = "releases_page.css"
# May want to call these channels instead
tokens = ["stable", "beta", "indexer"]


def get_stage_release_set(response):
# Loop through contents of STAGING_BUCKET/releases/CHANNEL/ and return
# all[prefix] = [file_obj1, file_obj2...]
prefix = None
all = {}
they = []
@@ -31,10 +46,15 @@ def get_stage_release_set(response):
else:
all[prefix] = they
prefix = None
# Why do the following instead of emptying 'they' altogether?
they = [x]
return all
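# A sketch of the grouping this produces, based on the comment above
# (the prefix and file objects are illustrative, not real bucket
# contents):
#
#   get_stage_release_set(staging_response)
#   # => {"releases/beta/f9fa9a084_2.5.2": [file_obj1, file_obj2, ...]}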


def release_set_files(rset):
# Take a list of file_objs and return a files dict keyed by filename;
# each value is a dict with keys "file" (full path), "Size", and, if
# present, ".asc" or ".sig"
files = {}
for x in rset:
path = x["Key"]
@@ -43,17 +63,23 @@ def release_set_files(rset):
continue
didsuf = False
for suffix in (".asc", ".sig"):
# Check if signature file, e.g. node_beta_linux-amd64_2.5.2.tar.gz.sig
if fname.endswith(suffix):
# Get base filename, e.g. without '.sig'
froot = fname[:-len(suffix)]

fd = files.get(froot)
if fd is None:
fd = {}
files[froot] = fd
# key file dict by suffix, attach whole file object
fd[suffix] = x
didsuf = True
break # end suffixes loop
if didsuf:
continue # go to next file in rset

# At this point the file is not a sig file, so just attach the raw information
fd = files.get(fname)
if fd is None:
fd = {}
@@ -62,35 +88,56 @@ def release_set_files(rset):
fd["Size"] = x["Size"]
return files
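# A sketch of the resulting structure, reusing the signature-file
# example above (the path and size are hypothetical):
#
#   files["node_beta_linux-amd64_2.5.2.tar.gz"] == {
#       "file": "releases/beta/f9fa9a084_2.5.2/node_beta_linux-amd64_2.5.2.tar.gz",
#       "Size": 12345,
#       ".sig": file_obj_for_the_sig_file,
#   }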


def get_hashes_data(s3, rset):
# Read all hashes files for a version and return text string
text = ""
for x in rset:
# each x here is a file object under a specific prefix
path = x["Key"]
pre, fname = path.rsplit("/", 1)
if fname.endswith(".asc"):
continue
if fname.endswith(".sig"):
continue

# We skip signature files and only process hashes files,
# e.g. hashes_beta_linux_amd64_2.5.2.
# We read and append all of this data to the 'text' string and
# return it.
if fname.startswith("hashes"):
ob = s3.get_object(Bucket=staging_bucket, Key=path)
text += ob["Body"].read().decode()
return text
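# E.g. for the beta channel this would fetch an object like
# releases/beta/f9fa9a084_2.5.2/hashes_beta_linux_amd64_2.5.2 (path
# illustrative); if several hashes files are present, their contents
# are concatenated into one string.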


def read_hashes(fin):
# Read the output of get_hashes_data
by_fname = {}
for line in fin:
# Ignore blanks and comments
if not line:
continue
line = line.strip()
if not line:
continue
if line[0] == "#":
continue

# E.g.:
# 7e19496802ca7f3bec68ba580ccb7042
# algorand-beta-2.5.2-1.x86_64.rpm
hashstr, fname = line.split()
ob = by_fname.get(fname)

# If the filename is not yet in by_fname, create an empty dict and
# store it under that name
if not ob:
ob = {}
by_fname[fname] = ob

# If 32 chars, it's md5; 64 is sha256; 128 is sha512. Assign to the
# dict under those keys
if len(hashstr) == 32:
ob["md5"] = hashstr
elif len(hashstr) == 64:
@@ -99,6 +146,7 @@ def read_hashes(fin):
ob["sha512"] = hashstr
return by_fname
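# A minimal sketch of the mapping this produces, reusing the example
# line above:
#
#   read_hashes(["7e19496802ca7f3bec68ba580ccb7042  algorand-beta-2.5.2-1.x86_64.rpm"])
#   # => {"algorand-beta-2.5.2-1.x86_64.rpm":
#   #        {"md5": "7e19496802ca7f3bec68ba580ccb7042"}}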


def objects_by_fname(they):
out = {}
for x in they:
@@ -110,53 +158,103 @@ def objects_by_fname(they):
out[fname] = x
return out
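# E.g. a file_obj whose Key is "releases/algorand_2.5.2_amd64.deb"
# (hypothetical) ends up under out["algorand_2.5.2_amd64.deb"].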


def getContent(url):
with open(url, "r") as reader:
content = reader.read()

return content


def build_page(channels):
# read html_tpl and styles_url, make substitutions
html = getContent(html_tpl).replace("{styles}", getContent(styles_url))

# Replace each token (channel) from channels
for n in tokens:
html = html.replace("".join(["{", n, "}"]), "".join(channels[n]))

sys.stdout.write(html)
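# For this substitution to work, html.tpl presumably contains literal
# placeholders named after the tokens list above, along the lines of
# (a sketch, not the actual template):
#
#   <style>{styles}</style>
#   <h2>stable</h2><table>{stable}</table>
#   <h2>beta</h2><table>{beta}</table>
#   <h2>indexer</h2><table>{indexer}</table>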


def get_furl(release_files, fname, skey):
# Pass s3://algorand-releases/ file objects; also the filename and path
# from s3://algorand-dev-deb-repo.
#
# If the filename is in the algorand-releases bucket, use the URL from
# the releases bucket. Otherwise, use the URL from the
# s3://algorand-dev-deb-repo bucket.
#
# If algorand-releases and algorand-dev-deb-repo both contain the file:
#     https://releases.algorand.com/[rpath]
# Else:
#     http://algorand-dev-deb-repo.s3-website-us-east-1.amazonaws.com/[spath]
rfpath = release_files.get(fname)
if rfpath is not None:
return releases_prefix + rfpath["Key"]
else:
return staging_prefix + skey
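# E.g. (hypothetical filename and key):
#
#   get_furl(release_files, "algorand_2.5.2_amd64.deb",
#            "releases/beta/f9fa9a084_2.5.2/algorand_2.5.2_amd64.deb")
#   # => "https://releases.algorand.com/..." if the file is in
#   #    algorand-releases, otherwise the staging_prefix URL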


def main():
s3 = boto3.client("s3")
channels = {}

# Should use tokens array instead
for channel in ["stable", "beta", "indexer"]:
# Fetch contents of e.g. s3://algorand-dev-deb-repo/releases/beta/
# Note: MaxKeys will limit to last 100 releases, which is more than
# enough. Consider dropping this to 2.
staging_response = s3.list_objects_v2(
Bucket=staging_bucket,
Prefix="releases/" + channel + "/", MaxKeys=100)

# Populate release_sets, e.g.:
# 'releases/beta/f9fa9a084_2.5.2' => [file_obj1, file_obj2, ...]
release_sets = get_stage_release_set(staging_response)

# List everything from the releases bucket s3://algorand-releases/
releases_response = s3.list_objects_v2(Bucket=releases_bucket)

# Return dict keyed by filename of file_objs from
# s3://algorand-releases/
release_files = objects_by_fname(releases_response["Contents"])

table = []

# Loop through all the releases in e.g.
# s3://algorand-dev-deb-repo/releases/beta/
for key, rset in release_sets.items():
# key: releases/beta/f9fa9a084_2.5.2
# rset: [file_obj1, file_obj2, ...]

# Scan rset objs and return all the hashes data as a string
hashftext = get_hashes_data(s3, rset)

# Create a dict of fhashes[filename] = hash_obj
# hash_obj[CHECKSUM] = HASH_STRING
# E.g. hash_obj['md5'] = '7e19496802ca7f3bec68ba580ccb7042'
fhashes = read_hashes(hashftext.splitlines())

# Build a dict keyed by filename with value of a dict, keyed by
# "file" (full path) and "Size"
files = release_set_files(rset)

for fname, info in files.items():
if "file" not in info:
continue

# Use algorand-releases URL if avail; otherwise
# algorand-dev-deb-repo URL
furl = get_furl(release_files, fname, info['file'])

ftext = '<div class="fname"><a href="{}">{}</a></div>'.format(furl, fname)
# sig file obj from algorand-dev-deb-repo
sig = info.get(".sig")
stext = ""
if sig is not None:
sfname = sig["Key"].rsplit("/", 1)[-1]
sfname = sig["Key"].rsplit("/", 1)[-1] # filename
# Use algorand-releases URL if available
surl = get_furl(release_files, sfname, sig["Key"])
stext = '<a href="{}">.sig</a>'.format(surl)
size = info.get("Size", "")
@@ -172,12 +270,12 @@ def main():
table.append("".join(tbody))

# Only add the spacer *after* every set.
# It's not readily apparent to me why `indexer` would have a dict
# with a single item. This needs additional investigation.
#
# For instance, when creating the "indexer" table, the first line
# was empty b/c it added a spacer. This was b/c there were two
# dicts and the first only contained one item, which was useless.
#
# For now, just ignore those dicts.
if len(files.items()) > 1:
@@ -187,6 +285,6 @@ def main():

build_page(channels)


if __name__ == "__main__":
main()
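To regenerate the page locally, something along these lines should work,
assuming AWS credentials with read access to both buckets (the output
filename is just an example; build_page writes the HTML to stdout):

    python3 generate_releases_page.py > index.html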