--prune should leave archived S3 keys (#3266)
* --prune should leave archived S3 keys

Part of #2224

* Update deployer/src/deployer/upload.py

Co-authored-by: Ryan Johnson <[email protected]>

* Update deployer/src/deployer/upload.py

Co-authored-by: Ryan Johnson <[email protected]>

* parse_archived_txt_file as a generator

* only one set

* add a nice warning about --prune without --archived-files

Co-authored-by: Ryan Johnson <[email protected]>
peterbe and escattone authored Mar 18, 2021
1 parent 1d3f9e7 commit 0987df8
Showing 4 changed files with 87 additions and 2 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/dev-build.yml
@@ -204,7 +204,7 @@ jobs:
 # XXX would be nice to validate here that $DEPLOYER_BUCKET_PREFIX is truthy
 echo "DEPLOYER_BUCKET_PREFIX=$DEPLOYER_BUCKET_PREFIX"
-poetry run deployer upload --prune ../client/build
+poetry run deployer upload --prune --archived-files ../content/archived.txt ../client/build
 poetry run deployer update-lambda-functions ./aws-lambda
 # TODO
 # Execute command to tell the Dev CloudFront distribution to use the
2 changes: 1 addition & 1 deletion .github/workflows/stage-build.yml
@@ -240,7 +240,7 @@ jobs:
 # XXX would be nice to validate here that $DEPLOYER_BUCKET_PREFIX is truthy
 echo "DEPLOYER_BUCKET_PREFIX=$DEPLOYER_BUCKET_PREFIX"
-poetry run deployer upload --prune ../client/build
+poetry run deployer upload --prune --archived-files ../content/archived.txt ../client/build
 poetry run deployer update-lambda-functions ./aws-lambda
 # TODO: Depending on how long the upload takes, consider switching to
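Both workflows now pass `--archived-files ../content/archived.txt`. That file is not included in this commit; judging from `parse_archived_txt_file` and the key-building loop in upload.py below, a plausible sketch of its format is one `<locale>/<slug>/index.html` path per line, with blank lines and `#` comments ignored:

# hypothetical archived.txt (not part of this commit)
en-us/web/guide/old-page/index.html
fr/web/guide/ancienne-page/index.html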
31 changes: 31 additions & 0 deletions deployer/src/deployer/main.py
@@ -39,6 +39,22 @@ def validate_optional_directory(ctx, param, value):
     return validate_directory(ctx, param, value)
 
 
+def validate_file(ctx, param, value):
+    if not value:
+        raise click.BadParameter(f"{value!r}")
+    path = Path(value)
+    if not path.exists():
+        raise click.BadParameter(f"{value} does not exist")
+    elif not path.is_file():
+        raise click.BadParameter(f"{value} is not a file")
+    return path
+
+
+def validate_optional_file(ctx, param, value):
+    if value:
+        return validate_file(ctx, param, value)
+
+
 @click.group()
 @click.option(
     "--dry-run",
@@ -151,6 +167,15 @@ def whatsdeployed(ctx, directory: Path, output: str):
     show_default=True,
     is_flag=True,
 )
+@click.option(
+    "--archived-files",
+    help=(
+        "The path to the file that lists which files are archived. "
+        "(Only relevant in conjunction with --prune)"
+    ),
+    default=None,
+    callback=validate_optional_file,
+)
 @click.argument("directory", type=click.Path(), callback=validate_directory)
 @click.pass_context
 def upload(ctx, directory: Path, **kwargs):
@@ -160,6 +185,12 @@ def upload(ctx, directory: Path, **kwargs):
         content_roots.append(kwargs["content_translated_root"])
     if kwargs["content_archived_root"]:
         content_roots.append(kwargs["content_archived_root"])
+
+    if kwargs["prune"] and not kwargs["archived_files"]:
+        log.warning(
+            "Warning! Running with --prune but NOT --archived-files will "
+            "possibly delete all archived content."
+        )
     ctx.obj.update(kwargs)
     upload_content(directory, content_roots, ctx.obj)

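For context (an illustration, not part of the commit): click invokes option callbacks as callback(ctx, param, value), passing value=None when the option is omitted. The flow for the new option is roughly:

# Hypothetical sketch of the callback behavior above:
# validate_optional_file(ctx, param, None)
#   -> returns None (option omitted); upload() then logs the
#      "--prune but NOT --archived-files" warning if --prune was given
# validate_optional_file(ctx, param, "../content/archived.txt")
#   -> returns Path("../content/archived.txt"), or raises
#      click.BadParameter if the path doesn't exist or isn't a file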
54 changes: 54 additions & 0 deletions deployer/src/deployer/upload.py
@@ -498,6 +498,14 @@ def delete(self, keys, on_task_complete=None, dry_run=False):
         return timer
 
 
+def parse_archived_txt_file(file: Path):
+    with open(file) as f:
+        for line in f:
+            line = line.strip()
+            if line and not line.startswith("#"):
+                yield line
+
+
 def upload_content(build_directory, content_roots, config):
     full_timer = StopWatch().start()
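A quick sketch of the generator's behavior, reusing the hypothetical archived.txt from above:

from pathlib import Path

# Assuming the hypothetical archived.txt sketched earlier:
for line in parse_archived_txt_file(Path("archived.txt")):
    print(line)
# en-us/web/guide/old-page/index.html
# fr/web/guide/ancienne-page/index.html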

@@ -508,6 +516,7 @@ def upload_content(build_directory, content_roots, config):
     show_progress_bar = not config["no_progressbar"]
     upload_redirects = not config["no_redirects"]
     prune = config["prune"]
+    archived_txt_file = config["archived_files"]
 
     log.info(f"Upload files from: {build_directory}")
     if upload_redirects:
@@ -583,6 +592,15 @@ def on_task_complete(task):
     # now deleted.
     now = datetime.datetime.utcnow().replace(tzinfo=UTC)
     delete_keys = []
+
+    archived_files_as_keys = set()
+    if archived_txt_file:
+        for file in parse_archived_txt_file(archived_txt_file):
+            locale, slug = file.replace("/index.html", "").split("/", 1)
+            archived_files_as_keys.add(f"{bucket_prefix}/{locale}/docs/{slug}")
+        if not archived_files_as_keys:
+            raise Exception(f"found no entries inside {archived_txt_file}")
+
     for key in existing_bucket_objects:
         if key.startswith(f"{bucket_prefix}/_whatsdeployed/"):
             # These are special and wouldn't have been uploaded
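To make the key construction in this hunk concrete, a worked example (the slug is hypothetical; the `main` prefix matches the bucket prefix used in the comments of the next hunk):

# "en-us/web/guide/old-page/index.html"
#   .replace("/index.html", "")  -> "en-us/web/guide/old-page"
#   .split("/", 1)               -> ["en-us", "web/guide/old-page"]
# f"{bucket_prefix}/{locale}/docs/{slug}"
#   -> "main/en-us/docs/web/guide/old-page"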
@@ -603,6 +621,42 @@ def on_task_complete(task):
         if delta.days < 30:
             continue
+
+        # Remember, if `key` comes from an "index.html" file, it will be
+        # represented something like `main/en-us/docs/web/api/documentorshadowroot`,
+        # i.e. with the `/index.html` portion removed.
+        # But every page usually also has an `index.json` file, e.g.
+        # `main/en-us/docs/web/api/index.json`, and there can be other assets
+        # such as `main/en-us/docs/web/api/screenshot.png`.
+
+        # This if-statement protects against deleting anything that
+        # isn't a document.
+        if "/docs/" in key:
+            is_archived = False
+            # To avoid another for-loop with `key.startswith()` for every
+            # key, first look for the low-hanging fruit.
+            if key in archived_files_as_keys:
+                # This is the easiest and fastest lookup.
+                is_archived = True
+            elif (
+                re.sub(r"/(index\.json|contributors\.txt|bcd\.json)$", "", key)
+                in archived_files_as_keys
+            ):
+                # This is easy and fast too, and covers 99% of the other
+                # possible keys.
+                is_archived = True
+            else:
+                # This is for things like
+                # `main/en-us/docs/web/api/screenshot.png`, where you can't
+                # confidently use `path.dirname()` because the key could be
+                # something like `main/fr/docs/web/api/manifest.json`, which
+                # is actually a "folder".
+                for archive_file_as_key in archived_files_as_keys:
+                    if key.startswith(archive_file_as_key):
+                        is_archived = True
+                        break
+            if is_archived:
+                continue
 
         assert key.startswith(bucket_prefix)
 
         delete_keys.append(key)
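A worked trace of the three lookups, assuming archived_files_as_keys contains only the hypothetical key "main/en-us/docs/web/guide/old-page":

# key                                                 matched by        pruned?
# main/en-us/docs/web/guide/old-page                  exact set lookup  no
# main/en-us/docs/web/guide/old-page/index.json       re.sub + lookup   no
# main/en-us/docs/web/guide/old-page/screenshot.png   startswith scan   no
# main/en-us/docs/web/guide/other-page                nothing           yes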
