Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[TTAHUB-3770] Breakout backup retention processing #2586

Merged
merged 6 commits into from
Jan 13, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
90 changes: 84 additions & 6 deletions .circleci/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -373,7 +373,8 @@ commands:
./automation/ci/scripts/acquire-lock.sh \
"<< parameters.app_name >>" \
"<< parameters.build_branch >>" \
"<< pipeline.number >>"
"<< pipeline.number >>" \
"$CIRCLE_JOB"
- run:
name: Push application with deployment vars
command: |
Expand Down Expand Up @@ -414,7 +415,8 @@ commands:
./automation/ci/scripts/release-lock.sh \
"<< parameters.app_name >>" \
"<< parameters.build_branch >>" \
"<< pipeline.number >>"
"<< pipeline.number >>" \
"$CIRCLE_JOB"
when: always
# - run:
# name: Push maintenance application
Expand Down Expand Up @@ -455,7 +457,8 @@ commands:
./automation/ci/scripts/acquire-lock.sh \
"tta-automation" \
"<< pipeline.git.branch >>" \
"<< pipeline.number >>"
"<< pipeline.number >>" \
"$CIRCLE_JOB"
- run:
name: Migrate database
command: |
Expand All @@ -469,7 +472,8 @@ commands:
./automation/ci/scripts/release-lock.sh \
"tta-automation" \
"<< pipeline.git.branch >>" \
"<< pipeline.number >>"
"<< pipeline.number >>" \
"$CIRCLE_JOB"
cf_automation_task:
description: "Login to Cloud Foundry space, run automation task, and send notification"
parameters:
Expand Down Expand Up @@ -561,7 +565,8 @@ commands:
./automation/ci/scripts/acquire-lock.sh \
"tta-automation" \
"<< pipeline.git.branch >>" \
"<< pipeline.number >>"
"<< pipeline.number >>" \
"$CIRCLE_JOB"
- run:
name: Start Log Monitoring
command: |
Expand Down Expand Up @@ -640,7 +645,8 @@ commands:
./automation/ci/scripts/release-lock.sh \
"tta-automation" \
"<< pipeline.git.branch >>" \
"<< pipeline.number >>"
"<< pipeline.number >>" \
"$CIRCLE_JOB"
- run:
name: Logout of service account
command: |
Expand Down Expand Up @@ -723,6 +729,26 @@ commands:
success_message: ':database: Restored data processed'
directory: "./"
timeout: "3000"
# Reusable command: runs the backup retention script by delegating to the
# cf_automation_task command with a fixed task name and script path.
cf_retention:
  description: "Delete Backup from S3 based on retention"
  parameters:
    # The four parameters below carry NAMES of environment variables,
    # not the secret values themselves.
    auth_client_secret:
      type: env_var_name
    cloudgov_username:
      type: env_var_name
    cloudgov_password:
      type: env_var_name
    cloudgov_space:
      type: env_var_name
    # Forwarded as the second element of task_args.
    s3_service_name:
      type: string
    # Forwarded as the first element of task_args; also used to build
    # the config name and the success message.
    backup_prefix:
      type: string
  steps:
    - cf_automation_task:
        auth_client_secret: << parameters.auth_client_secret >>
        cloudgov_username: << parameters.cloudgov_username >>
        cloudgov_password: << parameters.cloudgov_password >>
        cloudgov_space: << parameters.cloudgov_space >>
        task_name: "retention"
        task_command: "cd /home/vcap/app/db-backup/scripts; bash ./db_retention.sh"
        # JSON array encoded as a string: [backup prefix, S3 service name].
        task_args: '["<< parameters.backup_prefix >>", "<< parameters.s3_service_name >>"]'
        # NOTE(review): this resolves to "<prefix>-backup", while the same
        # change adds automation/configs/<prefix>-retention.yml files.
        # Confirm which config name is intended.
        config: "<< parameters.backup_prefix >>-backup"
        success_message: ':database: "<< parameters.backup_prefix >>" retention processed'
parameters:
cg_org:
description: "Cloud Foundry cloud.gov organization name"
Expand Down Expand Up @@ -817,6 +843,12 @@ parameters:
fail-on-modified-lines:
type: boolean
default: false
manual-retention-production:
type: boolean
default: false
manual-retention-processed:
type: boolean
default: false
jobs:
build_and_lint:
executor: docker-executor
Expand Down Expand Up @@ -1687,6 +1719,34 @@ jobs:
cloudgov_username: CLOUDGOV_DEV_USERNAME
cloudgov_password: CLOUDGOV_DEV_PASSWORD
cloudgov_space: CLOUDGOV_DEV_SPACE
# Job: run the cf_retention command for the "production" backup prefix
# using the PROD credential variables.
retention_production:
  docker:
    - image: "cimg/base:2024.05"
  steps:
    # Only the automation directory is checked out for this job.
    - sparse_checkout:
        directories: "automation"
        branch: << pipeline.git.branch >>
    - cf_retention:
        auth_client_secret: PROD_AUTH_CLIENT_SECRET
        cloudgov_username: CLOUDGOV_PROD_USERNAME
        cloudgov_password: CLOUDGOV_PROD_PASSWORD
        cloudgov_space: CLOUDGOV_PROD_SPACE
        s3_service_name: ttahub-db-backups
        backup_prefix: production
# Job: run the cf_retention command for the "processed" backup prefix
# using the PROD credential variables.
retention_processed:
  docker:
    - image: "cimg/base:2024.05"
  steps:
    # Only the automation directory is checked out for this job.
    - sparse_checkout:
        directories: "automation"
        branch: << pipeline.git.branch >>
    - cf_retention:
        auth_client_secret: PROD_AUTH_CLIENT_SECRET
        cloudgov_username: CLOUDGOV_PROD_USERNAME
        cloudgov_password: CLOUDGOV_PROD_PASSWORD
        cloudgov_space: CLOUDGOV_PROD_SPACE
        s3_service_name: ttahub-db-backups
        backup_prefix: processed
workflows:
build_test_deploy:
when:
Expand All @@ -1702,6 +1762,8 @@ workflows:
- equal: [false, << pipeline.parameters.manual-restore-staging >>]
- equal: [false, << pipeline.parameters.manual-restore-sandbox >>]
- equal: [false, << pipeline.parameters.manual-restore-dev >>]
- equal: [false, << pipeline.parameters.manual-retention-production >>]
- equal: [false, << pipeline.parameters.manual-retention-processed >>]
jobs:
- build_and_lint
- build_and_lint_similarity_api
Expand Down Expand Up @@ -1822,6 +1884,12 @@ workflows:
- restore_processed_to_dev:
requires:
- restore_processed_to_sandbox
- retention_production:
requires:
- restore_processed_to_dev
- retention_processed:
requires:
- retention_production
manual_backup_upload_production:
when:
equal: [true, << pipeline.parameters.manual-trigger >>]
Expand Down Expand Up @@ -1926,3 +1994,13 @@ workflows:
equal: [true, << pipeline.parameters.manual-restore-dev >>]
jobs:
- restore_processed_to_dev
# Workflow: runs only when the manual-retention-production pipeline
# parameter is true; executes the retention_production job on its own.
manual_retention_production:
  when:
    equal:
      - true
      - << pipeline.parameters.manual-retention-production >>
  jobs:
    - retention_production
# Workflow: runs only when the manual-retention-processed pipeline
# parameter is true; executes the retention_processed job on its own.
manual_retention_processed:
  when:
    equal:
      - true
      - << pipeline.parameters.manual-retention-processed >>
  jobs:
    - retention_processed
20 changes: 14 additions & 6 deletions automation/ci/scripts/acquire-lock.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
# Positional arguments:
#   $1 - app name, or the shorthand "DEV" / "SANDBOX"
#   $2 - branch requesting the lock
#   $3 - build ID requesting the lock
#   $4 - job name requesting the lock
# Expand the two shorthand names; any other value is used verbatim.
case "$1" in
    DEV)     APP_NAME="tta-smarthub-dev" ;;
    SANDBOX) APP_NAME="tta-smarthub-sandbox" ;;
    *)       APP_NAME="$1" ;;
esac
BRANCH=$2
BUILD_ID=$3
JOB_NAME=$4

# Constants
LOCK_TIMEOUT=7200 # 2 hours in seconds
Expand All @@ -16,16 +17,21 @@ if [ -n "$LOCK_DATA" ]; then
LOCK_TIMESTAMP=$(echo "$LOCK_DATA" | jq -r '.timestamp')
LOCK_BRANCH=$(echo "$LOCK_DATA" | jq -r '.branch')
LOCK_BUILD_ID=$(echo "$LOCK_DATA" | jq -r '.build_id')
LOCK_JOB_NAME=$(echo "$LOCK_DATA" | jq -r '.job_name')

CURRENT_TIME=$(date -u +"%Y-%m-%dT%H:%M:%SZ")
TIME_DIFF=$(($(date -d "$CURRENT_TIME" +%s) - $(date -d "$LOCK_TIMESTAMP" +%s)))

if [ $TIME_DIFF -lt $LOCK_TIMEOUT ]; then
echo "App $APP_NAME is locked by branch $LOCK_BRANCH with build ID $LOCK_BUILD_ID."
exit 1
if [ "$LOCK_BRANCH" == "$BRANCH" ] && [ "$BUILD_ID" -gt "$LOCK_BUILD_ID" ] && [ "$LOCK_JOB_NAME" == "$JOB_NAME" ]; then
echo "Lock is being usurped due to a newer build ID and matching job name."
else
echo "App $APP_NAME is locked by branch $LOCK_BRANCH, build ID $LOCK_BUILD_ID, job name $LOCK_JOB_NAME."
exit 1
fi
else
echo "Lock is stale. Attempting to acquire lock..."
fi

echo "Lock is stale. Attempting to acquire lock..."
fi

# Check if app is restaging
Expand All @@ -40,17 +46,19 @@ TIMESTAMP=$(date -u +"%Y-%m-%dT%H:%M:%SZ")
LOCK_DATA_JSON=$(jq -n \
--arg branch "$BRANCH" \
--arg build_id "$BUILD_ID" \
--arg job_name "$JOB_NAME" \
--arg timestamp "$TIMESTAMP" \
'{branch: $branch, build_id: $build_id, timestamp: $timestamp}')
'{branch: $branch, build_id: $build_id, job_name: $job_name, timestamp: $timestamp}')

cf set-env "$APP_NAME" LOCK_APP "$LOCK_DATA_JSON"

# Validate the lock
LOCK_DATA=$(cf env "$APP_NAME" | grep -A 10 LOCK_APP | sed ':a;N;$!ba;s/\n/ /g' | grep -oP "[{][^}]+[}]")
VALID_BRANCH=$(echo "$LOCK_DATA" | jq -r '.branch')
VALID_BUILD_ID=$(echo "$LOCK_DATA" | jq -r '.build_id')
VALID_JOB_NAME=$(echo "$LOCK_DATA" | jq -r '.job_name')

if [ "$VALID_BRANCH" == "$BRANCH" ] && [ "$VALID_BUILD_ID" == "$BUILD_ID" ]; then
if [ "$VALID_BRANCH" == "$BRANCH" ] && [ "$VALID_BUILD_ID" == "$BUILD_ID" ] && [ "$VALID_JOB_NAME" == "$JOB_NAME" ]; then
echo "Lock successfully acquired for app $APP_NAME."
exit 0
else
Expand Down
69 changes: 65 additions & 4 deletions automation/ci/scripts/cf_lambda.sh
Original file line number Diff line number Diff line change
Expand Up @@ -318,28 +318,89 @@ function check_app_running {
fi
}

# Ensure the application is stopped
function ensure_app_stopped() {
# check_logs_idle APP_NAME PREVIOUS_LOGS
#
# Takes a fresh `cf logs --recent` capture for the app and compares it with
# an earlier capture supplied by the caller.
#
# Arguments:
#   $1 - application name passed to `cf logs --recent`
#   $2 - earlier capture to compare against
# Returns:
#   0 - the fresh capture is identical to the earlier one (treated as idle)
#   1 - the captures differ (treated as activity)
function check_logs_idle {
    local target_app="$1"
    local baseline_logs="$2"

    log "INFO" "Checking logs for activity..."

    # stderr is merged into the captured text, so CLI errors also take part
    # in the comparison.
    local latest_logs
    latest_logs=$(cf logs --recent "$target_app" 2>&1)

    # Guard clause: any difference at all counts as activity.
    if [[ "$baseline_logs" != "$latest_logs" ]]; then
        log "INFO" "Activity detected in logs for application '$target_app'."
        return 1 # Logs indicate activity
    fi

    log "INFO" "No new logs detected for application '$target_app'."
    return 0 # Logs indicate idle
}

# check_tasks_idle APP_NAME PREVIOUS_TASKS
#
# Decides whether the app's task list looks idle: the `cf tasks` output must
# be unchanged since the caller's earlier capture AND no listed task may be
# in an active state (PENDING, RUNNING or CANCELING).
#
# Arguments:
#   $1 - application name passed to `cf tasks`
#   $2 - earlier `cf tasks` capture to compare against
# Returns:
#   0 - output unchanged and no active task rows (treated as idle)
#   1 - output changed, or at least one task is active
function check_tasks_idle {
    local app_name="$1"
    local previous_tasks="$2"

    log "INFO" "Checking tasks for activity..."

    # Capture tasks for comparison (stderr merged into the captured text).
    local current_tasks
    current_tasks=$(cf tasks "$app_name" 2>&1)

    # Count task rows whose state column is active. `cf tasks` prints rows as
    # "id name state start-time command", so the state is the third field;
    # the last field belongs to the command, and the last row is not the
    # newest task. Every row is inspected so no active task is missed.
    # Assumes task names contain no whitespace (otherwise the state would not
    # be field 3) - TODO confirm against how tasks are named here.
    local active_task_count
    active_task_count=$(printf '%s\n' "$current_tasks" | awk '
        /^[0-9]+/ && ($3 == "PENDING" || $3 == "RUNNING" || $3 == "CANCELING") { active++ }
        END { print active + 0 }
    ')

    if [[ "$previous_tasks" == "$current_tasks" && "$active_task_count" -eq 0 ]]; then
        log "INFO" "No new tasks detected and no active tasks for application '$app_name'."
        return 0 # Tasks indicate idle
    else
        log "INFO" "Active or pending tasks detected for application '$app_name'."
        return 1 # Tasks indicate activity
    fi
}

# ensure_app_stopped [TIMEOUT_SECONDS]
#
# Waits for the "tta-automation" app to stop. While waiting it periodically
# compares log and task snapshots; when both look idle it issues `cf stop`.
#
# Arguments:
#   $1 - optional timeout in seconds (defaults to 300)
# Returns:
#   0 - check_app_running reported the app as not running
#   1 - the timeout elapsed first
function ensure_app_stopped {
local app_name="tta-automation"
local timeout=${1:-300} # Default timeout is 300 seconds (5 minutes)

log "INFO" "Ensuring application '$app_name' is stopped..."
local start_time=$(date +%s)
local current_time

# Initialize previous values for logs and tasks
# (baseline snapshots that the idle checks below compare against)
local previous_logs=$(cf logs --recent "$app_name" 2>&1)
local previous_tasks=$(cf tasks "$app_name" 2>&1)

while true; do
current_time=$(date +%s)

# Check logs and tasks every 60 seconds.
# The condition holds during the first 10 seconds of each 60-second window
# since start_time; with the 10-second sleep below that is roughly one
# check per minute (the first one happens immediately).
if (( (current_time - start_time) % 60 < 10 )); then
log "INFO" "Performing periodic checks for logs and tasks."
if check_logs_idle "$app_name" "$previous_logs" && check_tasks_idle "$app_name" "$previous_tasks"; then
log "INFO" "Application '$app_name' appears to be idle. Sending shutdown command."
cf stop "$app_name"
else
# Activity seen: refresh both baselines for the next periodic comparison.
previous_logs=$(cf logs --recent "$app_name" 2>&1)
previous_tasks=$(cf tasks "$app_name" 2>&1)
fi
fi

# check_app_running is called without arguments; a non-zero status is
# treated as "the app is stopped".
if ! check_app_running; then
log "INFO" "Application '$app_name' is already stopped."
return 0 # App is stopped
fi

# NOTE(review): current_time is already set at the top of the loop; this
# second assignment may be a pre-change line carried over from the diff
# view - confirm against the actual file.
current_time=$(date +%s)
if (( current_time - start_time >= timeout )); then
log "ERROR" "Timeout reached while waiting for application '$app_name' to stop."
return 1 # Timeout reached
fi

# NOTE(review): the two log lines below look like the before/after variants
# of a single message in the diff view - confirm that only one is intended.
log "INFO" "Application '$app_name' is running. Waiting for it to stop..."
log "INFO" "Waiting for application '$app_name' to stop..."
sleep 10
done
}
Expand Down
6 changes: 4 additions & 2 deletions automation/ci/scripts/release-lock.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
# Positional arguments:
#   $1 - app name, or the shorthand "DEV" / "SANDBOX"
#   $2 - branch that owns the lock
#   $3 - build ID that owns the lock
#   $4 - (optional) job name that owns the lock
# Expand the two shorthand names; any other value is used verbatim.
case "$1" in
    DEV)     APP_NAME="tta-smarthub-dev" ;;
    SANDBOX) APP_NAME="tta-smarthub-sandbox" ;;
    *)       APP_NAME="$1" ;;
esac
BRANCH=$2
BUILD_ID=$3
# Callers pass the job name as the fourth argument, matching acquire-lock.sh;
# honour it when present and fall back to the CircleCI job name otherwise so
# three-argument invocations keep working.
JOB_NAME=${4:-${CIRCLE_JOB:-}}

# Fetch environment variables
LOCK_DATA=$(cf env "$APP_NAME" | grep -A 10 LOCK_APP | sed ':a;N;$!ba;s/\n/ /g' | grep -oP "[{][^}]+[}]")
Expand All @@ -17,10 +18,11 @@ fi
# Extract lock metadata
LOCK_BRANCH=$(echo "$LOCK_DATA" | jq -r '.branch')
LOCK_BUILD_ID=$(echo "$LOCK_DATA" | jq -r '.build_id')
LOCK_JOB_NAME=$(echo "$LOCK_DATA" | jq -r '.job_name')

# Validate ownership
if [ "$LOCK_BRANCH" != "$BRANCH" ] || [ "$LOCK_BUILD_ID" != "$BUILD_ID" ]; then
echo "Cannot release lock: the app is locked by branch $LOCK_BRANCH with build ID $LOCK_BUILD_ID."
if [ "$LOCK_BRANCH" != "$BRANCH" ] || [ "$LOCK_BUILD_ID" != "$BUILD_ID" ] || [ "$LOCK_JOB_NAME" != "$JOB_NAME" ]; then
echo "Cannot release lock: the app is locked by branch $LOCK_BRANCH with build ID $LOCK_BUILD_ID and job name $LOCK_JOB_NAME."
exit 1
fi

Expand Down
9 changes: 9 additions & 0 deletions automation/configs/processed-retention.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# Settings for the "processed" backup retention task.
# NOTE(review): presumably read by the automation task launcher via the
# task's config name - confirm against the caller.
instances: 1
memory: 512M
disk_quota: 64M

buildpack: "binary_buildpack"
command: "./cf/scripts/idol.sh"

# Service instances bound to the app; the retention job passes the same
# name as its s3_service_name.
bound_services:
- ttahub-db-backups
9 changes: 9 additions & 0 deletions automation/configs/production-retention.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# Settings for the "production" backup retention task.
# NOTE(review): presumably read by the automation task launcher via the
# task's config name - confirm against the caller.
instances: 1
memory: 512M
disk_quota: 64M

buildpack: "binary_buildpack"
command: "./cf/scripts/idol.sh"

# Service instances bound to the app; the retention job passes the same
# name as its s3_service_name.
bound_services:
- ttahub-db-backups
Loading