Skip to content

Commit

Permalink
feat: Scale bots using spot instances (#7716)
Browse files Browse the repository at this point in the history
This PR scales the bot deployment using spot instances
  • Loading branch information
PhilWindle authored Aug 1, 2024
1 parent bbf554b commit 043e315
Show file tree
Hide file tree
Showing 4 changed files with 127 additions and 51 deletions.
6 changes: 5 additions & 1 deletion .github/workflows/devnet-deploys.yml
Original file line number Diff line number Diff line change
Expand Up @@ -55,12 +55,13 @@ env:

# Transaction Bot
TF_VAR_BOT_API_KEY: ${{ secrets.BOT_API_KEY }}
TF_VAR_BOT_PRIVATE_KEY: ${{ secrets.BOT_PRIVATE_KEY }}
TF_VAR_BOT_PRIVATE_KEY: ""
TF_VAR_BOT_NO_START: true
TF_VAR_BOT_PRIVATE_TRANSFERS_PER_TX: 0 # no private transfers
TF_VAR_BOT_PUBLIC_TRANSFERS_PER_TX: 1
TF_VAR_BOT_TX_MINED_WAIT_SECONDS: 0
TF_VAR_BOT_TX_INTERVAL_SECONDS: 60
TF_VAR_BOT_COUNT: 1

jobs:
setup:
Expand All @@ -70,6 +71,9 @@ jobs:
runner_type: builder-x86
secrets: inherit

# Set network-specific variables as outputs from this job so that later jobs can reference them.
# The only exception is the network API key: it is a secret, and secrets can't be passed between jobs,
# so it has to be re-derived in every job.
set-network:
needs: setup
runs-on: ${{ github.actor }}-x86
Expand Down
163 changes: 115 additions & 48 deletions yarn-project/aztec/terraform/bot/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -69,25 +69,88 @@ resource "aws_service_discovery_service" "aztec-bot" {
}
}

# Create an EC2 fleet made up entirely of spot capacity.
# Boot script for every bot instance. It appends two settings to the ECS agent
# configuration file:
#   * ECS_CLUSTER             - the cluster the instance registers with.
#   * ECS_INSTANCE_ATTRIBUTES - a custom "group" attribute set to
#                               "<DEPLOY_TAG>-bot", which the bot ECS service's
#                               placement constraint matches on.
#
# This is a plain local value rather than a `template_file` data source: the
# hashicorp/template provider is deprecated and archived, and Terraform already
# interpolates the heredoc itself, so the data source only rendered the finished
# string a second time. The resulting script is byte-for-byte the same.
locals {
  bot_instance_user_data = <<EOF
#!/bin/bash
echo ECS_CLUSTER=${data.terraform_remote_state.setup_iac.outputs.ecs_cluster_name} >> /etc/ecs/ecs.config
echo 'ECS_INSTANCE_ATTRIBUTES={"group": "${var.DEPLOY_TAG}-bot"}' >> /etc/ecs/ecs.config
EOF
}

# Launch template describing the EC2 instances that host the bot tasks.
# It is consumed by the EC2 fleet, which always uses the latest version.
resource "aws_launch_template" "bot_launch_template" {
  name = "${var.DEPLOY_TAG}-launch-template"

  # NOTE(review): AMI IDs are region-specific; presumably an ECS-optimised
  # image for the region the fleet runs in - confirm before changing region.
  image_id      = "ami-0cd4858f2b923aa6b"
  instance_type = "t3.2xlarge"

  # Instances live in the private security group exported by the setup stack.
  vpc_security_group_ids = [data.terraform_remote_state.setup_iac.outputs.security_group_private_id]

  iam_instance_profile {
    name = data.terraform_remote_state.setup_iac.outputs.ecs_instance_profile_name
  }

  key_name = data.terraform_remote_state.setup_iac.outputs.ecs_instance_key_pair_name

  # Launch templates take user data base64-encoded.
  user_data = base64encode(local.bot_instance_user_data)

  tag_specifications {
    resource_type = "instance"
    tags = {
      Name = "${var.DEPLOY_TAG}-bot"
      # NOTE(review): empty-valued marker tag, presumably used for Prometheus
      # target discovery - confirm against the monitoring configuration.
      prometheus = ""
    }
  }
}

# EC2 fleet that provides the instances for the bot tasks. All capacity is
# requested as spot; no on-demand instances are launched.
resource "aws_ec2_fleet" "bot_fleet" {
  # Instance clean-up behaviour (see the aws_ec2_fleet provider documentation
  # for the exact semantics of these two flags).
  terminate_instances                 = true
  terminate_instances_with_expiration = true

  # One unit of spot capacity per bot, none on demand.
  target_capacity_specification {
    on_demand_target_capacity    = 0
    spot_target_capacity         = var.BOT_COUNT
    total_target_capacity        = var.BOT_COUNT
    default_target_capacity_type = "spot"
  }

  launch_template_config {
    # Always track the newest version of the bot launch template.
    launch_template_specification {
      version            = aws_launch_template.bot_launch_template.latest_version
      launch_template_id = aws_launch_template.bot_launch_template.id
    }

    # One override per private subnet / availability zone, each with the same
    # max_price cap.
    override {
      max_price         = "0.15"
      availability_zone = "eu-west-2a"
      subnet_id         = data.terraform_remote_state.setup_iac.outputs.subnet_az1_private_id
    }

    override {
      max_price         = "0.15"
      availability_zone = "eu-west-2b"
      subnet_id         = data.terraform_remote_state.setup_iac.outputs.subnet_az2_private_id
    }
  }
}

# URL path prefix of the bot API: "/<DEPLOY_TAG>/aztec-bot/<BOT_API_KEY>".
# Note that the API key is part of the path itself.
locals {
  api_prefix = format("/%s/aztec-bot/%s", var.DEPLOY_TAG, var.BOT_API_KEY)
}

resource "aws_ecs_task_definition" "aztec-bot" {
family = "${var.DEPLOY_TAG}-aztec-bot"
network_mode = "awsvpc"
cpu = 16384
memory = 32768
requires_compatibilities = ["FARGATE"]
requires_compatibilities = ["EC2"]
execution_role_arn = data.terraform_remote_state.setup_iac.outputs.ecs_task_execution_role_arn
task_role_arn = data.terraform_remote_state.aztec2_iac.outputs.cloudwatch_logging_ecs_role_arn

container_definitions = jsonencode([
{
name = "${var.DEPLOY_TAG}-aztec-bot"
image = "${var.DOCKERHUB_ACCOUNT}/aztec:${var.DEPLOY_TAG}"
command = ["start", "--bot", "--pxe"]
essential = true
name = "${var.DEPLOY_TAG}-aztec-bot"
image = "${var.DOCKERHUB_ACCOUNT}/aztec:${var.DEPLOY_TAG}"
command = ["start", "--bot", "--pxe"]
essential = true
cpu = 8192
memoryReservation = 30720
portMappings = [
{
containerPort = 80
Expand Down Expand Up @@ -122,11 +185,10 @@ resource "aws_ecs_task_definition" "aztec-bot" {
resource "aws_ecs_service" "aztec-bot" {
name = "${var.DEPLOY_TAG}-aztec-bot"
cluster = data.terraform_remote_state.setup_iac.outputs.ecs_cluster_id
launch_type = "FARGATE"
desired_count = 1
launch_type = "EC2"
desired_count = var.BOT_COUNT
deployment_maximum_percent = 100
deployment_minimum_healthy_percent = 0
platform_version = "1.4.0"
force_new_deployment = true

network_configuration {
Expand All @@ -137,55 +199,60 @@ resource "aws_ecs_service" "aztec-bot" {
security_groups = [data.terraform_remote_state.setup_iac.outputs.security_group_private_id]
}

load_balancer {
target_group_arn = aws_alb_target_group.bot_http.arn
container_name = "${var.DEPLOY_TAG}-aztec-bot"
container_port = 80
}
# load_balancer {
# target_group_arn = aws_alb_target_group.bot_http.arn
# container_name = "${var.DEPLOY_TAG}-aztec-bot"
# container_port = 80
# }

service_registries {
registry_arn = aws_service_discovery_service.aztec-bot.arn
container_name = "${var.DEPLOY_TAG}-aztec-bot"
container_port = 80
}

placement_constraints {
type = "memberOf"
expression = "attribute:group == ${var.DEPLOY_TAG}-bot"
}

task_definition = aws_ecs_task_definition.aztec-bot.family
}

resource "aws_alb_target_group" "bot_http" {
name = "${var.DEPLOY_TAG}-bot-http"
port = 80
protocol = "HTTP"
target_type = "ip"
vpc_id = data.terraform_remote_state.setup_iac.outputs.vpc_id
deregistration_delay = 5

health_check {
path = "${local.api_prefix}/status"
matcher = 200
interval = 10
healthy_threshold = 2
unhealthy_threshold = 5
timeout = 5
}
# resource "aws_alb_target_group" "bot_http" {
# name = "${var.DEPLOY_TAG}-bot-http"
# port = 80
# protocol = "HTTP"
# target_type = "ip"
# vpc_id = data.terraform_remote_state.setup_iac.outputs.vpc_id
# deregistration_delay = 5

tags = {
name = "${var.DEPLOY_TAG}-bot-http"
}
}
# health_check {
# path = "${local.api_prefix}/status"
# matcher = 200
# interval = 10
# healthy_threshold = 2
# unhealthy_threshold = 5
# timeout = 5
# }

resource "aws_lb_listener_rule" "bot_api" {
listener_arn = data.terraform_remote_state.aztec2_iac.outputs.alb_listener_arn
priority = 700
# tags = {
# name = "${var.DEPLOY_TAG}-bot-http"
# }
# }

action {
type = "forward"
target_group_arn = aws_alb_target_group.bot_http.arn
}
# resource "aws_lb_listener_rule" "bot_api" {
# listener_arn = data.terraform_remote_state.aztec2_iac.outputs.alb_listener_arn
# priority = 700

condition {
path_pattern {
values = ["${local.api_prefix}*"]
}
}
}
# action {
# type = "forward"
# target_group_arn = aws_alb_target_group.bot_http.arn
# }

# condition {
# path_pattern {
# values = ["${local.api_prefix}*"]
# }
# }
# }
5 changes: 5 additions & 0 deletions yarn-project/aztec/terraform/bot/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -47,3 +47,8 @@ variable "PROVING_ENABLED" {
type = bool
default = false
}

# Number of bots to run. The same value sizes the EC2 spot fleet (total and
# spot target capacity) and sets the ECS service's desired task count, so it
# is declared as a number: a non-numeric value is rejected when the variable
# is evaluated instead of relying on implicit string-to-number conversion at
# each use. Numeric strings such as TF_VAR_BOT_COUNT=1 are still accepted.
variable "BOT_COUNT" {
  description = "Number of bot instances/tasks to run (EC2 fleet target capacity and ECS desired count)."
  type        = number
  default     = 1
}
4 changes: 2 additions & 2 deletions yarn-project/aztec/terraform/pxe/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -105,8 +105,8 @@ locals {
resource "aws_ecs_task_definition" "aztec-pxe" {
family = "${var.DEPLOY_TAG}-aztec-pxe"
network_mode = "awsvpc"
cpu = 16384
memory = 32768
cpu = 2048
memory = 4096
requires_compatibilities = ["FARGATE"]
execution_role_arn = data.terraform_remote_state.setup_iac.outputs.ecs_task_execution_role_arn
task_role_arn = data.terraform_remote_state.aztec2_iac.outputs.cloudwatch_logging_ecs_role_arn
Expand Down

0 comments on commit 043e315

Please sign in to comment.