Skip to content

Commit

Permalink
chore: New cluster setup (#11547)
Browse files Browse the repository at this point in the history
This PR contains the new cluster setup in Terraform.
  • Loading branch information
PhilWindle authored Jan 28, 2025
1 parent dfee4b8 commit e73421d
Show file tree
Hide file tree
Showing 12 changed files with 375 additions and 315 deletions.
7 changes: 6 additions & 1 deletion .github/workflows/devnet-deploy.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,10 @@ name: Deploy devnet
on:
workflow_dispatch:
inputs:
cluster:
description: The cluster to deploy to, e.g. aztec-gke-private
required: true
default: "aztec-gke-private"
namespace:
description: The namespace to deploy to, e.g. smoke
required: true
Expand Down Expand Up @@ -40,7 +44,7 @@ env:
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
CONTRACT_S3_BUCKET: s3://static.aztec.network
CLUSTER_NAME: aztec-gke
CLUSTER_NAME: ${{ inputs.cluster }}
REGION: us-west1-a
NAMESPACE: ${{ inputs.namespace }}
AZTEC_DOCKER_IMAGE: ${{ inputs.aztec_docker_image }}
Expand All @@ -50,6 +54,7 @@ jobs:
uses: ./.github/workflows/network-deploy.yml
with:
namespace: ${{ github.event.inputs.namespace }}
cluster: ${{ github.event.inputs.cluster }}
values_file: release-devnet.yaml
aztec_docker_image: ${{ github.event.inputs.aztec_docker_image }}
deployment_mnemonic_secret_name: ${{ github.event.inputs.deployment_mnemonic_secret_name }}
Expand Down
12 changes: 10 additions & 2 deletions .github/workflows/network-deploy.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,10 @@ name: Aztec Network Deployment
on:
workflow_call:
inputs:
cluster:
description: The cluster to deploy to, e.g. aztec-gke-private
required: true
type: string
namespace:
description: The namespace to deploy to, e.g. smoke
required: true
Expand Down Expand Up @@ -50,6 +54,10 @@ on:
required: true
workflow_dispatch:
inputs:
cluster:
description: The cluster to deploy to, e.g. aztec-gke-private
required: true
type: string
namespace:
description: The namespace to deploy to, e.g. smoke
required: true
Expand Down Expand Up @@ -103,10 +111,10 @@ jobs:
DEPLOYMENT_MNEMONIC_SECRET_NAME: ${{ inputs.deployment_mnemonic_secret_name }}
DEPLOYMENT_SALT: ${{ inputs.deployment_salt }}
CHART_PATH: ./spartan/aztec-network
CLUSTER_NAME: aztec-gke
CLUSTER_NAME: ${{ inputs.cluster }}
REGION: us-west1-a
TF_STATE_BUCKET: aztec-terraform
GKE_CLUSTER_CONTEXT: gke_testnet-440309_us-west1-a_aztec-gke
GKE_CLUSTER_CONTEXT: "gke_testnet-440309_us-west1-a_${{ inputs.cluster }}"

steps:
- name: Checkout code
Expand Down
8 changes: 6 additions & 2 deletions .github/workflows/network-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,10 @@ name: Aztec Network Test
on:
workflow_dispatch:
inputs:
cluster:
description: The cluster to deploy to, e.g. aztec-gke-private
required: true
default: "aztec-gke-private"
namespace:
description: The namespace to deploy to, e.g. smoke
required: true
Expand All @@ -22,10 +26,10 @@ jobs:
NAMESPACE: ${{ inputs.namespace }}
TEST: ${{ inputs.test }}
CHART_PATH: ./spartan/aztec-network
CLUSTER_NAME: aztec-gke
CLUSTER_NAME: ${{ inputs.cluster }}
REGION: us-west1-a
PROJECT_ID: testnet-440309
GKE_CLUSTER_CONTEXT: gke_testnet-440309_us-west1-a_aztec-gke
GKE_CLUSTER_CONTEXT: "gke_testnet-440309_us-west1-a_${{ inputs.cluster }}"

steps:
- name: Checkout code
Expand Down
1 change: 1 addition & 0 deletions .github/workflows/nightly-masternet-deploy.yml
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ jobs:
uses: ./.github/workflows/network-deploy.yml
with:
ref: master
cluster: aztec-gke-private
namespace: masternet
values_file: rc-1.yaml
aztec_docker_image: aztecprotocol/aztec@${{ needs.get-latest-commit.outputs.commit }}
Expand Down
270 changes: 270 additions & 0 deletions spartan/terraform/gke-cluster/cluster/main.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,270 @@
# Create a GKE cluster
# GKE control plane. Workloads run on the separately managed node pools
# declared in this file, not on the cluster's built-in default pool.
resource "google_container_cluster" "primary" {
  name     = var.cluster_name
  location = var.zone

  # Attach to the "default" network and subnetwork.
  network    = "default"
  subnetwork = "default"

  # Request the "latest" master version alias.
  min_master_version = "latest"

  # The cluster is created with a single bootstrap node, and that default
  # pool is removed once creation completes.
  initial_node_count       = 1
  remove_default_node_pool = true

  # Keep deletion protection switched on for this cluster.
  deletion_protection = true

  # Do not issue a client certificate for master authentication.
  master_auth {
    client_certificate_config {
      issue_client_certificate = false
    }
  }
}

# Create 2 core node pool with local ssd
# Autoscaled pool of n2d-standard-2 nodes, each with one local SSD backing
# ephemeral storage.
resource "google_container_node_pool" "aztec_nodes_2core_ssd" {
  name     = "${var.cluster_name}-2core-ssd"
  location = var.zone

  # Reference the cluster resource rather than var.cluster_name so Terraform
  # records a dependency and only creates this pool once the cluster exists.
  # The resolved value is the same cluster name.
  cluster = google_container_cluster.primary.name

  # Scale between zero and 256 nodes on demand.
  autoscaling {
    min_node_count = 0
    max_node_count = 256
  }

  # Node configuration
  node_config {
    machine_type = "n2d-standard-2"

    # One local SSD per node, used for ephemeral storage.
    ephemeral_storage_local_ssd_config {
      local_ssd_count = 1
    }

    service_account = var.service_account
    oauth_scopes = [
      "https://www.googleapis.com/auth/cloud-platform"
    ]

    labels = {
      env = "production"
    }
    tags = ["aztec-gke-node", "aztec"]
  }

  # Stated explicitly so this pool is configured the same way as the other
  # pools in this file that declare a management block.
  management {
    auto_repair  = true
    auto_upgrade = true
  }
}

# Create 4 core node pool with local ssd
# Autoscaled pool of n2d-standard-4 nodes, each with one local SSD backing
# ephemeral storage.
resource "google_container_node_pool" "aztec_nodes_4core_ssd" {
  name     = "${var.cluster_name}-4core-ssd"
  location = var.zone

  # Reference the cluster resource rather than var.cluster_name so Terraform
  # records a dependency and only creates this pool once the cluster exists.
  # The resolved value is the same cluster name.
  cluster = google_container_cluster.primary.name

  # Scale between zero and 256 nodes on demand.
  autoscaling {
    min_node_count = 0
    max_node_count = 256
  }

  # Node configuration
  node_config {
    machine_type = "n2d-standard-4"

    # One local SSD per node, used for ephemeral storage.
    ephemeral_storage_local_ssd_config {
      local_ssd_count = 1
    }

    service_account = var.service_account
    oauth_scopes = [
      "https://www.googleapis.com/auth/cloud-platform"
    ]

    labels = {
      env = "production"
    }
    tags = ["aztec-gke-node", "aztec"]
  }

  # Stated explicitly so this pool is configured the same way as the other
  # pools in this file that declare a management block.
  management {
    auto_repair  = true
    auto_upgrade = true
  }
}

# Create node pool for simulated aztec nodes (validators, prover nodes, boot nodes)
# Autoscaled pool of t2d-standard-2 nodes (no local SSD).
resource "google_container_node_pool" "aztec_nodes_2core" {
  name     = "${var.cluster_name}-2core"
  location = var.zone

  # Reference the cluster resource rather than var.cluster_name so Terraform
  # records a dependency and only creates this pool once the cluster exists.
  # The resolved value is the same cluster name.
  cluster = google_container_cluster.primary.name

  # Scale between zero and 256 nodes on demand.
  autoscaling {
    min_node_count = 0
    max_node_count = 256
  }

  # Node configuration
  node_config {
    machine_type = "t2d-standard-2"

    service_account = var.service_account
    oauth_scopes = [
      "https://www.googleapis.com/auth/cloud-platform"
    ]

    labels = {
      env = "production"
    }
    tags = ["aztec-gke-node", "aztec"]
  }

  # Stated explicitly so this pool is configured the same way as the other
  # pools in this file that declare a management block.
  management {
    auto_repair  = true
    auto_upgrade = true
  }
}

# Create node pool for aztec nodes (validators, prover nodes, boot nodes)
# Autoscaled pool of t2d-standard-4 nodes (no local SSD).
resource "google_container_node_pool" "aztec_nodes_4core" {
  name     = "${var.cluster_name}-4core"
  location = var.zone

  # Reference the cluster resource rather than var.cluster_name so Terraform
  # records a dependency and only creates this pool once the cluster exists.
  # The resolved value is the same cluster name.
  cluster = google_container_cluster.primary.name

  # Scale between zero and 256 nodes on demand.
  autoscaling {
    min_node_count = 0
    max_node_count = 256
  }

  # Node configuration
  node_config {
    machine_type = "t2d-standard-4"

    service_account = var.service_account
    oauth_scopes = [
      "https://www.googleapis.com/auth/cloud-platform"
    ]

    labels = {
      env = "production"
    }
    tags = ["aztec-gke-node", "aztec"]
  }

  # Management configuration
  management {
    auto_repair  = true
    auto_upgrade = true
  }
}

# Create 8 core nodes. Usually for proven bots
# Autoscaled pool of t2d-standard-8 nodes (no local SSD).
# NOTE(review): the resource name uses a hyphen ("aztec_nodes-8core") while
# sibling pools use underscores. It is left as-is because renaming a resource
# changes its address in Terraform state.
resource "google_container_node_pool" "aztec_nodes-8core" {
  name     = "${var.cluster_name}-8core"
  location = var.zone

  # Reference the cluster resource rather than var.cluster_name so Terraform
  # records a dependency and only creates this pool once the cluster exists.
  # The resolved value is the same cluster name.
  cluster = google_container_cluster.primary.name

  # Scale between zero and 256 nodes on demand.
  autoscaling {
    min_node_count = 0
    max_node_count = 256
  }

  # Node configuration
  node_config {
    machine_type = "t2d-standard-8"

    service_account = var.service_account
    oauth_scopes = [
      "https://www.googleapis.com/auth/cloud-platform"
    ]

    labels = {
      env = "production"
    }
    tags = ["aztec-gke-node", "aztec"]
  }

  # Management configuration
  management {
    auto_repair  = true
    auto_upgrade = true
  }
}

# Create spot instance node pool with autoscaling
# Autoscaled pool of t2d-standard-32 spot nodes.
resource "google_container_node_pool" "spot_nodes_32core" {
  name     = "${var.cluster_name}-32core-spot"
  location = var.zone

  # Reference the cluster resource rather than var.cluster_name so Terraform
  # records a dependency and only creates this pool once the cluster exists.
  # The resolved value is the same cluster name.
  cluster = google_container_cluster.primary.name

  # Scale between zero and 1500 nodes on demand.
  autoscaling {
    min_node_count = 0
    max_node_count = 1500
  }

  # Node configuration
  node_config {
    machine_type = "t2d-standard-32"
    spot         = true

    service_account = var.service_account
    oauth_scopes = [
      "https://www.googleapis.com/auth/cloud-platform"
    ]

    labels = {
      env  = "production"
      pool = "spot"
    }
    tags = ["aztec-gke-node", "spot"]

    # Keep pods off these spot nodes unless they explicitly tolerate the
    # cloud.google.com/gke-spot taint.
    taint {
      key    = "cloud.google.com/gke-spot"
      value  = "true"
      effect = "NO_SCHEDULE"
    }
  }

  # Management configuration
  management {
    auto_repair  = true
    auto_upgrade = true
  }
}

# Create low core count spot instance node pool with autoscaling
# Autoscaled pool of t2d-standard-2 spot nodes.
resource "google_container_node_pool" "spot_nodes_2core" {
  name     = "${var.cluster_name}-2core-spot"
  location = var.zone

  # Reference the cluster resource rather than var.cluster_name so Terraform
  # records a dependency and only creates this pool once the cluster exists.
  # The resolved value is the same cluster name.
  cluster = google_container_cluster.primary.name

  # Scale between zero and 1500 nodes on demand.
  autoscaling {
    min_node_count = 0
    max_node_count = 1500
  }

  # Node configuration
  node_config {
    machine_type = "t2d-standard-2"
    spot         = true

    service_account = var.service_account
    oauth_scopes = [
      "https://www.googleapis.com/auth/cloud-platform"
    ]

    labels = {
      env  = "production"
      pool = "spot"
    }
    tags = ["aztec-gke-node", "spot"]

    # Keep pods off these spot nodes unless they explicitly tolerate the
    # cloud.google.com/gke-spot taint.
    taint {
      key    = "cloud.google.com/gke-spot"
      value  = "true"
      effect = "NO_SCHEDULE"
    }
  }

  # Management configuration
  management {
    auto_repair  = true
    auto_upgrade = true
  }
}
8 changes: 8 additions & 0 deletions spartan/terraform/gke-cluster/cluster/outputs.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Endpoint attribute of the cluster created by this module.
output "cluster_endpoint" {
  description = "GKE Cluster Endpoint"
  value       = google_container_cluster.primary.endpoint
}

# Name of the cluster as recorded on the created resource.
output "kubernetes_cluster_name" {
  value       = google_container_cluster.primary.name
  description = "GKE Cluster Name"
}
18 changes: 18 additions & 0 deletions spartan/terraform/gke-cluster/cluster/variables.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# NOTE(review): not referenced by the cluster resources visible in this
# module; presumably consumed by provider configuration elsewhere. Confirm.
variable "project" {
  description = "GCP project ID."
  type        = string
  default     = "testnet-440309"
}

# NOTE(review): not referenced by the cluster resources visible in this
# module; presumably consumed by provider configuration elsewhere. Confirm.
variable "region" {
  description = "GCP region."
  type        = string
  default     = "us-west1"
}

# Used as the `location` of the cluster and of every node pool in this module.
variable "zone" {
  description = "GCP zone in which the cluster and its node pools are created."
  type        = string
  default     = "us-west1-a"
}

# Required input: used as the cluster name and as the prefix of every node
# pool name in this module.
variable "cluster_name" {
  description = "Name of the GKE cluster; also used as the prefix for node pool names."
  type        = string
}

# Required input: passed to node_config.service_account of every node pool.
variable "service_account" {
  description = "Service account used by the nodes of every node pool."
  type        = string
}
Loading

0 comments on commit e73421d

Please sign in to comment.