diff --git a/examples/gke-a3-ultragpu/gke-a3-ultragpu-deployment.yaml b/examples/gke-a3-ultragpu/gke-a3-ultragpu-deployment.yaml index 0e475ec2d6..6821fd5fa9 100644 --- a/examples/gke-a3-ultragpu/gke-a3-ultragpu-deployment.yaml +++ b/examples/gke-a3-ultragpu/gke-a3-ultragpu-deployment.yaml @@ -28,3 +28,4 @@ vars: # extended_reservation: RESERVATION_NAME extended_reservation: RESERVATION_NAME/reservationBlocks/BLOCK_NAME static_node_count: NODE_COUNT + k8s_service_account_name: # add this: Kubernetes service account name used for workload identity diff --git a/examples/gke-a3-ultragpu/gke-a3-ultragpu.yaml b/examples/gke-a3-ultragpu/gke-a3-ultragpu.yaml index b833d61b77..c520599a62 100644 --- a/examples/gke-a3-ultragpu/gke-a3-ultragpu.yaml +++ b/examples/gke-a3-ultragpu/gke-a3-ultragpu.yaml @@ -32,6 +32,7 @@ vars: static_node_count: # add this system_node_pool_disk_size_gb: 200 a3ultra_node_pool_disk_size_gb: 100 + k8s_service_account_name: # add this: Kubernetes service account name used for workload identity deployment_groups: - group: primary @@ -128,6 +129,7 @@ deployment_groups: enable_gcsfuse_csi: true enable_private_endpoint: false # Allows access from authorized public IPs configure_workload_identity_sa: true + k8s_service_account_name: $(vars.k8s_service_account_name) master_authorized_networks: - cidr_block: $(vars.authorized_cidr) # Allows your machine to run the kubectl command. Required for multi network setup. display_name: "kubectl-access-network" @@ -222,4 +224,5 @@ deployment_groups: - nvidia-smi node_count: 2 name: run-nvidia-smi + k8s_service_account_name: $(vars.k8s_service_account_name) outputs: [instructions] diff --git a/modules/scheduler/gke-cluster/README.md b/modules/scheduler/gke-cluster/README.md index fe668c3df7..76ebe2f5cf 100644 --- a/modules/scheduler/gke-cluster/README.md +++ b/modules/scheduler/gke-cluster/README.md @@ -161,6 +161,7 @@ limitations under the License. | [enable\_private\_ipv6\_google\_access](#input\_enable\_private\_ipv6\_google\_access) | The private IPv6 google access type for the VMs in this subnet. 
| `bool` | `true` | no | | [enable\_private\_nodes](#input\_enable\_private\_nodes) | (Beta) Whether nodes have internal IP addresses only. | `bool` | `true` | no | | [gcp\_public\_cidrs\_access\_enabled](#input\_gcp\_public\_cidrs\_access\_enabled) | Whether the cluster master is accessible via all the Google Compute Engine Public IPs. To view this list of IP addresses look here https://cloud.google.com/compute/docs/faq#find_ip_range | `bool` | `false` | no | +| [k8s\_service\_account\_name](#input\_k8s\_service\_account\_name) | Kubernetes service account name to use with the gke cluster | `string` | `"workload-identity-k8s-sa"` | no | | [labels](#input\_labels) | GCE resource labels to be applied to resources. Key-value pairs. | `map(string)` | n/a | yes | | [maintenance\_exclusions](#input\_maintenance\_exclusions) | List of maintenance exclusions. A cluster can have up to three. |
list(object({| `[]` | no | | [maintenance\_start\_time](#input\_maintenance\_start\_time) | Start time for daily maintenance operations. Specified in GMT with `HH:MM` format. | `string` | `"09:00"` | no | diff --git a/modules/scheduler/gke-cluster/main.tf b/modules/scheduler/gke-cluster/main.tf index 42b20fbd59..83b6717519 100644 --- a/modules/scheduler/gke-cluster/main.tf +++ b/modules/scheduler/gke-cluster/main.tf @@ -365,7 +365,7 @@ module "workload_identity" { version = "~> 34.0" use_existing_gcp_sa = true - name = "workload-identity-k8s-sa" + name = var.k8s_service_account_name gcp_sa_name = local.sa_email project_id = var.project_id diff --git a/modules/scheduler/gke-cluster/variables.tf b/modules/scheduler/gke-cluster/variables.tf index a181c58239..ca34162c0a 100644 --- a/modules/scheduler/gke-cluster/variables.tf +++ b/modules/scheduler/gke-cluster/variables.tf @@ -289,6 +289,12 @@ variable "configure_workload_identity_sa" { default = false } +variable "k8s_service_account_name" { + description = "Kubernetes service account name to use with the gke cluster" + type = string + default = "workload-identity-k8s-sa" +} + variable "autoscaling_profile" { description = "(Beta) Optimize for utilization or availability when deciding to remove nodes. Can be BALANCED or OPTIMIZE_UTILIZATION." 
type = string diff --git a/tools/cloud-build/daily-tests/tests/gke-a3-ultragpu.yml b/tools/cloud-build/daily-tests/tests/gke-a3-ultragpu.yml index 97d559d271..3d9e458ae3 100644 --- a/tools/cloud-build/daily-tests/tests/gke-a3-ultragpu.yml +++ b/tools/cloud-build/daily-tests/tests/gke-a3-ultragpu.yml @@ -26,6 +26,7 @@ zone: europe-west1-b remote_node: "{{ deployment_name }}-remote-node-0" extended_reservation: hpc-exfr-2 static_node_count: 1 +k8s_service_account_name: workload-identity-k8s-sa cli_deployment_vars: region: "{{ region }}" zone: "{{ zone }}" @@ -33,6 +34,7 @@ cli_deployment_vars: extended_reservation: "{{ extended_reservation }}" authorized_cidr: "{{ build_ip.stdout }}/32" gcp_public_cidrs_access_enabled: true + k8s_service_account_name: "{{ k8s_service_account_name }}" custom_vars: project: "{{ project }}" post_deploy_tests:
name = string
start_time = string
end_time = string
exclusion_scope = string
}))