From a47a673b4a3fcac9c1588b4639a9f7bf87e16726 Mon Sep 17 00:00:00 2001 From: "yuye@microsoft.com" Date: Mon, 21 Oct 2019 13:04:44 +0800 Subject: [PATCH 01/69] config update --- .../k8sPaiLibrary/kubespray-vars/openpai.yaml | 241 ++++++++++++++++++ 1 file changed, 241 insertions(+) create mode 100644 deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml diff --git a/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml b/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml new file mode 100644 index 0000000000..fd3b83f13b --- /dev/null +++ b/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml @@ -0,0 +1,241 @@ +############## etcd configuration ############## + +# etcd_deployment_type: docker + +# etcd_version: v3.3.10 + +## Set level of detail for etcd exported metrics, specify 'extensive' to include histogram metrics. +# etcd_metrics: basic + +## Etcd is restricted by default to 512M on systems under 4GB RAM, 512MB is not enough for much more than testing. +## Set this if your etcd nodes have less than 4GB but you want more RAM for etcd. Set to 0 for unrestricted RAM. +etcd_memory_limit: "0" + +## Etcd has a default of 2G for its space quota. If you put a value in etcd_memory_limit which is less than +## etcd_quota_backend_bytes, you may encounter out of memory terminations of the etcd cluster. Please check +## etcd documentation for more information. 
+## https://etcd.io/docs/v3.4.0/op-guide/configuration/ +etcd_quota_backend_bytes: "8G" + +############## nginx configuration ############## + +# Requests for load balancer app +loadbalancer_apiserver_memory_requests: 1024M +loadbalancer_apiserver_cpu_requests: 1000m + +loadbalancer_apiserver_keepalive_timeout: 15m + +## Internal loadbalancers for apiservers +# loadbalancer_apiserver_localhost: true + +## valid options are "nginx" or "haproxy" +# loadbalancer_apiserver_type: "nginx" +## applied if only external loadbalancer_apiserver is defined, otherwise ignored +# apiserver_loadbalancer_domain_name: "lb-apiserver.kubernetes.local" + +## Local loadbalancer should use this port +## And must be set port 6443 +# loadbalancer_apiserver_port: 6443 + +## If loadbalancer_apiserver_healthcheck_port variable defined, enables proxy liveness check for nginx. +# loadbalancer_apiserver_healthcheck_port: 8081 + +############## kubernetes configuration ############## + +## Change this to use another Kubernetes version, e.g. a current beta release +# kube_version: v1.15.3 + +## kubernetes image repo define +# kube_image_repo: "gcr.io/google-containers" + +## Cluster Loglevel configuration +# kube_log_level: 2 + +## Make a copy of kubeconfig on the host that runs Ansible in {{ inventory_dir }}/artifacts +kubeconfig_localhost: true +## Download kubectl onto the host that runs Ansible in {{ bin_dir }} +kubectl_localhost: true + +kubelet_status_update_frequency: 10s + +## For some things, kubelet needs to load kernel modules. For example, +## dynamic kernel services are needed for mounting persistent volumes into containers. These may not be +## loaded by preinstall kubernetes processes. For example, ceph and rbd backed volumes. Set this variable to +## true to let kubelet load kernel modules. 
+# kubelet_load_modules: false + +## Configure the amount of pods able to run on single node +## default is equal to application default +# kubelet_max_pods: 110 + +## Support custom flags to be passed to kubelet +kubelet_custom_flags: + - "--eviction-hard=" + - "--image-gc-high-threshold=100" + - "--image-gc-low-threshold=95" + - "--image-pull-progress-deadline=10m" + +kube_feature_gates: + - "DevicePlugins=true" + - "TaintBasedEvictions=true" + - "PodPriority=true" + +## Support custom flags to be passed to kubelet only on nodes, not masters +# kubelet_node_custom_flags: [] + +k8s_image_pull_policy: Always + +## extra runtime config +# kube_api_runtime_config: [] + +##### networking + +## Scale: 4096 nodes, 100 pods per node + +## Kubernetes internal network for services, unused block of space. +kube_service_addresses: 10.192.0.0/13 + +## internal network. When used, it will assign IP +## addresses from this range to individual pods. +## This network must be unused in your network infrastructure! +kube_pods_subnet: 10.200.0.0/13 + +## internal network node size allocation (optional). This is the size allocated +## to each node on your network. With these defaults you should have +## room for 64 nodes with 254 pods per node. 
+## Example: Up to 256 nodes, 100 pods per node (/16 network): +## - kube_service_addresses: 10.233.0.0/17 +## - kube_pods_subnet: 10.233.128.0/17 +## - kube_network_node_prefix: 25 +## Example: Up to 4096 nodes, 100 pods per node (/12 network): +## - kube_service_addresses: 10.192.0.0/13 +## - kube_pods_subnet: 10.200.0.0/13 +## - kube_network_node_prefix: 25 +kube_network_node_prefix: 25 + +##### api-server + +kube_kubeadm_apiserver_extra_args: + cors-allowed-origins: ".*" + storage-media-type: "application/json" + max-requests-inflight: 1500 + max-mutating-requests-inflight: 500 + +## Extra control plane host volume mounts +## Example: +## apiserver_extra_volumes: +## - name: name +## hostPath: /host/path +## mountPath: /mount/path +## readOnly: true +# apiserver_extra_volumes: {} + +## ETCD backend for k8s data +# kube_apiserver_storage_backend: etcd3 + +## change to 0.0.0.0 to enable insecure access from anywhere (not recommended) +# kube_apiserver_insecure_bind_address: 127.0.0.1 + +## By default the external API listens on all interfaces, this can be changed to +## listen on a specific address/interface. +# kube_apiserver_bind_address: 0.0.0.0 + +## A port range to reserve for services with NodePort visibility. +## Inclusive at both ends of the range. 
+# kube_apiserver_node_port_range: "30000-32767" + +kube_apiserver_memory_limit: 20000M +kube_apiserver_cpu_limit: 10000m +kube_apiserver_memory_requests: 1024M +kube_apiserver_cpu_requests: 1000m +kube_apiserver_request_timeout: "1m0s" + +# 1.9 and below Admission control plug-ins +kube_apiserver_admission_control: + - NamespaceLifecycle + - LimitRanger + - ServiceAccount + - DefaultStorageClass + - PersistentVolumeClaimResize + - MutatingAdmissionWebhook + - ValidatingAdmissionWebhook + - ResourceQuota + - DefaultTolerationSeconds + - DenyEscalatingExec + +## 1.10+ admission plugins +# kube_apiserver_enable_admission_plugins: [] + +## 1.10+ list of disabled admission plugins +# kube_apiserver_disable_admission_plugins: [] + +##### controller + +kube_kubeadm_controller_extra_args: + leader-elect: "true" + +# controller_manager_extra_volumes: {} + +# kube_controller_manager_bind_address: 0.0.0.0 + +kube_controller_memory_limit: 20000M +kube_controller_cpu_limit: 10000m +kube_controller_memory_requests: 1024M +kube_controller_cpu_requests: 1000m +kube_controller_node_monitor_grace_period: 40s +kube_controller_node_monitor_period: 5s +kube_controller_pod_eviction_timeout: 5m0s +kube_controller_terminated_pod_gc_threshold: 12500 + +##### scheduler + +# kube_kubeadm_scheduler_extra_args: {} + +# scheduler_extra_volumes: {} + +# kube_scheduler_bind_address: 0.0.0.0 + +kube_scheduler_memory_limit: 200000M +kube_scheduler_cpu_limit: 10000m +kube_scheduler_memory_requests: 1024M +kube_scheduler_cpu_requests: 1000m + +############## other configuration ############## + +## Optionally reserve resources for OS system daemons. 
+system_reserved: true +## Uncomment to override default values +system_memory_reserved: 3072M +# system_cpu_reserved: 500m +## Reservation for master hosts +# system_master_memory_reserved: 256M +# system_master_cpu_reserved: 250m + + +############## docker configuration ############## + +# docker_version: latest + +docker_daemon_graph: "/docker_data_root_test" + +docker_log_opts: "--log-opt max-size=2g --log-opt max-file=1 log-driver=json-file" + +# add --add-runtime nvidia=/usr/bin/nvidia-container-runtime for nvidia-runtime in docker_option +docker_options: >- + --add-runtime nvidia=/usr/bin/nvidia-container-runtime + {%- if docker_insecure_registries is defined %} + {{ docker_insecure_registries | map('regex_replace', '^(.*)$', '--insecure-registry=\1' ) | list | join(' ') }} + {%- endif %} + {% if docker_registry_mirrors is defined %} + {{ docker_registry_mirrors | map('regex_replace', '^(.*)$', '--registry-mirror=\1' ) | list | join(' ') }} + {%- endif %} + {%- if docker_version != "latest" and docker_version is version('17.05', '<') %} + --graph={{ docker_daemon_graph }} {% if ansible_os_family not in ["openSUSE Leap", "openSUSE Tumbleweed", "Suse"] %}{{ docker_log_opts }}{% endif %} + {%- else %} + --data-root={{ docker_daemon_graph }} {% if ansible_os_family not in ["openSUSE Leap", "openSUSE Tumbleweed", "Suse"] %}{{ docker_log_opts }}{% endif %} + {%- endif %} + {%- if ansible_architecture == "aarch64" and ansible_os_family == "RedHat" %} + --add-runtime docker-runc=/usr/libexec/docker/docker-runc-current + --default-runtime=docker-runc --exec-opt native.cgroupdriver=systemd + --userland-proxy-path=/usr/libexec/docker/docker-proxy-current --signature-verification=false + {%- endif -%} \ No newline at end of file From b2bd51707b539f3d7f6f344180d1b2240a903c32 Mon Sep 17 00:00:00 2001 From: "yuye@microsoft.com" Date: Mon, 21 Oct 2019 16:31:10 +0800 Subject: [PATCH 02/69] config update --- deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml | 5 +---- 1 
file changed, 1 insertion(+), 4 deletions(-) diff --git a/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml b/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml index fd3b83f13b..8263f085a2 100644 --- a/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml +++ b/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml @@ -70,9 +70,6 @@ kubelet_status_update_frequency: 10s ## Support custom flags to be passed to kubelet kubelet_custom_flags: - - "--eviction-hard=" - - "--image-gc-high-threshold=100" - - "--image-gc-low-threshold=95" - "--image-pull-progress-deadline=10m" kube_feature_gates: @@ -222,7 +219,7 @@ docker_log_opts: "--log-opt max-size=2g --log-opt max-file=1 log-driver=json-fil # add --add-runtime nvidia=/usr/bin/nvidia-container-runtime for nvidia-runtime in docker_option docker_options: >- - --add-runtime nvidia=/usr/bin/nvidia-container-runtime + --add-runtime nvidia=/usr/bin/nvidia-container-runtime {%- if docker_insecure_registries is defined %} {{ docker_insecure_registries | map('regex_replace', '^(.*)$', '--insecure-registry=\1' ) | list | join(' ') }} {%- endif %} From 99b45a8abc9d70f33ca6157ed98b01d5826c05be Mon Sep 17 00:00:00 2001 From: "yuye@microsoft.com" Date: Mon, 21 Oct 2019 16:58:33 +0800 Subject: [PATCH 03/69] config update --- .../k8sPaiLibrary/kubespray-vars/openpai.yaml | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml b/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml index 8263f085a2..91ec826419 100644 --- a/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml +++ b/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml @@ -226,13 +226,13 @@ docker_options: >- {% if docker_registry_mirrors is defined %} {{ docker_registry_mirrors | map('regex_replace', '^(.*)$', '--registry-mirror=\1' ) | list | join(' ') }} {%- endif %} - {%- if docker_version != "latest" and docker_version is version('17.05', '<') %} + {%- if docker_version != "latest" and 
docker_version is version('17.05', '<') %} --graph={{ docker_daemon_graph }} {% if ansible_os_family not in ["openSUSE Leap", "openSUSE Tumbleweed", "Suse"] %}{{ docker_log_opts }}{% endif %} {%- else %} --data-root={{ docker_daemon_graph }} {% if ansible_os_family not in ["openSUSE Leap", "openSUSE Tumbleweed", "Suse"] %}{{ docker_log_opts }}{% endif %} - {%- endif %} - {%- if ansible_architecture == "aarch64" and ansible_os_family == "RedHat" %} - --add-runtime docker-runc=/usr/libexec/docker/docker-runc-current - --default-runtime=docker-runc --exec-opt native.cgroupdriver=systemd - --userland-proxy-path=/usr/libexec/docker/docker-proxy-current --signature-verification=false - {%- endif -%} \ No newline at end of file + {%- endif %} + {%- if ansible_architecture == "aarch64" and ansible_os_family == "RedHat" %} + --add-runtime docker-runc=/usr/libexec/docker/docker-runc-current + --default-runtime=docker-runc --exec-opt native.cgroupdriver=systemd + --userland-proxy-path=/usr/libexec/docker/docker-proxy-current --signature-verification=false + {%- endif -%} \ No newline at end of file From cd7375f58c198675812cda87046fb437600ea5b0 Mon Sep 17 00:00:00 2001 From: "yuye@microsoft.com" Date: Mon, 21 Oct 2019 16:59:34 +0800 Subject: [PATCH 04/69] config update --- .../k8sPaiLibrary/kubespray-vars/openpai.yaml | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml b/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml index 91ec826419..e7259a444e 100644 --- a/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml +++ b/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml @@ -220,16 +220,16 @@ docker_log_opts: "--log-opt max-size=2g --log-opt max-file=1 log-driver=json-fil # add --add-runtime nvidia=/usr/bin/nvidia-container-runtime for nvidia-runtime in docker_option docker_options: >- --add-runtime nvidia=/usr/bin/nvidia-container-runtime - {%- if docker_insecure_registries is defined %} 
- {{ docker_insecure_registries | map('regex_replace', '^(.*)$', '--insecure-registry=\1' ) | list | join(' ') }} - {%- endif %} - {% if docker_registry_mirrors is defined %} - {{ docker_registry_mirrors | map('regex_replace', '^(.*)$', '--registry-mirror=\1' ) | list | join(' ') }} - {%- endif %} + {%- if docker_insecure_registries is defined %} + {{ docker_insecure_registries | map('regex_replace', '^(.*)$', '--insecure-registry=\1' ) | list | join(' ') }} + {%- endif %} + {% if docker_registry_mirrors is defined %} + {{ docker_registry_mirrors | map('regex_replace', '^(.*)$', '--registry-mirror=\1' ) | list | join(' ') }} + {%- endif %} {%- if docker_version != "latest" and docker_version is version('17.05', '<') %} - --graph={{ docker_daemon_graph }} {% if ansible_os_family not in ["openSUSE Leap", "openSUSE Tumbleweed", "Suse"] %}{{ docker_log_opts }}{% endif %} - {%- else %} - --data-root={{ docker_daemon_graph }} {% if ansible_os_family not in ["openSUSE Leap", "openSUSE Tumbleweed", "Suse"] %}{{ docker_log_opts }}{% endif %} + --graph={{ docker_daemon_graph }} {% if ansible_os_family not in ["openSUSE Leap", "openSUSE Tumbleweed", "Suse"] %}{{ docker_log_opts }}{% endif %} + {%- else %} + --data-root={{ docker_daemon_graph }} {% if ansible_os_family not in ["openSUSE Leap", "openSUSE Tumbleweed", "Suse"] %}{{ docker_log_opts }}{% endif %} {%- endif %} {%- if ansible_architecture == "aarch64" and ansible_os_family == "RedHat" %} --add-runtime docker-runc=/usr/libexec/docker/docker-runc-current From 49e7fad9ff9635959a71e7e5d26b51c856cb9ed6 Mon Sep 17 00:00:00 2001 From: "yuye@microsoft.com" Date: Mon, 21 Oct 2019 17:48:12 +0800 Subject: [PATCH 05/69] config update --- deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml | 5 ----- 1 file changed, 5 deletions(-) diff --git a/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml b/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml index e7259a444e..eeb68e9cfe 100644 --- 
a/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml +++ b/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml @@ -226,11 +226,6 @@ docker_options: >- {% if docker_registry_mirrors is defined %} {{ docker_registry_mirrors | map('regex_replace', '^(.*)$', '--registry-mirror=\1' ) | list | join(' ') }} {%- endif %} - {%- if docker_version != "latest" and docker_version is version('17.05', '<') %} - --graph={{ docker_daemon_graph }} {% if ansible_os_family not in ["openSUSE Leap", "openSUSE Tumbleweed", "Suse"] %}{{ docker_log_opts }}{% endif %} - {%- else %} - --data-root={{ docker_daemon_graph }} {% if ansible_os_family not in ["openSUSE Leap", "openSUSE Tumbleweed", "Suse"] %}{{ docker_log_opts }}{% endif %} - {%- endif %} {%- if ansible_architecture == "aarch64" and ansible_os_family == "RedHat" %} --add-runtime docker-runc=/usr/libexec/docker/docker-runc-current --default-runtime=docker-runc --exec-opt native.cgroupdriver=systemd From f53f6cfaf1ee65732f45fc2fab2dbb136b750a94 Mon Sep 17 00:00:00 2001 From: "yuye@microsoft.com" Date: Tue, 22 Oct 2019 11:26:05 +0800 Subject: [PATCH 06/69] config update --- deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml b/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml index eeb68e9cfe..e7259a444e 100644 --- a/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml +++ b/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml @@ -226,6 +226,11 @@ docker_options: >- {% if docker_registry_mirrors is defined %} {{ docker_registry_mirrors | map('regex_replace', '^(.*)$', '--registry-mirror=\1' ) | list | join(' ') }} {%- endif %} + {%- if docker_version != "latest" and docker_version is version('17.05', '<') %} + --graph={{ docker_daemon_graph }} {% if ansible_os_family not in ["openSUSE Leap", "openSUSE Tumbleweed", "Suse"] %}{{ docker_log_opts }}{% endif %} + {%- else %} + --data-root={{ docker_daemon_graph }} {% if 
ansible_os_family not in ["openSUSE Leap", "openSUSE Tumbleweed", "Suse"] %}{{ docker_log_opts }}{% endif %} + {%- endif %} {%- if ansible_architecture == "aarch64" and ansible_os_family == "RedHat" %} --add-runtime docker-runc=/usr/libexec/docker/docker-runc-current --default-runtime=docker-runc --exec-opt native.cgroupdriver=systemd From 6adf3382a44b0e2af54123c22738da0c16d80596 Mon Sep 17 00:00:00 2001 From: "yuye@microsoft.com" Date: Tue, 22 Oct 2019 11:33:52 +0800 Subject: [PATCH 07/69] issue fix --- deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml b/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml index e7259a444e..18630311e9 100644 --- a/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml +++ b/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml @@ -215,7 +215,7 @@ system_memory_reserved: 3072M docker_daemon_graph: "/docker_data_root_test" -docker_log_opts: "--log-opt max-size=2g --log-opt max-file=1 log-driver=json-file" +docker_log_opts: "--log-opt max-size=2g --log-opt max-file=1 --log-driver=json-file" # add --add-runtime nvidia=/usr/bin/nvidia-container-runtime for nvidia-runtime in docker_option docker_options: >- From c5ff8abb2aa05fe03f2c56714ecb5420219f8f3e Mon Sep 17 00:00:00 2001 From: "yuye@microsoft.com" Date: Tue, 22 Oct 2019 13:29:40 +0800 Subject: [PATCH 08/69] issue fix --- deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml b/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml index 18630311e9..0fdb4701bf 100644 --- a/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml +++ b/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml @@ -15,7 +15,7 @@ etcd_memory_limit: "0" ## etcd_quota_backend_bytes, you may encounter out of memory terminations of the etcd cluster. 
Please check ## etcd documentation for more information. ## https://etcd.io/docs/v3.4.0/op-guide/configuration/ -etcd_quota_backend_bytes: "8G" +etcd_quota_backend_bytes: "8589934592" ############## nginx configuration ############## From 75dd957ba15a53f7312a84d71400e6922679bb00 Mon Sep 17 00:00:00 2001 From: "yuye@microsoft.com" Date: Tue, 22 Oct 2019 16:27:07 +0800 Subject: [PATCH 09/69] issue fix --- deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml b/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml index 0fdb4701bf..133814124f 100644 --- a/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml +++ b/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml @@ -220,6 +220,7 @@ docker_log_opts: "--log-opt max-size=2g --log-opt max-file=1 --log-driver=json-f # add --add-runtime nvidia=/usr/bin/nvidia-container-runtime for nvidia-runtime in docker_option docker_options: >- --add-runtime nvidia=/usr/bin/nvidia-container-runtime + --default-runtime nvidia {%- if docker_insecure_registries is defined %} {{ docker_insecure_registries | map('regex_replace', '^(.*)$', '--insecure-registry=\1' ) | list | join(' ') }} {%- endif %} From acb93e9bb96f0e54b985c0148528b3e9c707db02 Mon Sep 17 00:00:00 2001 From: "yuye@microsoft.com" Date: Fri, 25 Oct 2019 16:02:19 +0800 Subject: [PATCH 10/69] enable ip-table forward in docker --- deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml b/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml index 133814124f..4bd2e01ff5 100644 --- a/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml +++ b/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml @@ -213,6 +213,9 @@ system_memory_reserved: 3072M # docker_version: latest +## Used to set docker daemon iptables options to true +docker_iptables_enabled: "true" + docker_daemon_graph: 
"/docker_data_root_test" docker_log_opts: "--log-opt max-size=2g --log-opt max-file=1 --log-driver=json-file" From 98697ee188a0135874db6d5553d3d209856cc54e Mon Sep 17 00:00:00 2001 From: "yuye@microsoft.com" Date: Mon, 28 Oct 2019 17:46:43 +0800 Subject: [PATCH 11/69] configuration --- .../k8sPaiLibrary/kubespray-vars/openpai.yaml | 36 +++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml b/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml index 4bd2e01ff5..4e7ee39a02 100644 --- a/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml +++ b/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml @@ -40,6 +40,42 @@ loadbalancer_apiserver_keepalive_timeout: 15m ## If loadbalancer_apiserver_healthcheck_port variable defined, enables proxy liveness check for nginx. # loadbalancer_apiserver_healthcheck_port: 8081 + +############## DNS configuration ############## +## Kubernetes cluster name, also will be used as DNS domain +# cluster_name: cluster.local + +## Subdomains of DNS domain to be resolved via /etc/resolv.conf for hostnet pods +# ndots: 2 + +## Can be coredns, coredns_dual, manual or none +# dns_mode: coredns + +## Set manual server if using a custom cluster DNS server +## manual_dns_server: 10.x.x.x +## Enable nodelocal dns cache +enable_nodelocaldns: false +# nodelocaldns_ip: 169.254.25.10 +# nodelocaldns_health_port: 9254 + +## Enable k8s_external plugin for CoreDNS +# enable_coredns_k8s_external: false +# coredns_k8s_external_zone: k8s_external.local + +## Enable endpoint_pod_names option for kubernetes plugin +# enable_coredns_k8s_endpoint_pod_names: false + +## Can be docker_dns, host_resolvconf or none +# resolvconf_mode: docker_dns + +## Deploy netchecker app to verify DNS resolve as an HTTP service +# deploy_netchecker: false + +## Ip address of the kubernetes skydns service +# skydns_server: "{{ kube_service_addresses|ipaddr('net')|ipaddr(3)|ipaddr('address') }}" +# 
skydns_server_secondary: "{{ kube_service_addresses|ipaddr('net')|ipaddr(4)|ipaddr('address') }}" +# dns_domain: "{{ cluster_name }}" + ############## kubernetes configuration ############## ## Change this to use another Kubernetes version, e.g. a current beta release From 4c168ca6f0dc18156c360798eddc70b2662f6fa1 Mon Sep 17 00:00:00 2001 From: "yuye@microsoft.com" Date: Thu, 31 Oct 2019 15:19:41 +0800 Subject: [PATCH 12/69] configuration --- deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml b/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml index 4e7ee39a02..cc5d16cd22 100644 --- a/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml +++ b/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml @@ -123,6 +123,13 @@ k8s_image_pull_policy: Always ##### networking +# Choose network plugin (cilium, calico, contiv, weave or flannel. Use cni for generic cni plugin) +# Can also be set to 'cloud', which lets the cloud provider setup appropriate routing +kube_network_plugin: flannel + +# Setting multi_networking to true will install Multus: https://github.com/intel/multus-cni +kube_network_plugin_multus: false + ## Scale: 4096 nodes, 100 pods per node ## Kubernetes internal network for services, unused block of space. 
From 6e8b0ecad8ee52b6cdbaaa536915024e97a12ba3 Mon Sep 17 00:00:00 2001 From: "yuye@microsoft.com" Date: Thu, 31 Oct 2019 16:13:38 +0800 Subject: [PATCH 13/69] flannel solution --- deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml b/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml index cc5d16cd22..dadeace00b 100644 --- a/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml +++ b/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml @@ -54,7 +54,7 @@ loadbalancer_apiserver_keepalive_timeout: 15m ## Set manual server if using a custom cluster DNS server ## manual_dns_server: 10.x.x.x ## Enable nodelocal dns cache -enable_nodelocaldns: false +# enable_nodelocaldns: false # nodelocaldns_ip: 169.254.25.10 # nodelocaldns_health_port: 9254 @@ -257,7 +257,7 @@ system_memory_reserved: 3072M # docker_version: latest ## Used to set docker daemon iptables options to true -docker_iptables_enabled: "true" +# docker_iptables_enabled: "true" docker_daemon_graph: "/docker_data_root_test" From e8bf5541b468972adac342f691ff157f4d6110f5 Mon Sep 17 00:00:00 2001 From: "yuye@microsoft.com" Date: Thu, 31 Oct 2019 16:28:31 +0800 Subject: [PATCH 14/69] flannel solution --- deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml b/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml index dadeace00b..9a12b9fa0a 100644 --- a/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml +++ b/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml @@ -54,7 +54,7 @@ loadbalancer_apiserver_keepalive_timeout: 15m ## Set manual server if using a custom cluster DNS server ## manual_dns_server: 10.x.x.x ## Enable nodelocal dns cache -# enable_nodelocaldns: false +enable_nodelocaldns: false # nodelocaldns_ip: 169.254.25.10 # nodelocaldns_health_port: 9254 From 
7d2a9460a9f443f7e59106c4444014dc0fa6cfc6 Mon Sep 17 00:00:00 2001 From: "yuye@microsoft.com" Date: Fri, 1 Nov 2019 11:43:25 +0800 Subject: [PATCH 15/69] flannel solution --- .../k8sPaiLibrary/kubespray-vars/openpai.yaml | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml b/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml index 9a12b9fa0a..1333120f57 100644 --- a/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml +++ b/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml @@ -69,7 +69,7 @@ enable_nodelocaldns: false # resolvconf_mode: docker_dns ## Deploy netchecker app to verify DNS resolve as an HTTP service -# deploy_netchecker: false +deploy_netchecker: true ## Ip address of the kubernetes skydns service # skydns_server: "{{ kube_service_addresses|ipaddr('net')|ipaddr(3)|ipaddr('address') }}" @@ -87,6 +87,9 @@ enable_nodelocaldns: false ## Cluster Loglevel configuration # kube_log_level: 2 +# kube_token_auth: true +# kube_basic_auth: true + ## Make a copy of kubeconfig on the host that runs Ansible in {{ inventory_dir }}/artifacts kubeconfig_localhost: true ## Download kubectl onto the host that runs Ansible in {{ bin_dir }} @@ -251,6 +254,14 @@ system_memory_reserved: 3072M # system_master_memory_reserved: 256M # system_master_cpu_reserved: 250m +############## addon configuration ############## + +# Helm deployment +# helm_enabled: true + +# Cert manager deployment +# cert_manager_enabled: true +# cert_manager_namespace: "cert-manager" ############## docker configuration ############## From 98e2032adde8816a9bc3ac53970314e8a14f4dd4 Mon Sep 17 00:00:00 2001 From: "yuye@microsoft.com" Date: Fri, 1 Nov 2019 11:43:37 +0800 Subject: [PATCH 16/69] flannel solution --- deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml 
b/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml index 1333120f57..153154816a 100644 --- a/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml +++ b/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml @@ -69,7 +69,7 @@ enable_nodelocaldns: false # resolvconf_mode: docker_dns ## Deploy netchecker app to verify DNS resolve as an HTTP service -deploy_netchecker: true +# deploy_netchecker: true ## Ip address of the kubernetes skydns service # skydns_server: "{{ kube_service_addresses|ipaddr('net')|ipaddr(3)|ipaddr('address') }}" From dc8e80cd00c0f3fe62af4b1bc30eb1c72ecd5c0d Mon Sep 17 00:00:00 2001 From: "yuye@microsoft.com" Date: Fri, 1 Nov 2019 12:20:13 +0800 Subject: [PATCH 17/69] remove unused fg --- deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml b/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml index 153154816a..e11a316ff7 100644 --- a/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml +++ b/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml @@ -111,10 +111,7 @@ kubelet_status_update_frequency: 10s kubelet_custom_flags: - "--image-pull-progress-deadline=10m" -kube_feature_gates: - - "DevicePlugins=true" - - "TaintBasedEvictions=true" - - "PodPriority=true" +# kube_feature_gates: [] ## Support custom flags to be passed to kubelet only on nodes, not masters # kubelet_node_custom_flags: [] From d447f7bc8a63cb9291e454695bd1c27f0fe64d47 Mon Sep 17 00:00:00 2001 From: "yuye@microsoft.com" Date: Fri, 1 Nov 2019 13:41:50 +0800 Subject: [PATCH 18/69] auth --- deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml b/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml index e11a316ff7..497052cdca 100644 --- a/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml +++ 
b/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml @@ -87,8 +87,8 @@ enable_nodelocaldns: false ## Cluster Loglevel configuration # kube_log_level: 2 -# kube_token_auth: true -# kube_basic_auth: true +kube_token_auth: true +kube_basic_auth: true ## Make a copy of kubeconfig on the host that runs Ansible in {{ inventory_dir }}/artifacts kubeconfig_localhost: true From dab0d7a580a3cbeef269617c9164cd8263d709c0 Mon Sep 17 00:00:00 2001 From: "yuye@microsoft.com" Date: Fri, 1 Nov 2019 14:47:33 +0800 Subject: [PATCH 19/69] auth --- deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml b/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml index 497052cdca..47e3f48ea4 100644 --- a/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml +++ b/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml @@ -125,7 +125,7 @@ k8s_image_pull_policy: Always # Choose network plugin (cilium, calico, contiv, weave or flannel. 
Use cni for generic cni plugin) # Can also be set to 'cloud', which lets the cloud provider setup appropriate routing -kube_network_plugin: flannel +kube_network_plugin: calico # Setting multi_networking to true will install Multus: https://github.com/intel/multus-cni kube_network_plugin_multus: false From 707b759b3bbffdabe9223a597c0fff2bc65c7566 Mon Sep 17 00:00:00 2001 From: "yuye@microsoft.com" Date: Fri, 1 Nov 2019 15:25:45 +0800 Subject: [PATCH 20/69] auth --- .../k8sPaiLibrary/kubespray-vars/openpai.yaml | 27 +++++++++---------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml b/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml index 47e3f48ea4..3de5188b97 100644 --- a/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml +++ b/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml @@ -125,7 +125,7 @@ k8s_image_pull_policy: Always # Choose network plugin (cilium, calico, contiv, weave or flannel. Use cni for generic cni plugin) # Can also be set to 'cloud', which lets the cloud provider setup appropriate routing -kube_network_plugin: calico +kube_network_plugin: flannel # Setting multi_networking to true will install Multus: https://github.com/intel/multus-cni kube_network_plugin_multus: false @@ -191,17 +191,17 @@ kube_apiserver_cpu_requests: 1000m kube_apiserver_request_timeout: "1m0s" # 1.9 and below Admission control plug-ins -kube_apiserver_admission_control: - - NamespaceLifecycle - - LimitRanger - - ServiceAccount - - DefaultStorageClass - - PersistentVolumeClaimResize - - MutatingAdmissionWebhook - - ValidatingAdmissionWebhook - - ResourceQuota - - DefaultTolerationSeconds - - DenyEscalatingExec +#kube_apiserver_admission_control: +# - NamespaceLifecycle +# - LimitRanger +# - ServiceAccount +# - DefaultStorageClass +# - PersistentVolumeClaimResize +# - MutatingAdmissionWebhook +# - ValidatingAdmissionWebhook +# - ResourceQuota +# - DefaultTolerationSeconds +# - DenyEscalatingExec ## 
1.10+ admission plugins # kube_apiserver_enable_admission_plugins: [] @@ -211,8 +211,7 @@ kube_apiserver_admission_control: ##### controller -kube_kubeadm_controller_extra_args: - leader-elect: "true" +# kube_kubeadm_controller_extra_args: {} # controller_manager_extra_volumes: {} From 75c35629cba30022de819d46d67110ec5c0116f8 Mon Sep 17 00:00:00 2001 From: "yuye@microsoft.com" Date: Fri, 1 Nov 2019 16:13:42 +0800 Subject: [PATCH 21/69] auth --- .../k8sPaiLibrary/kubespray-vars/openpai.yaml | 28 +++++++++---------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml b/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml index 3de5188b97..2fc5f0cba4 100644 --- a/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml +++ b/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml @@ -133,12 +133,12 @@ kube_network_plugin_multus: false ## Scale: 4096 nodes, 100 pods per node ## Kubernetes internal network for services, unused block of space. -kube_service_addresses: 10.192.0.0/13 +# kube_service_addresses: 10.192.0.0/13 ## internal network. When used, it will assign IP ## addresses from this range to individual pods. ## This network must be unused in your network infrastructure! -kube_pods_subnet: 10.200.0.0/13 +# kube_pods_subnet: 10.200.0.0/13 ## internal network node size allocation (optional). This is the size allocated ## to each node on your network. 
With these defaults you should have @@ -151,7 +151,7 @@ kube_pods_subnet: 10.200.0.0/13 ## - kube_service_addresses: 10.192.0.0/13 ## - kube_pods_subnet: 10.200.0.0/13 ## - kube_network_node_prefix: 25 -kube_network_node_prefix: 25 +# kube_network_node_prefix: 25 ##### api-server @@ -191,17 +191,17 @@ kube_apiserver_cpu_requests: 1000m kube_apiserver_request_timeout: "1m0s" # 1.9 and below Admission control plug-ins -#kube_apiserver_admission_control: -# - NamespaceLifecycle -# - LimitRanger -# - ServiceAccount -# - DefaultStorageClass -# - PersistentVolumeClaimResize -# - MutatingAdmissionWebhook -# - ValidatingAdmissionWebhook -# - ResourceQuota -# - DefaultTolerationSeconds -# - DenyEscalatingExec +kube_apiserver_admission_control: + - NamespaceLifecycle + - LimitRanger + - ServiceAccount + - DefaultStorageClass + - PersistentVolumeClaimResize + - MutatingAdmissionWebhook + - ValidatingAdmissionWebhook + - ResourceQuota + - DefaultTolerationSeconds + - DenyEscalatingExec ## 1.10+ admission plugins # kube_apiserver_enable_admission_plugins: [] From b86fd66dfb7e25574da0c8a34621af645a19d5b9 Mon Sep 17 00:00:00 2001 From: "yuye@microsoft.com" Date: Fri, 1 Nov 2019 16:51:55 +0800 Subject: [PATCH 22/69] auth --- .../k8sPaiLibrary/kubespray-vars/openpai.yaml | 32 +++++++++---------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml b/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml index 2fc5f0cba4..e13353bf05 100644 --- a/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml +++ b/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml @@ -95,7 +95,7 @@ kubeconfig_localhost: true ## Download kubectl onto the host that runs Ansible in {{ bin_dir }} kubectl_localhost: true -kubelet_status_update_frequency: 10s +# kubelet_status_update_frequency: 10s ## For some things, kubelet needs to load kernel modules. For example, ## dynamic kernel services are needed for mounting persistent volumes into containers. 
These may not be @@ -108,8 +108,8 @@ kubelet_status_update_frequency: 10s # kubelet_max_pods: 110 ## Support custom flags to be passed to kubelet -kubelet_custom_flags: - - "--image-pull-progress-deadline=10m" +# kubelet_custom_flags: +# - "--image-pull-progress-deadline=10m" # kube_feature_gates: [] @@ -128,17 +128,17 @@ k8s_image_pull_policy: Always kube_network_plugin: flannel # Setting multi_networking to true will install Multus: https://github.com/intel/multus-cni -kube_network_plugin_multus: false +# kube_network_plugin_multus: false ## Scale: 4096 nodes, 100 pods per node ## Kubernetes internal network for services, unused block of space. -# kube_service_addresses: 10.192.0.0/13 +kube_service_addresses: 10.192.0.0/13 ## internal network. When used, it will assign IP ## addresses from this range to individual pods. ## This network must be unused in your network infrastructure! -# kube_pods_subnet: 10.200.0.0/13 +kube_pods_subnet: 10.200.0.0/13 ## internal network node size allocation (optional). This is the size allocated ## to each node on your network. 
With these defaults you should have @@ -151,15 +151,15 @@ kube_network_plugin_multus: false ## - kube_service_addresses: 10.192.0.0/13 ## - kube_pods_subnet: 10.200.0.0/13 ## - kube_network_node_prefix: 25 -# kube_network_node_prefix: 25 +kube_network_node_prefix: 25 ##### api-server -kube_kubeadm_apiserver_extra_args: - cors-allowed-origins: ".*" - storage-media-type: "application/json" - max-requests-inflight: 1500 - max-mutating-requests-inflight: 500 +# kube_kubeadm_apiserver_extra_args: +# cors-allowed-origins: ".*" +# storage-media-type: "application/json" +# max-requests-inflight: 1500 +# max-mutating-requests-inflight: 500 ## Extra control plane host volume mounts ## Example: @@ -188,7 +188,7 @@ kube_apiserver_memory_limit: 20000M kube_apiserver_cpu_limit: 10000m kube_apiserver_memory_requests: 1024M kube_apiserver_cpu_requests: 1000m -kube_apiserver_request_timeout: "1m0s" +# kube_apiserver_request_timeout: "1m0s" # 1.9 and below Admission control plug-ins kube_apiserver_admission_control: @@ -221,9 +221,9 @@ kube_controller_memory_limit: 20000M kube_controller_cpu_limit: 10000m kube_controller_memory_requests: 1024M kube_controller_cpu_requests: 1000m -kube_controller_node_monitor_grace_period: 40s -kube_controller_node_monitor_period: 5s -kube_controller_pod_eviction_timeout: 5m0s +# kube_controller_node_monitor_grace_period: 40s +# kube_controller_node_monitor_period: 5s +# kube_controller_pod_eviction_timeout: 5m0s kube_controller_terminated_pod_gc_threshold: 12500 ##### scheduler From 83c2dab7e9330e566363cf02fd31035b7c7551fd Mon Sep 17 00:00:00 2001 From: "yuye@microsoft.com" Date: Fri, 1 Nov 2019 17:27:33 +0800 Subject: [PATCH 23/69] auth --- .../k8sPaiLibrary/kubespray-vars/openpai.yaml | 112 +++++++++--------- 1 file changed, 56 insertions(+), 56 deletions(-) diff --git a/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml b/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml index e13353bf05..ff79cd9b25 100644 --- 
a/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml +++ b/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml @@ -9,21 +9,21 @@ ## Etcd is restricted by default to 512M on systems under 4GB RAM, 512MB is not enough for much more than testing. ## Set this if your etcd nodes have less than 4GB but you want more RAM for etcd. Set to 0 for unrestricted RAM. -etcd_memory_limit: "0" +#etcd_memory_limit: "0" ## Etcd has a default of 2G for its space quota. If you put a value in etcd_memory_limit which is less than ## etcd_quota_backend_bytes, you may encounter out of memory terminations of the etcd cluster. Please check ## etcd documentation for more information. ## https://etcd.io/docs/v3.4.0/op-guide/configuration/ -etcd_quota_backend_bytes: "8589934592" +#etcd_quota_backend_bytes: "8589934592" ############## nginx configuration ############## # Requests for load balancer app -loadbalancer_apiserver_memory_requests: 1024M -loadbalancer_apiserver_cpu_requests: 1000m +#loadbalancer_apiserver_memory_requests: 1024M +#loadbalancer_apiserver_cpu_requests: 1000m -loadbalancer_apiserver_keepalive_timeout: 15m +#loadbalancer_apiserver_keepalive_timeout: 15m ## Internal loadbalancers for apiservers # loadbalancer_apiserver_localhost: true @@ -54,7 +54,7 @@ loadbalancer_apiserver_keepalive_timeout: 15m ## Set manual server if using a custom cluster DNS server ## manual_dns_server: 10.x.x.x ## Enable nodelocal dns cache -enable_nodelocaldns: false +#enable_nodelocaldns: false # nodelocaldns_ip: 169.254.25.10 # nodelocaldns_health_port: 9254 @@ -87,8 +87,8 @@ enable_nodelocaldns: false ## Cluster Loglevel configuration # kube_log_level: 2 -kube_token_auth: true -kube_basic_auth: true +#kube_token_auth: true +#kube_basic_auth: true ## Make a copy of kubeconfig on the host that runs Ansible in {{ inventory_dir }}/artifacts kubeconfig_localhost: true @@ -116,7 +116,7 @@ kubectl_localhost: true ## Support custom flags to be passed to kubelet only on nodes, not masters # 
kubelet_node_custom_flags: [] -k8s_image_pull_policy: Always +#k8s_image_pull_policy: Always ## extra runtime config # kube_api_runtime_config: [] @@ -184,24 +184,24 @@ kube_network_node_prefix: 25 ## Inclusive at both ends of the range. # kube_apiserver_node_port_range: "30000-32767" -kube_apiserver_memory_limit: 20000M -kube_apiserver_cpu_limit: 10000m -kube_apiserver_memory_requests: 1024M -kube_apiserver_cpu_requests: 1000m +#kube_apiserver_memory_limit: 20000M +#kube_apiserver_cpu_limit: 10000m +#kube_apiserver_memory_requests: 1024M +#kube_apiserver_cpu_requests: 1000m # kube_apiserver_request_timeout: "1m0s" # 1.9 and below Admission control plug-ins -kube_apiserver_admission_control: - - NamespaceLifecycle - - LimitRanger - - ServiceAccount - - DefaultStorageClass - - PersistentVolumeClaimResize - - MutatingAdmissionWebhook - - ValidatingAdmissionWebhook - - ResourceQuota - - DefaultTolerationSeconds - - DenyEscalatingExec +#kube_apiserver_admission_control: +# - NamespaceLifecycle +# - LimitRanger +# - ServiceAccount +# - DefaultStorageClass +# - PersistentVolumeClaimResize +# - MutatingAdmissionWebhook +# - ValidatingAdmissionWebhook +# - ResourceQuota +# - DefaultTolerationSeconds +# - DenyEscalatingExec ## 1.10+ admission plugins # kube_apiserver_enable_admission_plugins: [] @@ -217,14 +217,14 @@ kube_apiserver_admission_control: # kube_controller_manager_bind_address: 0.0.0.0 -kube_controller_memory_limit: 20000M -kube_controller_cpu_limit: 10000m -kube_controller_memory_requests: 1024M -kube_controller_cpu_requests: 1000m +# kube_controller_memory_limit: 20000M +# kube_controller_cpu_limit: 10000m +# kube_controller_memory_requests: 1024M +# kube_controller_cpu_requests: 1000m # kube_controller_node_monitor_grace_period: 40s # kube_controller_node_monitor_period: 5s # kube_controller_pod_eviction_timeout: 5m0s -kube_controller_terminated_pod_gc_threshold: 12500 +# kube_controller_terminated_pod_gc_threshold: 12500 ##### scheduler @@ -234,17 +234,17 @@ 
kube_controller_terminated_pod_gc_threshold: 12500 # kube_scheduler_bind_address: 0.0.0.0 -kube_scheduler_memory_limit: 200000M -kube_scheduler_cpu_limit: 10000m -kube_scheduler_memory_requests: 1024M -kube_scheduler_cpu_requests: 1000m +# kube_scheduler_memory_limit: 200000M +# kube_scheduler_cpu_limit: 10000m +# kube_scheduler_memory_requests: 1024M +# kube_scheduler_cpu_requests: 1000m ############## other configuration ############## ## Optionally reserve resources for OS system daemons. -system_reserved: true +# system_reserved: true ## Uncomment to override default values -system_memory_reserved: 3072M +# system_memory_reserved: 3072M # system_cpu_reserved: 500m ## Reservation for master hosts # system_master_memory_reserved: 256M @@ -266,27 +266,27 @@ system_memory_reserved: 3072M ## Used to set docker daemon iptables options to true # docker_iptables_enabled: "true" -docker_daemon_graph: "/docker_data_root_test" +#docker_daemon_graph: "/docker_data_root_test" -docker_log_opts: "--log-opt max-size=2g --log-opt max-file=1 --log-driver=json-file" +#docker_log_opts: "--log-opt max-size=2g --log-opt max-file=1 --log-driver=json-file" # add --add-runtime nvidia=/usr/bin/nvidia-container-runtime for nvidia-runtime in docker_option -docker_options: >- - --add-runtime nvidia=/usr/bin/nvidia-container-runtime - --default-runtime nvidia - {%- if docker_insecure_registries is defined %} - {{ docker_insecure_registries | map('regex_replace', '^(.*)$', '--insecure-registry=\1' ) | list | join(' ') }} - {%- endif %} - {% if docker_registry_mirrors is defined %} - {{ docker_registry_mirrors | map('regex_replace', '^(.*)$', '--registry-mirror=\1' ) | list | join(' ') }} - {%- endif %} - {%- if docker_version != "latest" and docker_version is version('17.05', '<') %} - --graph={{ docker_daemon_graph }} {% if ansible_os_family not in ["openSUSE Leap", "openSUSE Tumbleweed", "Suse"] %}{{ docker_log_opts }}{% endif %} - {%- else %} - --data-root={{ docker_daemon_graph }} {% if 
ansible_os_family not in ["openSUSE Leap", "openSUSE Tumbleweed", "Suse"] %}{{ docker_log_opts }}{% endif %} - {%- endif %} - {%- if ansible_architecture == "aarch64" and ansible_os_family == "RedHat" %} - --add-runtime docker-runc=/usr/libexec/docker/docker-runc-current - --default-runtime=docker-runc --exec-opt native.cgroupdriver=systemd - --userland-proxy-path=/usr/libexec/docker/docker-proxy-current --signature-verification=false - {%- endif -%} \ No newline at end of file +#docker_options: >- +# --add-runtime nvidia=/usr/bin/nvidia-container-runtime +# --default-runtime nvidia +# {%- if docker_insecure_registries is defined %} +# {{ docker_insecure_registries | map('regex_replace', '^(.*)$', '--insecure-registry=\1' ) | list | join(' ') }} +# {%- endif %} +# {% if docker_registry_mirrors is defined %} +# {{ docker_registry_mirrors | map('regex_replace', '^(.*)$', '--registry-mirror=\1' ) | list | join(' ') }} +# {%- endif %} +# {%- if docker_version != "latest" and docker_version is version('17.05', '<') %} +# --graph={{ docker_daemon_graph }} {% if ansible_os_family not in ["openSUSE Leap", "openSUSE Tumbleweed", "Suse"] %}{{ docker_log_opts }}{% endif %} +# {%- else %} +# --data-root={{ docker_daemon_graph }} {% if ansible_os_family not in ["openSUSE Leap", "openSUSE Tumbleweed", "Suse"] %}{{ docker_log_opts }}{% endif %} +# {%- endif %} +# {%- if ansible_architecture == "aarch64" and ansible_os_family == "RedHat" %} +# --add-runtime docker-runc=/usr/libexec/docker/docker-runc-current +# --default-runtime=docker-runc --exec-opt native.cgroupdriver=systemd +# --userland-proxy-path=/usr/libexec/docker/docker-proxy-current --signature-verification=false +# {%- endif -%} \ No newline at end of file From 5ef22e8490d405a172484216d92c978bbb08b8f7 Mon Sep 17 00:00:00 2001 From: "yuye@microsoft.com" Date: Mon, 4 Nov 2019 09:48:12 +0800 Subject: [PATCH 24/69] auth --- deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 
deletions(-) diff --git a/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml b/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml index ff79cd9b25..80844644fe 100644 --- a/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml +++ b/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml @@ -133,12 +133,12 @@ kube_network_plugin: flannel ## Scale: 4096 nodes, 100 pods per node ## Kubernetes internal network for services, unused block of space. -kube_service_addresses: 10.192.0.0/13 +# kube_service_addresses: 10.192.0.0/13 ## internal network. When used, it will assign IP ## addresses from this range to individual pods. ## This network must be unused in your network infrastructure! -kube_pods_subnet: 10.200.0.0/13 +# kube_pods_subnet: 10.200.0.0/13 ## internal network node size allocation (optional). This is the size allocated ## to each node on your network. With these defaults you should have @@ -151,7 +151,7 @@ kube_pods_subnet: 10.200.0.0/13 ## - kube_service_addresses: 10.192.0.0/13 ## - kube_pods_subnet: 10.200.0.0/13 ## - kube_network_node_prefix: 25 -kube_network_node_prefix: 25 +# kube_network_node_prefix: 25 ##### api-server From 30839dfc340fcbca49976989ca2e67e058186892 Mon Sep 17 00:00:00 2001 From: "yuye@microsoft.com" Date: Mon, 4 Nov 2019 10:18:41 +0800 Subject: [PATCH 25/69] auth --- deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml b/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml index 80844644fe..f97726de78 100644 --- a/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml +++ b/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml @@ -91,9 +91,9 @@ #kube_basic_auth: true ## Make a copy of kubeconfig on the host that runs Ansible in {{ inventory_dir }}/artifacts -kubeconfig_localhost: true +# kubeconfig_localhost: true ## Download kubectl onto the host that runs Ansible in {{ bin_dir }} -kubectl_localhost: true +# 
kubectl_localhost: true # kubelet_status_update_frequency: 10s @@ -125,7 +125,7 @@ kubectl_localhost: true # Choose network plugin (cilium, calico, contiv, weave or flannel. Use cni for generic cni plugin) # Can also be set to 'cloud', which lets the cloud provider setup appropriate routing -kube_network_plugin: flannel +# kube_network_plugin: flannel # Setting multi_networking to true will install Multus: https://github.com/intel/multus-cni # kube_network_plugin_multus: false From 1de5992d0dda3e0d7c319a53a27f34060dbec35d Mon Sep 17 00:00:00 2001 From: "yuye@microsoft.com" Date: Mon, 4 Nov 2019 10:25:26 +0800 Subject: [PATCH 26/69] auth --- deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml b/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml index f97726de78..dbae6facd6 100644 --- a/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml +++ b/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml @@ -91,9 +91,9 @@ #kube_basic_auth: true ## Make a copy of kubeconfig on the host that runs Ansible in {{ inventory_dir }}/artifacts -# kubeconfig_localhost: true +kubeconfig_localhost: true ## Download kubectl onto the host that runs Ansible in {{ bin_dir }} -# kubectl_localhost: true +kubectl_localhost: true # kubelet_status_update_frequency: 10s From 4f5e275d3ed88826415d1af7d8e208475eec30a4 Mon Sep 17 00:00:00 2001 From: "yuye@microsoft.com" Date: Mon, 4 Nov 2019 11:01:40 +0800 Subject: [PATCH 27/69] auth --- deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml b/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml index dbae6facd6..80844644fe 100644 --- a/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml +++ b/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml @@ -125,7 +125,7 @@ kubectl_localhost: true # Choose network 
plugin (cilium, calico, contiv, weave or flannel. Use cni for generic cni plugin) # Can also be set to 'cloud', which lets the cloud provider setup appropriate routing -# kube_network_plugin: flannel +kube_network_plugin: flannel # Setting multi_networking to true will install Multus: https://github.com/intel/multus-cni # kube_network_plugin_multus: false From e6160bf83af41dc64d172e624847a4a94aee40d8 Mon Sep 17 00:00:00 2001 From: "yuye@microsoft.com" Date: Mon, 4 Nov 2019 15:01:20 +0800 Subject: [PATCH 28/69] auth --- deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml b/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml index 80844644fe..dbae6facd6 100644 --- a/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml +++ b/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml @@ -125,7 +125,7 @@ kubectl_localhost: true # Choose network plugin (cilium, calico, contiv, weave or flannel. 
Use cni for generic cni plugin) # Can also be set to 'cloud', which lets the cloud provider setup appropriate routing -kube_network_plugin: flannel +# kube_network_plugin: flannel # Setting multi_networking to true will install Multus: https://github.com/intel/multus-cni # kube_network_plugin_multus: false From e49a9b90ec083a7a4a12c2f1d84189157eaa105e Mon Sep 17 00:00:00 2001 From: ydye Date: Mon, 4 Nov 2019 15:16:19 +0800 Subject: [PATCH 29/69] Merge branch 'master' into yuye/conflict-solve # Conflicts: # src/webportal/src/app/home/home/virtual-cluster-list.jsx --- deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml b/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml index dbae6facd6..80844644fe 100644 --- a/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml +++ b/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml @@ -125,7 +125,7 @@ kubectl_localhost: true # Choose network plugin (cilium, calico, contiv, weave or flannel. 
Use cni for generic cni plugin) # Can also be set to 'cloud', which lets the cloud provider setup appropriate routing -# kube_network_plugin: flannel +kube_network_plugin: flannel # Setting multi_networking to true will install Multus: https://github.com/intel/multus-cni # kube_network_plugin_multus: false From 833536b95d1f8477705d576c7deebc0940a3778a Mon Sep 17 00:00:00 2001 From: ydye Date: Mon, 4 Nov 2019 16:30:38 +0800 Subject: [PATCH 30/69] Merge branch 'master' into yuye/conflict-solve # Conflicts: # src/webportal/src/app/home/home/virtual-cluster-list.jsx --- deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml b/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml index 80844644fe..dbae6facd6 100644 --- a/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml +++ b/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml @@ -125,7 +125,7 @@ kubectl_localhost: true # Choose network plugin (cilium, calico, contiv, weave or flannel. 
Use cni for generic cni plugin) # Can also be set to 'cloud', which lets the cloud provider setup appropriate routing -kube_network_plugin: flannel +# kube_network_plugin: flannel # Setting multi_networking to true will install Multus: https://github.com/intel/multus-cni # kube_network_plugin_multus: false From f4e13f94b05d6aa17b460ef8d93c53b363385bfa Mon Sep 17 00:00:00 2001 From: "yuye@microsoft.com" Date: Mon, 4 Nov 2019 18:00:26 +0800 Subject: [PATCH 31/69] auth --- deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml b/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml index dbae6facd6..80844644fe 100644 --- a/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml +++ b/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml @@ -125,7 +125,7 @@ kubectl_localhost: true # Choose network plugin (cilium, calico, contiv, weave or flannel. Use cni for generic cni plugin) # Can also be set to 'cloud', which lets the cloud provider setup appropriate routing -# kube_network_plugin: flannel +kube_network_plugin: flannel # Setting multi_networking to true will install Multus: https://github.com/intel/multus-cni # kube_network_plugin_multus: false From e2b572fdaa0a519f5bdeee9d3b7fbf8004bb3827 Mon Sep 17 00:00:00 2001 From: "yuye@microsoft.com" Date: Mon, 4 Nov 2019 18:02:07 +0800 Subject: [PATCH 32/69] auth --- deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml b/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml index 80844644fe..7a031ce1f6 100644 --- a/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml +++ b/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml @@ -127,6 +127,7 @@ kubectl_localhost: true # Can also be set to 'cloud', which lets the cloud provider setup appropriate routing kube_network_plugin: flannel + # Setting multi_networking to 
true will install Multus: https://github.com/intel/multus-cni # kube_network_plugin_multus: false From 1fd941d6df5359d6108de34b397ed7eb3cc39409 Mon Sep 17 00:00:00 2001 From: "yuye@microsoft.com" Date: Tue, 5 Nov 2019 10:23:08 +0800 Subject: [PATCH 33/69] auth --- deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml b/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml index 7a031ce1f6..d273a639fe 100644 --- a/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml +++ b/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml @@ -125,7 +125,7 @@ kubectl_localhost: true # Choose network plugin (cilium, calico, contiv, weave or flannel. Use cni for generic cni plugin) # Can also be set to 'cloud', which lets the cloud provider setup appropriate routing -kube_network_plugin: flannel +# kube_network_plugin: flannel # Setting multi_networking to true will install Multus: https://github.com/intel/multus-cni From 494abbebe820d02149ba6188f1f756ffccb65d9b Mon Sep 17 00:00:00 2001 From: "yuye@microsoft.com" Date: Tue, 5 Nov 2019 11:14:21 +0800 Subject: [PATCH 34/69] auth --- .../k8sPaiLibrary/kubespray-vars/openpai.yaml | 81 ++++++++++++++++--- 1 file changed, 71 insertions(+), 10 deletions(-) diff --git a/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml b/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml index d273a639fe..52cc8c9eb2 100644 --- a/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml +++ b/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml @@ -9,13 +9,13 @@ ## Etcd is restricted by default to 512M on systems under 4GB RAM, 512MB is not enough for much more than testing. ## Set this if your etcd nodes have less than 4GB but you want more RAM for etcd. Set to 0 for unrestricted RAM. -#etcd_memory_limit: "0" +etcd_memory_limit: "0" ## Etcd has a default of 2G for its space quota. 
If you put a value in etcd_memory_limit which is less than ## etcd_quota_backend_bytes, you may encounter out of memory terminations of the etcd cluster. Please check ## etcd documentation for more information. ## https://etcd.io/docs/v3.4.0/op-guide/configuration/ -#etcd_quota_backend_bytes: "8589934592" +etcd_quota_backend_bytes: "8589934592" ############## nginx configuration ############## @@ -29,7 +29,7 @@ # loadbalancer_apiserver_localhost: true ## valid options are "nginx" or "haproxy" -# loadbalancer_apiserver_type: "nginx" +loadbalancer_apiserver_type: "haproxy" ## applied if only external loadbalancer_apiserver is defined, otherwise ignored # apiserver_loadbalancer_domain_name: "lb-apiserver.kubernetes.local" @@ -68,14 +68,50 @@ ## Can be docker_dns, host_resolvconf or none # resolvconf_mode: docker_dns -## Deploy netchecker app to verify DNS resolve as an HTTP service -# deploy_netchecker: true - ## Ip address of the kubernetes skydns service # skydns_server: "{{ kube_service_addresses|ipaddr('net')|ipaddr(3)|ipaddr('address') }}" # skydns_server_secondary: "{{ kube_service_addresses|ipaddr('net')|ipaddr(4)|ipaddr('address') }}" # dns_domain: "{{ cluster_name }}" +# Limits for coredns +# dns_memory_limit: 170Mi +# dns_cpu_requests: 100m +# dns_memory_requests: 70Mi +dns_min_replicas: 1 +# dns_nodes_per_replica: 16 +# dns_cores_per_replica: 256 +# dns_prevent_single_point_failure: "{{ 'true' if dns_min_replicas|int > 1 else 'false' }}" +# coredns_ordinal_suffix: "" + +# nodelocaldns +# nodelocaldns_cpu_requests: 100m +# nodelocaldns_memory_limit: 170Mi +# nodelocaldnsdns_memory_requests: 70Mi + +# Netchecker +## Deploy netchecker app to verify DNS resolve as an HTTP service +deploy_netchecker: true +# netchecker_port: 31081 +# agent_report_interval: 15 +# netcheck_namespace: default + +# Limits for netchecker apps +# netchecker_agent_cpu_limit: 30m +# netchecker_agent_memory_limit: 100M +# netchecker_agent_cpu_requests: 15m +# 
netchecker_agent_memory_requests: 64M +# netchecker_server_cpu_limit: 100m +# netchecker_server_memory_limit: 256M +# netchecker_server_cpu_requests: 50m +# netchecker_server_memory_requests: 64M + +# SecurityContext when PodSecurityPolicy is enabled +# netchecker_agent_user: 1000 +# netchecker_server_user: 1000 +# netchecker_agent_group: 1000 +# netchecker_server_group: 1000 + + ############## kubernetes configuration ############## ## Change this to use another Kubernetes version, e.g. a current beta release @@ -125,7 +161,7 @@ kubectl_localhost: true # Choose network plugin (cilium, calico, contiv, weave or flannel. Use cni for generic cni plugin) # Can also be set to 'cloud', which lets the cloud provider setup appropriate routing -# kube_network_plugin: flannel +kube_network_plugin: flannel # Setting multi_networking to true will install Multus: https://github.com/intel/multus-cni @@ -134,12 +170,12 @@ kubectl_localhost: true ## Scale: 4096 nodes, 100 pods per node ## Kubernetes internal network for services, unused block of space. -# kube_service_addresses: 10.192.0.0/13 +kube_service_addresses: 10.192.0.0/13 ## internal network. When used, it will assign IP ## addresses from this range to individual pods. ## This network must be unused in your network infrastructure! -# kube_pods_subnet: 10.200.0.0/13 +kube_pods_subnet: 10.200.0.0/13 ## internal network node size allocation (optional). This is the size allocated ## to each node on your network. 
With these defaults you should have @@ -152,7 +188,7 @@ kubectl_localhost: true ## - kube_service_addresses: 10.192.0.0/13 ## - kube_pods_subnet: 10.200.0.0/13 ## - kube_network_node_prefix: 25 -# kube_network_node_prefix: 25 +kube_network_node_prefix: 25 ##### api-server @@ -240,6 +276,31 @@ kubectl_localhost: true # kube_scheduler_memory_requests: 1024M # kube_scheduler_cpu_requests: 1000m + +##### dashboard + +dashboard_enabled: false +# dashboard_replicas: 1 + +# Limits for dashboard +# dashboard_cpu_limit: 100m +# dashboard_memory_limit: 256M +# dashboard_cpu_requests: 50m +# dashboard_memory_requests: 64M + +# Set dashboard_use_custom_certs to true if overriding dashboard_certs_secret_name with a secret that +# contains dashboard_tls_key_file and dashboard_tls_cert_file instead of using the initContainer provisioned certs +# dashboard_use_custom_certs: false +# dashboard_certs_secret_name: kubernetes-dashboard-certs +# dashboard_tls_key_file: dashboard.key +# dashboard_tls_cert_file: dashboard.crt +# dashboard_master_toleration: true + +# Override dashboard default settings +# dashboard_token_ttl: 900 +# dashboard_skip_login: false + + ############## other configuration ############## ## Optionally reserve resources for OS system daemons. 
From 2a8c633fde692bd4cc9044613d7ccff1a22d4ca2 Mon Sep 17 00:00:00 2001 From: "yuye@microsoft.com" Date: Wed, 6 Nov 2019 11:14:16 +0800 Subject: [PATCH 35/69] auth --- deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml b/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml index 52cc8c9eb2..aa52140fab 100644 --- a/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml +++ b/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml @@ -326,7 +326,7 @@ dashboard_enabled: false # docker_version: latest ## Used to set docker daemon iptables options to true -# docker_iptables_enabled: "true" +docker_iptables_enabled: true #docker_daemon_graph: "/docker_data_root_test" From 281d679bf0fae2c8a4bc9e6dc6021788bc62fdec Mon Sep 17 00:00:00 2001 From: "yuye@microsoft.com" Date: Wed, 6 Nov 2019 13:22:07 +0800 Subject: [PATCH 36/69] auth --- deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml b/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml index aa52140fab..f040d70f43 100644 --- a/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml +++ b/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml @@ -318,7 +318,7 @@ dashboard_enabled: false # helm_enabled: true # Cert manager deployment -# cert_manager_enabled: true +cert_manager_enabled: true # cert_manager_namespace: "cert-manager" ############## docker configuration ############## From 1de139465aa35cf0c60972b9afb20f74d946c117 Mon Sep 17 00:00:00 2001 From: "yuye@microsoft.com" Date: Wed, 6 Nov 2019 16:22:57 +0800 Subject: [PATCH 37/69] auth --- .../k8sPaiLibrary/kubespray-vars/openpai.yaml | 96 +++++++++---------- 1 file changed, 48 insertions(+), 48 deletions(-) diff --git a/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml b/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml index 
f040d70f43..1575013af8 100644 --- a/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml +++ b/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml @@ -20,16 +20,16 @@ etcd_quota_backend_bytes: "8589934592" ############## nginx configuration ############## # Requests for load balancer app -#loadbalancer_apiserver_memory_requests: 1024M -#loadbalancer_apiserver_cpu_requests: 1000m +loadbalancer_apiserver_memory_requests: 500M +loadbalancer_apiserver_cpu_requests: 500m -#loadbalancer_apiserver_keepalive_timeout: 15m +loadbalancer_apiserver_keepalive_timeout: 15m ## Internal loadbalancers for apiservers # loadbalancer_apiserver_localhost: true ## valid options are "nginx" or "haproxy" -loadbalancer_apiserver_type: "haproxy" +loadbalancer_apiserver_type: "nginx" ## applied if only external loadbalancer_apiserver is defined, otherwise ignored # apiserver_loadbalancer_domain_name: "lb-apiserver.kubernetes.local" @@ -74,9 +74,9 @@ loadbalancer_apiserver_type: "haproxy" # dns_domain: "{{ cluster_name }}" # Limits for coredns -# dns_memory_limit: 170Mi -# dns_cpu_requests: 100m -# dns_memory_requests: 70Mi +dns_memory_limit: 1000Mi +dns_cpu_requests: 1000m +dns_memory_requests: 500Mi dns_min_replicas: 1 # dns_nodes_per_replica: 16 # dns_cores_per_replica: 256 @@ -144,15 +144,15 @@ kubectl_localhost: true # kubelet_max_pods: 110 ## Support custom flags to be passed to kubelet -# kubelet_custom_flags: -# - "--image-pull-progress-deadline=10m" +kubelet_custom_flags: + - "--image-pull-progress-deadline=10m" # kube_feature_gates: [] ## Support custom flags to be passed to kubelet only on nodes, not masters # kubelet_node_custom_flags: [] -#k8s_image_pull_policy: Always +k8s_image_pull_policy: Always ## extra runtime config # kube_api_runtime_config: [] @@ -192,11 +192,11 @@ kube_network_node_prefix: 25 ##### api-server -# kube_kubeadm_apiserver_extra_args: -# cors-allowed-origins: ".*" -# storage-media-type: "application/json" -# max-requests-inflight: 1500 -# 
max-mutating-requests-inflight: 500 +kube_kubeadm_apiserver_extra_args: + cors-allowed-origins: ".*" + storage-media-type: "application/json" + max-requests-inflight: 1500 + max-mutating-requests-inflight: 500 ## Extra control plane host volume mounts ## Example: @@ -221,10 +221,10 @@ kube_network_node_prefix: 25 ## Inclusive at both ends of the range. # kube_apiserver_node_port_range: "30000-32767" -#kube_apiserver_memory_limit: 20000M -#kube_apiserver_cpu_limit: 10000m -#kube_apiserver_memory_requests: 1024M -#kube_apiserver_cpu_requests: 1000m +kube_apiserver_memory_limit: 20000M +kube_apiserver_cpu_limit: 10000m +kube_apiserver_memory_requests: 1024M +kube_apiserver_cpu_requests: 1000m # kube_apiserver_request_timeout: "1m0s" # 1.9 and below Admission control plug-ins @@ -254,10 +254,10 @@ kube_network_node_prefix: 25 # kube_controller_manager_bind_address: 0.0.0.0 -# kube_controller_memory_limit: 20000M -# kube_controller_cpu_limit: 10000m -# kube_controller_memory_requests: 1024M -# kube_controller_cpu_requests: 1000m +kube_controller_memory_limit: 20000M +kube_controller_cpu_limit: 10000m +kube_controller_memory_requests: 1024M +kube_controller_cpu_requests: 1000m # kube_controller_node_monitor_grace_period: 40s # kube_controller_node_monitor_period: 5s # kube_controller_pod_eviction_timeout: 5m0s @@ -271,10 +271,10 @@ kube_network_node_prefix: 25 # kube_scheduler_bind_address: 0.0.0.0 -# kube_scheduler_memory_limit: 200000M -# kube_scheduler_cpu_limit: 10000m -# kube_scheduler_memory_requests: 1024M -# kube_scheduler_cpu_requests: 1000m +kube_scheduler_memory_limit: 200000M +kube_scheduler_cpu_limit: 10000m +kube_scheduler_memory_requests: 1024M +kube_scheduler_cpu_requests: 1000m ##### dashboard @@ -328,27 +328,27 @@ cert_manager_enabled: true ## Used to set docker daemon iptables options to true docker_iptables_enabled: true -#docker_daemon_graph: "/docker_data_root_test" +docker_daemon_graph: "/mnt" -#docker_log_opts: "--log-opt max-size=2g --log-opt 
max-file=1 --log-driver=json-file" +docker_log_opts: "--log-opt max-size=2g --log-opt max-file=1 --log-driver=json-file" # add --add-runtime nvidia=/usr/bin/nvidia-container-runtime for nvidia-runtime in docker_option -#docker_options: >- -# --add-runtime nvidia=/usr/bin/nvidia-container-runtime -# --default-runtime nvidia -# {%- if docker_insecure_registries is defined %} -# {{ docker_insecure_registries | map('regex_replace', '^(.*)$', '--insecure-registry=\1' ) | list | join(' ') }} -# {%- endif %} -# {% if docker_registry_mirrors is defined %} -# {{ docker_registry_mirrors | map('regex_replace', '^(.*)$', '--registry-mirror=\1' ) | list | join(' ') }} -# {%- endif %} -# {%- if docker_version != "latest" and docker_version is version('17.05', '<') %} -# --graph={{ docker_daemon_graph }} {% if ansible_os_family not in ["openSUSE Leap", "openSUSE Tumbleweed", "Suse"] %}{{ docker_log_opts }}{% endif %} -# {%- else %} -# --data-root={{ docker_daemon_graph }} {% if ansible_os_family not in ["openSUSE Leap", "openSUSE Tumbleweed", "Suse"] %}{{ docker_log_opts }}{% endif %} -# {%- endif %} -# {%- if ansible_architecture == "aarch64" and ansible_os_family == "RedHat" %} -# --add-runtime docker-runc=/usr/libexec/docker/docker-runc-current -# --default-runtime=docker-runc --exec-opt native.cgroupdriver=systemd -# --userland-proxy-path=/usr/libexec/docker/docker-proxy-current --signature-verification=false -# {%- endif -%} \ No newline at end of file +docker_options: >- + --add-runtime nvidia=/usr/bin/nvidia-container-runtime + --default-runtime nvidia + {%- if docker_insecure_registries is defined %} + {{ docker_insecure_registries | map('regex_replace', '^(.*)$', '--insecure-registry=\1' ) | list | join(' ') }} + {%- endif %} + {% if docker_registry_mirrors is defined %} + {{ docker_registry_mirrors | map('regex_replace', '^(.*)$', '--registry-mirror=\1' ) | list | join(' ') }} + {%- endif %} + {%- if docker_version != "latest" and docker_version is version('17.05', '<') 
%} + --graph={{ docker_daemon_graph }} {% if ansible_os_family not in ["openSUSE Leap", "openSUSE Tumbleweed", "Suse"] %}{{ docker_log_opts }}{% endif %} + {%- else %} + --data-root={{ docker_daemon_graph }} {% if ansible_os_family not in ["openSUSE Leap", "openSUSE Tumbleweed", "Suse"] %}{{ docker_log_opts }}{% endif %} + {%- endif %} + {%- if ansible_architecture == "aarch64" and ansible_os_family == "RedHat" %} + --add-runtime docker-runc=/usr/libexec/docker/docker-runc-current + --default-runtime=docker-runc --exec-opt native.cgroupdriver=systemd + --userland-proxy-path=/usr/libexec/docker/docker-proxy-current --signature-verification=false + {%- endif -%} \ No newline at end of file From 4692d335f2411b3f70540c549ae935f48d94919e Mon Sep 17 00:00:00 2001 From: "yuye@microsoft.com" Date: Thu, 7 Nov 2019 11:02:46 +0800 Subject: [PATCH 38/69] auth --- deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml b/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml index 1575013af8..3fd27d3b44 100644 --- a/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml +++ b/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml @@ -115,7 +115,7 @@ deploy_netchecker: true ############## kubernetes configuration ############## ## Change this to use another Kubernetes version, e.g. 
a current beta release -# kube_version: v1.15.3 +kube_version: v1.14.8 ## kubernetes image repo define # kube_image_repo: "gcr.io/google-containers" From 6badb6e86c7399e656c31a8b6c8c2290389cb904 Mon Sep 17 00:00:00 2001 From: "yuye@microsoft.com" Date: Thu, 7 Nov 2019 11:23:27 +0800 Subject: [PATCH 39/69] auth --- deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml b/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml index 3fd27d3b44..84daf09018 100644 --- a/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml +++ b/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml @@ -115,7 +115,7 @@ deploy_netchecker: true ############## kubernetes configuration ############## ## Change this to use another Kubernetes version, e.g. a current beta release -kube_version: v1.14.8 +kube_version: v1.14.5 ## kubernetes image repo define # kube_image_repo: "gcr.io/google-containers" From 672340e798b6f8d8e0656a5d91eaa4cf19e76ef0 Mon Sep 17 00:00:00 2001 From: "yuye@microsoft.com" Date: Thu, 7 Nov 2019 12:06:21 +0800 Subject: [PATCH 40/69] auth --- deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml b/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml index 84daf09018..3be8dc7b2e 100644 --- a/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml +++ b/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml @@ -161,7 +161,7 @@ k8s_image_pull_policy: Always # Choose network plugin (cilium, calico, contiv, weave or flannel. 
Use cni for generic cni plugin) # Can also be set to 'cloud', which lets the cloud provider setup appropriate routing -kube_network_plugin: flannel +kube_network_plugin: calico # Setting multi_networking to true will install Multus: https://github.com/intel/multus-cni From d9c1a5a09324c796b3266494b196db6d50f75f58 Mon Sep 17 00:00:00 2001 From: "yuye@microsoft.com" Date: Thu, 7 Nov 2019 12:09:19 +0800 Subject: [PATCH 41/69] auth --- deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml b/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml index 3be8dc7b2e..72aa7fe329 100644 --- a/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml +++ b/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml @@ -115,7 +115,7 @@ deploy_netchecker: true ############## kubernetes configuration ############## ## Change this to use another Kubernetes version, e.g. a current beta release -kube_version: v1.14.5 +# kube_version: v1.14.5 ## kubernetes image repo define # kube_image_repo: "gcr.io/google-containers" From 4d00ceb6f788bdf862bc907ead7c260e78e730c1 Mon Sep 17 00:00:00 2001 From: "yuye@microsoft.com" Date: Thu, 7 Nov 2019 12:13:28 +0800 Subject: [PATCH 42/69] auth --- deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml b/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml index 72aa7fe329..6bf192351c 100644 --- a/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml +++ b/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml @@ -115,7 +115,7 @@ deploy_netchecker: true ############## kubernetes configuration ############## ## Change this to use another Kubernetes version, e.g. 
a current beta release -# kube_version: v1.14.5 +kube_version: v1.14.1 ## kubernetes image repo define # kube_image_repo: "gcr.io/google-containers" From 620238b82d7b45314be658f52c8ff236cf806938 Mon Sep 17 00:00:00 2001 From: "yuye@microsoft.com" Date: Thu, 7 Nov 2019 12:58:15 +0800 Subject: [PATCH 43/69] auth --- deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml b/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml index 6bf192351c..f98ea06d6d 100644 --- a/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml +++ b/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml @@ -161,7 +161,7 @@ k8s_image_pull_policy: Always # Choose network plugin (cilium, calico, contiv, weave or flannel. Use cni for generic cni plugin) # Can also be set to 'cloud', which lets the cloud provider setup appropriate routing -kube_network_plugin: calico +kube_network_plugin: flannel # Setting multi_networking to true will install Multus: https://github.com/intel/multus-cni From 7232d43df3a5f63c846b492c535b532c21256a73 Mon Sep 17 00:00:00 2001 From: "yuye@microsoft.com" Date: Thu, 7 Nov 2019 13:46:18 +0800 Subject: [PATCH 44/69] auth --- deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml b/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml index f98ea06d6d..12487fa8f9 100644 --- a/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml +++ b/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml @@ -115,7 +115,7 @@ deploy_netchecker: true ############## kubernetes configuration ############## ## Change this to use another Kubernetes version, e.g. 
a current beta release -kube_version: v1.14.1 +# kube_version: v1.14.1 ## kubernetes image repo define # kube_image_repo: "gcr.io/google-containers" @@ -161,7 +161,7 @@ k8s_image_pull_policy: Always # Choose network plugin (cilium, calico, contiv, weave or flannel. Use cni for generic cni plugin) # Can also be set to 'cloud', which lets the cloud provider setup appropriate routing -kube_network_plugin: flannel +kube_network_plugin: calico # Setting multi_networking to true will install Multus: https://github.com/intel/multus-cni From 9f5ea8ba4c932a82aa4b57f5817032572e452b44 Mon Sep 17 00:00:00 2001 From: "yuye@microsoft.com" Date: Thu, 7 Nov 2019 14:50:57 +0800 Subject: [PATCH 45/69] auth --- deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml b/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml index 12487fa8f9..e07fe73fd6 100644 --- a/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml +++ b/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml @@ -326,7 +326,7 @@ cert_manager_enabled: true # docker_version: latest ## Used to set docker daemon iptables options to true -docker_iptables_enabled: true +docker_iptables_enabled: false docker_daemon_graph: "/mnt" From 4d1f62f542eb0d0a84101ce97e647173f016a335 Mon Sep 17 00:00:00 2001 From: "yuye@microsoft.com" Date: Thu, 7 Nov 2019 14:52:23 +0800 Subject: [PATCH 46/69] auth --- deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml b/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml index e07fe73fd6..cd4465267d 100644 --- a/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml +++ b/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml @@ -54,7 +54,7 @@ loadbalancer_apiserver_type: "nginx" ## Set manual server if using a custom cluster DNS server ## manual_dns_server: 10.x.x.x ## 
Enable nodelocal dns cache -#enable_nodelocaldns: false +enable_nodelocaldns: false # nodelocaldns_ip: 169.254.25.10 # nodelocaldns_health_port: 9254 From 295263e863cc751b5519b05dd71959771e9b9c0e Mon Sep 17 00:00:00 2001 From: "yuye@microsoft.com" Date: Thu, 7 Nov 2019 15:24:07 +0800 Subject: [PATCH 47/69] auth --- deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml b/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml index cd4465267d..a99c8c18f8 100644 --- a/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml +++ b/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml @@ -161,7 +161,7 @@ k8s_image_pull_policy: Always # Choose network plugin (cilium, calico, contiv, weave or flannel. Use cni for generic cni plugin) # Can also be set to 'cloud', which lets the cloud provider setup appropriate routing -kube_network_plugin: calico +kube_network_plugin: canal # Setting multi_networking to true will install Multus: https://github.com/intel/multus-cni @@ -318,7 +318,7 @@ dashboard_enabled: false # helm_enabled: true # Cert manager deployment -cert_manager_enabled: true +# cert_manager_enabled: true # cert_manager_namespace: "cert-manager" ############## docker configuration ############## From 7c1772e61993cb548804f97e7049f38fbda2b3f7 Mon Sep 17 00:00:00 2001 From: "yuye@microsoft.com" Date: Thu, 7 Nov 2019 15:27:21 +0800 Subject: [PATCH 48/69] auth --- deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml b/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml index a99c8c18f8..9e52bd5bbc 100644 --- a/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml +++ b/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml @@ -161,7 +161,7 @@ k8s_image_pull_policy: Always # Choose network plugin (cilium, calico, contiv, weave or flannel. 
Use cni for generic cni plugin) # Can also be set to 'cloud', which lets the cloud provider setup appropriate routing -kube_network_plugin: canal +kube_network_plugin: cilium # Setting multi_networking to true will install Multus: https://github.com/intel/multus-cni From 0a3196774f20cc4d71482ffe1ab899b2115c38cc Mon Sep 17 00:00:00 2001 From: "yuye@microsoft.com" Date: Thu, 7 Nov 2019 15:50:28 +0800 Subject: [PATCH 49/69] auth --- deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml b/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml index 9e52bd5bbc..cd4465267d 100644 --- a/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml +++ b/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml @@ -161,7 +161,7 @@ k8s_image_pull_policy: Always # Choose network plugin (cilium, calico, contiv, weave or flannel. Use cni for generic cni plugin) # Can also be set to 'cloud', which lets the cloud provider setup appropriate routing -kube_network_plugin: cilium +kube_network_plugin: calico # Setting multi_networking to true will install Multus: https://github.com/intel/multus-cni @@ -318,7 +318,7 @@ dashboard_enabled: false # helm_enabled: true # Cert manager deployment -# cert_manager_enabled: true +cert_manager_enabled: true # cert_manager_namespace: "cert-manager" ############## docker configuration ############## From 5f281d1d7fcef9595b969d53cdb428946f1efb54 Mon Sep 17 00:00:00 2001 From: "yuye@microsoft.com" Date: Thu, 7 Nov 2019 16:48:17 +0800 Subject: [PATCH 50/69] auth --- deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml b/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml index cd4465267d..017cabd456 100644 --- a/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml +++ b/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml @@ 
-318,7 +318,7 @@ dashboard_enabled: false # helm_enabled: true # Cert manager deployment -cert_manager_enabled: true +# cert_manager_enabled: true # cert_manager_namespace: "cert-manager" ############## docker configuration ############## From d017752dee13421e61111b6a8b95acdc2d57c3e0 Mon Sep 17 00:00:00 2001 From: "yuye@microsoft.com" Date: Fri, 8 Nov 2019 12:10:04 +0800 Subject: [PATCH 51/69] auth --- deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml b/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml index 017cabd456..d82a7dbfb3 100644 --- a/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml +++ b/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml @@ -161,7 +161,7 @@ k8s_image_pull_policy: Always # Choose network plugin (cilium, calico, contiv, weave or flannel. Use cni for generic cni plugin) # Can also be set to 'cloud', which lets the cloud provider setup appropriate routing -kube_network_plugin: calico +kube_network_plugin: weave # Setting multi_networking to true will install Multus: https://github.com/intel/multus-cni @@ -351,4 +351,4 @@ docker_options: >- --add-runtime docker-runc=/usr/libexec/docker/docker-runc-current --default-runtime=docker-runc --exec-opt native.cgroupdriver=systemd --userland-proxy-path=/usr/libexec/docker/docker-proxy-current --signature-verification=false - {%- endif -%} \ No newline at end of file + {%- endif -%}l \ No newline at end of file From 2ff565ed02b5b076351ae3e8116127a07fe01a96 Mon Sep 17 00:00:00 2001 From: "yuye@microsoft.com" Date: Fri, 8 Nov 2019 15:54:28 +0800 Subject: [PATCH 52/69] auth --- deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml b/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml index d82a7dbfb3..b07aa8432c 100644 --- 
a/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml +++ b/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml @@ -351,4 +351,4 @@ docker_options: >- --add-runtime docker-runc=/usr/libexec/docker/docker-runc-current --default-runtime=docker-runc --exec-opt native.cgroupdriver=systemd --userland-proxy-path=/usr/libexec/docker/docker-proxy-current --signature-verification=false - {%- endif -%}l \ No newline at end of file + {%- endif -%} \ No newline at end of file From d5d1543e43f69e3792ebc93878739d0fec49fcb7 Mon Sep 17 00:00:00 2001 From: "yuye@microsoft.com" Date: Fri, 8 Nov 2019 15:57:40 +0800 Subject: [PATCH 53/69] auth --- deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml b/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml index b07aa8432c..1d5678c7f3 100644 --- a/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml +++ b/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml @@ -115,7 +115,7 @@ deploy_netchecker: true ############## kubernetes configuration ############## ## Change this to use another Kubernetes version, e.g. 
a current beta release -# kube_version: v1.14.1 +# kube_version: v1.15.3 ## kubernetes image repo define # kube_image_repo: "gcr.io/google-containers" From dab886ae5448cdab09d1e79da3673afd46f8a494 Mon Sep 17 00:00:00 2001 From: "yuye@microsoft.com" Date: Fri, 8 Nov 2019 16:33:48 +0800 Subject: [PATCH 54/69] Remove unnecessary crb --- .../k8sPaiLibrary/kubespray-vars/openpai.yaml | 354 ------------------ 1 file changed, 354 deletions(-) delete mode 100644 deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml diff --git a/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml b/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml deleted file mode 100644 index 1d5678c7f3..0000000000 --- a/deployment/k8sPaiLibrary/kubespray-vars/openpai.yaml +++ /dev/null @@ -1,354 +0,0 @@ -############## etcd configuration ############## - -# etcd_deployment_type: docker - -# etcd_version: v3.3.10 - -## Set level of detail for etcd exported metrics, specify 'extensive' to include histogram metrics. -# etcd_metrics: basic - -## Etcd is restricted by default to 512M on systems under 4GB RAM, 512MB is not enough for much more than testing. -## Set this if your etcd nodes have less than 4GB but you want more RAM for etcd. Set to 0 for unrestricted RAM. -etcd_memory_limit: "0" - -## Etcd has a default of 2G for its space quota. If you put a value in etcd_memory_limit which is less than -## etcd_quota_backend_bytes, you may encounter out of memory terminations of the etcd cluster. Please check -## etcd documentation for more information. 
-## https://etcd.io/docs/v3.4.0/op-guide/configuration/ -etcd_quota_backend_bytes: "8589934592" - -############## nginx configuration ############## - -# Requests for load balancer app -loadbalancer_apiserver_memory_requests: 500M -loadbalancer_apiserver_cpu_requests: 500m - -loadbalancer_apiserver_keepalive_timeout: 15m - -## Internal loadbalancers for apiservers -# loadbalancer_apiserver_localhost: true - -## valid options are "nginx" or "haproxy" -loadbalancer_apiserver_type: "nginx" -## applied if only external loadbalancer_apiserver is defined, otherwise ignored -# apiserver_loadbalancer_domain_name: "lb-apiserver.kubernetes.local" - -## Local loadbalancer should use this port -## And must be set port 6443 -# loadbalancer_apiserver_port: 6443 - -## If loadbalancer_apiserver_healthcheck_port variable defined, enables proxy liveness check for nginx. -# loadbalancer_apiserver_healthcheck_port: 8081 - - -############## DNS configuration ############## -## Kubernetes cluster name, also will be used as DNS domain -# cluster_name: cluster.local - -## Subdomains of DNS domain to be resolved via /etc/resolv.conf for hostnet pods -# ndots: 2 - -## Can be coredns, coredns_dual, manual or none -# dns_mode: coredns - -## Set manual server if using a custom cluster DNS server -## manual_dns_server: 10.x.x.x -## Enable nodelocal dns cache -enable_nodelocaldns: false -# nodelocaldns_ip: 169.254.25.10 -# nodelocaldns_health_port: 9254 - -## Enable k8s_external plugin for CoreDNS -# enable_coredns_k8s_external: false -# coredns_k8s_external_zone: k8s_external.local - -## Enable endpoint_pod_names option for kubernetes plugin -# enable_coredns_k8s_endpoint_pod_names: false - -## Can be docker_dns, host_resolvconf or none -# resolvconf_mode: docker_dns - -## Ip address of the kubernetes skydns service -# skydns_server: "{{ kube_service_addresses|ipaddr('net')|ipaddr(3)|ipaddr('address') }}" -# skydns_server_secondary: "{{ 
kube_service_addresses|ipaddr('net')|ipaddr(4)|ipaddr('address') }}" -# dns_domain: "{{ cluster_name }}" - -# Limits for coredns -dns_memory_limit: 1000Mi -dns_cpu_requests: 1000m -dns_memory_requests: 500Mi -dns_min_replicas: 1 -# dns_nodes_per_replica: 16 -# dns_cores_per_replica: 256 -# dns_prevent_single_point_failure: "{{ 'true' if dns_min_replicas|int > 1 else 'false' }}" -# coredns_ordinal_suffix: "" - -# nodelocaldns -# nodelocaldns_cpu_requests: 100m -# nodelocaldns_memory_limit: 170Mi -# nodelocaldnsdns_memory_requests: 70Mi - -# Netchecker -## Deploy netchecker app to verify DNS resolve as an HTTP service -deploy_netchecker: true -# netchecker_port: 31081 -# agent_report_interval: 15 -# netcheck_namespace: default - -# Limits for netchecker apps -# netchecker_agent_cpu_limit: 30m -# netchecker_agent_memory_limit: 100M -# netchecker_agent_cpu_requests: 15m -# netchecker_agent_memory_requests: 64M -# netchecker_server_cpu_limit: 100m -# netchecker_server_memory_limit: 256M -# netchecker_server_cpu_requests: 50m -# netchecker_server_memory_requests: 64M - -# SecurityContext when PodSecurityPolicy is enabled -# netchecker_agent_user: 1000 -# netchecker_server_user: 1000 -# netchecker_agent_group: 1000 -# netchecker_server_group: 1000 - - -############## kubernetes configuration ############## - -## Change this to use another Kubernetes version, e.g. a current beta release -# kube_version: v1.15.3 - -## kubernetes image repo define -# kube_image_repo: "gcr.io/google-containers" - -## Cluster Loglevel configuration -# kube_log_level: 2 - -#kube_token_auth: true -#kube_basic_auth: true - -## Make a copy of kubeconfig on the host that runs Ansible in {{ inventory_dir }}/artifacts -kubeconfig_localhost: true -## Download kubectl onto the host that runs Ansible in {{ bin_dir }} -kubectl_localhost: true - -# kubelet_status_update_frequency: 10s - -## For some things, kubelet needs to load kernel modules. 
For example, -## dynamic kernel services are needed for mounting persistent volumes into containers. These may not be -## loaded by preinstall kubernetes processes. For example, ceph and rbd backed volumes. Set this variable to -## true to let kubelet load kernel modules. -# kubelet_load_modules: false - -## Configure the amount of pods able to run on single node -## default is equal to application default -# kubelet_max_pods: 110 - -## Support custom flags to be passed to kubelet -kubelet_custom_flags: - - "--image-pull-progress-deadline=10m" - -# kube_feature_gates: [] - -## Support custom flags to be passed to kubelet only on nodes, not masters -# kubelet_node_custom_flags: [] - -k8s_image_pull_policy: Always - -## extra runtime config -# kube_api_runtime_config: [] - -##### networking - -# Choose network plugin (cilium, calico, contiv, weave or flannel. Use cni for generic cni plugin) -# Can also be set to 'cloud', which lets the cloud provider setup appropriate routing -kube_network_plugin: weave - - -# Setting multi_networking to true will install Multus: https://github.com/intel/multus-cni -# kube_network_plugin_multus: false - -## Scale: 4096 nodes, 100 pods per node - -## Kubernetes internal network for services, unused block of space. -kube_service_addresses: 10.192.0.0/13 - -## internal network. When used, it will assign IP -## addresses from this range to individual pods. -## This network must be unused in your network infrastructure! -kube_pods_subnet: 10.200.0.0/13 - -## internal network node size allocation (optional). This is the size allocated -## to each node on your network. With these defaults you should have -## room for 64 nodes with 254 pods per node. 
-## Example: Up to 256 nodes, 100 pods per node (/16 network): -## - kube_service_addresses: 10.233.0.0/17 -## - kube_pods_subnet: 10.233.128.0/17 -## - kube_network_node_prefix: 25 -## Example: Up to 4096 nodes, 100 pods per node (/12 network): -## - kube_service_addresses: 10.192.0.0/13 -## - kube_pods_subnet: 10.200.0.0/13 -## - kube_network_node_prefix: 25 -kube_network_node_prefix: 25 - -##### api-server - -kube_kubeadm_apiserver_extra_args: - cors-allowed-origins: ".*" - storage-media-type: "application/json" - max-requests-inflight: 1500 - max-mutating-requests-inflight: 500 - -## Extra control plane host volume mounts -## Example: -## apiserver_extra_volumes: -## - name: name -## hostPath: /host/path -## mountPath: /mount/path -## readOnly: true -# apiserver_extra_volumes: {} - -## ETCD backend for k8s data -# kube_apiserver_storage_backend: etcd3 - -## change to 0.0.0.0 to enable insecure access from anywhere (not recommended) -# kube_apiserver_insecure_bind_address: 127.0.0.1 - -## By default the external API listens on all interfaces, this can be changed to -## listen on a specific address/interface. -# kube_apiserver_bind_address: 0.0.0.0 - -## A port range to reserve for services with NodePort visibility. -## Inclusive at both ends of the range. 
-# kube_apiserver_node_port_range: "30000-32767" - -kube_apiserver_memory_limit: 20000M -kube_apiserver_cpu_limit: 10000m -kube_apiserver_memory_requests: 1024M -kube_apiserver_cpu_requests: 1000m -# kube_apiserver_request_timeout: "1m0s" - -# 1.9 and below Admission control plug-ins -#kube_apiserver_admission_control: -# - NamespaceLifecycle -# - LimitRanger -# - ServiceAccount -# - DefaultStorageClass -# - PersistentVolumeClaimResize -# - MutatingAdmissionWebhook -# - ValidatingAdmissionWebhook -# - ResourceQuota -# - DefaultTolerationSeconds -# - DenyEscalatingExec - -## 1.10+ admission plugins -# kube_apiserver_enable_admission_plugins: [] - -## 1.10+ list of disabled admission plugins -# kube_apiserver_disable_admission_plugins: [] - -##### controller - -# kube_kubeadm_controller_extra_args: {} - -# controller_manager_extra_volumes: {} - -# kube_controller_manager_bind_address: 0.0.0.0 - -kube_controller_memory_limit: 20000M -kube_controller_cpu_limit: 10000m -kube_controller_memory_requests: 1024M -kube_controller_cpu_requests: 1000m -# kube_controller_node_monitor_grace_period: 40s -# kube_controller_node_monitor_period: 5s -# kube_controller_pod_eviction_timeout: 5m0s -# kube_controller_terminated_pod_gc_threshold: 12500 - -##### scheduler - -# kube_kubeadm_scheduler_extra_args: {} - -# scheduler_extra_volumes: {} - -# kube_scheduler_bind_address: 0.0.0.0 - -kube_scheduler_memory_limit: 200000M -kube_scheduler_cpu_limit: 10000m -kube_scheduler_memory_requests: 1024M -kube_scheduler_cpu_requests: 1000m - - -##### dashboard - -dashboard_enabled: false -# dashboard_replicas: 1 - -# Limits for dashboard -# dashboard_cpu_limit: 100m -# dashboard_memory_limit: 256M -# dashboard_cpu_requests: 50m -# dashboard_memory_requests: 64M - -# Set dashboard_use_custom_certs to true if overriding dashboard_certs_secret_name with a secret that -# contains dashboard_tls_key_file and dashboard_tls_cert_file instead of using the initContainer provisioned certs -# 
dashboard_use_custom_certs: false -# dashboard_certs_secret_name: kubernetes-dashboard-certs -# dashboard_tls_key_file: dashboard.key -# dashboard_tls_cert_file: dashboard.crt -# dashboard_master_toleration: true - -# Override dashboard default settings -# dashboard_token_ttl: 900 -# dashboard_skip_login: false - - -############## other configuration ############## - -## Optionally reserve resources for OS system daemons. -# system_reserved: true -## Uncomment to override default values -# system_memory_reserved: 3072M -# system_cpu_reserved: 500m -## Reservation for master hosts -# system_master_memory_reserved: 256M -# system_master_cpu_reserved: 250m - -############## addon configuration ############## - -# Helm deployment -# helm_enabled: true - -# Cert manager deployment -# cert_manager_enabled: true -# cert_manager_namespace: "cert-manager" - -############## docker configuration ############## - -# docker_version: latest - -## Used to set docker daemon iptables options to true -docker_iptables_enabled: false - -docker_daemon_graph: "/mnt" - -docker_log_opts: "--log-opt max-size=2g --log-opt max-file=1 --log-driver=json-file" - -# add --add-runtime nvidia=/usr/bin/nvidia-container-runtime for nvidia-runtime in docker_option -docker_options: >- - --add-runtime nvidia=/usr/bin/nvidia-container-runtime - --default-runtime nvidia - {%- if docker_insecure_registries is defined %} - {{ docker_insecure_registries | map('regex_replace', '^(.*)$', '--insecure-registry=\1' ) | list | join(' ') }} - {%- endif %} - {% if docker_registry_mirrors is defined %} - {{ docker_registry_mirrors | map('regex_replace', '^(.*)$', '--registry-mirror=\1' ) | list | join(' ') }} - {%- endif %} - {%- if docker_version != "latest" and docker_version is version('17.05', '<') %} - --graph={{ docker_daemon_graph }} {% if ansible_os_family not in ["openSUSE Leap", "openSUSE Tumbleweed", "Suse"] %}{{ docker_log_opts }}{% endif %} - {%- else %} - --data-root={{ docker_daemon_graph }} {% if 
ansible_os_family not in ["openSUSE Leap", "openSUSE Tumbleweed", "Suse"] %}{{ docker_log_opts }}{% endif %} - {%- endif %} - {%- if ansible_architecture == "aarch64" and ansible_os_family == "RedHat" %} - --add-runtime docker-runc=/usr/libexec/docker/docker-runc-current - --default-runtime=docker-runc --exec-opt native.cgroupdriver=systemd - --userland-proxy-path=/usr/libexec/docker/docker-proxy-current --signature-verification=false - {%- endif -%} \ No newline at end of file From 8b6e5c047b70ad93d91474f681c8aea653b1e0e0 Mon Sep 17 00:00:00 2001 From: "yuye@microsoft.com" Date: Fri, 8 Nov 2019 16:36:06 +0800 Subject: [PATCH 55/69] Update doc --- contrib/kubespray/openpai.yml | 354 ++++++++++++++++++++++++++++++++++ contrib/kubespray/readme.md | 27 +++ 2 files changed, 381 insertions(+) create mode 100644 contrib/kubespray/openpai.yml create mode 100644 contrib/kubespray/readme.md diff --git a/contrib/kubespray/openpai.yml b/contrib/kubespray/openpai.yml new file mode 100644 index 0000000000..1d5678c7f3 --- /dev/null +++ b/contrib/kubespray/openpai.yml @@ -0,0 +1,354 @@ +############## etcd configuration ############## + +# etcd_deployment_type: docker + +# etcd_version: v3.3.10 + +## Set level of detail for etcd exported metrics, specify 'extensive' to include histogram metrics. +# etcd_metrics: basic + +## Etcd is restricted by default to 512M on systems under 4GB RAM, 512MB is not enough for much more than testing. +## Set this if your etcd nodes have less than 4GB but you want more RAM for etcd. Set to 0 for unrestricted RAM. +etcd_memory_limit: "0" + +## Etcd has a default of 2G for its space quota. If you put a value in etcd_memory_limit which is less than +## etcd_quota_backend_bytes, you may encounter out of memory terminations of the etcd cluster. Please check +## etcd documentation for more information. 
+## https://etcd.io/docs/v3.4.0/op-guide/configuration/ +etcd_quota_backend_bytes: "8589934592" + +############## nginx configuration ############## + +# Requests for load balancer app +loadbalancer_apiserver_memory_requests: 500M +loadbalancer_apiserver_cpu_requests: 500m + +loadbalancer_apiserver_keepalive_timeout: 15m + +## Internal loadbalancers for apiservers +# loadbalancer_apiserver_localhost: true + +## valid options are "nginx" or "haproxy" +loadbalancer_apiserver_type: "nginx" +## applied if only external loadbalancer_apiserver is defined, otherwise ignored +# apiserver_loadbalancer_domain_name: "lb-apiserver.kubernetes.local" + +## Local loadbalancer should use this port +## And must be set port 6443 +# loadbalancer_apiserver_port: 6443 + +## If loadbalancer_apiserver_healthcheck_port variable defined, enables proxy liveness check for nginx. +# loadbalancer_apiserver_healthcheck_port: 8081 + + +############## DNS configuration ############## +## Kubernetes cluster name, also will be used as DNS domain +# cluster_name: cluster.local + +## Subdomains of DNS domain to be resolved via /etc/resolv.conf for hostnet pods +# ndots: 2 + +## Can be coredns, coredns_dual, manual or none +# dns_mode: coredns + +## Set manual server if using a custom cluster DNS server +## manual_dns_server: 10.x.x.x +## Enable nodelocal dns cache +enable_nodelocaldns: false +# nodelocaldns_ip: 169.254.25.10 +# nodelocaldns_health_port: 9254 + +## Enable k8s_external plugin for CoreDNS +# enable_coredns_k8s_external: false +# coredns_k8s_external_zone: k8s_external.local + +## Enable endpoint_pod_names option for kubernetes plugin +# enable_coredns_k8s_endpoint_pod_names: false + +## Can be docker_dns, host_resolvconf or none +# resolvconf_mode: docker_dns + +## Ip address of the kubernetes skydns service +# skydns_server: "{{ kube_service_addresses|ipaddr('net')|ipaddr(3)|ipaddr('address') }}" +# skydns_server_secondary: "{{ 
kube_service_addresses|ipaddr('net')|ipaddr(4)|ipaddr('address') }}" +# dns_domain: "{{ cluster_name }}" + +# Limits for coredns +dns_memory_limit: 1000Mi +dns_cpu_requests: 1000m +dns_memory_requests: 500Mi +dns_min_replicas: 1 +# dns_nodes_per_replica: 16 +# dns_cores_per_replica: 256 +# dns_prevent_single_point_failure: "{{ 'true' if dns_min_replicas|int > 1 else 'false' }}" +# coredns_ordinal_suffix: "" + +# nodelocaldns +# nodelocaldns_cpu_requests: 100m +# nodelocaldns_memory_limit: 170Mi +# nodelocaldnsdns_memory_requests: 70Mi + +# Netchecker +## Deploy netchecker app to verify DNS resolve as an HTTP service +deploy_netchecker: true +# netchecker_port: 31081 +# agent_report_interval: 15 +# netcheck_namespace: default + +# Limits for netchecker apps +# netchecker_agent_cpu_limit: 30m +# netchecker_agent_memory_limit: 100M +# netchecker_agent_cpu_requests: 15m +# netchecker_agent_memory_requests: 64M +# netchecker_server_cpu_limit: 100m +# netchecker_server_memory_limit: 256M +# netchecker_server_cpu_requests: 50m +# netchecker_server_memory_requests: 64M + +# SecurityContext when PodSecurityPolicy is enabled +# netchecker_agent_user: 1000 +# netchecker_server_user: 1000 +# netchecker_agent_group: 1000 +# netchecker_server_group: 1000 + + +############## kubernetes configuration ############## + +## Change this to use another Kubernetes version, e.g. a current beta release +# kube_version: v1.15.3 + +## kubernetes image repo define +# kube_image_repo: "gcr.io/google-containers" + +## Cluster Loglevel configuration +# kube_log_level: 2 + +#kube_token_auth: true +#kube_basic_auth: true + +## Make a copy of kubeconfig on the host that runs Ansible in {{ inventory_dir }}/artifacts +kubeconfig_localhost: true +## Download kubectl onto the host that runs Ansible in {{ bin_dir }} +kubectl_localhost: true + +# kubelet_status_update_frequency: 10s + +## For some things, kubelet needs to load kernel modules. 
For example, +## dynamic kernel services are needed for mounting persistent volumes into containers. These may not be +## loaded by preinstall kubernetes processes. For example, ceph and rbd backed volumes. Set this variable to +## true to let kubelet load kernel modules. +# kubelet_load_modules: false + +## Configure the amount of pods able to run on single node +## default is equal to application default +# kubelet_max_pods: 110 + +## Support custom flags to be passed to kubelet +kubelet_custom_flags: + - "--image-pull-progress-deadline=10m" + +# kube_feature_gates: [] + +## Support custom flags to be passed to kubelet only on nodes, not masters +# kubelet_node_custom_flags: [] + +k8s_image_pull_policy: Always + +## extra runtime config +# kube_api_runtime_config: [] + +##### networking + +# Choose network plugin (cilium, calico, contiv, weave or flannel. Use cni for generic cni plugin) +# Can also be set to 'cloud', which lets the cloud provider setup appropriate routing +kube_network_plugin: weave + + +# Setting multi_networking to true will install Multus: https://github.com/intel/multus-cni +# kube_network_plugin_multus: false + +## Scale: 4096 nodes, 100 pods per node + +## Kubernetes internal network for services, unused block of space. +kube_service_addresses: 10.192.0.0/13 + +## internal network. When used, it will assign IP +## addresses from this range to individual pods. +## This network must be unused in your network infrastructure! +kube_pods_subnet: 10.200.0.0/13 + +## internal network node size allocation (optional). This is the size allocated +## to each node on your network. With the /13 pod subnet above and a /25 node prefix +## there is room for 4096 nodes with up to 126 pod addresses per node. 
+## Example: Up to 256 nodes, 100 pods per node (/16 network): +## - kube_service_addresses: 10.233.0.0/17 +## - kube_pods_subnet: 10.233.128.0/17 +## - kube_network_node_prefix: 25 +## Example: Up to 4096 nodes, 100 pods per node (/12 network): +## - kube_service_addresses: 10.192.0.0/13 +## - kube_pods_subnet: 10.200.0.0/13 +## - kube_network_node_prefix: 25 +kube_network_node_prefix: 25 + +##### api-server + +kube_kubeadm_apiserver_extra_args: + cors-allowed-origins: ".*" + storage-media-type: "application/json" + max-requests-inflight: 1500 + max-mutating-requests-inflight: 500 + +## Extra control plane host volume mounts +## Example: +## apiserver_extra_volumes: +## - name: name +## hostPath: /host/path +## mountPath: /mount/path +## readOnly: true +# apiserver_extra_volumes: {} + +## ETCD backend for k8s data +# kube_apiserver_storage_backend: etcd3 + +## change to 0.0.0.0 to enable insecure access from anywhere (not recommended) +# kube_apiserver_insecure_bind_address: 127.0.0.1 + +## By default the external API listens on all interfaces, this can be changed to +## listen on a specific address/interface. +# kube_apiserver_bind_address: 0.0.0.0 + +## A port range to reserve for services with NodePort visibility. +## Inclusive at both ends of the range. 
+# kube_apiserver_node_port_range: "30000-32767" + +kube_apiserver_memory_limit: 20000M +kube_apiserver_cpu_limit: 10000m +kube_apiserver_memory_requests: 1024M +kube_apiserver_cpu_requests: 1000m +# kube_apiserver_request_timeout: "1m0s" + +# 1.9 and below Admission control plug-ins +#kube_apiserver_admission_control: +# - NamespaceLifecycle +# - LimitRanger +# - ServiceAccount +# - DefaultStorageClass +# - PersistentVolumeClaimResize +# - MutatingAdmissionWebhook +# - ValidatingAdmissionWebhook +# - ResourceQuota +# - DefaultTolerationSeconds +# - DenyEscalatingExec + +## 1.10+ admission plugins +# kube_apiserver_enable_admission_plugins: [] + +## 1.10+ list of disabled admission plugins +# kube_apiserver_disable_admission_plugins: [] + +##### controller + +# kube_kubeadm_controller_extra_args: {} + +# controller_manager_extra_volumes: {} + +# kube_controller_manager_bind_address: 0.0.0.0 + +kube_controller_memory_limit: 20000M +kube_controller_cpu_limit: 10000m +kube_controller_memory_requests: 1024M +kube_controller_cpu_requests: 1000m +# kube_controller_node_monitor_grace_period: 40s +# kube_controller_node_monitor_period: 5s +# kube_controller_pod_eviction_timeout: 5m0s +# kube_controller_terminated_pod_gc_threshold: 12500 + +##### scheduler + +# kube_kubeadm_scheduler_extra_args: {} + +# scheduler_extra_volumes: {} + +# kube_scheduler_bind_address: 0.0.0.0 + +kube_scheduler_memory_limit: 200000M +kube_scheduler_cpu_limit: 10000m +kube_scheduler_memory_requests: 1024M +kube_scheduler_cpu_requests: 1000m + + +##### dashboard + +dashboard_enabled: false +# dashboard_replicas: 1 + +# Limits for dashboard +# dashboard_cpu_limit: 100m +# dashboard_memory_limit: 256M +# dashboard_cpu_requests: 50m +# dashboard_memory_requests: 64M + +# Set dashboard_use_custom_certs to true if overriding dashboard_certs_secret_name with a secret that +# contains dashboard_tls_key_file and dashboard_tls_cert_file instead of using the initContainer provisioned certs +# 
dashboard_use_custom_certs: false +# dashboard_certs_secret_name: kubernetes-dashboard-certs +# dashboard_tls_key_file: dashboard.key +# dashboard_tls_cert_file: dashboard.crt +# dashboard_master_toleration: true + +# Override dashboard default settings +# dashboard_token_ttl: 900 +# dashboard_skip_login: false + + +############## other configuration ############## + +## Optionally reserve resources for OS system daemons. +# system_reserved: true +## Uncomment to override default values +# system_memory_reserved: 3072M +# system_cpu_reserved: 500m +## Reservation for master hosts +# system_master_memory_reserved: 256M +# system_master_cpu_reserved: 250m + +############## addon configuration ############## + +# Helm deployment +# helm_enabled: true + +# Cert manager deployment +# cert_manager_enabled: true +# cert_manager_namespace: "cert-manager" + +############## docker configuration ############## + +# docker_version: latest + +## Used to set docker daemon iptables options to true +docker_iptables_enabled: false + +docker_daemon_graph: "/mnt" + +docker_log_opts: "--log-opt max-size=2g --log-opt max-file=1 --log-driver=json-file" + +# add --add-runtime nvidia=/usr/bin/nvidia-container-runtime for nvidia-runtime in docker_option +docker_options: >- + --add-runtime nvidia=/usr/bin/nvidia-container-runtime + --default-runtime nvidia + {%- if docker_insecure_registries is defined %} + {{ docker_insecure_registries | map('regex_replace', '^(.*)$', '--insecure-registry=\1' ) | list | join(' ') }} + {%- endif %} + {% if docker_registry_mirrors is defined %} + {{ docker_registry_mirrors | map('regex_replace', '^(.*)$', '--registry-mirror=\1' ) | list | join(' ') }} + {%- endif %} + {%- if docker_version != "latest" and docker_version is version('17.05', '<') %} + --graph={{ docker_daemon_graph }} {% if ansible_os_family not in ["openSUSE Leap", "openSUSE Tumbleweed", "Suse"] %}{{ docker_log_opts }}{% endif %} + {%- else %} + --data-root={{ docker_daemon_graph }} {% if 
ansible_os_family not in ["openSUSE Leap", "openSUSE Tumbleweed", "Suse"] %}{{ docker_log_opts }}{% endif %} + {%- endif %} + {%- if ansible_architecture == "aarch64" and ansible_os_family == "RedHat" %} + --add-runtime docker-runc=/usr/libexec/docker/docker-runc-current + --default-runtime=docker-runc --exec-opt native.cgroupdriver=systemd + --userland-proxy-path=/usr/libexec/docker/docker-proxy-current --signature-verification=false + {%- endif -%} \ No newline at end of file diff --git a/contrib/kubespray/readme.md b/contrib/kubespray/readme.md new file mode 100644 index 0000000000..a1610addb0 --- /dev/null +++ b/contrib/kubespray/readme.md @@ -0,0 +1,27 @@ +#### Deploy kubernetes through kubespray. + + +#### Environment Setup + + +###### Prepare ansible environment + +```bash + +``` + +###### Prepare ssh configuration + +```bash + +``` + +#### kubespray configuration + +###### Write inventory +```bash + + +``` + +#### Setup k8s-cluster. \ No newline at end of file From dea5451133e334e444f94dd5a42bd1f71901fb3b Mon Sep 17 00:00:00 2001 From: "yuye@microsoft.com" Date: Tue, 12 Nov 2019 11:27:04 +0800 Subject: [PATCH 56/69] Update contri --- contrib/kubespray/readme.md | 86 +++++++++++++++++++++- contrib/kubespray/set-passwordless-ssh.yml | 5 ++ 2 files changed, 89 insertions(+), 2 deletions(-) create mode 100644 contrib/kubespray/set-passwordless-ssh.yml diff --git a/contrib/kubespray/readme.md b/contrib/kubespray/readme.md index a1610addb0..b34fef62bd 100644 --- a/contrib/kubespray/readme.md +++ b/contrib/kubespray/readme.md @@ -3,18 +3,100 @@ #### Environment Setup +###### Writing inventory + +An example +```bash +all: + hosts: + node1: + ip: x.x.x.37 + access_ip: x.x.x.37 + ansible_host: x.x.x.37 + node2: + ip: x.x.x.38 + access_ip: x.x.x.38 + ansible_host: x.x.x.38 + node3: + ip: x.x.x.39 + access_ip: x.x.x.39 + ansible_host: x.x.x.39 + node4: + ip: x.x.x.40 + access_ip: x.x.x.40 + ansible_host: x.x.x.40 + node5: + ip: x.x.x.41 + access_ip: x.x.x.41 + 
ansible_host: x.x.x.41 + node6: + ip: x.x.x.42 + access_ip: x.x.x.42 + ansible_host: x.x.x.42 + children: + kube-master: + hosts: + node1: + node2: + node3: + kube-node: + hosts: + node1: + node2: + node3: + node4: + node5: + node6: + etcd: + hosts: + node1: + node2: + node3: + k8s-cluster: + children: + kube-node: + kube-master: + calico-rr: + hosts: {} +``` + +###### Configure passwordless ssh ( In your ansible control node ) + +```bash + + + + + +``` + ###### Prepare ansible environment ```bash +sudo apt update + +curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py + +# python3 +sudo python3 get-pip.py + +sudo pip3 install ansible + +sudo pip3 install paramiko + +# for ansible test +sudo apt-get install sshpass ``` -###### Prepare ssh configuration +###### Ansible test ```bash -``` +``` + +###### Testing ssh and ansible #### kubespray configuration diff --git a/contrib/kubespray/set-passwordless-ssh.yml b/contrib/kubespray/set-passwordless-ssh.yml new file mode 100644 index 0000000000..bdec6fc7ff --- /dev/null +++ b/contrib/kubespray/set-passwordless-ssh.yml @@ -0,0 +1,5 @@ +--- +- hosts: all + tasks: + - name: Non secret authentication + authorized_key: user=core key="{{ lookup('file', '/home/core/.ssh/id_rsa.pub') }}" state=present \ No newline at end of file From e0a6ec1ffba2ddbc37a8aaa5472cd2483ca48962 Mon Sep 17 00:00:00 2001 From: "yuye@microsoft.com" Date: Tue, 12 Nov 2019 15:20:52 +0800 Subject: [PATCH 57/69] Update contri --- contrib/kubespray/readme.md | 69 +++++++++++++++++++++++++++++-------- 1 file changed, 54 insertions(+), 15 deletions(-) diff --git a/contrib/kubespray/readme.md b/contrib/kubespray/readme.md index b34fef62bd..92f2a5a7c2 100644 --- a/contrib/kubespray/readme.md +++ b/contrib/kubespray/readme.md @@ -6,33 +6,45 @@ ###### Writing inventory An example -```bash +```yaml all: hosts: node1: ip: x.x.x.37 access_ip: x.x.x.37 ansible_host: x.x.x.37 + ansible_ssh_pass: "your-password-here" + ansible_ssh_extra_args: '-o 
StrictHostKeyChecking=no' node2: ip: x.x.x.38 access_ip: x.x.x.38 ansible_host: x.x.x.38 + ansible_ssh_pass: "your-password-here" + ansible_ssh_extra_args: '-o StrictHostKeyChecking=no' node3: ip: x.x.x.39 access_ip: x.x.x.39 ansible_host: x.x.x.39 + ansible_ssh_pass: "your-password-here" + ansible_ssh_extra_args: '-o StrictHostKeyChecking=no' node4: ip: x.x.x.40 access_ip: x.x.x.40 ansible_host: x.x.x.40 + ansible_ssh_pass: "your-password-here" + ansible_ssh_extra_args: '-o StrictHostKeyChecking=no' node5: ip: x.x.x.41 access_ip: x.x.x.41 ansible_host: x.x.x.41 + ansible_ssh_pass: "your-password-here" + ansible_ssh_extra_args: '-o StrictHostKeyChecking=no' node6: ip: x.x.x.42 access_ip: x.x.x.42 ansible_host: x.x.x.42 + ansible_ssh_pass: "your-password-here" + ansible_ssh_extra_args: '-o StrictHostKeyChecking=no' children: kube-master: hosts: @@ -60,16 +72,11 @@ all: hosts: {} ``` -###### Configure passwordless ssh ( In your ansible control node ) - -```bash - - - - - -``` - +```yaml +# following 2 vars are configured for the first time to configure passwordless ssh. You can remove ansible_ssh_pass later. 
+ansible_ssh_pass: "your-password-here" +ansible_ssh_extra_args: '-o StrictHostKeyChecking=no' +``` ###### Prepare ansible environment @@ -90,20 +97,52 @@ sudo apt-get install sshpass ``` +###### Configure passwordless ssh ( In your ansible control node ) + +```bash +# generate key +ssh-keygen -t rsa + +# configure passwordless ssh for your cluster +ansible-playbook -i host.yml set-passwordless-ssh.yml +``` + ###### Ansible test ```bash -``` +ansible all -i host.yml -m ping -###### Testing ssh and ansible +``` #### kubespray configuration -###### Write inventory +###### Environment + ```bash +cd ~ + +git clone https://github.com/kubernetes-sigs/kubespray + +cd kubespray + +git checkout release-2.11 +sudo pip3 install -r requirements.txt +cp -rfp inventory/sample inventory/mycluster + +cd ~ + +cp ~/pai/contrib/kubespray/openpai.yml ~/kubespray/inventory/mycluster + +cp /path/to/your/host.yml ~/kubespray/inventory/mycluster +``` + +#### Install nvidia drivers ``` -#### Setup k8s-cluster. \ No newline at end of file +```bash + + +``` \ No newline at end of file From ebebcdd2f04bced2614cd59cbfe0f11c607b0b34 Mon Sep 17 00:00:00 2001 From: "yuye@microsoft.com" Date: Tue, 12 Nov 2019 15:21:27 +0800 Subject: [PATCH 58/69] playbook to install nvidia drivers --- contrib/kubespray/nvidia-drivers.yml | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 contrib/kubespray/nvidia-drivers.yml diff --git a/contrib/kubespray/nvidia-drivers.yml b/contrib/kubespray/nvidia-drivers.yml new file mode 100644 index 0000000000..b75ef91414 --- /dev/null +++ b/contrib/kubespray/nvidia-drivers.yml @@ -0,0 +1,4 @@ +--- +- name: Run the equivalent of "apt-get update" as a separate step + apt: + update_cache: yes \ No newline at end of file From 85cbf5c8e9a2b2ee191990a99f271e2dc32e7acb Mon Sep 17 00:00:00 2001 From: "yuye@microsoft.com" Date: Tue, 12 Nov 2019 15:37:20 +0800 Subject: [PATCH 59/69] Update contri --- contrib/kubespray/nvidia-drivers.yml | 8 +++++--- 1 file changed, 5 
insertions(+), 3 deletions(-) diff --git a/contrib/kubespray/nvidia-drivers.yml b/contrib/kubespray/nvidia-drivers.yml index b75ef91414..4504f4ee33 100644 --- a/contrib/kubespray/nvidia-drivers.yml +++ b/contrib/kubespray/nvidia-drivers.yml @@ -1,4 +1,6 @@ --- -- name: Run the equivalent of "apt-get update" as a separate step - apt: - update_cache: yes \ No newline at end of file +- hosts: all + tasks: + - name: Run the equivalent of "apt-get update" as a separate step + apt: + update_cache: yes \ No newline at end of file From dc878a85a1e50798fa31902e658f0de513285629 Mon Sep 17 00:00:00 2001 From: "yuye@microsoft.com" Date: Tue, 12 Nov 2019 16:21:55 +0800 Subject: [PATCH 60/69] nvidia drivers install --- contrib/kubespray/nvidia-drivers.yml | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/contrib/kubespray/nvidia-drivers.yml b/contrib/kubespray/nvidia-drivers.yml index 4504f4ee33..414d7885ed 100644 --- a/contrib/kubespray/nvidia-drivers.yml +++ b/contrib/kubespray/nvidia-drivers.yml @@ -1,6 +1,19 @@ --- - hosts: all tasks: + - name: Add the drivers repository to Ubuntu + apt_repository: + repo: ppa:graphics-drivers/ppa + state: present + - name: Run the equivalent of "apt-get update" as a separate step apt: - update_cache: yes \ No newline at end of file + update_cache: yes + + - name: Install nvidia drivers, we will install 410 version + apt: + name: nvidia-410 + state: present + + - name: reboot vm + reboot: \ No newline at end of file From 9e4ae1f0a1a5ada6db95d5f384d623f325b67316 Mon Sep 17 00:00:00 2001 From: "yuye@microsoft.com" Date: Tue, 12 Nov 2019 16:22:11 +0800 Subject: [PATCH 61/69] nvidia drivers install --- contrib/kubespray/nvidia-drivers.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/kubespray/nvidia-drivers.yml b/contrib/kubespray/nvidia-drivers.yml index 414d7885ed..291e6e502e 100644 --- a/contrib/kubespray/nvidia-drivers.yml +++ b/contrib/kubespray/nvidia-drivers.yml @@ -16,4 +16,4 
@@ state: present - name: reboot vm - reboot: \ No newline at end of file + reboot: From 8de887ff86c9f501ab35676c978f3155ef7f51a7 Mon Sep 17 00:00:00 2001 From: "yuye@microsoft.com" Date: Tue, 12 Nov 2019 17:11:54 +0800 Subject: [PATCH 62/69] nvidia persistent mode --- contrib/kubespray/nvidia-drivers.yml | 1 + .../nvidia-persistenced-override.conf | 4 +++ contrib/kubespray/nvidia-persistent-mode.yml | 18 +++++++++++ contrib/kubespray/readme.md | 30 ++++++++++++++----- 4 files changed, 45 insertions(+), 8 deletions(-) create mode 100644 contrib/kubespray/nvidia-persistenced-override.conf create mode 100644 contrib/kubespray/nvidia-persistent-mode.yml diff --git a/contrib/kubespray/nvidia-drivers.yml b/contrib/kubespray/nvidia-drivers.yml index 291e6e502e..bf0e3be132 100644 --- a/contrib/kubespray/nvidia-drivers.yml +++ b/contrib/kubespray/nvidia-drivers.yml @@ -17,3 +17,4 @@ - name: reboot vm reboot: + diff --git a/contrib/kubespray/nvidia-persistenced-override.conf b/contrib/kubespray/nvidia-persistenced-override.conf new file mode 100644 index 0000000000..379dcae71d --- /dev/null +++ b/contrib/kubespray/nvidia-persistenced-override.conf @@ -0,0 +1,4 @@ + +[Service] +ExecStart= +ExecStart=/usr/bin/nvidia-persistenced --user root --persistence-mode --verbose \ No newline at end of file diff --git a/contrib/kubespray/nvidia-persistent-mode.yml b/contrib/kubespray/nvidia-persistent-mode.yml new file mode 100644 index 0000000000..164d4f649d --- /dev/null +++ b/contrib/kubespray/nvidia-persistent-mode.yml @@ -0,0 +1,18 @@ +--- +- hosts: all + tasks: + - name: create persistenced override dir + file: + path: /etc/systemd/system/nvidia-persistenced.service.d/ + state: directory + recurse: yes + + - name: configure persistenced service to turn on persistence mode + copy: + src: nvidia-persistenced-override.conf + dest: /etc/systemd/system/nvidia-persistenced.service.d/override.conf + + - name: enable persistenced + systemd: + name: nvidia-persistenced + enabled: yes diff 
--git a/contrib/kubespray/readme.md b/contrib/kubespray/readme.md index 92f2a5a7c2..80b23fa22a 100644 --- a/contrib/kubespray/readme.md +++ b/contrib/kubespray/readme.md @@ -115,6 +115,28 @@ ansible all -i host.yml -m ping ``` +#### Install nvidia drivers + +###### Install nvidia drivers-410 ( You can change the version ) + +```bash + +git clone https://github.com/microsoft/pai.git + +cd pai/contrib/kubespray/ + +ansible-playbook -i /path/to/host.yml nvidia-drivers.yml --become --become-user=root + +``` + +###### Enable nvidia persistent mode + +```bash + +ansible-playbook -i /path/to/host.yml nvidia-persistent-mode.yml --become --become-user=root + +``` + #### kubespray configuration ###### Environment @@ -138,11 +160,3 @@ cp ~/pai/contrib/kubespray/openpai.yml ~/kubespray/inventory/mycluster cp /path/to/your/host.yml ~/kubespray/inventory/mycluster ``` - -#### Install nvidia drivers -``` - -```bash - - -``` \ No newline at end of file From d6c2caef717731e2105c28eafd4a153046af22b0 Mon Sep 17 00:00:00 2001 From: "yuye@microsoft.com" Date: Tue, 12 Nov 2019 18:05:59 +0800 Subject: [PATCH 63/69] nvidia persistent mode --- contrib/kubespray/nvidia-persistent-mode.yml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/contrib/kubespray/nvidia-persistent-mode.yml b/contrib/kubespray/nvidia-persistent-mode.yml index 164d4f649d..8a110778ad 100644 --- a/contrib/kubespray/nvidia-persistent-mode.yml +++ b/contrib/kubespray/nvidia-persistent-mode.yml @@ -16,3 +16,8 @@ systemd: name: nvidia-persistenced enabled: yes + + - name: perform nvidia smi + shell: nvidia-smi + args: + executable: /bin/bash From d428d60f7e7e9548d2e0d8f71b98280a50583c49 Mon Sep 17 00:00:00 2001 From: "yuye@microsoft.com" Date: Tue, 12 Nov 2019 18:31:23 +0800 Subject: [PATCH 64/69] nvidia runtime install script --- contrib/kubespray/nvidia-docker.yml | 22 ++++++++++++++++++++++ contrib/kubespray/readme.md | 5 +++++ 2 files changed, 27 insertions(+) create mode 100644 contrib/kubespray/nvidia-docker.yml 
diff --git a/contrib/kubespray/nvidia-docker.yml b/contrib/kubespray/nvidia-docker.yml new file mode 100644 index 0000000000..4233e0a90d --- /dev/null +++ b/contrib/kubespray/nvidia-docker.yml @@ -0,0 +1,22 @@ +--- +- hosts: all + tasks: + - name: Add key + apt_key: + url: https://nvidia.github.io/nvidia-container-runtime/gpgkey + state: present + + - name: add repo + get_url: + url: https://nvidia.github.io/nvidia-container-runtime/ubuntu16.04/nvidia-container-runtime.list + dest: /etc/apt/sources.list.d/nvidia-container-runtime.list + mode: 0644 + owner: root + group: root + + - name: install packages + apt: + name: nvidia-container-runtime + state: present + update_cache: yes + notify: reload docker diff --git a/contrib/kubespray/readme.md b/contrib/kubespray/readme.md index 80b23fa22a..595aeaa6d8 100644 --- a/contrib/kubespray/readme.md +++ b/contrib/kubespray/readme.md @@ -117,6 +117,8 @@ ansible all -i host.yml -m ping #### Install nvidia drivers +If GPU driver has been installed, you could skip to next steps + ###### Install nvidia drivers-410 ( You can change the version ) ```bash @@ -137,6 +139,9 @@ ansible-playbook -i /path/to/host.yml nvidia-persistent-mode.yml --become --beco ``` +#### setup docker + + #### kubespray configuration ###### Environment From 0f0e3f7adc1eecdb46158818655932da6a369371 Mon Sep 17 00:00:00 2001 From: "yuye@microsoft.com" Date: Tue, 12 Nov 2019 18:34:23 +0800 Subject: [PATCH 65/69] add apt-get update --- contrib/kubespray/nvidia-docker.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/contrib/kubespray/nvidia-docker.yml b/contrib/kubespray/nvidia-docker.yml index 4233e0a90d..796de5bac0 100644 --- a/contrib/kubespray/nvidia-docker.yml +++ b/contrib/kubespray/nvidia-docker.yml @@ -14,6 +14,10 @@ owner: root group: root + - name: Run the equivalent of "apt-get update" as a separate step + apt: + update_cache: yes + - name: install packages apt: name: nvidia-container-runtime From a0def27fe3a592ef740dc74e99b0a4544450b0b0 Mon 
Sep 17 00:00:00 2001 From: "yuye@microsoft.com" Date: Tue, 12 Nov 2019 18:39:33 +0800 Subject: [PATCH 66/69] update document --- contrib/kubespray/readme.md | 32 +++++++++++++++++++++++++++++++- 1 file changed, 31 insertions(+), 1 deletion(-) diff --git a/contrib/kubespray/readme.md b/contrib/kubespray/readme.md index 595aeaa6d8..3f63815a63 100644 --- a/contrib/kubespray/readme.md +++ b/contrib/kubespray/readme.md @@ -139,9 +139,29 @@ ansible-playbook -i /path/to/host.yml nvidia-persistent-mode.yml --become --beco ``` -#### setup docker +#### setup nvidia container runtime +```bash + +ansible-playbook -i /path/to/host.yml nvidia-docker.yml --become --become-user=root + +``` + +Here we assume every node in your cluster runs Ubuntu 16.04. Otherwise, please change the following task in the playbook. + +```yaml + - name: add repo + get_url: + url: https://nvidia.github.io/nvidia-container-runtime/ubuntu16.04/nvidia-container-runtime.list + dest: /etc/apt/sources.list.d/nvidia-container-runtime.list + mode: 0644 + owner: root + group: root + + - name: Run the equivalent of "apt-get update" as a separate step +``` + #### kubespray configuration ###### Environment @@ -165,3 +185,13 @@ cp ~/pai/contrib/kubespray/openpai.yml ~/kubespray/inventory/mycluster cp /path/to/your/host.yml ~/kubespray/inventory/mycluster ``` + +###### Deploy k8s + +```bash +cd kubespray/ + +ansible-playbook -i inventory/mycluster/hosts.yml cluster.yml --become --become-user=root -e "@inventory/mycluster/openpai.yaml" +``` + + From b91665e10707c944b2a2e32e14f1b977cb7e3124 Mon Sep 17 00:00:00 2001 From: "yuye@microsoft.com" Date: Tue, 12 Nov 2019 18:54:06 +0800 Subject: [PATCH 67/69] update document --- contrib/kubespray/readme.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/contrib/kubespray/readme.md b/contrib/kubespray/readme.md index 3f63815a63..ce3db4831b 100644 --- a/contrib/kubespray/readme.md +++ b/contrib/kubespray/readme.md @@ -191,7 +191,9 @@ cp /path/to/your/host.yml 
~/kubespray/inventory/mycluster ```bash cd kubespray/ -ansible-playbook -i inventory/mycluster/hosts.yml cluster.yml --become --become-user=root -e "@inventory/mycluster/openpai.yaml" +ansible-playbook -i inventory/mycluster/host.yml cluster.yml --become --become-user=root -e "@inventory/mycluster/openpai.yml" ``` +Note: please adjust openpai.yml according to your requirements. + From a59651e7b9374958479fb88b67a2cc96f8698125 Mon Sep 17 00:00:00 2001 From: "yuye@microsoft.com" Date: Tue, 12 Nov 2019 19:31:16 +0800 Subject: [PATCH 68/69] update document --- contrib/kubespray/readme.md | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/contrib/kubespray/readme.md b/contrib/kubespray/readme.md index ce3db4831b..2cbf9ccb32 100644 --- a/contrib/kubespray/readme.md +++ b/contrib/kubespray/readme.md @@ -196,4 +196,17 @@ ansible-playbook -i inventory/mycluster/host.yml cluster.yml --become --become- Note: please adjust openpai.yml according to your requirements. +###### setup kubectl +```bash +mkdir -p ~/.kube + +cp inventory/mycluster/artifacts/admin.conf ~/.kube/config + +curl -LO https://storage.googleapis.com/kubernetes-release/release/`curl -s https://storage.googleapis.com/kubernetes-release/release/stable.txt`/bin/linux/amd64/kubectl + +chmod +x ./kubectl + +sudo mv ./kubectl /usr/local/bin/kubectl + +``` From 2abc8faaa460d24564cfd2b9e35d28b1982aca0e Mon Sep 17 00:00:00 2001 From: "yuye@microsoft.com" Date: Wed, 13 Nov 2019 10:26:16 +0800 Subject: [PATCH 69/69] update document --- contrib/kubespray/readme.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/contrib/kubespray/readme.md b/contrib/kubespray/readme.md index 2cbf9ccb32..6a1b52ce74 100644 --- a/contrib/kubespray/readme.md +++ b/contrib/kubespray/readme.md @@ -210,3 +210,5 @@ chmod +x ./kubectl sudo mv ./kubectl /usr/local/bin/kubectl ``` + +##### Please save your inventory after deployment