diff --git a/eve/create-volumes.sh b/eve/create-volumes.sh index 2d1c25190c..67b3f229b4 100755 --- a/eve/create-volumes.sh +++ b/eve/create-volumes.sh @@ -37,7 +37,7 @@ check_pod_is_in_phase() { [[ $phase = "$expected_phase" ]] } -BOOTSTRAP_NODE_NAME=${BOOTSTRAP_NODE_NAME:-$(salt-call --local --out txt grains.get id | cut -c 8-)} +NODE_NAME=${NODE_NAME:-$(salt-call --local --out txt grains.get id | cut -c 8-)} PRODUCT_TXT=${PRODUCT_TXT:-/vagrant/_build/root/product.txt} MAX_TRIES=300 @@ -55,17 +55,18 @@ KUBECONFIG=${KUBECONFIG:-/etc/kubernetes/admin.conf} export KUBECONFIG echo "Creating storage volumes" -sed "s/BOOTSTRAP_NODE_NAME/${BOOTSTRAP_NODE_NAME}/" "${PRODUCT_MOUNT}/examples/prometheus-sparse.yaml" | \ +sed "s/NODE_NAME/${NODE_NAME}/" \ + "${PRODUCT_MOUNT}/examples/prometheus-sparse.yaml" | \ kubectl apply -f - -echo "Waiting for PV 'bootstrap-alertmanager' to be provisioned" -if ! retry "$MAX_TRIES" check_pv_exists bootstrap-alertmanager; then +echo "Waiting for PV '$NODE_NAME-alertmanager' to be provisioned" +if ! retry "$MAX_TRIES" check_pv_exists "$NODE_NAME-alertmanager"; then echo "PV not created" exit 1 fi -echo "Waiting for PV 'bootstrap-prometheus' to be provisioned" -if ! retry "$MAX_TRIES" check_pv_exists bootstrap-prometheus; then +echo "Waiting for PV '$NODE_NAME-prometheus' to be provisioned" +if ! retry "$MAX_TRIES" check_pv_exists "$NODE_NAME-prometheus"; then echo "PV not created" exit 1 fi diff --git a/eve/main.yml b/eve/main.yml index f0d9a4df41..830faaa65f 100644 --- a/eve/main.yml +++ b/eve/main.yml @@ -43,11 +43,12 @@ models: SLEEP_TIME: "5" STABILIZATION_TIME: "30" STATUS: "Running" + EXTRA_OPTS: "" command: > git checkout "%(prop:branch)s" && sudo eve/wait_pods_status.sh --sleep-time "$SLEEP_TIME" --stabilization-time "$STABILIZATION_TIME" - --status "$STATUS" --retry "$RETRY" + --status "$STATUS" --retry "$RETRY" $EXTRA_OPTS usePTY: true haltOnFailure: true - ShellCommand: &wait_pods_running_ssh @@ -56,12 +57,13 @@ models: <<: *_env_wait_pods_running SSH_CONFIG: ssh_config SSH_HOST: bootstrap + EXTRA_OPTS: "" command: > git checkout "%(prop:branch)s" && scp -F "$SSH_CONFIG" eve/wait_pods_status.sh "$SSH_HOST":/tmp/ && ssh -F "$SSH_CONFIG" "$SSH_HOST" sudo /tmp/wait_pods_status.sh --sleep-time "$SLEEP_TIME" --stabilization-time "$STABILIZATION_TIME" - --status "$STATUS" --retry "$RETRY" + --status "$STATUS" --retry "$RETRY" $EXTRA_OPTS usePTY: true haltOnFailure: true - ShellCommand: &build_all @@ -299,12 +301,14 @@ models: ISO_MOUNTPOINT: "/var/tmp/metalk8s" TEST_HOSTS_LIST: "bootstrap" PYTEST_FILTERS: "post and ci" + BOOTSTRAP_BACKUP_ARCHIVE: "" command: > ssh -F ssh_config bastion -- "cd metalk8s && export SSH_CONFIG_FILE=\"${SSH_CONFIG_FILE}\" && export ISO_MOUNTPOINT=\"${ISO_MOUNTPOINT}\" && export TEST_HOSTS_LIST=\"${TEST_HOSTS_LIST}\" && + export BOOTSTRAP_BACKUP_ARCHIVE=\"${BOOTSTRAP_BACKUP_ARCHIVE}\" && tox -e tests -- ${PYTEST_ARGS:-""} -m \"${PYTEST_FILTERS}\"" workdir: build/eve/workers/openstack-multiple-nodes/terraform/ haltOnFailure: true @@ -446,6 +450,17 @@ models: done alwaysRun: true + - ShellCommand: &copy_bastion_pub_key_ssh + name: Send bastion public key to nodes + command: > + for host in $HOSTS_LIST; do + ssh -F ssh_config bastion "cat .ssh/bastion.pub" | + ssh -F ssh_config $host "cat >> .ssh/authorized_keys" + done + env: + HOSTS_LIST: bootstrap + workdir: build/eve/workers/openstack-multiple-nodes/terraform/ + stages: pre-merge: worker: @@ -526,6 +541,7 @@ stages: # - single-node-downgrade-centos - single-node-patch-version -
single-node-solutions + - bootstrap-restore waitForFinish: True - ShellCommand: *add_final_status_artifact_success - Upload: *upload_final_status_artifact @@ -958,7 +974,8 @@ stages: name: Run slow tests locally env: <<: *_env_local_tests - PYTEST_FILTERS: "post and ci and not multinode and slow" + PYTEST_FILTERS: >- post and ci and not multinode and slow and not restore # --- Downgrade to version N-1 --- - ShellCommand: <<: *add_archive @@ -1479,7 +1496,7 @@ stages: CLASS_NAME: multi node.centos7 TEST_NAME: 1 bootstrap 1 master,etcd simultaneous_builds: 20 - worker: + worker: &multi_node_worker <<: *single_node_worker flavor: m1.medium path: eve/workers/openstack-multiple-nodes @@ -1510,19 +1527,19 @@ stages: haltOnFailure: true - ShellCommand: *terraform_install - ShellCommand: *terraform_install_check - - ShellCommand: + - ShellCommand: &terraform_init_multi_node <<: *terraform_init workdir: build/eve/workers/openstack-multiple-nodes/terraform/ - - ShellCommand: + - ShellCommand: &terraform_validate_multi_node <<: *terraform_validate workdir: build/eve/workers/openstack-multiple-nodes/terraform/ - - ShellCommand: + - ShellCommand: &terraform_apply_multi_node <<: *terraform_apply workdir: build/eve/workers/openstack-multiple-nodes/terraform/ env: <<: *_env_terraform TF_VAR_nodes_count: "1" - - ShellCommand: + - ShellCommand: &check_ssh_config_bootstrap name: Check SSH config for bootstrap node command: |- if [ ! -f ssh_config ]; then @@ -1536,18 +1553,10 @@ workdir: build/eve/workers/openstack-multiple-nodes/terraform/ haltOnFailure: true - ShellCommand: - # FIXME: find a way to share bastion public key to all spawned - # instances from Terraform - name: Send bastion public key to nodes - command: > - scp -F ssh_config -3 bastion:.ssh/bastion.pub bootstrap:.ssh/ && - ssh -F ssh_config bootstrap - "cat .ssh/bastion.pub >> .ssh/authorized_keys" && - scp -F ssh_config -3 bastion:.ssh/bastion.pub node1:.ssh/ && - ssh -F ssh_config node1 - "cat .ssh/bastion.pub >> .ssh/authorized_keys" - workdir: build/eve/workers/openstack-multiple-nodes/terraform/ - - ShellCommand: + <<: *copy_bastion_pub_key_ssh + env: + HOSTS_LIST: "bootstrap node1" + - ShellCommand: &copy_bastion_priv_key_to_bootstrap # FIXME: find a cleaner way with Terraform.
name: Send bastion private key to bootstrap command: > @@ -1583,19 +1592,6 @@ stages: /var/tmp/metalk8s/bootstrap.sh --verbose workdir: build/eve/workers/openstack-multiple-nodes/terraform/ haltOnFailure: true - - ShellCommand: &provision_prometheus_alertmanager_storage_ssh - name: Provision Prometheus and AlertManager storage - env: - SSH_CONFIG: >- - eve/workers/openstack-multiple-nodes/terraform/ssh_config - command: > - scp -F $SSH_CONFIG eve/create-volumes.sh bootstrap:/tmp/create-volumes.sh && - ssh -F $SSH_CONFIG bootstrap - sudo env - PRODUCT_TXT=/var/tmp/metalk8s/product.txt - PRODUCT_MOUNT=/var/tmp/metalk8s - /tmp/create-volumes.sh - haltOnFailure: true - ShellCommand: &install_kubectl_bootstrap_ssh name: Install kubectl on the boostrap node command: > @@ -1603,7 +1599,7 @@ sudo yum install -y kubectl --disablerepo=* --enablerepo=metalk8s-kubernetes workdir: build/eve/workers/openstack-multiple-nodes/terraform/ - - ShellCommand: + - ShellCommand: &enable_ipip_ssh name: Enable IPIP env: SSH_CONFIG: >- @@ -1611,7 +1607,7 @@ command: > ssh -F $SSH_CONFIG bootstrap 'bash /home/centos/scripts/enable_ipip.sh' - - ShellCommand: + - ShellCommand: &copy_test_source_to_bastion # FIXME: should find a cleaner way to do this (git clone may be # cumbersome, unless we assume the repo is public and don't use # authentication) command: > tar cfp - tox.ini VERSION tests/ buildchain/buildchain/versions.py tools/ | ssh -F $SSH_CONFIG bastion '(mkdir metalk8s; cd "$_"; tar xf -)' - - ShellCommand: + - ShellCommand: &wait_kube_system_pods_running_multi_node <<: *wait_pods_running_ssh + name: Wait for "kube-system" Pods to be in running state env: <<: *_env_wait_pods_running_ssh + EXTRA_OPTS: "--namespace kube-system" SSH_CONFIG: >- eve/workers/openstack-multiple-nodes/terraform/ssh_config - ShellCommand: @@ -1633,30 +1631,51 @@ stages: name: Run installation scenarii on the bastion env: <<: *_env_bastion_tests - PYTEST_FILTERS: "install and ci and multinodes" - - ShellCommand: + PYTEST_FILTERS: "install and ci and multinodes and not node2" + - ShellCommand: &provision_volumes_on_node1 + name: Provision Prometheus and AlertManager storage + env: + SSH_CONFIG: >- + eve/workers/openstack-multiple-nodes/terraform/ssh_config + command: > + scp -F $SSH_CONFIG eve/create-volumes.sh bootstrap:/tmp/create-volumes.sh && + ssh -F $SSH_CONFIG bootstrap + sudo env + PRODUCT_TXT=/var/tmp/metalk8s/product.txt + PRODUCT_MOUNT=/var/tmp/metalk8s + NODE_NAME=node1 + /tmp/create-volumes.sh + haltOnFailure: true + - ShellCommand: &wait_pods_running_multi_node + <<: *wait_pods_running_ssh + env: + <<: *_env_wait_pods_running_ssh + SSH_CONFIG: >- + eve/workers/openstack-multiple-nodes/terraform/ssh_config + - ShellCommand: &multi_node_fast_tests <<: *bastion_tests name: Run fast tests on the bastion env: <<: *_env_bastion_tests PYTEST_FILTERS: "post and ci and not slow" - - ShellCommand: + - ShellCommand: &multi_node_slow_tests <<: *bastion_tests name: Run slow tests on the bastion env: <<: *_env_bastion_tests PYTEST_ARGS: "--suppress-no-test-exit-code" - PYTEST_FILTERS: "post and ci and slow and not bootstrap" - - ShellCommand: + PYTEST_FILTERS: > + post and ci and slow and not bootstrap and not restore + - ShellCommand: &generate_report_multi_node <<: *generate_report_over_ssh - env: + env: &_env_generate_report_multi_node <<: *_env_generate_report_over_ssh HOSTS_LIST: "bootstrap node1" SSH_CONFIG: >- eve/workers/openstack-multiple-nodes/terraform/ssh_config - - ShellCommand: +
- ShellCommand: &collect_report_multi_node <<: *collect_report_over_ssh - env: + env: &_env_collect_report_multi_node <<: *_env_collect_report_over_ssh HOSTS_LIST: "bootstrap node1" SSH_CONFIG: >- @@ -1676,10 +1695,197 @@ stages: env: STEP_NAME: multiple-nodes-centos DURATION: "14400" - - ShellCommand: + - ShellCommand: &terraform_destroy_multi_node <<: *terraform_destroy workdir: build/eve/workers/openstack-multiple-nodes/terraform/ + bootstrap-restore: + _metalk8s_internal_info: + junit_info: &_bootstrap_restore_junit_info + TEST_SUITE: install + CLASS_NAME: multi node.centos7 + TEST_NAME: bootstrap restore + simultaneous_builds: 20 + worker: *multi_node_worker + steps: + - Git: *git_pull + - ShellCommand: + <<: *add_final_status_artifact_failed + env: + <<: *_env_final_status_artifact_failed + <<: *_bootstrap_restore_junit_info + STEP_NAME: bootstrap-restore + - ShellCommand: *setup_cache + - ShellCommand: *ssh_ip_setup + - ShellCommand: *retrieve_iso + - ShellCommand: *retrieve_iso_checksum + - ShellCommand: *check_iso_checksum + - ShellCommand: *terraform_install + - ShellCommand: *terraform_install_check + - ShellCommand: *terraform_init_multi_node + - ShellCommand: *terraform_validate_multi_node + - ShellCommand: + <<: *terraform_apply_multi_node + env: &_env_terraform_bootstrap_restore + <<: *_env_terraform + TF_VAR_nodes_count: "2" + - ShellCommand: *check_ssh_config_bootstrap + - ShellCommand: + <<: *copy_bastion_pub_key_ssh + env: + HOSTS_LIST: "bootstrap node1 node2" + - ShellCommand: *copy_bastion_priv_key_to_bootstrap + - ShellCommand: *copy_iso_bootstrap_ssh + - ShellCommand: *create_mountpoint_bootstrap_ssh + - ShellCommand: *mount_iso_bootstrap_ssh + - ShellCommand: *start_bootstrap_ssh + - ShellCommand: *enable_ipip_ssh + - ShellCommand: *copy_test_source_to_bastion + - ShellCommand: *wait_kube_system_pods_running_multi_node + - ShellCommand: + <<: *bastion_tests + name: Run installation scenario on the bastion + env: + <<: *_env_bastion_tests + PYTEST_FILTERS: "install and ci and multinodes" + - ShellCommand: *provision_volumes_on_node1 + - ShellCommand: *wait_pods_running_multi_node + - ShellCommand: *multi_node_fast_tests + - ShellCommand: *multi_node_slow_tests + - SetPropertyFromCommand: + name: Set bootstrap backup archive property + property: bootstrap_backup_archive + command: > + ssh -F ssh_config bootstrap + "sudo find /var/lib/metalk8s -name 'backup_*.tar.gz' + -printf '%f\n' | sort | tail -n1" + workdir: build/eve/workers/openstack-multiple-nodes/terraform/ + haltOnFailure: true + - ShellCommand: + name: Retrieve the backup archive from the bootstrap node + command: > + scp -F ssh_config + bootstrap:"/var/lib/metalk8s/%(prop:bootstrap_backup_archive)s" + /tmp + workdir: build/eve/workers/openstack-multiple-nodes/terraform/ + haltOnFailure: true + - ShellCommand: + name: Destroy the bootstrap node + env: *_env_terraform_bootstrap_restore + command: > + terraform destroy -auto-approve + -target openstack_compute_instance_v2.bootstrap + workdir: build/eve/workers/openstack-multiple-nodes/terraform/ + haltOnFailure: true + - SetPropertyFromCommand: + name: Set node1 etcd container id + property: node1_etcd_container_id + command: > + ssh -F ssh_config node1 + sudo crictl ps -q --label io.kubernetes.pod.namespace=kube-system + --label io.kubernetes.container.name=etcd --state Running + workdir: build/eve/workers/openstack-multiple-nodes/terraform/ + haltOnFailure: true + - SetPropertyFromCommand: + name: Set bootstrap node etcd member id + property:
bootstrap_etcd_member_id + command: > + ssh -F ssh_config node1 + sudo crictl exec -i "%(prop:node1_etcd_container_id)s" sh -c \" + ETCDCTL_API=3 etcdctl --endpoints https://127.0.0.1:2379 + --cert /etc/kubernetes/pki/etcd/server.crt + --key /etc/kubernetes/pki/etcd/server.key + --cacert /etc/kubernetes/pki/etcd/ca.crt + member list\" | awk -F ', ' '$3 ~ "bootstrap" { print $1 }' + workdir: build/eve/workers/openstack-multiple-nodes/terraform/ + haltOnFailure: true + - ShellCommand: + name: Remove bootstrap node from etcd members + command: > + ssh -F ssh_config node1 + sudo crictl exec -i "%(prop:node1_etcd_container_id)s" sh -c \" + ETCDCTL_API=3 etcdctl --endpoints https://127.0.0.1:2379 + --cert /etc/kubernetes/pki/etcd/server.crt + --key /etc/kubernetes/pki/etcd/server.key + --cacert /etc/kubernetes/pki/etcd/ca.crt + member remove %(prop:bootstrap_etcd_member_id)s\" + workdir: build/eve/workers/openstack-multiple-nodes/terraform/ + haltOnFailure: true + - ShellCommand: + name: Remove bootstrap node object + command: > + ssh -F ssh_config node1 + sudo kubectl --kubeconfig=/etc/kubernetes/admin.conf delete + node --selector="node-role.kubernetes.io/bootstrap" + workdir: build/eve/workers/openstack-multiple-nodes/terraform/ + haltOnFailure: true + - ShellCommand: + name: Create a new bootstrap node + env: *_env_terraform_bootstrap_restore + command: + terraform apply -auto-approve -refresh + -target openstack_compute_instance_v2.bootstrap + -target null_resource.ssh_config + workdir: build/eve/workers/openstack-multiple-nodes/terraform/ + haltOnFailure: true + - ShellCommand: *copy_bastion_pub_key_ssh + - ShellCommand: + name: Copy the backup archive to the new bootstrap node + command: > + scp -F ssh_config + "/tmp/%(prop:bootstrap_backup_archive)s" bootstrap:/tmp + workdir: build/eve/workers/openstack-multiple-nodes/terraform/ + haltOnFailure: true + - ShellCommand: + <<: *copy_iso_bootstrap_ssh + name: Copy ISO to the new bootstrap node + - ShellCommand: + <<: *create_mountpoint_bootstrap_ssh + name: Create mountpoint on the new bootstrap node + - ShellCommand: + <<: *mount_iso_bootstrap_ssh + name: Mount ISO image on the new bootstrap node + - ShellCommand: + <<: *bastion_tests + name: Run restore tests on the bastion + # NOTE: Increase timeout as restore may take some time + # Since we use pytest we do not have any output until the restore has + totally finished + timeout: 1800 + env: + <<: *_env_bastion_tests + PYTEST_FILTERS: "restore" + BOOTSTRAP_BACKUP_ARCHIVE: "/tmp/%(prop:bootstrap_backup_archive)s" + - ShellCommand: *wait_pods_running_multi_node + - ShellCommand: *multi_node_fast_tests + - ShellCommand: *multi_node_slow_tests + - ShellCommand: + <<: *generate_report_multi_node + env: + <<: *_env_generate_report_multi_node + HOSTS_LIST: "bootstrap node1 node2" + - ShellCommand: + <<: *collect_report_multi_node + env: + <<: *_env_collect_report_multi_node + HOSTS_LIST: "bootstrap node1 node2" + STEP_NAME: bootstrap-restore + - Upload: *upload_report_artifacts + - ShellCommand: + <<: *add_final_status_artifact_success + env: + <<: *_env_final_status_artifact_success + <<: *_bootstrap_restore_junit_info + STEP_NAME: bootstrap-restore + - Upload: *upload_final_status_artifact + - ShellCommand: + <<: *wait_debug + timeout: 14400 + env: + STEP_NAME: bootstrap-restore + DURATION: "14400" + - ShellCommand: *terraform_destroy_multi_node + single-node-solutions: _metalk8s_internal_info: junit_info: &_solutions_single-node_junit_info @@ -1728,10 +1934,18 @@ stages: <<: *start_bootstrap_ssh
workdir: build/eve/workers/openstack-single-node-with-cypress/terraform/ - ShellCommand: - <<: *provision_prometheus_alertmanager_storage_ssh + name: Provision Prometheus and AlertManager storage env: SSH_CONFIG: >- eve/workers/openstack-single-node-with-cypress/terraform/ssh_config + command: > + scp -F $SSH_CONFIG eve/create-volumes.sh bootstrap:/tmp/create-volumes.sh && + ssh -F $SSH_CONFIG bootstrap + sudo env + PRODUCT_TXT=/var/tmp/metalk8s/product.txt + PRODUCT_MOUNT=/var/tmp/metalk8s + /tmp/create-volumes.sh + haltOnFailure: true - ShellCommand: <<: *install_kubectl_bootstrap_ssh workdir: build/eve/workers/openstack-single-node-with-cypress/terraform/ diff --git a/eve/testrail_description_file.yaml b/eve/testrail_description_file.yaml index 37957e2bcb..a560646300 100644 --- a/eve/testrail_description_file.yaml +++ b/eve/testrail_description_file.yaml @@ -47,3 +47,12 @@ Install: CentOs 7 tests cases: Solutions: {} + Multi Node: + description: >- + Multiple nodes test environment + sub_sections: + CentOs7: + description: >- + CentOs 7 tests + cases: + Bootstrap restore: {} diff --git a/examples/prometheus-sparse.yaml b/examples/prometheus-sparse.yaml index f55d5cd9ef..da54db6cec 100644 --- a/examples/prometheus-sparse.yaml +++ b/examples/prometheus-sparse.yaml @@ -5,9 +5,9 @@ apiVersion: storage.metalk8s.scality.com/v1alpha1 kind: Volume metadata: - name: bootstrap-prometheus + name: NODE_NAME-prometheus spec: - nodeName: BOOTSTRAP_NODE_NAME + nodeName: NODE_NAME storageClassName: metalk8s-prometheus sparseLoopDevice: size: 10Gi @@ -19,9 +19,9 @@ spec: apiVersion: storage.metalk8s.scality.com/v1alpha1 kind: Volume metadata: - name: bootstrap-alertmanager + name: NODE_NAME-alertmanager spec: - nodeName: BOOTSTRAP_NODE_NAME + nodeName: NODE_NAME storageClassName: metalk8s-prometheus sparseLoopDevice: size: 1Gi diff --git a/scripts/restore.sh.in b/scripts/restore.sh.in index 4dab9195e0..6c029fa2b5 100755 --- a/scripts/restore.sh.in +++ b/scripts/restore.sh.in @@ -273,6 +273,16 @@ reconfigure_nodes() { # See htpps://github.com/saltstack/salt/issues/20590 "${SALT_MASTER_CALL[@]}" salt '*' saltutil.refresh_pillar && sleep 20 "${SALT_MASTER_CALL[@]}" salt '*' mine.update + + "${SALT_MASTER_CALL[@]}" salt -L "$non_bootstrap" --state-output=mixed \ + state.sls metalk8s.kubernetes.apiserver-proxy \ + saltenv=metalk8s-@@VERSION + + # For kubelet 1.16+, in case of connection failure kubelet will not + reconnect until it is restarted (since the old bootstrap node is lost + during the restore, the connection may have failed) + # See https://github.com/kubernetes/kubernetes/issues/87615 + "${SALT_MASTER_CALL[@]}" salt -L "$non_bootstrap" service.restart kubelet } highstate_bootstrap() { @@ -283,7 +293,7 @@ ) "${SALT_CALL}" --retcode-passthrough --state-output=mixed state.sls \ - metalk8s.salt.master.certs.etcd-client \ + metalk8s.salt.master.certs \ saltenv=metalk8s-@@VERSION "${SALT_MASTER_CALL[@]}" salt-run --state-output=mixed state.orchestrate \ metalk8s.orchestrate.deploy_node \ @@ -297,6 +307,18 @@ reconfigure_k8s_obj() { "${SALT_MASTER_CALL[@]}" salt-run --state-output=mixed state.orchestrate \ metalk8s.addons.nginx-ingress-control-plane.deployed \ saltenv=metalk8s-@@VERSION + + "${SALT_MASTER_CALL[@]}" salt-run --state-output=mixed state.orchestrate \ + metalk8s.addons.dex.deployed \ + saltenv=metalk8s-@@VERSION + + "${SALT_MASTER_CALL[@]}" salt-run --state-output=mixed state.orchestrate \ + metalk8s.addons.prometheus-operator.deployed \ + saltenv=metalk8s-@@VERSION
+ + "${SALT_MASTER_CALL[@]}" salt-run --state-output=mixed state.orchestrate \ + metalk8s.addons.ui.deployed \ + saltenv=metalk8s-@@VERSION } if [ ! -f "$BACKUP_ARCHIVE" ]; then diff --git a/tests/conftest.py b/tests/conftest.py index d3ac39191c..61d508496f 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -384,6 +384,14 @@ def dex_failed_login(username, password, assert response.headers.get('locaction') is None +@then(parsers.parse('node "{node_name}" is a member of etcd cluster')) +def check_etcd_role(ssh_config, k8s_client, node_name): + """Check if the given node is a member of the etcd cluster.""" + etcd_member_list = etcdctl(k8s_client, ['member', 'list'], ssh_config) + assert node_name in etcd_member_list, \ + 'node {} is not part of the etcd cluster'.format(node_name) + + # }}} # Helpers {{{ @@ -444,4 +452,25 @@ def _dex_login(username, password, control_plane_ip, request_retry_session): return result +def etcdctl(k8s_client, command, ssh_config): + """Run an etcdctl command inside the etcd container.""" + name = 'etcd-{}'.format( + utils.get_node_name('bootstrap', ssh_config) + ) + + etcd_command = [ + 'etcdctl', + '--endpoints', 'https://localhost:2379', + '--cacert', '/etc/kubernetes/pki/etcd/ca.crt', + '--key', '/etc/kubernetes/pki/etcd/server.key', + '--cert', '/etc/kubernetes/pki/etcd/server.crt', + ] + command + output = kubernetes.stream.stream( + k8s_client.connect_get_namespaced_pod_exec, + name=name, namespace='kube-system', + command=etcd_command, + stderr=True, stdin=False, stdout=True, tty=False + ) + return output + # }}} diff --git a/tests/install/features/expansion.feature b/tests/install/features/expansion.feature index 00b8d64fa8..abe8bc75bb 100644 --- a/tests/install/features/expansion.feature +++ b/tests/install/features/expansion.feature @@ -1,7 +1,9 @@ @install @ci @local @multinodes Feature: Cluster expansion - Scenario: Add one node to the cluster + Background: Given the Kubernetes API is available + + Scenario: Add one node to the cluster When we declare a new "control-plane" node on host "node1" Then node "node1" is registered in Kubernetes And node "node1" status is "NotReady" @@ -14,3 +16,18 @@ Feature: Cluster expansion And we have 1 running pod labeled 'k8s-app=kube-proxy' in namespace 'kube-system' on node 'node1' And we have 1 running pod labeled 'component=etcd' in namespace 'kube-system' on node 'node1' And node "node1" is a member of etcd cluster + + @node2 + Scenario: Add a second node to the cluster + When we declare a new "control-plane" node on host "node2" + Then node "node2" is registered in Kubernetes + And node "node2" status is "NotReady" + When we deploy the node "node2" + Then node "node2" status is "Ready" + And we have 1 running pod labeled 'component=kube-controller-manager' in namespace 'kube-system' on node 'node2' + And we have 1 running pod labeled 'component=kube-scheduler' in namespace 'kube-system' on node 'node2' + And we have 1 running pod labeled 'component=kube-apiserver' in namespace 'kube-system' on node 'node2' + And we have 1 running pod labeled 'k8s-app=calico-node' in namespace 'kube-system' on node 'node2' + And we have 1 running pod labeled 'k8s-app=kube-proxy' in namespace 'kube-system' on node 'node2' + And we have 1 running pod labeled 'component=etcd' in namespace 'kube-system' on node 'node2' + And node "node2" is a member of etcd cluster diff --git a/tests/install/steps/test_expansion.py b/tests/install/steps/test_expansion.py index 7d02b625eb..9b4daf2701 100644 --- 
a/tests/install/steps/test_expansion.py +++ b/tests/install/steps/test_expansion.py @@ -13,7 +13,12 @@ # Scenarios @scenario('../features/expansion.feature', 'Add one node to the cluster') -def test_cluster_expansion(host): +def test_cluster_expansion_1_node(host): + pass + + +@scenario('../features/expansion.feature', 'Add a second node to the cluster') +def test_cluster_expansion_2_nodes(host): pass # When {{{ @@ -82,14 +87,6 @@ def _check_node_status(): ) -@then(parsers.parse('node "{node_name}" is a member of etcd cluster')) -def check_etcd_role(ssh_config, k8s_client, node_name): - """Check if the given node is a member of the etcd cluster.""" - etcd_member_list = etcdctl(k8s_client, ['member', 'list'], ssh_config) - assert node_name in etcd_member_list, \ - 'node {} is not part of the etcd cluster'.format(node_name) - - # }}} # Helpers {{{ @@ -117,25 +114,4 @@ def node_from_manifest(manifest): manifest['api_version'] = manifest.pop('apiVersion') return k8s.client.V1Node(**manifest) -def etcdctl(k8s_client, command, ssh_config): - """Run an etcdctl command inside the etcd container.""" - name = 'etcd-{}'.format( - utils.get_node_name('bootstrap', ssh_config) - ) - - etcd_command = [ - 'etcdctl', - '--endpoints', 'https://localhost:2379', - '--cacert', '/etc/kubernetes/pki/etcd/ca.crt', - '--key', '/etc/kubernetes/pki/etcd/server.key', - '--cert', '/etc/kubernetes/pki/etcd/server.crt', - ] + command - output = k8s.stream.stream( - k8s_client.connect_get_namespaced_pod_exec, - name=name, namespace='kube-system', - command=etcd_command, - stderr=True, stdin=False, stdout=True, tty=False - ) - return output - # }}} diff --git a/tests/post/features/authentication.feature b/tests/post/features/authentication.feature index cbf427e6ed..c515f7402d 100644 --- a/tests/post/features/authentication.feature +++ b/tests/post/features/authentication.feature @@ -7,7 +7,7 @@ Feature: Authentication is up and running Scenario: Expected Pods Given the Kubernetes API is available And pods with label 'release=nginx-ingress-control-plane' are 'Ready' - Then we have 2 running pod labeled 'app.kubernetes.io/name=dex' in namespace 'metalk8s-auth' on node 'bootstrap' + Then we have 2 running pod labeled 'app.kubernetes.io/name=dex' in namespace 'metalk8s-auth' Scenario: Reach the OpenID Config Given the Kubernetes API is available diff --git a/tests/post/features/pods_alive.feature b/tests/post/features/pods_alive.feature index fe8a3695ab..bd19a7bbe3 100644 --- a/tests/post/features/pods_alive.feature +++ b/tests/post/features/pods_alive.feature @@ -12,7 +12,7 @@ Feature: Pods should be alive And we have 1 running pod labeled 'component=etcd' in namespace 'kube-system' on node 'bootstrap' And we have 1 running pod labeled 'k8s-app=kube-proxy' in namespace 'kube-system' on node 'bootstrap' And we have 1 running pod labeled 'k8s-app=calico-node' in namespace 'kube-system' on node 'bootstrap' - And we have 2 running pod labeled 'k8s-app=kube-dns' in namespace 'kube-system' on node 'bootstrap' + And we have 2 running pod labeled 'k8s-app=kube-dns' in namespace 'kube-system' And we have 1 running pod labeled 'app=salt-master' in namespace 'kube-system' on node 'bootstrap' And we have 1 running pod labeled 'app=repositories' in namespace 'kube-system' on node 'bootstrap' diff --git a/tests/post/features/restore.feature b/tests/post/features/restore.feature new file mode 100644 index 0000000000..bf075c1e01 --- /dev/null +++ b/tests/post/features/restore.feature @@ -0,0 +1,14 @@ +@post @ci @local @slow @restore 
+Feature: Restore + Scenario: Restore the bootstrap node + When we run the restore + Then the Kubernetes API is available + And we have 1 running pod labeled 'component=kube-controller-manager' in namespace 'kube-system' on node 'bootstrap' + And we have 1 running pod labeled 'component=kube-scheduler' in namespace 'kube-system' on node 'bootstrap' + And we have 1 running pod labeled 'component=kube-apiserver' in namespace 'kube-system' on node 'bootstrap' + And we have 1 running pod labeled 'k8s-app=calico-node' in namespace 'kube-system' on node 'bootstrap' + And we have 1 running pod labeled 'k8s-app=kube-proxy' in namespace 'kube-system' on node 'bootstrap' + And we have 1 running pod labeled 'component=etcd' in namespace 'kube-system' on node 'bootstrap' + And we have 1 running pod labeled 'app=salt-master' in namespace 'kube-system' on node 'bootstrap' + And we have 1 running pod labeled 'app=repositories' in namespace 'kube-system' on node 'bootstrap' + And node "bootstrap" is a member of etcd cluster diff --git a/tests/post/steps/test_restore.py b/tests/post/steps/test_restore.py new file mode 100644 index 0000000000..a769fa8ca7 --- /dev/null +++ b/tests/post/steps/test_restore.py @@ -0,0 +1,36 @@ +import os +import testinfra + +from pytest_bdd import scenario, when + +from tests import utils + + +# Scenarios +@scenario('../features/restore.feature', 'Restore the bootstrap node') +def test_restore(host): + pass + + +# When +@when('we run the restore') +def run_restore(request, host, ssh_config): + iso_root = request.config.getoption("--iso-root") + + backup_archive = os.environ.get('BOOTSTRAP_BACKUP_ARCHIVE') + assert backup_archive, \ + "No BOOTSTRAP_BACKUP_ARCHIVE environment variable defined" + + apiserver_node_ip = utils.get_grain( + testinfra.get_host('node1', ssh_config=ssh_config), + 'metalk8s:control_plane_ip' + ) + + with host.sudo(): + res = host.run( + "%s/restore.sh --backup-file %s --apiserver-node-ip %s", + str(iso_root), + backup_archive, + apiserver_node_ip, + ) + assert res.rc == 0, res.stdout diff --git a/tox.ini b/tox.ini index 4fb2898c3e..ea65061c3e 100644 --- a/tox.ini +++ b/tox.ini @@ -94,6 +94,7 @@ passenv = SSH_CONFIG_FILE TEST_HOSTS_LIST ISO_MOUNTPOINT + BOOTSTRAP_BACKUP_ARCHIVE setenv = VAGRANT_CWD={toxinidir} commands_pre = @@ -111,6 +112,7 @@ description = deps = {[testenv:tests]deps} passenv = ISO_MOUNTPOINT + BOOTSTRAP_BACKUP_ARCHIVE commands = pytest \ --iso-root={env:ISO_MOUNTPOINT:_build/root} \ @@ -134,6 +136,7 @@ markers = authentication: tag a BDD feature as related to authentication csc: tag a BDD feature related to Cluster and Service configuration solution: tag a BDD feature as related to solution + restore: tag a BDD feature as related to bootstrap node recovery filterwarnings = ignore:encode_point has been deprecated on EllipticCurvePublicNumbers and will be removed in a future version. Please use EllipticCurvePublicKey.public_bytes to obtain both compressed and uncompressed point encoding.:UserWarning ignore:Support for unsafe construction of public numbers from encoded data will be removed in a future version. Please use EllipticCurvePublicKey.from_encoded_point:UserWarning
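For reference, eve/create-volumes.sh above is now parameterised by NODE_NAME instead of hard-coding the bootstrap node. A minimal usage sketch, matching the provision_volumes_on_node1 step (node name and mount point are taken from that step and are illustrative):

    # Run on the bootstrap node, with the MetalK8s ISO mounted under /var/tmp/metalk8s.
    sudo env \
        PRODUCT_TXT=/var/tmp/metalk8s/product.txt \
        PRODUCT_MOUNT=/var/tmp/metalk8s \
        NODE_NAME=node1 \
        /tmp/create-volumes.sh

    # The script templates examples/prometheus-sparse.yaml with NODE_NAME and then
    # waits for the resulting PVs, so they should eventually show up here:
    kubectl --kubeconfig /etc/kubernetes/admin.conf get pv node1-prometheus node1-alertmanager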
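The etcd cleanup done by the bootstrap-restore stage (locate the etcd container on a surviving control-plane node, drop the dead bootstrap member, delete the stale Node object) can be reproduced by hand roughly as follows. This is a sketch run from node1 under the same assumptions as the CI steps above (crictl-managed etcd container, kubeadm certificate paths):

    # Locate the running etcd container on the surviving control-plane node.
    container_id=$(sudo crictl ps -q \
        --label io.kubernetes.pod.namespace=kube-system \
        --label io.kubernetes.container.name=etcd --state Running)

    # Find the member id of the lost bootstrap node, then remove it.
    etcdctl_cmd="ETCDCTL_API=3 etcdctl --endpoints https://127.0.0.1:2379 \
        --cert /etc/kubernetes/pki/etcd/server.crt \
        --key /etc/kubernetes/pki/etcd/server.key \
        --cacert /etc/kubernetes/pki/etcd/ca.crt"
    member_id=$(sudo crictl exec -i "$container_id" sh -c "$etcdctl_cmd member list" \
        | awk -F ', ' '$3 ~ "bootstrap" { print $1 }')
    sudo crictl exec -i "$container_id" sh -c "$etcdctl_cmd member remove $member_id"

    # Delete the stale Node object so the restored bootstrap can register again.
    sudo kubectl --kubeconfig /etc/kubernetes/admin.conf delete node \
        --selector="node-role.kubernetes.io/bootstrap"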
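The restore scenario itself (tests/post/steps/test_restore.py) boils down to running restore.sh from the mounted ISO on the freshly re-provisioned bootstrap node. Done manually it looks roughly like this; the archive name and the node1 control-plane IP are placeholders:

    # Backup archive previously retrieved from the old bootstrap node
    # (found under /var/lib/metalk8s as backup_<timestamp>.tar.gz) and copied to /tmp.
    sudo /var/tmp/metalk8s/restore.sh \
        --backup-file /tmp/backup_<timestamp>.tar.gz \
        --apiserver-node-ip <node1-control-plane-ip>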
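On the bastion, the same scenario is selected through the new restore pytest marker, with BOOTSTRAP_BACKUP_ARCHIVE now passed through by tox.ini; a sketch of what the bastion_tests step effectively runs (values are illustrative):

    cd metalk8s
    export SSH_CONFIG_FILE=<path to ssh_config>
    export ISO_MOUNTPOINT=/var/tmp/metalk8s
    export TEST_HOSTS_LIST=bootstrap
    export BOOTSTRAP_BACKUP_ARCHIVE=/tmp/backup_<timestamp>.tar.gz
    tox -e tests -- -m "restore"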