From 5259e9c75dd3ee5c0dff411a3f1b8faca34dc40a Mon Sep 17 00:00:00 2001 From: Rei1010 <56469400+Rei1010@users.noreply.github.com> Date: Thu, 9 Jan 2025 10:22:44 +0800 Subject: [PATCH] Implement initial E2E test suite setup (#775) Signed-off-by: wen.rui Signed-off-by: haitwang-cloud --- .github/workflows/auto-release.yaml | 1 + .github/workflows/call-e2e.yaml | 70 ++++++++++++++-- .github/workflows/ci.yaml | 122 ++++++++++++++++++++++++---- Makefile | 12 +++ Makefile.defs | 9 +- charts/Makefile | 7 +- go.mod | 7 +- go.sum | 6 ++ hack/deploy-helm.sh | 65 +++++++++++++++ hack/e2e-test-setup.sh | 96 ++++++++++++++++++++++ hack/e2e-test.sh | 53 ++++++++++++ hack/util.sh | 43 ++++++++++ test/e2e/node/test_node.go | 93 +++++++++++++++++++++ test/e2e/node/test_suite_test.go | 35 ++++++++ test/e2e/pod/test_pod.go | 118 +++++++++++++++++++++++++++ test/e2e/pod/test_suite_test.go | 35 ++++++++ test/e2e/test_suite_test.go | 38 +++++++++ test/utils/common.go | 89 ++++++++++++++++++++ test/utils/config.go | 41 ++++++++++ test/utils/event.go | 50 ++++++++++++ test/utils/node.go | 73 +++++++++++++++++ test/utils/pod.go | 107 ++++++++++++++++++++++++ 22 files changed, 1144 insertions(+), 26 deletions(-) create mode 100755 hack/deploy-helm.sh create mode 100755 hack/e2e-test-setup.sh create mode 100755 hack/e2e-test.sh create mode 100644 test/e2e/node/test_node.go create mode 100644 test/e2e/node/test_suite_test.go create mode 100644 test/e2e/pod/test_pod.go create mode 100644 test/e2e/pod/test_suite_test.go create mode 100644 test/e2e/test_suite_test.go create mode 100644 test/utils/common.go create mode 100644 test/utils/config.go create mode 100644 test/utils/event.go create mode 100644 test/utils/node.go create mode 100644 test/utils/pod.go diff --git a/.github/workflows/auto-release.yaml b/.github/workflows/auto-release.yaml index dbd77a630..4a9ca44e2 100644 --- a/.github/workflows/auto-release.yaml +++ b/.github/workflows/auto-release.yaml @@ -134,6 +134,7 @@ jobs: uses: 
./.github/workflows/call-e2e.yaml with: ref: ${{ needs.ensure-tag.outputs.tag }} + type: "release" # excute a compatibility test when hami release release-e2e-upgrade: diff --git a/.github/workflows/call-e2e.yaml b/.github/workflows/call-e2e.yaml index 750b8794b..c6fd17f3d 100644 --- a/.github/workflows/call-e2e.yaml +++ b/.github/workflows/call-e2e.yaml @@ -4,15 +4,75 @@ on: workflow_call: inputs: ref: + description: 'Reference id to run tests' required: true type: string -permissions: write-all + type: + description: 'E2E type' + required: true + type: string + default: pullrequest jobs: - e2e: - runs-on: ubuntu-latest + e2e-test: + strategy: + matrix: + include: + - device: nvidia + type: tesla-p4 +# - device: nvidia +# type: rtx-4090 +# - device: huawei +# type: ascend-910b + runs-on: [ "${{ matrix.device }}", "${{ matrix.type }}" ] + environment: ${{ matrix.device }} + env: + E2E_TYPE: ${{ inputs.type }} steps: + - name: checkout code + uses: actions/checkout@v4 + + - name: install Go + uses: actions/setup-go@v5 + with: + go-version: "1.21" + + - name: setup e2e env + run: | + make e2e-env-setup + + - name: download hami helm + if: inputs.type == 'pullrequest' + uses: actions/download-artifact@v4 + with: + name: chart_package_artifact + path: charts/ + + - name: download hami image + if: inputs.type == 'pullrequest' + uses: actions/download-artifact@v4 + with: + name: hami-image + path: ./image + + - name: load e2e image + if: inputs.type == 'pullrequest' + run: | + echo "Loading Docker image from image.tar..." + if [ -z "${VSPHERE_GPU_VM_IP}" ]; then + echo "Error: VSPHERE_GPU_VM_IP is not defined!" 
+ exit 1 + fi + scp ./image/image.tar root@$VSPHERE_GPU_VM_IP:/home/ + ssh root@$VSPHERE_GPU_VM_IP "nerdctl load -i /home/image.tar" + ssh root@$VSPHERE_GPU_VM_IP "nerdctl image ls | grep hami" + + - name: deploy hami helm + env: + HAMI_VERSION: ${{ inputs.ref }} + run: | + make helm-deploy + - name: e2e test - # https://github.com/actions/virtual-environments/issues/709 run: | - echo "Need to add e2e test" + make e2e-test diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index a2fde7613..78c6c3d7a 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -9,7 +9,7 @@ on: branches-ignore: - 'dependabot/**' permissions: - contents: read # for actions/checkout to fetch code + contents: read # for actions/checkout to fetch code env: REGISTRY: docker.io @@ -38,10 +38,11 @@ jobs: run: make lint - name: import alias run: hack/verify-import-aliases.sh + test: name: Unit test needs: lint # rely on lint successful completion - runs-on: ubuntu-latest + runs-on: ubuntu-22.04 steps: - name: Checkout code uses: actions/checkout@v4 @@ -71,11 +72,29 @@ jobs: fail_ci_if_error: false verbose: true + get_version: + name: get_version + runs-on: ubuntu-22.04 + outputs: + version: ${{ steps.parse_version.outputs.version }} + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Parse the version + id: parse_version + run: | + value=$(make -f Makefile.defs print-version) + suffix="$(git rev-parse --short HEAD)" + tag="${value}-${suffix}" + echo "Generated tag: ${tag}" + echo "version=${tag}" >> $GITHUB_OUTPUT build: - name: compile - runs-on: ubuntu-latest - needs: test # rely on test successful completion + name: Compile + runs-on: ubuntu-22.04 + environment: nvidia + needs: [ test, get_version ] steps: - uses: actions/checkout@master - name: Free disk space @@ -88,22 +107,16 @@ jobs: echo "=========after clean up, the left CI disk space" df -h - - name: Get the version - id: get_version - run: | - tag="$(git rev-parse --short HEAD)" - echo 
::set-output name=VERSION::${tag} - - name: Checkout code uses: actions/checkout@v4 with: fetch-depth: 0 - + - name: Checkout submodule uses: Mushus/checkout-submodule@v1.0.1 with: basePath: # optional, default is . - submodulePath: libvgpu + submodulePath: libvgpu - name: Set up QEMU uses: docker/setup-qemu-action@v3 @@ -124,13 +137,88 @@ jobs: with: context: . file: ${{ env.IMAGE_ROOT_PATH }}/Dockerfile - labels: ${{ steps.meta.outputs.labels }} - platforms: ${{ env.BUILD_PLATFORM }} + labels: ${{ needs.get_version.outputs.version }} build-args: | - VERSION=${{ steps.get_version.outputs.VERSION }} + VERSION=${{ needs.get_version.outputs.version }} GOLANG_IMAGE=golang:1.22.5-bullseye NVIDIA_IMAGE=nvidia/cuda:12.2.0-devel-ubuntu20.04 DEST_DIR=/usr/local - tags: ${{ steps.meta.outputs.tags }} + tags: ${{ env.REGISTRY }}/${{ env.IMAGE_REPO }}:${{ needs.get_version.outputs.version }} push: false + load: true github-token: ${{ env.REGISTER_PASSWORD }} + + - name: List images + run: | + docker images + + - name: Save Docker image to image.tar + run: | + docker save ${{ env.REGISTRY }}/${{ env.IMAGE_REPO }}:${{ needs.get_version.outputs.version }} -o image.tar + + - name: Upload image.tar as artifact + uses: actions/upload-artifact@v4 + with: + name: hami-image + path: image.tar + retention-days: 5 + if-no-files-found: error + + # packages tgz from /charts of original branch, deploy to /charts of target branch + package_chart: + runs-on: ubuntu-22.04 + needs: [ test, get_version ] + env: + HELM_VERSION: v3.8.1 + steps: + - name: Checkout + uses: actions/checkout@v4 + with: + fetch-depth: 0 + ref: ${{ needs.get_ref.outputs.ref }} + + - name: Configure Git + run: | + git config user.name "$GITHUB_ACTOR" + git config user.email "$GITHUB_ACTOR@users.noreply.github.com" + + - name: Install Helm + uses: azure/setup-helm@v4 + with: + version: ${{ env.HELM_VERSION }} + + - name: Lint helm chart + run: | + make lint_chart + + - name: Package Chart + continue-on-error: false + 
env: + VERSION: ${{ needs.get_version.outputs.version }} + run: | + cd charts + make clean + make + if ! ls *.tgz &>/dev/null ; then + echo "failed to generate chart" + exit 1 + fi + cd .. + mkdir -p tmp + mv charts/*.tgz tmp + + - name: Upload Artifact + uses: actions/upload-artifact@v4.4.3 + with: + name: chart_package_artifact + path: tmp/* + retention-days: 5 + if-no-files-found: error + + # execute a full e2e test when hami code merge + e2e_test: + uses: ./.github/workflows/call-e2e.yaml + needs: [ package_chart, get_version, build ] + with: + ref: ${{ needs.get_version.outputs.version }} + type: "pullrequest" diff --git a/Makefile b/Makefile index 470c9d0d4..3dd3709b2 100644 --- a/Makefile +++ b/Makefile @@ -75,3 +75,15 @@ lint_chart: aquasec/trivy:$(TRIVY_VERSION) config --exit-code 1 --severity $(LINT_TRIVY_SEVERITY_LEVEL) /tmp/src/charts ; \ (($$?==0)) || { echo "error, failed to check chart trivy" && exit 1 ; } ; \ echo "chart trivy check: pass" + +.PHONY: e2e-env-setup +e2e-env-setup: + ./hack/e2e-test-setup.sh + +.PHONY: helm-deploy +helm-deploy: + ./hack/deploy-helm.sh "${E2E_TYPE}" "${KUBE_CONF}" "${HAMI_VERSION}" + +.PHONY: e2e-test +e2e-test: + ./hack/e2e-test.sh "${E2E_TYPE}" "${KUBE_CONF}" diff --git a/Makefile.defs b/Makefile.defs index 3d9a93759..ab0248161 100644 --- a/Makefile.defs +++ b/Makefile.defs @@ -13,9 +13,16 @@ TARGETARCH ?= amd64 DESTDIR_BIN ?= $(ROOT_DIR)/output/$(TARGETARCH)/bin DESTDIR_BASH_COMPLETION ?= $(ROOT_DIR)/output/$(TARGETARCH)/bash-completion -VERSION = $(shell cat $(dir $(lastword $(MAKEFILE_LIST)))/VERSION) +VERSION?="" +ifeq ($(VERSION), "") + VERSION=$(shell cat $(dir $(lastword $(MAKEFILE_LIST)))/VERSION) +endif ECHO_GEN=echo " GEN $(RELATIVE_DIR)/" LINT_TRIVY_SEVERITY_LEVEL ?= CRITICAL TRIVY_VERSION=0.36.0 + +.PHONY: print-version +print-version: + @echo $(VERSION) diff --git a/charts/Makefile b/charts/Makefile index 993bd946f..7ed4187dc 100644 --- a/charts/Makefile +++ b/charts/Makefile @@ -1,4 +1,5 @@ # get 
VERSION +.DEFAULT_GOAL := all include ../Makefile.defs VERSION_REGEX := '[vV]*[0-9]\+\.[0-9]\+\.[0-9]\+.*' @@ -11,6 +12,7 @@ all: update-versions lint package #update version in chart update-versions: $(ECHO_GEN) " Updating Chart version to $(VERSION)" + echo "VERSION=$(VERSION)" echo "VERSION_MAJOR=$(VERSION_MAJOR)" echo "GIT_VERSION=$(GIT_VERSION)" echo "FULL_BUILD_VERSION=$(FULL_BUILD_VERSION)" @@ -22,10 +24,11 @@ update-versions: sed -i 's/version: "*'$(VERSION_REGEX)'"*/version: "'$$hami_version'"/g' $(VALUES_FILE) lint: update-versions - helm lint --with-subcharts --values ./hami/values.yaml ./hami + helm lint --with-subcharts --values ./hami/values.yaml ./hami --debug package: lint - helm package ./hami + helm package ./hami --debug clean: rm -f *.tgz + diff --git a/go.mod b/go.mod index 03b9204e7..201b6773a 100644 --- a/go.mod +++ b/go.mod @@ -12,6 +12,8 @@ require ( github.com/fsnotify/fsnotify v1.7.0 github.com/google/uuid v1.6.0 github.com/julienschmidt/httprouter v1.3.0 + github.com/onsi/ginkgo/v2 v2.17.1 + github.com/onsi/gomega v1.32.0 github.com/opencontainers/runtime-spec v1.2.0 github.com/prometheus/client_golang v1.18.0 github.com/sirupsen/logrus v1.9.3 @@ -23,6 +25,7 @@ require ( golang.org/x/tools v0.21.1-0.20240508182429-e35e4ccd0d2d google.golang.org/grpc v1.63.2 google.golang.org/protobuf v1.33.0 + gopkg.in/yaml.v2 v2.4.0 gotest.tools/v3 v3.5.1 k8s.io/api v0.29.3 k8s.io/apimachinery v0.29.3 @@ -45,12 +48,14 @@ require ( github.com/go-openapi/jsonpointer v0.20.2 // indirect github.com/go-openapi/jsonreference v0.20.4 // indirect github.com/go-openapi/swag v0.22.9 // indirect + github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 // indirect github.com/gogo/protobuf v1.3.2 // indirect github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect github.com/golang/protobuf v1.5.4 // indirect github.com/google/gnostic-models v0.6.8 // indirect github.com/google/go-cmp v0.6.0 // indirect github.com/google/gofuzz v1.2.0 // 
indirect + github.com/google/pprof v0.0.0-20210720184732-4bb14d4b1be1 // indirect github.com/imdario/mergo v0.3.16 // indirect github.com/inconshreveable/mousetrap v1.1.0 // indirect github.com/josharian/intern v1.0.0 // indirect @@ -79,7 +84,6 @@ require ( google.golang.org/appengine v1.6.8 // indirect google.golang.org/genproto/googleapis/rpc v0.0.0-20240227224415-6ceb2ff114de // indirect gopkg.in/inf.v0 v0.9.1 // indirect - gopkg.in/yaml.v2 v2.4.0 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect k8s.io/kube-openapi v0.0.0-20240227032403-f107216b40e2 // indirect k8s.io/utils v0.0.0-20240102154912-e7106e64919e // indirect @@ -92,6 +96,7 @@ require ( replace ( github.com/Project-HAMi/HAMi/pkg/api => ./pkg/api github.com/Project-HAMi/HAMi/pkg/device-plugin => ./pkg/device-plugin + github.com/Project-HAMi/HAMi/test/utils => ./test/utils k8s.io/api => k8s.io/api v0.28.3 k8s.io/apiextensions-apiserver => k8s.io/apiextensions-apiserver v0.28.3 k8s.io/apimachinery => k8s.io/apimachinery v0.28.3 diff --git a/go.sum b/go.sum index 6a572e75c..8a5308e86 100644 --- a/go.sum +++ b/go.sum @@ -14,6 +14,9 @@ github.com/blang/semver/v4 v4.0.0 h1:1PFHFE6yCCTv8C1TeyNNarDzntLi7wMI5i/pzqYIsAM github.com/blang/semver/v4 v4.0.0/go.mod h1:IbckMUScFkM3pff0VJDNKRiT6TG/YpiHIM2yvyW5YoQ= github.com/cespare/xxhash/v2 v2.2.0 h1:DC2CZ1Ep5Y4k3ZQ899DldepgrayRUGE6BBZ/cd9Cj44= github.com/cespare/xxhash/v2 v2.2.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= +github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWRnGsAI= +github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e/go.mod h1:nSuG5e5PlCu98SY8svDHJxuZscDgtXS6KTTbou5AhLI= +github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1/go.mod h1:Q3SI9o4m/ZMnBNeIyt5eFwwo7qiLfzFZmjNmxjkiQlU= github.com/container-orchestrated-devices/container-device-interface v0.5.4-0.20230111111500-5b3b5d81179a h1:sP3PcgyIkRlHqfF3Jfpe/7G8kf/qpzG4C8r94y9hLbE= github.com/container-orchestrated-devices/container-device-interface 
v0.5.4-0.20230111111500-5b3b5d81179a/go.mod h1:xMRa4fJgXzSDFUCURSimOUgoSc+odohvO3uXT9xjqH0= github.com/cpuguy83/go-md2man/v2 v2.0.4 h1:wfIWP927BUkWJb2NmU/kNDYIBTh/ziUX91+lVfRxZq4= @@ -66,6 +69,7 @@ github.com/hashicorp/errwrap v1.1.0 h1:OxrOeh75EUXMY8TBjag2fzXGZ40LB6IKw45YeGUDY github.com/hashicorp/errwrap v1.1.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4= github.com/hashicorp/go-multierror v1.1.1 h1:H5DkEtf6CXdFp0N0Em5UCwQpXMWke8IA0+lD48awMYo= github.com/hashicorp/go-multierror v1.1.1/go.mod h1:iw975J/qwKPdAO1clOe2L8331t/9/fmwbPZ6JB6eMoM= +github.com/ianlancetaylor/demangle v0.0.0-20200824232613-28f6c0f3b639/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc= github.com/imdario/mergo v0.3.16 h1:wwQJbIsHYGMUyLSPrEq1CT16AhnhNJQ51+4fdHUnCl4= github.com/imdario/mergo v0.3.16/go.mod h1:WBLT9ZmE3lPoWsEzCh9LPo3TiwVN+ZKEjmz+hD27ysY= github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8= @@ -133,6 +137,7 @@ github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= +github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg= github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= @@ -186,6 +191,7 @@ golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5h golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20191026070338-33540a1f6037/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys 
v0.0.0-20191115151921-52ab43148777/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20191204072324-ce4227a45e2e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= diff --git a/hack/deploy-helm.sh b/hack/deploy-helm.sh new file mode 100755 index 000000000..13ad3b8ca --- /dev/null +++ b/hack/deploy-helm.sh @@ -0,0 +1,65 @@ +#!/usr/bin/env bash +# Copyright 2024 The HAMi Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +set -o errexit +set -o nounset +set -o pipefail + +set -x + +E2E_TYPE=${1:-"pullrequest"} +KUBE_CONF=${2:-""} +HELM_VER=${3:-"v2.4.1"} +HELM_NAME=${4:-"hami-charts"} +HELM_REPO=${5:-"https://project-hami.github.io/HAMi/"} +TARGET_NS=${6:-"hami-system"} +HAMI_ALIAS="hami" +HELM_SOURCE="" + +REPO_ROOT=$(dirname "${BASH_SOURCE[0]}")/.. 
+cd "${REPO_ROOT}" + +source "${REPO_ROOT}"/hack/util.sh + +# install helm +echo -n "Preparing: 'helm' existence check - " +if util::cmd_exist helm; then + echo "passed" +else + echo "installing helm" + util::install_helm +fi + +# Select the chart source: local package for pull requests, published repo chart for releases +if [ "${E2E_TYPE}" == "pullrequest" ] ; then + echo "E2E Type is: ${E2E_TYPE}" + HELM_SOURCE="charts/*.tgz" +elif [ "${E2E_TYPE}" == "release" ]; then + HELM_SOURCE="${HELM_NAME}"/"${HAMI_ALIAS}" +else + echo "Invalid E2E Type: ${E2E_TYPE}" + exit 1 # this script is executed, not sourced: 'return' is only valid inside a function +fi + +# add repo locally +util::exec_cmd helm repo add "${HELM_NAME}" "${HELM_REPO}" --force-update --kubeconfig "${KUBE_CONF}" +util::exec_cmd helm repo update --kubeconfig "${KUBE_CONF}" + +# install or upgrade +util::exec_cmd helm --debug upgrade --install --create-namespace --cleanup-on-fail \ + "${HAMI_ALIAS}" "${HELM_SOURCE}" -n "${TARGET_NS}" \ + --set devicePlugin.passDeviceSpecsEnabled=false \ + --version "${HELM_VER}" --wait --timeout 20m --kubeconfig "${KUBE_CONF}" + diff --git a/hack/e2e-test-setup.sh b/hack/e2e-test-setup.sh new file mode 100755 index 000000000..e555ca42c --- /dev/null +++ b/hack/e2e-test-setup.sh @@ -0,0 +1,96 @@ +#!/usr/bin/env bash +# Copyright 2024 The HAMi Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +set -o errexit +set -o nounset +set -o pipefail + +set -x + + +REPO_ROOT=$(dirname "${BASH_SOURCE[0]}")/.. 
+cd "${REPO_ROOT}" + +source "${REPO_ROOT}"/hack/util.sh + +function install_govc() { + local govc_version="v0.37.3" + local govc_tar_url="https://github.com/vmware/govmomi/releases/download/${govc_version}/govc_Linux_x86_64.tar.gz" + + wget -q $govc_tar_url || { echo "Failed to download govc"; exit 1; } + tar -zxvf govc_Linux_x86_64.tar.gz + mv govc /usr/local/bin/ + govc version +} + +function govc_poweron_vm() { + local vm_name=${1:-""} + local vm_ip=${2:-""} + if [[ -z "$vm_name" ]]; then + echo "Error: VM name is required" + return 1 + fi + + govc vm.power -on "$vm_name" + echo -e "\033[35m === $vm_name: power turned on === \033[0m" + until [[ $(govc vm.info "$vm_name" | grep -c poweredOn) -eq 1 ]]; do + sleep 5 + done + + util::wait_ip_reachable "$vm_ip" +} + +function govc_poweroff_vm() { + local vm_name=${1:-""} + if [[ -z "$vm_name" ]]; then + echo "Error: VM name is required" + return 1 + fi + + if [[ $(govc vm.info "$vm_name" | grep -c poweredOn) -eq 1 ]]; then + govc vm.power -off -force "$vm_name" + echo -e "\033[35m === $vm_name has been down === \033[0m" + fi +} + +function govc_restore_vm_snapshot() { + local vm_name=${1:-""} + local vm_snapshot_name=${2:-""} + + govc snapshot.revert -vm "$vm_name" "$vm_snapshot_name" + echo -e "\033[35m === $vm_name reverted to snapshot: $(govc snapshot.tree -vm "$vm_name" -C -D -i -d) === \033[0m" +} + +function setup_gpu_test_env() { + export GOVC_INSECURE=1 + export vm_ip=$VSPHERE_GPU_VM_IP + export vm_name=$VSPHERE_GPU_VM_NAME + export vm_snapshot_name=$VSPHERE_GPU_VM_NAME_SNAPSHOT + + echo -n "Preparing: 'govc' existence check - " + if util::cmd_exist govc; then + echo "passed" + else + echo "installing govc" + install_govc + fi + + govc_poweroff_vm "$vm_name" + govc_restore_vm_snapshot "$vm_name" "$vm_snapshot_name" + govc_poweron_vm "$vm_name" "$vm_ip" +} + + +setup_gpu_test_env diff --git a/hack/e2e-test.sh b/hack/e2e-test.sh new file mode 100755 index 000000000..063a614db --- /dev/null +++ 
b/hack/e2e-test.sh @@ -0,0 +1,53 @@ +#!/usr/bin/env bash +# Copyright 2024 The HAMi Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +set -o errexit +set -o nounset +set -o pipefail + +set -x + +E2E_TYPE=${1:-"pullrequest"} +KUBE_CONF=${2:-""} + +REPO_ROOT=$(dirname "${BASH_SOURCE[0]}")/.. +source "${REPO_ROOT}"/hack/util.sh + +if util::cmd_exist ginkgo; then + echo "Using ginkgo version:" + ginkgo version +else + go install github.com/onsi/ginkgo/v2/ginkgo + go get github.com/onsi/gomega/... + ginkgo version +fi + + +if [ -z "${KUBE_CONF}" ]; then + echo "Error: KUBE_CONF environment variable is not set." + exit 1 # executed script: 'return' is only valid inside a function or a sourced file +fi + +# Run e2e +if [ "${E2E_TYPE}" == "pullrequest" ] || [ "${E2E_TYPE}" == "release" ]; then + # errexit is on, so test the command directly; a separate $? check would never be reached on failure + if ! ginkgo -v -r --fail-fast ./test/e2e/ --kubeconfig="${KUBE_CONF}"; then + echo "Error: ginkgo command failed." + exit 1 + fi +else + echo "Invalid E2E Type: ${E2E_TYPE}" + exit 1 +fi diff --git a/hack/util.sh b/hack/util.sh index 22c1e625f..0e2aea1b9 100755 --- a/hack/util.sh +++ b/hack/util.sh @@ -43,3 +43,46 @@ function util::install_helm { curl https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3 | bash } +# util::exec_cmd runs the given command line through eval and aborts the script if it fails +function util::exec_cmd() { + if [ $# -eq 0 ] ; then + echo "[Error] no command specified for util::exec_cmd()..." + exit 2 + fi + local tmpLog=$(mktemp) + set +e + eval "$@" &> $tmpLog + if [ $? -ne 0 ];then + echo "[Error] Failed to do $1. 
detail logs as below:" + set +x + echo "$(cat $tmpLog)" + set -x + rm -f $tmpLog + exit 3 + fi + echo "$1 successful." + rm -f $tmpLog + set -e +} + +### Wait until a node answers ping. Usage: util::wait_ip_reachable IP [ATTEMPTS=10] [SLEEP_SECONDS=60] +function util::wait_ip_reachable(){ + local vm_ip=${1:-""} + local loop_time=${2:-"10"} + local sleep_time=${3:-"60"} + echo "Wait vm_ip=$1 reachable ... " + for ((i=1;i<=$((loop_time));i++)); do + pingOK=0 + ping -w 2 -c 1 "${vm_ip}"|grep " 0%" || pingOK=false # leading space: "100% packet loss" must not match + echo "==> ping ""${vm_ip}" $pingOK + if [[ ${pingOK} == false ]];then + sleep "$sleep_time" + else + break + fi + if [ $i -eq $((loop_time)) ];then + echo "node not reachable exit!" + exit 1 + fi + done +} diff --git a/test/e2e/node/test_node.go b/test/e2e/node/test_node.go new file mode 100644 index 000000000..90dd512d5 --- /dev/null +++ b/test/e2e/node/test_node.go @@ -0,0 +1,93 @@ +/* +Copyright 2024 The HAMi Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package e2e + +import ( + "context" + "strings" + "time" + + "github.com/onsi/ginkgo/v2" + "github.com/onsi/gomega" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + + "github.com/Project-HAMi/HAMi/test/utils" +) + +var _ = ginkgo.Describe("[Node] Node E2E Tests", ginkgo.Ordered, func() { + var clientSet = utils.GetClientSet() + var nodeName string + + ginkgo.BeforeAll(func() { + nodes, err := utils.GetNodes(clientSet) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + gomega.Expect(len(nodes.Items)).To(gomega.BeNumerically(">", 0), "No nodes available for testing") + + nodeName = nodes.Items[0].Name + }) + + ginkgo.It("verify node with labeling", func() { + ginkgo.By("Updating node " + nodeName + " by labeling " + utils.GPUNodeLabelKey + "=" + utils.GPUNodeLabelValue) + _, err := utils.AddNodeLabel(clientSet, nodeName, utils.GPUNodeLabelKey, utils.GPUNodeLabelValue) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + ginkgo.By("Checking node " + nodeName + " label") + node, err := clientSet.CoreV1().Nodes().Get(context.TODO(), nodeName, metav1.GetOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + gomega.Expect(node.Labels[utils.GPUNodeLabelKey]).To(gomega.Equal(utils.GPUNodeLabelValue), "Label was not correctly added") + + ginkgo.By("Checking pods " + utils.HamiDevicePlugin + " running after labeling") + gomega.Eventually(func() bool { + pods, err := utils.GetPods(clientSet, utils.GPUNameSpace) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + // Look for the device-plugin pod specifically; the first pod of the namespace may be unrelated. + for _, pod := range pods.Items { + if !strings.Contains(pod.Name, utils.HamiDevicePlugin) { + continue + } + return utils.WaitForPodRunning(clientSet, utils.GPUNameSpace, pod.Name) == nil + } + return false + }, 300*time.Second, 10*time.Second).Should(gomega.BeTrue()) + }) + + ginkgo.It("verify node after removing label", func() { + ginkgo.By("Updating node " + nodeName + " by removing label " + utils.GPUNodeLabelKey + "=" + utils.GPUNodeLabelValue) + _, err := utils.RemoveNodeLabel(clientSet, nodeName, utils.GPUNodeLabelKey) + 
gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + ginkgo.By("Checking node " + nodeName + " label") + node, err := clientSet.CoreV1().Nodes().Get(context.TODO(), nodeName, metav1.GetOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + _, exists := node.Labels[utils.GPUNodeLabelKey] + gomega.Expect(exists).To(gomega.BeFalse(), "Label was not correctly removed") + + ginkgo.By("Checking pods " + utils.HamiDevicePlugin + " deleted after removing label") + gomega.Eventually(func() bool { + pods, err := utils.GetPods(clientSet, utils.GPUNameSpace) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + for _, pod := range pods.Items { + if strings.Contains(pod.Name, utils.HamiDevicePlugin) { + return false + } + } + return true + }, 300*time.Second, 10*time.Second).Should(gomega.BeTrue()) + }) +}) diff --git a/test/e2e/node/test_suite_test.go b/test/e2e/node/test_suite_test.go new file mode 100644 index 000000000..eecc1601e --- /dev/null +++ b/test/e2e/node/test_suite_test.go @@ -0,0 +1,35 @@ +/* +Copyright 2024 The HAMi Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package e2e + +import ( + "flag" + "testing" + + "github.com/onsi/ginkgo/v2" + "github.com/onsi/gomega" +) + +func init() { + testing.Init() + flag.Parse() +} + +func TestInit(t *testing.T) { + gomega.RegisterFailHandler(ginkgo.Fail) + ginkgo.RunSpecs(t, "Test workspace Service Suite") +} diff --git a/test/e2e/pod/test_pod.go b/test/e2e/pod/test_pod.go new file mode 100644 index 000000000..3aef30a66 --- /dev/null +++ b/test/e2e/pod/test_pod.go @@ -0,0 +1,118 @@ +/* +Copyright 2024 The HAMi Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package e2e + +import ( + "strings" + "time" + + "github.com/onsi/ginkgo/v2" + "github.com/onsi/gomega" + corev1 "k8s.io/api/core/v1" + + "github.com/Project-HAMi/HAMi/test/utils" +) + +var _ = ginkgo.Describe("Pod E2E Tests", ginkgo.Ordered, func() { + var clientSet = utils.GetClientSet() + var newPod *corev1.Pod + + ginkgo.BeforeAll(func() { + ginkgo.By("Add node labeling") + _, err := utils.AddNodeLabel(clientSet, utils.GPUNode, utils.GPUNodeLabelKey, utils.GPUNodeLabelValue) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }) + + ginkgo.AfterEach(func() { + ginkgo.By("Deleting pod " + newPod.Name + " in namespace " + newPod.Namespace) + err := utils.DeletePod(clientSet, newPod.Namespace, newPod.Name) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + ginkgo.By("Verifying pod " + newPod.Name + " is deleted") + gomega.Eventually(func() bool { + pods, err := utils.GetPods(clientSet, newPod.Namespace) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + for _, pod := range pods.Items { + if strings.Contains(pod.Name, newPod.Name) { + return false + } + // keep scanning: every remaining pod must be checked, not only the first one + } + return true + }, 300*time.Second, 10*time.Second).Should(gomega.BeTrue()) + }) + + ginkgo.AfterAll(func() { + ginkgo.By("Delete node labeling") + _, err := utils.RemoveNodeLabel(clientSet, utils.GPUNode, utils.GPUNodeLabelKey) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }) + + ginkgo.It("create single pod with configuration", func() { + newPod = utils.Pod.DeepCopy() + newPod.Name = newPod.Name + utils.GetRandom() + + ginkgo.By("Creating pod " + newPod.Name + " in namespace " + newPod.Namespace) + createdPod, err := utils.CreatePod(clientSet, newPod, newPod.Namespace) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + gomega.Expect(createdPod.Name).To(gomega.Equal(newPod.Name), "Pod was not created successfully") + + ginkgo.By("Verifying pod " + newPod.Name + " in running status") + err = utils.WaitForPodRunning(clientSet, newPod.Namespace, newPod.Name) + 
gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + ginkgo.By("Verifying GPU memory in pod " + newPod.Name + " by executing: " + utils.GPUExecuteNvidiaSMI) + output, err := utils.KubectlExecInPod(newPod.Namespace, newPod.Name, utils.GPUExecuteNvidiaSMI) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + gomega.Expect(string(output)).To(gomega.ContainSubstring(utils.GPUPodMemory + utils.GPUPodMemoryUnit)) + + ginkgo.By("Verifying CUDA execution status in pod " + newPod.Name + " by executing: " + utils.GPUExecuteCudaSample) + output, err = utils.KubectlExecInPod(newPod.Namespace, newPod.Name, utils.GPUExecuteCudaSample) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + gomega.Expect(string(output)).To(gomega.ContainSubstring(utils.GPUCudaTestPass)) + }) + + ginkgo.It("create overcommit pods", func() { + newPod = utils.Pod.DeepCopy() + newPod.Name = newPod.Name + utils.GetRandom() + newPod.Spec.Containers = append(newPod.Spec.Containers, newPod.Spec.Containers[0]) + newPod.Spec.Containers = append(newPod.Spec.Containers, newPod.Spec.Containers[0]) + //newPod.Spec.Containers[0].Name = newPod.Spec.Containers[0].Name + utils.GetRandom() + newPod.Spec.Containers[1].Name = newPod.Spec.Containers[0].Name + utils.GetRandom() + newPod.Spec.Containers[2].Name = newPod.Spec.Containers[0].Name + utils.GetRandom() + + ginkgo.By("Creating pod " + newPod.Name + " within multiple containers in namespace " + newPod.Namespace) + createdPod, err := utils.CreatePod(clientSet, newPod, newPod.Namespace) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + gomega.Expect(createdPod.Name).To(gomega.Equal(newPod.Name), "Pod was not created successfully") + + ginkgo.By("Verifying pod " + newPod.Name + " is pending due to " + utils.ErrReasonFilteringFailed + utils.ErrMessageFilteringFailed) + gomega.Eventually(func() bool { + events, err := utils.GetPodEvents(clientSet, newPod.Namespace, newPod.Name) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + for _, event := range events { + if 
strings.Contains(event.Reason, utils.ErrReasonFilteringFailed) && strings.Contains(event.Message, utils.ErrMessageFilteringFailed) { + return true + } + } + return false + }, 300*time.Second, 10*time.Second).Should(gomega.BeTrue()) + + }) +}) diff --git a/test/e2e/pod/test_suite_test.go b/test/e2e/pod/test_suite_test.go new file mode 100644 index 000000000..bbdab8431 --- /dev/null +++ b/test/e2e/pod/test_suite_test.go @@ -0,0 +1,35 @@ +/* +Copyright 2024 The HAMi Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package e2e + +import ( + "flag" + "testing" + + "github.com/onsi/ginkgo/v2" + "github.com/onsi/gomega" +) + +func init() { + testing.Init() + flag.Parse() +} + +func TestInit(t *testing.T) { + gomega.RegisterFailHandler(ginkgo.Fail) + ginkgo.RunSpecs(t, "Test pod") +} diff --git a/test/e2e/test_suite_test.go b/test/e2e/test_suite_test.go new file mode 100644 index 000000000..10db0dc6e --- /dev/null +++ b/test/e2e/test_suite_test.go @@ -0,0 +1,38 @@ +/* +Copyright 2024 The HAMi Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package e2e
+
+import (
+	"flag"
+	"testing"
+
+	"github.com/onsi/ginkgo/v2"
+	"github.com/onsi/gomega"
+
+	"github.com/Project-HAMi/HAMi/test/utils"
+)
+
+func init() {
+	testing.Init()
+}
+
+func TestInit(t *testing.T) {
+	flag.Parse()
+	utils.DefaultKubeConfigPath()
+	gomega.RegisterFailHandler(ginkgo.Fail)
+	ginkgo.RunSpecs(t, "Test HAMi Suite")
+}
diff --git a/test/utils/common.go b/test/utils/common.go
new file mode 100644
index 000000000..a01e14529
--- /dev/null
+++ b/test/utils/common.go
@@ -0,0 +1,95 @@
+/*
+Copyright 2024 The HAMi Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package utils
+
+import (
+	"flag"
+	"math/rand"
+	"os"
+	"os/exec"
+	"strconv"
+	"time"
+
+	"k8s.io/client-go/kubernetes"
+	"k8s.io/client-go/tools/clientcmd"
+	"k8s.io/klog/v2"
+)
+
+// kubeConfig is the kubeconfig path consumed by GetClientSet. It is bound to
+// the -kubeconfig flag and defaults to the path named by KUBE_CONF.
+var kubeConfig string
+
+func init() {
+	flag.StringVar(&kubeConfig, "kubeconfig", defaultKubeConfigPath(), "Path to the kubeConfig file")
+}
+
+// defaultKubeConfigPath supplies the default for the -kubeconfig flag. It
+// delegates to DefaultKubeConfigPath so the validation lives in one place.
+func defaultKubeConfigPath() string {
+	return DefaultKubeConfigPath()
+}
+
+// DefaultKubeConfigPath returns the kubeconfig path taken from the KUBE_CONF
+// environment variable. It aborts the process through klog.Fatalf when the
+// variable is unset or empty, or when no file exists at that path.
+func DefaultKubeConfigPath() string {
+	configPath := os.Getenv("KUBE_CONF")
+	if configPath == "" {
+		klog.Fatalf("Environment variable KUBE_CONF is not set or empty. Please set it to a valid kubeconfig file path.")
+	}
+
+	if _, err := os.Stat(configPath); os.IsNotExist(err) {
+		klog.Fatalf("Kubeconfig file does not exist at path: %s, error is %v", configPath, err)
+	}
+	return configPath
+}
+
+// GetClientSet builds a Kubernetes clientset from the kubeconfig file named by
+// kubeConfig. Any failure to load the config or create the client is fatal.
+func GetClientSet() *kubernetes.Clientset {
+	config, err := clientcmd.BuildConfigFromFlags("", kubeConfig)
+	if err != nil {
+		klog.Fatalf("Failed to load kubeConfig: %v", err)
+	}
+
+	clientSet, err := kubernetes.NewForConfig(config)
+	if err != nil {
+		klog.Fatalf("Failed to create Kubernetes client: %v", err)
+	}
+	return clientSet
+}
+
+// GetRandom returns the decimal string form of a pseudo-random integer in
+// [0, 9999), seeded from the current time. Tests append it to resource names.
+func GetRandom() string {
+	r := rand.New(rand.NewSource(time.Now().UnixNano()))
+	return strconv.Itoa(r.Intn(9999))
+}
+
+// KubectlExecInPod runs cmdshell through /bin/bash -c inside the named pod
+// using the kubectl binary found on PATH and returns the command's stdout.
+//
+// kubectl is invoked without -i/-t: the child process has no terminal or
+// stdin attached, so those flags only made kubectl warn that it could not
+// allocate a TTY.
+func KubectlExecInPod(namespace, podName, cmdshell string) ([]byte, error) {
+	// NOTE(review): fixed pause before every exec; presumably it lets the
+	// container finish starting up. Confirm before shortening or removing it.
+	time.Sleep(30 * time.Second)
+	cmd := exec.Command("kubectl", "exec", "-n", namespace, podName, "--", "/bin/bash", "-c", cmdshell)
+	return cmd.Output()
+}
diff --git a/test/utils/config.go b/test/utils/config.go
new file mode 100644
index 000000000..68fa10716
--- /dev/null
+++ b/test/utils/config.go
@@ -0,0 +1,41 @@
+/*
+Copyright 2024 The HAMi Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package utils
+
+// Test data: fixed names and values the e2e specs use for the GPU node and the test pod.
+const (
+	GPUNodeLabelKey      = "gpu"
+	GPUNodeLabelValue    = "on"
+	GPUExecuteNvidiaSMI  = "nvidia-smi"
+	GPUExecuteCudaSample = "/cuda-samples/sample"
+	GPUPodMemory         = "300"
+	GPUPodMemoryUnit     = "MiB"
+	GPUPodCore           = "40"
+	GPUNameSpace         = "hami-system"
+	GPUNode              = "gpu-master"
+	GPUCudaTestPass      = "Test PASSED"
+)
+
+// HAMi component names and the scheduling event reasons/messages the specs match on.
+const (
+	HamiScheduler              = "hami-scheduler"
+	HamiDevicePlugin           = "hami-device-plugin"
+	ErrReasonFilteringFailed   = "FilteringFailed"
+	ErrMessageFilteringFailed  = "no available node, all node scores do not meet"
+	ErrReasonFailedScheduling  = "FailedScheduling" // reason paired with the message below; was a copy of "FilteringFailed"
+	ErrMessageFailedScheduling = "0/1 nodes are available"
+)
diff --git a/test/utils/event.go b/test/utils/event.go
new file mode 100644
index 000000000..05b857c99
--- /dev/null
+++ b/test/utils/event.go
@@ -0,0 +1,50 @@
+/*
+Copyright 2024 The HAMi Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package utils
+
+import (
+	"context"
+	"fmt"
+
+	v1 "k8s.io/api/core/v1"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/client-go/kubernetes"
+	"k8s.io/klog/v2"
+)
+
+// GetEvents lists the events in namespace that satisfy listOptions and
+// returns them as a slice; a listing error is returned unchanged.
+func GetEvents(clientSet *kubernetes.Clientset, namespace string, listOptions metav1.ListOptions) ([]v1.Event, error) {
+	eventList, err := clientSet.CoreV1().Events(namespace).List(context.TODO(), listOptions)
+	if err != nil {
+		return nil, err
+	}
+	return eventList.Items, nil
+}
+
+// GetPodEvents returns the events in namespace whose involved object is the
+// Pod named podName. A listing failure is logged and then returned.
+func GetPodEvents(clientSet *kubernetes.Clientset, namespace, podName string) ([]v1.Event, error) {
+	// Restrict the listing to events attached to this particular pod.
+	selector := fmt.Sprintf("involvedObject.kind=Pod,involvedObject.name=%s", podName)
+	podEvents, err := GetEvents(clientSet, namespace, metav1.ListOptions{FieldSelector: selector})
+	if err != nil {
+		klog.Errorf("Failed to list events for pod %s in namespace %s: %v", podName, namespace, err)
+		return nil, err
+	}
+	return podEvents, nil
+}
diff --git a/test/utils/node.go b/test/utils/node.go
new file mode 100644
index 000000000..07480328e
--- /dev/null
+++ b/test/utils/node.go
@@ -0,0 +1,73 @@
+/*
+Copyright 2024 The HAMi Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package utils
+
+import (
+	"context"
+
+	v1 "k8s.io/api/core/v1"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/client-go/kubernetes"
+	"k8s.io/klog/v2"
+)
+
+// GetNodes lists every node in the cluster. A listing failure is logged and
+// returned to the caller.
+func GetNodes(clientSet *kubernetes.Clientset) (*v1.NodeList, error) {
+	nodeList, err := clientSet.CoreV1().Nodes().List(context.TODO(), metav1.ListOptions{})
+	if err != nil {
+		klog.Errorf("Failed to get nodes: %v", err)
+		return nil, err
+	}
+	return nodeList, nil
+}
+
+// UpdateNode submits node to the API server and returns the object it sends
+// back. An update failure is logged and returned to the caller.
+func UpdateNode(clientSet *kubernetes.Clientset, node *v1.Node) (*v1.Node, error) {
+	stored, err := clientSet.CoreV1().Nodes().Update(context.TODO(), node, metav1.UpdateOptions{})
+	if err != nil {
+		klog.Errorf("Failed to update node %s: %v", node.Name, err)
+		return nil, err
+	}
+	return stored, nil
+}
+
+// AddNodeLabel fetches the named node, sets labelKey to labelValue on it and
+// returns the result of UpdateNode.
+func AddNodeLabel(clientSet *kubernetes.Clientset, nodeName, labelKey, labelValue string) (*v1.Node, error) {
+	target, err := clientSet.CoreV1().Nodes().Get(context.TODO(), nodeName, metav1.GetOptions{})
+	if err != nil {
+		return nil, err
+	}
+	if target.Labels == nil {
+		target.Labels = map[string]string{}
+	}
+	target.Labels[labelKey] = labelValue
+	return UpdateNode(clientSet, target)
+}
+
+// RemoveNodeLabel fetches the named node, drops labelKey from its labels and
+// returns the result of UpdateNode. The update is sent even if the key is absent.
+func RemoveNodeLabel(clientSet *kubernetes.Clientset, nodeName, labelKey string) (*v1.Node, error) {
+	target, err := clientSet.CoreV1().Nodes().Get(context.TODO(), nodeName, metav1.GetOptions{})
+	if err != nil {
+		return nil, err
+	}
+	delete(target.Labels, labelKey) // deleting from a nil map is a no-op
+	return UpdateNode(clientSet, target)
+}
diff --git a/test/utils/pod.go b/test/utils/pod.go
new file mode 100644
index 000000000..9be06b1cf
--- /dev/null
+++ b/test/utils/pod.go
@@ -0,0 +1,107 @@
+/*
+Copyright 2024 The HAMi Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package utils
+
+import (
+	"context"
+	"fmt"
+	"time"
+
+	corev1 "k8s.io/api/core/v1"
+	"k8s.io/apimachinery/pkg/api/resource"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/util/wait"
+	"k8s.io/client-go/kubernetes"
+	"k8s.io/klog/v2"
+)
+
+// Pod is the template GPU pod; specs DeepCopy it before changing anything.
+var Pod = &corev1.Pod{
+	ObjectMeta: metav1.ObjectMeta{Name: "gpu-pod", Namespace: "default"},
+	Spec: corev1.PodSpec{
+		Containers: []corev1.Container{
+			{
+				Name:    "cuda-container",
+				Image:   "nvcr.io/nvidia/k8s/cuda-sample:vectoradd-cuda12.5.0",
+				Command: []string{"/bin/sh"},
+				Args:    []string{"-c", "sleep 86400"},
+				Resources: corev1.ResourceRequirements{
+					Limits: corev1.ResourceList{
+						"nvidia.com/gpu":      resource.MustParse("1"),
+						"nvidia.com/gpumem":   resource.MustParse(GPUPodMemory),
+						"nvidia.com/gpucores": resource.MustParse(GPUPodCore),
+					},
+				},
+			},
+		},
+	},
+}
+
+// GetPods lists all pods in namespace. A listing failure is logged and
+// returned to the caller.
+func GetPods(clientSet *kubernetes.Clientset, namespace string) (*corev1.PodList, error) {
+	podList, err := clientSet.CoreV1().Pods(namespace).List(context.TODO(), metav1.ListOptions{})
+	if err != nil {
+		klog.Errorf("Failed to list Pods in namespace %s: %v", namespace, err)
+		return nil, err
+	}
+	return podList, nil
+}
+
+// CreatePod waits a fixed 15 seconds and then creates pod in namespace,
+// returning the object the API server sends back. A failure is logged and returned.
+func CreatePod(clientSet *kubernetes.Clientset, pod *corev1.Pod, namespace string) (*corev1.Pod, error) {
+	time.Sleep(15 * time.Second)
+	created, err := clientSet.CoreV1().Pods(namespace).Create(context.TODO(), pod, metav1.CreateOptions{})
+	if err != nil {
+		klog.Errorf("Failed to create Pod %s in namespace %s: %v", pod.Name, namespace, err)
+		return nil, err
+	}
+	return created, nil
+}
+
+// DeletePod deletes the named pod; a failure is logged and returned.
+func DeletePod(clientSet *kubernetes.Clientset, namespace, podName string) error {
+	if err := clientSet.CoreV1().Pods(namespace).Delete(context.TODO(), podName, metav1.DeleteOptions{}); err != nil {
+		klog.Errorf("Failed to delete Pod %s in namespace %s: %v", podName, namespace, err)
+		return err
+	}
+	return nil
+}
+
+// WaitForPodRunning polls every 5s for up to 3m until the pod is Running. It stops
+// early if a Get fails (error wrapped with %w) or the phase is Failed or Unknown.
+func WaitForPodRunning(clientSet kubernetes.Interface, namespace, podName string) error {
+	const (
+		checkInterval = 5 * time.Second
+		timeout       = 3 * time.Minute
+	)
+	return wait.PollImmediate(checkInterval, timeout, func() (bool, error) {
+		pod, err := clientSet.CoreV1().Pods(namespace).Get(context.TODO(), podName, metav1.GetOptions{})
+		if err != nil {
+			return false, fmt.Errorf("failed to get pod %s/%s: %w", namespace, podName, err)
+		}
+		switch pod.Status.Phase {
+		case corev1.PodRunning:
+			return true, nil
+		case corev1.PodFailed, corev1.PodUnknown:
+			return false, fmt.Errorf("pod %s/%s is in failed or unknown state: %s", namespace, podName, pod.Status.Phase)
+		default:
+			return false, nil
+		}
+	})
+}