Skip to content

Commit

Permalink
refactor(logging): enhance log messages for device resource counting …
Browse files Browse the repository at this point in the history
…and processing

fix: improve UUID parsing and error handling in ExtractMigTemplatesFromUUID function
refactor: update test cases for ExtractMigTemplatesFromUUID with comprehensive scenarios
chore: clean up imports and enhance test readability across various test files

Signed-off-by: haitwang-cloud <[email protected]>

fix: nvidia-device-plugin no version info (Project-HAMi#779)

Signed-off-by: chaunceyjiang <[email protected]>
Signed-off-by: haitwang-cloud <[email protected]>

Implement initial E2E test suite setup (Project-HAMi#775)

Signed-off-by: wen.rui <[email protected]>
Signed-off-by: haitwang-cloud <[email protected]>

Draft E2E test design (Project-HAMi#633)

Signed-off-by: Rei1010 <[email protected]>
Signed-off-by: wen.rui <[email protected]>
Signed-off-by: haitwang-cloud <[email protected]>
  • Loading branch information
timWang404 authored and haitwang-cloud committed Jan 10, 2025
1 parent cbccbd4 commit e9eed8d
Show file tree
Hide file tree
Showing 49 changed files with 2,312 additions and 526 deletions.
Binary file removed .DS_Store
Binary file not shown.
1 change: 1 addition & 0 deletions .github/workflows/auto-release.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,7 @@ jobs:
uses: ./.github/workflows/call-e2e.yaml
with:
ref: ${{ needs.ensure-tag.outputs.tag }}
type: "release"

# execute a compatibility test when hami is released
release-e2e-upgrade:
Expand Down
70 changes: 65 additions & 5 deletions .github/workflows/call-e2e.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,75 @@ on:
workflow_call:
inputs:
ref:
description: 'Reference id to run tests'
required: true
type: string
permissions: write-all
type:
description: 'E2E type'
required: true
type: string
default: pullrequest

jobs:
e2e:
runs-on: ubuntu-latest
e2e-test:
strategy:
matrix:
include:
- device: nvidia
type: tesla-p4
# - device: nvidia
# type: rtx-4090
# - device: huawei
# type: ascend-910b
runs-on: [ "${{ matrix.device }}", "${{ matrix.type }}" ]
environment: ${{ matrix.device }}
env:
E2E_TYPE: ${{ inputs.type }}
steps:
- name: checkout code
uses: actions/checkout@v4

- name: install Go
uses: actions/setup-go@v5
with:
go-version: "1.21"

- name: setup e2e env
run: |
make e2e-env-setup
- name: download hami helm
if: inputs.type == 'pullrequest'
uses: actions/download-artifact@v4
with:
name: chart_package_artifact
path: charts/

- name: download hami image
if: inputs.type == 'pullrequest'
uses: actions/download-artifact@v4
with:
name: hami-image
path: ./image

- name: load e2e image
if: inputs.type == 'pullrequest'
run: |
echo "Loading Docker image from image.tar..."
if [ -z "${VSPHERE_GPU_VM_IP}" ]; then
echo "Error: VSPHERE_GPU_VM_IP is not defined!"
exit 1
fi
scp ./image/image.tar root@$VSPHERE_GPU_VM_IP:/home/
ssh root@$VSPHERE_GPU_VM_IP "nerdctl load -i /home/image.tar"
ssh root@$VSPHERE_GPU_VM_IP "nerdctl image ls | grep hami"
- name: deploy hami helm
env:
HAMI_VERSION: ${{ inputs.ref }}
run: |
make helm-deploy
- name: e2e test
# https://github.com/actions/virtual-environments/issues/709
run: |
echo "Need to add e2e test"
make e2e-test
122 changes: 105 additions & 17 deletions .github/workflows/ci.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ on:
branches-ignore:
- 'dependabot/**'
permissions:
contents: read # for actions/checkout to fetch code
contents: read # for actions/checkout to fetch code

env:
REGISTRY: docker.io
Expand Down Expand Up @@ -38,10 +38,11 @@ jobs:
run: make lint
- name: import alias
run: hack/verify-import-aliases.sh

test:
name: Unit test
needs: lint # rely on lint successful completion
runs-on: ubuntu-latest
runs-on: ubuntu-22.04
steps:
- name: Checkout code
uses: actions/checkout@v4
Expand Down Expand Up @@ -71,11 +72,29 @@ jobs:
fail_ci_if_error: false
verbose: true

get_version:
name: get_version
runs-on: ubuntu-22.04
outputs:
version: ${{ steps.parse_version.outputs.version }}
steps:
- name: Checkout
uses: actions/checkout@v4

- name: Parse the version
id: parse_version
run: |
value=$(make -f Makefile.defs print-version)
suffix="$(git rev-parse --short HEAD)"
tag="${value}-${suffix}"
echo "Generated tag: ${tag}"
echo "version=${tag}" >> $GITHUB_OUTPUT
build:
name: compile
runs-on: ubuntu-latest
needs: test # rely on test successful completion
name: Compile
runs-on: ubuntu-22.04
environment: nvidia
needs: [ test, get_version ]
steps:
- uses: actions/checkout@master
- name: Free disk space
Expand All @@ -88,22 +107,16 @@ jobs:
echo "=========after clean up, the left CI disk space"
df -h
- name: Get the version
id: get_version
run: |
tag="$(git rev-parse --short HEAD)"
echo ::set-output name=VERSION::${tag}
- name: Checkout code
uses: actions/checkout@v4
with:
fetch-depth: 0

- name: Checkout submodule
uses: Mushus/checkout-submodule@v1.0.1
with:
basePath: # optional, default is .
submodulePath: libvgpu
submodulePath: libvgpu

- name: Set up QEMU
uses: docker/setup-qemu-action@v3
Expand All @@ -124,13 +137,88 @@ jobs:
with:
context: .
file: ${{ env.IMAGE_ROOT_PATH }}/Dockerfile
labels: ${{ steps.meta.outputs.labels }}
platforms: ${{ env.BUILD_PLATFORM }}
labels: ${{ needs.get_version.outputs.version }}
build-args: |
VERSION=${{ steps.get_version.outputs.VERSION }}
VERSION=${{ needs.get_version.outputs.version }}
GOLANG_IMAGE=golang:1.22.5-bullseye
NVIDIA_IMAGE=nvidia/cuda:12.2.0-devel-ubuntu20.04
DEST_DIR=/usr/local
tags: ${{ steps.meta.outputs.tags }}
tags: ${{ env.REGISTRY }}/${{ env.IMAGE_REPO }}:${{ needs.get_version.outputs.version }}
push: false
load: true
github-token: ${{ env.REGISTER_PASSWORD }}

- name: List images
run: |
docker images
- name: Save Docker image to image.tar
run: |
docker save ${{ env.REGISTRY }}/${{ env.IMAGE_REPO }}:${{ needs.get_version.outputs.version }} -o image.tar
- name: Upload image.tar as artifact
uses: actions/upload-artifact@v4
with:
name: hami-image
path: image.tar
retention-days: 5
if-no-files-found: error

# packages tgz from /charts of original branch, deploy to /charts of target branch
package_chart:
runs-on: ubuntu-22.04
needs: [ test, get_version ]
env:
HELM_VERSION: v3.8.1
steps:
- name: Checkout
uses: actions/checkout@v4
with:
fetch-depth: 0
ref: ${{ needs.get_ref.outputs.ref }}

- name: Configure Git
run: |
git config user.name "$GITHUB_ACTOR"
git config user.email "$GITHUB_ACTOR@users.noreply.github.com"
- name: Install Helm
uses: azure/setup-helm@v4
with:
version: ${{ env.HELM_VERSION }}

- name: Lint helm chart
run: |
make lint_chart
- name: Package Chart
continue-on-error: false
env:
VERSION: ${{ needs.get_version.outputs.version }}
run: |
cd charts
make clean
make
if ! ls *.tgz &>/dev/null ; then
echo "failed to generate chart"
exit 1
fi
cd ..
mkdir -p tmp
mv charts/*.tgz tmp
- name: Upload Artifact
uses: actions/upload-artifact@v4
with:
name: chart_package_artifact
path: tmp/*
retention-days: 5
if-no-files-found: error

# execute a full e2e test when hami code is merged
e2e_test:
uses: ./.github/workflows/call-e2e.yaml
needs: [ package_chart, get_version, build ]
with:
ref: ${{ needs.get_version.outputs.version }}
type: "pullrequest"
4 changes: 3 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -8,4 +8,6 @@ libvgpu.so
.idea
vendor
license
vgpuvalidator
vgpuvalidator
_output/coverage/coverage_pkg.txt
.DS_Store
14 changes: 14 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ docker:
--build-arg TARGET_ARCH=${TARGET_ARCH} \
--build-arg NVIDIA_IMAGE=${NVIDIA_IMAGE} \
--build-arg DEST_DIR=${DEST_DIR} \
--build-arg VERSION=${VERSION} \
--build-arg GOPROXY=https://goproxy.cn,direct \
. -f=docker/Dockerfile -t ${IMG_TAG}

Expand All @@ -19,6 +20,7 @@ dockerwithlib:
--build-arg TARGET_ARCH=${TARGET_ARCH} \
--build-arg NVIDIA_IMAGE=${NVIDIA_IMAGE} \
--build-arg DEST_DIR=${DEST_DIR} \
--build-arg VERSION=${VERSION} \
--build-arg GOPROXY=https://goproxy.cn,direct \
. -f=docker/Dockerfile.withlib -t ${IMG_TAG}

Expand Down Expand Up @@ -73,3 +75,15 @@ lint_chart:
aquasec/trivy:$(TRIVY_VERSION) config --exit-code 1 --severity $(LINT_TRIVY_SEVERITY_LEVEL) /tmp/src/charts ; \
(($$?==0)) || { echo "error, failed to check chart trivy" && exit 1 ; } ; \
echo "chart trivy check: pass"

# e2e-env-setup: runs the ./hack/e2e-test-setup.sh script with no arguments.
# Declared .PHONY because the target name does not correspond to a file.
# Called from the "setup e2e env" step of the call-e2e workflow.
.PHONY: e2e-env-setup
e2e-env-setup:
./hack/e2e-test-setup.sh

# helm-deploy: runs ./hack/deploy-helm.sh with three positional arguments:
#   1. E2E_TYPE      2. KUBE_CONF      3. HAMI_VERSION
# The values are expanded by make, so they come from make variables or the
# calling environment; an unset variable is passed as an empty string.
# NOTE(review): KUBE_CONF is not set anywhere in the visible workflow steps —
# confirm where it is expected to come from.
.PHONY: helm-deploy
helm-deploy:
./hack/deploy-helm.sh "${E2E_TYPE}" "${KUBE_CONF}" "${HAMI_VERSION}"

# e2e-test: runs ./hack/e2e-test.sh with two positional arguments:
#   1. E2E_TYPE      2. KUBE_CONF
# Both are expanded by make from make variables or the calling environment;
# an unset variable is passed as an empty string.
.PHONY: e2e-test
e2e-test:
./hack/e2e-test.sh "${E2E_TYPE}" "${KUBE_CONF}"
9 changes: 8 additions & 1 deletion Makefile.defs
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,16 @@ TARGETARCH ?= amd64
DESTDIR_BIN ?= $(ROOT_DIR)/output/$(TARGETARCH)/bin
DESTDIR_BASH_COMPLETION ?= $(ROOT_DIR)/output/$(TARGETARCH)/bash-completion

VERSION = $(shell cat $(dir $(lastword $(MAKEFILE_LIST)))/VERSION)
VERSION?=""
ifeq ($(VERSION), "")
VERSION=$(shell cat $(dir $(lastword $(MAKEFILE_LIST)))/VERSION)
endif

ECHO_GEN=echo " GEN $(RELATIVE_DIR)/"

LINT_TRIVY_SEVERITY_LEVEL ?= CRITICAL
TRIVY_VERSION=0.36.0

# print-version: prints the resolved value of VERSION on stdout.
# The leading "@" stops make from echoing the command itself, so the output is
# only the version string; CI captures it with
# `make -f Makefile.defs print-version`.
.PHONY: print-version
print-version:
@echo $(VERSION)
7 changes: 5 additions & 2 deletions charts/Makefile
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
# get VERSION
.DEFAULT_GOAL := all
include ../Makefile.defs

VERSION_REGEX := '[vV]*[0-9]\+\.[0-9]\+\.[0-9]\+.*'
Expand All @@ -11,6 +12,7 @@ all: update-versions lint package
#update version in chart
update-versions:
$(ECHO_GEN) " Updating Chart version to $(VERSION)"
echo "VERSION=$(VERSION)"
echo "VERSION_MAJOR=$(VERSION_MAJOR)"
echo "GIT_VERSION=$(GIT_VERSION)"
echo "FULL_BUILD_VERSION=$(FULL_BUILD_VERSION)"
Expand All @@ -22,10 +24,11 @@ update-versions:
sed -i 's/version: "*'$(VERSION_REGEX)'"*/version: "'$$hami_version'"/g' $(VALUES_FILE)

lint: update-versions
helm lint --with-subcharts --values ./hami/values.yaml ./hami
helm lint --with-subcharts --values ./hami/values.yaml ./hami --debug

package: lint
helm package ./hami
helm package ./hami --debug

clean:
rm -f *.tgz

3 changes: 2 additions & 1 deletion docker/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,11 @@ ARG NVIDIA_IMAGE=nvidia/cuda:12.2.0-devel-ubuntu20.04
FROM $GOLANG_IMAGE AS build
FROM $GOLANG_IMAGE AS gobuild
ARG GOPROXY
ARG VERSION
ADD . /k8s-vgpu
#RUN --mount=type=cache,target=/go/pkg/mod \
# cd /k8s-vgpu && make all
RUN cd /k8s-vgpu && make all
RUN cd /k8s-vgpu && make all VERSION=$VERSION
RUN go install github.com/NVIDIA/mig-parted/cmd/nvidia-mig-parted@latest

FROM $NVIDIA_IMAGE AS nvbuild
Expand Down
3 changes: 2 additions & 1 deletion docker/Dockerfile.withlib
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,9 @@ FROM $GOLANG_IMAGE AS build
FROM $GOLANG_IMAGE AS GOBUILD
ADD . /k8s-vgpu
ARG GOPROXY=https://goproxy.cn,direct
ARG VERSION
RUN go env -w GO111MODULE=on
RUN cd /k8s-vgpu && make all
RUN cd /k8s-vgpu && make all VERSION=$VERSION
RUN go install github.com/NVIDIA/mig-parted/cmd/nvidia-mig-parted@latest

#FROM ubuntu:24.04
Expand Down
Loading

0 comments on commit e9eed8d

Please sign in to comment.