mfournioux · mfournioux · Nov 20, 2024 · Nov 21, 2024 · Nov 22, 2024 · Nov 25, 2024
diff --git a/.github/workflows/lint-test.yaml b/.github/workflows/lint-test.yaml
@@ -0,0 +1,79 @@
+name: Lint and Test Charts
+
+# Runs on pull_request events.
+on: pull_request
+
+jobs:
+  # Single job: its steps lint the chart in examples/chart-helm, install it
+  # on a kind cluster backed by a local MinIO, then send one test request.
+  lint-test:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v3
+        with:
+          # Depth 0 asks for the full history instead of a shallow clone;
+          # presumably needed so ct can compare against --target-branch — confirm.
+          fetch-depth: 0
+
+      - name: Set up Helm
+        # NOTE(review): the action ref was reconstructed from an obfuscated
+        # "[email protected]" token — confirm the tag before merging.
+        uses: azure/setup-helm@v4.2.0
+        with:
+          # Helm CLI version to install.
+          version: v3.14.4
+
+      # Python is required because ct lint runs Yamale and yamllint, which require Python.
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          # Quoted so the version is read as a string rather than a float.
+          # 3.7 is end-of-life and not provided on current ubuntu-latest images.
+          python-version: "3.12"
+
+      - name: Set up chart-testing
+        # NOTE(review): the action ref was reconstructed from an obfuscated
+        # "[email protected]" token — confirm the tag before merging.
+        uses: helm/chart-testing-action@v2.6.1
+        with:
+          # Version of the ct CLI to install.
+          version: v3.10.1
+
+      - name: Run chart-testing (lint)
+        # Lints only the chart under examples/chart-helm, using the
+        # repository's default branch as the target branch.
+        # The folded scalar joins the lines below into one command line.
+        run: >-
+          ct lint
+          --target-branch ${{ github.event.repository.default_branch }}
+          --chart-dirs examples/chart-helm
+          --charts examples/chart-helm
+
+      - name: Setup minio
+        # Starts a MinIO container on its own docker network, downloads the
+        # opt-125m files from Hugging Face and uploads them to a test bucket.
+        run: |
+          docker network create vllm-net
+          docker run -d -p 9000:9000 --name minio --net vllm-net \
+                     -e "MINIO_ACCESS_KEY=minioadmin" \
+                     -e "MINIO_SECRET_KEY=minioadmin" \
+                     -v /tmp/data:/data \
+                     -v /tmp/config:/root/.minio \
+                     minio/minio server /data
+          # Credentials for the aws CLI calls below; these exports are only
+          # visible to the commands of this step.
+          export AWS_ACCESS_KEY_ID=minioadmin
+          export AWS_SECRET_ACCESS_KEY=minioadmin
+          export AWS_EC2_METADATA_DISABLED=true
+          mkdir opt-125m
+          # The tokenizer merges file is named merges.txt in the model repository.
+          # -f stops curl from saving an HTTP error page as if it were a model file.
+          cd opt-125m && curl -O -fLs "https://huggingface.co/facebook/opt-125m/resolve/main/{pytorch_model.bin,config.json,generation_config.json,merges.txt,tokenizer_config.json,vocab.json}" && cd ..
+          aws --endpoint-url http://127.0.0.1:9000/ s3 mb s3://testbucket
+          aws --endpoint-url http://127.0.0.1:9000/ s3 cp opt-125m/ s3://testbucket/opt-125m --recursive
+
+      - name: Create kind cluster
+        # NOTE(review): the action ref was reconstructed from an obfuscated
+        # "[email protected]" token — confirm the tag before merging.
+        uses: helm/kind-action@v1.10.0
+
+      - name: Configuration of docker images on the kind cluster and cluster network
+        # Pre-loads the images into the kind cluster named chart-testing and
+        # attaches the cluster node to the docker network MinIO runs on.
+        run: |
+          docker pull adsai/vllm-cpu-env:latest
+          docker pull amazon/aws-cli:2.6.4
+          kind load docker-image amazon/aws-cli:2.6.4 --name chart-testing
+          kind load docker-image adsai/vllm-cpu-env:latest --name chart-testing
+          # The network must be the one created in the MinIO step (vllm-net), and
+          # the node container is named after the cluster: chart-testing-control-plane.
+          docker network connect vllm-net "$(docker ps -aqf "name=chart-testing-control-plane")"
+
+      - name: Run chart-testing (install)
+        # Installs the chart into ns-vllm, pointing it at the MinIO bucket and
+        # overriding the resource values so no GPU is requested.
+        # Variables exported in an earlier step are not visible here, so the
+        # MinIO credentials are declared on this step.
+        env:
+          AWS_ACCESS_KEY_ID: minioadmin
+          AWS_SECRET_ACCESS_KEY: minioadmin
+        # The folded scalar joins the lines below into one command line. The
+        # GPU resource key is nvidia.com/gpu; its dot is escaped so that Helm
+        # does not treat it as a path separator.
+        run: >-
+          ct install
+          --target-branch ${{ github.event.repository.default_branch }}
+          --chart-dirs examples/chart-helm
+          --charts examples/chart-helm
+          --namespace=ns-vllm
+          --helm-extra-set-args "--set secrets.s3endpoint=http://minio:9000
+          --set secrets.s3bucketname=testbucket
+          --set secrets.s3accesskeyid=$AWS_ACCESS_KEY_ID
+          --set secrets.s3accesskey=$AWS_SECRET_ACCESS_KEY
+          --set image.env[0].name=VLLM_CPU_KVCACHE_SPACE
+          --set-string image.env[0].value=1
+          --set-string resources.requests.cpu=1
+          --set resources.requests.memory=4Gi
+          --set-string resources.requests.nvidia\.com/gpu=0
+          --set-string resources.limits.cpu=2
+          --set resources.limits.memory=5Gi
+          --set-string resources.limits.nvidia\.com/gpu=0"
+
+      - name: curl test
+        # Smoke test: sends one completion request through a port-forward and
+        # fails the job unless the server answers with HTTP 200.
+        # NOTE(review): ct install normally removes the release when it
+        # finishes — confirm the service still exists at this point.
+        run: |
+          # Run the port-forward in the background; in the foreground it never returns.
+          # The namespace matches the one used by the install step (ns-vllm).
+          kubectl port-forward svc/test-vllm-service 8001:80 -n ns-vllm &
+          # Give the tunnel a moment to come up before sending the request.
+          sleep 10
+          # Capture only the HTTP status code; the response body is discarded.
+          CODE=$(curl --silent --output /dev/null --write-out '%{http_code}' \
+                  --location http://localhost:8001/v1/completions \
+                  --header "Content-Type: application/json" \
+                  --data '{
+                          "model": "opt-125m",
+                          "prompt": "San Francisco is a",
+                          "max_tokens": 7,
+                          "temperature": 0
+                  }')
+          # Quoted operands and a spaced operator make this a real comparison.
+          if [ "$CODE" != "200" ]
+          then
+              echo "FAILURE"
+              exit 1
+          else
+              echo "SUCCESS"
+          fi
diff --git a/examples/chart-helm/.helmignore b/examples/chart-helm/.helmignore
@@ -0,0 +1,6 @@
+# Ignore patterns for this chart directory: images, git metadata,
+# lint/test configuration, the values schema and workflow files.
+*.png
+.git/
+ct.yaml
+lintconf.yaml
+values.schema.json
+/workflows
diff --git a/examples/chart-helm/Chart.yaml b/examples/chart-helm/Chart.yaml
@@ -0,0 +1,21 @@
+# Chart metadata for the vLLM example chart.
+apiVersion: v2
+name: chart-vllm
+description: Chart vllm
+
+# A chart can be either an 'application' or a 'library' chart.
+#
+# Application charts are a collection of templates that can be packaged into versioned archives
+# to be deployed.
+#
+# Library charts provide useful utilities or functions for the chart developer. They're included as
+# a dependency of application charts to inject those utilities and functions into the rendering
+# pipeline. Library charts do not define any templates and therefore cannot be deployed.
+type: application
+
+# This is the chart version. This version number should be incremented each time you make changes
+# to the chart and its templates, including the app version.
+# Versions are expected to follow Semantic Versioning (https://semver.org/)
+version: 0.0.1
+
+# Chart maintainers; ct.yaml in this directory sets validate-maintainers: false.
+maintainers:
+  - name: mfournioux
diff --git a/examples/chart-helm/README.md b/examples/chart-helm/README.md
@@ -0,0 +1,82 @@
+# chart-vllm
+
+![Version: 0.0.1](https://img.shields.io/badge/Version-0.0.1-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square)
+
+A Helm chart to deploy vllm for Kubernetes
+
+## Installing the chart
+
+To install the chart with the release name `test-vllm`
+
+```console
+helm upgrade --install --create-namespace --namespace=ns-vllm test-vllm . -f values.yaml --set secrets.s3endpoint=$ACCESS_POINT --set secrets.s3bucketname=$BUCKET --set secrets.s3accesskeyid=$ACCESS_KEY --set secrets.s3accesskey=$SECRET_KEY
+```
+
+## Uninstalling the chart
+
+To uninstall the `test-vllm` deployment
+
+```console
+helm uninstall test-vllm --namespace=ns-vllm
+```
+
+The command removes all the Kubernetes components associated with the chart **including persistent volumes** and deletes the release.
+
+## Architecture
+
+![Architecture](architecture.excalidraw.png)
+
+## Values
+
+| Key | Type | Default | Description |
+|-----|------|---------|-------------|
+| autoscaling | object | `{"enabled":false,"maxReplicas":100,"minReplicas":1,"targetCPUUtilizationPercentage":80}` | Autoscaling configuration |
+| autoscaling.enabled | bool | `false` | Enable autoscaling |
+| autoscaling.maxReplicas | int | `100` | Maximum replicas |
+| autoscaling.minReplicas | int | `1` | Minimum replicas |
+| autoscaling.targetCPUUtilizationPercentage | int | `80` | Target CPU utilization for autoscaling |
+| configs | object | `{}` | Configmap  |
+| containerPort | int | `8000` | Container port |
+| customObjects | list | `[]` | Custom Objects configuration |
+| deploymentStrategy | object | `{}` | Deployment strategy configuration |
+| externalConfigs | list | `[]` | External configuration |
+| extraContainers | list | `[]` | Additional containers configuration |
+| extraInit | object | `{"pvcStorage":"1Gi","s3modelpath":"relative_s3_model_path/opt-125m"}` | Additional configuration for the init container |
+| extraInit.pvcStorage | string | `"50Gi"` | Storage size of the s3 |
+| extraInit.s3modelpath | string | `"relative_s3_model_path/opt-125m"` | Path of the model on the s3 which hosts model weights and config files |
+| extraPorts | list | `[]` | Additional ports configuration |
+| gpuModels | list | `["TYPE_GPU_USED"]` | Type of gpu used |
+| image | object | `{"command":["vllm","serve","/data/","--served-model-name","opt-125m","--host","0.0.0.0","--port","8000"],"repository":"vllm/vllm-openai","tag":"latest"}` | Image configuration |
+| image.command | list | `["vllm","serve","/data/","--served-model-name","opt-125m","--host","0.0.0.0","--port","8000"]` | Container launch command |
+| image.repository | string | `"vllm/vllm-openai"` | Image repository |
+| image.tag | string | `"latest"` | Image tag |
+| livenessProbe | object | `{"failureThreshold":3,"httpGet":{"path":"/health","port":8000},"initialDelaySeconds":15,"periodSeconds":10}` | Liveness probe configuration |
+| livenessProbe.failureThreshold | int | `3` | Number of times after which if a probe fails in a row, Kubernetes considers that the overall check has failed: the container is not alive |
+| livenessProbe.httpGet | object | `{"path":"/health","port":8000}` | Configuration of the Kubelet http request on the server |
+| livenessProbe.httpGet.path | string | `"/health"` | Path to access on the HTTP server |
+| livenessProbe.httpGet.port | int | `8000` | Name or number of the port to access on the container, on which the server is listening |
+| livenessProbe.initialDelaySeconds | int | `15` | Number of seconds after the container has started before liveness probe is initiated |
+| livenessProbe.periodSeconds | int | `10` | How often (in seconds) to perform the liveness probe |
+| maxUnavailablePodDisruptionBudget | string | `""` | Disruption Budget Configuration |
+| readinessProbe | object | `{"failureThreshold":3,"httpGet":{"path":"/health","port":8000},"initialDelaySeconds":5,"periodSeconds":5}` | Readiness probe configuration |
+| readinessProbe.failureThreshold | int | `3` | Number of times after which if a probe fails in a row, Kubernetes considers that the overall check has failed: the container is not ready |
+| readinessProbe.httpGet | object | `{"path":"/health","port":8000}` | Configuration of the Kubelet http request on the server |
+| readinessProbe.httpGet.path | string | `"/health"` | Path to access on the HTTP server |
+| readinessProbe.httpGet.port | int | `8000` | Name or number of the port to access on the container, on which the server is listening |
+| readinessProbe.initialDelaySeconds | int | `5` | Number of seconds after the container has started before readiness probe is initiated |
+| readinessProbe.periodSeconds | int | `5` | How often (in seconds) to perform the readiness probe |
+| replicaCount | int | `1` | Number of replicas |
+| resources | object | `{"limits":{"cpu":4,"memory":"16Gi","nvidia.com/gpu":1},"requests":{"cpu":4,"memory":"16Gi","nvidia.com/gpu":1}}` | Resource configuration |
+| resources.limits."nvidia.com/gpu" | int | `1` | Number of gpus used |
+| resources.limits.cpu | int | `4` | Number of CPUs |
+| resources.limits.memory | string | `"16Gi"` | CPU memory configuration |
+| resources.requests."nvidia.com/gpu" | int | `1` | Number of gpus used |
+| resources.requests.cpu | int | `4` | Number of CPUs |
+| resources.requests.memory | string | `"16Gi"` | CPU memory configuration |
+| secrets | object | `{}` | Secrets configuration |
+| serviceName | string | | Service name |
+| servicePort | int | `80` | Service port |
+| labels.environment | string | `test` | Environment name  |
+| labels.release | string | `test` | Release name |
+
+----------------------------------------------
+Autogenerated from chart metadata using [helm-docs v1.14.2](https://github.com/norwoodj/helm-docs/releases/v1.14.2)
diff --git a/examples/chart-helm/architecture.excalidraw.png b/examples/chart-helm/architecture.excalidraw.png
diff --git a/examples/chart-helm/ct.yaml b/examples/chart-helm/ct.yaml
@@ -0,0 +1,3 @@
+# chart-testing (ct) configuration.
+# NOTE(review): the workflow passes --chart-dirs examples/chart-helm on the
+# command line and `charts` below does not match that path — confirm this
+# file is actually picked up by ct.
+chart-dirs:
+  - charts
+# Do not validate the maintainers listed in Chart.yaml.
+validate-maintainers: false
diff --git a/examples/chart-helm/lintconf.yaml b/examples/chart-helm/lintconf.yaml
@@ -0,0 +1,42 @@
+---
+# yamllint rule set; presumably the configuration applied when ct lint runs
+# yamllint on the chart — confirm how it is picked up.
+rules:
+  # No padding inside non-empty {} and []; empty ones are not checked (-1).
+  braces:
+    min-spaces-inside: 0
+    max-spaces-inside: 0
+    min-spaces-inside-empty: -1
+    max-spaces-inside-empty: -1
+  brackets:
+    min-spaces-inside: 0
+    max-spaces-inside: 0
+    min-spaces-inside-empty: -1
+    max-spaces-inside-empty: -1
+  # "key: value" — no space before the colon, at most one after.
+  colons:
+    max-spaces-before: 0
+    max-spaces-after: 1
+  # "a, b" — no space before the comma, exactly one after.
+  commas:
+    max-spaces-before: 0
+    min-spaces-after: 1
+    max-spaces-after: 1
+  comments:
+    require-starting-space: true
+    min-spaces-from-content: 2
+  document-end: disable
+  document-start: disable           # A leading --- is not required
+  empty-lines:
+    max: 2
+    max-start: 0
+    max-end: 0
+  hyphens:
+    max-spaces-after: 1
+  indentation:
+    spaces: consistent
+    indent-sequences: whatever      # Sequences may be indented or flush with the parent key
+    check-multi-line-strings: false
+  key-duplicates: enable
+  line-length: disable              # Lines can be any length
+  new-line-at-end-of-file: disable
+  new-lines:
+    type: unix
+  trailing-spaces: enable
+  # Non-canonical booleans are reported as warnings, not errors.
+  truthy:
+    level: warning