diff --git a/README.md b/README.md index a1c136643b..8847adce88 100644 --- a/README.md +++ b/README.md @@ -55,7 +55,7 @@ This repo periodically syncs all official Kubeflow components from their respect | Tensorboards Web App | apps/tensorboard/tensorboards-web-app/upstream | [v1.8.0](https://github.com/kubeflow/kubeflow/tree/v1.8.0/components/crud-web-apps/tensorboards/manifests) | | Volumes Web App | apps/volumes-web-app/upstream | [v1.8.0](https://github.com/kubeflow/kubeflow/tree/v1.8.0/components/crud-web-apps/volumes/manifests) | | Katib | apps/katib/upstream | [v0.16.0](https://github.com/kubeflow/katib/tree/v0.16.0/manifests/v1beta1) | -| KServe | contrib/kserve/kserve | [v0.11.2](https://github.com/kserve/kserve/tree/v0.11.2/install/v0.11.2) | +| KServe | contrib/kserve/kserve | [v0.12.1](https://github.com/kserve/kserve/tree/v0.12.1/install/v0.12.1) | | KServe Models Web App | contrib/kserve/models-web-app | [v0.10.0](https://github.com/kserve/models-web-app/tree/v0.10.0/config) | | Kubeflow Pipelines | apps/pipeline/upstream | [2.0.5](https://github.com/kubeflow/pipelines/tree/2.0.5/manifests/kustomize) | | Kubeflow Tekton Pipelines | apps/kfp-tekton/upstream | [2.0.5](https://github.com/kubeflow/kfp-tekton/tree/2.0.5/manifests/kustomize) | diff --git a/contrib/kserve/kserve/kserve-runtimes.yaml b/contrib/kserve/kserve/kserve-cluster-resources.yaml similarity index 82% rename from contrib/kserve/kserve/kserve-runtimes.yaml rename to contrib/kserve/kserve/kserve-cluster-resources.yaml index 5c74b4b8f4..0c73625fe7 100644 --- a/contrib/kserve/kserve/kserve-runtimes.yaml +++ b/contrib/kserve/kserve/kserve-cluster-resources.yaml @@ -1,5 +1,34 @@ apiVersion: serving.kserve.io/v1alpha1 kind: ClusterServingRuntime +metadata: + name: kserve-huggingfaceserver +spec: + annotations: + prometheus.kserve.io/path: /metrics + prometheus.kserve.io/port: "8080" + containers: + - args: + - --model_name={{.Name}} + image: kserve/huggingfaceserver:v0.12.1 + name: 
kserve-container + resources: + limits: + cpu: "1" + memory: 2Gi + requests: + cpu: "1" + memory: 2Gi + protocolVersions: + - v2 + - v1 + supportedModelFormats: + - autoSelect: true + name: huggingface + priority: 1 + version: "1" +--- +apiVersion: serving.kserve.io/v1alpha1 +kind: ClusterServingRuntime metadata: name: kserve-lgbserver spec: @@ -12,7 +41,7 @@ spec: - --model_dir=/mnt/models - --http_port=8080 - --nthread=1 - image: kserve/lgbserver:v0.11.2 + image: kserve/lgbserver:v0.12.1 name: kserve-container resources: limits: @@ -94,7 +123,7 @@ spec: - --model_name={{.Name}} - --model_dir=/mnt/models - --http_port=8080 - image: kserve/paddleserver:v0.11.2 + image: kserve/paddleserver:v0.12.1 name: kserve-container resources: limits: @@ -125,7 +154,7 @@ spec: - --model_name={{.Name}} - --model_dir=/mnt/models - --http_port=8080 - image: kserve/pmmlserver:v0.11.2 + image: kserve/pmmlserver:v0.12.1 name: kserve-container resources: limits: @@ -160,7 +189,7 @@ spec: - --model_name={{.Name}} - --model_dir=/mnt/models - --http_port=8080 - image: kserve/sklearnserver:v0.11.2 + image: kserve/sklearnserver:v0.12.1 name: kserve-container resources: limits: @@ -234,7 +263,7 @@ spec: env: - name: TS_SERVICE_ENVELOPE value: '{{.Labels.serviceEnvelope}}' - image: pytorch/torchserve-kfs:0.8.2 + image: pytorch/torchserve-kfs:0.9.0 name: kserve-container resources: limits: @@ -246,7 +275,7 @@ spec: protocolVersions: - v1 - v2 - - grpc-v1 + - grpc-v2 supportedModelFormats: - autoSelect: true name: pytorch @@ -319,7 +348,7 @@ spec: - --model_dir=/mnt/models - --http_port=8080 - --nthread=1 - image: kserve/xgbserver:v0.11.2 + image: kserve/xgbserver:v0.12.1 name: kserve-container resources: limits: @@ -336,3 +365,27 @@ spec: name: xgboost priority: 1 version: "1" +--- +apiVersion: serving.kserve.io/v1alpha1 +kind: ClusterStorageContainer +metadata: + name: default +spec: + container: + image: kserve/storage-initializer:v0.12.1 + name: storage-initializer + resources: + limits: + 
cpu: "1" + memory: 1Gi + requests: + cpu: 100m + memory: 100Mi + supportedUriFormats: + - prefix: gs:// + - prefix: s3:// + - prefix: hdfs:// + - prefix: webhdfs:// + - regex: https://(.+?).blob.core.windows.net/(.+) + - regex: https://(.+?).file.core.windows.net/(.+) + - regex: https?://(.+)/(.+) diff --git a/contrib/kserve/kserve/kserve.yaml b/contrib/kserve/kserve/kserve.yaml index 52bc9c32e6..e551743dc9 100644 --- a/contrib/kserve/kserve/kserve.yaml +++ b/contrib/kserve/kserve/kserve.yaml @@ -878,6 +878,19 @@ spec: format: int32 type: integer type: object + resizePolicy: + items: + properties: + resourceName: + type: string + restartPolicy: + type: string + required: + - resourceName + - restartPolicy + type: object + type: array + x-kubernetes-list-type: atomic resources: properties: claims: @@ -909,6 +922,8 @@ spec: x-kubernetes-int-or-string: true type: object type: object + restartPolicy: + type: string securityContext: properties: allowPrivilegeEscalation: @@ -2302,6 +2317,19 @@ spec: format: int32 type: integer type: object + resizePolicy: + items: + properties: + resourceName: + type: string + restartPolicy: + type: string + required: + - resourceName + - restartPolicy + type: object + type: array + x-kubernetes-list-type: atomic resources: properties: claims: @@ -2333,6 +2361,8 @@ spec: x-kubernetes-int-or-string: true type: object type: object + restartPolicy: + type: string securityContext: properties: allowPrivilegeEscalation: @@ -2940,6 +2970,10 @@ spec: type: array type: object type: object + maxReplicas: + type: integer + minReplicas: + type: integer nodes: additionalProperties: properties: @@ -3010,6 +3044,18 @@ spec: x-kubernetes-int-or-string: true type: object type: object + scaleMetric: + enum: + - cpu + - memory + - concurrency + - rps + type: string + scaleTarget: + type: integer + timeout: + format: int64 + type: integer required: - nodes type: object @@ -3867,6 +3913,19 @@ spec: format: int32 type: integer type: object + resizePolicy: + 
items: + properties: + resourceName: + type: string + restartPolicy: + type: string + required: + - resourceName + - restartPolicy + type: object + type: array + x-kubernetes-list-type: atomic resources: properties: claims: @@ -3898,6 +3957,8 @@ spec: x-kubernetes-int-or-string: true type: object type: object + restartPolicy: + type: string runtimeVersion: type: string securityContext: @@ -4487,6 +4548,19 @@ spec: format: int32 type: integer type: object + resizePolicy: + items: + properties: + resourceName: + type: string + restartPolicy: + type: string + required: + - resourceName + - restartPolicy + type: object + type: array + x-kubernetes-list-type: atomic resources: properties: claims: @@ -4518,6 +4592,8 @@ spec: x-kubernetes-int-or-string: true type: object type: object + restartPolicy: + type: string runtimeVersion: type: string securityContext: @@ -5121,6 +5197,19 @@ spec: format: int32 type: integer type: object + resizePolicy: + items: + properties: + resourceName: + type: string + restartPolicy: + type: string + required: + - resourceName + - restartPolicy + type: object + type: array + x-kubernetes-list-type: atomic resources: properties: claims: @@ -5152,6 +5241,8 @@ spec: x-kubernetes-int-or-string: true type: object type: object + restartPolicy: + type: string securityContext: properties: allowPrivilegeEscalation: @@ -5779,6 +5870,19 @@ spec: format: int32 type: integer type: object + resizePolicy: + items: + properties: + resourceName: + type: string + restartPolicy: + type: string + required: + - resourceName + - restartPolicy + type: object + type: array + x-kubernetes-list-type: atomic resources: properties: claims: @@ -5810,6 +5914,8 @@ spec: x-kubernetes-int-or-string: true type: object type: object + restartPolicy: + type: string securityContext: properties: allowPrivilegeEscalation: @@ -7736,6 +7842,19 @@ spec: format: int32 type: integer type: object + resizePolicy: + items: + properties: + resourceName: + type: string + restartPolicy: + 
type: string + required: + - resourceName + - restartPolicy + type: object + type: array + x-kubernetes-list-type: atomic resources: properties: claims: @@ -7767,6 +7886,8 @@ spec: x-kubernetes-int-or-string: true type: object type: object + restartPolicy: + type: string securityContext: properties: allowPrivilegeEscalation: @@ -7919,86 +8040,713 @@ spec: volumeDevices: items: properties: - devicePath: - type: string - name: + devicePath: + type: string + name: + type: string + required: + - devicePath + - name + type: object + type: array + volumeMounts: + items: + properties: + mountPath: + type: string + mountPropagation: + type: string + name: + type: string + readOnly: + type: boolean + subPath: + type: string + subPathExpr: + type: string + required: + - mountPath + - name + type: object + type: array + workingDir: + type: string + required: + - name + type: object + type: array + dnsConfig: + properties: + nameservers: + items: + type: string + type: array + options: + items: + properties: + name: + type: string + value: + type: string + type: object + type: array + searches: + items: + type: string + type: array + type: object + dnsPolicy: + type: string + enableServiceLinks: + type: boolean + hostAliases: + items: + properties: + hostnames: + items: + type: string + type: array + ip: + type: string + type: object + type: array + hostIPC: + type: boolean + hostNetwork: + type: boolean + hostPID: + type: boolean + hostUsers: + type: boolean + hostname: + type: string + huggingface: + properties: + args: + items: + type: string + type: array + command: + items: + type: string + type: array + env: + items: + properties: + name: + type: string + value: + type: string + valueFrom: + properties: + configMapKeyRef: + properties: + key: + type: string + name: + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + fieldRef: + properties: + apiVersion: + type: string + fieldPath: + type: string + required: + - 
fieldPath + type: object + x-kubernetes-map-type: atomic + resourceFieldRef: + properties: + containerName: + type: string + divisor: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + resource: + type: string + required: + - resource + type: object + x-kubernetes-map-type: atomic + secretKeyRef: + properties: + key: + type: string + name: + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + type: object + required: + - name + type: object + type: array + envFrom: + items: + properties: + configMapRef: + properties: + name: + type: string + optional: + type: boolean + type: object + x-kubernetes-map-type: atomic + prefix: + type: string + secretRef: + properties: + name: + type: string + optional: + type: boolean + type: object + x-kubernetes-map-type: atomic + type: object + type: array + image: + type: string + imagePullPolicy: + type: string + lifecycle: + properties: + postStart: + properties: + exec: + properties: + command: + items: + type: string + type: array + type: object + httpGet: + properties: + host: + type: string + httpHeaders: + items: + properties: + name: + type: string + value: + type: string + required: + - name + - value + type: object + type: array + path: + type: string + port: + anyOf: + - type: integer + - type: string + x-kubernetes-int-or-string: true + scheme: + type: string + required: + - port + type: object + tcpSocket: + properties: + host: + type: string + port: + anyOf: + - type: integer + - type: string + x-kubernetes-int-or-string: true + required: + - port + type: object + type: object + preStop: + properties: + exec: + properties: + command: + items: + type: string + type: array + type: object + httpGet: + properties: + host: + type: string + httpHeaders: + items: + properties: + name: + type: string + value: + type: 
string + required: + - name + - value + type: object + type: array + path: + type: string + port: + anyOf: + - type: integer + - type: string + x-kubernetes-int-or-string: true + scheme: + type: string + required: + - port + type: object + tcpSocket: + properties: + host: + type: string + port: + anyOf: + - type: integer + - type: string + x-kubernetes-int-or-string: true + required: + - port + type: object + type: object + type: object + livenessProbe: + properties: + exec: + properties: + command: + items: + type: string + type: array + type: object + failureThreshold: + format: int32 + type: integer + grpc: + properties: + port: + format: int32 + type: integer + service: + type: string + required: + - port + type: object + httpGet: + properties: + host: + type: string + httpHeaders: + items: + properties: + name: + type: string + value: + type: string + required: + - name + - value + type: object + type: array + path: + type: string + port: + anyOf: + - type: integer + - type: string + x-kubernetes-int-or-string: true + scheme: + type: string + type: object + initialDelaySeconds: + format: int32 + type: integer + periodSeconds: + format: int32 + type: integer + successThreshold: + format: int32 + type: integer + tcpSocket: + properties: + host: + type: string + port: + anyOf: + - type: integer + - type: string + x-kubernetes-int-or-string: true + type: object + terminationGracePeriodSeconds: + format: int64 + type: integer + timeoutSeconds: + format: int32 + type: integer + type: object + name: + type: string + ports: + items: + properties: + containerPort: + format: int32 + type: integer + hostIP: + type: string + hostPort: + format: int32 + type: integer + name: + type: string + protocol: + default: TCP + type: string + required: + - containerPort + type: object + type: array + x-kubernetes-list-map-keys: + - containerPort + - protocol + x-kubernetes-list-type: map + protocolVersion: + type: string + readinessProbe: + properties: + exec: + properties: + 
command: + items: + type: string + type: array + type: object + failureThreshold: + format: int32 + type: integer + grpc: + properties: + port: + format: int32 + type: integer + service: + type: string + required: + - port + type: object + httpGet: + properties: + host: + type: string + httpHeaders: + items: + properties: + name: + type: string + value: + type: string + required: + - name + - value + type: object + type: array + path: + type: string + port: + anyOf: + - type: integer + - type: string + x-kubernetes-int-or-string: true + scheme: + type: string + type: object + initialDelaySeconds: + format: int32 + type: integer + periodSeconds: + format: int32 + type: integer + successThreshold: + format: int32 + type: integer + tcpSocket: + properties: + host: + type: string + port: + anyOf: + - type: integer + - type: string + x-kubernetes-int-or-string: true + type: object + terminationGracePeriodSeconds: + format: int64 + type: integer + timeoutSeconds: + format: int32 + type: integer + type: object + resizePolicy: + items: + properties: + resourceName: + type: string + restartPolicy: + type: string + required: + - resourceName + - restartPolicy + type: object + type: array + x-kubernetes-list-type: atomic + resources: + properties: + claims: + items: + properties: + name: + type: string + required: + - name + type: object + type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map + limits: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + type: object + requests: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + type: object + type: object + restartPolicy: + type: string + 
runtimeVersion: + type: string + securityContext: + properties: + allowPrivilegeEscalation: + type: boolean + capabilities: + properties: + add: + items: + type: string + type: array + drop: + items: + type: string + type: array + type: object + privileged: + type: boolean + procMount: + type: string + readOnlyRootFilesystem: + type: boolean + runAsGroup: + format: int64 + type: integer + runAsNonRoot: + type: boolean + runAsUser: + format: int64 + type: integer + seLinuxOptions: + properties: + level: + type: string + role: + type: string + type: + type: string + user: + type: string + type: object + seccompProfile: + properties: + localhostProfile: + type: string + type: + type: string + required: + - type + type: object + windowsOptions: + properties: + gmsaCredentialSpec: + type: string + gmsaCredentialSpecName: + type: string + hostProcess: + type: boolean + runAsUserName: + type: string + type: object + type: object + startupProbe: + properties: + exec: + properties: + command: + items: + type: string + type: array + type: object + failureThreshold: + format: int32 + type: integer + grpc: + properties: + port: + format: int32 + type: integer + service: + type: string + required: + - port + type: object + httpGet: + properties: + host: + type: string + httpHeaders: + items: + properties: + name: + type: string + value: + type: string + required: + - name + - value + type: object + type: array + path: + type: string + port: + anyOf: + - type: integer + - type: string + x-kubernetes-int-or-string: true + scheme: + type: string + required: + - port + type: object + initialDelaySeconds: + format: int32 + type: integer + periodSeconds: + format: int32 + type: integer + successThreshold: + format: int32 + type: integer + tcpSocket: + properties: + host: type: string + port: + anyOf: + - type: integer + - type: string + x-kubernetes-int-or-string: true required: - - devicePath - - name + - port type: object - type: array - volumeMounts: - items: - properties: - 
mountPath: - type: string - mountPropagation: - type: string - name: - type: string - readOnly: - type: boolean - subPath: - type: string - subPathExpr: - type: string - required: - - mountPath - - name + terminationGracePeriodSeconds: + format: int64 + type: integer + timeoutSeconds: + format: int32 + type: integer + type: object + stdin: + type: boolean + stdinOnce: + type: boolean + storage: + properties: + key: + type: string + parameters: + additionalProperties: + type: string type: object - type: array - workingDir: - type: string - required: - - name - type: object - type: array - dnsConfig: - properties: - nameservers: + path: + type: string + schemaPath: + type: string + type: object + storageUri: + type: string + terminationMessagePath: + type: string + terminationMessagePolicy: + type: string + tty: + type: boolean + volumeDevices: items: - type: string + properties: + devicePath: + type: string + name: + type: string + required: + - devicePath + - name + type: object type: array - options: + volumeMounts: items: properties: + mountPath: + type: string + mountPropagation: + type: string name: type: string - value: + readOnly: + type: boolean + subPath: + type: string + subPathExpr: type: string + required: + - mountPath + - name type: object type: array - searches: - items: - type: string - type: array + workingDir: + type: string type: object - dnsPolicy: - type: string - enableServiceLinks: - type: boolean - hostAliases: - items: - properties: - hostnames: - items: - type: string - type: array - ip: - type: string - type: object - type: array - hostIPC: - type: boolean - hostNetwork: - type: boolean - hostPID: - type: boolean - hostUsers: - type: boolean - hostname: - type: string imagePullSecrets: items: properties: @@ -8394,6 +9142,19 @@ spec: format: int32 type: integer type: object + resizePolicy: + items: + properties: + resourceName: + type: string + restartPolicy: + type: string + required: + - resourceName + - restartPolicy + type: object + 
type: array + x-kubernetes-list-type: atomic resources: properties: claims: @@ -8425,6 +9186,8 @@ spec: x-kubernetes-int-or-string: true type: object type: object + restartPolicy: + type: string securityContext: properties: allowPrivilegeEscalation: @@ -8996,6 +9759,19 @@ spec: format: int32 type: integer type: object + resizePolicy: + items: + properties: + resourceName: + type: string + restartPolicy: + type: string + required: + - resourceName + - restartPolicy + type: object + type: array + x-kubernetes-list-type: atomic resources: properties: claims: @@ -9027,6 +9803,8 @@ spec: x-kubernetes-int-or-string: true type: object type: object + restartPolicy: + type: string runtimeVersion: type: string securityContext: @@ -9632,6 +10410,19 @@ spec: format: int32 type: integer type: object + resizePolicy: + items: + properties: + resourceName: + type: string + restartPolicy: + type: string + required: + - resourceName + - restartPolicy + type: object + type: array + x-kubernetes-list-type: atomic resources: properties: claims: @@ -9663,6 +10454,8 @@ spec: x-kubernetes-int-or-string: true type: object type: object + restartPolicy: + type: string runtime: type: string runtimeVersion: @@ -10253,6 +11046,19 @@ spec: format: int32 type: integer type: object + resizePolicy: + items: + properties: + resourceName: + type: string + restartPolicy: + type: string + required: + - resourceName + - restartPolicy + type: object + type: array + x-kubernetes-list-type: atomic resources: properties: claims: @@ -10284,6 +11090,8 @@ spec: x-kubernetes-int-or-string: true type: object type: object + restartPolicy: + type: string runtimeVersion: type: string securityContext: @@ -10878,6 +11686,19 @@ spec: format: int32 type: integer type: object + resizePolicy: + items: + properties: + resourceName: + type: string + restartPolicy: + type: string + required: + - resourceName + - restartPolicy + type: object + type: array + x-kubernetes-list-type: atomic resources: properties: claims: @@ 
-10909,6 +11730,8 @@ spec: x-kubernetes-int-or-string: true type: object type: object + restartPolicy: + type: string runtimeVersion: type: string securityContext: @@ -11490,6 +12313,19 @@ spec: format: int32 type: integer type: object + resizePolicy: + items: + properties: + resourceName: + type: string + restartPolicy: + type: string + required: + - resourceName + - restartPolicy + type: object + type: array + x-kubernetes-list-type: atomic resources: properties: claims: @@ -11521,6 +12357,8 @@ spec: x-kubernetes-int-or-string: true type: object type: object + restartPolicy: + type: string runtimeVersion: type: string securityContext: @@ -12109,6 +12947,19 @@ spec: format: int32 type: integer type: object + resizePolicy: + items: + properties: + resourceName: + type: string + restartPolicy: + type: string + required: + - resourceName + - restartPolicy + type: object + type: array + x-kubernetes-list-type: atomic resources: properties: claims: @@ -12140,6 +12991,8 @@ spec: x-kubernetes-int-or-string: true type: object type: object + restartPolicy: + type: string runtimeVersion: type: string securityContext: @@ -12848,6 +13701,19 @@ spec: format: int32 type: integer type: object + resizePolicy: + items: + properties: + resourceName: + type: string + restartPolicy: + type: string + required: + - resourceName + - restartPolicy + type: object + type: array + x-kubernetes-list-type: atomic resources: properties: claims: @@ -12879,6 +13745,8 @@ spec: x-kubernetes-int-or-string: true type: object type: object + restartPolicy: + type: string runtimeVersion: type: string securityContext: @@ -13462,6 +14330,19 @@ spec: format: int32 type: integer type: object + resizePolicy: + items: + properties: + resourceName: + type: string + restartPolicy: + type: string + required: + - resourceName + - restartPolicy + type: object + type: array + x-kubernetes-list-type: atomic resources: properties: claims: @@ -13493,6 +14374,8 @@ spec: x-kubernetes-int-or-string: true type: object 
type: object + restartPolicy: + type: string runtimeVersion: type: string securityContext: @@ -14152,6 +15035,19 @@ spec: format: int32 type: integer type: object + resizePolicy: + items: + properties: + resourceName: + type: string + restartPolicy: + type: string + required: + - resourceName + - restartPolicy + type: object + type: array + x-kubernetes-list-type: atomic resources: properties: claims: @@ -14183,6 +15079,8 @@ spec: x-kubernetes-int-or-string: true type: object type: object + restartPolicy: + type: string runtimeVersion: type: string securityContext: @@ -15475,6 +16373,19 @@ spec: format: int32 type: integer type: object + resizePolicy: + items: + properties: + resourceName: + type: string + restartPolicy: + type: string + required: + - resourceName + - restartPolicy + type: object + type: array + x-kubernetes-list-type: atomic resources: properties: claims: @@ -15506,6 +16417,8 @@ spec: x-kubernetes-int-or-string: true type: object type: object + restartPolicy: + type: string runtimeVersion: type: string securityContext: @@ -16482,6 +17395,19 @@ spec: format: int32 type: integer type: object + resizePolicy: + items: + properties: + resourceName: + type: string + restartPolicy: + type: string + required: + - resourceName + - restartPolicy + type: object + type: array + x-kubernetes-list-type: atomic resources: properties: claims: @@ -16513,6 +17439,8 @@ spec: x-kubernetes-int-or-string: true type: object type: object + restartPolicy: + type: string securityContext: properties: allowPrivilegeEscalation: @@ -17140,6 +18068,19 @@ spec: format: int32 type: integer type: object + resizePolicy: + items: + properties: + resourceName: + type: string + restartPolicy: + type: string + required: + - resourceName + - restartPolicy + type: object + type: array + x-kubernetes-list-type: atomic resources: properties: claims: @@ -17171,6 +18112,8 @@ spec: x-kubernetes-int-or-string: true type: object type: object + restartPolicy: + type: string securityContext: 
properties: allowPrivilegeEscalation: @@ -18332,6 +19275,8 @@ spec: properties: CACerts: type: string + audience: + type: string name: type: string url: @@ -18348,6 +19293,8 @@ spec: properties: CACerts: type: string + audience: + type: string name: type: string url: @@ -19364,6 +20311,19 @@ spec: format: int32 type: integer type: object + resizePolicy: + items: + properties: + resourceName: + type: string + restartPolicy: + type: string + required: + - resourceName + - restartPolicy + type: object + type: array + x-kubernetes-list-type: atomic resources: properties: claims: @@ -19395,6 +20355,8 @@ spec: x-kubernetes-int-or-string: true type: object type: object + restartPolicy: + type: string securityContext: properties: allowPrivilegeEscalation: @@ -20441,6 +21403,8 @@ spec: properties: CACerts: type: string + audience: + type: string name: type: string url: @@ -20921,35 +21885,53 @@ data: contains the Art explainer serving runtime default image version.\n \"defaultImageVersion\": \"latest\"\n }\n }\n \n # ====================================== STORAGE INITIALIZER CONFIGURATION ======================================\n # Example\n - storageInitializer: |-\n {\n \"image\" : \"kserve/storage-initializer:v0.11.2\",\n + storageInitializer: |-\n {\n \"image\" : \"kserve/storage-initializer:v0.12.1\",\n \ \"memoryRequest\": \"100Mi\",\n \"memoryLimit\": \"1Gi\",\n \"cpuRequest\": - \"100m\",\n \"cpuLimit\": \"1\",\n \"enableDirectPvcVolumeMount\": - false\n }\n storageInitializer: |-\n {\n # image contains the default - storage initializer image uri.\n \"image\" : \"kserve/storage-initializer:v0.11.2\",\n - \ \n # memoryRequest is the requests.memory to set for the storage - initializer init container.\n \"memoryRequest\": \"100Mi\",\n \n # - memoryLimit is the limits.memory to set for the storage initializer init container.\n - \ \"memoryLimit\": \"1Gi\",\n \n # cpuRequest is the requests.cpu - to set for the storage initializer init container.\n \"cpuRequest\": 
\"100m\",\n - \ \n # cpuLimit is the limits.cpu to set for the storage initializer - init container.\n \"cpuLimit\": \"1\",\n \n # enableDirectPvcVolumeMount - controls whether users can mount pvc volumes directly.\n # if pvc volume - is provided in storageuri then the pvc volume is directly mounted to /mnt/models - in the user container.\n # rather than symlink it to a shared volume. For - more info see https://github.com/kserve/kserve/issues/2737\n \"enableDirectPvcVolumeMount\": - false\n }\n \n # ====================================== CREDENTIALS ======================================\n + \"100m\",\n \"cpuLimit\": \"1\",\n \"caBundleConfigMapName\": \"\",\n + \ \"caBundleVolumeMountPath\": \"/etc/ssl/custom-certs\",\n \"enableDirectPvcVolumeMount\": + false,\n \"enableModelcar\": false,\n \"cpuModelcar\": \"10m\",\n + \ \"memoryModelcar\": \"15Mi\"\n }\n storageInitializer: |-\n {\n # + image contains the default storage initializer image uri.\n \"image\" : + \"kserve/storage-initializer:v0.12.1\",\n \n # memoryRequest is the + requests.memory to set for the storage initializer init container.\n \"memoryRequest\": + \"100Mi\",\n \n # memoryLimit is the limits.memory to set for the storage + initializer init container.\n \"memoryLimit\": \"1Gi\",\n \n # + cpuRequest is the requests.cpu to set for the storage initializer init container.\n + \ \"cpuRequest\": \"100m\",\n \n # cpuLimit is the limits.cpu + to set for the storage initializer init container.\n \"cpuLimit\": \"1\",\n + \ \n # caBundleConfigMapName is the ConfigMap will be copied to a user + namespace for the storage initializer init container.\n \"caBundleConfigMapName\": + \"\",\n\n # caBundleVolumeMountPath is the mount point for the configmap + set by caBundleConfigMapName for the storage initializer init container.\n \"caBundleVolumeMountPath\": + \"/etc/ssl/custom-certs\",\n\n # enableDirectPvcVolumeMount controls whether + users can mount pvc volumes directly.\n # if pvc volume is provided in 
storageuri + then the pvc volume is directly mounted to /mnt/models in the user container.\n + \ # rather than symlink it to a shared volume. For more info see https://github.com/kserve/kserve/issues/2737\n + \ \"enableDirectPvcVolumeMount\": true,\n\n # enableModelcar enabled + allows you to directly access an OCI container image by\n # using a source + URL with an \"oci://\" schema.\n \"enableModelcar\": false,\n\n # + cpuModelcar is the cpu request and limit that is used for the passive modelcar + container. It can be\n # set very low, but should be allowed by any Kubernetes + LimitRange that might apply.\n \"cpuModelcar\": \"10m\",\n\n # memoryModelcar + is the memory request and limit that is used for the passive modelcar container. + It can be\n # set very low, but should be allowed by any Kubernetes LimitRange + that might apply.\n \"memoryModelcar\": \"15Mi\",\n\n # uidModelcar + is the UID under which the modelcar process and the main container are running.\n + \ # Some Kubernetes clusters might require this to be root (0). 
If not set + the user id is left untouched (default)\n \"uidModelcar\": 10\n }\n \n + # ====================================== CREDENTIALS ======================================\n # Example\n credentials: |-\n {\n \"storageSpecSecretName\": \"storage-config\",\n \ \"storageSecretNameAnnotation\": \"serving.kserve.io/storageSecretName\",\n \ \"gcs\": {\n \"gcsCredentialFileName\": \"gcloud-application-credentials.json\"\n \ },\n \"s3\": {\n \"s3AccessKeyIDName\": \"AWS_ACCESS_KEY_ID\",\n \ \"s3SecretAccessKeyName\": \"AWS_SECRET_ACCESS_KEY\",\n \"s3Endpoint\": \"\",\n \"s3UseHttps\": \"\",\n \"s3Region\": \"\",\n \"s3VerifySSL\": - \"\",\n \"s3UseVirtualBucket\": \"\",\n \"s3UseAnonymousCredential\": - \"\",\n \"s3CABundle\": \"\"\n }\n }\n # This is a global configuration - used for downloading models from the cloud storage.\n # You can override this - configuration by specifying the annotations on service account or static secret.\n - # https://kserve.github.io/website/master/modelserving/storage/s3/s3/\n # For - a quick reference about AWS ENV variables:\n # AWS Cli: https://docs.aws.amazon.com/cli/latest/userguide/cli-configure-envvars.html\n + \"\",\n \"s3UseVirtualBucket\": \"\",\n \"s3UseAccelerate\": + \"\",\n \"s3UseAnonymousCredential\": \"\",\n \"s3CABundle\": + \"\"\n }\n }\n # This is a global configuration used for downloading models + from the cloud storage.\n # You can override this configuration by specifying + the annotations on service account or static secret.\n # https://kserve.github.io/website/master/modelserving/storage/s3/s3/\n + # For a quick reference about AWS ENV variables:\n # AWS Cli: https://docs.aws.amazon.com/cli/latest/userguide/cli-configure-envvars.html\n # Boto: https://boto3.amazonaws.com/v1/documentation/api/latest/guide/configuration.html#using-environment-variables\n #\n # The `s3AccessKeyIDName` and `s3SecretAccessKeyName` fields are only used from this configmap when static credentials (IAM User Access Key Secret)\n # are 
@@ -20979,24 +21961,27 @@ data: s3Region specifies the region of the bucket.\n \"s3Region\": \"\",\n \ \n # s3VerifySSL controls whether to verify the tls/ssl certificate.\n \ \"s3VerifySSL\": \"\",\n \n # s3UseVirtualBucket configures - whether it is a virtual bucket or not.\n \"s3UseVirtualBucket\": \"\",\n - \ \n # s3UseAnonymousCredential configures whether to use anonymous - credentials to download the model or not.\n \"s3UseAnonymousCredential\": - \"\",\n \n # s3CABundle specifies the path to a certificate - bundle to use for HTTPS certificate validation.\n \"s3CABundle\": \"\"\n - \ }\n }\n \n # ====================================== INGRESS CONFIGURATION - ======================================\n # Example\n ingress: |-\n {\n \"ingressGateway\" - : \"knative-serving/knative-ingress-gateway\",\n \"ingressService\" : \"istio-ingressgateway.istio-system.svc.cluster.local\",\n + whether it is a virtual bucket or not.\n \"s3UseVirtualBucket\": \"\",\n\n + \ # s3UseAccelerate configures whether to use transfer acceleration.\n + \ \"s3UseAccelerate\": \"\",\n \n # s3UseAnonymousCredential + configures whether to use anonymous credentials to download the model or not.\n + \ \"s3UseAnonymousCredential\": \"\",\n \n # s3CABundle + specifies the path to a certificate bundle to use for HTTPS certificate validation.\n + \ \"s3CABundle\": \"\"\n }\n }\n \n # ====================================== + INGRESS CONFIGURATION ======================================\n # Example\n ingress: + |-\n {\n \"ingressGateway\" : \"knative-serving/knative-ingress-gateway\",\n + \ \"ingressService\" : \"istio-ingressgateway.istio-system.svc.cluster.local\",\n \ \"localGateway\" : \"knative-serving/knative-local-gateway\",\n \"localGatewayService\" : \"knative-local-gateway.istio-system.svc.cluster.local\",\n \"ingressDomain\" \ : \"example.com\",\n \"ingressClassName\" : \"istio\",\n \"domainTemplate\": \"{{ .Name }}-{{ .Namespace }}.{{ .IngressDomain }}\",\n \"urlScheme\": - \"http\",\n 
\"disableIstioVirtualHost\": false\n }\n ingress: |-\n {\n - \ # ingressGateway specifies the ingress gateway to serve external traffic.\n - \ # The gateway should be specified in format /\n # NOTE: This configuration only applicable for serverless deployment - with Istio configured as network layer.\n \"ingressGateway\" : \"knative-serving/knative-ingress-gateway\",\n - \n # ingressService specifies the hostname of the ingress service.\n # + \"http\",\n \"disableIstioVirtualHost\": false,\n \"disableIngressCreation\": + false\n }\n ingress: |-\n {\n # ingressGateway specifies the ingress + gateway to serve external traffic.\n # The gateway should be specified in + format /\n # NOTE: This configuration only + applicable for serverless deployment with Istio configured as network layer.\n + \ \"ingressGateway\" : \"knative-serving/knative-ingress-gateway\",\n \n + \ # ingressService specifies the hostname of the ingress service.\n # NOTE: This configuration only applicable for serverless deployment with Istio configured as network layer.\n \"ingressService\" : \"istio-ingressgateway.istio-system.svc.cluster.local\",\n \n # localGateway specifies the gateway which handles the network traffic @@ -21032,19 +22017,21 @@ data: supported by knative.\n # For more info https://github.com/kserve/kserve/pull/2380, https://kserve.github.io/website/master/admin/serverless/kourier_networking/.\n \ # NOTE: This configuration is only applicable to serverless deployment.\n - \ \"disableIstioVirtualHost\": false\n \n # pathTemplate specifies - the template for generating path based url for each inference service.\n # - The following variables can be used in the template for generating url.\n # - Name of the inference service ( {{ .Name}} )\n # Namespace of the inference - service ( {{ .Namespace }} )\n # For more info https://github.com/kserve/kserve/issues/2257.\n - \ # NOTE: This configuration only applicable to serverless deployment.\n - \ \"pathTemplate\": \"/serving/{{ 
.Namespace }}/{{ .Name }}\"\n }\n \n - # ====================================== LOGGER CONFIGURATION ======================================\n - # Example\n logger: |-\n {\n \"image\" : \"kserve/agent:v0.11.2\",\n \"memoryRequest\": + \ \"disableIstioVirtualHost\": false,\n\n # disableIngressCreation + controls whether to disable ingress creation for raw deployment mode.\n \"disableIngressCreation\": + false,\n \n # pathTemplate specifies the template for generating path based + url for each inference service.\n # The following variables can be used + in the template for generating url.\n # Name of the inference service ( + {{ .Name}} )\n # Namespace of the inference service ( {{ .Namespace }} )\n + \ # For more info https://github.com/kserve/kserve/issues/2257.\n # + NOTE: This configuration only applicable to serverless deployment.\n \"pathTemplate\": + \"/serving/{{ .Namespace }}/{{ .Name }}\"\n }\n \n # ====================================== + LOGGER CONFIGURATION ======================================\n # Example\n logger: + |-\n {\n \"image\" : \"kserve/agent:v0.12.1\",\n \"memoryRequest\": \"100Mi\",\n \"memoryLimit\": \"1Gi\",\n \"cpuRequest\": \"100m\",\n \ \"cpuLimit\": \"1\",\n \"defaultUrl\": \"http://default-broker\"\n \ }\n logger: |-\n {\n # image contains the default logger image uri.\n - \ \"image\" : \"kserve/agent:v0.11.2\",\n \n # memoryRequest is the + \ \"image\" : \"kserve/agent:v0.12.1\",\n \n # memoryRequest is the requests.memory to set for the logger container.\n \"memoryRequest\": \"100Mi\",\n \ \n # memoryLimit is the limits.memory to set for the logger container.\n \ \"memoryLimit\": \"1Gi\",\n \n # cpuRequest is the requests.cpu @@ -21053,21 +22040,24 @@ data: \"1\",\n \n # defaultUrl specifies the default logger url. 
If logger is not specified in the resource this url is used.\n \"defaultUrl\": \"http://default-broker\"\n \ }\n \n # ====================================== BATCHER CONFIGURATION ======================================\n - # Example\n batcher: |-\n {\n \"image\" : \"kserve/agent:v0.11.2\",\n \"memoryRequest\": + # Example\n batcher: |-\n {\n \"image\" : \"kserve/agent:v0.12.1\",\n \"memoryRequest\": \"1Gi\",\n \"memoryLimit\": \"1Gi\",\n \"cpuRequest\": \"1\",\n \"cpuLimit\": - \"1\"\n }\n batcher: |-\n {\n # image contains the default batcher image - uri.\n \"image\" : \"kserve/agent:v0.11.2\",\n \n # memoryRequest - is the requests.memory to set for the batcher container.\n \"memoryRequest\": - \"1Gi\",\n \n # memoryLimit is the limits.memory to set for the batcher - container.\n \"memoryLimit\": \"1Gi\",\n \n # cpuRequest is - the requests.cpu to set for the batcher container.\n \"cpuRequest\": \"1\",\n - \ \n # cpuLimit is the limits.cpu to set for the batcher container.\n - \ \"cpuLimit\": \"1\"\n }\n \n # ====================================== + \"1\",\n \"maxBatchSize\": \"32\",\n \"maxLatency\": \"5000\"\n }\n + batcher: |-\n {\n # image contains the default batcher image uri.\n \"image\" + : \"kserve/agent:v0.12.1\",\n \n # memoryRequest is the requests.memory + to set for the batcher container.\n \"memoryRequest\": \"1Gi\",\n \n # + memoryLimit is the limits.memory to set for the batcher container.\n \"memoryLimit\": + \"1Gi\",\n \n # cpuRequest is the requests.cpu to set for the batcher + container.\n \"cpuRequest\": \"1\",\n \n # cpuLimit is the limits.cpu + to set for the batcher container.\n \"cpuLimit\": \"1\"\n\n # maxBatchSize + is the default maximum batch size for batcher.\n \"maxBatchSize\": \"32\",\n\n + \ # maxLatency is the default maximum latency in milliseconds for batcher + to wait and collect the batch.\n \"maxLatency\": \"5000\"\n }\n \n # ====================================== AGENT CONFIGURATION ======================================\n 
# Example\n agent: - |-\n {\n \"image\" : \"kserve/agent:v0.11.2\",\n \"memoryRequest\": + |-\n {\n \"image\" : \"kserve/agent:v0.12.1\",\n \"memoryRequest\": \"100Mi\",\n \"memoryLimit\": \"1Gi\",\n \"cpuRequest\": \"100m\",\n \ \"cpuLimit\": \"1\"\n }\n agent: |-\n {\n # image contains the - default agent image uri.\n \"image\" : \"kserve/agent:v0.11.2\",\n \n # + default agent image uri.\n \"image\" : \"kserve/agent:v0.12.1\",\n \n # memoryRequest is the requests.memory to set for the agent container.\n \"memoryRequest\": \"100Mi\",\n \n # memoryLimit is the limits.memory to set for the agent container.\n \"memoryLimit\": \"1Gi\",\n \n # cpuRequest is @@ -21075,24 +22065,30 @@ data: \ \n # cpuLimit is the limits.cpu to set for the agent container.\n \ \"cpuLimit\": \"1\"\n }\n \n # ====================================== ROUTER CONFIGURATION ======================================\n # Example\n router: - |-\n {\n \"image\" : \"kserve/router:v0.11.2\",\n \"memoryRequest\": + |-\n {\n \"image\" : \"kserve/router:v0.12.1\",\n \"memoryRequest\": \"100Mi\",\n \"memoryLimit\": \"1Gi\",\n \"cpuRequest\": \"100m\",\n - \ \"cpuLimit\": \"1\"\n }\n # router is the implementation of inference - graph.\n router: |-\n {\n # image contains the default router image uri.\n - \ \"image\" : \"kserve/router:v0.11.2\",\n \n # memoryRequest - is the requests.memory to set for the router container.\n \"memoryRequest\": - \"100Mi\",\n \n # memoryLimit is the limits.memory to set for the - router container.\n \"memoryLimit\": \"1Gi\",\n \n # cpuRequest - is the requests.cpu to set for the router container.\n \"cpuRequest\": \"100m\",\n - \ \n # cpuLimit is the limits.cpu to set for the router container.\n - \ \"cpuLimit\": \"1\"\n }\n \n # ====================================== - DEPLOYMENT CONFIGURATION ======================================\n # Example\n - deploy: |-\n {\n \"defaultDeploymentMode\": \"Serverless\"\n }\n deploy: - |-\n {\n # defaultDeploymentMode specifies the default 
deployment mode of - the kserve. The supported values are\n # Serverless, RawDeployment and ModelMesh. - Users can override the deployment mode at service level\n # by adding the - annotation serving.kserve.io/deploymentMode.For more info on deployment mode visit\n - \ # Serverless https://kserve.github.io/website/master/admin/serverless/serverless/\n + \ \"cpuLimit\": \"1\",\n \"headers\": {\n \"propagate\": []\n + \ }\n }\n # router is the implementation of inference graph.\n router: + |-\n {\n # image contains the default router image uri.\n \"image\" + : \"kserve/router:v0.12.1\",\n \n # memoryRequest is the requests.memory + to set for the router container.\n \"memoryRequest\": \"100Mi\",\n \n + \ # memoryLimit is the limits.memory to set for the router container.\n \"memoryLimit\": + \"1Gi\",\n \n # cpuRequest is the requests.cpu to set for the router + container.\n \"cpuRequest\": \"100m\",\n \n # cpuLimit is the + limits.cpu to set for the router container.\n \"cpuLimit\": \"1\",\n \n + \ # Propagate the specified headers to all the steps specified in an InferenceGraph. + \n # You can either specify the exact header names or use [Golang supported + regex patterns]\n # (https://pkg.go.dev/regexp/syntax@go1.21.3#hdr-Syntax) + to propagate multiple headers.\n \"headers\": {\n \"propagate\": + [\n \"Authorization\",\n \"Test-Header-*\",\n \"*Trace-Id*\"\n + \ ]\n }\n }\n \n # ====================================== DEPLOYMENT + CONFIGURATION ======================================\n # Example\n deploy: |-\n + \ {\n \"defaultDeploymentMode\": \"Serverless\"\n }\n deploy: |-\n {\n + \ # defaultDeploymentMode specifies the default deployment mode of the kserve. + The supported values are\n # Serverless, RawDeployment and ModelMesh. 
Users + can override the deployment mode at service level\n # by adding the annotation + serving.kserve.io/deploymentMode.For more info on deployment mode visit\n # + Serverless https://kserve.github.io/website/master/admin/serverless/serverless/\n \ # RawDeployment https://kserve.github.io/website/master/admin/kubernetes_deployment/\n \ # ModelMesh https://kserve.github.io/website/master/admin/modelmesh/\n \"defaultDeploymentMode\": \"Serverless\"\n }\n \n # ====================================== METRICS CONFIGURATION @@ -21113,7 +22109,7 @@ data: \ \"enablePrometheusScraping\" : \"false\"\n }" agent: |- { - "image" : "kserve/agent:v0.11.2", + "image" : "kserve/agent:v0.12.1", "memoryRequest": "100Mi", "memoryLimit": "1Gi", "cpuRequest": "100m", @@ -21121,11 +22117,13 @@ data: } batcher: |- { - "image" : "kserve/agent:v0.11.2", + "image" : "kserve/agent:v0.12.1", "memoryRequest": "1Gi", "memoryLimit": "1Gi", "cpuRequest": "1", - "cpuLimit": "1" + "cpuLimit": "1", + "maxBatchSize": "32", + "maxLatency": "5000" } credentials: |- { @@ -21142,6 +22140,7 @@ data: "s3Region": "", "s3VerifySSL": "", "s3UseVirtualBucket": "", + "s3UseAccelerate": "", "s3UseAnonymousCredential": "", "s3CABundle": "" } @@ -21171,11 +22170,12 @@ data: "ingressClassName" : "istio", "domainTemplate": "{{ .Name }}-{{ .Namespace }}.{{ .IngressDomain }}", "urlScheme": "http", - "disableIstioVirtualHost": false + "disableIstioVirtualHost": false, + "disableIngressCreation": false } logger: |- { - "image" : "kserve/agent:v0.11.2", + "image" : "kserve/agent:v0.12.1", "memoryRequest": "100Mi", "memoryLimit": "1Gi", "cpuRequest": "100m", @@ -21189,7 +22189,7 @@ data: } router: |- { - "image" : "kserve/router:v0.11.2", + "image" : "kserve/router:v0.12.1", "memoryRequest": "100Mi", "memoryLimit": "1Gi", "cpuRequest": "100m", @@ -21197,12 +22197,17 @@ data: } storageInitializer: |- { - "image" : "kserve/storage-initializer:v0.11.2", + "image" : "kserve/storage-initializer:v0.12.1", "memoryRequest": 
"100Mi", "memoryLimit": "1Gi", "cpuRequest": "100m", "cpuLimit": "1", - "enableDirectPvcVolumeMount": false + "caBundleConfigMapName": "", + "caBundleVolumeMountPath": "/etc/ssl/custom-certs", + "enableDirectPvcVolumeMount": true, + "enableModelcar": false, + "cpuModelcar": "10m", + "memoryModelcar": "15Mi" } kind: ConfigMap metadata: @@ -21299,13 +22304,28 @@ spec: fieldPath: metadata.namespace - name: SECRET_NAME value: kserve-webhook-server-cert - image: kserve/kserve-controller:v0.11.2 + image: kserve/kserve-controller:v0.12.1 imagePullPolicy: Always + livenessProbe: + failureThreshold: 5 + httpGet: + path: /healthz + port: 8081 + initialDelaySeconds: 10 + timeoutSeconds: 5 name: manager ports: - containerPort: 9443 name: webhook-server protocol: TCP + readinessProbe: + failureThreshold: 10 + httpGet: + path: /readyz + port: 8081 + initialDelaySeconds: 10 + periodSeconds: 5 + timeoutSeconds: 5 resources: limits: cpu: 100m @@ -21409,6 +22429,7 @@ webhooks: matchExpressions: - key: serving.kserve.io/inferenceservice operator: Exists + reinvocationPolicy: IfNeeded rules: - apiGroups: - "" @@ -21416,7 +22437,6 @@ webhooks: - v1 operations: - CREATE - - UPDATE resources: - pods sideEffects: None diff --git a/contrib/kserve/kserve/kserve_kubeflow.yaml b/contrib/kserve/kserve/kserve_kubeflow.yaml index 6e9af70794..a7038f3ba9 100644 --- a/contrib/kserve/kserve/kserve_kubeflow.yaml +++ b/contrib/kserve/kserve/kserve_kubeflow.yaml @@ -872,6 +872,19 @@ spec: format: int32 type: integer type: object + resizePolicy: + items: + properties: + resourceName: + type: string + restartPolicy: + type: string + required: + - resourceName + - restartPolicy + type: object + type: array + x-kubernetes-list-type: atomic resources: properties: claims: @@ -903,6 +916,8 @@ spec: x-kubernetes-int-or-string: true type: object type: object + restartPolicy: + type: string securityContext: properties: allowPrivilegeEscalation: @@ -2299,6 +2314,19 @@ spec: format: int32 type: integer type: 
object + resizePolicy: + items: + properties: + resourceName: + type: string + restartPolicy: + type: string + required: + - resourceName + - restartPolicy + type: object + type: array + x-kubernetes-list-type: atomic resources: properties: claims: @@ -2330,6 +2358,8 @@ spec: x-kubernetes-int-or-string: true type: object type: object + restartPolicy: + type: string securityContext: properties: allowPrivilegeEscalation: @@ -2940,6 +2970,10 @@ spec: type: array type: object type: object + maxReplicas: + type: integer + minReplicas: + type: integer nodes: additionalProperties: properties: @@ -3010,6 +3044,18 @@ spec: x-kubernetes-int-or-string: true type: object type: object + scaleMetric: + enum: + - cpu + - memory + - concurrency + - rps + type: string + scaleTarget: + type: integer + timeout: + format: int64 + type: integer required: - nodes type: object @@ -3870,6 +3916,19 @@ spec: format: int32 type: integer type: object + resizePolicy: + items: + properties: + resourceName: + type: string + restartPolicy: + type: string + required: + - resourceName + - restartPolicy + type: object + type: array + x-kubernetes-list-type: atomic resources: properties: claims: @@ -3901,6 +3960,8 @@ spec: x-kubernetes-int-or-string: true type: object type: object + restartPolicy: + type: string runtimeVersion: type: string securityContext: @@ -4490,6 +4551,19 @@ spec: format: int32 type: integer type: object + resizePolicy: + items: + properties: + resourceName: + type: string + restartPolicy: + type: string + required: + - resourceName + - restartPolicy + type: object + type: array + x-kubernetes-list-type: atomic resources: properties: claims: @@ -4521,6 +4595,8 @@ spec: x-kubernetes-int-or-string: true type: object type: object + restartPolicy: + type: string runtimeVersion: type: string securityContext: @@ -5124,6 +5200,19 @@ spec: format: int32 type: integer type: object + resizePolicy: + items: + properties: + resourceName: + type: string + restartPolicy: + type: string + 
required: + - resourceName + - restartPolicy + type: object + type: array + x-kubernetes-list-type: atomic resources: properties: claims: @@ -5155,6 +5244,8 @@ spec: x-kubernetes-int-or-string: true type: object type: object + restartPolicy: + type: string securityContext: properties: allowPrivilegeEscalation: @@ -5782,6 +5873,19 @@ spec: format: int32 type: integer type: object + resizePolicy: + items: + properties: + resourceName: + type: string + restartPolicy: + type: string + required: + - resourceName + - restartPolicy + type: object + type: array + x-kubernetes-list-type: atomic resources: properties: claims: @@ -5813,6 +5917,8 @@ spec: x-kubernetes-int-or-string: true type: object type: object + restartPolicy: + type: string securityContext: properties: allowPrivilegeEscalation: @@ -7739,6 +7845,19 @@ spec: format: int32 type: integer type: object + resizePolicy: + items: + properties: + resourceName: + type: string + restartPolicy: + type: string + required: + - resourceName + - restartPolicy + type: object + type: array + x-kubernetes-list-type: atomic resources: properties: claims: @@ -7770,6 +7889,8 @@ spec: x-kubernetes-int-or-string: true type: object type: object + restartPolicy: + type: string securityContext: properties: allowPrivilegeEscalation: @@ -7927,81 +8048,708 @@ spec: name: type: string required: - - devicePath - - name + - devicePath + - name + type: object + type: array + volumeMounts: + items: + properties: + mountPath: + type: string + mountPropagation: + type: string + name: + type: string + readOnly: + type: boolean + subPath: + type: string + subPathExpr: + type: string + required: + - mountPath + - name + type: object + type: array + workingDir: + type: string + required: + - name + type: object + type: array + dnsConfig: + properties: + nameservers: + items: + type: string + type: array + options: + items: + properties: + name: + type: string + value: + type: string + type: object + type: array + searches: + items: + type: string 
+ type: array + type: object + dnsPolicy: + type: string + enableServiceLinks: + type: boolean + hostAliases: + items: + properties: + hostnames: + items: + type: string + type: array + ip: + type: string + type: object + type: array + hostIPC: + type: boolean + hostNetwork: + type: boolean + hostPID: + type: boolean + hostUsers: + type: boolean + hostname: + type: string + huggingface: + properties: + args: + items: + type: string + type: array + command: + items: + type: string + type: array + env: + items: + properties: + name: + type: string + value: + type: string + valueFrom: + properties: + configMapKeyRef: + properties: + key: + type: string + name: + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + fieldRef: + properties: + apiVersion: + type: string + fieldPath: + type: string + required: + - fieldPath + type: object + x-kubernetes-map-type: atomic + resourceFieldRef: + properties: + containerName: + type: string + divisor: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + resource: + type: string + required: + - resource + type: object + x-kubernetes-map-type: atomic + secretKeyRef: + properties: + key: + type: string + name: + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + type: object + required: + - name + type: object + type: array + envFrom: + items: + properties: + configMapRef: + properties: + name: + type: string + optional: + type: boolean + type: object + x-kubernetes-map-type: atomic + prefix: + type: string + secretRef: + properties: + name: + type: string + optional: + type: boolean + type: object + x-kubernetes-map-type: atomic + type: object + type: array + image: + type: string + imagePullPolicy: + type: string + lifecycle: + properties: + postStart: + properties: 
+ exec: + properties: + command: + items: + type: string + type: array + type: object + httpGet: + properties: + host: + type: string + httpHeaders: + items: + properties: + name: + type: string + value: + type: string + required: + - name + - value + type: object + type: array + path: + type: string + port: + anyOf: + - type: integer + - type: string + x-kubernetes-int-or-string: true + scheme: + type: string + required: + - port + type: object + tcpSocket: + properties: + host: + type: string + port: + anyOf: + - type: integer + - type: string + x-kubernetes-int-or-string: true + required: + - port + type: object + type: object + preStop: + properties: + exec: + properties: + command: + items: + type: string + type: array + type: object + httpGet: + properties: + host: + type: string + httpHeaders: + items: + properties: + name: + type: string + value: + type: string + required: + - name + - value + type: object + type: array + path: + type: string + port: + anyOf: + - type: integer + - type: string + x-kubernetes-int-or-string: true + scheme: + type: string + required: + - port + type: object + tcpSocket: + properties: + host: + type: string + port: + anyOf: + - type: integer + - type: string + x-kubernetes-int-or-string: true + required: + - port + type: object + type: object + type: object + livenessProbe: + properties: + exec: + properties: + command: + items: + type: string + type: array + type: object + failureThreshold: + format: int32 + type: integer + grpc: + properties: + port: + format: int32 + type: integer + service: + type: string + required: + - port + type: object + httpGet: + properties: + host: + type: string + httpHeaders: + items: + properties: + name: + type: string + value: + type: string + required: + - name + - value + type: object + type: array + path: + type: string + port: + anyOf: + - type: integer + - type: string + x-kubernetes-int-or-string: true + scheme: + type: string + type: object + initialDelaySeconds: + format: int32 + type: 
integer + periodSeconds: + format: int32 + type: integer + successThreshold: + format: int32 + type: integer + tcpSocket: + properties: + host: + type: string + port: + anyOf: + - type: integer + - type: string + x-kubernetes-int-or-string: true + type: object + terminationGracePeriodSeconds: + format: int64 + type: integer + timeoutSeconds: + format: int32 + type: integer + type: object + name: + type: string + ports: + items: + properties: + containerPort: + format: int32 + type: integer + hostIP: + type: string + hostPort: + format: int32 + type: integer + name: + type: string + protocol: + default: TCP + type: string + required: + - containerPort + type: object + type: array + x-kubernetes-list-map-keys: + - containerPort + - protocol + x-kubernetes-list-type: map + protocolVersion: + type: string + readinessProbe: + properties: + exec: + properties: + command: + items: + type: string + type: array + type: object + failureThreshold: + format: int32 + type: integer + grpc: + properties: + port: + format: int32 + type: integer + service: + type: string + required: + - port + type: object + httpGet: + properties: + host: + type: string + httpHeaders: + items: + properties: + name: + type: string + value: + type: string + required: + - name + - value + type: object + type: array + path: + type: string + port: + anyOf: + - type: integer + - type: string + x-kubernetes-int-or-string: true + scheme: + type: string + type: object + initialDelaySeconds: + format: int32 + type: integer + periodSeconds: + format: int32 + type: integer + successThreshold: + format: int32 + type: integer + tcpSocket: + properties: + host: + type: string + port: + anyOf: + - type: integer + - type: string + x-kubernetes-int-or-string: true + type: object + terminationGracePeriodSeconds: + format: int64 + type: integer + timeoutSeconds: + format: int32 + type: integer + type: object + resizePolicy: + items: + properties: + resourceName: + type: string + restartPolicy: + type: string + 
required: + - resourceName + - restartPolicy + type: object + type: array + x-kubernetes-list-type: atomic + resources: + properties: + claims: + items: + properties: + name: + type: string + required: + - name + type: object + type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map + limits: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + type: object + requests: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + type: object + type: object + restartPolicy: + type: string + runtimeVersion: + type: string + securityContext: + properties: + allowPrivilegeEscalation: + type: boolean + capabilities: + properties: + add: + items: + type: string + type: array + drop: + items: + type: string + type: array + type: object + privileged: + type: boolean + procMount: + type: string + readOnlyRootFilesystem: + type: boolean + runAsGroup: + format: int64 + type: integer + runAsNonRoot: + type: boolean + runAsUser: + format: int64 + type: integer + seLinuxOptions: + properties: + level: + type: string + role: + type: string + type: + type: string + user: + type: string + type: object + seccompProfile: + properties: + localhostProfile: + type: string + type: + type: string + required: + - type + type: object + windowsOptions: + properties: + gmsaCredentialSpec: + type: string + gmsaCredentialSpecName: + type: string + hostProcess: + type: boolean + runAsUserName: + type: string + type: object + type: object + startupProbe: + properties: + exec: + properties: + command: + items: + type: string + type: array + type: object + failureThreshold: + format: int32 + type: integer + grpc: + properties: + port: 
+ format: int32 + type: integer + service: + type: string + required: + - port + type: object + httpGet: + properties: + host: + type: string + httpHeaders: + items: + properties: + name: + type: string + value: + type: string + required: + - name + - value + type: object + type: array + path: + type: string + port: + anyOf: + - type: integer + - type: string + x-kubernetes-int-or-string: true + scheme: + type: string + required: + - port type: object - type: array - volumeMounts: - items: + initialDelaySeconds: + format: int32 + type: integer + periodSeconds: + format: int32 + type: integer + successThreshold: + format: int32 + type: integer + tcpSocket: properties: - mountPath: - type: string - mountPropagation: - type: string - name: - type: string - readOnly: - type: boolean - subPath: - type: string - subPathExpr: + host: type: string + port: + anyOf: + - type: integer + - type: string + x-kubernetes-int-or-string: true required: - - mountPath - - name + - port type: object - type: array - workingDir: - type: string - required: - - name - type: object - type: array - dnsConfig: - properties: - nameservers: + terminationGracePeriodSeconds: + format: int64 + type: integer + timeoutSeconds: + format: int32 + type: integer + type: object + stdin: + type: boolean + stdinOnce: + type: boolean + storage: + properties: + key: + type: string + parameters: + additionalProperties: + type: string + type: object + path: + type: string + schemaPath: + type: string + type: object + storageUri: + type: string + terminationMessagePath: + type: string + terminationMessagePolicy: + type: string + tty: + type: boolean + volumeDevices: items: - type: string + properties: + devicePath: + type: string + name: + type: string + required: + - devicePath + - name + type: object type: array - options: + volumeMounts: items: properties: + mountPath: + type: string + mountPropagation: + type: string name: type: string - value: + readOnly: + type: boolean + subPath: + type: string + 
subPathExpr: type: string + required: + - mountPath + - name type: object type: array - searches: - items: - type: string - type: array + workingDir: + type: string type: object - dnsPolicy: - type: string - enableServiceLinks: - type: boolean - hostAliases: - items: - properties: - hostnames: - items: - type: string - type: array - ip: - type: string - type: object - type: array - hostIPC: - type: boolean - hostNetwork: - type: boolean - hostPID: - type: boolean - hostUsers: - type: boolean - hostname: - type: string imagePullSecrets: items: properties: @@ -8397,6 +9145,19 @@ spec: format: int32 type: integer type: object + resizePolicy: + items: + properties: + resourceName: + type: string + restartPolicy: + type: string + required: + - resourceName + - restartPolicy + type: object + type: array + x-kubernetes-list-type: atomic resources: properties: claims: @@ -8428,6 +9189,8 @@ spec: x-kubernetes-int-or-string: true type: object type: object + restartPolicy: + type: string securityContext: properties: allowPrivilegeEscalation: @@ -8999,6 +9762,19 @@ spec: format: int32 type: integer type: object + resizePolicy: + items: + properties: + resourceName: + type: string + restartPolicy: + type: string + required: + - resourceName + - restartPolicy + type: object + type: array + x-kubernetes-list-type: atomic resources: properties: claims: @@ -9030,6 +9806,8 @@ spec: x-kubernetes-int-or-string: true type: object type: object + restartPolicy: + type: string runtimeVersion: type: string securityContext: @@ -9635,6 +10413,19 @@ spec: format: int32 type: integer type: object + resizePolicy: + items: + properties: + resourceName: + type: string + restartPolicy: + type: string + required: + - resourceName + - restartPolicy + type: object + type: array + x-kubernetes-list-type: atomic resources: properties: claims: @@ -9666,6 +10457,8 @@ spec: x-kubernetes-int-or-string: true type: object type: object + restartPolicy: + type: string runtime: type: string runtimeVersion: @@ 
-10256,6 +11049,19 @@ spec: format: int32 type: integer type: object + resizePolicy: + items: + properties: + resourceName: + type: string + restartPolicy: + type: string + required: + - resourceName + - restartPolicy + type: object + type: array + x-kubernetes-list-type: atomic resources: properties: claims: @@ -10287,6 +11093,8 @@ spec: x-kubernetes-int-or-string: true type: object type: object + restartPolicy: + type: string runtimeVersion: type: string securityContext: @@ -10881,6 +11689,19 @@ spec: format: int32 type: integer type: object + resizePolicy: + items: + properties: + resourceName: + type: string + restartPolicy: + type: string + required: + - resourceName + - restartPolicy + type: object + type: array + x-kubernetes-list-type: atomic resources: properties: claims: @@ -10912,6 +11733,8 @@ spec: x-kubernetes-int-or-string: true type: object type: object + restartPolicy: + type: string runtimeVersion: type: string securityContext: @@ -11493,6 +12316,19 @@ spec: format: int32 type: integer type: object + resizePolicy: + items: + properties: + resourceName: + type: string + restartPolicy: + type: string + required: + - resourceName + - restartPolicy + type: object + type: array + x-kubernetes-list-type: atomic resources: properties: claims: @@ -11524,6 +12360,8 @@ spec: x-kubernetes-int-or-string: true type: object type: object + restartPolicy: + type: string runtimeVersion: type: string securityContext: @@ -12112,6 +12950,19 @@ spec: format: int32 type: integer type: object + resizePolicy: + items: + properties: + resourceName: + type: string + restartPolicy: + type: string + required: + - resourceName + - restartPolicy + type: object + type: array + x-kubernetes-list-type: atomic resources: properties: claims: @@ -12143,6 +12994,8 @@ spec: x-kubernetes-int-or-string: true type: object type: object + restartPolicy: + type: string runtimeVersion: type: string securityContext: @@ -12851,6 +13704,19 @@ spec: format: int32 type: integer type: object + 
resizePolicy: + items: + properties: + resourceName: + type: string + restartPolicy: + type: string + required: + - resourceName + - restartPolicy + type: object + type: array + x-kubernetes-list-type: atomic resources: properties: claims: @@ -12882,6 +13748,8 @@ spec: x-kubernetes-int-or-string: true type: object type: object + restartPolicy: + type: string runtimeVersion: type: string securityContext: @@ -13465,6 +14333,19 @@ spec: format: int32 type: integer type: object + resizePolicy: + items: + properties: + resourceName: + type: string + restartPolicy: + type: string + required: + - resourceName + - restartPolicy + type: object + type: array + x-kubernetes-list-type: atomic resources: properties: claims: @@ -13496,6 +14377,8 @@ spec: x-kubernetes-int-or-string: true type: object type: object + restartPolicy: + type: string runtimeVersion: type: string securityContext: @@ -14155,6 +15038,19 @@ spec: format: int32 type: integer type: object + resizePolicy: + items: + properties: + resourceName: + type: string + restartPolicy: + type: string + required: + - resourceName + - restartPolicy + type: object + type: array + x-kubernetes-list-type: atomic resources: properties: claims: @@ -14186,6 +15082,8 @@ spec: x-kubernetes-int-or-string: true type: object type: object + restartPolicy: + type: string runtimeVersion: type: string securityContext: @@ -15478,6 +16376,19 @@ spec: format: int32 type: integer type: object + resizePolicy: + items: + properties: + resourceName: + type: string + restartPolicy: + type: string + required: + - resourceName + - restartPolicy + type: object + type: array + x-kubernetes-list-type: atomic resources: properties: claims: @@ -15509,6 +16420,8 @@ spec: x-kubernetes-int-or-string: true type: object type: object + restartPolicy: + type: string runtimeVersion: type: string securityContext: @@ -16485,6 +17398,19 @@ spec: format: int32 type: integer type: object + resizePolicy: + items: + properties: + resourceName: + type: string + 
restartPolicy: + type: string + required: + - resourceName + - restartPolicy + type: object + type: array + x-kubernetes-list-type: atomic resources: properties: claims: @@ -16516,6 +17442,8 @@ spec: x-kubernetes-int-or-string: true type: object type: object + restartPolicy: + type: string securityContext: properties: allowPrivilegeEscalation: @@ -17143,6 +18071,19 @@ spec: format: int32 type: integer type: object + resizePolicy: + items: + properties: + resourceName: + type: string + restartPolicy: + type: string + required: + - resourceName + - restartPolicy + type: object + type: array + x-kubernetes-list-type: atomic resources: properties: claims: @@ -17174,6 +18115,8 @@ spec: x-kubernetes-int-or-string: true type: object type: object + restartPolicy: + type: string securityContext: properties: allowPrivilegeEscalation: @@ -18335,6 +19278,8 @@ spec: properties: CACerts: type: string + audience: + type: string name: type: string url: @@ -18351,6 +19296,8 @@ spec: properties: CACerts: type: string + audience: + type: string name: type: string url: @@ -19370,6 +20317,19 @@ spec: format: int32 type: integer type: object + resizePolicy: + items: + properties: + resourceName: + type: string + restartPolicy: + type: string + required: + - resourceName + - restartPolicy + type: object + type: array + x-kubernetes-list-type: atomic resources: properties: claims: @@ -19401,6 +20361,8 @@ spec: x-kubernetes-int-or-string: true type: object type: object + restartPolicy: + type: string securityContext: properties: allowPrivilegeEscalation: @@ -20450,6 +21412,8 @@ spec: properties: CACerts: type: string + audience: + type: string name: type: string url: @@ -21019,10 +21983,10 @@ subjects: --- apiVersion: v1 data: - _example: "################################\n# #\n# EXAMPLE CONFIGURATION #\n# #\n################################\n\n# This block is not actually functional configuration,\n# but serves to illustrate the available configuration\n# options and document them in a 
way that is accessible\n# to users that `kubectl edit` this config map.\n#\n# These sample configuration options may be copied out of\n# this example block and unindented to be in the data block\n# to actually change the configuration.\n\n# ====================================== EXPLAINERS CONFIGURATION ======================================\n# Example\nexplainers: |-\n {\n \"alibi\": {\n \"image\" : \"kserve/alibi-explainer\",\n \"defaultImageVersion\": \"latest\"\n },\n \"art\": {\n \"image\" : \"kserve/art-explainer\",\n \"defaultImageVersion\": \"latest\"\n }\n }\n# Alibi and Art Explainer runtime configuration\n explainers: |-\n {\n # Alibi explainer runtime configuration\n \"alibi\": {\n # image contains the default Alibi explainer serving runtime image uri.\n \"image\" : \"kserve/alibi-explainer\",\n \n # defautltImageVersion contains the Alibi explainer serving runtime default image version.\n \"defaultImageVersion\": \"latest\"\n },\n # Art explainer runtime configuration\n \"art\": {\n # image contains the default Art explainer serving runtime image uri.\n \"image\" : \"kserve/art-explainer\",\n \n # defautltImageVersion contains the Art explainer serving runtime default image version.\n \"defaultImageVersion\": \"latest\"\n }\n }\n \n # ====================================== STORAGE INITIALIZER CONFIGURATION ======================================\n # Example\n storageInitializer: |-\n {\n \"image\" : \"kserve/storage-initializer:v0.11.2\",\n \"memoryRequest\": \"100Mi\",\n \"memoryLimit\": \"1Gi\",\n \"cpuRequest\": \"100m\",\n \"cpuLimit\": \"1\",\n \"enableDirectPvcVolumeMount\": false\n }\n storageInitializer: |-\n {\n # image contains the default storage initializer image uri.\n \"image\" : \"kserve/storage-initializer:v0.11.2\",\n \n # memoryRequest is the requests.memory to set for the storage initializer init container.\n \"memoryRequest\": \"100Mi\",\n \n # memoryLimit is the limits.memory to set for the storage initializer init container.\n 
\"memoryLimit\": \"1Gi\",\n \n # cpuRequest is the requests.cpu to set for the storage initializer init container.\n \"cpuRequest\": \"100m\",\n \n # cpuLimit is the limits.cpu to set for the storage initializer init container.\n \"cpuLimit\": \"1\",\n \n # enableDirectPvcVolumeMount controls whether users can mount pvc volumes directly.\n # if pvc volume is provided in storageuri then the pvc volume is directly mounted to /mnt/models in the user container.\n # rather than symlink it to a shared volume. For more info see https://github.com/kserve/kserve/issues/2737\n \"enableDirectPvcVolumeMount\": false\n }\n \n # ====================================== CREDENTIALS ======================================\n # Example\n credentials: |-\n {\n \"storageSpecSecretName\": \"storage-config\",\n \"storageSecretNameAnnotation\": \"serving.kserve.io/storageSecretName\",\n \"gcs\": {\n \"gcsCredentialFileName\": \"gcloud-application-credentials.json\"\n },\n \"s3\": {\n \"s3AccessKeyIDName\": \"AWS_ACCESS_KEY_ID\",\n \"s3SecretAccessKeyName\": \"AWS_SECRET_ACCESS_KEY\",\n \"s3Endpoint\": \"\",\n \"s3UseHttps\": \"\",\n \"s3Region\": \"\",\n \"s3VerifySSL\": \"\",\n \"s3UseVirtualBucket\": \"\",\n \"s3UseAnonymousCredential\": \"\",\n \"s3CABundle\": \"\"\n }\n }\n # This is a global configuration used for downloading models from the cloud storage.\n # You can override this configuration by specifying the annotations on service account or static secret.\n # https://kserve.github.io/website/master/modelserving/storage/s3/s3/\n # For a quick reference about AWS ENV variables:\n # AWS Cli: https://docs.aws.amazon.com/cli/latest/userguide/cli-configure-envvars.html\n # Boto: https://boto3.amazonaws.com/v1/documentation/api/latest/guide/configuration.html#using-environment-variables\n #\n # The `s3AccessKeyIDName` and `s3SecretAccessKeyName` fields are only used from this configmap when static credentials (IAM User Access Key Secret)\n # are used as the authentication method for AWS 
S3.\n # The rest of the fields are used in both authentication methods (IAM Role for Service Account & IAM User Access Key Secret) if a non-empty value is provided.\n credentials: |-\n {\n # storageSpecSecretName contains the secret name which has the credentials for downloading the model.\n # This option is used when specifying the storage spec on isvc yaml.\n \"storageSpecSecretName\": \"storage-config\",\n\n # The annotation can be specified on isvc yaml to allow overriding with the secret name reference from the annotation value.\n # When using storageUri the order of the precedence is: secret name reference annotation > secret name references from service account\n # When using storageSpec the order of the precedence is: secret name reference annotation > storageSpecSecretName in configmap\n\n # Configuration for google cloud storage\n \"gcs\": {\n # gcsCredentialFileName specifies the filename of the gcs credential\n \"gcsCredentialFileName\": \"gcloud-application-credentials.json\"\n },\n \n # Configuration for aws s3 storage. 
This add the corresponding environmental variables to the storage initializer init container.\n # For more info on s3 storage see https://kserve.github.io/website/master/modelserving/storage/s3/s3/\n \"s3\": {\n # s3AccessKeyIDName specifies the s3 access key id name\n \"s3AccessKeyIDName\": \"AWS_ACCESS_KEY_ID\",\n \n # s3SecretAccessKeyName specifies the s3 secret access key name\n \"s3SecretAccessKeyName\": \"AWS_SECRET_ACCESS_KEY\",\n \n # s3Endpoint specifies the s3 endpoint\n \"s3Endpoint\": \"\",\n \n # s3UseHttps controls whether to use secure https or unsecure http to download models.\n # Allowed values are 0 and 1.\n \"s3UseHttps\": \"\",\n \n # s3Region specifies the region of the bucket.\n \"s3Region\": \"\",\n \n # s3VerifySSL controls whether to verify the tls/ssl certificate.\n \"s3VerifySSL\": \"\",\n \n # s3UseVirtualBucket configures whether it is a virtual bucket or not.\n \"s3UseVirtualBucket\": \"\",\n \n # s3UseAnonymousCredential configures whether to use anonymous credentials to download the model or not.\n \"s3UseAnonymousCredential\": \"\",\n \n # s3CABundle specifies the path to a certificate bundle to use for HTTPS certificate validation.\n \"s3CABundle\": \"\"\n }\n }\n \n # ====================================== INGRESS CONFIGURATION ======================================\n # Example\n ingress: |-\n {\n \"ingressGateway\" : \"knative-serving/knative-ingress-gateway\",\n \"ingressService\" : \"istio-ingressgateway.istio-system.svc.cluster.local\",\n \"localGateway\" : \"knative-serving/knative-local-gateway\",\n \"localGatewayService\" : \"knative-local-gateway.istio-system.svc.cluster.local\",\n \"ingressDomain\" : \"example.com\",\n \"ingressClassName\" : \"istio\",\n \"domainTemplate\": \"{{ .Name }}-{{ .Namespace }}.{{ .IngressDomain }}\",\n \"urlScheme\": \"http\",\n \"disableIstioVirtualHost\": false\n }\n ingress: |-\n {\n # ingressGateway specifies the ingress gateway to serve external traffic.\n # The gateway should be 
specified in format /\n # NOTE: This configuration only applicable for serverless deployment with Istio configured as network layer.\n \"ingressGateway\" : \"knative-serving/knative-ingress-gateway\",\n \n # ingressService specifies the hostname of the ingress service.\n # NOTE: This configuration only applicable for serverless deployment with Istio configured as network layer.\n \"ingressService\" : \"istio-ingressgateway.istio-system.svc.cluster.local\",\n \n # localGateway specifies the gateway which handles the network traffic within the cluster.\n # NOTE: This configuration only applicable for serverless deployment with Istio configured as network layer.\n \"localGateway\" : \"knative-serving/knative-local-gateway\",\n \n # localGatewayService specifies the hostname of the local gateway service.\n # NOTE: This configuration only applicable for serverless deployment with Istio configured as network layer.\n \"localGatewayService\" : \"knative-local-gateway.istio-system.svc.cluster.local\",\n \n # ingressDomain specifies the domain name which is used for creating the url.\n # If ingressDomain is empty then example.com is used as default domain.\n # NOTE: This configuration only applicable for raw deployment.\n \"ingressDomain\" : \"example.com\",\n \n # ingressClassName specifies the ingress controller to use for ingress traffic.\n # This is optional and if omitted the default ingress in the cluster is used.\n # https://kubernetes.io/docs/concepts/services-networking/ingress/#default-ingress-class\n # NOTE: This configuration only applicable for raw deployment.\n \"ingressClassName\" : \"istio\",\n \n # domainTemplate specifies the template for generating domain/url for each inference service by combining variable from:\n # Name of the inference service ( {{ .Name}} )\n # Namespace of the inference service ( {{ .Namespace }} )\n # Annotation of the inference service ( {{ .Annotations.key }} )\n # Label of the inference service ( {{ .Labels.key }} )\n # 
IngressDomain ( {{ .IngressDomain }} )\n # If domain template is empty the default template {{ .Name }}-{{ .Namespace }}.{{ .IngressDomain }} is used.\n # NOTE: This configuration only applicable for raw deployment.\n \"domainTemplate\": \"{{ .Name }}-{{ .Namespace }}.{{ .IngressDomain }}\",\n \n # urlScheme specifies the url scheme to use for inference service and inference graph.\n # If urlScheme is empty then by default http is used.\n \"urlScheme\": \"http\",\n \n # disableIstioVirtualHost controls whether to use istio as network layer.\n # By default istio is used as the network layer. When DisableIstioVirtualHost is true, KServe does not\n # create the top level virtual service thus Istio is no longer required for serverless mode.\n # By setting this field to true, user can use other networking layers supported by knative.\n # For more info https://github.com/kserve/kserve/pull/2380, https://kserve.github.io/website/master/admin/serverless/kourier_networking/.\n # NOTE: This configuration is only applicable to serverless deployment.\n \"disableIstioVirtualHost\": false\n \n # pathTemplate specifies the template for generating path based url for each inference service.\n # The following variables can be used in the template for generating url.\n # Name of the inference service ( {{ .Name}} )\n # Namespace of the inference service ( {{ .Namespace }} )\n # For more info https://github.com/kserve/kserve/issues/2257.\n # NOTE: This configuration only applicable to serverless deployment.\n \"pathTemplate\": \"/serving/{{ .Namespace }}/{{ .Name }}\"\n }\n \n # ====================================== LOGGER CONFIGURATION ======================================\n # Example\n logger: |-\n {\n \"image\" : \"kserve/agent:v0.11.2\",\n \"memoryRequest\": \"100Mi\",\n \"memoryLimit\": \"1Gi\",\n \"cpuRequest\": \"100m\",\n \"cpuLimit\": \"1\",\n \"defaultUrl\": \"http://default-broker\"\n }\n logger: |-\n {\n # image contains the default logger image uri.\n \"image\" : 
\"kserve/agent:v0.11.2\",\n \n # memoryRequest is the requests.memory to set for the logger container.\n \"memoryRequest\": \"100Mi\",\n \n # memoryLimit is the limits.memory to set for the logger container.\n \"memoryLimit\": \"1Gi\",\n \n # cpuRequest is the requests.cpu to set for the logger container.\n \"cpuRequest\": \"100m\",\n \n # cpuLimit is the limits.cpu to set for the logger container.\n \"cpuLimit\": \"1\",\n \n # defaultUrl specifies the default logger url. If logger is not specified in the resource this url is used.\n \"defaultUrl\": \"http://default-broker\"\n }\n \n # ====================================== BATCHER CONFIGURATION ======================================\n # Example\n batcher: |-\n {\n \"image\" : \"kserve/agent:v0.11.2\",\n \"memoryRequest\": \"1Gi\",\n \"memoryLimit\": \"1Gi\",\n \"cpuRequest\": \"1\",\n \"cpuLimit\": \"1\"\n }\n batcher: |-\n {\n # image contains the default batcher image uri.\n \"image\" : \"kserve/agent:v0.11.2\",\n \n # memoryRequest is the requests.memory to set for the batcher container.\n \"memoryRequest\": \"1Gi\",\n \n # memoryLimit is the limits.memory to set for the batcher container.\n \"memoryLimit\": \"1Gi\",\n \n # cpuRequest is the requests.cpu to set for the batcher container.\n \"cpuRequest\": \"1\",\n \n # cpuLimit is the limits.cpu to set for the batcher container.\n \"cpuLimit\": \"1\"\n }\n \n # ====================================== AGENT CONFIGURATION ======================================\n # Example\n agent: |-\n {\n \"image\" : \"kserve/agent:v0.11.2\",\n \"memoryRequest\": \"100Mi\",\n \"memoryLimit\": \"1Gi\",\n \"cpuRequest\": \"100m\",\n \"cpuLimit\": \"1\"\n }\n agent: |-\n {\n # image contains the default agent image uri.\n \"image\" : \"kserve/agent:v0.11.2\",\n \n # memoryRequest is the requests.memory to set for the agent container.\n \"memoryRequest\": \"100Mi\",\n \n # memoryLimit is the limits.memory to set for the agent container.\n \"memoryLimit\": \"1Gi\",\n \n # cpuRequest 
is the requests.cpu to set for the agent container.\n \"cpuRequest\": \"100m\",\n \n # cpuLimit is the limits.cpu to set for the agent container.\n \"cpuLimit\": \"1\"\n }\n \n # ====================================== ROUTER CONFIGURATION ======================================\n # Example\n router: |-\n {\n \"image\" : \"kserve/router:v0.11.2\",\n \"memoryRequest\": \"100Mi\",\n \"memoryLimit\": \"1Gi\",\n \"cpuRequest\": \"100m\",\n \"cpuLimit\": \"1\"\n }\n # router is the implementation of inference graph.\n router: |-\n {\n # image contains the default router image uri.\n \"image\" : \"kserve/router:v0.11.2\",\n \n # memoryRequest is the requests.memory to set for the router container.\n \"memoryRequest\": \"100Mi\",\n \n # memoryLimit is the limits.memory to set for the router container.\n \"memoryLimit\": \"1Gi\",\n \n # cpuRequest is the requests.cpu to set for the router container.\n \"cpuRequest\": \"100m\",\n \n # cpuLimit is the limits.cpu to set for the router container.\n \"cpuLimit\": \"1\"\n }\n \n # ====================================== DEPLOYMENT CONFIGURATION ======================================\n # Example\n deploy: |-\n {\n \"defaultDeploymentMode\": \"Serverless\"\n }\n deploy: |-\n {\n # defaultDeploymentMode specifies the default deployment mode of the kserve. The supported values are\n # Serverless, RawDeployment and ModelMesh. 
Users can override the deployment mode at service level\n # by adding the annotation serving.kserve.io/deploymentMode.For more info on deployment mode visit\n # Serverless https://kserve.github.io/website/master/admin/serverless/serverless/\n # RawDeployment https://kserve.github.io/website/master/admin/kubernetes_deployment/\n # ModelMesh https://kserve.github.io/website/master/admin/modelmesh/\n \"defaultDeploymentMode\": \"Serverless\"\n }\n \n # ====================================== METRICS CONFIGURATION ======================================\n # Example\n metricsAggregator: |-\n {\n \"enableMetricAggregation\": \"false\",\n \"enablePrometheusScraping\" : \"false\"\n }\n # For more info see https://github.com/kserve/kserve/blob/master/qpext/README.md\n metricsAggregator: |-\n {\n # enableMetricAggregation configures metric aggregation annotation. This adds the annotation serving.kserve.io/enable-metric-aggregation to every\n # service with the specified boolean value. If true enables metric aggregation in queue-proxy by setting env vars in the queue proxy container\n # to configure scraping ports.\n \"enableMetricAggregation\": \"false\",\n \n # enablePrometheusScraping configures metric aggregation annotation. This adds the annotation serving.kserve.io/enable-metric-aggregation to every\n # service with the specified boolean value. If true, prometheus annotations are added to the pod. 
If serving.kserve.io/enable-metric-aggregation is false,\n # the prometheus port is set with the default prometheus scraping port 9090, otherwise the prometheus port annotation is set with the metric aggregation port.\n \"enablePrometheusScraping\" : \"false\"\n }" + _example: "################################\n# #\n# EXAMPLE CONFIGURATION #\n# #\n################################\n\n# This block is not actually functional configuration,\n# but serves to illustrate the available configuration\n# options and document them in a way that is accessible\n# to users that `kubectl edit` this config map.\n#\n# These sample configuration options may be copied out of\n# this example block and unindented to be in the data block\n# to actually change the configuration.\n\n# ====================================== EXPLAINERS CONFIGURATION ======================================\n# Example\nexplainers: |-\n {\n \"alibi\": {\n \"image\" : \"kserve/alibi-explainer\",\n \"defaultImageVersion\": \"latest\"\n },\n \"art\": {\n \"image\" : \"kserve/art-explainer\",\n \"defaultImageVersion\": \"latest\"\n }\n }\n# Alibi and Art Explainer runtime configuration\n explainers: |-\n {\n # Alibi explainer runtime configuration\n \"alibi\": {\n # image contains the default Alibi explainer serving runtime image uri.\n \"image\" : \"kserve/alibi-explainer\",\n \n # defaultImageVersion contains the Alibi explainer serving runtime default image version.\n \"defaultImageVersion\": \"latest\"\n },\n # Art explainer runtime configuration\n \"art\": {\n # image contains the default Art explainer serving runtime image uri.\n \"image\" : \"kserve/art-explainer\",\n \n # defaultImageVersion contains the Art explainer serving runtime default image version.\n \"defaultImageVersion\": \"latest\"\n }\n }\n \n # ====================================== STORAGE INITIALIZER CONFIGURATION ======================================\n # Example\n storageInitializer: |-\n {\n \"image\" :
\"kserve/storage-initializer:v0.12.1\",\n \"memoryRequest\": \"100Mi\",\n \"memoryLimit\": \"1Gi\",\n \"cpuRequest\": \"100m\",\n \"cpuLimit\": \"1\",\n \"caBundleConfigMapName\": \"\",\n \"caBundleVolumeMountPath\": \"/etc/ssl/custom-certs\",\n \"enableDirectPvcVolumeMount\": false,\n \"enableModelcar\": false,\n \"cpuModelcar\": \"10m\",\n \"memoryModelcar\": \"15Mi\"\n }\n storageInitializer: |-\n {\n # image contains the default storage initializer image uri.\n \"image\" : \"kserve/storage-initializer:v0.12.1\",\n \n # memoryRequest is the requests.memory to set for the storage initializer init container.\n \"memoryRequest\": \"100Mi\",\n \n # memoryLimit is the limits.memory to set for the storage initializer init container.\n \"memoryLimit\": \"1Gi\",\n \n # cpuRequest is the requests.cpu to set for the storage initializer init container.\n \"cpuRequest\": \"100m\",\n \n # cpuLimit is the limits.cpu to set for the storage initializer init container.\n \"cpuLimit\": \"1\",\n \n # caBundleConfigMapName is the ConfigMap that will be copied to a user namespace for the storage initializer init container.\n \"caBundleConfigMapName\": \"\",\n\n # caBundleVolumeMountPath is the mount point for the configmap set by caBundleConfigMapName for the storage initializer init container.\n \"caBundleVolumeMountPath\": \"/etc/ssl/custom-certs\",\n\n # enableDirectPvcVolumeMount controls whether users can mount pvc volumes directly.\n # if pvc volume is provided in storageuri then the pvc volume is directly mounted to /mnt/models in the user container.\n # rather than symlink it to a shared volume. For more info see https://github.com/kserve/kserve/issues/2737\n \"enableDirectPvcVolumeMount\": true,\n\n # enableModelcar, when enabled, allows you to directly access an OCI container image by\n # using a source URL with an \"oci://\" schema.\n \"enableModelcar\": false,\n\n # cpuModelcar is the cpu request and limit that is used for the passive modelcar container.
It can be\n # set very low, but should be allowed by any Kubernetes LimitRange that might apply.\n \"cpuModelcar\": \"10m\",\n\n # memoryModelcar is the memory request and limit that is used for the passive modelcar container. It can be\n # set very low, but should be allowed by any Kubernetes LimitRange that might apply.\n \"memoryModelcar\": \"15Mi\",\n\n # uidModelcar is the UID under which the modelcar process and the main container are running.\n # Some Kubernetes clusters might require this to be root (0). If not set the user id is left untouched (default)\n \"uidModelcar\": 10\n }\n \n # ====================================== CREDENTIALS ======================================\n # Example\n credentials: |-\n {\n \"storageSpecSecretName\": \"storage-config\",\n \"storageSecretNameAnnotation\": \"serving.kserve.io/storageSecretName\",\n \"gcs\": {\n \"gcsCredentialFileName\": \"gcloud-application-credentials.json\"\n },\n \"s3\": {\n \"s3AccessKeyIDName\": \"AWS_ACCESS_KEY_ID\",\n \"s3SecretAccessKeyName\": \"AWS_SECRET_ACCESS_KEY\",\n \"s3Endpoint\": \"\",\n \"s3UseHttps\": \"\",\n \"s3Region\": \"\",\n \"s3VerifySSL\": \"\",\n \"s3UseVirtualBucket\": \"\",\n \"s3UseAccelerate\": \"\",\n \"s3UseAnonymousCredential\": \"\",\n \"s3CABundle\": \"\"\n }\n }\n # This is a global configuration used for downloading models from the cloud storage.\n # You can override this configuration by specifying the annotations on service account or static secret.\n # https://kserve.github.io/website/master/modelserving/storage/s3/s3/\n # For a quick reference about AWS ENV variables:\n # AWS Cli: https://docs.aws.amazon.com/cli/latest/userguide/cli-configure-envvars.html\n # Boto: https://boto3.amazonaws.com/v1/documentation/api/latest/guide/configuration.html#using-environment-variables\n #\n # The `s3AccessKeyIDName` and `s3SecretAccessKeyName` fields are only used from this configmap when static credentials (IAM User Access Key Secret)\n # are used as the authentication
method for AWS S3.\n # The rest of the fields are used in both authentication methods (IAM Role for Service Account & IAM User Access Key Secret) if a non-empty value is provided.\n credentials: |-\n {\n # storageSpecSecretName contains the secret name which has the credentials for downloading the model.\n # This option is used when specifying the storage spec on isvc yaml.\n \"storageSpecSecretName\": \"storage-config\",\n\n # The annotation can be specified on isvc yaml to allow overriding with the secret name reference from the annotation value.\n # When using storageUri the order of the precedence is: secret name reference annotation > secret name references from service account\n # When using storageSpec the order of the precedence is: secret name reference annotation > storageSpecSecretName in configmap\n\n # Configuration for google cloud storage\n \"gcs\": {\n # gcsCredentialFileName specifies the filename of the gcs credential\n \"gcsCredentialFileName\": \"gcloud-application-credentials.json\"\n },\n \n # Configuration for aws s3 storage. 
This adds the corresponding environment variables to the storage initializer init container.\n # For more info on s3 storage see https://kserve.github.io/website/master/modelserving/storage/s3/s3/\n \"s3\": {\n # s3AccessKeyIDName specifies the s3 access key id name\n \"s3AccessKeyIDName\": \"AWS_ACCESS_KEY_ID\",\n \n # s3SecretAccessKeyName specifies the s3 secret access key name\n \"s3SecretAccessKeyName\": \"AWS_SECRET_ACCESS_KEY\",\n \n # s3Endpoint specifies the s3 endpoint\n \"s3Endpoint\": \"\",\n \n # s3UseHttps controls whether to use secure https or unsecure http to download models.\n # Allowed values are 0 and 1.\n \"s3UseHttps\": \"\",\n \n # s3Region specifies the region of the bucket.\n \"s3Region\": \"\",\n \n # s3VerifySSL controls whether to verify the tls/ssl certificate.\n \"s3VerifySSL\": \"\",\n \n # s3UseVirtualBucket configures whether it is a virtual bucket or not.\n \"s3UseVirtualBucket\": \"\",\n\n # s3UseAccelerate configures whether to use transfer acceleration.\n \"s3UseAccelerate\": \"\",\n \n # s3UseAnonymousCredential configures whether to use anonymous credentials to download the model or not.\n \"s3UseAnonymousCredential\": \"\",\n \n # s3CABundle specifies the path to a certificate bundle to use for HTTPS certificate validation.\n \"s3CABundle\": \"\"\n }\n }\n \n # ====================================== INGRESS CONFIGURATION ======================================\n # Example\n ingress: |-\n {\n \"ingressGateway\" : \"knative-serving/knative-ingress-gateway\",\n \"ingressService\" : \"istio-ingressgateway.istio-system.svc.cluster.local\",\n \"localGateway\" : \"knative-serving/knative-local-gateway\",\n \"localGatewayService\" : \"knative-local-gateway.istio-system.svc.cluster.local\",\n \"ingressDomain\" : \"example.com\",\n \"ingressClassName\" : \"istio\",\n \"domainTemplate\": \"{{ .Name }}-{{ .Namespace }}.{{ .IngressDomain }}\",\n \"urlScheme\": \"http\",\n \"disableIstioVirtualHost\": false,\n \"disableIngressCreation\":
false\n }\n ingress: |-\n {\n # ingressGateway specifies the ingress gateway to serve external traffic.\n # The gateway should be specified in format <gateway namespace>/<gateway name>\n # NOTE: This configuration only applicable for serverless deployment with Istio configured as network layer.\n \"ingressGateway\" : \"knative-serving/knative-ingress-gateway\",\n \n # ingressService specifies the hostname of the ingress service.\n # NOTE: This configuration only applicable for serverless deployment with Istio configured as network layer.\n \"ingressService\" : \"istio-ingressgateway.istio-system.svc.cluster.local\",\n \n # localGateway specifies the gateway which handles the network traffic within the cluster.\n # NOTE: This configuration only applicable for serverless deployment with Istio configured as network layer.\n \"localGateway\" : \"knative-serving/knative-local-gateway\",\n \n # localGatewayService specifies the hostname of the local gateway service.\n # NOTE: This configuration only applicable for serverless deployment with Istio configured as network layer.\n \"localGatewayService\" : \"knative-local-gateway.istio-system.svc.cluster.local\",\n \n # ingressDomain specifies the domain name which is used for creating the url.\n # If ingressDomain is empty then example.com is used as default domain.\n # NOTE: This configuration only applicable for raw deployment.\n \"ingressDomain\" : \"example.com\",\n \n # ingressClassName specifies the ingress controller to use for ingress traffic.\n # This is optional and if omitted the default ingress in the cluster is used.\n # https://kubernetes.io/docs/concepts/services-networking/ingress/#default-ingress-class\n # NOTE: This configuration only applicable for raw deployment.\n \"ingressClassName\" : \"istio\",\n \n # domainTemplate specifies the template for generating domain/url for each inference service by combining variable from:\n # Name of the inference service ( {{ .Name}} )\n # Namespace of the inference service ( {{ .Namespace }} )\n #
Annotation of the inference service ( {{ .Annotations.key }} )\n # Label of the inference service ( {{ .Labels.key }} )\n # IngressDomain ( {{ .IngressDomain }} )\n # If domain template is empty the default template {{ .Name }}-{{ .Namespace }}.{{ .IngressDomain }} is used.\n # NOTE: This configuration only applicable for raw deployment.\n \"domainTemplate\": \"{{ .Name }}-{{ .Namespace }}.{{ .IngressDomain }}\",\n \n # urlScheme specifies the url scheme to use for inference service and inference graph.\n # If urlScheme is empty then by default http is used.\n \"urlScheme\": \"http\",\n \n # disableIstioVirtualHost controls whether to use istio as network layer.\n # By default istio is used as the network layer. When DisableIstioVirtualHost is true, KServe does not\n # create the top level virtual service thus Istio is no longer required for serverless mode.\n # By setting this field to true, user can use other networking layers supported by knative.\n # For more info https://github.com/kserve/kserve/pull/2380, https://kserve.github.io/website/master/admin/serverless/kourier_networking/.\n # NOTE: This configuration is only applicable to serverless deployment.\n \"disableIstioVirtualHost\": false,\n\n # disableIngressCreation controls whether to disable ingress creation for raw deployment mode.\n \"disableIngressCreation\": false,\n \n # pathTemplate specifies the template for generating path based url for each inference service.\n # The following variables can be used in the template for generating url.\n # Name of the inference service ( {{ .Name}} )\n # Namespace of the inference service ( {{ .Namespace }} )\n # For more info https://github.com/kserve/kserve/issues/2257.\n # NOTE: This configuration only applicable to serverless deployment.\n \"pathTemplate\": \"/serving/{{ .Namespace }}/{{ .Name }}\"\n }\n \n # ====================================== LOGGER CONFIGURATION ======================================\n # Example\n logger: |-\n {\n \"image\" : 
\"kserve/agent:v0.12.1\",\n \"memoryRequest\": \"100Mi\",\n \"memoryLimit\": \"1Gi\",\n \"cpuRequest\": \"100m\",\n \"cpuLimit\": \"1\",\n \"defaultUrl\": \"http://default-broker\"\n }\n logger: |-\n {\n # image contains the default logger image uri.\n \"image\" : \"kserve/agent:v0.12.1\",\n \n # memoryRequest is the requests.memory to set for the logger container.\n \"memoryRequest\": \"100Mi\",\n \n # memoryLimit is the limits.memory to set for the logger container.\n \"memoryLimit\": \"1Gi\",\n \n # cpuRequest is the requests.cpu to set for the logger container.\n \"cpuRequest\": \"100m\",\n \n # cpuLimit is the limits.cpu to set for the logger container.\n \"cpuLimit\": \"1\",\n \n # defaultUrl specifies the default logger url. If logger is not specified in the resource this url is used.\n \"defaultUrl\": \"http://default-broker\"\n }\n \n # ====================================== BATCHER CONFIGURATION ======================================\n # Example\n batcher: |-\n {\n \"image\" : \"kserve/agent:v0.12.1\",\n \"memoryRequest\": \"1Gi\",\n \"memoryLimit\": \"1Gi\",\n \"cpuRequest\": \"1\",\n \"cpuLimit\": \"1\",\n \"maxBatchSize\": \"32\",\n \"maxLatency\": \"5000\"\n }\n batcher: |-\n {\n # image contains the default batcher image uri.\n \"image\" : \"kserve/agent:v0.12.1\",\n \n # memoryRequest is the requests.memory to set for the batcher container.\n \"memoryRequest\": \"1Gi\",\n \n # memoryLimit is the limits.memory to set for the batcher container.\n \"memoryLimit\": \"1Gi\",\n \n # cpuRequest is the requests.cpu to set for the batcher container.\n \"cpuRequest\": \"1\",\n \n # cpuLimit is the limits.cpu to set for the batcher container.\n \"cpuLimit\": \"1\",\n\n # maxBatchSize is the default maximum batch size for batcher.\n \"maxBatchSize\": \"32\",\n\n # maxLatency is the default maximum latency in milliseconds for batcher to wait and collect the batch.\n \"maxLatency\": \"5000\"\n }\n \n # ====================================== AGENT CONFIGURATION 
======================================\n # Example\n agent: |-\n {\n \"image\" : \"kserve/agent:v0.12.1\",\n \"memoryRequest\": \"100Mi\",\n \"memoryLimit\": \"1Gi\",\n \"cpuRequest\": \"100m\",\n \"cpuLimit\": \"1\"\n }\n agent: |-\n {\n # image contains the default agent image uri.\n \"image\" : \"kserve/agent:v0.12.1\",\n \n # memoryRequest is the requests.memory to set for the agent container.\n \"memoryRequest\": \"100Mi\",\n \n # memoryLimit is the limits.memory to set for the agent container.\n \"memoryLimit\": \"1Gi\",\n \n # cpuRequest is the requests.cpu to set for the agent container.\n \"cpuRequest\": \"100m\",\n \n # cpuLimit is the limits.cpu to set for the agent container.\n \"cpuLimit\": \"1\"\n }\n \n # ====================================== ROUTER CONFIGURATION ======================================\n # Example\n router: |-\n {\n \"image\" : \"kserve/router:v0.12.1\",\n \"memoryRequest\": \"100Mi\",\n \"memoryLimit\": \"1Gi\",\n \"cpuRequest\": \"100m\",\n \"cpuLimit\": \"1\",\n \"headers\": {\n \"propagate\": []\n }\n }\n # router is the implementation of inference graph.\n router: |-\n {\n # image contains the default router image uri.\n \"image\" : \"kserve/router:v0.12.1\",\n \n # memoryRequest is the requests.memory to set for the router container.\n \"memoryRequest\": \"100Mi\",\n \n # memoryLimit is the limits.memory to set for the router container.\n \"memoryLimit\": \"1Gi\",\n \n # cpuRequest is the requests.cpu to set for the router container.\n \"cpuRequest\": \"100m\",\n \n # cpuLimit is the limits.cpu to set for the router container.\n \"cpuLimit\": \"1\",\n \n # Propagate the specified headers to all the steps specified in an InferenceGraph. 
\n # You can either specify the exact header names or use [Golang supported regex patterns]\n # (https://pkg.go.dev/regexp/syntax@go1.21.3#hdr-Syntax) to propagate multiple headers.\n \"headers\": {\n \"propagate\": [\n \"Authorization\",\n \"Test-Header-*\",\n \"*Trace-Id*\"\n ]\n }\n }\n \n # ====================================== DEPLOYMENT CONFIGURATION ======================================\n # Example\n deploy: |-\n {\n \"defaultDeploymentMode\": \"Serverless\"\n }\n deploy: |-\n {\n # defaultDeploymentMode specifies the default deployment mode of the kserve. The supported values are\n # Serverless, RawDeployment and ModelMesh. Users can override the deployment mode at service level\n # by adding the annotation serving.kserve.io/deploymentMode. For more info on deployment mode visit\n # Serverless https://kserve.github.io/website/master/admin/serverless/serverless/\n # RawDeployment https://kserve.github.io/website/master/admin/kubernetes_deployment/\n # ModelMesh https://kserve.github.io/website/master/admin/modelmesh/\n \"defaultDeploymentMode\": \"Serverless\"\n }\n \n # ====================================== METRICS CONFIGURATION ======================================\n # Example\n metricsAggregator: |-\n {\n \"enableMetricAggregation\": \"false\",\n \"enablePrometheusScraping\" : \"false\"\n }\n # For more info see https://github.com/kserve/kserve/blob/master/qpext/README.md\n metricsAggregator: |-\n {\n # enableMetricAggregation configures metric aggregation annotation. This adds the annotation serving.kserve.io/enable-metric-aggregation to every\n # service with the specified boolean value. If true enables metric aggregation in queue-proxy by setting env vars in the queue proxy container\n # to configure scraping ports.\n \"enableMetricAggregation\": \"false\",\n \n # enablePrometheusScraping configures metric aggregation annotation. 
This adds the annotation serving.kserve.io/enable-prometheus-scraping to every\n # service with the specified boolean value. If true, prometheus annotations are added to the pod. If serving.kserve.io/enable-metric-aggregation is false,\n # the prometheus port is set with the default prometheus scraping port 9090, otherwise the prometheus port annotation is set with the metric aggregation port.\n \"enablePrometheusScraping\" : \"false\"\n }" agent: |- { - "image" : "kserve/agent:v0.11.2", + "image" : "kserve/agent:v0.12.1", "memoryRequest": "100Mi", "memoryLimit": "1Gi", "cpuRequest": "100m", @@ -21030,11 +21994,13 @@ data: } batcher: |- { - "image" : "kserve/agent:v0.11.2", + "image" : "kserve/agent:v0.12.1", "memoryRequest": "1Gi", "memoryLimit": "1Gi", "cpuRequest": "1", - "cpuLimit": "1" + "cpuLimit": "1", + "maxBatchSize": "32", + "maxLatency": "5000" } credentials: |- { @@ -21051,6 +22017,7 @@ data: "s3Region": "", "s3VerifySSL": "", "s3UseVirtualBucket": "", + "s3UseAccelerate": "", "s3UseAnonymousCredential": "", "s3CABundle": "" } @@ -21080,11 +22047,12 @@ data: "ingressClassName": "istio", "domainTemplate": "{{ .Name }}-{{ .Namespace }}.{{ .IngressDomain }}", "urlScheme": "http", - "disableIstioVirtualHost": false + "disableIstioVirtualHost": false, + "disableIngressCreation": false } logger: |- { - "image" : "kserve/agent:v0.11.2", + "image" : "kserve/agent:v0.12.1", "memoryRequest": "100Mi", "memoryLimit": "1Gi", "cpuRequest": "100m", @@ -21098,7 +22066,7 @@ data: } router: |- { - "image" : "kserve/router:v0.11.2", + "image" : "kserve/router:v0.12.1", "memoryRequest": "100Mi", "memoryLimit": "1Gi", "cpuRequest": "100m", @@ -21106,12 +22074,17 @@ data: } storageInitializer: |- { - "image" : "kserve/storage-initializer:v0.11.2", + "image" : "kserve/storage-initializer:v0.12.1", "memoryRequest": "100Mi", "memoryLimit": "1Gi", "cpuRequest": "100m", "cpuLimit": "1", - "enableDirectPvcVolumeMount": false + "caBundleConfigMapName": "", + 
"caBundleVolumeMountPath": "/etc/ssl/custom-certs", + "enableDirectPvcVolumeMount": true, + "enableModelcar": false, + "cpuModelcar": "10m", + "memoryModelcar": "15Mi" } kind: ConfigMap metadata: @@ -21234,13 +22207,28 @@ spec: fieldPath: metadata.namespace - name: SECRET_NAME value: kserve-webhook-server-cert - image: kserve/kserve-controller:v0.11.2 + image: kserve/kserve-controller:v0.12.1 imagePullPolicy: Always + livenessProbe: + failureThreshold: 5 + httpGet: + path: /healthz + port: 8081 + initialDelaySeconds: 10 + timeoutSeconds: 5 name: manager ports: - containerPort: 9443 name: webhook-server protocol: TCP + readinessProbe: + failureThreshold: 10 + httpGet: + path: /readyz + port: 8081 + initialDelaySeconds: 10 + periodSeconds: 5 + timeoutSeconds: 5 resources: limits: cpu: 100m @@ -21353,6 +22341,7 @@ webhooks: matchExpressions: - key: serving.kserve.io/inferenceservice operator: Exists + reinvocationPolicy: IfNeeded rules: - apiGroups: - "" @@ -21360,7 +22349,6 @@ webhooks: - v1 operations: - CREATE - - UPDATE resources: - pods sideEffects: None diff --git a/contrib/kserve/kserve/kustomization.yaml b/contrib/kserve/kserve/kustomization.yaml index 381880bef6..bf8b967d04 100644 --- a/contrib/kserve/kserve/kustomization.yaml +++ b/contrib/kserve/kserve/kustomization.yaml @@ -3,4 +3,4 @@ kind: Kustomization resources: # Install Kserve in kubeflow namespace - kserve_kubeflow.yaml - - kserve-runtimes.yaml + - kserve-cluster-resources.yaml diff --git a/hack/sync-kserve-kserve-manifests.sh b/hack/sync-kserve-kserve-manifests.sh index 0278335351..9a4e82b797 100755 --- a/hack/sync-kserve-kserve-manifests.sh +++ b/hack/sync-kserve-kserve-manifests.sh @@ -9,79 +9,72 @@ # # Afterwards the developers can submit the PR to the kubeflow/manifests # repo, based on that local branch - -# Run this script form the root of kubeflow/manifests repository -# ./hack/sync-kserve-manifests.sh +# It must be executed directly from its directory # strict mode 
http://redsymbol.net/articles/unofficial-bash-strict-mode/ -set -euo pipefail +set -euxo pipefail IFS=$'\n\t' -CLONE_DIR=${CLONE_DIR:=/tmp} -KSERVE_DIR="${CLONE_DIR?}/kserve" -BRANCH=${BRANCH:=sync-kserve-manifests-${KSERVE_COMMIT?}} -# *_VERSION vars are required only if COMMIT does not match a tag -KSERVE_VERSION=${KSERVE_VERSION:=${KSERVE_COMMIT?}} +KSERVE_VERSION="v0.12.1" +COMMIT="0.12.1" # You can use tags as well +SRC_DIR=${SRC_DIR:=/tmp/kserve} +BRANCH=${BRANCH:=sync-kserve-manifests-${COMMIT?}} SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) -MANIFESTS_DIR=$(dirname "${SCRIPT_DIR}") +MANIFESTS_DIR=$(dirname $SCRIPT_DIR) echo "Creating branch: ${BRANCH}" -# DEV: Comment out this if you are testing locally if [ -n "$(git status --porcelain)" ]; then - # Uncommitted changes - echo "WARNING: You have uncommitted changes, exiting..." - exit 1 + echo "WARNING: You have uncommitted changes" fi - -if [ "$(git branch --list "${BRANCH}")" ] +if [ `git branch --list $BRANCH` ] then - echo "WARNING: Branch ${BRANCH} already exists. Exiting..." - exit 1 + echo "WARNING: Branch $BRANCH already exists." fi -# DEV: Comment out this checkout command if you are testing locally -git checkout -b "${BRANCH}" +# Create the branch in the manifests repository +if ! git show-ref --verify --quiet refs/heads/$BRANCH; then + git checkout -b $BRANCH +else + echo "Branch $BRANCH already exists." +fi +echo "Checking out in $SRC_DIR to $COMMIT..." -echo "Checking out in $KSERVE_DIR to $KSERVE_COMMIT..." -pushd $CLONE_DIR - if [ ! -d "$KSERVE_DIR" ] - then - git clone https://github.com/kserve/kserve.git && cd kserve - git checkout "${KSERVE_COMMIT}" - else - echo "WARNING: ${KSERVE_DIR} directory already exists. Exiting..." - exit 1 - fi -popd +# Clone kserve into $SRC_DIR/kserve if it is not there yet, then enter it (cd must come after the clone) +mkdir -p "$SRC_DIR" +if [ ! -d "$SRC_DIR/kserve/.git" ]; then + git clone https://github.com/kserve/kserve.git "$SRC_DIR/kserve" +fi +cd "$SRC_DIR/kserve" +if ! 
git rev-parse --verify --quiet $COMMIT; then + git checkout -b $COMMIT +else + git checkout $COMMIT +fi -echo "Copying kserve manifests..." -SRC_MANIFEST_PATH="$KSERVE_DIR"/install/"$KSERVE_VERSION" -if [ ! -d "$SRC_MANIFEST_PATH" ] -then - echo "Directory $SRC_MANIFEST_PATH DOES NOT exists." - exit 1 +if [ -n "$(git status --porcelain)" ]; then + echo "WARNING: You have uncommitted changes" fi +echo "Copying kserve manifests..." DST_DIR=$MANIFESTS_DIR/contrib/kserve/kserve -pushd "$DST_DIR" - rm -rf kserve* -popd -cp "$SRC_MANIFEST_PATH"/* "$DST_DIR" -r +if [ -d "$DST_DIR" ]; then + rm -rf "$DST_DIR"/kserve* +fi +cp $SRC_DIR/kserve/install/"$KSERVE_VERSION"/* $DST_DIR -r -echo "Successfully copied kserve manifests." +echo "Successfully copied all manifests." echo "Updating README..." SRC_TXT="\[.*\](https://github.com/kserve/kserve/tree/.*)" -DST_TXT="\[$KSERVE_COMMIT\](https://github.com/kserve/kserve/tree/$KSERVE_COMMIT/install/$KSERVE_VERSION)" +DST_TXT="\[$COMMIT\](https://github.com/kserve/kserve/tree/$COMMIT/install/$KSERVE_VERSION)" sed -i "s|$SRC_TXT|$DST_TXT|g" "${MANIFESTS_DIR}"/README.md -# DEV: Comment out these commands if you are testing locally echo "Committing the changes..." cd "$MANIFESTS_DIR" git add contrib/kserve git add README.md -git commit -s -m "Update kserve manifests from ${KSERVE_VERSION}" -m "Update kserve/kserve manifests from ${KSERVE_COMMIT}" +git commit -s -m "Update kserve manifests from ${KSERVE_VERSION}" -m "Update kserve/kserve manifests from ${COMMIT}"