Skip to content

Commit

Permalink
OpenShift AI workshop
Browse files Browse the repository at this point in the history
  • Loading branch information
derekelewis committed Aug 7, 2023
1 parent 2dcbe31 commit 401b49b
Show file tree
Hide file tree
Showing 4 changed files with 221 additions and 0 deletions.
127 changes: 127 additions & 0 deletions openshift/node-feature-discovery-instance.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@
kind: NodeFeatureDiscovery
apiVersion: nfd.openshift.io/v1
metadata:
name: nfd-instance
namespace: openshift-nfd
spec:
customConfig:
configData: |
# - name: "more.kernel.features"
# matchOn:
# - loadedKMod: ["example_kmod3"]
# - name: "more.features.by.nodename"
# value: customValue
# matchOn:
# - nodename: ["special-.*-node-.*"]
operand:
image: >-
registry.redhat.io/openshift4/ose-node-feature-discovery@sha256:07658ef3df4b264b02396e67af813a52ba416b47ab6e1d2d08025a350ccd2b7b
servicePort: 12000
workerConfig:
configData: |
core:
# labelWhiteList:
# noPublish: false
sleepInterval: 60s
# sources: [all]
# klog:
# addDirHeader: false
# alsologtostderr: false
# logBacktraceAt:
# logtostderr: true
# skipHeaders: false
# stderrthreshold: 2
# v: 0
# vmodule:
## NOTE: the following options are not dynamically run-time
## configurable and require a nfd-worker restart to take effect
## after being changed
# logDir:
# logFile:
# logFileMaxSize: 1800
# skipLogHeaders: false
sources:
# cpu:
# cpuid:
## NOTE: attributeWhitelist has priority over attributeBlacklist
# attributeBlacklist:
# - "BMI1"
# - "BMI2"
# - "CLMUL"
# - "CMOV"
# - "CX16"
# - "ERMS"
# - "F16C"
# - "HTT"
# - "LZCNT"
# - "MMX"
# - "MMXEXT"
# - "NX"
# - "POPCNT"
# - "RDRAND"
# - "RDSEED"
# - "RDTSCP"
# - "SGX"
# - "SSE"
# - "SSE2"
# - "SSE3"
# - "SSE4.1"
# - "SSE4.2"
# - "SSSE3"
# attributeWhitelist:
# kernel:
# kconfigFile: "/path/to/kconfig"
# configOpts:
# - "NO_HZ"
# - "X86"
# - "DMI"
pci:
deviceClassWhitelist:
- "0200"
- "03"
- "12"
deviceLabelFields:
# - "class"
- "vendor"
# - "device"
# - "subsystem_vendor"
# - "subsystem_device"
# usb:
# deviceClassWhitelist:
# - "0e"
# - "ef"
# - "fe"
# - "ff"
# deviceLabelFields:
# - "class"
# - "vendor"
# - "device"
# custom:
# - name: "my.kernel.feature"
# matchOn:
# - loadedKMod: ["example_kmod1", "example_kmod2"]
# - name: "my.pci.feature"
# matchOn:
# - pciId:
# class: ["0200"]
# vendor: ["15b3"]
# device: ["1014", "1017"]
# - pciId :
# vendor: ["8086"]
# device: ["1000", "1100"]
# - name: "my.usb.feature"
# matchOn:
# - usbId:
# class: ["ff"]
# vendor: ["03e7"]
# device: ["2485"]
# - usbId:
# class: ["fe"]
# vendor: ["1a6e"]
# device: ["089a"]
# - name: "my.combined.feature"
# matchOn:
# - pciId:
# vendor: ["15b3"]
# device: ["1014", "1017"]
# loadedKMod : ["vendor_kmod1", "vendor_kmod2"]
23 changes: 23 additions & 0 deletions openshift/node-feature-discovery-operator.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
apiVersion: v1
kind: Namespace
metadata:
name: openshift-nfd
---
apiVersion: operators.coreos.com/v1
kind: OperatorGroup
metadata:
generateName: openshift-nfd-
name: openshift-nfd
namespace: openshift-nfd
---
apiVersion: operators.coreos.com/v1alpha1
kind: Subscription
metadata:
name: nfd
namespace: openshift-nfd
spec:
channel: "stable"
installPlanApproval: Automatic
name: nfd
source: redhat-operators
sourceNamespace: openshift-marketplace
45 changes: 45 additions & 0 deletions openshift/nvidia-cluster-config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
apiVersion: nvidia.com/v1
kind: ClusterPolicy
metadata:
name: gpu-cluster-policy
spec:
migManager:
enabled: true
operator:
defaultRuntime: crio
initContainer: {}
runtimeClass: nvidia
deployGFD: true
dcgm:
enabled: true
gfd: {}
dcgmExporter:
config:
name: ''
driver:
licensingConfig:
nlsEnabled: false
configMapName: ''
certConfig:
name: ''
kernelModuleConfig:
name: ''
repoConfig:
configMapName: ''
virtualTopology:
config: ''
enabled: true
use_ocp_driver_toolkit: true
devicePlugin: {}
mig:
strategy: single
validator:
plugin:
env:
- name: WITH_WORKLOAD
value: 'true'
nodeStatusExporter:
enabled: true
daemonsets: {}
toolkit:
enabled: true
26 changes: 26 additions & 0 deletions openshift/nvidia-gpu-operator.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
apiVersion: v1
kind: Namespace
metadata:
name: nvidia-gpu-operator
---
apiVersion: operators.coreos.com/v1
kind: OperatorGroup
metadata:
name: nvidia-gpu-operator-group
namespace: nvidia-gpu-operator
spec:
targetNamespaces:
- nvidia-gpu-operator
---
apiVersion: operators.coreos.com/v1alpha1
kind: Subscription
metadata:
name: gpu-operator-certified
namespace: nvidia-gpu-operator
spec:
channel: "v23.3"
installPlanApproval: Automatic
name: gpu-operator-certified
source: certified-operators
sourceNamespace: openshift-marketplace
startingCSV: "gpu-operator-certified.v23.3.2"

0 comments on commit 401b49b

Please sign in to comment.