Skip to content

Commit

Permalink
Merge branch 'main' into dependabot/go_modules/k8s.io/api-0.30.3
Browse files Browse the repository at this point in the history
  • Loading branch information
KPostOffice authored Aug 21, 2024
2 parents 246e720 + f31a77d commit f65ea5d
Show file tree
Hide file tree
Showing 14 changed files with 197 additions and 63 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/e2e_tests.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ jobs:

- name: Upload logs
uses: actions/upload-artifact@v4
if: always() && steps.deploy.outcome == 'success'
if: always() && steps.kind-install.outcome == 'success'
with:
name: logs
retention-days: 10
Expand Down
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ VERSION ?= v0.0.0-dev
BUNDLE_VERSION ?= $(VERSION:v%=%)

# APPWRAPPER_VERSION defines the default version of the AppWrapper controller
APPWRAPPER_VERSION ?= v0.21.1
APPWRAPPER_VERSION ?= v0.23.0
APPWRAPPER_REPO ?= github.com/project-codeflare/appwrapper
APPWRAPPER_CRD ?= ${APPWRAPPER_REPO}/config/crd?ref=${APPWRAPPER_VERSION}

Expand Down
6 changes: 3 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,9 @@ CodeFlare Stack Compatibility Matrix

| Component | Version |
|------------------------------|---------------------------------------------------------------------------------------------------|
| CodeFlare Operator | [v1.5.0](https://github.com/project-codeflare/codeflare-operator/releases/tag/v1.5.0) |
| CodeFlare-SDK | [v0.17.0](https://github.com/project-codeflare/codeflare-sdk/releases/tag/v0.17.0) |
| AppWrapper | [v0.20.2](https://github.com/project-codeflare/appwrapper/releases/tag/v0.20.2) |
| CodeFlare Operator | [v1.7.0](https://github.com/project-codeflare/codeflare-operator/releases/tag/v1.7.0) |
| CodeFlare-SDK | [v0.19.1](https://github.com/project-codeflare/codeflare-sdk/releases/tag/v0.19.1) |
| AppWrapper | [v0.23.0](https://github.com/project-codeflare/appwrapper/releases/tag/v0.23.0) |
| KubeRay | [v1.1.0](https://github.com/opendatahub-io/kuberay/releases/tag/v1.1.0) |
| Kueue | [v0.7.0](https://github.com/opendatahub-io/kueue/releases/tag/v0.7.0) |
<!-- Compatibility Matrix end -->
Expand Down
2 changes: 1 addition & 1 deletion config/crd/appwrapper/kustomization.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
- github.com/project-codeflare/appwrapper/config/crd?ref=v0.21.1
- github.com/project-codeflare/appwrapper/config/crd?ref=v0.23.0
2 changes: 1 addition & 1 deletion config/manager/params.env
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
codeflare-operator-controller-image=quay.io/opendatahub/codeflare-operator:v1.5.0
codeflare-operator-controller-image=quay.io/opendatahub/codeflare-operator:v1.7.0
namespace=opendatahub
8 changes: 8 additions & 0 deletions config/rbac/role.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,14 @@ rules:
- patch
- update
- watch
- apiGroups:
- ""
resources:
- nodes
verbs:
- get
- list
- watch
- apiGroups:
- ""
resources:
Expand Down
6 changes: 3 additions & 3 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,14 @@ module github.com/project-codeflare/codeflare-operator
go 1.22.2

require (
github.com/go-logr/logr v1.4.2
github.com/onsi/ginkgo/v2 v2.19.0
github.com/onsi/gomega v1.33.1
github.com/open-policy-agent/cert-controller v0.10.1
github.com/opendatahub-io/opendatahub-operator/v2 v2.10.0
github.com/openshift/api v0.0.0-20230823114715-5fdd7511b790
github.com/openshift/client-go v0.0.0-20221019143426-16aed247da5c
github.com/project-codeflare/appwrapper v0.21.1
github.com/project-codeflare/appwrapper v0.23.0
github.com/project-codeflare/codeflare-common v0.0.0-20240628111341-56c962a09b7e
github.com/ray-project/kuberay/ray-operator v1.1.1
go.uber.org/zap v1.27.0
Expand All @@ -35,7 +36,7 @@ replace go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp => go.open
replace github.com/jackc/pgx/v4 => github.com/jackc/pgx/v5 v5.5.4

// These replace directives support the backlevel go version required by ODH build
replace github.com/project-codeflare/appwrapper v0.21.1 => github.com/project-codeflare/appwrapper v0.21.2-0.20240712173553-5b007c947b37
replace github.com/project-codeflare/appwrapper v0.23.0 => github.com/project-codeflare/appwrapper v0.23.1-0.20240731154950-ad486fb7e7ee

replace sigs.k8s.io/kueue v0.7.1 => github.com/opendatahub-io/kueue v0.7.0-odh-test

Expand All @@ -51,7 +52,6 @@ require (
github.com/evanphx/json-patch v5.6.0+incompatible // indirect
github.com/evanphx/json-patch/v5 v5.8.0 // indirect
github.com/fsnotify/fsnotify v1.7.0 // indirect
github.com/go-logr/logr v1.4.2 // indirect
github.com/go-logr/zapr v1.3.0 // indirect
github.com/go-openapi/jsonpointer v0.20.0 // indirect
github.com/go-openapi/jsonreference v0.20.2 // indirect
Expand Down
4 changes: 2 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -246,8 +246,8 @@ github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/project-codeflare/appwrapper v0.21.2-0.20240712173553-5b007c947b37 h1:x4qdbN98B9gtaU7pseJWABZzwoDawXLC5QMlx0idXxc=
github.com/project-codeflare/appwrapper v0.21.2-0.20240712173553-5b007c947b37/go.mod h1:gKjO+iRtMIdBvIBYmN+VciL9kzWmkfwgk/+24wCLhSM=
github.com/project-codeflare/appwrapper v0.23.1-0.20240731154950-ad486fb7e7ee h1:kl/nuApg9116e1jSmDdUSaNmxG2Q/OpubEqI+YVfizk=
github.com/project-codeflare/appwrapper v0.23.1-0.20240731154950-ad486fb7e7ee/go.mod h1:6PNty9c0IOonWrJ80j6VL4uTijixtJ3OXSdFJBCDZZE=
github.com/project-codeflare/codeflare-common v0.0.0-20240628111341-56c962a09b7e h1:juFd1dQyioeMxbVE6F0YD25ozm/jiqJE+MpDhu8p22k=
github.com/project-codeflare/codeflare-common v0.0.0-20240628111341-56c962a09b7e/go.mod h1:unKTw+XoMANTES3WieG016im7rxZ7IR2/ph++L5Vp1Y=
github.com/prometheus/client_golang v1.18.0 h1:HzFfmkOzH5Q8L8G+kSJKUx5dtG87sewO+FoDDqP5Tbk=
Expand Down
2 changes: 1 addition & 1 deletion main.go
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ func main() {
zapOptions.BindFlags(flag.CommandLine)
flag.Parse()

ctrl.SetLogger(zap.New(zap.UseFlagOptions(&zapOptions)))
ctrl.SetLogger(controllers.FilteredLogger(zap.New(zap.UseFlagOptions(&zapOptions))))
klog.SetLogger(ctrl.Log)

setupLog.Info("Build info",
Expand Down
3 changes: 3 additions & 0 deletions pkg/controllers/appwrapper_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,3 +41,6 @@ package controllers
// +kubebuilder:rbac:groups=kueue.x-k8s.io,resources=workloads/finalizers,verbs=update
// +kubebuilder:rbac:groups=kueue.x-k8s.io,resources=resourceflavors,verbs=get;list;watch
// +kubebuilder:rbac:groups=kueue.x-k8s.io,resources=workloadpriorityclasses,verbs=get;list;watch

// permission to watch nodes for Autopilot integration
//+kubebuilder:rbac:groups="",resources=nodes,verbs=get;list;watch
83 changes: 58 additions & 25 deletions pkg/controllers/raycluster_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -213,6 +213,10 @@ func (r *RayClusterReconciler) Reconcile(ctx context.Context, req ctrl.Request)
return ctrl.Result{RequeueAfter: requeueTime}, err
}

if err := r.deleteHeadPodIfMissingImagePullSecrets(ctx, cluster); err != nil {
return ctrl.Result{RequeueAfter: requeueTime}, err
}

_, err = r.kubeClient.RbacV1().ClusterRoleBindings().Apply(ctx, desiredOAuthClusterRoleBinding(cluster), metav1.ApplyOptions{FieldManager: controllerName, Force: true})
if err != nil {
logger.Error(err, "Failed to update OAuth ClusterRoleBinding")
Expand Down Expand Up @@ -333,9 +337,7 @@ func desiredServiceAccount(cluster *rayv1.RayCluster) *corev1ac.ServiceAccountAp
`{"kind":"OAuthRedirectReference","apiVersion":"v1",` +
`"reference":{"kind":"Route","name":"` + dashboardNameFromCluster(cluster) + `"}}`,
}).
WithOwnerReferences(
metav1ac.OwnerReference().WithUID(cluster.UID).WithName(cluster.Name).WithKind(cluster.Kind).WithAPIVersion(cluster.APIVersion).WithController(true),
)
WithOwnerReferences(ownerRefForRayCluster(cluster))
}

func dashboardNameFromCluster(cluster *rayv1.RayCluster) string {
Expand All @@ -357,9 +359,7 @@ func desiredClusterRoute(cluster *rayv1.RayCluster) *routev1ac.RouteApplyConfigu
WithTermination(routev1.TLSTerminationReencrypt),
),
).
WithOwnerReferences(
metav1ac.OwnerReference().WithUID(cluster.UID).WithName(cluster.Name).WithKind(cluster.Kind).WithAPIVersion(cluster.APIVersion).WithController(true),
)
WithOwnerReferences(ownerRefForRayCluster(cluster))
}

func oauthServiceNameFromCluster(cluster *rayv1.RayCluster) string {
Expand All @@ -385,9 +385,7 @@ func desiredOAuthService(cluster *rayv1.RayCluster) *corev1ac.ServiceApplyConfig
).
WithSelector(map[string]string{"ray.io/cluster": cluster.Name, "ray.io/node-type": "head"}),
).
WithOwnerReferences(
metav1ac.OwnerReference().WithUID(cluster.UID).WithName(cluster.Name).WithKind(cluster.Kind).WithAPIVersion(cluster.APIVersion).WithController(true),
)
WithOwnerReferences(ownerRefForRayCluster(cluster))
}

func oauthSecretNameFromCluster(cluster *rayv1.RayCluster) string {
Expand All @@ -404,9 +402,7 @@ func desiredOAuthSecret(cluster *rayv1.RayCluster, cookieSalt string) *corev1ac.
return corev1ac.Secret(oauthSecretNameFromCluster(cluster), cluster.Namespace).
WithLabels(map[string]string{RayClusterNameLabel: cluster.Name}).
WithStringData(map[string]string{"cookie_secret": cookieSecret}).
WithOwnerReferences(
metav1ac.OwnerReference().WithUID(cluster.UID).WithName(cluster.Name).WithKind(cluster.Kind).WithAPIVersion(cluster.APIVersion).WithController(true),
)
WithOwnerReferences(ownerRefForRayCluster(cluster))
}

func caSecretNameFromCluster(cluster *rayv1.RayCluster) string {
Expand All @@ -420,12 +416,7 @@ func desiredCASecret(cluster *rayv1.RayCluster, key, cert []byte) *corev1ac.Secr
CAPrivateKeyKey: key,
CACertKey: cert,
}).
WithOwnerReferences(metav1ac.OwnerReference().
WithUID(cluster.UID).
WithName(cluster.Name).
WithKind(cluster.Kind).
WithAPIVersion(cluster.APIVersion).
WithController(true))
WithOwnerReferences(ownerRefForRayCluster(cluster))
}

func generateCACertificate() ([]byte, []byte, error) {
Expand Down Expand Up @@ -470,6 +461,7 @@ func generateCACertificate() ([]byte, []byte, error) {

return privateKeyPem, certPem, nil
}

func desiredWorkersNetworkPolicy(cluster *rayv1.RayCluster) *networkingv1ac.NetworkPolicyApplyConfiguration {
return networkingv1ac.NetworkPolicy(cluster.Name+"-workers", cluster.Namespace).
WithLabels(map[string]string{RayClusterNameLabel: cluster.Name}).
Expand All @@ -482,10 +474,9 @@ func desiredWorkersNetworkPolicy(cluster *rayv1.RayCluster) *networkingv1ac.Netw
),
),
).
WithOwnerReferences(
metav1ac.OwnerReference().WithUID(cluster.UID).WithName(cluster.Name).WithKind(cluster.Kind).WithAPIVersion(cluster.APIVersion).WithController(true),
)
WithOwnerReferences(ownerRefForRayCluster(cluster))
}

func desiredHeadNetworkPolicy(cluster *rayv1.RayCluster, cfg *config.KubeRayConfiguration, kubeRayNamespaces []string) *networkingv1ac.NetworkPolicyApplyConfiguration {
allSecuredPorts := []*networkingv1ac.NetworkPolicyPortApplyConfiguration{
networkingv1ac.NetworkPolicyPort().WithProtocol(corev1.ProtocolTCP).WithPort(intstr.FromInt(8443)),
Expand Down Expand Up @@ -539,9 +530,50 @@ func desiredHeadNetworkPolicy(cluster *rayv1.RayCluster, cfg *config.KubeRayConf
),
),
).
WithOwnerReferences(
metav1ac.OwnerReference().WithUID(cluster.UID).WithName(cluster.Name).WithKind(cluster.Kind).WithAPIVersion(cluster.APIVersion).WithController(true),
)
WithOwnerReferences(ownerRefForRayCluster(cluster))
}

// deleteHeadPodIfMissingImagePullSecrets deletes the head pod of the given
// RayCluster when the cluster's OAuth ServiceAccount lists at least one image
// pull secret that the head pod's spec does not reference.
//
// It returns nil when there is no head pod or when the pod already references
// every image pull secret of the ServiceAccount. Failures to read the
// ServiceAccount, to look up the head pod, or to delete the pod are returned
// wrapped with context.
//
// NOTE(review): presumably the pod is deleted so that it gets recreated with
// the ServiceAccount's current image pull secrets — confirm against KubeRay.
func (r *RayClusterReconciler) deleteHeadPodIfMissingImagePullSecrets(ctx context.Context, cluster *rayv1.RayCluster) error {
	sa, err := r.kubeClient.CoreV1().ServiceAccounts(cluster.Namespace).Get(ctx, oauthServiceAccountNameFromCluster(cluster), metav1.GetOptions{})
	if err != nil {
		return fmt.Errorf("failed to get OAuth ServiceAccount: %w", err)
	}

	pod, err := getHeadPod(ctx, r, cluster)
	if err != nil {
		return fmt.Errorf("failed to get head pod: %w", err)
	}
	if pod == nil {
		// Nothing to delete.
		return nil
	}

	// Index the secrets the pod already references for constant-time lookup.
	attached := make(map[string]struct{}, len(pod.Spec.ImagePullSecrets))
	for _, ref := range pod.Spec.ImagePullSecrets {
		attached[ref.Name] = struct{}{}
	}

	for _, ref := range sa.ImagePullSecrets {
		if _, present := attached[ref.Name]; present {
			continue
		}
		// One missing secret is enough to warrant deleting the pod.
		if err := r.kubeClient.CoreV1().Pods(pod.Namespace).Delete(ctx, pod.Name, metav1.DeleteOptions{}); err != nil {
			return fmt.Errorf("failed to delete head pod: %w", err)
		}
		return nil
	}
	return nil
}

// getHeadPod lists the pods in the cluster's namespace that carry the labels
// ray.io/node-type=head and ray.io/cluster=<cluster name>, and returns a
// pointer to the first item of the result.
//
// It returns (nil, nil) when no pod matches, and (nil, err) when the list
// call fails.
func getHeadPod(ctx context.Context, r *RayClusterReconciler, cluster *rayv1.RayCluster) (*corev1.Pod, error) {
	selector := fmt.Sprintf("ray.io/node-type=head,ray.io/cluster=%s", cluster.Name)

	pods, err := r.kubeClient.CoreV1().Pods(cluster.Namespace).List(ctx, metav1.ListOptions{LabelSelector: selector})
	if err != nil {
		return nil, err
	}
	if len(pods.Items) == 0 {
		return nil, nil
	}
	return &pods.Items[0], nil
}

// SetupWithManager sets up the controller with the Manager.
Expand Down Expand Up @@ -577,7 +609,8 @@ func (r *RayClusterReconciler) SetupWithManager(mgr ctrl.Manager) error {
NamespacedName: client.ObjectKey{
Name: name,
Namespace: namespace,
}}}
},
}}
}),
)
if r.IsOpenShift {
Expand Down
50 changes: 48 additions & 2 deletions pkg/controllers/raycluster_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ import (

var _ = Describe("RayCluster controller", func() {
Context("RayCluster controller test", func() {
var rayClusterName = "test-raycluster"
rayClusterName := "test-raycluster"
var namespaceName string
BeforeEach(func(ctx SpecContext) {
By("Creating a namespace for running the tests.")
Expand Down Expand Up @@ -145,6 +145,53 @@ var _ = Describe("RayCluster controller", func() {
}).WithTimeout(time.Second * 10).Should(WithTransform(OwnerReferenceName, Equal(foundRayCluster.Name)))
})

// Verifies that a head pod lacking an image pull secret listed on the OAuth
// ServiceAccount ends up deleted.
It("should delete the head pod if missing image pull secrets", func(ctx SpecContext) {
foundRayCluster, err := rayClient.RayV1().RayClusters(namespaceName).Get(ctx, rayClusterName, metav1.GetOptions{})
Expect(err).To(Not(HaveOccurred()))

// Wait until the OAuth ServiceAccount exists and is owned by a RayCluster.
Eventually(func() (*corev1.ServiceAccount, error) {
return k8sClient.CoreV1().ServiceAccounts(namespaceName).Get(ctx, oauthServiceAccountNameFromCluster(foundRayCluster), metav1.GetOptions{})
}).WithTimeout(time.Second * 10).Should(WithTransform(OwnerReferenceKind, Equal("RayCluster")))

// Create a pod labelled as this cluster's head node; its spec declares no
// image pull secrets.
headPodName := "head-pod"
headPod := &corev1.Pod{
ObjectMeta: metav1.ObjectMeta{
Name: headPodName,
Namespace: namespaceName,
Labels: map[string]string{
"ray.io/node-type": "head",
"ray.io/cluster": foundRayCluster.Name,
},
},
Spec: corev1.PodSpec{
Containers: []corev1.Container{
{
Name: "head-container",
Image: "busybox",
},
},
},
}
_, err = k8sClient.CoreV1().Pods(namespaceName).Create(ctx, headPod, metav1.CreateOptions{})
Expect(err).To(Not(HaveOccurred()))

// Make sure the pod can be read back before modifying the ServiceAccount.
Eventually(func() (*corev1.Pod, error) {
return k8sClient.CoreV1().Pods(namespaceName).Get(ctx, headPodName, metav1.GetOptions{})
}).WithTimeout(time.Second * 10).ShouldNot(BeNil())

sa, err := k8sClient.CoreV1().ServiceAccounts(namespaceName).Get(ctx, oauthServiceAccountNameFromCluster(foundRayCluster), metav1.GetOptions{})
Expect(err).To(Not(HaveOccurred()))

// Add an image pull secret reference to the ServiceAccount that the head pod
// does not have.
sa.ImagePullSecrets = append(sa.ImagePullSecrets, corev1.LocalObjectReference{Name: "test-image-pull-secret"})
_, err = k8sClient.CoreV1().ServiceAccounts(namespaceName).Update(ctx, sa, metav1.UpdateOptions{})
Expect(err).To(Not(HaveOccurred()))

// The head pod is expected to disappear within the timeout.
Eventually(func() error {
_, err := k8sClient.CoreV1().Pods(namespaceName).Get(ctx, headPodName, metav1.GetOptions{})
return err
}).WithTimeout(time.Second * 10).Should(Satisfy(errors.IsNotFound))
})

It("should remove CRB when the RayCluster is deleted", func(ctx SpecContext) {
foundRayCluster, err := rayClient.RayV1().RayClusters(namespaceName).Get(ctx, rayClusterName, metav1.GetOptions{})
Expect(err).To(Not(HaveOccurred()))
Expand All @@ -157,7 +204,6 @@ var _ = Describe("RayCluster controller", func() {
return err
}).WithTimeout(time.Second * 10).Should(Satisfy(errors.IsNotFound))
})

})
})

Expand Down
Loading

0 comments on commit f65ea5d

Please sign in to comment.