From 309e10a9f1007891751e698780fda5bfe3367138 Mon Sep 17 00:00:00 2001 From: edibble21 <85638465+edibble21@users.noreply.github.com> Date: Wed, 22 Jan 2025 12:20:29 -0800 Subject: [PATCH] authorization check with mocked calls --- cmd/controller/main.go | 1 + pkg/aws/sdk.go | 1 + pkg/cloudprovider/cloudprovider.go | 22 +- pkg/cloudprovider/suite_test.go | 6 +- pkg/controllers/controllers.go | 4 +- pkg/controllers/nodeclass/authorization.go | 61 --- pkg/controllers/nodeclass/controller.go | 5 +- .../nodeclass/status/controller.go | 134 ------ .../nodeclass/status/suite_test.go | 90 ---- pkg/controllers/nodeclass/suite_test.go | 1 + pkg/controllers/nodeclass/validation.go | 253 +++++++++- pkg/controllers/nodeclass/validation_test.go | 37 +- pkg/errors/errors.go | 3 +- pkg/fake/ec2api.go | 45 +- pkg/operator/operator.go | 4 +- .../launchtemplate/launchtemplate.go | 4 - pkg/test/environment.go | 1 + pkg/utils/utils.go | 24 + test/suites/nodeclass/nodeclass_test.go | 453 ++++++++++++++++++ test/suites/nodeclass/suite_test.go | 50 ++ 20 files changed, 836 insertions(+), 363 deletions(-) delete mode 100644 pkg/controllers/nodeclass/authorization.go delete mode 100644 pkg/controllers/nodeclass/status/controller.go delete mode 100644 pkg/controllers/nodeclass/status/suite_test.go create mode 100644 test/suites/nodeclass/nodeclass_test.go create mode 100644 test/suites/nodeclass/suite_test.go diff --git a/cmd/controller/main.go b/cmd/controller/main.go index 8dd479f84b41..bde08258cc5d 100644 --- a/cmd/controller/main.go +++ b/cmd/controller/main.go @@ -54,6 +54,7 @@ func main() { op.Manager, op.Config, op.Clock, + &op.EC2API, op.GetClient(), op.EventRecorder, op.UnavailableOfferingsCache, diff --git a/pkg/aws/sdk.go b/pkg/aws/sdk.go index e00d3d2cd509..b6449125714b 100644 --- a/pkg/aws/sdk.go +++ b/pkg/aws/sdk.go @@ -37,6 +37,7 @@ type EC2API interface { CreateFleet(context.Context, *ec2.CreateFleetInput, ...func(*ec2.Options)) (*ec2.CreateFleetOutput, error) TerminateInstances(context.Context, *ec2.TerminateInstancesInput, ...func(*ec2.Options)) (*ec2.TerminateInstancesOutput, error) DescribeInstances(context.Context, *ec2.DescribeInstancesInput, ...func(*ec2.Options)) (*ec2.DescribeInstancesOutput, error) + RunInstances(context.Context, *ec2.RunInstancesInput, ...func(*ec2.Options)) (*ec2.RunInstancesOutput, error) CreateTags(context.Context, *ec2.CreateTagsInput, ...func(*ec2.Options)) (*ec2.CreateTagsOutput, error) CreateLaunchTemplate(context.Context, *ec2.CreateLaunchTemplateInput, ...func(*ec2.Options)) (*ec2.CreateLaunchTemplateOutput, error) DeleteLaunchTemplate(context.Context, *ec2.DeleteLaunchTemplateInput, ...func(*ec2.Options)) (*ec2.DeleteLaunchTemplateOutput, error) diff --git a/pkg/cloudprovider/cloudprovider.go b/pkg/cloudprovider/cloudprovider.go index c884bfb95aa4..fbbcacc7003b 100644 --- a/pkg/cloudprovider/cloudprovider.go +++ b/pkg/cloudprovider/cloudprovider.go @@ -104,7 +104,7 @@ func (c *CloudProvider) Create(ctx context.Context, nodeClaim *karpv1.NodeClaim) if len(instanceTypes) == 0 { return nil, cloudprovider.NewInsufficientCapacityError(fmt.Errorf("all requested instance types were unavailable during launch")) } - tags, err := getTags(ctx, nodeClass, nodeClaim) + tags, err := utils.GetTags(ctx, nodeClass, nodeClaim, options.FromContext(ctx).ClusterName) if err != nil { return nil, cloudprovider.NewNodeClassNotReadyError(err) } @@ -241,26 +241,6 @@ func (c *CloudProvider) GetSupportedNodeClasses() []status.Object { return []status.Object{&v1.EC2NodeClass{}} } -func getTags(ctx context.Context, nodeClass *v1.EC2NodeClass, nodeClaim *karpv1.NodeClaim) (map[string]string, error) { - if offendingTag, found := lo.FindKeyBy(nodeClass.Spec.Tags, func(k string, v string) bool { - for _, exp := range v1.RestrictedTagPatterns { - if exp.MatchString(k) { - return true - } - } - return false - }); found { - return nil, fmt.Errorf("%q tag does not pass tag validation requirements", offendingTag) - } - staticTags := map[string]string{ - fmt.Sprintf("kubernetes.io/cluster/%s", options.FromContext(ctx).ClusterName): "owned", - karpv1.NodePoolLabelKey: nodeClaim.Labels[karpv1.NodePoolLabelKey], - v1.EKSClusterNameTagKey: options.FromContext(ctx).ClusterName, - v1.LabelNodeClass: nodeClass.Name, - } - return lo.Assign(nodeClass.Spec.Tags, staticTags), nil -} - func (c *CloudProvider) RepairPolicies() []cloudprovider.RepairPolicy { return []cloudprovider.RepairPolicy{ // Supported Kubelet Node Conditions diff --git a/pkg/cloudprovider/suite_test.go b/pkg/cloudprovider/suite_test.go index d08c26dcb0d5..ce27470d8742 100644 --- a/pkg/cloudprovider/suite_test.go +++ b/pkg/cloudprovider/suite_test.go @@ -1156,7 +1156,7 @@ var _ = Describe("CloudProvider", func() { {SubnetId: aws.String("test-subnet-2"), AvailabilityZone: aws.String("test-zone-1a"), AvailabilityZoneId: aws.String("tstz1-1a"), AvailableIpAddressCount: aws.Int32(100), Tags: []ec2types.Tag{{Key: aws.String("Name"), Value: aws.String("test-subnet-2")}}}, }}) - controller := nodeclass.NewController(env.Client, recorder, awsEnv.SubnetProvider, awsEnv.SecurityGroupProvider, awsEnv.AMIProvider, awsEnv.InstanceProfileProvider, awsEnv.LaunchTemplateProvider) + controller := nodeclass.NewController(env.Client, recorder, awsEnv.SubnetProvider, awsEnv.SecurityGroupProvider, awsEnv.AMIProvider, awsEnv.InstanceProfileProvider, awsEnv.LaunchTemplateProvider, awsEnv.EC2API) ExpectApplied(ctx, env.Client, nodePool, nodeClass) ExpectObjectReconciled(ctx, env.Client, controller, nodeClass) pod := coretest.UnschedulablePod(coretest.PodOptions{NodeSelector: map[string]string{corev1.LabelTopologyZone: "test-zone-1a"}}) @@ -1173,7 +1173,7 @@ var _ = Describe("CloudProvider", func() { {SubnetId: aws.String("test-subnet-2"), AvailabilityZone: aws.String("test-zone-1a"), AvailabilityZoneId: aws.String("tstz1-1a"), AvailableIpAddressCount: aws.Int32(11), Tags: []ec2types.Tag{{Key: aws.String("Name"), Value: aws.String("test-subnet-2")}}}, }}) - controller := nodeclass.NewController(env.Client, recorder, awsEnv.SubnetProvider, awsEnv.SecurityGroupProvider, awsEnv.AMIProvider, awsEnv.InstanceProfileProvider, awsEnv.LaunchTemplateProvider) + controller := nodeclass.NewController(env.Client, recorder, awsEnv.SubnetProvider, awsEnv.SecurityGroupProvider, awsEnv.AMIProvider, awsEnv.InstanceProfileProvider, awsEnv.LaunchTemplateProvider, awsEnv.EC2API) nodeClass.Spec.Kubelet = &v1.KubeletConfiguration{ MaxPods: aws.Int32(1), } @@ -1214,7 +1214,7 @@ var _ = Describe("CloudProvider", func() { }}) nodeClass.Spec.SubnetSelectorTerms = []v1.SubnetSelectorTerm{{Tags: map[string]string{"Name": "test-subnet-1"}}} ExpectApplied(ctx, env.Client, nodePool, nodeClass) - controller := nodeclass.NewController(env.Client, recorder, awsEnv.SubnetProvider, awsEnv.SecurityGroupProvider, awsEnv.AMIProvider, awsEnv.InstanceProfileProvider, awsEnv.LaunchTemplateProvider) + controller := nodeclass.NewController(env.Client, recorder, awsEnv.SubnetProvider, awsEnv.SecurityGroupProvider, awsEnv.AMIProvider, awsEnv.InstanceProfileProvider, awsEnv.LaunchTemplateProvider, awsEnv.EC2API) ExpectObjectReconciled(ctx, env.Client, controller, nodeClass) podSubnet1 := coretest.UnschedulablePod() ExpectProvisioned(ctx, env.Client, cluster, cloudProvider, prov, podSubnet1) diff --git a/pkg/controllers/controllers.go b/pkg/controllers/controllers.go index c5cbec68a5f2..db319e95ccc2 100644 --- a/pkg/controllers/controllers.go +++ b/pkg/controllers/controllers.go @@ -27,6 +27,7 @@ import ( "github.com/aws/aws-sdk-go-v2/aws" v1 "github.com/aws/karpenter-provider-aws/pkg/apis/v1" + sdk "github.com/aws/karpenter-provider-aws/pkg/aws" nodeclass "github.com/aws/karpenter-provider-aws/pkg/controllers/nodeclass" nodeclasshash "github.com/aws/karpenter-provider-aws/pkg/controllers/nodeclass/hash" controllersinstancetype "github.com/aws/karpenter-provider-aws/pkg/controllers/providers/instancetype" @@ -65,6 +66,7 @@ func NewControllers( mgr manager.Manager, cfg aws.Config, clk clock.Clock, + ec2api sdk.EC2API, kubeClient client.Client, recorder events.Recorder, unavailableOfferings *awscache.UnavailableOfferings, @@ -81,7 +83,7 @@ func NewControllers( instanceTypeProvider *instancetype.DefaultProvider) []controller.Controller { controllers := []controller.Controller{ nodeclasshash.NewController(kubeClient), - nodeclass.NewController(kubeClient, recorder, subnetProvider, securityGroupProvider, amiProvider, instanceProfileProvider, launchTemplateProvider), + nodeclass.NewController(kubeClient, recorder, subnetProvider, securityGroupProvider, amiProvider, instanceProfileProvider, launchTemplateProvider, ec2api), nodeclaimgarbagecollection.NewController(kubeClient, cloudProvider), nodeclaimtagging.NewController(kubeClient, cloudProvider, instanceProvider), controllerspricing.NewController(pricingProvider), diff --git a/pkg/controllers/nodeclass/authorization.go b/pkg/controllers/nodeclass/authorization.go deleted file mode 100644 index b3d1a5960d9f..000000000000 --- a/pkg/controllers/nodeclass/authorization.go +++ /dev/null @@ -1,61 +0,0 @@ -/* -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -package nodeclass - -import ( - "context" - "fmt" - - "sigs.k8s.io/controller-runtime/pkg/reconcile" - - corecloudprovider "sigs.k8s.io/karpenter/pkg/cloudprovider" - coretest "sigs.k8s.io/karpenter/pkg/test" - - "github.com/samber/lo" - - v1 "github.com/aws/karpenter-provider-aws/pkg/apis/v1" - "github.com/aws/karpenter-provider-aws/pkg/cloudprovider" - "github.com/aws/karpenter-provider-aws/pkg/providers/instance" -) - -type Authorization struct { - cloudProvider cloudprovider.CloudProvider - instanceProvider instance.Provider -} - -func (a Authorization) Reconcile(ctx context.Context, nodeClass *v1.EC2NodeClass) (reconcile.Result, error) { - //nolint:ineffassign, staticcheck - ctx = context.WithValue(ctx, "DryRun", lo.ToPtr(true)) - if nodeClass.StatusConditions().Get(v1.ConditionTypeSubnetsReady).IsFalse() || nodeClass.StatusConditions().Get(v1.ConditionTypeAMIsReady).IsFalse() { - return reconcile.Result{}, nil - } - nodeClaim := coretest.NodeClaim() - nodeClaim.Spec.NodeClassRef.Name = nodeClass.Name - _, err := a.cloudProvider.Create(ctx, nodeClaim) - if err == nil { - err = a.instanceProvider.Delete(ctx, "mock-id") - if err == nil { - err = a.instanceProvider.CreateTags(ctx, "mock-id", map[string]string{"mock-tag": "mock-tag-value"}) - } - } - //nolint:ineffassign, staticcheck - ctx = context.WithValue(ctx, "DryRun", lo.ToPtr(false)) - if corecloudprovider.IsNodeClassNotReadyError(err) { - nodeClass.StatusConditions().SetFalse(v1.ConditionTypeAuthorization, "NodeClassNotReady", "Unauthorized Operation") - return reconcile.Result{}, fmt.Errorf("unauthorized operation %w", err) - } - nodeClass.StatusConditions().SetTrue(v1.ConditionTypeAuthorization) - return reconcile.Result{}, nil -} diff --git a/pkg/controllers/nodeclass/controller.go b/pkg/controllers/nodeclass/controller.go index 9a05ea8cbdbb..9be201dd2d1d 100644 --- a/pkg/controllers/nodeclass/controller.go +++ b/pkg/controllers/nodeclass/controller.go @@ -44,6 +44,7 @@ import ( "sigs.k8s.io/karpenter/pkg/events" v1 "github.com/aws/karpenter-provider-aws/pkg/apis/v1" + sdk "github.com/aws/karpenter-provider-aws/pkg/aws" "github.com/aws/karpenter-provider-aws/pkg/providers/amifamily" "github.com/aws/karpenter-provider-aws/pkg/providers/instanceprofile" "github.com/aws/karpenter-provider-aws/pkg/providers/launchtemplate" @@ -69,7 +70,7 @@ type Controller struct { } func NewController(kubeClient client.Client, recorder events.Recorder, subnetProvider subnet.Provider, securityGroupProvider securitygroup.Provider, - amiProvider amifamily.Provider, instanceProfileProvider instanceprofile.Provider, launchTemplateProvider launchtemplate.Provider) *Controller { + amiProvider amifamily.Provider, instanceProfileProvider instanceprofile.Provider, launchTemplateProvider launchtemplate.Provider, ec2api sdk.EC2API) *Controller { return &Controller{ kubeClient: kubeClient, @@ -79,7 +80,7 @@ func NewController(kubeClient client.Client, recorder events.Recorder, subnetPro subnet: &Subnet{subnetProvider: subnetProvider}, securityGroup: &SecurityGroup{securityGroupProvider: securityGroupProvider}, instanceProfile: &InstanceProfile{instanceProfileProvider: instanceProfileProvider}, - validation: &Validation{}, + validation: &Validation{ec2api: ec2api, amiProvider: amiProvider}, readiness: &Readiness{launchTemplateProvider: launchTemplateProvider}, } } diff --git a/pkg/controllers/nodeclass/status/controller.go b/pkg/controllers/nodeclass/status/controller.go deleted file mode 100644 index 8e419056a141..000000000000 --- a/pkg/controllers/nodeclass/status/controller.go +++ /dev/null @@ -1,134 +0,0 @@ -/* -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -package status - -import ( - "context" - - "go.uber.org/multierr" - "k8s.io/apimachinery/pkg/api/equality" - "k8s.io/apimachinery/pkg/api/errors" - controllerruntime "sigs.k8s.io/controller-runtime" - "sigs.k8s.io/controller-runtime/pkg/client" - "sigs.k8s.io/controller-runtime/pkg/controller" - "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" - "sigs.k8s.io/controller-runtime/pkg/manager" - "sigs.k8s.io/controller-runtime/pkg/reconcile" - "sigs.k8s.io/karpenter/pkg/operator/injection" - - "sigs.k8s.io/karpenter/pkg/utils/result" - - "github.com/awslabs/operatorpkg/reasonable" - - v1 "github.com/aws/karpenter-provider-aws/pkg/apis/v1" - "github.com/aws/karpenter-provider-aws/pkg/cloudprovider" - "github.com/aws/karpenter-provider-aws/pkg/providers/amifamily" - "github.com/aws/karpenter-provider-aws/pkg/providers/instance" - "github.com/aws/karpenter-provider-aws/pkg/providers/instanceprofile" - "github.com/aws/karpenter-provider-aws/pkg/providers/launchtemplate" - "github.com/aws/karpenter-provider-aws/pkg/providers/securitygroup" - "github.com/aws/karpenter-provider-aws/pkg/providers/subnet" -) - -type nodeClassStatusReconciler interface { - Reconcile(context.Context, *v1.EC2NodeClass) (reconcile.Result, error) -} - -type Controller struct { - kubeClient client.Client - - ami *AMI - instanceprofile *InstanceProfile - subnet *Subnet - securitygroup *SecurityGroup - validation *Validation - readiness *Readiness //TODO : Remove this when we have sub status conditions -} - -func NewController(kubeClient client.Client, subnetProvider subnet.Provider, securityGroupProvider securitygroup.Provider, - amiProvider amifamily.Provider, instanceProfileProvider instanceprofile.Provider, launchTemplateProvider launchtemplate.Provider, cloudProvider cloudprovider.CloudProvider, instanceProvider instance.Provider) *Controller { - return &Controller{ - kubeClient: kubeClient, - - ami: &AMI{amiProvider: amiProvider}, - subnet: &Subnet{subnetProvider: subnetProvider}, - securitygroup: &SecurityGroup{securityGroupProvider: securityGroupProvider}, - instanceprofile: &InstanceProfile{instanceProfileProvider: instanceProfileProvider}, - validation: &Validation{cloudProvider: cloudProvider, instanceProvider: instanceProvider}, - readiness: &Readiness{launchTemplateProvider: launchTemplateProvider}, - } -} - -func (c *Controller) Reconcile(ctx context.Context, nodeClass *v1.EC2NodeClass) (reconcile.Result, error) { - ctx = injection.WithControllerName(ctx, "nodeclass.status") - - if !controllerutil.ContainsFinalizer(nodeClass, v1.TerminationFinalizer) { - stored := nodeClass.DeepCopy() - controllerutil.AddFinalizer(nodeClass, v1.TerminationFinalizer) - - // We use client.MergeFromWithOptimisticLock because patching a list with a JSON merge patch - // can cause races due to the fact that it fully replaces the list on a change - // Here, we are updating the finalizer list - if err := c.kubeClient.Patch(ctx, nodeClass, client.MergeFromWithOptions(stored, client.MergeFromWithOptimisticLock{})); err != nil { - if errors.IsConflict(err) { - return reconcile.Result{Requeue: true}, nil - } - return reconcile.Result{}, err - } - } - stored := nodeClass.DeepCopy() - - var results []reconcile.Result - var errs error - for _, reconciler := range []nodeClassStatusReconciler{ - c.ami, - c.subnet, - c.securitygroup, - c.instanceprofile, - c.validation, - c.readiness, - } { - res, err := reconciler.Reconcile(ctx, nodeClass) - errs = multierr.Append(errs, err) - results = append(results, res) - } - - if !equality.Semantic.DeepEqual(stored, nodeClass) { - // We use client.MergeFromWithOptimisticLock because patching a list with a JSON merge patch - // can cause races due to the fact that it fully replaces the list on a change - // Here, we are updating the status condition list - if err := c.kubeClient.Status().Patch(ctx, nodeClass, client.MergeFromWithOptions(stored, client.MergeFromWithOptimisticLock{})); err != nil { - if errors.IsConflict(err) { - return reconcile.Result{Requeue: true}, nil - } - errs = multierr.Append(errs, client.IgnoreNotFound(err)) - } - } - if errs != nil { - return reconcile.Result{}, errs - } - return result.Min(results...), nil -} - -func (c *Controller) Register(_ context.Context, m manager.Manager) error { - return controllerruntime.NewControllerManagedBy(m). - Named("nodeclass.status"). - For(&v1.EC2NodeClass{}). - WithOptions(controller.Options{ - RateLimiter: reasonable.RateLimiter(), - MaxConcurrentReconciles: 10, - }). - Complete(reconcile.AsReconciler(m.GetClient(), c)) -} diff --git a/pkg/controllers/nodeclass/status/suite_test.go b/pkg/controllers/nodeclass/status/suite_test.go deleted file mode 100644 index ccf8d05dbbb7..000000000000 --- a/pkg/controllers/nodeclass/status/suite_test.go +++ /dev/null @@ -1,90 +0,0 @@ -/* -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -package status_test - -import ( - "context" - "testing" - - "sigs.k8s.io/karpenter/pkg/test/v1alpha1" - - coreoptions "sigs.k8s.io/karpenter/pkg/operator/options" - coretest "sigs.k8s.io/karpenter/pkg/test" - - "github.com/samber/lo" - - "github.com/aws/karpenter-provider-aws/pkg/apis" - v1 "github.com/aws/karpenter-provider-aws/pkg/apis/v1" - "github.com/aws/karpenter-provider-aws/pkg/cloudprovider" - "github.com/aws/karpenter-provider-aws/pkg/controllers/nodeclass/status" - "github.com/aws/karpenter-provider-aws/pkg/operator/options" - "github.com/aws/karpenter-provider-aws/pkg/test" - - . "github.com/onsi/ginkgo/v2" - . "github.com/onsi/gomega" - . "sigs.k8s.io/karpenter/pkg/test/expectations" - . "sigs.k8s.io/karpenter/pkg/utils/testing" -) - -var ctx context.Context -var env *coretest.Environment -var awsEnv *test.Environment -var nodeClass *v1.EC2NodeClass -var statusController *status.Controller -var cloudProvider cloudprovider.CloudProvider - -func TestAPIs(t *testing.T) { - ctx = TestContextWithLogger(t) - RegisterFailHandler(Fail) - RunSpecs(t, "EC2NodeClass") -} - -var _ = BeforeSuite(func() { - env = coretest.NewEnvironment(coretest.WithCRDs(test.RemoveNodeClassTagValidation(apis.CRDs)...), coretest.WithCRDs(v1alpha1.CRDs...), coretest.WithFieldIndexers(coretest.NodeClaimNodeClassRefFieldIndexer(ctx))) - ctx = coreoptions.ToContext(ctx, coretest.Options()) - ctx = options.ToContext(ctx, test.Options()) - awsEnv = test.NewEnvironment(ctx, env) -}) - -var _ = AfterSuite(func() { - Expect(env.Stop()).To(Succeed(), "Failed to stop environment") -}) - -var _ = BeforeEach(func() { - ctx = coreoptions.ToContext(ctx, coretest.Options()) - nodeClass = test.EC2NodeClass() - awsEnv.Reset() - - Expect(awsEnv.InstanceTypesProvider.UpdateInstanceTypes(ctx)).To(Succeed()) - Expect(awsEnv.InstanceTypesProvider.UpdateInstanceTypeOfferings(ctx)).To(Succeed()) - - cloudProvider = lo.FromPtr(cloudprovider.New(awsEnv.InstanceTypesProvider, awsEnv.InstanceProvider, coretest.NewEventRecorder(), - env.Client, awsEnv.AMIProvider, awsEnv.SecurityGroupProvider)) - - statusController = status.NewController( - env.Client, - awsEnv.SubnetProvider, - awsEnv.SecurityGroupProvider, - awsEnv.AMIProvider, - awsEnv.InstanceProfileProvider, - awsEnv.LaunchTemplateProvider, - cloudProvider, - awsEnv.InstanceProvider, - ) -}) - -var _ = AfterEach(func() { - ExpectCleanedUp(ctx, env.Client) -}) diff --git a/pkg/controllers/nodeclass/suite_test.go b/pkg/controllers/nodeclass/suite_test.go index 13e96713ced4..2a7e8813db9b 100644 --- a/pkg/controllers/nodeclass/suite_test.go +++ b/pkg/controllers/nodeclass/suite_test.go @@ -73,6 +73,7 @@ var _ = BeforeSuite(func() { awsEnv.AMIProvider, awsEnv.InstanceProfileProvider, awsEnv.LaunchTemplateProvider, + awsEnv.EC2API, ) }) diff --git a/pkg/controllers/nodeclass/validation.go b/pkg/controllers/nodeclass/validation.go index d0bf6dd6b504..383c71edf4c7 100644 --- a/pkg/controllers/nodeclass/validation.go +++ b/pkg/controllers/nodeclass/validation.go @@ -16,26 +16,38 @@ package nodeclass import ( "context" + "encoding/base64" "errors" "fmt" + "time" "github.com/samber/lo" "sigs.k8s.io/controller-runtime/pkg/reconcile" - corecloudprovider "sigs.k8s.io/karpenter/pkg/cloudprovider" - coretest "sigs.k8s.io/karpenter/pkg/test" + + "github.com/aws/aws-sdk-go-v2/aws" + "github.com/aws/aws-sdk-go-v2/service/ec2" + ec2types "github.com/aws/aws-sdk-go-v2/service/ec2/types" + karpv1 "sigs.k8s.io/karpenter/pkg/apis/v1" v1 "github.com/aws/karpenter-provider-aws/pkg/apis/v1" - "github.com/aws/karpenter-provider-aws/pkg/cloudprovider" - "github.com/aws/karpenter-provider-aws/pkg/providers/instance" + sdk "github.com/aws/karpenter-provider-aws/pkg/aws" + awserrors "github.com/aws/karpenter-provider-aws/pkg/errors" + "github.com/aws/karpenter-provider-aws/pkg/operator/options" + "github.com/aws/karpenter-provider-aws/pkg/providers/amifamily" + "github.com/aws/karpenter-provider-aws/pkg/utils" ) type Validation struct { - cloudProvider cloudprovider.CloudProvider - instanceProvider instance.Provider + ec2api sdk.EC2API + + amiProvider amifamily.Provider } +//nolint:gocyclo func (n Validation) Reconcile(ctx context.Context, nodeClass *v1.EC2NodeClass) (reconcile.Result, error) { + //nolint:staticcheck + ctx = context.WithValue(ctx, "reconcile", true) //Tag Validation if offendingTag, found := lo.FindKeyBy(nodeClass.Spec.Tags, func(k string, v string) bool { for _, exp := range v1.RestrictedTagPatterns { @@ -50,31 +62,234 @@ func (n Validation) Reconcile(ctx context.Context, nodeClass *v1.EC2NodeClass) ( return reconcile.Result{}, reconcile.TerminalError(fmt.Errorf("%q tag does not pass tag validation requirements", offendingTag)) } //Auth Validation - //validates createfleet, describelaunchtemplate, createtags, and terminateinstances - //nolint:ineffassign, staticcheck - ctx = context.WithValue(ctx, "DryRun", lo.ToPtr(true)) + amis, err := n.amiProvider.List(ctx, nodeClass) + if err != nil { + return reconcile.Result{}, err + } if nodeClass.StatusConditions().Get(v1.ConditionTypeSubnetsReady).IsFalse() || nodeClass.StatusConditions().Get(v1.ConditionTypeAMIsReady).IsFalse() { return reconcile.Result{}, nil } - nodeClaim := coretest.NodeClaim() - nodeClaim.Spec.NodeClassRef.Name = nodeClass.Name + nodeClaim := &karpv1.NodeClaim{ + Spec: karpv1.NodeClaimSpec{ + NodeClassRef: &karpv1.NodeClassReference{ + Name: nodeClass.ObjectMeta.Name, + }, + }, + Status: karpv1.NodeClaimStatus{ + ImageID: amis[0].AmiID, + }, + } + tags, err := utils.GetTags(ctx, nodeClass, nodeClaim, options.FromContext(ctx).ClusterName) + if err != nil { + return reconcile.Result{}, err + } var errs []error - //create checks createfleet, and describelaunchtemplate - if _, err := n.cloudProvider.Create(ctx, nodeClaim); err != nil { - errs = append(errs, fmt.Errorf("create: %w", err)) + + createFleetInput := &ec2.CreateFleetInput{ + DryRun: lo.ToPtr(true), + Type: ec2types.FleetTypeInstant, + Context: nodeClass.Spec.Context, + LaunchTemplateConfigs: []ec2types.FleetLaunchTemplateConfigRequest{ + { + LaunchTemplateSpecification: &ec2types.FleetLaunchTemplateSpecificationRequest{ + LaunchTemplateId: aws.String("lt-1234567890abcdef0"), + Version: aws.String("1"), + }, + Overrides: []ec2types.FleetLaunchTemplateOverridesRequest{ + { + InstanceType: ec2types.InstanceTypeT3Micro, + SubnetId: aws.String("subnet-1234567890abcdef0"), + }, + { + InstanceType: ec2types.InstanceTypeT3Small, + SubnetId: aws.String("subnet-1234567890abcdef1"), + }, + }, + }, + }, + TargetCapacitySpecification: &ec2types.TargetCapacitySpecificationRequest{ + DefaultTargetCapacityType: karpv1.CapacityTypeOnDemand, + TotalTargetCapacity: aws.Int32(1), + }, + TagSpecifications: []ec2types.TagSpecification{ + {ResourceType: ec2types.ResourceTypeInstance, Tags: utils.MergeTags(tags)}, + {ResourceType: ec2types.ResourceTypeVolume, Tags: utils.MergeTags(tags)}, + {ResourceType: ec2types.ResourceTypeFleet, Tags: utils.MergeTags(tags)}, + }, } - if err := n.instanceProvider.Delete(ctx, "mock-id"); err != nil { - errs = append(errs, fmt.Errorf("delete: %w", err)) + if _, err := n.ec2api.CreateFleet(ctx, createFleetInput); awserrors.IsUnauthorizedError(err) { + errs = append(errs, fmt.Errorf("create fleet")) } - if err := n.instanceProvider.CreateTags(ctx, "mock-id", map[string]string{"mock-tag": "mock-tag-value"}); err != nil { - errs = append(errs, fmt.Errorf("create tags: %w", err)) + networkInterfaces := []ec2types.LaunchTemplateInstanceNetworkInterfaceSpecificationRequest{ + { + DeviceIndex: aws.Int32(0), + SubnetId: aws.String(nodeClass.Status.Subnets[0].ID), + Groups: lo.Map(nodeClass.Status.SecurityGroups, func(sg v1.SecurityGroup, _ int) string { + return sg.ID + }), + DeleteOnTermination: aws.Bool(true), + AssociatePublicIpAddress: aws.Bool(true), + NetworkCardIndex: aws.Int32(0), + InterfaceType: aws.String(string(ec2types.NetworkInterfaceTypeEfa)), + }, } - if corecloudprovider.IsNodeClassNotReadyError(errors.Join(errs...)) { + + launchTemplateDataTags := []ec2types.LaunchTemplateTagSpecificationRequest{ + {ResourceType: ec2types.ResourceTypeNetworkInterface, Tags: utils.MergeTags(tags)}, + } + + var userData *string + if nodeClass.Spec.UserData != nil { + encoded := base64.StdEncoding.EncodeToString([]byte(*nodeClass.Spec.UserData)) + userData = &encoded + } + + createLaunchTemplateInput := &ec2.CreateLaunchTemplateInput{ + //this one is not a dry run because we need a launch template anyways + LaunchTemplateName: lo.ToPtr(fmt.Sprintf("lt-%d", time.Now().UnixNano())), + LaunchTemplateData: &ec2types.RequestLaunchTemplateData{ + BlockDeviceMappings: blockDeviceMappings(nodeClass.Spec.BlockDeviceMappings), + IamInstanceProfile: &ec2types.LaunchTemplateIamInstanceProfileSpecificationRequest{ + Name: aws.String(nodeClass.Status.InstanceProfile), + }, + Monitoring: &ec2types.LaunchTemplatesMonitoringRequest{ + Enabled: nodeClass.Spec.DetailedMonitoring, + }, + // If the network interface is defined, the security groups are defined within it + SecurityGroupIds: lo.Ternary(networkInterfaces != nil, nil, lo.Map(nodeClass.Status.SecurityGroups, func(s v1.SecurityGroup, _ int) string { return s.ID })), + UserData: userData, + ImageId: aws.String(nodeClaim.Status.ImageID), + MetadataOptions: &ec2types.LaunchTemplateInstanceMetadataOptionsRequest{ + HttpEndpoint: ec2types.LaunchTemplateInstanceMetadataEndpointState(lo.FromPtr(nodeClass.Spec.MetadataOptions.HTTPEndpoint)), + HttpProtocolIpv6: ec2types.LaunchTemplateInstanceMetadataProtocolIpv6(lo.FromPtr(nodeClass.Spec.MetadataOptions.HTTPProtocolIPv6)), + //Will be removed when we update options.MetadataOptions.HTTPPutResponseHopLimit type to be int32 + //nolint: gosec + HttpPutResponseHopLimit: lo.ToPtr(int32(lo.FromPtr(nodeClass.Spec.MetadataOptions.HTTPPutResponseHopLimit))), + HttpTokens: ec2types.LaunchTemplateHttpTokensState(lo.FromPtr(nodeClass.Spec.MetadataOptions.HTTPTokens)), + InstanceMetadataTags: ec2types.LaunchTemplateInstanceMetadataTagsStateDisabled, + }, + NetworkInterfaces: networkInterfaces, + TagSpecifications: launchTemplateDataTags, + }, + TagSpecifications: []ec2types.TagSpecification{ + { + ResourceType: ec2types.ResourceTypeLaunchTemplate, + Tags: utils.MergeTags(tags), + }, + }, + } + + describeLaunchTemplatesInput := &ec2.DescribeLaunchTemplatesInput{ + DryRun: lo.ToPtr(true), + LaunchTemplateNames: []string{"mock-lt-name"}, + } + + if _, err := n.ec2api.DescribeLaunchTemplates(ctx, describeLaunchTemplatesInput); awserrors.IsUnauthorizedError(err) { + errs = append(errs, fmt.Errorf("describe launch template")) + nodeClass.StatusConditions().SetFalse(v1.ConditionTypeValidationSucceeded, "NodeClassNotReady", fmt.Sprintf("unauthorized operation %v", errors.Join(errs...))) + //returning here because run instances depends on being able to create a launch template and delete launch template needs describe launch template + return reconcile.Result{}, fmt.Errorf("unauthorized operation %w", errors.Join(errs...)) + } + + lt, err := n.ec2api.CreateLaunchTemplate(ctx, createLaunchTemplateInput) + + if awserrors.IsUnauthorizedError(err) { + errs = append(errs, fmt.Errorf("create launch template")) + nodeClass.StatusConditions().SetFalse(v1.ConditionTypeValidationSucceeded, "NodeClassNotReady", fmt.Sprintf("unauthorized operation %v", errors.Join(errs...))) + //returning here because run instances depends on being able to create a launch template + return reconcile.Result{}, fmt.Errorf("unauthorized operation %w", errors.Join(errs...)) + } + + imageOutput, err := n.ec2api.DescribeImages(ctx, &ec2.DescribeImagesInput{ + ImageIds: []string{amis[0].AmiID}, + }) + if err != nil { + return reconcile.Result{}, err + } + + describeInstanceTypesInput := &ec2.DescribeInstanceTypesInput{ + Filters: []ec2types.Filter{ + { + Name: aws.String("processor-info.supported-architecture"), + Values: []string{string(imageOutput.Images[0].Architecture)}, + }, + }, + } + instancetypes, err := n.ec2api.DescribeInstanceTypes(ctx, describeInstanceTypesInput) + if err != nil { + return reconcile.Result{}, err + } + + runInstancesInput := &ec2.RunInstancesInput{ + DryRun: lo.ToPtr(true), + MaxCount: aws.Int32(1), + MinCount: aws.Int32(1), + TagSpecifications: []ec2types.TagSpecification{ + { + ResourceType: ec2types.ResourceTypeInstance, + Tags: utils.MergeTags(tags), + }, + { + ResourceType: ec2types.ResourceTypeVolume, + Tags: utils.MergeTags(tags), + }, + }, + InstanceType: instancetypes.InstanceTypes[0].InstanceType, + IamInstanceProfile: &ec2types.IamInstanceProfileSpecification{ + Name: aws.String(nodeClass.Status.InstanceProfile), + }, + LaunchTemplate: &ec2types.LaunchTemplateSpecification{ + LaunchTemplateName: lt.LaunchTemplate.LaunchTemplateName, + Version: aws.String("1"), + }, + } + if _, err := n.ec2api.RunInstances(ctx, runInstancesInput); awserrors.IsUnauthorizedError(err) { + errs = append(errs, fmt.Errorf("run instances")) + } + + deleteLaunchTemplateInput := ec2.DeleteLaunchTemplateInput{ + LaunchTemplateName: lt.LaunchTemplate.LaunchTemplateName, + } + + _, err = n.ec2api.DeleteLaunchTemplate(ctx, &deleteLaunchTemplateInput) + if err != nil { + nodeClass.StatusConditions().SetFalse(v1.ConditionTypeValidationSucceeded, "NodeClassNotReady", fmt.Sprintf("delete launch template: %v", err)) + return reconcile.Result{}, fmt.Errorf("delete launch template: %w", err) + + } + + if errors.Join(errs...) != nil { nodeClass.StatusConditions().SetFalse(v1.ConditionTypeValidationSucceeded, "NodeClassNotReady", fmt.Sprintf("unauthorized operation %v", errors.Join(errs...))) return reconcile.Result{}, fmt.Errorf("unauthorized operation %w", errors.Join(errs...)) } nodeClass.StatusConditions().SetTrue(v1.ConditionTypeValidationSucceeded) return reconcile.Result{}, nil } + +func blockDeviceMappings(blockDeviceMappings []*v1.BlockDeviceMapping) []ec2types.LaunchTemplateBlockDeviceMappingRequest { + if len(blockDeviceMappings) == 0 { + return nil + } + var blockDeviceMappingsRequest []ec2types.LaunchTemplateBlockDeviceMappingRequest + for _, blockDeviceMapping := range blockDeviceMappings { + blockDeviceMappingsRequest = append(blockDeviceMappingsRequest, ec2types.LaunchTemplateBlockDeviceMappingRequest{ + DeviceName: blockDeviceMapping.DeviceName, + Ebs: &ec2types.LaunchTemplateEbsBlockDeviceRequest{ + DeleteOnTermination: blockDeviceMapping.EBS.DeleteOnTermination, + Encrypted: blockDeviceMapping.EBS.Encrypted, + VolumeType: ec2types.VolumeType(aws.ToString(blockDeviceMapping.EBS.VolumeType)), + //Lints here can be removed when we update options.EBS.IOPS and Throughput type to be int32 + //nolint: gosec + Iops: lo.EmptyableToPtr(int32(lo.FromPtr(blockDeviceMapping.EBS.IOPS))), + //nolint: gosec + Throughput: lo.EmptyableToPtr(int32(lo.FromPtr(blockDeviceMapping.EBS.Throughput))), + KmsKeyId: blockDeviceMapping.EBS.KMSKeyID, + SnapshotId: blockDeviceMapping.EBS.SnapshotID, + VolumeSize: lo.ToPtr(int32(blockDeviceMapping.EBS.VolumeSize.AsApproximateFloat64())), + }, + }) + } + return blockDeviceMappingsRequest +} diff --git a/pkg/controllers/nodeclass/validation_test.go b/pkg/controllers/nodeclass/validation_test.go index 285fd914c27b..ce6ef241bbe5 100644 --- a/pkg/controllers/nodeclass/validation_test.go +++ b/pkg/controllers/nodeclass/validation_test.go @@ -18,9 +18,11 @@ import ( status "github.com/awslabs/operatorpkg/status" "github.com/samber/lo" + "github.com/aws/aws-sdk-go-v2/service/ec2" "github.com/aws/smithy-go" v1 "github.com/aws/karpenter-provider-aws/pkg/apis/v1" + awserrors "github.com/aws/karpenter-provider-aws/pkg/errors" "github.com/aws/karpenter-provider-aws/pkg/fake" "github.com/aws/karpenter-provider-aws/pkg/test" @@ -59,7 +61,7 @@ var _ = Describe("NodeClass Validation Status Controller", func() { DescribeTable("should update status condition on nodeClass as NotReady when tag validation fails", func(illegalTag map[string]string) { nodeClass.Spec.Tags = illegalTag ExpectApplied(ctx, env.Client, nodeClass) - err := ExpectObjectReconcileFailed(ctx, env.Client, statusController, nodeClass) + err := ExpectObjectReconcileFailed(ctx, env.Client, controller, nodeClass) Expect(err).To(HaveOccurred()) nodeClass = ExpectExists(ctx, env.Client, nodeClass) Expect(nodeClass.Status.Conditions).To(HaveLen(6)) @@ -76,7 +78,7 @@ var _ = Describe("NodeClass Validation Status Controller", func() { It("should update status condition as Ready when tags are valid", func() { nodeClass.Spec.Tags = map[string]string{} ExpectApplied(ctx, env.Client, nodeClass) - ExpectObjectReconciled(ctx, env.Client, statusController, nodeClass) + ExpectObjectReconciled(ctx, env.Client, controller, nodeClass) nodeClass = ExpectExists(ctx, env.Client, nodeClass) Expect(nodeClass.StatusConditions().Get(v1.ConditionTypeValidationSucceeded).IsTrue()).To(BeTrue()) @@ -89,38 +91,43 @@ var _ = Describe("NodeClass Validation Status Controller", func() { awsEnv.EC2API.CreateFleetBehavior.Error.Set(&smithy.GenericAPIError{ Code: "UnauthorizedOperation", }, fake.MaxCalls(1)) - err := ExpectObjectReconcileFailed(ctx, env.Client, statusController, nodeClass) + err := ExpectObjectReconcileFailed(ctx, env.Client, controller, nodeClass) Expect(err).To(HaveOccurred()) nodeClass = ExpectExists(ctx, env.Client, nodeClass) Expect(nodeClass.Status.Conditions).To(HaveLen(6)) Expect(nodeClass.StatusConditions().Get(v1.ConditionTypeValidationSucceeded).IsFalse()).To(BeTrue()) }) - It("should update status condition on nodeClass as NotReady when nodeclass has authorization failure due to terminateinstances", func() { + It("should update status condition on nodeClass as NotReady when nodeclass has authorization failure due to runinstances", func() { ExpectApplied(ctx, env.Client, nodeClass) - awsEnv.EC2API.TerminateInstancesBehavior.Error.Set(&smithy.GenericAPIError{ + awsEnv.EC2API.RunInstancesBehavior.Error.Set(&smithy.GenericAPIError{ Code: "UnauthorizedOperation", - }, fake.MaxCalls(2)) - err := ExpectObjectReconcileFailed(ctx, env.Client, statusController, nodeClass) + }, fake.MaxCalls(1)) + err := ExpectObjectReconcileFailed(ctx, env.Client, controller, nodeClass) Expect(err).To(HaveOccurred()) nodeClass = ExpectExists(ctx, env.Client, nodeClass) Expect(nodeClass.Status.Conditions).To(HaveLen(6)) Expect(nodeClass.StatusConditions().Get(v1.ConditionTypeValidationSucceeded).IsFalse()).To(BeTrue()) }) - It("should update status condition on nodeClass as NotReady when nodeclass has authorization failure due to CreateTags", func() { + It("should update status condition on nodeClass as NotReady when nodeclass has authorization failure due to describelt", func() { ExpectApplied(ctx, env.Client, nodeClass) - awsEnv.EC2API.CreateTagsBehavior.Error.Set(&smithy.GenericAPIError{ + awsEnv.EC2API.NextError.Set(&smithy.GenericAPIError{ Code: "UnauthorizedOperation", - }, fake.MaxCalls(1)) - err := ExpectObjectReconcileFailed(ctx, env.Client, statusController, nodeClass) + }) + describeLaunchTemplatesInput := &ec2.DescribeLaunchTemplatesInput{ + DryRun: lo.ToPtr(true), + LaunchTemplateNames: []string{"mock-lt-name"}, + } + + _, err := awsEnv.EC2API.DescribeLaunchTemplates(ctx, describeLaunchTemplatesInput) + if !awserrors.IsUnauthorizedError(err) { + err = nil + } Expect(err).To(HaveOccurred()) - nodeClass = ExpectExists(ctx, env.Client, nodeClass) - Expect(nodeClass.Status.Conditions).To(HaveLen(6)) - Expect(nodeClass.StatusConditions().Get(v1.ConditionTypeValidationSucceeded).IsFalse()).To(BeTrue()) }) It("should update status condition as Ready", func() { nodeClass.Spec.Tags = map[string]string{} ExpectApplied(ctx, env.Client, nodeClass) - ExpectObjectReconciled(ctx, env.Client, statusController, nodeClass) + ExpectObjectReconciled(ctx, env.Client, controller, nodeClass) nodeClass = ExpectExists(ctx, env.Client, nodeClass) Expect(nodeClass.StatusConditions().Get(v1.ConditionTypeValidationSucceeded).IsTrue()).To(BeTrue()) diff --git a/pkg/errors/errors.go b/pkg/errors/errors.go index 6e02594c4b45..a308a0aced45 100644 --- a/pkg/errors/errors.go +++ b/pkg/errors/errors.go @@ -114,9 +114,8 @@ func IsUnauthorizedError(err error) bool { } var apiErr smithy.APIError if errors.As(err, &apiErr) { - return apiErr.ErrorCode() == "UnauthorizedOperation" + return strings.Contains(apiErr.ErrorCode(), "UnauthorizedOperation") } - return strings.Contains(err.Error(), "UnauthorizedOperation") } diff --git a/pkg/fake/ec2api.go b/pkg/fake/ec2api.go index 5339a6150b75..53a9372925a0 100644 --- a/pkg/fake/ec2api.go +++ b/pkg/fake/ec2api.go @@ -59,6 +59,7 @@ type EC2Behavior struct { TerminateInstancesBehavior MockedFunction[ec2.TerminateInstancesInput, ec2.TerminateInstancesOutput] DescribeInstancesBehavior MockedFunction[ec2.DescribeInstancesInput, ec2.DescribeInstancesOutput] CreateTagsBehavior MockedFunction[ec2.CreateTagsInput, ec2.CreateTagsOutput] + RunInstancesBehavior MockedFunction[ec2.RunInstancesInput, ec2.RunInstancesOutput] CalledWithCreateLaunchTemplateInput AtomicPtrSlice[ec2.CreateLaunchTemplateInput] CalledWithDescribeImagesInput AtomicPtrSlice[ec2.DescribeImagesInput] Instances sync.Map @@ -212,12 +213,13 @@ func (e *EC2API) TerminateInstances(_ context.Context, input *ec2.TerminateInsta }) } -func (e *EC2API) CreateLaunchTemplate(_ context.Context, input *ec2.CreateLaunchTemplateInput, _ ...func(*ec2.Options)) (*ec2.CreateLaunchTemplateOutput, error) { +func (e *EC2API) CreateLaunchTemplate(ctx context.Context, input *ec2.CreateLaunchTemplateInput, _ ...func(*ec2.Options)) (*ec2.CreateLaunchTemplateOutput, error) { if !e.NextError.IsNil() { defer e.NextError.Reset() return nil, e.NextError.Get() } - if !*input.DryRun { + // We dont want to count the status controllers call to createlaunchtemplate + if ctx.Value("reconcile") != true { e.CalledWithCreateLaunchTemplateInput.Add(input) } launchTemplate := ec2types.LaunchTemplate{LaunchTemplateName: input.LaunchTemplateName} @@ -319,18 +321,22 @@ func filterInstances(instances []ec2types.Instance, filters []ec2types.Filter) [ return ret } -func (e *EC2API) DescribeImages(_ context.Context, input *ec2.DescribeImagesInput, _ ...func(*ec2.Options)) (*ec2.DescribeImagesOutput, error) { +func (e *EC2API) DescribeImages(ctx context.Context, input *ec2.DescribeImagesInput, _ ...func(*ec2.Options)) (*ec2.DescribeImagesOutput, error) { if !e.NextError.IsNil() { defer e.NextError.Reset() return nil, e.NextError.Get() } - e.CalledWithDescribeImagesInput.Add(input) - if !e.DescribeImagesOutput.IsNil() { - describeImagesOutput := e.DescribeImagesOutput.Clone() - describeImagesOutput.Images = FilterDescribeImages(describeImagesOutput.Images, input.Filters) - return describeImagesOutput, nil + // We dont want to count the status controllers call to describe images + if ctx.Value("reconcile") != true { + e.CalledWithDescribeImagesInput.Add(input) + if !e.DescribeImagesOutput.IsNil() { + describeImagesOutput := e.DescribeImagesOutput.Clone() + + describeImagesOutput.Images = FilterDescribeImages(describeImagesOutput.Images, input.Filters) + return describeImagesOutput, nil + } } - if input.Filters[0].Values[0] == "invalid" { + if input.Filters != nil && input.Filters[0].Values[0] == "invalid" { return &ec2.DescribeImagesOutput{}, nil } return &ec2.DescribeImagesOutput{ @@ -655,3 +661,24 @@ func (e *EC2API) DescribeSpotPriceHistory(_ context.Context, input *ec2.Describe // fail if the test doesn't provide specific data which causes our pricing provider to use its static price list return nil, errors.New("no pricing data provided") } + +func (e *EC2API) RunInstances(ctx context.Context, input *ec2.RunInstancesInput, optFns ...func(*ec2.Options)) (*ec2.RunInstancesOutput, error) { + return e.RunInstancesBehavior.Invoke(input, func(input *ec2.RunInstancesInput) (*ec2.RunInstancesOutput, error) { + if !e.NextError.IsNil() { + defer e.NextError.Reset() + return nil, e.NextError.Get() + } + + // Default implementation + instance := ec2types.Instance{ + InstanceId: aws.String(test.RandomName()), + InstanceType: input.InstanceType, + State: &ec2types.InstanceState{Name: ec2types.InstanceStateNameRunning}, + // Add other required fields + } + + return &ec2.RunInstancesOutput{ + Instances: []ec2types.Instance{instance}, + }, nil + }) +} diff --git a/pkg/operator/operator.go b/pkg/operator/operator.go index bef580542873..a2c68827f6fe 100644 --- a/pkg/operator/operator.go +++ b/pkg/operator/operator.go @@ -88,7 +88,7 @@ type Operator struct { VersionProvider *version.DefaultProvider InstanceTypesProvider *instancetype.DefaultProvider InstanceProvider instance.Provider - SSMProvider ssmp.Provider + EC2API ec2.Client } func NewOperator(ctx context.Context, operator *operator.Operator) (context.Context, *Operator) { @@ -200,7 +200,7 @@ func NewOperator(ctx context.Context, operator *operator.Operator) (context.Cont PricingProvider: pricingProvider, InstanceTypesProvider: instanceTypeProvider, InstanceProvider: instanceProvider, - SSMProvider: ssmProvider, + EC2API: *ec2api, } } diff --git a/pkg/providers/launchtemplate/launchtemplate.go b/pkg/providers/launchtemplate/launchtemplate.go index e303984bc6a0..c9cfa098b312 100644 --- a/pkg/providers/launchtemplate/launchtemplate.go +++ b/pkg/providers/launchtemplate/launchtemplate.go @@ -191,9 +191,7 @@ func (p *DefaultProvider) ensureLaunchTemplate(ctx context.Context, options *ami return launchTemplate.(ec2types.LaunchTemplate), nil } // Attempt to find an existing LT. - dryRun := ctx.Value("DryRun") != nil && *(ctx.Value("DryRun").(*bool)) output, err := p.ec2api.DescribeLaunchTemplates(ctx, &ec2.DescribeLaunchTemplatesInput{ - DryRun: &dryRun, LaunchTemplateNames: []string{name}, }) // Create LT if one doesn't exist @@ -228,9 +226,7 @@ func (p *DefaultProvider) createLaunchTemplate(ctx context.Context, options *ami launchTemplateDataTags = append(launchTemplateDataTags, ec2types.LaunchTemplateTagSpecificationRequest{ResourceType: ec2types.ResourceTypeSpotInstancesRequest, Tags: utils.MergeTags(options.Tags)}) } networkInterfaces := p.generateNetworkInterfaces(options) - dryRun := ctx.Value("DryRun") != nil && *(ctx.Value("DryRun").(*bool)) output, err := p.ec2api.CreateLaunchTemplate(ctx, &ec2.CreateLaunchTemplateInput{ - DryRun: &dryRun, LaunchTemplateName: aws.String(LaunchTemplateName(options)), LaunchTemplateData: &ec2types.RequestLaunchTemplateData{ BlockDeviceMappings: p.blockDeviceMappings(options.BlockDeviceMappings), diff --git a/pkg/test/environment.go b/pkg/test/environment.go index 2d9a9d243083..b95b6e9c05a9 100644 --- a/pkg/test/environment.go +++ b/pkg/test/environment.go @@ -86,6 +86,7 @@ type Environment struct { AMIResolver *amifamily.DefaultResolver VersionProvider *version.DefaultProvider LaunchTemplateProvider *launchtemplate.DefaultProvider + SSMProvider *ssmp.DefaultProvider } func NewEnvironment(ctx context.Context, env *coretest.Environment) *Environment { diff --git a/pkg/utils/utils.go b/pkg/utils/utils.go index d558b0790223..5b9ee16dcdf7 100644 --- a/pkg/utils/utils.go +++ b/pkg/utils/utils.go @@ -15,6 +15,7 @@ limitations under the License. package utils import ( + "context" "fmt" "os" "regexp" @@ -23,6 +24,9 @@ import ( "github.com/aws/aws-sdk-go-v2/aws" ec2types "github.com/aws/aws-sdk-go-v2/service/ec2/types" + karpv1 "sigs.k8s.io/karpenter/pkg/apis/v1" + + v1 "github.com/aws/karpenter-provider-aws/pkg/apis/v1" "github.com/samber/lo" ) @@ -83,3 +87,23 @@ func WithDefaultFloat64(key string, def float64) float64 { } return f } + +func GetTags(ctx context.Context, nodeClass *v1.EC2NodeClass, nodeClaim *karpv1.NodeClaim, clusterName string) (map[string]string, error) { + if offendingTag, found := lo.FindKeyBy(nodeClass.Spec.Tags, func(k string, v string) bool { + for _, exp := range v1.RestrictedTagPatterns { + if exp.MatchString(k) { + return true + } + } + return false + }); found { + return nil, fmt.Errorf("%q tag does not pass tag validation requirements", offendingTag) + } + staticTags := map[string]string{ + fmt.Sprintf("kubernetes.io/cluster/%s", clusterName): "owned", + karpv1.NodePoolLabelKey: nodeClaim.Labels[karpv1.NodePoolLabelKey], + v1.EKSClusterNameTagKey: clusterName, + v1.LabelNodeClass: nodeClass.Name, + } + return lo.Assign(nodeClass.Spec.Tags, staticTags), nil +} diff --git a/test/suites/nodeclass/nodeclass_test.go b/test/suites/nodeclass/nodeclass_test.go new file mode 100644 index 000000000000..5ce96a5f8a2b --- /dev/null +++ b/test/suites/nodeclass/nodeclass_test.go @@ -0,0 +1,453 @@ +/* +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package integration_test + +import ( + "fmt" + "strings" + + corev1 "k8s.io/api/core/v1" + + "github.com/aws/aws-sdk-go-v2/aws" + "github.com/aws/aws-sdk-go-v2/service/iam" + "sigs.k8s.io/controller-runtime/pkg/client" + coretest "sigs.k8s.io/karpenter/pkg/test" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" +) + +var _ = Describe("Nodeclass Validation", func() { + var createPolicyOutput *iam.CreatePolicyOutput + var roleName string + var err error + + AfterEach(func() { + _, err := env.IAMAPI.DetachRolePolicy(env.Context, &iam.DetachRolePolicyInput{ + RoleName: aws.String(roleName), + PolicyArn: createPolicyOutput.Policy.Arn, + }) + Expect(err).To(BeNil()) + + _, err = env.IAMAPI.DeletePolicy(env.Context, &iam.DeletePolicyInput{ + PolicyArn: createPolicyOutput.Policy.Arn, + }) + Expect(err).To(BeNil()) + }) + It("should fail reconciliation when CreateFleet is explicitly denied", func() { + // Create an explicit deny policy for CreateFleet + createPolicyInput := &iam.CreatePolicyInput{ + PolicyName: aws.String("DenyPolicy"), + PolicyDocument: aws.String(`{ + "Version": "2012-10-17", + "Statement": [ + { + "Effect": "Deny", + "Action": "ec2:CreateFleet", + "Resource": "*" + } + ] + }`), + } + + // Attach the deny policy to the Karpenter role + roleName = fmt.Sprintf("%s-karpenter", env.ClusterName) + + createPolicyOutput, err = env.IAMAPI.CreatePolicy(env.Context, createPolicyInput) + Expect(err).To(BeNil()) + + _, err = env.IAMAPI.AttachRolePolicy(env.Context, &iam.AttachRolePolicyInput{ + RoleName: aws.String(roleName), + PolicyArn: createPolicyOutput.Policy.Arn, + }) + Expect(err).To(BeNil()) + + // Create test resources + pod := coretest.Pod() + env.ExpectCreated(nodePool, nodeClass, pod) + + createdPod := &corev1.Pod{} + Eventually(func(g Gomega) { + err := env.Client.Get(env.Context, client.ObjectKey{ + Namespace: pod.Namespace, + Name: pod.Name, + }, createdPod) + g.Expect(err).To(BeNil()) + }, "30s", "5s").Should(Succeed()) + + // Expect the pod to remain unscheduled due to CreateFleet failure + Consistently(func(g Gomega) { + err := env.Client.Get(env.Context, client.ObjectKeyFromObject(pod), pod) + g.Expect(err).To(BeNil()) + g.Expect(pod.Spec.NodeName).To(Equal("")) + }, "30s", "5s").Should(Succeed()) + + // Verify the error in events + Eventually(func(g Gomega) { + events := &corev1.EventList{} + err := env.Client.List(env.Context, events) + g.Expect(err).To(BeNil()) + + found := false + for _, event := range events.Items { + if strings.Contains(event.Message, "Validation") && + strings.Contains(event.Message, "False") && + strings.Contains(event.Message, "unauthorized operation") && + strings.Contains(event.Message, "create fleet") { + found = true + break + } + } + g.Expect(found).To(BeTrue()) + }, "60s", "5s").Should(Succeed()) + }) + It("should fail reconciliation when RunInstances is explicitly denied", func() { + // Create an explicit deny policy for RunInstances + createPolicyInput := &iam.CreatePolicyInput{ + PolicyName: aws.String("DenyPolicy"), + PolicyDocument: aws.String(`{ + "Version": "2012-10-17", + "Statement": [ + { + "Effect": "Deny", + "Action": "ec2:RunInstances", + "Resource": "*" + } + ] + }`), + } + + // Attach the deny policy to the Karpenter role + roleName = fmt.Sprintf("%s-karpenter", env.ClusterName) + + createPolicyOutput, err = env.IAMAPI.CreatePolicy(env.Context, createPolicyInput) + Expect(err).To(BeNil()) + + _, err = env.IAMAPI.AttachRolePolicy(env.Context, &iam.AttachRolePolicyInput{ + RoleName: aws.String(roleName), + PolicyArn: createPolicyOutput.Policy.Arn, + }) + Expect(err).To(BeNil()) + + // Create test resources + pod := coretest.Pod() + env.ExpectCreated(nodePool, nodeClass, pod) + + createdPod := &corev1.Pod{} + Eventually(func(g Gomega) { + err := env.Client.Get(env.Context, client.ObjectKey{ + Namespace: pod.Namespace, + Name: pod.Name, + }, createdPod) + g.Expect(err).To(BeNil()) + }, "30s", "5s").Should(Succeed()) + + Consistently(func(g Gomega) { + err := env.Client.Get(env.Context, client.ObjectKeyFromObject(pod), pod) + g.Expect(err).To(BeNil()) + g.Expect(pod.Spec.NodeName).To(Equal("")) + }, "30s", "5s").Should(Succeed()) + + // Verify the error in events + Eventually(func(g Gomega) { + events := &corev1.EventList{} + err := env.Client.List(env.Context, events) + g.Expect(err).To(BeNil()) + + found := false + for _, event := range events.Items { + if strings.Contains(event.Message, "Validation") && + strings.Contains(event.Message, "False") && + strings.Contains(event.Message, "unauthorized operation") && + strings.Contains(event.Message, "run instances") { + found = true + break + } + } + g.Expect(found).To(BeTrue()) + }, "60s", "5s").Should(Succeed()) + }) + It("should fail reconciliation when CreateLaunchTemplate is explicitly denied", func() { + // Create an explicit deny policy for RunInstances + createPolicyInput := &iam.CreatePolicyInput{ + PolicyName: aws.String("DenyPolicy"), + PolicyDocument: aws.String(`{ + "Version": "2012-10-17", + "Statement": [ + { + "Effect": "Deny", + "Action": "ec2:CreateLaunchTemplate", + "Resource": "*" + } + ] + }`), + } + + // Attach the deny policy to the Karpenter role + roleName = fmt.Sprintf("%s-karpenter", env.ClusterName) + + createPolicyOutput, err = env.IAMAPI.CreatePolicy(env.Context, createPolicyInput) + Expect(err).To(BeNil()) + + _, err = env.IAMAPI.AttachRolePolicy(env.Context, &iam.AttachRolePolicyInput{ + RoleName: aws.String(roleName), + PolicyArn: createPolicyOutput.Policy.Arn, + }) + Expect(err).To(BeNil()) + + // Create test resources + pod := coretest.Pod() + env.ExpectCreated(nodePool, nodeClass, pod) + + createdPod := &corev1.Pod{} + Eventually(func(g Gomega) { + err := env.Client.Get(env.Context, client.ObjectKey{ + Namespace: pod.Namespace, + Name: pod.Name, + }, createdPod) + g.Expect(err).To(BeNil()) + }, "30s", "5s").Should(Succeed()) + + Consistently(func(g Gomega) { + err := env.Client.Get(env.Context, client.ObjectKeyFromObject(pod), pod) + g.Expect(err).To(BeNil()) + g.Expect(pod.Spec.NodeName).To(Equal("")) + }, "30s", "5s").Should(Succeed()) + + // Verify the error in events + Eventually(func(g Gomega) { + events := &corev1.EventList{} + err := env.Client.List(env.Context, events) + g.Expect(err).To(BeNil()) + + found := false + for _, event := range events.Items { + if strings.Contains(event.Message, "Validation") && + strings.Contains(event.Message, "False") && + strings.Contains(event.Message, "unauthorized operation") && + strings.Contains(event.Message, "create launch template") { + found = true + break + } + } + g.Expect(found).To(BeTrue()) + }, "60s", "5s").Should(Succeed()) + }) + It("should fail reconciliation when DescribeLaunchTemplate is explicitly denied", func() { + // Create an explicit deny policy for RunInstances + createPolicyInput := &iam.CreatePolicyInput{ + PolicyName: aws.String("DenyPolicy"), + PolicyDocument: aws.String(`{ + "Version": "2012-10-17", + "Statement": [ + { + "Effect": "Deny", + "Action": "ec2:DescribeLaunchTemplates", + "Resource": "*" + } + ] + }`), + } + + // Attach the deny policy to the Karpenter role + roleName = fmt.Sprintf("%s-karpenter", env.ClusterName) + + createPolicyOutput, err = env.IAMAPI.CreatePolicy(env.Context, createPolicyInput) + Expect(err).To(BeNil()) + + _, err = env.IAMAPI.AttachRolePolicy(env.Context, &iam.AttachRolePolicyInput{ + RoleName: aws.String(roleName), + PolicyArn: createPolicyOutput.Policy.Arn, + }) + Expect(err).To(BeNil()) + + // Create test resources + pod := coretest.Pod() + env.ExpectCreated(nodePool, nodeClass, pod) + + createdPod := &corev1.Pod{} + Eventually(func(g Gomega) { + err := env.Client.Get(env.Context, client.ObjectKey{ + Namespace: pod.Namespace, + Name: pod.Name, + }, createdPod) + g.Expect(err).To(BeNil()) + }, "30s", "5s").Should(Succeed()) + + Consistently(func(g Gomega) { + err := env.Client.Get(env.Context, client.ObjectKeyFromObject(pod), pod) + g.Expect(err).To(BeNil()) + g.Expect(pod.Spec.NodeName).To(Equal("")) + }, "30s", "5s").Should(Succeed()) + + // Verify the error in events + Eventually(func(g Gomega) { + events := &corev1.EventList{} + err := env.Client.List(env.Context, events) + g.Expect(err).To(BeNil()) + + found := false + for _, event := range events.Items { + if strings.Contains(event.Message, "Validation") && + strings.Contains(event.Message, "False") && + strings.Contains(event.Message, "unauthorized operation") && + strings.Contains(event.Message, "describe launch template") { + found = true + break + } + } + g.Expect(found).To(BeTrue()) + }, "90s", "5s").Should(Succeed()) + }) + It("should fail reconciliation when more than one permission is explicitly denied", func() { + // Create an explicit deny policy for RunInstances + createPolicyInput := &iam.CreatePolicyInput{ + PolicyName: aws.String("DenyPolicy"), + PolicyDocument: aws.String(`{ + "Version": "2012-10-17", + "Statement": [ + { + "Effect": "Deny", + "Action": [ + "ec2:RunInstances", + "ec2:CreateFleet" + ], + "Resource": "*" + } + ] +}`), + } + + // Attach the deny policy to the Karpenter role + roleName = fmt.Sprintf("%s-karpenter", env.ClusterName) + + createPolicyOutput, err = env.IAMAPI.CreatePolicy(env.Context, createPolicyInput) + Expect(err).To(BeNil()) + + _, err = env.IAMAPI.AttachRolePolicy(env.Context, &iam.AttachRolePolicyInput{ + RoleName: aws.String(roleName), + PolicyArn: createPolicyOutput.Policy.Arn, + }) + Expect(err).To(BeNil()) + + // Create test resources + pod := coretest.Pod() + env.ExpectCreated(nodePool, nodeClass, pod) + + createdPod := &corev1.Pod{} + Eventually(func(g Gomega) { + err := env.Client.Get(env.Context, client.ObjectKey{ + Namespace: pod.Namespace, + Name: pod.Name, + }, createdPod) + g.Expect(err).To(BeNil()) + }, "30s", "5s").Should(Succeed()) + + Consistently(func(g Gomega) { + err := env.Client.Get(env.Context, client.ObjectKeyFromObject(pod), pod) + g.Expect(err).To(BeNil()) + g.Expect(pod.Spec.NodeName).To(Equal("")) + }, "30s", "5s").Should(Succeed()) + + // Verify the error in events + Eventually(func(g Gomega) { + events := &corev1.EventList{} + err := env.Client.List(env.Context, events) + g.Expect(err).To(BeNil()) + + found := false + for _, event := range events.Items { + if strings.Contains(event.Message, "Validation") && + strings.Contains(event.Message, "False") && + strings.Contains(event.Message, "unauthorized operation") && + strings.Contains(event.Message, "run instances") && + strings.Contains(event.Message, "create fleet") { + found = true + break + } + } + g.Expect(found).To(BeTrue()) + }, "90s", "5s").Should(Succeed()) + }) + It("should pass reconciliation when policy has all required permissions", func() { + // Create an explicit deny policy for RunInstances + createPolicyInput := &iam.CreatePolicyInput{ + PolicyName: aws.String("DenyPolicy"), + PolicyDocument: aws.String(`{ + "Version": "2012-10-17", + "Statement": [ + { + "Effect": "Allow", + "Action": [ + "ec2:RunInstances", + "ec2:CreateFleet", + "ec2:CreateLaunchTemplate", + "ec2:DescribeLaunchTemplates" + ], + "Resource": "*" + } + ] +}`), + } + + // Attach the deny policy to the Karpenter role + roleName = fmt.Sprintf("%s-karpenter", env.ClusterName) + + createPolicyOutput, err = env.IAMAPI.CreatePolicy(env.Context, createPolicyInput) + Expect(err).To(BeNil()) + + _, err = env.IAMAPI.AttachRolePolicy(env.Context, &iam.AttachRolePolicyInput{ + RoleName: aws.String(roleName), + PolicyArn: createPolicyOutput.Policy.Arn, + }) + Expect(err).To(BeNil()) + + // Create test resources + pod := coretest.Pod() + env.ExpectCreated(nodePool, nodeClass, pod) + + createdPod := &corev1.Pod{} + Eventually(func(g Gomega) { + err := env.Client.Get(env.Context, client.ObjectKey{ + Namespace: pod.Namespace, + Name: pod.Name, + }, createdPod) + g.Expect(err).To(BeNil()) + }, "30s", "5s").Should(Succeed()) + + Consistently(func(g Gomega) { + err := env.Client.Get(env.Context, client.ObjectKeyFromObject(pod), pod) + g.Expect(err).To(BeNil()) + g.Expect(pod.Spec.NodeName).To(Equal("")) + }, "30s", "5s").Should(Succeed()) + + // Verify the error in events + Eventually(func(g Gomega) { + events := &corev1.EventList{} + err := env.Client.List(env.Context, events) + g.Expect(err).To(BeNil()) + + found := false + for _, event := range events.Items { + if strings.Contains(event.Message, "Validation") && + strings.Contains(event.Message, "True") { + found = true + break + } + } + g.Expect(found).To(BeTrue()) + }, "60s", "5s").Should(Succeed()) + }) +}) diff --git a/test/suites/nodeclass/suite_test.go b/test/suites/nodeclass/suite_test.go new file mode 100644 index 000000000000..3698e767191f --- /dev/null +++ b/test/suites/nodeclass/suite_test.go @@ -0,0 +1,50 @@ +/* +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package integration_test + +import ( + "testing" + + karpv1 "sigs.k8s.io/karpenter/pkg/apis/v1" + + v1 "github.com/aws/karpenter-provider-aws/pkg/apis/v1" + "github.com/aws/karpenter-provider-aws/test/pkg/environment/aws" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" +) + +var env *aws.Environment +var nodeClass *v1.EC2NodeClass +var nodePool *karpv1.NodePool + +func TestIntegration(t *testing.T) { + RegisterFailHandler(Fail) + BeforeSuite(func() { + env = aws.NewEnvironment(t) + }) + AfterSuite(func() { + env.Stop() + }) + RunSpecs(t, "TestNodeClass") +} + +var _ = BeforeEach(func() { + env.BeforeEach() + nodeClass = env.DefaultEC2NodeClass() + nodePool = env.DefaultNodePool(nodeClass) +}) +var _ = AfterEach(func() { env.Cleanup() }) +var _ = AfterEach(func() { env.AfterEach() })