Skip to content

Commit

Permalink
ARO-13380 - metrics: cwp status
Browse files Browse the repository at this point in the history
  • Loading branch information
Lini Kurien authored and LiniSusan committed Jan 17, 2025
1 parent e0729c8 commit ceb2f21
Show file tree
Hide file tree
Showing 171 changed files with 125,738 additions and 3 deletions.
1 change: 1 addition & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ require (
github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/keyvault/armkeyvault v1.4.0
github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/msi/armmsi v1.2.0
github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/network/armnetwork/v2 v2.2.1
github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/network/armnetwork/v6 v6.2.0
github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/storage/armstorage v1.5.0
github.com/Azure/azure-sdk-for-go/sdk/security/keyvault/azsecrets v1.1.0
github.com/Azure/azure-sdk-for-go/sdk/storage/azblob v1.3.2
Expand Down
2 changes: 2 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,8 @@ github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/msi/armmsi v1.2.0 h1:z4Yei
github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/msi/armmsi v1.2.0/go.mod h1:rko9SzMxcMk0NJsNAxALEGaTYyy79bNRwxgJfrH0Spw=
github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/network/armnetwork/v2 v2.2.1 h1:bWh0Z2rOEDfB/ywv/l0iHN1JgyazE6kW/aIA89+CEK0=
github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/network/armnetwork/v2 v2.2.1/go.mod h1:Bzf34hhAE9NSxailk8xVeLEZbUjOXcC+GnU1mMKdhLw=
github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/network/armnetwork/v6 v6.2.0 h1:HYGD75g0bQ3VO/Omedm54v4LrD3B1cGImuRF3AJ5wLo=
github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/network/armnetwork/v6 v6.2.0/go.mod h1:ulHyBFJOI0ONiRL4vcJTmS7rx18jQQlEPmAgo80cRdM=
github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/resources/armresources v1.2.0 h1:Dd+RhdJn0OTtVGaeDLZpcumkIVCtA/3/Fo42+eoYvVM=
github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/resources/armresources v1.2.0/go.mod h1:5kakwfW5CjC9KK+Q4wjXAg+ShuIm2mBMua0ZFj2C8PE=
github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/storage/armstorage v1.5.0 h1:AifHbc4mg0x9zW52WOpKbsHaDKuRhlI7TVl47thgQ70=
Expand Down
8 changes: 7 additions & 1 deletion pkg/monitor/cluster/cluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ import (
"sigs.k8s.io/controller-runtime/pkg/client/apiutil"

"github.com/Azure/ARO-RP/pkg/api"
"github.com/Azure/ARO-RP/pkg/env"
"github.com/Azure/ARO-RP/pkg/metrics"
"github.com/Azure/ARO-RP/pkg/monitor/dimension"
"github.com/Azure/ARO-RP/pkg/monitor/emitter"
Expand All @@ -47,6 +48,8 @@ type Monitor struct {
mcocli mcoclient.Interface
m metrics.Emitter
arocli aroclient.Interface
env env.Interface
tenantID string

ocpclientset client.Client
hiveclientset client.Client
Expand All @@ -63,7 +66,7 @@ type Monitor struct {
wg *sync.WaitGroup
}

func NewMonitor(log *logrus.Entry, restConfig *rest.Config, oc *api.OpenShiftCluster, m metrics.Emitter, hiveRestConfig *rest.Config, hourlyRun bool, wg *sync.WaitGroup) (*Monitor, error) {
func NewMonitor(log *logrus.Entry, restConfig *rest.Config, oc *api.OpenShiftCluster, env env.Interface, tenantID string, m metrics.Emitter, hiveRestConfig *rest.Config, hourlyRun bool, wg *sync.WaitGroup) (*Monitor, error) {
r, err := azure.ParseResourceID(oc.ID)
if err != nil {
return nil, err
Expand Down Expand Up @@ -132,6 +135,8 @@ func NewMonitor(log *logrus.Entry, restConfig *rest.Config, oc *api.OpenShiftClu
maocli: maocli,
mcocli: mcocli,
arocli: arocli,
env: env,
tenantID: tenantID,
m: m,
ocpclientset: ocpclientset,
hiveclientset: hiveclientset,
Expand Down Expand Up @@ -213,6 +218,7 @@ func (mon *Monitor) Monitor(ctx context.Context) (errs []error) {
mon.emitCertificateExpirationStatuses,
mon.emitEtcdCertificateExpiry,
mon.emitPrometheusAlerts, // at the end for now because it's the slowest/least reliable
mon.emitCWPStatus,
} {
err = f(ctx)
if err != nil {
Expand Down
203 changes: 203 additions & 0 deletions pkg/monitor/cluster/clusterwideproxystatus.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,203 @@
package cluster

// Copyright (c) Microsoft Corporation.
// Licensed under the Apache License 2.0.

import (
"context"
"net/url"
"strconv"
"strings"

"github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/network/armnetwork/v6"
"github.com/Azure/go-autorest/autorest/azure"
"github.com/sirupsen/logrus"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"

apisubnet "github.com/Azure/ARO-RP/pkg/api/util/subnet"
arov1alpha1 "github.com/Azure/ARO-RP/pkg/operator/apis/aro.openshift.io/v1alpha1"
)

const (
	// cwp is the metric name under which the cluster-wide-proxy status gauge is emitted.
	cwp = "clusterWideProxy.status"
	// cwpErrorMessage is a canned failure message.
	// NOTE(review): not referenced anywhere in this file — confirm it is used
	// elsewhere or remove it.
	cwpErrorMessage = "NoProxy entries are incorrect"
	// cluster is the name of the singleton config.openshift.io resources
	// ("cluster") fetched by emitCWPStatus.
	cluster = "cluster"
	// mandatory_no_proxies lists entries every ARO no_proxy configuration must contain:
	//   169.254.169.254 (the IMDS IP)
	//   168.63.129.16 (Azure DNS, if no custom DNS exists)
	//   localhost, 127.0.0.1, .svc, .cluster.local
	// NOTE(review): ".cluster.local." carries a trailing dot unlike the comment
	// above — confirm that is intentional. The snake_case name also deviates
	// from Go's MixedCaps convention (mandatoryNoProxies).
	mandatory_no_proxies = "localhost,127.0.0.1,.svc,.cluster.local.,168.63.129.16,169.254.169.254"
)

// emitAndLogCWPStatus records the cluster-wide-proxy status gauge and, on
// hourly runs, mirrors the same dimensions to the monitor's log.
func (mon *Monitor) emitAndLogCWPStatus(status bool, message string) {
	statusText := strconv.FormatBool(status)

	mon.emitGauge(cwp, 1, map[string]string{
		"status":  statusText,
		"Message": message,
	})

	if !mon.hourlyRun {
		return
	}
	mon.log.WithFields(logrus.Fields{
		"metric":  cwp,
		"status":  statusText,
		"Message": message,
	}).Print()
}

// emitCWPStatus emits the clusterWideProxy.status gauge. It reports whether
// the cluster-wide proxy (CWP) is enabled and, when enabled, verifies that the
// configured no_proxy list contains every entry ARO requires: the mandatory
// localhost/IMDS/Azure-DNS entries, the master and worker machine CIDRs, the
// cluster and service networks, the gateway domains, and the API server and
// console domains. Missing entries are reported in the gauge's Message
// dimension. Hourly log mirroring follows the Monitor's configured hourlyRun
// flag (the previous version force-set mon.hourlyRun = true here, a debug
// leftover that affected every other collector on this Monitor).
func (mon *Monitor) emitCWPStatus(ctx context.Context) error {
	proxyConfig, err := mon.configcli.ConfigV1().Proxies().Get(ctx, cluster, metav1.GetOptions{})
	if err != nil {
		mon.log.Errorf("Error in getting the cluster wide proxy: %v", err)
		return err
	}

	// CWP is treated as disabled when no proxy settings are present at all.
	if proxyConfig.Spec.HTTPProxy == "" && proxyConfig.Spec.HTTPSProxy == "" && proxyConfig.Spec.NoProxy == "" {
		mon.emitAndLogCWPStatus(false, "CWP not enabled")
		return nil
	}

	// Build a set of the configured no_proxy entries for O(1) lookups.
	noProxyMap := make(map[string]bool)
	for _, entry := range strings.Split(proxyConfig.Spec.NoProxy, ",") {
		noProxyMap[entry] = true
	}

	var missingNoProxy []string

	// Mandatory no_proxy entries.
	for _, mandatory := range strings.Split(mandatory_no_proxies, ",") {
		if !noProxyMap[mandatory] {
			missingNoProxy = append(missingNoProxy, mandatory)
		}
	}

	masterSubnetID, err := azure.ParseResourceID(mon.oc.Properties.MasterProfile.SubnetID)
	if err != nil {
		return err
	}
	credential, err := mon.env.FPNewClientCertificateCredential(mon.tenantID, nil)
	if err != nil {
		mon.log.Errorf("failed to obtain a credential: %v", err)
		return err
	}
	clientFactory, err := armnetwork.NewClientFactory(masterSubnetID.SubscriptionID, credential, nil)
	if err != nil {
		mon.log.Errorf("failed to create client: %v", err)
		return err
	}
	// One client serves the master and every worker lookup below.
	subnetsClient := clientFactory.NewSubnetsClient()

	// Master machine CIDR.
	masterVnetID, _, err := apisubnet.Split(mon.oc.Properties.MasterProfile.SubnetID)
	if err != nil {
		return err
	}
	masterVnet, err := azure.ParseResourceID(masterVnetID)
	if err != nil {
		return err
	}
	res, err := subnetsClient.Get(ctx, masterSubnetID.ResourceGroup, masterVnet.ResourceName, masterSubnetID.ResourceName, &armnetwork.SubnetsClientGetOptions{Expand: nil})
	if err != nil {
		mon.log.Errorf("failed to finish the request: %v", err)
		return err
	}
	// Guard the dereference: AddressPrefix may be unset (e.g. when the subnet
	// uses AddressPrefixes instead — TODO confirm which ARO subnets can do so).
	if res.Properties != nil && res.Properties.AddressPrefix != nil {
		if cidr := *res.Properties.AddressPrefix; !noProxyMap[cidr] {
			missingNoProxy = append(missingNoProxy, cidr)
		}
	}

	// Worker machine CIDRs.
	for _, workerProfile := range mon.oc.Properties.WorkerProfiles {
		workerSubnetID, err := azure.ParseResourceID(workerProfile.SubnetID)
		if err != nil {
			return err
		}
		workerVnetID, _, err := apisubnet.Split(workerProfile.SubnetID)
		if err != nil {
			return err
		}
		workerVnet, err := azure.ParseResourceID(workerVnetID)
		if err != nil {
			return err
		}
		workerRes, err := subnetsClient.Get(ctx, workerSubnetID.ResourceGroup, workerVnet.ResourceName, workerSubnetID.ResourceName, &armnetwork.SubnetsClientGetOptions{Expand: nil})
		if err != nil {
			// Return here: the previous version only logged this error and then
			// dereferenced the nil response, panicking on any ARM failure.
			mon.log.Errorf("failed to finish the request: %v", err)
			return err
		}
		if workerRes.Properties != nil && workerRes.Properties.AddressPrefix != nil {
			if cidr := *workerRes.Properties.AddressPrefix; !noProxyMap[cidr] {
				missingNoProxy = append(missingNoProxy, cidr)
			}
		}
	}

	// Cluster and service networks.
	networkConfig, err := mon.configcli.ConfigV1().Networks().Get(ctx, cluster, metav1.GetOptions{})
	if err != nil {
		mon.log.Errorf("Error in getting network info: %v", err)
		return err
	}
	for _, network := range networkConfig.Spec.ClusterNetwork {
		if !noProxyMap[network.CIDR] {
			missingNoProxy = append(missingNoProxy, network.CIDR)
		}
	}
	for _, network := range networkConfig.Spec.ServiceNetwork {
		if !noProxyMap[network] {
			missingNoProxy = append(missingNoProxy, network)
		}
	}

	// Gateway domains.
	clusterDetails, err := mon.arocli.AroV1alpha1().Clusters().Get(ctx, arov1alpha1.SingletonClusterName, metav1.GetOptions{})
	if err != nil {
		return err
	}
	for _, gatewayDomain := range clusterDetails.Spec.GatewayDomains {
		if !noProxyMap[gatewayDomain] {
			missingNoProxy = append(missingNoProxy, gatewayDomain)
		}
	}

	// APIServerInternal URL.
	infraConfig, err := mon.configcli.ConfigV1().Infrastructures().Get(ctx, cluster, metav1.GetOptions{})
	if err != nil {
		mon.log.Errorf("Error in getting network info: %v", err)
		return err
	}
	apiServerIntURL, err := url.Parse(infraConfig.Status.APIServerInternalURL)
	if err != nil {
		return err
	}
	// url.Hostname() strips any port (and IPv6 brackets), replacing the manual
	// strings.Split(host, ":")[0] of the previous version.
	if domain := apiServerIntURL.Hostname(); !noProxyMap[domain] {
		missingNoProxy = append(missingNoProxy, domain)
	}

	// APIServerProfile URL.
	apiServerProfileURL, err := url.Parse(mon.oc.Properties.APIServerProfile.URL)
	if err != nil {
		return err
	}
	if domain := apiServerProfileURL.Hostname(); !noProxyMap[domain] {
		missingNoProxy = append(missingNoProxy, domain)
	}

	// ConsoleProfile URL. Bug fix: the previous version computed the bare
	// domain but then looked up consolProfileURL.Host (which can still carry a
	// port) in the map, so a correct no_proxy entry could be reported missing.
	consoleProfileURL, err := url.Parse(mon.oc.Properties.ConsoleProfile.URL)
	if err != nil {
		return err
	}
	if domain := consoleProfileURL.Hostname(); !noProxyMap[domain] {
		missingNoProxy = append(missingNoProxy, domain)
	}

	if len(missingNoProxy) > 0 {
		mon.emitAndLogCWPStatus(true, "CWP enabled but missing "+strings.Join(missingNoProxy, ",")+" in the no_proxy list")
	}
	// NOTE(review): when CWP is enabled and nothing is missing, no gauge is
	// emitted at all — consider emitting a healthy status so the metric is not
	// simply absent; left unchanged to preserve the emitted-metric contract.

	return nil
}
78 changes: 78 additions & 0 deletions pkg/monitor/cluster/clusterwideproxystatus_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
package cluster

Check failure on line 1 in pkg/monitor/cluster/clusterwideproxystatus_test.go

View workflow job for this annotation

GitHub Actions / validate-go

group 3: mixed import type

// Copyright (c) Microsoft Corporation.
// Licensed under the Apache License 2.0.
import (
	"context"
	"sync"
	"testing"

	"go.uber.org/mock/gomock"
	configv1 "github.com/openshift/api/config/v1"
	configfake "github.com/openshift/client-go/config/clientset/versioned/fake"
	"github.com/sirupsen/logrus"
	"github.com/stretchr/testify/require"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"

	mock_metrics "github.com/Azure/ARO-RP/pkg/util/mocks/metrics"
)

// TestEmitCWPStatus exercises emitCWPStatus against a fake config clientset
// and a mocked metrics emitter.
//
// NOTE(review): all three subtests share one fake config client and one
// Monitor, so they are order-dependent; see the per-subtest notes below.
func TestEmitCWPStatus(t *testing.T) {
	ctrl := gomock.NewController(t)
	defer ctrl.Finish()
	mockMetrics := mock_metrics.NewMockEmitter(ctrl)

	fakeConfigClient := configfake.NewSimpleClientset()

	mon := &Monitor{
		configcli: fakeConfigClient,
		m:         mockMetrics, // Assign the mock emitter here
		log:       logrus.NewEntry(logrus.New()),
		wg:        &sync.WaitGroup{},
	}

	t.Run("no proxy configured", func(t *testing.T) {
		// An empty ProxySpec drives the "CWP not enabled" branch, which emits
		// exactly one gauge.
		proxy := &configv1.Proxy{
			ObjectMeta: metav1.ObjectMeta{Name: "cluster"},
			Spec:       configv1.ProxySpec{},
		}
		// Create error ignored: the "cluster" proxy does not exist yet here.
		_, _ = fakeConfigClient.ConfigV1().Proxies().Create(context.Background(), proxy, metav1.CreateOptions{})

		mockMetrics.EXPECT().
			EmitGauge("clusterWideProxy.status", int64(1), gomock.Any()).
			Times(1)

		err := mon.emitCWPStatus(context.Background())

		require.NoError(t, err)
	})

	t.Run("missing mandatory no_proxy entries", func(t *testing.T) {
		proxy := &configv1.Proxy{
			ObjectMeta: metav1.ObjectMeta{Name: "cluster"},
			Spec: configv1.ProxySpec{
				NoProxy: "localhost,.svc,.cluster.local",
			},
		}
		// NOTE(review): this Create presumably fails with AlreadyExists because
		// the previous subtest already created the "cluster" proxy with an
		// empty spec, and the error is discarded — so this subtest likely still
		// takes the "CWP not enabled" path rather than the missing-no_proxy
		// path it claims to test. Confirm, and use Update (or a fresh
		// clientset/Monitor per subtest) so NoProxy actually takes effect.
		_, _ = fakeConfigClient.ConfigV1().Proxies().Create(context.Background(), proxy, metav1.CreateOptions{})

		mockMetrics.EXPECT().
			EmitGauge("clusterWideProxy.status", int64(1), gomock.Any()).
			Times(1)

		err := mon.emitCWPStatus(context.Background())

		require.NoError(t, err)
	})

	t.Run("error fetching proxy configuration", func(t *testing.T) {
		// An empty clientset has no "cluster" proxy, so the Get inside
		// emitCWPStatus fails with a NotFound error.
		// NOTE(review): this replaces mon.configcli and never restores it; any
		// subtest added after this one will inherit the empty client.
		brokenFakeConfigClient := configfake.NewSimpleClientset()
		mon.configcli = brokenFakeConfigClient

		err := mon.emitCWPStatus(context.Background())

		require.Error(t, err)
		require.Contains(t, err.Error(), "not found")
	})
}
2 changes: 1 addition & 1 deletion pkg/monitor/worker.go
Original file line number Diff line number Diff line change
Expand Up @@ -283,7 +283,7 @@ func (mon *monitor) workOne(ctx context.Context, log *logrus.Entry, doc *api.Ope

nsgMon := nsg.NewMonitor(log, doc.OpenShiftCluster, mon.env, sub.ID, sub.Subscription.Properties.TenantID, mon.clusterm, dims, &wg, nsgMonTicker.C)

c, err := cluster.NewMonitor(log, restConfig, doc.OpenShiftCluster, mon.clusterm, hiveRestConfig, hourlyRun, &wg)
c, err := cluster.NewMonitor(log, restConfig, doc.OpenShiftCluster, mon.env, sub.Subscription.Properties.TenantID, mon.clusterm, hiveRestConfig, hourlyRun, &wg)
if err != nil {
log.Error(err)
mon.m.EmitGauge("monitor.cluster.failedworker", 1, map[string]string{
Expand Down
2 changes: 1 addition & 1 deletion test/e2e/monitor.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ var _ = Describe("Monitor", func() {
wg.Add(1)
mon, err := cluster.NewMonitor(log, clients.RestConfig, &api.OpenShiftCluster{
ID: resourceIDFromEnv(),
}, &noop.Noop{}, nil, true, &wg)
}, nil, "", &noop.Noop{}, nil, true, &wg)
Expect(err).NotTo(HaveOccurred())

By("running the monitor once")
Expand Down
Loading

0 comments on commit ceb2f21

Please sign in to comment.