From 93cce33614f97ab6a861de22b2bfba4d27614391 Mon Sep 17 00:00:00 2001 From: "Johannes M. Scheuermann" <jscheuermann@apple.com> Date: Mon, 3 Jun 2024 14:34:53 +0200 Subject: [PATCH 1/4] Prefer processes in coordinator selection for multi-region clusters --- .../foundationdb_database_configuration.go | 53 +++ controllers/change_coordinators.go | 3 +- controllers/change_coordinators_test.go | 449 ++++++++---------- controllers/generate_initial_cluster_file.go | 6 +- internal/locality/locality.go | 50 +- internal/locality/locality_test.go | 428 +++++++++++++---- internal/test_helper.go | 14 + 7 files changed, 660 insertions(+), 343 deletions(-) diff --git a/api/v1beta2/foundationdb_database_configuration.go b/api/v1beta2/foundationdb_database_configuration.go index 5368a4fb7..a5e651efe 100644 --- a/api/v1beta2/foundationdb_database_configuration.go +++ b/api/v1beta2/foundationdb_database_configuration.go @@ -24,6 +24,7 @@ import ( "encoding/json" "fmt" "html/template" + "math" "reflect" "sort" "strconv" @@ -562,6 +563,58 @@ func (configuration DatabaseConfiguration) GetNextConfigurationChange(finalConfi return finalConfiguration } +// GetPrimaryDCID will return the DC ID of the primary data center. The primary data center is the one with the highest +// priority in the region configuration. +// See: https://github.com/apple/foundationdb/wiki/Multi-Region-Replication#concepts +func (configuration DatabaseConfiguration) GetPrimaryDCID() string { + var candidate string + priority := math.MinInt + + for _, region := range configuration.Regions { + for _, dataCenter := range region.DataCenters { + if dataCenter.Satellite != 0 { + continue + } + + if dataCenter.Priority > priority { + candidate = dataCenter.ID + priority = dataCenter.Priority + } + } + } + + return candidate +} + +// GetMainDCsAndSatellites will return a set of main dcs and a set of satellites. If a dc is a main dc and a satellite +// it will only be counted as a main dc. +func (configuration DatabaseConfiguration) GetMainDCsAndSatellites() (map[string]None, map[string]None) { + mainDCs := map[string]None{} + satellites := map[string]None{} + + for _, region := range configuration.Regions { + for _, dataCenter := range region.DataCenters { + if dataCenter.Satellite == 0 { + mainDCs[dataCenter.ID] = None{} + // If the dc ID is present as satellite we will remove it. + delete(satellites, dataCenter.ID) + + // satellites + continue + } + + // If the dc ID is already present in the main dcs, we will not count this as a satellite. 
+ if _, ok := mainDCs[dataCenter.ID]; ok { + continue + } + + satellites[dataCenter.ID] = None{} + } + } + + return mainDCs, satellites +} + func (configuration DatabaseConfiguration) getRegionPriorities() map[string]int { priorities := make(map[string]int, len(configuration.Regions)) diff --git a/controllers/change_coordinators.go b/controllers/change_coordinators.go index 78dde2698..293571f9c 100644 --- a/controllers/change_coordinators.go +++ b/controllers/change_coordinators.go @@ -184,7 +184,8 @@ func selectCoordinators(logger logr.Logger, cluster *fdbv1beta2.FoundationDBClus } coordinators, err := locality.ChooseDistributedProcesses(cluster, candidates, coordinatorCount, locality.ProcessSelectionConstraint{ - HardLimits: locality.GetHardLimits(cluster), + HardLimits: locality.GetHardLimits(cluster), + SelectingCoordinators: true, }) logger.Info("Current coordinators", "coordinators", coordinators, "error", err) diff --git a/controllers/change_coordinators_test.go b/controllers/change_coordinators_test.go index 2c54a1efd..8fa563e04 100644 --- a/controllers/change_coordinators_test.go +++ b/controllers/change_coordinators_test.go @@ -83,7 +83,7 @@ var _ = Describe("Change coordinators", func() { When("all processes are healthy", func() { It("should only select storage processes", func() { Expect(cluster.DesiredCoordinatorCount()).To(BeNumerically("==", 3)) - Expect(len(candidates)).To(BeNumerically("==", cluster.DesiredCoordinatorCount())) + Expect(candidates).To(HaveLen(cluster.DesiredCoordinatorCount())) // Only select Storage processes since we select 3 processes and we have 4 storage processes for _, candidate := range candidates { @@ -104,7 +104,7 @@ var _ = Describe("Change coordinators", func() { It("should only select storage processes and exclude the removed process", func() { Expect(cluster.DesiredCoordinatorCount()).To(BeNumerically("==", 3)) - Expect(len(candidates)).To(BeNumerically("==", cluster.DesiredCoordinatorCount())) + Expect(candidates).To(HaveLen(cluster.DesiredCoordinatorCount())) // Only select Storage processes since we select 3 processes and we have 4 storage processes for _, candidate := range candidates { @@ -147,7 +147,7 @@ var _ = Describe("Change coordinators", func() { It("should select 2 storage processes and 1 TLog", func() { Expect(cluster.DesiredCoordinatorCount()).To(BeNumerically("==", 3)) - Expect(len(candidates)).To(BeNumerically("==", cluster.DesiredCoordinatorCount())) + Expect(candidates).To(HaveLen(cluster.DesiredCoordinatorCount())) // Only select Storage processes since we select 3 processes and we have 4 storage processes storageCnt := 0 @@ -195,7 +195,7 @@ var _ = Describe("Change coordinators", func() { It("should only select log processes", func() { Expect(cluster.DesiredCoordinatorCount()).To(BeNumerically("==", 3)) - Expect(len(candidates)).To(BeNumerically("==", cluster.DesiredCoordinatorCount())) + Expect(candidates).To(HaveLen(cluster.DesiredCoordinatorCount())) // Only select Storage processes since we select 3 processes and we have 4 storage processes for _, candidate := range candidates { @@ -227,9 +227,8 @@ var _ = Describe("Change coordinators", func() { var candidates []locality.Info var excludes []string var removals []fdbv1beta2.ProcessGroupID - var dcCnt int - var satCnt int var shouldFail bool + var primaryID string BeforeEach(func() { // ensure a clean state @@ -241,14 +240,11 @@ var _ = Describe("Change coordinators", func() { JustBeforeEach(func() { cluster.Spec.DatabaseConfiguration.UsableRegions = 2 - 
cluster.Spec.DataCenter = "dc0" cluster.Spec.ProcessGroupsToRemove = removals - setDatabaseConfiguration(cluster, satCnt) - var err error status, err = adminClient.GetStatus() Expect(err).NotTo(HaveOccurred()) - status.Cluster.Processes = generateProcessInfoForMultiRegion(dcCnt, satCnt, excludes) + status.Cluster.Processes = generateProcessInfoForMultiRegion(cluster.Spec.DatabaseConfiguration, excludes, cluster.GetRunningVersion()) candidates, err = selectCoordinators(testLogger, cluster, status) if shouldFail { @@ -260,23 +256,58 @@ var _ = Describe("Change coordinators", func() { When("using 2 dcs with 1 satellite", func() { BeforeEach(func() { - dcCnt = 2 - satCnt = 1 + primaryID = internal.GenerateRandomString(10) + remoteID := internal.GenerateRandomString(10) + satelliteID := internal.GenerateRandomString(10) + cluster.Spec.DataCenter = primaryID + cluster.Spec.DatabaseConfiguration.Regions = []fdbv1beta2.Region{ + { + DataCenters: []fdbv1beta2.DataCenter{ + { + ID: primaryID, + Priority: 1, + }, + { + ID: satelliteID, + Satellite: 1, + Priority: 1, + }, + { + ID: remoteID, + Satellite: 1, + }, + }, + }, + { + DataCenters: []fdbv1beta2.DataCenter{ + { + ID: remoteID, + }, + { + ID: satelliteID, + Satellite: 1, + Priority: 1, + }, + { + ID: primaryID, + Satellite: 1, + }, + }, + }, + } }) When("all processes are healthy", func() { It("should only select storage processes in primary and remote and Tlog in satellite", func() { Expect(cluster.DesiredCoordinatorCount()).To(BeNumerically("==", 9)) - Expect(len(candidates)).To(BeNumerically("==", cluster.DesiredCoordinatorCount())) + Expect(candidates).To(HaveLen(cluster.DesiredCoordinatorCount())) // Only select Storage processes since we select 3 processes and we have 4 storage processes storageCnt := 0 logCnt := 0 - zoneCnt := map[string]int{} + dcDistribution := map[string]int{} for _, candidate := range candidates { - zone := strings.Split(candidate.ID, "-")[0] - zoneCnt[zone]++ - + dcDistribution[candidate.LocalityData[fdbv1beta2.FDBLocalityDCIDKey]]++ if candidate.Class == fdbv1beta2.ProcessClassStorage { storageCnt++ } @@ -290,10 +321,9 @@ var _ = Describe("Change coordinators", func() { Expect(storageCnt).To(BeNumerically("==", 6)) Expect(logCnt).To(BeNumerically("==", 3)) // We should have 3 different zones - Expect(len(zoneCnt)).To(BeNumerically("==", 3)) - - for _, zoneVal := range zoneCnt { - Expect(zoneVal).To(BeNumerically("==", 3)) + Expect(len(dcDistribution)).To(BeNumerically("==", 3)) + for _, dcCount := range dcDistribution { + Expect(dcCount).To(BeNumerically("==", 3)) } }) }) @@ -301,29 +331,27 @@ var _ = Describe("Change coordinators", func() { When("some processes are excluded", func() { BeforeEach(func() { excludes = []string{ - "dc0-storage-1", - "dc0-storage-2", - "dc0-storage-3", - "dc0-storage-4", + primaryID + "-storage-1", + primaryID + "-storage-2", + primaryID + "-storage-3", + primaryID + "-storage-4", } }) It("should only select storage processes in primary and remote and Tlog in satellites and processes that are not excluded", func() { Expect(cluster.DesiredCoordinatorCount()).To(BeNumerically("==", 9)) - Expect(len(candidates)).To(BeNumerically("==", cluster.DesiredCoordinatorCount())) + Expect(candidates).To(HaveLen(cluster.DesiredCoordinatorCount())) // Only select Storage processes since we select 3 processes and we have 4 storage processes storageCnt := 0 logCnt := 0 - zoneCnt := map[string]int{} + dcDistribution := map[string]int{} for _, candidate := range candidates { for _, excluded := range 
excludes { Expect(candidate.ID).NotTo(Equal(excluded)) } - zone := strings.Split(candidate.ID, "-")[0] - zoneCnt[zone]++ - + dcDistribution[candidate.LocalityData[fdbv1beta2.FDBLocalityDCIDKey]]++ if candidate.Class == fdbv1beta2.ProcessClassStorage { storageCnt++ } @@ -337,10 +365,9 @@ var _ = Describe("Change coordinators", func() { Expect(storageCnt).To(BeNumerically("==", 6)) Expect(logCnt).To(BeNumerically("==", 3)) // We should have 3 different zones - Expect(len(zoneCnt)).To(BeNumerically("==", 3)) - - for _, zoneVal := range zoneCnt { - Expect(zoneVal).To(BeNumerically("==", 3)) + Expect(len(dcDistribution)).To(BeNumerically("==", 3)) + for _, dcCount := range dcDistribution { + Expect(dcCount).To(BeNumerically("==", 3)) } }) }) @@ -348,29 +375,27 @@ var _ = Describe("Change coordinators", func() { When("some processes are removed", func() { BeforeEach(func() { removals = []fdbv1beta2.ProcessGroupID{ - "dc0-storage-1", - "dc0-storage-2", - "dc0-storage-3", - "dc0-storage-4", + fdbv1beta2.ProcessGroupID(primaryID + "-storage-1"), + fdbv1beta2.ProcessGroupID(primaryID + "-storage-2"), + fdbv1beta2.ProcessGroupID(primaryID + "-storage-3"), + fdbv1beta2.ProcessGroupID(primaryID + "-storage-4"), } }) It("should only select storage processes in primary and remote and Tlog in satellites and processes that are not removed", func() { Expect(cluster.DesiredCoordinatorCount()).To(BeNumerically("==", 9)) - Expect(len(candidates)).To(BeNumerically("==", cluster.DesiredCoordinatorCount())) + Expect(candidates).To(HaveLen(cluster.DesiredCoordinatorCount())) // Only select Storage processes since we select 3 processes and we have 4 storage processes storageCnt := 0 logCnt := 0 - zoneCnt := map[string]int{} + dcDistribution := map[string]int{} for _, candidate := range candidates { for _, removed := range removals { Expect(candidate.ID).NotTo(Equal(removed)) } - zone := strings.Split(candidate.ID, "-")[0] - zoneCnt[zone]++ - + dcDistribution[candidate.LocalityData[fdbv1beta2.FDBLocalityDCIDKey]]++ if candidate.Class == fdbv1beta2.ProcessClassStorage { storageCnt++ } @@ -384,10 +409,10 @@ var _ = Describe("Change coordinators", func() { Expect(storageCnt).To(BeNumerically("==", 6)) Expect(logCnt).To(BeNumerically("==", 3)) // We should have 3 different zones - Expect(len(zoneCnt)).To(BeNumerically("==", 3)) + Expect(len(dcDistribution)).To(BeNumerically("==", 3)) - for _, zoneVal := range zoneCnt { - Expect(zoneVal).To(BeNumerically("==", 3)) + for _, dcCount := range dcDistribution { + Expect(dcCount).To(BeNumerically("==", 3)) } }) }) @@ -395,18 +420,18 @@ var _ = Describe("Change coordinators", func() { When("all processes in a dc are excluded", func() { BeforeEach(func() { excludes = []string{ - "dc0-storage-0", - "dc0-storage-1", - "dc0-storage-2", - "dc0-storage-3", - "dc0-storage-4", - "dc0-storage-5", - "dc0-storage-6", - "dc0-storage-7", - "dc0-log-0", - "dc0-log-1", - "dc0-log-2", - "dc0-log-3", + primaryID + "-storage-0", + primaryID + "-storage-1", + primaryID + "-storage-2", + primaryID + "-storage-3", + primaryID + "-storage-4", + primaryID + "-storage-5", + primaryID + "-storage-6", + primaryID + "-storage-7", + primaryID + "-log-0", + primaryID + "-log-1", + primaryID + "-log-2", + primaryID + "-log-3", } shouldFail = true @@ -432,23 +457,60 @@ var _ = Describe("Change coordinators", func() { When("using 2 dcs and 2 satellites", func() { BeforeEach(func() { - dcCnt = 2 - satCnt = 2 + primaryID = internal.GenerateRandomString(10) + remoteID := internal.GenerateRandomString(10) + 
primarySatelliteID := internal.GenerateRandomString(10) + remoteSatelliteID := internal.GenerateRandomString(10) + + cluster.Spec.DataCenter = primaryID + cluster.Spec.DatabaseConfiguration.Regions = []fdbv1beta2.Region{ + { + DataCenters: []fdbv1beta2.DataCenter{ + { + ID: primaryID, + Priority: 1, + }, + { + ID: primarySatelliteID, + Satellite: 1, + Priority: 1, + }, + { + ID: remoteSatelliteID, + Satellite: 1, + }, + }, + }, + { + DataCenters: []fdbv1beta2.DataCenter{ + { + ID: remoteID, + }, + { + ID: remoteSatelliteID, + Satellite: 1, + Priority: 1, + }, + { + ID: primarySatelliteID, + Satellite: 1, + }, + }, + }, + } }) When("all processes are healthy", func() { It("should only select storage processes in primary and remote and Tlog in satellites", func() { Expect(cluster.DesiredCoordinatorCount()).To(BeNumerically("==", 9)) - Expect(len(candidates)).To(BeNumerically("==", cluster.DesiredCoordinatorCount())) + Expect(candidates).To(HaveLen(cluster.DesiredCoordinatorCount())) // Only select Storage processes since we select 3 processes and we have 4 storage processes storageCnt := 0 logCnt := 0 - zoneCnt := map[string]int{} + dcDistribution := map[string]int{} for _, candidate := range candidates { - zone := strings.Split(candidate.ID, "-")[0] - zoneCnt[zone]++ - + dcDistribution[candidate.LocalityData[fdbv1beta2.FDBLocalityDCIDKey]]++ if candidate.Class == fdbv1beta2.ProcessClassStorage { storageCnt++ } @@ -458,13 +520,19 @@ var _ = Describe("Change coordinators", func() { } } - // We should have 3 SS in dc0 2 SS in dc1 and 2 Tlogs in sat0 and 2 Tlogs in sat1 + // We should have 3 SS in the primary dc 2 SS in the remote dc and 2 Tlogs in each satellite. Expect(storageCnt).To(BeNumerically("==", 5)) Expect(logCnt).To(BeNumerically("==", 4)) - // We should have 3 different zones - Expect(len(zoneCnt)).To(BeNumerically("==", 4)) - for _, zoneVal := range zoneCnt { - Expect(zoneVal).To(BeNumerically(">=", 2)) + Expect(primaryID).To(Equal(cluster.DesiredDatabaseConfiguration().GetPrimaryDCID())) + // We should have 4 different dcs, + Expect(dcDistribution).To(HaveLen(4)) + for dcID, dcCount := range dcDistribution { + if dcID == primaryID { + Expect(dcCount).To(BeNumerically("==", 3)) + continue + } + + Expect(dcCount).To(BeNumerically("==", 2)) } }) }) @@ -472,29 +540,26 @@ var _ = Describe("Change coordinators", func() { When("some processes are excluded", func() { BeforeEach(func() { excludes = []string{ - "dc0-storage-1", - "dc0-storage-2", - "dc0-storage-3", - "dc0-storage-4", + primaryID + "-storage-1", + primaryID + "-storage-2", + primaryID + "-storage-3", + primaryID + "-storage-4", } }) It("should only select storage processes in primary and remote and Tlog in satellites and processes that are not excluded", func() { Expect(cluster.DesiredCoordinatorCount()).To(BeNumerically("==", 9)) - Expect(len(candidates)).To(BeNumerically("==", cluster.DesiredCoordinatorCount())) + Expect(candidates).To(HaveLen(cluster.DesiredCoordinatorCount())) // Only select Storage processes since we select 3 processes and we have 4 storage processes storageCnt := 0 logCnt := 0 - zoneCnt := map[string]int{} + dcDistribution := map[string]int{} for _, candidate := range candidates { for _, excluded := range excludes { Expect(candidate.ID).NotTo(Equal(excluded)) } - - zone := strings.Split(candidate.ID, "-")[0] - zoneCnt[zone]++ - + dcDistribution[candidate.LocalityData[fdbv1beta2.FDBLocalityDCIDKey]]++ if candidate.Class == fdbv1beta2.ProcessClassStorage { storageCnt++ } @@ -507,10 +572,15 @@ var _ = 
Describe("Change coordinators", func() { // We should have 3 SS in dc0 2 SS in dc1 and 2 Tlogs in sat0 and 2 Tlogs in sat1 Expect(storageCnt).To(BeNumerically("==", 5)) Expect(logCnt).To(BeNumerically("==", 4)) - // We should have 3 different zones - Expect(len(zoneCnt)).To(BeNumerically("==", 4)) - for _, zoneVal := range zoneCnt { - Expect(zoneVal).To(BeNumerically(">=", 2)) + // We should have 4 different dcs, + Expect(dcDistribution).To(HaveLen(4)) + for dcID, dcCount := range dcDistribution { + if dcID == primaryID { + Expect(dcCount).To(BeNumerically("==", 3)) + continue + } + + Expect(dcCount).To(BeNumerically("==", 2)) } }) }) @@ -518,29 +588,27 @@ var _ = Describe("Change coordinators", func() { When("some processes are removed", func() { BeforeEach(func() { removals = []fdbv1beta2.ProcessGroupID{ - "dc0-storage-1", - "dc0-storage-2", - "dc0-storage-3", - "dc0-storage-4", + fdbv1beta2.ProcessGroupID(primaryID + "-storage-1"), + fdbv1beta2.ProcessGroupID(primaryID + "-storage-2"), + fdbv1beta2.ProcessGroupID(primaryID + "-storage-3"), + fdbv1beta2.ProcessGroupID(primaryID + "-storage-4"), } }) It("should only select storage processes in primary and remote and Tlog in satellites and processes that are not removed", func() { Expect(cluster.DesiredCoordinatorCount()).To(BeNumerically("==", 9)) - Expect(len(candidates)).To(BeNumerically("==", cluster.DesiredCoordinatorCount())) + Expect(candidates).To(HaveLen(cluster.DesiredCoordinatorCount())) // Only select Storage processes since we select 3 processes and we have 4 storage processes storageCnt := 0 logCnt := 0 - zoneCnt := map[string]int{} + dcDistribution := map[string]int{} for _, candidate := range candidates { for _, removed := range removals { Expect(candidate.ID).NotTo(Equal(removed)) } - zone := strings.Split(candidate.ID, "-")[0] - zoneCnt[zone]++ - + dcDistribution[candidate.LocalityData[fdbv1beta2.FDBLocalityDCIDKey]]++ if candidate.Class == fdbv1beta2.ProcessClassStorage { storageCnt++ } @@ -553,10 +621,15 @@ var _ = Describe("Change coordinators", func() { // We should have 3 SS in dc0 2 SS in dc1 and 2 Tlogs each in sat0 and in sat1 Expect(storageCnt).To(BeNumerically("==", 5)) Expect(logCnt).To(BeNumerically("==", 4)) - // We should have 3 different zones - Expect(len(zoneCnt)).To(BeNumerically("==", 4)) - for _, zoneVal := range zoneCnt { - Expect(zoneVal).To(BeNumerically(">=", 2)) + // We should have 4 different dcs. 
+ Expect(dcDistribution).To(HaveLen(4)) + for dcID, dcCount := range dcDistribution { + if dcID == primaryID { + Expect(dcCount).To(BeNumerically("==", 3)) + continue + } + + Expect(dcCount).To(BeNumerically("==", 2)) } }) }) @@ -564,54 +637,26 @@ var _ = Describe("Change coordinators", func() { When("all processes in a dc are excluded", func() { BeforeEach(func() { excludes = []string{ - "dc0-storage-0", - "dc0-storage-1", - "dc0-storage-2", - "dc0-storage-3", - "dc0-storage-4", - "dc0-storage-5", - "dc0-storage-6", - "dc0-storage-7", - "dc0-log-0", - "dc0-log-1", - "dc0-log-2", - "dc0-log-3", + primaryID + "-storage-0", + primaryID + "-storage-1", + primaryID + "-storage-2", + primaryID + "-storage-3", + primaryID + "-storage-4", + primaryID + "-storage-5", + primaryID + "-storage-6", + primaryID + "-storage-7", + primaryID + "-log-0", + primaryID + "-log-1", + primaryID + "-log-2", + primaryID + "-log-3", } + + shouldFail = true }) It("should select 3 processes in each remaining dc", func() { Expect(cluster.DesiredCoordinatorCount()).To(BeNumerically("==", 9)) - Expect(len(candidates)).To(BeNumerically("==", cluster.DesiredCoordinatorCount())) - - // Only select Storage processes since we select 3 processes and we have 4 storage processes - storageCnt := 0 - logCnt := 0 - zoneCnt := map[string]int{} - for _, candidate := range candidates { - for _, excluded := range excludes { - Expect(candidate.ID).NotTo(Equal(excluded)) - } - - zone := strings.Split(candidate.ID, "-")[0] - zoneCnt[zone]++ - - if candidate.Class == fdbv1beta2.ProcessClassStorage { - storageCnt++ - } - - if candidate.Class == fdbv1beta2.ProcessClassLog { - logCnt++ - } - } - - // We should have 3 SS in dc1 and 3 Tlogs each in sat0 and sat1 - Expect(storageCnt).To(BeNumerically("==", 3)) - Expect(logCnt).To(BeNumerically("==", 6)) - // We should have 3 different zones - Expect(len(zoneCnt)).To(BeNumerically("==", 3)) - for _, zoneVal := range zoneCnt { - Expect(zoneVal).To(BeNumerically("==", 3)) - } + Expect(candidates).To(BeEmpty()) }) }) @@ -641,7 +686,7 @@ var _ = Describe("Change coordinators", func() { status, err = adminClient.GetStatus() Expect(err).NotTo(HaveOccurred()) - status.Cluster.Processes = generateProcessInfoForThreeDataHall(3, nil) + status.Cluster.Processes = generateProcessInfoForThreeDataHall(3, nil, cluster.GetRunningVersion()) candidates, err = selectCoordinators(logr.Discard(), cluster, status) Expect(err).NotTo(HaveOccurred()) @@ -652,17 +697,15 @@ var _ = Describe("Change coordinators", func() { Expect(cluster.DesiredCoordinatorCount()).To(BeNumerically("==", 9)) Expect(len(candidates)).To(BeNumerically("==", cluster.DesiredCoordinatorCount())) - dataHallCount := map[string]int{} + dataHallCounts := map[string]int{} for _, candidate := range candidates { - Expect(candidate.ID).To(ContainSubstring("storage")) - dataHallCount[strings.Split(candidate.ID, "-")[0]]++ + Expect(candidate.Class).To(Equal(fdbv1beta2.ProcessClassStorage)) + dataHallCounts[candidate.LocalityData[fdbv1beta2.FDBLocalityDataHallKey]]++ } - Expect(dataHallCount).To(Equal(map[string]int{ - "datahall0": 3, - "datahall1": 3, - "datahall2": 3, - })) + for _, dataHallCount := range dataHallCounts { + Expect(dataHallCount).To(BeNumerically("==", 3)) + } }) }) @@ -681,18 +724,16 @@ var _ = Describe("Change coordinators", func() { Expect(len(candidates)).To(BeNumerically("==", cluster.DesiredCoordinatorCount())) // Only select Storage processes since we select 3 processes and we have 4 storage processes - dataHallCount := 
map[string]int{} + dataHallCounts := map[string]int{} for _, candidate := range candidates { Expect(candidate.ID).NotTo(Equal(removedProcess)) - Expect(candidate.ID).To(ContainSubstring("storage")) - dataHallCount[strings.Split(candidate.ID, "-")[0]]++ + Expect(candidate.Class).To(Equal(fdbv1beta2.ProcessClassStorage)) + dataHallCounts[candidate.LocalityData[fdbv1beta2.FDBLocalityDataHallKey]]++ } - Expect(dataHallCount).To(Equal(map[string]int{ - "datahall0": 3, - "datahall1": 3, - "datahall2": 3, - })) + for _, dataHallCount := range dataHallCounts { + Expect(dataHallCount).To(BeNumerically("==", 3)) + } }) }) }) @@ -1002,38 +1043,37 @@ var _ = Describe("Change coordinators", func() { ) }) -func generateProcessInfoForMultiRegion(dcCount int, satCount int, excludes []string) map[fdbv1beta2.ProcessGroupID]fdbv1beta2.FoundationDBStatusProcessInfo { +func generateProcessInfoForMultiRegion(config fdbv1beta2.DatabaseConfiguration, excludes []string, version string) map[fdbv1beta2.ProcessGroupID]fdbv1beta2.FoundationDBStatusProcessInfo { res := map[fdbv1beta2.ProcessGroupID]fdbv1beta2.FoundationDBStatusProcessInfo{} logCnt := 4 - for i := 0; i < dcCount; i++ { - dcid := fmt.Sprintf("dc%d", i) - - generateProcessInfoDetails(res, dcid, "", 8, excludes, fdbv1beta2.ProcessClassStorage) - generateProcessInfoDetails(res, dcid, "", logCnt, excludes, fdbv1beta2.ProcessClassLog) + mainDCs, satelliteDCs := config.GetMainDCsAndSatellites() + for dcID := range mainDCs { + generateProcessInfoDetails(res, dcID, "", 8, excludes, fdbv1beta2.ProcessClassStorage, version) + generateProcessInfoDetails(res, dcID, "", logCnt, excludes, fdbv1beta2.ProcessClassLog, version) } - for i := 0; i < satCount; i++ { - generateProcessInfoDetails(res, fmt.Sprintf("sat%d", i), "", logCnt, excludes, fdbv1beta2.ProcessClassLog) + for satelliteDCID := range satelliteDCs { + generateProcessInfoDetails(res, satelliteDCID, "", logCnt, excludes, fdbv1beta2.ProcessClassLog, version) } return res } -func generateProcessInfoForThreeDataHall(dataHallCount int, excludes []string) map[fdbv1beta2.ProcessGroupID]fdbv1beta2.FoundationDBStatusProcessInfo { +func generateProcessInfoForThreeDataHall(dataHallCount int, excludes []string, version string) map[fdbv1beta2.ProcessGroupID]fdbv1beta2.FoundationDBStatusProcessInfo { res := map[fdbv1beta2.ProcessGroupID]fdbv1beta2.FoundationDBStatusProcessInfo{} logCnt := 4 for i := 0; i < dataHallCount; i++ { - dataHallID := fmt.Sprintf("datahall%d", i) - generateProcessInfoDetails(res, "", dataHallID, 8, excludes, fdbv1beta2.ProcessClassStorage) - generateProcessInfoDetails(res, "", dataHallID, logCnt, excludes, fdbv1beta2.ProcessClassLog) + dataHallID := internal.GenerateRandomString(10) + generateProcessInfoDetails(res, "", dataHallID, 8, excludes, fdbv1beta2.ProcessClassStorage, version) + generateProcessInfoDetails(res, "", dataHallID, logCnt, excludes, fdbv1beta2.ProcessClassLog, version) } return res } -func generateProcessInfoDetails(res map[fdbv1beta2.ProcessGroupID]fdbv1beta2.FoundationDBStatusProcessInfo, dcID string, dataHall string, cnt int, excludes []string, pClass fdbv1beta2.ProcessClass) { +func generateProcessInfoDetails(res map[fdbv1beta2.ProcessGroupID]fdbv1beta2.FoundationDBStatusProcessInfo, dcID string, dataHall string, cnt int, excludes []string, pClass fdbv1beta2.ProcessClass, version string) { for idx := 0; idx < cnt; idx++ { excluded := false var zoneID string @@ -1058,6 +1098,7 @@ func generateProcessInfoDetails(res map[fdbv1beta2.ProcessGroupID]fdbv1beta2.Fou addr := 
fmt.Sprintf("1.1.1.%d:4501", len(res)) processInfo := fdbv1beta2.FoundationDBStatusProcessInfo{ ProcessClass: pClass, + Version: version, Locality: map[string]string{ fdbv1beta2.FDBLocalityInstanceIDKey: zoneID, fdbv1beta2.FDBLocalityZoneIDKey: zoneID, @@ -1081,71 +1122,3 @@ func generateProcessInfoDetails(res map[fdbv1beta2.ProcessGroupID]fdbv1beta2.Fou res[fdbv1beta2.ProcessGroupID(zoneID)] = processInfo } } - -func setDatabaseConfiguration(cluster *fdbv1beta2.FoundationDBCluster, satCnt int) { - if satCnt == 1 { - cluster.Spec.DatabaseConfiguration.Regions = []fdbv1beta2.Region{ - { - DataCenters: []fdbv1beta2.DataCenter{ - { - ID: "dc0", - }, - { - ID: "sat0", - Satellite: 1, - }, - { - ID: "dc1", - Satellite: 1, - Priority: 1, - }, - }, - }, - { - DataCenters: []fdbv1beta2.DataCenter{ - { - ID: "dc1", - }, - { - ID: "sat0", - Satellite: 1, - }, - { - ID: "dc0", - Satellite: 1, - Priority: 1, - }, - }, - }, - } - return - } - - if satCnt == 2 { - cluster.Spec.DatabaseConfiguration.Regions = []fdbv1beta2.Region{ - { - DataCenters: []fdbv1beta2.DataCenter{ - { - ID: "dc0", - }, - { - ID: "sat0", - Satellite: 1, - }, - }, - }, - { - DataCenters: []fdbv1beta2.DataCenter{ - { - ID: "dc1", - Priority: 1, - }, - { - ID: "sat1", - Satellite: 1, - }, - }, - }, - } - } -} diff --git a/controllers/generate_initial_cluster_file.go b/controllers/generate_initial_cluster_file.go index 1dee3415b..91d9a2228 100644 --- a/controllers/generate_initial_cluster_file.go +++ b/controllers/generate_initial_cluster_file.go @@ -124,10 +124,14 @@ func (g generateInitialClusterFile) reconcile(ctx context.Context, r *Foundation logger.Info("Pod is ineligible to be a coordinator due to missing locality information", "processGroupID", processGroupID) continue } + currentLocality.Priority = cluster.GetClassCandidatePriority(currentLocality.Class) processLocality = append(processLocality, currentLocality) } - coordinators, err := locality.ChooseDistributedProcesses(cluster, processLocality, count, locality.ProcessSelectionConstraint{}) + coordinators, err := locality.ChooseDistributedProcesses(cluster, processLocality, count, locality.ProcessSelectionConstraint{ + HardLimits: locality.GetHardLimits(cluster), + SelectingCoordinators: true, + }) if err != nil { return &requeue{curError: err} } diff --git a/internal/locality/locality.go b/internal/locality/locality.go index 83b1d6261..92709b71f 100644 --- a/internal/locality/locality.go +++ b/internal/locality/locality.go @@ -21,14 +21,14 @@ package locality import ( + "cmp" "errors" "fmt" - "math" - "sort" - fdbv1beta2 "github.com/FoundationDB/fdb-kubernetes-operator/api/v1beta2" "github.com/FoundationDB/fdb-kubernetes-operator/pkg/podclient" "github.com/go-logr/logr" + "math" + "slices" ) // Info captures information about a process for the purposes of @@ -56,15 +56,18 @@ type Info struct { // otherwise we get a (nearly) random result since processes are stored in a map which is by definition // not sorted and doesn't return values in a stable way. 
func sortLocalities(processes []Info) { - // Sort the processes for ID to ensure we have a stable input - sort.SliceStable(processes, func(i, j int) bool { + slices.SortStableFunc(processes, func(a, b Info) int { // If both have the same priority sort them by the process ID - if processes[i].Priority == processes[j].Priority { - return processes[i].ID < processes[j].ID + if a.Priority == b.Priority { + return cmp.Compare(a.ID, b.ID) + } + + // Prefer processes with a higher priority + if a.Priority > b.Priority { + return -1 } - // prefer processes with a higher priority - return processes[i].Priority > processes[j].Priority + return 1 }) } @@ -146,10 +149,12 @@ type ProcessSelectionConstraint struct { // HardLimits defines a maximum number of processes to recruit on any single // value for a given locality field. HardLimits map[string]int + + // SelectingCoordinators must be true when the ChooseDistributedProcesses is used to select coordinators. + SelectingCoordinators bool } -// ChooseDistributedProcesses recruits a maximally well-distributed set -// of processes from a set of potential candidates. +// ChooseDistributedProcesses recruits a maximally well-distributed set of processes from a set of potential candidates. func ChooseDistributedProcesses(cluster *fdbv1beta2.FoundationDBCluster, processes []Info, count int, constraint ProcessSelectionConstraint) ([]Info, error) { chosen := make([]Info, 0, count) chosenIDs := make(map[string]bool, count) @@ -180,6 +185,29 @@ func ChooseDistributedProcesses(cluster *fdbv1beta2.FoundationDBCluster, process currentLimits[field] = 1 } + if constraint.SelectingCoordinators { + // If the cluster runs in a multi-region config and has more than 3 DCs we want to make sure that the primary is + // preferred for the 3 coordinators. + // See: https://github.com/FoundationDB/fdb-kubernetes-operator/issues/2034 + if cluster.Spec.DatabaseConfiguration.UsableRegions > 1 && cluster.Spec.DatabaseConfiguration.CountUniqueDataCenters() > 3 { + primaryDC := cluster.DesiredDatabaseConfiguration().GetPrimaryDCID() + + mainDCs, satelliteDCs := cluster.Spec.DatabaseConfiguration.GetMainDCsAndSatellites() + // We increase the chosen count by 1 to make sure the primary dc will host the 3 coordinators. 
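+			// Only the non-primary main DCs and the satellite DCs are pre-counted here; the primary DC starts
+			// at zero, so the selection loop below fills it with one more coordinator than the other DCs.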
+ for dcID := range mainDCs { + if primaryDC == dcID { + continue + } + + chosenCounts[fdbv1beta2.FDBLocalityDCIDKey][dcID]++ + } + + for dcID := range satelliteDCs { + chosenCounts[fdbv1beta2.FDBLocalityDCIDKey][dcID]++ + } + } + } + // Sort the processes to ensure a deterministic result sortLocalities(processes) diff --git a/internal/locality/locality_test.go b/internal/locality/locality_test.go index 046a7c455..12b095f32 100644 --- a/internal/locality/locality_test.go +++ b/internal/locality/locality_test.go @@ -24,7 +24,9 @@ import ( "fmt" "github.com/FoundationDB/fdb-kubernetes-operator/pkg/podclient/mock" "math" + "math/rand" "net" + "strconv" "strings" "github.com/go-logr/logr" @@ -138,107 +140,116 @@ var _ = Describe("Localities", func() { When("Sorting the localities", func() { var localities []Info - BeforeEach(func() { - localities = []Info{ - { - ID: "storage-1", - Class: fdbv1beta2.ProcessClassStorage, - }, - { - ID: "tlog-1", - Class: fdbv1beta2.ProcessClassTransaction, - }, - { - ID: "log-1", - Class: fdbv1beta2.ProcessClassLog, - }, - { - ID: "storage-51", - Class: fdbv1beta2.ProcessClassStorage, - }, - } - }) - JustBeforeEach(func() { for idx, cur := range localities { + if cur.LocalityData == nil { + cur.LocalityData = map[string]string{} + } + + cur.LocalityData[fdbv1beta2.FDBLocalityZoneIDKey] = cur.ID localities[idx] = Info{ - ID: cur.ID, - Class: cur.Class, - Priority: cluster.GetClassCandidatePriority(cur.Class), + ID: cur.ID, + Class: cur.Class, + Priority: cluster.GetClassCandidatePriority(cur.Class), + LocalityData: cur.LocalityData, } } }) - When("no other preferences are defined", func() { - BeforeEach(func() { - cluster.Spec.CoordinatorSelection = []fdbv1beta2.CoordinatorSelectionSetting{} - }) - - It("should sort the localities based on the IDs but prefer transaction system Pods", func() { - sortLocalities(localities) - - Expect(localities[0].Class).To(Equal(fdbv1beta2.ProcessClassLog)) - Expect(localities[0].ID).To(Equal("log-1")) - Expect(localities[1].Class).To(Equal(fdbv1beta2.ProcessClassTransaction)) - Expect(localities[1].ID).To(Equal("tlog-1")) - Expect(localities[2].Class).To(Equal(fdbv1beta2.ProcessClassStorage)) - Expect(localities[2].ID).To(Equal("storage-1")) - Expect(localities[3].Class).To(Equal(fdbv1beta2.ProcessClassStorage)) - Expect(localities[3].ID).To(Equal("storage-51")) - }) - }) - - When("when the storage class is preferred", func() { + When("only a single DC is used", func() { BeforeEach(func() { - cluster.Spec.CoordinatorSelection = []fdbv1beta2.CoordinatorSelectionSetting{ + localities = []Info{ { - ProcessClass: fdbv1beta2.ProcessClassStorage, - Priority: 10, + ID: "storage-1", + Class: fdbv1beta2.ProcessClassStorage, }, - } - }) - - It("should sort the localities based on the provided config", func() { - sortLocalities(localities) - - Expect(localities[0].Class).To(Equal(fdbv1beta2.ProcessClassStorage)) - Expect(localities[0].ID).To(Equal("storage-1")) - Expect(localities[1].Class).To(Equal(fdbv1beta2.ProcessClassStorage)) - Expect(localities[1].ID).To(Equal("storage-51")) - Expect(localities[2].Class).To(Equal(fdbv1beta2.ProcessClassLog)) - Expect(localities[2].ID).To(Equal("log-1")) - Expect(localities[3].Class).To(Equal(fdbv1beta2.ProcessClassTransaction)) - Expect(localities[3].ID).To(Equal("tlog-1")) - }) - }) - - When("when the storage class is preferred over transaction class", func() { - BeforeEach(func() { - cluster.Spec.CoordinatorSelection = []fdbv1beta2.CoordinatorSelectionSetting{ { - ProcessClass: 
fdbv1beta2.ProcessClassStorage, - Priority: 10, + ID: "tlog-1", + Class: fdbv1beta2.ProcessClassTransaction, }, { - ProcessClass: fdbv1beta2.ProcessClassTransaction, - Priority: 1, + ID: "log-1", + Class: fdbv1beta2.ProcessClassLog, + }, + { + ID: "storage-51", + Class: fdbv1beta2.ProcessClassStorage, }, } }) - It("should sort the localities based on the provided config", func() { - sortLocalities(localities) - - Expect(localities[0].Class).To(Equal(fdbv1beta2.ProcessClassStorage)) - Expect(localities[0].ID).To(Equal("storage-1")) - Expect(localities[1].Class).To(Equal(fdbv1beta2.ProcessClassStorage)) - Expect(localities[1].ID).To(Equal("storage-51")) - Expect(localities[2].Class).To(Equal(fdbv1beta2.ProcessClassLog)) - Expect(localities[2].ID).To(Equal("log-1")) - Expect(localities[3].Class).To(Equal(fdbv1beta2.ProcessClassTransaction)) - Expect(localities[3].ID).To(Equal("tlog-1")) + When("no other preferences are defined", func() { + BeforeEach(func() { + cluster.Spec.CoordinatorSelection = []fdbv1beta2.CoordinatorSelectionSetting{} + }) + + It("should sort the localities based on the IDs but prefer transaction system Pods", func() { + sortLocalities(localities) + + Expect(localities[0].Class).To(Equal(fdbv1beta2.ProcessClassLog)) + Expect(localities[0].ID).To(Equal("log-1")) + Expect(localities[1].Class).To(Equal(fdbv1beta2.ProcessClassTransaction)) + Expect(localities[1].ID).To(Equal("tlog-1")) + Expect(localities[2].Class).To(Equal(fdbv1beta2.ProcessClassStorage)) + Expect(localities[2].ID).To(Equal("storage-1")) + Expect(localities[3].Class).To(Equal(fdbv1beta2.ProcessClassStorage)) + Expect(localities[3].ID).To(Equal("storage-51")) + }) + }) + + When("when the storage class is preferred", func() { + BeforeEach(func() { + cluster.Spec.CoordinatorSelection = []fdbv1beta2.CoordinatorSelectionSetting{ + { + ProcessClass: fdbv1beta2.ProcessClassStorage, + Priority: 10, + }, + } + }) + + It("should sort the localities based on the provided config", func() { + sortLocalities(localities) + + Expect(localities[0].Class).To(Equal(fdbv1beta2.ProcessClassStorage)) + Expect(localities[0].ID).To(Equal("storage-1")) + Expect(localities[1].Class).To(Equal(fdbv1beta2.ProcessClassStorage)) + Expect(localities[1].ID).To(Equal("storage-51")) + Expect(localities[2].Class).To(Equal(fdbv1beta2.ProcessClassLog)) + Expect(localities[2].ID).To(Equal("log-1")) + Expect(localities[3].Class).To(Equal(fdbv1beta2.ProcessClassTransaction)) + Expect(localities[3].ID).To(Equal("tlog-1")) + }) + }) + + When("when the storage class is preferred over transaction class", func() { + BeforeEach(func() { + cluster.Spec.CoordinatorSelection = []fdbv1beta2.CoordinatorSelectionSetting{ + { + ProcessClass: fdbv1beta2.ProcessClassStorage, + Priority: 10, + }, + { + ProcessClass: fdbv1beta2.ProcessClassTransaction, + Priority: 1, + }, + } + }) + + It("should sort the localities based on the provided config", func() { + sortLocalities(localities) + + Expect(localities[0].Class).To(Equal(fdbv1beta2.ProcessClassStorage)) + Expect(localities[0].ID).To(Equal("storage-1")) + Expect(localities[1].Class).To(Equal(fdbv1beta2.ProcessClassStorage)) + Expect(localities[1].ID).To(Equal("storage-51")) + Expect(localities[2].Class).To(Equal(fdbv1beta2.ProcessClassLog)) + Expect(localities[2].ID).To(Equal("log-1")) + Expect(localities[3].Class).To(Equal(fdbv1beta2.ProcessClassTransaction)) + Expect(localities[3].ID).To(Equal("tlog-1")) + }) }) }) + }) Describe("chooseDistributedProcesses", func() { @@ -311,16 +322,16 @@ var _ = 
Describe("Localities", func() { Context("with multiple data centers", func() { BeforeEach(func() { candidates = []Info{ - {ID: "p1", LocalityData: map[string]string{fdbv1beta2.FDBLocalityZoneIDKey: "z1", "dcid": "dc1"}}, - {ID: "p2", LocalityData: map[string]string{fdbv1beta2.FDBLocalityZoneIDKey: "z1", "dcid": "dc1"}}, - {ID: "p3", LocalityData: map[string]string{fdbv1beta2.FDBLocalityZoneIDKey: "z2", "dcid": "dc1"}}, - {ID: "p4", LocalityData: map[string]string{fdbv1beta2.FDBLocalityZoneIDKey: "z3", "dcid": "dc1"}}, - {ID: "p5", LocalityData: map[string]string{fdbv1beta2.FDBLocalityZoneIDKey: "z2", "dcid": "dc1"}}, - {ID: "p6", LocalityData: map[string]string{fdbv1beta2.FDBLocalityZoneIDKey: "z4", "dcid": "dc1"}}, - {ID: "p7", LocalityData: map[string]string{fdbv1beta2.FDBLocalityZoneIDKey: "z5", "dcid": "dc1"}}, - {ID: "p8", LocalityData: map[string]string{fdbv1beta2.FDBLocalityZoneIDKey: "z6", "dcid": "dc2"}}, - {ID: "p9", LocalityData: map[string]string{fdbv1beta2.FDBLocalityZoneIDKey: "z7", "dcid": "dc2"}}, - {ID: "p10", LocalityData: map[string]string{fdbv1beta2.FDBLocalityZoneIDKey: "z8", "dcid": "dc2"}}, + {ID: "p1", LocalityData: map[string]string{fdbv1beta2.FDBLocalityZoneIDKey: "z1", fdbv1beta2.FDBLocalityDCIDKey: "dc1"}}, + {ID: "p2", LocalityData: map[string]string{fdbv1beta2.FDBLocalityZoneIDKey: "z1", fdbv1beta2.FDBLocalityDCIDKey: "dc1"}}, + {ID: "p3", LocalityData: map[string]string{fdbv1beta2.FDBLocalityZoneIDKey: "z2", fdbv1beta2.FDBLocalityDCIDKey: "dc1"}}, + {ID: "p4", LocalityData: map[string]string{fdbv1beta2.FDBLocalityZoneIDKey: "z3", fdbv1beta2.FDBLocalityDCIDKey: "dc1"}}, + {ID: "p5", LocalityData: map[string]string{fdbv1beta2.FDBLocalityZoneIDKey: "z2", fdbv1beta2.FDBLocalityDCIDKey: "dc1"}}, + {ID: "p6", LocalityData: map[string]string{fdbv1beta2.FDBLocalityZoneIDKey: "z4", fdbv1beta2.FDBLocalityDCIDKey: "dc1"}}, + {ID: "p7", LocalityData: map[string]string{fdbv1beta2.FDBLocalityZoneIDKey: "z5", fdbv1beta2.FDBLocalityDCIDKey: "dc1"}}, + {ID: "p8", LocalityData: map[string]string{fdbv1beta2.FDBLocalityZoneIDKey: "z6", fdbv1beta2.FDBLocalityDCIDKey: "dc2"}}, + {ID: "p9", LocalityData: map[string]string{fdbv1beta2.FDBLocalityZoneIDKey: "z7", fdbv1beta2.FDBLocalityDCIDKey: "dc2"}}, + {ID: "p10", LocalityData: map[string]string{fdbv1beta2.FDBLocalityZoneIDKey: "z8", fdbv1beta2.FDBLocalityDCIDKey: "dc2"}}, } }) @@ -358,6 +369,239 @@ var _ = Describe("Localities", func() { }) }) }) + + When("a multi-region cluster is used with 4 dcs", func() { + var primaryID, remoteID, primarySatelliteID, remoteSatelliteID string + + BeforeEach(func() { + // Generate random names to make sure we test different alphabetical orderings. 
+ primaryID = internal.GenerateRandomString(10) + remoteID = internal.GenerateRandomString(10) + primarySatelliteID = internal.GenerateRandomString(10) + remoteSatelliteID = internal.GenerateRandomString(10) + + candidates = make([]Info, 12) + idx := 0 + // Make sure the result is randomized + dcIDs := []string{primaryID, primarySatelliteID, remoteID, remoteSatelliteID} + rand.Shuffle(len(dcIDs), func(i, j int) { + dcIDs[i], dcIDs[j] = dcIDs[j], dcIDs[i] + }) + + for _, dcID := range dcIDs { + for i := 0; i < 3; i++ { + candidates[idx] = Info{ + ID: strconv.Itoa(idx), + LocalityData: map[string]string{ + fdbv1beta2.FDBLocalityZoneIDKey: dcID + "-z" + strconv.Itoa(i), + fdbv1beta2.FDBLocalityDCIDKey: dcID, + }, + } + + idx++ + } + } + + cluster.Spec.DatabaseConfiguration = fdbv1beta2.DatabaseConfiguration{ + UsableRegions: 2, + Regions: []fdbv1beta2.Region{ + { + DataCenters: []fdbv1beta2.DataCenter{ + { + ID: primaryID, + Priority: 1, + }, + { + ID: primarySatelliteID, + Priority: 1, + Satellite: 1, + }, + { + ID: remoteSatelliteID, + Priority: 0, + Satellite: 1, + }, + }, + }, + { + DataCenters: []fdbv1beta2.DataCenter{ + { + ID: remoteID, + Priority: 0, + }, + { + ID: remoteSatelliteID, + Priority: 1, + Satellite: 1, + }, + { + ID: primarySatelliteID, + Priority: 0, + Satellite: 1, + }, + }, + }, + }, + } + + result, err = ChooseDistributedProcesses(cluster, candidates, cluster.DesiredCoordinatorCount(), ProcessSelectionConstraint{ + HardLimits: GetHardLimits(cluster), + SelectingCoordinators: true, + }) + Expect(err).NotTo(HaveOccurred()) + }) + + It("should recruit the processes across multiple dcs and prefer the primary", func() { + Expect(len(result)).To(Equal(9)) + + dcCount := map[string]int{} + for _, process := range result { + dcCount[process.LocalityData[fdbv1beta2.FDBLocalityDCIDKey]]++ + } + + Expect(dcCount).To(Equal(map[string]int{ + primaryID: 3, + remoteID: 2, + remoteSatelliteID: 2, + primarySatelliteID: 2, + })) + }) + }) + + When("a multi-region cluster is used with 4 dcs and storage processes should be preferred", func() { + var primaryID, remoteID, primarySatelliteID, remoteSatelliteID string + + BeforeEach(func() { + // Generate random names to make sure we test different alphabetical orderings. 
+ primaryID = internal.GenerateRandomString(10) + remoteID = internal.GenerateRandomString(10) + primarySatelliteID = internal.GenerateRandomString(10) + remoteSatelliteID = internal.GenerateRandomString(10) + + cluster.Spec.CoordinatorSelection = []fdbv1beta2.CoordinatorSelectionSetting{ + { + ProcessClass: fdbv1beta2.ProcessClassStorage, + Priority: math.MaxInt32, + }, + { + ProcessClass: fdbv1beta2.ProcessClassLog, + Priority: 0, + }, + } + candidates = make([]Info, 18) + idx := 0 + for _, dcID := range []string{primaryID, primarySatelliteID, remoteID, remoteSatelliteID} { + for i := 0; i < 3; i++ { + candidates[idx] = Info{ + ID: strconv.Itoa(idx), + Class: fdbv1beta2.ProcessClassLog, + LocalityData: map[string]string{ + fdbv1beta2.FDBLocalityZoneIDKey: dcID + "-z1" + strconv.Itoa(i), + fdbv1beta2.FDBLocalityDCIDKey: dcID, + }, + Priority: cluster.GetClassCandidatePriority(fdbv1beta2.ProcessClassLog), + } + + idx++ + } + + if dcID == primarySatelliteID || dcID == remoteSatelliteID { + continue + } + + for i := 0; i < 3; i++ { + candidates[idx] = Info{ + ID: strconv.Itoa(idx), + Class: fdbv1beta2.ProcessClassStorage, + LocalityData: map[string]string{ + fdbv1beta2.FDBLocalityZoneIDKey: dcID + "-z2" + strconv.Itoa(i), + fdbv1beta2.FDBLocalityDCIDKey: dcID, + }, + Priority: cluster.GetClassCandidatePriority(fdbv1beta2.ProcessClassStorage), + } + + idx++ + } + } + + cluster.Spec.DatabaseConfiguration = fdbv1beta2.DatabaseConfiguration{ + UsableRegions: 2, + Regions: []fdbv1beta2.Region{ + { + DataCenters: []fdbv1beta2.DataCenter{ + { + ID: primaryID, + Priority: 1, + }, + { + ID: primarySatelliteID, + Priority: 1, + Satellite: 1, + }, + { + ID: remoteSatelliteID, + Priority: 0, + Satellite: 1, + }, + }, + }, + { + DataCenters: []fdbv1beta2.DataCenter{ + { + ID: remoteID, + Priority: 0, + }, + { + ID: remoteSatelliteID, + Priority: 1, + Satellite: 1, + }, + { + ID: primarySatelliteID, + Priority: 0, + Satellite: 1, + }, + }, + }, + }, + } + + result, err = ChooseDistributedProcesses(cluster, candidates, cluster.DesiredCoordinatorCount(), ProcessSelectionConstraint{ + HardLimits: GetHardLimits(cluster), + SelectingCoordinators: true, + }) + Expect(err).NotTo(HaveOccurred()) + }) + + It("should recruit the processes across multiple dcs and prefer the primary", func() { + Expect(len(result)).To(Equal(9)) + var storageCnt int + var logCnt int + + dcCount := map[string]int{} + for _, process := range result { + dcCount[process.LocalityData[fdbv1beta2.FDBLocalityDCIDKey]]++ + if process.Class == fdbv1beta2.ProcessClassLog { + logCnt++ + continue + } + + storageCnt++ + } + + Expect(dcCount).To(Equal(map[string]int{ + primaryID: 3, + remoteID: 2, + remoteSatelliteID: 2, + primarySatelliteID: 2, + })) + + // 5 storage processes should be recruited in the main and remote dc + Expect(storageCnt).To(BeNumerically("==", 5)) + // the satellites only have logs, so only logs are recruited + Expect(logCnt).To(BeNumerically("==", 4)) + }) + }) }) DescribeTable("when getting the hard limits", func(cluster *fdbv1beta2.FoundationDBCluster, expected map[string]int) { diff --git a/internal/test_helper.go b/internal/test_helper.go index e4df48902..077ba1325 100644 --- a/internal/test_helper.go +++ b/internal/test_helper.go @@ -25,6 +25,8 @@ import ( v1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/utils/pointer" + "math/rand" + "strings" ) // CreateDefaultCluster creates a default FoundationDBCluster for testing @@ -104,3 +106,15 @@ func GetProcessGroup(cluster 
*fdbv1beta2.FoundationDBCluster, processClass fdbv1 ProcessGroupID: processGroupID, } } + +const letterBytes = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" + +// GenerateRandomString can be used to generate a random string with length n +func GenerateRandomString(n int) string { + var res strings.Builder + for i := 0; i < n; i++ { + res.WriteByte(letterBytes[rand.Intn(len(letterBytes))]) + } + + return res.String() +} From a197e87d6471f2012571ab6f8057bd94583ec0f3 Mon Sep 17 00:00:00 2001 From: "Johannes M. Scheuermann" <jscheuermann@apple.com> Date: Tue, 4 Jun 2024 11:56:23 +0200 Subject: [PATCH 2/4] Sort localities by primary DC if present --- controllers/change_coordinators_test.go | 16 +- internal/locality/locality.go | 46 ++-- internal/locality/locality_test.go | 267 ++++++++++++++++++++---- 3 files changed, 258 insertions(+), 71 deletions(-) diff --git a/controllers/change_coordinators_test.go b/controllers/change_coordinators_test.go index 8fa563e04..7a539cf7c 100644 --- a/controllers/change_coordinators_test.go +++ b/controllers/change_coordinators_test.go @@ -650,13 +650,23 @@ var _ = Describe("Change coordinators", func() { primaryID + "-log-2", primaryID + "-log-3", } - - shouldFail = true }) It("should select 3 processes in each remaining dc", func() { Expect(cluster.DesiredCoordinatorCount()).To(BeNumerically("==", 9)) - Expect(candidates).To(BeEmpty()) + Expect(candidates).NotTo(BeEmpty()) + + dcDistribution := map[string]int{} + for _, candidate := range candidates { + dcDistribution[candidate.LocalityData[fdbv1beta2.FDBLocalityDCIDKey]]++ + } + + // We should have 3 different dcs as the primary has no valid processes. + Expect(dcDistribution).To(HaveLen(3)) + for dcID, dcCount := range dcDistribution { + Expect(dcID).NotTo(Equal(primaryID)) + Expect(dcCount).To(BeNumerically("==", 3)) + } }) }) diff --git a/internal/locality/locality.go b/internal/locality/locality.go index 92709b71f..18b194f70 100644 --- a/internal/locality/locality.go +++ b/internal/locality/locality.go @@ -55,8 +55,23 @@ type Info struct { // We have to do this to ensure we get a deterministic result for selecting the candidates // otherwise we get a (nearly) random result since processes are stored in a map which is by definition // not sorted and doesn't return values in a stable way. 
-func sortLocalities(processes []Info) { +func sortLocalities(primaryDC string, processes []Info) { slices.SortStableFunc(processes, func(a, b Info) int { + if primaryDC != "" { + aDCLocality := a.LocalityData[fdbv1beta2.FDBLocalityDCIDKey] + bDCLocality := b.LocalityData[fdbv1beta2.FDBLocalityDCIDKey] + + if aDCLocality != bDCLocality { + if aDCLocality == primaryDC { + return -1 + } + + if bDCLocality == primaryDC { + return 1 + } + } + } + // If both have the same priority sort them by the process ID if a.Priority == b.Priority { return cmp.Compare(a.ID, b.ID) @@ -158,6 +173,7 @@ type ProcessSelectionConstraint struct { func ChooseDistributedProcesses(cluster *fdbv1beta2.FoundationDBCluster, processes []Info, count int, constraint ProcessSelectionConstraint) ([]Info, error) { chosen := make([]Info, 0, count) chosenIDs := make(map[string]bool, count) + primaryDC := cluster.DesiredDatabaseConfiguration().GetPrimaryDCID() fields := constraint.Fields if len(fields) == 0 { @@ -185,31 +201,8 @@ func ChooseDistributedProcesses(cluster *fdbv1beta2.FoundationDBCluster, process currentLimits[field] = 1 } - if constraint.SelectingCoordinators { - // If the cluster runs in a multi-region config and has more than 3 DCs we want to make sure that the primary is - // preferred for the 3 coordinators. - // See: https://github.com/FoundationDB/fdb-kubernetes-operator/issues/2034 - if cluster.Spec.DatabaseConfiguration.UsableRegions > 1 && cluster.Spec.DatabaseConfiguration.CountUniqueDataCenters() > 3 { - primaryDC := cluster.DesiredDatabaseConfiguration().GetPrimaryDCID() - - mainDCs, satelliteDCs := cluster.Spec.DatabaseConfiguration.GetMainDCsAndSatellites() - // We increase the chosen count by 1 to make sure the primary dc will host the 3 coordinators. - for dcID := range mainDCs { - if primaryDC == dcID { - continue - } - - chosenCounts[fdbv1beta2.FDBLocalityDCIDKey][dcID]++ - } - - for dcID := range satelliteDCs { - chosenCounts[fdbv1beta2.FDBLocalityDCIDKey][dcID]++ - } - } - } - - // Sort the processes to ensure a deterministic result - sortLocalities(processes) + // Sort the processes to ensure a deterministic result. 
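+	// When a primary DC is set, sortLocalities places processes from that DC first, so the selection prefers the primary DC before the other DCs.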
+ sortLocalities(primaryDC, processes) for len(chosen) < count { choseAny := false @@ -257,6 +250,7 @@ func ChooseDistributedProcesses(cluster *fdbv1beta2.FoundationDBCluster, process break } } + if !incrementedLimits { return chosen, notEnoughProcessesError{Desired: count, Chosen: len(chosen), Options: processes} } diff --git a/internal/locality/locality_test.go b/internal/locality/locality_test.go index 12b095f32..a5bb8e371 100644 --- a/internal/locality/locality_test.go +++ b/internal/locality/locality_test.go @@ -23,16 +23,17 @@ package locality import ( "fmt" "github.com/FoundationDB/fdb-kubernetes-operator/pkg/podclient/mock" + "github.com/go-logr/logr" + "github.com/onsi/gomega/gmeasure" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/utils/pointer" "math" "math/rand" "net" "strconv" "strings" - - "github.com/go-logr/logr" - corev1 "k8s.io/api/core/v1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/utils/pointer" + "time" fdbv1beta2 "github.com/FoundationDB/fdb-kubernetes-operator/api/v1beta2" "github.com/FoundationDB/fdb-kubernetes-operator/internal" @@ -118,6 +119,27 @@ func generateDefaultStatus(tls bool) *fdbv1beta2.FoundationDBStatus { } } +func generateCandidates(dcIDs []string, processesPerDc int) []Info { + candidates := make([]Info, processesPerDc*len(dcIDs)) + + idx := 0 + for _, dcID := range dcIDs { + for i := 0; i < processesPerDc; i++ { + candidates[idx] = Info{ + ID: dcID + strconv.Itoa(i), + LocalityData: map[string]string{ + fdbv1beta2.FDBLocalityZoneIDKey: dcID + "-z" + strconv.Itoa(i), + fdbv1beta2.FDBLocalityDCIDKey: dcID, + }, + } + + idx++ + } + } + + return candidates +} + var _ = Describe("Localities", func() { var cluster *fdbv1beta2.FoundationDBCluster @@ -184,7 +206,7 @@ var _ = Describe("Localities", func() { }) It("should sort the localities based on the IDs but prefer transaction system Pods", func() { - sortLocalities(localities) + sortLocalities("", localities) Expect(localities[0].Class).To(Equal(fdbv1beta2.ProcessClassLog)) Expect(localities[0].ID).To(Equal("log-1")) @@ -208,7 +230,7 @@ var _ = Describe("Localities", func() { }) It("should sort the localities based on the provided config", func() { - sortLocalities(localities) + sortLocalities("", localities) Expect(localities[0].Class).To(Equal(fdbv1beta2.ProcessClassStorage)) Expect(localities[0].ID).To(Equal("storage-1")) @@ -236,7 +258,7 @@ var _ = Describe("Localities", func() { }) It("should sort the localities based on the provided config", func() { - sortLocalities(localities) + sortLocalities("", localities) Expect(localities[0].Class).To(Equal(fdbv1beta2.ProcessClassStorage)) Expect(localities[0].ID).To(Equal("storage-1")) @@ -250,6 +272,152 @@ var _ = Describe("Localities", func() { }) }) + When("a multi-region setup is used", func() { + var primaryID, remoteID, primarySatelliteID, remoteSatelliteID string + + BeforeEach(func() { + // Generate random names to make sure we test different alphabetical orderings. 
+ primaryID = internal.GenerateRandomString(10) + remoteID = internal.GenerateRandomString(10) + primarySatelliteID = internal.GenerateRandomString(10) + remoteSatelliteID = internal.GenerateRandomString(10) + + localities = make([]Info, 12) + idx := 0 + // Make sure the result is randomized + dcIDs := []string{primaryID, primarySatelliteID, remoteID, remoteSatelliteID} + rand.Shuffle(len(dcIDs), func(i, j int) { + dcIDs[i], dcIDs[j] = dcIDs[j], dcIDs[i] + }) + + for _, dcID := range dcIDs { + for i := 0; i < 3; i++ { + pClass := fdbv1beta2.ProcessClassStorage + // The first locality will be a log process + if i == 0 { + pClass = fdbv1beta2.ProcessClassLog + } + + localities[idx] = Info{ + ID: dcID + strconv.Itoa(i), + LocalityData: map[string]string{ + fdbv1beta2.FDBLocalityZoneIDKey: dcID + "-z" + strconv.Itoa(i), + fdbv1beta2.FDBLocalityDCIDKey: dcID, + }, + Class: pClass, + } + + idx++ + } + } + + // Randomize the order of localities. + rand.Shuffle(len(localities), func(i, j int) { + localities[i], localities[j] = localities[j], localities[i] + }) + + cluster.Spec.DatabaseConfiguration = fdbv1beta2.DatabaseConfiguration{ + UsableRegions: 2, + Regions: []fdbv1beta2.Region{ + { + DataCenters: []fdbv1beta2.DataCenter{ + { + ID: primaryID, + Priority: 1, + }, + { + ID: primarySatelliteID, + Priority: 1, + Satellite: 1, + }, + { + ID: remoteSatelliteID, + Priority: 0, + Satellite: 1, + }, + }, + }, + { + DataCenters: []fdbv1beta2.DataCenter{ + { + ID: remoteID, + Priority: 0, + }, + { + ID: remoteSatelliteID, + Priority: 1, + Satellite: 1, + }, + { + ID: primarySatelliteID, + Priority: 0, + Satellite: 1, + }, + }, + }, + }, + } + }) + + JustBeforeEach(func() { + sortLocalities(primaryID, localities) + }) + + When("no other preferences are defined", func() { + BeforeEach(func() { + cluster.Spec.CoordinatorSelection = []fdbv1beta2.CoordinatorSelectionSetting{} + }) + + It("should sort the localities based on the IDs but prefer transaction system Pods", func() { + Expect(localities[0].Class).To(Equal(fdbv1beta2.ProcessClassLog)) + Expect(localities[0].LocalityData).To(HaveKeyWithValue(fdbv1beta2.FDBLocalityDCIDKey, primaryID)) + Expect(localities[1].Class).To(Equal(fdbv1beta2.ProcessClassStorage)) + Expect(localities[1].LocalityData).To(HaveKeyWithValue(fdbv1beta2.FDBLocalityDCIDKey, primaryID)) + Expect(localities[2].Class).To(Equal(fdbv1beta2.ProcessClassStorage)) + Expect(localities[2].LocalityData).To(HaveKeyWithValue(fdbv1beta2.FDBLocalityDCIDKey, primaryID)) + // The rest of the clusters are ordered by priority and the process ID. 
+ Expect(localities[3].Class).To(Equal(fdbv1beta2.ProcessClassLog)) + Expect(localities[3].LocalityData).To(HaveKey(fdbv1beta2.FDBLocalityDCIDKey)) + Expect(localities[3].LocalityData[fdbv1beta2.FDBLocalityDCIDKey]).NotTo(Equal(primaryID)) + Expect(localities[4].Class).To(Equal(fdbv1beta2.ProcessClassLog)) + Expect(localities[4].LocalityData).To(HaveKey(fdbv1beta2.FDBLocalityDCIDKey)) + Expect(localities[4].LocalityData[fdbv1beta2.FDBLocalityDCIDKey]).NotTo(Equal(primaryID)) + Expect(localities[5].Class).To(Equal(fdbv1beta2.ProcessClassLog)) + Expect(localities[5].LocalityData).To(HaveKey(fdbv1beta2.FDBLocalityDCIDKey)) + Expect(localities[5].LocalityData[fdbv1beta2.FDBLocalityDCIDKey]).NotTo(Equal(primaryID)) + }) + }) + + When("when the storage class is preferred", func() { + BeforeEach(func() { + cluster.Spec.CoordinatorSelection = []fdbv1beta2.CoordinatorSelectionSetting{ + { + ProcessClass: fdbv1beta2.ProcessClassStorage, + Priority: 10, + }, + } + }) + + It("should sort the localities based on the provided config", func() { + Expect(localities[0].Class).To(Equal(fdbv1beta2.ProcessClassStorage)) + Expect(localities[0].LocalityData).To(HaveKeyWithValue(fdbv1beta2.FDBLocalityDCIDKey, primaryID)) + Expect(localities[1].Class).To(Equal(fdbv1beta2.ProcessClassStorage)) + Expect(localities[1].LocalityData).To(HaveKeyWithValue(fdbv1beta2.FDBLocalityDCIDKey, primaryID)) + Expect(localities[2].Class).To(Equal(fdbv1beta2.ProcessClassLog)) + Expect(localities[2].LocalityData).To(HaveKeyWithValue(fdbv1beta2.FDBLocalityDCIDKey, primaryID)) + // The rest of the clusters are ordered by priority and the process ID. + Expect(localities[3].Class).To(Equal(fdbv1beta2.ProcessClassStorage)) + Expect(localities[3].LocalityData).To(HaveKey(fdbv1beta2.FDBLocalityDCIDKey)) + Expect(localities[3].LocalityData[fdbv1beta2.FDBLocalityDCIDKey]).NotTo(Equal(primaryID)) + Expect(localities[4].Class).To(Equal(fdbv1beta2.ProcessClassStorage)) + Expect(localities[4].LocalityData).To(HaveKey(fdbv1beta2.FDBLocalityDCIDKey)) + Expect(localities[4].LocalityData[fdbv1beta2.FDBLocalityDCIDKey]).NotTo(Equal(primaryID)) + Expect(localities[5].Class).To(Equal(fdbv1beta2.ProcessClassStorage)) + Expect(localities[5].LocalityData).To(HaveKey(fdbv1beta2.FDBLocalityDCIDKey)) + Expect(localities[5].LocalityData[fdbv1beta2.FDBLocalityDCIDKey]).NotTo(Equal(primaryID)) + }) + }) + }) }) Describe("chooseDistributedProcesses", func() { @@ -372,6 +540,7 @@ var _ = Describe("Localities", func() { When("a multi-region cluster is used with 4 dcs", func() { var primaryID, remoteID, primarySatelliteID, remoteSatelliteID string + var dcIDs []string BeforeEach(func() { // Generate random names to make sure we test different alphabetical orderings. 
@@ -380,28 +549,12 @@ var _ = Describe("Localities", func() { primarySatelliteID = internal.GenerateRandomString(10) remoteSatelliteID = internal.GenerateRandomString(10) - candidates = make([]Info, 12) - idx := 0 // Make sure the result is randomized - dcIDs := []string{primaryID, primarySatelliteID, remoteID, remoteSatelliteID} + dcIDs = []string{primaryID, primarySatelliteID, remoteID, remoteSatelliteID} rand.Shuffle(len(dcIDs), func(i, j int) { dcIDs[i], dcIDs[j] = dcIDs[j], dcIDs[i] }) - for _, dcID := range dcIDs { - for i := 0; i < 3; i++ { - candidates[idx] = Info{ - ID: strconv.Itoa(idx), - LocalityData: map[string]string{ - fdbv1beta2.FDBLocalityZoneIDKey: dcID + "-z" + strconv.Itoa(i), - fdbv1beta2.FDBLocalityDCIDKey: dcID, - }, - } - - idx++ - } - } - cluster.Spec.DatabaseConfiguration = fdbv1beta2.DatabaseConfiguration{ UsableRegions: 2, Regions: []fdbv1beta2.Region{ @@ -443,28 +596,58 @@ var _ = Describe("Localities", func() { }, }, } + }) - result, err = ChooseDistributedProcesses(cluster, candidates, cluster.DesiredCoordinatorCount(), ProcessSelectionConstraint{ - HardLimits: GetHardLimits(cluster), - SelectingCoordinators: true, + When("testing the correct behavior with a small set of candidates", func() { + BeforeEach(func() { + candidates = generateCandidates(dcIDs, 5) + result, err = ChooseDistributedProcesses(cluster, candidates, cluster.DesiredCoordinatorCount(), ProcessSelectionConstraint{ + HardLimits: GetHardLimits(cluster), + SelectingCoordinators: true, + }) + Expect(err).NotTo(HaveOccurred()) }) - Expect(err).NotTo(HaveOccurred()) - }) - It("should recruit the processes across multiple dcs and prefer the primary", func() { - Expect(len(result)).To(Equal(9)) + It("should recruit the processes across multiple dcs and prefer the primary", func() { + Expect(len(result)).To(Equal(9)) - dcCount := map[string]int{} - for _, process := range result { - dcCount[process.LocalityData[fdbv1beta2.FDBLocalityDCIDKey]]++ - } + dcCount := map[string]int{} + for _, process := range result { + dcCount[process.LocalityData[fdbv1beta2.FDBLocalityDCIDKey]]++ + } - Expect(dcCount).To(Equal(map[string]int{ - primaryID: 3, - remoteID: 2, - remoteSatelliteID: 2, - primarySatelliteID: 2, - })) + Expect(dcCount).To(Equal(map[string]int{ + primaryID: 3, + remoteID: 2, + remoteSatelliteID: 2, + primarySatelliteID: 2, + })) + }) + }) + + // Adding a benchmark test for the ChooseDistributedProcesses. In order to print the set use FIt. + When("measuring the performance for", func() { + It("choose distributed processes", Serial, Label("measurement"), func() { + // Create the new experient. + experiment := gmeasure.NewExperiment("Choose Distributed Processes") + + // Register the experiment as a ReportEntry - this will cause Ginkgo's reporter infrastructure + // to print out the experiment's report and to include the experiment in any generated reports. + AddReportEntry(experiment.Name, experiment) + + // We sample a function repeatedly to get a statistically significant set of measurements. + experiment.Sample(func(_ int) { + candidates = generateCandidates(dcIDs, 250) + // Only measure the actual execution of ChooseDistributedProcesses. + experiment.MeasureDuration("ChooseDistributedProcesses", func() { + _, _ = ChooseDistributedProcesses(cluster, candidates, cluster.DesiredCoordinatorCount(), ProcessSelectionConstraint{ + HardLimits: GetHardLimits(cluster), + SelectingCoordinators: true, + }) + }) + // We'll sample the function up to 50 times or up to a minute, whichever comes first. 
+				}, gmeasure.SamplingConfig{N: 50, Duration: time.Minute})
+			})
		})
	})

From 0b9b6aa273f9c0806252af7b17665d75d7f4f6e0 Mon Sep 17 00:00:00 2001
From: "Johannes M. Scheuermann" <jscheuermann@apple.com>
Date: Tue, 4 Jun 2024 12:10:56 +0200
Subject: [PATCH 3/4] Allow specifying the number of different zones

---
 internal/locality/locality_test.go | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/internal/locality/locality_test.go b/internal/locality/locality_test.go
index a5bb8e371..512290531 100644
--- a/internal/locality/locality_test.go
+++ b/internal/locality/locality_test.go
@@ -119,16 +119,17 @@ func generateDefaultStatus(tls bool) *fdbv1beta2.FoundationDBStatus {
 	}
 }
 
-func generateCandidates(dcIDs []string, processesPerDc int) []Info {
+func generateCandidates(dcIDs []string, processesPerDc int, numberOfZones int) []Info {
 	candidates := make([]Info, processesPerDc*len(dcIDs))
 
 	idx := 0
 	for _, dcID := range dcIDs {
 		for i := 0; i < processesPerDc; i++ {
+			zoneIdx := i % numberOfZones
 			candidates[idx] = Info{
 				ID: dcID + strconv.Itoa(i),
 				LocalityData: map[string]string{
-					fdbv1beta2.FDBLocalityZoneIDKey: dcID + "-z" + strconv.Itoa(i),
+					fdbv1beta2.FDBLocalityZoneIDKey: dcID + "-z" + strconv.Itoa(zoneIdx),
 					fdbv1beta2.FDBLocalityDCIDKey:   dcID,
 				},
 			}
@@ -600,7 +601,7 @@ var _ = Describe("Localities", func() {
 
 		When("testing the correct behavior with a small set of candidates", func() {
 			BeforeEach(func() {
-				candidates = generateCandidates(dcIDs, 5)
+				candidates = generateCandidates(dcIDs, 5, 5)
 				result, err = ChooseDistributedProcesses(cluster, candidates, cluster.DesiredCoordinatorCount(), ProcessSelectionConstraint{
 					HardLimits:            GetHardLimits(cluster),
 					SelectingCoordinators: true,
@@ -637,7 +638,10 @@ var _ = Describe("Localities", func() {
 
 			// We sample a function repeatedly to get a statistically significant set of measurements.
 			experiment.Sample(func(_ int) {
-				candidates = generateCandidates(dcIDs, 250)
+				candidates = generateCandidates(dcIDs, 250, 10)
+				rand.Shuffle(len(candidates), func(i, j int) {
+					candidates[i], candidates[j] = candidates[j], candidates[i]
+				})
 				// Only measure the actual execution of ChooseDistributedProcesses.
 				experiment.MeasureDuration("ChooseDistributedProcesses", func() {
 					_, _ = ChooseDistributedProcesses(cluster, candidates, cluster.DesiredCoordinatorCount(), ProcessSelectionConstraint{

From b615ff24bff530d99eeddb319e31accfb1099da9 Mon Sep 17 00:00:00 2001
From: "Johannes M. Scheuermann" <jscheuermann@apple.com>
Date: Tue, 4 Jun 2024 12:53:21 +0200
Subject: [PATCH 4/4] Fix typo

---
 internal/locality/locality_test.go | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/internal/locality/locality_test.go b/internal/locality/locality_test.go
index 512290531..08cb10bcd 100644
--- a/internal/locality/locality_test.go
+++ b/internal/locality/locality_test.go
@@ -629,7 +629,7 @@ var _ = Describe("Localities", func() {
 	// Adding a benchmark test for the ChooseDistributedProcesses. In order to print the set use FIt.
 	When("measuring the performance for", func() {
 		It("choose distributed processes", Serial, Label("measurement"), func() {
-			// Create the new experient.
+			// Create the new experiment.
 			experiment := gmeasure.NewExperiment("Choose Distributed Processes")
 
 			// Register the experiment as a ReportEntry - this will cause Ginkgo's reporter infrastructure