Skip to content

Commit

Permalink
[Regression] Fleet scale down didn't adhere to Packed Scheduling
Browse files Browse the repository at this point in the history
The new Fleet scale up/scale down performance enhancements removed
the functionality that the `Packed` strategy is documented to
provide -- when scaling down, removing GameServers from least used Nodes
first.

This change implements a library for GameServer sorting that doesn't
use reflection, unlike sort.Slice (much faster), and can be used
with any Comparator, as I expect we'll likely need this again.

To maintain performance as well, the set of GameServers only get sorted when
scaling down, and only for Packed strategy.
  • Loading branch information
markmandel committed Mar 12, 2019
1 parent b133e52 commit 9ba315a
Show file tree
Hide file tree
Showing 9 changed files with 159 additions and 118 deletions.
4 changes: 2 additions & 2 deletions cmd/controller/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -190,7 +190,7 @@ func main() {
ctlConf.MinPort, ctlConf.MaxPort, ctlConf.SidecarImage, ctlConf.AlwaysPullSidecar,
ctlConf.SidecarCPURequest, ctlConf.SidecarCPULimit,
kubeClient, kubeInformerFactory, extClient, agonesClient, agonesInformerFactory)
gsSetController := gameserversets.NewController(wh, health,
gsSetController := gameserversets.NewController(wh, health, gsCounter,
kubeClient, extClient, agonesClient, agonesInformerFactory)
fleetController := fleets.NewController(wh, health, kubeClient, extClient, agonesClient, agonesInformerFactory)
faController := fleetallocation.NewController(wh, allocationMutex,
Expand All @@ -201,7 +201,7 @@ func main() {
kubeClient, extClient, agonesClient, agonesInformerFactory)

rs = append(rs,
httpsServer, gsCounter, gsController, gsSetController, fleetController, faController, fasController, gasController, server)
httpsServer, gsCounter, gsCounter, gsController, gsSetController, fleetController, faController, fasController, gasController, server)

stop := signals.NewStopChannel()

Expand Down
50 changes: 50 additions & 0 deletions pkg/apis/stable/v1alpha1/gameserversort.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
// Copyright 2019 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package v1alpha1

import "sort"

// GameServerSort sorts gameservers, based on a comparator
type GameServerSort struct {
// List is the GameServer list to sort
List []*GameServer
// Less is the comparator function to use as a sorting function.
// returns true if a < b
Comparator func(a, b *GameServer) bool
}

// Sort sorts the underlying Array and returns it
func (g *GameServerSort) Sort() []*GameServer {
sort.Sort(g)
return g.List
}

// Len is the Len function for sort.Interface
func (g *GameServerSort) Len() int {
return len(g.List)
}

// Less is the Less function for sort.Interface
func (g *GameServerSort) Less(i, j int) bool {
a := g.List[i]
b := g.List[j]

return g.Comparator(a, b)
}

// Swap is the swap function for sort.Interface
func (g *GameServerSort) Swap(i, j int) {
g.List[j], g.List[i] = g.List[i], g.List[j]
}
37 changes: 37 additions & 0 deletions pkg/apis/stable/v1alpha1/gameserversort_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
// Copyright 2019 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package v1alpha1

import (
"testing"

"github.com/stretchr/testify/assert"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

func TestGameServerSortSort(t *testing.T) {
list := []*GameServer{{ObjectMeta: metav1.ObjectMeta{Name: "c"}},
{ObjectMeta: metav1.ObjectMeta{Name: "b"}}, {ObjectMeta: metav1.ObjectMeta{Name: "a"}}}

sort := GameServerSort{list, func(a, b *GameServer) bool {
return a.ObjectMeta.Name < b.ObjectMeta.Name
}}

result := sort.Sort()
assert.Len(t, result, len(list))
assert.Equal(t, "a", result[0].ObjectMeta.Name)
assert.Equal(t, "b", result[1].ObjectMeta.Name)
assert.Equal(t, "c", result[2].ObjectMeta.Name)
}
6 changes: 6 additions & 0 deletions pkg/gameserverallocations/controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,12 @@ import (
"k8s.io/client-go/tools/cache"
)

const (
defaultNs = "default"
n1 = "node1"
n2 = "node2"
)

var (
gvk = metav1.GroupVersionKind(v1alpha1.SchemeGroupVersion.WithKind("GameServerAllocation"))
)
Expand Down
21 changes: 0 additions & 21 deletions pkg/gameserverallocations/helper_test.go

This file was deleted.

39 changes: 25 additions & 14 deletions pkg/gameserversets/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@ package gameserversets

import (
"encoding/json"
"sort"
"sync"

"agones.dev/agones/pkg/apis/stable"
Expand All @@ -25,6 +24,7 @@ import (
getterv1alpha1 "agones.dev/agones/pkg/client/clientset/versioned/typed/stable/v1alpha1"
"agones.dev/agones/pkg/client/informers/externalversions"
listerv1alpha1 "agones.dev/agones/pkg/client/listers/stable/v1alpha1"
"agones.dev/agones/pkg/gameservers"
"agones.dev/agones/pkg/util/crd"
"agones.dev/agones/pkg/util/logfields"
"agones.dev/agones/pkg/util/runtime"
Expand Down Expand Up @@ -66,6 +66,7 @@ const (
// Controller is a the GameServerSet controller
type Controller struct {
baseLogger *logrus.Entry
counter *gameservers.PerNodeCounter
crdGetter v1beta1.CustomResourceDefinitionInterface
gameServerGetter getterv1alpha1.GameServersGetter
gameServerLister listerv1alpha1.GameServerLister
Expand All @@ -83,6 +84,7 @@ type Controller struct {
func NewController(
wh *webhooks.WebHook,
health healthcheck.Handler,
counter *gameservers.PerNodeCounter,
kubeClient kubernetes.Interface,
extClient extclientset.Interface,
agonesClient versioned.Interface,
Expand All @@ -95,6 +97,7 @@ func NewController(

c := &Controller{
crdGetter: extClient.ApiextensionsV1beta1().CustomResourceDefinitions(),
counter: counter,
gameServerGetter: agonesClient.StableV1alpha1(),
gameServerLister: gameServers.Lister(),
gameServerSynced: gsInformer.HasSynced,
Expand Down Expand Up @@ -305,7 +308,7 @@ func (c *Controller) syncGameServerSet(key string) error {

list = c.stateCache.forGameServerSet(gsSet).reconcileWithUpdatedServerList(list)

numServersToAdd, toDelete, isPartial := computeReconciliationAction(list, int(gsSet.Spec.Replicas), maxGameServerCreationsPerBatch, maxGameServerDeletionsPerBatch, maxPodPendingCount)
numServersToAdd, toDelete, isPartial := c.computeReconciliationAction(gsSet, list, int(gsSet.Spec.Replicas), maxGameServerCreationsPerBatch, maxGameServerDeletionsPerBatch, maxPodPendingCount)
status := computeStatus(list)
fields := logrus.Fields{}

Expand Down Expand Up @@ -353,33 +356,38 @@ func (c *Controller) syncGameServerSet(key string) error {

// computeReconciliationAction computes the action to take to reconcile a game server set set given
// the list of game servers that were found and target replica count.
func computeReconciliationAction(list []*v1alpha1.GameServer, targetReplicaCount int, maxCreations int, maxDeletions int, maxPending int) (int, []*v1alpha1.GameServer, bool) {
var upCount int // up == Ready or will become ready
func (c *Controller) computeReconciliationAction(gsSet *v1alpha1.GameServerSet, list []*v1alpha1.GameServer,
targetReplicaCount int, maxCreations int, maxDeletions int, maxPending int) (int, []*v1alpha1.GameServer, bool) {
var upCount int // up == Ready or will become ready
var deleteCount int // number of gameservers to delete

// track the number of pods that are being created at any given moment by the GameServerSet
// so we can limit it at a throughput that Kubernetes can handle
var podPendingCount int // podPending == "up" but don't have a Pod running yet

var potentialDeletions []*v1alpha1.GameServer
var toDelete []*v1alpha1.GameServer

scheduleDeletion := func(gs *v1alpha1.GameServer) {
if gs.ObjectMeta.DeletionTimestamp.IsZero() {
toDelete = append(toDelete, gs)
}
toDelete = append(toDelete, gs)
deleteCount--
}

handleGameServerUp := func(gs *v1alpha1.GameServer) {
if upCount >= targetReplicaCount {
scheduleDeletion(gs)
deleteCount++
} else {
upCount++
}

// Track gameservers that could be potentially deleted
potentialDeletions = append(potentialDeletions, gs)
}

// pass 1 - count allocated servers only, since those can't be touched
for _, gs := range list {
if gs.IsAllocated() {
upCount++
continue
}
}

Expand Down Expand Up @@ -446,12 +454,15 @@ func computeReconciliationAction(list []*v1alpha1.GameServer, targetReplicaCount
}
}

if len(toDelete) > maxDeletions {
// we have to pick which GS to delete, let's delete the newest ones first.
sort.Slice(toDelete, func(i, j int) bool {
return toDelete[i].ObjectMeta.CreationTimestamp.After(toDelete[j].ObjectMeta.CreationTimestamp.Time)
})
if deleteCount > 0 {
if gsSet.Spec.Scheduling == v1alpha1.Packed {
potentialDeletions = sortGameServersByLeastFullNodes(potentialDeletions, c.counter.Counts())
}

toDelete = append(toDelete, potentialDeletions[0:deleteCount]...)
}

if deleteCount > maxDeletions {
toDelete = toDelete[0:maxDeletions]
partialReconciliation = true
}
Expand Down
11 changes: 9 additions & 2 deletions pkg/gameserversets/controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ import (
"time"

"agones.dev/agones/pkg/apis/stable/v1alpha1"
"agones.dev/agones/pkg/gameservers"
agtesting "agones.dev/agones/pkg/testing"
"agones.dev/agones/pkg/util/webhooks"
"github.com/heptiolabs/healthcheck"
Expand Down Expand Up @@ -159,7 +160,12 @@ func TestComputeReconciliationAction(t *testing.T) {

for _, tc := range cases {
t.Run(tc.desc, func(t *testing.T) {
toAdd, toDelete, isPartial := computeReconciliationAction(tc.list, tc.targetReplicaCount, maxTestCreationsPerBatch, maxTestDeletionsPerBatch, maxTestPendingPerBatch)
c, _ := newFakeController()

gsSet := &v1alpha1.GameServerSet{}
gsSet.Spec.Scheduling = v1alpha1.Packed

toAdd, toDelete, isPartial := c.computeReconciliationAction(gsSet, tc.list, tc.targetReplicaCount, maxTestCreationsPerBatch, maxTestDeletionsPerBatch, maxTestPendingPerBatch)

assert.Equal(t, tc.wantNumServersToAdd, toAdd, "# of GameServers to add")
assert.Len(t, toDelete, tc.wantNumServersToDelete, "# of GameServers to delete")
Expand Down Expand Up @@ -573,7 +579,8 @@ func createGameServers(gsSet *v1alpha1.GameServerSet, size int) []v1alpha1.GameS
func newFakeController() (*Controller, agtesting.Mocks) {
m := agtesting.NewMocks()
wh := webhooks.NewWebHook(http.NewServeMux())
c := NewController(wh, healthcheck.NewHandler(), m.KubeClient, m.ExtClient, m.AgonesClient, m.AgonesInformerFactory)
counter := gameservers.NewPerNodeCounter(m.KubeInformerFactory, m.AgonesInformerFactory)
c := NewController(wh, healthcheck.NewHandler(), counter, m.KubeClient, m.ExtClient, m.AgonesClient, m.AgonesInformerFactory)
c.recorder = m.FakeRecorder
return c, m
}
65 changes: 15 additions & 50 deletions pkg/gameserversets/gameserversets.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,70 +15,35 @@
package gameserversets

import (
"sort"

"agones.dev/agones/pkg/apis/stable/v1alpha1"
listerv1alpha1 "agones.dev/agones/pkg/client/listers/stable/v1alpha1"
"agones.dev/agones/pkg/gameservers"
"github.com/pkg/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/labels"
)

// node is just a convenience data structure for
// keeping relevant GameServer information about Nodes
type node struct {
name string
total int64
ready []*v1alpha1.GameServer
}

// filterGameServersOnLeastFullNodes returns a limited list of GameServers, ordered by the nodes
// they are hosted on, with the least utilised Nodes being prioritised
func filterGameServersOnLeastFullNodes(gsList []*v1alpha1.GameServer, limit int32) []*v1alpha1.GameServer {
if limit <= 0 {
return nil
}

nodeMap := map[string]*node{}
var nodeList []*node

// count up the number of allocated and ready game servers that exist
// also, since we're already looping through, track all the deletable GameServers
// per node, so we can use this as a shortlist to delete from
for _, gs := range gsList {
if gs.DeletionTimestamp.IsZero() &&
(gs.Status.State == v1alpha1.GameServerStateAllocated || gs.Status.State == v1alpha1.GameServerStateReady) {
_, ok := nodeMap[gs.Status.NodeName]
// sortGameServersByLeastFullNodes sorts the list of gameservers by which gameservers reside on the least full nodes
func sortGameServersByLeastFullNodes(list []*v1alpha1.GameServer, count map[string]gameservers.NodeCount) []*v1alpha1.GameServer {
sort := v1alpha1.GameServerSort{
List: list,
Comparator: func(a, b *v1alpha1.GameServer) bool {
// not scheduled yet/node deleted, put them first
ac, ok := count[a.Status.NodeName]
if !ok {
node := &node{name: gs.Status.NodeName}
nodeMap[gs.Status.NodeName] = node
nodeList = append(nodeList, node)
return true
}

nodeMap[gs.Status.NodeName].total++
if gs.Status.State == v1alpha1.GameServerStateReady {
nodeMap[gs.Status.NodeName].ready = append(nodeMap[gs.Status.NodeName].ready, gs)
bc, ok := count[b.Status.NodeName]
if !ok {
return false
}
}
}

// sort our nodes, least to most
sort.Slice(nodeList, func(i, j int) bool {
return nodeList[i].total < nodeList[j].total
})

// we need to get Ready GameServer until we equal or pass limit
result := make([]*v1alpha1.GameServer, 0, limit)

for _, n := range nodeList {
result = append(result, n.ready...)

if int32(len(result)) >= limit {
return result
}
return (ac.Allocated + ac.Ready) < (bc.Allocated + bc.Ready)
},
}

return result
return sort.Sort()
}

// ListGameServersByGameServerSetOwner lists the GameServers for a given GameServerSet
Expand Down
Loading

0 comments on commit 9ba315a

Please sign in to comment.