From 3bbbb1972376e8efeb5d1de4a12cab3977cb27ba Mon Sep 17 00:00:00 2001 From: Blake Rouse Date: Thu, 23 Jul 2020 09:07:49 -0400 Subject: [PATCH] [Elastic Agent] Improve GRPC stop to be more relaxed. (#20118) * Improve stop to be more relaxed. * Add changelog. --- x-pack/elastic-agent/CHANGELOG.asciidoc | 1 + .../elastic-agent/pkg/core/server/server.go | 7 ++- .../pkg/core/server/server_test.go | 51 +++++++++++++++++++ 3 files changed, 57 insertions(+), 2 deletions(-) diff --git a/x-pack/elastic-agent/CHANGELOG.asciidoc b/x-pack/elastic-agent/CHANGELOG.asciidoc index 14ca5ccb3a45..9eb0f512ae37 100644 --- a/x-pack/elastic-agent/CHANGELOG.asciidoc +++ b/x-pack/elastic-agent/CHANGELOG.asciidoc @@ -53,6 +53,7 @@ - Fix issues with merging of elastic-agent.yml and fleet.yml {pull}20026[20026] - Unzip failures on Windows 8/Windows server 2012 {pull}20088[20088] - Fix failing unit tests on windows {pull}20127[20127] +- Improve GRPC stop to be more relaxed {pull}20118[20118] ==== New features diff --git a/x-pack/elastic-agent/pkg/core/server/server.go b/x-pack/elastic-agent/pkg/core/server/server.go index 4cd5c8386ec1..12885e2f0123 100644 --- a/x-pack/elastic-agent/pkg/core/server/server.go +++ b/x-pack/elastic-agent/pkg/core/server/server.go @@ -526,6 +526,7 @@ func (as *ApplicationState) WriteConnInfo(w io.Writer) error { // the application times out during stop and ErrApplication func (as *ApplicationState) Stop(timeout time.Duration) error { as.checkinLock.Lock() + wasConn := as.checkinDone != nil cfgIdx := as.statusConfigIdx as.expected = proto.StateExpected_STOPPING as.checkinLock.Unlock() @@ -548,8 +549,10 @@ func (as *ApplicationState) Stop(timeout time.Duration) error { s := as.status doneChan := as.checkinDone as.checkinLock.RUnlock() - if s == proto.StateObserved_STOPPING && doneChan == nil { - // sent stopping and now is disconnected (so its stopped) + if (wasConn && doneChan == nil) || (!wasConn && s == proto.StateObserved_STOPPING && doneChan == nil) { + // either occurred + // * client was connected then disconnected on stop + // * client was not connected; connected; received stopping; then disconnected as.Destroy() return nil } diff --git a/x-pack/elastic-agent/pkg/core/server/server_test.go b/x-pack/elastic-agent/pkg/core/server/server_test.go index 608be5641f93..424efb143115 100644 --- a/x-pack/elastic-agent/pkg/core/server/server_test.go +++ b/x-pack/elastic-agent/pkg/core/server/server_test.go @@ -416,6 +416,57 @@ func TestServer_Stop(t *testing.T) { assert.NoError(t, stopErr) } +func TestServer_StopJustDisconnect(t *testing.T) { + initConfig := "initial_config" + app := &StubApp{} + srv := createAndStartServer(t, &StubHandler{}) + defer srv.Stop() + as, err := srv.Register(app, initConfig) + require.NoError(t, err) + cImpl := &StubClientImpl{} + c := newClientFromApplicationState(t, as, cImpl) + require.NoError(t, c.Start(context.Background())) + defer c.Stop() + + // clients should get initial check-ins then set as healthy + require.NoError(t, waitFor(func() error { + if cImpl.Config() != initConfig { + return fmt.Errorf("client never got intial config") + } + return nil + })) + c.Status(proto.StateObserved_HEALTHY, "Running", nil) + assert.NoError(t, waitFor(func() error { + if app.Status() != proto.StateObserved_HEALTHY { + return fmt.Errorf("server never updated currect application state") + } + return nil + })) + + // send stop to the client + done := make(chan bool) + var stopErr error + go func() { + stopErr = as.Stop(time.Second * 5) + close(done) + }() + + // process of testing the flow + // 1. server sends stop + // 2. client disconnects + require.NoError(t, waitFor(func() error { + if cImpl.Stop() == 0 { + return fmt.Errorf("client never got expected stop") + } + return nil + })) + c.Stop() + <-done + + // no error on stop + assert.NoError(t, stopErr) +} + func TestServer_StopTimeout(t *testing.T) { initConfig := "initial_config" app := &StubApp{}