From 43bd410a4b245f911cee450ce2c0c8103f42577d Mon Sep 17 00:00:00 2001 From: Tim Gross Date: Fri, 11 Dec 2020 13:57:56 -0500 Subject: [PATCH 1/8] template: factor onTemplateRendered out of template re-renderer Pull this function out so it can be used by the first-pass renderer when we restore a task from a restarted client. --- .../taskrunner/template/template.go | 181 +++++++++--------- 1 file changed, 93 insertions(+), 88 deletions(-) diff --git a/client/allocrunner/taskrunner/template/template.go b/client/allocrunner/taskrunner/template/template.go index bbc9fea489b..2e9a4130c81 100644 --- a/client/allocrunner/taskrunner/template/template.go +++ b/client/allocrunner/taskrunner/template/template.go @@ -368,112 +368,117 @@ func (tm *TaskTemplateManager) handleTemplateRerenders(allRenderedTime time.Time SetFailsTask(). SetDisplayMessage(fmt.Sprintf("Template failed: %v", err))) case <-tm.runner.TemplateRenderedCh(): - // A template has been rendered, figure out what to do - var handling []string - signals := make(map[string]struct{}) - restart := false - var splay time.Duration + tm.onTemplateRendered(handledRenders, allRenderedTime) + } + } +} - events := tm.runner.RenderEvents() - for id, event := range events { +func (tm *TaskTemplateManager) onTemplateRendered(handledRenders map[string]time.Time, allRenderedTime time.Time) { + // A template has been rendered, figure out what to do + var handling []string + signals := make(map[string]struct{}) + restart := false + var splay time.Duration - // First time through - if allRenderedTime.After(event.LastDidRender) || allRenderedTime.Equal(event.LastDidRender) { - handledRenders[id] = allRenderedTime - continue - } + events := tm.runner.RenderEvents() + for id, event := range events { - // We have already handled this one - if htime := handledRenders[id]; htime.After(event.LastDidRender) || htime.Equal(event.LastDidRender) { - continue - } + // First time through + if allRenderedTime.After(event.LastDidRender) || allRenderedTime.Equal(event.LastDidRender) { + handledRenders[id] = allRenderedTime + continue + } - // Lookup the template and determine what to do - tmpls, ok := tm.lookup[id] - if !ok { - tm.config.Lifecycle.Kill(context.Background(), - structs.NewTaskEvent(structs.TaskKilling). - SetFailsTask(). - SetDisplayMessage(fmt.Sprintf("Template runner returned unknown template id %q", id))) - return - } + // We have already handled this one + if htime := handledRenders[id]; htime.After(event.LastDidRender) || htime.Equal(event.LastDidRender) { + continue + } - // Read environment variables from templates - envMap, err := loadTemplateEnv(tm.config.Templates, tm.config.TaskDir, tm.config.EnvBuilder.Build()) - if err != nil { - tm.config.Lifecycle.Kill(context.Background(), - structs.NewTaskEvent(structs.TaskKilling). - SetFailsTask(). - SetDisplayMessage(fmt.Sprintf("Template failed to read environment variables: %v", err))) - return - } - tm.config.EnvBuilder.SetTemplateEnv(envMap) - - for _, tmpl := range tmpls { - switch tmpl.ChangeMode { - case structs.TemplateChangeModeSignal: - signals[tmpl.ChangeSignal] = struct{}{} - case structs.TemplateChangeModeRestart: - restart = true - case structs.TemplateChangeModeNoop: - continue - } + // Lookup the template and determine what to do + tmpls, ok := tm.lookup[id] + if !ok { + tm.config.Lifecycle.Kill(context.Background(), + structs.NewTaskEvent(structs.TaskKilling). + SetFailsTask(). + SetDisplayMessage(fmt.Sprintf("Template runner returned unknown template id %q", id))) + return + } - if tmpl.Splay > splay { - splay = tmpl.Splay - } - } + // Read environment variables from templates + envMap, err := loadTemplateEnv(tm.config.Templates, tm.config.TaskDir, tm.config.EnvBuilder.Build()) + if err != nil { + tm.config.Lifecycle.Kill(context.Background(), + structs.NewTaskEvent(structs.TaskKilling). + SetFailsTask(). + SetDisplayMessage(fmt.Sprintf("Template failed to read environment variables: %v", err))) + return + } + tm.config.EnvBuilder.SetTemplateEnv(envMap) + + for _, tmpl := range tmpls { + switch tmpl.ChangeMode { + case structs.TemplateChangeModeSignal: + signals[tmpl.ChangeSignal] = struct{}{} + case structs.TemplateChangeModeRestart: + restart = true + case structs.TemplateChangeModeNoop: + continue + } - handling = append(handling, id) + if tmpl.Splay > splay { + splay = tmpl.Splay } + } - if restart || len(signals) != 0 { - if splay != 0 { - ns := splay.Nanoseconds() - offset := rand.Int63n(ns) - t := time.Duration(offset) + handling = append(handling, id) + } - select { - case <-time.After(t): - case <-tm.shutdownCh: - return - } - } + if restart || len(signals) != 0 { + if splay != 0 { + ns := splay.Nanoseconds() + offset := rand.Int63n(ns) + t := time.Duration(offset) - // Update handle time - for _, id := range handling { - handledRenders[id] = events[id].LastDidRender - } + select { + case <-time.After(t): + case <-tm.shutdownCh: + return + } + } - if restart { - tm.config.Lifecycle.Restart(context.Background(), - structs.NewTaskEvent(structs.TaskRestartSignal). - SetDisplayMessage("Template with change_mode restart re-rendered"), false) - } else if len(signals) != 0 { - var mErr multierror.Error - for signal := range signals { - s := tm.signals[signal] - event := structs.NewTaskEvent(structs.TaskSignaling).SetTaskSignal(s).SetDisplayMessage("Template re-rendered") - if err := tm.config.Lifecycle.Signal(event, signal); err != nil { - multierror.Append(&mErr, err) - } - } + // Update handle time + for _, id := range handling { + handledRenders[id] = events[id].LastDidRender + } - if err := mErr.ErrorOrNil(); err != nil { - flat := make([]os.Signal, 0, len(signals)) - for signal := range signals { - flat = append(flat, tm.signals[signal]) - } + if restart { + tm.config.Lifecycle.Restart(context.Background(), + structs.NewTaskEvent(structs.TaskRestartSignal). + SetDisplayMessage("Template with change_mode restart re-rendered"), false) + } else if len(signals) != 0 { + var mErr multierror.Error + for signal := range signals { + s := tm.signals[signal] + event := structs.NewTaskEvent(structs.TaskSignaling).SetTaskSignal(s).SetDisplayMessage("Template re-rendered") + if err := tm.config.Lifecycle.Signal(event, signal); err != nil { + multierror.Append(&mErr, err) + } + } - tm.config.Lifecycle.Kill(context.Background(), - structs.NewTaskEvent(structs.TaskKilling). - SetFailsTask(). - SetDisplayMessage(fmt.Sprintf("Template failed to send signals %v: %v", flat, err))) - } + if err := mErr.ErrorOrNil(); err != nil { + flat := make([]os.Signal, 0, len(signals)) + for signal := range signals { + flat = append(flat, tm.signals[signal]) } + + tm.config.Lifecycle.Kill(context.Background(), + structs.NewTaskEvent(structs.TaskKilling). + SetFailsTask(). + SetDisplayMessage(fmt.Sprintf("Template failed to send signals %v: %v", flat, err))) } } } + } // allTemplatesNoop returns whether all the managed templates have change mode noop. From 0eb32eafae72c4d0465622f8ceea65209c29d4ca Mon Sep 17 00:00:00 2001 From: Tim Gross Date: Fri, 11 Dec 2020 14:00:17 -0500 Subject: [PATCH 2/8] template: add HasHandle method to task hook lifecycle interface In the task hook implementation, `HasHandle` returns true if the task runner has a handle to the task driver, so that we can distinguish restored tasks during prestart hooks without passing around additional state. --- client/allocrunner/taskrunner/interfaces/lifecycle.go | 6 ++++++ client/allocrunner/taskrunner/lifecycle.go | 4 ++++ client/allocrunner/taskrunner/template/template_test.go | 4 ++++ 3 files changed, 14 insertions(+) diff --git a/client/allocrunner/taskrunner/interfaces/lifecycle.go b/client/allocrunner/taskrunner/interfaces/lifecycle.go index 1890471bf56..32dec33d0a5 100644 --- a/client/allocrunner/taskrunner/interfaces/lifecycle.go +++ b/client/allocrunner/taskrunner/interfaces/lifecycle.go @@ -16,4 +16,10 @@ type TaskLifecycle interface { // Kill a task permanently. Kill(ctx context.Context, event *structs.TaskEvent) error + + // HasHandle returns true if the task runner has a handle to the task + // driver, which is useful for distinguishing restored tasks during + // prestart hooks. Note: prestart should be idempotent whenever possible + // to handle restored tasks safely; use this as an escape hatch. + HasHandle() bool } diff --git a/client/allocrunner/taskrunner/lifecycle.go b/client/allocrunner/taskrunner/lifecycle.go index c9fc10a539e..ea477988a52 100644 --- a/client/allocrunner/taskrunner/lifecycle.go +++ b/client/allocrunner/taskrunner/lifecycle.go @@ -86,3 +86,7 @@ func (tr *TaskRunner) Kill(ctx context.Context, event *structs.TaskEvent) error return tr.getKillErr() } + +func (tr *TaskRunner) HasHandle() bool { + return tr.getDriverHandle() != nil +} diff --git a/client/allocrunner/taskrunner/template/template_test.go b/client/allocrunner/taskrunner/template/template_test.go index 5bb989900ff..20d2a4e9585 100644 --- a/client/allocrunner/taskrunner/template/template_test.go +++ b/client/allocrunner/taskrunner/template/template_test.go @@ -98,6 +98,10 @@ func (m *MockTaskHooks) Kill(ctx context.Context, event *structs.TaskEvent) erro return nil } +func (m *MockTaskHooks) HasHandle() bool { + return false // TODO +} + func (m *MockTaskHooks) EmitEvent(event *structs.TaskEvent) { m.Events = append(m.Events, event) select { From dd999f240a398d9999f0397ea07ea904d604455a Mon Sep 17 00:00:00 2001 From: Tim Gross Date: Fri, 11 Dec 2020 14:17:06 -0500 Subject: [PATCH 3/8] template: trigger change_mode for dynamic secrets on restore When a task is restored after a client restart, the template runner will create a new lease for any dynamic secret (ex. Consul or PKI secrets engines). But because this lease is being created in the prestart hook, we don't trigger the `change_mode`. This changeset uses the the existence of the task handle to detect a previously running task that's been restored, so that we can trigger the template `change_mode` if the template is changed, as it will be only with dynamic secrets. --- client/allocrunner/taskrunner/template/template.go | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/client/allocrunner/taskrunner/template/template.go b/client/allocrunner/taskrunner/template/template.go index 2e9a4130c81..5dc69b26241 100644 --- a/client/allocrunner/taskrunner/template/template.go +++ b/client/allocrunner/taskrunner/template/template.go @@ -273,11 +273,22 @@ WAIT: continue } + dirty := false for _, event := range events { // This template hasn't been rendered if event.LastWouldRender.IsZero() { continue WAIT } + if event.WouldRender && event.DidRender { + dirty = true + } + } + + // if there's a driver handle then the task is already running and + // that changes how we want to behave on first render + if dirty && tm.config.Lifecycle.HasHandle() { + handledRenders := make(map[string]time.Time, len(tm.config.Templates)) + tm.onTemplateRendered(handledRenders, time.Time{}) } break WAIT From 1c34a3d80bafde381b2c6c5439bc63b11dd6ccb3 Mon Sep 17 00:00:00 2001 From: Tim Gross Date: Mon, 14 Dec 2020 14:22:32 -0500 Subject: [PATCH 4/8] unit test exercising template change_mode for restore --- .../taskrunner/template/template_test.go | 109 +++++++++++++++++- 1 file changed, 108 insertions(+), 1 deletion(-) diff --git a/client/allocrunner/taskrunner/template/template_test.go b/client/allocrunner/taskrunner/template/template_test.go index 20d2a4e9585..d5ae730e1cc 100644 --- a/client/allocrunner/taskrunner/template/template_test.go +++ b/client/allocrunner/taskrunner/template/template_test.go @@ -57,6 +57,9 @@ type MockTaskHooks struct { Events []*structs.TaskEvent EmitEventCh chan *structs.TaskEvent + + // hasHandle can be set to simulate restoring a task after client restart + hasHandle bool } func NewMockTaskHooks() *MockTaskHooks { @@ -99,7 +102,7 @@ func (m *MockTaskHooks) Kill(ctx context.Context, event *structs.TaskEvent) erro } func (m *MockTaskHooks) HasHandle() bool { - return false // TODO + return m.hasHandle } func (m *MockTaskHooks) EmitEvent(event *structs.TaskEvent) { @@ -764,6 +767,110 @@ func TestTaskTemplateManager_Unblock_Multi_Template(t *testing.T) { } } +// TestTaskTemplateManager_FirstRender_Restored tests that a task that's been +// restored renders and triggers its change mode if the template has changed +func TestTaskTemplateManager_FirstRender_Restored(t *testing.T) { + t.Parallel() + require := require.New(t) + // Make a template that will render based on a key in Vault + vaultPath := "secret/data/password" + key := "password" + content := "barbaz" + embedded := fmt.Sprintf(`{{with secret "%s"}}{{.Data.data.%s}}{{end}}`, vaultPath, key) + file := "my.tmpl" + template := &structs.Template{ + EmbeddedTmpl: embedded, + DestPath: file, + ChangeMode: structs.TemplateChangeModeRestart, + } + + harness := newTestHarness(t, []*structs.Template{template}, false, true) + harness.start(t) + defer harness.stop() + + // Ensure no unblock + select { + case <-harness.mockHooks.UnblockCh: + t.Fatalf("Task unblock should not have been called") + case <-time.After(time.Duration(1*testutil.TestMultiplier()) * time.Second): + } + + // Write the secret to Vault + logical := harness.vault.Client.Logical() + _, err := logical.Write(vaultPath, map[string]interface{}{"data": map[string]interface{}{key: content}}) + require.NoError(err) + + // Wait for the unblock + select { + case <-harness.mockHooks.UnblockCh: + case <-time.After(time.Duration(5*testutil.TestMultiplier()) * time.Second): + t.Fatalf("Task unblock should have been called") + } + + // Check the file is there + path := filepath.Join(harness.taskDir, file) + raw, err := ioutil.ReadFile(path) + if err != nil { + t.Fatalf("Failed to read rendered template from %q: %v", path, err) + } + + if s := string(raw); s != content { + t.Fatalf("Unexpected template data; got %q, want %q", s, content) + } + + // task is now running + harness.mockHooks.hasHandle = true + + // simulate a client restart + harness.manager.Stop() + harness.mockHooks.UnblockCh = make(chan struct{}, 1) + harness.start(t) + + // Wait for the unblock + select { + case <-harness.mockHooks.UnblockCh: + case <-time.After(time.Duration(5*testutil.TestMultiplier()) * time.Second): + t.Fatalf("Task unblock should have been called") + } + + select { + case <-harness.mockHooks.RestartCh: + t.Fatalf("should not have restarted: %+v", harness.mockHooks) + case <-harness.mockHooks.SignalCh: + t.Fatalf("should not have restarted: %+v", harness.mockHooks) + case <-time.After(time.Duration(1*testutil.TestMultiplier()) * time.Second): + } + + // simulate a client restart and TTL expiry + harness.manager.Stop() + content = "bazbar" + _, err = logical.Write(vaultPath, map[string]interface{}{"data": map[string]interface{}{key: content}}) + require.NoError(err) + harness.mockHooks.UnblockCh = make(chan struct{}, 1) + harness.start(t) + + // Wait for the unblock + select { + case <-harness.mockHooks.UnblockCh: + case <-time.After(time.Duration(5*testutil.TestMultiplier()) * time.Second): + t.Fatalf("Task unblock should have been called") + } + + // Wait for restart + timeout := time.After(time.Duration(1*testutil.TestMultiplier()) * time.Second) +OUTER: + for { + select { + case <-harness.mockHooks.RestartCh: + break OUTER + case <-harness.mockHooks.SignalCh: + t.Fatalf("Signal with restart policy: %+v", harness.mockHooks) + case <-timeout: + t.Fatalf("Should have received a restart: %+v", harness.mockHooks) + } + } +} + func TestTaskTemplateManager_Rerender_Noop(t *testing.T) { t.Parallel() // Make a template that will render based on a key in Consul From 77e660d958fbf903dcae227db0bff9bba7d7966f Mon Sep 17 00:00:00 2001 From: Tim Gross Date: Tue, 15 Dec 2020 10:41:49 -0500 Subject: [PATCH 5/8] address code review: use require in template test --- .../taskrunner/template/template_test.go | 25 ++++++++----------- 1 file changed, 10 insertions(+), 15 deletions(-) diff --git a/client/allocrunner/taskrunner/template/template_test.go b/client/allocrunner/taskrunner/template/template_test.go index d5ae730e1cc..b08f74d811c 100644 --- a/client/allocrunner/taskrunner/template/template_test.go +++ b/client/allocrunner/taskrunner/template/template_test.go @@ -791,7 +791,7 @@ func TestTaskTemplateManager_FirstRender_Restored(t *testing.T) { // Ensure no unblock select { case <-harness.mockHooks.UnblockCh: - t.Fatalf("Task unblock should not have been called") + require.Fail("Task unblock should not have been called") case <-time.After(time.Duration(1*testutil.TestMultiplier()) * time.Second): } @@ -804,19 +804,14 @@ func TestTaskTemplateManager_FirstRender_Restored(t *testing.T) { select { case <-harness.mockHooks.UnblockCh: case <-time.After(time.Duration(5*testutil.TestMultiplier()) * time.Second): - t.Fatalf("Task unblock should have been called") + require.Fail("Task unblock should have been called") } // Check the file is there path := filepath.Join(harness.taskDir, file) raw, err := ioutil.ReadFile(path) - if err != nil { - t.Fatalf("Failed to read rendered template from %q: %v", path, err) - } - - if s := string(raw); s != content { - t.Fatalf("Unexpected template data; got %q, want %q", s, content) - } + require.NoError(err, "Failed to read rendered template from %q", path) + require.Equal(content, string(raw), "Unexpected template data; got %s, want %q", raw, content) // task is now running harness.mockHooks.hasHandle = true @@ -830,14 +825,14 @@ func TestTaskTemplateManager_FirstRender_Restored(t *testing.T) { select { case <-harness.mockHooks.UnblockCh: case <-time.After(time.Duration(5*testutil.TestMultiplier()) * time.Second): - t.Fatalf("Task unblock should have been called") + require.Fail("Task unblock should have been called") } select { case <-harness.mockHooks.RestartCh: - t.Fatalf("should not have restarted: %+v", harness.mockHooks) + require.Fail("should not have restarted", harness.mockHooks) case <-harness.mockHooks.SignalCh: - t.Fatalf("should not have restarted: %+v", harness.mockHooks) + require.Fail("should not have restarted", harness.mockHooks) case <-time.After(time.Duration(1*testutil.TestMultiplier()) * time.Second): } @@ -853,7 +848,7 @@ func TestTaskTemplateManager_FirstRender_Restored(t *testing.T) { select { case <-harness.mockHooks.UnblockCh: case <-time.After(time.Duration(5*testutil.TestMultiplier()) * time.Second): - t.Fatalf("Task unblock should have been called") + require.Fail("Task unblock should have been called") } // Wait for restart @@ -864,9 +859,9 @@ OUTER: case <-harness.mockHooks.RestartCh: break OUTER case <-harness.mockHooks.SignalCh: - t.Fatalf("Signal with restart policy: %+v", harness.mockHooks) + require.Fail("Signal with restart policy", harness.mockHooks) case <-timeout: - t.Fatalf("Should have received a restart: %+v", harness.mockHooks) + require.Fail("Should have received a restart", harness.mockHooks) } } } From 745a2b4b600c3ca2d1f1e06ceb4cf880f740124a Mon Sep 17 00:00:00 2001 From: Tim Gross Date: Tue, 15 Dec 2020 10:43:39 -0500 Subject: [PATCH 6/8] address code review: change interface method name to IsRunning --- client/allocrunner/taskrunner/interfaces/lifecycle.go | 10 ++++++---- client/allocrunner/taskrunner/lifecycle.go | 2 +- client/allocrunner/taskrunner/template/template.go | 2 +- .../allocrunner/taskrunner/template/template_test.go | 2 +- 4 files changed, 9 insertions(+), 7 deletions(-) diff --git a/client/allocrunner/taskrunner/interfaces/lifecycle.go b/client/allocrunner/taskrunner/interfaces/lifecycle.go index 32dec33d0a5..18b455f1e90 100644 --- a/client/allocrunner/taskrunner/interfaces/lifecycle.go +++ b/client/allocrunner/taskrunner/interfaces/lifecycle.go @@ -17,9 +17,11 @@ type TaskLifecycle interface { // Kill a task permanently. Kill(ctx context.Context, event *structs.TaskEvent) error - // HasHandle returns true if the task runner has a handle to the task + // IsRunning returns true if the task runner has a handle to the task // driver, which is useful for distinguishing restored tasks during - // prestart hooks. Note: prestart should be idempotent whenever possible - // to handle restored tasks safely; use this as an escape hatch. - HasHandle() bool + // prestart hooks. But note that the driver handle could go away after you + // check this, so callers should make sure they're handling that case + // safely. Ideally prestart hooks should be idempotnent whenever possible + // to handle restored tasks; use this as an escape hatch. + IsRunning() bool } diff --git a/client/allocrunner/taskrunner/lifecycle.go b/client/allocrunner/taskrunner/lifecycle.go index ea477988a52..2b8c7350ed8 100644 --- a/client/allocrunner/taskrunner/lifecycle.go +++ b/client/allocrunner/taskrunner/lifecycle.go @@ -87,6 +87,6 @@ func (tr *TaskRunner) Kill(ctx context.Context, event *structs.TaskEvent) error return tr.getKillErr() } -func (tr *TaskRunner) HasHandle() bool { +func (tr *TaskRunner) IsRunning() bool { return tr.getDriverHandle() != nil } diff --git a/client/allocrunner/taskrunner/template/template.go b/client/allocrunner/taskrunner/template/template.go index 5dc69b26241..c89752f5299 100644 --- a/client/allocrunner/taskrunner/template/template.go +++ b/client/allocrunner/taskrunner/template/template.go @@ -286,7 +286,7 @@ WAIT: // if there's a driver handle then the task is already running and // that changes how we want to behave on first render - if dirty && tm.config.Lifecycle.HasHandle() { + if dirty && tm.config.Lifecycle.IsRunning() { handledRenders := make(map[string]time.Time, len(tm.config.Templates)) tm.onTemplateRendered(handledRenders, time.Time{}) } diff --git a/client/allocrunner/taskrunner/template/template_test.go b/client/allocrunner/taskrunner/template/template_test.go index b08f74d811c..ff6fa682d2f 100644 --- a/client/allocrunner/taskrunner/template/template_test.go +++ b/client/allocrunner/taskrunner/template/template_test.go @@ -101,7 +101,7 @@ func (m *MockTaskHooks) Kill(ctx context.Context, event *structs.TaskEvent) erro return nil } -func (m *MockTaskHooks) HasHandle() bool { +func (m *MockTaskHooks) IsRunning() bool { return m.hasHandle } From f3578374fedc9cd8d5df44bb8ca345771d3d094e Mon Sep 17 00:00:00 2001 From: Tim Gross Date: Wed, 16 Dec 2020 11:32:50 -0500 Subject: [PATCH 7/8] changelog and upgrade docs --- CHANGELOG.md | 5 +++++ .../pages/docs/upgrade/upgrade-specific.mdx | 22 +++++++++++++++++++ 2 files changed, 27 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0a00ec387aa..cc4071ad82e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,8 @@ +## 1.0.2 (Unreleased) + +BUG FIXES: + * template: Fixed a bug where dynamic secrets did not trigger the template `change_mode` after a client restart. [[GH-9636](https://github.com/hashicorp/nomad/issues/9636)] + ## 1.0.1 (Unreleased) IMPROVEMENTS: diff --git a/website/pages/docs/upgrade/upgrade-specific.mdx b/website/pages/docs/upgrade/upgrade-specific.mdx index 2eaf3123918..a6e3f812782 100644 --- a/website/pages/docs/upgrade/upgrade-specific.mdx +++ b/website/pages/docs/upgrade/upgrade-specific.mdx @@ -14,6 +14,27 @@ upgrade. However, specific versions of Nomad may have more details provided for their upgrades as a result of new features or changed behavior. This page is used to document those details separately from the standard upgrade flow. +## Nomad 1.0.2 + +#### Dynamic secrets trigger template changes on client restart + +Nomad 1.0.2 changed the behavior of template `change_mode` triggers when a +client node restarts. In Nomad 1.0.1 and earlier, the first rendering of a +template after a client restart would not trigger the `change_mode`. For +dynamic secrets such as the Vault PKI secrets engine, this resulted in the +secret being updated but not restarting or signalling the task. When the +secret's lease expired at some later time, the task workload might fail +because of the stale secret. For example, a web server's SSL certificate would +be expired and browsers would be unable to connect. + +In Nomad 1.0.2, when a client node is restarted any task with Vault secrets +that are generated or have expired will have its `change_mode` triggered. If +`change_mode = "restart"` this will result in the task being restarted, to +avoid the task failing unexpectedly at some point in the future. This change +only impacts tasks using dynamic Vault secrets engines such as [PKI][pki], or +when secrets are rotated. Secrets that don't change in Vault will not trigger +a `change_mode` on client restart. + ## Nomad 1.0.1 #### Envoy worker threads @@ -963,3 +984,4 @@ deleted and then Nomad 0.3.0 can be launched. [vault_grace]: /docs/job-specification/template [node drain]: https://www.nomadproject.io/docs/upgrade#5-upgrade-clients [`template.disable_file_sandbox`]: /docs/configuration/client#template-parameters +[pki]: https://www.vaultproject.io/docs/secrets/pki From 62bf55f304836cb2acbd6a66f65cf035086090b7 Mon Sep 17 00:00:00 2001 From: Tim Gross Date: Wed, 16 Dec 2020 13:13:08 -0500 Subject: [PATCH 8/8] code review: comment fixes --- client/allocrunner/taskrunner/interfaces/lifecycle.go | 2 +- client/allocrunner/taskrunner/template/template.go | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/client/allocrunner/taskrunner/interfaces/lifecycle.go b/client/allocrunner/taskrunner/interfaces/lifecycle.go index 18b455f1e90..325a13e387e 100644 --- a/client/allocrunner/taskrunner/interfaces/lifecycle.go +++ b/client/allocrunner/taskrunner/interfaces/lifecycle.go @@ -21,7 +21,7 @@ type TaskLifecycle interface { // driver, which is useful for distinguishing restored tasks during // prestart hooks. But note that the driver handle could go away after you // check this, so callers should make sure they're handling that case - // safely. Ideally prestart hooks should be idempotnent whenever possible + // safely. Ideally prestart hooks should be idempotent whenever possible // to handle restored tasks; use this as an escape hatch. IsRunning() bool } diff --git a/client/allocrunner/taskrunner/template/template.go b/client/allocrunner/taskrunner/template/template.go index c89752f5299..3f6c8f1b12b 100644 --- a/client/allocrunner/taskrunner/template/template.go +++ b/client/allocrunner/taskrunner/template/template.go @@ -385,7 +385,7 @@ func (tm *TaskTemplateManager) handleTemplateRerenders(allRenderedTime time.Time } func (tm *TaskTemplateManager) onTemplateRendered(handledRenders map[string]time.Time, allRenderedTime time.Time) { - // A template has been rendered, figure out what to do + var handling []string signals := make(map[string]struct{}) restart := false