Skip to content

Commit

Permalink
fix: allow failed upgrade to be re-tried (#1179)
Browse files Browse the repository at this point in the history
* test: behavior when an upgrade fails

It seems that when an upgrade fails, attempting to upgrade again will just
skip the upgrade and report success. While potentially problematic, this
has been the behavior for a long time. This test captures the existing
behavior, and could be used to test-drive a fix.

* fix: allow failed upgrade to be re-tried

Previously when an upgrade stage failed, we recorded the new (higher)
TF version in the state anyway, so if the upgrade were retried, it
would be skipped because it was already deemed to be at this version.

Now we first do an upgrade/apply at the current version to account for
potentially failed upgrades. For successful upgrades this should be a
no-op as TF is idempotent, and it's cleaner to use idempotency rather than
keep track of success/failure ourselves.
  • Loading branch information
blgm authored Jan 23, 2025
1 parent 173f019 commit ebbe80d
Show file tree
Hide file tree
Showing 11 changed files with 359 additions and 23 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
terraform {
required_providers {
random = {
source = "registry.terraform.io/hashicorp/random"
}
}
}

resource "random_integer" "priority" {
min = 3
max = 4
}

output "provision_output" {
value = random_integer.priority.result
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
terraform {
required_providers {
random = {
source = "registry.terraform.io/hashicorp/random"
}
}
}

variable "max" {type = number}

resource "random_integer" "priority" {
min = 3
max = var.max - 4
}

output "provision_output" {
value = random_integer.priority.result
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
version: 1
name: fake-service
id: 10557d15-dd47-40e6-ab4f-53fbe81e3022
description: description
display_name: Fake
image_url: https://example.com/icon.jpg
documentation_url: https://example.com
support_url: https://example.com/support.html
plans:
- name: first
id: e328fdae-d97c-43b1-a1a7-0f8e961d1d0c
description: First plan
display_name: First
provision:
template_refs:
main: fake-provision.tf
user_inputs:
- field_name: max
type: number
details: provision input
outputs:
- field_name: provision_output
type: integer
details: provision output
bind:
template_refs:
main: fake-bind.tf
outputs:
- field_name: provision_output
type: integer
details: provision output
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
packversion: 1
name: fake-brokerpak
version: 0.1.0
metadata:
author: [email protected]
platforms:
- os: linux
arch: amd64
- os: darwin
arch: amd64
terraform_binaries:
- name: tofu
version: 1.6.2
source: https://github.com/opentofu/opentofu/archive/refs/tags/v1.6.2.zip
default: true
- name: terraform-provider-random
version: 3.5.1
terraform_upgrade_path:
- version: 1.6.2
service_definitions:
- fake-service.yml
16 changes: 16 additions & 0 deletions integrationtest/fixtures/terraform-upgrade-failure/fake-bind.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
terraform {
required_providers {
random = {
source = "registry.terraform.io/hashicorp/random"
}
}
}

resource "random_integer" "priority" {
min = 1
max = 2
}

output "provision_output" {
value = random_integer.priority.result
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
terraform {
required_providers {
random = {
source = "registry.terraform.io/hashicorp/random"
}
}
}

variable "max" {type = number}

resource "random_integer" "priority" {
min = 1
max = var.max
}

output "provision_output" {
value = random_integer.priority.result
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
version: 1
name: fake-service
id: 10557d15-dd47-40e6-ab4f-53fbe81e3022
description: description
display_name: Fake
image_url: https://example.com/icon.jpg
documentation_url: https://example.com
support_url: https://example.com/support.html
plans:
- name: first
id: e328fdae-d97c-43b1-a1a7-0f8e961d1d0c
description: First plan
display_name: First
provision:
template_refs:
main: fake-provision.tf
user_inputs:
- field_name: max
type: number
details: provision input
outputs:
- field_name: provision_output
type: integer
details: provision output
bind:
template_refs:
main: fake-bind.tf
outputs:
- field_name: provision_output
type: integer
details: provision output
18 changes: 18 additions & 0 deletions integrationtest/fixtures/terraform-upgrade-failure/manifest.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
packversion: 1
name: fake-brokerpak
version: 0.1.0
metadata:
author: [email protected]
platforms:
- os: linux
arch: amd64
- os: darwin
arch: amd64
terraform_binaries:
- name: tofu
version: 1.6.0
source: https://github.com/opentofu/opentofu/archive/refs/tags/v1.6.0.zip
- name: terraform-provider-random
version: 3.5.1
service_definitions:
- fake-service.yml
127 changes: 127 additions & 0 deletions integrationtest/terraform_upgrade_failure_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@
package integrationtest_test

import (
"encoding/json"
"fmt"

"github.com/cloudfoundry/cloud-service-broker/v2/dbservice/models"
"github.com/pivotal-cf/brokerapi/v12/domain"

"github.com/cloudfoundry/cloud-service-broker/v2/integrationtest/packer"
"github.com/cloudfoundry/cloud-service-broker/v2/internal/testdrive"
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
)

var _ = Describe("TF Upgrade Failure", func() {
const (
serviceOfferingGUID = "10557d15-dd47-40e6-ab4f-53fbe81e3022"
servicePlanGUID = "e328fdae-d97c-43b1-a1a7-0f8e961d1d0c"
startingVersion = "1.6.0"
endingVersion = "1.6.2"
)

var (
brokerpak string
broker *testdrive.Broker
)

tfStateOutputValue := func(deploymentID, outputName string) any {
var tfDeploymentReceiver models.TerraformDeployment
Expect(dbConn.Where("id = ?", deploymentID).First(&tfDeploymentReceiver).Error).NotTo(HaveOccurred())
var workspaceReceiver struct {
State []byte `json:"tfstate"`
}
Expect(json.Unmarshal(tfDeploymentReceiver.Workspace, &workspaceReceiver)).NotTo(HaveOccurred())
var stateReceiver struct {
Outputs map[string]struct {
Type string `json:"type"`
Value any `json:"value"`
} `json:"outputs"`
}
Expect(json.Unmarshal(workspaceReceiver.State, &stateReceiver)).NotTo(HaveOccurred())
Expect(stateReceiver.Outputs).To(HaveKey(outputName), "could not find output with this name")
return stateReceiver.Outputs[outputName].Value
}

instanceTFStateVersion := func(serviceInstanceGUID string) string {
return terraformStateVersion(fmt.Sprintf("tf:%s:", serviceInstanceGUID))
}

bindingTFStateVersion := func(serviceInstanceGUID, bindingGUID string) string {
return terraformStateVersion(fmt.Sprintf("tf:%s:%s", serviceInstanceGUID, bindingGUID))
}

instanceTFStateOutputValue := func(serviceInstanceGUID string) int {
val := tfStateOutputValue(fmt.Sprintf("tf:%s:", serviceInstanceGUID), "provision_output")
return int(val.(float64))
}

bindingTFStateOutputValue := func(serviceInstanceGUID, bindingGUID string) int {
val := tfStateOutputValue(fmt.Sprintf("tf:%s:%s", serviceInstanceGUID, bindingGUID), "provision_output")
return int(val.(float64))
}

BeforeEach(func() {
brokerpak = must(packer.BuildBrokerpak(csb, fixtures("terraform-upgrade-failure")))
broker = must(testdrive.StartBroker(csb, brokerpak, database, testdrive.WithOutputs(GinkgoWriter, GinkgoWriter)))

DeferCleanup(func() {
Expect(broker.Stop()).To(Succeed())
cleanup(brokerpak)
})
})

It("can recover an upgrade after a failure", func() {
By("provisioning a service instance at 1.6.0")
serviceInstance := must(broker.Provision(serviceOfferingGUID, servicePlanGUID, testdrive.WithProvisionParams(map[string]any{"max": 2})))
Expect(instanceTFStateVersion(serviceInstance.GUID)).To(Equal(startingVersion))

By("creating a service binding")
binding := must(broker.CreateBinding(serviceInstance))

By("updating the brokerpak and restarting the broker")
Expect(broker.Stop()).To(Succeed())
must(packer.BuildBrokerpak(csb, fixtures("terraform-upgrade-failure-updated"), packer.WithDirectory(brokerpak)))

broker = must(testdrive.StartBroker(
csb, brokerpak, database,
testdrive.WithEnv("TERRAFORM_UPGRADES_ENABLED=true"),
testdrive.WithOutputs(GinkgoWriter, GinkgoWriter),
))

By("validating the service instance and binding are at the starting TF version")
Expect(instanceTFStateVersion(serviceInstance.GUID)).To(Equal(startingVersion))
Expect(bindingTFStateVersion(serviceInstance.GUID, binding.GUID)).To(Equal(startingVersion))

By("observing the instance and binding outputs are in the correct range")
Expect(instanceTFStateOutputValue(serviceInstance.GUID)).To(BeElementOf(1, 2))
Expect(bindingTFStateOutputValue(serviceInstance.GUID, binding.GUID)).To(BeElementOf(1, 2))

// This fails because it tries to generate a random number whose max value is lower than its min value
By("running 'cf upgrade-service' and getting a failure")
Expect(broker.UpgradeService(serviceInstance, endingVersion, testdrive.WithUpgradePreviousValues(domain.PreviousValues{PlanID: servicePlanGUID}))).To(MatchError("update failed with state: failed"))

By("observing that the instance TF state file has been updated to the latest version, but not the binding")
Expect(instanceTFStateVersion(serviceInstance.GUID)).To(Equal(endingVersion))
Expect(bindingTFStateVersion(serviceInstance.GUID, binding.GUID)).To(Equal(startingVersion))

By("observing the instance and binding outputs have not changed")
Expect(instanceTFStateOutputValue(serviceInstance.GUID)).To(BeElementOf(1, 2))
Expect(bindingTFStateOutputValue(serviceInstance.GUID, binding.GUID)).To(BeElementOf(1, 2))

By("hacking the database so that the next upgrade will succeed")
Expect(dbConn.Model(&models.ProvisionRequestDetails{}).Where("service_instance_id = ?", serviceInstance.GUID).Update("request_details", `{"max":8}`).Error).To(Succeed())

By("running 'cf upgrade-service' again and succeeding")
Expect(broker.UpgradeService(serviceInstance, endingVersion, testdrive.WithUpgradePreviousValues(domain.PreviousValues{PlanID: servicePlanGUID}))).To(Succeed())

By("observing that the instance TF state file has been updated to the latest version for both the instance and the binding")
Expect(instanceTFStateVersion(serviceInstance.GUID)).To(Equal(endingVersion))
Expect(bindingTFStateVersion(serviceInstance.GUID, binding.GUID)).To(Equal(endingVersion))

By("observing the instance and binding output are in the updated range")
Expect(instanceTFStateOutputValue(serviceInstance.GUID)).To(BeElementOf(3, 4))
Expect(bindingTFStateOutputValue(serviceInstance.GUID, binding.GUID)).To(BeElementOf(3, 4))
})
})
9 changes: 7 additions & 2 deletions pkg/providers/tf/upgrade.go
Original file line number Diff line number Diff line change
Expand Up @@ -104,14 +104,19 @@ func (provider *TerraformProvider) performTerraformUpgrade(ctx context.Context,
return err
}

// Because an upgrade can fail, and when it fails we still record the higher TF version in the state, we
// need to perform the upgrade to the current TF version before performing the upgrade on higher versions.
// This allows failures to be re-tried. We could add code to try and keep track of the failures, but
// because TF is idempotent, it's cleaner to just run Apply at the current version, which will be a
// no-op for success cases.
if currentTfVersion.LessThan(version.Must(version.NewVersion("1.5.0"))) {
return errors.New("upgrade only supported for Terraform versions >= 1.5.0")
} else if currentTfVersion.LessThan(provider.tfBinContext.DefaultTfVersion) {
} else if currentTfVersion.LessThanOrEqual(provider.tfBinContext.DefaultTfVersion) {
if len(provider.tfBinContext.TfUpgradePath) == 0 {
return errors.New("tofu version mismatch and no upgrade path specified")
}
for _, targetTfVersion := range provider.tfBinContext.TfUpgradePath {
if currentTfVersion.LessThan(targetTfVersion) {
if currentTfVersion.LessThanOrEqual(targetTfVersion) {
err = provider.VersionedInvoker(targetTfVersion).Apply(ctx, workspace)
if err != nil {
return err
Expand Down
Loading

0 comments on commit ebbe80d

Please sign in to comment.