Skip to content

Commit

Permalink
Apply retry logics in confidential computing API + workload image pul…
Browse files Browse the repository at this point in the history
…ler (#511)

* Apply retry logics in launcher

* remove retry in agent

---------

Co-authored-by: Jiankun Lü <[email protected]>
  • Loading branch information
yawangwang and jkl73 authored Dec 18, 2024
1 parent 82b45ad commit 545a4bc
Show file tree
Hide file tree
Showing 7 changed files with 410 additions and 92 deletions.
194 changes: 194 additions & 0 deletions go.work.sum

Large diffs are not rendered by default.

40 changes: 36 additions & 4 deletions launcher/container_runner.go
Original file line number Diff line number Diff line change
Expand Up @@ -267,7 +267,12 @@ func getSignatureDiscoveryClient(cdClient *containerd.Client, mdsClient *metadat
return registryauth.RefreshResolver(ctx, mdsClient)
}
imageFetcher := func(ctx context.Context, imageRef string, opts ...containerd.RemoteOpt) (containerd.Image, error) {
image, err := cdClient.Pull(ctx, imageRef, opts...)
image, err := pullImageWithRetries(
func() (containerd.Image, error) {
return cdClient.Pull(ctx, imageRef, opts...)
},
pullImageBackoffPolicy,
)
if err != nil {
return nil, fmt.Errorf("cannot pull signature objects from the signature image [%s]: %w", imageRef, err)
}
Expand Down Expand Up @@ -529,6 +534,11 @@ func defaultRetryPolicy() *backoff.ExponentialBackOff {
return expBack
}

// pullImageBackoffPolicy builds the retry schedule used for image pulls:
// a constant 500ms wait between attempts, capped at 3 retries after the
// initial attempt.
func pullImageBackoffPolicy() backoff.BackOff {
	const (
		retryInterval = 500 * time.Millisecond
		maxRetries    = 3
	)
	return backoff.WithMaxRetries(backoff.NewConstantBackOff(retryInterval), maxRetries)
}

// Run the container
// Container output will always be redirected to logger writer for now
func (r *ContainerRunner) Run(ctx context.Context) error {
Expand Down Expand Up @@ -621,17 +631,39 @@ func (r *ContainerRunner) Run(ctx context.Context) error {
return nil
}

// pullImageWithRetries invokes f repeatedly, following the schedule produced
// by retry, until f succeeds or the schedule stops.
//
// f performs a single pull attempt. retry is called once to obtain a fresh
// backoff.BackOff for this sequence of attempts.
//
// On success the image from the successful attempt is returned. On failure
// the returned image is nil and the error wraps the error reported by
// backoff.Retry.
func pullImageWithRetries(f func() (containerd.Image, error), retry func() backoff.BackOff) (containerd.Image, error) {
	var pulled containerd.Image

	// attempt adapts f to the error-only signature backoff.Retry expects,
	// stashing the image from the most recent call.
	attempt := func() error {
		img, pullErr := f()
		pulled = img
		return pullErr
	}

	if retryErr := backoff.Retry(attempt, retry()); retryErr != nil {
		return nil, fmt.Errorf("failed to pull image with retries, the last error is: %w", retryErr)
	}
	return pulled, nil
}

func initImage(ctx context.Context, cdClient *containerd.Client, launchSpec spec.LaunchSpec, token oauth2.Token) (containerd.Image, error) {
if token.Valid() {
remoteOpt := containerd.WithResolver(registryauth.Resolver(token.AccessToken))

image, err := cdClient.Pull(ctx, launchSpec.ImageRef, containerd.WithPullUnpack, remoteOpt)
image, err := pullImageWithRetries(
func() (containerd.Image, error) {
return cdClient.Pull(ctx, launchSpec.ImageRef, containerd.WithPullUnpack, remoteOpt)
},
pullImageBackoffPolicy,
)
if err != nil {
return nil, fmt.Errorf("cannot pull the image: %w", err)
}
return image, nil
}
image, err := cdClient.Pull(ctx, launchSpec.ImageRef, containerd.WithPullUnpack)
image, err := pullImageWithRetries(
func() (containerd.Image, error) {
return cdClient.Pull(ctx, launchSpec.ImageRef, containerd.WithPullUnpack)
},
pullImageBackoffPolicy,
)
if err != nil {
return nil, fmt.Errorf("cannot pull the image (no token, only works for a public image): %w", err)
}
Expand Down
51 changes: 51 additions & 0 deletions launcher/container_runner_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -638,6 +638,57 @@ func TestMeasureCELEvents(t *testing.T) {
}
}

// TestPullImageWithRetries checks that pullImageWithRetries stops on the first
// successful pull, retries after a failed pull, and gives up with an error once
// the retry budget is exhausted.
//
// The retry policy uses backoff.ZeroBackOff so that no real time is spent
// sleeping between attempts; only the retry count is under test here.
func TestPullImageWithRetries(t *testing.T) {
	// maxRetries is the number of retries allowed after the initial attempt,
	// so at most maxRetries+1 pulls are made per test case.
	const maxRetries = 2

	testCases := []struct {
		name string
		// imagePuller receives the 1-based attempt number.
		imagePuller  func(int) (containerd.Image, error)
		wantPass     bool
		wantAttempts int
	}{
		{
			name:         "success with single attempt",
			imagePuller:  func(int) (containerd.Image, error) { return &fakeImage{}, nil },
			wantPass:     true,
			wantAttempts: 1,
		},
		{
			name: "failure then success",
			imagePuller: func(attempts int) (containerd.Image, error) {
				// Odd attempts fail, even attempts succeed.
				if attempts%2 == 1 {
					return nil, errors.New("fake error")
				}
				return &fakeImage{}, nil
			},
			wantPass:     true,
			wantAttempts: 2,
		},
		{
			name: "failure with attempts exceeded",
			imagePuller: func(int) (containerd.Image, error) {
				return nil, errors.New("fake error")
			},
			wantPass:     false,
			wantAttempts: maxRetries + 1,
		},
	}

	for _, tc := range testCases {
		t.Run(tc.name, func(t *testing.T) {
			retryPolicy := func() backoff.BackOff {
				return backoff.WithMaxRetries(&backoff.ZeroBackOff{}, maxRetries)
			}

			attempts := 0
			image, err := pullImageWithRetries(
				func() (containerd.Image, error) {
					attempts++
					return tc.imagePuller(attempts)
				},
				retryPolicy)
			if gotPass := (err == nil); gotPass != tc.wantPass {
				t.Errorf("pullImageWithRetries() success = %v, want %v (err: %v)", gotPass, tc.wantPass, err)
			}
			if gotImage := (image != nil); gotImage != tc.wantPass {
				t.Errorf("pullImageWithRetries() returned image = %v, want %v", gotImage, tc.wantPass)
			}
			if attempts != tc.wantAttempts {
				t.Errorf("pullImageWithRetries() made %d attempts, want %d", attempts, tc.wantAttempts)
			}
		})
	}
}

// This ensures fakeContainer implements containerd.Container interface.
var _ containerd.Container = &fakeContainer{}

Expand Down
44 changes: 22 additions & 22 deletions launcher/go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@ module github.com/google/go-tpm-tools/launcher
go 1.21

require (
cloud.google.com/go/compute/metadata v0.5.0
cloud.google.com/go/logging v1.10.0
cloud.google.com/go/compute/metadata v0.5.2
cloud.google.com/go/logging v1.12.0
github.com/cenkalti/backoff/v4 v4.2.1
github.com/containerd/containerd v1.7.16
github.com/coreos/go-systemd/v22 v22.5.0
Expand All @@ -16,18 +16,18 @@ require (
github.com/opencontainers/go-digest v1.0.0
github.com/opencontainers/image-spec v1.1.0
github.com/opencontainers/runtime-spec v1.1.0
golang.org/x/oauth2 v0.21.0
google.golang.org/api v0.189.0
google.golang.org/genproto/googleapis/api v0.0.0-20240722135656-d784300faade
google.golang.org/protobuf v1.34.2
golang.org/x/oauth2 v0.23.0
google.golang.org/api v0.205.0
google.golang.org/genproto/googleapis/api v0.0.0-20241015192408-796eee8c2d53
google.golang.org/protobuf v1.35.1
)

require (
cloud.google.com/go v0.115.0 // indirect
cloud.google.com/go/auth v0.7.2 // indirect
cloud.google.com/go/auth/oauth2adapt v0.2.3 // indirect
cloud.google.com/go v0.116.0 // indirect
cloud.google.com/go/auth v0.10.1 // indirect
cloud.google.com/go/auth/oauth2adapt v0.2.5 // indirect
cloud.google.com/go/confidentialcomputing v1.6.0 // indirect
cloud.google.com/go/longrunning v0.5.9 // indirect
cloud.google.com/go/longrunning v0.6.1 // indirect
github.com/AdaLogics/go-fuzz-headers v0.0.0-20230811130428-ced1acdcaa24 // indirect
github.com/AdamKorcz/go-118-fuzz-build v0.0.0-20230306123547-8075edf89bb0 // indirect
github.com/Microsoft/go-winio v0.6.1 // indirect
Expand Down Expand Up @@ -55,9 +55,9 @@ require (
github.com/google/go-tdx-guest v0.3.2-0.20241009005452-097ee70d0843 // indirect
github.com/google/go-tspi v0.3.0 // indirect
github.com/google/logger v1.1.1 // indirect
github.com/google/s2a-go v0.1.7 // indirect
github.com/google/s2a-go v0.1.8 // indirect
github.com/google/uuid v1.6.0 // indirect
github.com/googleapis/enterprise-certificate-proxy v0.3.2 // indirect
github.com/googleapis/enterprise-certificate-proxy v0.3.4 // indirect
github.com/googleapis/gax-go/v2 v2.13.0 // indirect
github.com/klauspost/compress v1.16.7 // indirect
github.com/moby/locker v1.0.1 // indirect
Expand All @@ -69,24 +69,24 @@ require (
github.com/pkg/errors v0.9.1 // indirect
github.com/sirupsen/logrus v1.9.3 // indirect
go.opencensus.io v0.24.0 // indirect
go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.49.0 // indirect
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.49.0 // indirect
go.opentelemetry.io/otel v1.24.0 // indirect
go.opentelemetry.io/otel/metric v1.24.0 // indirect
go.opentelemetry.io/otel/trace v1.24.0 // indirect
go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.54.0 // indirect
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.54.0 // indirect
go.opentelemetry.io/otel v1.29.0 // indirect
go.opentelemetry.io/otel/metric v1.29.0 // indirect
go.opentelemetry.io/otel/trace v1.29.0 // indirect
go.uber.org/multierr v1.11.0 // indirect
golang.org/x/crypto v0.31.0 // indirect
golang.org/x/exp v0.0.0-20240531132922-fd00a4e0eefc // indirect
golang.org/x/mod v0.17.0 // indirect
golang.org/x/net v0.27.0 // indirect
golang.org/x/net v0.30.0 // indirect
golang.org/x/sync v0.10.0 // indirect
golang.org/x/sys v0.28.0 // indirect
golang.org/x/text v0.21.0 // indirect
golang.org/x/time v0.5.0 // indirect
golang.org/x/time v0.7.0 // indirect
golang.org/x/tools v0.21.1-0.20240508182429-e35e4ccd0d2d // indirect
google.golang.org/genproto v0.0.0-20240722135656-d784300faade // indirect
google.golang.org/genproto/googleapis/rpc v0.0.0-20240722135656-d784300faade // indirect
google.golang.org/grpc v1.64.1 // indirect
google.golang.org/genproto v0.0.0-20241021214115-324edc3d5d38 // indirect
google.golang.org/genproto/googleapis/rpc v0.0.0-20241021214115-324edc3d5d38 // indirect
google.golang.org/grpc v1.67.1 // indirect
)

replace (
Expand Down
Loading

0 comments on commit 545a4bc

Please sign in to comment.