Skip to content
This repository has been archived by the owner on Jan 4, 2022. It is now read-only.

Commit

Permalink
cmd/kube-spawn: retry machinectl poweroff 5 times at max
Browse files Browse the repository at this point in the history
If machinectl poweroff fails, retry it up to 5 times.
If it still fails after that, fall back to machinectl terminate.

Also fix a bug in removeImage(): the success and failure branches were swapped.
It now returns nil when machinetool.RemoveImage succeeds and retries only when it fails.
  • Loading branch information
Dongsu Park committed Oct 25, 2017
1 parent cf06f96 commit 7f6b7ef
Showing 1 changed file with 35 additions and 17 deletions.
52 changes: 35 additions & 17 deletions cmd/kube-spawn/stop.go
Original file line number Diff line number Diff line change
Expand Up @@ -73,21 +73,8 @@ func stopMachines(cfg *config.ClusterConfiguration, force bool) {
for i := 0; i < len(cfg.Machines); i++ {
go func(i int) {
defer wg.Done()
if !force {
// graceful stop
if err := machinetool.Poweroff(cfg.Machines[i].Name); err != nil {
if !machinetool.IsNotKnown(err) {
log.Print(errors.Wrapf(err, "error powering off machine %q, maybe try with `kube-spawn stop -f`", cfg.Machines[i].Name))
return
}
}
} else {
if err := machinetool.Terminate(cfg.Machines[i].Name); err != nil {
if !machinetool.IsNotKnown(err) {
log.Print(errors.Wrapf(err, "error terminating machine %q", cfg.Machines[i].Name))
return
}
}
if err := doGracefulStop(cfg.Machines[i].Name, force); err != nil {
return
}
cfg.Machines[i].Running = false
cfg.Machines[i].IP = ""
Expand All @@ -97,6 +84,36 @@ func stopMachines(cfg *config.ClusterConfiguration, force bool) {
wg.Wait()
}

// doGracefulStop stops the machine named machineName.
//
// When force is false it first tries a graceful machinetool.Poweroff, making
// up to 5 attempts with a 500ms pause after each failed one. If every attempt
// fails it falls back to machinetool.Terminate. When force is true it calls
// machinetool.Terminate right away.
//
// An error for which machinetool.IsNotKnown reports true (presumably: the
// machine is not registered, i.e. there is nothing left to stop) is treated
// as success on both paths.
//
// It returns nil when the machine was stopped or was not known, and the
// Terminate error otherwise. Failures are logged here because the caller
// only checks whether an error occurred.
func doGracefulStop(machineName string, force bool) error {
	if !force {
		for retries := 0; retries < 5; retries++ {
			err := machinetool.Poweroff(machineName)
			if err == nil || machinetool.IsNotKnown(err) {
				// Powered off, or a "not known" error: handled the same way
				// as in the Terminate path below, so no retry is needed.
				return nil
			}

			// A real failure: log it and give the machine a moment before
			// the next attempt instead of giving up immediately.
			log.Print(errors.Wrapf(err, "error powering off machine %q (attempt %d of 5)", machineName, retries+1))
			time.Sleep(500 * time.Millisecond)
		}
		log.Printf("Tried to stop %s 5 times, but it didn't work, terminating.", machineName)
		// fall back to force shutdown
	}

	// Either it's force mode from the beginning,
	// or it's a fallback from a retry loop of a graceful stop.
	if err := machinetool.Terminate(machineName); err != nil {
		if !machinetool.IsNotKnown(err) {
			log.Print(errors.Wrapf(err, "error terminating machine %q", machineName))
			return err
		}
	}

	return nil
}

func removeImages(cfg *config.ClusterConfiguration) {
var wg sync.WaitGroup
wg.Add(len(cfg.Machines))
Expand All @@ -118,9 +135,10 @@ func removeImage(machineName string) error {
var err error
for retries := 0; retries < 5; retries++ {
if err = machinetool.RemoveImage(machineName); err != nil {
return nil
} else {
time.Sleep(500 * time.Millisecond)
continue
} else {
return nil
}
}
return errors.Wrapf(err, "error removing machine image for %q", machineName)
Expand Down

0 comments on commit 7f6b7ef

Please sign in to comment.