Skip to content

Commit

Permalink
chore: generate support zip for crashdump
Browse files Browse the repository at this point in the history
Generate support zip on crashdump.

Signed-off-by: Noel Georgi <[email protected]>
  • Loading branch information
frezbo committed Nov 6, 2024
1 parent a867f85 commit 5112547
Show file tree
Hide file tree
Showing 10 changed files with 131 additions and 121 deletions.
2 changes: 0 additions & 2 deletions internal/integration/integration_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -136,8 +136,6 @@ func TestIntegration(t *testing.T) {
}

if t.Failed() && crashdumpEnabled && cluster != nil && provisioner != nil {
// if provisioner & cluster are available,
// debugging failed test is easier with crashdump
provisioner.CrashDump(context.Background(), cluster, os.Stderr)
}
}
Expand Down
4 changes: 0 additions & 4 deletions internal/integration/provision/provision.go
Original file line number Diff line number Diff line change
Expand Up @@ -142,10 +142,6 @@ func (suite *BaseSuite) TearDownSuite() {
// for failed tests, produce crash dump for easier debugging,
// as cluster is going to be torn down below
suite.provisioner.CrashDump(suite.ctx, suite.Cluster, os.Stderr)

if suite.clusterAccess != nil {
suite.clusterAccess.CrashDump(suite.ctx, os.Stderr)
}
}

if suite.clusterAccess != nil {
Expand Down
130 changes: 72 additions & 58 deletions pkg/cluster/crashdump.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,88 +8,102 @@ import (
"context"
"fmt"
"io"
"strings"
"time"
"os"
"path/filepath"

"github.com/siderolabs/gen/xslices"
"github.com/siderolabs/go-talos-support/support"
"github.com/siderolabs/go-talos-support/support/bundle"
"github.com/siderolabs/go-talos-support/support/collectors"
v1 "k8s.io/apimachinery/pkg/apis/meta/v1"
k8s "k8s.io/client-go/kubernetes"
"k8s.io/client-go/tools/clientcmd"

"github.com/siderolabs/talos/pkg/machinery/api/common"
"github.com/siderolabs/talos/pkg/machinery/client"
"github.com/siderolabs/talos/pkg/machinery/constants"
"github.com/siderolabs/talos/pkg/provision"
)

// APICrashDumper collects crash dump via Talos API.
type APICrashDumper struct {
ClientProvider
Info
}
// Crashdump creates a support.zip for the cluster.
func Crashdump(ctx context.Context, cluster provision.Cluster, out io.Writer) {
statePath, err := cluster.StatePath()
if err != nil {
fmt.Fprintf(out, "error getting state path: %s\n", err)

// DefaultServiceLogTailLines specifies number of log lines to tail from each service.
const DefaultServiceLogTailLines = 100
return
}

// LogLinesPerService customizes defaults for specific services.
var LogLinesPerService = map[string]int32{
"etcd": 5000,
}
supportZip := filepath.Join(statePath, "support.zip")

// CrashDump produces information to help with debugging.
//
// CrashDump implements CrashDumper interface.
func (s *APICrashDumper) CrashDump(ctx context.Context, out io.Writer) {
cli, err := s.Client()
supportFile, err := os.Create(supportZip)
if err != nil {
fmt.Fprintf(out, "error creating crashdump: %s\n", err)
fmt.Fprintf(out, "error creating crashdump file: %s\n", err)

return
}

nodes := s.Nodes()
defer supportFile.Close() //nolint:errcheck

for _, node := range nodes {
func(node NodeInfo) {
nodeIP := node.InternalIP.String()
c, err := client.New(ctx, client.WithDefaultConfig())
if err != nil {
fmt.Fprintf(out, "error creating crashdump: %s\n", err)
}

nodeCtx, nodeCtxCancel := context.WithTimeout(client.WithNodes(ctx, nodeIP), 30*time.Second)
defer nodeCtxCancel()
nodes := xslices.Map(cluster.Info().Nodes, func(nodeInfo provision.NodeInfo) string {
return nodeInfo.IPs[0].String()
})

fmt.Fprintf(out, "\n%s\n%s\n\n", node, strings.Repeat("=", len(nodeIP)))
controlplane := nodes[0]

services, err := cli.ServiceList(nodeCtx)
if err != nil {
fmt.Fprintf(out, "error getting services: %s\n", err)
opts := []bundle.Option{
bundle.WithArchiveOutput(supportFile),
bundle.WithTalosClient(c),
bundle.WithNodes(nodes...),
bundle.WithNumWorkers(1),
}

return
}
kubeclient, err := getKubernetesClient(ctx, c, controlplane)
if err == nil {
opts = append(opts, bundle.WithKubernetesClient(kubeclient))
}

for _, msg := range services.Messages {
for _, svc := range msg.Services {
logLines, ok := LogLinesPerService[svc.Id]
if !ok {
logLines = DefaultServiceLogTailLines
}
options := bundle.NewOptions(opts...)

stream, err := cli.Logs(nodeCtx, constants.SystemContainerdNamespace, common.ContainerDriver_CONTAINERD, svc.Id, false, logLines)
if err != nil {
fmt.Fprintf(out, "error getting service logs for %s: %s\n", svc.Id, err)
collectors, err := collectors.GetForOptions(ctx, options)
if err != nil {
fmt.Fprintf(out, "error creating crashdump collector options: %s\n", err)
}

continue
}
if err := support.CreateSupportBundle(ctx, options, collectors...); err != nil {
fmt.Fprintf(out, "error creating crashdump: %s\n", err)
}
}

r, err := client.ReadStream(stream)
if err != nil {
fmt.Fprintf(out, "error getting service logs for %s: %s\n", svc.Id, err)
func getKubernetesClient(ctx context.Context, c *client.Client, endpoint string) (*k8s.Clientset, error) {
kubeconfig, err := c.Kubeconfig(client.WithNodes(ctx, endpoint))
if err != nil {
return nil, err
}

continue
}
config, err := clientcmd.NewClientConfigFromBytes(kubeconfig)
if err != nil {
return nil, err
}

fmt.Fprintf(out, "\n> %s\n%s\n\n", svc.Id, strings.Repeat("-", len(svc.Id)+2))
restconfig, err := config.ClientConfig()
if err != nil {
return nil, err
}

_, err = io.Copy(out, r)
if err != nil {
fmt.Fprintf(out, "error streaming service logs: %s\n", err)
}
clientset, err := k8s.NewForConfig(restconfig)
if err != nil {
return nil, err
}

r.Close() //nolint:errcheck
}
}
}(node)
// just checking that k8s responds
_, err = clientset.CoreV1().Namespaces().Get(ctx, "kube-system", v1.GetOptions{})
if err != nil {
return nil, err
}

return clientset, nil
}
5 changes: 0 additions & 5 deletions pkg/provision/access/adapter.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@ import (
type Adapter struct {
cluster.ConfigClientProvider
cluster.KubernetesClient
cluster.APICrashDumper
cluster.APIBootstrapper
cluster.Info
cluster.ApplyConfigClient
Expand Down Expand Up @@ -73,10 +72,6 @@ func NewAdapter(clusterInfo provision.Cluster, opts ...provision.Option) *Adapte
ClientProvider: &configProvider,
ForceEndpoint: options.KubernetesEndpoint,
},
APICrashDumper: cluster.APICrashDumper{
ClientProvider: &configProvider,
Info: infoW,
},
APIBootstrapper: cluster.APIBootstrapper{
ClientProvider: &configProvider,
Info: infoW,
Expand Down
31 changes: 28 additions & 3 deletions pkg/provision/providers/docker/crashdump.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,16 @@
package docker

import (
"bytes"
"context"
"fmt"
"io"
"strings"
"os"
"path/filepath"

"github.com/docker/docker/api/types/container"

cl "github.com/siderolabs/talos/pkg/cluster"
"github.com/siderolabs/talos/pkg/provision"
)

Expand All @@ -24,9 +27,15 @@ func (p *provisioner) CrashDump(ctx context.Context, cluster provision.Cluster,
return
}

statePath, err := cluster.StatePath()
if err != nil {
fmt.Fprintf(out, "error getting state path: %s\n", err)

return
}

for _, ctr := range containers {
name := ctr.Names[0][1:]
fmt.Fprintf(out, "%s\n%s\n\n", name, strings.Repeat("=", len(name)))

logs, err := p.client.ContainerLogs(ctx, ctr.ID, container.LogsOptions{
ShowStdout: true,
Expand All @@ -39,6 +48,22 @@ func (p *provisioner) CrashDump(ctx context.Context, cluster provision.Cluster,
continue
}

_, _ = io.Copy(out, logs) //nolint:errcheck
logPath := filepath.Join(statePath, fmt.Sprintf("%s.log", name))

var logData bytes.Buffer

if _, err := io.Copy(&logData, logs); err != nil {
fmt.Fprintf(out, "error reading container logs: %s\n", err)

continue
}

if err := os.WriteFile(logPath, logData.Bytes(), 0o644); err != nil {
fmt.Fprintf(out, "error writing container logs: %s\n", err)

continue
}
}

cl.Crashdump(ctx, cluster, out)
}
11 changes: 11 additions & 0 deletions pkg/provision/providers/docker/create.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ package docker
import (
"context"
"fmt"
"os"
"path/filepath"

"github.com/siderolabs/talos/pkg/machinery/constants"
"github.com/siderolabs/talos/pkg/provision"
Expand All @@ -24,6 +26,14 @@ func (p *provisioner) Create(ctx context.Context, request provision.ClusterReque
}
}

statePath := filepath.Join(request.StateDirectory, request.Name)

fmt.Fprintf(options.LogWriter, "creating state directory in %q\n", statePath)

if err := os.MkdirAll(statePath, 0o755); err != nil {
return nil, fmt.Errorf("unable to create state directory: %w", err)
}

if err = p.ensureImageExists(ctx, request.Image, &options); err != nil {
return nil, err
}
Expand Down Expand Up @@ -64,6 +74,7 @@ func (p *provisioner) Create(ctx context.Context, request provision.ClusterReque
Nodes: nodeInfo,
KubernetesEndpoint: p.GetExternalKubernetesControlPlaneEndpoint(request.Network, constants.DefaultControlPlanePort),
},
statePath: statePath,
}

return res, nil
Expand Down
11 changes: 10 additions & 1 deletion pkg/provision/providers/docker/destroy.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,5 +30,14 @@ func (p *provisioner) Destroy(ctx context.Context, cluster provision.Cluster, op

fmt.Fprintln(os.Stderr, "destroying network", cluster.Info().Network.Name)

return p.destroyNetwork(ctx, cluster.Info().Network.Name)
if err := p.destroyNetwork(ctx, cluster.Info().Network.Name); err != nil {
return err
}

statePath, err := cluster.StatePath()
if err != nil {
return err
}

return os.RemoveAll(statePath)
}
1 change: 1 addition & 0 deletions pkg/provision/providers/docker/reflect.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ func (p *provisioner) Reflect(ctx context.Context, clusterName, stateDirectory s
clusterInfo: provision.ClusterInfo{
ClusterName: clusterName,
},
statePath: stateDirectory,
}

// find network assuming network name == cluster name
Expand Down
8 changes: 7 additions & 1 deletion pkg/provision/providers/docker/result.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@ import (

// result is the docker provisioner's record of a cluster; the visible hunks
// show it being populated by Create and Reflect.
type result struct {
// clusterInfo holds the cluster description (e.g. ClusterName, Nodes,
// KubernetesEndpoint).
clusterInfo provision.ClusterInfo

// statePath is the on-disk state directory reported by StatePath; when it
// is empty, StatePath returns an error instead.
//
// NOTE(review): Create stores filepath.Join(StateDirectory, Name) here,
// whereas Reflect stores the bare stateDirectory. Destroy calls
// os.RemoveAll on this path, so confirm whether Reflect should also
// append the cluster name before relying on it for cleanup.
statePath string
}

func (res *result) Provisioner() string {
Expand All @@ -23,5 +25,9 @@ func (res *result) Info() provision.ClusterInfo {
}

func (res *result) StatePath() (string, error) {
return "", errors.New("state path is not used for docker provisioner")
if res.statePath == "" {
return "", errors.New("state path is not used for docker provisioner")
}

return res.statePath, nil
}
49 changes: 2 additions & 47 deletions pkg/provision/providers/vm/crashdump.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,58 +6,13 @@ package vm

import (
"context"
"fmt"
"io"
"os"
"path/filepath"
"strings"

"github.com/siderolabs/go-tail"

cl "github.com/siderolabs/talos/pkg/cluster"
"github.com/siderolabs/talos/pkg/provision"
)

// CrashDump produces debug information to help with debugging failures.
func (p *Provisioner) CrashDump(ctx context.Context, cluster provision.Cluster, out io.Writer) {
state, ok := cluster.(*State)
if !ok {
fmt.Fprintf(out, "error inspecting firecracker state, %#+v\n", cluster)

return
}

statePath, err := state.StatePath()
if err != nil {
fmt.Fprintf(out, "error getting cluster state path: %s", err)

return
}

logFiles, err := filepath.Glob(filepath.Join(statePath, "*.log"))
if err != nil {
fmt.Fprintf(out, "error finding log paths: %s\n", err)

return
}

for _, logFile := range logFiles {
name := filepath.Base(logFile)

fmt.Fprintf(out, "%s\n%s\n\n", name, strings.Repeat("=", len(name)))

f, err := os.Open(logFile)
if err != nil {
fmt.Fprintf(out, "error opening file: %s\n", err)

continue
}

if err = tail.SeekLines(f, 5000); err != nil {
fmt.Fprintf(out, "error seeking to the tail: %s\n", err)
}

_, _ = io.Copy(out, f) //nolint:errcheck

f.Close() //nolint:errcheck
}
cl.Crashdump(ctx, cluster, out)
}

0 comments on commit 5112547

Please sign in to comment.