-
Notifications
You must be signed in to change notification settings - Fork 6
/
Copy pathmonitor.go
256 lines (224 loc) · 7.57 KB
/
monitor.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
// Copyright 2024 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package crashmonitor
// This file defines a monitor that reports arbitrary Go runtime
// crashes to telemetry.
import (
"bytes"
"fmt"
"io"
"log"
"os"
"reflect"
"runtime/debug"
"strconv"
"strings"
"golang.org/x/telemetry/internal/counter"
)
// Supported reports whether the runtime supports [runtime.SetCrashOutput].
//
// TODO(adonovan): eliminate once go1.23+ is assured.
func Supported() bool { return setCrashOutput != nil }
var setCrashOutput func(*os.File) error // = runtime.SetCrashOutput on go1.23+
// Parent sets up the parent side of the crashmonitor. It requires
// exclusive use of a writable pipe connected to the child process's stdin.
func Parent(pipe *os.File) {
writeSentinel(pipe)
// Ensure that we get pc=0x%x values in the traceback.
debug.SetTraceback("system")
setCrashOutput(pipe)
}
// Child runs the part of the crashmonitor that runs in the child process.
// It expects its stdin to be connected via a pipe to the parent which has
// run Parent.
func Child() {
// Wait for parent process's dying gasp.
// If the parent dies for any reason this read will return.
data, err := io.ReadAll(os.Stdin)
if err != nil {
log.Fatalf("failed to read from input pipe: %v", err)
}
// If the only line is the sentinel, it wasn't a crash.
if bytes.Count(data, []byte("\n")) < 2 {
childExitHook()
os.Exit(0) // parent exited without crash report
}
log.Printf("parent reported crash:\n%s", data)
// Parse the stack out of the crash report
// and record a telemetry count for it.
name, err := telemetryCounterName(data)
if err != nil {
// Keep count of how often this happens
// so that we can investigate if necessary.
incrementCounter("crash/malformed")
// Something went wrong.
// Save the crash securely in the file system.
f, err := os.CreateTemp(os.TempDir(), "*.crash")
if err != nil {
log.Fatal(err)
}
if _, err := f.Write(data); err != nil {
log.Fatal(err)
}
if err := f.Close(); err != nil {
log.Fatal(err)
}
log.Printf("failed to report crash to telemetry: %v", err)
log.Fatalf("crash report saved at %s", f.Name())
}
incrementCounter(name)
childExitHook()
log.Fatalf("telemetry crash recorded")
}
// (stubbed by test)
var (
incrementCounter = func(name string) { counter.New(name).Inc() }
childExitHook = func() {}
)
// The sentinel function returns its address. The difference between
// this value as observed by calls in two different processes of the
// same executable tells us the relative offset of their text segments.
//
// It would be nice if SetCrashOutput took care of this as it's fiddly
// and likely to confuse every user at first.
func sentinel() uint64 {
return uint64(reflect.ValueOf(sentinel).Pointer())
}
func writeSentinel(out io.Writer) {
fmt.Fprintf(out, "sentinel %x\n", sentinel())
}
// telemetryCounterName parses a crash report produced by the Go
// runtime, extracts the stack of the first runnable goroutine,
// converts each line into telemetry form ("symbol:relative-line"),
// and returns this as the name of a counter.
func telemetryCounterName(crash []byte) (string, error) {
pcs, err := parseStackPCs(string(crash))
if err != nil {
return "", err
}
// Limit the number of frames we request.
pcs = pcs[:min(len(pcs), 16)]
if len(pcs) == 0 {
// This can occur if all goroutines are idle, as when
// caught in a deadlock, or killed by an async signal
// while blocked.
//
// TODO(adonovan): consider how to report such
// situations. Reporting a goroutine in [sleep] or
// [select] state could be quite confusing without
// further information about the nature of the crash,
// as the problem is not local to the code location.
//
// For now, we keep count of this situation so that we
// can access whether it needs a more involved solution.
return "crash/no-running-goroutine", nil
}
// This string appears at the start of all
// crashmonitor-generated counter names.
//
// It is tempting to expose this as a parameter of Start, but
// it is not without risk. What value should most programs
// provide? There's no point giving the name of the executable
// as this is already recorded by telemetry. What if the
// application runs in multiple modes? Then it might be useful
// to record the mode. The problem is that an application with
// multiple modes probably doesn't know its mode by line 1 of
// main.main: it might require flag or argument parsing, or
// even validation of an environment variable, and we really
// want to steer users aware from any logic before Start. The
// flags and arguments will be wrong in the child process, and
// every extra conditional branch creates a risk that the
// recursively executed child program will behave not like the
// monitor but like the application. If the child process
// exits before calling Start, then the parent application
// will not have a monitor, and its crash reports will be
// discarded (written in to a pipe that is never read).
//
// So for now, we use this constant string.
const prefix = "crash/crash"
return counter.EncodeStack(pcs, prefix), nil
}
// parseStackPCs parses the parent process's program counters for the
// first running goroutine out of a GOTRACEBACK=system traceback,
// adjusting them so that they are valid for the child process's text
// segment.
//
// This function returns only program counter values, ensuring that
// there is no possibility of strings from the crash report (which may
// contain PII) leaking into the telemetry system.
func parseStackPCs(crash string) ([]uintptr, error) {
// getPC parses the PC out of a line of the form:
// \tFILE:LINE +0xRELPC sp=... fp=... pc=...
getPC := func(line string) (uint64, error) {
_, pcstr, ok := strings.Cut(line, " pc=") // e.g. pc=0x%x
if !ok {
return 0, fmt.Errorf("no pc= for stack frame: %s", line)
}
return strconv.ParseUint(pcstr, 0, 64) // 0 => allow 0x prefix
}
var (
pcs []uintptr
parentSentinel uint64
childSentinel = sentinel()
on = false // are we in the first running goroutine?
lines = strings.Split(crash, "\n")
)
for i := 0; i < len(lines); i++ {
line := lines[i]
// Read sentinel value.
if parentSentinel == 0 && strings.HasPrefix(line, "sentinel ") {
_, err := fmt.Sscanf(line, "sentinel %x", &parentSentinel)
if err != nil {
return nil, fmt.Errorf("can't read sentinel line")
}
continue
}
// Search for "goroutine GID [STATUS]"
if !on {
if strings.HasPrefix(line, "goroutine ") &&
strings.Contains(line, " [running]:") {
on = true
if parentSentinel == 0 {
return nil, fmt.Errorf("no sentinel value in crash report")
}
}
continue
}
// A blank line marks end of a goroutine stack.
if line == "" {
break
}
// Skip the final "created by SYMBOL in goroutine GID" part.
if strings.HasPrefix(line, "created by ") {
break
}
// Expect a pair of lines:
// SYMBOL(ARGS)
// \tFILE:LINE +0xRELPC sp=0x%x fp=0x%x pc=0x%x
// Note: SYMBOL may contain parens "pkg.(*T).method"
// The RELPC is sometimes missing.
// Skip the symbol(args) line.
i++
if i == len(lines) {
break
}
line = lines[i]
// Parse the PC, and correct for the parent and child's
// different mappings of the text section.
pc, err := getPC(line)
if err != nil {
// Inlined frame, perhaps; skip it.
continue
}
pcs = append(pcs, uintptr(pc-parentSentinel+childSentinel))
}
return pcs, nil
}
func min(x, y int) int {
if x < y {
return x
} else {
return y
}
}