Skip to content

Commit

Permalink
runtime: synchronize StartTrace and StopTrace with sysmon
Browse files Browse the repository at this point in the history
Currently sysmon is not stopped when the world is stopped, which is
in general a difficult thing to do. The result of this is that when
tracing starts and the value of trace.enabled changes, it's possible
for sysmon to fail to emit an event when it really should. This leads to
traces which the execution trace parser deems inconsistent.

Fix this by putting all of sysmon's work behind a new lock sysmonlock.
StartTrace and StopTrace both acquire this lock after stopping the world
but before performing any work in order to ensure sysmon sees the
required state change in tracing. This change is expected to slow down
StartTrace and StopTrace, but will help ensure consistent traces are
generated.

Updates #29707.
Fixes #38794.

Change-Id: I64c58e7c3fd173cd5281ffc208d6db24ff6c0284
Reviewed-on: https://go-review.googlesource.com/c/go/+/234617
Run-TryBot: Michael Knyszek <[email protected]>
Run-TryBot: Ian Lance Taylor <[email protected]>
Reviewed-by: Ian Lance Taylor <[email protected]>
Reviewed-by: Hyang-Ah Hana Kim <[email protected]>
Reviewed-by: Michael Pratt <[email protected]>
  • Loading branch information
mknyszek committed May 21, 2020
1 parent 39ea0ea commit c847589
Show file tree
Hide file tree
Showing 4 changed files with 61 additions and 26 deletions.
41 changes: 22 additions & 19 deletions src/runtime/lockrank.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ const (
lockRankDummy lockRank = iota

// Locks held above sched
lockRankSysmon
lockRankScavenge
lockRankForcegc
lockRankSweepWaiters
Expand Down Expand Up @@ -113,6 +114,7 @@ const lockRankLeafRank lockRank = 1000
var lockNames = []string{
lockRankDummy: "",

lockRankSysmon: "sysmon",
lockRankScavenge: "scavenge",
lockRankForcegc: "forcegc",
lockRankSweepWaiters: "sweepWaiters",
Expand Down Expand Up @@ -194,48 +196,49 @@ func (rank lockRank) String() string {
// hchan lock can be held immediately above it when it is acquired.
var lockPartialOrder [][]lockRank = [][]lockRank{
lockRankDummy: {},
lockRankScavenge: {},
lockRankForcegc: {},
lockRankSysmon: {},
lockRankScavenge: {lockRankSysmon},
lockRankForcegc: {lockRankSysmon},
lockRankSweepWaiters: {},
lockRankAssistQueue: {},
lockRankCpuprof: {},
lockRankSweep: {},
lockRankSched: {lockRankScavenge, lockRankForcegc, lockRankSweepWaiters, lockRankAssistQueue, lockRankCpuprof, lockRankSweep},
lockRankSched: {lockRankSysmon, lockRankScavenge, lockRankForcegc, lockRankSweepWaiters, lockRankAssistQueue, lockRankCpuprof, lockRankSweep},
lockRankDeadlock: {lockRankDeadlock},
lockRankPanic: {lockRankDeadlock},
lockRankAllg: {lockRankSched, lockRankPanic},
lockRankAllp: {lockRankSched},
lockRankAllg: {lockRankSysmon, lockRankSched, lockRankPanic},
lockRankAllp: {lockRankSysmon, lockRankSched},
lockRankPollDesc: {},
lockRankTimers: {lockRankScavenge, lockRankSched, lockRankAllp, lockRankPollDesc, lockRankTimers},
lockRankTimers: {lockRankSysmon, lockRankScavenge, lockRankSched, lockRankAllp, lockRankPollDesc, lockRankTimers},
lockRankItab: {},
lockRankReflectOffs: {lockRankItab},
lockRankHchan: {lockRankScavenge, lockRankSweep, lockRankHchan},
lockRankFin: {lockRankSched, lockRankAllg, lockRankTimers, lockRankHchan},
lockRankNotifyList: {},
lockRankTraceBuf: {lockRankScavenge},
lockRankTraceBuf: {lockRankSysmon, lockRankScavenge},
lockRankTraceStrings: {lockRankTraceBuf},
lockRankMspanSpecial: {lockRankScavenge, lockRankAssistQueue, lockRankCpuprof, lockRankSched, lockRankAllg, lockRankAllp, lockRankTimers, lockRankItab, lockRankReflectOffs, lockRankHchan, lockRankNotifyList, lockRankTraceBuf, lockRankTraceStrings},
lockRankProf: {lockRankScavenge, lockRankAssistQueue, lockRankCpuprof, lockRankSweep, lockRankSched, lockRankAllg, lockRankAllp, lockRankTimers, lockRankItab, lockRankReflectOffs, lockRankNotifyList, lockRankTraceBuf, lockRankTraceStrings, lockRankHchan},
lockRankGcBitsArenas: {lockRankScavenge, lockRankAssistQueue, lockRankCpuprof, lockRankSched, lockRankAllg, lockRankTimers, lockRankItab, lockRankReflectOffs, lockRankNotifyList, lockRankTraceBuf, lockRankTraceStrings, lockRankHchan},
lockRankMspanSpecial: {lockRankSysmon, lockRankScavenge, lockRankAssistQueue, lockRankCpuprof, lockRankSched, lockRankAllg, lockRankAllp, lockRankTimers, lockRankItab, lockRankReflectOffs, lockRankHchan, lockRankNotifyList, lockRankTraceBuf, lockRankTraceStrings},
lockRankProf: {lockRankSysmon, lockRankScavenge, lockRankAssistQueue, lockRankCpuprof, lockRankSweep, lockRankSched, lockRankAllg, lockRankAllp, lockRankTimers, lockRankItab, lockRankReflectOffs, lockRankNotifyList, lockRankTraceBuf, lockRankTraceStrings, lockRankHchan},
lockRankGcBitsArenas: {lockRankSysmon, lockRankScavenge, lockRankAssistQueue, lockRankCpuprof, lockRankSched, lockRankAllg, lockRankTimers, lockRankItab, lockRankReflectOffs, lockRankNotifyList, lockRankTraceBuf, lockRankTraceStrings, lockRankHchan},
lockRankRoot: {},
lockRankTrace: {lockRankScavenge, lockRankForcegc, lockRankAssistQueue, lockRankSched, lockRankHchan, lockRankTraceBuf, lockRankTraceStrings, lockRankRoot, lockRankSweep},
lockRankTrace: {lockRankSysmon, lockRankScavenge, lockRankForcegc, lockRankAssistQueue, lockRankSched, lockRankHchan, lockRankTraceBuf, lockRankTraceStrings, lockRankRoot, lockRankSweep},
lockRankTraceStackTab: {lockRankScavenge, lockRankSweepWaiters, lockRankAssistQueue, lockRankSweep, lockRankSched, lockRankAllg, lockRankTimers, lockRankHchan, lockRankFin, lockRankNotifyList, lockRankTraceBuf, lockRankTraceStrings, lockRankRoot, lockRankTrace},
lockRankNetpollInit: {lockRankTimers},

lockRankRwmutexW: {},
lockRankRwmutexR: {lockRankRwmutexW},

lockRankMcentral: {lockRankScavenge, lockRankForcegc, lockRankAssistQueue, lockRankCpuprof, lockRankSweep, lockRankSched, lockRankAllg, lockRankAllp, lockRankTimers, lockRankItab, lockRankReflectOffs, lockRankNotifyList, lockRankTraceBuf, lockRankTraceStrings, lockRankHchan},
lockRankSpine: {lockRankScavenge, lockRankAssistQueue, lockRankCpuprof, lockRankSched, lockRankAllg, lockRankTimers, lockRankItab, lockRankReflectOffs, lockRankNotifyList, lockRankTraceBuf, lockRankTraceStrings, lockRankHchan},
lockRankSpanSetSpine: {lockRankScavenge, lockRankForcegc, lockRankAssistQueue, lockRankCpuprof, lockRankSweep, lockRankSched, lockRankAllg, lockRankAllp, lockRankTimers, lockRankItab, lockRankReflectOffs, lockRankNotifyList, lockRankTraceBuf, lockRankTraceStrings, lockRankHchan},
lockRankGscan: {lockRankScavenge, lockRankForcegc, lockRankSweepWaiters, lockRankAssistQueue, lockRankCpuprof, lockRankSweep, lockRankSched, lockRankTimers, lockRankItab, lockRankReflectOffs, lockRankHchan, lockRankFin, lockRankTraceBuf, lockRankTraceStrings, lockRankRoot, lockRankNotifyList, lockRankProf, lockRankGcBitsArenas, lockRankTrace, lockRankTraceStackTab, lockRankNetpollInit, lockRankMcentral, lockRankSpine, lockRankSpanSetSpine},
lockRankStackpool: {lockRankScavenge, lockRankSweepWaiters, lockRankAssistQueue, lockRankCpuprof, lockRankSweep, lockRankSched, lockRankPollDesc, lockRankTimers, lockRankItab, lockRankReflectOffs, lockRankHchan, lockRankFin, lockRankNotifyList, lockRankTraceBuf, lockRankTraceStrings, lockRankProf, lockRankGcBitsArenas, lockRankRoot, lockRankTrace, lockRankTraceStackTab, lockRankNetpollInit, lockRankRwmutexR, lockRankMcentral, lockRankSpine, lockRankSpanSetSpine, lockRankGscan},
lockRankStackLarge: {lockRankAssistQueue, lockRankSched, lockRankItab, lockRankHchan, lockRankProf, lockRankGcBitsArenas, lockRankRoot, lockRankMcentral, lockRankSpanSetSpine, lockRankGscan},
lockRankMcentral: {lockRankSysmon, lockRankScavenge, lockRankForcegc, lockRankAssistQueue, lockRankCpuprof, lockRankSweep, lockRankSched, lockRankAllg, lockRankAllp, lockRankTimers, lockRankItab, lockRankReflectOffs, lockRankNotifyList, lockRankTraceBuf, lockRankTraceStrings, lockRankHchan},
lockRankSpine: {lockRankSysmon, lockRankScavenge, lockRankAssistQueue, lockRankCpuprof, lockRankSched, lockRankAllg, lockRankTimers, lockRankItab, lockRankReflectOffs, lockRankNotifyList, lockRankTraceBuf, lockRankTraceStrings, lockRankHchan},
lockRankSpanSetSpine: {lockRankSysmon, lockRankScavenge, lockRankForcegc, lockRankAssistQueue, lockRankCpuprof, lockRankSweep, lockRankSched, lockRankAllg, lockRankAllp, lockRankTimers, lockRankItab, lockRankReflectOffs, lockRankNotifyList, lockRankTraceBuf, lockRankTraceStrings, lockRankHchan},
lockRankGscan: {lockRankSysmon, lockRankScavenge, lockRankForcegc, lockRankSweepWaiters, lockRankAssistQueue, lockRankCpuprof, lockRankSweep, lockRankSched, lockRankTimers, lockRankItab, lockRankReflectOffs, lockRankHchan, lockRankFin, lockRankTraceBuf, lockRankTraceStrings, lockRankRoot, lockRankNotifyList, lockRankProf, lockRankGcBitsArenas, lockRankTrace, lockRankTraceStackTab, lockRankNetpollInit, lockRankMcentral, lockRankSpine, lockRankSpanSetSpine},
lockRankStackpool: {lockRankSysmon, lockRankScavenge, lockRankSweepWaiters, lockRankAssistQueue, lockRankCpuprof, lockRankSweep, lockRankSched, lockRankPollDesc, lockRankTimers, lockRankItab, lockRankReflectOffs, lockRankHchan, lockRankFin, lockRankNotifyList, lockRankTraceBuf, lockRankTraceStrings, lockRankProf, lockRankGcBitsArenas, lockRankRoot, lockRankTrace, lockRankTraceStackTab, lockRankNetpollInit, lockRankRwmutexR, lockRankMcentral, lockRankSpine, lockRankSpanSetSpine, lockRankGscan},
lockRankStackLarge: {lockRankSysmon, lockRankAssistQueue, lockRankSched, lockRankItab, lockRankHchan, lockRankProf, lockRankGcBitsArenas, lockRankRoot, lockRankMcentral, lockRankSpanSetSpine, lockRankGscan},
lockRankDefer: {},
lockRankSudog: {lockRankNotifyList, lockRankHchan},
lockRankWbufSpans: {lockRankScavenge, lockRankSweepWaiters, lockRankAssistQueue, lockRankSweep, lockRankSched, lockRankAllg, lockRankPollDesc, lockRankTimers, lockRankItab, lockRankReflectOffs, lockRankHchan, lockRankNotifyList, lockRankTraceStrings, lockRankMspanSpecial, lockRankProf, lockRankRoot, lockRankGscan, lockRankDefer, lockRankSudog},
lockRankMheap: {lockRankScavenge, lockRankSweepWaiters, lockRankAssistQueue, lockRankCpuprof, lockRankSweep, lockRankSched, lockRankAllg, lockRankAllp, lockRankPollDesc, lockRankTimers, lockRankItab, lockRankReflectOffs, lockRankNotifyList, lockRankTraceBuf, lockRankTraceStrings, lockRankHchan, lockRankMspanSpecial, lockRankProf, lockRankGcBitsArenas, lockRankRoot, lockRankMcentral, lockRankGscan, lockRankStackpool, lockRankStackLarge, lockRankDefer, lockRankSudog, lockRankWbufSpans, lockRankSpanSetSpine},
lockRankMheapSpecial: {lockRankScavenge, lockRankAssistQueue, lockRankCpuprof, lockRankSweep, lockRankSched, lockRankAllg, lockRankAllp, lockRankTimers, lockRankItab, lockRankReflectOffs, lockRankNotifyList, lockRankTraceBuf, lockRankTraceStrings, lockRankHchan},
lockRankMheap: {lockRankSysmon, lockRankScavenge, lockRankSweepWaiters, lockRankAssistQueue, lockRankCpuprof, lockRankSweep, lockRankSched, lockRankAllg, lockRankAllp, lockRankPollDesc, lockRankTimers, lockRankItab, lockRankReflectOffs, lockRankNotifyList, lockRankTraceBuf, lockRankTraceStrings, lockRankHchan, lockRankMspanSpecial, lockRankProf, lockRankGcBitsArenas, lockRankRoot, lockRankMcentral, lockRankGscan, lockRankStackpool, lockRankStackLarge, lockRankDefer, lockRankSudog, lockRankWbufSpans, lockRankSpanSetSpine},
lockRankMheapSpecial: {lockRankSysmon, lockRankScavenge, lockRankAssistQueue, lockRankCpuprof, lockRankSweep, lockRankSched, lockRankAllg, lockRankAllp, lockRankTimers, lockRankItab, lockRankReflectOffs, lockRankNotifyList, lockRankTraceBuf, lockRankTraceStrings, lockRankHchan},
lockRankGlobalAlloc: {lockRankProf, lockRankSpine, lockRankSpanSetSpine, lockRankMheap, lockRankMheapSpecial},

lockRankGFree: {lockRankSched},
Expand Down
14 changes: 14 additions & 0 deletions src/runtime/proc.go
Original file line number Diff line number Diff line change
Expand Up @@ -533,6 +533,7 @@ func cpuinit() {
// The new G calls runtime·main.
func schedinit() {
lockInit(&sched.lock, lockRankSched)
lockInit(&sched.sysmonlock, lockRankSysmon)
lockInit(&sched.deferlock, lockRankDefer)
lockInit(&sched.sudoglock, lockRankSudog)
lockInit(&deadlock, lockRankDeadlock)
Expand Down Expand Up @@ -4613,6 +4614,18 @@ func sysmon() {
}
unlock(&sched.lock)
}
lock(&sched.sysmonlock)
{
// If we spent a long time blocked on sysmonlock
// then we want to update now and next since it's
// likely stale.
now1 := nanotime()
if now1-now > 50*1000 /* 50µs */ {
next, _ = timeSleepUntil()
}
now = now1
}

// trigger libc interceptors if needed
if *cgo_yield != nil {
asmcgocall(*cgo_yield, nil)
Expand Down Expand Up @@ -4665,6 +4678,7 @@ func sysmon() {
lasttrace = now
schedtrace(debug.scheddetail > 0)
}
unlock(&sched.sysmonlock)
}
}

Expand Down
20 changes: 13 additions & 7 deletions src/runtime/runtime2.go
Original file line number Diff line number Diff line change
Expand Up @@ -349,9 +349,9 @@ type sudog struct {

g *g

next *sudog
prev *sudog
elem unsafe.Pointer // data element (may point to stack)
next *sudog
prev *sudog
elem unsafe.Pointer // data element (may point to stack)

// The following fields are never accessed concurrently.
// For channels, waitlink is only accessed by g.
Expand All @@ -366,10 +366,10 @@ type sudog struct {
// g.selectDone must be CAS'd to win the wake-up race.
isSelect bool

parent *sudog // semaRoot binary tree
waitlink *sudog // g.waiting list or semaRoot
waittail *sudog // semaRoot
c *hchan // channel
parent *sudog // semaRoot binary tree
waitlink *sudog // g.waiting list or semaRoot
waittail *sudog // semaRoot
c *hchan // channel
}

type libcall struct {
Expand Down Expand Up @@ -768,6 +768,12 @@ type schedt struct {

procresizetime int64 // nanotime() of last change to gomaxprocs
totaltime int64 // ∫gomaxprocs dt up to procresizetime

// sysmonlock protects sysmon's actions on the runtime.
//
// Acquire and hold this mutex to block sysmon from interacting
// with the rest of the runtime.
sysmonlock mutex
}

// Values for the flags field of a sigTabT.
Expand Down
12 changes: 12 additions & 0 deletions src/runtime/trace.go
Original file line number Diff line number Diff line change
Expand Up @@ -187,6 +187,9 @@ func StartTrace() error {
// paired with an end).
stopTheWorldGC("start tracing")

// Prevent sysmon from running any code that could generate events.
lock(&sched.sysmonlock)

// We are in stop-the-world, but syscalls can finish and write to trace concurrently.
// Exitsyscall could check trace.enabled long before and then suddenly wake up
// and decide to write to trace at a random point in time.
Expand All @@ -196,6 +199,7 @@ func StartTrace() error {

if trace.enabled || trace.shutdown {
unlock(&trace.bufLock)
unlock(&sched.sysmonlock)
startTheWorldGC()
return errorString("tracing is already enabled")
}
Expand Down Expand Up @@ -267,6 +271,8 @@ func StartTrace() error {

unlock(&trace.bufLock)

unlock(&sched.sysmonlock)

startTheWorldGC()
return nil
}
Expand All @@ -278,11 +284,15 @@ func StopTrace() {
// and also to avoid races with traceEvent.
stopTheWorldGC("stop tracing")

// See the comment in StartTrace.
lock(&sched.sysmonlock)

// See the comment in StartTrace.
lock(&trace.bufLock)

if !trace.enabled {
unlock(&trace.bufLock)
unlock(&sched.sysmonlock)
startTheWorldGC()
return
}
Expand Down Expand Up @@ -320,6 +330,8 @@ func StopTrace() {
trace.shutdown = true
unlock(&trace.bufLock)

unlock(&sched.sysmonlock)

startTheWorldGC()

// The world is started but we've set trace.shutdown, so new tracing can't start.
Expand Down

0 comments on commit c847589

Please sign in to comment.