Skip to content

Commit

Permalink
Add timeout to sequencing context (#1595)
Browse files Browse the repository at this point in the history
  • Loading branch information
AlCutter authored May 14, 2019
1 parent 41d14b3 commit 3e9002c
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 3 deletions.
8 changes: 7 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -171,7 +171,13 @@ Quota metrics with specs of the form `users/<user>/read` and
`users/<user>/write` are no longer exported by the Trillian binaries (as they
lead to excessive storage requirements for Trillian metrics).

### Fix Operation Loop Hang
### Resilience improvements in `log_signer`

#### Add timeout to sequencing loop

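Added a timeout to the context used for each pass of the sequencing loop. The
timeout is configurable via `LogOperationInfo.Timeout` and defaults to 60s
(`DefaultTimeout`) when unset.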

#### Fix Operation Loop Hang

Resolved a bug that would hide errors and cause the `OperationLoop` to hang
until process exit if any error occurred.
Expand Down
15 changes: 13 additions & 2 deletions server/log_operation_manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,8 @@ import (
const logIDLabel = "logid"

var (
DefaultTimeout = 60 * time.Second

once sync.Once
knownLogs monitoring.Gauge
resignations monitoring.Counter
Expand Down Expand Up @@ -90,6 +92,9 @@ type LogOperationInfo struct {
RunInterval time.Duration
// NumWorkers is the number of worker goroutines to run in parallel.
NumWorkers int
// Timeout sets an optional timeout on each operation run.
// If unset, it defaults to the value of DefaultTimeout.
Timeout time.Duration
}

// LogOperationManager controls scheduling activities for logs.
Expand All @@ -116,6 +121,9 @@ func NewLogOperationManager(info LogOperationInfo, logOperation LogOperation) *L
once.Do(func() {
createMetrics(info.Registry.MetricFactory)
})
if info.Timeout == 0 {
info.Timeout = DefaultTimeout
}
return &LogOperationManager{
info: info,
logOperation: logOperation,
Expand Down Expand Up @@ -264,7 +272,10 @@ func (l *LogOperationManager) updateHeldIDs(ctx context.Context, logIDs, activeI
}

func (l *LogOperationManager) getLogsAndExecutePass(ctx context.Context) error {
activeIDs, err := l.getActiveLogIDs(ctx)
runCtx, cancel := context.WithTimeout(ctx, l.info.Timeout)
defer cancel()

activeIDs, err := l.getActiveLogIDs(runCtx)
if err != nil {
return fmt.Errorf("failed to list active log IDs: %v", err)
}
Expand All @@ -285,7 +296,7 @@ func (l *LogOperationManager) getLogsAndExecutePass(ctx context.Context) error {
ex.jobs <- logID
}
close(ex.jobs) // Cause executor's run to terminate when it has drained the jobs.
ex.run(ctx)
ex.run(runCtx)
return nil
}

Expand Down

0 comments on commit 3e9002c

Please sign in to comment.