Skip to content

Commit

Permalink
Reduce header regexps
Browse files Browse the repository at this point in the history
headerRegex (now headerRE) was used to find headers. Because it was anchored on both ends (^...$), it (morally) had to run through the whole string in order to match, though I hope that the RE2-like regexp engine optimises (.+)?$ to nothing.

We can say the same for headerExpansionRE. Matching `\s+` in between ^^^ and +++ just seems unnecessary - we document it as "^^^ +++". So I changed it to `\s`.

In both cases the new regexps only need to process either 4 or 7 bytes in order to match. 

The argument for removing matches of ansiColourRE applies to header expansions as well, so the fact that isHeaderExpansion didn't do the same thing is arguably a bug. Instead, I rearranged things so headerExpansionRE can test the same decolourised line (inside Scan), removing both isHeader and isHeaderExpansion.
  • Loading branch information
DrJosh9000 committed Jun 1, 2023
1 parent d10a813 commit 4f10c77
Show file tree
Hide file tree
Showing 2 changed files with 31 additions and 39 deletions.
65 changes: 28 additions & 37 deletions agent/header_times_streamer.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,15 @@ import (
"github.com/buildkite/agent/v3/status"
)

// If you change header parsing here make sure to change it in the
// buildkite.com frontend logic, too

var (
headerRE = regexp.MustCompile(`^(---|\+\+\+|~~~)\s`)
headerExpansionRE = regexp.MustCompile(`^\^\^\^\s\+\+\+`)
ansiColourRE = regexp.MustCompile(`\x1b\[([;\d]+)?[mK]`)
)

type headerTimesStreamer struct {
// The logger instance to use
logger logger.Logger
Expand Down Expand Up @@ -88,27 +97,33 @@ func (h *headerTimesStreamer) Run(ctx context.Context) {
}

// Scan takes a line of log output and tracks a time if it's a header.
// Returns true for header lines
// Returns true for header lines or header expansion lines.
func (h *headerTimesStreamer) Scan(line string) bool {
// Keep track of how many line scans we need to do
h.scanWaitGroup.Add(1)
defer h.scanWaitGroup.Done()

if isHeader(line) {
h.logger.Debug("[HeaderTimesStreamer] Found header %q", line)

// Aquire a lock on the times and then add the current time to
// our times slice.
h.timesMutex.Lock()
h.times = append(h.times, time.Now().UTC().Format(time.RFC3339Nano))
h.timesMutex.Unlock()
// Make sure all ANSI colours are removed from the string before we
// check to see if it's a header (sometimes a colour escape sequence may
// be the first thing on the line, which will cause the regex to ignore it)
line = ansiColourRE.ReplaceAllString(line, "")

// Add the time to the wait group
h.uploadWaitGroup.Add(1)
return true
if !headerRE.MatchString(line) {
// It's not a header, but could be a header expansion.
return headerExpansionRE.MatchString(line)
}

return false
h.logger.Debug("[HeaderTimesStreamer] Found header %q", line)

// Aquire a lock on the times and then add the current time to
// our times slice.
h.timesMutex.Lock()
h.times = append(h.times, time.Now().UTC().Format(time.RFC3339Nano))
h.timesMutex.Unlock()

// Add the time to the wait group
h.uploadWaitGroup.Add(1)
return true
}

func (h *headerTimesStreamer) Upload(ctx context.Context) {
Expand Down Expand Up @@ -159,27 +174,3 @@ func (h *headerTimesStreamer) Stop() {
h.streaming = false
h.streamingMutex.Unlock()
}

// If you change header parsing here make sure to change it in the
// buildkite.com frontend logic, too

var (
headerRegex = regexp.MustCompile(`^(?:---|\+\+\+|~~~)\s(.+)?$`)
headerExpansionRegex = regexp.MustCompile(`^(?:\^\^\^\s+\+\+\+)\s*$`)
ansiColorRegex = regexp.MustCompile(`\x1b\[([;\d]+)?[mK]`)
)

func isHeader(line string) bool {
// Make sure all ANSI colors are removed from the string before we
// check to see if it's a header (sometimes a color escape sequence may
// be the first thing on the line, which will cause the regex to ignore it)
line = ansiColorRegex.ReplaceAllString(line, "")

// To avoid running the regex over every single line, we'll first do a
// length check. Hopefully there are no heeaders over 500 characters!
return len(line) < 500 && headerRegex.MatchString(line)
}

func isHeaderExpansion(line string) bool {
return len(line) < 50 && headerExpansionRegex.MatchString(line)
}
5 changes: 3 additions & 2 deletions agent/job_runner.go
Original file line number Diff line number Diff line change
Expand Up @@ -232,10 +232,11 @@ func NewJobRunner(l logger.Logger, scope *metrics.Scope, ag *api.AgentRegisterRe
// Use a scanner to process output line by line
err := process.NewScanner(l).ScanLines(pr, func(line string) {
// Send to our header streamer and determine if it's a header
isHeader := runner.headerTimesStreamer.Scan(line)
// or header expansion.
isHeaderOrExpansion := runner.headerTimesStreamer.Scan(line)

// Prefix non-header log lines with timestamps
if !(isHeaderExpansion(line) || isHeader) {
if !isHeaderOrExpansion {
line = fmt.Sprintf("[%s] %s", time.Now().UTC().Format(time.RFC3339), line)
}

Expand Down

0 comments on commit 4f10c77

Please sign in to comment.