Skip to content

Commit

Permalink
[libbeat] Register uid/gid monitoring metrics asynchronously (#31835)
Browse files Browse the repository at this point in the history
This refactors the Beat initialization code to consolidate the metric registration into a
helper method and changes the call to look up user info (`user.Current()`) to be
asynchronous from the main initialization goroutine. This prevents the lookup from blocking
the process initialization and delaying the `StartServiceCtrlDispatcher` call that is required
by Windows services within a particular time period.

The user.Current() call can take up to 60 sec in some cases, which causes Windows to
time out the service and kill the process.

Relates #31810
  • Loading branch information
andrewkroh authored and chrisberkhout committed Jun 1, 2023
1 parent 1a11e14 commit 97768b9
Show file tree
Hide file tree
Showing 2 changed files with 50 additions and 46 deletions.
2 changes: 1 addition & 1 deletion CHANGELOG.next.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ https://github.com/elastic/beats/compare/v8.2.0\...main[Check the HEAD diff]
*Affecting all Beats*

- Allow loading secrets that contain commas from the keystore {pull}31694[31694].
- Fix an issue that could lead Windows service timeouts. {issue}31810[31810]
- Fix Windows service timeouts when the "TCP/IP NetBIOS Helper" service is disabled. {issue}31810[31810] {pull}31835[31835]

*Auditbeat*

Expand Down
94 changes: 49 additions & 45 deletions libbeat/cmd/instance/beat.go
Original file line number Diff line number Diff line change
Expand Up @@ -177,43 +177,6 @@ func Run(settings Settings, bt beat.Creator) error {
return err
}

// Add basic info
registry := monitoring.GetNamespace("info").GetRegistry()
monitoring.NewString(registry, "version").Set(b.Info.Version)
monitoring.NewString(registry, "beat").Set(b.Info.Beat)
monitoring.NewString(registry, "name").Set(b.Info.Name)
monitoring.NewString(registry, "hostname").Set(b.Info.Hostname)

// Add more beat metadata
monitoring.NewString(registry, "binary_arch").Set(runtime.GOARCH)
monitoring.NewString(registry, "build_commit").Set(version.Commit())
monitoring.NewTimestamp(registry, "build_time").Set(version.BuildTime())
monitoring.NewBool(registry, "elastic_licensed").Set(b.Info.ElasticLicensed)

if u, err := user.Current(); err != nil {
if _, ok := err.(user.UnknownUserIdError); ok { //nolint:errorlint // keep legacy behaviour
// This usually happens if the user UID does not exist in /etc/passwd. It might be the case on K8S
// if the user set securityContext.runAsUser to an arbitrary value.
monitoring.NewString(registry, "uid").Set(strconv.Itoa(os.Getuid()))
monitoring.NewString(registry, "gid").Set(strconv.Itoa(os.Getgid()))
} else {
return err
}
} else {
monitoring.NewString(registry, "username").Set(u.Username)
monitoring.NewString(registry, "uid").Set(u.Uid)
monitoring.NewString(registry, "gid").Set(u.Gid)
}

// Add additional info to state registry. This is also reported to monitoring
stateRegistry := monitoring.GetNamespace("state").GetRegistry()
serviceRegistry := stateRegistry.NewRegistry("service")
monitoring.NewString(serviceRegistry, "version").Set(b.Info.Version)
monitoring.NewString(serviceRegistry, "name").Set(b.Info.Beat)
beatRegistry := stateRegistry.NewRegistry("beat")
monitoring.NewString(beatRegistry, "name").Set(b.Info.Name)
monitoring.NewFunc(stateRegistry, "host", host.ReportInfo, monitoring.Report)

return b.launch(settings, bt)
}())
}
Expand Down Expand Up @@ -431,17 +394,11 @@ func (b *Beat) launch(settings Settings, bt beat.Creator) error {
_ = bl.unlock()
}()

// Set Beat ID in registry vars, in case it was loaded from meta file
infoRegistry := monitoring.GetNamespace("info").GetRegistry()
monitoring.NewString(infoRegistry, "uuid").Set(b.Info.ID.String())
monitoring.NewString(infoRegistry, "ephemeral_id").Set(b.Info.EphemeralID.String())

serviceRegistry := monitoring.GetNamespace("state").GetRegistry().GetRegistry("service")
monitoring.NewString(serviceRegistry, "id").Set(b.Info.ID.String())

svc.BeforeRun()
defer svc.Cleanup()

b.registerMetrics()

// Start the API Server before the Seccomp lock down, we do this so we can create the unix socket
// set the appropriate permission on the unix domain file without having to whitelist anything
// that would be set at runtime.
Expand Down Expand Up @@ -518,6 +475,53 @@ func (b *Beat) launch(settings Settings, bt beat.Creator) error {
return beater.Run(&b.Beat)
}

// registerMetrics publishes this Beat's metadata through the internal
// monitoring API, populating the "info" and "state" namespaces. Those
// namespaces are exposed through the HTTP monitoring endpoint (e.g. /info and
// /state) and/or pushed to Elasticsearch through the x-pack monitoring feature.
//
// The user lookup runs in its own goroutine, so the username/uid/gid entries
// may show up in the info namespace some time after this method has returned.
func (b *Beat) registerMetrics() {
	info := monitoring.GetNamespace("info").GetRegistry()

	// String-valued entries of the info namespace.
	for _, entry := range []struct{ key, value string }{
		{"version", b.Info.Version},
		{"beat", b.Info.Beat},
		{"name", b.Info.Name},
		{"hostname", b.Info.Hostname},
		{"uuid", b.Info.ID.String()},
		{"ephemeral_id", b.Info.EphemeralID.String()},
		{"binary_arch", runtime.GOARCH},
		{"build_commit", version.Commit()},
	} {
		monitoring.NewString(info, entry.key).Set(entry.value)
	}
	monitoring.NewTimestamp(info, "build_time").Set(version.BuildTime())
	monitoring.NewBool(info, "elastic_licensed").Set(b.Info.ElasticLicensed)

	// Resolve the current user off the calling goroutine: on Windows the
	// lookup can take up to 60s and must not hold up initialization.
	go func() {
		current, err := user.Current()
		if err != nil {
			// The lookup usually fails when the UID has no entry in
			// /etc/passwd, e.g. on K8S when securityContext.runAsUser is set
			// to an arbitrary value. Report the numeric process IDs instead.
			monitoring.NewString(info, "uid").Set(strconv.Itoa(os.Getuid()))
			monitoring.NewString(info, "gid").Set(strconv.Itoa(os.Getgid()))
			return
		}

		monitoring.NewString(info, "username").Set(current.Username)
		monitoring.NewString(info, "uid").Set(current.Uid)
		monitoring.NewString(info, "gid").Set(current.Gid)
	}()

	state := monitoring.GetNamespace("state").GetRegistry()

	// state.service
	service := state.NewRegistry("service")
	monitoring.NewString(service, "version").Set(b.Info.Version)
	monitoring.NewString(service, "name").Set(b.Info.Beat)
	monitoring.NewString(service, "id").Set(b.Info.ID.String())

	// state.beat
	beatState := state.NewRegistry("beat")
	monitoring.NewString(beatState, "name").Set(b.Info.Name)

	// state.host
	monitoring.NewFunc(state, "host", host.ReportInfo, monitoring.Report)
}

// TestConfig check all settings are ok and the beat can be run
func (b *Beat) TestConfig(settings Settings, bt beat.Creator) error {
return handleError(func() error {
Expand Down

0 comments on commit 97768b9

Please sign in to comment.