Skip to content

Commit

Permalink
metrics, tuning in tests, db cleanups, fix concurrency issue (#1895)
Browse files Browse the repository at this point in the history
  • Loading branch information
kradalby authored Apr 21, 2024
1 parent 7d81784 commit ba614a5
Show file tree
Hide file tree
Showing 28 changed files with 328 additions and 201 deletions.
7 changes: 4 additions & 3 deletions hscontrol/app.go
Original file line number Diff line number Diff line change
Expand Up @@ -225,7 +225,7 @@ func (h *Headscale) deleteExpireEphemeralNodes(milliSeconds int64) {
for range ticker.C {
var removed []types.NodeID
var changed []types.NodeID
if err := h.db.DB.Transaction(func(tx *gorm.DB) error {
if err := h.db.Write(func(tx *gorm.DB) error {
removed, changed = db.DeleteExpiredEphemeralNodes(tx, h.cfg.EphemeralNodeInactivityTimeout)

return nil
Expand Down Expand Up @@ -263,7 +263,7 @@ func (h *Headscale) expireExpiredMachines(intervalMs int64) {
var changed bool

for range ticker.C {
if err := h.db.DB.Transaction(func(tx *gorm.DB) error {
if err := h.db.Write(func(tx *gorm.DB) error {
lastCheck, update, changed = db.ExpireExpiredNodes(tx, lastCheck)

return nil
Expand Down Expand Up @@ -452,6 +452,7 @@ func (h *Headscale) ensureUnixSocketIsAbsent() error {

func (h *Headscale) createRouter(grpcMux *grpcRuntime.ServeMux) *mux.Router {
router := mux.NewRouter()
router.Use(prometheusMiddleware)
router.PathPrefix("/debug/pprof/").Handler(http.DefaultServeMux)

router.HandleFunc(ts2021UpgradePath, h.NoiseUpgradeHandler).Methods(http.MethodPost)
Expand Down Expand Up @@ -508,7 +509,7 @@ func (h *Headscale) Serve() error {

// Fetch an initial DERP Map before we start serving
h.DERPMap = derp.GetDERPMap(h.cfg.DERP)
h.mapper = mapper.NewMapper(h.db, h.cfg, h.DERPMap, h.nodeNotifier.ConnectedMap())
h.mapper = mapper.NewMapper(h.db, h.cfg, h.DERPMap, h.nodeNotifier)

if h.cfg.DERP.ServerEnabled {
// When embedded DERP is enabled we always need a STUN server
Expand Down
31 changes: 3 additions & 28 deletions hscontrol/auth.go
Original file line number Diff line number Diff line change
Expand Up @@ -273,8 +273,6 @@ func (h *Headscale) handleAuthKey(
Err(err).
Msg("Cannot encode message")
http.Error(writer, "Internal server error", http.StatusInternalServerError)
nodeRegistrations.WithLabelValues("new", util.RegisterMethodAuthKey, "error", pak.User.Name).
Inc()

return
}
Expand All @@ -294,13 +292,6 @@ func (h *Headscale) handleAuthKey(
Str("node", registerRequest.Hostinfo.Hostname).
Msg("Failed authentication via AuthKey")

if pak != nil {
nodeRegistrations.WithLabelValues("new", util.RegisterMethodAuthKey, "error", pak.User.Name).
Inc()
} else {
nodeRegistrations.WithLabelValues("new", util.RegisterMethodAuthKey, "error", "unknown").Inc()
}

return
}

Expand Down Expand Up @@ -404,24 +395,20 @@ func (h *Headscale) handleAuthKey(
Caller().
Err(err).
Msg("could not register node")
nodeRegistrations.WithLabelValues("new", util.RegisterMethodAuthKey, "error", pak.User.Name).
Inc()
http.Error(writer, "Internal server error", http.StatusInternalServerError)

return
}
}

err = h.db.DB.Transaction(func(tx *gorm.DB) error {
h.db.Write(func(tx *gorm.DB) error {
return db.UsePreAuthKey(tx, pak)
})
if err != nil {
log.Error().
Caller().
Err(err).
Msg("Failed to use pre-auth key")
nodeRegistrations.WithLabelValues("new", util.RegisterMethodAuthKey, "error", pak.User.Name).
Inc()
http.Error(writer, "Internal server error", http.StatusInternalServerError)

return
Expand All @@ -440,14 +427,10 @@ func (h *Headscale) handleAuthKey(
Str("node", registerRequest.Hostinfo.Hostname).
Err(err).
Msg("Cannot encode message")
nodeRegistrations.WithLabelValues("new", util.RegisterMethodAuthKey, "error", pak.User.Name).
Inc()
http.Error(writer, "Internal server error", http.StatusInternalServerError)

return
}
nodeRegistrations.WithLabelValues("new", util.RegisterMethodAuthKey, "success", pak.User.Name).
Inc()
writer.Header().Set("Content-Type", "application/json; charset=utf-8")
writer.WriteHeader(http.StatusOK)
_, err = writer.Write(respBody)
Expand Down Expand Up @@ -563,7 +546,7 @@ func (h *Headscale) handleNodeLogOut(
}

if node.IsEphemeral() {
changedNodes, err := h.db.DeleteNode(&node, h.nodeNotifier.ConnectedMap())
changedNodes, err := h.db.DeleteNode(&node, h.nodeNotifier.LikelyConnectedMap())
if err != nil {
log.Error().
Err(err).
Expand Down Expand Up @@ -616,14 +599,10 @@ func (h *Headscale) handleNodeWithValidRegistration(
Caller().
Err(err).
Msg("Cannot encode message")
nodeRegistrations.WithLabelValues("update", "web", "error", node.User.Name).
Inc()
http.Error(writer, "Internal server error", http.StatusInternalServerError)

return
}
nodeRegistrations.WithLabelValues("update", "web", "success", node.User.Name).
Inc()

writer.Header().Set("Content-Type", "application/json; charset=utf-8")
writer.WriteHeader(http.StatusOK)
Expand Down Expand Up @@ -654,7 +633,7 @@ func (h *Headscale) handleNodeKeyRefresh(
Str("node", node.Hostname).
Msg("We have the OldNodeKey in the database. This is a key refresh")

err := h.db.DB.Transaction(func(tx *gorm.DB) error {
err := h.db.Write(func(tx *gorm.DB) error {
return db.NodeSetNodeKey(tx, &node, registerRequest.NodeKey)
})
if err != nil {
Expand Down Expand Up @@ -737,14 +716,10 @@ func (h *Headscale) handleNodeExpiredOrLoggedOut(
Caller().
Err(err).
Msg("Cannot encode message")
nodeRegistrations.WithLabelValues("reauth", "web", "error", node.User.Name).
Inc()
http.Error(writer, "Internal server error", http.StatusInternalServerError)

return
}
nodeRegistrations.WithLabelValues("reauth", "web", "success", node.User.Name).
Inc()

writer.Header().Set("Content-Type", "application/json; charset=utf-8")
writer.WriteHeader(http.StatusOK)
Expand Down
1 change: 0 additions & 1 deletion hscontrol/auth_noise.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,6 @@ func (ns *noiseServer) NoiseRegistrationHandler(
Caller().
Err(err).
Msg("Cannot parse RegisterRequest")
nodeRegistrations.WithLabelValues("unknown", "web", "error", "unknown").Inc()
http.Error(writer, "Internal error", http.StatusInternalServerError)

return
Expand Down
9 changes: 5 additions & 4 deletions hscontrol/db/node.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import (
"github.com/juanfont/headscale/hscontrol/types"
"github.com/juanfont/headscale/hscontrol/util"
"github.com/patrickmn/go-cache"
"github.com/puzpuzpuz/xsync/v3"
"github.com/rs/zerolog/log"
"gorm.io/gorm"
"tailscale.com/tailcfg"
Expand Down Expand Up @@ -260,19 +261,19 @@ func NodeSetExpiry(tx *gorm.DB,
return tx.Model(&types.Node{}).Where("id = ?", nodeID).Update("expiry", expiry).Error
}

func (hsdb *HSDatabase) DeleteNode(node *types.Node, isConnected types.NodeConnectedMap) ([]types.NodeID, error) {
func (hsdb *HSDatabase) DeleteNode(node *types.Node, isLikelyConnected *xsync.MapOf[types.NodeID, bool]) ([]types.NodeID, error) {
return Write(hsdb.DB, func(tx *gorm.DB) ([]types.NodeID, error) {
return DeleteNode(tx, node, isConnected)
return DeleteNode(tx, node, isLikelyConnected)
})
}

// DeleteNode deletes a Node from the database.
// Caller is responsible for notifying all of change.
func DeleteNode(tx *gorm.DB,
node *types.Node,
isConnected types.NodeConnectedMap,
isLikelyConnected *xsync.MapOf[types.NodeID, bool],
) ([]types.NodeID, error) {
changed, err := deleteNodeRoutes(tx, node, isConnected)
changed, err := deleteNodeRoutes(tx, node, isLikelyConnected)
if err != nil {
return changed, err
}
Expand Down
3 changes: 2 additions & 1 deletion hscontrol/db/node_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import (
"github.com/juanfont/headscale/hscontrol/policy"
"github.com/juanfont/headscale/hscontrol/types"
"github.com/juanfont/headscale/hscontrol/util"
"github.com/puzpuzpuz/xsync/v3"
"gopkg.in/check.v1"
"tailscale.com/tailcfg"
"tailscale.com/types/key"
Expand Down Expand Up @@ -120,7 +121,7 @@ func (s *Suite) TestHardDeleteNode(c *check.C) {
}
db.DB.Save(&node)

_, err = db.DeleteNode(&node, types.NodeConnectedMap{})
_, err = db.DeleteNode(&node, xsync.NewMapOf[types.NodeID, bool]())
c.Assert(err, check.IsNil)

_, err = db.getNode(user.Name, "testnode3")
Expand Down
4 changes: 2 additions & 2 deletions hscontrol/db/preauth_keys_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,7 @@ func (*Suite) TestEphemeralKeyReusable(c *check.C) {
_, err = db.getNode("test7", "testest")
c.Assert(err, check.IsNil)

db.DB.Transaction(func(tx *gorm.DB) error {
db.Write(func(tx *gorm.DB) error {
DeleteExpiredEphemeralNodes(tx, time.Second*20)
return nil
})
Expand Down Expand Up @@ -181,7 +181,7 @@ func (*Suite) TestEphemeralKeyNotReusable(c *check.C) {
_, err = db.getNode("test7", "testest")
c.Assert(err, check.IsNil)

db.DB.Transaction(func(tx *gorm.DB) error {
db.Write(func(tx *gorm.DB) error {
DeleteExpiredEphemeralNodes(tx, time.Second*20)
return nil
})
Expand Down
37 changes: 20 additions & 17 deletions hscontrol/db/routes.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import (

"github.com/juanfont/headscale/hscontrol/policy"
"github.com/juanfont/headscale/hscontrol/types"
"github.com/puzpuzpuz/xsync/v3"
"github.com/rs/zerolog/log"
"gorm.io/gorm"
"tailscale.com/util/set"
Expand Down Expand Up @@ -126,7 +127,7 @@ func EnableRoute(tx *gorm.DB, id uint64) (*types.StateUpdate, error) {

func DisableRoute(tx *gorm.DB,
id uint64,
isConnected types.NodeConnectedMap,
isLikelyConnected *xsync.MapOf[types.NodeID, bool],
) ([]types.NodeID, error) {
route, err := GetRoute(tx, id)
if err != nil {
Expand All @@ -147,7 +148,7 @@ func DisableRoute(tx *gorm.DB,
return nil, err
}

update, err = failoverRouteTx(tx, isConnected, route)
update, err = failoverRouteTx(tx, isLikelyConnected, route)
if err != nil {
return nil, err
}
Expand Down Expand Up @@ -182,17 +183,17 @@ func DisableRoute(tx *gorm.DB,

func (hsdb *HSDatabase) DeleteRoute(
id uint64,
isConnected types.NodeConnectedMap,
isLikelyConnected *xsync.MapOf[types.NodeID, bool],
) ([]types.NodeID, error) {
return Write(hsdb.DB, func(tx *gorm.DB) ([]types.NodeID, error) {
return DeleteRoute(tx, id, isConnected)
return DeleteRoute(tx, id, isLikelyConnected)
})
}

func DeleteRoute(
tx *gorm.DB,
id uint64,
isConnected types.NodeConnectedMap,
isLikelyConnected *xsync.MapOf[types.NodeID, bool],
) ([]types.NodeID, error) {
route, err := GetRoute(tx, id)
if err != nil {
Expand All @@ -207,7 +208,7 @@ func DeleteRoute(
// https://github.com/juanfont/headscale/issues/804#issuecomment-1399314002
var update []types.NodeID
if !route.IsExitRoute() {
update, err = failoverRouteTx(tx, isConnected, route)
update, err = failoverRouteTx(tx, isLikelyConnected, route)
if err != nil {
return nil, nil
}
Expand Down Expand Up @@ -252,7 +253,7 @@ func DeleteRoute(
return update, nil
}

func deleteNodeRoutes(tx *gorm.DB, node *types.Node, isConnected types.NodeConnectedMap) ([]types.NodeID, error) {
func deleteNodeRoutes(tx *gorm.DB, node *types.Node, isLikelyConnected *xsync.MapOf[types.NodeID, bool]) ([]types.NodeID, error) {
routes, err := GetNodeRoutes(tx, node)
if err != nil {
return nil, fmt.Errorf("getting node routes: %w", err)
Expand All @@ -266,7 +267,7 @@ func deleteNodeRoutes(tx *gorm.DB, node *types.Node, isConnected types.NodeConne

// TODO(kradalby): This is a bit too aggressive, we could probably
// figure out which routes needs to be failed over rather than all.
chn, err := failoverRouteTx(tx, isConnected, &routes[i])
chn, err := failoverRouteTx(tx, isLikelyConnected, &routes[i])
if err != nil {
return changed, fmt.Errorf("failing over route after delete: %w", err)
}
Expand Down Expand Up @@ -409,7 +410,7 @@ func SaveNodeRoutes(tx *gorm.DB, node *types.Node) (bool, error) {
// If needed, the failover will be attempted.
func FailoverNodeRoutesIfNeccessary(
tx *gorm.DB,
isConnected types.NodeConnectedMap,
isLikelyConnected *xsync.MapOf[types.NodeID, bool],
node *types.Node,
) (*types.StateUpdate, error) {
nodeRoutes, err := GetNodeRoutes(tx, node)
Expand All @@ -430,12 +431,12 @@ nodeRouteLoop:
if route.IsPrimary {
// if we have a primary route, and the node is connected
// nothing needs to be done.
if conn, ok := isConnected[route.Node.ID]; conn && ok {
if val, ok := isLikelyConnected.Load(route.Node.ID); ok && val {
continue nodeRouteLoop
}

// if not, we need to failover the route
failover := failoverRoute(isConnected, &route, routes)
failover := failoverRoute(isLikelyConnected, &route, routes)
if failover != nil {
err := failover.save(tx)
if err != nil {
Expand Down Expand Up @@ -477,7 +478,7 @@ nodeRouteLoop:
// If the given route was not primary, it returns early.
func failoverRouteTx(
tx *gorm.DB,
isConnected types.NodeConnectedMap,
isLikelyConnected *xsync.MapOf[types.NodeID, bool],
r *types.Route,
) ([]types.NodeID, error) {
if r == nil {
Expand All @@ -500,7 +501,7 @@ func failoverRouteTx(
return nil, fmt.Errorf("getting routes by prefix: %w", err)
}

fo := failoverRoute(isConnected, r, routes)
fo := failoverRoute(isLikelyConnected, r, routes)
if fo == nil {
return nil, nil
}
Expand Down Expand Up @@ -538,7 +539,7 @@ func (f *failover) save(tx *gorm.DB) error {
}

func failoverRoute(
isConnected types.NodeConnectedMap,
isLikelyConnected *xsync.MapOf[types.NodeID, bool],
routeToReplace *types.Route,
altRoutes types.Routes,

Expand Down Expand Up @@ -570,9 +571,11 @@ func failoverRoute(
continue
}

if isConnected != nil && isConnected[route.Node.ID] {
newPrimary = &altRoutes[idx]
break
if isLikelyConnected != nil {
if val, ok := isLikelyConnected.Load(route.Node.ID); ok && val {
newPrimary = &altRoutes[idx]
break
}
}
}

Expand Down
Loading

0 comments on commit ba614a5

Please sign in to comment.