Skip to content

Commit

Permalink
feat: send FIND_NODE request to peers on routing table refresh (#810)
Browse files Browse the repository at this point in the history
We have seen in the past that there are peers in the IPFS DHT that let you connect to them but then refuse to speak any protocol. This was mainly due to resource manager killing the connection if limits were exceeded. We have seen that such peers are pushed to the edge of the DHT - meaning, they get already pruned from lower buckets. However, they won't get pruned from higher ones, because we only try to connect to them and not speak anything on that connection.
  • Loading branch information
dennis-tra authored Feb 11, 2023
1 parent 4364e75 commit 81e7325
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 1 deletion.
6 changes: 6 additions & 0 deletions dht.go
Original file line number Diff line number Diff line change
Expand Up @@ -363,10 +363,16 @@ func makeRtRefreshManager(dht *IpfsDHT, cfg dhtcfg.Config, maxLastSuccessfulOutb
return err
}

pingFnc := func(ctx context.Context, p peer.ID) error {
_, err := dht.protoMessenger.GetClosestPeers(ctx, p, p) // don't use the PING message type as it's deprecated
return err
}

r, err := rtrefresh.NewRtRefreshManager(
dht.host, dht.routingTable, cfg.RoutingTable.AutoRefresh,
keyGenFnc,
queryFnc,
pingFnc,
cfg.RoutingTable.RefreshQueryTimeout,
cfg.RoutingTable.RefreshInterval,
maxLastSuccessfulOutboundThreshold,
Expand Down
14 changes: 13 additions & 1 deletion rtrefresh/rt_refresh_manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ type RtRefreshManager struct {
enableAutoRefresh bool // should run periodic refreshes ?
refreshKeyGenFnc func(cpl uint) (string, error) // generate the key for the query to refresh this cpl
refreshQueryFnc func(ctx context.Context, key string) error // query to run for a refresh.
refreshPingFnc func(ctx context.Context, p peer.ID) error // request to check liveness of remote peer
refreshQueryTimeout time.Duration // timeout for one refresh query

// interval between two periodic refreshes.
Expand All @@ -57,6 +58,7 @@ type RtRefreshManager struct {
func NewRtRefreshManager(h host.Host, rt *kbucket.RoutingTable, autoRefresh bool,
refreshKeyGenFnc func(cpl uint) (string, error),
refreshQueryFnc func(ctx context.Context, key string) error,
refreshPingFnc func(ctx context.Context, p peer.ID) error,
refreshQueryTimeout time.Duration,
refreshInterval time.Duration,
successfulOutboundQueryGracePeriod time.Duration,
Expand All @@ -73,6 +75,7 @@ func NewRtRefreshManager(h host.Host, rt *kbucket.RoutingTable, autoRefresh bool
enableAutoRefresh: autoRefresh,
refreshKeyGenFnc: refreshKeyGenFnc,
refreshQueryFnc: refreshQueryFnc,
refreshPingFnc: refreshPingFnc,

refreshQueryTimeout: refreshQueryTimeout,
refreshInterval: refreshInterval,
Expand Down Expand Up @@ -178,12 +181,21 @@ func (r *RtRefreshManager) loop() {
wg.Add(1)
go func(ps kbucket.PeerInfo) {
defer wg.Done()

livelinessCtx, cancel := context.WithTimeout(r.ctx, peerPingTimeout)
defer cancel()

if err := r.h.Connect(livelinessCtx, peer.AddrInfo{ID: ps.Id}); err != nil {
logger.Debugw("evicting peer after failed connection", "peer", ps.Id, "error", err)
r.rt.RemovePeer(ps.Id)
return
}

if err := r.refreshPingFnc(livelinessCtx, ps.Id); err != nil {
logger.Debugw("evicting peer after failed ping", "peer", ps.Id, "error", err)
r.rt.RemovePeer(ps.Id)
return
}
cancel()
}(ps)
}
}
Expand Down

0 comments on commit 81e7325

Please sign in to comment.