p2p/dnsdisc: re-check tree root when leaf resolution fails #20682

Merged · 1 commit · Feb 17, 2020
66 changes: 57 additions & 9 deletions p2p/dnsdisc/client_test.go
@@ -19,6 +19,7 @@ package dnsdisc
import (
"context"
"crypto/ecdsa"
"errors"
"math/rand"
"reflect"
"testing"
@@ -176,11 +177,62 @@ func TestIteratorNodeUpdates(t *testing.T) {
t.Fatal(err)
}

// sync the original tree.
// Sync the original tree.
resolver.add(tree1.ToTXT("n"))
checkIterator(t, it, nodes[:25])

// Update some nodes and ensure RandomNode returns the new nodes as well.
// Ensure RandomNode returns the new nodes after the tree is updated.
updateSomeNodes(nodesSeed1, nodes)
tree2, _ := makeTestTree("n", nodes, nil)
resolver.clear()
resolver.add(tree2.ToTXT("n"))
t.Log("tree updated")

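// Advance the clock past RecheckInterval so the client re-fetches the root.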
clock.Run(c.cfg.RecheckInterval + 1*time.Second)
checkIterator(t, it, nodes)
}

// This test checks that the tree root is re-checked once enough leaf
// requests have failed (more than rootRecheckFailCount). The test is just
// like TestIteratorNodeUpdates, but does not advance the clock by
// RecheckInterval after the tree update.
func TestIteratorRootRecheckOnFail(t *testing.T) {
var (
clock = new(mclock.Simulated)
nodes = testNodes(nodesSeed1, 30)
resolver = newMapResolver()
c = NewClient(Config{
Resolver: resolver,
Logger: testlog.Logger(t, log.LvlTrace),
RecheckInterval: 20 * time.Minute,
RateLimit: 500,
// Disabling the cache is required for this test because the client doesn't
// notice leaf failures if all records are cached.
CacheLimit: 1,
})
)
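// Install the simulated clock so the test controls time.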
c.clock = clock
tree1, url := makeTestTree("n", nodes[:25], nil)
it, err := c.NewIterator(url)
if err != nil {
t.Fatal(err)
}

// Sync the original tree.
resolver.add(tree1.ToTXT("n"))
checkIterator(t, it, nodes[:25])

// Ensure RandomNode returns the new nodes after the tree is updated.
updateSomeNodes(nodesSeed1, nodes)
tree2, _ := makeTestTree("n", nodes, nil)
resolver.clear()
resolver.add(tree2.ToTXT("n"))
t.Log("tree updated")

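// The clock is not advanced here: the root re-check is triggered solely
// by the failing leaf lookups against the cleared resolver.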
checkIterator(t, it, nodes)
}

// updateSomeNodes applies ENR updates to some of the given nodes.
func updateSomeNodes(keySeed int64, nodes []*enode.Node) {
keys := testKeys(keySeed, len(nodes))
for i, n := range nodes[:len(nodes)/2] {
r := n.Record()
@@ -190,11 +242,6 @@ func TestIteratorNodeUpdates(t *testing.T) {
n2, _ := enode.New(enode.ValidSchemes, r)
nodes[i] = n2
}
tree2, _ := makeTestTree("n", nodes, nil)
clock.Run(c.cfg.RecheckInterval + 1*time.Second)
resolver.clear()
resolver.add(tree2.ToTXT("n"))
checkIterator(t, it, nodes)
}

// This test verifies that randomIterator re-checks the root of the tree to catch updates to links.
@@ -230,9 +277,10 @@ func TestIteratorLinkUpdates(t *testing.T) {
// Add link to tree3, remove link to tree2.
tree1, _ = makeTestTree("t1", nodes[:10], []string{url3})
resolver.add(tree1.ToTXT("t1"))
clock.Run(c.cfg.RecheckInterval + 1*time.Second)
t.Log("tree1 updated")

clock.Run(c.cfg.RecheckInterval + 1*time.Second)

var wantNodes []*enode.Node
wantNodes = append(wantNodes, tree1.Nodes()...)
wantNodes = append(wantNodes, tree3.Nodes()...)
@@ -345,5 +393,5 @@ func (mr mapResolver) LookupTXT(ctx context.Context, name string) ([]string, err
if record, ok := mr[name]; ok {
return []string{record}, nil
}
return nil, nil
return nil, errors.New("not found")
}
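For context, here is a minimal sketch of how a consumer drives the iterator these tests exercise. The tree URL is a placeholder, and the Next/Node/Close calls follow the enode.Iterator contract; this is an illustration under those assumptions, not part of the change.

package main

import (
	"fmt"

	"github.com/ethereum/go-ethereum/p2p/dnsdisc"
)

func main() {
	// Zero-value Config: NewClient fills in the system DNS resolver and
	// default cache, rate-limit and timeout settings.
	c := dnsdisc.NewClient(dnsdisc.Config{})

	// Placeholder URL; a real link has the form enrtree://<base32-pubkey>@<domain>.
	it, err := c.NewIterator("enrtree://<base32-pubkey>@nodes.example.org")
	if err != nil {
		panic(err)
	}
	defer it.Close()

	// Next blocks until a node is available. With this change, repeated leaf
	// lookup failures cause the client to re-check the tree root instead of
	// retrying stale hashes indefinitely.
	for it.Next() {
		fmt.Println(it.Node().String())
	}
}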
65 changes: 56 additions & 9 deletions p2p/dnsdisc/sync.go
@@ -25,15 +25,22 @@ import (
"github.com/ethereum/go-ethereum/p2p/enode"
)

const (
rootRecheckFailCount = 5 // update root if this many leaf requests fail
)

// clientTree is a full tree being synced.
type clientTree struct {
c *Client
loc *linkEntry // link to this tree

lastRootCheck mclock.AbsTime // last revalidation of root
root *rootEntry
enrs *subtreeSync
links *subtreeSync
leafFailCount int
rootFailCount int

root *rootEntry
enrs *subtreeSync
links *subtreeSync

lc *linkCache // tracks all links between all trees
curLinks map[string]struct{} // links contained in this tree
@@ -46,7 +53,7 @@ func newClientTree(c *Client, lc *linkCache, loc *linkEntry) *clientTree {

// syncAll retrieves all entries of the tree.
func (ct *clientTree) syncAll(dest map[string]entry) error {
if err := ct.updateRoot(); err != nil {
if err := ct.updateRoot(context.Background()); err != nil {
return err
}
if err := ct.links.resolveAll(dest); err != nil {
@@ -60,12 +67,20 @@

// syncRandom retrieves a single entry of the tree. The Node return value
// is non-nil if the entry was a node.
func (ct *clientTree) syncRandom(ctx context.Context) (*enode.Node, error) {
func (ct *clientTree) syncRandom(ctx context.Context) (n *enode.Node, err error) {
if ct.rootUpdateDue() {
if err := ct.updateRoot(); err != nil {
if err := ct.updateRoot(ctx); err != nil {
return nil, err
}
}

// Update fail counter for leaf request errors.
defer func() {
if err != nil {
ct.leafFailCount++
}
}()

// Link tree sync has priority, run it to completion before syncing ENRs.
if !ct.links.done() {
err := ct.syncNextLink(ctx)
@@ -138,15 +153,22 @@ func removeHash(h []string, index int) []string {
}

// updateRoot ensures that the given tree has an up-to-date root.
func (ct *clientTree) updateRoot() error {
func (ct *clientTree) updateRoot(ctx context.Context) error {
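// Apply the failure backoff before resolving the root again; if the
// context is canceled while waiting, give up and report that error.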
if !ct.slowdownRootUpdate(ctx) {
return ctx.Err()
}

ct.lastRootCheck = ct.c.clock.Now()
ctx, cancel := context.WithTimeout(context.Background(), ct.c.cfg.Timeout)
ctx, cancel := context.WithTimeout(ctx, ct.c.cfg.Timeout)
defer cancel()
root, err := ct.c.resolveRoot(ctx, ct.loc)
if err != nil {
ct.rootFailCount++
return err
}
ct.root = &root
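// A successful root update resets both failure counters.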
ct.rootFailCount = 0
ct.leafFailCount = 0

// Invalidate subtrees if changed.
if ct.links == nil || root.lroot != ct.links.root {
@@ -161,7 +183,32 @@

// rootUpdateDue returns true when a root update is needed.
func (ct *clientTree) rootUpdateDue() bool {
return ct.root == nil || time.Duration(ct.c.clock.Now()-ct.lastRootCheck) > ct.c.cfg.RecheckInterval
tooManyFailures := ct.leafFailCount > rootRecheckFailCount
scheduledCheck := ct.c.clock.Now().Sub(ct.lastRootCheck) > ct.c.cfg.RecheckInterval
return ct.root == nil || tooManyFailures || scheduledCheck
}

// slowdownRootUpdate applies a delay to root resolution if it is tried
// too frequently. This avoids busy polling when the client is offline.
// It returns true if the timeout passed, and false if sync was canceled.
func (ct *clientTree) slowdownRootUpdate(ctx context.Context) bool {
var delay time.Duration
switch {
case ct.rootFailCount > 20:
delay = 10 * time.Second
case ct.rootFailCount > 5:
delay = 5 * time.Second
default:
return true
}
timeout := ct.c.clock.NewTimer(delay)
defer timeout.Stop()
select {
case <-timeout.C():
return true
case <-ctx.Done():
return false
}
}

// subtreeSync is the sync of an ENR or link subtree.
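Taken together, slowdownRootUpdate yields a simple schedule: the first five consecutive root failures retry immediately, failures six through twenty wait five seconds each, and everything beyond that waits ten seconds. Below is a standalone restatement of that schedule for illustration only; the rootUpdateDelay helper is hypothetical, not part of the package.

package main

import (
	"fmt"
	"time"
)

// rootUpdateDelay mirrors the switch in slowdownRootUpdate above.
func rootUpdateDelay(rootFailCount int) time.Duration {
	switch {
	case rootFailCount > 20:
		return 10 * time.Second
	case rootFailCount > 5:
		return 5 * time.Second
	default:
		return 0 // first five consecutive failures retry without delay
	}
}

func main() {
	for _, n := range []int{1, 6, 21} {
		fmt.Println(n, rootUpdateDelay(n)) // prints: 1 0s, 6 5s, 21 10s
	}
}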