Skip to content

Commit

Permalink
1. Fix some address crawler timing issues (#3293)
Browse files Browse the repository at this point in the history
* Stop holding completed messages until the next inbound message

* Add more info to network message block download debug logs

* Simplify address metrics logs

* Try handling inbound messages as responses, then try as a new request

* Improve address book logging

* Fix a race between the first heartbeat and getaddr requests

* Temporarily reduce the getaddr fanout to 1

* Update metrics when exiting the Connection run loop

* Downgrade some debug logs to trace
  • Loading branch information
teor2345 authored Jan 4, 2022
1 parent 9b12716 commit 469fa6b
Show file tree
Hide file tree
Showing 9 changed files with 207 additions and 105 deletions.
18 changes: 18 additions & 0 deletions zebra-network/src/address_book.rs
Original file line number Diff line number Diff line change
Expand Up @@ -279,6 +279,7 @@ impl AddressBook {
?previous,
total_peers = self.by_addr.len(),
recent_peers = self.recently_live_peers(chrono_now).count(),
"calculated updated address book entry",
);

if let Some(updated) = updated {
Expand All @@ -303,6 +304,15 @@ impl AddressBook {

self.by_addr.insert(updated.addr, updated);

debug!(
?change,
?updated,
?previous,
total_peers = self.by_addr.len(),
recent_peers = self.recently_live_peers(chrono_now).count(),
"updated address book entry",
);

// Security: Limit the number of peers in the address book.
//
// We only delete outdated peers when we have too many peers.
Expand All @@ -317,6 +327,14 @@ impl AddressBook {
.expect("just checked there is at least one peer");

self.by_addr.remove(&surplus_peer.addr);

debug!(
surplus = ?surplus_peer,
?updated,
total_peers = self.by_addr.len(),
recent_peers = self.recently_live_peers(chrono_now).count(),
"removed surplus address book entry",
);
}

assert!(self.len() <= self.addr_limit);
Expand Down
11 changes: 8 additions & 3 deletions zebra-network/src/address_book_updater.rs
Original file line number Diff line number Diff line change
Expand Up @@ -49,14 +49,17 @@ impl AddressBookUpdater {
// based on the maximum number of inbound and outbound peers.
let (worker_tx, mut worker_rx) = mpsc::channel(config.peerset_total_connection_limit());

let address_book =
AddressBook::new(local_listener, span!(Level::TRACE, "address book updater"));
let address_book = AddressBook::new(local_listener, span!(Level::TRACE, "address book"));
let address_metrics = address_book.address_metrics_watcher();
let address_book = Arc::new(std::sync::Mutex::new(address_book));

let worker_address_book = address_book.clone();
let worker = move || {
info!("starting the address book updater");

while let Some(event) = worker_rx.blocking_recv() {
trace!(?event, "got address book change");

// # Correctness
//
// Briefly hold the address book threaded mutex, to update the
Expand All @@ -67,7 +70,9 @@ impl AddressBookUpdater {
.update(event);
}

Err(AllAddressBookUpdaterSendersClosed.into())
let error = Err(AllAddressBookUpdaterSendersClosed.into());
info!(?error, "stopping address book updater");
error
};

// Correctness: spawn address book accesses on a blocking thread,
Expand Down
9 changes: 7 additions & 2 deletions zebra-network/src/constants.rs
Original file line number Diff line number Diff line change
Expand Up @@ -133,14 +133,19 @@ pub const PEER_GET_ADDR_TIMEOUT: Duration = Duration::from_secs(8);

/// The number of GetAddr requests sent when crawling for new peers.
///
/// ## SECURITY
/// # Security
///
/// The fanout should be greater than 2, so that Zebra avoids getting a majority
/// of its initial address book entries from a single peer.
///
/// Zebra regularly crawls for new peers, initiating a new crawl every
/// [`crawl_new_peer_interval`](crate::config::Config::crawl_new_peer_interval).
pub const GET_ADDR_FANOUT: usize = 3;
///
/// TODO: Restore the fanout to 3, once fanouts are limited to the number of ready peers (#2214)
///
/// In #3110, we changed the fanout to 1, to make sure we actually use cached address responses.
/// With a fanout of 3, we were dropping a lot of responses, because the overall crawl timed out.
pub const GET_ADDR_FANOUT: usize = 1;

/// The maximum number of addresses allowed in an `addr` or `addrv2` message.
///
Expand Down
Loading

0 comments on commit 469fa6b

Please sign in to comment.