Skip to content

Commit

Permalink
Stats overhaul (#4583)
Browse files Browse the repository at this point in the history
* Introduce `nano::elapse ()` helper

* Move stats test to a dedicated file

* Missing override & smaller improvements

* nano::stats::dump

* Move implementation to .cpp file

* Index stats by a dedicated struct

* Remove stat observers

* Overhaul

* Config

* Use dedicated thread

* Separate stat sinks

* Samples writer

* Fix for max size

* Simple sampler key

* Expected min max

* Fix tests

* Cleanup

* Test for samples rpc

* Implement sampling for node components

* TODO

* Remove special semantics of `stat::detail::all`

* Guard against invalid values

* Thread loop interval

* More tests

* Flag to aggregate `stat::detail::all`

---------

Co-authored-by: Colin LeMahieu <[email protected]>
  • Loading branch information
pwojcikdev and clemahieu authored Apr 30, 2024
1 parent 161f418 commit 04de36c
Show file tree
Hide file tree
Showing 28 changed files with 805 additions and 857 deletions.
1 change: 1 addition & 0 deletions nano/core_test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ add_executable(
peer_container.cpp
rep_weight_store.cpp
scheduler_buckets.cpp
stats.cpp
request_aggregator.cpp
signal_manager.cpp
socket.cpp
Expand Down
2 changes: 1 addition & 1 deletion nano/core_test/confirming_set.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ TEST (confirmation_callback, observer_callbacks)
node->confirming_set.add (send1->hash ());

// Callback is performed for all blocks that are confirmed
ASSERT_TIMELY_EQ (5s, 2, node->ledger.stats.count (nano::stat::type::confirmation_observer, nano::stat::detail::all, nano::stat::dir::out));
ASSERT_TIMELY_EQ (5s, 2, node->ledger.stats.count (nano::stat::type::confirmation_observer, nano::stat::dir::out));

ASSERT_EQ (2, node->stats.count (nano::stat::type::confirmation_height, nano::stat::detail::blocks_confirmed, nano::stat::dir::in));
ASSERT_EQ (3, node->ledger.cemented_count ());
Expand Down
52 changes: 1 addition & 51 deletions nano/core_test/node.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1694,56 +1694,6 @@ TEST (node, bootstrap_connection_scaling)
ASSERT_EQ (1, node1.bootstrap_initiator.connections->target_connections (50000, 1));
}

// Checks counter bookkeeping when values are recorded with and without an explicit detail
TEST (node, stat_counting)
{
	nano::test::system system (1);
	auto & stats = system.nodes[0]->stats;
	auto const ledger = nano::stat::type::ledger;
	auto const in = nano::stat::dir::in;

	// Updates recorded without a detail: 1 + 5 + 1
	stats.add (ledger, in, 1);
	stats.add (ledger, in, 5);
	stats.inc (ledger, in);

	// Updates recorded against a detail: two sends and one receive
	stats.inc (ledger, nano::stat::detail::send, in);
	stats.inc (ledger, nano::stat::detail::send, in);
	stats.inc (ledger, nano::stat::detail::receive, in);

	// The type-level count is expected to cover every update above (7 + 3)
	ASSERT_EQ (10, stats.count (ledger, in));
	ASSERT_EQ (2, stats.count (ledger, nano::stat::detail::send, in));
	ASSERT_EQ (1, stats.count (ledger, nano::stat::detail::receive, in));

	// Adding zero is expected to leave the total unchanged
	stats.add (ledger, in, 0);
	ASSERT_EQ (10, stats.count (ledger, in));
}

// Checks histogram definition and updates for explicit bins and for uniformly generated bins
TEST (node, stat_histogram)
{
	nano::test::system system (1);
	auto & stats = system.nodes[0]->stats;
	auto const vote = nano::stat::type::vote;
	auto const confirm_req = nano::stat::detail::confirm_req;
	auto const confirm_ack = nano::stat::detail::confirm_ack;
	auto const in = nano::stat::dir::in;
	auto const out = nano::stat::dir::out;

	// Explicitly listed bin boundaries; 50 is recorded at index 1 and expected in the first bin
	stats.define_histogram (vote, confirm_req, in, { 1, 6, 10, 16 });
	stats.update_histogram (vote, confirm_req, in, 1, 50);
	auto histogram_req = stats.get_histogram (vote, confirm_req, in);
	auto const & bins_req = histogram_req->get_bins ();
	ASSERT_EQ (bins_req[0].value, 50);

	// Uniform distribution (12 bins, width 1); the update at 100 is expected to be clamped to the last bin
	stats.define_histogram (vote, confirm_ack, in, { 1, 13 }, 12);
	stats.update_histogram (vote, confirm_ack, in, 1);
	stats.update_histogram (vote, confirm_ack, in, 8, 10);
	stats.update_histogram (vote, confirm_ack, in, 100);

	auto histogram_ack = stats.get_histogram (vote, confirm_ack, in);
	auto const & bins_ack = histogram_ack->get_bins ();
	ASSERT_EQ (bins_ack[0].value, 1);
	ASSERT_EQ (bins_ack[7].value, 10);
	ASSERT_EQ (bins_ack[11].value, 1);

	// Uniform distribution (2 bins, width 5); one update lands in each bin
	stats.define_histogram (vote, confirm_ack, out, { 1, 11 }, 2);
	stats.update_histogram (vote, confirm_ack, out, 1, 1);
	stats.update_histogram (vote, confirm_ack, out, 6, 1);

	auto histogram_ack_out = stats.get_histogram (vote, confirm_ack, out);
	auto const & bins_ack_out = histogram_ack_out->get_bins ();
	ASSERT_EQ (bins_ack_out[0].value, 1);
	ASSERT_EQ (bins_ack_out[1].value, 1);
}

TEST (node, online_reps)
{
nano::test::system system (1);
Expand Down Expand Up @@ -2469,7 +2419,7 @@ TEST (node, DISABLED_fork_invalid_block_signature)
// Send the vote with the corrupt block signature
node2.network.flood_vote (vote_corrupt, 1.0f);
// Wait for the rollback
ASSERT_TIMELY (5s, node1.stats.count (nano::stat::type::rollback, nano::stat::detail::all));
ASSERT_TIMELY (5s, node1.stats.count (nano::stat::type::rollback));
// Send the vote with the correct block
node2.network.flood_vote (vote, 1.0f);
ASSERT_TIMELY (10s, !node1.block (send1->hash ()));
Expand Down
12 changes: 6 additions & 6 deletions nano/core_test/processing_queue.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,14 +10,14 @@ using namespace std::chrono_literals;
TEST (processing_queue, construction)
{
nano::test::system system{};
nano::processing_queue<int> queue{ system.stats, {}, {}, 4, 8 * 1024, 1024 };
nano::processing_queue<int> queue{ system.stats, nano::stat::type::test, {}, 4, 8 * 1024, 1024 };
ASSERT_EQ (queue.size (), 0);
}

TEST (processing_queue, process_one)
{
nano::test::system system{};
nano::processing_queue<int> queue{ system.stats, {}, {}, 4, 8 * 1024, 1024 };
nano::processing_queue<int> queue{ system.stats, nano::stat::type::test, {}, 4, 8 * 1024, 1024 };

std::atomic<std::size_t> processed{ 0 };
queue.process_batch = [&] (auto & batch) {
Expand All @@ -35,7 +35,7 @@ TEST (processing_queue, process_one)
TEST (processing_queue, process_many)
{
nano::test::system system{};
nano::processing_queue<int> queue{ system.stats, {}, {}, 4, 8 * 1024, 1024 };
nano::processing_queue<int> queue{ system.stats, nano::stat::type::test, {}, 4, 8 * 1024, 1024 };

std::atomic<std::size_t> processed{ 0 };
queue.process_batch = [&] (auto & batch) {
Expand All @@ -57,7 +57,7 @@ TEST (processing_queue, process_many)
TEST (processing_queue, max_queue_size)
{
nano::test::system system{};
nano::processing_queue<int> queue{ system.stats, {}, {}, 4, 1024, 128 };
nano::processing_queue<int> queue{ system.stats, nano::stat::type::test, {}, 4, 1024, 128 };

const int count = 2 * 1024; // Double the max queue size
for (int n = 0; n < count; ++n)
Expand All @@ -71,7 +71,7 @@ TEST (processing_queue, max_queue_size)
TEST (processing_queue, max_batch_size)
{
nano::test::system system{};
nano::processing_queue<int> queue{ system.stats, {}, {}, 4, 1024, 128 };
nano::processing_queue<int> queue{ system.stats, nano::stat::type::test, {}, 4, 1024, 128 };

// Fill queue before starting processing threads
const int count = 1024;
Expand All @@ -97,7 +97,7 @@ TEST (processing_queue, max_batch_size)
TEST (processing_queue, parallel)
{
nano::test::system system{};
nano::processing_queue<int> queue{ system.stats, {}, {}, 16, 1024, 1 };
nano::processing_queue<int> queue{ system.stats, nano::stat::type::test, {}, 16, 1024, 1 };

std::atomic<std::size_t> processed{ 0 };
queue.process_batch = [&] (auto & batch) {
Expand Down
79 changes: 79 additions & 0 deletions nano/core_test/stats.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
#include <nano/test_common/system.hpp>
#include <nano/test_common/testutil.hpp>

#include <gtest/gtest.h>

#include <ostream>

// Checks that per-detail counters are tracked individually and that the type-level count covers them all
TEST (stats, counters)
{
	nano::test::system system;
	auto & stats = system.add_node ()->stats;
	auto const ledger = nano::stat::type::ledger;
	auto const in = nano::stat::dir::in;

	// detail::test receives 1 + 5 + 1
	stats.add (ledger, nano::stat::detail::test, in, 1);
	stats.add (ledger, nano::stat::detail::test, in, 5);
	stats.inc (ledger, nano::stat::detail::test, in);

	// Two sends and one receive
	stats.inc (ledger, nano::stat::detail::send, in);
	stats.inc (ledger, nano::stat::detail::send, in);
	stats.inc (ledger, nano::stat::detail::receive, in);

	// The type-level count is expected to equal the sum of the updates above (7 + 2 + 1)
	ASSERT_EQ (10, stats.count (ledger, in));
	ASSERT_EQ (2, stats.count (ledger, nano::stat::detail::send, in));
	ASSERT_EQ (1, stats.count (ledger, nano::stat::detail::receive, in));

	// Adding zero is expected to leave the total unchanged
	stats.add (ledger, nano::stat::detail::test, in, 0);

	ASSERT_EQ (10, stats.count (ledger, in));
}

// Checks counting when `add` is called with its trailing boolean argument set to true:
// the test expects the value to show up under `detail::all` as well as under the given detail
TEST (stats, counters_aggregate_all)
{
	nano::test::system system;
	auto & stats = system.add_node ()->stats;
	auto const ledger = nano::stat::type::ledger;
	auto const in = nano::stat::dir::in;

	// First update: 1 recorded against detail::test
	stats.add (ledger, nano::stat::detail::test, in, 1, true);

	ASSERT_EQ (1, stats.count (ledger, in));
	ASSERT_EQ (1, stats.count (ledger, nano::stat::detail::all, in));
	ASSERT_EQ (1, stats.count (ledger, nano::stat::detail::test, in));

	// Second update: 5 recorded against a different detail
	stats.add (ledger, nano::stat::detail::activate, in, 5, true);

	// Totals are expected to grow to 6 while detail::test stays at 1
	ASSERT_EQ (6, stats.count (ledger, in));
	ASSERT_EQ (6, stats.count (ledger, nano::stat::detail::all, in));
	ASSERT_EQ (1, stats.count (ledger, nano::stat::detail::test, in));
}

// Checks sample collection: values are expected back in insertion order, a read is expected
// to drain the collected values for that key, and different sample keys are kept apart
TEST (stats, samples)
{
	nano::test::system system;
	auto & stats = system.add_node ()->stats;
	auto const election_duration = nano::stat::sample::active_election_duration;
	auto const tag_duration = nano::stat::sample::bootstrap_tag_duration;

	// Four values for the election key; two of them (11 and 37) are above the { 1, 10 } pair passed in
	stats.sample (election_duration, { 1, 10 }, 5);
	stats.sample (election_duration, { 1, 10 }, 5);
	stats.sample (election_duration, { 1, 10 }, 11);
	stats.sample (election_duration, { 1, 10 }, 37);

	// One value for an unrelated key
	stats.sample (tag_duration, { 1, 10 }, 2137);

	// First read returns everything recorded so far for the election key
	auto first_read = stats.samples (election_duration);
	ASSERT_EQ (4, first_read.size ());
	ASSERT_EQ (5, first_read[0]);
	ASSERT_EQ (5, first_read[1]);
	ASSERT_EQ (11, first_read[2]);
	ASSERT_EQ (37, first_read[3]);

	// An immediate second read is expected to come back empty
	auto second_read = stats.samples (election_duration);
	ASSERT_EQ (0, second_read.size ());

	// Recording again after the read yields only the new value
	stats.sample (election_duration, { 1, 10 }, 3);

	auto third_read = stats.samples (election_duration);
	ASSERT_EQ (1, third_read.size ());
	ASSERT_EQ (3, third_read[0]);

	// The other key is expected to be unaffected by the reads above
	auto tag_read = stats.samples (tag_duration);
	ASSERT_EQ (1, tag_read.size ());
	ASSERT_EQ (2137, tag_read[0]);
}
24 changes: 9 additions & 15 deletions nano/core_test/toml.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -227,12 +227,10 @@ TEST (toml, daemon_config_deserialize_defaults)
ASSERT_EQ (conf.node.diagnostics_config.txn_tracking.min_read_txn_time, defaults.node.diagnostics_config.txn_tracking.min_read_txn_time);
ASSERT_EQ (conf.node.diagnostics_config.txn_tracking.min_write_txn_time, defaults.node.diagnostics_config.txn_tracking.min_write_txn_time);

ASSERT_EQ (conf.node.stats_config.sampling_enabled, defaults.node.stats_config.sampling_enabled);
ASSERT_EQ (conf.node.stats_config.interval, defaults.node.stats_config.interval);
ASSERT_EQ (conf.node.stats_config.capacity, defaults.node.stats_config.capacity);
ASSERT_EQ (conf.node.stats_config.max_samples, defaults.node.stats_config.max_samples);
ASSERT_EQ (conf.node.stats_config.log_rotation_count, defaults.node.stats_config.log_rotation_count);
ASSERT_EQ (conf.node.stats_config.log_interval_samples, defaults.node.stats_config.log_interval_samples);
ASSERT_EQ (conf.node.stats_config.log_interval_counters, defaults.node.stats_config.log_interval_counters);
ASSERT_EQ (conf.node.stats_config.log_samples_interval, defaults.node.stats_config.log_samples_interval);
ASSERT_EQ (conf.node.stats_config.log_counters_interval, defaults.node.stats_config.log_counters_interval);
ASSERT_EQ (conf.node.stats_config.log_headers, defaults.node.stats_config.log_headers);
ASSERT_EQ (conf.node.stats_config.log_counters_filename, defaults.node.stats_config.log_counters_filename);
ASSERT_EQ (conf.node.stats_config.log_samples_filename, defaults.node.stats_config.log_samples_filename);
Expand Down Expand Up @@ -514,6 +512,9 @@ TEST (toml, daemon_config_deserialize_no_defaults)
rep_crawler = true
work_generation_time = false
[node.statistics]
max_samples = 999
[node.statistics.log]
filename_counters = "devcounters.stat"
filename_samples = "devsamples.stat"
Expand All @@ -522,11 +523,6 @@ TEST (toml, daemon_config_deserialize_no_defaults)
interval_samples = 999
rotation_count = 999
[node.statistics.sampling]
capacity = 999
enable = true
interval = 999
[node.websocket]
address = "0:0:0:0:0:ffff:7f01:101"
enable = true
Expand Down Expand Up @@ -683,12 +679,10 @@ TEST (toml, daemon_config_deserialize_no_defaults)
ASSERT_NE (conf.node.diagnostics_config.txn_tracking.min_read_txn_time, defaults.node.diagnostics_config.txn_tracking.min_read_txn_time);
ASSERT_NE (conf.node.diagnostics_config.txn_tracking.min_write_txn_time, defaults.node.diagnostics_config.txn_tracking.min_write_txn_time);

ASSERT_NE (conf.node.stats_config.sampling_enabled, defaults.node.stats_config.sampling_enabled);
ASSERT_NE (conf.node.stats_config.interval, defaults.node.stats_config.interval);
ASSERT_NE (conf.node.stats_config.capacity, defaults.node.stats_config.capacity);
ASSERT_NE (conf.node.stats_config.max_samples, defaults.node.stats_config.max_samples);
ASSERT_NE (conf.node.stats_config.log_rotation_count, defaults.node.stats_config.log_rotation_count);
ASSERT_NE (conf.node.stats_config.log_interval_samples, defaults.node.stats_config.log_interval_samples);
ASSERT_NE (conf.node.stats_config.log_interval_counters, defaults.node.stats_config.log_interval_counters);
ASSERT_NE (conf.node.stats_config.log_samples_interval, defaults.node.stats_config.log_samples_interval);
ASSERT_NE (conf.node.stats_config.log_counters_interval, defaults.node.stats_config.log_counters_interval);
ASSERT_NE (conf.node.stats_config.log_headers, defaults.node.stats_config.log_headers);
ASSERT_NE (conf.node.stats_config.log_counters_filename, defaults.node.stats_config.log_counters_filename);
ASSERT_NE (conf.node.stats_config.log_samples_filename, defaults.node.stats_config.log_samples_filename);
Expand Down
1 change: 1 addition & 0 deletions nano/lib/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,7 @@ add_library(
stats.cpp
stats_enums.hpp
stats_enums.cpp
stats_sinks.hpp
stream.hpp
thread_pool.hpp
thread_pool.cpp
Expand Down
Loading

0 comments on commit 04de36c

Please sign in to comment.