[5.0] Minimize abort/start block for speculative blocks #1845

Merged
merged 10 commits on Nov 1, 2023
@@ -55,16 +55,7 @@ namespace block_timing_util {
}

inline fc::time_point calculate_producing_block_deadline(fc::microseconds cpu_effort, chain::block_timestamp_type block_time) {
auto estimated_deadline = production_round_block_start_time(cpu_effort, block_time) + cpu_effort;
auto now = fc::time_point::now();
if (estimated_deadline > now) {
return estimated_deadline;
} else {
// This could only happen when the producer stop producing and then comes back alive in the middle of its own
// production round. In this case, we just use the hard deadline.
const auto hard_deadline = block_time.to_time_point() - fc::microseconds(chain::config::block_interval_us - cpu_effort.count());
return std::min(hard_deadline, now + cpu_effort);
}
return production_round_block_start_time(cpu_effort, block_time) + cpu_effort;
}

namespace detail {
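For reference, the simplified deadline above is just the block's position in the production round measured in cpu-effort units; it no longer clamps against now. Below is a minimal standalone sketch of that arithmetic, using plain std::chrono rather than the fc types, with assumed default values (a 500 ms block interval, a cpu effort of 462.5 ms) and assuming production_round_block_start_time returns the round start plus slot_in_round * cpu_effort:

```cpp
#include <chrono>
#include <cstdio>

int main() {
    using namespace std::chrono;
    const auto block_interval = milliseconds(500);
    const auto cpu_effort     = microseconds(462'500); // default: 500ms - 450ms/12
    const auto round_start    = milliseconds(0);       // start of a 12-block production round

    for (int slot = 0; slot < 12; ++slot) {
        const auto block_time = round_start + (slot + 1) * block_interval; // wall-clock slot boundary
        const auto deadline   = round_start + (slot + 1) * cpu_effort;     // simplified deadline
        std::printf("block %2d: deadline %.4f s, %.1f ms before its slot boundary\n",
                    slot + 1,
                    duration<double>(deadline).count(),
                    duration<double, std::milli>(block_time - deadline).count());
    }
}
```

With these numbers the deadline of the last block in a 12-block round falls a full 450 ms before its slot boundary, which is the situation the producer_plugin changes below have to handle for nodes that are speculating rather than producing.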
20 changes: 16 additions & 4 deletions plugins/producer_plugin/producer_plugin.cpp
@@ -772,9 +772,10 @@ class producer_plugin_impl : public std::enable_shared_from_this<producer_plugin
}

void restart_speculative_block() {
// log message is used by Node.py verifyStartingBlockMessages in distributed-transactions-test.py test
fc_dlog(_log, "Restarting exhausted speculative block #${n}", ("n", chain_plug->chain().head_block_num() + 1));
// abort the pending block
abort_block();

schedule_production_loop();
}

@@ -1829,8 +1830,6 @@ producer_plugin_impl::start_block_result producer_plugin_impl::start_block() {
last_start_block_time = now;
}

// create speculative blocks at regular intervals, so we create blocks with "current" block time
_pending_block_deadline = block_timing_util::calculate_producing_block_deadline(_produce_block_cpu_effort, block_time);
if (in_producing_mode()) {
uint32_t production_round_index = block_timestamp_type(block_time).slot % chain::config::producer_repetitions;
if (production_round_index == 0) {
@@ -1844,6 +1843,15 @@ producer_plugin_impl::start_block_result producer_plugin_impl::start_block() {
}
}

// create speculative blocks at regular intervals, so we create blocks with "current" block time
_pending_block_deadline = block_timing_util::calculate_producing_block_deadline(_produce_block_cpu_effort, block_time);
if (in_speculating_mode()) { // if we are producing, then produce block even if deadline has passed
// speculative block, no reason to start a block that will immediately be re-started, set deadline in the future
// a block should come in during this time, if not then just keep creating the block every produce_block_cpu_effort
if (now + fc::milliseconds(config::block_interval_ms/10) > _pending_block_deadline) {
_pending_block_deadline = now + _produce_block_cpu_effort;
}
}
const auto& preprocess_deadline = _pending_block_deadline;

fc_dlog(_log, "Starting block #${n} at ${time} producer ${p}", ("n", pending_block_num)("time", now)("p", scheduled_producer.producer_name));
@@ -2488,9 +2496,13 @@ void producer_plugin_impl::schedule_production_loop() {
chain::controller& chain = chain_plug->chain();
fc_dlog(_log, "Speculative Block Created; Scheduling Speculative/Production Change");
EOS_ASSERT(chain.is_building_block(), missing_pending_block_state, "speculating without pending_block_state");
auto wake_time = block_timing_util::calculate_producer_wake_up_time(_produce_block_cpu_effort, chain.pending_block_num(), chain.pending_block_timestamp(),
auto wake_time = block_timing_util::calculate_producer_wake_up_time(fc::microseconds{config::block_interval_us}, chain.pending_block_num(), chain.pending_block_timestamp(),
_producers, chain.head_block_state()->active_schedule.producers,
_producer_watermarks);
if (wake_time && fc::time_point::now() > *wake_time) {
// if wake time has already passed then use the block deadline instead
wake_time = _pending_block_deadline;
}
schedule_delayed_production_loop(weak_from_this(), wake_time);
} else {
fc_dlog(_log, "Speculative Block Created");
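And a small sketch of the wake-time fallback above, again with assumed values and std::optional/std::chrono instead of the fc types: if the wake-up time derived from the block schedule has already passed, the production loop is scheduled on the pending block deadline rather than on an expired timer.

```cpp
#include <chrono>
#include <cstdio>
#include <optional>

int main() {
    using namespace std::chrono;
    const auto now                    = steady_clock::now();
    const auto pending_block_deadline = now + microseconds(462'500);

    // hypothetical: the schedule-derived wake time is already 5 ms in the past
    std::optional<steady_clock::time_point> wake_time = now - milliseconds(5);

    if (wake_time && now > *wake_time)
        wake_time = pending_block_deadline; // fall back to the pending block deadline

    std::printf("sleeping for %lld us\n",
                static_cast<long long>(duration_cast<microseconds>(*wake_time - now).count()));
}
```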
60 changes: 55 additions & 5 deletions plugins/producer_plugin/test/test_block_timing_util.cpp
@@ -51,11 +51,12 @@ BOOST_AUTO_TEST_CASE(test_calculate_block_deadline) {
{
// Scenario 2:
// In producing mode and it is already too late to meet the optimized deadlines,
// the returned deadline can never be later than the hard deadlines.
// the returned deadline can never be later than the next block deadline.
// now is not taken into account, but leaving set_now in to verify it has no effect.

auto second_block_time = eosio::chain::block_timestamp_type(production_round_1st_block_slot + 1);
fc::mock_time_traits::set_now(second_block_time.to_time_point() - fc::milliseconds(200));
auto second_block_hard_deadline = second_block_time.to_time_point() - fc::milliseconds(100);
auto second_block_hard_deadline = second_block_time.to_time_point() - fc::milliseconds(200);
BOOST_CHECK_EQUAL(calculate_producing_block_deadline(cpu_effort, second_block_time),
second_block_hard_deadline);
// use previous deadline as now
@@ -75,22 +76,53 @@ BOOST_AUTO_TEST_CASE(test_calculate_block_deadline) {
auto seventh_block_time = eosio::chain::block_timestamp_type(production_round_1st_block_slot + 6);
fc::mock_time_traits::set_now(seventh_block_time.to_time_point() - fc::milliseconds(500));

// 7th block where cpu effort is 100ms less per block
BOOST_CHECK_EQUAL(calculate_producing_block_deadline(cpu_effort, seventh_block_time),
seventh_block_time.to_time_point() - fc::milliseconds(100));
seventh_block_time.to_time_point() - fc::milliseconds(700));

// use previous deadline as now
fc::mock_time_traits::set_now(seventh_block_time.to_time_point() - fc::milliseconds(100));
auto eighth_block_time = eosio::chain::block_timestamp_type(production_round_1st_block_slot + 7);

// 8th block where cpu effort is 100ms less per block
BOOST_CHECK_EQUAL(calculate_producing_block_deadline(cpu_effort, eighth_block_time),
eighth_block_time.to_time_point() - fc::milliseconds(200));
eighth_block_time.to_time_point() - fc::milliseconds(800));

// use previous deadline as now
fc::mock_time_traits::set_now(eighth_block_time.to_time_point() - fc::milliseconds(200));
auto ninth_block_time = eosio::chain::block_timestamp_type(production_round_1st_block_slot + 8);

// 9th block where cpu effort is 100ms less per block
BOOST_CHECK_EQUAL(calculate_producing_block_deadline(cpu_effort, ninth_block_time),
ninth_block_time.to_time_point() - fc::milliseconds(300));
ninth_block_time.to_time_point() - fc::milliseconds(900));
}
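// Worked values for the three checks above, assuming cpu_effort is 100 ms less than
// the 500 ms block interval: the deadline of the nth block in a round is
// block_time - n * 100ms, hence -700 ms, -800 ms and -900 ms for blocks 7, 8 and 9.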
{
// Scenario: time has passed for block to be produced
// Ask for a deadline for current block but its deadline has already passed
auto default_cpu_effort = fc::microseconds(block_interval.count() - (450000/12)); // 462,500
// with the default_cpu_effort, the first block deadline would be 29.9625 (29.500 + 0.4625)
fc::mock_time_traits::set_now(fc::time_point::from_iso_string("2023-10-29T13:40:29.963")); // past the first block deadline
auto first_block_time = eosio::chain::block_timestamp_type(fc::time_point::from_iso_string("2023-10-29T13:40:30.000"));
// pass the first block deadline
fc::time_point block_deadline = calculate_producing_block_deadline(default_cpu_effort, first_block_time);
// not equal since block_deadline is 29.962500
BOOST_CHECK_NE(block_deadline, fc::time_point::from_iso_string("2023-10-29T13:40:29.962"));
BOOST_CHECK_EQUAL(block_deadline.to_iso_string(), fc::time_point::from_iso_string("2023-10-29T13:40:29.962").to_iso_string()); // truncates

// second block has passed
fc::mock_time_traits::set_now(fc::time_point::from_iso_string("2023-10-29T13:40:30.426")); // past the second block deadline
auto second_block_time = eosio::chain::block_timestamp_type(fc::time_point::from_iso_string("2023-10-29T13:40:30.500"));
// pass the second block deadline
block_deadline = calculate_producing_block_deadline(default_cpu_effort, second_block_time);
// 29.9625 + 0.4625 (one more cpu_effort) = 30.425
BOOST_CHECK_EQUAL(block_deadline, fc::time_point::from_iso_string("2023-10-29T13:40:30.425"));
}
{
// Real scenario that caused multiple start blocks
auto default_cpu_effort = fc::microseconds(block_interval.count() - (450000/12)); // 462,500
auto block_time = eosio::chain::block_timestamp_type(fc::time_point::from_iso_string("2023-10-31T13:37:35.000"));
fc::time_point block_deadline = calculate_producing_block_deadline(default_cpu_effort, block_time);
BOOST_CHECK_EQUAL(block_deadline.to_iso_string(), fc::time_point::from_iso_string("2023-10-31T13:37:34.587").to_iso_string());
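// Worked value, assuming 2023-10-31T13:37:35.000 falls on the 11th slot of its
// production round: deadline = 35.000 - 11 * (500ms - 462.5ms) = 13:37:34.5875,
// which the millisecond ISO string truncates to 13:37:34.587.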
}
}

@@ -248,6 +280,24 @@ BOOST_AUTO_TEST_CASE(test_calculate_producer_wake_up_time) {
expected_block_time = block_timestamp_type(prod_round_1st_block_slot + 2*config::producer_repetitions + 2).to_time_point(); // with watermark, wait until next
BOOST_CHECK_EQUAL(calculate_producer_wake_up_time(full_cpu_effort, 2, block_timestamp, producers, active_schedule, prod_watermarks), expected_block_time);
}
{ // actual example that caused multiple start blocks
producer_watermarks prod_watermarks;
std::set<account_name> producers = {
"inita"_n, "initb"_n, "initc"_n, "initd"_n, "inite"_n, "initf"_n, "initg"_n, "p1"_n,
"inith"_n, "initi"_n, "initj"_n, "initk"_n, "initl"_n, "initm"_n, "initn"_n,
"inito"_n, "initp"_n, "initq"_n, "initr"_n, "inits"_n, "initt"_n, "initu"_n, "p2"_n
};
std::vector<chain::producer_authority> active_schedule{
{"inita"_n}, {"initb"_n}, {"initc"_n}, {"initd"_n}, {"inite"_n}, {"initf"_n}, {"initg"_n},
{"inith"_n}, {"initi"_n}, {"initj"_n}, {"initk"_n}, {"initl"_n}, {"initm"_n}, {"initn"_n},
{"inito"_n}, {"initp"_n}, {"initq"_n}, {"initr"_n}, {"inits"_n}, {"initt"_n}, {"initu"_n}
};
auto default_cpu_effort = fc::microseconds(block_interval.count() - (450000/12)); // 462,500
auto wake_time = calculate_producer_wake_up_time(default_cpu_effort, 106022362, eosio::chain::block_timestamp_type(fc::time_point::from_iso_string("2023-10-31T16:06:41.000")),
producers, active_schedule, prod_watermarks);
BOOST_REQUIRE(!!wake_time);
BOOST_CHECK_EQUAL(wake_time->to_iso_string(), fc::time_point::from_iso_string("2023-10-31T16:06:40.587").to_iso_string());
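// Worked value, assuming 2023-10-31T16:06:41.000 falls on the 11th slot of its
// production round: wake time = 41.000 - 11 * (500ms - 462.5ms) = 16:06:40.5875,
// truncated to 16:06:40.587 in the ISO string.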
}

}

32 changes: 32 additions & 0 deletions tests/TestHarness/Node.py
@@ -550,6 +550,38 @@ def findInLog(self, searchStr):
return True
return False

# verify only one or two 'Starting block' per block number unless block is restarted
def verifyStartingBlockMessages(self):
    dataDir=Utils.getNodeDataDir(self.nodeId)
    files=Node.findStderrFiles(dataDir)
    for f in files:
        blockNumbers = set()
        duplicateBlockNumbers = set()
        threeStartsFound = False
        lastRestartBlockNum = 0
        blockNumber = 0

        with open(f, 'r') as file:
            for line in file:
                match = re.match(r".*Restarting exhausted speculative block #(\d+)", line)
                if match:
                    lastRestartBlockNum = match.group(1)
                    continue
                if re.match(r".*unlinkable_block_exception", line):
                    lastRestartBlockNum = blockNumber
                    continue
                match = re.match(r".*Starting block #(\d+)", line)
                if match:
                    blockNumber = match.group(1)
                    if blockNumber != lastRestartBlockNum and blockNumber in duplicateBlockNumbers:
                        print(f"Duplicate Starting block found: {blockNumber} in {f}")
                        threeStartsFound = True
                    if blockNumber != lastRestartBlockNum and blockNumber in blockNumbers:
                        duplicateBlockNumbers.add(blockNumber)
                    blockNumbers.add(blockNumber)

    return not threeStartsFound

def analyzeProduction(self, specificBlockNum=None, thresholdMs=500):
dataDir=Utils.getNodeDataDir(self.nodeId)
files=Node.findStderrFiles(dataDir)
5 changes: 5 additions & 0 deletions tests/distributed-transactions-test.py
@@ -111,6 +111,11 @@

cluster.compareBlockLogs()

# verify only one start block per block unless interrupted
for node in cluster.getAllNodes():
    if not node.verifyStartingBlockMessages():
        errorExit("Found more than one Starting block in logs")

testSuccessful=True
finally:
TestHelper.shutdown(cluster, walletMgr, testSuccessful, dumpErrorDetails)