diff --git a/Cargo.lock b/Cargo.lock index d60dadf40..bd797f51c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -311,7 +311,7 @@ dependencies = [ "futures", "futures-utils-wasm", "lru", - "parking_lot 0.12.3", + "parking_lot", "pin-project", "schnellru", "serde", @@ -1482,6 +1482,12 @@ dependencies = [ "serde", ] +[[package]] +name = "bytesize" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a3e368af43e418a04d52505cf3dbc23dda4e3407ae2fa99fd0e4f308ce546acc" + [[package]] name = "bzip2" version = "0.4.4" @@ -2153,7 +2159,7 @@ dependencies = [ "hashbrown 0.14.5", "lock_api", "once_cell", - "parking_lot_core 0.9.10", + "parking_lot_core", ] [[package]] @@ -2167,7 +2173,7 @@ dependencies = [ "hashbrown 0.14.5", "lock_api", "once_cell", - "parking_lot_core 0.9.10", + "parking_lot_core", ] [[package]] @@ -2635,7 +2641,7 @@ dependencies = [ "hashbrown 0.15.2", "hex", "log", - "parking_lot 0.12.3", + "parking_lot", "rand", "rlp 0.6.1", "uuid 1.11.1", @@ -3779,7 +3785,7 @@ dependencies = [ "ipconfig", "lru-cache", "once_cell", - "parking_lot 0.12.3", + "parking_lot", "rand", "resolv-conf", "smallvec", @@ -4610,7 +4616,7 @@ dependencies = [ "http-body 1.0.1", "http-body-util", "jsonrpsee-types", - "parking_lot 0.12.3", + "parking_lot", "pin-project", "rand", "rustc-hash 2.1.0", @@ -4984,7 +4990,7 @@ dependencies = [ "multihash", "multistream-select", "once_cell", - "parking_lot 0.12.3", + "parking_lot", "pin-project", "quick-protobuf", "rand", @@ -5007,7 +5013,7 @@ dependencies = [ "hickory-resolver", "libp2p-core", "libp2p-identity", - "parking_lot 0.12.3", + "parking_lot", "smallvec", "tracing", ] @@ -5184,7 +5190,7 @@ dependencies = [ "libp2p-core", "libp2p-identity", "libp2p-tls", - "parking_lot 0.12.3", + "parking_lot", "quinn", "rand", "ring 0.17.8", @@ -5415,15 +5421,6 @@ dependencies = [ "linked-hash-map", ] -[[package]] -name = "lru-mem" -version = "0.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" 
-checksum = "cf5c8c26d903a41c80d4cc171940a57a4d1bc51139ebd6aad87e2f9ae3774780" -dependencies = [ - "hashbrown 0.14.5", -] - [[package]] name = "lz4" version = "1.28.1" @@ -6149,17 +6146,6 @@ version = "2.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f38d5652c16fde515bb1ecef450ab0f6a219d619a7274976324d5e377f7dceba" -[[package]] -name = "parking_lot" -version = "0.11.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7d17b78036a60663b797adeaee46f5c9dfebb86948d1255007a1d6be0271ff99" -dependencies = [ - "instant", - "lock_api", - "parking_lot_core 0.8.6", -] - [[package]] name = "parking_lot" version = "0.12.3" @@ -6167,21 +6153,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f1bf18183cf54e8d6059647fc3063646a1801cf30896933ec2311622cc4b9a27" dependencies = [ "lock_api", - "parking_lot_core 0.9.10", -] - -[[package]] -name = "parking_lot_core" -version = "0.8.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "60a2cfe6f0ad2bfc16aefa463b497d5c7a5ecd44a23efa72aa342d90177356dc" -dependencies = [ - "cfg-if", - "instant", - "libc", - "redox_syscall 0.2.16", - "smallvec", - "winapi", + "parking_lot_core", ] [[package]] @@ -6192,7 +6164,7 @@ checksum = "1e401f977ab385c9e4e3ab30627d6f26d00e2c73eef317493c4ec6d468726cf8" dependencies = [ "cfg-if", "libc", - "redox_syscall 0.5.8", + "redox_syscall", "smallvec", "windows-targets 0.52.6", ] @@ -6565,7 +6537,7 @@ dependencies = [ "log", "nix", "once_cell", - "parking_lot 0.12.3", + "parking_lot", "smallvec", "symbolic-demangle", "tempfile", @@ -6659,7 +6631,7 @@ checksum = "504ee9ff529add891127c4827eb481bd69dc0ebc72e9a682e187db4caa60c3ca" dependencies = [ "dtoa", "itoa", - "parking_lot 0.12.3", + "parking_lot", "prometheus-client-derive-encode", ] @@ -6946,12 +6918,13 @@ dependencies = [ ] [[package]] -name = "redox_syscall" -version = "0.2.16" +name = "redb" +version = "2.4.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "fb5a58c1855b4b6819d59012155603f0b22ad30cad752600aadfcb695265519a" +checksum = "ea0a72cd7140de9fc3e318823b883abf819c20d478ec89ce880466dc2ef263c6" dependencies = [ - "bitflags 1.3.2", + "libc", + "log", ] [[package]] @@ -8136,22 +8109,6 @@ dependencies = [ "autocfg", ] -[[package]] -name = "sled" -version = "0.34.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7f96b4737c2ce5987354855aed3797279def4ebf734436c6aa4552cf8e169935" -dependencies = [ - "crc32fast", - "crossbeam-epoch", - "crossbeam-utils", - "fs2", - "fxhash", - "libc", - "log", - "parking_lot 0.11.2", -] - [[package]] name = "slug" version = "0.1.6" @@ -8289,7 +8246,7 @@ dependencies = [ "bumpalo", "index_vec", "indexmap 2.7.0", - "parking_lot 0.12.3", + "parking_lot", "rayon", "rustc-hash 2.1.0", "smallvec", @@ -8409,7 +8366,7 @@ checksum = "f91138e76242f575eb1d3b38b4f1362f10d3a43f47d182a5b359af488a02293b" dependencies = [ "new_debug_unreachable", "once_cell", - "parking_lot 0.12.3", + "parking_lot", "phf_shared 0.10.0", "precomputed-hash", ] @@ -8904,7 +8861,7 @@ dependencies = [ "bytes", "libc", "mio", - "parking_lot 0.12.3", + "parking_lot", "pin-project-lite", "signal-hook-registry", "socket2", @@ -9666,7 +9623,7 @@ checksum = "0048ad49a55b9deb3953841fa1fc5858f0efbcb7a18868c899a360269fac1b23" dependencies = [ "futures", "js-sys", - "parking_lot 0.12.3", + "parking_lot", "pin-utils", "slab", "wasm-bindgen", @@ -10095,7 +10052,7 @@ dependencies = [ "futures", "log", "nohash-hasher", - "parking_lot 0.12.3", + "parking_lot", "pin-project", "rand", "static_assertions", @@ -10110,7 +10067,7 @@ dependencies = [ "futures", "log", "nohash-hasher", - "parking_lot 0.12.3", + "parking_lot", "pin-project", "rand", "static_assertions", @@ -10351,6 +10308,7 @@ dependencies = [ "blsful", "bs58", "bytes", + "bytesize", "cbor4ii", "cfg-if", "clap", @@ -10371,7 +10329,6 @@ dependencies = [ "k256", "libp2p", "lru", - 
"lru-mem", "lz4", "once_cell", "opentelemetry", @@ -10386,6 +10343,7 @@ dependencies = [ "rand_chacha", "rand_core", "rayon", + "redb", "revm", "revm-inspectors", "rusqlite", @@ -10398,7 +10356,6 @@ dependencies = [ "serde_repr", "sha2", "sha3", - "sled", "tempfile", "thiserror 2.0.11", "time", diff --git a/eth-trie.rs/src/db.rs b/eth-trie.rs/src/db.rs index 35f6b3340..a4a5693ee 100644 --- a/eth-trie.rs/src/db.rs +++ b/eth-trie.rs/src/db.rs @@ -1,4 +1,4 @@ -use std::{collections::HashMap, error::Error, sync::Arc}; +use std::{collections::HashMap, fmt::Display, sync::Arc}; use parking_lot::RwLock; @@ -8,7 +8,7 @@ use crate::errors::MemDBError; /// You should first write the data to the cache and write the data /// to the database in bulk after the end of a set of operations. pub trait DB: Send + Sync { - type Error: Error; + type Error: Display; fn get(&self, key: &[u8]) -> Result>, Self::Error>; diff --git a/z2/src/converter.rs b/z2/src/converter.rs index 93304914b..ed6dce11b 100644 --- a/z2/src/converter.rs +++ b/z2/src/converter.rs @@ -22,7 +22,7 @@ use zilliqa::{ block_store::BlockStore, cfg::{scilla_ext_libs_path_default, Amount, Config, NodeConfig}, crypto::{Hash, SecretKey}, - db::Db, + db::{ArcDb, Db}, exec::store_external_libraries, message::{Block, QuorumCertificate, Vote, MAX_COMMITTEE_SIZE}, node::{MessageSender, RequestId}, @@ -77,7 +77,7 @@ fn invoke_checker(state: &State, code: &str, init_data: &[ParamValue]) -> Result #[allow(clippy::type_complexity)] fn convert_scilla_state( zq1_db: &zq1::Db, - zq2_db: &Db, + zq2_db: &Arc, state: &State, code: &str, init_data: &[ParamValue], @@ -185,7 +185,7 @@ fn convert_scilla_state( Ok((storage_root, field_types, transitions)) } -fn convert_evm_state(zq1_db: &zq1::Db, zq2_db: &Db, address: Address) -> Result { +fn convert_evm_state(zq1_db: &zq1::Db, zq2_db: &Arc, address: Address) -> Result { let prefix = create_acc_query_prefix(address); let storage_entries_iter = 
zq1_db.get_contract_state_data_with_prefix(&prefix); @@ -448,7 +448,7 @@ pub async fn convert_persistence( .get_tx_blocks_aux("MaxTxBlockNumber")? .unwrap_or_default(); - let current_block = zq2_db.get_finalized_view()?.unwrap_or(1); + let current_block = zq2_db.read()?.finalized_view()?.get()?.unwrap_or(1); let progress = ProgressBar::new(max_block) .with_style(style.clone()) @@ -546,7 +546,7 @@ pub async fn convert_persistence( parent_hash, zq1_block.block_num - 1, ); - let block = Block::from_qc( + let mut block = Block::from_qc( secret_key, zq1_block.block_num, zq1_block.block_num, @@ -560,6 +560,7 @@ pub async fn convert_persistence( ScillaGas(zq1_block.gas_used).into(), ScillaGas(zq1_block.gas_limit).into(), ); + block.header.hash = zq1_block.block_hash.into(); // For each receipt update block hash. This can be done once all receipts build receipt_root_hash which is used for calculating block hash for receipt in &mut receipts { @@ -568,53 +569,42 @@ pub async fn convert_persistence( parent_hash = zq1_block.block_hash.into(); - zq2_db.with_sqlite_tx(|sqlite_tx| { - zq2_db.insert_block_with_hash_with_db_tx( - sqlite_tx, - zq1_block.block_hash.into(), - &block, - )?; - zq2_db.set_high_qc_with_db_tx(sqlite_tx, block.header.qc)?; - zq2_db.set_finalized_view_with_db_tx(sqlite_tx, block.view())?; - trace!("{} block inserted", block.number()); - - for (hash, transaction) in &transactions { - if let Err(err) = zq2_db.insert_transaction_with_db_tx(sqlite_tx, hash, transaction) - { - warn!( - "Unable to insert transaction with id: {:?} to db, err: {:?}", - *hash, err - ); - } + let write = zq2_db.write()?; + + write.blocks()?.insert(&block)?; + write.high_qc()?.set(&block.header.qc)?; + write.finalized_view()?.set(block.view())?; + trace!("{} block inserted", block.number()); + { + let mut transactions_table = write.transactions()?; + for (hash, txn) in &transactions { + transactions_table.insert(*hash, txn)?; } + let mut receipts_table = write.receipts()?; for receipt 
in &receipts { - if let Err(err) = - zq2_db.insert_transaction_receipt_with_db_tx(sqlite_tx, receipt.to_owned()) - { - warn!( - "Unable to insert receipt with id: {:?} into db, err: {:?}", - receipt.tx_hash, err - ); - } + receipts_table.insert(receipt)?; } - Ok(()) - })?; + } + write.commit()?; } // Let's insert another block (empty) which will be used as high_qc block when zq2 starts from converted persistence - let highest_block = zq2_db.get_highest_canonical_block_number()?.unwrap(); - let highest_block = zq2_db.get_block_by_view(highest_block)?.unwrap(); + let highest_block = zq2_db.read()?.blocks()?.max_canonical_by_view()?.unwrap(); - zq2_db.with_sqlite_tx(|sqlite_tx| { - let empty_high_qc_block = create_empty_block_from_parent(&highest_block, secret_key); - zq2_db.insert_block_with_db_tx(sqlite_tx, &empty_high_qc_block)?; - zq2_db.set_high_qc_with_db_tx(sqlite_tx, empty_high_qc_block.header.qc)?; - Ok(()) - })?; + let write = zq2_db.write()?; + let empty_high_qc_block = create_empty_block_from_parent(&highest_block, secret_key); + write.blocks()?.insert(&empty_high_qc_block)?; + write.high_qc()?.set(&empty_high_qc_block.header.qc)?; + write.commit()?; println!( "Persistence conversion done up to block {}", - zq2_db.get_highest_canonical_block_number()?.unwrap_or(0) + zq2_db + .read()? + .blocks()? + .max_canonical_by_view()? 
+ .map(|b| b.number()) + .unwrap_or(0) ); Ok(()) diff --git a/z2/src/plumbing.rs b/z2/src/plumbing.rs index dcb55a0cb..66bfaa3a4 100644 --- a/z2/src/plumbing.rs +++ b/z2/src/plumbing.rs @@ -396,7 +396,7 @@ pub async fn run_persistence_converter( let zq2_db = zilliqa::db::Db::new( Some(zq2_dir), node_config.eth_chain_id, - node_config.state_cache_size, + node_config.cache_size, )?; let zq1_db = zq1::Db::new(zq1_dir)?; converter::convert_persistence(zq1_db, zq2_db, zq2_config, secret_key).await?; diff --git a/z2/src/setup.rs b/z2/src/setup.rs index 79b735867..b813f38b9 100644 --- a/z2/src/setup.rs +++ b/z2/src/setup.rs @@ -18,7 +18,7 @@ use tokio::fs; /// For now, it just generates secret keys (which should be different each run, or we will become dependent on their values) use zilliqa::{ api, - cfg::{max_rpc_response_size_default, state_cache_size_default, ApiServer}, + cfg::{cache_size_default, max_rpc_response_size_default, ApiServer}, crypto::{SecretKey, TransactionPublicKey}, }; use zilliqa::{ @@ -512,7 +512,7 @@ impl Setup { }], allowed_timestamp_skew: allowed_timestamp_skew_default(), data_dir: None, - state_cache_size: state_cache_size_default(), + cache_size: cache_size_default(), load_checkpoint: None, do_checkpoints: false, eth_chain_id: eth_chain_id_default(), diff --git a/zilliqa/Cargo.toml b/zilliqa/Cargo.toml index f628cd23d..2afd51fc9 100644 --- a/zilliqa/Cargo.toml +++ b/zilliqa/Cargo.toml @@ -59,13 +59,11 @@ rand_chacha = "0.3.1" rand_core = "0.6.4" revm = { version = "18.0.0", features = ["optional_no_base_fee"] } revm-inspectors = { version = "0.11.0", features = ["js-tracer"] } -rusqlite = { version = "0.32.1", features = ["bundled", "trace"] } serde = { version = "1.0.217", features = ["derive", "rc"] } serde_bytes = "0.11.14" serde_json = { version = "1.0.135", features = ["raw_value","arbitrary_precision"] } sha2 = "0.10.8" sha3 = "0.10.8" -sled = "0.34.7" tempfile = "3.15.0" time = { version = "0.3.37", features = ["formatting", "macros"] } 
tokio = { version = "1.43.0", features = ["macros", "rt-multi-thread", "signal", "sync"] } @@ -83,8 +81,10 @@ bech32 = "0.11.0" cfg-if = "1.0.0" serde_repr = "0.1.19" thiserror = "2.0.11" -lru-mem = "0.3.0" opentelemetry-semantic-conventions = { version = "0.27.0", features = ["semconv_experimental"] } +rusqlite = { version = "0.32.1", features = ["bundled", "trace"] } +redb = { version = "2.4.0", features = ["logging"] } +bytesize = "1.3.0" [dev-dependencies] alloy = { version = "0.6.4", default-features = false, features = ["network", "rand", "signers", "signer-local"] } diff --git a/zilliqa/benches/it.rs b/zilliqa/benches/it.rs index 75d963087..36aa3345a 100644 --- a/zilliqa/benches/it.rs +++ b/zilliqa/benches/it.rs @@ -164,7 +164,7 @@ fn consensus( request_id: RequestId::default(), }; let data_dir = tempdir().unwrap(); - let db = Db::new(Some(data_dir.path()), 0, 1024).unwrap(); + let db = Db::new(Some(data_dir.path()), 0, 512 * 1024 * 1024).unwrap(); let mut config: NodeConfig = toml::from_str( r#" consensus.rewards_per_hour = "1" diff --git a/zilliqa/src/api/eth.rs b/zilliqa/src/api/eth.rs index 651044d56..fdb84e020 100644 --- a/zilliqa/src/api/eth.rs +++ b/zilliqa/src/api/eth.rs @@ -13,7 +13,7 @@ use alloy::{ }; use anyhow::{anyhow, Result}; use http::Extensions; -use itertools::{Either, Itertools}; +use itertools::Either; use jsonrpsee::{ core::StringError, types::{ @@ -398,7 +398,10 @@ fn get_block_by_hash(params: Params, node: &Arc>) -> Result, block: &Block) -> Result<[u8; 256]> { let mut logs_bloom = [0; 256]; - for txn_receipt in node.get_transaction_receipts_in_block(block.hash())?.iter() { + for txn_hash in &block.transactions { + let txn_receipt = node + .get_transaction_receipt(*txn_hash)? 
+ .ok_or_else(|| anyhow!("missing receipt"))?; // Ideally we'd implement a full blown bloom filter type but this'll do for now txn_receipt .logs @@ -409,7 +412,7 @@ pub fn get_block_logs_bloom(node: &MutexGuard, block: &Block) -> Result<[u Log::Scilla(log) => log.into_evm(), }) .enumerate() - .map(|(log_index, log)| { + .for_each(|(log_index, log)| { let log = eth::Log::new( log, log_index, @@ -420,10 +423,7 @@ pub fn get_block_logs_bloom(node: &MutexGuard, block: &Block) -> Result<[u ); log.bloom(&mut logs_bloom); - - log - }) - .collect_vec(); + }); } Ok(logs_bloom) } @@ -562,71 +562,55 @@ fn get_logs(params: Params, node: &Arc>) -> Result> { } }; - // Get the receipts for each transaction. This is an iterator of (receipt, txn_index, txn_hash, block_number, block_hash). - let receipts = blocks - .map(|block: Result<_>| { - let block = block?; - let block_number = block.number(); - let block_hash = block.hash(); - let receipts = node.get_transaction_receipts_in_block(block_hash)?; - - Ok(block - .transactions - .into_iter() - .enumerate() - .zip(receipts) - .map(move |((txn_index, txn_hash), receipt)| { - (receipt, txn_index, txn_hash, block_number, block_hash) - })) - }) - .flatten_ok(); - - // Get the logs from each receipt and filter them based on the provided parameters. This is an iterator of (log, log_index, txn_index, txn_hash, block_number, block_hash). 
- let logs = receipts - .map(|r: Result<_>| { - let (receipt, txn_index, txn_hash, block_number, block_hash) = r?; - Ok(receipt - .logs - .into_iter() - .map(|log| match log { - Log::Evm(log) => log, - Log::Scilla(log) => log.into_evm(), - }) - .enumerate() - .map(move |(i, l)| (l, i, txn_index, txn_hash, block_number, block_hash))) - }) - .flatten_ok() - .filter_ok(|(log, _, _, _, _, _)| { - params - .address - .as_ref() - .map(|a| a.contains(&log.address)) - .unwrap_or(true) - }) - .filter_ok(|(log, _, _, _, _, _)| { - params - .topics - .iter() - .zip(log.topics.iter()) - .all(|(filter_topic, log_topic)| { - filter_topic.is_empty() || filter_topic.contains(log_topic) - }) - }); - - // Finally convert the iterator to our response format. - let logs = logs.map(|l: Result<_>| { - let (log, log_index, txn_index, txn_hash, block_number, block_hash) = l?; - Ok(eth::Log::new( - log, - log_index, - txn_index, - txn_hash, - block_number, - block_hash, - )) - }); + let mut logs = vec![]; + + for block in blocks { + let block = block?; + + for (txn_index, txn_hash) in block.transactions.iter().enumerate() { + let receipt = node + .get_transaction_receipt(*txn_hash)? 
+ .ok_or(anyhow!("missing receipt"))?; + + for (log_index, log) in receipt.logs.into_iter().enumerate() { + let log = match log { + Log::Evm(l) => l, + Log::Scilla(l) => l.into_evm(), + }; + + if !params + .address + .as_ref() + .map(|a| a.contains(&log.address)) + .unwrap_or(true) + { + continue; + } + + if !params + .topics + .iter() + .zip(log.topics.iter()) + .all(|(filter_topic, log_topic)| { + filter_topic.is_empty() || filter_topic.contains(log_topic) + }) + { + continue; + } + + logs.push(eth::Log::new( + log, + log_index, + txn_index, + *txn_hash, + block.number(), + block.hash(), + )); + } + } + } - logs.collect() + Ok(logs) } fn get_transaction_by_block_hash_and_index( diff --git a/zilliqa/src/api/zilliqa.rs b/zilliqa/src/api/zilliqa.rs index e587597cf..b06cb44c2 100644 --- a/zilliqa/src/api/zilliqa.rs +++ b/zilliqa/src/api/zilliqa.rs @@ -467,7 +467,7 @@ fn get_latest_tx_block(_: Params, node: &Arc>) -> Result>) -> Result>) -> Result Result { - Ok(node - .get_transaction_receipts_in_block(hash)? +fn get_txn_fees_for_block(node: &Node, block: &Block) -> Result { + let read = node.db.read()?; + let transactions = read.transactions()?; + let receipts = read.receipts()?; + block + .transactions .iter() - .fold(0, |acc, txnrcpt| { - let txn = node - .get_transaction_by_hash(txnrcpt.tx_hash) - .unwrap() - .unwrap(); - acc + ((txnrcpt.gas_used.0 as u128) * txn.tx.gas_price_per_evm_gas()) - })) + .map(|txn_hash| { + let txn = transactions + .get(*txn_hash)? + .ok_or_else(|| anyhow!("missing transaction"))?; + let receipt = receipts + .get(*txn_hash)? + .ok_or_else(|| anyhow!("missing receipt"))?; + Ok((receipt.gas_used.0 as u128) * txn.gas_price_per_evm_gas()) + }) + .sum() } // GetTxBlockVerbose @@ -757,7 +763,7 @@ fn get_tx_block_verbose( let proposer = node .get_proposer_reward_address(block.header)? 
.expect("No proposer"); - let txn_fees = get_txn_fees_for_block(&node, block.hash())?; + let txn_fees = get_txn_fees_for_block(&node, &block)?; let block: zil::TxBlockVerbose = zil::TxBlockVerbose::new(&block, txn_fees, proposer); Ok(Some(block)) diff --git a/zilliqa/src/bin/explore-db.rs b/zilliqa/src/bin/explore-db.rs new file mode 100644 index 000000000..b53fbb2db --- /dev/null +++ b/zilliqa/src/bin/explore-db.rs @@ -0,0 +1,147 @@ +use std::path::PathBuf; + +use anyhow::{anyhow, Result}; +use bytesize::ByteSize; +use clap::{Parser, Subcommand}; +use redb::{DatabaseStats, MultimapTableHandle, ReadableTableMetadata, TableHandle, TableStats}; +use zilliqa::{crypto::Hash, db::Db}; + +#[derive(Debug, Parser)] +struct Args { + data_dir: PathBuf, + shard_id: u64, + #[clap(subcommand)] + command: Command, +} + +#[derive(Debug, Subcommand)] +enum Command { + Stats, + Compact, + #[clap(subcommand)] + Query(Query), +} + +#[derive(Debug, Subcommand)] +enum Query { + Block { query: String }, + Blocks, +} + +fn print_db_stats(stats: DatabaseStats) { + eprintln!("database stats"); + eprintln!("tree_height: {}", stats.tree_height()); + eprintln!("allocated_pages: {}", stats.allocated_pages()); + eprintln!("leaf_pages: {}", stats.leaf_pages()); + eprintln!("branch_pages: {}", stats.branch_pages()); + eprintln!( + "stored_bytes: {}", + ByteSize::b(stats.stored_bytes()).to_string_as(true) + ); + eprintln!( + "metadata_bytes: {}", + ByteSize::b(stats.metadata_bytes()).to_string_as(true) + ); + eprintln!( + "fragmented_bytes: {}", + ByteSize::b(stats.fragmented_bytes()).to_string_as(true) + ); + eprintln!( + "page_size: {}", + ByteSize::b(stats.page_size() as u64).to_string_as(true) + ); + eprintln!(); +} + +fn print_table_stats(name: String, len: u64, stats: TableStats) { + eprintln!("{name} table stats"); + eprintln!("length: {len}"); + eprintln!("tree_height: {}", stats.tree_height()); + eprintln!("leaf_pages: {}", stats.leaf_pages()); + eprintln!("branch_pages: {}", 
stats.branch_pages()); + eprintln!( + "stored_bytes: {}", + ByteSize::b(stats.stored_bytes()).to_string_as(true) + ); + eprintln!( + "metadata_bytes: {}", + ByteSize::b(stats.metadata_bytes()).to_string_as(true) + ); + eprintln!( + "fragmented_bytes: {}", + ByteSize::b(stats.fragmented_bytes()).to_string_as(true) + ); + eprintln!(); +} + +fn main() -> Result<()> { + tracing_subscriber::fmt::init(); + + let args = Args::parse(); + let db = Db::new(Some(args.data_dir), args.shard_id, 0)?; + + match args.command { + Command::Stats => { + let db = db.into_raw(); + let write = db.begin_write()?; + + print_db_stats(write.stats()?); + + let tables: Vec<_> = write.list_tables()?.collect(); + let multimap_tables: Vec<_> = write.list_multimap_tables()?.collect(); + + write.abort()?; + + let read = db.begin_read()?; + for table in tables { + let name = table.name().to_owned(); + let table = read.open_untyped_table(table)?; + print_table_stats(name, table.len()?, table.stats()?); + } + + for table in multimap_tables { + let name = table.name().to_owned(); + let table = read.open_untyped_multimap_table(table)?; + print_table_stats(name, table.len()?, table.stats()?); + } + } + Command::Compact => { + let mut db = db.into_raw(); + db.compact()?; + } + Command::Query(Query::Block { query }) => { + let read = db.read()?; + let blocks = read.blocks()?; + + if let Some(query) = query.strip_prefix("0x") { + let hash = Hash::from_bytes(hex::decode(query)?)?; + let block = blocks + .by_hash(hash)? + .ok_or_else(|| anyhow!("missing block"))?; + println!("{block:?}"); + } else { + let height: u64 = query.parse()?; + let block = blocks + .canonical_by_height(height)? + .ok_or_else(|| anyhow!("missing block"))?; + println!("{block:?}"); + } + } + Command::Query(Query::Blocks) => { + let read = db.read()?; + let blocks = read.blocks()?; + + for block in blocks.iter()? 
{ + let block = block?; + println!( + "view={}, height={}, hash={:?}", + block.view(), + block.number(), + block.hash() + ); + } + } + } + + Ok(()) +} diff --git a/zilliqa/src/block_store.rs b/zilliqa/src/block_store.rs index fa24c85da..5b7036f05 100644 --- a/zilliqa/src/block_store.rs +++ b/zilliqa/src/block_store.rs @@ -141,12 +141,11 @@ impl BlockCache { pub fn destructive_proposals_from_parent_hashes( &mut self, - hashes: &[Hash], + hashes: impl Iterator, ) -> Vec<(PeerId, Proposal)> { // For each hash, find the list of blocks that have it as the parent. let cache_keys = hashes - .iter() - .filter_map(|x| self.by_parent_hash.remove(x)) + .filter_map(|x| self.by_parent_hash.remove(&x)) .flatten() .collect::>(); let maybe = cache_keys @@ -485,64 +484,6 @@ impl PeerInfo { } } -/// Data about a peer -#[derive(Serialize, Deserialize, Clone, Debug)] -pub struct PeerInfoStatus { - availability: BlockAvailability, - availability_updated_at: Option, - pending_requests: Vec<(String, SystemTime, u64, u64)>, - last_request_failed_at: Option, -} - -/// Data about the block store, used for debugging. -#[derive(Serialize, Deserialize, Clone, Debug)] -pub struct BlockStoreStatus { - highest_known_view: u64, - views_held: Vec>, - peers: Vec<(String, PeerInfoStatus)>, - availability: Option>, -} - -impl BlockStoreStatus { - pub fn new(block_store: &mut BlockStore) -> Result { - let peers = block_store - .peers - .iter() - .map(|(k, v)| (format!("{:?}", k), PeerInfoStatus::new(v))) - .collect::>(); - Ok(Self { - highest_known_view: block_store.highest_known_view, - views_held: block_store.db.get_view_ranges()?, - peers, - availability: block_store.availability()?, - }) - } -} - -impl PeerInfoStatus { - // Annoyingly, this can't (easily) be allowed to fail without making generating debug info hard. 
- fn new(info: &PeerInfo) -> Self { - fn s_from_time(q: Option) -> Option { - q.map(|z| { - z.duration_since(SystemTime::UNIX_EPOCH) - .unwrap_or(Duration::ZERO) - .as_secs() - }) - } - let pending_requests = info - .pending_requests - .iter() - .map(|(k, v)| (format!("{:?}", k), v.0, v.1, v.2)) - .collect::>(); - Self { - availability: info.availability.clone(), - availability_updated_at: s_from_time(info.availability_updated_at), - pending_requests, - last_request_failed_at: s_from_time(info.last_request_failed_at), - } - } -} - impl BlockAvailability { pub fn new() -> Self { Self { @@ -554,13 +495,14 @@ impl BlockAvailability { impl BlockStore { pub fn new(config: &NodeConfig, db: Arc, message_sender: MessageSender) -> Result { - let available_blocks = - db.get_view_ranges()? - .iter() - .fold(RangeMap::new(), |mut range_map, range| { - range_map.with_range(range); - range_map - }); + let read = db.read()?; + let blocks = read.blocks()?; + let min = blocks.min_by_view()?.map(|b| b.view()).unwrap_or_default(); + let max = blocks + .max_canonical_by_view()? + .map(|b| b.view()) + .unwrap_or_default(); + let available_blocks = RangeMap::from_closed_interval(min, max); Ok(BlockStore { db, block_cache: Arc::new(RwLock::new(LruCache::new(NonZeroUsize::new(5).unwrap()))), @@ -975,7 +917,7 @@ impl BlockStore { if let Some(block) = block_cache.get(&hash) { return Ok(Some(block.clone())); } - let Some(block) = self.db.get_block_by_hash(&hash)? else { + let Some(block) = self.db.read()?.blocks()?.by_hash(hash)? else { return Ok(None); }; block_cache.put(hash, block.clone()); @@ -983,18 +925,15 @@ impl BlockStore { } pub fn get_block_by_view(&self, view: u64) -> Result> { - let Some(hash) = self.db.get_block_hash_by_view(view)? 
else { - return Ok(None); - }; - self.get_block(hash) + self.db.read()?.blocks()?.by_view(view) } - pub fn get_highest_canonical_block_number(&self) -> Result> { - self.db.get_highest_canonical_block_number() + pub fn get_highest_block(&self) -> Result> { + self.db.read()?.blocks()?.max_canonical_by_view() } pub fn get_canonical_block_by_number(&self, number: u64) -> Result> { - self.db.get_canonical_block_by_number(number) + self.db.read()?.blocks()?.canonical_by_height(number) } /// Called to process a block which can be added to the chain. @@ -1011,7 +950,9 @@ impl BlockStore { block: Block, ) -> Result> { trace!(?from, number = block.number(), hash = ?block.hash(), "block_store::process_block() : starting"); - self.db.insert_block(&block)?; + let write = self.db.write()?; + write.blocks()?.insert(&block)?; + write.commit()?; self.available_blocks.with_elem(block.view()); if let Some(from) = from { @@ -1025,7 +966,7 @@ impl BlockStore { // There are two sets let result = self .buffered - .destructive_proposals_from_parent_hashes(&[block.hash()]); + .destructive_proposals_from_parent_hashes(std::iter::once(block.hash())); // Update highest_confirmed_view, but don't trim the cache if // we're not changing anything. @@ -1058,24 +999,20 @@ impl BlockStore { self.peers.entry(peer).or_insert_with(PeerInfo::new) } - pub fn forget_block_range(&mut self, blocks: Range) -> Result<()> { - self.db.forget_block_range(blocks) - } - - pub fn contains_block(&mut self, block_hash: &Hash) -> Result { - self.db.contains_block(block_hash) + pub fn contains_block(&mut self, view: u64) -> Result { + self.db.read()?.blocks()?.contains(view) } // Retrieve the plausible next blocks for the block with this hash // Because of forks there might be many of these. pub fn obtain_child_block_candidates_for( &mut self, - hashes: &[Hash], + blocks: &[Block], ) -> Result> { // The easy case is that there's something in the buffer with us as its parent hash. 
let with_parent_hashes = self .buffered - .destructive_proposals_from_parent_hashes(hashes); + .destructive_proposals_from_parent_hashes(blocks.iter().map(|b| b.hash())); if with_parent_hashes.is_empty() { // There isn't. There are three cases: // @@ -1090,7 +1027,13 @@ impl BlockStore { // In any case, deleting any cached block that calls itself the next block is // the right thing to do - if it really was the next block, we would not be // executing this branch. - if let Some(highest_block_number) = self.db.get_highest_canonical_block_number()? { + if let Some(highest_block_number) = self + .db + .read()? + .blocks()? + .max_canonical_by_view()? + .map(|b| b.number()) + { self.buffered.delete_blocks_up_to(highest_block_number + 1); trace!( "block_store::obtain_child_block_candidates : deleted cached blocks up to and including {0}", @@ -1100,10 +1043,15 @@ impl BlockStore { let fork_elems = self.buffered.inc_fork_counter() * (1 + constants::EXAMINE_BLOCKS_PER_FORK_COUNT); - let parent_hashes = self.db.get_highest_block_hashes(fork_elems)?; + let parents = self + .db + .read()? + .blocks()? + .max_canonical_by_view_count(fork_elems)?; + let parent_hashes = parents.iter().map(|b| b.hash()); let revised = self .buffered - .destructive_proposals_from_parent_hashes(&parent_hashes); + .destructive_proposals_from_parent_hashes(parent_hashes); if !revised.is_empty() { // Found some! self.buffered.reset_fork_counter(); @@ -1120,7 +1068,9 @@ impl BlockStore { self.obtain_child_block_candidates_for( &self .db - .get_highest_block_hashes(constants::EXAMINE_BLOCKS_PER_FORK_COUNT)?, + .read()? + .blocks()? 
+ .max_canonical_by_view_count(constants::EXAMINE_BLOCKS_PER_FORK_COUNT)?, ) } @@ -1168,8 +1118,8 @@ impl BlockStore { Ok(()) } - pub fn get_num_transactions(&self) -> Result { - let count = self.db.get_total_transaction_count()?; + pub fn get_num_transactions(&self) -> Result { + let count = self.db.read()?.transactions()?.count()?; Ok(count) } @@ -1187,13 +1137,10 @@ impl BlockStore { /// Returns (am_syncing, current_highest_block) pub fn am_syncing(&self) -> Result<(bool, Block)> { - let current_block = self - .db - .get_canonical_block_by_number( - self.db - .get_highest_canonical_block_number()? - .ok_or_else(|| anyhow!("no highest block"))?, - )? + let read = self.db.read()?; + let current_block = read + .blocks()? + .max_canonical_by_view()? .ok_or_else(|| anyhow!("missing highest block"))?; Ok(( (self.highest_known_view + 2) > current_block.view(), diff --git a/zilliqa/src/cfg.rs b/zilliqa/src/cfg.rs index c751d99ec..806c6c959 100644 --- a/zilliqa/src/cfg.rs +++ b/zilliqa/src/cfg.rs @@ -87,9 +87,9 @@ pub struct NodeConfig { /// The location of persistence data. If not set, uses a temporary path. #[serde(default)] pub data_dir: Option, - /// Size of the in-memory state trie cache, in bytes. Defaults to 256 MiB. - #[serde(default = "state_cache_size_default")] - pub state_cache_size: usize, + /// Size of the in-memory database cache, in bytes. Defaults to 512 MiB. + #[serde(default = "cache_size_default")] + pub cache_size: usize, /// Persistence checkpoint to load. 
#[serde(default)] pub load_checkpoint: Option, @@ -128,7 +128,7 @@ impl Default for NodeConfig { consensus: ConsensusConfig::default(), allowed_timestamp_skew: allowed_timestamp_skew_default(), data_dir: None, - state_cache_size: state_cache_size_default(), + cache_size: cache_size_default(), load_checkpoint: None, do_checkpoints: false, block_request_limit: block_request_limit_default(), @@ -192,8 +192,8 @@ pub fn allowed_timestamp_skew_default() -> Duration { Duration::from_secs(60) } -pub fn state_cache_size_default() -> usize { - 256 * 1024 * 1024 // 256 MiB +pub fn cache_size_default() -> usize { + 512 * 1024 * 1024 // 512 MiB } pub fn eth_chain_id_default() -> u64 { diff --git a/zilliqa/src/consensus.rs b/zilliqa/src/consensus.rs index 96c266693..9807fab6d 100644 --- a/zilliqa/src/consensus.rs +++ b/zilliqa/src/consensus.rs @@ -26,7 +26,7 @@ use crate::{ constants::TIME_TO_ALLOW_PROPOSAL_BROADCAST, contracts, crypto::{verify_messages, BlsSignature, Hash, NodePublicKey, SecretKey}, - db::{self, Db}, + db::{self, ArcDb, Db}, exec::{PendingState, TransactionApplyResult}, inspector::{self, ScillaInspector, TouchedAddressInspector}, message::{ @@ -211,7 +211,9 @@ impl Consensus { let block_store = BlockStore::new(&config, db.clone(), message_sender.clone())?; let latest_block = db - .get_finalized_view()? + .read()? + .finalized_view()? + .get()? .map(|view| { block_store .get_block_by_view(view)? @@ -245,22 +247,28 @@ impl Consensus { }; let (start_view, finalized_view, high_qc) = { - match db.get_high_qc()? { - Some(qc) => { + match db.read()?.high_qc()?.get()? { + Some((qc, _)) => { let high_block = block_store .get_block(qc.block_hash)? .ok_or_else(|| anyhow!("missing block that high QC points to!"))?; let finalized_number = db - .get_finalized_view()? + .read()? + .finalized_view()? + .get()? .ok_or_else(|| anyhow!("missing latest finalized view!"))?; let finalized_block = db - .get_block_by_view(finalized_number)? + .read()? + .blocks()? 
+ .by_view(finalized_number)? .ok_or_else(|| anyhow!("missing finalized block!"))?; // If latest view was written to disk then always start from there. Otherwise start from (highest out of high block and finalised block) + 1 let start_view = db - .get_view()? + .read()? + .view()? + .get()? .or_else(|| { Some(std::cmp::max(high_block.view(), finalized_block.view()) + 1) }) @@ -283,14 +291,13 @@ impl Consensus { // If we have newer blocks, erase them // @todo .. more elegantly :-) + let write = db.write()?; loop { - let highest_block_number = db - .get_highest_canonical_block_number()? - .ok_or_else(|| anyhow!("can't find highest block num in database!"))?; - - let head_block = block_store - .get_canonical_block_by_number(highest_block_number)? + let head_block = write + .blocks()? + .max_canonical_by_view()? .ok_or_else(|| anyhow!("missing head block!"))?; + let highest_block_number = head_block.number(); trace!( "recovery: highest_block_number {highest_block_number} view {0}", head_block.view() @@ -299,13 +306,16 @@ impl Consensus { if head_block.view() > high_block.view() && head_block.view() > finalized_number { + for txn_hash in &head_block.transactions { + write.delete_transaction(*txn_hash)?; + } + write.blocks()?.delete(head_block.view())?; trace!("recovery: stored block {0} reverted", highest_block_number); - db.remove_transactions_executed_in_block(&head_block.hash())?; - db.remove_block(&head_block)?; } else { break; } } + write.commit()?; info!( "During recovery, starting consensus at view {}, finalised view {}", @@ -346,8 +356,10 @@ impl Consensus { new_transactions: broadcast::Sender::new(128), new_transaction_hashes: broadcast::Sender::new(128), }; - consensus.db.set_view(start_view)?; - consensus.set_finalized_view(finalized_view)?; + let write = consensus.db.write()?; + write.view()?.set(start_view)?; + write.finalized_view()?.set(finalized_view)?; + write.commit()?; // If we're at genesis, add the genesis block and return if latest_block_view == 0 
{ @@ -378,7 +390,9 @@ impl Consensus { // If timestamp of when current high_qc was written exists then use it to estimate the minimum number of blocks the network has moved on since shut down // This is useful in scenarios in which consensus has failed since this node went down - if let Some(latest_high_qc_timestamp) = consensus.db.get_high_qc_updated_at()? { + if let Some(latest_high_qc_timestamp) = + consensus.db.read()?.high_qc()?.get()?.map(|(_, t)| t) + { let view_diff = Consensus::minimum_views_in_time_difference( latest_high_qc_timestamp.elapsed()?, consensus.config.consensus.consensus_timeout, @@ -391,7 +405,9 @@ impl Consensus { view_diff, min_view_since_high_qc_updated ); - consensus.db.set_view(min_view_since_high_qc_updated)?; + let write = consensus.db.write()?; + write.view()?.set(min_view_since_high_qc_updated)?; + write.commit()?; } // Remind block_store of our peers and request any potentially missing blocks @@ -457,13 +473,12 @@ impl Consensus { } pub fn head_block(&self) -> Block { - let highest_block_number = self - .block_store - .get_highest_canonical_block_number() + self.db + .read() .unwrap() - .unwrap(); - self.block_store - .get_canonical_block_by_number(highest_block_number) + .blocks() + .unwrap() + .max_canonical_by_view() .unwrap() .unwrap() } @@ -645,7 +660,7 @@ impl Consensus { block.hash() ); - if self.block_store.contains_block(&block.hash())? { + if self.block_store.contains_block(block.view())? { trace!("ignoring block proposal, block store contains this block already"); return Ok(None); } @@ -987,7 +1002,7 @@ impl Consensus { return Err(anyhow!("Otterscan indices are disabled")); } - self.db.get_touched_transactions(address) + self.db.read()?.touched_address_index()?.get(address) } /// Clear up anything in memory that is no longer required. This is to avoid memory leaks. 
@@ -1293,14 +1308,13 @@ impl Consensus { let (qc, parent) = match agg { // Create dummy QC for now if aggQC not provided None => { - // Start with highest canonical block - let num = self - .db - .get_highest_canonical_block_number()? - .context("no canonical blocks")?; // get highest canonical block number + // Start with highest block let block = self - .get_canonical_block_by_number(num)? - .context("missing canonical block")?; // retrieve highest canonical block + .db + .read()? + .blocks()? + .max_canonical_by_view()? + .ok_or(anyhow!("no blocks"))?; ( QuorumCertificate::new_with_identity(block.hash(), block.view()), block, @@ -1466,13 +1480,14 @@ impl Consensus { } } let (_, applied_txs, _, _) = self.early_proposal.as_ref().unwrap(); - self.db.with_sqlite_tx(|sqlite_tx| { + let write = self.db.write()?; + { + let mut transactions = write.transactions()?; for tx in applied_txs { - self.db - .insert_transaction_with_db_tx(sqlite_tx, &tx.hash, &tx.tx)?; + transactions.insert(tx.hash, &tx.tx)?; } - Ok(()) - })?; + } + write.commit()?; // Grab and update early_proposal data in own scope to avoid multiple mutable references to Self { @@ -1531,14 +1546,13 @@ impl Consensus { } /// Assembles a Pending block. fn assemble_pending_block_at(&self, state: &mut State) -> Result> { - // Start with highest canonical block - let num = self - .db - .get_highest_canonical_block_number()? - .context("no canonical blocks")?; // get highest canonical block number + // Start with highest block let block = self - .get_canonical_block_by_number(num)? - .context("missing canonical block")?; // retrieve highest canonical block + .db + .read()? + .blocks()? + .max_canonical_by_view()? 
+ .ok_or(anyhow!("no blocks"))?; // Generate early QC let early_qc = QuorumCertificate::new_with_identity(block.hash(), block.view()); @@ -1599,7 +1613,9 @@ impl Consensus { inspector::noop(), false, )?; - self.db.insert_transaction(&txn.hash, &txn.tx)?; + let write = self.db.write()?; + write.transactions()?.insert(txn.hash, &txn.tx)?; + write.commit()?; // Skip transactions whose execution resulted in an error let Some(result) = result else { @@ -1904,7 +1920,7 @@ impl Consensus { /// Returns (flag, outcome). /// flag is true if the transaction was newly added to the pool - ie. if it validated correctly and has not been seen before. pub fn new_transaction(&mut self, txn: VerifiedTransaction) -> Result { - if self.db.contains_transaction(&txn.hash)? { + if self.db.read()?.transactions()?.contains(txn.hash)? { debug!("Transaction {:?} already in mempool", txn.hash); return Ok(TxAddResult::Duplicate(txn.hash)); } @@ -1961,20 +1977,16 @@ impl Consensus { pub fn get_transaction_by_hash(&self, hash: Hash) -> Result> { Ok(self .db - .get_transaction(&hash)? + .read()? + .transactions()? + .get(hash)? .map(|tx| tx.verify()) .transpose()? .or_else(|| self.transaction_pool.get_transaction(hash).cloned())) } pub fn get_transaction_receipt(&self, hash: &Hash) -> Result> { - let Some(block_hash) = self.db.get_block_hash_reverse_index(hash)? 
else { - return Ok(None); - }; - let block_receipts = self.db.get_transaction_receipts_in_block(&block_hash)?; - Ok(block_receipts - .into_iter() - .find(|receipt| receipt.tx_hash == *hash)) + self.db.read()?.receipts()?.get(*hash) } fn update_high_qc_and_view( @@ -1993,7 +2005,9 @@ impl Consensus { if self.high_qc.block_hash == Hash::ZERO { trace!("received high qc, self high_qc is currently uninitialized, setting to the new one."); - self.db.set_high_qc(new_high_qc)?; + let write = self.db.write()?; + write.high_qc()?.set(&new_high_qc)?; + write.commit()?; self.high_qc = new_high_qc; } else { let current_high_qc_view = self @@ -2010,7 +2024,9 @@ impl Consensus { current_view = view, "updating high qc" ); - self.db.set_high_qc(new_high_qc)?; + let write = self.db.write()?; + write.high_qc()?.set(&new_high_qc)?; + write.commit()?; self.high_qc = new_high_qc; if new_high_qc_view >= view { self.set_view(new_high_qc_view + 1)?; @@ -2156,7 +2172,16 @@ impl Consensus { ); self.set_finalized_view(block.view())?; - let receipts = self.db.get_transaction_receipts_in_block(&block.hash())?; + let read = self.db.read()?; + let receipts: Vec<_> = block + .transactions + .iter() + .map(|txn_hash| { + read.receipts()? + .get(*txn_hash)? + .ok_or(anyhow!("missing receipt")) + }) + .collect::>()?; for (destination_shard, intershard_call) in blockhooks::get_cross_shard_messages(&receipts)? { @@ -2187,17 +2212,16 @@ impl Consensus { && self.epoch_is_checkpoint(self.epoch_number(block.number())) { if let Some(checkpoint_path) = self.db.get_checkpoint_dir()? { - let parent = self - .db - .get_block_by_hash(&block.parent_hash())? - .ok_or(anyhow!( - "Trying to checkpoint block, but we don't have its parent" - ))?; + let read = self.db.read()?; + let parent = read + .blocks()? + .by_hash(block.parent_hash())? 
+ .ok_or(anyhow!("missing block"))?; let transactions: Vec = block .transactions .iter() .map(|txn_hash| { - let tx = self.db.get_transaction(txn_hash)?.ok_or(anyhow!( + let tx = read.transactions()?.get(*txn_hash)?.ok_or(anyhow!( "failed to fetch transaction {} for checkpoint parent {}", txn_hash, parent.hash() @@ -2229,7 +2253,9 @@ impl Consensus { .ok_or(anyhow!("No such block number {block_number}"))?; let parent = self .db - .get_block_by_hash(&block.parent_hash())? + .read()? + .blocks()? + .by_hash(block.parent_hash())? .ok_or(anyhow!( "Trying to checkpoint block, but we don't have its parent" ))?; @@ -2237,11 +2263,16 @@ impl Consensus { .transactions .iter() .map(|txn_hash| { - let tx = self.db.get_transaction(txn_hash)?.ok_or(anyhow!( - "failed to fetch transaction {} for checkpoint parent {}", - txn_hash, - parent.hash() - ))?; + let tx = self + .db + .read()? + .transactions()? + .get(*txn_hash)? + .ok_or(anyhow!( + "failed to fetch transaction {} for checkpoint parent {}", + txn_hash, + parent.hash() + ))?; Ok::<_, anyhow::Error>(tx) }) .collect::>>()?; @@ -2541,18 +2572,21 @@ impl Consensus { } fn set_finalized_view(&mut self, view: u64) -> Result<()> { - self.db.set_finalized_view(view) + let write = self.db.write()?; + write.finalized_view()?.set(view)?; + write.commit() } pub fn get_finalized_view(&self) -> Result { - Ok(self.db.get_finalized_view()?.unwrap_or_else(|| { + Ok(self.db.read()?.finalized_view()?.get()?.unwrap_or_else(|| { warn!("no finalised view found in table. Defaulting to 0"); 0 })) } fn set_view(&mut self, view: u64) -> Result<()> { - if self.db.set_view(view)? { + let write = self.db.write()?; + if write.view()?.set(view)? { self.view_updated_at = SystemTime::now(); } else { warn!( @@ -2560,11 +2594,11 @@ impl Consensus { view ); } - Ok(()) + write.commit() } pub fn get_view(&self) -> Result { - Ok(self.db.get_view()?.unwrap_or_else(|| { + Ok(self.db.read()?.view()?.get()?.unwrap_or_else(|| { warn!("no view found in table. 
Defaulting to 0"); 0 })) @@ -2811,28 +2845,24 @@ impl Consensus { // Then, revert the blocks from the head block to the common ancestor // Then, apply the blocks (forward) from the common ancestor to the parent of the new block let mut head = self.head_block(); - let mut head_height = head.number(); let mut proposed_block = block.clone(); - let mut proposed_block_height = block.number(); trace!( "Dealing with fork: from block {} (height {}), back to block {} (height {})", head.hash(), - head_height, + head.number(), proposed_block.hash(), - proposed_block_height + proposed_block.number(), ); // Need to make sure both pointers are at the same height - while head_height > proposed_block_height { + while head.number() > proposed_block.number() { trace!("Stepping back head block pointer"); head = self.get_block(&head.parent_hash())?.unwrap(); - head_height = head.number(); } - while proposed_block_height > head_height { + while proposed_block.number() > head.number() { trace!("Stepping back proposed block pointer"); proposed_block = self.get_block(&proposed_block.parent_hash())?.unwrap(); - proposed_block_height = proposed_block.number(); } // We now have both hash pointers at the same height, we can walk back until they are equal. 
@@ -2876,6 +2906,7 @@ impl Consensus { self.transaction_pool.insert_transaction(txn, account_nonce); } + let write = self.db.write()?; // block transactions need to be removed from self.transactions and re-injected for tx_hash in &head_block.transactions { let orig_tx = self.get_transaction_by_hash(*tx_hash)?.unwrap(); @@ -2884,13 +2915,13 @@ impl Consensus { let account_nonce = self.state.get_account(orig_tx.signer)?.nonce; self.transaction_pool .insert_transaction(orig_tx, account_nonce); + // purge from the db + write.delete_transaction(*tx_hash)?; } - // then purge them all from the db, including receipts and indexes - self.db - .remove_transactions_executed_in_block(&head_block.hash())?; // this block is no longer in the main chain - self.db.mark_block_as_non_canonical(head_block.hash())?; + write.blocks()?.set_non_canonical(head_block.view())?; + write.commit()?; } // Now, we execute forward from the common ancestor to the new block parent which can @@ -2966,6 +2997,7 @@ impl Consensus { let mut block_receipts = Vec::new(); + let write = self.db.write()?; for (tx_index, txn_hash) in block.transactions.iter().enumerate() { let (receipt, addresses) = self .receipts_cache @@ -2977,9 +3009,10 @@ impl Consensus { // Apply 'touched-address' from cache for address in addresses { - self.db.add_touched_address(address, *txn_hash)?; + write.touched_address_index()?.insert(address, *txn_hash)?; } } + write.commit()?; // fast-forward state self.state.set_to_root(block.state_root_hash().into()); @@ -3060,17 +3093,18 @@ impl Consensus { debug!(?receipt, "applied transaction {:?}", receipt); block_receipts.push((receipt, tx_index)); } - self.db.with_sqlite_tx(|sqlite_tx| { + let write = self.db.write()?; + { + let mut transactions = write.transactions()?; for txn in &verified_txns { - self.db - .insert_transaction_with_db_tx(sqlite_tx, &txn.hash, &txn.tx)?; + transactions.insert(txn.hash, &txn.tx)?; } + let mut touched_address_index = write.touched_address_index()?; for 
(addr, txn_hash) in touched_addresses { - self.db - .add_touched_address_with_db_tx(sqlite_tx, addr, txn_hash)?; + touched_address_index.insert(addr, txn_hash)?; } - Ok(()) - })?; + } + write.commit()?; if cumulative_gas_used != block.gas_used() { warn!("Cumulative gas used by executing all transactions: {cumulative_gas_used} is different that the one provided in the block: {}", block.gas_used()); @@ -3157,25 +3191,20 @@ impl Consensus { // Important - only add blocks we are going to execute because they can potentially // overwrite the mapping of block height to block, which there should only be one of. // for example, this HAS to be after the deal with fork call - if !self.db.contains_block(&block.hash())? { + if !self.db.read()?.blocks()?.contains(block.view())? { // Only tell the block store where this block came from if it wasn't from ourselves. let from = from.filter(|peer_id| *peer_id != self.peer_id()); // If we were the proposer we would've already processed the block, hence the check self.add_block(from, block.clone())?; } - { - // helper scope to shadow db, to avoid moving it into the closure - // closure has to be move to take ownership of block_receipts - let db = &self.db; - self.db.with_sqlite_tx(move |sqlite_tx| { - for (receipt, _) in block_receipts { - db.insert_transaction_receipt_with_db_tx(sqlite_tx, receipt)?; - } - Ok(()) - })?; + + let write = self.db.write()?; + for (receipt, _) in block_receipts { + write.receipts()?.insert(&receipt)?; } - self.db.mark_block_as_canonical(block.hash())?; + write.blocks()?.set_canonical(block.view())?; + write.commit()?; Ok(()) } diff --git a/zilliqa/src/db.rs b/zilliqa/src/db.rs deleted file mode 100644 index 7e8c99796..000000000 --- a/zilliqa/src/db.rs +++ /dev/null @@ -1,1435 +0,0 @@ -use std::{ - collections::BTreeMap, - fmt::Debug, - fs::{self, File, OpenOptions}, - io::{BufReader, BufWriter, Read, Seek, SeekFrom, Write}, - ops::Range, - path::{Path, PathBuf}, - sync::{Arc, Mutex}, - time::Duration, -}; 
- -use alloy::primitives::Address; -use anyhow::{anyhow, Context, Result}; -use eth_trie::{EthTrie, MemoryDB, Trie, DB}; -use itertools::Itertools; -use lru_mem::LruCache; -use lz4::{Decoder, EncoderBuilder}; -use rusqlite::{ - named_params, - types::{FromSql, FromSqlError, ToSqlOutput}, - Connection, OptionalExtension, Row, ToSql, -}; -use serde::{Deserialize, Serialize}; -use tracing::{debug, warn}; - -use crate::{ - crypto::{BlsSignature, Hash}, - exec::{ScillaError, ScillaException, ScillaTransition}, - message::{AggregateQc, Block, BlockHeader, QuorumCertificate}, - state::Account, - time::SystemTime, - transaction::{EvmGas, Log, SignedTransaction, TransactionReceipt}, -}; - -macro_rules! sqlify_with_bincode { - ($type: ty) => { - impl ToSql for $type { - fn to_sql(&self) -> rusqlite::Result> { - let data = bincode::serialize(self) - .map_err(|e| rusqlite::Error::ToSqlConversionFailure(e))?; - Ok(ToSqlOutput::from(data)) - } - } - impl FromSql for $type { - fn column_result( - value: rusqlite::types::ValueRef<'_>, - ) -> rusqlite::types::FromSqlResult { - let blob = value.as_blob()?; - bincode::deserialize(blob).map_err(|e| FromSqlError::Other(e)) - } - } - }; -} - -/// Creates a thin wrapper for a type with proper From traits. To ease implementing To/FromSql on -/// foreign types. -macro_rules! make_wrapper { - ($old: ty, $new: ident) => { - paste::paste! 
{ - #[derive(Serialize, Deserialize)] - struct $new($old); - - impl From<$old> for $new { - fn from(value: $old) -> Self { - Self(value) - } - } - - impl From<$new> for $old { - fn from(value: $new) -> Self { - value.0 - } - } - } - }; -} - -sqlify_with_bincode!(AggregateQc); -sqlify_with_bincode!(QuorumCertificate); -sqlify_with_bincode!(BlsSignature); -sqlify_with_bincode!(SignedTransaction); - -make_wrapper!(Vec, VecScillaExceptionSqlable); -sqlify_with_bincode!(VecScillaExceptionSqlable); -make_wrapper!(BTreeMap>, MapScillaErrorSqlable); -sqlify_with_bincode!(MapScillaErrorSqlable); - -make_wrapper!(Vec, VecLogSqlable); -sqlify_with_bincode!(VecLogSqlable); - -make_wrapper!(Vec, VecScillaTransitionSqlable); -sqlify_with_bincode!(VecScillaTransitionSqlable); - -make_wrapper!(SystemTime, SystemTimeSqlable); -impl ToSql for SystemTimeSqlable { - fn to_sql(&self) -> rusqlite::Result> { - use std::mem::size_of; - - let since_epoch = self.0.duration_since(SystemTime::UNIX_EPOCH).unwrap(); - - let mut buf = [0u8; size_of::() + size_of::()]; - - buf[..size_of::()].copy_from_slice(&since_epoch.as_secs().to_be_bytes()[..]); - buf[size_of::()..].copy_from_slice(&since_epoch.subsec_nanos().to_be_bytes()[..]); - - Ok(ToSqlOutput::from(buf.to_vec())) - } -} -impl FromSql for SystemTimeSqlable { - fn column_result(value: rusqlite::types::ValueRef<'_>) -> rusqlite::types::FromSqlResult { - use std::mem::size_of; - - let blob = value.as_blob()?; - - if blob.len() != size_of::() + size_of::() { - return Err(FromSqlError::InvalidBlobSize { - expected_size: size_of::() + size_of::(), - blob_size: blob.len(), - }); - } - - let mut secs_buf = [0u8; size_of::()]; - let mut subsec_nanos_buf = [0u8; size_of::()]; - - secs_buf.copy_from_slice(&blob[..size_of::()]); - subsec_nanos_buf.copy_from_slice(&blob[size_of::()..]); - - let secs = u64::from_be_bytes(secs_buf); - let subsec_nanos = u32::from_be_bytes(subsec_nanos_buf); - - Ok(SystemTimeSqlable( - SystemTime::UNIX_EPOCH + 
Duration::new(secs, subsec_nanos), - )) - } -} - -make_wrapper!(Address, AddressSqlable); -impl ToSql for AddressSqlable { - fn to_sql(&self) -> rusqlite::Result> { - Ok(ToSqlOutput::from(self.0.as_slice())) - } -} -impl FromSql for AddressSqlable { - fn column_result(value: rusqlite::types::ValueRef<'_>) -> rusqlite::types::FromSqlResult { - Ok(AddressSqlable(Address::from(<[u8; 20]>::column_result( - value, - )?))) - } -} - -impl ToSql for Hash { - fn to_sql(&self) -> rusqlite::Result> { - Ok(ToSqlOutput::from(self.0.to_vec())) - } -} -impl FromSql for Hash { - fn column_result(value: rusqlite::types::ValueRef<'_>) -> rusqlite::types::FromSqlResult { - Ok(Hash(<[u8; 32]>::column_result(value)?)) - } -} - -impl ToSql for EvmGas { - fn to_sql(&self) -> rusqlite::Result> { - self.0.to_sql() - } -} - -impl FromSql for EvmGas { - fn column_result(value: rusqlite::types::ValueRef<'_>) -> rusqlite::types::FromSqlResult { - Ok(Self(u64::column_result(value)?)) - } -} - -enum BlockFilter { - Hash(Hash), - View(u64), - Height(u64), -} - -const CHECKPOINT_HEADER_BYTES: [u8; 8] = *b"ZILCHKPT"; - -/// Version string that is written to disk along with the persisted database. This should be bumped whenever we make a -/// backwards incompatible change to our database format. This should be done rarely, since it forces all node -/// operators to re-sync. -const CURRENT_DB_VERSION: &str = "1"; - -#[derive(Debug)] -pub struct Db { - db: Arc>, - state_cache: Arc, Vec>>>, - path: Option>, -} - -impl Db { - pub fn new

(data_dir: Option

, shard_id: u64, state_cache_size: usize) -> Result - where - P: AsRef, - { - let (mut connection, path) = match data_dir { - Some(path) => { - let path = path.as_ref().join(shard_id.to_string()); - fs::create_dir_all(&path).context(format!("Unable to create {path:?}"))?; - - let mut version_file = OpenOptions::new() - .create(true) - .truncate(false) - .read(true) - .write(true) - .open(path.join("version"))?; - let mut version = String::new(); - version_file.read_to_string(&mut version)?; - - if !version.is_empty() && version != CURRENT_DB_VERSION { - return Err(anyhow!("data is incompatible with this version - please delete the data and re-sync")); - } - - version_file.seek(SeekFrom::Start(0))?; - version_file.write_all(CURRENT_DB_VERSION.as_bytes())?; - - let db_path = path.join("db.sqlite3"); - ( - Connection::open(&db_path) - .context(format!("Cannot access sqlite db {0:?}", &db_path))?, - Some(path.into_boxed_path()), - ) - } - None => (Connection::open_in_memory()?, None), - }; - - // SQLite performance tweaks - - // large page_size is more compact/efficient - connection.pragma_update(None, "page_size", 1 << 15)?; - let page_size: i32 = connection.pragma_query_value(None, "page_size", |r| r.get(0))?; - - // reduced non-critical fsync() calls - connection.pragma_update(None, "synchronous", "NORMAL")?; - let synchronous: i8 = connection.pragma_query_value(None, "synchronous", |r| r.get(0))?; - - // store temporary tables/indices in-memory - connection.pragma_update(None, "temp_store", "MEMORY")?; - let temp_store: i8 = connection.pragma_query_value(None, "temp_store", |r| r.get(0))?; - - // general read/write performance improvement - let journal_mode: String = - connection.pragma_update_and_check(None, "journal_mode", "WAL", |r| r.get(0))?; - - // retain journal size of 32MB - based on observations - let journal_size_limit: i32 = - connection - .pragma_update_and_check(None, "journal_size_limit", 1 << 25, |r| r.get(0))?; - - // cache 1-days data (256MB) 
in-memory - connection.pragma_update(None, "cache_size", (1 << 28) / page_size)?; - let cache_size: i32 = connection.pragma_query_value(None, "cache_size", |r| r.get(0))?; - - tracing::info!( - ?journal_mode, - ?journal_size_limit, - ?synchronous, - ?temp_store, - ?page_size, - ?cache_size, - "PRAGMA" - ); - - // Add tracing - logs all SQL statements - connection.trace(Some(|statement| tracing::trace!(statement, "sql executed"))); - - Self::ensure_schema(&connection)?; - - Ok(Db { - db: Arc::new(Mutex::new(connection)), - state_cache: Arc::new(Mutex::new(LruCache::new(state_cache_size))), - path, - }) - } - - fn ensure_schema(connection: &Connection) -> Result<()> { - connection.execute_batch( - "CREATE TABLE IF NOT EXISTS blocks ( - block_hash BLOB NOT NULL PRIMARY KEY, - view INTEGER NOT NULL UNIQUE, - height INTEGER NOT NULL, - signature BLOB NOT NULL, - state_root_hash BLOB NOT NULL, - transactions_root_hash BLOB NOT NULL, - receipts_root_hash BLOB NOT NULL, - timestamp BLOB NOT NULL, - gas_used INTEGER NOT NULL, - gas_limit INTEGER NOT NULL, - qc BLOB NOT NULL, - agg BLOB, - is_canonical BOOLEAN NOT NULL) WITHOUT ROWID; - CREATE INDEX IF NOT EXISTS idx_blocks_height ON blocks(height); - CREATE TABLE IF NOT EXISTS transactions ( - tx_hash BLOB NOT NULL PRIMARY KEY, - data BLOB NOT NULL) WITHOUT ROWID; - CREATE TABLE IF NOT EXISTS receipts ( - tx_hash BLOB NOT NULL PRIMARY KEY REFERENCES transactions (tx_hash) ON DELETE CASCADE, - block_hash BLOB NOT NULL REFERENCES blocks (block_hash), -- the touched_address_index needs to be updated for all the txs in the block, so delete txs first - thus no cascade here - tx_index INTEGER NOT NULL, - success INTEGER NOT NULL, - gas_used INTEGER NOT NULL, - cumulative_gas_used INTEGER NOT NULL, - contract_address BLOB, - logs BLOB, - transitions BLOB, - accepted INTEGER, - errors BLOB, - exceptions BLOB); - CREATE INDEX IF NOT EXISTS block_hash_index ON receipts (block_hash); - CREATE TABLE IF NOT EXISTS touched_address_index 
( - address BLOB, - tx_hash BLOB REFERENCES transactions (tx_hash) ON DELETE CASCADE, - PRIMARY KEY (address, tx_hash)) WITHOUT ROWID; - CREATE TABLE IF NOT EXISTS tip_info ( - finalized_view INTEGER, - view INTEGER, - high_qc BLOB, - high_qc_updated_at BLOB, - _single_row INTEGER DEFAULT 0 NOT NULL UNIQUE CHECK (_single_row = 0)); -- max 1 row - CREATE TABLE IF NOT EXISTS state_trie (key BLOB NOT NULL PRIMARY KEY, value BLOB NOT NULL) WITHOUT ROWID; - ", - )?; - Ok(()) - } - - pub fn get_checkpoint_dir(&self) -> Result>> { - let Some(base_path) = &self.path else { - // If we don't have on-disk persistency, disable checkpoints too - warn!( - "Attempting to create checkpoint, but no persistence directory has been configured" - ); - return Ok(None); - }; - Ok(Some(base_path.join("checkpoints").into_boxed_path())) - } - - /// Fetch checkpoint data from file and initialise db state - /// Return checkpointed block and transactions which must be executed after this function - /// Return None if checkpoint already loaded - pub fn load_trusted_checkpoint>( - &self, - path: P, - hash: &Hash, - our_shard_id: u64, - ) -> Result, Block)>> { - // For now, only support a single version: you want to load the latest checkpoint, anyway. 
- const SUPPORTED_VERSION: u32 = 3; - - // Decompress file and write to temp file - let input_filename = path.as_ref(); - let temp_filename = input_filename.with_extension("part"); - decompress_file(input_filename, &temp_filename)?; - - // Read decompressed file - let input = File::open(&temp_filename)?; - - let mut reader = BufReader::with_capacity(128 * 1024 * 1024, input); // 128 MiB read chunks - let trie_storage = Arc::new(self.state_trie()?); - let mut state_trie = EthTrie::new(trie_storage.clone()); - - // Decode and validate header - let mut header: [u8; 21] = [0u8; 21]; - reader.read_exact(&mut header)?; - let header = header; - if header[0..8] != CHECKPOINT_HEADER_BYTES // magic bytes - || header[20] != b'\n' - // header must end in newline - { - return Err(anyhow!("Invalid checkpoint file: invalid header")); - } - let version = u32::from_be_bytes(header[8..12].try_into()?); - // Only support a single version right now. - if version != SUPPORTED_VERSION { - return Err(anyhow!("Invalid checkpoint file: unsupported version.")); - } - let shard_id = u64::from_be_bytes(header[12..20].try_into()?); - if shard_id != our_shard_id { - return Err(anyhow!("Invalid checkpoint file: wrong shard ID.")); - } - - // Decode and validate checkpoint block, its transactions and parent block - let mut block_len_buf = [0u8; std::mem::size_of::()]; - reader.read_exact(&mut block_len_buf)?; - let mut block_ser = vec![0u8; usize::try_from(u64::from_be_bytes(block_len_buf))?]; - reader.read_exact(&mut block_ser)?; - let block: Block = bincode::deserialize(&block_ser)?; - if block.hash() != *hash { - return Err(anyhow!("Checkpoint does not match trusted hash")); - } - block.verify_hash()?; - - let mut transactions_len_buf = [0u8; std::mem::size_of::()]; - reader.read_exact(&mut transactions_len_buf)?; - let mut transactions_ser = - vec![0u8; usize::try_from(u64::from_be_bytes(transactions_len_buf))?]; - reader.read_exact(&mut transactions_ser)?; - let transactions: Vec = 
bincode::deserialize(&transactions_ser)?; - - let mut parent_len_buf = [0u8; std::mem::size_of::()]; - reader.read_exact(&mut parent_len_buf)?; - let mut parent_ser = vec![0u8; usize::try_from(u64::from_be_bytes(parent_len_buf))?]; - reader.read_exact(&mut parent_ser)?; - let parent: Block = bincode::deserialize(&parent_ser)?; - if block.parent_hash() != parent.hash() { - return Err(anyhow!("Invalid checkpoint file: parent's blockhash does not correspond to checkpoint block")); - } - - if state_trie.iter().next().is_some() - || self.get_highest_canonical_block_number()?.is_some() - { - // If checkpointed block already exists then assume checkpoint load already complete. Return None - if self.get_block_by_hash(&block.hash())?.is_some() { - return Ok(None); - } - // This may not be strictly necessary, as in theory old values will, at worst, be orphaned - // values not part of any state trie of any known block. With some effort, this could - // even be supported. - // However, without such explicit support, having old blocks MAY in fact cause - // unexpected and unwanted behaviour. Thus we currently forbid loading a checkpoint in - // a node that already contains previous state, until (and unless) there's ever a - // usecase for going through the effort to support it and ensure it works as expected. - if let Some(db_block) = self.get_block_by_hash(&parent.hash())? { - if db_block.parent_hash() != parent.parent_hash() { - return Err(anyhow!("Inconsistent checkpoint file: block loaded from checkpoint and block stored in database with same hash have differing parent hashes")); - } else { - // In this case, the database already has the block contained in this checkpoint. We assume the - // database contains the full state for that block too and thus return early, without actually - // loading the checkpoint file. 
- return Ok(Some((block, transactions, parent))); - } - } else { - return Err(anyhow!("Inconsistent checkpoint file: block loaded from checkpoint file does not exist in non-empty database")); - } - } - - // Helper function used for inserting entries from memory (which backs storage trie) into persistent storage - let db_flush = |db: Arc, cache: Arc| -> Result<()> { - let mut cache_storage = cache.storage.write(); - let (keys, values): (Vec<_>, Vec<_>) = cache_storage.drain().unzip(); - debug!("Doing write to db with total items {}", keys.len()); - db.insert_batch(keys, values)?; - Ok(()) - }; - - let mut processed_accounts = 0; - let mut processed_storage_items = 0; - // This is taken directly from batch_write. However, this can be as big as we think it's reasonable to be - // (ideally multiples of `32766 / 2` so that batch writes are fully utilized) - // TODO: consider putting this const somewhere else as long as we use sql-lite - // Also see: https://www.sqlite.org/limits.html#max_variable_number - let maximum_sql_parameters = 32766 / 2; - const COMPUTE_ROOT_HASH_EVERY_ACCOUNTS: usize = 10000; - let mem_storage = Arc::new(MemoryDB::new(true)); - - // then decode state - loop { - // Read account key and the serialised Account - let mut account_hash = [0u8; 32]; - match reader.read_exact(&mut account_hash) { - // Read successful - Ok(_) => (), - // Break loop here if weve reached the end of the file - Err(ref e) if e.kind() == std::io::ErrorKind::UnexpectedEof => { - break; - } - Err(e) => return Err(e.into()), - }; - - let mut serialised_account_len_buf = [0u8; std::mem::size_of::()]; - reader.read_exact(&mut serialised_account_len_buf)?; - let mut serialised_account = - vec![0u8; usize::try_from(u64::from_be_bytes(serialised_account_len_buf))?]; - reader.read_exact(&mut serialised_account)?; - - // Read entire account storage as a buffer - let mut account_storage_len_buf = [0u8; std::mem::size_of::()]; - reader.read_exact(&mut account_storage_len_buf)?; - let 
account_storage_len = usize::try_from(u64::from_be_bytes(account_storage_len_buf))?; - let mut account_storage = vec![0u8; account_storage_len]; - reader.read_exact(&mut account_storage)?; - - // Pull out each storage key and value - let mut account_trie = EthTrie::new(mem_storage.clone()); - let mut pointer: usize = 0; - while account_storage_len > pointer { - let storage_key_len_buf: &[u8] = - &account_storage[pointer..(pointer + std::mem::size_of::())]; - let storage_key_len = - usize::try_from(u64::from_be_bytes(storage_key_len_buf.try_into()?))?; - pointer += std::mem::size_of::(); - let storage_key: &[u8] = &account_storage[pointer..(pointer + storage_key_len)]; - pointer += storage_key_len; - - let storage_val_len_buf: &[u8] = - &account_storage[pointer..(pointer + std::mem::size_of::())]; - let storage_val_len = - usize::try_from(u64::from_be_bytes(storage_val_len_buf.try_into()?))?; - pointer += std::mem::size_of::(); - let storage_val: &[u8] = &account_storage[pointer..(pointer + storage_val_len)]; - pointer += storage_val_len; - - account_trie.insert(storage_key, storage_val)?; - - processed_storage_items += 1; - } - - let account_trie_root = - bincode::deserialize::(&serialised_account)?.storage_root; - if account_trie.root_hash()?.as_slice() != account_trie_root { - return Err(anyhow!( - "Invalid checkpoint file: account trie root hash mismatch: calculated {}, checkpoint file contained {}", hex::encode(account_trie.root_hash()?.as_slice()), hex::encode(account_trie_root) - )); - } - if processed_storage_items > maximum_sql_parameters { - db_flush(trie_storage.clone(), mem_storage.clone())?; - processed_storage_items = 0; - } - - state_trie.insert(&account_hash, &serialised_account)?; - - processed_accounts += 1; - // Occasionally flush the cached state changes to disk to minimise memory usage. 
- if processed_accounts % COMPUTE_ROOT_HASH_EVERY_ACCOUNTS == 0 { - let _ = state_trie.root_hash()?; - } - } - - db_flush(trie_storage.clone(), mem_storage.clone())?; - - if state_trie.root_hash()? != parent.state_root_hash().0 { - return Err(anyhow!("Invalid checkpoint file: state root hash mismatch")); - } - - let parent_ref: &Block = &parent; // for moving into the closure - self.with_sqlite_tx(move |tx| { - self.insert_block_with_db_tx(tx, parent_ref)?; - self.set_finalized_view_with_db_tx(tx, parent_ref.view())?; - self.set_high_qc_with_db_tx(tx, block.header.qc)?; - self.set_view_with_db_tx(tx, parent_ref.view() + 1)?; - Ok(()) - })?; - - fs::remove_file(temp_filename)?; - - Ok(Some((block, transactions, parent))) - } - - pub fn state_trie(&self) -> Result { - Ok(TrieStorage { - db: self.db.clone(), - cache: self.state_cache.clone(), - }) - } - - pub fn with_sqlite_tx(&self, operations: impl FnOnce(&Connection) -> Result<()>) -> Result<()> { - let mut sqlite_tx = self.db.lock().unwrap(); - let sqlite_tx = sqlite_tx.transaction()?; - operations(&sqlite_tx)?; - Ok(sqlite_tx.commit()?) - } - - pub fn get_block_hash_by_view(&self, view: u64) -> Result> { - Ok(self - .db - .lock() - .unwrap() - .query_row_and_then( - "SELECT block_hash FROM blocks WHERE view = ?1", - [view], - |row| row.get(0), - ) - .optional()?) - } - - pub fn set_finalized_view_with_db_tx(&self, sqlite_tx: &Connection, view: u64) -> Result<()> { - sqlite_tx - .execute("INSERT INTO tip_info (finalized_view) VALUES (?1) ON CONFLICT DO UPDATE SET finalized_view = ?1", - [view])?; - Ok(()) - } - - pub fn set_finalized_view(&self, view: u64) -> Result<()> { - self.set_finalized_view_with_db_tx(&self.db.lock().unwrap(), view) - } - - pub fn get_finalized_view(&self) -> Result> { - Ok(self - .db - .lock() - .unwrap() - .query_row("SELECT finalized_view FROM tip_info", (), |row| row.get(0)) - .optional() - .unwrap_or(None)) - } - - /// Write view and timestamp to table if view is larger than current. 
Return true if write was successful - pub fn set_view_with_db_tx(&self, sqlite_tx: &Connection, view: u64) -> Result { - let res = sqlite_tx - .execute("INSERT INTO tip_info (view) VALUES (?1) ON CONFLICT(_single_row) DO UPDATE SET view = ?1 WHERE tip_info.view IS NULL OR tip_info.view < ?1", - [view])?; - Ok(res != 0) - } - - pub fn set_view(&self, view: u64) -> Result { - self.set_view_with_db_tx(&self.db.lock().unwrap(), view) - } - - pub fn get_view(&self) -> Result> { - Ok(self - .db - .lock() - .unwrap() - .query_row("SELECT view FROM tip_info", (), |row| row.get(0)) - .optional() - .unwrap_or(None)) - } - - // Deliberately not named get_highest_block_number() because there used to be one - // of those with unclear semantics, so changing name to force the compiler to error - // if it was used. - pub fn get_highest_recorded_block_number(&self) -> Result> { - Ok(self - .db - .lock() - .unwrap() - .query_row_and_then( - "SELECT height FROM blocks ORDER BY height DESC LIMIT 1", - (), - |row| row.get(0), - ) - .optional()?) - } - - pub fn get_highest_canonical_block_number(&self) -> Result> { - Ok(self - .db - .lock() - .unwrap() - .query_row_and_then( - // Two queries here are deliberate to ensure the index on `height` column is used - "SELECT height from (SELECT height, is_canonical FROM blocks ORDER BY height DESC) WHERE is_canonical = 1 LIMIT 1", - (), - |row| row.get(0), - ) - .optional()?) - } - - pub fn get_highest_block_hashes(&self, how_many: usize) -> Result> { - Ok(self - .db - .lock() - .unwrap() - .prepare_cached( - "select block_hash from blocks where is_canonical = true order by height desc limit ?1")? - .query_map([how_many], |row| row.get(0))?.collect::, _>>()?) 
- } - - pub fn set_high_qc_with_db_tx( - &self, - sqlite_tx: &Connection, - high_qc: QuorumCertificate, - ) -> Result<()> { - sqlite_tx.execute( - "INSERT INTO tip_info (high_qc, high_qc_updated_at) VALUES (:high_qc, :timestamp) ON CONFLICT DO UPDATE SET high_qc = :high_qc, high_qc_updated_at = :timestamp", - named_params! { - ":high_qc": high_qc, - ":timestamp": SystemTimeSqlable(SystemTime::now()) - })?; - Ok(()) - } - - pub fn set_high_qc(&self, high_qc: QuorumCertificate) -> Result<()> { - self.set_high_qc_with_db_tx(&self.db.lock().unwrap(), high_qc) - } - - pub fn get_high_qc(&self) -> Result> { - Ok(self - .db - .lock() - .unwrap() - .query_row("SELECT high_qc FROM tip_info", (), |row| row.get(0)) - .optional()? - .flatten()) - } - - pub fn get_high_qc_updated_at(&self) -> Result> { - Ok(self - .db - .lock() - .unwrap() - .query_row("SELECT high_qc_updated_at FROM tip_info", (), |row| { - row.get::<_, SystemTimeSqlable>(0) - }) - .optional() - .unwrap_or(None) - .map(Into::::into)) - } - - pub fn add_touched_address_with_db_tx( - &self, - sqlite_tx: &Connection, - address: Address, - txn_hash: Hash, - ) -> Result<()> { - sqlite_tx.execute( - "INSERT OR IGNORE INTO touched_address_index (address, tx_hash) VALUES (?1, ?2)", - (AddressSqlable(address), txn_hash), - )?; - Ok(()) - } - - pub fn add_touched_address(&self, address: Address, txn_hash: Hash) -> Result<()> { - self.add_touched_address_with_db_tx(&self.db.lock().unwrap(), address, txn_hash) - } - - pub fn get_touched_transactions(&self, address: Address) -> Result> { - // TODO: this is only ever used in one API, so keep an eye on performance - in case e.g. - // the index table might need to be denormalised to simplify this lookup - Ok(self - .db - .lock() - .unwrap() - .prepare_cached("SELECT tx_hash FROM touched_address_index JOIN receipts USING (tx_hash) JOIN blocks USING (block_hash) WHERE address = ?1 ORDER BY blocks.height, receipts.tx_index")? 
- .query_map([AddressSqlable(address)], |row| row.get(0))? - .collect::, _>>()?) - } - - pub fn get_transaction(&self, txn_hash: &Hash) -> Result> { - Ok(self - .db - .lock() - .unwrap() - .query_row( - "SELECT data FROM transactions WHERE tx_hash = ?1", - [txn_hash], - |row| row.get(0), - ) - .optional()?) - } - - pub fn contains_transaction(&self, hash: &Hash) -> Result { - Ok(self - .db - .lock() - .unwrap() - .query_row( - "SELECT 1 FROM transactions WHERE tx_hash = ?1", - [hash], - |row| row.get::<_, i64>(0), - ) - .optional()? - .is_some()) - } - - pub fn insert_transaction_with_db_tx( - &self, - sqlite_tx: &Connection, - hash: &Hash, - tx: &SignedTransaction, - ) -> Result<()> { - sqlite_tx.execute( - "INSERT OR IGNORE INTO transactions (tx_hash, data) VALUES (?1, ?2)", - (hash, tx), - )?; - Ok(()) - } - - /// Insert a transaction whose hash was precalculated, to save a call to calculate_hash() if it - /// is already known - pub fn insert_transaction(&self, hash: &Hash, tx: &SignedTransaction) -> Result<()> { - self.insert_transaction_with_db_tx(&self.db.lock().unwrap(), hash, tx) - } - - pub fn remove_transactions_executed_in_block(&self, block_hash: &Hash) -> Result<()> { - // foreign key triggers will take care of receipts and touched_address_index - self.db.lock().unwrap().execute( - "DELETE FROM transactions WHERE tx_hash IN (SELECT tx_hash FROM receipts WHERE block_hash = ?1)", - [block_hash], - )?; - Ok(()) - } - - pub fn get_block_hash_reverse_index(&self, tx_hash: &Hash) -> Result> { - Ok(self - .db - .lock() - .unwrap() - .query_row( - "SELECT block_hash FROM receipts WHERE tx_hash = ?1", - [tx_hash], - |row| row.get(0), - ) - .optional()?) 
- } - - pub fn insert_block_with_db_tx(&self, sqlite_tx: &Connection, block: &Block) -> Result<()> { - self.insert_block_with_hash_with_db_tx(sqlite_tx, block.hash(), block) - } - - pub fn insert_block_with_hash_with_db_tx( - &self, - sqlite_tx: &Connection, - hash: Hash, - block: &Block, - ) -> Result<()> { - sqlite_tx.execute( - "INSERT INTO blocks - (block_hash, view, height, qc, signature, state_root_hash, transactions_root_hash, receipts_root_hash, timestamp, gas_used, gas_limit, agg, is_canonical) - VALUES (:block_hash, :view, :height, :qc, :signature, :state_root_hash, :transactions_root_hash, :receipts_root_hash, :timestamp, :gas_used, :gas_limit, :agg, TRUE)", - named_params! { - ":block_hash": hash, - ":view": block.header.view, - ":height": block.header.number, - ":qc": block.header.qc, - ":signature": block.header.signature, - ":state_root_hash": block.header.state_root_hash, - ":transactions_root_hash": block.header.transactions_root_hash, - ":receipts_root_hash": block.header.receipts_root_hash, - ":timestamp": SystemTimeSqlable(block.header.timestamp), - ":gas_used": block.header.gas_used, - ":gas_limit": block.header.gas_limit, - ":agg": block.agg, - })?; - Ok(()) - } - - pub fn mark_block_as_canonical(&self, hash: Hash) -> Result<()> { - self.db.lock().unwrap().execute( - "UPDATE blocks SET is_canonical = TRUE WHERE block_hash = ?1", - [hash], - )?; - Ok(()) - } - - pub fn mark_block_as_non_canonical(&self, hash: Hash) -> Result<()> { - self.db.lock().unwrap().execute( - "UPDATE blocks SET is_canonical = FALSE WHERE block_hash = ?1", - [hash], - )?; - Ok(()) - } - - pub fn insert_block(&self, block: &Block) -> Result<()> { - self.insert_block_with_db_tx(&self.db.lock().unwrap(), block) - } - - pub fn remove_block(&self, block: &Block) -> Result<()> { - self.db.lock().unwrap().execute( - "DELETE FROM blocks WHERE block_hash = ?1", - [block.header.hash], - )?; - Ok(()) - } - - fn get_transactionless_block(&self, filter: BlockFilter) -> Result> { - fn 
make_block(row: &Row) -> rusqlite::Result { - Ok(Block { - header: BlockHeader { - hash: row.get(0)?, - view: row.get(1)?, - number: row.get(2)?, - qc: row.get(3)?, - signature: row.get(4)?, - state_root_hash: row.get(5)?, - transactions_root_hash: row.get(6)?, - receipts_root_hash: row.get(7)?, - timestamp: row.get::<_, SystemTimeSqlable>(8)?.into(), - gas_used: row.get(9)?, - gas_limit: row.get(10)?, - }, - agg: row.get(11)?, - transactions: vec![], - }) - } - macro_rules! query_block { - ($cond: tt, $key: tt) => { - self.db.lock().unwrap().query_row(concat!("SELECT block_hash, view, height, qc, signature, state_root_hash, transactions_root_hash, receipts_root_hash, timestamp, gas_used, gas_limit, agg FROM blocks WHERE ", $cond), [$key], make_block).optional()? - }; - } - Ok(match filter { - BlockFilter::Hash(hash) => { - query_block!("block_hash = ?1", hash) - } - BlockFilter::View(view) => { - query_block!("view = ?1", view) - } - BlockFilter::Height(height) => { - query_block!("height = ?1 AND is_canonical = TRUE", height) - } - }) - } - - fn get_block(&self, filter: BlockFilter) -> Result> { - let Some(mut block) = self.get_transactionless_block(filter)? else { - return Ok(None); - }; - let transaction_hashes = self - .db - .lock() - .unwrap() - .prepare_cached("SELECT tx_hash FROM receipts WHERE block_hash = ?1")? - .query_map([block.header.hash], |row| row.get(0))? 
- .collect::, _>>()?; - block.transactions = transaction_hashes; - Ok(Some(block)) - } - - pub fn get_block_by_hash(&self, block_hash: &Hash) -> Result> { - self.get_block(BlockFilter::Hash(*block_hash)) - } - - pub fn get_block_by_view(&self, view: u64) -> Result> { - self.get_block(BlockFilter::View(view)) - } - - pub fn get_canonical_block_by_number(&self, number: u64) -> Result> { - self.get_block(BlockFilter::Height(number)) - } - - pub fn contains_block(&self, block_hash: &Hash) -> Result { - Ok(self - .db - .lock() - .unwrap() - .query_row( - "SELECT 1 FROM blocks WHERE block_hash = ?1", - [block_hash], - |row| row.get::<_, i64>(0), - ) - .optional()? - .is_some()) - } - - fn make_view_range(row: &Row) -> rusqlite::Result> { - // Add one to end because the range returned from SQL is inclusive. - let start: u64 = row.get(0)?; - let end_inc: u64 = row.get(1)?; - Ok(Range { - start, - end: end_inc + 1, - }) - } - - fn make_receipt(row: &Row) -> rusqlite::Result { - Ok(TransactionReceipt { - tx_hash: row.get(0)?, - block_hash: row.get(1)?, - index: row.get(2)?, - success: row.get(3)?, - gas_used: row.get(4)?, - cumulative_gas_used: row.get(5)?, - contract_address: row.get::<_, Option>(6)?.map(|a| a.into()), - logs: row.get::<_, VecLogSqlable>(7)?.into(), - transitions: row.get::<_, VecScillaTransitionSqlable>(8)?.into(), - accepted: row.get(9)?, - errors: row.get::<_, MapScillaErrorSqlable>(10)?.into(), - exceptions: row.get::<_, VecScillaExceptionSqlable>(11)?.into(), - }) - } - - pub fn insert_transaction_receipt_with_db_tx( - &self, - sqlite_tx: &Connection, - receipt: TransactionReceipt, - ) -> Result<()> { - sqlite_tx.execute( - "INSERT INTO receipts - (tx_hash, block_hash, tx_index, success, gas_used, cumulative_gas_used, contract_address, logs, transitions, accepted, errors, exceptions) - VALUES (:tx_hash, :block_hash, :tx_index, :success, :gas_used, :cumulative_gas_used, :contract_address, :logs, :transitions, :accepted, :errors, :exceptions)", - 
named_params! { - ":tx_hash": receipt.tx_hash, - ":block_hash": receipt.block_hash, - ":tx_index": receipt.index, - ":success": receipt.success, - ":gas_used": receipt.gas_used, - ":cumulative_gas_used": receipt.cumulative_gas_used, - ":contract_address": receipt.contract_address.map(AddressSqlable), - ":logs": VecLogSqlable(receipt.logs), - ":transitions": VecScillaTransitionSqlable(receipt.transitions), - ":accepted": receipt.accepted, - ":errors": MapScillaErrorSqlable(receipt.errors), - ":exceptions": VecScillaExceptionSqlable(receipt.exceptions), - })?; - - Ok(()) - } - - pub fn insert_transaction_receipt(&self, receipt: TransactionReceipt) -> Result<()> { - self.insert_transaction_receipt_with_db_tx(&self.db.lock().unwrap(), receipt) - } - - pub fn get_transaction_receipt(&self, txn_hash: &Hash) -> Result> { - Ok(self.db.lock().unwrap().query_row("SELECT tx_hash, block_hash, tx_index, success, gas_used, cumulative_gas_used, contract_address, logs, transitions, accepted, errors, exceptions FROM receipts WHERE tx_hash = ?1", [txn_hash], Self::make_receipt).optional()?) - } - - pub fn get_transaction_receipts_in_block( - &self, - block_hash: &Hash, - ) -> Result> { - Ok(self.db.lock().unwrap().prepare_cached("SELECT tx_hash, block_hash, tx_index, success, gas_used, cumulative_gas_used, contract_address, logs, transitions, accepted, errors, exceptions FROM receipts WHERE block_hash = ?1")?.query_map([block_hash], Self::make_receipt)?.collect::, _>>()?) - } - - pub fn remove_transaction_receipts_in_block(&self, block_hash: &Hash) -> Result<()> { - self.db - .lock() - .unwrap() - .execute("DELETE FROM receipts WHERE block_hash = ?1", [block_hash])?; - Ok(()) - } - - pub fn get_total_transaction_count(&self) -> Result { - Ok(0) - } - - /// Retrieve a list of the views in our db. - /// This is a bit horrific. What we actually do here is to find the view lower and upper bounds for the contiguous block ranges in the database. 
- /// See block_store.rs::availability() for details. - pub fn get_view_ranges(&self) -> Result>> { - // The island field is technically redundant, but it helps with debugging. - // - // First off, note that this function returns all available blocks - it is up to the ultimate receiver of those blocks - // to decide if they are _canonical_ blocks or not. We take no view and serve everything we have. - // - // This query: - // - // R1 = SELECT height, MIN(view) as vlb, MAX(view) as vub from blocks GROUP BY height - // - Take everything in the blocks table, group by height and retrieve the max and min view for each block height. - // - // R2 = SELECT height, vlb, vub, ROW_NUMBER() OVER (ORDER BY height) AS rank FROM R1 - // - order the result by height, and find me the height, vlb, vub, and row number in the results (which we call rank). - // (OVER is sqlite magic -see docs for details) - // - // R3 = SELECT MIN(vlb), MAX(vub), MIN(height), MAX(height), height-rank AS island FROM R2 GROUP BY island ORDER BY MIN(height) ASC - // - now group R2 by island number (i.e contiguous range of heights), and select the max view, min view, max height and min height for this range. - // Return this list ordered by MIN(height) for convenience. - // - // And now you have the set of ranges you can advertise that you can serve. You could get the same result by SELECT height FROM blocks, putting the results in - // a RangeMap and then iterating the resulting ranges - this query just makes the database do the work (and returns the associated views, since block requests - // are made by view). - Ok(self.db.lock().unwrap() - .prepare_cached("SELECT MIN(vlb), MAX(vub), MIN(height),MAX(height),height-rank AS island FROM ( SELECT height,vlb,vub,ROW_NUMBER() OVER (ORDER BY height) AS rank FROM - (SELECT height,MIN(view) as vlb, MAX(view) as vub from blocks GROUP BY height ) ) GROUP BY island ORDER BY MIN(height) ASC")? - .query_map([], Self::make_view_range)?.collect::,_>>()?) 
- } - - /// Forget about a range of blocks; this saves space, but also allows us to test our block fetch algorithm. - pub fn forget_block_range(&self, blocks: Range) -> Result<()> { - self.with_sqlite_tx(move |tx| { - // Remove everything! - tx.execute("DELETE FROM tip_info WHERE finalized_view IN (SELECT view FROM blocks WHERE height >= :low AND height < :high)", - named_params! { - ":low" : blocks.start, - ":high" : blocks.end } )?; - tx.execute("DELETE FROM receipts WHERE block_hash IN (SELECT block_hash FROM blocks WHERE height >= :low AND height < :high)", - named_params! { - ":low": blocks.start, - ":high": blocks.end })?; - tx.execute( - "DELETE FROM blocks WHERE height >= :low AND height < :high", - named_params! { - ":low": blocks.start, - ":high": blocks.end }, - )?; - Ok(()) - }) - } -} - -pub fn get_checkpoint_filename + Debug>( - output_dir: P, - block: &Block, -) -> Result { - Ok(output_dir.as_ref().join(block.number().to_string())) -} - -/// Build checkpoint and write to disk. 
-/// A description of the data written can be found in docs/checkpoints -pub fn checkpoint_block_with_state + Debug>( - block: &Block, - transactions: &Vec, - parent: &Block, - state_trie_storage: TrieStorage, - shard_id: u64, - output_dir: P, -) -> Result<()> { - const VERSION: u32 = 3; - - fs::create_dir_all(&output_dir)?; - - let state_trie_storage = Arc::new(state_trie_storage); - // quick sanity check - if block.parent_hash() != parent.hash() { - return Err(anyhow!( - "Parent block parameter must match the checkpoint block's parent hash" - )); - } - - // Note: we ignore any existing file - let output_filename = get_checkpoint_filename(output_dir, block)?; - let temp_filename = output_filename.with_extension("part"); - let outfile_temp = File::create_new(&temp_filename)?; - let mut writer = BufWriter::with_capacity(128 * 1024 * 1024, outfile_temp); // 128 MiB chunks - - // write the header: - writer.write_all(&CHECKPOINT_HEADER_BYTES)?; // file identifier - writer.write_all(&VERSION.to_be_bytes())?; // 4 BE bytes for version - writer.write_all(&shard_id.to_be_bytes())?; // 8 BE bytes for shard ID - writer.write_all(b"\n")?; - - // write the block... 
- let block_ser = &bincode::serialize(&block)?; - writer.write_all(&u64::try_from(block_ser.len())?.to_be_bytes())?; - writer.write_all(block_ser)?; - - // write transactions - let transactions_ser = &bincode::serialize(&transactions)?; - writer.write_all(&u64::try_from(transactions_ser.len())?.to_be_bytes())?; - writer.write_all(transactions_ser)?; - - // and its parent, to keep the qc tracked - let parent_ser = &bincode::serialize(&parent)?; - writer.write_all(&u64::try_from(parent_ser.len())?.to_be_bytes())?; - writer.write_all(parent_ser)?; - - // then write state for each account - let accounts = - EthTrie::new(state_trie_storage.clone()).at_root(parent.state_root_hash().into()); - let account_storage = EthTrie::new(state_trie_storage); - let mut account_key_buf = [0u8; 32]; // save a few allocations, since account keys are fixed length - - for (key, serialised_account) in accounts.iter() { - // export the account itself - account_key_buf.copy_from_slice(&key); - writer.write_all(&account_key_buf)?; - - writer.write_all(&u64::try_from(serialised_account.len())?.to_be_bytes())?; - writer.write_all(&serialised_account)?; - - // now write the entire account storage map - let account_storage = account_storage - .at_root(bincode::deserialize::(&serialised_account)?.storage_root); - let mut account_storage_buf = vec![]; - for (storage_key, storage_val) in account_storage.iter() { - account_storage_buf.extend_from_slice(&u64::try_from(storage_key.len())?.to_be_bytes()); - account_storage_buf.extend_from_slice(&storage_key); - - account_storage_buf.extend_from_slice(&u64::try_from(storage_val.len())?.to_be_bytes()); - account_storage_buf.extend_from_slice(&storage_val); - } - writer.write_all(&u64::try_from(account_storage_buf.len())?.to_be_bytes())?; - writer.write_all(&account_storage_buf)?; - } - writer.flush()?; - - // lz4 compress and write to output - compress_file(&temp_filename, &output_filename)?; - - fs::remove_file(temp_filename)?; - - Ok(()) -} - -/// Read 
temp file, compress usign lz4, write into output file -fn compress_file + Debug>(input_file_path: P, output_file_path: P) -> Result<()> { - let mut reader = BufReader::new(File::open(input_file_path)?); - - let mut encoder = EncoderBuilder::new().build(File::create(output_file_path)?)?; - let mut buffer = [0u8; 1024 * 64]; // read 64KB chunks at a time - loop { - let bytes_read = reader.read(&mut buffer)?; // Read a chunk of decompressed data - if bytes_read == 0 { - break; // End of file - } - encoder.write_all(&buffer[..bytes_read])?; - } - encoder.finish().1?; - - Ok(()) -} - -/// Read lz4 compressed file and write into output file -fn decompress_file + Debug>(input_file_path: P, output_file_path: P) -> Result<()> { - let reader: BufReader = BufReader::new(File::open(input_file_path)?); - let mut decoder = Decoder::new(reader)?; - - let mut writer = BufWriter::new(File::create(output_file_path)?); - let mut buffer = [0u8; 1024 * 64]; // read 64KB chunks at a time - loop { - let bytes_read = decoder.read(&mut buffer)?; // Read a chunk of decompressed data - if bytes_read == 0 { - break; // End of file - } - writer.write_all(&buffer[..bytes_read])?; - } - - writer.flush()?; - - Ok(()) -} - -/// An implementor of [eth_trie::DB] which uses a [Connection] to persist data. -#[derive(Debug, Clone)] -pub struct TrieStorage { - db: Arc>, - cache: Arc, Vec>>>, -} - -impl TrieStorage { - pub fn write_batch( - &self, - keys: Vec>, - values: Vec>, - ) -> Result<(), rusqlite::Error> { - if keys.is_empty() { - return Ok(()); - } - - assert_eq!(keys.len(), values.len()); - - // https://www.sqlite.org/limits.html#max_variable_number - let maximum_sql_parameters = 32766; - // Each key-value pair needs two parameters. - let chunk_size = maximum_sql_parameters / 2; - - let keys = keys.chunks(chunk_size); - let values = values.chunks(chunk_size); - - for (keys, values) in keys.zip(values) { - // Generate the SQL substring of the form "(?1, ?2), (?3, ?4), (?5, ?6), ...". 
There will be one pair of - // parameters for each key. Note that parameters are one-indexed. - #[allow(unstable_name_collisions)] - let params_stmt: String = (0..keys.len()) - .map(|i| format!("(?{}, ?{})", i * 2 + 1, i * 2 + 2)) - .intersperse(",".to_owned()) - .collect(); - let query = - format!("INSERT OR REPLACE INTO state_trie (key, value) VALUES {params_stmt}"); - - let params = keys.iter().zip(values).flat_map(|(k, v)| [k, v]); - self.db - .lock() - .unwrap() - .execute(&query, rusqlite::params_from_iter(params))?; - for (key, value) in keys.iter().zip(values) { - let _ = self - .cache - .lock() - .unwrap() - .insert(key.to_vec(), value.to_vec()); - } - } - - Ok(()) - } -} - -impl eth_trie::DB for TrieStorage { - type Error = rusqlite::Error; - - fn get(&self, key: &[u8]) -> Result>, Self::Error> { - if let Some(cached) = self.cache.lock().unwrap().get(key).map(|v| v.to_vec()) { - return Ok(Some(cached)); - } - - let value: Option> = self - .db - .lock() - .unwrap() - .query_row( - "SELECT value FROM state_trie WHERE key = ?1", - [key], - |row| row.get(0), - ) - .optional()?; - - let mut cache = self.cache.lock().unwrap(); - if !cache.contains(key) { - if let Some(value) = &value { - let _ = cache.insert(key.to_vec(), value.clone()); - } - } - - Ok(value) - } - - fn insert(&self, key: &[u8], value: Vec) -> Result<(), Self::Error> { - self.db.lock().unwrap().execute( - "INSERT OR REPLACE INTO state_trie (key, value) VALUES (?1, ?2)", - (key, &value), - )?; - let _ = self.cache.lock().unwrap().insert(key.to_vec(), value); - Ok(()) - } - - fn insert_batch(&self, keys: Vec>, values: Vec>) -> Result<(), Self::Error> { - self.write_batch(keys, values) - } - - fn remove(&self, _key: &[u8]) -> Result<(), Self::Error> { - // we keep old state to function as an archive node, therefore no-op - Ok(()) - } - - fn remove_batch(&self, _: &[Vec]) -> Result<(), Self::Error> { - // we keep old state to function as an archive node, therefore no-op - Ok(()) - } -} - 
-#[cfg(test)] -mod tests { - use alloy::consensus::EMPTY_ROOT_HASH; - use rand::{ - distributions::{Distribution, Uniform}, - Rng, SeedableRng, - }; - use rand_chacha::ChaCha8Rng; - use tempfile::tempdir; - - use super::*; - use crate::{crypto::SecretKey, state::State}; - - #[test] - fn checkpoint_export_import() { - let base_path = tempdir().unwrap(); - let base_path = base_path.path(); - let db = Db::new(Some(base_path), 0, 1024).unwrap(); - - // Seed db with data - let mut rng = ChaCha8Rng::seed_from_u64(0); - let distribution = Uniform::new(1, 50); - let mut root_trie = EthTrie::new(Arc::new(db.state_trie().unwrap())); - for _ in 0..100 { - let account_address: [u8; 20] = rng.gen(); - let mut account_trie = EthTrie::new(Arc::new(db.state_trie().unwrap())); - let mut key = Vec::::with_capacity(50); - let mut value = Vec::::with_capacity(50); - for _ in 0..distribution.sample(&mut rng) { - for _ in 0..distribution.sample(&mut rng) { - key.push(rng.gen()); - } - for _ in 0..distribution.sample(&mut rng) { - value.push(rng.gen()); - } - account_trie.insert(&key, &value).unwrap(); - } - let account = Account { - storage_root: account_trie.root_hash().unwrap(), - ..Default::default() - }; - root_trie - .insert( - &State::account_key(account_address.into()).0, - &bincode::serialize(&account).unwrap(), - ) - .unwrap(); - } - - let state_hash = root_trie.root_hash().unwrap(); - let checkpoint_parent = Block::genesis(state_hash.into()); - // bit of a hack to generate a successor block - let mut qc2 = QuorumCertificate::genesis(); - qc2.block_hash = checkpoint_parent.hash(); - qc2.view = 1; - let checkpoint_block = Block::from_qc( - SecretKey::new().unwrap(), - 1, - 1, - qc2, - None, - state_hash.into(), - EMPTY_ROOT_HASH.into(), - EMPTY_ROOT_HASH.into(), - vec![], - SystemTime::now(), - EvmGas(0), - EvmGas(0), - ); - - let checkpoint_path = db.get_checkpoint_dir().unwrap().unwrap(); - - const SHARD_ID: u64 = 5000; - - let checkpoint_transactions = vec![]; - 
checkpoint_block_with_state( - &checkpoint_block, - &checkpoint_transactions, - &checkpoint_parent, - db.state_trie().unwrap(), - SHARD_ID, - &checkpoint_path, - ) - .unwrap(); - - // now load the checkpoint - let (block, transactions, parent) = db - .load_trusted_checkpoint( - checkpoint_path.join(checkpoint_block.number().to_string()), - &checkpoint_block.hash(), - SHARD_ID, - ) - .unwrap() - .unwrap(); - assert_eq!(checkpoint_block, block); - assert_eq!(checkpoint_transactions, transactions); - assert_eq!(checkpoint_parent, parent); - - // load the checkpoint again, to ensure idempotency - let (block, transactions, parent) = db - .load_trusted_checkpoint( - checkpoint_path.join(checkpoint_block.number().to_string()), - &checkpoint_block.hash(), - SHARD_ID, - ) - .unwrap() - .unwrap(); - assert_eq!(checkpoint_block, block); - assert_eq!(checkpoint_transactions, transactions); - assert_eq!(checkpoint_parent, parent); - - // Return None if checkpointed block already executed - db.insert_block(&checkpoint_block).unwrap(); - let result = db - .load_trusted_checkpoint( - checkpoint_path.join(checkpoint_block.number().to_string()), - &checkpoint_block.hash(), - SHARD_ID, - ) - .unwrap(); - assert!(result.is_none()); - } -} diff --git a/zilliqa/src/db/migrate.rs b/zilliqa/src/db/migrate.rs new file mode 100644 index 000000000..d5138c90b --- /dev/null +++ b/zilliqa/src/db/migrate.rs @@ -0,0 +1,433 @@ +use std::{collections::BTreeMap, sync::Arc, time::Duration}; + +use anyhow::Result; +use revm::primitives::Address; +use rusqlite::{ + types::{FromSql, FromSqlError, ToSqlOutput}, + Connection, ToSql, +}; +use serde::{Deserialize, Serialize}; +use tracing::info; + +use super::Db; +use crate::{ + crypto::{BlsSignature, Hash}, + exec::{ScillaError, ScillaException, ScillaTransition}, + message::{AggregateQc, Block, BlockHeader, QuorumCertificate}, + time::SystemTime, + transaction::{EvmGas, Log, SignedTransaction, TransactionReceipt}, +}; + +macro_rules! 
sqlify_with_bincode { + ($type: ty) => { + impl ToSql for $type { + fn to_sql(&self) -> rusqlite::Result> { + let data = bincode::serialize(self) + .map_err(|e| rusqlite::Error::ToSqlConversionFailure(e))?; + Ok(ToSqlOutput::from(data)) + } + } + impl FromSql for $type { + fn column_result( + value: rusqlite::types::ValueRef<'_>, + ) -> rusqlite::types::FromSqlResult { + let blob = value.as_blob()?; + bincode::deserialize(blob).map_err(|e| FromSqlError::Other(e)) + } + } + }; +} + +/// Creates a thin wrapper for a type with proper From traits. To ease implementing To/FromSql on +/// foreign types. +macro_rules! make_wrapper { + ($old: ty, $new: ident) => { + paste::paste! { + #[derive(Serialize, Deserialize)] + struct $new($old); + + impl From<$old> for $new { + fn from(value: $old) -> Self { + Self(value) + } + } + + impl From<$new> for $old { + fn from(value: $new) -> Self { + value.0 + } + } + } + }; +} + +sqlify_with_bincode!(AggregateQc); +sqlify_with_bincode!(QuorumCertificate); +sqlify_with_bincode!(BlsSignature); +sqlify_with_bincode!(SignedTransaction); + +make_wrapper!(Vec, VecScillaExceptionSqlable); +sqlify_with_bincode!(VecScillaExceptionSqlable); +make_wrapper!(BTreeMap>, MapScillaErrorSqlable); +sqlify_with_bincode!(MapScillaErrorSqlable); + +make_wrapper!(Vec, VecLogSqlable); +sqlify_with_bincode!(VecLogSqlable); + +make_wrapper!(Vec, VecScillaTransitionSqlable); +sqlify_with_bincode!(VecScillaTransitionSqlable); + +make_wrapper!(SystemTime, SystemTimeSqlable); +impl ToSql for SystemTimeSqlable { + fn to_sql(&self) -> rusqlite::Result> { + use std::mem::size_of; + + let since_epoch = self.0.duration_since(SystemTime::UNIX_EPOCH).unwrap(); + + let mut buf = [0u8; size_of::() + size_of::()]; + + buf[..size_of::()].copy_from_slice(&since_epoch.as_secs().to_be_bytes()[..]); + buf[size_of::()..].copy_from_slice(&since_epoch.subsec_nanos().to_be_bytes()[..]); + + Ok(ToSqlOutput::from(buf.to_vec())) + } +} +impl FromSql for SystemTimeSqlable { + fn 
column_result(value: rusqlite::types::ValueRef<'_>) -> rusqlite::types::FromSqlResult { + use std::mem::size_of; + + let blob = value.as_blob()?; + + if blob.len() != size_of::() + size_of::() { + return Err(FromSqlError::InvalidBlobSize { + expected_size: size_of::() + size_of::(), + blob_size: blob.len(), + }); + } + + let mut secs_buf = [0u8; size_of::()]; + let mut subsec_nanos_buf = [0u8; size_of::()]; + + secs_buf.copy_from_slice(&blob[..size_of::()]); + subsec_nanos_buf.copy_from_slice(&blob[size_of::()..]); + + let secs = u64::from_be_bytes(secs_buf); + let subsec_nanos = u32::from_be_bytes(subsec_nanos_buf); + + Ok(SystemTimeSqlable( + SystemTime::UNIX_EPOCH + Duration::new(secs, subsec_nanos), + )) + } +} + +make_wrapper!(Address, AddressSqlable); +impl ToSql for AddressSqlable { + fn to_sql(&self) -> rusqlite::Result> { + Ok(ToSqlOutput::from(self.0.as_slice())) + } +} +impl FromSql for AddressSqlable { + fn column_result(value: rusqlite::types::ValueRef<'_>) -> rusqlite::types::FromSqlResult { + Ok(AddressSqlable(Address::from(<[u8; 20]>::column_result( + value, + )?))) + } +} + +impl ToSql for Hash { + fn to_sql(&self) -> rusqlite::Result> { + Ok(ToSqlOutput::from(self.0.to_vec())) + } +} +impl FromSql for Hash { + fn column_result(value: rusqlite::types::ValueRef<'_>) -> rusqlite::types::FromSqlResult { + Ok(Hash(<[u8; 32]>::column_result(value)?)) + } +} + +impl ToSql for EvmGas { + fn to_sql(&self) -> rusqlite::Result> { + self.0.to_sql() + } +} + +impl FromSql for EvmGas { + fn column_result(value: rusqlite::types::ValueRef<'_>) -> rusqlite::types::FromSqlResult { + Ok(Self(u64::column_result(value)?)) + } +} + +impl FromSql for Bytes { + fn column_result(value: rusqlite::types::ValueRef<'_>) -> rusqlite::types::FromSqlResult { + Ok(Self( + value + .as_bytes_or_null()? 
+ .map(|b| b.to_vec()) + .unwrap_or_default(), + )) + } +} + +struct Bytes(Vec); + +struct BlockRow { + block_hash: Hash, + view: u64, + height: u64, + signature: BlsSignature, + state_root_hash: Hash, + transactions_root_hash: Hash, + receipts_root_hash: Hash, + timestamp: SystemTimeSqlable, + gas_used: EvmGas, + gas_limit: EvmGas, + qc: QuorumCertificate, + agg: Option, + is_canonical: bool, + transactions: Bytes, +} + +impl Db { + pub fn migrate_from(self, mut sql: Connection) -> Result { + sql.trace(Some(|statement| tracing::trace!(statement, "sql executed"))); + + let write = self.write()?; + let mut blocks = write.blocks()?; + let mut transactions = write.transactions()?; + let mut receipts = write.receipts()?; + let mut touched_address_index = write.touched_address_index()?; + let mut finalized_view = write.finalized_view()?; + let mut view = write.view()?; + let mut high_qc = write.high_qc()?; + let mut state_trie = write.state_trie()?; + + info!("migrating blocks"); + // FIXME: THIS EXCLUDES BLOCKS WITH NO TRANSACTIONS!!! 
+ let mut old_blocks = sql.prepare( + r#" + SELECT + b.block_hash, + b.view, + b.height, + b.signature, + b.state_root_hash, + b.transactions_root_hash, + b.receipts_root_hash, + b.timestamp, + b.gas_used, + b.gas_limit, + b.qc, + b.agg, + b.is_canonical, + r.tx_hashes + FROM + blocks b + LEFT JOIN ( + SELECT + block_hash, + GROUP_CONCAT(tx_hash, "") AS tx_hashes + FROM receipts + GROUP BY block_hash + ) r USING (block_hash) + ; + "#, + )?; + let old_blocks = old_blocks.query_map((), |row| { + Ok(BlockRow { + block_hash: row.get(0)?, + view: row.get(1)?, + height: row.get(2)?, + signature: row.get(3)?, + state_root_hash: row.get(4)?, + transactions_root_hash: row.get(5)?, + receipts_root_hash: row.get(6)?, + timestamp: row.get(7)?, + gas_used: row.get(8)?, + gas_limit: row.get(9)?, + qc: row.get(10)?, + agg: row.get(11)?, + is_canonical: row.get(12)?, + transactions: row.get(13)?, + }) + })?; + + for block in old_blocks { + let block = block?; + let is_canonical = block.is_canonical; + let block = Block { + header: BlockHeader { + view: block.view, + number: block.height, + hash: block.block_hash, + qc: block.qc, + signature: block.signature, + state_root_hash: block.state_root_hash, + transactions_root_hash: block.transactions_root_hash, + receipts_root_hash: block.receipts_root_hash, + timestamp: block.timestamp.into(), + gas_used: block.gas_used, + gas_limit: block.gas_limit, + }, + agg: block.agg, + transactions: block + .transactions + .0 + .chunks_exact(32) + .map(Hash::from_bytes) + .collect::>()?, + }; + blocks.insert(&block)?; + if !is_canonical { + blocks.set_non_canonical(block.view())?; + } + } + + info!("migrating transactions"); + let mut old_txns = sql.prepare( + " + SELECT + tx_hash, + data + FROM + transactions + ; + ", + )?; + let old_txns = old_txns.query_map((), |row| Ok((row.get(0)?, row.get(1)?)))?; + for txn in old_txns { + let (txn_hash, txn) = txn?; + transactions.insert(txn_hash, &txn)?; + } + + info!("migrating receipts"); + let mut 
old_receipts = sql.prepare( + " + SELECT + block_hash, + tx_index, + tx_hash, + success, + gas_used, + cumulative_gas_used, + contract_address, + logs, + transitions, + accepted, + errors, + exceptions + FROM + receipts + ; + ", + )?; + let old_receipts = old_receipts.query_map((), |row| { + Ok(TransactionReceipt { + block_hash: row.get(0)?, + index: row.get(1)?, + tx_hash: row.get(2)?, + success: row.get(3)?, + gas_used: row.get(4)?, + cumulative_gas_used: row.get(5)?, + contract_address: row.get::<_, Option>(6)?.map(|a| a.0), + logs: row.get::<_, VecLogSqlable>(7)?.0, + transitions: row.get::<_, VecScillaTransitionSqlable>(8)?.0, + accepted: row.get(9)?, + errors: row.get::<_, MapScillaErrorSqlable>(10)?.0, + exceptions: row.get::<_, VecScillaExceptionSqlable>(11)?.0, + }) + })?; + for receipt in old_receipts { + let receipt = receipt?; + receipts.insert(&receipt)?; + } + + info!("migrating touched address index"); + let mut old_touched_address_index = sql.prepare( + " + SELECT + address, + tx_hash + FROM + touched_address_index + ; + ", + )?; + let old_touched_address_index = old_touched_address_index.query_map((), |row| { + Ok((row.get::<_, AddressSqlable>(0)?, row.get(1)?)) + })?; + + for pair in old_touched_address_index { + let (address, txn_hash) = pair?; + touched_address_index.insert(address.0, txn_hash)?; + } + + info!("migrating consensus info"); + let (old_finalized_view, old_view, old_high_qc, old_high_qc_updated_at) = sql.query_row( + " + SELECT + finalized_view, + view, + high_qc, + high_qc_updated_at + FROM + tip_info + ; + ", + (), + |row| { + Ok(( + row.get(0)?, + row.get(1)?, + row.get(2)?, + row.get::<_, SystemTimeSqlable>(3)?, + )) + }, + )?; + + finalized_view.set(old_finalized_view)?; + view.set(old_view)?; + high_qc.set_with_updated_at(&old_high_qc, old_high_qc_updated_at.0)?; + + info!("migrating state trie"); + let mut old_state_trie = sql.prepare( + " + SELECT + key, + value + FROM + state_trie + ; + ", + )?; + let old_state_trie = + 
old_state_trie.query_map((), |row| Ok((row.get::<_, Vec>(0)?, row.get(1)?)))?; + + for pair in old_state_trie { + let (key, value) = pair?; + state_trie.insert(&key, &value)?; + } + std::mem::drop(( + blocks, + transactions, + receipts, + touched_address_index, + finalized_view, + view, + high_qc, + state_trie, + )); + + info!("committing"); + write.commit()?; + + info!("compacting"); + let path = self.path.clone(); + let mut db = self.into_raw(); + db.compact()?; + + Ok(Db { + db: Arc::new(db), + path, + }) + } +} diff --git a/zilliqa/src/db/mod.rs b/zilliqa/src/db/mod.rs new file mode 100644 index 000000000..3665ab3dc --- /dev/null +++ b/zilliqa/src/db/mod.rs @@ -0,0 +1,614 @@ +mod migrate; +mod tables; + +use std::{ + fmt::Debug, + fs::{self, File, OpenOptions}, + io::{BufReader, BufWriter, Read, Seek, SeekFrom, Write}, + path::{Path, PathBuf}, + sync::Arc, +}; + +use anyhow::{anyhow, Context, Result}; +use eth_trie::{EthTrie, Trie}; +use lz4::{Decoder, EncoderBuilder}; +use redb::{backends::InMemoryBackend, Database}; +pub use tables::*; +use tracing::{info, warn}; + +use crate::{crypto::Hash, message::Block, state::Account, transaction::SignedTransaction}; + +const CHECKPOINT_HEADER_BYTES: [u8; 8] = *b"ZILCHKPT"; + +/// Version string that is written to disk along with the persisted database. This should be bumped whenever we make a +/// backwards incompatible change to our database format. 
+const CURRENT_DB_VERSION: u8 = 2; + +#[derive(Debug)] +pub struct Db { + db: Arc, + path: Option>, +} + +pub trait ArcDb { + fn state_trie(&self) -> Result; + fn load_trusted_checkpoint>( + &self, + path: P, + hash: &Hash, + our_shard_id: u64, + ) -> Result, Block)>>; +} + +impl ArcDb for Arc { + fn state_trie(&self) -> Result { + Ok(TrieStorage { db: self.clone() }) + } + + /// Fetch checkpoint data from file and initialise db state + /// Return checkpointed block and transactions which must be executed after this function + /// Return None if checkpoint already loaded + fn load_trusted_checkpoint>( + &self, + path: P, + hash: &Hash, + our_shard_id: u64, + ) -> Result, Block)>> { + // For now, only support a single version: you want to load the latest checkpoint, anyway. + const SUPPORTED_VERSION: u32 = 3; + + // Decompress file and write to temp file + let input_filename = path.as_ref(); + let temp_filename = input_filename.with_extension("part"); + decompress_file(input_filename, &temp_filename)?; + + // Read decompressed file + let input = File::open(&temp_filename)?; + + let mut reader = BufReader::with_capacity(8192 * 1024, input); // 8 MiB read chunks + let trie_storage = Arc::new(self.state_trie()?); + let mut state_trie = EthTrie::new(trie_storage.clone()); + + // Decode and validate header + let mut header: [u8; 21] = [0u8; 21]; + reader.read_exact(&mut header)?; + let header = header; + if header[0..8] != CHECKPOINT_HEADER_BYTES // magic bytes + || header[20] != b'\n' + // header must end in newline + { + return Err(anyhow!("Invalid checkpoint file: invalid header")); + } + let version = u32::from_be_bytes(header[8..12].try_into()?); + // Only support a single version right now. 
+ if version != SUPPORTED_VERSION { + return Err(anyhow!("Invalid checkpoint file: unsupported version.")); + } + let shard_id = u64::from_be_bytes(header[12..20].try_into()?); + if shard_id != our_shard_id { + return Err(anyhow!("Invalid checkpoint file: wrong shard ID.")); + } + + // Decode and validate checkpoint block, its transactions and parent block + let mut block_len_buf = [0u8; std::mem::size_of::()]; + reader.read_exact(&mut block_len_buf)?; + let mut block_ser = vec![0u8; usize::try_from(u64::from_be_bytes(block_len_buf))?]; + reader.read_exact(&mut block_ser)?; + let block: Block = bincode::deserialize(&block_ser)?; + if block.hash() != *hash { + return Err(anyhow!("Checkpoint does not match trusted hash")); + } + block.verify_hash()?; + + let mut transactions_len_buf = [0u8; std::mem::size_of::()]; + reader.read_exact(&mut transactions_len_buf)?; + let mut transactions_ser = + vec![0u8; usize::try_from(u64::from_be_bytes(transactions_len_buf))?]; + reader.read_exact(&mut transactions_ser)?; + let transactions: Vec = bincode::deserialize(&transactions_ser)?; + + let mut parent_len_buf = [0u8; std::mem::size_of::()]; + reader.read_exact(&mut parent_len_buf)?; + let mut parent_ser = vec![0u8; usize::try_from(u64::from_be_bytes(parent_len_buf))?]; + reader.read_exact(&mut parent_ser)?; + let parent: Block = bincode::deserialize(&parent_ser)?; + if block.parent_hash() != parent.hash() { + return Err(anyhow!("Invalid checkpoint file: parent's blockhash does not correspond to checkpoint block")); + } + + let read = self.read()?; + + if state_trie.iter().next().is_some() || read.blocks()?.max_canonical_by_view()?.is_some() { + // If checkpointed block already exists then assume checkpoint load already complete. Return None + if read.blocks()?.by_hash(block.hash())?.is_some() { + return Ok(None); + } + // This may not be strictly necessary, as in theory old values will, at worst, be orphaned + // values not part of any state trie of any known block. 
With some effort, this could + // even be supported. + // However, without such explicit support, having old blocks MAY in fact cause + // unexpected and unwanted behaviour. Thus we currently forbid loading a checkpoint in + // a node that already contains previous state, until (and unless) there's ever a + // usecase for going through the effort to support it and ensure it works as expected. + if let Some(db_block) = read.blocks()?.by_hash(parent.hash())? { + if db_block.parent_hash() != parent.parent_hash() { + return Err(anyhow!("Inconsistent checkpoint file: block loaded from checkpoint and block stored in database with same hash have differing parent hashes")); + } else { + // In this case, the database already has the block contained in this checkpoint. We assume the + // database contains the full state for that block too and thus return early, without actually + // loading the checkpoint file. + return Ok(Some((block, transactions, parent))); + } + } else { + return Err(anyhow!("Inconsistent checkpoint file: block loaded from checkpoint file does not exist in non-empty database")); + } + } + + // then decode state + loop { + // Read account key and the serialised Account + let mut account_hash = [0u8; 32]; + match reader.read_exact(&mut account_hash) { + // Read successful + Ok(_) => (), + // Break loop here if weve reached the end of the file + Err(ref e) if e.kind() == std::io::ErrorKind::UnexpectedEof => { + break; + } + Err(e) => return Err(e.into()), + }; + + let mut serialised_account_len_buf = [0u8; std::mem::size_of::()]; + reader.read_exact(&mut serialised_account_len_buf)?; + let mut serialised_account = + vec![0u8; usize::try_from(u64::from_be_bytes(serialised_account_len_buf))?]; + reader.read_exact(&mut serialised_account)?; + + // Read entire account storage as a buffer + let mut account_storage_len_buf = [0u8; std::mem::size_of::()]; + reader.read_exact(&mut account_storage_len_buf)?; + let account_storage_len = 
usize::try_from(u64::from_be_bytes(account_storage_len_buf))?; + let mut account_storage = vec![0u8; account_storage_len]; + reader.read_exact(&mut account_storage)?; + + // Pull out each storage key and value + let mut account_trie = EthTrie::new(trie_storage.clone()); + let mut pointer: usize = 0; + while account_storage_len > pointer { + let storage_key_len_buf: &[u8] = + &account_storage[pointer..(pointer + std::mem::size_of::())]; + let storage_key_len = + usize::try_from(u64::from_be_bytes(storage_key_len_buf.try_into()?))?; + pointer += std::mem::size_of::(); + let storage_key: &[u8] = &account_storage[pointer..(pointer + storage_key_len)]; + pointer += storage_key_len; + + let storage_val_len_buf: &[u8] = + &account_storage[pointer..(pointer + std::mem::size_of::())]; + let storage_val_len = + usize::try_from(u64::from_be_bytes(storage_val_len_buf.try_into()?))?; + pointer += std::mem::size_of::(); + let storage_val: &[u8] = &account_storage[pointer..(pointer + storage_val_len)]; + pointer += storage_val_len; + + account_trie.insert(storage_key, storage_val)?; + } + + let account_trie_root = + bincode::deserialize::(&serialised_account)?.storage_root; + if account_trie.root_hash()?.as_slice() != account_trie_root { + return Err(anyhow!( + "Invalid checkpoint file: account trie root hash mismatch: calculated {}, checkpoint file contained {}", hex::encode(account_trie.root_hash()?.as_slice()), hex::encode(account_trie_root) + )); + } + state_trie.insert(&account_hash, &serialised_account)?; + } + + if state_trie.root_hash()? != parent.state_root_hash().0 { + return Err(anyhow!("Invalid checkpoint file: state root hash mismatch")); + } + + let write = self.write()?; + write.blocks()?.insert(&parent)?; + write.finalized_view()?.set(parent.view())?; + write.high_qc()?.set(&block.header.qc)?; + write.view()?.set(parent.view() + 1)?; + write.commit()?; + + fs::remove_file(temp_filename)?; + + Ok(Some((block, transactions, parent))) + } +} + +impl Db { + pub fn new

(data_dir: Option

, shard_id: u64, cache_size: usize) -> Result + where + P: AsRef, + { + let db = match data_dir { + Some(path) => { + let path = path.as_ref().join(shard_id.to_string()); + fs::create_dir_all(&path).context(format!("Unable to create {path:?}"))?; + + let mut version_file = OpenOptions::new() + .create(true) + .truncate(false) + .read(true) + .write(true) + .open(path.join("version"))?; + let mut version = String::new(); + version_file.read_to_string(&mut version)?; + let version: u8 = if version.is_empty() { + CURRENT_DB_VERSION + } else { + version.parse()? + }; + + let migrate = if version == CURRENT_DB_VERSION { + false + } else if version == CURRENT_DB_VERSION - 1 { + // We support migrations from the previous DB version. + true + } else { + return Err(anyhow!("data is incompatible with this version - please delete the data and re-sync")); + }; + + let db = Database::builder() + .set_cache_size(cache_size) + .set_repair_callback(|repair| { + info!(progress = repair.progress(), "repairing database"); + }) + .create(path.join("db.redb"))?; + let mut db = Db { + db: Arc::new(db), + path: Some(path.clone().into_boxed_path()), + }; + + if migrate { + let sql_path = path.join("db.sqlite3"); + db = db.migrate_from(rusqlite::Connection::open(&sql_path)?)?; + fs::rename(sql_path, path.join("db.sqlite3.backup"))?; + } + + version_file.seek(SeekFrom::Start(0))?; + version_file.write_all(CURRENT_DB_VERSION.to_string().as_bytes())?; + + db + } + None => Db { + db: Arc::new(Database::builder().create_with_backend(InMemoryBackend::new())?), + path: None, + }, + }; + + // Ensure tables exist. 
+ let write = db.write()?; + write.create_all()?; + write.commit()?; + + Ok(db) + } + + pub fn get_checkpoint_dir(&self) -> Result>> { + let Some(base_path) = &self.path else { + // If we don't have on-disk persistency, disable checkpoints too + warn!( + "Attempting to create checkpoint, but no persistence directory has been configured" + ); + return Ok(None); + }; + Ok(Some(base_path.join("checkpoints").into_boxed_path())) + } + + pub fn into_raw(self) -> Database { + Arc::into_inner(self.db).unwrap() + } +} + +pub fn get_checkpoint_filename + Debug>( + output_dir: P, + block: &Block, +) -> Result { + Ok(output_dir.as_ref().join(block.number().to_string())) +} + +/// Build checkpoint and write to disk. +/// A description of the data written can be found in docs/checkpoints +pub fn checkpoint_block_with_state + Debug>( + block: &Block, + transactions: &Vec, + parent: &Block, + state_trie_storage: TrieStorage, + shard_id: u64, + output_dir: P, +) -> Result<()> { + const VERSION: u32 = 3; + + fs::create_dir_all(&output_dir)?; + + let state_trie_storage = Arc::new(state_trie_storage); + // quick sanity check + if block.parent_hash() != parent.hash() { + return Err(anyhow!( + "Parent block parameter must match the checkpoint block's parent hash" + )); + } + + // Note: we ignore any existing file + let output_filename = get_checkpoint_filename(output_dir, block)?; + let temp_filename = output_filename.with_extension("part"); + let outfile_temp = File::create_new(&temp_filename)?; + let mut writer = BufWriter::with_capacity(8192 * 1024, outfile_temp); // 8 MiB chunks + + // write the header: + writer.write_all(&CHECKPOINT_HEADER_BYTES)?; // file identifier + writer.write_all(&VERSION.to_be_bytes())?; // 4 BE bytes for version + writer.write_all(&shard_id.to_be_bytes())?; // 8 BE bytes for shard ID + writer.write_all(b"\n")?; + + // write the block... 
+ let block_ser = &bincode::serialize(&block)?; + writer.write_all(&u64::try_from(block_ser.len())?.to_be_bytes())?; + writer.write_all(block_ser)?; + + // write transactions + let transactions_ser = &bincode::serialize(&transactions)?; + writer.write_all(&u64::try_from(transactions_ser.len())?.to_be_bytes())?; + writer.write_all(transactions_ser)?; + + // and its parent, to keep the qc tracked + let parent_ser = &bincode::serialize(&parent)?; + writer.write_all(&u64::try_from(parent_ser.len())?.to_be_bytes())?; + writer.write_all(parent_ser)?; + + // then write state for each account + let accounts = + EthTrie::new(state_trie_storage.clone()).at_root(parent.state_root_hash().into()); + let account_storage = EthTrie::new(state_trie_storage); + let mut account_key_buf = [0u8; 32]; // save a few allocations, since account keys are fixed length + + for (key, serialised_account) in accounts.iter() { + // export the account itself + account_key_buf.copy_from_slice(&key); + writer.write_all(&account_key_buf)?; + + writer.write_all(&u64::try_from(serialised_account.len())?.to_be_bytes())?; + writer.write_all(&serialised_account)?; + + // now write the entire account storage map + let account_storage = account_storage + .at_root(bincode::deserialize::(&serialised_account)?.storage_root); + let mut account_storage_buf = vec![]; + for (storage_key, storage_val) in account_storage.iter() { + account_storage_buf.extend_from_slice(&u64::try_from(storage_key.len())?.to_be_bytes()); + account_storage_buf.extend_from_slice(&storage_key); + + account_storage_buf.extend_from_slice(&u64::try_from(storage_val.len())?.to_be_bytes()); + account_storage_buf.extend_from_slice(&storage_val); + } + writer.write_all(&u64::try_from(account_storage_buf.len())?.to_be_bytes())?; + writer.write_all(&account_storage_buf)?; + } + writer.flush()?; + + // lz4 compress and write to output + compress_file(&temp_filename, &output_filename)?; + + fs::remove_file(temp_filename)?; + + Ok(()) +} + +/// Read 
temp file, compress usign lz4, write into output file +fn compress_file + Debug>(input_file_path: P, output_file_path: P) -> Result<()> { + let mut reader = BufReader::new(File::open(input_file_path)?); + + let mut encoder = EncoderBuilder::new().build(File::create(output_file_path)?)?; + let mut buffer = [0u8; 1024 * 64]; // read 64KB chunks at a time + loop { + let bytes_read = reader.read(&mut buffer)?; // Read a chunk of decompressed data + if bytes_read == 0 { + break; // End of file + } + encoder.write_all(&buffer[..bytes_read])?; + } + encoder.finish().1?; + + Ok(()) +} + +/// Read lz4 compressed file and write into output file +fn decompress_file + Debug>(input_file_path: P, output_file_path: P) -> Result<()> { + let reader: BufReader = BufReader::new(File::open(input_file_path)?); + let mut decoder = Decoder::new(reader)?; + + let mut writer = BufWriter::new(File::create(output_file_path)?); + let mut buffer = [0u8; 1024 * 64]; // read 64KB chunks at a time + loop { + let bytes_read = decoder.read(&mut buffer)?; // Read a chunk of decompressed data + if bytes_read == 0 { + break; // End of file + } + writer.write_all(&buffer[..bytes_read])?; + } + + writer.flush()?; + + Ok(()) +} + +/// An implementor of [eth_trie::DB] which uses a [Db] to persist data. 
+#[derive(Debug, Clone)] +pub struct TrieStorage { + db: Arc, +} + +impl eth_trie::DB for TrieStorage { + type Error = anyhow::Error; + + fn get(&self, key: &[u8]) -> Result>> { + self.db.read()?.state_trie()?.get(key) + } + + fn insert(&self, key: &[u8], value: Vec) -> Result<(), Self::Error> { + let write = self.db.write()?; + write.state_trie()?.insert(key, &value)?; + write.commit() + } + + fn insert_batch(&self, keys: Vec>, values: Vec>) -> Result<(), Self::Error> { + if keys.is_empty() { + return Ok(()); + } + + assert_eq!(keys.len(), values.len()); + + let write = self.db.write()?; + for (key, value) in keys.into_iter().zip(values) { + write.state_trie()?.insert(&key, &value)?; + } + write.commit()?; + + Ok(()) + } + + fn remove(&self, _key: &[u8]) -> Result<(), Self::Error> { + // we keep old state to function as an archive node, therefore no-op + Ok(()) + } + + fn remove_batch(&self, _: &[Vec]) -> Result<(), Self::Error> { + // we keep old state to function as an archive node, therefore no-op + Ok(()) + } +} + +#[cfg(test)] +mod tests { + use alloy::consensus::EMPTY_ROOT_HASH; + use rand::{ + distributions::{Distribution, Uniform}, + Rng, SeedableRng, + }; + use rand_chacha::ChaCha8Rng; + use tempfile::tempdir; + + use super::*; + use crate::{ + crypto::SecretKey, message::QuorumCertificate, state::State, time::SystemTime, + transaction::EvmGas, + }; + + #[test] + fn checkpoint_export_import() { + let base_path = tempdir().unwrap(); + let base_path = base_path.path(); + let db = Arc::new(Db::new(Some(base_path), 0, 1024).unwrap()); + + // Seed db with data + let mut rng = ChaCha8Rng::seed_from_u64(0); + let distribution = Uniform::new(1, 50); + let mut root_trie = EthTrie::new(Arc::new(db.state_trie().unwrap())); + for _ in 0..100 { + let account_address: [u8; 20] = rng.gen(); + let mut account_trie = EthTrie::new(Arc::new(db.state_trie().unwrap())); + let mut key = Vec::::with_capacity(50); + let mut value = Vec::::with_capacity(50); + for _ in 
0..distribution.sample(&mut rng) { + for _ in 0..distribution.sample(&mut rng) { + key.push(rng.gen()); + } + for _ in 0..distribution.sample(&mut rng) { + value.push(rng.gen()); + } + account_trie.insert(&key, &value).unwrap(); + } + let account = Account { + storage_root: account_trie.root_hash().unwrap(), + ..Default::default() + }; + root_trie + .insert( + &State::account_key(account_address.into()).0, + &bincode::serialize(&account).unwrap(), + ) + .unwrap(); + } + + let state_hash = root_trie.root_hash().unwrap(); + let checkpoint_parent = Block::genesis(state_hash.into()); + // bit of a hack to generate a successor block + let mut qc2 = QuorumCertificate::genesis(); + qc2.block_hash = checkpoint_parent.hash(); + qc2.view = 1; + let checkpoint_block = Block::from_qc( + SecretKey::new().unwrap(), + 1, + 1, + qc2, + None, + state_hash.into(), + EMPTY_ROOT_HASH.into(), + EMPTY_ROOT_HASH.into(), + vec![], + SystemTime::now(), + EvmGas(0), + EvmGas(0), + ); + + let checkpoint_path = db.get_checkpoint_dir().unwrap().unwrap(); + + const SHARD_ID: u64 = 5000; + + let checkpoint_transactions = vec![]; + checkpoint_block_with_state( + &checkpoint_block, + &checkpoint_transactions, + &checkpoint_parent, + db.state_trie().unwrap(), + SHARD_ID, + &checkpoint_path, + ) + .unwrap(); + + // now load the checkpoint + let (block, transactions, parent) = db + .load_trusted_checkpoint( + checkpoint_path.join(checkpoint_block.number().to_string()), + &checkpoint_block.hash(), + SHARD_ID, + ) + .unwrap() + .unwrap(); + assert_eq!(checkpoint_block, block); + assert_eq!(checkpoint_transactions, transactions); + assert_eq!(checkpoint_parent, parent); + + // load the checkpoint again, to ensure idempotency + let (block, transactions, parent) = db + .load_trusted_checkpoint( + checkpoint_path.join(checkpoint_block.number().to_string()), + &checkpoint_block.hash(), + SHARD_ID, + ) + .unwrap() + .unwrap(); + assert_eq!(checkpoint_block, block); + assert_eq!(checkpoint_transactions, 
transactions); + assert_eq!(checkpoint_parent, parent); + + // Return None if checkpointed block already executed + let write = db.write().unwrap(); + write.blocks().unwrap().insert(&checkpoint_block).unwrap(); + write.commit().unwrap(); + let result = db + .load_trusted_checkpoint( + checkpoint_path.join(checkpoint_block.number().to_string()), + &checkpoint_block.hash(), + SHARD_ID, + ) + .unwrap(); + assert!(result.is_none()); + } +} diff --git a/zilliqa/src/db/tables.rs b/zilliqa/src/db/tables.rs new file mode 100644 index 000000000..eeec5c3cf --- /dev/null +++ b/zilliqa/src/db/tables.rs @@ -0,0 +1,569 @@ +//! This module defines the tables in our database and provides the abstractions used to interact with them. +//! +//! Each logical table may be backed by one or more concrete tables, for additional indices. + +#![allow(clippy::type_complexity)] + +use std::time::Duration; + +use anyhow::{anyhow, Result}; +use bincode::{DefaultOptions, Options}; +use redb::{ + MultimapTable, MultimapTableDefinition, ReadOnlyMultimapTable, ReadOnlyTable, ReadTransaction, + ReadableMultimapTable, ReadableTable, ReadableTableMetadata, Table, TableDefinition, + WriteTransaction, +}; +use revm::primitives::Address; + +use super::Db; +use crate::{ + crypto::Hash, + message::{Block, QuorumCertificate}, + time::SystemTime, + transaction::{SignedTransaction, TransactionReceipt}, +}; + +// Each logical table consists of: +// 1. The `TableDefinition`s backing this table. +// 2. A table `struct` which contains the methods to access this table. The struct is generic, but in practice only +// takes two possible values - One returned by `ReadTx` and one returned by `WriteTx`. The concrete table consists of +// of the opened `redb` tables. +// 3. An `impl` block which contains all the read-only methods for the table. The implementation is generic over the +// `ReadableTable` trait, which means the methods are callable on both `ReadTx`s and `WriteTx`s. +// 4. 
An `impl` block which contains all the write-only methods for the table. The implementation uses the concrete +// mutable `Table` types and thus is only callable on a `WriteTx`. + +// blocks: view -> block +// blocks_hash_index: hash -> view +// blocks_height_index: height -> [view] +// block_is_canonical: view -> bool +const BLOCKS: TableDefinition> = TableDefinition::new("blocks"); +const BLOCKS_HASH_INDEX: TableDefinition<&[u8; 32], u64> = + TableDefinition::new("blocks_hash_index"); +const BLOCKS_HEIGHT_INDEX: MultimapTableDefinition = + MultimapTableDefinition::new("blocks_height_index"); +const BLOCK_IS_CANONICAL: TableDefinition = TableDefinition::new("block_is_canonical"); + +pub struct BlocksTable { + blocks: T1, + blocks_hash_index: T2, + blocks_height_index: T3, + block_is_canonical: T4, +} + +impl BlocksTable +where + T1: ReadableTable>, + T2: ReadableTable<&'static [u8; 32], u64>, + T3: ReadableMultimapTable, + T4: ReadableTable, +{ + pub fn by_view(&self, view: u64) -> Result> { + let Some(block) = self.blocks.get(view)? else { + return Ok(None); + }; + Ok(Some(bincode().deserialize(&block.value())?)) + } + + pub fn max_canonical_by_view(&self) -> Result> { + // Search the `block_is_canonical` table in reverse until we find the canonical block with the maximum view. + for kv in self.block_is_canonical.iter()?.rev() { + let (view, canonical) = kv?; + if canonical.value() { + let view = view.value(); + return self.by_view(view); + } + } + + // There are no canonical blocks. + Ok(None) + } + + pub fn min_by_view(&self) -> Result> { + let Some((_, block)) = self.blocks.first()? 
else { + return Ok(None); + }; + Ok(Some(bincode().deserialize(&block.value())?)) + } + + pub fn max_canonical_by_view_count(&self, count: usize) -> Result> { + let mut blocks = Vec::with_capacity(count); + for kv in self.block_is_canonical.iter()?.rev() { + let (view, canonical) = kv?; + if canonical.value() { + let view = view.value(); + blocks.push(self.by_view(view)?.ok_or(anyhow!("missing block"))?); + } + if blocks.len() == count { + break; + } + } + Ok(blocks) + } + + pub fn by_hash(&self, hash: Hash) -> Result> { + let Some(view) = self.blocks_hash_index.get(&hash.0)? else { + return Ok(None); + }; + self.by_view(view.value()) + } + + pub fn canonical_by_height(&self, height: u64) -> Result> { + for view in self.blocks_height_index.get(height)? { + // Check if this block is canonical. + let view = view?.value(); + let canonical = self + .block_is_canonical + .get(view)? + .ok_or(anyhow!("missing canonical"))? + .value(); + if canonical { + return self.by_view(view); + } + } + + Ok(None) + } + + pub fn contains(&self, view: u64) -> Result { + Ok(self.blocks.get(view)?.is_some()) + } + + pub fn iter(&self) -> Result> + '_> { + Ok(self + .blocks + .iter()? + .map(|b| Ok(bincode().deserialize(&b?.1.value())?))) + } +} + +impl + BlocksTable< + Table<'_, u64, Vec>, + Table<'_, &[u8; 32], u64>, + MultimapTable<'_, u64, u64>, + Table<'_, u64, bool>, + > +{ + pub fn insert(&mut self, block: &Block) -> Result<()> { + self.blocks + .insert(block.view(), bincode().serialize(block)?)?; + self.blocks_hash_index + .insert(&block.hash().0, block.view())?; + self.blocks_height_index + .insert(block.number(), block.view())?; + self.block_is_canonical.insert(block.view(), true)?; + Ok(()) + } + + pub fn delete(&mut self, view: u64) -> Result<()> { + let Some(block) = self.blocks.remove(view)? 
else { + return Ok(()); + }; + let block: Block = bincode().deserialize(&block.value())?; + self.blocks_hash_index.remove(&block.hash().0)?; + self.blocks_height_index + .remove(block.number(), block.view())?; + self.block_is_canonical.remove(block.view())?; + Ok(()) + } + + pub fn set_canonical(&mut self, view: u64) -> Result<()> { + self.block_is_canonical.insert(view, true)?; + Ok(()) + } + + pub fn set_non_canonical(&mut self, view: u64) -> Result<()> { + self.block_is_canonical.insert(view, false)?; + Ok(()) + } +} + +const TRANSACTIONS: TableDefinition<&[u8; 32], Vec> = TableDefinition::new("transactions"); + +pub struct TransactionsTable(T); + +impl>> TransactionsTable { + pub fn get(&self, hash: Hash) -> Result> { + let Some(txn) = self.0.get(&hash.0)? else { + return Ok(None); + }; + Ok(Some(bincode().deserialize(&txn.value())?)) + } + + pub fn contains(&self, hash: Hash) -> Result { + Ok(self.0.get(&hash.0)?.is_some()) + } +} + +impl TransactionsTable { + pub fn count(&self) -> Result { + Ok(self.0.len()?) + } +} + +impl TransactionsTable>> { + pub fn insert(&mut self, hash: Hash, txn: &SignedTransaction) -> Result<()> { + self.0.insert(&hash.0, bincode().serialize(&txn)?)?; + Ok(()) + } + + fn delete(&mut self, hash: Hash) -> Result<()> { + self.0.remove(&hash.0)?; + Ok(()) + } +} + +const RECEIPTS: TableDefinition<&[u8; 32], Vec> = TableDefinition::new("receipts"); + +pub struct ReceiptsTable(T); + +impl>> ReceiptsTable { + pub fn get(&self, hash: Hash) -> Result> { + let Some(txn) = self.0.get(&hash.0)? 
else { + return Ok(None); + }; + Ok(Some(bincode().deserialize(&txn.value())?)) + } +} + +impl ReceiptsTable>> { + pub fn insert(&mut self, receipt: &TransactionReceipt) -> Result<()> { + self.0 + .insert(&receipt.tx_hash.0, bincode().serialize(receipt)?)?; + Ok(()) + } + + fn delete(&mut self, hash: Hash) -> Result<()> { + self.0.remove(&hash.0)?; + Ok(()) + } +} + +// touched_address_index: address -> [(index, txn_hash)] +// The index of each entry is contiguous. This ensures values are returned in the same order they were inserted. +// touched_address_reverse_index: txn_hash -> (index, address) +const TOUCHED_ADDRESS_INDEX: MultimapTableDefinition<&[u8; 20], (u64, &[u8; 32])> = + MultimapTableDefinition::new("touched_address_index"); +const TOUCHED_ADDRESS_REVERSE_INDEX: TableDefinition<&[u8; 32], (u64, &[u8; 20])> = + TableDefinition::new("touched_address_reverse_index"); + +pub struct TouchedAddressIndex { + index: T1, + reverse_index: T2, +} + +impl TouchedAddressIndex +where + T1: ReadableMultimapTable<&'static [u8; 20], (u64, &'static [u8; 32])>, + T2: ReadableTable<&'static [u8; 32], (u64, &'static [u8; 20])>, +{ + pub fn get(&self, address: Address) -> Result> { + let hashes = self.index.get(&<[u8; 20]>::from(address))?; + hashes + .map(|hash| Ok(Hash(*hash?.value().1))) + .collect::>() + } +} + +impl + TouchedAddressIndex< + MultimapTable<'_, &[u8; 20], (u64, &[u8; 32])>, + Table<'_, &[u8; 32], (u64, &[u8; 20])>, + > +{ + pub fn insert(&mut self, address: Address, txn_hash: Hash) -> Result<()> { + let key = &<[u8; 20]>::from(address); + let next_index = self + .index + .get(key)? + .next_back() + .map(|value| Ok::<_, redb::Error>(value?.value().0 + 1)) + .transpose()? + .unwrap_or(0); + + self.index.insert(key, (next_index, &txn_hash.0))?; + self.reverse_index.insert(&txn_hash.0, (next_index, key))?; + Ok(()) + } + + pub fn delete_by_txn_hash(&mut self, txn_hash: Hash) -> Result<()> { + let Some(value) = self.reverse_index.remove(&txn_hash.0)? 
else { + return Ok(()); + }; + let (index, address) = value.value(); + self.index.remove(address, (index, &txn_hash.0))?; + Ok(()) + } +} + +const FINALIZED_VIEW: TableDefinition<(), u64> = TableDefinition::new("finalized_view"); + +pub struct FinalizedViewTable(T); + +impl> FinalizedViewTable { + pub fn get(&self) -> Result> { + Ok(self.0.get(())?.map(|v| v.value())) + } +} + +impl FinalizedViewTable> { + pub fn set(&mut self, finalized_view: u64) -> Result<()> { + self.0.insert((), finalized_view)?; + Ok(()) + } +} + +const VIEW: TableDefinition<(), u64> = TableDefinition::new("view"); + +pub struct ViewTable(T); + +impl> ViewTable { + pub fn get(&self) -> Result> { + Ok(self.0.get(())?.map(|v| v.value())) + } +} + +impl ViewTable> { + /// Sets the provided view if it is greater than the existing view. Returns true if the value was updated. + pub fn set(&mut self, view: u64) -> Result { + let current = self.get()?; + let update = current.map(|c| view > c).unwrap_or(true); + if update { + self.0.insert((), view)?; + } + Ok(update) + } +} + +const HIGH_QC: TableDefinition<(), (Vec, u64, u32)> = TableDefinition::new("high_qc"); + +pub struct HighQcTable(T); + +impl, u64, u32)>> HighQcTable { + pub fn get(&self) -> Result> { + let Some(value) = self.0.get(())? 
else { + return Ok(None); + }; + let (high_qc, updated_at_secs, updated_at_subsec_nanos) = value.value(); + let high_qc = bincode().deserialize(&high_qc)?; + let high_qc_updated_at = + SystemTime::UNIX_EPOCH + Duration::new(updated_at_secs, updated_at_subsec_nanos); + Ok(Some((high_qc, high_qc_updated_at))) + } +} + +impl HighQcTable, u64, u32)>> { + pub fn set(&mut self, high_qc: &QuorumCertificate) -> Result<()> { + let high_qc = bincode().serialize(high_qc)?; + let high_qc_updated_at = SystemTime::now().duration_since(SystemTime::UNIX_EPOCH)?; + self.0.insert( + (), + ( + high_qc, + high_qc_updated_at.as_secs(), + high_qc_updated_at.subsec_nanos(), + ), + )?; + Ok(()) + } + + pub fn set_with_updated_at( + &mut self, + high_qc: &QuorumCertificate, + updated_at: SystemTime, + ) -> Result<()> { + let high_qc = bincode().serialize(high_qc)?; + let high_qc_updated_at = updated_at.duration_since(SystemTime::UNIX_EPOCH)?; + self.0.insert( + (), + ( + high_qc, + high_qc_updated_at.as_secs(), + high_qc_updated_at.subsec_nanos(), + ), + )?; + Ok(()) + } +} + +const STATE_TRIE: TableDefinition<&[u8; 32], Vec> = TableDefinition::new("state_trie"); + +pub struct StateTrieTable(T); + +impl>> StateTrieTable { + pub fn get(&self, key: &[u8]) -> Result>> { + Ok(self.0.get(&<[u8; 32]>::try_from(key)?)?.map(|v| v.value())) + } +} + +impl StateTrieTable>> { + pub fn insert(&mut self, key: &[u8], value: &Vec) -> Result<()> { + self.0.insert(&<[u8; 32]>::try_from(key)?, value)?; + Ok(()) + } +} + +fn bincode() -> DefaultOptions { + // | Byte limit | Endianness | Int Encoding | Trailing Behavior | + // |------------|------------|--------------|-------------------| + // | Unlimited | Little | Varint | Reject | + DefaultOptions::new() +} + +impl Db { + /// Begin a read transaction. + /// + /// Captures a snapshot of the database, so that only data committed before calling this method is visible in the + /// transaction. + /// + /// Read transactions may exist concurrently with writes. 
+ pub fn read(&self) -> Result { + Ok(TxRead(self.db.begin_read()?)) + } + + /// Begin a write transaction. + /// + /// Only a single write may be in progress at a time. If a write is in progress, this function will block until it + /// completes. + /// + /// You must call [`TxWrite::commit`] to persist the writes performed in this transaction. After committing, all + /// writes will be visible to future transactions. + pub fn write(&self) -> Result { + Ok(TxWrite(self.db.begin_write()?)) + } +} + +pub struct TxRead(ReadTransaction); + +impl TxRead { + pub fn blocks( + &self, + ) -> Result< + BlocksTable< + ReadOnlyTable>, + ReadOnlyTable<&'static [u8; 32], u64>, + ReadOnlyMultimapTable, + ReadOnlyTable, + >, + > { + Ok(BlocksTable { + blocks: self.0.open_table(BLOCKS)?, + blocks_hash_index: self.0.open_table(BLOCKS_HASH_INDEX)?, + blocks_height_index: self.0.open_multimap_table(BLOCKS_HEIGHT_INDEX)?, + block_is_canonical: self.0.open_table(BLOCK_IS_CANONICAL)?, + }) + } + pub fn transactions( + &self, + ) -> Result>>> { + Ok(TransactionsTable(self.0.open_table(TRANSACTIONS)?)) + } + pub fn receipts(&self) -> Result>>> { + Ok(ReceiptsTable(self.0.open_table(RECEIPTS)?)) + } + pub fn touched_address_index( + &self, + ) -> Result< + TouchedAddressIndex< + ReadOnlyMultimapTable<&'static [u8; 20], (u64, &'static [u8; 32])>, + ReadOnlyTable<&'static [u8; 32], (u64, &'static [u8; 20])>, + >, + > { + Ok(TouchedAddressIndex { + index: self.0.open_multimap_table(TOUCHED_ADDRESS_INDEX)?, + reverse_index: self.0.open_table(TOUCHED_ADDRESS_REVERSE_INDEX)?, + }) + } + pub fn finalized_view(&self) -> Result>> { + Ok(FinalizedViewTable(self.0.open_table(FINALIZED_VIEW)?)) + } + pub fn view(&self) -> Result>> { + Ok(ViewTable(self.0.open_table(VIEW)?)) + } + pub fn high_qc(&self) -> Result, u64, u32)>>> { + Ok(HighQcTable(self.0.open_table(HIGH_QC)?)) + } + pub fn state_trie(&self) -> Result>>> { + Ok(StateTrieTable(self.0.open_table(STATE_TRIE)?)) + } +} + +pub struct 
TxWrite(WriteTransaction); + +impl TxWrite { + pub fn commit(self) -> Result<()> { + self.0.commit()?; + Ok(()) + } + + pub fn blocks( + &self, + ) -> Result< + BlocksTable< + Table>, + Table<&'static [u8; 32], u64>, + MultimapTable, + Table, + >, + > { + Ok(BlocksTable { + blocks: self.0.open_table(BLOCKS)?, + blocks_hash_index: self.0.open_table(BLOCKS_HASH_INDEX)?, + blocks_height_index: self.0.open_multimap_table(BLOCKS_HEIGHT_INDEX)?, + block_is_canonical: self.0.open_table(BLOCK_IS_CANONICAL)?, + }) + } + pub fn transactions(&self) -> Result>>> { + Ok(TransactionsTable(self.0.open_table(TRANSACTIONS)?)) + } + pub fn receipts(&self) -> Result>>> { + Ok(ReceiptsTable(self.0.open_table(RECEIPTS)?)) + } + pub fn touched_address_index( + &self, + ) -> Result< + TouchedAddressIndex< + MultimapTable<&'static [u8; 20], (u64, &'static [u8; 32])>, + Table<&'static [u8; 32], (u64, &'static [u8; 20])>, + >, + > { + Ok(TouchedAddressIndex { + index: self.0.open_multimap_table(TOUCHED_ADDRESS_INDEX)?, + reverse_index: self.0.open_table(TOUCHED_ADDRESS_REVERSE_INDEX)?, + }) + } + pub fn finalized_view(&self) -> Result>> { + Ok(FinalizedViewTable(self.0.open_table(FINALIZED_VIEW)?)) + } + pub fn view(&self) -> Result>> { + Ok(ViewTable(self.0.open_table(VIEW)?)) + } + pub fn high_qc(&self) -> Result, u64, u32)>>> { + Ok(HighQcTable(self.0.open_table(HIGH_QC)?)) + } + pub fn state_trie(&self) -> Result>>> { + Ok(StateTrieTable(self.0.open_table(STATE_TRIE)?)) + } + + /// Ensure all tables are created. + pub fn create_all(&self) -> Result<()> { + self.blocks()?; + self.transactions()?; + self.receipts()?; + self.touched_address_index()?; + self.finalized_view()?; + self.view()?; + self.high_qc()?; + self.state_trie()?; + Ok(()) + } + + /// Convenience method for deleting all references to a transaction. 
+ pub fn delete_transaction(&self, txn_hash: Hash) -> Result<()> { + self.transactions()?.delete(txn_hash)?; + self.receipts()?.delete(txn_hash)?; + self.touched_address_index()?.delete_by_txn_hash(txn_hash)?; + Ok(()) + } +} diff --git a/zilliqa/src/exec.rs b/zilliqa/src/exec.rs index 93655b64a..4fc8df4d5 100644 --- a/zilliqa/src/exec.rs +++ b/zilliqa/src/exec.rs @@ -1208,9 +1208,11 @@ impl PendingState { } pub fn get_highest_canonical_block_number(&self) -> Result> { - self.pre_state + Ok(self + .pre_state .block_store - .get_highest_canonical_block_number() + .get_highest_block()? + .map(|b| b.number())) } pub fn load_account(&mut self, address: Address) -> Result<&mut PendingAccount> { diff --git a/zilliqa/src/node.rs b/zilliqa/src/node.rs index 5eb0db947..5abc9663b 100644 --- a/zilliqa/src/node.rs +++ b/zilliqa/src/node.rs @@ -191,7 +191,7 @@ impl Node { let db = Arc::new(Db::new( config.data_dir.as_ref(), config.eth_chain_id, - config.state_cache_size, + config.cache_size, )?); let node = Node { config: config.clone(), @@ -445,10 +445,11 @@ impl Node { BlockNumberOrTag::Latest => Ok(Some(self.consensus.head_block())), BlockNumberOrTag::Pending => self.consensus.get_pending_block(), BlockNumberOrTag::Finalized => { - let Some(view) = self.db.get_finalized_view()? else { + let read = self.db.read()?; + let Some(view) = read.finalized_view()?.get()? else { return self.resolve_block_number(BlockNumberOrTag::Earliest); }; - let Some(block) = self.db.get_block_by_view(view)? else { + let Some(block) = read.blocks()?.by_view(view)? 
else { return self.resolve_block_number(BlockNumberOrTag::Earliest); }; Ok(Some(block)) @@ -873,13 +874,6 @@ impl Node { self.consensus.head_block().header.number } - pub fn get_transaction_receipts_in_block( - &self, - block_hash: Hash, - ) -> Result> { - self.db.get_transaction_receipts_in_block(&block_hash) - } - pub fn get_finalized_height(&self) -> Result { self.consensus.get_finalized_view() } diff --git a/zilliqa/src/pool.rs b/zilliqa/src/pool.rs index 32b71d908..7260be78c 100644 --- a/zilliqa/src/pool.rs +++ b/zilliqa/src/pool.rs @@ -411,7 +411,7 @@ mod tests { block_store::BlockStore, cfg::NodeConfig, crypto::Hash, - db::Db, + db::{ArcDb, Db}, node::{MessageSender, RequestId}, state::State, transaction::{EvmGas, SignedTransaction, TxIntershard, VerifiedTransaction}, diff --git a/zilliqa/src/state.rs b/zilliqa/src/state.rs index 059efe9db..20329e07b 100644 --- a/zilliqa/src/state.rs +++ b/zilliqa/src/state.rs @@ -587,7 +587,7 @@ mod tests { api::to_hex::ToHex, block_store::BlockStore, cfg::NodeConfig, - db::Db, + db::{ArcDb, Db}, message::BlockHeader, node::{MessageSender, RequestId}, }; diff --git a/zilliqa/tests/it/main.rs b/zilliqa/tests/it/main.rs index 9be342e59..1ea0ffb55 100644 --- a/zilliqa/tests/it/main.rs +++ b/zilliqa/tests/it/main.rs @@ -66,11 +66,12 @@ use zilliqa::{ api, cfg::{ allowed_timestamp_skew_default, block_request_batch_size_default, - block_request_limit_default, eth_chain_id_default, failed_request_sleep_duration_default, - max_blocks_in_flight_default, max_rpc_response_size_default, scilla_address_default, - scilla_ext_libs_path_default, scilla_stdlib_dir_default, state_cache_size_default, - state_rpc_limit_default, total_native_token_supply_default, Amount, ApiServer, Checkpoint, - ConsensusConfig, ContractUpgradesBlockHeights, Forks, GenesisDeposit, NodeConfig, + block_request_limit_default, cache_size_default, eth_chain_id_default, + failed_request_sleep_duration_default, max_blocks_in_flight_default, + 
max_rpc_response_size_default, scilla_address_default, scilla_ext_libs_path_default, + scilla_stdlib_dir_default, state_rpc_limit_default, total_native_token_supply_default, + Amount, ApiServer, Checkpoint, ConsensusConfig, ContractUpgradesBlockHeights, Forks, + GenesisDeposit, NodeConfig, }, crypto::{SecretKey, TransactionPublicKey}, db, @@ -359,7 +360,7 @@ impl Network { }], allowed_timestamp_skew: allowed_timestamp_skew_default(), data_dir: None, - state_cache_size: state_cache_size_default(), + cache_size: cache_size_default(), load_checkpoint: None, do_checkpoints, block_request_limit: block_request_limit_default(), @@ -463,7 +464,7 @@ impl Network { }], allowed_timestamp_skew: allowed_timestamp_skew_default(), data_dir: None, - state_cache_size: state_cache_size_default(), + cache_size: cache_size_default(), load_checkpoint: options.checkpoint.clone(), do_checkpoints: self.do_checkpoints, consensus: ConsensusConfig {