diff --git a/Cargo.lock b/Cargo.lock index ef891b1e1..06c4aa6a1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1151,15 +1151,6 @@ dependencies = [ "zeroize", ] -[[package]] -name = "build-array" -version = "0.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "67ef4e2687af237b2646687e19a0643bc369878216122e46c3f1a01c56baa9d5" -dependencies = [ - "arrayvec", -] - [[package]] name = "bumpalo" version = "3.16.0" @@ -5136,7 +5127,6 @@ dependencies = [ "assert2", "bitflags 2.6.0", "bitvec", - "build-array", "bytes", "camino", "ciborium", diff --git a/trace_decoder/Cargo.toml b/trace_decoder/Cargo.toml index 5bdd24f12..4b8d8e7bc 100644 --- a/trace_decoder/Cargo.toml +++ b/trace_decoder/Cargo.toml @@ -15,7 +15,6 @@ alloy-compat = "0.1.0" anyhow.workspace = true bitflags.workspace = true bitvec.workspace = true -build-array = "0.1.2" bytes.workspace = true ciborium.workspace = true ciborium-io.workspace = true diff --git a/trace_decoder/src/core.rs b/trace_decoder/src/core.rs index 46495030f..8093098c1 100644 --- a/trace_decoder/src/core.rs +++ b/trace_decoder/src/core.rs @@ -4,12 +4,11 @@ use std::{ mem, }; -use alloy_compat::Compat as _; use anyhow::{anyhow, bail, ensure, Context as _}; use either::Either; -use ethereum_types::{Address, U256}; +use ethereum_types::{Address, BigEndianHash as _, U256}; use evm_arithmetization::{ - generation::{mpt::AccountRlp, TrieInputs}, + generation::TrieInputs, proof::{BlockMetadata, TrieRoots}, GenerationInputs, }; @@ -21,10 +20,11 @@ use zk_evm_common::gwei_to_wei; use crate::{ observer::{DummyObserver, Observer}, - tries::StateSmt, + world::Type2World, }; use crate::{ - tries::{MptKey, ReceiptTrie, StateMpt, StateTrie, StorageTrie, TransactionTrie}, + tries::{MptKey, ReceiptTrie, StateMpt, StorageTrie, TransactionTrie}, + world::{Type1World, World}, BlockLevelData, BlockTrace, BlockTraceTriePreImages, CombinedPreImages, ContractCodeUsage, OtherBlockData, SeparateStorageTriesPreImage, SeparateTriePreImage, SeparateTriePreImages, TxnInfo, TxnMeta, TxnTrace, @@ -46,7 +46,7 @@ pub fn entrypoint( trace: BlockTrace, other: OtherBlockData, batch_size_hint: usize, - observer: &mut impl Observer, + observer: &mut impl Observer, wire_disposition: WireDisposition, ) -> anyhow::Result> { ensure!(batch_size_hint != 0); @@ -61,7 +61,7 @@ pub fn entrypoint( BlockTraceTriePreImages::Separate(_) => FatalMissingCode(true), BlockTraceTriePreImages::Combined(_) => FatalMissingCode(false), }; - let (state, storage, mut code) = start(trie_pre_images, wire_disposition)?; + let (world, mut code) = start(trie_pre_images, wire_disposition)?; code.extend(code_db); @@ -82,11 +82,10 @@ pub fn entrypoint( *amt = gwei_to_wei(*amt) } - let batches = match state { - Either::Left(mpt) => Either::Left( + let batches = match world { + Either::Left(type1world) => Either::Left( middle( - mpt, - storage, + type1world, batch(txn_info, batch_size_hint), &mut code, &b_meta, @@ -98,11 +97,10 @@ pub fn entrypoint( .into_iter() .map(|it| it.map(Either::Left)), ), - Either::Right(smt) => { + Either::Right(type2world) => { Either::Right( middle( - smt, - storage, + type2world, batch(txn_info, batch_size_hint), &mut code, &b_meta, @@ -128,42 +126,43 @@ pub fn entrypoint( byte_code, before: IntraBlockTries { - state, - storage, + world, transaction, receipt, }, after, withdrawals, - }| GenerationInputs { - txn_number_before: first_txn_ix.into(), - gas_used_before: running_gas_used.into(), - gas_used_after: { - running_gas_used += gas_used; - running_gas_used.into() - }, - signed_txns: byte_code.into_iter().map(Into::into).collect(), - withdrawals, - ger_data, - tries: TrieInputs { - state_trie: match state { - Either::Left(mpt) => mpt.into(), - Either::Right(_) => todo!("evm_arithmetization accepts an SMT"), + }| { + let (state, storage) = world + .expect_left("TODO(0xaatif): evm_arithemetization accepts an SMT") + .into_state_and_storage(); + GenerationInputs { + txn_number_before: first_txn_ix.into(), + gas_used_before: running_gas_used.into(), + gas_used_after: { + running_gas_used += gas_used; + running_gas_used.into() }, - transactions_trie: transaction.into(), - receipts_trie: receipt.into(), - storage_tries: storage.into_iter().map(|(k, v)| (k, v.into())).collect(), - }, - trie_roots_after: after, - checkpoint_state_trie_root, - checkpoint_consolidated_hash, - contract_code: contract_code - .into_iter() - .map(|it| (keccak_hash::keccak(&it), it)) - .collect(), - block_metadata: b_meta.clone(), - block_hashes: b_hashes.clone(), - burn_addr, + signed_txns: byte_code.into_iter().map(Into::into).collect(), + withdrawals, + ger_data, + tries: TrieInputs { + state_trie: state.into(), + transactions_trie: transaction.into(), + receipts_trie: receipt.into(), + storage_tries: storage.into_iter().map(|(k, v)| (k, v.into())).collect(), + }, + trie_roots_after: after, + checkpoint_state_trie_root, + checkpoint_consolidated_hash, + contract_code: contract_code + .into_iter() + .map(|it| (keccak_hash::keccak(&it), it)) + .collect(), + block_metadata: b_meta.clone(), + block_hashes: b_hashes.clone(), + burn_addr, + } }, ) .collect()) @@ -178,11 +177,7 @@ pub fn entrypoint( fn start( pre_images: BlockTraceTriePreImages, wire_disposition: WireDisposition, -) -> anyhow::Result<( - Either, - BTreeMap, - Hash2Code, -)> { +) -> anyhow::Result<(Either, Hash2Code)> { Ok(match pre_images { // TODO(0xaatif): https://github.com/0xPolygonZero/zk_evm/issues/401 // refactor our convoluted input types @@ -190,27 +185,26 @@ fn start( state: SeparateTriePreImage::Direct(state), storage: SeparateStorageTriesPreImage::MultipleTries(storage), }) => { - let state = state.items().try_fold( - StateMpt::default(), - |mut acc, (nibbles, hash_or_val)| { - let path = MptKey::from_nibbles(nibbles); - match hash_or_val { - mpt_trie::trie_ops::ValOrHash::Val(bytes) => { - #[expect(deprecated)] // this is MPT specific - acc.insert_by_hashed_address( - path.into_hash() - .context("invalid path length in direct state trie")?, - rlp::decode(&bytes) - .context("invalid AccountRlp in direct state trie")?, - )?; - } - mpt_trie::trie_ops::ValOrHash::Hash(h) => { - acc.insert_hash_by_key(path, h)?; - } - }; - anyhow::Ok(acc) - }, - )?; + let state = + state + .items() + .try_fold(StateMpt::new(), |mut acc, (nibbles, hash_or_val)| { + let path = MptKey::from_nibbles(nibbles); + match hash_or_val { + mpt_trie::trie_ops::ValOrHash::Val(bytes) => { + acc.insert( + path.into_hash() + .context("invalid path length in direct state trie")?, + rlp::decode(&bytes) + .context("invalid AccountRlp in direct state trie")?, + )?; + } + mpt_trie::trie_ops::ValOrHash::Hash(h) => { + acc.insert_hash(path, h)?; + } + }; + anyhow::Ok(acc) + })?; let storage = storage .into_iter() .map(|(k, SeparateTriePreImage::Direct(v))| { @@ -230,12 +224,15 @@ fn start( .map(|v| (k, v)) }) .collect::>()?; - (Either::Left(state), storage, Hash2Code::new()) + ( + Either::Left(Type1World::new(state, storage)?), + Hash2Code::new(), + ) } BlockTraceTriePreImages::Combined(CombinedPreImages { compact }) => { let instructions = crate::wire::parse(&compact) .context("couldn't parse instructions from binary format")?; - let (state, storage, code) = match wire_disposition { + match wire_disposition { WireDisposition::Type1 => { let crate::type1::Frontend { state, @@ -243,22 +240,19 @@ fn start( code, } = crate::type1::frontend(instructions)?; ( - Either::Left(state), - storage, + Either::Left(Type1World::new(state, storage)?), Hash2Code::from_iter(code.into_iter().map(NonEmpty::into_vec)), ) } WireDisposition::Type2 => { - let crate::type2::Frontend { trie, code } = + let crate::type2::Frontend { world: trie, code } = crate::type2::frontend(instructions)?; ( Either::Right(trie), - BTreeMap::new(), Hash2Code::from_iter(code.into_iter().map(NonEmpty::into_vec)), ) } - }; - (state, storage, code) + } } }) } @@ -358,24 +352,21 @@ impl Batch { /// [`evm_arithmetization::generation::TrieInputs`], /// generic over state trie representation. #[derive(Debug)] -pub struct IntraBlockTries { - pub state: StateTrieT, - pub storage: BTreeMap, +pub struct IntraBlockTries { + pub world: WorldT, pub transaction: TransactionTrie, pub receipt: ReceiptTrie, } impl IntraBlockTries { - fn map(self, mut f: impl FnMut(T) -> U) -> IntraBlockTries { + fn map(self, f: impl FnOnce(T) -> U) -> IntraBlockTries { let Self { - state, - storage, + world, transaction, receipt, } = self; IntraBlockTries { - state: f(state), - storage, + world: f(world), transaction, receipt, } @@ -391,11 +382,9 @@ pub struct FatalMissingCode(pub bool); /// Does the main work mentioned in the [module documentation](super). #[allow(clippy::too_many_arguments)] -fn middle( +fn middle( // state at the beginning of the block - mut state_trie: StateTrieT, - // storage at the beginning of the block - mut storage_tries: BTreeMap, + mut world: WorldT, // None represents a dummy transaction that should not increment the transaction index // all batches SHOULD not be empty batches: Vec>>, @@ -406,25 +395,11 @@ fn middle( mut withdrawals: Vec<(Address, U256)>, fatal_missing_code: FatalMissingCode, // called with the untrimmed tries after each batch - observer: &mut impl Observer, -) -> anyhow::Result>> + observer: &mut impl Observer, +) -> anyhow::Result>> where - StateTrieT::Key: Ord + From
, + WorldT::SubtriePath: Ord + From
, { - // Initialise the storage tries. - for (haddr, acct) in state_trie.iter() { - let storage = storage_tries.entry(haddr).or_insert({ - let mut it = StorageTrie::default(); - it.insert_hash(MptKey::default(), acct.storage_root) - .expect("empty trie insert cannot fail"); - it - }); - ensure!( - storage.root() == acct.storage_root, - "inconsistent initial storage for hashed address {haddr:x}" - ) - } - // These are the per-block tries. let mut transaction_trie = TransactionTrie::new(); let mut receipt_trie = ReceiptTrie::new(); @@ -441,26 +416,24 @@ where let mut batch_contract_code = BTreeSet::from([vec![]]); // always include empty code let mut before = IntraBlockTries { - state: state_trie.clone(), + world: world.clone(), transaction: transaction_trie.clone(), receipt: receipt_trie.clone(), - storage: storage_tries.clone(), }; // We want to perform mask the TrieInputs above, // but won't know the bounds until after the loop below, // so store that information here. let mut storage_masks = BTreeMap::<_, BTreeSet>::new(); - let mut state_mask = BTreeSet::::new(); + let mut state_mask = BTreeSet::::new(); if txn_ix == 0 { do_pre_execution( block, ger_data, - &mut storage_tries, &mut storage_masks, &mut state_mask, - &mut state_trie, + &mut world, )?; } @@ -510,28 +483,12 @@ where .map_err(|e| anyhow!("{e:?}")) .context(format!("couldn't decode receipt in txn {tx_hash:x}"))?; - let (mut acct, born) = state_trie - .get_by_address(addr) - .map(|acct| (acct, false)) - .unwrap_or((AccountRlp::default(), true)); + let born = !world.contains(addr)?; if born { // Empty accounts cannot have non-empty storage, // so we can safely insert a default trie. - storage_tries.insert(keccak_hash::keccak(addr), StorageTrie::default()); - } - - if born || just_access { - state_trie - .clone() - .insert_by_address(addr, acct) - .context(format!( - "couldn't reach state of {} address {addr:x} in txn {tx_hash:x}", - match born { - true => "created", - false => "accessed", - } - ))?; + world.create_storage(addr)? } let do_writes = !just_access @@ -551,71 +508,60 @@ where ); if do_writes { - acct.balance = balance.unwrap_or(acct.balance); - acct.nonce = nonce.unwrap_or(acct.nonce); - acct.code_hash = code_usage - .map(|it| match it { + if let Some(new) = balance { + world.update_balance(addr, |it| *it = new)? + } + if let Some(new) = nonce { + world.update_nonce(addr, |it| *it = new)? + } + if let Some(usage) = code_usage { + match usage { ContractCodeUsage::Read(hash) => { // TODO(Nashtare): https://github.com/0xPolygonZero/zk_evm/issues/700 - // This is a bug in the zero tracer, which shouldn't be giving us - // this read at all. Workaround for now. + // This is a bug in the zero tracer, + // which shouldn't be giving us this read at all. + // Workaround for now. + // The fix should involve removing the `Either` + // below. match (fatal_missing_code, code.get(hash)) { (FatalMissingCode(true), None) => { bail!("no code for hash {hash:x}") } (_, Some(byte_code)) => { + world.set_code(addr, Either::Left(&byte_code))?; batch_contract_code.insert(byte_code); } - (_, None) => { - log::warn!("no code for {hash:x}") - } + (_, None) => world.set_code(addr, Either::Right(hash))?, } - - anyhow::Ok(hash) } ContractCodeUsage::Write(bytes) => { code.insert(bytes.clone()); - let hash = keccak_hash::keccak(&bytes); + world.set_code(addr, Either::Left(&bytes))?; batch_contract_code.insert(bytes); - Ok(hash) } - }) - .transpose()? - .unwrap_or(acct.code_hash); - - if !storage_written.is_empty() { - let storage = match born { - true => storage_tries.entry(keccak_hash::keccak(addr)).or_default(), - false => storage_tries.get_mut(&keccak_hash::keccak(addr)).context( - format!( - "missing storage trie for address {addr:x} in txn {tx_hash:x}" - ), - )?, }; + } + if !storage_written.is_empty() { for (k, v) in storage_written { - let slot = MptKey::from_hash(keccak_hash::keccak(k)); match v.is_zero() { // this is actually a delete - true => storage_mask.extend(storage.reporting_remove(slot)?), - false => { - storage.insert(slot, rlp::encode(&v).to_vec())?; - } + true => storage_mask + .extend(world.reporting_destroy_slot(addr, k.into_uint())?), + false => world.store_int(addr, k.into_uint(), v)?, } } - acct.storage_root = storage.root(); } - state_trie.insert_by_address(addr, acct)?; - state_mask.insert(::from(addr)); + state_mask.insert(::from(addr)); } else { // Simple state access - state_mask.insert(::from(addr)); + state_mask.insert(::from(addr)); } if self_destructed { - storage_tries.remove(&keccak_hash::keccak(addr)); - state_mask.extend(state_trie.reporting_remove(addr)?) + world.destroy_storage(addr)?; + state_mask.extend(world.reporting_destroy(addr)?) } } @@ -633,41 +579,22 @@ where withdrawals: match loop_ix == loop_len { true => { for (addr, amt) in &withdrawals { - state_mask.insert(::from(*addr)); - let mut acct = state_trie - .get_by_address(*addr) - .context(format!("missing address {addr:x} for withdrawal"))?; - acct.balance += *amt; - state_trie - .insert_by_address(*addr, acct) - // TODO(0xaatif): https://github.com/0xPolygonZero/zk_evm/issues/275 - // Add an entry API - .expect("insert must succeed with the same key as a successful `get`"); + state_mask.insert(::from(*addr)); + world.update_balance(*addr, |it| *it += *amt)?; } mem::take(&mut withdrawals) } false => vec![], }, before: { - before.state.mask(state_mask)?; + before.world.mask(state_mask)?; before.receipt.mask(batch_first_txn_ix..txn_ix)?; before.transaction.mask(batch_first_txn_ix..txn_ix)?; - - let keep = storage_masks - .keys() - .map(keccak_hash::keccak) - .collect::>(); - before.storage.retain(|haddr, _| keep.contains(haddr)); - - for (addr, mask) in storage_masks { - if let Some(it) = before.storage.get_mut(&keccak_hash::keccak(addr)) { - it.mask(mask)? - } // else must have self-destructed - } + before.world.mask_storage(storage_masks)?; before }, after: TrieRoots { - state_root: state_trie.root(), + state_root: world.root(), transactions_root: transaction_trie.root(), receipts_root: receipt_trie.root(), }, @@ -676,8 +603,7 @@ where observer.collect_tries( block.block_number, batch_index, - &state_trie, - &storage_tries, + &world, &transaction_trie, &receipt_trie, ) @@ -687,38 +613,29 @@ where } /// Performs all the pre-txn execution rules of the targeted network. -fn do_pre_execution( +fn do_pre_execution( block: &BlockMetadata, ger_data: Option<(H256, H256)>, - storage: &mut BTreeMap, trim_storage: &mut BTreeMap>, - trim_state: &mut BTreeSet, - state_trie: &mut StateTrieT, + trim_state: &mut BTreeSet, + world: &mut WorldT, ) -> anyhow::Result<()> where - StateTrieT::Key: From
+ Ord, + WorldT::SubtriePath: From
+ Ord, { // Ethereum mainnet: EIP-4788 if cfg!(feature = "eth_mainnet") { return do_beacon_hook( block.block_timestamp, - storage, trim_storage, block.parent_beacon_block_root, trim_state, - state_trie, + world, ); } if cfg!(feature = "cdk_erigon") { - return do_scalable_hook( - block, - ger_data, - storage, - trim_storage, - trim_state, - state_trie, - ); + return do_scalable_hook(block, ger_data, trim_storage, trim_state, world); } Ok(()) @@ -729,37 +646,30 @@ where /// /// This is Polygon-CDK-specific, and runs at the start of the block, /// before any transactions (as per the Etrog specification). -fn do_scalable_hook( +fn do_scalable_hook( block: &BlockMetadata, ger_data: Option<(H256, H256)>, - storage: &mut BTreeMap, trim_storage: &mut BTreeMap>, - trim_state: &mut BTreeSet, - state_trie: &mut StateTrieT, + trim_state: &mut BTreeSet, + world: &mut WorldT, ) -> anyhow::Result<()> where - StateTrieT::Key: From
+ Ord, + WorldT::SubtriePath: From
+ Ord, { use evm_arithmetization::testing_utils::{ - ADDRESS_SCALABLE_L2, ADDRESS_SCALABLE_L2_ADDRESS_HASHED, GLOBAL_EXIT_ROOT_ADDRESS, - GLOBAL_EXIT_ROOT_ADDRESS_HASHED, GLOBAL_EXIT_ROOT_STORAGE_POS, LAST_BLOCK_STORAGE_POS, - STATE_ROOT_STORAGE_POS, TIMESTAMP_STORAGE_POS, + ADDRESS_SCALABLE_L2, GLOBAL_EXIT_ROOT_ADDRESS, GLOBAL_EXIT_ROOT_STORAGE_POS, + LAST_BLOCK_STORAGE_POS, STATE_ROOT_STORAGE_POS, TIMESTAMP_STORAGE_POS, }; if block.block_number.is_zero() { return Err(anyhow!("Attempted to prove the Genesis block!")); } - let scalable_storage = storage - .get_mut(&ADDRESS_SCALABLE_L2_ADDRESS_HASHED) - .context("missing scalable contract storage trie")?; let scalable_trim = trim_storage.entry(ADDRESS_SCALABLE_L2).or_default(); - let timestamp_slot_key = MptKey::from_slot_position(U256::from(TIMESTAMP_STORAGE_POS.1)); + let timestamp = world + .load_int(ADDRESS_SCALABLE_L2, U256::from(TIMESTAMP_STORAGE_POS.1)) + .unwrap_or_default(); - let timestamp = scalable_storage - .get(×tamp_slot_key) - .map(rlp::decode::) - .unwrap_or(Ok(0.into()))?; let timestamp = core::cmp::max(timestamp, block.block_timestamp); // Store block number and largest timestamp @@ -770,38 +680,31 @@ where ] { let slot = MptKey::from_slot_position(ix); - // These values are never 0. - scalable_storage.insert(slot, alloy::rlp::encode(u.compat()))?; + ensure!(!u.is_zero()); + world.store_int(ADDRESS_SCALABLE_L2, ix, u)?; scalable_trim.insert(slot); } // Store previous block root hash - let prev_block_root_hash = state_trie.root(); + let prev_block_root_hash = world.root(); let mut arr = [0; 64]; (block.block_number - 1).to_big_endian(&mut arr[0..32]); U256::from(STATE_ROOT_STORAGE_POS.1).to_big_endian(&mut arr[32..64]); let slot = MptKey::from_hash(keccak_hash::keccak(arr)); - scalable_storage.insert(slot, alloy::rlp::encode(prev_block_root_hash.compat()))?; + world.store_hash( + ADDRESS_SCALABLE_L2, + keccak_hash::keccak(arr), + prev_block_root_hash, + )?; + scalable_trim.insert(slot); - trim_state.insert(::from(ADDRESS_SCALABLE_L2)); - let mut scalable_acct = state_trie - .get_by_address(ADDRESS_SCALABLE_L2) - .context("missing scalable contract address")?; - scalable_acct.storage_root = scalable_storage.root(); - state_trie - .insert_by_address(ADDRESS_SCALABLE_L2, scalable_acct) - // TODO(0xaatif): https://github.com/0xPolygonZero/zk_evm/issues/275 - // Add an entry API - .expect("insert must succeed with the same key as a successful `get`"); + trim_state.insert(::from(ADDRESS_SCALABLE_L2)); // Update GER contract's storage if necessary if let Some((root, l1blockhash)) = ger_data { - let ger_storage = storage - .get_mut(&GLOBAL_EXIT_ROOT_ADDRESS_HASHED) - .context("missing GER contract storage trie")?; let ger_trim = trim_storage.entry(GLOBAL_EXIT_ROOT_ADDRESS).or_default(); let mut arr = [0; 64]; @@ -809,19 +712,14 @@ where U256::from(GLOBAL_EXIT_ROOT_STORAGE_POS.1).to_big_endian(&mut arr[32..64]); let slot = MptKey::from_hash(keccak_hash::keccak(arr)); - ger_storage.insert(slot, alloy::rlp::encode(l1blockhash.compat()))?; + world.store_hash( + GLOBAL_EXIT_ROOT_ADDRESS, + keccak_hash::keccak(arr), + l1blockhash, + )?; ger_trim.insert(slot); - trim_state.insert(::from(GLOBAL_EXIT_ROOT_ADDRESS)); - let mut ger_acct = state_trie - .get_by_address(GLOBAL_EXIT_ROOT_ADDRESS) - .context("missing GER contract address")?; - ger_acct.storage_root = ger_storage.root(); - state_trie - .insert_by_address(GLOBAL_EXIT_ROOT_ADDRESS, ger_acct) - // TODO(0xaatif): https://github.com/0xPolygonZero/zk_evm/issues/275 - // Add an entry API - .expect("insert must succeed with the same key as a successful `get`"); + trim_state.insert(::from(GLOBAL_EXIT_ROOT_ADDRESS)); } Ok(()) @@ -832,26 +730,22 @@ where /// /// This is Cancun-specific, and runs at the start of the block, /// before any transactions (as per the EIP). -fn do_beacon_hook( +fn do_beacon_hook( block_timestamp: U256, - storage: &mut BTreeMap, trim_storage: &mut BTreeMap>, parent_beacon_block_root: H256, - trim_state: &mut BTreeSet, - state_trie: &mut StateTrieT, + trim_state: &mut BTreeSet, + world: &mut WorldT, ) -> anyhow::Result<()> where - StateTrieT::Key: From
+ Ord, + WorldT::SubtriePath: From
+ Ord, { use evm_arithmetization::testing_utils::{ - BEACON_ROOTS_CONTRACT_ADDRESS, BEACON_ROOTS_CONTRACT_ADDRESS_HASHED, HISTORY_BUFFER_LENGTH, + BEACON_ROOTS_CONTRACT_ADDRESS, HISTORY_BUFFER_LENGTH, }; let timestamp_idx = block_timestamp % HISTORY_BUFFER_LENGTH.value; let root_idx = timestamp_idx + HISTORY_BUFFER_LENGTH.value; - let beacon_storage = storage - .get_mut(&BEACON_ROOTS_CONTRACT_ADDRESS_HASHED) - .context("missing beacon contract storage trie")?; let beacon_trim = trim_storage .entry(BEACON_ROOTS_CONTRACT_ADDRESS) .or_default(); @@ -867,23 +761,16 @@ where beacon_trim.insert(slot); match u.is_zero() { - true => beacon_trim.extend(beacon_storage.reporting_remove(slot)?), + true => { + beacon_trim.extend(world.reporting_destroy_slot(BEACON_ROOTS_CONTRACT_ADDRESS, ix)?) + } false => { - beacon_storage.insert(slot, alloy::rlp::encode(u.compat()))?; + world.store_int(BEACON_ROOTS_CONTRACT_ADDRESS, ix, u)?; beacon_trim.insert(slot); } } } - trim_state.insert(::from(BEACON_ROOTS_CONTRACT_ADDRESS)); - let mut beacon_acct = state_trie - .get_by_address(BEACON_ROOTS_CONTRACT_ADDRESS) - .context("missing beacon contract address")?; - beacon_acct.storage_root = beacon_storage.root(); - state_trie - .insert_by_address(BEACON_ROOTS_CONTRACT_ADDRESS, beacon_acct) - // TODO(0xaatif): https://github.com/0xPolygonZero/zk_evm/issues/275 - // Add an entry API - .expect("insert must succeed with the same key as a successful `get`"); + trim_state.insert(::from(BEACON_ROOTS_CONTRACT_ADDRESS)); Ok(()) } diff --git a/trace_decoder/src/lib.rs b/trace_decoder/src/lib.rs index 057d11e89..1f1c87888 100644 --- a/trace_decoder/src/lib.rs +++ b/trace_decoder/src/lib.rs @@ -1,8 +1,10 @@ //! An _Ethereum Node_ executes _transactions_ in _blocks_. //! //! Execution mutates two key data structures: -//! - [The state trie](https://ethereum.org/en/developers/docs/data-structures-and-encoding/patricia-merkle-trie/#state-trie). -//! - [The storage tries](https://ethereum.org/en/developers/docs/data-structures-and-encoding/patricia-merkle-trie/#storage-trie). +//! - [The state](https://ethereum.org/en/developers/docs/data-structures-and-encoding/patricia-merkle-trie/#state-trie), +//! which tracks, e.g the account balance. +//! - [The storage](https://ethereum.org/en/developers/docs/data-structures-and-encoding/patricia-merkle-trie/#storage-trie), +//! which is a huge array of integers, per-account. //! //! Ethereum nodes expose information about the transactions over RPC, e.g: //! - [The specific changes to the storage tries](TxnTrace::storage_written). @@ -13,7 +15,8 @@ //! //! **Prover perfomance is a high priority.** //! -//! The aformentioned trie structures may have subtries _hashed out_. +//! The aformentioned data structures are represented as tries, +//! which may have subtries _hashed out_. //! That is, any node (and its children!) may be replaced by its hash, //! while maintaining provability of its contents: //! @@ -44,12 +47,16 @@ /// Over RPC, ethereum nodes expose their tries as a series of binary /// [`wire::Instruction`]s in a node-dependant format. /// -/// These are parsed into the relevant trie depending on the node: +/// These are parsed into the relevant state and storage data structures, +/// depending on the node: /// - [`type2`], which contains an [`smt_trie`]. /// - [`type1`], which contains an [`mpt_trie`]. /// /// After getting the tries, /// we can continue to do the main work of "executing" the transactions. +/// +/// The core of this library is agnostic over the (combined) +/// state and storage representation - see [`world::World`] for more. const _DEVELOPER_DOCS: () = (); mod interface; @@ -60,6 +67,7 @@ mod tries; mod type1; mod type2; mod wire; +mod world; pub use core::{entrypoint, WireDisposition}; diff --git a/trace_decoder/src/observer.rs b/trace_decoder/src/observer.rs index f9811e87c..428cac086 100644 --- a/trace_decoder/src/observer.rs +++ b/trace_decoder/src/observer.rs @@ -1,15 +1,14 @@ -use std::collections::BTreeMap; use std::marker::PhantomData; -use ethereum_types::{H256, U256}; +use ethereum_types::U256; use crate::core::IntraBlockTries; -use crate::tries::{ReceiptTrie, StorageTrie, TransactionTrie}; +use crate::tries::{ReceiptTrie, TransactionTrie}; /// Observer API for the trace decoder. /// Observer is used to collect various debugging and metadata info /// from the trace decoder run. -pub trait Observer { +pub trait Observer { /// Collect tries after the transaction/batch execution. /// /// Passing the arguments one by one through reference, because @@ -19,8 +18,7 @@ pub trait Observer { &mut self, block: U256, batch: usize, - state_trie: &StateTrieT, - storage: &BTreeMap, + state_trie: &WorldT, transaction_trie: &TransactionTrie, receipt_trie: &ReceiptTrie, ); @@ -55,13 +53,12 @@ impl TriesObserver { } } -impl Observer for TriesObserver { +impl Observer for TriesObserver { fn collect_tries( &mut self, block: U256, batch: usize, - state_trie: &StateTrieT, - storage: &BTreeMap, + state_trie: &WorldT, transaction_trie: &TransactionTrie, receipt_trie: &ReceiptTrie, ) { @@ -69,8 +66,7 @@ impl Observer for TriesObserver { block, batch, tries: IntraBlockTries { - state: state_trie.clone(), - storage: storage.clone(), + world: state_trie.clone(), transaction: transaction_trie.clone(), receipt: receipt_trie.clone(), }, @@ -99,13 +95,12 @@ impl DummyObserver { } } -impl Observer for DummyObserver { +impl Observer for DummyObserver { fn collect_tries( &mut self, _block: U256, _batch: usize, - _state_trie: &StateTrieT, - _storage: &BTreeMap, + _state_trie: &WorldT, _transaction_trie: &TransactionTrie, _receipt_trie: &ReceiptTrie, ) { diff --git a/trace_decoder/src/tries.rs b/trace_decoder/src/tries.rs index 91add4d98..7da8d2cfa 100644 --- a/trace_decoder/src/tries.rs +++ b/trace_decoder/src/tries.rs @@ -1,103 +1,18 @@ -//! Principled trie types and abstractions used in this library. +//! Principled trie types used in this library. use core::fmt; -use std::{cmp, collections::BTreeMap, marker::PhantomData}; +use std::cmp; use anyhow::ensure; use bitvec::{array::BitArray, slice::BitSlice}; use copyvec::CopyVec; -use ethereum_types::{Address, BigEndianHash as _, H256, U256}; +use ethereum_types::{Address, H256, U256}; use evm_arithmetization::generation::mpt::AccountRlp; use mpt_trie::partial_trie::{HashedPartialTrie, Node, OnOrphanedHashNode, PartialTrie as _}; use u4::{AsNibbles, U4}; -/// See . -/// -/// Portions of the trie may be _hashed out_: see [`Self::insert_hash`]. -#[derive(Debug, Clone, PartialEq, Eq)] -struct TypedMpt { - inner: HashedPartialTrie, - _ty: PhantomData T>, -} - -impl TypedMpt { - const PANIC_MSG: &str = "T encoding/decoding should round-trip,\ - and only encoded `T`s are ever inserted"; - fn new() -> Self { - Self { - inner: HashedPartialTrie::new(Node::Empty), - _ty: PhantomData, - } - } - /// Insert a node which represents an out-of-band sub-trie. - /// - /// See [module documentation](super) for more. - fn insert_hash(&mut self, key: MptKey, hash: H256) -> anyhow::Result<()> { - self.inner.insert(key.into_nibbles(), hash)?; - Ok(()) - } - /// Returns [`Err`] if the `key` crosses into a part of the trie that - /// is hashed out. - fn insert(&mut self, key: MptKey, value: T) -> anyhow::Result<()> - where - T: rlp::Encodable + rlp::Decodable, - { - self.inner - .insert(key.into_nibbles(), rlp::encode(&value).to_vec())?; - Ok(()) - } - /// Note that this returns [`None`] if `key` crosses into a part of the - /// trie that is hashed out. - /// - /// # Panics - /// - If [`rlp::decode`]-ing for `T` doesn't round-trip. - fn get(&self, key: MptKey) -> Option - where - T: rlp::Decodable, - { - let bytes = self.inner.get(key.into_nibbles())?; - Some(rlp::decode(bytes).expect(Self::PANIC_MSG)) - } - const fn as_hashed_partial_trie(&self) -> &HashedPartialTrie { - &self.inner - } - fn as_mut_hashed_partial_trie_unchecked(&mut self) -> &mut HashedPartialTrie { - &mut self.inner - } - fn root(&self) -> H256 { - self.inner.hash() - } - /// Note that this returns owned paths and items. - fn iter(&self) -> impl Iterator + '_ - where - T: rlp::Decodable, - { - self.inner.keys().filter_map(|nib| { - let path = MptKey::from_nibbles(nib); - Some((path, self.get(path)?)) - }) - } -} - -impl Default for TypedMpt { - fn default() -> Self { - Self::new() - } -} - -impl<'a, T> IntoIterator for &'a TypedMpt -where - T: rlp::Decodable, -{ - type Item = (MptKey, T); - type IntoIter = Box + 'a>; - fn into_iter(self) -> Self::IntoIter { - Box::new(self.iter()) - } -} - /// Bounded sequence of [`U4`], -/// used as a key for [`TypedMpt`]. +/// used as a key for [MPT](HashedPartialTrie) types in this module. /// /// Semantically equivalent to [`mpt_trie::nibbles::Nibbles`]. #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Default)] @@ -116,17 +31,6 @@ impl MptKey { pub fn new(components: impl IntoIterator) -> anyhow::Result { Ok(MptKey(CopyVec::try_from_iter(components)?)) } - pub fn into_hash_left_padded(mut self) -> H256 { - for _ in 0..self.0.spare_capacity_mut().len() { - self.0.insert(0, U4::Dec00) - } - let mut packed = [0u8; 32]; - AsNibbles(&mut packed).pack_from_slice(&self.0); - H256::from_slice(&packed) - } - pub fn from_address(address: Address) -> Self { - Self::from_hash(keccak_hash::keccak(address)) - } pub fn from_slot_position(pos: U256) -> Self { let mut bytes = [0; 32]; pos.to_big_endian(&mut bytes); @@ -189,7 +93,7 @@ fn mpt_key_into_hash() { } /// Bounded sequence of bits, -/// used as a key for [`StateSmt`]. +/// used as a key for SMT tries. /// /// Semantically equivalent to [`smt_trie::bits::Bits`]. #[derive(Clone, Copy)] @@ -366,246 +270,90 @@ impl From for HashedPartialTrie { } } -/// TODO(0xaatif): document this after refactoring is done -pub trait StateTrie { - type Key; - fn insert_by_address(&mut self, address: Address, account: AccountRlp) -> anyhow::Result<()>; - fn get_by_address(&self, address: Address) -> Option; - fn reporting_remove(&mut self, address: Address) -> anyhow::Result>; - /// _Hash out_ parts of the trie that aren't in `addresses`. - fn mask(&mut self, address: impl IntoIterator) -> anyhow::Result<()>; - fn iter(&self) -> impl Iterator + '_; - fn root(&self) -> H256; -} - /// Global, [`Address`] `->` [`AccountRlp`]. /// /// See -#[derive(Debug, Clone, Default)] +#[derive(Debug, Clone)] pub struct StateMpt { - typed: TypedMpt, + /// Values are always [`rlp`]-encoded [`AccountRlp`], + /// inserted at [256 bits](MptKey::from_hash). + inner: HashedPartialTrie, +} + +impl Default for StateMpt { + fn default() -> Self { + Self::new() + } +} + +#[track_caller] +fn assert_rlp_account(bytes: impl AsRef<[u8]>) -> AccountRlp { + rlp::decode(bytes.as_ref()).expect("invalid RLP in StateMPT") } impl StateMpt { - pub fn new(strategy: OnOrphanedHashNode) -> Self { + pub fn new() -> Self { Self { - typed: TypedMpt { - inner: HashedPartialTrie::new_with_strategy(Node::Empty, strategy), - _ty: PhantomData, - }, + inner: HashedPartialTrie::new_with_strategy( + Node::Empty, + // This frontend is intended to be used with our custom `zeroTracer`, + // which covers branch-to-extension collapse edge cases. + OnOrphanedHashNode::CollapseToExtension, + ), } } - /// Insert a _hashed out_ part of the trie - pub fn insert_hash_by_key(&mut self, key: MptKey, hash: H256) -> anyhow::Result<()> { - self.typed.insert_hash(key, hash) - } - #[deprecated = "prefer operations on `Address` where possible, as SMT support requires this"] - pub fn insert_by_hashed_address( - &mut self, - key: H256, - account: AccountRlp, - ) -> anyhow::Result<()> { - self.typed.insert(MptKey::from_hash(key), account) + pub fn as_hashed_partial_trie(&self) -> &HashedPartialTrie { + &self.inner } - pub fn iter(&self) -> impl Iterator + '_ { - self.typed - .iter() - .map(|(key, rlp)| (key.into_hash().expect("key is always H256"), rlp)) + /// Insert a _hashed out_ part of the trie + pub fn insert_hash(&mut self, key: MptKey, hash: H256) -> anyhow::Result<()> { + Ok(self.inner.insert(key.into_nibbles(), hash)?) } - pub fn as_hashed_partial_trie(&self) -> &mpt_trie::partial_trie::HashedPartialTrie { - self.typed.as_hashed_partial_trie() + pub fn insert(&mut self, key: H256, account: AccountRlp) -> anyhow::Result<()> { + Ok(self.inner.insert( + MptKey::from_hash(key).into_nibbles(), + rlp::encode(&account).to_vec(), + )?) } -} - -impl StateTrie for StateMpt { - type Key = MptKey; - fn insert_by_address(&mut self, address: Address, account: AccountRlp) -> anyhow::Result<()> { - #[expect(deprecated)] - self.insert_by_hashed_address(keccak_hash::keccak(address), account) + pub fn get(&self, key: H256) -> Option { + self.inner + .get(MptKey::from_hash(key).into_nibbles()) + .map(assert_rlp_account) } - fn get_by_address(&self, address: Address) -> Option { - self.typed - .get(MptKey::from_hash(keccak_hash::keccak(address))) + pub fn root(&self) -> H256 { + self.inner.hash() } - /// Delete the account at `address`, returning any remaining branch on - /// collapse - fn reporting_remove(&mut self, address: Address) -> anyhow::Result> { + pub fn reporting_remove(&mut self, address: Address) -> anyhow::Result> { delete_node_and_report_remaining_key_if_branch_collapsed( - self.typed.as_mut_hashed_partial_trie_unchecked(), - MptKey::from_address(address), + &mut self.inner, + MptKey::from_hash(keccak_hash::keccak(address)), ) } - fn mask(&mut self, addresses: impl IntoIterator) -> anyhow::Result<()> { - let inner = mpt_trie::trie_subsets::create_trie_subset( - self.typed.as_hashed_partial_trie(), + pub fn mask(&mut self, addresses: impl IntoIterator) -> anyhow::Result<()> { + let new = mpt_trie::trie_subsets::create_trie_subset( + &self.inner, addresses.into_iter().map(MptKey::into_nibbles), )?; - self.typed = TypedMpt { - inner, - _ty: PhantomData, - }; + self.inner = new; Ok(()) } - fn iter(&self) -> impl Iterator + '_ { - self.typed - .iter() - .map(|(key, rlp)| (key.into_hash().expect("key is always H256"), rlp)) - } - fn root(&self) -> H256 { - self.typed.root() + pub fn iter(&self) -> impl Iterator + '_ { + self.inner.items().filter_map(|(key, rlp)| match rlp { + mpt_trie::trie_ops::ValOrHash::Val(vec) => Some(( + MptKey::from_nibbles(key).into_hash().expect("bad depth"), + assert_rlp_account(vec), + )), + mpt_trie::trie_ops::ValOrHash::Hash(_) => None, + }) } } impl From for HashedPartialTrie { - fn from(value: StateMpt) -> Self { - let StateMpt { - typed: TypedMpt { inner, _ty }, - } = value; + fn from(StateMpt { inner }: StateMpt) -> Self { inner } } -// TODO(0xaatif): https://github.com/0xPolygonZero/zk_evm/issues/706 -// We're covering for [`smt_trie`] in a couple of ways: -// - insertion operations aren't fallible, they just panic. -// - it documents a requirement that `set_hash` is called before `set`. -#[derive(Clone, Debug)] -pub struct StateSmt { - address2state: BTreeMap, - hashed_out: BTreeMap, -} - -impl StateTrie for StateSmt { - type Key = SmtKey; - fn insert_by_address(&mut self, address: Address, account: AccountRlp) -> anyhow::Result<()> { - self.address2state.insert(address, account); - Ok(()) - } - fn get_by_address(&self, address: Address) -> Option { - self.address2state.get(&address).copied() - } - fn reporting_remove(&mut self, address: Address) -> anyhow::Result> { - self.address2state.remove(&address); - Ok(None) - } - fn mask(&mut self, address: impl IntoIterator) -> anyhow::Result<()> { - let _ = address; - Ok(()) - } - fn iter(&self) -> impl Iterator + '_ { - self.address2state - .iter() - .map(|(addr, acct)| (keccak_hash::keccak(addr), *acct)) - } - fn root(&self) -> H256 { - conv_hash::smt2eth(self.as_smt().root) - } -} - -impl StateSmt { - pub(crate) fn new_unchecked( - address2state: BTreeMap, - hashed_out: BTreeMap, - ) -> Self { - Self { - address2state, - hashed_out, - } - } - - fn as_smt(&self) -> smt_trie::smt::Smt { - let Self { - address2state, - hashed_out, - } = self; - let mut smt = smt_trie::smt::Smt::::default(); - for (k, v) in hashed_out { - smt.set_hash(k.into_smt_bits(), conv_hash::eth2smt(*v)); - } - for ( - addr, - AccountRlp { - nonce, - balance, - storage_root, - code_hash, - }, - ) in address2state - { - smt.set(smt_trie::keys::key_nonce(*addr), *nonce); - smt.set(smt_trie::keys::key_balance(*addr), *balance); - smt.set(smt_trie::keys::key_code(*addr), code_hash.into_uint()); - smt.set( - // TODO(0xaatif): https://github.com/0xPolygonZero/zk_evm/issues/707 - // combined abstraction for state and storage - smt_trie::keys::key_storage(*addr, U256::zero()), - storage_root.into_uint(), - ); - } - smt - } -} - -mod conv_hash { - //! We [`u64::to_le_bytes`] because: - //! - Reference go code just puns the bytes: - //! - It's better to fix the endianness for correctness. - //! - Most (consumer) CPUs are little-endian. - - use std::array; - - use ethereum_types::H256; - use itertools::Itertools as _; - use plonky2::{ - field::{ - goldilocks_field::GoldilocksField, - types::{Field as _, PrimeField64}, - }, - hash::hash_types::HashOut, - }; - - /// # Panics - /// - On certain inputs if `debug_assertions` are enabled. See - /// [`GoldilocksField::from_canonical_u64`] for more. - pub fn eth2smt(H256(bytes): H256) -> smt_trie::smt::HashOut { - let mut bytes = bytes.into_iter(); - // (no unsafe, no unstable) - let ret = HashOut { - elements: array::from_fn(|_ix| { - let (a, b, c, d, e, f, g, h) = bytes.next_tuple().unwrap(); - GoldilocksField::from_canonical_u64(u64::from_le_bytes([a, b, c, d, e, f, g, h])) - }), - }; - assert_eq!(bytes.len(), 0); - ret - } - pub fn smt2eth(HashOut { elements }: smt_trie::smt::HashOut) -> H256 { - H256( - build_array::ArrayBuilder::from_iter( - elements - .iter() - .map(GoldilocksField::to_canonical_u64) - .flat_map(u64::to_le_bytes), - ) - .build_exact() - .unwrap(), - ) - } - - #[test] - fn test() { - use plonky2::field::types::Field64 as _; - let mut max = std::iter::repeat(GoldilocksField::ORDER - 1).flat_map(u64::to_le_bytes); - for h in [ - H256::zero(), - H256(array::from_fn(|ix| ix as u8)), - H256(array::from_fn(|_| max.next().unwrap())), - ] { - assert_eq!(smt2eth(eth2smt(h)), h); - } - } -} - /// Global, per-account. /// /// See @@ -622,10 +370,9 @@ impl StorageTrie { pub fn get(&mut self, key: &MptKey) -> Option<&[u8]> { self.untyped.get(key.into_nibbles()) } - pub fn insert(&mut self, key: MptKey, value: Vec) -> anyhow::Result>> { - let prev = self.get(&key).map(Vec::from); + pub fn insert(&mut self, key: MptKey, value: Vec) -> anyhow::Result<()> { self.untyped.insert(key.into_nibbles(), value)?; - Ok(prev) + Ok(()) } pub fn insert_hash(&mut self, key: MptKey, hash: H256) -> anyhow::Result<()> { self.untyped.insert(key.into_nibbles(), hash)?; diff --git a/trace_decoder/src/type1.rs b/trace_decoder/src/type1.rs index c44beaec7..c982a1ab3 100644 --- a/trace_decoder/src/type1.rs +++ b/trace_decoder/src/type1.rs @@ -15,25 +15,13 @@ use u4::U4; use crate::tries::{MptKey, StateMpt, StorageTrie}; use crate::wire::{Instruction, SmtLeaf}; -#[derive(Debug, Clone)] +#[derive(Debug, Clone, Default)] pub struct Frontend { pub state: StateMpt, pub code: BTreeSet>>, pub storage: BTreeMap, } -impl Default for Frontend { - // This frontend is intended to be used with our custom `zeroTracer`, - // which covers branch-to-extension collapse edge cases. - fn default() -> Self { - Self { - state: StateMpt::new(OnOrphanedHashNode::CollapseToExtension), - code: BTreeSet::new(), - storage: BTreeMap::new(), - } - } -} - pub fn frontend(instructions: impl IntoIterator) -> anyhow::Result { let executions = execute(instructions)?; ensure!( @@ -66,7 +54,7 @@ fn visit( Node::Hash(Hash { raw_hash }) => { frontend .state - .insert_hash_by_key(MptKey::new(path.iter().copied())?, raw_hash.into())?; + .insert_hash(MptKey::new(path.iter().copied())?, raw_hash.into())?; } Node::Leaf(Leaf { key, value }) => { let path = MptKey::new(path.iter().copied().chain(key))? @@ -105,8 +93,7 @@ fn visit( } }, }; - #[expect(deprecated)] // this is MPT-specific code - frontend.state.insert_by_hashed_address(path, account)?; + frontend.state.insert(path, account)?; } } } @@ -379,8 +366,6 @@ fn finish_stack(v: &mut Vec) -> anyhow::Result { #[test] fn test_tries() { - use crate::tries::StateTrie as _; - for (ix, case) in serde_json::from_str::>(include_str!("cases/zero_jerigon.json")) .unwrap() @@ -393,7 +378,7 @@ fn test_tries() { assert_eq!(case.expected_state_root, frontend.state.root()); for (haddr, acct) in frontend.state.iter() { - if acct.storage_root != StateMpt::default().root() { + if acct.storage_root != StorageTrie::default().root() { assert!(frontend.storage.contains_key(&haddr)) } } diff --git a/trace_decoder/src/type2.rs b/trace_decoder/src/type2.rs index 44d13e89a..845260d47 100644 --- a/trace_decoder/src/type2.rs +++ b/trace_decoder/src/type2.rs @@ -5,29 +5,19 @@ use std::collections::{BTreeMap, HashSet}; use anyhow::{bail, ensure, Context as _}; use ethereum_types::{Address, U256}; -use evm_arithmetization::generation::mpt::AccountRlp; use itertools::EitherOrBoth; use keccak_hash::H256; use nunny::NonEmpty; use stackstack::Stack; use crate::{ - tries::{SmtKey, StateSmt}, + tries::SmtKey, wire::{Instruction, SmtLeaf, SmtLeafType}, + world::{Type2Entry, Type2World}, }; -/// Combination of all the [`SmtLeaf::node_type`]s -#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Default)] -pub struct CollatedLeaf { - pub balance: Option, - pub nonce: Option, - pub code: Option, - pub code_length: Option, - pub storage: BTreeMap, -} - pub struct Frontend { - pub trie: StateSmt, + pub world: Type2World, pub code: HashSet>>, } @@ -36,13 +26,14 @@ pub struct Frontend { /// NOT call this function on untrusted inputs. pub fn frontend(instructions: impl IntoIterator) -> anyhow::Result { let (node, code) = fold(instructions).context("couldn't fold smt from instructions")?; - let trie = node2trie(node).context("couldn't construct trie and collation from folded node")?; - Ok(Frontend { trie, code }) + let world = + node2world(node).context("couldn't construct trie and collation from folded node")?; + Ok(Frontend { world, code }) } /// Node in a binary (SMT) tree. /// -/// This is an intermediary type on the way to [`StateSmt`]. +/// This is an intermediary type on the way to [`Type2World`]. enum Node { Branch(EitherOrBoth>), Hash([u8; 32]), @@ -113,45 +104,16 @@ fn fold1(instructions: impl IntoIterator) -> anyhow::Result< } } -fn node2trie(node: Node) -> anyhow::Result { +fn node2world(node: Node) -> anyhow::Result { let mut hashes = BTreeMap::new(); let mut leaves = BTreeMap::new(); visit(&mut hashes, &mut leaves, Stack::new(), node)?; - Ok(StateSmt::new_unchecked( - leaves - .into_iter() - .map( - |( - addr, - CollatedLeaf { - balance, - nonce, - // TODO(0xaatif): https://github.com/0xPolygonZero/zk_evm/issues/707 - // we shouldn't ignore these fields - code: _, - code_length: _, - storage: _, - }, - )| { - ( - addr, - AccountRlp { - nonce: nonce.unwrap_or_default(), - balance: balance.unwrap_or_default(), - storage_root: H256::zero(), - code_hash: H256::zero(), - }, - ) - }, - ) - .collect(), - hashes, - )) + Ok(Type2World::new_unchecked(leaves, hashes)) } fn visit( hashes: &mut BTreeMap, - leaves: &mut BTreeMap, + leaves: &mut BTreeMap, path: Stack, node: Node, ) -> anyhow::Result<()> { @@ -213,65 +175,7 @@ fn visit( #[test] fn test_tries() { - type Smt = smt_trie::smt::Smt; - use ethereum_types::BigEndianHash as _; - use plonky2::field::types::{Field, Field64 as _}; - - // TODO(0xaatif): https://github.com/0xPolygonZero/zk_evm/issues/707 - // this logic should live in StateSmt, but we need to - // - abstract over state and storage tries - // - parameterize the account types - // we preserve this code as a tested record of how it _should_ - // be done. - fn node2trie(node: Node) -> anyhow::Result { - let mut trie = Smt::default(); - let mut hashes = BTreeMap::new(); - let mut leaves = BTreeMap::new(); - visit(&mut hashes, &mut leaves, Stack::new(), node)?; - for (key, hash) in hashes { - trie.set_hash( - key.into_smt_bits(), - smt_trie::smt::HashOut { - elements: { - let ethereum_types::U256(arr) = hash.into_uint(); - for u in arr { - ensure!(u < smt_trie::smt::F::ORDER); - } - arr.map(smt_trie::smt::F::from_canonical_u64) - }, - }, - ); - } - for ( - addr, - CollatedLeaf { - balance, - nonce, - code, - code_length, - storage, - }, - ) in leaves - { - use smt_trie::keys::{key_balance, key_code, key_code_length, key_nonce, key_storage}; - - for (value, key_fn) in [ - (balance, key_balance as fn(_) -> _), - (nonce, key_nonce), - (code, key_code), - (code_length, key_code_length), - ] { - if let Some(value) = value { - trie.set(key_fn(addr), value); - } - } - for (slot, value) in storage { - trie.set(key_storage(addr, slot), value); - } - } - Ok(trie) - } - + use crate::world::World as _; for (ix, case) in serde_json::from_str::>(include_str!("cases/hermez_cdk_erigon.json")) .unwrap() @@ -279,13 +183,7 @@ fn test_tries() { .enumerate() { println!("case {}", ix); - let instructions = crate::wire::parse(&case.bytes).unwrap(); - let (node, _code) = fold(instructions).unwrap(); - let trie = node2trie(node).unwrap(); - assert_eq!(case.expected_state_root, { - let mut it = [0; 32]; - smt_trie::utils::hashout2u(trie.root).to_big_endian(&mut it); - ethereum_types::H256(it) - }); + let mut frontend = frontend(crate::wire::parse(&case.bytes).unwrap()).unwrap(); + assert_eq!(case.expected_state_root, frontend.world.root()); } } diff --git a/trace_decoder/src/world.rs b/trace_decoder/src/world.rs new file mode 100644 index 000000000..fa68854e4 --- /dev/null +++ b/trace_decoder/src/world.rs @@ -0,0 +1,420 @@ +use std::collections::{BTreeMap, BTreeSet}; + +use alloy_compat::Compat as _; +use anyhow::{ensure, Context as _}; +use either::Either; +use ethereum_types::{Address, BigEndianHash as _, U256}; +use keccak_hash::H256; + +use crate::tries::{MptKey, SmtKey, StateMpt, StorageTrie}; + +/// The [core](crate::core) of this crate is agnostic over state and storage +/// representations. +/// +/// This is the common interface to those data structures. +/// See also [crate::_DEVELOPER_DOCS]. +pub(crate) trait World { + /// (State) subtries may be _hashed out. + /// This type is a key which may identify a subtrie. + type SubtriePath; + + ////////////////////// + /// Account operations + ////////////////////// + + /// Whether the state contains an account at the given address. + /// + /// `false` is not necessarily definitive - the address may belong to a + /// _hashed out_ subtrie. + fn contains(&mut self, address: Address) -> anyhow::Result; + + /// Update the balance for the account at the given address. + /// + /// Creates a new account at `address` if it does not exist. + fn update_balance(&mut self, address: Address, f: impl FnOnce(&mut U256)) + -> anyhow::Result<()>; + + /// Update the nonce for the account at the given address. + /// + /// Creates a new account at `address` if it does not exist. + fn update_nonce(&mut self, address: Address, f: impl FnOnce(&mut U256)) -> anyhow::Result<()>; + + /// Update the code for the account at the given address. + /// + /// Creates a new account at `address` if it does not exist. + fn set_code(&mut self, address: Address, code: Either<&[u8], H256>) -> anyhow::Result<()>; + + /// The [core](crate::core) of this crate tracks required subtries for + /// proving. + /// + /// In case of a state delete, it may be that certain parts of the subtrie + /// must be retained. If so, it will be returned as [`Some`]. + fn reporting_destroy(&mut self, address: Address) -> anyhow::Result>; + + ////////////////////// + /// Storage operations + ////////////////////// + + /// Create an account at the given address. + /// + /// It may not be an error if the address already exists. + fn create_storage(&mut self, address: Address) -> anyhow::Result<()>; + + /// Destroy storage for the given address' account. + fn destroy_storage(&mut self, address: Address) -> anyhow::Result<()>; + + /// Store an integer for the given account at the given `slot`. + fn store_int(&mut self, address: Address, slot: U256, value: U256) -> anyhow::Result<()>; + fn store_hash(&mut self, address: Address, hash: H256, value: H256) -> anyhow::Result<()>; + + /// Load an integer from the given account at the given `slot`. + fn load_int(&mut self, address: Address, slot: U256) -> anyhow::Result; + + /// Delete the given slot from the given account's storage. + /// + /// In case of a delete, it may be that certain parts of the subtrie + /// must be retained. If so, it will be returned as [`Some`]. + fn reporting_destroy_slot( + &mut self, + address: Address, + slot: U256, + ) -> anyhow::Result>; + fn mask_storage(&mut self, masks: BTreeMap>) -> anyhow::Result<()>; + + //////////////////// + /// Other operations + //////////////////// + + /// _Hash out_ parts of the (state) trie that aren't in `paths`. + fn mask(&mut self, paths: impl IntoIterator) -> anyhow::Result<()>; + + /// Return an identifier for the world. + fn root(&mut self) -> H256; +} + +#[derive(Clone, Debug)] +pub struct Type1World { + state: StateMpt, + /// Writes to storage should be reconciled with + /// [`storage_root`](evm_arithmetization::generation::mpt::AccountRlp)s. + storage: BTreeMap, +} + +impl Type1World { + pub fn new(state: StateMpt, mut storage: BTreeMap) -> anyhow::Result { + // Initialise the storage tries. + for (haddr, acct) in state.iter() { + let storage = storage.entry(haddr).or_insert_with(|| { + let mut it = StorageTrie::default(); + it.insert_hash(MptKey::default(), acct.storage_root) + .expect("empty trie insert cannot fail"); + it + }); + ensure!( + storage.root() == acct.storage_root, + "inconsistent initial storage for hashed address {haddr}" + ) + } + Ok(Self { state, storage }) + } + pub fn state_trie(&self) -> &mpt_trie::partial_trie::HashedPartialTrie { + self.state.as_hashed_partial_trie() + } + pub fn into_state_and_storage(self) -> (StateMpt, BTreeMap) { + let Self { state, storage } = self; + (state, storage) + } + fn get_storage_mut(&mut self, address: Address) -> anyhow::Result<&mut StorageTrie> { + self.storage + .get_mut(&keccak_hash::keccak(address)) + .context("no such storage") + } + fn on_storage( + &mut self, + address: Address, + f: impl FnOnce(&mut StorageTrie) -> anyhow::Result, + ) -> anyhow::Result { + let mut acct = self + .state + .get(keccak_hash::keccak(address)) + .context("no such account")?; + let storage = self.get_storage_mut(address)?; + let ret = f(storage)?; + acct.storage_root = storage.root(); + self.state.insert(keccak_hash::keccak(address), acct)?; + Ok(ret) + } +} + +impl World for Type1World { + type SubtriePath = MptKey; + fn contains(&mut self, address: Address) -> anyhow::Result { + Ok(self.state.get(keccak_hash::keccak(address)).is_some()) + } + fn update_balance( + &mut self, + address: Address, + f: impl FnOnce(&mut U256), + ) -> anyhow::Result<()> { + let key = keccak_hash::keccak(address); + let mut acct = self.state.get(key).unwrap_or_default(); + f(&mut acct.balance); + self.state.insert(key, acct) + } + fn update_nonce(&mut self, address: Address, f: impl FnOnce(&mut U256)) -> anyhow::Result<()> { + let key = keccak_hash::keccak(address); + let mut acct = self.state.get(key).unwrap_or_default(); + f(&mut acct.nonce); + self.state.insert(key, acct) + } + fn set_code(&mut self, address: Address, code: Either<&[u8], H256>) -> anyhow::Result<()> { + let key = keccak_hash::keccak(address); + let mut acct = self.state.get(key).unwrap_or_default(); + acct.code_hash = code.right_or_else(keccak_hash::keccak); + self.state.insert(key, acct) + } + fn reporting_destroy(&mut self, address: Address) -> anyhow::Result> { + self.state.reporting_remove(address) + } + fn mask( + &mut self, + addresses: impl IntoIterator, + ) -> anyhow::Result<()> { + self.state.mask(addresses) + } + fn root(&mut self) -> H256 { + self.state.root() + } + fn create_storage(&mut self, address: Address) -> anyhow::Result<()> { + let _clobbered = self + .storage + .insert(keccak_hash::keccak(address), StorageTrie::default()); + // ensure!(_clobbered.is_none()); // TODO(0xaatif): fails our tests + Ok(()) + } + fn destroy_storage(&mut self, address: Address) -> anyhow::Result<()> { + let removed = self.storage.remove(&keccak_hash::keccak(address)); + ensure!(removed.is_some()); + Ok(()) + } + + fn store_int(&mut self, address: Address, slot: U256, value: U256) -> anyhow::Result<()> { + self.on_storage(address, |it| { + it.insert( + MptKey::from_slot_position(slot), + alloy::rlp::encode(value.compat()), + ) + }) + } + + fn store_hash(&mut self, address: Address, hash: H256, value: H256) -> anyhow::Result<()> { + self.on_storage(address, |it| { + it.insert(MptKey::from_hash(hash), alloy::rlp::encode(value.compat())) + }) + } + + fn load_int(&mut self, address: Address, slot: U256) -> anyhow::Result { + let bytes = self + .get_storage_mut(address)? + .get(&MptKey::from_slot_position(slot)) + .context(format!("no storage at slot {slot} for address {address:x}"))?; + Ok(rlp::decode(bytes)?) + } + + fn reporting_destroy_slot( + &mut self, + address: Address, + slot: U256, + ) -> anyhow::Result> { + self.on_storage(address, |it| { + it.reporting_remove(MptKey::from_slot_position(slot)) + }) + } + + fn mask_storage(&mut self, masks: BTreeMap>) -> anyhow::Result<()> { + let keep = masks + .keys() + .map(keccak_hash::keccak) + .collect::>(); + self.storage.retain(|haddr, _| keep.contains(haddr)); + for (addr, mask) in masks { + if let Some(it) = self.storage.get_mut(&keccak_hash::keccak(addr)) { + it.mask(mask)? + } + } + Ok(()) + } +} + +impl World for Type2World { + type SubtriePath = SmtKey; + fn contains(&mut self, address: Address) -> anyhow::Result { + Ok(self.accounts.contains_key(&address)) + } + fn update_balance( + &mut self, + address: Address, + f: impl FnOnce(&mut U256), + ) -> anyhow::Result<()> { + let acct = self.accounts.entry(address).or_default(); + f(acct.balance.get_or_insert(Default::default())); + Ok(()) + } + fn update_nonce(&mut self, address: Address, f: impl FnOnce(&mut U256)) -> anyhow::Result<()> { + let acct = self.accounts.entry(address).or_default(); + f(acct.nonce.get_or_insert(Default::default())); + Ok(()) + } + fn set_code(&mut self, address: Address, code: Either<&[u8], H256>) -> anyhow::Result<()> { + let acct = self.accounts.entry(address).or_default(); + match code { + Either::Left(bytes) => { + acct.code = Some(keccak_hash::keccak(bytes).into_uint()); + acct.code_length = Some(U256::from(bytes.len())) + } + Either::Right(hash) => acct.code = Some(hash.into_uint()), + }; + Ok(()) + } + fn reporting_destroy(&mut self, address: Address) -> anyhow::Result> { + self.accounts.remove(&address); + Ok(None) + } + fn create_storage(&mut self, address: Address) -> anyhow::Result<()> { + let _ = address; + Ok(()) + } + fn destroy_storage(&mut self, address: Address) -> anyhow::Result<()> { + self.accounts + .entry(address) + .and_modify(|it| it.storage.clear()); + Ok(()) + } + fn store_int(&mut self, address: Address, slot: U256, value: U256) -> anyhow::Result<()> { + self.accounts + .entry(address) + .or_default() + .storage + .insert(slot, value); + Ok(()) + } + fn store_hash(&mut self, address: Address, hash: H256, value: H256) -> anyhow::Result<()> { + self.accounts + .entry(address) + .or_default() + .storage + .insert(hash.into_uint(), value.into_uint()); + Ok(()) + } + fn load_int(&mut self, address: Address, slot: U256) -> anyhow::Result { + Ok(self + .accounts + .get(&address) + .context("no account")? + .storage + .get(&slot) + .copied() + .unwrap_or_default()) + } + fn reporting_destroy_slot( + &mut self, + address: Address, + slot: U256, + ) -> anyhow::Result> { + self.accounts.entry(address).and_modify(|it| { + it.storage.remove(&slot); + }); + Ok(None) + } + fn mask_storage(&mut self, masks: BTreeMap>) -> anyhow::Result<()> { + let _ = masks; + Ok(()) + } + fn mask(&mut self, paths: impl IntoIterator) -> anyhow::Result<()> { + let _ = paths; + Ok(()) + } + fn root(&mut self) -> H256 { + let mut it = [0; 32]; + smt_trie::utils::hashout2u(self.as_smt().root).to_big_endian(&mut it); + H256(it) + } +} + +// Having optional fields here is an odd decision, +// but without the distinction, +// the wire tests fail. +// This may be a bug in the SMT library. +#[derive(Default, Clone, Debug)] +pub struct Type2Entry { + pub balance: Option, + pub nonce: Option, + pub code: Option, + pub code_length: Option, + pub storage: BTreeMap, +} + +// This is a buffered version +#[derive(Clone, Debug)] +pub struct Type2World { + accounts: BTreeMap, + hashed_out: BTreeMap, +} + +impl Type2World { + /// # Panics + /// - On untrusted inputs: . + pub fn as_smt(&self) -> smt_trie::smt::Smt { + let mut smt = smt_trie::smt::Smt::::default(); + + for (key, hash) in &self.hashed_out { + smt.set_hash( + key.into_smt_bits(), + smt_trie::smt::HashOut { + elements: { + let ethereum_types::U256(arr) = hash.into_uint(); + arr.map(plonky2::field::goldilocks_field::GoldilocksField) + }, + }, + ); + } + for ( + addr, + Type2Entry { + balance, + nonce, + code, + code_length, + storage, + }, + ) in self.accounts.iter() + { + use smt_trie::keys::{key_balance, key_code, key_code_length, key_nonce, key_storage}; + + for (value, key_fn) in [ + (balance, key_balance as fn(_) -> _), + (nonce, key_nonce), + (code, key_code), + (code_length, key_code_length), + ] { + if let Some(value) = value { + smt.set(key_fn(*addr), *value); + } + } + for (slot, value) in storage { + smt.set(key_storage(*addr, *slot), *value); + } + } + smt + } + + pub fn new_unchecked( + accounts: BTreeMap, + hashed_out: BTreeMap, + ) -> Self { + Self { + accounts, + hashed_out, + } + } +} diff --git a/zero/src/bin/trie_diff.rs b/zero/src/bin/trie_diff.rs index c211cc528..e580684d2 100644 --- a/zero/src/bin/trie_diff.rs +++ b/zero/src/bin/trie_diff.rs @@ -147,8 +147,8 @@ async fn main() -> Result<()> { &DebugOutputTries { state_trie: observer.data[prover_tries.batch_index] .tries - .state - .as_hashed_partial_trie() + .world + .state_trie() .clone(), transaction_trie: observer.data[prover_tries.batch_index] .tries