Skip to content

Commit

Permalink
triedb/pathdb: track flat state changes in pathdb
Browse files Browse the repository at this point in the history
This pull request ports some changes from the main state snapshot
integration pull request, specifically introducing flat state tracking
in pathdb.

Note that the tracked flat state changes are only held in memory and
won't be persisted to disk. Meanwhile, the corresponding state
retrieval from the persistent state is also not supported yet. State
management on disk is more complicated and will be implemented in
a separate pull request.
  • Loading branch information
rjl493456442 committed Oct 21, 2024
1 parent babd5d8 commit 2288a96
Show file tree
Hide file tree
Showing 15 changed files with 1,325 additions and 31 deletions.
11 changes: 11 additions & 0 deletions triedb/database.go
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,10 @@ type backend interface {
// An error will be returned if the specified state is not available.
NodeReader(root common.Hash) (database.NodeReader, error)

// StateReader returns a reader for accessing flat states within the specified
// state. An error will be returned if the specified state is not available.
StateReader(root common.Hash) (database.StateReader, error)

// Initialized returns an indicator if the state data is already initialized
// according to the state scheme.
Initialized(genesisRoot common.Hash) bool
Expand Down Expand Up @@ -122,6 +126,13 @@ func (db *Database) NodeReader(blockRoot common.Hash) (database.NodeReader, erro
return db.backend.NodeReader(blockRoot)
}

// StateReader opens a reader over the flat state data belonging to the state
// identified by blockRoot. The request is forwarded unchanged to the configured
// backend, and whatever reader or error the backend produces is handed back to
// the caller. Per the backend contract, an error is returned if the specified
// state is not available.
func (db *Database) StateReader(blockRoot common.Hash) (database.StateReader, error) {
	reader, err := db.backend.StateReader(blockRoot)
	return reader, err
}

// Update performs a state transition by committing dirty nodes contained in the
// given set in order to update state from the specified parent to the specified
// root. The held pre-images accumulated up to this point will be flushed in case
Expand Down
29 changes: 29 additions & 0 deletions triedb/database/database.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ package database

import (
"github.com/ethereum/go-ethereum/common"
"github.com/ethereum/go-ethereum/core/types"
)

// NodeReader wraps the Node method of a backing trie reader.
Expand All @@ -37,3 +38,31 @@ type NodeDatabase interface {
// An error will be returned if the specified state is not available.
NodeReader(stateRoot common.Hash) (NodeReader, error)
}

// StateReader wraps the Account and Storage methods of a backing state reader,
// giving direct (flat) access to account and storage data.
type StateReader interface {
// Account directly retrieves the account associated with a particular hash in
// the slim data format. An error will be returned if the read operation exits
// abnormally, specifically if the layer is already stale.
//
// Note:
// - the returned account object is safe to modify
// - no error will be returned if the requested account is not found in database
Account(hash common.Hash) (*types.SlimAccount, error)

// Storage directly retrieves the storage data associated with a particular hash,
// within a particular account. An error will be returned if the read operation
// exits abnormally.
//
// Note:
// - the returned storage data is not a copy, please don't modify it
// - no error will be returned if the requested slot is not found in database
Storage(accountHash, storageHash common.Hash) ([]byte, error)
}

// StateDatabase wraps the methods of a backing state store.
type StateDatabase interface {
// StateReader returns a state reader associated with the specific state
// identified by stateRoot. An error will be returned if the specified state
// is not available.
StateReader(stateRoot common.Hash) (StateReader, error)
}
6 changes: 6 additions & 0 deletions triedb/hashdb/database.go
Original file line number Diff line number Diff line change
Expand Up @@ -635,3 +635,9 @@ func (reader *reader) Node(owner common.Hash, path []byte, hash common.Hash) ([]
blob, _ := reader.db.node(hash)
return blob, nil
}

// StateReader is meant to return a reader for the state data associated with
// the specified state root. This implementation does not provide one: it
// ignores root and always returns a nil reader together with a
// "not implemented" error.
func (db *Database) StateReader(root common.Hash) (database.StateReader, error) {
	err := errors.New("not implemented")
	return nil, err
}
32 changes: 25 additions & 7 deletions triedb/pathdb/buffer.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,40 +33,56 @@ import (
// must be checked before diving into disk (since it basically is not yet written
// data).
type buffer struct {
layers uint64 // The number of diff layers aggregated inside
limit uint64 // The maximum memory allowance in bytes
nodes *nodeSet // Aggregated trie node set
layers uint64 // The number of diff layers aggregated inside
limit uint64 // The maximum memory allowance in bytes
nodes *nodeSet // Aggregated trie node set
states *stateSet // Aggregated state set
}

// newBuffer initializes the buffer with the provided states and trie nodes.
func newBuffer(limit int, nodes *nodeSet, layers uint64) *buffer {
func newBuffer(limit int, nodes *nodeSet, states *stateSet, layers uint64) *buffer {
// Don't panic for lazy users if any provided set is nil
if nodes == nil {
nodes = newNodeSet(nil)
}
if states == nil {
states = newStates(nil, nil, nil)
}
return &buffer{
layers: layers,
limit: uint64(limit),
nodes: nodes,
states: states,
}
}

// account looks up the account blob keyed by the account address hash in the
// aggregated state set. The boolean reports whether the state set holds an
// entry for that hash.
func (b *buffer) account(hash common.Hash) ([]byte, bool) {
	blob, found := b.states.account(hash)
	return blob, found
}

// storage looks up the storage slot identified by the account address hash and
// the slot key hash in the aggregated state set. The boolean reports whether
// the state set holds an entry for that slot.
func (b *buffer) storage(addrHash common.Hash, storageHash common.Hash) ([]byte, bool) {
	blob, found := b.states.storage(addrHash, storageHash)
	return blob, found
}

// node looks up the trie node located at the given path inside the trie
// identified by owner, consulting the aggregated node set. The boolean reports
// whether the node set holds an entry for it.
func (b *buffer) node(owner common.Hash, path []byte) (*trienode.Node, bool) {
	n, found := b.nodes.node(owner, path)
	return n, found
}

// commit merges the provided states and trie nodes into the buffer.
func (b *buffer) commit(nodes *nodeSet) *buffer {
func (b *buffer) commit(nodes *nodeSet, states *stateSet) *buffer {
b.layers++
b.nodes.merge(nodes)
b.states.merge(states)
return b
}

// revert is the reverse operation of commit. It also merges the provided states
// and trie nodes into the buffer. The key difference is that the provided state
// set should reverse the changes made by the most recent state transition.
func (b *buffer) revert(db ethdb.KeyValueReader, nodes map[common.Hash]map[string]*trienode.Node) error {
func (b *buffer) revert(db ethdb.KeyValueReader, nodes map[common.Hash]map[string]*trienode.Node, accounts map[common.Hash][]byte, storages map[common.Hash]map[common.Hash][]byte) error {
// Short circuit if no embedded state transition to revert
if b.layers == 0 {
return errStateUnrecoverable
Expand All @@ -79,13 +95,15 @@ func (b *buffer) revert(db ethdb.KeyValueReader, nodes map[common.Hash]map[strin
return nil
}
b.nodes.revert(db, nodes)
b.states.revert(accounts, storages)
return nil
}

// reset empties the buffer: both the aggregated trie node set and the
// aggregated state set are reset, and the count of aggregated diff layers
// is set back to zero.
func (b *buffer) reset() {
	b.nodes.reset()
	b.states.reset()
	b.layers = 0
}

// empty returns an indicator if buffer is empty.
Expand All @@ -101,7 +119,7 @@ func (b *buffer) full() bool {

// size returns the approximate memory size of the held content.
func (b *buffer) size() uint64 {
return b.nodes.size
return b.states.size + b.nodes.size
}

// flush persists the in-memory dirty trie node into the disk if the configured
Expand Down
39 changes: 29 additions & 10 deletions triedb/pathdb/database.go
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,24 @@ type layer interface {
// - no error will be returned if the requested node is not found in database.
node(owner common.Hash, path []byte, depth int) ([]byte, common.Hash, *nodeLoc, error)

// account directly retrieves the account RLP associated with a particular
// hash in the slim data format. An error will be returned if the read
// operation exits abnormally. Specifically, if the layer is already stale.
//
// Note:
// - the returned account is not a copy, please don't modify it.
// - no error will be returned if the requested account is not found in database.
account(hash common.Hash, depth int) ([]byte, error)

// storage directly retrieves the storage data associated with a particular hash,
// within a particular account. An error will be returned if the read operation
// exits abnormally. Specifically, if the layer is already stale.
//
// Note:
// - the returned storage data is not a copy, please don't modify it.
// - no error will be returned if the requested slot is not found in database.
storage(accountHash, storageHash common.Hash, depth int) ([]byte, error)

// rootHash returns the root hash for which this layer was made.
rootHash() common.Hash

Expand Down Expand Up @@ -130,17 +148,18 @@ var Defaults = &Config{
// ReadOnly is the config in order to open database in read only mode.
var ReadOnly = &Config{ReadOnly: true}

// Database is a multiple-layered structure for maintaining in-memory trie nodes.
// It consists of one persistent base layer backed by a key-value store, on top
// of which arbitrarily many in-memory diff layers are stacked. The memory diffs
// can form a tree with branching, but the disk layer is singleton and common to
// all. If a reorg goes deeper than the disk layer, a batch of reverse diffs can
// be applied to rollback. The deepest reorg that can be handled depends on the
// amount of state histories tracked in the disk.
// Database is a multiple-layered structure for maintaining in-memory states
// along with its dirty trie nodes. It consists of one persistent base layer
// backed by a key-value store, on top of which arbitrarily many in-memory diff
// layers are stacked. The memory diffs can form a tree with branching, but the
// disk layer is singleton and common to all. If a reorg goes deeper than the
// disk layer, a batch of reverse diffs can be applied to rollback. The deepest
// reorg that can be handled depends on the amount of state histories tracked
// in the disk.
//
// At most one readable and writable database can be opened at the same time in
// the whole system which ensures that only one database writer can operate disk
// state. Unexpected open operations can cause the system to panic.
// the whole system which ensures that only one database writer can operate the
// persistent state. Unexpected open operations can cause the system to panic.
type Database struct {
// readOnly is the flag whether the mutation is allowed to be applied.
// It will be set automatically when the database is journaled during
Expand Down Expand Up @@ -358,7 +377,7 @@ func (db *Database) Enable(root common.Hash) error {
}
// Re-construct a new disk layer backed by persistent state
// with **empty clean cache and node buffer**.
db.tree.reset(newDiskLayer(root, 0, db, nil, newBuffer(db.config.WriteBufferSize, nil, 0)))
db.tree.reset(newDiskLayer(root, 0, db, nil, newBuffer(db.config.WriteBufferSize, nil, nil, 0)))

// Re-enable the database as the final step.
db.waitSync = false
Expand Down
13 changes: 12 additions & 1 deletion triedb/pathdb/database_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -309,7 +309,18 @@ func (t *tester) generate(parent common.Hash) (common.Hash, *trienode.MergedNode
delete(t.storages, addrHash)
}
}
return root, ctx.nodes, NewStateSetWithOrigin(ctx.accountOrigin, ctx.storageOrigin)
var (
accounts = make(map[common.Hash][]byte)
destructs = make(map[common.Hash]struct{})
)
for addrHash, data := range ctx.accounts {
if len(data) == 0 {
destructs[addrHash] = struct{}{}
} else {
accounts[addrHash] = data
}
}
return root, ctx.nodes, NewStateSetWithOrigin(destructs, accounts, ctx.storages, ctx.accountOrigin, ctx.storageOrigin)
}

// lastHash returns the latest root hash, or empty if nothing is cached.
Expand Down
53 changes: 53 additions & 0 deletions triedb/pathdb/difflayer.go
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ func newDiffLayer(parent layer, root common.Hash, id uint64, block uint64, nodes
states: states,
}
dirtyNodeWriteMeter.Mark(int64(nodes.size))
dirtyStateWriteMeter.Mark(int64(states.size))
log.Debug("Created new diff layer", "id", id, "block", block, "nodesize", common.StorageSize(nodes.size), "statesize", common.StorageSize(states.size))
return dl
}
Expand Down Expand Up @@ -96,6 +97,58 @@ func (dl *diffLayer) node(owner common.Hash, path []byte, depth int) ([]byte, co
return dl.parent.node(owner, path, depth+1)
}

// account resolves the slim-format account RLP for the given account hash. The
// lookup starts at this diff layer and is handed to the parent layer, with the
// depth incremented, when this layer has no record of the account.
//
// The returned blob is not a copy; callers must not modify it.
func (dl *diffLayer) account(hash common.Hash, depth int) ([]byte, error) {
	// Keep the read lock for the whole lookup so that the parent can't be
	// changed while the state is being accessed.
	dl.lock.RLock()
	defer dl.lock.RUnlock()

	data, ok := dl.states.account(hash)
	if !ok {
		// Nothing is recorded for this account here, defer to the parent.
		return dl.parent.account(hash, depth+1)
	}
	dirtyStateHitMeter.Mark(1)
	dirtyStateHitDepthHist.Update(int64(depth))
	dirtyStateReadMeter.Mark(int64(len(data)))

	// A non-empty blob is counted as an account hit, an empty one as a miss.
	if len(data) != 0 {
		stateAccountHitMeter.Mark(1)
	} else {
		stateAccountMissMeter.Mark(1)
	}
	return data, nil
}

// storage resolves the storage data for the slot identified by storageHash
// within the account identified by accountHash. The lookup starts at this diff
// layer and is handed to the parent layer, with the depth incremented, when
// this layer has no record of the slot.
//
// The returned storage data is not a copy; callers must not modify it.
func (dl *diffLayer) storage(accountHash, storageHash common.Hash, depth int) ([]byte, error) {
	// Keep the read lock for the whole lookup so that the parent can't be
	// changed while the state is being accessed.
	dl.lock.RLock()
	defer dl.lock.RUnlock()

	data, ok := dl.states.storage(accountHash, storageHash)
	if !ok {
		// Nothing is recorded for this slot here, defer to the parent.
		return dl.parent.storage(accountHash, storageHash, depth+1)
	}
	dirtyStateHitMeter.Mark(1)
	dirtyStateHitDepthHist.Update(int64(depth))
	dirtyStateReadMeter.Mark(int64(len(data)))

	// A non-empty blob is counted as a storage hit, an empty one as a miss.
	if len(data) != 0 {
		stateStorageHitMeter.Mark(1)
	} else {
		stateStorageMissMeter.Mark(1)
	}
	return data, nil
}

// update implements the layer interface, creating a new layer on top of the
// existing layer tree with the specified data items.
func (dl *diffLayer) update(root common.Hash, id uint64, block uint64, nodes *nodeSet, states *StateSetWithOrigin) *diffLayer {
Expand Down
2 changes: 1 addition & 1 deletion triedb/pathdb/difflayer_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ import (
func emptyLayer() *diskLayer {
return &diskLayer{
db: New(rawdb.NewMemoryDatabase(), nil, false),
buffer: newBuffer(defaultBufferSize, nil, 0),
buffer: newBuffer(defaultBufferSize, nil, nil, 0),
}
}

Expand Down
Loading

0 comments on commit 2288a96

Please sign in to comment.