Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Create scripts/gethimport.py #1888

Draft
wants to merge 20 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .circleci/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,9 @@ common: &common
- restore_cache:
keys:
- cache-v1-{{ arch }}-{{ .Environment.CIRCLE_JOB }}-{{ checksum "setup.py" }}-{{ checksum "tox.ini" }}
- run:
name: install libsnappy-dev
command: sudo apt install -y libsnappy-dev
- run:
name: install dependencies
command: pip install --user tox
Expand Down
1 change: 1 addition & 0 deletions eth/db/chain.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
GENESIS_PARENT_HASH,
)
from eth.exceptions import (
CanonicalHeadNotFound,
HeaderNotFound,
ReceiptNotFound,
TransactionNotFound,
Expand Down
235 changes: 235 additions & 0 deletions eth/db/trie_iteration.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,235 @@
import enum
from typing import (
cast,
Iterable,
List,
NamedTuple,
Tuple,
)

from eth.db.chain import ChainDB

from eth_utils import (
to_tuple,
)

from eth_typing import (
Hash32,
)

from trie.constants import (
NODE_TYPE_BLANK,
NODE_TYPE_BRANCH,
NODE_TYPE_EXTENSION,
NODE_TYPE_LEAF,
)

from trie.utils.nodes import (
get_common_prefix_length,
decode_node,
extract_key,
get_node_type,
)


Nibbles = Tuple[int, ...]


class NodeKind(enum.Enum):
BLANK = NODE_TYPE_BLANK
LEAF = NODE_TYPE_LEAF
EXTENSION = NODE_TYPE_EXTENSION
BRANCH = NODE_TYPE_BRANCH


class TrieNode(NamedTuple):
kind: NodeKind
rlp: bytes
obj: List[bytes] # this type is wrong but mypy doesn't support recursive types
keccak: Hash32

def __str__(self) -> str:
if self.kind == NodeKind.EXTENSION:
return (
"TrieNode<Extension>("
f"hash={self.keccak.hex()}"
f" path={self.path_rest}"
f" child={self.obj[1].hex()}"
f")"
)
if self.kind == NodeKind.LEAF:
return (
"TrieNode<Leaf>("
f"hash={self.keccak.hex()}"
f" path={self.path_rest[:10]}..."
f")"
)
return f"TrieNode(kind={self.kind.name} hash={self.keccak.hex()})"

@property
def path_rest(self) -> Nibbles:
# careful: this doesn't make any sense for branches
return cast(Nibbles, extract_key(self.obj))


def is_subtree(prefix: Nibbles, nibbles: Nibbles) -> bool:
"""
Returns True if {nibbles} represents a subtree of {prefix}.
"""
if len(nibbles) < len(prefix):
# nibbles represents a bigger tree than prefix does
return False
return get_common_prefix_length(prefix, nibbles) == len(prefix)


@to_tuple
def _get_children_with_nibbles(node: TrieNode, prefix: Nibbles) -> Iterable[Tuple[Nibbles, Hash32]]:
"""
Return the children of the given node at the given path, including their full paths
"""
if node.kind == NodeKind.BLANK:
return
elif node.kind == NodeKind.LEAF:
full_path = prefix + node.path_rest
yield (full_path, cast(Hash32, node.obj[1]))
elif node.kind == NodeKind.EXTENSION:
full_path = prefix + node.path_rest
# TODO: this cast to a Hash32 is not right, nodes smaller than 32 are inlined
yield (full_path, cast(Hash32, node.obj[1]))
elif node.kind == NodeKind.BRANCH:
for i in range(17):
full_path = prefix + (i,)
yield (full_path, cast(Hash32, node.obj[i]))


def _get_node(db: ChainDB, node_hash: Hash32) -> TrieNode:
if len(node_hash) < 32:
node_rlp = cast(bytes, node_hash)
else:
node_rlp = db.get(node_hash)

node = decode_node(node_rlp)
node_type = get_node_type(node)

return TrieNode(kind=NodeKind(node_type), rlp=node_rlp, obj=node, keccak=node_hash)


def _iterate_trie(db: ChainDB,
node: TrieNode, # the node we should look at
sub_trie: Nibbles, # which sub_trie to return nodes from
prefix: Nibbles, # our current path in the trie
) -> Iterable[Tuple[Nibbles, TrieNode]]:

if node.kind == NodeKind.BLANK:
return

if node.kind == NodeKind.LEAF:
full_path = prefix + node.path_rest

if is_subtree(sub_trie, prefix) or is_subtree(sub_trie, full_path):
# also check full_path because either the node or the item the node points to
# might be part of the desired subtree
yield (prefix, node)

# there's no need to recur, this is a leaf
return

child_of_sub_trie = is_subtree(sub_trie, prefix)

if child_of_sub_trie:
# this node is part of the subtrie which should be returned
yield (prefix, node)

parent_of_sub_trie = is_subtree(prefix, sub_trie)

if child_of_sub_trie or parent_of_sub_trie:
for path, child_hash in _get_children_with_nibbles(node, prefix):
child_node = _get_node(db, child_hash)
yield from _iterate_trie(db, child_node, sub_trie, path)


def iterate_trie(db: ChainDB, root_hash: Hash32,
sub_trie: Nibbles = ()) -> Iterable[Tuple[Nibbles, TrieNode]]:

root_node = _get_node(db, root_hash)

yield from _iterate_trie(
db, root_node, sub_trie,
prefix=(),
)


def iterate_leaves(db: ChainDB, root_hash: Hash32,
sub_trie: Nibbles = ()) -> Iterable[Tuple[Nibbles, bytes]]:
"""
Rather than returning the raw nodes, this returns just the leaves (usually, accounts),
along with their full paths
"""

node_iterator = iterate_trie(db, root_hash, sub_trie)

for path, node in node_iterator:
if node.kind == NodeKind.LEAF:
full_path = path + node.path_rest
yield (full_path, node.obj[1])


def _iterate_node_chunk(db: ChainDB,
node: TrieNode,
sub_trie: Nibbles,
prefix: Nibbles,
target_depth: int) -> Iterable[Tuple[Nibbles, TrieNode]]:

def recur(new_depth: int) -> Iterable[Tuple[Nibbles, TrieNode]]:
for path, child_hash in _get_children_with_nibbles(node, prefix):
child_node = _get_node(db, child_hash)
yield from _iterate_node_chunk(db, child_node, sub_trie, path, new_depth)

if node.kind == NodeKind.BLANK:
return

if node.kind == NodeKind.LEAF:
full_path = prefix + node.path_rest

if is_subtree(sub_trie, prefix) or is_subtree(sub_trie, full_path):
yield (prefix, node)

# there's no need to recur, this is a leaf
return

child_of_sub_trie = is_subtree(sub_trie, prefix)

if child_of_sub_trie:
# the node is part of the sub_trie which we want to return
yield (prefix, node)

if target_depth == 0:
# there's no point in recursing
return

parent_of_sub_trie = is_subtree(prefix, sub_trie)

if child_of_sub_trie:
# if we're returning nodes start decrementing the count
yield from recur(target_depth - 1)
elif parent_of_sub_trie:
# if we're still looking for the sub_trie just recur
yield from recur(target_depth)


def iterate_node_chunk(db: ChainDB,
root_hash: Hash32,
sub_trie: Nibbles,
target_depth: int) -> Iterable[Tuple[Nibbles, TrieNode]]:
"""
Get all the nodes up to {target_depth} deep from the given sub_trie.

Does a truncated breadth-first search rooted at the given node and returns everything
it finds.
"""
# TODO: notice BLANK_NODE_HASH and fail fast?
root_node = _get_node(db, root_hash)

yield from _iterate_node_chunk(
db, root_node, sub_trie, prefix=(), target_depth=target_depth,
)
Loading