diff --git a/pyproject.toml b/pyproject.toml index 0bf639deef..652e97a1b0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -90,6 +90,7 @@ checkfixtures = "cli.check_fixtures:check_fixtures" consume = "cli.pytest_commands.consume:consume" genindex = "cli.gen_index:generate_fixtures_index_cli" gentest = "cli.gentest:generate" +eofwrap = "cli.eofwrap:eof_wrap" pyspelling_soft_fail = "cli.tox_helpers:pyspelling" markdownlintcli2_soft_fail = "cli.tox_helpers:markdownlint" order_fixtures = "cli.order_fixtures:order_fixtures" diff --git a/src/cli/eofwrap.py b/src/cli/eofwrap.py new file mode 100644 index 0000000000..da641db61c --- /dev/null +++ b/src/cli/eofwrap.py @@ -0,0 +1,346 @@ +""" +Generate a JSON blockchain test from an existing JSON blockchain test by wrapping its prestate code +in EOF wherever possible. + +Example Usage: + +1. Wrap tests + + ```console + eofwrap + ``` +""" + +import json +import os +import sys +from pathlib import Path +from typing import Any, no_type_check + +import click + +from cli.evm_bytes import OpcodeWithOperands, process_evm_bytes +from ethereum_clis import CLINotFoundInPath +from ethereum_clis.clis.evmone import EvmOneTransitionTool +from ethereum_test_base_types.base_types import Bytes +from ethereum_test_base_types.conversions import to_hex +from ethereum_test_fixtures.blockchain import FixtureBlock, InvalidFixtureBlock +from ethereum_test_fixtures.file import BaseFixturesRootModel, BlockchainFixtures +from ethereum_test_forks.forks.forks import Osaka +from ethereum_test_specs.blockchain import Block, BlockchainFixture, BlockchainTest +from ethereum_test_specs.debugging import print_traces +from ethereum_test_specs.eof import EOFParse +from ethereum_test_tools import Opcodes as Op +from ethereum_test_types import Transaction +from ethereum_test_types.eof.v1 import Container +from ethereum_test_types.types import Environment +from ethereum_test_vm.bytecode import Bytecode + + +@click.command() +@click.argument("input", 
class EofWrapper:
    """
    EOF wrapping of blockchain tests with some simple metrics tracking.

    One instance accumulates metrics across every file passed to `wrap_file`; the
    `metrics` dict is keyed by the string constants declared below.
    """

    # JSON files had at least one fixture generated successfully with EOF
    FILES_GENERATED = "files_generated"
    # JSON files skipped explicitly or didn't have a fixture with EOF
    FILES_SKIPPED = "files_skipped"
    # Test fixtures with at least one EOF code and generated successfully
    FIXTURES_GENERATED = "fixtures_generated"
    # Test fixtures with no code able to be EOF-wrapped
    FIXTURES_CANT_WRAP = "fixtures_cant_wrap"
    # Test fixtures with EOF code but test doesn't pass and generation fails
    FIXTURES_CANT_GENERATE = "fixtures_cant_generate"
    # State accounts with code wrapped into valid EOF
    ACCOUNTS_WRAPPED = "accounts_wrapped"
    # State accounts with code wrapped into valid unique EOF
    UNIQUE_ACCOUNTS_WRAPPED = "unique_accounts_wrapped"
    # State accounts wrapped but the code is not valid EOF
    ACCOUNTS_INVALID_EOF = "accounts_invalid_eof"
    # State accounts wrapped into valid EOF but in a fixture of a failing test
    ACCOUNTS_CANT_GENERATE = "accounts_cant_generate"
    # Breakdown of EOF validation errors summing up to `accounts_invalid_eof`
    VALIDATION_ERRORS = "validation_errors"
    # Breakdown of runtime test failures summing up to `fixtures_cant_generate`
    GENERATION_ERRORS = "generation_errors"

    def __init__(self):
        """Start with all counters at zero and no EOF code seen yet."""
        self.metrics = {
            self.FILES_GENERATED: 0,
            self.FILES_SKIPPED: 0,
            self.FIXTURES_GENERATED: 0,
            self.FIXTURES_CANT_WRAP: 0,
            self.FIXTURES_CANT_GENERATE: 0,
            self.ACCOUNTS_WRAPPED: 0,
            self.UNIQUE_ACCOUNTS_WRAPPED: 0,
            self.ACCOUNTS_INVALID_EOF: 0,
            self.ACCOUNTS_CANT_GENERATE: 0,
            self.VALIDATION_ERRORS: {},
            self.GENERATION_ERRORS: {},
        }
        # Hex strings of every distinct EOF code that ended up in a generated fixture.
        self.unique_eof = set()

    # Any input path containing one of these substrings is skipped by `wrap_file`.
    file_skip_list = [
        "Pyspecs",
        # EXTCODE* opcodes return different results for EOF targets and that is tested elsewhere
        "stExtCodeHash",
        # bigint syntax
        "ValueOverflowParis",
        "bc4895-withdrawals",
        # EOF opcodes at diff places - tests obsolete
        "opcD0DiffPlaces",
        "opcD1DiffPlaces",
        "opcD2DiffPlaces",
        "opcD3DiffPlaces",
        "opcE0DiffPlaces",
        "opcE1DiffPlaces",
        "opcE2DiffPlaces",
        "opcE3DiffPlaces",
        "opcE4DiffPlaces",
        "opcE5DiffPlaces",
        "opcE6DiffPlaces",
        "opcE7DiffPlaces",
        "opcE8DiffPlaces",
        "opcECDiffPlaces",
        "opcEEDiffPlaces",
        "opcF7DiffPlaces",
        "opcF8DiffPlaces",
        "opcF9DiffPlaces",
        "opcFBDiffPlaces",
        # stack overflow always (limit of `max_stack_height` is 1023!)
        "push0_fill_stack",
        "push0_stack_overflow",
        "blobbasefee_stack_overflow",
    ]

    def wrap_file(self, in_path: str, out_path: str, traces: bool):
        """
        Wraps code from a blockchain test JSON file from `in_path` into EOF containers,
        wherever possible. If not possible - skips and tracks that in metrics. Possible means
        at least one account's code can be wrapped in a valid EOF container and the assertions
        on post state are satisfied.

        The resulting fixtures (if any) are written to `out_path`; `traces` is forwarded to
        the fixture generation step.
        """
        if any(skip in in_path for skip in self.file_skip_list):
            self.metrics[self.FILES_SKIPPED] += 1
            return

        with open(in_path, "r") as input_file:
            fixtures = BlockchainFixtures.from_json_data(json.load(input_file))

        out_fixtures = BaseFixturesRootModel({})
        fixture: BlockchainFixture
        for fixture_id, fixture in fixtures.items():
            fixture_eof_codes = []
            wrapped_at_least_one_account = False

            if fixture.pre:
                for address, account in fixture.pre.root.items():
                    if account is None or account.code is None or len(account.code) == 0:
                        continue

                    try:
                        wrapped = wrap_code(account.code)
                    except ValueError as e:
                        self.metrics[self.ACCOUNTS_INVALID_EOF] += 1
                        _inc_counter(
                            self.metrics[self.VALIDATION_ERRORS], self._short_exception_msg(e)
                        )
                        continue

                    if not self._validate_eof(wrapped):
                        self.metrics[self.ACCOUNTS_INVALID_EOF] += 1
                        continue

                    account.code = Bytes(wrapped)
                    wrapped_at_least_one_account = True
                    self.metrics[self.ACCOUNTS_WRAPPED] += 1
                    fixture_eof_codes.append(to_hex(account.code))

                    # Wrap the same account in post state the same way. `.get` is used because
                    # a pre-state account is not guaranteed to have an entry in the post state;
                    # plain indexing would abort the whole run with a KeyError.
                    post_account = (
                        fixture.post_state.root.get(address) if fixture.post_state else None
                    )
                    if post_account:
                        post_account.code = Bytes(wrapped)  # type: ignore

            if not wrapped_at_least_one_account:
                self.metrics[self.FIXTURES_CANT_WRAP] += 1
                continue

            try:
                out_fixture = self._wrap_fixture(fixture, traces)
                out_fixtures[fixture_id] = out_fixture
                self.metrics[self.FIXTURES_GENERATED] += 1
                self.unique_eof.update(fixture_eof_codes)
                self.metrics[self.UNIQUE_ACCOUNTS_WRAPPED] = len(self.unique_eof)
            except Exception as e:
                # Deliberately broad: any failure while regenerating the fixture is recorded
                # in the metrics and the remaining fixtures are still processed.
                _inc_counter(self.metrics[self.GENERATION_ERRORS], self._short_exception_msg(e))

                self.metrics[self.FIXTURES_CANT_GENERATE] += 1
                self.metrics[self.ACCOUNTS_CANT_GENERATE] += len(fixture_eof_codes)

        if len(out_fixtures) == 0:
            self.metrics[self.FILES_SKIPPED] += 1
            return

        # `Path.parent` is "." for a bare file name, so this also works when `out_path` has no
        # directory component (where `os.makedirs("")` would raise).
        Path(out_path).parent.mkdir(parents=True, exist_ok=True)
        out_fixtures.collect_into_file(Path(out_path))
        self.metrics[self.FILES_GENERATED] += 1

    def _short_exception_msg(self, e: Exception):
        """
        Return `str(e)` cut to at most 30 characters, with "..." appended when it was cut, so
        that it can serve as a compact key in the error breakdown metrics.
        """
        THRESHOLD = 30

        short = str(e)
        if len(short) > THRESHOLD:
            short = short[:THRESHOLD] + "..."
        return short

    def _wrap_fixture(self, fixture: BlockchainFixture, traces: bool):
        """
        Rebuild a `BlockchainTest` from the pre state, post state and blocks of `fixture` and
        generate a new blockchain fixture from it with the evmone transition tool for the
        Osaka fork.

        When `traces` is set the transition tool is created with tracing enabled and the
        collected traces are printed after generation. Exceptions raised while rebuilding or
        generating propagate to the caller.
        """
        env = Environment()

        pre = fixture.pre

        t8n = EvmOneTransitionTool(trace=traces)

        test = BlockchainTest(
            genesis_environment=env,
            pre=pre.root,
            post=fixture.post_state.root if fixture.post_state else {},
            blocks=[],
            tag="wrapped test",
        )

        for fixture_block in fixture.blocks:
            if isinstance(fixture_block, FixtureBlock):
                header = fixture_block.header
                block = Block(
                    ommers_hash=header.ommers_hash,
                    fee_recipient=header.fee_recipient,
                    difficulty=header.difficulty,
                    number=header.number,
                    gas_limit=header.gas_limit,
                    timestamp=header.timestamp,
                    extra_data=header.extra_data,
                    prev_randao=header.prev_randao,
                    nonce=header.nonce,
                    base_fee_per_gas=header.base_fee_per_gas,
                    withdrawals_root=header.withdrawals_root,
                    parent_beacon_block_root=header.parent_beacon_block_root,
                    requests_root=header.requests_root,
                )
                # None of these block contents are carried over into the rebuilt block below,
                # so fixtures that use them are rejected here.
                assert not fixture_block.ommers
                assert not fixture_block.withdrawals
                assert not fixture_block.deposit_requests
                assert not fixture_block.withdrawal_requests
                assert not fixture_block.consolidation_requests

                for fixture_tx in fixture_block.txs:
                    # `ty` and `data` are passed to `Transaction` as `type` and `input`, so
                    # they are removed from the dump before it is splatted.
                    fixture_tx_dump = fixture_tx.model_dump()
                    fixture_tx_dump.pop("ty")
                    fixture_tx_dump.pop("data")
                    tx = Transaction(
                        type=fixture_tx.ty,
                        input=fixture_tx.data,
                        **fixture_tx_dump,
                    )
                    block.txs.append(tx)
            elif isinstance(fixture_block, InvalidFixtureBlock):
                block = Block(
                    rlp=fixture_block.rlp,
                    exception=fixture_block.expect_exception,
                )
            else:
                raise TypeError("not a FixtureBlock")

            test.blocks.append(block)

        result = test.generate(
            request=None,  # type: ignore
            t8n=t8n,
            fork=Osaka,
            fixture_format=BlockchainFixture,
        )
        if traces:
            print_traces(t8n.get_traces())
        return result

    def _validate_eof(self, container: Container, metrics: bool = True) -> bool:
        """
        Run `EOFParse` on the hex encoding of `container` and return whether its output
        contains "OK".

        On failure, and when `metrics` is true, the stripped tool output is counted in the
        validation error breakdown.
        """
        eof_parse = EOFParse()

        result = eof_parse.run(input=to_hex(container))
        actual_message = result.stdout.strip()
        if "OK" not in actual_message:
            if metrics:
                _inc_counter(self.metrics[self.VALIDATION_ERRORS], actual_message)
            return False

        return True
def _inc_counter(d: dict, key: Any) -> None:
    """Increment the count stored under `key` in `d`, starting from zero if `key` is absent."""
    d[key] = d.get(key, 0) + 1
+ if self.opcode: + return self.opcode.__getitem__(*self.operands) if self.operands else self.opcode + else: + return Bytecode() + -def process_evm_bytes(evm_bytes: bytes, assembly: bool = False) -> str: # noqa: D103 +def process_evm_bytes(evm_bytes: bytes) -> List[OpcodeWithOperands]: # noqa: D103 evm_bytes = bytearray(evm_bytes) opcodes: List[OpcodeWithOperands] = [] @@ -74,23 +89,35 @@ def process_evm_bytes(evm_bytes: bytes, assembly: bool = False) -> str: # noqa: raise ValueError(f"Unknown opcode: {opcode_byte}") if opcode.data_portion_length > 0: + signed = opcode in [Op.RJUMP, Op.RJUMPI] opcodes.append( OpcodeWithOperands( opcode=opcode, - operands=[int.from_bytes(evm_bytes[: opcode.data_portion_length], "big")], + operands=[ + int.from_bytes( + evm_bytes[: opcode.data_portion_length], "big", signed=signed + ) + ], ) ) evm_bytes = evm_bytes[opcode.data_portion_length :] elif opcode == Op.RJUMPV: - max_index = evm_bytes.pop(0) - operands: List[int] = [] - for _ in range(max_index + 1): - operands.append(int.from_bytes(evm_bytes[:2], "big")) - evm_bytes = evm_bytes[2:] - opcodes.append(OpcodeWithOperands(opcode=opcode, operands=operands)) + if len(evm_bytes) == 0: + opcodes.append(OpcodeWithOperands(opcode=opcode)) + else: + max_index = evm_bytes.pop(0) + operands: List[int] = [] + for _ in range(max_index + 1): + operands.append(int.from_bytes(evm_bytes[:2], "big", signed=True)) + evm_bytes = evm_bytes[2:] + opcodes.append(OpcodeWithOperands(opcode=opcode, operands=operands)) else: opcodes.append(OpcodeWithOperands(opcode=opcode)) + return opcodes + + +def format_opcodes(opcodes: List[OpcodeWithOperands], assembly: bool = False) -> str: # noqa: D103 if assembly: opcodes_with_empty_lines: List[OpcodeWithOperands] = [] for i, op_with_operands in enumerate(opcodes): @@ -113,7 +140,7 @@ def process_evm_bytes_string(evm_bytes_hex_string: str, assembly: bool = False) evm_bytes_hex_string = evm_bytes_hex_string[2:] evm_bytes = bytes.fromhex(evm_bytes_hex_string) - 
return process_evm_bytes(evm_bytes, assembly=assembly) + return format_opcodes(process_evm_bytes(evm_bytes), assembly=assembly) assembly_option = click.option( @@ -208,5 +235,7 @@ def binary_file(binary_file_path, assembly: bool): ... ``` """ # noqa: E501 - processed_output = process_evm_bytes(binary_file_path.read(), assembly=assembly) + processed_output = format_opcodes( + process_evm_bytes(binary_file_path.read()), assembly=assembly + ) click.echo(processed_output) diff --git a/src/cli/tests/test_eofwrap.py b/src/cli/tests/test_eofwrap.py new file mode 100644 index 0000000000..2f554ef8a2 --- /dev/null +++ b/src/cli/tests/test_eofwrap.py @@ -0,0 +1,53 @@ +""" +Tests for the eofwrap module and click CLI. +""" +import pytest + +from ethereum_test_base_types.conversions import to_hex +from ethereum_test_tools import Opcodes as Op +from ethereum_test_types.eof.v1 import Container + +from ..eofwrap import wrap_code + + +@pytest.mark.parametrize( + "code,result", + [ + [Op.STOP, Container.Code(Op.STOP)], + [Op.RETURN(0, 0), Container.Code(Op.RETURN(0, 0))], + [Op.REVERT(0, 0), Container.Code(Op.REVERT(0, 0))], + [Op.INVALID, Container.Code(Op.INVALID)], + [Op.PUSH1, Container.Code(Op.PUSH1[0] + Op.STOP)], + [Op.PUSH1[0], Container.Code(Op.PUSH1[0] + Op.STOP)], + [Op.PUSH1[0] + Op.STOP, Container.Code(Op.PUSH1[0] + Op.STOP)], + [Op.STOP + Op.STOP, Container.Code(Op.STOP)], + [Op.RETURN(0, 0) + Op.STOP, Container.Code(Op.RETURN(0, 0))], + [Op.REVERT(0, 0) + Op.STOP, Container.Code(Op.REVERT(0, 0))], + [Op.INVALID + Op.STOP, Container.Code(Op.INVALID)], + [Op.ADDRESS, Container.Code(Op.ADDRESS + Op.STOP)], + [Op.ADDRESS + Op.STOP, Container.Code(Op.ADDRESS + Op.STOP)], + [Op.ADDRESS + Op.RETURN(0, 0), Container.Code(Op.ADDRESS + Op.RETURN(0, 0))], + [Op.ADDRESS + Op.REVERT(0, 0), Container.Code(Op.ADDRESS + Op.REVERT(0, 0))], + [Op.ADDRESS + Op.INVALID, Container.Code(Op.ADDRESS + Op.INVALID)], + [Op.ADDRESS + Op.STOP + Op.STOP, Container.Code(Op.ADDRESS + Op.STOP)], + 
[Op.ADDRESS + Op.RETURN(0, 0) + Op.STOP, Container.Code(Op.ADDRESS + Op.RETURN(0, 0))], + [Op.ADDRESS + Op.REVERT(0, 0) + Op.STOP, Container.Code(Op.ADDRESS + Op.REVERT(0, 0))], + [Op.ADDRESS + Op.INVALID + Op.STOP, Container.Code(Op.ADDRESS + Op.INVALID)], + [Op.GAS + Op.STOP, Container.Code(Op.GAS + Op.STOP)], + [Op.GAS + Op.RETURN(0, 0), Container.Code(Op.GAS + Op.RETURN(0, 0))], + [Op.GAS + Op.REVERT(0, 0), Container.Code(Op.GAS + Op.REVERT(0, 0))], + [Op.GAS + Op.INVALID, Container.Code(Op.GAS + Op.INVALID)], + [Op.RJUMPV[1, 2, 3], Container.Code(Op.RJUMPV[1, 2, 3] + Op.STOP)], + [Op.RJUMPV, Container.Code(Op.RJUMPV + Op.STOP)], + [ + Op.RJUMPV[-1, 0x7FFF, -0x7FFF], + Container.Code(Op.RJUMPV[-1, 0x7FFF, -0x7FFF] + Op.STOP), + ], + ], + ids=lambda param: to_hex(param), +) +def test_wrap_code(code, result): + """ + Tests for the EOF wrapping logic and heuristics + """ + assert wrap_code(bytes(code)) == result diff --git a/src/cli/tests/test_evm_bytes.py b/src/cli/tests/test_evm_bytes.py index 561eeb04a8..8d172ddcc7 100644 --- a/src/cli/tests/test_evm_bytes.py +++ b/src/cli/tests/test_evm_bytes.py @@ -16,6 +16,19 @@ "0x7fa0a1a2a3a4a5a6a7a8a9aaabacadaeafb0b1b2b3b4b5b6b7b8b9babbbcbdbebf5f527fc0c1c2c3c4c5c6c7c8c9cacbcccdcecfd0d1d2d3d4d5d6d7d8d9dadbdcdddedf6020527fe0e1e2e3e4e5e6e7e8e9eaebecedeeeff0f1f2f3f4f5f6f7f8f9fafbfcfdfeff60405260786040356020355f35608a565b5f515f55602051600155604051600255005b5e56", # noqa: E501 "Op.PUSH32[0xa0a1a2a3a4a5a6a7a8a9aaabacadaeafb0b1b2b3b4b5b6b7b8b9babbbcbdbebf] + Op.PUSH0 + Op.MSTORE + Op.PUSH32[0xc0c1c2c3c4c5c6c7c8c9cacbcccdcecfd0d1d2d3d4d5d6d7d8d9dadbdcdddedf] + Op.PUSH1[0x20] + Op.MSTORE + Op.PUSH32[0xe0e1e2e3e4e5e6e7e8e9eaebecedeeeff0f1f2f3f4f5f6f7f8f9fafbfcfdfeff] + Op.PUSH1[0x40] + Op.MSTORE + Op.PUSH1[0x78] + Op.PUSH1[0x40] + Op.CALLDATALOAD + Op.PUSH1[0x20] + Op.CALLDATALOAD + Op.PUSH0 + Op.CALLDATALOAD + Op.PUSH1[0x8a] + Op.JUMP + Op.JUMPDEST + Op.PUSH0 + Op.MLOAD + Op.PUSH0 + Op.SSTORE + Op.PUSH1[0x20] + Op.MLOAD + 
Op.PUSH1[0x1] + Op.SSTORE + Op.PUSH1[0x40] + Op.MLOAD + Op.PUSH1[0x2] + Op.SSTORE + Op.STOP + Op.JUMPDEST + Op.MCOPY + Op.JUMP", # noqa: E501 ] +rjump_vector = [ + "0xe0fffe", + "Op.RJUMP[-0x2]", +] +rjumpi_vector = [ + "0xe1fffe", + "Op.RJUMPI[-0x2]", +] +rjumpv_vector = [ + "0xe213b1465aef60276095472e3250cf64736f6c63430008150033a26469706673582212206eab0a7969fe", + "Op.RJUMPV[-0x4eba, 0x5aef, 0x6027, 0x6095, 0x472e, 0x3250, -0x309c, 0x736f, 0x6c63, 0x4300," + + " 0x815, 0x33, -0x5d9c, 0x6970, 0x6673, 0x5822, 0x1220, 0x6eab, 0xa79, 0x69fe]", +] @pytest.mark.parametrize( @@ -25,6 +38,12 @@ (basic_vector[0][2:], basic_vector[1]), # no "0x" prefix (complex_vector[0], complex_vector[1]), (complex_vector[0][2:], complex_vector[1]), # no "0x" prefix + (rjump_vector[0], rjump_vector[1]), + (rjump_vector[0][2:], rjump_vector[1]), # no "0x" prefix + (rjumpi_vector[0], rjumpi_vector[1]), + (rjumpi_vector[0][2:], rjumpi_vector[1]), # no "0x" prefix + (rjumpv_vector[0], rjumpv_vector[1]), + (rjumpv_vector[0][2:], rjumpv_vector[1]), # no "0x" prefix ], ) def test_evm_bytes(evm_bytes: str, python_opcodes: str): diff --git a/whitelist.txt b/whitelist.txt index f4c36b4f4e..a5fca6a75e 100644 --- a/whitelist.txt +++ b/whitelist.txt @@ -125,6 +125,7 @@ eip7002 eip7069 eip7620 eip7692 +P7692 eip7251 eips EIPs @@ -136,6 +137,7 @@ env envvar EOA eof +eofwrap EOF1 EOFException eofparse @@ -231,6 +233,7 @@ initcode inputdata instantiation io +isfile isidentifier islice isort @@ -390,6 +393,7 @@ str streetsidesoftware subcall subclasscheck +subdir subdirectories subdirectory subgraph